1 /* Various utility functions.
2 Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
3 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
5 This file is part of GNU Wget.
7 GNU Wget is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 GNU Wget is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with Wget. If not, see <http://www.gnu.org/licenses/>.
20 Additional permission under GNU GPL version 3 section 7
22 If you modify this program, or any covered work, by linking or
23 combining it with the OpenSSL project's OpenSSL library (or a
24 modified version of that library), containing parts covered by the
25 terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
26 grants you additional permission to convey the resulting work.
27 Corresponding Source for a non-source form of such a combination
28 shall include the source code for the parts of OpenSSL used as well
29 as that of the covered work. */
37 #ifdef HAVE_SYS_TIME_H
38 # include <sys/time.h>
44 # include <sys/mman.h>
49 #ifdef HAVE_SYS_UTIME_H
50 # include <sys/utime.h>
58 /* For TIOCGWINSZ and friends: */
59 #ifdef HAVE_SYS_IOCTL_H
60 # include <sys/ioctl.h>
66 /* Needed for Unix version of run_with_timeout. */
70 #ifndef HAVE_SIGSETJMP
71 /* If sigsetjmp is a macro, configure won't pick it up. */
73 # define HAVE_SIGSETJMP
77 #if defined HAVE_SIGSETJMP || defined HAVE_SIGBLOCK
78 # define USE_SIGNAL_TIMEOUT
89 /* Utility function: like xstrdup(), but also lowercases S. */
92 xstrdup_lower (const char *s)
94 char *copy = xstrdup (s);
101 /* Copy the string formed by two pointers (one on the beginning, other
102 on the char after the last char) to a new, malloc-ed location.
105 strdupdelim (const char *beg, const char *end)
107 char *res = xmalloc (end - beg + 1);
108 memcpy (res, beg, end - beg);
109 res[end - beg] = '\0';
113 /* Parse a string containing comma-separated elements, and return a
114 vector of char pointers with the elements. Spaces following the
115 commas are ignored. */
117 sepstring (const char *s)
131 res = xrealloc (res, (i + 2) * sizeof (char *));
132 res[i] = strdupdelim (p, s);
135 /* Skip the blanks following the ','. */
143 res = xrealloc (res, (i + 2) * sizeof (char *));
144 res[i] = strdupdelim (p, s);
149 /* Like sprintf, but prints into a string of sufficient size freshly
150 allocated with malloc, which is returned. If unable to print due
151 to invalid format, returns NULL. Inability to allocate needed
152 memory results in abort, as with xmalloc. This is in spirit
153 similar to the GNU/BSD extension asprintf, but somewhat easier to
156 Internally the function either calls vasprintf or loops around
157 vsnprintf until the correct size is found. Since Wget also ships a
158 fallback implementation of vsnprintf, this should be portable. */
161 aprintf (const char *fmt, ...)
163 #if defined HAVE_VASPRINTF && !defined DEBUG_MALLOC
168 va_start (args, fmt);
169 ret = vasprintf (&str, fmt, args);
171 if (ret < 0 && errno == ENOMEM)
172 abort (); /* for consistency with xmalloc/xrealloc */
176 #else /* not HAVE_VASPRINTF */
178 /* vasprintf is unavailable. snprintf into a small buffer and
179 resize it as necessary. */
181 char *str = xmalloc (size);
183 /* #### This code will infloop and eventually abort in xrealloc if
184 passed a FMT that causes snprintf to consistently return -1. */
191 va_start (args, fmt);
192 n = vsnprintf (str, size, fmt, args);
195 /* If the printing worked, return the string. */
196 if (n > -1 && n < size)
199 /* Else try again with a larger buffer. */
200 if (n > -1) /* C99 */
201 size = n + 1; /* precisely what is needed */
203 size <<= 1; /* twice the old size */
204 str = xrealloc (str, size);
206 #endif /* not HAVE_VASPRINTF */
209 /* Concatenate the NULL-terminated list of string arguments into
210 freshly allocated space. */
213 concat_strings (const char *str0, ...)
216 int saved_lengths[5]; /* inspired by Apache's apr_pstrcat */
219 const char *next_str;
220 int total_length = 0;
223 /* Calculate the length of and allocate the resulting string. */
226 va_start (args, str0);
227 for (next_str = str0; next_str != NULL; next_str = va_arg (args, char *))
229 int len = strlen (next_str);
230 if (argcount < countof (saved_lengths))
231 saved_lengths[argcount++] = len;
235 p = ret = xmalloc (total_length + 1);
237 /* Copy the strings into the allocated space. */
240 va_start (args, str0);
241 for (next_str = str0; next_str != NULL; next_str = va_arg (args, char *))
244 if (argcount < countof (saved_lengths))
245 len = saved_lengths[argcount++];
247 len = strlen (next_str);
248 memcpy (p, next_str, len);
257 /* Format the provided time according to the specified format. The
258 format is a string with format elements supported by strftime. */
261 fmttime (time_t t, const char *fmt)
263 static char output[32];
264 struct tm *tm = localtime(&t);
267 if (!strftime(output, sizeof(output), fmt, tm))
272 /* Return pointer to a static char[] buffer in which zero-terminated
273 string-representation of TM (in form hh:mm:ss) is printed.
275 If TM is NULL, the current time will be used. */
280 return fmttime(t, "%H:%M:%S");
283 /* Like the above, but include the date: YYYY-MM-DD hh:mm:ss. */
286 datetime_str (time_t t)
288 return fmttime(t, "%Y-%m-%d %H:%M:%S");
291 /* The Windows versions of the following two functions are defined in
292 mswindows.c. On MSDOS this function should never be called. */
294 #if !defined(WINDOWS) && !defined(MSDOS)
296 fork_to_background (void)
299 /* Whether we arrange our own version of opt.lfilename here. */
300 bool logfile_changed = false;
304 /* We must create the file immediately to avoid either a race
305 condition (which arises from using unique_name and failing to
306 use fopen_excl) or lying to the user about the log file name
307 (which arises from using unique_name, printing the name, and
308 using fopen_excl later on.) */
309 FILE *new_log_fp = unique_create (DEFAULT_LOGFILE, false, &opt.lfilename);
312 logfile_changed = true;
325 /* parent, no error */
326 printf (_("Continuing in background, pid %d.\n"), (int) pid);
328 printf (_("Output will be written to `%s'.\n"), opt.lfilename);
329 exit (0); /* #### should we use _exit()? */
332 /* child: give up the privileges and keep running. */
334 freopen ("/dev/null", "r", stdin);
335 freopen ("/dev/null", "w", stdout);
336 freopen ("/dev/null", "w", stderr);
338 #endif /* !WINDOWS && !MSDOS */
340 /* "Touch" FILE, i.e. make its mtime ("modified time") equal the time
341 specified with TM. The atime ("access time") is set to the current
345 touch (const char *file, time_t tm)
347 #ifdef HAVE_STRUCT_UTIMBUF
348 struct utimbuf times;
356 times.actime = time (NULL);
357 if (utime (file, &times) == -1)
358 logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno));
361 /* Checks if FILE is a symbolic link, and removes it if it is. Does
362 nothing under MS-Windows. */
364 remove_link (const char *file)
369 if (lstat (file, &st) == 0 && S_ISLNK (st.st_mode))
371 DEBUGP (("Unlinking %s (symlink).\n", file));
374 logprintf (LOG_VERBOSE, _("Failed to unlink symlink `%s': %s\n"),
375 file, strerror (errno));
380 /* Does FILENAME exist? This is quite a lousy implementation, since
381 it supplies no error codes -- only a yes-or-no answer. Thus it
382 will return that a file does not exist if, e.g., the directory is
383 unreadable. I don't mind it too much currently, though. The
384 proper way should, of course, be to have a third, error state,
385 other than true/false, but that would introduce uncalled-for
386 additional complexity to the callers. */
388 file_exists_p (const char *filename)
391 return access (filename, F_OK) >= 0;
394 return stat (filename, &buf) >= 0;
398 /* Returns 0 if PATH is a directory, 1 otherwise (any kind of file).
399 Returns 0 on error. */
401 file_non_directory_p (const char *path)
404 /* Use lstat() rather than stat() so that symbolic links pointing to
405 directories can be identified correctly. */
406 if (lstat (path, &buf) != 0)
408 return S_ISDIR (buf.st_mode) ? false : true;
411 /* Return the size of file named by FILENAME, or -1 if it cannot be
412 opened or seeked into. */
414 file_size (const char *filename)
416 #if defined(HAVE_FSEEKO) && defined(HAVE_FTELLO)
418 /* We use fseek rather than stat to determine the file size because
419 that way we can also verify that the file is readable without
420 explicitly checking for permissions. Inspired by the POST patch
422 FILE *fp = fopen (filename, "rb");
425 fseeko (fp, 0, SEEK_END);
431 if (stat (filename, &st) < 0)
437 /* stat file names named PREFIX.1, PREFIX.2, etc., until one that
438 doesn't exist is found. Return a freshly allocated copy of the
442 unique_name_1 (const char *prefix)
445 int plen = strlen (prefix);
446 char *template = (char *)alloca (plen + 1 + 24);
447 char *template_tail = template + plen;
449 memcpy (template, prefix, plen);
450 *template_tail++ = '.';
453 number_to_string (template_tail, count++);
454 while (file_exists_p (template));
456 return xstrdup (template);
459 /* Return a unique file name, based on FILE.
461 More precisely, if FILE doesn't exist, it is returned unmodified.
462 If not, FILE.1 is tried, then FILE.2, etc. The first FILE.<number>
463 file name that doesn't exist is returned.
465 The resulting file is not created, only verified that it didn't
466 exist at the point in time when the function was called.
467 Therefore, where security matters, don't rely on the returned name
468 remaining unused; open the file with O_EXCL or
471 If ALLOW_PASSTHROUGH is 0, it always returns a freshly allocated
472 string. Otherwise, it may return FILE if the file doesn't exist
473 (and therefore doesn't need changing). */
476 unique_name (const char *file, bool allow_passthrough)
478 /* If the FILE itself doesn't exist, return it without
480 if (!file_exists_p (file))
481 return allow_passthrough ? (char *)file : xstrdup (file);
483 /* Otherwise, find a numeric suffix that results in unused file name
485 return unique_name_1 (file);
488 /* Create a file based on NAME, except without overwriting an existing
489 file with that name. Providing O_EXCL is correctly implemented,
490 this function does not have the race condition associated with
491 opening the file returned by unique_name. */
494 unique_create (const char *name, bool binary, char **opened_name)
496 /* unique file name, based on NAME */
497 char *uname = unique_name (name, false);
499 while ((fp = fopen_excl (uname, binary)) == NULL && errno == EEXIST)
502 uname = unique_name (name, false);
504 if (opened_name && fp != NULL)
507 *opened_name = uname;
519 /* Open the file for writing, with the addition that the file is
520 opened "exclusively". This means that, if the file already exists,
521 this function will *fail* and errno will be set to EEXIST. If
522 BINARY is set, the file will be opened in binary mode, equivalent
525 If opening the file fails for any reason, including the file having
526 previously existed, this function returns NULL and sets errno
530 fopen_excl (const char *fname, bool binary)
534 int flags = O_WRONLY | O_CREAT | O_EXCL;
539 fd = open (fname, flags, 0666);
542 return fdopen (fd, binary ? "wb" : "w");
543 #else /* not O_EXCL */
544 /* Manually check whether the file exists. This is prone to race
545 conditions, but systems without O_EXCL haven't deserved
547 if (file_exists_p (fname))
552 return fopen (fname, binary ? "wb" : "w");
553 #endif /* not O_EXCL */
556 /* Create DIRECTORY. If some of the pathname components of DIRECTORY
557 are missing, create them first. In case any mkdir() call fails,
558 return its error status. Returns 0 on successful completion.
560 The behaviour of this function should be identical to the behaviour
561 of `mkdir -p' on systems where mkdir supports the `-p' option. */
563 make_directory (const char *directory)
565 int i, ret, quit = 0;
568 /* Make a copy of dir, to be able to write to it. Otherwise, the
569 function is unsafe if called with a read-only char *argument. */
570 STRDUP_ALLOCA (dir, directory);
572 /* If the first character of dir is '/', skip it (and thus enable
573 creation of absolute-pathname directories). */
574 for (i = (*dir == '/'); 1; ++i)
576 for (; dir[i] && dir[i] != '/'; i++)
581 /* Check whether the directory already exists. Allow creation of
582 intermediate directories to fail, as the initial path components
583 are not necessarily directories! */
584 if (!file_exists_p (dir))
585 ret = mkdir (dir, 0777);
596 /* Merge BASE with FILE. BASE can be a directory or a file name, FILE
597 should be a file name.
599 file_merge("/foo/bar", "baz") => "/foo/baz"
600 file_merge("/foo/bar/", "baz") => "/foo/bar/baz"
601 file_merge("foo", "bar") => "bar"
603 In other words, it's a simpler and gentler version of uri_merge. */
606 file_merge (const char *base, const char *file)
609 const char *cut = (const char *)strrchr (base, '/');
612 return xstrdup (file);
614 result = xmalloc (cut - base + 1 + strlen (file) + 1);
615 memcpy (result, base, cut - base);
616 result[cut - base] = '/';
617 strcpy (result + (cut - base) + 1, file);
622 /* Like fnmatch, but performs a case-insensitive match. */
625 fnmatch_nocase (const char *pattern, const char *string, int flags)
628 /* The FNM_CASEFOLD flag started as a GNU extension, but it is now
629 also present on *BSD platforms, and possibly elsewhere. */
630 return fnmatch (pattern, string, flags | FNM_CASEFOLD);
632 /* Turn PATTERN and STRING to lower case and call fnmatch on them. */
633 char *patcopy = (char *) alloca (strlen (pattern) + 1);
634 char *strcopy = (char *) alloca (strlen (string) + 1);
636 for (p = patcopy; *pattern; pattern++, p++)
637 *p = TOLOWER (*pattern);
639 for (p = strcopy; *string; string++, p++)
640 *p = TOLOWER (*string);
642 return fnmatch (patcopy, strcopy, flags);
646 static bool in_acclist (const char *const *, const char *, bool);
648 /* Determine whether a file is acceptable to be followed, according to
649 lists of patterns to accept/reject. */
651 acceptable (const char *s)
655 while (l && s[l] != '/')
662 return (in_acclist ((const char *const *)opt.accepts, s, true)
663 && !in_acclist ((const char *const *)opt.rejects, s, true));
665 return in_acclist ((const char *const *)opt.accepts, s, true);
667 else if (opt.rejects)
668 return !in_acclist ((const char *const *)opt.rejects, s, true);
672 /* Check if D2 is a subdirectory of D1. E.g. if D1 is `/something', subdir_p()
673 will return true if and only if D2 begins with `/something/' or is exactly
676 subdir_p (const char *d1, const char *d2)
678 if (!opt.ignore_case)
679 for (; *d1 && *d2 && (*d1 == *d2); ++d1, ++d2)
682 for (; *d1 && *d2 && (TOLOWER (*d1) == TOLOWER (*d2)); ++d1, ++d2)
685 return *d1 == '\0' && (*d2 == '\0' || *d2 == '/');
688 /* Iterate through DIRLIST (which must be NULL-terminated), and return true
689 if some element matches DIR, through wildcards or front comparison (as
692 dir_matches_p (char **dirlist, const char *dir)
695 int (*matcher) (const char *, const char *, int)
696 = opt.ignore_case ? fnmatch_nocase : fnmatch;
698 for (x = dirlist; *x; x++)
700 /* Remove leading '/' */
701 char *p = *x + (**x == '/');
702 if (has_wildcards_p (p))
704 if (matcher (p, dir, FNM_PATHNAME) == 0)
709 if (subdir_p (p, dir))
714 return *x ? true : false;
717 /* Returns whether DIRECTORY is acceptable for download, wrt the
718 include/exclude lists.
720 The leading `/' is ignored in paths; relative and absolute paths
721 may be freely intermixed. */
724 accdir (const char *directory)
726 /* Remove starting '/'. */
727 if (*directory == '/')
731 if (!dir_matches_p (opt.includes, directory))
736 if (dir_matches_p (opt.excludes, directory))
742 /* Return true if STRING ends with TAIL. For instance:
744 match_tail ("abc", "bc", false) -> 1
745 match_tail ("abc", "ab", false) -> 0
746 match_tail ("abc", "abc", false) -> 1
748 If FOLD_CASE is true, the comparison will be case-insensitive. */
751 match_tail (const char *string, const char *tail, bool fold_case)
755 /* We want this to be fast, so we code two loops, one with
756 case-folding, one without. */
760 for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--)
761 if (string[i] != tail[j])
766 for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--)
767 if (TOLOWER (string[i]) != TOLOWER (tail[j]))
771 /* If the tail was exhausted, the match was successful. */
778 /* Checks whether string S matches each element of ACCEPTS. A list
779 element is matched either with fnmatch() or match_tail(),
780 according to whether the element contains wildcards or not.
782 If BACKWARD is false, don't do backward comparison -- just compare
785 in_acclist (const char *const *accepts, const char *s, bool backward)
787 for (; *accepts; accepts++)
789 if (has_wildcards_p (*accepts))
791 int res = opt.ignore_case
792 ? fnmatch_nocase (*accepts, s, 0) : fnmatch (*accepts, s, 0);
793 /* fnmatch returns 0 if the pattern *does* match the string. */
801 if (match_tail (s, *accepts, opt.ignore_case))
806 int cmp = opt.ignore_case
807 ? strcasecmp (s, *accepts) : strcmp (s, *accepts);
816 /* Return the location of STR's suffix (file extension). Examples:
817 suffix ("foo.bar") -> "bar"
818 suffix ("foo.bar.baz") -> "baz"
819 suffix ("/foo/bar") -> NULL
820 suffix ("/foo.bar/baz") -> NULL */
822 suffix (const char *str)
826 for (i = strlen (str); i && str[i] != '/' && str[i] != '.'; i--)
830 return (char *)str + i;
835 /* Return true if S contains globbing wildcards (`*', `?', `[' or
839 has_wildcards_p (const char *s)
842 if (*s == '*' || *s == '?' || *s == '[' || *s == ']')
847 /* Return true if FNAME ends with a typical HTML suffix. The
848 following (case-insensitive) suffixes are presumed to be HTML
853 ?html (`?' matches one character)
855 #### CAVEAT. This is not necessarily a good indication that FNAME
856 refers to a file that contains HTML! */
858 has_html_suffix_p (const char *fname)
862 if ((suf = suffix (fname)) == NULL)
864 if (!strcasecmp (suf, "html"))
866 if (!strcasecmp (suf, "htm"))
868 if (suf[0] && !strcasecmp (suf + 1, "html"))
873 /* Read a line from FP and return the pointer to freshly allocated
874 storage. The storage space is obtained through malloc() and should
875 be freed with free() when it is no longer needed.
877 The length of the line is not limited, except by available memory.
878 The newline character at the end of line is retained. The line is
879 terminated with a zero character.
881 After end-of-file is encountered without anything being read, NULL
882 is returned. NULL is also returned on error. To distinguish
883 between these two cases, use the stdio function ferror(). */
886 read_whole_line (FILE *fp)
890 char *line = xmalloc (bufsize);
892 while (fgets (line + length, bufsize - length, fp))
894 length += strlen (line + length);
896 /* Possible for example when reading from a binary file where
897 a line begins with \0. */
900 if (line[length - 1] == '\n')
903 /* fgets() guarantees to read the whole line, or to use up the
904 space we've given it. We can double the buffer
907 line = xrealloc (line, bufsize);
909 if (length == 0 || ferror (fp))
914 if (length + 1 < bufsize)
915 /* Relieve the memory from our exponential greediness. We say
916 `length + 1' because the terminating \0 is not included in
917 LENGTH. We don't need to zero-terminate the string ourselves,
918 though, because fgets() does that. */
919 line = xrealloc (line, length + 1);
923 /* Read FILE into memory. A pointer to `struct file_memory' is
924 returned; use struct element `content' to access file contents, and
925 the element `length' to know the file length. `content' is *not*
926 zero-terminated, and you should *not* read or write beyond the [0,
927 length) range of characters.
929 After you are done with the file contents, call read_file_free to
932 Depending on the operating system and the type of file that is
933 being read, read_file() either mmap's the file into memory, or
934 reads the file into the core using read().
936 If file is named "-", fileno(stdin) is used for reading instead.
937 If you want to read from a real file named "-", use "./-" instead. */
940 read_file (const char *file)
943 struct file_memory *fm;
945 bool inhibit_close = false;
947 /* Some magic in the finest tradition of Perl and its kin: if FILE
948 is "-", just use stdin. */
952 inhibit_close = true;
953 /* Note that we don't inhibit mmap() in this case. If stdin is
954 redirected from a regular file, mmap() will still work. */
957 fd = open (file, O_RDONLY);
960 fm = xnew (struct file_memory);
965 if (fstat (fd, &buf) < 0)
967 fm->length = buf.st_size;
968 /* NOTE: As far as I know, the callers of this function never
969 modify the file text. Relying on this would enable us to
970 specify PROT_READ and MAP_SHARED for a marginal gain in
971 efficiency, but at some cost to generality. */
972 fm->content = mmap (NULL, fm->length, PROT_READ | PROT_WRITE,
974 if (fm->content == (char *)MAP_FAILED)
984 /* The most common reason why mmap() fails is that FD does not point
985 to a plain file. However, it's also possible that mmap() doesn't
986 work for a particular type of file. Therefore, whenever mmap()
987 fails, we just fall back to the regular method. */
988 #endif /* HAVE_MMAP */
991 size = 512; /* number of bytes fm->content can
992 hold at any given time. */
993 fm->content = xmalloc (size);
997 if (fm->length > size / 2)
999 /* #### I'm not sure whether the whole exponential-growth
1000 thing makes sense with kernel read. On Linux at least,
1001 read() refuses to read more than 4K from a file at a
1002 single chunk anyway. But other Unixes might optimize it
1003 better, and it doesn't *hurt* anything, so I'm leaving
1006 /* Normally, we grow SIZE exponentially to make the number
1007 of calls to read() and realloc() logarithmic in relation
1008 to file size. However, read() can read an amount of data
1009 smaller than requested, and it would be unreasonable to
1010 double SIZE every time *something* was read. Therefore,
1011 we double SIZE only when the length exceeds half of the
1012 entire allocated size. */
1014 fm->content = xrealloc (fm->content, size);
1016 nread = read (fd, fm->content + fm->length, size - fm->length);
1018 /* Successful read. */
1019 fm->length += nread;
1029 if (size > fm->length && fm->length != 0)
1030 /* Due to exponential growth of fm->content, the allocated region
1031 might be much larger than what is actually needed. */
1032 fm->content = xrealloc (fm->content, fm->length);
1039 xfree (fm->content);
1044 /* Release the resources held by FM. Specifically, this calls
1045 munmap() or xfree() on fm->content, depending whether mmap or
1046 malloc/read were used to read in the file. It also frees the
1047 memory needed to hold the FM structure itself. */
1050 read_file_free (struct file_memory *fm)
1055 munmap (fm->content, fm->length);
1060 xfree (fm->content);
1065 /* Free the pointers in a NULL-terminated vector of pointers, then
1066 free the pointer itself. */
1068 free_vec (char **vec)
1079 /* Append vector V2 to vector V1. The function frees V2 and
1080 reallocates V1 (thus you may not use the contents of either
1081 pointer after the call). If V1 is NULL, V2 is returned. */
1083 merge_vecs (char **v1, char **v2)
1093 /* To avoid j == 0 */
1098 for (i = 0; v1[i]; i++)
1101 for (j = 0; v2[j]; j++)
1103 /* Reallocate v1. */
1104 v1 = xrealloc (v1, (i + j + 1) * sizeof (char **));
1105 memcpy (v1 + i, v2, (j + 1) * sizeof (char *));
1110 /* Append a freshly allocated copy of STR to VEC. If VEC is NULL, it
1111 is allocated as needed. Return the new value of the vector. */
1114 vec_append (char **vec, const char *str)
1116 int cnt; /* count of vector elements, including
1117 the one we're about to append */
1120 for (cnt = 0; vec[cnt]; cnt++)
1126 /* Reallocate the array to fit the new element and the NULL. */
1127 vec = xrealloc (vec, (cnt + 1) * sizeof (char *));
1128 /* Append a copy of STR to the vector. */
1129 vec[cnt - 1] = xstrdup (str);
1134 /* Sometimes it's useful to create "sets" of strings, i.e. special
1135 hash tables where you want to store strings as keys and merely
1136 query for their existence. Here is a set of utility routines that
1137 makes that transparent. */
1140 string_set_add (struct hash_table *ht, const char *s)
1142 /* First check whether the set element already exists. If it does,
1143 do nothing so that we don't have to free() the old element and
1144 then strdup() a new one. */
1145 if (hash_table_contains (ht, s))
1148 /* We use "1" as value. It provides us a useful and clear arbitrary
1149 value, and it consumes no memory -- the pointers to the same
1150 string "1" will be shared by all the key-value pairs in all `set'
1152 hash_table_put (ht, xstrdup (s), "1");
1155 /* Synonym for hash_table_contains... */
1158 string_set_contains (struct hash_table *ht, const char *s)
1160 return hash_table_contains (ht, s);
1163 /* Convert the specified string set to array. ARRAY should be large
1164 enough to hold hash_table_count(ht) char pointers. */
1166 void string_set_to_array (struct hash_table *ht, char **array)
1168 hash_table_iterator iter;
1169 for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
1170 *array++ = iter.key;
1173 /* Free the string set. This frees both the storage allocated for
1174 keys and the actual hash table. (hash_table_destroy would only
1175 destroy the hash table.) */
1178 string_set_free (struct hash_table *ht)
1180 hash_table_iterator iter;
1181 for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
1183 hash_table_destroy (ht);
1186 /* Utility function: simply call xfree() on all keys and values of HT. */
1189 free_keys_and_values (struct hash_table *ht)
1191 hash_table_iterator iter;
1192 for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
1199 /* Get digit grouping data for thousand separators by calling
1200 localeconv(). The data includes separator string and grouping info
1201 and is cached after the first call to the function.
1203 In locales that don't set a thousand separator (such as the "C"
1204 locale), this forces it to be ",". We are now only showing
1205 thousand separators in one place, so this shouldn't be a problem in
1209 get_grouping_data (const char **sep, const char **grouping)
1211 static const char *cached_sep;
1212 static const char *cached_grouping;
1213 static bool initialized;
1216 /* Get the grouping info from the locale. */
1217 struct lconv *lconv = localeconv ();
1218 cached_sep = lconv->thousands_sep;
1219 cached_grouping = lconv->grouping;
1222 /* Many locales (such as "C" or "hr_HR") don't specify
1223 grouping, which we still want to use for legibility.
1224 In those locales set the sep char to ',', unless that
1225 character is used for decimal point, in which case set it
1227 if (*lconv->decimal_point != ',')
1231 cached_grouping = "\x03";
1236 *grouping = cached_grouping;
1239 /* Return a printed representation of N with thousand separators.
1240 This should respect locale settings, with the exception of the "C"
1241 locale which mandates no separator, but we use one anyway.
1243 Unfortunately, we cannot use %'d (in fact it would be %'j) to get
1244 the separators because it's too non-portable, and it's hard to test
1245 for this feature at configure time. Besides, it wouldn't display
1246 separators in the "C" locale, still used by many Unix users. */
1249 with_thousand_seps (wgint n)
1251 static char outbuf[48];
1252 char *p = outbuf + sizeof outbuf;
1254 /* Info received from locale */
1255 const char *grouping, *sep;
1258 /* State information */
1259 int i = 0, groupsize;
1260 const char *atgroup;
1262 bool negative = n < 0;
1264 /* Initialize grouping data. */
1265 get_grouping_data (&sep, &grouping);
1266 seplen = strlen (sep);
1268 groupsize = *atgroup++;
1270 /* This would overflow on WGINT_MIN, but printing negative numbers
1271 is not an important goal of this function. */
1275 /* Write the number into the buffer, backwards, inserting the
1276 separators as necessary. */
1280 *--p = n % 10 + '0';
1284 /* Prepend SEP to every groupsize'd digit and get new groupsize. */
1285 if (++i == groupsize)
1290 memcpy (p -= seplen, sep, seplen);
1293 groupsize = *atgroup++;
1302 /* N, a byte quantity, is converted to a human-readable abbreviated
1303 form a la sizes printed by `ls -lh'. The result is written to a
1304 static buffer, a pointer to which is returned.
1306 Unlike `with_thousand_seps', this approximates to the nearest unit.
1307 Quoting GNU libit: "Most people visually process strings of 3-4
1308 digits effectively, but longer strings of digits are more prone to
1309 misinterpretation. Hence, converting to an abbreviated form
1310 usually improves readability."
1312 This intentionally uses kilobyte (KB), megabyte (MB), etc. in their
1313 original computer-related meaning of "powers of 1024". We don't
1314 use the "*bibyte" names invented in 1998, and seldom used in
1315 practice. Wikipedia's entry on "binary prefix" discusses this in
1319 human_readable (HR_NUMTYPE n)
1321 /* These suffixes are compatible with those of GNU `ls -lh'. */
1322 static char powers[] =
1324 'K', /* kilobyte, 2^10 bytes */
1325 'M', /* megabyte, 2^20 bytes */
1326 'G', /* gigabyte, 2^30 bytes */
1327 'T', /* terabyte, 2^40 bytes */
1328 'P', /* petabyte, 2^50 bytes */
1329 'E', /* exabyte, 2^60 bytes */
1334 /* If the quantity is smaller than 1K, just print it. */
1337 snprintf (buf, sizeof (buf), "%d", (int) n);
1341 /* Loop over powers, dividing N with 1024 in each iteration. This
1342 works unchanged for all sizes of wgint, while still avoiding
1343 non-portable `long double' arithmetic. */
1344 for (i = 0; i < countof (powers); i++)
1346 /* At each iteration N is greater than the *subsequent* power.
1347 That way N/1024.0 produces a decimal number in the units of
1349 if ((n / 1024) < 1024 || i == countof (powers) - 1)
1351 double val = n / 1024.0;
1352 /* Print values smaller than 10 with one decimal digit, and
1353 others without any decimals. */
1354 snprintf (buf, sizeof (buf), "%.*f%c",
1355 val < 10 ? 1 : 0, val, powers[i]);
1360 return NULL; /* unreached */
1363 /* Count the digits in the provided number. Used to allocate space
1364 when printing numbers. */
1367 numdigit (wgint number)
1371 ++cnt; /* accommodate '-' */
1372 while ((number /= 10) != 0)
1377 #define PR(mask) *p++ = n / (mask) + '0'
1379 /* DIGITS_<D> is used to print a D-digit number and should be called
1380 with mask==10^(D-1). It prints n/mask (the first digit), reducing
1381 n to n%mask (the remaining digits), and calling DIGITS_<D-1>.
1382 Recursively this continues until DIGITS_1 is invoked. */
1384 #define DIGITS_1(mask) PR (mask)
1385 #define DIGITS_2(mask) PR (mask), n %= (mask), DIGITS_1 ((mask) / 10)
1386 #define DIGITS_3(mask) PR (mask), n %= (mask), DIGITS_2 ((mask) / 10)
1387 #define DIGITS_4(mask) PR (mask), n %= (mask), DIGITS_3 ((mask) / 10)
1388 #define DIGITS_5(mask) PR (mask), n %= (mask), DIGITS_4 ((mask) / 10)
1389 #define DIGITS_6(mask) PR (mask), n %= (mask), DIGITS_5 ((mask) / 10)
1390 #define DIGITS_7(mask) PR (mask), n %= (mask), DIGITS_6 ((mask) / 10)
1391 #define DIGITS_8(mask) PR (mask), n %= (mask), DIGITS_7 ((mask) / 10)
1392 #define DIGITS_9(mask) PR (mask), n %= (mask), DIGITS_8 ((mask) / 10)
1393 #define DIGITS_10(mask) PR (mask), n %= (mask), DIGITS_9 ((mask) / 10)
1395 /* DIGITS_<11-19> are only used on machines with 64-bit wgints. */
1397 #define DIGITS_11(mask) PR (mask), n %= (mask), DIGITS_10 ((mask) / 10)
1398 #define DIGITS_12(mask) PR (mask), n %= (mask), DIGITS_11 ((mask) / 10)
1399 #define DIGITS_13(mask) PR (mask), n %= (mask), DIGITS_12 ((mask) / 10)
1400 #define DIGITS_14(mask) PR (mask), n %= (mask), DIGITS_13 ((mask) / 10)
1401 #define DIGITS_15(mask) PR (mask), n %= (mask), DIGITS_14 ((mask) / 10)
1402 #define DIGITS_16(mask) PR (mask), n %= (mask), DIGITS_15 ((mask) / 10)
1403 #define DIGITS_17(mask) PR (mask), n %= (mask), DIGITS_16 ((mask) / 10)
1404 #define DIGITS_18(mask) PR (mask), n %= (mask), DIGITS_17 ((mask) / 10)
1405 #define DIGITS_19(mask) PR (mask), n %= (mask), DIGITS_18 ((mask) / 10)
1407 /* Shorthand for casting to wgint. */
1410 /* Print NUMBER to BUFFER in base 10. This is equivalent to
1411 `sprintf(buffer, "%lld", (long long) number)', only typically much
1412 faster and portable to machines without long long.
1414 The speedup may make a difference in programs that frequently
1415 convert numbers to strings. Some implementations of sprintf,
1416 particularly the one in some versions of GNU libc, have been known
1417 to be quite slow when converting integers to strings.
1419 Return the pointer to the location where the terminating zero was
1420 printed. (Equivalent to calling buffer+strlen(buffer) after the
1423 BUFFER should be large enough to accept as many bytes as you expect
1424 the number to take up. On machines with 64-bit wgints the maximum
1425 needed size is 24 bytes. That includes the digits needed for the
1426 largest 64-bit number, the `-' sign in case it's negative, and the
1427 terminating '\0'. */
1430 number_to_string (char *buffer, wgint number)
1435 int last_digit_char = 0;
1437 #if (SIZEOF_WGINT != 4) && (SIZEOF_WGINT != 8)
1438 /* We are running in a very strange environment. Leave the correct
1439 printing to sprintf. */
1440 p += sprintf (buf, "%j", (intmax_t) (n));
1441 #else /* (SIZEOF_WGINT == 4) || (SIZEOF_WGINT == 8) */
1447 /* n = -n would overflow because -n would evaluate to a
1448 wgint value larger than WGINT_MAX. Need to make n
1449 smaller and handle the last digit separately. */
1450 int last_digit = n % 10;
1451 /* The sign of n%10 is implementation-defined. */
1453 last_digit_char = '0' - last_digit;
1455 last_digit_char = '0' + last_digit;
1456 /* After n is made smaller, -n will not overflow. */
1464 /* Use the DIGITS_ macro appropriate for N's number of digits. That
1465 way printing any N is fully open-coded without a loop or jump.
1466 (Also see description of DIGITS_*.) */
1468 if (n < 10) DIGITS_1 (1);
1469 else if (n < 100) DIGITS_2 (10);
1470 else if (n < 1000) DIGITS_3 (100);
1471 else if (n < 10000) DIGITS_4 (1000);
1472 else if (n < 100000) DIGITS_5 (10000);
1473 else if (n < 1000000) DIGITS_6 (100000);
1474 else if (n < 10000000) DIGITS_7 (1000000);
1475 else if (n < 100000000) DIGITS_8 (10000000);
1476 else if (n < 1000000000) DIGITS_9 (100000000);
1477 #if SIZEOF_WGINT == 4
1478 /* wgint is 32 bits wide: no number has more than 10 digits. */
1479 else DIGITS_10 (1000000000);
1481 /* wgint is 64 bits wide: handle numbers with 9-19 decimal digits.
1482 Constants are constructed by compile-time multiplication to avoid
1483 dealing with different notations for 64-bit constants
1484 (nL/nLL/nI64, depending on the compiler and architecture). */
1485 else if (n < 10*(W)1000000000) DIGITS_10 (1000000000);
1486 else if (n < 100*(W)1000000000) DIGITS_11 (10*(W)1000000000);
1487 else if (n < 1000*(W)1000000000) DIGITS_12 (100*(W)1000000000);
1488 else if (n < 10000*(W)1000000000) DIGITS_13 (1000*(W)1000000000);
1489 else if (n < 100000*(W)1000000000) DIGITS_14 (10000*(W)1000000000);
1490 else if (n < 1000000*(W)1000000000) DIGITS_15 (100000*(W)1000000000);
1491 else if (n < 10000000*(W)1000000000) DIGITS_16 (1000000*(W)1000000000);
1492 else if (n < 100000000*(W)1000000000) DIGITS_17 (10000000*(W)1000000000);
1493 else if (n < 1000000000*(W)1000000000) DIGITS_18 (100000000*(W)1000000000);
1494 else DIGITS_19 (1000000000*(W)1000000000);
1497 if (last_digit_char)
1498 *p++ = last_digit_char;
1501 #endif /* (SIZEOF_WGINT == 4) || (SIZEOF_WGINT == 8) */
1508 #undef SPRINTF_WGINT
1531 /* Print NUMBER to a statically allocated string and return a pointer
1532 to the printed representation.
1534 This function is intended to be used in conjunction with printf.
1535 It is hard to portably print wgint values:
1536 a) you cannot use printf("%ld", number) because wgint can be long
1537 long on 32-bit machines with LFS.
1538 b) you cannot use printf("%lld", number) because NUMBER could be
1539 long on 32-bit machines without LFS, or on 64-bit machines,
1540 which do not require LFS. Also, Windows doesn't support %lld.
1541 c) you cannot use printf("%jd", (intmax_t) number) because not all
1542 versions of printf support "%j", the most notable being the one
1544 d) you cannot #define WGINT_FMT to the appropriate format and use
1545 printf(WGINT_FMT, number) because that would break translations
1546 for user-visible messages, such as printf("Downloaded: %d
1549 What you should use instead is printf("%s", number_to_static_string
1552 CAVEAT: since the function returns pointers to static data, you
1553 must be careful to copy its result before calling it again.
1554 However, to make it more useful with printf, the function maintains
1555 an internal ring of static buffers to return. That way things like
1556 printf("%s %s", number_to_static_string (num1),
1557 number_to_static_string (num2)) work as expected. Three buffers
1558 are currently used, which means that "%s %s %s" will work, but "%s
1559 %s %s %s" won't. If you need to print more than three wgints,
1560 bump the RING_SIZE (or rethink your message.) */
1563 number_to_static_string (wgint number)
/* RING_SIZE slots of 24 bytes each; 24 bytes is the buffer size the
   number_to_string comment gives for a 64-bit wgint. */
1565 static char ring[RING_SIZE][24];
1567 char *buf = ring[ringpos];
1568 number_to_string (buf, number);
/* Step to the next slot, wrapping around at RING_SIZE, so that the
   following call prints into a different buffer. */
1569 ringpos = (ringpos + 1) % RING_SIZE;
1573 /* Determine the width of the terminal we're running on. If that's
1574 not possible, return 0. */
1577 determine_screen_width (void)
1579 /* If there's a way to get the terminal size using POSIX
1580 tcgetattr(), somebody please tell me. */
/* NOTE(review): presumably a set opt.lfilename means output goes to a
   log file, so no terminal width is reported -- confirm. */
1585 if (opt.lfilename != NULL)
/* The window size is queried on the descriptor behind stderr. */
1588 fd = fileno (stderr);
1589 if (ioctl (fd, TIOCGWINSZ, &wsz) < 0)
1590 return 0; /* most likely ENOTTY */
1593 #elif defined(WINDOWS)
/* On Windows the width comes from the screen buffer of the console
   attached to the standard error handle. */
1594 CONSOLE_SCREEN_BUFFER_INFO csbi;
1595 if (!GetConsoleScreenBufferInfo (GetStdHandle (STD_ERROR_HANDLE), &csbi))
1597 return csbi.dwSize.X;
1598 #else /* neither TIOCGWINSZ nor WINDOWS */
1600 #endif /* neither TIOCGWINSZ nor WINDOWS */
1603 /* Whether the rnd system (either rand or [dl]rand48) has been seeded. */
1605 static int rnd_seeded;
1607 /* Return a random number between 0 and MAX-1, inclusive.
1609 If the system does not support lrand48 and MAX is greater than the
1610 value of RAND_MAX+1 on the system, the returned value will be in
1611 the range [0, RAND_MAX]. This may be fixed in a future release.
1612 The random number generator is seeded automatically the first time
1615 This uses lrand48 where available, rand elsewhere. DO NOT use it
1616 for cryptography. It is only meant to be used in situations where
1617 quality of the random numbers returned doesn't really matter. */
1620 random_number (int max)
/* The seed mixes the current time with the process id. */
1625 srand48 ((long) time (NULL) ^ (long) getpid ());
/* MAX must not be zero here: the modulo would divide by zero. */
1628 return lrand48 () % max;
1629 #else /* not HAVE_DRAND48 */
/* Same seed recipe as above, for the rand() generator. */
1635 srand ((unsigned) time (NULL) ^ (unsigned) getpid ());
1640 /* Like rand() % max, but uses the high-order bits for better
1641 randomness on architectures where rand() is implemented using a
1642 simple congruential generator. */
/* rnd / (RAND_MAX + 1.0) lies in [0, 1); scaling by MAX and truncating
   yields an integer below MAX. */
1644 bounded = (double) max * rnd / (RAND_MAX + 1.0);
1645 return (int) bounded;
1647 #endif /* not HAVE_DRAND48 */
1650 /* Return a random uniformly distributed floating point number in the
1651 [0, 1) range. Uses drand48 where available, and a really lame
1652 kludge elsewhere. */
/* The seed mixes the current time with the process id. */
1660 srand48 ((long) time (NULL) ^ (long) getpid ());
1664 #else /* not HAVE_DRAND48 */
/* The kludge: four separate draws from [0, 9999], each one scaled to
   supply the next four decimal places of the result. */
1665 return ( random_number (10000) / 10000.0
1666 + random_number (10000) / (10000.0 * 10000.0)
1667 + random_number (10000) / (10000.0 * 10000.0 * 10000.0)
1668 + random_number (10000) / (10000.0 * 10000.0 * 10000.0 * 10000.0));
1669 #endif /* not HAVE_DRAND48 */
1672 /* Implementation of run_with_timeout, a generic timeout-forcing
1673 routine for systems with Unix-like signal handling. */
1675 #ifdef USE_SIGNAL_TIMEOUT
1676 # ifdef HAVE_SIGSETJMP
/* The non-zero second argument asks sigsetjmp to save the signal
   mask as well, so that siglongjmp restores it. */
1677 # define SETJMP(env) sigsetjmp (env, 1)
/* Jump target shared by the SETJMP call site and the handler below. */
1679 static sigjmp_buf run_with_timeout_env;
/* SIGALRM handler: jump back to the SETJMP point, making SETJMP
   return -1 there. */
1682 abort_run_with_timeout (int sig)
1684 assert (sig == SIGALRM);
1685 siglongjmp (run_with_timeout_env, -1);
1687 # else /* not HAVE_SIGSETJMP */
1688 # define SETJMP(env) setjmp (env)
/* Jump target shared by the SETJMP call site and the handler below. */
1690 static jmp_buf run_with_timeout_env;
/* SIGALRM handler for systems without sigsetjmp/siglongjmp. */
1693 abort_run_with_timeout (int sig)
1695 assert (sig == SIGALRM);
1696 /* We don't have siglongjmp to preserve the set of blocked signals;
1697 if we longjumped out of the handler at this point, SIGALRM would
1698 remain blocked. We must unblock it manually. */
1699 int mask = siggetmask ();
/* Clear the SIGALRM bit in the copy of the current mask. */
1700 mask &= ~sigmask (SIGALRM);
1703 /* Now it's safe to longjump. */
1704 longjmp (run_with_timeout_env, -1);
1706 # endif /* not HAVE_SIGSETJMP */
1708 /* Arrange for SIGALRM to be delivered in TIMEOUT seconds. This uses
1709 setitimer where available, alarm otherwise.
1711 TIMEOUT should be non-zero. If the timeout value is so small that
1712 it would be rounded to zero, it is rounded to the least legal value
1713 instead (1us for setitimer, 1s for alarm). That ensures that
1714 SIGALRM will be delivered in all cases. */
1717 alarm_set (double timeout)
1720 /* Use the modern itimer interface. */
1721 struct itimerval itv;
/* Whole seconds go into tv_sec; the fractional remainder is converted
   to microseconds for tv_usec. */
1723 itv.it_value.tv_sec = (long) timeout;
1724 itv.it_value.tv_usec = 1000000 * (timeout - (long)timeout);
1725 if (itv.it_value.tv_sec == 0 && itv.it_value.tv_usec == 0)
1726 /* Ensure that we wait for at least the minimum interval.
1727 Specifying zero would mean "wait forever". */
1728 itv.it_value.tv_usec = 1;
/* The previous timer setting is not needed, hence the NULL. */
1729 setitimer (ITIMER_REAL, &itv, NULL);
1730 #else /* not ITIMER_REAL */
1731 /* Use the old alarm() interface. */
/* The cast drops the fractional part of TIMEOUT. */
1732 int secs = (int) timeout;
1734 /* Round TIMEOUTs smaller than 1 to 1, not to zero. This is
1735 because alarm(0) means "never deliver the alarm", i.e. "wait
1736 forever", which is not what someone who specifies a 0.5s
1737 timeout would expect. */
1740 #endif /* not ITIMER_REAL */
1743 /* Cancel the alarm set with alarm_set. */
1749 struct itimerval disable;
/* NOTE(review): DISABLE is presumably zeroed before this call, which
   is what would disarm the timer; the zeroing is not visible here --
   confirm. */
1751 setitimer (ITIMER_REAL, &disable, NULL);
1752 #else /* not ITIMER_REAL */
1754 #endif /* not ITIMER_REAL */
1757 /* Call FUN(ARG), but don't allow it to run for more than TIMEOUT
1758 seconds. Returns true if the function was interrupted with a
1759 timeout, false otherwise.
1761 This works by setting up SIGALRM to be delivered in TIMEOUT seconds
1762 using setitimer() or alarm(). The timeout is enforced by
1763 longjumping out of the SIGALRM handler. This has several
1764 advantages compared to the traditional approach of relying on
1765 signals causing system calls to exit with EINTR:
1767 * The callback function is *forcibly* interrupted after the
1768 timeout expires, (almost) regardless of what it was doing and
1769 whether it was in a syscall. For example, a calculation that
1770 takes a long time is interrupted as reliably as an IO
1773 * It works with both SYSV and BSD signals because it doesn't
1774 depend on the default setting of SA_RESTART.
1776 * It doesn't require special handler setup beyond a simple call
1777 to signal(). (It does use sigsetjmp/siglongjmp, but they're
1780 The only downside is that, if FUN allocates internal resources that
1781 are normally freed prior to exit from the functions, they will be
1782 lost in case of timeout. */
1785 run_with_timeout (double timeout, void (*fun) (void *), void *arg)
/* Install the handler that jumps back to the SETJMP point below when
   SIGALRM arrives. */
1795 signal (SIGALRM, abort_run_with_timeout);
/* A non-zero result means control arrived here through the handler's
   longjump (which passes -1), not by falling through. */
1796 if (SETJMP (run_with_timeout_env) != 0)
1798 /* Longjumped out of FUN with a timeout. */
1799 signal (SIGALRM, SIG_DFL);
1802 alarm_set (timeout);
1805 /* Preserve errno in case alarm() or signal() modifies it. */
1806 saved_errno = errno;
/* Put the default SIGALRM disposition back. */
1808 signal (SIGALRM, SIG_DFL);
1809 errno = saved_errno;
1814 #else /* not USE_SIGNAL_TIMEOUT */
1817 /* A stub version of run_with_timeout that just calls FUN(ARG). Don't
1818 define it under Windows, because Windows has its own version of
1819 run_with_timeout that uses threads. */
/* Same signature as the signal-based version, so callers do not need
   to know which one was compiled in. */
1822 run_with_timeout (double timeout, void (*fun) (void *), void *arg)
1827 #endif /* not WINDOWS */
1828 #endif /* not USE_SIGNAL_TIMEOUT */
1832 /* Sleep the specified amount of seconds. On machines without
1833 nanosleep(), this may sleep shorter if interrupted by signals. */
1836 xsleep (double seconds)
1838 #ifdef HAVE_NANOSLEEP
1839 /* nanosleep is the preferred interface because it offers high
1840 accuracy and, more importantly, because it allows us to reliably
1841 restart after receiving a signal such as SIGWINCH. (There was an
1842 actual Debian bug report about --limit-rate malfunctioning while
1843 the terminal was being resized.) */
1844 struct timespec sleep, remaining;
/* Split SECONDS into whole seconds and a fractional part expressed in
   nanoseconds. */
1845 sleep.tv_sec = (long) seconds;
1846 sleep.tv_nsec = 1000000000 * (seconds - (long) seconds);
1847 while (nanosleep (&sleep, &remaining) < 0 && errno == EINTR)
1848 /* If nanosleep has been interrupted by a signal, adjust the
1849 sleeping period and return to sleep. */
1851 #elif defined(HAVE_USLEEP)
1852 /* If usleep is available, use it in preference to select. */
1855 /* On some systems, usleep cannot handle values larger than
1856 1,000,000. If the period is larger than that, use sleep
1857 first, then add usleep for subsecond accuracy. */
/* Keep only the fractional part of SECONDS for the usleep call. */
1859 seconds -= (long) seconds;
/* usleep takes its argument in microseconds. */
1861 usleep (seconds * 1000000);
1862 #else /* fall back select */
1863 /* Note that, although Windows supports select, it can't be used to
1864 implement sleeping because Winsock's select doesn't implement
1865 timeout when it is passed NULL pointers for all fd sets. (But it
1866 does under Cygwin, which implements Unix-compatible select.) */
1867 struct timeval sleep;
/* Split SECONDS into whole seconds and a fractional part expressed in
   microseconds. */
1868 sleep.tv_sec = (long) seconds;
1869 sleep.tv_usec = 1000000 * (seconds - (long) seconds);
/* No descriptor sets are passed, so select is used purely for its
   timeout. */
1870 select (0, NULL, NULL, NULL, &sleep);
1871 /* If select returns -1 and errno is EINTR, it means we were
1872 interrupted by a signal. But without knowing how long we've
1873 actually slept, we can't return to sleep. Using gettimeofday to
1874 track sleeps is slow and unreliable due to clock skew. */
1878 #endif /* not WINDOWS */
/* Encode the octets in DATA of length LENGTH to base64 format,
   storing the result to DEST.  The output will be zero-terminated,
   and DEST must point to a writable buffer of at least
   1+BASE64_LENGTH(length) bytes.  The function returns the length of
   the resulting base64 data, not counting the terminating zero.

   A LENGTH of zero or less produces the empty string and returns 0.

   This implementation does not emit newlines after 76 characters of
   base64 data.  */

int
base64_encode (const void *data, int length, char *dest)
{
  /* Conversion table.  */
  static const char tbl[64] = {
    'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P',
    'Q','R','S','T','U','V','W','X','Y','Z','a','b','c','d','e','f',
    'g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v',
    'w','x','y','z','0','1','2','3','4','5','6','7','8','9','+','/'
  };
  /* Access bytes in DATA as unsigned char, otherwise the shifts below
     don't work for data with MSB set.  */
  const unsigned char *s = data;
  char *p = dest;
  /* Number of input octets not yet encoded.  Counting them, rather
     than comparing S against DATA+LENGTH-2, avoids forming a pointer
     that lies before DATA when LENGTH is smaller than 2.  */
  int left;

  /* Transform the 3x8 bits to 4x6 bits, as required by base64.  */
  for (left = length; left >= 3; left -= 3, s += 3)
    {
      *p++ = tbl[s[0] >> 2];
      *p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)];
      *p++ = tbl[((s[1] & 0xf) << 2) + (s[2] >> 6)];
      *p++ = tbl[s[2] & 0x3f];
    }

  /* Pad the result if necessary...  */
  if (left == 1)
    {
      *p++ = tbl[s[0] >> 2];
      *p++ = tbl[(s[0] & 3) << 4];
      *p++ = '=';
      *p++ = '=';
    }
  else if (left == 2)
    {
      *p++ = tbl[s[0] >> 2];
      *p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)];
      *p++ = tbl[((s[1] & 0xf) << 2)];
      *p++ = '=';
    }

  /* ...and zero-terminate it.  */
  *p = '\0';

  return (int) (p - dest);
}
1937 /* Store in C the next non-whitespace character from the string, or \0
1938 when end of string is reached. */
/* P is advanced past every character examined, including the one that
   ends up in C. */
1939 #define NEXT_CHAR(c, p) do { \
1940 c = (unsigned char) *p++; \
1941 } while (ISSPACE (c))
/* True when bit 0x80 of C is clear. */
1943 #define IS_ASCII(c) (((c) & 0x80) == 0)
1945 /* Decode data from BASE64 (a null-terminated string) into memory
1946 pointed to by DEST. DEST is assumed to be large enough to
1947 accommodate the decoded data, which is guaranteed to be no more than
1950 Since DEST is assumed to contain binary data, it is not
1951 NUL-terminated. The function returns the length of the data
1952 written to DEST. -1 is returned in case of error caused by malformed
1955 This function originates from Free Recode. */
1958 base64_decode (const char *base64, void *dest)
1960 /* Table of base64 values for first 128 characters. Note that this
1961 assumes ASCII (but so does Wget in other places). */
1962 static const signed char base64_char_to_value[128] =
1964 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 0- 9 */
1965 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 10- 19 */
1966 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 20- 29 */
1967 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 30- 39 */
1968 -1, -1, -1, 62, -1, -1, -1, 63, 52, 53, /* 40- 49 */
1969 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, /* 50- 59 */
1970 -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, /* 60- 69 */
1971 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /* 70- 79 */
1972 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, /* 80- 89 */
1973 25, -1, -1, -1, -1, -1, -1, 26, 27, 28, /* 90- 99 */
1974 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, /* 100-109 */
1975 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, /* 110-119 */
1976 49, 50, 51, -1, -1, -1, -1, -1 /* 120-127 */
/* Look up the 6-bit value of C; the table holds -1 for characters that
   are not part of the base64 alphabet.  The table has 128 entries, so
   C must be below 128. */
1978 #define BASE64_CHAR_TO_VALUE(c) ((int) base64_char_to_value[c])
/* IS_ASCII is tested first so the table is never indexed out of
   range.  The padding character '=' is accepted here as well; places
   that need a data character check for '=' separately. */
1979 #define IS_BASE64(c) ((IS_ASCII (c) && BASE64_CHAR_TO_VALUE (c) >= 0) || c == '=')
1981 const char *p = base64;
/* VALUE collects the 6-bit groups of one quadruplet: the first at bit
   18, the second at bit 12, the third at bit 6, the fourth at bit 0. */
1987 unsigned long value;
1989 /* Process first byte of a quadruplet. */
1993 if (c == '=' || !IS_BASE64 (c))
1994 return -1; /* illegal char while decoding base64 */
1995 value = BASE64_CHAR_TO_VALUE (c) << 18;
1997 /* Process second byte of a quadruplet. */
2000 return -1; /* premature EOF while decoding base64 */
2001 if (c == '=' || !IS_BASE64 (c))
2002 return -1; /* illegal char while decoding base64 */
2003 value |= BASE64_CHAR_TO_VALUE (c) << 12;
2006 /* Process third byte of a quadruplet. */
2009 return -1; /* premature EOF while decoding base64 */
2011 return -1; /* illegal char while decoding base64 */
2017 return -1; /* premature EOF while decoding base64 */
2019 return -1; /* padding `=' expected but not found */
2023 value |= BASE64_CHAR_TO_VALUE (c) << 6;
/* >> binds tighter than &, so this stores bits 8-15 of VALUE. */
2024 *q++ = 0xff & value >> 8;
2026 /* Process fourth byte of a quadruplet. */
2029 return -1; /* premature EOF while decoding base64 */
2033 return -1; /* illegal char while decoding base64 */
2035 value |= BASE64_CHAR_TO_VALUE (c);
/* The low eight bits of VALUE form the last octet of the group. */
2036 *q++ = 0xff & value;
2039 #undef BASE64_CHAR_TO_VALUE
/* Number of octets stored, measured from the start of DEST. */
2041 return q - (char *) dest;
/* Simple merge sort for use by stable_sort.  Implementation courtesy
   Zeljko Vrba with additional debugging by Nenad Barbutov.

   Sorts elements FROM through TO (both inclusive) of BASE, an array
   of SIZE-byte elements, comparing them with CMPFUN.  TEMP is scratch
   space with room for at least TO+1 elements.  */

static void
mergesort_internal (void *base, void *temp, size_t size, size_t from, size_t to,
                    int (*cmpfun) (const void *, const void *))
{
#define ELT(array, pos) ((char *)(array) + (pos) * size)
  if (from < to)
    {
      size_t i, j, k;
      /* Written this way so that the sum FROM + TO, which could
         overflow, is never formed.  */
      size_t mid = from + (to - from) / 2;
      mergesort_internal (base, temp, size, from, mid, cmpfun);
      mergesort_internal (base, temp, size, mid + 1, to, cmpfun);
      i = from;
      j = mid + 1;
      /* Merge the two sorted halves into TEMP.  Taking the left
         element on ties (<= 0) is what makes the sort stable.  */
      for (k = from; (i <= mid) && (j <= to); k++)
        if (cmpfun (ELT (base, i), ELT (base, j)) <= 0)
          memcpy (ELT (temp, k), ELT (base, i++), size);
        else
          memcpy (ELT (temp, k), ELT (base, j++), size);
      /* Copy whatever is left over in either half.  */
      while (i <= mid)
        memcpy (ELT (temp, k++), ELT (base, i++), size);
      while (j <= to)
        memcpy (ELT (temp, k++), ELT (base, j++), size);
      /* Move the merged run back into place.  */
      for (k = from; k <= to; k++)
        memcpy (ELT (base, k), ELT (temp, k), size);
    }
#undef ELT
}

/* Stable sort with interface exactly like standard library's qsort.
   Uses mergesort internally, allocating temporary storage with
   alloca.  The temporary storage is as large as the array itself
   (NMEMB * SIZE bytes).  */

void
stable_sort (void *base, size_t nmemb, size_t size,
             int (*cmpfun) (const void *, const void *))
{
  /* Fewer than two elements are already sorted.  Returning early also
     keeps NMEMB - 1 below from wrapping around when NMEMB is zero.  */
  if (nmemb > 1 && size > 0)
    {
      void *temp = alloca (nmemb * size);
      mergesort_internal (base, temp, size, 0, nmemb - 1, cmpfun);
    }
}
2093 /* Print a decimal number. If it is equal to or larger than ten, the
2094 number is rounded. Otherwise it is printed with one significant
2095 digit without trailing zeros and with no more than three fractional
2096 digits total. For example, 0.1 is printed as "0.1", 0.035 is
2097 printed as "0.04", 0.0091 as "0.009", and 0.0003 as simply "0".
2099 This is useful for displaying durations because it provides
2100 order-of-magnitude information without unnecessary clutter --
2101 long-running downloads are shown without the fractional part, and
2102 short ones still retain one significant digit. */
/* The text is formatted into the static buffer BUF below, so each call
   overwrites what the previous call produced. */
2105 print_decimal (double number)
2107 static char buf[32];
/* N is the magnitude of NUMBER.  The range checks use N, while the
   value printed is NUMBER itself, sign included. */
2108 double n = number >= 0 ? number : -number;
2111 /* Cut off at 9.95 because the below %.1f would round 9.96 to
2112 "10.0" instead of "10". OTOH 9.94 will print as "9.9". */
2113 snprintf (buf, sizeof buf, "%.0f", number);
2115 snprintf (buf, sizeof buf, "%.1f", number);
2116 else if (n >= 0.001)
/* %.1g keeps exactly one significant digit. */
2117 snprintf (buf, sizeof buf, "%.1g", number);
2118 else if (n >= 0.0005)
2119 /* round [0.0005, 0.001) to 0.001 */
2120 snprintf (buf, sizeof buf, "%.3f", number);
2122 /* print numbers close to 0 as 0, not 0.000 */
/* Test table rows: the two directory arguments handed to subdir_p,
   followed by the result the call is expected to produce. */
2139 { "/somedir", "/somedir", true },
2140 { "/somedir", "/somedir/d2", true },
2141 { "/somedir/d1", "/somedir", false },
/* Run every row through subdir_p and compare with the expectation. */
2144 for (i = 0; i < countof(test_array); ++i)
2146 bool res = subdir_p (test_array[i].d1, test_array[i].d2);
2148 mu_assert ("test_subdir_p: wrong result",
2149 res == test_array[i].result);
/* Unit test for dir_matches_p, driven by the table below. */
2156 test_dir_matches_p()
/* Test table rows: a NULL-terminated list of directory patterns, the
   directory to check against it, and the expected result. */
2164 { { "/somedir", "/someotherdir", NULL }, "somedir", true },
2165 { { "/somedir", "/someotherdir", NULL }, "anotherdir", false },
2166 { { "/somedir", "/*otherdir", NULL }, "anotherdir", true },
2167 { { "/somedir/d1", "/someotherdir", NULL }, "somedir/d1", true },
2168 { { "/somedir/d1", "/someotherdir", NULL }, "d1", false },
/* Run every row through dir_matches_p and compare with the expectation. */
2171 for (i = 0; i < countof(test_array); ++i)
2173 bool res = dir_matches_p (test_array[i].dirlist, test_array[i].dir);
2175 mu_assert ("test_dir_matches_p: wrong result",
2176 res == test_array[i].result);
2182 #endif /* TESTING */