1 /* Various functions of utilitarian nature.
2 Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001
3 Free Software Foundation, Inc.
5 This file is part of GNU Wget.
7 GNU Wget is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 GNU Wget is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with Wget; if not, write to the Free Software
19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
27 #else /* not HAVE_STRING_H */
29 #endif /* not HAVE_STRING_H */
30 #include <sys/types.h>
35 # include <sys/mman.h>
44 #ifdef HAVE_SYS_UTIME_H
45 # include <sys/utime.h>
49 # include <libc.h> /* for access() */
53 #ifdef HAVE_SYS_IOCTL_H
54 # include <sys/ioctl.h>
66 /* This section implements several wrappers around the basic
67 allocation routines. This is done for two reasons: first, so that
68 the callers of these functions need not consistently check for
69 errors. If there is not enough virtual memory for running Wget,
70 something is seriously wrong, and Wget exits with an appropriate
73 The second reason why these are useful is that, if DEBUG_MALLOC is
74 defined, they also provide a handy (if crude) malloc debugging
75 interface that checks memory leaks. */
77 /* Croak the fatal memory error and bail out with non-zero exit
80 memfatal (const char *what)
82 /* HACK: expose save_log_p from log.c, so we can turn it off in
83 order to prevent saving the log. Saving the log is dangerous
84 because logprintf() and logputs() can call malloc(), so this
85 could infloop. When logging is turned off, infloop can no longer
88 #### This is no longer really necessary because the new routines
89 in log.c cons only if the line exceeds eighty characters. But
90 this can come at the end of a line, so it's OK to be careful.
92 On a more serious note, it would be good to have a
93 log_forced_shutdown() routine that exposes this cleanly. */
94 extern int save_log_p;
97 logprintf (LOG_ALWAYS, _("%s: %s: Not enough memory.\n"), exec_name, what);
101 /* These functions end with _real because they need to be
102 distinguished from the debugging functions, and from the macros.
105 If memory debugging is not turned on, wget.h defines these:
107 #define xmalloc xmalloc_real
108 #define xrealloc xrealloc_real
109 #define xstrdup xstrdup_real
112 In case of memory debugging, the definitions are a bit more
113 complex, because we want to provide more information, *and* we want
114 to call the debugging code. (The former is the reason why xmalloc
115 and friends need to be macros in the first place.) Then it looks
118 #define xmalloc(a) xmalloc_debug (a, __FILE__, __LINE__)
119 #define xfree(a) xfree_debug (a, __FILE__, __LINE__)
120 #define xrealloc(a, b) xrealloc_debug (a, b, __FILE__, __LINE__)
121 #define xstrdup(a) xstrdup_debug (a, __FILE__, __LINE__)
123 Each of the *_debug functions does its magic and calls the real one. */
126 # define STATIC_IF_DEBUG static
128 # define STATIC_IF_DEBUG
131 STATIC_IF_DEBUG void *
132 xmalloc_real (size_t size)
134 void *ptr = malloc (size);
140 STATIC_IF_DEBUG void *
141 xrealloc_real (void *ptr, size_t newsize)
145 /* Not all Un*xes have the feature of realloc() that calling it with
146 a NULL-pointer is the same as malloc(), but it is easy to
149 newptr = realloc (ptr, newsize);
151 newptr = malloc (newsize);
153 memfatal ("realloc");
157 STATIC_IF_DEBUG char *
158 xstrdup_real (const char *s)
164 copy = malloc (l + 1);
167 memcpy (copy, s, l + 1);
168 #else /* HAVE_STRDUP */
172 #endif /* HAVE_STRDUP */
179 /* Crude home-grown routines for debugging some malloc-related
182 * Counting the number of malloc and free invocations, and reporting
183 the "balance", i.e. how many times more malloc was called than it
184 was the case with free.
186 * Making malloc store its entry into a simple array and free remove
187 stuff from that array. At the end, print the pointers which have
188 not been freed, along with the source file and the line number.
189 This also has the side-effect of detecting freeing memory that
192 Note that this kind of memory leak checking strongly depends on
193 every malloc() being followed by a free(), even if the program is
194 about to finish. Wget is careful to free the data structure it
195 allocated in init.c. */
197 static int malloc_count, free_count;
203 } malloc_debug[100000];
205 /* Both register_ptr and unregister_ptr take O(n) operations to run,
206 which can be a real problem. It would be nice to use a hash table
207 for malloc_debug, but the functions in hash.c are not suitable
208 because they can call malloc() themselves. Maybe it would work if
209 the hash table were preallocated to a huge size, and if we set the
210 rehash threshold to 1.0. */
212 /* Register PTR in malloc_debug. Abort if this is not possible
213 (presumably due to the number of current allocations exceeding the
214 size of malloc_debug.) */
217 register_ptr (void *ptr, const char *file, int line)
220 for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
221 if (malloc_debug[i].ptr == NULL)
223 malloc_debug[i].ptr = ptr;
224 malloc_debug[i].file = file;
225 malloc_debug[i].line = line;
231 /* Unregister PTR from malloc_debug. Abort if PTR is not present in
232 malloc_debug. (This catches calling free() with a bogus pointer.) */
235 unregister_ptr (void *ptr)
238 for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
239 if (malloc_debug[i].ptr == ptr)
241 malloc_debug[i].ptr = NULL;
247 /* Print the malloc debug stats that can be gathered from the above
248 information. Currently this is the count of mallocs, frees, the
249 difference between the two, and the dump of the contents of
250 malloc_debug. The last part lists the memory leaks. */
253 print_malloc_debug_stats (void)
256 printf ("\nMalloc: %d\nFree: %d\nBalance: %d\n\n",
257 malloc_count, free_count, malloc_count - free_count);
258 for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
259 if (malloc_debug[i].ptr != NULL)
260 printf ("0x%08ld: %s:%d\n", (long)malloc_debug[i].ptr,
261 malloc_debug[i].file, malloc_debug[i].line);
265 xmalloc_debug (size_t size, const char *source_file, int source_line)
267 void *ptr = xmalloc_real (size);
269 register_ptr (ptr, source_file, source_line);
274 xfree_debug (void *ptr, const char *source_file, int source_line)
276 assert (ptr != NULL);
278 unregister_ptr (ptr);
283 xrealloc_debug (void *ptr, size_t newsize, const char *source_file, int source_line)
285 void *newptr = xrealloc_real (ptr, newsize);
289 register_ptr (newptr, source_file, source_line);
291 else if (newptr != ptr)
293 unregister_ptr (ptr);
294 register_ptr (newptr, source_file, source_line);
300 xstrdup_debug (const char *s, const char *source_file, int source_line)
302 char *copy = xstrdup_real (s);
304 register_ptr (copy, source_file, source_line);
308 #endif /* DEBUG_MALLOC */
310 /* Utility function: like xstrdup(), but also lowercases S. */
313 xstrdup_lower (const char *s)
315 char *copy = xstrdup (s);
322 /* Return a count of how many times CHR occurs in STRING. */
325 count_char (const char *string, char chr)
329 for (p = string; *p; p++)
335 /* Copy the string formed by two pointers (one on the beginning, other
336 on the char after the last char) to a new, malloc-ed location.
339 strdupdelim (const char *beg, const char *end)
341 char *res = (char *)xmalloc (end - beg + 1);
342 memcpy (res, beg, end - beg);
343 res[end - beg] = '\0';
347 /* Parse a string containing comma-separated elements, and return a
348 vector of char pointers with the elements. Spaces following the
349 commas are ignored. */
351 sepstring (const char *s)
365 res = (char **)xrealloc (res, (i + 2) * sizeof (char *));
366 res[i] = strdupdelim (p, s);
369 /* Skip the blanks following the ','. */
377 res = (char **)xrealloc (res, (i + 2) * sizeof (char *));
378 res[i] = strdupdelim (p, s);
383 /* Return pointer to a static char[] buffer in which zero-terminated
384 string-representation of TM (in form hh:mm:ss) is printed.
386 If TM is non-NULL, the current time-in-seconds will be stored
389 (#### This is misleading: one would expect TM would be used instead
390 of the current time in that case. This design was probably
391 influenced by the design of time(2), and should be changed at some
392 point. No callers use non-NULL TM anyway.) */
395 time_str (time_t *tm)
397 static char output[15];
399 time_t secs = time (tm);
403 /* In case of error, return the empty string. Maybe we should
404 just abort if this happens? */
408 ptm = localtime (&secs);
409 sprintf (output, "%02d:%02d:%02d", ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
413 /* Like the above, but include the date: YYYY-MM-DD hh:mm:ss. */
416 datetime_str (time_t *tm)
418 static char output[20]; /* "YYYY-MM-DD hh:mm:ss" + \0 */
420 time_t secs = time (tm);
424 /* In case of error, return the empty string. Maybe we should
425 just abort if this happens? */
429 ptm = localtime (&secs);
430 sprintf (output, "%04d-%02d-%02d %02d:%02d:%02d",
431 ptm->tm_year + 1900, ptm->tm_mon + 1, ptm->tm_mday,
432 ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
436 /* The Windows versions of the following two functions are defined in
441 fork_to_background (void)
444 /* Whether we arrange our own version of opt.lfilename here. */
449 opt.lfilename = unique_name (DEFAULT_LOGFILE);
461 /* parent, no error */
462 printf (_("Continuing in background.\n"));
464 printf (_("Output will be written to `%s'.\n"), opt.lfilename);
467 /* child: keep running */
469 #endif /* not WINDOWS */
476 char *r = xstrdup (orig);
482 /* Canonicalize PATH, and return a new path. The new path differs from PATH
484 Multiple `/'s are collapsed to a single `/'.
485 Leading `./'s and trailing `/.'s are removed.
486 Trailing `/'s are removed.
487 Non-leading `../'s and trailing `..'s are handled by removing
488 portions of the path.
490 E.g. "a/b/c/./../d/.." will yield "a/b/". This function originates
491 from GNU Bash and has been mutilated beyond recognition for use in
495 Always use '/' as stub_char.
496 Don't check for local things using canon_stat.
497 Change the original string instead of strdup-ing.
498 React correctly when beginning with `./' and `../'.
499 Don't zip out trailing slashes.
500 Return a value indicating whether any modifications took place.
502 If you dare change this function, take a careful look at the test
503 cases below, and make sure that they pass. */
506 path_simplify (char *path)
508 register int i, start;
518 /* Preserve initial '/'. */
521 /* Nix out a PATH that consists solely of `.' or `..'. */
522 if ((path[0] == '.' && path[1] == '\0')
523 || (path[0] == '.' && path[1] == '.' && path[2] == '\0'))
530 /* Walk along PATH looking for things to compact. */
537 while (path[i] && path[i] != '/')
542 /* If we didn't find any slashes, then there is nothing left to do. */
546 /* Handle multiple `/'s in a row. */
547 while (path[i] == '/')
550 if ((start + 1) != i)
552 strcpy (path + start + 1, path + i);
557 /* Check for `../', `./' or trailing `.' by itself. */
560 /* Handle trailing `.' by itself. */
569 if (path[i + 1] == '/')
571 strcpy (path + i, path + i + 1);
572 i = (start < 0) ? 0 : start;
577 /* Handle `../' or trailing `..' by itself. */
578 if (path[i + 1] == '.' &&
579 (path[i + 2] == '/' || !path[i + 2]))
581 while (--start > -1 && path[start] != '/');
582 strcpy (path + start + 1, path + i + 2 + (start == -1 && path[i + 2]));
583 i = (start < 0) ? 0 : start;
590 /* Addition: Remove all `./'-s and `../'-s preceding the string. */
594 if (path[i] == '.' && path[i + 1] == '/')
596 else if (path[i] == '.' && path[i + 1] == '.' && path[i + 2] == '/')
603 strcpy (path, path + i - 0);
618 ps("foo/bar") -> "foo/bar"
619 ps("foo//bar") -> "foo/bar" (possibly a bug)
620 ps("foo/../bar") -> "bar"
621 ps("foo/bar/..") -> "foo/"
622 ps("foo/bar/../x") -> "foo/x"
623 ps("foo/bar/../x/") -> "foo/x/"
626 ps("a/b/../../c") -> "c"
627 ps("/a/b/../../c") -> "/c"
628 ps("./a/../b") -> "b"
629 ps("/./a/../b") -> "/b"
632 /* "Touch" FILE, i.e. make its atime and mtime equal to the time
633 specified with TM. */
635 touch (const char *file, time_t tm)
637 #ifdef HAVE_STRUCT_UTIMBUF
638 struct utimbuf times;
639 times.actime = times.modtime = tm;
642 times[0] = times[1] = tm;
645 if (utime (file, &times) == -1)
646 logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno));
649 /* Checks if FILE is a symbolic link, and removes it if it is. Does
650 nothing under MS-Windows. */
652 remove_link (const char *file)
657 if (lstat (file, &st) == 0 && S_ISLNK (st.st_mode))
659 DEBUGP (("Unlinking %s (symlink).\n", file));
662 logprintf (LOG_VERBOSE, _("Failed to unlink symlink `%s': %s\n"),
663 file, strerror (errno));
668 /* Does FILENAME exist? This is quite a lousy implementation, since
669 it supplies no error codes -- only a yes-or-no answer. Thus it
670 will return that a file does not exist if, e.g., the directory is
671 unreadable. I don't mind it too much currently, though. The
672 proper way should, of course, be to have a third, error state,
673 other than true/false, but that would introduce uncalled-for
674 additional complexity to the callers. */
676 file_exists_p (const char *filename)
679 return access (filename, F_OK) >= 0;
682 return stat (filename, &buf) >= 0;
686 /* Returns 0 if PATH is a directory, 1 otherwise (any kind of file).
687 Returns 0 on error. */
689 file_non_directory_p (const char *path)
692 /* Use lstat() rather than stat() so that symbolic links pointing to
693 directories can be identified correctly. */
694 if (lstat (path, &buf) != 0)
696 return S_ISDIR (buf.st_mode) ? 0 : 1;
699 /* Return a unique filename, given a prefix and count */
701 unique_name_1 (const char *fileprefix, int count)
707 filename = (char *)xmalloc (strlen (fileprefix) + numdigit (count) + 2);
708 sprintf (filename, "%s.%d", fileprefix, count);
711 filename = xstrdup (fileprefix);
713 if (!file_exists_p (filename))
722 /* Return a unique file name, based on PREFIX. */
724 unique_name (const char *prefix)
730 file = unique_name_1 (prefix, count++);
734 /* Create DIRECTORY. If some of the pathname components of DIRECTORY
735 are missing, create them first. In case any mkdir() call fails,
736 return its error status. Returns 0 on successful completion.
738 The behaviour of this function should be identical to the behaviour
739 of `mkdir -p' on systems where mkdir supports the `-p' option. */
741 make_directory (const char *directory)
747 /* Make a copy of dir, to be able to write to it. Otherwise, the
748 function is unsafe if called with a read-only char *argument. */
749 STRDUP_ALLOCA (dir, directory);
751 /* If the first character of dir is '/', skip it (and thus enable
752 creation of absolute-pathname directories). */
753 for (i = (*dir == '/'); 1; ++i)
755 for (; dir[i] && dir[i] != '/'; i++)
760 /* Check whether the directory already exists. */
761 if (!file_exists_p (dir))
763 if (mkdir (dir, 0777) < 0)
774 /* Merge BASE with FILE. BASE can be a directory or a file name, FILE
775 should be a file name.
777 file_merge("/foo/bar", "baz") => "/foo/baz"
778 file_merge("/foo/bar/", "baz") => "/foo/bar/baz"
779 file_merge("foo", "bar") => "bar"
781 In other words, it's a simpler and gentler version of uri_merge_1. */
784 file_merge (const char *base, const char *file)
787 const char *cut = (const char *)strrchr (base, '/');
790 return xstrdup (file);
792 result = (char *)xmalloc (cut - base + 1 + strlen (file) + 1);
793 memcpy (result, base, cut - base);
794 result[cut - base] = '/';
795 strcpy (result + (cut - base) + 1, file);
800 static int in_acclist PARAMS ((const char *const *, const char *, int));
802 /* Determine whether a file is acceptable to be followed, according to
803 lists of patterns to accept/reject. */
805 acceptable (const char *s)
809 while (l && s[l] != '/')
816 return (in_acclist ((const char *const *)opt.accepts, s, 1)
817 && !in_acclist ((const char *const *)opt.rejects, s, 1));
819 return in_acclist ((const char *const *)opt.accepts, s, 1);
821 else if (opt.rejects)
822 return !in_acclist ((const char *const *)opt.rejects, s, 1);
826 /* Compare S1 and S2 frontally; S2 must begin with S1. E.g. if S1 is
827 `/something', frontcmp() will return 1 only if S2 begins with
828 `/something'. Otherwise, 0 is returned. */
830 frontcmp (const char *s1, const char *s2)
832 for (; *s1 && *s2 && (*s1 == *s2); ++s1, ++s2);
836 /* Iterate through STRLIST, and return the first element that matches
837 S, through wildcards or front comparison (as appropriate). */
839 proclist (char **strlist, const char *s, enum accd flags)
843 for (x = strlist; *x; x++)
844 if (has_wildcards_p (*x))
846 if (fnmatch (*x, s, FNM_PATHNAME) == 0)
851 char *p = *x + ((flags & ALLABS) && (**x == '/')); /* Remove '/' */
858 /* Returns whether DIRECTORY is acceptable for download, wrt the
859 include/exclude lists.
861 If FLAGS is ALLABS, the leading `/' is ignored in paths; relative
862 and absolute paths may be freely intermixed. */
864 accdir (const char *directory, enum accd flags)
866 /* Remove starting '/'. */
867 if (flags & ALLABS && *directory == '/')
871 if (!proclist (opt.includes, directory, flags))
876 if (proclist (opt.excludes, directory, flags))
882 /* Match the end of STRING against PATTERN. For instance:
884 match_tail ("abc", "bc") -> 1
885 match_tail ("abc", "ab") -> 0
886 match_tail ("abc", "abc") -> 1 */
888 match_tail (const char *string, const char *pattern)
892 for (i = strlen (string), j = strlen (pattern); i >= 0 && j >= 0; i--, j--)
893 if (string[i] != pattern[j])
895 /* If the pattern was exhausted, the match was successful. */
902 /* Checks whether string S matches each element of ACCEPTS. A list
903 element is matched either with fnmatch() or match_tail(),
904 according to whether the element contains wildcards or not.
906 If BACKWARD is 0, don't do backward comparison -- just compare
909 in_acclist (const char *const *accepts, const char *s, int backward)
911 for (; *accepts; accepts++)
913 if (has_wildcards_p (*accepts))
915 /* fnmatch returns 0 if the pattern *does* match the
917 if (fnmatch (*accepts, s, 0) == 0)
924 if (match_tail (s, *accepts))
929 if (!strcmp (s, *accepts))
937 /* Return the location of STR's suffix (file extension). Examples:
938 suffix ("foo.bar") -> "bar"
939 suffix ("foo.bar.baz") -> "baz"
940 suffix ("/foo/bar") -> NULL
941 suffix ("/foo.bar/baz") -> NULL */
943 suffix (const char *str)
947 for (i = strlen (str); i && str[i] != '/' && str[i] != '.'; i--)
951 return (char *)str + i;
956 /* Read a line from FP. The function reallocs the storage as needed
957 to accommodate any length of the line. Reallocs are done
958 exponentially, doubling the storage after each overflow to minimize
959 the number of calls to realloc() and fgets(). The newline
960 character at the end of line is retained.
962 After end-of-file is encountered without anything being read, NULL
963 is returned. NULL is also returned on error. To distinguish
964 between these two cases, use the stdio function ferror().
966 A future version of this function will be rewritten to use fread()
967 instead of fgets(), and to return the length of the line, which
968 will make the function usable on files with binary content. */
971 read_whole_line (FILE *fp)
975 char *line = (char *)xmalloc (bufsize);
977 while (fgets (line + length, bufsize - length, fp))
979 length += strlen (line + length);
981 /* Possible for example when reading from a binary file where
982 a line begins with \0. */
985 if (line[length - 1] == '\n')
988 /* fgets() guarantees to read the whole line, or to use up the
989 space we've given it. We can double the buffer
992 line = xrealloc (line, bufsize);
994 if (length == 0 || ferror (fp))
999 if (length + 1 < bufsize)
1000 /* Relieve the memory from our exponential greediness. We say
1001 `length + 1' because the terminating \0 is not included in
1002 LENGTH. We don't need to zero-terminate the string ourselves,
1003 though, because fgets() does that. */
1004 line = xrealloc (line, length + 1);
1008 /* Read FILE into memory. A pointer to `struct file_memory' is
1009 returned; use struct element `content' to access file contents, and
1010 the element `length' to know the file length. `content' is *not*
1011 zero-terminated, and you should *not* read or write beyond the [0,
1012 length) range of characters.
1014 After you are done with the file contents, call read_file_free to
1017 Depending on the operating system and the type of file that is
1018 being read, read_file() either mmap's the file into memory, or
1019 reads the file into the core using read().
1021 If file is named "-", fileno(stdin) is used for reading instead.
1022 If you want to read from a real file named "-", use "./-" instead. */
1024 struct file_memory *
1025 read_file (const char *file)
1028 struct file_memory *fm;
1030 int inhibit_close = 0;
1032 /* Some magic in the finest tradition of Perl and its kin: if FILE
1033 is "-", just use stdin. */
1036 fd = fileno (stdin);
1038 /* Note that we don't inhibit mmap() in this case. If stdin is
1039 redirected from a regular file, mmap() will still work. */
1042 fd = open (file, O_RDONLY);
1045 fm = xmalloc (sizeof (struct file_memory));
1050 if (fstat (fd, &buf) < 0)
1052 fm->length = buf.st_size;
1053 /* NOTE: As far as I know, the callers of this function never
1054 modify the file text. Relying on this would enable us to
1055 specify PROT_READ and MAP_SHARED for a marginal gain in
1056 efficiency, but at some cost to generality. */
1057 fm->content = mmap (NULL, fm->length, PROT_READ | PROT_WRITE,
1058 MAP_PRIVATE, fd, 0);
1059 if (fm->content == (char *)MAP_FAILED)
1069 /* The most common reason why mmap() fails is that FD does not point
1070 to a plain file. However, it's also possible that mmap() doesn't
1071 work for a particular type of file. Therefore, whenever mmap()
1072 fails, we just fall back to the regular method. */
1073 #endif /* HAVE_MMAP */
1076 size = 512; /* number of bytes fm->contents can
1077 hold at any given time. */
1078 fm->content = xmalloc (size);
1082 if (fm->length > size / 2)
1084 /* #### I'm not sure whether the whole exponential-growth
1085 thing makes sense with kernel read. On Linux at least,
1086 read() refuses to read more than 4K from a file at a
1087 single chunk anyway. But other Unixes might optimize it
1088 better, and it doesn't *hurt* anything, so I'm leaving
1091 /* Normally, we grow SIZE exponentially to make the number
1092 of calls to read() and realloc() logarithmic in relation
1093 to file size. However, read() can read an amount of data
1094 smaller than requested, and it would be unreasonable to
1095 double SIZE every time *something* was read. Therefore,
1096 we double SIZE only when the length exceeds half of the
1097 entire allocated size. */
1099 fm->content = xrealloc (fm->content, size);
1101 nread = read (fd, fm->content + fm->length, size - fm->length);
1103 /* Successful read. */
1104 fm->length += nread;
1114 if (size > fm->length && fm->length != 0)
1115 /* Due to exponential growth of fm->content, the allocated region
1116 might be much larger than what is actually needed. */
1117 fm->content = xrealloc (fm->content, fm->length);
1124 xfree (fm->content);
1129 /* Release the resources held by FM. Specifically, this calls
1130 munmap() or xfree() on fm->content, depending whether mmap or
1131 malloc/read were used to read in the file. It also frees the
1132 memory needed to hold the FM structure itself. */
1135 read_file_free (struct file_memory *fm)
1140 munmap (fm->content, fm->length);
1145 xfree (fm->content);
1150 /* Free the pointers in a NULL-terminated vector of pointers, then
1151 free the pointer itself. */
1153 free_vec (char **vec)
1164 /* Append vector V2 to vector V1. The function frees V2 and
1165 reallocates V1 (thus you may not use the contents of either
1166 pointer after the call). If V1 is NULL, V2 is returned. */
1168 merge_vecs (char **v1, char **v2)
1178 /* To avoid j == 0 */
1183 for (i = 0; v1[i]; i++);
1185 for (j = 0; v2[j]; j++);
1186 /* Reallocate v1. */
1187 v1 = (char **)xrealloc (v1, (i + j + 1) * sizeof (char **));
1188 memcpy (v1 + i, v2, (j + 1) * sizeof (char *));
1193 /* A set of simple-minded routines to store strings in a linked list.
1194 This used to also be used for searching, but now we have hash
1197 /* It's a shame that these simple things like linked lists and hash
1198 tables (see hash.c) need to be implemented over and over again. It
1199 would be nice to be able to use the routines from glib -- see
1200 www.gtk.org for details. However, that would make Wget depend on
1201 glib, and I want to avoid dependencies to external libraries for
1202 reasons of convenience and portability (I suspect Wget is more
1203 portable than anything ever written for Gnome). */
1205 /* Append an element to the list. If the list has a huge number of
1206 elements, this can get slow because it has to find the list's
1207 ending. If you think you have to call slist_append in a loop,
1208 think about calling slist_prepend() followed by slist_nreverse(). */
1211 slist_append (slist *l, const char *s)
1213 slist *newel = (slist *)xmalloc (sizeof (slist));
1216 newel->string = xstrdup (s);
1221 /* Find the last element. */
1228 /* Prepend S to the list. Unlike slist_append(), this is O(1). */
1231 slist_prepend (slist *l, const char *s)
1233 slist *newel = (slist *)xmalloc (sizeof (slist));
1234 newel->string = xstrdup (s);
1239 /* Destructively reverse L. */
1242 slist_nreverse (slist *l)
1247 slist *next = l->next;
1255 /* Is there a specific entry in the list? */
1257 slist_contains (slist *l, const char *s)
1259 for (; l; l = l->next)
1260 if (!strcmp (l->string, s))
1265 /* Free the whole slist. */
1267 slist_free (slist *l)
1278 /* Sometimes it's useful to create "sets" of strings, i.e. special
1279 hash tables where you want to store strings as keys and merely
1280 query for their existence. Here is a set of utility routines that
1281 makes that transparent. */
1284 string_set_add (struct hash_table *ht, const char *s)
1286 /* First check whether the set element already exists. If it does,
1287 do nothing so that we don't have to free() the old element and
1288 then strdup() a new one. */
1289 if (hash_table_contains (ht, s))
1292 /* We use "1" as value. It provides us a useful and clear arbitrary
1293 value, and it consumes no memory -- the pointers to the same
1294 string "1" will be shared by all the key-value pairs in all `set'
1296 hash_table_put (ht, xstrdup (s), "1");
1299 /* Synonym for hash_table_contains... */
1302 string_set_contains (struct hash_table *ht, const char *s)
1304 return hash_table_contains (ht, s);
1308 string_set_free_mapper (void *key, void *value_ignored, void *arg_ignored)
1315 string_set_free (struct hash_table *ht)
1317 hash_table_map (ht, string_set_free_mapper, NULL);
1318 hash_table_destroy (ht);
1322 free_keys_and_values_mapper (void *key, void *value, void *arg_ignored)
1329 /* Another utility function: call free() on all keys and values of HT. */
1332 free_keys_and_values (struct hash_table *ht)
1334 hash_table_map (ht, free_keys_and_values_mapper, NULL);
1338 /* Engine for legible and legible_very_long; this function works on
1342 legible_1 (const char *repr)
1344 static char outbuf[128];
1349 /* Reset the pointers. */
1352 /* If the number is negative, shift the pointers. */
1358 /* How many digits before the first separator? */
1359 mod = strlen (inptr) % 3;
1361 for (i = 0; i < mod; i++)
1362 *outptr++ = inptr[i];
1363 /* Now insert the rest of them, putting separator before every
1365 for (i1 = i, i = 0; inptr[i1]; i++, i1++)
1367 if (i % 3 == 0 && i1 != 0)
1369 *outptr++ = inptr[i1];
1371 /* Zero-terminate the string. */
1376 /* Legible -- return a static pointer to the legibly printed long. */
1381 /* Print the number into the buffer. */
1382 long_to_string (inbuf, l);
1383 return legible_1 (inbuf);
1386 /* Write a string representation of NUMBER into the provided buffer.
1387 We cannot use sprintf() because we cannot be sure whether the
1388 platform supports printing of what we chose for VERY_LONG_TYPE.
1390 Example: Gcc supports `long long' under many platforms, but on many
1391 of those the native libc knows nothing of it and therefore cannot
1394 How long BUFFER needs to be depends on the platform and the content
1395 of NUMBER. For 64-bit VERY_LONG_TYPE (the most common case), 24
1396 bytes are sufficient. Using more might be a good idea.
1398 This function does not go through the hoops that long_to_string
1399 goes to because it doesn't aspire to be fast. (It's called perhaps
1400 once in a Wget run.) */
1403 very_long_to_string (char *buffer, VERY_LONG_TYPE number)
1408 /* Print the number backwards... */
1411 buffer[i++] = '0' + number % 10;
1416 /* ...and reverse the order of the digits. */
1417 for (j = 0; j < i / 2; j++)
1420 buffer[j] = buffer[i - 1 - j];
1421 buffer[i - 1 - j] = c;
1426 /* The same as legible(), but works on VERY_LONG_TYPE. See sysdep.h. */
1428 legible_very_long (VERY_LONG_TYPE l)
1431 /* Print the number into the buffer. */
1432 very_long_to_string (inbuf, l);
1433 return legible_1 (inbuf);
1436 /* Count the digits in a (long) integer. */
1446 while ((a /= 10) != 0)
/* Helper macros for unrolled decimal printing.

   All of them operate on two variables that must be in scope where the
   macro is expanded: N, the non-negative value still to be printed,
   and P, the output cursor.

   ONE_DIGIT (FIGURE) stores the character for N / FIGURE at *P and
   advances P.  ONE_DIGIT_ADVANCE (FIGURE) does the same and then
   reduces N to N % FIGURE, so the next lower digit can be extracted.

   DIGITS_<k> (FIGURE) emits K digits; FIGURE is the power of ten that
   corresponds to the leading digit and is divided by 10 at every step.

   Note that DIGITS_2 and above expand to several statements separated
   by `;', so they are only safe inside a braced block.  */
1451 #define ONE_DIGIT(figure) *p++ = n / (figure) + '0'
1452 #define ONE_DIGIT_ADVANCE(figure) (ONE_DIGIT (figure), n %= (figure))
1454 #define DIGITS_1(figure) ONE_DIGIT (figure)
1455 #define DIGITS_2(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_1 ((figure) / 10)
1456 #define DIGITS_3(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_2 ((figure) / 10)
1457 #define DIGITS_4(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_3 ((figure) / 10)
1458 #define DIGITS_5(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_4 ((figure) / 10)
1459 #define DIGITS_6(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_5 ((figure) / 10)
1460 #define DIGITS_7(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_6 ((figure) / 10)
1461 #define DIGITS_8(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_7 ((figure) / 10)
1462 #define DIGITS_9(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_8 ((figure) / 10)
1463 #define DIGITS_10(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_9 ((figure) / 10)
1465 /* DIGITS_<11-19> are only used on machines with 64-bit longs; a
   32-bit long never needs more than ten digits. */
1467 #define DIGITS_11(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_10 ((figure) / 10)
1468 #define DIGITS_12(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_11 ((figure) / 10)
1469 #define DIGITS_13(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_12 ((figure) / 10)
1470 #define DIGITS_14(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_13 ((figure) / 10)
1471 #define DIGITS_15(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_14 ((figure) / 10)
1472 #define DIGITS_16(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_15 ((figure) / 10)
1473 #define DIGITS_17(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_16 ((figure) / 10)
1474 #define DIGITS_18(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_17 ((figure) / 10)
1475 #define DIGITS_19(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_18 ((figure) / 10)
1477 /* Print NUMBER to BUFFER in base 10. This is completely equivalent
1478 to `sprintf(buffer, "%ld", number)', only much faster.
1480 The speedup may make a difference in programs that frequently
1481 convert numbers to strings. Some implementations of sprintf,
1482 particularly the one in GNU libc, have been known to be extremely
1483 slow compared to this function.
1485 BUFFER should accept as many bytes as you expect the number to take
1486 up. On machines with 64-bit longs the maximum needed size is 24
1487 bytes. That includes the worst-case digits, the optional `-' sign,
1488 and the trailing \0. */
1491 long_to_string (char *buffer, long number)
1496 #if (SIZEOF_LONG != 4) && (SIZEOF_LONG != 8)
1497 /* We are running in a strange or misconfigured environment. Let
1498 sprintf cope with it. */
1499 sprintf (buffer, "%ld", n);
1500 #else /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */
1508 if (n < 10) { DIGITS_1 (1); }
1509 else if (n < 100) { DIGITS_2 (10); }
1510 else if (n < 1000) { DIGITS_3 (100); }
1511 else if (n < 10000) { DIGITS_4 (1000); }
1512 else if (n < 100000) { DIGITS_5 (10000); }
1513 else if (n < 1000000) { DIGITS_6 (100000); }
1514 else if (n < 10000000) { DIGITS_7 (1000000); }
1515 else if (n < 100000000) { DIGITS_8 (10000000); }
1516 else if (n < 1000000000) { DIGITS_9 (100000000); }
1517 #if SIZEOF_LONG == 4
1518 /* ``if (1)'' serves only to preserve editor indentation. */
1519 else if (1) { DIGITS_10 (1000000000); }
1520 #else /* SIZEOF_LONG != 4 */
1521 else if (n < 10000000000L) { DIGITS_10 (1000000000L); }
1522 else if (n < 100000000000L) { DIGITS_11 (10000000000L); }
1523 else if (n < 1000000000000L) { DIGITS_12 (100000000000L); }
1524 else if (n < 10000000000000L) { DIGITS_13 (1000000000000L); }
1525 else if (n < 100000000000000L) { DIGITS_14 (10000000000000L); }
1526 else if (n < 1000000000000000L) { DIGITS_15 (100000000000000L); }
1527 else if (n < 10000000000000000L) { DIGITS_16 (1000000000000000L); }
1528 else if (n < 100000000000000000L) { DIGITS_17 (10000000000000000L); }
1529 else if (n < 1000000000000000000L) { DIGITS_18 (100000000000000000L); }
1530 else { DIGITS_19 (1000000000000000000L); }
1531 #endif /* SIZEOF_LONG != 4 */
1534 #endif /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */
1538 #undef ONE_DIGIT_ADVANCE
1560 /* Support for timers. */
1562 #undef TIMER_WINDOWS
1563 #undef TIMER_GETTIMEOFDAY
1566 /* Depending on the OS and availability of gettimeofday(), one and
1567 only one of the above constants will be defined. Virtually all
1568 modern Unix systems will define TIMER_GETTIMEOFDAY; Windows will
1569 use TIMER_WINDOWS. TIMER_TIME is a catch-all method for
1570 non-Windows systems without gettimeofday.
1572 #### Perhaps we should also support ftime(), which exists on old
1573 BSD 4.2-influenced systems? (It also existed under MS DOS Borland
1574 C, if memory serves me.) */
1577 # define TIMER_WINDOWS
1578 #else /* not WINDOWS */
1579 # ifdef HAVE_GETTIMEOFDAY
1580 # define TIMER_GETTIMEOFDAY
1584 #endif /* not WINDOWS */
1587 #ifdef TIMER_GETTIMEOFDAY
1596 #ifdef TIMER_WINDOWS
1597 ULARGE_INTEGER wintime;
1601 /* Allocate a timer. It is not legal to do anything with a freshly
1602 allocated timer, except call wtimer_reset() or wtimer_delete(). */
1605 wtimer_allocate (void)
1607 struct wget_timer *wt =
1608 (struct wget_timer *)xmalloc (sizeof (struct wget_timer));
1612 /* Allocate a new timer and reset it. Return the new timer. */
1617 struct wget_timer *wt = wtimer_allocate ();
1622 /* Free the resources associated with the timer. Its further use is
1626 wtimer_delete (struct wget_timer *wt)
1631 /* Reset timer WT. This establishes the starting point from which
1632 wtimer_elapsed() will return the number of elapsed
1633 milliseconds. It is allowed to reset a previously used timer. */
1636 wtimer_reset (struct wget_timer *wt)
1638 #ifdef TIMER_GETTIMEOFDAY
1640 gettimeofday (&t, NULL);
1641 wt->secs = t.tv_sec;
1642 wt->usecs = t.tv_usec;
1646 wt->secs = time (NULL);
1649 #ifdef TIMER_WINDOWS
1652 GetSystemTime (&st);
1653 SystemTimeToFileTime (&st, &ft);
1654 wt->wintime.HighPart = ft.dwHighDateTime;
1655 wt->wintime.LowPart = ft.dwLowDateTime;
1659 /* Return the number of milliseconds elapsed since the timer was last
1660 reset. It is allowed to call this function more than once to get
1661 increasingly higher elapsed values. */
1664 wtimer_elapsed (struct wget_timer *wt)
1666 #ifdef TIMER_GETTIMEOFDAY
1668 gettimeofday (&t, NULL);
1669 return (t.tv_sec - wt->secs) * 1000 + (t.tv_usec - wt->usecs) / 1000;
1673 time_t now = time (NULL);
1674 return 1000 * (now - wt->secs);
1681 GetSystemTime (&st);
1682 SystemTimeToFileTime (&st, &ft);
1683 uli.HighPart = ft.dwHighDateTime;
1684 uli.LowPart = ft.dwLowDateTime;
1685 return (long)((uli.QuadPart - wt->wintime.QuadPart) / 10000);
1689 /* Return the assessed granularity of the timer implementation. This
1690 is important for certain code that tries to deal with "zero" time
1694 wtimer_granularity (void)
1696 #ifdef TIMER_GETTIMEOFDAY
1697 /* Granularity of gettimeofday is hugely architecture-dependent.
1698 However, it appears that on modern machines it is better than
1704 /* This is clear. */
1708 #ifdef TIMER_WINDOWS
1714 /* This should probably be at a better place, but it doesn't really
1715 fit into html-parse.c. */
1717 /* The function returns the pointer to the malloc-ed quoted version of
1718 string s. It will recognize and quote numeric and special graphic
1719 entities, as per RFC1866:
1727 No other entities are recognized or replaced. */
1729 html_quote_string (const char *s)
1735 /* Pass through the string, and count the new size. */
1736 for (i = 0; *s; s++, i++)
1739 i += 4; /* `amp;' */
1740 else if (*s == '<' || *s == '>')
1741 i += 3; /* `lt;' and `gt;' */
1742 else if (*s == '\"')
1743 i += 5; /* `quot;' */
1747 res = (char *)xmalloc (i + 1);
1749 for (p = res; *s; s++)
1762 *p++ = (*s == '<' ? 'l' : 'g');
1789 /* Determine the width of the terminal we're running on. If that's
1790 not possible, return 0. */
1793 determine_screen_width (void)
1795 /* If there's a way to get the terminal size using POSIX
1796 tcgetattr(), somebody please tell me. */
1799 #else /* TIOCGWINSZ */
1803 if (opt.lfilename != NULL)
1806 fd = fileno (stderr);
1807 if (ioctl (fd, TIOCGWINSZ, &wsz) < 0)
1808 return 0; /* most likely ENOTTY */
1811 #endif /* TIOCGWINSZ */
1815 /* A debugging function for checking whether an MD5 library works. */
1817 #include "gen-md5.h"
1820 debug_test_md5 (char *buf)
1822 unsigned char raw[16];
1823 static char res[33];
1827 ALLOCA_MD5_CONTEXT (ctx);
1830 gen_md5_update ((unsigned char *)buf, strlen (buf), ctx);
1831 gen_md5_finish (ctx, raw);
1838 *p2++ = XDIGIT_TO_xchar (*p1 >> 4);
1839 *p2++ = XDIGIT_TO_xchar (*p1 & 0xf);