1 /* Various functions of utilitarian nature.
2 Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001
3 Free Software Foundation, Inc.
5 This file is part of GNU Wget.
7 GNU Wget is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 GNU Wget is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with Wget; if not, write to the Free Software
19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
27 #else /* not HAVE_STRING_H */
29 #endif /* not HAVE_STRING_H */
30 #include <sys/types.h>
35 # include <sys/mman.h>
44 #ifdef HAVE_SYS_UTIME_H
45 # include <sys/utime.h>
49 # include <libc.h> /* for access() */
54 /* For TIOCGWINSZ and friends: */
55 #ifdef HAVE_SYS_IOCTL_H
56 # include <sys/ioctl.h>
71 /* This section implements several wrappers around the basic
72 allocation routines. This is done for two reasons: first, so that
73 the callers of these functions need not consistently check for
74 errors. If there is not enough virtual memory for running Wget,
75 something is seriously wrong, and Wget exits with an appropriate
78 The second reason why these are useful is that, if DEBUG_MALLOC is
79 defined, they also provide a handy (if crude) malloc debugging
80 interface that checks memory leaks. */
82 /* Croak the fatal memory error and bail out with non-zero exit
85 memfatal (const char *what)
87 /* HACK: expose save_log_p from log.c, so we can turn it off in
88 order to prevent saving the log. Saving the log is dangerous
89 because logprintf() and logputs() can call malloc(), so this
90 could infloop. When logging is turned off, infloop can no longer
93 #### This is no longer really necessary because the new routines
94 in log.c cons only if the line exceeds eighty characters. But
95 this can come at the end of a line, so it's OK to be careful.
97 On a more serious note, it would be good to have a
98 log_forced_shutdown() routine that exposes this cleanly. */
99 extern int save_log_p;
102 logprintf (LOG_ALWAYS, _("%s: %s: Not enough memory.\n"), exec_name, what);
106 /* These functions end with _real because they need to be
107 distinguished from the debugging functions, and from the macros.
110 If memory debugging is not turned on, wget.h defines these:
112 #define xmalloc xmalloc_real
113 #define xrealloc xrealloc_real
114 #define xstrdup xstrdup_real
117 In case of memory debugging, the definitions are a bit more
118 complex, because we want to provide more information, *and* we want
119 to call the debugging code. (The former is the reason why xmalloc
120 and friends need to be macros in the first place.) Then it looks
123 #define xmalloc(a) xmalloc_debug (a, __FILE__, __LINE__)
124 #define xfree(a) xfree_debug (a, __FILE__, __LINE__)
125 #define xrealloc(a, b) xrealloc_debug (a, b, __FILE__, __LINE__)
126 #define xstrdup(a) xstrdup_debug (a, __FILE__, __LINE__)
128 Each of the *_debug functions does its magic and calls the real one. */
131 # define STATIC_IF_DEBUG static
133 # define STATIC_IF_DEBUG
136 STATIC_IF_DEBUG void *
137 xmalloc_real (size_t size)
139 void *ptr = malloc (size);
145 STATIC_IF_DEBUG void *
146 xrealloc_real (void *ptr, size_t newsize)
150 /* Not all Un*xes have the feature of realloc() that calling it with
151 a NULL-pointer is the same as malloc(), but it is easy to
154 newptr = realloc (ptr, newsize);
156 newptr = malloc (newsize);
158 memfatal ("realloc");
162 STATIC_IF_DEBUG char *
163 xstrdup_real (const char *s)
169 copy = malloc (l + 1);
172 memcpy (copy, s, l + 1);
173 #else /* HAVE_STRDUP */
177 #endif /* HAVE_STRDUP */
184 /* Crude home-grown routines for debugging some malloc-related
187 * Counting the number of malloc and free invocations, and reporting
188 the "balance", i.e. how many times more malloc was called than it
189 was the case with free.
191 * Making malloc store its entry into a simple array and free remove
192 stuff from that array. At the end, print the pointers which have
193 not been freed, along with the source file and the line number.
194 This also has the side-effect of detecting freeing memory that
197 Note that this kind of memory leak checking strongly depends on
198 every malloc() being followed by a free(), even if the program is
199 about to finish. Wget is careful to free the data structure it
200 allocated in init.c. */
202 static int malloc_count, free_count;
208 } malloc_debug[100000];
210 /* Both register_ptr and unregister_ptr take O(n) operations to run,
211 which can be a real problem. It would be nice to use a hash table
212 for malloc_debug, but the functions in hash.c are not suitable
213 because they can call malloc() themselves. Maybe it would work if
214 the hash table were preallocated to a huge size, and if we set the
215 rehash threshold to 1.0. */
217 /* Register PTR in malloc_debug. Abort if this is not possible
218 (presumably due to the number of current allocations exceeding the
219 size of malloc_debug.) */
222 register_ptr (void *ptr, const char *file, int line)
225 for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
226 if (malloc_debug[i].ptr == NULL)
228 malloc_debug[i].ptr = ptr;
229 malloc_debug[i].file = file;
230 malloc_debug[i].line = line;
236 /* Unregister PTR from malloc_debug. Abort if PTR is not present in
237 malloc_debug. (This catches calling free() with a bogus pointer.) */
240 unregister_ptr (void *ptr)
243 for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
244 if (malloc_debug[i].ptr == ptr)
246 malloc_debug[i].ptr = NULL;
252 /* Print the malloc debug stats that can be gathered from the above
253 information. Currently this is the count of mallocs, frees, the
254 difference between the two, and the dump of the contents of
255 malloc_debug. The last part is the list of memory leaks. */
258 print_malloc_debug_stats (void)
261 printf ("\nMalloc: %d\nFree: %d\nBalance: %d\n\n",
262 malloc_count, free_count, malloc_count - free_count);
263 for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
264 if (malloc_debug[i].ptr != NULL)
265 printf ("0x%08ld: %s:%d\n", (long)malloc_debug[i].ptr,
266 malloc_debug[i].file, malloc_debug[i].line);
270 xmalloc_debug (size_t size, const char *source_file, int source_line)
272 void *ptr = xmalloc_real (size);
274 register_ptr (ptr, source_file, source_line);
279 xfree_debug (void *ptr, const char *source_file, int source_line)
281 assert (ptr != NULL);
283 unregister_ptr (ptr);
288 xrealloc_debug (void *ptr, size_t newsize, const char *source_file, int source_line)
290 void *newptr = xrealloc_real (ptr, newsize);
294 register_ptr (newptr, source_file, source_line);
296 else if (newptr != ptr)
298 unregister_ptr (ptr);
299 register_ptr (newptr, source_file, source_line);
305 xstrdup_debug (const char *s, const char *source_file, int source_line)
307 char *copy = xstrdup_real (s);
309 register_ptr (copy, source_file, source_line);
313 #endif /* DEBUG_MALLOC */
315 /* Utility function: like xstrdup(), but also lowercases S. */
318 xstrdup_lower (const char *s)
320 char *copy = xstrdup (s);
327 /* Return a count of how many times CHR occurs in STRING. */
330 count_char (const char *string, char chr)
334 for (p = string; *p; p++)
340 /* Copy the string formed by two pointers (one on the beginning, other
341 on the char after the last char) to a new, malloc-ed location.
344 strdupdelim (const char *beg, const char *end)
346 char *res = (char *)xmalloc (end - beg + 1);
347 memcpy (res, beg, end - beg);
348 res[end - beg] = '\0';
352 /* Parse a string containing comma-separated elements, and return a
353 vector of char pointers with the elements. Spaces following the
354 commas are ignored. */
356 sepstring (const char *s)
370 res = (char **)xrealloc (res, (i + 2) * sizeof (char *));
371 res[i] = strdupdelim (p, s);
374 /* Skip the blanks following the ','. */
382 res = (char **)xrealloc (res, (i + 2) * sizeof (char *));
383 res[i] = strdupdelim (p, s);
388 /* Return pointer to a static char[] buffer in which zero-terminated
389 string-representation of TM (in form hh:mm:ss) is printed.
391 If TM is non-NULL, the current time-in-seconds will be stored
394 (#### This is misleading: one would expect TM would be used instead
395 of the current time in that case. This design was probably
396 influenced by the design of time(2), and should be changed at some
397 point. No callers use non-NULL TM anyway.) */
400 time_str (time_t *tm)
402 static char output[15];
404 time_t secs = time (tm);
408 /* In case of error, return the empty string. Maybe we should
409 just abort if this happens? */
413 ptm = localtime (&secs);
414 sprintf (output, "%02d:%02d:%02d", ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
418 /* Like the above, but include the date: YYYY-MM-DD hh:mm:ss. */
421 datetime_str (time_t *tm)
423 static char output[20]; /* "YYYY-MM-DD hh:mm:ss" + \0 */
425 time_t secs = time (tm);
429 /* In case of error, return the empty string. Maybe we should
430 just abort if this happens? */
434 ptm = localtime (&secs);
435 sprintf (output, "%04d-%02d-%02d %02d:%02d:%02d",
436 ptm->tm_year + 1900, ptm->tm_mon + 1, ptm->tm_mday,
437 ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
441 /* The Windows versions of the following two functions are defined in
446 fork_to_background (void)
449 /* Whether we arrange our own version of opt.lfilename here. */
454 opt.lfilename = unique_name (DEFAULT_LOGFILE);
466 /* parent, no error */
467 printf (_("Continuing in background.\n"));
469 printf (_("Output will be written to `%s'.\n"), opt.lfilename);
472 /* child: keep running */
474 #endif /* not WINDOWS */
481 char *r = xstrdup (orig);
487 /* Canonicalize PATH, and return a new path. The new path differs from PATH
489 Multiple `/'s are collapsed to a single `/'.
490 Leading `./'s and trailing `/.'s are removed.
491 Trailing `/'s are removed.
492 Non-leading `../'s and trailing `..'s are handled by removing
493 portions of the path.
495 E.g. "a/b/c/./../d/.." will yield "a/b/". This function originates
496 from GNU Bash and has been mutilated beyond recognition for use in
500 Always use '/' as stub_char.
501 Don't check for local things using canon_stat.
502 Change the original string instead of strdup-ing.
503 React correctly when beginning with `./' and `../'.
504 Don't zip out trailing slashes.
505 Return a value indicating whether any modifications took place.
507 If you dare change this function, take a careful look at the test
508 cases below, and make sure that they pass. */
511 path_simplify (char *path)
513 register int i, start;
520 /* Preserve initial '/'. */
523 /* Nix out a path that consists solely of `.' or `..'. */
524 if ((path[0] == '.' && path[1] == '\0')
525 || (path[0] == '.' && path[1] == '.' && path[2] == '\0'))
532 /* Walk along PATH looking for things to compact. */
539 while (path[i] && path[i] != '/')
544 /* If we didn't find any slashes, then there is nothing left to do. */
548 /* Handle multiple `/'s in a row. */
549 while (path[i] == '/')
552 if ((start + 1) != i)
554 strcpy (path + start + 1, path + i);
559 /* Check for `../', `./' or trailing `.' by itself. */
562 /* Handle trailing `.' by itself. */
571 if (path[i + 1] == '/')
573 strcpy (path + i, path + i + 1);
574 i = (start < 0) ? 0 : start;
579 /* Handle `../' or trailing `..' by itself. */
580 if (path[i + 1] == '.' &&
581 (path[i + 2] == '/' || !path[i + 2]))
583 while (--start > -1 && path[start] != '/');
584 strcpy (path + start + 1, path + i + 2 + (start == -1 && path[i + 2]));
585 i = (start < 0) ? 0 : start;
592 /* Addition: Remove all `./'-s and `../'-s preceding the string. */
596 if (path[i] == '.' && path[i + 1] == '/')
598 else if (path[i] == '.' && path[i + 1] == '.' && path[i + 2] == '/')
605 strcpy (path, path + i - 0);
620 ps("foo/bar") -> "foo/bar"
621 ps("foo//bar") -> "foo/bar" (possibly a bug)
622 ps("foo/../bar") -> "bar"
623 ps("foo/bar/..") -> "foo/"
624 ps("foo/bar/../x") -> "foo/x"
625 ps("foo/bar/../x/") -> "foo/x/"
628 ps("a/b/../../c") -> "c"
629 ps("/a/b/../../c") -> "/c"
630 ps("./a/../b") -> "b"
631 ps("/./a/../b") -> "/b"
634 /* "Touch" FILE, i.e. make its atime and mtime equal to the time
635 specified with TM. */
637 touch (const char *file, time_t tm)
639 #ifdef HAVE_STRUCT_UTIMBUF
640 struct utimbuf times;
641 times.actime = times.modtime = tm;
644 times[0] = times[1] = tm;
647 if (utime (file, ×) == -1)
648 logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno));
651 /* Checks if FILE is a symbolic link, and removes it if it is. Does
652 nothing under MS-Windows. */
654 remove_link (const char *file)
659 if (lstat (file, &st) == 0 && S_ISLNK (st.st_mode))
661 DEBUGP (("Unlinking %s (symlink).\n", file));
664 logprintf (LOG_VERBOSE, _("Failed to unlink symlink `%s': %s\n"),
665 file, strerror (errno));
670 /* Does FILENAME exist? This is quite a lousy implementation, since
671 it supplies no error codes -- only a yes-or-no answer. Thus it
672 will return that a file does not exist if, e.g., the directory is
673 unreadable. I don't mind it too much currently, though. The
674 proper way should, of course, be to have a third, error state,
675 other than true/false, but that would introduce uncalled-for
676 additional complexity to the callers. */
678 file_exists_p (const char *filename)
681 return access (filename, F_OK) >= 0;
684 return stat (filename, &buf) >= 0;
688 /* Returns 0 if PATH is a directory, 1 otherwise (any kind of file).
689 Returns 0 on error. */
691 file_non_directory_p (const char *path)
694 /* Use lstat() rather than stat() so that symbolic links pointing to
695 directories can be identified correctly. */
696 if (lstat (path, &buf) != 0)
698 return S_ISDIR (buf.st_mode) ? 0 : 1;
701 /* Return a unique filename, given a prefix and count */
703 unique_name_1 (const char *fileprefix, int count)
709 filename = (char *)xmalloc (strlen (fileprefix) + numdigit (count) + 2);
710 sprintf (filename, "%s.%d", fileprefix, count);
713 filename = xstrdup (fileprefix);
715 if (!file_exists_p (filename))
724 /* Return a unique file name, based on PREFIX. */
726 unique_name (const char *prefix)
732 file = unique_name_1 (prefix, count++);
736 /* Create DIRECTORY. If some of the pathname components of DIRECTORY
737 are missing, create them first. In case any mkdir() call fails,
738 return its error status. Returns 0 on successful completion.
740 The behaviour of this function should be identical to the behaviour
741 of `mkdir -p' on systems where mkdir supports the `-p' option. */
743 make_directory (const char *directory)
749 /* Make a copy of dir, to be able to write to it. Otherwise, the
750 function is unsafe if called with a read-only char *argument. */
751 STRDUP_ALLOCA (dir, directory);
753 /* If the first character of dir is '/', skip it (and thus enable
754 creation of absolute-pathname directories). */
755 for (i = (*dir == '/'); 1; ++i)
757 for (; dir[i] && dir[i] != '/'; i++)
762 /* Check whether the directory already exists. */
763 if (!file_exists_p (dir))
765 if (mkdir (dir, 0777) < 0)
776 /* Merge BASE with FILE. BASE can be a directory or a file name, FILE
777 should be a file name.
779 file_merge("/foo/bar", "baz") => "/foo/baz"
780 file_merge("/foo/bar/", "baz") => "/foo/bar/baz"
781 file_merge("foo", "bar") => "bar"
783 In other words, it's a simpler and gentler version of uri_merge_1. */
786 file_merge (const char *base, const char *file)
789 const char *cut = (const char *)strrchr (base, '/');
792 return xstrdup (file);
794 result = (char *)xmalloc (cut - base + 1 + strlen (file) + 1);
795 memcpy (result, base, cut - base);
796 result[cut - base] = '/';
797 strcpy (result + (cut - base) + 1, file);
802 static int in_acclist PARAMS ((const char *const *, const char *, int));
804 /* Determine whether a file is acceptable to be followed, according to
805 lists of patterns to accept/reject. */
807 acceptable (const char *s)
811 while (l && s[l] != '/')
818 return (in_acclist ((const char *const *)opt.accepts, s, 1)
819 && !in_acclist ((const char *const *)opt.rejects, s, 1));
821 return in_acclist ((const char *const *)opt.accepts, s, 1);
823 else if (opt.rejects)
824 return !in_acclist ((const char *const *)opt.rejects, s, 1);
828 /* Compare S1 and S2 frontally; S2 must begin with S1. E.g. if S1 is
829 `/something', frontcmp() will return 1 only if S2 begins with
830 `/something'. Otherwise, 0 is returned. */
832 frontcmp (const char *s1, const char *s2)
834 for (; *s1 && *s2 && (*s1 == *s2); ++s1, ++s2);
838 /* Iterate through STRLIST, and return the first element that matches
839 S, through wildcards or front comparison (as appropriate). */
841 proclist (char **strlist, const char *s, enum accd flags)
845 for (x = strlist; *x; x++)
846 if (has_wildcards_p (*x))
848 if (fnmatch (*x, s, FNM_PATHNAME) == 0)
853 char *p = *x + ((flags & ALLABS) && (**x == '/')); /* Remove '/' */
860 /* Returns whether DIRECTORY is acceptable for download, wrt the
861 include/exclude lists.
863 If FLAGS is ALLABS, the leading `/' is ignored in paths; relative
864 and absolute paths may be freely intermixed. */
866 accdir (const char *directory, enum accd flags)
868 /* Remove starting '/'. */
869 if (flags & ALLABS && *directory == '/')
873 if (!proclist (opt.includes, directory, flags))
878 if (proclist (opt.excludes, directory, flags))
884 /* Match the end of STRING against PATTERN. For instance:
886 match_tail ("abc", "bc") -> 1
887 match_tail ("abc", "ab") -> 0
888 match_tail ("abc", "abc") -> 1 */
890 match_tail (const char *string, const char *pattern)
894 for (i = strlen (string), j = strlen (pattern); i >= 0 && j >= 0; i--, j--)
895 if (string[i] != pattern[j])
897 /* If the pattern was exhausted, the match was successful. */
904 /* Checks whether string S matches each element of ACCEPTS. A list
905 element is matched either with fnmatch() or match_tail(),
906 according to whether the element contains wildcards or not.
908 If BACKWARD is 0, don't do backward comparison -- just compare
911 in_acclist (const char *const *accepts, const char *s, int backward)
913 for (; *accepts; accepts++)
915 if (has_wildcards_p (*accepts))
917 /* fnmatch returns 0 if the pattern *does* match the
919 if (fnmatch (*accepts, s, 0) == 0)
926 if (match_tail (s, *accepts))
931 if (!strcmp (s, *accepts))
939 /* Return the location of STR's suffix (file extension). Examples:
940 suffix ("foo.bar") -> "bar"
941 suffix ("foo.bar.baz") -> "baz"
942 suffix ("/foo/bar") -> NULL
943 suffix ("/foo.bar/baz") -> NULL */
945 suffix (const char *str)
949 for (i = strlen (str); i && str[i] != '/' && str[i] != '.'; i--)
953 return (char *)str + i;
958 /* Read a line from FP. The function reallocs the storage as needed
959 to accommodate any length of the line. Reallocs are done
960 exponentially, doubling the storage after each overflow to minimize
961 the number of calls to realloc() and fgets(). The newline
962 character at the end of line is retained.
964 After end-of-file is encountered without anything being read, NULL
965 is returned. NULL is also returned on error. To distinguish
966 between these two cases, use the stdio function ferror().
968 A future version of this function will be rewritten to use fread()
969 instead of fgets(), and to return the length of the line, which
970 will make the function usable on files with binary content. */
973 read_whole_line (FILE *fp)
977 char *line = (char *)xmalloc (bufsize);
979 while (fgets (line + length, bufsize - length, fp))
981 length += strlen (line + length);
983 /* Possible for example when reading from a binary file where
984 a line begins with \0. */
987 if (line[length - 1] == '\n')
990 /* fgets() guarantees to read the whole line, or to use up the
991 space we've given it. We can double the buffer
994 line = xrealloc (line, bufsize);
996 if (length == 0 || ferror (fp))
1001 if (length + 1 < bufsize)
1002 /* Relieve the memory from our exponential greediness. We say
1003 `length + 1' because the terminating \0 is not included in
1004 LENGTH. We don't need to zero-terminate the string ourselves,
1005 though, because fgets() does that. */
1006 line = xrealloc (line, length + 1);
1010 /* Read FILE into memory. A pointer to `struct file_memory' is
1011 returned; use struct element `content' to access file contents, and
1012 the element `length' to know the file length. `content' is *not*
1013 zero-terminated, and you should *not* read or write beyond the [0,
1014 length) range of characters.
1016 After you are done with the file contents, call read_file_free to
1019 Depending on the operating system and the type of file that is
1020 being read, read_file() either mmap's the file into memory, or
1021 reads the file into the core using read().
1023 If file is named "-", fileno(stdin) is used for reading instead.
1024 If you want to read from a real file named "-", use "./-" instead. */
1026 struct file_memory *
1027 read_file (const char *file)
1030 struct file_memory *fm;
1032 int inhibit_close = 0;
1034 /* Some magic in the finest tradition of Perl and its kin: if FILE
1035 is "-", just use stdin. */
1038 fd = fileno (stdin);
1040 /* Note that we don't inhibit mmap() in this case. If stdin is
1041 redirected from a regular file, mmap() will still work. */
1044 fd = open (file, O_RDONLY);
1047 fm = xmalloc (sizeof (struct file_memory));
1052 if (fstat (fd, &buf) < 0)
1054 fm->length = buf.st_size;
1055 /* NOTE: As far as I know, the callers of this function never
1056 modify the file text. Relying on this would enable us to
1057 specify PROT_READ and MAP_SHARED for a marginal gain in
1058 efficiency, but at some cost to generality. */
1059 fm->content = mmap (NULL, fm->length, PROT_READ | PROT_WRITE,
1060 MAP_PRIVATE, fd, 0);
1061 if (fm->content == (char *)MAP_FAILED)
1071 /* The most common reason why mmap() fails is that FD does not point
1072 to a plain file. However, it's also possible that mmap() doesn't
1073 work for a particular type of file. Therefore, whenever mmap()
1074 fails, we just fall back to the regular method. */
1075 #endif /* HAVE_MMAP */
1078 size = 512; /* number of bytes fm->contents can
1079 hold at any given time. */
1080 fm->content = xmalloc (size);
1084 if (fm->length > size / 2)
1086 /* #### I'm not sure whether the whole exponential-growth
1087 thing makes sense with kernel read. On Linux at least,
1088 read() refuses to read more than 4K from a file at a
1089 single chunk anyway. But other Unixes might optimize it
1090 better, and it doesn't *hurt* anything, so I'm leaving
1093 /* Normally, we grow SIZE exponentially to make the number
1094 of calls to read() and realloc() logarithmic in relation
1095 to file size. However, read() can read an amount of data
1096 smaller than requested, and it would be unreasonable to
1097 double SIZE every time *something* was read. Therefore,
1098 we double SIZE only when the length exceeds half of the
1099 entire allocated size. */
1101 fm->content = xrealloc (fm->content, size);
1103 nread = read (fd, fm->content + fm->length, size - fm->length);
1105 /* Successful read. */
1106 fm->length += nread;
1116 if (size > fm->length && fm->length != 0)
1117 /* Due to exponential growth of fm->content, the allocated region
1118 might be much larger than what is actually needed. */
1119 fm->content = xrealloc (fm->content, fm->length);
1126 xfree (fm->content);
1131 /* Release the resources held by FM. Specifically, this calls
1132 munmap() or xfree() on fm->content, depending whether mmap or
1133 malloc/read were used to read in the file. It also frees the
1134 memory needed to hold the FM structure itself. */
1137 read_file_free (struct file_memory *fm)
1142 munmap (fm->content, fm->length);
1147 xfree (fm->content);
1152 /* Free the pointers in a NULL-terminated vector of pointers, then
1153 free the pointer itself. */
1155 free_vec (char **vec)
1166 /* Append vector V2 to vector V1. The function frees V2 and
1167 reallocates V1 (thus you may not use the contents of either
1168 pointer after the call). If V1 is NULL, V2 is returned. */
1170 merge_vecs (char **v1, char **v2)
1180 /* To avoid j == 0 */
1185 for (i = 0; v1[i]; i++);
1187 for (j = 0; v2[j]; j++);
1188 /* Reallocate v1. */
1189 v1 = (char **)xrealloc (v1, (i + j + 1) * sizeof (char **));
1190 memcpy (v1 + i, v2, (j + 1) * sizeof (char *));
1195 /* A set of simple-minded routines to store strings in a linked list.
1196 This used to also be used for searching, but now we have hash
1199 /* It's a shame that these simple things like linked lists and hash
1200 tables (see hash.c) need to be implemented over and over again. It
1201 would be nice to be able to use the routines from glib -- see
1202 www.gtk.org for details. However, that would make Wget depend on
1203 glib, and I want to avoid dependencies to external libraries for
1204 reasons of convenience and portability (I suspect Wget is more
1205 portable than anything ever written for Gnome). */
1207 /* Append an element to the list. If the list has a huge number of
1208 elements, this can get slow because it has to find the list's
1209 ending. If you think you have to call slist_append in a loop,
1210 think about calling slist_prepend() followed by slist_nreverse(). */
1213 slist_append (slist *l, const char *s)
1215 slist *newel = (slist *)xmalloc (sizeof (slist));
1218 newel->string = xstrdup (s);
1223 /* Find the last element. */
1230 /* Prepend S to the list. Unlike slist_append(), this is O(1). */
1233 slist_prepend (slist *l, const char *s)
1235 slist *newel = (slist *)xmalloc (sizeof (slist));
1236 newel->string = xstrdup (s);
1241 /* Destructively reverse L. */
1244 slist_nreverse (slist *l)
1249 slist *next = l->next;
1257 /* Is there a specific entry in the list? */
1259 slist_contains (slist *l, const char *s)
1261 for (; l; l = l->next)
1262 if (!strcmp (l->string, s))
1267 /* Free the whole slist. */
1269 slist_free (slist *l)
1280 /* Sometimes it's useful to create "sets" of strings, i.e. special
1281 hash tables where you want to store strings as keys and merely
1282 query for their existence. Here is a set of utility routines that
1283 makes that transparent. */
1286 string_set_add (struct hash_table *ht, const char *s)
1288 /* First check whether the set element already exists. If it does,
1289 do nothing so that we don't have to free() the old element and
1290 then strdup() a new one. */
1291 if (hash_table_contains (ht, s))
1294 /* We use "1" as value. It provides us a useful and clear arbitrary
1295 value, and it consumes no memory -- the pointers to the same
1296 string "1" will be shared by all the key-value pairs in all `set'
1298 hash_table_put (ht, xstrdup (s), "1");
1301 /* Synonym for hash_table_contains... */
1304 string_set_contains (struct hash_table *ht, const char *s)
1306 return hash_table_contains (ht, s);
1310 string_set_free_mapper (void *key, void *value_ignored, void *arg_ignored)
1317 string_set_free (struct hash_table *ht)
1319 hash_table_map (ht, string_set_free_mapper, NULL);
1320 hash_table_destroy (ht);
1324 free_keys_and_values_mapper (void *key, void *value, void *arg_ignored)
1331 /* Another utility function: call free() on all keys and values of HT. */
1334 free_keys_and_values (struct hash_table *ht)
1336 hash_table_map (ht, free_keys_and_values_mapper, NULL);
1340 /* Engine for legible and legible_very_long; this function works on
1344 legible_1 (const char *repr)
1346 static char outbuf[128];
1351 /* Reset the pointers. */
1354 /* If the number is negative, shift the pointers. */
1360 /* How many digits before the first separator? */
1361 mod = strlen (inptr) % 3;
1363 for (i = 0; i < mod; i++)
1364 *outptr++ = inptr[i];
1365 /* Now insert the rest of them, putting separator before every
1367 for (i1 = i, i = 0; inptr[i1]; i++, i1++)
1369 if (i % 3 == 0 && i1 != 0)
1371 *outptr++ = inptr[i1];
1373 /* Zero-terminate the string. */
1378 /* Legible -- return a static pointer to the legibly printed long. */
1383 /* Print the number into the buffer. */
1384 long_to_string (inbuf, l);
1385 return legible_1 (inbuf);
1388 /* Write a string representation of NUMBER into the provided buffer.
1389 We cannot use sprintf() because we cannot be sure whether the
1390 platform supports printing of what we chose for VERY_LONG_TYPE.
1392 Example: Gcc supports `long long' under many platforms, but on many
1393 of those the native libc knows nothing of it and therefore cannot
1396 How long BUFFER needs to be depends on the platform and the content
1397 of NUMBER. For 64-bit VERY_LONG_TYPE (the most common case), 24
1398 bytes are sufficient. Using more might be a good idea.
1400 This function does not go through the hoops that long_to_string
1401 goes through because it doesn't aspire to be fast. (It's called perhaps
1402 once in a Wget run.) */
1405 very_long_to_string (char *buffer, VERY_LONG_TYPE number)
1410 /* Print the number backwards... */
1413 buffer[i++] = '0' + number % 10;
1418 /* ...and reverse the order of the digits. */
1419 for (j = 0; j < i / 2; j++)
1422 buffer[j] = buffer[i - 1 - j];
1423 buffer[i - 1 - j] = c;
1428 /* The same as legible(), but works on VERY_LONG_TYPE. See sysdep.h. */
1430 legible_very_long (VERY_LONG_TYPE l)
1433 /* Print the number into the buffer. */
1434 very_long_to_string (inbuf, l);
1435 return legible_1 (inbuf);
1438 /* Count the digits in a (long) integer. */
1448 while ((a /= 10) != 0)
1453 #define ONE_DIGIT(figure) *p++ = n / (figure) + '0'
1454 #define ONE_DIGIT_ADVANCE(figure) (ONE_DIGIT (figure), n %= (figure))
1456 #define DIGITS_1(figure) ONE_DIGIT (figure)
1457 #define DIGITS_2(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_1 ((figure) / 10)
1458 #define DIGITS_3(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_2 ((figure) / 10)
1459 #define DIGITS_4(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_3 ((figure) / 10)
1460 #define DIGITS_5(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_4 ((figure) / 10)
1461 #define DIGITS_6(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_5 ((figure) / 10)
1462 #define DIGITS_7(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_6 ((figure) / 10)
1463 #define DIGITS_8(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_7 ((figure) / 10)
1464 #define DIGITS_9(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_8 ((figure) / 10)
1465 #define DIGITS_10(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_9 ((figure) / 10)
1467 /* DIGITS_<11-20> are only used on machines with 64-bit longs. */
1469 #define DIGITS_11(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_10 ((figure) / 10)
1470 #define DIGITS_12(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_11 ((figure) / 10)
1471 #define DIGITS_13(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_12 ((figure) / 10)
1472 #define DIGITS_14(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_13 ((figure) / 10)
1473 #define DIGITS_15(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_14 ((figure) / 10)
1474 #define DIGITS_16(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_15 ((figure) / 10)
1475 #define DIGITS_17(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_16 ((figure) / 10)
1476 #define DIGITS_18(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_17 ((figure) / 10)
1477 #define DIGITS_19(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_18 ((figure) / 10)
1479 /* Print NUMBER to BUFFER in base 10. This is completely equivalent
1480 to `sprintf(buffer, "%ld", number)', only much faster.
1482 The speedup may make a difference in programs that frequently
1483 convert numbers to strings. Some implementations of sprintf,
1484 particularly the one in GNU libc, have been known to be extremely
1485 slow compared to this function.
1487 BUFFER should accept as many bytes as you expect the number to take
1488 up. On machines with 64-bit longs the maximum needed size is 24
1489 bytes. That includes the worst-case digits, the optional `-' sign,
1490 and the trailing \0. */
/* Implementation notes: when SIZEOF_LONG is neither 4 nor 8 the
   conversion is delegated to sprintf (); otherwise an if/else-if
   ladder on the magnitude of N selects the DIGITS_<k> macro matching
   the number of decimal digits.  The 64-bit ladder ends at DIGITS_19,
   i.e. at most 19 digits, so with the sign and the trailing \0 the
   24 bytes quoted above leave a small margin.
   NOTE(review): the declarations of the locals, the handling of
   negative values and the final \0 store are not visible in this
   excerpt -- confirm them against the full function.  */
1493 long_to_string (char *buffer, long number)
1498 #if (SIZEOF_LONG != 4) && (SIZEOF_LONG != 8)
1499 /* We are running in a strange or misconfigured environment. Let
1500 sprintf cope with it. */
1501 sprintf (buffer, "%ld", n);
1502 #else /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */
/* Each branch hands its macro the power of ten of the leading digit
   (10^(k-1) for a k-digit value).  */
1510 if (n < 10) { DIGITS_1 (1); }
1511 else if (n < 100) { DIGITS_2 (10); }
1512 else if (n < 1000) { DIGITS_3 (100); }
1513 else if (n < 10000) { DIGITS_4 (1000); }
1514 else if (n < 100000) { DIGITS_5 (10000); }
1515 else if (n < 1000000) { DIGITS_6 (100000); }
1516 else if (n < 10000000) { DIGITS_7 (1000000); }
1517 else if (n < 100000000) { DIGITS_8 (10000000); }
1518 else if (n < 1000000000) { DIGITS_9 (100000000); }
/* With a 4-byte long ten digits is the widest case, so the last
   branch is unconditional; with an 8-byte long the ladder continues
   up to nineteen digits.  */
1519 #if SIZEOF_LONG == 4
1520 /* ``if (1)'' serves only to preserve editor indentation. */
1521 else if (1) { DIGITS_10 (1000000000); }
1522 #else /* SIZEOF_LONG != 4 */
1523 else if (n < 10000000000L) { DIGITS_10 (1000000000L); }
1524 else if (n < 100000000000L) { DIGITS_11 (10000000000L); }
1525 else if (n < 1000000000000L) { DIGITS_12 (100000000000L); }
1526 else if (n < 10000000000000L) { DIGITS_13 (1000000000000L); }
1527 else if (n < 100000000000000L) { DIGITS_14 (10000000000000L); }
1528 else if (n < 1000000000000000L) { DIGITS_15 (100000000000000L); }
1529 else if (n < 10000000000000000L) { DIGITS_16 (1000000000000000L); }
1530 else if (n < 100000000000000000L) { DIGITS_17 (10000000000000000L); }
1531 else if (n < 1000000000000000000L) { DIGITS_18 (100000000000000000L); }
1532 else { DIGITS_19 (1000000000000000000L); }
1533 #endif /* SIZEOF_LONG != 4 */
1536 #endif /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */
/* The digit helper macro is not needed past this point.  */
1540 #undef ONE_DIGIT_ADVANCE
1562 /* Support for timers. */
/* Clear any earlier definitions so that the selection below decides
   alone which timer method macro is defined.  */
1564 #undef TIMER_WINDOWS
1565 #undef TIMER_GETTIMEOFDAY
1568 /* Depending on the OS and availability of gettimeofday(), one and
1569 only one of the above constants will be defined. Virtually all
1570 modern Unix systems will define TIMER_GETTIMEOFDAY; Windows will
1571 use TIMER_WINDOWS. TIMER_TIME is a catch-all method for
1572 non-Windows systems without gettimeofday.
1574 #### Perhaps we should also support ftime(), which exists on old
1575 BSD 4.2-influenced systems? (It also existed under MS DOS Borland
1576 C, if memory serves me.) */
/* NOTE(review): the #ifdef that opens this conditional, the branch
   that defines TIMER_TIME and the inner #endif are not visible in
   this excerpt -- restore them from the full file.  */
1579 # define TIMER_WINDOWS
1580 #else /* not WINDOWS */
1581 # ifdef HAVE_GETTIMEOFDAY
1582 # define TIMER_GETTIMEOFDAY
1586 #endif /* not WINDOWS */
/* Per-method timer state.  NOTE(review): only the preprocessor guards
   and the Windows field survive in this excerpt.  wtimer_reset ()
   also stores into SECS and USECS members, so the structure
   presumably declares those under the other guards -- confirm against
   the full file.  */
1589 #ifdef TIMER_GETTIMEOFDAY
1598 #ifdef TIMER_WINDOWS
1599 ULARGE_INTEGER wintime;
1603 /* Allocate a timer. It is not legal to do anything with a freshly
1604 allocated timer, except call wtimer_reset() or wtimer_delete(). */
/* The storage comes from xmalloc (); per the note at the top of the
   allocation section, the wrappers exit on memory exhaustion, so the
   result is used without a check.  Nothing in the visible lines
   initializes the new structure.  */
1607 wtimer_allocate (void)
1609 struct wget_timer *wt =
1610 (struct wget_timer *)xmalloc (sizeof (struct wget_timer));
1614 /* Allocate a new timer and reset it. Return the new timer. */
/* NOTE(review): the signature line of this second function and the
   reset call announced above are not visible in this excerpt; only
   the allocation through wtimer_allocate () is.  */
1619 struct wget_timer *wt = wtimer_allocate ();
/* NOTE(review): in this excerpt the comment that follows is never
   closed, which leaves the wtimer_delete () signature inside comment
   text; the function body is not visible either.  Restore the missing
   lines from the full file before building.  */
1624 /* Free the resources associated with the timer. Its further use is
1628 wtimer_delete (struct wget_timer *wt)
1633 /* Reset timer WT. This establishes the starting point from which
1634 wtimer_elapsed() will return the number of elapsed
1635 milliseconds. It is allowed to reset a previously used timer. */
1638 wtimer_reset (struct wget_timer *wt)
/* gettimeofday () method: the seconds and microseconds of the current
   time are stored separately in WT.  */
1640 #ifdef TIMER_GETTIMEOFDAY
1642 gettimeofday (&t, NULL);
1643 wt->secs = t.tv_sec;
1644 wt->usecs = t.tv_usec;
/* time () method: only whole seconds are recorded.  NOTE(review): the
   guard that selects this branch is not visible in this excerpt.  */
1648 wt->secs = time (NULL);
/* Windows method: the system time is converted to a FILETIME and its
   high and low parts are copied into WT->wintime.  */
1651 #ifdef TIMER_WINDOWS
1654 GetSystemTime (&st);
1655 SystemTimeToFileTime (&st, &ft);
1656 wt->wintime.HighPart = ft.dwHighDateTime;
1657 wt->wintime.LowPart = ft.dwLowDateTime;
1661 /* Return the number of milliseconds elapsed since the timer was last
1662 reset. It is allowed to call this function more than once to get
1663 increasingly higher elapsed values. */
1666 wtimer_elapsed (struct wget_timer *wt)
1668 #ifdef TIMER_GETTIMEOFDAY
1670 gettimeofday (&t, NULL);
/* Whole seconds are scaled up to milliseconds; the microsecond
   difference is divided down to milliseconds (integer division) and
   added on top.  */
1671 return (t.tv_sec - wt->secs) * 1000 + (t.tv_usec - wt->usecs) / 1000;
/* time () method: the result can only change in steps of 1000.
   NOTE(review): the guard selecting this branch is not visible in
   this excerpt.  */
1675 time_t now = time (NULL);
1676 return 1000 * (now - wt->secs);
/* Windows method: the current FILETIME is rebuilt as one 64-bit value
   and the stored start value subtracted.  FILETIME counts
   100-nanosecond intervals, hence the division by 10000 to obtain
   milliseconds.  */
1683 GetSystemTime (&st);
1684 SystemTimeToFileTime (&st, &ft);
1685 uli.HighPart = ft.dwHighDateTime;
1686 uli.LowPart = ft.dwLowDateTime;
1687 return (long)((uli.QuadPart - wt->wintime.QuadPart) / 10000);
/* NOTE(review): several comment-closing lines are missing from this
   excerpt, so the text below runs together as a single comment up to
   the next terminator and swallows the wtimer_granularity ()
   signature and the first #ifdef.  The values returned for each timer
   method are not visible either.  Restore the missing lines from the
   full file before building.  */
1691 /* Return the assessed granularity of the timer implementation. This
1692 is important for certain code that tries to deal with "zero" time
1696 wtimer_granularity (void)
1698 #ifdef TIMER_GETTIMEOFDAY
1699 /* Granularity of gettimeofday is hugely architecture-dependent.
1700 However, it appears that on modern machines it is better than
1706 /* This is clear. */
1710 #ifdef TIMER_WINDOWS
1716 /* This should probably be at a better place, but it doesn't really
1717 fit into html-parse.c. */
1719 /* The function returns the pointer to the malloc-ed quoted version of
1720 string s. It will recognize and quote numeric and special graphic
1721 entities, as per RFC1866:
1729 No other entities are recognized or replaced. */
1731 html_quote_string (const char *s)
1737 /* Pass through the string, and count the new size. */
/* The i++ of the loop accounts for one output byte per input
   character (the ampersand that starts an entity, or the character
   itself); the additions below cover the remaining entity bytes.  */
1738 for (i = 0; *s; s++, i++)
1741 i += 4; /* `amp;' */
1742 else if (*s == '<' || *s == '>')
1743 i += 3; /* `lt;' and `gt;' */
1744 else if (*s == '\"')
1745 i += 5; /* `quot;' */
/* One byte more than the counted size, presumably for the terminating
   \0 (the store itself is not visible in this excerpt).  */
1749 res = (char *)xmalloc (i + 1);
/* Second pass: the quoted text is written through P.  NOTE(review):
   the first loop leaves S at the end of the string, so a rewind of S
   must happen in lines missing from this excerpt, as are most of the
   per-character stores -- confirm against the full function.  */
1751 for (p = res; *s; s++)
/* The two angle brackets share one branch; only the first letter of
   the entity name differs.  */
1764 *p++ = (*s == '<' ? 'l' : 'g');
1791 /* Determine the width of the terminal we're running on. If that's
1792 not possible, return 0. */
1795 determine_screen_width (void)
1797 /* If there's a way to get the terminal size using POSIX
1798 tcgetattr(), somebody please tell me. */
/* NOTE(review): the conditional that the following #else belongs to,
   and the branch taken when TIOCGWINSZ is unavailable, are not
   visible in this excerpt.  */
1801 #else /* TIOCGWINSZ */
/* NOTE(review): the statement guarded by this test on the log file
   name is not visible in this excerpt.  */
1805 if (opt.lfilename != NULL)
/* The window size is requested for the descriptor behind stderr; a
   failing ioctl () makes the function report 0.  The successful
   return value is not visible in this excerpt.  */
1808 fd = fileno (stderr);
1809 if (ioctl (fd, TIOCGWINSZ, &wsz) < 0)
1810 return 0; /* most likely ENOTTY */
1813 #endif /* TIOCGWINSZ */
1817 /* A debugging function for checking whether an MD5 library works. */
1819 #include "gen-md5.h"
1822 debug_test_md5 (char *buf)
1824 unsigned char raw[16];
1825 static char res[33];
1829 ALLOCA_MD5_CONTEXT (ctx);
1832 gen_md5_update ((unsigned char *)buf, strlen (buf), ctx);
1833 gen_md5_finish (ctx, raw);
1840 *p2++ = XDIGIT_TO_xchar (*p1 >> 4);
1841 *p2++ = XDIGIT_TO_xchar (*p1 & 0xf);