1 /* Various functions of utilitarian nature.
2 Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001
3 Free Software Foundation, Inc.
5 This file is part of GNU Wget.
7 GNU Wget is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 GNU Wget is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with Wget; if not, write to the Free Software
19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
27 #else /* not HAVE_STRING_H */
29 #endif /* not HAVE_STRING_H */
30 #include <sys/types.h>
35 # include <sys/mman.h>
44 #ifdef HAVE_SYS_UTIME_H
45 # include <sys/utime.h>
49 # include <libc.h> /* for access() */
63 /* This section implements several wrappers around the basic
64 allocation routines. This is done for two reasons: first, so that
65 the callers of these functions need not consistently check for
66 errors. If there is not enough virtual memory for running Wget,
67 something is seriously wrong, and Wget exits with an appropriate
70 The second reason why these are useful is that, if DEBUG_MALLOC is
71 defined, they also provide a handy (if crude) malloc debugging
72 interface that checks memory leaks. */
74 /* Croak the fatal memory error and bail out with non-zero exit
77 memfatal (const char *what)
79 /* HACK: expose save_log_p from log.c, so we can turn it off in
80 order to prevent saving the log. Saving the log is dangerous
81 because logprintf() and logputs() can call malloc(), so this
82 could infloop. When logging is turned off, infloop can no longer
85 #### This is no longer really necessary because the new routines
86 in log.c cons only if the line exceeds eighty characters. But
87 this can come at the end of a line, so it's OK to be careful.
89 On a more serious note, it would be good to have a
90 log_forced_shutdown() routine that exposes this cleanly. */
91 extern int save_log_p;
94 logprintf (LOG_ALWAYS, _("%s: %s: Not enough memory.\n"), exec_name, what);
98 /* These functions end with _real because they need to be
99 distinguished from the debugging functions, and from the macros.
102 If memory debugging is not turned on, wget.h defines these:
104 #define xmalloc xmalloc_real
105 #define xrealloc xrealloc_real
106 #define xstrdup xstrdup_real
109 In case of memory debugging, the definitions are a bit more
110 complex, because we want to provide more information, *and* we want
111 to call the debugging code. (The former is the reason why xmalloc
112 and friends need to be macros in the first place.) Then it looks
115 #define xmalloc(a) xmalloc_debug (a, __FILE__, __LINE__)
116 #define xfree(a) xfree_debug (a, __FILE__, __LINE__)
117 #define xrealloc(a, b) xrealloc_debug (a, b, __FILE__, __LINE__)
118 #define xstrdup(a) xstrdup_debug (a, __FILE__, __LINE__)
120 Each of the *_debug functions does its magic and calls the real one. */
123 # define STATIC_IF_DEBUG static
125 # define STATIC_IF_DEBUG
128 STATIC_IF_DEBUG void *
129 xmalloc_real (size_t size)
131 void *ptr = malloc (size);
137 STATIC_IF_DEBUG void *
138 xrealloc_real (void *ptr, size_t newsize)
142 /* Not all Un*xes have the feature of realloc() that calling it with
143 a NULL-pointer is the same as malloc(), but it is easy to
146 newptr = realloc (ptr, newsize);
148 newptr = malloc (newsize);
150 memfatal ("realloc");
154 STATIC_IF_DEBUG char *
155 xstrdup_real (const char *s)
161 copy = malloc (l + 1);
164 memcpy (copy, s, l + 1);
165 #else /* HAVE_STRDUP */
169 #endif /* HAVE_STRDUP */
176 /* Crude home-grown routines for debugging some malloc-related
179 * Counting the number of malloc and free invocations, and reporting
180 the "balance", i.e. how many times more malloc was called than it
181 was the case with free.
183 * Making malloc store its entry into a simple array and free remove
184 stuff from that array. At the end, print the pointers which have
185 not been freed, along with the source file and the line number.
186 This also has the side-effect of detecting freeing memory that
189 Note that this kind of memory leak checking strongly depends on
190 every malloc() being followed by a free(), even if the program is
191 about to finish. Wget is careful to free the data structure it
192 allocated in init.c. */
194 static int malloc_count, free_count;
200 } malloc_debug[100000];
202 /* Both register_ptr and unregister_ptr take O(n) operations to run,
203 which can be a real problem. It would be nice to use a hash table
204 for malloc_debug, but the functions in hash.c are not suitable
205 because they can call malloc() themselves. Maybe it would work if
206 the hash table were preallocated to a huge size, and if we set the
207 rehash threshold to 1.0. */
209 /* Register PTR in malloc_debug. Abort if this is not possible
210 (presumably due to the number of current allocations exceeding the
211 size of malloc_debug.) */
214 register_ptr (void *ptr, const char *file, int line)
217 for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
218 if (malloc_debug[i].ptr == NULL)
220 malloc_debug[i].ptr = ptr;
221 malloc_debug[i].file = file;
222 malloc_debug[i].line = line;
228 /* Unregister PTR from malloc_debug. Abort if PTR is not present in
229 malloc_debug. (This catches calling free() with a bogus pointer.) */
232 unregister_ptr (void *ptr)
235 for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
236 if (malloc_debug[i].ptr == ptr)
238 malloc_debug[i].ptr = NULL;
244 /* Print the malloc debug stats that can be gathered from the above
245 information. Currently this is the count of mallocs, frees, the
246 difference between the two, and the dump of the contents of
247 malloc_debug. The last part lists the memory leaks. */
250 print_malloc_debug_stats (void)
253 printf ("\nMalloc: %d\nFree: %d\nBalance: %d\n\n",
254 malloc_count, free_count, malloc_count - free_count);
255 for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
256 if (malloc_debug[i].ptr != NULL)
257 printf ("0x%08ld: %s:%d\n", (long)malloc_debug[i].ptr,
258 malloc_debug[i].file, malloc_debug[i].line);
262 xmalloc_debug (size_t size, const char *source_file, int source_line)
264 void *ptr = xmalloc_real (size);
266 register_ptr (ptr, source_file, source_line);
271 xfree_debug (void *ptr, const char *source_file, int source_line)
273 assert (ptr != NULL);
275 unregister_ptr (ptr);
280 xrealloc_debug (void *ptr, size_t newsize, const char *source_file, int source_line)
282 void *newptr = xrealloc_real (ptr, newsize);
286 register_ptr (newptr, source_file, source_line);
288 else if (newptr != ptr)
290 unregister_ptr (ptr);
291 register_ptr (newptr, source_file, source_line);
297 xstrdup_debug (const char *s, const char *source_file, int source_line)
299 char *copy = xstrdup_real (s);
301 register_ptr (copy, source_file, source_line);
305 #endif /* DEBUG_MALLOC */
307 /* Copy the string delimited by two pointers (one at the beginning, the other
308 at the char after the last char) to a new, malloc-ed location.
311 strdupdelim (const char *beg, const char *end)
313 char *res = (char *)xmalloc (end - beg + 1);
314 memcpy (res, beg, end - beg);
315 res[end - beg] = '\0';
319 /* Parse a string containing comma-separated elements, and return a
320 vector of char pointers with the elements. Spaces following the
321 commas are ignored. */
323 sepstring (const char *s)
337 res = (char **)xrealloc (res, (i + 2) * sizeof (char *));
338 res[i] = strdupdelim (p, s);
341 /* Skip the blanks following the ','. */
349 res = (char **)xrealloc (res, (i + 2) * sizeof (char *));
350 res[i] = strdupdelim (p, s);
355 /* Return pointer to a static char[] buffer in which zero-terminated
356 string-representation of TM (in form hh:mm:ss) is printed.
358 If TM is non-NULL, the current time-in-seconds will be stored
361 (#### This is misleading: one would expect TM would be used instead
362 of the current time in that case. This design was probably
363 influenced by the design of time(2), and should be changed at some
364 point. No callers use non-NULL TM anyway.) */
367 time_str (time_t *tm)
369 static char output[15];
371 time_t secs = time (tm);
375 /* In case of error, return the empty string. Maybe we should
376 just abort if this happens? */
380 ptm = localtime (&secs);
381 sprintf (output, "%02d:%02d:%02d", ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
385 /* Like the above, but include the date: YYYY-MM-DD hh:mm:ss. */
388 datetime_str (time_t *tm)
390 static char output[20]; /* "YYYY-MM-DD hh:mm:ss" + \0 */
392 time_t secs = time (tm);
396 /* In case of error, return the empty string. Maybe we should
397 just abort if this happens? */
401 ptm = localtime (&secs);
402 sprintf (output, "%04d-%02d-%02d %02d:%02d:%02d",
403 ptm->tm_year + 1900, ptm->tm_mon + 1, ptm->tm_mday,
404 ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
408 /* The Windows versions of the following two functions are defined in
413 fork_to_background (void)
416 /* Whether we arrange our own version of opt.lfilename here. */
421 opt.lfilename = unique_name (DEFAULT_LOGFILE);
433 /* parent, no error */
434 printf (_("Continuing in background.\n"));
436 printf (_("Output will be written to `%s'.\n"), opt.lfilename);
439 /* child: keep running */
441 #endif /* not WINDOWS */
446 char *r = xstrdup (orig);
451 /* Canonicalize PATH, and return a new path. The new path differs from PATH
453 Multiple `/'s are collapsed to a single `/'.
454 Leading `./'s and trailing `/.'s are removed.
455 Trailing `/'s are removed.
456 Non-leading `../'s and trailing `..'s are handled by removing
457 portions of the path.
459 E.g. "a/b/c/./../d/.." will yield "a/b". This function originates
463 Always use '/' as stub_char.
464 Don't check for local things using canon_stat.
465 Change the original string instead of strdup-ing.
466 React correctly when beginning with `./' and `../'.
467 Don't zip out trailing slashes. */
469 path_simplify (char *path)
471 register int i, start, ddot;
477 /*stub_char = (*path == '/') ? '/' : '.';*/
480 /* Addition: Remove all `./'-s preceding the string. If `../'-s
481 precede, put `/' in front and remove them too. */
486 if (path[i] == '.' && path[i + 1] == '/')
488 else if (path[i] == '.' && path[i + 1] == '.' && path[i + 2] == '/')
497 strcpy (path, path + i - ddot);
499 /* Replace single `.' or `..' with `/'. */
500 if ((path[0] == '.' && path[1] == '\0')
501 || (path[0] == '.' && path[1] == '.' && path[2] == '\0'))
507 /* Walk along PATH looking for things to compact. */
514 while (path[i] && path[i] != '/')
519 /* If we didn't find any slashes, then there is nothing left to do. */
523 /* Handle multiple `/'s in a row. */
524 while (path[i] == '/')
527 if ((start + 1) != i)
529 strcpy (path + start + 1, path + i);
533 /* Check for `../', `./' or trailing `.' by itself. */
536 /* Handle trailing `.' by itself. */
544 if (path[i + 1] == '/')
546 strcpy (path + i, path + i + 1);
547 i = (start < 0) ? 0 : start;
551 /* Handle `../' or trailing `..' by itself. */
552 if (path[i + 1] == '.' &&
553 (path[i + 2] == '/' || !path[i + 2]))
555 while (--start > -1 && path[start] != '/');
556 strcpy (path + start + 1, path + i + 2);
557 i = (start < 0) ? 0 : start;
564 /* "Touch" FILE, i.e. make its atime and mtime equal to the time
565 specified with TM. */
567 touch (const char *file, time_t tm)
569 #ifdef HAVE_STRUCT_UTIMBUF
570 struct utimbuf times;
571 times.actime = times.modtime = tm;
574 times[0] = times[1] = tm;
577 if (utime (file, &times) == -1)
578 logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno));
581 /* Checks if FILE is a symbolic link, and removes it if it is. Does
582 nothing under MS-Windows. */
584 remove_link (const char *file)
589 if (lstat (file, &st) == 0 && S_ISLNK (st.st_mode))
591 DEBUGP (("Unlinking %s (symlink).\n", file));
594 logprintf (LOG_VERBOSE, _("Failed to unlink symlink `%s': %s\n"),
595 file, strerror (errno));
600 /* Does FILENAME exist? This is quite a lousy implementation, since
601 it supplies no error codes -- only a yes-or-no answer. Thus it
602 will return that a file does not exist if, e.g., the directory is
603 unreadable. I don't mind it too much currently, though. The
604 proper way should, of course, be to have a third, error state,
605 other than true/false, but that would introduce uncalled-for
606 additional complexity to the callers. */
608 file_exists_p (const char *filename)
611 return access (filename, F_OK) >= 0;
614 return stat (filename, &buf) >= 0;
618 /* Returns 0 if PATH is a directory, 1 otherwise (any kind of file).
619 Returns 0 on error. */
621 file_non_directory_p (const char *path)
624 /* Use lstat() rather than stat() so that symbolic links pointing to
625 directories can be identified correctly. */
626 if (lstat (path, &buf) != 0)
628 return S_ISDIR (buf.st_mode) ? 0 : 1;
631 /* Return a unique filename, given a prefix and count */
633 unique_name_1 (const char *fileprefix, int count)
639 filename = (char *)xmalloc (strlen (fileprefix) + numdigit (count) + 2);
640 sprintf (filename, "%s.%d", fileprefix, count);
643 filename = xstrdup (fileprefix);
645 if (!file_exists_p (filename))
654 /* Return a unique file name, based on PREFIX. */
656 unique_name (const char *prefix)
662 file = unique_name_1 (prefix, count++);
666 /* Create DIRECTORY. If some of the pathname components of DIRECTORY
667 are missing, create them first. In case any mkdir() call fails,
668 return its error status. Returns 0 on successful completion.
670 The behaviour of this function should be identical to the behaviour
671 of `mkdir -p' on systems where mkdir supports the `-p' option. */
673 make_directory (const char *directory)
679 /* Make a copy of dir, to be able to write to it. Otherwise, the
680 function is unsafe if called with a read-only char *argument. */
681 STRDUP_ALLOCA (dir, directory);
683 /* If the first character of dir is '/', skip it (and thus enable
684 creation of absolute-pathname directories). */
685 for (i = (*dir == '/'); 1; ++i)
687 for (; dir[i] && dir[i] != '/'; i++)
692 /* Check whether the directory already exists. */
693 if (!file_exists_p (dir))
695 if (mkdir (dir, 0777) < 0)
706 /* Merge BASE with FILE. BASE can be a directory or a file name, FILE
707 should be a file name. For example, file_merge("/foo/bar", "baz")
708 will return "/foo/baz". file_merge("/foo/bar/", "baz") will return
711 In other words, it's a simpler and gentler version of uri_merge_1. */
714 file_merge (const char *base, const char *file)
717 const char *cut = (const char *)strrchr (base, '/');
720 cut = base + strlen (base);
722 result = (char *)xmalloc (cut - base + 1 + strlen (file) + 1);
723 memcpy (result, base, cut - base);
724 result[cut - base] = '/';
725 strcpy (result + (cut - base) + 1, file);
730 static int in_acclist PARAMS ((const char *const *, const char *, int));
732 /* Determine whether a file is acceptable to be followed, according to
733 lists of patterns to accept/reject. */
735 acceptable (const char *s)
739 while (l && s[l] != '/')
746 return (in_acclist ((const char *const *)opt.accepts, s, 1)
747 && !in_acclist ((const char *const *)opt.rejects, s, 1));
749 return in_acclist ((const char *const *)opt.accepts, s, 1);
751 else if (opt.rejects)
752 return !in_acclist ((const char *const *)opt.rejects, s, 1);
756 /* Compare S1 and S2 frontally; S2 must begin with S1. E.g. if S1 is
757 `/something', frontcmp() will return 1 only if S2 begins with
758 `/something'. Otherwise, 0 is returned. */
760 frontcmp (const char *s1, const char *s2)
762 for (; *s1 && *s2 && (*s1 == *s2); ++s1, ++s2);
766 /* Iterate through STRLIST, and return the first element that matches
767 S, through wildcards or front comparison (as appropriate). */
769 proclist (char **strlist, const char *s, enum accd flags)
773 for (x = strlist; *x; x++)
774 if (has_wildcards_p (*x))
776 if (fnmatch (*x, s, FNM_PATHNAME) == 0)
781 char *p = *x + ((flags & ALLABS) && (**x == '/')); /* Remove '/' */
788 /* Returns whether DIRECTORY is acceptable for download, wrt the
789 include/exclude lists.
791 If FLAGS is ALLABS, the leading `/' is ignored in paths; relative
792 and absolute paths may be freely intermixed. */
794 accdir (const char *directory, enum accd flags)
796 /* Remove starting '/'. */
797 if (flags & ALLABS && *directory == '/')
801 if (!proclist (opt.includes, directory, flags))
806 if (proclist (opt.excludes, directory, flags))
812 /* Match the end of STRING against PATTERN. For instance:
814 match_backwards ("abc", "bc") -> 1
815 match_backwards ("abc", "ab") -> 0
816 match_backwards ("abc", "abc") -> 1 */
818 match_backwards (const char *string, const char *pattern)
822 for (i = strlen (string), j = strlen (pattern); i >= 0 && j >= 0; i--, j--)
823 if (string[i] != pattern[j])
825 /* If the pattern was exhausted, the match was successful. */
832 /* Checks whether string S matches each element of ACCEPTS. A list
833 element is matched either with fnmatch() or match_backwards(),
834 according to whether the element contains wildcards or not.
836 If the BACKWARD is 0, don't do backward comparison -- just compare
839 in_acclist (const char *const *accepts, const char *s, int backward)
841 for (; *accepts; accepts++)
843 if (has_wildcards_p (*accepts))
845 /* fnmatch returns 0 if the pattern *does* match the
847 if (fnmatch (*accepts, s, 0) == 0)
854 if (match_backwards (s, *accepts))
859 if (!strcmp (s, *accepts))
867 /* Return the malloc-ed suffix of STR. For instance:
868 suffix ("foo.bar") -> "bar"
869 suffix ("foo.bar.baz") -> "baz"
870 suffix ("/foo/bar") -> NULL
871 suffix ("/foo.bar/baz") -> NULL */
873 suffix (const char *str)
877 for (i = strlen (str); i && str[i] != '/' && str[i] != '.'; i--);
879 return xstrdup (str + i);
884 /* Read a line from FP. The function reallocs the storage as needed
885 to accommodate any length of the line. Reallocs are done
886 exponentially, doubling the storage after each overflow to
887 minimize the number of calls to realloc() and fgets(). The newline
888 character at the end of line is retained.
890 After end-of-file is encountered without anything being read, NULL
891 is returned. NULL is also returned on error. To distinguish
892 between these two cases, use the stdio function ferror(). */
895 read_whole_line (FILE *fp)
899 char *line = (char *)xmalloc (bufsize);
901 while (fgets (line + length, bufsize - length, fp))
903 length += strlen (line + length);
905 if (line[length - 1] == '\n')
907 /* fgets() guarantees to read the whole line, or to use up the
908 space we've given it. We can double the buffer
911 line = xrealloc (line, bufsize);
913 if (length == 0 || ferror (fp))
918 if (length + 1 < bufsize)
919 /* Relieve the memory from our exponential greediness. We say
920 `length + 1' because the terminating \0 is not included in
921 LENGTH. We don't need to zero-terminate the string ourselves,
922 though, because fgets() does that. */
923 line = xrealloc (line, length + 1);
927 /* Read FILE into memory. A pointer to `struct file_memory' is
928 returned; use struct element `content' to access file contents, and
929 the element `length' to know the file length. `content' is *not*
930 zero-terminated, and you should *not* read or write beyond the [0,
931 length) range of characters.
933 After you are done with the file contents, call read_file_free to
936 Depending on the operating system and the type of file that is
937 being read, read_file() either mmap's the file into memory, or
938 reads the file into the core using read().
940 If file is named "-", fileno(stdin) is used for reading instead.
941 If you want to read from a real file named "-", use "./-" instead. */
944 read_file (const char *file)
947 struct file_memory *fm;
949 int inhibit_close = 0;
951 /* Some magic in the finest tradition of Perl and its kin: if FILE
952 is "-", just use stdin. */
957 /* Note that we don't inhibit mmap() in this case. If stdin is
958 redirected from a regular file, mmap() will still work. */
961 fd = open (file, O_RDONLY);
964 fm = xmalloc (sizeof (struct file_memory));
969 if (fstat (fd, &buf) < 0)
971 fm->length = buf.st_size;
972 /* NOTE: As far as I know, the callers of this function never
973 modify the file text. Relying on this would enable us to
974 specify PROT_READ and MAP_SHARED for a marginal gain in
975 efficiency, but at some cost to generality. */
976 fm->content = mmap (NULL, fm->length, PROT_READ | PROT_WRITE,
978 if (fm->content == (char *)MAP_FAILED)
988 /* The most common reason why mmap() fails is that FD does not point
989 to a plain file. However, it's also possible that mmap() doesn't
990 work for a particular type of file. Therefore, whenever mmap()
991 fails, we just fall back to the regular method. */
992 #endif /* HAVE_MMAP */
995 size = 512; /* number of bytes fm->content can
996 hold at any given time. */
997 fm->content = xmalloc (size);
1001 if (fm->length > size / 2)
1003 /* #### I'm not sure whether the whole exponential-growth
1004 thing makes sense with kernel read. On Linux at least,
1005 read() refuses to read more than 4K from a file at a
1006 single chunk anyway. But other Unixes might optimize it
1007 better, and it doesn't *hurt* anything, so I'm leaving
1010 /* Normally, we grow SIZE exponentially to make the number
1011 of calls to read() and realloc() logarithmic in relation
1012 to file size. However, read() can read an amount of data
1013 smaller than requested, and it would be unreasonable to
1014 double SIZE every time *something* was read. Therefore,
1015 we double SIZE only when the length exceeds half of the
1016 entire allocated size. */
1018 fm->content = xrealloc (fm->content, size);
1020 nread = read (fd, fm->content + fm->length, size - fm->length);
1022 /* Successful read. */
1023 fm->length += nread;
1033 if (size > fm->length && fm->length != 0)
1034 /* Due to exponential growth of fm->content, the allocated region
1035 might be much larger than what is actually needed. */
1036 fm->content = xrealloc (fm->content, fm->length);
1043 xfree (fm->content);
1048 /* Release the resources held by FM. Specifically, this calls
1049 munmap() or xfree() on fm->content, depending whether mmap or
1050 malloc/read were used to read in the file. It also frees the
1051 memory needed to hold the FM structure itself. */
1054 read_file_free (struct file_memory *fm)
1059 munmap (fm->content, fm->length);
1064 xfree (fm->content);
1069 /* Free the pointers in a NULL-terminated vector of pointers, then
1070 free the pointer itself. */
1072 free_vec (char **vec)
1083 /* Append vector V2 to vector V1. The function frees V2 and
1084 reallocates V1 (thus you may not use the contents of either
1085 pointer after the call). If V1 is NULL, V2 is returned. */
1087 merge_vecs (char **v1, char **v2)
1097 /* To avoid j == 0 */
1102 for (i = 0; v1[i]; i++);
1104 for (j = 0; v2[j]; j++);
1105 /* Reallocate v1. */
1106 v1 = (char **)xrealloc (v1, (i + j + 1) * sizeof (char **));
1107 memcpy (v1 + i, v2, (j + 1) * sizeof (char *));
1112 /* A set of simple-minded routines to store strings in a linked list.
1113 This used to also be used for searching, but now we have hash
1116 /* It's a shame that these simple things like linked lists and hash
1117 tables (see hash.c) need to be implemented over and over again. It
1118 would be nice to be able to use the routines from glib -- see
1119 www.gtk.org for details. However, that would make Wget depend on
1120 glib, and I want to avoid dependencies to external libraries for
1121 reasons of convenience and portability (I suspect Wget is more
1122 portable than anything ever written for Gnome). */
1124 /* Append an element to the list. If the list has a huge number of
1125 elements, this can get slow because it has to find the list's
1126 ending. If you think you have to call slist_append in a loop,
1127 think about calling slist_prepend() followed by slist_nreverse(). */
1130 slist_append (slist *l, const char *s)
1132 slist *newel = (slist *)xmalloc (sizeof (slist));
1135 newel->string = xstrdup (s);
1140 /* Find the last element. */
1147 /* Prepend S to the list. Unlike slist_append(), this is O(1). */
1150 slist_prepend (slist *l, const char *s)
1152 slist *newel = (slist *)xmalloc (sizeof (slist));
1153 newel->string = xstrdup (s);
1158 /* Destructively reverse L. */
1161 slist_nreverse (slist *l)
1166 slist *next = l->next;
1174 /* Is there a specific entry in the list? */
1176 slist_contains (slist *l, const char *s)
1178 for (; l; l = l->next)
1179 if (!strcmp (l->string, s))
1184 /* Free the whole slist. */
1186 slist_free (slist *l)
1197 /* Sometimes it's useful to create "sets" of strings, i.e. special
1198 hash tables where you want to store strings as keys and merely
1199 query for their existence. Here is a set of utility routines that
1200 makes that transparent. */
1203 string_set_add (struct hash_table *ht, const char *s)
1205 /* First check whether the set element already exists. If it does,
1206 do nothing so that we don't have to free() the old element and
1207 then strdup() a new one. */
1208 if (hash_table_contains (ht, s))
1211 /* We use "1" as value. It provides us a useful and clear arbitrary
1212 value, and it consumes no memory -- the pointers to the same
1213 string "1" will be shared by all the key-value pairs in all `set'
1215 hash_table_put (ht, xstrdup (s), "1");
1218 /* Synonym for hash_table_contains... */
1221 string_set_contains (struct hash_table *ht, const char *s)
1223 return hash_table_contains (ht, s);
1227 string_set_free_mapper (void *key, void *value_ignored, void *arg_ignored)
1234 string_set_free (struct hash_table *ht)
1236 hash_table_map (ht, string_set_free_mapper, NULL);
1237 hash_table_destroy (ht);
1241 free_keys_and_values_mapper (void *key, void *value, void *arg_ignored)
1248 /* Another utility function: call free() on all keys and values of HT. */
1251 free_keys_and_values (struct hash_table *ht)
1253 hash_table_map (ht, free_keys_and_values_mapper, NULL);
1257 /* Engine for legible and legible_very_long; this function works on
1261 legible_1 (const char *repr)
1263 static char outbuf[128];
1268 /* Reset the pointers. */
1271 /* If the number is negative, shift the pointers. */
1277 /* How many digits before the first separator? */
1278 mod = strlen (inptr) % 3;
1280 for (i = 0; i < mod; i++)
1281 *outptr++ = inptr[i];
1282 /* Now insert the rest of them, putting separator before every
1284 for (i1 = i, i = 0; inptr[i1]; i++, i1++)
1286 if (i % 3 == 0 && i1 != 0)
1288 *outptr++ = inptr[i1];
1290 /* Zero-terminate the string. */
1295 /* Legible -- return a static pointer to the legibly printed long. */
1300 /* Print the number into the buffer. */
1301 long_to_string (inbuf, l);
1302 return legible_1 (inbuf);
1305 /* Write a string representation of NUMBER into the provided buffer.
1306 We cannot use sprintf() because we cannot be sure whether the
1307 platform supports printing of what we chose for VERY_LONG_TYPE.
1309 Example: Gcc supports `long long' under many platforms, but on many
1310 of those the native libc knows nothing of it and therefore cannot
1313 How long BUFFER needs to be depends on the platform and the content
1314 of NUMBER. For 64-bit VERY_LONG_TYPE (the most common case), 24
1315 bytes are sufficient. Using more might be a good idea.
1317 This function does not go through the hoops that long_to_string
1318 goes to because it doesn't aspire to be fast. (It's called perhaps
1319 once in a Wget run.) */
1322 very_long_to_string (char *buffer, VERY_LONG_TYPE number)
1327 /* Print the number backwards... */
1330 buffer[i++] = '0' + number % 10;
1335 /* ...and reverse the order of the digits. */
1336 for (j = 0; j < i / 2; j++)
1339 buffer[j] = buffer[i - 1 - j];
1340 buffer[i - 1 - j] = c;
1345 /* The same as legible(), but works on VERY_LONG_TYPE. See sysdep.h. */
1347 legible_very_long (VERY_LONG_TYPE l)
1350 /* Print the number into the buffer. */
1351 very_long_to_string (inbuf, l);
1352 return legible_1 (inbuf);
1355 /* Count the digits in a (long) integer. */
1365 while ((a /= 10) != 0)
/* Digit-emitting helper macros. They are not self-contained: each one
   reads and modifies the identifiers `p' and `n' of whatever scope it
   is expanded in.

   ONE_DIGIT (FIGURE) stores N / FIGURE + '0' at *P and advances P.
   ONE_DIGIT_ADVANCE (FIGURE) does the same and then reduces N modulo
   FIGURE. DIGITS_<k> (FIGURE) emits k digits, where FIGURE is the
   place value of the leading digit (1 for DIGITS_1, 10 for DIGITS_2,
   100 for DIGITS_3, and so on); each step hands FIGURE / 10 to the
   next smaller macro.

   For k > 1, DIGITS_<k> expands to several statements separated by
   `;', so it must be used inside a braced block. */
1370 #define ONE_DIGIT(figure) *p++ = n / (figure) + '0'
1371 #define ONE_DIGIT_ADVANCE(figure) (ONE_DIGIT (figure), n %= (figure))
1373 #define DIGITS_1(figure) ONE_DIGIT (figure)
1374 #define DIGITS_2(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_1 ((figure) / 10)
1375 #define DIGITS_3(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_2 ((figure) / 10)
1376 #define DIGITS_4(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_3 ((figure) / 10)
1377 #define DIGITS_5(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_4 ((figure) / 10)
1378 #define DIGITS_6(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_5 ((figure) / 10)
1379 #define DIGITS_7(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_6 ((figure) / 10)
1380 #define DIGITS_8(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_7 ((figure) / 10)
1381 #define DIGITS_9(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_8 ((figure) / 10)
1382 #define DIGITS_10(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_9 ((figure) / 10)
1384 /* DIGITS_<11-19> are only used on machines with 64-bit longs. */
1386 #define DIGITS_11(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_10 ((figure) / 10)
1387 #define DIGITS_12(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_11 ((figure) / 10)
1388 #define DIGITS_13(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_12 ((figure) / 10)
1389 #define DIGITS_14(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_13 ((figure) / 10)
1390 #define DIGITS_15(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_14 ((figure) / 10)
1391 #define DIGITS_16(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_15 ((figure) / 10)
1392 #define DIGITS_17(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_16 ((figure) / 10)
1393 #define DIGITS_18(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_17 ((figure) / 10)
1394 #define DIGITS_19(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_18 ((figure) / 10)
1396 /* Print NUMBER to BUFFER in base 10. This is completely equivalent
1397 to `sprintf(buffer, "%ld", number)', only much faster.
1399 The speedup may make a difference in programs that frequently
1400 convert numbers to strings. Some implementations of sprintf,
1401 particularly the one in GNU libc, have been known to be extremely
1402 slow compared to this function.
1404 BUFFER should accept as many bytes as you expect the number to take
1405 up. On machines with 64-bit longs the maximum needed size is 24
1406 bytes. That includes the worst-case digits, the optional `-' sign,
1407 and the trailing \0. */
1410 long_to_string (char *buffer, long number)
1415 #if (SIZEOF_LONG != 4) && (SIZEOF_LONG != 8)
1416 /* We are running in a strange or misconfigured environment. Let
1417 sprintf cope with it. */
1418 sprintf (buffer, "%ld", n);
1419 #else /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */
1427 if (n < 10) { DIGITS_1 (1); }
1428 else if (n < 100) { DIGITS_2 (10); }
1429 else if (n < 1000) { DIGITS_3 (100); }
1430 else if (n < 10000) { DIGITS_4 (1000); }
1431 else if (n < 100000) { DIGITS_5 (10000); }
1432 else if (n < 1000000) { DIGITS_6 (100000); }
1433 else if (n < 10000000) { DIGITS_7 (1000000); }
1434 else if (n < 100000000) { DIGITS_8 (10000000); }
1435 else if (n < 1000000000) { DIGITS_9 (100000000); }
1436 #if SIZEOF_LONG == 4
1437 /* ``if (1)'' serves only to preserve editor indentation. */
1438 else if (1) { DIGITS_10 (1000000000); }
1439 #else /* SIZEOF_LONG != 4 */
1440 else if (n < 10000000000L) { DIGITS_10 (1000000000L); }
1441 else if (n < 100000000000L) { DIGITS_11 (10000000000L); }
1442 else if (n < 1000000000000L) { DIGITS_12 (100000000000L); }
1443 else if (n < 10000000000000L) { DIGITS_13 (1000000000000L); }
1444 else if (n < 100000000000000L) { DIGITS_14 (10000000000000L); }
1445 else if (n < 1000000000000000L) { DIGITS_15 (100000000000000L); }
1446 else if (n < 10000000000000000L) { DIGITS_16 (1000000000000000L); }
1447 else if (n < 100000000000000000L) { DIGITS_17 (10000000000000000L); }
1448 else if (n < 1000000000000000000L) { DIGITS_18 (100000000000000000L); }
1449 else { DIGITS_19 (1000000000000000000L); }
1450 #endif /* SIZEOF_LONG != 4 */
1453 #endif /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */
1457 #undef ONE_DIGIT_ADVANCE
1479 /* Support for timers. */
1481 #undef TIMER_WINDOWS
1482 #undef TIMER_GETTIMEOFDAY
1485 /* Depending on the OS and availability of gettimeofday(), one and
1486 only one of the above constants will be defined. Virtually all
1487 modern Unix systems will define TIMER_GETTIMEOFDAY; Windows will
1488 use TIMER_WINDOWS. TIMER_TIME is a catch-all method for
1489 non-Windows systems without gettimeofday.
1491 #### Perhaps we should also support ftime(), which exists on old
1492 BSD 4.2-influenced systems? (It also existed under MS DOS Borland
1493 C, if memory serves me.) */
1496 # define TIMER_WINDOWS
1497 #else /* not WINDOWS */
1498 # ifdef HAVE_GETTIMEOFDAY
1499 # define TIMER_GETTIMEOFDAY
1503 #endif /* not WINDOWS */
1506 #ifdef TIMER_GETTIMEOFDAY
1515 #ifdef TIMER_WINDOWS
1516 ULARGE_INTEGER wintime;
1520 /* Allocate a timer. It is not legal to do anything with a freshly
1521 allocated timer, except call wtimer_reset() or wtimer_delete(). */
1524 wtimer_allocate (void)
1526 struct wget_timer *wt =
1527 (struct wget_timer *)xmalloc (sizeof (struct wget_timer));
1531 /* Allocate a new timer and reset it. Return the new timer. */
1536 struct wget_timer *wt = wtimer_allocate ();
1541 /* Free the resources associated with the timer. Its further use is
1545 wtimer_delete (struct wget_timer *wt)
1550 /* Reset timer WT. This establishes the starting point from which
1551 wtimer_elapsed() will return the number of elapsed
1552 milliseconds. It is allowed to reset a previously used timer. */
1555 wtimer_reset (struct wget_timer *wt)
1557 #ifdef TIMER_GETTIMEOFDAY
1559 gettimeofday (&t, NULL);
1560 wt->secs = t.tv_sec;
1561 wt->usecs = t.tv_usec;
1565 wt->secs = time (NULL);
1568 #ifdef TIMER_WINDOWS
1571 GetSystemTime (&st);
1572 SystemTimeToFileTime (&st, &ft);
1573 wt->wintime.HighPart = ft.dwHighDateTime;
1574 wt->wintime.LowPart = ft.dwLowDateTime;
1578 /* Return the number of milliseconds elapsed since the timer was last
1579 reset. It is allowed to call this function more than once to get
1580 increasingly higher elapsed values. */
1583 wtimer_elapsed (struct wget_timer *wt)
1585 #ifdef TIMER_GETTIMEOFDAY
1587 gettimeofday (&t, NULL);
1588 return (t.tv_sec - wt->secs) * 1000 + (t.tv_usec - wt->usecs) / 1000;
1592 time_t now = time (NULL);
1593 return 1000 * (now - wt->secs);
1600 GetSystemTime (&st);
1601 SystemTimeToFileTime (&st, &ft);
1602 uli.HighPart = ft.dwHighDateTime;
1603 uli.LowPart = ft.dwLowDateTime;
1604 return (long)((uli.QuadPart - wt->wintime.QuadPart) / 10000);
1608 /* Return the assessed granularity of the timer implementation. This
1609 is important for certain code that tries to deal with "zero" time
1613 wtimer_granularity (void)
1615 #ifdef TIMER_GETTIMEOFDAY
1616 /* Granularity of gettimeofday is hugely architecture-dependent.
1617 However, it appears that on modern machines it is better than
1623 /* time() counts whole seconds. */
1627 #ifdef TIMER_WINDOWS
1633 /* This should probably be at a better place, but it doesn't really
1634 fit into html-parse.c. */
1636 /* Return a pointer to a newly malloc-ed copy of string S in which
1637 the HTML special characters are replaced with the corresponding
1638 numeric and special graphic entities, as per RFC1866:
1646 No other entities are recognized or replaced. */
1648 html_quote_string (const char *s)
1654 /* Pass through the string, and count the new size. */
1655 for (i = 0; *s; s++, i++)
1658 i += 4; /* `amp;' */
1659 else if (*s == '<' || *s == '>')
1660 i += 3; /* `lt;' and `gt;' */
1661 else if (*s == '\"')
1662 i += 5; /* `quot;' */
1666 res = (char *)xmalloc (i + 1);
1668 for (p = res; *s; s++)
1681 *p++ = (*s == '<' ? 'l' : 'g');