1 /* Various functions of utilitarian nature.
2 Copyright (C) 1995, 1996, 1997, 1998, 2000 Free Software Foundation, Inc.
4 This file is part of Wget.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
26 #else /* not HAVE_STRING_H */
28 #endif /* not HAVE_STRING_H */
29 #include <sys/types.h>
34 # include <sys/mman.h>
43 #ifdef HAVE_SYS_UTIME_H
44 # include <sys/utime.h>
48 # include <libc.h> /* for access() */
62 /* This section implements several wrappers around the basic
63 allocation routines. This is done for two reasons: first, so that
64 the callers of these functions need not consistently check for
65 errors. If there is not enough virtual memory for running Wget,
66 something is seriously wrong, and Wget exits with an appropriate
69 The second reason why these are useful is that, if DEBUG_MALLOC is
70 defined, they also provide a handy (if crude) malloc debugging
71 interface that checks memory leaks. */
73 /* Croak the fatal memory error and bail out with non-zero exit
76 memfatal (const char *what)
78 /* HACK: expose save_log_p from log.c, so we can turn it off in
79 order to prevent saving the log. Saving the log is dangerous
80 because logprintf() and logputs() can call malloc(), so this
81 could infloop. When logging is turned off, infloop can no longer
84 #### This is no longer really necessary because the new routines
85 in log.c cons only if the line exceeds eighty characters. But
86 this can come at the end of a line, so it's OK to be careful.
88 On a more serious note, it would be good to have a
89 log_forced_shutdown() routine that exposes this cleanly. */
90 extern int save_log_p;
93 logprintf (LOG_ALWAYS, _("%s: %s: Not enough memory.\n"), exec_name, what);
97 /* These functions end with _real because they need to be
98 distinguished from the debugging functions, and from the macros.
101 If memory debugging is not turned on, wget.h defines these:
103 #define xmalloc xmalloc_real
104 #define xrealloc xrealloc_real
105 #define xstrdup xstrdup_real
108 In case of memory debugging, the definitions are a bit more
109 complex, because we want to provide more information, *and* we want
110 to call the debugging code. (The former is the reason why xmalloc
111 and friends need to be macros in the first place.) Then it looks
114 #define xmalloc(a) xmalloc_debug (a, __FILE__, __LINE__)
115 #define xfree(a) xfree_debug (a, __FILE__, __LINE__)
116 #define xrealloc(a, b) xrealloc_debug (a, b, __FILE__, __LINE__)
117 #define xstrdup(a) xstrdup_debug (a, __FILE__, __LINE__)
119 Each of the *_debug function does its magic and calls the real one. */
122 # define STATIC_IF_DEBUG static
124 # define STATIC_IF_DEBUG
127 STATIC_IF_DEBUG void *
128 xmalloc_real (size_t size)
130 void *ptr = malloc (size);
136 STATIC_IF_DEBUG void *
137 xrealloc_real (void *ptr, size_t newsize)
141 /* Not all Un*xes have the feature of realloc() that calling it with
142 a NULL-pointer is the same as malloc(), but it is easy to
145 newptr = realloc (ptr, newsize);
147 newptr = malloc (newsize);
149 memfatal ("realloc");
153 STATIC_IF_DEBUG char *
154 xstrdup_real (const char *s)
160 copy = malloc (l + 1);
163 memcpy (copy, s, l + 1);
164 #else /* HAVE_STRDUP */
168 #endif /* HAVE_STRDUP */
175 /* Crude home-grown routines for debugging some malloc-related
178 * Counting the number of malloc and free invocations, and reporting
179 the "balance", i.e. how many times more malloc was called than it
180 was the case with free.
182 * Making malloc store its entry into a simple array and free remove
183 stuff from that array. At the end, print the pointers which have
184 not been freed, along with the source file and the line number.
185 This also has the side-effect of detecting freeing memory that
188 Note that this kind of memory leak checking strongly depends on
189 every malloc() being followed by a free(), even if the program is
190 about to finish. Wget is careful to free the data structure it
191 allocated in init.c. */
193 static int malloc_count, free_count;
199 } malloc_debug[100000];
201 /* Both register_ptr and unregister_ptr take O(n) operations to run,
202 which can be a real problem. It would be nice to use a hash table
203 for malloc_debug, but the functions in hash.c are not suitable
204 because they can call malloc() themselves. Maybe it would work if
205 the hash table were preallocated to a huge size, and if we set the
206 rehash threshold to 1.0. */
208 /* Register PTR in malloc_debug. Abort if this is not possible
209 (presumably due to the number of current allocations exceeding the
210 size of malloc_debug.) */
213 register_ptr (void *ptr, const char *file, int line)
216 for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
217 if (malloc_debug[i].ptr == NULL)
219 malloc_debug[i].ptr = ptr;
220 malloc_debug[i].file = file;
221 malloc_debug[i].line = line;
227 /* Unregister PTR from malloc_debug. Abort if PTR is not present in
228 malloc_debug. (This catches calling free() with a bogus pointer.) */
231 unregister_ptr (void *ptr)
234 for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
235 if (malloc_debug[i].ptr == ptr)
237 malloc_debug[i].ptr = NULL;
243 /* Print the malloc debug stats that can be gathered from the above
244 information. Currently this is the count of mallocs, frees, the
245 difference between the two, and the dump of the contents of
246 malloc_debug. The last part are the memory leaks. */
249 print_malloc_debug_stats (void)
252 printf ("\nMalloc: %d\nFree: %d\nBalance: %d\n\n",
253 malloc_count, free_count, malloc_count - free_count);
254 for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
255 if (malloc_debug[i].ptr != NULL)
256 printf ("0x%08ld: %s:%d\n", (long)malloc_debug[i].ptr,
257 malloc_debug[i].file, malloc_debug[i].line);
261 xmalloc_debug (size_t size, const char *source_file, int source_line)
263 void *ptr = xmalloc_real (size);
265 register_ptr (ptr, source_file, source_line);
270 xfree_debug (void *ptr, const char *source_file, int source_line)
272 assert (ptr != NULL);
274 unregister_ptr (ptr);
279 xrealloc_debug (void *ptr, size_t newsize, const char *source_file, int source_line)
281 void *newptr = xrealloc_real (ptr, newsize);
285 register_ptr (newptr, source_file, source_line);
287 else if (newptr != ptr)
289 unregister_ptr (ptr);
290 register_ptr (newptr, source_file, source_line);
296 xstrdup_debug (const char *s, const char *source_file, int source_line)
298 char *copy = xstrdup_real (s);
300 register_ptr (copy, source_file, source_line);
304 #endif /* DEBUG_MALLOC */
/* Duplicate the characters in the half-open range [BEG, END): BEG
   points to the first character to copy and END to the character
   just after the last one.  The copy is placed in storage obtained
   from xmalloc() and is zero-terminated.  */
char *
strdupdelim (const char *beg, const char *end)
{
  size_t len = end - beg;
  char *copy = (char *)xmalloc (len + 1);

  memcpy (copy, beg, len);
  copy[len] = '\0';
  return copy;
}
318 /* Parse a string containing comma-separated elements, and return a
319 vector of char pointers with the elements. Spaces following the
320 commas are ignored. */
322 sepstring (const char *s)
336 res = (char **)xrealloc (res, (i + 2) * sizeof (char *));
337 res[i] = strdupdelim (p, s);
340 /* Skip the blanks following the ','. */
348 res = (char **)xrealloc (res, (i + 2) * sizeof (char *));
349 res[i] = strdupdelim (p, s);
/* Return a pointer to a static char[] buffer holding the current
   local time as a zero-terminated string of the form hh:mm:ss.  The
   buffer is overwritten by every call.

   If TM is non-NULL, the current time-in-seconds will also be stored
   there, as done by time().  TM is not used as the time to print.

   (#### This is misleading: one would expect TM to be used instead
   of the current time in that case.  This design was probably
   influenced by the design of time(2), and should be changed at some
   point.  No callers use non-NULL TM anyway.)

   If the time cannot be obtained or converted to local time, the
   empty string is returned.  */
char *
time_str (time_t *tm)
{
  static char output[15];
  struct tm *ptm;
  time_t secs = time (tm);

  /* Start out with the empty string so that both failure paths below
     can simply return OUTPUT.  */
  *output = '\0';

  if (secs == (time_t) -1)
    return output;

  /* localtime() returns NULL when SECS cannot be converted; it must
     not be dereferenced in that case.  */
  ptm = localtime (&secs);
  if (!ptm)
    return output;

  sprintf (output, "%02d:%02d:%02d", ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
  return output;
}
/* Like time_str, but include the date: YYYY-MM-DD hh:mm:ss.  The
   result lives in a static buffer that is overwritten by every call.
   If TM is non-NULL, the current time-in-seconds is stored there as
   done by time().  The empty string is returned if the time cannot be
   obtained or converted to local time.  */
char *
datetime_str (time_t *tm)
{
  static char output[20];	/* "YYYY-MM-DD hh:mm:ss" + \0 */
  struct tm *ptm;
  time_t secs = time (tm);

  /* Start out with the empty string so that both failure paths below
     can simply return OUTPUT.  */
  *output = '\0';

  if (secs == (time_t) -1)
    return output;

  /* localtime() returns NULL when SECS cannot be converted; it must
     not be dereferenced in that case.  */
  ptm = localtime (&secs);
  if (!ptm)
    return output;

  sprintf (output, "%04d-%02d-%02d %02d:%02d:%02d",
	   ptm->tm_year + 1900, ptm->tm_mon + 1, ptm->tm_mday,
	   ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
  return output;
}
407 /* Returns an error message for ERRNUM. #### This requires more work.
408 This function, as well as the whole error system, is very
411 uerrmsg (uerr_t errnum)
416 return _("Unknown/unsupported protocol");
419 return _("Invalid port specification");
422 return _("Invalid host name");
426 /* $@#@#$ compiler. */
431 /* The Windows versions of the following two functions are defined in
434 /* A cuserid() imitation using getpwuid(), to avoid hassling with
435 utmp. Besides, not all systems have cuserid(). Under Windows, it
436 is defined in mswindows.c.
438 If WHERE is non-NULL, the username will be stored there.
439 Otherwise, it will be returned as a static buffer (as returned by
440 getpwuid()). In the latter case, the buffer should be copied
441 before calling getpwuid() or pwd_cuserid() again. */
444 pwd_cuserid (char *where)
448 if (!(pwd = getpwuid (getuid ())) || !pwd->pw_name)
452 strcpy (where, pwd->pw_name);
460 fork_to_background (void)
463 /* Whether we arrange our own version of opt.lfilename here. */
468 opt.lfilename = unique_name (DEFAULT_LOGFILE);
480 /* parent, no error */
481 printf (_("Continuing in background.\n"));
483 printf (_("Output will be written to `%s'.\n"), opt.lfilename);
486 /* child: keep running */
488 #endif /* not WINDOWS */
490 /* Canonicalize PATH, and return a new path. The new path differs from PATH
492 Multiple `/'s are collapsed to a single `/'.
493 Leading `./'s and trailing `/.'s are removed.
494 Trailing `/'s are removed.
495 Non-leading `../'s and trailing `..'s are handled by removing
496 portions of the path.
498 E.g. "a/b/c/./../d/.." will yield "a/b". This function originates
502 Always use '/' as stub_char.
503 Don't check for local things using canon_stat.
504 Change the original string instead of strdup-ing.
505 React correctly when beginning with `./' and `../'. */
507 path_simplify (char *path)
509 register int i, start, ddot;
515 /*stub_char = (*path == '/') ? '/' : '.';*/
518 /* Addition: Remove all `./'-s preceding the string. If `../'-s
519 precede, put `/' in front and remove them too. */
524 if (path[i] == '.' && path[i + 1] == '/')
526 else if (path[i] == '.' && path[i + 1] == '.' && path[i + 2] == '/')
535 strcpy (path, path + i - ddot);
537 /* Replace single `.' or `..' with `/'. */
538 if ((path[0] == '.' && path[1] == '\0')
539 || (path[0] == '.' && path[1] == '.' && path[2] == '\0'))
545 /* Walk along PATH looking for things to compact. */
552 while (path[i] && path[i] != '/')
557 /* If we didn't find any slashes, then there is nothing left to do. */
561 /* Handle multiple `/'s in a row. */
562 while (path[i] == '/')
565 if ((start + 1) != i)
567 strcpy (path + start + 1, path + i);
571 /* Check for trailing `/'. */
572 if (start && !path[i])
579 /* Check for `../', `./' or trailing `.' by itself. */
582 /* Handle trailing `.' by itself. */
587 if (path[i + 1] == '/')
589 strcpy (path + i, path + i + 1);
590 i = (start < 0) ? 0 : start;
594 /* Handle `../' or trailing `..' by itself. */
595 if (path[i + 1] == '.' &&
596 (path[i + 2] == '/' || !path[i + 2]))
598 while (--start > -1 && path[start] != '/');
599 strcpy (path + start + 1, path + i + 2);
600 i = (start < 0) ? 0 : start;
613 /* "Touch" FILE, i.e. make its atime and mtime equal to the time
614 specified with TM. */
616 touch (const char *file, time_t tm)
618 #ifdef HAVE_STRUCT_UTIMBUF
619 struct utimbuf times;
620 times.actime = times.modtime = tm;
623 times[0] = times[1] = tm;
626 if (utime (file, ×) == -1)
627 logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno));
630 /* Checks if FILE is a symbolic link, and removes it if it is. Does
631 nothing under MS-Windows. */
633 remove_link (const char *file)
638 if (lstat (file, &st) == 0 && S_ISLNK (st.st_mode))
640 DEBUGP (("Unlinking %s (symlink).\n", file));
643 logprintf (LOG_VERBOSE, _("Failed to unlink symlink `%s': %s\n"),
644 file, strerror (errno));
649 /* Does FILENAME exist? This is quite a lousy implementation, since
650 it supplies no error codes -- only a yes-or-no answer. Thus it
651 will return that a file does not exist if, e.g., the directory is
652 unreadable. I don't mind it too much currently, though. The
653 proper way should, of course, be to have a third, error state,
654 other than true/false, but that would introduce uncalled-for
655 additional complexity to the callers. */
657 file_exists_p (const char *filename)
660 return access (filename, F_OK) >= 0;
663 return stat (filename, &buf) >= 0;
667 /* Returns 0 if PATH is a directory, 1 otherwise (any kind of file).
668 Returns 0 on error. */
670 file_non_directory_p (const char *path)
673 /* Use lstat() rather than stat() so that symbolic links pointing to
674 directories can be identified correctly. */
675 if (lstat (path, &buf) != 0)
677 return S_ISDIR (buf.st_mode) ? 0 : 1;
680 /* Return a unique filename, given a prefix and count */
682 unique_name_1 (const char *fileprefix, int count)
688 filename = (char *)xmalloc (strlen (fileprefix) + numdigit (count) + 2);
689 sprintf (filename, "%s.%d", fileprefix, count);
692 filename = xstrdup (fileprefix);
694 if (!file_exists_p (filename))
703 /* Return a unique file name, based on PREFIX. */
705 unique_name (const char *prefix)
711 file = unique_name_1 (prefix, count++);
715 /* Create DIRECTORY. If some of the pathname components of DIRECTORY
716 are missing, create them first. In case any mkdir() call fails,
717 return its error status. Returns 0 on successful completion.
719 The behaviour of this function should be identical to the behaviour
720 of `mkdir -p' on systems where mkdir supports the `-p' option. */
722 make_directory (const char *directory)
728 /* Make a copy of dir, to be able to write to it. Otherwise, the
729 function is unsafe if called with a read-only char *argument. */
730 STRDUP_ALLOCA (dir, directory);
732 /* If the first character of dir is '/', skip it (and thus enable
733 creation of absolute-pathname directories. */
734 for (i = (*dir == '/'); 1; ++i)
736 for (; dir[i] && dir[i] != '/'; i++)
741 /* Check whether the directory already exists. */
742 if (!file_exists_p (dir))
744 if (mkdir (dir, 0777) < 0)
755 static int in_acclist PARAMS ((const char *const *, const char *, int));
757 /* Determine whether a file is acceptable to be followed, according to
758 lists of patterns to accept/reject. */
760 acceptable (const char *s)
764 while (l && s[l] != '/')
771 return (in_acclist ((const char *const *)opt.accepts, s, 1)
772 && !in_acclist ((const char *const *)opt.rejects, s, 1));
774 return in_acclist ((const char *const *)opt.accepts, s, 1);
776 else if (opt.rejects)
777 return !in_acclist ((const char *const *)opt.rejects, s, 1);
/* Frontal comparison: check whether S2 begins with S1.  E.g. if S1
   is `/something', frontcmp() returns 1 only if S2 starts with
   `/something'.  Otherwise, 0 is returned.  */
int
frontcmp (const char *s1, const char *s2)
{
  const char *prefix = s1;
  const char *text = s2;

  /* Advance over the common leading part of both strings.  */
  while (*prefix != '\0' && *text != '\0' && *prefix == *text)
    {
      ++prefix;
      ++text;
    }

  /* S2 begins with S1 exactly when all of S1 has been consumed.  */
  return *prefix == '\0';
}
791 /* Iterate through STRLIST, and return the first element that matches
792 S, through wildcards or front comparison (as appropriate). */
794 proclist (char **strlist, const char *s, enum accd flags)
798 for (x = strlist; *x; x++)
799 if (has_wildcards_p (*x))
801 if (fnmatch (*x, s, FNM_PATHNAME) == 0)
806 char *p = *x + ((flags & ALLABS) && (**x == '/')); /* Remove '/' */
813 /* Returns whether DIRECTORY is acceptable for download, wrt the
814 include/exclude lists.
816 If FLAGS is ALLABS, the leading `/' is ignored in paths; relative
817 and absolute paths may be freely intermixed. */
819 accdir (const char *directory, enum accd flags)
821 /* Remove starting '/'. */
822 if (flags & ALLABS && *directory == '/')
826 if (!proclist (opt.includes, directory, flags))
831 if (proclist (opt.excludes, directory, flags))
/* Match the end of STRING against PATTERN: return 1 if STRING ends
   with PATTERN, 0 otherwise.  For instance:

   match_backwards ("abc", "bc") -> 1
   match_backwards ("abc", "ab") -> 0
   match_backwards ("abc", "abc") -> 1

   NOTE(review): the line carrying the return type and linkage was not
   visible when this was reviewed; confirm whether the function should
   be static.  */
int
match_backwards (const char *string, const char *pattern)
{
  size_t string_len = strlen (string);
  size_t pattern_len = strlen (pattern);

  /* A pattern longer than the string can never match its tail.  */
  if (pattern_len > string_len)
    return 0;

  /* Compare the last PATTERN_LEN characters of STRING, terminating
     '\0' included, with PATTERN.  */
  return strcmp (string + (string_len - pattern_len), pattern) == 0;
}
857 /* Checks whether string S matches each element of ACCEPTS. A list
858 element is matched either with fnmatch() or match_backwards(),
859 according to whether the element contains wildcards or not.
861 If the BACKWARD is 0, don't do backward comparison -- just compare
864 in_acclist (const char *const *accepts, const char *s, int backward)
866 for (; *accepts; accepts++)
868 if (has_wildcards_p (*accepts))
870 /* fnmatch returns 0 if the pattern *does* match the
872 if (fnmatch (*accepts, s, 0) == 0)
879 if (match_backwards (s, *accepts))
884 if (!strcmp (s, *accepts))
892 /* Return the malloc-ed suffix of STR. For instance:
893 suffix ("foo.bar") -> "bar"
894 suffix ("foo.bar.baz") -> "baz"
895 suffix ("/foo/bar") -> NULL
896 suffix ("/foo.bar/baz") -> NULL */
898 suffix (const char *str)
902 for (i = strlen (str); i && str[i] != '/' && str[i] != '.'; i--);
904 return xstrdup (str + i);
909 /* Read a line from FP. The function reallocs the storage as needed
910 to accommodate any length of the line. Reallocs are done
911 exponentially, doubling the storage after each overflow to
912 minimize the number of calls to realloc() and fgets(). The newline
913 character at the end of line is retained.
915 After end-of-file is encountered without anything being read, NULL
916 is returned. NULL is also returned on error. To distinguish
917 between these two cases, use the stdio function ferror(). */
920 read_whole_line (FILE *fp)
924 char *line = (char *)xmalloc (bufsize);
926 while (fgets (line + length, bufsize - length, fp))
928 length += strlen (line + length);
930 if (line[length - 1] == '\n')
932 /* fgets() guarantees to read the whole line, or to use up the
933 space we've given it. We can double the buffer
936 line = xrealloc (line, bufsize);
938 if (length == 0 || ferror (fp))
943 if (length + 1 < bufsize)
944 /* Relieve the memory from our exponential greediness. We say
945 `length + 1' because the terminating \0 is not included in
946 LENGTH. We don't need to zero-terminate the string ourselves,
947 though, because fgets() does that. */
948 line = xrealloc (line, length + 1);
952 /* Read FILE into memory. A pointer to `struct file_memory' is
953 returned; use struct element `content' to access file contents, and
954 the element `length' to know the file length. `content' is *not*
955 zero-terminated, and you should *not* read or write beyond the [0,
956 length) range of characters.
958 After you are done with the file contents, call read_file_free to
961 Depending on the operating system and the type of file that is
962 being read, read_file() either mmap's the file into memory, or
963 reads the file into the core using read().
965 If file is named "-", fileno(stdin) is used for reading instead.
966 If you want to read from a real file named "-", use "./-" instead. */
969 read_file (const char *file)
972 struct file_memory *fm;
974 int inhibit_close = 0;
976 /* Some magic in the finest tradition of Perl and its kin: if FILE
977 is "-", just use stdin. */
982 /* Note that we don't inhibit mmap() in this case. If stdin is
983 redirected from a regular file, mmap() will still work. */
986 fd = open (file, O_RDONLY);
989 fm = xmalloc (sizeof (struct file_memory));
994 if (fstat (fd, &buf) < 0)
996 fm->length = buf.st_size;
997 /* NOTE: As far as I know, the callers of this function never
998 modify the file text. Relying on this would enable us to
999 specify PROT_READ and MAP_SHARED for a marginal gain in
1000 efficiency, but at some cost to generality. */
1001 fm->content = mmap (NULL, fm->length, PROT_READ | PROT_WRITE,
1002 MAP_PRIVATE, fd, 0);
1003 if (fm->content == (char *)MAP_FAILED)
1013 /* The most common reason why mmap() fails is that FD does not point
1014 to a plain file. However, it's also possible that mmap() doesn't
1015 work for a particular type of file. Therefore, whenever mmap()
1016 fails, we just fall back to the regular method. */
1017 #endif /* HAVE_MMAP */
1020 size = 512; /* number of bytes fm->contents can
1021 hold at any given time. */
1022 fm->content = xmalloc (size);
1026 if (fm->length > size / 2)
1028 /* #### I'm not sure whether the whole exponential-growth
1029 thing makes sense with kernel read. On Linux at least,
1030 read() refuses to read more than 4K from a file at a
1031 single chunk anyway. But other Unixes might optimize it
1032 better, and it doesn't *hurt* anything, so I'm leaving
1035 /* Normally, we grow SIZE exponentially to make the number
1036 of calls to read() and realloc() logarithmic in relation
1037 to file size. However, read() can read an amount of data
1038 smaller than requested, and it would be unreasonable to
1039 double SIZE every time *something* was read. Therefore,
1040 we double SIZE only when the length exceeds half of the
1041 entire allocated size. */
1043 fm->content = xrealloc (fm->content, size);
1045 nread = read (fd, fm->content + fm->length, size - fm->length);
1047 /* Successful read. */
1048 fm->length += nread;
1058 if (size > fm->length && fm->length != 0)
1059 /* Due to exponential growth of fm->content, the allocated region
1060 might be much larger than what is actually needed. */
1061 fm->content = xrealloc (fm->content, fm->length);
1068 xfree (fm->content);
1073 /* Release the resources held by FM. Specifically, this calls
1074 munmap() or xfree() on fm->content, depending whether mmap or
1075 malloc/read were used to read in the file. It also frees the
1076 memory needed to hold the FM structure itself. */
1079 read_file_free (struct file_memory *fm)
1084 munmap (fm->content, fm->length);
1089 xfree (fm->content);
1094 /* Free the pointers in a NULL-terminated vector of pointers, then
1095 free the pointer itself. */
1097 free_vec (char **vec)
1108 /* Append vector V2 to vector V1. The function frees V2 and
1109 reallocates V1 (thus you may not use the contents of either
1110 pointer after the call). If V1 is NULL, V2 is returned. */
1112 merge_vecs (char **v1, char **v2)
1122 /* To avoid j == 0 */
1127 for (i = 0; v1[i]; i++);
1129 for (j = 0; v2[j]; j++);
1130 /* Reallocate v1. */
1131 v1 = (char **)xrealloc (v1, (i + j + 1) * sizeof (char **));
1132 memcpy (v1 + i, v2, (j + 1) * sizeof (char *));
1137 /* A set of simple-minded routines to store strings in a linked list.
1138 This used to also be used for searching, but now we have hash
1141 /* It's a shame that these simple things like linked lists and hash
1142 tables (see hash.c) need to be implemented over and over again. It
1143 would be nice to be able to use the routines from glib -- see
1144 www.gtk.org for details. However, that would make Wget depend on
1145 glib, and I want to avoid dependencies to external libraries for
1146 reasons of convenience and portability (I suspect Wget is more
1147 portable than anything ever written for Gnome). */
1149 /* Append an element to the list. If the list has a huge number of
1150 elements, this can get slow because it has to find the list's
1151 ending. If you think you have to call slist_append in a loop,
1152 think about calling slist_prepend() followed by slist_nreverse(). */
1155 slist_append (slist *l, const char *s)
1157 slist *newel = (slist *)xmalloc (sizeof (slist));
1160 newel->string = xstrdup (s);
1165 /* Find the last element. */
1172 /* Prepend S to the list. Unlike slist_append(), this is O(1). */
1175 slist_prepend (slist *l, const char *s)
1177 slist *newel = (slist *)xmalloc (sizeof (slist));
1178 newel->string = xstrdup (s);
1183 /* Destructively reverse L. */
1186 slist_nreverse (slist *l)
1191 slist *next = l->next;
1199 /* Is there a specific entry in the list? */
1201 slist_contains (slist *l, const char *s)
1203 for (; l; l = l->next)
1204 if (!strcmp (l->string, s))
1209 /* Free the whole slist. */
1211 slist_free (slist *l)
1222 /* Sometimes it's useful to create "sets" of strings, i.e. special
1223 hash tables where you want to store strings as keys and merely
1224 query for their existence. Here is a set of utility routines that
1225 makes that transparent. */
1228 string_set_add (struct hash_table *ht, const char *s)
1230 /* First check whether the set element already exists. If it does,
1231 do nothing so that we don't have to free() the old element and
1232 then strdup() a new one. */
1233 if (hash_table_exists (ht, s))
1236 /* We use "1" as value. It provides us a useful and clear arbitrary
1237 value, and it consumes no memory -- the pointers to the same
1238 string "1" will be shared by all the key-value pairs in all `set'
1240 hash_table_put (ht, xstrdup (s), "1");
1243 /* Synonym for hash_table_exists... */
1246 string_set_exists (struct hash_table *ht, const char *s)
1248 return hash_table_exists (ht, s);
1252 string_set_free_mapper (void *key, void *value_ignored, void *arg_ignored)
1259 string_set_free (struct hash_table *ht)
1261 hash_table_map (ht, string_set_free_mapper, NULL);
1262 hash_table_destroy (ht);
1266 free_keys_and_values_mapper (void *key, void *value, void *arg_ignored)
1273 /* Another utility function: call free() on all keys and values of HT. */
1276 free_keys_and_values (struct hash_table *ht)
1278 hash_table_map (ht, free_keys_and_values_mapper, NULL);
1282 /* Engine for legible and legible_very_long; this function works on
1286 legible_1 (const char *repr)
1288 static char outbuf[128];
1293 /* Reset the pointers. */
1296 /* If the number is negative, shift the pointers. */
1302 /* How many digits before the first separator? */
1303 mod = strlen (inptr) % 3;
1305 for (i = 0; i < mod; i++)
1306 *outptr++ = inptr[i];
1307 /* Now insert the rest of them, putting separator before every
1309 for (i1 = i, i = 0; inptr[i1]; i++, i1++)
1311 if (i % 3 == 0 && i1 != 0)
1313 *outptr++ = inptr[i1];
1315 /* Zero-terminate the string. */
1320 /* Legible -- return a static pointer to the legibly printed long. */
1325 /* Print the number into the buffer. */
1326 long_to_string (inbuf, l);
1327 return legible_1 (inbuf);
1330 /* Write a string representation of NUMBER into the provided buffer.
1331 We cannot use sprintf() because we cannot be sure whether the
1332 platform supports printing of what we chose for VERY_LONG_TYPE.
1334 Example: Gcc supports `long long' under many platforms, but on many
1335 of those the native libc knows nothing of it and therefore cannot
1338 How long BUFFER needs to be depends on the platform and the content
1339 of NUMBER. For 64-bit VERY_LONG_TYPE (the most common case), 24
1340 bytes are sufficient. Using more might be a good idea.
1342 This function does not go through the hoops that long_to_string
1343 goes to because it doesn't need to be fast. (It's called perhaps
1344 once in a Wget run.) */
1347 very_long_to_string (char *buffer, VERY_LONG_TYPE number)
1352 /* Print the number backwards... */
1355 buffer[i++] = '0' + number % 10;
1360 /* ...and reverse the order of the digits. */
1361 for (j = 0; j < i / 2; j++)
1364 buffer[j] = buffer[i - 1 - j];
1365 buffer[i - 1 - j] = c;
1370 /* The same as legible(), but works on VERY_LONG_TYPE. See sysdep.h. */
1372 legible_very_long (VERY_LONG_TYPE l)
1375 /* Print the number into the buffer. */
1376 very_long_to_string (inbuf, l);
1377 return legible_1 (inbuf);
1380 /* Count the digits in a (long) integer. */
1387 while ((a /= 10) != 0)
1392 #define ONE_DIGIT(figure) *p++ = n / (figure) + '0'
1393 #define ONE_DIGIT_ADVANCE(figure) (ONE_DIGIT (figure), n %= (figure))
1395 #define DIGITS_1(figure) ONE_DIGIT (figure)
1396 #define DIGITS_2(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_1 ((figure) / 10)
1397 #define DIGITS_3(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_2 ((figure) / 10)
1398 #define DIGITS_4(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_3 ((figure) / 10)
1399 #define DIGITS_5(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_4 ((figure) / 10)
1400 #define DIGITS_6(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_5 ((figure) / 10)
1401 #define DIGITS_7(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_6 ((figure) / 10)
1402 #define DIGITS_8(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_7 ((figure) / 10)
1403 #define DIGITS_9(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_8 ((figure) / 10)
1404 #define DIGITS_10(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_9 ((figure) / 10)
1406 /* DIGITS_<11-20> are only used on 64-bit machines. */
1408 #define DIGITS_11(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_10 ((figure) / 10)
1409 #define DIGITS_12(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_11 ((figure) / 10)
1410 #define DIGITS_13(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_12 ((figure) / 10)
1411 #define DIGITS_14(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_13 ((figure) / 10)
1412 #define DIGITS_15(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_14 ((figure) / 10)
1413 #define DIGITS_16(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_15 ((figure) / 10)
1414 #define DIGITS_17(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_16 ((figure) / 10)
1415 #define DIGITS_18(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_17 ((figure) / 10)
1416 #define DIGITS_19(figure) ONE_DIGIT_ADVANCE (figure); DIGITS_18 ((figure) / 10)
1418 /* Print NUMBER to BUFFER in base 10. This is completely equivalent
1419 to `sprintf(buffer, "%ld", number)', only much faster.
1421 The speedup may make a difference in programs that frequently
1422 convert numbers to strings. Some implementations of sprintf,
1423 particularly the one in GNU libc, have been known to be extremely
1424 slow compared to this function.
1426 BUFFER should accept as many bytes as you expect the number to take
1427 up. On 64-bit machines 24 bytes is always enough: the longest
1428 output is 19 digits plus the `-' sign for negative numbers and the
1429 trailing \0, i.e. 21 bytes. */
1432 long_to_string (char *buffer, long number)
/* NOTE(review): N used below is not declared in the lines shown here;
   presumably it is a local copy of NUMBER -- confirm.  */
1437 #if (SIZEOF_LONG != 4) && (SIZEOF_LONG != 8)
1438 /* We are running in a strange or misconfigured environment. Let
1439 sprintf cope with it. */
1440 sprintf (buffer, "%ld", n);
1441 #else /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */
/* Ladder over the magnitude of N: the first bound that N falls under
   selects DIGITS_<k>, where k is the number of decimal digits, and the
   argument 10^(k-1) is the weight of the leading digit.
   NOTE(review): the comparisons are meaningful only for N >= 0.  The
   sign handling is not visible here -- confirm that LONG_MIN is covered,
   since its negation is not representable in a long.  */
1449 if (n < 10) { DIGITS_1 (1); }
1450 else if (n < 100) { DIGITS_2 (10); }
1451 else if (n < 1000) { DIGITS_3 (100); }
1452 else if (n < 10000) { DIGITS_4 (1000); }
1453 else if (n < 100000) { DIGITS_5 (10000); }
1454 else if (n < 1000000) { DIGITS_6 (100000); }
1455 else if (n < 10000000) { DIGITS_7 (1000000); }
1456 else if (n < 100000000) { DIGITS_8 (10000000); }
1457 else if (n < 1000000000) { DIGITS_9 (100000000); }
1458 #if SIZEOF_LONG == 4
/* A 32-bit long has at most 10 decimal digits, so this is the last
   rung and needs no upper bound.  */
1459 /* ``if (1)'' serves only to preserve editor indentation. */
1460 else if (1) { DIGITS_10 (1000000000); }
1461 #else /* SIZEOF_LONG != 4 */
/* A 64-bit long has at most 19 decimal digits; the final plain `else'
   takes every value of 10^18 and above.  */
1462 else if (n < 10000000000L) { DIGITS_10 (1000000000L); }
1463 else if (n < 100000000000L) { DIGITS_11 (10000000000L); }
1464 else if (n < 1000000000000L) { DIGITS_12 (100000000000L); }
1465 else if (n < 10000000000000L) { DIGITS_13 (1000000000000L); }
1466 else if (n < 100000000000000L) { DIGITS_14 (10000000000000L); }
1467 else if (n < 1000000000000000L) { DIGITS_15 (100000000000000L); }
1468 else if (n < 10000000000000000L) { DIGITS_16 (1000000000000000L); }
1469 else if (n < 100000000000000000L) { DIGITS_17 (10000000000000000L); }
1470 else if (n < 1000000000000000000L) { DIGITS_18 (100000000000000000L); }
1471 else { DIGITS_19 (1000000000000000000L); }
1472 #endif /* SIZEOF_LONG != 4 */
1475 #endif /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */
/* The digit helper is undefined here so that it does not leak into the
   rest of the file.  */
1479 #undef ONE_DIGIT_ADVANCE
1501 /* Support for timers. */
/* The backend macros are cleared first so that the selection below
   starts from a known state.  */
1503 #undef TIMER_WINDOWS
1504 #undef TIMER_GETTIMEOFDAY
1507 /* Depending on the OS and availability of gettimeofday(), one and
1508 only one of the above constants will be defined. Virtually all
1509 modern Unix systems will define TIMER_GETTIMEOFDAY; Windows will
1510 use TIMER_WINDOWS. TIMER_TIME is a catch-all method for
1511 non-Windows systems without gettimeofday.
1513 #### Perhaps we should also support ftime(), which exists on old
1514 BSD 4.2-influenced systems? (It also existed under MS DOS Borland
1515 C, if memory serves me.) */
1518 # define TIMER_WINDOWS
1519 #else /* not WINDOWS */
1520 # ifdef HAVE_GETTIMEOFDAY
1521 # define TIMER_GETTIMEOFDAY
1525 #endif /* not WINDOWS */
/* Backend-specific storage for the starting point of a timer.  The
   timer functions in this file read and write wt->secs, wt->usecs and
   wt->wintime.  NOTE(review): only the wintime member is visible at
   this point -- confirm the remaining members against the full
   declaration.  */
1528 #ifdef TIMER_GETTIMEOFDAY
1537 #ifdef TIMER_WINDOWS
1538 ULARGE_INTEGER wintime;
1542 /* Allocate a timer. It is not legal to do anything with a freshly
1543 allocated timer, except call wtimer_reset(). */
1546 wtimer_allocate (void)
/* Room for exactly one struct wget_timer is requested; none of the
   statements shown here stores a start time in it.
   NOTE(review): the result of xmalloc is used unchecked; presumably
   xmalloc is one of this file's allocation wrappers that exit on
   memory exhaustion -- confirm.  */
1548 struct wget_timer *wt =
1549 (struct wget_timer *)xmalloc (sizeof (struct wget_timer));
1553 /* Allocate a new timer and reset it. Return the new timer. */
/* The storage comes from wtimer_allocate, so the timer has to be reset
   before any other use.  */
1558 struct wget_timer *wt = wtimer_allocate ();
1563 /* Free the resources associated with the timer. Its further use is prohibited. */
1567 wtimer_delete (struct wget_timer *wt)
1572 /* Reset timer WT. This establishes the starting point from which
1573 wtimer_elapsed() will return the number of elapsed
1574 milliseconds. It is allowed to reset a previously used timer. */
1577 wtimer_reset (struct wget_timer *wt)
1579 #ifdef TIMER_GETTIMEOFDAY
/* Record the current time of day, split into whole seconds and
   microseconds.  The return value of gettimeofday is not examined.  */
1581 gettimeofday (&t, NULL);
1582 wt->secs = t.tv_sec;
1583 wt->usecs = t.tv_usec;
/* Whole-second variant: only the value of time() is stored.
   NOTE(review): presumably this is the TIMER_TIME backend -- its guard
   is not visible here, confirm.  */
1587 wt->secs = time (NULL);
1590 #ifdef TIMER_WINDOWS
/* Convert the current system time to a FILETIME and keep its two
   halves in the ULARGE_INTEGER, so that wtimer_elapsed can subtract
   the values through QuadPart.  */
1593 GetSystemTime (&st);
1594 SystemTimeToFileTime (&st, &ft);
1595 wt->wintime.HighPart = ft.dwHighDateTime;
1596 wt->wintime.LowPart = ft.dwLowDateTime;
1600 /* Return the number of milliseconds elapsed since the timer was last
1601 reset. It is allowed to call this function more than once to get
1602 increasingly higher elapsed values. */
1605 wtimer_elapsed (struct wget_timer *wt)
1607 #ifdef TIMER_GETTIMEOFDAY
1609 gettimeofday (&t, NULL);
/* Difference in seconds scaled to milliseconds, plus the difference in
   microseconds scaled to milliseconds.  The second term is negative
   when the microsecond field is smaller than it was at reset; the
   seconds term compensates for that.  */
1610 return (t.tv_sec - wt->secs) * 1000 + (t.tv_usec - wt->usecs) / 1000;
/* Whole-second clock: the result is always a multiple of 1000.  */
1614 time_t now = time (NULL);
1615 return 1000 * (now - wt->secs);
/* A FILETIME counts 100-nanosecond intervals, so dividing the
   difference by 10000 yields milliseconds.  */
1622 GetSystemTime (&st);
1623 SystemTimeToFileTime (&st, &ft);
1624 uli.HighPart = ft.dwHighDateTime;
1625 uli.LowPart = ft.dwLowDateTime;
1626 return (long)((uli.QuadPart - wt->wintime.QuadPart) / 10000);
1630 /* Return the assessed granularity of the timer implementation. This
1631 is important for certain code that tries to deal with "zero" time intervals. */
1635 wtimer_granularity (void)
1637 #ifdef TIMER_GETTIMEOFDAY
1638 /* Granularity of gettimeofday is hugely architecture-dependent.
1639 However, it appears that on modern machines it is better than 1ms. */
1645 /* This is clear. */
1649 #ifdef TIMER_WINDOWS
1655 /* This should probably be at a better place, but it doesn't really
1656 fit into html-parse.c. */
1658 /* The function returns the pointer to the malloc-ed quoted version of
1659 string s. It will recognize and quote numeric and special graphic
1660 entities, as per RFC1866:
1668 No other entities are recognized or replaced. */
1670 html_quote_string (const char *s)
1676 /* Pass through the string, and count the new size. */
1677 for (i = 0; *s; s++, i++)
1680 i += 4; /* `amp;' */
1681 else if (*s == '<' || *s == '>')
1682 i += 3; /* `lt;' and `gt;' */
1683 else if (*s == '\"')
1684 i += 5; /* `quot;' */
1688 res = (char *)xmalloc (i + 1);
1690 for (p = res; *s; s++)
1703 *p++ = (*s == '<' ? 'l' : 'g');