1 /* Various functions of utilitarian nature.
2 Copyright (C) 1995, 1996, 1997, 1998, 2000 Free Software Foundation, Inc.
4 This file is part of Wget.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
26 #else /* not HAVE_STRING_H */
28 #endif /* not HAVE_STRING_H */
30 #include <sys/types.h>
35 # include <sys/mman.h>
44 #ifdef HAVE_SYS_UTIME_H
45 # include <sys/utime.h>
49 # include <libc.h> /* for access() */
63 /* This section implements several wrappers around the basic
64 allocation routines. This is done for two reasons: first, so that
65 the callers of these functions need not consistently check for
66 errors. If there is not enough virtual memory for running Wget,
67 something is seriously wrong, and Wget exits with an appropriate
70 The second reason why these are useful is that, if DEBUG_MALLOC is
71 defined, they also provide a handy (if crude) malloc debugging
72 interface that checks memory leaks. */
74 /* Croak the fatal memory error and bail out with non-zero exit
77 memfatal (const char *what)
79 /* HACK: expose save_log_p from log.c, so we can turn it off in
80 order to prevent saving the log. Saving the log is dangerous
81 because logprintf() and logputs() can call malloc(), so this
82 could infloop. When logging is turned off, infloop can no longer
85 #### This is no longer really necessary because the new routines
86 in log.c cons only if the line exceeds eighty characters. But
87 this can come at the end of a line, so it's OK to be careful.
89 On a more serious note, it would be good to have a
90 log_forced_shutdown() routine that exposes this cleanly. */
91 extern int save_log_p;
94 logprintf (LOG_ALWAYS, _("%s: %s: Not enough memory.\n"), exec_name, what);
98 /* These functions end with _real because they need to be
99 distinguished from the debugging functions, and from the macros.
102 If memory debugging is not turned on, wget.h defines these:
104 #define xmalloc xmalloc_real
105 #define xfree xfree_real
106 #define xrealloc xrealloc_real
107 #define xstrdup xstrdup_real
109 In case of memory debugging, the definitions are a bit more
110 complex, because we want to provide more information, *and* we want
111 to call the debugging code. (The former is the reason why xmalloc
112 and friends need to be macros in the first place.) Then it looks
115 #define xmalloc(a) xmalloc_debug (a, __FILE__, __LINE__)
116 #define xfree(a) xfree_debug (a, __FILE__, __LINE__)
117 #define xrealloc(a, b) xrealloc_debug (a, b, __FILE__, __LINE__)
118 #define xstrdup(a) xstrdup_debug (a, __FILE__, __LINE__)
120 Each of the *_debug functions does its magic and calls the real one. */
123 xmalloc_real (size_t size)
125 void *ptr = malloc (size);
132 xfree_real (void *ptr)
138 xrealloc_real (void *ptr, size_t newsize)
142 /* Not all Un*xes have the feature of realloc() that calling it with
143 a NULL-pointer is the same as malloc(), but it is easy to
146 newptr = realloc (ptr, newsize);
148 newptr = malloc (newsize);
150 memfatal ("realloc");
155 xstrdup_real (const char *s)
161 copy = malloc (l + 1);
164 memcpy (copy, s, l + 1);
165 #else /* HAVE_STRDUP */
169 #endif /* HAVE_STRDUP */
176 /* Crude home-grown routines for debugging some malloc-related
179 * Counting the number of malloc and free invocations, and reporting
180 the "balance", i.e. how many times more malloc was called than it
181 was the case with free.
183 * Making malloc store its entry into a simple array and free remove
184 stuff from that array. At the end, print the pointers which have
185 not been freed, along with the source file and the line number.
186 This also has the side-effect of detecting freeing memory that
189 Note that this kind of memory leak checking strongly depends on
190 every malloc() being followed by a free(), even if the program is
191 about to finish. Wget is careful to free the data structure it
192 allocated in init.c. */
194 static int malloc_count, free_count;
200 } malloc_debug[100000];
202 /* Both register_ptr and unregister_ptr take O(n) operations to run,
203 which can be a real problem. It would be nice to use a hash table
204 for malloc_debug, but the functions in hash.c are not suitable
205 because they can call malloc() themselves. Maybe it would work if
206 the hash table were preallocated to a huge size, and if we set the
207 rehash threshold to 1.0. */
209 /* Register PTR in malloc_debug. Abort if this is not possible
210 (presumably due to the number of current allocations exceeding the
211 size of malloc_debug.) */
214 register_ptr (void *ptr, const char *file, int line)
217 for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
218 if (malloc_debug[i].ptr == NULL)
220 malloc_debug[i].ptr = ptr;
221 malloc_debug[i].file = file;
222 malloc_debug[i].line = line;
228 /* Unregister PTR from malloc_debug. Abort if PTR is not present in
229 malloc_debug. (This catches calling free() with a bogus pointer.) */
232 unregister_ptr (void *ptr)
235 for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
236 if (malloc_debug[i].ptr == ptr)
238 malloc_debug[i].ptr = NULL;
244 /* Print the malloc debug stats that can be gathered from the above
245 information. Currently this is the count of mallocs, frees, the
246 difference between the two, and the dump of the contents of
247 malloc_debug. The last part are the memory leaks. */
250 print_malloc_debug_stats (void)
253 printf ("\nMalloc: %d\nFree: %d\nBalance: %d\n\n",
254 malloc_count, free_count, malloc_count - free_count);
255 for (i = 0; i < ARRAY_SIZE (malloc_debug); i++)
256 if (malloc_debug[i].ptr != NULL)
257 printf ("0x%08ld: %s:%d\n", (long)malloc_debug[i].ptr,
258 malloc_debug[i].file, malloc_debug[i].line);
262 xmalloc_debug (size_t size, const char *source_file, int source_line)
264 void *ptr = xmalloc_real (size);
266 register_ptr (ptr, source_file, source_line);
271 xfree_debug (void *ptr, const char *source_file, int source_line)
273 assert (ptr != NULL);
275 unregister_ptr (ptr);
280 xrealloc_debug (void *ptr, size_t newsize, const char *source_file, int source_line)
282 void *newptr = xrealloc_real (ptr, newsize);
286 register_ptr (newptr, source_file, source_line);
290 unregister_ptr (ptr);
291 register_ptr (newptr, source_file, source_line);
297 xstrdup_debug (const char *s, const char *source_file, int source_line)
299 char *copy = xstrdup_real (s);
301 register_ptr (copy, source_file, source_line);
305 #endif /* DEBUG_MALLOC */
/* Duplicate the characters in the half-open range [BEG, END) into
   freshly xmalloc-ed storage and zero-terminate the copy.  END
   points to the character just after the last one to be copied.  */
char *
strdupdelim (const char *beg, const char *end)
{
  size_t len = end - beg;
  char *copy = (char *) xmalloc (len + 1);

  memcpy (copy, beg, len);
  copy[len] = '\0';
  return copy;
}
319 /* Parse a string containing comma-separated elements, and return a
320 vector of char pointers with the elements. Spaces following the
321 commas are ignored. */
323 sepstring (const char *s)
337 res = (char **)xrealloc (res, (i + 2) * sizeof (char *));
338 res[i] = strdupdelim (p, s);
341 /* Skip the blanks following the ','. */
349 res = (char **)xrealloc (res, (i + 2) * sizeof (char *));
350 res[i] = strdupdelim (p, s);
/* Return a pointer to a static char[] buffer holding the
   zero-terminated representation of the current time, in the form
   hh:mm:ss.  The buffer is overwritten by every call, so the function
   is not reentrant.

   If TM is non-NULL, the time_t of the current time is also stored
   there.

   If the current time cannot be obtained or converted to local time,
   the returned buffer holds an empty string.  */
char *
time_str (time_t *tm)
{
  static char tms[15];
  struct tm *ptm;
  time_t tim;

  *tms = '\0';
  tim = time (tm);
  if (tim == (time_t) -1)
    return tms;
  ptm = localtime (&tim);
  /* localtime() is allowed to fail and return NULL; do not
     dereference the result blindly.  */
  if (ptm == NULL)
    return tms;
  sprintf (tms, "%02d:%02d:%02d", ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
  return tms;
}
377 /* Returns an error message for ERRNUM. #### This requires more work.
378 This function, as well as the whole error system, is very
381 uerrmsg (uerr_t errnum)
386 return _("Unknown/unsupported protocol");
389 return _("Invalid port specification");
392 return _("Invalid host name");
396 /* $@#@#$ compiler. */
401 /* The Windows versions of the following two functions are defined in
404 /* A cuserid() imitation using getpwuid(), to avoid hassling with
405 utmp. Besides, not all systems have cuserid(). Under Windows, it
406 is defined in mswindows.c.
408 If WHERE is non-NULL, the username will be stored there.
409 Otherwise, it will be returned as a static buffer (as returned by
410 getpwuid()). In the latter case, the buffer should be copied
411 before calling getpwuid() or pwd_cuserid() again. */
414 pwd_cuserid (char *where)
418 if (!(pwd = getpwuid (getuid ())) || !pwd->pw_name)
422 strcpy (where, pwd->pw_name);
430 fork_to_background (void)
433 /* Whether we arrange our own version of opt.lfilename here. */
438 opt.lfilename = unique_name (DEFAULT_LOGFILE);
450 /* parent, no error */
451 printf (_("Continuing in background.\n"));
453 printf (_("Output will be written to `%s'.\n"), opt.lfilename);
456 /* child: keep running */
458 #endif /* not WINDOWS */
460 /* Canonicalize PATH, and return a new path. The new path differs from PATH
462 Multiple `/'s are collapsed to a single `/'.
463 Leading `./'s and trailing `/.'s are removed.
464 Trailing `/'s are removed.
465 Non-leading `../'s and trailing `..'s are handled by removing
466 portions of the path.
468 E.g. "a/b/c/./../d/.." will yield "a/b". This function originates
472 Always use '/' as stub_char.
473 Don't check for local things using canon_stat.
474 Change the original string instead of strdup-ing.
475 React correctly when beginning with `./' and `../'. */
477 path_simplify (char *path)
479 register int i, start, ddot;
485 /*stub_char = (*path == '/') ? '/' : '.';*/
488 /* Addition: Remove all `./'-s preceding the string. If `../'-s
489 precede, put `/' in front and remove them too. */
494 if (path[i] == '.' && path[i + 1] == '/')
496 else if (path[i] == '.' && path[i + 1] == '.' && path[i + 2] == '/')
505 strcpy (path, path + i - ddot);
507 /* Replace single `.' or `..' with `/'. */
508 if ((path[0] == '.' && path[1] == '\0')
509 || (path[0] == '.' && path[1] == '.' && path[2] == '\0'))
515 /* Walk along PATH looking for things to compact. */
522 while (path[i] && path[i] != '/')
527 /* If we didn't find any slashes, then there is nothing left to do. */
531 /* Handle multiple `/'s in a row. */
532 while (path[i] == '/')
535 if ((start + 1) != i)
537 strcpy (path + start + 1, path + i);
541 /* Check for trailing `/'. */
542 if (start && !path[i])
549 /* Check for `../', `./' or trailing `.' by itself. */
552 /* Handle trailing `.' by itself. */
557 if (path[i + 1] == '/')
559 strcpy (path + i, path + i + 1);
560 i = (start < 0) ? 0 : start;
564 /* Handle `../' or trailing `..' by itself. */
565 if (path[i + 1] == '.' &&
566 (path[i + 2] == '/' || !path[i + 2]))
568 while (--start > -1 && path[start] != '/');
569 strcpy (path + start + 1, path + i + 2);
570 i = (start < 0) ? 0 : start;
583 /* "Touch" FILE, i.e. make its atime and mtime equal to the time
584 specified with TM. */
586 touch (const char *file, time_t tm)
588 #ifdef HAVE_STRUCT_UTIMBUF
589 struct utimbuf times;
590 times.actime = times.modtime = tm;
593 times[0] = times[1] = tm;
596 if (utime (file, ×) == -1)
597 logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno));
600 /* Checks if FILE is a symbolic link, and removes it if it is. Does
601 nothing under MS-Windows. */
603 remove_link (const char *file)
608 if (lstat (file, &st) == 0 && S_ISLNK (st.st_mode))
610 DEBUGP (("Unlinking %s (symlink).\n", file));
613 logprintf (LOG_VERBOSE, _("Failed to unlink symlink `%s': %s\n"),
614 file, strerror (errno));
619 /* Does FILENAME exist? This is quite a lousy implementation, since
620 it supplies no error codes -- only a yes-or-no answer. Thus it
621 will return that a file does not exist if, e.g., the directory is
622 unreadable. I don't mind it too much currently, though. The
623 proper way should, of course, be to have a third, error state,
624 other than true/false, but that would introduce uncalled-for
625 additional complexity to the callers. */
627 file_exists_p (const char *filename)
630 return access (filename, F_OK) >= 0;
633 return stat (filename, &buf) >= 0;
637 /* Returns 0 if PATH is a directory, 1 otherwise (any kind of file).
638 Returns 0 on error. */
640 file_non_directory_p (const char *path)
643 /* Use lstat() rather than stat() so that symbolic links pointing to
644 directories can be identified correctly. */
645 if (lstat (path, &buf) != 0)
647 return S_ISDIR (buf.st_mode) ? 0 : 1;
650 /* Return a unique filename, given a prefix and count */
652 unique_name_1 (const char *fileprefix, int count)
658 filename = (char *)xmalloc (strlen (fileprefix) + numdigit (count) + 2);
659 sprintf (filename, "%s.%d", fileprefix, count);
662 filename = xstrdup (fileprefix);
664 if (!file_exists_p (filename))
673 /* Return a unique file name, based on PREFIX. */
675 unique_name (const char *prefix)
681 file = unique_name_1 (prefix, count++);
685 /* Create DIRECTORY. If some of the pathname components of DIRECTORY
686 are missing, create them first. In case any mkdir() call fails,
687 return its error status. Returns 0 on successful completion.
689 The behaviour of this function should be identical to the behaviour
690 of `mkdir -p' on systems where mkdir supports the `-p' option. */
692 make_directory (const char *directory)
698 /* Make a copy of dir, to be able to write to it. Otherwise, the
699 function is unsafe if called with a read-only char *argument. */
700 STRDUP_ALLOCA (dir, directory);
702 /* If the first character of dir is '/', skip it (and thus enable
703 creation of absolute-pathname directories. */
704 for (i = (*dir == '/'); 1; ++i)
706 for (; dir[i] && dir[i] != '/'; i++)
711 /* Check whether the directory already exists. */
712 if (!file_exists_p (dir))
714 if (mkdir (dir, 0777) < 0)
725 static int in_acclist PARAMS ((const char *const *, const char *, int));
727 /* Determine whether a file is acceptable to be followed, according to
728 lists of patterns to accept/reject. */
730 acceptable (const char *s)
734 while (l && s[l] != '/')
741 return (in_acclist ((const char *const *)opt.accepts, s, 1)
742 && !in_acclist ((const char *const *)opt.rejects, s, 1));
744 return in_acclist ((const char *const *)opt.accepts, s, 1);
746 else if (opt.rejects)
747 return !in_acclist ((const char *const *)opt.rejects, s, 1);
/* Compare S1 and S2 frontally: return 1 if S2 begins with S1, and 0
   otherwise.  E.g. if S1 is `/something', frontcmp() returns 1 only
   if S2 begins with `/something'.  An empty S1 is a prefix of every
   string.  */
int
frontcmp (const char *s1, const char *s2)
{
  /* strncmp() stops at the terminating zero of S2, so an S2 that is
     shorter than S1 compares unequal instead of being overrun.  */
  return strncmp (s1, s2, strlen (s1)) == 0;
}
761 /* Iterate through STRLIST, and return the first element that matches
762 S, through wildcards or front comparison (as appropriate). */
764 proclist (char **strlist, const char *s, enum accd flags)
768 for (x = strlist; *x; x++)
769 if (has_wildcards_p (*x))
771 if (fnmatch (*x, s, FNM_PATHNAME) == 0)
776 char *p = *x + ((flags & ALLABS) && (**x == '/')); /* Remove '/' */
783 /* Returns whether DIRECTORY is acceptable for download, wrt the
784 include/exclude lists.
786 If FLAGS is ALLABS, the leading `/' is ignored in paths; relative
787 and absolute paths may be freely intermixed. */
789 accdir (const char *directory, enum accd flags)
791 /* Remove starting '/'. */
792 if (flags & ALLABS && *directory == '/')
796 if (!proclist (opt.includes, directory, flags))
801 if (proclist (opt.excludes, directory, flags))
/* Match the end of STRING against PATTERN.  Return 1 if STRING ends
   with PATTERN, 0 otherwise.  For instance:

   match_backwards ("abc", "bc") -> 1
   match_backwards ("abc", "ab") -> 0
   match_backwards ("abc", "abc") -> 1

   An empty PATTERN matches every STRING.  */
int
match_backwards (const char *string, const char *pattern)
{
  /* Keep the lengths in size_t; storing the result of strlen() in an
     int would narrow it.  */
  size_t string_len = strlen (string);
  size_t pattern_len = strlen (pattern);

  /* A pattern longer than the string can never be its suffix.  */
  if (pattern_len > string_len)
    return 0;
  /* The match is successful if the last PATTERN_LEN characters of
     STRING are exactly PATTERN.  */
  return memcmp (string + (string_len - pattern_len), pattern,
		 pattern_len) == 0;
}
827 /* Checks whether string S matches each element of ACCEPTS. A list
828 element is matched either with fnmatch() or match_backwards(),
829 according to whether the element contains wildcards or not.
831 If BACKWARD is 0, don't do backward comparison -- just compare
834 in_acclist (const char *const *accepts, const char *s, int backward)
836 for (; *accepts; accepts++)
838 if (has_wildcards_p (*accepts))
840 /* fnmatch returns 0 if the pattern *does* match the
842 if (fnmatch (*accepts, s, 0) == 0)
849 if (match_backwards (s, *accepts))
854 if (!strcmp (s, *accepts))
/* Return a freshly allocated copy of the suffix of STR, i.e. of the
   text after the last `.', provided that no `/' follows that `.'.
   Return NULL if there is no such suffix.  For instance:
   suffix ("foo.bar") -> "bar"
   suffix ("foo.bar.baz") -> "baz"
   suffix ("/foo/bar") -> NULL
   suffix ("/foo.bar/baz") -> NULL */
char *
suffix (const char *str)
{
  const char *last_dot = strrchr (str, '.');
  const char *last_slash = strrchr (str, '/');

  if (last_dot == NULL)
    return NULL;
  /* A `.' that belongs to a directory component does not count.  */
  if (last_slash != NULL && last_slash > last_dot)
    return NULL;
  return xstrdup (last_dot + 1);
}
879 /* Read a line from FP. The function reallocs the storage as needed
880 to accommodate any length of the line. Reallocs are done
881 exponentially, doubling the storage after each overflow to
882 minimize the number of calls to realloc() and fgets(). The newline
883 character at the end of line is retained.
885 After end-of-file is encountered without anything being read, NULL
886 is returned. NULL is also returned on error. To distinguish
887 between these two cases, use the stdio function ferror(). */
890 read_whole_line (FILE *fp)
894 char *line = (char *)xmalloc (bufsize);
896 while (fgets (line + length, bufsize - length, fp))
898 length += strlen (line + length);
900 if (line[length - 1] == '\n')
902 /* fgets() guarantees to read the whole line, or to use up the
903 space we've given it. We can double the buffer
906 line = xrealloc (line, bufsize);
908 if (length == 0 || ferror (fp))
913 if (length + 1 < bufsize)
914 /* Relieve the memory from our exponential greediness. We say
915 `length + 1' because the terminating \0 is not included in
916 LENGTH. We don't need to zero-terminate the string ourselves,
917 though, because fgets() does that. */
918 line = xrealloc (line, length + 1);
922 /* Read FILE into memory. A pointer to `struct file_memory' is
923 returned; use struct element `content' to access file contents, and
924 the element `length' to know the file length. `content' is *not*
925 zero-terminated, and you should *not* read or write beyond the [0,
926 length) range of characters.
928 After you are done with the file contents, call read_file_free to
931 Depending on the operating system and the type of file that is
932 being read, read_file() either mmap's the file into memory, or
933 reads the file into the core using read().
935 If file is named "-", fileno(stdin) is used for reading instead.
936 If you want to read from a real file named "-", use "./-" instead. */
939 read_file (const char *file)
942 struct file_memory *fm;
944 int inhibit_close = 0;
946 /* Some magic in the finest tradition of Perl and its kin: if FILE
947 is "-", just use stdin. */
952 /* Note that we don't inhibit mmap() in this case. If stdin is
953 redirected from a regular file, mmap() will still work. */
956 fd = open (file, O_RDONLY);
959 fm = xmalloc (sizeof (struct file_memory));
964 if (fstat (fd, &buf) < 0)
966 fm->length = buf.st_size;
967 /* NOTE: As far as I know, the callers of this function never
968 modify the file text. Relying on this would enable us to
969 specify PROT_READ and MAP_SHARED for a marginal gain in
970 efficiency, but at some cost to generality. */
971 fm->content = mmap (NULL, fm->length, PROT_READ | PROT_WRITE,
973 if (fm->content == MAP_FAILED)
983 /* The most common reason why mmap() fails is that FD does not point
984 to a plain file. However, it's also possible that mmap() doesn't
985 work for a particular type of file. Therefore, whenever mmap()
986 fails, we just fall back to the regular method. */
987 #endif /* HAVE_MMAP */
990 size = 512; /* number of bytes fm->contents can
991 hold at any given time. */
992 fm->content = xmalloc (size);
996 if (fm->length > size / 2)
998 /* #### I'm not sure whether the whole exponential-growth
999 thing makes sense with kernel read. On Linux at least,
1000 read() refuses to read more than 4K from a file at a
1001 single chunk anyway. But other Unixes might optimize it
1002 better, and it doesn't *hurt* anything, so I'm leaving
1005 /* Normally, we grow SIZE exponentially to make the number
1006 of calls to read() and realloc() logarithmic in relation
1007 to file size. However, read() can read an amount of data
1008 smaller than requested, and it would be unreasonable to
1009 double SIZE every time *something* was read. Therefore,
1010 we double SIZE only when the length exceeds half of the
1011 entire allocated size. */
1013 fm->content = xrealloc (fm->content, size);
1015 nread = read (fd, fm->content + fm->length, size - fm->length);
1017 /* Successful read. */
1018 fm->length += nread;
1028 if (size > fm->length && fm->length != 0)
1029 /* Due to exponential growth of fm->content, the allocated region
1030 might be much larger than what is actually needed. */
1031 fm->content = xrealloc (fm->content, fm->length);
1038 xfree (fm->content);
1043 /* Release the resources held by FM. Specifically, this calls
1044 munmap() or xfree() on fm->content, depending whether mmap or
1045 malloc/read were used to read in the file. It also frees the
1046 memory needed to hold the FM structure itself. */
1049 read_file_free (struct file_memory *fm)
1054 munmap (fm->content, fm->length);
1059 xfree (fm->content);
1064 /* Free the pointers in a NULL-terminated vector of pointers, then
1065 free the pointer itself. */
1067 free_vec (char **vec)
/* Append the NULL-terminated vector V2 to the NULL-terminated vector
   V1 and return the merged vector.  The function frees V2 (the
   vector itself, not the strings, which are now owned by the result)
   and reallocates V1, so neither pointer may be used after the call.
   If V1 is NULL, V2 is returned; if V2 is NULL, V1 is returned.  */
char **
merge_vecs (char **v1, char **v2)
{
  int i, j;

  if (!v1)
    return v2;
  if (!v2)
    return v1;
  if (!*v2)
    {
      /* To avoid j == 0 */
      xfree (v2);
      return v1;
    }
  /* Count v1.  */
  for (i = 0; v1[i]; i++)
    ;
  /* Count v2.  */
  for (j = 0; v2[j]; j++)
    ;
  /* Reallocate v1.  The elements are of type char *, so that is the
     size to multiply by.  */
  v1 = (char **)xrealloc (v1, (i + j + 1) * sizeof (char *));
  /* Copy j + 1 elements so that the terminating NULL comes along.  */
  memcpy (v1 + i, v2, (j + 1) * sizeof (char *));
  xfree (v2);
  return v1;
}
1107 /* A set of simple-minded routines to store strings in a linked list.
1108 This used to also be used for searching, but now we have hash
1111 /* It's a shame that these simple things like linked lists and hash
1112 tables (see hash.c) need to be implemented over and over again. It
1113 would be nice to be able to use the routines from glib -- see
1114 www.gtk.org for details. However, that would make Wget depend on
1115 glib, and I want to avoid dependencies to external libraries for
1116 reasons of convenience and portability (I suspect Wget is more
1117 portable than anything ever written for Gnome). */
1119 /* Append an element to the list. If the list has a huge number of
1120 elements, this can get slow because it has to find the list's
1121 ending. If you think you have to call slist_append in a loop,
1122 think about calling slist_prepend() followed by slist_nreverse(). */
1125 slist_append (slist *l, const char *s)
1127 slist *newel = (slist *)xmalloc (sizeof (slist));
1130 newel->string = xstrdup (s);
1135 /* Find the last element. */
1142 /* Prepend S to the list. Unlike slist_append(), this is O(1). */
1145 slist_prepend (slist *l, const char *s)
1147 slist *newel = (slist *)xmalloc (sizeof (slist));
1148 newel->string = xstrdup (s);
1153 /* Destructively reverse L. */
1156 slist_nreverse (slist *l)
1161 slist *next = l->next;
1169 /* Is there a specific entry in the list? */
1171 slist_contains (slist *l, const char *s)
1173 for (; l; l = l->next)
1174 if (!strcmp (l->string, s))
1179 /* Free the whole slist. */
1181 slist_free (slist *l)
1192 /* Sometimes it's useful to create "sets" of strings, i.e. special
1193 hash tables where you want to store strings as keys and merely
1194 query for their existence. Here is a set of utility routines that
1195 makes that transparent. */
1198 string_set_add (struct hash_table *ht, const char *s)
1200 /* First check whether the set element already exists. If it does,
1201 do nothing so that we don't have to free() the old element and
1202 then strdup() a new one. */
1203 if (hash_table_exists (ht, s))
1206 /* We use "1" as value. It provides us a useful and clear arbitrary
1207 value, and it consumes no memory -- the pointers to the same
1208 string "1" will be shared by all the key-value pairs in all `set'
1210 hash_table_put (ht, xstrdup (s), "1");
1213 /* Synonym for hash_table_exists... */
1216 string_set_exists (struct hash_table *ht, const char *s)
1218 return hash_table_exists (ht, s);
1222 string_set_free_mapper (void *key, void *value_ignored, void *arg_ignored)
1229 string_set_free (struct hash_table *ht)
1231 hash_table_map (ht, string_set_free_mapper, NULL);
1232 hash_table_destroy (ht);
1236 free_keys_and_values_mapper (void *key, void *value, void *arg_ignored)
1243 /* Another utility function: call free() on all keys and values of HT. */
1246 free_keys_and_values (struct hash_table *ht)
1248 hash_table_map (ht, free_keys_and_values_mapper, NULL);
/* Engine for legible and legible_very_long; this function works on
   strings.  REPR is the decimal representation of a number, with an
   optional leading `-'.  Return a pointer to a static buffer holding
   the same number with a `,' inserted between every group of three
   digits, e.g. "1234567" -> "1,234,567".  The buffer is overwritten
   by every call.  Output that would not fit into the buffer is
   truncated.  */
static char *
legible_1 (const char *repr)
{
  static char outbuf[128];
  /* Last byte of OUTBUF, reserved for the terminating zero.  */
  char *const limit = outbuf + sizeof (outbuf) - 1;
  int i, i1, mod;
  char *outptr;
  const char *inptr;

  /* Reset the pointers.  */
  outptr = outbuf;
  inptr = repr;
  /* If the number is negative, shift the pointers.  */
  if (*inptr == '-')
    {
      *outptr++ = '-';
      ++inptr;
    }
  /* How many digits before the first separator?  */
  mod = strlen (inptr) % 3;
  /* Insert them.  */
  for (i = 0; i < mod; i++)
    *outptr++ = inptr[i];
  /* Now insert the rest of them, putting separator before every
     third digit.  Each round stores at most two characters, so stop
     as soon as fewer than two free bytes remain before LIMIT.  */
  for (i1 = i, i = 0; inptr[i1] && outptr + 2 <= limit; i++, i1++)
    {
      if (i % 3 == 0 && i1 != 0)
	*outptr++ = ',';
      *outptr++ = inptr[i1];
    }
  /* Zero-terminate the string.  */
  *outptr = '\0';
  return outbuf;
}
1290 /* Legible -- return a static pointer to the legibly printed long. */
1295 /* Print the number into the buffer. */
1296 long_to_string (inbuf, l);
1297 return legible_1 (inbuf);
1300 /* The same as legible(), but works on VERY_LONG_TYPE. See sysdep.h. */
1302 legible_very_long (VERY_LONG_TYPE l)
1305 /* Print the number into the buffer. */
1306 sprintf (inbuf, VERY_LONG_FORMAT, l);
1307 return legible_1 (inbuf);
1310 /* Count the digits in a (long) integer. */
1315 while ((a /= 10) != 0)
1320 /* Print NUMBER to BUFFER. This is equivalent to sprintf(buffer,
1321 "%ld", number), only much faster.
1323 BUFFER should accept 24 bytes. This should suffice for the longest
1324 numbers on 64-bit machines, including the `-' sign and the trailing
1327 long_to_string (char *buffer, long number)
1329 #if (SIZEOF_LONG != 4) && (SIZEOF_LONG != 8)
1331 sprintf (buffer, "%ld", number);
1332 #else /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */
1342 #define FROB(figure) do { \
1343 if (force || number >= figure) \
1344 *p++ = number / figure + '0', number %= figure, force = 1; \
1346 #if SIZEOF_LONG == 8
1347 FROB (1000000000000000000L);
1348 FROB (100000000000000000L);
1349 FROB (10000000000000000L);
1350 FROB (1000000000000000L);
1351 FROB (100000000000000L);
1352 FROB (10000000000000L);
1353 FROB (1000000000000L);
1354 FROB (100000000000L);
1355 FROB (10000000000L);
1356 #endif /* SIZEOF_LONG == 8 */
1367 *p++ = number + '0';
1369 #endif /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */
1372 /* This should probably be at a better place, but it doesn't really
1373 fit into html-parse.c. */
1375 /* The function returns the pointer to the malloc-ed quoted version of
1376 string s. It will recognize and quote numeric and special graphic
1377 entities, as per RFC1866:
1385 No other entities are recognized or replaced. */
1387 html_quote_string (const char *s)
1393 /* Pass through the string, and count the new size. */
1394 for (i = 0; *s; s++, i++)
1397 i += 4; /* `amp;' */
1398 else if (*s == '<' || *s == '>')
1399 i += 3; /* `lt;' and `gt;' */
1400 else if (*s == '\"')
1401 i += 5; /* `quot;' */
1405 res = (char *)xmalloc (i + 1);
1407 for (p = res; *s; s++)
1420 *p++ = (*s == '<' ? 'l' : 'g');