1 /* Various functions of utilitarian nature.
2 Copyright (C) 1995, 1996, 1997, 1998, 2000 Free Software Foundation, Inc.
4 This file is part of Wget.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
26 #else /* not HAVE_STRING_H */
28 #endif /* not HAVE_STRING_H */
30 #include <sys/types.h>
35 # include <sys/mman.h>
44 #ifdef HAVE_SYS_UTIME_H
45 # include <sys/utime.h>
49 # include <libc.h> /* for access() */
64 /* Croak the fatal memory error and bail out with non-zero exit
67 memfatal (const char *s)
69 /* HACK: expose save_log_p from log.c, so we can turn it off in
70 order to prevent saving the log. Saving the log is dangerous
71 because logprintf() and logputs() can call malloc(), so this
72 could infloop. When logging is turned off, infloop can no longer
74 extern int save_log_p;
77 logprintf (LOG_ALWAYS, _("%s: %s: Not enough memory.\n"), exec_name, s);
81 /* xmalloc, xrealloc and xstrdup exit the program if there is not
82 enough memory. xstrdup also implements strdup on systems that do
83 not have it. xfree is provided to make leak-tracking easier.
84 Currently it's a no-op. */
104 xrealloc (void *obj, size_t size)
108 /* Not all Un*xes have the feature of realloc() that calling it with
109 a NULL-pointer is the same as malloc(), but it is easy to
112 res = realloc (obj, size);
116 memfatal ("realloc");
121 xstrdup (const char *s)
125 char *s1 = malloc (l + 1);
128 memcpy (s1, s, l + 1);
130 #else /* HAVE_STRDUP */
131 char *s1 = strdup (s);
135 #endif /* HAVE_STRDUP */
138 /* Copy the string formed by two pointers (one on the beginning, other
139 on the char after the last char) to a new, malloc-ed location.
142 strdupdelim (const char *beg, const char *end)
144 char *res = (char *)xmalloc (end - beg + 1);
145 memcpy (res, beg, end - beg);
146 res[end - beg] = '\0';
150 /* Parse a string containing comma-separated elements, and return a
151 vector of char pointers with the elements. Spaces following the
152 commas are ignored. */
154 sepstring (const char *s)
168 res = (char **)xrealloc (res, (i + 2) * sizeof (char *));
169 res[i] = strdupdelim (p, s);
172 /* Skip the blanks following the ','. */
180 res = (char **)xrealloc (res, (i + 2) * sizeof (char *));
181 res[i] = strdupdelim (p, s);
186 /* Return pointer to a static char[] buffer in which zero-terminated
187 string-representation of TM (in form hh:mm:ss) is printed. It is
188 shamelessly non-reentrant, but it doesn't matter, really.
190 If TM is non-NULL, the time_t of the current time will be stored
193 time_str (time_t *tm)
203 ptm = localtime (&tim);
204 sprintf (tms, "%02d:%02d:%02d", ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
208 /* Returns an error message for ERRNUM. #### This requires more work.
209 This function, as well as the whole error system, is very
212 uerrmsg (uerr_t errnum)
217 return _("Unknown/unsupported protocol");
220 return _("Invalid port specification");
223 return _("Invalid host name");
227 /* $@#@#$ compiler. */
232 /* The Windows versions of the following two functions are defined in
235 /* A cuserid() imitation using getpwuid(), to avoid hassling with
236 utmp. Besides, not all systems have cuserid(). Under Windows, it
237 is defined in mswindows.c.
239 If WHERE is non-NULL, the username will be stored there.
240 Otherwise, it will be returned as a static buffer (as returned by
241 getpwuid()). In the latter case, the buffer should be copied
242 before calling getpwuid() or pwd_cuserid() again. */
245 pwd_cuserid (char *where)
249 if (!(pwd = getpwuid (getuid ())) || !pwd->pw_name)
253 strcpy (where, pwd->pw_name);
261 fork_to_background (void)
264 /* Whether we arrange our own version of opt.lfilename here. */
269 opt.lfilename = unique_name (DEFAULT_LOGFILE);
281 /* parent, no error */
282 printf (_("Continuing in background.\n"));
284 printf (_("Output will be written to `%s'.\n"), opt.lfilename);
287 /* child: keep running */
289 #endif /* not WINDOWS */
291 /* Canonicalize PATH, and return a new path. The new path differs from PATH
293 Multiple `/'s are collapsed to a single `/'.
294 Leading `./'s and trailing `/.'s are removed.
295 Trailing `/'s are removed.
296 Non-leading `../'s and trailing `..'s are handled by removing
297 portions of the path.
299 E.g. "a/b/c/./../d/.." will yield "a/b". This function originates
303 Always use '/' as stub_char.
304 Don't check for local things using canon_stat.
305 Change the original string instead of strdup-ing.
306 React correctly when beginning with `./' and `../'. */
308 path_simplify (char *path)
310 register int i, start, ddot;
316 /*stub_char = (*path == '/') ? '/' : '.';*/
319 /* Addition: Remove all `./'-s preceding the string. If `../'-s
320 precede, put `/' in front and remove them too. */
325 if (path[i] == '.' && path[i + 1] == '/')
327 else if (path[i] == '.' && path[i + 1] == '.' && path[i + 2] == '/')
336 strcpy (path, path + i - ddot);
338 /* Replace single `.' or `..' with `/'. */
339 if ((path[0] == '.' && path[1] == '\0')
340 || (path[0] == '.' && path[1] == '.' && path[2] == '\0'))
346 /* Walk along PATH looking for things to compact. */
353 while (path[i] && path[i] != '/')
358 /* If we didn't find any slashes, then there is nothing left to do. */
362 /* Handle multiple `/'s in a row. */
363 while (path[i] == '/')
366 if ((start + 1) != i)
368 strcpy (path + start + 1, path + i);
372 /* Check for trailing `/'. */
373 if (start && !path[i])
380 /* Check for `../', `./' or trailing `.' by itself. */
383 /* Handle trailing `.' by itself. */
388 if (path[i + 1] == '/')
390 strcpy (path + i, path + i + 1);
391 i = (start < 0) ? 0 : start;
395 /* Handle `../' or trailing `..' by itself. */
396 if (path[i + 1] == '.' &&
397 (path[i + 2] == '/' || !path[i + 2]))
399 while (--start > -1 && path[start] != '/');
400 strcpy (path + start + 1, path + i + 2);
401 i = (start < 0) ? 0 : start;
414 /* "Touch" FILE, i.e. make its atime and mtime equal to the time
415 specified with TM. */
417 touch (const char *file, time_t tm)
419 #ifdef HAVE_STRUCT_UTIMBUF
420 struct utimbuf times;
421 times.actime = times.modtime = tm;
424 times[0] = times[1] = tm;
427 if (utime (file, &times) == -1)
428 logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno));
431 /* Checks if FILE is a symbolic link, and removes it if it is. Does
432 nothing under MS-Windows. */
434 remove_link (const char *file)
439 if (lstat (file, &st) == 0 && S_ISLNK (st.st_mode))
441 DEBUGP (("Unlinking %s (symlink).\n", file));
444 logprintf (LOG_VERBOSE, _("Failed to unlink symlink `%s': %s\n"),
445 file, strerror (errno));
450 /* Does FILENAME exist? This is quite a lousy implementation, since
451 it supplies no error codes -- only a yes-or-no answer. Thus it
452 will return that a file does not exist if, e.g., the directory is
453 unreadable. I don't mind it too much currently, though. The
454 proper way should, of course, be to have a third, error state,
455 other than true/false, but that would introduce uncalled-for
456 additional complexity to the callers. */
458 file_exists_p (const char *filename)
461 return access (filename, F_OK) >= 0;
464 return stat (filename, &buf) >= 0;
468 /* Returns 0 if PATH is a directory, 1 otherwise (any kind of file).
469 Returns 0 on error. */
471 file_non_directory_p (const char *path)
474 /* Use lstat() rather than stat() so that symbolic links pointing to
475 directories can be identified correctly. */
476 if (lstat (path, &buf) != 0)
478 return S_ISDIR (buf.st_mode) ? 0 : 1;
481 /* Return a unique filename, given a prefix and count */
483 unique_name_1 (const char *fileprefix, int count)
489 filename = (char *)xmalloc (strlen (fileprefix) + numdigit (count) + 2);
490 sprintf (filename, "%s.%d", fileprefix, count);
493 filename = xstrdup (fileprefix);
495 if (!file_exists_p (filename))
504 /* Return a unique file name, based on PREFIX. */
506 unique_name (const char *prefix)
512 file = unique_name_1 (prefix, count++);
516 /* Create DIRECTORY. If some of the pathname components of DIRECTORY
517 are missing, create them first. In case any mkdir() call fails,
518 return its error status. Returns 0 on successful completion.
520 The behaviour of this function should be identical to the behaviour
521 of `mkdir -p' on systems where mkdir supports the `-p' option. */
523 make_directory (const char *directory)
529 /* Make a copy of dir, to be able to write to it. Otherwise, the
530 function is unsafe if called with a read-only char *argument. */
531 STRDUP_ALLOCA (dir, directory);
533 /* If the first character of dir is '/', skip it (and thus enable
534 creation of absolute-pathname directories). */
535 for (i = (*dir == '/'); 1; ++i)
537 for (; dir[i] && dir[i] != '/'; i++)
542 /* Check whether the directory already exists. */
543 if (!file_exists_p (dir))
545 if (mkdir (dir, 0777) < 0)
556 static int in_acclist PARAMS ((const char *const *, const char *, int));
558 /* Determine whether a file is acceptable to be followed, according to
559 lists of patterns to accept/reject. */
561 acceptable (const char *s)
565 while (l && s[l] != '/')
572 return (in_acclist ((const char *const *)opt.accepts, s, 1)
573 && !in_acclist ((const char *const *)opt.rejects, s, 1));
575 return in_acclist ((const char *const *)opt.accepts, s, 1);
577 else if (opt.rejects)
578 return !in_acclist ((const char *const *)opt.rejects, s, 1);
582 /* Compare S1 and S2 frontally; S2 must begin with S1. E.g. if S1 is
583 `/something', frontcmp() will return 1 only if S2 begins with
584 `/something'. Otherwise, 0 is returned. */
586 frontcmp (const char *s1, const char *s2)
588 for (; *s1 && *s2 && (*s1 == *s2); ++s1, ++s2);
592 /* Iterate through STRLIST, and return the first element that matches
593 S, through wildcards or front comparison (as appropriate). */
595 proclist (char **strlist, const char *s, enum accd flags)
599 for (x = strlist; *x; x++)
600 if (has_wildcards_p (*x))
602 if (fnmatch (*x, s, FNM_PATHNAME) == 0)
607 char *p = *x + ((flags & ALLABS) && (**x == '/')); /* Remove '/' */
614 /* Returns whether DIRECTORY is acceptable for download, wrt the
615 include/exclude lists.
617 If FLAGS is ALLABS, the leading `/' is ignored in paths; relative
618 and absolute paths may be freely intermixed. */
620 accdir (const char *directory, enum accd flags)
622 /* Remove starting '/'. */
623 if (flags & ALLABS && *directory == '/')
627 if (!proclist (opt.includes, directory, flags))
632 if (proclist (opt.excludes, directory, flags))
638 /* Match the end of STRING against PATTERN. For instance:
640 match_backwards ("abc", "bc") -> 1
641 match_backwards ("abc", "ab") -> 0
642 match_backwards ("abc", "abc") -> 1 */
644 match_backwards (const char *string, const char *pattern)
648 for (i = strlen (string), j = strlen (pattern); i >= 0 && j >= 0; i--, j--)
649 if (string[i] != pattern[j])
651 /* If the pattern was exhausted, the match was successful. */
658 /* Checks whether string S matches each element of ACCEPTS. A list
659 element is matched either with fnmatch() or match_backwards(),
660 according to whether the element contains wildcards or not.
662 If BACKWARD is 0, don't do backward comparison -- just compare
665 in_acclist (const char *const *accepts, const char *s, int backward)
667 for (; *accepts; accepts++)
669 if (has_wildcards_p (*accepts))
671 /* fnmatch returns 0 if the pattern *does* match the
673 if (fnmatch (*accepts, s, 0) == 0)
680 if (match_backwards (s, *accepts))
685 if (!strcmp (s, *accepts))
693 /* Return the malloc-ed suffix of STR. For instance:
694 suffix ("foo.bar") -> "bar"
695 suffix ("foo.bar.baz") -> "baz"
696 suffix ("/foo/bar") -> NULL
697 suffix ("/foo.bar/baz") -> NULL */
699 suffix (const char *str)
703 for (i = strlen (str); i && str[i] != '/' && str[i] != '.'; i--);
705 return xstrdup (str + i);
710 /* Read a line from FP. The function reallocs the storage as needed
711 to accommodate any length of the line. Reallocs are done
712 exponentially, doubling the storage after each overflow to
713 minimize the number of calls to realloc() and fgets(). The newline
714 character at the end of line is retained.
716 After end-of-file is encountered without anything being read, NULL
717 is returned. NULL is also returned on error. To distinguish
718 between these two cases, use the stdio function ferror(). */
721 read_whole_line (FILE *fp)
725 char *line = (char *)xmalloc (bufsize);
727 while (fgets (line + length, bufsize - length, fp))
729 length += strlen (line + length);
731 if (line[length - 1] == '\n')
733 /* fgets() guarantees to read the whole line, or to use up the
734 space we've given it. We can double the buffer
737 line = xrealloc (line, bufsize);
739 if (length == 0 || ferror (fp))
744 if (length + 1 < bufsize)
745 /* Relieve the memory from our exponential greediness. We say
746 `length + 1' because the terminating \0 is not included in
747 LENGTH. We don't need to zero-terminate the string ourselves,
748 though, because fgets() does that. */
749 line = xrealloc (line, length + 1);
753 /* Read FILE into memory. A pointer to `struct file_memory' is
754 returned; use struct element `content' to access file contents, and
755 the element `length' to know the file length. `content' is *not*
756 zero-terminated, and you should *not* read or write beyond the [0,
757 length) range of characters.
759 After you are done with the file contents, call read_file_free to
762 Depending on the operating system and the type of file that is
763 being read, read_file() either mmap's the file into memory, or
764 reads the file into the core using read().
766 If file is named "-", fileno(stdin) is used for reading instead.
767 If you want to read from a real file named "-", use "./-" instead. */
770 read_file (const char *file)
773 struct file_memory *fm;
775 int inhibit_close = 0;
777 /* Some magic in the finest tradition of Perl and its kin: if FILE
778 is "-", just use stdin. */
783 /* Note that we don't inhibit mmap() in this case. If stdin is
784 redirected from a regular file, mmap() will still work. */
787 fd = open (file, O_RDONLY);
790 fm = xmalloc (sizeof (struct file_memory));
795 if (fstat (fd, &buf) < 0)
797 fm->length = buf.st_size;
798 /* NOTE: As far as I know, the callers of this function never
799 modify the file text. Relying on this would enable us to
800 specify PROT_READ and MAP_SHARED for a marginal gain in
801 efficiency, but at some cost to generality. */
802 fm->content = mmap (NULL, fm->length, PROT_READ | PROT_WRITE,
804 if (fm->content == MAP_FAILED)
814 /* The most common reason why mmap() fails is that FD does not point
815 to a plain file. However, it's also possible that mmap() doesn't
816 work for a particular type of file. Therefore, whenever mmap()
817 fails, we just fall back to the regular method. */
818 #endif /* HAVE_MMAP */
821 size = 512; /* number of bytes fm->content can
822 hold at any given time. */
823 fm->content = xmalloc (size);
827 if (fm->length > size / 2)
829 /* #### I'm not sure whether the whole exponential-growth
830 thing makes sense with kernel read. On Linux at least,
831 read() refuses to read more than 4K from a file at a
832 single chunk anyway. But other Unixes might optimize it
833 better, and it doesn't *hurt* anything, so I'm leaving
836 /* Normally, we grow SIZE exponentially to make the number
837 of calls to read() and realloc() logarithmic in relation
838 to file size. However, read() can read an amount of data
839 smaller than requested, and it would be unreasonable to
840 double SIZE every time *something* was read. Therefore,
841 we double SIZE only when the length exceeds half of the
842 entire allocated size. */
844 fm->content = xrealloc (fm->content, size);
846 nread = read (fd, fm->content + fm->length, size - fm->length);
848 /* Successful read. */
859 if (size > fm->length && fm->length != 0)
860 /* Due to exponential growth of fm->content, the allocated region
861 might be much larger than what is actually needed. */
862 fm->content = xrealloc (fm->content, fm->length);
874 /* Release the resources held by FM. Specifically, this calls
875 munmap() or xfree() on fm->content, depending on whether mmap or
876 malloc/read were used to read in the file. It also frees the
877 memory needed to hold the FM structure itself. */
880 read_file_free (struct file_memory *fm)
885 munmap (fm->content, fm->length);
895 /* Free the pointers in a NULL-terminated vector of pointers, then
896 free the pointer itself. */
898 free_vec (char **vec)
909 /* Append vector V2 to vector V1. The function frees V2 and
910 reallocates V1 (thus you may not use the contents of either
911 pointer after the call). If V1 is NULL, V2 is returned. */
913 merge_vecs (char **v1, char **v2)
923 /* To avoid j == 0 */
928 for (i = 0; v1[i]; i++);
930 for (j = 0; v2[j]; j++);
932 v1 = (char **)xrealloc (v1, (i + j + 1) * sizeof (char **));
933 memcpy (v1 + i, v2, (j + 1) * sizeof (char *));
938 /* A set of simple-minded routines to store strings in a linked list.
939 This used to also be used for searching, but now we have hash
942 /* It's a shame that these simple things like linked lists and hash
943 tables (see hash.c) need to be implemented over and over again. It
944 would be nice to be able to use the routines from glib -- see
945 www.gtk.org for details. However, that would make Wget depend on
946 glib, and I want to avoid dependencies to external libraries for
947 reasons of convenience and portability (I suspect Wget is more
948 portable than anything ever written for Gnome). */
950 /* Append an element to the list. If the list has a huge number of
951 elements, this can get slow because it has to find the list's
952 ending. If you think you have to call slist_append in a loop,
953 think about calling slist_prepend() followed by slist_nreverse(). */
956 slist_append (slist *l, const char *s)
958 slist *newel = (slist *)xmalloc (sizeof (slist));
961 newel->string = xstrdup (s);
966 /* Find the last element. */
973 /* Prepend S to the list. Unlike slist_append(), this is O(1). */
976 slist_prepend (slist *l, const char *s)
978 slist *newel = (slist *)xmalloc (sizeof (slist));
979 newel->string = xstrdup (s);
984 /* Destructively reverse L. */
987 slist_nreverse (slist *l)
992 slist *next = l->next;
1000 /* Is there a specific entry in the list? */
1002 slist_contains (slist *l, const char *s)
1004 for (; l; l = l->next)
1005 if (!strcmp (l->string, s))
1010 /* Free the whole slist. */
1012 slist_free (slist *l)
1023 /* Sometimes it's useful to create "sets" of strings, i.e. special
1024 hash tables where you want to store strings as keys and merely
1025 query for their existence. Here is a set of utility routines that
1026 makes that transparent. */
1029 string_set_add (struct hash_table *ht, const char *s)
1031 /* First check whether the set element already exists. If it does,
1032 do nothing so that we don't have to free() the old element and
1033 then strdup() a new one. */
1034 if (hash_table_exists (ht, s))
1037 /* We use "1" as value. It provides us a useful and clear arbitrary
1038 value, and it consumes no memory -- the pointers to the same
1039 string "1" will be shared by all the key-value pairs in all `set'
1041 hash_table_put (ht, xstrdup (s), "1");
1044 /* Synonym for hash_table_exists... */
1047 string_set_exists (struct hash_table *ht, const char *s)
1049 return hash_table_exists (ht, s);
1053 string_set_free_mapper (void *key, void *value_ignored, void *arg_ignored)
1060 string_set_free (struct hash_table *ht)
1062 hash_table_map (ht, string_set_free_mapper, NULL);
1063 hash_table_destroy (ht);
1067 free_keys_and_values_mapper (void *key, void *value, void *arg_ignored)
1074 /* Another utility function: call free() on all keys and values of HT. */
1077 free_keys_and_values (struct hash_table *ht)
1079 hash_table_map (ht, free_keys_and_values_mapper, NULL);
1083 /* Engine for legible and legible_very_long; this function works on
1087 legible_1 (const char *repr)
1089 static char outbuf[128];
1094 /* Reset the pointers. */
1097 /* If the number is negative, shift the pointers. */
1103 /* How many digits before the first separator? */
1104 mod = strlen (inptr) % 3;
1106 for (i = 0; i < mod; i++)
1107 *outptr++ = inptr[i];
1108 /* Now insert the rest of them, putting separator before every
1110 for (i1 = i, i = 0; inptr[i1]; i++, i1++)
1112 if (i % 3 == 0 && i1 != 0)
1114 *outptr++ = inptr[i1];
1116 /* Zero-terminate the string. */
1121 /* Legible -- return a static pointer to the legibly printed long. */
1126 /* Print the number into the buffer. */
1127 long_to_string (inbuf, l);
1128 return legible_1 (inbuf);
1131 /* The same as legible(), but works on VERY_LONG_TYPE. See sysdep.h. */
1133 legible_very_long (VERY_LONG_TYPE l)
1136 /* Print the number into the buffer. */
1137 sprintf (inbuf, VERY_LONG_FORMAT, l);
1138 return legible_1 (inbuf);
1141 /* Count the digits in a (long) integer. */
1146 while ((a /= 10) != 0)
1151 /* Print NUMBER to BUFFER. This is equivalent to sprintf(buffer,
1152 "%ld", number), only much faster.
1154 BUFFER should accept 24 bytes. This should suffice for the longest
1155 numbers on 64-bit machines, including the `-' sign and the trailing
1158 long_to_string (char *buffer, long number)
1160 #if (SIZEOF_LONG != 4) && (SIZEOF_LONG != 8)
1162 sprintf (buffer, "%ld", number);
1163 #else /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */
1173 #define FROB(figure) do { \
1174 if (force || number >= figure) \
1175 *p++ = number / figure + '0', number %= figure, force = 1; \
1177 #if SIZEOF_LONG == 8
1178 FROB (1000000000000000000L);
1179 FROB (100000000000000000L);
1180 FROB (10000000000000000L);
1181 FROB (1000000000000000L);
1182 FROB (100000000000000L);
1183 FROB (10000000000000L);
1184 FROB (1000000000000L);
1185 FROB (100000000000L);
1186 FROB (10000000000L);
1187 #endif /* SIZEOF_LONG == 8 */
1198 *p++ = number + '0';
1200 #endif /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */
1203 /* This should probably be at a better place, but it doesn't really
1204 fit into html-parse.c. */
1206 /* The function returns the pointer to the malloc-ed quoted version of
1207 string s. It will recognize and quote numeric and special graphic
1208 entities, as per RFC1866:
1216 No other entities are recognized or replaced. */
1218 html_quote_string (const char *s)
1224 /* Pass through the string, and count the new size. */
1225 for (i = 0; *s; s++, i++)
1228 i += 4; /* `amp;' */
1229 else if (*s == '<' || *s == '>')
1230 i += 3; /* `lt;' and `gt;' */
1231 else if (*s == '\"')
1232 i += 5; /* `quot;' */
1236 res = (char *)xmalloc (i + 1);
1238 for (p = res; *s; s++)
1251 *p++ = (*s == '<' ? 'l' : 'g');