1 /* Various functions of utilitarian nature.
2 Copyright (C) 1995, 1996, 1997, 1998, 2000 Free Software Foundation, Inc.
4 This file is part of Wget.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
26 #else /* not HAVE_STRING_H */
28 #endif /* not HAVE_STRING_H */
30 #include <sys/types.h>
35 # include <sys/mman.h>
44 #ifdef HAVE_SYS_UTIME_H
45 # include <sys/utime.h>
49 # include <libc.h> /* for access() */
64 /* Croak the fatal memory error and bail out with non-zero exit
67 memfatal (const char *s)
69 /* HACK: expose save_log_p from log.c, so we can turn it off in
70 order to prevent saving the log. Saving the log is dangerous
71 because logprintf() and logputs() can call malloc(), so this
72 could infloop. When logging is turned off, infloop can no longer
74 extern int save_log_p;
77 logprintf (LOG_ALWAYS, _("%s: %s: Not enough memory.\n"), exec_name, s);
81 /* xmalloc, xrealloc and xstrdup exit the program if there is not
82 enough memory. xstrdup also implements strdup on systems that do
96 xrealloc (void *obj, size_t size)
100 /* Not all Un*xes have the feature of realloc() that calling it with
101 a NULL-pointer is the same as malloc(), but it is easy to
104 res = realloc (obj, size);
108 memfatal ("realloc");
113 xstrdup (const char *s)
117 char *s1 = malloc (l + 1);
120 memcpy (s1, s, l + 1);
122 #else /* HAVE_STRDUP */
123 char *s1 = strdup (s);
127 #endif /* HAVE_STRDUP */
130 /* Copy the string formed by two pointers (one on the beginning, other
131 on the char after the last char) to a new, malloc-ed location.
134 strdupdelim (const char *beg, const char *end)
136 char *res = (char *)xmalloc (end - beg + 1);
137 memcpy (res, beg, end - beg);
138 res[end - beg] = '\0';
142 /* Parse a string containing comma-separated elements, and return a
143 vector of char pointers with the elements. Spaces following the
144 commas are ignored. */
146 sepstring (const char *s)
160 res = (char **)xrealloc (res, (i + 2) * sizeof (char *));
161 res[i] = strdupdelim (p, s);
164 /* Skip the blanks following the ','. */
172 res = (char **)xrealloc (res, (i + 2) * sizeof (char *));
173 res[i] = strdupdelim (p, s);
178 /* Return pointer to a static char[] buffer in which zero-terminated
179 string-representation of TM (in form hh:mm:ss) is printed. It is
180 shamelessly non-reentrant, but it doesn't matter, really.
182 If TM is non-NULL, the time_t of the current time will be stored
185 time_str (time_t *tm)
195 ptm = localtime (&tim);
196 sprintf (tms, "%02d:%02d:%02d", ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
200 /* Returns an error message for ERRNUM. #### This requires more work.
201 This function, as well as the whole error system, is very
204 uerrmsg (uerr_t errnum)
209 return _("Unknown/unsupported protocol");
212 return _("Invalid port specification");
215 return _("Invalid host name");
219 /* $@#@#$ compiler. */
224 /* The Windows versions of the following two functions are defined in
227 /* A cuserid() imitation using getpwuid(), to avoid hassling with
228 utmp. Besides, not all systems have cuserid(). Under Windows, it
229 is defined in mswindows.c.
231 If WHERE is non-NULL, the username will be stored there.
232 Otherwise, it will be returned as a static buffer (as returned by
233 getpwuid()). In the latter case, the buffer should be copied
234 before calling getpwuid() or pwd_cuserid() again. */
237 pwd_cuserid (char *where)
241 if (!(pwd = getpwuid (getuid ())) || !pwd->pw_name)
245 strcpy (where, pwd->pw_name);
253 fork_to_background (void)
256 /* Whether we arrange our own version of opt.lfilename here. */
261 opt.lfilename = unique_name (DEFAULT_LOGFILE);
273 /* parent, no error */
274 printf (_("Continuing in background.\n"));
276 printf (_("Output will be written to `%s'.\n"), opt.lfilename);
279 /* child: keep running */
281 #endif /* not WINDOWS */
283 /* Canonicalize PATH, and return a new path. The new path differs from PATH
285 Multiple `/'s are collapsed to a single `/'.
286 Leading `./'s and trailing `/.'s are removed.
287 Trailing `/'s are removed.
288 Non-leading `../'s and trailing `..'s are handled by removing
289 portions of the path.
291 E.g. "a/b/c/./../d/.." will yield "a/b". This function originates
295 Always use '/' as stub_char.
296 Don't check for local things using canon_stat.
297 Change the original string instead of strdup-ing.
298 React correctly when beginning with `./' and `../'. */
300 path_simplify (char *path)
302 register int i, start, ddot;
308 /*stub_char = (*path == '/') ? '/' : '.';*/
311 /* Addition: Remove all `./'-s preceding the string. If `../'-s
312 precede, put `/' in front and remove them too. */
317 if (path[i] == '.' && path[i + 1] == '/')
319 else if (path[i] == '.' && path[i + 1] == '.' && path[i + 2] == '/')
328 strcpy (path, path + i - ddot);
330 /* Replace single `.' or `..' with `/'. */
331 if ((path[0] == '.' && path[1] == '\0')
332 || (path[0] == '.' && path[1] == '.' && path[2] == '\0'))
338 /* Walk along PATH looking for things to compact. */
345 while (path[i] && path[i] != '/')
350 /* If we didn't find any slashes, then there is nothing left to do. */
354 /* Handle multiple `/'s in a row. */
355 while (path[i] == '/')
358 if ((start + 1) != i)
360 strcpy (path + start + 1, path + i);
364 /* Check for trailing `/'. */
365 if (start && !path[i])
372 /* Check for `../', `./' or trailing `.' by itself. */
375 /* Handle trailing `.' by itself. */
380 if (path[i + 1] == '/')
382 strcpy (path + i, path + i + 1);
383 i = (start < 0) ? 0 : start;
387 /* Handle `../' or trailing `..' by itself. */
388 if (path[i + 1] == '.' &&
389 (path[i + 2] == '/' || !path[i + 2]))
391 while (--start > -1 && path[start] != '/');
392 strcpy (path + start + 1, path + i + 2);
393 i = (start < 0) ? 0 : start;
406 /* "Touch" FILE, i.e. make its atime and mtime equal to the time
407 specified with TM. */
409 touch (const char *file, time_t tm)
411 #ifdef HAVE_STRUCT_UTIMBUF
412 struct utimbuf times;
413 times.actime = times.modtime = tm;
416 times[0] = times[1] = tm;
419 if (utime (file, &times) == -1)
420 logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno));
423 /* Checks if FILE is a symbolic link, and removes it if it is. Does
424 nothing under MS-Windows. */
426 remove_link (const char *file)
431 if (lstat (file, &st) == 0 && S_ISLNK (st.st_mode))
433 DEBUGP (("Unlinking %s (symlink).\n", file));
436 logprintf (LOG_VERBOSE, _("Failed to unlink symlink `%s': %s\n"),
437 file, strerror (errno));
442 /* Does FILENAME exist? This is quite a lousy implementation, since
443 it supplies no error codes -- only a yes-or-no answer. Thus it
444 will return that a file does not exist if, e.g., the directory is
445 unreadable. I don't mind it too much currently, though. The
446 proper way should, of course, be to have a third, error state,
447 other than true/false, but that would introduce uncalled-for
448 additional complexity to the callers. */
450 file_exists_p (const char *filename)
453 return access (filename, F_OK) >= 0;
456 return stat (filename, &buf) >= 0;
460 /* Returns 0 if PATH is a directory, 1 otherwise (any kind of file).
461 Returns 0 on error. */
463 file_non_directory_p (const char *path)
466 /* Use lstat() rather than stat() so that symbolic links pointing to
467 directories can be identified correctly. */
468 if (lstat (path, &buf) != 0)
470 return S_ISDIR (buf.st_mode) ? 0 : 1;
473 /* Return a unique filename, given a prefix and count */
475 unique_name_1 (const char *fileprefix, int count)
481 filename = (char *)xmalloc (strlen (fileprefix) + numdigit (count) + 2);
482 sprintf (filename, "%s.%d", fileprefix, count);
485 filename = xstrdup (fileprefix);
487 if (!file_exists_p (filename))
496 /* Return a unique file name, based on PREFIX. */
498 unique_name (const char *prefix)
504 file = unique_name_1 (prefix, count++);
508 /* Create DIRECTORY. If some of the pathname components of DIRECTORY
509 are missing, create them first. In case any mkdir() call fails,
510 return its error status. Returns 0 on successful completion.
512 The behaviour of this function should be identical to the behaviour
513 of `mkdir -p' on systems where mkdir supports the `-p' option. */
515 make_directory (const char *directory)
521 /* Make a copy of dir, to be able to write to it. Otherwise, the
522 function is unsafe if called with a read-only char *argument. */
523 STRDUP_ALLOCA (dir, directory);
525 /* If the first character of dir is '/', skip it (and thus enable
526 creation of absolute-pathname directories). */
527 for (i = (*dir == '/'); 1; ++i)
529 for (; dir[i] && dir[i] != '/'; i++)
534 /* Check whether the directory already exists. */
535 if (!file_exists_p (dir))
537 if (mkdir (dir, 0777) < 0)
548 static int in_acclist PARAMS ((const char *const *, const char *, int));
550 /* Determine whether a file is acceptable to be followed, according to
551 lists of patterns to accept/reject. */
553 acceptable (const char *s)
557 while (l && s[l] != '/')
564 return (in_acclist ((const char *const *)opt.accepts, s, 1)
565 && !in_acclist ((const char *const *)opt.rejects, s, 1));
567 return in_acclist ((const char *const *)opt.accepts, s, 1);
569 else if (opt.rejects)
570 return !in_acclist ((const char *const *)opt.rejects, s, 1);
574 /* Compare S1 and S2 frontally; S2 must begin with S1. E.g. if S1 is
575 `/something', frontcmp() will return 1 only if S2 begins with
576 `/something'. Otherwise, 0 is returned. */
578 frontcmp (const char *s1, const char *s2)
580 for (; *s1 && *s2 && (*s1 == *s2); ++s1, ++s2);
584 /* Iterate through STRLIST, and return the first element that matches
585 S, through wildcards or front comparison (as appropriate). */
587 proclist (char **strlist, const char *s, enum accd flags)
591 for (x = strlist; *x; x++)
592 if (has_wildcards_p (*x))
594 if (fnmatch (*x, s, FNM_PATHNAME) == 0)
599 char *p = *x + ((flags & ALLABS) && (**x == '/')); /* Remove '/' */
606 /* Returns whether DIRECTORY is acceptable for download, wrt the
607 include/exclude lists.
609 If FLAGS is ALLABS, the leading `/' is ignored in paths; relative
610 and absolute paths may be freely intermixed. */
612 accdir (const char *directory, enum accd flags)
614 /* Remove starting '/'. */
615 if (flags & ALLABS && *directory == '/')
619 if (!proclist (opt.includes, directory, flags))
624 if (proclist (opt.excludes, directory, flags))
630 /* Match the end of STRING against PATTERN. For instance:
632 match_backwards ("abc", "bc") -> 1
633 match_backwards ("abc", "ab") -> 0
634 match_backwards ("abc", "abc") -> 1 */
636 match_backwards (const char *string, const char *pattern)
640 for (i = strlen (string), j = strlen (pattern); i >= 0 && j >= 0; i--, j--)
641 if (string[i] != pattern[j])
643 /* If the pattern was exhausted, the match was successful. */
650 /* Checks whether string S matches each element of ACCEPTS. A list
651 element is matched either with fnmatch() or match_backwards(),
652 according to whether the element contains wildcards or not.
654 If BACKWARD is 0, don't do backward comparison -- just compare
657 in_acclist (const char *const *accepts, const char *s, int backward)
659 for (; *accepts; accepts++)
661 if (has_wildcards_p (*accepts))
663 /* fnmatch returns 0 if the pattern *does* match the
665 if (fnmatch (*accepts, s, 0) == 0)
672 if (match_backwards (s, *accepts))
677 if (!strcmp (s, *accepts))
685 /* Return the malloc-ed suffix of STR. For instance:
686 suffix ("foo.bar") -> "bar"
687 suffix ("foo.bar.baz") -> "baz"
688 suffix ("/foo/bar") -> NULL
689 suffix ("/foo.bar/baz") -> NULL */
691 suffix (const char *str)
695 for (i = strlen (str); i && str[i] != '/' && str[i] != '.'; i--);
697 return xstrdup (str + i);
702 /* Read a line from FP. The function reallocs the storage as needed
703 to accommodate any length of the line. Reallocs are done
704 exponentially, doubling the storage after each overflow to
705 minimize the number of calls to realloc() and fgets(). The newline
706 character at the end of line is retained.
708 After end-of-file is encountered without anything being read, NULL
709 is returned. NULL is also returned on error. To distinguish
710 between these two cases, use the stdio function ferror(). */
713 read_whole_line (FILE *fp)
717 char *line = (char *)xmalloc (bufsize);
719 while (fgets (line + length, bufsize - length, fp))
721 length += strlen (line + length);
723 if (line[length - 1] == '\n')
725 /* fgets() guarantees to read the whole line, or to use up the
726 space we've given it. We can double the buffer
729 line = xrealloc (line, bufsize);
731 if (length == 0 || ferror (fp))
736 if (length + 1 < bufsize)
737 /* Relieve the memory from our exponential greediness. We say
738 `length + 1' because the terminating \0 is not included in
739 LENGTH. We don't need to zero-terminate the string ourselves,
740 though, because fgets() does that. */
741 line = xrealloc (line, length + 1);
745 /* Read FILE into memory. A pointer to `struct file_memory' is
746 returned; use struct element `content' to access file contents, and
747 the element `length' to know the file length. `content' is *not*
748 zero-terminated, and you should *not* read or write beyond the [0,
749 length) range of characters.
751 After you are done with the file contents, call read_file_free to
754 Depending on the operating system and the type of file that is
755 being read, read_file() either mmap's the file into memory, or
756 reads the file into the core using read().
758 If file is named "-", fileno(stdin) is used for reading instead.
759 If you want to read from a real file named "-", use "./-" instead. */
762 read_file (const char *file)
765 struct file_memory *fm;
767 int inhibit_close = 0;
769 /* Some magic in the finest tradition of Perl and its kin: if FILE
770 is "-", just use stdin. */
775 /* Note that we don't inhibit mmap() in this case. If stdin is
776 redirected from a regular file, mmap() will still work. */
779 fd = open (file, O_RDONLY);
782 fm = xmalloc (sizeof (struct file_memory));
787 if (fstat (fd, &buf) < 0)
789 fm->length = buf.st_size;
790 /* NOTE: As far as I know, the callers of this function never
791 modify the file text. Relying on this would enable us to
792 specify PROT_READ and MAP_SHARED for a marginal gain in
793 efficiency, but at some cost to generality. */
794 fm->content = mmap (NULL, fm->length, PROT_READ | PROT_WRITE,
796 if (fm->content == MAP_FAILED)
806 /* The most common reason why mmap() fails is that FD does not point
807 to a plain file. However, it's also possible that mmap() doesn't
808 work for a particular type of file. Therefore, whenever mmap()
809 fails, we just fall back to the regular method. */
810 #endif /* HAVE_MMAP */
813 size = 512; /* number of bytes fm->content can
814 hold at any given time. */
815 fm->content = xmalloc (size);
819 if (fm->length > size / 2)
821 /* #### I'm not sure whether the whole exponential-growth
822 thing makes sense with kernel read. On Linux at least,
823 read() refuses to read more than 4K from a file at a
824 single chunk anyway. But other Unixes might optimize it
825 better, and it doesn't *hurt* anything, so I'm leaving
828 /* Normally, we grow SIZE exponentially to make the number
829 of calls to read() and realloc() logarithmic in relation
830 to file size. However, read() can read an amount of data
831 smaller than requested, and it would be unreasonable to
832 double SIZE every time *something* was read. Therefore,
833 we double SIZE only when the length exceeds half of the
834 entire allocated size. */
836 fm->content = xrealloc (fm->content, size);
838 nread = read (fd, fm->content + fm->length, size - fm->length);
840 /* Successful read. */
851 if (size > fm->length && fm->length != 0)
852 /* Due to exponential growth of fm->content, the allocated region
853 might be much larger than what is actually needed. */
854 fm->content = xrealloc (fm->content, fm->length);
866 /* Release the resources held by FM. Specifically, this calls
867 munmap() or free() on fm->content, depending whether mmap or
868 malloc/read were used to read in the file. It also frees the
869 memory needed to hold the FM structure itself. */
872 read_file_free (struct file_memory *fm)
877 munmap (fm->content, fm->length);
887 /* Free the pointers in a NULL-terminated vector of pointers, then
888 free the pointer itself. */
890 free_vec (char **vec)
901 /* Append vector V2 to vector V1. The function frees V2 and
902 reallocates V1 (thus you may not use the contents of either
903 pointer after the call). If V1 is NULL, V2 is returned. */
905 merge_vecs (char **v1, char **v2)
915 /* To avoid j == 0 */
920 for (i = 0; v1[i]; i++);
922 for (j = 0; v2[j]; j++);
924 v1 = (char **)xrealloc (v1, (i + j + 1) * sizeof (char **));
925 memcpy (v1 + i, v2, (j + 1) * sizeof (char *));
930 /* A set of simple-minded routines to store strings in a linked list.
931 This used to also be used for searching, but now we have hash
934 /* Append an element to the list. */
936 slist_append (slist *l, const char *s)
938 slist *newel = (slist *)xmalloc (sizeof (slist));
941 newel->string = xstrdup (s);
946 /* Find the last element. */
953 /* Is there a specific entry in the list? */
955 slist_contains (slist *l, const char *s)
957 for (; l; l = l->next)
958 if (!strcmp (l->string, s))
963 /* Free the whole slist. */
965 slist_free (slist *l)
978 /* Sometimes it's useful to create "sets" of strings, i.e. special
979 hash tables where you want to store strings as keys and merely
980 query for their existence. Here is a set of utility routines that
981 makes that transparent. */
984 string_set_add (struct hash_table *ht, const char *s)
986 /* We use "1" as value. It provides us a useful and clear arbitrary
987 value, and it consumes no memory -- the pointers to the same
988 string "1" will be shared by all the key-value pairs in the hash
990 hash_table_put (ht, xstrdup (s), "1");
994 string_set_exists (struct hash_table *ht, const char *s)
996 return hash_table_exists (ht, s);
1000 string_set_free_mapper (void *key, void *value_ignored, void *arg_ignored)
1007 string_set_free (struct hash_table *ht)
1009 hash_table_map (ht, string_set_free_mapper, NULL);
1010 hash_table_destroy (ht);
1014 free_keys_and_values_mapper (void *key, void *value, void *arg_ignored)
1021 /* Another utility function: call free() on all keys and values of HT. */
1024 free_keys_and_values (struct hash_table *ht)
1026 hash_table_map (ht, free_keys_and_values_mapper, NULL);
1030 /* Engine for legible and legible_long_long; this function works on
1034 legible_1 (const char *repr)
1036 static char outbuf[128];
1041 /* Reset the pointers. */
1044 /* If the number is negative, shift the pointers. */
1050 /* How many digits before the first separator? */
1051 mod = strlen (inptr) % 3;
1053 for (i = 0; i < mod; i++)
1054 *outptr++ = inptr[i];
1055 /* Now insert the rest of them, putting separator before every
1057 for (i1 = i, i = 0; inptr[i1]; i++, i1++)
1059 if (i % 3 == 0 && i1 != 0)
1061 *outptr++ = inptr[i1];
1063 /* Zero-terminate the string. */
1068 /* Legible -- return a static pointer to the legibly printed long. */
1073 /* Print the number into the buffer. */
1074 long_to_string (inbuf, l);
1075 return legible_1 (inbuf);
1078 /* The same as legible(), but works on VERY_LONG_TYPE. See sysdep.h. */
1080 legible_very_long (VERY_LONG_TYPE l)
1083 /* Print the number into the buffer. */
1084 sprintf (inbuf, VERY_LONG_FORMAT, l);
1085 return legible_1 (inbuf);
1088 /* Count the digits in a (long) integer. */
1093 while ((a /= 10) != 0)
1098 /* Print NUMBER to BUFFER. This is equivalent to sprintf(buffer,
1099 "%ld", number), only much faster.
1101 BUFFER should accept 24 bytes. This should suffice for the longest
1102 numbers on 64-bit machines, including the `-' sign and the trailing
1105 long_to_string (char *buffer, long number)
1107 #if (SIZEOF_LONG != 4) && (SIZEOF_LONG != 8)
1109 sprintf (buffer, "%ld", number);
1110 #else /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */
1120 #define FROB(figure) do { \
1121 if (force || number >= figure) \
1122 *p++ = number / figure + '0', number %= figure, force = 1; \
1124 #if SIZEOF_LONG == 8
1125 FROB (1000000000000000000L);
1126 FROB (100000000000000000L);
1127 FROB (10000000000000000L);
1128 FROB (1000000000000000L);
1129 FROB (100000000000000L);
1130 FROB (10000000000000L);
1131 FROB (1000000000000L);
1132 FROB (100000000000L);
1133 FROB (10000000000L);
1134 #endif /* SIZEOF_LONG == 8 */
1145 *p++ = number + '0';
1147 #endif /* (SIZEOF_LONG == 4) || (SIZEOF_LONG == 8) */