#ifdef HAVE_UNISTD_H
# include <unistd.h>
#endif
+#ifdef HAVE_MMAP
+# include <sys/mman.h>
+#endif
#ifdef HAVE_PWD_H
# include <pwd.h>
#endif
#ifdef NeXT
# include <libc.h> /* for access() */
#endif
+#include <fcntl.h>
+#include <assert.h>
#include "wget.h"
#include "utils.h"
#include "fnmatch.h"
+#include "hash.h"
#ifndef errno
extern int errno;
/* Read a line from FP. The function reallocs the storage as needed
to accomodate for any length of the line. Reallocs are done
storage exponentially, doubling the storage after each overflow to
- minimize the number of calls to realloc().
+ minimize the number of calls to realloc() and fgets(). The newline
+ character at the end of line is retained.
+
+ After end-of-file is encountered without anything being read, NULL
+ is returned. NULL is also returned on error. To distinguish
+ between these two cases, use the stdio function ferror(). */
- It is not an exemplary of correctness, since it kills off the
- newline (and no, there is no way to know if there was a newline at
- EOF). */
char *
read_whole_line (FILE *fp)
{
- char *line;
- int i, bufsize, c;
+ int length = 0;
+ int bufsize = 81;
+ char *line = (char *)xmalloc (bufsize);
- i = 0;
- bufsize = 40;
- line = (char *)xmalloc (bufsize);
- /* Construct the line. */
- while ((c = getc (fp)) != EOF && c != '\n')
+ while (fgets (line + length, bufsize - length, fp))
{
- if (i > bufsize - 1)
- line = (char *)xrealloc (line, (bufsize <<= 1));
- line[i++] = c;
+ length += strlen (line + length);
+ assert (length > 0);
+ if (line[length - 1] == '\n')
+ break;
+ /* fgets() guarantees to read the whole line, or to use up the
+ space we've given it. We can double the buffer
+ unconditionally. */
+ bufsize <<= 1;
+ line = xrealloc (line, bufsize);
}
- if (c == EOF && !i)
+ if (length == 0 || ferror (fp))
{
free (line);
return NULL;
}
- /* Check for overflow at zero-termination (no need to double the
- buffer in this case. */
- if (i == bufsize)
- line = (char *)xrealloc (line, i + 1);
- line[i] = '\0';
+ if (length + 1 < bufsize)
+ /* Relieve the memory from our exponential greediness. We say
+ `length + 1' because the terminating \0 is not included in
+ LENGTH. We don't need to zero-terminate the string ourselves,
+ though, because fgets() does that. */
+ line = xrealloc (line, length + 1);
return line;
}
+\f
+/* Read FILE into memory. A pointer to `struct file_memory' are
+ returned; use struct element `content' to access file contents, and
+ the element `length' to know the file length. `content' is *not*
+ zero-terminated, and you should *not* read or write beyond the [0,
+ length) range of characters.
-/* Load file pointed to by FP to memory and return the malloc-ed
- buffer with the contents. *NREAD will contain the number of read
- bytes. The file is loaded in chunks, allocated exponentially,
- starting with FILE_BUFFER_SIZE bytes. */
-void
-load_file (FILE *fp, char **buf, long *nread)
-{
- long bufsize;
+ After you are done with the file contents, call read_file_free to
+ release the memory.
+
+ Depending on the operating system and the type of file that is
+ being read, read_file() either mmap's the file into memory, or
+ reads the file into the core using read().
- bufsize = 512;
- *nread = 0;
- *buf = NULL;
- while (!feof (fp) && !ferror (fp))
+ If file is named "-", fileno(stdin) is used for reading instead.
+ If you want to read from a real file named "-", use "./-" instead. */
+
+struct file_memory *
+read_file (const char *file)
+{
+ int fd;
+ struct file_memory *fm;
+ long size;
+ int inhibit_close = 0;
+
+ /* Some magic in the finest tradition of Perl and its kin: if FILE
+ is "-", just use stdin. */
+ if (HYPHENP (file))
{
- *buf = (char *)xrealloc (*buf, bufsize + *nread);
- *nread += fread (*buf + *nread, sizeof (char), bufsize, fp);
- bufsize <<= 1;
+ fd = fileno (stdin);
+ inhibit_close = 1;
+ /* Note that we don't inhibit mmap() in this case. If stdin is
+ redirected from a regular file, mmap() will still work. */
}
- /* #### No indication of encountered error?? */
+ else
+ fd = open (file, O_RDONLY);
+ if (fd < 0)
+ return NULL;
+ fm = xmalloc (sizeof (struct file_memory));
+
+#ifdef HAVE_MMAP
+ {
+ struct stat buf;
+ if (fstat (fd, &buf) < 0)
+ goto mmap_lose;
+ fm->length = buf.st_size;
+ /* NOTE: As far as I know, the callers of this function never
+ modify the file text. Relying on this would enable us to
+ specify PROT_READ and MAP_SHARED for a marginal gain in
+ efficiency, but at some cost to generality. */
+ fm->content = mmap (NULL, fm->length, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE, fd, 0);
+ if (fm->content == MAP_FAILED)
+ goto mmap_lose;
+ if (!inhibit_close)
+ close (fd);
+
+ fm->mmap_p = 1;
+ return fm;
+ }
+
+ mmap_lose:
+ /* The most common reason why mmap() fails is that FD does not point
+ to a plain file. However, it's also possible that mmap() doesn't
+ work for a particular type of file. Therefore, whenever mmap()
+ fails, we just fall back to the regular method. */
+#endif /* HAVE_MMAP */
+
+ fm->length = 0;
+ size = 512; /* number of bytes fm->contents can
+ hold at any given time. */
+ fm->content = xmalloc (size);
+ while (1)
+ {
+ long nread;
+ if (fm->length > size / 2)
+ {
+ /* #### I'm not sure whether the whole exponential-growth
+ thing makes sense with kernel read. On Linux at least,
+ read() refuses to read more than 4K from a file at a
+ single chunk anyway. But other Unixes might optimize it
+ better, and it doesn't *hurt* anything, so I'm leaving
+ it. */
+
+ /* Normally, we grow SIZE exponentially to make the number
+ of calls to read() and realloc() logarithmic in relation
+ to file size. However, read() can read an amount of data
+ smaller than requested, and it would be unreasonably to
+ double SIZE every time *something* was read. Therefore,
+ we double SIZE only when the length exceeds half of the
+ entire allocated size. */
+ size <<= 1;
+ fm->content = xrealloc (fm->content, size);
+ }
+ nread = read (fd, fm->content + fm->length, size - fm->length);
+ if (nread > 0)
+ /* Successful read. */
+ fm->length += nread;
+ else if (nread < 0)
+ /* Error. */
+ goto lose;
+ else
+ /* EOF */
+ break;
+ }
+ if (!inhibit_close)
+ close (fd);
+ if (size > fm->length && fm->length != 0)
+ /* Due to exponential growth of fm->content, the allocated region
+ might be much larger than what is actually needed. */
+ fm->content = xrealloc (fm->content, fm->length);
+ fm->mmap_p = 0;
+ return fm;
+
+ lose:
+ if (!inhibit_close)
+ close (fd);
+ free (fm->content);
+ free (fm);
+ return NULL;
}
+/* Release the resources held by FM. Specifically, this calls
+ munmap() or free() on fm->content, depending whether mmap or
+ malloc/read were used to read in the file. It also frees the
+ memory needed to hold the FM structure itself. */
+
+void
+read_file_free (struct file_memory *fm)
+{
+#ifdef HAVE_MMAP
+ if (fm->mmap_p)
+ {
+ munmap (fm->content, fm->length);
+ }
+ else
+#endif
+ {
+ free (fm->content);
+ }
+ free (fm);
+}
+\f
/* Free the pointers in a NULL-terminated vector of pointers, then
free the pointer itself. */
void
return v1;
}
-/* A set of simple-minded routines to store and search for strings in
- a linked list. You may add a string to the slist, and peek whether
- it's still in there at any time later. */
+/* A set of simple-minded routines to store strings in a linked list.
+ This used to also be used for searching, but now we have hash
+ tables for that. */
+
+/* It's a shame that these simple things like linked lists and hash
+ tables (see hash.c) need to be implemented over and over again. It
+ would be nice to be able to use the routines from glib -- see
+ www.gtk.org for details. However, that would make Wget depend on
+ glib, and I want to avoid dependencies to external libraries for
+ reasons of convenience and portability (I suspect Wget is more
+ portable than anything ever written for Gnome). */
+
+/* Append an element to the list. If the list has a huge number of
+ elements, this can get slow because it has to find the list's
+ ending. If you think you have to call slist_append in a loop,
+ think about calling slist_prepend() followed by slist_nreverse(). */
-/* Add an element to the list. If flags is NOSORT, the list will not
- be sorted. */
slist *
-add_slist (slist *l, const char *s, int flags)
+slist_append (slist *l, const char *s)
{
- slist *t, *old, *beg;
- int cmp;
+ slist *newel = (slist *)xmalloc (sizeof (slist));
+ slist *beg = l;
- if (flags & NOSORT)
- {
- if (!l)
- {
- t = (slist *)xmalloc (sizeof (slist));
- t->string = xstrdup (s);
- t->next = NULL;
- return t;
- }
- beg = l;
- /* Find the last element. */
- while (l->next)
- l = l->next;
- t = (slist *)xmalloc (sizeof (slist));
- l->next = t;
- t->string = xstrdup (s);
- t->next = NULL;
- return beg;
- }
- /* Empty list or changing the first element. */
- if (!l || (cmp = strcmp (l->string, s)) > 0)
- {
- t = (slist *)xmalloc (sizeof (slist));
- t->string = xstrdup (s);
- t->next = l;
- return t;
- }
+ newel->string = xstrdup (s);
+ newel->next = NULL;
- beg = l;
- if (cmp == 0)
- return beg;
-
- /* Second two one-before-the-last element. */
+ if (!l)
+ return newel;
+ /* Find the last element. */
while (l->next)
- {
- old = l;
- l = l->next;
- cmp = strcmp (s, l->string);
- if (cmp == 0) /* no repeating in the list */
- return beg;
- else if (cmp > 0)
- continue;
- /* If the next list element is greater than s, put s between the
- current and the next list element. */
- t = (slist *)xmalloc (sizeof (slist));
- old->next = t;
- t->next = l;
- t->string = xstrdup (s);
- return beg;
- }
- t = (slist *)xmalloc (sizeof (slist));
- t->string = xstrdup (s);
- /* Insert the new element after the last element. */
- l->next = t;
- t->next = NULL;
+ l = l->next;
+ l->next = newel;
return beg;
}
-/* Is there a specific entry in the list? */
-int
-in_slist (slist *l, const char *s)
+/* Prepend S to the list. Unlike slist_append(), this is O(1). */
+
+slist *
+slist_prepend (slist *l, const char *s)
{
- int cmp;
+ slist *newel = (slist *)xmalloc (sizeof (slist));
+ newel->string = xstrdup (s);
+ newel->next = l;
+ return newel;
+}
+/* Destructively reverse L. */
+
+slist *
+slist_nreverse (slist *l)
+{
+ slist *prev = NULL;
while (l)
{
- cmp = strcmp (l->string, s);
- if (cmp == 0)
- return 1;
- else if (cmp > 0) /* the list is ordered! */
- return 0;
- l = l->next;
+ slist *next = l->next;
+ l->next = prev;
+ prev = l;
+ l = next;
}
+ return prev;
+}
+
+/* Is there a specific entry in the list? */
+int
+slist_contains (slist *l, const char *s)
+{
+ for (; l; l = l->next)
+ if (!strcmp (l->string, s))
+ return 1;
return 0;
}
/* Free the whole slist. */
void
-free_slist (slist *l)
+slist_free (slist *l)
{
- slist *n;
-
while (l)
{
- n = l->next;
+ slist *n = l->next;
free (l->string);
free (l);
l = n;
}
}
+\f
+/* Sometimes it's useful to create "sets" of strings, i.e. special
+ hash tables where you want to store strings as keys and merely
+ query for their existence. Here is a set of utility routines that
+ makes that transparent. */
+
+void
+string_set_add (struct hash_table *ht, const char *s)
+{
+ /* First check whether the set element already exists. If it does,
+ do nothing so that we don't have to free() the old element and
+ then strdup() a new one. */
+ if (hash_table_exists (ht, s))
+ return;
+
+ /* We use "1" as value. It provides us a useful and clear arbitrary
+ value, and it consumes no memory -- the pointers to the same
+ string "1" will be shared by all the key-value pairs in all `set'
+ hash tables. */
+ hash_table_put (ht, xstrdup (s), "1");
+}
+
+/* Synonym for hash_table_exists... */
+
+int
+string_set_exists (struct hash_table *ht, const char *s)
+{
+ return hash_table_exists (ht, s);
+}
+
+static int
+string_set_free_mapper (void *key, void *value_ignored, void *arg_ignored)
+{
+ free (key);
+ return 0;
+}
+
+void
+string_set_free (struct hash_table *ht)
+{
+ hash_table_map (ht, string_set_free_mapper, NULL);
+ hash_table_destroy (ht);
+}
+
+static int
+free_keys_and_values_mapper (void *key, void *value, void *arg_ignored)
+{
+ free (key);
+ free (value);
+ return 0;
+}
+
+/* Another utility function: call free() on all keys and values of HT. */
+
+void
+free_keys_and_values (struct hash_table *ht)
+{
+ hash_table_map (ht, free_keys_and_values_mapper, NULL);
+}
+
\f
/* Engine for legible and legible_long_long; this function works on
strings. */