# define TOLOWER(x) tolower (x)
# define TOUPPER(x) toupper (x)
-static struct options opt;
-#endif /* STANDALONE */
+struct hash_table {
+ int dummy;
+};
+static void *
+hash_table_get (const struct hash_table *ht, void *ptr)
+{
+ return ptr;
+}
+#else /* not STANDALONE */
+# include "hash.h"
+#endif
/* Pool support. A pool is a resizable chunk of memory. It is first
allocated on the stack, and moved to the heap if it needs to be
struct pool {
char *contents; /* pointer to the contents. */
int size; /* size of the pool. */
- int index; /* next unoccupied position in
- contents. */
-
- int alloca_p; /* whether contents was allocated
- using alloca(). */
- char *orig_contents; /* orig_contents, allocated by
- alloca(). this is used by
- POOL_FREE to restore the pool to
- the "initial" state. */
+ int tail; /* next available position index. */
+ int resized; /* whether the pool has been resized
+ using malloc. */
+
+ char *orig_contents; /* original pool contents, usually
+ stack-allocated. used by POOL_FREE
+ to restore the pool to the initial
+ state. */
int orig_size;
};
/* Initialize the pool to hold INITIAL_SIZE bytes of storage. */
-#define POOL_INIT(pool, initial_size) do { \
- (pool).size = (initial_size); \
- (pool).contents = ALLOCA_ARRAY (char, (pool).size); \
- (pool).index = 0; \
- (pool).alloca_p = 1; \
- (pool).orig_contents = (pool).contents; \
- (pool).orig_size = (pool).size; \
+#define POOL_INIT(p, initial_storage, initial_size) do { \
+ struct pool *P = (p); \
+ P->contents = (initial_storage); \
+ P->size = (initial_size); \
+ P->tail = 0; \
+ P->resized = 0; \
+ P->orig_contents = P->contents; \
+ P->orig_size = P->size; \
} while (0)
/* Grow the pool to accomodate at least SIZE new bytes. If the pool
already has room to accomodate SIZE bytes of data, this is a no-op. */
-#define POOL_GROW(pool, increase) do { \
- int PG_newsize = (pool).index + increase; \
- DO_REALLOC_FROM_ALLOCA ((pool).contents, (pool).size, PG_newsize, \
- (pool).alloca_p, char); \
-} while (0)
+#define POOL_GROW(p, increase) \
+ GROW_ARRAY ((p)->contents, (p)->size, (p)->tail + (increase), \
+ (p)->resized, char)
/* Append text in the range [beg, end) to POOL. No zero-termination
is done. */
-#define POOL_APPEND(pool, beg, end) do { \
- const char *PA_beg = (beg); \
- int PA_size = (end) - PA_beg; \
- POOL_GROW (pool, PA_size); \
- memcpy ((pool).contents + (pool).index, PA_beg, PA_size); \
- (pool).index += PA_size; \
+#define POOL_APPEND(p, beg, end) do { \
+ const char *PA_beg = (beg); \
+ int PA_size = (end) - PA_beg; \
+ POOL_GROW (p, PA_size); \
+ memcpy ((p)->contents + (p)->tail, PA_beg, PA_size); \
+ (p)->tail += PA_size; \
} while (0)
/* Append one character to the pool. Can be used to zero-terminate
pool strings. */
-#define POOL_APPEND_CHR(pool, ch) do { \
+#define POOL_APPEND_CHR(p, ch) do { \
char PAC_char = (ch); \
- POOL_GROW (pool, 1); \
- (pool).contents[(pool).index++] = PAC_char; \
+ POOL_GROW (p, 1); \
+ (p)->contents[(p)->tail++] = PAC_char; \
} while (0)
/* Forget old pool contents. The allocated memory is not freed. */
-#define POOL_REWIND(pool) pool.index = 0
+#define POOL_REWIND(p) ((p)->tail = 0)
/* Free heap-allocated memory for contents of POOL. This calls
xfree() if the memory was allocated through malloc. It also
values. That way after POOL_FREE, the pool is fully usable, just
as if it were freshly initialized with POOL_INIT. */
-#define POOL_FREE(pool) do { \
- if (!(pool).alloca_p) \
- xfree ((pool).contents); \
- (pool).contents = (pool).orig_contents; \
- (pool).size = (pool).orig_size; \
- (pool).index = 0; \
- (pool).alloca_p = 1; \
+#define POOL_FREE(p) do { \
+ struct pool *P = p; \
+ if (P->resized) \
+ xfree (P->contents); \
+ P->contents = P->orig_contents; \
+ P->size = P->orig_size; \
+ P->tail = 0; \
+ P->resized = 0; \
} while (0)
+/* Used for small stack-allocated memory chunks that might grow.  Like
+   DO_REALLOC, this macro grows BASEVAR as necessary to hold at least
+   NEEDED_SIZE items of TYPE, doubling SIZEVAR until it is big enough.
+
+   The difference is that on the first resize (RESIZED still zero), it
+   uses malloc+memcpy rather than realloc and sets RESIZED.  That way
+   you can stack-allocate the initial chunk, and only resort to heap
+   allocation if you stumble upon large data.
+
+   After the first resize, subsequent ones are performed with realloc,
+   just like DO_REALLOC.  */
+
+#define GROW_ARRAY(basevar, sizevar, needed_size, resized, type) do {    \
+  long ga_needed_size = (needed_size);                                   \
+  long ga_newsize = (sizevar);                                           \
+  while (ga_newsize < ga_needed_size)                                    \
+    ga_newsize = ga_newsize > 0 ? ga_newsize << 1 : 1;                   \
+  if (ga_newsize != (sizevar))                                           \
+    {                                                                    \
+      if (resized)                                                       \
+        (basevar) = (type *)xrealloc ((basevar), ga_newsize * sizeof (type)); \
+      else                                                               \
+        {                                                                \
+          void *ga_new = xmalloc (ga_newsize * sizeof (type));           \
+          memcpy (ga_new, (basevar), (sizevar) * sizeof (type));         \
+          (basevar) = ga_new;                                            \
+          (resized) = 1;                                                 \
+        }                                                                \
+      (sizevar) = ga_newsize;                                            \
+    }                                                                    \
+} while (0)
\f
#define AP_DOWNCASE 1
#define AP_PROCESS_ENTITIES 2
static void
convert_and_copy (struct pool *pool, const char *beg, const char *end, int flags)
{
- int old_index = pool->index;
+ int old_tail = pool->tail;
int size;
/* First, skip blanks if required. We must do this before entities
It's safe (and necessary) to grow the pool in advance because
processing the entities can only *shorten* the string, it can
never lengthen it. */
- POOL_GROW (*pool, end - beg);
const char *from = beg;
- char *to = pool->contents + pool->index;
+ char *to;
+
+ POOL_GROW (pool, end - beg);
+ to = pool->contents + pool->tail;
while (from < end)
{
}
/* Verify that we haven't exceeded the original size. (It
shouldn't happen, hence the assert.) */
- assert (to - (pool->contents + pool->index) <= end - beg);
+ assert (to - (pool->contents + pool->tail) <= end - beg);
/* Make POOL's tail point to the position following the string
we've written. */
- pool->index = to - pool->contents;
- POOL_APPEND_CHR (*pool, '\0');
+ pool->tail = to - pool->contents;
+ POOL_APPEND_CHR (pool, '\0');
}
else
{
/* Just copy the text to the pool. */
- POOL_APPEND (*pool, beg, end);
- POOL_APPEND_CHR (*pool, '\0');
+ POOL_APPEND (pool, beg, end);
+ POOL_APPEND_CHR (pool, '\0');
}
if (flags & AP_DOWNCASE)
{
- char *p = pool->contents + old_index;
+ char *p = pool->contents + old_tail;
for (; *p; p++)
*p = TOLOWER (*p);
}
}
\f
-/* Check whether the contents of [POS, POS+LENGTH) match any of the
- strings in the ARRAY. */
-static int
-array_allowed (const char **array, const char *beg, const char *end)
-{
- int length = end - beg;
- if (array)
- {
- for (; *array; array++)
- if (length >= strlen (*array)
- && !strncasecmp (*array, beg, length))
- break;
- if (!*array)
- return 0;
- }
- return 1;
-}
-\f
/* Originally we used to adhere to rfc 1866 here, and allowed only
letters, digits, periods, and hyphens as names (of tags or
attributes). However, this broke too many pages which used
return NULL;
}
\f
+/* Return non-zero if HT is NULL or if the string inside [b, e) is
+   present in hash table HT.  */
+
+static int
+name_allowed (const struct hash_table *ht, const char *b, const char *e)
+{
+ char *copy;
+ if (!ht)
+ return 1;
+ BOUNDED_TO_ALLOCA (b, e, copy);
+ return hash_table_get (ht, copy) != NULL;
+}
+
/* Advance P (a char pointer), with the explicit intent of being able
to read the next character. If this is not possible, go to finish. */
/* Map MAPFUN over HTML tags in TEXT, which is SIZE characters long.
MAPFUN will be called with two arguments: pointer to an initialized
- struct taginfo, and CLOSURE.
+ struct taginfo, and MAPARG.
ALLOWED_TAG_NAMES should be a NULL-terminated array of tag names to
be processed by this function. If it is NULL, all the tags are
void
map_html_tags (const char *text, int size,
- const char **allowed_tag_names,
- const char **allowed_attribute_names,
- void (*mapfun) (struct taginfo *, void *),
- void *closure)
+ void (*mapfun) (struct taginfo *, void *), void *maparg,
+ int flags,
+ const struct hash_table *allowed_tags,
+ const struct hash_table *allowed_attributes)
{
+ /* storage for strings passed to MAPFUN callback; if 256 bytes is
+ too little, POOL_APPEND allocates more with malloc. */
+ char pool_initial_storage[256];
+ struct pool pool;
+
const char *p = text;
const char *end = text + size;
- int attr_pair_count = 8;
- int attr_pair_alloca_p = 1;
- struct attr_pair *pairs = ALLOCA_ARRAY (struct attr_pair, attr_pair_count);
- struct pool pool;
+ struct attr_pair attr_pair_initial_storage[8];
+ int attr_pair_size = countof (attr_pair_initial_storage);
+ int attr_pair_resized = 0;
+ struct attr_pair *pairs = attr_pair_initial_storage;
if (!size)
return;
- POOL_INIT (pool, 256);
+ POOL_INIT (&pool, pool_initial_storage, countof (pool_initial_storage));
{
int nattrs, end_tag;
int uninteresting_tag;
look_for_tag:
- POOL_REWIND (pool);
+ POOL_REWIND (&pool);
nattrs = 0;
end_tag = 0;
declaration). */
if (*p == '!')
{
- if (!opt.strict_comments
+ if (!(flags & MHT_STRICT_COMMENTS)
&& p < end + 3 && p[1] == '-' && p[2] == '-')
{
/* If strict comments are not enforced and if we know
if (end_tag && *p != '>')
goto backout_tag;
- if (!array_allowed (allowed_tag_names, tag_name_begin, tag_name_end))
+ if (!name_allowed (allowed_tags, tag_name_begin, tag_name_end))
/* We can't just say "goto look_for_tag" here because we need
the loop below to properly advance over the tag's attributes. */
uninteresting_tag = 1;
goto look_for_tag;
attr_raw_value_end = p; /* <foo bar="baz"> */
/* ^ */
- /* The AP_TRIM_BLANKS is there for buggy HTML
- generators that generate <a href=" foo"> instead of
- <a href="foo"> (Netscape ignores spaces as well.)
- If you really mean space, use &32; or %20. */
- operation = AP_PROCESS_ENTITIES | AP_TRIM_BLANKS;
+ operation = AP_PROCESS_ENTITIES;
+ if (flags & MHT_TRIM_VALUES)
+ operation |= AP_TRIM_BLANKS;
}
else
{
/* If we aren't interested in the attribute, skip it. We
cannot do this test any sooner, because our text pointer
needs to correctly advance over the attribute. */
- if (allowed_attribute_names
- && !array_allowed (allowed_attribute_names, attr_name_begin,
- attr_name_end))
+ if (!name_allowed (allowed_attributes, attr_name_begin, attr_name_end))
continue;
- DO_REALLOC_FROM_ALLOCA (pairs, attr_pair_count, nattrs + 1,
- attr_pair_alloca_p, struct attr_pair);
+ GROW_ARRAY (pairs, attr_pair_size, nattrs + 1, attr_pair_resized,
+ struct attr_pair);
- pairs[nattrs].name_pool_index = pool.index;
+ pairs[nattrs].name_pool_index = pool.tail;
convert_and_copy (&pool, attr_name_begin, attr_name_end, AP_DOWNCASE);
- pairs[nattrs].value_pool_index = pool.index;
+ pairs[nattrs].value_pool_index = pool.tail;
convert_and_copy (&pool, attr_value_begin, attr_value_end, operation);
pairs[nattrs].value_raw_beginning = attr_raw_value_begin;
pairs[nattrs].value_raw_size = (attr_raw_value_end
taginfo.start_position = tag_start_position;
taginfo.end_position = p + 1;
/* Ta-dam! */
- (*mapfun) (&taginfo, closure);
+ (*mapfun) (&taginfo, maparg);
ADVANCE (p);
}
goto look_for_tag;
}
finish:
- POOL_FREE (pool);
- if (!attr_pair_alloca_p)
+ POOL_FREE (&pool);
+ if (attr_pair_resized)
xfree (pairs);
}
x = (char *)xrealloc (x, size);
}
- map_html_tags (x, length, NULL, NULL, test_mapper, &tag_counter);
+ map_html_tags (x, length, test_mapper, &tag_counter, 0, NULL, NULL);
printf ("TAGS: %d\n", tag_counter);
printf ("Tag backouts: %d\n", tag_backout_count);
printf ("Comment backouts: %d\n", comment_backout_count);