+2003-10-08 Hrvoje Niksic <hniksic@xemacs.org>
+
+ * html-url.c (get_urls_html): Pass the appropriate flags to
+ html-parse.c.
+
+ * html-parse.c (map_html_tags): Accept FLAGS from the caller
+ instead of examining OPT.
+
2003-10-08 Hrvoje Niksic <hniksic@xemacs.org>
* html-url.c (find_tag): Switch to binary search.
# define ISALNUM(x) isalnum (x)
# define TOLOWER(x) tolower (x)
# define TOUPPER(x) toupper (x)
-
-static struct options opt;
#endif /* STANDALONE */
/* Pool support. A pool is a resizable chunk of memory. It is first
/* Map MAPFUN over HTML tags in TEXT, which is SIZE characters long.
MAPFUN will be called with two arguments: pointer to an initialized
- struct taginfo, and CLOSURE.
+ struct taginfo, and MAPARG.
ALLOWED_TAG_NAMES should be a NULL-terminated array of tag names to
be processed by this function. If it is NULL, all the tags are
void
map_html_tags (const char *text, int size,
+ void (*mapfun) (struct taginfo *, void *), void *maparg,
+ int flags,
const char **allowed_tag_names,
- const char **allowed_attribute_names,
- void (*mapfun) (struct taginfo *, void *),
- void *closure)
+ const char **allowed_attribute_names)
{
/* storage for strings passed to MAPFUN callback; if 256 bytes is
too little, POOL_APPEND allocates more with malloc. */
declaration). */
if (*p == '!')
{
- if (!opt.strict_comments
+ if (!(flags & MHT_STRICT_COMMENTS)
&& p < end + 3 && p[1] == '-' && p[2] == '-')
{
/* If strict comments are not enforced and if we know
goto look_for_tag;
attr_raw_value_end = p; /* <foo bar="baz"> */
/* ^ */
- /* The AP_TRIM_BLANKS is there for buggy HTML
- generators that generate <a href=" foo"> instead of
- <a href="foo"> (Netscape ignores spaces as well.)
- If you really mean space, use &32; or %20. */
- operation = AP_PROCESS_ENTITIES | AP_TRIM_BLANKS;
+ operation = AP_PROCESS_ENTITIES;
+ if (flags & MHT_TRIM_VALUES)
+ operation |= AP_TRIM_BLANKS;
}
else
{
taginfo.start_position = tag_start_position;
taginfo.end_position = p + 1;
/* Ta-dam! */
- (*mapfun) (&taginfo, closure);
+ (*mapfun) (&taginfo, maparg);
ADVANCE (p);
}
goto look_for_tag;
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
+ (at your option) any later version.
GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
const char *end_position; /* end position of tag */
};
-void map_html_tags PARAMS ((const char *, int, const char **, const char **,
- void (*) (struct taginfo *, void *), void *));
+/* Flags for map_html_tags: */
+#define MHT_STRICT_COMMENTS 1 /* use strict comment interpretation */
+#define MHT_TRIM_VALUES 2 /* trim attribute values, e.g. interpret
+ <a href=" foo "> as "foo" */
+
+void map_html_tags PARAMS ((const char *, int,
+ void (*) (struct taginfo *, void *), void *,
+ int, const char **, const char **));
#endif /* HTML_PARSE_H */
{
struct file_memory *fm;
struct map_context ctx;
+ int flags;
/* Load the file. */
fm = read_file (file);
if (!interesting_tags)
init_interesting ();
- map_html_tags (fm->content, fm->length, interesting_tags,
- interesting_attributes, collect_tags_mapper, &ctx);
+ /* Specify MHT_TRIM_VALUES because of buggy HTML generators that
+ generate <a href=" foo"> instead of <a href="foo"> (Netscape
+ ignores spaces as well.) If you really mean space, use &#32; or
+ %20. */
+ flags = MHT_TRIM_VALUES;
+ if (opt.strict_comments)
+ flags |= MHT_STRICT_COMMENTS;
+
+ map_html_tags (fm->content, fm->length, collect_tags_mapper, &ctx, flags,
+ interesting_tags, interesting_attributes);
DEBUGP (("no-follow in %s: %d\n", file, ctx.nofollow));
if (meta_disallow_follow)