# include <strings.h>
#endif
#include <stdlib.h>
-#include <ctype.h>
#include <errno.h>
#include <assert.h>
{ "th", TC_LINK }
};
+
/* Flags for specific url-attr pairs handled through TC_LINK: */
+
+/* This tag points to an external document not necessary for rendering this
+ document (i.e. it's not an inlined image, stylesheet, etc.). */
#define AF_EXTERNAL 1
+
/* For tags handled by TC_LINK: attributes that contain URLs to
download. */
static struct {
/* Normally here we could say:
interesting_tags[i] = name;
But we need to respect the settings of --ignore-tags and
- --follow-tags, so the code gets a bit harier. */
+ --follow-tags, so the code gets a bit hairier. */
if (opt.ignore_tags)
{
through if there's no match. */
int j, lose = 0;
for (j = 0; opt.ignore_tags[j] != NULL; j++)
- /* Loop through all the tags this user doesn't care
- about. */
+ /* Loop through all the tags this user doesn't care about. */
if (strcasecmp(opt.ignore_tags[j], name) == EQ)
{
lose = 1;
if (opt.follow_tags)
{
- /* --follow-tags was specified. Only match these specific
- tags, so return FALSE if we don't match one of them. */
+ /* --follow-tags was specified. Only match these specific tags, so
+ continue back to the top of the for loop if we don't match one of them. */
int j, win = 0;
for (j = 0; opt.follow_tags[j] != NULL; j++)
/* Loop through all the tags this user cares about. */
break;
}
if (!win)
- continue; /* wasn't one of the explicitly
- desired tags */
+ continue; /* wasn't one of the explicitly desired tags */
}
/* If we get to here, --follow-tags isn't being used or the
- tag is among the ones that are follwed, and --ignore-tags,
+ tag is among the ones that are followed, and --ignore-tags,
if specified, didn't include this tag, so it's an
"interesting" one. */
interesting_tags[ind++] = name;
complete_uri = xstrdup (link_uri);
}
else
- complete_uri = url_concat (base, link_uri);
+ complete_uri = uri_merge (base, link_uri);
DEBUGP (("%s: merge(\"%s\", \"%s\") -> %s\n",
closure->document_file, base ? base : "(null)",
if (closure->dash_p_leaf_HTML
&& (url_tag_attr_map[i].flags & AF_EXTERNAL))
/* If we're at a -p leaf node, we don't want to retrieve
- links to references we know are external, such as <a
- href=...>. */
+ links to references we know are external to this document,
+ such as <a href=...>. */
continue;
/* This find_attr() buried in a loop may seem inefficient
if (closure->base)
xfree (closure->base);
if (closure->parent_base)
- closure->base = url_concat (closure->parent_base, newbase);
+ closure->base = uri_merge (closure->parent_base, newbase);
else
closure->base = xstrdup (newbase);
}
and we're at a leaf node (relative to the -l
max. depth) in the HTML document tree, the only
<LINK> tag we'll follow is a <LINK REL=
- "stylesheet">, as it's necessary for displaying
+ "stylesheet">, as it'll be necessary for displaying
this document properly. We won't follow other
<LINK> tags, like <LINK REL="home">, for instance,
as they refer to external documents. */
read_file_free (fm);
return closure.head;
}
+/* Hand interesting_tags and interesting_attributes to FREE_MAYBE.
+   NOTE(review): FREE_MAYBE is defined outside this hunk; presumably it
+   frees its argument only when non-NULL -- confirm.  */
+void
+cleanup_html_url (void)
+{
+ FREE_MAYBE (interesting_tags);
+ FREE_MAYBE (interesting_attributes);
+}