#include "recur.h"
#include "html-url.h"
#include "css-url.h"
-#include "iri.h"
typedef void (*tag_handler_t) (int, struct taginfo *, struct map_context *);
static struct hash_table *interesting_tags;
static struct hash_table *interesting_attributes;
+/* Will contain the (last) charset found in 'http-equiv=content-type'
+ meta tags */
+static char *meta_charset;
+
static void
init_interesting (void)
{
return NULL;
}
- set_ugly_no_encode (true);
- url = url_parse (link_uri, NULL);
- set_ugly_no_encode (false);
+ url = url_parse (link_uri, NULL, NULL, false);
if (!url)
{
DEBUGP (("%s: link \"%s\" doesn't parse.\n",
char *complete_uri = uri_merge (base, link_uri);
- DEBUGP (("%s: merge(\"%s\", \"%s\") -> %s\n",
- ctx->document_file, base, link_uri, complete_uri));
+ DEBUGP (("%s: merge(%s, %s) -> %s\n",
+ quotearg_n_style (0, escape_quoting_style, ctx->document_file),
+ quote_n (1, base),
+ quote_n (2, link_uri),
+ quotearg_n_style (3, escape_quoting_style, complete_uri)));
- set_ugly_no_encode (true);
- url = url_parse (complete_uri, NULL);
- set_ugly_no_encode (false);
+ url = url_parse (complete_uri, NULL, NULL, false);
if (!url)
{
DEBUGP (("%s: merged link \"%s\" doesn't parse.\n",
xfree (complete_uri);
}
- DEBUGP (("appending \"%s\" to urlpos.\n", url->url));
+ DEBUGP (("appending %s to urlpos.\n", quote (url->url)));
newel = xnew0 (struct urlpos);
newel->url = url;
if (!mcharset)
return;
- /*logprintf (LOG_VERBOSE, "Meta tag charset : %s\n", quote (mcharset));*/
-
- set_current_charset (mcharset);
- xfree (mcharset);
+ xfree_null (meta_charset);
+ meta_charset = mcharset;
}
else if (name && 0 == strcasecmp (name, "robots"))
{
<base href=...> and does the right thing. */
struct urlpos *
-get_urls_html (const char *file, const char *url, bool *meta_disallow_follow)
+get_urls_html (const char *file, const char *url, bool *meta_disallow_follow,
+ struct iri *iri)
{
struct file_memory *fm;
struct map_context ctx;
map_html_tags (fm->content, fm->length, collect_tags_mapper, &ctx, flags,
NULL, interesting_attributes);
+ /* If meta charset isn't null, override content encoding */
+ if (iri && meta_charset)
+ set_content_encoding (iri, meta_charset);
+
DEBUGP (("no-follow in %s: %d\n", file, ctx.nofollow));
if (meta_disallow_follow)
*meta_disallow_follow = ctx.nofollow;
url_text = merged;
}
- set_ugly_no_encode (true);
- url = url_parse (url_text, &up_error_code);
- set_ugly_no_encode (false);
+ url = url_parse (url_text, &up_error_code, NULL, false);
if (!url)
{
+ char *error = url_error (url_text, up_error_code);
logprintf (LOG_NOTQUIET, _("%s: Invalid URL %s: %s\n"),
- file, url_text, url_error (up_error_code));
+ file, url_text, error);
xfree (url_text);
+ xfree (error);
continue;
}
xfree (url_text);