shall include the source code for the parts of OpenSSL used as well
as that of the covered work. */
-#define USE_GNULIB_ALLOC
-
#include "wget.h"
#include <stdio.h>
#include "hash.h"
#include "convert.h"
#include "recur.h" /* declaration of get_urls_html */
+#include "iri.h"
struct map_context;
matches the user's preferences as specified through --ignore-tags
and --follow-tags. */
- int i;
+ size_t i;
interesting_tags = make_nocase_string_hash_table (countof (known_tags));
/* First, add all the tags we know how to handle, mapped to their
struct urlpos *newel;
const char *base = ctx->base ? ctx->base : ctx->parent_base;
struct url *url;
+ bool utf8_encode = false;
if (!base)
{
return NULL;
}
- url = url_parse (link_uri, NULL);
+ url = url_parse (link_uri, NULL, &utf8_encode);
if (!url)
{
DEBUGP (("%s: link \"%s\" doesn't parse.\n",
DEBUGP (("%s: merge(\"%s\", \"%s\") -> %s\n",
ctx->document_file, base, link_uri, complete_uri));
- url = url_parse (complete_uri, NULL);
+ url = url_parse (complete_uri, NULL, &utf8_encode);
if (!url)
{
DEBUGP (("%s: merged link \"%s\" doesn't parse.\n",
static void
tag_find_urls (int tagid, struct taginfo *tag, struct map_context *ctx)
{
- int i, attrind;
+ size_t i;
+ int attrind;
int first = -1;
for (i = 0; i < countof (tag_url_attributes); i++)
/* Find whether TAG/ATTRIND is a combination that contains a
URL. */
char *link = tag->attrs[attrind].value;
- const int size = countof (tag_url_attributes);
+ const size_t size = countof (tag_url_attributes);
/* If you're cringing at the inefficiency of the nested loops,
remember that they both iterate over a very small number of
entry->link_expect_html = 1;
}
}
+ else if (http_equiv && 0 == strcasecmp (http_equiv, "content-type"))
+ {
+ /* Handle stuff like:
+ <meta http-equiv="Content-Type" content="text/html; charset=CHARSET"> */
+
+ char *mcharset;
+ char *content = find_attr (tag, "content", NULL);
+ if (!content)
+ return;
+
+ mcharset = parse_charset (content);
+ if (!mcharset)
+ return;
+
+ /*logprintf (LOG_VERBOSE, "Meta tag charset : %s\n", quote (mcharset));*/
+
+ set_current_charset (mcharset);
+ xfree (mcharset);
+ }
else if (name && 0 == strcasecmp (name, "robots"))
{
/* Handle stuff like:
struct file_memory *fm;
struct urlpos *head, *tail;
const char *text, *text_end;
+ bool utf8_encode = false;
/* Load the file. */
fm = read_file (file);
url_text = merged;
}
- url = url_parse (url_text, &up_error_code);
+ url = url_parse (url_text, &up_error_code, &utf8_encode);
if (!url)
{
logprintf (LOG_NOTQUIET, _("%s: Invalid URL %s: %s\n"),