From: Saint Xavier Date: Thu, 19 Jun 2008 20:33:38 +0000 (+0200) Subject: Add "content-type" meta tag parsing for retrieving HTML page encoding. X-Git-Tag: v1.13~338^2~7^2~6^2~13^2~26 X-Git-Url: http://sjero.net/git/?p=wget;a=commitdiff_plain;h=13fec855660ee55c43f64fe47fbc284f35ca6e6e Add "content-type" meta tag parsing for retrieving HTML page encoding. --- diff --git a/src/ChangeLog b/src/ChangeLog index ac27e15a..e30990b0 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,11 @@ +2008-06-19 Xavier Saint + + * html-url.c : Add "content-type" meta tag parsing for + retrieving page encoding. + + * iri.h : Make no-op version of parse_charset() return + NULL. + 2008-06-14 Xavier Saint * iri.c, iri.h : New files. diff --git a/src/html-url.c b/src/html-url.c index 0eb66506..9b515432 100644 --- a/src/html-url.c +++ b/src/html-url.c @@ -42,6 +42,7 @@ as that of the covered work. */ #include "hash.h" #include "convert.h" #include "recur.h" /* declaration of get_urls_html */ +#include "iri.h" struct map_context; @@ -534,6 +535,25 @@ tag_handle_meta (int tagid, struct taginfo *tag, struct map_context *ctx) entry->link_expect_html = 1; } } + else if (http_equiv && 0 == strcasecmp (http_equiv, "content-type")) + { + /* Handle stuff like: + */ + + char *mcharset; + char *content = find_attr (tag, "content", NULL); + if (!content) + return; + + mcharset = parse_charset (content); + if (!mcharset) + return; + + logprintf (LOG_VERBOSE, "Meta tag charset : %s\n", quote (mcharset)); + + /* sXXXav: Not used yet */ + xfree (mcharset); + } else if (name && 0 == strcasecmp (name, "robots")) { /* Handle stuff like: diff --git a/src/iri.h b/src/iri.h index d135e868..2ac7d5e7 100644 --- a/src/iri.h +++ b/src/iri.h @@ -37,7 +37,7 @@ char *parse_charset (char *str); #else /* ENABLE_IRI */ -#define parse_charset(str) /* no-op */ +#define parse_charset(str) NULL #endif /* ENABLE_IRI */ #endif /* IRI_H */