X-Git-Url: http://sjero.net/git/?p=wget;a=blobdiff_plain;f=src%2Fhtml-url.c;h=3c6c9b924c9a3c9a22648ce0e4a59880cde1c824;hp=55563e2d3fc2aa9e5bd804999b3e33533f456f6d;hb=38a7829dcb4eb5dba28dbf0f05c6a80fea9217f8;hpb=0aa3c5d33c5faa8902fa638c36314deae45460f3 diff --git a/src/html-url.c b/src/html-url.c index 55563e2d..3c6c9b92 100644 --- a/src/html-url.c +++ b/src/html-url.c @@ -79,7 +79,10 @@ enum { TAG_SCRIPT, TAG_TABLE, TAG_TD, - TAG_TH + TAG_TH, + TAG_VIDEO, + TAG_AUDIO, + TAG_SOURCE }; /* The list of known tags and functions used for handling them. Most @@ -110,7 +113,10 @@ static struct known_tag { { TAG_SCRIPT, "script", tag_find_urls }, { TAG_TABLE, "table", tag_find_urls }, { TAG_TD, "td", tag_find_urls }, - { TAG_TH, "th", tag_find_urls } + { TAG_TH, "th", tag_find_urls }, + { TAG_VIDEO, "video", tag_find_urls }, + { TAG_AUDIO, "audio", tag_find_urls }, + { TAG_SOURCE, "source", tag_find_urls } }; /* tag_url_attributes documents which attributes of which tags contain @@ -157,7 +163,12 @@ static struct { { TAG_SCRIPT, "src", ATTR_INLINE }, { TAG_TABLE, "background", ATTR_INLINE }, { TAG_TD, "background", ATTR_INLINE }, - { TAG_TH, "background", ATTR_INLINE } + { TAG_TH, "background", ATTR_INLINE }, + { TAG_VIDEO, "src", ATTR_INLINE }, + { TAG_VIDEO, "poster", ATTR_INLINE }, + { TAG_AUDIO, "src", ATTR_INLINE }, + { TAG_AUDIO, "poster", ATTR_INLINE }, + { TAG_SOURCE, "src", ATTR_INLINE } }; /* The lists of interesting tags and attributes are built dynamically, @@ -273,6 +284,10 @@ append_url (const char *link_uri, int position, int size, const char *base = ctx->base ? 
ctx->base : ctx->parent_base; struct url *url; + struct iri *iri = iri_new (); + set_uri_encoding (iri, opt.locale, true); + iri->utf8_encode = true; + if (!base) { DEBUGP (("%s: no base, merge will use \"%s\".\n", @@ -290,7 +305,7 @@ append_url (const char *link_uri, int position, int size, return NULL; } - url = url_parse (link_uri, NULL, NULL, false); + url = url_parse (link_uri, NULL, iri, false); if (!url) { DEBUGP (("%s: link \"%s\" doesn't parse.\n", @@ -312,7 +327,7 @@ append_url (const char *link_uri, int position, int size, quote_n (2, link_uri), quotearg_n_style (3, escape_quoting_style, complete_uri))); - url = url_parse (complete_uri, NULL, NULL, false); + url = url_parse (complete_uri, NULL, iri, false); if (!url) { DEBUGP (("%s: merged link \"%s\" doesn't parse.\n", @@ -323,6 +338,8 @@ append_url (const char *link_uri, int position, int size, xfree (complete_uri); } + iri_free (iri); + DEBUGP (("appending %s to urlpos.\n", quote (url->url))); newel = xnew0 (struct urlpos); @@ -455,7 +472,7 @@ tag_find_urls (int tagid, struct taginfo *tag, struct map_context *ctx) /* Handle the BASE tag, for <base href=...>. */ static void -tag_handle_base (int tagid, struct taginfo *tag, struct map_context *ctx) +tag_handle_base (int tagid _GL_UNUSED, struct taginfo *tag, struct map_context *ctx) { struct urlpos *base_urlpos; int attrind; @@ -481,7 +498,7 @@ tag_handle_base (int tagid, struct taginfo *tag, struct map_context *ctx) /* Mark the URL found in
<form action=...> for conversion. */ static void -tag_handle_form (int tagid, struct taginfo *tag, struct map_context *ctx) +tag_handle_form (int tagid _GL_UNUSED, struct taginfo *tag, struct map_context *ctx) { int attrind; char *action = find_attr (tag, "action", &attrind); @@ -499,7 +516,7 @@ tag_handle_form (int tagid, struct taginfo *tag, struct map_context *ctx) links will be followed in -p mode depends on the REL attribute. */ static void -tag_handle_link (int tagid, struct taginfo *tag, struct map_context *ctx) +tag_handle_link (int tagid _GL_UNUSED, struct taginfo *tag, struct map_context *ctx) { int attrind; char *href = find_attr (tag, "href", &attrind); @@ -548,7 +565,7 @@ tag_handle_link (int tagid, struct taginfo *tag, struct map_context *ctx) refresh feature and because of robot exclusion. */ static void -tag_handle_meta (int tagid, struct taginfo *tag, struct map_context *ctx) +tag_handle_meta (int tagid _GL_UNUSED, struct taginfo *tag, struct map_context *ctx) { char *name = find_attr (tag, "name", NULL); char *http_equiv = find_attr (tag, "http-equiv", NULL); @@ -804,6 +821,13 @@ get_urls_file (const char *file) url_text = merged; } + char *new_url = rewrite_shorthand_url (url_text); + if (new_url) + { + xfree (url_text); + url_text = new_url; + } + url = url_parse (url_text, &up_error_code, NULL, false); if (!url) { @@ -830,7 +854,7 @@ get_urls_file (const char *file) return head; } -static void +void cleanup_html_url (void) { /* Destroy the hash tables. The hash table keys and values are not