X-Git-Url: http://sjero.net/git/?a=blobdiff_plain;f=src%2Fhtml-url.c;h=f02982173cec0302e0295e68aaecac5abad5942d;hb=5dcb116087b182793cb64b9ec4e7659d72416bae;hp=54a0141c4533672c1c8c5de70d60acb6c6fa8d61;hpb=62aab82ead701780bd397dd99f792ce7e993cb02;p=wget diff --git a/src/html-url.c b/src/html-url.c index 54a0141c..f0298217 100644 --- a/src/html-url.c +++ b/src/html-url.c @@ -1,6 +1,6 @@ /* Collect URLs from HTML source. Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, - 2007, 2008, 2009, 2010 Free Software Foundation, Inc. + 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. This file is part of GNU Wget. @@ -36,6 +36,7 @@ as that of the covered work. */ #include #include +#include "exits.h" #include "html-parse.h" #include "url.h" #include "utils.h" @@ -164,6 +165,7 @@ static struct { to the attributes not mentioned here. We add them manually. */ static const char *additional_attributes[] = { "rel", /* used by tag_handle_link */ + "type", /* used by tag_handle_link */ "http-equiv", /* used by tag_handle_meta */ "name", /* used by tag_handle_meta */ "content", /* used by tag_handle_meta */ @@ -505,8 +507,8 @@ tag_handle_link (int tagid, struct taginfo *tag, struct map_context *ctx) /* All link references are external, except those known not to be, such as style sheet and shortcut icon: - - + + */ if (href) { @@ -526,11 +528,18 @@ tag_handle_link (int tagid, struct taginfo *tag, struct map_context *ctx) { up->link_inline_p = 1; } + else + { + /* The external ones usually point to HTML pages, such as + + except when the type attribute says otherwise: + + */ + char *type = find_attr (tag, "type", NULL); + if (!type || strcasecmp (type, "text/html") == 0) + up->link_expect_html = 1; + } } - else - /* The external ones usually point to HTML pages, such as - */ - up->link_expect_html = 1; } } } @@ -666,8 +675,9 @@ collect_tags_mapper (struct taginfo *tag, void *arg) check_style_attr (tag, ctx); - if (tag->end_tag_p && (0 == strcasecmp (tag->name, "style")) && - tag->contents_begin && tag->contents_end) + if (tag->end_tag_p && (0 == strcasecmp (tag->name, "style")) + && tag->contents_begin && tag->contents_end + && tag->contents_begin <= tag->contents_end) { /* parse contents */ get_urls_css (ctx, tag->contents_begin - ctx->text, @@ -794,6 +804,13 @@ get_urls_file (const char *file) url_text = merged; } + char *new_url = rewrite_shorthand_url (url_text); + if (new_url) + { + xfree (url_text); + url_text = new_url; + } + url = url_parse (url_text, &up_error_code, NULL, false); if (!url) { @@ -802,6 +819,7 @@ get_urls_file (const char *file) file, url_text, error); xfree (url_text); xfree (error); + inform_exit_status (URLERROR); continue; } xfree (url_text);