/* Collect URLs from HTML source.
Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
- 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+ 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
This file is part of GNU Wget.
#include <errno.h>
#include <assert.h>
+#include "exits.h"
#include "html-parse.h"
#include "url.h"
#include "utils.h"
to the attributes not mentioned here. We add them manually. */
static const char *additional_attributes[] = {
"rel", /* used by tag_handle_link */
+ "type", /* used by tag_handle_link */
"http-equiv", /* used by tag_handle_meta */
"name", /* used by tag_handle_meta */
"content", /* used by tag_handle_meta */
/* All <link href="..."> link references are external, except those
known not to be, such as style sheet and shortcut icon:
- <link rel="stylesheet" href="...">
- <link rel="shortcut icon" href="...">
+ <link rel="stylesheet" href="...">
+ <link rel="shortcut icon" href="...">
*/
if (href)
{
{
up->link_inline_p = 1;
}
+ else
+ {
+ /* External links usually point to HTML pages, such as
+ <link rel="next" href="...">
+ unless a type attribute other than "text/html" says otherwise:
+ <link rel="alternate" type="application/rss+xml" href=".../?feed=rss2" />
+ */
+ char *type = find_attr (tag, "type", NULL);
+ if (!type || strcasecmp (type, "text/html") == 0)
+ up->link_expect_html = 1;
+ }
}
- else
- /* The external ones usually point to HTML pages, such as
- <link rel="next" href="..."> */
- up->link_expect_html = 1;
}
}
}
check_style_attr (tag, ctx);
- if (tag->end_tag_p && (0 == strcasecmp (tag->name, "style")) &&
- tag->contents_begin && tag->contents_end)
+ if (tag->end_tag_p && (0 == strcasecmp (tag->name, "style"))
+ && tag->contents_begin && tag->contents_end
+ && tag->contents_begin <= tag->contents_end)
{
/* parse contents */
get_urls_css (ctx, tag->contents_begin - ctx->text,
url_text = merged;
}
+ char *new_url = rewrite_shorthand_url (url_text);
+ if (new_url)
+ {
+ xfree (url_text);
+ url_text = new_url;
+ }
+
url = url_parse (url_text, &up_error_code, NULL, false);
if (!url)
{
file, url_text, error);
xfree (url_text);
xfree (error);
+ inform_exit_status (URLERROR);
continue;
}
xfree (url_text);