/* Collect URLs from HTML source.
Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
- 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+ 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
This file is part of GNU Wget.
#include <errno.h>
#include <assert.h>
+#include "exits.h"
#include "html-parse.h"
#include "url.h"
#include "utils.h"
TAG_SCRIPT,
TAG_TABLE,
TAG_TD,
- TAG_TH
+ TAG_TH,
+ TAG_VIDEO,
+ TAG_AUDIO,
+ TAG_SOURCE
};
/* The list of known tags and functions used for handling them. Most
{ TAG_SCRIPT, "script", tag_find_urls },
{ TAG_TABLE, "table", tag_find_urls },
{ TAG_TD, "td", tag_find_urls },
- { TAG_TH, "th", tag_find_urls }
+ { TAG_TH, "th", tag_find_urls },
+ { TAG_VIDEO, "video", tag_find_urls },
+ { TAG_AUDIO, "audio", tag_find_urls },
+ { TAG_SOURCE, "source", tag_find_urls }
};
/* tag_url_attributes documents which attributes of which tags contain
{ TAG_SCRIPT, "src", ATTR_INLINE },
{ TAG_TABLE, "background", ATTR_INLINE },
{ TAG_TD, "background", ATTR_INLINE },
- { TAG_TH, "background", ATTR_INLINE }
+ { TAG_TH, "background", ATTR_INLINE },
+ { TAG_VIDEO, "src", ATTR_INLINE },
+ { TAG_VIDEO, "poster", ATTR_INLINE },
+ { TAG_AUDIO, "src", ATTR_INLINE },
+ { TAG_AUDIO, "poster", ATTR_INLINE },
+ { TAG_SOURCE, "src", ATTR_INLINE }
};
/* The lists of interesting tags and attributes are built dynamically,
check_style_attr (tag, ctx);
- if (tag->end_tag_p && (0 == strcasecmp (tag->name, "style")) &&
- tag->contents_begin && tag->contents_end)
+ if (tag->end_tag_p && (0 == strcasecmp (tag->name, "style"))
+ && tag->contents_begin && tag->contents_end
+ && tag->contents_begin <= tag->contents_end)
{
/* parse contents */
get_urls_css (ctx, tag->contents_begin - ctx->text,
url_text = merged;
}
+ char *new_url = rewrite_shorthand_url (url_text);
+ if (new_url)
+ {
+ xfree (url_text);
+ url_text = new_url;
+ }
+
url = url_parse (url_text, &up_error_code, NULL, false);
if (!url)
{