X-Git-Url: http://sjero.net/git/?p=wget;a=blobdiff_plain;f=src%2Fhtml-url.c;h=11789e59f017561f966b989a2a1d731b76d12c55;hp=73425c05913b8ebd4c2ee372ff9cdef8dcf3aaca;hb=8f93191f2656768dee6f9f700b9653421df55e1c;hpb=390adeea93dd76e8a9c818e996067670009023d3 diff --git a/src/html-url.c b/src/html-url.c index 73425c05..11789e59 100644 --- a/src/html-url.c +++ b/src/html-url.c @@ -48,6 +48,7 @@ typedef void (*tag_handler_t) PARAMS ((int, struct taginfo *, DECLARE_TAG_HANDLER (tag_find_urls); DECLARE_TAG_HANDLER (tag_handle_base); +DECLARE_TAG_HANDLER (tag_handle_form); DECLARE_TAG_HANDLER (tag_handle_link); DECLARE_TAG_HANDLER (tag_handle_meta); @@ -73,29 +74,31 @@ static struct { { "embed", tag_find_urls }, #define TAG_FIG 7 { "fig", tag_find_urls }, -#define TAG_FRAME 8 +#define TAG_FORM 8 + { "form", tag_handle_form }, +#define TAG_FRAME 9 { "frame", tag_find_urls }, -#define TAG_IFRAME 9 +#define TAG_IFRAME 10 { "iframe", tag_find_urls }, -#define TAG_IMG 10 +#define TAG_IMG 11 { "img", tag_find_urls }, -#define TAG_INPUT 11 +#define TAG_INPUT 12 { "input", tag_find_urls }, -#define TAG_LAYER 12 +#define TAG_LAYER 13 { "layer", tag_find_urls }, -#define TAG_LINK 13 +#define TAG_LINK 14 { "link", tag_handle_link }, -#define TAG_META 14 +#define TAG_META 15 { "meta", tag_handle_meta }, -#define TAG_OVERLAY 15 +#define TAG_OVERLAY 16 { "overlay", tag_find_urls }, -#define TAG_SCRIPT 16 +#define TAG_SCRIPT 17 { "script", tag_find_urls }, -#define TAG_TABLE 17 +#define TAG_TABLE 18 { "table", tag_find_urls }, -#define TAG_TD 18 +#define TAG_TD 19 { "td", tag_find_urls }, -#define TAG_TH 19 +#define TAG_TH 20 { "th", tag_find_urls } }; @@ -120,7 +123,7 @@ static struct { { TAG_AREA, "href", TUA_EXTERNAL }, { TAG_BGSOUND, "src", 0 }, { TAG_BODY, "background", 0 }, - { TAG_EMBED, "href", 0 }, + { TAG_EMBED, "href", TUA_EXTERNAL }, { TAG_EMBED, "src", 0 }, { TAG_FIG, "src", 0 }, { TAG_FRAME, "src", 0 }, @@ -141,10 +144,11 @@ static struct { from the information above. 
However, some places in the code refer to the attributes not mentioned here. We add them manually. */ static const char *additional_attributes[] = { - "rel", /* for TAG_LINK */ - "http-equiv", /* for TAG_META */ - "name", /* for TAG_META */ - "content" /* for TAG_META */ + "rel", /* used by tag_handle_link */ + "http-equiv", /* used by tag_handle_meta */ + "name", /* used by tag_handle_meta */ + "content", /* used by tag_handle_meta */ + "action" /* used by tag_handle_form */ }; static const char **interesting_tags; @@ -475,6 +479,22 @@ tag_handle_base (int tagid, struct taginfo *tag, struct map_context *ctx) ctx->base = xstrdup (newbase); } +/* Mark the URL found in
<form action=...> for conversion. */ + +static void +tag_handle_form (int tagid, struct taginfo *tag, struct map_context *ctx) +{ + int attrind; + char *action = find_attr (tag, "action", &attrind); + if (action) + { + struct urlpos *action_urlpos = append_one_url (action, 0, tag, + attrind, ctx); + if (action_urlpos) + action_urlpos->ignore_when_downloading = 1; + } +} + /* Handle the LINK tag. It requires special handling because how its links will be followed in -p mode depends on the REL attribute. */ @@ -484,12 +504,18 @@ tag_handle_link (int tagid, struct taginfo *tag, struct map_context *ctx) int attrind; char *href = find_attr (tag, "href", &attrind); - /* All <link href="..."> link references are external, - except for <link rel="stylesheet" href="...">. */ + /* All <link href="..."> link references are external, except those + known not to be, such as style sheet and shortcut icon: + + <link rel="stylesheet" href="..."> + <link rel="shortcut icon" href="..."> + */ if (href) { char *rel = find_attr (tag, "rel", NULL); - int inlinep = (rel && 0 == strcasecmp (rel, "stylesheet")); + int inlinep = (rel + && (0 == strcasecmp (rel, "stylesheet") + || 0 == strcasecmp (rel, "shortcut icon"))); append_one_url (href, inlinep, tag, attrind, ctx); } } @@ -515,10 +541,13 @@ tag_handle_meta (int tagid, struct taginfo *tag, struct map_context *ctx) get to the URL. */ struct urlpos *entry; - int attrind; - char *p, *refresh = find_attr (tag, "content", &attrind); int timeout = 0; + char *p; + + char *refresh = find_attr (tag, "content", &attrind); + if (!refresh) + return; for (p = refresh; ISDIGIT (*p); p++) timeout = 10 * timeout + *p - '0';