X-Git-Url: http://sjero.net/git/?p=wget;a=blobdiff_plain;f=src%2Fhtml-url.c;h=09962eddc9286a0f6a6569c697660caf9f2e96a8;hp=abaa2f8e59bcdc18007caa60c4c55fbc75f832e8;hb=ae1d264fcc190f9c74cb490aa6da0240b0b77b1e;hpb=a9c3c58c9fb22e71e0878d4da1d3de0cd9e36445 diff --git a/src/html-url.c b/src/html-url.c index abaa2f8e..09962edd 100644 --- a/src/html-url.c +++ b/src/html-url.c @@ -643,6 +643,7 @@ get_urls_html (const char *file, const char *url, int *meta_disallow_follow) { struct file_memory *fm; struct map_context ctx; + int flags; /* Load the file. */ fm = read_file (file); @@ -663,8 +664,16 @@ get_urls_html (const char *file, const char *url, int *meta_disallow_follow) if (!interesting_tags) init_interesting (); - map_html_tags (fm->content, fm->length, interesting_tags, - interesting_attributes, collect_tags_mapper, &ctx); + /* Specify MHT_TRIM_VALUES because of buggy HTML generators that + generate <a href=" foo"> instead of <a href="foo"> (Netscape + ignores spaces as well.) If you really mean space, use &32; or + %20. */ + flags = MHT_TRIM_VALUES; + if (opt.strict_comments) + flags |= MHT_STRICT_COMMENTS; + + map_html_tags (fm->content, fm->length, collect_tags_mapper, &ctx, flags, + interesting_tags, interesting_attributes); DEBUGP (("no-follow in %s: %d\n", file, ctx.nofollow)); if (meta_disallow_follow)