#include "utils.h"
#include "hash.h"
#include "convert.h"
+#include "recur.h" /* declaration of get_urls_html */
#ifndef errno
extern int errno;
&& (0 == strcasecmp (rel, "stylesheet")
|| 0 == strcasecmp (rel, "shortcut icon")))
up->link_inline_p = 1;
+ else
+ /* The external ones usually point to HTML pages, such as
+ <link rel="next" href="..."> */
+ up->link_expect_html = 1;
}
}
}
logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
return NULL;
}
- DEBUGP (("Loaded %s (size %ld).\n", file, fm->length));
+ DEBUGP (("Loaded %s (size %s).\n", file, number_to_static_string (fm->length)));
ctx.text = fm->content;
ctx.head = ctx.tail = NULL;
init_interesting ();
/* Specify MHT_TRIM_VALUES because of buggy HTML generators that
- generate <a href=" foo"> instead of <a href="foo"> (Netscape
- ignores spaces as well.) If you really mean space, use &32; or
- %20. */
+ generate <a href=" foo"> instead of <a href="foo"> (browsers
+ ignore spaces as well.) If you really mean space, use &32; or
+ %20. MHT_TRIM_VALUES also causes squashing of embedded newlines,
+ e.g. in <img src="foo.[newline]html">. Such newlines are also
+ ignored by IE and Mozilla and are presumably introduced by
+ writing HTML with editors that force word wrap. */
flags = MHT_TRIM_VALUES;
if (opt.strict_comments)
flags |= MHT_STRICT_COMMENTS;
logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
return NULL;
}
- DEBUGP (("Loaded %s (size %ld).\n", file, fm->length));
+ DEBUGP (("Loaded %s (size %s).\n", file, number_to_static_string (fm->length)));
head = tail = NULL;
text = fm->content;
url = url_parse (url_text, &up_error_code);
if (!url)
{
- logprintf (LOG_NOTQUIET, "%s: Invalid URL %s: %s\n",
+ logprintf (LOG_NOTQUIET, _("%s: Invalid URL %s: %s\n"),
file, url_text, url_error (up_error_code));
xfree (url_text);
continue;