{
while (*content)
{
- /* Find the next occurrence of ',' or the end of
- the string. */
- char *end = strchr (content, ',');
- if (end)
- ++end;
- else
- end = content + strlen (content);
+ char *end;
+ /* Skip any initial whitespace. */
+ content += strspn (content, " \f\n\r\t\v");
+ /* Find the next occurrence of ',' or whitespace,
+ * or the end of the string. */
+ end = content + strcspn (content, ", \f\n\r\t\v");
if (!strncasecmp (content, "nofollow", end - content))
ctx->nofollow = true;
+ /* Skip past the next comma, if any. */
+ if (*end == ',')
+ ++end;
+ else
+ {
+ end = strchr (end, ',');
+ if (end)
+ ++end;
+ else
+ end = content + strlen (content);
+ }
content = end;
}
}
--- /dev/null
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+
+use HTTPTest;
+
+# This test checks that Wget parses "nofollow" when it appears in <meta
+# name="robots"> tags, regardless of where in a list of comma-separated
+# values it appears, and regardless of spelling.
+#
+# Three different files contain links to the file "bombshell.html", each
+# with "nofollow" set, at various positions in a list of values for a
+# <meta name="robots"> tag, and with various degrees of separating
+# whitesspace. If bombshell.html is downloaded, the test
+# has failed.
+
+###############################################################################
+
+my $nofollow_start = <<EOF;
+<meta name="roBoTS" content="noFolLow , foo, bar ">
+<a href="/bombshell.html">Don't follow me!</a>
+EOF
+
+my $nofollow_mid = <<EOF;
+<meta name="rObOts" content=" foo , NOfOllow , bar ">
+<a href="/bombshell.html">Don't follow me!</a>
+EOF
+
+my $nofollow_end = <<EOF;
+<meta name="RoBotS" content="foo,BAr, nofOLLOw ">
+<a href="/bombshell.html">Don't follow me!</a>
+EOF
+
+my $nofollow_solo = <<EOF;
+<meta name="robots" content="nofollow">
+<a href="/bombshell.html">Don't follow me!</a>
+EOF
+
+# code, msg, headers, content
+my %urls = (
+ '/start.html' => {
+ code => "200",
+ msg => "Ok",
+ headers => {
+ "Content-type" => "text/html",
+ },
+ content => $nofollow_start,
+ },
+ '/mid.html' => {
+ code => "200",
+ msg => "Ok",
+ headers => {
+ "Content-type" => "text/html",
+ },
+ content => $nofollow_mid,
+ },
+ '/end.html' => {
+ code => "200",
+ msg => "Ok",
+ headers => {
+ "Content-type" => "text/html",
+ },
+ content => $nofollow_end,
+ },
+ '/solo.html' => {
+ code => "200",
+ msg => "Ok",
+ headers => {
+ "Content-type" => "text/html",
+ },
+ content => $nofollow_solo,
+ },
+ '/bombshell.html' => {
+ code => "200",
+ msg => "Ok",
+ headers => {
+ "Content-type" => "text/html",
+ },
+ content => 'Hello',
+ },
+);
+
+my $cmdline = $WgetTest::WGETPATH . " -r -nd "
+ . join(' ',(map "http://localhost:{{port}}/$_.html",
+ qw(start mid end solo)));
+
+my $expected_error_code = 0;
+
+my %expected_downloaded_files = (
+ 'start.html' => {
+ content => $nofollow_start,
+ },
+ 'mid.html' => {
+ content => $nofollow_mid,
+ },
+ 'end.html' => {
+ content => $nofollow_end,
+ },
+ 'solo.html' => {
+ content => $nofollow_solo,
+ }
+);
+
+###############################################################################
+
+my $the_test = HTTPTest->new (name => "Test-meta-robots",
+ input => \%urls,
+ cmdline => $cmdline,
+ errcode => $expected_error_code,
+ output => \%expected_downloaded_files);
+exit $the_test->run();
+
+# vim: et ts=4 sw=4
+