]> sjero.net Git - wget/blobdiff - tests/Test-meta-robots.px
Fix meta name=robots.
[wget] / tests / Test-meta-robots.px
diff --git a/tests/Test-meta-robots.px b/tests/Test-meta-robots.px
new file mode 100755 (executable)
index 0000000..2560369
--- /dev/null
@@ -0,0 +1,115 @@
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+
+use HTTPTest;
+
+# This test checks that Wget parses "nofollow" when it appears in <meta
+# name="robots"> tags, regardless of where in a list of comma-separated
+# values it appears, and regardless of spelling.
+#
+# Three different files contain links to the file "bombshell.html", each
+# with "nofollow" set, at various positions in a list of values for a
+# <meta name="robots"> tag, and with various degrees of separating
+# whitesspace. If bombshell.html is downloaded, the test
+# has failed.
+
+###############################################################################
+
+my $nofollow_start = <<EOF;
+<meta name="roBoTS" content="noFolLow ,  foo, bar ">
+<a href="/bombshell.html">Don't follow me!</a>
+EOF
+
+my $nofollow_mid = <<EOF;
+<meta name="rObOts" content=" foo  ,  NOfOllow ,  bar ">
+<a href="/bombshell.html">Don't follow me!</a>
+EOF
+
+my $nofollow_end = <<EOF;
+<meta name="RoBotS" content="foo,BAr,   nofOLLOw    ">
+<a href="/bombshell.html">Don't follow me!</a>
+EOF
+
+my $nofollow_solo = <<EOF;
+<meta name="robots" content="nofollow">
+<a href="/bombshell.html">Don't follow me!</a>
+EOF
+
+# code, msg, headers, content
+my %urls = (
+    '/start.html' => {
+        code => "200",
+        msg => "Ok",
+        headers => {
+            "Content-type" => "text/html",
+        },
+        content => $nofollow_start,
+    },
+    '/mid.html' => {
+        code => "200",
+        msg => "Ok",
+        headers => {
+            "Content-type" => "text/html",
+        },
+        content => $nofollow_mid,
+    },
+    '/end.html' => {
+        code => "200",
+        msg => "Ok",
+        headers => {
+            "Content-type" => "text/html",
+        },
+        content => $nofollow_end,
+    },
+    '/solo.html' => {
+        code => "200",
+        msg => "Ok",
+        headers => {
+            "Content-type" => "text/html",
+        },
+        content => $nofollow_solo,
+    },
+    '/bombshell.html' => {
+        code => "200",
+        msg => "Ok",
+        headers => {
+            "Content-type" => "text/html",
+        },
+        content => 'Hello',
+    },
+);
+
+my $cmdline = $WgetTest::WGETPATH . " -r -nd "
+    . join(' ',(map "http://localhost:{{port}}/$_.html",
+                qw(start mid end solo)));
+
+my $expected_error_code = 0;
+
+my %expected_downloaded_files = (
+    'start.html' => {
+        content => $nofollow_start,
+    },
+    'mid.html' => {
+        content => $nofollow_mid,
+    },
+    'end.html' => {
+        content => $nofollow_end,
+    },
+    'solo.html' => {
+        content => $nofollow_solo,
+    }
+);
+
+###############################################################################
+
+my $the_test = HTTPTest->new (name => "Test-meta-robots",
+                              input => \%urls, 
+                              cmdline => $cmdline, 
+                              errcode => $expected_error_code, 
+                              output => \%expected_downloaded_files);
+exit $the_test->run();
+
+# vim: et ts=4 sw=4
+