From b4dca1816fcb6ffdf6da04b9952a78e881d1bebe Mon Sep 17 00:00:00 2001 From: Steven Schubiger Date: Sat, 29 Aug 2009 23:24:49 +0200 Subject: [PATCH] Escape semicolons when converting links (#27272). --- src/ChangeLog | 5 +++ src/convert.c | 13 +++++--- tests/ChangeLog | 6 ++++ tests/Test-k.px | 87 +++++++++++++++++++++++++++++++++++++++++++++++++ tests/run-px | 1 + 5 files changed, 108 insertions(+), 4 deletions(-) create mode 100755 tests/Test-k.px diff --git a/src/ChangeLog b/src/ChangeLog index 7a096ef3..caad2a27 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,8 @@ +2009-08-29 Steven Schubiger + + * convert.c (local_quote_string): Percent-encode semicolons + in local file strings. + 2009-08-27 Micah Cowan * wget.h (uerr_t): added new VERIFCERTERR code for SSL certificate diff --git a/src/convert.c b/src/convert.c index 71e3d8f0..653c7b4d 100644 --- a/src/convert.c +++ b/src/convert.c @@ -1,6 +1,6 @@ /* Conversion of links to local files. Copyright (C) 2003, 2004, 2005, 2006, 2007, - 2008 Free Software Foundation, Inc. + 2008, 2009 Free Software Foundation, Inc. This file is part of GNU Wget. @@ -598,8 +598,8 @@ find_fragment (const char *beg, int size, const char **bp, const char **ep) "index.html?foo=bar.html" to "index.html%3Ffoo=bar.html" should be safe for both local and HTTP-served browsing. - We always quote "#" as "%23" and "%" as "%25" because those - characters have special meanings in URLs. */ + We always quote "#" as "%23", "%" as "%25" and ";" as "%3B" + because those characters have special meanings in URLs. 
*/ static char * local_quote_string (const char *file) @@ -607,7 +607,7 @@ local_quote_string (const char *file) const char *from; char *newname, *to; - char *any = strpbrk (file, "?#%"); + char *any = strpbrk (file, "?#%;"); if (!any) return html_quote_string (file); @@ -627,6 +627,11 @@ local_quote_string (const char *file) *to++ = '2'; *to++ = '3'; break; + case ';': + *to++ = '%'; + *to++ = '3'; + *to++ = 'B'; + break; case '?': if (opt.adjust_extension) { diff --git a/tests/ChangeLog b/tests/ChangeLog index f5e4f348..2a5c097f 100644 --- a/tests/ChangeLog +++ b/tests/ChangeLog @@ -1,3 +1,9 @@ +2009-08-29 Steven Schubiger + + * run-px: Add Test-k.px to the list. + + * Test-k.px: Test escaping of semicolons in local file strings. + 2009-08-27 Micah Cowan * WgetTest.pm.in (run): Shift the errcode right by 8 binary places. diff --git a/tests/Test-k.px b/tests/Test-k.px new file mode 100755 index 00000000..d94fb3b4 --- /dev/null +++ b/tests/Test-k.px @@ -0,0 +1,87 @@ +#!/usr/bin/perl + +use strict; +use warnings; + +use HTTPTest; + + +############################################################################### + +my $index = <<EOF; +<html> + <head> + <title>Index</title> + </head> + <body> + <a href="site;sub:.html">Site</a> + </body> +</html> +EOF + +my $converted = <<EOF; +<html> + <head> + <title>Index</title> + </head> + <body> + <a href="site%3Bsub:.html">Site</a> + </body> +</html> +EOF + +my $site = <<EOF; +<html> + <head> + <title>Site</title> + </head> + <body> + <p>Subsite</p> + </body> +</html> +EOF + +# code, msg, headers, content +my %urls = ( + '/index.html' => { + code => "200", + msg => "Ok", + headers => { + "Content-type" => "text/html", + }, + content => $index, + }, + '/site;sub:.html' => { + code => "200", + msg => "Ok", + headers => { + "Content-type" => "text/html", + }, + content => $site, + }, +); + +my $cmdline = $WgetTest::WGETPATH . 
" -k -r -nH http://localhost:{{port}}/index.html"; + +my $expected_error_code = 0; + +my %expected_downloaded_files = ( + 'index.html' => { + content => $converted, + }, + 'site;sub:.html' => { + content => $site, + }, +); + +############################################################################### + +my $the_test = HTTPTest->new (name => "Test-k", + input => \%urls, + cmdline => $cmdline, + errcode => $expected_error_code, + output => \%expected_downloaded_files); +exit $the_test->run(); + +# vim: et ts=4 sw=4 + diff --git a/tests/run-px b/tests/run-px index e4e7c7dc..5dade1bd 100755 --- a/tests/run-px +++ b/tests/run-px @@ -43,6 +43,7 @@ my @tests = ( 'Test-iri-disabled.px', 'Test-iri-forced-remote.px', 'Test-iri-list.px', + 'Test-k.px', 'Test-meta-robots.px', 'Test-N-current.px', 'Test-N-smaller.px', -- 2.39.2