From 37c85ecba31b22de68cca986c2c8757f709dd058 Mon Sep 17 00:00:00 2001
From: Merinov Nikolay
Date: Tue, 26 Jul 2011 09:27:08 +0200
Subject: [PATCH] Fix problem with IDN and UTF-8 encoding.

---
 src/ChangeLog     |  6 ++++++
 src/iri.c         | 15 +++++++++++++++
 src/res.c         |  2 +-
 tests/ChangeLog   |  6 ++++++
 tests/Makefile.am |  2 ++
 tests/run-px      |  2 ++
 6 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/src/ChangeLog b/src/ChangeLog
index f2c03037..d394166e 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,9 @@
+2011-07-20 Merinov Nikolay
+
+	* iri.c (remote_to_utf8): Add test for non-ASCII symbols with
+	UTF-8 URI encoding.
+	* res.c (res_retrieve_file): Fix url_parse call.
+
 2011-06-08 Giuseppe Scrivano
 
 	* retr.c (retrieve_from_file): Parse the url careless if IRI is enabled.
diff --git a/src/iri.c b/src/iri.c
index 08cfde40..9b16639e 100644
--- a/src/iri.c
+++ b/src/iri.c
@@ -264,6 +264,21 @@ remote_to_utf8 (struct iri *i, const char *str, const char **new)
   if (!i->uri_encoding)
     return false;
 
+  /* When `i->uri_encoding' == "UTF-8" there is nothing to convert. But we must
+     test for non-ASCII symbols for correct hostname processing in `idn_encode'
+     function. */
+  if (!strcmp (i->uri_encoding, "UTF-8"))
+    {
+      int i, len = strlen (str);
+      for (i = 0; i < len; i++)
+        if ((unsigned char) str[i] >= (unsigned char) '\200')
+          {
+            *new = strdup (str);
+            return true;
+          }
+      return false;
+    }
+
   cd = iconv_open ("UTF-8", i->uri_encoding);
   if (cd == (iconv_t)(-1))
     return false;
diff --git a/src/res.c b/src/res.c
index edb12bd6..50dcb56d 100644
--- a/src/res.c
+++ b/src/res.c
@@ -552,7 +552,7 @@ res_retrieve_file (const char *url, char **file, struct iri *iri)
       opt.timestamping = false;
       opt.spider = false;
 
-      url_parsed = url_parse (robots_url, &url_err, iri, true);
+      url_parsed = url_parse (robots_url, &url_err, i, true);
       if (!url_parsed)
         {
           char *error = url_error (robots_url, url_err);
diff --git a/tests/ChangeLog b/tests/ChangeLog
index 1995c826..f686c03d 100644
--- a/tests/ChangeLog
+++ b/tests/ChangeLog
@@ -1,3 +1,9 @@
+2011-06-03 Merinov Nikolay
+
+	* Test-idn-cmd-utf8.px: Added test for idn with utf-8 local encoding.
+	* Test-idn-robots-utf8.px: Added test for idn with utf-8 local encoding
+	and robots.txt file.
+	* Makefile.am, run-px: Add new tests.
 2011-04-19 Giuseppe Scrivano
 
 	* Makefile.am (LIBS): Add $(LIB_CLOCK_GETTIME).
diff --git a/tests/Makefile.am b/tests/Makefile.am
index ccd274d1..6cdbb991 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -90,7 +90,9 @@ EXTRA_DIST = FTPServer.pm FTPTest.pm HTTPServer.pm HTTPTest.pm \
 	     Test-idn-headers.px \
 	     Test-idn-meta.px \
 	     Test-idn-cmd.px \
+	     Test-idn-cmd-utf8.px \
 	     Test-idn-robots.px \
+	     Test-idn-robots-utf8.px \
 	     Test-iri.px \
 	     Test-iri-percent.px \
 	     Test-iri-disabled.px \
diff --git a/tests/run-px b/tests/run-px
index cce44e45..21074cc9 100755
--- a/tests/run-px
+++ b/tests/run-px
@@ -43,7 +43,9 @@ my @tests = (
     'Test-idn-headers.px',
     'Test-idn-meta.px',
     'Test-idn-cmd.px',
+    'Test-idn-cmd-utf8.px',
     'Test-idn-robots.px',
+    'Test-idn-robots-utf8.px',
     'Test-iri.px',
     'Test-iri-percent.px',
     'Test-iri-disabled.px',
-- 
2.39.2