X-Git-Url: http://sjero.net/git/?p=wget;a=blobdiff_plain;f=src%2Firi.c;h=5fb06d0992bde05c5dda9768e68e2b9b8d5ad454;hp=fea7b150e7a5e3163ec642d68c0738b9bad9fbca;hb=5bb11da009c2f3bc4381bc8009c57007fd86534e;hpb=e6376b47433be6a0df64b0cd87b2d5c2c53a66f1 diff --git a/src/iri.c b/src/iri.c index fea7b150..5fb06d09 100644 --- a/src/iri.c +++ b/src/iri.c @@ -34,13 +34,22 @@ as that of the covered work. */ #include #include #include - +#include #include +#include +#include #include "utils.h" #include "iri.h" +static iconv_t locale2utf8; /* iconv descriptor for locale -> UTF-8, opened on first use by open_locale_to_utf8 */ + + +static bool open_locale_to_utf8 (void); +static bool do_conversion (iconv_t cd, char *in, size_t inlen, char **out); + + /* Given a string containing "charset=XXX", return the encoding if found, or NULL otherwise */ char * @@ -77,7 +86,6 @@ parse_charset (char *str) return charset; } - /* Find the locale used, or fall back on a default value */ char * find_locale (void) @@ -86,7 +94,6 @@ find_locale (void) return (char *) stringprep_locale_charset (); } - /* Basic check of an encoding name. 
 */ bool check_encoding_name (char *encoding) @@ -107,4 +114,125 @@ check_encoding_name (char *encoding) return true; } +/* Make sure locale2utf8 holds an iconv descriptor converting from opt.locale to UTF-8, opening it on first use. An unset opt.locale is filled in from find_locale (). Returns true when a descriptor is available; returns false when no locale is known or when iconv_open rejects the pair (locale2utf8 is then reset to NULL). */ +static bool +open_locale_to_utf8 (void) +{ + if (locale2utf8) + return true; + + /* sXXXav : That shouldn't happen, just in case */ + if (!opt.locale) + { + logprintf (LOG_VERBOSE, "open_locale_to_utf8: locale is unset\n"); + opt.locale = find_locale (); + } + + if (!opt.locale) + return false; + + locale2utf8 = iconv_open ("UTF-8", opt.locale); + if (locale2utf8 != (iconv_t)(-1)) + return true; + + logprintf (LOG_VERBOSE, "Conversion from %s to %s isn't supported\n", + quote (opt.locale), quote("UTF-8")); + locale2utf8 = NULL; + return false; +} + +/* Convert STR from the locale encoding to UTF-8. Returns the buffer produced by do_conversion on success; returns STR itself, unconverted, when opt.locale is "utf-8" (case-insensitive), when the descriptor cannot be opened, or when the conversion fails. NOTE(review): opt.locale is handed to strcasecmp before open_locale_to_utf8 can deal with an unset locale -- confirm callers guarantee it is non-NULL. NOTE(review): the result is either STR or a new allocation, and the return value alone does not say which -- confirm how callers decide whether to free it. */ +const char * +locale_to_utf8 (const char *str) +{ + char *new; + + if (!strcasecmp (opt.locale, "utf-8")) + return str; + + if (!open_locale_to_utf8 ()) + return str; + + if (do_conversion (locale2utf8, (char *) str, strlen ((char *) str), &new)) + return (const char *) new; + + return str; +} + +/* Convert the INLEN bytes at IN through descriptor CD into a buffer obtained from xmalloc and store it, NUL-terminated, in *OUT. The buffer starts at twice INLEN and is replaced by a larger one whenever iconv reports E2BIG; on EINVAL or EILSEQ the offending input byte is copied through unchanged. Returns true once iconv succeeds, false after logging any other errno. NOTE(review): the counters invalid and tooshort are incremented but never read here. NOTE(review): on the false path the buffer is not freed and *OUT still points into it. NOTE(review): after an E2BIG regrow, outlen is set to done + inlen * 2 although *OUT already sits done bytes into a buffer of outlen + 1 bytes; verify that space accounting and the terminator offset len - outlen - done. */ +static bool +do_conversion (iconv_t cd, char *in, size_t inlen, char **out) +{ + /* sXXXav : hummm hard to guess... */ + size_t len, done, outlen = inlen * 2; + int invalid = 0, tooshort = 0; + char *s; + + s = xmalloc (outlen + 1); + *out = s; + len = outlen; + done = 0; + + /* sXXXav : put a maximum looping factor ??? 
*/ + for (;;) + { + if (iconv (cd, &in, &inlen, out, &outlen) != (size_t)(-1)) + { + *out = s; + *(s + len - outlen - done) = '\0'; + return true; + } + + /* Incomplete or invalid multibyte sequence: copy the offending byte through as-is and step past it */ + if (errno == EINVAL || errno == EILSEQ) + { + invalid++; + **out = *in; + in++; + inlen--; + (*out)++; + outlen--; + } + else if (errno == E2BIG) /* Output buffer full */ + { + char *new; + + tooshort++; + done = len; + outlen = done + inlen * 2; + new = xmalloc (outlen + 1); + memcpy (new, s, done); + xfree (s); + s = new; + len = outlen; + *out = s + done; + } + else /* Weird, we got an unspecified error */ + { + logprintf (LOG_VERBOSE, "Unhandled errno %d\n", errno); + break; + } + } + + return false; +} + +/* Try to encode UTF-8 host to ASCII. Return the new domain on success or NULL + on error (the failure is logged at LOG_VERBOSE). */ +char *idn_encode (char *host) +{ + char *new; + int ret; + + /* toASCII UTF-8 NULL terminated string */ + ret = idna_to_ascii_8z (host, &new, 0); + if (ret != IDNA_SUCCESS) + { + logprintf (LOG_VERBOSE, "idn_encode failed (%d): %s\n", ret, + quote (idna_strerror (ret))); + return NULL; + } + + return new; +}