#include <errno.h>
#include "utils.h"
-#include "iri.h"
-char *remote;
-char *current;
-bool utf8_encode;
-bool ugly_no_encode;
+/* RFC3987 section 3.1 mandates STD3 ASCII RULES. IDNA_FLAGS is the flags
+   argument handed to the idna_to_ascii_8z and idna_to_unicode_8zlz calls
+   in this file. */
+#define IDNA_FLAGS IDNA_USE_STD3_ASCII_RULES
-static iconv_t locale2utf8;
+/* Note: locale encoding is kept in options struct (opt.locale) */
-
-static bool open_locale_to_utf8 (void);
static bool do_conversion (iconv_t cd, char *in, size_t inlen, char **out);
char *
find_locale (void)
{
- /* sXXXav, made our own function or use libidn one ?! */
return (char *) stringprep_locale_charset ();
}
static bool
open_locale_to_utf8 (void)
{
- if (locale2utf8)
- return true;
-
- /* sXXXav : That shouldn't happen, just in case */
- if (!opt.locale)
- {
- logprintf (LOG_VERBOSE, "open_locale_to_utf8: locale is unset\n");
- opt.locale = find_locale ();
- }
- if (!opt.locale)
- return false;
-
- locale2utf8 = iconv_open ("UTF-8", opt.locale);
- if (locale2utf8 != (iconv_t)(-1))
- return true;
-
- logprintf (LOG_VERBOSE, "Conversion from %s to %s isn't supported\n",
- quote (opt.locale), quote ("UTF-8"));
- locale2utf8 = NULL;
- return false;
}
-/* Return a new string */
+/* Try converting string str from locale to UTF-8. Return a new string
+ on success, or str on error or if conversion isn't needed.
+ str itself is returned when opt.locale is unset (even after asking
+ find_locale), when opt.locale already is "utf-8" (case-insensitive
+ comparison), when iconv_open cannot create a converter from opt.locale
+ to UTF-8, or when do_conversion reports failure. */
const char *
locale_to_utf8 (const char *str)
{
+ iconv_t l2u;
char *new;
- if (!strcasecmp (opt.locale, "utf-8"))
- return str;
+ /* That shouldn't happen, just in case */
+ if (!opt.locale)
+ {
+ logprintf (LOG_VERBOSE, "locale_to_utf8: locale is unset\n");
+ opt.locale = find_locale ();
+ }
- if (!open_locale_to_utf8 ())
+ if (!opt.locale || !strcasecmp (opt.locale, "utf-8"))
return str;
- if (do_conversion (locale2utf8, (char *) str, strlen ((char *) str), &new))
+ l2u = iconv_open ("UTF-8", opt.locale);
+ /* iconv_open reports failure by returning (iconv_t)(-1); only in that
+ case is the conversion unsupported and the input handed back as is. */
+ if (l2u == (iconv_t)(-1))
+ {
+ logprintf (LOG_VERBOSE, "Conversion from %s to %s isn't supported\n",
+ quote (opt.locale), quote ("UTF-8"));
+ return str;
+ }
+
+ /* NOTE(review): l2u is never passed to iconv_close in this function;
+ confirm whether do_conversion releases the descriptor. */
+ if (do_conversion (l2u, (char *) str, strlen ((char *) str), &new))
return (const char *) new;
return str;
}
-/* */
+/* Do the conversion according to the passed conversion descriptor cd. in and
+ inlen are given to iconv () as the input pointer and input length. *out
+ will contain the transcoded string on success. *out content is
+ unspecified otherwise. */
static bool
do_conversion (iconv_t cd, char *in, size_t inlen, char **out)
{
len = outlen;
done = 0;
- /* sXXXav : put a maximum looping factor ??? */
for (;;)
{
if (iconv (cd, &in, &inlen, out, &outlen) != (size_t)(-1))
return false;
}
-/* Try to ASCII encode UTF-8 host. Return the new domain on success or NULL
+/* Try to "ASCII encode" UTF-8 host. When i->utf8_encode is false, host is
+ first transcoded with remote_to_utf8 () and NULL is returned if that
+ step fails. host is then passed to idna_to_ascii_8z () together with
+ IDNA_FLAGS. Return the new domain on success or NULL
on error. */
char *
-idn_encode (char *host, bool utf8_encoded)
+idn_encode (struct iri *i, char *host)
{
char *new;
int ret;
- /* Encode to UTF-8 if not done using current remote */
- if (!utf8_encoded)
+ /* Encode to UTF-8 if not done */
+ if (!i->utf8_encode)
{
- if (!remote_to_utf8 ((const char *) host, (const char **) &new))
- {
- /* Nothing to encode or an error occured */
- return NULL;
- }
-
+ if (!remote_to_utf8 (i, (const char *) host, (const char **) &new))
+ return NULL; /* Nothing to encode or an error occurred */
host = new;
}
/* toASCII UTF-8 NULL terminated string */
- ret = idna_to_ascii_8z (host, &new, 0);
+ ret = idna_to_ascii_8z (host, &new, IDNA_FLAGS);
if (ret != IDNA_SUCCESS)
{
/* sXXXav : free new when needed ! */
return new;
}
-/* Try to decode an ASCII encoded host. Return the new domain in the locale on
- success or NULL on error. */
+/* Try to decode an "ASCII encoded" host. Decoding is delegated to
+ idna_to_unicode_8zlz () called with IDNA_FLAGS, and a failure is logged
+ at LOG_VERBOSE level. Return the new domain in the locale
+ on success or NULL on error. */
char *
idn_decode (char *host)
{
char *new;
int ret;
- ret = idna_to_unicode_8zlz (host, &new, 0);
+ ret = idna_to_unicode_8zlz (host, &new, IDNA_FLAGS);
if (ret != IDNA_SUCCESS)
{
logprintf (LOG_VERBOSE, "idn_decode failed (%d): %s\n", ret,
return new;
}
-/* Return a new string */
+/* Try to transcode string str from remote encoding to UTF-8. On success, *new
+ contains the transcoded string. *new content is unspecified otherwise.
+ The remote encoding is taken from i->uri_encoding; false is returned
+ when it is unset or when iconv_open cannot create a converter from it
+ to UTF-8. */
bool
-remote_to_utf8 (const char *str, const char **new)
+remote_to_utf8 (struct iri *i, const char *str, const char **new)
{
- char *remote;
iconv_t cd;
bool ret = false;
- if (opt.encoding_remote)
- remote = opt.encoding_remote;
- else if (current)
- remote = current;
- else
+ if (!i->uri_encoding)
return false;
- cd = iconv_open ("UTF-8", remote);
+ cd = iconv_open ("UTF-8", i->uri_encoding);
if (cd == (iconv_t)(-1))
return false;
return ret;
}
-char *get_remote_charset (void)
-{
- return remote;
-}
-
-char *get_current_charset (void)
+/* Allocate a new iri structure and return a pointer to it. uri_encoding
+ starts as a copy of opt.encoding_remote (NULL when that option is unset),
+ content_encoding starts as NULL and utf8_encode takes the value of
+ opt.enable_iri. */
+struct iri *
+iri_new (void)
{
- return current;
-}
-
-void set_current_charset (char *charset)
-{
- /*printf("[ current = `%s'\n", charset);*/
-
- if (current)
- xfree (current);
-
- current = charset ? xstrdup (charset) : NULL;
-}
-
-void set_current_as_locale (void)
-{
- /*printf("[ current = locale = `%s'\n", opt.locale);*/
- if (current)
- xfree (current);
-
- /* sXXXav : assert opt.locale NULL ? */
- current = xstrdup (opt.locale);
+ struct iri *i = xmalloc (sizeof (struct iri));
+ i->uri_encoding = opt.encoding_remote ? xstrdup (opt.encoding_remote) : NULL;
+ i->content_encoding = NULL;
+ i->utf8_encode = opt.enable_iri;
+ return i;
}
+/* Completely free an iri structure: uri_encoding and content_encoding are
+ released with xfree_null, then the structure itself with xfree. */
void
-set_remote_charset (char *charset)
+iri_free (struct iri *i)
{
- /*printf("[ remote = `%s'\n", charset);*/
- if (remote)
- xfree (remote);
-
- remote = charset ? xstrdup (charset) : NULL;
+ xfree_null (i->uri_encoding);
+ xfree_null (i->content_encoding);
+ xfree (i);
}
+/* Set uri_encoding of struct iri i. If a remote encoding was specified, use
+ it unless force is true. charset is copied with xstrdup; a NULL charset
+ resets uri_encoding to NULL. The stored value is left untouched when it
+ already equals charset (case-insensitive comparison). */
void
-set_remote_as_current (void)
-{
- /*printf("[ remote = current = `%s'\n", current);*/
- if (remote)
- xfree (remote);
-
- remote = current ? xstrdup (current) : NULL;
-}
-
-void reset_utf8_encode (void)
+set_uri_encoding (struct iri *i, char *charset, bool force)
{
- set_utf8_encode (opt.enable_iri);
-}
-
-void set_utf8_encode (bool encode)
-{
- utf8_encode = encode;
-}
+ DEBUGP (("URI encoding = %s\n", charset ? quote (charset) : "None"));
+ if (!force && opt.encoding_remote)
+ return;
+ if (i->uri_encoding)
+ {
+ if (charset && !strcasecmp (i->uri_encoding, charset))
+ return;
+ xfree (i->uri_encoding);
+ }
-bool get_utf8_encode (void)
-{
- return utf8_encode;
+ i->uri_encoding = charset ? xstrdup (charset) : NULL;
}
-void set_ugly_no_encode (bool ugly)
+/* Set content_encoding of struct iri i. Nothing is changed when
+ opt.encoding_remote is set, or when the stored value already equals
+ charset (case-insensitive comparison). Otherwise charset is copied with
+ xstrdup, and a NULL charset resets content_encoding to NULL. */
+void
+set_content_encoding (struct iri *i, char *charset)
{
- ugly_no_encode = ugly;
-}
+ DEBUGP (("URI content encoding = %s\n", charset ? quote (charset) : "None"));
+ if (opt.encoding_remote)
+ return;
+ if (i->content_encoding)
+ {
+ if (charset && !strcasecmp (i->content_encoding, charset))
+ return;
+ xfree (i->content_encoding);
+ }
-bool get_ugly_no_encode (void)
-{
- return ugly_no_encode;
+ i->content_encoding = charset ? xstrdup (charset) : NULL;
}