+2009-06-14 Micah Cowan <micah@cowan.name>
+
+ * po/Makefile.in.in (distclean): remove en_US.po, too.
+
+ * Makefile.am: Include md5 as a subdir unconditionally.
+ It may result in useless compilation, and additional risk of
+ breaking a build of something that isn't actually needed, but
+ otherwise it's too much of a hassle to manage a failure-free
+ distcheck.
+
+2009-06-12 Micah Cowan <micah@cowan.name>
+
+ * configure.ac: Check for h_errno declaration. Idea thanks to
+ Maciej W. Rozycki.
+
+2009-03-03 Steven Schubiger <stsc@member.fsf.org>
+
+ * src/ftp.c, src/http.c, src/main.c, src/recur.h,
+ tests/Makefile.am: Update the copyright years.
+
+2009-01-23 Steven Schubiger <stsc@members.fsf.org>
+
+ * util/freeopts, util/rmold.pl, util/trunc.c: Remove
+ unnecessary whitespace.
+
2008-11-10 Micah Cowan <micah@cowan.name>
* MAILING-LIST: Mention Gmane, introduce subsections.
* AUTHORS: Added Steven Schubiger.
+ 2008-06-26 Xavier Saint <wget@sxav.eu>
+
+ * configure.ac : IRI support requires libiconv; check for it.
+
+ 2008-06-14 Xavier Saint <wget@sxav.eu>
+
+ * configure.ac: Add support for IRIs
+
2008-05-29 Micah Cowan <micah@cowan.name>
* po/*.po: Updated from TP (the 1.11.3 set).
AC_CHECK_HEADERS(termios.h sys/ioctl.h sys/select.h utime.h sys/utime.h)
AC_CHECK_HEADERS(stdint.h inttypes.h pwd.h wchar.h)
+AC_CHECK_DECLS(h_errno,,,[#include <netdb.h>])
+
dnl
dnl Check sizes of integer types. These are used to find n-bit
dnl integral types on older systems that fail to provide intN_t and
fi
AC_SUBST(COMMENT_IF_NO_POD2MAN)
+
+ dnl
+ dnl Check for IDN/IRIs
+ dnl
+
+ AC_ARG_ENABLE(iri,
+ AC_HELP_STRING([--disable-iri],[disable IDN/IRIs support]),
+ [case "${enable_iri}" in
+ no)
+ dnl Disable IRIs checking
+ AC_MSG_NOTICE([disabling IRIs at user request])
+ iri=no
+ ;;
+ yes)
+ dnl IRIs explicitly enabled
+ iri=yes
+ force_iri=yes
+ ;;
+ auto)
+ dnl Auto-detect IRI
+ iri=yes
+ ;;
+ *)
+ AC_MSG_ERROR([Invalid --enable-iri argument \`$enable_iri'])
+ ;;
+ esac
+ ], [
+ dnl If nothing is specified, assume auto-detection
+ iri=yes
+ ]
+ )
+
+ AC_ARG_WITH(libidn, AC_HELP_STRING([--with-libidn=[DIR]],
+ [Support IDN/IRIs (needs GNU Libidn)]),
+ libidn=$withval, libidn="")
+ if test "X$iri" != "Xno"; then
+ AM_ICONV
+
+ if test "X$am_cv_func_iconv" != "Xyes"; then
+ iri=no
+ if test "X$force_iri" = "Xyes"; then
+ AC_MSG_ERROR([Libiconv is required for IRIs support])
+ else
+ AC_MSG_NOTICE([disabling IRIs because libiconv wasn't found])
+ fi
+ fi
+ fi
+
+ if test "X$iri" != "Xno"; then
+ if test "$libidn" != ""; then
+ LDFLAGS="${LDFLAGS} -L$libidn/lib"
+ CPPFLAGS="${CPPFLAGS} -I$libidn/include"
+ fi
+ AC_CHECK_HEADER(idna.h,
+ AC_CHECK_LIB(idn, stringprep_check_version,
+ [iri=yes LIBS="${LIBS} -lidn"], iri=no),
+ iri=no)
+
+ if test "X$iri" != "Xno" ; then
+ AC_DEFINE(ENABLE_IRI, 1, [Define if IRI support is enabled.])
+ AC_MSG_NOTICE([Enabling support for IRI.])
+ else
+ AC_MSG_WARN([Libidn not found])
+ fi
+ fi
+
+
+ dnl Needed by src/Makefile.am
+ AM_CONDITIONAL([IRI_IS_ENABLED], [test "X$iri" != "Xno"])
+
+
dnl
dnl Create output
dnl
+2009-06-20 Micah Cowan <micah@cowan.name>
+
+ * wget.texi (Contributors): Added Jay Krell.
+
+2009-06-14 Micah Cowan <micah@cowan.name>
+
+ * Makefile.am (wget.pod): $(srcdir)/version.texi -> version.texi
+
+2009-06-12 Micah Cowan <micah@cowan.name>
+
+ * wget.texi (Download Options): More accuracy on what happens when
+ -nd is used with -r or -p.
+
+2009-06-11 Micah Cowan <micah@cowan.name>
+
+ * wget.texi (Contributors): Added Xin Zou, Benjamin Wolsey, and
+ Robert Millan.
+
+2009-06-11 Joao Ferreira <joao@joaoff.com>
+
+ * wget.texi (Option Syntax): Fixed contradictory and confusing
+ explanation of --follow-ftp and negation.
+
+2009-06-10 Micah Cowan <micah@cowan.name>
+
+ * sample.wgetrc: Add "https_proxy" to the proxy examples. Thanks
+ to Martin Paul <martin@par.univie.ac.at> for the suggestion.
+
2008-11-15 Steven Schubiger <stsc@members.fsf.org>
* sample.wgetrc: Comment the waitretry "default" value,
* wget.texi (Robot Exclusion): Fixed typo "downloads" ->
"download"
+ 2008-08-03 Xavier Saint <wget@sxav.eu>
+
+ * wget.texi : Add option descriptions for the three new
+ options --iri, --locale and --remote-encoding related to
+ IRI support.
+
+ * sample.wgetrc : Add commented lines for the three new
+ commands iri, locale and encoding related to IRI support.
+
2008-08-03 Micah Cowan <micah@cowan.name>
* wget.texi: Don't set UPDATED; already set by version.texi.
# is *not* sent by default.
#header = Accept-Language: en
-# You can set the default proxies for Wget to use for http and ftp.
+# You can set the default proxies for Wget to use for http, https, and ftp.
# They will override the value in the environment.
+#https_proxy = http://proxy.yoyodyne.com:18023/
#http_proxy = http://proxy.yoyodyne.com:18023/
#ftp_proxy = http://proxy.yoyodyne.com:18023/
# To try ipv6 addresses first:
#prefer-family = IPv6
+
+ # Set default IRI support state
+ #iri = off
+
+ # Force the default system encoding
+ #locale = UTF-8
+
+ # Force the default remote server encoding
+ #remoteencoding = UTF-8
@samp{--no-} prefix. This might seem superfluous---if the default for
an affirmative option is to not do something, then why provide a way
to explicitly turn it off? But the startup file may in fact change
-the default. For instance, using @code{follow_ftp = off} in
-@file{.wgetrc} makes Wget @emph{not} follow FTP links by default, and
+the default. For instance, using @code{follow_ftp = on} in
+@file{.wgetrc} makes Wget @emph{follow} FTP links by default, and
using @samp{--no-follow-ftp} is the only way to restore the factory
default from the command line.
cases, the local file will be @dfn{clobbered}, or overwritten, upon
repeated download. In other cases it will be preserved.
-When running Wget without @samp{-N}, @samp{-nc}, @samp{-r}, or @samp{p},
-downloading the same file in the same directory will result in the
-original copy of @var{file} being preserved and the second copy being
-named @samp{@var{file}.1}. If that file is downloaded yet again, the
-third copy will be named @samp{@var{file}.2}, and so on. When
-@samp{-nc} is specified, this behavior is suppressed, and Wget will
-refuse to download newer copies of @samp{@var{file}}. Therefore,
-``@code{no-clobber}'' is actually a misnomer in this mode---it's not
-clobbering that's prevented (as the numeric suffixes were already
-preventing clobbering), but rather the multiple version saving that's
-prevented.
-
-When running Wget with @samp{-r} or @samp{-p}, but without @samp{-N}
-or @samp{-nc}, re-downloading a file will result in the new copy
-simply overwriting the old. Adding @samp{-nc} will prevent this
-behavior, instead causing the original version to be preserved and any
-newer copies on the server to be ignored.
+When running Wget without @samp{-N}, @samp{-nc}, @samp{-r}, or
+@samp{-p}, downloading the same file in the same directory will result
+in the original copy of @var{file} being preserved and the second copy
+being named @samp{@var{file}.1}. If that file is downloaded yet
+again, the third copy will be named @samp{@var{file}.2}, and so on.
+(This is also the behavior with @samp{-nd}, even if @samp{-r} or
+@samp{-p} are in effect.) When @samp{-nc} is specified, this behavior
+is suppressed, and Wget will refuse to download newer copies of
+@samp{@var{file}}. Therefore, ``@code{no-clobber}'' is actually a
+misnomer in this mode---it's not clobbering that's prevented (as the
+numeric suffixes were already preventing clobbering), but rather the
+multiple version saving that's prevented.
+
+When running Wget with @samp{-r} or @samp{-p}, but without @samp{-N},
+@samp{-nd}, or @samp{-nc}, re-downloading a file will result in the
+new copy simply overwriting the old. Adding @samp{-nc} will prevent
+this behavior, instead causing the original version to be preserved
+and any newer copies on the server to be ignored.
When running Wget with @samp{-N}, with or without @samp{-r} or
@samp{-p}, the decision as to whether or not to download a newer copy
Note that @samp{-c} only works with @sc{ftp} servers and with @sc{http}
servers that support the @code{Range} header.
+ @cindex iri support
+ @cindex idn support
+ @item --iri
+
+ Turn on internationalized URI (IRI) support. Use @samp{--iri=no} to
+ turn it off. IRI support is activated by default.
+
+ You can set the default state of IRI support using the @code{iri} command in
+ @file{.wgetrc}. That setting may be overridden from the command line.
+
+ @cindex local encoding
+ @cindex locale
+ @item --locale=@var{encoding}
+
+ Force Wget to use @var{encoding} as the default system encoding. That affects
+ how Wget converts URLs specified as arguments from locale to @sc{utf-8} for
+ IRI support.
+
+ Wget uses the function @code{nl_langinfo()} and then the @code{CHARSET}
+ environment variable to get the locale. If it fails, @sc{ascii} is used.
+
+ You can set the default locale using the @code{locale} command in
+ @file{.wgetrc}. That setting may be overridden from the command line.
+
@cindex progress indicator
@cindex dot style
@item --progress=@var{type}
``dot'' progress will be favored over ``bar''. To force the bar output,
use @samp{--progress=bar:force}.
+ @cindex remote encoding
+ @item --remote-encoding=@var{encoding}
+
+ Force Wget to use @var{encoding} as the default remote server encoding. That
+ affects how Wget converts URIs found in files from remote encoding to
+ @sc{utf-8} during a recursive fetch. This option is only useful for
+ IRI support, for the interpretation of non-@sc{ascii} characters.
+
+ For HTTP, remote encoding can be found in HTTP @code{Content-Type}
+ header and in HTML @code{Content-Type http-equiv} meta tag.
+
+ You can set the default encoding using the @code{remoteencoding}
+ command in @file{.wgetrc}. That setting may be overridden from the
+ command line.
+
@item -N
@itemx --timestamping
Turn on time-stamping. @xref{Time-Stamping}, for details.
Alexander Kourakos,
Martin Kraemer,
Sami Krank,
+Jay Krell,
@tex
$\Sigma\acute{\iota}\mu o\varsigma\;
\Xi\varepsilon\nu\iota\tau\acute{\epsilon}\lambda\lambda\eta\varsigma$
Matthew J.@: Mellon,
Jordan Mendelson,
Ted Mielczarek,
+Robert Millan,
Lin Zhe Min,
Jan Minar,
Tim Mooney,
Douglas E.@: Wegscheid,
Ralf Wildenhues,
Joshua David Williams,
+Benjamin Wolsey,
YAMAZAKI Makoto,
Jasmin Zainul,
@iftex
@ifnottex
Bojan Zdrnja,
@end ifnottex
-Kristijan Zimmer.
+Kristijan Zimmer,
+Xin Zou.
Apologies to all who I accidentally left out, and many thanks to all the
subscribers of the Wget mailing list.
+2009-06-20 Jay Krell <jay.krell@cornell.edu>
+
+ * sysdep.h (_ALL_SOURCE): (small change) Define the _ALL_SOURCE
+ macro on INTERIX systems. (I switched the location from ftp.c to
+ sysdep.h --mjc)
+
+2009-06-15 Micah Cowan <micah@cowan.name>
+
+ * ftp.c (getftp): If we can't accept the connection, return
+ CONERROR, not whatever the contents of err happens to be. Fixes
+ bug #25015.
+
+ * retr.c (fd_read_body): Make both args to progress_create
+ consistent, resulting in an accurate progress display. Fixes bug
+ #24948.
+
+2009-06-14 Micah Cowan <micah@cowan.name>
+
+ * Makefile.am (wget_SOURCES): css-tokens.h needs to ship with
+ dist, too.
+
+2009-06-13 Micah Cowan <micah@cowan.name>
+
+ * init.c: Rename setval_internal_wrapper to setval_internal_tilde,
+ ensure we don't "replace" the tilde unless it's actually
+ present. Clean up some minor GNU style issues.
+
+2009-06-13 Julien Pichon <julienpichon7@gmail.com>
+
+ * init.c: Handle tilde-expansion in wgetrc commands, without
+ resorting to setting/unsetting globals to change behavior in one
+ call location.
+
+2009-06-12 Micah Cowan <micah@cowan.name>
+
+ * host.c: Include <sys/types.h> before <sys/socket.h>. Not
+ required by POSIX any more, but some older systems (such as
+ FreeBSD 4.1) still need it, and it doesn't seem like it could
+ hurt...
+
+ * build_info.c (library): Handle "https" as a feature in its own
+ right, apart from "gnutls" and "openssl".
+
+ * host.c: Declare h_errno if no declaration is provided. Idea
+ thanks to Maciej W. Rozycki.
+
+2009-06-11 Xin Zou <zouxin2008@gmail.com>
+
+ * http.c (gethttp): Fix some memory leaks.
+
+2009-06-11 Micah Cowan <micah@cowan.name>
+
+ * http.c (http_atotm): Handle potential for setlocale's return
+ value to be static storage. Thanks to Benjamin Wolsey
+ <bwy@benjaminwolsey.de>.
+
+ * sysdep.h: Need NAMESPACE_TWEAKS on non-Linux glibc-based
+ systems, too. Thanks to Robert Millan.
+
+2009-05-28 Steven Schubiger <stsc@member.fsf.org>
+
+ * ftp.c (ftp_get_listing): Update the "listing file"
+ string after calling ftp_loop_internal().
+
+2009-05-27 Steven Schubiger <stsc@member.fsf.org>
+
+ * ftp.c (ftp_get_listing): Duplicate the "listing file"
+ string to avoid memory corruption when FOPEN_EXCL_ERR is
+ encountered.
+
+2009-05-17 Steven Schubiger <stsc@member.fsf.org>
+
+ * progress.c (eta_to_human_short): Fix the remaining hours
+ to be displayed. Spotted by Tadeu Martins (#26411).
+
+2009-04-24 Micah Cowan <micah@cowan.name>
+
+ * hash.c: Change stdint.h inclusion to use HAVE_STDINT_H, not C99
+ check.
+
+ * connect.c: stdint.h inclusion added.
+
+ Thanks to Markus Duft <mduft@gentoo.org> for a similar patch.
+
+2009-04-20 Micah Cowan <micah@cowan.name>
+
+ * Makefile.am (version.c): Fix unportable use of "echo -n".
+
+2009-04-13 Steven Schubiger <stsc@member.fsf.org>
+
+ * ftp.c (ftp_retrieve_list): Move the duplicated code that
+ determines the local file to a function.
+
+ * http.c (http_loop): Likewise.
+
+ * retr.c (set_local_file): New function.
+
+2009-04-11 Steven Schubiger <stsc@member.fsf.org>
+
+ * init.c (initialize): Run a custom SYSTEM_WGETRC when
+ provided as an environment variable.
+
+2009-02-27 Gisle Vanem <gvanem@broadpark.no>
+
+ * main.c (main): "freopen (NULL,.." causes an assertion in MSVC
+ debug-mode. I.e. NULL isn't legal. But the "CONOUT$" device works
+ fine.
+
+2009-02-27 Steven Schubiger <stsc@member.fsf.org>
+
+ * ftp.c (ftp_loop_internal): Don't claim for FTP retrievals
+ when writing to standard output either that the document
+ has been saved. Addresses bug #20520 again.
+
+2009-02-21 Steven Schubiger <stsc@member.fsf.org>
+
+ * http.c (http_loop): When a document is written to
+ standard output, don't claim it has been saved to a file.
+ Addresses bug #20520.
+
+2009-02-18 Steven Schubiger <stsc@members.fsf.org>
+
+ * recur.h: Remove the dangling declaration for recursive_cleanup().
+
+2009-02-01 Gerardo E. Gidoni <gerel@gnu.org>
+
+ * main.c, recur.c, recur.h, res.c, retr.c, retr.h: restructured code to
+ avoid multiple 'url_parse' calls.
+
2008-11-13 Micah Cowan <micah@cowan.name>
* http.c (gethttp): Don't do anything when content-length >= our
requested range.
+2008-11-27 Saint Xavier <wget@sxav.eu>
+
+ * http.c (gethttp): Move authentication code before filename
+ allocation avoiding fallbacking on default filename because
+ "Content-Disposition" header wasn't present before authentcation
+ has been completed. Fixes bug #24862.
+
2008-11-16 Steven Schubiger <stsc@members.fsf.org>
* main.c: Declare and initialize the numurls counter.
* init.c (cleanup): Free the memory associated with the base
option (when DEBUG_MALLOC is defined).
+ 2008-07-02 Xavier Saint <wget@sxav.eu>
+
+ * iri.c, iri.h : New function idn_decode() to decode ASCII
+ encoded hostname to the locale.
+
+ * host.c : Show hostname to be resolved both in locale and
+ ASCII encoded.
+
2008-06-28 Steven Schubiger <stsc@members.fsf.org>
* retr.c (retrieve_from_file): Allow for reading the links from
an external file (HTTP/FTP).
+ 2008-06-26 Xavier Saint <wget@sxav.eu>
+
+ * iri.c, iri.h : New functions locale_to_utf8() and
+ idn_encode() adding basic capabilities of IRI/IDN.
+
+ * url.c : Convert URLs from locale to UTF-8 allowing a basic
+ support of IRI/IDN
+
2008-06-25 Steven Schubiger <stsc@members.fsf.org>
* ftp.c (getftp): When spidering a FTP URL, emit a diagnostic
* http.c: Make -nv --spider include the file's name when it
exists.
-
+
2008-06-22 Micah Cowan <micah@cowan.name>
* Makefile.am (version.c): Fixed version string invocation so it
string vars pointers-to-const, and moved line lengths
below 80 (in Makefile.am, not in version.c).
+ 2008-06-19 Xavier Saint <wget@sxav.eu>
+
+ * iri.c, iri.h : New function check_encoding_name() as
+ a preliminary encoding name check.
+
+ * main.c, iri.c : Make use of check_encoding_name().
+
+ 2008-06-19 Xavier Saint <wget@sxav.eu>
+
+ * iri.c : Include missing stringprep.h file and add a
+ cast.
+
+ * init.c : set a default initial value for opt.enable_iri,
+ opt.locale and opt.encoding_remote.
+
+ 2008-06-19 Xavier Saint <wget@sxav.eu>
+
+ * iri.c, iri.h : Add a new function find_locale() to find
+ out the local system encoding.
+
+ * main.c : Make use of find_locale().
+
+ 2008-06-19 Xavier Saint <wget@sxav.eu>
+
+ * html-url.c : Add "content-type" meta tag parsing for
+ retrieving page encoding.
+
+ * iri.h : Make no-op version of parse_charset() return
+ NULL.
+
2008-06-16 Micah Cowan <micah@cowan.name>
* http.c (http_loop): When hstat.len is higher than the
successfully completed content's length, but it's because we
_set_ it that way, don't abort.
+ 2008-06-14 Xavier Saint <wget@sxav.eu>
+
+ * iri.c, iri.h : New files.
+
+ * Makefile.am : Add files iri.h and conditional iri.c.
+
+ * build_info.c : Add compiled feature "iri".
+
+ * http.c : include iri.h and parse charset from Content-Type
+ header.
+
+ * init.c, main.c, options.h : if an option isn't supported
+ at compile time, don't get rid of it; show a dummy
+ message instead if it is used.
+
2008-06-13 Micah Cowan <micah@cowan.name>
* build_info.c: ENABLE_NTLM, not HAVE_NTLM; distinguish OpenSSL
default.
2008-05-17 Kenny Parnell <k.parnell@gmail.com>
-
+
(cmd_spec_prefer_family): Initialize prefer_family to prefer_none.
2008-05-17 Micah Cowan <micah@cowan.name>
-
+
* main.c (main): Handle Ctrl-D on command-line.
2008-05-15 Steven Schubiger <schubiger@gmail.com>
* options.h: Add an according boolean member to the options
struct.
-
+
* sysdep.h: Comment the defines __EXTENSIONS__ and _GNU_SOURCE
out, because they're now defined independently by config.h.
# Version: @VERSION@
#
+ if IRI_IS_ENABLED
+ IRI_OBJ = iri.c
+ endif
+
# The following line is losing on some versions of make!
DEFS = @DEFS@ -DSYSTEM_WGETRC=\"$(sysconfdir)/wgetrc\" -DLOCALEDIR=\"$(localedir)\"
LIBS = @LIBSSL@ @LIBGNUTLS@ @LIBINTL@ @LIBS@
bin_PROGRAMS = wget
wget_SOURCES = build_info.c cmpt.c connect.c convert.c cookies.c ftp.c \
- css.l css-url.c \
+ css.l css-url.c css-tokens.h \
ftp-basic.c ftp-ls.c hash.c host.c html-parse.c html-url.c \
http.c init.c log.c main.c netrc.c progress.c ptimer.c \
recur.c res.c retr.c snprintf.c spider.c url.c \
- utils.c \
- css-url.h connect.h convert.h cookies.h \
+ utils.c $(IRI_OBJ) \
+ css-url.h connect.h convert.h cookies.h \
ftp.h gen-md5.h hash.h host.h html-parse.h html-url.h \
http.h http-ntlm.h init.h log.h mswindows.h netrc.h \
options.h progress.h ptimer.h recur.h res.h retr.h \
echo '/* version.c */' > $@
echo '/* Autogenerated by Makefile - DO NOT EDIT */' >> $@
echo '' >> $@
- echo -n 'const char *version_string = "@VERSION@"' >> $@
+ echo 'const char *version_string = "@VERSION@"' >> $@
-hg log -r . --template='" ({node|short})"\n' 2>/dev/null >> $@
echo ';' >> $@
echo 'const char *compilation_string = "'$(COMPILE)'";' \
"-md5",
#endif
+#ifdef HAVE_SSL
+ "+https",
+#else
+ "-https",
+#endif
+
#ifdef HAVE_LIBGNUTLS
"+gnutls",
#else
#else
"-gettext",
#endif
+
+ #ifdef ENABLE_IRI
+ "+iri",
+ #else
+ "-iri",
+ #endif
+
/* sentinel value */
NULL
};
#include "connect.h"
#include "hash.h"
+/* Apparently needed for Interix: */
+#ifdef HAVE_STDINT_H
+# include <stdint.h>
+#endif
+
/* Define sockaddr_storage where unavailable (presumably on IPv4-only
hosts). */
if (print)
{
const char *txt_addr = print_address (ip);
- if (print && 0 != strcmp (print, txt_addr))
- logprintf (LOG_VERBOSE, _("Connecting to %s|%s|:%d... "),
- escnonprint_uri (print), txt_addr, port);
+ if (0 != strcmp (print, txt_addr))
+ {
+ char *str = NULL, *name;
+
+ if (opt.enable_iri && (name = idn_decode ((char *) print)) != NULL)
+ {
+ int len = strlen (print) + strlen (name) + 4;
+ str = xmalloc (len);
+ snprintf (str, len, "%s (%s)", name, print);
+ str[len-1] = '\0';
+ xfree (name);
+ }
+
+ logprintf (LOG_VERBOSE, _("Connecting to %s|%s|:%d... "),
+ str ? str : escnonprint_uri (print), txt_addr, port);
+
+ if (str)
+ xfree (str);
+ }
else
logprintf (LOG_VERBOSE, _("Connecting to %s:%d... "), txt_addr, port);
}
#include <assert.h>
#ifndef WINDOWS
+# include <sys/types.h>
# include <sys/socket.h>
# include <netinet/in.h>
# ifndef __BEOS__
# define NO_ADDRESS NO_DATA
#endif
+#if !HAVE_DECL_H_ERRNO
+extern int h_errno;
+#endif
+
+
/* Lists of IP addresses that result from running DNS queries. See
lookup_host for details. */
/* No luck with the cache; resolve HOST. */
if (!silent && !numeric_address)
- logprintf (LOG_VERBOSE, _("Resolving %s... "),
- quotearg_style (escape_quoting_style, host));
+ {
+ char *str = NULL, *name;
+
+ if (opt.enable_iri && (name = idn_decode ((char *) host)) != NULL)
+ {
+ int len = strlen (host) + strlen (name) + 4;
+ str = xmalloc (len);
+ snprintf (str, len, "%s (%s)", name, host);
+ str[len-1] = '\0';
+ xfree (name);
+ }
+
+ logprintf (LOG_VERBOSE, _("Resolving %s... "),
+ quotearg_style (escape_quoting_style, str ? str : host));
+
+ if (str)
+ xfree (str);
+ }
#ifdef ENABLE_IPV6
{
/* HTTP support.
Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
- 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+ 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
This file is part of GNU Wget.
If PROXY is non-NULL, the connection will be made to the proxy
server, and u->url will be requested. */
static uerr_t
- gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
+ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy,
+ struct iri *iri)
{
struct request *req;
print_server_response (resp, " ");
}
+ /* Check for keep-alive related responses. */
+ if (!inhibit_keep_alive && contlen != -1)
+ {
+ if (resp_header_copy (resp, "Keep-Alive", NULL, 0))
+ keep_alive = true;
+ else if (resp_header_copy (resp, "Connection", hdrval, sizeof (hdrval)))
+ {
+ if (0 == strcasecmp (hdrval, "Keep-Alive"))
+ keep_alive = true;
+ }
+ }
+
+ if (keep_alive)
+ /* The server has promised that it will not close the connection
+ when we're done. This means that we can register it. */
+ register_persistent (conn->host, conn->port, sock, using_ssl);
+
+ if (statcode == HTTP_STATUS_UNAUTHORIZED)
+ {
+ /* Authorization is required. */
+ if (keep_alive && !head_only && skip_short_body (sock, contlen))
+ CLOSE_FINISH (sock);
+ else
+ CLOSE_INVALIDATE (sock);
+ pconn.authorized = false;
+ if (!auth_finished && (user && passwd))
+ {
+ /* IIS sends multiple copies of WWW-Authenticate, one with
+ the value "negotiate", and other(s) with data. Loop over
+ all the occurrences and pick the one we recognize. */
+ int wapos;
+ const char *wabeg, *waend;
+ char *www_authenticate = NULL;
+ for (wapos = 0;
+ (wapos = resp_header_locate (resp, "WWW-Authenticate", wapos,
+ &wabeg, &waend)) != -1;
+ ++wapos)
+ if (known_authentication_scheme_p (wabeg, waend))
+ {
+ BOUNDED_TO_ALLOCA (wabeg, waend, www_authenticate);
+ break;
+ }
+
+ if (!www_authenticate)
+ {
+ /* If the authentication header is missing or
+ unrecognized, there's no sense in retrying. */
+ logputs (LOG_NOTQUIET, _("Unknown authentication scheme.\n"));
+ }
+ else if (!basic_auth_finished
+ || !BEGINS_WITH (www_authenticate, "Basic"))
+ {
+ char *pth;
+ pth = url_full_path (u);
+ request_set_header (req, "Authorization",
+ create_authorization_line (www_authenticate,
+ user, passwd,
+ request_method (req),
+ pth,
+ &auth_finished),
+ rel_value);
+ if (BEGINS_WITH (www_authenticate, "NTLM"))
+ ntlm_seen = true;
+ else if (!u->user && BEGINS_WITH (www_authenticate, "Basic"))
+ {
+ /* Need to register this host as using basic auth,
+ * so we automatically send creds next time. */
+ register_basic_auth_host (u->host);
+ }
+ xfree (pth);
+ xfree_null (message);
+ resp_free (resp);
+ xfree (head);
+ goto retry_with_auth;
+ }
+ else
+ {
+ /* We already did Basic auth, and it failed. Gotta
+ * give up. */
+ }
+ }
+ logputs (LOG_NOTQUIET, _("Authorization failed.\n"));
+ request_free (req);
+ xfree_null (message);
+ resp_free (resp);
+ xfree (head);
+ return AUTHFAILED;
+ }
+ else /* statcode != HTTP_STATUS_UNAUTHORIZED */
+ {
+ /* Kludge: if NTLM is used, mark the TCP connection as authorized. */
+ if (ntlm_seen)
+ pconn.authorized = true;
+ }
+
/* Determine the local filename if needed. Notice that if -O is used
* hstat.local_file is set by http_loop to the argument of -O. */
if (!hs->local_file)
hs->local_file = url_file_name (u);
}
}
-
+
/* TODO: perform this check only once. */
if (!hs->existence_checked && file_exists_p (hs->local_file))
{
if (has_html_suffix_p (hs->local_file))
*dt |= TEXTHTML;
+ xfree (head);
+ xfree_null (message);
return RETRUNNEEDED;
}
else if (!ALLOW_CLOBBER)
local_dot_orig_file_exists = true;
local_filename = filename_plus_orig_suffix;
}
- }
+ }
if (!local_dot_orig_file_exists)
/* Couldn't stat() <file>.orig, so try to stat() <file>. */
contlen = parsed;
}
- /* Check for keep-alive related responses. */
- if (!inhibit_keep_alive && contlen != -1)
- {
- if (resp_header_copy (resp, "Keep-Alive", NULL, 0))
- keep_alive = true;
- else if (resp_header_copy (resp, "Connection", hdrval, sizeof (hdrval)))
- {
- if (0 == strcasecmp (hdrval, "Keep-Alive"))
- keep_alive = true;
- }
- }
- if (keep_alive)
- /* The server has promised that it will not close the connection
- when we're done. This means that we can register it. */
- register_persistent (conn->host, conn->port, sock, using_ssl);
-
- if (statcode == HTTP_STATUS_UNAUTHORIZED)
- {
- /* Authorization is required. */
- if (keep_alive && !head_only && skip_short_body (sock, contlen))
- CLOSE_FINISH (sock);
- else
- CLOSE_INVALIDATE (sock);
- pconn.authorized = false;
- if (!auth_finished && (user && passwd))
- {
- /* IIS sends multiple copies of WWW-Authenticate, one with
- the value "negotiate", and other(s) with data. Loop over
- all the occurrences and pick the one we recognize. */
- int wapos;
- const char *wabeg, *waend;
- char *www_authenticate = NULL;
- for (wapos = 0;
- (wapos = resp_header_locate (resp, "WWW-Authenticate", wapos,
- &wabeg, &waend)) != -1;
- ++wapos)
- if (known_authentication_scheme_p (wabeg, waend))
- {
- BOUNDED_TO_ALLOCA (wabeg, waend, www_authenticate);
- break;
- }
-
- if (!www_authenticate)
- {
- /* If the authentication header is missing or
- unrecognized, there's no sense in retrying. */
- logputs (LOG_NOTQUIET, _("Unknown authentication scheme.\n"));
- }
- else if (!basic_auth_finished
- || !BEGINS_WITH (www_authenticate, "Basic"))
- {
- char *pth;
- pth = url_full_path (u);
- request_set_header (req, "Authorization",
- create_authorization_line (www_authenticate,
- user, passwd,
- request_method (req),
- pth,
- &auth_finished),
- rel_value);
- if (BEGINS_WITH (www_authenticate, "NTLM"))
- ntlm_seen = true;
- else if (!u->user && BEGINS_WITH (www_authenticate, "Basic"))
- {
- /* Need to register this host as using basic auth,
- * so we automatically send creds next time. */
- register_basic_auth_host (u->host);
- }
- xfree (pth);
- goto retry_with_auth;
- }
- else
- {
- /* We already did Basic auth, and it failed. Gotta
- * give up. */
- }
- }
- logputs (LOG_NOTQUIET, _("Authorization failed.\n"));
- request_free (req);
- return AUTHFAILED;
- }
- else /* statcode != HTTP_STATUS_UNAUTHORIZED */
- {
- /* Kludge: if NTLM is used, mark the TCP connection as authorized. */
- if (ntlm_seen)
- pconn.authorized = true;
- }
request_free (req);
hs->statcode = statcode;
char *tmp = strchr (type, ';');
if (tmp)
{
+ /* sXXXav: only needed if IRI support is enabled */
+ char *tmp2 = tmp + 1;
+
while (tmp > type && c_isspace (tmp[-1]))
--tmp;
*tmp = '\0';
+
+ /* Try to get remote encoding if needed */
+ if (opt.enable_iri && !opt.encoding_remote)
+ {
+ tmp = parse_charset (tmp2);
+ if (tmp)
+ set_content_encoding (iri, tmp);
+ }
}
}
hs->newloc = resp_header_strdup (resp, "Location");
else
CLOSE_INVALIDATE (sock);
xfree_null (type);
+ xfree (head);
return NEWLOCATION;
}
}
xfree_null (type);
CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
might be more bytes in the body. */
+ xfree (head);
return RETRUNNEEDED;
}
if ((contrange != 0 && contrange != hs->restval)
Bail out. */
xfree_null (type);
CLOSE_INVALIDATE (sock);
+ xfree (head);
return RANGEERR;
}
if (contlen == -1)
CLOSE_FINISH (sock);
else
CLOSE_INVALIDATE (sock);
+ xfree (head);
return RETRFINISHED;
}
_("%s has sprung into existence.\n"),
hs->local_file);
CLOSE_INVALIDATE (sock);
+ xfree (head);
return FOPEN_EXCL_ERR;
}
}
{
logprintf (LOG_NOTQUIET, "%s: %s\n", hs->local_file, strerror (errno));
CLOSE_INVALIDATE (sock);
+ xfree (head);
return FOPENERR;
}
}
retried, and retried, and retried, and... */
uerr_t
http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
- int *dt, struct url *proxy)
+ int *dt, struct url *proxy, struct iri *iri)
{
int count;
bool got_head = false; /* used for time-stamping and filename detection */
uerr_t err, ret = TRYLIMEXC;
time_t tmr = -1; /* remote time-stamp */
struct http_stat hstat; /* HTTP status */
- struct_stat st;
+ struct_stat st;
bool send_head_first = true;
+ char *file_name;
/* Assert that no value for *LOCAL_FILE was passed. */
assert (local_file == NULL || *local_file == NULL);
-
+
/* Set LOCAL_FILE parameter. */
if (local_file && opt.output_document)
*local_file = HYPHENP (opt.output_document) ? NULL : xstrdup (opt.output_document);
-
+
/* Reset NEWLOC parameter. */
*newloc = NULL;
retrieve the file. But if the output_document was given, then this
test was already done and the file didn't exist. Hence the !opt.output_document */
logprintf (LOG_VERBOSE, _("\
- File %s already there; not retrieving.\n\n"),
+ File %s already there; not retrieving.\n\n"),
quote (hstat.local_file));
/* If the file is there, we suppose it's retrieved OK. */
*dt |= RETROKF;
/* Reset the counter. */
count = 0;
-
+
/* Reset the document type. */
*dt = 0;
-
+
/* Skip preliminary HEAD request if we're not in spider mode AND
* if -O was given or HTTP Content-Disposition support is disabled. */
if (!opt.spider
/* Send preliminary HEAD request if -N is given and we have an existing
* destination file. */
- if (opt.timestamping
+ file_name = url_file_name (u);
+ if (opt.timestamping
&& !opt.content_disposition
- && file_exists_p (url_file_name (u)))
+ && file_exists_p (file_name))
send_head_first = true;
-
+ xfree (file_name);
+
/* THE loop */
do
{
/* Increment the pass counter. */
++count;
sleep_between_retrievals (count);
-
+
/* Get the current time string. */
tms = datetime_str (time (NULL));
-
+
if (opt.spider && !got_head)
logprintf (LOG_VERBOSE, _("\
Spider mode enabled. Check if remote file exists.\n"));
if (opt.verbose)
{
char *hurl = url_string (u, URL_AUTH_HIDE_PASSWD);
-
- if (count > 1)
+
+ if (count > 1)
{
char tmp[256];
sprintf (tmp, _("(try:%2d)"), count);
logprintf (LOG_NOTQUIET, "--%s-- %s %s\n",
tms, tmp, hurl);
}
- else
+ else
{
logprintf (LOG_NOTQUIET, "--%s-- %s\n",
tms, hurl);
}
-
+
#ifdef WINDOWS
ws_changetitle (hurl);
#endif
/* Default document type is empty. However, if spider mode is
on or time-stamping is employed, HEAD_ONLY commands is
encoded within *dt. */
- if (send_head_first && !got_head)
+ if (send_head_first && !got_head)
*dt |= HEAD_ONLY;
else
*dt &= ~HEAD_ONLY;
*dt &= ~SEND_NOCACHE;
/* Try fetching the document, or at least its head. */
- err = gethttp (u, &hstat, dt, proxy);
+ err = gethttp (u, &hstat, dt, proxy, iri);
/* Time? */
tms = datetime_str (time (NULL));
-
+
/* Get the new location (with or without the redirection). */
if (hstat.newloc)
*newloc = xstrdup (hstat.newloc);
hstat.statcode);
ret = WRONGCODE;
}
- else
+ else
{
ret = NEWLOCATION;
}
/* All possibilities should have been exhausted. */
abort ();
}
-
+
if (!(*dt & RETROKF))
{
char *hurl = NULL;
continue;
}
/* Maybe we should always keep track of broken links, not just in
- * spider mode. */
- else if (opt.spider)
+ * spider mode.
+ * Don't log an error if the URL was UTF-8 encoded, because we
+ * will retry it once unencoded. */
+ else if (opt.spider && !iri->utf8_encode)
{
/* #### Again: ugly ugly ugly! */
- if (!hurl)
+ if (!hurl)
hurl = url_string (u, URL_AUTH_HIDE_PASSWD);
nonexisting_url (hurl);
logprintf (LOG_NOTQUIET, _("\
else
{
logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"),
- tms, hstat.statcode,
+ tms, hstat.statcode,
quotearg_style (escape_quoting_style, hstat.error));
}
logputs (LOG_VERBOSE, "\n");
&& ((hstat.len == hstat.contlen) ||
((hstat.res == 0) && (hstat.contlen == -1))))
{
- /* #### This code repeats in http.c and ftp.c. Move it to a
- function! */
const char *fl = NULL;
- if (opt.output_document)
- {
- if (output_stream_regular)
- fl = opt.output_document;
- }
- else
- fl = hstat.local_file;
+ set_local_file (&fl, hstat.local_file);
if (fl)
{
time_t newtmr = -1;
{
if (*dt & RETROKF)
{
+ bool write_to_stdout = (opt.output_document && HYPHENP (opt.output_document));
+
logprintf (LOG_VERBOSE,
- _("%s (%s) - %s saved [%s/%s]\n\n"),
- tms, tmrate, quote (hstat.local_file),
+ write_to_stdout
+ ? _("%s (%s) - written to stdout %s[%s/%s]\n\n")
+ : _("%s (%s) - %s saved [%s/%s]\n\n"),
+ tms, tmrate,
+ write_to_stdout ? "" : quote (hstat.local_file),
number_to_static_string (hstat.len),
number_to_static_string (hstat.contlen));
logprintf (LOG_NONVERBOSE,
{
if (*dt & RETROKF)
{
+ bool write_to_stdout = (opt.output_document && HYPHENP (opt.output_document));
+
logprintf (LOG_VERBOSE,
- _("%s (%s) - %s saved [%s]\n\n"),
- tms, tmrate, quote (hstat.local_file),
+ write_to_stdout
+ ? _("%s (%s) - written to stdout %s[%s]\n\n")
+ : _("%s (%s) - %s saved [%s]\n\n"),
+ tms, tmrate,
+ write_to_stdout ? "" : quote (hstat.local_file),
number_to_static_string (hstat.len));
logprintf (LOG_NONVERBOSE,
"%s URL:%s [%s] -> \"%s\" [%d]\n",
Netscape cookie specification.) */
};
const char *oldlocale;
+ char savedlocale[256];
size_t i;
time_t ret = (time_t) -1;
non-English locales, which we work around by temporarily setting
locale to C before invoking strptime. */
oldlocale = setlocale (LC_TIME, NULL);
+ if (oldlocale)
+ {
+ size_t l = strlen (oldlocale);
+ if (l >= sizeof savedlocale)
+ savedlocale[0] = '\0';
+ else
+ memcpy (savedlocale, oldlocale, l);
+ }
+ else savedlocale[0] = '\0';
+
setlocale (LC_TIME, "C");
for (i = 0; i < countof (time_formats); i++)
}
/* Restore the previous locale. */
- setlocale (LC_TIME, oldlocale);
+ if (savedlocale[0])
+ setlocale (LC_TIME, savedlocale);
return ret;
}
/* Reading/parsing the initialization file.
Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
- 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+ 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
This file is part of GNU Wget.
#include "test.h"
#endif
-/* We want tilde expansion enabled only when reading `.wgetrc' lines;
- otherwise, it will be performed by the shell. This variable will
- be set by the wgetrc-reading function. */
-
-static bool enable_tilde_expansion;
#define CMD_DECLARE(func) static bool func (const char *, const char *, void *)
{ "inet6only", &opt.ipv6_only, cmd_boolean },
#endif
{ "input", &opt.input_filename, cmd_file },
+ { "iri", &opt.enable_iri, cmd_boolean },
{ "keepsessioncookies", &opt.keep_session_cookies, cmd_boolean },
{ "limitrate", &opt.limit_rate, cmd_bytes },
{ "loadcookies", &opt.cookies_input, cmd_file },
+ { "locale", &opt.locale, cmd_string },
{ "logfile", &opt.lfilename, cmd_file },
{ "login", &opt.ftp_user, cmd_string },/* deprecated*/
{ "maxredirect", &opt.max_redirect, cmd_number },
{ "referer", &opt.referer, cmd_string },
{ "reject", &opt.rejects, cmd_vector },
{ "relativeonly", &opt.relative_only, cmd_boolean },
+ { "remoteencoding", &opt.encoding_remote, cmd_string },
{ "removelisting", &opt.remove_listing, cmd_boolean },
{ "restrictfilenames", NULL, cmd_spec_restrict_file_names },
{ "retrsymlinks", &opt.retr_symlinks, cmd_boolean },
opt.max_redirect = 20;
opt.waitretry = 10;
+
+ #ifdef ENABLE_IRI
+ opt.enable_iri = true;
+ #else
+ opt.enable_iri = false;
+ #endif
+ opt.locale = NULL;
+ opt.encoding_remote = NULL;
}
\f
/* Return the user's home directory (strdup-ed), or NULL if none is
static enum parse_line parse_line (const char *, char **, char **, int *);
static bool setval_internal (int, const char *, const char *);
+static bool setval_internal_tilde (int, const char *, const char *);
/* Initialize variables from a wgetrc file. Returns zero (failure) if
there were errors in the file. */
file, strerror (errno));
return true; /* not a fatal error */
}
- enable_tilde_expansion = true;
ln = 1;
while ((line = read_whole_line (fp)) != NULL)
{
{
case line_ok:
/* If everything is OK, set the value. */
- if (!setval_internal (comind, com, val))
+ if (!setval_internal_tilde (comind, com, val))
{
fprintf (stderr, _("%s: Error in %s at line %d.\n"),
exec_name, file, ln);
xfree (line);
++ln;
}
- enable_tilde_expansion = false;
fclose (fp);
return errcnt == 0;
void
initialize (void)
{
- char *file;
+ char *file, *env_sysrc;
int ok = true;
/* Load the hard-coded defaults. */
defaults ();
-
- /* If SYSTEM_WGETRC is defined, use it. */
+
+ /* Run a non-standard system rc file when the according environment
+ variable has been set. For internal testing purposes only! */
+ env_sysrc = getenv ("SYSTEM_WGETRC");
+ if (env_sysrc && file_exists_p (env_sysrc))
+ ok &= run_wgetrc (env_sysrc);
+ /* Otherwise, if SYSTEM_WGETRC is defined, use it. */
#ifdef SYSTEM_WGETRC
- if (file_exists_p (SYSTEM_WGETRC))
+ else if (file_exists_p (SYSTEM_WGETRC))
ok &= run_wgetrc (SYSTEM_WGETRC);
#endif
/* Override it with your own, if one exists. */
return line_ok;
}
+#if defined(WINDOWS) || defined(MSDOS)
+# define ISSEP(c) ((c) == '/' || (c) == '\\')
+#else
+# define ISSEP(c) ((c) == '/')
+#endif
+
/* Run commands[comind].action. */
static bool
return commands[comind].action (com, val, commands[comind].place);
}
+static bool
+setval_internal_tilde (int comind, const char *com, const char *val)
+{
+ bool ret;
+ int homelen;
+ char *home;
+ char **pstring;
+ ret = setval_internal (comind, com, val);
+
+ /* We perform tilde expansion for cmd_file and cmd_directory values */
+ if (((commands[comind].action == cmd_file) ||
+ (commands[comind].action == cmd_directory))
+ && ret && (*val == '~' && ISSEP (val[1])))
+ {
+ pstring = commands[comind].place;
+ home = home_dir ();
+ if (home)
+ {
+ homelen = strlen (home);
+ while (homelen && ISSEP (home[homelen - 1]))
+ home[--homelen] = '\0';
+
+ /* Skip the leading "~/". */
+ for (++val; ISSEP (*val); val++)
+ ;
+ *pstring = concat_strings (home, "/", val, (char *)0);
+ }
+ }
+ return ret;
+}
+
/* Run command COM with value VAL. If running the command produces an
error, report the error and exit.
return true;
}
-#if defined(WINDOWS) || defined(MSDOS)
-# define ISSEP(c) ((c) == '/' || (c) == '\\')
-#else
-# define ISSEP(c) ((c) == '/')
-#endif
/* Like the above, but handles tilde-expansion when reading a user's
`.wgetrc'. In that case, and if VAL begins with `~', the tilde
/* #### If VAL is empty, perhaps should set *PLACE to NULL. */
- if (!enable_tilde_expansion || !(*val == '~' && ISSEP (val[1])))
- {
- noexpand:
- *pstring = xstrdup (val);
- }
- else
- {
- int homelen;
- char *home = home_dir ();
- if (!home)
- goto noexpand;
-
- homelen = strlen (home);
- while (homelen && ISSEP (home[homelen - 1]))
- home[--homelen] = '\0';
-
- /* Skip the leading "~/". */
- for (++val; ISSEP (*val); val++)
- ;
-
- *pstring = concat_strings (home, "/", val, (char *) 0);
- }
+ *pstring = xstrdup (val);
#if defined(WINDOWS) || defined(MSDOS)
/* Convert "\" to "/". */
/* Command line parsing.
Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
- 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+ 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
This file is part of GNU Wget.
{ "inet6-only", '6', OPT_BOOLEAN, "inet6only", -1 },
#endif
{ "input-file", 'i', OPT_VALUE, "input", -1 },
+ { "iri", 0, OPT_BOOLEAN, "iri", -1 },
{ "keep-session-cookies", 0, OPT_BOOLEAN, "keepsessioncookies", -1 },
{ "level", 'l', OPT_VALUE, "reclevel", -1 },
{ "limit-rate", 0, OPT_VALUE, "limitrate", -1 },
{ "load-cookies", 0, OPT_VALUE, "loadcookies", -1 },
+ { "locale", 0, OPT_VALUE, "locale", -1 },
{ "max-redirect", 0, OPT_VALUE, "maxredirect", -1 },
{ "mirror", 'm', OPT_BOOLEAN, "mirror", -1 },
{ "no", 'n', OPT__NO, NULL, required_argument },
{ "referer", 0, OPT_VALUE, "referer", -1 },
{ "reject", 'R', OPT_VALUE, "reject", -1 },
{ "relative", 'L', OPT_BOOLEAN, "relativeonly", -1 },
+ { "remote-encoding", 0, OPT_VALUE, "remoteencoding", -1},
{ "remove-listing", 0, OPT_BOOLEAN, "removelisting", -1 },
{ "restrict-file-names", 0, OPT_BOOLEAN, "restrictfilenames", -1 },
{ "retr-symlinks", 0, OPT_BOOLEAN, "retrsymlinks", -1 },
exit (1);
}
+ #ifdef ENABLE_IRI
+ if (opt.enable_iri)
+ {
+ if (opt.locale && !check_encoding_name (opt.locale))
+ opt.locale = NULL;
+
+ if (!opt.locale)
+ opt.locale = find_locale ();
+
+ if (opt.encoding_remote && !check_encoding_name (opt.encoding_remote))
+ opt.encoding_remote = NULL;
+ }
+ #else
+ if (opt.enable_iri || opt.locale || opt.encoding_remote)
+ {
+ /* sXXXav : be more specific... */
+ printf(_("This version does not have support for IRIs\n"));
+ exit(1);
+ }
+ #endif
+
if (opt.ask_passwd)
{
opt.passwd = prompt_for_password ();
{
#ifdef WINDOWS
FILE *result;
- result = freopen (NULL, "wb", stdout);
+ result = freopen ("CONOUT$", "wb", stdout);
if (result == NULL)
{
logputs (LOG_NOTQUIET, _("\
for (t = url; *t; t++)
{
char *filename = NULL, *redirected_URL = NULL;
- int dt;
+ int dt, url_err;
- struct url *url_parsed = url_parse (*t, &url_err);
++ struct url *url_parsed = url_parse (*t, &url_err, NULL, false);
- if ((opt.recursive || opt.page_requisites)
- && (url_scheme (*t) != SCHEME_FTP || url_uses_proxy (*t)))
+ if (!url_parsed)
{
- int old_follow_ftp = opt.follow_ftp;
-
- /* Turn opt.follow_ftp on in case of recursive FTP retrieval */
- if (url_scheme (*t) == SCHEME_FTP)
- opt.follow_ftp = 1;
-
- status = retrieve_tree (*t, NULL);
-
- opt.follow_ftp = old_follow_ftp;
+ char *error = url_error (*t, url_err);
+ logprintf (LOG_NOTQUIET, "%s: %s.\n",*t, error);
+ xfree (error);
+ status = URLERROR;
}
else
{
- struct iri *i = iri_new ();
- set_uri_encoding (i, opt.locale, true);
- status = retrieve_url (*t, &filename, &redirected_URL, NULL, &dt,
- opt.recursive, i);
- iri_free (i);
- }
+ if ((opt.recursive || opt.page_requisites)
+ && (url_scheme (*t) != SCHEME_FTP || url_uses_proxy (url_parsed)))
+ {
+ int old_follow_ftp = opt.follow_ftp;
- if (opt.delete_after && file_exists_p(filename))
- {
- DEBUGP (("Removing file due to --delete-after in main():\n"));
- logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
- if (unlink (filename))
- logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
- }
+ /* Turn opt.follow_ftp on in case of recursive FTP retrieval */
+ if (url_scheme (*t) == SCHEME_FTP)
+ opt.follow_ftp = 1;
+
- status = retrieve_tree (url_parsed);
++ status = retrieve_tree (url_parsed, NULL);
- xfree_null (redirected_URL);
- xfree_null (filename);
+ opt.follow_ftp = old_follow_ftp;
+ }
+ else
- status = retrieve_url (url_parsed, *t, &filename, &redirected_URL, NULL, &dt, opt.recursive);
++ {
++ struct iri *i = iri_new ();
++ set_uri_encoding (i, opt.locale, true);
++ status = retrieve_url (url_parsed, *t, &filename, &redirected_URL,
++ NULL, &dt, opt.recursive, i);
++ iri_free (i);
++ }
+
+ if (opt.delete_after && file_exists_p(filename))
+ {
+ DEBUGP (("Removing file due to --delete-after in main():\n"));
+ logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
+ if (unlink (filename))
+ logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
+ }
+ xfree_null (redirected_URL);
+ xfree_null (filename);
+ url_free (url_parsed);
+ }
}
/* And then from the input file, if any. */
#include "html-url.h"
#include "css-url.h"
#include "spider.h"
-
+ \f
/* Functions for maintaining the URL queue. */
struct queue_element {
int depth; /* the depth */
bool html_allowed; /* whether the document is allowed to
be treated as HTML. */
+ struct iri *iri; /* sXXXav: IRI/encoding state carried with this URL */
bool css_allowed; /* whether the document is allowed to
be treated as CSS. */
struct queue_element *next; /* next element in queue */
into it. */
static void
- url_enqueue (struct url_queue *queue,
+ url_enqueue (struct url_queue *queue, struct iri *i,
const char *url, const char *referer, int depth,
bool html_allowed, bool css_allowed)
{
struct queue_element *qel = xnew (struct queue_element);
+ qel->iri = i;
qel->url = url;
qel->referer = referer;
qel->depth = depth;
DEBUGP (("Enqueuing %s at depth %d\n", url, depth));
DEBUGP (("Queue count %d, maxcount %d.\n", queue->count, queue->maxcount));
+ if (i)
+ DEBUGP (("[IRI Enqueuing %s with %s\n", quote_n (0, url),
+ i->uri_encoding ? quote_n (1, i->uri_encoding) : "None"));
+
if (queue->tail)
queue->tail->next = qel;
queue->tail = qel;
succeeded, or false if the queue is empty. */
static bool
- url_dequeue (struct url_queue *queue,
+ url_dequeue (struct url_queue *queue, struct iri **i,
const char **url, const char **referer, int *depth,
bool *html_allowed, bool *css_allowed)
{
if (!queue->head)
queue->tail = NULL;
+ *i = qel->iri;
*url = qel->url;
*referer = qel->referer;
*depth = qel->depth;
}
\f
static bool download_child_p (const struct urlpos *, struct url *, int,
- struct url *, struct hash_table *);
+ struct url *, struct hash_table *, struct iri *);
-static bool descend_redirect_p (const char *, const char *, int,
+static bool descend_redirect_p (const char *, struct url *, int,
- struct url *, struct hash_table *);
+ struct url *, struct hash_table *, struct iri *);
/* Retrieve a part of the web beginning with START_URL. This used to
options, add it to the queue. */
uerr_t
- retrieve_tree (struct url *start_url_parsed)
-retrieve_tree (const char *start_url, struct iri *pi)
++retrieve_tree (struct url *start_url_parsed, struct iri *pi)
{
uerr_t status = RETROK;
the queue, but haven't been downloaded yet. */
struct hash_table *blacklist;
- struct url *start_url_parsed;
+ int up_error_code;
- start_url_parsed = url_parse (start_url, &up_error_code, i, true);
- if (!start_url_parsed)
- {
- char *error = url_error (start_url, up_error_code);
- logprintf (LOG_NOTQUIET, "%s: %s.\n", start_url, error);
- xfree (error);
- return URLERROR;
- }
-
+ struct iri *i = iri_new ();
+
+ #define COPYSTR(x) (x) ? xstrdup(x) : NULL;
+ /* Duplicate pi struct if not NULL */
+ if (pi)
+ {
+ i->uri_encoding = COPYSTR (pi->uri_encoding);
+ i->content_encoding = COPYSTR (pi->content_encoding);
+ i->utf8_encode = pi->utf8_encode;
+ }
+ else
+ set_uri_encoding (i, opt.locale, true);
+ #undef COPYSTR
+
queue = url_queue_new ();
blacklist = make_string_hash_table (0);
/* Enqueue the starting URL. Use start_url_parsed->url rather than
just URL so we enqueue the canonical form of the URL. */
- url_enqueue (queue, xstrdup (start_url_parsed->url), NULL, 0, true, false);
+ url_enqueue (queue, i, xstrdup (start_url_parsed->url), NULL, 0, true,
+ false);
string_set_add (blacklist, start_url_parsed->url);
while (1)
/* Get the next URL from the queue... */
- if (!url_dequeue (queue,
+ if (!url_dequeue (queue, (struct iri **) &i,
(const char **)&url, (const char **)&referer,
&depth, &html_allowed, &css_allowed))
break;
}
else
{
- int dt = 0;
+ int dt = 0, url_err;
char *redirected = NULL;
- struct url *url_parsed = url_parse (url, &url_err);
++ struct url *url_parsed = url_parse (url, &url_err, i, false);
- if (!url_parsed)
- {
- char *error = url_error (url, url_err);
- logprintf (LOG_NOTQUIET, "%s: %s.\n", url, error);
- xfree (error);
- status = URLERROR;
- }
- else
- {
- status = retrieve_url (url_parsed, url, &file, &redirected,
- referer, &dt, false);
- }
- status = retrieve_url (url, &file, &redirected, referer, &dt,
- false, i);
++ status = retrieve_url (url_parsed, url, &file, &redirected, referer,
++ &dt, false, i);
if (html_allowed && file && status == RETROK
&& (dt & RETROKF) && (dt & TEXTHTML))
want to follow it. */
if (descend)
{
- if (!descend_redirect_p (redirected, url, depth,
+ if (!descend_redirect_p (redirected, url_parsed, depth,
- start_url_parsed, blacklist))
+ start_url_parsed, blacklist, i))
descend = false;
else
/* Make sure that the old pre-redirect form gets
xfree (url);
url = redirected;
}
+ url_free(url_parsed);
}
if (opt.spider)
bool meta_disallow_follow = false;
struct urlpos *children
= is_css ? get_urls_css_file (file, url) :
- get_urls_html (file, url, &meta_disallow_follow);
+ get_urls_html (file, url, &meta_disallow_follow, i);
if (opt.use_robots && meta_disallow_follow)
{
if (children)
{
struct urlpos *child = children;
- struct url *url_parsed = url_parsed = url_parse (url, NULL);
+ struct url *url_parsed = url_parse (url, NULL, i, false);
+ struct iri *ci;
char *referer_url = url;
bool strip_auth = (url_parsed != NULL
&& url_parsed->user != NULL);
if (dash_p_leaf_HTML && !child->link_inline_p)
continue;
if (download_child_p (child, url_parsed, depth, start_url_parsed,
- blacklist))
+ blacklist, i))
{
- url_enqueue (queue, xstrdup (child->url->url),
+ ci = iri_new ();
+ set_uri_encoding (ci, i->content_encoding, false);
+ url_enqueue (queue, ci, xstrdup (child->url->url),
xstrdup (referer_url), depth + 1,
child->link_expect_html,
child->link_expect_css);
}
}
- if (file
- && (opt.delete_after
+ if (file
+ && (opt.delete_after
|| opt.spider /* opt.recursive is implicitely true */
|| !acceptable (file)))
{
/* Either --delete-after was specified, or we loaded this
- (otherwise unneeded because of --spider or rejected by -R)
- HTML file just to harvest its hyperlinks -- in either case,
+ (otherwise unneeded because of --spider or rejected by -R)
+ HTML file just to harvest its hyperlinks -- in either case,
delete the local file. */
DEBUGP (("Removing file due to %s in recursive_retrieve():\n",
opt.delete_after ? "--delete-after" :
- (opt.spider ? "--spider" :
+ (opt.spider ? "--spider" :
"recursive rejection criteria")));
logprintf (LOG_VERBOSE,
(opt.delete_after || opt.spider
xfree (url);
xfree_null (referer);
xfree_null (file);
+ iri_free (i);
}
/* If anything is left of the queue due to a premature exit, free it
char *d1, *d2;
int d3;
bool d4, d5;
- while (url_dequeue (queue,
+ struct iri *d6;
+ while (url_dequeue (queue, (struct iri **)&d6,
(const char **)&d1, (const char **)&d2, &d3, &d4, &d5))
{
+ iri_free (d6);
xfree (d1);
xfree_null (d2);
}
}
url_queue_delete (queue);
- if (start_url_parsed)
- url_free (start_url_parsed);
string_set_free (blacklist);
if (opt.quota && total_downloaded_bytes > opt.quota)
static bool
download_child_p (const struct urlpos *upos, struct url *parent, int depth,
- struct url *start_url_parsed, struct hash_table *blacklist)
+ struct url *start_url_parsed, struct hash_table *blacklist,
+ struct iri *iri)
{
struct url *u = upos->url;
const char *url = u->url;
if (string_set_contains (blacklist, url))
{
- if (opt.spider)
+ if (opt.spider)
{
char *referrer = url_string (parent, URL_AUTH_HIDE_PASSWD);
DEBUGP (("download_child_p: parent->url is: %s\n", quote (parent->url)));
if (!specs)
{
char *rfile;
- if (res_retrieve_file (url, &rfile))
+ if (res_retrieve_file (url, &rfile, iri))
{
specs = res_parse_from_file (rfile);
it is merely a simple-minded wrapper around download_child_p. */
static bool
-descend_redirect_p (const char *redirected, const char *original, int depth,
+descend_redirect_p (const char *redirected, struct url *orig_parsed, int depth,
- struct url *start_url_parsed, struct hash_table *blacklist)
+ struct url *start_url_parsed, struct hash_table *blacklist,
+ struct iri *iri)
{
- struct url *orig_parsed, *new_parsed;
+ struct url *new_parsed;
struct urlpos *upos;
bool success;
- orig_parsed = url_parse (original, NULL, NULL, false);
assert (orig_parsed != NULL);
- new_parsed = url_parse (redirected, NULL);
+ new_parsed = url_parse (redirected, NULL, NULL, false);
assert (new_parsed != NULL);
upos = xnew0 (struct urlpos);
upos->url = new_parsed;
success = download_child_p (upos, orig_parsed, depth,
- start_url_parsed, blacklist);
+ start_url_parsed, blacklist, iri);
- url_free (orig_parsed);
url_free (new_parsed);
xfree (upos);
/* Declarations for recur.c.
Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
- 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+ 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
This file is part of GNU Wget.
#ifndef RECUR_H
#define RECUR_H
+#include "url.h"
+
/* For most options, 0 means no limits, but with -p in the picture,
that causes a problem on the maximum recursion depth variable. To
retain backwards compatibility we allow users to consider "0" to be
struct urlpos;
void recursive_cleanup (void);
- uerr_t retrieve_tree (struct url *);
-uerr_t retrieve_tree (const char *, struct iri *);
++uerr_t retrieve_tree (struct url *, struct iri *);
#endif /* RECUR_H */
Return true if robots were retrieved OK, false otherwise. */
bool
- res_retrieve_file (const char *url, char **file)
+ res_retrieve_file (const char *url, char **file, struct iri *iri)
{
+ struct iri *i = iri_new ();
uerr_t err;
char *robots_url = uri_merge (url, RES_SPECS_LOCATION);
int saved_ts_val = opt.timestamping;
- int saved_sp_val = opt.spider;
+ int saved_sp_val = opt.spider, url_err;
+ struct url * url_parsed;
+ /* Copy the server URI encoding for a possible IDNA transformation; there is
+ no need to encode the full URI in UTF-8 because "robots.txt" is plain ASCII */
+ set_uri_encoding (i, iri->uri_encoding, false);
+ i->utf8_encode = false;
+
logputs (LOG_VERBOSE, _("Loading robots.txt; please ignore errors.\n"));
*file = NULL;
opt.timestamping = false;
opt.spider = false;
- err = retrieve_url (robots_url, file, NULL, NULL, NULL, false, i);
+
- url_parsed = url_parse (robots_url, &url_err);
++ url_parsed = url_parse (robots_url, &url_err, iri, true);
+ if (!url_parsed)
+ {
+ char *error = url_error (robots_url, url_err);
+ logprintf (LOG_NOTQUIET, "%s: %s.\n", robots_url, error);
+ xfree (error);
+ err = URLERROR;
+ }
+ else
+ {
+ err = retrieve_url (url_parsed, robots_url, file, NULL, NULL, NULL,
- false);
++ false, i);
+ url_free(url_parsed);
+ }
+
opt.timestamping = saved_ts_val;
- opt.spider = saved_sp_val;
+ opt.spider = saved_sp_val;
xfree (robots_url);
+ iri_free (i);
if (err != RETROK && *file != NULL)
{
/* If we're skipping STARTPOS bytes, pass 0 as the INITIAL
argument to progress_create because the indicator doesn't
(yet) know about "skipping" data. */
- progress = progress_create (skip ? 0 : startpos, startpos + toread);
+ wgint start = skip ? 0 : startpos;
+ progress = progress_create (start, start + toread);
progress_interactive = progress_interactive_p (progress);
}
multiple points. */
uerr_t
-retrieve_url (const char *origurl, char **file, char **newloc,
- const char *refurl, int *dt, bool recursive, struct iri *iri)
+retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
- char **newloc, const char *refurl, int *dt, bool recursive)
++ char **newloc, const char *refurl, int *dt, bool recursive,
++ struct iri *iri)
{
uerr_t result;
char *url;
bool location_changed;
int dummy;
char *mynewloc, *proxy;
- struct url *u, *proxy_url;
+ struct url *u = orig_parsed, *proxy_url;
int up_error_code; /* url parse error code */
char *local_file;
int redirection_count = 0;
if (file)
*file = NULL;
- u = url_parse (url, &up_error_code, iri, true);
- if (!u)
- {
- char *error = url_error (url, up_error_code);
- logprintf (LOG_NOTQUIET, "%s: %s.\n", url, error);
- xfree (url);
- xfree (error);
- return URLERROR;
- }
-
+ second_try:
+ DEBUGP (("[IRI Retrieving %s with %s (UTF-8=%d)\n", quote_n (0, url),
+ iri->uri_encoding ? quote_n (1, iri->uri_encoding) : "None",
+ iri->utf8_encode));
+
if (!refurl)
refurl = opt.referer;
proxy = getproxy (u);
if (proxy)
{
+ struct iri *pi = iri_new ();
+ set_uri_encoding (pi, opt.locale, true);
+ pi->utf8_encode = false;
+
/* Parse the proxy URL. */
- proxy_url = url_parse (proxy, &up_error_code);
+ proxy_url = url_parse (proxy, &up_error_code, NULL, true);
if (!proxy_url)
{
char *error = url_error (proxy, up_error_code);
#endif
|| (proxy_url && proxy_url->scheme == SCHEME_HTTP))
{
- result = http_loop (u, &mynewloc, &local_file, refurl, dt, proxy_url);
+ result = http_loop (u, &mynewloc, &local_file, refurl, dt, proxy_url, iri);
}
else if (u->scheme == SCHEME_FTP)
{
xfree (mynewloc);
mynewloc = construced_newloc;
+ /* Reset UTF-8 encoding state, keep the URI encoding and reset
+ the content encoding. */
+ iri->utf8_encode = opt.enable_iri;
+ set_content_encoding (iri, NULL);
+ xfree_null (iri->orig_url);
+
/* Now, see if this new location makes sense. */
- newloc_parsed = url_parse (mynewloc, &up_error_code);
+ newloc_parsed = url_parse (mynewloc, &up_error_code, iri, true);
if (!newloc_parsed)
{
char *error = url_error (mynewloc, up_error_code);
logprintf (LOG_NOTQUIET, "%s: %s.\n", escnonprint_uri (mynewloc),
error);
- url_free (u);
+ if (orig_parsed != u)
+ {
+ url_free (u);
+ }
xfree (url);
xfree (mynewloc);
xfree (error);
logprintf (LOG_NOTQUIET, _("%d redirections exceeded.\n"),
opt.max_redirect);
url_free (newloc_parsed);
- url_free (u);
+ if (orig_parsed != u)
+ {
+ url_free (u);
+ }
xfree (url);
xfree (mynewloc);
RESTORE_POST_DATA;
xfree (url);
url = mynewloc;
- url_free (u);
+ if (orig_parsed != u)
+ {
+ url_free (u);
+ }
u = newloc_parsed;
/* If we're being redirected from POST, we don't want to POST
goto redirected;
}
- if (local_file)
+ /* If fetching failed with the UTF-8-encoded URL, retry without UTF-8 encoding */
+ if (!(*dt & RETROKF) && iri->utf8_encode)
+ {
+ iri->utf8_encode = false;
+ DEBUGP (("[IRI fallbacking to non-utf8 for %s\n", quote (url)));
+ goto second_try;
+ }
+
+ if (local_file && *dt & RETROKF)
{
+ register_download (u->url, local_file);
+ if (redirection_count && 0 != strcmp (origurl, u->url))
+ register_redirection (origurl, u->url);
+ if (*dt & TEXTHTML)
+ register_html (u->url, local_file);
if (*dt & RETROKF)
{
register_download (u->url, local_file);
else
xfree_null (local_file);
- url_free (u);
+ if (orig_parsed != u)
+ {
+ url_free (u);
+ }
if (redirection_count)
{
{
uerr_t status;
struct urlpos *url_list, *cur_url;
+ struct iri *iri = iri_new();
char *input_file = NULL;
const char *url = file;
status = RETROK; /* Suppose everything is OK. */
*count = 0; /* Reset the URL count. */
-
+
+ /* sXXXav : Assume filename and links in the file are in the locale */
+ set_uri_encoding (iri, opt.locale, true);
+ set_content_encoding (iri, opt.locale);
+
if (url_has_scheme (url))
{
- int dt;
+ int dt,url_err;
uerr_t status;
- struct url * url_parsed = url_parse(url, &url_err);
++ struct url * url_parsed = url_parse(url, &url_err, NULL, true);
+
+ if (!url_parsed)
+ {
+ char *error = url_error (url, url_err);
+ logprintf (LOG_NOTQUIET, "%s: %s.\n", url, error);
+ xfree (error);
+ return URLERROR;
+ }
if (!opt.base_href)
opt.base_href = xstrdup (url);
- status = retrieve_url (url_parsed, url, &input_file, NULL, NULL, &dt, false);
- status = retrieve_url (url, &input_file, NULL, NULL, &dt, false, iri);
++ status = retrieve_url (url_parsed, url, &input_file, NULL, NULL, &dt,
++ false, iri);
if (status != RETROK)
return status;
if (dt & TEXTHTML)
html = true;
+
+ /* If we have a found a content encoding, use it */
+ if (iri->content_encoding)
+ set_uri_encoding (iri, iri->content_encoding, false);
}
else
input_file = (char *) file;
- url_list = (html ? get_urls_html (input_file, NULL, NULL)
+ url_list = (html ? get_urls_html (input_file, NULL, NULL, iri)
: get_urls_file (input_file));
for (cur_url = url_list; cur_url; cur_url = cur_url->next, ++*count)
status = QUOTEXC;
break;
}
+
+ /* Reset UTF-8 encode status */
+ iri->utf8_encode = opt.enable_iri;
+ xfree_null (iri->orig_url);
+ iri->orig_url = NULL;
+
if ((opt.recursive || opt.page_requisites)
&& (cur_url->url->scheme != SCHEME_FTP || getproxy (cur_url->url)))
{
int old_follow_ftp = opt.follow_ftp;
/* Turn opt.follow_ftp on in case of recursive FTP retrieval */
- if (cur_url->url->scheme == SCHEME_FTP)
+ if (cur_url->url->scheme == SCHEME_FTP)
opt.follow_ftp = 1;
-
- status = retrieve_tree (cur_url->url);
+
- status = retrieve_tree (cur_url->url->url, iri);
++ status = retrieve_tree (cur_url->url, iri);
opt.follow_ftp = old_follow_ftp;
}
else
- {
- status = retrieve_url (cur_url->url, cur_url->url->url, &filename,
- &new_file, NULL, &dt, opt.recursive);
- }
- status = retrieve_url (cur_url->url->url, &filename, &new_file, NULL,
- &dt, opt.recursive, iri);
++ status = retrieve_url (cur_url->url, cur_url->url->url, &filename,
++ &new_file, NULL, &dt, opt.recursive, iri);
if (filename && opt.delete_after && file_exists_p (filename))
{
/* Free the linked list of URL-s. */
free_urlpos (url_list);
+ iri_free (iri);
+
return status;
}
/* Returns true if URL would be downloaded through a proxy. */
bool
-url_uses_proxy (const char *url)
+url_uses_proxy (struct url * u)
{
bool ret;
- struct url *u;
- struct iri *i = iri_new();
- /* url was given in the command line, so use locale as encoding */
- set_uri_encoding (i, opt.locale, true);
- u= url_parse (url, NULL, i, false);
if (!u)
return false;
ret = getproxy (u) != NULL;
- url_free (u);
return ret;
}
else
return sufmatch (no_proxy, host);
}
+
+/* Set the file parameter to point to the local file string. */
+void
+set_local_file (const char **file, const char *default_file)
+{
+ if (opt.output_document)
+ {
+ if (output_stream_regular)
+ *file = opt.output_document;
+ }
+ else
+ *file = default_file;
+}
#ifndef RETR_H
#define RETR_H
+#include "url.h"
+
/* These global vars should be made static to retr.c and exported via
functions! */
extern SUM_SIZE_INT total_downloaded_bytes;
char *fd_read_hunk (int, hunk_terminator_t, long, long);
char *fd_read_line (int);
- uerr_t retrieve_url (struct url *, const char *, char **, char **, const char *, int *, bool);
-uerr_t retrieve_url (const char *, char **, char **, const char *, int *,
- bool, struct iri *);
++uerr_t retrieve_url (struct url *, const char *, char **, char **,
++ const char *, int *, bool, struct iri *);
uerr_t retrieve_from_file (const char *, bool, int *);
const char *retr_rate (wgint, double);
void rotate_backups (const char *);
-bool url_uses_proxy (const char *);
+bool url_uses_proxy (struct url *);
#endif /* RETR_H */
error, and if ERROR is not NULL, also set *ERROR to the appropriate
error code. */
struct url *
- url_parse (const char *url, int *error)
+ url_parse (const char *url, int *error, struct iri *iri, bool percent_encode)
{
struct url *u;
const char *p;
int port;
char *user = NULL, *passwd = NULL;
- char *url_encoded = NULL;
- char *url_encoded = NULL, *new_url = NULL;
++ const char *url_encoded = NULL;
++ char *new_url = NULL;
int error_code;
goto error;
}
- url_encoded = reencode_escapes (url);
+ if (iri && iri->utf8_encode)
+ {
+ iri->utf8_encode = remote_to_utf8 (iri, iri->orig_url ? iri->orig_url : url, (const char **) &new_url);
+ if (!iri->utf8_encode)
+ new_url = NULL;
+ else
+ iri->orig_url = xstrdup (url);
+ }
+
+ /* XXX XXX Could that change introduce (security) bugs ??? XXX XXX*/
+ if (percent_encode)
+ url_encoded = reencode_escapes (new_url ? new_url : url);
+ else
+ url_encoded = new_url ? new_url : url;
+
p = url_encoded;
+ if (new_url && url_encoded != new_url)
+ xfree (new_url);
+
p += strlen (supported_schemes[scheme].leading_string);
uname_b = p;
p = url_skip_credentials (p);
{
url_unescape (u->host);
host_modified = true;
+
+ /* Apply IDNA regardless of iri->utf8_encode status */
+ if (opt.enable_iri && iri)
+ {
+ char *new = idn_encode (iri, u->host);
+ if (new)
+ {
+ xfree (u->host);
+ u->host = new;
+ host_modified = true;
+ }
+ }
}
if (params_b)
if (fragment_b)
u->fragment = strdupdelim (fragment_b, fragment_e);
- if (path_modified || u->fragment || host_modified || path_b == path_e)
+ if (opt.enable_iri || path_modified || u->fragment || host_modified || path_b == path_e)
{
/* If we suspect that a transformation has rendered what
url_string might return different from URL_ENCODED, rebuild
if (url_encoded == url)
u->url = xstrdup (url);
else
-- u->url = url_encoded;
++ u->url = (char *) url_encoded;
}
return u;
error:
/* Cleanup in case of error: */
if (url_encoded && url_encoded != url)
-- xfree (url_encoded);
++ xfree ((char *) url_encoded);
/* Transmit the error code to the caller, if the caller wants to
know. */
\f
static int
getchar_from_escaped_string (const char *str, char *c)
- {
+ {
const char *p = str;
assert (str && *str);
assert (c);
-
+
if (p[0] == '%')
{
if (!c_isxdigit(p[1]) || !c_isxdigit(p[2]))
p += pp;
q += qq;
}
-
+
return (*p == 0 && *q == 0 ? true : false);
}
\f
} test_array[] = {
{ "http://www.yoyodyne.com/path/", "somepage.html", false, "http://www.yoyodyne.com/path/somepage.html" },
};
-
+
for (i = 0; i < sizeof(test_array)/sizeof(test_array[0]); ++i)
{
struct growable dest;
+ 2008-12-04 Micah Cowan <micah@cowan.name> (not copyrightable)
+
+ * run-px, Test-idn-robots.px: Added test for robots-file
+ downloads.
+
+ * Test-idn-cmd.px, Test-idn-meta.px, Test-idn-headers.px:
+ Fix test names.
+
+ 2008-11-26 Micah Cowan <micah@cowan.name> (not copyrightable)
+
+ * Test-ftp-iri-disabled.px, Test-ftp-iri-fallback.px,
+ Test-ftp-iri.px, Test-idn-cmd.px, Test-idn-headers.px,
+ Test-idn-meta.px, Test-iri-disabled.px,
+ Test-iri-forced-remote.px, Test-iri-list.px, Test-iri.px: More
+ module-scope warnings.
+
+2009-06-14 Micah Cowan <micah@cowan.name>
+
+ * Makefile.am (EXTRA_DIST): Include all the tests, run-px, and
+ certs/, to make distcheck happy.
+
+2009-06-11 Benjamin Wolsey <bwy@benjaminwolsey.de>
+
+ * Test-proxied-https-auth.px: Take an optional argument for the
+ top source directory, so we can find the cert and key.
+
+ * run-px: Provide the top source directory as an argument, so
+ scripts can find their way around.
+
+2009-04-11 Steven Schubiger <stsc@member.fsf.org>
+
+ * run-px: Skip testing with real rc files by setting
+ SYSTEM_WGETRC and WGETRC to /dev/null.
+
+2009-02-25 Benjamin Wolsey <bwy@benjaminwolsey.de>
+
+ * Makefile.am (run-px-tests): Ensure run-px is run from srcdir.
+
+ * run-px: Include modules from srcdir.
+
2008-11-25 Steven Schubiger <stsc@members.fsf.org>
* WgetTest.pm.in: Remove the magic interpreter line;
* run-px: Use strict (thanks Steven Schubiger!).
+ 2008-09-09 Micah Cowan <micah@cowan.name>
+
+ * Test-idn-cmd.px: Added.
+
+ * run-px: Added Test-idn-cmd.px.
+
+ 2008-08-28 Micah Cowan <micah@cowan.name>
+
+ * HTTPServer.pm (run): Allow distinguishing between hostnames,
+ when used as a proxy.
+
+ * Test-idn-headers.px, Test-idn-meta.px: Added.
+
+ * run-px: Added Test-idn-headers.px, Test-idn-meta.px.
+
+ * Test-proxy-auth-basic.px: Use the full URL, rather than just the
+ path (made necessary by the accompanying change to HTTPServer.pm).
+
+ 2008-08-14 Xavier Saint <wget@sxav.eu>
+
+ * Test-iri-list.px : Fetch files from a remote list.
+
+ 2008-08-03 Xavier Saint <wget@sxav.eu>
+
+ * Test-iri.px : HTTP recursive fetch for testing IRI support and
+ fallback.
+
+ * Test-iri-disabled.px : Same file structure as Test-iri.px but with
+ IRI support disabled.
+
+ * Test-iri-forced-remote.px : There's a difference between ISO-8859-1
+ and ISO-8859-15 for character 0xA4 (respectively currency sign and
+ euro sign). So with a forced ISO-8859-1 remote encoding, wget should
+ see 0xA4 as a currency sign and transcode it correctly in UTF-8 instead
+ of using the ISO-8859-15 given by the server.
+
+ * Test-ftp-iri.px : Give a file to fetch via FTP in a specific locale
+ and expect wget to fetch the file UTF-8 encoded.
+
+ * Test-ftp-iri-fallback.px : Same as above but wget should fallback on
+ locale encoding to fetch the file.
+
+ * Test-ftp-iri-disabled.px : Same as Test-ftp-iri.px but with IRI support
+ disabled. The UTF-8 encoded file should not be retrieved.
+
2008-06-22 Micah Cowan <micah@cowan.name>
* Test-proxied-https-auth.px: Shift exit code so it falls in the
'Test-E-k-K.px',
'Test-E-k.px',
'Test-ftp.px',
+ 'Test-ftp-iri.px',
+ 'Test-ftp-iri-fallback.px',
+ 'Test-ftp-iri-disabled.px',
'Test-HTTP-Content-Disposition-1.px',
'Test-HTTP-Content-Disposition-2.px',
'Test-HTTP-Content-Disposition.px',
+ 'Test-idn-headers.px',
+ 'Test-idn-meta.px',
+ 'Test-idn-cmd.px',
+ 'Test-idn-robots.px',
+ 'Test-iri.px',
+ 'Test-iri-disabled.px',
+ 'Test-iri-forced-remote.px',
+ 'Test-iri-list.px',
'Test-N-current.px',
'Test-N-smaller.px',
'Test-N-no-info.px',
'Test--spider-r.px',
);
+foreach my $var (qw(SYSTEM_WGETRC WGETRC)) {
+ $ENV{$var} = '/dev/null';
+}
+
my @tested;
foreach my $test (@tests) {
print "Running $test\n\n";
- system("$^X $top_srcdir/tests/$test");
+ system("$^X -I$top_srcdir/tests $top_srcdir/tests/$test $top_srcdir");
push @tested, { name => $test, result => $? };
}
+foreach my $var (qw(SYSTEM_WGETRC WGETRC)) {
+ delete $ENV{$var};
+}
+
print "\n";
foreach my $test (@tested) {
($test->{result} == 0)