+2009-06-14 Micah Cowan <micah@cowan.name>
+
+ * po/Makefile.in.in (distclean): remove en_US.po, too.
+
+ * Makefile.am: Include md5 as a subdir unconditionally.
+ It may result in useless compilation, and additional risk of
+ breaking a build of something that isn't actually needed, but
+ otherwise it's too much of a hassle to manage a failure-free
+ distcheck.
+
+2009-06-12 Micah Cowan <micah@cowan.name>
+
+ * configure.ac: Check for h_errno declaration. Idea thanks to
+ Maciej W. Rozycki.
+
+2009-03-03 Steven Schubiger <stsc@member.fsf.org>
+
+ * src/ftp.c, src/http.c, src/main.c, src/recur.h,
+ tests/Makefile.am: Update the copyright years.
+
+2009-01-23 Steven Schubiger <stsc@members.fsf.org>
+
+ * util/freeopts, util/rmold.pl, util/trunc.c: Remove
+ unnecessary whitespace.
+
2008-11-10 Micah Cowan <micah@cowan.name>
* MAILING-LIST: Mention Gmane, introduce subsections.
* AUTHORS: Added Steven Schubiger.
+ 2008-06-26 Xavier Saint <wget@sxav.eu>
+
+ * configure.ac : IRI support requires libiconv; check for it.
+
+ 2008-06-14 Xavier Saint <wget@sxav.eu>
+
+ * configure.ac: Add support for IRIs
+
2008-05-29 Micah Cowan <micah@cowan.name>
* po/*.po: Updated from TP (the 1.11.3 set).
AC_CHECK_HEADERS(termios.h sys/ioctl.h sys/select.h utime.h sys/utime.h)
AC_CHECK_HEADERS(stdint.h inttypes.h pwd.h wchar.h)
+AC_CHECK_DECLS(h_errno,,,[#include <netdb.h>])
+
dnl
dnl Check sizes of integer types. These are used to find n-bit
dnl integral types on older systems that fail to provide intN_t and
fi
AC_SUBST(COMMENT_IF_NO_POD2MAN)
+
+ dnl
+ dnl Check for IDN/IRIs
+ dnl
+
+ AC_ARG_ENABLE(iri,
+ AC_HELP_STRING([--disable-iri],[disable IDN/IRIs support]),
+ [case "${enable_iri}" in
+ no)
+ dnl Disable IRIs checking
+ AC_MSG_NOTICE([disabling IRIs at user request])
+ iri=no
+ ;;
+ yes)
+ dnl IRIs explicitly enabled
+ iri=yes
+ force_iri=yes
+ ;;
+ auto)
+ dnl Auto-detect IRI
+ iri=yes
+ ;;
+ *)
+ AC_MSG_ERROR([Invalid --enable-iri argument \`$enable_iri'])
+ ;;
+ esac
+ ], [
+ dnl If nothing is specified, assume auto-detection
+ iri=yes
+ ]
+ )
+
+ AC_ARG_WITH(libidn, AC_HELP_STRING([--with-libidn=[DIR]],
+ [Support IDN/IRIs (needs GNU Libidn)]),
+ libidn=$withval, libidn="")
+ if test "X$iri" != "Xno"; then
+ AM_ICONV
+
+ if test "X$am_cv_func_iconv" != "Xyes"; then
+ iri=no
+ if test "X$force_iri" = "Xyes"; then
+ AC_MSG_ERROR([Libiconv is required for IRIs support])
+ else
+ AC_MSG_NOTICE([disabling IRIs because libiconv wasn't found])
+ fi
+ fi
+ fi
+
+ if test "X$iri" != "Xno"; then
+ if test "$libidn" != ""; then
+ LDFLAGS="${LDFLAGS} -L$libidn/lib"
+ CPPFLAGS="${CPPFLAGS} -I$libidn/include"
+ fi
+ AC_CHECK_HEADER(idna.h,
+ AC_CHECK_LIB(idn, stringprep_check_version,
+ [iri=yes LIBS="${LIBS} -lidn"], iri=no),
+ iri=no)
+
+ if test "X$iri" != "Xno" ; then
+ AC_DEFINE(ENABLE_IRI, 1, [Define if IRI support is enabled.])
+ AC_MSG_NOTICE([Enabling support for IRI.])
+ else
+ AC_MSG_WARN([Libidn not found])
+ fi
+ fi
+
+
+ dnl Needed by src/Makefile.am
+ AM_CONDITIONAL([IRI_IS_ENABLED], [test "X$iri" != "Xno"])
+
+
dnl
dnl Create output
dnl
+2009-06-20 Micah Cowan <micah@cowan.name>
+
+ * wget.texi (Contributors): Added Jay Krell.
+
+2009-06-14 Micah Cowan <micah@cowan.name>
+
+ * Makefile.am (wget.pod): $(srcdir)/version.texi -> version.texi
+
+2009-06-12 Micah Cowan <micah@cowan.name>
+
+ * wget.texi (Download Options): More accuracy on what happens when
+ -nd is used with -r or -p.
+
+2009-06-11 Micah Cowan <micah@cowan.name>
+
+ * wget.texi (Contributors): Added Xin Zou, Benjamin Wolsey, and
+ Robert Millan.
+
+2009-06-11 Joao Ferreira <joao@joaoff.com>
+
+ * wget.texi (Option Syntax): Fixed contradictory and confusing
+ explanation of --follow-ftp and negation.
+
+2009-06-10 Micah Cowan <micah@cowan.name>
+
+ * sample.wgetrc: Add "https_proxy" to the proxy examples. Thanks
+ to Martin Paul <martin@par.univie.ac.at> for the suggestion.
+
2008-11-15 Steven Schubiger <stsc@members.fsf.org>
* sample.wgetrc: Comment the waitretry "default" value,
* wget.texi (Robot Exclusion): Fixed typo "downloads" ->
"download"
+ 2008-08-03 Xavier Saint <wget@sxav.eu>
+
+ * wget.texi : Add option descriptions for the three new
+ options --iri, --locale and --remote-encoding related to
+ IRI support.
+
+ * sample.wgetrc : Add commented lines for the three new
+ commands iri, locale and encoding related to IRI support.
+
2008-08-03 Micah Cowan <micah@cowan.name>
* wget.texi: Don't set UPDATED; already set by version.texi.
# is *not* sent by default.
#header = Accept-Language: en
-# You can set the default proxies for Wget to use for http and ftp.
+# You can set the default proxies for Wget to use for http, https, and ftp.
# They will override the value in the environment.
+#https_proxy = http://proxy.yoyodyne.com:18023/
#http_proxy = http://proxy.yoyodyne.com:18023/
#ftp_proxy = http://proxy.yoyodyne.com:18023/
# To try ipv6 addresses first:
#prefer-family = IPv6
+
+ # Set default IRI support state
+ #iri = off
+
+ # Force the default system encoding
+ #locale = UTF-8
+
+ # Force the default remote server encoding
+ #remoteencoding = UTF-8
@samp{--no-} prefix. This might seem superfluous---if the default for
an affirmative option is to not do something, then why provide a way
to explicitly turn it off? But the startup file may in fact change
-the default. For instance, using @code{follow_ftp = off} in
-@file{.wgetrc} makes Wget @emph{not} follow FTP links by default, and
+the default. For instance, using @code{follow_ftp = on} in
+@file{.wgetrc} makes Wget @emph{follow} FTP links by default, and
using @samp{--no-follow-ftp} is the only way to restore the factory
default from the command line.
cases, the local file will be @dfn{clobbered}, or overwritten, upon
repeated download. In other cases it will be preserved.
-When running Wget without @samp{-N}, @samp{-nc}, @samp{-r}, or @samp{p},
-downloading the same file in the same directory will result in the
-original copy of @var{file} being preserved and the second copy being
-named @samp{@var{file}.1}. If that file is downloaded yet again, the
-third copy will be named @samp{@var{file}.2}, and so on. When
-@samp{-nc} is specified, this behavior is suppressed, and Wget will
-refuse to download newer copies of @samp{@var{file}}. Therefore,
-``@code{no-clobber}'' is actually a misnomer in this mode---it's not
-clobbering that's prevented (as the numeric suffixes were already
-preventing clobbering), but rather the multiple version saving that's
-prevented.
-
-When running Wget with @samp{-r} or @samp{-p}, but without @samp{-N}
-or @samp{-nc}, re-downloading a file will result in the new copy
-simply overwriting the old. Adding @samp{-nc} will prevent this
-behavior, instead causing the original version to be preserved and any
-newer copies on the server to be ignored.
+When running Wget without @samp{-N}, @samp{-nc}, @samp{-r}, or
+@samp{-p}, downloading the same file in the same directory will result
+in the original copy of @var{file} being preserved and the second copy
+being named @samp{@var{file}.1}. If that file is downloaded yet
+again, the third copy will be named @samp{@var{file}.2}, and so on.
+(This is also the behavior with @samp{-nd}, even if @samp{-r} or
+@samp{-p} are in effect.) When @samp{-nc} is specified, this behavior
+is suppressed, and Wget will refuse to download newer copies of
+@samp{@var{file}}. Therefore, ``@code{no-clobber}'' is actually a
+misnomer in this mode---it's not clobbering that's prevented (as the
+numeric suffixes were already preventing clobbering), but rather the
+multiple version saving that's prevented.
+
+When running Wget with @samp{-r} or @samp{-p}, but without @samp{-N},
+@samp{-nd}, or @samp{-nc}, re-downloading a file will result in the
+new copy simply overwriting the old. Adding @samp{-nc} will prevent
+this behavior, instead causing the original version to be preserved
+and any newer copies on the server to be ignored.
When running Wget with @samp{-N}, with or without @samp{-r} or
@samp{-p}, the decision as to whether or not to download a newer copy
Note that @samp{-c} only works with @sc{ftp} servers and with @sc{http}
servers that support the @code{Range} header.
+ @cindex iri support
+ @cindex idn support
+ @item --iri
+
+ Turn on internationalized URI (IRI) support. Use @samp{--iri=no} to
+ turn it off. IRI support is activated by default.
+
+ You can set the default state of IRI support using the @code{iri} command in
+ @file{.wgetrc}. That setting may be overridden from the command line.
+
+ @cindex local encoding
+ @cindex locale
+ @item --locale=@var{encoding}
+
+ Force Wget to use @var{encoding} as the default system encoding. That affects
+ how Wget converts URLs specified as arguments from locale to @sc{utf-8} for
+ IRI support.
+
+ Wget uses the function @code{nl_langinfo()} and then the @code{CHARSET}
+ environment variable to get the locale. If it fails, @sc{ascii} is used.
+
+ You can set the default locale using the @code{locale} command in
+ @file{.wgetrc}. That setting may be overridden from the command line.
+
@cindex progress indicator
@cindex dot style
@item --progress=@var{type}
``dot'' progress will be favored over ``bar''. To force the bar output,
use @samp{--progress=bar:force}.
+ @cindex remote encoding
+ @item --remote-encoding=@var{encoding}
+
+ Force Wget to use @var{encoding} as the default remote server encoding. That
+ affects how Wget converts URIs found in files from remote encoding to
+ @sc{utf-8} during a recursive fetch. This option is only useful for
+ IRI support, for the interpretation of non-@sc{ascii} characters.
+
+ For HTTP, remote encoding can be found in HTTP @code{Content-Type}
+ header and in HTML @code{Content-Type http-equiv} meta tag.
+
+ You can set the default encoding using the @code{remoteencoding}
+ command in @file{.wgetrc}. That setting may be overridden from the
+ command line.
+
@item -N
@itemx --timestamping
Turn on time-stamping. @xref{Time-Stamping}, for details.
Alexander Kourakos,
Martin Kraemer,
Sami Krank,
+Jay Krell,
@tex
$\Sigma\acute{\iota}\mu o\varsigma\;
\Xi\varepsilon\nu\iota\tau\acute{\epsilon}\lambda\lambda\eta\varsigma$
Matthew J.@: Mellon,
Jordan Mendelson,
Ted Mielczarek,
+Robert Millan,
Lin Zhe Min,
Jan Minar,
Tim Mooney,
Douglas E.@: Wegscheid,
Ralf Wildenhues,
Joshua David Williams,
+Benjamin Wolsey,
YAMAZAKI Makoto,
Jasmin Zainul,
@iftex
@ifnottex
Bojan Zdrnja,
@end ifnottex
-Kristijan Zimmer.
+Kristijan Zimmer,
+Xin Zou.
Apologies to all who I accidentally left out, and many thanks to all the
subscribers of the Wget mailing list.
+2009-06-20 Jay Krell <jay.krell@cornell.edu>
+
+ * sysdep.h (_ALL_SOURCE): (small change) Define the _ALL_SOURCE
+ macro on INTERIX systems. (I switched the location from ftp.c to
+ sysdep.h --mjc)
+
+2009-06-15 Micah Cowan <micah@cowan.name>
+
+ * ftp.c (getftp): If we can't accept the connection, return
+ CONERROR, not whatever the contents of err happens to be. Fixes
+ bug #25015.
+
+ * retr.c (fd_read_body): Make both args to progress_create
+ consistent, resulting in an accurate progress display. Fixes bug
+ #24948.
+
+2009-06-14 Micah Cowan <micah@cowan.name>
+
+ * Makefile.am (wget_SOURCES): css-tokens.h needs to ship with
+ dist, too.
+
+2009-06-13 Micah Cowan <micah@cowan.name>
+
+ * init.c: Rename setval_internal_wrapper to setval_internal_tilde,
+ ensure we don't "replace" the tilde unless it's actually
+ present. Clean up some minor GNU style issues.
+
+2009-06-13 Julien Pichon <julienpichon7@gmail.com>
+
+ * init.c: Handle tilde-expansion in wgetrc commands, without
+ resorting to setting/unsetting globals to change behavior in one
+ call location.
+
+2009-06-12 Micah Cowan <micah@cowan.name>
+
+ * host.c: Include <sys/types.h> before <sys/socket.h>. Not
+ required by POSIX any more, but some older systems (such as
+ FreeBSD 4.1) still need it, and it doesn't seem like it could
+ hurt...
+
+ * build_info.c (library): Handle "https" as a feature in its own
+ right, apart from "gnutls" and "openssl".
+
+ * host.c: Declare h_errno if no declaration is provided. Idea
+ thanks to Maciej W. Rozycki.
+
+2009-06-11 Xin Zou <zouxin2008@gmail.com>
+
+ * http.c (gethttp): Fix some memory leaks.
+
+2009-06-11 Micah Cowan <micah@cowan.name>
+
+ * http.c (http_atotm): Handle potential for setlocale's return
+ value to be static storage. Thanks to Benjamin Wolsey
+ <bwy@benjaminwolsey.de>.
+
+ * sysdep.h: Need NAMESPACE_TWEAKS on non-Linux glibc-based
+ systems, too. Thanks to Robert Millan.
+
+2009-05-28 Steven Schubiger <stsc@member.fsf.org>
+
+ * ftp.c (ftp_get_listing): Update the "listing file"
+ string after calling ftp_loop_internal().
+
+2009-05-27 Steven Schubiger <stsc@member.fsf.org>
+
+ * ftp.c (ftp_get_listing): Duplicate the "listing file"
+ string to avoid memory corruption when FOPEN_EXCL_ERR is
+ encountered.
+
+2009-05-17 Steven Schubiger <stsc@member.fsf.org>
+
+ * progress.c (eta_to_human_short): Fix the remaining hours
+ to be displayed. Spotted by Tadeu Martins (#26411).
+
+2009-04-24 Micah Cowan <micah@cowan.name>
+
+ * hash.c: Change stdint.h inclusion to use HAVE_STDINT_H, not C99
+ check.
+
+ * connect.c: stdint.h inclusion added.
+
+ Thanks to Markus Duft <mduft@gentoo.org> for a similar patch.
+
+2009-04-20 Micah Cowan <micah@cowan.name>
+
+ * Makefile.am (version.c): Fix unportable use of "echo -n".
+
+2009-04-13 Steven Schubiger <stsc@member.fsf.org>
+
+ * ftp.c (ftp_retrieve_list): Move the duplicated code that
+ determines the local file to a function.
+
+ * http.c (http_loop): Likewise.
+
+ * retr.c (set_local_file): New function.
+
+2009-04-11 Steven Schubiger <stsc@member.fsf.org>
+
+ * init.c (initialize): Run a custom SYSTEM_WGETRC when
+ provided as an environment variable.
+
+2009-02-27 Gisle Vanem <gvanem@broadpark.no>
+
+ * main.c (main): "freopen (NULL,.." causes an assertion in MSVC
+ debug-mode. I.e. NULL isn't legal. But the "CONOUT$" device works
+ fine.
+
+2009-02-27 Steven Schubiger <stsc@member.fsf.org>
+
+ * ftp.c (ftp_loop_internal): Don't claim for FTP retrievals
+ when writing to standard output either that the document
+ has been saved. Addresses bug #20520 again.
+
+2009-02-21 Steven Schubiger <stsc@member.fsf.org>
+
+ * http.c (http_loop): When a document is written to
+ standard output, don't claim it has been saved to a file.
+ Addresses bug #20520.
+
+2009-02-18 Steven Schubiger <stsc@members.fsf.org>
+
+ * recur.h: Remove the dangling declaration for recursive_cleanup().
+
+2009-02-01 Gerardo E. Gidoni <gerel@gnu.org>
+
+ * main.c, recur.c, recur.h, res.c, retr.c, retr.h: restructured code to
+ avoid multiple 'url_parse' calls.
+
2008-11-13 Micah Cowan <micah@cowan.name>
* http.c (gethttp): Don't do anything when content-length >= our
requested range.
+2008-11-27 Saint Xavier <wget@sxav.eu>
+
+ * http.c (gethttp): Move authentication code before filename
+ allocation avoiding fallbacking on default filename because
+ "Content-Disposition" header wasn't present before authentcation
+ has been completed. Fixes bug #24862.
+
2008-11-16 Steven Schubiger <stsc@members.fsf.org>
* main.c: Declare and initialize the numurls counter.
* init.c (cleanup): Free the memory associated with the base
option (when DEBUG_MALLOC is defined).
+ 2008-07-02 Xavier Saint <wget@sxav.eu>
+
+ * iri.c, iri.h : New function idn_decode() to decode ASCII
+ encoded hostname to the locale.
+
+ * host.c : Show hostname to be resolved both in locale and
+ ASCII encoded.
+
2008-06-28 Steven Schubiger <stsc@members.fsf.org>
* retr.c (retrieve_from_file): Allow for reading the links from
an external file (HTTP/FTP).
+ 2008-06-26 Xavier Saint <wget@sxav.eu>
+
+ * iri.c, iri.h : New functions locale_to_utf8() and
+ idn_encode() adding basic capabilities of IRI/IDN.
+
+ * url.c : Convert URLs from locale to UTF-8 allowing a basic
+ support of IRI/IDN
+
2008-06-25 Steven Schubiger <stsc@members.fsf.org>
* ftp.c (getftp): When spidering a FTP URL, emit a diagnostic
* http.c: Make -nv --spider include the file's name when it
exists.
-
+
2008-06-22 Micah Cowan <micah@cowan.name>
* Makefile.am (version.c): Fixed version string invocation so it
string vars pointers-to-const, and moved line lengths
below 80 (in Makefile.am, not in version.c).
+ 2008-06-19 Xavier Saint <wget@sxav.eu>
+
+ * iri.c, iri.h : New function check_encoding_name() as
+ a preliminary encoding name check.
+
+ * main.c, iri.c : Make use of check_encoding_name().
+
+ 2008-06-19 Xavier Saint <wget@sxav.eu>
+
+ * iri.c : Include missing stringprep.h file and add a
+ cast.
+
+ * init.c : set a default initial value for opt.enable_iri,
+ opt.locale and opt.encoding_remote.
+
+ 2008-06-19 Xavier Saint <wget@sxav.eu>
+
+ * iri.c, iri.h : Add a new function find_locale() to find
+ out the local system encoding.
+
+ * main.c : Make use of find_locale().
+
+ 2008-06-19 Xavier Saint <wget@sxav.eu>
+
+ * html-url.c : Add "content-type" meta tag parsing for
+ retrieving page encoding.
+
+ * iri.h : Make no-op version of parse_charset() return
+ NULL.
+
2008-06-16 Micah Cowan <micah@cowan.name>
* http.c (http_loop): When hstat.len is higher than the
successfully completed content's length, but it's because we
_set_ it that way, don't abort.
+ 2008-06-14 Xavier Saint <wget@sxav.eu>
+
+ * iri.c, iri.h : New files.
+
+ * Makefile.am : Add files iri.h and conditional iri.c.
+
+ * build_info.c : Add compiled feature "iri".
+
+ * http.c : include iri.h and parse charset from Content-Type
+ header.
+
+ * init.c, main.c, options.h : if an option isn't supported
+ at compile time, don't get rid of it; show a dummy
+ message instead if it is used.
+
2008-06-13 Micah Cowan <micah@cowan.name>
* build_info.c: ENABLE_NTLM, not HAVE_NTLM; distinguish OpenSSL
default.
2008-05-17 Kenny Parnell <k.parnell@gmail.com>
-
+
(cmd_spec_prefer_family): Initialize prefer_family to prefer_none.
2008-05-17 Micah Cowan <micah@cowan.name>
-
+
* main.c (main): Handle Ctrl-D on command-line.
2008-05-15 Steven Schubiger <schubiger@gmail.com>
* options.h: Add an according boolean member to the options
struct.
-
+
* sysdep.h: Comment the defines __EXTENSIONS__ and _GNU_SOURCE
out, because they're now defined independently by config.h.
# Version: @VERSION@
#
+ if IRI_IS_ENABLED
+ IRI_OBJ = iri.c
+ endif
+
# The following line is losing on some versions of make!
DEFS = @DEFS@ -DSYSTEM_WGETRC=\"$(sysconfdir)/wgetrc\" -DLOCALEDIR=\"$(localedir)\"
LIBS = @LIBSSL@ @LIBGNUTLS@ @LIBINTL@ @LIBS@
bin_PROGRAMS = wget
wget_SOURCES = build_info.c cmpt.c connect.c convert.c cookies.c ftp.c \
- css.l css-url.c \
+ css.l css-url.c css-tokens.h \
ftp-basic.c ftp-ls.c hash.c host.c html-parse.c html-url.c \
http.c init.c log.c main.c netrc.c progress.c ptimer.c \
recur.c res.c retr.c snprintf.c spider.c url.c \
- utils.c \
- css-url.h connect.h convert.h cookies.h \
+ utils.c $(IRI_OBJ) \
+ css-url.h connect.h convert.h cookies.h \
ftp.h gen-md5.h hash.h host.h html-parse.h html-url.h \
http.h http-ntlm.h init.h log.h mswindows.h netrc.h \
options.h progress.h ptimer.h recur.h res.h retr.h \
echo '/* version.c */' > $@
echo '/* Autogenerated by Makefile - DO NOT EDIT */' >> $@
echo '' >> $@
- echo -n 'const char *version_string = "@VERSION@"' >> $@
+ echo 'const char *version_string = "@VERSION@"' >> $@
-hg log -r . --template='" ({node|short})"\n' 2>/dev/null >> $@
echo ';' >> $@
echo 'const char *compilation_string = "'$(COMPILE)'";' \
"-md5",
#endif
+#ifdef HAVE_SSL
+ "+https",
+#else
+ "-https",
+#endif
+
#ifdef HAVE_LIBGNUTLS
"+gnutls",
#else
#else
"-gettext",
#endif
+
+ #ifdef ENABLE_IRI
+ "+iri",
+ #else
+ "-iri",
+ #endif
+
/* sentinel value */
NULL
};
#include "connect.h"
#include "hash.h"
+/* Apparently needed for Interix: */
+#ifdef HAVE_STDINT_H
+# include <stdint.h>
+#endif
+
/* Define sockaddr_storage where unavailable (presumably on IPv4-only
hosts). */
if (print)
{
const char *txt_addr = print_address (ip);
- if (print && 0 != strcmp (print, txt_addr))
- logprintf (LOG_VERBOSE, _("Connecting to %s|%s|:%d... "),
- escnonprint_uri (print), txt_addr, port);
+ if (0 != strcmp (print, txt_addr))
+ {
+ char *str = NULL, *name;
+
+ if (opt.enable_iri && (name = idn_decode ((char *) print)) != NULL)
+ {
+ int len = strlen (print) + strlen (name) + 4;
+ str = xmalloc (len);
+ snprintf (str, len, "%s (%s)", name, print);
+ str[len-1] = '\0';
+ xfree (name);
+ }
+
+ logprintf (LOG_VERBOSE, _("Connecting to %s|%s|:%d... "),
+ str ? str : escnonprint_uri (print), txt_addr, port);
+
+ if (str)
+ xfree (str);
+ }
else
logprintf (LOG_VERBOSE, _("Connecting to %s:%d... "), txt_addr, port);
}
#include <assert.h>
#ifndef WINDOWS
+# include <sys/types.h>
# include <sys/socket.h>
# include <netinet/in.h>
# ifndef __BEOS__
# define NO_ADDRESS NO_DATA
#endif
+#if !HAVE_DECL_H_ERRNO
+extern int h_errno;
+#endif
+
+
/* Lists of IP addresses that result from running DNS queries. See
lookup_host for details. */
/* No luck with the cache; resolve HOST. */
if (!silent && !numeric_address)
- logprintf (LOG_VERBOSE, _("Resolving %s... "),
- quotearg_style (escape_quoting_style, host));
+ {
+ char *str = NULL, *name;
+
+ if (opt.enable_iri && (name = idn_decode ((char *) host)) != NULL)
+ {
+ int len = strlen (host) + strlen (name) + 4;
+ str = xmalloc (len);
+ snprintf (str, len, "%s (%s)", name, host);
+ str[len-1] = '\0';
+ xfree (name);
+ }
+
+ logprintf (LOG_VERBOSE, _("Resolving %s... "),
+ quotearg_style (escape_quoting_style, str ? str : host));
+
+ if (str)
+ xfree (str);
+ }
#ifdef ENABLE_IPV6
{
/* HTTP support.
Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
- 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+ 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
This file is part of GNU Wget.
If PROXY is non-NULL, the connection will be made to the proxy
server, and u->url will be requested. */
static uerr_t
- gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
+ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy,
+ struct iri *iri)
{
struct request *req;
print_server_response (resp, " ");
}
+ /* Check for keep-alive related responses. */
+ if (!inhibit_keep_alive && contlen != -1)
+ {
+ if (resp_header_copy (resp, "Keep-Alive", NULL, 0))
+ keep_alive = true;
+ else if (resp_header_copy (resp, "Connection", hdrval, sizeof (hdrval)))
+ {
+ if (0 == strcasecmp (hdrval, "Keep-Alive"))
+ keep_alive = true;
+ }
+ }
+
+ if (keep_alive)
+ /* The server has promised that it will not close the connection
+ when we're done. This means that we can register it. */
+ register_persistent (conn->host, conn->port, sock, using_ssl);
+
+ if (statcode == HTTP_STATUS_UNAUTHORIZED)
+ {
+ /* Authorization is required. */
+ if (keep_alive && !head_only && skip_short_body (sock, contlen))
+ CLOSE_FINISH (sock);
+ else
+ CLOSE_INVALIDATE (sock);
+ pconn.authorized = false;
+ if (!auth_finished && (user && passwd))
+ {
+ /* IIS sends multiple copies of WWW-Authenticate, one with
+ the value "negotiate", and other(s) with data. Loop over
+ all the occurrences and pick the one we recognize. */
+ int wapos;
+ const char *wabeg, *waend;
+ char *www_authenticate = NULL;
+ for (wapos = 0;
+ (wapos = resp_header_locate (resp, "WWW-Authenticate", wapos,
+ &wabeg, &waend)) != -1;
+ ++wapos)
+ if (known_authentication_scheme_p (wabeg, waend))
+ {
+ BOUNDED_TO_ALLOCA (wabeg, waend, www_authenticate);
+ break;
+ }
+
+ if (!www_authenticate)
+ {
+ /* If the authentication header is missing or
+ unrecognized, there's no sense in retrying. */
+ logputs (LOG_NOTQUIET, _("Unknown authentication scheme.\n"));
+ }
+ else if (!basic_auth_finished
+ || !BEGINS_WITH (www_authenticate, "Basic"))
+ {
+ char *pth;
+ pth = url_full_path (u);
+ request_set_header (req, "Authorization",
+ create_authorization_line (www_authenticate,
+ user, passwd,
+ request_method (req),
+ pth,
+ &auth_finished),
+ rel_value);
+ if (BEGINS_WITH (www_authenticate, "NTLM"))
+ ntlm_seen = true;
+ else if (!u->user && BEGINS_WITH (www_authenticate, "Basic"))
+ {
+ /* Need to register this host as using basic auth,
+ * so we automatically send creds next time. */
+ register_basic_auth_host (u->host);
+ }
+ xfree (pth);
+ xfree_null (message);
+ resp_free (resp);
+ xfree (head);
+ goto retry_with_auth;
+ }
+ else
+ {
+ /* We already did Basic auth, and it failed. Gotta
+ * give up. */
+ }
+ }
+ logputs (LOG_NOTQUIET, _("Authorization failed.\n"));
+ request_free (req);
+ xfree_null (message);
+ resp_free (resp);
+ xfree (head);
+ return AUTHFAILED;
+ }
+ else /* statcode != HTTP_STATUS_UNAUTHORIZED */
+ {
+ /* Kludge: if NTLM is used, mark the TCP connection as authorized. */
+ if (ntlm_seen)
+ pconn.authorized = true;
+ }
+
/* Determine the local filename if needed. Notice that if -O is used
* hstat.local_file is set by http_loop to the argument of -O. */
if (!hs->local_file)
hs->local_file = url_file_name (u);
}
}
-
+
/* TODO: perform this check only once. */
if (!hs->existence_checked && file_exists_p (hs->local_file))
{
if (has_html_suffix_p (hs->local_file))
*dt |= TEXTHTML;
+ xfree (head);
+ xfree_null (message);
return RETRUNNEEDED;
}
else if (!ALLOW_CLOBBER)
local_dot_orig_file_exists = true;
local_filename = filename_plus_orig_suffix;
}
- }
+ }
if (!local_dot_orig_file_exists)
/* Couldn't stat() <file>.orig, so try to stat() <file>. */
contlen = parsed;
}
- /* Check for keep-alive related responses. */
- if (!inhibit_keep_alive && contlen != -1)
- {
- if (resp_header_copy (resp, "Keep-Alive", NULL, 0))
- keep_alive = true;
- else if (resp_header_copy (resp, "Connection", hdrval, sizeof (hdrval)))
- {
- if (0 == strcasecmp (hdrval, "Keep-Alive"))
- keep_alive = true;
- }
- }
- if (keep_alive)
- /* The server has promised that it will not close the connection
- when we're done. This means that we can register it. */
- register_persistent (conn->host, conn->port, sock, using_ssl);
-
- if (statcode == HTTP_STATUS_UNAUTHORIZED)
- {
- /* Authorization is required. */
- if (keep_alive && !head_only && skip_short_body (sock, contlen))
- CLOSE_FINISH (sock);
- else
- CLOSE_INVALIDATE (sock);
- pconn.authorized = false;
- if (!auth_finished && (user && passwd))
- {
- /* IIS sends multiple copies of WWW-Authenticate, one with
- the value "negotiate", and other(s) with data. Loop over
- all the occurrences and pick the one we recognize. */
- int wapos;
- const char *wabeg, *waend;
- char *www_authenticate = NULL;
- for (wapos = 0;
- (wapos = resp_header_locate (resp, "WWW-Authenticate", wapos,
- &wabeg, &waend)) != -1;
- ++wapos)
- if (known_authentication_scheme_p (wabeg, waend))
- {
- BOUNDED_TO_ALLOCA (wabeg, waend, www_authenticate);
- break;
- }
-
- if (!www_authenticate)
- {
- /* If the authentication header is missing or
- unrecognized, there's no sense in retrying. */
- logputs (LOG_NOTQUIET, _("Unknown authentication scheme.\n"));
- }
- else if (!basic_auth_finished
- || !BEGINS_WITH (www_authenticate, "Basic"))
- {
- char *pth;
- pth = url_full_path (u);
- request_set_header (req, "Authorization",
- create_authorization_line (www_authenticate,
- user, passwd,
- request_method (req),
- pth,
- &auth_finished),
- rel_value);
- if (BEGINS_WITH (www_authenticate, "NTLM"))
- ntlm_seen = true;
- else if (!u->user && BEGINS_WITH (www_authenticate, "Basic"))
- {
- /* Need to register this host as using basic auth,
- * so we automatically send creds next time. */
- register_basic_auth_host (u->host);
- }
- xfree (pth);
- goto retry_with_auth;
- }
- else
- {
- /* We already did Basic auth, and it failed. Gotta
- * give up. */
- }
- }
- logputs (LOG_NOTQUIET, _("Authorization failed.\n"));
- request_free (req);
- return AUTHFAILED;
- }
- else /* statcode != HTTP_STATUS_UNAUTHORIZED */
- {
- /* Kludge: if NTLM is used, mark the TCP connection as authorized. */
- if (ntlm_seen)
- pconn.authorized = true;
- }
request_free (req);
hs->statcode = statcode;
char *tmp = strchr (type, ';');
if (tmp)
{
+ /* sXXXav: only needed if IRI support is enabled */
+ char *tmp2 = tmp + 1;
+
while (tmp > type && c_isspace (tmp[-1]))
--tmp;
*tmp = '\0';
+
+ /* Try to get remote encoding if needed */
+ if (opt.enable_iri && !opt.encoding_remote)
+ {
+ tmp = parse_charset (tmp2);
+ if (tmp)
+ set_content_encoding (iri, tmp);
+ }
}
}
hs->newloc = resp_header_strdup (resp, "Location");
else
CLOSE_INVALIDATE (sock);
xfree_null (type);
+ xfree (head);
return NEWLOCATION;
}
}
xfree_null (type);
CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
might be more bytes in the body. */
+ xfree (head);
return RETRUNNEEDED;
}
if ((contrange != 0 && contrange != hs->restval)
Bail out. */
xfree_null (type);
CLOSE_INVALIDATE (sock);
+ xfree (head);
return RANGEERR;
}
if (contlen == -1)
CLOSE_FINISH (sock);
else
CLOSE_INVALIDATE (sock);
+ xfree (head);
return RETRFINISHED;
}
_("%s has sprung into existence.\n"),
hs->local_file);
CLOSE_INVALIDATE (sock);
+ xfree (head);
return FOPEN_EXCL_ERR;
}
}
{
logprintf (LOG_NOTQUIET, "%s: %s\n", hs->local_file, strerror (errno));
CLOSE_INVALIDATE (sock);
+ xfree (head);
return FOPENERR;
}
}
retried, and retried, and retried, and... */
uerr_t
http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
- int *dt, struct url *proxy)
+ int *dt, struct url *proxy, struct iri *iri)
{
int count;
bool got_head = false; /* used for time-stamping and filename detection */
uerr_t err, ret = TRYLIMEXC;
time_t tmr = -1; /* remote time-stamp */
struct http_stat hstat; /* HTTP status */
- struct_stat st;
+ struct_stat st;
bool send_head_first = true;
+ char *file_name;
/* Assert that no value for *LOCAL_FILE was passed. */
assert (local_file == NULL || *local_file == NULL);
-
+
/* Set LOCAL_FILE parameter. */
if (local_file && opt.output_document)
*local_file = HYPHENP (opt.output_document) ? NULL : xstrdup (opt.output_document);
-
+
/* Reset NEWLOC parameter. */
*newloc = NULL;
retrieve the file. But if the output_document was given, then this
test was already done and the file didn't exist. Hence the !opt.output_document */
logprintf (LOG_VERBOSE, _("\
- File %s already there; not retrieving.\n\n"),
+ File %s already there; not retrieving.\n\n"),
quote (hstat.local_file));
/* If the file is there, we suppose it's retrieved OK. */
*dt |= RETROKF;
/* Reset the counter. */
count = 0;
-
+
/* Reset the document type. */
*dt = 0;
-
+
/* Skip preliminary HEAD request if we're not in spider mode AND
* if -O was given or HTTP Content-Disposition support is disabled. */
if (!opt.spider
/* Send preliminary HEAD request if -N is given and we have an existing
* destination file. */
- if (opt.timestamping
+ file_name = url_file_name (u);
+ if (opt.timestamping
&& !opt.content_disposition
- && file_exists_p (url_file_name (u)))
+ && file_exists_p (file_name))
send_head_first = true;
-
+ xfree (file_name);
+
/* THE loop */
do
{
/* Increment the pass counter. */
++count;
sleep_between_retrievals (count);
-
+
/* Get the current time string. */
tms = datetime_str (time (NULL));
-
+
if (opt.spider && !got_head)
logprintf (LOG_VERBOSE, _("\
Spider mode enabled. Check if remote file exists.\n"));
if (opt.verbose)
{
char *hurl = url_string (u, URL_AUTH_HIDE_PASSWD);
-
- if (count > 1)
+
+ if (count > 1)
{
char tmp[256];
sprintf (tmp, _("(try:%2d)"), count);
logprintf (LOG_NOTQUIET, "--%s-- %s %s\n",
tms, tmp, hurl);
}
- else
+ else
{
logprintf (LOG_NOTQUIET, "--%s-- %s\n",
tms, hurl);
}
-
+
#ifdef WINDOWS
ws_changetitle (hurl);
#endif
/* Default document type is empty. However, if spider mode is
on or time-stamping is employed, HEAD_ONLY commands is
encoded within *dt. */
- if (send_head_first && !got_head)
+ if (send_head_first && !got_head)
*dt |= HEAD_ONLY;
else
*dt &= ~HEAD_ONLY;
*dt &= ~SEND_NOCACHE;
/* Try fetching the document, or at least its head. */
- err = gethttp (u, &hstat, dt, proxy);
+ err = gethttp (u, &hstat, dt, proxy, iri);
/* Time? */
tms = datetime_str (time (NULL));
-
+
/* Get the new location (with or without the redirection). */
if (hstat.newloc)
*newloc = xstrdup (hstat.newloc);
hstat.statcode);
ret = WRONGCODE;
}
- else
+ else
{
ret = NEWLOCATION;
}
/* All possibilities should have been exhausted. */
abort ();
}
-
+
if (!(*dt & RETROKF))
{
char *hurl = NULL;
continue;
}
/* Maybe we should always keep track of broken links, not just in
- * spider mode. */
- else if (opt.spider)
+ * spider mode.
+ * Don't log an error if the URL was UTF-8 encoded, because we
+ * will retry it once unencoded. */
+ else if (opt.spider && !iri->utf8_encode)
{
/* #### Again: ugly ugly ugly! */
- if (!hurl)
+ if (!hurl)
hurl = url_string (u, URL_AUTH_HIDE_PASSWD);
nonexisting_url (hurl);
logprintf (LOG_NOTQUIET, _("\
else
{
logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"),
- tms, hstat.statcode,
+ tms, hstat.statcode,
quotearg_style (escape_quoting_style, hstat.error));
}
logputs (LOG_VERBOSE, "\n");
&& ((hstat.len == hstat.contlen) ||
((hstat.res == 0) && (hstat.contlen == -1))))
{
- /* #### This code repeats in http.c and ftp.c. Move it to a
- function! */
const char *fl = NULL;
- if (opt.output_document)
- {
- if (output_stream_regular)
- fl = opt.output_document;
- }
- else
- fl = hstat.local_file;
+ set_local_file (&fl, hstat.local_file);
if (fl)
{
time_t newtmr = -1;
{
if (*dt & RETROKF)
{
+ bool write_to_stdout = (opt.output_document && HYPHENP (opt.output_document));
+
logprintf (LOG_VERBOSE,
- _("%s (%s) - %s saved [%s/%s]\n\n"),
- tms, tmrate, quote (hstat.local_file),
+ write_to_stdout
+ ? _("%s (%s) - written to stdout %s[%s/%s]\n\n")
+ : _("%s (%s) - %s saved [%s/%s]\n\n"),
+ tms, tmrate,
+ write_to_stdout ? "" : quote (hstat.local_file),
number_to_static_string (hstat.len),
number_to_static_string (hstat.contlen));
logprintf (LOG_NONVERBOSE,
{
if (*dt & RETROKF)
{
+ bool write_to_stdout = (opt.output_document && HYPHENP (opt.output_document));
+
logprintf (LOG_VERBOSE,
- _("%s (%s) - %s saved [%s]\n\n"),
- tms, tmrate, quote (hstat.local_file),
+ write_to_stdout
+ ? _("%s (%s) - written to stdout %s[%s]\n\n")
+ : _("%s (%s) - %s saved [%s]\n\n"),
+ tms, tmrate,
+ write_to_stdout ? "" : quote (hstat.local_file),
number_to_static_string (hstat.len));
logprintf (LOG_NONVERBOSE,
"%s URL:%s [%s] -> \"%s\" [%d]\n",
Netscape cookie specification.) */
};
const char *oldlocale;
+ char savedlocale[256];
size_t i;
time_t ret = (time_t) -1;
non-English locales, which we work around by temporarily setting
locale to C before invoking strptime. */
oldlocale = setlocale (LC_TIME, NULL);
+ if (oldlocale)
+ {
+ size_t l = strlen (oldlocale);
+ if (l >= sizeof savedlocale)
+ savedlocale[0] = '\0';
+ else
+ memcpy (savedlocale, oldlocale, l);
+ }
+ else savedlocale[0] = '\0';
+
setlocale (LC_TIME, "C");
for (i = 0; i < countof (time_formats); i++)
}
/* Restore the previous locale. */
- setlocale (LC_TIME, oldlocale);
+ if (savedlocale[0])
+ setlocale (LC_TIME, savedlocale);
return ret;
}
/* Reading/parsing the initialization file.
Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
- 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+ 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
This file is part of GNU Wget.
#include "test.h"
#endif
-/* We want tilde expansion enabled only when reading `.wgetrc' lines;
- otherwise, it will be performed by the shell. This variable will
- be set by the wgetrc-reading function. */
-
-static bool enable_tilde_expansion;
#define CMD_DECLARE(func) static bool func (const char *, const char *, void *)
{ "inet6only", &opt.ipv6_only, cmd_boolean },
#endif
{ "input", &opt.input_filename, cmd_file },
+ { "iri", &opt.enable_iri, cmd_boolean },
{ "keepsessioncookies", &opt.keep_session_cookies, cmd_boolean },
{ "limitrate", &opt.limit_rate, cmd_bytes },
{ "loadcookies", &opt.cookies_input, cmd_file },
+ { "locale", &opt.locale, cmd_string },
{ "logfile", &opt.lfilename, cmd_file },
{ "login", &opt.ftp_user, cmd_string },/* deprecated*/
{ "maxredirect", &opt.max_redirect, cmd_number },
{ "referer", &opt.referer, cmd_string },
{ "reject", &opt.rejects, cmd_vector },
{ "relativeonly", &opt.relative_only, cmd_boolean },
+ { "remoteencoding", &opt.encoding_remote, cmd_string },
{ "removelisting", &opt.remove_listing, cmd_boolean },
{ "restrictfilenames", NULL, cmd_spec_restrict_file_names },
{ "retrsymlinks", &opt.retr_symlinks, cmd_boolean },
opt.max_redirect = 20;
opt.waitretry = 10;
+
+ #ifdef ENABLE_IRI
+ opt.enable_iri = true;
+ #else
+ opt.enable_iri = false;
+ #endif
+ opt.locale = NULL;
+ opt.encoding_remote = NULL;
}
\f
/* Return the user's home directory (strdup-ed), or NULL if none is
static enum parse_line parse_line (const char *, char **, char **, int *);
static bool setval_internal (int, const char *, const char *);
+static bool setval_internal_tilde (int, const char *, const char *);
/* Initialize variables from a wgetrc file. Returns zero (failure) if
there were errors in the file. */
file, strerror (errno));
return true; /* not a fatal error */
}
- enable_tilde_expansion = true;
ln = 1;
while ((line = read_whole_line (fp)) != NULL)
{
{
case line_ok:
/* If everything is OK, set the value. */
- if (!setval_internal (comind, com, val))
+ if (!setval_internal_tilde (comind, com, val))
{
fprintf (stderr, _("%s: Error in %s at line %d.\n"),
exec_name, file, ln);
xfree (line);
++ln;
}
- enable_tilde_expansion = false;
fclose (fp);
return errcnt == 0;
void
initialize (void)
{
- char *file;
+ char *file, *env_sysrc;
int ok = true;
/* Load the hard-coded defaults. */
defaults ();
-
- /* If SYSTEM_WGETRC is defined, use it. */
+
+ /* Run a non-standard system rc file when the according environment
+ variable has been set. For internal testing purposes only! */
+ env_sysrc = getenv ("SYSTEM_WGETRC");
+ if (env_sysrc && file_exists_p (env_sysrc))
+ ok &= run_wgetrc (env_sysrc);
+ /* Otherwise, if SYSTEM_WGETRC is defined, use it. */
#ifdef SYSTEM_WGETRC
- if (file_exists_p (SYSTEM_WGETRC))
+ else if (file_exists_p (SYSTEM_WGETRC))
ok &= run_wgetrc (SYSTEM_WGETRC);
#endif
/* Override it with your own, if one exists. */
return line_ok;
}
+#if defined(WINDOWS) || defined(MSDOS)
+# define ISSEP(c) ((c) == '/' || (c) == '\\')
+#else
+# define ISSEP(c) ((c) == '/')
+#endif
+
/* Run commands[comind].action. */
static bool
return commands[comind].action (com, val, commands[comind].place);
}
+static bool
+setval_internal_tilde (int comind, const char *com, const char *val)
+{
+ bool ret;
+ int homelen;
+ char *home;
+ char **pstring;
+ ret = setval_internal (comind, com, val);
+
+ /* We perform tilde expansion for cmd_file and cmd_directory values */
+ if (((commands[comind].action == cmd_file) ||
+ (commands[comind].action == cmd_directory))
+ && ret && (*val == '~' && ISSEP (val[1])))
+ {
+ pstring = commands[comind].place;
+ home = home_dir ();
+ if (home)
+ {
+ homelen = strlen (home);
+ while (homelen && ISSEP (home[homelen - 1]))
+ home[--homelen] = '\0';
+
+ /* Skip the leading "~/". */
+ for (++val; ISSEP (*val); val++)
+ ;
+ *pstring = concat_strings (home, "/", val, (char *)0);
+ }
+ }
+ return ret;
+}
+
/* Run command COM with value VAL. If running the command produces an
error, report the error and exit.
return true;
}
-#if defined(WINDOWS) || defined(MSDOS)
-# define ISSEP(c) ((c) == '/' || (c) == '\\')
-#else
-# define ISSEP(c) ((c) == '/')
-#endif
/* Like the above, but handles tilde-expansion when reading a user's
`.wgetrc'. In that case, and if VAL begins with `~', the tilde
/* #### If VAL is empty, perhaps should set *PLACE to NULL. */
- if (!enable_tilde_expansion || !(*val == '~' && ISSEP (val[1])))
- {
- noexpand:
- *pstring = xstrdup (val);
- }
- else
- {
- int homelen;
- char *home = home_dir ();
- if (!home)
- goto noexpand;
-
- homelen = strlen (home);
- while (homelen && ISSEP (home[homelen - 1]))
- home[--homelen] = '\0';
-
- /* Skip the leading "~/". */
- for (++val; ISSEP (*val); val++)
- ;
-
- *pstring = concat_strings (home, "/", val, (char *) 0);
- }
+ *pstring = xstrdup (val);
#if defined(WINDOWS) || defined(MSDOS)
/* Convert "\" to "/". */
/* Command line parsing.
Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
- 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+ 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
This file is part of GNU Wget.
{ "inet6-only", '6', OPT_BOOLEAN, "inet6only", -1 },
#endif
{ "input-file", 'i', OPT_VALUE, "input", -1 },
+ { "iri", 0, OPT_BOOLEAN, "iri", -1 },
{ "keep-session-cookies", 0, OPT_BOOLEAN, "keepsessioncookies", -1 },
{ "level", 'l', OPT_VALUE, "reclevel", -1 },
{ "limit-rate", 0, OPT_VALUE, "limitrate", -1 },
{ "load-cookies", 0, OPT_VALUE, "loadcookies", -1 },
+ { "locale", 0, OPT_VALUE, "locale", -1 },
{ "max-redirect", 0, OPT_VALUE, "maxredirect", -1 },
{ "mirror", 'm', OPT_BOOLEAN, "mirror", -1 },
{ "no", 'n', OPT__NO, NULL, required_argument },
{ "referer", 0, OPT_VALUE, "referer", -1 },
{ "reject", 'R', OPT_VALUE, "reject", -1 },
{ "relative", 'L', OPT_BOOLEAN, "relativeonly", -1 },
+ { "remote-encoding", 0, OPT_VALUE, "remoteencoding", -1},
{ "remove-listing", 0, OPT_BOOLEAN, "removelisting", -1 },
{ "restrict-file-names", 0, OPT_BOOLEAN, "restrictfilenames", -1 },
{ "retr-symlinks", 0, OPT_BOOLEAN, "retrsymlinks", -1 },
exit (1);
}
+ #ifdef ENABLE_IRI
+ if (opt.enable_iri)
+ {
+ if (opt.locale && !check_encoding_name (opt.locale))
+ opt.locale = NULL;
+
+ if (!opt.locale)
+ opt.locale = find_locale ();
+
+ if (opt.encoding_remote && !check_encoding_name (opt.encoding_remote))
+ opt.encoding_remote = NULL;
+ }
+ #else
+ if (opt.enable_iri || opt.locale || opt.encoding_remote)
+ {
+ /* sXXXav : be more specific... */
+ printf(_("This version does not have support for IRIs\n"));
+ exit(1);
+ }
+ #endif
+
if (opt.ask_passwd)
{
opt.passwd = prompt_for_password ();
{
#ifdef WINDOWS
FILE *result;
- result = freopen (NULL, "wb", stdout);
+ result = freopen ("CONOUT$", "wb", stdout);
if (result == NULL)
{
logputs (LOG_NOTQUIET, _("\
for (t = url; *t; t++)
{
char *filename = NULL, *redirected_URL = NULL;
- int dt;
+ int dt, url_err;
- struct url *url_parsed = url_parse (*t, &url_err);
++ struct url *url_parsed = url_parse (*t, &url_err, NULL, false);
- if ((opt.recursive || opt.page_requisites)
- && (url_scheme (*t) != SCHEME_FTP || url_uses_proxy (*t)))
+ if (!url_parsed)
{
- int old_follow_ftp = opt.follow_ftp;
-
- /* Turn opt.follow_ftp on in case of recursive FTP retrieval */
- if (url_scheme (*t) == SCHEME_FTP)
- opt.follow_ftp = 1;
-
- status = retrieve_tree (*t, NULL);
-
- opt.follow_ftp = old_follow_ftp;
+ char *error = url_error (*t, url_err);
+ logprintf (LOG_NOTQUIET, "%s: %s.\n",*t, error);
+ xfree (error);
+ status = URLERROR;
}
else
{
- struct iri *i = iri_new ();
- set_uri_encoding (i, opt.locale, true);
- status = retrieve_url (*t, &filename, &redirected_URL, NULL, &dt,
- opt.recursive, i);
- iri_free (i);
- }
+ if ((opt.recursive || opt.page_requisites)
+ && (url_scheme (*t) != SCHEME_FTP || url_uses_proxy (url_parsed)))
+ {
+ int old_follow_ftp = opt.follow_ftp;
- if (opt.delete_after && file_exists_p(filename))
- {
- DEBUGP (("Removing file due to --delete-after in main():\n"));
- logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
- if (unlink (filename))
- logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
- }
+ /* Turn opt.follow_ftp on in case of recursive FTP retrieval */
+ if (url_scheme (*t) == SCHEME_FTP)
+ opt.follow_ftp = 1;
+
- status = retrieve_tree (url_parsed);
++ status = retrieve_tree (url_parsed, NULL);
- xfree_null (redirected_URL);
- xfree_null (filename);
+ opt.follow_ftp = old_follow_ftp;
+ }
+ else
- status = retrieve_url (url_parsed, *t, &filename, &redirected_URL, NULL, &dt, opt.recursive);
++ {
++ struct iri *i = iri_new ();
++ set_uri_encoding (i, opt.locale, true);
++ status = retrieve_url (url_parsed, *t, &filename, &redirected_URL,
++ NULL, &dt, opt.recursive, i);
++ iri_free (i);
++ }
+
+ if (opt.delete_after && file_exists_p(filename))
+ {
+ DEBUGP (("Removing file due to --delete-after in main():\n"));
+ logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
+ if (unlink (filename))
+ logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
+ }
+ xfree_null (redirected_URL);
+ xfree_null (filename);
+ url_free (url_parsed);
+ }
}
/* And then from the input file, if any. */
#include "html-url.h"
#include "css-url.h"
#include "spider.h"
-
+ \f
/* Functions for maintaining the URL queue. */
struct queue_element {
int depth; /* the depth */
bool html_allowed; /* whether the document is allowed to
be treated as HTML. */
+ struct iri *iri; /* sXXXav: IRI/encoding state carried with this URL */
bool css_allowed; /* whether the document is allowed to
be treated as CSS. */
struct queue_element *next; /* next element in queue */
into it. */
static void
- url_enqueue (struct url_queue *queue,
+ url_enqueue (struct url_queue *queue, struct iri *i,
const char *url, const char *referer, int depth,
bool html_allowed, bool css_allowed)
{
struct queue_element *qel = xnew (struct queue_element);
+ qel->iri = i;
qel->url = url;
qel->referer = referer;
qel->depth = depth;
DEBUGP (("Enqueuing %s at depth %d\n", url, depth));
DEBUGP (("Queue count %d, maxcount %d.\n", queue->count, queue->maxcount));
+ if (i)
+ DEBUGP (("[IRI Enqueuing %s with %s\n", quote_n (0, url),
+ i->uri_encoding ? quote_n (1, i->uri_encoding) : "None"));
+
if (queue->tail)
queue->tail->next = qel;
queue->tail = qel;
succeeded, or false if the queue is empty. */
static bool
- url_dequeue (struct url_queue *queue,
+ url_dequeue (struct url_queue *queue, struct iri **i,
const char **url, const char **referer, int *depth,
bool *html_allowed, bool *css_allowed)
{
if (!queue->head)
queue->tail = NULL;
+ *i = qel->iri;
*url = qel->url;
*referer = qel->referer;
*depth = qel->depth;
}
\f
static bool download_child_p (const struct urlpos *, struct url *, int,
- struct url *, struct hash_table *);
+ struct url *, struct hash_table *, struct iri *);
-static bool descend_redirect_p (const char *, const char *, int,
+static bool descend_redirect_p (const char *, struct url *, int,
- struct url *, struct hash_table *);
+ struct url *, struct hash_table *, struct iri *);
/* Retrieve a part of the web beginning with START_URL. This used to
options, add it to the queue. */
uerr_t
- retrieve_tree (struct url *start_url_parsed)
-retrieve_tree (const char *start_url, struct iri *pi)
++retrieve_tree (struct url *start_url_parsed, struct iri *pi)
{
uerr_t status = RETROK;
the queue, but haven't been downloaded yet. */
struct hash_table *blacklist;
- struct url *start_url_parsed;
+ int up_error_code;
- start_url_parsed = url_parse (start_url, &up_error_code, i, true);
- if (!start_url_parsed)
- {
- char *error = url_error (start_url, up_error_code);
- logprintf (LOG_NOTQUIET, "%s: %s.\n", start_url, error);
- xfree (error);
- return URLERROR;
- }
-
+ struct iri *i = iri_new ();
+
+ #define COPYSTR(x) (x) ? xstrdup(x) : NULL;
+ /* Duplicate pi struct if not NULL */
+ if (pi)
+ {
+ i->uri_encoding = COPYSTR (pi->uri_encoding);
+ i->content_encoding = COPYSTR (pi->content_encoding);
+ i->utf8_encode = pi->utf8_encode;
+ }
+ else
+ set_uri_encoding (i, opt.locale, true);
+ #undef COPYSTR
+
queue = url_queue_new ();
blacklist = make_string_hash_table (0);
/* Enqueue the starting URL. Use start_url_parsed->url rather than
just URL so we enqueue the canonical form of the URL. */
- url_enqueue (queue, xstrdup (start_url_parsed->url), NULL, 0, true, false);
+ url_enqueue (queue, i, xstrdup (start_url_parsed->url), NULL, 0, true,
+ false);
string_set_add (blacklist, start_url_parsed->url);
while (1)
/* Get the next URL from the queue... */
- if (!url_dequeue (queue,
+ if (!url_dequeue (queue, (struct iri **) &i,
(const char **)&url, (const char **)&referer,
&depth, &html_allowed, &css_allowed))
break;
}
else
{
- int dt = 0;
+ int dt = 0, url_err;
char *redirected = NULL;
- struct url *url_parsed = url_parse (url, &url_err);
++ struct url *url_parsed = url_parse (url, &url_err, i, false);
- if (!url_parsed)
- {
- char *error = url_error (url, url_err);
- logprintf (LOG_NOTQUIET, "%s: %s.\n", url, error);
- xfree (error);
- status = URLERROR;
- }
- else
- {
- status = retrieve_url (url_parsed, url, &file, &redirected,
- referer, &dt, false);
- }
- status = retrieve_url (url, &file, &redirected, referer, &dt,
- false, i);
++ status = retrieve_url (url_parsed, url, &file, &redirected, referer,
++ &dt, false, i);
if (html_allowed && file && status == RETROK
&& (dt & RETROKF) && (dt & TEXTHTML))
want to follow it. */
if (descend)
{
- if (!descend_redirect_p (redirected, url, depth,
+ if (!descend_redirect_p (redirected, url_parsed, depth,
- start_url_parsed, blacklist))
+ start_url_parsed, blacklist, i))
descend = false;
else
/* Make sure that the old pre-redirect form gets
xfree (url);
url = redirected;
}
+ url_free(url_parsed);
}
if (opt.spider)
bool meta_disallow_follow = false;
struct urlpos *children
= is_css ? get_urls_css_file (file, url) :
- get_urls_html (file, url, &meta_disallow_follow);
+ get_urls_html (file, url, &meta_disallow_follow, i);
if (opt.use_robots && meta_disallow_follow)
{
if (children)
{
struct urlpos *child = children;
- struct url *url_parsed = url_parsed = url_parse (url, NULL);
+ struct url *url_parsed = url_parse (url, NULL, i, false);
+ struct iri *ci;
char *referer_url = url;
bool strip_auth = (url_parsed != NULL
&& url_parsed->user != NULL);
if (dash_p_leaf_HTML && !child->link_inline_p)
continue;
if (download_child_p (child, url_parsed, depth, start_url_parsed,
- blacklist))
+ blacklist, i))
{
- url_enqueue (queue, xstrdup (child->url->url),
+ ci = iri_new ();
+ set_uri_encoding (ci, i->content_encoding, false);
+ url_enqueue (queue, ci, xstrdup (child->url->url),
xstrdup (referer_url), depth + 1,
child->link_expect_html,
child->link_expect_css);
}
}
- if (file
- && (opt.delete_after
+ if (file
+ && (opt.delete_after
|| opt.spider /* opt.recursive is implicitely true */
|| !acceptable (file)))
{
/* Either --delete-after was specified, or we loaded this
- (otherwise unneeded because of --spider or rejected by -R)
- HTML file just to harvest its hyperlinks -- in either case,
+ (otherwise unneeded because of --spider or rejected by -R)
+ HTML file just to harvest its hyperlinks -- in either case,
delete the local file. */
DEBUGP (("Removing file due to %s in recursive_retrieve():\n",
opt.delete_after ? "--delete-after" :
- (opt.spider ? "--spider" :
+ (opt.spider ? "--spider" :
"recursive rejection criteria")));
logprintf (LOG_VERBOSE,
(opt.delete_after || opt.spider
xfree (url);
xfree_null (referer);
xfree_null (file);
+ iri_free (i);
}
/* If anything is left of the queue due to a premature exit, free it
char *d1, *d2;
int d3;
bool d4, d5;
- while (url_dequeue (queue,
+ struct iri *d6;
+ while (url_dequeue (queue, (struct iri **)&d6,
(const char **)&d1, (const char **)&d2, &d3, &d4, &d5))
{
+ iri_free (d6);
xfree (d1);
xfree_null (d2);
}
}
url_queue_delete (queue);
- if (start_url_parsed)
- url_free (start_url_parsed);
string_set_free (blacklist);
if (opt.quota && total_downloaded_bytes > opt.quota)
static bool
download_child_p (const struct urlpos *upos, struct url *parent, int depth,
- struct url *start_url_parsed, struct hash_table *blacklist)
+ struct url *start_url_parsed, struct hash_table *blacklist,
+ struct iri *iri)
{
struct url *u = upos->url;
const char *url = u->url;
if (string_set_contains (blacklist, url))
{
- if (opt.spider)
+ if (opt.spider)
{
char *referrer = url_string (parent, URL_AUTH_HIDE_PASSWD);
DEBUGP (("download_child_p: parent->url is: %s\n", quote (parent->url)));
if (!specs)
{
char *rfile;
- if (res_retrieve_file (url, &rfile))
+ if (res_retrieve_file (url, &rfile, iri))
{
specs = res_parse_from_file (rfile);
it is merely a simple-minded wrapper around download_child_p. */
static bool
-descend_redirect_p (const char *redirected, const char *original, int depth,
+descend_redirect_p (const char *redirected, struct url *orig_parsed, int depth,
- struct url *start_url_parsed, struct hash_table *blacklist)
+ struct url *start_url_parsed, struct hash_table *blacklist,
+ struct iri *iri)
{
- struct url *orig_parsed, *new_parsed;
+ struct url *new_parsed;
struct urlpos *upos;
bool success;
- orig_parsed = url_parse (original, NULL, NULL, false);
assert (orig_parsed != NULL);
- new_parsed = url_parse (redirected, NULL);
+ new_parsed = url_parse (redirected, NULL, NULL, false);
assert (new_parsed != NULL);
upos = xnew0 (struct urlpos);
upos->url = new_parsed;
success = download_child_p (upos, orig_parsed, depth,
- start_url_parsed, blacklist);
+ start_url_parsed, blacklist, iri);
- url_free (orig_parsed);
url_free (new_parsed);
xfree (upos);
/* Declarations for recur.c.
Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
- 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+ 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
This file is part of GNU Wget.
#ifndef RECUR_H
#define RECUR_H
+#include "url.h"
+
/* For most options, 0 means no limits, but with -p in the picture,
that causes a problem on the maximum recursion depth variable. To
retain backwards compatibility we allow users to consider "0" to be
struct urlpos;
void recursive_cleanup (void);
- uerr_t retrieve_tree (struct url *);
-uerr_t retrieve_tree (const char *, struct iri *);
++uerr_t retrieve_tree (struct url *, struct iri *);
#endif /* RECUR_H */
Return true if robots were retrieved OK, false otherwise. */
bool
- res_retrieve_file (const char *url, char **file)
+ res_retrieve_file (const char *url, char **file, struct iri *iri)
{
+ struct iri *i = iri_new ();
uerr_t err;
char *robots_url = uri_merge (url, RES_SPECS_LOCATION);
int saved_ts_val = opt.timestamping;
- int saved_sp_val = opt.spider;
+ int saved_sp_val = opt.spider, url_err;
+ struct url * url_parsed;
+ /* Copy the server URI encoding for a possible IDNA transformation; there is
+ no need to encode the full URI in UTF-8 because "robots.txt" is plain ASCII */
+ set_uri_encoding (i, iri->uri_encoding, false);
+ i->utf8_encode = false;
+
logputs (LOG_VERBOSE, _("Loading robots.txt; please ignore errors.\n"));
*file = NULL;
opt.timestamping = false;
opt.spider = false;
- err = retrieve_url (robots_url, file, NULL, NULL, NULL, false, i);
+
- url_parsed = url_parse (robots_url, &url_err);
++ url_parsed = url_parse (robots_url, &url_err, iri, true);
+ if (!url_parsed)
+ {
+ char *error = url_error (robots_url, url_err);
+ logprintf (LOG_NOTQUIET, "%s: %s.\n", robots_url, error);
+ xfree (error);
+ err = URLERROR;
+ }
+ else
+ {
+ err = retrieve_url (url_parsed, robots_url, file, NULL, NULL, NULL,
- false);
++ false, i);
+ url_free(url_parsed);
+ }
+
opt.timestamping = saved_ts_val;
- opt.spider = saved_sp_val;
+ opt.spider = saved_sp_val;
xfree (robots_url);
+ iri_free (i);
if (err != RETROK && *file != NULL)
{
/* If we're skipping STARTPOS bytes, pass 0 as the INITIAL
argument to progress_create because the indicator doesn't
(yet) know about "skipping" data. */
- progress = progress_create (skip ? 0 : startpos, startpos + toread);
+ wgint start = skip ? 0 : startpos;
+ progress = progress_create (start, start + toread);
progress_interactive = progress_interactive_p (progress);
}
multiple points. */
uerr_t
-retrieve_url (const char *origurl, char **file, char **newloc,
- const char *refurl, int *dt, bool recursive, struct iri *iri)
+retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
- char **newloc, const char *refurl, int *dt, bool recursive)
++ char **newloc, const char *refurl, int *dt, bool recursive,
++ struct iri *iri)
{
uerr_t result;
char *url;
bool location_changed;
int dummy;
char *mynewloc, *proxy;
- struct url *u, *proxy_url;
+ struct url *u = orig_parsed, *proxy_url;
int up_error_code; /* url parse error code */
char *local_file;
int redirection_count = 0;
if (file)
*file = NULL;
- u = url_parse (url, &up_error_code, iri, true);
- if (!u)
- {
- char *error = url_error (url, up_error_code);
- logprintf (LOG_NOTQUIET, "%s: %s.\n", url, error);
- xfree (url);
- xfree (error);
- return URLERROR;
- }
-
+ second_try:
+ DEBUGP (("[IRI Retrieving %s with %s (UTF-8=%d)\n", quote_n (0, url),
+ iri->uri_encoding ? quote_n (1, iri->uri_encoding) : "None",
+ iri->utf8_encode));
+
if (!refurl)
refurl = opt.referer;
proxy = getproxy (u);
if (proxy)
{
+ struct iri *pi = iri_new ();
+ set_uri_encoding (pi, opt.locale, true);
+ pi->utf8_encode = false;
+
/* Parse the proxy URL. */
- proxy_url = url_parse (proxy, &up_error_code);
+ proxy_url = url_parse (proxy, &up_error_code, NULL, true);
if (!proxy_url)
{
char *error = url_error (proxy, up_error_code);
#endif
|| (proxy_url && proxy_url->scheme == SCHEME_HTTP))
{
- result = http_loop (u, &mynewloc, &local_file, refurl, dt, proxy_url);
+ result = http_loop (u, &mynewloc, &local_file, refurl, dt, proxy_url, iri);
}
else if (u->scheme == SCHEME_FTP)
{
xfree (mynewloc);
mynewloc = construced_newloc;
+ /* Reset UTF-8 encoding state, keep the URI encoding and reset
+ the content encoding. */
+ iri->utf8_encode = opt.enable_iri;
+ set_content_encoding (iri, NULL);
+ xfree_null (iri->orig_url);
+
/* Now, see if this new location makes sense. */
- newloc_parsed = url_parse (mynewloc, &up_error_code);
+ newloc_parsed = url_parse (mynewloc, &up_error_code, iri, true);
if (!newloc_parsed)
{
char *error = url_error (mynewloc, up_error_code);
logprintf (LOG_NOTQUIET, "%s: %s.\n", escnonprint_uri (mynewloc),
error);
- url_free (u);
+ if (orig_parsed != u)
+ {
+ url_free (u);
+ }
xfree (url);
xfree (mynewloc);
xfree (error);
logprintf (LOG_NOTQUIET, _("%d redirections exceeded.\n"),
opt.max_redirect);
url_free (newloc_parsed);
- url_free (u);
+ if (orig_parsed != u)
+ {
+ url_free (u);
+ }
xfree (url);
xfree (mynewloc);
RESTORE_POST_DATA;
xfree (url);
url = mynewloc;
- url_free (u);
+ if (orig_parsed != u)
+ {
+ url_free (u);
+ }
u = newloc_parsed;
/* If we're being redirected from POST, we don't want to POST
goto redirected;
}
- if (local_file)
+ /* If fetching failed with the UTF-8-encoded URL, retry without UTF-8 encoding */
+ if (!(*dt & RETROKF) && iri->utf8_encode)
+ {
+ iri->utf8_encode = false;
+ DEBUGP (("[IRI fallbacking to non-utf8 for %s\n", quote (url)));
+ goto second_try;
+ }
+
+ if (local_file && *dt & RETROKF)
{
+ register_download (u->url, local_file);
+ if (redirection_count && 0 != strcmp (origurl, u->url))
+ register_redirection (origurl, u->url);
+ if (*dt & TEXTHTML)
+ register_html (u->url, local_file);
if (*dt & RETROKF)
{
register_download (u->url, local_file);
else
xfree_null (local_file);
- url_free (u);
+ if (orig_parsed != u)
+ {
+ url_free (u);
+ }
if (redirection_count)
{
{
uerr_t status;
struct urlpos *url_list, *cur_url;
+ struct iri *iri = iri_new();
char *input_file = NULL;
const char *url = file;
status = RETROK; /* Suppose everything is OK. */
*count = 0; /* Reset the URL count. */
-
+
+ /* sXXXav : Assume filename and links in the file are in the locale */
+ set_uri_encoding (iri, opt.locale, true);
+ set_content_encoding (iri, opt.locale);
+
if (url_has_scheme (url))
{
- int dt;
+ int dt,url_err;
uerr_t status;
- struct url * url_parsed = url_parse(url, &url_err);
++ struct url * url_parsed = url_parse(url, &url_err, NULL, true);
+
+ if (!url_parsed)
+ {
+ char *error = url_error (url, url_err);
+ logprintf (LOG_NOTQUIET, "%s: %s.\n", url, error);
+ xfree (error);
+ return URLERROR;
+ }
if (!opt.base_href)
opt.base_href = xstrdup (url);
- status = retrieve_url (url_parsed, url, &input_file, NULL, NULL, &dt, false);
- status = retrieve_url (url, &input_file, NULL, NULL, &dt, false, iri);
++ status = retrieve_url (url_parsed, url, &input_file, NULL, NULL, &dt,
++ false, iri);
if (status != RETROK)
return status;
if (dt & TEXTHTML)
html = true;
+
+ /* If we have a found a content encoding, use it */
+ if (iri->content_encoding)
+ set_uri_encoding (iri, iri->content_encoding, false);
}
else
input_file = (char *) file;
- url_list = (html ? get_urls_html (input_file, NULL, NULL)
+ url_list = (html ? get_urls_html (input_file, NULL, NULL, iri)
: get_urls_file (input_file));
for (cur_url = url_list; cur_url; cur_url = cur_url->next, ++*count)
status = QUOTEXC;
break;
}
+
+ /* Reset UTF-8 encode status */
+ iri->utf8_encode = opt.enable_iri;
+ xfree_null (iri->orig_url);
+ iri->orig_url = NULL;
+
if ((opt.recursive || opt.page_requisites)
&& (cur_url->url->scheme != SCHEME_FTP || getproxy (cur_url->url)))
{
int old_follow_ftp = opt.follow_ftp;
/* Turn opt.follow_ftp on in case of recursive FTP retrieval */
- if (cur_url->url->scheme == SCHEME_FTP)
+ if (cur_url->url->scheme == SCHEME_FTP)
opt.follow_ftp = 1;
-
- status = retrieve_tree (cur_url->url);
+
- status = retrieve_tree (cur_url->url->url, iri);
++ status = retrieve_tree (cur_url->url, iri);
opt.follow_ftp = old_follow_ftp;
}
else
- {
- status = retrieve_url (cur_url->url, cur_url->url->url, &filename,
- &new_file, NULL, &dt, opt.recursive);
- }
- status = retrieve_url (cur_url->url->url, &filename, &new_file, NULL,
- &dt, opt.recursive, iri);
++ status = retrieve_url (cur_url->url, cur_url->url->url, &filename,
++ &new_file, NULL, &dt, opt.recursive, iri);
if (filename && opt.delete_after && file_exists_p (filename))
{
/* Free the linked list of URL-s. */
free_urlpos (url_list);
+ iri_free (iri);
+
return status;
}
/* Returns true if URL would be downloaded through a proxy. */
bool
-url_uses_proxy (const char *url)
+url_uses_proxy (struct url * u)
{
bool ret;
- struct url *u;
- struct iri *i = iri_new();
- /* url was given in the command line, so use locale as encoding */
- set_uri_encoding (i, opt.locale, true);
- u= url_parse (url, NULL, i, false);
if (!u)
return false;
ret = getproxy (u) != NULL;
- url_free (u);
return ret;
}
else
return sufmatch (no_proxy, host);
}
+
+/* Set the file parameter to point to the local file string. */
+void
+set_local_file (const char **file, const char *default_file)
+{
+ if (opt.output_document)
+ {
+ if (output_stream_regular)
+ *file = opt.output_document;
+ }
+ else
+ *file = default_file;
+}
#ifndef RETR_H
#define RETR_H
+#include "url.h"
+
/* These global vars should be made static to retr.c and exported via
functions! */
extern SUM_SIZE_INT total_downloaded_bytes;
char *fd_read_hunk (int, hunk_terminator_t, long, long);
char *fd_read_line (int);
- uerr_t retrieve_url (struct url *, const char *, char **, char **, const char *, int *, bool);
-uerr_t retrieve_url (const char *, char **, char **, const char *, int *,
- bool, struct iri *);
++uerr_t retrieve_url (struct url *, const char *, char **, char **,
++ const char *, int *, bool, struct iri *);
uerr_t retrieve_from_file (const char *, bool, int *);
const char *retr_rate (wgint, double);
void rotate_backups (const char *);
-bool url_uses_proxy (const char *);
+bool url_uses_proxy (struct url *);
#endif /* RETR_H */
error, and if ERROR is not NULL, also set *ERROR to the appropriate
error code. */
struct url *
- url_parse (const char *url, int *error)
+ url_parse (const char *url, int *error, struct iri *iri, bool percent_encode)
{
struct url *u;
const char *p;
int port;
char *user = NULL, *passwd = NULL;
- char *url_encoded = NULL;
- char *url_encoded = NULL, *new_url = NULL;
++ const char *url_encoded = NULL;
++ char *new_url = NULL;
int error_code;
goto error;
}
- url_encoded = reencode_escapes (url);
+ if (iri && iri->utf8_encode)
+ {
+ iri->utf8_encode = remote_to_utf8 (iri, iri->orig_url ? iri->orig_url : url, (const char **) &new_url);
+ if (!iri->utf8_encode)
+ new_url = NULL;
+ else
+ iri->orig_url = xstrdup (url);
+ }
+
+ /* XXX XXX Could that change introduce (security) bugs ??? XXX XXX*/
+ if (percent_encode)
+ url_encoded = reencode_escapes (new_url ? new_url : url);
+ else
+ url_encoded = new_url ? new_url : url;
+
p = url_encoded;
+ if (new_url && url_encoded != new_url)
+ xfree (new_url);
+
p += strlen (supported_schemes[scheme].leading_string);
uname_b = p;
p = url_skip_credentials (p);
{
url_unescape (u->host);
host_modified = true;
+
+ /* Apply IDNA regardless of iri->utf8_encode status */
+ if (opt.enable_iri && iri)
+ {
+ char *new = idn_encode (iri, u->host);
+ if (new)
+ {
+ xfree (u->host);
+ u->host = new;
+ host_modified = true;
+ }
+ }
}
if (params_b)
if (fragment_b)
u->fragment = strdupdelim (fragment_b, fragment_e);
- if (path_modified || u->fragment || host_modified || path_b == path_e)
+ if (opt.enable_iri || path_modified || u->fragment || host_modified || path_b == path_e)
{
/* If we suspect that a transformation has rendered what
url_string might return different from URL_ENCODED, rebuild
if (url_encoded == url)
u->url = xstrdup (url);
else
-- u->url = url_encoded;
++ u->url = (char *) url_encoded;
}
return u;
error:
/* Cleanup in case of error: */
if (url_encoded && url_encoded != url)
-- xfree (url_encoded);
++ xfree ((char *) url_encoded);
/* Transmit the error code to the caller, if the caller wants to
know. */
\f
static int
getchar_from_escaped_string (const char *str, char *c)
- {
+ {
const char *p = str;
assert (str && *str);
assert (c);
-
+
if (p[0] == '%')
{
if (!c_isxdigit(p[1]) || !c_isxdigit(p[2]))
p += pp;
q += qq;
}
-
+
return (*p == 0 && *q == 0 ? true : false);
}
\f
} test_array[] = {
{ "http://www.yoyodyne.com/path/", "somepage.html", false, "http://www.yoyodyne.com/path/somepage.html" },
};
-
+
for (i = 0; i < sizeof(test_array)/sizeof(test_array[0]); ++i)
{
struct growable dest;
+ 2008-12-04 Micah Cowan <micah@cowan.name> (not copyrightable)
+
+ * run-px, Test-idn-robots.px: Added test for robots-file
+ downloads.
+
+ * Test-idn-cmd.px, Test-idn-meta.px, Test-idn-headers.px:
+ Fix test names.
+
+ 2008-11-26 Micah Cowan <micah@cowan.name> (not copyrightable)
+
+ * Test-ftp-iri-disabled.px, Test-ftp-iri-fallback.px,
+ Test-ftp-iri.px, Test-idn-cmd.px, Test-idn-headers.px,
+ Test-idn-meta.px, Test-iri-disabled.px,
+ Test-iri-forced-remote.px, Test-iri-list.px, Test-iri.px: More
+ module-scope warnings.
+
+2009-06-14 Micah Cowan <micah@cowan.name>
+
+ * Makefile.am (EXTRA_DIST): Include all the tests, run-px, and
+ certs/, to make distcheck happy.
+
+2009-06-11 Benjamin Wolsey <bwy@benjaminwolsey.de>
+
+ * Test-proxied-https-auth.px: Take an optional argument for the
+ top source directory, so we can find the cert and key.
+
+ * run-px: Provide the top source directory as an argument, so
+ scripts can find their way around.
+
+2009-04-11 Steven Schubiger <stsc@member.fsf.org>
+
+ * run-px: Skip testing with real rc files by setting
+ SYSTEM_WGETRC and WGETRC to /dev/null.
+
+2009-02-25 Benjamin Wolsey <bwy@benjaminwolsey.de>
+
+ * Makefile.am (run-px-tests): Ensure run-px is run from srcdir.
+
+ * run-px: Include modules from srcdir.
+
2008-11-25 Steven Schubiger <stsc@members.fsf.org>
* WgetTest.pm.in: Remove the magic interpreter line;
* run-px: Use strict (thanks Steven Schubiger!).
+ 2008-09-09 Micah Cowan <micah@cowan.name>
+
+ * Test-idn-cmd.px: Added.
+
+ * run-px: Added Test-idn-cmd.px.
+
+ 2008-08-28 Micah Cowan <micah@cowan.name>
+
+ * HTTPServer.pm (run): Allow distinguishing between hostnames,
+ when used as a proxy.
+
+ * Test-idn-headers.px, Test-idn-meta.px: Added.
+
+ * run-px: Added Test-idn-headers.px, Test-idn-meta.px.
+
+ * Test-proxy-auth-basic.px: Use the full URL, rather than just the
+ path (made necessary by the accompanying change to HTTPServer.pm).
+
+ 2008-08-14 Xavier Saint <wget@sxav.eu>
+
+ * Test-iri-list.px : Fetch files from a remote list.
+
+ 2008-08-03 Xavier Saint <wget@sxav.eu>
+
+ * Test-iri.px : HTTP recursive fetch for testing IRI support and
+ fallback.
+
+ * Test-iri-disabled.px : Same file structure as Test-iri.px but with
+ IRI support disabled.
+
+ * Test-iri-forced-remote.px : There's a difference between ISO-8859-1
+ and ISO-8859-15 for character 0xA4 (respectively currency sign and
+ euro sign). So with a forced ISO-8859-1 remote encoding, wget should
+ see 0xA4 as a currency sign and transcode it correctly in UTF-8 instead
+ of using the ISO-8859-15 given by the server.
+
+ * Test-ftp-iri.px : Give a file to fetch via FTP in a specific locale
+ and expect wget to fetch the file UTF-8 encoded.
+
+ * Test-ftp-iri-fallback.px : Same as above but wget should fallback on
+ locale encoding to fetch the file.
+
+ * Test-ftp-iri-disabled.px : Same as Test-ftp-iri.px but with IRI support
+ disabled. The UTF-8 encoded file should not be retrieved.
+
2008-06-22 Micah Cowan <micah@cowan.name>
* Test-proxied-https-auth.px: Shift exit code so it falls in the
'Test-E-k-K.px',
'Test-E-k.px',
'Test-ftp.px',
+ 'Test-ftp-iri.px',
+ 'Test-ftp-iri-fallback.px',
+ 'Test-ftp-iri-disabled.px',
'Test-HTTP-Content-Disposition-1.px',
'Test-HTTP-Content-Disposition-2.px',
'Test-HTTP-Content-Disposition.px',
+ 'Test-idn-headers.px',
+ 'Test-idn-meta.px',
+ 'Test-idn-cmd.px',
+ 'Test-idn-robots.px',
+ 'Test-iri.px',
+ 'Test-iri-disabled.px',
+ 'Test-iri-forced-remote.px',
+ 'Test-iri-list.px',
'Test-N-current.px',
'Test-N-smaller.px',
'Test-N-no-info.px',
'Test--spider-r.px',
);
+foreach my $var (qw(SYSTEM_WGETRC WGETRC)) {
+ $ENV{$var} = '/dev/null';
+}
+
my @tested;
foreach my $test (@tests) {
print "Running $test\n\n";
- system("$^X $top_srcdir/tests/$test");
+ system("$^X -I$top_srcdir/tests $top_srcdir/tests/$test $top_srcdir");
push @tested, { name => $test, result => $? };
}
+foreach my $var (qw(SYSTEM_WGETRC WGETRC)) {
+ delete $ENV{$var};
+}
+
print "\n";
foreach my $test (@tested) {
($test->{result} == 0)