From: Micah Cowan Date: Thu, 25 Jun 2009 08:14:11 +0000 (-0700) Subject: Merge with mainline. X-Git-Tag: v1.13~338 X-Git-Url: http://sjero.net/git/?p=wget;a=commitdiff_plain;h=4f3dd6817348433eafde04a3c2946f43364de7ef;hp=5d0073b8f290dee2e9bad3e83230f6b57dd06beb Merge with mainline. --- diff --git a/ChangeLog b/ChangeLog index a891c52e..659415aa 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,28 @@ +2009-06-14 Micah Cowan + + * po/Makefile.in.in (distclean): remove en_US.po, too. + + * Makefile.am: Include md5 as a subdir unconditionally. + It may result in useless compilation, and additional risk of + breaking a build of something that isn't actually needed, but + otherwise it's too much of a hassle to manage a failure-free + distcheck. + +2009-06-12 Micah Cowan + + * configure.ac: Check for h_errno declaration. Idea thanks to + Maciej W. Rozycki. + +2009-03-03 Steven Schubiger + + * src/ftp.c, src/http.c, src/main.c, src/recur.h, + tests/Makefile.am: Update the copyright years. + +2009-01-23 Steven Schubiger + + * util/freeopts, util/rmold.pl, util/trunc.c: Remove + unnecessary whitespace. + 2008-11-10 Micah Cowan * MAILING-LIST: Mention Gmane, introduce subsections. 
diff --git a/Makefile.am b/Makefile.am index 4bff177a..d5b415c7 100644 --- a/Makefile.am +++ b/Makefile.am @@ -34,10 +34,10 @@ ACLOCAL_AMFLAGS = -I m4 -I md5/m4 # subdirectories in the distribution -SUBDIRS = lib @MD5_SUBDIR@ src doc po tests util windows +SUBDIRS = lib md5 src doc po tests util windows EXTRA_DIST = ChangeLog.README configure.bat MAILING-LIST \ - msdos/ChangeLog msdos/config.h msdos/Makefile.DJ \ - msdos/Makefile.WC ABOUT-NLS autogen.sh + msdos/ChangeLog msdos/config.h msdos/Makefile.DJ \ + msdos/Makefile.WC ABOUT-NLS autogen.sh CLEANFILES = *~ *.bak $(DISTNAME).tar.gz diff --git a/configure.ac b/configure.ac index fb0c65d1..dcb302fa 100644 --- a/configure.ac +++ b/configure.ac @@ -163,6 +163,8 @@ AC_CHECK_HEADERS(unistd.h sys/time.h) AC_CHECK_HEADERS(termios.h sys/ioctl.h sys/select.h utime.h sys/utime.h) AC_CHECK_HEADERS(stdint.h inttypes.h pwd.h wchar.h) +AC_CHECK_DECLS(h_errno,,,[#include <netdb.h>]) + dnl dnl Check sizes of integer types. These are used to find n-bit dnl integral types on older systems that fail to provide intN_t and diff --git a/doc/ChangeLog b/doc/ChangeLog index dc1d4084..898e3c6e 100644 --- a/doc/ChangeLog +++ b/doc/ChangeLog @@ -1,3 +1,31 @@ +2009-06-20 Micah Cowan + + * wget.texi (Contributors): Added Jay Krell. + +2009-06-14 Micah Cowan + + * Makefile.am (wget.pod): $(srcdir)/version.texi -> version.texi + +2009-06-12 Micah Cowan + + * wget.texi (Download Options): More accuracy on what happens when + -nd is used with -r or -p. + +2009-06-11 Micah Cowan + + * wget.texi (Contributors): Added Xin Zou, Benjamin Wolsey, and + Robert Millan. + +2009-06-11 Joao Ferreira + + * wget.texi (Option Syntax): Fixed contradictory and confusing + explanation of --follow-ftp and negation. + +2009-06-10 Micah Cowan + + * sample.wgetrc: Add "https_proxy" to the proxy examples. Thanks + to Martin Paul for the suggestion.
+ 2008-11-15 Steven Schubiger * sample.wgetrc: Comment the waitretry "default" value, diff --git a/doc/Makefile.am b/doc/Makefile.am index 74abe7f6..46c77d9e 100644 --- a/doc/Makefile.am +++ b/doc/Makefile.am @@ -52,7 +52,7 @@ EXTRA_DIST = sample.wgetrc \ $(SAMPLERCTEXI) \ texi2pod.pl -wget.pod: $(srcdir)/wget.texi $(srcdir)/version.texi +wget.pod: $(srcdir)/wget.texi version.texi $(TEXI2POD) $(srcdir)/wget.texi $@ $(MAN): wget.pod diff --git a/doc/sample.wgetrc b/doc/sample.wgetrc index 12914aea..1ce90dea 100644 --- a/doc/sample.wgetrc +++ b/doc/sample.wgetrc @@ -73,8 +73,9 @@ # is *not* sent by default. #header = Accept-Language: en -# You can set the default proxies for Wget to use for http and ftp. +# You can set the default proxies for Wget to use for http, https, and ftp. # They will override the value in the environment. +#https_proxy = http://proxy.yoyodyne.com:18023/ #http_proxy = http://proxy.yoyodyne.com:18023/ #ftp_proxy = http://proxy.yoyodyne.com:18023/ diff --git a/doc/wget.texi b/doc/wget.texi index a2804fb4..252548f8 100644 --- a/doc/wget.texi +++ b/doc/wget.texi @@ -396,8 +396,8 @@ the option name; negative options can be negated by omitting the @samp{--no-} prefix. This might seem superfluous---if the default for an affirmative option is to not do something, then why provide a way to explicitly turn it off? But the startup file may in fact change -the default. For instance, using @code{follow_ftp = off} in -@file{.wgetrc} makes Wget @emph{not} follow FTP links by default, and +the default. For instance, using @code{follow_ftp = on} in +@file{.wgetrc} makes Wget @emph{follow} FTP links by default, and using @samp{--no-follow-ftp} is the only way to restore the factory default from the command line. @@ -582,23 +582,24 @@ behavior depends on a few options, including @samp{-nc}. In certain cases, the local file will be @dfn{clobbered}, or overwritten, upon repeated download. In other cases it will be preserved. 
-When running Wget without @samp{-N}, @samp{-nc}, @samp{-r}, or @samp{p}, -downloading the same file in the same directory will result in the -original copy of @var{file} being preserved and the second copy being -named @samp{@var{file}.1}. If that file is downloaded yet again, the -third copy will be named @samp{@var{file}.2}, and so on. When -@samp{-nc} is specified, this behavior is suppressed, and Wget will -refuse to download newer copies of @samp{@var{file}}. Therefore, -``@code{no-clobber}'' is actually a misnomer in this mode---it's not -clobbering that's prevented (as the numeric suffixes were already -preventing clobbering), but rather the multiple version saving that's -prevented. - -When running Wget with @samp{-r} or @samp{-p}, but without @samp{-N} -or @samp{-nc}, re-downloading a file will result in the new copy -simply overwriting the old. Adding @samp{-nc} will prevent this -behavior, instead causing the original version to be preserved and any -newer copies on the server to be ignored. +When running Wget without @samp{-N}, @samp{-nc}, @samp{-r}, or +@samp{-p}, downloading the same file in the same directory will result +in the original copy of @var{file} being preserved and the second copy +being named @samp{@var{file}.1}. If that file is downloaded yet +again, the third copy will be named @samp{@var{file}.2}, and so on. +(This is also the behavior with @samp{-nd}, even if @samp{-r} or +@samp{-p} are in effect.) When @samp{-nc} is specified, this behavior +is suppressed, and Wget will refuse to download newer copies of +@samp{@var{file}}. Therefore, ``@code{no-clobber}'' is actually a +misnomer in this mode---it's not clobbering that's prevented (as the +numeric suffixes were already preventing clobbering), but rather the +multiple version saving that's prevented. + +When running Wget with @samp{-r} or @samp{-p}, but without @samp{-N}, +@samp{-nd}, or @samp{-nc}, re-downloading a file will result in the +new copy simply overwriting the old. 
Adding @samp{-nc} will prevent +this behavior, instead causing the original version to be preserved +and any newer copies on the server to be ignored. When running Wget with @samp{-N}, with or without @samp{-r} or @samp{-p}, the decision as to whether or not to download a newer copy @@ -3973,6 +3974,7 @@ Fila Kolodny, Alexander Kourakos, Martin Kraemer, Sami Krank, +Jay Krell, @tex $\Sigma\acute{\iota}\mu o\varsigma\; \Xi\varepsilon\nu\iota\tau\acute{\epsilon}\lambda\lambda\eta\varsigma$ @@ -4003,6 +4005,7 @@ Aurelien Marchand, Matthew J.@: Mellon, Jordan Mendelson, Ted Mielczarek, +Robert Millan, Lin Zhe Min, Jan Minar, Tim Mooney, @@ -4078,6 +4081,7 @@ Charles G Waldman, Douglas E.@: Wegscheid, Ralf Wildenhues, Joshua David Williams, +Benjamin Wolsey, YAMAZAKI Makoto, Jasmin Zainul, @iftex @@ -4086,7 +4090,8 @@ Bojan @v{Z}drnja, @ifnottex Bojan Zdrnja, @end ifnottex -Kristijan Zimmer. +Kristijan Zimmer, +Xin Zou. Apologies to all who I accidentally left out, and many thanks to all the subscribers of the Wget mailing list. diff --git a/po/Makefile.in.in b/po/Makefile.in.in index e6da10aa..27cbb1bf 100644 --- a/po/Makefile.in.in +++ b/po/Makefile.in.in @@ -312,7 +312,7 @@ mostlyclean: clean: mostlyclean distclean: clean - rm -f Makefile Makefile.in POTFILES *.mo + rm -f Makefile Makefile.in POTFILES *.mo en_US.po maintainer-clean: distclean @echo "This command is intended for maintainers to use;" diff --git a/src/ChangeLog b/src/ChangeLog index 2d3331f1..bd833ea0 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,8 +1,144 @@ +2009-06-20 Jay Krell + + * sysdep.h (_ALL_SOURCE): (small change) Define the _ALL_SOURCE + macro on INTERIX systems. (I switched the location from ftp.c to + sysdep.h --mjc) + +2009-06-15 Micah Cowan + + * ftp.c (getftp): If we can't accept the connection, return + CONERROR, not whatever the contents of err happens to be. Fixes + bug #25015. 
+ + * retr.c (fd_read_body): Make both args to progress_create + consistent, resulting in an accurate progress display. Fixes bug + #24948. + +2009-06-14 Micah Cowan + + * Makefile.am (wget_SOURCES): css-tokens.h needs to ship with + dist, too. + +2009-06-13 Micah Cowan + + * init.c: Rename setval_internal_wrapper to setval_internal_tilde, + ensure we don't "replace" the tilde unless it's actually + present. Clean up some minor GNU style issues. + +2009-06-13 Julien Pichon + + * init.c: Handle tilde-expansion in wgetrc commands, without + resorting to setting/unsetting globals to change behavior in one + call location. + +2009-06-12 Micah Cowan + + * host.c: Include <sys/types.h> before <sys/socket.h>. Not + required by POSIX any more, but some older systems (such as + FreeBSD 4.1) still need it, and it doesn't seem like it could + hurt... + + * build_info.c (library): Handle "https" as a feature in its own + right, apart from "gnutls" and "openssl". + + * host.c: Declare h_errno if no declaration is provided. Idea + thanks to Maciej W. Rozycki. + +2009-06-11 Xin Zou + + * http.c (gethttp): Fix some memory leaks. + +2009-06-11 Micah Cowan + + * http.c (http_atotm): Handle potential for setlocale's return + value to be static storage. Thanks to Benjamin Wolsey + . + + * sysdep.h: Need NAMESPACE_TWEAKS on non-Linux glibc-based + systems, too. Thanks to Robert Millan. + +2009-05-28 Steven Schubiger + + * ftp.c (ftp_get_listing): Update the "listing file" + string after calling ftp_loop_internal(). + +2009-05-27 Steven Schubiger + + * ftp.c (ftp_get_listing): Duplicate the "listing file" + string to avoid memory corruption when FOPEN_EXCL_ERR is + encountered. + +2009-05-17 Steven Schubiger + + * progress.c (eta_to_human_short): Fix the remaining hours + to be displayed. Spotted by Tadeu Martins (#26411). + +2009-04-24 Micah Cowan + + * hash.c: Change stdint.h inclusion to use HAVE_STDINT_H, not C99 + check. + + * connect.c: stdint.h inclusion added. + + Thanks to Markus Duft for a similar patch.
+ +2009-04-20 Micah Cowan + + * Makefile.am (version.c): Fix unportable use of "echo -n". + +2009-04-13 Steven Schubiger + + * ftp.c (ftp_retrieve_list): Move the duplicated code that + determines the local file to a function. + + * http.c (http_loop): Likewise. + + * retr.c (set_local_file): New function. + +2009-04-11 Steven Schubiger + + * init.c (initialize): Run a custom SYSTEM_WGETRC when + provided as an environment variable. + +2009-02-27 Gisle Vanem + + * main.c (main): "freopen (NULL,.." causes an assertion in MSVC + debug-mode. I.e. NULL isn't legal. But the "CONOUT$" device works + fine. + +2009-02-27 Steven Schubiger + + * ftp.c (ftp_loop_internal): Don't claim for FTP retrievals + when writing to standard output either that the document + has been saved. Addresses bug #20520 again. + +2009-02-21 Steven Schubiger + + * http.c (http_loop): When a document is written to + standard output, don't claim it has been saved to a file. + Addresses bug #20520. + +2009-02-18 Steven Schubiger + + * recur.h: Remove the dangling declaration for recursive_cleanup(). + +2009-02-01 Gerardo E. Gidoni + + * main.c, recur.c, recur.h, res.c, retr.c, retr.h: restructured code to + avoid multiple 'url_parse' calls. + 2008-11-13 Micah Cowan * http.c (gethttp): Don't do anything when content-length >= our requested range. +2008-11-27 Saint Xavier + + * http.c (gethttp): Move authentication code before filename + allocation avoiding fallbacking on default filename because + "Content-Disposition" header wasn't present before authentcation + has been completed. Fixes bug #24862. + 2008-11-16 Steven Schubiger * main.c: Declare and initialize the numurls counter. 
diff --git a/src/Makefile.am b/src/Makefile.am index ab830ba0..58e9b545 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -40,7 +40,7 @@ LIBS = @LIBSSL@ @LIBGNUTLS@ @LIBINTL@ @LIBS@ bin_PROGRAMS = wget wget_SOURCES = build_info.c cmpt.c connect.c convert.c cookies.c ftp.c \ - css.l css-url.c \ + css.l css-url.c css-tokens.h \ ftp-basic.c ftp-ls.c hash.c host.c html-parse.c html-url.c \ http.c init.c log.c main.c netrc.c progress.c ptimer.c \ recur.c res.c retr.c snprintf.c spider.c url.c \ @@ -66,7 +66,7 @@ version.c: $(wget_SOURCES) $(LDADD) $(srcdir)/Makefile.am echo '/* version.c */' > $@ echo '/* Autogenerated by Makefile - DO NOT EDIT */' >> $@ echo '' >> $@ - echo -n 'const char *version_string = "@VERSION@"' >> $@ + echo 'const char *version_string = "@VERSION@"' >> $@ -hg log -r . --template='" ({node|short})"\n' 2>/dev/null >> $@ echo ';' >> $@ echo 'const char *compilation_string = "'$(COMPILE)'";' \ diff --git a/src/build_info.c b/src/build_info.c index 532dccaf..89ae74f8 100644 --- a/src/build_info.c +++ b/src/build_info.c @@ -80,6 +80,12 @@ const char* (compiled_features[]) = "-md5", #endif +#ifdef HAVE_SSL + "+https", +#else + "-https", +#endif + #ifdef HAVE_LIBGNUTLS "+gnutls", #else diff --git a/src/connect.c b/src/connect.c index 41258d26..0a54c852 100644 --- a/src/connect.c +++ b/src/connect.c @@ -59,6 +59,11 @@ as that of the covered work. */ #include "connect.h" #include "hash.h" +/* Apparently needed for Interix: */ +#ifdef HAVE_STDINT_H +# include <stdint.h> +#endif + /* Define sockaddr_storage where unavailable (presumably on IPv4-only hosts). */ diff --git a/src/ftp.c b/src/ftp.c index e4b90189..8e05a796 100644 --- a/src/ftp.c +++ b/src/ftp.c @@ -1,6 +1,6 @@ /* File Transfer Protocol support. Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, - 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc. + 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc. This file is part of GNU Wget.
@@ -945,7 +945,7 @@ Error in server response, closing control connection.\n")); if (dtsock < 0) { logprintf (LOG_NOTQUIET, "accept: %s\n", strerror (errno)); - return err; + return CONERROR; } } @@ -1275,8 +1275,17 @@ ftp_loop_internal (struct url *u, struct fileinfo *f, ccon *con) con->csock = -1; } if (!opt.spider) - logprintf (LOG_VERBOSE, _("%s (%s) - %s saved [%s]\n\n"), - tms, tmrate, quote (locf), number_to_static_string (len)); + { + bool write_to_stdout = (opt.output_document && HYPHENP (opt.output_document)); + + logprintf (LOG_VERBOSE, + write_to_stdout + ? _("%s (%s) - written to stdout %s[%s]\n\n") + : _("%s (%s) - %s saved [%s]\n\n"), + tms, tmrate, + write_to_stdout ? "" : quote (locf), + number_to_static_string (len)); + } if (!opt.verbose && !opt.quiet) { /* Need to hide the password from the URL. The `if' is here @@ -1361,8 +1370,11 @@ ftp_get_listing (struct url *u, ccon *con, struct fileinfo **f) xfree (uf); DEBUGP ((_("Using %s as listing tmp file.\n"), quote (lf))); - con->target = lf; + con->target = xstrdup (lf); + xfree (lf); err = ftp_loop_internal (u, NULL, con); + lf = xstrdup (con->target); + xfree (con->target); con->target = old_target; if (err == RETROK) @@ -1581,16 +1593,8 @@ Already have correct symlink %s -> %s\n\n"), && dlthis && file_exists_p (con->target)) { - /* #### This code repeats in http.c and ftp.c. Move it to a - function! */ const char *fl = NULL; - if (opt.output_document) - { - if (output_stream_regular) - fl = opt.output_document; - } - else - fl = con->target; + set_local_file (&fl, con->target); if (fl) touch (fl, f->tstamp); } diff --git a/src/hash.c b/src/hash.c index 1e19fd63..80922d0f 100644 --- a/src/hash.c +++ b/src/hash.c @@ -55,8 +55,8 @@ as that of the covered work. 
*/ # endif # include <ctype.h> # define c_tolower(x) tolower ((unsigned char) (x)) -# if __STDC_VERSION__ >= 199901L -# include <stdint.h> /* for uintptr_t */ +# ifdef HAVE_STDINT_H +# include <stdint.h> # else typedef unsigned long uintptr_t; # endif diff --git a/src/host.c b/src/host.c index bbf40222..b9aaebb4 100644 --- a/src/host.c +++ b/src/host.c @@ -36,6 +36,7 @@ as that of the covered work. */ #include <assert.h> #ifndef WINDOWS +# include <sys/types.h> # include <sys/socket.h> # include <netinet/in.h> # ifndef __BEOS__ @@ -58,6 +59,11 @@ as that of the covered work. */ # define NO_ADDRESS NO_DATA #endif +#if !HAVE_DECL_H_ERRNO +extern int h_errno; +#endif + + /* Lists of IP addresses that result from running DNS queries. See lookup_host for details. */ diff --git a/src/http.c b/src/http.c index 9ed226cb..ae89c46d 100644 --- a/src/http.c +++ b/src/http.c @@ -1,6 +1,6 @@ /* HTTP support. Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, - 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc. + 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc. This file is part of GNU Wget. @@ -1816,6 +1816,101 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy, print_server_response (resp, " "); } + /* Check for keep-alive related responses. */ + if (!inhibit_keep_alive && contlen != -1) + { + if (resp_header_copy (resp, "Keep-Alive", NULL, 0)) + keep_alive = true; + else if (resp_header_copy (resp, "Connection", hdrval, sizeof (hdrval))) + { + if (0 == strcasecmp (hdrval, "Keep-Alive")) + keep_alive = true; + } + } + + if (keep_alive) + /* The server has promised that it will not close the connection + when we're done. This means that we can register it. */ + register_persistent (conn->host, conn->port, sock, using_ssl); + + if (statcode == HTTP_STATUS_UNAUTHORIZED) + { + /* Authorization is required.
*/ + if (keep_alive && !head_only && skip_short_body (sock, contlen)) + CLOSE_FINISH (sock); + else + CLOSE_INVALIDATE (sock); + pconn.authorized = false; + if (!auth_finished && (user && passwd)) + { + /* IIS sends multiple copies of WWW-Authenticate, one with + the value "negotiate", and other(s) with data. Loop over + all the occurrences and pick the one we recognize. */ + int wapos; + const char *wabeg, *waend; + char *www_authenticate = NULL; + for (wapos = 0; + (wapos = resp_header_locate (resp, "WWW-Authenticate", wapos, + &wabeg, &waend)) != -1; + ++wapos) + if (known_authentication_scheme_p (wabeg, waend)) + { + BOUNDED_TO_ALLOCA (wabeg, waend, www_authenticate); + break; + } + + if (!www_authenticate) + { + /* If the authentication header is missing or + unrecognized, there's no sense in retrying. */ + logputs (LOG_NOTQUIET, _("Unknown authentication scheme.\n")); + } + else if (!basic_auth_finished + || !BEGINS_WITH (www_authenticate, "Basic")) + { + char *pth; + pth = url_full_path (u); + request_set_header (req, "Authorization", + create_authorization_line (www_authenticate, + user, passwd, + request_method (req), + pth, + &auth_finished), + rel_value); + if (BEGINS_WITH (www_authenticate, "NTLM")) + ntlm_seen = true; + else if (!u->user && BEGINS_WITH (www_authenticate, "Basic")) + { + /* Need to register this host as using basic auth, + * so we automatically send creds next time. */ + register_basic_auth_host (u->host); + } + xfree (pth); + xfree_null (message); + resp_free (resp); + xfree (head); + goto retry_with_auth; + } + else + { + /* We already did Basic auth, and it failed. Gotta + * give up. */ + } + } + logputs (LOG_NOTQUIET, _("Authorization failed.\n")); + request_free (req); + xfree_null (message); + resp_free (resp); + xfree (head); + return AUTHFAILED; + } + else /* statcode != HTTP_STATUS_UNAUTHORIZED */ + { + /* Kludge: if NTLM is used, mark the TCP connection as authorized. 
*/ + if (ntlm_seen) + pconn.authorized = true; + } + /* Determine the local filename if needed. Notice that if -O is used * hstat.local_file is set by http_loop to the argument of -O. */ if (!hs->local_file) @@ -1850,6 +1945,8 @@ File %s already there; not retrieving.\n\n"), quote (hs->local_file)); if (has_html_suffix_p (hs->local_file)) *dt |= TEXTHTML; + xfree (head); + xfree_null (message); return RETRUNNEEDED; } else if (!ALLOW_CLOBBER) @@ -1948,93 +2045,6 @@ File %s already there; not retrieving.\n\n"), quote (hs->local_file)); contlen = parsed; } - /* Check for keep-alive related responses. */ - if (!inhibit_keep_alive && contlen != -1) - { - if (resp_header_copy (resp, "Keep-Alive", NULL, 0)) - keep_alive = true; - else if (resp_header_copy (resp, "Connection", hdrval, sizeof (hdrval))) - { - if (0 == strcasecmp (hdrval, "Keep-Alive")) - keep_alive = true; - } - } - if (keep_alive) - /* The server has promised that it will not close the connection - when we're done. This means that we can register it. */ - register_persistent (conn->host, conn->port, sock, using_ssl); - - if (statcode == HTTP_STATUS_UNAUTHORIZED) - { - /* Authorization is required. */ - if (keep_alive && !head_only && skip_short_body (sock, contlen)) - CLOSE_FINISH (sock); - else - CLOSE_INVALIDATE (sock); - pconn.authorized = false; - if (!auth_finished && (user && passwd)) - { - /* IIS sends multiple copies of WWW-Authenticate, one with - the value "negotiate", and other(s) with data. Loop over - all the occurrences and pick the one we recognize. */ - int wapos; - const char *wabeg, *waend; - char *www_authenticate = NULL; - for (wapos = 0; - (wapos = resp_header_locate (resp, "WWW-Authenticate", wapos, - &wabeg, &waend)) != -1; - ++wapos) - if (known_authentication_scheme_p (wabeg, waend)) - { - BOUNDED_TO_ALLOCA (wabeg, waend, www_authenticate); - break; - } - - if (!www_authenticate) - { - /* If the authentication header is missing or - unrecognized, there's no sense in retrying. 
*/ - logputs (LOG_NOTQUIET, _("Unknown authentication scheme.\n")); - } - else if (!basic_auth_finished - || !BEGINS_WITH (www_authenticate, "Basic")) - { - char *pth; - pth = url_full_path (u); - request_set_header (req, "Authorization", - create_authorization_line (www_authenticate, - user, passwd, - request_method (req), - pth, - &auth_finished), - rel_value); - if (BEGINS_WITH (www_authenticate, "NTLM")) - ntlm_seen = true; - else if (!u->user && BEGINS_WITH (www_authenticate, "Basic")) - { - /* Need to register this host as using basic auth, - * so we automatically send creds next time. */ - register_basic_auth_host (u->host); - } - xfree (pth); - goto retry_with_auth; - } - else - { - /* We already did Basic auth, and it failed. Gotta - * give up. */ - } - } - logputs (LOG_NOTQUIET, _("Authorization failed.\n")); - request_free (req); - return AUTHFAILED; - } - else /* statcode != HTTP_STATUS_UNAUTHORIZED */ - { - /* Kludge: if NTLM is used, mark the TCP connection as authorized. */ - if (ntlm_seen) - pconn.authorized = true; - } request_free (req); hs->statcode = statcode; @@ -2126,6 +2136,7 @@ File %s already there; not retrieving.\n\n"), quote (hs->local_file)); else CLOSE_INVALIDATE (sock); xfree_null (type); + xfree (head); return NEWLOCATION; } } @@ -2181,6 +2192,7 @@ File %s already there; not retrieving.\n\n"), quote (hs->local_file)); xfree_null (type); CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there might be more bytes in the body. */ + xfree (head); return RETRUNNEEDED; } if ((contrange != 0 && contrange != hs->restval) @@ -2190,6 +2202,7 @@ File %s already there; not retrieving.\n\n"), quote (hs->local_file)); Bail out. 
*/ xfree_null (type); CLOSE_INVALIDATE (sock); + xfree (head); return RANGEERR; } if (contlen == -1) @@ -2253,6 +2266,7 @@ File %s already there; not retrieving.\n\n"), quote (hs->local_file)); CLOSE_FINISH (sock); else CLOSE_INVALIDATE (sock); + xfree (head); return RETRFINISHED; } @@ -2279,6 +2293,7 @@ File %s already there; not retrieving.\n\n"), quote (hs->local_file)); _("%s has sprung into existence.\n"), hs->local_file); CLOSE_INVALIDATE (sock); + xfree (head); return FOPEN_EXCL_ERR; } } @@ -2286,6 +2301,7 @@ File %s already there; not retrieving.\n\n"), quote (hs->local_file)); { logprintf (LOG_NOTQUIET, "%s: %s\n", hs->local_file, strerror (errno)); CLOSE_INVALIDATE (sock); + xfree (head); return FOPENERR; } } @@ -2357,6 +2373,7 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer, struct http_stat hstat; /* HTTP status */ struct_stat st; bool send_head_first = true; + char *file_name; /* Assert that no value for *LOCAL_FILE was passed. */ assert (local_file == NULL || *local_file == NULL); @@ -2429,11 +2446,13 @@ File %s already there; not retrieving.\n\n"), /* Send preliminary HEAD request if -N is given and we have an existing * destination file. */ + file_name = url_file_name (u); if (opt.timestamping && !opt.content_disposition - && file_exists_p (url_file_name (u))) + && file_exists_p (file_name)) send_head_first = true; - + xfree (file_name); + /* THE loop */ do { @@ -2735,16 +2754,8 @@ Remote file exists.\n\n")); && ((hstat.len == hstat.contlen) || ((hstat.res == 0) && (hstat.contlen == -1)))) { - /* #### This code repeats in http.c and ftp.c. Move it to a - function! 
*/ const char *fl = NULL; - if (opt.output_document) - { - if (output_stream_regular) - fl = opt.output_document; - } - else - fl = hstat.local_file; + set_local_file (&fl, hstat.local_file); if (fl) { time_t newtmr = -1; @@ -2768,9 +2779,14 @@ Remote file exists.\n\n")); { if (*dt & RETROKF) { + bool write_to_stdout = (opt.output_document && HYPHENP (opt.output_document)); + logprintf (LOG_VERBOSE, - _("%s (%s) - %s saved [%s/%s]\n\n"), - tms, tmrate, quote (hstat.local_file), + write_to_stdout + ? _("%s (%s) - written to stdout %s[%s/%s]\n\n") + : _("%s (%s) - %s saved [%s/%s]\n\n"), + tms, tmrate, + write_to_stdout ? "" : quote (hstat.local_file), number_to_static_string (hstat.len), number_to_static_string (hstat.contlen)); logprintf (LOG_NONVERBOSE, @@ -2799,9 +2815,14 @@ Remote file exists.\n\n")); { if (*dt & RETROKF) { + bool write_to_stdout = (opt.output_document && HYPHENP (opt.output_document)); + logprintf (LOG_VERBOSE, - _("%s (%s) - %s saved [%s]\n\n"), - tms, tmrate, quote (hstat.local_file), + write_to_stdout + ? _("%s (%s) - written to stdout %s[%s]\n\n") + : _("%s (%s) - %s saved [%s]\n\n"), + tms, tmrate, + write_to_stdout ? "" : quote (hstat.local_file), number_to_static_string (hstat.len)); logprintf (LOG_NONVERBOSE, "%s URL:%s [%s] -> \"%s\" [%d]\n", @@ -2945,6 +2966,7 @@ http_atotm (const char *time_string) Netscape cookie specification.) */ }; const char *oldlocale; + char savedlocale[256]; size_t i; time_t ret = (time_t) -1; @@ -2952,6 +2974,16 @@ http_atotm (const char *time_string) non-English locales, which we work around by temporarily setting locale to C before invoking strptime. 
*/ oldlocale = setlocale (LC_TIME, NULL); + if (oldlocale) + { + size_t l = strlen (oldlocale); + if (l >= sizeof savedlocale) + savedlocale[0] = '\0'; + else + memcpy (savedlocale, oldlocale, l + 1); /* l + 1: copy the terminating NUL too, savedlocale is otherwise uninitialized */ + } + else savedlocale[0] = '\0'; + setlocale (LC_TIME, "C"); for (i = 0; i < countof (time_formats); i++) @@ -2971,7 +3003,8 @@ http_atotm (const char *time_string) } /* Restore the previous locale. */ - setlocale (LC_TIME, oldlocale); + if (savedlocale[0]) + setlocale (LC_TIME, savedlocale); return ret; } diff --git a/src/init.c b/src/init.c index 5ab0862c..23f8cb2c 100644 --- a/src/init.c +++ b/src/init.c @@ -1,6 +1,6 @@ /* Reading/parsing the initialization file. Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, - 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc. + 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc. This file is part of GNU Wget. @@ -58,11 +58,6 @@ as that of the covered work. */ #include "test.h" #endif -/* We want tilde expansion enabled only when reading `.wgetrc' lines; - otherwise, it will be performed by the shell. This variable will - be set by the wgetrc-reading function. */ - -static bool enable_tilde_expansion; #define CMD_DECLARE(func) static bool func (const char *, const char *, void *) @@ -484,6 +479,7 @@ enum parse_line { static enum parse_line parse_line (const char *, char **, char **, int *); static bool setval_internal (int, const char *, const char *); +static bool setval_internal_tilde (int, const char *, const char *); /* Initialize variables from a wgetrc file. Returns zero (failure) if there were errors in the file. */ @@ -503,7 +499,6 @@ run_wgetrc (const char *file) file, strerror (errno)); return true; /* not a fatal error */ } - enable_tilde_expansion = true; ln = 1; while ((line = read_whole_line (fp)) != NULL) { @@ -515,7 +510,7 @@ run_wgetrc (const char *file) { case line_ok: /* If everything is OK, set the value.
*/ - if (!setval_internal (comind, com, val)) + if (!setval_internal_tilde (comind, com, val)) { fprintf (stderr, _("%s: Error in %s at line %d.\n"), exec_name, file, ln); @@ -542,7 +537,6 @@ run_wgetrc (const char *file) xfree (line); ++ln; } - enable_tilde_expansion = false; fclose (fp); return errcnt == 0; @@ -553,15 +547,20 @@ run_wgetrc (const char *file) void initialize (void) { - char *file; + char *file, *env_sysrc; int ok = true; /* Load the hard-coded defaults. */ defaults (); - - /* If SYSTEM_WGETRC is defined, use it. */ + + /* Run a non-standard system rc file when the according environment + variable has been set. For internal testing purposes only! */ + env_sysrc = getenv ("SYSTEM_WGETRC"); + if (env_sysrc && file_exists_p (env_sysrc)) + ok &= run_wgetrc (env_sysrc); + /* Otherwise, if SYSTEM_WGETRC is defined, use it. */ #ifdef SYSTEM_WGETRC - if (file_exists_p (SYSTEM_WGETRC)) + else if (file_exists_p (SYSTEM_WGETRC)) ok &= run_wgetrc (SYSTEM_WGETRC); #endif /* Override it with your own, if one exists. */ @@ -674,6 +673,12 @@ parse_line (const char *line, char **com, char **val, int *comind) return line_ok; } +#if defined(WINDOWS) || defined(MSDOS) +# define ISSEP(c) ((c) == '/' || (c) == '\\') +#else +# define ISSEP(c) ((c) == '/') +#endif + /* Run commands[comind].action. 
*/ static bool @@ -684,6 +689,38 @@ setval_internal (int comind, const char *com, const char *val) return commands[comind].action (com, val, commands[comind].place); } +static bool +setval_internal_tilde (int comind, const char *com, const char *val) +{ + bool ret; + int homelen; + char *home; + char **pstring; + ret = setval_internal (comind, com, val); + + /* We make tilde expansion for cmd_file and cmd_directory */ + if (((commands[comind].action == cmd_file) || + (commands[comind].action == cmd_directory)) + && ret && (*val == '~' && ISSEP (val[1]))) + { + pstring = commands[comind].place; + home = home_dir (); + if (home) + { + homelen = strlen (home); + while (homelen && ISSEP (home[homelen - 1])) + home[--homelen] = '\0'; + + /* Skip the leading "~/". */ + for (++val; ISSEP (*val); val++) + ; + xfree_null (*pstring); /* release the unexpanded copy stored by setval_internal */ + *pstring = concat_strings (home, "/", val, (char *)0); + } + } + return ret; +} + /* Run command COM with value VAL. If running the command produces an error, report the error and exit. @@ -819,11 +856,6 @@ cmd_string (const char *com, const char *val, void *place) return true; } -#if defined(WINDOWS) || defined(MSDOS) -# define ISSEP(c) ((c) == '/' || (c) == '\\') -#else -# define ISSEP(c) ((c) == '/') -#endif /* Like the above, but handles tilde-expansion when reading a user's `.wgetrc'. In that case, and if VAL begins with `~', the tilde @@ -837,28 +869,7 @@ cmd_file (const char *com, const char *val, void *place) /* #### If VAL is empty, perhaps should set *PLACE to NULL. */ - if (!enable_tilde_expansion || !(*val == '~' && ISSEP (val[1]))) - { - noexpand: - *pstring = xstrdup (val); - } - else - { - int homelen; - char *home = home_dir (); - if (!home) - goto noexpand; - - homelen = strlen (home); - while (homelen && ISSEP (home[homelen - 1])) - home[--homelen] = '\0'; - - /* Skip the leading "~/".
*/ - for (++val; ISSEP (*val); val++) - ; - - *pstring = concat_strings (home, "/", val, (char *) 0); - } + *pstring = xstrdup (val); #if defined(WINDOWS) || defined(MSDOS) /* Convert "\" to "/". */ diff --git a/src/main.c b/src/main.c index a2d40888..69df08a7 100644 --- a/src/main.c +++ b/src/main.c @@ -1,6 +1,6 @@ /* Command line parsing. Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, - 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc. + 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc. This file is part of GNU Wget. @@ -1148,7 +1148,7 @@ for details.\n\n")); { #ifdef WINDOWS FILE *result; - result = freopen (NULL, "wb", stdout); + result = freopen ("CONOUT$", "wb", stdout); if (result == NULL) { logputs (LOG_NOTQUIET, _("\ @@ -1202,40 +1202,51 @@ WARNING: Can't reopen standard output in binary mode;\n\ for (t = url; *t; t++) { char *filename = NULL, *redirected_URL = NULL; - int dt; + int dt, url_err; + struct url *url_parsed = url_parse (*t, &url_err, NULL, false); - if ((opt.recursive || opt.page_requisites) - && (url_scheme (*t) != SCHEME_FTP || url_uses_proxy (*t))) + if (!url_parsed) { - int old_follow_ftp = opt.follow_ftp; - - /* Turn opt.follow_ftp on in case of recursive FTP retrieval */ - if (url_scheme (*t) == SCHEME_FTP) - opt.follow_ftp = 1; - - status = retrieve_tree (*t, NULL); - - opt.follow_ftp = old_follow_ftp; + char *error = url_error (*t, url_err); + logprintf (LOG_NOTQUIET, "%s: %s.\n",*t, error); + xfree (error); + status = URLERROR; } else { - struct iri *i = iri_new (); - set_uri_encoding (i, opt.locale, true); - status = retrieve_url (*t, &filename, &redirected_URL, NULL, &dt, - opt.recursive, i); - iri_free (i); - } + if ((opt.recursive || opt.page_requisites) + && (url_scheme (*t) != SCHEME_FTP || url_uses_proxy (url_parsed))) + { + int old_follow_ftp = opt.follow_ftp; - if (opt.delete_after && file_exists_p(filename)) - { - DEBUGP (("Removing file due to --delete-after in main():\n")); - 
logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename); - if (unlink (filename)) - logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno)); - } + /* Turn opt.follow_ftp on in case of recursive FTP retrieval */ + if (url_scheme (*t) == SCHEME_FTP) + opt.follow_ftp = 1; + + status = retrieve_tree (url_parsed, NULL); - xfree_null (redirected_URL); - xfree_null (filename); + opt.follow_ftp = old_follow_ftp; + } + else + { + struct iri *i = iri_new (); + set_uri_encoding (i, opt.locale, true); + status = retrieve_url (url_parsed, *t, &filename, &redirected_URL, + NULL, &dt, opt.recursive, i); + iri_free (i); + } + + if (opt.delete_after && file_exists_p(filename)) + { + DEBUGP (("Removing file due to --delete-after in main():\n")); + logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename); + if (unlink (filename)) + logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno)); + } + xfree_null (redirected_URL); + xfree_null (filename); + url_free (url_parsed); + } } /* And then from the input file, if any. */ diff --git a/src/progress.c b/src/progress.c index 2f1c2225..9b9d1e49 100644 --- a/src/progress.c +++ b/src/progress.c @@ -1157,7 +1157,7 @@ eta_to_human_short (int secs, bool condensed) else if (secs < 48 * 3600) sprintf (buf, "%dh%s%dm", secs / 3600, space, (secs / 60) % 60); else if (secs < 100 * 86400) - sprintf (buf, "%dd%s%dh", secs / 86400, space, (secs / 3600) % 60); + sprintf (buf, "%dd%s%dh", secs / 86400, space, (secs / 3600) % 24); else /* even (2^31-1)/86400 doesn't overflow BUF. 
*/ sprintf (buf, "%dd", secs / 86400); diff --git a/src/recur.c b/src/recur.c index 95581486..83a9b4ee 100644 --- a/src/recur.c +++ b/src/recur.c @@ -161,7 +161,7 @@ url_dequeue (struct url_queue *queue, struct iri **i, static bool download_child_p (const struct urlpos *, struct url *, int, struct url *, struct hash_table *, struct iri *); -static bool descend_redirect_p (const char *, const char *, int, +static bool descend_redirect_p (const char *, struct url *, int, struct url *, struct hash_table *, struct iri *); @@ -187,7 +187,7 @@ static bool descend_redirect_p (const char *, const char *, int, options, add it to the queue. */ uerr_t -retrieve_tree (const char *start_url, struct iri *pi) +retrieve_tree (struct url *start_url_parsed, struct iri *pi) { uerr_t status = RETROK; @@ -199,7 +199,6 @@ retrieve_tree (const char *start_url, struct iri *pi) struct hash_table *blacklist; int up_error_code; - struct url *start_url_parsed; struct iri *i = iri_new (); #define COPYSTR(x) (x) ? xstrdup(x) : NULL; @@ -214,15 +213,6 @@ retrieve_tree (const char *start_url, struct iri *pi) set_uri_encoding (i, opt.locale, true); #undef COPYSTR - start_url_parsed = url_parse (start_url, &up_error_code, i, true); - if (!start_url_parsed) - { - char *error = url_error (start_url, up_error_code); - logprintf (LOG_NOTQUIET, "%s: %s.\n", start_url, error); - xfree (error); - return URLERROR; - } - queue = url_queue_new (); blacklist = make_string_hash_table (0); @@ -286,11 +276,12 @@ retrieve_tree (const char *start_url, struct iri *pi) } else { - int dt = 0; + int dt = 0, url_err; char *redirected = NULL; + struct url *url_parsed = url_parse (url, &url_err, i, false); - status = retrieve_url (url, &file, &redirected, referer, &dt, - false, i); + status = retrieve_url (url_parsed, url, &file, &redirected, referer, + &dt, false, i); if (html_allowed && file && status == RETROK && (dt & RETROKF) && (dt & TEXTHTML)) @@ -317,7 +308,7 @@ retrieve_tree (const char *start_url, struct iri 
*pi) want to follow it. */ if (descend) { - if (!descend_redirect_p (redirected, url, depth, + if (!descend_redirect_p (redirected, url_parsed, depth, start_url_parsed, blacklist, i)) descend = false; else @@ -329,6 +320,7 @@ retrieve_tree (const char *start_url, struct iri *pi) xfree (url); url = redirected; } + url_free(url_parsed); } if (opt.spider) @@ -468,8 +460,6 @@ retrieve_tree (const char *start_url, struct iri *pi) } url_queue_delete (queue); - if (start_url_parsed) - url_free (start_url_parsed); string_set_free (blacklist); if (opt.quota && total_downloaded_bytes > opt.quota) @@ -686,15 +676,14 @@ download_child_p (const struct urlpos *upos, struct url *parent, int depth, it is merely a simple-minded wrapper around download_child_p. */ static bool -descend_redirect_p (const char *redirected, const char *original, int depth, +descend_redirect_p (const char *redirected, struct url *orig_parsed, int depth, struct url *start_url_parsed, struct hash_table *blacklist, struct iri *iri) { - struct url *orig_parsed, *new_parsed; + struct url *new_parsed; struct urlpos *upos; bool success; - orig_parsed = url_parse (original, NULL, NULL, false); assert (orig_parsed != NULL); new_parsed = url_parse (redirected, NULL, NULL, false); @@ -706,7 +695,6 @@ descend_redirect_p (const char *redirected, const char *original, int depth, success = download_child_p (upos, orig_parsed, depth, start_url_parsed, blacklist, iri); - url_free (orig_parsed); url_free (new_parsed); xfree (upos); diff --git a/src/recur.h b/src/recur.h index 515a382b..76c0ef5f 100644 --- a/src/recur.h +++ b/src/recur.h @@ -1,6 +1,6 @@ /* Declarations for recur.c. Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, - 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc. + 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc. This file is part of GNU Wget. @@ -31,6 +31,8 @@ as that of the covered work. 
*/ #ifndef RECUR_H #define RECUR_H +#include "url.h" + /* For most options, 0 means no limits, but with -p in the picture, that causes a problem on the maximum recursion depth variable. To retain backwards compatibility we allow users to consider "0" to be @@ -42,6 +44,6 @@ as that of the covered work. */ struct urlpos; void recursive_cleanup (void); -uerr_t retrieve_tree (const char *, struct iri *); +uerr_t retrieve_tree (struct url *, struct iri *); #endif /* RECUR_H */ diff --git a/src/res.c b/src/res.c index 0320d034..4b0ff82b 100644 --- a/src/res.c +++ b/src/res.c @@ -538,7 +538,8 @@ res_retrieve_file (const char *url, char **file, struct iri *iri) uerr_t err; char *robots_url = uri_merge (url, RES_SPECS_LOCATION); int saved_ts_val = opt.timestamping; - int saved_sp_val = opt.spider; + int saved_sp_val = opt.spider, url_err; + struct url * url_parsed; /* Copy server URI encoding for a possible IDNA transformation, no need to encode the full URI in UTF-8 because "robots.txt" is plain ASCII */ @@ -549,7 +550,22 @@ res_retrieve_file (const char *url, char **file, struct iri *iri) *file = NULL; opt.timestamping = false; opt.spider = false; - err = retrieve_url (robots_url, file, NULL, NULL, NULL, false, i); + + url_parsed = url_parse (robots_url, &url_err, iri, true); + if (!url_parsed) + { + char *error = url_error (robots_url, url_err); + logprintf (LOG_NOTQUIET, "%s: %s.\n", robots_url, error); + xfree (error); + err = URLERROR; + } + else + { + err = retrieve_url (url_parsed, robots_url, file, NULL, NULL, NULL, + false, i); + url_free(url_parsed); + } + opt.timestamping = saved_ts_val; opt.spider = saved_sp_val; xfree (robots_url); diff --git a/src/retr.c b/src/retr.c index 1d9d7478..0fd936d0 100644 --- a/src/retr.c +++ b/src/retr.c @@ -226,7 +226,8 @@ fd_read_body (int fd, FILE *out, wgint toread, wgint startpos, /* If we're skipping STARTPOS bytes, pass 0 as the INITIAL argument to progress_create because the indicator doesn't (yet) know about "skipping" 
data. */ - progress = progress_create (skip ? 0 : startpos, startpos + toread); + wgint start = skip ? 0 : startpos; + progress = progress_create (start, start + toread); progress_interactive = progress_interactive_p (progress); } @@ -596,15 +597,16 @@ static char *getproxy (struct url *); multiple points. */ uerr_t -retrieve_url (const char *origurl, char **file, char **newloc, - const char *refurl, int *dt, bool recursive, struct iri *iri) +retrieve_url (struct url * orig_parsed, const char *origurl, char **file, + char **newloc, const char *refurl, int *dt, bool recursive, + struct iri *iri) { uerr_t result; char *url; bool location_changed; int dummy; char *mynewloc, *proxy; - struct url *u, *proxy_url; + struct url *u = orig_parsed, *proxy_url; int up_error_code; /* url parse error code */ char *local_file; int redirection_count = 0; @@ -626,16 +628,6 @@ retrieve_url (const char *origurl, char **file, char **newloc, *file = NULL; second_try: - u = url_parse (url, &up_error_code, iri, true); - if (!u) - { - char *error = url_error (url, up_error_code); - logprintf (LOG_NOTQUIET, "%s: %s.\n", url, error); - xfree (url); - xfree (error); - return URLERROR; - } - DEBUGP (("[IRI Retrieving %s with %s (UTF-8=%d)\n", quote_n (0, url), iri->uri_encoding ? 
quote_n (1, iri->uri_encoding) : "None", iri->utf8_encode)); @@ -748,7 +740,10 @@ retrieve_url (const char *origurl, char **file, char **newloc, char *error = url_error (mynewloc, up_error_code); logprintf (LOG_NOTQUIET, "%s: %s.\n", escnonprint_uri (mynewloc), error); - url_free (u); + if (orig_parsed != u) + { + url_free (u); + } xfree (url); xfree (mynewloc); xfree (error); @@ -768,7 +763,10 @@ retrieve_url (const char *origurl, char **file, char **newloc, logprintf (LOG_NOTQUIET, _("%d redirections exceeded.\n"), opt.max_redirect); url_free (newloc_parsed); - url_free (u); + if (orig_parsed != u) + { + url_free (u); + } xfree (url); xfree (mynewloc); RESTORE_POST_DATA; @@ -777,7 +775,10 @@ retrieve_url (const char *origurl, char **file, char **newloc, xfree (url); url = mynewloc; - url_free (u); + if (orig_parsed != u) + { + url_free (u); + } u = newloc_parsed; /* If we're being redirected from POST, we don't want to POST @@ -823,7 +824,10 @@ retrieve_url (const char *origurl, char **file, char **newloc, else xfree_null (local_file); - url_free (u); + if (orig_parsed != u) + { + url_free (u); + } if (redirection_count) { @@ -869,13 +873,23 @@ retrieve_from_file (const char *file, bool html, int *count) if (url_has_scheme (url)) { - int dt; + int dt,url_err; uerr_t status; + struct url * url_parsed = url_parse(url, &url_err, NULL, true); + + if (!url_parsed) + { + char *error = url_error (url, url_err); + logprintf (LOG_NOTQUIET, "%s: %s.\n", url, error); + xfree (error); + return URLERROR; + } if (!opt.base_href) opt.base_href = xstrdup (url); - status = retrieve_url (url, &input_file, NULL, NULL, &dt, false, iri); + status = retrieve_url (url_parsed, url, &input_file, NULL, NULL, &dt, + false, iri); if (status != RETROK) return status; @@ -920,13 +934,13 @@ retrieve_from_file (const char *file, bool html, int *count) if (cur_url->url->scheme == SCHEME_FTP) opt.follow_ftp = 1; - status = retrieve_tree (cur_url->url->url, iri); + status = retrieve_tree 
(cur_url->url, iri); opt.follow_ftp = old_follow_ftp; } else - status = retrieve_url (cur_url->url->url, &filename, &new_file, NULL, - &dt, opt.recursive, iri); + status = retrieve_url (cur_url->url, cur_url->url->url, &filename, + &new_file, NULL, &dt, opt.recursive, iri); if (filename && opt.delete_after && file_exists_p (filename)) { @@ -1096,18 +1110,12 @@ getproxy (struct url *u) /* Returns true if URL would be downloaded through a proxy. */ bool -url_uses_proxy (const char *url) +url_uses_proxy (struct url * u) { bool ret; - struct url *u; - struct iri *i = iri_new(); - /* url was given in the command line, so use locale as encoding */ - set_uri_encoding (i, opt.locale, true); - u= url_parse (url, NULL, i, false); if (!u) return false; ret = getproxy (u) != NULL; - url_free (u); return ret; } @@ -1120,3 +1128,16 @@ no_proxy_match (const char *host, const char **no_proxy) else return sufmatch (no_proxy, host); } + +/* Set the file parameter to point to the local file string. */ +void +set_local_file (const char **file, const char *default_file) +{ + if (opt.output_document) + { + if (output_stream_regular) + *file = opt.output_document; + } + else + *file = default_file; +} diff --git a/src/retr.h b/src/retr.h index bb2e66d3..8854b684 100644 --- a/src/retr.h +++ b/src/retr.h @@ -31,6 +31,8 @@ as that of the covered work. */ #ifndef RETR_H #define RETR_H +#include "url.h" + /* These global vars should be made static to retr.c and exported via functions! 
*/ extern SUM_SIZE_INT total_downloaded_bytes; @@ -51,8 +53,8 @@ typedef const char *(*hunk_terminator_t) (const char *, const char *, int); char *fd_read_hunk (int, hunk_terminator_t, long, long); char *fd_read_line (int); -uerr_t retrieve_url (const char *, char **, char **, const char *, int *, - bool, struct iri *); +uerr_t retrieve_url (struct url *, const char *, char **, char **, + const char *, int *, bool, struct iri *); uerr_t retrieve_from_file (const char *, bool, int *); const char *retr_rate (wgint, double); @@ -63,6 +65,6 @@ void sleep_between_retrievals (int); void rotate_backups (const char *); -bool url_uses_proxy (const char *); +bool url_uses_proxy (struct url *); #endif /* RETR_H */ diff --git a/src/sysdep.h b/src/sysdep.h index 84301b78..0377029e 100644 --- a/src/sysdep.h +++ b/src/sysdep.h @@ -41,6 +41,10 @@ as that of the covered work. */ # endif #endif +#if defined(__INTERIX) && !defined(_ALL_SOURCE) +# define _ALL_SOURCE +#endif + /* The "namespace tweaks" below attempt to set a friendly "compilation environment" under popular operating systems. Default compilation environment often means that some functions that are "extensions" @@ -57,7 +61,7 @@ as that of the covered work. 
*/ # define NAMESPACE_TWEAKS #endif -#ifdef __linux__ +#if defined(__linux__) || defined(__GLIBC__) # define NAMESPACE_TWEAKS #endif diff --git a/src/url.c b/src/url.c index 86d099a7..4c22a9fc 100644 --- a/src/url.c +++ b/src/url.c @@ -668,7 +668,8 @@ url_parse (const char *url, int *error, struct iri *iri, bool percent_encode) int port; char *user = NULL, *passwd = NULL; - char *url_encoded = NULL, *new_url = NULL; + const char *url_encoded = NULL; + char *new_url = NULL; int error_code; @@ -904,7 +905,7 @@ url_parse (const char *url, int *error, struct iri *iri, bool percent_encode) if (url_encoded == url) u->url = xstrdup (url); else - u->url = url_encoded; + u->url = (char *) url_encoded; } return u; @@ -912,7 +913,7 @@ url_parse (const char *url, int *error, struct iri *iri, bool percent_encode) error: /* Cleanup in case of error: */ if (url_encoded && url_encoded != url) - xfree (url_encoded); + xfree ((char *) url_encoded); /* Transmit the error code to the caller, if the caller wants to know. */ diff --git a/tests/ChangeLog b/tests/ChangeLog index d9ba6531..3dfc60a3 100644 --- a/tests/ChangeLog +++ b/tests/ChangeLog @@ -14,6 +14,30 @@ Test-iri-forced-remote.px, Test-iri-list.px, Test-iri.px: More module-scope warnings. +2009-06-14 Micah Cowan + + * Makefile.am (EXTRA_DIST): Include all the tests, run-px, and + certs/, to make distcheck happy. + +2009-06-11 Benjamin Wolsey + + * Test-proxied-https-auth.px: Take an optional argument for the + top source directory, so we can find the cert and key. + + * run-px: Provide the top source directory as an argument, so + scripts can find their way around. + +2009-04-11 Steven Schubiger + + * run-px: Skip testing with real rc files by setting + SYSTEM_WGETRC and WGETRC to /dev/null. + +2009-02-25 Benjamin Wolsey + + * Makefile.am (run-px-tests): Ensure run-px is run from srcdir. + + * run-px: Include modules from srcdir. 
+ 2008-11-25 Steven Schubiger * WgetTest.pm.in: Remove the magic interpreter line; diff --git a/tests/Makefile.am b/tests/Makefile.am index 6e703f01..82c31360 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -1,6 +1,6 @@ # Makefile for `wget' utility # Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, -# 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc. +# 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc. # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -55,19 +55,52 @@ run-unit-tests: unit-tests$(EXEEXT) ./unit-tests$(EXEEXT) run-px-tests: WgetTest.pm ../src/wget$(EXEEXT) - ./run-px $(top_srcdir) + $(srcdir)/run-px $(top_srcdir) EXTRA_DIST = FTPServer.pm FTPTest.pm HTTPServer.pm HTTPTest.pm \ - Test-auth-basic.px Test-c-full.px Test-c-partial.px \ - Test-c.px Test-E-k-K.px Test-E-k.px Test-ftp.px \ - Test-HTTP-Content-Disposition-1.px \ - Test-HTTP-Content-Disposition-2.px \ - Test-HTTP-Content-Disposition.px \ - Test-N-current.px Test-N-old.px Test-nonexisting-quiet.px \ - Test-noop.px Test-np.px Test-N.px \ - Test-O-nonexisting.px Test-O.px \ - Test-Restrict-Lowercase.px Test-Restrict-Uppercase.px \ - Test--spider-fail.px Test--spider.px Test--spider-r.px + Test-auth-basic.px \ + Test-auth-no-challenge.px \ + Test-auth-no-challenge-url.px \ + Test-c-full.px \ + Test-c-partial.px \ + Test-c.px \ + Test-c-shorter.px \ + Test-E-k-K.px \ + Test-E-k.px \ + Test-ftp.px \ + Test-HTTP-Content-Disposition-1.px \ + Test-HTTP-Content-Disposition-2.px \ + Test-HTTP-Content-Disposition.px \ + Test-N-current.px \ + Test-N-HTTP-Content-Disposition.px \ + Test-N--no-content-disposition.px \ + Test-N--no-content-disposition-trivial.px \ + Test-N-no-info.px \ + Test--no-content-disposition.px \ + Test--no-content-disposition-trivial.px \ + Test-N-old.px \ + Test-nonexisting-quiet.px \ + Test-noop.px \ + Test-np.px \ + Test-N.px \ + 
Test-N-smaller.px \ + Test-O-HTTP-Content-Disposition.px \ + Test-O-nc.px \ + Test-O--no-content-disposition.px \ + Test-O--no-content-disposition-trivial.px \ + Test-O-nonexisting.px \ + Test-O.px \ + Test-proxied-https-auth.px \ + Test-proxy-auth-basic.px \ + Test-Restrict-Lowercase.px \ + Test-Restrict-Uppercase.px \ + Test--spider-fail.px \ + Test--spider.px \ + Test--spider-r-HTTP-Content-Disposition.px \ + Test--spider-r--no-content-disposition.px \ + Test--spider-r--no-content-disposition-trivial.px \ + Test--spider-r.px \ + run-px certs check_PROGRAMS = unit-tests unit_tests_SOURCES = diff --git a/tests/Test-proxied-https-auth.px b/tests/Test-proxied-https-auth.px index 4e3fb206..2b37f32b 100755 --- a/tests/Test-proxied-https-auth.px +++ b/tests/Test-proxied-https-auth.px @@ -5,6 +5,15 @@ use warnings; use WgetTest; # For $WGETPATH. +my $cert_path; +my $key_path; + +if (@ARGV) { + my $top_srcdir = shift @ARGV; + $key_path = "$top_srcdir/tests/certs/server-key.pem"; + $cert_path = "$top_srcdir/tests/certs/server-cert.pem"; +} + # Have we even built an HTTPS-supporting Wget? 
{ my @version_lines = `${WgetTest::WGETPATH} --version`; @@ -55,8 +64,16 @@ sub do_server { $rspn = HTTP::Response->new(200, 'OK'); $conn->send_response($rspn); - $conn = IO::Socket::SSL->new_from_fd($conn->fileno, SSL_server => 1, - SSL_passwd_cb => sub { return "Hello"; }) + my %options = ( + SSL_server => 1, + SSL_passwd_cb => sub { return "Hello"; }); + + $options{SSL_cert_file} = $cert_path if ($cert_path); + $options{SSL_key_file} = $key_path if ($key_path); + + my @options = %options; + + $conn = IO::Socket::SSL->new_from_fd($conn->fileno, @options) or die "Couldn't initiate SSL"; $rqst = &get_request($conn) diff --git a/tests/run-px b/tests/run-px index 01d84995..3b5449bd 100755 --- a/tests/run-px +++ b/tests/run-px @@ -66,14 +66,22 @@ my @tests = ( 'Test--spider-r.px', ); +foreach my $var (qw(SYSTEM_WGETRC WGETRC)) { + $ENV{$var} = '/dev/null'; +} + my @tested; foreach my $test (@tests) { print "Running $test\n\n"; - system("$^X $top_srcdir/tests/$test"); + system("$^X -I$top_srcdir/tests $top_srcdir/tests/$test $top_srcdir"); push @tested, { name => $test, result => $? 
}; } +foreach my $var (qw(SYSTEM_WGETRC WGETRC)) { + delete $ENV{$var}; +} + print "\n"; foreach my $test (@tested) { ($test->{result} == 0) diff --git a/util/freeopts b/util/freeopts index 75f594a1..ce43a02d 100755 --- a/util/freeopts +++ b/util/freeopts @@ -28,7 +28,7 @@ elsif ( END { my $cols = 0; my $max_cols = 13; - my $opt_chars = + my $opt_chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"; print "Free chars:\n\t"; for (my $i = 0; $i < length $opt_chars; ++$i, ++$cols) { diff --git a/util/rmold.pl b/util/rmold.pl index f08deac1..c227e2c0 100755 --- a/util/rmold.pl +++ b/util/rmold.pl @@ -45,7 +45,7 @@ sub procdir local(@lcfiles, @lcdirs, %files, @fl); print STDERR "Processing directory '$dir':\n" if $verbose; - + opendir(DH, $dir) || die("Cannot open $dir: $!\n"); @lcfiles = (); @lcdirs = (); diff --git a/util/trunc.c b/util/trunc.c index fb52ec25..55cb19d3 100644 --- a/util/trunc.c +++ b/util/trunc.c @@ -3,7 +3,7 @@ * * Copyright (C) 2008 Micah J. Cowan * - * Copying and distribution of this file, with or without modification, + * Copying and distribution of this file, with or without modification, * are permitted in any medium without royalty provided the copyright * notice and this notice are preserved. */ @@ -62,7 +62,7 @@ get_size (const char str[]) { val *= 1024 * 1024; } - + return val; }