--- /dev/null
+dnl Template file for GNU Autoconf
+dnl Copyright (C) 1995, 1996, 1997, 2001, 2007,
+dnl 2008 Free Software Foundation, Inc.
+
+dnl This program is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU General Public License as published by
+dnl the Free Software Foundation; either version 3 of the License, or
+dnl (at your option) any later version.
+
+dnl This program is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+dnl GNU General Public License for more details.
+
+dnl You should have received a copy of the GNU General Public License
+dnl along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+dnl Additional permission under GNU GPL version 3 section 7
+
+dnl If you modify this program, or any covered work, by linking or
+dnl combining it with the OpenSSL project's OpenSSL library (or a
+dnl modified version of that library), containing parts covered by the
+dnl terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
+dnl grants you additional permission to convey the resulting work.
+dnl Corresponding Source for a non-source form of such a combination
+dnl shall include the source code for the parts of OpenSSL used as well
+dnl as that of the covered work.
+
+dnl
+dnl Process this file with autoconf to produce a configure script.
+dnl
+
+AC_INIT([wget],
+ [1.12-devel],
+ [bug-wget@gnu.org])
+AC_PREREQ(2.61)
+
+dnl
+dnl What version of Wget are we building?
+dnl
+AC_MSG_NOTICE([configuring for GNU Wget $PACKAGE_VERSION])
+
+AC_CONFIG_MACRO_DIR([m4])
+AC_CONFIG_AUX_DIR([.])
+
+dnl
+dnl Automake setup
+dnl
+AM_INIT_AUTOMAKE(1.9)
+
+dnl
+dnl Gettext
+dnl
+AM_GNU_GETTEXT([external],[need-ngettext])
+AM_GNU_GETTEXT_VERSION([0.16.1])
+
+dnl
+dnl Get canonical host
+dnl
+AC_CANONICAL_HOST
+AC_DEFINE_UNQUOTED([OS_TYPE], "$host_os",
+ [Define to be the name of the operating system.])
+
+dnl
+dnl Process features.
+dnl
+
+AC_ARG_WITH(ssl,
+[[ --without-ssl disable SSL autodetection]])
+
+AC_ARG_ENABLE(opie,
+[ --disable-opie disable support for opie or s/key FTP login],
+ENABLE_OPIE=$enableval, ENABLE_OPIE=yes)
+test x"${ENABLE_OPIE}" = xyes && AC_DEFINE([ENABLE_OPIE], 1,
+ [Define if you want the Opie support for FTP compiled in.])
+
+AC_ARG_ENABLE(digest,
+[ --disable-digest disable support for HTTP digest authorization],
+ENABLE_DIGEST=$enableval, ENABLE_DIGEST=yes)
+test x"${ENABLE_DIGEST}" = xyes && AC_DEFINE([ENABLE_DIGEST], 1,
+ [Define if you want the HTTP Digest Authorization compiled in.])
+
+AC_ARG_ENABLE(ntlm,
+[ --disable-ntlm disable support for NTLM authorization],
+[ENABLE_NTLM=$enableval], [ENABLE_NTLM=auto])
+
+AC_ARG_ENABLE(debug,
+[ --disable-debug disable support for debugging output],
+ENABLE_DEBUG=$enableval, ENABLE_DEBUG=yes)
+test x"${ENABLE_DEBUG}" = xyes && AC_DEFINE([ENABLE_DEBUG], 1,
+ [Define if you want the debug output support compiled in.])
+
+wget_need_md5=no
+
+case "${ENABLE_OPIE}${ENABLE_DIGEST}" in
+*yes*)
+ wget_need_md5=yes
+esac
+
+dnl
+dnl Find the compiler
+dnl
+
+dnl We want these before the checks, so the checks can modify their values.
+test -z "$CFLAGS" && CFLAGS= auto_cflags=1
+test -z "$CC" && cc_specified=yes
+
+AC_PROG_CC
+AM_PROG_CC_C_O
+AC_AIX
+gl_EARLY
+md5_EARLY
+
+AC_PROG_RANLIB
+
++AC_PROG_LEX
++
+dnl Turn on optimization by default. Specifically:
+dnl
+dnl if the user hasn't specified CFLAGS, then
+dnl if compiler is gcc, then
+dnl use -O2 and some warning flags
+dnl else
+dnl use os-specific flags or -O
+if test -n "$auto_cflags"; then
+ if test -n "$GCC"; then
+ CFLAGS="$CFLAGS -O2 -Wall"
+ else
+ case "$host_os" in
+ *hpux*) CFLAGS="$CFLAGS +O3" ;;
+ *ultrix* | *osf*) CFLAGS="$CFLAGS -O -Olimit 2000" ;;
+ *) CFLAGS="$CFLAGS -O" ;;
+ esac
+ fi
+fi
+
+dnl
+dnl Checks for basic compiler characteristics.
+dnl
+AC_C_CONST
+AC_C_INLINE
+AC_C_VOLATILE
+
+dnl Check for basic headers, even though we expect them to exist and
+dnl #include them unconditionally in the code. Their detection is
+dnl still needed because test programs used by Autoconf macros check
+dnl for STDC_HEADERS, HAVE_SYS_TYPES_H, etc. before using them.
+dnl Without the checks they will fail to be included in test programs,
+dnl which will subsequently fail.
+AC_HEADER_STDC
+
+dnl Check for large file support. This check needs to come fairly
+dnl early because it could (in principle) affect whether functions and
+dnl headers are available, whether they work, etc.
+AC_SYS_LARGEFILE
+AC_CHECK_SIZEOF(off_t)
+
+dnl
+dnl Checks for system header files that might be missing.
+dnl
+AC_HEADER_STDBOOL
+AC_CHECK_HEADERS(unistd.h sys/time.h)
+AC_CHECK_HEADERS(termios.h sys/ioctl.h sys/select.h utime.h sys/utime.h)
+AC_CHECK_HEADERS(stdint.h inttypes.h pwd.h wchar.h)
+
+dnl
+dnl Check sizes of integer types. These are used to find n-bit
+dnl integral types on older systems that fail to provide intN_t and
+dnl uintN_t typedefs.
+dnl
+AC_CHECK_SIZEOF(short)
+AC_CHECK_SIZEOF(int)
+AC_CHECK_SIZEOF(long)
+AC_CHECK_SIZEOF(long long)
+AC_CHECK_SIZEOF(void *)
+
+dnl
+dnl Checks for non-universal or system-specific types.
+dnl
+AC_TYPE_SIZE_T
+AC_TYPE_PID_T
+AC_CHECK_TYPES([uint32_t, uintptr_t, intptr_t, int64_t])
+AC_CHECK_TYPES(sig_atomic_t, [], [], [
+#include <stdio.h>
+#include <sys/types.h>
+#if HAVE_INTTYPES_H
+# include <inttypes.h>
+#endif
+#include <signal.h>
+])
+
+# gnulib
+gl_INIT
+
+dnl
+dnl Checks for library functions.
+dnl
+AC_FUNC_ALLOCA
+AC_FUNC_MMAP
+AC_FUNC_FSEEKO
+AC_CHECK_FUNCS(strptime timegm snprintf vsnprintf vasprintf drand48)
+AC_CHECK_FUNCS(strtoll usleep ftello sigblock sigsetjmp memrchr wcwidth mbtowc)
+
+if test x"$ENABLE_OPIE" = xyes; then
+ AC_LIBOBJ([ftp-opie])
+fi
+
+dnl We expect to have these functions on Unix-like systems configure
+dnl runs on. The defines are provided to get them in config.h.in so
+dnl Wget can still be ported to non-Unix systems (such as Windows)
+dnl that lack some of these functions.
+AC_DEFINE([HAVE_STRCASECMP], 1, [Define to 1 if you have the `strcasecmp' function.])
+AC_DEFINE([HAVE_STRNCASECMP], 1, [Define to 1 if you have the `strncasecmp' function.])
+AC_DEFINE([HAVE_STRDUP], 1, [Define to 1 if you have the `strdup' function.])
+AC_DEFINE([HAVE_ISATTY], 1, [Define to 1 if you have the `isatty' function.])
+AC_DEFINE([HAVE_SYMLINK], 1, [Define to 1 if you have the `symlink' function.])
+
+dnl
+dnl Call Wget-specific macros defined in aclocal.
+dnl
+WGET_STRUCT_UTIMBUF
+WGET_SOCKLEN_T
+WGET_FNMATCH
+WGET_NANOSLEEP
+WGET_POSIX_CLOCK
+WGET_NSL_SOCKET
+
+dnl
+dnl Checks for libraries.
+dnl
+
+AS_IF([test x"$with_ssl" = xgnutls], [
+ dnl Now actually check for -lssl
+ AC_LIB_HAVE_LINKFLAGS([gnutls], [], [
+#include <gnutls/gnutls.h>
+ ], [gnutls_global_init()])
+ if test x"$LIBGNUTLS" != x
+ then
+ AC_MSG_NOTICE([compiling in support for SSL via GnuTLS])
+ AC_LIBOBJ([gnutls])
+ else
+ AC_MSG_ERROR([--with-ssl=gnutls was given, but GNUTLS is not available.])
+ fi
+], [
+ # --with-ssl is not gnutls: check if it's no
+ AS_IF([test x"$with_ssl" != xno], [
+ dnl As of this writing (OpenSSL 0.9.6), the libcrypto shared library
+ dnl doesn't record its dependency on libdl, so we need to make sure
+ dnl -ldl ends up in LIBS on systems that have it. Most OSes use
+ dnl dlopen(), but HP-UX uses shl_load().
+ AC_CHECK_LIB(dl, dlopen, [], [
+ AC_CHECK_LIB(dl, shl_load)
+ ])
+
+ dnl Now actually check for -lssl
+ AC_LIB_HAVE_LINKFLAGS([ssl], [crypto], [
+ #include <openssl/ssl.h>
+ #include <openssl/x509.h>
+ #include <openssl/err.h>
+ #include <openssl/rand.h>
+ #include <openssl/des.h>
+ #include <openssl/md4.h>
+ #include <openssl/md5.h>
+ ], [SSL_library_init ()])
+ if test x"$LIBSSL" != x
+ then
+ AC_MSG_NOTICE([compiling in support for SSL via OpenSSL])
+ AC_LIBOBJ([openssl])
+ elif test x"$with_ssl" != x
+ then
+ AC_MSG_ERROR([--with-ssl was given, but SSL is not available.])
+ fi
+ ]) # endif: --with-ssl == no?
+]) # endif: --with-ssl == gnutls?
+
+
+dnl Enable NTLM if requested and if SSL is available.
+if test x"$LIBSSL" != x
+then
+ if test x"$ENABLE_NTLM" != xno
+ then
+ AC_DEFINE([ENABLE_NTLM], 1,
+ [Define if you want the NTLM authorization support compiled in.])
+ AC_LIBOBJ([http-ntlm])
+ fi
+else
+ dnl If SSL is unavailable and the user explicitly requested NTLM,
+ dnl abort.
+ if test x"$ENABLE_NTLM" = xyes
+ then
+ AC_MSG_ERROR([NTLM authorization requested and OpenSSL not found; aborting])
+ fi
+fi
+
+dnl
+dnl Find an MD5 implementation. Since Wget rarely needs MD5, we try
+dnl to use an existing library implementation to save on code size.
+dnl
+
+if test x"$wget_need_md5" = xyes
+then
+ dnl This should be moved to an AC_DEFUN, but I'm not sure how to
+ dnl manipulate MD5_OBJ from the defun.
+
+ AC_LIBOBJ([gen-md5])
+ found_md5=no
+
+ dnl Check for the system MD5 library on Solaris. We don't check for
+ dnl something simple like "MD5Update" because there are a number of
+ dnl MD5 implementations that use that name, but have an otherwise
+ dnl incompatible interface. md5_calc is, hopefully, specific to the
+ dnl Solaris MD5 library.
+ if test x"$found_md5" = xno; then
+ AC_CHECK_LIB(md5, md5_calc, [
+ dnl Some installations have bogus <md5.h> in the compiler's
+ dnl include path, making the system md5 library useless.
+ AC_MSG_CHECKING([for working md5.h])
+ AC_COMPILE_IFELSE([#include <md5.h>
+ ], [
+ AC_MSG_RESULT(yes)
+ AC_DEFINE([HAVE_SOLARIS_MD5], 1, [Define when using Solaris MD5.])
+ LIBS="-lmd5 $LIBS"
+ found_md5=yes
+ AC_MSG_NOTICE([using the Solaris MD5 implementation])
+ ], [AC_MSG_RESULT(no)])
+ ])
+ fi
+
+ dnl Then see if we're linking OpenSSL anyway; if yes, use its md5
+ dnl implementation.
+ if test x"$found_md5" = xno; then
+ if test x"$LIBSSL" != x; then
+ AC_DEFINE([HAVE_OPENSSL_MD5], 1, [Define when using OpenSSL MD5.])
+ found_md5=yes
+ AC_MSG_NOTICE([using the OpenSSL MD5 implementation])
+ fi
+ fi
+
+ dnl If none of the above worked, use the one we ship with Wget.
+ if test x"$found_md5" = xno; then
+ AC_DEFINE([HAVE_BUILTIN_MD5], 1, [Define when using built-in MD5.])
+ found_md5=yes
+ AC_MSG_NOTICE([using the built-in (GNU) MD5 implementation])
+ AC_C_BIGENDIAN
+
+ AC_SUBST(MD5_CPPFLAGS, '-I $(top_srcdir)/md5')
+ AC_SUBST(MD5_LDADD, '../md5/libmd5.a')
+ AC_SUBST(MD5_SUBDIR, md5)
+ md5_INIT
+ fi
+ AC_DEFINE([HAVE_MD5], 1, [Define if we're compiling support for MD5.])
+fi
+
+dnl **********************************************************************
+dnl Checks for IPv6
+dnl **********************************************************************
+
+dnl
+dnl We test for IPv6 by checking, in turn, for availability of
+dnl getaddrinfo, presence of the INET6 address/protocol family, and
+dnl the existence of struct sockaddr_in6. If any of them is missing,
+dnl IPv6 is disabled, and the code reverts to old-style gethostbyname.
+dnl
+dnl If --enable-ipv6 is explicitly specified on the configure command
+dnl line, we check for IPv6 and abort if not found. If --disable-ipv6
+dnl is specified, we disable IPv6 and don't check for it. The default
+dnl is to autodetect IPv6 and use it where available.
+dnl
+
+AC_ARG_ENABLE(ipv6,
+ AC_HELP_STRING([--disable-ipv6],[disable IPv6 support]),
+ [case "${enable_ipv6}" in
+ no)
+ AC_MSG_NOTICE([disabling IPv6 at user request])
+ dnl Disable IPv6 checking
+ ipv6=no
+ ;;
+ yes)
+ dnl IPv6 explicitly enabled: force its use (abort if unavailable).
+ ipv6=yes
+ force_ipv6=yes
+ ;;
+ auto)
+ dnl Auto-detect IPv6, i.e. check for IPv6, but don't force it.
+ ipv6=yes
+ ;;
+ *)
+ AC_MSG_ERROR([Invalid --enable-ipv6 argument \`$enable_ipv6'])
+ ;;
+ esac
+ ], [
+ dnl If nothing is specified, assume auto-detection.
+ ipv6=yes
+ ]
+)
+
+if test "X$ipv6" = "Xyes"; then
+ AC_CHECK_FUNCS(getaddrinfo, [], [
+ AC_MSG_NOTICE([Disabling IPv6 support: your system does not support getaddrinfo(3)])
+ ipv6=no
+ ])
+fi
+
+if test "X$ipv6" = "Xyes"; then
+ PROTO_INET6([], [
+ AC_MSG_NOTICE([Disabling IPv6 support: your system does not support the PF_INET6 protocol family])
+ ipv6=no
+ ])
+fi
+
+if test "X$ipv6" = "Xyes"; then
+ TYPE_STRUCT_SOCKADDR_IN6([],[
+ AC_MSG_NOTICE([Disabling IPv6 support: your system does not support \`struct sockaddr_in6'])
+ ipv6=no
+ ])
+ if test "X$ipv6" = "Xyes"; then
+ WGET_STRUCT_SOCKADDR_STORAGE
+ MEMBER_SIN6_SCOPE_ID
+ fi
+fi
+
+if test "X$ipv6" = "Xyes"; then
+ AC_DEFINE([ENABLE_IPV6], 1, [Define if IPv6 support is enabled.])
+ AC_MSG_NOTICE([Enabling support for IPv6.])
+elif test "x$force_ipv6" = "xyes"; then
+ AC_MSG_ERROR([IPv6 support requested but not found; aborting])
+fi
+
+
+dnl
+dnl Set of available languages.
+dnl
+dnl Originally this used to be static, looking like this:
+dnl ALL_LINGUAS="cs de hr it ..."
+dnl The downside was that configure needed to be rebuilt whenever a
+dnl new language was added.
+dnl
+ALL_LINGUAS="en@quot en@boldquot en_US $(cd ${srcdir}/po && ls *.po | grep -v 'en@.*quot' | grep -v 'en_US\.po' | sed -e 's/\.po$//' | tr '\012' ' ')"
+
+dnl
+dnl Find makeinfo. We used to provide support for Emacs processing
+dnl Texinfo using `emacs -batch -eval ...' where makeinfo is
+dnl unavailable, but that broke with the addition of makeinfo-specific
+dnl command-line options, such as `-I'. Now we depend on makeinfo to
+dnl build the Info documentation.
+dnl
+
+AC_CHECK_PROGS(MAKEINFO, [makeinfo], [true])
+
+dnl
+dnl Find perl and pod2man
+dnl
+
+AC_PATH_PROGS(PERL, [perl5 perl], no)
+AC_PATH_PROG(POD2MAN, pod2man, no)
+
+if test "x${POD2MAN}" = xno; then
+ COMMENT_IF_NO_POD2MAN="# "
+else
+ COMMENT_IF_NO_POD2MAN=
+fi
+AC_SUBST(COMMENT_IF_NO_POD2MAN)
+
+dnl
+dnl Create output
+dnl
+AC_CONFIG_FILES([Makefile src/Makefile doc/Makefile util/Makefile
+ po/Makefile.in tests/Makefile tests/WgetTest.pm
+ lib/Makefile md5/Makefile windows/Makefile])
+AC_CONFIG_HEADERS([src/config.h])
+AC_OUTPUT
--- /dev/null
- wget_SOURCES = cmpt.c connect.c convert.c cookies.c ftp.c ftp-basic.c \
+# Makefile for `wget' utility
+# Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
+# 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with Wget. If not, see <http://www.gnu.org/licenses/>.
+
+# Additional permission under GNU GPL version 3 section 7
+
+# If you modify this program, or any covered work, by linking or
+# combining it with the OpenSSL project's OpenSSL library (or a
+# modified version of that library), containing parts covered by the
+# terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
+# grants you additional permission to convey the resulting work.
+# Corresponding Source for a non-source form of such a combination
+# shall include the source code for the parts of OpenSSL used as well
+# as that of the covered work.
+
+#
+# Version: @VERSION@
+#
+
+# The following line is losing on some versions of make!
+DEFS = @DEFS@ -DSYSTEM_WGETRC=\"$(sysconfdir)/wgetrc\" -DLOCALEDIR=\"$(localedir)\"
+LIBS = @LIBS@ @LIBSSL@ @LIBGNUTLS@ @LIBINTL@
+
+bin_PROGRAMS = wget
- connect.h convert.h cookies.h \
- ftp.h gen-md5.h hash.h host.h html-parse.h \
++wget_SOURCES = cmpt.c connect.c convert.c cookies.c \
++ css.lex css-url.c \
++ ftp.c ftp-basic.c \
+ ftp-ls.c hash.c host.c html-parse.c html-url.c http.c \
+ init.c log.c main.c netrc.c progress.c ptimer.c recur.c \
+ res.c retr.c snprintf.c spider.c url.c \
+ utils.c xmalloc.c \
++ css-url.h connect.h convert.h cookies.h \
++ ftp.h gen-md5.h hash.h host.h html-parse.h html-url.h \
+ http.h http-ntlm.h init.h log.h mswindows.h netrc.h \
+ options.h progress.h ptimer.h recur.h res.h retr.h \
+ spider.h ssl.h sysdep.h url.h utils.h wget.h xmalloc.h
+nodist_wget_SOURCES = version.c
+EXTRA_wget_SOURCES = mswindows.c
+LDADD = $(ALLOCA) $(LIBOBJS) ../lib/libgnu.a @MD5_LDADD@
+AM_CPPFLAGS = -I $(top_srcdir)/lib @MD5_CPPFLAGS@
+
+version.c: $(wget_SOURCES) $(LDADD) $(srcdir)/Makefile.am
+ echo 'const char *version_string = "@VERSION@"' > $@
+ -hg log -r . --template='" ({node|short})"\n' 2>/dev/null >> $@
+ echo ';' >> $@
+
+check_LIBRARIES = libunittest.a
+libunittest_a_SOURCES = $(wget_SOURCES) test.c test.h
+nodist_libunittest_a_SOURCES = version.c
+libunittest_a_CPPFLAGS = -DTESTING -I$(top_srcdir)/lib
+libunittest_a_LIBADD = $(ALLOCA) $(LIBOBJS)
+
+CLEANFILES = *~ *.bak core core.[0-9]* version.c
char **file_array;
cnt = 0;
- if (downloaded_html_set)
- cnt = hash_table_count (downloaded_html_set);
+ if (downloaded_set)
+ cnt = hash_table_count (downloaded_set);
if (cnt == 0)
- return;
+ goto cleanup;
file_array = alloca_array (char *, cnt);
- string_set_to_array (downloaded_html_set, file_array);
+ string_set_to_array (downloaded_set, file_array);
for (i = 0; i < cnt; i++)
{
/* Free the data. */
free_urlpos (urls);
}
+ }
+
+ /* This function is called when the retrieval is done to convert the
+ links that have been downloaded. It has to be called at the end of
+ the retrieval, because only then does Wget know conclusively which
+ URLs have been downloaded, and which not, so it can tell which
+ direction to convert to.
+
+ The "direction" means that the URLs to the files that have been
+ downloaded get converted to the relative URL which will point to
+ that file. And the other URLs get converted to the remote URL on
+ the server.
+
+ All the downloaded HTML documents are kept in downloaded_html_set,
+ and the downloaded CSS files in downloaded_css_set.  All the
+ information is extracted from those sets.  */
+
+ void
+ convert_all_links (void)
+ {
+ double secs;
+ int file_count = 0;
+
+ struct ptimer *timer = ptimer_new ();
+
+ convert_links_in_hashtable (downloaded_html_set, 0, &file_count);
+ convert_links_in_hashtable (downloaded_css_set, 1, &file_count);
secs = ptimer_measure (timer);
- ptimer_destroy (timer);
logprintf (LOG_VERBOSE, _("Converted %d files in %s seconds.\n"),
file_count, print_decimal (secs));
+cleanup:
+ ptimer_destroy (timer);
}
static void write_backup_file (const char *, downloaded_file_t);
COMPACT="compact">. Even if such attributes are not
useful to Wget, we need to support them, so that the
tags containing them can be parsed correctly. */
- attr_raw_value_begin = attr_value_begin = attr_name_begin;
- attr_raw_value_end = attr_value_end = attr_name_end;
- }
- else if (*p == '=')
- {
- ADVANCE (p);
- SKIP_WS (p);
- if (*p == '\"' || *p == '\'')
- {
- bool newline_seen = false;
- char quote_char = *p;
- attr_raw_value_begin = p;
- ADVANCE (p);
- attr_value_begin = p; /* <foo bar="baz"> */
- /* ^ */
- while (*p != quote_char)
- {
- if (!newline_seen && *p == '\n')
- {
- /* If a newline is seen within the quotes, it
- is most likely that someone forgot to close
- the quote. In that case, we back out to
- the value beginning, and terminate the tag
- at either `>' or the delimiter, whichever
- comes first. Such a tag terminated at `>'
- is discarded. */
- p = attr_value_begin;
- newline_seen = true;
- continue;
- }
- else if (newline_seen && *p == '>')
- break;
- ADVANCE (p);
- }
- attr_value_end = p; /* <foo bar="baz"> */
- /* ^ */
- if (*p == quote_char)
- ADVANCE (p);
- else
- goto look_for_tag;
- attr_raw_value_end = p; /* <foo bar="baz"> */
- /* ^ */
- operation = AP_DECODE_ENTITIES;
- if (flags & MHT_TRIM_VALUES)
- operation |= AP_TRIM_BLANKS;
- }
- else
- {
- attr_value_begin = p; /* <foo bar=baz> */
- /* ^ */
- /* According to SGML, a name token should consist only
- of alphanumerics, . and -. However, this is often
- violated by, for instance, `%' in `width=75%'.
- We'll be liberal and allow just about anything as
- an attribute value. */
- while (!ISSPACE (*p) && *p != '>')
- ADVANCE (p);
- attr_value_end = p; /* <foo bar=baz qux=quix> */
- /* ^ */
- if (attr_value_begin == attr_value_end)
- /* <foo bar=> */
- /* ^ */
- goto backout_tag;
- attr_raw_value_begin = attr_value_begin;
- attr_raw_value_end = attr_value_end;
- operation = AP_DECODE_ENTITIES;
- }
- }
- else
- {
- /* We skipped the whitespace and found something that is
- neither `=' nor the beginning of the next attribute's
- name. Back out. */
- goto backout_tag; /* <foo bar [... */
- /* ^ */
- }
-
- /* If we're not interested in the tag, don't bother with any
+ attr_raw_value_begin = attr_value_begin = attr_name_begin;
+ attr_raw_value_end = attr_value_end = attr_name_end;
+ }
+ else if (*p == '=')
+ {
+ ADVANCE (p);
+ SKIP_WS (p);
+ if (*p == '\"' || *p == '\'')
+ {
+ bool newline_seen = false;
+ char quote_char = *p;
+ attr_raw_value_begin = p;
+ ADVANCE (p);
+ attr_value_begin = p; /* <foo bar="baz"> */
+ /* ^ */
+ while (*p != quote_char)
+ {
+ if (!newline_seen && *p == '\n')
+ {
+ /* If a newline is seen within the quotes, it
+ is most likely that someone forgot to close
+ the quote. In that case, we back out to
+ the value beginning, and terminate the tag
+ at either `>' or the delimiter, whichever
+ comes first. Such a tag terminated at `>'
+ is discarded. */
+ p = attr_value_begin;
+ newline_seen = true;
+ continue;
+ }
+ else if (newline_seen && *p == '>')
+ break;
+ ADVANCE (p);
+ }
+ attr_value_end = p; /* <foo bar="baz"> */
+ /* ^ */
+ if (*p == quote_char)
+ ADVANCE (p);
+ else
+ goto look_for_tag;
+ attr_raw_value_end = p; /* <foo bar="baz"> */
+ /* ^ */
+ operation = AP_DECODE_ENTITIES;
+ if (flags & MHT_TRIM_VALUES)
+ operation |= AP_TRIM_BLANKS;
+ }
+ else
+ {
+ attr_value_begin = p; /* <foo bar=baz> */
+ /* ^ */
+ /* According to SGML, a name token should consist only
+ of alphanumerics, . and -. However, this is often
+ violated by, for instance, `%' in `width=75%'.
+ We'll be liberal and allow just about anything as
+ an attribute value. */
+ while (!c_isspace (*p) && *p != '>')
+ ADVANCE (p);
+ attr_value_end = p; /* <foo bar=baz qux=quix> */
+ /* ^ */
+ if (attr_value_begin == attr_value_end)
+ /* <foo bar=> */
+ /* ^ */
+ goto backout_tag;
+ attr_raw_value_begin = attr_value_begin;
+ attr_raw_value_end = attr_value_end;
+ operation = AP_DECODE_ENTITIES;
+ }
+ }
+ else
+ {
+ /* We skipped the whitespace and found something that is
+ neither `=' nor the beginning of the next attribute's
+ name. Back out. */
+ goto backout_tag; /* <foo bar [... */
+ /* ^ */
+ }
+
+ /* If we're not interested in the tag, don't bother with any
of the attributes. */
- if (uninteresting_tag)
- continue;
+ if (uninteresting_tag)
+ continue;
- /* If we aren't interested in the attribute, skip it. We
+ /* If we aren't interested in the attribute, skip it. We
cannot do this test any sooner, because our text pointer
needs to correctly advance over the attribute. */
- if (!name_allowed (allowed_attributes, attr_name_begin, attr_name_end))
- continue;
+ if (!name_allowed (allowed_attributes, attr_name_begin, attr_name_end))
+ continue;
- GROW_ARRAY (pairs, attr_pair_size, nattrs + 1, attr_pair_resized,
- struct attr_pair);
+ GROW_ARRAY (pairs, attr_pair_size, nattrs + 1, attr_pair_resized,
+ struct attr_pair);
- pairs[nattrs].name_pool_index = pool.tail;
- convert_and_copy (&pool, attr_name_begin, attr_name_end, AP_DOWNCASE);
+ pairs[nattrs].name_pool_index = pool.tail;
+ convert_and_copy (&pool, attr_name_begin, attr_name_end, AP_DOWNCASE);
- pairs[nattrs].value_pool_index = pool.tail;
- convert_and_copy (&pool, attr_value_begin, attr_value_end, operation);
- pairs[nattrs].value_raw_beginning = attr_raw_value_begin;
- pairs[nattrs].value_raw_size = (attr_raw_value_end
- - attr_raw_value_begin);
- ++nattrs;
+ pairs[nattrs].value_pool_index = pool.tail;
+ convert_and_copy (&pool, attr_value_begin, attr_value_end, operation);
+ pairs[nattrs].value_raw_beginning = attr_raw_value_begin;
+ pairs[nattrs].value_raw_size = (attr_raw_value_end
+ - attr_raw_value_begin);
+ ++nattrs;
}
+ if (!end_tag && tail && (tail->tagname_begin == tag_name_begin))
+ {
+ tail->contents_begin = p+1;
+ }
+
if (uninteresting_tag)
{
- ADVANCE (p);
- goto look_for_tag;
+ ADVANCE (p);
+ goto look_for_tag;
}
/* By now, we have a valid tag with a name and zero or more
from the information above. However, some places in the code refer
to the attributes not mentioned here. We add them manually. */
static const char *additional_attributes[] = {
- "rel", /* used by tag_handle_link */
- "http-equiv", /* used by tag_handle_meta */
- "name", /* used by tag_handle_meta */
- "content", /* used by tag_handle_meta */
- "action" /* used by tag_handle_form */
- "rel", /* used by tag_handle_link */
- "http-equiv", /* used by tag_handle_meta */
- "name", /* used by tag_handle_meta */
- "content", /* used by tag_handle_meta */
- "action", /* used by tag_handle_form */
- "style" /* used by check_style_attr */
++ "rel", /* used by tag_handle_link */
++ "http-equiv", /* used by tag_handle_meta */
++ "name", /* used by tag_handle_meta */
++ "content", /* used by tag_handle_meta */
++ "action", /* used by tag_handle_form */
++ "style" /* used by check_style_attr */
};
static struct hash_table *interesting_tags;
const int size = countof (tag_url_attributes);
/* If you're cringing at the inefficiency of the nested loops,
- remember that they both iterate over a very small number of
- items. The worst-case inner loop is for the IMG tag, which
- has three attributes. */
+ remember that they both iterate over a very small number of
+ items. The worst-case inner loop is for the IMG tag, which
+ has three attributes. */
for (i = first; i < size && tag_url_attributes[i].tagid == tagid; i++)
- {
- if (0 == strcasecmp (tag->attrs[attrind].name,
- tag_url_attributes[i].attr_name))
- {
- struct urlpos *up = append_url (link, ATTR_POS(tag,attrind,ctx),
+ {
+ if (0 == strcasecmp (tag->attrs[attrind].name,
+ tag_url_attributes[i].attr_name))
+ {
- struct urlpos *up = append_url (link, tag, attrind, ctx);
++ struct urlpos *up = append_url (link, ATTR_POS(tag,attrind,ctx),
+ ATTR_SIZE(tag,attrind), ctx);
- if (up)
- {
- int flags = tag_url_attributes[i].flags;
- if (flags & ATTR_INLINE)
- up->link_inline_p = 1;
- if (flags & ATTR_HTML)
- up->link_expect_html = 1;
- }
- }
- }
+ if (up)
+ {
+ int flags = tag_url_attributes[i].flags;
+ if (flags & ATTR_INLINE)
+ up->link_inline_p = 1;
+ if (flags & ATTR_HTML)
+ up->link_expect_html = 1;
+ }
+ }
+ }
}
}
{
int attrind;
char *action = find_attr (tag, "action", &attrind);
+
if (action)
{
- struct urlpos *up = append_url (action, tag, attrind, ctx);
+ struct urlpos *up = append_url (action, ATTR_POS(tag,attrind,ctx),
+ ATTR_SIZE(tag,attrind), ctx);
if (up)
- up->ignore_when_downloading = 1;
+ up->ignore_when_downloading = 1;
}
}
*/
if (href)
{
- struct urlpos *up = append_url (href, tag, attrind, ctx);
+ struct urlpos *up = append_url (href, ATTR_POS(tag,attrind,ctx),
+ ATTR_SIZE(tag,attrind), ctx);
if (up)
- {
- char *rel = find_attr (tag, "rel", NULL);
- if (rel)
+ {
+ char *rel = find_attr (tag, "rel", NULL);
- if (rel
- && (0 == strcasecmp (rel, "stylesheet")
- || 0 == strcasecmp (rel, "shortcut icon")))
- up->link_inline_p = 1;
++ if (rel)
+ {
- if (0 == strcasecmp (rel, "stylesheet"))
++ if (0 == strcasecmp (rel, "stylesheet"))
+ {
+ up->link_inline_p = 1;
+ up->link_expect_css = 1;
+ }
- else if (0 == strcasecmp (rel, "shortcut icon"))
++ else if (0 == strcasecmp (rel, "shortcut icon"))
+ {
+ up->link_inline_p = 1;
+ }
+ }
- else
- /* The external ones usually point to HTML pages, such as
- <link rel="next" href="..."> */
- up->link_expect_html = 1;
- }
+ else
+ /* The external ones usually point to HTML pages, such as
+ <link rel="next" href="..."> */
+ up->link_expect_html = 1;
+ }
}
}
char *refresh = find_attr (tag, "content", &attrind);
if (!refresh)
- return;
+ return;
- for (p = refresh; ISDIGIT (*p); p++)
- timeout = 10 * timeout + *p - '0';
+ for (p = refresh; c_isdigit (*p); p++)
+ timeout = 10 * timeout + *p - '0';
if (*p++ != ';')
- return;
-
- while (ISSPACE (*p))
- ++p;
- if (!( TOUPPER (*p) == 'U'
- && TOUPPER (*(p + 1)) == 'R'
- && TOUPPER (*(p + 2)) == 'L'
- && *(p + 3) == '='))
- return;
+ return;
+
+ while (c_isspace (*p))
+ ++p;
+ if (!( c_toupper (*p) == 'U'
+ && c_toupper (*(p + 1)) == 'R'
+ && c_toupper (*(p + 2)) == 'L'
+ && *(p + 3) == '='))
+ return;
p += 4;
- while (ISSPACE (*p))
- ++p;
+ while (c_isspace (*p))
+ ++p;
- entry = append_url (p, tag, attrind, ctx);
+ entry = append_url (p, ATTR_POS(tag,attrind,ctx),
+ ATTR_SIZE(tag,attrind), ctx);
if (entry)
- {
- entry->link_refresh_p = 1;
- entry->refresh_timeout = timeout;
- entry->link_expect_html = 1;
- }
+ {
+ entry->link_refresh_p = 1;
+ entry->refresh_timeout = timeout;
+ entry->link_expect_html = 1;
+ }
}
else if (name && 0 == strcasecmp (name, "robots"))
{
if (opt.strict_comments)
flags |= MHT_STRICT_COMMENTS;
+ /* the NULL here used to be interesting_tags */
map_html_tags (fm->content, fm->length, collect_tags_mapper, &ctx, flags,
- interesting_tags, interesting_attributes);
- NULL, interesting_attributes);
++ NULL, interesting_attributes);
DEBUGP (("no-follow in %s: %d\n", file, ctx.nofollow));
if (meta_disallow_follow)
extern char *version_string;
+/* Forward decls. */
+static char *create_authorization_line (const char *, const char *,
+ const char *, const char *,
+ const char *, bool *);
+static char *basic_authentication_encode (const char *, const char *);
+static bool known_authentication_scheme_p (const char *, const char *);
++static void ensure_extension (struct http_stat *, const char *, int *);
+static void load_cookies (void);
+
#ifndef MIN
# define MIN(x, y) ((x) > (y) ? (y) : (x))
#endif
/* Functions for maintaining the URL queue. */
struct queue_element {
- const char *url; /* the URL to download */
- const char *url; /* the URL to download */
- const char *referer; /* the referring document */
- int depth; /* the depth */
- bool html_allowed; /* whether the document is allowed to
- be treated as HTML. */
- bool css_allowed; /* whether the document is allowed to
- be treated as CSS. */
- struct queue_element *next; /* next element in queue */
+ const char *referer; /* the referring document */
+ int depth; /* the depth */
+ bool html_allowed; /* whether the document is allowed to
+ be treated as HTML. */
-
++ bool css_allowed; /* whether the document is allowed to
++ be treated as CSS. */
+ struct queue_element *next; /* next element in queue */
};
struct url_queue {
static void
url_enqueue (struct url_queue *queue,
- const char *url, const char *referer, int depth, bool html_allowed)
- const char *url, const char *referer, int depth,
++ const char *url, const char *referer, int depth,
+ bool html_allowed, bool css_allowed)
{
struct queue_element *qel = xnew (struct queue_element);
qel->url = url;
static bool
url_dequeue (struct url_queue *queue,
- const char **url, const char **referer, int *depth,
- bool *html_allowed, bool *css_allowed)
+ const char **url, const char **referer, int *depth,
- bool *html_allowed)
++ bool *html_allowed, bool *css_allowed)
{
struct queue_element *qel = queue->head;
/* Get the next URL from the queue... */
if (!url_dequeue (queue,
- (const char **)&url, (const char **)&referer,
- &depth, &html_allowed, &css_allowed))
- break;
+ (const char **)&url, (const char **)&referer,
- &depth, &html_allowed))
++ &depth, &html_allowed, &css_allowed))
+ break;
/* ...and download it. Note that this download is in most cases
- unconditional, as download_child_p already makes sure a file
- doesn't get enqueued twice -- and yet this check is here, and
- not in download_child_p. This is so that if you run `wget -r
- URL1 URL2', and a random URL is encountered once under URL1
- and again under URL2, but at a different (possibly smaller)
- depth, we want the URL's children to be taken into account
- the second time. */
+ unconditional, as download_child_p already makes sure a file
+ doesn't get enqueued twice -- and yet this check is here, and
+ not in download_child_p. This is so that if you run `wget -r
+ URL1 URL2', and a random URL is encountered once under URL1
+ and again under URL2, but at a different (possibly smaller)
+ depth, we want the URL's children to be taken into account
+ the second time. */
if (dl_url_file_map && hash_table_contains (dl_url_file_map, url))
- {
- file = xstrdup (hash_table_get (dl_url_file_map, url));
+ {
+ file = xstrdup (hash_table_get (dl_url_file_map, url));
- DEBUGP (("Already downloaded \"%s\", reusing it from \"%s\".\n",
- url, file));
+ DEBUGP (("Already downloaded \"%s\", reusing it from \"%s\".\n",
+ url, file));
- if (html_allowed
- && downloaded_html_set
- && string_set_contains (downloaded_html_set, file))
+ /* this sucks, needs to be combined! */
- descend = true;
+ if (html_allowed
+ && downloaded_html_set
+ && string_set_contains (downloaded_html_set, file))
- descend = 1;
+ {
+ descend = true;
+ is_css = false;
+ }
+ if (css_allowed
+ && downloaded_css_set
+ && string_set_contains (downloaded_css_set, file))
+ {
- }
++ descend = true;
+ is_css = true;
+ }
+ }
else
- {
- int dt = 0;
- char *redirected = NULL;
+ {
+ int dt = 0;
+ char *redirected = NULL;
- status = retrieve_url (url, &file, &redirected, referer, &dt, false);
+ status = retrieve_url (url, &file, &redirected, referer, &dt, false);
- if (html_allowed && file && status == RETROK
- && (dt & RETROKF) && (dt & TEXTHTML))
+ if (html_allowed && file && status == RETROK
+ && (dt & RETROKF) && (dt & TEXTHTML))
- descend = true;
+ {
+ descend = true;
+ is_css = false;
+ }
+
+ /* a little different, css_allowed can override content type
+ lots of web servers serve css with an incorrect content type
+ */
+ if (file && status == RETROK
+ && (dt & RETROKF) &&
+ ((dt & TEXTCSS) || css_allowed))
+ {
+ descend = true;
+ is_css = false;
+ }
- if (redirected)
- {
- /* We have been redirected, possibly to another host, or
- different path, or wherever. Check whether we really
- want to follow it. */
- if (descend)
- {
- if (!descend_redirect_p (redirected, url, depth,
- start_url_parsed, blacklist))
- descend = false;
- else
- /* Make sure that the old pre-redirect form gets
- blacklisted. */
- string_set_add (blacklist, url);
- }
-
- xfree (url);
- url = redirected;
- }
- }
+ if (redirected)
+ {
+ /* We have been redirected, possibly to another host, or
+ different path, or wherever. Check whether we really
+ want to follow it. */
+ if (descend)
+ {
+ if (!descend_redirect_p (redirected, url, depth,
+ start_url_parsed, blacklist))
+ descend = false;
+ else
+ /* Make sure that the old pre-redirect form gets
+ blacklisted. */
+ string_set_add (blacklist, url);
+ }
+
+ xfree (url);
+ url = redirected;
+ }
+ }
if (opt.spider)
- {
+ {
visited_url (url, referer);
- }
+ }
if (descend
- && depth >= opt.reclevel && opt.reclevel != INFINITE_RECURSION)
- {
- if (opt.page_requisites
- && (depth == opt.reclevel || depth == opt.reclevel + 1))
- {
- /* When -p is specified, we are allowed to exceed the
- maximum depth, but only for the "inline" links,
- i.e. those that are needed to display the page.
- Originally this could exceed the depth at most by
- one, but we allow one more level so that the leaf
- pages that contain frames can be loaded
- correctly. */
- dash_p_leaf_HTML = true;
- }
- else
- {
- /* Either -p wasn't specified or it was and we've
- already spent the two extra (pseudo-)levels that it
- affords us, so we need to bail out. */
- DEBUGP (("Not descending further; at depth %d, max. %d.\n",
- depth, opt.reclevel));
- descend = false;
- }
- }
+ && depth >= opt.reclevel && opt.reclevel != INFINITE_RECURSION)
+ {
+ if (opt.page_requisites
+ && (depth == opt.reclevel || depth == opt.reclevel + 1))
+ {
+ /* When -p is specified, we are allowed to exceed the
+ maximum depth, but only for the "inline" links,
+ i.e. those that are needed to display the page.
+ Originally this could exceed the depth at most by
+ one, but we allow one more level so that the leaf
+ pages that contain frames can be loaded
+ correctly. */
+ dash_p_leaf_HTML = true;
+ }
+ else
+ {
+ /* Either -p wasn't specified or it was and we've
+ already spent the two extra (pseudo-)levels that it
+ affords us, so we need to bail out. */
+ DEBUGP (("Not descending further; at depth %d, max. %d.\n",
+ depth, opt.reclevel));
+ descend = false;
+ }
+ }
- /* If the downloaded document was HTML, parse it and enqueue the
+ /* If the downloaded document was HTML or CSS, parse it and enqueue the
- links it contains. */
+ links it contains. */
if (descend)
- {
- bool meta_disallow_follow = false;
- struct urlpos *children
- = is_css ? get_urls_css_file (file, url) :
+ {
+ bool meta_disallow_follow = false;
+ struct urlpos *children
- = get_urls_html (file, url, &meta_disallow_follow);
++ = is_css ? get_urls_css_file (file, url) :
+ get_urls_html (file, url, &meta_disallow_follow);
- if (opt.use_robots && meta_disallow_follow)
- {
- free_urlpos (children);
- children = NULL;
- }
-
- if (children)
- {
- struct urlpos *child = children;
- struct url *url_parsed = url_parsed = url_parse (url, NULL);
- assert (url_parsed != NULL);
-
- for (; child; child = child->next)
- {
- if (child->ignore_when_downloading)
- continue;
- if (dash_p_leaf_HTML && !child->link_inline_p)
- continue;
- if (download_child_p (child, url_parsed, depth, start_url_parsed,
- blacklist))
- {
- url_enqueue (queue, xstrdup (child->url->url),
- xstrdup (url), depth + 1,
- child->link_expect_html,
- child->link_expect_css);
- /* We blacklist the URL we have enqueued, because we
- don't want to enqueue (and hence download) the
- same URL twice. */
- string_set_add (blacklist, child->url->url);
- }
- }
-
- url_free (url_parsed);
- free_urlpos (children);
- }
- }
+ if (opt.use_robots && meta_disallow_follow)
+ {
+ free_urlpos (children);
+ children = NULL;
+ }
+
+ if (children)
+ {
+ struct urlpos *child = children;
+ struct url *url_parsed = url_parsed = url_parse (url, NULL);
+ char *referer_url = url;
+ bool strip_auth = (url_parsed != NULL
+ && url_parsed->user != NULL);
+ assert (url_parsed != NULL);
+
+ /* Strip auth info if present */
+ if (strip_auth)
+ referer_url = url_string (url_parsed, URL_AUTH_HIDE);
+
+ for (; child; child = child->next)
+ {
+ if (child->ignore_when_downloading)
+ continue;
+ if (dash_p_leaf_HTML && !child->link_inline_p)
+ continue;
+ if (download_child_p (child, url_parsed, depth, start_url_parsed,
+ blacklist))
+ {
+ url_enqueue (queue, xstrdup (child->url->url),
+ xstrdup (referer_url), depth + 1,
- child->link_expect_html);
++ child->link_expect_html,
++ child->link_expect_css);
+ /* We blacklist the URL we have enqueued, because we
+ don't want to enqueue (and hence download) the
+ same URL twice. */
+ string_set_add (blacklist, child->url->url);
+ }
+ }
+
+ if (strip_auth)
+ xfree (referer_url);
+ url_free (url_parsed);
+ free_urlpos (children);
+ }
+ }
if (file
&& (opt.delete_after
{
char *d1, *d2;
int d3;
- bool d4;
+ bool d4, d5;
while (url_dequeue (queue,
- (const char **)&d1, (const char **)&d2, &d3, &d4))
- (const char **)&d1, (const char **)&d2, &d3, &d4, &d5))
++ (const char **)&d1, (const char **)&d2, &d3, &d4, &d5))
{
- xfree (d1);
- xfree_null (d2);
+ xfree (d1);
+ xfree_null (d2);
}
}
url_queue_delete (queue);
if (local_file)
{
if (*dt & RETROKF)
- {
- register_download (u->url, local_file);
- if (redirection_count && 0 != strcmp (origurl, u->url))
- register_redirection (origurl, u->url);
- if (*dt & TEXTHTML)
- register_html (u->url, local_file);
- if (*dt & TEXTCSS)
- register_css (u->url, local_file);
- }
+ {
+ register_download (u->url, local_file);
+ if (redirection_count && 0 != strcmp (origurl, u->url))
+ register_redirection (origurl, u->url);
+ if (*dt & TEXTHTML)
+ register_html (u->url, local_file);
++ if (*dt & TEXTCSS)
++ register_css (u->url, local_file);
+ }
}
if (file)