/* Command line parsing.
- Copyright (C) 1996-2006 Free Software Foundation, Inc.
+ Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
+ 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation,
+ Inc.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
+the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
GNU Wget is distributed in the hope that it will be useful,
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
-along with Wget; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+along with Wget. If not, see <http://www.gnu.org/licenses/>.
-In addition, as a special exception, the Free Software Foundation
-gives permission to link the code of its release of Wget with the
-OpenSSL project's "OpenSSL" library (or with modified versions of it
-that use the same license as the "OpenSSL" library), and distribute
-the linked executables. You must obey the GNU General Public License
-in all respects for all of the code used other than "OpenSSL". If you
-modify this file, you may extend this exception to your version of the
-file, but you are not obligated to do so. If you do not wish to do
-so, delete this exception statement from your version. */
+Additional permission under GNU GPL version 3 section 7
-#include <config.h>
+If you modify this program, or any covered work, by linking or
+combining it with the OpenSSL project's OpenSSL library (or a
+modified version of that library), containing parts covered by the
+terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
+grants you additional permission to convey the resulting work.
+Corresponding Source for a non-source form of such a combination
+shall include the source code for the parts of OpenSSL used as well
+as that of the covered work. */
+
+#include "wget.h"
#include <stdio.h>
#include <stdlib.h>
-#ifdef HAVE_UNISTD_H
-# include <unistd.h>
-#endif /* HAVE_UNISTD_H */
+#include <unistd.h>
#include <string.h>
#include <signal.h>
-#ifdef HAVE_NLS
+#ifdef ENABLE_NLS
# include <locale.h>
#endif
#include <assert.h>
#include <errno.h>
#include <time.h>
-#include "wget.h"
+#include "exits.h"
#include "utils.h"
#include "init.h"
#include "retr.h"
#include "convert.h"
#include "spider.h"
#include "http.h" /* for save_cookies */
+#include "ptimer.h"
+#include "warc.h"
+#include <getopt.h>
+#include <getpass.h>
+#include <quote.h>
+
+#ifdef WINDOWS
+# include <io.h>
+# include <fcntl.h>
+#endif
-/* On GNU system this will include system-wide getopt.h. */
-#include "getopt.h"
+#ifdef __VMS
+# include "vms.h"
+#endif /* __VMS */
#ifndef PATH_SEPARATOR
# define PATH_SEPARATOR '/'
#endif
+#ifndef ENABLE_IRI
+struct iri dummy_iri;
+#endif
+
struct options opt;
+/* defined in version.c */
extern char *version_string;
+extern char *compilation_string;
+extern char *system_getrc;
+extern char *link_string;
+/* defined in build_info.c */
+extern const char *compiled_features[];
+/* Used for --version output in print_version */
+#define MAX_CHARS_PER_LINE 72
+#define TABULATION 4
#if defined(SIGHUP) || defined(SIGUSR1)
static void redirect_output_signal (int);
#endif
const char *exec_name;
+
+/* Number of successfully downloaded URLs */
+int numurls = 0;
\f
+#ifndef TESTING
/* Initialize I18N/L10N. That amounts to invoking setlocale, and
setting up gettext's message catalog using bindtextdomain and
textdomain. Does nothing if NLS is disabled or missing. */
static void
i18n_initialize (void)
{
- /* HAVE_NLS implies existence of functions invoked here. */
-#ifdef HAVE_NLS
+ /* ENABLE_NLS implies existence of functions invoked here. */
+#ifdef ENABLE_NLS
/* Set the current locale. */
setlocale (LC_ALL, "");
/* Set the text message domain. */
bindtextdomain ("wget", LOCALEDIR);
textdomain ("wget");
-#endif /* HAVE_NLS */
+#endif /* ENABLE_NLS */
}
\f
/* Definition of command-line options. */
static struct cmdline_option option_data[] =
{
{ "accept", 'A', OPT_VALUE, "accept", -1 },
+ { "accept-regex", 0, OPT_VALUE, "acceptregex", -1 },
+ { "adjust-extension", 'E', OPT_BOOLEAN, "adjustextension", -1 },
{ "append-output", 'a', OPT__APPEND_OUTPUT, NULL, required_argument },
+ { "ask-password", 0, OPT_BOOLEAN, "askpassword", -1 },
+ { "auth-no-challenge", 0, OPT_BOOLEAN, "authnochallenge", -1 },
{ "background", 'b', OPT_BOOLEAN, "background", -1 },
{ "backup-converted", 'K', OPT_BOOLEAN, "backupconverted", -1 },
{ "backups", 0, OPT_BOOLEAN, "backups", -1 },
{ "base", 'B', OPT_VALUE, "base", -1 },
{ "bind-address", 0, OPT_VALUE, "bindaddress", -1 },
+ { "body-data", 0, OPT_VALUE, "bodydata", -1 },
+ { "body-file", 0, OPT_VALUE, "bodyfile", -1 },
{ IF_SSL ("ca-certificate"), 0, OPT_VALUE, "cacertificate", -1 },
{ IF_SSL ("ca-directory"), 0, OPT_VALUE, "cadirectory", -1 },
{ "cache", 0, OPT_BOOLEAN, "cache", -1 },
{ IF_SSL ("certificate-type"), 0, OPT_VALUE, "certificatetype", -1 },
{ IF_SSL ("check-certificate"), 0, OPT_BOOLEAN, "checkcertificate", -1 },
{ "clobber", 0, OPT__CLOBBER, NULL, optional_argument },
+ { "config", 0, OPT_VALUE, "chooseconfig", -1 },
{ "connect-timeout", 0, OPT_VALUE, "connecttimeout", -1 },
{ "continue", 'c', OPT_BOOLEAN, "continue", -1 },
{ "convert-links", 'k', OPT_BOOLEAN, "convertlinks", -1 },
{ "content-disposition", 0, OPT_BOOLEAN, "contentdisposition", -1 },
+ { "content-on-error", 0, OPT_BOOLEAN, "contentonerror", -1 },
{ "cookies", 0, OPT_BOOLEAN, "cookies", -1 },
{ "cut-dirs", 0, OPT_VALUE, "cutdirs", -1 },
{ WHEN_DEBUG ("debug"), 'd', OPT_BOOLEAN, "debug", -1 },
+ { "default-page", 0, OPT_VALUE, "defaultpage", -1 },
{ "delete-after", 0, OPT_BOOLEAN, "deleteafter", -1 },
{ "directories", 0, OPT_BOOLEAN, "dirstruct", -1 },
{ "directory-prefix", 'P', OPT_VALUE, "dirprefix", -1 },
{ "dns-timeout", 0, OPT_VALUE, "dnstimeout", -1 },
{ "domains", 'D', OPT_VALUE, "domains", -1 },
{ "dont-remove-listing", 0, OPT__DONT_REMOVE_LISTING, NULL, no_argument },
- { "dot-style", 0, OPT_VALUE, "dotstyle", -1 },
+ { "dot-style", 0, OPT_VALUE, "dotstyle", -1 }, /* deprecated */
{ "egd-file", 0, OPT_VALUE, "egdfile", -1 },
{ "exclude-directories", 'X', OPT_VALUE, "excludedirectories", -1 },
{ "exclude-domains", 0, OPT_VALUE, "excludedomains", -1 },
{ "force-directories", 'x', OPT_BOOLEAN, "dirstruct", -1 },
{ "force-html", 'F', OPT_BOOLEAN, "forcehtml", -1 },
{ "ftp-password", 0, OPT_VALUE, "ftppassword", -1 },
+#ifdef __VMS
+ { "ftp-stmlf", 0, OPT_BOOLEAN, "ftpstmlf", -1 },
+#endif /* def __VMS */
{ "ftp-user", 0, OPT_VALUE, "ftpuser", -1 },
{ "glob", 0, OPT_BOOLEAN, "glob", -1 },
{ "header", 0, OPT_VALUE, "header", -1 },
{ "help", 'h', OPT_FUNCALL, (void *)print_help, no_argument },
{ "host-directories", 0, OPT_BOOLEAN, "addhostdir", -1 },
- { "html-extension", 'E', OPT_BOOLEAN, "htmlextension", -1 },
+ { "html-extension", 'E', OPT_BOOLEAN, "adjustextension", -1 }, /* deprecated */
{ "htmlify", 0, OPT_BOOLEAN, "htmlify", -1 },
{ "http-keep-alive", 0, OPT_BOOLEAN, "httpkeepalive", -1 },
{ "http-passwd", 0, OPT_VALUE, "httppassword", -1 }, /* deprecated */
{ "inet6-only", '6', OPT_BOOLEAN, "inet6only", -1 },
#endif
{ "input-file", 'i', OPT_VALUE, "input", -1 },
+ { "iri", 0, OPT_BOOLEAN, "iri", -1 },
{ "keep-session-cookies", 0, OPT_BOOLEAN, "keepsessioncookies", -1 },
{ "level", 'l', OPT_VALUE, "reclevel", -1 },
{ "limit-rate", 0, OPT_VALUE, "limitrate", -1 },
{ "load-cookies", 0, OPT_VALUE, "loadcookies", -1 },
+ { "local-encoding", 0, OPT_VALUE, "localencoding", -1 },
+ { "max-redirect", 0, OPT_VALUE, "maxredirect", -1 },
+ { "method", 0, OPT_VALUE, "method", -1 },
{ "mirror", 'm', OPT_BOOLEAN, "mirror", -1 },
{ "no", 'n', OPT__NO, NULL, required_argument },
{ "no-clobber", 0, OPT_BOOLEAN, "noclobber", -1 },
{ "read-timeout", 0, OPT_VALUE, "readtimeout", -1 },
{ "recursive", 'r', OPT_BOOLEAN, "recursive", -1 },
{ "referer", 0, OPT_VALUE, "referer", -1 },
+ { "regex-type", 0, OPT_VALUE, "regextype", -1 },
{ "reject", 'R', OPT_VALUE, "reject", -1 },
+ { "reject-regex", 0, OPT_VALUE, "rejectregex", -1 },
{ "relative", 'L', OPT_BOOLEAN, "relativeonly", -1 },
+ { "remote-encoding", 0, OPT_VALUE, "remoteencoding", -1 },
{ "remove-listing", 0, OPT_BOOLEAN, "removelisting", -1 },
+ { "report-speed", 0, OPT_BOOLEAN, "reportspeed", -1 },
{ "restrict-file-names", 0, OPT_BOOLEAN, "restrictfilenames", -1 },
{ "retr-symlinks", 0, OPT_BOOLEAN, "retrsymlinks", -1 },
{ "retry-connrefused", 0, OPT_BOOLEAN, "retryconnrefused", -1 },
{ "timeout", 'T', OPT_VALUE, "timeout", -1 },
{ "timestamping", 'N', OPT_BOOLEAN, "timestamping", -1 },
{ "tries", 't', OPT_VALUE, "tries", -1 },
+ { "unlink", 0, OPT_BOOLEAN, "unlink", -1 },
+ { "trust-server-names", 0, OPT_BOOLEAN, "trustservernames", -1 },
+ { "use-server-timestamps", 0, OPT_BOOLEAN, "useservertimestamps", -1 },
{ "user", 0, OPT_VALUE, "user", -1 },
{ "user-agent", 'U', OPT_VALUE, "useragent", -1 },
{ "verbose", 'v', OPT_BOOLEAN, "verbose", -1 },
{ "version", 'V', OPT_FUNCALL, (void *) print_version, no_argument },
{ "wait", 'w', OPT_VALUE, "wait", -1 },
{ "waitretry", 0, OPT_VALUE, "waitretry", -1 },
+ { "warc-cdx", 0, OPT_BOOLEAN, "warccdx", -1 },
+#ifdef HAVE_LIBZ
+ { "warc-compression", 0, OPT_BOOLEAN, "warccompression", -1 },
+#endif
+ { "warc-dedup", 0, OPT_VALUE, "warccdxdedup", -1 },
+ { "warc-digests", 0, OPT_BOOLEAN, "warcdigests", -1 },
+ { "warc-file", 0, OPT_VALUE, "warcfile", -1 },
+ { "warc-header", 0, OPT_VALUE, "warcheader", -1 },
+ { "warc-keep-log", 0, OPT_BOOLEAN, "warckeeplog", -1 },
+ { "warc-max-size", 0, OPT_VALUE, "warcmaxsize", -1 },
+ { "warc-tempdir", 0, OPT_VALUE, "warctempdir", -1 },
+#ifdef USE_WATT32
+ { "wdebug", 0, OPT_BOOLEAN, "wdebug", -1 },
+#endif
};
#undef WHEN_DEBUG
init_switches (void)
{
char *p = short_options;
- int i, o = 0;
+ size_t i, o = 0;
for (i = 0; i < countof (option_data); i++)
{
struct cmdline_option *opt = &option_data[i];
}
/* Print the usage message. */
-static void
-print_usage (void)
+static int
+print_usage (int error)
{
- printf (_("Usage: %s [OPTION]... [URL]...\n"), exec_name);
+ return fprintf (error ? stderr : stdout,
+ _("Usage: %s [OPTION]... [URL]...\n"), exec_name);
}
/* Print the help message, describing all the available options. If
#ifdef ENABLE_DEBUG
N_("\
-d, --debug print lots of debugging information.\n"),
+#endif
+#ifdef USE_WATT32
+ N_("\
+ --wdebug print Watt-32 debug output.\n"),
#endif
N_("\
-q, --quiet quiet (no output).\n"),
N_("\
-nv, --no-verbose turn off verboseness, without being quiet.\n"),
N_("\
- -i, --input-file=FILE download URLs found in FILE.\n"),
+ --report-speed=TYPE Output bandwidth as TYPE. TYPE can be bits.\n"),
+ N_("\
+ -i, --input-file=FILE download URLs found in local or external FILE.\n"),
N_("\
-F, --force-html treat input file as HTML.\n"),
N_("\
- -B, --base=URL prepends URL to relative links in -F -i file.\n"),
+ -B, --base=URL resolves HTML input-file links (-i -F)\n\
+ relative to URL.\n"),
+ N_("\
+ --config=FILE Specify config file to use.\n"),
"\n",
N_("\
-O, --output-document=FILE write documents to FILE.\n"),
N_("\
-nc, --no-clobber skip downloads that would download to\n\
- existing files.\n"),
+ existing files (overwriting them).\n"),
N_("\
-c, --continue resume getting a partially-downloaded file.\n"),
N_("\
-N, --timestamping don't re-retrieve files unless newer than\n\
local.\n"),
N_("\
+ --no-use-server-timestamps don't set the local file's timestamp by\n\
+ the one on the server.\n"),
+ N_("\
-S, --server-response print server response.\n"),
N_("\
--spider don't download anything.\n"),
N_("\
--waitretry=SECONDS wait 1..SECONDS between retries of a retrieval.\n"),
N_("\
- --random-wait wait from 0...2*WAIT secs between retrievals.\n"),
- N_("\
- -Y, --proxy explicitly turn on proxy.\n"),
+ --random-wait wait from 0.5*WAIT...1.5*WAIT secs between retrievals.\n"),
N_("\
--no-proxy explicitly turn off proxy.\n"),
N_("\
--user=USER set both ftp and http user to USER.\n"),
N_("\
--password=PASS set both ftp and http password to PASS.\n"),
+ N_("\
+ --ask-password prompt for passwords.\n"),
+ N_("\
+ --no-iri turn off IRI support.\n"),
+ N_("\
+ --local-encoding=ENC use ENC as the local encoding for IRIs.\n"),
+ N_("\
+ --remote-encoding=ENC use ENC as the default remote encoding.\n"),
+ N_("\
+ --unlink remove file before clobber.\n"),
"\n",
N_("\
--http-password=PASS set http password to PASS.\n"),
N_("\
--no-cache disallow server-cached data.\n"),
+ N_ ("\
+ --default-page=NAME Change the default page name (normally\n\
+ this is `index.html'.).\n"),
N_("\
- -E, --html-extension save HTML documents with `.html' extension.\n"),
+ -E, --adjust-extension save HTML/CSS documents with proper extensions.\n"),
N_("\
--ignore-length ignore `Content-Length' header field.\n"),
N_("\
--header=STRING insert STRING among the headers.\n"),
+ N_("\
+ --max-redirect maximum redirections allowed per page.\n"),
N_("\
--proxy-user=USER set USER as proxy username.\n"),
N_("\
N_("\
--post-file=FILE use the POST method; send contents of FILE.\n"),
N_("\
- --no-content-disposition don't honor Content-Disposition header.\n"),
+ --method=HTTPMethod use method \"HTTPMethod\" in the header.\n"),
+ N_("\
+ --body-data=STRING Send STRING as data. --method MUST be set.\n"),
+ N_("\
+ --body-file=FILE Send contents of FILE. --method MUST be set.\n"),
+ N_("\
+ --content-disposition honor the Content-Disposition header when\n\
+ choosing local file names (EXPERIMENTAL).\n"),
+ N_("\
+ --content-on-error output the received content on server errors.\n"),
+ N_("\
+ --auth-no-challenge send Basic HTTP authentication information\n\
+ without first waiting for the server's\n\
+ challenge.\n"),
"\n",
#ifdef HAVE_SSL
N_("\
FTP options:\n"),
+#ifdef __VMS
+ N_("\
+ --ftp-stmlf Use Stream_LF format for all binary FTP files.\n"),
+#endif /* def __VMS */
N_("\
--ftp-user=USER set ftp user to USER.\n"),
N_("\
--no-glob turn off FTP file name globbing.\n"),
N_("\
--no-passive-ftp disable the \"passive\" transfer mode.\n"),
+ N_("\
+ --preserve-permissions preserve remote file permissions.\n"),
N_("\
--retr-symlinks when recursing, get linked-to files (not dir).\n"),
+ "\n",
+
N_("\
- --preserve-permissions preserve remote file permissions.\n"),
+WARC options:\n"),
+ N_("\
+ --warc-file=FILENAME save request/response data to a .warc.gz file.\n"),
+ N_("\
+ --warc-header=STRING insert STRING into the warcinfo record.\n"),
+ N_("\
+ --warc-max-size=NUMBER set maximum size of WARC files to NUMBER.\n"),
+ N_("\
+ --warc-cdx write CDX index files.\n"),
+ N_("\
+ --warc-dedup=FILENAME do not store records listed in this CDX file.\n"),
+#ifdef HAVE_LIBZ
+ N_("\
+ --no-warc-compression do not compress WARC files with GZIP.\n"),
+#endif
+ N_("\
+ --no-warc-digests do not calculate SHA1 digests.\n"),
+ N_("\
+ --no-warc-keep-log do not store the log file in a WARC record.\n"),
+ N_("\
+ --warc-tempdir=DIRECTORY location for temporary files created by the\n\
+ WARC writer.\n"),
"\n",
N_("\
N_("\
--delete-after delete files locally after downloading them.\n"),
N_("\
- -k, --convert-links make links in downloaded HTML point to local files.\n"),
+ -k, --convert-links make links in downloaded HTML or CSS point to\n\
+ local files.\n"),
+#ifdef __VMS
+ N_("\
+ -K, --backup-converted before converting file X, back up as X_orig.\n"),
+#else /* def __VMS */
N_("\
-K, --backup-converted before converting file X, back up as X.orig.\n"),
+#endif /* def __VMS [else] */
N_("\
-m, --mirror shortcut for -N -r -l inf --no-remove-listing.\n"),
N_("\
-A, --accept=LIST comma-separated list of accepted extensions.\n"),
N_("\
-R, --reject=LIST comma-separated list of rejected extensions.\n"),
+ N_("\
+ --accept-regex=REGEX regex matching accepted URLs.\n"),
+ N_("\
+ --reject-regex=REGEX regex matching rejected URLs.\n"),
+#ifdef HAVE_LIBPCRE
+ N_("\
+ --regex-type=TYPE regex type (posix|pcre).\n"),
+#else
+ N_("\
+ --regex-type=TYPE regex type (posix).\n"),
+#endif
N_("\
-D, --domains=LIST comma-separated list of accepted domains.\n"),
N_("\
N_("\
-I, --include-directories=LIST list of allowed directories.\n"),
N_("\
+ --trust-server-names use the name specified by the redirection\n\
+ url last component.\n"),
+ N_("\
-X, --exclude-directories=LIST list of excluded directories.\n"),
N_("\
-np, --no-parent don't ascend to the parent directory.\n"),
"\n",
-
N_("Mail bug reports and suggestions to <bug-wget@gnu.org>.\n")
};
- int i;
+ size_t i;
- printf (_("GNU Wget %s, a non-interactive network retriever.\n"),
- version_string);
- print_usage ();
+ if (printf (_("GNU Wget %s, a non-interactive network retriever.\n"),
+ version_string) < 0)
+ exit (3);
+ if (print_usage (0) < 0)
+ exit (3);
for (i = 0; i < countof (help); i++)
- fputs (_(help[i]), stdout);
+ if (fputs (_(help[i]), stdout) < 0)
+ exit (3);
exit (0);
}
return buf;
}
+/* Ask for a password on the terminal.  The prompt text is written to
+   stderr and names opt.user when one has been set; the reply itself
+   is read by getpass, which is handed an empty prompt of its own.
+   Returns whatever getpass returns.  */
+static char *
+prompt_for_password (void)
+{
+  if (!opt.user)
+    fprintf (stderr, _("Password: "));
+  else
+    fprintf (stderr, _("Password for user %s: "), quote (opt.user));
+
+  return getpass ("");
+}
+
+/* Print LINE to stdout, wrapped at word boundaries.  PREFIX is
+   printed at the start of the first output line; every continuation
+   line is indented by TABULATION spaces instead.  LINE is split on
+   spaces, and a new output line is started whenever the next word
+   does not fit in the room that is left.  A LINE_LENGTH of zero or
+   less selects the default width, MAX_CHARS_PER_LINE - TABULATION.
+   A word longer than a whole line is printed in one piece on a fresh
+   line.
+
+   Returns 0 on success, or -1 as soon as any printf call fails.  The
+   working copy of LINE is released on every path.  */
+static int
+format_and_print_line (const char *prefix, const char *line,
+                       int line_length)
+{
+  int remaining_chars;
+  int status = -1;      /* stays -1 unless everything was printed */
+  char *line_dup, *token;
+
+  assert (prefix != NULL);
+  assert (line != NULL);
+
+  /* strtok writes into the string it splits, so work on a copy.  */
+  line_dup = xstrdup (line);
+
+  if (line_length <= 0)
+    line_length = MAX_CHARS_PER_LINE - TABULATION;
+
+  if (printf ("%s", prefix) < 0)
+    goto cleanup;
+  remaining_chars = line_length;
+
+  /* We break on spaces. */
+  for (token = strtok (line_dup, " "); token != NULL;
+       token = strtok (NULL, " "))
+    {
+      size_t token_len = strlen (token);
+
+      /* Start a new line when the word does not fit.  The explicit
+         sign test matters: after an oversized word REMAINING_CHARS
+         is negative, and comparing it directly against a size_t
+         would convert it to a huge unsigned value and suppress all
+         further wrapping.  */
+      if (remaining_chars < 0 || (size_t) remaining_chars <= token_len)
+        {
+          if (printf ("\n%*c", TABULATION, ' ') < 0)
+            goto cleanup;
+          remaining_chars = line_length - TABULATION;
+        }
+      if (printf ("%s ", token) < 0)
+        goto cleanup;
+      remaining_chars -= (int) token_len + 1;  /* account for " " */
+    }
+
+  if (printf ("\n") < 0)
+    goto cleanup;
+
+  status = 0;
+
+cleanup:
+  xfree (line_dup);
+  return status;
+}
+
static void
print_version (void)
{
- printf ("GNU Wget %s\n\n", version_string);
- fputs (_("\
-Copyright (C) 2006 Free Software Foundation, Inc.\n"), stdout);
- fputs (_("\
-This program is distributed in the hope that it will be useful,\n\
-but WITHOUT ANY WARRANTY; without even the implied warranty of\n\
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n\
-GNU General Public License for more details.\n"), stdout);
- fputs (_("\nOriginally written by Hrvoje Niksic <hniksic@xemacs.org>.\n"),
- stdout);
- fputs (_("\nCurrently maintained by Mauro Tortonesi <mauro@ferrara.linux.it>.\n"),
- stdout);
+ const char *wgetrc_title = _("Wgetrc: ");
+ const char *locale_title = _("Locale: ");
+ const char *compile_title = _("Compile: ");
+ const char *link_title = _("Link: ");
+ char *env_wgetrc, *user_wgetrc;
+ int i;
+
+ if (printf (_("GNU Wget %s built on %s.\n\n"), version_string, OS_TYPE) < 0)
+ exit (3);
+
+ for (i = 0; compiled_features[i] != NULL; )
+ {
+ int line_length = MAX_CHARS_PER_LINE;
+ while ((line_length > 0) && (compiled_features[i] != NULL))
+ {
+ if (printf ("%s ", compiled_features[i]) < 0)
+ exit (3);
+ line_length -= strlen (compiled_features[i]) + 2;
+ i++;
+ }
+ if (printf ("\n") < 0)
+ exit (3);
+ }
+ if (printf ("\n") < 0)
+ exit (3);
+
+ /* Handle the case when $WGETRC is unset and $HOME/.wgetrc is
+ absent. */
+ if (printf ("%s\n", wgetrc_title) < 0)
+ exit (3);
+
+ env_wgetrc = wgetrc_env_file_name ();
+ if (env_wgetrc && *env_wgetrc)
+ {
+ if (printf (_(" %s (env)\n"), env_wgetrc) < 0)
+ exit (3);
+ xfree (env_wgetrc);
+ }
+ user_wgetrc = wgetrc_user_file_name ();
+ if (user_wgetrc)
+ {
+ if (printf (_(" %s (user)\n"), user_wgetrc) < 0)
+ exit (3);
+ xfree (user_wgetrc);
+ }
+#ifdef SYSTEM_WGETRC
+ if (printf (_(" %s (system)\n"), SYSTEM_WGETRC) < 0)
+ exit (3);
+#endif
+
+#ifdef ENABLE_NLS
+ if (format_and_print_line (locale_title,
+ LOCALEDIR,
+ MAX_CHARS_PER_LINE) < 0)
+ exit (3);
+#endif /* def ENABLE_NLS */
+
+ if (compilation_string != NULL)
+ if (format_and_print_line (compile_title,
+ compilation_string,
+ MAX_CHARS_PER_LINE) < 0)
+ exit (3);
+
+ if (link_string != NULL)
+ if (format_and_print_line (link_title,
+ link_string,
+ MAX_CHARS_PER_LINE) < 0)
+ exit (3);
+
+ if (printf ("\n") < 0)
+ exit (3);
+
+ /* TRANSLATORS: When available, an actual copyright character
+ (circle-c) should be used in preference to "(C)". */
+ if (fputs (_("\
+Copyright (C) 2011 Free Software Foundation, Inc.\n"), stdout) < 0)
+ exit (3);
+ if (fputs (_("\
+License GPLv3+: GNU GPL version 3 or later\n\
+<http://www.gnu.org/licenses/gpl.html>.\n\
+This is free software: you are free to change and redistribute it.\n\
+There is NO WARRANTY, to the extent permitted by law.\n"), stdout) < 0)
+ exit (3);
+ /* TRANSLATORS: When available, please use the proper diacritics for
+ names such as this one. See en_US.po for reference. */
+ if (fputs (_("\nOriginally written by Hrvoje Niksic <hniksic@xemacs.org>.\n"),
+ stdout) < 0)
+ exit (3);
+ if (fputs (_("Please send bug reports and questions to <bug-wget@gnu.org>.\n"),
+ stdout) < 0)
+ exit (3);
+
exit (0);
}
-\f
-#ifndef TESTING
+
+char *program_name; /* Needed by lib/error.c. */
+char *program_argstring; /* Needed by wget_warc.c. */
+
int
-main (int argc, char *const *argv)
+main (int argc, char **argv)
{
char **url, **t;
int i, ret, longindex;
- int nurl, status;
+ int nurl;
bool append_to_log = false;
+ total_downloaded_bytes = 0;
+
+ program_name = argv[0];
+
+ struct ptimer *timer = ptimer_new ();
+ double start_time = ptimer_measure (timer);
+
i18n_initialize ();
/* Construct the name of the executable, without the directory part. */
+#ifdef __VMS
+ /* On VMS, lose the "dev:[dir]" prefix and the ".EXE;nnn" suffix. */
+ exec_name = vms_basename (argv[0]);
+#else /* def __VMS */
exec_name = strrchr (argv[0], PATH_SEPARATOR);
if (!exec_name)
exec_name = argv[0];
else
++exec_name;
+#endif /* def __VMS [else] */
#ifdef WINDOWS
/* Drop extension (typically .EXE) from executable filename. */
- windows_main (&argc, (char **) argv, (char **) &exec_name);
+ windows_main ((char **) &exec_name);
#endif
- /* Set option defaults; read the system wgetrc and ~/.wgetrc. */
- initialize ();
+ /* Construct the arguments string. */
+ int argstring_length = 1;
+ for (i = 1; i < argc; i++)
+ argstring_length += strlen (argv[i]) + 2 + 1;
+ char *p = program_argstring = malloc (argstring_length * sizeof (char));
+ if (p == NULL)
+ {
+ fprintf (stderr, _("Memory allocation problem\n"));
+ exit (2);
+ }
+ for (i = 1; i < argc; i++)
+ {
+ *p++ = '"';
+ int arglen = strlen (argv[i]);
+ memcpy (p, argv[i], arglen);
+ p += arglen;
+ *p++ = '"';
+ *p++ = ' ';
+ }
+ *p = '\0';
+
+ /* Load the hard-coded defaults. */
+ defaults ();
init_switches ();
+
+ /* This separate getopt_long is needed to find the user config file
+ option ("--config") and parse it before the other user options. */
+ longindex = -1;
+ int retconf;
+ bool use_userconfig = false;
+
+ while ((retconf = getopt_long (argc, argv,
+ short_options, long_options, &longindex)) != -1)
+ {
+ int confval;
+ struct cmdline_option *config_opt;
+
+ /* There is no short option for "--config". */
+ if (longindex >= 0)
+ {
+ confval = long_options[longindex].val;
+ config_opt = &option_data[confval & ~BOOLEAN_NEG_MARKER];
+ if (strcmp (config_opt->long_name, "config") == 0)
+ {
+ bool userrc_ret = true;
+ userrc_ret &= run_wgetrc (optarg);
+ use_userconfig = true;
+ if (userrc_ret)
+ break;
+ else
+ {
+ fprintf (stderr, _("Exiting due to error in %s\n"), optarg);
+ exit (2);
+ }
+ }
+ }
+ }
+
+ /* If the user did not specify a config, read the system wgetrc and ~/.wgetrc. */
+ if (use_userconfig == false)
+ initialize ();
+
+ opterr = 0;
+ optind = 0;
+
longindex = -1;
while ((ret = getopt_long (argc, argv,
short_options, long_options, &longindex)) != -1)
{
if (ret == '?')
{
- print_usage ();
- printf ("\n");
- printf (_("Try `%s --help' for more options.\n"), exec_name);
+ print_usage (1);
+ fprintf (stderr, "\n");
+ fprintf (stderr, _("Try `%s --help' for more options.\n"),
+ exec_name);
exit (2);
}
/* Find the short option character in the mapping. */
short options for convenience and backward
compatibility. */
char *p;
- for (p = optarg; *p; p++)
+ for (p = optarg; p && *p; p++)
switch (*p)
{
case 'v':
setoptval ("noparent", "1", opt->long_name);
break;
default:
- printf (_("%s: illegal option -- `-n%c'\n"), exec_name, *p);
- print_usage ();
- printf ("\n");
- printf (_("Try `%s --help' for more options.\n"), exec_name);
+ fprintf (stderr, _("%s: illegal option -- `-n%c'\n"),
+ exec_name, *p);
+ print_usage (1);
+ fprintf (stderr, "\n");
+ fprintf (stderr, _("Try `%s --help' for more options.\n"),
+ exec_name);
exit (1);
}
break;
before passing the value to setoptval. */
bool flag = true;
if (optarg)
- flag = (*optarg == '1' || TOLOWER (*optarg) == 'y'
- || (TOLOWER (optarg[0]) == 'o'
- && TOLOWER (optarg[1]) == 'n'));
+ flag = (*optarg == '1' || c_tolower (*optarg) == 'y'
+ || (c_tolower (optarg[0]) == 'o'
+ && c_tolower (optarg[1]) == 'n'));
setoptval (opt->type == OPT__PARENT ? "noparent" : "noclobber",
flag ? "0" : "1", opt->long_name);
break;
/* All user options have now been processed, so it's now safe to do
interoption dependency checks. */
+ if (opt.noclobber && opt.convert_links)
+ {
+ fprintf (stderr,
+ _("Both --no-clobber and --convert-links were specified,"
+ " only --convert-links will be used.\n"));
+ opt.noclobber = false;
+ }
+
if (opt.reclevel == 0)
- opt.reclevel = INFINITE_RECURSION; /* see recur.h for commentary on this */
+ opt.reclevel = INFINITE_RECURSION; /* see recur.h for commentary */
+
+ if (opt.spider || opt.delete_after)
+ opt.no_dirstruct = true;
if (opt.page_requisites && !opt.recursive)
{
if (opt.verbose == -1)
opt.verbose = !opt.quiet;
+
/* Sanity checks. */
if (opt.verbose && opt.quiet)
{
- printf (_("Can't be verbose and quiet at the same time.\n"));
- print_usage ();
+ fprintf (stderr, _("Can't be verbose and quiet at the same time.\n"));
+ print_usage (1);
exit (1);
}
if (opt.timestamping && opt.noclobber)
{
- printf (_("\
+ fprintf (stderr, _("\
Can't timestamp and not clobber old files at the same time.\n"));
- print_usage ();
+ print_usage (1);
exit (1);
}
#ifdef ENABLE_IPV6
if (opt.ipv4_only && opt.ipv6_only)
{
- printf (_("Cannot specify both --inet4-only and --inet6-only.\n"));
- print_usage ();
+ fprintf (stderr,
+ _("Cannot specify both --inet4-only and --inet6-only.\n"));
+ print_usage (1);
exit (1);
}
#endif
- if (opt.output_document
- && (opt.page_requisites
- || opt.recursive
- || opt.timestamping))
+ if (opt.output_document)
{
- printf (_("Cannot specify -r, -p or -N if -O is given.\n"));
- print_usage ();
+ if (opt.convert_links
+ && (nurl > 1 || opt.page_requisites || opt.recursive))
+ {
+ fputs (_("\
+Cannot specify both -k and -O if multiple URLs are given, or in combination\n\
+with -p or -r. See the manual for details.\n\n"), stderr);
+ print_usage (1);
exit (1);
+ }
+ if (opt.page_requisites
+ || opt.recursive)
+ {
+ logprintf (LOG_NOTQUIET, "%s", _("\
+WARNING: combining -O with -r or -p will mean that all downloaded content\n\
+will be placed in the single file you specified.\n\n"));
+ }
+ if (opt.timestamping)
+ {
+ logprintf (LOG_NOTQUIET, "%s", _("\
+WARNING: timestamping does nothing in combination with -O. See the manual\n\
+for details.\n\n"));
+ opt.timestamping = false;
+ }
+ if (opt.noclobber && file_exists_p(opt.output_document))
+ {
+ /* Check if output file exists; if it does, exit. */
+ logprintf (LOG_VERBOSE,
+ _("File `%s' already there; not retrieving.\n"),
+ opt.output_document);
+ exit(1);
+ }
}
- if (opt.output_document
- && opt.convert_links
- && nurl > 1)
+
+ if (opt.warc_filename != 0)
{
- printf (_("Cannot specify both -k and -O if multiple URLs are given.\n"));
- print_usage ();
+ if (opt.noclobber)
+ {
+ fprintf (stderr,
+ _("WARC output does not work with --no-clobber, "
+ "--no-clobber will be disabled.\n"));
+ opt.noclobber = false;
+ }
+ if (opt.timestamping)
+ {
+ fprintf (stderr,
+ _("WARC output does not work with timestamping, "
+ "timestamping will be disabled.\n"));
+ opt.timestamping = false;
+ }
+ if (opt.spider)
+ {
+ fprintf (stderr,
+ _("WARC output does not work with --spider.\n"));
exit (1);
+ }
+ if (opt.always_rest)
+ {
+ fprintf (stderr,
+ _("WARC output does not work with --continue, "
+ "--continue will be disabled.\n"));
+ opt.always_rest = false;
+ }
+ if (opt.warc_cdx_dedup_filename != 0 && !opt.warc_digests_enabled)
+ {
+ fprintf (stderr,
+ _("Digests are disabled; WARC deduplication will "
+ "not find duplicate records.\n"));
+ }
+ if (opt.warc_keep_log)
+ {
+ opt.progress_type = xstrdup ("dot");
+ }
+ }
+
+ if (opt.ask_passwd && opt.passwd)
+ {
+ fprintf (stderr,
+ _("Cannot specify both --ask-password and --password.\n"));
+ print_usage (1);
+ exit (1);
}
if (!nurl && !opt.input_filename)
{
/* No URL specified. */
- printf (_("%s: missing URL\n"), exec_name);
- print_usage ();
- printf ("\n");
+ fprintf (stderr, _("%s: missing URL\n"), exec_name);
+ print_usage (1);
+ fprintf (stderr, "\n");
/* #### Something nicer should be printed here -- similar to the
pre-1.5 `--help' page. */
- printf (_("Try `%s --help' for more options.\n"), exec_name);
+ fprintf (stderr, _("Try `%s --help' for more options.\n"), exec_name);
exit (1);
}
+ /* Compile the regular expressions. */
+ switch (opt.regex_type)
+ {
+#ifdef HAVE_LIBPCRE
+ case regex_type_pcre:
+ opt.regex_compile_fun = compile_pcre_regex;
+ opt.regex_match_fun = match_pcre_regex;
+ break;
+#endif
+
+ case regex_type_posix:
+ default:
+ opt.regex_compile_fun = compile_posix_regex;
+ opt.regex_match_fun = match_posix_regex;
+ break;
+ }
+ if (opt.acceptregex_s)
+ {
+ opt.acceptregex = opt.regex_compile_fun (opt.acceptregex_s);
+ if (!opt.acceptregex)
+ exit (1);
+ }
+ if (opt.rejectregex_s)
+ {
+ opt.rejectregex = opt.regex_compile_fun (opt.rejectregex_s);
+ if (!opt.rejectregex)
+ exit (1);
+ }
+ if (opt.post_data || opt.post_file_name)
+ {
+ if (opt.post_data && opt.post_file_name)
+ {
+ fprintf (stderr, _("You cannot specify both --post-data and --post-file.\n"));
+ exit (1);
+ }
+ else if (opt.method)
+ {
+ fprintf (stderr, _("You cannot use --post-data or --post-file along with --method. "
+ "--method expects data through --body-data and --body-file options"));
+ exit (1);
+ }
+ }
+ if (opt.body_data || opt.body_file)
+ {
+ if (!opt.method)
+ {
+ fprintf (stderr, _("You must specify a method through --method=HTTPMethod "
+ "to use with --body-data or --body-file.\n"));
+ exit (1);
+ }
+ else if (opt.body_data && opt.body_file)
+ {
+ fprintf (stderr, _("You cannot specify both --body-data and --body-file.\n"));
+ exit (1);
+ }
+ }
+
+ /* Set various options as required for opt.method. */
+
+ /* When user specifies HEAD as the method, we do not wish to download any
+ files. Hence, set wget to run in spider mode. */
+ if (opt.method && strcasecmp (opt.method, "HEAD") == 0)
+ setoptval ("spider", "1", "spider");
+
+ /* Convert post_data to body-data and post_file_name to body-file options.
+ This is required so as to remove redundant code later on in gethttp().
+ The --post-data and --post-file options may also be removed in
+ the future hence it makes sense to convert them to aliases for
+ the more generic --method options.
+ This MUST occur only after the sanity checks so as to prevent the
+ user from setting both post and body options simultaneously.
+ */
+ if (opt.post_data || opt.post_file_name)
+ {
+ setoptval ("method", "POST", "method");
+ if (opt.post_data)
+ {
+ setoptval ("bodydata", opt.post_data, "body-data");
+ opt.post_data = NULL;
+ }
+ else
+ {
+ setoptval ("bodyfile", opt.post_file_name, "body-file");
+ opt.post_file_name = NULL;
+ }
+ }
+
+#ifdef ENABLE_IRI
+ if (opt.enable_iri)
+ {
+ if (opt.locale && !check_encoding_name (opt.locale))
+ opt.locale = NULL;
+
+ if (!opt.locale)
+ opt.locale = find_locale ();
+
+ if (opt.encoding_remote && !check_encoding_name (opt.encoding_remote))
+ opt.encoding_remote = NULL;
+ }
+#else
+ memset (&dummy_iri, 0, sizeof (dummy_iri));
+ if (opt.enable_iri || opt.locale || opt.encoding_remote)
+ {
+ /* sXXXav : be more specific... */
+ fprintf (stderr, _("This version does not have support for IRIs\n"));
+ exit(1);
+ }
+#endif
+
+ if (opt.ask_passwd)
+ {
+ opt.passwd = prompt_for_password ();
+
+ if (opt.passwd == NULL || opt.passwd[0] == '\0')
+ exit (1);
+ }
+
+#ifdef USE_WATT32
+ if (opt.wdebug)
+ dbug_init();
+ sock_init();
+#else
if (opt.background)
fork_to_background ();
+#endif
/* Initialize progress. Have to do this after the options are
processed so we know where the log file is. */
/* Fill in the arguments. */
url = alloca_array (char *, nurl + 1);
+ if (url == NULL)
+ {
+ fprintf (stderr, _("Memory allocation problem\n"));
+ exit (2);
+ }
for (i = 0; i < nurl; i++, optind++)
{
char *rewritten = rewrite_shorthand_url (argv[optind]);
/* Initialize logging. */
log_init (opt.lfilename, append_to_log);
- DEBUGP (("DEBUG output created by Wget %s on %s.\n\n", version_string,
- OS_TYPE));
+ /* Open WARC file. */
+ if (opt.warc_filename != 0)
+ warc_init ();
+
+ DEBUGP (("DEBUG output created by Wget %s on %s.\n\n",
+ version_string, OS_TYPE));
/* Open the output filename if necessary. */
+
+/* 2005-04-17 SMS.
+ Note that having the output_stream ("-O") file opened here for an FTP
+ URL rather than in getftp() (ftp.c) (and the http equivalent) rather
+ limits the ability in VMS to open the file differently for ASCII
+ versus binary FTP there. (Of course, doing it here allows an open
+ failure to be detected immediately, without first connecting to the
+ server.)
+*/
if (opt.output_document)
{
if (HYPHENP (opt.output_document))
- output_stream = stdout;
+ {
+#ifdef WINDOWS
+ _setmode (_fileno (stdout), _O_BINARY);
+#endif
+ output_stream = stdout;
+ }
else
{
struct_fstat st;
+
+#ifdef __VMS
+/* Common fopen() optional arguments:
+ sequential access only, access callback function.
+*/
+# define FOPEN_OPT_ARGS , "fop=sqo", "acc", acc_cb, &open_id
+ int open_id = 7;
+#else /* def __VMS */
+# define FOPEN_OPT_ARGS
+#endif /* def __VMS [else] */
+
output_stream = fopen (opt.output_document,
- opt.always_rest ? "ab" : "wb");
+ opt.always_rest ? "ab" : "wb"
+ FOPEN_OPT_ARGS);
if (output_stream == NULL)
{
perror (opt.output_document);
if (fstat (fileno (output_stream), &st) == 0 && S_ISREG (st.st_mode))
output_stream_regular = true;
}
+ if (!output_stream_regular && opt.convert_links)
+ {
+ fprintf (stderr, _("-k can be used together with -O only if \
+outputting to a regular file.\n"));
+ print_usage (1);
+ exit(1);
+ }
}
+#ifdef __VMS
+ /* Set global ODS5 flag according to the specified destination (if
+ any), otherwise according to the current default device.
+ */
+ if (output_stream == NULL)
+ set_ods5_dest( "SYS$DISK");
+ else if (output_stream != stdout)
+ set_ods5_dest( opt.output_document);
+#endif /* def __VMS */
+
#ifdef WINDOWS
ws_startup ();
#endif
signal (SIGWINCH, progress_handle_sigwinch);
#endif
- status = RETROK; /* initialize it, just-in-case */
/* Retrieve the URLs from argument list. */
for (t = url; *t; t++)
{
char *filename = NULL, *redirected_URL = NULL;
- int dt;
-
- if ((opt.recursive || opt.page_requisites)
- && (url_scheme (*t) != SCHEME_FTP || url_uses_proxy (*t)))
- {
- int old_follow_ftp = opt.follow_ftp;
+ int dt, url_err;
+ /* Allocate a fresh struct iri for every URL, because
+ * retrieve_url may currently modify it in some
+ * circumstances. */
+ struct iri *iri = iri_new ();
+ struct url *url_parsed;
- /* Turn opt.follow_ftp on in case of recursive FTP retrieval */
- if (url_scheme (*t) == SCHEME_FTP)
- opt.follow_ftp = 1;
-
- status = retrieve_tree (*t);
+ set_uri_encoding (iri, opt.locale, true);
+ url_parsed = url_parse (*t, &url_err, iri, true);
- opt.follow_ftp = old_follow_ftp;
+ if (!url_parsed)
+ {
+ char *error = url_error (*t, url_err);
+ logprintf (LOG_NOTQUIET, "%s: %s.\n",*t, error);
+ xfree (error);
+ inform_exit_status (URLERROR);
}
else
- status = retrieve_url (*t, &filename, &redirected_URL, NULL, &dt, opt.recursive);
-
- if (opt.delete_after && file_exists_p(filename))
{
- DEBUGP (("Removing file due to --delete-after in main():\n"));
- logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
- if (unlink (filename))
- logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
- }
+ if ((opt.recursive || opt.page_requisites)
+ && (url_scheme (*t) != SCHEME_FTP || url_uses_proxy (url_parsed)))
+ {
+ int old_follow_ftp = opt.follow_ftp;
+
+ /* Turn opt.follow_ftp on in case of recursive FTP retrieval */
+ if (url_scheme (*t) == SCHEME_FTP)
+ opt.follow_ftp = 1;
+
+ retrieve_tree (url_parsed, NULL);
+
+ opt.follow_ftp = old_follow_ftp;
+ }
+ else
+ {
+ retrieve_url (url_parsed, *t, &filename, &redirected_URL, NULL,
+ &dt, opt.recursive, iri, true);
+ }
- xfree_null (redirected_URL);
- xfree_null (filename);
+ if (opt.delete_after && filename != NULL && file_exists_p (filename))
+ {
+ DEBUGP (("Removing file due to --delete-after in main():\n"));
+ logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
+ if (unlink (filename))
+ logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
+ }
+ xfree_null (redirected_URL);
+ xfree_null (filename);
+ url_free (url_parsed);
+ }
+ iri_free (iri);
}
/* And then from the input file, if any. */
if (opt.input_filename)
{
int count;
+ int status;
status = retrieve_from_file (opt.input_filename, opt.force_html, &count);
+ inform_exit_status (status);
if (!count)
logprintf (LOG_NOTQUIET, _("No URLs found in %s.\n"),
opt.input_filename);
/* Print broken links. */
if (opt.recursive && opt.spider)
- {
- print_broken_links();
- }
-
+ print_broken_links ();
+
/* Print the downloaded sum. */
if ((opt.recursive || opt.page_requisites
|| nurl > 1
&&
total_downloaded_bytes != 0)
{
+ double end_time = ptimer_measure (timer);
+ ptimer_destroy (timer);
+
+ char *wall_time = xstrdup (secs_to_human_time (end_time - start_time));
+ char *download_time = xstrdup (secs_to_human_time (total_download_time));
logprintf (LOG_NOTQUIET,
- _("FINISHED --%s--\nDownloaded: %d files, %s in %s (%s)\n"),
- time_str (time (NULL)),
- opt.numurls,
- human_readable (total_downloaded_bytes),
- secs_to_human_time (total_download_time),
- retr_rate (total_downloaded_bytes, total_download_time));
+ _("FINISHED --%s--\nTotal wall clock time: %s\n"
+ "Downloaded: %d files, %s in %s (%s)\n"),
+ datetime_str (time (NULL)),
+ wall_time,
+ numurls,
+ human_readable (total_downloaded_bytes),
+ download_time,
+ retr_rate (total_downloaded_bytes, total_download_time));
+ xfree (wall_time);
+ xfree (download_time);
+
/* Print quota warning, if exceeded. */
if (opt.quota && total_downloaded_bytes > opt.quota)
logprintf (LOG_NOTQUIET,
if (opt.convert_links && !opt.delete_after)
convert_all_links ();
- log_close ();
- for (i = 0; i < nurl; i++)
- xfree (url[i]);
cleanup ();
-#ifdef DEBUG_MALLOC
- print_malloc_debug_stats ();
-#endif
- if (status == RETROK)
- return 0;
- else
- return 1;
+ exit (get_exit_status ());
}
#endif /* TESTING */
\f