#include "spider.h"
#include "http.h" /* for save_cookies */
#include "ptimer.h"
+#include "warc.h"
#include <getopt.h>
#include <getpass.h>
{ "continue", 'c', OPT_BOOLEAN, "continue", -1 },
{ "convert-links", 'k', OPT_BOOLEAN, "convertlinks", -1 },
{ "content-disposition", 0, OPT_BOOLEAN, "contentdisposition", -1 },
+ { "content-on-error", 0, OPT_BOOLEAN, "contentonerror", -1 },
{ "cookies", 0, OPT_BOOLEAN, "cookies", -1 },
{ "cut-dirs", 0, OPT_VALUE, "cutdirs", -1 },
{ WHEN_DEBUG ("debug"), 'd', OPT_BOOLEAN, "debug", -1 },
{ "post-data", 0, OPT_VALUE, "postdata", -1 },
{ "post-file", 0, OPT_VALUE, "postfile", -1 },
{ "prefer-family", 0, OPT_VALUE, "preferfamily", -1 },
- { "preserve-permissions", 0, OPT_BOOLEAN, "preservepermissions", -1 }, /* deprecated */
+ { "preserve-permissions", 0, OPT_BOOLEAN, "preservepermissions", -1 },
{ IF_SSL ("private-key"), 0, OPT_VALUE, "privatekey", -1 },
{ IF_SSL ("private-key-type"), 0, OPT_VALUE, "privatekeytype", -1 },
{ "progress", 0, OPT_VALUE, "progress", -1 },
{ "version", 'V', OPT_FUNCALL, (void *) print_version, no_argument },
{ "wait", 'w', OPT_VALUE, "wait", -1 },
{ "waitretry", 0, OPT_VALUE, "waitretry", -1 },
+ { "warc-cdx", 0, OPT_BOOLEAN, "warccdx", -1 },
+#ifdef HAVE_LIBZ
+ { "warc-compression", 0, OPT_BOOLEAN, "warccompression", -1 },
+#endif
+ { "warc-dedup", 0, OPT_VALUE, "warccdxdedup", -1 },
+ { "warc-digests", 0, OPT_BOOLEAN, "warcdigests", -1 },
+ { "warc-file", 0, OPT_VALUE, "warcfile", -1 },
+ { "warc-header", 0, OPT_VALUE, "warcheader", -1 },
+ { "warc-keep-log", 0, OPT_BOOLEAN, "warckeeplog", -1 },
+ { "warc-max-size", 0, OPT_VALUE, "warcmaxsize", -1 },
+ { "warc-tempdir", 0, OPT_VALUE, "warctempdir", -1 },
#ifdef USE_WATT32
{ "wdebug", 0, OPT_BOOLEAN, "wdebug", -1 },
#endif
N_("\
--content-disposition honor the Content-Disposition header when\n\
choosing local file names (EXPERIMENTAL).\n"),
+ N_("\
+ --content-on-error output the received content on server errors.\n"),
N_("\
--auth-no-challenge send Basic HTTP authentication information\n\
without first waiting for the server's\n\
--no-glob turn off FTP file name globbing.\n"),
N_("\
--no-passive-ftp disable the \"passive\" transfer mode.\n"),
+ N_("\
+ --preserve-permissions preserve remote file permissions.\n"),
N_("\
--retr-symlinks when recursing, get linked-to files (not dir).\n"),
"\n",
+ N_("\
+WARC options:\n"),
+ N_("\
+ --warc-file=FILENAME save request/response data to a .warc.gz file.\n"),
+ N_("\
+ --warc-header=STRING insert STRING into the warcinfo record.\n"),
+ N_("\
+ --warc-max-size=NUMBER set maximum size of WARC files to NUMBER.\n"),
+ N_("\
+ --warc-cdx write CDX index files.\n"),
+ N_("\
+ --warc-dedup=FILENAME do not store records listed in this CDX file.\n"),
+#ifdef HAVE_LIBZ
+ N_("\
+ --no-warc-compression do not compress WARC files with GZIP.\n"),
+#endif
+ N_("\
+ --no-warc-digests do not calculate SHA1 digests.\n"),
+ N_("\
+ --no-warc-keep-log do not store the log file in a WARC record.\n"),
+ N_("\
+ --warc-tempdir=DIRECTORY location for temporary files created by the\n\
+ WARC writer.\n"),
+ "\n",
+
N_("\
Recursive download:\n"),
N_("\
size_t i;
if (printf (_("GNU Wget %s, a non-interactive network retriever.\n"),
- version_string))
+ version_string) < 0)
exit (3);
if (print_usage (0) < 0)
exit (3);
int line_length = MAX_CHARS_PER_LINE;
while ((line_length > 0) && (compiled_features[i] != NULL))
{
- if (printf ("%s ", compiled_features[i]))
+ if (printf ("%s ", compiled_features[i]) < 0)
exit (3);
line_length -= strlen (compiled_features[i]) + 2;
i++;
exit (3);
/* TRANSLATORS: When available, an actual copyright character
- (cirle-c) should be used in preference to "(C)". */
+ (circle-c) should be used in preference to "(C)". */
if (fputs (_("\
-Copyright (C) 2009 Free Software Foundation, Inc.\n"), stdout) < 0)
+Copyright (C) 2011 Free Software Foundation, Inc.\n"), stdout) < 0)
exit (3);
if (fputs (_("\
License GPLv3+: GNU GPL version 3 or later\n\
}
char *program_name; /* Needed by lib/error.c. */
+char *program_argstring; /* Needed by wget_warc.c.  Holds the command-line
+                            arguments after argv[0], each wrapped in double
+                            quotes and followed by one space; built from
+                            argv in main.
+                            NOTE(review): this file includes "warc.h";
+                            confirm that wget_warc.c is still the right
+                            file name. */
int
main (int argc, char **argv)
windows_main ((char **) &exec_name);
#endif
+ /* Construct the arguments string: every argument after argv[0] is
+    wrapped in double quotes and followed by one space.  Quote characters
+    inside an argument are copied verbatim, not escaped.  The buffer is
+    sized in a first pass and filled in a second pass. */
+ int argstring_length = 1; /* room for the terminating '\0' */
+ for (i = 1; i < argc; i++)
+ argstring_length += strlen (argv[i]) + 2 + 1; /* two quotes + one space */
+ char *p = program_argstring = malloc (argstring_length * sizeof (char));
+ if (p == NULL)
+ {
+ fprintf (stderr, _("Memory allocation problem\n"));
+ exit (2);
+ }
+ for (i = 1; i < argc; i++)
+ {
+ *p++ = '"';
+ int arglen = strlen (argv[i]);
+ memcpy (p, argv[i], arglen);
+ p += arglen;
+ *p++ = '"';
+ *p++ = ' ';
+ }
+ *p = '\0';
+
/* Load the hard-coded defaults. */
defaults ();
init_switches ();
- /* This seperate getopt_long is needed to find the user config
- and parse it before the other user options. */
+ /* This separate getopt_long is needed to find the user config file
+ option ("--config") and parse it before the other user options. */
longindex = -1;
int retconf;
bool use_userconfig = false;
int confval;
bool userrc_ret = true;
struct cmdline_option *config_opt;
- confval = long_options[longindex].val;
- config_opt = &option_data[confval & ~BOOLEAN_NEG_MARKER];
- if (strcmp (config_opt->long_name, "config") == 0)
- {
- userrc_ret &= run_wgetrc (optarg);
- use_userconfig = true;
- }
- if (!userrc_ret)
+
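+ /* There is no short option for "--config", so the check below skips the
+    case where no long option has been matched (longindex still negative).
+    NOTE(review): the "else break" further down leaves the enclosing
+    construct after the first long option even when that option is not
+    "--config" -- confirm that this is intended. */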
+ if (longindex >= 0)
{
- printf ("Exiting due to error in %s\n", optarg);
- exit (2);
+ confval = long_options[longindex].val;
+ config_opt = &option_data[confval & ~BOOLEAN_NEG_MARKER];
+ if (strcmp (config_opt->long_name, "config") == 0)
+ {
+ userrc_ret &= run_wgetrc (optarg);
+ use_userconfig = true;
+ }
+ if (!userrc_ret)
+ {
+ printf ("Exiting due to error in %s\n", optarg);
+ exit (2);
+ }
+ else
+ break;
}
- else
- break;
}
/* If the user did not specify a config, read the system wgetrc and ~/.wgetrc. */
}
}
+ if (opt.warc_filename != 0) /* A WARC file name is set: reconcile the
+    options that conflict with WARC output.  Every message below goes to
+    stderr. */
+ {
+ if (opt.noclobber) /* warn, then switch no-clobber off */
+ {
+ fprintf (stderr,
+ _("WARC output does not work with --no-clobber, "
+ "--no-clobber will be disabled.\n"));
+ opt.noclobber = false;
+ }
+ if (opt.timestamping) /* warn, then switch timestamping off */
+ {
+ fprintf (stderr,
+ _("WARC output does not work with timestamping, "
+ "timestamping will be disabled.\n"));
+ opt.timestamping = false;
+ }
+ if (opt.spider) /* the only conflict here that is fatal: exit status 1 */
+ {
+ fprintf (stderr,
+ _("WARC output does not work with --spider.\n"));
+ exit (1);
+ }
+ if (opt.always_rest) /* warn, then switch continue mode off */
+ {
+ fprintf (stderr,
+ _("WARC output does not work with --continue, "
+ "--continue will be disabled.\n"));
+ opt.always_rest = false;
+ }
+ if (opt.warc_cdx_dedup_filename != 0 && !opt.warc_digests_enabled) /* warning
+    only; no option is changed */
+ {
+ fprintf (stderr,
+ _("Digests are disabled; WARC deduplication will "
+ "not find duplicate records.\n"));
+ }
+ if (opt.warc_keep_log) /* force the "dot" progress type whenever the
+    keep-log flag is set */
+ {
+ opt.progress_type = "dot";
+ }
+ }
+
if (opt.ask_passwd && opt.passwd)
{
fprintf (stderr,
/* Fill in the arguments. */
url = alloca_array (char *, nurl + 1);
+ if (url == NULL) /* NOTE(review): alloca_array is defined outside this
+    view; if it expands to alloca, failure is not reported through a NULL
+    return and this branch is dead code -- confirm. */
+ {
+ fprintf (stderr, _("Memory allocation problem\n"));
+ exit (2);
+ }
for (i = 0; i < nurl; i++, optind++)
{
char *rewritten = rewrite_shorthand_url (argv[optind]);
/* Initialize logging. */
log_init (opt.lfilename, append_to_log);
+ /* Open WARC file. */
+ if (opt.warc_filename != 0)
+ warc_init ();
+
DEBUGP (("DEBUG output created by Wget %s on %s.\n\n",
version_string, OS_TYPE));
if (opt.convert_links && !opt.delete_after)
convert_all_links ();
+ /* Close WARC file. */
+ if (opt.warc_filename != 0)
+ warc_close ();
+
log_close ();
+
for (i = 0; i < nurl; i++)
xfree (url[i]);
cleanup ();
- return get_exit_status ();
+ exit (get_exit_status ());
}
#endif /* TESTING */
\f