X-Git-Url: http://sjero.net/git/?a=blobdiff_plain;f=src%2Fmain.c;h=3e731e9d02924bf9dda268ad54d75f9f8193767f;hb=04f29f2f08da21cbcebbf86fe98de0522f024c64;hp=05ad0e76576c73c2e6ace66838513287621e8b31;hpb=8c7bd588fe94bdc12b62b38e286027acfedde751;p=wget diff --git a/src/main.c b/src/main.c index 05ad0e76..3e731e9d 100644 --- a/src/main.c +++ b/src/main.c @@ -1,6 +1,6 @@ /* Command line parsing. Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, - 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, + 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. This file is part of GNU Wget. @@ -55,7 +55,8 @@ as that of the covered work. */ #include "spider.h" #include "http.h" /* for save_cookies */ #include "ptimer.h" - +#include "warc.h" +#include "closeout.h" #include #include #include @@ -287,6 +288,17 @@ static struct cmdline_option option_data[] = { "version", 'V', OPT_FUNCALL, (void *) print_version, no_argument }, { "wait", 'w', OPT_VALUE, "wait", -1 }, { "waitretry", 0, OPT_VALUE, "waitretry", -1 }, + { "warc-cdx", 0, OPT_BOOLEAN, "warccdx", -1 }, +#ifdef HAVE_LIBZ + { "warc-compression", 0, OPT_BOOLEAN, "warccompression", -1 }, +#endif + { "warc-dedup", 0, OPT_VALUE, "warccdxdedup", -1 }, + { "warc-digests", 0, OPT_BOOLEAN, "warcdigests", -1 }, + { "warc-file", 0, OPT_VALUE, "warcfile", -1 }, + { "warc-header", 0, OPT_VALUE, "warcheader", -1 }, + { "warc-keep-log", 0, OPT_BOOLEAN, "warckeeplog", -1 }, + { "warc-max-size", 0, OPT_VALUE, "warcmaxsize", -1 }, + { "warc-tempdir", 0, OPT_VALUE, "warctempdir", -1 }, #ifdef USE_WATT32 { "wdebug", 0, OPT_BOOLEAN, "wdebug", -1 }, #endif @@ -652,6 +664,31 @@ FTP options:\n"), --retr-symlinks when recursing, get linked-to files (not dir).\n"), "\n", + N_("\ +WARC options:\n"), + N_("\ + --warc-file=FILENAME save request/response data to a .warc.gz file.\n"), + N_("\ + --warc-header=STRING insert STRING into the warcinfo record.\n"), + N_("\ + --warc-max-size=NUMBER set maximum size of WARC files to NUMBER.\n"), + N_("\ + --warc-cdx write CDX index files.\n"), + N_("\ + --warc-dedup=FILENAME do not store records listed in this CDX file.\n"), +#ifdef HAVE_LIBZ + N_("\ + --no-warc-compression do not compress WARC files with GZIP.\n"), +#endif + N_("\ + --no-warc-digests do not calculate SHA1 digests.\n"), + N_("\ + --no-warc-keep-log do not store the log file in a WARC record.\n"), + N_("\ + --warc-tempdir=DIRECTORY location for temporary files created by the\n\ + WARC writer.\n"), + "\n", + N_("\ Recursive download:\n"), N_("\ @@ -910,6 +947,7 @@ There is NO WARRANTY, to the extent permitted by law.\n"), stdout) < 0) } char *program_name; /* Needed by lib/error.c. */ +char *program_argstring; /* Needed by wget_warc.c. */ int main (int argc, char **argv) @@ -928,6 +966,8 @@ main (int argc, char **argv) i18n_initialize (); + atexit (close_stdout); + /* Construct the name of the executable, without the directory part. */ #ifdef __VMS /* On VMS, lose the "dev:[dir]" prefix and the ".EXE;nnn" suffix. */ @@ -945,6 +985,27 @@ main (int argc, char **argv) windows_main ((char **) &exec_name); #endif + /* Construct the arguments string. */ + int argstring_length = 1; + for (i = 1; i < argc; i++) + argstring_length += strlen (argv[i]) + 2 + 1; + char *p = program_argstring = malloc (argstring_length * sizeof (char)); + if (p == NULL) + { + fprintf (stderr, _("Memory allocation problem\n")); + exit (2); + } + for (i = 1; i < argc; i++) + { + *p++ = '"'; + int arglen = strlen (argv[i]); + memcpy (p, argv[i], arglen); + p += arglen; + *p++ = '"'; + *p++ = ' '; + } + *p = '\0'; + /* Load the hard-coded defaults. */ defaults (); @@ -975,7 +1036,7 @@ main (int argc, char **argv) } if (!userrc_ret) { - printf ("Exiting due to error in %s\n", optarg); + fprintf (stderr, "Exiting due to error in %s\n", optarg); exit (2); } else @@ -1003,9 +1064,10 @@ main (int argc, char **argv) { if (ret == '?') { - print_usage (0); - printf ("\n"); - printf (_("Try `%s --help' for more options.\n"), exec_name); + print_usage (1); + fprintf (stderr, "\n"); + fprintf (stderr, _("Try `%s --help' for more options.\n"), + exec_name); exit (2); } /* Find the short option character in the mapping. */ @@ -1194,6 +1256,47 @@ for details.\n\n")); } } + if (opt.warc_filename != 0) + { + if (opt.noclobber) + { + fprintf (stderr, + _("WARC output does not work with --no-clobber, " + "--no-clobber will be disabled.\n")); + opt.noclobber = false; + } + if (opt.timestamping) + { + fprintf (stderr, + _("WARC output does not work with timestamping, " + "timestamping will be disabled.\n")); + opt.timestamping = false; + } + if (opt.spider) + { + fprintf (stderr, + _("WARC output does not work with --spider.\n")); + exit (1); + } + if (opt.always_rest) + { + fprintf (stderr, + _("WARC output does not work with --continue, " + "--continue will be disabled.\n")); + opt.always_rest = false; + } + if (opt.warc_cdx_dedup_filename != 0 && !opt.warc_digests_enabled) + { + fprintf (stderr, + _("Digests are disabled; WARC deduplication will " + "not find duplicate records.\n")); + } + if (opt.warc_keep_log) + { + opt.progress_type = "dot"; + } + } + if (opt.ask_passwd && opt.passwd) { fprintf (stderr, @@ -1207,7 +1310,7 @@ for details.\n\n")); /* No URL specified. */ fprintf (stderr, _("%s: missing URL\n"), exec_name); print_usage (1); - printf ("\n"); + fprintf (stderr, "\n"); /* #### Something nicer should be printed here -- similar to the pre-1.5 `--help' page. */ fprintf (stderr, _("Try `%s --help' for more options.\n"), exec_name); @@ -1260,6 +1363,11 @@ for details.\n\n")); /* Fill in the arguments. */ url = alloca_array (char *, nurl + 1); + if (url == NULL) + { + fprintf (stderr, _("Memory allocation problem\n")); + exit (2); + } for (i = 0; i < nurl; i++, optind++) { char *rewritten = rewrite_shorthand_url (argv[optind]); @@ -1273,6 +1381,10 @@ for details.\n\n")); /* Initialize logging. */ log_init (opt.lfilename, append_to_log); + /* Open WARC file. */ + if (opt.warc_filename != 0) + warc_init (); + DEBUGP (("DEBUG output created by Wget %s on %s.\n\n", version_string, OS_TYPE)); @@ -1472,7 +1584,12 @@ outputting to a regular file.\n")); if (opt.convert_links && !opt.delete_after) convert_all_links (); + /* Close WARC file. */ + if (opt.warc_filename != 0) + warc_close (); + log_close (); + for (i = 0; i < nurl; i++) xfree (url[i]); cleanup ();