1 /* Command line parsing.
2 Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
3 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation,
6 This file is part of GNU Wget.
8 GNU Wget is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 GNU Wget is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with Wget. If not, see <http://www.gnu.org/licenses/>.
21 Additional permission under GNU GPL version 3 section 7
23 If you modify this program, or any covered work, by linking or
24 combining it with the OpenSSL project's OpenSSL library (or a
25 modified version of that library), containing parts covered by the
26 terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
27 grants you additional permission to convey the resulting work.
28 Corresponding Source for a non-source form of such a combination
29 shall include the source code for the parts of OpenSSL used as well
30 as that of the covered work. */
53 #include "progress.h" /* for progress_handle_sigwinch */
56 #include "http.h" /* for save_cookies */
72 #ifndef PATH_SEPARATOR
73 # define PATH_SEPARATOR '/'
82 /* Defined in version.c. */
83 extern char *version_string; /* printed in the "GNU Wget %s built on %s" banner */
84 extern char *compilation_string; /* NULL-checked before the "Compile: " line is printed */
85 extern char *system_getrc;
86 extern char *link_string; /* NULL-checked before the "Link: " line is printed */
87 /* Defined in build_info.c; the feature list is walked until a NULL entry. */
88 extern const char *compiled_features[];
89 /* Maximum line width used for --version output in print_version. */
90 #define MAX_CHARS_PER_LINE 72
93 #if defined(SIGHUP) || defined(SIGUSR1)
94 static void redirect_output_signal (int);
97 const char *exec_name;
99 /* Number of successfully downloaded URLs */
103 /* Initialize I18N/L10N. That amounts to invoking setlocale, and
104 setting up gettext's message catalog using bindtextdomain and
105 textdomain. Does nothing if NLS is disabled or missing. */
108 i18n_initialize (void)
110 /* ENABLE_NLS implies existence of functions invoked here. */
112 /* Set the current locale. */
113 setlocale (LC_ALL, "");
114 /* Set the text message domain. */
115 bindtextdomain ("wget", LOCALEDIR);
117 #endif /* ENABLE_NLS */
120 /* Definition of command-line options. */
122 static void print_help (void);
123 static void print_version (void);
128 # define IF_SSL(x) NULL
132 # define WHEN_DEBUG(x) x
134 # define WHEN_DEBUG(x) NULL
137 struct cmdline_option {
138 const char *long_name;
144 /* Non-standard options that have to be handled specially in
148 OPT__DONT_REMOVE_LISTING,
153 const void *data; /* for standard options */
154 int argtype; /* for non-standard options */
157 static struct cmdline_option option_data[] =
159 { "accept", 'A', OPT_VALUE, "accept", -1 },
160 { "accept-regex", 0, OPT_VALUE, "acceptregex", -1 },
161 { "adjust-extension", 'E', OPT_BOOLEAN, "adjustextension", -1 },
162 { "append-output", 'a', OPT__APPEND_OUTPUT, NULL, required_argument },
163 { "ask-password", 0, OPT_BOOLEAN, "askpassword", -1 },
164 { "auth-no-challenge", 0, OPT_BOOLEAN, "authnochallenge", -1 },
165 { "background", 'b', OPT_BOOLEAN, "background", -1 },
166 { "backup-converted", 'K', OPT_BOOLEAN, "backupconverted", -1 },
167 { "backups", 0, OPT_BOOLEAN, "backups", -1 },
168 { "base", 'B', OPT_VALUE, "base", -1 },
169 { "bind-address", 0, OPT_VALUE, "bindaddress", -1 },
170 { IF_SSL ("ca-certificate"), 0, OPT_VALUE, "cacertificate", -1 },
171 { IF_SSL ("ca-directory"), 0, OPT_VALUE, "cadirectory", -1 },
172 { "cache", 0, OPT_BOOLEAN, "cache", -1 },
173 { IF_SSL ("certificate"), 0, OPT_VALUE, "certificate", -1 },
174 { IF_SSL ("certificate-type"), 0, OPT_VALUE, "certificatetype", -1 },
175 { IF_SSL ("check-certificate"), 0, OPT_BOOLEAN, "checkcertificate", -1 },
176 { "clobber", 0, OPT__CLOBBER, NULL, optional_argument },
177 { "config", 0, OPT_VALUE, "chooseconfig", -1 },
178 { "connect-timeout", 0, OPT_VALUE, "connecttimeout", -1 },
179 { "continue", 'c', OPT_BOOLEAN, "continue", -1 },
180 { "convert-links", 'k', OPT_BOOLEAN, "convertlinks", -1 },
181 { "content-disposition", 0, OPT_BOOLEAN, "contentdisposition", -1 },
182 { "content-on-error", 0, OPT_BOOLEAN, "contentonerror", -1 },
183 { "cookies", 0, OPT_BOOLEAN, "cookies", -1 },
184 { "cut-dirs", 0, OPT_VALUE, "cutdirs", -1 },
185 { WHEN_DEBUG ("debug"), 'd', OPT_BOOLEAN, "debug", -1 },
186 { "default-page", 0, OPT_VALUE, "defaultpage", -1 },
187 { "delete-after", 0, OPT_BOOLEAN, "deleteafter", -1 },
188 { "directories", 0, OPT_BOOLEAN, "dirstruct", -1 },
189 { "directory-prefix", 'P', OPT_VALUE, "dirprefix", -1 },
190 { "dns-cache", 0, OPT_BOOLEAN, "dnscache", -1 },
191 { "dns-timeout", 0, OPT_VALUE, "dnstimeout", -1 },
192 { "domains", 'D', OPT_VALUE, "domains", -1 },
193 { "dont-remove-listing", 0, OPT__DONT_REMOVE_LISTING, NULL, no_argument },
194 { "dot-style", 0, OPT_VALUE, "dotstyle", -1 }, /* deprecated */
195 { "egd-file", 0, OPT_VALUE, "egdfile", -1 },
196 { "exclude-directories", 'X', OPT_VALUE, "excludedirectories", -1 },
197 { "exclude-domains", 0, OPT_VALUE, "excludedomains", -1 },
198 { "execute", 'e', OPT__EXECUTE, NULL, required_argument },
199 { "follow-ftp", 0, OPT_BOOLEAN, "followftp", -1 },
200 { "follow-tags", 0, OPT_VALUE, "followtags", -1 },
201 { "force-directories", 'x', OPT_BOOLEAN, "dirstruct", -1 },
202 { "force-html", 'F', OPT_BOOLEAN, "forcehtml", -1 },
203 { "ftp-password", 0, OPT_VALUE, "ftppassword", -1 },
205 { "ftp-stmlf", 0, OPT_BOOLEAN, "ftpstmlf", -1 },
206 #endif /* def __VMS */
207 { "ftp-user", 0, OPT_VALUE, "ftpuser", -1 },
208 { "glob", 0, OPT_BOOLEAN, "glob", -1 },
209 { "header", 0, OPT_VALUE, "header", -1 },
210 { "help", 'h', OPT_FUNCALL, (void *)print_help, no_argument },
211 { "host-directories", 0, OPT_BOOLEAN, "addhostdir", -1 },
212 { "html-extension", 'E', OPT_BOOLEAN, "adjustextension", -1 }, /* deprecated */
213 { "htmlify", 0, OPT_BOOLEAN, "htmlify", -1 },
214 { "http-keep-alive", 0, OPT_BOOLEAN, "httpkeepalive", -1 },
215 { "http-passwd", 0, OPT_VALUE, "httppassword", -1 }, /* deprecated */
216 { "http-password", 0, OPT_VALUE, "httppassword", -1 },
217 { "http-user", 0, OPT_VALUE, "httpuser", -1 },
218 { "ignore-case", 0, OPT_BOOLEAN, "ignorecase", -1 },
219 { "ignore-length", 0, OPT_BOOLEAN, "ignorelength", -1 },
220 { "ignore-tags", 0, OPT_VALUE, "ignoretags", -1 },
221 { "include-directories", 'I', OPT_VALUE, "includedirectories", -1 },
223 { "inet4-only", '4', OPT_BOOLEAN, "inet4only", -1 },
224 { "inet6-only", '6', OPT_BOOLEAN, "inet6only", -1 },
226 { "input-file", 'i', OPT_VALUE, "input", -1 },
227 { "iri", 0, OPT_BOOLEAN, "iri", -1 },
228 { "keep-session-cookies", 0, OPT_BOOLEAN, "keepsessioncookies", -1 },
229 { "level", 'l', OPT_VALUE, "reclevel", -1 },
230 { "limit-rate", 0, OPT_VALUE, "limitrate", -1 },
231 { "load-cookies", 0, OPT_VALUE, "loadcookies", -1 },
232 { "local-encoding", 0, OPT_VALUE, "localencoding", -1 },
233 { "max-redirect", 0, OPT_VALUE, "maxredirect", -1 },
234 { "mirror", 'm', OPT_BOOLEAN, "mirror", -1 },
235 { "no", 'n', OPT__NO, NULL, required_argument },
236 { "no-clobber", 0, OPT_BOOLEAN, "noclobber", -1 },
237 { "no-parent", 0, OPT_BOOLEAN, "noparent", -1 },
238 { "output-document", 'O', OPT_VALUE, "outputdocument", -1 },
239 { "output-file", 'o', OPT_VALUE, "logfile", -1 },
240 { "page-requisites", 'p', OPT_BOOLEAN, "pagerequisites", -1 },
241 { "parent", 0, OPT__PARENT, NULL, optional_argument },
242 { "passive-ftp", 0, OPT_BOOLEAN, "passiveftp", -1 },
243 { "password", 0, OPT_VALUE, "password", -1 },
244 { "post-data", 0, OPT_VALUE, "postdata", -1 },
245 { "post-file", 0, OPT_VALUE, "postfile", -1 },
246 { "prefer-family", 0, OPT_VALUE, "preferfamily", -1 },
247 { "preserve-permissions", 0, OPT_BOOLEAN, "preservepermissions", -1 },
248 { IF_SSL ("private-key"), 0, OPT_VALUE, "privatekey", -1 },
249 { IF_SSL ("private-key-type"), 0, OPT_VALUE, "privatekeytype", -1 },
250 { "progress", 0, OPT_VALUE, "progress", -1 },
251 { "protocol-directories", 0, OPT_BOOLEAN, "protocoldirectories", -1 },
252 { "proxy", 0, OPT_BOOLEAN, "useproxy", -1 },
253 { "proxy__compat", 'Y', OPT_VALUE, "useproxy", -1 }, /* back-compatible */
254 { "proxy-passwd", 0, OPT_VALUE, "proxypassword", -1 }, /* deprecated */
255 { "proxy-password", 0, OPT_VALUE, "proxypassword", -1 },
256 { "proxy-user", 0, OPT_VALUE, "proxyuser", -1 },
257 { "quiet", 'q', OPT_BOOLEAN, "quiet", -1 },
258 { "quota", 'Q', OPT_VALUE, "quota", -1 },
259 { "random-file", 0, OPT_VALUE, "randomfile", -1 },
260 { "random-wait", 0, OPT_BOOLEAN, "randomwait", -1 },
261 { "read-timeout", 0, OPT_VALUE, "readtimeout", -1 },
262 { "recursive", 'r', OPT_BOOLEAN, "recursive", -1 },
263 { "referer", 0, OPT_VALUE, "referer", -1 },
264 { "regex-type", 0, OPT_VALUE, "regextype", -1 },
265 { "reject", 'R', OPT_VALUE, "reject", -1 },
266 { "reject-regex", 0, OPT_VALUE, "rejectregex", -1 },
267 { "relative", 'L', OPT_BOOLEAN, "relativeonly", -1 },
268 { "remote-encoding", 0, OPT_VALUE, "remoteencoding", -1 },
269 { "remove-listing", 0, OPT_BOOLEAN, "removelisting", -1 },
270 { "report-speed", 0, OPT_BOOLEAN, "reportspeed", -1 },
271 { "restrict-file-names", 0, OPT_BOOLEAN, "restrictfilenames", -1 },
272 { "retr-symlinks", 0, OPT_BOOLEAN, "retrsymlinks", -1 },
273 { "retry-connrefused", 0, OPT_BOOLEAN, "retryconnrefused", -1 },
274 { "save-cookies", 0, OPT_VALUE, "savecookies", -1 },
275 { "save-headers", 0, OPT_BOOLEAN, "saveheaders", -1 },
276 { IF_SSL ("secure-protocol"), 0, OPT_VALUE, "secureprotocol", -1 },
277 { "server-response", 'S', OPT_BOOLEAN, "serverresponse", -1 },
278 { "span-hosts", 'H', OPT_BOOLEAN, "spanhosts", -1 },
279 { "spider", 0, OPT_BOOLEAN, "spider", -1 },
280 { "strict-comments", 0, OPT_BOOLEAN, "strictcomments", -1 },
281 { "timeout", 'T', OPT_VALUE, "timeout", -1 },
282 { "timestamping", 'N', OPT_BOOLEAN, "timestamping", -1 },
283 { "tries", 't', OPT_VALUE, "tries", -1 },
284 { "unlink", 0, OPT_BOOLEAN, "unlink", -1 },
285 { "trust-server-names", 0, OPT_BOOLEAN, "trustservernames", -1 },
286 { "use-server-timestamps", 0, OPT_BOOLEAN, "useservertimestamps", -1 },
287 { "user", 0, OPT_VALUE, "user", -1 },
288 { "user-agent", 'U', OPT_VALUE, "useragent", -1 },
289 { "verbose", 'v', OPT_BOOLEAN, "verbose", -1 },
290 { "verbose", 0, OPT_BOOLEAN, "verbose", -1 },
291 { "version", 'V', OPT_FUNCALL, (void *) print_version, no_argument },
292 { "wait", 'w', OPT_VALUE, "wait", -1 },
293 { "waitretry", 0, OPT_VALUE, "waitretry", -1 },
294 { "warc-cdx", 0, OPT_BOOLEAN, "warccdx", -1 },
296 { "warc-compression", 0, OPT_BOOLEAN, "warccompression", -1 },
298 { "warc-dedup", 0, OPT_VALUE, "warccdxdedup", -1 },
299 { "warc-digests", 0, OPT_BOOLEAN, "warcdigests", -1 },
300 { "warc-file", 0, OPT_VALUE, "warcfile", -1 },
301 { "warc-header", 0, OPT_VALUE, "warcheader", -1 },
302 { "warc-keep-log", 0, OPT_BOOLEAN, "warckeeplog", -1 },
303 { "warc-max-size", 0, OPT_VALUE, "warcmaxsize", -1 },
304 { "warc-tempdir", 0, OPT_VALUE, "warctempdir", -1 },
306 { "wdebug", 0, OPT_BOOLEAN, "wdebug", -1 },
313 /* Return a string that contains S with "no-" prepended. The string
314 is NUL-terminated and allocated off static storage at Wget
318 no_prefix (const char *s)
320 static char buffer[1024];
321 static char *p = buffer;
324 int size = 3 + strlen (s) + 1; /* "no-STRING\0" */
325 if (p + size >= buffer + sizeof (buffer))
328 cp[0] = 'n', cp[1] = 'o', cp[2] = '-';
334 /* The arguments that main passes to getopt_long.  long_options has
   room for two entries per option_data element (a boolean option also
   gets a `--no-FOO' twin) plus one trailing entry that stays zeroed
   because the storage is static.  */
335 static struct option long_options[2 * countof (option_data) + 1];
336 static char short_options[128];
338 /* Mapping between short option chars and long_options indices: the
   slot for character C is optmap[C - 32], and the stored value is used
   to index long_options when a short option is seen.
   NOTE(review): entries are unsigned char, so an index above 255 would
   not fit -- confirm the option table stays small enough.  */
339 static unsigned char optmap[96];
341 /* Marker OR'ed into the `val' of `--no-FOO' entries in long_options;
   it is masked off again to recover the option_data index.  */
342 #define BOOLEAN_NEG_MARKER 1024
344 /* Initialize the long_options array used by getopt_long from the data
350 char *p = short_options;
352 for (i = 0; i < countof (option_data); i++)
354 struct cmdline_option *opt = &option_data[i];
355 struct option *longopt;
358 /* The option is disabled. */
361 longopt = &long_options[o++];
362 longopt->name = opt->long_name;
366 *p++ = opt->short_name;
367 optmap[opt->short_name - 32] = longopt - long_options;
372 longopt->has_arg = required_argument;
377 /* Specify an optional argument for long options, so that
378 --option=off works the same as --no-option, for
379 compatibility with pre-1.10 Wget. However, don't specify
380 optional arguments for short-option booleans because they
381 prevent combining of short options. */
382 longopt->has_arg = optional_argument;
383 /* For Boolean options, add the "--no-FOO" variant, which is
384 identical to "--foo", except it has opposite meaning and
385 it doesn't allow an argument. */
386 longopt = &long_options[o++];
387 longopt->name = no_prefix (opt->long_name);
388 longopt->has_arg = no_argument;
389 /* Mask the value so we'll be able to recognize that we're
390 dealing with the false value. */
391 longopt->val = i | BOOLEAN_NEG_MARKER;
394 assert (opt->argtype != -1);
395 longopt->has_arg = opt->argtype;
398 if (longopt->has_arg == required_argument)
400 /* Don't handle optional_argument */
404 /* Terminate short_options. */
406 /* No need for xzero(long_options[o]) because its storage is static
407 and it will be zeroed by default. */
408 assert (o <= countof (long_options));
411 /* Print the usage message. */
413 print_usage (int error)
415 return fprintf (error ? stderr : stdout,
416 _("Usage: %s [OPTION]... [URL]...\n"), exec_name);
419 /* Print the help message, describing all the available options. If
420 you add an option, be sure to update this list. */
424 /* We split the help text this way to ease translation of individual
426 static const char *help[] = {
429 Mandatory arguments to long options are mandatory for short options too.\n\n"),
433 -V, --version display the version of Wget and exit.\n"),
435 -h, --help print this help.\n"),
437 -b, --background go to background after startup.\n"),
439 -e, --execute=COMMAND execute a `.wgetrc'-style command.\n"),
443 Logging and input file:\n"),
445 -o, --output-file=FILE log messages to FILE.\n"),
447 -a, --append-output=FILE append messages to FILE.\n"),
450 -d, --debug print lots of debugging information.\n"),
454 --wdebug print Watt-32 debug output.\n"),
457 -q, --quiet quiet (no output).\n"),
459 -v, --verbose be verbose (this is the default).\n"),
461 -nv, --no-verbose turn off verboseness, without being quiet.\n"),
463 --report-speed=TYPE Output bandwidth as TYPE. TYPE can be bits.\n"),
465 -i, --input-file=FILE download URLs found in local or external FILE.\n"),
467 -F, --force-html treat input file as HTML.\n"),
469 -B, --base=URL resolves HTML input-file links (-i -F)\n\
470 relative to URL.\n"),
472 --config=FILE Specify config file to use.\n"),
478 -t, --tries=NUMBER set number of retries to NUMBER (0 unlimits).\n"),
480 --retry-connrefused retry even if connection is refused.\n"),
482 -O, --output-document=FILE write documents to FILE.\n"),
484 -nc, --no-clobber skip downloads that would download to\n\
485 existing files (overwriting them).\n"),
487 -c, --continue resume getting a partially-downloaded file.\n"),
489 --progress=TYPE select progress gauge type.\n"),
491 -N, --timestamping don't re-retrieve files unless newer than\n\
494 --no-use-server-timestamps don't set the local file's timestamp by\n\
495 the one on the server.\n"),
497 -S, --server-response print server response.\n"),
499 --spider don't download anything.\n"),
501 -T, --timeout=SECONDS set all timeout values to SECONDS.\n"),
503 --dns-timeout=SECS set the DNS lookup timeout to SECS.\n"),
505 --connect-timeout=SECS set the connect timeout to SECS.\n"),
507 --read-timeout=SECS set the read timeout to SECS.\n"),
509 -w, --wait=SECONDS wait SECONDS between retrievals.\n"),
511 --waitretry=SECONDS wait 1..SECONDS between retries of a retrieval.\n"),
513 --random-wait wait from 0.5*WAIT...1.5*WAIT secs between retrievals.\n"),
515 --no-proxy explicitly turn off proxy.\n"),
517 -Q, --quota=NUMBER set retrieval quota to NUMBER.\n"),
519 --bind-address=ADDRESS bind to ADDRESS (hostname or IP) on local host.\n"),
521 --limit-rate=RATE limit download rate to RATE.\n"),
523 --no-dns-cache disable caching DNS lookups.\n"),
525 --restrict-file-names=OS restrict chars in file names to ones OS allows.\n"),
527 --ignore-case ignore case when matching files/directories.\n"),
530 -4, --inet4-only connect only to IPv4 addresses.\n"),
532 -6, --inet6-only connect only to IPv6 addresses.\n"),
534 --prefer-family=FAMILY connect first to addresses of specified family,\n\
535 one of IPv6, IPv4, or none.\n"),
538 --user=USER set both ftp and http user to USER.\n"),
540 --password=PASS set both ftp and http password to PASS.\n"),
542 --ask-password prompt for passwords.\n"),
544 --no-iri turn off IRI support.\n"),
546 --local-encoding=ENC use ENC as the local encoding for IRIs.\n"),
548 --remote-encoding=ENC use ENC as the default remote encoding.\n"),
550 --unlink remove file before clobber.\n"),
556 -nd, --no-directories don't create directories.\n"),
558 -x, --force-directories force creation of directories.\n"),
560 -nH, --no-host-directories don't create host directories.\n"),
562 --protocol-directories use protocol name in directories.\n"),
564 -P, --directory-prefix=PREFIX save files to PREFIX/...\n"),
566 --cut-dirs=NUMBER ignore NUMBER remote directory components.\n"),
572 --http-user=USER set http user to USER.\n"),
574 --http-password=PASS set http password to PASS.\n"),
576 --no-cache disallow server-cached data.\n"),
578 --default-page=NAME Change the default page name (normally\n\
579 this is `index.html'.).\n"),
581 -E, --adjust-extension save HTML/CSS documents with proper extensions.\n"),
583 --ignore-length ignore `Content-Length' header field.\n"),
585 --header=STRING insert STRING among the headers.\n"),
587 --max-redirect maximum redirections allowed per page.\n"),
589 --proxy-user=USER set USER as proxy username.\n"),
591 --proxy-password=PASS set PASS as proxy password.\n"),
593 --referer=URL include `Referer: URL' header in HTTP request.\n"),
595 --save-headers save the HTTP headers to file.\n"),
597 -U, --user-agent=AGENT identify as AGENT instead of Wget/VERSION.\n"),
599 --no-http-keep-alive disable HTTP keep-alive (persistent connections).\n"),
601 --no-cookies don't use cookies.\n"),
603 --load-cookies=FILE load cookies from FILE before session.\n"),
605 --save-cookies=FILE save cookies to FILE after session.\n"),
607 --keep-session-cookies load and save session (non-permanent) cookies.\n"),
609 --post-data=STRING use the POST method; send STRING as the data.\n"),
611 --post-file=FILE use the POST method; send contents of FILE.\n"),
613 --content-disposition honor the Content-Disposition header when\n\
614 choosing local file names (EXPERIMENTAL).\n"),
616 --content-on-error output the received content on server errors.\n"),
618 --auth-no-challenge send Basic HTTP authentication information\n\
619 without first waiting for the server's\n\
625 HTTPS (SSL/TLS) options:\n"),
627 --secure-protocol=PR choose secure protocol, one of auto, SSLv2,\n\
628 SSLv3, and TLSv1.\n"),
630 --no-check-certificate don't validate the server's certificate.\n"),
632 --certificate=FILE client certificate file.\n"),
634 --certificate-type=TYPE client certificate type, PEM or DER.\n"),
636 --private-key=FILE private key file.\n"),
638 --private-key-type=TYPE private key type, PEM or DER.\n"),
640 --ca-certificate=FILE file with the bundle of CA's.\n"),
642 --ca-directory=DIR directory where hash list of CA's is stored.\n"),
644 --random-file=FILE file with random data for seeding the SSL PRNG.\n"),
646 --egd-file=FILE file naming the EGD socket with random data.\n"),
648 #endif /* HAVE_SSL */
654 --ftp-stmlf Use Stream_LF format for all binary FTP files.\n"),
655 #endif /* def __VMS */
657 --ftp-user=USER set ftp user to USER.\n"),
659 --ftp-password=PASS set ftp password to PASS.\n"),
661 --no-remove-listing don't remove `.listing' files.\n"),
663 --no-glob turn off FTP file name globbing.\n"),
665 --no-passive-ftp disable the \"passive\" transfer mode.\n"),
667 --preserve-permissions preserve remote file permissions.\n"),
669 --retr-symlinks when recursing, get linked-to files (not dir).\n"),
675 --warc-file=FILENAME save request/response data to a .warc.gz file.\n"),
677 --warc-header=STRING insert STRING into the warcinfo record.\n"),
679 --warc-max-size=NUMBER set maximum size of WARC files to NUMBER.\n"),
681 --warc-cdx write CDX index files.\n"),
683 --warc-dedup=FILENAME do not store records listed in this CDX file.\n"),
686 --no-warc-compression do not compress WARC files with GZIP.\n"),
689 --no-warc-digests do not calculate SHA1 digests.\n"),
691 --no-warc-keep-log do not store the log file in a WARC record.\n"),
693 --warc-tempdir=DIRECTORY location for temporary files created by the\n\
698 Recursive download:\n"),
700 -r, --recursive specify recursive download.\n"),
702 -l, --level=NUMBER maximum recursion depth (inf or 0 for infinite).\n"),
704 --delete-after delete files locally after downloading them.\n"),
706 -k, --convert-links make links in downloaded HTML or CSS point to\n\
710 -K, --backup-converted before converting file X, back up as X_orig.\n"),
711 #else /* def __VMS */
713 -K, --backup-converted before converting file X, back up as X.orig.\n"),
714 #endif /* def __VMS [else] */
716 -m, --mirror shortcut for -N -r -l inf --no-remove-listing.\n"),
718 -p, --page-requisites get all images, etc. needed to display HTML page.\n"),
720 --strict-comments turn on strict (SGML) handling of HTML comments.\n"),
724 Recursive accept/reject:\n"),
726 -A, --accept=LIST comma-separated list of accepted extensions.\n"),
728 -R, --reject=LIST comma-separated list of rejected extensions.\n"),
730 --accept-regex=REGEX regex matching accepted URLs.\n"),
732 --reject-regex=REGEX regex matching rejected URLs.\n"),
735 --regex-type=TYPE regex type (posix|pcre).\n"),
738 --regex-type=TYPE regex type (posix).\n"),
741 -D, --domains=LIST comma-separated list of accepted domains.\n"),
743 --exclude-domains=LIST comma-separated list of rejected domains.\n"),
745 --follow-ftp follow FTP links from HTML documents.\n"),
747 --follow-tags=LIST comma-separated list of followed HTML tags.\n"),
749 --ignore-tags=LIST comma-separated list of ignored HTML tags.\n"),
751 -H, --span-hosts go to foreign hosts when recursive.\n"),
753 -L, --relative follow relative links only.\n"),
755 -I, --include-directories=LIST list of allowed directories.\n"),
757 --trust-server-names use the name specified by the redirection\n\
758 url last component.\n"),
760 -X, --exclude-directories=LIST list of excluded directories.\n"),
762 -np, --no-parent don't ascend to the parent directory.\n"),
764 N_("Mail bug reports and suggestions to <bug-wget@gnu.org>.\n")
769 if (printf (_("GNU Wget %s, a non-interactive network retriever.\n"),
772 if (print_usage (0) < 0)
775 for (i = 0; i < countof (help); i++)
776 if (fputs (_(help[i]), stdout) < 0)
782 /* Return a human-readable printed representation of INTERVAL,
783 measured in seconds. */
786 secs_to_human_time (double interval)
789 int secs = (int) (interval + 0.5);
790 int hours, mins, days;
792 days = secs / 86400, secs %= 86400;
793 hours = secs / 3600, secs %= 3600;
794 mins = secs / 60, secs %= 60;
797 sprintf (buf, "%dd %dh %dm %ds", days, hours, mins, secs);
799 sprintf (buf, "%dh %dm %ds", hours, mins, secs);
801 sprintf (buf, "%dm %ds", mins, secs);
803 sprintf (buf, "%ss", print_decimal (interval));
809 prompt_for_password (void)
812 fprintf (stderr, _("Password for user %s: "), quote (opt.user));
814 fprintf (stderr, _("Password: "));
818 /* Function that prints the line argument while limiting it
819 to at most line_length. prefix is printed on the first line
820 and an appropriate number of spaces are added on subsequent
823 format_and_print_line (const char *prefix, const char *line,
827 char *line_dup, *token;
829 assert (prefix != NULL);
830 assert (line != NULL);
832 line_dup = xstrdup (line);
834 if (line_length <= 0)
835 line_length = MAX_CHARS_PER_LINE - TABULATION;
837 if (printf ("%s", prefix) < 0)
839 remaining_chars = line_length;
840 /* We break on spaces. */
841 token = strtok (line_dup, " ");
842 while (token != NULL)
844 /* If however a token is much larger than the maximum
845 line length, all bets are off and we simply print the
846 token on the next line. */
847 if (remaining_chars <= strlen (token))
849 if (printf ("\n%*c", TABULATION, ' ') < 0)
851 remaining_chars = line_length - TABULATION;
853 if (printf ("%s ", token) < 0)
855 remaining_chars -= strlen (token) + 1; /* account for " " */
856 token = strtok (NULL, " ");
859 if (printf ("\n") < 0)
869 const char *wgetrc_title = _("Wgetrc: ");
870 const char *locale_title = _("Locale: ");
871 const char *compile_title = _("Compile: ");
872 const char *link_title = _("Link: ");
873 char *env_wgetrc, *user_wgetrc;
876 if (printf (_("GNU Wget %s built on %s.\n\n"), version_string, OS_TYPE) < 0)
879 for (i = 0; compiled_features[i] != NULL; )
881 int line_length = MAX_CHARS_PER_LINE;
882 while ((line_length > 0) && (compiled_features[i] != NULL))
884 if (printf ("%s ", compiled_features[i]) < 0)
886 line_length -= strlen (compiled_features[i]) + 2;
889 if (printf ("\n") < 0)
892 if (printf ("\n") < 0)
895 /* Handle the case when $WGETRC is unset and $HOME/.wgetrc is
897 if (printf ("%s\n", wgetrc_title) < 0)
900 env_wgetrc = wgetrc_env_file_name ();
901 if (env_wgetrc && *env_wgetrc)
903 if (printf (_(" %s (env)\n"), env_wgetrc) < 0)
907 user_wgetrc = wgetrc_user_file_name ();
910 if (printf (_(" %s (user)\n"), user_wgetrc) < 0)
915 if (printf (_(" %s (system)\n"), SYSTEM_WGETRC) < 0)
920 if (format_and_print_line (locale_title,
922 MAX_CHARS_PER_LINE) < 0)
924 #endif /* def ENABLE_NLS */
926 if (compilation_string != NULL)
927 if (format_and_print_line (compile_title,
929 MAX_CHARS_PER_LINE) < 0)
932 if (link_string != NULL)
933 if (format_and_print_line (link_title,
935 MAX_CHARS_PER_LINE) < 0)
938 if (printf ("\n") < 0)
941 /* TRANSLATORS: When available, an actual copyright character
942 (circle-c) should be used in preference to "(C)". */
944 Copyright (C) 2011 Free Software Foundation, Inc.\n"), stdout) < 0)
947 License GPLv3+: GNU GPL version 3 or later\n\
948 <http://www.gnu.org/licenses/gpl.html>.\n\
949 This is free software: you are free to change and redistribute it.\n\
950 There is NO WARRANTY, to the extent permitted by law.\n"), stdout) < 0)
952 /* TRANSLATORS: When available, please use the proper diacritics for
953 names such as this one. See en_US.po for reference. */
954 if (fputs (_("\nOriginally written by Hrvoje Niksic <hniksic@xemacs.org>.\n"),
957 if (fputs (_("Please send bug reports and questions to <bug-wget@gnu.org>.\n"),
964 char *program_name; /* Needed by lib/error.c. */
965 char *program_argstring; /* Needed by wget_warc.c. */
968 main (int argc, char **argv)
971 int i, ret, longindex;
973 bool append_to_log = false;
975 total_downloaded_bytes = 0;
977 program_name = argv[0];
979 struct ptimer *timer = ptimer_new ();
980 double start_time = ptimer_measure (timer);
984 /* Construct the name of the executable, without the directory part. */
986 /* On VMS, lose the "dev:[dir]" prefix and the ".EXE;nnn" suffix. */
987 exec_name = vms_basename (argv[0]);
988 #else /* def __VMS */
989 exec_name = strrchr (argv[0], PATH_SEPARATOR);
994 #endif /* def __VMS [else] */
997 /* Drop extension (typically .EXE) from executable filename. */
998 windows_main ((char **) &exec_name);
1001 /* Construct the arguments string. */
1002 int argstring_length = 1;
1003 for (i = 1; i < argc; i++)
1004 argstring_length += strlen (argv[i]) + 2 + 1;
1005 char *p = program_argstring = malloc (argstring_length * sizeof (char));
1008 fprintf (stderr, _("Memory allocation problem\n"));
1011 for (i = 1; i < argc; i++)
1014 int arglen = strlen (argv[i]);
1015 memcpy (p, argv[i], arglen);
1022 /* Load the hard-coded defaults. */
1027 /* This separate getopt_long is needed to find the user config file
1028 option ("--config") and parse it before the other user options. */
1031 bool use_userconfig = false;
1033 while ((retconf = getopt_long (argc, argv,
1034 short_options, long_options, &longindex)) != -1)
1037 bool userrc_ret = true;
1038 struct cmdline_option *config_opt;
1040 /* There is no short option for "--config". */
1043 confval = long_options[longindex].val;
1044 config_opt = &option_data[confval & ~BOOLEAN_NEG_MARKER];
1045 if (strcmp (config_opt->long_name, "config") == 0)
1047 userrc_ret &= run_wgetrc (optarg);
1048 use_userconfig = true;
1052 fprintf (stderr, "Exiting due to error in %s\n", optarg);
1060 /* If the user did not specify a config, read the system wgetrc and ~/.wgetrc. */
1061 if (use_userconfig == false)
1068 while ((ret = getopt_long (argc, argv,
1069 short_options, long_options, &longindex)) != -1)
1072 struct cmdline_option *opt;
1074 /* If LONGINDEX is unchanged, it means RET is referring to a short
1076 if (longindex == -1)
1081 fprintf (stderr, "\n");
1082 fprintf (stderr, _("Try `%s --help' for more options.\n"),
1086 /* Find the short option character in the mapping. */
1087 longindex = optmap[ret - 32];
1089 val = long_options[longindex].val;
1091 /* Use the retrieved value to locate the option in the
1092 option_data array, and to see if we're dealing with the
1093 negated "--no-FOO" variant of the boolean option "--foo". */
1094 opt = &option_data[val & ~BOOLEAN_NEG_MARKER];
1098 setoptval (opt->data, optarg, opt->long_name);
1102 /* The user has specified a value -- use it. */
1103 setoptval (opt->data, optarg, opt->long_name);
1106 /* NEG is true for `--no-FOO' style boolean options. */
1107 bool neg = !!(val & BOOLEAN_NEG_MARKER);
1108 setoptval (opt->data, neg ? "0" : "1", opt->long_name);
1113 void (*func) (void) = (void (*) (void)) opt->data;
1117 case OPT__APPEND_OUTPUT:
1118 setoptval ("logfile", optarg, opt->long_name);
1119 append_to_log = true;
1122 run_command (optarg);
1126 /* We support real --no-FOO flags now, but keep these
1127 short options for convenience and backward
1130 for (p = optarg; p && *p; p++)
1134 setoptval ("verbose", "0", opt->long_name);
1137 setoptval ("addhostdir", "0", opt->long_name);
1140 setoptval ("dirstruct", "0", opt->long_name);
1143 setoptval ("noclobber", "1", opt->long_name);
1146 setoptval ("noparent", "1", opt->long_name);
1149 fprintf (stderr, _("%s: illegal option -- `-n%c'\n"),
1152 fprintf (stderr, "\n");
1153 fprintf (stderr, _("Try `%s --help' for more options.\n"),
1162 /* The wgetrc commands are named noparent and noclobber,
1163 so we must invert the meaning of the cmdline options
1164 before passing the value to setoptval. */
1167 flag = (*optarg == '1' || c_tolower (*optarg) == 'y'
1168 || (c_tolower (optarg[0]) == 'o'
1169 && c_tolower (optarg[1]) == 'n'));
1170 setoptval (opt->type == OPT__PARENT ? "noparent" : "noclobber",
1171 flag ? "0" : "1", opt->long_name);
1174 case OPT__DONT_REMOVE_LISTING:
1175 setoptval ("removelisting", "0", opt->long_name);
1182 nurl = argc - optind;
1184 /* All user options have now been processed, so it's now safe to do
1185 interoption dependency checks. */
1187 if (opt.noclobber && opt.convert_links)
1190 _("Both --no-clobber and --convert-links were specified,"
1191 "only --convert-links will be used.\n"));
1192 opt.noclobber = false;
1195 if (opt.reclevel == 0)
1196 opt.reclevel = INFINITE_RECURSION; /* see recur.h for commentary */
1198 if (opt.spider || opt.delete_after)
1199 opt.no_dirstruct = true;
1201 if (opt.page_requisites && !opt.recursive)
1203 /* Don't set opt.recursive here because it would confuse the FTP
1204 code. Instead, call retrieve_tree below when either
1205 page_requisites or recursive is requested. */
1207 if (!opt.no_dirstruct)
1208 opt.dirstruct = 1; /* normally handled by cmd_spec_recursive() */
1211 if (opt.verbose == -1)
1212 opt.verbose = !opt.quiet;
1214 /* Sanity checks. */
1215 if (opt.verbose && opt.quiet)
1217 fprintf (stderr, _("Can't be verbose and quiet at the same time.\n"));
1221 if (opt.timestamping && opt.noclobber)
1223 fprintf (stderr, _("\
1224 Can't timestamp and not clobber old files at the same time.\n"));
1229 if (opt.ipv4_only && opt.ipv6_only)
1232 _("Cannot specify both --inet4-only and --inet6-only.\n"));
1237 if (opt.output_document)
1239 if (opt.convert_links
1240 && (nurl > 1 || opt.page_requisites || opt.recursive))
1243 Cannot specify both -k and -O if multiple URLs are given, or in combination\n\
1244 with -p or -r. See the manual for details.\n\n"), stderr);
1248 if (opt.page_requisites
1251 logprintf (LOG_NOTQUIET, "%s", _("\
1252 WARNING: combining -O with -r or -p will mean that all downloaded content\n\
1253 will be placed in the single file you specified.\n\n"));
1255 if (opt.timestamping)
1257 logprintf (LOG_NOTQUIET, "%s", _("\
1258 WARNING: timestamping does nothing in combination with -O. See the manual\n\
1259 for details.\n\n"));
1260 opt.timestamping = false;
1262 if (opt.noclobber && file_exists_p(opt.output_document))
1264 /* Check if output file exists; if it does, exit. */
1265 logprintf (LOG_VERBOSE,
1266 _("File `%s' already there; not retrieving.\n"),
1267 opt.output_document);
1272 if (opt.warc_filename != 0)
1277 _("WARC output does not work with --no-clobber, "
1278 "--no-clobber will be disabled.\n"));
1279 opt.noclobber = false;
1281 if (opt.timestamping)
1284 _("WARC output does not work with timestamping, "
1285 "timestamping will be disabled.\n"));
1286 opt.timestamping = false;
1291 _("WARC output does not work with --spider.\n"));
1294 if (opt.always_rest)
1297 _("WARC output does not work with --continue, "
1298 "--continue will be disabled.\n"));
1299 opt.always_rest = false;
1301 if (opt.warc_cdx_dedup_filename != 0 && !opt.warc_digests_enabled)
1304 _("Digests are disabled; WARC deduplication will "
1305 "not find duplicate records.\n"));
1307 if (opt.warc_keep_log)
1309 opt.progress_type = xstrdup ("dot");
1313 if (opt.ask_passwd && opt.passwd)
1316 _("Cannot specify both --ask-password and --password.\n"));
1321 if (!nurl && !opt.input_filename)
1323 /* No URL specified. */
1324 fprintf (stderr, _("%s: missing URL\n"), exec_name);
1326 fprintf (stderr, "\n");
1327 /* #### Something nicer should be printed here -- similar to the
1328 pre-1.5 `--help' page. */
1329 fprintf (stderr, _("Try `%s --help' for more options.\n"), exec_name);
1333 /* Compile the regular expressions. */
1334 switch (opt.regex_type)
1337 case regex_type_pcre:
1338 opt.regex_compile_fun = compile_pcre_regex;
1339 opt.regex_match_fun = match_pcre_regex;
1343 case regex_type_posix:
1345 opt.regex_compile_fun = compile_posix_regex;
1346 opt.regex_match_fun = match_posix_regex;
1349 if (opt.acceptregex_s)
1351 opt.acceptregex = opt.regex_compile_fun (opt.acceptregex_s);
1352 if (!opt.acceptregex)
1355 if (opt.rejectregex_s)
1357 opt.rejectregex = opt.regex_compile_fun (opt.rejectregex_s);
1358 if (!opt.rejectregex)
1365 if (opt.locale && !check_encoding_name (opt.locale))
1369 opt.locale = find_locale ();
1371 if (opt.encoding_remote && !check_encoding_name (opt.encoding_remote))
1372 opt.encoding_remote = NULL;
1375 memset (&dummy_iri, 0, sizeof (dummy_iri));
1376 if (opt.enable_iri || opt.locale || opt.encoding_remote)
1378 /* sXXXav : be more specific... */
1379 fprintf (stderr, _("This version does not have support for IRIs\n"));
1386 opt.passwd = prompt_for_password ();
1388 if (opt.passwd == NULL || opt.passwd[0] == '\0')
1398 fork_to_background ();
1401 /* Initialize progress. Have to do this after the options are
1402 processed so we know where the log file is. */
1404 set_progress_implementation (opt.progress_type);
1406 /* Fill in the arguments. */
1407 url = alloca_array (char *, nurl + 1);
1410 fprintf (stderr, _("Memory allocation problem\n"));
1413 for (i = 0; i < nurl; i++, optind++)
1415 char *rewritten = rewrite_shorthand_url (argv[optind]);
1419 url[i] = xstrdup (argv[optind]);
1423 /* Initialize logging. */
1424 log_init (opt.lfilename, append_to_log);
1426 /* Open WARC file. */
1427 if (opt.warc_filename != 0)
1430 DEBUGP (("DEBUG output created by Wget %s on %s.\n\n",
1431 version_string, OS_TYPE));
1433 /* Open the output filename if necessary. */
1436 Note that having the output_stream ("-O") file opened here for an FTP
1437 URL rather than in getftp() (ftp.c) (and the http equivalent) rather
1438 limits the ability in VMS to open the file differently for ASCII
1439 versus binary FTP there. (Of course, doing it here allows an open
1440 failure to be detected immediately, without first connecting to the
1443 if (opt.output_document)
1445 if (HYPHENP (opt.output_document))
1448 _setmode (_fileno (stdout), _O_BINARY);
1450 output_stream = stdout;
1457 /* Common fopen() optional arguments:
1458 sequential access only, access callback function.
1460 # define FOPEN_OPT_ARGS , "fop=sqo", "acc", acc_cb, &open_id
1462 #else /* def __VMS */
1463 # define FOPEN_OPT_ARGS
1464 #endif /* def __VMS [else] */
1466 output_stream = fopen (opt.output_document,
1467 opt.always_rest ? "ab" : "wb"
1469 if (output_stream == NULL)
1471 perror (opt.output_document);
1474 if (fstat (fileno (output_stream), &st) == 0 && S_ISREG (st.st_mode))
1475 output_stream_regular = true;
1477 if (!output_stream_regular && opt.convert_links)
1479 fprintf (stderr, _("-k can be used together with -O only if \
1480 outputting to a regular file.\n"));
1487 /* Set global ODS5 flag according to the specified destination (if
1488 any), otherwise according to the current default device.
1490 if (output_stream == NULL)
1491 set_ods5_dest( "SYS$DISK");
1492 else if (output_stream != stdout)
1493 set_ods5_dest( opt.output_document);
1494 #endif /* def __VMS */
1501 /* Setup the signal handler to redirect output when hangup is
1503 if (signal(SIGHUP, SIG_IGN) != SIG_IGN)
1504 signal(SIGHUP, redirect_output_signal);
1506 /* ...and do the same for SIGUSR1. */
1508 signal (SIGUSR1, redirect_output_signal);
1511 /* Writing to a closed socket normally signals SIGPIPE, and the
1512 process exits. What we want is to ignore SIGPIPE and just check
1513 for the return value of write(). */
1514 signal (SIGPIPE, SIG_IGN);
1517 signal (SIGWINCH, progress_handle_sigwinch);
1520 /* Retrieve the URLs from argument list. */
1521 for (t = url; *t; t++)
1523 char *filename = NULL, *redirected_URL = NULL;
1525 /* Need to do a new struct iri every time, because
1526 * retrieve_url may modify it in some circumstances,
1528 struct iri *iri = iri_new ();
1529 struct url *url_parsed;
1531 set_uri_encoding (iri, opt.locale, true);
1532 url_parsed = url_parse (*t, &url_err, iri, true);
1536 char *error = url_error (*t, url_err);
1537 logprintf (LOG_NOTQUIET, "%s: %s.\n",*t, error);
1539 inform_exit_status (URLERROR);
1543 if ((opt.recursive || opt.page_requisites)
1544 && (url_scheme (*t) != SCHEME_FTP || url_uses_proxy (url_parsed)))
1546 int old_follow_ftp = opt.follow_ftp;
1548 /* Turn opt.follow_ftp on in case of recursive FTP retrieval */
1549 if (url_scheme (*t) == SCHEME_FTP)
1552 retrieve_tree (url_parsed, NULL);
1554 opt.follow_ftp = old_follow_ftp;
1558 retrieve_url (url_parsed, *t, &filename, &redirected_URL, NULL,
1559 &dt, opt.recursive, iri, true);
1562 if (opt.delete_after && filename != NULL && file_exists_p (filename))
1564 DEBUGP (("Removing file due to --delete-after in main():\n"));
1565 logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
1566 if (unlink (filename))
1567 logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
1569 xfree_null (redirected_URL);
1570 xfree_null (filename);
1571 url_free (url_parsed);
1576 /* And then from the input file, if any. */
1577 if (opt.input_filename)
1581 status = retrieve_from_file (opt.input_filename, opt.force_html, &count);
1582 inform_exit_status (status);
1584 logprintf (LOG_NOTQUIET, _("No URLs found in %s.\n"),
1585 opt.input_filename);
1588 /* Print broken links. */
1589 if (opt.recursive && opt.spider)
1590 print_broken_links ();
1592 /* Print the downloaded sum. */
1593 if ((opt.recursive || opt.page_requisites
1595 || (opt.input_filename && total_downloaded_bytes != 0))
1597 total_downloaded_bytes != 0)
1599 double end_time = ptimer_measure (timer);
1600 ptimer_destroy (timer);
1602 char *wall_time = xstrdup (secs_to_human_time (end_time - start_time));
1603 char *download_time = xstrdup (secs_to_human_time (total_download_time));
1604 logprintf (LOG_NOTQUIET,
1605 _("FINISHED --%s--\nTotal wall clock time: %s\n"
1606 "Downloaded: %d files, %s in %s (%s)\n"),
1607 datetime_str (time (NULL)),
1610 human_readable (total_downloaded_bytes),
1612 retr_rate (total_downloaded_bytes, total_download_time));
1614 xfree (download_time);
1616 /* Print quota warning, if exceeded. */
1617 if (opt.quota && total_downloaded_bytes > opt.quota)
1618 logprintf (LOG_NOTQUIET,
1619 _("Download quota of %s EXCEEDED!\n"),
1620 human_readable (opt.quota));
1623 if (opt.cookies_output)
1626 if (opt.convert_links && !opt.delete_after)
1627 convert_all_links ();
1631 exit (get_exit_status ());
1633 #endif /* TESTING */
1635 #if defined(SIGHUP) || defined(SIGUSR1)
1637 /* So the signal_name check doesn't blow up when only one is available. */
1645 /* Hangup signal handler. When wget receives SIGHUP or SIGUSR1, it
1646 will continue operating as usual, trying to write to a log file.
1647 If that is impossible, the output will be turned off. */
1650 redirect_output_signal (int sig)
1652 const char *signal_name = (sig == SIGHUP ? "SIGHUP" :
1653 (sig == SIGUSR1 ? "SIGUSR1" :
1655 log_request_redirect_output (signal_name);
1656 progress_schedule_redirect ();
1657 signal (sig, redirect_output_signal);