1 /* Command line parsing.
2 Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
3 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation,
6 This file is part of GNU Wget.
8 GNU Wget is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 GNU Wget is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with Wget. If not, see <http://www.gnu.org/licenses/>.
21 Additional permission under GNU GPL version 3 section 7
23 If you modify this program, or any covered work, by linking or
24 combining it with the OpenSSL project's OpenSSL library (or a
25 modified version of that library), containing parts covered by the
26 terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
27 grants you additional permission to convey the resulting work.
28 Corresponding Source for a non-source form of such a combination
29 shall include the source code for the parts of OpenSSL used as well
30 as that of the covered work. */
53 #include "progress.h" /* for progress_handle_sigwinch */
56 #include "http.h" /* for save_cookies */
73 #ifndef PATH_SEPARATOR
74 # define PATH_SEPARATOR '/'
83 /* defined in version.c */
84 extern char *version_string;
85 extern char *compilation_string;
86 extern char *system_getrc;
87 extern char *link_string;
88 /* defined in build_info.c */
89 extern const char *compiled_features[];
90 /* Used for --version output in print_version */
91 #define MAX_CHARS_PER_LINE 72
94 #if defined(SIGHUP) || defined(SIGUSR1)
95 static void redirect_output_signal (int);
98 const char *exec_name;
100 /* Number of successfully downloaded URLs */
104 /* Initialize I18N/L10N. That amounts to invoking setlocale, and
105 setting up gettext's message catalog using bindtextdomain and
106 textdomain. Does nothing if NLS is disabled or missing. */
109 i18n_initialize (void)
111 /* ENABLE_NLS implies existence of functions invoked here. */
113 /* Set the current locale. */
114 setlocale (LC_ALL, "");
115 /* Set the text message domain. */
116 bindtextdomain ("wget", LOCALEDIR);
118 #endif /* ENABLE_NLS */
121 /* Definition of command-line options. */
123 static void print_help (void);
124 static void print_version (void);
129 # define IF_SSL(x) NULL
133 # define WHEN_DEBUG(x) x
135 # define WHEN_DEBUG(x) NULL
138 struct cmdline_option {
139 const char *long_name;
145 /* Non-standard options that have to be handled specially in
149 OPT__DONT_REMOVE_LISTING,
154 const void *data; /* for standard options */
155 int argtype; /* for non-standard options */
158 static struct cmdline_option option_data[] =
160 { "accept", 'A', OPT_VALUE, "accept", -1 },
161 { "accept-regex", 0, OPT_VALUE, "acceptregex", -1 },
162 { "adjust-extension", 'E', OPT_BOOLEAN, "adjustextension", -1 },
163 { "append-output", 'a', OPT__APPEND_OUTPUT, NULL, required_argument },
164 { "ask-password", 0, OPT_BOOLEAN, "askpassword", -1 },
165 { "auth-no-challenge", 0, OPT_BOOLEAN, "authnochallenge", -1 },
166 { "background", 'b', OPT_BOOLEAN, "background", -1 },
167 { "backup-converted", 'K', OPT_BOOLEAN, "backupconverted", -1 },
168 { "backups", 0, OPT_BOOLEAN, "backups", -1 },
169 { "base", 'B', OPT_VALUE, "base", -1 },
170 { "bind-address", 0, OPT_VALUE, "bindaddress", -1 },
171 { "bits", 0, OPT_BOOLEAN, "bits", -1 },
172 { IF_SSL ("ca-certificate"), 0, OPT_VALUE, "cacertificate", -1 },
173 { IF_SSL ("ca-directory"), 0, OPT_VALUE, "cadirectory", -1 },
174 { "cache", 0, OPT_BOOLEAN, "cache", -1 },
175 { IF_SSL ("certificate"), 0, OPT_VALUE, "certificate", -1 },
176 { IF_SSL ("certificate-type"), 0, OPT_VALUE, "certificatetype", -1 },
177 { IF_SSL ("check-certificate"), 0, OPT_BOOLEAN, "checkcertificate", -1 },
178 { "clobber", 0, OPT__CLOBBER, NULL, optional_argument },
179 { "config", 0, OPT_VALUE, "chooseconfig", -1 },
180 { "connect-timeout", 0, OPT_VALUE, "connecttimeout", -1 },
181 { "continue", 'c', OPT_BOOLEAN, "continue", -1 },
182 { "convert-links", 'k', OPT_BOOLEAN, "convertlinks", -1 },
183 { "content-disposition", 0, OPT_BOOLEAN, "contentdisposition", -1 },
184 { "content-on-error", 0, OPT_BOOLEAN, "contentonerror", -1 },
185 { "cookies", 0, OPT_BOOLEAN, "cookies", -1 },
186 { "cut-dirs", 0, OPT_VALUE, "cutdirs", -1 },
187 { WHEN_DEBUG ("debug"), 'd', OPT_BOOLEAN, "debug", -1 },
188 { "default-page", 0, OPT_VALUE, "defaultpage", -1 },
189 { "delete-after", 0, OPT_BOOLEAN, "deleteafter", -1 },
190 { "directories", 0, OPT_BOOLEAN, "dirstruct", -1 },
191 { "directory-prefix", 'P', OPT_VALUE, "dirprefix", -1 },
192 { "dns-cache", 0, OPT_BOOLEAN, "dnscache", -1 },
193 { "dns-timeout", 0, OPT_VALUE, "dnstimeout", -1 },
194 { "domains", 'D', OPT_VALUE, "domains", -1 },
195 { "dont-remove-listing", 0, OPT__DONT_REMOVE_LISTING, NULL, no_argument },
196 { "dot-style", 0, OPT_VALUE, "dotstyle", -1 }, /* deprecated */
197 { "egd-file", 0, OPT_VALUE, "egdfile", -1 },
198 { "exclude-directories", 'X', OPT_VALUE, "excludedirectories", -1 },
199 { "exclude-domains", 0, OPT_VALUE, "excludedomains", -1 },
200 { "execute", 'e', OPT__EXECUTE, NULL, required_argument },
201 { "follow-ftp", 0, OPT_BOOLEAN, "followftp", -1 },
202 { "follow-tags", 0, OPT_VALUE, "followtags", -1 },
203 { "force-directories", 'x', OPT_BOOLEAN, "dirstruct", -1 },
204 { "force-html", 'F', OPT_BOOLEAN, "forcehtml", -1 },
205 { "ftp-password", 0, OPT_VALUE, "ftppassword", -1 },
207 { "ftp-stmlf", 0, OPT_BOOLEAN, "ftpstmlf", -1 },
208 #endif /* def __VMS */
209 { "ftp-user", 0, OPT_VALUE, "ftpuser", -1 },
210 { "glob", 0, OPT_BOOLEAN, "glob", -1 },
211 { "header", 0, OPT_VALUE, "header", -1 },
212 { "help", 'h', OPT_FUNCALL, (void *)print_help, no_argument },
213 { "host-directories", 0, OPT_BOOLEAN, "addhostdir", -1 },
214 { "html-extension", 'E', OPT_BOOLEAN, "adjustextension", -1 }, /* deprecated */
215 { "htmlify", 0, OPT_BOOLEAN, "htmlify", -1 },
216 { "http-keep-alive", 0, OPT_BOOLEAN, "httpkeepalive", -1 },
217 { "http-passwd", 0, OPT_VALUE, "httppassword", -1 }, /* deprecated */
218 { "http-password", 0, OPT_VALUE, "httppassword", -1 },
219 { "http-user", 0, OPT_VALUE, "httpuser", -1 },
220 { "ignore-case", 0, OPT_BOOLEAN, "ignorecase", -1 },
221 { "ignore-length", 0, OPT_BOOLEAN, "ignorelength", -1 },
222 { "ignore-tags", 0, OPT_VALUE, "ignoretags", -1 },
223 { "include-directories", 'I', OPT_VALUE, "includedirectories", -1 },
225 { "inet4-only", '4', OPT_BOOLEAN, "inet4only", -1 },
226 { "inet6-only", '6', OPT_BOOLEAN, "inet6only", -1 },
228 { "input-file", 'i', OPT_VALUE, "input", -1 },
229 { "iri", 0, OPT_BOOLEAN, "iri", -1 },
230 { "keep-session-cookies", 0, OPT_BOOLEAN, "keepsessioncookies", -1 },
231 { "level", 'l', OPT_VALUE, "reclevel", -1 },
232 { "limit-rate", 0, OPT_VALUE, "limitrate", -1 },
233 { "load-cookies", 0, OPT_VALUE, "loadcookies", -1 },
234 { "local-encoding", 0, OPT_VALUE, "localencoding", -1 },
235 { "max-redirect", 0, OPT_VALUE, "maxredirect", -1 },
236 { "mirror", 'm', OPT_BOOLEAN, "mirror", -1 },
237 { "no", 'n', OPT__NO, NULL, required_argument },
238 { "no-clobber", 0, OPT_BOOLEAN, "noclobber", -1 },
239 { "no-parent", 0, OPT_BOOLEAN, "noparent", -1 },
240 { "output-document", 'O', OPT_VALUE, "outputdocument", -1 },
241 { "output-file", 'o', OPT_VALUE, "logfile", -1 },
242 { "page-requisites", 'p', OPT_BOOLEAN, "pagerequisites", -1 },
243 { "parent", 0, OPT__PARENT, NULL, optional_argument },
244 { "passive-ftp", 0, OPT_BOOLEAN, "passiveftp", -1 },
245 { "password", 0, OPT_VALUE, "password", -1 },
246 { "post-data", 0, OPT_VALUE, "postdata", -1 },
247 { "post-file", 0, OPT_VALUE, "postfile", -1 },
248 { "prefer-family", 0, OPT_VALUE, "preferfamily", -1 },
249 { "preserve-permissions", 0, OPT_BOOLEAN, "preservepermissions", -1 },
250 { IF_SSL ("private-key"), 0, OPT_VALUE, "privatekey", -1 },
251 { IF_SSL ("private-key-type"), 0, OPT_VALUE, "privatekeytype", -1 },
252 { "progress", 0, OPT_VALUE, "progress", -1 },
253 { "protocol-directories", 0, OPT_BOOLEAN, "protocoldirectories", -1 },
254 { "proxy", 0, OPT_BOOLEAN, "useproxy", -1 },
255 { "proxy__compat", 'Y', OPT_VALUE, "useproxy", -1 }, /* back-compatible */
256 { "proxy-passwd", 0, OPT_VALUE, "proxypassword", -1 }, /* deprecated */
257 { "proxy-password", 0, OPT_VALUE, "proxypassword", -1 },
258 { "proxy-user", 0, OPT_VALUE, "proxyuser", -1 },
259 { "quiet", 'q', OPT_BOOLEAN, "quiet", -1 },
260 { "quota", 'Q', OPT_VALUE, "quota", -1 },
261 { "random-file", 0, OPT_VALUE, "randomfile", -1 },
262 { "random-wait", 0, OPT_BOOLEAN, "randomwait", -1 },
263 { "read-timeout", 0, OPT_VALUE, "readtimeout", -1 },
264 { "recursive", 'r', OPT_BOOLEAN, "recursive", -1 },
265 { "referer", 0, OPT_VALUE, "referer", -1 },
266 { "regex-type", 0, OPT_VALUE, "regextype", -1 },
267 { "reject", 'R', OPT_VALUE, "reject", -1 },
268 { "reject-regex", 0, OPT_VALUE, "rejectregex", -1 },
269 { "relative", 'L', OPT_BOOLEAN, "relativeonly", -1 },
270 { "remote-encoding", 0, OPT_VALUE, "remoteencoding", -1 },
271 { "remove-listing", 0, OPT_BOOLEAN, "removelisting", -1 },
272 { "restrict-file-names", 0, OPT_BOOLEAN, "restrictfilenames", -1 },
273 { "retr-symlinks", 0, OPT_BOOLEAN, "retrsymlinks", -1 },
274 { "retry-connrefused", 0, OPT_BOOLEAN, "retryconnrefused", -1 },
275 { "save-cookies", 0, OPT_VALUE, "savecookies", -1 },
276 { "save-headers", 0, OPT_BOOLEAN, "saveheaders", -1 },
277 { IF_SSL ("secure-protocol"), 0, OPT_VALUE, "secureprotocol", -1 },
278 { "server-response", 'S', OPT_BOOLEAN, "serverresponse", -1 },
279 { "span-hosts", 'H', OPT_BOOLEAN, "spanhosts", -1 },
280 { "spider", 0, OPT_BOOLEAN, "spider", -1 },
281 { "strict-comments", 0, OPT_BOOLEAN, "strictcomments", -1 },
282 { "timeout", 'T', OPT_VALUE, "timeout", -1 },
283 { "timestamping", 'N', OPT_BOOLEAN, "timestamping", -1 },
284 { "tries", 't', OPT_VALUE, "tries", -1 },
285 { "unlink", 0, OPT_BOOLEAN, "unlink", -1 },
286 { "trust-server-names", 0, OPT_BOOLEAN, "trustservernames", -1 },
287 { "use-server-timestamps", 0, OPT_BOOLEAN, "useservertimestamps", -1 },
288 { "user", 0, OPT_VALUE, "user", -1 },
289 { "user-agent", 'U', OPT_VALUE, "useragent", -1 },
290 { "verbose", 'v', OPT_BOOLEAN, "verbose", -1 },
291 { "verbose", 0, OPT_BOOLEAN, "verbose", -1 },
292 { "version", 'V', OPT_FUNCALL, (void *) print_version, no_argument },
293 { "wait", 'w', OPT_VALUE, "wait", -1 },
294 { "waitretry", 0, OPT_VALUE, "waitretry", -1 },
295 { "warc-cdx", 0, OPT_BOOLEAN, "warccdx", -1 },
297 { "warc-compression", 0, OPT_BOOLEAN, "warccompression", -1 },
299 { "warc-dedup", 0, OPT_VALUE, "warccdxdedup", -1 },
300 { "warc-digests", 0, OPT_BOOLEAN, "warcdigests", -1 },
301 { "warc-file", 0, OPT_VALUE, "warcfile", -1 },
302 { "warc-header", 0, OPT_VALUE, "warcheader", -1 },
303 { "warc-keep-log", 0, OPT_BOOLEAN, "warckeeplog", -1 },
304 { "warc-max-size", 0, OPT_VALUE, "warcmaxsize", -1 },
305 { "warc-tempdir", 0, OPT_VALUE, "warctempdir", -1 },
307 { "wdebug", 0, OPT_BOOLEAN, "wdebug", -1 },
314 /* Return a string that contains S with "no-" prepended. The string
315 is NUL-terminated and allocated off static storage at Wget
319 no_prefix (const char *s)
321 static char buffer[1024];
322 static char *p = buffer;
325 int size = 3 + strlen (s) + 1; /* "no-STRING\0" */
326 if (p + size >= buffer + sizeof (buffer))
329 cp[0] = 'n', cp[1] = 'o', cp[2] = '-';
335 /* The arguments that main passes to getopt_long. */
336 static struct option long_options[2 * countof (option_data) + 1];
337 static char short_options[128];
339 /* Mapping between short option chars and option_data indices. */
340 static unsigned char optmap[96];
342 /* Marker for `--no-FOO' values in long_options. */
343 #define BOOLEAN_NEG_MARKER 1024
345 /* Initialize the long_options array used by getopt_long from the data
351 char *p = short_options;
353 for (i = 0; i < countof (option_data); i++)
355 struct cmdline_option *opt = &option_data[i];
356 struct option *longopt;
359 /* The option is disabled. */
362 longopt = &long_options[o++];
363 longopt->name = opt->long_name;
367 *p++ = opt->short_name;
368 optmap[opt->short_name - 32] = longopt - long_options;
373 longopt->has_arg = required_argument;
378 /* Specify an optional argument for long options, so that
379 --option=off works the same as --no-option, for
380 compatibility with pre-1.10 Wget. However, don't specify
381 optional arguments for short-option booleans because they
382 prevent combining of short options. */
383 longopt->has_arg = optional_argument;
384 /* For Boolean options, add the "--no-FOO" variant, which is
385 identical to "--foo", except it has opposite meaning and
386 it doesn't allow an argument. */
387 longopt = &long_options[o++];
388 longopt->name = no_prefix (opt->long_name);
389 longopt->has_arg = no_argument;
390 /* Mask the value so we'll be able to recognize that we're
391 dealing with the false value. */
392 longopt->val = i | BOOLEAN_NEG_MARKER;
395 assert (opt->argtype != -1);
396 longopt->has_arg = opt->argtype;
399 if (longopt->has_arg == required_argument)
401 /* Don't handle optional_argument */
405 /* Terminate short_options. */
407 /* No need for xzero(long_options[o]) because its storage is static
408 and it will be zeroed by default. */
409 assert (o <= countof (long_options));
412 /* Print the usage message. */
414 print_usage (int error)
416 return fprintf (error ? stderr : stdout,
417 _("Usage: %s [OPTION]... [URL]...\n"), exec_name);
420 /* Print the help message, describing all the available options. If
421 you add an option, be sure to update this list. */
425 /* We split the help text this way to ease translation of individual
427 static const char *help[] = {
430 Mandatory arguments to long options are mandatory for short options too.\n\n"),
434 -V, --version display the version of Wget and exit.\n"),
436 -h, --help print this help.\n"),
438 -b, --background go to background after startup.\n"),
440 -e, --execute=COMMAND execute a `.wgetrc'-style command.\n"),
444 Logging and input file:\n"),
446 -o, --output-file=FILE log messages to FILE.\n"),
448 -a, --append-output=FILE append messages to FILE.\n"),
451 -d, --debug print lots of debugging information.\n"),
455 --wdebug print Watt-32 debug output.\n"),
458 -q, --quiet quiet (no output).\n"),
460 -v, --verbose be verbose (this is the default).\n"),
462 -nv, --no-verbose turn off verboseness, without being quiet.\n"),
464 -i, --input-file=FILE download URLs found in local or external FILE.\n"),
466 -F, --force-html treat input file as HTML.\n"),
468 -B, --base=URL resolves HTML input-file links (-i -F)\n\
469 relative to URL.\n"),
471 --config=FILE Specify config file to use.\n"),
477 -t, --tries=NUMBER set number of retries to NUMBER (0 unlimits).\n"),
479 --retry-connrefused retry even if connection is refused.\n"),
481 -O, --output-document=FILE write documents to FILE.\n"),
483 -nc, --no-clobber skip downloads that would download to\n\
484 existing files (overwriting them).\n"),
486 -c, --continue resume getting a partially-downloaded file.\n"),
488 --progress=TYPE select progress gauge type.\n"),
490 -N, --timestamping don't re-retrieve files unless newer than\n\
493 --no-use-server-timestamps don't set the local file's timestamp by\n\
494 the one on the server.\n"),
496 -S, --server-response print server response.\n"),
498 --spider don't download anything.\n"),
500 -T, --timeout=SECONDS set all timeout values to SECONDS.\n"),
502 --dns-timeout=SECS set the DNS lookup timeout to SECS.\n"),
504 --connect-timeout=SECS set the connect timeout to SECS.\n"),
506 --read-timeout=SECS set the read timeout to SECS.\n"),
508 -w, --wait=SECONDS wait SECONDS between retrievals.\n"),
510 --waitretry=SECONDS wait 1..SECONDS between retries of a retrieval.\n"),
512 --random-wait wait from 0.5*WAIT...1.5*WAIT secs between retrievals.\n"),
514 --no-proxy explicitly turn off proxy.\n"),
516 -Q, --quota=NUMBER set retrieval quota to NUMBER.\n"),
518 --bind-address=ADDRESS bind to ADDRESS (hostname or IP) on local host.\n"),
520 --limit-rate=RATE limit download rate to RATE.\n"),
522 --no-dns-cache disable caching DNS lookups.\n"),
524 --restrict-file-names=OS restrict chars in file names to ones OS allows.\n"),
526 --ignore-case ignore case when matching files/directories.\n"),
529 -4, --inet4-only connect only to IPv4 addresses.\n"),
531 -6, --inet6-only connect only to IPv6 addresses.\n"),
533 --prefer-family=FAMILY connect first to addresses of specified family,\n\
534 one of IPv6, IPv4, or none.\n"),
537 --user=USER set both ftp and http user to USER.\n"),
539 --password=PASS set both ftp and http password to PASS.\n"),
541 --ask-password prompt for passwords.\n"),
543 --no-iri turn off IRI support.\n"),
545 --local-encoding=ENC use ENC as the local encoding for IRIs.\n"),
547 --remote-encoding=ENC use ENC as the default remote encoding.\n"),
549 --unlink remove file before clobber.\n"),
555 -nd, --no-directories don't create directories.\n"),
557 -x, --force-directories force creation of directories.\n"),
559 -nH, --no-host-directories don't create host directories.\n"),
561 --protocol-directories use protocol name in directories.\n"),
563 -P, --directory-prefix=PREFIX save files to PREFIX/...\n"),
565 --cut-dirs=NUMBER ignore NUMBER remote directory components.\n"),
571 --http-user=USER set http user to USER.\n"),
573 --http-password=PASS set http password to PASS.\n"),
575 --no-cache disallow server-cached data.\n"),
577 --default-page=NAME Change the default page name (normally\n\
578 this is `index.html'.).\n"),
580 -E, --adjust-extension save HTML/CSS documents with proper extensions.\n"),
582 --ignore-length ignore `Content-Length' header field.\n"),
584 --header=STRING insert STRING among the headers.\n"),
586 --max-redirect maximum redirections allowed per page.\n"),
588 --proxy-user=USER set USER as proxy username.\n"),
590 --proxy-password=PASS set PASS as proxy password.\n"),
592 --referer=URL include `Referer: URL' header in HTTP request.\n"),
594 --save-headers save the HTTP headers to file.\n"),
596 -U, --user-agent=AGENT identify as AGENT instead of Wget/VERSION.\n"),
598 --no-http-keep-alive disable HTTP keep-alive (persistent connections).\n"),
600 --no-cookies don't use cookies.\n"),
602 --load-cookies=FILE load cookies from FILE before session.\n"),
604 --save-cookies=FILE save cookies to FILE after session.\n"),
606 --keep-session-cookies load and save session (non-permanent) cookies.\n"),
608 --post-data=STRING use the POST method; send STRING as the data.\n"),
610 --post-file=FILE use the POST method; send contents of FILE.\n"),
612 --content-disposition honor the Content-Disposition header when\n\
613 choosing local file names (EXPERIMENTAL).\n"),
615 --content-on-error output the received content on server errors.\n"),
617 --auth-no-challenge send Basic HTTP authentication information\n\
618 without first waiting for the server's\n\
624 HTTPS (SSL/TLS) options:\n"),
626 --secure-protocol=PR choose secure protocol, one of auto, SSLv2,\n\
627 SSLv3, and TLSv1.\n"),
629 --no-check-certificate don't validate the server's certificate.\n"),
631 --certificate=FILE client certificate file.\n"),
633 --certificate-type=TYPE client certificate type, PEM or DER.\n"),
635 --private-key=FILE private key file.\n"),
637 --private-key-type=TYPE private key type, PEM or DER.\n"),
639 --ca-certificate=FILE file with the bundle of CA's.\n"),
641 --ca-directory=DIR directory where hash list of CA's is stored.\n"),
643 --random-file=FILE file with random data for seeding the SSL PRNG.\n"),
645 --egd-file=FILE file naming the EGD socket with random data.\n"),
647 #endif /* HAVE_SSL */
653 --ftp-stmlf Use Stream_LF format for all binary FTP files.\n"),
654 #endif /* def __VMS */
656 --ftp-user=USER set ftp user to USER.\n"),
658 --ftp-password=PASS set ftp password to PASS.\n"),
660 --no-remove-listing don't remove `.listing' files.\n"),
662 --no-glob turn off FTP file name globbing.\n"),
664 --no-passive-ftp disable the \"passive\" transfer mode.\n"),
666 --preserve-permissions preserve remote file permissions.\n"),
668 --retr-symlinks when recursing, get linked-to files (not dir).\n"),
674 --warc-file=FILENAME save request/response data to a .warc.gz file.\n"),
676 --warc-header=STRING insert STRING into the warcinfo record.\n"),
678 --warc-max-size=NUMBER set maximum size of WARC files to NUMBER.\n"),
680 --warc-cdx write CDX index files.\n"),
682 --warc-dedup=FILENAME do not store records listed in this CDX file.\n"),
685 --no-warc-compression do not compress WARC files with GZIP.\n"),
688 --no-warc-digests do not calculate SHA1 digests.\n"),
690 --no-warc-keep-log do not store the log file in a WARC record.\n"),
692 --warc-tempdir=DIRECTORY location for temporary files created by the\n\
697 Recursive download:\n"),
699 -r, --recursive specify recursive download.\n"),
701 -l, --level=NUMBER maximum recursion depth (inf or 0 for infinite).\n"),
703 --delete-after delete files locally after downloading them.\n"),
705 -k, --convert-links make links in downloaded HTML or CSS point to\n\
709 -K, --backup-converted before converting file X, back up as X_orig.\n"),
710 #else /* def __VMS */
712 -K, --backup-converted before converting file X, back up as X.orig.\n"),
713 #endif /* def __VMS [else] */
715 -m, --mirror shortcut for -N -r -l inf --no-remove-listing.\n"),
717 -p, --page-requisites get all images, etc. needed to display HTML page.\n"),
719 --strict-comments turn on strict (SGML) handling of HTML comments.\n"),
723 Recursive accept/reject:\n"),
725 -A, --accept=LIST comma-separated list of accepted extensions.\n"),
727 -R, --reject=LIST comma-separated list of rejected extensions.\n"),
729 --accept-regex=REGEX regex matching accepted URLs.\n"),
731 --reject-regex=REGEX regex matching rejected URLs.\n"),
734 --regex-type=TYPE regex type (posix|pcre).\n"),
737 --regex-type=TYPE regex type (posix).\n"),
740 -D, --domains=LIST comma-separated list of accepted domains.\n"),
742 --exclude-domains=LIST comma-separated list of rejected domains.\n"),
744 --follow-ftp follow FTP links from HTML documents.\n"),
746 --follow-tags=LIST comma-separated list of followed HTML tags.\n"),
748 --ignore-tags=LIST comma-separated list of ignored HTML tags.\n"),
750 -H, --span-hosts go to foreign hosts when recursive.\n"),
752 -L, --relative follow relative links only.\n"),
754 -I, --include-directories=LIST list of allowed directories.\n"),
756 --trust-server-names use the name specified by the redirection\n\
757 url last component.\n"),
759 -X, --exclude-directories=LIST list of excluded directories.\n"),
761 -np, --no-parent don't ascend to the parent directory.\n"),
767 --bits Output bandwidth in bits.\n"),
769 N_("Mail bug reports and suggestions to <bug-wget@gnu.org>.\n")
774 if (printf (_("GNU Wget %s, a non-interactive network retriever.\n"),
777 if (print_usage (0) < 0)
780 for (i = 0; i < countof (help); i++)
781 if (fputs (_(help[i]), stdout) < 0)
787 /* Return a human-readable printed representation of INTERVAL,
788 measured in seconds. */
791 secs_to_human_time (double interval)
794 int secs = (int) (interval + 0.5);
795 int hours, mins, days;
797 days = secs / 86400, secs %= 86400;
798 hours = secs / 3600, secs %= 3600;
799 mins = secs / 60, secs %= 60;
802 sprintf (buf, "%dd %dh %dm %ds", days, hours, mins, secs);
804 sprintf (buf, "%dh %dm %ds", hours, mins, secs);
806 sprintf (buf, "%dm %ds", mins, secs);
808 sprintf (buf, "%ss", print_decimal (interval));
814 prompt_for_password (void)
817 fprintf (stderr, _("Password for user %s: "), quote (opt.user));
819 fprintf (stderr, _("Password: "));
823 /* Function that prints the line argument while limiting it
824 to at most line_length. prefix is printed on the first line
825 and an appropriate number of spaces are added on subsequent
828 format_and_print_line (const char *prefix, const char *line,
832 char *line_dup, *token;
834 assert (prefix != NULL);
835 assert (line != NULL);
837 line_dup = xstrdup (line);
839 if (line_length <= 0)
840 line_length = MAX_CHARS_PER_LINE - TABULATION;
842 if (printf ("%s", prefix) < 0)
844 remaining_chars = line_length;
845 /* We break on spaces. */
846 token = strtok (line_dup, " ");
847 while (token != NULL)
849 /* If however a token is much larger than the maximum
850 line length, all bets are off and we simply print the
851 token on the next line. */
852 if (remaining_chars <= strlen (token))
854 if (printf ("\n%*c", TABULATION, ' ') < 0)
856 remaining_chars = line_length - TABULATION;
858 if (printf ("%s ", token) < 0)
860 remaining_chars -= strlen (token) + 1; /* account for " " */
861 token = strtok (NULL, " ");
864 if (printf ("\n") < 0)
874 const char *wgetrc_title = _("Wgetrc: ");
875 const char *locale_title = _("Locale: ");
876 const char *compile_title = _("Compile: ");
877 const char *link_title = _("Link: ");
878 char *env_wgetrc, *user_wgetrc;
881 if (printf (_("GNU Wget %s built on %s.\n\n"), version_string, OS_TYPE) < 0)
884 for (i = 0; compiled_features[i] != NULL; )
886 int line_length = MAX_CHARS_PER_LINE;
887 while ((line_length > 0) && (compiled_features[i] != NULL))
889 if (printf ("%s ", compiled_features[i]) < 0)
891 line_length -= strlen (compiled_features[i]) + 2;
894 if (printf ("\n") < 0)
897 if (printf ("\n") < 0)
900 /* Handle the case when $WGETRC is unset and $HOME/.wgetrc is
902 if (printf ("%s\n", wgetrc_title) < 0)
905 env_wgetrc = wgetrc_env_file_name ();
906 if (env_wgetrc && *env_wgetrc)
908 if (printf (_(" %s (env)\n"), env_wgetrc) < 0)
912 user_wgetrc = wgetrc_user_file_name ();
915 if (printf (_(" %s (user)\n"), user_wgetrc) < 0)
920 if (printf (_(" %s (system)\n"), SYSTEM_WGETRC) < 0)
925 if (format_and_print_line (locale_title,
927 MAX_CHARS_PER_LINE) < 0)
929 #endif /* def ENABLE_NLS */
931 if (compilation_string != NULL)
932 if (format_and_print_line (compile_title,
934 MAX_CHARS_PER_LINE) < 0)
937 if (link_string != NULL)
938 if (format_and_print_line (link_title,
940 MAX_CHARS_PER_LINE) < 0)
943 if (printf ("\n") < 0)
946 /* TRANSLATORS: When available, an actual copyright character
947 (circle-c) should be used in preference to "(C)". */
949 Copyright (C) 2011 Free Software Foundation, Inc.\n"), stdout) < 0)
952 License GPLv3+: GNU GPL version 3 or later\n\
953 <http://www.gnu.org/licenses/gpl.html>.\n\
954 This is free software: you are free to change and redistribute it.\n\
955 There is NO WARRANTY, to the extent permitted by law.\n"), stdout) < 0)
957 /* TRANSLATORS: When available, please use the proper diacritics for
958 names such as this one. See en_US.po for reference. */
959 if (fputs (_("\nOriginally written by Hrvoje Niksic <hniksic@xemacs.org>.\n"),
962 if (fputs (_("Please send bug reports and questions to <bug-wget@gnu.org>.\n"),
969 char *program_name; /* Needed by lib/error.c. */
970 char *program_argstring; /* Needed by wget_warc.c. */
973 main (int argc, char **argv)
976 int i, ret, longindex;
978 bool append_to_log = false;
980 total_downloaded_bytes = 0;
982 program_name = argv[0];
984 struct ptimer *timer = ptimer_new ();
985 double start_time = ptimer_measure (timer);
989 atexit (close_stdout);
991 /* Construct the name of the executable, without the directory part. */
993 /* On VMS, lose the "dev:[dir]" prefix and the ".EXE;nnn" suffix. */
994 exec_name = vms_basename (argv[0]);
995 #else /* def __VMS */
996 exec_name = strrchr (argv[0], PATH_SEPARATOR);
1001 #endif /* def __VMS [else] */
1004 /* Drop extension (typically .EXE) from executable filename. */
1005 windows_main ((char **) &exec_name);
1008 /* Construct the arguments string. */
1009 int argstring_length = 1;
1010 for (i = 1; i < argc; i++)
1011 argstring_length += strlen (argv[i]) + 2 + 1;
1012 char *p = program_argstring = malloc (argstring_length * sizeof (char));
1015 fprintf (stderr, _("Memory allocation problem\n"));
1018 for (i = 1; i < argc; i++)
1021 int arglen = strlen (argv[i]);
1022 memcpy (p, argv[i], arglen);
1029 /* Load the hard-coded defaults. */
1034 /* This separate getopt_long is needed to find the user config file
1035 option ("--config") and parse it before the other user options. */
1038 bool use_userconfig = false;
1040 while ((retconf = getopt_long (argc, argv,
1041 short_options, long_options, &longindex)) != -1)
1044 bool userrc_ret = true;
1045 struct cmdline_option *config_opt;
1047 /* There is no short option for "--config". */
1050 confval = long_options[longindex].val;
1051 config_opt = &option_data[confval & ~BOOLEAN_NEG_MARKER];
1052 if (strcmp (config_opt->long_name, "config") == 0)
1054 userrc_ret &= run_wgetrc (optarg);
1055 use_userconfig = true;
1059 fprintf (stderr, "Exiting due to error in %s\n", optarg);
1067 /* If the user did not specify a config, read the system wgetrc and ~/.wgetrc. */
1068 if (use_userconfig == false)
1075 while ((ret = getopt_long (argc, argv,
1076 short_options, long_options, &longindex)) != -1)
1079 struct cmdline_option *opt;
1081 /* If LONGINDEX is unchanged, it means RET is referring to a short
1083 if (longindex == -1)
1088 fprintf (stderr, "\n");
1089 fprintf (stderr, _("Try `%s --help' for more options.\n"),
1093 /* Find the short option character in the mapping. */
1094 longindex = optmap[ret - 32];
1096 val = long_options[longindex].val;
1098 /* Use the retrieved value to locate the option in the
1099 option_data array, and to see if we're dealing with the
1100 negated "--no-FOO" variant of the boolean option "--foo". */
1101 opt = &option_data[val & ~BOOLEAN_NEG_MARKER];
1105 setoptval (opt->data, optarg, opt->long_name);
1109 /* The user has specified a value -- use it. */
1110 setoptval (opt->data, optarg, opt->long_name);
1113 /* NEG is true for `--no-FOO' style boolean options. */
1114 bool neg = !!(val & BOOLEAN_NEG_MARKER);
1115 setoptval (opt->data, neg ? "0" : "1", opt->long_name);
1120 void (*func) (void) = (void (*) (void)) opt->data;
1124 case OPT__APPEND_OUTPUT:
1125 setoptval ("logfile", optarg, opt->long_name);
1126 append_to_log = true;
1129 run_command (optarg);
1133 /* We support real --no-FOO flags now, but keep these
1134 short options for convenience and backward
1137 for (p = optarg; p && *p; p++)
1141 setoptval ("verbose", "0", opt->long_name);
1144 setoptval ("addhostdir", "0", opt->long_name);
1147 setoptval ("dirstruct", "0", opt->long_name);
1150 setoptval ("noclobber", "1", opt->long_name);
1153 setoptval ("noparent", "1", opt->long_name);
1156 fprintf (stderr, _("%s: illegal option -- `-n%c'\n"),
1159 fprintf (stderr, "\n");
1160 fprintf (stderr, _("Try `%s --help' for more options.\n"),
1169 /* The wgetrc commands are named noparent and noclobber,
1170 so we must invert the meaning of the cmdline options
1171 before passing the value to setoptval. */
1174 flag = (*optarg == '1' || c_tolower (*optarg) == 'y'
1175 || (c_tolower (optarg[0]) == 'o'
1176 && c_tolower (optarg[1]) == 'n'));
1177 setoptval (opt->type == OPT__PARENT ? "noparent" : "noclobber",
1178 flag ? "0" : "1", opt->long_name);
1181 case OPT__DONT_REMOVE_LISTING:
1182 setoptval ("removelisting", "0", opt->long_name);
1189 nurl = argc - optind;
1191 /* All user options have now been processed, so it's now safe to do
1192 interoption dependency checks. */
1194 if (opt.noclobber && opt.convert_links)
1197 _("Both --no-clobber and --convert-links were specified,"
1198 "only --convert-links will be used.\n"));
1199 opt.noclobber = false;
1202 if (opt.reclevel == 0)
1203 opt.reclevel = INFINITE_RECURSION; /* see recur.h for commentary */
1205 if (opt.spider || opt.delete_after)
1206 opt.no_dirstruct = true;
1208 if (opt.page_requisites && !opt.recursive)
1210 /* Don't set opt.recursive here because it would confuse the FTP
1211 code. Instead, call retrieve_tree below when either
1212 page_requisites or recursive is requested. */
1214 if (!opt.no_dirstruct)
1215 opt.dirstruct = 1; /* normally handled by cmd_spec_recursive() */
1218 if (opt.verbose == -1)
1219 opt.verbose = !opt.quiet;
1221 /* Sanity checks. */
1222 if (opt.verbose && opt.quiet)
1224 fprintf (stderr, _("Can't be verbose and quiet at the same time.\n"));
1228 if (opt.timestamping && opt.noclobber)
1230 fprintf (stderr, _("\
1231 Can't timestamp and not clobber old files at the same time.\n"));
1236 if (opt.ipv4_only && opt.ipv6_only)
1239 _("Cannot specify both --inet4-only and --inet6-only.\n"));
1244 if (opt.output_document)
1246 if (opt.convert_links
1247 && (nurl > 1 || opt.page_requisites || opt.recursive))
1250 Cannot specify both -k and -O if multiple URLs are given, or in combination\n\
1251 with -p or -r. See the manual for details.\n\n"), stderr);
1255 if (opt.page_requisites
1258 logprintf (LOG_NOTQUIET, "%s", _("\
1259 WARNING: combining -O with -r or -p will mean that all downloaded content\n\
1260 will be placed in the single file you specified.\n\n"));
1262 if (opt.timestamping)
1264 logprintf (LOG_NOTQUIET, "%s", _("\
1265 WARNING: timestamping does nothing in combination with -O. See the manual\n\
1266 for details.\n\n"));
1267 opt.timestamping = false;
1269 if (opt.noclobber && file_exists_p(opt.output_document))
1271 /* Check if output file exists; if it does, exit. */
1272 logprintf (LOG_VERBOSE,
1273 _("File `%s' already there; not retrieving.\n"),
1274 opt.output_document);
1279 if (opt.warc_filename != 0)
1284 _("WARC output does not work with --no-clobber, "
1285 "--no-clobber will be disabled.\n"));
1286 opt.noclobber = false;
1288 if (opt.timestamping)
1291 _("WARC output does not work with timestamping, "
1292 "timestamping will be disabled.\n"));
1293 opt.timestamping = false;
1298 _("WARC output does not work with --spider.\n"));
1301 if (opt.always_rest)
1304 _("WARC output does not work with --continue, "
1305 "--continue will be disabled.\n"));
1306 opt.always_rest = false;
1308 if (opt.warc_cdx_dedup_filename != 0 && !opt.warc_digests_enabled)
1311 _("Digests are disabled; WARC deduplication will "
1312 "not find duplicate records.\n"));
1314 if (opt.warc_keep_log)
1316 opt.progress_type = xstrdup ("dot");
1320 if (opt.ask_passwd && opt.passwd)
1323 _("Cannot specify both --ask-password and --password.\n"));
1328 if (!nurl && !opt.input_filename)
1330 /* No URL specified. */
1331 fprintf (stderr, _("%s: missing URL\n"), exec_name);
1333 fprintf (stderr, "\n");
1334 /* #### Something nicer should be printed here -- similar to the
1335 pre-1.5 `--help' page. */
1336 fprintf (stderr, _("Try `%s --help' for more options.\n"), exec_name);
1340 /* Compile the regular expressions. */
1341 switch (opt.regex_type)
1344 case regex_type_pcre:
1345 opt.regex_compile_fun = compile_pcre_regex;
1346 opt.regex_match_fun = match_pcre_regex;
1350 case regex_type_posix:
1352 opt.regex_compile_fun = compile_posix_regex;
1353 opt.regex_match_fun = match_posix_regex;
1356 if (opt.acceptregex_s)
1358 opt.acceptregex = opt.regex_compile_fun (opt.acceptregex_s);
1359 if (!opt.acceptregex)
1362 if (opt.rejectregex_s)
1364 opt.rejectregex = opt.regex_compile_fun (opt.rejectregex_s);
1365 if (!opt.rejectregex)
1372 if (opt.locale && !check_encoding_name (opt.locale))
1376 opt.locale = find_locale ();
1378 if (opt.encoding_remote && !check_encoding_name (opt.encoding_remote))
1379 opt.encoding_remote = NULL;
1382 memset (&dummy_iri, 0, sizeof (dummy_iri));
1383 if (opt.enable_iri || opt.locale || opt.encoding_remote)
1385 /* sXXXav : be more specific... */
1386 fprintf (stderr, _("This version does not have support for IRIs\n"));
1393 opt.passwd = prompt_for_password ();
1395 if (opt.passwd == NULL || opt.passwd[0] == '\0')
1405 fork_to_background ();
1408 /* Initialize progress. Have to do this after the options are
1409 processed so we know where the log file is. */
1411 set_progress_implementation (opt.progress_type);
1413 /* Fill in the arguments. */
1414 url = alloca_array (char *, nurl + 1);
1417 fprintf (stderr, _("Memory allocation problem\n"));
1420 for (i = 0; i < nurl; i++, optind++)
1422 char *rewritten = rewrite_shorthand_url (argv[optind]);
1426 url[i] = xstrdup (argv[optind]);
1430 /* Initialize logging. */
1431 log_init (opt.lfilename, append_to_log);
1433 /* Open WARC file. */
1434 if (opt.warc_filename != 0)
1437 DEBUGP (("DEBUG output created by Wget %s on %s.\n\n",
1438 version_string, OS_TYPE));
1440 /* Open the output filename if necessary. */
1443 Note that having the output_stream ("-O") file opened here for an FTP
1444 URL rather than in getftp() (ftp.c) (and the http equivalent) rather
1445 limits the ability in VMS to open the file differently for ASCII
1446 versus binary FTP there. (Of course, doing it here allows an open
1447 failure to be detected immediately, without first connecting to the
1450 if (opt.output_document)
1452 if (HYPHENP (opt.output_document))
1455 _setmode (_fileno (stdout), _O_BINARY);
1457 output_stream = stdout;
1464 /* Common fopen() optional arguments:
1465 sequential access only, access callback function.
1467 # define FOPEN_OPT_ARGS , "fop=sqo", "acc", acc_cb, &open_id
1469 #else /* def __VMS */
1470 # define FOPEN_OPT_ARGS
1471 #endif /* def __VMS [else] */
1473 output_stream = fopen (opt.output_document,
1474 opt.always_rest ? "ab" : "wb"
1476 if (output_stream == NULL)
1478 perror (opt.output_document);
1481 if (fstat (fileno (output_stream), &st) == 0 && S_ISREG (st.st_mode))
1482 output_stream_regular = true;
1484 if (!output_stream_regular && opt.convert_links)
1486 fprintf (stderr, _("-k can be used together with -O only if \
1487 outputting to a regular file.\n"));
1494 /* Set global ODS5 flag according to the specified destination (if
1495 any), otherwise according to the current default device.
1497 if (output_stream == NULL)
1498 set_ods5_dest( "SYS$DISK");
1499 else if (output_stream != stdout)
1500 set_ods5_dest( opt.output_document);
1501 #endif /* def __VMS */
1508 /* Setup the signal handler to redirect output when hangup is
1510 if (signal(SIGHUP, SIG_IGN) != SIG_IGN)
1511 signal(SIGHUP, redirect_output_signal);
1513 /* ...and do the same for SIGUSR1. */
1515 signal (SIGUSR1, redirect_output_signal);
1518 /* Writing to a closed socket normally signals SIGPIPE, and the
1519 process exits. What we want is to ignore SIGPIPE and just check
1520 for the return value of write(). */
1521 signal (SIGPIPE, SIG_IGN);
1524 signal (SIGWINCH, progress_handle_sigwinch);
1527 /* Retrieve the URLs from argument list. */
1528 for (t = url; *t; t++)
1530 char *filename = NULL, *redirected_URL = NULL;
1532 /* Need to do a new struct iri every time, because
1533 * retrieve_url may modify it in some circumstances,
1535 struct iri *iri = iri_new ();
1536 struct url *url_parsed;
1538 set_uri_encoding (iri, opt.locale, true);
1539 url_parsed = url_parse (*t, &url_err, iri, true);
1543 char *error = url_error (*t, url_err);
1544 logprintf (LOG_NOTQUIET, "%s: %s.\n",*t, error);
1546 inform_exit_status (URLERROR);
1550 if ((opt.recursive || opt.page_requisites)
1551 && (url_scheme (*t) != SCHEME_FTP || url_uses_proxy (url_parsed)))
1553 int old_follow_ftp = opt.follow_ftp;
1555 /* Turn opt.follow_ftp on in case of recursive FTP retrieval */
1556 if (url_scheme (*t) == SCHEME_FTP)
1559 retrieve_tree (url_parsed, NULL);
1561 opt.follow_ftp = old_follow_ftp;
1565 retrieve_url (url_parsed, *t, &filename, &redirected_URL, NULL,
1566 &dt, opt.recursive, iri, true);
1569 if (opt.delete_after && file_exists_p(filename))
1571 DEBUGP (("Removing file due to --delete-after in main():\n"));
1572 logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
1573 if (unlink (filename))
1574 logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
1576 xfree_null (redirected_URL);
1577 xfree_null (filename);
1578 url_free (url_parsed);
1583 /* And then from the input file, if any. */
1584 if (opt.input_filename)
1588 status = retrieve_from_file (opt.input_filename, opt.force_html, &count);
1589 inform_exit_status (status);
1591 logprintf (LOG_NOTQUIET, _("No URLs found in %s.\n"),
1592 opt.input_filename);
1595 /* Print broken links. */
1596 if (opt.recursive && opt.spider)
1597 print_broken_links ();
1599 /* Print the downloaded sum. */
1600 if ((opt.recursive || opt.page_requisites
1602 || (opt.input_filename && total_downloaded_bytes != 0))
1604 total_downloaded_bytes != 0)
1606 double end_time = ptimer_measure (timer);
1607 ptimer_destroy (timer);
1609 char *wall_time = xstrdup (secs_to_human_time (end_time - start_time));
1610 char *download_time = xstrdup (secs_to_human_time (total_download_time));
1611 logprintf (LOG_NOTQUIET,
1612 _("FINISHED --%s--\nTotal wall clock time: %s\n"
1613 "Downloaded: %d files, %s in %s (%s)\n"),
1614 datetime_str (time (NULL)),
1617 human_readable (total_downloaded_bytes),
1619 retr_rate (total_downloaded_bytes, total_download_time));
1621 xfree (download_time);
1623 /* Print quota warning, if exceeded. */
1624 if (opt.quota && total_downloaded_bytes > opt.quota)
1625 logprintf (LOG_NOTQUIET,
1626 _("Download quota of %s EXCEEDED!\n"),
1627 human_readable (opt.quota));
1630 if (opt.cookies_output)
1633 if (opt.convert_links && !opt.delete_after)
1634 convert_all_links ();
1636 /* Close WARC file. */
1637 if (opt.warc_filename != 0)
1642 for (i = 0; i < nurl; i++)
1646 exit (get_exit_status ());
1648 #endif /* TESTING */
1650 #if defined(SIGHUP) || defined(SIGUSR1)
1652 /* So the signal_name check doesn't break when only one of SIGHUP and SIGUSR1 is defined. */
1660 /* Hangup signal handler. When wget receives SIGHUP or SIGUSR1, it
1661 will continue operating as usual, trying to write to a log file.
1662 If that is impossible, the output will be turned off. */
1665 redirect_output_signal (int sig)
1667 const char *signal_name = (sig == SIGHUP ? "SIGHUP" :
1668 (sig == SIGUSR1 ? "SIGUSR1" :
1670 log_request_redirect_output (signal_name);
1671 progress_schedule_redirect ();
1672 signal (sig, redirect_output_signal);