1 /* Command line parsing.
2 Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001
3 Free Software Foundation, Inc.
5 This file is part of GNU Wget.
7 GNU Wget is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 GNU Wget is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with Wget; if not, write to the Free Software
19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
27 #endif /* HAVE_UNISTD_H */
28 #include <sys/types.h>
33 #endif /* HAVE_STRING_H */
40 #endif /* HAVE_LOCALE_H */
52 #include "progress.h" /* for progress_handle_sigwinch */
55 # include "gen_sslfunc.h"
58 /* On a GNU system this will include the system-wide getopt.h. */
61 #ifndef PATH_SEPARATOR
62 # define PATH_SEPARATOR '/'
65 extern char *version_string;
73 extern struct cookie_jar *wget_cookie_jar;
76 void log_init PARAMS ((const char *, int));
77 void log_close PARAMS ((void));
78 void log_request_redirect_output PARAMS ((const char *));
80 static RETSIGTYPE redirect_output_signal PARAMS ((int));
82 const char *exec_name;
84 /* Initialize I18N. The initialization amounts to invoking
85 setlocale(), bindtextdomain() and textdomain().
86 Does nothing if NLS is disabled or missing. */
88 i18n_initialize (void)
90 /* If HAVE_NLS is defined, assume the existence of the three
91 functions invoked here. */
93 /* Set the current locale. */
94 /* Here we use LC_MESSAGES instead of LC_ALL, for two reasons.
95 First, message catalogs are all of I18N Wget uses anyway.
96 Second, setting LC_ALL has a dangerous potential of messing
97 things up. For example, when in a foreign locale, Solaris
98 strptime() fails to handle international dates correctly, which
99 makes http_atotm() malfunction. */
101 setlocale (LC_MESSAGES, "");
102 setlocale (LC_CTYPE, "");
104 setlocale (LC_ALL, "");
106 /* Set the text message domain. */
107 bindtextdomain ("wget", LOCALEDIR);
109 #endif /* HAVE_NLS */
112 /* Print the usage message. */
116 printf (_("Usage: %s [OPTION]... [URL]...\n"), exec_name);
119 /* Print the help message, describing all the available options. If
120 you add an option, be sure to update this list. */
124 printf (_("GNU Wget %s, a non-interactive network retriever.\n"),
127 /* Had to split this into parts so that the Ultrix compiler and cpp
128 can handle it. Also, it makes translation much easier. */
131 Mandatory arguments to long options are mandatory for short options too.\n\
135 -V, --version display the version of Wget and exit.\n\
136 -h, --help print this help.\n\
137 -b, --background go to background after startup.\n\
138 -e, --execute=COMMAND execute a `.wgetrc\'-style command.\n\
141 Logging and input file:\n\
142 -o, --output-file=FILE log messages to FILE.\n\
143 -a, --append-output=FILE append messages to FILE.\n\
144 -d, --debug print debug output.\n\
145 -q, --quiet quiet (no output).\n\
146 -v, --verbose be verbose (this is the default).\n\
147 -nv, --non-verbose turn off verboseness, without being quiet.\n\
148 -i, --input-file=FILE download URLs found in FILE.\n\
149 -F, --force-html treat input file as HTML.\n\
150 -B, --base=URL prepends URL to relative links in -F -i file.\n\
151 --sslcertfile=FILE optional client certificate.\n\
152 --sslcertkey=KEYFILE optional keyfile for this certificate.\n\
153 --egd-file=FILE file name of the EGD socket.\n\
157 --bind-address=ADDRESS bind to ADDRESS (hostname or IP) on local host.\n\
158 -t, --tries=NUMBER set number of retries to NUMBER (0 unlimits).\n\
159 -O --output-document=FILE write documents to FILE.\n\
160 -nc, --no-clobber don\'t clobber existing files or use .# suffixes.\n\
161 -c, --continue resume getting a partially-downloaded file.\n\
162 --progress=TYPE select progress gauge type.\n\
163 -N, --timestamping don\'t re-retrieve files unless newer than local.\n\
164 -S, --server-response print server response.\n\
165 --spider don\'t download anything.\n\
166 -T, --timeout=SECONDS set the read timeout to SECONDS.\n\
167 -w, --wait=SECONDS wait SECONDS between retrievals.\n\
168 --waitretry=SECONDS wait 1...SECONDS between retries of a retrieval.\n\
169 --random-wait wait from 0...2*WAIT secs between retrievals.\n\
170 -Y, --proxy=on/off turn proxy on or off.\n\
171 -Q, --quota=NUMBER set retrieval quota to NUMBER.\n\
172 --limit-rate=RATE limit download rate to RATE.\n\
176 -nd --no-directories don\'t create directories.\n\
177 -x, --force-directories force creation of directories.\n\
178 -nH, --no-host-directories don\'t create host directories.\n\
179 -P, --directory-prefix=PREFIX save files to PREFIX/...\n\
180 --cut-dirs=NUMBER ignore NUMBER remote directory components.\n\
184 --http-user=USER set http user to USER.\n\
185 --http-passwd=PASS set http password to PASS.\n\
186 -C, --cache=on/off (dis)allow server-cached data (normally allowed).\n\
187 -E, --html-extension save all text/html documents with .html extension.\n\
188 --ignore-length ignore `Content-Length\' header field.\n\
189 --header=STRING insert STRING among the headers.\n\
190 --proxy-user=USER set USER as proxy username.\n\
191 --proxy-passwd=PASS set PASS as proxy password.\n\
192 --referer=URL include `Referer: URL\' header in HTTP request.\n\
193 -s, --save-headers save the HTTP headers to file.\n\
194 -U, --user-agent=AGENT identify as AGENT instead of Wget/VERSION.\n\
195 --no-http-keep-alive disable HTTP keep-alive (persistent connections).\n\
196 --cookies=off don't use cookies.\n\
197 --load-cookies=FILE load cookies from FILE before session.\n\
198 --save-cookies=FILE save cookies to FILE after session.\n\
199 --post-data=STRING use the POST method; send STRING as the data.\n\
200 --post-file=FILE use the POST method; send contents of FILE.\n\
204 -nr, --dont-remove-listing don\'t remove `.listing\' files.\n\
205 -g, --glob=on/off turn file name globbing on or off.\n\
206 --passive-ftp use the \"passive\" transfer mode.\n\
207 --retr-symlinks when recursing, get linked-to files (not dirs).\n\
210 Recursive retrieval:\n\
211 -r, --recursive recursive web-suck -- use with care!\n\
212 -l, --level=NUMBER maximum recursion depth (inf or 0 for infinite).\n\
213 --delete-after delete files locally after downloading them.\n\
214 -k, --convert-links convert non-relative links to relative.\n\
215 -K, --backup-converted before converting file X, back up as X.orig.\n\
216 -m, --mirror shortcut option equivalent to -r -N -l inf -nr.\n\
217 -p, --page-requisites get all images, etc. needed to display HTML page.\n\
220 Recursive accept/reject:\n\
221 -A, --accept=LIST comma-separated list of accepted extensions.\n\
222 -R, --reject=LIST comma-separated list of rejected extensions.\n\
223 -D, --domains=LIST comma-separated list of accepted domains.\n\
224 --exclude-domains=LIST comma-separated list of rejected domains.\n\
225 --follow-ftp follow FTP links from HTML documents.\n\
226 --follow-tags=LIST comma-separated list of followed HTML tags.\n\
227 -G, --ignore-tags=LIST comma-separated list of ignored HTML tags.\n\
228 -H, --span-hosts go to foreign hosts when recursive.\n\
229 -L, --relative follow relative links only.\n\
230 -I, --include-directories=LIST list of allowed directories.\n\
231 -X, --exclude-directories=LIST list of excluded directories.\n\
232 -np, --no-parent don\'t ascend to the parent directory.\n\
234 fputs (_("Mail bug reports and suggestions to <bug-wget@gnu.org>.\n"),
239 main (int argc, char *const *argv)
242 int i, c, nurl, status, append_to_log;
244 static struct option long_options[] =
246 /* Options without arguments: */
247 { "background", no_argument, NULL, 'b' },
248 { "backup-converted", no_argument, NULL, 'K' },
249 { "continue", no_argument, NULL, 'c' },
250 { "convert-links", no_argument, NULL, 'k' },
251 { "debug", no_argument, NULL, 'd' },
252 { "delete-after", no_argument, NULL, 136 },
253 { "dont-remove-listing", no_argument, NULL, 149 },
254 { "follow-ftp", no_argument, NULL, 142 },
255 { "force-directories", no_argument, NULL, 'x' },
256 { "force-hier", no_argument, NULL, 'x' }, /* obsolete */
257 { "force-html", no_argument, NULL, 'F'},
258 { "help", no_argument, NULL, 'h' },
259 { "html-extension", no_argument, NULL, 'E' },
260 { "ignore-length", no_argument, NULL, 138 },
261 { "mirror", no_argument, NULL, 'm' },
262 { "no-clobber", no_argument, NULL, 141 },
263 { "no-directories", no_argument, NULL, 147 },
264 { "no-host-directories", no_argument, NULL, 148 },
265 { "no-host-lookup", no_argument, NULL, 150 },
266 { "no-http-keep-alive", no_argument, NULL, 156 },
267 { "no-parent", no_argument, NULL, 133 },
268 { "non-verbose", no_argument, NULL, 146 },
269 { "passive-ftp", no_argument, NULL, 139 },
270 { "page-requisites", no_argument, NULL, 'p' },
271 { "quiet", no_argument, NULL, 'q' },
272 { "random-wait", no_argument, NULL, 165 },
273 { "recursive", no_argument, NULL, 'r' },
274 { "relative", no_argument, NULL, 'L' },
275 { "retr-symlinks", no_argument, NULL, 137 },
276 { "save-headers", no_argument, NULL, 's' },
277 { "server-response", no_argument, NULL, 'S' },
278 { "span-hosts", no_argument, NULL, 'H' },
279 { "spider", no_argument, NULL, 132 },
280 { "timestamping", no_argument, NULL, 'N' },
281 { "verbose", no_argument, NULL, 'v' },
282 { "version", no_argument, NULL, 'V' },
284 /* Options accepting an argument: */
285 { "accept", required_argument, NULL, 'A' },
286 { "append-output", required_argument, NULL, 'a' },
287 { "backups", required_argument, NULL, 151 }, /* undocumented */
288 { "base", required_argument, NULL, 'B' },
289 { "bind-address", required_argument, NULL, 155 },
290 { "cache", required_argument, NULL, 'C' },
291 { "cookies", required_argument, NULL, 160 },
292 { "cut-dirs", required_argument, NULL, 145 },
293 { "directory-prefix", required_argument, NULL, 'P' },
294 { "domains", required_argument, NULL, 'D' },
295 { "dot-style", required_argument, NULL, 134 },
296 { "execute", required_argument, NULL, 'e' },
297 { "exclude-directories", required_argument, NULL, 'X' },
298 { "exclude-domains", required_argument, NULL, 140 },
299 { "follow-tags", required_argument, NULL, 153 },
300 { "glob", required_argument, NULL, 'g' },
301 { "header", required_argument, NULL, 131 },
302 { "htmlify", required_argument, NULL, 135 },
303 { "http-passwd", required_argument, NULL, 130 },
304 { "http-user", required_argument, NULL, 129 },
305 { "ignore-tags", required_argument, NULL, 'G' },
306 { "include-directories", required_argument, NULL, 'I' },
307 { "input-file", required_argument, NULL, 'i' },
308 { "level", required_argument, NULL, 'l' },
309 { "limit-rate", required_argument, NULL, 164 },
310 { "load-cookies", required_argument, NULL, 161 },
311 { "no", required_argument, NULL, 'n' },
312 { "output-document", required_argument, NULL, 'O' },
313 { "output-file", required_argument, NULL, 'o' },
314 { "post-data", required_argument, NULL, 167 },
315 { "post-file", required_argument, NULL, 168 },
316 { "progress", required_argument, NULL, 163 },
317 { "proxy", required_argument, NULL, 'Y' },
318 { "proxy-passwd", required_argument, NULL, 144 },
319 { "proxy-user", required_argument, NULL, 143 },
320 { "quota", required_argument, NULL, 'Q' },
321 { "reject", required_argument, NULL, 'R' },
322 { "save-cookies", required_argument, NULL, 162 },
323 { "timeout", required_argument, NULL, 'T' },
324 { "tries", required_argument, NULL, 't' },
325 { "user-agent", required_argument, NULL, 'U' },
326 { "referer", required_argument, NULL, 157 },
327 { "use-proxy", required_argument, NULL, 'Y' },
329 { "sslcertfile", required_argument, NULL, 158 },
330 { "sslcertkey", required_argument, NULL, 159 },
331 { "egd-file", required_argument, NULL, 166 },
332 #endif /* HAVE_SSL */
333 { "wait", required_argument, NULL, 'w' },
334 { "waitretry", required_argument, NULL, 152 },
342 /* Construct the name of the executable, without the directory part. */
343 exec_name = strrchr (argv[0], PATH_SEPARATOR);
350 windows_main_junk (&argc, (char **) argv, (char **) &exec_name);
353 initialize (); /* sets option defaults; reads the system wgetrc and .wgetrc */
355 /* [Is the order of the option letters significant? If not, they should be
356 alphabetized, like the long_options. The only thing I know for sure is
357 that the options with required arguments must be followed by a ':'.
358 -- Dan Harkless <wget@harkless.org>] */
359 while ((c = getopt_long (argc, argv, "\
360 hpVqvdkKsxmNWrHSLcFbEY:G:g:T:U:O:l:n:i:o:a:t:D:A:R:P:B:e:Q:X:I:w:C:",
361 long_options, (int *)0)) != EOF)
365 /* Options without arguments: */
367 setval ("spider", "on");
370 setval ("noparent", "on");
373 setval ("deleteafter", "on");
376 setval ("retrsymlinks", "on");
379 setval ("ignorelength", "on");
382 setval ("passiveftp", "on");
385 setval ("noclobber", "on");
388 setval ("followftp", "on");
391 setval ("cutdirs", optarg);
394 setval ("verbose", "off");
397 setval ("dirstruct", "off");
400 setval ("addhostdir", "off");
403 setval ("removelisting", "off");
406 setval ("bindaddress", optarg);
409 setval ("httpkeepalive", "off");
412 setval ("randomwait", "on");
415 setval ("background", "on");
418 setval ("continue", "on");
422 setval ("debug", "on");
423 #else /* not DEBUG */
424 fprintf (stderr, _("%s: debug support not compiled in.\n"),
426 #endif /* not DEBUG */
429 setval ("htmlextension", "on");
432 setval ("forcehtml", "on");
435 setval ("spanhosts", "on");
445 setval ("backupconverted", "on");
448 setval ("convertlinks", "on");
451 setval ("relativeonly", "on");
454 setval ("mirror", "on");
457 setval ("timestamping", "on");
460 setval ("pagerequisites", "on");
463 setval ("serverresponse", "on");
466 setval ("saveheaders", "on");
469 setval ("quiet", "on");
472 setval ("recursive", "on");
475 printf ("GNU Wget %s\n\n", version_string);
477 Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001 Free Software Foundation, Inc.\n"));
479 This program is distributed in the hope that it will be useful,\n\
480 but WITHOUT ANY WARRANTY; without even the implied warranty of\n\
481 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n\
482 GNU General Public License for more details.\n"));
483 printf (_("\nOriginally written by Hrvoje Niksic <hniksic@arsdigita.com>.\n"));
487 setval ("verbose", "on");
490 setval ("dirstruct", "on");
493 /* Options accepting an argument: */
495 setval ("httpuser", optarg);
498 setval ("httppasswd", optarg);
501 setval ("header", optarg);
504 setval ("dotstyle", optarg);
507 setval ("htmlify", optarg);
510 setval ("excludedomains", optarg);
513 setval ("proxyuser", optarg);
516 setval ("proxypasswd", optarg);
519 setval ("backups", optarg);
522 setval ("waitretry", optarg);
525 setval ("followtags", optarg);
528 setval ("cookies", optarg);
531 setval ("loadcookies", optarg);
534 setval ("savecookies", optarg);
537 setval ("progress", optarg);
540 setval ("limitrate", optarg);
543 setval ("referer", optarg);
547 setval ("sslcertfile", optarg);
550 setval ("sslcertkey", optarg);
553 setval ("egdfile", optarg);
555 #endif /* HAVE_SSL */
557 setval ("postdata", optarg);
560 setval ("postfile", optarg);
563 setval ("accept", optarg);
566 setval ("logfile", optarg);
570 setval ("base", optarg);
573 setval ("cache", optarg);
576 setval ("domains", optarg);
581 if (parse_line (optarg, &com, &val))
583 if (!setval (com, val))
588 fprintf (stderr, _("%s: %s: invalid command\n"), exec_name,
597 setval ("ignoretags", optarg);
600 setval ("glob", optarg);
603 setval ("includedirectories", optarg);
606 setval ("input", optarg);
609 setval ("reclevel", optarg);
613 /* #### What we really want here is --no-foo. */
616 for (p = optarg; *p; p++)
620 setval ("verbose", "off");
623 setval ("addhostdir", "off");
626 setval ("dirstruct", "off");
629 setval ("noclobber", "on");
632 setval ("removelisting", "off");
635 setval ("noparent", "on");
638 setval ("httpkeepalive", "off");
641 printf (_("%s: illegal option -- `-n%c'\n"), exec_name, *p);
644 printf (_("Try `%s --help\' for more options.\n"), exec_name);
650 setval ("outputdocument", optarg);
653 setval ("logfile", optarg);
656 setval ("dirprefix", optarg);
659 setval ("quota", optarg);
662 setval ("reject", optarg);
665 setval ("timeout", optarg);
668 setval ("tries", optarg);
671 setval ("useragent", optarg);
674 setval ("wait", optarg);
677 setval ("excludedirectories", optarg);
680 setval ("useproxy", optarg);
686 printf (_("Try `%s --help' for more options.\n"), exec_name);
692 /* All user options have now been processed, so it's now safe to do
693 interoption dependency checks. */
695 if (opt.reclevel == 0)
696 opt.reclevel = INFINITE_RECURSION; /* see wget.h for commentary on this */
698 if (opt.page_requisites && !opt.recursive)
700 opt.recursive = TRUE;
702 if (!opt.no_dirstruct)
703 opt.dirstruct = TRUE; /* usually handled by cmd_spec_recursive() */
706 if (opt.verbose == -1)
707 opt.verbose = !opt.quiet;
710 if (opt.verbose && opt.quiet)
712 printf (_("Can't be verbose and quiet at the same time.\n"));
716 if (opt.timestamping && opt.noclobber)
719 Can't timestamp and not clobber old files at the same time.\n"));
723 nurl = argc - optind;
724 if (!nurl && !opt.input_filename)
726 /* No URL specified. */
727 printf (_("%s: missing URL\n"), exec_name);
730 /* #### Something nicer should be printed here -- similar to the
731 pre-1.5 `--help' page. */
732 printf (_("Try `%s --help' for more options.\n"), exec_name);
737 fork_to_background ();
739 /* Initialize progress. Have to do this after the options are
740 processed so we know where the log file is. */
742 set_progress_implementation (opt.progress_type);
744 /* Allocate basic pointer. */
745 url = ALLOCA_ARRAY (char *, nurl + 1);
746 /* Fill in the arguments. */
747 for (i = 0; i < nurl; i++, optind++)
749 char *rewritten = rewrite_shorthand_url (argv[optind]);
753 url[i] = xstrdup (argv[optind]);
757 /* Change the title of the console window on Windows. #### I think this
758 statement should belong to retrieve_url(). --hniksic. */
760 ws_changetitle (*url, nurl);
763 /* Initialize logging. */
764 log_init (opt.lfilename, append_to_log);
766 DEBUGP (("DEBUG output created by Wget %s on %s.\n\n", version_string,
769 /* Open the output filename if necessary. */
770 if (opt.output_document)
772 if (HYPHENP (opt.output_document))
777 opt.dfp = fopen (opt.output_document, opt.always_rest ? "ab" : "wb");
780 perror (opt.output_document);
783 if (fstat (fileno (opt.dfp), &st) == 0 && S_ISREG (st.st_mode))
784 opt.od_known_regular = 1;
792 /* Set up the signal handler to redirect output when hangup is received. */
795 if (signal(SIGHUP, SIG_IGN) != SIG_IGN)
796 signal(SIGHUP, redirect_output_signal);
797 /* ...and do the same for SIGUSR1. */
798 signal (SIGUSR1, redirect_output_signal);
799 /* Writing to a closed socket normally signals SIGPIPE, and the
800 process exits. What we want is to ignore SIGPIPE and just check
801 for the return value of write(). */
802 signal (SIGPIPE, SIG_IGN);
804 signal (SIGWINCH, progress_handle_sigwinch);
806 #endif /* HAVE_SIGNAL */
809 /* Must call this before resolving any URLs because it has the power
810 to disable `https'. */
814 status = RETROK; /* initialize it, just-in-case */
815 /* Retrieve the URLs from argument list. */
816 for (t = url; *t; t++)
818 char *filename = NULL, *redirected_URL = NULL;
821 if (opt.recursive && url_scheme (*t) != SCHEME_FTP)
822 status = retrieve_tree (*t);
824 status = retrieve_url (*t, &filename, &redirected_URL, NULL, &dt);
826 if (opt.delete_after && file_exists_p(filename))
828 DEBUGP (("Removing file due to --delete-after in main():\n"));
829 logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
830 if (unlink (filename))
831 logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
834 FREE_MAYBE (redirected_URL);
835 FREE_MAYBE (filename);
838 /* And then from the input file, if any. */
839 if (opt.input_filename)
842 status = retrieve_from_file (opt.input_filename, opt.force_html, &count);
844 logprintf (LOG_NOTQUIET, _("No URLs found in %s.\n"),
847 /* Print the downloaded sum. */
850 || (opt.input_filename && opt.downloaded != 0))
852 logprintf (LOG_NOTQUIET,
853 _("\nFINISHED --%s--\nDownloaded: %s bytes in %d files\n"),
855 (opt.downloaded_overflow ?
856 "<overflow>" : legible_very_long (opt.downloaded)),
858 /* Print quota warning, if exceeded. */
859 if (downloaded_exceeds_quota ())
860 logprintf (LOG_NOTQUIET,
861 _("Download quota (%s bytes) EXCEEDED!\n"),
862 legible (opt.quota));
865 if (opt.cookies_output && wget_cookie_jar)
866 cookie_jar_save (wget_cookie_jar, opt.cookies_output);
868 if (opt.convert_links && !opt.delete_after)
870 convert_all_links ();
874 for (i = 0; i < nurl; i++)
879 print_malloc_debug_stats ();
881 if (status == RETROK)
888 /* Hangup signal handler. When wget receives SIGHUP or SIGUSR1, it
889 will continue operating as usual, trying to write into a log file.
890 If that is impossible, the output will be turned off.
892 #### It is unsafe to call libc functions from a signal handler.
893 What we should do is set a global variable and have the logging code pick it up. */
897 redirect_output_signal (int sig)
899 char *signal_name = (sig == SIGHUP ? "SIGHUP" :
900 (sig == SIGUSR1 ? "SIGUSR1" :
902 log_request_redirect_output (signal_name);
903 progress_schedule_redirect ();
904 signal (sig, redirect_output_signal);
906 #endif /* HAVE_SIGNAL */