1 /* Command line parsing.
2 Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001
3 Free Software Foundation, Inc.
5 This file is part of GNU Wget.
7 GNU Wget is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 GNU Wget is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with Wget; if not, write to the Free Software
19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
27 #endif /* HAVE_UNISTD_H */
28 #include <sys/types.h>
33 #endif /* HAVE_STRING_H */
40 #endif /* HAVE_LOCALE_H */
44 #define OPTIONS_DEFINED_HERE /* for options.h */
55 /* On GNU system this will include system-wide getopt.h. */
58 #ifndef PATH_SEPARATOR
59 # define PATH_SEPARATOR '/'
62 extern char *version_string;
71 void log_init PARAMS ((const char *, int));
72 void log_close PARAMS ((void));
73 void redirect_output PARAMS ((const char *));
75 static RETSIGTYPE redirect_output_signal PARAMS ((int));
77 const char *exec_name;
79 /* Initialize I18N. The initialization amounts to invoking
80 setlocale(), bindtextdomain() and textdomain().
81 Does nothing if NLS is disabled or missing. */
83 i18n_initialize (void)
85 /* If HAVE_NLS is defined, assume the existence of the three
86 functions invoked here. */
88 /* Set the current locale. */
89 /* Here we use LC_MESSAGES instead of LC_ALL, for two reasons.
90 First, message catalogs are the only I18N facility Wget uses.
91 Second, setting LC_ALL can break locale-sensitive library
92 functions. For example, when in a foreign locale, Solaris
93 strptime() fails to handle international dates correctly, which
94 makes http_atotm() malfunction. */
96 setlocale (LC_MESSAGES, "");
97 setlocale (LC_CTYPE, "");
99 setlocale (LC_ALL, "");
101 /* Set the text message domain. */
102 bindtextdomain ("wget", LOCALEDIR);
104 #endif /* HAVE_NLS */
107 /* It's kosher to declare these here because their interface _has_ to
108 be void foo(void). */
109 void host_init PARAMS ((void));
111 /* This just calls the various initialization functions from the
112 modules that need one-time initialization. */
114 private_initialize (void)
119 /* Print the usage message. */
123 printf (_("Usage: %s [OPTION]... [URL]...\n"), exec_name);
126 /* Print the help message, describing all the available options. If
127 you add an option, be sure to update this list. */
131 printf (_("GNU Wget %s, a non-interactive network retriever.\n"),
134 /* Had to split this into parts so that the Ultrix compiler and cpp
135 do not complain. Also, it makes translation much easier. */
138 Mandatory arguments to long options are mandatory for short options too.\n\
142 -V, --version display the version of Wget and exit.\n\
143 -h, --help print this help.\n\
144 -b, --background go to background after startup.\n\
145 -e, --execute=COMMAND execute a `.wgetrc\'-style command.\n\
148 Logging and input file:\n\
149 -o, --output-file=FILE log messages to FILE.\n\
150 -a, --append-output=FILE append messages to FILE.\n\
151 -d, --debug print debug output.\n\
152 -q, --quiet quiet (no output).\n\
153 -v, --verbose be verbose (this is the default).\n\
154 -nv, --non-verbose turn off verboseness, without being quiet.\n\
155 -i, --input-file=FILE download URLs found in FILE.\n\
156 -F, --force-html treat input file as HTML.\n\
157 -B, --base=URL prepends URL to relative links in -F -i file.\n\
158 --sslcertfile=FILE optional client certificate.\n\
159 --sslcertkey=KEYFILE optional keyfile for this certificate.\n\
163 --bind-address=ADDRESS bind to ADDRESS (hostname or IP) on local host.\n\
164 -t, --tries=NUMBER set number of retries to NUMBER (0 unlimits).\n\
165 -O --output-document=FILE write documents to FILE.\n\
166 -nc, --no-clobber don\'t clobber existing files or use .# suffixes.\n\
167 -c, --continue resume getting a partially-downloaded file.\n\
168 --dot-style=STYLE set retrieval display style.\n\
169 -N, --timestamping don\'t re-retrieve files unless newer than local.\n\
170 -S, --server-response print server response.\n\
171 --spider don\'t download anything.\n\
172 -T, --timeout=SECONDS set the read timeout to SECONDS.\n\
173 -w, --wait=SECONDS wait SECONDS between retrievals.\n\
174 --waitretry=SECONDS wait 1...SECONDS between retries of a retrieval.\n\
175 -Y, --proxy=on/off turn proxy on or off.\n\
176 -Q, --quota=NUMBER set retrieval quota to NUMBER.\n\
180 -nd --no-directories don\'t create directories.\n\
181 -x, --force-directories force creation of directories.\n\
182 -nH, --no-host-directories don\'t create host directories.\n\
183 -P, --directory-prefix=PREFIX save files to PREFIX/...\n\
184 --cut-dirs=NUMBER ignore NUMBER remote directory components.\n\
188 --http-user=USER set http user to USER.\n\
189 --http-passwd=PASS set http password to PASS.\n\
190 -C, --cache=on/off (dis)allow server-cached data (normally allowed).\n\
191 -E, --html-extension save all text/html documents with .html extension.\n\
192 --ignore-length ignore `Content-Length\' header field.\n\
193 --header=STRING insert STRING among the headers.\n\
194 --proxy-user=USER set USER as proxy username.\n\
195 --proxy-passwd=PASS set PASS as proxy password.\n\
196 --referer=URL include `Referer: URL\' header in HTTP request.\n\
197 -s, --save-headers save the HTTP headers to file.\n\
198 -U, --user-agent=AGENT identify as AGENT instead of Wget/VERSION.\n\
199 --no-http-keep-alive disable HTTP keep-alive (persistent connections).\n\
200 --cookies=off don't use cookies.\n\
201 --load-cookies=FILE load cookies from FILE before session.\n\
202 --save-cookies=FILE save cookies to FILE after session.\n\
206 -nr, --dont-remove-listing don\'t remove `.listing\' files.\n\
207 -g, --glob=on/off turn file name globbing on or off.\n\
208 --passive-ftp use the \"passive\" transfer mode.\n\
209 --retr-symlinks when recursing, get linked-to files (not dirs).\n\
212 Recursive retrieval:\n\
213 -r, --recursive recursive web-suck -- use with care!\n\
214 -l, --level=NUMBER maximum recursion depth (inf or 0 for infinite).\n\
215 --delete-after delete files locally after downloading them.\n\
216 -k, --convert-links convert non-relative links to relative.\n\
217 -K, --backup-converted before converting file X, back up as X.orig.\n\
218 -m, --mirror shortcut option equivalent to -r -N -l inf -nr.\n\
219 -p, --page-requisites get all images, etc. needed to display HTML page.\n\
222 Recursive accept/reject:\n\
223 -A, --accept=LIST comma-separated list of accepted extensions.\n\
224 -R, --reject=LIST comma-separated list of rejected extensions.\n\
225 -D, --domains=LIST comma-separated list of accepted domains.\n\
226 --exclude-domains=LIST comma-separated list of rejected domains.\n\
227 --follow-ftp follow FTP links from HTML documents.\n\
228 --follow-tags=LIST comma-separated list of followed HTML tags.\n\
229 -G, --ignore-tags=LIST comma-separated list of ignored HTML tags.\n\
230 -H, --span-hosts go to foreign hosts when recursive.\n\
231 -L, --relative follow relative links only.\n\
232 -I, --include-directories=LIST list of allowed directories.\n\
233 -X, --exclude-directories=LIST list of excluded directories.\n\
234 -nh, --no-host-lookup don\'t DNS-lookup hosts.\n\
235 -np, --no-parent don\'t ascend to the parent directory.\n\
237 fputs (_("Mail bug reports and suggestions to <bug-wget@gnu.org>.\n"),
242 main (int argc, char *const *argv)
245 int i, c, nurl, status, append_to_log;
248 static struct option long_options[] =
250 /* Options without arguments: */
251 { "background", no_argument, NULL, 'b' },
252 { "backup-converted", no_argument, NULL, 'K' },
253 { "continue", no_argument, NULL, 'c' },
254 { "convert-links", no_argument, NULL, 'k' },
255 { "debug", no_argument, NULL, 'd' },
256 { "delete-after", no_argument, NULL, 136 },
257 { "dont-remove-listing", no_argument, NULL, 149 },
258 { "follow-ftp", no_argument, NULL, 142 },
259 { "force-directories", no_argument, NULL, 'x' },
260 { "force-hier", no_argument, NULL, 'x' }, /* obsolete */
261 { "force-html", no_argument, NULL, 'F'},
262 { "help", no_argument, NULL, 'h' },
263 { "html-extension", no_argument, NULL, 'E' },
264 { "ignore-length", no_argument, NULL, 138 },
265 { "mirror", no_argument, NULL, 'm' },
266 { "no-clobber", no_argument, NULL, 141 },
267 { "no-directories", no_argument, NULL, 147 },
268 { "no-host-directories", no_argument, NULL, 148 },
269 { "no-host-lookup", no_argument, NULL, 150 },
270 { "no-http-keep-alive", no_argument, NULL, 156 },
271 { "no-parent", no_argument, NULL, 133 },
272 { "non-verbose", no_argument, NULL, 146 },
273 { "passive-ftp", no_argument, NULL, 139 },
274 { "page-requisites", no_argument, NULL, 'p' },
275 { "quiet", no_argument, NULL, 'q' },
276 { "recursive", no_argument, NULL, 'r' },
277 { "relative", no_argument, NULL, 'L' },
278 { "retr-symlinks", no_argument, NULL, 137 },
279 { "save-headers", no_argument, NULL, 's' },
280 { "server-response", no_argument, NULL, 'S' },
281 { "span-hosts", no_argument, NULL, 'H' },
282 { "spider", no_argument, NULL, 132 },
283 { "timestamping", no_argument, NULL, 'N' },
284 { "verbose", no_argument, NULL, 'v' },
285 { "version", no_argument, NULL, 'V' },
287 /* Options accepting an argument: */
288 { "accept", required_argument, NULL, 'A' },
289 { "append-output", required_argument, NULL, 'a' },
290 { "backups", required_argument, NULL, 151 }, /* undocumented */
291 { "base", required_argument, NULL, 'B' },
292 { "bind-address", required_argument, NULL, 155 },
293 { "cache", required_argument, NULL, 'C' },
294 { "cookies", required_argument, NULL, 160 },
295 { "cut-dirs", required_argument, NULL, 145 },
296 { "directory-prefix", required_argument, NULL, 'P' },
297 { "domains", required_argument, NULL, 'D' },
298 { "dot-style", required_argument, NULL, 134 },
299 { "execute", required_argument, NULL, 'e' },
300 { "exclude-directories", required_argument, NULL, 'X' },
301 { "exclude-domains", required_argument, NULL, 140 },
302 { "follow-tags", required_argument, NULL, 153 },
303 { "glob", required_argument, NULL, 'g' },
304 { "header", required_argument, NULL, 131 },
305 { "htmlify", required_argument, NULL, 135 },
306 { "http-passwd", required_argument, NULL, 130 },
307 { "http-user", required_argument, NULL, 129 },
308 { "ignore-tags", required_argument, NULL, 'G' },
309 { "include-directories", required_argument, NULL, 'I' },
310 { "input-file", required_argument, NULL, 'i' },
311 { "level", required_argument, NULL, 'l' },
312 { "load-cookies", required_argument, NULL, 161 },
313 { "no", required_argument, NULL, 'n' },
314 { "output-document", required_argument, NULL, 'O' },
315 { "output-file", required_argument, NULL, 'o' },
316 { "proxy", required_argument, NULL, 'Y' },
317 { "proxy-passwd", required_argument, NULL, 144 },
318 { "proxy-user", required_argument, NULL, 143 },
319 { "quota", required_argument, NULL, 'Q' },
320 { "reject", required_argument, NULL, 'R' },
321 { "save-cookies", required_argument, NULL, 162 },
322 { "timeout", required_argument, NULL, 'T' },
323 { "tries", required_argument, NULL, 't' },
324 { "user-agent", required_argument, NULL, 'U' },
325 { "referer", required_argument, NULL, 157 },
326 { "use-proxy", required_argument, NULL, 'Y' },
328 { "sslcertfile", required_argument, NULL, 158 },
329 { "sslcertkey", required_argument, NULL, 159 },
330 #endif /* HAVE_SSL */
331 { "wait", required_argument, NULL, 'w' },
332 { "waitretry", required_argument, NULL, 152 },
337 private_initialize ();
341 /* Construct the name of the executable, without the directory part. */
342 exec_name = strrchr (argv[0], PATH_SEPARATOR);
349 windows_main_junk (&argc, (char **) argv, (char **) &exec_name);
352 initialize (); /* sets option defaults; reads the system wgetrc and .wgetrc */
354 /* [Is the order of the option letters significant? If not, they should be
355 alphabetized, like the long_options. The only thing I know for sure is
356 that the options with required arguments must be followed by a ':'.
357 -- Dan Harkless <wget@harkless.org>] */
358 while ((c = getopt_long (argc, argv, "\
359 hpVqvdkKsxmNWrHSLcFbEY:G:g:T:U:O:l:n:i:o:a:t:D:A:R:P:B:e:Q:X:I:w:C:",
360 long_options, (int *)0)) != EOF)
364 /* Options without arguments: */
366 setval ("spider", "on");
369 setval ("noparent", "on");
372 setval ("deleteafter", "on");
375 setval ("retrsymlinks", "on");
378 setval ("ignorelength", "on");
381 setval ("passiveftp", "on");
384 setval ("noclobber", "on");
387 setval ("followftp", "on");
390 setval ("cutdirs", optarg);
393 setval ("verbose", "off");
396 setval ("dirstruct", "off");
399 setval ("addhostdir", "off");
402 setval ("removelisting", "off");
405 setval ("simplehostcheck", "on");
408 setval ("bindaddress", optarg);
411 setval ("httpkeepalive", "off");
414 setval ("background", "on");
417 setval ("continue", "on");
421 setval ("debug", "on");
422 #else /* not DEBUG */
423 fprintf (stderr, _("%s: debug support not compiled in.\n"),
425 #endif /* not DEBUG */
428 setval ("htmlextension", "on");
431 setval ("forcehtml", "on");
434 setval ("spanhosts", "on");
444 setval ("backupconverted", "on");
447 setval ("convertlinks", "on");
450 setval ("relativeonly", "on");
453 setval ("mirror", "on");
456 setval ("timestamping", "on");
459 setval ("pagerequisites", "on");
462 setval ("serverresponse", "on");
465 setval ("saveheaders", "on");
468 setval ("quiet", "on");
471 setval ("recursive", "on");
474 printf ("GNU Wget %s\n\n", version_string);
476 Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001 Free Software Foundation, Inc.\n\
477 This program is distributed in the hope that it will be useful,\n\
478 but WITHOUT ANY WARRANTY; without even the implied warranty of\n\
479 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n\
480 GNU General Public License for more details.\n"));
481 printf (_("\nOriginally written by Hrvoje Niksic <hniksic@arsdigita.com>.\n"));
485 setval ("verbose", "on");
488 setval ("dirstruct", "on");
491 /* Options accepting an argument: */
493 setval ("httpuser", optarg);
496 setval ("httppasswd", optarg);
499 setval ("header", optarg);
502 setval ("dotstyle", optarg);
505 setval ("htmlify", optarg);
508 setval ("excludedomains", optarg);
511 setval ("proxyuser", optarg);
514 setval ("proxypasswd", optarg);
517 setval ("backups", optarg);
520 setval ("waitretry", optarg);
524 setval ("followtags", optarg);
527 setval ("cookies", optarg);
530 setval ("loadcookies", optarg);
533 setval ("savecookies", optarg);
536 setval ("referer", optarg);
540 setval ("sslcertfile", optarg);
543 setval ("sslcertkey", optarg);
545 #endif /* HAVE_SSL */
547 setval ("accept", optarg);
550 setval ("logfile", optarg);
554 setval ("base", optarg);
557 setval ("cache", optarg);
560 setval ("domains", optarg);
565 if (parse_line (optarg, &com, &val))
567 if (!setval (com, val))
572 fprintf (stderr, _("%s: %s: invalid command\n"), exec_name,
581 setval ("ignoretags", optarg);
584 setval ("glob", optarg);
587 setval ("includedirectories", optarg);
590 setval ("input", optarg);
593 setval ("reclevel", optarg);
597 /* #### The `-n?' options are an ugly hack! */
600 for (p = optarg; *p; p++)
604 setval ("verbose", "off");
607 setval ("simplehostcheck", "on");
610 setval ("addhostdir", "off");
613 setval ("dirstruct", "off");
616 setval ("noclobber", "on");
619 setval ("removelisting", "off");
622 setval ("noparent", "on");
625 setval ("httpkeepalive", "off");
628 printf (_("%s: illegal option -- `-n%c'\n"), exec_name, *p);
631 printf (_("Try `%s --help\' for more options.\n"), exec_name);
637 setval ("outputdocument", optarg);
640 setval ("logfile", optarg);
643 setval ("dirprefix", optarg);
646 setval ("quota", optarg);
649 setval ("reject", optarg);
652 setval ("timeout", optarg);
655 setval ("tries", optarg);
658 setval ("useragent", optarg);
661 setval ("wait", optarg);
664 setval ("excludedirectories", optarg);
667 setval ("useproxy", optarg);
673 printf (_("Try `%s --help' for more options.\n"), exec_name);
679 /* All user options have now been processed, so it is now safe to do
680 inter-option dependency checks. */
682 if (opt.reclevel == 0)
683 opt.reclevel = INFINITE_RECURSION; /* see wget.h for commentary on this */
685 if (opt.page_requisites && !opt.recursive)
687 opt.recursive = TRUE;
689 if (!opt.no_dirstruct)
690 opt.dirstruct = TRUE; /* usually handled by cmd_spec_recursive() */
693 if (opt.verbose == -1)
694 opt.verbose = !opt.quiet;
696 /* Retain compatibility with previous scripts:
697 if wait has been set, but waitretry has not, give waitretry the wait value.
698 A simple check on the values is not enough, because the user could
699 have deliberately set wait to n>0 and waitretry to 0 - HEH */
702 char opt_wait_str[256]; /* bigger than needed buf to prevent overflow */
704 sprintf(opt_wait_str, "%ld", opt.wait);
705 setval ("waitretry", opt_wait_str);
709 if (opt.verbose && opt.quiet)
711 printf (_("Can't be verbose and quiet at the same time.\n"));
715 if (opt.timestamping && opt.noclobber)
718 Can't timestamp and not clobber old files at the same time.\n"));
722 nurl = argc - optind;
723 if (!nurl && !opt.input_filename)
725 /* No URL specified. */
726 printf (_("%s: missing URL\n"), exec_name);
729 /* #### Something nicer should be printed here -- similar to the
730 pre-1.5 `--help' page. */
731 printf (_("Try `%s --help' for more options.\n"), exec_name);
736 fork_to_background ();
738 /* Allocate basic pointer. */
739 url = ALLOCA_ARRAY (char *, nurl + 1);
740 /* Fill in the arguments. */
741 for (i = 0; i < nurl; i++, optind++)
743 char *rewritten = rewrite_url_maybe (argv[optind]);
746 printf ("Converted %s to %s\n", argv[optind], rewritten);
750 url[i] = xstrdup (argv[optind]);
754 /* Change the title of the console window on Windows. #### I think this
755 statement should belong to retrieve_url(). --hniksic. */
757 ws_changetitle (*url, nurl);
760 /* Initialize logging. */
761 log_init (opt.lfilename, append_to_log);
763 DEBUGP (("DEBUG output created by Wget %s on %s.\n\n", version_string,
766 /* Open the output filename if necessary. */
767 if (opt.output_document)
769 if (HYPHENP (opt.output_document))
774 opt.dfp = fopen (opt.output_document, opt.always_rest ? "ab" : "wb");
777 perror (opt.output_document);
780 if (fstat (fileno (opt.dfp), &st) == 0 && S_ISREG (st.st_mode))
781 opt.od_known_regular = 1;
789 /* Set up the signal handler to redirect output when hangup is
792 if (signal(SIGHUP, SIG_IGN) != SIG_IGN)
793 signal(SIGHUP, redirect_output_signal);
794 /* ...and do the same for SIGUSR1. */
795 signal (SIGUSR1, redirect_output_signal);
796 /* Writing to a closed socket normally signals SIGPIPE, and the
797 process exits. What we want is to ignore SIGPIPE and just check
798 for the return value of write(). */
799 signal (SIGPIPE, SIG_IGN);
800 #endif /* HAVE_SIGNAL */
802 status = RETROK; /* initialize it, just-in-case */
804 /* Retrieve the URLs from argument list. */
805 for (t = url; *t; t++)
807 char *filename, *redirected_URL;
810 status = retrieve_url (*t, &filename, &redirected_URL, NULL, &dt);
811 if (opt.recursive && status == RETROK && (dt & TEXTHTML))
812 status = recursive_retrieve (filename,
813 redirected_URL ? redirected_URL : *t);
815 if (opt.delete_after && file_exists_p(filename))
817 DEBUGP (("Removing file due to --delete-after in main():\n"));
818 logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
819 if (unlink (filename))
820 logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
823 FREE_MAYBE (redirected_URL);
824 FREE_MAYBE (filename);
827 /* And then from the input file, if any. */
828 if (opt.input_filename)
831 status = retrieve_from_file (opt.input_filename, opt.force_html, &count);
833 logprintf (LOG_NOTQUIET, _("No URLs found in %s.\n"),
836 /* Print the downloaded sum. */
839 || (opt.input_filename && opt.downloaded != 0))
841 logprintf (LOG_NOTQUIET,
842 _("\nFINISHED --%s--\nDownloaded: %s bytes in %d files\n"),
844 (opt.downloaded_overflow ?
845 "<overflow>" : legible_very_long (opt.downloaded)),
847 /* Print quota warning, if exceeded. */
848 if (downloaded_exceeds_quota ())
849 logprintf (LOG_NOTQUIET,
850 _("Download quota (%s bytes) EXCEEDED!\n"),
851 legible (opt.quota));
854 if (opt.cookies_output)
855 save_cookies (opt.cookies_output);
857 if (opt.convert_links && !opt.delete_after)
859 convert_all_links ();
862 for (i = 0; i < nurl; i++)
866 print_malloc_debug_stats ();
868 if (status == RETROK)
874 /* Hangup signal handler. When wget receives SIGHUP or SIGUSR1, it
875 will continue operating as usual, trying to write to a log file.
876 If that is impossible, the output will be turned off. */
880 redirect_output_signal (int sig)
883 signal (sig, redirect_output_signal);
884 /* Please note that the double `%' in `%%s' is intentional, because
885 redirect_output passes tmp through printf. */
886 sprintf (tmp, _("%s received, redirecting output to `%%s'.\n"),
887 (sig == SIGHUP ? "SIGHUP" :
888 (sig == SIGUSR1 ? "SIGUSR1" :
890 redirect_output (tmp);
892 #endif /* HAVE_SIGNAL */