1 /* Command line parsing.
2 Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001
3 Free Software Foundation, Inc.
5 This file is part of GNU Wget.
7 GNU Wget is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 GNU Wget is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with Wget; if not, write to the Free Software
19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
27 #endif /* HAVE_UNISTD_H */
28 #include <sys/types.h>
33 #endif /* HAVE_STRING_H */
40 #endif /* HAVE_LOCALE_H */
44 #define OPTIONS_DEFINED_HERE /* for options.h */
55 #ifndef PATH_SEPARATOR
56 # define PATH_SEPARATOR '/'
59 extern char *version_string;
68 void log_init PARAMS ((const char *, int));
69 void log_close PARAMS ((void));
70 void redirect_output PARAMS ((const char *));
72 static RETSIGTYPE redirect_output_signal PARAMS ((int));
74 const char *exec_name;
76 /* Initialize I18N. The initialization amounts to invoking
77 setlocale(), bindtextdomain() and textdomain().
78 Does nothing if NLS is disabled or missing. */
80 i18n_initialize (void)
82 /* If HAVE_NLS is defined, assume the existence of the three
83 functions invoked here. */
85 /* Set the current locale. */
86 /* Here we set LC_MESSAGES (and LC_CTYPE) rather than LC_ALL, for two
87 reasons. First, message catalogs are all the I18N that Wget uses anyway.
88 Second, setting LC_ALL has a dangerous potential of messing
89 things up. For example, when in a foreign locale, Solaris
90 strptime() fails to handle international dates correctly, which
91 makes http_atotm() malfunction. */
93 setlocale (LC_MESSAGES, "");
94 setlocale (LC_CTYPE, "");
96 setlocale (LC_ALL, "");
98 /* Bind the "wget" text message domain to the catalogs in LOCALEDIR. */
99 bindtextdomain ("wget", LOCALEDIR);
101 #endif /* HAVE_NLS */
104 /* It is fine to declare these here because their interface _has_ to
105 be void foo(void). */
106 void host_init PARAMS ((void));
108 /* This just calls the various initialization functions from the
109 modules that need one-time initialization. */
111 private_initialize (void)
116 /* Print the one-line usage synopsis to stdout. */
120 printf (_("Usage: %s [OPTION]... [URL]...\n"), exec_name);
123 /* Print the help message, describing all the available options. If
124 you add an option, be sure to update this list. */
128 printf (_("GNU Wget %s, a non-interactive network retriever.\n"),
131 /* The help text is split into several parts so that the Ultrix compiler
132 and cpp do not choke on it. It also makes translation much easier. */
135 Mandatory arguments to long options are mandatory for short options too.\n\
139 -V, --version display the version of Wget and exit.\n\
140 -h, --help print this help.\n\
141 -b, --background go to background after startup.\n\
142 -e, --execute=COMMAND execute a `.wgetrc\'-style command.\n\
145 Logging and input file:\n\
146 -o, --output-file=FILE log messages to FILE.\n\
147 -a, --append-output=FILE append messages to FILE.\n\
148 -d, --debug print debug output.\n\
149 -q, --quiet quiet (no output).\n\
150 -v, --verbose be verbose (this is the default).\n\
151 -nv, --non-verbose turn off verboseness, without being quiet.\n\
152 -i, --input-file=FILE download URLs found in FILE.\n\
153 -F, --force-html treat input file as HTML.\n\
154 -B, --base=URL prepends URL to relative links in -F -i file.\n\
155 --sslcertfile=FILE optional client certificate.\n\
156 --sslcertkey=KEYFILE optional keyfile for this certificate.\n\
160 --bind-address=ADDRESS bind to ADDRESS (hostname or IP) on local host.\n\
161 -t, --tries=NUMBER set number of retries to NUMBER (0 unlimits).\n\
162 -O --output-document=FILE write documents to FILE.\n\
163 -nc, --no-clobber don\'t clobber existing files or use .# suffixes.\n\
164 -c, --continue resume getting a partially-downloaded file.\n\
165 --dot-style=STYLE set retrieval display style.\n\
166 -N, --timestamping don\'t re-retrieve files unless newer than local.\n\
167 -S, --server-response print server response.\n\
168 --spider don\'t download anything.\n\
169 -T, --timeout=SECONDS set the read timeout to SECONDS.\n\
170 -w, --wait=SECONDS wait SECONDS between retrievals.\n\
171 --waitretry=SECONDS wait 1...SECONDS between retries of a retrieval.\n\
172 -Y, --proxy=on/off turn proxy on or off.\n\
173 -Q, --quota=NUMBER set retrieval quota to NUMBER.\n\
177 -nd --no-directories don\'t create directories.\n\
178 -x, --force-directories force creation of directories.\n\
179 -nH, --no-host-directories don\'t create host directories.\n\
180 -P, --directory-prefix=PREFIX save files to PREFIX/...\n\
181 --cut-dirs=NUMBER ignore NUMBER remote directory components.\n\
185 --http-user=USER set http user to USER.\n\
186 --http-passwd=PASS set http password to PASS.\n\
187 -C, --cache=on/off (dis)allow server-cached data (normally allowed).\n\
188 -E, --html-extension save all text/html documents with .html extension.\n\
189 --ignore-length ignore `Content-Length\' header field.\n\
190 --header=STRING insert STRING among the headers.\n\
191 --proxy-user=USER set USER as proxy username.\n\
192 --proxy-passwd=PASS set PASS as proxy password.\n\
193 --referer=URL include `Referer: URL\' header in HTTP request.\n\
194 -s, --save-headers save the HTTP headers to file.\n\
195 -U, --user-agent=AGENT identify as AGENT instead of Wget/VERSION.\n\
196 --no-http-keep-alive disable HTTP keep-alive (persistent connections).\n\
197 --cookies=off don't use cookies.\n\
198 --load-cookies=FILE load cookies from FILE before session.\n\
199 --save-cookies=FILE save cookies to FILE after session.\n\
203 -nr, --dont-remove-listing don\'t remove `.listing\' files.\n\
204 -g, --glob=on/off turn file name globbing on or off.\n\
205 --passive-ftp use the \"passive\" transfer mode.\n\
206 --retr-symlinks when recursing, get linked-to files (not dirs).\n\
209 Recursive retrieval:\n\
210 -r, --recursive recursive web-suck -- use with care!\n\
211 -l, --level=NUMBER maximum recursion depth (inf or 0 for infinite).\n\
212 --delete-after delete files locally after downloading them.\n\
213 -k, --convert-links convert non-relative links to relative.\n\
214 -K, --backup-converted before converting file X, back up as X.orig.\n\
215 -m, --mirror shortcut option equivalent to -r -N -l inf -nr.\n\
216 -p, --page-requisites get all images, etc. needed to display HTML page.\n\
219 Recursive accept/reject:\n\
220 -A, --accept=LIST comma-separated list of accepted extensions.\n\
221 -R, --reject=LIST comma-separated list of rejected extensions.\n\
222 -D, --domains=LIST comma-separated list of accepted domains.\n\
223 --exclude-domains=LIST comma-separated list of rejected domains.\n\
224 --follow-ftp follow FTP links from HTML documents.\n\
225 --follow-tags=LIST comma-separated list of followed HTML tags.\n\
226 -G, --ignore-tags=LIST comma-separated list of ignored HTML tags.\n\
227 -H, --span-hosts go to foreign hosts when recursive.\n\
228 -L, --relative follow relative links only.\n\
229 -I, --include-directories=LIST list of allowed directories.\n\
230 -X, --exclude-directories=LIST list of excluded directories.\n\
231 -nh, --no-host-lookup don\'t DNS-lookup hosts.\n\
232 -np, --no-parent don\'t ascend to the parent directory.\n\
234 fputs (_("Mail bug reports and suggestions to <bug-wget@gnu.org>.\n"),
239 main (int argc, char *const *argv)
242 int i, c, nurl, status, append_to_log;
245 static struct option long_options[] =
247 /* Options without arguments: */
248 { "background", no_argument, NULL, 'b' },
249 { "backup-converted", no_argument, NULL, 'K' },
250 { "continue", no_argument, NULL, 'c' },
251 { "convert-links", no_argument, NULL, 'k' },
252 { "debug", no_argument, NULL, 'd' },
253 { "delete-after", no_argument, NULL, 136 },
254 { "dont-remove-listing", no_argument, NULL, 149 },
255 { "follow-ftp", no_argument, NULL, 142 },
256 { "force-directories", no_argument, NULL, 'x' },
257 { "force-hier", no_argument, NULL, 'x' }, /* obsolete */
258 { "force-html", no_argument, NULL, 'F'},
259 { "help", no_argument, NULL, 'h' },
260 { "html-extension", no_argument, NULL, 'E' },
261 { "ignore-length", no_argument, NULL, 138 },
262 { "mirror", no_argument, NULL, 'm' },
263 { "no-clobber", no_argument, NULL, 141 },
264 { "no-directories", no_argument, NULL, 147 },
265 { "no-host-directories", no_argument, NULL, 148 },
266 { "no-host-lookup", no_argument, NULL, 150 },
267 { "no-http-keep-alive", no_argument, NULL, 156 },
268 { "no-parent", no_argument, NULL, 133 },
269 { "non-verbose", no_argument, NULL, 146 },
270 { "passive-ftp", no_argument, NULL, 139 },
271 { "page-requisites", no_argument, NULL, 'p' },
272 { "quiet", no_argument, NULL, 'q' },
273 { "recursive", no_argument, NULL, 'r' },
274 { "relative", no_argument, NULL, 'L' },
275 { "retr-symlinks", no_argument, NULL, 137 },
276 { "save-headers", no_argument, NULL, 's' },
277 { "server-response", no_argument, NULL, 'S' },
278 { "span-hosts", no_argument, NULL, 'H' },
279 { "spider", no_argument, NULL, 132 },
280 { "timestamping", no_argument, NULL, 'N' },
281 { "verbose", no_argument, NULL, 'v' },
282 { "version", no_argument, NULL, 'V' },
284 /* Options accepting an argument: */
285 { "accept", required_argument, NULL, 'A' },
286 { "append-output", required_argument, NULL, 'a' },
287 { "backups", required_argument, NULL, 151 }, /* undocumented */
288 { "base", required_argument, NULL, 'B' },
289 { "bind-address", required_argument, NULL, 155 },
290 { "cache", required_argument, NULL, 'C' },
291 { "cookies", required_argument, NULL, 160 },
292 { "cut-dirs", required_argument, NULL, 145 },
293 { "directory-prefix", required_argument, NULL, 'P' },
294 { "domains", required_argument, NULL, 'D' },
295 { "dot-style", required_argument, NULL, 134 },
296 { "execute", required_argument, NULL, 'e' },
297 { "exclude-directories", required_argument, NULL, 'X' },
298 { "exclude-domains", required_argument, NULL, 140 },
299 { "follow-tags", required_argument, NULL, 153 },
300 { "glob", required_argument, NULL, 'g' },
301 { "header", required_argument, NULL, 131 },
302 { "htmlify", required_argument, NULL, 135 },
303 { "http-passwd", required_argument, NULL, 130 },
304 { "http-user", required_argument, NULL, 129 },
305 { "ignore-tags", required_argument, NULL, 'G' },
306 { "include-directories", required_argument, NULL, 'I' },
307 { "input-file", required_argument, NULL, 'i' },
308 { "level", required_argument, NULL, 'l' },
309 { "load-cookies", required_argument, NULL, 161 },
310 { "no", required_argument, NULL, 'n' },
311 { "output-document", required_argument, NULL, 'O' },
312 { "output-file", required_argument, NULL, 'o' },
313 { "proxy", required_argument, NULL, 'Y' },
314 { "proxy-passwd", required_argument, NULL, 144 },
315 { "proxy-user", required_argument, NULL, 143 },
316 { "quota", required_argument, NULL, 'Q' },
317 { "reject", required_argument, NULL, 'R' },
318 { "save-cookies", required_argument, NULL, 162 },
319 { "timeout", required_argument, NULL, 'T' },
320 { "tries", required_argument, NULL, 't' },
321 { "user-agent", required_argument, NULL, 'U' },
322 { "referer", required_argument, NULL, 157 },
323 { "use-proxy", required_argument, NULL, 'Y' },
325 { "sslcertfile", required_argument, NULL, 158 },
326 { "sslcertkey", required_argument, NULL, 159 },
327 #endif /* HAVE_SSL */
328 { "wait", required_argument, NULL, 'w' },
329 { "waitretry", required_argument, NULL, 152 },
334 private_initialize ();
338 /* Construct the name of the executable, without the directory part. */
339 exec_name = strrchr (argv[0], PATH_SEPARATOR);
346 windows_main_junk (&argc, (char **) argv, (char **) &exec_name);
349 initialize (); /* sets option defaults; reads the system wgetrc and .wgetrc */
351 /* [Is the order of the option letters significant? If not, they should be
352 alphabetized, like the long_options. The only thing I know for sure is
353 that the options with required arguments must be followed by a ':'.
354 -- Dan Harkless <wget@harkless.org>] */
355 while ((c = getopt_long (argc, argv, "\
356 hpVqvdkKsxmNWrHSLcFbEY:G:g:T:U:O:l:n:i:o:a:t:D:A:R:P:B:e:Q:X:I:w:C:",
357 long_options, (int *)0)) != EOF)
361 /* Options without arguments: */
363 setval ("spider", "on");
366 setval ("noparent", "on");
369 setval ("deleteafter", "on");
372 setval ("retrsymlinks", "on");
375 setval ("ignorelength", "on");
378 setval ("passiveftp", "on");
381 setval ("noclobber", "on");
384 setval ("followftp", "on");
387 setval ("cutdirs", optarg);
390 setval ("verbose", "off");
393 setval ("dirstruct", "off");
396 setval ("addhostdir", "off");
399 setval ("removelisting", "off");
402 setval ("simplehostcheck", "on");
405 setval ("bindaddress", optarg);
408 setval ("httpkeepalive", "off");
411 setval ("background", "on");
414 setval ("continue", "on");
418 setval ("debug", "on");
419 #else /* not DEBUG */
420 fprintf (stderr, _("%s: debug support not compiled in.\n"),
422 #endif /* not DEBUG */
425 setval ("htmlextension", "on");
428 setval ("forcehtml", "on");
431 setval ("spanhosts", "on");
441 setval ("backupconverted", "on");
444 setval ("convertlinks", "on");
447 setval ("relativeonly", "on");
450 setval ("mirror", "on");
453 setval ("timestamping", "on");
456 setval ("pagerequisites", "on");
459 setval ("serverresponse", "on");
462 setval ("saveheaders", "on");
465 setval ("quiet", "on");
468 setval ("recursive", "on");
471 printf ("GNU Wget %s\n\n", version_string);
473 Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001 Free Software Foundation, Inc.\n\
474 This program is distributed in the hope that it will be useful,\n\
475 but WITHOUT ANY WARRANTY; without even the implied warranty of\n\
476 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n\
477 GNU General Public License for more details.\n"));
478 printf (_("\nOriginally written by Hrvoje Niksic <hniksic@arsdigita.com>.\n"));
482 setval ("verbose", "on");
485 setval ("dirstruct", "on");
488 /* Options accepting an argument: */
490 setval ("httpuser", optarg);
493 setval ("httppasswd", optarg);
496 setval ("header", optarg);
499 setval ("dotstyle", optarg);
502 setval ("htmlify", optarg);
505 setval ("excludedomains", optarg);
508 setval ("proxyuser", optarg);
511 setval ("proxypasswd", optarg);
514 setval ("backups", optarg);
517 setval ("waitretry", optarg);
521 setval ("followtags", optarg);
524 setval ("cookies", optarg);
527 setval ("loadcookies", optarg);
530 setval ("savecookies", optarg);
533 setval ("referer", optarg);
537 setval ("sslcertfile", optarg);
540 setval ("sslcertkey", optarg);
542 #endif /* HAVE_SSL */
544 setval ("accept", optarg);
547 setval ("logfile", optarg);
551 setval ("base", optarg);
554 setval ("cache", optarg);
557 setval ("domains", optarg);
562 if (parse_line (optarg, &com, &val))
564 if (!setval (com, val))
569 fprintf (stderr, _("%s: %s: invalid command\n"), exec_name,
578 setval ("ignoretags", optarg);
581 setval ("glob", optarg);
584 setval ("includedirectories", optarg);
587 setval ("input", optarg);
590 setval ("reclevel", optarg);
594 /* #### The -n? options are a kludge: each character of the argument to `-n' selects one of the flags below. */
597 for (p = optarg; *p; p++)
601 setval ("verbose", "off");
604 setval ("simplehostcheck", "on");
607 setval ("addhostdir", "off");
610 setval ("dirstruct", "off");
613 setval ("noclobber", "on");
616 setval ("removelisting", "off");
619 setval ("noparent", "on");
622 setval ("httpkeepalive", "off");
625 printf (_("%s: illegal option -- `-n%c'\n"), exec_name, *p);
628 printf (_("Try `%s --help\' for more options.\n"), exec_name);
634 setval ("outputdocument", optarg);
637 setval ("logfile", optarg);
640 setval ("dirprefix", optarg);
643 setval ("quota", optarg);
646 setval ("reject", optarg);
649 setval ("timeout", optarg);
652 setval ("tries", optarg);
655 setval ("useragent", optarg);
658 setval ("wait", optarg);
661 setval ("excludedirectories", optarg);
664 setval ("useproxy", optarg);
670 printf (_("Try `%s --help' for more options.\n"), exec_name);
676 /* All user options have now been processed, so it's now safe to do
677 inter-option dependency checks. */
679 if (opt.reclevel == 0)
680 opt.reclevel = INFINITE_RECURSION; /* see wget.h for commentary on this */
682 if (opt.page_requisites && !opt.recursive)
684 opt.recursive = TRUE;
686 if (!opt.no_dirstruct)
687 opt.dirstruct = TRUE; /* usually handled by cmd_spec_recursive() */
690 if (opt.verbose == -1)
691 opt.verbose = !opt.quiet;
693 /* Retain compatibility with previous scripts.
694 If wait has been set, but waitretry has not, give waitretry the wait value.
695 A simple check on the values is not enough, because wait could have been
696 set to n>0 and waitretry to 0 -- HEH */
699 char opt_wait_str[256]; /* bigger than needed buf to prevent overflow */
701 sprintf(opt_wait_str, "%ld", opt.wait);
702 setval ("waitretry", opt_wait_str);
706 if (opt.verbose && opt.quiet)
708 printf (_("Can't be verbose and quiet at the same time.\n"));
712 if (opt.timestamping && opt.noclobber)
715 Can't timestamp and not clobber old files at the same time.\n"));
719 nurl = argc - optind;
720 if (!nurl && !opt.input_filename)
722 /* No URL specified. */
723 printf (_("%s: missing URL\n"), exec_name);
726 /* #### Something nicer should be printed here -- similar to the
727 pre-1.5 `--help' page. */
728 printf (_("Try `%s --help' for more options.\n"), exec_name);
733 fork_to_background ();
735 /* Allocate the URL pointer array (one slot more than nurl). */
736 url = ALLOCA_ARRAY (char *, nurl + 1);
737 /* Fill in the arguments. */
738 for (i = 0; i < nurl; i++, optind++)
740 char *irix4_cc_needs_this;
741 STRDUP_ALLOCA (irix4_cc_needs_this, argv[optind]);
742 url[i] = irix4_cc_needs_this;
746 /* Change the title of console window on Windows. #### I think this
747 statement should belong to retrieve_url(). --hniksic. */
749 ws_changetitle (*url, nurl);
752 /* Initialize logging. */
753 log_init (opt.lfilename, append_to_log);
755 DEBUGP (("DEBUG output created by Wget %s on %s.\n\n", version_string,
758 /* Open the output filename if necessary. */
759 if (opt.output_document)
761 if (HYPHENP (opt.output_document))
766 opt.dfp = fopen (opt.output_document, opt.always_rest ? "ab" : "wb");
769 perror (opt.output_document);
772 if (fstat (fileno (opt.dfp), &st) == 0 && S_ISREG (st.st_mode))
773 opt.od_known_regular = 1;
781 /* Set up the signal handler to redirect output when hangup is received. */
784 if (signal(SIGHUP, SIG_IGN) != SIG_IGN)
785 signal(SIGHUP, redirect_output_signal);
786 /* ...and do the same for SIGUSR1. */
787 signal (SIGUSR1, redirect_output_signal);
788 /* Writing to a closed socket normally signals SIGPIPE, and the
789 process exits. What we want is to ignore SIGPIPE and just check
790 for the return value of write(). */
791 signal (SIGPIPE, SIG_IGN);
792 #endif /* HAVE_SIGNAL */
794 status = RETROK; /* initialize it, just-in-case */
796 /* Retrieve the URLs from argument list. */
797 for (t = url; *t; t++)
799 char *filename, *redirected_URL;
802 status = retrieve_url (*t, &filename, &redirected_URL, NULL, &dt);
803 if (opt.recursive && status == RETROK && (dt & TEXTHTML))
804 status = recursive_retrieve (filename,
805 redirected_URL ? redirected_URL : *t);
807 if (opt.delete_after && file_exists_p(filename))
809 DEBUGP (("Removing file due to --delete-after in main():\n"));
810 logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
811 if (unlink (filename))
812 logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
815 FREE_MAYBE (redirected_URL);
816 FREE_MAYBE (filename);
819 /* And then from the input file, if any. */
820 if (opt.input_filename)
823 status = retrieve_from_file (opt.input_filename, opt.force_html, &count);
825 logprintf (LOG_NOTQUIET, _("No URLs found in %s.\n"),
828 /* Print the downloaded sum. */
831 || (opt.input_filename && opt.downloaded != 0))
833 logprintf (LOG_NOTQUIET,
834 _("\nFINISHED --%s--\nDownloaded: %s bytes in %d files\n"),
836 (opt.downloaded_overflow ?
837 "<overflow>" : legible_very_long (opt.downloaded)),
839 /* Print quota warning, if exceeded. */
840 if (downloaded_exceeds_quota ())
841 logprintf (LOG_NOTQUIET,
842 _("Download quota (%s bytes) EXCEEDED!\n"),
843 legible (opt.quota));
846 if (opt.cookies_output)
847 save_cookies (opt.cookies_output);
849 if (opt.convert_links && !opt.delete_after)
851 convert_all_links ();
856 print_malloc_debug_stats ();
858 if (status == RETROK)
864 /* Hangup signal handler. When Wget receives SIGHUP or SIGUSR1, it
865 continues operating as usual, but tries to write its output to a log file.
866 If that is impossible, the output is turned off. */
870 redirect_output_signal (int sig)
873 signal (sig, redirect_output_signal); /* re-install this handler for SIG */
874 /* Please note that the double `%' in `%%s' is intentional, because
875 redirect_output passes tmp through printf.
   NOTE(review): sprintf() and _() are not async-signal-safe; confirm
   that calling them from a signal handler is acceptable here. */
876 sprintf (tmp, _("%s received, redirecting output to `%%s'.\n"),
877 (sig == SIGHUP ? "SIGHUP" :
878 (sig == SIGUSR1 ? "SIGUSR1" :
880 redirect_output (tmp);
882 #endif /* HAVE_SIGNAL */