1 /* Command line parsing.
2 Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001
3 Free Software Foundation, Inc.
5 This file is part of GNU Wget.
7 GNU Wget is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 GNU Wget is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with Wget; if not, write to the Free Software
19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
27 #endif /* HAVE_UNISTD_H */
28 #include <sys/types.h>
33 #endif /* HAVE_STRING_H */
40 #endif /* HAVE_LOCALE_H */
44 #define OPTIONS_DEFINED_HERE /* for options.h */
55 /* On a GNU system this will include the system-wide getopt.h. */
58 #ifndef PATH_SEPARATOR
59 # define PATH_SEPARATOR '/'
62 extern char *version_string;
71 void log_init PARAMS ((const char *, int));
72 void log_close PARAMS ((void));
73 void redirect_output PARAMS ((const char *));
75 static RETSIGTYPE redirect_output_signal PARAMS ((int));
77 const char *exec_name;
79 /* Initialize I18N. The initialization amounts to invoking
80 setlocale(), bindtextdomain() and textdomain().
81 Does nothing if NLS is disabled or missing. */
83 i18n_initialize (void)
85 /* If HAVE_NLS is defined, assume the existence of the three
86 functions invoked here. */
88 /* Set the current locale. */
89 /* Here we use LC_MESSAGES instead of LC_ALL, for two reasons.
90 First, message catalogs are the only part of I18N that Wget uses
91 anyway. Second, setting LC_ALL has a dangerous potential of messing
92 things up. For example, when in a foreign locale, Solaris
93 strptime() fails to handle international dates correctly, which
94 makes http_atotm() malfunction. */
96 setlocale (LC_MESSAGES, "");
97 setlocale (LC_CTYPE, "");
/* NOTE(review): this LC_ALL call contradicts the rationale above unless
   it is a conditional fallback.  The preprocessor lines around it are
   not visible in this excerpt -- presumably it is compiled only when
   LC_MESSAGES is unavailable.  Confirm. */
99 setlocale (LC_ALL, "");
101 /* Set the text message domain. */
102 bindtextdomain ("wget", LOCALEDIR);
104 #endif /* HAVE_NLS */
107 /* It's kosher to declare these here because their interface _has_ to
108 be void foo(void). */
109 void host_init PARAMS ((void));
111 /* This just calls the various initialization functions from the
112 modules that need one-time initialization. */
114 private_initialize (void)
119 /* Print the usage message. */
123 printf (_("Usage: %s [OPTION]... [URL]...\n"), exec_name);
126 /* Print the help message, describing all the available options. If
127 you add an option, be sure to update this list. */
131 printf (_("GNU Wget %s, a non-interactive network retriever.\n"),
134 /* Had to split this into parts so that the Ultrix compiler and cpp
135 don't complain. Also, it makes translation much easier. */
138 Mandatory arguments to long options are mandatory for short options too.\n\
142 -V, --version display the version of Wget and exit.\n\
143 -h, --help print this help.\n\
144 -b, --background go to background after startup.\n\
145 -e, --execute=COMMAND execute a `.wgetrc\'-style command.\n\
148 Logging and input file:\n\
149 -o, --output-file=FILE log messages to FILE.\n\
150 -a, --append-output=FILE append messages to FILE.\n\
151 -d, --debug print debug output.\n\
152 -q, --quiet quiet (no output).\n\
153 -v, --verbose be verbose (this is the default).\n\
154 -nv, --non-verbose turn off verboseness, without being quiet.\n\
155 -i, --input-file=FILE download URLs found in FILE.\n\
156 -F, --force-html treat input file as HTML.\n\
157 -B, --base=URL prepends URL to relative links in -F -i file.\n\
158 --sslcertfile=FILE optional client certificate.\n\
159 --sslcertkey=KEYFILE optional keyfile for this certificate.\n\
163 --bind-address=ADDRESS bind to ADDRESS (hostname or IP) on local host.\n\
164 -t, --tries=NUMBER set number of retries to NUMBER (0 unlimits).\n\
165 -O --output-document=FILE write documents to FILE.\n\
166 -nc, --no-clobber don\'t clobber existing files or use .# suffixes.\n\
167 -c, --continue resume getting a partially-downloaded file.\n\
168 --dot-style=STYLE set retrieval display style.\n\
169 -N, --timestamping don\'t re-retrieve files unless newer than local.\n\
170 -S, --server-response print server response.\n\
171 --spider don\'t download anything.\n\
172 -T, --timeout=SECONDS set the read timeout to SECONDS.\n\
173 -w, --wait=SECONDS wait SECONDS between retrievals.\n\
174 --waitretry=SECONDS wait 1...SECONDS between retries of a retrieval.\n\
175 -Y, --proxy=on/off turn proxy on or off.\n\
176 -Q, --quota=NUMBER set retrieval quota to NUMBER.\n\
180 -nd --no-directories don\'t create directories.\n\
181 -x, --force-directories force creation of directories.\n\
182 -nH, --no-host-directories don\'t create host directories.\n\
183 -P, --directory-prefix=PREFIX save files to PREFIX/...\n\
184 --cut-dirs=NUMBER ignore NUMBER remote directory components.\n\
188 --http-user=USER set http user to USER.\n\
189 --http-passwd=PASS set http password to PASS.\n\
190 -C, --cache=on/off (dis)allow server-cached data (normally allowed).\n\
191 -E, --html-extension save all text/html documents with .html extension.\n\
192 --ignore-length ignore `Content-Length\' header field.\n\
193 --header=STRING insert STRING among the headers.\n\
194 --proxy-user=USER set USER as proxy username.\n\
195 --proxy-passwd=PASS set PASS as proxy password.\n\
196 --referer=URL include `Referer: URL\' header in HTTP request.\n\
197 -s, --save-headers save the HTTP headers to file.\n\
198 -U, --user-agent=AGENT identify as AGENT instead of Wget/VERSION.\n\
199 --no-http-keep-alive disable HTTP keep-alive (persistent connections).\n\
200 --cookies=off don't use cookies.\n\
201 --load-cookies=FILE load cookies from FILE before session.\n\
202 --save-cookies=FILE save cookies to FILE after session.\n\
206 -nr, --dont-remove-listing don\'t remove `.listing\' files.\n\
207 -g, --glob=on/off turn file name globbing on or off.\n\
208 --passive-ftp use the \"passive\" transfer mode.\n\
209 --retr-symlinks when recursing, get linked-to files (not dirs).\n\
212 Recursive retrieval:\n\
213 -r, --recursive recursive web-suck -- use with care!\n\
214 -l, --level=NUMBER maximum recursion depth (inf or 0 for infinite).\n\
215 --delete-after delete files locally after downloading them.\n\
216 -k, --convert-links convert non-relative links to relative.\n\
217 -K, --backup-converted before converting file X, back up as X.orig.\n\
218 -m, --mirror shortcut option equivalent to -r -N -l inf -nr.\n\
219 -p, --page-requisites get all images, etc. needed to display HTML page.\n\
222 Recursive accept/reject:\n\
223 -A, --accept=LIST comma-separated list of accepted extensions.\n\
224 -R, --reject=LIST comma-separated list of rejected extensions.\n\
225 -D, --domains=LIST comma-separated list of accepted domains.\n\
226 --exclude-domains=LIST comma-separated list of rejected domains.\n\
227 --follow-ftp follow FTP links from HTML documents.\n\
228 --follow-tags=LIST comma-separated list of followed HTML tags.\n\
229 -G, --ignore-tags=LIST comma-separated list of ignored HTML tags.\n\
230 -H, --span-hosts go to foreign hosts when recursive.\n\
231 -L, --relative follow relative links only.\n\
232 -I, --include-directories=LIST list of allowed directories.\n\
233 -X, --exclude-directories=LIST list of excluded directories.\n\
234 -nh, --no-host-lookup don\'t DNS-lookup hosts.\n\
235 -np, --no-parent don\'t ascend to the parent directory.\n\
237 fputs (_("Mail bug reports and suggestions to <bug-wget@gnu.org>.\n"),
242 main (int argc, char *const *argv)
245 int i, c, nurl, status, append_to_log;
247 static struct option long_options[] =
249 /* Options without arguments: */
250 { "background", no_argument, NULL, 'b' },
251 { "backup-converted", no_argument, NULL, 'K' },
252 { "continue", no_argument, NULL, 'c' },
253 { "convert-links", no_argument, NULL, 'k' },
254 { "debug", no_argument, NULL, 'd' },
255 { "delete-after", no_argument, NULL, 136 },
256 { "dont-remove-listing", no_argument, NULL, 149 },
257 { "follow-ftp", no_argument, NULL, 142 },
258 { "force-directories", no_argument, NULL, 'x' },
259 { "force-hier", no_argument, NULL, 'x' }, /* obsolete */
260 { "force-html", no_argument, NULL, 'F'},
261 { "help", no_argument, NULL, 'h' },
262 { "html-extension", no_argument, NULL, 'E' },
263 { "ignore-length", no_argument, NULL, 138 },
264 { "mirror", no_argument, NULL, 'm' },
265 { "no-clobber", no_argument, NULL, 141 },
266 { "no-directories", no_argument, NULL, 147 },
267 { "no-host-directories", no_argument, NULL, 148 },
268 { "no-host-lookup", no_argument, NULL, 150 },
269 { "no-http-keep-alive", no_argument, NULL, 156 },
270 { "no-parent", no_argument, NULL, 133 },
271 { "non-verbose", no_argument, NULL, 146 },
272 { "passive-ftp", no_argument, NULL, 139 },
273 { "page-requisites", no_argument, NULL, 'p' },
274 { "quiet", no_argument, NULL, 'q' },
275 { "recursive", no_argument, NULL, 'r' },
276 { "relative", no_argument, NULL, 'L' },
277 { "retr-symlinks", no_argument, NULL, 137 },
278 { "save-headers", no_argument, NULL, 's' },
279 { "server-response", no_argument, NULL, 'S' },
280 { "span-hosts", no_argument, NULL, 'H' },
281 { "spider", no_argument, NULL, 132 },
282 { "timestamping", no_argument, NULL, 'N' },
283 { "verbose", no_argument, NULL, 'v' },
284 { "version", no_argument, NULL, 'V' },
286 /* Options accepting an argument: */
287 { "accept", required_argument, NULL, 'A' },
288 { "append-output", required_argument, NULL, 'a' },
289 { "backups", required_argument, NULL, 151 }, /* undocumented */
290 { "base", required_argument, NULL, 'B' },
291 { "bind-address", required_argument, NULL, 155 },
292 { "cache", required_argument, NULL, 'C' },
293 { "cookies", required_argument, NULL, 160 },
294 { "cut-dirs", required_argument, NULL, 145 },
295 { "directory-prefix", required_argument, NULL, 'P' },
296 { "domains", required_argument, NULL, 'D' },
297 { "dot-style", required_argument, NULL, 134 },
298 { "execute", required_argument, NULL, 'e' },
299 { "exclude-directories", required_argument, NULL, 'X' },
300 { "exclude-domains", required_argument, NULL, 140 },
301 { "follow-tags", required_argument, NULL, 153 },
302 { "glob", required_argument, NULL, 'g' },
303 { "header", required_argument, NULL, 131 },
304 { "htmlify", required_argument, NULL, 135 },
305 { "http-passwd", required_argument, NULL, 130 },
306 { "http-user", required_argument, NULL, 129 },
307 { "ignore-tags", required_argument, NULL, 'G' },
308 { "include-directories", required_argument, NULL, 'I' },
309 { "input-file", required_argument, NULL, 'i' },
310 { "level", required_argument, NULL, 'l' },
311 { "load-cookies", required_argument, NULL, 161 },
312 { "no", required_argument, NULL, 'n' },
313 { "output-document", required_argument, NULL, 'O' },
314 { "output-file", required_argument, NULL, 'o' },
315 { "proxy", required_argument, NULL, 'Y' },
316 { "proxy-passwd", required_argument, NULL, 144 },
317 { "proxy-user", required_argument, NULL, 143 },
318 { "quota", required_argument, NULL, 'Q' },
319 { "reject", required_argument, NULL, 'R' },
320 { "save-cookies", required_argument, NULL, 162 },
321 { "timeout", required_argument, NULL, 'T' },
322 { "tries", required_argument, NULL, 't' },
323 { "user-agent", required_argument, NULL, 'U' },
324 { "referer", required_argument, NULL, 157 },
325 { "use-proxy", required_argument, NULL, 'Y' },
327 { "sslcertfile", required_argument, NULL, 158 },
328 { "sslcertkey", required_argument, NULL, 159 },
329 #endif /* HAVE_SSL */
330 { "wait", required_argument, NULL, 'w' },
331 { "waitretry", required_argument, NULL, 152 },
336 private_initialize ();
340 /* Construct the name of the executable, without the directory part. */
341 exec_name = strrchr (argv[0], PATH_SEPARATOR);
348 windows_main_junk (&argc, (char **) argv, (char **) &exec_name);
351 initialize (); /* sets option defaults; reads the system wgetrc and .wgetrc */
353 /* [Is the order of the option letters significant? If not, they should be
354 alphabetized, like the long_options. The only thing I know for sure is
355 that the options with required arguments must be followed by a ':'.
356 -- Dan Harkless <wget@harkless.org>] */
357 while ((c = getopt_long (argc, argv, "\
358 hpVqvdkKsxmNWrHSLcFbEY:G:g:T:U:O:l:n:i:o:a:t:D:A:R:P:B:e:Q:X:I:w:C:",
359 long_options, (int *)0)) != EOF)
363 /* Options without arguments: */
365 setval ("spider", "on");
368 setval ("noparent", "on");
371 setval ("deleteafter", "on");
374 setval ("retrsymlinks", "on");
377 setval ("ignorelength", "on");
380 setval ("passiveftp", "on");
383 setval ("noclobber", "on");
386 setval ("followftp", "on");
389 setval ("cutdirs", optarg);
392 setval ("verbose", "off");
395 setval ("dirstruct", "off");
398 setval ("addhostdir", "off");
401 setval ("removelisting", "off");
404 setval ("simplehostcheck", "on");
407 setval ("bindaddress", optarg);
410 setval ("httpkeepalive", "off");
413 setval ("background", "on");
416 setval ("continue", "on");
420 setval ("debug", "on");
421 #else /* not DEBUG */
422 fprintf (stderr, _("%s: debug support not compiled in.\n"),
424 #endif /* not DEBUG */
427 setval ("htmlextension", "on");
430 setval ("forcehtml", "on");
433 setval ("spanhosts", "on");
443 setval ("backupconverted", "on");
446 setval ("convertlinks", "on");
449 setval ("relativeonly", "on");
452 setval ("mirror", "on");
455 setval ("timestamping", "on");
458 setval ("pagerequisites", "on");
461 setval ("serverresponse", "on");
464 setval ("saveheaders", "on");
467 setval ("quiet", "on");
470 setval ("recursive", "on");
473 printf ("GNU Wget %s\n\n", version_string);
475 Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001 Free Software Foundation, Inc.\n\
476 This program is distributed in the hope that it will be useful,\n\
477 but WITHOUT ANY WARRANTY; without even the implied warranty of\n\
478 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n\
479 GNU General Public License for more details.\n"));
480 printf (_("\nOriginally written by Hrvoje Niksic <hniksic@arsdigita.com>.\n"));
484 setval ("verbose", "on");
487 setval ("dirstruct", "on");
490 /* Options accepting an argument: */
492 setval ("httpuser", optarg);
495 setval ("httppasswd", optarg);
498 setval ("header", optarg);
501 setval ("dotstyle", optarg);
504 setval ("htmlify", optarg);
507 setval ("excludedomains", optarg);
510 setval ("proxyuser", optarg);
513 setval ("proxypasswd", optarg);
516 setval ("backups", optarg);
519 setval ("waitretry", optarg);
522 setval ("followtags", optarg);
525 setval ("cookies", optarg);
528 setval ("loadcookies", optarg);
531 setval ("savecookies", optarg);
534 setval ("referer", optarg);
538 setval ("sslcertfile", optarg);
541 setval ("sslcertkey", optarg);
543 #endif /* HAVE_SSL */
545 setval ("accept", optarg);
548 setval ("logfile", optarg);
552 setval ("base", optarg);
555 setval ("cache", optarg);
558 setval ("domains", optarg);
563 if (parse_line (optarg, &com, &val))
565 if (!setval (com, val))
570 fprintf (stderr, _("%s: %s: invalid command\n"), exec_name,
579 setval ("ignoretags", optarg);
582 setval ("glob", optarg);
585 setval ("includedirectories", optarg);
588 setval ("input", optarg);
591 setval ("reclevel", optarg);
595 /* #### The n? options are utter crock! */
598 for (p = optarg; *p; p++)
602 setval ("verbose", "off");
605 setval ("simplehostcheck", "on");
608 setval ("addhostdir", "off");
611 setval ("dirstruct", "off");
614 setval ("noclobber", "on");
617 setval ("removelisting", "off");
620 setval ("noparent", "on");
623 setval ("httpkeepalive", "off");
626 printf (_("%s: illegal option -- `-n%c'\n"), exec_name, *p);
629 printf (_("Try `%s --help\' for more options.\n"), exec_name);
635 setval ("outputdocument", optarg);
638 setval ("logfile", optarg);
641 setval ("dirprefix", optarg);
644 setval ("quota", optarg);
647 setval ("reject", optarg);
650 setval ("timeout", optarg);
653 setval ("tries", optarg);
656 setval ("useragent", optarg);
659 setval ("wait", optarg);
662 setval ("excludedirectories", optarg);
665 setval ("useproxy", optarg);
671 printf (_("Try `%s --help' for more options.\n"), exec_name);
677 /* All user options have now been processed, so it's now safe to do
678 interoption dependency checks. */
680 if (opt.reclevel == 0)
681 opt.reclevel = INFINITE_RECURSION; /* see wget.h for commentary on this */
683 if (opt.page_requisites && !opt.recursive)
685 opt.recursive = TRUE;
687 if (!opt.no_dirstruct)
688 opt.dirstruct = TRUE; /* usually handled by cmd_spec_recursive() */
691 if (opt.verbose == -1)
692 opt.verbose = !opt.quiet;
695 if (opt.verbose && opt.quiet)
697 printf (_("Can't be verbose and quiet at the same time.\n"));
701 if (opt.timestamping && opt.noclobber)
704 Can't timestamp and not clobber old files at the same time.\n"));
708 nurl = argc - optind;
709 if (!nurl && !opt.input_filename)
711 /* No URL specified. */
712 printf (_("%s: missing URL\n"), exec_name);
715 /* #### Something nicer should be printed here -- similar to the
716 pre-1.5 `--help' page. */
717 printf (_("Try `%s --help' for more options.\n"), exec_name);
722 fork_to_background ();
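724 /* Allocate the array of URL pointers, with one slot more than the
   number of URL arguments (the retrieval loop below stops at a null
   entry). */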
725 url = ALLOCA_ARRAY (char *, nurl + 1);
726 /* Fill in the arguments. */
727 for (i = 0; i < nurl; i++, optind++)
729 char *rewritten = rewrite_url_maybe (argv[optind]);
732 printf ("Converted %s to %s\n", argv[optind], rewritten);
736 url[i] = xstrdup (argv[optind]);
740 /* Change the title of the console window on Windows. #### I think this
741 statement should belong to retrieve_url(). --hniksic. */
743 ws_changetitle (*url, nurl);
746 /* Initialize logging. */
747 log_init (opt.lfilename, append_to_log);
749 DEBUGP (("DEBUG output created by Wget %s on %s.\n\n", version_string,
752 /* Open the output filename if necessary. */
753 if (opt.output_document)
755 if (HYPHENP (opt.output_document))
760 opt.dfp = fopen (opt.output_document, opt.always_rest ? "ab" : "wb");
763 perror (opt.output_document);
766 if (fstat (fileno (opt.dfp), &st) == 0 && S_ISREG (st.st_mode))
767 opt.od_known_regular = 1;
775 /* Setup the signal handler to redirect output when hangup is
778 if (signal(SIGHUP, SIG_IGN) != SIG_IGN)
779 signal(SIGHUP, redirect_output_signal);
780 /* ...and do the same for SIGUSR1. */
781 signal (SIGUSR1, redirect_output_signal);
782 /* Writing to a closed socket normally signals SIGPIPE, and the
783 process exits. What we want is to ignore SIGPIPE and just check
784 for the return value of write(). */
785 signal (SIGPIPE, SIG_IGN);
786 #endif /* HAVE_SIGNAL */
788 status = RETROK; /* initialize it, just-in-case */
790 /* Retrieve the URLs from argument list. */
791 for (t = url; *t; t++)
793 char *filename, *redirected_URL;
796 status = retrieve_url (*t, &filename, &redirected_URL, NULL, &dt);
797 if (opt.recursive && status == RETROK && (dt & TEXTHTML))
798 status = recursive_retrieve (filename,
799 redirected_URL ? redirected_URL : *t);
801 if (opt.delete_after && file_exists_p(filename))
803 DEBUGP (("Removing file due to --delete-after in main():\n"));
804 logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
805 if (unlink (filename))
806 logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
809 FREE_MAYBE (redirected_URL);
810 FREE_MAYBE (filename);
813 /* And then from the input file, if any. */
814 if (opt.input_filename)
817 status = retrieve_from_file (opt.input_filename, opt.force_html, &count);
819 logprintf (LOG_NOTQUIET, _("No URLs found in %s.\n"),
822 /* Print the downloaded sum. */
825 || (opt.input_filename && opt.downloaded != 0))
827 logprintf (LOG_NOTQUIET,
828 _("\nFINISHED --%s--\nDownloaded: %s bytes in %d files\n"),
830 (opt.downloaded_overflow ?
831 "<overflow>" : legible_very_long (opt.downloaded)),
833 /* Print quota warning, if exceeded. */
834 if (downloaded_exceeds_quota ())
835 logprintf (LOG_NOTQUIET,
836 _("Download quota (%s bytes) EXCEEDED!\n"),
837 legible (opt.quota));
840 if (opt.cookies_output)
841 save_cookies (opt.cookies_output);
843 if (opt.convert_links && !opt.delete_after)
845 convert_all_links ();
848 for (i = 0; i < nurl; i++)
852 print_malloc_debug_stats ();
854 if (status == RETROK)
860 /* Hangup signal handler. When Wget receives SIGHUP or SIGUSR1, it
861 continues operating as usual, but tries to redirect its output to a
862 log file. If that is impossible, the output will be turned off. */
866 redirect_output_signal (int sig)
/* Install this handler again for SIG -- presumably because some systems
   reset the disposition to the default once a signal has been
   delivered. */
869 signal (sig, redirect_output_signal);
870 /* Please note that the double `%' in `%%s' is intentional, because
871 redirect_output passes tmp through printf. */
/* NOTE(review): sprintf() does not bound the write into tmp, whose
   declaration is not visible in this excerpt -- confirm it is large
   enough for the longest translation of the message below.  Also note
   that sprintf() is not on the POSIX list of async-signal-safe
   functions, yet it is called here from a signal handler. */
872 sprintf (tmp, _("%s received, redirecting output to `%%s'.\n"),
873 (sig == SIGHUP ? "SIGHUP" :
874 (sig == SIGUSR1 ? "SIGUSR1" :
876 redirect_output (tmp);
878 #endif /* HAVE_SIGNAL */