1 /* Command line parsing.
2 Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001
3 Free Software Foundation, Inc.
5 This file is part of GNU Wget.
7 GNU Wget is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 GNU Wget is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with Wget; if not, write to the Free Software
19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
27 #endif /* HAVE_UNISTD_H */
28 #include <sys/types.h>
33 #endif /* HAVE_STRING_H */
40 #endif /* HAVE_LOCALE_H */
44 #define OPTIONS_DEFINED_HERE /* for options.h */
54 /* On GNU system this will include system-wide getopt.h. */
57 #ifndef PATH_SEPARATOR
58 # define PATH_SEPARATOR '/'
61 extern char *version_string;
70 void log_init PARAMS ((const char *, int));
71 void log_close PARAMS ((void));
72 void redirect_output PARAMS ((const char *));
74 static RETSIGTYPE redirect_output_signal PARAMS ((int));
76 const char *exec_name;
78 /* Initialize I18N. The initialization amounts to invoking
79 setlocale(), bindtextdomain() and textdomain().
80 Does nothing if NLS is disabled or missing. */
82 i18n_initialize (void)
84 /* If HAVE_NLS is defined, assume the existence of the three
85 functions invoked here. */
87 /* Set the current locale. */
88 /* Here we use LC_MESSAGES instead of LC_ALL, for two reasons.
89 First, message catalogs are the only part of I18N that Wget uses.
90 Second, setting LC_ALL risks breaking locale-dependent library
91 calls. For example, when in a foreign locale, Solaris
92 strptime() fails to handle international dates correctly, which
93 makes http_atotm() malfunction. */
95 setlocale (LC_MESSAGES, "");
96 setlocale (LC_CTYPE, "");
98 setlocale (LC_ALL, "");
100 /* Set the text message domain. */
101 bindtextdomain ("wget", LOCALEDIR);
103 #endif /* HAVE_NLS */
106 /* It is safe to declare these here because their interface _has_ to
107 be void foo(void). */
108 void host_init PARAMS ((void));
110 /* This just calls the various initialization functions from the
111 modules that need one-time initialization. */
113 private_initialize (void)
118 /* Print the usage message. */
122 printf (_("Usage: %s [OPTION]... [URL]...\n"), exec_name);
125 /* Print the help message, describing all the available options. If
126 you add an option, be sure to update this list. */
130 printf (_("GNU Wget %s, a non-interactive network retriever.\n"),
133 /* The help text is split into parts so that the Ultrix compiler and
134 cpp accept it. Also, it makes translation much easier. */
137 Mandatory arguments to long options are mandatory for short options too.\n\
141 -V, --version display the version of Wget and exit.\n\
142 -h, --help print this help.\n\
143 -b, --background go to background after startup.\n\
144 -e, --execute=COMMAND execute a `.wgetrc\'-style command.\n\
147 Logging and input file:\n\
148 -o, --output-file=FILE log messages to FILE.\n\
149 -a, --append-output=FILE append messages to FILE.\n\
150 -d, --debug print debug output.\n\
151 -q, --quiet quiet (no output).\n\
152 -v, --verbose be verbose (this is the default).\n\
153 -nv, --non-verbose turn off verboseness, without being quiet.\n\
154 -i, --input-file=FILE download URLs found in FILE.\n\
155 -F, --force-html treat input file as HTML.\n\
156 -B, --base=URL prepends URL to relative links in -F -i file.\n\
157 --sslcertfile=FILE optional client certificate.\n\
158 --sslcertkey=KEYFILE optional keyfile for this certificate.\n\
162 --bind-address=ADDRESS bind to ADDRESS (hostname or IP) on local host.\n\
163 -t, --tries=NUMBER set number of retries to NUMBER (0 unlimits).\n\
164 -O --output-document=FILE write documents to FILE.\n\
165 -nc, --no-clobber don\'t clobber existing files or use .# suffixes.\n\
166 -c, --continue resume getting a partially-downloaded file.\n\
167 --dot-style=STYLE set retrieval display style.\n\
168 -N, --timestamping don\'t re-retrieve files unless newer than local.\n\
169 -S, --server-response print server response.\n\
170 --spider don\'t download anything.\n\
171 -T, --timeout=SECONDS set the read timeout to SECONDS.\n\
172 -w, --wait=SECONDS wait SECONDS between retrievals.\n\
173 --waitretry=SECONDS wait 1...SECONDS between retries of a retrieval.\n\
174 -Y, --proxy=on/off turn proxy on or off.\n\
175 -Q, --quota=NUMBER set retrieval quota to NUMBER.\n\
179 -nd --no-directories don\'t create directories.\n\
180 -x, --force-directories force creation of directories.\n\
181 -nH, --no-host-directories don\'t create host directories.\n\
182 -P, --directory-prefix=PREFIX save files to PREFIX/...\n\
183 --cut-dirs=NUMBER ignore NUMBER remote directory components.\n\
187 --http-user=USER set http user to USER.\n\
188 --http-passwd=PASS set http password to PASS.\n\
189 -C, --cache=on/off (dis)allow server-cached data (normally allowed).\n\
190 -E, --html-extension save all text/html documents with .html extension.\n\
191 --ignore-length ignore `Content-Length\' header field.\n\
192 --header=STRING insert STRING among the headers.\n\
193 --proxy-user=USER set USER as proxy username.\n\
194 --proxy-passwd=PASS set PASS as proxy password.\n\
195 --referer=URL include `Referer: URL\' header in HTTP request.\n\
196 -s, --save-headers save the HTTP headers to file.\n\
197 -U, --user-agent=AGENT identify as AGENT instead of Wget/VERSION.\n\
198 --no-http-keep-alive disable HTTP keep-alive (persistent connections).\n\
199 --cookies=off don't use cookies.\n\
200 --load-cookies=FILE load cookies from FILE before session.\n\
201 --save-cookies=FILE save cookies to FILE after session.\n\
205 -nr, --dont-remove-listing don\'t remove `.listing\' files.\n\
206 -g, --glob=on/off turn file name globbing on or off.\n\
207 --passive-ftp use the \"passive\" transfer mode.\n\
208 --retr-symlinks when recursing, get linked-to files (not dirs).\n\
211 Recursive retrieval:\n\
212 -r, --recursive recursive web-suck -- use with care!\n\
213 -l, --level=NUMBER maximum recursion depth (inf or 0 for infinite).\n\
214 --delete-after delete files locally after downloading them.\n\
215 -k, --convert-links convert non-relative links to relative.\n\
216 -K, --backup-converted before converting file X, back up as X.orig.\n\
217 -m, --mirror shortcut option equivalent to -r -N -l inf -nr.\n\
218 -p, --page-requisites get all images, etc. needed to display HTML page.\n\
221 Recursive accept/reject:\n\
222 -A, --accept=LIST comma-separated list of accepted extensions.\n\
223 -R, --reject=LIST comma-separated list of rejected extensions.\n\
224 -D, --domains=LIST comma-separated list of accepted domains.\n\
225 --exclude-domains=LIST comma-separated list of rejected domains.\n\
226 --follow-ftp follow FTP links from HTML documents.\n\
227 --follow-tags=LIST comma-separated list of followed HTML tags.\n\
228 -G, --ignore-tags=LIST comma-separated list of ignored HTML tags.\n\
229 -H, --span-hosts go to foreign hosts when recursive.\n\
230 -L, --relative follow relative links only.\n\
231 -I, --include-directories=LIST list of allowed directories.\n\
232 -X, --exclude-directories=LIST list of excluded directories.\n\
233 -nh, --no-host-lookup don\'t DNS-lookup hosts.\n\
234 -np, --no-parent don\'t ascend to the parent directory.\n\
236 fputs (_("Mail bug reports and suggestions to <bug-wget@gnu.org>.\n"),
241 main (int argc, char *const *argv)
244 int i, c, nurl, status, append_to_log;
247 static struct option long_options[] =
249 /* Options without arguments: */
250 { "background", no_argument, NULL, 'b' },
251 { "backup-converted", no_argument, NULL, 'K' },
252 { "continue", no_argument, NULL, 'c' },
253 { "convert-links", no_argument, NULL, 'k' },
254 { "debug", no_argument, NULL, 'd' },
255 { "delete-after", no_argument, NULL, 136 },
256 { "dont-remove-listing", no_argument, NULL, 149 },
257 { "follow-ftp", no_argument, NULL, 142 },
258 { "force-directories", no_argument, NULL, 'x' },
259 { "force-hier", no_argument, NULL, 'x' }, /* obsolete */
260 { "force-html", no_argument, NULL, 'F'},
261 { "help", no_argument, NULL, 'h' },
262 { "html-extension", no_argument, NULL, 'E' },
263 { "ignore-length", no_argument, NULL, 138 },
264 { "mirror", no_argument, NULL, 'm' },
265 { "no-clobber", no_argument, NULL, 141 },
266 { "no-directories", no_argument, NULL, 147 },
267 { "no-host-directories", no_argument, NULL, 148 },
268 { "no-host-lookup", no_argument, NULL, 150 },
269 { "no-http-keep-alive", no_argument, NULL, 156 },
270 { "no-parent", no_argument, NULL, 133 },
271 { "non-verbose", no_argument, NULL, 146 },
272 { "passive-ftp", no_argument, NULL, 139 },
273 { "page-requisites", no_argument, NULL, 'p' },
274 { "quiet", no_argument, NULL, 'q' },
275 { "recursive", no_argument, NULL, 'r' },
276 { "relative", no_argument, NULL, 'L' },
277 { "retr-symlinks", no_argument, NULL, 137 },
278 { "save-headers", no_argument, NULL, 's' },
279 { "server-response", no_argument, NULL, 'S' },
280 { "span-hosts", no_argument, NULL, 'H' },
281 { "spider", no_argument, NULL, 132 },
282 { "timestamping", no_argument, NULL, 'N' },
283 { "verbose", no_argument, NULL, 'v' },
284 { "version", no_argument, NULL, 'V' },
286 /* Options accepting an argument: */
287 { "accept", required_argument, NULL, 'A' },
288 { "append-output", required_argument, NULL, 'a' },
289 { "backups", required_argument, NULL, 151 }, /* undocumented */
290 { "base", required_argument, NULL, 'B' },
291 { "bind-address", required_argument, NULL, 155 },
292 { "cache", required_argument, NULL, 'C' },
293 { "cookies", required_argument, NULL, 160 },
294 { "cut-dirs", required_argument, NULL, 145 },
295 { "directory-prefix", required_argument, NULL, 'P' },
296 { "domains", required_argument, NULL, 'D' },
297 { "dot-style", required_argument, NULL, 134 },
298 { "execute", required_argument, NULL, 'e' },
299 { "exclude-directories", required_argument, NULL, 'X' },
300 { "exclude-domains", required_argument, NULL, 140 },
301 { "follow-tags", required_argument, NULL, 153 },
302 { "glob", required_argument, NULL, 'g' },
303 { "header", required_argument, NULL, 131 },
304 { "htmlify", required_argument, NULL, 135 },
305 { "http-passwd", required_argument, NULL, 130 },
306 { "http-user", required_argument, NULL, 129 },
307 { "ignore-tags", required_argument, NULL, 'G' },
308 { "include-directories", required_argument, NULL, 'I' },
309 { "input-file", required_argument, NULL, 'i' },
310 { "level", required_argument, NULL, 'l' },
311 { "load-cookies", required_argument, NULL, 161 },
312 { "no", required_argument, NULL, 'n' },
313 { "output-document", required_argument, NULL, 'O' },
314 { "output-file", required_argument, NULL, 'o' },
315 { "proxy", required_argument, NULL, 'Y' },
316 { "proxy-passwd", required_argument, NULL, 144 },
317 { "proxy-user", required_argument, NULL, 143 },
318 { "quota", required_argument, NULL, 'Q' },
319 { "reject", required_argument, NULL, 'R' },
320 { "save-cookies", required_argument, NULL, 162 },
321 { "timeout", required_argument, NULL, 'T' },
322 { "tries", required_argument, NULL, 't' },
323 { "user-agent", required_argument, NULL, 'U' },
324 { "referer", required_argument, NULL, 157 },
325 { "use-proxy", required_argument, NULL, 'Y' },
327 { "sslcertfile", required_argument, NULL, 158 },
328 { "sslcertkey", required_argument, NULL, 159 },
329 #endif /* HAVE_SSL */
330 { "wait", required_argument, NULL, 'w' },
331 { "waitretry", required_argument, NULL, 152 },
336 private_initialize ();
340 /* Construct the name of the executable, without the directory part. */
341 exec_name = strrchr (argv[0], PATH_SEPARATOR);
348 windows_main_junk (&argc, (char **) argv, (char **) &exec_name);
351 initialize (); /* sets option defaults; reads the system wgetrc and .wgetrc */
353 /* [Is the order of the option letters significant? If not, they should be
354 alphabetized, like the long_options. The only thing I know for sure is
355 that the options with required arguments must be followed by a ':'.
356 -- Dan Harkless <wget@harkless.org>] */
357 while ((c = getopt_long (argc, argv, "\
358 hpVqvdkKsxmNWrHSLcFbEY:G:g:T:U:O:l:n:i:o:a:t:D:A:R:P:B:e:Q:X:I:w:C:",
359 long_options, (int *)0)) != EOF)
363 /* Options without arguments: */
365 setval ("spider", "on");
368 setval ("noparent", "on");
371 setval ("deleteafter", "on");
374 setval ("retrsymlinks", "on");
377 setval ("ignorelength", "on");
380 setval ("passiveftp", "on");
383 setval ("noclobber", "on");
386 setval ("followftp", "on");
389 setval ("cutdirs", optarg);
392 setval ("verbose", "off");
395 setval ("dirstruct", "off");
398 setval ("addhostdir", "off");
401 setval ("removelisting", "off");
404 setval ("simplehostcheck", "on");
407 setval ("bindaddress", optarg);
410 setval ("httpkeepalive", "off");
413 setval ("background", "on");
416 setval ("continue", "on");
420 setval ("debug", "on");
421 #else /* not DEBUG */
422 fprintf (stderr, _("%s: debug support not compiled in.\n"),
424 #endif /* not DEBUG */
427 setval ("htmlextension", "on");
430 setval ("forcehtml", "on");
433 setval ("spanhosts", "on");
443 setval ("backupconverted", "on");
446 setval ("convertlinks", "on");
449 setval ("relativeonly", "on");
452 setval ("mirror", "on");
455 setval ("timestamping", "on");
458 setval ("pagerequisites", "on");
461 setval ("serverresponse", "on");
464 setval ("saveheaders", "on");
467 setval ("quiet", "on");
470 setval ("recursive", "on");
473 printf ("GNU Wget %s\n\n", version_string);
475 Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001 Free Software Foundation, Inc.\n\
476 This program is distributed in the hope that it will be useful,\n\
477 but WITHOUT ANY WARRANTY; without even the implied warranty of\n\
478 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n\
479 GNU General Public License for more details.\n"));
480 printf (_("\nOriginally written by Hrvoje Niksic <hniksic@arsdigita.com>.\n"));
484 setval ("verbose", "on");
487 setval ("dirstruct", "on");
490 /* Options accepting an argument: */
492 setval ("httpuser", optarg);
495 setval ("httppasswd", optarg);
498 setval ("header", optarg);
501 setval ("dotstyle", optarg);
504 setval ("htmlify", optarg);
507 setval ("excludedomains", optarg);
510 setval ("proxyuser", optarg);
513 setval ("proxypasswd", optarg);
516 setval ("backups", optarg);
519 setval ("waitretry", optarg);
523 setval ("followtags", optarg);
526 setval ("cookies", optarg);
529 setval ("loadcookies", optarg);
532 setval ("savecookies", optarg);
535 setval ("referer", optarg);
539 setval ("sslcertfile", optarg);
542 setval ("sslcertkey", optarg);
544 #endif /* HAVE_SSL */
546 setval ("accept", optarg);
549 setval ("logfile", optarg);
553 setval ("base", optarg);
556 setval ("cache", optarg);
559 setval ("domains", optarg);
564 if (parse_line (optarg, &com, &val))
566 if (!setval (com, val))
571 fprintf (stderr, _("%s: %s: invalid command\n"), exec_name,
580 setval ("ignoretags", optarg);
583 setval ("glob", optarg);
586 setval ("includedirectories", optarg);
589 setval ("input", optarg);
592 setval ("reclevel", optarg);
596 /* #### The n? options are a hack: each character of the argument to `-n' is handled as a separate option. */
599 for (p = optarg; *p; p++)
603 setval ("verbose", "off");
606 setval ("simplehostcheck", "on");
609 setval ("addhostdir", "off");
612 setval ("dirstruct", "off");
615 setval ("noclobber", "on");
618 setval ("removelisting", "off");
621 setval ("noparent", "on");
624 setval ("httpkeepalive", "off");
627 printf (_("%s: illegal option -- `-n%c'\n"), exec_name, *p);
630 printf (_("Try `%s --help\' for more options.\n"), exec_name);
636 setval ("outputdocument", optarg);
639 setval ("logfile", optarg);
642 setval ("dirprefix", optarg);
645 setval ("quota", optarg);
648 setval ("reject", optarg);
651 setval ("timeout", optarg);
654 setval ("tries", optarg);
657 setval ("useragent", optarg);
660 setval ("wait", optarg);
663 setval ("excludedirectories", optarg);
666 setval ("useproxy", optarg);
672 printf (_("Try `%s --help' for more options.\n"), exec_name);
678 /* All user options have now been processed, so it is now safe to do
679 inter-option dependency checks. */
681 if (opt.reclevel == 0)
682 opt.reclevel = INFINITE_RECURSION; /* see wget.h for commentary on this */
684 if (opt.page_requisites && !opt.recursive)
686 opt.recursive = TRUE;
688 if (!opt.no_dirstruct)
689 opt.dirstruct = TRUE; /* usually handled by cmd_spec_recursive() */
692 if (opt.verbose == -1)
693 opt.verbose = !opt.quiet;
695 /* Retain compatibility with previous scripts:
696 if wait has been set but waitretry has not, give waitretry the wait value.
697 Comparing the values alone is not enough, because the user may have
698 deliberately set wait to n>0 and waitretry to 0. - HEH */
701 char opt_wait_str[256]; /* bigger than needed buf to prevent overflow */
703 sprintf(opt_wait_str, "%ld", opt.wait);
704 setval ("waitretry", opt_wait_str);
708 if (opt.verbose && opt.quiet)
710 printf (_("Can't be verbose and quiet at the same time.\n"));
714 if (opt.timestamping && opt.noclobber)
717 Can't timestamp and not clobber old files at the same time.\n"));
721 nurl = argc - optind;
722 if (!nurl && !opt.input_filename)
724 /* No URL specified. */
725 printf (_("%s: missing URL\n"), exec_name);
728 /* #### Something nicer should be printed here -- similar to the
729 pre-1.5 `--help' page. */
730 printf (_("Try `%s --help' for more options.\n"), exec_name);
735 fork_to_background ();
737 /* Allocate basic pointer. */
738 url = ALLOCA_ARRAY (char *, nurl + 1);
739 /* Fill in the arguments. */
740 for (i = 0; i < nurl; i++, optind++)
742 char *irix4_cc_needs_this;
743 STRDUP_ALLOCA (irix4_cc_needs_this, argv[optind]);
744 url[i] = irix4_cc_needs_this;
748 /* Change the title of the console window on Windows. #### I think this
749 statement should belong to retrieve_url(). --hniksic. */
751 ws_changetitle (*url, nurl);
754 /* Initialize logging. */
755 log_init (opt.lfilename, append_to_log);
757 DEBUGP (("DEBUG output created by Wget %s on %s.\n\n", version_string,
760 /* Open the output filename if necessary. */
761 if (opt.output_document)
763 if (HYPHENP (opt.output_document))
768 opt.dfp = fopen (opt.output_document, opt.always_rest ? "ab" : "wb");
771 perror (opt.output_document);
774 if (fstat (fileno (opt.dfp), &st) == 0 && S_ISREG (st.st_mode))
775 opt.od_known_regular = 1;
783 /* Set up the signal handler to redirect output when hangup is
786 if (signal(SIGHUP, SIG_IGN) != SIG_IGN)
787 signal(SIGHUP, redirect_output_signal);
788 /* ...and do the same for SIGUSR1. */
789 signal (SIGUSR1, redirect_output_signal);
790 /* Writing to a closed socket normally raises SIGPIPE, and the
791 process exits. What we want is to ignore SIGPIPE and just check
792 the return value of write(). */
793 signal (SIGPIPE, SIG_IGN);
794 #endif /* HAVE_SIGNAL */
796 status = RETROK; /* initialize it, just-in-case */
798 /* Retrieve the URLs from argument list. */
799 for (t = url; *t; t++)
801 char *filename, *redirected_URL;
804 status = retrieve_url (*t, &filename, &redirected_URL, NULL, &dt);
805 if (opt.recursive && status == RETROK && (dt & TEXTHTML))
806 status = recursive_retrieve (filename,
807 redirected_URL ? redirected_URL : *t);
809 if (opt.delete_after && file_exists_p(filename))
811 DEBUGP (("Removing file due to --delete-after in main():\n"));
812 logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
813 if (unlink (filename))
814 logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
817 FREE_MAYBE (redirected_URL);
818 FREE_MAYBE (filename);
821 /* And then from the input file, if any. */
822 if (opt.input_filename)
825 status = retrieve_from_file (opt.input_filename, opt.force_html, &count);
827 logprintf (LOG_NOTQUIET, _("No URLs found in %s.\n"),
830 /* Print the downloaded sum. */
833 || (opt.input_filename && opt.downloaded != 0))
835 logprintf (LOG_NOTQUIET,
836 _("\nFINISHED --%s--\nDownloaded: %s bytes in %d files\n"),
838 (opt.downloaded_overflow ?
839 "<overflow>" : legible_very_long (opt.downloaded)),
841 /* Print quota warning, if exceeded. */
842 if (downloaded_exceeds_quota ())
843 logprintf (LOG_NOTQUIET,
844 _("Download quota (%s bytes) EXCEEDED!\n"),
845 legible (opt.quota));
848 if (opt.cookies_output)
849 save_cookies (opt.cookies_output);
851 if (opt.convert_links && !opt.delete_after)
853 convert_all_links ();
858 print_malloc_debug_stats ();
860 if (status == RETROK)
866 /* Hangup signal handler. When wget receives SIGHUP or SIGUSR1, it
867 continues operating as usual, but tries to write its output to a log file.
868 If that is impossible, the output will be turned off. */
872 redirect_output_signal (int sig)
875 signal (sig, redirect_output_signal);
876 /* Please note that the double `%' in `%%s' is intentional, because
877 redirect_output passes tmp through printf. */
878 sprintf (tmp, _("%s received, redirecting output to `%%s'.\n"),
879 (sig == SIGHUP ? "SIGHUP" :
880 (sig == SIGUSR1 ? "SIGUSR1" :
882 redirect_output (tmp);
884 #endif /* HAVE_SIGNAL */