1 /* Reading/parsing the initialization file.
2 Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
3 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation,
6 This file is part of GNU Wget.
8 GNU Wget is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 GNU Wget is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with Wget. If not, see <http://www.gnu.org/licenses/>.
21 Additional permission under GNU GPL version 3 section 7
23 If you modify this program, or any covered work, by linking or
24 combining it with the OpenSSL project's OpenSSL library (or a
25 modified version of that library), containing parts covered by the
26 terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
27 grants you additional permission to convey the resulting work.
28 Corresponding Source for a non-source form of such a combination
29 shall include the source code for the parts of OpenSSL used as well
30 as that of the covered work. */
41 /* not all systems provide PATH_MAX in limits.h */
43 # include <sys/param.h>
45 # define PATH_MAX MAXPATHLEN
60 #include "recur.h" /* for INFINITE_RECURSION */
61 #include "convert.h" /* for convert_cleanup */
62 #include "res.h" /* for res_cleanup */
63 #include "http.h" /* for http_cleanup */
64 #include "retr.h" /* for output_stream */
/* Declare FUNC as a file-local command handler.  The prototype matches
   the `action' member of the commands[] table: two constant strings
   and an untyped pointer, returning bool.  */
72 #define CMD_DECLARE(func) static bool func (const char *, const char *, void *)
74 CMD_DECLARE (cmd_boolean);
75 CMD_DECLARE (cmd_bytes);
76 CMD_DECLARE (cmd_bytes_sum);
78 CMD_DECLARE (cmd_cert_type);
80 CMD_DECLARE (cmd_directory_vector);
81 CMD_DECLARE (cmd_number);
82 CMD_DECLARE (cmd_number_inf);
83 CMD_DECLARE (cmd_string);
84 CMD_DECLARE (cmd_file);
85 CMD_DECLARE (cmd_directory);
86 CMD_DECLARE (cmd_time);
87 CMD_DECLARE (cmd_vector);
89 CMD_DECLARE (cmd_spec_dirstruct);
90 CMD_DECLARE (cmd_spec_header);
91 CMD_DECLARE (cmd_spec_warc_header);
92 CMD_DECLARE (cmd_spec_htmlify);
93 CMD_DECLARE (cmd_spec_mirror);
94 CMD_DECLARE (cmd_spec_prefer_family);
95 CMD_DECLARE (cmd_spec_progress);
96 CMD_DECLARE (cmd_spec_recursive);
97 CMD_DECLARE (cmd_spec_restrict_file_names);
99 CMD_DECLARE (cmd_spec_secure_protocol);
101 CMD_DECLARE (cmd_spec_timeout);
102 CMD_DECLARE (cmd_spec_useragent);
103 CMD_DECLARE (cmd_spec_verbose);
105 /* List of recognized commands, each consisting of name, place and
106 function. When adding a new command, simply add it to the list,
107 but be sure to keep the list sorted alphabetically, as
108 command_by_name's binary search depends on it. Also, be sure to
109 add any entries that allocate memory (e.g. cmd_string and
110 cmd_vector) to the cleanup() function below. */
112 static const struct {
115 bool (*action) (const char *, const char *, void *);
117 /* KEEP THIS LIST ALPHABETICALLY SORTED */
118 { "accept", &opt.accepts, cmd_vector },
119 { "addhostdir", &opt.add_hostdir, cmd_boolean },
120 { "adjustextension", &opt.adjust_extension, cmd_boolean },
121 { "alwaysrest", &opt.always_rest, cmd_boolean }, /* deprecated */
122 { "askpassword", &opt.ask_passwd, cmd_boolean },
123 { "authnochallenge", &opt.auth_without_challenge,
125 { "background", &opt.background, cmd_boolean },
126 { "backupconverted", &opt.backup_converted, cmd_boolean },
127 { "backups", &opt.backups, cmd_number },
128 { "base", &opt.base_href, cmd_string },
129 { "bindaddress", &opt.bind_address, cmd_string },
130 { "bits", &opt.bits_fmt, cmd_boolean},
132 { "cacertificate", &opt.ca_cert, cmd_file },
134 { "cache", &opt.allow_cache, cmd_boolean },
136 { "cadirectory", &opt.ca_directory, cmd_directory },
137 { "certificate", &opt.cert_file, cmd_file },
138 { "certificatetype", &opt.cert_type, cmd_cert_type },
139 { "checkcertificate", &opt.check_cert, cmd_boolean },
141 { "chooseconfig", &opt.choose_config, cmd_file },
142 { "connecttimeout", &opt.connect_timeout, cmd_time },
143 { "contentdisposition", &opt.content_disposition, cmd_boolean },
144 { "contentonerror", &opt.content_on_error, cmd_boolean },
145 { "continue", &opt.always_rest, cmd_boolean },
146 { "convertlinks", &opt.convert_links, cmd_boolean },
147 { "cookies", &opt.cookies, cmd_boolean },
148 { "cutdirs", &opt.cut_dirs, cmd_number },
150 { "debug", &opt.debug, cmd_boolean },
152 { "defaultpage", &opt.default_page, cmd_string},
153 { "deleteafter", &opt.delete_after, cmd_boolean },
154 { "dirprefix", &opt.dir_prefix, cmd_directory },
155 { "dirstruct", NULL, cmd_spec_dirstruct },
156 { "dnscache", &opt.dns_cache, cmd_boolean },
157 { "dnstimeout", &opt.dns_timeout, cmd_time },
158 { "domains", &opt.domains, cmd_vector },
159 { "dotbytes", &opt.dot_bytes, cmd_bytes },
160 { "dotsinline", &opt.dots_in_line, cmd_number },
161 { "dotspacing", &opt.dot_spacing, cmd_number },
162 { "dotstyle", &opt.dot_style, cmd_string }, /* deprecated */
164 { "egdfile", &opt.egd_file, cmd_file },
166 { "excludedirectories", &opt.excludes, cmd_directory_vector },
167 { "excludedomains", &opt.exclude_domains, cmd_vector },
168 { "followftp", &opt.follow_ftp, cmd_boolean },
169 { "followtags", &opt.follow_tags, cmd_vector },
170 { "forcehtml", &opt.force_html, cmd_boolean },
171 { "ftppasswd", &opt.ftp_passwd, cmd_string }, /* deprecated */
172 { "ftppassword", &opt.ftp_passwd, cmd_string },
173 { "ftpproxy", &opt.ftp_proxy, cmd_string },
175 { "ftpstmlf", &opt.ftp_stmlf, cmd_boolean },
176 #endif /* def __VMS */
177 { "ftpuser", &opt.ftp_user, cmd_string },
178 { "glob", &opt.ftp_glob, cmd_boolean },
179 { "header", NULL, cmd_spec_header },
180 { "htmlextension", &opt.adjust_extension, cmd_boolean }, /* deprecated */
181 { "htmlify", NULL, cmd_spec_htmlify },
182 { "httpkeepalive", &opt.http_keep_alive, cmd_boolean },
183 { "httppasswd", &opt.http_passwd, cmd_string }, /* deprecated */
184 { "httppassword", &opt.http_passwd, cmd_string },
185 { "httpproxy", &opt.http_proxy, cmd_string },
186 { "httpsproxy", &opt.https_proxy, cmd_string },
187 { "httpuser", &opt.http_user, cmd_string },
188 { "ignorecase", &opt.ignore_case, cmd_boolean },
189 { "ignorelength", &opt.ignore_length, cmd_boolean },
190 { "ignoretags", &opt.ignore_tags, cmd_vector },
191 { "includedirectories", &opt.includes, cmd_directory_vector },
193 { "inet4only", &opt.ipv4_only, cmd_boolean },
194 { "inet6only", &opt.ipv6_only, cmd_boolean },
196 { "input", &opt.input_filename, cmd_file },
197 { "iri", &opt.enable_iri, cmd_boolean },
198 { "keepsessioncookies", &opt.keep_session_cookies, cmd_boolean },
199 { "limitrate", &opt.limit_rate, cmd_bytes },
200 { "loadcookies", &opt.cookies_input, cmd_file },
201 { "localencoding", &opt.locale, cmd_string },
202 { "logfile", &opt.lfilename, cmd_file },
203 { "login", &opt.ftp_user, cmd_string },/* deprecated*/
204 { "maxredirect", &opt.max_redirect, cmd_number },
205 { "mirror", NULL, cmd_spec_mirror },
206 { "netrc", &opt.netrc, cmd_boolean },
207 { "noclobber", &opt.noclobber, cmd_boolean },
208 { "noparent", &opt.no_parent, cmd_boolean },
209 { "noproxy", &opt.no_proxy, cmd_vector },
210 { "numtries", &opt.ntry, cmd_number_inf },/* deprecated*/
211 { "outputdocument", &opt.output_document, cmd_file },
212 { "pagerequisites", &opt.page_requisites, cmd_boolean },
213 { "passiveftp", &opt.ftp_pasv, cmd_boolean },
214 { "passwd", &opt.ftp_passwd, cmd_string },/* deprecated*/
215 { "password", &opt.passwd, cmd_string },
216 { "postdata", &opt.post_data, cmd_string },
217 { "postfile", &opt.post_file_name, cmd_file },
218 { "preferfamily", NULL, cmd_spec_prefer_family },
219 { "preservepermissions", &opt.preserve_perm, cmd_boolean },
221 { "privatekey", &opt.private_key, cmd_file },
222 { "privatekeytype", &opt.private_key_type, cmd_cert_type },
224 { "progress", &opt.progress_type, cmd_spec_progress },
225 { "protocoldirectories", &opt.protocol_directories, cmd_boolean },
226 { "proxypasswd", &opt.proxy_passwd, cmd_string }, /* deprecated */
227 { "proxypassword", &opt.proxy_passwd, cmd_string },
228 { "proxyuser", &opt.proxy_user, cmd_string },
229 { "quiet", &opt.quiet, cmd_boolean },
230 { "quota", &opt.quota, cmd_bytes_sum },
232 { "randomfile", &opt.random_file, cmd_file },
234 { "randomwait", &opt.random_wait, cmd_boolean },
235 { "readtimeout", &opt.read_timeout, cmd_time },
236 { "reclevel", &opt.reclevel, cmd_number_inf },
237 { "recursive", NULL, cmd_spec_recursive },
238 { "referer", &opt.referer, cmd_string },
239 { "reject", &opt.rejects, cmd_vector },
240 { "relativeonly", &opt.relative_only, cmd_boolean },
241 { "remoteencoding", &opt.encoding_remote, cmd_string },
242 { "removelisting", &opt.remove_listing, cmd_boolean },
243 { "restrictfilenames", NULL, cmd_spec_restrict_file_names },
244 { "retrsymlinks", &opt.retr_symlinks, cmd_boolean },
245 { "retryconnrefused", &opt.retry_connrefused, cmd_boolean },
246 { "robots", &opt.use_robots, cmd_boolean },
247 { "savecookies", &opt.cookies_output, cmd_file },
248 { "saveheaders", &opt.save_headers, cmd_boolean },
250 { "secureprotocol", &opt.secure_protocol, cmd_spec_secure_protocol },
252 { "serverresponse", &opt.server_response, cmd_boolean },
253 { "showalldnsentries", &opt.show_all_dns_entries, cmd_boolean },
254 { "spanhosts", &opt.spanhost, cmd_boolean },
255 { "spider", &opt.spider, cmd_boolean },
256 { "strictcomments", &opt.strict_comments, cmd_boolean },
257 { "timeout", NULL, cmd_spec_timeout },
258 { "timestamping", &opt.timestamping, cmd_boolean },
259 { "tries", &opt.ntry, cmd_number_inf },
260 { "trustservernames", &opt.trustservernames, cmd_boolean },
261 { "unlink", &opt.unlink, cmd_boolean },
262 { "useproxy", &opt.use_proxy, cmd_boolean },
263 { "user", &opt.user, cmd_string },
264 { "useragent", NULL, cmd_spec_useragent },
265 { "useservertimestamps", &opt.useservertimestamps, cmd_boolean },
266 { "verbose", NULL, cmd_spec_verbose },
267 { "wait", &opt.wait, cmd_time },
268 { "waitretry", &opt.waitretry, cmd_time },
269 { "warccdx", &opt.warc_cdx_enabled, cmd_boolean },
270 { "warccdxdedup", &opt.warc_cdx_dedup_filename, cmd_file },
272 { "warccompression", &opt.warc_compression_enabled, cmd_boolean },
274 { "warcdigests", &opt.warc_digests_enabled, cmd_boolean },
275 { "warcfile", &opt.warc_filename, cmd_file },
276 { "warcheader", NULL, cmd_spec_warc_header },
277 { "warckeeplog", &opt.warc_keep_log, cmd_boolean },
278 { "warcmaxsize", &opt.warc_maxsize, cmd_bytes },
279 { "warctempdir", &opt.warc_tempdir, cmd_directory },
281 { "wdebug", &opt.wdebug, cmd_boolean },
285 /* Look up CMDNAME in the commands[] and return its position in the
286 array. If CMDNAME is not found, return -1. */
289 command_by_name (const char *cmdname)
291 /* Use binary search for speed. Wget has ~100 commands, which
292 guarantees a worst case performance of 7 string comparisons. */
293 int lo = 0, hi = countof (commands) - 1;
297 int mid = (lo + hi) >> 1;
298 int cmp = strcasecmp (cmdname, commands[mid].name);
309 /* Reset the variables to default values. */
315 /* Most of the default values are 0 (and 0.0, NULL, and false).
316 Just reset everything, and fill in the non-zero values. Note
317 that initializing pointers to NULL this way is technically
318 illegal, but porting Wget to a machine where NULL is not all-zero
319 bit pattern will be the least of the implementors' worries. */
326 opt.add_hostdir = true;
330 opt.http_keep_alive = true;
331 opt.use_proxy = true;
332 tmp = getenv ("no_proxy");
334 opt.no_proxy = sepstring (tmp);
335 opt.prefer_family = prefer_none;
336 opt.allow_cache = true;
338 opt.read_timeout = 900;
339 opt.use_robots = true;
341 opt.remove_listing = true;
343 opt.dot_bytes = 1024;
344 opt.dot_spacing = 10;
345 opt.dots_in_line = 50;
347 opt.dns_cache = true;
351 opt.check_cert = true;
354 /* File name restriction defaults to the OS type. */
355 #if defined(WINDOWS) || defined(MSDOS) || defined(__CYGWIN__)
356 opt.restrict_files_os = restrict_windows;
358 opt.restrict_files_os = restrict_unix;
360 opt.restrict_files_ctrl = true;
361 opt.restrict_files_nonascii = false;
362 opt.restrict_files_case = restrict_no_case_restriction;
364 opt.max_redirect = 20;
369 opt.enable_iri = true;
371 opt.enable_iri = false;
374 opt.encoding_remote = NULL;
376 opt.useservertimestamps = true;
377 opt.show_all_dns_entries = false;
379 opt.warc_maxsize = 0; /* 1024 * 1024 * 1024; */
381 opt.warc_compression_enabled = true;
383 opt.warc_compression_enabled = false;
385 opt.warc_digests_enabled = true;
386 opt.warc_cdx_enabled = false;
387 opt.warc_cdx_dedup_filename = NULL;
388 opt.warc_tempdir = NULL;
389 opt.warc_keep_log = true;
392 /* Return the user's home directory (strdup-ed), or NULL if none is
397 static char *buf = NULL;
398 static char *home, *ret;
402 home = getenv ("HOME");
408 /* Under MSDOS, if $HOME isn't defined, use the directory where
409 `wget.exe' resides. */
410 const char *_w32_get_argv0 (void); /* in libwatt.a/pcconfig.c */
413 buff = _w32_get_argv0 ();
415 p = strrchr (buf, '/'); /* djgpp */
417 p = strrchr (buf, '\\'); /* others */
421 buff = malloc (len + 1);
425 strncpy (buff, _w32_get_argv0 (), len);
429 #elif !defined(WINDOWS)
430 /* If HOME is not defined, try getting it from the password
432 struct passwd *pwd = getpwuid (getuid ());
433 if (!pwd || !pwd->pw_dir)
437 /* Under Windows, if $HOME isn't defined, use the directory where
438 `wget.exe' resides. */
444 ret = home ? xstrdup (home) : NULL;
451 /* Check the 'WGETRC' environment variable and return the file name
452 if 'WGETRC' is set and is a valid file.
453 If the `WGETRC' variable exists but the file does not exist, the
454 function will exit(). */
456 wgetrc_env_file_name (void)
458 char *env = getenv ("WGETRC");
461 if (!file_exists_p (env))
463 fprintf (stderr, _("%s: WGETRC points to %s, which doesn't exist.\n"),
467 return xstrdup (env);
472 /* Check for the existence of '$HOME/.wgetrc' and return its path
473 if it exists and is set. */
475 wgetrc_user_file_name (void)
479 /* If that failed, try $HOME/.wgetrc (or equivalent). */
482 file = "SYS$LOGIN:.wgetrc";
483 #else /* def __VMS */
486 file = aprintf ("%s/.wgetrc", home);
488 #endif /* def __VMS [else] */
492 if (!file_exists_p (file))
500 /* Return the path to the user's .wgetrc. This is either the value of
501 `WGETRC' environment variable, or `$HOME/.wgetrc'.
503 Additionally, for windows, look in the directory where wget.exe
506 wgetrc_file_name (void)
508 char *file = wgetrc_env_file_name ();
512 file = wgetrc_user_file_name ();
515 /* Under Windows, if we still haven't found .wgetrc, look for the file
516 `wget.ini' in the directory where `wget.exe' resides; we do this for
517 backward compatibility with previous versions of Wget.
518 SYSTEM_WGETRC should not be defined under WINDOWS. */
521 char *home = home_dir ();
527 file = aprintf ("%s/wget.ini", home);
528 if (!file_exists_p (file))
541 /* Return values of parse_line. */
549 static enum parse_line parse_line (const char *, char **, char **, int *);
550 static bool setval_internal (int, const char *, const char *);
551 static bool setval_internal_tilde (int, const char *, const char *);
553 /* Initialize variables from a wgetrc file. Returns zero (failure) if
554 there were errors in the file. */
557 run_wgetrc (const char *file)
564 fp = fopen (file, "r");
567 fprintf (stderr, _("%s: Cannot read %s (%s).\n"), exec_name,
568 file, strerror (errno));
569 return true; /* not a fatal error */
572 while ((line = read_whole_line (fp)) != NULL)
574 char *com = NULL, *val = NULL;
577 /* Parse the line. */
578 switch (parse_line (line, &com, &val, &comind))
581 /* If everything is OK, set the value. */
582 if (!setval_internal_tilde (comind, com, val))
584 fprintf (stderr, _("%s: Error in %s at line %d.\n"),
585 exec_name, file, ln);
589 case line_syntax_error:
590 fprintf (stderr, _("%s: Syntax error in %s at line %d.\n"),
591 exec_name, file, ln);
594 case line_unknown_command:
595 fprintf (stderr, _("%s: Unknown command %s in %s at line %d.\n"),
596 exec_name, quote (com), file, ln);
614 /* Initialize the defaults and run the system wgetrc and user's own
619 char *file, *env_sysrc;
622 /* Run a non-standard system rc file when the according environment
623 variable has been set. For internal testing purposes only! */
624 env_sysrc = getenv ("SYSTEM_WGETRC");
625 if (env_sysrc && file_exists_p (env_sysrc))
627 ok &= run_wgetrc (env_sysrc);
628 /* If there are any problems parsing the system wgetrc file, tell
632 fprintf (stderr, _("\
633 Parsing system wgetrc file (env SYSTEM_WGETRC) failed. Please check\n\
635 or specify a different file using --config.\n"), env_sysrc);
639 /* Otherwise, if SYSTEM_WGETRC is defined, use it. */
641 else if (file_exists_p (SYSTEM_WGETRC))
642 ok &= run_wgetrc (SYSTEM_WGETRC);
643 /* If there are any problems parsing the system wgetrc file, tell
647 fprintf (stderr, _("\
648 Parsing system wgetrc file failed. Please check\n\
650 or specify a different file using --config.\n"), SYSTEM_WGETRC);
654 /* Override it with your own, if one exists. */
655 file = wgetrc_file_name ();
658 /* #### We should canonicalize `file' and SYSTEM_WGETRC with
659 something like realpath() before comparing them with `strcmp' */
661 if (!strcmp (file, SYSTEM_WGETRC))
663 fprintf (stderr, _("\
664 %s: Warning: Both system and user wgetrc point to %s.\n"),
665 exec_name, quote (file));
669 ok &= run_wgetrc (file);
671 /* If there were errors processing either `.wgetrc', abort. */
679 /* Remove dashes and underscores from S, modifying S in the
685 char *t = s; /* t - tortoise */
686 char *h = s; /* h - hare */
688 if (*h == '_' || *h == '-')
695 /* Parse the line pointed by line, with the syntax:
696 <sp>* command <sp>* = <sp>* value <sp>*
697 Uses malloc to allocate space for command and value.
699 Returns one of line_ok, line_empty, line_syntax_error, or
700 line_unknown_command.
702 In case of line_ok, *COM and *VAL point to freshly allocated
703 strings, and *COMIND is set to com's index. *COM and *VAL are also
704 allocated for line_unknown_command; otherwise they are unmodified. */
706 static enum parse_line
707 parse_line (const char *line, char **com, char **val, int *comind)
710 const char *end = line + strlen (line);
711 const char *cmdstart, *cmdend;
712 const char *valstart, *valend;
717 /* Skip leading and trailing whitespace. */
718 while (*line && c_isspace (*line))
720 while (end > line && c_isspace (end[-1]))
723 /* Skip empty lines and comments. */
724 if (!*line || *line == '#')
730 while (p < end && (c_isalnum (*p) || *p == '_' || *p == '-'))
734 /* Skip '=', as well as any space before or after it. */
735 while (p < end && c_isspace (*p))
737 if (p == end || *p != '=')
738 return line_syntax_error;
740 while (p < end && c_isspace (*p))
746 /* The syntax is valid (even though the command might not be). Fill
747 in the command name and value. */
748 *com = strdupdelim (cmdstart, cmdend);
749 *val = strdupdelim (valstart, valend);
751 /* The line now known to be syntactically correct. Check whether
752 the command is valid. */
753 BOUNDED_TO_ALLOCA (cmdstart, cmdend, cmdcopy);
755 ind = command_by_name (cmdcopy);
757 return line_unknown_command;
759 /* Report success to the caller. */
764 #if defined(WINDOWS) || defined(MSDOS)
765 # define ISSEP(c) ((c) == '/' || (c) == '\\')
767 # define ISSEP(c) ((c) == '/')
770 /* Run commands[comind].action. */
773 setval_internal (int comind, const char *com, const char *val)
775 assert (0 <= comind && ((size_t) comind) < countof (commands));
776 DEBUGP (("Setting %s (%s) to %s\n", com, commands[comind].name, val));
777 return commands[comind].action (com, val, commands[comind].place);
781 setval_internal_tilde (int comind, const char *com, const char *val)
787 ret = setval_internal (comind, com, val);
789 /* We make tilde expansion for cmd_file and cmd_directory */
790 if (((commands[comind].action == cmd_file) ||
791 (commands[comind].action == cmd_directory))
792 && ret && (*val == '~' && ISSEP (val[1])))
794 pstring = commands[comind].place;
798 homelen = strlen (home);
799 while (homelen && ISSEP (home[homelen - 1]))
800 home[--homelen] = '\0';
802 /* Skip the leading "~/". */
803 for (++val; ISSEP (*val); val++)
805 *pstring = concat_strings (home, "/", val, (char *)0);
811 /* Run command COM with value VAL. If running the command produces an
812 error, report the error and exit.
814 This is intended to be called from main() to modify Wget's behavior
815 through command-line switches. Since COM is hard-coded in main(),
816 it is not canonicalized, and this aborts when COM is not found.
818 If COMIND's are exported to init.h, this function will be changed
819 to accept COMIND directly. */
822 setoptval (const char *com, const char *val, const char *optname)
824 /* Prepend "--" to OPTNAME. */
825 char *dd_optname = (char *) alloca (2 + strlen (optname) + 1);
828 strcpy (dd_optname + 2, optname);
830 assert (val != NULL);
831 if (!setval_internal (command_by_name (com), dd_optname, val))
835 /* Parse OPT into command and value and run it. For example,
836 run_command("foo=bar") is equivalent to setoptval("foo", "bar").
837 This is used by the `--execute' flag in main.c. */
840 run_command (const char *opt)
844 switch (parse_line (opt, &com, &val, &comind))
847 if (!setval_internal (comind, com, val))
853 fprintf (stderr, _("%s: Invalid --execute command %s\n"),
854 exec_name, quote (opt));
859 /* Generic helper functions, for use with `commands'. */
861 /* Forward declarations: */
866 static bool decode_string (const char *, const struct decode_item *, int, int *);
867 static bool simple_atoi (const char *, const char *, int *);
868 static bool simple_atof (const char *, const char *, double *);
/* True if P is a one-character string whose only character, after
   c_tolower, equals C0.  The (p)[1] == '\0' test rejects longer
   strings.  C0 is compared as-is, so callers pass it already in the
   form c_tolower would produce.  */
870 #define CMP1(p, c0) (c_tolower((p)[0]) == (c0) && (p)[1] == '\0')
872 #define CMP2(p, c0, c1) (c_tolower((p)[0]) == (c0) \
873 && c_tolower((p)[1]) == (c1) \
876 #define CMP3(p, c0, c1, c2) (c_tolower((p)[0]) == (c0) \
877 && c_tolower((p)[1]) == (c1) \
878 && c_tolower((p)[2]) == (c2) \
882 /* Store the boolean value from VAL to PLACE. COM is ignored,
883 except for error messages. */
885 cmd_boolean (const char *com, const char *val, void *place)
889 if (CMP2 (val, 'o', 'n') || CMP3 (val, 'y', 'e', 's') || CMP1 (val, '1'))
890 /* "on", "yes" and "1" mean true. */
892 else if (CMP3 (val, 'o', 'f', 'f') || CMP2 (val, 'n', 'o') || CMP1 (val, '0'))
893 /* "off", "no" and "0" mean false. */
898 _("%s: %s: Invalid boolean %s; use `on' or `off'.\n"),
899 exec_name, com, quote (val));
903 *(bool *) place = value;
907 /* Set the non-negative integer value from VAL to PLACE. With
908 incorrect specification, the number remains unchanged. */
910 cmd_number (const char *com, const char *val, void *place)
912 if (!simple_atoi (val, val + strlen (val), place)
913 || *(int *) place < 0)
915 fprintf (stderr, _("%s: %s: Invalid number %s.\n"),
916 exec_name, com, quote (val));
922 /* Similar to cmd_number(), but also accepts `inf' as a synonym for 0. */
924 cmd_number_inf (const char *com, const char *val, void *place)
926 if (!strcasecmp (val, "inf"))
931 return cmd_number (com, val, place);
934 /* Copy (strdup) the string at VAL to a new location and store a
935 pointer to the copy in *PLACE. */
937 cmd_string (const char *com, const char *val, void *place)
939 char **pstring = (char **)place;
941 xfree_null (*pstring);
942 *pstring = xstrdup (val);
947 /* Like the above, but under Windows and MSDOS the copy also has its
948 backslashes converted to slashes. Tilde expansion of a leading `~'
949 to the user's home directory is applied by setval_internal_tilde(). */
951 cmd_file (const char *com, const char *val, void *place)
953 char **pstring = (char **)place;
955 xfree_null (*pstring);
957 /* #### If VAL is empty, perhaps should set *PLACE to NULL. */
959 *pstring = xstrdup (val);
961 #if defined(WINDOWS) || defined(MSDOS)
962 /* Convert "\" to "/". */
965 for (s = *pstring; *s; s++)
973 /* Like cmd_file, but strips trailing '/' characters. */
975 cmd_directory (const char *com, const char *val, void *place)
979 /* Call cmd_file() for tilde expansion and separator
980 canonicalization (backslash -> slash under Windows). These
981 things should perhaps be in a separate function. */
982 if (!cmd_file (com, val, place))
987 while (t > s && *--t == '/')
993 /* Split VAL by space to a vector of values, and append those values
994 to vector pointed to by the PLACE argument. If VAL is empty, the
995 PLACE vector is cleared instead. */
998 cmd_vector (const char *com, const char *val, void *place)
1000 char ***pvec = (char ***)place;
1003 *pvec = merge_vecs (*pvec, sepstring (val));
1013 cmd_directory_vector (const char *com, const char *val, void *place)
1015 char ***pvec = (char ***)place;
1019 /* Strip the trailing slashes from directories. */
1022 seps = sepstring (val);
1023 for (t = seps; t && *t; t++)
1025 int len = strlen (*t);
1026 /* Skip degenerate case of root directory. */
1029 if ((*t)[len - 1] == '/')
1030 (*t)[len - 1] = '\0';
1033 *pvec = merge_vecs (*pvec, seps);
1043 /* Engine for cmd_bytes and cmd_bytes_sum: converts a string such as
1044 "100k" or "2.5G" to a floating point number. */
1047 parse_bytes_helper (const char *val, double *result)
1049 double number, mult;
1050 const char *end = val + strlen (val);
1052 /* Check for "inf". */
1053 if (0 == strcmp (val, "inf"))
1059 /* Strip trailing whitespace. */
1060 while (val < end && c_isspace (end[-1]))
1065 switch (c_tolower (end[-1]))
1068 --end, mult = 1024.0;
1071 --end, mult = 1048576.0;
1074 --end, mult = 1073741824.0;
1077 --end, mult = 1099511627776.0;
1080 /* Not a recognized suffix: assume it's a digit. (If not,
1081 simple_atof will raise an error.) */
1085 /* Skip leading and trailing whitespace. */
1086 while (val < end && c_isspace (*val))
1088 while (val < end && c_isspace (end[-1]))
1093 if (!simple_atof (val, end, &number) || number < 0)
1096 *result = number * mult;
1100 /* Parse VAL as a number and set its value to PLACE (which should
1103 By default, the value is assumed to be in bytes. If "K", "M", or
1104 "G" are appended, the value is multiplied with 1<<10, 1<<20, or
1105 1<<30, respectively. Floating point values are allowed and are
1106 cast to integer before use. The idea is to be able to use things
1107 like 1.5k instead of "1536".
1109 The string "inf" is returned as 0.
1111 In case of error, false is returned and memory pointed to by PLACE
1112 remains unmodified. */
1115 cmd_bytes (const char *com, const char *val, void *place)
1118 if (!parse_bytes_helper (val, &byte_value))
1120 fprintf (stderr, _("%s: %s: Invalid byte value %s\n"),
1121 exec_name, com, quote (val));
1124 *(wgint *)place = (wgint)byte_value;
1128 /* Like cmd_bytes, but PLACE is interpreted as a pointer to
1129 SUM_SIZE_INT. It works by converting the string to double, therefore
1130 working with values up to 2^53-1 without loss of precision. This
1131 value (8192 TB) is large enough to serve for a while. */
1134 cmd_bytes_sum (const char *com, const char *val, void *place)
1137 if (!parse_bytes_helper (val, &byte_value))
1139 fprintf (stderr, _("%s: %s: Invalid byte value %s\n"),
1140 exec_name, com, quote (val));
1143 *(SUM_SIZE_INT *) place = (SUM_SIZE_INT) byte_value;
1147 /* Store the value of VAL to *PLACE. The value is a time period, by
1148 default expressed in seconds, but also accepting suffixes "m", "h",
1149 "d", and "w" for minutes, hours, days, and weeks respectively. */
1152 cmd_time (const char *com, const char *val, void *place)
1154 double number, mult;
1155 const char *end = val + strlen (val);
1157 /* Strip trailing whitespace. */
1158 while (val < end && c_isspace (end[-1]))
1164 fprintf (stderr, _("%s: %s: Invalid time period %s\n"),
1165 exec_name, com, quote (val));
1169 switch (c_tolower (end[-1]))
1172 --end, mult = 1; /* seconds */
1175 --end, mult = 60; /* minutes */
1178 --end, mult = 3600; /* hours */
1181 --end, mult = 86400.0; /* days */
1184 --end, mult = 604800.0; /* weeks */
1187 /* Not a recognized suffix: assume it belongs to the number.
1188 (If not, simple_atof will raise an error.) */
1192 /* Skip leading and trailing whitespace. */
1193 while (val < end && c_isspace (*val))
1195 while (val < end && c_isspace (end[-1]))
1200 if (!simple_atof (val, end, &number))
1203 *(double *)place = number * mult;
1209 cmd_cert_type (const char *com, const char *val, void *place)
1211 static const struct decode_item choices[] = {
1212 { "pem", keyfile_pem },
1213 { "der", keyfile_asn1 },
1214 { "asn1", keyfile_asn1 },
1216 int ok = decode_string (val, choices, countof (choices), place);
1218 fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val));
1223 /* Specialized helper functions, used by `commands' to handle some
1224 options specially. */
1226 static bool check_user_specified_header (const char *);
1229 cmd_spec_dirstruct (const char *com, const char *val, void *place_ignored)
1231 if (!cmd_boolean (com, val, &opt.dirstruct))
1233 /* Since dirstruct behaviour is explicitly changed, no_dirstruct
1234 must be affected inversely. */
1236 opt.no_dirstruct = false;
1238 opt.no_dirstruct = true;
1243 cmd_spec_header (const char *com, const char *val, void *place_ignored)
1245 /* Empty value means reset the list of headers. */
1248 free_vec (opt.user_headers);
1249 opt.user_headers = NULL;
1253 if (!check_user_specified_header (val))
1255 fprintf (stderr, _("%s: %s: Invalid header %s.\n"),
1256 exec_name, com, quote (val));
1259 opt.user_headers = vec_append (opt.user_headers, val);
1264 cmd_spec_warc_header (const char *com, const char *val, void *place_ignored)
1266 /* Empty value means reset the list of headers. */
1269 free_vec (opt.warc_user_headers);
1270 opt.warc_user_headers = NULL;
1274 if (!check_user_specified_header (val))
1276 fprintf (stderr, _("%s: %s: Invalid WARC header %s.\n"),
1277 exec_name, com, quote (val));
1280 opt.warc_user_headers = vec_append (opt.warc_user_headers, val);
1285 cmd_spec_htmlify (const char *com, const char *val, void *place_ignored)
1287 int flag = cmd_boolean (com, val, &opt.htmlify);
1288 if (flag && !opt.htmlify)
1289 opt.remove_listing = false;
1293 /* Set the "mirror" mode. It means: recursive download, timestamping,
1294 no limit on max. recursion depth, and don't remove listings. */
1297 cmd_spec_mirror (const char *com, const char *val, void *place_ignored)
1301 if (!cmd_boolean (com, val, &mirror))
1305 opt.recursive = true;
1306 if (!opt.no_dirstruct)
1307 opt.dirstruct = true;
1308 opt.timestamping = true;
1309 opt.reclevel = INFINITE_RECURSION;
1310 opt.remove_listing = false;
1315 /* Validate --prefer-family and set the choice. Allowed values are
1316 "IPv4", "IPv6", and "none". */
1319 cmd_spec_prefer_family (const char *com, const char *val, void *place_ignored)
1321 static const struct decode_item choices[] = {
1322 { "IPv4", prefer_ipv4 },
1323 { "IPv6", prefer_ipv6 },
1324 { "none", prefer_none },
1326 int prefer_family = prefer_none;
1327 int ok = decode_string (val, choices, countof (choices), &prefer_family);
1329 fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val));
1330 opt.prefer_family = prefer_family;
1334 /* Set progress.type to VAL, but verify that it's a valid progress
1335 implementation before that. */
1338 cmd_spec_progress (const char *com, const char *val, void *place_ignored)
1340 if (!valid_progress_implementation_p (val))
1342 fprintf (stderr, _("%s: %s: Invalid progress type %s.\n"),
1343 exec_name, com, quote (val));
1346 xfree_null (opt.progress_type);
1348 /* Don't call set_progress_implementation here. It will be called
1349 in main() when it becomes clear what the log output is. */
1350 opt.progress_type = xstrdup (val);
1354 /* Set opt.recursive to VAL as with cmd_boolean. If opt.recursive is
1355 set to true, also set opt.dirstruct to true, unless opt.no_dirstruct
1359 cmd_spec_recursive (const char *com, const char *val, void *place_ignored)
1361 if (!cmd_boolean (com, val, &opt.recursive))
1365 if (opt.recursive && !opt.no_dirstruct)
1366 opt.dirstruct = true;
1372 cmd_spec_restrict_file_names (const char *com, const char *val, void *place_ignored)
1374 int restrict_os = opt.restrict_files_os;
1375 int restrict_ctrl = opt.restrict_files_ctrl;
1376 int restrict_case = opt.restrict_files_case;
1377 int restrict_nonascii = opt.restrict_files_nonascii;
1381 #define VAL_IS(string_literal) BOUNDED_EQUAL (val, end, string_literal)
1385 end = strchr (val, ',');
1387 end = val + strlen (val);
1389 if (VAL_IS ("unix"))
1390 restrict_os = restrict_unix;
1391 else if (VAL_IS ("windows"))
1392 restrict_os = restrict_windows;
1393 else if (VAL_IS ("lowercase"))
1394 restrict_case = restrict_lowercase;
1395 else if (VAL_IS ("uppercase"))
1396 restrict_case = restrict_uppercase;
1397 else if (VAL_IS ("nocontrol"))
1398 restrict_ctrl = false;
1399 else if (VAL_IS ("ascii"))
1400 restrict_nonascii = true;
1403 fprintf (stderr, _("\
1404 %s: %s: Invalid restriction %s,\n\
1405 use [unix|windows],[lowercase|uppercase],[nocontrol],[ascii].\n"),
1406 exec_name, com, quote (val));
1413 while (*val && *end);
1417 opt.restrict_files_os = restrict_os;
1418 opt.restrict_files_ctrl = restrict_ctrl;
1419 opt.restrict_files_case = restrict_case;
1420 opt.restrict_files_nonascii = restrict_nonascii;
#ifdef HAVE_SSL
/* Decode VAL ("auto", "sslv2", "sslv3" or "tlsv1", matched by
   decode_string) into the corresponding secure_protocol_* code and
   store it through PLACE, which decode_string treats as an int *.
   Returns true on success; on an unknown value prints an error,
   leaves *PLACE alone and returns false.  */

static bool
cmd_spec_secure_protocol (const char *com, const char *val, void *place)
{
  static const struct decode_item choices[] = {
    { "auto", secure_protocol_auto },
    { "sslv2", secure_protocol_sslv2 },
    { "sslv3", secure_protocol_sslv3 },
    { "tlsv1", secure_protocol_tlsv1 },
  };

  if (decode_string (val, choices, countof (choices), place))
    return true;

  fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val));
  return false;
}
#endif /* HAVE_SSL */
1442 /* Set all three timeout values. */
1445 cmd_spec_timeout (const char *com, const char *val, void *place_ignored)
1448 if (!cmd_time (com, val, &value))
1450 opt.read_timeout = value;
1451 opt.connect_timeout = value;
1452 opt.dns_timeout = value;
1457 cmd_spec_useragent (const char *com, const char *val, void *place_ignored)
1459 /* Disallow embedded newlines. */
1460 if (strchr (val, '\n'))
1462 fprintf (stderr, _("%s: %s: Invalid value %s.\n"),
1463 exec_name, com, quote (val));
1466 xfree_null (opt.useragent);
1467 opt.useragent = xstrdup (val);
1471 /* The "verbose" option cannot be cmd_boolean because the variable is
1472 not bool -- it's of type int (-1 means uninitialized because of
1473 some random hackery for disallowing -q -v). */
1476 cmd_spec_verbose (const char *com, const char *val, void *place_ignored)
1479 if (cmd_boolean (com, val, &flag))
1487 /* Miscellaneous useful routines. */
/* A very simple atoi clone, more useful than atoi because it works on
   delimited strings, and has error reportage.

   Parses the text in [BEG, END): optional leading whitespace, an
   optional '+' or '-' sign, then decimal digits that must run all the
   way up to END.  Returns true on success and stores the result to
   *DEST.  Returns false, leaving *DEST untouched, when there are no
   digits, when a non-digit follows, or when the value does not fit in
   an int.  */

static bool
simple_atoi (const char *beg, const char *end, int *dest)
{
  int result = 0;
  bool negative = false;
  const char *p = beg;

  while (p < end && c_isspace (*p))
    ++p;
  if (p < end && (*p == '-' || *p == '+'))
    {
      negative = (*p == '-');
      ++p;
    }
  if (p == end)
    return false;

  /* Negative numbers are accumulated as negative values because the
     most negative integer cannot be represented as a positive number.

     The range check is done BEFORE multiplying: signed overflow is
     undefined behavior, and comparing the wrapped result afterwards
     does not catch every overflow (e.g. "5000000000" wraps to a value
     larger than the previous partial result).  */

  for (; p < end && c_isdigit (*p); p++)
    {
      int digit = *p - '0';

      if (!negative)
        {
          if (result > (INT_MAX - digit) / 10)
            return false;         /* overflow */
          result = (10 * result) + digit;
        }
      else
        {
          if (result < (INT_MIN + digit) / 10)
            return false;         /* underflow */
          result = (10 * result) - digit;
        }
    }

  /* Anything left over means a non-digit was encountered.  */
  if (p != end)
    return false;

  *dest = result;
  return true;
}
/* Trivial atof, with error reporting.  Handles "<digits>[.<digits>]"
   in [BEG, END), optionally preceded by whitespace and a sign;
   doesn't handle exponential notation.  At least one digit is
   required and at most one '.' is allowed.  Returns true on success,
   false on failure.  In case of success, stores its result to
   *DEST.  */

static bool
simple_atof (const char *beg, const char *end, double *dest)
{
  double value = 0;
  double divider = 1;
  bool is_negative = false;
  bool after_dot = false;
  bool have_digit = false;
  const char *cur = beg;

  while (cur < end && c_isspace (*cur))
    ++cur;

  if (cur < end && (*cur == '-' || *cur == '+'))
    {
      is_negative = (*cur == '-');
      ++cur;
    }

  for (; cur < end; cur++)
    {
      char ch = *cur;

      if (ch == '.')
        {
          /* A second decimal point is an error.  */
          if (after_dot)
            return false;
          after_dot = true;
          continue;
        }

      if (!c_isdigit (ch))
        return false;

      have_digit = true;
      if (after_dot)
        {
          /* Each fractional digit is worth a tenth of the previous.  */
          divider *= 10;
          value += (ch - '0') / divider;
        }
      else
        value = (10 * value) + (ch - '0');
    }

  if (!have_digit)
    return false;

  if (is_negative)
    value = -value;

  *dest = value;
  return true;
}
/* Verify that the user-specified header in S is valid.  It must
   contain a colon preceded by at least one character, none of the
   characters before the colon may be whitespace, and the string must
   not contain newlines.  Returns true when S passes all checks.  */

static bool
check_user_specified_header (const char *s)
{
  const char *name_end = s;

  /* Skip over the header name: stop at the colon, at whitespace, or
     at the end of the string.  */
  while (*name_end != '\0' && *name_end != ':' && !c_isspace (*name_end))
    ++name_end;

  /* The name MUST be non-empty and terminated by `:'.  */
  if (name_end == s || *name_end != ':')
    return false;

  /* The header MUST NOT contain newlines.  */
  return strchr (s, '\n') == NULL;
}
1613 /* Decode VAL into a number, according to ITEMS. */
1616 decode_string (const char *val, const struct decode_item *items, int itemcount,
1620 for (i = 0; i < itemcount; i++)
1621 if (0 == strcasecmp (val, items[i].name))
1623 *place = items[i].code;
1630 void cleanup_html_url (void);
1633 /* Free the memory allocated by global variables. */
1637 /* Free external resources, close files, etc. */
1640 fclose (output_stream);
1641 /* No need to check for error because Wget flushes its output (and
1642 checks for errors) after any data arrives. */
1644 /* We're exiting anyway so there's no real need to call free()
1645 hundreds of times. Skipping the frees will make Wget exit
1648 However, when detecting leaks, it's crucial to free() everything
1649 because then you can find the real leaks, i.e. the allocated
1650 memory which grows with the size of the program. */
1656 cleanup_html_url ();
1661 extern acc_t *netrc_list;
1662 free_netrc (netrc_list);
1664 xfree_null (opt.choose_config);
1665 xfree_null (opt.lfilename);
1666 xfree_null (opt.dir_prefix);
1667 xfree_null (opt.input_filename);
1668 xfree_null (opt.output_document);
1669 free_vec (opt.accepts);
1670 free_vec (opt.rejects);
1671 free_vec (opt.excludes);
1672 free_vec (opt.includes);
1673 free_vec (opt.domains);
1674 free_vec (opt.follow_tags);
1675 free_vec (opt.ignore_tags);
1676 xfree_null (opt.progress_type);
1677 xfree_null (opt.ftp_user);
1678 xfree_null (opt.ftp_passwd);
1679 xfree_null (opt.ftp_proxy);
1680 xfree_null (opt.https_proxy);
1681 xfree_null (opt.http_proxy);
1682 free_vec (opt.no_proxy);
1683 xfree_null (opt.useragent);
1684 xfree_null (opt.referer);
1685 xfree_null (opt.http_user);
1686 xfree_null (opt.http_passwd);
1687 free_vec (opt.user_headers);
1688 free_vec (opt.warc_user_headers);
1690 xfree_null (opt.cert_file);
1691 xfree_null (opt.private_key);
1692 xfree_null (opt.ca_directory);
1693 xfree_null (opt.ca_cert);
1694 xfree_null (opt.random_file);
1695 xfree_null (opt.egd_file);
1697 xfree_null (opt.bind_address);
1698 xfree_null (opt.cookies_input);
1699 xfree_null (opt.cookies_output);
1700 xfree_null (opt.user);
1701 xfree_null (opt.passwd);
1702 xfree_null (opt.base_href);
1704 #endif /* DEBUG_MALLOC */
1707 /* Unit testing routines. */
1712 test_commands_sorted()
1714 int prev_idx = 0, next_idx = 1;
1715 int command_count = countof (commands) - 1;
1717 while (next_idx <= command_count)
1719 cmp = strcasecmp (commands[prev_idx].name, commands[next_idx].name);
1722 mu_assert ("FAILED", false);
1735 test_cmd_spec_restrict_file_names()
1740 int expected_restrict_files_os;
1741 int expected_restrict_files_ctrl;
1742 int expected_restrict_files_case;
1745 { "windows", restrict_windows, true, restrict_no_case_restriction, true },
1746 { "windows,", restrict_windows, true, restrict_no_case_restriction, true },
1747 { "windows,lowercase", restrict_windows, true, restrict_lowercase, true },
1748 { "unix,nocontrol,lowercase,", restrict_unix, false, restrict_lowercase, true },
1751 for (i = 0; i < sizeof(test_array)/sizeof(test_array[0]); ++i)
1756 res = cmd_spec_restrict_file_names ("dummy", test_array[i].val, NULL);
1759 fprintf (stderr, "test_cmd_spec_restrict_file_names: TEST %d\n", i); fflush (stderr);
1760 fprintf (stderr, "opt.restrict_files_os: %d\n", opt.restrict_files_os); fflush (stderr);
1761 fprintf (stderr, "opt.restrict_files_ctrl: %d\n", opt.restrict_files_ctrl); fflush (stderr);
1762 fprintf (stderr, "opt.restrict_files_case: %d\n", opt.restrict_files_case); fflush (stderr);
1764 mu_assert ("test_cmd_spec_restrict_file_names: wrong result",
1765 res == test_array[i].result
1766 && opt.restrict_files_os == test_array[i].expected_restrict_files_os
1767 && opt.restrict_files_ctrl == test_array[i].expected_restrict_files_ctrl
1768 && opt.restrict_files_case == test_array[i].expected_restrict_files_case);
1774 #endif /* TESTING */