1 /* Reading/parsing the initialization file.
2 Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
3 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation,
6 This file is part of GNU Wget.
8 GNU Wget is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 GNU Wget is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with Wget. If not, see <http://www.gnu.org/licenses/>.
21 Additional permission under GNU GPL version 3 section 7
23 If you modify this program, or any covered work, by linking or
24 combining it with the OpenSSL project's OpenSSL library (or a
25 modified version of that library), containing parts covered by the
26 terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
27 grants you additional permission to convey the resulting work.
28 Corresponding Source for a non-source form of such a combination
29 shall include the source code for the parts of OpenSSL used as well
30 as that of the covered work. */
42 /* not all systems provide PATH_MAX in limits.h */
44 # include <sys/param.h>
46 # define PATH_MAX MAXPATHLEN
65 #include "recur.h" /* for INFINITE_RECURSION */
66 #include "convert.h" /* for convert_cleanup */
67 #include "res.h" /* for res_cleanup */
68 #include "http.h" /* for http_cleanup */
69 #include "retr.h" /* for output_stream */
70 #include "warc.h" /* for warc_close */
78 #define CMD_DECLARE(func) static bool func (const char *, const char *, void *)
80 CMD_DECLARE (cmd_boolean);
81 CMD_DECLARE (cmd_bytes);
82 CMD_DECLARE (cmd_bytes_sum);
84 CMD_DECLARE (cmd_cert_type);
86 CMD_DECLARE (cmd_directory_vector);
87 CMD_DECLARE (cmd_number);
88 CMD_DECLARE (cmd_number_inf);
89 CMD_DECLARE (cmd_string);
90 CMD_DECLARE (cmd_string_uppercase);
91 CMD_DECLARE (cmd_file);
92 CMD_DECLARE (cmd_directory);
93 CMD_DECLARE (cmd_time);
94 CMD_DECLARE (cmd_vector);
96 CMD_DECLARE (cmd_spec_dirstruct);
97 CMD_DECLARE (cmd_spec_header);
98 CMD_DECLARE (cmd_spec_warc_header);
99 CMD_DECLARE (cmd_spec_htmlify);
100 CMD_DECLARE (cmd_spec_mirror);
101 CMD_DECLARE (cmd_spec_prefer_family);
102 CMD_DECLARE (cmd_spec_progress);
103 CMD_DECLARE (cmd_spec_recursive);
104 CMD_DECLARE (cmd_spec_regex_type);
105 CMD_DECLARE (cmd_spec_restrict_file_names);
106 CMD_DECLARE (cmd_spec_report_speed);
108 CMD_DECLARE (cmd_spec_secure_protocol);
110 CMD_DECLARE (cmd_spec_timeout);
111 CMD_DECLARE (cmd_spec_useragent);
112 CMD_DECLARE (cmd_spec_verbose);
114 /* List of recognized commands, each consisting of name, place and
115 function. When adding a new command, simply add it to the list,
116 but be sure to keep the list sorted alphabetically, as
117 command_by_name's binary search depends on it. Also, be sure to
118 add any entries that allocate memory (e.g. cmd_string and
119 cmd_vector) to the cleanup() function below. */
121 static const struct {
124 bool (*action) (const char *, const char *, void *);
126 /* KEEP THIS LIST ALPHABETICALLY SORTED */
127 { "accept", &opt.accepts, cmd_vector },
128 { "acceptregex", &opt.acceptregex_s, cmd_string },
129 { "addhostdir", &opt.add_hostdir, cmd_boolean },
130 { "adjustextension", &opt.adjust_extension, cmd_boolean },
131 { "alwaysrest", &opt.always_rest, cmd_boolean }, /* deprecated */
132 { "askpassword", &opt.ask_passwd, cmd_boolean },
133 { "authnochallenge", &opt.auth_without_challenge,
135 { "background", &opt.background, cmd_boolean },
136 { "backupconverted", &opt.backup_converted, cmd_boolean },
137 { "backups", &opt.backups, cmd_number },
138 { "base", &opt.base_href, cmd_string },
139 { "bindaddress", &opt.bind_address, cmd_string },
140 { "bodydata", &opt.body_data, cmd_string },
141 { "bodyfile", &opt.body_file, cmd_string },
143 { "cacertificate", &opt.ca_cert, cmd_file },
145 { "cache", &opt.allow_cache, cmd_boolean },
147 { "cadirectory", &opt.ca_directory, cmd_directory },
148 { "certificate", &opt.cert_file, cmd_file },
149 { "certificatetype", &opt.cert_type, cmd_cert_type },
150 { "checkcertificate", &opt.check_cert, cmd_boolean },
152 { "chooseconfig", &opt.choose_config, cmd_file },
153 { "connecttimeout", &opt.connect_timeout, cmd_time },
154 { "contentdisposition", &opt.content_disposition, cmd_boolean },
155 { "contentonerror", &opt.content_on_error, cmd_boolean },
156 { "continue", &opt.always_rest, cmd_boolean },
157 { "convertlinks", &opt.convert_links, cmd_boolean },
158 { "cookies", &opt.cookies, cmd_boolean },
159 { "cutdirs", &opt.cut_dirs, cmd_number },
161 { "debug", &opt.debug, cmd_boolean },
163 { "defaultpage", &opt.default_page, cmd_string },
164 { "deleteafter", &opt.delete_after, cmd_boolean },
165 { "dirprefix", &opt.dir_prefix, cmd_directory },
166 { "dirstruct", NULL, cmd_spec_dirstruct },
167 { "dnscache", &opt.dns_cache, cmd_boolean },
168 { "dnstimeout", &opt.dns_timeout, cmd_time },
169 { "domains", &opt.domains, cmd_vector },
170 { "dotbytes", &opt.dot_bytes, cmd_bytes },
171 { "dotsinline", &opt.dots_in_line, cmd_number },
172 { "dotspacing", &opt.dot_spacing, cmd_number },
173 { "dotstyle", &opt.dot_style, cmd_string }, /* deprecated */
175 { "egdfile", &opt.egd_file, cmd_file },
177 { "excludedirectories", &opt.excludes, cmd_directory_vector },
178 { "excludedomains", &opt.exclude_domains, cmd_vector },
179 { "followftp", &opt.follow_ftp, cmd_boolean },
180 { "followtags", &opt.follow_tags, cmd_vector },
181 { "forcehtml", &opt.force_html, cmd_boolean },
182 { "ftppasswd", &opt.ftp_passwd, cmd_string }, /* deprecated */
183 { "ftppassword", &opt.ftp_passwd, cmd_string },
184 { "ftpproxy", &opt.ftp_proxy, cmd_string },
186 { "ftpstmlf", &opt.ftp_stmlf, cmd_boolean },
187 #endif /* def __VMS */
188 { "ftpuser", &opt.ftp_user, cmd_string },
189 { "glob", &opt.ftp_glob, cmd_boolean },
190 { "header", NULL, cmd_spec_header },
191 { "htmlextension", &opt.adjust_extension, cmd_boolean }, /* deprecated */
192 { "htmlify", NULL, cmd_spec_htmlify },
193 { "httpkeepalive", &opt.http_keep_alive, cmd_boolean },
194 { "httppasswd", &opt.http_passwd, cmd_string }, /* deprecated */
195 { "httppassword", &opt.http_passwd, cmd_string },
196 { "httpproxy", &opt.http_proxy, cmd_string },
197 { "httpsproxy", &opt.https_proxy, cmd_string },
198 { "httpuser", &opt.http_user, cmd_string },
199 { "ignorecase", &opt.ignore_case, cmd_boolean },
200 { "ignorelength", &opt.ignore_length, cmd_boolean },
201 { "ignoretags", &opt.ignore_tags, cmd_vector },
202 { "includedirectories", &opt.includes, cmd_directory_vector },
204 { "inet4only", &opt.ipv4_only, cmd_boolean },
205 { "inet6only", &opt.ipv6_only, cmd_boolean },
207 { "input", &opt.input_filename, cmd_file },
208 { "iri", &opt.enable_iri, cmd_boolean },
209 { "keepsessioncookies", &opt.keep_session_cookies, cmd_boolean },
210 { "limitrate", &opt.limit_rate, cmd_bytes },
211 { "loadcookies", &opt.cookies_input, cmd_file },
212 { "localencoding", &opt.locale, cmd_string },
213 { "logfile", &opt.lfilename, cmd_file },
214 { "login", &opt.ftp_user, cmd_string },/* deprecated*/
215 { "maxredirect", &opt.max_redirect, cmd_number },
216 { "method", &opt.method, cmd_string_uppercase },
217 { "mirror", NULL, cmd_spec_mirror },
218 { "netrc", &opt.netrc, cmd_boolean },
219 { "noclobber", &opt.noclobber, cmd_boolean },
220 { "noparent", &opt.no_parent, cmd_boolean },
221 { "noproxy", &opt.no_proxy, cmd_vector },
222 { "numtries", &opt.ntry, cmd_number_inf },/* deprecated*/
223 { "outputdocument", &opt.output_document, cmd_file },
224 { "pagerequisites", &opt.page_requisites, cmd_boolean },
225 { "passiveftp", &opt.ftp_pasv, cmd_boolean },
226 { "passwd", &opt.ftp_passwd, cmd_string },/* deprecated*/
227 { "password", &opt.passwd, cmd_string },
228 { "postdata", &opt.post_data, cmd_string },
229 { "postfile", &opt.post_file_name, cmd_file },
230 { "preferfamily", NULL, cmd_spec_prefer_family },
231 { "preservepermissions", &opt.preserve_perm, cmd_boolean },
233 { "privatekey", &opt.private_key, cmd_file },
234 { "privatekeytype", &opt.private_key_type, cmd_cert_type },
236 { "progress", &opt.progress_type, cmd_spec_progress },
237 { "protocoldirectories", &opt.protocol_directories, cmd_boolean },
238 { "proxypasswd", &opt.proxy_passwd, cmd_string }, /* deprecated */
239 { "proxypassword", &opt.proxy_passwd, cmd_string },
240 { "proxyuser", &opt.proxy_user, cmd_string },
241 { "quiet", &opt.quiet, cmd_boolean },
242 { "quota", &opt.quota, cmd_bytes_sum },
244 { "randomfile", &opt.random_file, cmd_file },
246 { "randomwait", &opt.random_wait, cmd_boolean },
247 { "readtimeout", &opt.read_timeout, cmd_time },
248 { "reclevel", &opt.reclevel, cmd_number_inf },
249 { "recursive", NULL, cmd_spec_recursive },
250 { "referer", &opt.referer, cmd_string },
251 { "regextype", &opt.regex_type, cmd_spec_regex_type },
252 { "reject", &opt.rejects, cmd_vector },
253 { "rejectregex", &opt.rejectregex_s, cmd_string },
254 { "relativeonly", &opt.relative_only, cmd_boolean },
255 { "remoteencoding", &opt.encoding_remote, cmd_string },
256 { "removelisting", &opt.remove_listing, cmd_boolean },
257 { "reportspeed", &opt.report_bps, cmd_spec_report_speed},
258 { "restrictfilenames", NULL, cmd_spec_restrict_file_names },
259 { "retrsymlinks", &opt.retr_symlinks, cmd_boolean },
260 { "retryconnrefused", &opt.retry_connrefused, cmd_boolean },
261 { "robots", &opt.use_robots, cmd_boolean },
262 { "savecookies", &opt.cookies_output, cmd_file },
263 { "saveheaders", &opt.save_headers, cmd_boolean },
265 { "secureprotocol", &opt.secure_protocol, cmd_spec_secure_protocol },
267 { "serverresponse", &opt.server_response, cmd_boolean },
268 { "showalldnsentries", &opt.show_all_dns_entries, cmd_boolean },
269 { "spanhosts", &opt.spanhost, cmd_boolean },
270 { "spider", &opt.spider, cmd_boolean },
271 { "strictcomments", &opt.strict_comments, cmd_boolean },
272 { "timeout", NULL, cmd_spec_timeout },
273 { "timestamping", &opt.timestamping, cmd_boolean },
274 { "tries", &opt.ntry, cmd_number_inf },
275 { "trustservernames", &opt.trustservernames, cmd_boolean },
276 { "unlink", &opt.unlink, cmd_boolean },
277 { "useproxy", &opt.use_proxy, cmd_boolean },
278 { "user", &opt.user, cmd_string },
279 { "useragent", NULL, cmd_spec_useragent },
280 { "useservertimestamps", &opt.useservertimestamps, cmd_boolean },
281 { "verbose", NULL, cmd_spec_verbose },
282 { "wait", &opt.wait, cmd_time },
283 { "waitretry", &opt.waitretry, cmd_time },
284 { "warccdx", &opt.warc_cdx_enabled, cmd_boolean },
285 { "warccdxdedup", &opt.warc_cdx_dedup_filename, cmd_file },
287 { "warccompression", &opt.warc_compression_enabled, cmd_boolean },
289 { "warcdigests", &opt.warc_digests_enabled, cmd_boolean },
290 { "warcfile", &opt.warc_filename, cmd_file },
291 { "warcheader", NULL, cmd_spec_warc_header },
292 { "warckeeplog", &opt.warc_keep_log, cmd_boolean },
293 { "warcmaxsize", &opt.warc_maxsize, cmd_bytes },
294 { "warctempdir", &opt.warc_tempdir, cmd_directory },
296 { "wdebug", &opt.wdebug, cmd_boolean },
300 /* Look up CMDNAME in the commands[] and return its position in the
301 array. If CMDNAME is not found, return -1. */
304 command_by_name (const char *cmdname)
306 /* Use binary search for speed. Wget has ~100 commands, which
307 guarantees a worst case performance of 7 string comparisons. */
308 int lo = 0, hi = countof (commands) - 1;
312 int mid = (lo + hi) >> 1;
313 int cmp = strcasecmp (cmdname, commands[mid].name);
324 /* Reset the variables to default values. */
330 /* Most of the default values are 0 (and 0.0, NULL, and false).
331 Just reset everything, and fill in the non-zero values. Note
332 that initializing pointers to NULL this way is technically
333 illegal, but porting Wget to a machine where NULL is not all-zero
334 bit pattern will be the least of the implementors' worries. */
341 opt.add_hostdir = true;
345 opt.http_keep_alive = true;
346 opt.use_proxy = true;
347 tmp = getenv ("no_proxy");
349 opt.no_proxy = sepstring (tmp);
350 opt.prefer_family = prefer_none;
351 opt.allow_cache = true;
353 opt.read_timeout = 900;
354 opt.use_robots = true;
356 opt.remove_listing = true;
358 opt.dot_bytes = 1024;
359 opt.dot_spacing = 10;
360 opt.dots_in_line = 50;
362 opt.dns_cache = true;
366 opt.check_cert = true;
369 /* The default for file name restriction defaults to the OS type. */
370 #if defined(WINDOWS) || defined(MSDOS) || defined(__CYGWIN__)
371 opt.restrict_files_os = restrict_windows;
373 opt.restrict_files_os = restrict_unix;
375 opt.restrict_files_ctrl = true;
376 opt.restrict_files_nonascii = false;
377 opt.restrict_files_case = restrict_no_case_restriction;
379 opt.regex_type = regex_type_posix;
381 opt.max_redirect = 20;
386 opt.enable_iri = true;
388 opt.enable_iri = false;
391 opt.encoding_remote = NULL;
393 opt.useservertimestamps = true;
394 opt.show_all_dns_entries = false;
396 opt.warc_maxsize = 0; /* 1024 * 1024 * 1024; */
398 opt.warc_compression_enabled = true;
400 opt.warc_compression_enabled = false;
402 opt.warc_digests_enabled = true;
403 opt.warc_cdx_enabled = false;
404 opt.warc_cdx_dedup_filename = NULL;
405 opt.warc_tempdir = NULL;
406 opt.warc_keep_log = true;
409 /* Return the user's home directory (strdup-ed), or NULL if none is
414 static char *buf = NULL;
415 static char *home, *ret;
419 home = getenv ("HOME");
425 /* Under MSDOS, if $HOME isn't defined, use the directory where
426 `wget.exe' resides. */
427 const char *_w32_get_argv0 (void); /* in libwatt.a/pcconfig.c */
430 buff = _w32_get_argv0 ();
432 p = strrchr (buf, '/'); /* djgpp */
434 p = strrchr (buf, '\\'); /* others */
438 buff = malloc (len + 1);
442 strncpy (buff, _w32_get_argv0 (), len);
446 #elif !defined(WINDOWS)
447 /* If HOME is not defined, try getting it from the password
449 struct passwd *pwd = getpwuid (getuid ());
450 if (!pwd || !pwd->pw_dir)
454 /* Under Windows, if $HOME isn't defined, use the directory where
455 `wget.exe' resides. */
461 ret = home ? xstrdup (home) : NULL;
468 /* Check the 'WGETRC' environment variable and return the file name
469 if 'WGETRC' is set and is a valid file.
470 If the `WGETRC' variable exists but the file does not exist, the
471 function will exit(). */
473 wgetrc_env_file_name (void)
475 char *env = getenv ("WGETRC");
478 if (!file_exists_p (env))
480 fprintf (stderr, _("%s: WGETRC points to %s, which doesn't exist.\n"),
484 return xstrdup (env);
489 /* Check for the existence of '$HOME/.wgetrc' and return its path
490 if it exists and is set. */
492 wgetrc_user_file_name (void)
496 /* If that failed, try $HOME/.wgetrc (or equivalent). */
499 file = "SYS$LOGIN:.wgetrc";
500 #else /* def __VMS */
503 file = aprintf ("%s/.wgetrc", home);
505 #endif /* def __VMS [else] */
509 if (!file_exists_p (file))
517 /* Return the path to the user's .wgetrc. This is either the value of
518 `WGETRC' environment variable, or `$HOME/.wgetrc'.
520 Additionally, for windows, look in the directory where wget.exe
523 wgetrc_file_name (void)
525 char *file = wgetrc_env_file_name ();
529 file = wgetrc_user_file_name ();
532 /* Under Windows, if we still haven't found .wgetrc, look for the file
533 `wget.ini' in the directory where `wget.exe' resides; we do this for
534 backward compatibility with previous versions of Wget.
535 SYSTEM_WGETRC should not be defined under WINDOWS. */
538 char *home = home_dir ();
544 file = aprintf ("%s/wget.ini", home);
545 if (!file_exists_p (file))
558 /* Return values of parse_line. */
566 static enum parse_line parse_line (const char *, char **, char **, int *);
567 static bool setval_internal (int, const char *, const char *);
568 static bool setval_internal_tilde (int, const char *, const char *);
570 /* Initialize variables from a wgetrc file. Returns false (failure) if
571 there were errors in the file; an unreadable file is not a failure. */
574 run_wgetrc (const char *file)
581 fp = fopen (file, "r");
584 fprintf (stderr, _("%s: Cannot read %s (%s).\n"), exec_name,
585 file, strerror (errno));
586 return true; /* not a fatal error */
589 while ((line = read_whole_line (fp)) != NULL)
591 char *com = NULL, *val = NULL;
594 /* Parse the line. */
595 switch (parse_line (line, &com, &val, &comind))
598 /* If everything is OK, set the value. */
599 if (!setval_internal_tilde (comind, com, val))
601 fprintf (stderr, _("%s: Error in %s at line %d.\n"),
602 exec_name, file, ln);
606 case line_syntax_error:
607 fprintf (stderr, _("%s: Syntax error in %s at line %d.\n"),
608 exec_name, file, ln);
611 case line_unknown_command:
612 fprintf (stderr, _("%s: Unknown command %s in %s at line %d.\n"),
613 exec_name, quote (com), file, ln);
631 /* Initialize the defaults and run the system wgetrc and user's own
636 char *file, *env_sysrc;
639 /* Run a non-standard system rc file when the according environment
640 variable has been set. For internal testing purposes only! */
641 env_sysrc = getenv ("SYSTEM_WGETRC");
642 if (env_sysrc && file_exists_p (env_sysrc))
644 ok &= run_wgetrc (env_sysrc);
645 /* If there are any problems parsing the system wgetrc file, tell
649 fprintf (stderr, _("\
650 Parsing system wgetrc file (env SYSTEM_WGETRC) failed. Please check\n\
652 or specify a different file using --config.\n"), env_sysrc);
656 /* Otherwise, if SYSTEM_WGETRC is defined, use it. */
658 else if (file_exists_p (SYSTEM_WGETRC))
659 ok &= run_wgetrc (SYSTEM_WGETRC);
660 /* If there are any problems parsing the system wgetrc file, tell
664 fprintf (stderr, _("\
665 Parsing system wgetrc file failed. Please check\n\
667 or specify a different file using --config.\n"), SYSTEM_WGETRC);
671 /* Override it with your own, if one exists. */
672 file = wgetrc_file_name ();
675 /* #### We should canonicalize `file' and SYSTEM_WGETRC with
676 something like realpath() before comparing them with `strcmp' */
678 if (!strcmp (file, SYSTEM_WGETRC))
680 fprintf (stderr, _("\
681 %s: Warning: Both system and user wgetrc point to %s.\n"),
682 exec_name, quote (file));
686 ok &= run_wgetrc (file);
688 /* If there were errors processing either `.wgetrc', abort. */
696 /* Remove dashes and underscores from S, modifying S in the
702 char *t = s; /* t - tortoise */
703 char *h = s; /* h - hare */
705 if (*h == '_' || *h == '-')
712 /* Parse the line pointed by line, with the syntax:
713 <sp>* command <sp>* = <sp>* value <sp>*
714 Uses malloc to allocate space for command and value.
716 Returns one of line_ok, line_empty, line_syntax_error, or
717 line_unknown_command.
719 In case of line_ok, *COM and *VAL point to freshly allocated
720 strings, and *COMIND holds COM's index. For line_unknown_command,
721 *COM and *VAL are still allocated; otherwise they are unmodified. */
723 static enum parse_line
724 parse_line (const char *line, char **com, char **val, int *comind)
727 const char *end = line + strlen (line);
728 const char *cmdstart, *cmdend;
729 const char *valstart, *valend;
734 /* Skip leading and trailing whitespace. */
735 while (*line && c_isspace (*line))
737 while (end > line && c_isspace (end[-1]))
740 /* Skip empty lines and comments. */
741 if (!*line || *line == '#')
747 while (p < end && (c_isalnum (*p) || *p == '_' || *p == '-'))
751 /* Skip '=', as well as any space before or after it. */
752 while (p < end && c_isspace (*p))
754 if (p == end || *p != '=')
755 return line_syntax_error;
757 while (p < end && c_isspace (*p))
763 /* The syntax is valid (even though the command might not be). Fill
764 in the command name and value. */
765 *com = strdupdelim (cmdstart, cmdend);
766 *val = strdupdelim (valstart, valend);
768 /* The line now known to be syntactically correct. Check whether
769 the command is valid. */
770 BOUNDED_TO_ALLOCA (cmdstart, cmdend, cmdcopy);
772 ind = command_by_name (cmdcopy);
774 return line_unknown_command;
776 /* Report success to the caller. */
781 #if defined(WINDOWS) || defined(MSDOS)
782 # define ISSEP(c) ((c) == '/' || (c) == '\\')
784 # define ISSEP(c) ((c) == '/')
787 /* Run commands[comind].action. */
790 setval_internal (int comind, const char *com, const char *val)
792 assert (0 <= comind && ((size_t) comind) < countof (commands));
793 DEBUGP (("Setting %s (%s) to %s\n", com, commands[comind].name, val));
794 return commands[comind].action (com, val, commands[comind].place);
798 setval_internal_tilde (int comind, const char *com, const char *val)
804 ret = setval_internal (comind, com, val);
806 /* We make tilde expansion for cmd_file and cmd_directory */
807 if (((commands[comind].action == cmd_file) ||
808 (commands[comind].action == cmd_directory))
809 && ret && (*val == '~' && ISSEP (val[1])))
811 pstring = commands[comind].place;
815 homelen = strlen (home);
816 while (homelen && ISSEP (home[homelen - 1]))
817 home[--homelen] = '\0';
819 /* Skip the leading "~/". */
820 for (++val; ISSEP (*val); val++)
822 *pstring = concat_strings (home, "/", val, (char *)0);
828 /* Run command COM with value VAL. If running the command produces an
829 error, report the error and exit.
831 This is intended to be called from main() to modify Wget's behavior
832 through command-line switches. Since COM is hard-coded in main(),
833 it is not canonicalized, and this aborts when COM is not found.
835 If COMIND's are exported to init.h, this function will be changed
836 to accept COMIND directly. */
839 setoptval (const char *com, const char *val, const char *optname)
841 /* Prepend "--" to OPTNAME. */
842 char *dd_optname = (char *) alloca (2 + strlen (optname) + 1);
845 strcpy (dd_optname + 2, optname);
847 assert (val != NULL);
848 if (!setval_internal (command_by_name (com), dd_optname, val))
852 /* Parse OPT into command and value and run it. For example,
853 run_command("foo=bar") is equivalent to setoptval("foo", "bar").
854 This is used by the `--execute' flag in main.c. */
857 run_command (const char *opt)
861 switch (parse_line (opt, &com, &val, &comind))
864 if (!setval_internal (comind, com, val))
870 fprintf (stderr, _("%s: Invalid --execute command %s\n"),
871 exec_name, quote (opt));
876 /* Generic helper functions, for use with `commands'. */
878 /* Forward declarations: */
883 static bool decode_string (const char *, const struct decode_item *, int, int *);
884 static bool simple_atoi (const char *, const char *, int *);
885 static bool simple_atof (const char *, const char *, double *);
887 #define CMP1(p, c0) (c_tolower((p)[0]) == (c0) && (p)[1] == '\0')
889 #define CMP2(p, c0, c1) (c_tolower((p)[0]) == (c0) \
890 && c_tolower((p)[1]) == (c1) \
893 #define CMP3(p, c0, c1, c2) (c_tolower((p)[0]) == (c0) \
894 && c_tolower((p)[1]) == (c1) \
895 && c_tolower((p)[2]) == (c2) \
899 /* Store the boolean value from VAL to PLACE. COM is ignored,
900 except for error messages. */
902 cmd_boolean (const char *com, const char *val, void *place)
906 if (CMP2 (val, 'o', 'n') || CMP3 (val, 'y', 'e', 's') || CMP1 (val, '1'))
907 /* "on", "yes" and "1" mean true. */
909 else if (CMP3 (val, 'o', 'f', 'f') || CMP2 (val, 'n', 'o') || CMP1 (val, '0'))
910 /* "off", "no" and "0" mean false. */
915 _("%s: %s: Invalid boolean %s; use `on' or `off'.\n"),
916 exec_name, com, quote (val));
920 *(bool *) place = value;
924 /* Set the non-negative integer value from VAL to PLACE. With
925 incorrect specification, the number remains unchanged. */
927 cmd_number (const char *com, const char *val, void *place)
929 if (!simple_atoi (val, val + strlen (val), place)
930 || *(int *) place < 0)
932 fprintf (stderr, _("%s: %s: Invalid number %s.\n"),
933 exec_name, com, quote (val));
939 /* Similar to cmd_number(), but also accepts `inf' as a synonym for 0. */
941 cmd_number_inf (const char *com, const char *val, void *place)
943 if (!strcasecmp (val, "inf"))
948 return cmd_number (com, val, place);
951 /* Copy (strdup) the string VAL to a new location and store a
952 pointer to the copy in *PLACE, freeing any previous value. */
954 cmd_string (const char *com, const char *val, void *place)
956 char **pstring = (char **)place;
958 xfree_null (*pstring);
959 *pstring = xstrdup (val);
963 /* Like cmd_string but ensure the string is upper case. */
965 cmd_string_uppercase (const char *com, const char *val, void *place)
968 bool ret = cmd_string (com, val, place);
969 q = *((char **) place);
970 if (!ret || q == NULL)
980 /* Like cmd_string, but also converts `\' path separators to `/'
981 under Windows and MSDOS. Tilde-expansion of a leading `~/' is
982 handled by setval_internal_tilde, not by this function. */
984 cmd_file (const char *com, const char *val, void *place)
986 char **pstring = (char **)place;
988 xfree_null (*pstring);
990 /* #### If VAL is empty, perhaps should set *PLACE to NULL. */
992 *pstring = xstrdup (val);
994 #if defined(WINDOWS) || defined(MSDOS)
995 /* Convert "\" to "/". */
998 for (s = *pstring; *s; s++)
1006 /* Like cmd_file, but strips trailing '/' characters. */
1008 cmd_directory (const char *com, const char *val, void *place)
1012 /* Call cmd_file() for tilde expansion and separator
1013 canonicalization (backslash -> slash under Windows). These
1014 things should perhaps be in a separate function. */
1015 if (!cmd_file (com, val, place))
1018 s = *(char **)place;
1020 while (t > s && *--t == '/')
1026 /* Split VAL by space to a vector of values, and append those values
1027 to vector pointed to by the PLACE argument. If VAL is empty, the
1028 PLACE vector is cleared instead. */
1031 cmd_vector (const char *com, const char *val, void *place)
1033 char ***pvec = (char ***)place;
1036 *pvec = merge_vecs (*pvec, sepstring (val));
1046 cmd_directory_vector (const char *com, const char *val, void *place)
1048 char ***pvec = (char ***)place;
1052 /* Strip the trailing slashes from directories. */
1055 seps = sepstring (val);
1056 for (t = seps; t && *t; t++)
1058 int len = strlen (*t);
1059 /* Skip degenerate case of root directory. */
1062 if ((*t)[len - 1] == '/')
1063 (*t)[len - 1] = '\0';
1066 *pvec = merge_vecs (*pvec, seps);
1076 /* Engine for cmd_bytes and cmd_bytes_sum: converts a string such as
1077 "100k" or "2.5G" to a floating point number. */
1080 parse_bytes_helper (const char *val, double *result)
1082 double number, mult;
1083 const char *end = val + strlen (val);
1085 /* Check for "inf". */
1086 if (0 == strcmp (val, "inf"))
1092 /* Strip trailing whitespace. */
1093 while (val < end && c_isspace (end[-1]))
1098 switch (c_tolower (end[-1]))
1101 --end, mult = 1024.0;
1104 --end, mult = 1048576.0;
1107 --end, mult = 1073741824.0;
1110 --end, mult = 1099511627776.0;
1113 /* Not a recognized suffix: assume it's a digit. (If not,
1114 simple_atof will raise an error.) */
1118 /* Skip leading and trailing whitespace. */
1119 while (val < end && c_isspace (*val))
1121 while (val < end && c_isspace (end[-1]))
1126 if (!simple_atof (val, end, &number) || number < 0)
1129 *result = number * mult;
1133 /* Parse VAL as a number and set its value to PLACE (which should
1136 By default, the value is assumed to be in bytes. If "K", "M", or
1137 "G" are appended, the value is multiplied with 1<<10, 1<<20, or
1138 1<<30, respectively. Floating point values are allowed and are
1139 cast to integer before use. The idea is to be able to use things
1140 like 1.5k instead of "1536".
1142 The string "inf" is returned as 0.
1144 In case of error, false is returned and memory pointed to by PLACE
1145 remains unmodified. */
1148 cmd_bytes (const char *com, const char *val, void *place)
1151 if (!parse_bytes_helper (val, &byte_value))
1153 fprintf (stderr, _("%s: %s: Invalid byte value %s\n"),
1154 exec_name, com, quote (val));
1157 *(wgint *)place = (wgint)byte_value;
1161 /* Like cmd_bytes, but PLACE is interpreted as a pointer to
1162 SUM_SIZE_INT. It works by converting the string to double, therefore
1163 working with values up to 2^53-1 without loss of precision. This
1164 value (8192 TB) is large enough to serve for a while. */
1167 cmd_bytes_sum (const char *com, const char *val, void *place)
1170 if (!parse_bytes_helper (val, &byte_value))
1172 fprintf (stderr, _("%s: %s: Invalid byte value %s\n"),
1173 exec_name, com, quote (val));
1176 *(SUM_SIZE_INT *) place = (SUM_SIZE_INT) byte_value;
1180 /* Store the value of VAL to *PLACE. The value is a time period, by
1181 default expressed in seconds, but also accepting suffixes "m", "h",
1182 "d", and "w" for minutes, hours, days, and weeks respectively. */
1185 cmd_time (const char *com, const char *val, void *place)
1187 double number, mult;
1188 const char *end = val + strlen (val);
1190 /* Strip trailing whitespace. */
1191 while (val < end && c_isspace (end[-1]))
1197 fprintf (stderr, _("%s: %s: Invalid time period %s\n"),
1198 exec_name, com, quote (val));
1202 switch (c_tolower (end[-1]))
1205 --end, mult = 1; /* seconds */
1208 --end, mult = 60; /* minutes */
1211 --end, mult = 3600; /* hours */
1214 --end, mult = 86400.0; /* days */
1217 --end, mult = 604800.0; /* weeks */
1220 /* Not a recognized suffix: assume it belongs to the number.
1221 (If not, simple_atof will raise an error.) */
1225 /* Skip leading and trailing whitespace. */
1226 while (val < end && c_isspace (*val))
1228 while (val < end && c_isspace (end[-1]))
1233 if (!simple_atof (val, end, &number))
1236 *(double *)place = number * mult;
1242 cmd_cert_type (const char *com, const char *val, void *place)
1244 static const struct decode_item choices[] = {
1245 { "pem", keyfile_pem },
1246 { "der", keyfile_asn1 },
1247 { "asn1", keyfile_asn1 },
1249 int ok = decode_string (val, choices, countof (choices), place);
1251 fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val));
1256 /* Specialized helper functions, used by `commands' to handle some
1257 options specially. */
1259 static bool check_user_specified_header (const char *);
1262 cmd_spec_dirstruct (const char *com, const char *val, void *place_ignored)
1264 if (!cmd_boolean (com, val, &opt.dirstruct))
1266 /* Since dirstruct behaviour is explicitly changed, no_dirstruct
1267 must be affected inversely. */
1269 opt.no_dirstruct = false;
1271 opt.no_dirstruct = true;
1276 cmd_spec_header (const char *com, const char *val, void *place_ignored)
1278 /* Empty value means reset the list of headers. */
1281 free_vec (opt.user_headers);
1282 opt.user_headers = NULL;
1286 if (!check_user_specified_header (val))
1288 fprintf (stderr, _("%s: %s: Invalid header %s.\n"),
1289 exec_name, com, quote (val));
1292 opt.user_headers = vec_append (opt.user_headers, val);
1297 cmd_spec_warc_header (const char *com, const char *val, void *place_ignored)
1299 /* Empty value means reset the list of headers. */
1302 free_vec (opt.warc_user_headers);
1303 opt.warc_user_headers = NULL;
1307 if (!check_user_specified_header (val))
1309 fprintf (stderr, _("%s: %s: Invalid WARC header %s.\n"),
1310 exec_name, com, quote (val));
1313 opt.warc_user_headers = vec_append (opt.warc_user_headers, val);
1318 cmd_spec_htmlify (const char *com, const char *val, void *place_ignored)
1320 int flag = cmd_boolean (com, val, &opt.htmlify);
1321 if (flag && !opt.htmlify)
1322 opt.remove_listing = false;
1326 /* Set the "mirror" mode. It means: recursive download, timestamping,
1327 no limit on max. recursion depth, and don't remove listings. */
1330 cmd_spec_mirror (const char *com, const char *val, void *place_ignored)
1334 if (!cmd_boolean (com, val, &mirror))
1338 opt.recursive = true;
1339 if (!opt.no_dirstruct)
1340 opt.dirstruct = true;
1341 opt.timestamping = true;
1342 opt.reclevel = INFINITE_RECURSION;
1343 opt.remove_listing = false;
1348 /* Validate --prefer-family and set the choice. Allowed values are
1349 "IPv4", "IPv6", and "none". */
/* VAL is looked up in CHOICES with decode_string, which writes the
   matching code into the local prefer_family (initialised to
   prefer_none).  An unrecognised value is reported on stderr with the
   program name and COM.  opt.prefer_family is then assigned from the
   local.
   NOTE(review): from the visible lines the assignment appears to run
   whether or not decoding succeeded, which would leave the option at
   prefer_none after a bad value -- confirm against the full file.  */
1352 cmd_spec_prefer_family (const char *com, const char *val, void *place_ignored)
1354 static const struct decode_item choices[] = {
1355 { "IPv4", prefer_ipv4 },
1356 { "IPv6", prefer_ipv6 },
1357 { "none", prefer_none },
1359 int prefer_family = prefer_none;
1360 int ok = decode_string (val, choices, countof (choices), &prefer_family);
1362 fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val));
1363 opt.prefer_family = prefer_family;
1367 /* Set progress.type to VAL, but verify that it's a valid progress
1368 implementation before that. */
/* VAL is first checked with valid_progress_implementation_p; a rejected
   value is reported on stderr.  An accepted value replaces
   opt.progress_type: the previous string is released with xfree_null and
   a fresh copy of VAL is stored with xstrdup.  */
1371 cmd_spec_progress (const char *com, const char *val, void *place_ignored)
1373 if (!valid_progress_implementation_p (val))
1375 fprintf (stderr, _("%s: %s: Invalid progress type %s.\n"),
1376 exec_name, com, quote (val));
/* Release the previously stored name before replacing it.  */
1379 xfree_null (opt.progress_type);
1381 /* Don't call set_progress_implementation here. It will be called
1382 in main() when it becomes clear what the log output is. */
1383 opt.progress_type = xstrdup (val);
1387 /* Set opt.recursive to VAL as with cmd_boolean. If opt.recursive is
1388 set to true, also set opt.dirstruct to true, unless opt.no_dirstruct is set. */
/* Store VAL into opt.recursive through cmd_boolean.  If recursion ends
   up enabled and opt.no_dirstruct is not set, opt.dirstruct is enabled
   too.  */
1392 cmd_spec_recursive (const char *com, const char *val, void *place_ignored)
1394 if (!cmd_boolean (com, val, &opt.recursive))
/* An explicit no_dirstruct wins over the recursive default.  */
1398 if (opt.recursive && !opt.no_dirstruct)
1399 opt.dirstruct = true;
1404 /* Validate --regex-type and set the choice. */
1407 cmd_spec_regex_type (const char *com, const char *val, void *place_ignored)
1409 static const struct decode_item choices[] = {
1410 { "posix", regex_type_posix },
1412 { "pcre", regex_type_pcre },
1415 int regex_type = regex_type_posix;
1416 int ok = decode_string (val, choices, countof (choices), ®ex_type);
1418 fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val));
1419 opt.regex_type = regex_type;
/* Parse VAL as a comma-separated list of restriction keywords.  The four
   opt.restrict_files_* settings are first copied into locals, the
   keywords update those locals, and the locals are written back to opt
   at the end, after the loop.  An unknown keyword is reported on stderr
   together with the list of accepted values.
   NOTE(review): the loop header, the declaration of `end', the advance
   of `val' past each comma and the return statements are not visible in
   this listing.  */
1424 cmd_spec_restrict_file_names (const char *com, const char *val, void *place_ignored)
1426 int restrict_os = opt.restrict_files_os;
1427 int restrict_ctrl = opt.restrict_files_ctrl;
1428 int restrict_case = opt.restrict_files_case;
1429 int restrict_nonascii = opt.restrict_files_nonascii;
/* VAL_IS compares the current token, delimited by val and end, with a
   string literal.  */
1433 #define VAL_IS(string_literal) BOUNDED_EQUAL (val, end, string_literal)
/* A token ends at the next comma, or at the end of the string.  */
1437 end = strchr (val, ',');
1439 end = val + strlen (val);
1441 if (VAL_IS ("unix"))
1442 restrict_os = restrict_unix;
1443 else if (VAL_IS ("windows"))
1444 restrict_os = restrict_windows;
1445 else if (VAL_IS ("lowercase"))
1446 restrict_case = restrict_lowercase;
1447 else if (VAL_IS ("uppercase"))
1448 restrict_case = restrict_uppercase;
1449 else if (VAL_IS ("nocontrol"))
1450 restrict_ctrl = false;
1451 else if (VAL_IS ("ascii"))
1452 restrict_nonascii = true;
1455 fprintf (stderr, _("\
1456 %s: %s: Invalid restriction %s,\n\
1457 use [unix|windows],[lowercase|uppercase],[nocontrol],[ascii].\n"),
1458 exec_name, com, quote (val));
/* Continue while there is remaining input and the last token was
   terminated by a comma rather than by the end of the string.  */
1465 while (*val && *end);
1469 opt.restrict_files_os = restrict_os;
1470 opt.restrict_files_ctrl = restrict_ctrl;
1471 opt.restrict_files_case = restrict_case;
1472 opt.restrict_files_nonascii = restrict_nonascii;
/* Set opt.report_bps to whether VAL equals "bits", compared without
   regard to case.  Any other value is reported on stderr.  Returns the
   resulting opt.report_bps, so "bits" is the only accepted value.  */
1478 cmd_spec_report_speed (const char *com, const char *val, void *place_ignored)
1480 opt.report_bps = strcasecmp (val, "bits") == 0;
1481 if (!opt.report_bps)
1482 fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val));
1483 return opt.report_bps;
/* VAL is looked up in CHOICES with decode_string, which stores the
   matching secure_protocol_* code through PLACE.  Unlike the handlers
   that take PLACE_IGNORED, this one writes to the location supplied by
   the caller.  An unrecognised value is reported on stderr.
   NOTE(review): the test of `ok' and the return statement are not
   visible in this listing.  */
1488 cmd_spec_secure_protocol (const char *com, const char *val, void *place)
1490 static const struct decode_item choices[] = {
1491 { "auto", secure_protocol_auto },
1492 { "sslv2", secure_protocol_sslv2 },
1493 { "sslv3", secure_protocol_sslv3 },
1494 { "tlsv1", secure_protocol_tlsv1 },
1496 int ok = decode_string (val, choices, countof (choices), place);
1498 fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val));
1503 /* Set all three timeout values. */
/* VAL is parsed once with cmd_time into a local `value', which is then
   copied to the read, connect and DNS timeouts in opt.
   NOTE(review): the declaration of `value' and the return statements are
   not visible in this listing.  */
1506 cmd_spec_timeout (const char *com, const char *val, void *place_ignored)
1509 if (!cmd_time (com, val, &value))
1511 opt.read_timeout = value;
1512 opt.connect_timeout = value;
1513 opt.dns_timeout = value;
/* Replace opt.useragent with a copy of VAL.  A value containing a
   newline character is rejected and reported on stderr; otherwise the
   old string is released with xfree_null and VAL is duplicated with
   xstrdup.  */
1518 cmd_spec_useragent (const char *com, const char *val, void *place_ignored)
1520 /* Disallow embedded newlines. */
1521 if (strchr (val, '\n'))
1523 fprintf (stderr, _("%s: %s: Invalid value %s.\n"),
1524 exec_name, com, quote (val));
1527 xfree_null (opt.useragent);
1528 opt.useragent = xstrdup (val);
1532 /* The "verbose" option cannot be cmd_boolean because the variable is
1533 not bool -- it's of type int (-1 means uninitialized because of
1534 some random hackery for disallowing -q -v). */
/* VAL is parsed with cmd_boolean into a local `flag' rather than
   directly into the option, since (per the comment above) the verbose
   variable is an int, not a bool.
   NOTE(review): the declaration of `flag' and the statements that act
   on it are not visible in this listing.  */
1537 cmd_spec_verbose (const char *com, const char *val, void *place_ignored)
1540 if (cmd_boolean (com, val, &flag))
1548 /* Miscellaneous useful routines. */
1550 /* A very simple atoi clone, more useful than atoi because it works on
1551 delimited strings, and has error reportage. Returns true on success,
1552 false on failure. If successful, stores result to *DEST. */
/* Parse the text in [BEG, END) as a decimal integer: optional leading
   whitespace, an optional '+' or '-' sign, then digits up to END.
   Returns false when nothing follows the sign, when a non-digit is
   found before END, or when the value does not fit in an int; *DEST is
   written only on success.  */
static bool
simple_atoi (const char *beg, const char *end, int *dest)
{
  int result = 0;
  bool negative = false;
  const char *p = beg;

  while (p < end && c_isspace (*p))
    ++p;
  if (p < end && (*p == '-' || *p == '+'))
    {
      negative = (*p == '-');
      ++p;
    }
  if (p == end)
    return false;

  /* Read negative numbers in a separate loop because the most
     negative integer cannot be represented as a positive number.  */

  /* The range is tested before each multiply/add.  Signed overflow is
     undefined behaviour in C, so computing the next value first and
     comparing it afterwards is not a reliable check.  */
  if (!negative)
    for (; p < end && c_isdigit (*p); p++)
      {
        int digit = *p - '0';
        if (result > (INT_MAX - digit) / 10)
          return false;         /* overflow */
        result = 10 * result + digit;
      }
  else
    for (; p < end && c_isdigit (*p); p++)
      {
        int digit = *p - '0';
        /* INT_MIN + digit is negative, so the division truncates toward
           zero and yields the smallest RESULT that can still be
           extended by DIGIT.  */
        if (result < (INT_MIN + digit) / 10)
          return false;         /* underflow */
        result = 10 * result - digit;
      }

  /* Trailing garbage before END makes the whole string invalid.  */
  if (p != end)
    return false;

  *dest = result;
  return true;
}
1598 /* Trivial atof, with error reporting. Handles "<digits>[.<digits>]",
1599 doesn't handle exponential notation. Returns true on success,
1600 false on failure. In case of success, stores its result to *DEST. */
/* Parse the text in [BEG, END) into a double.  Leading whitespace is
   skipped and an optional '+' or '-' sign is consumed before the digit
   loop.  seen_dot and seen_digit record what the loop has encountered.
   NOTE(review): the declarations of `result' and `divider', the
   per-character classification inside the loop and the final checks are
   not visible in this listing.  */
1604 simple_atof (const char *beg, const char *end, double *dest)
1608 bool negative = false;
1609 bool seen_dot = false;
1610 bool seen_digit = false;
1613 const char *p = beg;
1615 while (p < end && c_isspace (*p))
1617 if (p < end && (*p == '-' || *p == '+'))
1619 negative = (*p == '-');
1623 for (; p < end; p++)
/* Integer part: shift the accumulated value one decimal place and add
   the new digit.  */
1629 result = (10 * result) + (ch - '0');
/* Fractional part: divider is multiplied by 10 for every digit, so each
   digit is scaled down to its decimal position before being added.  */
1631 result += (ch - '0') / (divider *= 10);
1653 /* Verify that the user-specified header in S is valid. It must
1654 contain a colon preceded by non-white-space characters and must not
1655 contain newlines. */
/* S is scanned up to the first ':' or whitespace character.  The header
   is rejected if the scan stopped on anything other than ':' or stopped
   at the very first character (empty name), and also if S contains a
   newline anywhere.
   NOTE(review): the declaration of `p' and the return statements are not
   visible in this listing.  */
1658 check_user_specified_header (const char *s)
/* Advance P over the header name; the loop body is intentionally
   empty.  */
1662 for (p = s; *p && *p != ':' && !c_isspace (*p); p++)
1664 /* The header MUST contain `:' preceded by at least one
1665 non-whitespace character. */
1666 if (*p != ':' || p == s)
1668 /* The header MUST NOT contain newlines. */
1669 if (strchr (s, '\n'))
1674 /* Decode VAL into a number, according to ITEMS. */
/* VAL is compared, ignoring case, with the name of each of the ITEMCOUNT
   entries in ITEMS, in order.  On a match the entry's code is stored
   through PLACE.
   NOTE(review): the remaining parameter line (PLACE), the declaration of
   `i' and the return statements are not visible in this listing.  */
1677 decode_string (const char *val, const struct decode_item *items, int itemcount,
1681 for (i = 0; i < itemcount; i++)
1682 if (0 == strcasecmp (val, items[i].name))
1684 *place = items[i].code;
/* Forward declarations of cleanup routines whose definitions are not in
   this part of the file.  cleanup_html_url is called from the cleanup
   code below.  */
1691 void cleanup_html_url (void);
1692 void spider_cleanup (void);
1695 /* Free the memory allocated by global variables. */
/* The visible code first reports a failed fclose of output_stream
   through inform_exit_status (CLOSEFAILED).  It then calls
   cleanup_html_url, frees the netrc list, and releases the option
   strings with xfree_null and the option vectors with free_vec.
   NOTE(review): the function signature, the #ifdef that pairs with the
   closing DEBUG_MALLOC #endif, the WARC close call and the body of the
   URL loop are not visible in this listing.  */
1699 /* Free external resources, close files, etc. */
1701 /* Close WARC file. */
1702 if (opt.warc_filename != 0)
1708 if (fclose (output_stream) == EOF)
1709 inform_exit_status (CLOSEFAILED);
1711 /* No need to check for error because Wget flushes its output (and
1712 checks for errors) after any data arrives. */
1714 /* We're exiting anyway so there's no real need to call free()
1715 hundreds of times. Skipping the frees will make Wget exit faster.
1718 However, when detecting leaks, it's crucial to free() everything
1719 because then you can find the real leaks, i.e. the allocated
1720 memory which grows with the size of the program. */
1726 cleanup_html_url ();
1731 for (i = 0; i < nurl; i++)
1735 extern acc_t *netrc_list;
1736 free_netrc (netrc_list);
/* Option strings are released with xfree_null and string vectors with
   free_vec.  */
1738 xfree_null (opt.choose_config);
1739 xfree_null (opt.lfilename);
1740 xfree_null (opt.dir_prefix);
1741 xfree_null (opt.input_filename);
1742 xfree_null (opt.output_document);
1743 free_vec (opt.accepts);
1744 free_vec (opt.rejects);
1745 free_vec (opt.excludes);
1746 free_vec (opt.includes);
1747 free_vec (opt.domains);
1748 free_vec (opt.follow_tags);
1749 free_vec (opt.ignore_tags);
1750 xfree_null (opt.progress_type);
1751 xfree_null (opt.ftp_user);
1752 xfree_null (opt.ftp_passwd);
1753 xfree_null (opt.ftp_proxy);
1754 xfree_null (opt.https_proxy);
1755 xfree_null (opt.http_proxy);
1756 free_vec (opt.no_proxy);
1757 xfree_null (opt.useragent);
1758 xfree_null (opt.referer);
1759 xfree_null (opt.http_user);
1760 xfree_null (opt.http_passwd);
1761 free_vec (opt.user_headers);
1762 free_vec (opt.warc_user_headers);
1764 xfree_null (opt.cert_file);
1765 xfree_null (opt.private_key);
1766 xfree_null (opt.ca_directory);
1767 xfree_null (opt.ca_cert);
1768 xfree_null (opt.random_file);
1769 xfree_null (opt.egd_file);
1771 xfree_null (opt.bind_address);
1772 xfree_null (opt.cookies_input);
1773 xfree_null (opt.cookies_output);
1774 xfree_null (opt.user);
1775 xfree_null (opt.passwd);
1776 xfree_null (opt.base_href);
1777 xfree_null (opt.method);
1779 #endif /* DEBUG_MALLOC */
1782 /* Unit testing routines. */
/* Unit test over the commands[] table: adjacent entries (prev_idx,
   next_idx) are compared by name with strcasecmp, and the test fails
   with the message "FAILED" through mu_assert.
   NOTE(review): the condition on `cmp' that triggers the failure and the
   index increments are not visible in this listing; presumably the test
   checks that the table is sorted -- confirm.  */
1787 test_commands_sorted()
1789 int prev_idx = 0, next_idx = 1;
/* Index of the last entry in commands[].  */
1790 int command_count = countof (commands) - 1;
1792 while (next_idx <= command_count)
1794 cmp = strcasecmp (commands[prev_idx].name, commands[next_idx].name);
1797 mu_assert ("FAILED", false);
/* Table-driven unit test for cmd_spec_restrict_file_names.  Each row
   holds an input string followed by the expected OS, control-character
   and case restrictions and the expected return value.  For every row
   the handler is called with the command name "dummy", and the result
   plus the three opt.restrict_files_* fields are compared against the
   row through mu_assert.
   NOTE(review): the struct's `val' and `result' members, the test_array
   declaration and any comment delimiters around the fprintf lines are
   not visible in this listing.  */
1810 test_cmd_spec_restrict_file_names()
1815 int expected_restrict_files_os;
1816 int expected_restrict_files_ctrl;
1817 int expected_restrict_files_case;
/* The rows include inputs with a trailing comma.  */
1820 { "windows", restrict_windows, true, restrict_no_case_restriction, true },
1821 { "windows,", restrict_windows, true, restrict_no_case_restriction, true },
1822 { "windows,lowercase", restrict_windows, true, restrict_lowercase, true },
1823 { "unix,nocontrol,lowercase,", restrict_unix, false, restrict_lowercase, true },
1826 for (i = 0; i < sizeof(test_array)/sizeof(test_array[0]); ++i)
1831 res = cmd_spec_restrict_file_names ("dummy", test_array[i].val, NULL);
1834 fprintf (stderr, "test_cmd_spec_restrict_file_names: TEST %d\n", i); fflush (stderr);
1835 fprintf (stderr, "opt.restrict_files_os: %d\n", opt.restrict_files_os); fflush (stderr);
1836 fprintf (stderr, "opt.restrict_files_ctrl: %d\n", opt.restrict_files_ctrl); fflush (stderr);
1837 fprintf (stderr, "opt.restrict_files_case: %d\n", opt.restrict_files_case); fflush (stderr);
1839 mu_assert ("test_cmd_spec_restrict_file_names: wrong result",
1840 res == test_array[i].result
1841 && opt.restrict_files_os == test_array[i].expected_restrict_files_os
1842 && opt.restrict_files_ctrl == test_array[i].expected_restrict_files_ctrl
1843 && opt.restrict_files_case == test_array[i].expected_restrict_files_case);
1849 #endif /* TESTING */