1 /* Reading/parsing the initialization file.
2 Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
3 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation,
6 This file is part of GNU Wget.
8 GNU Wget is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 GNU Wget is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with Wget. If not, see <http://www.gnu.org/licenses/>.
21 Additional permission under GNU GPL version 3 section 7
23 If you modify this program, or any covered work, by linking or
24 combining it with the OpenSSL project's OpenSSL library (or a
25 modified version of that library), containing parts covered by the
26 terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
27 grants you additional permission to convey the resulting work.
28 Corresponding Source for a non-source form of such a combination
29 shall include the source code for the parts of OpenSSL used as well
30 as that of the covered work. */
42 /* not all systems provide PATH_MAX in limits.h */
44 # include <sys/param.h>
46 # define PATH_MAX MAXPATHLEN
65 #include "recur.h" /* for INFINITE_RECURSION */
66 #include "convert.h" /* for convert_cleanup */
67 #include "res.h" /* for res_cleanup */
68 #include "http.h" /* for http_cleanup */
69 #include "retr.h" /* for output_stream */
70 #include "warc.h" /* for warc_close */
71 #include "spider.h" /* for spider_cleanup */
79 #define CMD_DECLARE(func) static bool func (const char *, const char *, void *)
81 CMD_DECLARE (cmd_boolean);
82 CMD_DECLARE (cmd_bytes);
83 CMD_DECLARE (cmd_bytes_sum);
85 CMD_DECLARE (cmd_cert_type);
87 CMD_DECLARE (cmd_directory_vector);
88 CMD_DECLARE (cmd_number);
89 CMD_DECLARE (cmd_number_inf);
90 CMD_DECLARE (cmd_string);
91 CMD_DECLARE (cmd_string_uppercase);
92 CMD_DECLARE (cmd_file);
93 CMD_DECLARE (cmd_directory);
94 CMD_DECLARE (cmd_time);
95 CMD_DECLARE (cmd_vector);
97 CMD_DECLARE (cmd_spec_dirstruct);
98 CMD_DECLARE (cmd_spec_header);
99 CMD_DECLARE (cmd_spec_warc_header);
100 CMD_DECLARE (cmd_spec_htmlify);
101 CMD_DECLARE (cmd_spec_mirror);
102 CMD_DECLARE (cmd_spec_prefer_family);
103 CMD_DECLARE (cmd_spec_progress);
104 CMD_DECLARE (cmd_spec_recursive);
105 CMD_DECLARE (cmd_spec_regex_type);
106 CMD_DECLARE (cmd_spec_restrict_file_names);
107 CMD_DECLARE (cmd_spec_report_speed);
109 CMD_DECLARE (cmd_spec_secure_protocol);
111 CMD_DECLARE (cmd_spec_timeout);
112 CMD_DECLARE (cmd_spec_useragent);
113 CMD_DECLARE (cmd_spec_verbose);
115 /* List of recognized commands, each consisting of name, place and
116 function. When adding a new command, simply add it to the list,
117 but be sure to keep the list sorted alphabetically, as
118 command_by_name's binary search depends on it. Also, be sure to
119 add any entries that allocate memory (e.g. cmd_string and
120 cmd_vector) to the cleanup() function below. */
122 static const struct {
125 bool (*action) (const char *, const char *, void *);
127 /* KEEP THIS LIST ALPHABETICALLY SORTED */
128 { "accept", &opt.accepts, cmd_vector },
129 { "acceptregex", &opt.acceptregex_s, cmd_string },
130 { "addhostdir", &opt.add_hostdir, cmd_boolean },
131 { "adjustextension", &opt.adjust_extension, cmd_boolean },
132 { "alwaysrest", &opt.always_rest, cmd_boolean }, /* deprecated */
133 { "askpassword", &opt.ask_passwd, cmd_boolean },
134 { "authnochallenge", &opt.auth_without_challenge,
136 { "background", &opt.background, cmd_boolean },
137 { "backupconverted", &opt.backup_converted, cmd_boolean },
138 { "backups", &opt.backups, cmd_number },
139 { "base", &opt.base_href, cmd_string },
140 { "bindaddress", &opt.bind_address, cmd_string },
141 { "bindport", &opt.bind_port, cmd_number },
142 { "bodydata", &opt.body_data, cmd_string },
143 { "bodyfile", &opt.body_file, cmd_string },
145 { "cacertificate", &opt.ca_cert, cmd_file },
147 { "cache", &opt.allow_cache, cmd_boolean },
149 { "cadirectory", &opt.ca_directory, cmd_directory },
150 { "certificate", &opt.cert_file, cmd_file },
151 { "certificatetype", &opt.cert_type, cmd_cert_type },
152 { "checkcertificate", &opt.check_cert, cmd_boolean },
154 { "chooseconfig", &opt.choose_config, cmd_file },
155 { "connecttimeout", &opt.connect_timeout, cmd_time },
156 { "contentdisposition", &opt.content_disposition, cmd_boolean },
157 { "contentonerror", &opt.content_on_error, cmd_boolean },
158 { "continue", &opt.always_rest, cmd_boolean },
159 { "convertlinks", &opt.convert_links, cmd_boolean },
160 { "cookies", &opt.cookies, cmd_boolean },
161 { "cutdirs", &opt.cut_dirs, cmd_number },
162 { "debug", &opt.debug, cmd_boolean },
163 { "defaultpage", &opt.default_page, cmd_string },
164 { "deleteafter", &opt.delete_after, cmd_boolean },
165 { "dirprefix", &opt.dir_prefix, cmd_directory },
166 { "dirstruct", NULL, cmd_spec_dirstruct },
167 { "dnscache", &opt.dns_cache, cmd_boolean },
168 { "dnstimeout", &opt.dns_timeout, cmd_time },
169 { "domains", &opt.domains, cmd_vector },
170 { "dotbytes", &opt.dot_bytes, cmd_bytes },
171 { "dotsinline", &opt.dots_in_line, cmd_number },
172 { "dotspacing", &opt.dot_spacing, cmd_number },
173 { "dotstyle", &opt.dot_style, cmd_string }, /* deprecated */
175 { "egdfile", &opt.egd_file, cmd_file },
177 { "excludedirectories", &opt.excludes, cmd_directory_vector },
178 { "excludedomains", &opt.exclude_domains, cmd_vector },
179 { "followftp", &opt.follow_ftp, cmd_boolean },
180 { "followtags", &opt.follow_tags, cmd_vector },
181 { "forcehtml", &opt.force_html, cmd_boolean },
182 { "ftppasswd", &opt.ftp_passwd, cmd_string }, /* deprecated */
183 { "ftppassword", &opt.ftp_passwd, cmd_string },
184 { "ftpproxy", &opt.ftp_proxy, cmd_string },
186 { "ftpstmlf", &opt.ftp_stmlf, cmd_boolean },
187 #endif /* def __VMS */
188 { "ftpuser", &opt.ftp_user, cmd_string },
189 { "glob", &opt.ftp_glob, cmd_boolean },
190 { "header", NULL, cmd_spec_header },
191 { "htmlextension", &opt.adjust_extension, cmd_boolean }, /* deprecated */
192 { "htmlify", NULL, cmd_spec_htmlify },
193 { "httpkeepalive", &opt.http_keep_alive, cmd_boolean },
194 { "httppasswd", &opt.http_passwd, cmd_string }, /* deprecated */
195 { "httppassword", &opt.http_passwd, cmd_string },
196 { "httpproxy", &opt.http_proxy, cmd_string },
198 { "httpsonly", &opt.https_only, cmd_boolean },
200 { "httpsproxy", &opt.https_proxy, cmd_string },
201 { "httpuser", &opt.http_user, cmd_string },
202 { "ignorecase", &opt.ignore_case, cmd_boolean },
203 { "ignorelength", &opt.ignore_length, cmd_boolean },
204 { "ignoretags", &opt.ignore_tags, cmd_vector },
205 { "includedirectories", &opt.includes, cmd_directory_vector },
207 { "inet4only", &opt.ipv4_only, cmd_boolean },
208 { "inet6only", &opt.ipv6_only, cmd_boolean },
210 { "input", &opt.input_filename, cmd_file },
211 { "iri", &opt.enable_iri, cmd_boolean },
212 { "keepsessioncookies", &opt.keep_session_cookies, cmd_boolean },
213 { "limitrate", &opt.limit_rate, cmd_bytes },
214 { "loadcookies", &opt.cookies_input, cmd_file },
215 { "localencoding", &opt.locale, cmd_string },
216 { "logfile", &opt.lfilename, cmd_file },
217 { "login", &opt.ftp_user, cmd_string },/* deprecated*/
218 { "maxredirect", &opt.max_redirect, cmd_number },
219 { "method", &opt.method, cmd_string_uppercase },
220 { "mirror", NULL, cmd_spec_mirror },
221 { "netrc", &opt.netrc, cmd_boolean },
222 { "noclobber", &opt.noclobber, cmd_boolean },
223 { "noconfig", &opt.noconfig, cmd_boolean },
224 { "noparent", &opt.no_parent, cmd_boolean },
225 { "noproxy", &opt.no_proxy, cmd_vector },
226 { "numtries", &opt.ntry, cmd_number_inf },/* deprecated*/
227 { "outputdocument", &opt.output_document, cmd_file },
228 { "pagerequisites", &opt.page_requisites, cmd_boolean },
229 { "passiveftp", &opt.ftp_pasv, cmd_boolean },
230 { "passwd", &opt.ftp_passwd, cmd_string },/* deprecated*/
231 { "password", &opt.passwd, cmd_string },
232 { "postdata", &opt.post_data, cmd_string },
233 { "postfile", &opt.post_file_name, cmd_file },
234 { "preferfamily", NULL, cmd_spec_prefer_family },
235 { "preservepermissions", &opt.preserve_perm, cmd_boolean },
237 { "privatekey", &opt.private_key, cmd_file },
238 { "privatekeytype", &opt.private_key_type, cmd_cert_type },
240 { "progress", &opt.progress_type, cmd_spec_progress },
241 { "protocoldirectories", &opt.protocol_directories, cmd_boolean },
242 { "proxypasswd", &opt.proxy_passwd, cmd_string }, /* deprecated */
243 { "proxypassword", &opt.proxy_passwd, cmd_string },
244 { "proxyuser", &opt.proxy_user, cmd_string },
245 { "quiet", &opt.quiet, cmd_boolean },
246 { "quota", &opt.quota, cmd_bytes_sum },
248 { "randomfile", &opt.random_file, cmd_file },
250 { "randomwait", &opt.random_wait, cmd_boolean },
251 { "readtimeout", &opt.read_timeout, cmd_time },
252 { "reclevel", &opt.reclevel, cmd_number_inf },
253 { "recursive", NULL, cmd_spec_recursive },
254 { "referer", &opt.referer, cmd_string },
255 { "regextype", &opt.regex_type, cmd_spec_regex_type },
256 { "reject", &opt.rejects, cmd_vector },
257 { "rejectregex", &opt.rejectregex_s, cmd_string },
258 { "relativeonly", &opt.relative_only, cmd_boolean },
259 { "remoteencoding", &opt.encoding_remote, cmd_string },
260 { "removelisting", &opt.remove_listing, cmd_boolean },
261 { "reportspeed", &opt.report_bps, cmd_spec_report_speed},
262 { "restrictfilenames", NULL, cmd_spec_restrict_file_names },
263 { "retrsymlinks", &opt.retr_symlinks, cmd_boolean },
264 { "retryconnrefused", &opt.retry_connrefused, cmd_boolean },
265 { "robots", &opt.use_robots, cmd_boolean },
266 { "savecookies", &opt.cookies_output, cmd_file },
267 { "saveheaders", &opt.save_headers, cmd_boolean },
269 { "secureprotocol", &opt.secure_protocol, cmd_spec_secure_protocol },
271 { "serverresponse", &opt.server_response, cmd_boolean },
272 { "showalldnsentries", &opt.show_all_dns_entries, cmd_boolean },
273 { "showprogress", &opt.show_progress, cmd_boolean },
274 { "spanhosts", &opt.spanhost, cmd_boolean },
275 { "spider", &opt.spider, cmd_boolean },
276 { "startpos", &opt.start_pos, cmd_bytes },
277 { "strictcomments", &opt.strict_comments, cmd_boolean },
278 { "timeout", NULL, cmd_spec_timeout },
279 { "timestamping", &opt.timestamping, cmd_boolean },
280 { "tries", &opt.ntry, cmd_number_inf },
281 { "trustservernames", &opt.trustservernames, cmd_boolean },
282 { "unlink", &opt.unlink, cmd_boolean },
283 { "useproxy", &opt.use_proxy, cmd_boolean },
284 { "user", &opt.user, cmd_string },
285 { "useragent", NULL, cmd_spec_useragent },
286 { "useservertimestamps", &opt.useservertimestamps, cmd_boolean },
287 { "verbose", NULL, cmd_spec_verbose },
288 { "wait", &opt.wait, cmd_time },
289 { "waitretry", &opt.waitretry, cmd_time },
290 { "warccdx", &opt.warc_cdx_enabled, cmd_boolean },
291 { "warccdxdedup", &opt.warc_cdx_dedup_filename, cmd_file },
293 { "warccompression", &opt.warc_compression_enabled, cmd_boolean },
295 { "warcdigests", &opt.warc_digests_enabled, cmd_boolean },
296 { "warcfile", &opt.warc_filename, cmd_file },
297 { "warcheader", NULL, cmd_spec_warc_header },
298 { "warckeeplog", &opt.warc_keep_log, cmd_boolean },
299 { "warcmaxsize", &opt.warc_maxsize, cmd_bytes },
300 { "warctempdir", &opt.warc_tempdir, cmd_directory },
302 { "wdebug", &opt.wdebug, cmd_boolean },
306 /* Look up CMDNAME in the commands[] and return its position in the
307 array. If CMDNAME is not found, return -1. */
310 command_by_name (const char *cmdname)
312 /* Use binary search for speed. Wget has ~150 commands, which
313 guarantees a worst case performance of 8 string comparisons. */
314 int lo = 0, hi = countof (commands) - 1;
318 int mid = (lo + hi) >> 1;
319 int cmp = strcasecmp (cmdname, commands[mid].name);
330 /* Reset the variables to default values. */
336 /* Most of the default values are 0 (and 0.0, NULL, and false).
337 Just reset everything, and fill in the non-zero values. Note
338 that initializing pointers to NULL this way is technically
339 illegal, but porting Wget to a machine where NULL is not all-zero
340 bit pattern will be the least of the implementors' worries. */
347 opt.add_hostdir = true;
351 opt.http_keep_alive = true;
352 opt.use_proxy = true;
353 tmp = getenv ("no_proxy");
355 opt.no_proxy = sepstring (tmp);
356 opt.prefer_family = prefer_none;
357 opt.allow_cache = true;
359 opt.read_timeout = 900;
360 opt.use_robots = true;
362 opt.remove_listing = true;
364 opt.dot_bytes = 1024;
365 opt.dot_spacing = 10;
366 opt.dots_in_line = 50;
368 opt.dns_cache = true;
372 opt.check_cert = true;
375 /* The default for file name restriction defaults to the OS type. */
376 #if defined(WINDOWS) || defined(MSDOS) || defined(__CYGWIN__)
377 opt.restrict_files_os = restrict_windows;
379 opt.restrict_files_os = restrict_unix;
381 opt.restrict_files_ctrl = true;
382 opt.restrict_files_nonascii = false;
383 opt.restrict_files_case = restrict_no_case_restriction;
385 opt.regex_type = regex_type_posix;
387 opt.max_redirect = 20;
392 opt.enable_iri = true;
394 opt.enable_iri = false;
397 opt.encoding_remote = NULL;
399 opt.useservertimestamps = true;
400 opt.show_all_dns_entries = false;
402 opt.warc_maxsize = 0; /* 1024 * 1024 * 1024; */
404 opt.warc_compression_enabled = true;
406 opt.warc_compression_enabled = false;
408 opt.warc_digests_enabled = true;
409 opt.warc_cdx_enabled = false;
410 opt.warc_cdx_dedup_filename = NULL;
411 opt.warc_tempdir = NULL;
412 opt.warc_keep_log = true;
414 /* Use a negative value to mark the absence of --start-pos option */
416 opt.show_progress = false;
417 opt.noscroll = false;
420 /* Return the user's home directory (strdup-ed), or NULL if none is
425 static char *buf = NULL;
426 static char *home, *ret;
430 home = getenv ("HOME");
436 /* Under MSDOS, if $HOME isn't defined, use the directory where
437 `wget.exe' resides. */
438 const char *_w32_get_argv0 (void); /* in libwatt.a/pcconfig.c */
441 buff = _w32_get_argv0 ();
443 p = strrchr (buf, '/'); /* djgpp */
445 p = strrchr (buf, '\\'); /* others */
449 buff = malloc (len + 1);
453 strncpy (buff, _w32_get_argv0 (), len);
457 #elif !defined(WINDOWS)
458 /* If HOME is not defined, try getting it from the password
460 struct passwd *pwd = getpwuid (getuid ());
461 if (!pwd || !pwd->pw_dir)
465 /* Under Windows, if $HOME isn't defined, use the directory where
466 `wget.exe' resides. */
472 ret = home ? xstrdup (home) : NULL;
478 /* Check the 'WGETRC' environment variable and return the file name
479 if 'WGETRC' is set and is a valid file.
480 If the `WGETRC' variable exists but the file does not exist, the
481 function will exit(). */
483 wgetrc_env_file_name (void)
485 char *env = getenv ("WGETRC");
488 if (!file_exists_p (env))
490 fprintf (stderr, _("%s: WGETRC points to %s, which doesn't exist.\n"),
494 return xstrdup (env);
499 /* Check for the existence of '$HOME/.wgetrc' and return its path
500 if it exists and is set. */
502 wgetrc_user_file_name (void)
506 /* If that failed, try $HOME/.wgetrc (or equivalent). */
509 file = "SYS$LOGIN:.wgetrc";
510 #else /* def __VMS */
513 file = aprintf ("%s/.wgetrc", home);
515 #endif /* def __VMS [else] */
519 if (!file_exists_p (file))
527 /* Return the path to the user's .wgetrc. This is either the value of
528 `WGETRC' environment variable, or `$HOME/.wgetrc'.
530 Additionally, for windows, look in the directory where wget.exe
533 wgetrc_file_name (void)
535 char *file = wgetrc_env_file_name ();
539 file = wgetrc_user_file_name ();
542 /* Under Windows, if we still haven't found .wgetrc, look for the file
543 `wget.ini' in the directory where `wget.exe' resides; we do this for
544 backward compatibility with previous versions of Wget.
545 SYSTEM_WGETRC should not be defined under WINDOWS. */
548 char *home = home_dir ();
554 file = aprintf ("%s/wget.ini", home);
555 if (!file_exists_p (file))
568 /* Return values of parse_line. */
576 static enum parse_line parse_line (const char *, char **, char **, int *);
577 static bool setval_internal (int, const char *, const char *);
578 static bool setval_internal_tilde (int, const char *, const char *);
580 /* Initialize variables from a wgetrc file. Returns false (failure) if
581 there were errors in the file. */
584 run_wgetrc (const char *file)
592 fp = fopen (file, "r");
595 fprintf (stderr, _("%s: Cannot read %s (%s).\n"), exec_name,
596 file, strerror (errno));
597 return true; /* not a fatal error */
600 while (getline (&line, &bufsize, fp) > 0)
602 char *com = NULL, *val = NULL;
605 /* Parse the line. */
606 switch (parse_line (line, &com, &val, &comind))
609 /* If everything is OK, set the value. */
610 if (!setval_internal_tilde (comind, com, val))
612 fprintf (stderr, _("%s: Error in %s at line %d.\n"),
613 exec_name, file, ln);
617 case line_syntax_error:
618 fprintf (stderr, _("%s: Syntax error in %s at line %d.\n"),
619 exec_name, file, ln);
622 case line_unknown_command:
623 fprintf (stderr, _("%s: Unknown command %s in %s at line %d.\n"),
624 exec_name, quote (com), file, ln);
642 /* Initialize the defaults and run the system wgetrc and user's own
647 char *file, *env_sysrc;
650 /* Run a non-standard system rc file when the according environment
651 variable has been set. For internal testing purposes only! */
652 env_sysrc = getenv ("SYSTEM_WGETRC");
653 if (env_sysrc && file_exists_p (env_sysrc))
655 ok &= run_wgetrc (env_sysrc);
656 /* If there are any problems parsing the system wgetrc file, tell
660 fprintf (stderr, _("\
661 Parsing system wgetrc file (env SYSTEM_WGETRC) failed. Please check\n\
663 or specify a different file using --config.\n"), env_sysrc);
667 /* Otherwise, if SYSTEM_WGETRC is defined, use it. */
669 else if (file_exists_p (SYSTEM_WGETRC))
670 ok &= run_wgetrc (SYSTEM_WGETRC);
671 /* If there are any problems parsing the system wgetrc file, tell
675 fprintf (stderr, _("\
676 Parsing system wgetrc file failed. Please check\n\
678 or specify a different file using --config.\n"), SYSTEM_WGETRC);
682 /* Override it with your own, if one exists. */
683 file = wgetrc_file_name ();
686 /* #### We should canonicalize `file' and SYSTEM_WGETRC with
687 something like realpath() before comparing them with `strcmp' */
689 if (!strcmp (file, SYSTEM_WGETRC))
691 fprintf (stderr, _("\
692 %s: Warning: Both system and user wgetrc point to %s.\n"),
693 exec_name, quote (file));
697 ok &= run_wgetrc (file);
699 /* If there were errors processing either `.wgetrc', abort. */
707 /* Remove dashes and underscores from S, modifying S in the
713 char *t = s; /* t - tortoise */
714 char *h = s; /* h - hare */
716 if (*h == '_' || *h == '-')
723 /* Parse the line pointed by line, with the syntax:
724 <sp>* command <sp>* = <sp>* value <sp>*
725 Uses malloc to allocate space for command and value.
727 Returns one of line_ok, line_empty, line_syntax_error, or
728 line_unknown_command.
730 In case of line_ok, *COM and *VAL point to freshly allocated
731 strings, and *COMIND points to com's index. In case of error or
732 empty line, their values are unmodified. */
734 static enum parse_line
735 parse_line (const char *line, char **com, char **val, int *comind)
738 const char *end = line + strlen (line);
739 const char *cmdstart, *cmdend;
740 const char *valstart, *valend;
745 /* Skip leading and trailing whitespace. */
746 while (*line && c_isspace (*line))
748 while (end > line && c_isspace (end[-1]))
751 /* Skip empty lines and comments. */
752 if (!*line || *line == '#')
758 while (p < end && (c_isalnum (*p) || *p == '_' || *p == '-'))
762 /* Skip '=', as well as any space before or after it. */
763 while (p < end && c_isspace (*p))
765 if (p == end || *p != '=')
766 return line_syntax_error;
768 while (p < end && c_isspace (*p))
774 /* The syntax is valid (even though the command might not be). Fill
775 in the command name and value. */
776 *com = strdupdelim (cmdstart, cmdend);
777 *val = strdupdelim (valstart, valend);
779 /* The line is now known to be syntactically correct. Check whether
780 the command is valid. */
781 BOUNDED_TO_ALLOCA (cmdstart, cmdend, cmdcopy);
783 ind = command_by_name (cmdcopy);
785 return line_unknown_command;
787 /* Report success to the caller. */
792 #if defined(WINDOWS) || defined(MSDOS)
793 # define ISSEP(c) ((c) == '/' || (c) == '\\')
795 # define ISSEP(c) ((c) == '/')
798 /* Run commands[comind].action. */
801 setval_internal (int comind, const char *com, const char *val)
803 assert (0 <= comind && ((size_t) comind) < countof (commands));
804 DEBUGP (("Setting %s (%s) to %s\n", com, commands[comind].name, val));
805 return commands[comind].action (com, val, commands[comind].place);
809 setval_internal_tilde (int comind, const char *com, const char *val)
815 ret = setval_internal (comind, com, val);
817 /* We make tilde expansion for cmd_file and cmd_directory */
818 if (((commands[comind].action == cmd_file) ||
819 (commands[comind].action == cmd_directory))
820 && ret && (*val == '~' && ISSEP (val[1])))
822 pstring = commands[comind].place;
826 homelen = strlen (home);
827 while (homelen && ISSEP (home[homelen - 1]))
828 home[--homelen] = '\0';
830 /* Skip the leading "~/". */
831 for (++val; ISSEP (*val); val++)
833 *pstring = concat_strings (home, "/", val, (char *)0);
839 /* Run command COM with value VAL. If running the command produces an
840 error, report the error and exit.
842 This is intended to be called from main() to modify Wget's behavior
843 through command-line switches. Since COM is hard-coded in main(),
844 it is not canonicalized, and this aborts when COM is not found.
846 If COMIND's are exported to init.h, this function will be changed
847 to accept COMIND directly. */
850 setoptval (const char *com, const char *val, const char *optname)
852 /* Prepend "--" to OPTNAME. */
853 char *dd_optname = (char *) alloca (2 + strlen (optname) + 1);
856 strcpy (dd_optname + 2, optname);
858 assert (val != NULL);
859 if (!setval_internal (command_by_name (com), dd_optname, val))
863 /* Parse OPT into command and value and run it. For example,
864 run_command("foo=bar") is equivalent to setoptval("foo", "bar", "foo").
865 This is used by the `--execute' flag in main.c. */
868 run_command (const char *cmdopt)
872 switch (parse_line (cmdopt, &com, &val, &comind))
875 if (!setval_internal (comind, com, val))
881 fprintf (stderr, _("%s: Invalid --execute command %s\n"),
882 exec_name, quote (cmdopt));
887 /* Generic helper functions, for use with `commands'. */
889 /* Forward declarations: */
894 static bool decode_string (const char *, const struct decode_item *, int, int *);
895 static bool simple_atoi (const char *, const char *, int *);
896 static bool simple_atof (const char *, const char *, double *);
898 #define CMP1(p, c0) (c_tolower((p)[0]) == (c0) && (p)[1] == '\0')
900 #define CMP2(p, c0, c1) (c_tolower((p)[0]) == (c0) \
901 && c_tolower((p)[1]) == (c1) \
904 #define CMP3(p, c0, c1, c2) (c_tolower((p)[0]) == (c0) \
905 && c_tolower((p)[1]) == (c1) \
906 && c_tolower((p)[2]) == (c2) \
910 /* Store the boolean value from VAL to PLACE. COM is ignored,
911 except for error messages. */
913 cmd_boolean (const char *com, const char *val, void *place)
917 if (CMP2 (val, 'o', 'n') || CMP3 (val, 'y', 'e', 's') || CMP1 (val, '1'))
918 /* "on", "yes" and "1" mean true. */
920 else if (CMP3 (val, 'o', 'f', 'f') || CMP2 (val, 'n', 'o') || CMP1 (val, '0'))
921 /* "off", "no" and "0" mean false. */
926 _("%s: %s: Invalid boolean %s; use `on' or `off'.\n"),
927 exec_name, com, quote (val));
931 *(bool *) place = value;
935 /* Set the non-negative integer value from VAL to PLACE. With
936 incorrect specification, the number remains unchanged. */
938 cmd_number (const char *com, const char *val, void *place)
940 if (!simple_atoi (val, val + strlen (val), place)
941 || *(int *) place < 0)
943 fprintf (stderr, _("%s: %s: Invalid number %s.\n"),
944 exec_name, com, quote (val));
950 /* Similar to cmd_number(), but also accepts `inf' as a synonym for 0. */
952 cmd_number_inf (const char *com, const char *val, void *place)
954 if (!strcasecmp (val, "inf"))
959 return cmd_number (com, val, place);
962 /* Copy (strdup) the string at VAL to a new location and place a
963 pointer to it in *PLACE. */
965 cmd_string (const char *com _GL_UNUSED, const char *val, void *place)
967 char **pstring = (char **)place;
969 xfree_null (*pstring);
970 *pstring = xstrdup (val);
974 /* Like cmd_string but ensure the string is upper case. */
976 cmd_string_uppercase (const char *com _GL_UNUSED, const char *val, void *place)
979 pstring = (char **)place;
980 xfree_null (*pstring);
982 *pstring = xmalloc (strlen (val) + 1);
984 for (q = *pstring; *val; val++, q++)
985 *q = c_toupper (*val);
992 /* Like cmd_string, but handles tilde-expansion when reading a user's
993 `.wgetrc'. In that case, and if VAL begins with `~', the tilde
994 gets expanded to the user's home directory. */
996 cmd_file (const char *com _GL_UNUSED, const char *val, void *place)
998 char **pstring = (char **)place;
1000 xfree_null (*pstring);
1002 /* #### If VAL is empty, perhaps should set *PLACE to NULL. */
1004 *pstring = xstrdup (val);
1006 #if defined(WINDOWS) || defined(MSDOS)
1007 /* Convert "\" to "/". */
1010 for (s = *pstring; *s; s++)
1018 /* Like cmd_file, but strips trailing '/' characters. */
1020 cmd_directory (const char *com, const char *val, void *place)
1024 /* Call cmd_file() for tilde expansion and separator
1025 canonicalization (backslash -> slash under Windows). These
1026 things should perhaps be in a separate function. */
1027 if (!cmd_file (com, val, place))
1030 s = *(char **)place;
1032 while (t > s && *--t == '/')
1038 /* Split VAL by space to a vector of values, and append those values
1039 to vector pointed to by the PLACE argument. If VAL is empty, the
1040 PLACE vector is cleared instead. */
1043 cmd_vector (const char *com _GL_UNUSED, const char *val, void *place)
1045 char ***pvec = (char ***)place;
1048 *pvec = merge_vecs (*pvec, sepstring (val));
1058 cmd_directory_vector (const char *com _GL_UNUSED, const char *val, void *place)
1060 char ***pvec = (char ***)place;
1064 /* Strip the trailing slashes from directories. */
1067 seps = sepstring (val);
1068 for (t = seps; t && *t; t++)
1070 int len = strlen (*t);
1071 /* Skip degenerate case of root directory. */
1074 if ((*t)[len - 1] == '/')
1075 (*t)[len - 1] = '\0';
1078 *pvec = merge_vecs (*pvec, seps);
1088 /* Engine for cmd_bytes and cmd_bytes_sum: converts a string such as
1089 "100k" or "2.5G" to a floating point number. */
1092 parse_bytes_helper (const char *val, double *result)
1094 double number, mult;
1095 const char *end = val + strlen (val);
1097 /* Check for "inf". */
1098 if (0 == strcmp (val, "inf"))
1104 /* Strip trailing whitespace. */
1105 while (val < end && c_isspace (end[-1]))
1110 switch (c_tolower (end[-1]))
1113 --end, mult = 1024.0;
1116 --end, mult = 1048576.0;
1119 --end, mult = 1073741824.0;
1122 --end, mult = 1099511627776.0;
1125 /* Not a recognized suffix: assume it's a digit. (If not,
1126 simple_atof will raise an error.) */
1130 /* Skip leading and trailing whitespace. */
1131 while (val < end && c_isspace (*val))
1133 while (val < end && c_isspace (end[-1]))
1138 if (!simple_atof (val, end, &number) || number < 0)
1141 *result = number * mult;
1145 /* Parse VAL as a number and set its value to PLACE (which should
1148 By default, the value is assumed to be in bytes. If "K", "M", "G",
1149 or "T" is appended, the value is multiplied with 1<<10, 1<<20,
1150 1<<30, or 1<<40, respectively. Floating point values are allowed and are
1151 cast to integer before use. The idea is to be able to use things
1152 like 1.5k instead of "1536".
1154 The string "inf" is returned as 0.
1156 In case of error, false is returned and memory pointed to by PLACE
1157 remains unmodified. */
1160 cmd_bytes (const char *com, const char *val, void *place)
1163 if (!parse_bytes_helper (val, &byte_value))
1165 fprintf (stderr, _("%s: %s: Invalid byte value %s\n"),
1166 exec_name, com, quote (val));
1169 *(wgint *)place = (wgint)byte_value;
1173 /* Like cmd_bytes, but PLACE is interpreted as a pointer to
1174 SUM_SIZE_INT. It works by converting the string to double, therefore
1175 working with values up to 2^53-1 without loss of precision. This
1176 value (8192 TB) is large enough to serve for a while. */
1179 cmd_bytes_sum (const char *com, const char *val, void *place)
1182 if (!parse_bytes_helper (val, &byte_value))
1184 fprintf (stderr, _("%s: %s: Invalid byte value %s\n"),
1185 exec_name, com, quote (val));
1188 *(SUM_SIZE_INT *) place = (SUM_SIZE_INT) byte_value;
1192 /* Store the value of VAL to *PLACE. The value is a time period, by
1193 default expressed in seconds, but also accepting suffixes "m", "h",
1194 "d", and "w" for minutes, hours, days, and weeks respectively. */
1197 cmd_time (const char *com, const char *val, void *place)
1199 double number, mult;
1200 const char *end = val + strlen (val);
1202 /* Strip trailing whitespace. */
1203 while (val < end && c_isspace (end[-1]))
1209 fprintf (stderr, _("%s: %s: Invalid time period %s\n"),
1210 exec_name, com, quote (val));
1214 switch (c_tolower (end[-1]))
1217 --end, mult = 1; /* seconds */
1220 --end, mult = 60; /* minutes */
1223 --end, mult = 3600; /* hours */
1226 --end, mult = 86400.0; /* days */
1229 --end, mult = 604800.0; /* weeks */
1232 /* Not a recognized suffix: assume it belongs to the number.
1233 (If not, simple_atof will raise an error.) */
1237 /* Skip leading and trailing whitespace. */
1238 while (val < end && c_isspace (*val))
1240 while (val < end && c_isspace (end[-1]))
1245 if (!simple_atof (val, end, &number))
1248 *(double *)place = number * mult;
1254 cmd_cert_type (const char *com, const char *val, void *place)
1256 static const struct decode_item choices[] = {
1257 { "pem", keyfile_pem },
1258 { "der", keyfile_asn1 },
1259 { "asn1", keyfile_asn1 },
1261 int ok = decode_string (val, choices, countof (choices), place);
1263 fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val));
1268 /* Specialized helper functions, used by `commands' to handle some
1269 options specially. */
1271 static bool check_user_specified_header (const char *);
1274 cmd_spec_dirstruct (const char *com, const char *val, void *place_ignored _GL_UNUSED)
1276 if (!cmd_boolean (com, val, &opt.dirstruct))
1278 /* Since dirstruct behaviour is explicitly changed, no_dirstruct
1279 must be affected inversely. */
1281 opt.no_dirstruct = false;
1283 opt.no_dirstruct = true;
1288 cmd_spec_header (const char *com, const char *val, void *place_ignored _GL_UNUSED)
1290 /* Empty value means reset the list of headers. */
1293 free_vec (opt.user_headers);
1294 opt.user_headers = NULL;
1298 if (!check_user_specified_header (val))
1300 fprintf (stderr, _("%s: %s: Invalid header %s.\n"),
1301 exec_name, com, quote (val));
1304 opt.user_headers = vec_append (opt.user_headers, val);
1309 cmd_spec_warc_header (const char *com, const char *val, void *place_ignored _GL_UNUSED)
1311 /* Empty value means reset the list of headers. */
1314 free_vec (opt.warc_user_headers);
1315 opt.warc_user_headers = NULL;
1319 if (!check_user_specified_header (val))
1321 fprintf (stderr, _("%s: %s: Invalid WARC header %s.\n"),
1322 exec_name, com, quote (val));
1325 opt.warc_user_headers = vec_append (opt.warc_user_headers, val);
1330 cmd_spec_htmlify (const char *com, const char *val, void *place_ignored _GL_UNUSED)
1332 int flag = cmd_boolean (com, val, &opt.htmlify);
1333 if (flag && !opt.htmlify)
1334 opt.remove_listing = false;
1338 /* Set the "mirror" mode. It means: recursive download, timestamping,
1339 no limit on max. recursion depth, and don't remove listings. */
1342 cmd_spec_mirror (const char *com, const char *val, void *place_ignored _GL_UNUSED)
1346 if (!cmd_boolean (com, val, &mirror))
1350 opt.recursive = true;
1351 if (!opt.no_dirstruct)
1352 opt.dirstruct = true;
1353 opt.timestamping = true;
1354 opt.reclevel = INFINITE_RECURSION;
1355 opt.remove_listing = false;
1360 /* Validate --prefer-family and set the choice. Allowed values are
1361 "IPv4", "IPv6", and "none". */
1364 cmd_spec_prefer_family (const char *com, const char *val, void *place_ignored _GL_UNUSED)
1366 static const struct decode_item choices[] = {
1367 { "IPv4", prefer_ipv4 },
1368 { "IPv6", prefer_ipv6 },
1369 { "none", prefer_none },
1371 int prefer_family = prefer_none;
1372 int ok = decode_string (val, choices, countof (choices), &prefer_family);
1374 fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val));
1375 opt.prefer_family = prefer_family;
1379 /* Set progress.type to VAL, but verify that it's a valid progress
1380 implementation before that. */
1383 cmd_spec_progress (const char *com, const char *val, void *place_ignored _GL_UNUSED)
1385 if (!valid_progress_implementation_p (val))
1387 fprintf (stderr, _("%s: %s: Invalid progress type %s.\n"),
1388 exec_name, com, quote (val));
1391 xfree_null (opt.progress_type);
1393 /* Don't call set_progress_implementation here. It will be called
1394 in main() when it becomes clear what the log output is. */
1395 opt.progress_type = xstrdup (val);
1399 /* Set opt.recursive to VAL as with cmd_boolean. If opt.recursive is
1400 set to true, also set opt.dirstruct to true, unless opt.no_dirstruct
1404 cmd_spec_recursive (const char *com, const char *val, void *place_ignored _GL_UNUSED)
1406 if (!cmd_boolean (com, val, &opt.recursive))
1410 if (opt.recursive && !opt.no_dirstruct)
1411 opt.dirstruct = true;
1416 /* Validate --regex-type and set the choice. */
1419 cmd_spec_regex_type (const char *com, const char *val, void *place_ignored _GL_UNUSED)
1421 static const struct decode_item choices[] = {
1422 { "posix", regex_type_posix },
1424 { "pcre", regex_type_pcre },
1427 int regex_type = regex_type_posix;
1428 int ok = decode_string (val, choices, countof (choices), ®ex_type);
1430 fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val));
1431 opt.regex_type = regex_type;
1436 cmd_spec_restrict_file_names (const char *com, const char *val, void *place_ignored _GL_UNUSED)
1438 int restrict_os = opt.restrict_files_os;
1439 int restrict_ctrl = opt.restrict_files_ctrl;
1440 int restrict_case = opt.restrict_files_case;
1441 int restrict_nonascii = opt.restrict_files_nonascii;
1445 #define VAL_IS(string_literal) BOUNDED_EQUAL (val, end, string_literal)
1449 end = strchr (val, ',');
1451 end = val + strlen (val);
1453 if (VAL_IS ("unix"))
1454 restrict_os = restrict_unix;
1455 else if (VAL_IS ("windows"))
1456 restrict_os = restrict_windows;
1457 else if (VAL_IS ("lowercase"))
1458 restrict_case = restrict_lowercase;
1459 else if (VAL_IS ("uppercase"))
1460 restrict_case = restrict_uppercase;
1461 else if (VAL_IS ("nocontrol"))
1462 restrict_ctrl = false;
1463 else if (VAL_IS ("ascii"))
1464 restrict_nonascii = true;
1467 fprintf (stderr, _("\
1468 %s: %s: Invalid restriction %s,\n\
1469 use [unix|windows],[lowercase|uppercase],[nocontrol],[ascii].\n"),
1470 exec_name, com, quote (val));
1477 while (*val && *end);
1481 opt.restrict_files_os = restrict_os;
1482 opt.restrict_files_ctrl = restrict_ctrl;
1483 opt.restrict_files_case = restrict_case;
1484 opt.restrict_files_nonascii = restrict_nonascii;
1490 cmd_spec_report_speed (const char *com, const char *val, void *place_ignored _GL_UNUSED)
1492 opt.report_bps = strcasecmp (val, "bits") == 0;
1493 if (!opt.report_bps)
1494 fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val));
1495 return opt.report_bps;
#ifdef HAVE_SSL
/* Decode VAL, one of "auto", "sslv2", "sslv3", "tlsv1" or "pfs"
   (decode_string compares without regard to case), and store the
   matching secure_protocol_* code through PLACE, which decode_string
   treats as an int pointer.  On an unrecognized VAL a diagnostic is
   printed, *PLACE is left alone and false is returned.  */

static bool
cmd_spec_secure_protocol (const char *com, const char *val, void *place)
{
  static const struct decode_item choices[] = {
    { "auto", secure_protocol_auto },
    { "sslv2", secure_protocol_sslv2 },
    { "sslv3", secure_protocol_sslv3 },
    { "tlsv1", secure_protocol_tlsv1 },
    { "pfs", secure_protocol_pfs },
  };
  int ok = decode_string (val, choices, countof (choices), place);
  if (!ok)
    fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val));
  return ok;
}
#endif
1516 /* Set all three timeout values. */
1519 cmd_spec_timeout (const char *com, const char *val, void *place_ignored _GL_UNUSED)
1522 if (!cmd_time (com, val, &value))
1524 opt.read_timeout = value;
1525 opt.connect_timeout = value;
1526 opt.dns_timeout = value;
1531 cmd_spec_useragent (const char *com, const char *val, void *place_ignored _GL_UNUSED)
1533 /* Disallow embedded newlines. */
1534 if (strchr (val, '\n'))
1536 fprintf (stderr, _("%s: %s: Invalid value %s.\n"),
1537 exec_name, com, quote (val));
1540 xfree_null (opt.useragent);
1541 opt.useragent = xstrdup (val);
1545 /* The "verbose" option cannot be cmd_boolean because the variable is
1546 not bool -- it's of type int (-1 means uninitialized because of
1547 some random hackery for disallowing -q -v). */
1550 cmd_spec_verbose (const char *com, const char *val, void *place_ignored _GL_UNUSED)
1553 if (cmd_boolean (com, val, &flag))
/* Miscellaneous useful routines.  */

/* A very simple atoi clone, more useful than atoi because it works on
   delimited strings, and has error reportage.  Returns true on success,
   false on failure.  If successful, stores result to *DEST.

   The text to convert is [BEG, END): optional leading whitespace, an
   optional '+' or '-' sign, then decimal digits up to END.  Anything
   else before END, or a value that does not fit in an int, is a
   failure.  *DEST is not modified on failure.  */

static bool
simple_atoi (const char *beg, const char *end, int *dest)
{
  int result = 0;
  bool negative = false;
  const char *p = beg;

  while (p < end && c_isspace (*p))
    ++p;
  if (p < end && (*p == '-' || *p == '+'))
    {
      negative = (*p == '-');
      ++p;
    }
  if (p == end)
    return false;

  /* Read negative numbers in a separate loop because the most
     negative integer cannot be represented as a positive number.

     The range is checked *before* each step.  Computing the next
     value first and comparing afterwards would rely on signed
     overflow, which is undefined behaviour and, even where it wraps,
     misses inputs such as "10000000000".  */

  if (!negative)
    for (; p < end && c_isdigit (*p); p++)
      {
        int digit = *p - '0';
        if (result > (INT_MAX - digit) / 10)
          return false;         /* overflow */
        result = 10 * result + digit;
      }
  else
    for (; p < end && c_isdigit (*p); p++)
      {
        int digit = *p - '0';
        /* INT_MIN + digit is never positive, and division truncates
           toward zero, so the quotient is the smallest RESULT for
           which 10 * RESULT - DIGIT still fits.  */
        if (result < (INT_MIN + digit) / 10)
          return false;         /* underflow */
        result = 10 * result - digit;
      }

  /* Trailing non-digit characters make the whole string invalid.  */
  if (p != end)
    return false;

  *dest = result;
  return true;
}
/* Trivial atof, with error reporting.  Handles "<digits>[.<digits>]",
   doesn't handle exponential notation.  Returns true on success,
   false on failure.  In case of success, stores its result to
   *DEST.

   The text to convert is [BEG, END): optional leading whitespace, an
   optional '+' or '-' sign, then digits with at most one '.'.  At
   least one digit is required; any other character is a failure.
   *DEST is not modified on failure.  */

static bool
simple_atof (const char *beg, const char *end, double *dest)
{
  double result = 0;

  bool negative = false;
  bool seen_dot = false;
  bool seen_digit = false;
  double divider = 1;

  const char *p = beg;

  while (p < end && c_isspace (*p))
    ++p;
  if (p < end && (*p == '-' || *p == '+'))
    {
      negative = (*p == '-');
      ++p;
    }

  for (; p < end; p++)
    {
      char ch = *p;
      if (c_isdigit (ch))
        {
          if (!seen_dot)
            result = (10 * result) + (ch - '0');
          else
            /* DIVIDER grows tenfold with every fractional digit.  */
            result += (ch - '0') / (divider *= 10);
          seen_digit = true;
        }
      else if (ch == '.')
        {
          /* A second dot is an error.  */
          if (!seen_dot)
            seen_dot = true;
          else
            return false;
        }
      else
        return false;
    }
  if (!seen_digit)
    return false;
  if (negative)
    result = -result;

  *dest = result;
  return true;
}
/* Verify that the user-specified header in S is valid.  It must
   contain a colon preceded by non-white-space characters and must not
   contain newlines.  Returns true when S is acceptable.  */

static bool
check_user_specified_header (const char *s)
{
  const char *p;

  /* Skip over the header name: stop at the first colon, whitespace
     character or end of string.  */
  for (p = s; *p && *p != ':' && !c_isspace (*p); p++)
    ;
  /* The header MUST contain `:' preceded by at least one
     non-whitespace character.  */
  if (*p != ':' || p == s)
    return false;
  /* The header MUST NOT contain newlines.  */
  if (strchr (s, '\n'))
    return false;
  return true;
}
1687 /* Decode VAL into a number, according to ITEMS. */
1690 decode_string (const char *val, const struct decode_item *items, int itemcount,
1694 for (i = 0; i < itemcount; i++)
1695 if (0 == strcasecmp (val, items[i].name))
1697 *place = items[i].code;
1703 /* Free the memory allocated by global variables. */
1707 /* Free external resources, close files, etc. */
1709 /* Close WARC file. */
1710 if (opt.warc_filename != 0)
1716 if (fclose (output_stream) == EOF)
1717 inform_exit_status (CLOSEFAILED);
1719 /* No need to check for error because Wget flushes its output (and
1720 checks for errors) after any data arrives. */
1722 /* We're exiting anyway so there's no real need to call free()
1723 hundreds of times. Skipping the frees will make Wget exit
1726 However, when detecting leaks, it's crucial to free() everything
1727 because then you can find the real leaks, i.e. the allocated
1728 memory which grows with the size of the program. */
1734 cleanup_html_url ();
1738 netrc_cleanup (netrc_list);
1740 for (i = 0; i < nurl; i++)
1743 xfree_null (opt.choose_config);
1744 xfree_null (opt.lfilename);
1745 xfree_null (opt.dir_prefix);
1746 xfree_null (opt.input_filename);
1747 xfree_null (opt.output_document);
1748 free_vec (opt.accepts);
1749 free_vec (opt.rejects);
1750 free_vec (opt.excludes);
1751 free_vec (opt.includes);
1752 free_vec (opt.domains);
1753 free_vec (opt.follow_tags);
1754 free_vec (opt.ignore_tags);
1755 xfree_null (opt.progress_type);
1756 xfree_null (opt.ftp_user);
1757 xfree_null (opt.ftp_passwd);
1758 xfree_null (opt.ftp_proxy);
1759 xfree_null (opt.https_proxy);
1760 xfree_null (opt.http_proxy);
1761 free_vec (opt.no_proxy);
1762 xfree_null (opt.useragent);
1763 xfree_null (opt.referer);
1764 xfree_null (opt.http_user);
1765 xfree_null (opt.http_passwd);
1766 free_vec (opt.user_headers);
1767 free_vec (opt.warc_user_headers);
1769 xfree_null (opt.cert_file);
1770 xfree_null (opt.private_key);
1771 xfree_null (opt.ca_directory);
1772 xfree_null (opt.ca_cert);
1773 xfree_null (opt.random_file);
1774 xfree_null (opt.egd_file);
1776 xfree_null (opt.bind_address);
1777 xfree_null (opt.cookies_input);
1778 xfree_null (opt.cookies_output);
1779 xfree_null (opt.user);
1780 xfree_null (opt.passwd);
1781 xfree_null (opt.base_href);
1782 xfree_null (opt.method);
1784 #endif /* DEBUG_MALLOC */
1787 /* Unit testing routines. */
1792 test_commands_sorted(void)
1796 for (i = 1; i < countof(commands); ++i)
1798 if (strcasecmp (commands[i - 1].name, commands[i].name) > 0)
1800 mu_assert ("FAILED", false);
1808 test_cmd_spec_restrict_file_names(void)
1811 static const struct {
1813 int expected_restrict_files_os;
1814 int expected_restrict_files_ctrl;
1815 int expected_restrict_files_case;
1818 { "windows", restrict_windows, true, restrict_no_case_restriction, true },
1819 { "windows,", restrict_windows, true, restrict_no_case_restriction, true },
1820 { "windows,lowercase", restrict_windows, true, restrict_lowercase, true },
1821 { "unix,nocontrol,lowercase,", restrict_unix, false, restrict_lowercase, true },
1824 for (i = 0; i < countof(test_array); ++i)
1829 res = cmd_spec_restrict_file_names ("dummy", test_array[i].val, NULL);
1832 fprintf (stderr, "test_cmd_spec_restrict_file_names: TEST %d\n", i); fflush (stderr);
1833 fprintf (stderr, "opt.restrict_files_os: %d\n", opt.restrict_files_os); fflush (stderr);
1834 fprintf (stderr, "opt.restrict_files_ctrl: %d\n", opt.restrict_files_ctrl); fflush (stderr);
1835 fprintf (stderr, "opt.restrict_files_case: %d\n", opt.restrict_files_case); fflush (stderr);
1837 mu_assert ("test_cmd_spec_restrict_file_names: wrong result",
1838 res == test_array[i].result
1839 && opt.restrict_files_os == test_array[i].expected_restrict_files_os
1840 && opt.restrict_files_ctrl == test_array[i].expected_restrict_files_ctrl
1841 && opt.restrict_files_case == test_array[i].expected_restrict_files_case);
1847 #endif /* TESTING */