1 /* Reading/parsing the initialization file.
2 Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
3 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation,
6 This file is part of GNU Wget.
8 GNU Wget is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 GNU Wget is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with Wget. If not, see <http://www.gnu.org/licenses/>.
21 Additional permission under GNU GPL version 3 section 7
23 If you modify this program, or any covered work, by linking or
24 combining it with the OpenSSL project's OpenSSL library (or a
25 modified version of that library), containing parts covered by the
26 terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
27 grants you additional permission to convey the resulting work.
28 Corresponding Source for a non-source form of such a combination
29 shall include the source code for the parts of OpenSSL used as well
30 as that of the covered work. */
41 /* not all systems provide PATH_MAX in limits.h */
43 # include <sys/param.h>
45 # define PATH_MAX MAXPATHLEN
64 #include "recur.h" /* for INFINITE_RECURSION */
65 #include "convert.h" /* for convert_cleanup */
66 #include "res.h" /* for res_cleanup */
67 #include "http.h" /* for http_cleanup */
68 #include "retr.h" /* for output_stream */
76 #define CMD_DECLARE(func) static bool func (const char *, const char *, void *)
78 CMD_DECLARE (cmd_boolean);
79 CMD_DECLARE (cmd_bytes);
80 CMD_DECLARE (cmd_bytes_sum);
82 CMD_DECLARE (cmd_cert_type);
84 CMD_DECLARE (cmd_directory_vector);
85 CMD_DECLARE (cmd_number);
86 CMD_DECLARE (cmd_number_inf);
87 CMD_DECLARE (cmd_string);
88 CMD_DECLARE (cmd_file);
89 CMD_DECLARE (cmd_directory);
90 CMD_DECLARE (cmd_time);
91 CMD_DECLARE (cmd_vector);
93 CMD_DECLARE (cmd_spec_dirstruct);
94 CMD_DECLARE (cmd_spec_header);
95 CMD_DECLARE (cmd_spec_warc_header);
96 CMD_DECLARE (cmd_spec_htmlify);
97 CMD_DECLARE (cmd_spec_mirror);
98 CMD_DECLARE (cmd_spec_prefer_family);
99 CMD_DECLARE (cmd_spec_progress);
100 CMD_DECLARE (cmd_spec_recursive);
101 CMD_DECLARE (cmd_spec_regex_type);
102 CMD_DECLARE (cmd_spec_restrict_file_names);
104 CMD_DECLARE (cmd_spec_secure_protocol);
106 CMD_DECLARE (cmd_spec_timeout);
107 CMD_DECLARE (cmd_spec_useragent);
108 CMD_DECLARE (cmd_spec_verbose);
110 /* List of recognized commands, each consisting of name, place and
111 function. When adding a new command, simply add it to the list,
112 but be sure to keep the list sorted alphabetically, as
113 command_by_name's binary search depends on it. Also, be sure to
114 add any entries that allocate memory (e.g. cmd_string and
115 cmd_vector) to the cleanup() function below. */
117 static const struct {
120 bool (*action) (const char *, const char *, void *);
122 /* KEEP THIS LIST ALPHABETICALLY SORTED */
123 { "accept", &opt.accepts, cmd_vector },
124 { "acceptregex", &opt.acceptregex_s, cmd_string },
125 { "addhostdir", &opt.add_hostdir, cmd_boolean },
126 { "adjustextension", &opt.adjust_extension, cmd_boolean },
127 { "alwaysrest", &opt.always_rest, cmd_boolean }, /* deprecated */
128 { "askpassword", &opt.ask_passwd, cmd_boolean },
129 { "authnochallenge", &opt.auth_without_challenge,
131 { "background", &opt.background, cmd_boolean },
132 { "backupconverted", &opt.backup_converted, cmd_boolean },
133 { "backups", &opt.backups, cmd_number },
134 { "base", &opt.base_href, cmd_string },
135 { "bindaddress", &opt.bind_address, cmd_string },
136 { "bits", &opt.bits_fmt, cmd_boolean},
138 { "cacertificate", &opt.ca_cert, cmd_file },
140 { "cache", &opt.allow_cache, cmd_boolean },
142 { "cadirectory", &opt.ca_directory, cmd_directory },
143 { "certificate", &opt.cert_file, cmd_file },
144 { "certificatetype", &opt.cert_type, cmd_cert_type },
145 { "checkcertificate", &opt.check_cert, cmd_boolean },
147 { "chooseconfig", &opt.choose_config, cmd_file },
148 { "connecttimeout", &opt.connect_timeout, cmd_time },
149 { "contentdisposition", &opt.content_disposition, cmd_boolean },
150 { "contentonerror", &opt.content_on_error, cmd_boolean },
151 { "continue", &opt.always_rest, cmd_boolean },
152 { "convertlinks", &opt.convert_links, cmd_boolean },
153 { "cookies", &opt.cookies, cmd_boolean },
154 { "cutdirs", &opt.cut_dirs, cmd_number },
156 { "debug", &opt.debug, cmd_boolean },
158 { "defaultpage", &opt.default_page, cmd_string},
159 { "deleteafter", &opt.delete_after, cmd_boolean },
160 { "dirprefix", &opt.dir_prefix, cmd_directory },
161 { "dirstruct", NULL, cmd_spec_dirstruct },
162 { "dnscache", &opt.dns_cache, cmd_boolean },
163 { "dnstimeout", &opt.dns_timeout, cmd_time },
164 { "domains", &opt.domains, cmd_vector },
165 { "dotbytes", &opt.dot_bytes, cmd_bytes },
166 { "dotsinline", &opt.dots_in_line, cmd_number },
167 { "dotspacing", &opt.dot_spacing, cmd_number },
168 { "dotstyle", &opt.dot_style, cmd_string }, /* deprecated */
170 { "egdfile", &opt.egd_file, cmd_file },
172 { "excludedirectories", &opt.excludes, cmd_directory_vector },
173 { "excludedomains", &opt.exclude_domains, cmd_vector },
174 { "followftp", &opt.follow_ftp, cmd_boolean },
175 { "followtags", &opt.follow_tags, cmd_vector },
176 { "forcehtml", &opt.force_html, cmd_boolean },
177 { "ftppasswd", &opt.ftp_passwd, cmd_string }, /* deprecated */
178 { "ftppassword", &opt.ftp_passwd, cmd_string },
179 { "ftpproxy", &opt.ftp_proxy, cmd_string },
181 { "ftpstmlf", &opt.ftp_stmlf, cmd_boolean },
182 #endif /* def __VMS */
183 { "ftpuser", &opt.ftp_user, cmd_string },
184 { "glob", &opt.ftp_glob, cmd_boolean },
185 { "header", NULL, cmd_spec_header },
186 { "htmlextension", &opt.adjust_extension, cmd_boolean }, /* deprecated */
187 { "htmlify", NULL, cmd_spec_htmlify },
188 { "httpkeepalive", &opt.http_keep_alive, cmd_boolean },
189 { "httppasswd", &opt.http_passwd, cmd_string }, /* deprecated */
190 { "httppassword", &opt.http_passwd, cmd_string },
191 { "httpproxy", &opt.http_proxy, cmd_string },
192 { "httpsproxy", &opt.https_proxy, cmd_string },
193 { "httpuser", &opt.http_user, cmd_string },
194 { "ignorecase", &opt.ignore_case, cmd_boolean },
195 { "ignorelength", &opt.ignore_length, cmd_boolean },
196 { "ignoretags", &opt.ignore_tags, cmd_vector },
197 { "includedirectories", &opt.includes, cmd_directory_vector },
199 { "inet4only", &opt.ipv4_only, cmd_boolean },
200 { "inet6only", &opt.ipv6_only, cmd_boolean },
202 { "input", &opt.input_filename, cmd_file },
203 { "iri", &opt.enable_iri, cmd_boolean },
204 { "keepsessioncookies", &opt.keep_session_cookies, cmd_boolean },
205 { "limitrate", &opt.limit_rate, cmd_bytes },
206 { "loadcookies", &opt.cookies_input, cmd_file },
207 { "localencoding", &opt.locale, cmd_string },
208 { "logfile", &opt.lfilename, cmd_file },
209 { "login", &opt.ftp_user, cmd_string },/* deprecated*/
210 { "maxredirect", &opt.max_redirect, cmd_number },
211 { "mirror", NULL, cmd_spec_mirror },
212 { "netrc", &opt.netrc, cmd_boolean },
213 { "noclobber", &opt.noclobber, cmd_boolean },
214 { "noparent", &opt.no_parent, cmd_boolean },
215 { "noproxy", &opt.no_proxy, cmd_vector },
216 { "numtries", &opt.ntry, cmd_number_inf },/* deprecated*/
217 { "outputdocument", &opt.output_document, cmd_file },
218 { "pagerequisites", &opt.page_requisites, cmd_boolean },
219 { "passiveftp", &opt.ftp_pasv, cmd_boolean },
220 { "passwd", &opt.ftp_passwd, cmd_string },/* deprecated*/
221 { "password", &opt.passwd, cmd_string },
222 { "postdata", &opt.post_data, cmd_string },
223 { "postfile", &opt.post_file_name, cmd_file },
224 { "preferfamily", NULL, cmd_spec_prefer_family },
225 { "preservepermissions", &opt.preserve_perm, cmd_boolean },
227 { "privatekey", &opt.private_key, cmd_file },
228 { "privatekeytype", &opt.private_key_type, cmd_cert_type },
230 { "progress", &opt.progress_type, cmd_spec_progress },
231 { "protocoldirectories", &opt.protocol_directories, cmd_boolean },
232 { "proxypasswd", &opt.proxy_passwd, cmd_string }, /* deprecated */
233 { "proxypassword", &opt.proxy_passwd, cmd_string },
234 { "proxyuser", &opt.proxy_user, cmd_string },
235 { "quiet", &opt.quiet, cmd_boolean },
236 { "quota", &opt.quota, cmd_bytes_sum },
238 { "randomfile", &opt.random_file, cmd_file },
240 { "randomwait", &opt.random_wait, cmd_boolean },
241 { "readtimeout", &opt.read_timeout, cmd_time },
242 { "reclevel", &opt.reclevel, cmd_number_inf },
243 { "recursive", NULL, cmd_spec_recursive },
244 { "referer", &opt.referer, cmd_string },
245 { "regextype", &opt.regex_type, cmd_spec_regex_type },
246 { "reject", &opt.rejects, cmd_vector },
247 { "rejectregex", &opt.rejectregex_s, cmd_string },
248 { "relativeonly", &opt.relative_only, cmd_boolean },
249 { "remoteencoding", &opt.encoding_remote, cmd_string },
250 { "removelisting", &opt.remove_listing, cmd_boolean },
251 { "restrictfilenames", NULL, cmd_spec_restrict_file_names },
252 { "retrsymlinks", &opt.retr_symlinks, cmd_boolean },
253 { "retryconnrefused", &opt.retry_connrefused, cmd_boolean },
254 { "robots", &opt.use_robots, cmd_boolean },
255 { "savecookies", &opt.cookies_output, cmd_file },
256 { "saveheaders", &opt.save_headers, cmd_boolean },
258 { "secureprotocol", &opt.secure_protocol, cmd_spec_secure_protocol },
260 { "serverresponse", &opt.server_response, cmd_boolean },
261 { "showalldnsentries", &opt.show_all_dns_entries, cmd_boolean },
262 { "spanhosts", &opt.spanhost, cmd_boolean },
263 { "spider", &opt.spider, cmd_boolean },
264 { "strictcomments", &opt.strict_comments, cmd_boolean },
265 { "timeout", NULL, cmd_spec_timeout },
266 { "timestamping", &opt.timestamping, cmd_boolean },
267 { "tries", &opt.ntry, cmd_number_inf },
268 { "trustservernames", &opt.trustservernames, cmd_boolean },
269 { "unlink", &opt.unlink, cmd_boolean },
270 { "useproxy", &opt.use_proxy, cmd_boolean },
271 { "user", &opt.user, cmd_string },
272 { "useragent", NULL, cmd_spec_useragent },
273 { "useservertimestamps", &opt.useservertimestamps, cmd_boolean },
274 { "verbose", NULL, cmd_spec_verbose },
275 { "wait", &opt.wait, cmd_time },
276 { "waitretry", &opt.waitretry, cmd_time },
277 { "warccdx", &opt.warc_cdx_enabled, cmd_boolean },
278 { "warccdxdedup", &opt.warc_cdx_dedup_filename, cmd_file },
280 { "warccompression", &opt.warc_compression_enabled, cmd_boolean },
282 { "warcdigests", &opt.warc_digests_enabled, cmd_boolean },
283 { "warcfile", &opt.warc_filename, cmd_file },
284 { "warcheader", NULL, cmd_spec_warc_header },
285 { "warckeeplog", &opt.warc_keep_log, cmd_boolean },
286 { "warcmaxsize", &opt.warc_maxsize, cmd_bytes },
287 { "warctempdir", &opt.warc_tempdir, cmd_directory },
289 { "wdebug", &opt.wdebug, cmd_boolean },
293 /* Look up CMDNAME in the commands[] and return its position in the
294 array. If CMDNAME is not found, return -1. */
297 command_by_name (const char *cmdname)
299 /* Use binary search for speed. Wget has ~150 commands, which
300 guarantees a worst case performance of 8 string comparisons. */
301 int lo = 0, hi = countof (commands) - 1;
305 int mid = (lo + hi) >> 1;
306 int cmp = strcasecmp (cmdname, commands[mid].name);
317 /* Reset the variables to default values. */
323 /* Most of the default values are 0 (and 0.0, NULL, and false).
324 Just reset everything, and fill in the non-zero values. Note
325 that initializing pointers to NULL this way is technically
326 illegal, but porting Wget to a machine where NULL is not all-zero
327 bit pattern will be the least of the implementors' worries. */
334 opt.add_hostdir = true;
338 opt.http_keep_alive = true;
339 opt.use_proxy = true;
340 tmp = getenv ("no_proxy");
342 opt.no_proxy = sepstring (tmp);
343 opt.prefer_family = prefer_none;
344 opt.allow_cache = true;
346 opt.read_timeout = 900;
347 opt.use_robots = true;
349 opt.remove_listing = true;
351 opt.dot_bytes = 1024;
352 opt.dot_spacing = 10;
353 opt.dots_in_line = 50;
355 opt.dns_cache = true;
359 opt.check_cert = true;
362 /* File name restriction defaults to the OS type. */
363 #if defined(WINDOWS) || defined(MSDOS) || defined(__CYGWIN__)
364 opt.restrict_files_os = restrict_windows;
366 opt.restrict_files_os = restrict_unix;
368 opt.restrict_files_ctrl = true;
369 opt.restrict_files_nonascii = false;
370 opt.restrict_files_case = restrict_no_case_restriction;
372 opt.regex_type = regex_type_posix;
374 opt.max_redirect = 20;
379 opt.enable_iri = true;
381 opt.enable_iri = false;
384 opt.encoding_remote = NULL;
386 opt.useservertimestamps = true;
387 opt.show_all_dns_entries = false;
389 opt.warc_maxsize = 0; /* 1024 * 1024 * 1024; */
391 opt.warc_compression_enabled = true;
393 opt.warc_compression_enabled = false;
395 opt.warc_digests_enabled = true;
396 opt.warc_cdx_enabled = false;
397 opt.warc_cdx_dedup_filename = NULL;
398 opt.warc_tempdir = NULL;
399 opt.warc_keep_log = true;
402 /* Return the user's home directory (strdup-ed), or NULL if none is
407 static char *buf = NULL;
408 static char *home, *ret;
412 home = getenv ("HOME");
418 /* Under MSDOS, if $HOME isn't defined, use the directory where
419 `wget.exe' resides. */
420 const char *_w32_get_argv0 (void); /* in libwatt.a/pcconfig.c */
423 buff = _w32_get_argv0 ();
425 p = strrchr (buf, '/'); /* djgpp */
427 p = strrchr (buf, '\\'); /* others */
431 buff = malloc (len + 1);
435 strncpy (buff, _w32_get_argv0 (), len);
439 #elif !defined(WINDOWS)
440 /* If HOME is not defined, try getting it from the password
442 struct passwd *pwd = getpwuid (getuid ());
443 if (!pwd || !pwd->pw_dir)
447 /* Under Windows, if $HOME isn't defined, use the directory where
448 `wget.exe' resides. */
454 ret = home ? xstrdup (home) : NULL;
461 /* Check the 'WGETRC' environment variable and return the file name
462 if 'WGETRC' is set and is a valid file.
463 If the `WGETRC' variable exists but the file does not exist, the
464 function will exit(). */
466 wgetrc_env_file_name (void)
468 char *env = getenv ("WGETRC");
471 if (!file_exists_p (env))
473 fprintf (stderr, _("%s: WGETRC points to %s, which doesn't exist.\n"),
477 return xstrdup (env);
482 /* Check for the existence of '$HOME/.wgetrc' and return its path
483 if it exists and is set. */
485 wgetrc_user_file_name (void)
489 /* If that failed, try $HOME/.wgetrc (or equivalent). */
492 file = "SYS$LOGIN:.wgetrc";
493 #else /* def __VMS */
496 file = aprintf ("%s/.wgetrc", home);
498 #endif /* def __VMS [else] */
502 if (!file_exists_p (file))
510 /* Return the path to the user's .wgetrc. This is either the value of
511 `WGETRC' environment variable, or `$HOME/.wgetrc'.
513 Additionally, for windows, look in the directory where wget.exe
516 wgetrc_file_name (void)
518 char *file = wgetrc_env_file_name ();
522 file = wgetrc_user_file_name ();
525 /* Under Windows, if we still haven't found .wgetrc, look for the file
526 `wget.ini' in the directory where `wget.exe' resides; we do this for
527 backward compatibility with previous versions of Wget.
528 SYSTEM_WGETRC should not be defined under WINDOWS. */
531 char *home = home_dir ();
537 file = aprintf ("%s/wget.ini", home);
538 if (!file_exists_p (file))
551 /* Return values of parse_line. */
559 static enum parse_line parse_line (const char *, char **, char **, int *);
560 static bool setval_internal (int, const char *, const char *);
561 static bool setval_internal_tilde (int, const char *, const char *);
563 /* Initialize variables from a wgetrc file. Returns zero (failure) if
564 there were errors in the file. */
567 run_wgetrc (const char *file)
574 fp = fopen (file, "r");
577 fprintf (stderr, _("%s: Cannot read %s (%s).\n"), exec_name,
578 file, strerror (errno));
579 return true; /* not a fatal error */
582 while ((line = read_whole_line (fp)) != NULL)
584 char *com = NULL, *val = NULL;
587 /* Parse the line. */
588 switch (parse_line (line, &com, &val, &comind))
591 /* If everything is OK, set the value. */
592 if (!setval_internal_tilde (comind, com, val))
594 fprintf (stderr, _("%s: Error in %s at line %d.\n"),
595 exec_name, file, ln);
599 case line_syntax_error:
600 fprintf (stderr, _("%s: Syntax error in %s at line %d.\n"),
601 exec_name, file, ln);
604 case line_unknown_command:
605 fprintf (stderr, _("%s: Unknown command %s in %s at line %d.\n"),
606 exec_name, quote (com), file, ln);
624 /* Initialize the defaults and run the system wgetrc and user's own
629 char *file, *env_sysrc;
632 /* Run a non-standard system rc file when the corresponding environment
633 variable has been set. For internal testing purposes only! */
634 env_sysrc = getenv ("SYSTEM_WGETRC");
635 if (env_sysrc && file_exists_p (env_sysrc))
637 ok &= run_wgetrc (env_sysrc);
638 /* If there are any problems parsing the system wgetrc file, tell
642 fprintf (stderr, _("\
643 Parsing system wgetrc file (env SYSTEM_WGETRC) failed. Please check\n\
645 or specify a different file using --config.\n"), env_sysrc);
649 /* Otherwise, if SYSTEM_WGETRC is defined, use it. */
651 else if (file_exists_p (SYSTEM_WGETRC))
652 ok &= run_wgetrc (SYSTEM_WGETRC);
653 /* If there are any problems parsing the system wgetrc file, tell
657 fprintf (stderr, _("\
658 Parsing system wgetrc file failed. Please check\n\
660 or specify a different file using --config.\n"), SYSTEM_WGETRC);
664 /* Override it with your own, if one exists. */
665 file = wgetrc_file_name ();
668 /* #### We should canonicalize `file' and SYSTEM_WGETRC with
669 something like realpath() before comparing them with `strcmp' */
671 if (!strcmp (file, SYSTEM_WGETRC))
673 fprintf (stderr, _("\
674 %s: Warning: Both system and user wgetrc point to %s.\n"),
675 exec_name, quote (file));
679 ok &= run_wgetrc (file);
681 /* If there were errors processing either `.wgetrc', abort. */
689 /* Remove dashes and underscores from S, modifying S in the
695 char *t = s; /* t - tortoise */
696 char *h = s; /* h - hare */
698 if (*h == '_' || *h == '-')
705 /* Parse the line pointed by line, with the syntax:
706 <sp>* command <sp>* = <sp>* value <sp>*
707 Uses malloc to allocate space for command and value.
709 Returns one of line_ok, line_empty, line_syntax_error, or
710 line_unknown_command.
712 In case of line_ok, *COM and *VAL point to freshly allocated strings,
713 and *COMIND points to com's index. *COM and *VAL are also allocated when
714 line_unknown_command is returned. On a syntax error or an empty line, their values are unmodified. */
716 static enum parse_line
717 parse_line (const char *line, char **com, char **val, int *comind)
720 const char *end = line + strlen (line);
721 const char *cmdstart, *cmdend;
722 const char *valstart, *valend;
727 /* Skip leading and trailing whitespace. */
728 while (*line && c_isspace (*line))
730 while (end > line && c_isspace (end[-1]))
733 /* Skip empty lines and comments. */
734 if (!*line || *line == '#')
740 while (p < end && (c_isalnum (*p) || *p == '_' || *p == '-'))
744 /* Skip '=', as well as any space before or after it. */
745 while (p < end && c_isspace (*p))
747 if (p == end || *p != '=')
748 return line_syntax_error;
750 while (p < end && c_isspace (*p))
756 /* The syntax is valid (even though the command might not be). Fill
757 in the command name and value. */
758 *com = strdupdelim (cmdstart, cmdend);
759 *val = strdupdelim (valstart, valend);
761 /* The line now known to be syntactically correct. Check whether
762 the command is valid. */
763 BOUNDED_TO_ALLOCA (cmdstart, cmdend, cmdcopy);
765 ind = command_by_name (cmdcopy);
767 return line_unknown_command;
769 /* Report success to the caller. */
774 #if defined(WINDOWS) || defined(MSDOS)
775 # define ISSEP(c) ((c) == '/' || (c) == '\\')
777 # define ISSEP(c) ((c) == '/')
780 /* Run commands[comind].action. */
783 setval_internal (int comind, const char *com, const char *val)
785 assert (0 <= comind && ((size_t) comind) < countof (commands));
786 DEBUGP (("Setting %s (%s) to %s\n", com, commands[comind].name, val));
787 return commands[comind].action (com, val, commands[comind].place);
791 setval_internal_tilde (int comind, const char *com, const char *val)
797 ret = setval_internal (comind, com, val);
799 /* We make tilde expansion for cmd_file and cmd_directory */
800 if (((commands[comind].action == cmd_file) ||
801 (commands[comind].action == cmd_directory))
802 && ret && (*val == '~' && ISSEP (val[1])))
804 pstring = commands[comind].place;
808 homelen = strlen (home);
809 while (homelen && ISSEP (home[homelen - 1]))
810 home[--homelen] = '\0';
812 /* Skip the leading "~/". */
813 for (++val; ISSEP (*val); val++)
815 *pstring = concat_strings (home, "/", val, (char *)0);
821 /* Run command COM with value VAL. If running the command produces an
822 error, report the error and exit.
824 This is intended to be called from main() to modify Wget's behavior
825 through command-line switches. Since COM is hard-coded in main(),
826 it is not canonicalized, and this aborts when COM is not found.
828 If COMIND's are exported to init.h, this function will be changed
829 to accept COMIND directly. */
832 setoptval (const char *com, const char *val, const char *optname)
834 /* Prepend "--" to OPTNAME. */
835 char *dd_optname = (char *) alloca (2 + strlen (optname) + 1);
838 strcpy (dd_optname + 2, optname);
840 assert (val != NULL);
841 if (!setval_internal (command_by_name (com), dd_optname, val))
845 /* Parse OPT into command and value and run it. For example,
846 run_command("foo=bar") is equivalent to setoptval("foo", "bar").
847 This is used by the `--execute' flag in main.c. */
850 run_command (const char *opt)
854 switch (parse_line (opt, &com, &val, &comind))
857 if (!setval_internal (comind, com, val))
863 fprintf (stderr, _("%s: Invalid --execute command %s\n"),
864 exec_name, quote (opt));
869 /* Generic helper functions, for use with `commands'. */
871 /* Forward declarations: */
876 static bool decode_string (const char *, const struct decode_item *, int, int *);
877 static bool simple_atoi (const char *, const char *, int *);
878 static bool simple_atof (const char *, const char *, double *);
880 #define CMP1(p, c0) (c_tolower((p)[0]) == (c0) && (p)[1] == '\0')
882 #define CMP2(p, c0, c1) (c_tolower((p)[0]) == (c0) \
883 && c_tolower((p)[1]) == (c1) \
886 #define CMP3(p, c0, c1, c2) (c_tolower((p)[0]) == (c0) \
887 && c_tolower((p)[1]) == (c1) \
888 && c_tolower((p)[2]) == (c2) \
892 /* Store the boolean value from VAL to PLACE. COM is ignored,
893 except for error messages. */
895 cmd_boolean (const char *com, const char *val, void *place)
899 if (CMP2 (val, 'o', 'n') || CMP3 (val, 'y', 'e', 's') || CMP1 (val, '1'))
900 /* "on", "yes" and "1" mean true. */
902 else if (CMP3 (val, 'o', 'f', 'f') || CMP2 (val, 'n', 'o') || CMP1 (val, '0'))
903 /* "off", "no" and "0" mean false. */
908 _("%s: %s: Invalid boolean %s; use `on' or `off'.\n"),
909 exec_name, com, quote (val));
913 *(bool *) place = value;
917 /* Set the non-negative integer value from VAL to PLACE. With
918 incorrect specification, the number remains unchanged. */
920 cmd_number (const char *com, const char *val, void *place)
922 if (!simple_atoi (val, val + strlen (val), place)
923 || *(int *) place < 0)
925 fprintf (stderr, _("%s: %s: Invalid number %s.\n"),
926 exec_name, com, quote (val));
932 /* Similar to cmd_number(), only accepts `inf' as a synonym for 0. */
934 cmd_number_inf (const char *com, const char *val, void *place)
936 if (!strcasecmp (val, "inf"))
941 return cmd_number (com, val, place);
944 /* Copy (strdup) the string VAL to a new location and store a
945 pointer to the copy in *PLACE. */
947 cmd_string (const char *com, const char *val, void *place)
949 char **pstring = (char **)place;
951 xfree_null (*pstring);
952 *pstring = xstrdup (val);
957 /* Like cmd_string, but for file names. Tilde expansion is not done
958 here: when a wgetrc file is read, setval_internal_tilde expands a
959 leading `~/' in VAL to the user's home directory afterwards. */
961 cmd_file (const char *com, const char *val, void *place)
963 char **pstring = (char **)place;
965 xfree_null (*pstring);
967 /* #### If VAL is empty, perhaps should set *PLACE to NULL. */
969 *pstring = xstrdup (val);
971 #if defined(WINDOWS) || defined(MSDOS)
972 /* Convert "\" to "/". */
975 for (s = *pstring; *s; s++)
983 /* Like cmd_file, but strips trailing '/' characters. */
985 cmd_directory (const char *com, const char *val, void *place)
989 /* Call cmd_file() to store a copy of VAL with separator
990 canonicalization (backslash -> slash under Windows). These
991 things should perhaps be in a separate function. */
992 if (!cmd_file (com, val, place))
997 while (t > s && *--t == '/')
1003 /* Split VAL by space to a vector of values, and append those values
1004 to vector pointed to by the PLACE argument. If VAL is empty, the
1005 PLACE vector is cleared instead. */
1008 cmd_vector (const char *com, const char *val, void *place)
1010 char ***pvec = (char ***)place;
1013 *pvec = merge_vecs (*pvec, sepstring (val));
1023 cmd_directory_vector (const char *com, const char *val, void *place)
1025 char ***pvec = (char ***)place;
1029 /* Strip the trailing slashes from directories. */
1032 seps = sepstring (val);
1033 for (t = seps; t && *t; t++)
1035 int len = strlen (*t);
1036 /* Skip degenerate case of root directory. */
1039 if ((*t)[len - 1] == '/')
1040 (*t)[len - 1] = '\0';
1043 *pvec = merge_vecs (*pvec, seps);
1053 /* Engine for cmd_bytes and cmd_bytes_sum: converts a string such as
1054 "100k" or "2.5G" to a floating point number. */
1057 parse_bytes_helper (const char *val, double *result)
1059 double number, mult;
1060 const char *end = val + strlen (val);
1062 /* Check for "inf". */
1063 if (0 == strcmp (val, "inf"))
1069 /* Strip trailing whitespace. */
1070 while (val < end && c_isspace (end[-1]))
1075 switch (c_tolower (end[-1]))
1078 --end, mult = 1024.0;
1081 --end, mult = 1048576.0;
1084 --end, mult = 1073741824.0;
1087 --end, mult = 1099511627776.0;
1090 /* Not a recognized suffix: assume it's a digit. (If not,
1091 simple_atof will raise an error.) */
1095 /* Skip leading and trailing whitespace. */
1096 while (val < end && c_isspace (*val))
1098 while (val < end && c_isspace (end[-1]))
1103 if (!simple_atof (val, end, &number) || number < 0)
1106 *result = number * mult;
1110 /* Parse VAL as a number and set its value to PLACE (which should
1113 By default, the value is assumed to be in bytes. If "K", "M", or
1114 "G" are appended, the value is multiplied with 1<<10, 1<<20, or
1115 1<<30, respectively. Floating point values are allowed and are
1116 cast to integer before use. The idea is to be able to use things
1117 like 1.5k instead of "1536".
1119 The string "inf" is returned as 0.
1121 In case of error, false is returned and memory pointed to by PLACE
1122 remains unmodified. */
1125 cmd_bytes (const char *com, const char *val, void *place)
1128 if (!parse_bytes_helper (val, &byte_value))
1130 fprintf (stderr, _("%s: %s: Invalid byte value %s\n"),
1131 exec_name, com, quote (val));
1134 *(wgint *)place = (wgint)byte_value;
1138 /* Like cmd_bytes, but PLACE is interpreted as a pointer to
1139 SUM_SIZE_INT. It works by converting the string to double, therefore
1140 working with values up to 2^53-1 without loss of precision. This
1141 value (8192 TB) is large enough to serve for a while. */
1144 cmd_bytes_sum (const char *com, const char *val, void *place)
1147 if (!parse_bytes_helper (val, &byte_value))
1149 fprintf (stderr, _("%s: %s: Invalid byte value %s\n"),
1150 exec_name, com, quote (val));
1153 *(SUM_SIZE_INT *) place = (SUM_SIZE_INT) byte_value;
1157 /* Store the value of VAL to *PLACE. The value is a time period, by
1158 default expressed in seconds, but also accepting suffixes "m", "h",
1159 "d", and "w" for minutes, hours, days, and weeks respectively. */
1162 cmd_time (const char *com, const char *val, void *place)
1164 double number, mult;
1165 const char *end = val + strlen (val);
1167 /* Strip trailing whitespace. */
1168 while (val < end && c_isspace (end[-1]))
1174 fprintf (stderr, _("%s: %s: Invalid time period %s\n"),
1175 exec_name, com, quote (val));
1179 switch (c_tolower (end[-1]))
1182 --end, mult = 1; /* seconds */
1185 --end, mult = 60; /* minutes */
1188 --end, mult = 3600; /* hours */
1191 --end, mult = 86400.0; /* days */
1194 --end, mult = 604800.0; /* weeks */
1197 /* Not a recognized suffix: assume it belongs to the number.
1198 (If not, simple_atof will raise an error.) */
1202 /* Skip leading and trailing whitespace. */
1203 while (val < end && c_isspace (*val))
1205 while (val < end && c_isspace (end[-1]))
1210 if (!simple_atof (val, end, &number))
1213 *(double *)place = number * mult;
1219 cmd_cert_type (const char *com, const char *val, void *place)
1221 static const struct decode_item choices[] = {
1222 { "pem", keyfile_pem },
1223 { "der", keyfile_asn1 },
1224 { "asn1", keyfile_asn1 },
1226 int ok = decode_string (val, choices, countof (choices), place);
1228 fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val));
1233 /* Specialized helper functions, used by `commands' to handle some
1234 options specially. */
1236 static bool check_user_specified_header (const char *);
1239 cmd_spec_dirstruct (const char *com, const char *val, void *place_ignored)
1241 if (!cmd_boolean (com, val, &opt.dirstruct))
1243 /* Since dirstruct behaviour is explicitly changed, no_dirstruct
1244 must be affected inversely. */
1246 opt.no_dirstruct = false;
1248 opt.no_dirstruct = true;
1253 cmd_spec_header (const char *com, const char *val, void *place_ignored)
1255 /* Empty value means reset the list of headers. */
1258 free_vec (opt.user_headers);
1259 opt.user_headers = NULL;
1263 if (!check_user_specified_header (val))
1265 fprintf (stderr, _("%s: %s: Invalid header %s.\n"),
1266 exec_name, com, quote (val));
1269 opt.user_headers = vec_append (opt.user_headers, val);
1274 cmd_spec_warc_header (const char *com, const char *val, void *place_ignored)
1276 /* Empty value means reset the list of headers. */
1279 free_vec (opt.warc_user_headers);
1280 opt.warc_user_headers = NULL;
1284 if (!check_user_specified_header (val))
1286 fprintf (stderr, _("%s: %s: Invalid WARC header %s.\n"),
1287 exec_name, com, quote (val));
1290 opt.warc_user_headers = vec_append (opt.warc_user_headers, val);
1295 cmd_spec_htmlify (const char *com, const char *val, void *place_ignored)
1297 int flag = cmd_boolean (com, val, &opt.htmlify);
1298 if (flag && !opt.htmlify)
1299 opt.remove_listing = false;
1303 /* Set the "mirror" mode. It means: recursive download, timestamping,
1304 no limit on max. recursion depth, and don't remove listings. */
1307 cmd_spec_mirror (const char *com, const char *val, void *place_ignored)
1311 if (!cmd_boolean (com, val, &mirror))
1315 opt.recursive = true;
1316 if (!opt.no_dirstruct)
1317 opt.dirstruct = true;
1318 opt.timestamping = true;
1319 opt.reclevel = INFINITE_RECURSION;
1320 opt.remove_listing = false;
1325 /* Validate --prefer-family and set the choice. Allowed values are
1326 "IPv4", "IPv6", and "none". */
1329 cmd_spec_prefer_family (const char *com, const char *val, void *place_ignored)
1331 static const struct decode_item choices[] = {
1332 { "IPv4", prefer_ipv4 },
1333 { "IPv6", prefer_ipv6 },
1334 { "none", prefer_none },
1336 int prefer_family = prefer_none;
1337 int ok = decode_string (val, choices, countof (choices), &prefer_family);
1339 fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val));
1340 opt.prefer_family = prefer_family;
1344 /* Set opt.progress_type to VAL, but verify that it's a valid progress
1345 implementation before that. */
1348 cmd_spec_progress (const char *com, const char *val, void *place_ignored)
1350 if (!valid_progress_implementation_p (val))
1352 fprintf (stderr, _("%s: %s: Invalid progress type %s.\n"),
1353 exec_name, com, quote (val));
1356 xfree_null (opt.progress_type);
1358 /* Don't call set_progress_implementation here. It will be called
1359 in main() when it becomes clear what the log output is. */
1360 opt.progress_type = xstrdup (val);
1364 /* Set opt.recursive to VAL as with cmd_boolean. If opt.recursive is
1365 set to true, also set opt.dirstruct to true, unless opt.no_dirstruct
1369 cmd_spec_recursive (const char *com, const char *val, void *place_ignored)
1371 if (!cmd_boolean (com, val, &opt.recursive))
1375 if (opt.recursive && !opt.no_dirstruct)
1376 opt.dirstruct = true;
1381 /* Validate --regex-type and set the choice. */
1384 cmd_spec_regex_type (const char *com, const char *val, void *place_ignored)
1386 static const struct decode_item choices[] = {
1387 { "posix", regex_type_posix },
1389 { "pcre", regex_type_pcre },
1392 int regex_type = regex_type_posix;
1393 int ok = decode_string (val, choices, countof (choices), ®ex_type);
1395 fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val));
1396 opt.regex_type = regex_type;
1401 cmd_spec_restrict_file_names (const char *com, const char *val, void *place_ignored)
1403 int restrict_os = opt.restrict_files_os;
1404 int restrict_ctrl = opt.restrict_files_ctrl;
1405 int restrict_case = opt.restrict_files_case;
1406 int restrict_nonascii = opt.restrict_files_nonascii;
1410 #define VAL_IS(string_literal) BOUNDED_EQUAL (val, end, string_literal)
1414 end = strchr (val, ',');
1416 end = val + strlen (val);
1418 if (VAL_IS ("unix"))
1419 restrict_os = restrict_unix;
1420 else if (VAL_IS ("windows"))
1421 restrict_os = restrict_windows;
1422 else if (VAL_IS ("lowercase"))
1423 restrict_case = restrict_lowercase;
1424 else if (VAL_IS ("uppercase"))
1425 restrict_case = restrict_uppercase;
1426 else if (VAL_IS ("nocontrol"))
1427 restrict_ctrl = false;
1428 else if (VAL_IS ("ascii"))
1429 restrict_nonascii = true;
1432 fprintf (stderr, _("\
1433 %s: %s: Invalid restriction %s,\n\
1434 use [unix|windows],[lowercase|uppercase],[nocontrol],[ascii].\n"),
1435 exec_name, com, quote (val));
1442 while (*val && *end);
1446 opt.restrict_files_os = restrict_os;
1447 opt.restrict_files_ctrl = restrict_ctrl;
1448 opt.restrict_files_case = restrict_case;
1449 opt.restrict_files_nonascii = restrict_nonascii;
#ifdef HAVE_SSL
/* Decode VAL, one of "auto", "sslv2", "sslv3" or "tlsv1" (compared
   without regard to case by decode_string), and store the matching
   secure_protocol_* code through PLACE, which decode_string writes as
   an int.  Returns false, after printing an error that mentions COM,
   when VAL is none of them; PLACE is left alone in that case.  */

static bool
cmd_spec_secure_protocol (const char *com, const char *val, void *place)
{
  static const struct decode_item choices[] = {
    { "auto", secure_protocol_auto },
    { "sslv2", secure_protocol_sslv2 },
    { "sslv3", secure_protocol_sslv3 },
    { "tlsv1", secure_protocol_tlsv1 },
  };

  if (decode_string (val, choices, countof (choices), place))
    return true;

  fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val));
  return false;
}
#endif
1471 /* Set all three timeout values. */
1474 cmd_spec_timeout (const char *com, const char *val, void *place_ignored)
1477 if (!cmd_time (com, val, &value))
1479 opt.read_timeout = value;
1480 opt.connect_timeout = value;
1481 opt.dns_timeout = value;
1486 cmd_spec_useragent (const char *com, const char *val, void *place_ignored)
1488 /* Disallow embedded newlines. */
1489 if (strchr (val, '\n'))
1491 fprintf (stderr, _("%s: %s: Invalid value %s.\n"),
1492 exec_name, com, quote (val));
1495 xfree_null (opt.useragent);
1496 opt.useragent = xstrdup (val);
1500 /* The "verbose" option cannot be cmd_boolean because the variable is
1501 not bool -- it's of type int (-1 means uninitialized because of
1502 some random hackery for disallowing -q -v). */
1505 cmd_spec_verbose (const char *com, const char *val, void *place_ignored)
1508 if (cmd_boolean (com, val, &flag))
1516 /* Miscellaneous useful routines. */
/* A very simple atoi clone, more useful than atoi because it works on
   delimited strings, and has error reportage.

   Parses the text in [BEG, END): optional leading whitespace, an
   optional '+' or '-', then decimal digits running up to END.
   Returns true on success and stores the result to *DEST.  Returns
   false, leaving *DEST alone, when there is nothing after the
   optional sign, when a non-digit is found before END, or when the
   value does not fit in an int.  */

static bool
simple_atoi (const char *beg, const char *end, int *dest)
{
  int result = 0;
  bool negative = false;
  const char *p = beg;

  while (p < end && c_isspace (*p))
    ++p;
  if (p < end && (*p == '-' || *p == '+'))
    {
      negative = (*p == '-');
      ++p;
    }
  if (p == end)
    return false;

  /* Read negative numbers in a separate loop because the most
     negative integer cannot be represented as a positive number.

     The range check is done BEFORE the arithmetic (INT_MAX and INT_MIN
     come from <limits.h>): signed overflow is undefined behavior, and
     comparing the wrapped result with the previous one does not catch
     every overflow anyway.  */

  if (!negative)
    for (; p < end && c_isdigit (*p); p++)
      {
        int digit = *p - '0';
        if (result > (INT_MAX - digit) / 10)
          return false;         /* overflow */
        result = (10 * result) + digit;
      }
  else
    for (; p < end && c_isdigit (*p); p++)
      {
        int digit = *p - '0';
        /* INT_MIN + digit is negative, so the division rounds toward
           zero, which is exactly the bound needed here.  */
        if (result < (INT_MIN + digit) / 10)
          return false;         /* underflow */
        result = (10 * result) - digit;
      }

  /* Anything left over is trailing garbage.  */
  if (p != end)
    return false;

  *dest = result;
  return true;
}
/* Trivial atof, with error reporting.  Parses the text in [BEG, END):
   optional leading whitespace, an optional '+' or '-', then digits
   with at most one '.' among them ("<digits>[.<digits>]"); doesn't
   handle exponential notation.  Returns true on success and stores
   the result to *DEST.  Returns false, leaving *DEST alone, when a
   second '.' or any other non-digit shows up, or when there is no
   digit at all.  */

static bool
simple_atof (const char *beg, const char *end, double *dest)
{
  double value = 0;
  double scale = 1;             /* 10^(number of fraction digits) */
  bool minus = false;
  bool in_fraction = false;
  bool have_digit = false;
  const char *cur = beg;

  while (cur < end && c_isspace (*cur))
    cur++;
  if (cur < end && (*cur == '-' || *cur == '+'))
    {
      minus = (*cur == '-');
      cur++;
    }

  while (cur < end)
    {
      const char ch = *cur++;

      if (ch == '.')
        {
          /* Only one decimal point is allowed.  */
          if (in_fraction)
            return false;
          in_fraction = true;
          continue;
        }
      if (!c_isdigit (ch))
        return false;

      have_digit = true;
      if (in_fraction)
        {
          scale *= 10;
          value += (ch - '0') / scale;
        }
      else
        value = (10 * value) + (ch - '0');
    }

  if (!have_digit)
    return false;

  *dest = minus ? -value : value;
  return true;
}
/* Verify that the user-specified header in S is valid.  Returns true
   when S starts with at least one character that is neither
   whitespace nor `:', immediately followed by `:', and S contains no
   newline anywhere; false otherwise.  */

static bool
check_user_specified_header (const char *s)
{
  const char *name_end = s;

  /* Walk over the would-be header name.  */
  while (*name_end != '\0' && *name_end != ':' && !c_isspace (*name_end))
    ++name_end;

  /* The header MUST contain `:' preceded by at least one
     non-whitespace character, and it MUST NOT contain newlines.  */
  return *name_end == ':' && name_end != s && strchr (s, '\n') == NULL;
}
1642 /* Decode VAL into a number, according to ITEMS. */
1645 decode_string (const char *val, const struct decode_item *items, int itemcount,
1649 for (i = 0; i < itemcount; i++)
1650 if (0 == strcasecmp (val, items[i].name))
1652 *place = items[i].code;
1659 void cleanup_html_url (void);
1662 /* Free the memory allocated by global variables. */
1666 /* Free external resources, close files, etc. */
1669 fclose (output_stream);
1670 /* No need to check for error because Wget flushes its output (and
1671 checks for errors) after any data arrives. */
1673 /* We're exiting anyway so there's no real need to call free()
1674 hundreds of times. Skipping the frees will make Wget exit
1677 However, when detecting leaks, it's crucial to free() everything
1678 because then you can find the real leaks, i.e. the allocated
1679 memory which grows with the size of the program. */
1685 cleanup_html_url ();
1690 extern acc_t *netrc_list;
1691 free_netrc (netrc_list);
1693 xfree_null (opt.choose_config);
1694 xfree_null (opt.lfilename);
1695 xfree_null (opt.dir_prefix);
1696 xfree_null (opt.input_filename);
1697 xfree_null (opt.output_document);
1698 free_vec (opt.accepts);
1699 free_vec (opt.rejects);
1700 free_vec (opt.excludes);
1701 free_vec (opt.includes);
1702 free_vec (opt.domains);
1703 free_vec (opt.follow_tags);
1704 free_vec (opt.ignore_tags);
1705 xfree_null (opt.progress_type);
1706 xfree_null (opt.ftp_user);
1707 xfree_null (opt.ftp_passwd);
1708 xfree_null (opt.ftp_proxy);
1709 xfree_null (opt.https_proxy);
1710 xfree_null (opt.http_proxy);
1711 free_vec (opt.no_proxy);
1712 xfree_null (opt.useragent);
1713 xfree_null (opt.referer);
1714 xfree_null (opt.http_user);
1715 xfree_null (opt.http_passwd);
1716 free_vec (opt.user_headers);
1717 free_vec (opt.warc_user_headers);
1719 xfree_null (opt.cert_file);
1720 xfree_null (opt.private_key);
1721 xfree_null (opt.ca_directory);
1722 xfree_null (opt.ca_cert);
1723 xfree_null (opt.random_file);
1724 xfree_null (opt.egd_file);
1726 xfree_null (opt.bind_address);
1727 xfree_null (opt.cookies_input);
1728 xfree_null (opt.cookies_output);
1729 xfree_null (opt.user);
1730 xfree_null (opt.passwd);
1731 xfree_null (opt.base_href);
1733 #endif /* DEBUG_MALLOC */
1736 /* Unit testing routines. */
1741 test_commands_sorted()
1743 int prev_idx = 0, next_idx = 1;
1744 int command_count = countof (commands) - 1;
1746 while (next_idx <= command_count)
1748 cmp = strcasecmp (commands[prev_idx].name, commands[next_idx].name);
1751 mu_assert ("FAILED", false);
1764 test_cmd_spec_restrict_file_names()
1769 int expected_restrict_files_os;
1770 int expected_restrict_files_ctrl;
1771 int expected_restrict_files_case;
1774 { "windows", restrict_windows, true, restrict_no_case_restriction, true },
1775 { "windows,", restrict_windows, true, restrict_no_case_restriction, true },
1776 { "windows,lowercase", restrict_windows, true, restrict_lowercase, true },
1777 { "unix,nocontrol,lowercase,", restrict_unix, false, restrict_lowercase, true },
1780 for (i = 0; i < sizeof(test_array)/sizeof(test_array[0]); ++i)
1785 res = cmd_spec_restrict_file_names ("dummy", test_array[i].val, NULL);
1788 fprintf (stderr, "test_cmd_spec_restrict_file_names: TEST %d\n", i); fflush (stderr);
1789 fprintf (stderr, "opt.restrict_files_os: %d\n", opt.restrict_files_os); fflush (stderr);
1790 fprintf (stderr, "opt.restrict_files_ctrl: %d\n", opt.restrict_files_ctrl); fflush (stderr);
1791 fprintf (stderr, "opt.restrict_files_case: %d\n", opt.restrict_files_case); fflush (stderr);
1793 mu_assert ("test_cmd_spec_restrict_file_names: wrong result",
1794 res == test_array[i].result
1795 && opt.restrict_files_os == test_array[i].expected_restrict_files_os
1796 && opt.restrict_files_ctrl == test_array[i].expected_restrict_files_ctrl
1797 && opt.restrict_files_case == test_array[i].expected_restrict_files_case);
1803 #endif /* TESTING */