1 /* Reading/parsing the initialization file.
2 Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
3 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation,
6 This file is part of GNU Wget.
8 GNU Wget is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 GNU Wget is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with Wget. If not, see <http://www.gnu.org/licenses/>.
21 Additional permission under GNU GPL version 3 section 7
23 If you modify this program, or any covered work, by linking or
24 combining it with the OpenSSL project's OpenSSL library (or a
25 modified version of that library), containing parts covered by the
26 terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
27 grants you additional permission to convey the resulting work.
28 Corresponding Source for a non-source form of such a combination
29 shall include the source code for the parts of OpenSSL used as well
30 as that of the covered work. */
42 /* not all systems provide PATH_MAX in limits.h */
44 # include <sys/param.h>
46 # define PATH_MAX MAXPATHLEN
65 #include "recur.h" /* for INFINITE_RECURSION */
66 #include "convert.h" /* for convert_cleanup */
67 #include "res.h" /* for res_cleanup */
68 #include "http.h" /* for http_cleanup */
69 #include "retr.h" /* for output_stream */
70 #include "warc.h" /* for warc_close */
71 #include "spider.h" /* for spider_cleanup */
79 #define CMD_DECLARE(func) static bool func (const char *, const char *, void *)
81 CMD_DECLARE (cmd_boolean);
82 CMD_DECLARE (cmd_bytes);
83 CMD_DECLARE (cmd_bytes_sum);
85 CMD_DECLARE (cmd_cert_type);
87 CMD_DECLARE (cmd_directory_vector);
88 CMD_DECLARE (cmd_number);
89 CMD_DECLARE (cmd_number_inf);
90 CMD_DECLARE (cmd_string);
91 CMD_DECLARE (cmd_string_uppercase);
92 CMD_DECLARE (cmd_file);
93 CMD_DECLARE (cmd_directory);
94 CMD_DECLARE (cmd_time);
95 CMD_DECLARE (cmd_vector);
97 CMD_DECLARE (cmd_spec_dirstruct);
98 CMD_DECLARE (cmd_spec_header);
99 CMD_DECLARE (cmd_spec_warc_header);
100 CMD_DECLARE (cmd_spec_htmlify);
101 CMD_DECLARE (cmd_spec_mirror);
102 CMD_DECLARE (cmd_spec_prefer_family);
103 CMD_DECLARE (cmd_spec_progress);
104 CMD_DECLARE (cmd_spec_recursive);
105 CMD_DECLARE (cmd_spec_regex_type);
106 CMD_DECLARE (cmd_spec_restrict_file_names);
107 CMD_DECLARE (cmd_spec_report_speed);
109 CMD_DECLARE (cmd_spec_secure_protocol);
111 CMD_DECLARE (cmd_spec_timeout);
112 CMD_DECLARE (cmd_spec_useragent);
113 CMD_DECLARE (cmd_spec_verbose);
115 /* List of recognized commands, each consisting of name, place and
116 function. When adding a new command, simply add it to the list,
117 but be sure to keep the list sorted alphabetically, as
118 command_by_name's binary search depends on it. Also, be sure to
119 add any entries that allocate memory (e.g. cmd_string and
120 cmd_vector) to the cleanup() function below. */
122 static const struct {
125 bool (*action) (const char *, const char *, void *);
127 /* KEEP THIS LIST ALPHABETICALLY SORTED */
128 { "accept", &opt.accepts, cmd_vector },
129 { "acceptregex", &opt.acceptregex_s, cmd_string },
130 { "addhostdir", &opt.add_hostdir, cmd_boolean },
131 { "adjustextension", &opt.adjust_extension, cmd_boolean },
132 { "alwaysrest", &opt.always_rest, cmd_boolean }, /* deprecated */
133 { "askpassword", &opt.ask_passwd, cmd_boolean },
134 { "authnochallenge", &opt.auth_without_challenge,
136 { "background", &opt.background, cmd_boolean },
137 { "backupconverted", &opt.backup_converted, cmd_boolean },
138 { "backups", &opt.backups, cmd_number },
139 { "base", &opt.base_href, cmd_string },
140 { "bindaddress", &opt.bind_address, cmd_string },
141 { "bodydata", &opt.body_data, cmd_string },
142 { "bodyfile", &opt.body_file, cmd_string },
144 { "cacertificate", &opt.ca_cert, cmd_file },
146 { "cache", &opt.allow_cache, cmd_boolean },
148 { "cadirectory", &opt.ca_directory, cmd_directory },
149 { "certificate", &opt.cert_file, cmd_file },
150 { "certificatetype", &opt.cert_type, cmd_cert_type },
151 { "checkcertificate", &opt.check_cert, cmd_boolean },
153 { "chooseconfig", &opt.choose_config, cmd_file },
154 { "connecttimeout", &opt.connect_timeout, cmd_time },
155 { "contentdisposition", &opt.content_disposition, cmd_boolean },
156 { "contentonerror", &opt.content_on_error, cmd_boolean },
157 { "continue", &opt.always_rest, cmd_boolean },
158 { "convertlinks", &opt.convert_links, cmd_boolean },
159 { "cookies", &opt.cookies, cmd_boolean },
160 { "cutdirs", &opt.cut_dirs, cmd_number },
161 { "debug", &opt.debug, cmd_boolean },
162 { "defaultpage", &opt.default_page, cmd_string },
163 { "deleteafter", &opt.delete_after, cmd_boolean },
164 { "dirprefix", &opt.dir_prefix, cmd_directory },
165 { "dirstruct", NULL, cmd_spec_dirstruct },
166 { "dnscache", &opt.dns_cache, cmd_boolean },
167 { "dnstimeout", &opt.dns_timeout, cmd_time },
168 { "domains", &opt.domains, cmd_vector },
169 { "dotbytes", &opt.dot_bytes, cmd_bytes },
170 { "dotsinline", &opt.dots_in_line, cmd_number },
171 { "dotspacing", &opt.dot_spacing, cmd_number },
172 { "dotstyle", &opt.dot_style, cmd_string }, /* deprecated */
174 { "egdfile", &opt.egd_file, cmd_file },
176 { "excludedirectories", &opt.excludes, cmd_directory_vector },
177 { "excludedomains", &opt.exclude_domains, cmd_vector },
178 { "followftp", &opt.follow_ftp, cmd_boolean },
179 { "followtags", &opt.follow_tags, cmd_vector },
180 { "forcehtml", &opt.force_html, cmd_boolean },
181 { "ftppasswd", &opt.ftp_passwd, cmd_string }, /* deprecated */
182 { "ftppassword", &opt.ftp_passwd, cmd_string },
183 { "ftpproxy", &opt.ftp_proxy, cmd_string },
185 { "ftpstmlf", &opt.ftp_stmlf, cmd_boolean },
186 #endif /* def __VMS */
187 { "ftpuser", &opt.ftp_user, cmd_string },
188 { "glob", &opt.ftp_glob, cmd_boolean },
189 { "header", NULL, cmd_spec_header },
190 { "htmlextension", &opt.adjust_extension, cmd_boolean }, /* deprecated */
191 { "htmlify", NULL, cmd_spec_htmlify },
192 { "httpkeepalive", &opt.http_keep_alive, cmd_boolean },
193 { "httppasswd", &opt.http_passwd, cmd_string }, /* deprecated */
194 { "httppassword", &opt.http_passwd, cmd_string },
195 { "httpproxy", &opt.http_proxy, cmd_string },
197 { "httpsonly", &opt.https_only, cmd_boolean },
199 { "httpsproxy", &opt.https_proxy, cmd_string },
200 { "httpuser", &opt.http_user, cmd_string },
201 { "ignorecase", &opt.ignore_case, cmd_boolean },
202 { "ignorelength", &opt.ignore_length, cmd_boolean },
203 { "ignoretags", &opt.ignore_tags, cmd_vector },
204 { "includedirectories", &opt.includes, cmd_directory_vector },
206 { "inet4only", &opt.ipv4_only, cmd_boolean },
207 { "inet6only", &opt.ipv6_only, cmd_boolean },
209 { "input", &opt.input_filename, cmd_file },
210 { "iri", &opt.enable_iri, cmd_boolean },
211 { "keepsessioncookies", &opt.keep_session_cookies, cmd_boolean },
212 { "limitrate", &opt.limit_rate, cmd_bytes },
213 { "loadcookies", &opt.cookies_input, cmd_file },
214 { "localencoding", &opt.locale, cmd_string },
215 { "logfile", &opt.lfilename, cmd_file },
216 { "login", &opt.ftp_user, cmd_string },/* deprecated*/
217 { "maxredirect", &opt.max_redirect, cmd_number },
218 { "method", &opt.method, cmd_string_uppercase },
219 { "mirror", NULL, cmd_spec_mirror },
220 { "netrc", &opt.netrc, cmd_boolean },
221 { "noclobber", &opt.noclobber, cmd_boolean },
222 { "noconfig", &opt.noconfig, cmd_boolean },
223 { "noparent", &opt.no_parent, cmd_boolean },
224 { "noproxy", &opt.no_proxy, cmd_vector },
225 { "numtries", &opt.ntry, cmd_number_inf },/* deprecated*/
226 { "outputdocument", &opt.output_document, cmd_file },
227 { "pagerequisites", &opt.page_requisites, cmd_boolean },
228 { "passiveftp", &opt.ftp_pasv, cmd_boolean },
229 { "passwd", &opt.ftp_passwd, cmd_string },/* deprecated*/
230 { "password", &opt.passwd, cmd_string },
231 { "postdata", &opt.post_data, cmd_string },
232 { "postfile", &opt.post_file_name, cmd_file },
233 { "preferfamily", NULL, cmd_spec_prefer_family },
234 { "preservepermissions", &opt.preserve_perm, cmd_boolean },
236 { "privatekey", &opt.private_key, cmd_file },
237 { "privatekeytype", &opt.private_key_type, cmd_cert_type },
239 { "progress", &opt.progress_type, cmd_spec_progress },
240 { "protocoldirectories", &opt.protocol_directories, cmd_boolean },
241 { "proxypasswd", &opt.proxy_passwd, cmd_string }, /* deprecated */
242 { "proxypassword", &opt.proxy_passwd, cmd_string },
243 { "proxyuser", &opt.proxy_user, cmd_string },
244 { "quiet", &opt.quiet, cmd_boolean },
245 { "quota", &opt.quota, cmd_bytes_sum },
247 { "randomfile", &opt.random_file, cmd_file },
249 { "randomwait", &opt.random_wait, cmd_boolean },
250 { "readtimeout", &opt.read_timeout, cmd_time },
251 { "reclevel", &opt.reclevel, cmd_number_inf },
252 { "recursive", NULL, cmd_spec_recursive },
253 { "referer", &opt.referer, cmd_string },
254 { "regextype", &opt.regex_type, cmd_spec_regex_type },
255 { "reject", &opt.rejects, cmd_vector },
256 { "rejectregex", &opt.rejectregex_s, cmd_string },
257 { "relativeonly", &opt.relative_only, cmd_boolean },
258 { "remoteencoding", &opt.encoding_remote, cmd_string },
259 { "removelisting", &opt.remove_listing, cmd_boolean },
260 { "reportspeed", &opt.report_bps, cmd_spec_report_speed},
261 { "restrictfilenames", NULL, cmd_spec_restrict_file_names },
262 { "retrsymlinks", &opt.retr_symlinks, cmd_boolean },
263 { "retryconnrefused", &opt.retry_connrefused, cmd_boolean },
264 { "robots", &opt.use_robots, cmd_boolean },
265 { "savecookies", &opt.cookies_output, cmd_file },
266 { "saveheaders", &opt.save_headers, cmd_boolean },
268 { "secureprotocol", &opt.secure_protocol, cmd_spec_secure_protocol },
270 { "serverresponse", &opt.server_response, cmd_boolean },
271 { "showalldnsentries", &opt.show_all_dns_entries, cmd_boolean },
272 { "showprogress", &opt.show_progress, cmd_boolean },
273 { "spanhosts", &opt.spanhost, cmd_boolean },
274 { "spider", &opt.spider, cmd_boolean },
275 { "startpos", &opt.start_pos, cmd_bytes },
276 { "strictcomments", &opt.strict_comments, cmd_boolean },
277 { "timeout", NULL, cmd_spec_timeout },
278 { "timestamping", &opt.timestamping, cmd_boolean },
279 { "tries", &opt.ntry, cmd_number_inf },
280 { "trustservernames", &opt.trustservernames, cmd_boolean },
281 { "unlink", &opt.unlink, cmd_boolean },
282 { "useproxy", &opt.use_proxy, cmd_boolean },
283 { "user", &opt.user, cmd_string },
284 { "useragent", NULL, cmd_spec_useragent },
285 { "useservertimestamps", &opt.useservertimestamps, cmd_boolean },
286 { "verbose", NULL, cmd_spec_verbose },
287 { "wait", &opt.wait, cmd_time },
288 { "waitretry", &opt.waitretry, cmd_time },
289 { "warccdx", &opt.warc_cdx_enabled, cmd_boolean },
290 { "warccdxdedup", &opt.warc_cdx_dedup_filename, cmd_file },
292 { "warccompression", &opt.warc_compression_enabled, cmd_boolean },
294 { "warcdigests", &opt.warc_digests_enabled, cmd_boolean },
295 { "warcfile", &opt.warc_filename, cmd_file },
296 { "warcheader", NULL, cmd_spec_warc_header },
297 { "warckeeplog", &opt.warc_keep_log, cmd_boolean },
298 { "warcmaxsize", &opt.warc_maxsize, cmd_bytes },
299 { "warctempdir", &opt.warc_tempdir, cmd_directory },
301 { "wdebug", &opt.wdebug, cmd_boolean },
305 /* Look up CMDNAME in the commands[] and return its position in the
306 array. If CMDNAME is not found, return -1. */
309 command_by_name (const char *cmdname)
311 /* Use binary search for speed. Wget has ~100 commands, which
312 guarantees a worst case performance of 7 string comparisons. */
313 int lo = 0, hi = countof (commands) - 1;
317 int mid = (lo + hi) >> 1;
318 int cmp = strcasecmp (cmdname, commands[mid].name);
329 /* Reset the variables to default values. */
335 /* Most of the default values are 0 (and 0.0, NULL, and false).
336 Just reset everything, and fill in the non-zero values. Note
337 that initializing pointers to NULL this way is technically
338 illegal, but porting Wget to a machine where NULL is not all-zero
339 bit pattern will be the least of the implementors' worries. */
346 opt.add_hostdir = true;
350 opt.http_keep_alive = true;
351 opt.use_proxy = true;
352 tmp = getenv ("no_proxy");
354 opt.no_proxy = sepstring (tmp);
355 opt.prefer_family = prefer_none;
356 opt.allow_cache = true;
358 opt.read_timeout = 900;
359 opt.use_robots = true;
361 opt.remove_listing = true;
363 opt.dot_bytes = 1024;
364 opt.dot_spacing = 10;
365 opt.dots_in_line = 50;
367 opt.dns_cache = true;
371 opt.check_cert = true;
374 /* The default for file name restriction defaults to the OS type. */
375 #if defined(WINDOWS) || defined(MSDOS) || defined(__CYGWIN__)
376 opt.restrict_files_os = restrict_windows;
378 opt.restrict_files_os = restrict_unix;
380 opt.restrict_files_ctrl = true;
381 opt.restrict_files_nonascii = false;
382 opt.restrict_files_case = restrict_no_case_restriction;
384 opt.regex_type = regex_type_posix;
386 opt.max_redirect = 20;
391 opt.enable_iri = true;
393 opt.enable_iri = false;
396 opt.encoding_remote = NULL;
398 opt.useservertimestamps = true;
399 opt.show_all_dns_entries = false;
401 opt.warc_maxsize = 0; /* 1024 * 1024 * 1024; */
403 opt.warc_compression_enabled = true;
405 opt.warc_compression_enabled = false;
407 opt.warc_digests_enabled = true;
408 opt.warc_cdx_enabled = false;
409 opt.warc_cdx_dedup_filename = NULL;
410 opt.warc_tempdir = NULL;
411 opt.warc_keep_log = true;
413 /* Use a negative value to mark the absence of --start-pos option */
415 opt.show_progress = false;
418 /* Return the user's home directory (strdup-ed), or NULL if none is
423 static char *buf = NULL;
424 static char *home, *ret;
428 home = getenv ("HOME");
434 /* Under MSDOS, if $HOME isn't defined, use the directory where
435 `wget.exe' resides. */
436 const char *_w32_get_argv0 (void); /* in libwatt.a/pcconfig.c */
439 buff = _w32_get_argv0 ();
441 p = strrchr (buf, '/'); /* djgpp */
443 p = strrchr (buf, '\\'); /* others */
447 buff = malloc (len + 1);
451 strncpy (buff, _w32_get_argv0 (), len);
455 #elif !defined(WINDOWS)
456 /* If HOME is not defined, try getting it from the password
458 struct passwd *pwd = getpwuid (getuid ());
459 if (!pwd || !pwd->pw_dir)
463 /* Under Windows, if $HOME isn't defined, use the directory where
464 `wget.exe' resides. */
470 ret = home ? xstrdup (home) : NULL;
476 /* Check the 'WGETRC' environment variable and return the file name
477 if 'WGETRC' is set and is a valid file.
478 If the `WGETRC' variable exists but the file does not exist, the
479 function will exit(). */
481 wgetrc_env_file_name (void)
483 char *env = getenv ("WGETRC");
486 if (!file_exists_p (env))
488 fprintf (stderr, _("%s: WGETRC points to %s, which doesn't exist.\n"),
492 return xstrdup (env);
497 /* Check for the existence of '$HOME/.wgetrc' and return its path
498 if it exists and is set. */
500 wgetrc_user_file_name (void)
504 /* If that failed, try $HOME/.wgetrc (or equivalent). */
507 file = "SYS$LOGIN:.wgetrc";
508 #else /* def __VMS */
511 file = aprintf ("%s/.wgetrc", home);
513 #endif /* def __VMS [else] */
517 if (!file_exists_p (file))
525 /* Return the path to the user's .wgetrc. This is either the value of
526 `WGETRC' environment variable, or `$HOME/.wgetrc'.
528 Additionally, for windows, look in the directory where wget.exe
531 wgetrc_file_name (void)
533 char *file = wgetrc_env_file_name ();
537 file = wgetrc_user_file_name ();
540 /* Under Windows, if we still haven't found .wgetrc, look for the file
541 `wget.ini' in the directory where `wget.exe' resides; we do this for
542 backward compatibility with previous versions of Wget.
543 SYSTEM_WGETRC should not be defined under WINDOWS. */
546 char *home = home_dir ();
552 file = aprintf ("%s/wget.ini", home);
553 if (!file_exists_p (file))
566 /* Return values of parse_line. */
574 static enum parse_line parse_line (const char *, char **, char **, int *);
575 static bool setval_internal (int, const char *, const char *);
576 static bool setval_internal_tilde (int, const char *, const char *);
578 /* Initialize variables from a wgetrc file. Returns false (failure) if
579 there were errors in the file; failing to open FILE is not an error. */
582 run_wgetrc (const char *file)
590 fp = fopen (file, "r");
593 fprintf (stderr, _("%s: Cannot read %s (%s).\n"), exec_name,
594 file, strerror (errno));
595 return true; /* not a fatal error */
598 while (getline (&line, &bufsize, fp) > 0)
600 char *com = NULL, *val = NULL;
603 /* Parse the line. */
604 switch (parse_line (line, &com, &val, &comind))
607 /* If everything is OK, set the value. */
608 if (!setval_internal_tilde (comind, com, val))
610 fprintf (stderr, _("%s: Error in %s at line %d.\n"),
611 exec_name, file, ln);
615 case line_syntax_error:
616 fprintf (stderr, _("%s: Syntax error in %s at line %d.\n"),
617 exec_name, file, ln);
620 case line_unknown_command:
621 fprintf (stderr, _("%s: Unknown command %s in %s at line %d.\n"),
622 exec_name, quote (com), file, ln);
640 /* Initialize the defaults and run the system wgetrc and user's own
645 char *file, *env_sysrc;
648 /* Run a non-standard system rc file when the according environment
649 variable has been set. For internal testing purposes only! */
650 env_sysrc = getenv ("SYSTEM_WGETRC");
651 if (env_sysrc && file_exists_p (env_sysrc))
653 ok &= run_wgetrc (env_sysrc);
654 /* If there are any problems parsing the system wgetrc file, tell
658 fprintf (stderr, _("\
659 Parsing system wgetrc file (env SYSTEM_WGETRC) failed. Please check\n\
661 or specify a different file using --config.\n"), env_sysrc);
665 /* Otherwise, if SYSTEM_WGETRC is defined, use it. */
667 else if (file_exists_p (SYSTEM_WGETRC))
668 ok &= run_wgetrc (SYSTEM_WGETRC);
669 /* If there are any problems parsing the system wgetrc file, tell
673 fprintf (stderr, _("\
674 Parsing system wgetrc file failed. Please check\n\
676 or specify a different file using --config.\n"), SYSTEM_WGETRC);
680 /* Override it with your own, if one exists. */
681 file = wgetrc_file_name ();
684 /* #### We should canonicalize `file' and SYSTEM_WGETRC with
685 something like realpath() before comparing them with `strcmp' */
687 if (!strcmp (file, SYSTEM_WGETRC))
689 fprintf (stderr, _("\
690 %s: Warning: Both system and user wgetrc point to %s.\n"),
691 exec_name, quote (file));
695 ok &= run_wgetrc (file);
697 /* If there were errors processing either `.wgetrc', abort. */
705 /* Remove dashes and underscores from S, modifying S in the
711 char *t = s; /* t - tortoise */
712 char *h = s; /* h - hare */
714 if (*h == '_' || *h == '-')
721 /* Parse the line pointed to by LINE, with the syntax:
722 <sp>* command <sp>* = <sp>* value <sp>*
723 Uses malloc to allocate space for command and value.
725 Returns one of line_ok, line_empty, line_syntax_error, or
726 line_unknown_command.
728 In case of line_ok or line_unknown_command, *COM and *VAL point to
729 freshly allocated strings; for line_ok, *COMIND also holds COM's index.
730 In case of syntax error or empty line, their values are unmodified. */
732 static enum parse_line
733 parse_line (const char *line, char **com, char **val, int *comind)
736 const char *end = line + strlen (line);
737 const char *cmdstart, *cmdend;
738 const char *valstart, *valend;
743 /* Skip leading and trailing whitespace. */
744 while (*line && c_isspace (*line))
746 while (end > line && c_isspace (end[-1]))
749 /* Skip empty lines and comments. */
750 if (!*line || *line == '#')
756 while (p < end && (c_isalnum (*p) || *p == '_' || *p == '-'))
760 /* Skip '=', as well as any space before or after it. */
761 while (p < end && c_isspace (*p))
763 if (p == end || *p != '=')
764 return line_syntax_error;
766 while (p < end && c_isspace (*p))
772 /* The syntax is valid (even though the command might not be). Fill
773 in the command name and value. */
774 *com = strdupdelim (cmdstart, cmdend);
775 *val = strdupdelim (valstart, valend);
777 /* The line now known to be syntactically correct. Check whether
778 the command is valid. */
779 BOUNDED_TO_ALLOCA (cmdstart, cmdend, cmdcopy);
781 ind = command_by_name (cmdcopy);
783 return line_unknown_command;
785 /* Report success to the caller. */
790 #if defined(WINDOWS) || defined(MSDOS)
791 # define ISSEP(c) ((c) == '/' || (c) == '\\')
793 # define ISSEP(c) ((c) == '/')
796 /* Run commands[comind].action. */
799 setval_internal (int comind, const char *com, const char *val)
801 assert (0 <= comind && ((size_t) comind) < countof (commands));
802 DEBUGP (("Setting %s (%s) to %s\n", com, commands[comind].name, val));
803 return commands[comind].action (com, val, commands[comind].place);
807 setval_internal_tilde (int comind, const char *com, const char *val)
813 ret = setval_internal (comind, com, val);
815 /* We make tilde expansion for cmd_file and cmd_directory */
816 if (((commands[comind].action == cmd_file) ||
817 (commands[comind].action == cmd_directory))
818 && ret && (*val == '~' && ISSEP (val[1])))
820 pstring = commands[comind].place;
824 homelen = strlen (home);
825 while (homelen && ISSEP (home[homelen - 1]))
826 home[--homelen] = '\0';
828 /* Skip the leading "~/". */
829 for (++val; ISSEP (*val); val++)
831 *pstring = concat_strings (home, "/", val, (char *)0);
837 /* Run command COM with value VAL. If running the command produces an
838 error, report the error and exit.
840 This is intended to be called from main() to modify Wget's behavior
841 through command-line switches. Since COM is hard-coded in main(),
842 it is not canonicalized, and this aborts when COM is not found.
844 If COMIND's are exported to init.h, this function will be changed
845 to accept COMIND directly. */
848 setoptval (const char *com, const char *val, const char *optname)
850 /* Prepend "--" to OPTNAME. */
851 char *dd_optname = (char *) alloca (2 + strlen (optname) + 1);
854 strcpy (dd_optname + 2, optname);
856 assert (val != NULL);
857 if (!setval_internal (command_by_name (com), dd_optname, val))
861 /* Parse CMDOPT into command and value and run it. For example,
862 run_command("foo=bar") is equivalent to setoptval("foo", "bar").
863 This is used by the `--execute' flag in main.c. */
866 run_command (const char *cmdopt)
870 switch (parse_line (cmdopt, &com, &val, &comind))
873 if (!setval_internal (comind, com, val))
879 fprintf (stderr, _("%s: Invalid --execute command %s\n"),
880 exec_name, quote (cmdopt));
885 /* Generic helper functions, for use with `commands'. */
887 /* Forward declarations: */
892 static bool decode_string (const char *, const struct decode_item *, int, int *);
893 static bool simple_atoi (const char *, const char *, int *);
894 static bool simple_atof (const char *, const char *, double *);
896 #define CMP1(p, c0) (c_tolower((p)[0]) == (c0) && (p)[1] == '\0')
898 #define CMP2(p, c0, c1) (c_tolower((p)[0]) == (c0) \
899 && c_tolower((p)[1]) == (c1) \
902 #define CMP3(p, c0, c1, c2) (c_tolower((p)[0]) == (c0) \
903 && c_tolower((p)[1]) == (c1) \
904 && c_tolower((p)[2]) == (c2) \
908 /* Store the boolean value from VAL to PLACE. COM is ignored,
909 except for error messages. */
911 cmd_boolean (const char *com, const char *val, void *place)
915 if (CMP2 (val, 'o', 'n') || CMP3 (val, 'y', 'e', 's') || CMP1 (val, '1'))
916 /* "on", "yes" and "1" mean true. */
918 else if (CMP3 (val, 'o', 'f', 'f') || CMP2 (val, 'n', 'o') || CMP1 (val, '0'))
919 /* "off", "no" and "0" mean false. */
924 _("%s: %s: Invalid boolean %s; use `on' or `off'.\n"),
925 exec_name, com, quote (val));
929 *(bool *) place = value;
933 /* Set the non-negative integer value from VAL to PLACE. With
934 incorrect specification, the number remains unchanged. */
936 cmd_number (const char *com, const char *val, void *place)
938 if (!simple_atoi (val, val + strlen (val), place)
939 || *(int *) place < 0)
941 fprintf (stderr, _("%s: %s: Invalid number %s.\n"),
942 exec_name, com, quote (val));
948 /* Similar to cmd_number(), but also accepts `inf' as a synonym for 0. */
950 cmd_number_inf (const char *com, const char *val, void *place)
952 if (!strcasecmp (val, "inf"))
957 return cmd_number (com, val, place);
960 /* Copy (strdup) the string at VAL to a new location and place a
961 pointer to *PLACE. */
963 cmd_string (const char *com _GL_UNUSED, const char *val, void *place)
965 char **pstring = (char **)place;
967 xfree_null (*pstring);
968 *pstring = xstrdup (val);
972 /* Like cmd_string but ensure the string is upper case. */
974 cmd_string_uppercase (const char *com _GL_UNUSED, const char *val, void *place)
977 pstring = (char **)place;
978 xfree_null (*pstring);
980 *pstring = xmalloc (strlen (val) + 1);
982 for (q = *pstring; *val; val++, q++)
983 *q = c_toupper (*val);
990 /* Like cmd_string, but under Windows and MSDOS also converts
991 backslashes in the stored copy to slashes. Tilde expansion for
992 wgetrc values is done afterwards by setval_internal_tilde(). */
994 cmd_file (const char *com _GL_UNUSED, const char *val, void *place)
996 char **pstring = (char **)place;
998 xfree_null (*pstring);
1000 /* #### If VAL is empty, perhaps should set *PLACE to NULL. */
1002 *pstring = xstrdup (val);
1004 #if defined(WINDOWS) || defined(MSDOS)
1005 /* Convert "\" to "/". */
1008 for (s = *pstring; *s; s++)
1016 /* Like cmd_file, but strips trailing '/' characters. */
1018 cmd_directory (const char *com, const char *val, void *place)
1022 /* Call cmd_file() for tilde expansion and separator
1023 canonicalization (backslash -> slash under Windows). These
1024 things should perhaps be in a separate function. */
1025 if (!cmd_file (com, val, place))
1028 s = *(char **)place;
1030 while (t > s && *--t == '/')
1036 /* Split VAL by space to a vector of values, and append those values
1037 to vector pointed to by the PLACE argument. If VAL is empty, the
1038 PLACE vector is cleared instead. */
1041 cmd_vector (const char *com _GL_UNUSED, const char *val, void *place)
1043 char ***pvec = (char ***)place;
1046 *pvec = merge_vecs (*pvec, sepstring (val));
1056 cmd_directory_vector (const char *com _GL_UNUSED, const char *val, void *place)
1058 char ***pvec = (char ***)place;
1062 /* Strip the trailing slashes from directories. */
1065 seps = sepstring (val);
1066 for (t = seps; t && *t; t++)
1068 int len = strlen (*t);
1069 /* Skip degenerate case of root directory. */
1072 if ((*t)[len - 1] == '/')
1073 (*t)[len - 1] = '\0';
1076 *pvec = merge_vecs (*pvec, seps);
1086 /* Engine for cmd_bytes and cmd_bytes_sum: converts a string such as
1087 "100k" or "2.5G" to a floating point number. */
1090 parse_bytes_helper (const char *val, double *result)
1092 double number, mult;
1093 const char *end = val + strlen (val);
1095 /* Check for "inf". */
1096 if (0 == strcmp (val, "inf"))
1102 /* Strip trailing whitespace. */
1103 while (val < end && c_isspace (end[-1]))
1108 switch (c_tolower (end[-1]))
1111 --end, mult = 1024.0;
1114 --end, mult = 1048576.0;
1117 --end, mult = 1073741824.0;
1120 --end, mult = 1099511627776.0;
1123 /* Not a recognized suffix: assume it's a digit. (If not,
1124 simple_atof will raise an error.) */
1128 /* Skip leading and trailing whitespace. */
1129 while (val < end && c_isspace (*val))
1131 while (val < end && c_isspace (end[-1]))
1136 if (!simple_atof (val, end, &number) || number < 0)
1139 *result = number * mult;
1143 /* Parse VAL as a number and set its value to PLACE (which should
1146 By default, the value is assumed to be in bytes. If "K", "M", or
1147 "G" are appended, the value is multiplied with 1<<10, 1<<20, or
1148 1<<30, respectively. Floating point values are allowed and are
1149 cast to integer before use. The idea is to be able to use things
1150 like 1.5k instead of "1536".
1152 The string "inf" is returned as 0.
1154 In case of error, false is returned and memory pointed to by PLACE
1155 remains unmodified. */
1158 cmd_bytes (const char *com, const char *val, void *place)
1161 if (!parse_bytes_helper (val, &byte_value))
1163 fprintf (stderr, _("%s: %s: Invalid byte value %s\n"),
1164 exec_name, com, quote (val));
1167 *(wgint *)place = (wgint)byte_value;
1171 /* Like cmd_bytes, but PLACE is interpreted as a pointer to
1172 SUM_SIZE_INT. It works by converting the string to double, therefore
1173 working with values up to 2^53-1 without loss of precision. This
1174 value (8192 TB) is large enough to serve for a while. */
1177 cmd_bytes_sum (const char *com, const char *val, void *place)
1180 if (!parse_bytes_helper (val, &byte_value))
1182 fprintf (stderr, _("%s: %s: Invalid byte value %s\n"),
1183 exec_name, com, quote (val));
1186 *(SUM_SIZE_INT *) place = (SUM_SIZE_INT) byte_value;
1190 /* Store the value of VAL to *PLACE. The value is a time period, by
1191 default expressed in seconds, but also accepting suffixes "m", "h",
1192 "d", and "w" for minutes, hours, days, and weeks respectively. */
1195 cmd_time (const char *com, const char *val, void *place)
1197 double number, mult;
1198 const char *end = val + strlen (val);
1200 /* Strip trailing whitespace. */
1201 while (val < end && c_isspace (end[-1]))
1207 fprintf (stderr, _("%s: %s: Invalid time period %s\n"),
1208 exec_name, com, quote (val));
1212 switch (c_tolower (end[-1]))
1215 --end, mult = 1; /* seconds */
1218 --end, mult = 60; /* minutes */
1221 --end, mult = 3600; /* hours */
1224 --end, mult = 86400.0; /* days */
1227 --end, mult = 604800.0; /* weeks */
1230 /* Not a recognized suffix: assume it belongs to the number.
1231 (If not, simple_atof will raise an error.) */
1235 /* Skip leading and trailing whitespace. */
1236 while (val < end && c_isspace (*val))
1238 while (val < end && c_isspace (end[-1]))
1243 if (!simple_atof (val, end, &number))
1246 *(double *)place = number * mult;
1252 cmd_cert_type (const char *com, const char *val, void *place)
1254 static const struct decode_item choices[] = {
1255 { "pem", keyfile_pem },
1256 { "der", keyfile_asn1 },
1257 { "asn1", keyfile_asn1 },
1259 int ok = decode_string (val, choices, countof (choices), place);
1261 fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val));
1266 /* Specialized helper functions, used by `commands' to handle some
1267 options specially. */
1269 static bool check_user_specified_header (const char *);
1272 cmd_spec_dirstruct (const char *com, const char *val, void *place_ignored _GL_UNUSED)
1274 if (!cmd_boolean (com, val, &opt.dirstruct))
1276 /* Since dirstruct behaviour is explicitly changed, no_dirstruct
1277 must be affected inversely. */
1279 opt.no_dirstruct = false;
1281 opt.no_dirstruct = true;
1286 cmd_spec_header (const char *com, const char *val, void *place_ignored _GL_UNUSED)
1288 /* Empty value means reset the list of headers. */
1291 free_vec (opt.user_headers);
1292 opt.user_headers = NULL;
1296 if (!check_user_specified_header (val))
1298 fprintf (stderr, _("%s: %s: Invalid header %s.\n"),
1299 exec_name, com, quote (val));
1302 opt.user_headers = vec_append (opt.user_headers, val);
1307 cmd_spec_warc_header (const char *com, const char *val, void *place_ignored _GL_UNUSED)
1309 /* Empty value means reset the list of headers. */
1312 free_vec (opt.warc_user_headers);
1313 opt.warc_user_headers = NULL;
1317 if (!check_user_specified_header (val))
1319 fprintf (stderr, _("%s: %s: Invalid WARC header %s.\n"),
1320 exec_name, com, quote (val));
1323 opt.warc_user_headers = vec_append (opt.warc_user_headers, val);
1328 cmd_spec_htmlify (const char *com, const char *val, void *place_ignored _GL_UNUSED)
1330 int flag = cmd_boolean (com, val, &opt.htmlify);
1331 if (flag && !opt.htmlify)
1332 opt.remove_listing = false;
1336 /* Set the "mirror" mode. It means: recursive download, timestamping,
1337 no limit on max. recursion depth, and don't remove listings. */
/* VAL is parsed as a boolean into the local MIRROR rather than into an
   option field; the option fields changed are the ones assigned below.
   PLACE_IGNORED is not used.  */
1340 cmd_spec_mirror (const char *com, const char *val, void *place_ignored _GL_UNUSED)
1344 if (!cmd_boolean (com, val, &mirror))
/* Settings implied by mirror mode.  NOTE(review): presumably applied
   only when MIRROR parsed as true -- the guard is not visible here;
   confirm.  */
1348 opt.recursive = true;
/* Directory structure is enabled unless no_dirstruct is set.  */
1349 if (!opt.no_dirstruct)
1350 opt.dirstruct = true;
1351 opt.timestamping = true;
/* INFINITE_RECURSION comes from recur.h and stands for "no depth
   limit" in the description above.  */
1352 opt.reclevel = INFINITE_RECURSION;
1353 opt.remove_listing = false;
1358 /* Validate --prefer-family and set the choice. Allowed values are
1359 "IPv4", "IPv6", and "none". */
/* COM is used only in the error message; PLACE_IGNORED is not used.
   Matching is done by decode_string, which compares names with
   strcasecmp, so case is not significant.  */
1362 cmd_spec_prefer_family (const char *com, const char *val, void *place_ignored _GL_UNUSED)
/* Accepted names and the codes they decode to.  */
1364 static const struct decode_item choices[] = {
1365 { "IPv4", prefer_ipv4 },
1366 { "IPv6", prefer_ipv6 },
1367 { "none", prefer_none },
/* The local starts as prefer_none; decode_string overwrites it only
   when VAL matches an entry.  */
1369 int prefer_family = prefer_none;
1370 int ok = decode_string (val, choices, countof (choices), &prefer_family);
1372 fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val));
/* NOTE(review): this store looks unconditional, so an invalid VAL
   would reset the option to prefer_none instead of leaving the
   previous setting alone -- confirm that is intended.  */
1373 opt.prefer_family = prefer_family;
1377 /* Set progress.type to VAL, but verify that it's a valid progress
1378 implementation before that. */
/* The field actually written is opt.progress_type, which receives a
   fresh copy of VAL made with xstrdup.  A VAL rejected by
   valid_progress_implementation_p is reported on stderr, with COM
   naming the command.  PLACE_IGNORED is not used.  */
1381 cmd_spec_progress (const char *com, const char *val, void *place_ignored _GL_UNUSED)
1383 if (!valid_progress_implementation_p (val))
1385 fprintf (stderr, _("%s: %s: Invalid progress type %s.\n"),
1386 exec_name, com, quote (val));
/* Release the previous setting before the new copy is stored.  */
1389 xfree_null (opt.progress_type);
1391 /* Don't call set_progress_implementation here. It will be called
1392 in main() when it becomes clear what the log output is. */
1393 opt.progress_type = xstrdup (val);
1397 /* Set opt.recursive to VAL as with cmd_boolean. If opt.recursive is
1398 set to true, also set opt.dirstruct to true, unless opt.no_dirstruct
   is set.  PLACE_IGNORED is not used.  */
1402 cmd_spec_recursive (const char *com, const char *val, void *place_ignored _GL_UNUSED)
/* VAL is parsed straight into opt.recursive; the negation selects the
   path taken when cmd_boolean rejects it.  */
1404 if (!cmd_boolean (com, val, &opt.recursive))
/* Turning recursion on implies directory structure, but only while
   no_dirstruct is clear.  Turning recursion off changes nothing
   here.  */
1408 if (opt.recursive && !opt.no_dirstruct)
1409 opt.dirstruct = true;
1414 /* Validate --regex-type and set the choice. */
/* VAL is decoded against the table below by decode_string, which
   compares names with strcasecmp, so case is not significant.  COM is
   used only in the error message; PLACE_IGNORED is not used.  */
1417 cmd_spec_regex_type (const char *com, const char *val, void *place_ignored _GL_UNUSED)
/* Accepted names and the codes they decode to.  */
1419 static const struct decode_item choices[] = {
1420 { "posix", regex_type_posix },
1422 { "pcre", regex_type_pcre },
/* POSIX is the starting value; decode_string overwrites the local only
   when VAL matches an entry.  */
1425 int regex_type = regex_type_posix;
/* The last argument is the address of the local, `&regex_type'.  */
1426 int ok = decode_string (val, choices, countof (choices), &regex_type);
1428 fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val));
1429 opt.regex_type = regex_type;
/* Parse VAL as a comma-separated list of file-name restrictions and
   store the outcome in opt.restrict_files_os, opt.restrict_files_ctrl,
   opt.restrict_files_case and opt.restrict_files_nonascii.  The
   recognised words are "unix", "windows", "lowercase", "uppercase",
   "nocontrol" and "ascii"; anything else is reported on stderr, with
   COM naming the command.  PLACE_IGNORED is not used.  */
1434 cmd_spec_restrict_file_names (const char *com, const char *val, void *place_ignored _GL_UNUSED)
/* Work on local copies seeded from the current settings; they are
   written back to opt at the bottom of the function.  */
1436 int restrict_os = opt.restrict_files_os;
1437 int restrict_ctrl = opt.restrict_files_ctrl;
1438 int restrict_case = opt.restrict_files_case;
1439 int restrict_nonascii = opt.restrict_files_nonascii;
/* Tests the current word, delimited by VAL and END, against a literal.
   NOTE(review): exact BOUNDED_EQUAL semantics are defined elsewhere --
   presumably an exact match of the text in [val, end); confirm.  */
1443 #define VAL_IS(string_literal) BOUNDED_EQUAL (val, end, string_literal)
/* END marks where the current word stops: at the next comma, with the
   end of the string as the alternative.  */
1447 end = strchr (val, ',');
1449 end = val + strlen (val);
/* "unix"/"windows" choose the OS rule set, "lowercase"/"uppercase" the
   case rule, "nocontrol" turns control-character restriction off and
   "ascii" turns non-ASCII restriction on.  */
1451 if (VAL_IS ("unix"))
1452 restrict_os = restrict_unix;
1453 else if (VAL_IS ("windows"))
1454 restrict_os = restrict_windows;
1455 else if (VAL_IS ("lowercase"))
1456 restrict_case = restrict_lowercase;
1457 else if (VAL_IS ("uppercase"))
1458 restrict_case = restrict_uppercase;
1459 else if (VAL_IS ("nocontrol"))
1460 restrict_ctrl = false;
1461 else if (VAL_IS ("ascii"))
1462 restrict_nonascii = true;
1465 fprintf (stderr, _("\
1466 %s: %s: Invalid restriction %s,\n\
1467 use [unix|windows],[lowercase|uppercase],[nocontrol],[ascii].\n"),
1468 exec_name, com, quote (val));
/* Keep going only while neither the next word's first character nor
   the delimiter just seen is the terminating NUL.  */
1475 while (*val && *end);
/* Publish the locals.  */
1479 opt.restrict_files_os = restrict_os;
1480 opt.restrict_files_ctrl = restrict_ctrl;
1481 opt.restrict_files_case = restrict_case;
1482 opt.restrict_files_nonascii = restrict_nonascii;
/* Set opt.report_bps according to VAL.  The only accepted value is
   "bits", compared without regard to case; any other value clears
   opt.report_bps and is reported on stderr, with COM naming the
   command.  The return value is the resulting opt.report_bps.
   PLACE_IGNORED is not used.  */
1488 cmd_spec_report_speed (const char *com, const char *val, void *place_ignored _GL_UNUSED)
1490 opt.report_bps = strcasecmp (val, "bits") == 0;
/* The flag doubles as the validity check: false here means VAL was
   something other than "bits".  */
1491 if (!opt.report_bps)
1492 fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val));
1493 return opt.report_bps;
/* Decode the secure-protocol name in VAL and store its code through
   PLACE.  Unlike most of the neighbouring helpers this one uses PLACE:
   it is handed to decode_string as the destination.  Matching is done
   with strcasecmp inside decode_string, so case is not significant.
   COM is used only in the error message.  */
1498 cmd_spec_secure_protocol (const char *com, const char *val, void *place)
/* Accepted names and the codes they decode to.  */
1500 static const struct decode_item choices[] = {
1501 { "auto", secure_protocol_auto },
1502 { "sslv2", secure_protocol_sslv2 },
1503 { "sslv3", secure_protocol_sslv3 },
1504 { "tlsv1", secure_protocol_tlsv1 },
1505 { "pfs", secure_protocol_pfs },
1507 int ok = decode_string (val, choices, countof (choices), place);
1509 fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val));
1514 /* Set all three timeout values. */
/* VAL is parsed once by cmd_time into the local VALUE, which is then
   copied to the read, connect and DNS timeouts.  PLACE_IGNORED is not
   used.  */
1517 cmd_spec_timeout (const char *com, const char *val, void *place_ignored _GL_UNUSED)
/* The negation selects the path taken when cmd_time rejects VAL.  */
1520 if (!cmd_time (com, val, &value))
1522 opt.read_timeout = value;
1523 opt.connect_timeout = value;
1524 opt.dns_timeout = value;
/* Store a copy of VAL in opt.useragent.  A VAL containing a newline
   character is reported on stderr as invalid, with COM naming the
   command.  PLACE_IGNORED is not used.  */
1529 cmd_spec_useragent (const char *com, const char *val, void *place_ignored _GL_UNUSED)
1531 /* Disallow embedded newlines. */
1532 if (strchr (val, '\n'))
1534 fprintf (stderr, _("%s: %s: Invalid value %s.\n"),
1535 exec_name, com, quote (val));
/* Release the previous string, then keep a private copy of VAL.  */
1538 xfree_null (opt.useragent);
1539 opt.useragent = xstrdup (val);
1543 /* The "verbose" option cannot be cmd_boolean because the variable is
1544 not bool -- it's of type int (-1 means uninitialized because of
1545 some random hackery for disallowing -q -v). */
/* VAL is therefore parsed by cmd_boolean into the local FLAG instead
   of directly into the option.  PLACE_IGNORED is not used.  */
1548 cmd_spec_verbose (const char *com, const char *val, void *place_ignored _GL_UNUSED)
/* NOTE(review): on success FLAG is presumably copied into the int
   option -- the assignment is not visible here; confirm.  */
1551 if (cmd_boolean (com, val, &flag))
1559 /* Miscellaneous useful routines. */
1561 /* A very simple atoi clone, more useful than atoi because it works on
1562 delimited strings, and has error reportage. Returns true on success,
1563 false on failure. If successful, stores result to *DEST. */
/* The input is the range [BEG, END), not a NUL-terminated string:
   every scan below is bounded by END.  */
1566 simple_atoi (const char *beg, const char *end, int *dest)
1569 bool negative = false;
1570 const char *p = beg;
/* Leading whitespace is scanned past first.  */
1572 while (p < end && c_isspace (*p))
/* An optional sign may follow; only '-' marks the value negative.  */
1574 if (p < end && (*p == '-' || *p == '+'))
1576 negative = (*p == '-');
1582 /* Read negative numbers in a separate loop because the most
1583 negative integer cannot be represented as a positive number. */
/* Non-negative case: accumulate upward, one decimal digit at a time.
   NOTE(review): NEXT is computed in plain int before the overflow exit
   below can be taken, and signed overflow is undefined behavior in C;
   consider testing against INT_MAX before multiplying.  */
1586 for (; p < end && c_isdigit (*p); p++)
1588 int next = (10 * result) + (*p - '0');
1590 return false; /* overflow */
/* Negative case: accumulate downward so the most negative int is
   reachable.  NOTE(review): the same concern applies here, against
   INT_MIN.  */
1594 for (; p < end && c_isdigit (*p); p++)
1596 int next = (10 * result) - (*p - '0');
1598 return false; /* underflow */
1609 /* Trivial atof, with error reporting. Handles "<digits>[.<digits>]",
1610 doesn't handle exponential notation. Returns true on success,
1611 false on failure. In case of success, stores its result to
   *DEST.  The input is the range [BEG, END), not a NUL-terminated
   string: every scan below is bounded by END.  */
1615 simple_atof (const char *beg, const char *end, double *dest)
/* State for the scan: the sign, and whether a '.' or any digit has
   been met so far.  */
1619 bool negative = false;
1620 bool seen_dot = false;
1621 bool seen_digit = false;
1624 const char *p = beg;
/* Leading whitespace is scanned past first.  */
1626 while (p < end && c_isspace (*p))
/* An optional sign may follow; only '-' marks the value negative.  */
1628 if (p < end && (*p == '-' || *p == '+'))
1630 negative = (*p == '-');
/* Every remaining character up to END is examined.  */
1634 for (; p < end; p++)
/* Integer-part digit: shift the accumulated value one decimal place
   and add the digit.  */
1640 result = (10 * result) + (ch - '0');
/* Fractional digit: DIVIDER grows by a factor of ten per digit, so
   each digit is scaled to its decimal position before being added.  */
1642 result += (ch - '0') / (divider *= 10);
1664 /* Verify that the user-specified header in S is valid. It must
1665 contain a colon preceded by non-white-space characters and must not
1666 contain newlines. */
1669 check_user_specified_header (const char *s)
/* Advance P over the leading run of characters that are neither ':'
   nor whitespace; it stops at the first ':', the first whitespace
   character, or the end of the string.  */
1673 for (p = s; *p && *p != ':' && !c_isspace (*p); p++)
1675 /* The header MUST contain `:' preceded by at least one
1676 non-whitespace character. */
/* So the scan must have stopped on a ':' (not on whitespace or the
   terminating NUL) and must have moved past the first character.  */
1677 if (*p != ':' || p == s)
1679 /* The header MUST NOT contain newlines. */
/* This check covers the whole of S, not only the part before the
   colon.  */
1680 if (strchr (s, '\n'))
1685 /* Decode VAL into a number, according to ITEMS. */
/* ITEMS is an array of ITEMCOUNT name/code pairs.  VAL is compared
   with each name in order, ignoring case; on a match the paired code
   is stored through PLACE.  */
1688 decode_string (const char *val, const struct decode_item *items, int itemcount,
1692 for (i = 0; i < itemcount; i++)
1693 if (0 == strcasecmp (val, items[i].name))
1695 *place = items[i].code;
1701 /* Free the memory allocated by global variables. */
1705 /* Free external resources, close files, etc. */
1707 /* Close WARC file. */
/* The WARC step is conditional on a WARC file name having been set.  */
1708 if (opt.warc_filename != 0)
/* A failed close of the output stream is recorded through
   inform_exit_status with CLOSEFAILED.  */
1714 if (fclose (output_stream) == EOF)
1715 inform_exit_status (CLOSEFAILED);
1717 /* No need to check for error because Wget flushes its output (and
1718 checks for errors) after any data arrives. */
1720 /* We're exiting anyway so there's no real need to call free()
1721 hundreds of times. Skipping the frees will make Wget exit
1724 However, when detecting leaks, it's crucial to free() everything
1725 because then you can find the real leaks, i.e. the allocated
1726 memory which grows with the size of the program. */
/* What follows, up to the DEBUG_MALLOC #endif: cleanup calls into
   other modules, then one release per heap-held field of OPT -- single
   strings through xfree_null, string vectors through free_vec.  */
1732 cleanup_html_url ();
1736 netrc_cleanup (netrc_list);
1738 for (i = 0; i < nurl; i++)
1741 xfree_null (opt.choose_config);
1742 xfree_null (opt.lfilename);
1743 xfree_null (opt.dir_prefix);
1744 xfree_null (opt.input_filename);
1745 xfree_null (opt.output_document);
1746 free_vec (opt.accepts);
1747 free_vec (opt.rejects);
1748 free_vec (opt.excludes);
1749 free_vec (opt.includes);
1750 free_vec (opt.domains);
1751 free_vec (opt.follow_tags);
1752 free_vec (opt.ignore_tags);
1753 xfree_null (opt.progress_type);
1754 xfree_null (opt.ftp_user);
1755 xfree_null (opt.ftp_passwd);
1756 xfree_null (opt.ftp_proxy);
1757 xfree_null (opt.https_proxy);
1758 xfree_null (opt.http_proxy);
1759 free_vec (opt.no_proxy);
1760 xfree_null (opt.useragent);
1761 xfree_null (opt.referer);
1762 xfree_null (opt.http_user);
1763 xfree_null (opt.http_passwd);
1764 free_vec (opt.user_headers);
1765 free_vec (opt.warc_user_headers);
1767 xfree_null (opt.cert_file);
1768 xfree_null (opt.private_key);
1769 xfree_null (opt.ca_directory);
1770 xfree_null (opt.ca_cert);
1771 xfree_null (opt.random_file);
1772 xfree_null (opt.egd_file);
1774 xfree_null (opt.bind_address);
1775 xfree_null (opt.cookies_input);
1776 xfree_null (opt.cookies_output);
1777 xfree_null (opt.user);
1778 xfree_null (opt.passwd);
1779 xfree_null (opt.base_href);
1780 xfree_null (opt.method);
1782 #endif /* DEBUG_MALLOC */
1785 /* Unit testing routines. */
/* Unit test: walk adjacent pairs of the `commands' table and fail when
   strcasecmp orders a name after its successor, i.e. when the table is
   not sorted by name without regard to case.  */
1790 test_commands_sorted(void)
/* Start at 1 so that every entry can be compared with the one before
   it.  */
1794 for (i = 1; i < countof(commands); ++i)
1796 if (strcasecmp (commands[i - 1].name, commands[i].name) > 0)
1798 mu_assert ("FAILED", false);
/* Unit test: feed each test_array input to
   cmd_spec_restrict_file_names and compare its return value and the
   resulting opt.restrict_files_os, opt.restrict_files_ctrl and
   opt.restrict_files_case with the expectations in the table.
   opt.restrict_files_nonascii is not checked.  */
1806 test_cmd_spec_restrict_file_names(void)
/* Table of cases.  Going by the rows and by the VAL and RESULT members
   used further down, each row reads: input string, expected os,
   expected ctrl, expected case, expected return value.  */
1809 static const struct {
1811 int expected_restrict_files_os;
1812 int expected_restrict_files_ctrl;
1813 int expected_restrict_files_case;
/* Two of the inputs end in a comma and are still expected to
   succeed.  */
1816 { "windows", restrict_windows, true, restrict_no_case_restriction, true },
1817 { "windows,", restrict_windows, true, restrict_no_case_restriction, true },
1818 { "windows,lowercase", restrict_windows, true, restrict_lowercase, true },
1819 { "unix,nocontrol,lowercase,", restrict_unix, false, restrict_lowercase, true },
1822 for (i = 0; i < countof(test_array); ++i)
/* "dummy" stands in for the command name and NULL for the unused
   PLACE argument.  */
1827 res = cmd_spec_restrict_file_names ("dummy", test_array[i].val, NULL);
/* Diagnostic dump of the test index and the settings under test.  */
1830 fprintf (stderr, "test_cmd_spec_restrict_file_names: TEST %d\n", i); fflush (stderr);
1831 fprintf (stderr, "opt.restrict_files_os: %d\n", opt.restrict_files_os); fflush (stderr);
1832 fprintf (stderr, "opt.restrict_files_ctrl: %d\n", opt.restrict_files_ctrl); fflush (stderr);
1833 fprintf (stderr, "opt.restrict_files_case: %d\n", opt.restrict_files_case); fflush (stderr);
/* All four comparisons must hold for the case to pass.  */
1835 mu_assert ("test_cmd_spec_restrict_file_names: wrong result",
1836 res == test_array[i].result
1837 && opt.restrict_files_os == test_array[i].expected_restrict_files_os
1838 && opt.restrict_files_ctrl == test_array[i].expected_restrict_files_ctrl
1839 && opt.restrict_files_case == test_array[i].expected_restrict_files_case);
1845 #endif /* TESTING */