1 /* Reading/parsing the initialization file.
2 Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
3 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation,
6 This file is part of GNU Wget.
8 GNU Wget is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 GNU Wget is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with Wget. If not, see <http://www.gnu.org/licenses/>.
21 Additional permission under GNU GPL version 3 section 7
23 If you modify this program, or any covered work, by linking or
24 combining it with the OpenSSL project's OpenSSL library (or a
25 modified version of that library), containing parts covered by the
26 terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
27 grants you additional permission to convey the resulting work.
28 Corresponding Source for a non-source form of such a combination
29 shall include the source code for the parts of OpenSSL used as well
30 as that of the covered work. */
41 /* not all systems provide PATH_MAX in limits.h */
43 # include <sys/param.h>
45 # define PATH_MAX MAXPATHLEN
60 #include "recur.h" /* for INFINITE_RECURSION */
61 #include "convert.h" /* for convert_cleanup */
62 #include "res.h" /* for res_cleanup */
63 #include "http.h" /* for http_cleanup */
64 #include "retr.h" /* for output_stream */
72 #define CMD_DECLARE(func) static bool func (const char *, const char *, void *)
74 CMD_DECLARE (cmd_boolean);
75 CMD_DECLARE (cmd_bytes);
76 CMD_DECLARE (cmd_bytes_sum);
78 CMD_DECLARE (cmd_cert_type);
80 CMD_DECLARE (cmd_directory_vector);
81 CMD_DECLARE (cmd_number);
82 CMD_DECLARE (cmd_number_inf);
83 CMD_DECLARE (cmd_string);
84 CMD_DECLARE (cmd_file);
85 CMD_DECLARE (cmd_directory);
86 CMD_DECLARE (cmd_time);
87 CMD_DECLARE (cmd_vector);
89 CMD_DECLARE (cmd_spec_dirstruct);
90 CMD_DECLARE (cmd_spec_header);
91 CMD_DECLARE (cmd_spec_warc_header);
92 CMD_DECLARE (cmd_spec_htmlify);
93 CMD_DECLARE (cmd_spec_mirror);
94 CMD_DECLARE (cmd_spec_prefer_family);
95 CMD_DECLARE (cmd_spec_progress);
96 CMD_DECLARE (cmd_spec_recursive);
97 CMD_DECLARE (cmd_spec_restrict_file_names);
99 CMD_DECLARE (cmd_spec_secure_protocol);
101 CMD_DECLARE (cmd_spec_timeout);
102 CMD_DECLARE (cmd_spec_useragent);
103 CMD_DECLARE (cmd_spec_verbose);
105 /* List of recognized commands, each consisting of name, place and
106 function. When adding a new command, simply add it to the list,
107 but be sure to keep the list sorted alphabetically, as
108 command_by_name's binary search depends on it. Also, be sure to
109 add any entries that allocate memory (e.g. cmd_string and
110 cmd_vector) to the cleanup() function below. */
112 static const struct {
115 bool (*action) (const char *, const char *, void *);
117 /* KEEP THIS LIST ALPHABETICALLY SORTED */
118 { "accept", &opt.accepts, cmd_vector },
119 { "addhostdir", &opt.add_hostdir, cmd_boolean },
120 { "adjustextension", &opt.adjust_extension, cmd_boolean },
121 { "alwaysrest", &opt.always_rest, cmd_boolean }, /* deprecated */
122 { "askpassword", &opt.ask_passwd, cmd_boolean },
123 { "authnochallenge", &opt.auth_without_challenge,
125 { "background", &opt.background, cmd_boolean },
126 { "backupconverted", &opt.backup_converted, cmd_boolean },
127 { "backups", &opt.backups, cmd_number },
128 { "base", &opt.base_href, cmd_string },
129 { "bindaddress", &opt.bind_address, cmd_string },
131 { "cacertificate", &opt.ca_cert, cmd_file },
133 { "cache", &opt.allow_cache, cmd_boolean },
135 { "cadirectory", &opt.ca_directory, cmd_directory },
136 { "certificate", &opt.cert_file, cmd_file },
137 { "certificatetype", &opt.cert_type, cmd_cert_type },
138 { "checkcertificate", &opt.check_cert, cmd_boolean },
140 { "chooseconfig", &opt.choose_config, cmd_file },
141 { "connecttimeout", &opt.connect_timeout, cmd_time },
142 { "contentdisposition", &opt.content_disposition, cmd_boolean },
143 { "contentonerror", &opt.content_on_error, cmd_boolean },
144 { "continue", &opt.always_rest, cmd_boolean },
145 { "convertlinks", &opt.convert_links, cmd_boolean },
146 { "cookies", &opt.cookies, cmd_boolean },
147 { "cutdirs", &opt.cut_dirs, cmd_number },
149 { "debug", &opt.debug, cmd_boolean },
151 { "defaultpage", &opt.default_page, cmd_string},
152 { "deleteafter", &opt.delete_after, cmd_boolean },
153 { "dirprefix", &opt.dir_prefix, cmd_directory },
154 { "dirstruct", NULL, cmd_spec_dirstruct },
155 { "dnscache", &opt.dns_cache, cmd_boolean },
156 { "dnstimeout", &opt.dns_timeout, cmd_time },
157 { "domains", &opt.domains, cmd_vector },
158 { "dotbytes", &opt.dot_bytes, cmd_bytes },
159 { "dotsinline", &opt.dots_in_line, cmd_number },
160 { "dotspacing", &opt.dot_spacing, cmd_number },
161 { "dotstyle", &opt.dot_style, cmd_string }, /* deprecated */
163 { "egdfile", &opt.egd_file, cmd_file },
165 { "excludedirectories", &opt.excludes, cmd_directory_vector },
166 { "excludedomains", &opt.exclude_domains, cmd_vector },
167 { "followftp", &opt.follow_ftp, cmd_boolean },
168 { "followtags", &opt.follow_tags, cmd_vector },
169 { "forcehtml", &opt.force_html, cmd_boolean },
170 { "ftppasswd", &opt.ftp_passwd, cmd_string }, /* deprecated */
171 { "ftppassword", &opt.ftp_passwd, cmd_string },
172 { "ftpproxy", &opt.ftp_proxy, cmd_string },
174 { "ftpstmlf", &opt.ftp_stmlf, cmd_boolean },
175 #endif /* def __VMS */
176 { "ftpuser", &opt.ftp_user, cmd_string },
177 { "glob", &opt.ftp_glob, cmd_boolean },
178 { "header", NULL, cmd_spec_header },
179 { "htmlextension", &opt.adjust_extension, cmd_boolean }, /* deprecated */
180 { "htmlify", NULL, cmd_spec_htmlify },
181 { "httpkeepalive", &opt.http_keep_alive, cmd_boolean },
182 { "httppasswd", &opt.http_passwd, cmd_string }, /* deprecated */
183 { "httppassword", &opt.http_passwd, cmd_string },
184 { "httpproxy", &opt.http_proxy, cmd_string },
185 { "httpsproxy", &opt.https_proxy, cmd_string },
186 { "httpuser", &opt.http_user, cmd_string },
187 { "ignorecase", &opt.ignore_case, cmd_boolean },
188 { "ignorelength", &opt.ignore_length, cmd_boolean },
189 { "ignoretags", &opt.ignore_tags, cmd_vector },
190 { "includedirectories", &opt.includes, cmd_directory_vector },
192 { "inet4only", &opt.ipv4_only, cmd_boolean },
193 { "inet6only", &opt.ipv6_only, cmd_boolean },
195 { "input", &opt.input_filename, cmd_file },
196 { "iri", &opt.enable_iri, cmd_boolean },
197 { "keepsessioncookies", &opt.keep_session_cookies, cmd_boolean },
198 { "limitrate", &opt.limit_rate, cmd_bytes },
199 { "loadcookies", &opt.cookies_input, cmd_file },
200 { "localencoding", &opt.locale, cmd_string },
201 { "logfile", &opt.lfilename, cmd_file },
202 { "login", &opt.ftp_user, cmd_string },/* deprecated*/
203 { "maxredirect", &opt.max_redirect, cmd_number },
204 { "mirror", NULL, cmd_spec_mirror },
205 { "netrc", &opt.netrc, cmd_boolean },
206 { "noclobber", &opt.noclobber, cmd_boolean },
207 { "noparent", &opt.no_parent, cmd_boolean },
208 { "noproxy", &opt.no_proxy, cmd_vector },
209 { "numtries", &opt.ntry, cmd_number_inf },/* deprecated*/
210 { "outputdocument", &opt.output_document, cmd_file },
211 { "pagerequisites", &opt.page_requisites, cmd_boolean },
212 { "passiveftp", &opt.ftp_pasv, cmd_boolean },
213 { "passwd", &opt.ftp_passwd, cmd_string },/* deprecated*/
214 { "password", &opt.passwd, cmd_string },
215 { "postdata", &opt.post_data, cmd_string },
216 { "postfile", &opt.post_file_name, cmd_file },
217 { "preferfamily", NULL, cmd_spec_prefer_family },
218 { "preservepermissions", &opt.preserve_perm, cmd_boolean },
220 { "privatekey", &opt.private_key, cmd_file },
221 { "privatekeytype", &opt.private_key_type, cmd_cert_type },
223 { "progress", &opt.progress_type, cmd_spec_progress },
224 { "protocoldirectories", &opt.protocol_directories, cmd_boolean },
225 { "proxypasswd", &opt.proxy_passwd, cmd_string }, /* deprecated */
226 { "proxypassword", &opt.proxy_passwd, cmd_string },
227 { "proxyuser", &opt.proxy_user, cmd_string },
228 { "quiet", &opt.quiet, cmd_boolean },
229 { "quota", &opt.quota, cmd_bytes_sum },
231 { "randomfile", &opt.random_file, cmd_file },
233 { "randomwait", &opt.random_wait, cmd_boolean },
234 { "readtimeout", &opt.read_timeout, cmd_time },
235 { "reclevel", &opt.reclevel, cmd_number_inf },
236 { "recursive", NULL, cmd_spec_recursive },
237 { "referer", &opt.referer, cmd_string },
238 { "reject", &opt.rejects, cmd_vector },
239 { "relativeonly", &opt.relative_only, cmd_boolean },
240 { "remoteencoding", &opt.encoding_remote, cmd_string },
241 { "removelisting", &opt.remove_listing, cmd_boolean },
242 { "restrictfilenames", NULL, cmd_spec_restrict_file_names },
243 { "retrsymlinks", &opt.retr_symlinks, cmd_boolean },
244 { "retryconnrefused", &opt.retry_connrefused, cmd_boolean },
245 { "robots", &opt.use_robots, cmd_boolean },
246 { "savecookies", &opt.cookies_output, cmd_file },
247 { "saveheaders", &opt.save_headers, cmd_boolean },
249 { "secureprotocol", &opt.secure_protocol, cmd_spec_secure_protocol },
251 { "serverresponse", &opt.server_response, cmd_boolean },
252 { "showalldnsentries", &opt.show_all_dns_entries, cmd_boolean },
253 { "spanhosts", &opt.spanhost, cmd_boolean },
254 { "spider", &opt.spider, cmd_boolean },
255 { "strictcomments", &opt.strict_comments, cmd_boolean },
256 { "timeout", NULL, cmd_spec_timeout },
257 { "timestamping", &opt.timestamping, cmd_boolean },
258 { "tries", &opt.ntry, cmd_number_inf },
259 { "trustservernames", &opt.trustservernames, cmd_boolean },
260 { "unlink", &opt.unlink, cmd_boolean },
261 { "useproxy", &opt.use_proxy, cmd_boolean },
262 { "user", &opt.user, cmd_string },
263 { "useragent", NULL, cmd_spec_useragent },
264 { "useservertimestamps", &opt.useservertimestamps, cmd_boolean },
265 { "verbose", NULL, cmd_spec_verbose },
266 { "wait", &opt.wait, cmd_time },
267 { "waitretry", &opt.waitretry, cmd_time },
268 { "warccdx", &opt.warc_cdx_enabled, cmd_boolean },
269 { "warccdxdedup", &opt.warc_cdx_dedup_filename, cmd_file },
271 { "warccompression", &opt.warc_compression_enabled, cmd_boolean },
273 { "warcdigests", &opt.warc_digests_enabled, cmd_boolean },
274 { "warcfile", &opt.warc_filename, cmd_file },
275 { "warcheader", NULL, cmd_spec_warc_header },
276 { "warckeeplog", &opt.warc_keep_log, cmd_boolean },
277 { "warcmaxsize", &opt.warc_maxsize, cmd_bytes },
278 { "warctempdir", &opt.warc_tempdir, cmd_directory },
280 { "wdebug", &opt.wdebug, cmd_boolean },
284 /* Look up CMDNAME in the commands[] and return its position in the
285 array. If CMDNAME is not found, return -1. */
/* The comparison below uses strcasecmp, so the lookup ignores letter
   case.  The search relies on commands[] being kept alphabetically
   sorted, as the note above the table requires. */
288 command_by_name (const char *cmdname)
290 /* Use binary search for speed. Wget has ~100 commands, which
291 guarantees a worst case performance of 7 string comparisons. */
/* LO starts at the first slot and HI at the last valid slot of the
   table. */
292 int lo = 0, hi = countof (commands) - 1;
/* Midpoint of the current range, then a case-insensitive comparison of
   CMDNAME with the name stored in that slot. */
296 int mid = (lo + hi) >> 1;
297 int cmp = strcasecmp (cmdname, commands[mid].name);
308 /* Reset the variables to default values. */
314 /* Most of the default values are 0 (and 0.0, NULL, and false).
315 Just reset everything, and fill in the non-zero values. Note
316 that initializing pointers to NULL this way is technically
317 illegal, but porting Wget to a machine where NULL is not all-zero
318 bit pattern will be the least of the implementors' worries. */
325 opt.add_hostdir = true;
329 opt.http_keep_alive = true;
330 opt.use_proxy = true;
331 tmp = getenv ("no_proxy");
333 opt.no_proxy = sepstring (tmp);
334 opt.prefer_family = prefer_none;
335 opt.allow_cache = true;
337 opt.read_timeout = 900;
338 opt.use_robots = true;
340 opt.remove_listing = true;
342 opt.dot_bytes = 1024;
343 opt.dot_spacing = 10;
344 opt.dots_in_line = 50;
346 opt.dns_cache = true;
350 opt.check_cert = true;
353 /* The default for file name restriction defaults to the OS type. */
354 #if defined(WINDOWS) || defined(MSDOS) || defined(__CYGWIN__)
355 opt.restrict_files_os = restrict_windows;
357 opt.restrict_files_os = restrict_unix;
359 opt.restrict_files_ctrl = true;
360 opt.restrict_files_nonascii = false;
361 opt.restrict_files_case = restrict_no_case_restriction;
363 opt.max_redirect = 20;
368 opt.enable_iri = true;
370 opt.enable_iri = false;
373 opt.encoding_remote = NULL;
375 opt.useservertimestamps = true;
376 opt.show_all_dns_entries = false;
378 opt.warc_maxsize = 0; /* 1024 * 1024 * 1024; */
380 opt.warc_compression_enabled = true;
382 opt.warc_compression_enabled = false;
384 opt.warc_digests_enabled = true;
385 opt.warc_cdx_enabled = false;
386 opt.warc_cdx_dedup_filename = NULL;
387 opt.warc_tempdir = NULL;
388 opt.warc_keep_log = true;
391 /* Return the user's home directory (strdup-ed), or NULL if none is
396 static char *buf = NULL;
397 static char *home, *ret;
401 home = getenv ("HOME");
407 /* Under MSDOS, if $HOME isn't defined, use the directory where
408 `wget.exe' resides. */
409 const char *_w32_get_argv0 (void); /* in libwatt.a/pcconfig.c */
412 buff = _w32_get_argv0 ();
414 p = strrchr (buf, '/'); /* djgpp */
416 p = strrchr (buf, '\\'); /* others */
420 buff = malloc (len + 1);
424 strncpy (buff, _w32_get_argv0 (), len);
428 #elif !defined(WINDOWS)
429 /* If HOME is not defined, try getting it from the password
431 struct passwd *pwd = getpwuid (getuid ());
432 if (!pwd || !pwd->pw_dir)
436 /* Under Windows, if $HOME isn't defined, use the directory where
437 `wget.exe' resides. */
443 ret = home ? xstrdup (home) : NULL;
450 /* Check the 'WGETRC' environment variable and return the file name
451 if 'WGETRC' is set and is a valid file.
452 If the `WGETRC' variable exists but the file does not exist, the
453 function will exit(). */
/* The name handed back is a fresh xstrdup copy of the environment
   value, not the pointer obtained from getenv. */
455 wgetrc_env_file_name (void)
457 char *env = getenv ("WGETRC");
/* A WGETRC value that names a missing file is reported on stderr. */
460 if (!file_exists_p (env))
462 fprintf (stderr, _("%s: WGETRC points to %s, which doesn't exist.\n"),
466 return xstrdup (env);
471 /* Check for the existence of '$HOME/.wgetrc' and return its path
472 if it exists and is set. */
474 wgetrc_user_file_name (void)
478 /* If that failed, try $HOME/.wgetrc (or equivalent). */
/* VMS branch: a fixed SYS$LOGIN path is used as the candidate. */
481 file = "SYS$LOGIN:.wgetrc";
482 #else /* def __VMS */
/* Other systems: the candidate is built as HOME/.wgetrc with aprintf. */
485 file = aprintf ("%s/.wgetrc", home);
487 #endif /* def __VMS [else] */
/* Existence check on the candidate; what is done for a missing file is
   not visible in this excerpt. */
491 if (!file_exists_p (file))
499 /* Return the path to the user's .wgetrc. This is either the value of
500 `WGETRC' environment variable, or `$HOME/.wgetrc'.
502 Additionally, for windows, look in the directory where wget.exe
505 wgetrc_file_name (void)
507 char *file = wgetrc_env_file_name ();
511 file = wgetrc_user_file_name ();
514 /* Under Windows, if we still haven't found .wgetrc, look for the file
515 `wget.ini' in the directory where `wget.exe' resides; we do this for
516 backward compatibility with previous versions of Wget.
517 SYSTEM_WGETRC should not be defined under WINDOWS. */
520 char *home = home_dir ();
526 file = aprintf ("%s/wget.ini", home);
527 if (!file_exists_p (file))
540 /* Return values of parse_line. */
548 static enum parse_line parse_line (const char *, char **, char **, int *);
549 static bool setval_internal (int, const char *, const char *);
550 static bool setval_internal_tilde (int, const char *, const char *);
552 /* Initialize variables from a wgetrc file. Returns zero (failure) if
553 there were errors in the file. */
/* A file that cannot be opened is reported on stderr but is not treated
   as a failure: true is returned in that case. */
556 run_wgetrc (const char *file)
563 fp = fopen (file, "r");
566 fprintf (stderr, _("%s: Cannot read %s (%s).\n"), exec_name,
567 file, strerror (errno));
568 return true; /* not a fatal error */
/* Each iteration handles one line returned by read_whole_line; COM and
   VAL start out NULL and are filled in by parse_line. */
571 while ((line = read_whole_line (fp)) != NULL)
573 char *com = NULL, *val = NULL;
576 /* Parse the line. */
577 switch (parse_line (line, &com, &val, &comind))
580 /* If everything is OK, set the value. */
/* The tilde-expanding setter is used here, so file and directory values
   that begin with a tilde followed by a path separator are expanded
   when they come from a wgetrc file. */
581 if (!setval_internal_tilde (comind, com, val))
583 fprintf (stderr, _("%s: Error in %s at line %d.\n"),
584 exec_name, file, ln);
588 case line_syntax_error:
589 fprintf (stderr, _("%s: Syntax error in %s at line %d.\n"),
590 exec_name, file, ln);
/* COM can be printed here because parse_line stores the command name
   before it looks the command up. */
593 case line_unknown_command:
594 fprintf (stderr, _("%s: Unknown command %s in %s at line %d.\n"),
595 exec_name, quote (com), file, ln);
613 /* Initialize the defaults and run the system wgetrc and user's own
618 char *file, *env_sysrc;
621 /* Run a non-standard system rc file when the according environment
622 variable has been set. For internal testing purposes only! */
623 env_sysrc = getenv ("SYSTEM_WGETRC");
624 if (env_sysrc && file_exists_p (env_sysrc))
626 ok &= run_wgetrc (env_sysrc);
627 /* If there are any problems parsing the system wgetrc file, tell
631 fprintf (stderr, _("\
632 Parsing system wgetrc file (env SYSTEM_WGETRC) failed. Please check\n\
634 or specify a different file using --config.\n"), env_sysrc);
638 /* Otherwise, if SYSTEM_WGETRC is defined, use it. */
640 else if (file_exists_p (SYSTEM_WGETRC))
641 ok &= run_wgetrc (SYSTEM_WGETRC);
642 /* If there are any problems parsing the system wgetrc file, tell
646 fprintf (stderr, _("\
647 Parsing system wgetrc file failed. Please check\n\
649 or specify a different file using --config.\n"), SYSTEM_WGETRC);
653 /* Override it with your own, if one exists. */
654 file = wgetrc_file_name ();
657 /* #### We should canonicalize `file' and SYSTEM_WGETRC with
658 something like realpath() before comparing them with `strcmp' */
660 if (!strcmp (file, SYSTEM_WGETRC))
662 fprintf (stderr, _("\
663 %s: Warning: Both system and user wgetrc point to %s.\n"),
664 exec_name, quote (file));
668 ok &= run_wgetrc (file);
670 /* If there were errors processing either `.wgetrc', abort. */
678 /* Remove dashes and underscores from S, modifying S in the
684 char *t = s; /* t - tortoise */
685 char *h = s; /* h - hare */
687 if (*h == '_' || *h == '-')
694 /* Parse the line pointed by line, with the syntax:
695 <sp>* command <sp>* = <sp>* value <sp>*
696 Uses malloc to allocate space for command and value.
698 Returns one of line_ok, line_empty, line_syntax_error, or
699 line_unknown_command.
701 In case of line_ok, *COM and *VAL point to freshly allocated
702 strings, and *COMIND holds com's index. In case of a syntax error
703 or an empty line, their values are unmodified. In case of
   line_unknown_command, *COM and *VAL have already been set to
   freshly allocated strings, because they are filled in before the
   command name is looked up. */
705 static enum parse_line
706 parse_line (const char *line, char **com, char **val, int *comind)
/* END marks one past the last character that is still considered part
   of the line. */
709 const char *end = line + strlen (line);
710 const char *cmdstart, *cmdend;
711 const char *valstart, *valend;
716 /* Skip leading and trailing whitespace. */
717 while (*line && c_isspace (*line))
719 while (end > line && c_isspace (end[-1]))
722 /* Skip empty lines and comments. */
723 if (!*line || *line == '#')
/* A command name is a run of alphanumerics, underscores and dashes. */
729 while (p < end && (c_isalnum (*p) || *p == '_' || *p == '-'))
733 /* Skip '=', as well as any space before or after it. */
734 while (p < end && c_isspace (*p))
/* Anything other than '=' after the command name is a syntax error;
   the outputs have not been touched at this point. */
736 if (p == end || *p != '=')
737 return line_syntax_error;
739 while (p < end && c_isspace (*p))
745 /* The syntax is valid (even though the command might not be). Fill
746 in the command name and value. */
747 *com = strdupdelim (cmdstart, cmdend);
748 *val = strdupdelim (valstart, valend);
750 /* The line now known to be syntactically correct. Check whether
751 the command is valid. */
/* The lookup works on a separate temporary copy (CMDCOPY) of the
   command text, not on the string stored in *COM. */
752 BOUNDED_TO_ALLOCA (cmdstart, cmdend, cmdcopy);
754 ind = command_by_name (cmdcopy);
756 return line_unknown_command;
758 /* Report success to the caller. */
763 #if defined(WINDOWS) || defined(MSDOS)
764 # define ISSEP(c) ((c) == '/' || (c) == '\\')
766 # define ISSEP(c) ((c) == '/')
769 /* Run commands[comind].action. */
/* COMIND must be a valid index into commands[]; this is enforced only
   by the assert below.  COM is the name as the caller supplied it: it
   is handed to the action as its first argument and shows up in the
   debug trace next to the name stored in the table.  The action also
   receives VAL and the place pointer registered for the command, and
   its result is returned unchanged. */
772 setval_internal (int comind, const char *com, const char *val)
/* The lower bound matters: a failed command_by_name lookup yields -1. */
774 assert (0 <= comind && ((size_t) comind) < countof (commands));
775 DEBUGP (("Setting %s (%s) to %s\n", com, commands[comind].name, val));
776 return commands[comind].action (com, val, commands[comind].place);
/* Like setval_internal, followed by tilde expansion.  The command is
   run first; if it succeeded, its action is cmd_file or cmd_directory,
   and VAL starts with a tilde followed by a path separator, the string
   stored at the command's place is replaced by the home directory, a
   "/" and the rest of VAL.
   NOTE(review): the lines that obtain HOME, release the previous string
   and return the result are not visible in this excerpt. */
780 setval_internal_tilde (int comind, const char *com, const char *val)
786 ret = setval_internal (comind, com, val);
788 /* We make tilde expansion for cmd_file and cmd_directory */
789 if (((commands[comind].action == cmd_file) ||
790 (commands[comind].action == cmd_directory))
791 && ret && (*val == '~' && ISSEP (val[1])))
/* For these two actions the place is a pointer to a string pointer. */
793 pstring = commands[comind].place;
/* Trim trailing separators from HOME so that the "/" added below is
   the only one at the join. */
797 homelen = strlen (home);
798 while (homelen && ISSEP (home[homelen - 1]))
799 home[--homelen] = '\0';
801 /* Skip the leading "~/". */
/* The loop also skips any further separators after the first one. */
802 for (++val; ISSEP (*val); val++)
804 *pstring = concat_strings (home, "/", val, (char *)0);
810 /* Run command COM with value VAL. If running the command produces an
811 error, report the error and exit.
813 This is intended to be called from main() to modify Wget's behavior
814 through command-line switches. Since COM is hard-coded in main(),
815 it is not canonicalized, and this aborts when COM is not found.
817 If COMIND's are exported to init.h, this function will be changed
818 to accept COMIND directly. */
/* OPTNAME is the option name without leading dashes.  With "--"
   prepended it is passed to the action in place of the command name,
   which is the argument the actions print in their error messages. */
821 setoptval (const char *com, const char *val, const char *optname)
823 /* Prepend "--" to OPTNAME. */
/* Room for the two dashes, the option name and the terminating NUL. */
824 char *dd_optname = (char *) alloca (2 + strlen (optname) + 1);
827 strcpy (dd_optname + 2, optname);
829 assert (val != NULL);
/* The lookup result is used unchecked; an unknown COM gives -1, which
   trips the assert in setval_internal. */
830 if (!setval_internal (command_by_name (com), dd_optname, val))
834 /* Parse OPT into command and value and run it. For example,
835 run_command("foo=bar") is roughly equivalent to
   setoptval("foo", "bar", "foo").
836 This is used by the `--execute' flag in main.c. */
/* The value is applied with setval_internal rather than the
   tilde-expanding variant, so no tilde expansion is done for commands
   that arrive through this function. */
839 run_command (const char *opt)
843 switch (parse_line (opt, &com, &val, &comind))
846 if (!setval_internal (comind, com, val))
/* The complaint quotes the whole OPT string, not just the command. */
852 fprintf (stderr, _("%s: Invalid --execute command %s\n"),
853 exec_name, quote (opt));
858 /* Generic helper functions, for use with `commands'. */
860 /* Forward declarations: */
865 static bool decode_string (const char *, const struct decode_item *, int, int *);
866 static bool simple_atoi (const char *, const char *, int *);
867 static bool simple_atof (const char *, const char *, double *);
869 #define CMP1(p, c0) (c_tolower((p)[0]) == (c0) && (p)[1] == '\0')
871 #define CMP2(p, c0, c1) (c_tolower((p)[0]) == (c0) \
872 && c_tolower((p)[1]) == (c1) \
875 #define CMP3(p, c0, c1, c2) (c_tolower((p)[0]) == (c0) \
876 && c_tolower((p)[1]) == (c1) \
877 && c_tolower((p)[2]) == (c2) \
881 /* Store the boolean value from VAL to PLACE. COM is ignored,
882 except for error messages. */
/* PLACE must point to a bool.  The keywords are matched without regard
   to letter case, because the CMPn macros lower-case each character of
   VAL before comparing it. */
884 cmd_boolean (const char *com, const char *val, void *place)
888 if (CMP2 (val, 'o', 'n') || CMP3 (val, 'y', 'e', 's') || CMP1 (val, '1'))
889 /* "on", "yes" and "1" mean true. */
891 else if (CMP3 (val, 'o', 'f', 'f') || CMP2 (val, 'n', 'o') || CMP1 (val, '0'))
892 /* "off", "no" and "0" mean false. */
897 _("%s: %s: Invalid boolean %s; use `on' or `off'.\n"),
898 exec_name, com, quote (val));
902 *(bool *) place = value;
906 /* Set the non-negative integer value from VAL to PLACE. With
907 incorrect specification, the number remains unchanged. */
/* PLACE must point to an int.
   NOTE(review): PLACE itself is handed to simple_atoi and the sign is
   checked only afterwards, so whether the old value really survives a
   rejected input (in particular a negative number that parses) depends
   on simple_atoi; confirm before relying on the sentence above. */
909 cmd_number (const char *com, const char *val, void *place)
911 if (!simple_atoi (val, val + strlen (val), place)
912 || *(int *) place < 0)
914 fprintf (stderr, _("%s: %s: Invalid number %s.\n"),
915 exec_name, com, quote (val));
921 /* Similar to cmd_number(), but also accepts `inf' as a synonym for 0.
   The keyword is compared with strcasecmp, so its letter case does not
   matter; every other value is passed on to cmd_number(). */
923 cmd_number_inf (const char *com, const char *val, void *place)
925 if (!strcasecmp (val, "inf"))
930 return cmd_number (com, val, place);
933 /* Copy (strdup) the string at VAL to a new location and place a
934 pointer to *PLACE. */
/* PLACE is treated as a pointer to a string pointer.  Whatever *PLACE
   held before is passed to xfree_null and then replaced.  COM is not
   used by the statements visible here. */
936 cmd_string (const char *com, const char *val, void *place)
938 char **pstring = (char **)place;
940 xfree_null (*pstring);
941 *pstring = xstrdup (val);
946 /* Like the above, but meant for file names: on WINDOWS and MSDOS
947 builds the stored copy has its backslashes converted to "/".
948 NOTE(review): the statements visible here do no tilde expansion;
   that is done by setval_internal_tilde for commands whose action is
   cmd_file or cmd_directory. */
950 cmd_file (const char *com, const char *val, void *place)
952 char **pstring = (char **)place;
/* Release the previous value before storing the new copy. */
954 xfree_null (*pstring);
956 /* #### If VAL is empty, perhaps should set *PLACE to NULL. */
958 *pstring = xstrdup (val);
960 #if defined(WINDOWS) || defined(MSDOS)
961 /* Convert "\" to "/". */
/* The conversion walks the freshly stored copy, not VAL. */
964 for (s = *pstring; *s; s++)
972 /* Like cmd_file, but strips trailing '/' characters. */
974 cmd_directory (const char *com, const char *val, void *place)
978 /* Call cmd_file() for tilde expansion and separator
979 canonicalization (backslash -> slash under Windows). These
980 things should perhaps be in a separate function. */
/* NOTE(review): the visible part of cmd_file only copies the string and
   canonicalizes separators; tilde expansion is performed by
   setval_internal_tilde. */
981 if (!cmd_file (com, val, place))
/* Scan backwards from the end of the stored string for as long as the
   preceding character is a slash. */
986 while (t > s && *--t == '/')
992 /* Split VAL by space to a vector of values, and append those values
993 to vector pointed to by the PLACE argument. If VAL is empty, the
994 PLACE vector is cleared instead. */
/* NOTE(review): the splitting is delegated to sepstring, so the actual
   separator is whatever sepstring uses; confirm that it matches the
   description above. */
997 cmd_vector (const char *com, const char *val, void *place)
/* PLACE is treated as a pointer to a vector of strings. */
999 char ***pvec = (char ***)place;
/* The merged vector returned by merge_vecs replaces the old one. */
1002 *pvec = merge_vecs (*pvec, sepstring (val));
/* Variant of cmd_vector for lists of directories: VAL is split with
   sepstring, a trailing slash is removed from the elements, and the
   result is merged into the vector that PLACE points to. */
1012 cmd_directory_vector (const char *com, const char *val, void *place)
1014 char ***pvec = (char ***)place;
1018 /* Strip the trailing slashes from directories. */
1021 seps = sepstring (val);
/* The loop condition also copes with sepstring returning NULL. */
1022 for (t = seps; t && *t; t++)
1024 int len = strlen (*t);
1025 /* Skip degenerate case of root directory. */
/* This is a single test, not a loop, so at most one trailing slash is
   removed from each element. */
1028 if ((*t)[len - 1] == '/')
1029 (*t)[len - 1] = '\0';
1032 *pvec = merge_vecs (*pvec, seps);
1042 /* Engine for cmd_bytes and cmd_bytes_sum: converts a string such as
1043 "100k" or "2.5G" to a floating point number. */
/* The final character of VAL selects a multiplier and is matched
   without regard to letter case.  Four multipliers are present: 2^10,
   2^20, 2^30 and 2^40.  Negative numbers are rejected. */
1046 parse_bytes_helper (const char *val, double *result)
1048 double number, mult;
1049 const char *end = val + strlen (val);
1051 /* Check for "inf". */
/* This comparison is case-sensitive (strcmp), unlike the "inf" test in
   cmd_number_inf, which uses strcasecmp. */
1052 if (0 == strcmp (val, "inf"))
1058 /* Strip trailing whitespace. */
1059 while (val < end && c_isspace (end[-1]))
1064 switch (c_tolower (end[-1]))
/* Each recognized suffix is dropped from the range and sets MULT. */
1067 --end, mult = 1024.0;
1070 --end, mult = 1048576.0;
1073 --end, mult = 1073741824.0;
1076 --end, mult = 1099511627776.0;
1079 /* Not a recognized suffix: assume it's a digit. (If not,
1080 simple_atof will raise an error.) */
1084 /* Skip leading and trailing whitespace. */
1085 while (val < end && c_isspace (*val))
1087 while (val < end && c_isspace (end[-1]))
1092 if (!simple_atof (val, end, &number) || number < 0)
/* On this path *RESULT is written only after the number has been
   validated. */
1095 *result = number * mult;
1099 /* Parse VAL as a number and set its value to PLACE (which should
   point to a wgint).
1102 By default, the value is assumed to be in bytes. If "K", "M", or
1103 "G" are appended, the value is multiplied with 1<<10, 1<<20, or
1104 1<<30, respectively. Floating point values are allowed and are
1105 cast to integer before use. The idea is to be able to use things
1106 like 1.5k instead of "1536".
1108 The string "inf" is returned as 0.
1110 In case of error, false is returned and memory pointed to by PLACE
1111 remains unmodified. */
/* The suffix handling lives in parse_bytes_helper, which also carries
   a fourth multiplier of 2^40 (presumably for a "T" suffix; its case
   label is not visible in this excerpt). */
1114 cmd_bytes (const char *com, const char *val, void *place)
1117 if (!parse_bytes_helper (val, &byte_value))
1119 fprintf (stderr, _("%s: %s: Invalid byte value %s\n"),
1120 exec_name, com, quote (val));
/* The floating point result is truncated by the cast to wgint. */
1123 *(wgint *)place = (wgint)byte_value;
1127 /* Like cmd_bytes, but PLACE is interpreted as a pointer to
1128 SUM_SIZE_INT. It works by converting the string to double, therefore
1129 working with values up to 2^53-1 without loss of precision. This
1130 value (8192 TB) is large enough to serve for a while. */
1133 cmd_bytes_sum (const char *com, const char *val, void *place)
/* Parsing, including the size suffixes, is shared with cmd_bytes
   through parse_bytes_helper. */
1136 if (!parse_bytes_helper (val, &byte_value))
1138 fprintf (stderr, _("%s: %s: Invalid byte value %s\n"),
1139 exec_name, com, quote (val));
1142 *(SUM_SIZE_INT *) place = (SUM_SIZE_INT) byte_value;
1146 /* Store the value of VAL to *PLACE. The value is a time period, by
1147 default expressed in seconds, but also accepting suffixes "m", "h",
1148 "d", and "w" for minutes, hours, days, and weeks respectively.
   An explicit seconds suffix is handled as well (the arm with a
   multiplier of 1).  PLACE must point to a double; the stored value is
   the number of seconds. */
1151 cmd_time (const char *com, const char *val, void *place)
1153 double number, mult;
1154 const char *end = val + strlen (val);
1156 /* Strip trailing whitespace. */
1157 while (val < end && c_isspace (end[-1]))
1163 fprintf (stderr, _("%s: %s: Invalid time period %s\n"),
1164 exec_name, com, quote (val));
/* The suffix character is matched without regard to letter case. */
1168 switch (c_tolower (end[-1]))
1171 --end, mult = 1; /* seconds */
1174 --end, mult = 60; /* minutes */
1177 --end, mult = 3600; /* hours */
1180 --end, mult = 86400.0; /* days */
1183 --end, mult = 604800.0; /* weeks */
1186 /* Not a recognized suffix: assume it belongs to the number.
1187 (If not, simple_atof will raise an error.) */
1191 /* Skip leading and trailing whitespace. */
1192 while (val < end && c_isspace (*val))
1194 while (val < end && c_isspace (end[-1]))
/* NOTE(review): unlike parse_bytes_helper, no test for a negative
   number is visible here. */
1199 if (!simple_atof (val, end, &number))
1202 *(double *)place = number * mult;
/* Decode VAL as a certificate or key file type and store the matching
   keyfile_* value through PLACE.  An unrecognized VAL is reported on
   stderr. */
1208 cmd_cert_type (const char *com, const char *val, void *place)
1210 static const struct decode_item choices[] = {
1211 { "pem", keyfile_pem },
/* "der" and "asn1" are two spellings of the same type. */
1212 { "der", keyfile_asn1 },
1213 { "asn1", keyfile_asn1 },
1215 int ok = decode_string (val, choices, countof (choices), place);
1217 fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val));
1222 /* Specialized helper functions, used by `commands' to handle some
1223 options specially. */
1225 static bool check_user_specified_header (const char *);
/* Set opt.dirstruct from VAL as cmd_boolean does, and keep
   opt.no_dirstruct as its opposite.  opt.no_dirstruct is what
   cmd_spec_mirror and cmd_spec_recursive consult before they switch
   opt.dirstruct on by themselves. */
1228 cmd_spec_dirstruct (const char *com, const char *val, void *place_ignored)
1230 if (!cmd_boolean (com, val, &opt.dirstruct))
1232 /* Since dirstruct behaviour is explicitly changed, no_dirstruct
1233 must be affected inversely. */
/* The condition choosing between the two stores is not visible in this
   excerpt. */
1235 opt.no_dirstruct = false;
1237 opt.no_dirstruct = true;
/* Append VAL to opt.user_headers after it has passed
   check_user_specified_header; a rejected header is reported on
   stderr. */
1242 cmd_spec_header (const char *com, const char *val, void *place_ignored)
1244 /* Empty value means reset the list of headers. */
/* The pointer is cleared after the vector is freed. */
1247 free_vec (opt.user_headers);
1248 opt.user_headers = NULL;
1252 if (!check_user_specified_header (val))
1254 fprintf (stderr, _("%s: %s: Invalid header %s.\n"),
1255 exec_name, com, quote (val));
1258 opt.user_headers = vec_append (opt.user_headers, val);
/* Counterpart of cmd_spec_header for WARC headers: works on
   opt.warc_user_headers and applies the same validity check
   (check_user_specified_header). */
1263 cmd_spec_warc_header (const char *com, const char *val, void *place_ignored)
1265 /* Empty value means reset the list of headers. */
1268 free_vec (opt.warc_user_headers);
1269 opt.warc_user_headers = NULL;
1273 if (!check_user_specified_header (val))
1275 fprintf (stderr, _("%s: %s: Invalid WARC header %s.\n"),
1276 exec_name, com, quote (val));
1279 opt.warc_user_headers = vec_append (opt.warc_user_headers, val);
/* Set opt.htmlify from VAL as cmd_boolean does.  When the value was
   accepted and htmlify ended up off, opt.remove_listing is switched
   off as well. */
1284 cmd_spec_htmlify (const char *com, const char *val, void *place_ignored)
/* FLAG is the success indicator from cmd_boolean, not the parsed
   value. */
1286 int flag = cmd_boolean (com, val, &opt.htmlify);
1287 if (flag && !opt.htmlify)
1288 opt.remove_listing = false;
1292 /* Set the "mirror" mode. It means: recursive download, timestamping,
1293 no limit on max. recursion depth, and don't remove listings. */
1296 cmd_spec_mirror (const char *com, const char *val, void *place_ignored)
/* VAL is parsed into the local flag MIRROR rather than into a field of
   opt. */
1300 if (!cmd_boolean (com, val, &mirror))
/* NOTE(review): the test that guards the stores below (presumably on
   MIRROR) is not visible in this excerpt. */
1304 opt.recursive = true;
/* Directory structure is switched on unless opt.no_dirstruct is set. */
1305 if (!opt.no_dirstruct)
1306 opt.dirstruct = true;
1307 opt.timestamping = true;
1308 opt.reclevel = INFINITE_RECURSION;
1309 opt.remove_listing = false;
1314 /* Validate --prefer-family and set the choice. Allowed values are
1315 "IPv4", "IPv6", and "none". */
1318 cmd_spec_prefer_family (const char *com, const char *val, void *place_ignored)
1320 static const struct decode_item choices[] = {
1321 { "IPv4", prefer_ipv4 },
1322 { "IPv6", prefer_ipv6 },
1323 { "none", prefer_none },
/* The decoded value lands in a local int that starts at prefer_none. */
1325 int prefer_family = prefer_none;
1326 int ok = decode_string (val, choices, countof (choices), &prefer_family);
1328 fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val));
/* NOTE(review): in the visible lines this store does not appear to
   depend on OK; if so, an invalid VAL stores whatever the local holds
   (prefer_none unless decode_string changed it).  Confirm against the
   full source. */
1329 opt.prefer_family = prefer_family;
1333 /* Set opt.progress_type to VAL, but verify that it's a valid progress
1334 implementation before that. */
/* The place argument is not used; opt.progress_type is written
   directly. */
1337 cmd_spec_progress (const char *com, const char *val, void *place_ignored)
1339 if (!valid_progress_implementation_p (val))
1341 fprintf (stderr, _("%s: %s: Invalid progress type %s.\n"),
1342 exec_name, com, quote (val));
/* Release the previous setting before storing a copy of the new one. */
1345 xfree_null (opt.progress_type);
1347 /* Don't call set_progress_implementation here. It will be called
1348 in main() when it becomes clear what the log output is. */
1349 opt.progress_type = xstrdup (val);
1353 /* Set opt.recursive to VAL as with cmd_boolean. If opt.recursive is
1354 set to true, also set opt.dirstruct to true, unless opt.no_dirstruct is set. */
1358 cmd_spec_recursive (const char *com, const char *val, void *place_ignored)
/* VAL is parsed as a boolean directly into opt.recursive;
   PLACE_IGNORED is unused.  NOTE(review): the statement executed when
   cmd_boolean fails (lines 1361-1363) is missing from this listing;
   presumably a failure return.  */
1360 if (!cmd_boolean (com, val, &opt.recursive))
/* Turning recursion on also turns the directory hierarchy on, except
   when opt.no_dirstruct is set.  */
1364 if (opt.recursive && !opt.no_dirstruct)
1365 opt.dirstruct = true;
/* Command handler for the file-name restriction setting.  VAL is a
   comma-separated list of keywords (unix, windows, lowercase,
   uppercase, nocontrol, ascii).  Each keyword updates a local copy of
   the matching opt.restrict_files_* field, and the copies are written
   back to OPT at the bottom.  An unrecognized keyword is reported on
   stderr.  PLACE_IGNORED is unused.
   NOTE(review): this listing omits several original lines (see the
   gaps in the embedded numbers), among them the declaration of END,
   the loop header, whatever follows the diagnostic, and the code that
   advances VAL -- check the full file before changing control flow.  */
1371 cmd_spec_restrict_file_names (const char *com, const char *val, void *place_ignored)
/* Work on copies; OPT itself is only written by the assignments at the
   end of the function.  */
1373 int restrict_os = opt.restrict_files_os;
1374 int restrict_ctrl = opt.restrict_files_ctrl;
1375 int restrict_case = opt.restrict_files_case;
1376 int restrict_nonascii = opt.restrict_files_nonascii;
/* Tests the current token, delimited by VAL and END, against a string
   literal (presumably a bounded comparison; see BOUNDED_EQUAL).  */
1380 #define VAL_IS(string_literal) BOUNDED_EQUAL (val, end, string_literal)
/* END marks the end of the current token: the next comma, or the
   terminating NUL when no comma remains.  NOTE(review): line 1385,
   presumably the null test guarding the second assignment, is
   missing.  */
1384 end = strchr (val, ',');
1386 end = val + strlen (val);
/* "unix" and "windows" select the OS mode, "lowercase" and "uppercase"
   the case mode.  */
1388 if (VAL_IS ("unix"))
1389 restrict_os = restrict_unix;
1390 else if (VAL_IS ("windows"))
1391 restrict_os = restrict_windows;
1392 else if (VAL_IS ("lowercase"))
1393 restrict_case = restrict_lowercase;
1394 else if (VAL_IS ("uppercase"))
1395 restrict_case = restrict_uppercase;
/* "nocontrol" switches the control-character restriction off.  */
1396 else if (VAL_IS ("nocontrol"))
1397 restrict_ctrl = false;
/* "ascii" switches the non-ASCII restriction on.  */
1398 else if (VAL_IS ("ascii"))
1399 restrict_nonascii = true;
/* NOTE(review): lines 1400-1401 (presumably the final else branch) are
   missing before this diagnostic.  */
1402 fprintf (stderr, _("\
1403 %s: %s: Invalid restriction %s,\n\
1404 use [unix|windows],[lowercase|uppercase],[nocontrol],[ascii].\n"),
1405 exec_name, com, quote (val));
/* Continue while input remains and the token just handled ended at a
   comma rather than at the terminating NUL.  */
1412 while (*val && *end);
/* Commit the accumulated settings.  */
1416 opt.restrict_files_os = restrict_os;
1417 opt.restrict_files_ctrl = restrict_ctrl;
1418 opt.restrict_files_case = restrict_case;
1419 opt.restrict_files_nonascii = restrict_nonascii;
/* Command handler for the secure-protocol setting.  VAL is looked up
   in the table below with decode_string, which stores the code of the
   matching entry through PLACE; an "Invalid value" diagnostic goes to
   stderr for names that are not accepted.  Unlike the neighbouring
   cmd_spec_* handlers, PLACE is actually used here.
   NOTE(review): PLACE is a void pointer while decode_string stores an
   item code through it -- presumably callers pass the address of an
   int-sized option field; confirm.  The return statement is not
   visible in this listing.  */
1426 cmd_spec_secure_protocol (const char *com, const char *val, void *place)
/* Accepted names; decode_string matches them ignoring case.  */
1428 static const struct decode_item choices[] = {
1429 { "auto", secure_protocol_auto },
1430 { "sslv2", secure_protocol_sslv2 },
1431 { "sslv3", secure_protocol_sslv3 },
1432 { "tlsv1", secure_protocol_tlsv1 },
1434 int ok = decode_string (val, choices, countof (choices), place);
/* NOTE(review): line 1435 is absent; presumably the test on OK that
   guards this diagnostic.  */
1436 fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val));
1441 /* Set all three timeout values. */
1444 cmd_spec_timeout (const char *com, const char *val, void *place_ignored)
/* VAL is parsed once by cmd_time into the local VALUE; PLACE_IGNORED
   is unused.  NOTE(review): VALUE's declaration (line 1446) and the
   statement taken when cmd_time fails (line 1448) are missing from
   this listing.  */
1447 if (!cmd_time (com, val, &value))
/* The single parsed value is applied to the read, connect and DNS
   timeouts alike.  */
1449 opt.read_timeout = value;
1450 opt.connect_timeout = value;
1451 opt.dns_timeout = value;
/* Command handler for the user-agent setting.  A VAL containing a
   newline is reported on stderr as invalid; otherwise the previous
   opt.useragent is released and replaced with a copy of VAL.
   PLACE_IGNORED is unused.
   NOTE(review): the statement following the diagnostic (lines
   1463-1464, presumably a failure return) and the final return are not
   visible in this listing.  */
1456 cmd_spec_useragent (const char *com, const char *val, void *place_ignored)
1458 /* Disallow embedded newlines. */
1459 if (strchr (val, '\n'))
1461 fprintf (stderr, _("%s: %s: Invalid value %s.\n"),
1462 exec_name, com, quote (val));
/* Drop the old string before storing a private copy of the new one.  */
1465 xfree_null (opt.useragent);
1466 opt.useragent = xstrdup (val);
1470 /* The "verbose" option cannot be cmd_boolean because the variable is
1471 not bool -- it's of type int (-1 means uninitialized because of
1472 some random hackery for disallowing -q -v). */
1475 cmd_spec_verbose (const char *com, const char *val, void *place_ignored)
/* The verbose option is an int rather than a bool, so VAL is parsed
   into the local FLAG instead of directly into the option.
   NOTE(review): FLAG's declaration (line 1477) and the body of this
   test (lines 1479-1485), which presumably copies FLAG into the
   option, are missing from this listing -- confirm against the full
   file.  */
1478 if (cmd_boolean (com, val, &flag))
1486 /* Miscellaneous useful routines. */
/* A very simple atoi clone, more useful than atoi because it works on
   delimited strings, and has error reportage.

   Parses the text in [BEG, END): optional leading whitespace, an
   optional '+' or '-' sign, then decimal digits running all the way to
   END.  Returns true on success and stores the value in *DEST.
   Returns false, leaving *DEST untouched, when nothing follows the
   whitespace and sign, when a non-digit is encountered before END, or
   when the value does not fit in an int.  */

bool
simple_atoi (const char *beg, const char *end, int *dest)
{
  int result = 0;
  bool negative = false;
  const char *p = beg;

  while (p < end && c_isspace (*p))
    ++p;
  if (p < end && (*p == '-' || *p == '+'))
    {
      negative = (*p == '-');
      ++p;
    }
  if (p == end)
    return false;

  /* Read negative numbers in a separate loop because the most
     negative integer cannot be represented as a positive number.

     The range check is made BEFORE the multiply-and-add.  Detecting
     overflow afterwards by comparing the new value with the old one
     relies on signed overflow, which is undefined behavior; and even
     where the arithmetic wraps, it misses inputs such as
     "10000000000", whose wrapped product is larger than the previous
     value.  */

  if (!negative)
    for (; p < end && c_isdigit (*p); p++)
      {
        int digit = *p - '0';
        if (result > (INT_MAX - digit) / 10)
          return false;         /* overflow */
        result = 10 * result + digit;
      }
  else
    for (; p < end && c_isdigit (*p); p++)
      {
        int digit = *p - '0';
        /* Integer division truncates toward zero, which rounds this
           negative bound up -- exactly the smallest RESULT for which
           10 * RESULT - DIGIT still fits.  */
        if (result < (INT_MIN + digit) / 10)
          return false;         /* underflow */
        result = 10 * result - digit;
      }

  /* Anything left over means a non-digit stopped the scan.  */
  if (p != end)
    return false;

  *dest = result;
  return true;
}
1536 /* Trivial atof, with error reporting. Handles "<digits>[.<digits>]",
1537 doesn't handle exponential notation. Returns true on success,
1538 false on failure. In case of success, stores its result to *DEST. */
1542 simple_atof (const char *beg, const char *end, double *dest)
/* Parser state.  NOTE(review): the declarations of RESULT and DIVIDER,
   both used below, are among the lines missing from this listing, as
   are the statements after the digit handling (lines 1570-1590).  */
1546 bool negative = false;
1547 bool seen_dot = false;
1548 bool seen_digit = false;
/* P scans the input and is never read at or beyond END.  */
1551 const char *p = beg;
/* Leading whitespace.  */
1553 while (p < end && c_isspace (*p))
/* Optional sign; only '-' makes the number negative.  */
1555 if (p < end && (*p == '-' || *p == '+'))
1557 negative = (*p == '-');
/* Walk the remaining characters.  NOTE(review): lines 1562-1566 and
   1568 are missing; judging by SEEN_DOT, the first update below
   handles digits before the decimal point and the second one digits
   after it -- confirm.  */
1561 for (; p < end; p++)
/* Shift the accumulated value one decimal place and add the digit.  */
1567 result = (10 * result) + (ch - '0');
/* DIVIDER is multiplied by ten for every digit handled here, so each
   digit is scaled down to its decimal position before being added.  */
1569 result += (ch - '0') / (divider *= 10);
1591 /* Verify that the user-specified header in S is valid. It must
1592 contain a colon preceded by non-white-space characters and must not
1593 contain newlines. */
1596 check_user_specified_header (const char *s)
/* Advance P over the header name: the scan stops at the first colon,
   at the first whitespace character, or at the end of the string.
   NOTE(review): P's declaration, the loop body (presumably empty) and
   the return statements are not in this listing.  */
1600 for (p = s; *p && *p != ':' && !c_isspace (*p); p++)
1602 /* The header MUST contain `:' preceded by at least one
1603 non-whitespace character. */
/* The first test catches a name cut short by whitespace or by the end
   of the string; the second catches an empty name.  */
1604 if (*p != ':' || p == s)
1606 /* The header MUST NOT contain newlines. */
1607 if (strchr (s, '\n'))
1612 /* Decode VAL into a number, according to ITEMS.  VAL is compared
   with the NAME of each of the ITEMCOUNT entries using strcasecmp, so
   case is ignored; when an entry matches, its CODE is stored through
   PLACE.
   NOTE(review): the rest of the parameter list (line 1616), the
   declaration of I and the return statements are missing from this
   listing; callers in this file use the result as a success flag.  */
1615 decode_string (const char *val, const struct decode_item *items, int itemcount,
1619 for (i = 0; i < itemcount; i++)
1620 if (0 == strcasecmp (val, items[i].name))
1622 *place = items[i].code;
/* Forward declaration for the call made further down.  */
1629 void cleanup_html_url (void);
1632 /* Free the memory allocated by global variables. */
/* NOTE(review): the function's own header line is not in this listing
   (lines 1633-1635 are missing); the name `cleanup' is an assumption.
   Besides freeing option storage, the body closes output_stream.  */
1636 /* Free external resources, close files, etc. */
/* NOTE(review): line 1638 is missing; confirm whether it guards this
   call against a null output_stream.  */
1639 fclose (output_stream);
1640 /* No need to check for error because Wget flushes its output (and
1641 checks for errors) after any data arrives. */
1643 /* We're exiting anyway so there's no real need to call free()
1644 hundreds of times. Skipping the frees will make Wget exit faster.
1647 However, when detecting leaks, it's crucial to free() everything
1648 because then you can find the real leaks, i.e. the allocated
1649 memory which grows with the size of the program. */
/* NOTE(review): the `#endif' at the bottom names DEBUG_MALLOC, but the
   matching opening directive is not in this listing; presumably
   everything from here to that `#endif' is compiled only when
   DEBUG_MALLOC is defined -- confirm.  */
1655 cleanup_html_url ();
1660 extern acc_t *netrc_list;
1661 free_netrc (netrc_list);
/* Option storage: xfree_null is applied to single pointers and
   free_vec to the vector-valued options.  */
1663 xfree_null (opt.choose_config);
1664 xfree_null (opt.lfilename);
1665 xfree_null (opt.dir_prefix);
1666 xfree_null (opt.input_filename);
1667 xfree_null (opt.output_document);
1668 free_vec (opt.accepts);
1669 free_vec (opt.rejects);
1670 free_vec (opt.excludes);
1671 free_vec (opt.includes);
1672 free_vec (opt.domains);
1673 free_vec (opt.follow_tags);
1674 free_vec (opt.ignore_tags);
1675 xfree_null (opt.progress_type);
1676 xfree_null (opt.ftp_user);
1677 xfree_null (opt.ftp_passwd);
1678 xfree_null (opt.ftp_proxy);
1679 xfree_null (opt.https_proxy);
1680 xfree_null (opt.http_proxy);
1681 free_vec (opt.no_proxy);
1682 xfree_null (opt.useragent);
1683 xfree_null (opt.referer);
1684 xfree_null (opt.http_user);
1685 xfree_null (opt.http_passwd);
1686 free_vec (opt.user_headers);
1687 free_vec (opt.warc_user_headers);
/* NOTE(review): lines 1688 and 1695 are missing around the next six
   calls; they may be preprocessor conditionals enclosing the
   certificate- and key-related options -- confirm.  */
1689 xfree_null (opt.cert_file);
1690 xfree_null (opt.private_key);
1691 xfree_null (opt.ca_directory);
1692 xfree_null (opt.ca_cert);
1693 xfree_null (opt.random_file);
1694 xfree_null (opt.egd_file);
1696 xfree_null (opt.bind_address);
1697 xfree_null (opt.cookies_input);
1698 xfree_null (opt.cookies_output);
1699 xfree_null (opt.user);
1700 xfree_null (opt.passwd);
1701 xfree_null (opt.base_href);
1703 #endif /* DEBUG_MALLOC */
1706 /* Unit testing routines. */
/* Unit test over the COMMANDS table: walks pairs of entries
   (PREV_IDX, NEXT_IDX) and compares their names with strcasecmp,
   i.e. ignoring case.
   NOTE(review): the declaration of CMP, the test on CMP that leads to
   the mu_assert below (lines 1719-1720), the index updates and the
   return are missing from this listing; presumably the assertion fires
   when a pair is out of order -- confirm.  */
1711 test_commands_sorted()
/* Start with the first two entries.  */
1713 int prev_idx = 0, next_idx = 1;
/* Index of the last entry in COMMANDS.  */
1714 int command_count = countof (commands) - 1;
1716 while (next_idx <= command_count)
1718 cmp = strcasecmp (commands[prev_idx].name, commands[next_idx].name);
/* Unconditional failure report for the branch it sits in.  */
1721 mu_assert ("FAILED", false);
/* Unit test for cmd_spec_restrict_file_names: feeds it each input
   string of the table below and checks both its return value and the
   resulting opt.restrict_files_os, opt.restrict_files_ctrl and
   opt.restrict_files_case.
   NOTE(review): parts of this function are missing from the listing
   (see the gaps in the embedded line numbers), including the table
   members `val' and `result' that the code below refers to, and the
   final return.  */
1734 test_cmd_spec_restrict_file_names()
1739 int expected_restrict_files_os;
1740 int expected_restrict_files_ctrl;
1741 int expected_restrict_files_case;
/* Each row: input string, expected OS mode, expected control-character
   flag, expected case mode, and (presumably, as the last member) the
   expected return value.  The rows ending in a comma expect a trailing
   separator to be accepted.  */
1744 { "windows", restrict_windows, true, restrict_no_case_restriction, true },
1745 { "windows,", restrict_windows, true, restrict_no_case_restriction, true },
1746 { "windows,lowercase", restrict_windows, true, restrict_lowercase, true },
1747 { "unix,nocontrol,lowercase,", restrict_unix, false, restrict_lowercase, true },
1750 for (i = 0; i < sizeof(test_array)/sizeof(test_array[0]); ++i)
/* NOTE(review): lines 1751-1754 are missing; whether OPT is reset
   between rows cannot be seen from this listing.  */
1755 res = cmd_spec_restrict_file_names ("dummy", test_array[i].val, NULL);
/* Diagnostic dump of the row index and the resulting settings.
   NOTE(review): lines 1757 and 1762, which bracket these four lines,
   are missing; confirm whether the dump is live code or commented out
   in the full file.  */
1758 fprintf (stderr, "test_cmd_spec_restrict_file_names: TEST %d\n", i); fflush (stderr);
1759 fprintf (stderr, "opt.restrict_files_os: %d\n", opt.restrict_files_os); fflush (stderr);
1760 fprintf (stderr, "opt.restrict_files_ctrl: %d\n", opt.restrict_files_ctrl); fflush (stderr);
1761 fprintf (stderr, "opt.restrict_files_case: %d\n", opt.restrict_files_case); fflush (stderr);
/* The row passes only if the return value and all three settings match
   the expectations.  */
1763 mu_assert ("test_cmd_spec_restrict_file_names: wrong result",
1764 res == test_array[i].result
1765 && opt.restrict_files_os == test_array[i].expected_restrict_files_os
1766 && opt.restrict_files_ctrl == test_array[i].expected_restrict_files_ctrl
1767 && opt.restrict_files_case == test_array[i].expected_restrict_files_case);
1773 #endif /* TESTING */