1 /* Reading/parsing the initialization file.
2 Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
3 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation,
6 This file is part of GNU Wget.
8 GNU Wget is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 GNU Wget is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with Wget. If not, see <http://www.gnu.org/licenses/>.
21 Additional permission under GNU GPL version 3 section 7
23 If you modify this program, or any covered work, by linking or
24 combining it with the OpenSSL project's OpenSSL library (or a
25 modified version of that library), containing parts covered by the
26 terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
27 grants you additional permission to convey the resulting work.
28 Corresponding Source for a non-source form of such a combination
29 shall include the source code for the parts of OpenSSL used as well
30 as that of the covered work. */
41 /* not all systems provide PATH_MAX in limits.h */
43 # include <sys/param.h>
45 # define PATH_MAX MAXPATHLEN
60 #include "recur.h" /* for INFINITE_RECURSION */
61 #include "convert.h" /* for convert_cleanup */
62 #include "res.h" /* for res_cleanup */
63 #include "http.h" /* for http_cleanup */
64 #include "retr.h" /* for output_stream */
72 #define CMD_DECLARE(func) static bool func (const char *, const char *, void *)
74 CMD_DECLARE (cmd_boolean);
75 CMD_DECLARE (cmd_bytes);
76 CMD_DECLARE (cmd_bytes_sum);
78 CMD_DECLARE (cmd_cert_type);
80 CMD_DECLARE (cmd_directory_vector);
81 CMD_DECLARE (cmd_number);
82 CMD_DECLARE (cmd_number_inf);
83 CMD_DECLARE (cmd_string);
84 CMD_DECLARE (cmd_file);
85 CMD_DECLARE (cmd_directory);
86 CMD_DECLARE (cmd_time);
87 CMD_DECLARE (cmd_vector);
89 CMD_DECLARE (cmd_spec_dirstruct);
90 CMD_DECLARE (cmd_spec_header);
91 CMD_DECLARE (cmd_spec_htmlify);
92 CMD_DECLARE (cmd_spec_mirror);
93 CMD_DECLARE (cmd_spec_prefer_family);
94 CMD_DECLARE (cmd_spec_progress);
95 CMD_DECLARE (cmd_spec_recursive);
96 CMD_DECLARE (cmd_spec_restrict_file_names);
98 CMD_DECLARE (cmd_spec_secure_protocol);
100 CMD_DECLARE (cmd_spec_timeout);
101 CMD_DECLARE (cmd_spec_useragent);
102 CMD_DECLARE (cmd_spec_verbose);
104 /* List of recognized commands, each consisting of name, place and
105 function. When adding a new command, simply add it to the list,
106 but be sure to keep the list sorted alphabetically, as
107 command_by_name's binary search depends on it. Also, be sure to
108 add any entries that allocate memory (e.g. cmd_string and
109 cmd_vector) to the cleanup() function below. */
111 static const struct {
114 bool (*action) (const char *, const char *, void *);
116 /* KEEP THIS LIST ALPHABETICALLY SORTED */
117 { "accept", &opt.accepts, cmd_vector },
118 { "addhostdir", &opt.add_hostdir, cmd_boolean },
119 { "adjustextension", &opt.adjust_extension, cmd_boolean },
120 { "alwaysrest", &opt.always_rest, cmd_boolean }, /* deprecated */
121 { "askpassword", &opt.ask_passwd, cmd_boolean },
122 { "authnochallenge", &opt.auth_without_challenge,
124 { "background", &opt.background, cmd_boolean },
125 { "backupconverted", &opt.backup_converted, cmd_boolean },
126 { "backups", &opt.backups, cmd_number },
127 { "base", &opt.base_href, cmd_string },
128 { "bindaddress", &opt.bind_address, cmd_string },
130 { "cacertificate", &opt.ca_cert, cmd_file },
132 { "cache", &opt.allow_cache, cmd_boolean },
134 { "cadirectory", &opt.ca_directory, cmd_directory },
135 { "certificate", &opt.cert_file, cmd_file },
136 { "certificatetype", &opt.cert_type, cmd_cert_type },
137 { "checkcertificate", &opt.check_cert, cmd_boolean },
139 { "chooseconfig", &opt.choose_config, cmd_file },
140 { "connecttimeout", &opt.connect_timeout, cmd_time },
141 { "contentdisposition", &opt.content_disposition, cmd_boolean },
142 { "continue", &opt.always_rest, cmd_boolean },
143 { "convertlinks", &opt.convert_links, cmd_boolean },
144 { "cookies", &opt.cookies, cmd_boolean },
145 { "cutdirs", &opt.cut_dirs, cmd_number },
147 { "debug", &opt.debug, cmd_boolean },
149 { "defaultpage", &opt.default_page, cmd_string},
150 { "deleteafter", &opt.delete_after, cmd_boolean },
151 { "dirprefix", &opt.dir_prefix, cmd_directory },
152 { "dirstruct", NULL, cmd_spec_dirstruct },
153 { "dnscache", &opt.dns_cache, cmd_boolean },
154 { "dnstimeout", &opt.dns_timeout, cmd_time },
155 { "domains", &opt.domains, cmd_vector },
156 { "dotbytes", &opt.dot_bytes, cmd_bytes },
157 { "dotsinline", &opt.dots_in_line, cmd_number },
158 { "dotspacing", &opt.dot_spacing, cmd_number },
159 { "dotstyle", &opt.dot_style, cmd_string }, /* deprecated */
161 { "egdfile", &opt.egd_file, cmd_file },
163 { "excludedirectories", &opt.excludes, cmd_directory_vector },
164 { "excludedomains", &opt.exclude_domains, cmd_vector },
165 { "followftp", &opt.follow_ftp, cmd_boolean },
166 { "followtags", &opt.follow_tags, cmd_vector },
167 { "forcehtml", &opt.force_html, cmd_boolean },
168 { "ftppasswd", &opt.ftp_passwd, cmd_string }, /* deprecated */
169 { "ftppassword", &opt.ftp_passwd, cmd_string },
170 { "ftpproxy", &opt.ftp_proxy, cmd_string },
172 { "ftpstmlf", &opt.ftp_stmlf, cmd_boolean },
173 #endif /* def __VMS */
174 { "ftpuser", &opt.ftp_user, cmd_string },
175 { "glob", &opt.ftp_glob, cmd_boolean },
176 { "header", NULL, cmd_spec_header },
177 { "htmlextension", &opt.adjust_extension, cmd_boolean }, /* deprecated */
178 { "htmlify", NULL, cmd_spec_htmlify },
179 { "httpkeepalive", &opt.http_keep_alive, cmd_boolean },
180 { "httppasswd", &opt.http_passwd, cmd_string }, /* deprecated */
181 { "httppassword", &opt.http_passwd, cmd_string },
182 { "httpproxy", &opt.http_proxy, cmd_string },
183 { "httpsproxy", &opt.https_proxy, cmd_string },
184 { "httpuser", &opt.http_user, cmd_string },
185 { "ignorecase", &opt.ignore_case, cmd_boolean },
186 { "ignorelength", &opt.ignore_length, cmd_boolean },
187 { "ignoretags", &opt.ignore_tags, cmd_vector },
188 { "includedirectories", &opt.includes, cmd_directory_vector },
190 { "inet4only", &opt.ipv4_only, cmd_boolean },
191 { "inet6only", &opt.ipv6_only, cmd_boolean },
193 { "input", &opt.input_filename, cmd_file },
194 { "iri", &opt.enable_iri, cmd_boolean },
195 { "keepsessioncookies", &opt.keep_session_cookies, cmd_boolean },
196 { "limitrate", &opt.limit_rate, cmd_bytes },
197 { "loadcookies", &opt.cookies_input, cmd_file },
198 { "localencoding", &opt.locale, cmd_string },
199 { "logfile", &opt.lfilename, cmd_file },
200 { "login", &opt.ftp_user, cmd_string },/* deprecated*/
201 { "maxredirect", &opt.max_redirect, cmd_number },
202 { "mirror", NULL, cmd_spec_mirror },
203 { "netrc", &opt.netrc, cmd_boolean },
204 { "noclobber", &opt.noclobber, cmd_boolean },
205 { "noparent", &opt.no_parent, cmd_boolean },
206 { "noproxy", &opt.no_proxy, cmd_vector },
207 { "numtries", &opt.ntry, cmd_number_inf },/* deprecated*/
208 { "outputdocument", &opt.output_document, cmd_file },
209 { "pagerequisites", &opt.page_requisites, cmd_boolean },
210 { "passiveftp", &opt.ftp_pasv, cmd_boolean },
211 { "passwd", &opt.ftp_passwd, cmd_string },/* deprecated*/
212 { "password", &opt.passwd, cmd_string },
213 { "postdata", &opt.post_data, cmd_string },
214 { "postfile", &opt.post_file_name, cmd_file },
215 { "preferfamily", NULL, cmd_spec_prefer_family },
216 { "preservepermissions", &opt.preserve_perm, cmd_boolean },/* deprecated */
218 { "privatekey", &opt.private_key, cmd_file },
219 { "privatekeytype", &opt.private_key_type, cmd_cert_type },
221 { "progress", &opt.progress_type, cmd_spec_progress },
222 { "protocoldirectories", &opt.protocol_directories, cmd_boolean },
223 { "proxypasswd", &opt.proxy_passwd, cmd_string }, /* deprecated */
224 { "proxypassword", &opt.proxy_passwd, cmd_string },
225 { "proxyuser", &opt.proxy_user, cmd_string },
226 { "quiet", &opt.quiet, cmd_boolean },
227 { "quota", &opt.quota, cmd_bytes_sum },
229 { "randomfile", &opt.random_file, cmd_file },
231 { "randomwait", &opt.random_wait, cmd_boolean },
232 { "readtimeout", &opt.read_timeout, cmd_time },
233 { "reclevel", &opt.reclevel, cmd_number_inf },
234 { "recursive", NULL, cmd_spec_recursive },
235 { "referer", &opt.referer, cmd_string },
236 { "reject", &opt.rejects, cmd_vector },
237 { "relativeonly", &opt.relative_only, cmd_boolean },
238 { "remoteencoding", &opt.encoding_remote, cmd_string },
239 { "removelisting", &opt.remove_listing, cmd_boolean },
240 { "restrictfilenames", NULL, cmd_spec_restrict_file_names },
241 { "retrsymlinks", &opt.retr_symlinks, cmd_boolean },
242 { "retryconnrefused", &opt.retry_connrefused, cmd_boolean },
243 { "robots", &opt.use_robots, cmd_boolean },
244 { "savecookies", &opt.cookies_output, cmd_file },
245 { "saveheaders", &opt.save_headers, cmd_boolean },
247 { "secureprotocol", &opt.secure_protocol, cmd_spec_secure_protocol },
249 { "serverresponse", &opt.server_response, cmd_boolean },
250 { "spanhosts", &opt.spanhost, cmd_boolean },
251 { "spider", &opt.spider, cmd_boolean },
252 { "strictcomments", &opt.strict_comments, cmd_boolean },
253 { "timeout", NULL, cmd_spec_timeout },
254 { "timestamping", &opt.timestamping, cmd_boolean },
255 { "tries", &opt.ntry, cmd_number_inf },
256 { "trustservernames", &opt.trustservernames, cmd_boolean },
257 { "unlink", &opt.unlink, cmd_boolean },
258 { "useproxy", &opt.use_proxy, cmd_boolean },
259 { "user", &opt.user, cmd_string },
260 { "useragent", NULL, cmd_spec_useragent },
261 { "useservertimestamps", &opt.useservertimestamps, cmd_boolean },
262 { "verbose", NULL, cmd_spec_verbose },
263 { "wait", &opt.wait, cmd_time },
264 { "waitretry", &opt.waitretry, cmd_time },
266 { "wdebug", &opt.wdebug, cmd_boolean },
270 /* Look up CMDNAME in the commands[] and return its position in the
271 array. If CMDNAME is not found, return -1. */
274 command_by_name (const char *cmdname)
276 /* Use binary search for speed. Wget has ~100 commands, which
277 guarantees a worst case performance of 7 string comparisons. */
278 int lo = 0, hi = countof (commands) - 1;
282 int mid = (lo + hi) >> 1;
283 int cmp = strcasecmp (cmdname, commands[mid].name);
294 /* Reset the variables to default values. */
300 /* Most of the default values are 0 (and 0.0, NULL, and false).
301 Just reset everything, and fill in the non-zero values. Note
302 that initializing pointers to NULL this way is technically
303 illegal, but porting Wget to a machine where NULL is not all-zero
304 bit pattern will be the least of the implementors' worries. */
311 opt.add_hostdir = true;
315 opt.http_keep_alive = true;
316 opt.use_proxy = true;
317 tmp = getenv ("no_proxy");
319 opt.no_proxy = sepstring (tmp);
320 opt.prefer_family = prefer_none;
321 opt.allow_cache = true;
323 opt.read_timeout = 900;
324 opt.use_robots = true;
326 opt.remove_listing = true;
328 opt.dot_bytes = 1024;
329 opt.dot_spacing = 10;
330 opt.dots_in_line = 50;
332 opt.dns_cache = true;
336 opt.check_cert = true;
339 /* File name restriction defaults to the OS type. */
340 #if defined(WINDOWS) || defined(MSDOS) || defined(__CYGWIN__)
341 opt.restrict_files_os = restrict_windows;
343 opt.restrict_files_os = restrict_unix;
345 opt.restrict_files_ctrl = true;
346 opt.restrict_files_nonascii = false;
347 opt.restrict_files_case = restrict_no_case_restriction;
349 opt.max_redirect = 20;
354 opt.enable_iri = true;
356 opt.enable_iri = false;
359 opt.encoding_remote = NULL;
361 opt.useservertimestamps = true;
364 /* Return the user's home directory (strdup-ed), or NULL if none is
369 static char *buf = NULL;
370 static char *home, *ret;
375 home = getenv ("HOME");
379 /* Under MSDOS, if $HOME isn't defined, use the directory where
380 `wget.exe' resides. */
381 const char *_w32_get_argv0 (void); /* in libwatt.a/pcconfig.c */
384 buff = _w32_get_argv0 ();
386 p = strrchr (buf, '/'); /* djgpp */
388 p = strrchr (buf, '\\'); /* others */
392 buff = malloc (len + 1);
396 strncpy (buff, _w32_get_argv0 (), len);
400 #elif !defined(WINDOWS)
401 /* If HOME is not defined, try getting it from the password
403 struct passwd *pwd = getpwuid (getuid ());
404 if (!pwd || !pwd->pw_dir)
408 /* Under Windows, if $HOME isn't defined, use the directory where
409 `wget.exe' resides. */
415 ret = home ? xstrdup (home) : NULL;
422 /* Check the 'WGETRC' environment variable and return the file name
423 if 'WGETRC' is set and is a valid file.
424 If the `WGETRC' variable exists but the file does not exist, the
425 function will exit(). */
427 wgetrc_env_file_name (void)
429 char *env = getenv ("WGETRC");
432 if (!file_exists_p (env))
434 fprintf (stderr, _("%s: WGETRC points to %s, which doesn't exist.\n"),
438 return xstrdup (env);
443 /* Check for the existence of '$HOME/.wgetrc' and return its path
444 if it exists and is set. */
446 wgetrc_user_file_name (void)
450 /* If that failed, try $HOME/.wgetrc (or equivalent). */
453 file = "SYS$LOGIN:.wgetrc";
454 #else /* def __VMS */
457 file = aprintf ("%s/.wgetrc", home);
459 #endif /* def __VMS [else] */
463 if (!file_exists_p (file))
471 /* Return the path to the user's .wgetrc. This is either the value of
472 `WGETRC' environment variable, or `$HOME/.wgetrc'.
474 Additionally, for windows, look in the directory where wget.exe
477 wgetrc_file_name (void)
479 char *file = wgetrc_env_file_name ();
483 file = wgetrc_user_file_name ();
486 /* Under Windows, if we still haven't found .wgetrc, look for the file
487 `wget.ini' in the directory where `wget.exe' resides; we do this for
488 backward compatibility with previous versions of Wget.
489 SYSTEM_WGETRC should not be defined under WINDOWS. */
492 char *home = home_dir ();
498 file = aprintf ("%s/wget.ini", home);
499 if (!file_exists_p (file))
512 /* Return values of parse_line. */
520 static enum parse_line parse_line (const char *, char **, char **, int *);
521 static bool setval_internal (int, const char *, const char *);
522 static bool setval_internal_tilde (int, const char *, const char *);
524 /* Initialize variables from a wgetrc file. Returns false (failure) if
525 there were errors in the file. */
528 run_wgetrc (const char *file)
535 fp = fopen (file, "r");
538 fprintf (stderr, _("%s: Cannot read %s (%s).\n"), exec_name,
539 file, strerror (errno));
540 return true; /* not a fatal error */
543 while ((line = read_whole_line (fp)) != NULL)
545 char *com = NULL, *val = NULL;
548 /* Parse the line. */
549 switch (parse_line (line, &com, &val, &comind))
552 /* If everything is OK, set the value. */
553 if (!setval_internal_tilde (comind, com, val))
555 fprintf (stderr, _("%s: Error in %s at line %d.\n"),
556 exec_name, file, ln);
560 case line_syntax_error:
561 fprintf (stderr, _("%s: Syntax error in %s at line %d.\n"),
562 exec_name, file, ln);
565 case line_unknown_command:
566 fprintf (stderr, _("%s: Unknown command %s in %s at line %d.\n"),
567 exec_name, quote (com), file, ln);
585 /* Initialize the defaults and run the system wgetrc and user's own
590 char *file, *env_sysrc;
593 /* Run a non-standard system rc file when the corresponding environment
594 variable has been set. For internal testing purposes only! */
595 env_sysrc = getenv ("SYSTEM_WGETRC");
596 if (env_sysrc && file_exists_p (env_sysrc))
597 ok &= run_wgetrc (env_sysrc);
598 /* Otherwise, if SYSTEM_WGETRC is defined, use it. */
600 else if (file_exists_p (SYSTEM_WGETRC))
601 ok &= run_wgetrc (SYSTEM_WGETRC);
603 /* If there are any problems parsing the system wgetrc file, tell
607 fprintf (stderr, _("\
608 Parsing system wgetrc file failed, please check '%s'. \
609 Or specify a different file using --config\n"), SYSTEM_WGETRC);
612 /* Override it with your own, if one exists. */
613 file = wgetrc_file_name ();
616 /* #### We should canonicalize `file' and SYSTEM_WGETRC with
617 something like realpath() before comparing them with `strcmp' */
619 if (!strcmp (file, SYSTEM_WGETRC))
621 fprintf (stderr, _("\
622 %s: Warning: Both system and user wgetrc point to %s.\n"),
623 exec_name, quote (file));
627 ok &= run_wgetrc (file);
629 /* If there were errors processing either `.wgetrc', abort. */
637 /* Remove dashes and underscores from S, modifying S in the
643 char *t = s; /* t - tortoise */
644 char *h = s; /* h - hare */
646 if (*h == '_' || *h == '-')
653 /* Parse the line pointed to by LINE, with the syntax:
654 <sp>* command <sp>* = <sp>* value <sp>*
655 Uses malloc to allocate space for command and value.
657 Returns one of line_ok, line_empty, line_syntax_error, or
658 line_unknown_command.
660 In case of line_ok, *COM and *VAL point to freshly allocated strings,
661 and *COMIND holds com's index. For line_unknown_command, *COM and *VAL are still allocated.
662 In case of syntax error or empty line, their values are unmodified. */
664 static enum parse_line
665 parse_line (const char *line, char **com, char **val, int *comind)
668 const char *end = line + strlen (line);
669 const char *cmdstart, *cmdend;
670 const char *valstart, *valend;
675 /* Skip leading and trailing whitespace. */
676 while (*line && c_isspace (*line))
678 while (end > line && c_isspace (end[-1]))
681 /* Skip empty lines and comments. */
682 if (!*line || *line == '#')
688 while (p < end && (c_isalnum (*p) || *p == '_' || *p == '-'))
692 /* Skip '=', as well as any space before or after it. */
693 while (p < end && c_isspace (*p))
695 if (p == end || *p != '=')
696 return line_syntax_error;
698 while (p < end && c_isspace (*p))
704 /* The syntax is valid (even though the command might not be). Fill
705 in the command name and value. */
706 *com = strdupdelim (cmdstart, cmdend);
707 *val = strdupdelim (valstart, valend);
709 /* The line now known to be syntactically correct. Check whether
710 the command is valid. */
711 BOUNDED_TO_ALLOCA (cmdstart, cmdend, cmdcopy);
713 ind = command_by_name (cmdcopy);
715 return line_unknown_command;
717 /* Report success to the caller. */
722 #if defined(WINDOWS) || defined(MSDOS)
723 # define ISSEP(c) ((c) == '/' || (c) == '\\')
725 # define ISSEP(c) ((c) == '/')
728 /* Run commands[comind].action. */
731 setval_internal (int comind, const char *com, const char *val)
733 assert (0 <= comind && ((size_t) comind) < countof (commands));
734 DEBUGP (("Setting %s (%s) to %s\n", com, commands[comind].name, val));
735 return commands[comind].action (com, val, commands[comind].place);
739 setval_internal_tilde (int comind, const char *com, const char *val)
745 ret = setval_internal (comind, com, val);
747 /* We make tilde expansion for cmd_file and cmd_directory */
748 if (((commands[comind].action == cmd_file) ||
749 (commands[comind].action == cmd_directory))
750 && ret && (*val == '~' && ISSEP (val[1])))
752 pstring = commands[comind].place;
756 homelen = strlen (home);
757 while (homelen && ISSEP (home[homelen - 1]))
758 home[--homelen] = '\0';
760 /* Skip the leading "~/". */
761 for (++val; ISSEP (*val); val++)
763 *pstring = concat_strings (home, "/", val, (char *)0);
769 /* Run command COM with value VAL. If running the command produces an
770 error, report the error and exit.
772 This is intended to be called from main() to modify Wget's behavior
773 through command-line switches. Since COM is hard-coded in main(),
774 it is not canonicalized, and this aborts when COM is not found.
776 If COMIND's are exported to init.h, this function will be changed
777 to accept COMIND directly. */
780 setoptval (const char *com, const char *val, const char *optname)
782 /* Prepend "--" to OPTNAME. */
783 char *dd_optname = (char *) alloca (2 + strlen (optname) + 1);
786 strcpy (dd_optname + 2, optname);
788 assert (val != NULL);
789 if (!setval_internal (command_by_name (com), dd_optname, val))
793 /* Parse OPT into command and value and run it. For example,
794 run_command("foo=bar") is equivalent to setoptval("foo", "bar").
795 This is used by the `--execute' flag in main.c. */
798 run_command (const char *opt)
802 switch (parse_line (opt, &com, &val, &comind))
805 if (!setval_internal (comind, com, val))
811 fprintf (stderr, _("%s: Invalid --execute command %s\n"),
812 exec_name, quote (opt));
817 /* Generic helper functions, for use with `commands'. */
819 /* Forward declarations: */
824 static bool decode_string (const char *, const struct decode_item *, int, int *);
825 static bool simple_atoi (const char *, const char *, int *);
826 static bool simple_atof (const char *, const char *, double *);
828 #define CMP1(p, c0) (c_tolower((p)[0]) == (c0) && (p)[1] == '\0')
830 #define CMP2(p, c0, c1) (c_tolower((p)[0]) == (c0) \
831 && c_tolower((p)[1]) == (c1) \
834 #define CMP3(p, c0, c1, c2) (c_tolower((p)[0]) == (c0) \
835 && c_tolower((p)[1]) == (c1) \
836 && c_tolower((p)[2]) == (c2) \
840 /* Store the boolean value from VAL to PLACE. COM is ignored,
841 except for error messages. */
843 cmd_boolean (const char *com, const char *val, void *place)
847 if (CMP2 (val, 'o', 'n') || CMP3 (val, 'y', 'e', 's') || CMP1 (val, '1'))
848 /* "on", "yes" and "1" mean true. */
850 else if (CMP3 (val, 'o', 'f', 'f') || CMP2 (val, 'n', 'o') || CMP1 (val, '0'))
851 /* "off", "no" and "0" mean false. */
856 _("%s: %s: Invalid boolean %s; use `on' or `off'.\n"),
857 exec_name, com, quote (val));
861 *(bool *) place = value;
865 /* Set the non-negative integer value from VAL to PLACE. With
866 incorrect specification, the number remains unchanged. */
868 cmd_number (const char *com, const char *val, void *place)
870 if (!simple_atoi (val, val + strlen (val), place)
871 || *(int *) place < 0)
873 fprintf (stderr, _("%s: %s: Invalid number %s.\n"),
874 exec_name, com, quote (val));
880 /* Similar to cmd_number(), except that it also accepts `inf' as a synonym for 0. */
882 cmd_number_inf (const char *com, const char *val, void *place)
884 if (!strcasecmp (val, "inf"))
889 return cmd_number (com, val, place);
892 /* Copy (strdup) the string at VAL to a new location and place a
893 pointer to it in *PLACE, freeing any previous value. */
895 cmd_string (const char *com, const char *val, void *place)
897 char **pstring = (char **)place;
899 xfree_null (*pstring);
900 *pstring = xstrdup (val);
905 /* Like the above, but handles tilde-expansion when reading a user's
906 `.wgetrc'. In that case, and if VAL begins with `~', the tilde
907 gets expanded to the user's home directory. */
909 cmd_file (const char *com, const char *val, void *place)
911 char **pstring = (char **)place;
913 xfree_null (*pstring);
915 /* #### If VAL is empty, perhaps should set *PLACE to NULL. */
917 *pstring = xstrdup (val);
919 #if defined(WINDOWS) || defined(MSDOS)
920 /* Convert "\" to "/". */
923 for (s = *pstring; *s; s++)
931 /* Like cmd_file, but strips trailing '/' characters. */
933 cmd_directory (const char *com, const char *val, void *place)
937 /* Call cmd_file() for tilde expansion and separator
938 canonicalization (backslash -> slash under Windows). These
939 things should perhaps be in a separate function. */
940 if (!cmd_file (com, val, place))
945 while (t > s && *--t == '/')
951 /* Split VAL by space to a vector of values, and append those values
952 to vector pointed to by the PLACE argument. If VAL is empty, the
953 PLACE vector is cleared instead. */
956 cmd_vector (const char *com, const char *val, void *place)
958 char ***pvec = (char ***)place;
961 *pvec = merge_vecs (*pvec, sepstring (val));
971 cmd_directory_vector (const char *com, const char *val, void *place)
973 char ***pvec = (char ***)place;
977 /* Strip the trailing slashes from directories. */
980 seps = sepstring (val);
981 for (t = seps; t && *t; t++)
983 int len = strlen (*t);
984 /* Skip degenerate case of root directory. */
987 if ((*t)[len - 1] == '/')
988 (*t)[len - 1] = '\0';
991 *pvec = merge_vecs (*pvec, seps);
1001 /* Engine for cmd_bytes and cmd_bytes_sum: converts a string such as
1002 "100k" or "2.5G" to a floating point number. */
1005 parse_bytes_helper (const char *val, double *result)
1007 double number, mult;
1008 const char *end = val + strlen (val);
1010 /* Check for "inf". */
1011 if (0 == strcmp (val, "inf"))
1017 /* Strip trailing whitespace. */
1018 while (val < end && c_isspace (end[-1]))
1023 switch (c_tolower (end[-1]))
1026 --end, mult = 1024.0;
1029 --end, mult = 1048576.0;
1032 --end, mult = 1073741824.0;
1035 --end, mult = 1099511627776.0;
1038 /* Not a recognized suffix: assume it's a digit. (If not,
1039 simple_atof will raise an error.) */
1043 /* Skip leading and trailing whitespace. */
1044 while (val < end && c_isspace (*val))
1046 while (val < end && c_isspace (end[-1]))
1051 if (!simple_atof (val, end, &number) || number < 0)
1054 *result = number * mult;
1058 /* Parse VAL as a number and set its value to PLACE (which should
1061 By default, the value is assumed to be in bytes. If "K", "M", or
1062 "G" are appended, the value is multiplied with 1<<10, 1<<20, or
1063 1<<30, respectively. Floating point values are allowed and are
1064 cast to integer before use. The idea is to be able to use things
1065 like 1.5k instead of "1536".
1067 The string "inf" is returned as 0.
1069 In case of error, false is returned and memory pointed to by PLACE
1070 remains unmodified. */
1073 cmd_bytes (const char *com, const char *val, void *place)
1076 if (!parse_bytes_helper (val, &byte_value))
1078 fprintf (stderr, _("%s: %s: Invalid byte value %s\n"),
1079 exec_name, com, quote (val));
1082 *(wgint *)place = (wgint)byte_value;
1086 /* Like cmd_bytes, but PLACE is interpreted as a pointer to
1087 SUM_SIZE_INT. It works by converting the string to double, therefore
1088 working with values up to 2^53-1 without loss of precision. This
1089 value (8192 TB) is large enough to serve for a while. */
1092 cmd_bytes_sum (const char *com, const char *val, void *place)
1095 if (!parse_bytes_helper (val, &byte_value))
1097 fprintf (stderr, _("%s: %s: Invalid byte value %s\n"),
1098 exec_name, com, quote (val));
1101 *(SUM_SIZE_INT *) place = (SUM_SIZE_INT) byte_value;
1105 /* Store the value of VAL to *PLACE. The value is a time period, by
1106 default expressed in seconds, but also accepting suffixes "m", "h",
1107 "d", and "w" for minutes, hours, days, and weeks respectively. */
1110 cmd_time (const char *com, const char *val, void *place)
1112 double number, mult;
1113 const char *end = val + strlen (val);
1115 /* Strip trailing whitespace. */
1116 while (val < end && c_isspace (end[-1]))
1122 fprintf (stderr, _("%s: %s: Invalid time period %s\n"),
1123 exec_name, com, quote (val));
1127 switch (c_tolower (end[-1]))
1130 --end, mult = 1; /* seconds */
1133 --end, mult = 60; /* minutes */
1136 --end, mult = 3600; /* hours */
1139 --end, mult = 86400.0; /* days */
1142 --end, mult = 604800.0; /* weeks */
1145 /* Not a recognized suffix: assume it belongs to the number.
1146 (If not, simple_atof will raise an error.) */
1150 /* Skip leading and trailing whitespace. */
1151 while (val < end && c_isspace (*val))
1153 while (val < end && c_isspace (end[-1]))
1158 if (!simple_atof (val, end, &number))
1161 *(double *)place = number * mult;
1167 cmd_cert_type (const char *com, const char *val, void *place)
1169 static const struct decode_item choices[] = {
1170 { "pem", keyfile_pem },
1171 { "der", keyfile_asn1 },
1172 { "asn1", keyfile_asn1 },
1174 int ok = decode_string (val, choices, countof (choices), place);
1176 fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val));
1181 /* Specialized helper functions, used by `commands' to handle some
1182 options specially. */
1184 static bool check_user_specified_header (const char *);
1187 cmd_spec_dirstruct (const char *com, const char *val, void *place_ignored)
1189 if (!cmd_boolean (com, val, &opt.dirstruct))
1191 /* Since dirstruct behaviour is explicitly changed, no_dirstruct
1192 must be affected inversely. */
1194 opt.no_dirstruct = false;
1196 opt.no_dirstruct = true;
1201 cmd_spec_header (const char *com, const char *val, void *place_ignored)
1203 /* Empty value means reset the list of headers. */
1206 free_vec (opt.user_headers);
1207 opt.user_headers = NULL;
1211 if (!check_user_specified_header (val))
1213 fprintf (stderr, _("%s: %s: Invalid header %s.\n"),
1214 exec_name, com, quote (val));
1217 opt.user_headers = vec_append (opt.user_headers, val);
1222 cmd_spec_htmlify (const char *com, const char *val, void *place_ignored)
1224 int flag = cmd_boolean (com, val, &opt.htmlify);
1225 if (flag && !opt.htmlify)
1226 opt.remove_listing = false;
1230 /* Set the "mirror" mode. It means: recursive download, timestamping,
1231 no limit on max. recursion depth, and don't remove listings. */
1234 cmd_spec_mirror (const char *com, const char *val, void *place_ignored)
1238 if (!cmd_boolean (com, val, &mirror))
1242 opt.recursive = true;
1243 if (!opt.no_dirstruct)
1244 opt.dirstruct = true;
1245 opt.timestamping = true;
1246 opt.reclevel = INFINITE_RECURSION;
1247 opt.remove_listing = false;
1252 /* Validate --prefer-family and set the choice. Allowed values are
1253 "IPv4", "IPv6", and "none". */
1256 cmd_spec_prefer_family (const char *com, const char *val, void *place_ignored)
1258 static const struct decode_item choices[] = {
1259 { "IPv4", prefer_ipv4 },
1260 { "IPv6", prefer_ipv6 },
1261 { "none", prefer_none },
1263 int prefer_family = prefer_none;
1264 int ok = decode_string (val, choices, countof (choices), &prefer_family);
1266 fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val));
1267 opt.prefer_family = prefer_family;
1271 /* Set opt.progress_type to VAL, but verify that it's a valid progress
1272 implementation before that. */
1275 cmd_spec_progress (const char *com, const char *val, void *place_ignored)
1277 if (!valid_progress_implementation_p (val))
1279 fprintf (stderr, _("%s: %s: Invalid progress type %s.\n"),
1280 exec_name, com, quote (val));
1283 xfree_null (opt.progress_type);
1285 /* Don't call set_progress_implementation here. It will be called
1286 in main() when it becomes clear what the log output is. */
1287 opt.progress_type = xstrdup (val);
1291 /* Set opt.recursive to VAL as with cmd_boolean. If opt.recursive is
1292 set to true, also set opt.dirstruct to true, unless opt.no_dirstruct
1296 cmd_spec_recursive (const char *com, const char *val, void *place_ignored)
1298 if (!cmd_boolean (com, val, &opt.recursive))
1302 if (opt.recursive && !opt.no_dirstruct)
1303 opt.dirstruct = true;
1309 cmd_spec_restrict_file_names (const char *com, const char *val, void *place_ignored)
1311 int restrict_os = opt.restrict_files_os;
1312 int restrict_ctrl = opt.restrict_files_ctrl;
1313 int restrict_case = opt.restrict_files_case;
1314 int restrict_nonascii = opt.restrict_files_nonascii;
1318 #define VAL_IS(string_literal) BOUNDED_EQUAL (val, end, string_literal)
1322 end = strchr (val, ',');
1324 end = val + strlen (val);
1326 if (VAL_IS ("unix"))
1327 restrict_os = restrict_unix;
1328 else if (VAL_IS ("windows"))
1329 restrict_os = restrict_windows;
1330 else if (VAL_IS ("lowercase"))
1331 restrict_case = restrict_lowercase;
1332 else if (VAL_IS ("uppercase"))
1333 restrict_case = restrict_uppercase;
1334 else if (VAL_IS ("nocontrol"))
1335 restrict_ctrl = false;
1336 else if (VAL_IS ("ascii"))
1337 restrict_nonascii = true;
1340 fprintf (stderr, _("\
1341 %s: %s: Invalid restriction %s,\n\
1342 use [unix|windows],[lowercase|uppercase],[nocontrol],[ascii].\n"),
1343 exec_name, com, quote (val));
1350 while (*val && *end);
1354 opt.restrict_files_os = restrict_os;
1355 opt.restrict_files_ctrl = restrict_ctrl;
1356 opt.restrict_files_case = restrict_case;
1357 opt.restrict_files_nonascii = restrict_nonascii;
/* Select the secure protocol named by VAL.  VAL is looked up in the
   CHOICES table by decode_string, which compares names with strcasecmp
   and stores the code of a matching entry through PLACE.  COM is only
   used in the diagnostic.
   NOTE(review): the test of OK and the return statement are missing
   from this listing; presumably the Invalid value message is printed
   only when the lookup fails -- confirm.  */
1364 cmd_spec_secure_protocol (const char *com, const char *val, void *place)
/* Accepted spellings and the code each one maps to.  */
1366 static const struct decode_item choices[] = {
1367 { "auto", secure_protocol_auto },
1368 { "sslv2", secure_protocol_sslv2 },
1369 { "sslv3", secure_protocol_sslv3 },
1370 { "tlsv1", secure_protocol_tlsv1 },
1372 int ok = decode_string (val, choices, countof (choices), place);
1374 fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val));
1379 /* Set all three timeout values.  VAL is parsed once by cmd_time into
   the local VALUE, which is then copied to opt.read_timeout,
   opt.connect_timeout and opt.dns_timeout.  PLACE_IGNORED is not
   referenced by the visible lines.
   NOTE(review): the declaration of VALUE and the return statements
   are missing from this listing; presumably the three assignments are
   skipped when cmd_time fails -- confirm.  */
1382 cmd_spec_timeout (const char *com, const char *val, void *place_ignored)
1385 if (!cmd_time (com, val, &value))
1387 opt.read_timeout = value;
1388 opt.connect_timeout = value;
1389 opt.dns_timeout = value;
/* Store a copy of VAL in opt.useragent.  A VAL that contains a newline
   triggers the Invalid value diagnostic, which names COM and the quoted
   VAL.  PLACE_IGNORED is not referenced by the visible lines.
   NOTE(review): the return statements are missing from this listing;
   presumably the function returns false right after the diagnostic,
   before opt.useragent is touched -- confirm.  */
1394 cmd_spec_useragent (const char *com, const char *val, void *place_ignored)
1396 /* Disallow embedded newlines. */
1397 if (strchr (val, '\n'))
1399 fprintf (stderr, _("%s: %s: Invalid value %s.\n"),
1400 exec_name, com, quote (val));
/* Drop the previously stored string, then keep a private copy of VAL
   (xstrdup) so the option does not alias the caller buffer.  */
1403 xfree_null (opt.useragent);
1404 opt.useragent = xstrdup (val);
1408 /* The "verbose" option cannot be cmd_boolean because the variable is
1409 not bool -- it's of type int (-1 means uninitialized because of
1410 some random hackery for disallowing -q -v). */
1413 cmd_spec_verbose (const char *com, const char *val, void *place_ignored)
/* Parse VAL as a boolean into the local FLAG via cmd_boolean.
   NOTE(review): the declaration of FLAG, the code that transfers it to
   the int-typed verbose setting and the return statements are missing
   from this listing.  */
1416 if (cmd_boolean (com, val, &flag))
1424 /* Miscellaneous useful routines. */
1426 /* A very simple atoi clone, more useful than atoi because it works on
1427 delimited strings, and has error reportage. Returns true on success,
1428 false on failure. If successful, stores result to *DEST. */
/* The input is the range from BEG up to, but not including, END.
   Every read of *p below is preceded by a p < end test, so the visible
   lines never look at END itself.  Leading whitespace and one optional
   sign are accepted before the digits.  */
1431 simple_atoi (const char *beg, const char *end, int *dest)
1434 bool negative = false;
1435 const char *p = beg;
/* Skip leading whitespace (the loop body is not shown in this listing).  */
1437 while (p < end && c_isspace (*p))
/* Optional sign; only a minus sign sets NEGATIVE.  */
1439 if (p < end && (*p == '-' || *p == '+'))
1441 negative = (*p == '-');
1447 /* Read negative numbers in a separate loop because the most
1448 negative integer cannot be represented as a positive number. */
/* First loop: each digit is added, so the value grows upwards.
   NOTE(review): 10 * result is computed in int before the overflow
   test whose failure branch follows.  Signed overflow is undefined
   behaviour in C, and the test itself is not visible here -- confirm
   that it is reliable.  */
1451 for (; p < end && c_isdigit (*p); p++)
1453 int next = (10 * result) + (*p - '0');
1455 return false; /* overflow */
/* Second loop: each digit is subtracted, so the value grows downwards
   and can reach the most negative int.  The same review note applies
   to the underflow test.  */
1459 for (; p < end && c_isdigit (*p); p++)
1461 int next = (10 * result) - (*p - '0');
1463 return false; /* underflow */
1474 /* Trivial atof, with error reporting. Handles "<digits>[.<digits>]",
1475 doesn't handle exponential notation. Returns true on success,
1476 false on failure. In case of success, stores its result to *DEST. */
/* Parse the characters from BEG up to, but not including, END as a
   decimal number; DEST is the output location.  Leading whitespace and
   one optional sign are accepted.
   NOTE(review): the declarations of RESULT, DIVIDER and CH, the
   branching inside the main loop, and the final store and return
   statements are missing from this listing.  */
1480 simple_atof (const char *beg, const char *end, double *dest)
/* Parser state: the sign, whether the decimal point has been passed,
   and whether at least one digit was read.  */
1484 bool negative = false;
1485 bool seen_dot = false;
1486 bool seen_digit = false;
1489 const char *p = beg;
/* Skip leading whitespace (the loop body is not shown in this listing).  */
1491 while (p < end && c_isspace (*p))
/* Optional sign; only a minus sign sets NEGATIVE.  */
1493 if (p < end && (*p == '-' || *p == '+'))
1495 negative = (*p == '-');
/* Walk the remaining characters one at a time.  */
1499 for (; p < end; p++)
/* Shift the accumulated value one decimal place left and append the
   digit (presumably the branch taken before the decimal point).  */
1505 result = (10 * result) + (ch - '0');
/* DIVIDER is multiplied by ten for every digit handled here, so each
   digit contributes one decimal place further to the right (presumably
   the branch taken after the decimal point).  */
1507 result += (ch - '0') / (divider *= 10);
1529 /* Verify that the user-specified header in S is valid. It must
1530 contain a colon preceded by non-white-space characters and must not
1531 contain newlines. */
/* NOTE(review): the declaration of P and the return statements are
   missing from this listing; presumably each failed check returns
   false and the function returns true otherwise -- confirm.  */
1534 check_user_specified_header (const char *s)
/* Advance P over the header name: stop at the first colon, at the
   first whitespace character, or at the end of the string.  */
1538 for (p = s; *p && *p != ':' && !c_isspace (*p); p++)
1540 /* The header MUST contain `:' preceded by at least one
1541 non-whitespace character. */
/* *p != ':' means the scan stopped on whitespace or on the NUL before
   reaching a colon; p == s means the colon is the very first character.  */
1542 if (*p != ':' || p == s)
1544 /* The header MUST NOT contain newlines. */
1545 if (strchr (s, '\n'))
1550 /* Decode VAL into a number, according to ITEMS.  VAL is compared
   case-insensitively (strcasecmp) against the name of each of the
   ITEMCOUNT entries in turn; the code of a matching entry is stored
   in *PLACE.
   NOTE(review): the rest of the parameter list (which declares PLACE),
   the declaration of I and the return statements are missing from
   this listing.  */
1553 decode_string (const char *val, const struct decode_item *items, int itemcount,
1557 for (i = 0; i < itemcount; i++)
1558 if (0 == strcasecmp (val, items[i].name))
1560 *place = items[i].code;
/* Prototype for the cleanup_html_url call made further down.
   NOTE(review): presumably the function is defined in another source
   file -- confirm.  */
1567 void cleanup_html_url (void);
1570 /* Free the memory allocated by global variables. */
/* NOTE(review): the header line of this function and several
   structural lines (braces, preprocessor conditionals, further cleanup
   calls) are missing from this listing.  */
1574 /* Free external resources, close files, etc. */
/* NOTE(review): any condition guarding this fclose is not visible.  */
1577 fclose (output_stream);
1578 /* No need to check for error because Wget flushes its output (and
1579 checks for errors) after any data arrives. */
1581 /* We're exiting anyway so there's no real need to call free()
1582 hundreds of times. Skipping the frees will make Wget exit faster.
1585 However, when detecting leaks, it's crucial to free() everything
1586 because then you can find the real leaks, i.e. the allocated
1587 memory which grows with the size of the program. */
1593 cleanup_html_url ();
/* netrc_list is declared with a block-scope extern so that it can be
   passed to free_netrc here.  */
1598 extern acc_t *netrc_list;
1599 free_netrc (netrc_list);
/* Release the values stored in the global opt structure: xfree_null
   for the single-pointer members and free_vec for the members that are
   presumably string vectors (accepts, rejects, excludes, includes,
   domains, follow_tags, ignore_tags, no_proxy, user_headers).  */
1601 xfree_null (opt.choose_config);
1602 xfree_null (opt.lfilename);
1603 xfree_null (opt.dir_prefix);
1604 xfree_null (opt.input_filename);
1605 xfree_null (opt.output_document);
1606 free_vec (opt.accepts);
1607 free_vec (opt.rejects);
1608 free_vec (opt.excludes);
1609 free_vec (opt.includes);
1610 free_vec (opt.domains);
1611 free_vec (opt.follow_tags);
1612 free_vec (opt.ignore_tags);
1613 xfree_null (opt.progress_type);
1614 xfree_null (opt.ftp_user);
1615 xfree_null (opt.ftp_passwd);
1616 xfree_null (opt.ftp_proxy);
1617 xfree_null (opt.https_proxy);
1618 xfree_null (opt.http_proxy);
1619 free_vec (opt.no_proxy);
1620 xfree_null (opt.useragent);
1621 xfree_null (opt.referer);
1622 xfree_null (opt.http_user);
1623 xfree_null (opt.http_passwd);
1624 free_vec (opt.user_headers);
1626 xfree_null (opt.cert_file);
1627 xfree_null (opt.private_key);
1628 xfree_null (opt.ca_directory);
1629 xfree_null (opt.ca_cert);
1630 xfree_null (opt.random_file);
1631 xfree_null (opt.egd_file);
1633 xfree_null (opt.bind_address);
1634 xfree_null (opt.cookies_input);
1635 xfree_null (opt.cookies_output);
1636 xfree_null (opt.user);
1637 xfree_null (opt.passwd);
1638 xfree_null (opt.base_href);
/* Closes a DEBUG_MALLOC conditional whose opening line is not part of
   this listing, so it is unclear from here which of the statements
   above it covers.  */
1640 #endif /* DEBUG_MALLOC */
1643 /* Unit testing routines. */
/* Unit test: walk adjacent pairs of entries in the commands table and
   compare their names case-insensitively with strcasecmp.
   NOTE(review): the declaration of CMP, the test on CMP, the index
   increments and the return statement are missing from this listing;
   presumably the assertion fires when a pair is out of order.  */
1648 test_commands_sorted()
1650 int prev_idx = 0, next_idx = 1;
/* Index of the last entry of COMMANDS (element count minus one).  */
1651 int command_count = countof (commands) - 1;
1653 while (next_idx <= command_count)
1655 cmp = strcasecmp (commands[prev_idx].name, commands[next_idx].name);
/* Unconditional failure report for the enclosing (not shown) branch.  */
1658 mu_assert ("FAILED", false);
/* Unit test for cmd_spec_restrict_file_names.  Each table row pairs an
   input string with the expected opt.restrict_files_os,
   opt.restrict_files_ctrl and opt.restrict_files_case values and with
   the expected return value of the handler.
   NOTE(review): the declarations of the VAL and RESULT members, of I
   and RES, and any per-iteration setup are missing from this listing.  */
1671 test_cmd_spec_restrict_file_names()
1676 int expected_restrict_files_os;
1677 int expected_restrict_files_ctrl;
1678 int expected_restrict_files_case;
/* Rows: input, expected os, expected ctrl, expected case, expected
   result (member order inferred from the initializers and from the
   comparison below).  The inputs cover a single keyword, a trailing
   comma, and several keywords combined.  */
1681 { "windows", restrict_windows, true, restrict_no_case_restriction, true },
1682 { "windows,", restrict_windows, true, restrict_no_case_restriction, true },
1683 { "windows,lowercase", restrict_windows, true, restrict_lowercase, true },
1684 { "unix,nocontrol,lowercase,", restrict_unix, false, restrict_lowercase, true },
1687 for (i = 0; i < sizeof(test_array)/sizeof(test_array[0]); ++i)
/* COM is a placeholder name and the storage pointer is NULL.  */
1692 res = cmd_spec_restrict_file_names ("dummy", test_array[i].val, NULL);
/* Diagnostic dump of the test index and the resulting settings.
   NOTE(review): lines around these prints are missing from the
   listing; confirm whether they are live code.  */
1695 fprintf (stderr, "test_cmd_spec_restrict_file_names: TEST %d\n", i); fflush (stderr);
1696 fprintf (stderr, "opt.restrict_files_os: %d\n", opt.restrict_files_os); fflush (stderr);
1697 fprintf (stderr, "opt.restrict_files_ctrl: %d\n", opt.restrict_files_ctrl); fflush (stderr);
1698 fprintf (stderr, "opt.restrict_files_case: %d\n", opt.restrict_files_case); fflush (stderr);
/* The return value and the three settings must all match the row;
   opt.restrict_files_nonascii is not checked by this assertion.  */
1700 mu_assert ("test_cmd_spec_restrict_file_names: wrong result",
1701 res == test_array[i].result
1702 && opt.restrict_files_os == test_array[i].expected_restrict_files_os
1703 && opt.restrict_files_ctrl == test_array[i].expected_restrict_files_ctrl
1704 && opt.restrict_files_case == test_array[i].expected_restrict_files_case);
1710 #endif /* TESTING */