1 /* Reading/parsing the initialization file.
2 Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
3 2005, 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
5 This file is part of GNU Wget.
7 GNU Wget is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 GNU Wget is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with Wget. If not, see <http://www.gnu.org/licenses/>.
20 Additional permission under GNU GPL version 3 section 7
22 If you modify this program, or any covered work, by linking or
23 combining it with the OpenSSL project's OpenSSL library (or a
24 modified version of that library), containing parts covered by the
25 terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
26 grants you additional permission to convey the resulting work.
27 Corresponding Source for a non-source form of such a combination
28 shall include the source code for the parts of OpenSSL used as well
29 as that of the covered work. */
42 /* not all systems provide PATH_MAX in limits.h */
44 # include <sys/param.h>
46 # define PATH_MAX MAXPATHLEN
61 #include "recur.h" /* for INFINITE_RECURSION */
62 #include "convert.h" /* for convert_cleanup */
63 #include "res.h" /* for res_cleanup */
64 #include "http.h" /* for http_cleanup */
65 #include "retr.h" /* for output_stream */
73 #define CMD_DECLARE(func) static bool func (const char *, const char *, void *)
75 CMD_DECLARE (cmd_boolean);
76 CMD_DECLARE (cmd_bytes);
77 CMD_DECLARE (cmd_bytes_sum);
79 CMD_DECLARE (cmd_cert_type);
81 CMD_DECLARE (cmd_directory_vector);
82 CMD_DECLARE (cmd_number);
83 CMD_DECLARE (cmd_number_inf);
84 CMD_DECLARE (cmd_string);
85 CMD_DECLARE (cmd_file);
86 CMD_DECLARE (cmd_directory);
87 CMD_DECLARE (cmd_time);
88 CMD_DECLARE (cmd_vector);
90 CMD_DECLARE (cmd_spec_dirstruct);
91 CMD_DECLARE (cmd_spec_header);
92 CMD_DECLARE (cmd_spec_htmlify);
93 CMD_DECLARE (cmd_spec_mirror);
94 CMD_DECLARE (cmd_spec_prefer_family);
95 CMD_DECLARE (cmd_spec_progress);
96 CMD_DECLARE (cmd_spec_recursive);
97 CMD_DECLARE (cmd_spec_restrict_file_names);
99 CMD_DECLARE (cmd_spec_secure_protocol);
101 CMD_DECLARE (cmd_spec_timeout);
102 CMD_DECLARE (cmd_spec_useragent);
103 CMD_DECLARE (cmd_spec_verbose);
105 /* List of recognized commands, each consisting of name, place and
106 function. When adding a new command, simply add it to the list,
107 but be sure to keep the list sorted alphabetically, as
108 command_by_name's binary search depends on it. Also, be sure to
109 add any entries that allocate memory (e.g. cmd_string and
110 cmd_vector) to the cleanup() function below. */
112 static const struct {
115 bool (*action) (const char *, const char *, void *);
117 /* KEEP THIS LIST ALPHABETICALLY SORTED */
118 { "accept", &opt.accepts, cmd_vector },
119 { "addhostdir", &opt.add_hostdir, cmd_boolean },
120 { "adjustextension", &opt.adjust_extension, cmd_boolean },
121 { "alwaysrest", &opt.always_rest, cmd_boolean }, /* deprecated */
122 { "askpassword", &opt.ask_passwd, cmd_boolean },
123 { "authnochallenge", &opt.auth_without_challenge,
125 { "background", &opt.background, cmd_boolean },
126 { "backupconverted", &opt.backup_converted, cmd_boolean },
127 { "backups", &opt.backups, cmd_number },
128 { "base", &opt.base_href, cmd_string },
129 { "bindaddress", &opt.bind_address, cmd_string },
131 { "cacertificate", &opt.ca_cert, cmd_file },
133 { "cache", &opt.allow_cache, cmd_boolean },
135 { "cadirectory", &opt.ca_directory, cmd_directory },
136 { "certificate", &opt.cert_file, cmd_file },
137 { "certificatetype", &opt.cert_type, cmd_cert_type },
138 { "checkcertificate", &opt.check_cert, cmd_boolean },
140 { "chooseconfig", &opt.choose_config, cmd_file },
141 { "connecttimeout", &opt.connect_timeout, cmd_time },
142 { "contentdisposition", &opt.content_disposition, cmd_boolean },
143 { "continue", &opt.always_rest, cmd_boolean },
144 { "convertlinks", &opt.convert_links, cmd_boolean },
145 { "cookies", &opt.cookies, cmd_boolean },
146 { "cutdirs", &opt.cut_dirs, cmd_number },
148 { "debug", &opt.debug, cmd_boolean },
150 { "defaultpage", &opt.default_page, cmd_string},
151 { "deleteafter", &opt.delete_after, cmd_boolean },
152 { "dirprefix", &opt.dir_prefix, cmd_directory },
153 { "dirstruct", NULL, cmd_spec_dirstruct },
154 { "dnscache", &opt.dns_cache, cmd_boolean },
155 { "dnstimeout", &opt.dns_timeout, cmd_time },
156 { "domains", &opt.domains, cmd_vector },
157 { "dotbytes", &opt.dot_bytes, cmd_bytes },
158 { "dotsinline", &opt.dots_in_line, cmd_number },
159 { "dotspacing", &opt.dot_spacing, cmd_number },
160 { "dotstyle", &opt.dot_style, cmd_string }, /* deprecated */
162 { "egdfile", &opt.egd_file, cmd_file },
164 { "excludedirectories", &opt.excludes, cmd_directory_vector },
165 { "excludedomains", &opt.exclude_domains, cmd_vector },
166 { "followftp", &opt.follow_ftp, cmd_boolean },
167 { "followtags", &opt.follow_tags, cmd_vector },
168 { "forcehtml", &opt.force_html, cmd_boolean },
169 { "ftppasswd", &opt.ftp_passwd, cmd_string }, /* deprecated */
170 { "ftppassword", &opt.ftp_passwd, cmd_string },
171 { "ftpproxy", &opt.ftp_proxy, cmd_string },
173 { "ftpstmlf", &opt.ftp_stmlf, cmd_boolean },
174 #endif /* def __VMS */
175 { "ftpuser", &opt.ftp_user, cmd_string },
176 { "glob", &opt.ftp_glob, cmd_boolean },
177 { "header", NULL, cmd_spec_header },
178 { "htmlextension", &opt.adjust_extension, cmd_boolean }, /* deprecated */
179 { "htmlify", NULL, cmd_spec_htmlify },
180 { "httpkeepalive", &opt.http_keep_alive, cmd_boolean },
181 { "httppasswd", &opt.http_passwd, cmd_string }, /* deprecated */
182 { "httppassword", &opt.http_passwd, cmd_string },
183 { "httpproxy", &opt.http_proxy, cmd_string },
184 { "httpsproxy", &opt.https_proxy, cmd_string },
185 { "httpuser", &opt.http_user, cmd_string },
186 { "ignorecase", &opt.ignore_case, cmd_boolean },
187 { "ignorelength", &opt.ignore_length, cmd_boolean },
188 { "ignoretags", &opt.ignore_tags, cmd_vector },
189 { "includedirectories", &opt.includes, cmd_directory_vector },
191 { "inet4only", &opt.ipv4_only, cmd_boolean },
192 { "inet6only", &opt.ipv6_only, cmd_boolean },
194 { "input", &opt.input_filename, cmd_file },
195 { "iri", &opt.enable_iri, cmd_boolean },
196 { "keepsessioncookies", &opt.keep_session_cookies, cmd_boolean },
197 { "limitrate", &opt.limit_rate, cmd_bytes },
198 { "loadcookies", &opt.cookies_input, cmd_file },
199 { "localencoding", &opt.locale, cmd_string },
200 { "logfile", &opt.lfilename, cmd_file },
201 { "login", &opt.ftp_user, cmd_string },/* deprecated*/
202 { "maxredirect", &opt.max_redirect, cmd_number },
203 { "mirror", NULL, cmd_spec_mirror },
204 { "netrc", &opt.netrc, cmd_boolean },
205 { "noclobber", &opt.noclobber, cmd_boolean },
206 { "noparent", &opt.no_parent, cmd_boolean },
207 { "noproxy", &opt.no_proxy, cmd_vector },
208 { "numtries", &opt.ntry, cmd_number_inf },/* deprecated*/
209 { "outputdocument", &opt.output_document, cmd_file },
210 { "pagerequisites", &opt.page_requisites, cmd_boolean },
211 { "passiveftp", &opt.ftp_pasv, cmd_boolean },
212 { "passwd", &opt.ftp_passwd, cmd_string },/* deprecated*/
213 { "password", &opt.passwd, cmd_string },
214 { "postdata", &opt.post_data, cmd_string },
215 { "postfile", &opt.post_file_name, cmd_file },
216 { "preferfamily", NULL, cmd_spec_prefer_family },
217 { "preservepermissions", &opt.preserve_perm, cmd_boolean },/* deprecated */
219 { "privatekey", &opt.private_key, cmd_file },
220 { "privatekeytype", &opt.private_key_type, cmd_cert_type },
222 { "progress", &opt.progress_type, cmd_spec_progress },
223 { "protocoldirectories", &opt.protocol_directories, cmd_boolean },
224 { "proxypasswd", &opt.proxy_passwd, cmd_string }, /* deprecated */
225 { "proxypassword", &opt.proxy_passwd, cmd_string },
226 { "proxyuser", &opt.proxy_user, cmd_string },
227 { "quiet", &opt.quiet, cmd_boolean },
228 { "quota", &opt.quota, cmd_bytes_sum },
230 { "randomfile", &opt.random_file, cmd_file },
232 { "randomwait", &opt.random_wait, cmd_boolean },
233 { "readtimeout", &opt.read_timeout, cmd_time },
234 { "reclevel", &opt.reclevel, cmd_number_inf },
235 { "recursive", NULL, cmd_spec_recursive },
236 { "referer", &opt.referer, cmd_string },
237 { "reject", &opt.rejects, cmd_vector },
238 { "relativeonly", &opt.relative_only, cmd_boolean },
239 { "remoteencoding", &opt.encoding_remote, cmd_string },
240 { "removelisting", &opt.remove_listing, cmd_boolean },
241 { "restrictfilenames", NULL, cmd_spec_restrict_file_names },
242 { "retrsymlinks", &opt.retr_symlinks, cmd_boolean },
243 { "retryconnrefused", &opt.retry_connrefused, cmd_boolean },
244 { "robots", &opt.use_robots, cmd_boolean },
245 { "savecookies", &opt.cookies_output, cmd_file },
246 { "saveheaders", &opt.save_headers, cmd_boolean },
248 { "secureprotocol", &opt.secure_protocol, cmd_spec_secure_protocol },
250 { "serverresponse", &opt.server_response, cmd_boolean },
251 { "spanhosts", &opt.spanhost, cmd_boolean },
252 { "spider", &opt.spider, cmd_boolean },
253 { "strictcomments", &opt.strict_comments, cmd_boolean },
254 { "timeout", NULL, cmd_spec_timeout },
255 { "timestamping", &opt.timestamping, cmd_boolean },
256 { "tries", &opt.ntry, cmd_number_inf },
257 { "trustservernames", &opt.trustservernames, cmd_boolean },
258 { "unlink", &opt.unlink, cmd_boolean },
259 { "useproxy", &opt.use_proxy, cmd_boolean },
260 { "user", &opt.user, cmd_string },
261 { "useragent", NULL, cmd_spec_useragent },
262 { "useservertimestamps", &opt.useservertimestamps, cmd_boolean },
263 { "verbose", NULL, cmd_spec_verbose },
264 { "wait", &opt.wait, cmd_time },
265 { "waitretry", &opt.waitretry, cmd_time },
267 { "wdebug", &opt.wdebug, cmd_boolean },
271 /* Look up CMDNAME in the commands[] and return its position in the
272 array. If CMDNAME is not found, return -1. */
275 command_by_name (const char *cmdname)
277 /* Use binary search for speed. Wget has ~100 commands, which
278 guarantees a worst case performance of 7 string comparisons. */
279 int lo = 0, hi = countof (commands) - 1;
283 int mid = (lo + hi) >> 1;
284 int cmp = strcasecmp (cmdname, commands[mid].name);
295 /* Reset the variables to default values. */
301 /* Most of the default values are 0 (and 0.0, NULL, and false).
302 Just reset everything, and fill in the non-zero values. Note
303 that initializing pointers to NULL this way is technically
304 illegal, but porting Wget to a machine where NULL is not all-zero
305 bit pattern will be the least of the implementors' worries. */
312 opt.add_hostdir = true;
316 opt.http_keep_alive = true;
317 opt.use_proxy = true;
318 tmp = getenv ("no_proxy");
320 opt.no_proxy = sepstring (tmp);
321 opt.prefer_family = prefer_none;
322 opt.allow_cache = true;
324 opt.read_timeout = 900;
325 opt.use_robots = true;
327 opt.remove_listing = true;
329 opt.dot_bytes = 1024;
330 opt.dot_spacing = 10;
331 opt.dots_in_line = 50;
333 opt.dns_cache = true;
337 opt.check_cert = true;
340 /* The default for file name restriction defaults to the OS type. */
341 #if defined(WINDOWS) || defined(MSDOS) || defined(__CYGWIN__)
342 opt.restrict_files_os = restrict_windows;
344 opt.restrict_files_os = restrict_unix;
346 opt.restrict_files_ctrl = true;
347 opt.restrict_files_nonascii = false;
348 opt.restrict_files_case = restrict_no_case_restriction;
350 opt.max_redirect = 20;
355 opt.enable_iri = true;
357 opt.enable_iri = false;
360 opt.encoding_remote = NULL;
362 opt.useservertimestamps = true;
365 /* Return the user's home directory (strdup-ed), or NULL if none is
370 static char buf[PATH_MAX];
375 home = getenv ("HOME");
379 /* Under MSDOS, if $HOME isn't defined, use the directory where
380 `wget.exe' resides. */
381 const char *_w32_get_argv0 (void); /* in libwatt.a/pcconfig.c */
384 strcpy (buf, _w32_get_argv0 ());
385 p = strrchr (buf, '/'); /* djgpp */
387 p = strrchr (buf, '\\'); /* others */
391 #elif !defined(WINDOWS)
392 /* If HOME is not defined, try getting it from the password
394 struct passwd *pwd = getpwuid (getuid ());
395 if (!pwd || !pwd->pw_dir)
397 strcpy (buf, pwd->pw_dir);
400 /* Under Windows, if $HOME isn't defined, use the directory where
401 `wget.exe' resides. */
407 return home ? xstrdup (home) : NULL;
410 /* Check the 'WGETRC' environment variable and return the file name
411 if 'WGETRC' is set and is a valid file.
412 If the `WGETRC' variable exists but the file does not exist, the
413 function will exit(). */
415 wgetrc_env_file_name (void)
417 char *env = getenv ("WGETRC");
420 if (!file_exists_p (env))
422 fprintf (stderr, _("%s: WGETRC points to %s, which doesn't exist.\n"),
426 return xstrdup (env);
431 /* Check for the existence of '$HOME/.wgetrc' and return its path
432 if it exists and is set. */
434 wgetrc_user_file_name (void)
438 /* If that failed, try $HOME/.wgetrc (or equivalent). */
441 file = "SYS$LOGIN:.wgetrc";
442 #else /* def __VMS */
445 file = aprintf ("%s/.wgetrc", home);
447 #endif /* def __VMS [else] */
451 if (!file_exists_p (file))
459 /* Return the path to the user's .wgetrc. This is either the value of
460 `WGETRC' environment variable, or `$HOME/.wgetrc'.
462 Additionally, for windows, look in the directory where wget.exe
465 wgetrc_file_name (void)
467 char *file = wgetrc_env_file_name ();
471 file = wgetrc_user_file_name ();
474 /* Under Windows, if we still haven't found .wgetrc, look for the file
475 `wget.ini' in the directory where `wget.exe' resides; we do this for
476 backward compatibility with previous versions of Wget.
477 SYSTEM_WGETRC should not be defined under WINDOWS. */
480 char *home = home_dir ();
486 file = aprintf ("%s/wget.ini", home);
487 if (!file_exists_p (file))
500 /* Return values of parse_line. */
508 static enum parse_line parse_line (const char *, char **, char **, int *);
509 static bool setval_internal (int, const char *, const char *);
510 static bool setval_internal_tilde (int, const char *, const char *);
512 /* Initialize variables from a wgetrc file. Returns false (failure) if
513 there were errors in the file. */
516 run_wgetrc (const char *file)
523 fp = fopen (file, "r");
526 fprintf (stderr, _("%s: Cannot read %s (%s).\n"), exec_name,
527 file, strerror (errno));
528 return true; /* not a fatal error */
531 while ((line = read_whole_line (fp)) != NULL)
533 char *com = NULL, *val = NULL;
536 /* Parse the line. */
537 switch (parse_line (line, &com, &val, &comind))
540 /* If everything is OK, set the value. */
541 if (!setval_internal_tilde (comind, com, val))
543 fprintf (stderr, _("%s: Error in %s at line %d.\n"),
544 exec_name, file, ln);
548 case line_syntax_error:
549 fprintf (stderr, _("%s: Syntax error in %s at line %d.\n"),
550 exec_name, file, ln);
553 case line_unknown_command:
554 fprintf (stderr, _("%s: Unknown command %s in %s at line %d.\n"),
555 exec_name, quote (com), file, ln);
573 /* Initialize the defaults and run the system wgetrc and user's own
578 char *file, *env_sysrc;
581 /* Run a non-standard system rc file when the corresponding environment
582 variable has been set. For internal testing purposes only! */
583 env_sysrc = getenv ("SYSTEM_WGETRC");
584 if (env_sysrc && file_exists_p (env_sysrc))
585 ok &= run_wgetrc (env_sysrc);
586 /* Otherwise, if SYSTEM_WGETRC is defined, use it. */
588 else if (file_exists_p (SYSTEM_WGETRC))
589 ok &= run_wgetrc (SYSTEM_WGETRC);
591 /* If there are any problems parsing the system wgetrc file, tell
595 fprintf (stderr, _("\
596 Parsing system wgetrc file failed, please check '%s'. \
597 Or specify a different file using --config\n"), SYSTEM_WGETRC);
600 /* Override it with your own, if one exists. */
601 file = wgetrc_file_name ();
604 /* #### We should canonicalize `file' and SYSTEM_WGETRC with
605 something like realpath() before comparing them with `strcmp' */
607 if (!strcmp (file, SYSTEM_WGETRC))
609 fprintf (stderr, _("\
610 %s: Warning: Both system and user wgetrc point to %s.\n"),
611 exec_name, quote (file));
615 ok &= run_wgetrc (file);
617 /* If there were errors processing either `.wgetrc', abort. */
625 /* Remove dashes and underscores from S, modifying S in the
631 char *t = s; /* t - tortoise */
632 char *h = s; /* h - hare */
634 if (*h == '_' || *h == '-')
641 /* Parse the line pointed to by LINE, with the syntax:
642 <sp>* command <sp>* = <sp>* value <sp>*
643 Uses malloc to allocate space for command and value.
645 Returns one of line_ok, line_empty, line_syntax_error, or
646 line_unknown_command.
648 In case of line_ok, *COM and *VAL point to freshly allocated
649 strings, and *COMIND holds com's index. In case of line_unknown_command,
650 *COM and *VAL are allocated too. Otherwise they are left unmodified. */
652 static enum parse_line
653 parse_line (const char *line, char **com, char **val, int *comind)
656 const char *end = line + strlen (line);
657 const char *cmdstart, *cmdend;
658 const char *valstart, *valend;
663 /* Skip leading and trailing whitespace. */
664 while (*line && c_isspace (*line))
666 while (end > line && c_isspace (end[-1]))
669 /* Skip empty lines and comments. */
670 if (!*line || *line == '#')
676 while (p < end && (c_isalnum (*p) || *p == '_' || *p == '-'))
680 /* Skip '=', as well as any space before or after it. */
681 while (p < end && c_isspace (*p))
683 if (p == end || *p != '=')
684 return line_syntax_error;
686 while (p < end && c_isspace (*p))
692 /* The syntax is valid (even though the command might not be). Fill
693 in the command name and value. */
694 *com = strdupdelim (cmdstart, cmdend);
695 *val = strdupdelim (valstart, valend);
697 /* The line is now known to be syntactically correct. Check whether
698 the command is valid. */
699 BOUNDED_TO_ALLOCA (cmdstart, cmdend, cmdcopy);
701 ind = command_by_name (cmdcopy);
703 return line_unknown_command;
705 /* Report success to the caller. */
710 #if defined(WINDOWS) || defined(MSDOS)
711 # define ISSEP(c) ((c) == '/' || (c) == '\\')
713 # define ISSEP(c) ((c) == '/')
716 /* Run commands[comind].action. */
719 setval_internal (int comind, const char *com, const char *val)
721 assert (0 <= comind && ((size_t) comind) < countof (commands));
722 DEBUGP (("Setting %s (%s) to %s\n", com, commands[comind].name, val));
723 return commands[comind].action (com, val, commands[comind].place);
727 setval_internal_tilde (int comind, const char *com, const char *val)
733 ret = setval_internal (comind, com, val);
735 /* Perform tilde expansion for cmd_file and cmd_directory. */
736 if (((commands[comind].action == cmd_file) ||
737 (commands[comind].action == cmd_directory))
738 && ret && (*val == '~' && ISSEP (val[1])))
740 pstring = commands[comind].place;
744 homelen = strlen (home);
745 while (homelen && ISSEP (home[homelen - 1]))
746 home[--homelen] = '\0';
748 /* Skip the leading "~/". */
749 for (++val; ISSEP (*val); val++)
751 *pstring = concat_strings (home, "/", val, (char *)0);
757 /* Run command COM with value VAL. If running the command produces an
758 error, report the error and exit.
760 This is intended to be called from main() to modify Wget's behavior
761 through command-line switches. Since COM is hard-coded in main(),
762 it is not canonicalized, and this aborts when COM is not found.
764 If COMIND's are exported to init.h, this function will be changed
765 to accept COMIND directly. */
768 setoptval (const char *com, const char *val, const char *optname)
770 /* Prepend "--" to OPTNAME. */
771 char *dd_optname = (char *) alloca (2 + strlen (optname) + 1);
774 strcpy (dd_optname + 2, optname);
776 assert (val != NULL);
777 if (!setval_internal (command_by_name (com), dd_optname, val))
781 /* Parse OPT into command and value and run it. For example,
782 run_command("foo=bar") is equivalent to setoptval("foo", "bar").
783 This is used by the `--execute' flag in main.c. */
786 run_command (const char *opt)
790 switch (parse_line (opt, &com, &val, &comind))
793 if (!setval_internal (comind, com, val))
799 fprintf (stderr, _("%s: Invalid --execute command %s\n"),
800 exec_name, quote (opt));
805 /* Generic helper functions, for use with `commands'. */
807 /* Forward declarations: */
812 static bool decode_string (const char *, const struct decode_item *, int, int *);
813 static bool simple_atoi (const char *, const char *, int *);
814 static bool simple_atof (const char *, const char *, double *);
816 #define CMP1(p, c0) (c_tolower((p)[0]) == (c0) && (p)[1] == '\0')
818 #define CMP2(p, c0, c1) (c_tolower((p)[0]) == (c0) \
819 && c_tolower((p)[1]) == (c1) \
822 #define CMP3(p, c0, c1, c2) (c_tolower((p)[0]) == (c0) \
823 && c_tolower((p)[1]) == (c1) \
824 && c_tolower((p)[2]) == (c2) \
828 /* Store the boolean value from VAL to PLACE. COM is ignored,
829 except for error messages. */
831 cmd_boolean (const char *com, const char *val, void *place)
835 if (CMP2 (val, 'o', 'n') || CMP3 (val, 'y', 'e', 's') || CMP1 (val, '1'))
836 /* "on", "yes" and "1" mean true. */
838 else if (CMP3 (val, 'o', 'f', 'f') || CMP2 (val, 'n', 'o') || CMP1 (val, '0'))
839 /* "off", "no" and "0" mean false. */
844 _("%s: %s: Invalid boolean %s; use `on' or `off'.\n"),
845 exec_name, com, quote (val));
849 *(bool *) place = value;
853 /* Set the non-negative integer value from VAL to PLACE. An unparsable
854 VAL leaves PLACE unchanged; a negative one is rejected after being stored. */
856 cmd_number (const char *com, const char *val, void *place)
858 if (!simple_atoi (val, val + strlen (val), place)
859 || *(int *) place < 0)
861 fprintf (stderr, _("%s: %s: Invalid number %s.\n"),
862 exec_name, com, quote (val));
868 /* Similar to cmd_number(), only accepts `inf' as a synonym for 0. */
870 cmd_number_inf (const char *com, const char *val, void *place)
872 if (!strcasecmp (val, "inf"))
877 return cmd_number (com, val, place);
880 /* Copy (strdup) the string at VAL to a new location and place a
881 pointer to it in *PLACE. */
883 cmd_string (const char *com, const char *val, void *place)
885 char **pstring = (char **)place;
887 xfree_null (*pstring);
888 *pstring = xstrdup (val);
893 /* Like the above, but also canonicalizes directory separators
894 (backslash -> slash) under Windows and MSDOS. Tilde expansion of a leading
895 `~/' when reading `.wgetrc' is done by setval_internal_tilde, not here. */
897 cmd_file (const char *com, const char *val, void *place)
899 char **pstring = (char **)place;
901 xfree_null (*pstring);
903 /* #### If VAL is empty, perhaps should set *PLACE to NULL. */
905 *pstring = xstrdup (val);
907 #if defined(WINDOWS) || defined(MSDOS)
908 /* Convert "\" to "/". */
911 for (s = *pstring; *s; s++)
919 /* Like cmd_file, but strips trailing '/' characters. */
921 cmd_directory (const char *com, const char *val, void *place)
925 /* Call cmd_file() to copy the value and canonicalize separators
926 (backslash -> slash under Windows). These things should perhaps
927 be in a separate function. */
928 if (!cmd_file (com, val, place))
933 while (t > s && *--t == '/')
939 /* Split VAL by space to a vector of values, and append those values
940 to vector pointed to by the PLACE argument. If VAL is empty, the
941 PLACE vector is cleared instead. */
944 cmd_vector (const char *com, const char *val, void *place)
946 char ***pvec = (char ***)place;
949 *pvec = merge_vecs (*pvec, sepstring (val));
959 cmd_directory_vector (const char *com, const char *val, void *place)
961 char ***pvec = (char ***)place;
965 /* Strip the trailing slashes from directories. */
968 seps = sepstring (val);
969 for (t = seps; t && *t; t++)
971 int len = strlen (*t);
972 /* Skip degenerate case of root directory. */
975 if ((*t)[len - 1] == '/')
976 (*t)[len - 1] = '\0';
979 *pvec = merge_vecs (*pvec, seps);
989 /* Engine for cmd_bytes and cmd_bytes_sum: converts a string such as
990 "100k" or "2.5G" to a floating point number. */
993 parse_bytes_helper (const char *val, double *result)
996 const char *end = val + strlen (val);
998 /* Check for "inf". */
999 if (0 == strcmp (val, "inf"))
1005 /* Strip trailing whitespace. */
1006 while (val < end && c_isspace (end[-1]))
1011 switch (c_tolower (end[-1]))
1014 --end, mult = 1024.0;
1017 --end, mult = 1048576.0;
1020 --end, mult = 1073741824.0;
1023 --end, mult = 1099511627776.0;
1026 /* Not a recognized suffix: assume it's a digit. (If not,
1027 simple_atof will raise an error.) */
1031 /* Skip leading and trailing whitespace. */
1032 while (val < end && c_isspace (*val))
1034 while (val < end && c_isspace (end[-1]))
1039 if (!simple_atof (val, end, &number) || number < 0)
1042 *result = number * mult;
1046 /* Parse VAL as a number and set its value to PLACE (which should
1049 By default, the value is assumed to be in bytes. If "K", "M", "G",
1050 or "T" are appended, the value is multiplied with 1<<10, 1<<20,
1051 1<<30, or 1<<40, respectively. Floating point values are allowed and are
1052 cast to integer before use. The idea is to be able to use things
1053 like 1.5k instead of "1536".
1055 The string "inf" is returned as 0.
1057 In case of error, false is returned and memory pointed to by PLACE
1058 remains unmodified. */
1061 cmd_bytes (const char *com, const char *val, void *place)
1064 if (!parse_bytes_helper (val, &byte_value))
1066 fprintf (stderr, _("%s: %s: Invalid byte value %s\n"),
1067 exec_name, com, quote (val));
1070 *(wgint *)place = (wgint)byte_value;
1074 /* Like cmd_bytes, but PLACE is interpreted as a pointer to
1075 SUM_SIZE_INT. It works by converting the string to double, therefore
1076 working with values up to 2^53-1 without loss of precision. This
1077 value (8192 TB) is large enough to serve for a while. */
1080 cmd_bytes_sum (const char *com, const char *val, void *place)
1083 if (!parse_bytes_helper (val, &byte_value))
1085 fprintf (stderr, _("%s: %s: Invalid byte value %s\n"),
1086 exec_name, com, quote (val));
1089 *(SUM_SIZE_INT *) place = (SUM_SIZE_INT) byte_value;
1093 /* Store the value of VAL to *PLACE. The value is a time period, by
1094 default expressed in seconds, but also accepting suffixes "m", "h",
1095 "d", and "w" for minutes, hours, days, and weeks respectively. */
1098 cmd_time (const char *com, const char *val, void *place)
1100 double number, mult;
1101 const char *end = val + strlen (val);
1103 /* Strip trailing whitespace. */
1104 while (val < end && c_isspace (end[-1]))
1110 fprintf (stderr, _("%s: %s: Invalid time period %s\n"),
1111 exec_name, com, quote (val));
1115 switch (c_tolower (end[-1]))
1118 --end, mult = 1; /* seconds */
1121 --end, mult = 60; /* minutes */
1124 --end, mult = 3600; /* hours */
1127 --end, mult = 86400.0; /* days */
1130 --end, mult = 604800.0; /* weeks */
1133 /* Not a recognized suffix: assume it belongs to the number.
1134 (If not, simple_atof will raise an error.) */
1138 /* Skip leading and trailing whitespace. */
1139 while (val < end && c_isspace (*val))
1141 while (val < end && c_isspace (end[-1]))
1146 if (!simple_atof (val, end, &number))
1149 *(double *)place = number * mult;
1155 cmd_cert_type (const char *com, const char *val, void *place)
1157 static const struct decode_item choices[] = {
1158 { "pem", keyfile_pem },
1159 { "der", keyfile_asn1 },
1160 { "asn1", keyfile_asn1 },
1162 int ok = decode_string (val, choices, countof (choices), place);
1164 fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val));
1169 /* Specialized helper functions, used by `commands' to handle some
1170 options specially. */
1172 static bool check_user_specified_header (const char *);
1175 cmd_spec_dirstruct (const char *com, const char *val, void *place_ignored)
1177 if (!cmd_boolean (com, val, &opt.dirstruct))
1179 /* Since dirstruct behaviour is explicitly changed, no_dirstruct
1180 must be affected inversely. */
1182 opt.no_dirstruct = false;
1184 opt.no_dirstruct = true;
1189 cmd_spec_header (const char *com, const char *val, void *place_ignored)
1191 /* Empty value means reset the list of headers. */
1194 free_vec (opt.user_headers);
1195 opt.user_headers = NULL;
1199 if (!check_user_specified_header (val))
1201 fprintf (stderr, _("%s: %s: Invalid header %s.\n"),
1202 exec_name, com, quote (val));
1205 opt.user_headers = vec_append (opt.user_headers, val);
1210 cmd_spec_htmlify (const char *com, const char *val, void *place_ignored)
1212 int flag = cmd_boolean (com, val, &opt.htmlify);
1213 if (flag && !opt.htmlify)
1214 opt.remove_listing = false;
1218 /* Set the "mirror" mode. It means: recursive download, timestamping,
1219 no limit on max. recursion depth, and don't remove listings. */
1222 cmd_spec_mirror (const char *com, const char *val, void *place_ignored)
1226 if (!cmd_boolean (com, val, &mirror))
1230 opt.recursive = true;
1231 if (!opt.no_dirstruct)
1232 opt.dirstruct = true;
1233 opt.timestamping = true;
1234 opt.reclevel = INFINITE_RECURSION;
1235 opt.remove_listing = false;
1240 /* Validate --prefer-family and set the choice. Allowed values are
1241 "IPv4", "IPv6", and "none". */
1244 cmd_spec_prefer_family (const char *com, const char *val, void *place_ignored)
1246 static const struct decode_item choices[] = {
1247 { "IPv4", prefer_ipv4 },
1248 { "IPv6", prefer_ipv6 },
1249 { "none", prefer_none },
1251 int prefer_family = prefer_none;
1252 int ok = decode_string (val, choices, countof (choices), &prefer_family);
1254 fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val));
1255 opt.prefer_family = prefer_family;
1259 /* Set progress.type to VAL, but verify that it's a valid progress
1260 implementation before that. */
1263 cmd_spec_progress (const char *com, const char *val, void *place_ignored)
1265 if (!valid_progress_implementation_p (val))
1267 fprintf (stderr, _("%s: %s: Invalid progress type %s.\n"),
1268 exec_name, com, quote (val));
1271 xfree_null (opt.progress_type);
1273 /* Don't call set_progress_implementation here. It will be called
1274 in main() when it becomes clear what the log output is. */
1275 opt.progress_type = xstrdup (val);
1279 /* Set opt.recursive to VAL as with cmd_boolean. If opt.recursive is
1280 set to true, also set opt.dirstruct to true, unless opt.no_dirstruct
1284 cmd_spec_recursive (const char *com, const char *val, void *place_ignored)
1286 if (!cmd_boolean (com, val, &opt.recursive))
1290 if (opt.recursive && !opt.no_dirstruct)
1291 opt.dirstruct = true;
1297 cmd_spec_restrict_file_names (const char *com, const char *val, void *place_ignored)
1299 int restrict_os = opt.restrict_files_os;
1300 int restrict_ctrl = opt.restrict_files_ctrl;
1301 int restrict_case = opt.restrict_files_case;
1302 int restrict_nonascii = opt.restrict_files_nonascii;
1306 #define VAL_IS(string_literal) BOUNDED_EQUAL (val, end, string_literal)
1310 end = strchr (val, ',');
1312 end = val + strlen (val);
1314 if (VAL_IS ("unix"))
1315 restrict_os = restrict_unix;
1316 else if (VAL_IS ("windows"))
1317 restrict_os = restrict_windows;
1318 else if (VAL_IS ("lowercase"))
1319 restrict_case = restrict_lowercase;
1320 else if (VAL_IS ("uppercase"))
1321 restrict_case = restrict_uppercase;
1322 else if (VAL_IS ("nocontrol"))
1323 restrict_ctrl = false;
1324 else if (VAL_IS ("ascii"))
1325 restrict_nonascii = true;
1328 fprintf (stderr, _("\
1329 %s: %s: Invalid restriction %s,\n\
1330 use [unix|windows],[lowercase|uppercase],[nocontrol],[ascii].\n"),
1331 exec_name, com, quote (val));
1338 while (*val && *end);
1342 opt.restrict_files_os = restrict_os;
1343 opt.restrict_files_ctrl = restrict_ctrl;
1344 opt.restrict_files_case = restrict_case;
1345 opt.restrict_files_nonascii = restrict_nonascii;
1352 cmd_spec_secure_protocol (const char *com, const char *val, void *place)
1354 static const struct decode_item choices[] = {
1355 { "auto", secure_protocol_auto },
1356 { "sslv2", secure_protocol_sslv2 },
1357 { "sslv3", secure_protocol_sslv3 },
1358 { "tlsv1", secure_protocol_tlsv1 },
1360 int ok = decode_string (val, choices, countof (choices), place);
1362 fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val));
1367 /* Set all three timeout values. */
1370 cmd_spec_timeout (const char *com, const char *val, void *place_ignored)
1373 if (!cmd_time (com, val, &value))
1375 opt.read_timeout = value;
1376 opt.connect_timeout = value;
1377 opt.dns_timeout = value;
1382 cmd_spec_useragent (const char *com, const char *val, void *place_ignored)
1384 /* Disallow embedded newlines. */
1385 if (strchr (val, '\n'))
1387 fprintf (stderr, _("%s: %s: Invalid value %s.\n"),
1388 exec_name, com, quote (val));
1391 xfree_null (opt.useragent);
1392 opt.useragent = xstrdup (val);
1396 /* The "verbose" option cannot be cmd_boolean because the variable is
1397 not bool -- it's of type int (-1 means uninitialized because of
1398 some random hackery for disallowing -q -v). */
1401 cmd_spec_verbose (const char *com, const char *val, void *place_ignored)
1404 if (cmd_boolean (com, val, &flag))
1412 /* Miscellaneous useful routines. */
/* A very simple atoi clone, more useful than atoi because it works on
   the delimited string [BEG, END) and reports errors.  Accepts
   optional leading whitespace, an optional sign, and then decimal
   digits up to END.  Returns true and stores the value to *DEST on
   success.  Returns false, leaving *DEST alone, if no digits follow
   the optional sign, if a non-digit is found before END, or if the
   value does not fit in an int.  */

static bool
simple_atoi (const char *beg, const char *end, int *dest)
{
  int result = 0;
  bool negative = false;
  const char *p = beg;

  while (p < end && c_isspace (*p))
    ++p;
  if (p < end && (*p == '-' || *p == '+'))
    {
      negative = (*p == '-');
      ++p;
    }
  if (p == end)
    return false;

  /* Negative numbers are accumulated downwards because the most
     negative integer cannot be represented as a positive number.
     The range is checked before each step: signed overflow is
     undefined behavior, so it must not be detected after the fact.  */
  for (; p < end && c_isdigit (*p); p++)
    {
      const int digit = *p - '0';

      if (!negative)
        {
          if (result > (INT_MAX - digit) / 10)
            return false;       /* overflow */
          result = (10 * result) + digit;
        }
      else
        {
          if (result < (INT_MIN + digit) / 10)
            return false;       /* underflow */
          result = (10 * result) - digit;
        }
    }

  /* Anything left over is not a digit.  */
  if (p != end)
    return false;

  *dest = result;
  return true;
}
/* Trivial atof, with error reporting, for the delimited string
   [BEG, END).  Accepts optional leading whitespace, an optional sign,
   and then digits with at most one decimal point; exponential
   notation is not handled.  Returns true and stores the value to
   *DEST on success.  Returns false, leaving *DEST alone, if there is
   no digit at all, a second '.', or any other character.  */

static bool
simple_atof (const char *beg, const char *end, double *dest)
{
  double value = 0;
  double scale = 1;             /* power of ten of the last fraction digit */
  bool is_negative = false;
  bool in_fraction = false;
  bool any_digit = false;
  const char *cur = beg;

  while (cur < end && c_isspace (*cur))
    ++cur;
  if (cur < end && (*cur == '-' || *cur == '+'))
    {
      is_negative = (*cur == '-');
      ++cur;
    }

  for (; cur < end; cur++)
    {
      const char ch = *cur;

      if (ch == '.')
        {
          if (in_fraction)
            return false;       /* second decimal point */
          in_fraction = true;
          continue;
        }
      if (!c_isdigit (ch))
        return false;

      any_digit = true;
      if (in_fraction)
        {
          scale *= 10;
          value += (ch - '0') / scale;
        }
      else
        value = (10 * value) + (ch - '0');
    }

  if (!any_digit)
    return false;

  *dest = is_negative ? -value : value;
  return true;
}
/* Verify that the user-specified header in S is valid.  Returns true
   only if S starts with at least one character that is neither
   whitespace nor `:', immediately followed by `:', and S contains no
   newline anywhere.  */

static bool
check_user_specified_header (const char *s)
{
  const char *scan = s;

  /* Skip over the header name.  */
  while (*scan != '\0' && *scan != ':' && !c_isspace (*scan))
    ++scan;

  /* The name MUST be non-empty and MUST be terminated by `:'.  */
  if (scan == s || *scan != ':')
    return false;

  /* The header MUST NOT contain newlines.  */
  return strchr (s, '\n') == NULL;
}
1538 /* Decode VAL into a number, according to ITEMS. */
1541 decode_string (const char *val, const struct decode_item *items, int itemcount,
1545 for (i = 0; i < itemcount; i++)
1546 if (0 == strcasecmp (val, items[i].name))
1548 *place = items[i].code;
1555 void cleanup_html_url (void);
1558 /* Free the memory allocated by global variables. */
1562 /* Free external resources, close files, etc. */
1565 fclose (output_stream);
1566 /* No need to check for error because Wget flushes its output (and
1567 checks for errors) after any data arrives. */
1569 /* We're exiting anyway so there's no real need to call free()
1570 hundreds of times. Skipping the frees will make Wget exit
1573 However, when detecting leaks, it's crucial to free() everything
1574 because then you can find the real leaks, i.e. the allocated
1575 memory which grows with the size of the program. */
1581 cleanup_html_url ();
1586 extern acc_t *netrc_list;
1587 free_netrc (netrc_list);
1589 xfree_null (opt.choose_config);
1590 xfree_null (opt.lfilename);
1591 xfree_null (opt.dir_prefix);
1592 xfree_null (opt.input_filename);
1593 xfree_null (opt.output_document);
1594 free_vec (opt.accepts);
1595 free_vec (opt.rejects);
1596 free_vec (opt.excludes);
1597 free_vec (opt.includes);
1598 free_vec (opt.domains);
1599 free_vec (opt.follow_tags);
1600 free_vec (opt.ignore_tags);
1601 xfree_null (opt.progress_type);
1602 xfree_null (opt.ftp_user);
1603 xfree_null (opt.ftp_passwd);
1604 xfree_null (opt.ftp_proxy);
1605 xfree_null (opt.https_proxy);
1606 xfree_null (opt.http_proxy);
1607 free_vec (opt.no_proxy);
1608 xfree_null (opt.useragent);
1609 xfree_null (opt.referer);
1610 xfree_null (opt.http_user);
1611 xfree_null (opt.http_passwd);
1612 free_vec (opt.user_headers);
1614 xfree_null (opt.cert_file);
1615 xfree_null (opt.private_key);
1616 xfree_null (opt.ca_directory);
1617 xfree_null (opt.ca_cert);
1618 xfree_null (opt.random_file);
1619 xfree_null (opt.egd_file);
1621 xfree_null (opt.bind_address);
1622 xfree_null (opt.cookies_input);
1623 xfree_null (opt.cookies_output);
1624 xfree_null (opt.user);
1625 xfree_null (opt.passwd);
1626 xfree_null (opt.base_href);
1628 #endif /* DEBUG_MALLOC */
1631 /* Unit testing routines. */
1636 test_commands_sorted()
1638 int prev_idx = 0, next_idx = 1;
1639 int command_count = countof (commands) - 1;
1641 while (next_idx <= command_count)
1643 cmp = strcasecmp (commands[prev_idx].name, commands[next_idx].name);
1646 mu_assert ("FAILED", false);
1659 test_cmd_spec_restrict_file_names()
1664 int expected_restrict_files_os;
1665 int expected_restrict_files_ctrl;
1666 int expected_restrict_files_case;
1669 { "windows", restrict_windows, true, restrict_no_case_restriction, true },
1670 { "windows,", restrict_windows, true, restrict_no_case_restriction, true },
1671 { "windows,lowercase", restrict_windows, true, restrict_lowercase, true },
1672 { "unix,nocontrol,lowercase,", restrict_unix, false, restrict_lowercase, true },
1675 for (i = 0; i < sizeof(test_array)/sizeof(test_array[0]); ++i)
1680 res = cmd_spec_restrict_file_names ("dummy", test_array[i].val, NULL);
1683 fprintf (stderr, "test_cmd_spec_restrict_file_names: TEST %d\n", i); fflush (stderr);
1684 fprintf (stderr, "opt.restrict_files_os: %d\n", opt.restrict_files_os); fflush (stderr);
1685 fprintf (stderr, "opt.restrict_files_ctrl: %d\n", opt.restrict_files_ctrl); fflush (stderr);
1686 fprintf (stderr, "opt.restrict_files_case: %d\n", opt.restrict_files_case); fflush (stderr);
1688 mu_assert ("test_cmd_spec_restrict_file_names: wrong result",
1689 res == test_array[i].result
1690 && opt.restrict_files_os == test_array[i].expected_restrict_files_os
1691 && opt.restrict_files_ctrl == test_array[i].expected_restrict_files_ctrl
1692 && opt.restrict_files_case == test_array[i].expected_restrict_files_case);
1698 #endif /* TESTING */