1 /* Reading/parsing the initialization file.
2 Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
3 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
5 This file is part of GNU Wget.
7 GNU Wget is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 GNU Wget is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with Wget. If not, see <http://www.gnu.org/licenses/>.
20 Additional permission under GNU GPL version 3 section 7
22 If you modify this program, or any covered work, by linking or
23 combining it with the OpenSSL project's OpenSSL library (or a
24 modified version of that library), containing parts covered by the
25 terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
26 grants you additional permission to convey the resulting work.
27 Corresponding Source for a non-source form of such a combination
28 shall include the source code for the parts of OpenSSL used as well
29 as that of the covered work. */
51 #include "recur.h" /* for INFINITE_RECURSION */
52 #include "convert.h" /* for convert_cleanup */
53 #include "res.h" /* for res_cleanup */
54 #include "http.h" /* for http_cleanup */
55 #include "retr.h" /* for output_stream */
63 #define CMD_DECLARE(func) static bool func (const char *, const char *, void *)
65 CMD_DECLARE (cmd_boolean);
66 CMD_DECLARE (cmd_bytes);
67 CMD_DECLARE (cmd_bytes_sum);
69 CMD_DECLARE (cmd_cert_type);
71 CMD_DECLARE (cmd_directory_vector);
72 CMD_DECLARE (cmd_number);
73 CMD_DECLARE (cmd_number_inf);
74 CMD_DECLARE (cmd_string);
75 CMD_DECLARE (cmd_file);
76 CMD_DECLARE (cmd_directory);
77 CMD_DECLARE (cmd_time);
78 CMD_DECLARE (cmd_vector);
80 CMD_DECLARE (cmd_spec_dirstruct);
81 CMD_DECLARE (cmd_spec_header);
82 CMD_DECLARE (cmd_spec_htmlify);
83 CMD_DECLARE (cmd_spec_mirror);
84 CMD_DECLARE (cmd_spec_prefer_family);
85 CMD_DECLARE (cmd_spec_progress);
86 CMD_DECLARE (cmd_spec_recursive);
87 CMD_DECLARE (cmd_spec_restrict_file_names);
89 CMD_DECLARE (cmd_spec_secure_protocol);
91 CMD_DECLARE (cmd_spec_timeout);
92 CMD_DECLARE (cmd_spec_useragent);
93 CMD_DECLARE (cmd_spec_verbose);
95 /* List of recognized commands, each consisting of name, place and
96 function. When adding a new command, simply add it to the list,
97 but be sure to keep the list sorted alphabetically, as
98 command_by_name's binary search depends on it. Also, be sure to
99 add any entries that allocate memory (e.g. cmd_string and
100 cmd_vector) to the cleanup() function below. */
102 static const struct {
105 bool (*action) (const char *, const char *, void *);
107 /* KEEP THIS LIST ALPHABETICALLY SORTED */
108 { "accept", &opt.accepts, cmd_vector },
109 { "addhostdir", &opt.add_hostdir, cmd_boolean },
110 { "alwaysrest", &opt.always_rest, cmd_boolean }, /* deprecated */
111 { "askpassword", &opt.ask_passwd, cmd_boolean },
112 { "authnochallenge", &opt.auth_without_challenge,
114 { "background", &opt.background, cmd_boolean },
115 { "backupconverted", &opt.backup_converted, cmd_boolean },
116 { "backups", &opt.backups, cmd_number },
117 { "base", &opt.base_href, cmd_string },
118 { "bindaddress", &opt.bind_address, cmd_string },
120 { "cacertificate", &opt.ca_cert, cmd_file },
122 { "cache", &opt.allow_cache, cmd_boolean },
124 { "cadirectory", &opt.ca_directory, cmd_directory },
125 { "certificate", &opt.cert_file, cmd_file },
126 { "certificatetype", &opt.cert_type, cmd_cert_type },
127 { "checkcertificate", &opt.check_cert, cmd_boolean },
129 { "connecttimeout", &opt.connect_timeout, cmd_time },
130 { "contentdisposition", &opt.content_disposition, cmd_boolean },
131 { "continue", &opt.always_rest, cmd_boolean },
132 { "convertlinks", &opt.convert_links, cmd_boolean },
133 { "cookies", &opt.cookies, cmd_boolean },
134 { "cutdirs", &opt.cut_dirs, cmd_number },
136 { "debug", &opt.debug, cmd_boolean },
138 { "defaultpage", &opt.default_page, cmd_string},
139 { "deleteafter", &opt.delete_after, cmd_boolean },
140 { "dirprefix", &opt.dir_prefix, cmd_directory },
141 { "dirstruct", NULL, cmd_spec_dirstruct },
142 { "dnscache", &opt.dns_cache, cmd_boolean },
143 { "dnstimeout", &opt.dns_timeout, cmd_time },
144 { "domains", &opt.domains, cmd_vector },
145 { "dotbytes", &opt.dot_bytes, cmd_bytes },
146 { "dotsinline", &opt.dots_in_line, cmd_number },
147 { "dotspacing", &opt.dot_spacing, cmd_number },
148 { "dotstyle", &opt.dot_style, cmd_string },
150 { "egdfile", &opt.egd_file, cmd_file },
152 { "excludedirectories", &opt.excludes, cmd_directory_vector },
153 { "excludedomains", &opt.exclude_domains, cmd_vector },
154 { "followftp", &opt.follow_ftp, cmd_boolean },
155 { "followtags", &opt.follow_tags, cmd_vector },
156 { "forcehtml", &opt.force_html, cmd_boolean },
157 { "ftppasswd", &opt.ftp_passwd, cmd_string }, /* deprecated */
158 { "ftppassword", &opt.ftp_passwd, cmd_string },
159 { "ftpproxy", &opt.ftp_proxy, cmd_string },
160 { "ftpuser", &opt.ftp_user, cmd_string },
161 { "glob", &opt.ftp_glob, cmd_boolean },
162 { "header", NULL, cmd_spec_header },
163 { "htmlextension", &opt.html_extension, cmd_boolean },
164 { "htmlify", NULL, cmd_spec_htmlify },
165 { "httpkeepalive", &opt.http_keep_alive, cmd_boolean },
166 { "httppasswd", &opt.http_passwd, cmd_string }, /* deprecated */
167 { "httppassword", &opt.http_passwd, cmd_string },
168 { "httpproxy", &opt.http_proxy, cmd_string },
169 { "httpsproxy", &opt.https_proxy, cmd_string },
170 { "httpuser", &opt.http_user, cmd_string },
171 { "ignorecase", &opt.ignore_case, cmd_boolean },
172 { "ignorelength", &opt.ignore_length, cmd_boolean },
173 { "ignoretags", &opt.ignore_tags, cmd_vector },
174 { "includedirectories", &opt.includes, cmd_directory_vector },
176 { "inet4only", &opt.ipv4_only, cmd_boolean },
177 { "inet6only", &opt.ipv6_only, cmd_boolean },
179 { "input", &opt.input_filename, cmd_file },
180 { "iri", &opt.enable_iri, cmd_boolean },
181 { "keepsessioncookies", &opt.keep_session_cookies, cmd_boolean },
182 { "limitrate", &opt.limit_rate, cmd_bytes },
183 { "loadcookies", &opt.cookies_input, cmd_file },
184 { "locale", &opt.locale, cmd_string },
185 { "logfile", &opt.lfilename, cmd_file },
186 { "login", &opt.ftp_user, cmd_string },/* deprecated*/
187 { "maxredirect", &opt.max_redirect, cmd_number },
188 { "mirror", NULL, cmd_spec_mirror },
189 { "netrc", &opt.netrc, cmd_boolean },
190 { "noclobber", &opt.noclobber, cmd_boolean },
191 { "noparent", &opt.no_parent, cmd_boolean },
192 { "noproxy", &opt.no_proxy, cmd_vector },
193 { "numtries", &opt.ntry, cmd_number_inf },/* deprecated*/
194 { "outputdocument", &opt.output_document, cmd_file },
195 { "pagerequisites", &opt.page_requisites, cmd_boolean },
196 { "passiveftp", &opt.ftp_pasv, cmd_boolean },
197 { "passwd", &opt.ftp_passwd, cmd_string },/* deprecated*/
198 { "password", &opt.passwd, cmd_string },
199 { "postdata", &opt.post_data, cmd_string },
200 { "postfile", &opt.post_file_name, cmd_file },
201 { "preferfamily", NULL, cmd_spec_prefer_family },
202 { "preservepermissions", &opt.preserve_perm, cmd_boolean },
204 { "privatekey", &opt.private_key, cmd_file },
205 { "privatekeytype", &opt.private_key_type, cmd_cert_type },
207 { "progress", &opt.progress_type, cmd_spec_progress },
208 { "protocoldirectories", &opt.protocol_directories, cmd_boolean },
209 { "proxypasswd", &opt.proxy_passwd, cmd_string }, /* deprecated */
210 { "proxypassword", &opt.proxy_passwd, cmd_string },
211 { "proxyuser", &opt.proxy_user, cmd_string },
212 { "quiet", &opt.quiet, cmd_boolean },
213 { "quota", &opt.quota, cmd_bytes_sum },
215 { "randomfile", &opt.random_file, cmd_file },
217 { "randomwait", &opt.random_wait, cmd_boolean },
218 { "readtimeout", &opt.read_timeout, cmd_time },
219 { "reclevel", &opt.reclevel, cmd_number_inf },
220 { "recursive", NULL, cmd_spec_recursive },
221 { "referer", &opt.referer, cmd_string },
222 { "reject", &opt.rejects, cmd_vector },
223 { "relativeonly", &opt.relative_only, cmd_boolean },
224 { "remoteencoding", &opt.encoding_remote, cmd_string },
225 { "removelisting", &opt.remove_listing, cmd_boolean },
226 { "restrictfilenames", NULL, cmd_spec_restrict_file_names },
227 { "retrsymlinks", &opt.retr_symlinks, cmd_boolean },
228 { "retryconnrefused", &opt.retry_connrefused, cmd_boolean },
229 { "robots", &opt.use_robots, cmd_boolean },
230 { "savecookies", &opt.cookies_output, cmd_file },
231 { "saveheaders", &opt.save_headers, cmd_boolean },
233 { "secureprotocol", &opt.secure_protocol, cmd_spec_secure_protocol },
235 { "serverresponse", &opt.server_response, cmd_boolean },
236 { "spanhosts", &opt.spanhost, cmd_boolean },
237 { "spider", &opt.spider, cmd_boolean },
238 { "strictcomments", &opt.strict_comments, cmd_boolean },
239 { "timeout", NULL, cmd_spec_timeout },
240 { "timestamping", &opt.timestamping, cmd_boolean },
241 { "tries", &opt.ntry, cmd_number_inf },
242 { "useproxy", &opt.use_proxy, cmd_boolean },
243 { "user", &opt.user, cmd_string },
244 { "useragent", NULL, cmd_spec_useragent },
245 { "verbose", NULL, cmd_spec_verbose },
246 { "wait", &opt.wait, cmd_time },
247 { "waitretry", &opt.waitretry, cmd_time },
249 { "wdebug", &opt.wdebug, cmd_boolean },
253 /* Look up CMDNAME in the commands[] and return its position in the
254 array. If CMDNAME is not found, return -1. */
257 command_by_name (const char *cmdname)
259 /* Use binary search for speed. Wget has ~100 commands, which
260 guarantees a worst case performance of 7 string comparisons. */
261 int lo = 0, hi = countof (commands) - 1;
265 int mid = (lo + hi) >> 1;
266 int cmp = strcasecmp (cmdname, commands[mid].name);
277 /* Reset the variables to default values. */
283 /* Most of the default values are 0 (and 0.0, NULL, and false).
284 Just reset everything, and fill in the non-zero values. Note
285 that initializing pointers to NULL this way is technically
286 illegal, but porting Wget to a machine where NULL is not all-zero
287 bit pattern will be the least of the implementors' worries. */
294 opt.add_hostdir = true;
298 opt.http_keep_alive = true;
299 opt.use_proxy = true;
300 tmp = getenv ("no_proxy");
302 opt.no_proxy = sepstring (tmp);
303 opt.prefer_family = prefer_none;
304 opt.allow_cache = true;
306 opt.read_timeout = 900;
307 opt.use_robots = true;
309 opt.remove_listing = true;
311 opt.dot_bytes = 1024;
312 opt.dot_spacing = 10;
313 opt.dots_in_line = 50;
315 opt.dns_cache = true;
319 opt.check_cert = true;
322 /* The default for file name restriction defaults to the OS type. */
323 #if defined(WINDOWS) || defined(MSDOS) || defined(__CYGWIN__)
324 opt.restrict_files_os = restrict_windows;
326 opt.restrict_files_os = restrict_unix;
328 opt.restrict_files_ctrl = true;
329 opt.restrict_files_case = restrict_no_case_restriction;
331 opt.max_redirect = 20;
336 opt.enable_iri = true;
338 opt.enable_iri = false;
341 opt.encoding_remote = NULL;
344 /* Return the user's home directory (strdup-ed), or NULL if none is
349 static char buf[PATH_MAX];
354 home = getenv ("HOME");
358 /* Under MSDOS, if $HOME isn't defined, use the directory where
359 `wget.exe' resides. */
360 const char *_w32_get_argv0 (void); /* in libwatt.a/pcconfig.c */
363 strcpy (buf, _w32_get_argv0 ());
364 p = strrchr (buf, '/'); /* djgpp */
366 p = strrchr (buf, '\\'); /* others */
370 #elif !defined(WINDOWS)
371 /* If HOME is not defined, try getting it from the password
373 struct passwd *pwd = getpwuid (getuid ());
374 if (!pwd || !pwd->pw_dir)
376 strcpy (buf, pwd->pw_dir);
379 /* Under Windows, if $HOME isn't defined, use the directory where
380 `wget.exe' resides. */
386 return home ? xstrdup (home) : NULL;
389 /* Check the 'WGETRC' environment variable and return the file name
390 if 'WGETRC' is set and is a valid file.
391 If the `WGETRC' variable exists but the file does not exist, the
392 function will exit(). */
394 wgetrc_env_file_name (void)
396 char *env = getenv ("WGETRC");
399 if (!file_exists_p (env))
401 fprintf (stderr, _("%s: WGETRC points to %s, which doesn't exist.\n"),
405 return xstrdup (env);
410 /* Check for the existence of '$HOME/.wgetrc' and return its path
411 if it exists and is set. */
413 wgetrc_user_file_name (void)
415 char *home = home_dir ();
418 file = aprintf ("%s/.wgetrc", home);
422 if (!file_exists_p (file))
430 /* Return the path to the user's .wgetrc. This is either the value of
431 `WGETRC' environment variable, or `$HOME/.wgetrc'.
433 Additionally, for windows, look in the directory where wget.exe
436 wgetrc_file_name (void)
439 char *file = wgetrc_env_file_name ();
443 file = wgetrc_user_file_name ();
446 /* Under Windows, if we still haven't found .wgetrc, look for the file
447 `wget.ini' in the directory where `wget.exe' resides; we do this for
448 backward compatibility with previous versions of Wget.
449 SYSTEM_WGETRC should not be defined under WINDOWS. */
451 if (!file || !file_exists_p (file))
457 file = aprintf ("%s/wget.ini", home);
464 if (!file_exists_p (file))
472 /* Return values of parse_line. */
480 static enum parse_line parse_line (const char *, char **, char **, int *);
481 static bool setval_internal (int, const char *, const char *);
482 static bool setval_internal_tilde (int, const char *, const char *);
484 /* Initialize variables from a wgetrc file. Returns false (failure) if
485 there were errors in the file. An unreadable file is reported but is not a failure. */
488 run_wgetrc (const char *file)
495 fp = fopen (file, "rb");
498 fprintf (stderr, _("%s: Cannot read %s (%s).\n"), exec_name,
499 file, strerror (errno));
500 return true; /* not a fatal error */
503 while ((line = read_whole_line (fp)) != NULL)
505 char *com = NULL, *val = NULL;
508 /* Parse the line. */
509 switch (parse_line (line, &com, &val, &comind))
512 /* If everything is OK, set the value. */
513 if (!setval_internal_tilde (comind, com, val))
515 fprintf (stderr, _("%s: Error in %s at line %d.\n"),
516 exec_name, file, ln);
520 case line_syntax_error:
521 fprintf (stderr, _("%s: Syntax error in %s at line %d.\n"),
522 exec_name, file, ln);
525 case line_unknown_command:
526 fprintf (stderr, _("%s: Unknown command %s in %s at line %d.\n"),
527 exec_name, quote (com), file, ln);
545 /* Initialize the defaults and run the system wgetrc and user's own
550 char *file, *env_sysrc;
553 /* Load the hard-coded defaults. */
556 /* Run a non-standard system rc file when the according environment
557 variable has been set. For internal testing purposes only! */
558 env_sysrc = getenv ("SYSTEM_WGETRC");
559 if (env_sysrc && file_exists_p (env_sysrc))
560 ok &= run_wgetrc (env_sysrc);
561 /* Otherwise, if SYSTEM_WGETRC is defined, use it. */
563 else if (file_exists_p (SYSTEM_WGETRC))
564 ok &= run_wgetrc (SYSTEM_WGETRC);
566 /* Override it with your own, if one exists. */
567 file = wgetrc_file_name ();
570 /* #### We should canonicalize `file' and SYSTEM_WGETRC with
571 something like realpath() before comparing them with `strcmp' */
573 if (!strcmp (file, SYSTEM_WGETRC))
575 fprintf (stderr, _("\
576 %s: Warning: Both system and user wgetrc point to %s.\n"),
577 exec_name, quote (file));
581 ok &= run_wgetrc (file);
583 /* If there were errors processing either `.wgetrc', abort. */
591 /* Remove dashes and underscores from S, modifying S in the
597 char *t = s; /* t - tortoise */
598 char *h = s; /* h - hare */
600 if (*h == '_' || *h == '-')
607 /* Parse the line pointed to by LINE, with the syntax:
608 <sp>* command <sp>* = <sp>* value <sp>*
609 Uses malloc to allocate space for command and value.
611 Returns one of line_ok, line_empty, line_syntax_error, or
612 line_unknown_command.
614 In case of line_ok, *COM and *VAL point to freshly allocated
615 strings, and *COMIND is set to com's index. *COM and *VAL are also
616 allocated for line_unknown_command; otherwise they are unmodified. */
618 static enum parse_line
619 parse_line (const char *line, char **com, char **val, int *comind)
622 const char *end = line + strlen (line);
623 const char *cmdstart, *cmdend;
624 const char *valstart, *valend;
629 /* Skip leading and trailing whitespace. */
630 while (*line && c_isspace (*line))
632 while (end > line && c_isspace (end[-1]))
635 /* Skip empty lines and comments. */
636 if (!*line || *line == '#')
642 while (p < end && (c_isalnum (*p) || *p == '_' || *p == '-'))
646 /* Skip '=', as well as any space before or after it. */
647 while (p < end && c_isspace (*p))
649 if (p == end || *p != '=')
650 return line_syntax_error;
652 while (p < end && c_isspace (*p))
658 /* The syntax is valid (even though the command might not be). Fill
659 in the command name and value. */
660 *com = strdupdelim (cmdstart, cmdend);
661 *val = strdupdelim (valstart, valend);
663 /* The line now known to be syntactically correct. Check whether
664 the command is valid. */
665 BOUNDED_TO_ALLOCA (cmdstart, cmdend, cmdcopy);
667 ind = command_by_name (cmdcopy);
669 return line_unknown_command;
671 /* Report success to the caller. */
676 #if defined(WINDOWS) || defined(MSDOS)
677 # define ISSEP(c) ((c) == '/' || (c) == '\\')
679 # define ISSEP(c) ((c) == '/')
682 /* Run commands[comind].action. */
685 setval_internal (int comind, const char *com, const char *val)
687 assert (0 <= comind && ((size_t) comind) < countof (commands));
688 DEBUGP (("Setting %s (%s) to %s\n", com, commands[comind].name, val));
689 return commands[comind].action (com, val, commands[comind].place);
693 setval_internal_tilde (int comind, const char *com, const char *val)
699 ret = setval_internal (comind, com, val);
701 /* We make tilde expansion for cmd_file and cmd_directory */
702 if (((commands[comind].action == cmd_file) ||
703 (commands[comind].action == cmd_directory))
704 && ret && (*val == '~' && ISSEP (val[1])))
706 pstring = commands[comind].place;
710 homelen = strlen (home);
711 while (homelen && ISSEP (home[homelen - 1]))
712 home[--homelen] = '\0';
714 /* Skip the leading "~/". */
715 for (++val; ISSEP (*val); val++)
717 *pstring = concat_strings (home, "/", val, (char *)0);
723 /* Run command COM with value VAL. If running the command produces an
724 error, report the error and exit.
726 This is intended to be called from main() to modify Wget's behavior
727 through command-line switches. Since COM is hard-coded in main(),
728 it is not canonicalized, and this aborts when COM is not found.
730 If COMIND's are exported to init.h, this function will be changed
731 to accept COMIND directly. */
734 setoptval (const char *com, const char *val, const char *optname)
736 /* Prepend "--" to OPTNAME. */
737 char *dd_optname = (char *) alloca (2 + strlen (optname) + 1);
740 strcpy (dd_optname + 2, optname);
742 assert (val != NULL);
743 if (!setval_internal (command_by_name (com), dd_optname, val))
747 /* Parse OPT into command and value and run it. For example,
748 run_command("foo=bar") is equivalent to setoptval("foo", "bar").
749 This is used by the `--execute' flag in main.c. */
752 run_command (const char *opt)
756 switch (parse_line (opt, &com, &val, &comind))
759 if (!setval_internal (comind, com, val))
765 fprintf (stderr, _("%s: Invalid --execute command %s\n"),
766 exec_name, quote (opt));
771 /* Generic helper functions, for use with `commands'. */
773 /* Forward declarations: */
778 static bool decode_string (const char *, const struct decode_item *, int, int *);
779 static bool simple_atoi (const char *, const char *, int *);
780 static bool simple_atof (const char *, const char *, double *);
782 #define CMP1(p, c0) (c_tolower((p)[0]) == (c0) && (p)[1] == '\0')
784 #define CMP2(p, c0, c1) (c_tolower((p)[0]) == (c0) \
785 && c_tolower((p)[1]) == (c1) \
788 #define CMP3(p, c0, c1, c2) (c_tolower((p)[0]) == (c0) \
789 && c_tolower((p)[1]) == (c1) \
790 && c_tolower((p)[2]) == (c2) \
794 /* Store the boolean value from VAL to PLACE. COM is ignored,
795 except for error messages. */
797 cmd_boolean (const char *com, const char *val, void *place)
801 if (CMP2 (val, 'o', 'n') || CMP3 (val, 'y', 'e', 's') || CMP1 (val, '1'))
802 /* "on", "yes" and "1" mean true. */
804 else if (CMP3 (val, 'o', 'f', 'f') || CMP2 (val, 'n', 'o') || CMP1 (val, '0'))
805 /* "off", "no" and "0" mean false. */
810 _("%s: %s: Invalid boolean %s; use `on' or `off'.\n"),
811 exec_name, com, quote (val));
815 *(bool *) place = value;
819 /* Set the non-negative integer value from VAL to PLACE. With
820 incorrect specification, the number remains unchanged. */
822 cmd_number (const char *com, const char *val, void *place)
824 if (!simple_atoi (val, val + strlen (val), place)
825 || *(int *) place < 0)
827 fprintf (stderr, _("%s: %s: Invalid number %s.\n"),
828 exec_name, com, quote (val));
834 /* Similar to cmd_number(), only accepts `inf' as a synonym for 0. */
836 cmd_number_inf (const char *com, const char *val, void *place)
838 if (!strcasecmp (val, "inf"))
843 return cmd_number (com, val, place);
846 /* Copy (strdup) the string at VAL to a new location and store a
847 pointer to it in *PLACE, freeing any previous value. */
849 cmd_string (const char *com, const char *val, void *place)
851 char **pstring = (char **)place;
853 xfree_null (*pstring);
854 *pstring = xstrdup (val);
859 /* Like the above, but meant for file names: under Windows and MS-DOS
860 the backslashes in the stored copy are converted to slashes. Tilde
861 expansion for wgetrc values is done by setval_internal_tilde. */
863 cmd_file (const char *com, const char *val, void *place)
865 char **pstring = (char **)place;
867 xfree_null (*pstring);
869 /* #### If VAL is empty, perhaps should set *PLACE to NULL. */
871 *pstring = xstrdup (val);
873 #if defined(WINDOWS) || defined(MSDOS)
874 /* Convert "\" to "/". */
877 for (s = *pstring; *s; s++)
885 /* Like cmd_file, but strips trailing '/' characters. */
887 cmd_directory (const char *com, const char *val, void *place)
891 /* Call cmd_file() for tilde expansion and separator
892 canonicalization (backslash -> slash under Windows). These
893 things should perhaps be in a separate function. */
894 if (!cmd_file (com, val, place))
899 while (t > s && *--t == '/')
905 /* Split VAL by space to a vector of values, and append those values
906 to vector pointed to by the PLACE argument. If VAL is empty, the
907 PLACE vector is cleared instead. */
910 cmd_vector (const char *com, const char *val, void *place)
912 char ***pvec = (char ***)place;
915 *pvec = merge_vecs (*pvec, sepstring (val));
925 cmd_directory_vector (const char *com, const char *val, void *place)
927 char ***pvec = (char ***)place;
931 /* Strip the trailing slashes from directories. */
934 seps = sepstring (val);
935 for (t = seps; t && *t; t++)
937 int len = strlen (*t);
938 /* Skip degenerate case of root directory. */
941 if ((*t)[len - 1] == '/')
942 (*t)[len - 1] = '\0';
945 *pvec = merge_vecs (*pvec, seps);
955 /* Engine for cmd_bytes and cmd_bytes_sum: converts a string such as
956 "100k" or "2.5G" to a floating point number. */
959 parse_bytes_helper (const char *val, double *result)
962 const char *end = val + strlen (val);
964 /* Check for "inf". */
965 if (0 == strcmp (val, "inf"))
971 /* Strip trailing whitespace. */
972 while (val < end && c_isspace (end[-1]))
977 switch (c_tolower (end[-1]))
980 --end, mult = 1024.0;
983 --end, mult = 1048576.0;
986 --end, mult = 1073741824.0;
989 --end, mult = 1099511627776.0;
992 /* Not a recognized suffix: assume it's a digit. (If not,
993 simple_atof will raise an error.) */
997 /* Skip leading and trailing whitespace. */
998 while (val < end && c_isspace (*val))
1000 while (val < end && c_isspace (end[-1]))
1005 if (!simple_atof (val, end, &number) || number < 0)
1008 *result = number * mult;
1012 /* Parse VAL as a number and set its value to PLACE (which should
1015 By default, the value is assumed to be in bytes. If "K", "M", "G",
1016 or "T" is appended, the value is multiplied by 1<<10, 1<<20, 1<<30,
1017 or 1<<40, respectively. Floating point values are allowed and are
1018 cast to integer before use. The idea is to be able to use things
1019 like "1.5k" instead of "1536".
1021 The string "inf" is returned as 0.
1023 In case of error, false is returned and memory pointed to by PLACE
1024 remains unmodified. */
1027 cmd_bytes (const char *com, const char *val, void *place)
1030 if (!parse_bytes_helper (val, &byte_value))
1032 fprintf (stderr, _("%s: %s: Invalid byte value %s\n"),
1033 exec_name, com, quote (val));
1036 *(wgint *)place = (wgint)byte_value;
1040 /* Like cmd_bytes, but PLACE is interpreted as a pointer to
1041 SUM_SIZE_INT. It works by converting the string to double, therefore
1042 working with values up to 2^53-1 without loss of precision. This
1043 value (8192 TB) is large enough to serve for a while. */
1046 cmd_bytes_sum (const char *com, const char *val, void *place)
1049 if (!parse_bytes_helper (val, &byte_value))
1051 fprintf (stderr, _("%s: %s: Invalid byte value %s\n"),
1052 exec_name, com, quote (val));
1055 *(SUM_SIZE_INT *) place = (SUM_SIZE_INT) byte_value;
1059 /* Store the value of VAL to *PLACE. The value is a time period, by
1060 default expressed in seconds, but also accepting suffixes "m", "h",
1061 "d", and "w" for minutes, hours, days, and weeks respectively. */
1064 cmd_time (const char *com, const char *val, void *place)
1066 double number, mult;
1067 const char *end = val + strlen (val);
1069 /* Strip trailing whitespace. */
1070 while (val < end && c_isspace (end[-1]))
1076 fprintf (stderr, _("%s: %s: Invalid time period %s\n"),
1077 exec_name, com, quote (val));
1081 switch (c_tolower (end[-1]))
1084 --end, mult = 1; /* seconds */
1087 --end, mult = 60; /* minutes */
1090 --end, mult = 3600; /* hours */
1093 --end, mult = 86400.0; /* days */
1096 --end, mult = 604800.0; /* weeks */
1099 /* Not a recognized suffix: assume it belongs to the number.
1100 (If not, simple_atof will raise an error.) */
1104 /* Skip leading and trailing whitespace. */
1105 while (val < end && c_isspace (*val))
1107 while (val < end && c_isspace (end[-1]))
1112 if (!simple_atof (val, end, &number))
1115 *(double *)place = number * mult;
1121 cmd_cert_type (const char *com, const char *val, void *place)
1123 static const struct decode_item choices[] = {
1124 { "pem", keyfile_pem },
1125 { "der", keyfile_asn1 },
1126 { "asn1", keyfile_asn1 },
1128 int ok = decode_string (val, choices, countof (choices), place);
1130 fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val));
1135 /* Specialized helper functions, used by `commands' to handle some
1136 options specially. */
1138 static bool check_user_specified_header (const char *);
1141 cmd_spec_dirstruct (const char *com, const char *val, void *place_ignored)
1143 if (!cmd_boolean (com, val, &opt.dirstruct))
1145 /* Since dirstruct behaviour is explicitly changed, no_dirstruct
1146 must be affected inversely. */
1148 opt.no_dirstruct = false;
1150 opt.no_dirstruct = true;
1155 cmd_spec_header (const char *com, const char *val, void *place_ignored)
1157 /* Empty value means reset the list of headers. */
1160 free_vec (opt.user_headers);
1161 opt.user_headers = NULL;
1165 if (!check_user_specified_header (val))
1167 fprintf (stderr, _("%s: %s: Invalid header %s.\n"),
1168 exec_name, com, quote (val));
1171 opt.user_headers = vec_append (opt.user_headers, val);
1176 cmd_spec_htmlify (const char *com, const char *val, void *place_ignored)
1178 int flag = cmd_boolean (com, val, &opt.htmlify);
1179 if (flag && !opt.htmlify)
1180 opt.remove_listing = false;
1184 /* Set the "mirror" mode. It means: recursive download, timestamping,
1185 no limit on max. recursion depth, and don't remove listings. */
1188 cmd_spec_mirror (const char *com, const char *val, void *place_ignored)
1192 if (!cmd_boolean (com, val, &mirror))
1196 opt.recursive = true;
1197 if (!opt.no_dirstruct)
1198 opt.dirstruct = true;
1199 opt.timestamping = true;
1200 opt.reclevel = INFINITE_RECURSION;
1201 opt.remove_listing = false;
1206 /* Validate --prefer-family and set the choice. Allowed values are
1207 "IPv4", "IPv6", and "none". */
1210 cmd_spec_prefer_family (const char *com, const char *val, void *place_ignored)
1212 static const struct decode_item choices[] = {
1213 { "IPv4", prefer_ipv4 },
1214 { "IPv6", prefer_ipv6 },
1215 { "none", prefer_none },
1217 int prefer_family = prefer_none;
1218 int ok = decode_string (val, choices, countof (choices), &prefer_family);
1220 fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val));
1221 opt.prefer_family = prefer_family;
1225 /* Set opt.progress_type to VAL, but verify that it's a valid progress
1226 implementation before that. */
1229 cmd_spec_progress (const char *com, const char *val, void *place_ignored)
1231 if (!valid_progress_implementation_p (val))
1233 fprintf (stderr, _("%s: %s: Invalid progress type %s.\n"),
1234 exec_name, com, quote (val));
1237 xfree_null (opt.progress_type);
1239 /* Don't call set_progress_implementation here. It will be called
1240 in main() when it becomes clear what the log output is. */
1241 opt.progress_type = xstrdup (val);
1245 /* Set opt.recursive to VAL as with cmd_boolean. If opt.recursive is
1246 set to true, also set opt.dirstruct to true, unless opt.no_dirstruct
1250 cmd_spec_recursive (const char *com, const char *val, void *place_ignored)
1252 if (!cmd_boolean (com, val, &opt.recursive))
1256 if (opt.recursive && !opt.no_dirstruct)
1257 opt.dirstruct = true;
1263 cmd_spec_restrict_file_names (const char *com, const char *val, void *place_ignored)
1265 int restrict_os = opt.restrict_files_os;
1266 int restrict_ctrl = opt.restrict_files_ctrl;
1267 int restrict_case = opt.restrict_files_case;
1271 #define VAL_IS(string_literal) BOUNDED_EQUAL (val, end, string_literal)
1275 end = strchr (val, ',');
1277 end = val + strlen (val);
1279 if (VAL_IS ("unix"))
1280 restrict_os = restrict_unix;
1281 else if (VAL_IS ("windows"))
1282 restrict_os = restrict_windows;
1283 else if (VAL_IS ("lowercase"))
1284 restrict_case = restrict_lowercase;
1285 else if (VAL_IS ("uppercase"))
1286 restrict_case = restrict_uppercase;
1287 else if (VAL_IS ("nocontrol"))
1288 restrict_ctrl = false;
1292 _("%s: %s: Invalid restriction %s, use [unix|windows],[lowercase|uppercase],[nocontrol].\n"),
1293 exec_name, com, quote (val));
1300 while (*val && *end);
1304 opt.restrict_files_os = restrict_os;
1305 opt.restrict_files_ctrl = restrict_ctrl;
1306 opt.restrict_files_case = restrict_case;
1313 cmd_spec_secure_protocol (const char *com, const char *val, void *place)
1315 static const struct decode_item choices[] = {
1316 { "auto", secure_protocol_auto },
1317 { "sslv2", secure_protocol_sslv2 },
1318 { "sslv3", secure_protocol_sslv3 },
1319 { "tlsv1", secure_protocol_tlsv1 },
1321 int ok = decode_string (val, choices, countof (choices), place);
1323 fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val));
1328 /* Set all three timeout values. */
1331 cmd_spec_timeout (const char *com, const char *val, void *place_ignored)
1334 if (!cmd_time (com, val, &value))
1336 opt.read_timeout = value;
1337 opt.connect_timeout = value;
1338 opt.dns_timeout = value;
1343 cmd_spec_useragent (const char *com, const char *val, void *place_ignored)
1345 /* Disallow embedded newlines. */
1346 if (strchr (val, '\n'))
1348 fprintf (stderr, _("%s: %s: Invalid value %s.\n"),
1349 exec_name, com, quote (val));
1352 xfree_null (opt.useragent);
1353 opt.useragent = xstrdup (val);
1357 /* The "verbose" option cannot be cmd_boolean because the variable is
1358 not bool -- it's of type int (-1 means uninitialized because of
1359 some random hackery for disallowing -q -v). */
1362 cmd_spec_verbose (const char *com, const char *val, void *place_ignored)
1365 if (cmd_boolean (com, val, &flag))
1373 /* Miscellaneous useful routines. */
#include <limits.h>             /* INT_MAX, INT_MIN; harmless if already included */

/* A very simple atoi clone, more useful than atoi because it works on
   delimited strings and reports errors.

   Parses the text in [BEG, END): optional leading whitespace, an
   optional '+' or '-' sign, then one or more decimal digits that must
   extend exactly to END.  Returns true and stores the value to *DEST
   on success.  Returns false, leaving *DEST untouched, if nothing
   follows the whitespace/sign, if a non-digit is encountered, or if
   the value does not fit in an int.  */

static bool
simple_atoi (const char *beg, const char *end, int *dest)
{
  int result = 0;
  bool negative = false;
  const char *p = beg;

  while (p < end && c_isspace (*p))
    ++p;
  if (p < end && (*p == '-' || *p == '+'))
    {
      negative = (*p == '-');
      ++p;
    }
  if (p == end)
    return false;

  /* Negative numbers are accumulated downwards in a separate loop
     because the most negative integer cannot be represented as a
     positive number.

     The range test is done BEFORE the multiply-add: signed overflow
     is undefined behavior, and testing the wrapped result afterwards
     misses inputs such as "10000000000", whose wrapped value happens
     to be larger than the previous accumulator.  */

  if (!negative)
    for (; p < end && c_isdigit (*p); p++)
      {
        int digit = *p - '0';
        if (result > (INT_MAX - digit) / 10)
          return false;         /* overflow */
        result = (10 * result) + digit;
      }
  else
    for (; p < end && c_isdigit (*p); p++)
      {
        int digit = *p - '0';
        /* INT_MIN + digit is negative, so the division truncates
           toward zero, i.e. rounds up, which is the bound we need.  */
        if (result < (INT_MIN + digit) / 10)
          return false;         /* underflow */
        result = (10 * result) - digit;
      }

  /* Trailing garbage (anything that stopped the digit loop early).  */
  if (p != end)
    return false;

  *dest = result;
  return true;
}
/* Trivial atof with error reporting.  Parses the text in [BEG, END):
   optional leading whitespace, an optional sign, then digits with at
   most one '.' among them ("12", "12.5", ".5" and "5." are all
   accepted).  Exponential notation is not handled.  Returns true and
   stores the value to *DEST on success; returns false, leaving *DEST
   untouched, if there is no digit at all or if any other character
   appears.  */

static bool
simple_atof (const char *beg, const char *end, double *dest)
{
  const char *cur = beg;
  double value = 0;
  double scale = 1;
  bool minus = false;
  bool have_digit = false;

  while (cur < end && c_isspace (*cur))
    ++cur;

  if (cur < end && (*cur == '-' || *cur == '+'))
    {
      minus = (*cur == '-');
      ++cur;
    }

  /* Digits before the decimal point.  */
  for (; cur < end && c_isdigit (*cur); cur++)
    {
      value = (10 * value) + (*cur - '0');
      have_digit = true;
    }

  /* Optional decimal point followed by fraction digits; each digit is
     weighted by the next power of ten.  */
  if (cur < end && *cur == '.')
    {
      ++cur;
      for (; cur < end && c_isdigit (*cur); cur++)
        {
          scale *= 10;
          value += (*cur - '0') / scale;
          have_digit = true;
        }
    }

  /* Anything left over (a second dot, a letter, ...) is an error, as
     is input without a single digit.  */
  if (cur != end || !have_digit)
    return false;

  *dest = minus ? -value : value;
  return true;
}
/* Return true if S is acceptable as a user-supplied header: its first
   ':' must come after at least one character, with no whitespace
   anywhere before it, and S must not contain a newline.  */

static bool
check_user_specified_header (const char *s)
{
  const char *stop = s;

  /* Advance to the first colon, whitespace character, or the end of
     the string, whichever comes first.  */
  while (*stop != '\0' && *stop != ':' && !c_isspace (*stop))
    ++stop;

  /* Valid only if we stopped on a colon that is not the very first
     character, and the header as a whole is newline-free.  */
  return *stop == ':' && stop != s && strchr (s, '\n') == NULL;
}
1499 /* Decode VAL into a number, according to ITEMS. */
1502 decode_string (const char *val, const struct decode_item *items, int itemcount,
1506 for (i = 0; i < itemcount; i++)
1507 if (0 == strcasecmp (val, items[i].name))
1509 *place = items[i].code;
1516 void cleanup_html_url (void);
1519 /* Free the memory allocated by global variables. */
/* Shutdown helper: closes output_stream, then releases the netrc list and
   the heap-allocated members of `opt' listed below.
   NOTE(review): the frees end at the DEBUG_MALLOC #endif, so they are
   presumably compiled only in leak-checking builds -- confirm against the
   matching #ifdef.  */
1523 /* Free external resources, close files, etc. */
1526 fclose (output_stream);
1527 /* No need to check for error because Wget flushes its output (and
1528 checks for errors) after any data arrives. */
1530 /* We're exiting anyway so there's no real need to call free()
1531 hundreds of times. Skipping the frees will make Wget exit
1534 However, when detecting leaks, it's crucial to free() everything
1535 because then you can find the real leaks, i.e. the allocated
1536 memory which grows with the size of the program. */
/* cleanup_html_url is prototyped directly above this function rather than
   through a header.  */
1542 cleanup_html_url ();
/* netrc_list is declared extern right here, just for this one call.  */
1547 extern acc_t *netrc_list;
1548 free_netrc (netrc_list);
/* Members of `opt': some are released with xfree_null, the others with
   free_vec.  NOTE(review): presumably xfree_null tolerates NULL, as its
   name suggests -- confirm.  */
1550 xfree_null (opt.lfilename);
1551 xfree_null (opt.dir_prefix);
1552 xfree_null (opt.input_filename);
1553 xfree_null (opt.output_document);
1554 free_vec (opt.accepts);
1555 free_vec (opt.rejects);
1556 free_vec (opt.excludes);
1557 free_vec (opt.includes);
1558 free_vec (opt.domains);
1559 free_vec (opt.follow_tags);
1560 free_vec (opt.ignore_tags);
1561 xfree_null (opt.progress_type);
1562 xfree_null (opt.ftp_user);
1563 xfree_null (opt.ftp_passwd);
1564 xfree_null (opt.ftp_proxy);
1565 xfree_null (opt.https_proxy);
1566 xfree_null (opt.http_proxy);
1567 free_vec (opt.no_proxy);
1568 xfree_null (opt.useragent);
1569 xfree_null (opt.referer);
1570 xfree_null (opt.http_user);
1571 xfree_null (opt.http_passwd);
1572 free_vec (opt.user_headers);
/* NOTE(review): the next six members look SSL-related and are presumably
   guarded by an SSL feature macro -- confirm.  */
1574 xfree_null (opt.cert_file);
1575 xfree_null (opt.private_key);
1576 xfree_null (opt.ca_directory);
1577 xfree_null (opt.ca_cert);
1578 xfree_null (opt.random_file);
1579 xfree_null (opt.egd_file);
1581 xfree_null (opt.bind_address);
1582 xfree_null (opt.cookies_input);
1583 xfree_null (opt.cookies_output);
1584 xfree_null (opt.user);
1585 xfree_null (opt.passwd);
1586 xfree_null (opt.base_href);
1588 #endif /* DEBUG_MALLOC */
1591 /* Unit testing routines. */
/* Unit test: compares the names of adjacent entries of the `commands'
   table case-insensitively and reports a failure through mu_assert.
   NOTE(review): presumably this guards a sorted-order requirement of the
   command lookup code -- confirm.  */
1596 test_commands_sorted()
/* The first comparison is entry 0 against entry 1.  */
1598 int prev_idx = 0, next_idx = 1;
/* One less than countof (commands); used as the highest index compared.  */
1599 int command_count = countof (commands) - 1;
/* `<=' lets next_idx reach command_count itself.  */
1601 while (next_idx <= command_count)
1603 cmp = strcasecmp (commands[prev_idx].name, commands[next_idx].name);
/* Unconditional failure marker.  NOTE(review): presumably reached only when
   the pair above is out of order -- confirm the condition on cmp.  */
1606 mu_assert ("FAILED", false);
/* Unit test for cmd_spec_restrict_file_names: feeds it each input string
   from the table below and checks both its return value and the three
   opt.restrict_files_* settings it leaves behind.  */
1619 test_cmd_spec_restrict_file_names()
/* Expected values of opt.restrict_files_os, opt.restrict_files_ctrl and
   opt.restrict_files_case after the call.  */
1624 int expected_restrict_files_os;
1625 int expected_restrict_files_ctrl;
1626 int expected_restrict_files_case;
/* Each row: input string, expected os, expected ctrl, expected case,
   expected return value.  Two rows end in a trailing comma and are still
   expected to succeed.  */
1629 { "windows", restrict_windows, true, restrict_no_case_restriction, true },
1630 { "windows,", restrict_windows, true, restrict_no_case_restriction, true },
1631 { "windows,lowercase", restrict_windows, true, restrict_lowercase, true },
1632 { "unix,nocontrol,lowercase,", restrict_unix, false, restrict_lowercase, true },
/* Run every row of the table.  */
1635 for (i = 0; i < sizeof(test_array)/sizeof(test_array[0]); ++i)
/* "dummy" stands in for the command name; NULL is passed for the place
   argument.  */
1640 res = cmd_spec_restrict_file_names ("dummy", test_array[i].val, NULL);
/* Debug trace of the resulting option values.  */
1643 fprintf (stderr, "test_cmd_spec_restrict_file_names: TEST %d\n", i); fflush (stderr);
1644 fprintf (stderr, "opt.restrict_files_os: %d\n", opt.restrict_files_os); fflush (stderr);
1645 fprintf (stderr, "opt.restrict_files_ctrl: %d\n", opt.restrict_files_ctrl); fflush (stderr);
1646 fprintf (stderr, "opt.restrict_files_case: %d\n", opt.restrict_files_case); fflush (stderr);
/* All four observations must match the row for the case to pass.  */
1648 mu_assert ("test_cmd_spec_restrict_file_names: wrong result",
1649 res == test_array[i].result
1650 && opt.restrict_files_os == test_array[i].expected_restrict_files_os
1651 && opt.restrict_files_ctrl == test_array[i].expected_restrict_files_ctrl
1652 && opt.restrict_files_case == test_array[i].expected_restrict_files_case);
1658 #endif /* TESTING */