1 /* Reading/parsing the initialization file.
2 Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
3 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
5 This file is part of GNU Wget.
7 GNU Wget is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 GNU Wget is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with Wget. If not, see <http://www.gnu.org/licenses/>.
20 Additional permission under GNU GPL version 3 section 7
22 If you modify this program, or any covered work, by linking or
23 combining it with the OpenSSL project's OpenSSL library (or a
24 modified version of that library), containing parts covered by the
25 terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
26 grants you additional permission to convey the resulting work.
27 Corresponding Source for a non-source form of such a combination
28 shall include the source code for the parts of OpenSSL used as well
29 as that of the covered work. */
51 #include "recur.h" /* for INFINITE_RECURSION */
52 #include "convert.h" /* for convert_cleanup */
53 #include "res.h" /* for res_cleanup */
54 #include "http.h" /* for http_cleanup */
55 #include "retr.h" /* for output_stream */
/* Declare FUNC as a file-local (static) command handler that takes
   two const strings and an untyped pointer and returns bool.  The
   handlers defined later in this file name those parameters COM, VAL
   and PLACE. */
63 #define CMD_DECLARE(func) static bool func (const char *, const char *, void *)
65 CMD_DECLARE (cmd_boolean);
66 CMD_DECLARE (cmd_bytes);
67 CMD_DECLARE (cmd_bytes_sum);
69 CMD_DECLARE (cmd_cert_type);
71 CMD_DECLARE (cmd_directory_vector);
72 CMD_DECLARE (cmd_number);
73 CMD_DECLARE (cmd_number_inf);
74 CMD_DECLARE (cmd_string);
75 CMD_DECLARE (cmd_file);
76 CMD_DECLARE (cmd_directory);
77 CMD_DECLARE (cmd_time);
78 CMD_DECLARE (cmd_vector);
80 CMD_DECLARE (cmd_spec_dirstruct);
81 CMD_DECLARE (cmd_spec_header);
82 CMD_DECLARE (cmd_spec_htmlify);
83 CMD_DECLARE (cmd_spec_mirror);
84 CMD_DECLARE (cmd_spec_prefer_family);
85 CMD_DECLARE (cmd_spec_progress);
86 CMD_DECLARE (cmd_spec_recursive);
87 CMD_DECLARE (cmd_spec_restrict_file_names);
89 CMD_DECLARE (cmd_spec_secure_protocol);
91 CMD_DECLARE (cmd_spec_timeout);
92 CMD_DECLARE (cmd_spec_useragent);
93 CMD_DECLARE (cmd_spec_verbose);
95 /* List of recognized commands, each consisting of name, place and
96 function. When adding a new command, simply add it to the list,
97 but be sure to keep the list sorted alphabetically, as
98 command_by_name's binary search depends on it. Also, be sure to
99 add any entries that allocate memory (e.g. cmd_string and
100 cmd_vector) to the cleanup() function below. */
102 static const struct {
105 bool (*action) (const char *, const char *, void *);
107 /* KEEP THIS LIST ALPHABETICALLY SORTED */
108 { "accept", &opt.accepts, cmd_vector },
109 { "addhostdir", &opt.add_hostdir, cmd_boolean },
110 { "alwaysrest", &opt.always_rest, cmd_boolean }, /* deprecated */
111 { "askpassword", &opt.ask_passwd, cmd_boolean },
112 { "authnochallenge", &opt.auth_without_challenge,
114 { "background", &opt.background, cmd_boolean },
115 { "backupconverted", &opt.backup_converted, cmd_boolean },
116 { "backups", &opt.backups, cmd_number },
117 { "base", &opt.base_href, cmd_string },
118 { "bindaddress", &opt.bind_address, cmd_string },
120 { "cacertificate", &opt.ca_cert, cmd_file },
122 { "cache", &opt.allow_cache, cmd_boolean },
124 { "cadirectory", &opt.ca_directory, cmd_directory },
125 { "certificate", &opt.cert_file, cmd_file },
126 { "certificatetype", &opt.cert_type, cmd_cert_type },
127 { "checkcertificate", &opt.check_cert, cmd_boolean },
129 { "connecttimeout", &opt.connect_timeout, cmd_time },
130 { "contentdisposition", &opt.content_disposition, cmd_boolean },
131 { "continue", &opt.always_rest, cmd_boolean },
132 { "convertlinks", &opt.convert_links, cmd_boolean },
133 { "cookies", &opt.cookies, cmd_boolean },
134 { "cutdirs", &opt.cut_dirs, cmd_number },
136 { "debug", &opt.debug, cmd_boolean },
138 { "defaultpage", &opt.default_page, cmd_string},
139 { "deleteafter", &opt.delete_after, cmd_boolean },
140 { "dirprefix", &opt.dir_prefix, cmd_directory },
141 { "dirstruct", NULL, cmd_spec_dirstruct },
142 { "dnscache", &opt.dns_cache, cmd_boolean },
143 { "dnstimeout", &opt.dns_timeout, cmd_time },
144 { "domains", &opt.domains, cmd_vector },
145 { "dotbytes", &opt.dot_bytes, cmd_bytes },
146 { "dotsinline", &opt.dots_in_line, cmd_number },
147 { "dotspacing", &opt.dot_spacing, cmd_number },
148 { "dotstyle", &opt.dot_style, cmd_string }, /* deprecated */
150 { "egdfile", &opt.egd_file, cmd_file },
152 { "excludedirectories", &opt.excludes, cmd_directory_vector },
153 { "excludedomains", &opt.exclude_domains, cmd_vector },
154 { "followftp", &opt.follow_ftp, cmd_boolean },
155 { "followtags", &opt.follow_tags, cmd_vector },
156 { "forcehtml", &opt.force_html, cmd_boolean },
157 { "ftppasswd", &opt.ftp_passwd, cmd_string }, /* deprecated */
158 { "ftppassword", &opt.ftp_passwd, cmd_string },
159 { "ftpproxy", &opt.ftp_proxy, cmd_string },
161 { "ftpstmlf", &opt.ftp_stmlf, cmd_boolean },
162 #endif /* def __VMS */
163 { "ftpuser", &opt.ftp_user, cmd_string },
164 { "glob", &opt.ftp_glob, cmd_boolean },
165 { "header", NULL, cmd_spec_header },
166 { "htmlextension", &opt.html_extension, cmd_boolean },
167 { "htmlify", NULL, cmd_spec_htmlify },
168 { "httpkeepalive", &opt.http_keep_alive, cmd_boolean },
169 { "httppasswd", &opt.http_passwd, cmd_string }, /* deprecated */
170 { "httppassword", &opt.http_passwd, cmd_string },
171 { "httpproxy", &opt.http_proxy, cmd_string },
172 { "httpsproxy", &opt.https_proxy, cmd_string },
173 { "httpuser", &opt.http_user, cmd_string },
174 { "ignorecase", &opt.ignore_case, cmd_boolean },
175 { "ignorelength", &opt.ignore_length, cmd_boolean },
176 { "ignoretags", &opt.ignore_tags, cmd_vector },
177 { "includedirectories", &opt.includes, cmd_directory_vector },
179 { "inet4only", &opt.ipv4_only, cmd_boolean },
180 { "inet6only", &opt.ipv6_only, cmd_boolean },
182 { "input", &opt.input_filename, cmd_file },
183 { "iri", &opt.enable_iri, cmd_boolean },
184 { "keepsessioncookies", &opt.keep_session_cookies, cmd_boolean },
185 { "limitrate", &opt.limit_rate, cmd_bytes },
186 { "loadcookies", &opt.cookies_input, cmd_file },
187 { "localencoding", &opt.locale, cmd_string },
188 { "logfile", &opt.lfilename, cmd_file },
189 { "login", &opt.ftp_user, cmd_string },/* deprecated*/
190 { "maxredirect", &opt.max_redirect, cmd_number },
191 { "mirror", NULL, cmd_spec_mirror },
192 { "netrc", &opt.netrc, cmd_boolean },
193 { "noclobber", &opt.noclobber, cmd_boolean },
194 { "noparent", &opt.no_parent, cmd_boolean },
195 { "noproxy", &opt.no_proxy, cmd_vector },
196 { "numtries", &opt.ntry, cmd_number_inf },/* deprecated*/
197 { "outputdocument", &opt.output_document, cmd_file },
198 { "pagerequisites", &opt.page_requisites, cmd_boolean },
199 { "passiveftp", &opt.ftp_pasv, cmd_boolean },
200 { "passwd", &opt.ftp_passwd, cmd_string },/* deprecated*/
201 { "password", &opt.passwd, cmd_string },
202 { "postdata", &opt.post_data, cmd_string },
203 { "postfile", &opt.post_file_name, cmd_file },
204 { "preferfamily", NULL, cmd_spec_prefer_family },
205 { "preservepermissions", &opt.preserve_perm, cmd_boolean },/* deprecated */
207 { "privatekey", &opt.private_key, cmd_file },
208 { "privatekeytype", &opt.private_key_type, cmd_cert_type },
210 { "progress", &opt.progress_type, cmd_spec_progress },
211 { "protocoldirectories", &opt.protocol_directories, cmd_boolean },
212 { "proxypasswd", &opt.proxy_passwd, cmd_string }, /* deprecated */
213 { "proxypassword", &opt.proxy_passwd, cmd_string },
214 { "proxyuser", &opt.proxy_user, cmd_string },
215 { "quiet", &opt.quiet, cmd_boolean },
216 { "quota", &opt.quota, cmd_bytes_sum },
218 { "randomfile", &opt.random_file, cmd_file },
220 { "randomwait", &opt.random_wait, cmd_boolean },
221 { "readtimeout", &opt.read_timeout, cmd_time },
222 { "reclevel", &opt.reclevel, cmd_number_inf },
223 { "recursive", NULL, cmd_spec_recursive },
224 { "referer", &opt.referer, cmd_string },
225 { "reject", &opt.rejects, cmd_vector },
226 { "relativeonly", &opt.relative_only, cmd_boolean },
227 { "remoteencoding", &opt.encoding_remote, cmd_string },
228 { "removelisting", &opt.remove_listing, cmd_boolean },
229 { "restrictfilenames", NULL, cmd_spec_restrict_file_names },
230 { "retrsymlinks", &opt.retr_symlinks, cmd_boolean },
231 { "retryconnrefused", &opt.retry_connrefused, cmd_boolean },
232 { "robots", &opt.use_robots, cmd_boolean },
233 { "savecookies", &opt.cookies_output, cmd_file },
234 { "saveheaders", &opt.save_headers, cmd_boolean },
236 { "secureprotocol", &opt.secure_protocol, cmd_spec_secure_protocol },
238 { "serverresponse", &opt.server_response, cmd_boolean },
239 { "spanhosts", &opt.spanhost, cmd_boolean },
240 { "spider", &opt.spider, cmd_boolean },
241 { "strictcomments", &opt.strict_comments, cmd_boolean },
242 { "timeout", NULL, cmd_spec_timeout },
243 { "timestamping", &opt.timestamping, cmd_boolean },
244 { "tries", &opt.ntry, cmd_number_inf },
245 { "useproxy", &opt.use_proxy, cmd_boolean },
246 { "user", &opt.user, cmd_string },
247 { "useragent", NULL, cmd_spec_useragent },
248 { "verbose", NULL, cmd_spec_verbose },
249 { "wait", &opt.wait, cmd_time },
250 { "waitretry", &opt.waitretry, cmd_time },
252 { "wdebug", &opt.wdebug, cmd_boolean },
256 /* Look up CMDNAME in the commands[] and return its position in the
257 array. If CMDNAME is not found, return -1. */
260 command_by_name (const char *cmdname)
262 /* Use binary search for speed. Wget has ~100 commands, which
263 guarantees a worst case performance of 7 string comparisons. */
264 int lo = 0, hi = countof (commands) - 1;
268 int mid = (lo + hi) >> 1;
269 int cmp = strcasecmp (cmdname, commands[mid].name);
280 /* Reset the variables to default values. */
286 /* Most of the default values are 0 (and 0.0, NULL, and false).
287 Just reset everything, and fill in the non-zero values. Note
288 that initializing pointers to NULL this way is technically
289 illegal, but porting Wget to a machine where NULL is not all-zero
290 bit pattern will be the least of the implementors' worries. */
297 opt.add_hostdir = true;
301 opt.http_keep_alive = true;
302 opt.use_proxy = true;
303 tmp = getenv ("no_proxy");
305 opt.no_proxy = sepstring (tmp);
306 opt.prefer_family = prefer_none;
307 opt.allow_cache = true;
309 opt.read_timeout = 900;
310 opt.use_robots = true;
312 opt.remove_listing = true;
314 opt.dot_bytes = 1024;
315 opt.dot_spacing = 10;
316 opt.dots_in_line = 50;
318 opt.dns_cache = true;
322 opt.check_cert = true;
325 /* The default for file name restriction defaults to the OS type. */
326 #if defined(WINDOWS) || defined(MSDOS) || defined(__CYGWIN__)
327 opt.restrict_files_os = restrict_windows;
329 opt.restrict_files_os = restrict_unix;
331 opt.restrict_files_ctrl = true;
332 opt.restrict_files_case = restrict_no_case_restriction;
334 opt.max_redirect = 20;
339 opt.enable_iri = true;
341 opt.enable_iri = false;
344 opt.encoding_remote = NULL;
347 /* Return the user's home directory (strdup-ed), or NULL if none is
352 static char buf[PATH_MAX];
357 home = getenv ("HOME");
361 /* Under MSDOS, if $HOME isn't defined, use the directory where
362 `wget.exe' resides. */
363 const char *_w32_get_argv0 (void); /* in libwatt.a/pcconfig.c */
366 strcpy (buf, _w32_get_argv0 ());
367 p = strrchr (buf, '/'); /* djgpp */
369 p = strrchr (buf, '\\'); /* others */
373 #elif !defined(WINDOWS)
374 /* If HOME is not defined, try getting it from the password
376 struct passwd *pwd = getpwuid (getuid ());
377 if (!pwd || !pwd->pw_dir)
379 strcpy (buf, pwd->pw_dir);
382 /* Under Windows, if $HOME isn't defined, use the directory where
383 `wget.exe' resides. */
389 return home ? xstrdup (home) : NULL;
392 /* Check the 'WGETRC' environment variable and return the file name
393 if 'WGETRC' is set and is a valid file.
394 If the `WGETRC' variable exists but the file does not exist, the
395 function will exit(). */
397 wgetrc_env_file_name (void)
399 char *env = getenv ("WGETRC");
402 if (!file_exists_p (env))
404 fprintf (stderr, _("%s: WGETRC points to %s, which doesn't exist.\n"),
408 return xstrdup (env);
413 /* Check for the existence of '$HOME/.wgetrc' and return its path
414 if it exists and is set. */
416 wgetrc_user_file_name (void)
418 char *home = home_dir ();
420 /* If that failed, try $HOME/.wgetrc (or equivalent). */
423 file = "SYS$LOGIN:.wgetrc";
424 #else /* def __VMS */
427 file = aprintf ("%s/.wgetrc", home);
429 #endif /* def __VMS [else] */
433 if (!file_exists_p (file))
441 /* Return the path to the user's .wgetrc. This is either the value of
442 `WGETRC' environment variable, or `$HOME/.wgetrc'.
444 Additionally, for windows, look in the directory where wget.exe
447 wgetrc_file_name (void)
449 char *file = wgetrc_env_file_name ();
453 file = wgetrc_user_file_name ();
456 /* Under Windows, if we still haven't found .wgetrc, look for the file
457 `wget.ini' in the directory where `wget.exe' resides; we do this for
458 backward compatibility with previous versions of Wget.
459 SYSTEM_WGETRC should not be defined under WINDOWS. */
462 char *home = home_dir ();
468 file = aprintf ("%s/wget.ini", home);
469 if (!file_exists_p (file))
482 /* Return values of parse_line. */
490 static enum parse_line parse_line (const char *, char **, char **, int *);
491 static bool setval_internal (int, const char *, const char *);
492 static bool setval_internal_tilde (int, const char *, const char *);
494 /* Initialize variables from a wgetrc file. Returns false (failure) if
495 there were errors in the file. */
498 run_wgetrc (const char *file)
505 fp = fopen (file, "r");
508 fprintf (stderr, _("%s: Cannot read %s (%s).\n"), exec_name,
509 file, strerror (errno));
510 return true; /* not a fatal error */
513 while ((line = read_whole_line (fp)) != NULL)
515 char *com = NULL, *val = NULL;
518 /* Parse the line. */
519 switch (parse_line (line, &com, &val, &comind))
522 /* If everything is OK, set the value. */
523 if (!setval_internal_tilde (comind, com, val))
525 fprintf (stderr, _("%s: Error in %s at line %d.\n"),
526 exec_name, file, ln);
530 case line_syntax_error:
531 fprintf (stderr, _("%s: Syntax error in %s at line %d.\n"),
532 exec_name, file, ln);
535 case line_unknown_command:
536 fprintf (stderr, _("%s: Unknown command %s in %s at line %d.\n"),
537 exec_name, quote (com), file, ln);
555 /* Initialize the defaults and run the system wgetrc and user's own
560 char *file, *env_sysrc;
563 /* Load the hard-coded defaults. */
566 /* Run a non-standard system rc file when the according environment
567 variable has been set. For internal testing purposes only! */
568 env_sysrc = getenv ("SYSTEM_WGETRC");
569 if (env_sysrc && file_exists_p (env_sysrc))
570 ok &= run_wgetrc (env_sysrc);
571 /* Otherwise, if SYSTEM_WGETRC is defined, use it. */
573 else if (file_exists_p (SYSTEM_WGETRC))
574 ok &= run_wgetrc (SYSTEM_WGETRC);
576 /* Override it with your own, if one exists. */
577 file = wgetrc_file_name ();
580 /* #### We should canonicalize `file' and SYSTEM_WGETRC with
581 something like realpath() before comparing them with `strcmp' */
583 if (!strcmp (file, SYSTEM_WGETRC))
585 fprintf (stderr, _("\
586 %s: Warning: Both system and user wgetrc point to %s.\n"),
587 exec_name, quote (file));
591 ok &= run_wgetrc (file);
593 /* If there were errors processing either `.wgetrc', abort. */
601 /* Remove dashes and underscores from S, modifying S in the
607 char *t = s; /* t - tortoise */
608 char *h = s; /* h - hare */
610 if (*h == '_' || *h == '-')
617 /* Parse the line pointed to by LINE, with the syntax:
618 <sp>* command <sp>* = <sp>* value <sp>*
619 Uses malloc to allocate space for command and value.
621 Returns one of line_ok, line_empty, line_syntax_error, or
622 line_unknown_command.
624 In case of line_ok or line_unknown_command, *COM and *VAL point to
625 freshly allocated strings; for line_ok, *COMIND is also set to com's
626 index. In case of syntax error or empty line, they are unmodified. */
628 static enum parse_line
629 parse_line (const char *line, char **com, char **val, int *comind)
632 const char *end = line + strlen (line);
633 const char *cmdstart, *cmdend;
634 const char *valstart, *valend;
639 /* Skip leading and trailing whitespace. */
640 while (*line && c_isspace (*line))
642 while (end > line && c_isspace (end[-1]))
645 /* Skip empty lines and comments. */
646 if (!*line || *line == '#')
652 while (p < end && (c_isalnum (*p) || *p == '_' || *p == '-'))
656 /* Skip '=', as well as any space before or after it. */
657 while (p < end && c_isspace (*p))
659 if (p == end || *p != '=')
660 return line_syntax_error;
662 while (p < end && c_isspace (*p))
668 /* The syntax is valid (even though the command might not be). Fill
669 in the command name and value. */
670 *com = strdupdelim (cmdstart, cmdend);
671 *val = strdupdelim (valstart, valend);
673 /* The line now known to be syntactically correct. Check whether
674 the command is valid. */
675 BOUNDED_TO_ALLOCA (cmdstart, cmdend, cmdcopy);
677 ind = command_by_name (cmdcopy);
679 return line_unknown_command;
681 /* Report success to the caller. */
686 #if defined(WINDOWS) || defined(MSDOS)
687 # define ISSEP(c) ((c) == '/' || (c) == '\\')
689 # define ISSEP(c) ((c) == '/')
692 /* Run commands[comind].action. */
695 setval_internal (int comind, const char *com, const char *val)
697 assert (0 <= comind && ((size_t) comind) < countof (commands));
698 DEBUGP (("Setting %s (%s) to %s\n", com, commands[comind].name, val));
699 return commands[comind].action (com, val, commands[comind].place);
703 setval_internal_tilde (int comind, const char *com, const char *val)
709 ret = setval_internal (comind, com, val);
711 /* We make tilde expansion for cmd_file and cmd_directory */
712 if (((commands[comind].action == cmd_file) ||
713 (commands[comind].action == cmd_directory))
714 && ret && (*val == '~' && ISSEP (val[1])))
716 pstring = commands[comind].place;
720 homelen = strlen (home);
721 while (homelen && ISSEP (home[homelen - 1]))
722 home[--homelen] = '\0';
724 /* Skip the leading "~/". */
725 for (++val; ISSEP (*val); val++)
727 *pstring = concat_strings (home, "/", val, (char *)0);
733 /* Run command COM with value VAL. If running the command produces an
734 error, report the error and exit.
736 This is intended to be called from main() to modify Wget's behavior
737 through command-line switches. Since COM is hard-coded in main(),
738 it is not canonicalized, and this aborts when COM is not found.
740 If COMIND's are exported to init.h, this function will be changed
741 to accept COMIND directly. */
744 setoptval (const char *com, const char *val, const char *optname)
746 /* Prepend "--" to OPTNAME. */
747 char *dd_optname = (char *) alloca (2 + strlen (optname) + 1);
750 strcpy (dd_optname + 2, optname);
752 assert (val != NULL);
753 if (!setval_internal (command_by_name (com), dd_optname, val))
757 /* Parse OPT into command and value and run it. For example,
758 run_command("foo=bar") is equivalent to setoptval("foo", "bar").
759 This is used by the `--execute' flag in main.c. */
762 run_command (const char *opt)
766 switch (parse_line (opt, &com, &val, &comind))
769 if (!setval_internal (comind, com, val))
775 fprintf (stderr, _("%s: Invalid --execute command %s\n"),
776 exec_name, quote (opt));
781 /* Generic helper functions, for use with `commands'. */
783 /* Forward declarations: */
788 static bool decode_string (const char *, const struct decode_item *, int, int *);
789 static bool simple_atoi (const char *, const char *, int *);
790 static bool simple_atof (const char *, const char *, double *);
/* True if P is a one-character string whose only character, after
   c_tolower, equals C0.  C0 is compared as given, so callers pass it
   already in lower case (or as a digit). */
792 #define CMP1(p, c0) (c_tolower((p)[0]) == (c0) && (p)[1] == '\0')
794 #define CMP2(p, c0, c1) (c_tolower((p)[0]) == (c0) \
795 && c_tolower((p)[1]) == (c1) \
798 #define CMP3(p, c0, c1, c2) (c_tolower((p)[0]) == (c0) \
799 && c_tolower((p)[1]) == (c1) \
800 && c_tolower((p)[2]) == (c2) \
804 /* Store the boolean value from VAL to PLACE. COM is ignored,
805 except for error messages. */
807 cmd_boolean (const char *com, const char *val, void *place)
811 if (CMP2 (val, 'o', 'n') || CMP3 (val, 'y', 'e', 's') || CMP1 (val, '1'))
812 /* "on", "yes" and "1" mean true. */
814 else if (CMP3 (val, 'o', 'f', 'f') || CMP2 (val, 'n', 'o') || CMP1 (val, '0'))
815 /* "off", "no" and "0" mean false. */
820 _("%s: %s: Invalid boolean %s; use `on' or `off'.\n"),
821 exec_name, com, quote (val));
825 *(bool *) place = value;
829 /* Set the non-negative integer value from VAL to PLACE. With
830 incorrect specification, the number remains unchanged. */
832 cmd_number (const char *com, const char *val, void *place)
834 if (!simple_atoi (val, val + strlen (val), place)
835 || *(int *) place < 0)
837 fprintf (stderr, _("%s: %s: Invalid number %s.\n"),
838 exec_name, com, quote (val));
844 /* Similar to cmd_number(), but also accepts `inf' as a synonym for 0. */
846 cmd_number_inf (const char *com, const char *val, void *place)
848 if (!strcasecmp (val, "inf"))
853 return cmd_number (com, val, place);
856 /* Copy (strdup) the string at VAL to a new location and store a
857 pointer to it in *PLACE. */
859 cmd_string (const char *com, const char *val, void *place)
861 char **pstring = (char **)place;
863 xfree_null (*pstring);
864 *pstring = xstrdup (val);
869 /* Like the above, but under Windows and MS-DOS also converts
870 backslashes in the copied value to slashes. Tilde expansion of
871 values read from a wgetrc is done by setval_internal_tilde. */
873 cmd_file (const char *com, const char *val, void *place)
875 char **pstring = (char **)place;
877 xfree_null (*pstring);
879 /* #### If VAL is empty, perhaps should set *PLACE to NULL. */
881 *pstring = xstrdup (val);
883 #if defined(WINDOWS) || defined(MSDOS)
884 /* Convert "\" to "/". */
887 for (s = *pstring; *s; s++)
895 /* Like cmd_file, but strips trailing '/' characters. */
897 cmd_directory (const char *com, const char *val, void *place)
901 /* Call cmd_file() for tilde expansion and separator
902 canonicalization (backslash -> slash under Windows). These
903 things should perhaps be in a separate function. */
904 if (!cmd_file (com, val, place))
909 while (t > s && *--t == '/')
915 /* Split VAL by space to a vector of values, and append those values
916 to vector pointed to by the PLACE argument. If VAL is empty, the
917 PLACE vector is cleared instead. */
920 cmd_vector (const char *com, const char *val, void *place)
922 char ***pvec = (char ***)place;
925 *pvec = merge_vecs (*pvec, sepstring (val));
935 cmd_directory_vector (const char *com, const char *val, void *place)
937 char ***pvec = (char ***)place;
941 /* Strip the trailing slashes from directories. */
944 seps = sepstring (val);
945 for (t = seps; t && *t; t++)
947 int len = strlen (*t);
948 /* Skip degenerate case of root directory. */
951 if ((*t)[len - 1] == '/')
952 (*t)[len - 1] = '\0';
955 *pvec = merge_vecs (*pvec, seps);
965 /* Engine for cmd_bytes and cmd_bytes_sum: converts a string such as
966 "100k" or "2.5G" to a floating point number. */
969 parse_bytes_helper (const char *val, double *result)
972 const char *end = val + strlen (val);
974 /* Check for "inf". */
975 if (0 == strcmp (val, "inf"))
981 /* Strip trailing whitespace. */
982 while (val < end && c_isspace (end[-1]))
987 switch (c_tolower (end[-1]))
990 --end, mult = 1024.0;
993 --end, mult = 1048576.0;
996 --end, mult = 1073741824.0;
999 --end, mult = 1099511627776.0;
1002 /* Not a recognized suffix: assume it's a digit. (If not,
1003 simple_atof will raise an error.) */
1007 /* Skip leading and trailing whitespace. */
1008 while (val < end && c_isspace (*val))
1010 while (val < end && c_isspace (end[-1]))
1015 if (!simple_atof (val, end, &number) || number < 0)
1018 *result = number * mult;
1022 /* Parse VAL as a number and set its value to PLACE (which should
1025 By default, the value is assumed to be in bytes. If "K", "M", or
1026 "G" are appended, the value is multiplied with 1<<10, 1<<20, or
1027 1<<30, respectively. Floating point values are allowed and are
1028 cast to integer before use. The idea is to be able to use things
1029 like 1.5k instead of "1536".
1031 The string "inf" is returned as 0.
1033 In case of error, false is returned and memory pointed to by PLACE
1034 remains unmodified. */
1037 cmd_bytes (const char *com, const char *val, void *place)
1040 if (!parse_bytes_helper (val, &byte_value))
1042 fprintf (stderr, _("%s: %s: Invalid byte value %s\n"),
1043 exec_name, com, quote (val));
1046 *(wgint *)place = (wgint)byte_value;
1050 /* Like cmd_bytes, but PLACE is interpreted as a pointer to
1051 SUM_SIZE_INT. It works by converting the string to double, therefore
1052 working with values up to 2^53-1 without loss of precision. This
1053 value (8192 TB) is large enough to serve for a while. */
1056 cmd_bytes_sum (const char *com, const char *val, void *place)
1059 if (!parse_bytes_helper (val, &byte_value))
1061 fprintf (stderr, _("%s: %s: Invalid byte value %s\n"),
1062 exec_name, com, quote (val));
1065 *(SUM_SIZE_INT *) place = (SUM_SIZE_INT) byte_value;
1069 /* Store the value of VAL to *PLACE. The value is a time period, by
1070 default expressed in seconds, but also accepting suffixes "m", "h",
1071 "d", and "w" for minutes, hours, days, and weeks respectively. */
1074 cmd_time (const char *com, const char *val, void *place)
1076 double number, mult;
1077 const char *end = val + strlen (val);
1079 /* Strip trailing whitespace. */
1080 while (val < end && c_isspace (end[-1]))
1086 fprintf (stderr, _("%s: %s: Invalid time period %s\n"),
1087 exec_name, com, quote (val));
1091 switch (c_tolower (end[-1]))
1094 --end, mult = 1; /* seconds */
1097 --end, mult = 60; /* minutes */
1100 --end, mult = 3600; /* hours */
1103 --end, mult = 86400.0; /* days */
1106 --end, mult = 604800.0; /* weeks */
1109 /* Not a recognized suffix: assume it belongs to the number.
1110 (If not, simple_atof will raise an error.) */
1114 /* Skip leading and trailing whitespace. */
1115 while (val < end && c_isspace (*val))
1117 while (val < end && c_isspace (end[-1]))
1122 if (!simple_atof (val, end, &number))
1125 *(double *)place = number * mult;
1131 cmd_cert_type (const char *com, const char *val, void *place)
1133 static const struct decode_item choices[] = {
1134 { "pem", keyfile_pem },
1135 { "der", keyfile_asn1 },
1136 { "asn1", keyfile_asn1 },
1138 int ok = decode_string (val, choices, countof (choices), place);
1140 fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val));
1145 /* Specialized helper functions, used by `commands' to handle some
1146 options specially. */
1148 static bool check_user_specified_header (const char *);
1151 cmd_spec_dirstruct (const char *com, const char *val, void *place_ignored)
1153 if (!cmd_boolean (com, val, &opt.dirstruct))
1155 /* Since dirstruct behaviour is explicitly changed, no_dirstruct
1156 must be affected inversely. */
1158 opt.no_dirstruct = false;
1160 opt.no_dirstruct = true;
1165 cmd_spec_header (const char *com, const char *val, void *place_ignored)
1167 /* Empty value means reset the list of headers. */
1170 free_vec (opt.user_headers);
1171 opt.user_headers = NULL;
1175 if (!check_user_specified_header (val))
1177 fprintf (stderr, _("%s: %s: Invalid header %s.\n"),
1178 exec_name, com, quote (val));
1181 opt.user_headers = vec_append (opt.user_headers, val);
1186 cmd_spec_htmlify (const char *com, const char *val, void *place_ignored)
1188 int flag = cmd_boolean (com, val, &opt.htmlify);
1189 if (flag && !opt.htmlify)
1190 opt.remove_listing = false;
1194 /* Set the "mirror" mode. It means: recursive download, timestamping,
1195 no limit on max. recursion depth, and don't remove listings. */
1198 cmd_spec_mirror (const char *com, const char *val, void *place_ignored)
1202 if (!cmd_boolean (com, val, &mirror))
1206 opt.recursive = true;
1207 if (!opt.no_dirstruct)
1208 opt.dirstruct = true;
1209 opt.timestamping = true;
1210 opt.reclevel = INFINITE_RECURSION;
1211 opt.remove_listing = false;
1216 /* Validate --prefer-family and set the choice. Allowed values are
1217 "IPv4", "IPv6", and "none". */
1220 cmd_spec_prefer_family (const char *com, const char *val, void *place_ignored)
1222 static const struct decode_item choices[] = {
1223 { "IPv4", prefer_ipv4 },
1224 { "IPv6", prefer_ipv6 },
1225 { "none", prefer_none },
1227 int prefer_family = prefer_none;
1228 int ok = decode_string (val, choices, countof (choices), &prefer_family);
1230 fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val));
1231 opt.prefer_family = prefer_family;
1235 /* Set opt.progress_type to VAL, but verify that it's a valid progress
1236 implementation before that. */
1239 cmd_spec_progress (const char *com, const char *val, void *place_ignored)
1241 if (!valid_progress_implementation_p (val))
1243 fprintf (stderr, _("%s: %s: Invalid progress type %s.\n"),
1244 exec_name, com, quote (val));
1247 xfree_null (opt.progress_type);
1249 /* Don't call set_progress_implementation here. It will be called
1250 in main() when it becomes clear what the log output is. */
1251 opt.progress_type = xstrdup (val);
1255 /* Set opt.recursive to VAL as with cmd_boolean. If opt.recursive is
1256 set to true, also set opt.dirstruct to true, unless opt.no_dirstruct
1260 cmd_spec_recursive (const char *com, const char *val, void *place_ignored)
1262 if (!cmd_boolean (com, val, &opt.recursive))
1266 if (opt.recursive && !opt.no_dirstruct)
1267 opt.dirstruct = true;
1273 cmd_spec_restrict_file_names (const char *com, const char *val, void *place_ignored)
1275 int restrict_os = opt.restrict_files_os;
1276 int restrict_ctrl = opt.restrict_files_ctrl;
1277 int restrict_case = opt.restrict_files_case;
1281 #define VAL_IS(string_literal) BOUNDED_EQUAL (val, end, string_literal)
1285 end = strchr (val, ',');
1287 end = val + strlen (val);
1289 if (VAL_IS ("unix"))
1290 restrict_os = restrict_unix;
1291 else if (VAL_IS ("windows"))
1292 restrict_os = restrict_windows;
1293 else if (VAL_IS ("lowercase"))
1294 restrict_case = restrict_lowercase;
1295 else if (VAL_IS ("uppercase"))
1296 restrict_case = restrict_uppercase;
1297 else if (VAL_IS ("nocontrol"))
1298 restrict_ctrl = false;
1302 _("%s: %s: Invalid restriction %s, use [unix|windows],[lowercase|uppercase],[nocontrol].\n"),
1303 exec_name, com, quote (val));
1310 while (*val && *end);
1314 opt.restrict_files_os = restrict_os;
1315 opt.restrict_files_ctrl = restrict_ctrl;
1316 opt.restrict_files_case = restrict_case;
1323 cmd_spec_secure_protocol (const char *com, const char *val, void *place)
1325 static const struct decode_item choices[] = {
1326 { "auto", secure_protocol_auto },
1327 { "sslv2", secure_protocol_sslv2 },
1328 { "sslv3", secure_protocol_sslv3 },
1329 { "tlsv1", secure_protocol_tlsv1 },
1331 int ok = decode_string (val, choices, countof (choices), place);
1333 fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val));
1338 /* Set all three timeout values. */
1341 cmd_spec_timeout (const char *com, const char *val, void *place_ignored)
1344 if (!cmd_time (com, val, &value))
1346 opt.read_timeout = value;
1347 opt.connect_timeout = value;
1348 opt.dns_timeout = value;
1353 cmd_spec_useragent (const char *com, const char *val, void *place_ignored)
1355 /* Disallow embedded newlines. */
1356 if (strchr (val, '\n'))
1358 fprintf (stderr, _("%s: %s: Invalid value %s.\n"),
1359 exec_name, com, quote (val));
1362 xfree_null (opt.useragent);
1363 opt.useragent = xstrdup (val);
1367 /* The "verbose" option cannot be cmd_boolean because the variable is
1368 not bool -- it's of type int (-1 means uninitialized because of
1369 some random hackery for disallowing -q -v). */
/* PLACE_IGNORED is not referenced by the code shown here; COM and VAL
   are forwarded to cmd_boolean unchanged.  */
1372 cmd_spec_verbose (const char *com, const char *val, void *place_ignored)
/* Let cmd_boolean parse VAL into the local FLAG rather than writing
   through a pointer to the int-typed verbose variable described
   above.  */
1375 if (cmd_boolean (com, val, &flag))
1383 /* Miscellaneous useful routines. */
1385 /* A very simple atoi clone, more useful than atoi because it works on
1386 delimited strings, and has error reportage. Returns true on success,
1387 false on failure. If successful, stores result to *DEST. */
/* The input is the half-open range [BEG, END).  It need not be
   NUL-terminated: every read below is guarded by a comparison
   against END.  */
1390 simple_atoi (const char *beg, const char *end, int *dest)
1393 bool negative = false;
1394 const char *p = beg;
/* Skip leading whitespace.  */
1396 while (p < end && c_isspace (*p))
/* Optional sign; remember whether it was a minus.  */
1398 if (p < end && (*p == '-' || *p == '+'))
1400 negative = (*p == '-');
1406 /* Read negative numbers in a separate loop because the most
1407 negative integer cannot be represented as a positive number. */
/* NOTE(review): NEXT is computed with ordinary signed arithmetic
   (assuming RESULT is an int like NEXT and *DEST -- confirm), so an
   out-of-range input overflows inside the expression itself.  Signed
   overflow is undefined behavior in C, which makes any comparison
   performed afterwards an unreliable detector.  Bounding RESULT
   against INT_MAX / INT_MIN before multiplying would avoid it.  */
1410 for (; p < end && c_isdigit (*p); p++)
1412 int next = (10 * result) + (*p - '0');
1414 return false; /* overflow */
/* Same accumulation for the negative case: each digit is subtracted,
   so the running value never has to be negated.  */
1418 for (; p < end && c_isdigit (*p); p++)
1420 int next = (10 * result) - (*p - '0');
1422 return false; /* underflow */
1433 /* Trivial atof, with error reporting. Handles "<digits>[.<digits>]",
1434 doesn't handle exponential notation. Returns true on success,
1435 false on failure. In case of success, stores its result to *DEST. */
/* Parses the half-open character range [BEG, END) as a decimal number
   into a double; every read is guarded by a comparison against END.  */
1439 simple_atof (const char *beg, const char *end, double *dest)
/* Parser state: sign of the number, and whether a dot and at least
   one digit have been encountered.  */
1443 bool negative = false;
1444 bool seen_dot = false;
1445 bool seen_digit = false;
1448 const char *p = beg;
/* Skip leading whitespace.  */
1450 while (p < end && c_isspace (*p))
/* Optional sign; remember whether it was a minus.  */
1452 if (p < end && (*p == '-' || *p == '+'))
1454 negative = (*p == '-');
/* Consume the remaining characters one at a time.  */
1458 for (; p < end; p++)
/* Fold a digit in as the next decimal place of the whole-number part.
   NOTE(review): presumably the branch taken while no dot has been
   seen -- confirm.  */
1464 result = (10 * result) + (ch - '0');
/* Fold a digit in as a fraction: DIVIDER is multiplied by ten before
   the division, so each successive digit is scaled down tenfold
   relative to the previous one.  */
1466 result += (ch - '0') / (divider *= 10);
1488 /* Verify that the user-specified header in S is valid. It must
1489 contain a colon preceded by non-white-space characters and must not
1490 contain newlines. */
1493 check_user_specified_header (const char *s)
/* Advance P over the header name: stop at the first colon, the first
   whitespace character, or the terminating NUL.  */
1497 for (p = s; *p && *p != ':' && !c_isspace (*p); p++)
1499 /* The header MUST contain `:' preceded by at least one
1500 non-whitespace character. */
/* P not resting on a colon means whitespace or the end of the string
   came first; P == S means the colon is the very first character.  */
1501 if (*p != ':' || p == s)
1503 /* The header MUST NOT contain newlines. */
/* NOTE(review): only the line-feed character is searched for; a bare
   carriage return is accepted -- confirm that this is intended.  */
1504 if (strchr (s, '\n'))
1509 /* Decode VAL into a number, according to ITEMS. */
/* ITEMS is scanned from index 0 up to ITEMCOUNT - 1.  Names are
   compared with strcasecmp, so matching ignores letter case, and the
   code of a matching entry is written through PLACE.
   NOTE(review): cmd_spec_secure_protocol stores the return value in a
   variable named ok, so it is presumably a success flag -- confirm
   what is returned when nothing matches, and that *PLACE is left
   untouched in that case.  */
1512 decode_string (const char *val, const struct decode_item *items, int itemcount,
1516 for (i = 0; i < itemcount; i++)
1517 if (0 == strcasecmp (val, items[i].name))
1519 *place = items[i].code;
/* Prototype for a routine called from the leak-detection section
   below.  NOTE(review): it is declared by hand here rather than taken
   from a header -- confirm it matches the definition.  */
1526 void cleanup_html_url (void);
1529 /* Free the memory allocated by global variables. */
1533 /* Free external resources, close files, etc. */
/* NOTE(review): passing a null stream to fclose is undefined; confirm
   output_stream is checked, or known to be open, at this point.  */
1536 fclose (output_stream);
1537 /* No need to check for error because Wget flushes its output (and
1538 checks for errors) after any data arrives. */
1540 /* We're exiting anyway so there's no real need to call free()
1541 hundreds of times. Skipping the frees will make Wget exit faster.
1544 However, when detecting leaks, it's crucial to free() everything
1545 because then you can find the real leaks, i.e. the allocated
1546 memory which grows with the size of the program. */
/* NOTE(review): the #endif that closes this section names
   DEBUG_MALLOC, so the frees below are presumably compiled only when
   that macro is defined -- confirm the matching #ifdef.  */
1552 cleanup_html_url ();
/* netrc_list is declared extern right here, just for this call.  */
1557 extern acc_t *netrc_list;
1558 free_netrc (netrc_list);
/* Storage owned by the global options: single strings are released
   with xfree_null, string vectors with free_vec.  */
1560 xfree_null (opt.lfilename);
1561 xfree_null (opt.dir_prefix);
1562 xfree_null (opt.input_filename);
1563 xfree_null (opt.output_document);
1564 free_vec (opt.accepts);
1565 free_vec (opt.rejects);
1566 free_vec (opt.excludes);
1567 free_vec (opt.includes);
1568 free_vec (opt.domains);
1569 free_vec (opt.follow_tags);
1570 free_vec (opt.ignore_tags);
1571 xfree_null (opt.progress_type);
1572 xfree_null (opt.ftp_user);
1573 xfree_null (opt.ftp_passwd);
1574 xfree_null (opt.ftp_proxy);
1575 xfree_null (opt.https_proxy);
1576 xfree_null (opt.http_proxy);
1577 free_vec (opt.no_proxy);
1578 xfree_null (opt.useragent);
1579 xfree_null (opt.referer);
1580 xfree_null (opt.http_user);
1581 xfree_null (opt.http_passwd);
1582 free_vec (opt.user_headers);
1584 xfree_null (opt.cert_file);
1585 xfree_null (opt.private_key);
1586 xfree_null (opt.ca_directory);
1587 xfree_null (opt.ca_cert);
1588 xfree_null (opt.random_file);
1589 xfree_null (opt.egd_file);
1591 xfree_null (opt.bind_address);
1592 xfree_null (opt.cookies_input);
1593 xfree_null (opt.cookies_output);
1594 xfree_null (opt.user);
1595 xfree_null (opt.passwd);
1596 xfree_null (opt.base_href);
1598 #endif /* DEBUG_MALLOC */
1601 /* Unit testing routines. */
/* Unit test: walk the commands table and compare the names of
   adjacent entries with strcasecmp.
   NOTE(review): judging by the function name, the mu_assert below is
   meant to fire when a pair is out of order -- confirm the condition
   applied to CMP.  */
1606 test_commands_sorted()
/* Indices of the pair currently being compared.  */
1608 int prev_idx = 0, next_idx = 1;
/* Despite its name this is the highest valid index rather than the
   number of entries (assuming countof yields the element count): the
   loop bound below is inclusive.  */
1609 int command_count = countof (commands) - 1;
1611 while (next_idx <= command_count)
1613 cmp = strcasecmp (commands[prev_idx].name, commands[next_idx].name);
/* Reaching this statement always fails the test with the message
   FAILED, since the asserted expression is the constant false.  */
1616 mu_assert ("FAILED", false);
/* Unit test for cmd_spec_restrict_file_names.  Each table row pairs an
   input string with the values expected afterwards in
   opt.restrict_files_os, opt.restrict_files_ctrl and
   opt.restrict_files_case, together with the expected return value.  */
1629 test_cmd_spec_restrict_file_names()
/* Expected state of the three option fields after the call.  */
1634 int expected_restrict_files_os;
1635 int expected_restrict_files_ctrl;
1636 int expected_restrict_files_case;
/* Rows: input, expected os, expected ctrl, expected case, expected
   result.  Two of the inputs end in a comma, so a trailing separator
   is covered as well.  */
1639 { "windows", restrict_windows, true, restrict_no_case_restriction, true },
1640 { "windows,", restrict_windows, true, restrict_no_case_restriction, true },
1641 { "windows,lowercase", restrict_windows, true, restrict_lowercase, true },
1642 { "unix,nocontrol,lowercase,", restrict_unix, false, restrict_lowercase, true },
/* One iteration per table row.
   NOTE(review): countof, used elsewhere in this file, would express
   the same bound more directly.  */
1645 for (i = 0; i < sizeof(test_array)/sizeof(test_array[0]); ++i)
/* The command name is a placeholder and no PLACE pointer is passed.  */
1650 res = cmd_spec_restrict_file_names ("dummy", test_array[i].val, NULL);
/* Debug dump of the row number and the option fields under test.  */
1653 fprintf (stderr, "test_cmd_spec_restrict_file_names: TEST %d\n", i); fflush (stderr);
1654 fprintf (stderr, "opt.restrict_files_os: %d\n", opt.restrict_files_os); fflush (stderr);
1655 fprintf (stderr, "opt.restrict_files_ctrl: %d\n", opt.restrict_files_ctrl); fflush (stderr);
1656 fprintf (stderr, "opt.restrict_files_case: %d\n", opt.restrict_files_case); fflush (stderr);
/* The return value and all three option fields must match the row.  */
1658 mu_assert ("test_cmd_spec_restrict_file_names: wrong result",
1659 res == test_array[i].result
1660 && opt.restrict_files_os == test_array[i].expected_restrict_files_os
1661 && opt.restrict_files_ctrl == test_array[i].expected_restrict_files_ctrl
1662 && opt.restrict_files_case == test_array[i].expected_restrict_files_case);
1668 #endif /* TESTING */