1 /* Reading/parsing the initialization file.
2 Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
3 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
5 This file is part of GNU Wget.
7 GNU Wget is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 GNU Wget is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with Wget. If not, see <http://www.gnu.org/licenses/>.
20 In addition, as a special exception, the Free Software Foundation
21 gives permission to link the code of its release of Wget with the
22 OpenSSL project's "OpenSSL" library (or with modified versions of it
23 that use the same license as the "OpenSSL" library), and distribute
24 the linked executables. You must obey the GNU General Public License
25 in all respects for all of the code used other than "OpenSSL". If you
26 modify this file, you may extend this exception to your version of the
27 file, but you are not obligated to do so. If you do not wish to do
28 so, delete this exception statement from your version. */
51 #include "recur.h" /* for INFINITE_RECURSION */
52 #include "convert.h" /* for convert_cleanup */
53 #include "res.h" /* for res_cleanup */
54 #include "http.h" /* for http_cleanup */
55 #include "retr.h" /* for output_stream */
61 /* We want tilde expansion enabled only when reading `.wgetrc' lines;
62 otherwise, it will be performed by the shell. This variable will
63 be set by the wgetrc-reading function. */
65 static bool enable_tilde_expansion;
68 #define CMD_DECLARE(func) static bool func (const char *, const char *, void *)
70 CMD_DECLARE (cmd_boolean);
71 CMD_DECLARE (cmd_bytes);
72 CMD_DECLARE (cmd_bytes_sum);
74 CMD_DECLARE (cmd_cert_type);
76 CMD_DECLARE (cmd_directory_vector);
77 CMD_DECLARE (cmd_number);
78 CMD_DECLARE (cmd_number_inf);
79 CMD_DECLARE (cmd_string);
80 CMD_DECLARE (cmd_file);
81 CMD_DECLARE (cmd_directory);
82 CMD_DECLARE (cmd_time);
83 CMD_DECLARE (cmd_vector);
85 CMD_DECLARE (cmd_spec_dirstruct);
86 CMD_DECLARE (cmd_spec_header);
87 CMD_DECLARE (cmd_spec_htmlify);
88 CMD_DECLARE (cmd_spec_mirror);
89 CMD_DECLARE (cmd_spec_prefer_family);
90 CMD_DECLARE (cmd_spec_progress);
91 CMD_DECLARE (cmd_spec_recursive);
92 CMD_DECLARE (cmd_spec_restrict_file_names);
94 CMD_DECLARE (cmd_spec_secure_protocol);
96 CMD_DECLARE (cmd_spec_timeout);
97 CMD_DECLARE (cmd_spec_useragent);
98 CMD_DECLARE (cmd_spec_verbose);
100 /* List of recognized commands, each consisting of name, place and
101 function. When adding a new command, simply add it to the list,
102 but be sure to keep the list sorted alphabetically, as
103 command_by_name's binary search depends on it. Also, be sure to
104 add any entries that allocate memory (e.g. cmd_string and
105 cmd_vector) to the cleanup() function below. */
107 static const struct {
110 bool (*action) (const char *, const char *, void *);
112 /* KEEP THIS LIST ALPHABETICALLY SORTED */
113 { "accept", &opt.accepts, cmd_vector },
114 { "addhostdir", &opt.add_hostdir, cmd_boolean },
115 { "alwaysrest", &opt.always_rest, cmd_boolean }, /* deprecated */
116 { "background", &opt.background, cmd_boolean },
117 { "backupconverted", &opt.backup_converted, cmd_boolean },
118 { "backups", &opt.backups, cmd_number },
119 { "base", &opt.base_href, cmd_string },
120 { "bindaddress", &opt.bind_address, cmd_string },
122 { "cacertificate", &opt.ca_cert, cmd_file },
124 { "cache", &opt.allow_cache, cmd_boolean },
126 { "cadirectory", &opt.ca_directory, cmd_directory },
127 { "certificate", &opt.cert_file, cmd_file },
128 { "certificatetype", &opt.cert_type, cmd_cert_type },
129 { "checkcertificate", &opt.check_cert, cmd_boolean },
131 { "connecttimeout", &opt.connect_timeout, cmd_time },
132 { "contentdisposition", &opt.content_disposition, cmd_boolean },
133 { "continue", &opt.always_rest, cmd_boolean },
134 { "convertlinks", &opt.convert_links, cmd_boolean },
135 { "cookies", &opt.cookies, cmd_boolean },
136 { "cutdirs", &opt.cut_dirs, cmd_number },
138 { "debug", &opt.debug, cmd_boolean },
140 { "deleteafter", &opt.delete_after, cmd_boolean },
141 { "dirprefix", &opt.dir_prefix, cmd_directory },
142 { "dirstruct", NULL, cmd_spec_dirstruct },
143 { "dnscache", &opt.dns_cache, cmd_boolean },
144 { "dnstimeout", &opt.dns_timeout, cmd_time },
145 { "domains", &opt.domains, cmd_vector },
146 { "dotbytes", &opt.dot_bytes, cmd_bytes },
147 { "dotsinline", &opt.dots_in_line, cmd_number },
148 { "dotspacing", &opt.dot_spacing, cmd_number },
149 { "dotstyle", &opt.dot_style, cmd_string },
151 { "egdfile", &opt.egd_file, cmd_file },
153 { "excludedirectories", &opt.excludes, cmd_directory_vector },
154 { "excludedomains", &opt.exclude_domains, cmd_vector },
155 { "followftp", &opt.follow_ftp, cmd_boolean },
156 { "followtags", &opt.follow_tags, cmd_vector },
157 { "forcehtml", &opt.force_html, cmd_boolean },
158 { "ftppasswd", &opt.ftp_passwd, cmd_string }, /* deprecated */
159 { "ftppassword", &opt.ftp_passwd, cmd_string },
160 { "ftpproxy", &opt.ftp_proxy, cmd_string },
161 { "ftpuser", &opt.ftp_user, cmd_string },
162 { "glob", &opt.ftp_glob, cmd_boolean },
163 { "header", NULL, cmd_spec_header },
164 { "htmlextension", &opt.html_extension, cmd_boolean },
165 { "htmlify", NULL, cmd_spec_htmlify },
166 { "httpkeepalive", &opt.http_keep_alive, cmd_boolean },
167 { "httppasswd", &opt.http_passwd, cmd_string }, /* deprecated */
168 { "httppassword", &opt.http_passwd, cmd_string },
169 { "httpproxy", &opt.http_proxy, cmd_string },
170 { "httpsproxy", &opt.https_proxy, cmd_string },
171 { "httpuser", &opt.http_user, cmd_string },
172 { "ignorecase", &opt.ignore_case, cmd_boolean },
173 { "ignorelength", &opt.ignore_length, cmd_boolean },
174 { "ignoretags", &opt.ignore_tags, cmd_vector },
175 { "includedirectories", &opt.includes, cmd_directory_vector },
177 { "inet4only", &opt.ipv4_only, cmd_boolean },
178 { "inet6only", &opt.ipv6_only, cmd_boolean },
180 { "input", &opt.input_filename, cmd_file },
181 { "keepsessioncookies", &opt.keep_session_cookies, cmd_boolean },
182 { "limitrate", &opt.limit_rate, cmd_bytes },
183 { "loadcookies", &opt.cookies_input, cmd_file },
184 { "logfile", &opt.lfilename, cmd_file },
185 { "login", &opt.ftp_user, cmd_string },/* deprecated*/
186 { "maxredirect", &opt.max_redirect, cmd_number },
187 { "mirror", NULL, cmd_spec_mirror },
188 { "netrc", &opt.netrc, cmd_boolean },
189 { "noclobber", &opt.noclobber, cmd_boolean },
190 { "noparent", &opt.no_parent, cmd_boolean },
191 { "noproxy", &opt.no_proxy, cmd_vector },
192 { "numtries", &opt.ntry, cmd_number_inf },/* deprecated*/
193 { "outputdocument", &opt.output_document, cmd_file },
194 { "pagerequisites", &opt.page_requisites, cmd_boolean },
195 { "passiveftp", &opt.ftp_pasv, cmd_boolean },
196 { "passwd", &opt.ftp_passwd, cmd_string },/* deprecated*/
197 { "password", &opt.passwd, cmd_string },
198 { "postdata", &opt.post_data, cmd_string },
199 { "postfile", &opt.post_file_name, cmd_file },
200 { "preferfamily", NULL, cmd_spec_prefer_family },
201 { "preservepermissions", &opt.preserve_perm, cmd_boolean },
203 { "privatekey", &opt.private_key, cmd_file },
204 { "privatekeytype", &opt.private_key_type, cmd_cert_type },
206 { "progress", &opt.progress_type, cmd_spec_progress },
207 { "protocoldirectories", &opt.protocol_directories, cmd_boolean },
208 { "proxypasswd", &opt.proxy_passwd, cmd_string }, /* deprecated */
209 { "proxypassword", &opt.proxy_passwd, cmd_string },
210 { "proxyuser", &opt.proxy_user, cmd_string },
211 { "quiet", &opt.quiet, cmd_boolean },
212 { "quota", &opt.quota, cmd_bytes_sum },
214 { "randomfile", &opt.random_file, cmd_file },
216 { "randomwait", &opt.random_wait, cmd_boolean },
217 { "readtimeout", &opt.read_timeout, cmd_time },
218 { "reclevel", &opt.reclevel, cmd_number_inf },
219 { "recursive", NULL, cmd_spec_recursive },
220 { "referer", &opt.referer, cmd_string },
221 { "reject", &opt.rejects, cmd_vector },
222 { "relativeonly", &opt.relative_only, cmd_boolean },
223 { "removelisting", &opt.remove_listing, cmd_boolean },
224 { "restrictfilenames", NULL, cmd_spec_restrict_file_names },
225 { "retrsymlinks", &opt.retr_symlinks, cmd_boolean },
226 { "retryconnrefused", &opt.retry_connrefused, cmd_boolean },
227 { "robots", &opt.use_robots, cmd_boolean },
228 { "savecookies", &opt.cookies_output, cmd_file },
229 { "saveheaders", &opt.save_headers, cmd_boolean },
231 { "secureprotocol", &opt.secure_protocol, cmd_spec_secure_protocol },
233 { "serverresponse", &opt.server_response, cmd_boolean },
234 { "spanhosts", &opt.spanhost, cmd_boolean },
235 { "spider", &opt.spider, cmd_boolean },
236 { "strictcomments", &opt.strict_comments, cmd_boolean },
237 { "timeout", NULL, cmd_spec_timeout },
238 { "timestamping", &opt.timestamping, cmd_boolean },
239 { "tries", &opt.ntry, cmd_number_inf },
240 { "useproxy", &opt.use_proxy, cmd_boolean },
241 { "user", &opt.user, cmd_string },
242 { "useragent", NULL, cmd_spec_useragent },
243 { "verbose", NULL, cmd_spec_verbose },
244 { "wait", &opt.wait, cmd_time },
245 { "waitretry", &opt.waitretry, cmd_time },
247 { "wdebug", &opt.wdebug, cmd_boolean },
251 /* Look up CMDNAME in the commands[] and return its position in the
252 array. If CMDNAME is not found, return -1. */
255 command_by_name (const char *cmdname)
257 /* Use binary search for speed. Wget has ~100 commands, which
258 guarantees a worst case performance of 7 string comparisons. */
259 int lo = 0, hi = countof (commands) - 1;
263 int mid = (lo + hi) >> 1;
264 int cmp = strcasecmp (cmdname, commands[mid].name);
275 /* Reset the variables to default values. */
281 /* Most of the default values are 0 (and 0.0, NULL, and false).
282 Just reset everything, and fill in the non-zero values. Note
283 that initializing pointers to NULL this way is technically
284 illegal, but porting Wget to a machine where NULL is not all-zero
285 bit pattern will be the least of the implementors' worries. */
292 opt.add_hostdir = true;
296 opt.http_keep_alive = true;
297 opt.use_proxy = true;
298 tmp = getenv ("no_proxy");
300 opt.no_proxy = sepstring (tmp);
301 opt.allow_cache = true;
303 opt.read_timeout = 900;
304 opt.use_robots = true;
306 opt.remove_listing = true;
308 opt.dot_bytes = 1024;
309 opt.dot_spacing = 10;
310 opt.dots_in_line = 50;
312 opt.dns_cache = true;
316 opt.check_cert = true;
319 /* File name restriction defaults to the conventions of the host OS. */
320 #if defined(WINDOWS) || defined(MSDOS) || defined(__CYGWIN__)
321 opt.restrict_files_os = restrict_windows;
323 opt.restrict_files_os = restrict_unix;
325 opt.restrict_files_ctrl = true;
326 opt.restrict_files_case = restrict_no_case_restriction;
328 opt.max_redirect = 20;
331 /* Return the user's home directory (strdup-ed), or NULL if none is
336 char *home = getenv ("HOME");
341 /* Under MSDOS, if $HOME isn't defined, use the directory where
342 `wget.exe' resides. */
343 const char *_w32_get_argv0 (void); /* in libwatt.a/pcconfig.c */
344 char *p, buf[PATH_MAX];
346 strcpy (buf, _w32_get_argv0 ());
347 p = strrchr (buf, '/'); /* djgpp */
349 p = strrchr (buf, '\\'); /* others */
353 #elif !defined(WINDOWS)
354 /* If HOME is not defined, try getting it from the password
356 struct passwd *pwd = getpwuid (getuid ());
357 if (!pwd || !pwd->pw_dir)
361 /* Under Windows, if $HOME isn't defined, use the directory where
362 `wget.exe' resides. */
367 return home ? xstrdup (home) : NULL;
370 /* Return the path to the user's .wgetrc. This is either the value of
371 `WGETRC' environment variable, or `$HOME/.wgetrc'.
373 If the `WGETRC' variable exists but the file does not exist, the
374 function will exit(). */
376 wgetrc_file_name (void)
381 /* Try the environment. */
382 env = getenv ("WGETRC");
385 if (!file_exists_p (env))
387 fprintf (stderr, _("%s: WGETRC points to %s, which doesn't exist.\n"),
391 return xstrdup (env);
394 /* If that failed, try $HOME/.wgetrc. */
397 file = aprintf ("%s/.wgetrc", home);
401 /* Under Windows, if we still haven't found .wgetrc, look for the file
402 `wget.ini' in the directory where `wget.exe' resides; we do this for
403 backward compatibility with previous versions of Wget.
404 SYSTEM_WGETRC should not be defined under WINDOWS. */
405 if (!file || !file_exists_p (file))
411 file = aprintf ("%s/wget.ini", home);
417 if (!file_exists_p (file))
425 /* Return values of parse_line. */
433 static enum parse_line parse_line (const char *, char **, char **, int *);
434 static bool setval_internal (int, const char *, const char *);
436 /* Initialize variables from a wgetrc file. Returns false (failure) if
437 there were errors in the file; an unreadable file is not an error. */
440 run_wgetrc (const char *file)
447 fp = fopen (file, "rb");
450 fprintf (stderr, _("%s: Cannot read %s (%s).\n"), exec_name,
451 file, strerror (errno));
452 return true; /* not a fatal error */
454 enable_tilde_expansion = true;
456 while ((line = read_whole_line (fp)) != NULL)
458 char *com = NULL, *val = NULL;
461 /* Parse the line. */
462 switch (parse_line (line, &com, &val, &comind))
465 /* If everything is OK, set the value. */
466 if (!setval_internal (comind, com, val))
468 fprintf (stderr, _("%s: Error in %s at line %d.\n"),
469 exec_name, file, ln);
473 case line_syntax_error:
474 fprintf (stderr, _("%s: Syntax error in %s at line %d.\n"),
475 exec_name, file, ln);
478 case line_unknown_command:
479 fprintf (stderr, _("%s: Unknown command `%s' in %s at line %d.\n"),
480 exec_name, com, file, ln);
493 enable_tilde_expansion = false;
499 /* Initialize the defaults and run the system wgetrc and user's own
507 /* Load the hard-coded defaults. */
510 /* If SYSTEM_WGETRC is defined, use it. */
512 if (file_exists_p (SYSTEM_WGETRC))
513 ok &= run_wgetrc (SYSTEM_WGETRC);
515 /* Override it with your own, if one exists. */
516 file = wgetrc_file_name ();
519 /* #### We should canonicalize `file' and SYSTEM_WGETRC with
520 something like realpath() before comparing them with `strcmp' */
522 if (!strcmp (file, SYSTEM_WGETRC))
524 fprintf (stderr, _("\
525 %s: Warning: Both system and user wgetrc point to `%s'.\n"),
530 ok &= run_wgetrc (file);
532 /* If there were errors processing either `.wgetrc', abort. */
540 /* Remove dashes and underscores from S, modifying S in the
546 char *t = s; /* t - tortoise */
547 char *h = s; /* h - hare */
549 if (*h == '_' || *h == '-')
556 /* Parse the line pointed to by LINE, with the syntax:
557 <sp>* command <sp>* = <sp>* value <sp>*
558 Uses malloc to allocate space for command and value.
560 Returns one of line_ok, line_empty, line_syntax_error, or
561 line_unknown_command.
563 In case of line_ok or line_unknown_command, *COM and *VAL point to
564 freshly allocated strings; for line_ok, *COMIND is also set to the
565 command's index. For a syntax error or empty line, they are unmodified. */
567 static enum parse_line
568 parse_line (const char *line, char **com, char **val, int *comind)
571 const char *end = line + strlen (line);
572 const char *cmdstart, *cmdend;
573 const char *valstart, *valend;
578 /* Skip leading and trailing whitespace. */
579 while (*line && c_isspace (*line))
581 while (end > line && c_isspace (end[-1]))
584 /* Skip empty lines and comments. */
585 if (!*line || *line == '#')
591 while (p < end && (c_isalnum (*p) || *p == '_' || *p == '-'))
595 /* Skip '=', as well as any space before or after it. */
596 while (p < end && c_isspace (*p))
598 if (p == end || *p != '=')
599 return line_syntax_error;
601 while (p < end && c_isspace (*p))
607 /* The syntax is valid (even though the command might not be). Fill
608 in the command name and value. */
609 *com = strdupdelim (cmdstart, cmdend);
610 *val = strdupdelim (valstart, valend);
612 /* The line now known to be syntactically correct. Check whether
613 the command is valid. */
614 BOUNDED_TO_ALLOCA (cmdstart, cmdend, cmdcopy);
616 ind = command_by_name (cmdcopy);
618 return line_unknown_command;
620 /* Report success to the caller. */
625 /* Run commands[comind].action. */
628 setval_internal (int comind, const char *com, const char *val)
630 assert (0 <= comind && comind < countof (commands));
631 DEBUGP (("Setting %s (%s) to %s\n", com, commands[comind].name, val));
632 return commands[comind].action (com, val, commands[comind].place);
635 /* Run command COM with value VAL. If running the command produces an
636 error, report the error and exit.
638 This is intended to be called from main() to modify Wget's behavior
639 through command-line switches. Since COM is hard-coded in main(),
640 it is not canonicalized, and this aborts when COM is not found.
642 If COMIND's are exported to init.h, this function will be changed
643 to accept COMIND directly. */
646 setoptval (const char *com, const char *val, const char *optname)
648 /* Prepend "--" to OPTNAME. */
649 char *dd_optname = (char *) alloca (2 + strlen (optname) + 1);
652 strcpy (dd_optname + 2, optname);
654 assert (val != NULL);
655 if (!setval_internal (command_by_name (com), dd_optname, val))
659 /* Parse OPT into command and value and run it. For example,
660 run_command("foo=bar") sets "foo" to "bar", as setoptval("foo", "bar", OPTNAME) would.
661 This is used by the `--execute' flag in main.c. */
664 run_command (const char *opt)
668 switch (parse_line (opt, &com, &val, &comind))
671 if (!setval_internal (comind, com, val))
677 fprintf (stderr, _("%s: Invalid --execute command `%s'\n"),
683 /* Generic helper functions, for use with `commands'. */
685 /* Forward declarations: */
690 static bool decode_string (const char *, const struct decode_item *, int, int *);
691 static bool simple_atoi (const char *, const char *, int *);
692 static bool simple_atof (const char *, const char *, double *);
694 #define CMP1(p, c0) (c_tolower((p)[0]) == (c0) && (p)[1] == '\0')
696 #define CMP2(p, c0, c1) (c_tolower((p)[0]) == (c0) \
697 && c_tolower((p)[1]) == (c1) \
700 #define CMP3(p, c0, c1, c2) (c_tolower((p)[0]) == (c0) \
701 && c_tolower((p)[1]) == (c1) \
702 && c_tolower((p)[2]) == (c2) \
706 /* Store the boolean value from VAL to PLACE. COM is ignored,
707 except for error messages. */
709 cmd_boolean (const char *com, const char *val, void *place)
713 if (CMP2 (val, 'o', 'n') || CMP3 (val, 'y', 'e', 's') || CMP1 (val, '1'))
714 /* "on", "yes" and "1" mean true. */
716 else if (CMP3 (val, 'o', 'f', 'f') || CMP2 (val, 'n', 'o') || CMP1 (val, '0'))
717 /* "off", "no" and "0" mean false. */
722 _("%s: %s: Invalid boolean `%s'; use `on' or `off'.\n"),
723 exec_name, com, val);
727 *(bool *) place = value;
731 /* Set the non-negative integer value from VAL to PLACE. A malformed
732 number leaves PLACE unchanged; a negative one is stored, then rejected. */
734 cmd_number (const char *com, const char *val, void *place)
736 if (!simple_atoi (val, val + strlen (val), place)
737 || *(int *) place < 0)
739 fprintf (stderr, _("%s: %s: Invalid number `%s'.\n"),
740 exec_name, com, val);
746 /* Similar to cmd_number(), but also accepts `inf' as a synonym for 0. */
748 cmd_number_inf (const char *com, const char *val, void *place)
750 if (!strcasecmp (val, "inf"))
755 return cmd_number (com, val, place);
758 /* Copy (strdup) the string at VAL to a new location and store a
759 pointer to the copy in *PLACE, freeing any previous value. */
761 cmd_string (const char *com, const char *val, void *place)
763 char **pstring = (char **)place;
765 xfree_null (*pstring);
766 *pstring = xstrdup (val);
770 #if defined(WINDOWS) || defined(MSDOS)
771 # define ISSEP(c) ((c) == '/' || (c) == '\\')
773 # define ISSEP(c) ((c) == '/')
776 /* Like the above, but handles tilde-expansion when reading a user's
777 `.wgetrc'. In that case, and if VAL begins with `~', the tilde
778 gets expanded to the user's home directory. */
780 cmd_file (const char *com, const char *val, void *place)
782 char **pstring = (char **)place;
784 xfree_null (*pstring);
786 /* #### If VAL is empty, perhaps should set *PLACE to NULL. */
788 if (!enable_tilde_expansion || !(*val == '~' && ISSEP (val[1])))
791 *pstring = xstrdup (val);
796 char *home = home_dir ();
800 homelen = strlen (home);
801 while (homelen && ISSEP (home[homelen - 1]))
802 home[--homelen] = '\0';
804 /* Skip the leading "~/". */
805 for (++val; ISSEP (*val); val++)
808 *pstring = concat_strings (home, "/", val, (char *) 0);
811 #if defined(WINDOWS) || defined(MSDOS)
812 /* Convert "\" to "/". */
815 for (s = *pstring; *s; s++)
823 /* Like cmd_file, but strips trailing '/' characters. */
825 cmd_directory (const char *com, const char *val, void *place)
829 /* Call cmd_file() for tilde expansion and separator
830 canonicalization (backslash -> slash under Windows). These
831 things should perhaps be in a separate function. */
832 if (!cmd_file (com, val, place))
837 while (t > s && *--t == '/')
843 /* Split VAL by space to a vector of values, and append those values
844 to vector pointed to by the PLACE argument. If VAL is empty, the
845 PLACE vector is cleared instead. */
848 cmd_vector (const char *com, const char *val, void *place)
850 char ***pvec = (char ***)place;
853 *pvec = merge_vecs (*pvec, sepstring (val));
863 cmd_directory_vector (const char *com, const char *val, void *place)
865 char ***pvec = (char ***)place;
869 /* Strip the trailing slashes from directories. */
872 seps = sepstring (val);
873 for (t = seps; t && *t; t++)
875 int len = strlen (*t);
876 /* Skip degenerate case of root directory. */
879 if ((*t)[len - 1] == '/')
880 (*t)[len - 1] = '\0';
883 *pvec = merge_vecs (*pvec, seps);
893 /* Engine for cmd_bytes and cmd_bytes_sum: converts a string such as
894 "100k" or "2.5G" to a floating point number. */
897 parse_bytes_helper (const char *val, double *result)
900 const char *end = val + strlen (val);
902 /* Check for "inf". */
903 if (0 == strcmp (val, "inf"))
909 /* Strip trailing whitespace. */
910 while (val < end && c_isspace (end[-1]))
915 switch (c_tolower (end[-1]))
918 --end, mult = 1024.0;
921 --end, mult = 1048576.0;
924 --end, mult = 1073741824.0;
927 --end, mult = 1099511627776.0;
930 /* Not a recognized suffix: assume it's a digit. (If not,
931 simple_atof will raise an error.) */
935 /* Skip leading and trailing whitespace. */
936 while (val < end && c_isspace (*val))
938 while (val < end && c_isspace (end[-1]))
943 if (!simple_atof (val, end, &number) || number < 0)
946 *result = number * mult;
950 /* Parse VAL as a number and set its value to PLACE (which should
953 By default, the value is assumed to be in bytes. If "K", "M", or
954 "G" are appended, the value is multiplied with 1<<10, 1<<20, or
955 1<<30, respectively. Floating point values are allowed and are
956 cast to integer before use. The idea is to be able to use things
957 like 1.5k instead of "1536".
959 The string "inf" is returned as 0.
961 In case of error, false is returned and memory pointed to by PLACE
962 remains unmodified. */
965 cmd_bytes (const char *com, const char *val, void *place)
968 if (!parse_bytes_helper (val, &byte_value))
970 fprintf (stderr, _("%s: %s: Invalid byte value `%s'\n"),
971 exec_name, com, val);
974 *(wgint *)place = (wgint)byte_value;
978 /* Like cmd_bytes, but PLACE is interpreted as a pointer to
979 SUM_SIZE_INT. It works by converting the string to double, therefore
980 working with values up to 2^53-1 without loss of precision. This
981 value (8192 TB) is large enough to serve for a while. */
984 cmd_bytes_sum (const char *com, const char *val, void *place)
987 if (!parse_bytes_helper (val, &byte_value))
989 fprintf (stderr, _("%s: %s: Invalid byte value `%s'\n"),
990 exec_name, com, val);
993 *(SUM_SIZE_INT *) place = (SUM_SIZE_INT) byte_value;
997 /* Store the value of VAL to *PLACE. The value is a time period, by
998 default expressed in seconds, but also accepting suffixes "m", "h",
999 "d", and "w" for minutes, hours, days, and weeks respectively. */
1002 cmd_time (const char *com, const char *val, void *place)
1004 double number, mult;
1005 const char *end = val + strlen (val);
1007 /* Strip trailing whitespace. */
1008 while (val < end && c_isspace (end[-1]))
1014 fprintf (stderr, _("%s: %s: Invalid time period `%s'\n"),
1015 exec_name, com, val);
1019 switch (c_tolower (end[-1]))
1022 --end, mult = 1; /* seconds */
1025 --end, mult = 60; /* minutes */
1028 --end, mult = 3600; /* hours */
1031 --end, mult = 86400.0; /* days */
1034 --end, mult = 604800.0; /* weeks */
1037 /* Not a recognized suffix: assume it belongs to the number.
1038 (If not, simple_atof will raise an error.) */
1042 /* Skip leading and trailing whitespace. */
1043 while (val < end && c_isspace (*val))
1045 while (val < end && c_isspace (end[-1]))
1050 if (!simple_atof (val, end, &number))
1053 *(double *)place = number * mult;
1059 cmd_cert_type (const char *com, const char *val, void *place)
1061 static const struct decode_item choices[] = {
1062 { "pem", keyfile_pem },
1063 { "der", keyfile_asn1 },
1064 { "asn1", keyfile_asn1 },
1066 int ok = decode_string (val, choices, countof (choices), place);
1068 fprintf (stderr, _("%s: %s: Invalid value `%s'.\n"), exec_name, com, val);
1073 /* Specialized helper functions, used by `commands' to handle some
1074 options specially. */
1076 static bool check_user_specified_header (const char *);
1079 cmd_spec_dirstruct (const char *com, const char *val, void *place_ignored)
1081 if (!cmd_boolean (com, val, &opt.dirstruct))
1083 /* Since dirstruct behaviour is explicitly changed, no_dirstruct
1084 must be affected inversely. */
1086 opt.no_dirstruct = false;
1088 opt.no_dirstruct = true;
1093 cmd_spec_header (const char *com, const char *val, void *place_ignored)
1095 /* Empty value means reset the list of headers. */
1098 free_vec (opt.user_headers);
1099 opt.user_headers = NULL;
1103 if (!check_user_specified_header (val))
1105 fprintf (stderr, _("%s: %s: Invalid header `%s'.\n"),
1106 exec_name, com, val);
1109 opt.user_headers = vec_append (opt.user_headers, val);
1114 cmd_spec_htmlify (const char *com, const char *val, void *place_ignored)
1116 int flag = cmd_boolean (com, val, &opt.htmlify);
1117 if (flag && !opt.htmlify)
1118 opt.remove_listing = false;
1122 /* Set the "mirror" mode. It means: recursive download, timestamping,
1123 no limit on max. recursion depth, and don't remove listings. */
1126 cmd_spec_mirror (const char *com, const char *val, void *place_ignored)
1130 if (!cmd_boolean (com, val, &mirror))
1134 opt.recursive = true;
1135 if (!opt.no_dirstruct)
1136 opt.dirstruct = true;
1137 opt.timestamping = true;
1138 opt.reclevel = INFINITE_RECURSION;
1139 opt.remove_listing = false;
1144 /* Validate --prefer-family and set the choice. Allowed values are
1145 "IPv4", "IPv6", and "none". */
1148 cmd_spec_prefer_family (const char *com, const char *val, void *place_ignored)
1150 static const struct decode_item choices[] = {
1151 { "IPv4", prefer_ipv4 },
1152 { "IPv6", prefer_ipv6 },
1153 { "none", prefer_none },
1155 int prefer_family = prefer_ipv4;
1156 int ok = decode_string (val, choices, countof (choices), &prefer_family);
1158 fprintf (stderr, _("%s: %s: Invalid value `%s'.\n"), exec_name, com, val);
1159 opt.prefer_family = prefer_family;
1163 /* Set opt.progress_type to VAL, but verify that it's a valid progress
1164 implementation before that. */
1167 cmd_spec_progress (const char *com, const char *val, void *place_ignored)
1169 if (!valid_progress_implementation_p (val))
1171 fprintf (stderr, _("%s: %s: Invalid progress type `%s'.\n"),
1172 exec_name, com, val);
1175 xfree_null (opt.progress_type);
1177 /* Don't call set_progress_implementation here. It will be called
1178 in main() when it becomes clear what the log output is. */
1179 opt.progress_type = xstrdup (val);
1183 /* Set opt.recursive to VAL as with cmd_boolean. If opt.recursive is
1184 set to true, also set opt.dirstruct to true, unless opt.no_dirstruct
1188 cmd_spec_recursive (const char *com, const char *val, void *place_ignored)
1190 if (!cmd_boolean (com, val, &opt.recursive))
1194 if (opt.recursive && !opt.no_dirstruct)
1195 opt.dirstruct = true;
1201 cmd_spec_restrict_file_names (const char *com, const char *val, void *place_ignored)
1203 int restrict_os = opt.restrict_files_os;
1204 int restrict_ctrl = opt.restrict_files_ctrl;
1205 int restrict_case = opt.restrict_files_case;
1209 #define VAL_IS(string_literal) BOUNDED_EQUAL (val, end, string_literal)
1213 end = strchr (val, ',');
1215 end = val + strlen (val);
1217 if (VAL_IS ("unix"))
1218 restrict_os = restrict_unix;
1219 else if (VAL_IS ("windows"))
1220 restrict_os = restrict_windows;
1221 else if (VAL_IS ("lowercase"))
1222 restrict_case = restrict_lowercase;
1223 else if (VAL_IS ("uppercase"))
1224 restrict_case = restrict_uppercase;
1225 else if (VAL_IS ("nocontrol"))
1226 restrict_ctrl = false;
1230 _("%s: %s: Invalid restriction `%s', use [unix|windows],[lowercase|uppercase],[nocontrol].\n"),
1231 exec_name, com, val);
1238 while (*val && *end);
1242 opt.restrict_files_os = restrict_os;
1243 opt.restrict_files_ctrl = restrict_ctrl;
1244 opt.restrict_files_case = restrict_case;
1251 cmd_spec_secure_protocol (const char *com, const char *val, void *place)
1253 static const struct decode_item choices[] = {
1254 { "auto", secure_protocol_auto },
1255 { "sslv2", secure_protocol_sslv2 },
1256 { "sslv3", secure_protocol_sslv3 },
1257 { "tlsv1", secure_protocol_tlsv1 },
1259 int ok = decode_string (val, choices, countof (choices), place);
1261 fprintf (stderr, _("%s: %s: Invalid value `%s'.\n"), exec_name, com, val);
1266 /* Set all three timeout values. */
1269 cmd_spec_timeout (const char *com, const char *val, void *place_ignored)
1272 if (!cmd_time (com, val, &value))
1274 opt.read_timeout = value;
1275 opt.connect_timeout = value;
1276 opt.dns_timeout = value;
1281 cmd_spec_useragent (const char *com, const char *val, void *place_ignored)
1283 /* Disallow embedded newlines. */
1284 if (strchr (val, '\n'))
1286 fprintf (stderr, _("%s: %s: Invalid value `%s'.\n"),
1287 exec_name, com, val);
1290 xfree_null (opt.useragent);
1291 opt.useragent = xstrdup (val);
1295 /* The "verbose" option cannot be cmd_boolean because the variable is
1296 not bool -- it's of type int (-1 means uninitialized because of
1297 some random hackery for disallowing -q -v). */
1300 cmd_spec_verbose (const char *com, const char *val, void *place_ignored)
1303 if (cmd_boolean (com, val, &flag))
1311 /* Miscellaneous useful routines. */
1313 /* A very simple atoi clone, more useful than atoi because it works on
1314 delimited strings, and has error reporting. Returns true on success,
1315 false on failure. If successful, stores result to *DEST. */
1318 simple_atoi (const char *beg, const char *end, int *dest)
1321 bool negative = false;
1322 const char *p = beg;
1324 while (p < end && c_isspace (*p))
1326 if (p < end && (*p == '-' || *p == '+'))
1328 negative = (*p == '-');
1334 /* Read negative numbers in a separate loop because the most
1335 negative integer cannot be represented as a positive number. */
1338 for (; p < end && c_isdigit (*p); p++)
1340 int next = (10 * result) + (*p - '0');
1342 return false; /* overflow */
1346 for (; p < end && c_isdigit (*p); p++)
1348 int next = (10 * result) - (*p - '0');
1350 return false; /* underflow */
1361 /* Trivial atof, with error reporting. Handles "<digits>[.<digits>]",
1362 doesn't handle exponential notation. Returns true on success,
1363 false on failure. In case of success, stores its result to
1367 simple_atof (const char *beg, const char *end, double *dest)
1371 bool negative = false;
1372 bool seen_dot = false;
1373 bool seen_digit = false;
1376 const char *p = beg;
1378 while (p < end && c_isspace (*p))
1380 if (p < end && (*p == '-' || *p == '+'))
1382 negative = (*p == '-');
1386 for (; p < end; p++)
1392 result = (10 * result) + (ch - '0');
1394 result += (ch - '0') / (divider *= 10);
1416 /* Verify that the user-specified header in S is valid. It must
1417 contain a colon preceded by non-white-space characters and must not
1418 contain newlines. */
/* NOTE(review): the return type, braces, the declaration of P and the
   return statements are not visible in this excerpt. */
1421 check_user_specified_header (const char *s)
/* Advance P to the first ':', the first whitespace character, or the
   terminating NUL, whichever comes first. */
1425 for (p = s; *p && *p != ':' && !c_isspace (*p); p++)
1427 /* The header MUST contain `:' preceded by at least one
1428 non-whitespace character. */
/* True when the scan stopped on something other than ':' (whitespace
   or end of string), or when the ':' is the very first character. */
1429 if (*p != ':' || p == s)
1431 /* The header MUST NOT contain newlines. */
1432 if (strchr (s, '\n'))
1437 /* Decode VAL into a number, according to ITEMS. */
/* ITEMS is scanned linearly over its first ITEMCOUNT entries.
   NOTE(review): the rest of the parameter list (the declaration of
   PLACE), the return type, the declaration of I and the return
   statements are not visible in this excerpt. */
1440 decode_string (const char *val, const struct decode_item *items, int itemcount,
1444 for (i = 0; i < itemcount; i++)
/* The name comparison ignores letter case. */
1445 if (0 == strcasecmp (val, items[i].name))
/* On a match, hand the entry's numeric code back through PLACE. */
1447 *place = items[i].code;
/* Declared by hand here so that the cleanup code below can call it. */
1454 void cleanup_html_url (void);
1457 /* Free the memory allocated by global variables. */
/* NOTE(review): the function header, its braces and the opening of the
   DEBUG_MALLOC conditional that is closed at the bottom are not visible
   in this excerpt. */
1461 /* Free external resources, close files, etc. */
1464 fclose (output_stream);
1465 /* No need to check for error because Wget flushes its output (and
1466 checks for errors) after any data arrives. */
1468 /* We're exiting anyway so there's no real need to call free()
1469 hundreds of times. Skipping the frees will make Wget exit faster.
1472 However, when detecting leaks, it's crucial to free() everything
1473 because then you can find the real leaks, i.e. the allocated
1474 memory which grows with the size of the program. */
1480 cleanup_html_url ();
/* netrc_list is defined in another translation unit; it is declared
   locally here only so that it can be released. */
1485 extern acc_t *netrc_list;
1486 free_netrc (netrc_list);
/* Release the values stored in the global options: xfree_null for
   single pointers, free_vec for vectors.  Presumably both tolerate
   NULL -- verify against their definitions. */
1488 xfree_null (opt.lfilename);
1489 xfree_null (opt.dir_prefix);
1490 xfree_null (opt.input_filename);
1491 xfree_null (opt.output_document);
1492 free_vec (opt.accepts);
1493 free_vec (opt.rejects);
1494 free_vec (opt.excludes);
1495 free_vec (opt.includes);
1496 free_vec (opt.domains);
1497 free_vec (opt.follow_tags);
1498 free_vec (opt.ignore_tags);
1499 xfree_null (opt.progress_type);
1500 xfree_null (opt.ftp_user);
1501 xfree_null (opt.ftp_passwd);
1502 xfree_null (opt.ftp_proxy);
1503 xfree_null (opt.https_proxy);
1504 xfree_null (opt.http_proxy);
1505 free_vec (opt.no_proxy);
1506 xfree_null (opt.useragent);
1507 xfree_null (opt.referer);
1508 xfree_null (opt.http_user);
1509 xfree_null (opt.http_passwd);
1510 free_vec (opt.user_headers);
/* Certificate, key and randomness-source options. */
1512 xfree_null (opt.cert_file);
1513 xfree_null (opt.private_key);
1514 xfree_null (opt.ca_directory);
1515 xfree_null (opt.ca_cert);
1516 xfree_null (opt.random_file);
1517 xfree_null (opt.egd_file);
/* Remaining string options. */
1519 xfree_null (opt.bind_address);
1520 xfree_null (opt.cookies_input);
1521 xfree_null (opt.cookies_output);
1522 xfree_null (opt.user);
1523 xfree_null (opt.passwd);
1524 #endif /* DEBUG_MALLOC */
1527 /* Unit testing routines. */
/* Table-driven check of cmd_spec_restrict_file_names: each row supplies
   an input string together with the option values and return value
   expected after parsing it.
   NOTE(review): the return type, the struct header with its VAL and
   RESULT members, the local declarations and the final return are not
   visible in this excerpt. */
1532 test_cmd_spec_restrict_file_names()
/* Expected contents of opt.restrict_files_os, opt.restrict_files_ctrl
   and opt.restrict_files_case once the input has been parsed. */
1537 int expected_restrict_files_os;
1538 int expected_restrict_files_ctrl;
1539 int expected_restrict_files_case;
/* Rows: input, expected os, expected ctrl, expected case, expected
   return value (presumed member order -- confirm against the struct). */
1542 { "windows", restrict_windows, true, restrict_no_case_restriction, true },
1543 { "windows,", restrict_windows, true, restrict_no_case_restriction, true },
1544 { "windows,lowercase", restrict_windows, true, restrict_lowercase, true },
1545 { "unix,nocontrol,lowercase,", restrict_unix, false, restrict_lowercase, true },
/* Run every row of the table. */
1548 for (i = 0; i < sizeof(test_array)/sizeof(test_array[0]); ++i)
/* The command name is a placeholder and NULL is passed as the place
   argument. */
1553 res = cmd_spec_restrict_file_names ("dummy", test_array[i].val, NULL);
/* Trace the test index and the resulting option state on stderr,
   flushing after every line. */
1556 fprintf (stderr, "test_cmd_spec_restrict_file_names: TEST %d\n", i); fflush (stderr);
1557 fprintf (stderr, "opt.restrict_files_os: %d\n", opt.restrict_files_os); fflush (stderr);
1558 fprintf (stderr, "opt.restrict_files_ctrl: %d\n", opt.restrict_files_ctrl); fflush (stderr);
1559 fprintf (stderr, "opt.restrict_files_case: %d\n", opt.restrict_files_case); fflush (stderr);
/* A row passes only when the return value and all three options match
   the table. */
1561 mu_assert ("test_cmd_spec_restrict_file_names: wrong result",
1562 res == test_array[i].result
1563 && opt.restrict_files_os == test_array[i].expected_restrict_files_os
1564 && opt.restrict_files_ctrl == test_array[i].expected_restrict_files_ctrl
1565 && opt.restrict_files_case == test_array[i].expected_restrict_files_case);
1571 #endif /* TESTING */