/* Reading/parsing the initialization file.
Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
- 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation,
+ 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation,
Inc.
This file is part of GNU Wget.
as that of the covered work. */
#include "wget.h"
+#include "exits.h"
#include <stdio.h>
#include <stdlib.h>
# endif
#endif
+#include <regex.h>
+#ifdef HAVE_LIBPCRE
+# include <pcre.h>
+#endif
#ifdef HAVE_PWD_H
# include <pwd.h>
#include "res.h" /* for res_cleanup */
#include "http.h" /* for http_cleanup */
#include "retr.h" /* for output_stream */
+#include "warc.h" /* for warc_close */
+#include "spider.h" /* for spider_cleanup */
#ifdef TESTING
#include "test.h"
CMD_DECLARE (cmd_number);
CMD_DECLARE (cmd_number_inf);
CMD_DECLARE (cmd_string);
+CMD_DECLARE (cmd_string_uppercase);
CMD_DECLARE (cmd_file);
CMD_DECLARE (cmd_directory);
CMD_DECLARE (cmd_time);
CMD_DECLARE (cmd_spec_dirstruct);
CMD_DECLARE (cmd_spec_header);
+CMD_DECLARE (cmd_spec_warc_header);
CMD_DECLARE (cmd_spec_htmlify);
CMD_DECLARE (cmd_spec_mirror);
CMD_DECLARE (cmd_spec_prefer_family);
CMD_DECLARE (cmd_spec_progress);
CMD_DECLARE (cmd_spec_recursive);
+CMD_DECLARE (cmd_spec_regex_type);
CMD_DECLARE (cmd_spec_restrict_file_names);
+CMD_DECLARE (cmd_spec_report_speed);
#ifdef HAVE_SSL
CMD_DECLARE (cmd_spec_secure_protocol);
#endif
} commands[] = {
/* KEEP THIS LIST ALPHABETICALLY SORTED */
{ "accept", &opt.accepts, cmd_vector },
+ { "acceptregex", &opt.acceptregex_s, cmd_string },
{ "addhostdir", &opt.add_hostdir, cmd_boolean },
{ "adjustextension", &opt.adjust_extension, cmd_boolean },
{ "alwaysrest", &opt.always_rest, cmd_boolean }, /* deprecated */
{ "backups", &opt.backups, cmd_number },
{ "base", &opt.base_href, cmd_string },
{ "bindaddress", &opt.bind_address, cmd_string },
+ { "bodydata", &opt.body_data, cmd_string },
+ { "bodyfile", &opt.body_file, cmd_string },
#ifdef HAVE_SSL
{ "cacertificate", &opt.ca_cert, cmd_file },
#endif
{ "certificatetype", &opt.cert_type, cmd_cert_type },
{ "checkcertificate", &opt.check_cert, cmd_boolean },
#endif
- { "chooseconfig", &opt.choose_config, cmd_file },
+ { "chooseconfig", &opt.choose_config, cmd_file },
{ "connecttimeout", &opt.connect_timeout, cmd_time },
{ "contentdisposition", &opt.content_disposition, cmd_boolean },
+ { "contentonerror", &opt.content_on_error, cmd_boolean },
{ "continue", &opt.always_rest, cmd_boolean },
{ "convertlinks", &opt.convert_links, cmd_boolean },
{ "cookies", &opt.cookies, cmd_boolean },
{ "cutdirs", &opt.cut_dirs, cmd_number },
-#ifdef ENABLE_DEBUG
{ "debug", &opt.debug, cmd_boolean },
-#endif
- { "defaultpage", &opt.default_page, cmd_string},
+ { "defaultpage", &opt.default_page, cmd_string },
{ "deleteafter", &opt.delete_after, cmd_boolean },
{ "dirprefix", &opt.dir_prefix, cmd_directory },
{ "dirstruct", NULL, cmd_spec_dirstruct },
{ "httppasswd", &opt.http_passwd, cmd_string }, /* deprecated */
{ "httppassword", &opt.http_passwd, cmd_string },
{ "httpproxy", &opt.http_proxy, cmd_string },
+#ifdef HAVE_SSL
+ { "httpsonly", &opt.https_only, cmd_boolean },
+#endif
{ "httpsproxy", &opt.https_proxy, cmd_string },
{ "httpuser", &opt.http_user, cmd_string },
{ "ignorecase", &opt.ignore_case, cmd_boolean },
{ "logfile", &opt.lfilename, cmd_file },
{ "login", &opt.ftp_user, cmd_string },/* deprecated*/
{ "maxredirect", &opt.max_redirect, cmd_number },
+ { "method", &opt.method, cmd_string_uppercase },
{ "mirror", NULL, cmd_spec_mirror },
{ "netrc", &opt.netrc, cmd_boolean },
{ "noclobber", &opt.noclobber, cmd_boolean },
+ { "noconfig", &opt.noconfig, cmd_boolean },
{ "noparent", &opt.no_parent, cmd_boolean },
{ "noproxy", &opt.no_proxy, cmd_vector },
{ "numtries", &opt.ntry, cmd_number_inf },/* deprecated*/
{ "postdata", &opt.post_data, cmd_string },
{ "postfile", &opt.post_file_name, cmd_file },
{ "preferfamily", NULL, cmd_spec_prefer_family },
- { "preservepermissions", &opt.preserve_perm, cmd_boolean },/* deprecated */
+ { "preservepermissions", &opt.preserve_perm, cmd_boolean },
#ifdef HAVE_SSL
{ "privatekey", &opt.private_key, cmd_file },
{ "privatekeytype", &opt.private_key_type, cmd_cert_type },
{ "reclevel", &opt.reclevel, cmd_number_inf },
{ "recursive", NULL, cmd_spec_recursive },
{ "referer", &opt.referer, cmd_string },
+ { "regextype", &opt.regex_type, cmd_spec_regex_type },
{ "reject", &opt.rejects, cmd_vector },
+ { "rejectregex", &opt.rejectregex_s, cmd_string },
{ "relativeonly", &opt.relative_only, cmd_boolean },
{ "remoteencoding", &opt.encoding_remote, cmd_string },
{ "removelisting", &opt.remove_listing, cmd_boolean },
+ { "reportspeed", &opt.report_bps, cmd_spec_report_speed},
{ "restrictfilenames", NULL, cmd_spec_restrict_file_names },
{ "retrsymlinks", &opt.retr_symlinks, cmd_boolean },
{ "retryconnrefused", &opt.retry_connrefused, cmd_boolean },
#endif
{ "serverresponse", &opt.server_response, cmd_boolean },
{ "showalldnsentries", &opt.show_all_dns_entries, cmd_boolean },
+ { "showprogress", &opt.show_progress, cmd_boolean },
{ "spanhosts", &opt.spanhost, cmd_boolean },
{ "spider", &opt.spider, cmd_boolean },
+ { "startpos", &opt.start_pos, cmd_bytes },
{ "strictcomments", &opt.strict_comments, cmd_boolean },
{ "timeout", NULL, cmd_spec_timeout },
{ "timestamping", &opt.timestamping, cmd_boolean },
{ "verbose", NULL, cmd_spec_verbose },
{ "wait", &opt.wait, cmd_time },
{ "waitretry", &opt.waitretry, cmd_time },
+ { "warccdx", &opt.warc_cdx_enabled, cmd_boolean },
+ { "warccdxdedup", &opt.warc_cdx_dedup_filename, cmd_file },
+#ifdef HAVE_LIBZ
+ { "warccompression", &opt.warc_compression_enabled, cmd_boolean },
+#endif
+ { "warcdigests", &opt.warc_digests_enabled, cmd_boolean },
+ { "warcfile", &opt.warc_filename, cmd_file },
+ { "warcheader", NULL, cmd_spec_warc_header },
+ { "warckeeplog", &opt.warc_keep_log, cmd_boolean },
+ { "warcmaxsize", &opt.warc_maxsize, cmd_bytes },
+ { "warctempdir", &opt.warc_tempdir, cmd_directory },
#ifdef USE_WATT32
{ "wdebug", &opt.wdebug, cmd_boolean },
#endif
opt.restrict_files_nonascii = false;
opt.restrict_files_case = restrict_no_case_restriction;
+ opt.regex_type = regex_type_posix;
+
opt.max_redirect = 20;
opt.waitretry = 10;
opt.useservertimestamps = true;
opt.show_all_dns_entries = false;
+
+ opt.warc_maxsize = 0; /* 1024 * 1024 * 1024; */
+#ifdef HAVE_LIBZ
+ opt.warc_compression_enabled = true;
+#else
+ opt.warc_compression_enabled = false;
+#endif
+ opt.warc_digests_enabled = true;
+ opt.warc_cdx_enabled = false;
+ opt.warc_cdx_dedup_filename = NULL;
+ opt.warc_tempdir = NULL;
+ opt.warc_keep_log = true;
+
+ /* Use a negative value to mark the absence of --start-pos option */
+ opt.start_pos = -1;
+ opt.show_progress = false;
+ opt.noscroll = false;
}
\f
/* Return the user's home directory (strdup-ed), or NULL if none is
}
ret = home ? xstrdup (home) : NULL;
- if (buf)
- free (buf);
+ free (buf);
return ret;
}
return NULL;
}
-/* Check for the existance of '$HOME/.wgetrc' and return it's path
+/* Check for the existance of '$HOME/.wgetrc' and return its path
if it exists and is set. */
char *
wgetrc_user_file_name (void)
run_wgetrc (const char *file)
{
FILE *fp;
- char *line;
+ char *line = NULL;
+ size_t bufsize = 0;
int ln;
int errcnt = 0;
return true; /* not a fatal error */
}
ln = 1;
- while ((line = read_whole_line (fp)) != NULL)
+ while (getline (&line, &bufsize, fp) > 0)
{
char *com = NULL, *val = NULL;
int comind;
}
xfree_null (com);
xfree_null (val);
- xfree (line);
++ln;
}
+ xfree (line);
fclose (fp);
return errcnt == 0;
variable has been set. For internal testing purposes only! */
env_sysrc = getenv ("SYSTEM_WGETRC");
if (env_sysrc && file_exists_p (env_sysrc))
- ok &= run_wgetrc (env_sysrc);
+ {
+ ok &= run_wgetrc (env_sysrc);
+ /* If there are any problems parsing the system wgetrc file, tell
+ the user and exit */
+ if (! ok)
+ {
+ fprintf (stderr, _("\
+Parsing system wgetrc file (env SYSTEM_WGETRC) failed. Please check\n\
+'%s',\n\
+or specify a different file using --config.\n"), env_sysrc);
+ exit (2);
+ }
+ }
/* Otherwise, if SYSTEM_WGETRC is defined, use it. */
#ifdef SYSTEM_WGETRC
else if (file_exists_p (SYSTEM_WGETRC))
ok &= run_wgetrc (SYSTEM_WGETRC);
-#endif
/* If there are any problems parsing the system wgetrc file, tell
the user and exit */
if (! ok)
{
fprintf (stderr, _("\
-Parsing system wgetrc file failed, please check '%s'. \
-Or specify a different file using --config\n"), SYSTEM_WGETRC);
+Parsing system wgetrc file failed. Please check\n\
+'%s',\n\
+or specify a different file using --config.\n"), SYSTEM_WGETRC);
exit (2);
}
+#endif
/* Override it with your own, if one exists. */
file = wgetrc_file_name ();
if (!file)
pstring = commands[comind].place;
home = home_dir ();
if (home)
- {
- homelen = strlen (home);
- while (homelen && ISSEP (home[homelen - 1]))
- home[--homelen] = '\0';
-
- /* Skip the leading "~/". */
- for (++val; ISSEP (*val); val++)
- ;
- *pstring = concat_strings (home, "/", val, (char *)0);
- }
+ {
+ homelen = strlen (home);
+ while (homelen && ISSEP (home[homelen - 1]))
+ home[--homelen] = '\0';
+
+ /* Skip the leading "~/". */
+ for (++val; ISSEP (*val); val++)
+ ;
+ *pstring = concat_strings (home, "/", val, (char *)0);
+ }
}
return ret;
}
This is used by the `--execute' flag in main.c. */
void
-run_command (const char *opt)
+run_command (const char *cmdopt)
{
char *com, *val;
int comind;
- switch (parse_line (opt, &com, &val, &comind))
+ switch (parse_line (cmdopt, &com, &val, &comind))
{
case line_ok:
if (!setval_internal (comind, com, val))
break;
default:
fprintf (stderr, _("%s: Invalid --execute command %s\n"),
- exec_name, quote (opt));
+ exec_name, quote (cmdopt));
exit (2);
}
}
/* Copy (strdup) the string at COM to a new location and place a
pointer to *PLACE. */
static bool
-cmd_string (const char *com, const char *val, void *place)
+cmd_string (const char *com _GL_UNUSED, const char *val, void *place)
{
char **pstring = (char **)place;
return true;
}
+/* Like cmd_string but ensure the string is upper case. */
+static bool
+cmd_string_uppercase (const char *com _GL_UNUSED, const char *val, void *place)
+{
+ char *q, **pstring;
+ pstring = (char **)place;
+ xfree_null (*pstring);
+
+ *pstring = xmalloc (strlen (val) + 1);
-/* Like the above, but handles tilde-expansion when reading a user's
+ for (q = *pstring; *val; val++, q++)
+ *q = c_toupper (*val);
+
+ *q = '\0';
+ return true;
+}
+
+
+/* Like cmd_string, but handles tilde-expansion when reading a user's
`.wgetrc'. In that case, and if VAL begins with `~', the tilde
gets expanded to the user's home directory. */
static bool
-cmd_file (const char *com, const char *val, void *place)
+cmd_file (const char *com _GL_UNUSED, const char *val, void *place)
{
char **pstring = (char **)place;
PLACE vector is cleared instead. */
static bool
-cmd_vector (const char *com, const char *val, void *place)
+cmd_vector (const char *com _GL_UNUSED, const char *val, void *place)
{
char ***pvec = (char ***)place;
}
static bool
-cmd_directory_vector (const char *com, const char *val, void *place)
+cmd_directory_vector (const char *com _GL_UNUSED, const char *val, void *place)
{
char ***pvec = (char ***)place;
static bool check_user_specified_header (const char *);
static bool
-cmd_spec_dirstruct (const char *com, const char *val, void *place_ignored)
+cmd_spec_dirstruct (const char *com, const char *val, void *place_ignored _GL_UNUSED)
{
if (!cmd_boolean (com, val, &opt.dirstruct))
return false;
}
static bool
-cmd_spec_header (const char *com, const char *val, void *place_ignored)
+cmd_spec_header (const char *com, const char *val, void *place_ignored _GL_UNUSED)
{
/* Empty value means reset the list of headers. */
if (*val == '\0')
}
static bool
-cmd_spec_htmlify (const char *com, const char *val, void *place_ignored)
+cmd_spec_warc_header (const char *com, const char *val, void *place_ignored _GL_UNUSED)
+{
+ /* Empty value means reset the list of headers. */
+ if (*val == '\0')
+ {
+ free_vec (opt.warc_user_headers);
+ opt.warc_user_headers = NULL;
+ return true;
+ }
+
+ if (!check_user_specified_header (val))
+ {
+ fprintf (stderr, _("%s: %s: Invalid WARC header %s.\n"),
+ exec_name, com, quote (val));
+ return false;
+ }
+ opt.warc_user_headers = vec_append (opt.warc_user_headers, val);
+ return true;
+}
+
+static bool
+cmd_spec_htmlify (const char *com, const char *val, void *place_ignored _GL_UNUSED)
{
int flag = cmd_boolean (com, val, &opt.htmlify);
if (flag && !opt.htmlify)
no limit on max. recursion depth, and don't remove listings. */
static bool
-cmd_spec_mirror (const char *com, const char *val, void *place_ignored)
+cmd_spec_mirror (const char *com, const char *val, void *place_ignored _GL_UNUSED)
{
int mirror;
"IPv4", "IPv6", and "none". */
static bool
-cmd_spec_prefer_family (const char *com, const char *val, void *place_ignored)
+cmd_spec_prefer_family (const char *com, const char *val, void *place_ignored _GL_UNUSED)
{
static const struct decode_item choices[] = {
{ "IPv4", prefer_ipv4 },
implementation before that. */
static bool
-cmd_spec_progress (const char *com, const char *val, void *place_ignored)
+cmd_spec_progress (const char *com, const char *val, void *place_ignored _GL_UNUSED)
{
if (!valid_progress_implementation_p (val))
{
is specified. */
static bool
-cmd_spec_recursive (const char *com, const char *val, void *place_ignored)
+cmd_spec_recursive (const char *com, const char *val, void *place_ignored _GL_UNUSED)
{
if (!cmd_boolean (com, val, &opt.recursive))
return false;
return true;
}
+/* Validate --regex-type and set the choice. */
+
+static bool
+cmd_spec_regex_type (const char *com, const char *val, void *place_ignored _GL_UNUSED)
+{
+ static const struct decode_item choices[] = {
+ { "posix", regex_type_posix },
+#ifdef HAVE_LIBPCRE
+ { "pcre", regex_type_pcre },
+#endif
+ };
+ int regex_type = regex_type_posix;
+ int ok = decode_string (val, choices, countof (choices), ®ex_type);
+ if (!ok)
+ fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val));
+ opt.regex_type = regex_type;
+ return ok;
+}
+
static bool
-cmd_spec_restrict_file_names (const char *com, const char *val, void *place_ignored)
+cmd_spec_restrict_file_names (const char *com, const char *val, void *place_ignored _GL_UNUSED)
{
int restrict_os = opt.restrict_files_os;
int restrict_ctrl = opt.restrict_files_ctrl;
return true;
}
+static bool
+cmd_spec_report_speed (const char *com, const char *val, void *place_ignored _GL_UNUSED)
+{
+ opt.report_bps = strcasecmp (val, "bits") == 0;
+ if (!opt.report_bps)
+ fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val));
+ return opt.report_bps;
+}
+
#ifdef HAVE_SSL
static bool
cmd_spec_secure_protocol (const char *com, const char *val, void *place)
{ "sslv2", secure_protocol_sslv2 },
{ "sslv3", secure_protocol_sslv3 },
{ "tlsv1", secure_protocol_tlsv1 },
+ { "pfs", secure_protocol_pfs },
};
int ok = decode_string (val, choices, countof (choices), place);
if (!ok)
/* Set all three timeout values. */
static bool
-cmd_spec_timeout (const char *com, const char *val, void *place_ignored)
+cmd_spec_timeout (const char *com, const char *val, void *place_ignored _GL_UNUSED)
{
double value;
if (!cmd_time (com, val, &value))
}
static bool
-cmd_spec_useragent (const char *com, const char *val, void *place_ignored)
+cmd_spec_useragent (const char *com, const char *val, void *place_ignored _GL_UNUSED)
{
/* Disallow embedded newlines. */
if (strchr (val, '\n'))
some random hackery for disallowing -q -v). */
static bool
-cmd_spec_verbose (const char *com, const char *val, void *place_ignored)
+cmd_spec_verbose (const char *com, const char *val, void *place_ignored _GL_UNUSED)
{
bool flag;
if (cmd_boolean (com, val, &flag))
return false;
}
-\f
-void cleanup_html_url (void);
-
-
/* Free the memory allocated by global variables. */
void
cleanup (void)
{
/* Free external resources, close files, etc. */
+ /* Close WARC file. */
+ if (opt.warc_filename != 0)
+ warc_close ();
+
+ log_close ();
+
if (output_stream)
- fclose (output_stream);
+ if (fclose (output_stream) == EOF)
+ inform_exit_status (CLOSEFAILED);
+
/* No need to check for error because Wget flushes its output (and
checks for errors) after any data arrives. */
res_cleanup ();
http_cleanup ();
cleanup_html_url ();
+ spider_cleanup ();
host_cleanup ();
log_cleanup ();
+ netrc_cleanup (netrc_list);
+
+ for (i = 0; i < nurl; i++)
+ xfree (url[i]);
- {
- extern acc_t *netrc_list;
- free_netrc (netrc_list);
- }
xfree_null (opt.choose_config);
xfree_null (opt.lfilename);
xfree_null (opt.dir_prefix);
xfree_null (opt.http_user);
xfree_null (opt.http_passwd);
free_vec (opt.user_headers);
+ free_vec (opt.warc_user_headers);
# ifdef HAVE_SSL
xfree_null (opt.cert_file);
xfree_null (opt.private_key);
xfree_null (opt.user);
xfree_null (opt.passwd);
xfree_null (opt.base_href);
+ xfree_null (opt.method);
#endif /* DEBUG_MALLOC */
}
#ifdef TESTING
const char *
-test_commands_sorted()
+test_commands_sorted(void)
{
- int prev_idx = 0, next_idx = 1;
- int command_count = countof (commands) - 1;
- int cmp = 0;
- while (next_idx <= command_count)
+ unsigned i;
+
+ for (i = 1; i < countof(commands); ++i)
{
- cmp = strcasecmp (commands[prev_idx].name, commands[next_idx].name);
- if (cmp > 0)
+ if (strcasecmp (commands[i - 1].name, commands[i].name) > 0)
{
mu_assert ("FAILED", false);
break;
}
- else
- {
- prev_idx ++;
- next_idx ++;
- }
}
return NULL;
}
const char *
-test_cmd_spec_restrict_file_names()
+test_cmd_spec_restrict_file_names(void)
{
- int i;
- struct {
- char *val;
+ unsigned i;
+ static const struct {
+ const char *val;
int expected_restrict_files_os;
int expected_restrict_files_ctrl;
int expected_restrict_files_case;
{ "unix,nocontrol,lowercase,", restrict_unix, false, restrict_lowercase, true },
};
- for (i = 0; i < sizeof(test_array)/sizeof(test_array[0]); ++i)
+ for (i = 0; i < countof(test_array); ++i)
{
bool res;