X-Git-Url: http://sjero.net/git/?p=wget;a=blobdiff_plain;f=src%2Finit.c;h=fbef133cf9d295217b1a166b996ccb5cfaf67242;hp=eae355235486cf6db064f627f997aed0f7df2f62;hb=38a7829dcb4eb5dba28dbf0f05c6a80fea9217f8;hpb=8c7bd588fe94bdc12b62b38e286027acfedde751 diff --git a/src/init.c b/src/init.c index eae35523..fbef133c 100644 --- a/src/init.c +++ b/src/init.c @@ -1,6 +1,6 @@ /* Reading/parsing the initialization file. Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, - 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, + 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. This file is part of GNU Wget. @@ -30,6 +30,7 @@ shall include the source code for the parts of OpenSSL used as well as that of the covered work. */ #include "wget.h" +#include "exits.h" #include #include @@ -46,6 +47,10 @@ as that of the covered work. */ # endif #endif +#include +#ifdef HAVE_LIBPCRE +# include +#endif #ifdef HAVE_PWD_H # include @@ -62,6 +67,8 @@ as that of the covered work. 
*/ #include "res.h" /* for res_cleanup */ #include "http.h" /* for http_cleanup */ #include "retr.h" /* for output_stream */ +#include "warc.h" /* for warc_close */ +#include "spider.h" /* for spider_cleanup */ #ifdef TESTING #include "test.h" @@ -81,6 +88,7 @@ CMD_DECLARE (cmd_directory_vector); CMD_DECLARE (cmd_number); CMD_DECLARE (cmd_number_inf); CMD_DECLARE (cmd_string); +CMD_DECLARE (cmd_string_uppercase); CMD_DECLARE (cmd_file); CMD_DECLARE (cmd_directory); CMD_DECLARE (cmd_time); @@ -88,12 +96,15 @@ CMD_DECLARE (cmd_vector); CMD_DECLARE (cmd_spec_dirstruct); CMD_DECLARE (cmd_spec_header); +CMD_DECLARE (cmd_spec_warc_header); CMD_DECLARE (cmd_spec_htmlify); CMD_DECLARE (cmd_spec_mirror); CMD_DECLARE (cmd_spec_prefer_family); CMD_DECLARE (cmd_spec_progress); CMD_DECLARE (cmd_spec_recursive); +CMD_DECLARE (cmd_spec_regex_type); CMD_DECLARE (cmd_spec_restrict_file_names); +CMD_DECLARE (cmd_spec_report_speed); #ifdef HAVE_SSL CMD_DECLARE (cmd_spec_secure_protocol); #endif @@ -115,6 +126,7 @@ static const struct { } commands[] = { /* KEEP THIS LIST ALPHABETICALLY SORTED */ { "accept", &opt.accepts, cmd_vector }, + { "acceptregex", &opt.acceptregex_s, cmd_string }, { "addhostdir", &opt.add_hostdir, cmd_boolean }, { "adjustextension", &opt.adjust_extension, cmd_boolean }, { "alwaysrest", &opt.always_rest, cmd_boolean }, /* deprecated */ @@ -126,6 +138,8 @@ static const struct { { "backups", &opt.backups, cmd_number }, { "base", &opt.base_href, cmd_string }, { "bindaddress", &opt.bind_address, cmd_string }, + { "bodydata", &opt.body_data, cmd_string }, + { "bodyfile", &opt.body_file, cmd_string }, #ifdef HAVE_SSL { "cacertificate", &opt.ca_cert, cmd_file }, #endif @@ -144,10 +158,8 @@ static const struct { { "convertlinks", &opt.convert_links, cmd_boolean }, { "cookies", &opt.cookies, cmd_boolean }, { "cutdirs", &opt.cut_dirs, cmd_number }, -#ifdef ENABLE_DEBUG { "debug", &opt.debug, cmd_boolean }, -#endif - { "defaultpage", &opt.default_page, cmd_string}, + { 
"defaultpage", &opt.default_page, cmd_string }, { "deleteafter", &opt.delete_after, cmd_boolean }, { "dirprefix", &opt.dir_prefix, cmd_directory }, { "dirstruct", NULL, cmd_spec_dirstruct }, @@ -181,6 +193,9 @@ static const struct { { "httppasswd", &opt.http_passwd, cmd_string }, /* deprecated */ { "httppassword", &opt.http_passwd, cmd_string }, { "httpproxy", &opt.http_proxy, cmd_string }, +#ifdef HAVE_SSL + { "httpsonly", &opt.https_only, cmd_boolean }, +#endif { "httpsproxy", &opt.https_proxy, cmd_string }, { "httpuser", &opt.http_user, cmd_string }, { "ignorecase", &opt.ignore_case, cmd_boolean }, @@ -200,9 +215,11 @@ static const struct { { "logfile", &opt.lfilename, cmd_file }, { "login", &opt.ftp_user, cmd_string },/* deprecated*/ { "maxredirect", &opt.max_redirect, cmd_number }, + { "method", &opt.method, cmd_string_uppercase }, { "mirror", NULL, cmd_spec_mirror }, { "netrc", &opt.netrc, cmd_boolean }, { "noclobber", &opt.noclobber, cmd_boolean }, + { "noconfig", &opt.noconfig, cmd_boolean }, { "noparent", &opt.no_parent, cmd_boolean }, { "noproxy", &opt.no_proxy, cmd_vector }, { "numtries", &opt.ntry, cmd_number_inf },/* deprecated*/ @@ -234,10 +251,13 @@ static const struct { { "reclevel", &opt.reclevel, cmd_number_inf }, { "recursive", NULL, cmd_spec_recursive }, { "referer", &opt.referer, cmd_string }, + { "regextype", &opt.regex_type, cmd_spec_regex_type }, { "reject", &opt.rejects, cmd_vector }, + { "rejectregex", &opt.rejectregex_s, cmd_string }, { "relativeonly", &opt.relative_only, cmd_boolean }, { "remoteencoding", &opt.encoding_remote, cmd_string }, { "removelisting", &opt.remove_listing, cmd_boolean }, + { "reportspeed", &opt.report_bps, cmd_spec_report_speed}, { "restrictfilenames", NULL, cmd_spec_restrict_file_names }, { "retrsymlinks", &opt.retr_symlinks, cmd_boolean }, { "retryconnrefused", &opt.retry_connrefused, cmd_boolean }, @@ -249,8 +269,10 @@ static const struct { #endif { "serverresponse", &opt.server_response, cmd_boolean }, { 
"showalldnsentries", &opt.show_all_dns_entries, cmd_boolean }, + { "showprogress", &opt.show_progress, cmd_boolean }, { "spanhosts", &opt.spanhost, cmd_boolean }, { "spider", &opt.spider, cmd_boolean }, + { "startpos", &opt.start_pos, cmd_bytes }, { "strictcomments", &opt.strict_comments, cmd_boolean }, { "timeout", NULL, cmd_spec_timeout }, { "timestamping", &opt.timestamping, cmd_boolean }, @@ -264,6 +286,17 @@ static const struct { { "verbose", NULL, cmd_spec_verbose }, { "wait", &opt.wait, cmd_time }, { "waitretry", &opt.waitretry, cmd_time }, + { "warccdx", &opt.warc_cdx_enabled, cmd_boolean }, + { "warccdxdedup", &opt.warc_cdx_dedup_filename, cmd_file }, +#ifdef HAVE_LIBZ + { "warccompression", &opt.warc_compression_enabled, cmd_boolean }, +#endif + { "warcdigests", &opt.warc_digests_enabled, cmd_boolean }, + { "warcfile", &opt.warc_filename, cmd_file }, + { "warcheader", NULL, cmd_spec_warc_header }, + { "warckeeplog", &opt.warc_keep_log, cmd_boolean }, + { "warcmaxsize", &opt.warc_maxsize, cmd_bytes }, + { "warctempdir", &opt.warc_tempdir, cmd_directory }, #ifdef USE_WATT32 { "wdebug", &opt.wdebug, cmd_boolean }, #endif @@ -348,6 +381,8 @@ defaults (void) opt.restrict_files_nonascii = false; opt.restrict_files_case = restrict_no_case_restriction; + opt.regex_type = regex_type_posix; + opt.max_redirect = 20; opt.waitretry = 10; @@ -362,6 +397,22 @@ defaults (void) opt.useservertimestamps = true; opt.show_all_dns_entries = false; + + opt.warc_maxsize = 0; /* 1024 * 1024 * 1024; */ +#ifdef HAVE_LIBZ + opt.warc_compression_enabled = true; +#else + opt.warc_compression_enabled = false; +#endif + opt.warc_digests_enabled = true; + opt.warc_cdx_enabled = false; + opt.warc_cdx_dedup_filename = NULL; + opt.warc_tempdir = NULL; + opt.warc_keep_log = true; + + /* Use a negative value to mark the absence of --start-pos option */ + opt.start_pos = -1; + opt.show_progress = false; } /* Return the user's home directory (strdup-ed), or NULL if none is @@ -417,8 +468,7 @@ 
home_dir (void) } ret = home ? xstrdup (home) : NULL; - if (buf) - free (buf); + free (buf); return ret; } @@ -444,7 +494,7 @@ wgetrc_env_file_name (void) return NULL; } -/* Check for the existance of '$HOME/.wgetrc' and return it's path +/* Check for the existance of '$HOME/.wgetrc' and return its path if it exists and is set. */ char * wgetrc_user_file_name (void) @@ -532,7 +582,8 @@ bool run_wgetrc (const char *file) { FILE *fp; - char *line; + char *line = NULL; + size_t bufsize = 0; int ln; int errcnt = 0; @@ -544,7 +595,7 @@ run_wgetrc (const char *file) return true; /* not a fatal error */ } ln = 1; - while ((line = read_whole_line (fp)) != NULL) + while (getline (&line, &bufsize, fp) > 0) { char *com = NULL, *val = NULL; int comind; @@ -578,9 +629,9 @@ run_wgetrc (const char *file) } xfree_null (com); xfree_null (val); - xfree (line); ++ln; } + xfree (line); fclose (fp); return errcnt == 0; @@ -812,11 +863,11 @@ setoptval (const char *com, const char *val, const char *optname) This is used by the `--execute' flag in main.c. */ void -run_command (const char *opt) +run_command (const char *cmdopt) { char *com, *val; int comind; - switch (parse_line (opt, &com, &val, &comind)) + switch (parse_line (cmdopt, &com, &val, &comind)) { case line_ok: if (!setval_internal (comind, com, val)) @@ -826,7 +877,7 @@ run_command (const char *opt) break; default: fprintf (stderr, _("%s: Invalid --execute command %s\n"), - exec_name, quote (opt)); + exec_name, quote (cmdopt)); exit (2); } } @@ -909,7 +960,7 @@ cmd_number_inf (const char *com, const char *val, void *place) /* Copy (strdup) the string at COM to a new location and place a pointer to *PLACE. 
*/ static bool -cmd_string (const char *com, const char *val, void *place) +cmd_string (const char *com _GL_UNUSED, const char *val, void *place) { char **pstring = (char **)place; @@ -918,12 +969,29 @@ cmd_string (const char *com, const char *val, void *place) return true; } +/* Like cmd_string but ensure the string is upper case. */ +static bool +cmd_string_uppercase (const char *com _GL_UNUSED, const char *val, void *place) +{ + char *q, **pstring; + pstring = (char **)place; + xfree_null (*pstring); + + *pstring = xmalloc (strlen (val) + 1); + + for (q = *pstring; *val; val++, q++) + *q = c_toupper (*val); + + *q = '\0'; + return true; +} + -/* Like the above, but handles tilde-expansion when reading a user's +/* Like cmd_string, but handles tilde-expansion when reading a user's `.wgetrc'. In that case, and if VAL begins with `~', the tilde gets expanded to the user's home directory. */ static bool -cmd_file (const char *com, const char *val, void *place) +cmd_file (const char *com _GL_UNUSED, const char *val, void *place) { char **pstring = (char **)place; @@ -970,7 +1038,7 @@ cmd_directory (const char *com, const char *val, void *place) PLACE vector is cleared instead. 
*/ static bool -cmd_vector (const char *com, const char *val, void *place) +cmd_vector (const char *com _GL_UNUSED, const char *val, void *place) { char ***pvec = (char ***)place; @@ -985,7 +1053,7 @@ cmd_vector (const char *com, const char *val, void *place) } static bool -cmd_directory_vector (const char *com, const char *val, void *place) +cmd_directory_vector (const char *com _GL_UNUSED, const char *val, void *place) { char ***pvec = (char ***)place; @@ -1201,7 +1269,7 @@ cmd_cert_type (const char *com, const char *val, void *place) static bool check_user_specified_header (const char *); static bool -cmd_spec_dirstruct (const char *com, const char *val, void *place_ignored) +cmd_spec_dirstruct (const char *com, const char *val, void *place_ignored _GL_UNUSED) { if (!cmd_boolean (com, val, &opt.dirstruct)) return false; @@ -1215,7 +1283,7 @@ cmd_spec_dirstruct (const char *com, const char *val, void *place_ignored) } static bool -cmd_spec_header (const char *com, const char *val, void *place_ignored) +cmd_spec_header (const char *com, const char *val, void *place_ignored _GL_UNUSED) { /* Empty value means reset the list of headers. */ if (*val == '\0') @@ -1236,7 +1304,28 @@ cmd_spec_header (const char *com, const char *val, void *place_ignored) } static bool -cmd_spec_htmlify (const char *com, const char *val, void *place_ignored) +cmd_spec_warc_header (const char *com, const char *val, void *place_ignored _GL_UNUSED) +{ + /* Empty value means reset the list of headers. 
*/ + if (*val == '\0') + { + free_vec (opt.warc_user_headers); + opt.warc_user_headers = NULL; + return true; + } + + if (!check_user_specified_header (val)) + { + fprintf (stderr, _("%s: %s: Invalid WARC header %s.\n"), + exec_name, com, quote (val)); + return false; + } + opt.warc_user_headers = vec_append (opt.warc_user_headers, val); + return true; +} + +static bool +cmd_spec_htmlify (const char *com, const char *val, void *place_ignored _GL_UNUSED) { int flag = cmd_boolean (com, val, &opt.htmlify); if (flag && !opt.htmlify) @@ -1248,7 +1337,7 @@ cmd_spec_htmlify (const char *com, const char *val, void *place_ignored) no limit on max. recursion depth, and don't remove listings. */ static bool -cmd_spec_mirror (const char *com, const char *val, void *place_ignored) +cmd_spec_mirror (const char *com, const char *val, void *place_ignored _GL_UNUSED) { int mirror; @@ -1270,7 +1359,7 @@ cmd_spec_mirror (const char *com, const char *val, void *place_ignored) "IPv4", "IPv6", and "none". */ static bool -cmd_spec_prefer_family (const char *com, const char *val, void *place_ignored) +cmd_spec_prefer_family (const char *com, const char *val, void *place_ignored _GL_UNUSED) { static const struct decode_item choices[] = { { "IPv4", prefer_ipv4 }, @@ -1289,7 +1378,7 @@ cmd_spec_prefer_family (const char *com, const char *val, void *place_ignored) implementation before that. */ static bool -cmd_spec_progress (const char *com, const char *val, void *place_ignored) +cmd_spec_progress (const char *com, const char *val, void *place_ignored _GL_UNUSED) { if (!valid_progress_implementation_p (val)) { @@ -1310,7 +1399,7 @@ cmd_spec_progress (const char *com, const char *val, void *place_ignored) is specified. 
*/ static bool -cmd_spec_recursive (const char *com, const char *val, void *place_ignored) +cmd_spec_recursive (const char *com, const char *val, void *place_ignored _GL_UNUSED) { if (!cmd_boolean (com, val, &opt.recursive)) return false; @@ -1322,8 +1411,27 @@ cmd_spec_recursive (const char *com, const char *val, void *place_ignored) return true; } +/* Validate --regex-type and set the choice. */ + +static bool +cmd_spec_regex_type (const char *com, const char *val, void *place_ignored _GL_UNUSED) +{ + static const struct decode_item choices[] = { + { "posix", regex_type_posix }, +#ifdef HAVE_LIBPCRE + { "pcre", regex_type_pcre }, +#endif + }; + int regex_type = regex_type_posix; + int ok = decode_string (val, choices, countof (choices), &regex_type); + if (!ok) + fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val)); + opt.regex_type = regex_type; + return ok; +} + static bool -cmd_spec_restrict_file_names (const char *com, const char *val, void *place_ignored) +cmd_spec_restrict_file_names (const char *com, const char *val, void *place_ignored _GL_UNUSED) { int restrict_os = opt.restrict_files_os; int restrict_ctrl = opt.restrict_files_ctrl; @@ -1376,6 +1484,15 @@ cmd_spec_restrict_file_names (const char *com, const char *val, void *place_igno return true; } +static bool +cmd_spec_report_speed (const char *com, const char *val, void *place_ignored _GL_UNUSED) +{ + opt.report_bps = strcasecmp (val, "bits") == 0; + if (!opt.report_bps) + fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val)); + return opt.report_bps; +} + #ifdef HAVE_SSL static bool cmd_spec_secure_protocol (const char *com, const char *val, void *place) @@ -1385,6 +1502,7 @@ cmd_spec_secure_protocol (const char *com, const char *val, void *place) { "sslv2", secure_protocol_sslv2 }, { "sslv3", secure_protocol_sslv3 }, { "tlsv1", secure_protocol_tlsv1 }, + { "pfs", secure_protocol_pfs }, }; int ok = decode_string (val, choices, countof (choices), 
place); if (!ok) @@ -1396,7 +1514,7 @@ cmd_spec_secure_protocol (const char *com, const char *val, void *place) /* Set all three timeout values. */ static bool -cmd_spec_timeout (const char *com, const char *val, void *place_ignored) +cmd_spec_timeout (const char *com, const char *val, void *place_ignored _GL_UNUSED) { double value; if (!cmd_time (com, val, &value)) @@ -1408,7 +1526,7 @@ cmd_spec_timeout (const char *com, const char *val, void *place_ignored) } static bool -cmd_spec_useragent (const char *com, const char *val, void *place_ignored) +cmd_spec_useragent (const char *com, const char *val, void *place_ignored _GL_UNUSED) { /* Disallow embedded newlines. */ if (strchr (val, '\n')) @@ -1427,7 +1545,7 @@ cmd_spec_useragent (const char *com, const char *val, void *place_ignored) some random hackery for disallowing -q -v). */ static bool -cmd_spec_verbose (const char *com, const char *val, void *place_ignored) +cmd_spec_verbose (const char *com, const char *val, void *place_ignored _GL_UNUSED) { bool flag; if (cmd_boolean (com, val, &flag)) @@ -1580,18 +1698,22 @@ decode_string (const char *val, const struct decode_item *items, int itemcount, return false; } - -void cleanup_html_url (void); - - /* Free the memory allocated by global variables. */ void cleanup (void) { /* Free external resources, close files, etc. */ + /* Close WARC file. */ + if (opt.warc_filename != 0) + warc_close (); + + log_close (); + if (output_stream) - fclose (output_stream); + if (fclose (output_stream) == EOF) + inform_exit_status (CLOSEFAILED); + /* No need to check for error because Wget flushes its output (and checks for errors) after any data arrives. 
*/ @@ -1608,13 +1730,14 @@ cleanup (void) res_cleanup (); http_cleanup (); cleanup_html_url (); + spider_cleanup (); host_cleanup (); log_cleanup (); + netrc_cleanup (netrc_list); + + for (i = 0; i < nurl; i++) + xfree (url[i]); - { - extern acc_t *netrc_list; - free_netrc (netrc_list); - } xfree_null (opt.choose_config); xfree_null (opt.lfilename); xfree_null (opt.dir_prefix); @@ -1639,6 +1762,7 @@ cleanup (void) xfree_null (opt.http_user); xfree_null (opt.http_passwd); free_vec (opt.user_headers); + free_vec (opt.warc_user_headers); # ifdef HAVE_SSL xfree_null (opt.cert_file); xfree_null (opt.private_key); @@ -1653,6 +1777,7 @@ cleanup (void) xfree_null (opt.user); xfree_null (opt.passwd); xfree_null (opt.base_href); + xfree_null (opt.method); #endif /* DEBUG_MALLOC */ } @@ -1662,34 +1787,27 @@ cleanup (void) #ifdef TESTING const char * -test_commands_sorted() +test_commands_sorted(void) { - int prev_idx = 0, next_idx = 1; - int command_count = countof (commands) - 1; - int cmp = 0; - while (next_idx <= command_count) + unsigned i; + + for (i = 1; i < countof(commands); ++i) { - cmp = strcasecmp (commands[prev_idx].name, commands[next_idx].name); - if (cmp > 0) + if (strcasecmp (commands[i - 1].name, commands[i].name) > 0) { mu_assert ("FAILED", false); break; } - else - { - prev_idx ++; - next_idx ++; - } } return NULL; } const char * -test_cmd_spec_restrict_file_names() +test_cmd_spec_restrict_file_names(void) { - int i; - struct { - char *val; + unsigned i; + static const struct { + const char *val; int expected_restrict_files_os; int expected_restrict_files_ctrl; int expected_restrict_files_case; @@ -1701,7 +1819,7 @@ test_cmd_spec_restrict_file_names() { "unix,nocontrol,lowercase,", restrict_unix, false, restrict_lowercase, true }, }; - for (i = 0; i < sizeof(test_array)/sizeof(test_array[0]); ++i) + for (i = 0; i < countof(test_array); ++i) { bool res;