X-Git-Url: http://sjero.net/git/?p=wget;a=blobdiff_plain;f=src%2Finit.c;h=033da4f7634159396cc18a4632b32ead857c9a80;hp=7708e275461d752b72f28d9449f7a82a200df092;hb=42c78fdd71c311cf96210b709ec0a18ef45ef87f;hpb=f3e634a8b280d1da57e7403bd275fe2b075f7676 diff --git a/src/init.c b/src/init.c index 7708e275..033da4f7 100644 --- a/src/init.c +++ b/src/init.c @@ -1,6 +1,7 @@ /* Reading/parsing the initialization file. - Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, - 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc. + Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, + 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, + Inc. This file is part of GNU Wget. @@ -29,14 +30,27 @@ shall include the source code for the parts of OpenSSL used as well as that of the covered work. */ #include "wget.h" +#include "exits.h" #include #include -#ifdef HAVE_UNISTD_H -# include -#endif +#include +#include #include #include +#include +/* not all systems provide PATH_MAX in limits.h */ +#ifndef PATH_MAX +# include +# ifndef PATH_MAX +# define PATH_MAX MAXPATHLEN +# endif +#endif + +#include +#ifdef HAVE_LIBPCRE +# include +#endif #ifdef HAVE_PWD_H # include @@ -53,6 +67,7 @@ as that of the covered work. 
*/ #include "res.h" /* for res_cleanup */ #include "http.h" /* for http_cleanup */ #include "retr.h" /* for output_stream */ +#include "warc.h" /* for warc_close */ #ifdef TESTING #include "test.h" @@ -72,6 +87,7 @@ CMD_DECLARE (cmd_directory_vector); CMD_DECLARE (cmd_number); CMD_DECLARE (cmd_number_inf); CMD_DECLARE (cmd_string); +CMD_DECLARE (cmd_string_uppercase); CMD_DECLARE (cmd_file); CMD_DECLARE (cmd_directory); CMD_DECLARE (cmd_time); @@ -79,12 +95,15 @@ CMD_DECLARE (cmd_vector); CMD_DECLARE (cmd_spec_dirstruct); CMD_DECLARE (cmd_spec_header); +CMD_DECLARE (cmd_spec_warc_header); CMD_DECLARE (cmd_spec_htmlify); CMD_DECLARE (cmd_spec_mirror); CMD_DECLARE (cmd_spec_prefer_family); CMD_DECLARE (cmd_spec_progress); CMD_DECLARE (cmd_spec_recursive); +CMD_DECLARE (cmd_spec_regex_type); CMD_DECLARE (cmd_spec_restrict_file_names); +CMD_DECLARE (cmd_spec_report_speed); #ifdef HAVE_SSL CMD_DECLARE (cmd_spec_secure_protocol); #endif @@ -106,6 +125,7 @@ static const struct { } commands[] = { /* KEEP THIS LIST ALPHABETICALLY SORTED */ { "accept", &opt.accepts, cmd_vector }, + { "acceptregex", &opt.acceptregex_s, cmd_string }, { "addhostdir", &opt.add_hostdir, cmd_boolean }, { "adjustextension", &opt.adjust_extension, cmd_boolean }, { "alwaysrest", &opt.always_rest, cmd_boolean }, /* deprecated */ @@ -117,6 +137,8 @@ static const struct { { "backups", &opt.backups, cmd_number }, { "base", &opt.base_href, cmd_string }, { "bindaddress", &opt.bind_address, cmd_string }, + { "bodydata", &opt.body_data, cmd_string }, + { "bodyfile", &opt.body_file, cmd_string }, #ifdef HAVE_SSL { "cacertificate", &opt.ca_cert, cmd_file }, #endif @@ -127,8 +149,10 @@ static const struct { { "certificatetype", &opt.cert_type, cmd_cert_type }, { "checkcertificate", &opt.check_cert, cmd_boolean }, #endif + { "chooseconfig", &opt.choose_config, cmd_file }, { "connecttimeout", &opt.connect_timeout, cmd_time }, { "contentdisposition", &opt.content_disposition, cmd_boolean }, + { "contentonerror", 
&opt.content_on_error, cmd_boolean }, { "continue", &opt.always_rest, cmd_boolean }, { "convertlinks", &opt.convert_links, cmd_boolean }, { "cookies", &opt.cookies, cmd_boolean }, @@ -136,7 +160,7 @@ static const struct { #ifdef ENABLE_DEBUG { "debug", &opt.debug, cmd_boolean }, #endif - { "defaultpage", &opt.default_page, cmd_string}, + { "defaultpage", &opt.default_page, cmd_string }, { "deleteafter", &opt.delete_after, cmd_boolean }, { "dirprefix", &opt.dir_prefix, cmd_directory }, { "dirstruct", NULL, cmd_spec_dirstruct }, @@ -164,12 +188,15 @@ static const struct { { "ftpuser", &opt.ftp_user, cmd_string }, { "glob", &opt.ftp_glob, cmd_boolean }, { "header", NULL, cmd_spec_header }, - { "htmlextension", &opt.adjust_extension, cmd_boolean }, + { "htmlextension", &opt.adjust_extension, cmd_boolean }, /* deprecated */ { "htmlify", NULL, cmd_spec_htmlify }, { "httpkeepalive", &opt.http_keep_alive, cmd_boolean }, { "httppasswd", &opt.http_passwd, cmd_string }, /* deprecated */ { "httppassword", &opt.http_passwd, cmd_string }, { "httpproxy", &opt.http_proxy, cmd_string }, +#ifdef HAVE_SSL + { "httpsonly", &opt.https_only, cmd_boolean }, +#endif { "httpsproxy", &opt.https_proxy, cmd_string }, { "httpuser", &opt.http_user, cmd_string }, { "ignorecase", &opt.ignore_case, cmd_boolean }, @@ -189,6 +216,7 @@ static const struct { { "logfile", &opt.lfilename, cmd_file }, { "login", &opt.ftp_user, cmd_string },/* deprecated*/ { "maxredirect", &opt.max_redirect, cmd_number }, + { "method", &opt.method, cmd_string_uppercase }, { "mirror", NULL, cmd_spec_mirror }, { "netrc", &opt.netrc, cmd_boolean }, { "noclobber", &opt.noclobber, cmd_boolean }, @@ -203,7 +231,7 @@ static const struct { { "postdata", &opt.post_data, cmd_string }, { "postfile", &opt.post_file_name, cmd_file }, { "preferfamily", NULL, cmd_spec_prefer_family }, - { "preservepermissions", &opt.preserve_perm, cmd_boolean },/* deprecated */ + { "preservepermissions", &opt.preserve_perm, cmd_boolean }, #ifdef 
HAVE_SSL { "privatekey", &opt.private_key, cmd_file }, { "privatekeytype", &opt.private_key_type, cmd_cert_type }, @@ -223,10 +251,13 @@ static const struct { { "reclevel", &opt.reclevel, cmd_number_inf }, { "recursive", NULL, cmd_spec_recursive }, { "referer", &opt.referer, cmd_string }, + { "regextype", &opt.regex_type, cmd_spec_regex_type }, { "reject", &opt.rejects, cmd_vector }, + { "rejectregex", &opt.rejectregex_s, cmd_string }, { "relativeonly", &opt.relative_only, cmd_boolean }, { "remoteencoding", &opt.encoding_remote, cmd_string }, { "removelisting", &opt.remove_listing, cmd_boolean }, + { "reportspeed", &opt.report_bps, cmd_spec_report_speed}, { "restrictfilenames", NULL, cmd_spec_restrict_file_names }, { "retrsymlinks", &opt.retr_symlinks, cmd_boolean }, { "retryconnrefused", &opt.retry_connrefused, cmd_boolean }, @@ -237,18 +268,33 @@ static const struct { { "secureprotocol", &opt.secure_protocol, cmd_spec_secure_protocol }, #endif { "serverresponse", &opt.server_response, cmd_boolean }, + { "showalldnsentries", &opt.show_all_dns_entries, cmd_boolean }, { "spanhosts", &opt.spanhost, cmd_boolean }, { "spider", &opt.spider, cmd_boolean }, { "strictcomments", &opt.strict_comments, cmd_boolean }, { "timeout", NULL, cmd_spec_timeout }, { "timestamping", &opt.timestamping, cmd_boolean }, { "tries", &opt.ntry, cmd_number_inf }, + { "trustservernames", &opt.trustservernames, cmd_boolean }, + { "unlink", &opt.unlink, cmd_boolean }, { "useproxy", &opt.use_proxy, cmd_boolean }, { "user", &opt.user, cmd_string }, { "useragent", NULL, cmd_spec_useragent }, + { "useservertimestamps", &opt.useservertimestamps, cmd_boolean }, { "verbose", NULL, cmd_spec_verbose }, { "wait", &opt.wait, cmd_time }, { "waitretry", &opt.waitretry, cmd_time }, + { "warccdx", &opt.warc_cdx_enabled, cmd_boolean }, + { "warccdxdedup", &opt.warc_cdx_dedup_filename, cmd_file }, +#ifdef HAVE_LIBZ + { "warccompression", &opt.warc_compression_enabled, cmd_boolean }, +#endif + { "warcdigests", 
&opt.warc_digests_enabled, cmd_boolean }, + { "warcfile", &opt.warc_filename, cmd_file }, + { "warcheader", NULL, cmd_spec_warc_header }, + { "warckeeplog", &opt.warc_keep_log, cmd_boolean }, + { "warcmaxsize", &opt.warc_maxsize, cmd_bytes }, + { "warctempdir", &opt.warc_tempdir, cmd_directory }, #ifdef USE_WATT32 { "wdebug", &opt.wdebug, cmd_boolean }, #endif @@ -279,7 +325,7 @@ command_by_name (const char *cmdname) } /* Reset the variables to default values. */ -static void +void defaults (void) { char *tmp; @@ -333,6 +379,8 @@ defaults (void) opt.restrict_files_nonascii = false; opt.restrict_files_case = restrict_no_case_restriction; + opt.regex_type = regex_type_posix; + opt.max_redirect = 20; opt.waitretry = 10; @@ -344,6 +392,21 @@ defaults (void) #endif opt.locale = NULL; opt.encoding_remote = NULL; + + opt.useservertimestamps = true; + opt.show_all_dns_entries = false; + + opt.warc_maxsize = 0; /* 1024 * 1024 * 1024; */ +#ifdef HAVE_LIBZ + opt.warc_compression_enabled = true; +#else + opt.warc_compression_enabled = false; +#endif + opt.warc_digests_enabled = true; + opt.warc_cdx_enabled = false; + opt.warc_cdx_dedup_filename = NULL; + opt.warc_tempdir = NULL; + opt.warc_keep_log = true; } /* Return the user's home directory (strdup-ed), or NULL if none is @@ -351,8 +414,8 @@ defaults (void) char * home_dir (void) { - static char buf[PATH_MAX]; - static char *home; + static char *buf = NULL; + static char *home, *ret; if (!home) { @@ -360,17 +423,28 @@ home_dir (void) if (!home) { #if defined(MSDOS) + int len; + /* Under MSDOS, if $HOME isn't defined, use the directory where `wget.exe' resides. 
*/ const char *_w32_get_argv0 (void); /* in libwatt.a/pcconfig.c */ char *p; - strcpy (buf, _w32_get_argv0 ()); + buff = _w32_get_argv0 (); + p = strrchr (buf, '/'); /* djgpp */ if (!p) p = strrchr (buf, '\\'); /* others */ assert (p); - *p = '\0'; + + len = p - buff + 1; + buff = malloc (len + 1); + if (buff == NULL) + return NULL; + + strncpy (buff, _w32_get_argv0 (), len); + buff[len] = '\0'; + home = buf; #elif !defined(WINDOWS) /* If HOME is not defined, try getting it from the password @@ -378,8 +452,7 @@ home_dir (void) struct passwd *pwd = getpwuid (getuid ()); if (!pwd || !pwd->pw_dir) return NULL; - strcpy (buf, pwd->pw_dir); - home = buf; + home = pwd->pw_dir; #else /* !WINDOWS */ /* Under Windows, if $HOME isn't defined, use the directory where `wget.exe' resides. */ @@ -388,15 +461,19 @@ home_dir (void) } } - return home ? xstrdup (home) : NULL; + ret = home ? xstrdup (home) : NULL; + if (buf) + free (buf); + + return ret; } -/* Check the 'WGETRC' environment variable and return the file name - if 'WGETRC' is set and is a valid file. +/* Check the 'WGETRC' environment variable and return the file name + if 'WGETRC' is set and is a valid file. If the `WGETRC' variable exists but the file does not exist, the function will exit(). */ char * -wgetrc_env_file_name (void) +wgetrc_env_file_name (void) { char *env = getenv ("WGETRC"); if (env && *env) @@ -412,12 +489,12 @@ wgetrc_env_file_name (void) return NULL; } -/* Check for the existance of '$HOME/.wgetrc' and return it's path +/* Check for the existance of '$HOME/.wgetrc' and return its path if it exists and is set. */ char * -wgetrc_user_file_name (void) +wgetrc_user_file_name (void) { - char *home = home_dir (); + char *home; char *file = NULL; /* If that failed, try $HOME/.wgetrc (or equivalent). */ @@ -443,7 +520,7 @@ wgetrc_user_file_name (void) /* Return the path to the user's .wgetrc. This is either the value of `WGETRC' environment variable, or `$HOME/.wgetrc'. 
- Additionally, for windows, look in the directory where wget.exe + Additionally, for windows, look in the directory where wget.exe resides. */ char * wgetrc_file_name (void) @@ -451,7 +528,7 @@ wgetrc_file_name (void) char *file = wgetrc_env_file_name (); if (file && *file) return file; - + file = wgetrc_user_file_name (); #ifdef WINDOWS @@ -496,11 +573,12 @@ static bool setval_internal_tilde (int, const char *, const char *); /* Initialize variables from a wgetrc file. Returns zero (failure) if there were errors in the file. */ -static bool +bool run_wgetrc (const char *file) { FILE *fp; - char *line; + char *line = NULL; + size_t bufsize = 0; int ln; int errcnt = 0; @@ -512,7 +590,7 @@ run_wgetrc (const char *file) return true; /* not a fatal error */ } ln = 1; - while ((line = read_whole_line (fp)) != NULL) + while (getline (&line, &bufsize, fp) > 0) { char *com = NULL, *val = NULL; int comind; @@ -546,9 +624,9 @@ run_wgetrc (const char *file) } xfree_null (com); xfree_null (val); - xfree (line); ++ln; } + xfree (line); fclose (fp); return errcnt == 0; @@ -560,20 +638,39 @@ void initialize (void) { char *file, *env_sysrc; - int ok = true; + bool ok = true; - /* Load the hard-coded defaults. */ - defaults (); - - /* Run a non-standard system rc file when the according environment + /* Run a non-standard system rc file when the according environment variable has been set. For internal testing purposes only! */ env_sysrc = getenv ("SYSTEM_WGETRC"); if (env_sysrc && file_exists_p (env_sysrc)) - ok &= run_wgetrc (env_sysrc); + { + ok &= run_wgetrc (env_sysrc); + /* If there are any problems parsing the system wgetrc file, tell + the user and exit */ + if (! ok) + { + fprintf (stderr, _("\ +Parsing system wgetrc file (env SYSTEM_WGETRC) failed. Please check\n\ +'%s',\n\ +or specify a different file using --config.\n"), env_sysrc); + exit (2); + } + } /* Otherwise, if SYSTEM_WGETRC is defined, use it. 
*/ #ifdef SYSTEM_WGETRC else if (file_exists_p (SYSTEM_WGETRC)) ok &= run_wgetrc (SYSTEM_WGETRC); + /* If there are any problems parsing the system wgetrc file, tell + the user and exit */ + if (! ok) + { + fprintf (stderr, _("\ +Parsing system wgetrc file failed. Please check\n\ +'%s',\n\ +or specify a different file using --config.\n"), SYSTEM_WGETRC); + exit (2); + } #endif /* Override it with your own, if one exists. */ file = wgetrc_file_name (); @@ -867,8 +964,25 @@ cmd_string (const char *com, const char *val, void *place) return true; } +/* Like cmd_string but ensure the string is upper case. */ +static bool +cmd_string_uppercase (const char *com, const char *val, void *place) +{ + char *q, **pstring; + pstring = (char **)place; + xfree_null (*pstring); + + *pstring = xmalloc (strlen (val) + 1); + + for (q = *pstring; *val; val++, q++) + *q = c_toupper (*val); + + *q = '\0'; + return true; +} -/* Like the above, but handles tilde-expansion when reading a user's + +/* Like cmd_string, but handles tilde-expansion when reading a user's `.wgetrc'. In that case, and if VAL begins with `~', the tilde gets expanded to the user's home directory. */ static bool @@ -1184,6 +1298,27 @@ cmd_spec_header (const char *com, const char *val, void *place_ignored) return true; } +static bool +cmd_spec_warc_header (const char *com, const char *val, void *place_ignored) +{ + /* Empty value means reset the list of headers. 
*/ + if (*val == '\0') + { + free_vec (opt.warc_user_headers); + opt.warc_user_headers = NULL; + return true; + } + + if (!check_user_specified_header (val)) + { + fprintf (stderr, _("%s: %s: Invalid WARC header %s.\n"), + exec_name, com, quote (val)); + return false; + } + opt.warc_user_headers = vec_append (opt.warc_user_headers, val); + return true; +} + static bool cmd_spec_htmlify (const char *com, const char *val, void *place_ignored) { @@ -1271,6 +1406,25 @@ cmd_spec_recursive (const char *com, const char *val, void *place_ignored) return true; } +/* Validate --regex-type and set the choice. */ + +static bool +cmd_spec_regex_type (const char *com, const char *val, void *place_ignored) +{ + static const struct decode_item choices[] = { + { "posix", regex_type_posix }, +#ifdef HAVE_LIBPCRE + { "pcre", regex_type_pcre }, +#endif + }; + int regex_type = regex_type_posix; + int ok = decode_string (val, choices, countof (choices), &regex_type); + if (!ok) + fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val)); + opt.regex_type = regex_type; + return ok; +} + static bool cmd_spec_restrict_file_names (const char *com, const char *val, void *place_ignored) { @@ -1310,7 +1464,7 @@ cmd_spec_restrict_file_names (const char *com, const char *val, void *place_igno return false; } - if (*end) + if (*end) val = end + 1; } while (*val && *end); @@ -1321,10 +1475,19 @@ cmd_spec_restrict_file_names (const char *com, const char *val, void *place_igno opt.restrict_files_ctrl = restrict_ctrl; opt.restrict_files_case = restrict_case; opt.restrict_files_nonascii = restrict_nonascii; - + return true; } +static bool +cmd_spec_report_speed (const char *com, const char *val, void *place_ignored) +{ + opt.report_bps = strcasecmp (val, "bits") == 0; + if (!opt.report_bps) + fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val)); + return opt.report_bps; +} + #ifdef HAVE_SSL static bool cmd_spec_secure_protocol (const char *com, const char 
*val, void *place) @@ -1531,6 +1694,7 @@ decode_string (const char *val, const struct decode_item *items, int itemcount, void cleanup_html_url (void); +void spider_cleanup (void); /* Free the memory allocated by global variables. */ @@ -1539,8 +1703,16 @@ cleanup (void) { /* Free external resources, close files, etc. */ + /* Close WARC file. */ + if (opt.warc_filename != 0) + warc_close (); + + log_close (); + if (output_stream) - fclose (output_stream); + if (fclose (output_stream) == EOF) + inform_exit_status (CLOSEFAILED); + /* No need to check for error because Wget flushes its output (and checks for errors) after any data arrives. */ @@ -1557,13 +1729,18 @@ cleanup (void) res_cleanup (); http_cleanup (); cleanup_html_url (); + spider_cleanup (); host_cleanup (); log_cleanup (); + for (i = 0; i < nurl; i++) + xfree (url[i]); + { extern acc_t *netrc_list; free_netrc (netrc_list); } + xfree_null (opt.choose_config); xfree_null (opt.lfilename); xfree_null (opt.dir_prefix); xfree_null (opt.input_filename); @@ -1587,6 +1764,7 @@ cleanup (void) xfree_null (opt.http_user); xfree_null (opt.http_passwd); free_vec (opt.user_headers); + free_vec (opt.warc_user_headers); # ifdef HAVE_SSL xfree_null (opt.cert_file); xfree_null (opt.private_key); @@ -1601,7 +1779,8 @@ cleanup (void) xfree_null (opt.user); xfree_null (opt.passwd); xfree_null (opt.base_href); - + xfree_null (opt.method); + #endif /* DEBUG_MALLOC */ } @@ -1622,9 +1801,9 @@ test_commands_sorted() { mu_assert ("FAILED", false); break; - } + } else - { + { prev_idx ++; next_idx ++; } @@ -1648,11 +1827,11 @@ test_cmd_spec_restrict_file_names() { "windows,lowercase", restrict_windows, true, restrict_lowercase, true }, { "unix,nocontrol,lowercase,", restrict_unix, false, restrict_lowercase, true }, }; - - for (i = 0; i < sizeof(test_array)/sizeof(test_array[0]); ++i) + + for (i = 0; i < sizeof(test_array)/sizeof(test_array[0]); ++i) { bool res; - + defaults(); res = cmd_spec_restrict_file_names ("dummy", 
test_array[i].val, NULL); @@ -1662,10 +1841,10 @@ test_cmd_spec_restrict_file_names() fprintf (stderr, "opt.restrict_files_ctrl: %d\n", opt.restrict_files_ctrl); fflush (stderr); fprintf (stderr, "opt.restrict_files_case: %d\n", opt.restrict_files_case); fflush (stderr); */ - mu_assert ("test_cmd_spec_restrict_file_names: wrong result", + mu_assert ("test_cmd_spec_restrict_file_names: wrong result", res == test_array[i].result - && opt.restrict_files_os == test_array[i].expected_restrict_files_os - && opt.restrict_files_ctrl == test_array[i].expected_restrict_files_ctrl + && opt.restrict_files_os == test_array[i].expected_restrict_files_os + && opt.restrict_files_ctrl == test_array[i].expected_restrict_files_ctrl && opt.restrict_files_case == test_array[i].expected_restrict_files_case); }