X-Git-Url: http://sjero.net/git/?a=blobdiff_plain;f=src%2Finit.c;h=cafd456cd2befc18d89e33bbbdc58600a435eaef;hb=359dd167602071cfa62d6c586ca846ede5ed7c29;hp=d747906971c1246f9a59b753960cb5391584c4fe;hpb=25a3d032faa467a7dabbcb96d5e772202bd0b387;p=wget diff --git a/src/init.c b/src/init.c index d7479069..cafd456c 100644 --- a/src/init.c +++ b/src/init.c @@ -1,6 +1,7 @@ /* Reading/parsing the initialization file. - Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, - 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc. + Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, + 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, + Inc. This file is part of GNU Wget. @@ -29,14 +30,27 @@ shall include the source code for the parts of OpenSSL used as well as that of the covered work. */ #include "wget.h" +#include "exits.h" #include #include -#ifdef HAVE_UNISTD_H -# include -#endif +#include +#include #include #include +#include +/* not all systems provide PATH_MAX in limits.h */ +#ifndef PATH_MAX +# include +# ifndef PATH_MAX +# define PATH_MAX MAXPATHLEN +# endif +#endif + +#include +#ifdef HAVE_LIBPCRE +# include +#endif #ifdef HAVE_PWD_H # include @@ -53,6 +67,7 @@ as that of the covered work. 
*/ #include "res.h" /* for res_cleanup */ #include "http.h" /* for http_cleanup */ #include "retr.h" /* for output_stream */ +#include "warc.h" /* for warc_close */ #ifdef TESTING #include "test.h" @@ -79,12 +94,15 @@ CMD_DECLARE (cmd_vector); CMD_DECLARE (cmd_spec_dirstruct); CMD_DECLARE (cmd_spec_header); +CMD_DECLARE (cmd_spec_warc_header); CMD_DECLARE (cmd_spec_htmlify); CMD_DECLARE (cmd_spec_mirror); CMD_DECLARE (cmd_spec_prefer_family); CMD_DECLARE (cmd_spec_progress); CMD_DECLARE (cmd_spec_recursive); +CMD_DECLARE (cmd_spec_regex_type); CMD_DECLARE (cmd_spec_restrict_file_names); +CMD_DECLARE (cmd_spec_report_speed); #ifdef HAVE_SSL CMD_DECLARE (cmd_spec_secure_protocol); #endif @@ -106,7 +124,9 @@ static const struct { } commands[] = { /* KEEP THIS LIST ALPHABETICALLY SORTED */ { "accept", &opt.accepts, cmd_vector }, + { "acceptregex", &opt.acceptregex_s, cmd_string }, { "addhostdir", &opt.add_hostdir, cmd_boolean }, + { "adjustextension", &opt.adjust_extension, cmd_boolean }, { "alwaysrest", &opt.always_rest, cmd_boolean }, /* deprecated */ { "askpassword", &opt.ask_passwd, cmd_boolean }, { "authnochallenge", &opt.auth_without_challenge, @@ -126,8 +146,10 @@ static const struct { { "certificatetype", &opt.cert_type, cmd_cert_type }, { "checkcertificate", &opt.check_cert, cmd_boolean }, #endif + { "chooseconfig", &opt.choose_config, cmd_file }, { "connecttimeout", &opt.connect_timeout, cmd_time }, { "contentdisposition", &opt.content_disposition, cmd_boolean }, + { "contentonerror", &opt.content_on_error, cmd_boolean }, { "continue", &opt.always_rest, cmd_boolean }, { "convertlinks", &opt.convert_links, cmd_boolean }, { "cookies", &opt.cookies, cmd_boolean }, @@ -163,7 +185,7 @@ static const struct { { "ftpuser", &opt.ftp_user, cmd_string }, { "glob", &opt.ftp_glob, cmd_boolean }, { "header", NULL, cmd_spec_header }, - { "htmlextension", &opt.html_extension, cmd_boolean }, + { "htmlextension", &opt.adjust_extension, cmd_boolean }, /* deprecated */ { 
"htmlify", NULL, cmd_spec_htmlify }, { "httpkeepalive", &opt.http_keep_alive, cmd_boolean }, { "httppasswd", &opt.http_passwd, cmd_string }, /* deprecated */ @@ -184,7 +206,7 @@ static const struct { { "keepsessioncookies", &opt.keep_session_cookies, cmd_boolean }, { "limitrate", &opt.limit_rate, cmd_bytes }, { "loadcookies", &opt.cookies_input, cmd_file }, - { "locale", &opt.locale, cmd_string }, + { "localencoding", &opt.locale, cmd_string }, { "logfile", &opt.lfilename, cmd_file }, { "login", &opt.ftp_user, cmd_string },/* deprecated*/ { "maxredirect", &opt.max_redirect, cmd_number }, @@ -222,10 +244,13 @@ static const struct { { "reclevel", &opt.reclevel, cmd_number_inf }, { "recursive", NULL, cmd_spec_recursive }, { "referer", &opt.referer, cmd_string }, + { "regextype", &opt.regex_type, cmd_spec_regex_type }, { "reject", &opt.rejects, cmd_vector }, + { "rejectregex", &opt.rejectregex_s, cmd_string }, { "relativeonly", &opt.relative_only, cmd_boolean }, { "remoteencoding", &opt.encoding_remote, cmd_string }, { "removelisting", &opt.remove_listing, cmd_boolean }, + { "reportspeed", &opt.report_bps, cmd_spec_report_speed}, { "restrictfilenames", NULL, cmd_spec_restrict_file_names }, { "retrsymlinks", &opt.retr_symlinks, cmd_boolean }, { "retryconnrefused", &opt.retry_connrefused, cmd_boolean }, @@ -236,19 +261,34 @@ static const struct { { "secureprotocol", &opt.secure_protocol, cmd_spec_secure_protocol }, #endif { "serverresponse", &opt.server_response, cmd_boolean }, + { "showalldnsentries", &opt.show_all_dns_entries, cmd_boolean }, { "spanhosts", &opt.spanhost, cmd_boolean }, { "spider", &opt.spider, cmd_boolean }, { "strictcomments", &opt.strict_comments, cmd_boolean }, { "timeout", NULL, cmd_spec_timeout }, { "timestamping", &opt.timestamping, cmd_boolean }, { "tries", &opt.ntry, cmd_number_inf }, + { "trustservernames", &opt.trustservernames, cmd_boolean }, + { "unlink", &opt.unlink, cmd_boolean }, { "useproxy", &opt.use_proxy, cmd_boolean }, { "user", 
&opt.user, cmd_string }, { "useragent", NULL, cmd_spec_useragent }, + { "useservertimestamps", &opt.useservertimestamps, cmd_boolean }, { "verbose", NULL, cmd_spec_verbose }, { "wait", &opt.wait, cmd_time }, { "waitretry", &opt.waitretry, cmd_time }, -#ifdef MSDOS + { "warccdx", &opt.warc_cdx_enabled, cmd_boolean }, + { "warccdxdedup", &opt.warc_cdx_dedup_filename, cmd_file }, +#ifdef HAVE_LIBZ + { "warccompression", &opt.warc_compression_enabled, cmd_boolean }, +#endif + { "warcdigests", &opt.warc_digests_enabled, cmd_boolean }, + { "warcfile", &opt.warc_filename, cmd_file }, + { "warcheader", NULL, cmd_spec_warc_header }, + { "warckeeplog", &opt.warc_keep_log, cmd_boolean }, + { "warcmaxsize", &opt.warc_maxsize, cmd_bytes }, + { "warctempdir", &opt.warc_tempdir, cmd_directory }, +#ifdef USE_WATT32 { "wdebug", &opt.wdebug, cmd_boolean }, #endif }; @@ -278,7 +318,7 @@ command_by_name (const char *cmdname) } /* Reset the variables to default values. */ -static void +void defaults (void) { char *tmp; @@ -329,8 +369,11 @@ defaults (void) opt.restrict_files_os = restrict_unix; #endif opt.restrict_files_ctrl = true; + opt.restrict_files_nonascii = false; opt.restrict_files_case = restrict_no_case_restriction; + opt.regex_type = regex_type_posix; + opt.max_redirect = 20; opt.waitretry = 10; @@ -342,6 +385,21 @@ defaults (void) #endif opt.locale = NULL; opt.encoding_remote = NULL; + + opt.useservertimestamps = true; + opt.show_all_dns_entries = false; + + opt.warc_maxsize = 0; /* 1024 * 1024 * 1024; */ +#ifdef HAVE_LIBZ + opt.warc_compression_enabled = true; +#else + opt.warc_compression_enabled = false; +#endif + opt.warc_digests_enabled = true; + opt.warc_cdx_enabled = false; + opt.warc_cdx_dedup_filename = NULL; + opt.warc_tempdir = NULL; + opt.warc_keep_log = true; } /* Return the user's home directory (strdup-ed), or NULL if none is @@ -349,8 +407,8 @@ defaults (void) char * home_dir (void) { - static char buf[PATH_MAX]; - static char *home; + static char *buf = 
NULL; + static char *home, *ret; if (!home) { @@ -358,17 +416,28 @@ home_dir (void) if (!home) { #if defined(MSDOS) + int len; + /* Under MSDOS, if $HOME isn't defined, use the directory where `wget.exe' resides. */ const char *_w32_get_argv0 (void); /* in libwatt.a/pcconfig.c */ char *p; - strcpy (buf, _w32_get_argv0 ()); + buff = _w32_get_argv0 (); + p = strrchr (buf, '/'); /* djgpp */ if (!p) p = strrchr (buf, '\\'); /* others */ assert (p); - *p = '\0'; + + len = p - buff + 1; + buff = malloc (len + 1); + if (buff == NULL) + return NULL; + + strncpy (buff, _w32_get_argv0 (), len); + buff[len] = '\0'; + home = buf; #elif !defined(WINDOWS) /* If HOME is not defined, try getting it from the password @@ -376,8 +445,7 @@ home_dir (void) struct passwd *pwd = getpwuid (getuid ()); if (!pwd || !pwd->pw_dir) return NULL; - strcpy (buf, pwd->pw_dir); - home = buf; + home = pwd->pw_dir; #else /* !WINDOWS */ /* Under Windows, if $HOME isn't defined, use the directory where `wget.exe' resides. */ @@ -386,15 +454,19 @@ home_dir (void) } } - return home ? xstrdup (home) : NULL; + ret = home ? xstrdup (home) : NULL; + if (buf) + free (buf); + + return ret; } -/* Check the 'WGETRC' environment variable and return the file name - if 'WGETRC' is set and is a valid file. +/* Check the 'WGETRC' environment variable and return the file name + if 'WGETRC' is set and is a valid file. If the `WGETRC' variable exists but the file does not exist, the function will exit(). */ char * -wgetrc_env_file_name (void) +wgetrc_env_file_name (void) { char *env = getenv ("WGETRC"); if (env && *env) @@ -410,12 +482,12 @@ wgetrc_env_file_name (void) return NULL; } -/* Check for the existance of '$HOME/.wgetrc' and return it's path +/* Check for the existance of '$HOME/.wgetrc' and return its path if it exists and is set. 
*/ char * -wgetrc_user_file_name (void) +wgetrc_user_file_name (void) { - char *home = home_dir (); + char *home; char *file = NULL; /* If that failed, try $HOME/.wgetrc (or equivalent). */ @@ -441,7 +513,7 @@ wgetrc_user_file_name (void) /* Return the path to the user's .wgetrc. This is either the value of `WGETRC' environment variable, or `$HOME/.wgetrc'. - Additionally, for windows, look in the directory where wget.exe + Additionally, for windows, look in the directory where wget.exe resides. */ char * wgetrc_file_name (void) @@ -449,7 +521,7 @@ wgetrc_file_name (void) char *file = wgetrc_env_file_name (); if (file && *file) return file; - + file = wgetrc_user_file_name (); #ifdef WINDOWS @@ -494,7 +566,7 @@ static bool setval_internal_tilde (int, const char *, const char *); /* Initialize variables from a wgetrc file. Returns zero (failure) if there were errors in the file. */ -static bool +bool run_wgetrc (const char *file) { FILE *fp; @@ -558,20 +630,39 @@ void initialize (void) { char *file, *env_sysrc; - int ok = true; + bool ok = true; - /* Load the hard-coded defaults. */ - defaults (); - - /* Run a non-standard system rc file when the according environment + /* Run a non-standard system rc file when the according environment variable has been set. For internal testing purposes only! */ env_sysrc = getenv ("SYSTEM_WGETRC"); if (env_sysrc && file_exists_p (env_sysrc)) - ok &= run_wgetrc (env_sysrc); + { + ok &= run_wgetrc (env_sysrc); + /* If there are any problems parsing the system wgetrc file, tell + the user and exit */ + if (! ok) + { + fprintf (stderr, _("\ +Parsing system wgetrc file (env SYSTEM_WGETRC) failed. Please check\n\ +'%s',\n\ +or specify a different file using --config.\n"), env_sysrc); + exit (2); + } + } /* Otherwise, if SYSTEM_WGETRC is defined, use it. 
*/ #ifdef SYSTEM_WGETRC else if (file_exists_p (SYSTEM_WGETRC)) ok &= run_wgetrc (SYSTEM_WGETRC); + /* If there are any problems parsing the system wgetrc file, tell + the user and exit */ + if (! ok) + { + fprintf (stderr, _("\ +Parsing system wgetrc file failed. Please check\n\ +'%s',\n\ +or specify a different file using --config.\n"), SYSTEM_WGETRC); + exit (2); + } #endif /* Override it with your own, if one exists. */ file = wgetrc_file_name (); @@ -1182,6 +1273,27 @@ cmd_spec_header (const char *com, const char *val, void *place_ignored) return true; } +static bool +cmd_spec_warc_header (const char *com, const char *val, void *place_ignored) +{ + /* Empty value means reset the list of headers. */ + if (*val == '\0') + { + free_vec (opt.warc_user_headers); + opt.warc_user_headers = NULL; + return true; + } + + if (!check_user_specified_header (val)) + { + fprintf (stderr, _("%s: %s: Invalid WARC header %s.\n"), + exec_name, com, quote (val)); + return false; + } + opt.warc_user_headers = vec_append (opt.warc_user_headers, val); + return true; +} + static bool cmd_spec_htmlify (const char *com, const char *val, void *place_ignored) { @@ -1269,12 +1381,32 @@ cmd_spec_recursive (const char *com, const char *val, void *place_ignored) return true; } +/* Validate --regex-type and set the choice. 
*/ + +static bool +cmd_spec_regex_type (const char *com, const char *val, void *place_ignored) +{ + static const struct decode_item choices[] = { + { "posix", regex_type_posix }, +#ifdef HAVE_LIBPCRE + { "pcre", regex_type_pcre }, +#endif + }; + int regex_type = regex_type_posix; + int ok = decode_string (val, choices, countof (choices), &regex_type); + if (!ok) + fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val)); + opt.regex_type = regex_type; + return ok; +} + static bool cmd_spec_restrict_file_names (const char *com, const char *val, void *place_ignored) { int restrict_os = opt.restrict_files_os; int restrict_ctrl = opt.restrict_files_ctrl; int restrict_case = opt.restrict_files_case; + int restrict_nonascii = opt.restrict_files_nonascii; const char *end; @@ -1285,7 +1417,7 @@ cmd_spec_restrict_file_names (const char *com, const char *val, void *place_igno end = strchr (val, ','); if (!end) end = val + strlen (val); - + if (VAL_IS ("unix")) restrict_os = restrict_unix; else if (VAL_IS ("windows")) @@ -1296,15 +1428,18 @@ cmd_spec_restrict_file_names (const char *com, const char *val, void *place_igno restrict_case = restrict_uppercase; else if (VAL_IS ("nocontrol")) restrict_ctrl = false; + else if (VAL_IS ("ascii")) + restrict_nonascii = true; else { - fprintf (stderr, - _("%s: %s: Invalid restriction %s, use [unix|windows],[lowercase|uppercase],[nocontrol].\n"), + fprintf (stderr, _("\ +%s: %s: Invalid restriction %s,\n\ + use [unix|windows],[lowercase|uppercase],[nocontrol],[ascii].\n"), exec_name, com, quote (val)); return false; } - if (*end) + if (*end) val = end + 1; } while (*val && *end); @@ -1314,10 +1449,20 @@ cmd_spec_restrict_file_names (const char *com, const char *val, void *place_igno opt.restrict_files_os = restrict_os; opt.restrict_files_ctrl = restrict_ctrl; opt.restrict_files_case = restrict_case; - + opt.restrict_files_nonascii = restrict_nonascii; + return true; } +static bool +cmd_spec_report_speed (const char 
*com, const char *val, void *place_ignored) +{ + opt.report_bps = strcasecmp (val, "bits") == 0; + if (!opt.report_bps) + fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val)); + return opt.report_bps; +} + #ifdef HAVE_SSL static bool cmd_spec_secure_protocol (const char *com, const char *val, void *place) @@ -1524,6 +1669,7 @@ decode_string (const char *val, const struct decode_item *items, int itemcount, void cleanup_html_url (void); +void spider_cleanup (void); /* Free the memory allocated by global variables. */ @@ -1532,8 +1678,16 @@ cleanup (void) { /* Free external resources, close files, etc. */ + /* Close WARC file. */ + if (opt.warc_filename != 0) + warc_close (); + + log_close (); + if (output_stream) - fclose (output_stream); + if (fclose (output_stream) == EOF) + inform_exit_status (CLOSEFAILED); + /* No need to check for error because Wget flushes its output (and checks for errors) after any data arrives. */ @@ -1550,13 +1704,18 @@ cleanup (void) res_cleanup (); http_cleanup (); cleanup_html_url (); + spider_cleanup (); host_cleanup (); log_cleanup (); + for (i = 0; i < nurl; i++) + xfree (url[i]); + { extern acc_t *netrc_list; free_netrc (netrc_list); } + xfree_null (opt.choose_config); xfree_null (opt.lfilename); xfree_null (opt.dir_prefix); xfree_null (opt.input_filename); @@ -1580,6 +1739,7 @@ cleanup (void) xfree_null (opt.http_user); xfree_null (opt.http_passwd); free_vec (opt.user_headers); + free_vec (opt.warc_user_headers); # ifdef HAVE_SSL xfree_null (opt.cert_file); xfree_null (opt.private_key); @@ -1594,7 +1754,7 @@ cleanup (void) xfree_null (opt.user); xfree_null (opt.passwd); xfree_null (opt.base_href); - + #endif /* DEBUG_MALLOC */ } @@ -1615,9 +1775,9 @@ test_commands_sorted() { mu_assert ("FAILED", false); break; - } + } else - { + { prev_idx ++; next_idx ++; } @@ -1641,11 +1801,11 @@ test_cmd_spec_restrict_file_names() { "windows,lowercase", restrict_windows, true, restrict_lowercase, true }, { 
"unix,nocontrol,lowercase,", restrict_unix, false, restrict_lowercase, true }, }; - - for (i = 0; i < sizeof(test_array)/sizeof(test_array[0]); ++i) + + for (i = 0; i < sizeof(test_array)/sizeof(test_array[0]); ++i) { bool res; - + defaults(); res = cmd_spec_restrict_file_names ("dummy", test_array[i].val, NULL); @@ -1655,10 +1815,10 @@ test_cmd_spec_restrict_file_names() fprintf (stderr, "opt.restrict_files_ctrl: %d\n", opt.restrict_files_ctrl); fflush (stderr); fprintf (stderr, "opt.restrict_files_case: %d\n", opt.restrict_files_case); fflush (stderr); */ - mu_assert ("test_cmd_spec_restrict_file_names: wrong result", + mu_assert ("test_cmd_spec_restrict_file_names: wrong result", res == test_array[i].result - && opt.restrict_files_os == test_array[i].expected_restrict_files_os - && opt.restrict_files_ctrl == test_array[i].expected_restrict_files_ctrl + && opt.restrict_files_os == test_array[i].expected_restrict_files_os + && opt.restrict_files_ctrl == test_array[i].expected_restrict_files_ctrl && opt.restrict_files_case == test_array[i].expected_restrict_files_case); }