X-Git-Url: http://sjero.net/git/?p=wget;a=blobdiff_plain;f=src%2Finit.c;h=8000d73cc50919abd7617f021bc49e1d276ef971;hp=bce2427aaf71cd8e4fe17565607fda5d33bdd1c7;hb=38a7829dcb4eb5dba28dbf0f05c6a80fea9217f8;hpb=0a3697ad652df74ffeec8a97e1d23c343d8ef391 diff --git a/src/init.c b/src/init.c index bce2427a..fbef133c 100644 --- a/src/init.c +++ b/src/init.c @@ -1,12 +1,13 @@ /* Reading/parsing the initialization file. - Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001 - Free Software Foundation, Inc. + Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, + 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, + Inc. This file is part of GNU Wget. GNU Wget is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or +the Free Software Foundation; either version 3 of the License, or (at your option) any later version. GNU Wget is distributed in the hope that it will be useful, @@ -15,80 +16,79 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License -along with Wget; if not, write to the Free Software -Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +along with Wget. If not, see . -In addition, as a special exception, the Free Software Foundation -gives permission to link the code of its release of Wget with the -OpenSSL project's "OpenSSL" library (or with modified versions of it -that use the same license as the "OpenSSL" library), and distribute -the linked executables. You must obey the GNU General Public License -in all respects for all of the code used other than "OpenSSL". If you -modify this file, you may extend this exception to your version of the -file, but you are not obligated to do so. If you do not wish to do -so, delete this exception statement from your version. 
*/ +Additional permission under GNU GPL version 3 section 7 -#include +If you modify this program, or any covered work, by linking or +combining it with the OpenSSL project's OpenSSL library (or a +modified version of that library), containing parts covered by the +terms of the OpenSSL or SSLeay licenses, the Free Software Foundation +grants you additional permission to convey the resulting work. +Corresponding Source for a non-source form of such a combination +shall include the source code for the parts of OpenSSL used as well +as that of the covered work. */ + +#include "wget.h" +#include "exits.h" #include -#include #include -#ifdef HAVE_UNISTD_H -# include -#endif -#ifdef HAVE_STRING_H -# include -#else -# include -#endif +#include +#include +#include #include - -#ifdef WINDOWS -# include -#else -# include -# include -#ifndef __BEOS__ -# include +#include +/* not all systems provide PATH_MAX in limits.h */ +#ifndef PATH_MAX +# include +# ifndef PATH_MAX +# define PATH_MAX MAXPATHLEN +# endif #endif + +#include +#ifdef HAVE_LIBPCRE +# include #endif #ifdef HAVE_PWD_H -#include +# include #endif +#include -#include "wget.h" #include "utils.h" #include "init.h" #include "host.h" -#include "recur.h" #include "netrc.h" -#include "cookies.h" /* for cookie_jar_delete */ #include "progress.h" - -#ifndef errno -extern int errno; +#include "recur.h" /* for INFINITE_RECURSION */ +#include "convert.h" /* for convert_cleanup */ +#include "res.h" /* for res_cleanup */ +#include "http.h" /* for http_cleanup */ +#include "retr.h" /* for output_stream */ +#include "warc.h" /* for warc_close */ +#include "spider.h" /* for spider_cleanup */ + +#ifdef TESTING +#include "test.h" #endif -extern struct cookie_jar *wget_cookie_jar; - -/* We want tilde expansion enabled only when reading `.wgetrc' lines; - otherwise, it will be performed by the shell. This variable will - be set by the wgetrc-reading function. 
*/ -static int enable_tilde_expansion; - -#define CMD_DECLARE(func) static int func \ - PARAMS ((const char *, const char *, void *)) +#define CMD_DECLARE(func) static bool func (const char *, const char *, void *) CMD_DECLARE (cmd_boolean); CMD_DECLARE (cmd_bytes); +CMD_DECLARE (cmd_bytes_sum); +#ifdef HAVE_SSL +CMD_DECLARE (cmd_cert_type); +#endif CMD_DECLARE (cmd_directory_vector); -CMD_DECLARE (cmd_lockable_boolean); CMD_DECLARE (cmd_number); CMD_DECLARE (cmd_number_inf); CMD_DECLARE (cmd_string); +CMD_DECLARE (cmd_string_uppercase); CMD_DECLARE (cmd_file); CMD_DECLARE (cmd_directory); CMD_DECLARE (cmd_time); @@ -96,200 +96,323 @@ CMD_DECLARE (cmd_vector); CMD_DECLARE (cmd_spec_dirstruct); CMD_DECLARE (cmd_spec_header); +CMD_DECLARE (cmd_spec_warc_header); CMD_DECLARE (cmd_spec_htmlify); CMD_DECLARE (cmd_spec_mirror); +CMD_DECLARE (cmd_spec_prefer_family); CMD_DECLARE (cmd_spec_progress); CMD_DECLARE (cmd_spec_recursive); +CMD_DECLARE (cmd_spec_regex_type); CMD_DECLARE (cmd_spec_restrict_file_names); +CMD_DECLARE (cmd_spec_report_speed); +#ifdef HAVE_SSL +CMD_DECLARE (cmd_spec_secure_protocol); +#endif +CMD_DECLARE (cmd_spec_timeout); CMD_DECLARE (cmd_spec_useragent); - -/* List of recognized commands, each consisting of name, closure and function. - When adding a new command, simply add it to the list, but be sure to keep the - list sorted alphabetically, as comind() depends on it. Also, be sure to add - any entries that allocate memory (e.g. cmd_string and cmd_vector guys) to the - cleanup() function below. */ -static struct { - char *name; - void *closure; - int (*action) PARAMS ((const char *, const char *, void *)); +CMD_DECLARE (cmd_spec_verbose); + +/* List of recognized commands, each consisting of name, place and + function. When adding a new command, simply add it to the list, + but be sure to keep the list sorted alphabetically, as + command_by_name's binary search depends on it. Also, be sure to + add any entries that allocate memory (e.g. 
cmd_string and + cmd_vector) to the cleanup() function below. */ + +static const struct { + const char *name; + void *place; + bool (*action) (const char *, const char *, void *); } commands[] = { - { "accept", &opt.accepts, cmd_vector }, - { "addhostdir", &opt.add_hostdir, cmd_boolean }, - { "alwaysrest", &opt.always_rest, cmd_boolean }, /* deprecated */ - { "background", &opt.background, cmd_boolean }, - { "backupconverted", &opt.backup_converted, cmd_boolean }, - { "backups", &opt.backups, cmd_number }, - { "base", &opt.base_href, cmd_string }, - { "bindaddress", &opt.bind_address, cmd_string }, - { "cache", &opt.allow_cache, cmd_boolean }, - { "continue", &opt.always_rest, cmd_boolean }, - { "convertlinks", &opt.convert_links, cmd_boolean }, - { "cookies", &opt.cookies, cmd_boolean }, - { "cutdirs", &opt.cut_dirs, cmd_number }, -#ifdef DEBUG - { "debug", &opt.debug, cmd_boolean }, + /* KEEP THIS LIST ALPHABETICALLY SORTED */ + { "accept", &opt.accepts, cmd_vector }, + { "acceptregex", &opt.acceptregex_s, cmd_string }, + { "addhostdir", &opt.add_hostdir, cmd_boolean }, + { "adjustextension", &opt.adjust_extension, cmd_boolean }, + { "alwaysrest", &opt.always_rest, cmd_boolean }, /* deprecated */ + { "askpassword", &opt.ask_passwd, cmd_boolean }, + { "authnochallenge", &opt.auth_without_challenge, + cmd_boolean }, + { "background", &opt.background, cmd_boolean }, + { "backupconverted", &opt.backup_converted, cmd_boolean }, + { "backups", &opt.backups, cmd_number }, + { "base", &opt.base_href, cmd_string }, + { "bindaddress", &opt.bind_address, cmd_string }, + { "bodydata", &opt.body_data, cmd_string }, + { "bodyfile", &opt.body_file, cmd_string }, +#ifdef HAVE_SSL + { "cacertificate", &opt.ca_cert, cmd_file }, +#endif + { "cache", &opt.allow_cache, cmd_boolean }, +#ifdef HAVE_SSL + { "cadirectory", &opt.ca_directory, cmd_directory }, + { "certificate", &opt.cert_file, cmd_file }, + { "certificatetype", &opt.cert_type, cmd_cert_type }, + { "checkcertificate", 
&opt.check_cert, cmd_boolean }, #endif - { "deleteafter", &opt.delete_after, cmd_boolean }, - { "dirprefix", &opt.dir_prefix, cmd_directory }, - { "dirstruct", NULL, cmd_spec_dirstruct }, - { "dnscache", &opt.dns_cache, cmd_boolean }, - { "domains", &opt.domains, cmd_vector }, - { "dotbytes", &opt.dot_bytes, cmd_bytes }, - { "dotsinline", &opt.dots_in_line, cmd_number }, - { "dotspacing", &opt.dot_spacing, cmd_number }, - { "dotstyle", &opt.dot_style, cmd_string }, + { "chooseconfig", &opt.choose_config, cmd_file }, + { "connecttimeout", &opt.connect_timeout, cmd_time }, + { "contentdisposition", &opt.content_disposition, cmd_boolean }, + { "contentonerror", &opt.content_on_error, cmd_boolean }, + { "continue", &opt.always_rest, cmd_boolean }, + { "convertlinks", &opt.convert_links, cmd_boolean }, + { "cookies", &opt.cookies, cmd_boolean }, + { "cutdirs", &opt.cut_dirs, cmd_number }, + { "debug", &opt.debug, cmd_boolean }, + { "defaultpage", &opt.default_page, cmd_string }, + { "deleteafter", &opt.delete_after, cmd_boolean }, + { "dirprefix", &opt.dir_prefix, cmd_directory }, + { "dirstruct", NULL, cmd_spec_dirstruct }, + { "dnscache", &opt.dns_cache, cmd_boolean }, + { "dnstimeout", &opt.dns_timeout, cmd_time }, + { "domains", &opt.domains, cmd_vector }, + { "dotbytes", &opt.dot_bytes, cmd_bytes }, + { "dotsinline", &opt.dots_in_line, cmd_number }, + { "dotspacing", &opt.dot_spacing, cmd_number }, + { "dotstyle", &opt.dot_style, cmd_string }, /* deprecated */ #ifdef HAVE_SSL - { "egdfile", &opt.sslegdsock, cmd_file }, + { "egdfile", &opt.egd_file, cmd_file }, #endif - { "excludedirectories", &opt.excludes, cmd_directory_vector }, - { "excludedomains", &opt.exclude_domains, cmd_vector }, - { "followftp", &opt.follow_ftp, cmd_boolean }, - { "followtags", &opt.follow_tags, cmd_vector }, - { "forcehtml", &opt.force_html, cmd_boolean }, - { "ftpproxy", &opt.ftp_proxy, cmd_string }, - { "glob", &opt.ftp_glob, cmd_boolean }, - { "header", NULL, cmd_spec_header }, - { 
"htmlextension", &opt.html_extension, cmd_boolean }, - { "htmlify", NULL, cmd_spec_htmlify }, - { "httpkeepalive", &opt.http_keep_alive, cmd_boolean }, - { "httppasswd", &opt.http_passwd, cmd_string }, - { "httpproxy", &opt.http_proxy, cmd_string }, - { "httpsproxy", &opt.https_proxy, cmd_string }, - { "httpuser", &opt.http_user, cmd_string }, - { "ignorelength", &opt.ignore_length, cmd_boolean }, - { "ignoretags", &opt.ignore_tags, cmd_vector }, - { "includedirectories", &opt.includes, cmd_directory_vector }, - { "input", &opt.input_filename, cmd_file }, - { "killlonger", &opt.kill_longer, cmd_boolean }, - { "limitrate", &opt.limit_rate, cmd_bytes }, - { "loadcookies", &opt.cookies_input, cmd_file }, - { "logfile", &opt.lfilename, cmd_file }, - { "login", &opt.ftp_acc, cmd_string }, - { "mirror", NULL, cmd_spec_mirror }, - { "netrc", &opt.netrc, cmd_boolean }, - { "noclobber", &opt.noclobber, cmd_boolean }, - { "noparent", &opt.no_parent, cmd_boolean }, - { "noproxy", &opt.no_proxy, cmd_vector }, - { "numtries", &opt.ntry, cmd_number_inf },/* deprecated*/ - { "outputdocument", &opt.output_document, cmd_file }, - { "pagerequisites", &opt.page_requisites, cmd_boolean }, - { "passiveftp", &opt.ftp_pasv, cmd_lockable_boolean }, - { "passwd", &opt.ftp_pass, cmd_string }, - { "postdata", &opt.post_data, cmd_string }, - { "postfile", &opt.post_file_name, cmd_file }, - { "progress", &opt.progress_type, cmd_spec_progress }, - { "proxypasswd", &opt.proxy_passwd, cmd_string }, - { "proxyuser", &opt.proxy_user, cmd_string }, - { "quiet", &opt.quiet, cmd_boolean }, - { "quota", &opt.quota, cmd_bytes }, - { "randomwait", &opt.random_wait, cmd_boolean }, - { "reclevel", &opt.reclevel, cmd_number_inf }, - { "recursive", NULL, cmd_spec_recursive }, - { "referer", &opt.referer, cmd_string }, - { "reject", &opt.rejects, cmd_vector }, - { "relativeonly", &opt.relative_only, cmd_boolean }, - { "removelisting", &opt.remove_listing, cmd_boolean }, - { "restrictfilenames", 
&opt.restrict_file_names, cmd_spec_restrict_file_names }, - { "retrsymlinks", &opt.retr_symlinks, cmd_boolean }, - { "retryconnrefused", &opt.retry_connrefused, cmd_boolean }, - { "robots", &opt.use_robots, cmd_boolean }, - { "savecookies", &opt.cookies_output, cmd_file }, - { "saveheaders", &opt.save_headers, cmd_boolean }, - { "serverresponse", &opt.server_response, cmd_boolean }, - { "spanhosts", &opt.spanhost, cmd_boolean }, - { "spider", &opt.spider, cmd_boolean }, + { "excludedirectories", &opt.excludes, cmd_directory_vector }, + { "excludedomains", &opt.exclude_domains, cmd_vector }, + { "followftp", &opt.follow_ftp, cmd_boolean }, + { "followtags", &opt.follow_tags, cmd_vector }, + { "forcehtml", &opt.force_html, cmd_boolean }, + { "ftppasswd", &opt.ftp_passwd, cmd_string }, /* deprecated */ + { "ftppassword", &opt.ftp_passwd, cmd_string }, + { "ftpproxy", &opt.ftp_proxy, cmd_string }, +#ifdef __VMS + { "ftpstmlf", &opt.ftp_stmlf, cmd_boolean }, +#endif /* def __VMS */ + { "ftpuser", &opt.ftp_user, cmd_string }, + { "glob", &opt.ftp_glob, cmd_boolean }, + { "header", NULL, cmd_spec_header }, + { "htmlextension", &opt.adjust_extension, cmd_boolean }, /* deprecated */ + { "htmlify", NULL, cmd_spec_htmlify }, + { "httpkeepalive", &opt.http_keep_alive, cmd_boolean }, + { "httppasswd", &opt.http_passwd, cmd_string }, /* deprecated */ + { "httppassword", &opt.http_passwd, cmd_string }, + { "httpproxy", &opt.http_proxy, cmd_string }, #ifdef HAVE_SSL - { "sslcadir", &opt.sslcadir, cmd_directory }, - { "sslcafile", &opt.sslcafile, cmd_file }, - { "sslcertfile", &opt.sslcertfile, cmd_file }, - { "sslcertkey", &opt.sslcertkey, cmd_file }, - { "sslcerttype", &opt.sslcerttype, cmd_number }, - { "sslcheckcert", &opt.sslcheckcert, cmd_number }, - { "sslprotocol", &opt.sslprotocol, cmd_number }, -#endif /* HAVE_SSL */ - { "timeout", &opt.timeout, cmd_time }, - { "timestamping", &opt.timestamping, cmd_boolean }, - { "tries", &opt.ntry, cmd_number_inf }, - { "useproxy", 
&opt.use_proxy, cmd_boolean }, - { "useragent", NULL, cmd_spec_useragent }, - { "verbose", &opt.verbose, cmd_boolean }, - { "wait", &opt.wait, cmd_time }, - { "waitretry", &opt.waitretry, cmd_time } + { "httpsonly", &opt.https_only, cmd_boolean }, +#endif + { "httpsproxy", &opt.https_proxy, cmd_string }, + { "httpuser", &opt.http_user, cmd_string }, + { "ignorecase", &opt.ignore_case, cmd_boolean }, + { "ignorelength", &opt.ignore_length, cmd_boolean }, + { "ignoretags", &opt.ignore_tags, cmd_vector }, + { "includedirectories", &opt.includes, cmd_directory_vector }, +#ifdef ENABLE_IPV6 + { "inet4only", &opt.ipv4_only, cmd_boolean }, + { "inet6only", &opt.ipv6_only, cmd_boolean }, +#endif + { "input", &opt.input_filename, cmd_file }, + { "iri", &opt.enable_iri, cmd_boolean }, + { "keepsessioncookies", &opt.keep_session_cookies, cmd_boolean }, + { "limitrate", &opt.limit_rate, cmd_bytes }, + { "loadcookies", &opt.cookies_input, cmd_file }, + { "localencoding", &opt.locale, cmd_string }, + { "logfile", &opt.lfilename, cmd_file }, + { "login", &opt.ftp_user, cmd_string },/* deprecated*/ + { "maxredirect", &opt.max_redirect, cmd_number }, + { "method", &opt.method, cmd_string_uppercase }, + { "mirror", NULL, cmd_spec_mirror }, + { "netrc", &opt.netrc, cmd_boolean }, + { "noclobber", &opt.noclobber, cmd_boolean }, + { "noconfig", &opt.noconfig, cmd_boolean }, + { "noparent", &opt.no_parent, cmd_boolean }, + { "noproxy", &opt.no_proxy, cmd_vector }, + { "numtries", &opt.ntry, cmd_number_inf },/* deprecated*/ + { "outputdocument", &opt.output_document, cmd_file }, + { "pagerequisites", &opt.page_requisites, cmd_boolean }, + { "passiveftp", &opt.ftp_pasv, cmd_boolean }, + { "passwd", &opt.ftp_passwd, cmd_string },/* deprecated*/ + { "password", &opt.passwd, cmd_string }, + { "postdata", &opt.post_data, cmd_string }, + { "postfile", &opt.post_file_name, cmd_file }, + { "preferfamily", NULL, cmd_spec_prefer_family }, + { "preservepermissions", &opt.preserve_perm, cmd_boolean 
}, +#ifdef HAVE_SSL + { "privatekey", &opt.private_key, cmd_file }, + { "privatekeytype", &opt.private_key_type, cmd_cert_type }, +#endif + { "progress", &opt.progress_type, cmd_spec_progress }, + { "protocoldirectories", &opt.protocol_directories, cmd_boolean }, + { "proxypasswd", &opt.proxy_passwd, cmd_string }, /* deprecated */ + { "proxypassword", &opt.proxy_passwd, cmd_string }, + { "proxyuser", &opt.proxy_user, cmd_string }, + { "quiet", &opt.quiet, cmd_boolean }, + { "quota", &opt.quota, cmd_bytes_sum }, +#ifdef HAVE_SSL + { "randomfile", &opt.random_file, cmd_file }, +#endif + { "randomwait", &opt.random_wait, cmd_boolean }, + { "readtimeout", &opt.read_timeout, cmd_time }, + { "reclevel", &opt.reclevel, cmd_number_inf }, + { "recursive", NULL, cmd_spec_recursive }, + { "referer", &opt.referer, cmd_string }, + { "regextype", &opt.regex_type, cmd_spec_regex_type }, + { "reject", &opt.rejects, cmd_vector }, + { "rejectregex", &opt.rejectregex_s, cmd_string }, + { "relativeonly", &opt.relative_only, cmd_boolean }, + { "remoteencoding", &opt.encoding_remote, cmd_string }, + { "removelisting", &opt.remove_listing, cmd_boolean }, + { "reportspeed", &opt.report_bps, cmd_spec_report_speed}, + { "restrictfilenames", NULL, cmd_spec_restrict_file_names }, + { "retrsymlinks", &opt.retr_symlinks, cmd_boolean }, + { "retryconnrefused", &opt.retry_connrefused, cmd_boolean }, + { "robots", &opt.use_robots, cmd_boolean }, + { "savecookies", &opt.cookies_output, cmd_file }, + { "saveheaders", &opt.save_headers, cmd_boolean }, +#ifdef HAVE_SSL + { "secureprotocol", &opt.secure_protocol, cmd_spec_secure_protocol }, +#endif + { "serverresponse", &opt.server_response, cmd_boolean }, + { "showalldnsentries", &opt.show_all_dns_entries, cmd_boolean }, + { "showprogress", &opt.show_progress, cmd_boolean }, + { "spanhosts", &opt.spanhost, cmd_boolean }, + { "spider", &opt.spider, cmd_boolean }, + { "startpos", &opt.start_pos, cmd_bytes }, + { "strictcomments", &opt.strict_comments, 
cmd_boolean }, + { "timeout", NULL, cmd_spec_timeout }, + { "timestamping", &opt.timestamping, cmd_boolean }, + { "tries", &opt.ntry, cmd_number_inf }, + { "trustservernames", &opt.trustservernames, cmd_boolean }, + { "unlink", &opt.unlink, cmd_boolean }, + { "useproxy", &opt.use_proxy, cmd_boolean }, + { "user", &opt.user, cmd_string }, + { "useragent", NULL, cmd_spec_useragent }, + { "useservertimestamps", &opt.useservertimestamps, cmd_boolean }, + { "verbose", NULL, cmd_spec_verbose }, + { "wait", &opt.wait, cmd_time }, + { "waitretry", &opt.waitretry, cmd_time }, + { "warccdx", &opt.warc_cdx_enabled, cmd_boolean }, + { "warccdxdedup", &opt.warc_cdx_dedup_filename, cmd_file }, +#ifdef HAVE_LIBZ + { "warccompression", &opt.warc_compression_enabled, cmd_boolean }, +#endif + { "warcdigests", &opt.warc_digests_enabled, cmd_boolean }, + { "warcfile", &opt.warc_filename, cmd_file }, + { "warcheader", NULL, cmd_spec_warc_header }, + { "warckeeplog", &opt.warc_keep_log, cmd_boolean }, + { "warcmaxsize", &opt.warc_maxsize, cmd_bytes }, + { "warctempdir", &opt.warc_tempdir, cmd_directory }, +#ifdef USE_WATT32 + { "wdebug", &opt.wdebug, cmd_boolean }, +#endif }; -/* Look up COM in the commands[] array and return its index. If COM - is not found, -1 is returned. This function uses binary search. */ +/* Look up CMDNAME in the commands[] and return its position in the + array. If CMDNAME is not found, return -1. */ static int -comind (const char *com) +command_by_name (const char *cmdname) { - int lo = 0, hi = ARRAY_SIZE (commands) - 1; + /* Use binary search for speed. Wget has ~100 commands, which + guarantees a worst case performance of 7 string comparisons. 
*/ + int lo = 0, hi = countof (commands) - 1; while (lo <= hi) { int mid = (lo + hi) >> 1; - int cmp = strcasecmp (com, commands[mid].name); + int cmp = strcasecmp (cmdname, commands[mid].name); if (cmp < 0) - hi = mid - 1; + hi = mid - 1; else if (cmp > 0) - lo = mid + 1; + lo = mid + 1; else - return mid; + return mid; } return -1; } /* Reset the variables to default values. */ -static void +void defaults (void) { char *tmp; - /* Most of the default values are 0. Just reset everything, and - fill in the non-zero values. Note that initializing pointers to - NULL this way is technically illegal, but porting Wget to a - machine where NULL is not all-zero bit pattern will be the least - of the implementors' worries. */ - memset (&opt, 0, sizeof (opt)); - - opt.cookies = 1; + /* Most of the default values are 0 (and 0.0, NULL, and false). + Just reset everything, and fill in the non-zero values. Note + that initializing pointers to NULL this way is technically + illegal, but porting Wget to a machine where NULL is not all-zero + bit pattern will be the least of the implementors' worries. 
*/ + xzero (opt); + opt.cookies = true; opt.verbose = -1; - opt.dir_prefix = xstrdup ("."); opt.ntry = 20; opt.reclevel = 5; - opt.add_hostdir = 1; - opt.ftp_acc = xstrdup ("anonymous"); - opt.ftp_pass = xstrdup ("-wget@"); - opt.netrc = 1; - opt.ftp_glob = 1; - opt.htmlify = 1; - opt.http_keep_alive = 1; - opt.use_proxy = 1; + opt.add_hostdir = true; + opt.netrc = true; + opt.ftp_glob = true; + opt.htmlify = true; + opt.http_keep_alive = true; + opt.use_proxy = true; tmp = getenv ("no_proxy"); if (tmp) opt.no_proxy = sepstring (tmp); - opt.allow_cache = 1; + opt.prefer_family = prefer_none; + opt.allow_cache = true; -#ifdef HAVE_SELECT - opt.timeout = 900; -#endif - opt.use_robots = 1; + opt.read_timeout = 900; + opt.use_robots = true; - opt.remove_listing = 1; + opt.remove_listing = true; opt.dot_bytes = 1024; opt.dot_spacing = 10; opt.dots_in_line = 50; - opt.dns_cache = 1; + opt.dns_cache = true; + opt.ftp_pasv = true; + +#ifdef HAVE_SSL + opt.check_cert = true; +#endif /* The default for file name restriction defaults to the OS type. 
*/ -#if !defined(WINDOWS) && !defined(__CYGWIN__) - opt.restrict_file_names = restrict_shell; +#if defined(WINDOWS) || defined(MSDOS) || defined(__CYGWIN__) + opt.restrict_files_os = restrict_windows; +#else + opt.restrict_files_os = restrict_unix; +#endif + opt.restrict_files_ctrl = true; + opt.restrict_files_nonascii = false; + opt.restrict_files_case = restrict_no_case_restriction; + + opt.regex_type = regex_type_posix; + + opt.max_redirect = 20; + + opt.waitretry = 10; + +#ifdef ENABLE_IRI + opt.enable_iri = true; +#else + opt.enable_iri = false; +#endif + opt.locale = NULL; + opt.encoding_remote = NULL; + + opt.useservertimestamps = true; + opt.show_all_dns_entries = false; + + opt.warc_maxsize = 0; /* 1024 * 1024 * 1024; */ +#ifdef HAVE_LIBZ + opt.warc_compression_enabled = true; #else - opt.restrict_file_names = restrict_windows; + opt.warc_compression_enabled = false; #endif + opt.warc_digests_enabled = true; + opt.warc_cdx_enabled = false; + opt.warc_cdx_dedup_filename = NULL; + opt.warc_tempdir = NULL; + opt.warc_keep_log = true; + + /* Use a negative value to mark the absence of --start-pos option */ + opt.start_pos = -1; + opt.show_progress = false; } /* Return the user's home directory (strdup-ed), or NULL if none is @@ -297,73 +420,97 @@ defaults (void) char * home_dir (void) { - char *home = getenv ("HOME"); + static char *buf = NULL; + static char *home, *ret; if (!home) { -#ifndef WINDOWS - /* If HOME is not defined, try getting it from the password - file. */ - struct passwd *pwd = getpwuid (getuid ()); - if (!pwd || !pwd->pw_dir) - return NULL; - home = pwd->pw_dir; -#else /* WINDOWS */ - home = "C:\\"; - /* #### Maybe I should grab home_dir from registry, but the best - that I could get from there is user's Start menu. It sucks! */ + home = getenv ("HOME"); + if (!home) + { +#if defined(MSDOS) + int len; + + /* Under MSDOS, if $HOME isn't defined, use the directory where + `wget.exe' resides. 
*/ + const char *_w32_get_argv0 (void); /* in libwatt.a/pcconfig.c */ + char *p; + + buff = _w32_get_argv0 (); + + p = strrchr (buf, '/'); /* djgpp */ + if (!p) + p = strrchr (buf, '\\'); /* others */ + assert (p); + + len = p - buff + 1; + buff = malloc (len + 1); + if (buff == NULL) + return NULL; + + strncpy (buff, _w32_get_argv0 (), len); + buff[len] = '\0'; + + home = buf; +#elif !defined(WINDOWS) + /* If HOME is not defined, try getting it from the password + file. */ + struct passwd *pwd = getpwuid (getuid ()); + if (!pwd || !pwd->pw_dir) + return NULL; + home = pwd->pw_dir; +#else /* !WINDOWS */ + /* Under Windows, if $HOME isn't defined, use the directory where + `wget.exe' resides. */ + home = ws_mypath (); #endif /* WINDOWS */ + } } - return home ? xstrdup (home) : NULL; -} + ret = home ? xstrdup (home) : NULL; + free (buf); -/* Return the path to the user's .wgetrc. This is either the value of - `WGETRC' environment variable, or `$HOME/.wgetrc'. + return ret; +} +/* Check the 'WGETRC' environment variable and return the file name + if 'WGETRC' is set and is a valid file. If the `WGETRC' variable exists but the file does not exist, the function will exit(). */ -static char * -wgetrc_file_name (void) +char * +wgetrc_env_file_name (void) { - char *env, *home; - char *file = NULL; - - /* Try the environment. */ - env = getenv ("WGETRC"); + char *env = getenv ("WGETRC"); if (env && *env) { if (!file_exists_p (env)) - { - fprintf (stderr, "%s: %s: %s.\n", exec_name, env, strerror (errno)); - exit (1); - } + { + fprintf (stderr, _("%s: WGETRC points to %s, which doesn't exist.\n"), + exec_name, env); + exit (1); + } return xstrdup (env); } + return NULL; +} -#ifndef WINDOWS - /* If that failed, try $HOME/.wgetrc. */ +/* Check for the existance of '$HOME/.wgetrc' and return its path + if it exists and is set. */ +char * +wgetrc_user_file_name (void) +{ + char *home; + char *file = NULL; + /* If that failed, try $HOME/.wgetrc (or equivalent). 
*/ + +#ifdef __VMS + file = "SYS$LOGIN:.wgetrc"; +#else /* def __VMS */ home = home_dir (); if (home) - { - file = (char *)xmalloc (strlen (home) + 1 + strlen (".wgetrc") + 1); - sprintf (file, "%s/.wgetrc", home); - } - FREE_MAYBE (home); -#else /* WINDOWS */ - /* Under Windows, "home" is (for the purposes of this function) the - directory where `wget.exe' resides, and `wget.ini' will be used - as file name. SYSTEM_WGETRC should not be defined under WINDOWS. - - It is not as trivial as I assumed, because on 95 argv[0] is full - path, but on NT you get what you typed in command line. --dbudor */ - home = ws_mypath (); - if (home) - { - file = (char *)xmalloc (strlen (home) + strlen ("wget.ini") + 1); - sprintf (file, "%swget.ini", home); - } -#endif /* WINDOWS */ + file = aprintf ("%s/.wgetrc", home); + xfree_null (home); +#endif /* def __VMS [else] */ if (!file) return NULL; @@ -375,47 +522,119 @@ wgetrc_file_name (void) return file; } -/* Initialize variables from a wgetrc file */ -static void +/* Return the path to the user's .wgetrc. This is either the value of + `WGETRC' environment variable, or `$HOME/.wgetrc'. + + Additionally, for windows, look in the directory where wget.exe + resides. */ +char * +wgetrc_file_name (void) +{ + char *file = wgetrc_env_file_name (); + if (file && *file) + return file; + + file = wgetrc_user_file_name (); + +#ifdef WINDOWS + /* Under Windows, if we still haven't found .wgetrc, look for the file + `wget.ini' in the directory where `wget.exe' resides; we do this for + backward compatibility with previous versions of Wget. + SYSTEM_WGETRC should not be defined under WINDOWS. */ + if (!file) + { + char *home = home_dir (); + xfree_null (file); + file = NULL; + home = ws_mypath (); + if (home) + { + file = aprintf ("%s/wget.ini", home); + if (!file_exists_p (file)) + { + xfree (file); + file = NULL; + } + xfree (home); + } + } +#endif /* WINDOWS */ + + return file; +} + +/* Return values of parse_line. 
*/ +enum parse_line { + line_ok, + line_empty, + line_syntax_error, + line_unknown_command +}; + +static enum parse_line parse_line (const char *, char **, char **, int *); +static bool setval_internal (int, const char *, const char *); +static bool setval_internal_tilde (int, const char *, const char *); + +/* Initialize variables from a wgetrc file. Returns zero (failure) if + there were errors in the file. */ + +bool run_wgetrc (const char *file) { FILE *fp; - char *line; + char *line = NULL; + size_t bufsize = 0; int ln; + int errcnt = 0; - fp = fopen (file, "rb"); + fp = fopen (file, "r"); if (!fp) { fprintf (stderr, _("%s: Cannot read %s (%s).\n"), exec_name, - file, strerror (errno)); - return; + file, strerror (errno)); + return true; /* not a fatal error */ } - enable_tilde_expansion = 1; ln = 1; - while ((line = read_whole_line (fp))) + while (getline (&line, &bufsize, fp) > 0) { - char *com, *val; - int status; + char *com = NULL, *val = NULL; + int comind; /* Parse the line. */ - status = parse_line (line, &com, &val); - xfree (line); - /* If everything is OK, set the value. */ - if (status == 1) - { - if (!setval (com, val)) - fprintf (stderr, _("%s: Error in %s at line %d.\n"), exec_name, - file, ln); - xfree (com); - xfree (val); - } - else if (status == 0) - fprintf (stderr, _("%s: Error in %s at line %d.\n"), exec_name, - file, ln); + switch (parse_line (line, &com, &val, &comind)) + { + case line_ok: + /* If everything is OK, set the value. 
*/ + if (!setval_internal_tilde (comind, com, val)) + { + fprintf (stderr, _("%s: Error in %s at line %d.\n"), + exec_name, file, ln); + ++errcnt; + } + break; + case line_syntax_error: + fprintf (stderr, _("%s: Syntax error in %s at line %d.\n"), + exec_name, file, ln); + ++errcnt; + break; + case line_unknown_command: + fprintf (stderr, _("%s: Unknown command %s in %s at line %d.\n"), + exec_name, quote (com), file, ln); + ++errcnt; + break; + case line_empty: + break; + default: + abort (); + } + xfree_null (com); + xfree_null (val); ++ln; } - enable_tilde_expansion = 0; + xfree (line); fclose (fp); + + return errcnt == 0; } /* Initialize the defaults and run the system wgetrc and user's own @@ -423,341 +642,405 @@ run_wgetrc (const char *file) void initialize (void) { - char *file; + char *file, *env_sysrc; + bool ok = true; - /* Load the hard-coded defaults. */ - defaults (); - - /* If SYSTEM_WGETRC is defined, use it. */ + /* Run a non-standard system rc file when the according environment + variable has been set. For internal testing purposes only! */ + env_sysrc = getenv ("SYSTEM_WGETRC"); + if (env_sysrc && file_exists_p (env_sysrc)) + { + ok &= run_wgetrc (env_sysrc); + /* If there are any problems parsing the system wgetrc file, tell + the user and exit */ + if (! ok) + { + fprintf (stderr, _("\ +Parsing system wgetrc file (env SYSTEM_WGETRC) failed. Please check\n\ +'%s',\n\ +or specify a different file using --config.\n"), env_sysrc); + exit (2); + } + } + /* Otherwise, if SYSTEM_WGETRC is defined, use it. */ #ifdef SYSTEM_WGETRC - if (file_exists_p (SYSTEM_WGETRC)) - run_wgetrc (SYSTEM_WGETRC); + else if (file_exists_p (SYSTEM_WGETRC)) + ok &= run_wgetrc (SYSTEM_WGETRC); + /* If there are any problems parsing the system wgetrc file, tell + the user and exit */ + if (! ok) + { + fprintf (stderr, _("\ +Parsing system wgetrc file failed. 
Please check\n\ +'%s',\n\ +or specify a different file using --config.\n"), SYSTEM_WGETRC); + exit (2); + } #endif /* Override it with your own, if one exists. */ file = wgetrc_file_name (); if (!file) return; - /* #### We should somehow canonicalize `file' and SYSTEM_WGETRC, - really. */ + /* #### We should canonicalize `file' and SYSTEM_WGETRC with + something like realpath() before comparing them with `strcmp' */ #ifdef SYSTEM_WGETRC if (!strcmp (file, SYSTEM_WGETRC)) { fprintf (stderr, _("\ -%s: Warning: Both system and user wgetrc point to `%s'.\n"), - exec_name, file); +%s: Warning: Both system and user wgetrc point to %s.\n"), + exec_name, quote (file)); } else #endif - run_wgetrc (file); + ok &= run_wgetrc (file); + + /* If there were errors processing either `.wgetrc', abort. */ + if (!ok) + exit (2); + xfree (file); return; } - + +/* Remove dashes and underscores from S, modifying S in the + process. */ + +static void +dehyphen (char *s) +{ + char *t = s; /* t - tortoise */ + char *h = s; /* h - hare */ + while (*h) + if (*h == '_' || *h == '-') + ++h; + else + *t++ = *h++; + *t = '\0'; +} + /* Parse the line pointed by line, with the syntax: - * command * = * value + * command * = * value * Uses malloc to allocate space for command and value. - If the line is invalid, data is freed and 0 is returned. - - Return values: - 1 - success - 0 - failure - -1 - empty */ -int -parse_line (const char *line, char **com, char **val) + + Returns one of line_ok, line_empty, line_syntax_error, or + line_unknown_command. + + In case of line_ok, *COM and *VAL point to freshly allocated + strings, and *COMIND points to com's index. In case of error or + empty line, their values are unmodified. 
*/ + +static enum parse_line +parse_line (const char *line, char **com, char **val, int *comind) { - const char *p = line; - const char *orig_comptr, *end; - char *new_comptr; + const char *p; + const char *end = line + strlen (line); + const char *cmdstart, *cmdend; + const char *valstart, *valend; + + char *cmdcopy; + int ind; + + /* Skip leading and trailing whitespace. */ + while (*line && c_isspace (*line)) + ++line; + while (end > line && c_isspace (end[-1])) + --end; - /* Skip whitespace. */ - while (*p && ISSPACE (*p)) + /* Skip empty lines and comments. */ + if (!*line || *line == '#') + return line_empty; + + p = line; + + cmdstart = p; + while (p < end && (c_isalnum (*p) || *p == '_' || *p == '-')) ++p; + cmdend = p; - /* Don't process empty lines. */ - if (!*p || *p == '#') - return -1; + /* Skip '=', as well as any space before or after it. */ + while (p < end && c_isspace (*p)) + ++p; + if (p == end || *p != '=') + return line_syntax_error; + ++p; + while (p < end && c_isspace (*p)) + ++p; - for (orig_comptr = p; ISALPHA (*p) || *p == '_' || *p == '-'; p++) - ; - /* The next char should be space or '='. */ - if (!ISSPACE (*p) && (*p != '=')) - return 0; - /* Here we cannot use strdupdelim() as we normally would because we - want to skip the `-' and `_' characters in the input string. */ - *com = (char *)xmalloc (p - orig_comptr + 1); - for (new_comptr = *com; orig_comptr < p; orig_comptr++) - { - if (*orig_comptr == '_' || *orig_comptr == '-') - continue; - *new_comptr++ = *orig_comptr; - } - *new_comptr = '\0'; - /* If the command is invalid, exit now. */ - if (comind (*com) == -1) - { - xfree (*com); - return 0; - } + valstart = p; + valend = end; + + /* The syntax is valid (even though the command might not be). Fill + in the command name and value. */ + *com = strdupdelim (cmdstart, cmdend); + *val = strdupdelim (valstart, valend); + + /* The line now known to be syntactically correct. Check whether + the command is valid. 
*/ + BOUNDED_TO_ALLOCA (cmdstart, cmdend, cmdcopy); + dehyphen (cmdcopy); + ind = command_by_name (cmdcopy); + if (ind == -1) + return line_unknown_command; + + /* Report success to the caller. */ + *comind = ind; + return line_ok; +} + +#if defined(WINDOWS) || defined(MSDOS) +# define ISSEP(c) ((c) == '/' || (c) == '\\') +#else +# define ISSEP(c) ((c) == '/') +#endif + +/* Run commands[comind].action. */ + +static bool +setval_internal (int comind, const char *com, const char *val) +{ + assert (0 <= comind && ((size_t) comind) < countof (commands)); + DEBUGP (("Setting %s (%s) to %s\n", com, commands[comind].name, val)); + return commands[comind].action (com, val, commands[comind].place); +} - /* Skip spaces before '='. */ - for (; ISSPACE (*p); p++); - /* If '=' not found, bail out. */ - if (*p != '=') +static bool +setval_internal_tilde (int comind, const char *com, const char *val) +{ + bool ret; + int homelen; + char *home; + char **pstring; + ret = setval_internal (comind, com, val); + + /* We make tilde expansion for cmd_file and cmd_directory */ + if (((commands[comind].action == cmd_file) || + (commands[comind].action == cmd_directory)) + && ret && (*val == '~' && ISSEP (val[1]))) { - xfree (*com); - return 0; + pstring = commands[comind].place; + home = home_dir (); + if (home) + { + homelen = strlen (home); + while (homelen && ISSEP (home[homelen - 1])) + home[--homelen] = '\0'; + + /* Skip the leading "~/". */ + for (++val; ISSEP (*val); val++) + ; + *pstring = concat_strings (home, "/", val, (char *)0); + } } - /* Skip spaces after '='. */ - for (++p; ISSPACE (*p); p++); - /* Get the ending position for VAL by starting with the end of the - line and skipping whitespace. */ - end = line + strlen (line) - 1; - while (end > p && ISSPACE (*end)) - --end; - *val = strdupdelim (p, end + 1); - return 1; + return ret; } -/* Set COM to VAL. This is the meat behind processing `.wgetrc'. No - fatals -- error signal prints a warning and resets to default - value. 
All error messages are printed to stderr, *not* to - opt.lfile, since opt.lfile wasn't even generated yet. */ -int -setval (const char *com, const char *val) +/* Run command COM with value VAL. If running the command produces an + error, report the error and exit. + + This is intended to be called from main() to modify Wget's behavior + through command-line switches. Since COM is hard-coded in main(), + it is not canonicalized, and this aborts when COM is not found. + + If COMIND's are exported to init.h, this function will be changed + to accept COMIND directly. */ + +void +setoptval (const char *com, const char *val, const char *optname) { - int ind; + /* Prepend "--" to OPTNAME. */ + char *dd_optname = (char *) alloca (2 + strlen (optname) + 1); + dd_optname[0] = '-'; + dd_optname[1] = '-'; + strcpy (dd_optname + 2, optname); + + assert (val != NULL); + if (!setval_internal (command_by_name (com), dd_optname, val)) + exit (2); +} - if (!com || !val) - return 0; - ind = comind (com); - if (ind == -1) +/* Parse OPT into command and value and run it. For example, + run_command("foo=bar") is equivalent to setoptval("foo", "bar"). + This is used by the `--execute' flag in main.c. */ + +void +run_command (const char *cmdopt) +{ + char *com, *val; + int comind; + switch (parse_line (cmdopt, &com, &val, &comind)) { - /* #### Should I just abort()? */ -#ifdef DEBUG - fprintf (stderr, _("%s: BUG: unknown command `%s', value `%s'.\n"), - exec_name, com, val); -#endif - return 0; + case line_ok: + if (!setval_internal (comind, com, val)) + exit (2); + xfree (com); + xfree (val); + break; + default: + fprintf (stderr, _("%s: Invalid --execute command %s\n"), + exec_name, quote (cmdopt)); + exit (2); } - return ((*commands[ind].action) (com, val, commands[ind].closure)); } /* Generic helper functions, for use with `commands'. 
*/ -static int myatoi PARAMS ((const char *s)); +/* Forward declarations: */ +struct decode_item { + const char *name; + int code; +}; +static bool decode_string (const char *, const struct decode_item *, int, int *); +static bool simple_atoi (const char *, const char *, int *); +static bool simple_atof (const char *, const char *, double *); -/* Store the boolean value from VAL to CLOSURE. COM is ignored, - except for error messages. */ -static int -cmd_boolean (const char *com, const char *val, void *closure) -{ - int bool_value; - const char *v = val; -#define LC(x) TOLOWER(x) - - if ((LC(v[0]) == 'o' && LC(v[1]) == 'n' && !v[2]) - || - (LC(v[0]) == 'y' && LC(v[1]) == 'e' && LC(v[2]) == 's' && !v[3]) - || - (v[0] == '1' && !v[1])) - /* "on", "yes" and "1" mean true. */ - bool_value = 1; - else if ((LC(v[0]) == 'o' && LC(v[1]) == 'f' && LC(v[2]) == 'f' && !v[3]) - || - (LC(v[0]) == 'n' && LC(v[1]) == 'o' && !v[2]) - || - (v[0] == '0' && !v[1])) - /* "off", "no" and "0" mean false. */ - bool_value = 0; - else - { - fprintf (stderr, _("%s: %s: Please specify on or off.\n"), - exec_name, com); - return 0; - } +#define CMP1(p, c0) (c_tolower((p)[0]) == (c0) && (p)[1] == '\0') - *(int *)closure = bool_value; - return 1; -} +#define CMP2(p, c0, c1) (c_tolower((p)[0]) == (c0) \ + && c_tolower((p)[1]) == (c1) \ + && (p)[2] == '\0') -/* Store the lockable_boolean {2, 1, 0, -1} value from VAL to CLOSURE. COM is - ignored, except for error messages. Values 2 and -1 indicate that once - defined, the value may not be changed by successive wgetrc files or - command-line arguments. 
+#define CMP3(p, c0, c1, c2) (c_tolower((p)[0]) == (c0) \ + && c_tolower((p)[1]) == (c1) \ + && c_tolower((p)[2]) == (c2) \ + && (p)[3] == '\0') - Values: 2 - Enable a particular option for good ("always") - 1 - Enable an option ("on") - 0 - Disable an option ("off") - -1 - Disable an option for good ("never") */ -static int -cmd_lockable_boolean (const char *com, const char *val, void *closure) + +/* Store the boolean value from VAL to PLACE. COM is ignored, + except for error messages. */ +static bool +cmd_boolean (const char *com, const char *val, void *place) { - int lockable_boolean_value; - - /* - * If a config file said "always" or "never", don't allow command line - * arguments to override the config file. - */ - if (*(int *)closure == -1 || *(int *)closure == 2) - return 1; - - if (!strcasecmp (val, "always") || !strcmp (val, "2")) - lockable_boolean_value = 2; - else if (!strcasecmp (val, "on") - || !strcasecmp (val, "yes") - || !strcmp (val, "1")) - lockable_boolean_value = 1; - else if (!strcasecmp (val, "off") - || !strcasecmp (val, "no") - || !strcmp (val, "0")) - lockable_boolean_value = 0; - else if (!strcasecmp (val, "never") || !strcmp (val, "-1")) - lockable_boolean_value = -1; + bool value; + + if (CMP2 (val, 'o', 'n') || CMP3 (val, 'y', 'e', 's') || CMP1 (val, '1')) + /* "on", "yes" and "1" mean true. */ + value = true; + else if (CMP3 (val, 'o', 'f', 'f') || CMP2 (val, 'n', 'o') || CMP1 (val, '0')) + /* "off", "no" and "0" mean false. */ + value = false; else { - fprintf (stderr, _("%s: %s: Please specify always, on, off, " - "or never.\n"), - exec_name, com); - return 0; + fprintf (stderr, + _("%s: %s: Invalid boolean %s; use `on' or `off'.\n"), + exec_name, com, quote (val)); + return false; } - *(int *)closure = lockable_boolean_value; - return 1; + *(bool *) place = value; + return true; } -/* Set the non-negative integer value from VAL to CLOSURE. With +/* Set the non-negative integer value from VAL to PLACE. 
With incorrect specification, the number remains unchanged. */ -static int -cmd_number (const char *com, const char *val, void *closure) +static bool +cmd_number (const char *com, const char *val, void *place) { - int num = myatoi (val); - - if (num == -1) + if (!simple_atoi (val, val + strlen (val), place) + || *(int *) place < 0) { - fprintf (stderr, _("%s: %s: Invalid specification `%s'.\n"), - exec_name, com, val); - return 0; + fprintf (stderr, _("%s: %s: Invalid number %s.\n"), + exec_name, com, quote (val)); + return false; } - *(int *)closure = num; - return 1; + return true; } /* Similar to cmd_number(), only accepts `inf' as a synonym for 0. */ -static int -cmd_number_inf (const char *com, const char *val, void *closure) +static bool +cmd_number_inf (const char *com, const char *val, void *place) { if (!strcasecmp (val, "inf")) { - *(int *)closure = 0; - return 1; + *(int *) place = 0; + return true; } - return cmd_number (com, val, closure); + return cmd_number (com, val, place); } /* Copy (strdup) the string at COM to a new location and place a - pointer to *CLOSURE. */ -static int -cmd_string (const char *com, const char *val, void *closure) + pointer to *PLACE. */ +static bool +cmd_string (const char *com _GL_UNUSED, const char *val, void *place) { - char **pstring = (char **)closure; + char **pstring = (char **)place; - FREE_MAYBE (*pstring); + xfree_null (*pstring); *pstring = xstrdup (val); - return 1; + return true; } -/* Like the above, but handles tilde-expansion when reading a user's - `.wgetrc'. In that case, and if VAL begins with `~', the tilde - gets expanded to the user's home directory. */ -static int -cmd_file (const char *com, const char *val, void *closure) +/* Like cmd_string but ensure the string is upper case. 
*/ +static bool +cmd_string_uppercase (const char *com _GL_UNUSED, const char *val, void *place) { - char **pstring = (char **)closure; + char *q, **pstring; + pstring = (char **)place; + xfree_null (*pstring); - FREE_MAYBE (*pstring); + *pstring = xmalloc (strlen (val) + 1); - /* #### If VAL is empty, perhaps should set *CLOSURE to NULL. */ + for (q = *pstring; *val; val++, q++) + *q = c_toupper (*val); - if (!enable_tilde_expansion || !(*val == '~' && (*(val + 1) == '/' -#ifdef WINDOWS - || *(val + 1) == '\\' -#endif - ))) - { - noexpand: - *pstring = xstrdup (val); - } - else - { - char *result; - int homelen; - char *home = home_dir (); - if (!home) - goto noexpand; + *q = '\0'; + return true; +} - homelen = strlen (home); - while (homelen && (home[homelen - 1] == '/' -#ifdef WINDOWS - || home[homelen - 1] == '\\' -#endif - )) - home[--homelen] = '\0'; - /* Skip the leading "~/". */ -#ifdef WINDOWS - for (++val; *val == '/' || *val == '\\'; val++) - ; -#else - for (++val; *val == '/'; val++) - ; -#endif +/* Like cmd_string, but handles tilde-expansion when reading a user's + `.wgetrc'. In that case, and if VAL begins with `~', the tilde + gets expanded to the user's home directory. */ +static bool +cmd_file (const char *com _GL_UNUSED, const char *val, void *place) +{ + char **pstring = (char **)place; - result = xmalloc (homelen + 1 + strlen (val) + 1); - memcpy (result, home, homelen); - result[homelen] = '/'; - strcpy (result + homelen + 1, val); + xfree_null (*pstring); - *pstring = result; - } -#ifdef WINDOWS + /* #### If VAL is empty, perhaps should set *PLACE to NULL. */ + + *pstring = xstrdup (val); + +#if defined(WINDOWS) || defined(MSDOS) /* Convert "\" to "/". */ { char *s; for (s = *pstring; *s; s++) if (*s == '\\') - *s = '/'; + *s = '/'; } #endif - return 1; + return true; } /* Like cmd_file, but strips trailing '/' characters. 
*/ -static int -cmd_directory (const char *com, const char *val, void *closure) +static bool +cmd_directory (const char *com, const char *val, void *place) { char *s, *t; /* Call cmd_file() for tilde expansion and separator canonicalization (backslash -> slash under Windows). These things should perhaps be in a separate function. */ - if (!cmd_file (com, val, closure)) - return 0; + if (!cmd_file (com, val, place)) + return false; - s = *(char **)closure; + s = *(char **)place; t = s + strlen (s); while (t > s && *--t == '/') *t = '\0'; - return 1; + return true; } -/* Merge the vector (array of strings separated with `,') in COM with - the vector (NULL-terminated array of strings) pointed to by - CLOSURE. */ -static int -cmd_vector (const char *com, const char *val, void *closure) +/* Split VAL by space to a vector of values, and append those values + to vector pointed to by the PLACE argument. If VAL is empty, the + PLACE vector is cleared instead. */ + +static bool +cmd_vector (const char *com _GL_UNUSED, const char *val, void *place) { - char ***pvec = (char ***)closure; + char ***pvec = (char ***)place; if (*val) *pvec = merge_vecs (*pvec, sepstring (val)); @@ -766,13 +1049,13 @@ cmd_vector (const char *com, const char *val, void *closure) free_vec (*pvec); *pvec = NULL; } - return 1; + return true; } -static int -cmd_directory_vector (const char *com, const char *val, void *closure) +static bool +cmd_directory_vector (const char *com _GL_UNUSED, const char *val, void *place) { - char ***pvec = (char ***)closure; + char ***pvec = (char ***)place; if (*val) { @@ -781,15 +1064,15 @@ cmd_directory_vector (const char *com, const char *val, void *closure) seps = sepstring (val); for (t = seps; t && *t; t++) - { - int len = strlen (*t); - /* Skip degenerate case of root directory. */ - if (len > 1) - { - if ((*t)[len - 1] == '/') - (*t)[len - 1] = '\0'; - } - } + { + int len = strlen (*t); + /* Skip degenerate case of root directory. 
*/ + if (len > 1) + { + if ((*t)[len - 1] == '/') + (*t)[len - 1] = '\0'; + } + } *pvec = merge_vecs (*pvec, seps); } else @@ -797,302 +1080,623 @@ cmd_directory_vector (const char *com, const char *val, void *closure) free_vec (*pvec); *pvec = NULL; } - return 1; + return true; } -/* Set the value stored in VAL to CLOSURE (which should point to a - long int), allowing several postfixes, with the following syntax - (regexp): - - [0-9]+ -> bytes - [0-9]+[kK] -> bytes * 1024 - [0-9]+[mM] -> bytes * 1024 * 1024 - inf -> 0 +/* Engine for cmd_bytes and cmd_bytes_sum: converts a string such as + "100k" or "2.5G" to a floating point number. */ - Anything else is flagged as incorrect, and CLOSURE is unchanged. */ -static int -cmd_bytes (const char *com, const char *val, void *closure) +static bool +parse_bytes_helper (const char *val, double *result) { - long result; - long *out = (long *)closure; - const char *p; + double number, mult; + const char *end = val + strlen (val); - result = 0; - p = val; /* Check for "inf". */ - if (p[0] == 'i' && p[1] == 'n' && p[2] == 'f' && p[3] == '\0') + if (0 == strcmp (val, "inf")) { - *out = 0; - return 1; + *result = 0; + return true; } - /* Search for digits and construct result. */ - for (; *p && ISDIGIT (*p); p++) - result = (10 * result) + (*p - '0'); - /* If no digits were found, or more than one character is following - them, bail out. */ - if (p == val || (*p != '\0' && *(p + 1) != '\0')) - { - printf (_("%s: Invalid specification `%s'\n"), com, val); - return 0; - } - /* Search for a designator. */ - switch (TOLOWER (*p)) + + /* Strip trailing whitespace. 
*/ + while (val < end && c_isspace (end[-1])) + --end; + if (val == end) + return false; + + switch (c_tolower (end[-1])) { - case '\0': - /* None */ - break; case 'k': - /* Kilobytes */ - result *= 1024; + --end, mult = 1024.0; break; case 'm': - /* Megabytes */ - result *= (long)1024 * 1024; + --end, mult = 1048576.0; break; case 'g': - /* Gigabytes */ - result *= (long)1024 * 1024 * 1024; + --end, mult = 1073741824.0; + break; + case 't': + --end, mult = 1099511627776.0; break; default: - printf (_("%s: Invalid specification `%s'\n"), com, val); - return 0; + /* Not a recognized suffix: assume it's a digit. (If not, + simple_atof will raise an error.) */ + mult = 1; } - *out = result; - return 1; + + /* Skip leading and trailing whitespace. */ + while (val < end && c_isspace (*val)) + ++val; + while (val < end && c_isspace (end[-1])) + --end; + if (val == end) + return false; + + if (!simple_atof (val, end, &number) || number < 0) + return false; + + *result = number * mult; + return true; } -/* Store the value of VAL to *OUT, allowing suffixes for minutes and - hours. */ -static int -cmd_time (const char *com, const char *val, void *closure) +/* Parse VAL as a number and set its value to PLACE (which should + point to a wgint). + + By default, the value is assumed to be in bytes. If "K", "M", or + "G" are appended, the value is multiplied with 1<<10, 1<<20, or + 1<<30, respectively. Floating point values are allowed and are + cast to integer before use. The idea is to be able to use things + like 1.5k instead of "1536". + + The string "inf" is returned as 0. + + In case of error, false is returned and memory pointed to by PLACE + remains unmodified. */ + +static bool +cmd_bytes (const char *com, const char *val, void *place) { - long result = 0; - const char *p = val; - - /* Search for digits and construct result. 
*/ - for (; *p && ISDIGIT (*p); p++) - result = (10 * result) + (*p - '0'); - /* If no digits were found, or more than one character is following - them, bail out. */ - if (p == val || (*p != '\0' && *(p + 1) != '\0')) + double byte_value; + if (!parse_bytes_helper (val, &byte_value)) { - printf (_("%s: Invalid specification `%s'\n"), com, val); - return 0; + fprintf (stderr, _("%s: %s: Invalid byte value %s\n"), + exec_name, com, quote (val)); + return false; } - /* Search for a suffix. */ - switch (TOLOWER (*p)) + *(wgint *)place = (wgint)byte_value; + return true; +} + +/* Like cmd_bytes, but PLACE is interpreted as a pointer to + SIZE_SUM. It works by converting the string to double, therefore + working with values up to 2^53-1 without loss of precision. This + value (8192 TB) is large enough to serve for a while. */ + +static bool +cmd_bytes_sum (const char *com, const char *val, void *place) +{ + double byte_value; + if (!parse_bytes_helper (val, &byte_value)) { - case '\0': - /* None */ + fprintf (stderr, _("%s: %s: Invalid byte value %s\n"), + exec_name, com, quote (val)); + return false; + } + *(SUM_SIZE_INT *) place = (SUM_SIZE_INT) byte_value; + return true; +} + +/* Store the value of VAL to *OUT. The value is a time period, by + default expressed in seconds, but also accepting suffixes "m", "h", + "d", and "w" for minutes, hours, days, and weeks respectively. */ + +static bool +cmd_time (const char *com, const char *val, void *place) +{ + double number, mult; + const char *end = val + strlen (val); + + /* Strip trailing whitespace. 
*/ + while (val < end && c_isspace (end[-1])) + --end; + + if (val == end) + { + err: + fprintf (stderr, _("%s: %s: Invalid time period %s\n"), + exec_name, com, quote (val)); + return false; + } + + switch (c_tolower (end[-1])) + { + case 's': + --end, mult = 1; /* seconds */ break; case 'm': - /* Minutes */ - result *= 60; + --end, mult = 60; /* minutes */ break; case 'h': - /* Seconds */ - result *= 3600; + --end, mult = 3600; /* hours */ break; case 'd': - /* Days (overflow on 16bit machines) */ - result *= 86400L; + --end, mult = 86400.0; /* days */ break; case 'w': - /* Weeks :-) */ - result *= 604800L; + --end, mult = 604800.0; /* weeks */ break; default: - printf (_("%s: Invalid specification `%s'\n"), com, val); - return 0; + /* Not a recognized suffix: assume it belongs to the number. + (If not, simple_atof will raise an error.) */ + mult = 1; } - *(long *)closure = result; - return 1; + + /* Skip leading and trailing whitespace. */ + while (val < end && c_isspace (*val)) + ++val; + while (val < end && c_isspace (end[-1])) + --end; + if (val == end) + goto err; + + if (!simple_atof (val, end, &number)) + goto err; + + *(double *)place = number * mult; + return true; } + +#ifdef HAVE_SSL +static bool +cmd_cert_type (const char *com, const char *val, void *place) +{ + static const struct decode_item choices[] = { + { "pem", keyfile_pem }, + { "der", keyfile_asn1 }, + { "asn1", keyfile_asn1 }, + }; + int ok = decode_string (val, choices, countof (choices), place); + if (!ok) + fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val)); + return ok; +} +#endif /* Specialized helper functions, used by `commands' to handle some options specially. 
*/ -static int check_user_specified_header PARAMS ((const char *)); +static bool check_user_specified_header (const char *); -static int -cmd_spec_dirstruct (const char *com, const char *val, void *closure) +static bool +cmd_spec_dirstruct (const char *com, const char *val, void *place_ignored _GL_UNUSED) { if (!cmd_boolean (com, val, &opt.dirstruct)) - return 0; + return false; /* Since dirstruct behaviour is explicitly changed, no_dirstruct must be affected inversely. */ if (opt.dirstruct) - opt.no_dirstruct = 0; + opt.no_dirstruct = false; else - opt.no_dirstruct = 1; - return 1; + opt.no_dirstruct = true; + return true; } -static int -cmd_spec_header (const char *com, const char *val, void *closure) +static bool +cmd_spec_header (const char *com, const char *val, void *place_ignored _GL_UNUSED) { - if (!*val) + /* Empty value means reset the list of headers. */ + if (*val == '\0') { - /* Empty header means reset headers. */ - FREE_MAYBE (opt.user_header); - opt.user_header = NULL; + free_vec (opt.user_headers); + opt.user_headers = NULL; + return true; } - else + + if (!check_user_specified_header (val)) { - int i; + fprintf (stderr, _("%s: %s: Invalid header %s.\n"), + exec_name, com, quote (val)); + return false; + } + opt.user_headers = vec_append (opt.user_headers, val); + return true; +} - if (!check_user_specified_header (val)) - { - fprintf (stderr, _("%s: %s: Invalid specification `%s'.\n"), - exec_name, com, val); - return 0; - } - i = opt.user_header ? strlen (opt.user_header) : 0; - opt.user_header = (char *)xrealloc (opt.user_header, i + strlen (val) - + 2 + 1); - strcpy (opt.user_header + i, val); - i += strlen (val); - opt.user_header[i++] = '\r'; - opt.user_header[i++] = '\n'; - opt.user_header[i] = '\0'; +static bool +cmd_spec_warc_header (const char *com, const char *val, void *place_ignored _GL_UNUSED) +{ + /* Empty value means reset the list of headers. 
*/ + if (*val == '\0') + { + free_vec (opt.warc_user_headers); + opt.warc_user_headers = NULL; + return true; + } + + if (!check_user_specified_header (val)) + { + fprintf (stderr, _("%s: %s: Invalid WARC header %s.\n"), + exec_name, com, quote (val)); + return false; } - return 1; + opt.warc_user_headers = vec_append (opt.warc_user_headers, val); + return true; } -static int -cmd_spec_htmlify (const char *com, const char *val, void *closure) +static bool +cmd_spec_htmlify (const char *com, const char *val, void *place_ignored _GL_UNUSED) { int flag = cmd_boolean (com, val, &opt.htmlify); if (flag && !opt.htmlify) - opt.remove_listing = 0; + opt.remove_listing = false; return flag; } -static int -cmd_spec_mirror (const char *com, const char *val, void *closure) +/* Set the "mirror" mode. It means: recursive download, timestamping, + no limit on max. recursion depth, and don't remove listings. */ + +static bool +cmd_spec_mirror (const char *com, const char *val, void *place_ignored _GL_UNUSED) { int mirror; if (!cmd_boolean (com, val, &mirror)) - return 0; + return false; if (mirror) { - opt.recursive = 1; + opt.recursive = true; if (!opt.no_dirstruct) - opt.dirstruct = 1; - opt.timestamping = 1; + opt.dirstruct = true; + opt.timestamping = true; opt.reclevel = INFINITE_RECURSION; - opt.remove_listing = 0; + opt.remove_listing = false; } - return 1; + return true; } -static int -cmd_spec_progress (const char *com, const char *val, void *closure) +/* Validate --prefer-family and set the choice. Allowed values are + "IPv4", "IPv6", and "none". 
*/ + +static bool +cmd_spec_prefer_family (const char *com, const char *val, void *place_ignored _GL_UNUSED) +{ + static const struct decode_item choices[] = { + { "IPv4", prefer_ipv4 }, + { "IPv6", prefer_ipv6 }, + { "none", prefer_none }, + }; + int prefer_family = prefer_none; + int ok = decode_string (val, choices, countof (choices), &prefer_family); + if (!ok) + fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val)); + opt.prefer_family = prefer_family; + return ok; +} + +/* Set progress.type to VAL, but verify that it's a valid progress + implementation before that. */ + +static bool +cmd_spec_progress (const char *com, const char *val, void *place_ignored _GL_UNUSED) { if (!valid_progress_implementation_p (val)) { - fprintf (stderr, _("%s: %s: Invalid progress type `%s'.\n"), - exec_name, com, val); - return 0; + fprintf (stderr, _("%s: %s: Invalid progress type %s.\n"), + exec_name, com, quote (val)); + return false; } - FREE_MAYBE (opt.progress_type); + xfree_null (opt.progress_type); /* Don't call set_progress_implementation here. It will be called in main() when it becomes clear what the log output is. */ opt.progress_type = xstrdup (val); - return 1; + return true; } -static int -cmd_spec_recursive (const char *com, const char *val, void *closure) +/* Set opt.recursive to VAL as with cmd_boolean. If opt.recursive is + set to true, also set opt.dirstruct to true, unless opt.no_dirstruct + is specified. */ + +static bool +cmd_spec_recursive (const char *com, const char *val, void *place_ignored _GL_UNUSED) { if (!cmd_boolean (com, val, &opt.recursive)) - return 0; + return false; else { if (opt.recursive && !opt.no_dirstruct) - opt.dirstruct = 1; + opt.dirstruct = true; } - return 1; + return true; } -static int -cmd_spec_restrict_file_names (const char *com, const char *val, void *closure) +/* Validate --regex-type and set the choice. 
*/ + +static bool +cmd_spec_regex_type (const char *com, const char *val, void *place_ignored _GL_UNUSED) { - /* The currently accepted values are `none', `unix', and - `windows'. */ - if (0 == strcasecmp (val, "none")) - opt.restrict_file_names = restrict_none; - else if (0 == strcasecmp (val, "unix")) - opt.restrict_file_names = restrict_shell; - else if (0 == strcasecmp (val, "windows")) - opt.restrict_file_names = restrict_windows; - else + static const struct decode_item choices[] = { + { "posix", regex_type_posix }, +#ifdef HAVE_LIBPCRE + { "pcre", regex_type_pcre }, +#endif + }; + int regex_type = regex_type_posix; + int ok = decode_string (val, choices, countof (choices), &regex_type); + if (!ok) + fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val)); + opt.regex_type = regex_type; + return ok; +} + +static bool +cmd_spec_restrict_file_names (const char *com, const char *val, void *place_ignored _GL_UNUSED) +{ + int restrict_os = opt.restrict_files_os; + int restrict_ctrl = opt.restrict_files_ctrl; + int restrict_case = opt.restrict_files_case; + int restrict_nonascii = opt.restrict_files_nonascii; + + const char *end; + +#define VAL_IS(string_literal) BOUNDED_EQUAL (val, end, string_literal) + + do { - fprintf (stderr, _("%s: %s: Invalid specification `%s'.\n"), - exec_name, com, val); - return 0; + end = strchr (val, ','); + if (!end) + end = val + strlen (val); + + if (VAL_IS ("unix")) + restrict_os = restrict_unix; + else if (VAL_IS ("windows")) + restrict_os = restrict_windows; + else if (VAL_IS ("lowercase")) + restrict_case = restrict_lowercase; + else if (VAL_IS ("uppercase")) + restrict_case = restrict_uppercase; + else if (VAL_IS ("nocontrol")) + restrict_ctrl = false; + else if (VAL_IS ("ascii")) + restrict_nonascii = true; + else + { + fprintf (stderr, _("\ +%s: %s: Invalid restriction %s,\n\ + use [unix|windows],[lowercase|uppercase],[nocontrol],[ascii].\n"), + exec_name, com, quote (val)); + return false; + } + + if 
(*end) + val = end + 1; } - return 1; + while (*val && *end); + +#undef VAL_IS + + opt.restrict_files_os = restrict_os; + opt.restrict_files_ctrl = restrict_ctrl; + opt.restrict_files_case = restrict_case; + opt.restrict_files_nonascii = restrict_nonascii; + + return true; } -static int -cmd_spec_useragent (const char *com, const char *val, void *closure) +static bool +cmd_spec_report_speed (const char *com, const char *val, void *place_ignored _GL_UNUSED) { - /* Just check for empty string and newline, so we don't throw total - junk to the server. */ - if (!*val || strchr (val, '\n')) + opt.report_bps = strcasecmp (val, "bits") == 0; + if (!opt.report_bps) + fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val)); + return opt.report_bps; +} + +#ifdef HAVE_SSL +static bool +cmd_spec_secure_protocol (const char *com, const char *val, void *place) +{ + static const struct decode_item choices[] = { + { "auto", secure_protocol_auto }, + { "sslv2", secure_protocol_sslv2 }, + { "sslv3", secure_protocol_sslv3 }, + { "tlsv1", secure_protocol_tlsv1 }, + { "pfs", secure_protocol_pfs }, + }; + int ok = decode_string (val, choices, countof (choices), place); + if (!ok) + fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val)); + return ok; +} +#endif + +/* Set all three timeout values. */ + +static bool +cmd_spec_timeout (const char *com, const char *val, void *place_ignored _GL_UNUSED) +{ + double value; + if (!cmd_time (com, val, &value)) + return false; + opt.read_timeout = value; + opt.connect_timeout = value; + opt.dns_timeout = value; + return true; +} + +static bool +cmd_spec_useragent (const char *com, const char *val, void *place_ignored _GL_UNUSED) +{ + /* Disallow embedded newlines. 
*/ + if (strchr (val, '\n')) { - fprintf (stderr, _("%s: %s: Invalid specification `%s'.\n"), - exec_name, com, val); - return 0; + fprintf (stderr, _("%s: %s: Invalid value %s.\n"), + exec_name, com, quote (val)); + return false; } + xfree_null (opt.useragent); opt.useragent = xstrdup (val); - return 1; + return true; +} + +/* The "verbose" option cannot be cmd_boolean because the variable is + not bool -- it's of type int (-1 means uninitialized because of + some random hackery for disallowing -q -v). */ + +static bool +cmd_spec_verbose (const char *com, const char *val, void *place_ignored _GL_UNUSED) +{ + bool flag; + if (cmd_boolean (com, val, &flag)) + { + opt.verbose = flag; + return true; + } + return false; } /* Miscellaneous useful routines. */ -/* Return the integer value of a positive integer written in S, or -1 - if an error was encountered. */ -static int -myatoi (const char *s) +/* A very simple atoi clone, more useful than atoi because it works on + delimited strings, and has error reportage. Returns true on success, + false on failure. If successful, stores result to *DEST. */ + +static bool +simple_atoi (const char *beg, const char *end, int *dest) { - int res; - const char *orig = s; + int result = 0; + bool negative = false; + const char *p = beg; - for (res = 0; *s && ISDIGIT (*s); s++) - res = 10 * res + (*s - '0'); - if (*s || orig == s) - return -1; + while (p < end && c_isspace (*p)) + ++p; + if (p < end && (*p == '-' || *p == '+')) + { + negative = (*p == '-'); + ++p; + } + if (p == end) + return false; + + /* Read negative numbers in a separate loop because the most + negative integer cannot be represented as a positive number. 
*/ + + if (!negative) + for (; p < end && c_isdigit (*p); p++) + { + int next = (10 * result) + (*p - '0'); + if (next < result) + return false; /* overflow */ + result = next; + } else - return res; + for (; p < end && c_isdigit (*p); p++) + { + int next = (10 * result) - (*p - '0'); + if (next > result) + return false; /* underflow */ + result = next; + } + + if (p != end) + return false; + + *dest = result; + return true; } -#define ISODIGIT(x) ((x) >= '0' && (x) <= '7') +/* Trivial atof, with error reporting. Handles "<digits>[.<digits>]", + doesn't handle exponential notation. Returns true on success, + false on failure. In case of success, stores its result to + *DEST. */ -static int +static bool +simple_atof (const char *beg, const char *end, double *dest) +{ + double result = 0; + + bool negative = false; + bool seen_dot = false; + bool seen_digit = false; + double divider = 1; + + const char *p = beg; + + while (p < end && c_isspace (*p)) + ++p; + if (p < end && (*p == '-' || *p == '+')) + { + negative = (*p == '-'); + ++p; + } + + for (; p < end; p++) + { + char ch = *p; + if (c_isdigit (ch)) + { + if (!seen_dot) + result = (10 * result) + (ch - '0'); + else + result += (ch - '0') / (divider *= 10); + seen_digit = true; + } + else if (ch == '.') + { + if (!seen_dot) + seen_dot = true; + else + return false; + } + else + return false; + } + if (!seen_digit) + return false; + if (negative) + result = -result; + + *dest = result; + return true; +} + +/* Verify that the user-specified header in S is valid. It must + contain a colon preceded by non-white-space characters and must not + contain newlines. */ + +static bool check_user_specified_header (const char *s) { const char *p; - for (p = s; *p && *p != ':' && !ISSPACE (*p); p++); + for (p = s; *p && *p != ':' && !c_isspace (*p); p++) + ; /* The header MUST contain `:' preceded by at least one non-whitespace character. */ if (*p != ':' || p == s) - return 0; + return false; /* The header MUST NOT contain newlines. 
*/ if (strchr (s, '\n')) - return 0; - return 1; + return false; + return true; } - -void cleanup_html_url PARAMS ((void)); -void res_cleanup PARAMS ((void)); -void downloaded_files_free PARAMS ((void)); -void http_cleanup PARAMS ((void)); +/* Decode VAL into a number, according to ITEMS. */ + +static bool +decode_string (const char *val, const struct decode_item *items, int itemcount, + int *place) +{ + int i; + for (i = 0; i < itemcount; i++) + if (0 == strcasecmp (val, items[i].name)) + { + *place = items[i].code; + return true; + } + return false; +} /* Free the memory allocated by global variables. */ void @@ -1100,8 +1704,18 @@ cleanup (void) { /* Free external resources, close files, etc. */ - if (opt.dfp) - fclose (opt.dfp); + /* Close WARC file. */ + if (opt.warc_filename != 0) + warc_close (); + + log_close (); + + if (output_stream) + if (fclose (output_stream) == EOF) + inform_exit_status (CLOSEFAILED); + + /* No need to check for error because Wget flushes its output (and + checks for errors) after any data arrives. */ /* We're exiting anyway so there's no real need to call free() hundreds of times. Skipping the frees will make Wget exit @@ -1112,22 +1726,23 @@ cleanup (void) memory which grows with the size of the program. 
*/ #ifdef DEBUG_MALLOC - recursive_cleanup (); + convert_cleanup (); res_cleanup (); http_cleanup (); cleanup_html_url (); - downloaded_files_free (); + spider_cleanup (); host_cleanup (); - cookie_jar_delete (wget_cookie_jar); + log_cleanup (); + netrc_cleanup (netrc_list); - { - extern acc_t *netrc_list; - free_netrc (netrc_list); - } - FREE_MAYBE (opt.lfilename); - xfree (opt.dir_prefix); - FREE_MAYBE (opt.input_filename); - FREE_MAYBE (opt.output_document); + for (i = 0; i < nurl; i++) + xfree (url[i]); + + xfree_null (opt.choose_config); + xfree_null (opt.lfilename); + xfree_null (opt.dir_prefix); + xfree_null (opt.input_filename); + xfree_null (opt.output_document); free_vec (opt.accepts); free_vec (opt.rejects); free_vec (opt.excludes); @@ -1135,24 +1750,97 @@ cleanup (void) free_vec (opt.domains); free_vec (opt.follow_tags); free_vec (opt.ignore_tags); - FREE_MAYBE (opt.progress_type); - xfree (opt.ftp_acc); - FREE_MAYBE (opt.ftp_pass); - FREE_MAYBE (opt.ftp_proxy); - FREE_MAYBE (opt.https_proxy); - FREE_MAYBE (opt.http_proxy); + xfree_null (opt.progress_type); + xfree_null (opt.ftp_user); + xfree_null (opt.ftp_passwd); + xfree_null (opt.ftp_proxy); + xfree_null (opt.https_proxy); + xfree_null (opt.http_proxy); free_vec (opt.no_proxy); - FREE_MAYBE (opt.useragent); - FREE_MAYBE (opt.referer); - FREE_MAYBE (opt.http_user); - FREE_MAYBE (opt.http_passwd); - FREE_MAYBE (opt.user_header); -#ifdef HAVE_SSL - FREE_MAYBE (opt.sslcertkey); - FREE_MAYBE (opt.sslcertfile); -#endif /* HAVE_SSL */ - FREE_MAYBE (opt.bind_address); - FREE_MAYBE (opt.cookies_input); - FREE_MAYBE (opt.cookies_output); -#endif + xfree_null (opt.useragent); + xfree_null (opt.referer); + xfree_null (opt.http_user); + xfree_null (opt.http_passwd); + free_vec (opt.user_headers); + free_vec (opt.warc_user_headers); +# ifdef HAVE_SSL + xfree_null (opt.cert_file); + xfree_null (opt.private_key); + xfree_null (opt.ca_directory); + xfree_null (opt.ca_cert); + xfree_null (opt.random_file); + 
xfree_null (opt.egd_file); +# endif + xfree_null (opt.bind_address); + xfree_null (opt.cookies_input); + xfree_null (opt.cookies_output); + xfree_null (opt.user); + xfree_null (opt.passwd); + xfree_null (opt.base_href); + xfree_null (opt.method); + +#endif /* DEBUG_MALLOC */ +} + +/* Unit testing routines. */ + +#ifdef TESTING + +const char * +test_commands_sorted(void) +{ + unsigned i; + + for (i = 1; i < countof(commands); ++i) + { + if (strcasecmp (commands[i - 1].name, commands[i].name) > 0) + { + mu_assert ("FAILED", false); + break; + } + } + return NULL; } + +const char * +test_cmd_spec_restrict_file_names(void) +{ + unsigned i; + static const struct { + const char *val; + int expected_restrict_files_os; + int expected_restrict_files_ctrl; + int expected_restrict_files_case; + bool result; + } test_array[] = { + { "windows", restrict_windows, true, restrict_no_case_restriction, true }, + { "windows,", restrict_windows, true, restrict_no_case_restriction, true }, + { "windows,lowercase", restrict_windows, true, restrict_lowercase, true }, + { "unix,nocontrol,lowercase,", restrict_unix, false, restrict_lowercase, true }, + }; + + for (i = 0; i < countof(test_array); ++i) + { + bool res; + + defaults(); + res = cmd_spec_restrict_file_names ("dummy", test_array[i].val, NULL); + + /* + fprintf (stderr, "test_cmd_spec_restrict_file_names: TEST %d\n", i); fflush (stderr); + fprintf (stderr, "opt.restrict_files_os: %d\n", opt.restrict_files_os); fflush (stderr); + fprintf (stderr, "opt.restrict_files_ctrl: %d\n", opt.restrict_files_ctrl); fflush (stderr); + fprintf (stderr, "opt.restrict_files_case: %d\n", opt.restrict_files_case); fflush (stderr); + */ + mu_assert ("test_cmd_spec_restrict_file_names: wrong result", + res == test_array[i].result + && opt.restrict_files_os == test_array[i].expected_restrict_files_os + && opt.restrict_files_ctrl == test_array[i].expected_restrict_files_ctrl + && opt.restrict_files_case == test_array[i].expected_restrict_files_case); + 
} + + return NULL; +} + +#endif /* TESTING */ +