X-Git-Url: http://sjero.net/git/?p=wget;a=blobdiff_plain;f=src%2Finit.c;h=688b3cf00aeef4556125ac1d55bf4cb133490456;hp=874e05a9162a6090493b72bea1a70e30397a11b5;hb=5f0a2b3f0846dd4c2f72fc62e7171200d1fd6e06;hpb=b3e2c0ff97765cc7a44d840d1eb94447a2ea125f diff --git a/src/init.c b/src/init.c index 874e05a9..688b3cf0 100644 --- a/src/init.c +++ b/src/init.c @@ -1,26 +1,36 @@ /* Reading/parsing the initialization file. - Copyright (C) 1995, 1996, 1997, 1998 Free Software Foundation, Inc. + Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001, 2003 + Free Software Foundation, Inc. -This file is part of Wget. +This file is part of GNU Wget. -This program is free software; you can redistribute it and/or modify +GNU Wget is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. -This program is distributed in the hope that it will be useful, +GNU Wget is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +along with Wget; if not, write to the Free Software +Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + +In addition, as a special exception, the Free Software Foundation +gives permission to link the code of its release of Wget with the +OpenSSL project's "OpenSSL" library (or with modified versions of it +that use the same license as the "OpenSSL" library), and distribute +the linked executables. You must obey the GNU General Public License +in all respects for all of the code used other than "OpenSSL". 
If you +modify this file, you may extend this exception to your version of the +file, but you are not obligated to do so. If you do not wish to do +so, delete this exception statement from your version. */ #include #include -#include #include #include #ifdef HAVE_UNISTD_H @@ -33,47 +43,71 @@ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #endif #include +#ifdef WINDOWS +# include +#else +# include +# include +#ifndef __BEOS__ +# include +#endif +#endif + #ifdef HAVE_PWD_H -#include +# include #endif +#include #include "wget.h" #include "utils.h" #include "init.h" #include "host.h" -#include "recur.h" #include "netrc.h" +#include "cookies.h" /* for cookie_jar_delete */ +#include "progress.h" #ifndef errno extern int errno; #endif +extern struct cookie_jar *wget_cookie_jar; + +/* We want tilde expansion enabled only when reading `.wgetrc' lines; + otherwise, it will be performed by the shell. This variable will + be set by the wgetrc-reading function. */ + +static int enable_tilde_expansion; + #define CMD_DECLARE(func) static int func \ PARAMS ((const char *, const char *, void *)) CMD_DECLARE (cmd_boolean); -CMD_DECLARE (cmd_boolean); +CMD_DECLARE (cmd_bytes); +CMD_DECLARE (cmd_bytes_large); +CMD_DECLARE (cmd_directory_vector); +CMD_DECLARE (cmd_lockable_boolean); CMD_DECLARE (cmd_number); CMD_DECLARE (cmd_number_inf); CMD_DECLARE (cmd_string); -CMD_DECLARE (cmd_vector); -CMD_DECLARE (cmd_directory_vector); -CMD_DECLARE (cmd_bytes); +CMD_DECLARE (cmd_file); +CMD_DECLARE (cmd_directory); CMD_DECLARE (cmd_time); +CMD_DECLARE (cmd_vector); CMD_DECLARE (cmd_spec_dirstruct); -CMD_DECLARE (cmd_spec_dotstyle); CMD_DECLARE (cmd_spec_header); CMD_DECLARE (cmd_spec_htmlify); CMD_DECLARE (cmd_spec_mirror); -CMD_DECLARE (cmd_spec_outputdocument); +CMD_DECLARE (cmd_spec_progress); CMD_DECLARE (cmd_spec_recursive); +CMD_DECLARE (cmd_spec_restrict_file_names); +CMD_DECLARE (cmd_spec_timeout); CMD_DECLARE (cmd_spec_useragent); /* List of recognized commands, each 
consisting of name, closure and function. When adding a new command, simply add it to the list, but be sure to keep the - list sorted alphabetically, as comind() depends on it. Also, be sure to add + list sorted alphabetically, as findcmd() depends on it. Also, be sure to add any entries that allocate memory (e.g. cmd_string and cmd_vector guys) to the cleanup() function below. */ static struct { @@ -88,21 +122,29 @@ static struct { { "backupconverted", &opt.backup_converted, cmd_boolean }, { "backups", &opt.backups, cmd_number }, { "base", &opt.base_href, cmd_string }, - { "cache", &opt.proxy_cache, cmd_boolean }, + { "bindaddress", &opt.bind_address, cmd_string }, + { "cache", &opt.allow_cache, cmd_boolean }, + { "connecttimeout", &opt.connect_timeout, cmd_time }, { "continue", &opt.always_rest, cmd_boolean }, { "convertlinks", &opt.convert_links, cmd_boolean }, + { "cookies", &opt.cookies, cmd_boolean }, { "cutdirs", &opt.cut_dirs, cmd_number }, -#ifdef DEBUG +#ifdef ENABLE_DEBUG { "debug", &opt.debug, cmd_boolean }, #endif { "deleteafter", &opt.delete_after, cmd_boolean }, - { "dirprefix", &opt.dir_prefix, cmd_string }, + { "dirprefix", &opt.dir_prefix, cmd_directory }, { "dirstruct", NULL, cmd_spec_dirstruct }, + { "dnscache", &opt.dns_cache, cmd_boolean }, + { "dnstimeout", &opt.dns_timeout, cmd_time }, { "domains", &opt.domains, cmd_vector }, { "dotbytes", &opt.dot_bytes, cmd_bytes }, { "dotsinline", &opt.dots_in_line, cmd_number }, { "dotspacing", &opt.dot_spacing, cmd_number }, - { "dotstyle", NULL, cmd_spec_dotstyle }, + { "dotstyle", &opt.dot_style, cmd_string }, +#ifdef HAVE_SSL + { "egdfile", &opt.sslegdsock, cmd_file }, +#endif { "excludedirectories", &opt.excludes, cmd_directory_vector }, { "excludedomains", &opt.exclude_domains, cmd_vector }, { "followftp", &opt.follow_ftp, cmd_boolean }, @@ -113,15 +155,19 @@ static struct { { "header", NULL, cmd_spec_header }, { "htmlextension", &opt.html_extension, cmd_boolean }, { "htmlify", NULL, 
cmd_spec_htmlify }, + { "httpkeepalive", &opt.http_keep_alive, cmd_boolean }, { "httppasswd", &opt.http_passwd, cmd_string }, { "httpproxy", &opt.http_proxy, cmd_string }, + { "httpsproxy", &opt.https_proxy, cmd_string }, { "httpuser", &opt.http_user, cmd_string }, { "ignorelength", &opt.ignore_length, cmd_boolean }, { "ignoretags", &opt.ignore_tags, cmd_vector }, { "includedirectories", &opt.includes, cmd_directory_vector }, - { "input", &opt.input_filename, cmd_string }, + { "input", &opt.input_filename, cmd_file }, { "killlonger", &opt.kill_longer, cmd_boolean }, - { "logfile", &opt.lfilename, cmd_string }, + { "limitrate", &opt.limit_rate, cmd_bytes }, + { "loadcookies", &opt.cookies_input, cmd_file }, + { "logfile", &opt.lfilename, cmd_file }, { "login", &opt.ftp_acc, cmd_string }, { "mirror", NULL, cmd_spec_mirror }, { "netrc", &opt.netrc, cmd_boolean }, @@ -129,28 +175,45 @@ static struct { { "noparent", &opt.no_parent, cmd_boolean }, { "noproxy", &opt.no_proxy, cmd_vector }, { "numtries", &opt.ntry, cmd_number_inf },/* deprecated*/ - { "outputdocument", NULL, cmd_spec_outputdocument }, + { "outputdocument", &opt.output_document, cmd_file }, { "pagerequisites", &opt.page_requisites, cmd_boolean }, - { "passiveftp", &opt.ftp_pasv, cmd_boolean }, + { "passiveftp", &opt.ftp_pasv, cmd_lockable_boolean }, { "passwd", &opt.ftp_pass, cmd_string }, + { "postdata", &opt.post_data, cmd_string }, + { "postfile", &opt.post_file_name, cmd_file }, + { "progress", &opt.progress_type, cmd_spec_progress }, { "proxypasswd", &opt.proxy_passwd, cmd_string }, { "proxyuser", &opt.proxy_user, cmd_string }, { "quiet", &opt.quiet, cmd_boolean }, - { "quota", &opt.quota, cmd_bytes }, + { "quota", &opt.quota, cmd_bytes_large }, + { "randomwait", &opt.random_wait, cmd_boolean }, + { "readtimeout", &opt.read_timeout, cmd_time }, { "reclevel", &opt.reclevel, cmd_number_inf }, { "recursive", NULL, cmd_spec_recursive }, { "referer", &opt.referer, cmd_string }, { "reject", &opt.rejects, 
cmd_vector }, { "relativeonly", &opt.relative_only, cmd_boolean }, { "removelisting", &opt.remove_listing, cmd_boolean }, + { "restrictfilenames", NULL, cmd_spec_restrict_file_names }, { "retrsymlinks", &opt.retr_symlinks, cmd_boolean }, + { "retryconnrefused", &opt.retry_connrefused, cmd_boolean }, { "robots", &opt.use_robots, cmd_boolean }, + { "savecookies", &opt.cookies_output, cmd_file }, { "saveheaders", &opt.save_headers, cmd_boolean }, { "serverresponse", &opt.server_response, cmd_boolean }, - { "simplehostcheck", &opt.simple_check, cmd_boolean }, { "spanhosts", &opt.spanhost, cmd_boolean }, { "spider", &opt.spider, cmd_boolean }, - { "timeout", &opt.timeout, cmd_time }, +#ifdef HAVE_SSL + { "sslcadir", &opt.sslcadir, cmd_directory }, + { "sslcafile", &opt.sslcafile, cmd_file }, + { "sslcertfile", &opt.sslcertfile, cmd_file }, + { "sslcertkey", &opt.sslcertkey, cmd_file }, + { "sslcerttype", &opt.sslcerttype, cmd_number }, + { "sslcheckcert", &opt.sslcheckcert, cmd_number }, + { "sslprotocol", &opt.sslprotocol, cmd_number }, +#endif /* HAVE_SSL */ + { "strictcomments", &opt.strict_comments, cmd_boolean }, + { "timeout", NULL, cmd_spec_timeout }, { "timestamping", &opt.timestamping, cmd_boolean }, { "tries", &opt.ntry, cmd_number_inf }, { "useproxy", &opt.use_proxy, cmd_boolean }, @@ -160,25 +223,25 @@ static struct { { "waitretry", &opt.waitretry, cmd_time } }; -/* Return index of COM if it is a valid command, or -1 otherwise. COM - is looked up in `commands' using binary search algorithm. */ +/* Look up COM in the commands[] array and return its index. If COM + is not found, -1 is returned. This function uses binary search. 
*/ + static int -comind (const char *com) +findcmd (const char *com) { - int min = 0, max = ARRAY_SIZE (commands); + int lo = 0, hi = countof (commands) - 1; - do + while (lo <= hi) { - int i = (min + max) / 2; - int cmp = strcasecmp (com, commands[i].name); - if (cmp == 0) - return i; - else if (cmp < 0) - max = i - 1; + int mid = (lo + hi) >> 1; + int cmp = strcasecmp (com, commands[mid].name); + if (cmp < 0) + hi = mid - 1; + else if (cmp > 0) + lo = mid + 1; else - min = i + 1; + return mid; } - while (min <= max); return -1; } @@ -193,27 +256,26 @@ defaults (void) NULL this way is technically illegal, but porting Wget to a machine where NULL is not all-zero bit pattern will be the least of the implementors' worries. */ - memset (&opt, 0, sizeof (opt)); + xzero (opt); + opt.cookies = 1; opt.verbose = -1; - opt.dir_prefix = xstrdup ("."); opt.ntry = 20; opt.reclevel = 5; opt.add_hostdir = 1; - opt.ftp_acc = xstrdup ("anonymous"); - /*opt.ftp_pass = xstrdup (ftp_getaddress ());*/ + opt.ftp_acc = xstrdup ("anonymous"); + opt.ftp_pass = xstrdup ("-wget@"); opt.netrc = 1; opt.ftp_glob = 1; opt.htmlify = 1; + opt.http_keep_alive = 1; opt.use_proxy = 1; tmp = getenv ("no_proxy"); if (tmp) opt.no_proxy = sepstring (tmp); - opt.proxy_cache = 1; + opt.allow_cache = 1; -#ifdef HAVE_SELECT - opt.timeout = 900; -#endif + opt.read_timeout = 900; opt.use_robots = 1; opt.remove_listing = 1; @@ -221,6 +283,16 @@ defaults (void) opt.dot_bytes = 1024; opt.dot_spacing = 10; opt.dots_in_line = 50; + + opt.dns_cache = 1; + + /* The default for file name restriction defaults to the OS type. 
*/ +#if !defined(WINDOWS) && !defined(__CYGWIN__) + opt.restrict_files_os = restrict_unix; +#else + opt.restrict_files_os = restrict_windows; +#endif + opt.restrict_files_ctrl = 1; } /* Return the user's home directory (strdup-ed), or NULL if none is @@ -266,7 +338,8 @@ wgetrc_file_name (void) { if (!file_exists_p (env)) { - fprintf (stderr, "%s: %s: %s.\n", exec_name, file, strerror (errno)); + fprintf (stderr, _("%s: WGETRC points to %s, which doesn't exist.\n"), + exec_name, env); exit (1); } return xstrdup (env); @@ -300,13 +373,17 @@ wgetrc_file_name (void) return NULL; if (!file_exists_p (file)) { - free (file); + xfree (file); return NULL; } return file; } -/* Initialize variables from a wgetrc file */ +static int parse_line PARAMS ((const char *, char **, char **, int *)); +static int setval_internal PARAMS ((int, const char *, const char *)); + +/* Initialize variables from a wgetrc file. */ + static void run_wgetrc (const char *file) { @@ -321,33 +398,31 @@ run_wgetrc (const char *file) file, strerror (errno)); return; } - /* Reset line number. */ + enable_tilde_expansion = 1; ln = 1; while ((line = read_whole_line (fp))) { char *com, *val; - int status; - int length = strlen (line); + int comind, status; - if (length && line[length - 1] == '\r') - line[length - 1] = '\0'; /* Parse the line. */ - status = parse_line (line, &com, &val); - free (line); + status = parse_line (line, &com, &val, &comind); + xfree (line); /* If everything is OK, set the value. 
*/ if (status == 1) { - if (!setval (com, val)) + if (!setval_internal (comind, com, val)) fprintf (stderr, _("%s: Error in %s at line %d.\n"), exec_name, file, ln); - free (com); - free (val); + xfree (com); + xfree (val); } else if (status == 0) fprintf (stderr, _("%s: Error in %s at line %d.\n"), exec_name, file, ln); ++ln; } + enable_tilde_expansion = 0; fclose (fp); } @@ -370,8 +445,8 @@ initialize (void) file = wgetrc_file_name (); if (!file) return; - /* #### We should somehow canonicalize `file' and SYSTEM_WGETRC, - really. */ + /* #### We should canonicalize `file' and SYSTEM_WGETRC with + something like realpath() before comparing them with `strcmp' */ #ifdef SYSTEM_WGETRC if (!strcmp (file, SYSTEM_WGETRC)) { @@ -382,98 +457,160 @@ initialize (void) else #endif run_wgetrc (file); - free (file); + xfree (file); return; } +/* Remove dashes and underscores from S, modifying S in the + process. */ + +static void +dehyphen (char *s) +{ + char *t = s; /* t - tortoise */ + char *h = s; /* h - hare */ + while (*h) + if (*h == '_' || *h == '-') + ++h; + else + *t++ = *h++; + *t = '\0'; +} + /* Parse the line pointed by line, with the syntax: - * command * = * value + * command * = * value * Uses malloc to allocate space for command and value. If the line is invalid, data is freed and 0 is returned. Return values: 1 - success - 0 - failure - -1 - empty */ -int -parse_line (const char *line, char **com, char **val) + 0 - error + -1 - empty + + In case of success, *COM and *VAL point to freshly allocated + strings, and *COMIND points to com's index. In case of error or + empty line, those values are unaffected. */ + +static int +parse_line (const char *line, char **com, char **val, int *comind) { - const char *p = line; - const char *orig_comptr, *end; - char *new_comptr; + const char *p; + const char *end = line + strlen (line); + const char *cmdstart, *cmdend; + const char *valstart, *valend; - /* Skip spaces. 
*/ - while (*p == ' ' || *p == '\t') - ++p; + char *cmdcopy; + int ind; + + /* Skip leading and trailing whitespace. */ + while (*line && ISSPACE (*line)) + ++line; + while (end > line && ISSPACE (end[-1])) + --end; - /* Don't process empty lines. */ - if (!*p || *p == '\n' || *p == '#') + /* Skip empty lines and comments. */ + if (!*line || *line == '#') return -1; - for (orig_comptr = p; ISALPHA (*p) || *p == '_' || *p == '-'; p++) - ; - /* The next char should be space or '='. */ - if (!ISSPACE (*p) && (*p != '=')) + p = line; + + cmdstart = p; + while (p < end && (ISALPHA (*p) || *p == '_' || *p == '-')) + ++p; + cmdend = p; + + /* Skip '=', as well as any space before or after it. */ + while (p < end && ISSPACE (*p)) + ++p; + if (p == end || *p != '=') return 0; - *com = (char *)xmalloc (p - orig_comptr + 1); - for (new_comptr = *com; orig_comptr < p; orig_comptr++) - { - if (*orig_comptr == '_' || *orig_comptr == '-') - continue; - *new_comptr++ = *orig_comptr; - } - *new_comptr = '\0'; - /* If the command is invalid, exit now. */ - if (comind (*com) == -1) - { - free (*com); - return 0; - } + ++p; + while (p < end && ISSPACE (*p)) + ++p; - /* Skip spaces before '='. */ - for (; ISSPACE (*p); p++); - /* If '=' not found, bail out. */ - if (*p != '=') - { - free (*com); - return 0; - } - /* Skip spaces after '='. */ - for (++p; ISSPACE (*p); p++); - /* Get the ending position. */ - for (end = p; *end && *end != '\n'; end++); - /* Allocate *val, and copy from line. */ - *val = strdupdelim (p, end); + valstart = p; + valend = end; + + /* The line is now known to be syntactically correct. Check whether + the command is valid. */ + BOUNDED_TO_ALLOCA (cmdstart, cmdend, cmdcopy); + dehyphen (cmdcopy); + ind = findcmd (cmdcopy); + if (ind == -1) + return 0; + + /* The command is valid. Now fill in the values and report success + to the caller. 
*/ + *comind = ind; + *com = strdupdelim (cmdstart, cmdend); + *val = strdupdelim (valstart, valend); return 1; } -/* Set COM to VAL. This is the meat behind processing `.wgetrc'. No - fatals -- error signal prints a warning and resets to default - value. All error messages are printed to stderr, *not* to - opt.lfile, since opt.lfile wasn't even generated yet. */ -int -setval (const char *com, const char *val) +/* Run commands[comind].action. */ + +static int +setval_internal (int comind, const char *com, const char *val) { - int ind; + assert (0 <= comind && comind < countof (commands)); + return ((*commands[comind].action) (com, val, commands[comind].closure)); +} - if (!com || !val) - return 0; - ind = comind (com); - if (ind == -1) +/* Run command COM with value VAL. If running the command produces an + error, report the error and exit. + + This is intended to be called from main() to modify Wget's behavior + through command-line switches. Since COM is hard-coded in main(), + it is not canonicalized, and this aborts when COM is not found. + + If COMIND's are exported to init.h, this function will be changed + to accept COMIND directly. */ + +void +setoptval (const char *com, const char *val) +{ + if (!setval_internal (findcmd (com), com, val)) + exit (2); +} + +/* Parse OPT into command and value and run it. For example, + run_command("foo=bar") is equivalent to setoptval("foo", "bar"). + This is used by the `--execute' flag in main.c. */ + +void +run_command (const char *opt) +{ + char *com, *val; + int comind; + int status = parse_line (opt, &com, &val, &comind); + if (status == 1) { - /* #### Should I just abort()? 
*/ -#ifdef DEBUG - fprintf (stderr, _("%s: BUG: unknown command `%s', value `%s'.\n"), - exec_name, com, val); -#endif - return 0; + if (!setval_internal (comind, com, val)) + exit (2); + xfree (com); + xfree (val); + } + else if (status == 0) + { + fprintf (stderr, _("%s: Invalid --execute command `%s'\n"), + exec_name, opt); + exit (2); } - return ((*commands[ind].action) (com, val, commands[ind].closure)); } /* Generic helper functions, for use with `commands'. */ -static int myatoi PARAMS ((const char *s)); +#define CMP1(p, c0) (TOLOWER((p)[0]) == (c0) && (p)[1] == '\0') + +#define CMP2(p, c0, c1) (TOLOWER((p)[0]) == (c0) \ + && TOLOWER((p)[1]) == (c1) \ + && (p)[2] == '\0') + +#define CMP3(p, c0, c1, c2) (TOLOWER((p)[0]) == (c0) \ + && TOLOWER((p)[1]) == (c1) \ + && TOLOWER((p)[2]) == (c2) \ + && (p)[3] == '\0') + /* Store the boolean value from VAL to CLOSURE. COM is ignored, except for error messages. */ @@ -482,16 +619,17 @@ cmd_boolean (const char *com, const char *val, void *closure) { int bool_value; - if (!strcasecmp (val, "on") - || (*val == '1' && !*(val + 1))) + if (CMP2 (val, 'o', 'n') || CMP3 (val, 'y', 'e', 's') || CMP1 (val, '1')) + /* "on", "yes" and "1" mean true. */ bool_value = 1; - else if (!strcasecmp (val, "off") - || (*val == '0' && !*(val + 1))) + else if (CMP3 (val, 'o', 'f', 'f') || CMP2 (val, 'n', 'o') || CMP1 (val, '0')) + /* "off", "no" and "0" mean false. */ bool_value = 0; else { - fprintf (stderr, _("%s: %s: Please specify on or off.\n"), - exec_name, com); + fprintf (stderr, + _("%s: %s: Invalid boolean `%s', use `on' or `off'.\n"), + exec_name, com, val); return 0; } @@ -499,20 +637,62 @@ cmd_boolean (const char *com, const char *val, void *closure) return 1; } +/* Store the lockable_boolean {2, 1, 0, -1} value from VAL to CLOSURE. + COM is ignored, except for error messages. Values 2 and -1 + indicate that once defined, the value may not be changed by + successive wgetrc files or command-line arguments. 
+ + Values: 2 - Enable a particular option for good ("always") + 1 - Enable an option ("on") + 0 - Disable an option ("off") + -1 - Disable an option for good ("never") */ +static int +cmd_lockable_boolean (const char *com, const char *val, void *closure) +{ + int lockable_boolean_value; + + int oldval = *(int *)closure; + + /* + * If a config file said "always" or "never", don't allow command line + * arguments to override the config file. + */ + if (oldval == -1 || oldval == 2) + return 1; + + if (0 == strcasecmp (val, "always") || CMP1 (val, '2')) + lockable_boolean_value = 2; + else if (CMP2 (val, 'o', 'n') || CMP3 (val, 'y', 'e', 's') || CMP1 (val, '1')) + lockable_boolean_value = 1; + else if (CMP3 (val, 'o', 'f', 'f') || CMP2 (val, 'n', 'o') || CMP1 (val, '0')) + lockable_boolean_value = 0; + else if (0 == strcasecmp (val, "never") || CMP2 (val, '-', '1')) + lockable_boolean_value = -1; + else + { + fprintf (stderr, + _("%s: %s: Invalid boolean `%s', use always, on, off, or never.\n"), + exec_name, com, val); + return 0; + } + + *(int *)closure = lockable_boolean_value; + return 1; +} + +static int simple_atoi PARAMS ((const char *, const char *, int *)); + /* Set the non-negative integer value from VAL to CLOSURE. With incorrect specification, the number remains unchanged. */ static int cmd_number (const char *com, const char *val, void *closure) { - int num = myatoi (val); - - if (num == -1) + if (!simple_atoi (val, val + strlen (val), closure)) { - fprintf (stderr, _("%s: %s: Invalid specification `%s'.\n"), + fprintf (stderr, _("%s: %s: Invalid number `%s'.\n"), exec_name, com, val); return 0; } - *(int *)closure = num; return 1; } @@ -540,9 +720,89 @@ cmd_string (const char *com, const char *val, void *closure) return 1; } -/* Merge the vector (array of strings separated with `,') in COM with - the vector (NULL-terminated array of strings) pointed to by - CLOSURE. 
*/ +#ifndef WINDOWS +# define ISSEP(c) ((c) == '/') +#else +# define ISSEP(c) ((c) == '/' || (c) == '\\') +#endif + +/* Like the above, but handles tilde-expansion when reading a user's + `.wgetrc'. In that case, and if VAL begins with `~', the tilde + gets expanded to the user's home directory. */ +static int +cmd_file (const char *com, const char *val, void *closure) +{ + char **pstring = (char **)closure; + + FREE_MAYBE (*pstring); + + /* #### If VAL is empty, perhaps should set *CLOSURE to NULL. */ + + if (!enable_tilde_expansion || !(*val == '~' && ISSEP (val[1]))) + { + noexpand: + *pstring = xstrdup (val); + } + else + { + char *result; + int homelen; + char *home = home_dir (); + if (!home) + goto noexpand; + + homelen = strlen (home); + while (homelen && ISSEP (home[homelen - 1])) + home[--homelen] = '\0'; + + /* Skip the leading "~/". */ + for (++val; ISSEP (*val); val++) + ; + + result = xmalloc (homelen + 1 + strlen (val) + 1); + memcpy (result, home, homelen); + result[homelen] = '/'; + strcpy (result + homelen + 1, val); + + *pstring = result; + } + +#ifdef WINDOWS + /* Convert "\" to "/". */ + { + char *s; + for (s = *pstring; *s; s++) + if (*s == '\\') + *s = '/'; + } +#endif + return 1; +} + +/* Like cmd_file, but strips trailing '/' characters. */ +static int +cmd_directory (const char *com, const char *val, void *closure) +{ + char *s, *t; + + /* Call cmd_file() for tilde expansion and separator + canonicalization (backslash -> slash under Windows). These + things should perhaps be in a separate function. */ + if (!cmd_file (com, val, closure)) + return 0; + + s = *(char **)closure; + t = s + strlen (s); + while (t > s && *--t == '/') + *t = '\0'; + + return 1; +} + +/* Split VAL by space to a vector of values, and append those values + to vector pointed to by the CLOSURE argument. If VAL is empty, the + CLOSURE vector is cleared instead. 
*/ + static int cmd_vector (const char *com, const char *val, void *closure) { @@ -589,112 +849,169 @@ cmd_directory_vector (const char *com, const char *val, void *closure) return 1; } -/* Set the value stored in VAL to CLOSURE (which should point to a - long int), allowing several postfixes, with the following syntax - (regexp): +static int simple_atof PARAMS ((const char *, const char *, double *)); - [0-9]+ -> bytes - [0-9]+[kK] -> bytes * 1024 - [0-9]+[mM] -> bytes * 1024 * 1024 - inf -> 0 +/* Engine for cmd_bytes and cmd_bytes_large: converts a string such + as "100k" or "2.5G" to a floating point number. */ - Anything else is flagged as incorrect, and CLOSURE is unchanged. */ static int -cmd_bytes (const char *com, const char *val, void *closure) +parse_bytes_helper (const char *val, double *result) { - long result; - long *out = (long *)closure; - const char *p; + double number, mult; + const char *end = val + strlen (val); - result = 0; - p = val; /* Check for "inf". */ - if (p[0] == 'i' && p[1] == 'n' && p[2] == 'f' && p[3] == '\0') + if (0 == strcmp (val, "inf")) { - *out = 0; + *result = 0; return 1; } - /* Search for digits and construct result. */ - for (; *p && ISDIGIT (*p); p++) - result = (10 * result) + (*p - '0'); - /* If no digits were found, or more than one character is following - them, bail out. */ - if (p == val || (*p != '\0' && *(p + 1) != '\0')) - { - printf (_("%s: Invalid specification `%s'\n"), com, val); - return 0; - } - /* Search for a designator. */ - switch (TOLOWER (*p)) + + /* Strip trailing whitespace. 
*/ + while (val < end && ISSPACE (end[-1])) + --end; + if (val == end) + return 0; + + switch (TOLOWER (end[-1])) { - case '\0': - /* None */ - break; case 'k': - /* Kilobytes */ - result *= 1024; + --end, mult = 1024.0; break; case 'm': - /* Megabytes */ - result *= (long)1024 * 1024; + --end, mult = 1048576.0; break; case 'g': - /* Gigabytes */ - result *= (long)1024 * 1024 * 1024; + --end, mult = 1073741824.0; + break; + case 't': + --end, mult = 1099511627776.0; break; default: - printf (_("%s: Invalid specification `%s'\n"), com, val); + /* Not a recognized suffix: assume it's a digit. (If not, + simple_atof will raise an error.) */ + mult = 1; + } + + /* Skip leading and trailing whitespace. */ + while (val < end && ISSPACE (*val)) + ++val; + while (val < end && ISSPACE (end[-1])) + --end; + if (val == end) + return 0; + + if (!simple_atof (val, end, &number)) + return 0; + + *result = number * mult; + return 1; +} + +/* Parse VAL as a number and set its value to CLOSURE (which should + point to a long int). + + By default, the value is assumed to be in bytes. If "K", "M", "G", or + "T" is appended, the value is multiplied by 1<<10, 1<<20, 1<<30, or + 1<<40, respectively. Floating point values are allowed and are + cast to integer before use. The idea is to be able to use things + like 1.5k instead of "1536". + + The string "inf" is returned as 0. + + In case of error, 0 is returned and memory pointed to by CLOSURE + remains unmodified. */ + +static int +cmd_bytes (const char *com, const char *val, void *closure) +{ + double byte_value; + if (!parse_bytes_helper (val, &byte_value)) + { + fprintf (stderr, _("%s: %s: Invalid byte value `%s'\n"), + exec_name, com, val); + return 0; + } + *(long *)closure = (long)byte_value; + return 1; +} + +/* Like cmd_bytes, but CLOSURE is interpreted as a pointer to + LARGE_INT. It works by converting the string to double, therefore + working with values up to 2^53-1 without loss of precision. 
This + value (8192 TB) is large enough to serve for a while. */ + +static int +cmd_bytes_large (const char *com, const char *val, void *closure) +{ + double byte_value; + if (!parse_bytes_helper (val, &byte_value)) + { + fprintf (stderr, _("%s: %s: Invalid byte value `%s'\n"), + exec_name, com, val); return 0; } - *out = result; + *(LARGE_INT *)closure = (LARGE_INT)byte_value; return 1; } -/* Store the value of VAL to *OUT, allowing suffixes for minutes and - hours. */ +/* Store the value of VAL to *CLOSURE (a double). The value is a time period, by + default expressed in seconds, but also accepting suffixes "s", "m", "h", + "d", and "w" for seconds, minutes, hours, days, and weeks respectively. */ + static int cmd_time (const char *com, const char *val, void *closure) { - long result = 0; - const char *p = val; + double number, mult; + const char *end = val + strlen (val); - /* Search for digits and construct result. */ - for (; *p && ISDIGIT (*p); p++) - result = (10 * result) + (*p - '0'); - /* If no digits were found, or more than one character is following - them, bail out. */ - if (p == val || (*p != '\0' && *(p + 1) != '\0')) + /* Strip trailing whitespace. */ + while (val < end && ISSPACE (end[-1])) + --end; + + if (val == end) { - printf (_("%s: Invalid specification `%s'\n"), com, val); + err: + fprintf (stderr, _("%s: %s: Invalid time period `%s'\n"), + exec_name, com, val); return 0; } - /* Search for a suffix. 
*/ - switch (TOLOWER (*p)) + + switch (TOLOWER (end[-1])) { - case '\0': - /* None */ + case 's': + --end, mult = 1; /* seconds */ break; case 'm': - /* Minutes */ - result *= 60; + --end, mult = 60; /* minutes */ break; case 'h': - /* Seconds */ - result *= 3600; + --end, mult = 3600; /* hours */ break; case 'd': - /* Days (overflow on 16bit machines) */ - result *= 86400L; + --end, mult = 86400.0; /* days */ break; case 'w': - /* Weeks :-) */ - result *= 604800L; + --end, mult = 604800.0; /* weeks */ break; default: - printf (_("%s: Invalid specification `%s'\n"), com, val); - return 0; + /* Not a recognized suffix: assume it belongs to the number. + (If not, simple_atof will raise an error.) */ + mult = 1; } - *(long *)closure = result; + + /* Skip leading and trailing whitespace. */ + while (val < end && ISSPACE (*val)) + ++val; + while (val < end && ISSPACE (end[-1])) + --end; + if (val == end) + goto err; + + if (!simple_atof (val, end, &number)) + goto err; + + *(double *)closure = number * mult; return 1; } @@ -717,61 +1034,6 @@ cmd_spec_dirstruct (const char *com, const char *val, void *closure) return 1; } -static int -cmd_spec_dotstyle (const char *com, const char *val, void *closure) -{ - /* Retrieval styles. */ - if (!strcasecmp (val, "default")) - { - /* Default style: 1K dots, 10 dots in a cluster, 50 dots in a - line. */ - opt.dot_bytes = 1024; - opt.dot_spacing = 10; - opt.dots_in_line = 50; - } - else if (!strcasecmp (val, "binary")) - { - /* "Binary" retrieval: 8K dots, 16 dots in a cluster, 48 dots - (384K) in a line. */ - opt.dot_bytes = 8192; - opt.dot_spacing = 16; - opt.dots_in_line = 48; - } - else if (!strcasecmp (val, "mega")) - { - /* "Mega" retrieval, for retrieving very long files; each dot is - 64K, 8 dots in a cluster, 6 clusters (3M) in a line. 
*/ - opt.dot_bytes = 65536L; - opt.dot_spacing = 8; - opt.dots_in_line = 48; - } - else if (!strcasecmp (val, "giga")) - { - /* "Giga" retrieval, for retrieving very very *very* long files; - each dot is 1M, 8 dots in a cluster, 4 clusters (32M) in a - line. */ - opt.dot_bytes = (1L << 20); - opt.dot_spacing = 8; - opt.dots_in_line = 32; - } - else if (!strcasecmp (val, "micro")) - { - /* "Micro" retrieval, for retrieving very small files (and/or - slow connections); each dot is 128 bytes, 8 dots in a - cluster, 6 clusters (6K) in a line. */ - opt.dot_bytes = 128; - opt.dot_spacing = 8; - opt.dots_in_line = 48; - } - else - { - fprintf (stderr, _("%s: %s: Invalid specification `%s'.\n"), - exec_name, com, val); - return 0; - } - return 1; -} - static int cmd_spec_header (const char *com, const char *val, void *closure) { @@ -787,7 +1049,7 @@ cmd_spec_header (const char *com, const char *val, void *closure) if (!check_user_specified_header (val)) { - fprintf (stderr, _("%s: %s: Invalid specification `%s'.\n"), + fprintf (stderr, _("%s: %s: Invalid header `%s'.\n"), exec_name, com, val); return 0; } @@ -812,6 +1074,9 @@ cmd_spec_htmlify (const char *com, const char *val, void *closure) return flag; } +/* Set the "mirror" mode. It means: recursive download, timestamping, + no limit on max. recursion depth, and don't remove listings. */ + static int cmd_spec_mirror (const char *com, const char *val, void *closure) { @@ -831,15 +1096,30 @@ cmd_spec_mirror (const char *com, const char *val, void *closure) return 1; } +/* Set progress.type to VAL, but verify that it's a valid progress + implementation before that. 
*/ + static int -cmd_spec_outputdocument (const char *com, const char *val, void *closure) +cmd_spec_progress (const char *com, const char *val, void *closure) { - FREE_MAYBE (opt.output_document); - opt.output_document = xstrdup (val); - opt.ntry = 1; + if (!valid_progress_implementation_p (val)) + { + fprintf (stderr, _("%s: %s: Invalid progress type `%s'.\n"), + exec_name, com, val); + return 0; + } + FREE_MAYBE (opt.progress_type); + + /* Don't call set_progress_implementation here. It will be called + in main() when it becomes clear what the log output is. */ + opt.progress_type = xstrdup (val); return 1; } +/* Set opt.recursive to VAL as with cmd_boolean. If opt.recursive is + set to true, also set opt.dirstruct to 1, unless opt.no_dirstruct + is specified. */ + static int cmd_spec_recursive (const char *com, const char *val, void *closure) { @@ -853,6 +1133,62 @@ cmd_spec_recursive (const char *com, const char *val, void *closure) return 1; } +static int +cmd_spec_restrict_file_names (const char *com, const char *val, void *closure) +{ + int restrict_os = opt.restrict_files_os; + int restrict_ctrl = opt.restrict_files_ctrl; + + const char *end = strchr (val, ','); + if (!end) + end = val + strlen (val); + +#define VAL_IS(string_literal) BOUNDED_EQUAL (val, end, string_literal) + + if (VAL_IS ("unix")) + restrict_os = restrict_unix; + else if (VAL_IS ("windows")) + restrict_os = restrict_windows; + else if (VAL_IS ("nocontrol")) + restrict_ctrl = 0; + else + { + err: + fprintf (stderr, + _("%s: %s: Invalid restriction `%s', use `unix' or `windows'.\n"), + exec_name, com, val); + return 0; + } + +#undef VAL_IS + + if (*end) + { + if (!strcmp (end + 1, "nocontrol")) + restrict_ctrl = 0; + else + goto err; + } + + opt.restrict_files_os = restrict_os; + opt.restrict_files_ctrl = restrict_ctrl; + return 1; +} + +/* Set all three timeout values. 
*/ + +static int +cmd_spec_timeout (const char *com, const char *val, void *closure) +{ + double value; + if (!cmd_time (com, val, &value)) + return 0; + opt.read_timeout = value; + opt.connect_timeout = value; + opt.dns_timeout = value; + return 1; +} + static int cmd_spec_useragent (const char *com, const char *val, void *closure) { @@ -860,7 +1196,7 @@ cmd_spec_useragent (const char *com, const char *val, void *closure) junk to the server. */ if (!*val || strchr (val, '\n')) { - fprintf (stderr, _("%s: %s: Invalid specification `%s'.\n"), + fprintf (stderr, _("%s: %s: Invalid value `%s'.\n"), exec_name, com, val); return 0; } @@ -870,23 +1206,71 @@ cmd_spec_useragent (const char *com, const char *val, void *closure) /* Miscellaneous useful routines. */ -/* Return the integer value of a positive integer written in S, or -1 - if an error was encountered. */ +/* A very simple atoi clone, more portable than strtol and friends, + but reports errors, unlike atoi. Returns 1 on success, 0 on + failure. In case of success, stores result to *DEST. */ + static int -myatoi (const char *s) +simple_atoi (const char *beg, const char *end, int *dest) { - int res; - const char *orig = s; + int result = 0; + const char *p; - for (res = 0; *s && ISDIGIT (*s); s++) - res = 10 * res + (*s - '0'); - if (*s || orig == s) - return -1; - else - return res; + if (beg == end) + return 0; + + for (p = beg; p < end && ISDIGIT (*p); p++) + result = (10 * result) + (*p - '0'); + + if (p != end) + return 0; + + *dest = result; + return 1; } -#define ISODIGIT(x) ((x) >= '0' && (x) <= '7') +/* Trivial atof, with error reporting. Handles "[.]", + doesn't handle exponential notation. Returns 1 on success, 0 on + failure. In case of success, stores its result to *DEST. 
*/ + +static int +simple_atof (const char *beg, const char *end, double *dest) +{ + double result = 0; + + int seen_dot = 0; + int seen_digit = 0; + double divider = 1; + + const char *p; + + for (p = beg; p < end; p++) + { + char ch = *p; + if (ISDIGIT (ch)) + { + if (!seen_dot) + result = (10 * result) + (ch - '0'); + else + result += (ch - '0') / (divider *= 10); + seen_digit = 1; + } + else if (ch == '.') + { + if (!seen_dot) + seen_dot = 1; + else + return 0; + } + else + return 0; + } + if (!seen_digit) + return 0; + + *dest = result; + return 1; +} static int check_user_specified_header (const char *s) @@ -904,19 +1288,44 @@ check_user_specified_header (const char *s) return 1; } +void cleanup_html_url PARAMS ((void)); +void res_cleanup PARAMS ((void)); +void downloaded_files_free PARAMS ((void)); +void http_cleanup PARAMS ((void)); + + /* Free the memory allocated by global variables. */ void cleanup (void) { - extern acc_t *netrc_list; + /* Free external resources, close files, etc. */ - recursive_cleanup (); - clean_hosts (); - free_netrc (netrc_list); if (opt.dfp) fclose (opt.dfp); + + /* We're exiting anyway so there's no real need to call free() + hundreds of times. Skipping the frees will make Wget exit + faster. + + However, when detecting leaks, it's crucial to free() everything + because then you can find the real leaks, i.e. the allocated + memory which grows with the size of the program. 
*/ + +#ifdef DEBUG_MALLOC + convert_cleanup (); + res_cleanup (); + http_cleanup (); + cleanup_html_url (); + downloaded_files_free (); + host_cleanup (); + cookie_jar_delete (wget_cookie_jar); + + { + extern acc_t *netrc_list; + free_netrc (netrc_list); + } FREE_MAYBE (opt.lfilename); - free (opt.dir_prefix); + FREE_MAYBE (opt.dir_prefix); FREE_MAYBE (opt.input_filename); FREE_MAYBE (opt.output_document); free_vec (opt.accepts); @@ -926,9 +1335,11 @@ cleanup (void) free_vec (opt.domains); free_vec (opt.follow_tags); free_vec (opt.ignore_tags); - free (opt.ftp_acc); - free (opt.ftp_pass); + FREE_MAYBE (opt.progress_type); + xfree (opt.ftp_acc); + FREE_MAYBE (opt.ftp_pass); FREE_MAYBE (opt.ftp_proxy); + FREE_MAYBE (opt.https_proxy); FREE_MAYBE (opt.http_proxy); free_vec (opt.no_proxy); FREE_MAYBE (opt.useragent); @@ -936,4 +1347,12 @@ cleanup (void) FREE_MAYBE (opt.http_user); FREE_MAYBE (opt.http_passwd); FREE_MAYBE (opt.user_header); +#ifdef HAVE_SSL + FREE_MAYBE (opt.sslcertkey); + FREE_MAYBE (opt.sslcertfile); +#endif /* HAVE_SSL */ + FREE_MAYBE (opt.bind_address); + FREE_MAYBE (opt.cookies_input); + FREE_MAYBE (opt.cookies_output); +#endif }