X-Git-Url: http://sjero.net/git/?p=wget;a=blobdiff_plain;f=src%2Finit.c;h=688b3cf00aeef4556125ac1d55bf4cb133490456;hp=76fbc45d9244c487e8ab4829ffbab298e1baac03;hb=5f0a2b3f0846dd4c2f72fc62e7171200d1fd6e06;hpb=cdcf67a5bdae9c56d263ebf7608b52701851cf22 diff --git a/src/init.c b/src/init.c index 76fbc45d..688b3cf0 100644 --- a/src/init.c +++ b/src/init.c @@ -1,5 +1,5 @@ /* Reading/parsing the initialization file. - Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001 + Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001, 2003 Free Software Foundation, Inc. This file is part of GNU Wget. @@ -16,7 +16,17 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License along with Wget; if not, write to the Free Software -Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + +In addition, as a special exception, the Free Software Foundation +gives permission to link the code of its release of Wget with the +OpenSSL project's "OpenSSL" library (or with modified versions of it +that use the same license as the "OpenSSL" library), and distribute +the linked executables. You must obey the GNU General Public License +in all respects for all of the code used other than "OpenSSL". If you +modify this file, you may extend this exception to your version of the +file, but you are not obligated to do so. If you do not wish to do +so, delete this exception statement from your version. */ #include @@ -44,21 +54,24 @@ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #endif #ifdef HAVE_PWD_H -#include +# include #endif +#include #include "wget.h" #include "utils.h" #include "init.h" #include "host.h" -#include "recur.h" #include "netrc.h" -#include "cookies.h" /* for cookies_cleanup */ +#include "cookies.h" /* for cookie_jar_delete */ +#include "progress.h" #ifndef errno extern int errno; #endif +extern struct cookie_jar *wget_cookie_jar; + /* We want tilde expansion enabled only when reading `.wgetrc' lines; otherwise, it will be performed by the shell. This variable will be set by the wgetrc-reading function. */ @@ -69,15 +82,16 @@ static int enable_tilde_expansion; #define CMD_DECLARE(func) static int func \ PARAMS ((const char *, const char *, void *)) -CMD_DECLARE (cmd_address); CMD_DECLARE (cmd_boolean); CMD_DECLARE (cmd_bytes); +CMD_DECLARE (cmd_bytes_large); CMD_DECLARE (cmd_directory_vector); CMD_DECLARE (cmd_lockable_boolean); CMD_DECLARE (cmd_number); CMD_DECLARE (cmd_number_inf); CMD_DECLARE (cmd_string); CMD_DECLARE (cmd_file); +CMD_DECLARE (cmd_directory); CMD_DECLARE (cmd_time); CMD_DECLARE (cmd_vector); @@ -87,11 +101,13 @@ CMD_DECLARE (cmd_spec_htmlify); CMD_DECLARE (cmd_spec_mirror); CMD_DECLARE (cmd_spec_progress); CMD_DECLARE (cmd_spec_recursive); +CMD_DECLARE (cmd_spec_restrict_file_names); +CMD_DECLARE (cmd_spec_timeout); CMD_DECLARE (cmd_spec_useragent); /* List of recognized commands, each consisting of name, closure and function. When adding a new command, simply add it to the list, but be sure to keep the - list sorted alphabetically, as comind() depends on it. Also, be sure to add + list sorted alphabetically, as findcmd() depends on it. Also, be sure to add any entries that allocate memory (e.g. cmd_string and cmd_vector guys) to the cleanup() function below. */ static struct { @@ -106,22 +122,29 @@ static struct { { "backupconverted", &opt.backup_converted, cmd_boolean }, { "backups", &opt.backups, cmd_number }, { "base", &opt.base_href, cmd_string }, - { "bindaddress", &opt.bind_address, cmd_address }, + { "bindaddress", &opt.bind_address, cmd_string }, { "cache", &opt.allow_cache, cmd_boolean }, + { "connecttimeout", &opt.connect_timeout, cmd_time }, { "continue", &opt.always_rest, cmd_boolean }, { "convertlinks", &opt.convert_links, cmd_boolean }, { "cookies", &opt.cookies, cmd_boolean }, { "cutdirs", &opt.cut_dirs, cmd_number }, -#ifdef DEBUG +#ifdef ENABLE_DEBUG { "debug", &opt.debug, cmd_boolean }, #endif { "deleteafter", &opt.delete_after, cmd_boolean }, - { "dirprefix", &opt.dir_prefix, cmd_file }, + { "dirprefix", &opt.dir_prefix, cmd_directory }, { "dirstruct", NULL, cmd_spec_dirstruct }, + { "dnscache", &opt.dns_cache, cmd_boolean }, + { "dnstimeout", &opt.dns_timeout, cmd_time }, { "domains", &opt.domains, cmd_vector }, { "dotbytes", &opt.dot_bytes, cmd_bytes }, { "dotsinline", &opt.dots_in_line, cmd_number }, { "dotspacing", &opt.dot_spacing, cmd_number }, + { "dotstyle", &opt.dot_style, cmd_string }, +#ifdef HAVE_SSL + { "egdfile", &opt.sslegdsock, cmd_file }, +#endif { "excludedirectories", &opt.excludes, cmd_directory_vector }, { "excludedomains", &opt.exclude_domains, cmd_vector }, { "followftp", &opt.follow_ftp, cmd_boolean }, @@ -142,6 +165,7 @@ static struct { { "includedirectories", &opt.includes, cmd_directory_vector }, { "input", &opt.input_filename, cmd_file }, { "killlonger", &opt.kill_longer, cmd_boolean }, + { "limitrate", &opt.limit_rate, cmd_bytes }, { "loadcookies", &opt.cookies_input, cmd_file }, { "logfile", &opt.lfilename, cmd_file }, { "login", &opt.ftp_acc, cmd_string }, @@ -155,30 +179,41 @@ static struct { { "pagerequisites", &opt.page_requisites, cmd_boolean }, { "passiveftp", &opt.ftp_pasv, cmd_lockable_boolean }, { "passwd", &opt.ftp_pass, cmd_string }, - { "progress", NULL, cmd_spec_progress }, + { "postdata", &opt.post_data, cmd_string }, + { "postfile", &opt.post_file_name, cmd_file }, + { "progress", &opt.progress_type, cmd_spec_progress }, { "proxypasswd", &opt.proxy_passwd, cmd_string }, { "proxyuser", &opt.proxy_user, cmd_string }, { "quiet", &opt.quiet, cmd_boolean }, - { "quota", &opt.quota, cmd_bytes }, + { "quota", &opt.quota, cmd_bytes_large }, + { "randomwait", &opt.random_wait, cmd_boolean }, + { "readtimeout", &opt.read_timeout, cmd_time }, { "reclevel", &opt.reclevel, cmd_number_inf }, { "recursive", NULL, cmd_spec_recursive }, { "referer", &opt.referer, cmd_string }, { "reject", &opt.rejects, cmd_vector }, { "relativeonly", &opt.relative_only, cmd_boolean }, { "removelisting", &opt.remove_listing, cmd_boolean }, + { "restrictfilenames", NULL, cmd_spec_restrict_file_names }, { "retrsymlinks", &opt.retr_symlinks, cmd_boolean }, + { "retryconnrefused", &opt.retry_connrefused, cmd_boolean }, { "robots", &opt.use_robots, cmd_boolean }, { "savecookies", &opt.cookies_output, cmd_file }, { "saveheaders", &opt.save_headers, cmd_boolean }, { "serverresponse", &opt.server_response, cmd_boolean }, - { "simplehostcheck", &opt.simple_check, cmd_boolean }, { "spanhosts", &opt.spanhost, cmd_boolean }, { "spider", &opt.spider, cmd_boolean }, #ifdef HAVE_SSL + { "sslcadir", &opt.sslcadir, cmd_directory }, + { "sslcafile", &opt.sslcafile, cmd_file }, { "sslcertfile", &opt.sslcertfile, cmd_file }, { "sslcertkey", &opt.sslcertkey, cmd_file }, + { "sslcerttype", &opt.sslcerttype, cmd_number }, + { "sslcheckcert", &opt.sslcheckcert, cmd_number }, + { "sslprotocol", &opt.sslprotocol, cmd_number }, #endif /* HAVE_SSL */ - { "timeout", &opt.timeout, cmd_time }, + { "strictcomments", &opt.strict_comments, cmd_boolean }, + { "timeout", NULL, cmd_spec_timeout }, { "timestamping", &opt.timestamping, cmd_boolean }, { "tries", &opt.ntry, cmd_number_inf }, { "useproxy", &opt.use_proxy, cmd_boolean }, @@ -188,25 +223,25 @@ static struct { { "waitretry", &opt.waitretry, cmd_time } }; -/* Return index of COM if it is a valid command, or -1 otherwise. COM - is looked up in `commands' using binary search algorithm. */ +/* Look up COM in the commands[] array and return its index. If COM + is not found, -1 is returned. This function uses binary search. */ + static int -comind (const char *com) +findcmd (const char *com) { - int min = 0, max = ARRAY_SIZE (commands) - 1; + int lo = 0, hi = countof (commands) - 1; - do + while (lo <= hi) { - int i = (min + max) / 2; - int cmp = strcasecmp (com, commands[i].name); - if (cmp == 0) - return i; - else if (cmp < 0) - max = i - 1; + int mid = (lo + hi) >> 1; + int cmp = strcasecmp (com, commands[mid].name); + if (cmp < 0) + hi = mid - 1; + else if (cmp > 0) + lo = mid + 1; else - min = i + 1; + return mid; } - while (min <= max); return -1; } @@ -221,12 +256,10 @@ defaults (void) NULL this way is technically illegal, but porting Wget to a machine where NULL is not all-zero bit pattern will be the least of the implementors' worries. */ - memset (&opt, 0, sizeof (opt)); + xzero (opt); opt.cookies = 1; - opt.verbose = -1; - opt.dir_prefix = xstrdup ("."); opt.ntry = 20; opt.reclevel = 5; opt.add_hostdir = 1; @@ -242,17 +275,24 @@ defaults (void) opt.no_proxy = sepstring (tmp); opt.allow_cache = 1; -#ifdef HAVE_SELECT - opt.timeout = 900; -#endif + opt.read_timeout = 900; opt.use_robots = 1; opt.remove_listing = 1; - set_progress_implementation ("dot"); opt.dot_bytes = 1024; opt.dot_spacing = 10; opt.dots_in_line = 50; + + opt.dns_cache = 1; + + /* The default for file name restriction defaults to the OS type. */ +#if !defined(WINDOWS) && !defined(__CYGWIN__) + opt.restrict_files_os = restrict_unix; +#else + opt.restrict_files_os = restrict_windows; +#endif + opt.restrict_files_ctrl = 1; } /* Return the user's home directory (strdup-ed), or NULL if none is @@ -298,7 +338,8 @@ wgetrc_file_name (void) { if (!file_exists_p (env)) { - fprintf (stderr, "%s: %s: %s.\n", exec_name, file, strerror (errno)); + fprintf (stderr, _("%s: WGETRC points to %s, which doesn't exist.\n"), + exec_name, env); exit (1); } return xstrdup (env); @@ -338,7 +379,11 @@ wgetrc_file_name (void) return file; } -/* Initialize variables from a wgetrc file */ +static int parse_line PARAMS ((const char *, char **, char **, int *)); +static int setval_internal PARAMS ((int, const char *, const char *)); + +/* Initialize variables from a wgetrc file. */ + static void run_wgetrc (const char *file) { @@ -358,15 +403,15 @@ run_wgetrc (const char *file) while ((line = read_whole_line (fp))) { char *com, *val; - int status; + int comind, status; /* Parse the line. */ - status = parse_line (line, &com, &val); + status = parse_line (line, &com, &val, &comind); xfree (line); /* If everything is OK, set the value. */ if (status == 1) { - if (!setval (com, val)) + if (!setval_internal (comind, com, val)) fprintf (stderr, _("%s: Error in %s at line %d.\n"), exec_name, file, ln); xfree (com); @@ -400,8 +445,8 @@ initialize (void) file = wgetrc_file_name (); if (!file) return; - /* #### We should somehow canonicalize `file' and SYSTEM_WGETRC, - really. */ + /* #### We should canonicalize `file' and SYSTEM_WGETRC with + something like realpath() before comparing them with `strcmp' */ #ifdef SYSTEM_WGETRC if (!strcmp (file, SYSTEM_WGETRC)) { @@ -416,133 +461,156 @@ initialize (void) return; } +/* Remove dashes and underscores from S, modifying S in the + process. */ + +static void +dehyphen (char *s) +{ + char *t = s; /* t - tortoise */ + char *h = s; /* h - hare */ + while (*h) + if (*h == '_' || *h == '-') + ++h; + else + *t++ = *h++; + *t = '\0'; +} + /* Parse the line pointed by line, with the syntax: - * command * = * value + * command * = * value * Uses malloc to allocate space for command and value. If the line is invalid, data is freed and 0 is returned. Return values: 1 - success - 0 - failure - -1 - empty */ -int -parse_line (const char *line, char **com, char **val) + 0 - error + -1 - empty + + In case of success, *COM and *VAL point to freshly allocated + strings, and *COMIND points to com's index. In case of error or + empty line, those values are unaffected. */ + +static int +parse_line (const char *line, char **com, char **val, int *comind) { - const char *p = line; - const char *orig_comptr, *end; - char *new_comptr; + const char *p; + const char *end = line + strlen (line); + const char *cmdstart, *cmdend; + const char *valstart, *valend; - /* Skip whitespace. */ - while (*p && ISSPACE (*p)) - ++p; + char *cmdcopy; + int ind; + + /* Skip leading and trailing whitespace. */ + while (*line && ISSPACE (*line)) + ++line; + while (end > line && ISSPACE (end[-1])) + --end; - /* Don't process empty lines. */ - if (!*p || *p == '#') + /* Skip empty lines and comments. */ + if (!*line || *line == '#') return -1; - for (orig_comptr = p; ISALPHA (*p) || *p == '_' || *p == '-'; p++) - ; - /* The next char should be space or '='. */ - if (!ISSPACE (*p) && (*p != '=')) + p = line; + + cmdstart = p; + while (p < end && (ISALPHA (*p) || *p == '_' || *p == '-')) + ++p; + cmdend = p; + + /* Skip '=', as well as any space before or after it. */ + while (p < end && ISSPACE (*p)) + ++p; + if (p == end || *p != '=') return 0; - /* Here we cannot use strdupdelim() as we normally would because we - want to skip the `-' and `_' characters in the input string. */ - *com = (char *)xmalloc (p - orig_comptr + 1); - for (new_comptr = *com; orig_comptr < p; orig_comptr++) - { - if (*orig_comptr == '_' || *orig_comptr == '-') - continue; - *new_comptr++ = *orig_comptr; - } - *new_comptr = '\0'; - /* If the command is invalid, exit now. */ - if (comind (*com) == -1) - { - xfree (*com); - return 0; - } + ++p; + while (p < end && ISSPACE (*p)) + ++p; - /* Skip spaces before '='. */ - for (; ISSPACE (*p); p++); - /* If '=' not found, bail out. */ - if (*p != '=') - { - xfree (*com); - return 0; - } - /* Skip spaces after '='. */ - for (++p; ISSPACE (*p); p++); - /* Get the ending position for VAL by starting with the end of the - line and skipping whitespace. */ - end = line + strlen (line) - 1; - while (end > p && ISSPACE (*end)) - --end; - *val = strdupdelim (p, end + 1); + valstart = p; + valend = end; + + /* The line now known to be syntactically correct. Check whether + the command is valid. */ + BOUNDED_TO_ALLOCA (cmdstart, cmdend, cmdcopy); + dehyphen (cmdcopy); + ind = findcmd (cmdcopy); + if (ind == -1) + return 0; + + /* The command is valid. Now fill in the values and report success + to the caller. */ + *comind = ind; + *com = strdupdelim (cmdstart, cmdend); + *val = strdupdelim (valstart, valend); return 1; } -/* Set COM to VAL. This is the meat behind processing `.wgetrc'. No - fatals -- error signal prints a warning and resets to default - value. All error messages are printed to stderr, *not* to - opt.lfile, since opt.lfile wasn't even generated yet. */ -int -setval (const char *com, const char *val) -{ - int ind; +/* Run commands[comind].action. */ - if (!com || !val) - return 0; - ind = comind (com); - if (ind == -1) - { - /* #### Should I just abort()? */ -#ifdef DEBUG - fprintf (stderr, _("%s: BUG: unknown command `%s', value `%s'.\n"), - exec_name, com, val); -#endif - return 0; - } - return ((*commands[ind].action) (com, val, commands[ind].closure)); +static int +setval_internal (int comind, const char *com, const char *val) +{ + assert (0 <= comind && comind < countof (commands)); + return ((*commands[comind].action) (com, val, commands[comind].closure)); } - -/* Generic helper functions, for use with `commands'. */ -static int myatoi PARAMS ((const char *s)); +/* Run command COM with value VAL. If running the command produces an + error, report the error and exit. -/* Interpret VAL as an Internet address (a hostname or a dotted-quad - IP address), and write it (in network order) to a malloc-allocated - address. That address gets stored to the memory pointed to by - CLOSURE. COM is ignored, except for error messages. + This is intended to be called from main() to modify Wget's behavior + through command-line switches. Since COM is hard-coded in main(), + it is not canonicalized, and this aborts when COM is not found. - #### IMHO it's a mistake to do this kind of work so early in the - process (before any download even started!) opt.bind_address - should simply remember the provided value as a string. Another - function should do the lookup, when needed, and cache the - result. --hniksic */ -static int -cmd_address (const char *com, const char *val, void *closure) + If COMIND's are exported to init.h, this function will be changed + to accept COMIND directly. */ + +void +setoptval (const char *com, const char *val) { - struct sockaddr_in sin; - struct sockaddr_in **target = (struct sockaddr_in **)closure; + if (!setval_internal (findcmd (com), com, val)) + exit (2); +} - memset (&sin, '\0', sizeof (sin)); +/* Parse OPT into command and value and run it. For example, + run_command("foo=bar") is equivalent to setoptval("foo", "bar"). + This is used by the `--execute' flag in main.c. */ - if (!store_hostaddress ((unsigned char *)&sin.sin_addr, val)) +void +run_command (const char *opt) +{ + char *com, *val; + int comind; + int status = parse_line (opt, &com, &val, &comind); + if (status == 1) { - fprintf (stderr, _("%s: %s: Cannot convert `%s' to an IP address.\n"), - exec_name, com, val); - return 0; + if (!setval_internal (comind, com, val)) + exit (2); + xfree (com); + xfree (val); } - sin.sin_family = AF_INET; - sin.sin_port = 0; + else if (status == 0) + { + fprintf (stderr, _("%s: Invalid --execute command `%s'\n"), + exec_name, opt); + exit (2); + } +} + +/* Generic helper functions, for use with `commands'. */ - FREE_MAYBE (*target); +#define CMP1(p, c0) (TOLOWER((p)[0]) == (c0) && (p)[1] == '\0') - *target = xmalloc (sizeof (sin)); - memcpy (*target, &sin, sizeof (sin)); +#define CMP2(p, c0, c1) (TOLOWER((p)[0]) == (c0) \ + && TOLOWER((p)[1]) == (c1) \ + && (p)[2] == '\0') + +#define CMP3(p, c0, c1, c2) (TOLOWER((p)[0]) == (c0) \ + && TOLOWER((p)[1]) == (c1) \ + && TOLOWER((p)[2]) == (c2) \ + && (p)[3] == '\0') - return 1; -} /* Store the boolean value from VAL to CLOSURE. COM is ignored, except for error messages. */ @@ -551,16 +619,17 @@ cmd_boolean (const char *com, const char *val, void *closure) { int bool_value; - if (!strcasecmp (val, "on") - || (*val == '1' && !*(val + 1))) + if (CMP2 (val, 'o', 'n') || CMP3 (val, 'y', 'e', 's') || CMP1 (val, '1')) + /* "on", "yes" and "1" mean true. */ bool_value = 1; - else if (!strcasecmp (val, "off") - || (*val == '0' && !*(val + 1))) + else if (CMP3 (val, 'o', 'f', 'f') || CMP2 (val, 'n', 'o') || CMP1 (val, '0')) + /* "off", "no" and "0" mean false. */ bool_value = 0; else { - fprintf (stderr, _("%s: %s: Please specify on or off.\n"), - exec_name, com); + fprintf (stderr, + _("%s: %s: Invalid boolean `%s', use `on' or `off'.\n"), + exec_name, com, val); return 0; } @@ -568,10 +637,10 @@ cmd_boolean (const char *com, const char *val, void *closure) return 1; } -/* Store the lockable_boolean {2, 1, 0, -1} value from VAL to CLOSURE. COM is - ignored, except for error messages. Values 2 and -1 indicate that once - defined, the value may not be changed by successive wgetrc files or - command-line arguments. +/* Store the lockable_boolean {2, 1, 0, -1} value from VAL to CLOSURE. + COM is ignored, except for error messages. Values 2 and -1 + indicate that once defined, the value may not be changed by + successive wgetrc files or command-line arguments. Values: 2 - Enable a particular option for good ("always") 1 - Enable an option ("on") @@ -582,30 +651,28 @@ cmd_lockable_boolean (const char *com, const char *val, void *closure) { int lockable_boolean_value; + int oldval = *(int *)closure; + /* * If a config file said "always" or "never", don't allow command line * arguments to override the config file. */ - if (*(int *)closure == -1 || *(int *)closure == 2) + if (oldval == -1 || oldval == 2) return 1; - if (!strcasecmp (val, "always") - || (*val == '2' && !*(val + 1))) + if (0 == strcasecmp (val, "always") || CMP1 (val, '2')) lockable_boolean_value = 2; - else if (!strcasecmp (val, "on") - || (*val == '1' && !*(val + 1))) + else if (CMP2 (val, 'o', 'n') || CMP3 (val, 'y', 'e', 's') || CMP1 (val, '1')) lockable_boolean_value = 1; - else if (!strcasecmp (val, "off") - || (*val == '0' && !*(val + 1))) + else if (CMP3 (val, 'o', 'f', 'f') || CMP2 (val, 'n', 'o') || CMP1 (val, '0')) lockable_boolean_value = 0; - else if (!strcasecmp (val, "never") - || (*val == '-' && *(val + 1) == '1' && !*(val + 2))) + else if (0 == strcasecmp (val, "never") || CMP2 (val, '-', '1')) lockable_boolean_value = -1; else { - fprintf (stderr, _("%s: %s: Please specify always, on, off, " - "or never.\n"), - exec_name, com); + fprintf (stderr, + _("%s: %s: Invalid boolean `%s', use always, on, off, or never.\n"), + exec_name, com, val); return 0; } @@ -613,20 +680,19 @@ cmd_lockable_boolean (const char *com, const char *val, void *closure) return 1; } +static int simple_atoi PARAMS ((const char *, const char *, int *)); + /* Set the non-negative integer value from VAL to CLOSURE. With incorrect specification, the number remains unchanged. */ static int cmd_number (const char *com, const char *val, void *closure) { - int num = myatoi (val); - - if (num == -1) + if (!simple_atoi (val, val + strlen (val), closure)) { - fprintf (stderr, _("%s: %s: Invalid specification `%s'.\n"), + fprintf (stderr, _("%s: %s: Invalid number `%s'.\n"), exec_name, com, val); return 0; } - *(int *)closure = num; return 1; } @@ -654,6 +720,12 @@ cmd_string (const char *com, const char *val, void *closure) return 1; } +#ifndef WINDOWS +# define ISSEP(c) ((c) == '/') +#else +# define ISSEP(c) ((c) == '/' || (c) == '\\') +#endif + /* Like the above, but handles tilde-expansion when reading a user's `.wgetrc'. In that case, and if VAL begins with `~', the tilde gets expanded to the user's home directory. */ @@ -663,7 +735,10 @@ cmd_file (const char *com, const char *val, void *closure) char **pstring = (char **)closure; FREE_MAYBE (*pstring); - if (!enable_tilde_expansion || !(*val == '~' && *(val + 1) == '/')) + + /* #### If VAL is empty, perhaps should set *CLOSURE to NULL. */ + + if (!enable_tilde_expansion || !(*val == '~' && ISSEP (val[1]))) { noexpand: *pstring = xstrdup (val); @@ -677,26 +752,57 @@ cmd_file (const char *com, const char *val, void *closure) goto noexpand; homelen = strlen (home); - while (homelen && home[homelen - 1] == '/') + while (homelen && ISSEP (home[homelen - 1])) home[--homelen] = '\0'; /* Skip the leading "~/". */ - for (++val; *val == '/'; val++) + for (++val; ISSEP (*val); val++) ; - result = xmalloc (homelen + 1 + strlen (val)); + result = xmalloc (homelen + 1 + strlen (val) + 1); memcpy (result, home, homelen); result[homelen] = '/'; strcpy (result + homelen + 1, val); *pstring = result; } + +#ifdef WINDOWS + /* Convert "\" to "/". */ + { + char *s; + for (s = *pstring; *s; s++) + if (*s == '\\') + *s = '/'; + } +#endif + return 1; +} + +/* Like cmd_file, but strips trailing '/' characters. */ +static int +cmd_directory (const char *com, const char *val, void *closure) +{ + char *s, *t; + + /* Call cmd_file() for tilde expansion and separator + canonicalization (backslash -> slash under Windows). These + things should perhaps be in a separate function. */ + if (!cmd_file (com, val, closure)) + return 0; + + s = *(char **)closure; + t = s + strlen (s); + while (t > s && *--t == '/') + *t = '\0'; + return 1; } -/* Merge the vector (array of strings separated with `,') in COM with - the vector (NULL-terminated array of strings) pointed to by - CLOSURE. */ +/* Split VAL by space to a vector of values, and append those values + to vector pointed to by the CLOSURE argument. If VAL is empty, the + CLOSURE vector is cleared instead. */ + static int cmd_vector (const char *com, const char *val, void *closure) { @@ -743,112 +849,169 @@ cmd_directory_vector (const char *com, const char *val, void *closure) return 1; } -/* Set the value stored in VAL to CLOSURE (which should point to a - long int), allowing several postfixes, with the following syntax - (regexp): +static int simple_atof PARAMS ((const char *, const char *, double *)); - [0-9]+ -> bytes - [0-9]+[kK] -> bytes * 1024 - [0-9]+[mM] -> bytes * 1024 * 1024 - inf -> 0 +/* Enginge for cmd_bytes and cmd_bytes_large: converts a string such + as "100k" or "2.5G" to a floating point number. */ - Anything else is flagged as incorrect, and CLOSURE is unchanged. */ static int -cmd_bytes (const char *com, const char *val, void *closure) +parse_bytes_helper (const char *val, double *result) { - long result; - long *out = (long *)closure; - const char *p; + double number, mult; + const char *end = val + strlen (val); - result = 0; - p = val; /* Check for "inf". */ - if (p[0] == 'i' && p[1] == 'n' && p[2] == 'f' && p[3] == '\0') + if (0 == strcmp (val, "inf")) { - *out = 0; + *result = 0; return 1; } - /* Search for digits and construct result. */ - for (; *p && ISDIGIT (*p); p++) - result = (10 * result) + (*p - '0'); - /* If no digits were found, or more than one character is following - them, bail out. */ - if (p == val || (*p != '\0' && *(p + 1) != '\0')) - { - printf (_("%s: Invalid specification `%s'\n"), com, val); - return 0; - } - /* Search for a designator. */ - switch (TOLOWER (*p)) + + /* Strip trailing whitespace. */ + while (val < end && ISSPACE (end[-1])) + --end; + if (val == end) + return 0; + + switch (TOLOWER (end[-1])) { - case '\0': - /* None */ - break; case 'k': - /* Kilobytes */ - result *= 1024; + --end, mult = 1024.0; break; case 'm': - /* Megabytes */ - result *= (long)1024 * 1024; + --end, mult = 1048576.0; break; case 'g': - /* Gigabytes */ - result *= (long)1024 * 1024 * 1024; + --end, mult = 1073741824.0; + break; + case 't': + --end, mult = 1099511627776.0; break; default: - printf (_("%s: Invalid specification `%s'\n"), com, val); + /* Not a recognized suffix: assume it's a digit. (If not, + simple_atof will raise an error.) */ + mult = 1; + } + + /* Skip leading and trailing whitespace. */ + while (val < end && ISSPACE (*val)) + ++val; + while (val < end && ISSPACE (end[-1])) + --end; + if (val == end) + return 0; + + if (!simple_atof (val, end, &number)) + return 0; + + *result = number * mult; + return 1; +} + +/* Parse VAL as a number and set its value to CLOSURE (which should + point to a long int). + + By default, the value is assumed to be in bytes. If "K", "M", or + "G" are appended, the value is multiplied with 1<<10, 1<<20, or + 1<<30, respectively. Floating point values are allowed and are + cast to integer before use. The idea is to be able to use things + like 1.5k instead of "1536". + + The string "inf" is returned as 0. + + In case of error, 0 is returned and memory pointed to by CLOSURE + remains unmodified. */ + +static int +cmd_bytes (const char *com, const char *val, void *closure) +{ + double byte_value; + if (!parse_bytes_helper (val, &byte_value)) + { + fprintf (stderr, _("%s: %s: Invalid byte value `%s'\n"), + exec_name, com, val); return 0; } - *out = result; + *(long *)closure = (long)byte_value; return 1; } -/* Store the value of VAL to *OUT, allowing suffixes for minutes and - hours. */ +/* Like cmd_bytes, but CLOSURE is interpreted as a pointer to + LARGE_INT. It works by converting the string to double, therefore + working with values up to 2^53-1 without loss of precision. This + value (8192 TB) is large enough to serve for a while. */ + +static int +cmd_bytes_large (const char *com, const char *val, void *closure) +{ + double byte_value; + if (!parse_bytes_helper (val, &byte_value)) + { + fprintf (stderr, _("%s: %s: Invalid byte value `%s'\n"), + exec_name, com, val); + return 0; + } + *(LARGE_INT *)closure = (LARGE_INT)byte_value; + return 1; +} + +/* Store the value of VAL to *OUT. The value is a time period, by + default expressed in seconds, but also accepting suffixes "m", "h", + "d", and "w" for minutes, hours, days, and weeks respectively. */ + static int cmd_time (const char *com, const char *val, void *closure) { - long result = 0; - const char *p = val; + double number, mult; + const char *end = val + strlen (val); - /* Search for digits and construct result. */ - for (; *p && ISDIGIT (*p); p++) - result = (10 * result) + (*p - '0'); - /* If no digits were found, or more than one character is following - them, bail out. */ - if (p == val || (*p != '\0' && *(p + 1) != '\0')) + /* Strip trailing whitespace. */ + while (val < end && ISSPACE (end[-1])) + --end; + + if (val == end) { - printf (_("%s: Invalid specification `%s'\n"), com, val); + err: + fprintf (stderr, _("%s: %s: Invalid time period `%s'\n"), + exec_name, com, val); return 0; } - /* Search for a suffix. */ - switch (TOLOWER (*p)) + + switch (TOLOWER (end[-1])) { - case '\0': - /* None */ + case 's': + --end, mult = 1; /* seconds */ break; case 'm': - /* Minutes */ - result *= 60; + --end, mult = 60; /* minutes */ break; case 'h': - /* Seconds */ - result *= 3600; + --end, mult = 3600; /* hours */ break; case 'd': - /* Days (overflow on 16bit machines) */ - result *= 86400L; + --end, mult = 86400.0; /* days */ break; case 'w': - /* Weeks :-) */ - result *= 604800L; + --end, mult = 604800.0; /* weeks */ break; default: - printf (_("%s: Invalid specification `%s'\n"), com, val); - return 0; + /* Not a recognized suffix: assume it belongs to the number. + (If not, atof simple_atof will raise an error.) */ + mult = 1; } - *(long *)closure = result; + + /* Skip leading and trailing whitespace. */ + while (val < end && ISSPACE (*val)) + ++val; + while (val < end && ISSPACE (end[-1])) + --end; + if (val == end) + goto err; + + if (!simple_atof (val, end, &number)) + goto err; + + *(double *)closure = number * mult; return 1; } @@ -886,7 +1049,7 @@ cmd_spec_header (const char *com, const char *val, void *closure) if (!check_user_specified_header (val)) { - fprintf (stderr, _("%s: %s: Invalid specification `%s'.\n"), + fprintf (stderr, _("%s: %s: Invalid header `%s'.\n"), exec_name, com, val); return 0; } @@ -911,6 +1074,9 @@ cmd_spec_htmlify (const char *com, const char *val, void *closure) return flag; } +/* Set the "mirror" mode. It means: recursive download, timestamping, + no limit on max. recursion depth, and don't remove listings. */ + static int cmd_spec_mirror (const char *com, const char *val, void *closure) { @@ -930,6 +1096,9 @@ cmd_spec_mirror (const char *com, const char *val, void *closure) return 1; } +/* Set progress.type to VAL, but verify that it's a valid progress + implementation before that. */ + static int cmd_spec_progress (const char *com, const char *val, void *closure) { @@ -939,10 +1108,18 @@ cmd_spec_progress (const char *com, const char *val, void *closure) exec_name, com, val); return 0; } - set_progress_implementation (val); + FREE_MAYBE (opt.progress_type); + + /* Don't call set_progress_implementation here. It will be called + in main() when it becomes clear what the log output is. */ + opt.progress_type = xstrdup (val); return 1; } +/* Set opt.recursive to VAL as with cmd_boolean. If opt.recursive is + set to true, also set opt.dirstruct to 1, unless opt.no_dirstruct + is specified. */ + static int cmd_spec_recursive (const char *com, const char *val, void *closure) { @@ -956,6 +1133,62 @@ cmd_spec_recursive (const char *com, const char *val, void *closure) return 1; } +static int +cmd_spec_restrict_file_names (const char *com, const char *val, void *closure) +{ + int restrict_os = opt.restrict_files_os; + int restrict_ctrl = opt.restrict_files_ctrl; + + const char *end = strchr (val, ','); + if (!end) + end = val + strlen (val); + +#define VAL_IS(string_literal) BOUNDED_EQUAL (val, end, string_literal) + + if (VAL_IS ("unix")) + restrict_os = restrict_unix; + else if (VAL_IS ("windows")) + restrict_os = restrict_windows; + else if (VAL_IS ("nocontrol")) + restrict_ctrl = 0; + else + { + err: + fprintf (stderr, + _("%s: %s: Invalid restriction `%s', use `unix' or `windows'.\n"), + exec_name, com, val); + return 0; + } + +#undef VAL_IS + + if (*end) + { + if (!strcmp (end + 1, "nocontrol")) + restrict_ctrl = 0; + else + goto err; + } + + opt.restrict_files_os = restrict_os; + opt.restrict_files_ctrl = restrict_ctrl; + return 1; +} + +/* Set all three timeout values. */ + +static int +cmd_spec_timeout (const char *com, const char *val, void *closure) +{ + double value; + if (!cmd_time (com, val, &value)) + return 0; + opt.read_timeout = value; + opt.connect_timeout = value; + opt.dns_timeout = value; + return 1; +} + static int cmd_spec_useragent (const char *com, const char *val, void *closure) { @@ -963,7 +1196,7 @@ cmd_spec_useragent (const char *com, const char *val, void *closure) junk to the server. */ if (!*val || strchr (val, '\n')) { - fprintf (stderr, _("%s: %s: Invalid specification `%s'.\n"), + fprintf (stderr, _("%s: %s: Invalid value `%s'.\n"), exec_name, com, val); return 0; } @@ -973,23 +1206,71 @@ cmd_spec_useragent (const char *com, const char *val, void *closure) /* Miscellaneous useful routines. */ -/* Return the integer value of a positive integer written in S, or -1 - if an error was encountered. */ +/* A very simple atoi clone, more portable than strtol and friends, + but reports errors, unlike atoi. Returns 1 on success, 0 on + failure. In case of success, stores result to *DEST. */ + static int -myatoi (const char *s) +simple_atoi (const char *beg, const char *end, int *dest) { - int res; - const char *orig = s; + int result = 0; + const char *p; - for (res = 0; *s && ISDIGIT (*s); s++) - res = 10 * res + (*s - '0'); - if (*s || orig == s) - return -1; - else - return res; + if (beg == end) + return 0; + + for (p = beg; p < end && ISDIGIT (*p); p++) + result = (10 * result) + (*p - '0'); + + if (p != end) + return 0; + + *dest = result; + return 1; } -#define ISODIGIT(x) ((x) >= '0' && (x) <= '7') +/* Trivial atof, with error reporting. Handles "[.]", + doesn't handle exponential notation. Returns 1 on success, 0 on + failure. In case of success, stores its result to *DEST. */ + +static int +simple_atof (const char *beg, const char *end, double *dest) +{ + double result = 0; + + int seen_dot = 0; + int seen_digit = 0; + double divider = 1; + + const char *p; + + for (p = beg; p < end; p++) + { + char ch = *p; + if (ISDIGIT (ch)) + { + if (!seen_dot) + result = (10 * result) + (ch - '0'); + else + result += (ch - '0') / (divider *= 10); + seen_digit = 1; + } + else if (ch == '.') + { + if (!seen_dot) + seen_dot = 1; + else + return 0; + } + else + return 0; + } + if (!seen_digit) + return 0; + + *dest = result; + return 1; +} static int check_user_specified_header (const char *s) @@ -1008,25 +1289,43 @@ check_user_specified_header (const char *s) } void cleanup_html_url PARAMS ((void)); +void res_cleanup PARAMS ((void)); void downloaded_files_free PARAMS ((void)); +void http_cleanup PARAMS ((void)); /* Free the memory allocated by global variables. */ void cleanup (void) { - extern acc_t *netrc_list; + /* Free external resources, close files, etc. */ - recursive_cleanup (); - clean_hosts (); - free_netrc (netrc_list); if (opt.dfp) fclose (opt.dfp); + + /* We're exiting anyway so there's no real need to call free() + hundreds of times. Skipping the frees will make Wget exit + faster. + + However, when detecting leaks, it's crucial to free() everything + because then you can find the real leaks, i.e. the allocated + memory which grows with the size of the program. */ + +#ifdef DEBUG_MALLOC + convert_cleanup (); + res_cleanup (); + http_cleanup (); cleanup_html_url (); downloaded_files_free (); - cookies_cleanup (); + host_cleanup (); + cookie_jar_delete (wget_cookie_jar); + + { + extern acc_t *netrc_list; + free_netrc (netrc_list); + } FREE_MAYBE (opt.lfilename); - xfree (opt.dir_prefix); + FREE_MAYBE (opt.dir_prefix); FREE_MAYBE (opt.input_filename); FREE_MAYBE (opt.output_document); free_vec (opt.accepts); @@ -1036,6 +1335,7 @@ cleanup (void) free_vec (opt.domains); free_vec (opt.follow_tags); free_vec (opt.ignore_tags); + FREE_MAYBE (opt.progress_type); xfree (opt.ftp_acc); FREE_MAYBE (opt.ftp_pass); FREE_MAYBE (opt.ftp_proxy); @@ -1054,4 +1354,5 @@ cleanup (void) FREE_MAYBE (opt.bind_address); FREE_MAYBE (opt.cookies_input); FREE_MAYBE (opt.cookies_output); +#endif }