/* Reading/parsing the initialization file.
- Copyright (C) 1995, 1996, 1997, 1998 Free Software Foundation, Inc.
+ Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001
+ Free Software Foundation, Inc.
-This file is part of Wget.
+This file is part of GNU Wget.
-This program is free software; you can redistribute it and/or modify
+GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
-This program is distributed in the hope that it will be useful,
+GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
+along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
#include <config.h>
#include <stdio.h>
-#include <ctype.h>
#include <sys/types.h>
#include <stdlib.h>
#ifdef HAVE_UNISTD_H
#endif
#include <errno.h>
+#ifdef WINDOWS
+# include <winsock.h>
+#else
+# include <sys/socket.h>
+# include <netinet/in.h>
+#ifndef __BEOS__
+# include <arpa/inet.h>
+#endif
+#endif
+
#ifdef HAVE_PWD_H
#include <pwd.h>
#endif
#include "host.h"
#include "recur.h"
#include "netrc.h"
+#include "cookies.h" /* for cookies_cleanup */
#ifndef errno
extern int errno;
#endif
+/* We want tilde expansion enabled only when reading `.wgetrc' lines;
+ otherwise, it will be performed by the shell. This variable will
+ be set by the wgetrc-reading function. */
+
+static int enable_tilde_expansion;
+
#define CMD_DECLARE(func) static int func \
PARAMS ((const char *, const char *, void *))
+CMD_DECLARE (cmd_address);
CMD_DECLARE (cmd_boolean);
-CMD_DECLARE (cmd_boolean);
+CMD_DECLARE (cmd_bytes);
+CMD_DECLARE (cmd_directory_vector);
+CMD_DECLARE (cmd_lockable_boolean);
CMD_DECLARE (cmd_number);
CMD_DECLARE (cmd_number_inf);
CMD_DECLARE (cmd_string);
-CMD_DECLARE (cmd_vector);
-CMD_DECLARE (cmd_directory_vector);
-CMD_DECLARE (cmd_bytes);
+CMD_DECLARE (cmd_file);
CMD_DECLARE (cmd_time);
+CMD_DECLARE (cmd_vector);
CMD_DECLARE (cmd_spec_dirstruct);
-CMD_DECLARE (cmd_spec_dotstyle);
CMD_DECLARE (cmd_spec_header);
CMD_DECLARE (cmd_spec_htmlify);
CMD_DECLARE (cmd_spec_mirror);
-CMD_DECLARE (cmd_spec_outputdocument);
+/*CMD_DECLARE (cmd_spec_progress);*/
CMD_DECLARE (cmd_spec_recursive);
CMD_DECLARE (cmd_spec_useragent);
-/* List of recognized commands, each consisting of name, closure and
- function. When adding a new command, simply add it to the list,
- but be sure to keep the list sorted alphabetically, as comind()
- depends on it. */
+/* List of recognized commands, each consisting of name, closure and function.
+ When adding a new command, simply add it to the list, but be sure to keep the
+ list sorted alphabetically, as comind() depends on it. Also, be sure to add
+ any entries whose handler allocates memory (e.g. cmd_string, cmd_file,
+ cmd_vector and cmd_address entries) to the cleanup() function below. */
static struct {
char *name;
void *closure;
{ "backupconverted", &opt.backup_converted, cmd_boolean },
{ "backups", &opt.backups, cmd_number },
{ "base", &opt.base_href, cmd_string },
- { "cache", &opt.proxy_cache, cmd_boolean },
+ { "bindaddress", &opt.bind_address, cmd_address },
+ { "cache", &opt.allow_cache, cmd_boolean },
{ "continue", &opt.always_rest, cmd_boolean },
{ "convertlinks", &opt.convert_links, cmd_boolean },
+ { "cookies", &opt.cookies, cmd_boolean },
{ "cutdirs", &opt.cut_dirs, cmd_number },
#ifdef DEBUG
{ "debug", &opt.debug, cmd_boolean },
#endif
{ "deleteafter", &opt.delete_after, cmd_boolean },
- { "dirprefix", &opt.dir_prefix, cmd_string },
+ { "dirprefix", &opt.dir_prefix, cmd_file },
{ "dirstruct", NULL, cmd_spec_dirstruct },
{ "domains", &opt.domains, cmd_vector },
{ "dotbytes", &opt.dot_bytes, cmd_bytes },
{ "dotsinline", &opt.dots_in_line, cmd_number },
{ "dotspacing", &opt.dot_spacing, cmd_number },
- { "dotstyle", NULL, cmd_spec_dotstyle },
+ { "dotstyle", &opt.dot_style, cmd_string },
{ "excludedirectories", &opt.excludes, cmd_directory_vector },
{ "excludedomains", &opt.exclude_domains, cmd_vector },
{ "followftp", &opt.follow_ftp, cmd_boolean },
+ { "followtags", &opt.follow_tags, cmd_vector },
{ "forcehtml", &opt.force_html, cmd_boolean },
{ "ftpproxy", &opt.ftp_proxy, cmd_string },
{ "glob", &opt.ftp_glob, cmd_boolean },
{ "header", NULL, cmd_spec_header },
+ { "htmlextension", &opt.html_extension, cmd_boolean },
{ "htmlify", NULL, cmd_spec_htmlify },
+ { "httpkeepalive", &opt.http_keep_alive, cmd_boolean },
{ "httppasswd", &opt.http_passwd, cmd_string },
{ "httpproxy", &opt.http_proxy, cmd_string },
+ { "httpsproxy", &opt.https_proxy, cmd_string },
{ "httpuser", &opt.http_user, cmd_string },
{ "ignorelength", &opt.ignore_length, cmd_boolean },
+ { "ignoretags", &opt.ignore_tags, cmd_vector },
{ "includedirectories", &opt.includes, cmd_directory_vector },
- { "input", &opt.input_filename, cmd_string },
+ { "input", &opt.input_filename, cmd_file },
{ "killlonger", &opt.kill_longer, cmd_boolean },
- { "logfile", &opt.lfilename, cmd_string },
+ { "limitrate", &opt.limit_rate, cmd_bytes },
+ { "loadcookies", &opt.cookies_input, cmd_file },
+ { "logfile", &opt.lfilename, cmd_file },
{ "login", &opt.ftp_acc, cmd_string },
{ "mirror", NULL, cmd_spec_mirror },
{ "netrc", &opt.netrc, cmd_boolean },
{ "noclobber", &opt.noclobber, cmd_boolean },
{ "noparent", &opt.no_parent, cmd_boolean },
{ "noproxy", &opt.no_proxy, cmd_vector },
- { "numtries", &opt.ntry, cmd_number_inf }, /* deprecated */
- { "outputdocument", NULL, cmd_spec_outputdocument },
- { "passiveftp", &opt.ftp_pasv, cmd_boolean },
+ { "numtries", &opt.ntry, cmd_number_inf },/* deprecated*/
+ { "outputdocument", &opt.output_document, cmd_file },
+ { "pagerequisites", &opt.page_requisites, cmd_boolean },
+ { "passiveftp", &opt.ftp_pasv, cmd_lockable_boolean },
{ "passwd", &opt.ftp_pass, cmd_string },
+ { "progress", &opt.progress_type, cmd_string },
{ "proxypasswd", &opt.proxy_passwd, cmd_string },
{ "proxyuser", &opt.proxy_user, cmd_string },
{ "quiet", &opt.quiet, cmd_boolean },
{ "quota", &opt.quota, cmd_bytes },
+ { "randomwait", &opt.random_wait, cmd_boolean },
{ "reclevel", &opt.reclevel, cmd_number_inf },
{ "recursive", NULL, cmd_spec_recursive },
{ "referer", &opt.referer, cmd_string },
{ "removelisting", &opt.remove_listing, cmd_boolean },
{ "retrsymlinks", &opt.retr_symlinks, cmd_boolean },
{ "robots", &opt.use_robots, cmd_boolean },
+ { "savecookies", &opt.cookies_output, cmd_file },
{ "saveheaders", &opt.save_headers, cmd_boolean },
{ "serverresponse", &opt.server_response, cmd_boolean },
- { "simplehostcheck", &opt.simple_check, cmd_boolean },
{ "spanhosts", &opt.spanhost, cmd_boolean },
{ "spider", &opt.spider, cmd_boolean },
+#ifdef HAVE_SSL
+ { "sslcertfile", &opt.sslcertfile, cmd_file },
+ { "sslcertkey", &opt.sslcertkey, cmd_file },
+#endif /* HAVE_SSL */
{ "timeout", &opt.timeout, cmd_time },
{ "timestamping", &opt.timestamping, cmd_boolean },
{ "tries", &opt.ntry, cmd_number_inf },
static int
comind (const char *com)
{
- int min = 0, max = ARRAY_SIZE (commands);
+ int min = 0, max = ARRAY_SIZE (commands) - 1;
do
{
of the implementors' worries. */
memset (&opt, 0, sizeof (opt));
+ opt.cookies = 1;
+
opt.verbose = -1;
opt.dir_prefix = xstrdup (".");
opt.ntry = 20;
opt.reclevel = 5;
opt.add_hostdir = 1;
- opt.ftp_acc = xstrdup ("anonymous");
- /*opt.ftp_pass = xstrdup (ftp_getaddress ());*/
+ opt.ftp_acc = xstrdup ("anonymous");
+ opt.ftp_pass = xstrdup ("-wget@");
opt.netrc = 1;
opt.ftp_glob = 1;
opt.htmlify = 1;
+ opt.http_keep_alive = 1;
opt.use_proxy = 1;
tmp = getenv ("no_proxy");
if (tmp)
opt.no_proxy = sepstring (tmp);
- opt.proxy_cache = 1;
+ opt.allow_cache = 1;
#ifdef HAVE_SELECT
opt.timeout = 900;
file = (char *)xmalloc (strlen (home) + 1 + strlen (".wgetrc") + 1);
sprintf (file, "%s/.wgetrc", home);
}
+ FREE_MAYBE (home);
#else /* WINDOWS */
/* Under Windows, "home" is (for the purposes of this function) the
directory where `wget.exe' resides, and `wget.ini' will be used
}
#endif /* WINDOWS */
- FREE_MAYBE (home);
if (!file)
return NULL;
if (!file_exists_p (file))
{
- free (file);
+ xfree (file);
return NULL;
}
return file;
file, strerror (errno));
return;
}
- /* Reset line number. */
+ enable_tilde_expansion = 1;
ln = 1;
while ((line = read_whole_line (fp)))
{
char *com, *val;
int status;
- int length = strlen (line);
- if (length && line[length - 1] == '\r')
- line[length - 1] = '\0';
/* Parse the line. */
status = parse_line (line, &com, &val);
- free (line);
+ xfree (line);
/* If everything is OK, set the value. */
if (status == 1)
{
if (!setval (com, val))
fprintf (stderr, _("%s: Error in %s at line %d.\n"), exec_name,
file, ln);
- free (com);
- free (val);
+ xfree (com);
+ xfree (val);
}
else if (status == 0)
fprintf (stderr, _("%s: Error in %s at line %d.\n"), exec_name,
file, ln);
++ln;
}
+ enable_tilde_expansion = 0;
fclose (fp);
}
else
#endif
run_wgetrc (file);
- free (file);
+ xfree (file);
return;
}
\f
const char *orig_comptr, *end;
char *new_comptr;
- /* Skip spaces. */
- while (*p == ' ' || *p == '\t')
+ /* Skip whitespace. */
+ while (*p && ISSPACE (*p))
++p;
/* Don't process empty lines. */
- if (!*p || *p == '\n' || *p == '#')
+ if (!*p || *p == '#')
return -1;
for (orig_comptr = p; ISALPHA (*p) || *p == '_' || *p == '-'; p++)
/* The next char should be space or '='. */
if (!ISSPACE (*p) && (*p != '='))
return 0;
+ /* Here we cannot use strdupdelim() as we normally would because we
+ want to skip the `-' and `_' characters in the input string. */
*com = (char *)xmalloc (p - orig_comptr + 1);
for (new_comptr = *com; orig_comptr < p; orig_comptr++)
{
/* If the command is invalid, exit now. */
if (comind (*com) == -1)
{
- free (*com);
+ xfree (*com);
return 0;
}
/* If '=' not found, bail out. */
if (*p != '=')
{
- free (*com);
+ xfree (*com);
return 0;
}
/* Skip spaces after '='. */
for (++p; ISSPACE (*p); p++);
- /* Get the ending position. */
- for (end = p; *end && *end != '\n'; end++);
- /* Allocate *val, and copy from line. */
- *val = strdupdelim (p, end);
+ /* Get the ending position for VAL by starting with the end of the
+ line and skipping whitespace. */
+ end = line + strlen (line) - 1;
+ while (end > p && ISSPACE (*end))
+ --end;
+ *val = strdupdelim (p, end + 1);
return 1;
}
static int myatoi PARAMS ((const char *s));
+/* Resolve VAL -- a hostname or a dotted-quad IP address -- with
+   lookup_host() and publish the result through CLOSURE, which is
+   treated as a `struct sockaddr_in **'.  The stored object is a
+   freshly xmalloc'ed sockaddr_in whose family is AF_INET, whose port
+   is 0 and whose address (in network order) is copied out of the
+   lookup result by address_list_copy_one().  Whatever the target
+   pointed to before is freed first.  COM is used only in the error
+   message.
+
+   Returns 1 on success.  When the lookup fails a diagnostic is
+   printed, the target is left untouched and 0 is returned.
+
+   #### IMHO it's a mistake to do this kind of work so early in the
+   process (before any download even started!)  opt.bind_address
+   should simply remember the provided value as a string.  Another
+   function should do the lookup, when needed, and cache the
+   result.  --hniksic  */
+static int
+cmd_address (const char *com, const char *val, void *closure)
+{
+  struct sockaddr_in **slot = (struct sockaddr_in **)closure;
+  struct sockaddr_in addr;
+  struct address_list *resolved = lookup_host (val, 1);
+
+  if (!resolved)
+    {
+      fprintf (stderr, _("%s: %s: Cannot convert `%s' to an IP address.\n"),
+	       exec_name, com, val);
+      return 0;
+    }
+
+  /* Build the complete address on the stack before touching *SLOT.  */
+  memset (&addr, '\0', sizeof (addr));
+  addr.sin_family = AF_INET;
+  addr.sin_port = 0;
+  address_list_copy_one (resolved, 0, (unsigned char *)&addr.sin_addr);
+  address_list_release (resolved);
+
+  /* Replace any earlier setting with a heap copy of ADDR.  */
+  FREE_MAYBE (*slot);
+  *slot = xmalloc (sizeof (addr));
+  memcpy (*slot, &addr, sizeof (addr));
+
+  return 1;
+}
+
/* Store the boolean value from VAL to CLOSURE. COM is ignored,
except for error messages. */
static int
return 1;
}
+/* Parse VAL as a "lockable boolean" and store the result in the int
+   that CLOSURE points to.  COM is used only in the error message.
+
+   Accepted spellings (the words are compared case-insensitively):
+
+     "always" or "2"    ->   2   enable the option for good
+     "on"     or "1"    ->   1   enable the option
+     "off"    or "0"    ->   0   disable the option
+     "never"  or "-1"   ->  -1   disable the option for good
+
+   Once the stored value is 2 or -1 it is locked: every later call
+   returns success without examining VAL and without changing the
+   value, so successive wgetrc files or command-line arguments cannot
+   override it.
+
+   Returns 1 on success (the locked case included).  If VAL is none
+   of the spellings above, a diagnostic is printed, the stored value
+   is left alone and 0 is returned.  */
+static int
+cmd_lockable_boolean (const char *com, const char *val, void *closure)
+{
+  int *setting = (int *)closure;
+
+  /* A previous "always"/"never" wins over anything that follows.  */
+  if (*setting == 2 || *setting == -1)
+    return 1;
+
+  if (strcasecmp (val, "always") == 0
+      || (val[0] == '2' && val[1] == '\0'))
+    *setting = 2;
+  else if (strcasecmp (val, "on") == 0
+	   || (val[0] == '1' && val[1] == '\0'))
+    *setting = 1;
+  else if (strcasecmp (val, "off") == 0
+	   || (val[0] == '0' && val[1] == '\0'))
+    *setting = 0;
+  else if (strcasecmp (val, "never") == 0
+	   || (val[0] == '-' && val[1] == '1' && val[2] == '\0'))
+    *setting = -1;
+  else
+    {
+      fprintf (stderr, _("%s: %s: Please specify always, on, off, "
+			 "or never.\n"),
+	       exec_name, com);
+      return 0;
+    }
+
+  return 1;
+}
+
/* Set the non-negative integer value from VAL to CLOSURE. With
incorrect specification, the number remains unchanged. */
static int
return 1;
}
+/* Store a copy of the file name VAL in the string that CLOSURE
+   points to (CLOSURE is treated as a `char **'), freeing any
+   previous value first.  COM is not used.
+
+   Tilde expansion is performed only while enable_tilde_expansion is
+   set, i.e. while `.wgetrc' lines are being read; elsewhere the
+   shell has already done it.  In that mode a VAL that begins with
+   `~/' has the tilde replaced by the directory home_dir() returns.
+   In every other case -- including home_dir() returning NULL -- VAL
+   is copied verbatim.
+
+   Always returns 1.  */
+static int
+cmd_file (const char *com, const char *val, void *closure)
+{
+  char **pstring = (char **)closure;
+  char *home = NULL;
+  char *result;
+  int homelen;
+
+  FREE_MAYBE (*pstring);
+
+  if (enable_tilde_expansion && val[0] == '~' && val[1] == '/')
+    home = home_dir ();
+
+  if (!home)
+    {
+      /* Nothing to expand, or no home directory to expand to.  */
+      *pstring = xstrdup (val);
+      return 1;
+    }
+
+  /* Drop trailing slashes from HOME; exactly one separator is put
+     back when the two parts are joined below.  */
+  homelen = strlen (home);
+  while (homelen && home[homelen - 1] == '/')
+    home[--homelen] = '\0';
+
+  /* Skip the `~' and every slash that follows it.  */
+  for (++val; *val == '/'; val++)
+    ;
+
+  /* Room for HOME, the `/' separator, the rest of VAL, and the
+     terminating NUL that strcpy() writes.  Leaving the final `+ 1'
+     out makes strcpy() store one byte past the end of the buffer.  */
+  result = xmalloc (homelen + 1 + strlen (val) + 1);
+  memcpy (result, home, homelen);
+  result[homelen] = '/';
+  strcpy (result + homelen + 1, val);
+
+  /* HOME was needed only to build RESULT.  It is released here on the
+     understanding that home_dir() returns heap memory owned by the
+     caller (the wgetrc lookup code in this file frees its home
+     directory string as well); otherwise it would leak on every
+     expansion.  */
+  xfree (home);
+
+  *pstring = result;
+  return 1;
+}
+
/* Merge the vector (array of strings separated with `,') in COM with
the vector (NULL-terminated array of strings) pointed to by
CLOSURE. */
return 0;
}
/* Search for a designator. */
- switch (tolower (*p))
+ switch (TOLOWER (*p))
{
case '\0':
/* None */
return 0;
}
/* Search for a suffix. */
- switch (tolower (*p))
+ switch (TOLOWER (*p))
{
case '\0':
/* None */
return 1;
}
-static int
-cmd_spec_dotstyle (const char *com, const char *val, void *closure)
-{
- /* Retrieval styles. */
- if (!strcasecmp (val, "default"))
- {
- /* Default style: 1K dots, 10 dots in a cluster, 50 dots in a
- line. */
- opt.dot_bytes = 1024;
- opt.dot_spacing = 10;
- opt.dots_in_line = 50;
- }
- else if (!strcasecmp (val, "binary"))
- {
- /* "Binary" retrieval: 8K dots, 16 dots in a cluster, 48 dots
- (384K) in a line. */
- opt.dot_bytes = 8192;
- opt.dot_spacing = 16;
- opt.dots_in_line = 48;
- }
- else if (!strcasecmp (val, "mega"))
- {
- /* "Mega" retrieval, for retrieving very long files; each dot is
- 64K, 8 dots in a cluster, 6 clusters (3M) in a line. */
- opt.dot_bytes = 65536L;
- opt.dot_spacing = 8;
- opt.dots_in_line = 48;
- }
- else if (!strcasecmp (val, "giga"))
- {
- /* "Giga" retrieval, for retrieving very very *very* long files;
- each dot is 1M, 8 dots in a cluster, 4 clusters (32M) in a
- line. */
- opt.dot_bytes = (1L << 20);
- opt.dot_spacing = 8;
- opt.dots_in_line = 32;
- }
- else if (!strcasecmp (val, "micro"))
- {
- /* "Micro" retrieval, for retrieving very small files (and/or
- slow connections); each dot is 128 bytes, 8 dots in a
- cluster, 6 clusters (6K) in a line. */
- opt.dot_bytes = 128;
- opt.dot_spacing = 8;
- opt.dots_in_line = 48;
- }
- else
- {
- fprintf (stderr, _("%s: %s: Invalid specification `%s'.\n"),
- exec_name, com, val);
- return 0;
- }
- return 1;
-}
-
static int
cmd_spec_header (const char *com, const char *val, void *closure)
{
if (!opt.no_dirstruct)
opt.dirstruct = 1;
opt.timestamping = 1;
- opt.reclevel = 0;
+ opt.reclevel = INFINITE_RECURSION;
opt.remove_listing = 0;
}
return 1;
}
+#if 0
static int
-cmd_spec_outputdocument (const char *com, const char *val, void *closure)
+cmd_spec_progress (const char *com, const char *val, void *closure)
{
- FREE_MAYBE (opt.output_document);
- opt.output_document = xstrdup (val);
- opt.ntry = 1;
+ if (!valid_progress_implementation_p (val))
+ {
+ fprintf (stderr, _("%s: %s: Invalid progress type `%s'.\n"),
+ exec_name, com, val);
+ return 0;
+ }
+ set_progress_implementation (val);
return 1;
}
+#endif
static int
cmd_spec_recursive (const char *com, const char *val, void *closure)
return 1;
}
\f
+void cleanup_html_url PARAMS ((void));
+void res_cleanup PARAMS ((void));
+void downloaded_files_free PARAMS ((void));
+void http_cleanup PARAMS ((void));
+
+
/* Free the memory allocated by global variables. */
void
cleanup (void)
{
- extern acc_t *netrc_list;
+ /* Free external resources, close files, etc. */
- recursive_cleanup ();
- clean_hosts ();
- free_netrc (netrc_list);
if (opt.dfp)
fclose (opt.dfp);
+
+ /* We're exiting anyway so there's no real need to call free()
+ hundreds of times. Skipping the frees will make Wget exit
+ faster.
+
+ However, when detecting leaks, it's crucial to free() everything
+ because then you can find the real leaks, i.e. the allocated
+ memory which grows with the size of the program.
+
+ For that reason everything between the #ifdef DEBUG_MALLOC below
+ and its matching #endif -- the per-module cleanup calls and the
+ release of the individual opt members -- is compiled only when
+ DEBUG_MALLOC is defined. Closing opt.dfp above is the only step
+ that is always performed. */
+
+#ifdef DEBUG_MALLOC
+ recursive_cleanup ();
+ res_cleanup ();
+ http_cleanup ();
+ cleanup_html_url ();
+ downloaded_files_free ();
+ cookies_cleanup ();
+ host_cleanup ();
+
+ {
+ extern acc_t *netrc_list;
+ free_netrc (netrc_list);
+ }
FREE_MAYBE (opt.lfilename);
- free (opt.dir_prefix);
+ xfree (opt.dir_prefix);
FREE_MAYBE (opt.input_filename);
FREE_MAYBE (opt.output_document);
free_vec (opt.accepts);
free_vec (opt.excludes);
free_vec (opt.includes);
free_vec (opt.domains);
- free (opt.ftp_acc);
- free (opt.ftp_pass);
+ free_vec (opt.follow_tags);
+ free_vec (opt.ignore_tags);
+ FREE_MAYBE (opt.progress_type);
+ xfree (opt.ftp_acc);
+ FREE_MAYBE (opt.ftp_pass);
FREE_MAYBE (opt.ftp_proxy);
+ FREE_MAYBE (opt.https_proxy);
FREE_MAYBE (opt.http_proxy);
free_vec (opt.no_proxy);
FREE_MAYBE (opt.useragent);
FREE_MAYBE (opt.http_user);
FREE_MAYBE (opt.http_passwd);
FREE_MAYBE (opt.user_header);
+#ifdef HAVE_SSL
+ FREE_MAYBE (opt.sslcertkey);
+ FREE_MAYBE (opt.sslcertfile);
+#endif /* HAVE_SSL */
+ FREE_MAYBE (opt.bind_address);
+ FREE_MAYBE (opt.cookies_input);
+ FREE_MAYBE (opt.cookies_output);
+#endif
}