1 /* Reading/parsing the initialization file.
2 Copyright (C) 1995, 1996, 1997, 1998 Free Software Foundation, Inc.
4 This file is part of Wget.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
24 #include <sys/types.h>
52 #define CMD_DECLARE(func) static int func \
53 PARAMS ((const char *, const char *, void *))
55 CMD_DECLARE (cmd_boolean);
56 CMD_DECLARE (cmd_boolean);
57 CMD_DECLARE (cmd_number);
58 CMD_DECLARE (cmd_number_inf);
59 CMD_DECLARE (cmd_string);
60 CMD_DECLARE (cmd_vector);
61 CMD_DECLARE (cmd_directory_vector);
62 CMD_DECLARE (cmd_bytes);
63 CMD_DECLARE (cmd_time);
65 CMD_DECLARE (cmd_spec_dirstruct);
66 CMD_DECLARE (cmd_spec_dotstyle);
67 CMD_DECLARE (cmd_spec_header);
68 CMD_DECLARE (cmd_spec_htmlify);
69 CMD_DECLARE (cmd_spec_mirror);
70 CMD_DECLARE (cmd_spec_outputdocument);
71 CMD_DECLARE (cmd_spec_recursive);
72 CMD_DECLARE (cmd_spec_useragent);
74 /* List of recognized commands, each consisting of name, closure and function.
75 When adding a new command, simply add it to the list, but be sure to keep the
76 list sorted alphabetically, as comind() depends on it. Also, be sure to add
77 any entries that allocate memory (e.g. cmd_string and cmd_vector guys) to the
78 cleanup() function below. */
82 int (*action) PARAMS ((const char *, const char *, void *));
84 { "accept", &opt.accepts, cmd_vector },
85 { "addhostdir", &opt.add_hostdir, cmd_boolean },
86 { "alwaysrest", &opt.always_rest, cmd_boolean }, /* deprecated */
87 { "background", &opt.background, cmd_boolean },
88 { "backupconverted", &opt.backup_converted, cmd_boolean },
89 { "backups", &opt.backups, cmd_number },
90 { "base", &opt.base_href, cmd_string },
91 { "cache", &opt.proxy_cache, cmd_boolean },
92 { "continue", &opt.always_rest, cmd_boolean },
93 { "convertlinks", &opt.convert_links, cmd_boolean },
94 { "cutdirs", &opt.cut_dirs, cmd_number },
96 { "debug", &opt.debug, cmd_boolean },
98 { "deleteafter", &opt.delete_after, cmd_boolean },
99 { "dirprefix", &opt.dir_prefix, cmd_string },
100 { "dirstruct", NULL, cmd_spec_dirstruct },
101 { "domains", &opt.domains, cmd_vector },
102 { "dotbytes", &opt.dot_bytes, cmd_bytes },
103 { "dotsinline", &opt.dots_in_line, cmd_number },
104 { "dotspacing", &opt.dot_spacing, cmd_number },
105 { "dotstyle", NULL, cmd_spec_dotstyle },
106 { "excludedirectories", &opt.excludes, cmd_directory_vector },
107 { "excludedomains", &opt.exclude_domains, cmd_vector },
108 { "followftp", &opt.follow_ftp, cmd_boolean },
109 { "followtags", &opt.follow_tags, cmd_vector },
110 { "forcehtml", &opt.force_html, cmd_boolean },
111 { "ftpproxy", &opt.ftp_proxy, cmd_string },
112 { "glob", &opt.ftp_glob, cmd_boolean },
113 { "header", NULL, cmd_spec_header },
114 { "htmlify", NULL, cmd_spec_htmlify },
115 { "httppasswd", &opt.http_passwd, cmd_string },
116 { "httpproxy", &opt.http_proxy, cmd_string },
117 { "httpuser", &opt.http_user, cmd_string },
118 { "ignorelength", &opt.ignore_length, cmd_boolean },
119 { "ignoretags", &opt.ignore_tags, cmd_vector },
120 { "includedirectories", &opt.includes, cmd_directory_vector },
121 { "input", &opt.input_filename, cmd_string },
122 { "killlonger", &opt.kill_longer, cmd_boolean },
123 { "logfile", &opt.lfilename, cmd_string },
124 { "login", &opt.ftp_acc, cmd_string },
125 { "mirror", NULL, cmd_spec_mirror },
126 { "netrc", &opt.netrc, cmd_boolean },
127 { "noclobber", &opt.noclobber, cmd_boolean },
128 { "noparent", &opt.no_parent, cmd_boolean },
129 { "noproxy", &opt.no_proxy, cmd_vector },
130 { "numtries", &opt.ntry, cmd_number_inf },/* deprecated*/
131 { "outputdocument", NULL, cmd_spec_outputdocument },
132 { "passiveftp", &opt.ftp_pasv, cmd_boolean },
133 { "passwd", &opt.ftp_pass, cmd_string },
134 { "proxypasswd", &opt.proxy_passwd, cmd_string },
135 { "proxyuser", &opt.proxy_user, cmd_string },
136 { "quiet", &opt.quiet, cmd_boolean },
137 { "quota", &opt.quota, cmd_bytes },
138 { "reclevel", &opt.reclevel, cmd_number_inf },
139 { "recursive", NULL, cmd_spec_recursive },
140 { "referer", &opt.referer, cmd_string },
141 { "reject", &opt.rejects, cmd_vector },
142 { "relativeonly", &opt.relative_only, cmd_boolean },
143 { "removelisting", &opt.remove_listing, cmd_boolean },
144 { "retrsymlinks", &opt.retr_symlinks, cmd_boolean },
145 { "robots", &opt.use_robots, cmd_boolean },
146 { "saveheaders", &opt.save_headers, cmd_boolean },
147 { "serverresponse", &opt.server_response, cmd_boolean },
148 { "simplehostcheck", &opt.simple_check, cmd_boolean },
149 { "spanhosts", &opt.spanhost, cmd_boolean },
150 { "spider", &opt.spider, cmd_boolean },
151 { "timeout", &opt.timeout, cmd_time },
152 { "timestamping", &opt.timestamping, cmd_boolean },
153 { "tries", &opt.ntry, cmd_number_inf },
154 { "useproxy", &opt.use_proxy, cmd_boolean },
155 { "useragent", NULL, cmd_spec_useragent },
156 { "verbose", &opt.verbose, cmd_boolean },
157 { "wait", &opt.wait, cmd_time },
158 { "waitretry", &opt.waitretry, cmd_time }
161 /* Return index of COM if it is a valid command, or -1 otherwise. COM
162 is looked up in `commands' using binary search algorithm. */
164 comind (const char *com)
166 int min = 0, max = ARRAY_SIZE (commands);
170 int i = (min + max) / 2;
171 int cmp = strcasecmp (com, commands[i].name);
183 /* Reset the variables to default values. */
189 /* Most of the default values are 0. Just reset everything, and
190 fill in the non-zero values. Note that initializing pointers to
191 NULL this way is technically illegal, but porting Wget to a
192 machine where NULL is not all-zero bit pattern will be the least
193 of the implementors' worries. */
194 memset (&opt, 0, sizeof (opt));
197 opt.dir_prefix = xstrdup (".");
201 opt.ftp_acc = xstrdup ("anonymous");
202 /*opt.ftp_pass = xstrdup (ftp_getaddress ());*/
207 tmp = getenv ("no_proxy");
209 opt.no_proxy = sepstring (tmp);
217 opt.remove_listing = 1;
219 opt.dot_bytes = 1024;
220 opt.dot_spacing = 10;
221 opt.dots_in_line = 50;
224 /* Return the user's home directory (strdup-ed), or NULL if none is
229 char *home = getenv ("HOME");
234 /* If HOME is not defined, try getting it from the password
236 struct passwd *pwd = getpwuid (getuid ());
237 if (!pwd || !pwd->pw_dir)
242 /* #### Maybe I should grab home_dir from registry, but the best
243 that I could get from there is user's Start menu. It sucks! */
247 return home ? xstrdup (home) : NULL;
250 /* Return the path to the user's .wgetrc. This is either the value of
251 `WGETRC' environment variable, or `$HOME/.wgetrc'.
253 If the `WGETRC' variable exists but the file does not exist, the
254 function will exit(). */
256 wgetrc_file_name (void)
261 /* Try the environment. */
262 env = getenv ("WGETRC");
265 if (!file_exists_p (env))
267 fprintf (stderr, "%s: %s: %s.\n", exec_name, file, strerror (errno));
270 return xstrdup (env);
274 /* If that failed, try $HOME/.wgetrc. */
278 file = (char *)xmalloc (strlen (home) + 1 + strlen (".wgetrc") + 1);
279 sprintf (file, "%s/.wgetrc", home);
282 /* Under Windows, "home" is (for the purposes of this function) the
283 directory where `wget.exe' resides, and `wget.ini' will be used
284 as file name. SYSTEM_WGETRC should not be defined under WINDOWS.
286 It is not as trivial as I assumed, because on 95 argv[0] is full
287 path, but on NT you get what you typed in command line. --dbudor */
291 file = (char *)xmalloc (strlen (home) + strlen ("wget.ini") + 1);
292 sprintf (file, "%swget.ini", home);
299 if (!file_exists_p (file))
307 /* Initialize variables from a wgetrc file */
309 run_wgetrc (const char *file)
315 fp = fopen (file, "rb");
318 fprintf (stderr, _("%s: Cannot read %s (%s).\n"), exec_name,
319 file, strerror (errno));
322 /* Reset line number. */
324 while ((line = read_whole_line (fp)))
328 int length = strlen (line);
330 if (length && line[length - 1] == '\r')
331 line[length - 1] = '\0';
332 /* Parse the line. */
333 status = parse_line (line, &com, &val);
335 /* If everything is OK, set the value. */
338 if (!setval (com, val))
339 fprintf (stderr, _("%s: Error in %s at line %d.\n"), exec_name,
344 else if (status == 0)
345 fprintf (stderr, _("%s: Error in %s at line %d.\n"), exec_name,
352 /* Initialize the defaults and run the system wgetrc and user's own
359 /* Load the hard-coded defaults. */
362 /* If SYSTEM_WGETRC is defined, use it. */
364 if (file_exists_p (SYSTEM_WGETRC))
365 run_wgetrc (SYSTEM_WGETRC);
367 /* Override it with your own, if one exists. */
368 file = wgetrc_file_name ();
371 /* #### We should somehow canonicalize `file' and SYSTEM_WGETRC,
374 if (!strcmp (file, SYSTEM_WGETRC))
376 fprintf (stderr, _("\
377 %s: Warning: Both system and user wgetrc point to `%s'.\n"),
387 /* Parse the line pointed to by LINE, with the syntax:
388 <sp>* command <sp>* = <sp>* value <newline>
389 Uses malloc to allocate space for command and value.
390 If the line is invalid, data is freed and 0 is returned.
397 parse_line (const char *line, char **com, char **val)
399 const char *p = line;
400 const char *orig_comptr, *end;
404 while (*p == ' ' || *p == '\t')
407 /* Don't process empty lines. */
408 if (!*p || *p == '\n' || *p == '#')
411 for (orig_comptr = p; ISALPHA (*p) || *p == '_' || *p == '-'; p++)
413 /* The next char should be space or '='. */
414 if (!ISSPACE (*p) && (*p != '='))
416 *com = (char *)xmalloc (p - orig_comptr + 1);
417 for (new_comptr = *com; orig_comptr < p; orig_comptr++)
419 if (*orig_comptr == '_' || *orig_comptr == '-')
421 *new_comptr++ = *orig_comptr;
424 /* If the command is invalid, exit now. */
425 if (comind (*com) == -1)
431 /* Skip spaces before '='. */
432 for (; ISSPACE (*p); p++);
433 /* If '=' not found, bail out. */
439 /* Skip spaces after '='. */
440 for (++p; ISSPACE (*p); p++);
441 /* Get the ending position. */
442 for (end = p; *end && *end != '\n'; end++);
443 /* Allocate *val, and copy from line. */
444 *val = strdupdelim (p, end);
448 /* Set COM to VAL. This is the meat behind processing `.wgetrc'. No
449 fatals -- error signal prints a warning and resets to default
450 value. All error messages are printed to stderr, *not* to
451 opt.lfile, since opt.lfile wasn't even generated yet. */
453 setval (const char *com, const char *val)
462 /* #### Should I just abort()? */
464 fprintf (stderr, _("%s: BUG: unknown command `%s', value `%s'.\n"),
465 exec_name, com, val);
469 return ((*commands[ind].action) (com, val, commands[ind].closure));
472 /* Generic helper functions, for use with `commands'. */
474 static int myatoi PARAMS ((const char *s));
476 /* Store the boolean value from VAL to CLOSURE. COM is ignored,
477 except for error messages. */
479 cmd_boolean (const char *com, const char *val, void *closure)
483 if (!strcasecmp (val, "on")
484 || (*val == '1' && !*(val + 1)))
486 else if (!strcasecmp (val, "off")
487 || (*val == '0' && !*(val + 1)))
491 fprintf (stderr, _("%s: %s: Please specify on or off.\n"),
496 *(int *)closure = bool_value;
500 /* Set the non-negative integer value from VAL to CLOSURE. With
501 incorrect specification, the number remains unchanged. */
503 cmd_number (const char *com, const char *val, void *closure)
505 int num = myatoi (val);
509 fprintf (stderr, _("%s: %s: Invalid specification `%s'.\n"),
510 exec_name, com, val);
513 *(int *)closure = num;
517 /* Similar to cmd_number(), but also accepts `inf' as a synonym for 0. */
519 cmd_number_inf (const char *com, const char *val, void *closure)
521 if (!strcasecmp (val, "inf"))
526 return cmd_number (com, val, closure);
529 /* Copy (strdup) the string at VAL to a new location and place a
530 pointer to it in *CLOSURE. */
532 cmd_string (const char *com, const char *val, void *closure)
534 char **pstring = (char **)closure;
536 FREE_MAYBE (*pstring);
537 *pstring = xstrdup (val);
541 /* Merge the vector (array of strings separated with `,') in VAL with
542 the vector (NULL-terminated array of strings) pointed to by
545 cmd_vector (const char *com, const char *val, void *closure)
547 char ***pvec = (char ***)closure;
550 *pvec = merge_vecs (*pvec, sepstring (val));
560 cmd_directory_vector (const char *com, const char *val, void *closure)
562 char ***pvec = (char ***)closure;
566 /* Strip the trailing slashes from directories. */
569 seps = sepstring (val);
570 for (t = seps; t && *t; t++)
572 int len = strlen (*t);
573 /* Skip degenerate case of root directory. */
576 if ((*t)[len - 1] == '/')
577 (*t)[len - 1] = '\0';
580 *pvec = merge_vecs (*pvec, seps);
590 /* Set the value stored in VAL to CLOSURE (which should point to a
591 long int), allowing several suffixes, with the following syntax
595 [0-9]+[kK] -> bytes * 1024
596 [0-9]+[mM] -> bytes * 1024 * 1024
599 Anything else is flagged as incorrect, and CLOSURE is unchanged. */
601 cmd_bytes (const char *com, const char *val, void *closure)
604 long *out = (long *)closure;
609 /* Check for "inf". */
610 if (p[0] == 'i' && p[1] == 'n' && p[2] == 'f' && p[3] == '\0')
615 /* Search for digits and construct result. */
616 for (; *p && ISDIGIT (*p); p++)
617 result = (10 * result) + (*p - '0');
618 /* If no digits were found, or more than one character is following
620 if (p == val || (*p != '\0' && *(p + 1) != '\0'))
622 printf (_("%s: Invalid specification `%s'\n"), com, val);
625 /* Search for a designator. */
626 switch (tolower (*p))
637 result *= (long)1024 * 1024;
641 result *= (long)1024 * 1024 * 1024;
644 printf (_("%s: Invalid specification `%s'\n"), com, val);
651 /* Store the value of VAL to *OUT, allowing suffixes for minutes and
654 cmd_time (const char *com, const char *val, void *closure)
659 /* Search for digits and construct result. */
660 for (; *p && ISDIGIT (*p); p++)
661 result = (10 * result) + (*p - '0');
662 /* If no digits were found, or more than one character is following
664 if (p == val || (*p != '\0' && *(p + 1) != '\0'))
666 printf (_("%s: Invalid specification `%s'\n"), com, val);
669 /* Search for a suffix. */
670 switch (tolower (*p))
684 /* Days (overflow on 16bit machines) */
692 printf (_("%s: Invalid specification `%s'\n"), com, val);
695 *(long *)closure = result;
699 /* Specialized helper functions, used by `commands' to handle some
700 options specially. */
702 static int check_user_specified_header PARAMS ((const char *));
705 cmd_spec_dirstruct (const char *com, const char *val, void *closure)
707 if (!cmd_boolean (com, val, &opt.dirstruct))
709 /* Since dirstruct behaviour is explicitly changed, no_dirstruct
710 must be affected inversely. */
712 opt.no_dirstruct = 0;
714 opt.no_dirstruct = 1;
719 cmd_spec_dotstyle (const char *com, const char *val, void *closure)
721 /* Retrieval styles. */
722 if (!strcasecmp (val, "default"))
724 /* Default style: 1K dots, 10 dots in a cluster, 50 dots in a
726 opt.dot_bytes = 1024;
727 opt.dot_spacing = 10;
728 opt.dots_in_line = 50;
730 else if (!strcasecmp (val, "binary"))
732 /* "Binary" retrieval: 8K dots, 16 dots in a cluster, 48 dots
734 opt.dot_bytes = 8192;
735 opt.dot_spacing = 16;
736 opt.dots_in_line = 48;
738 else if (!strcasecmp (val, "mega"))
740 /* "Mega" retrieval, for retrieving very long files; each dot is
741 64K, 8 dots in a cluster, 6 clusters (3M) in a line. */
742 opt.dot_bytes = 65536L;
744 opt.dots_in_line = 48;
746 else if (!strcasecmp (val, "giga"))
748 /* "Giga" retrieval, for retrieving very very *very* long files;
749 each dot is 1M, 8 dots in a cluster, 4 clusters (32M) in a
751 opt.dot_bytes = (1L << 20);
753 opt.dots_in_line = 32;
755 else if (!strcasecmp (val, "micro"))
757 /* "Micro" retrieval, for retrieving very small files (and/or
758 slow connections); each dot is 128 bytes, 8 dots in a
759 cluster, 6 clusters (6K) in a line. */
762 opt.dots_in_line = 48;
766 fprintf (stderr, _("%s: %s: Invalid specification `%s'.\n"),
767 exec_name, com, val);
774 cmd_spec_header (const char *com, const char *val, void *closure)
778 /* Empty header means reset headers. */
779 FREE_MAYBE (opt.user_header);
780 opt.user_header = NULL;
786 if (!check_user_specified_header (val))
788 fprintf (stderr, _("%s: %s: Invalid specification `%s'.\n"),
789 exec_name, com, val);
792 i = opt.user_header ? strlen (opt.user_header) : 0;
793 opt.user_header = (char *)xrealloc (opt.user_header, i + strlen (val)
795 strcpy (opt.user_header + i, val);
797 opt.user_header[i++] = '\r';
798 opt.user_header[i++] = '\n';
799 opt.user_header[i] = '\0';
805 cmd_spec_htmlify (const char *com, const char *val, void *closure)
807 int flag = cmd_boolean (com, val, &opt.htmlify);
808 if (flag && !opt.htmlify)
809 opt.remove_listing = 0;
814 cmd_spec_mirror (const char *com, const char *val, void *closure)
818 if (!cmd_boolean (com, val, &mirror))
823 if (!opt.no_dirstruct)
825 opt.timestamping = 1;
827 opt.remove_listing = 0;
833 cmd_spec_outputdocument (const char *com, const char *val, void *closure)
835 FREE_MAYBE (opt.output_document);
836 opt.output_document = xstrdup (val);
842 cmd_spec_recursive (const char *com, const char *val, void *closure)
844 if (!cmd_boolean (com, val, &opt.recursive))
848 if (opt.recursive && !opt.no_dirstruct)
855 cmd_spec_useragent (const char *com, const char *val, void *closure)
857 /* Just check for empty string and newline, so we don't throw total
858 junk to the server. */
859 if (!*val || strchr (val, '\n'))
861 fprintf (stderr, _("%s: %s: Invalid specification `%s'.\n"),
862 exec_name, com, val);
865 opt.useragent = xstrdup (val);
869 /* Miscellaneous useful routines. */
871 /* Return the integer value of a positive integer written in S, or -1
872 if an error was encountered. */
874 myatoi (const char *s)
877 const char *orig = s;
879 for (res = 0; *s && ISDIGIT (*s); s++)
880 res = 10 * res + (*s - '0');
887 #define ISODIGIT(x) ((x) >= '0' && (x) <= '7')
890 check_user_specified_header (const char *s)
894 for (p = s; *p && *p != ':' && !ISSPACE (*p); p++);
895 /* The header MUST contain `:' preceded by at least one
896 non-whitespace character. */
897 if (*p != ':' || p == s)
899 /* The header MUST NOT contain newlines. */
900 if (strchr (s, '\n'))
905 /* Free the memory allocated by global variables. */
909 extern acc_t *netrc_list;
911 recursive_cleanup ();
913 free_netrc (netrc_list);
916 FREE_MAYBE (opt.lfilename);
917 free (opt.dir_prefix);
918 FREE_MAYBE (opt.input_filename);
919 FREE_MAYBE (opt.output_document);
920 free_vec (opt.accepts);
921 free_vec (opt.rejects);
922 free_vec (opt.excludes);
923 free_vec (opt.includes);
924 free_vec (opt.domains);
925 free_vec (opt.follow_tags);
926 free_vec (opt.ignore_tags);
929 FREE_MAYBE (opt.ftp_proxy);
930 FREE_MAYBE (opt.http_proxy);
931 free_vec (opt.no_proxy);
932 FREE_MAYBE (opt.useragent);
933 FREE_MAYBE (opt.referer);
934 FREE_MAYBE (opt.http_user);
935 FREE_MAYBE (opt.http_passwd);
936 FREE_MAYBE (opt.user_header);