1 /* Reading/parsing the initialization file.
2 Copyright (C) 1995, 1996, 1997, 1998 Free Software Foundation, Inc.
4 This file is part of Wget.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
24 #include <sys/types.h>
#include <limits.h>
52 #define CMD_DECLARE(func) static int func \
53 PARAMS ((const char *, const char *, void *))
55 CMD_DECLARE (cmd_boolean);
56 CMD_DECLARE (cmd_bytes);
57 CMD_DECLARE (cmd_directory_vector);
58 CMD_DECLARE (cmd_lockable_boolean);
59 CMD_DECLARE (cmd_number);
60 CMD_DECLARE (cmd_number_inf);
61 CMD_DECLARE (cmd_string);
62 CMD_DECLARE (cmd_time);
63 CMD_DECLARE (cmd_vector);
65 CMD_DECLARE (cmd_spec_dirstruct);
66 CMD_DECLARE (cmd_spec_dotstyle);
67 CMD_DECLARE (cmd_spec_header);
68 CMD_DECLARE (cmd_spec_htmlify);
69 CMD_DECLARE (cmd_spec_mirror);
70 CMD_DECLARE (cmd_spec_outputdocument);
71 CMD_DECLARE (cmd_spec_recursive);
72 CMD_DECLARE (cmd_spec_useragent);
74 /* List of recognized commands, each consisting of name, closure and function.
75 When adding a new command, simply add it to the list, but be sure to keep the
76 list sorted alphabetically, as comind() depends on it. Also, be sure to add
77 any entries that allocate memory (e.g. cmd_string and cmd_vector guys) to the
78 cleanup() function below. */
82 int (*action) PARAMS ((const char *, const char *, void *));
84 { "accept", &opt.accepts, cmd_vector },
85 { "addhostdir", &opt.add_hostdir, cmd_boolean },
86 { "alwaysrest", &opt.always_rest, cmd_boolean }, /* deprecated */
87 { "background", &opt.background, cmd_boolean },
88 { "backupconverted", &opt.backup_converted, cmd_boolean },
89 { "backups", &opt.backups, cmd_number },
90 { "base", &opt.base_href, cmd_string },
91 { "cache", &opt.proxy_cache, cmd_boolean },
92 { "continue", &opt.always_rest, cmd_boolean },
93 { "convertlinks", &opt.convert_links, cmd_boolean },
94 { "cutdirs", &opt.cut_dirs, cmd_number },
96 { "debug", &opt.debug, cmd_boolean },
98 { "deleteafter", &opt.delete_after, cmd_boolean },
99 { "dirprefix", &opt.dir_prefix, cmd_string },
100 { "dirstruct", NULL, cmd_spec_dirstruct },
101 { "domains", &opt.domains, cmd_vector },
102 { "dotbytes", &opt.dot_bytes, cmd_bytes },
103 { "dotsinline", &opt.dots_in_line, cmd_number },
104 { "dotspacing", &opt.dot_spacing, cmd_number },
105 { "dotstyle", NULL, cmd_spec_dotstyle },
106 { "excludedirectories", &opt.excludes, cmd_directory_vector },
107 { "excludedomains", &opt.exclude_domains, cmd_vector },
108 { "followftp", &opt.follow_ftp, cmd_boolean },
109 { "followtags", &opt.follow_tags, cmd_vector },
110 { "forcehtml", &opt.force_html, cmd_boolean },
111 { "ftpproxy", &opt.ftp_proxy, cmd_string },
112 { "glob", &opt.ftp_glob, cmd_boolean },
113 { "header", NULL, cmd_spec_header },
114 { "htmlextension", &opt.html_extension, cmd_boolean },
115 { "htmlify", NULL, cmd_spec_htmlify },
116 { "httppasswd", &opt.http_passwd, cmd_string },
117 { "httpproxy", &opt.http_proxy, cmd_string },
118 { "httpuser", &opt.http_user, cmd_string },
119 { "ignorelength", &opt.ignore_length, cmd_boolean },
120 { "ignoretags", &opt.ignore_tags, cmd_vector },
121 { "includedirectories", &opt.includes, cmd_directory_vector },
122 { "input", &opt.input_filename, cmd_string },
123 { "killlonger", &opt.kill_longer, cmd_boolean },
124 { "logfile", &opt.lfilename, cmd_string },
125 { "login", &opt.ftp_acc, cmd_string },
126 { "mirror", NULL, cmd_spec_mirror },
127 { "netrc", &opt.netrc, cmd_boolean },
128 { "noclobber", &opt.noclobber, cmd_boolean },
129 { "noparent", &opt.no_parent, cmd_boolean },
130 { "noproxy", &opt.no_proxy, cmd_vector },
131 { "numtries", &opt.ntry, cmd_number_inf },/* deprecated*/
132 { "outputdocument", NULL, cmd_spec_outputdocument },
133 { "pagerequisites", &opt.page_requisites, cmd_boolean },
134 { "passiveftp", &opt.ftp_pasv, cmd_lockable_boolean },
135 { "passwd", &opt.ftp_pass, cmd_string },
136 { "proxypasswd", &opt.proxy_passwd, cmd_string },
137 { "proxyuser", &opt.proxy_user, cmd_string },
138 { "quiet", &opt.quiet, cmd_boolean },
139 { "quota", &opt.quota, cmd_bytes },
140 { "reclevel", &opt.reclevel, cmd_number_inf },
141 { "recursive", NULL, cmd_spec_recursive },
142 { "referer", &opt.referer, cmd_string },
143 { "reject", &opt.rejects, cmd_vector },
144 { "relativeonly", &opt.relative_only, cmd_boolean },
145 { "removelisting", &opt.remove_listing, cmd_boolean },
146 { "retrsymlinks", &opt.retr_symlinks, cmd_boolean },
147 { "robots", &opt.use_robots, cmd_boolean },
148 { "saveheaders", &opt.save_headers, cmd_boolean },
149 { "serverresponse", &opt.server_response, cmd_boolean },
150 { "simplehostcheck", &opt.simple_check, cmd_boolean },
151 { "spanhosts", &opt.spanhost, cmd_boolean },
152 { "spider", &opt.spider, cmd_boolean },
153 { "timeout", &opt.timeout, cmd_time },
154 { "timestamping", &opt.timestamping, cmd_boolean },
155 { "tries", &opt.ntry, cmd_number_inf },
156 { "useproxy", &opt.use_proxy, cmd_boolean },
157 { "useragent", NULL, cmd_spec_useragent },
158 { "verbose", &opt.verbose, cmd_boolean },
159 { "wait", &opt.wait, cmd_time },
160 { "waitretry", &opt.waitretry, cmd_time }
163 /* Return index of COM if it is a valid command, or -1 otherwise. COM
164 is looked up in `commands' using binary search algorithm. */
166 comind (const char *com)
168 int min = 0, max = ARRAY_SIZE (commands);
172 int i = (min + max) / 2;
173 int cmp = strcasecmp (com, commands[i].name);
185 /* Reset the variables to default values. */
191 /* Most of the default values are 0. Just reset everything, and
192 fill in the non-zero values. Note that initializing pointers to
193 NULL this way is technically illegal, but porting Wget to a
194 machine where NULL is not all-zero bit pattern will be the least
195 of the implementors' worries. */
196 memset (&opt, 0, sizeof (opt));
199 opt.dir_prefix = xstrdup (".");
203 opt.ftp_acc = xstrdup ("anonymous");
204 /*opt.ftp_pass = xstrdup (ftp_getaddress ());*/
209 tmp = getenv ("no_proxy");
211 opt.no_proxy = sepstring (tmp);
219 opt.remove_listing = 1;
221 opt.dot_bytes = 1024;
222 opt.dot_spacing = 10;
223 opt.dots_in_line = 50;
226 /* Return the user's home directory (strdup-ed), or NULL if none is
231 char *home = getenv ("HOME");
236 /* If HOME is not defined, try getting it from the password
238 struct passwd *pwd = getpwuid (getuid ());
239 if (!pwd || !pwd->pw_dir)
244 /* #### Maybe I should grab home_dir from registry, but the best
245 that I could get from there is user's Start menu. It sucks! */
249 return home ? xstrdup (home) : NULL;
252 /* Return the path to the user's .wgetrc. This is either the value of
253 `WGETRC' environment variable, or `$HOME/.wgetrc'.
255 If the `WGETRC' variable exists but the file does not exist, the
256 function will exit(). */
258 wgetrc_file_name (void)
263 /* Try the environment. */
264 env = getenv ("WGETRC");
267 if (!file_exists_p (env))
/* NOTE(review): the path that just failed the existence check is
   `env', but the message prints `file', which is not assigned on any
   visible line before this point (its first visible assignment is the
   xmalloc further down).  This looks like it should print `env' --
   confirm against the declaration of `file'.  Also, unlike the other
   diagnostics in this file, this format string is not wrapped in _().  */
269 fprintf (stderr, "%s: %s: %s.\n", exec_name, file, strerror (errno));
/* The environment value is handed back as a separate xstrdup copy.  */
272 return xstrdup (env);
276 /* If that failed, try $HOME/.wgetrc. */
/* Buffer size: HOME, one byte for the '/' separator, ".wgetrc", and the
   terminating NUL -- exactly what the sprintf format below produces.  */
280 file = (char *)xmalloc (strlen (home) + 1 + strlen (".wgetrc") + 1);
281 sprintf (file, "%s/.wgetrc", home);
285 /* Under Windows, "home" is (for the purposes of this function) the
286 directory where `wget.exe' resides, and `wget.ini' will be used
287 as file name. SYSTEM_WGETRC should not be defined under WINDOWS.
289 It is not as trivial as I assumed, because on 95 argv[0] is full
290 path, but on NT you get what you typed in command line. --dbudor */
/* No separator is inserted here: both the size and the "%swget.ini"
   format rely on `home' already ending with a path separator --
   presumably it is the directory prefix of the executable's path;
   TODO confirm.  */
294 file = (char *)xmalloc (strlen (home) + strlen ("wget.ini") + 1);
295 sprintf (file, "%swget.ini", home);
301 if (!file_exists_p (file))
309 /* Initialize variables from a wgetrc file */
311 run_wgetrc (const char *file)
317 fp = fopen (file, "rb");
320 fprintf (stderr, _("%s: Cannot read %s (%s).\n"), exec_name,
321 file, strerror (errno));
324 /* Reset line number. */
326 while ((line = read_whole_line (fp)))
330 int length = strlen (line);
332 if (length && line[length - 1] == '\r')
333 line[length - 1] = '\0';
334 /* Parse the line. */
335 status = parse_line (line, &com, &val);
337 /* If everything is OK, set the value. */
340 if (!setval (com, val))
341 fprintf (stderr, _("%s: Error in %s at line %d.\n"), exec_name,
346 else if (status == 0)
347 fprintf (stderr, _("%s: Error in %s at line %d.\n"), exec_name,
354 /* Initialize the defaults and run the system wgetrc and user's own
361 /* Load the hard-coded defaults. */
364 /* If SYSTEM_WGETRC is defined, use it. */
366 if (file_exists_p (SYSTEM_WGETRC))
367 run_wgetrc (SYSTEM_WGETRC);
369 /* Override it with your own, if one exists. */
370 file = wgetrc_file_name ();
373 /* #### We should somehow canonicalize `file' and SYSTEM_WGETRC,
376 if (!strcmp (file, SYSTEM_WGETRC))
378 fprintf (stderr, _("\
379 %s: Warning: Both system and user wgetrc point to `%s'.\n"),
389 /* Parse the line pointed to by LINE, with the syntax:
390 <sp>* command <sp>* = <sp>* value <newline>
391 Uses malloc to allocate space for command and value.
392 If the line is invalid, data is freed and 0 is returned.
399 parse_line (const char *line, char **com, char **val)
401 const char *p = line;
402 const char *orig_comptr, *end;
406 while (*p == ' ' || *p == '\t')
409 /* Don't process empty lines. */
410 if (!*p || *p == '\n' || *p == '#')
413 for (orig_comptr = p; ISALPHA (*p) || *p == '_' || *p == '-'; p++)
415 /* The next char should be space or '='. */
416 if (!ISSPACE (*p) && (*p != '='))
418 *com = (char *)xmalloc (p - orig_comptr + 1);
419 for (new_comptr = *com; orig_comptr < p; orig_comptr++)
421 if (*orig_comptr == '_' || *orig_comptr == '-')
423 *new_comptr++ = *orig_comptr;
426 /* If the command is invalid, exit now. */
427 if (comind (*com) == -1)
433 /* Skip spaces before '='. */
434 for (; ISSPACE (*p); p++);
435 /* If '=' not found, bail out. */
441 /* Skip spaces after '='. */
442 for (++p; ISSPACE (*p); p++);
443 /* Get the ending position. */
444 for (end = p; *end && *end != '\n'; end++);
445 /* Allocate *val, and copy from line. */
446 *val = strdupdelim (p, end);
450 /* Set COM to VAL. This is the meat behind processing `.wgetrc'. No
451 fatals -- error signal prints a warning and resets to default
452 value. All error messages are printed to stderr, *not* to
453 opt.lfile, since opt.lfile wasn't even generated yet. */
455 setval (const char *com, const char *val)
464 /* #### Should I just abort()? */
466 fprintf (stderr, _("%s: BUG: unknown command `%s', value `%s'.\n"),
467 exec_name, com, val);
471 return ((*commands[ind].action) (com, val, commands[ind].closure));
474 /* Generic helper functions, for use with `commands'. */
476 static int myatoi PARAMS ((const char *s));
478 /* Store the boolean value from VAL to CLOSURE. COM is ignored,
479 except for error messages. */
481 cmd_boolean (const char *com, const char *val, void *closure)
485 if (!strcasecmp (val, "on")
486 || (*val == '1' && !*(val + 1)))
488 else if (!strcasecmp (val, "off")
489 || (*val == '0' && !*(val + 1)))
493 fprintf (stderr, _("%s: %s: Please specify on or off.\n"),
498 *(int *)closure = bool_value;
502 /* Store the lockable_boolean {2, 1, 0, -1} value from VAL to CLOSURE. COM is
503 ignored, except for error messages. Values 2 and -1 indicate that once
504 defined, the value may not be changed by successive wgetrc files or
505 command-line arguments.
507 Values: 2 - Enable a particular option for good ("always")
508 1 - Enable an option ("on")
509 0 - Disable an option ("off")
510 -1 - Disable an option for good ("never") */
512 cmd_lockable_boolean (const char *com, const char *val, void *closure)
514 int lockable_boolean_value;
517 * If a config file said "always" or "never", don't allow command line
518 * arguments to override the config file.
520 if (*(int *)closure == -1 || *(int *)closure == 2)
523 if (!strcasecmp (val, "always")
524 || (*val == '2' && !*(val + 1)))
525 lockable_boolean_value = 2;
526 else if (!strcasecmp (val, "on")
527 || (*val == '1' && !*(val + 1)))
528 lockable_boolean_value = 1;
529 else if (!strcasecmp (val, "off")
530 || (*val == '0' && !*(val + 1)))
531 lockable_boolean_value = 0;
532 else if (!strcasecmp (val, "never")
533 || (*val == '-' && *(val + 1) == '1' && !*(val + 2)))
534 lockable_boolean_value = -1;
537 fprintf (stderr, _("%s: %s: Please specify always, on, off, "
543 *(int *)closure = lockable_boolean_value;
547 /* Set the non-negative integer value from VAL to CLOSURE. With
548 incorrect specification, the number remains unchanged. */
550 cmd_number (const char *com, const char *val, void *closure)
552 int num = myatoi (val);
556 fprintf (stderr, _("%s: %s: Invalid specification `%s'.\n"),
557 exec_name, com, val);
560 *(int *)closure = num;
564 /* Similar to cmd_number(), only accepts `inf' as a synonym for 0. */
566 cmd_number_inf (const char *com, const char *val, void *closure)
568 if (!strcasecmp (val, "inf"))
573 return cmd_number (com, val, closure);
576 /* Copy (strdup) the string at VAL to a new location and store a
577 pointer to the copy in *CLOSURE, freeing any previous value. */
579 cmd_string (const char *com, const char *val, void *closure)
581 char **pstring = (char **)closure;
583 FREE_MAYBE (*pstring);
584 *pstring = xstrdup (val);
588 /* Merge the vector (array of strings separated with `,') in VAL with
589 the vector (NULL-terminated array of strings) pointed to by
592 cmd_vector (const char *com, const char *val, void *closure)
594 char ***pvec = (char ***)closure;
597 *pvec = merge_vecs (*pvec, sepstring (val));
607 cmd_directory_vector (const char *com, const char *val, void *closure)
609 char ***pvec = (char ***)closure;
613 /* Strip the trailing slashes from directories. */
616 seps = sepstring (val);
617 for (t = seps; t && *t; t++)
619 int len = strlen (*t);
620 /* Skip degenerate case of root directory. */
623 if ((*t)[len - 1] == '/')
624 (*t)[len - 1] = '\0';
627 *pvec = merge_vecs (*pvec, seps);
637 /* Set the value stored in VAL to CLOSURE (which should point to a
638 long int), allowing several postfixes, with the following syntax
642 [0-9]+[kK] -> bytes * 1024
643 [0-9]+[mM] -> bytes * 1024 * 1024
646 Anything else is flagged as incorrect, and CLOSURE is unchanged. */
/* Review notes:
   - Both "Invalid specification" diagnostics below are written to
     stdout with printf and omit the program name.  cmd_number reports
     the same kind of error with fprintf (stderr, ...) and exec_name,
     and the comment on setval says all error messages go to stderr.
     NOTE(review): looks like an oversight -- confirm before moving
     these to stderr.
   - The "inf" test is an exact, case-sensitive match, whereas
     cmd_number_inf compares with strcasecmp.
   - The code also has a 1024 * 1024 * 1024 multiplier, presumably for
     a giga suffix -- confirm against the case labels.  */
648 cmd_bytes (const char *com, const char *val, void *closure)
651 long *out = (long *)closure;
656 /* Check for "inf". */
657 if (p[0] == 'i' && p[1] == 'n' && p[2] == 'f' && p[3] == '\0')
662 /* Search for digits and construct result. */
/* NOTE(review): nothing below guards against overflow.  A long enough
   digit string overflows `result' while it is being accumulated, and
   the suffix multipliers applied afterwards can overflow it as well.  */
663 for (; *p && ISDIGIT (*p); p++)
664 result = (10 * result) + (*p - '0');
665 /* If no digits were found, or more than one character is following
667 if (p == val || (*p != '\0' && *(p + 1) != '\0'))
669 printf (_("%s: Invalid specification `%s'\n"), com, val);
672 /* Search for a designator. */
673 switch (TOLOWER (*p))
684 result *= (long)1024 * 1024;
688 result *= (long)1024 * 1024 * 1024;
691 printf (_("%s: Invalid specification `%s'\n"), com, val);
698 /* Store the value of VAL to *OUT, allowing suffixes for minutes and
701 cmd_time (const char *com, const char *val, void *closure)
706 /* Search for digits and construct result. */
/* NOTE(review): the accumulation below has no overflow check, so a
   long enough digit string overflows `result' before any suffix is
   even looked at.  */
707 for (; *p && ISDIGIT (*p); p++)
708 result = (10 * result) + (*p - '0');
709 /* If no digits were found, or more than one character is following
711 if (p == val || (*p != '\0' && *(p + 1) != '\0'))
713 printf (_("%s: Invalid specification `%s'\n"), com, val);
716 /* Search for a suffix. */
/* NOTE(review): both "Invalid specification" diagnostics in this
   function are written to stdout with printf and omit the program
   name, while cmd_number uses fprintf (stderr, ...) with exec_name and
   the comment on setval says all error messages go to stderr --
   looks like an oversight; confirm.  */
717 switch (TOLOWER (*p))
731 /* Days (overflow on 16bit machines) */
739 printf (_("%s: Invalid specification `%s'\n"), com, val);
/* The result is stored through CLOSURE as a long, so every option
   routed to this handler (timeout, wait and waitretry in the command
   table) must be declared long -- assumed, TODO confirm against the
   options structure.  */
742 *(long *)closure = result;
746 /* Specialized helper functions, used by `commands' to handle some
747 options specially. */
749 static int check_user_specified_header PARAMS ((const char *));
752 cmd_spec_dirstruct (const char *com, const char *val, void *closure)
754 if (!cmd_boolean (com, val, &opt.dirstruct))
756 /* Since dirstruct behaviour is explicitly changed, no_dirstruct
757 must be affected inversely. */
759 opt.no_dirstruct = 0;
761 opt.no_dirstruct = 1;
766 cmd_spec_dotstyle (const char *com, const char *val, void *closure)
768 /* Retrieval styles. */
769 if (!strcasecmp (val, "default"))
771 /* Default style: 1K dots, 10 dots in a cluster, 50 dots in a
773 opt.dot_bytes = 1024;
774 opt.dot_spacing = 10;
775 opt.dots_in_line = 50;
777 else if (!strcasecmp (val, "binary"))
779 /* "Binary" retrieval: 8K dots, 16 dots in a cluster, 48 dots
781 opt.dot_bytes = 8192;
782 opt.dot_spacing = 16;
783 opt.dots_in_line = 48;
785 else if (!strcasecmp (val, "mega"))
787 /* "Mega" retrieval, for retrieving very long files; each dot is
788 64K, 8 dots in a cluster, 6 clusters (3M) in a line. */
789 opt.dot_bytes = 65536L;
791 opt.dots_in_line = 48;
793 else if (!strcasecmp (val, "giga"))
795 /* "Giga" retrieval, for retrieving very very *very* long files;
796 each dot is 1M, 8 dots in a cluster, 4 clusters (32M) in a
798 opt.dot_bytes = (1L << 20);
800 opt.dots_in_line = 32;
802 else if (!strcasecmp (val, "micro"))
804 /* "Micro" retrieval, for retrieving very small files (and/or
805 slow connections); each dot is 128 bytes, 8 dots in a
806 cluster, 6 clusters (6K) in a line. */
809 opt.dots_in_line = 48;
813 fprintf (stderr, _("%s: %s: Invalid specification `%s'.\n"),
814 exec_name, com, val);
821 cmd_spec_header (const char *com, const char *val, void *closure)
825 /* Empty header means reset headers. */
826 FREE_MAYBE (opt.user_header);
827 opt.user_header = NULL;
833 if (!check_user_specified_header (val))
835 fprintf (stderr, _("%s: %s: Invalid specification `%s'.\n"),
836 exec_name, com, val);
839 i = opt.user_header ? strlen (opt.user_header) : 0;
840 opt.user_header = (char *)xrealloc (opt.user_header, i + strlen (val)
842 strcpy (opt.user_header + i, val);
844 opt.user_header[i++] = '\r';
845 opt.user_header[i++] = '\n';
846 opt.user_header[i] = '\0';
852 cmd_spec_htmlify (const char *com, const char *val, void *closure)
854 int flag = cmd_boolean (com, val, &opt.htmlify);
855 if (flag && !opt.htmlify)
856 opt.remove_listing = 0;
861 cmd_spec_mirror (const char *com, const char *val, void *closure)
865 if (!cmd_boolean (com, val, &mirror))
870 if (!opt.no_dirstruct)
872 opt.timestamping = 1;
873 opt.reclevel = INFINITE_RECURSION;
874 opt.remove_listing = 0;
880 cmd_spec_outputdocument (const char *com, const char *val, void *closure)
882 FREE_MAYBE (opt.output_document);
883 opt.output_document = xstrdup (val);
889 cmd_spec_recursive (const char *com, const char *val, void *closure)
891 if (!cmd_boolean (com, val, &opt.recursive))
895 if (opt.recursive && !opt.no_dirstruct)
902 cmd_spec_useragent (const char *com, const char *val, void *closure)
904 /* Just check for empty string and newline, so we don't throw total
905 junk to the server. */
906 if (!*val || strchr (val, '\n'))
908 fprintf (stderr, _("%s: %s: Invalid specification `%s'.\n"),
909 exec_name, com, val);
912 opt.useragent = xstrdup (val);
916 /* Miscellaneous useful routines. */
/* Parse S as a non-negative decimal integer and return its value.
   Return -1 if S is empty, contains any character other than the
   digits 0-9, or denotes a value that does not fit in an int.  */
static int
myatoi (const char *s)
{
  const char *orig = s;
  int res = 0;

  for (; *s >= '0' && *s <= '9'; s++)
    {
      int digit = *s - '0';

      /* Stop before the accumulation would pass INT_MAX: signed
         overflow is undefined, and a wrapped value would be accepted
         by callers as if it were a valid setting.  */
      if (res > (INT_MAX - digit) / 10)
        return -1;
      res = 10 * res + digit;
    }
  /* Trailing garbage, or no digits at all.  */
  if (*s || s == orig)
    return -1;
  return res;
}
934 #define ISODIGIT(x) ((x) >= '0' && (x) <= '7')
937 check_user_specified_header (const char *s)
941 for (p = s; *p && *p != ':' && !ISSPACE (*p); p++);
942 /* The header MUST contain `:' preceded by at least one
943 non-whitespace character. */
944 if (*p != ':' || p == s)
946 /* The header MUST NOT contain newlines. */
947 if (strchr (s, '\n'))
952 /* Free the memory allocated by global variables. */
956 extern acc_t *netrc_list;
958 recursive_cleanup ();
960 free_netrc (netrc_list);
963 FREE_MAYBE (opt.lfilename);
964 free (opt.dir_prefix);
965 FREE_MAYBE (opt.input_filename);
966 FREE_MAYBE (opt.output_document);
967 free_vec (opt.accepts);
968 free_vec (opt.rejects);
969 free_vec (opt.excludes);
970 free_vec (opt.includes);
971 free_vec (opt.domains);
972 free_vec (opt.follow_tags);
973 free_vec (opt.ignore_tags);
976 FREE_MAYBE (opt.ftp_proxy);
977 FREE_MAYBE (opt.http_proxy);
978 free_vec (opt.no_proxy);
979 FREE_MAYBE (opt.useragent);
980 FREE_MAYBE (opt.referer);
981 FREE_MAYBE (opt.http_user);
982 FREE_MAYBE (opt.http_passwd);
983 FREE_MAYBE (opt.user_header);