1 /* Reading/parsing the initialization file.
2 Copyright (C) 1995, 1996, 1997, 1998 Free Software Foundation, Inc.
4 This file is part of Wget.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
#include <sys/types.h>
#include <limits.h>
52 #define CMD_DECLARE(func) static int func \
53 PARAMS ((const char *, const char *, void *))
55 CMD_DECLARE (cmd_boolean);
56 CMD_DECLARE (cmd_boolean);
57 CMD_DECLARE (cmd_number);
58 CMD_DECLARE (cmd_number_inf);
59 CMD_DECLARE (cmd_string);
60 CMD_DECLARE (cmd_vector);
61 CMD_DECLARE (cmd_directory_vector);
62 CMD_DECLARE (cmd_bytes);
63 CMD_DECLARE (cmd_time);
65 CMD_DECLARE (cmd_spec_dirstruct);
66 CMD_DECLARE (cmd_spec_dotstyle);
67 CMD_DECLARE (cmd_spec_header);
68 CMD_DECLARE (cmd_spec_htmlify);
69 CMD_DECLARE (cmd_spec_mirror);
70 CMD_DECLARE (cmd_spec_outputdocument);
71 CMD_DECLARE (cmd_spec_recursive);
72 CMD_DECLARE (cmd_spec_useragent);
/* List of recognized commands, each consisting of name, closure and
   function.  When adding a new command, simply add it to the list,
   but be sure to keep the list sorted alphabetically, as comind()
   looks commands up with a binary search.  */
81 int (*action) PARAMS ((const char *, const char *, void *));
83 { "accept", &opt.accepts, cmd_vector },
84 { "addhostdir", &opt.add_hostdir, cmd_boolean },
85 { "alwaysrest", &opt.always_rest, cmd_boolean }, /* deprecated */
86 { "background", &opt.background, cmd_boolean },
87 { "backupconverted", &opt.backup_converted, cmd_boolean },
88 { "backups", &opt.backups, cmd_number },
89 { "base", &opt.base_href, cmd_string },
90 { "cache", &opt.proxy_cache, cmd_boolean },
91 { "continue", &opt.always_rest, cmd_boolean },
92 { "convertlinks", &opt.convert_links, cmd_boolean },
93 { "cutdirs", &opt.cut_dirs, cmd_number },
95 { "debug", &opt.debug, cmd_boolean },
97 { "deleteafter", &opt.delete_after, cmd_boolean },
98 { "dirprefix", &opt.dir_prefix, cmd_string },
99 { "dirstruct", NULL, cmd_spec_dirstruct },
100 { "domains", &opt.domains, cmd_vector },
101 { "dotbytes", &opt.dot_bytes, cmd_bytes },
102 { "dotsinline", &opt.dots_in_line, cmd_number },
103 { "dotspacing", &opt.dot_spacing, cmd_number },
104 { "dotstyle", NULL, cmd_spec_dotstyle },
105 { "excludedirectories", &opt.excludes, cmd_directory_vector },
106 { "excludedomains", &opt.exclude_domains, cmd_vector },
107 { "followftp", &opt.follow_ftp, cmd_boolean },
108 { "forcehtml", &opt.force_html, cmd_boolean },
109 { "ftpproxy", &opt.ftp_proxy, cmd_string },
110 { "glob", &opt.ftp_glob, cmd_boolean },
111 { "header", NULL, cmd_spec_header },
112 { "htmlify", NULL, cmd_spec_htmlify },
113 { "httppasswd", &opt.http_passwd, cmd_string },
114 { "httpproxy", &opt.http_proxy, cmd_string },
115 { "httpuser", &opt.http_user, cmd_string },
116 { "ignorelength", &opt.ignore_length, cmd_boolean },
117 { "includedirectories", &opt.includes, cmd_directory_vector },
118 { "input", &opt.input_filename, cmd_string },
119 { "killlonger", &opt.kill_longer, cmd_boolean },
120 { "logfile", &opt.lfilename, cmd_string },
121 { "login", &opt.ftp_acc, cmd_string },
122 { "mirror", NULL, cmd_spec_mirror },
123 { "netrc", &opt.netrc, cmd_boolean },
124 { "noclobber", &opt.noclobber, cmd_boolean },
125 { "noparent", &opt.no_parent, cmd_boolean },
126 { "noproxy", &opt.no_proxy, cmd_vector },
127 { "numtries", &opt.ntry, cmd_number_inf }, /* deprecated */
128 { "outputdocument", NULL, cmd_spec_outputdocument },
129 { "passiveftp", &opt.ftp_pasv, cmd_boolean },
130 { "passwd", &opt.ftp_pass, cmd_string },
131 { "proxypasswd", &opt.proxy_passwd, cmd_string },
132 { "proxyuser", &opt.proxy_user, cmd_string },
133 { "quiet", &opt.quiet, cmd_boolean },
134 { "quota", &opt.quota, cmd_bytes },
135 { "reclevel", &opt.reclevel, cmd_number_inf },
136 { "recursive", NULL, cmd_spec_recursive },
137 { "referer", &opt.referer, cmd_string },
138 { "reject", &opt.rejects, cmd_vector },
139 { "relativeonly", &opt.relative_only, cmd_boolean },
140 { "removelisting", &opt.remove_listing, cmd_boolean },
141 { "retrsymlinks", &opt.retr_symlinks, cmd_boolean },
142 { "robots", &opt.use_robots, cmd_boolean },
143 { "saveheaders", &opt.save_headers, cmd_boolean },
144 { "serverresponse", &opt.server_response, cmd_boolean },
145 { "simplehostcheck", &opt.simple_check, cmd_boolean },
146 { "spanhosts", &opt.spanhost, cmd_boolean },
147 { "spider", &opt.spider, cmd_boolean },
148 { "timeout", &opt.timeout, cmd_time },
149 { "timestamping", &opt.timestamping, cmd_boolean },
150 { "tries", &opt.ntry, cmd_number_inf },
151 { "useproxy", &opt.use_proxy, cmd_boolean },
152 { "useragent", NULL, cmd_spec_useragent },
153 { "verbose", &opt.verbose, cmd_boolean },
154 { "wait", &opt.wait, cmd_time },
155 { "waitretry", &opt.waitretry, cmd_time }
158 /* Return index of COM if it is a valid command, or -1 otherwise. COM
159 is looked up in `commands' using binary search algorithm. */
161 comind (const char *com)
163 int min = 0, max = ARRAY_SIZE (commands);
167 int i = (min + max) / 2;
168 int cmp = strcasecmp (com, commands[i].name);
180 /* Reset the variables to default values. */
186 /* Most of the default values are 0. Just reset everything, and
187 fill in the non-zero values. Note that initializing pointers to
188 NULL this way is technically illegal, but porting Wget to a
189 machine where NULL is not all-zero bit pattern will be the least
190 of the implementors' worries. */
191 memset (&opt, 0, sizeof (opt));
194 opt.dir_prefix = xstrdup (".");
198 opt.ftp_acc = xstrdup ("anonymous");
199 /*opt.ftp_pass = xstrdup (ftp_getaddress ());*/
204 tmp = getenv ("no_proxy");
206 opt.no_proxy = sepstring (tmp);
214 opt.remove_listing = 1;
216 opt.dot_bytes = 1024;
217 opt.dot_spacing = 10;
218 opt.dots_in_line = 50;
221 /* Return the user's home directory (strdup-ed), or NULL if none is
226 char *home = getenv ("HOME");
231 /* If HOME is not defined, try getting it from the password
233 struct passwd *pwd = getpwuid (getuid ());
234 if (!pwd || !pwd->pw_dir)
239 /* #### Maybe I should grab home_dir from registry, but the best
240 that I could get from there is user's Start menu. It sucks! */
244 return home ? xstrdup (home) : NULL;
247 /* Return the path to the user's .wgetrc. This is either the value of
248 `WGETRC' environment variable, or `$HOME/.wgetrc'.
250 If the `WGETRC' variable exists but the file does not exist, the
251 function will exit(). */
253 wgetrc_file_name (void)
258 /* Try the environment. */
259 env = getenv ("WGETRC");
262 if (!file_exists_p (env))
264 fprintf (stderr, "%s: %s: %s.\n", exec_name, file, strerror (errno));
267 return xstrdup (env);
271 /* If that failed, try $HOME/.wgetrc. */
275 file = (char *)xmalloc (strlen (home) + 1 + strlen (".wgetrc") + 1);
276 sprintf (file, "%s/.wgetrc", home);
279 /* Under Windows, "home" is (for the purposes of this function) the
280 directory where `wget.exe' resides, and `wget.ini' will be used
281 as file name. SYSTEM_WGETRC should not be defined under WINDOWS.
283 It is not as trivial as I assumed, because on 95 argv[0] is full
284 path, but on NT you get what you typed in command line. --dbudor */
288 file = (char *)xmalloc (strlen (home) + strlen ("wget.ini") + 1);
289 sprintf (file, "%swget.ini", home);
296 if (!file_exists_p (file))
304 /* Initialize variables from a wgetrc file */
306 run_wgetrc (const char *file)
312 fp = fopen (file, "rb");
315 fprintf (stderr, _("%s: Cannot read %s (%s).\n"), exec_name,
316 file, strerror (errno));
319 /* Reset line number. */
321 while ((line = read_whole_line (fp)))
325 int length = strlen (line);
327 if (length && line[length - 1] == '\r')
328 line[length - 1] = '\0';
329 /* Parse the line. */
330 status = parse_line (line, &com, &val);
332 /* If everything is OK, set the value. */
335 if (!setval (com, val))
336 fprintf (stderr, _("%s: Error in %s at line %d.\n"), exec_name,
341 else if (status == 0)
342 fprintf (stderr, _("%s: Error in %s at line %d.\n"), exec_name,
349 /* Initialize the defaults and run the system wgetrc and user's own
356 /* Load the hard-coded defaults. */
359 /* If SYSTEM_WGETRC is defined, use it. */
361 if (file_exists_p (SYSTEM_WGETRC))
362 run_wgetrc (SYSTEM_WGETRC);
364 /* Override it with your own, if one exists. */
365 file = wgetrc_file_name ();
368 /* #### We should somehow canonicalize `file' and SYSTEM_WGETRC,
371 if (!strcmp (file, SYSTEM_WGETRC))
373 fprintf (stderr, _("\
374 %s: Warning: Both system and user wgetrc point to `%s'.\n"),
/* Parse LINE, which is expected to have the syntax:
     <sp>* command <sp>* = <sp>* value <newline>
   The command may be written with `_' and `-' in it; they are dropped
   before the name is looked up.  Space for the command and the value
   is allocated and stored to *COM and *VAL.

   Returns 1 when a known command and its value were extracted (the
   caller frees both), 0 when the line is invalid (anything allocated
   here has been freed), and -1 when the line is empty or a
   comment.  */
int
parse_line (const char *line, char **com, char **val)
{
  const char *p = line;
  const char *name_beg, *name_end, *src;
  const char *value_beg, *value_end;
  char *dst;

  /* Skip leading blanks.  */
  while (*p == ' ' || *p == '\t')
    ++p;

  /* Don't process empty lines and comments.  */
  if (*p == '\0' || *p == '\n' || *p == '#')
    return -1;

  /* Delimit the command name.  */
  name_beg = p;
  while (ISALPHA (*p) || *p == '_' || *p == '-')
    ++p;
  name_end = p;

  /* The name must be followed by space or '='.  */
  if (!ISSPACE (*p) && *p != '=')
    return 0;

  /* Copy the name, leaving out the separators.  */
  *com = (char *)xmalloc (name_end - name_beg + 1);
  dst = *com;
  for (src = name_beg; src < name_end; src++)
    if (*src != '_' && *src != '-')
      *dst++ = *src;
  *dst = '\0';

  /* Reject unknown commands right away.  */
  if (comind (*com) == -1)
    {
      free (*com);
      return 0;
    }

  /* Skip spaces before '=', which is mandatory.  */
  while (ISSPACE (*p))
    ++p;
  if (*p != '=')
    {
      free (*com);
      return 0;
    }

  /* The value starts after '=' and the spaces following it, and runs
     up to the newline or the end of the string.  */
  value_beg = p + 1;
  while (ISSPACE (*value_beg))
    ++value_beg;
  value_end = value_beg;
  while (*value_end != '\0' && *value_end != '\n')
    ++value_end;

  *val = strdupdelim (value_beg, value_end);
  return 1;
}
445 /* Set COM to VAL. This is the meat behind processing `.wgetrc'. No
446 fatals -- error signal prints a warning and resets to default
447 value. All error messages are printed to stderr, *not* to
448 opt.lfile, since opt.lfile wasn't even generated yet. */
450 setval (const char *com, const char *val)
459 /* #### Should I just abort()? */
461 fprintf (stderr, _("%s: BUG: unknown command `%s', value `%s'.\n"),
462 exec_name, com, val);
466 return ((*commands[ind].action) (com, val, commands[ind].closure));
469 /* Generic helper functions, for use with `commands'. */
471 static int myatoi PARAMS ((const char *s));
473 /* Store the boolean value from VAL to CLOSURE. COM is ignored,
474 except for error messages. */
476 cmd_boolean (const char *com, const char *val, void *closure)
480 if (!strcasecmp (val, "on")
481 || (*val == '1' && !*(val + 1)))
483 else if (!strcasecmp (val, "off")
484 || (*val == '0' && !*(val + 1)))
488 fprintf (stderr, _("%s: %s: Please specify on or off.\n"),
493 *(int *)closure = bool_value;
497 /* Set the non-negative integer value from VAL to CLOSURE. With
498 incorrect specification, the number remains unchanged. */
500 cmd_number (const char *com, const char *val, void *closure)
502 int num = myatoi (val);
506 fprintf (stderr, _("%s: %s: Invalid specification `%s'.\n"),
507 exec_name, com, val);
510 *(int *)closure = num;
/* Like cmd_number(), except that `inf' (in any letter case) is also
   accepted and stored as 0.  */
static int
cmd_number_inf (const char *com, const char *val, void *closure)
{
  if (strcasecmp (val, "inf") != 0)
    return cmd_number (com, val, closure);

  *(int *)closure = 0;
  return 1;
}
/* Replace the string CLOSURE points to with a fresh copy of VAL.
   CLOSURE is the address of a `char *'; the string it held before, if
   any, is freed.  COM is not used.  Always returns 1.  */
static int
cmd_string (const char *com, const char *val, void *closure)
{
  char **slot = (char **)closure;

  /* Drop the previous value before installing the new one.  */
  FREE_MAYBE (*slot);
  *slot = xstrdup (val);

  return 1;
}
538 /* Merge the vector (array of strings separated with `,') in COM with
539 the vector (NULL-terminated array of strings) pointed to by
542 cmd_vector (const char *com, const char *val, void *closure)
544 char ***pvec = (char ***)closure;
547 *pvec = merge_vecs (*pvec, sepstring (val));
557 cmd_directory_vector (const char *com, const char *val, void *closure)
559 char ***pvec = (char ***)closure;
563 /* Strip the trailing slashes from directories. */
566 seps = sepstring (val);
567 for (t = seps; t && *t; t++)
569 int len = strlen (*t);
570 /* Skip degenerate case of root directory. */
573 if ((*t)[len - 1] == '/')
574 (*t)[len - 1] = '\0';
577 *pvec = merge_vecs (*pvec, seps);
587 /* Set the value stored in VAL to CLOSURE (which should point to a
588 long int), allowing several postfixes, with the following syntax
592 [0-9]+[kK] -> bytes * 1024
593 [0-9]+[mM] -> bytes * 1024 * 1024
596 Anything else is flagged as incorrect, and CLOSURE is unchanged. */
598 cmd_bytes (const char *com, const char *val, void *closure)
601 long *out = (long *)closure;
606 /* Check for "inf". */
607 if (p[0] == 'i' && p[1] == 'n' && p[2] == 'f' && p[3] == '\0')
612 /* Search for digits and construct result. */
613 for (; *p && ISDIGIT (*p); p++)
614 result = (10 * result) + (*p - '0');
615 /* If no digits were found, or more than one character is following
617 if (p == val || (*p != '\0' && *(p + 1) != '\0'))
619 printf (_("%s: Invalid specification `%s'\n"), com, val);
622 /* Search for a designator. */
623 switch (tolower (*p))
634 result *= (long)1024 * 1024;
638 result *= (long)1024 * 1024 * 1024;
641 printf (_("%s: Invalid specification `%s'\n"), com, val);
648 /* Store the value of VAL to *OUT, allowing suffixes for minutes and
651 cmd_time (const char *com, const char *val, void *closure)
656 /* Search for digits and construct result. */
657 for (; *p && ISDIGIT (*p); p++)
658 result = (10 * result) + (*p - '0');
659 /* If no digits were found, or more than one character is following
661 if (p == val || (*p != '\0' && *(p + 1) != '\0'))
663 printf (_("%s: Invalid specification `%s'\n"), com, val);
666 /* Search for a suffix. */
667 switch (tolower (*p))
681 /* Days (overflow on 16bit machines) */
689 printf (_("%s: Invalid specification `%s'\n"), com, val);
692 *(long *)closure = result;
696 /* Specialized helper functions, used by `commands' to handle some
697 options specially. */
699 static int check_user_specified_header PARAMS ((const char *));
702 cmd_spec_dirstruct (const char *com, const char *val, void *closure)
704 if (!cmd_boolean (com, val, &opt.dirstruct))
706 /* Since dirstruct behaviour is explicitly changed, no_dirstruct
707 must be affected inversely. */
709 opt.no_dirstruct = 0;
711 opt.no_dirstruct = 1;
716 cmd_spec_dotstyle (const char *com, const char *val, void *closure)
718 /* Retrieval styles. */
719 if (!strcasecmp (val, "default"))
721 /* Default style: 1K dots, 10 dots in a cluster, 50 dots in a
723 opt.dot_bytes = 1024;
724 opt.dot_spacing = 10;
725 opt.dots_in_line = 50;
727 else if (!strcasecmp (val, "binary"))
729 /* "Binary" retrieval: 8K dots, 16 dots in a cluster, 48 dots
731 opt.dot_bytes = 8192;
732 opt.dot_spacing = 16;
733 opt.dots_in_line = 48;
735 else if (!strcasecmp (val, "mega"))
737 /* "Mega" retrieval, for retrieving very long files; each dot is
738 64K, 8 dots in a cluster, 6 clusters (3M) in a line. */
739 opt.dot_bytes = 65536L;
741 opt.dots_in_line = 48;
743 else if (!strcasecmp (val, "giga"))
745 /* "Giga" retrieval, for retrieving very very *very* long files;
746 each dot is 1M, 8 dots in a cluster, 4 clusters (32M) in a
748 opt.dot_bytes = (1L << 20);
750 opt.dots_in_line = 32;
752 else if (!strcasecmp (val, "micro"))
754 /* "Micro" retrieval, for retrieving very small files (and/or
755 slow connections); each dot is 128 bytes, 8 dots in a
756 cluster, 6 clusters (6K) in a line. */
759 opt.dots_in_line = 48;
763 fprintf (stderr, _("%s: %s: Invalid specification `%s'.\n"),
764 exec_name, com, val);
771 cmd_spec_header (const char *com, const char *val, void *closure)
775 /* Empty header means reset headers. */
776 FREE_MAYBE (opt.user_header);
777 opt.user_header = NULL;
783 if (!check_user_specified_header (val))
785 fprintf (stderr, _("%s: %s: Invalid specification `%s'.\n"),
786 exec_name, com, val);
789 i = opt.user_header ? strlen (opt.user_header) : 0;
790 opt.user_header = (char *)xrealloc (opt.user_header, i + strlen (val)
792 strcpy (opt.user_header + i, val);
794 opt.user_header[i++] = '\r';
795 opt.user_header[i++] = '\n';
796 opt.user_header[i] = '\0';
802 cmd_spec_htmlify (const char *com, const char *val, void *closure)
804 int flag = cmd_boolean (com, val, &opt.htmlify);
805 if (flag && !opt.htmlify)
806 opt.remove_listing = 0;
811 cmd_spec_mirror (const char *com, const char *val, void *closure)
815 if (!cmd_boolean (com, val, &mirror))
820 if (!opt.no_dirstruct)
822 opt.timestamping = 1;
824 opt.remove_listing = 0;
830 cmd_spec_outputdocument (const char *com, const char *val, void *closure)
832 FREE_MAYBE (opt.output_document);
833 opt.output_document = xstrdup (val);
839 cmd_spec_recursive (const char *com, const char *val, void *closure)
841 if (!cmd_boolean (com, val, &opt.recursive))
845 if (opt.recursive && !opt.no_dirstruct)
852 cmd_spec_useragent (const char *com, const char *val, void *closure)
854 /* Just check for empty string and newline, so we don't throw total
855 junk to the server. */
856 if (!*val || strchr (val, '\n'))
858 fprintf (stderr, _("%s: %s: Invalid specification `%s'.\n"),
859 exec_name, com, val);
862 opt.useragent = xstrdup (val);
866 /* Miscellaneous useful routines. */
/* Return the integer value of a non-negative integer written in S, or
   -1 if an error was encountered.  S must consist of decimal digits
   only: an empty string, a sign, surrounding space or any other
   character is an error, and so is a number too large for an int.  */
static int
myatoi (const char *s)
{
  const char *p = s;
  int res = 0;

  for (; *p >= '0' && *p <= '9'; p++)
    {
      int digit = *p - '0';

      /* Stop before 10 * res + digit would exceed INT_MAX; signed
         overflow is undefined.  */
      if (res > (INT_MAX - digit) / 10)
        return -1;
      res = 10 * res + digit;
    }

  /* Trailing junk, or no digits at all.  */
  if (*p != '\0' || p == s)
    return -1;
  return res;
}
#define ISODIGIT(x) ((x) >= '0' && (x) <= '7')

/* Check whether S looks like an acceptable user-specified header.
   Returns 1 if it does, 0 otherwise.  To be accepted, S has to start
   with at least one character that is neither whitespace nor `:',
   followed directly by `:', and must not contain a newline
   anywhere.  */
static int
check_user_specified_header (const char *s)
{
  const char *name_end = s;

  /* Walk over the header name.  */
  while (*name_end != '\0' && *name_end != ':' && !ISSPACE (*name_end))
    ++name_end;

  /* The header MUST contain `:' preceded by at least one
     non-whitespace character.  */
  if (name_end == s || *name_end != ':')
    return 0;

  /* The header MUST NOT contain newlines.  */
  return strchr (s, '\n') == NULL;
}
902 /* Free the memory allocated by global variables. */
906 extern acc_t *netrc_list;
908 recursive_cleanup ();
910 free_netrc (netrc_list);
913 FREE_MAYBE (opt.lfilename);
914 free (opt.dir_prefix);
915 FREE_MAYBE (opt.input_filename);
916 FREE_MAYBE (opt.output_document);
917 free_vec (opt.accepts);
918 free_vec (opt.rejects);
919 free_vec (opt.excludes);
920 free_vec (opt.includes);
921 free_vec (opt.domains);
924 FREE_MAYBE (opt.ftp_proxy);
925 FREE_MAYBE (opt.http_proxy);
926 free_vec (opt.no_proxy);
927 FREE_MAYBE (opt.useragent);
928 FREE_MAYBE (opt.referer);
929 FREE_MAYBE (opt.http_user);
930 FREE_MAYBE (opt.http_passwd);
931 FREE_MAYBE (opt.user_header);