1 /* Reading/parsing the initialization file.
2 Copyright (C) 1995, 1996, 1997, 1998 Free Software Foundation, Inc.
4 This file is part of Wget.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
24 #include <sys/types.h>
52 #define CMD_DECLARE(func) static int func \
53 PARAMS ((const char *, const char *, void *))
55 CMD_DECLARE (cmd_boolean);
56 CMD_DECLARE (cmd_boolean);
57 CMD_DECLARE (cmd_number);
58 CMD_DECLARE (cmd_number_inf);
59 CMD_DECLARE (cmd_string);
60 CMD_DECLARE (cmd_vector);
61 CMD_DECLARE (cmd_directory_vector);
62 CMD_DECLARE (cmd_bytes);
63 CMD_DECLARE (cmd_time);
65 CMD_DECLARE (cmd_spec_dirstruct);
66 CMD_DECLARE (cmd_spec_dotstyle);
67 CMD_DECLARE (cmd_spec_header);
68 CMD_DECLARE (cmd_spec_htmlify);
69 CMD_DECLARE (cmd_spec_mirror);
70 CMD_DECLARE (cmd_spec_outputdocument);
71 CMD_DECLARE (cmd_spec_recursive);
72 CMD_DECLARE (cmd_spec_useragent);
74 /* List of recognized commands, each consisting of name, closure and function.
75 When adding a new command, simply add it to the list, but be sure to keep the
76 list sorted alphabetically, as comind() depends on it. Also, be sure to add
77 any entries that allocate memory (e.g. cmd_string and cmd_vector guys) to the
78 cleanup() function below. */
82 int (*action) PARAMS ((const char *, const char *, void *));
84 { "accept", &opt.accepts, cmd_vector },
85 { "addhostdir", &opt.add_hostdir, cmd_boolean },
86 { "alwaysrest", &opt.always_rest, cmd_boolean }, /* deprecated */
87 { "background", &opt.background, cmd_boolean },
88 { "backupconverted", &opt.backup_converted, cmd_boolean },
89 { "backups", &opt.backups, cmd_number },
90 { "base", &opt.base_href, cmd_string },
91 { "cache", &opt.proxy_cache, cmd_boolean },
92 { "continue", &opt.always_rest, cmd_boolean },
93 { "convertlinks", &opt.convert_links, cmd_boolean },
94 { "cutdirs", &opt.cut_dirs, cmd_number },
96 { "debug", &opt.debug, cmd_boolean },
98 { "deleteafter", &opt.delete_after, cmd_boolean },
99 { "dirprefix", &opt.dir_prefix, cmd_string },
100 { "dirstruct", NULL, cmd_spec_dirstruct },
101 { "domains", &opt.domains, cmd_vector },
102 { "dotbytes", &opt.dot_bytes, cmd_bytes },
103 { "dotsinline", &opt.dots_in_line, cmd_number },
104 { "dotspacing", &opt.dot_spacing, cmd_number },
105 { "dotstyle", NULL, cmd_spec_dotstyle },
106 { "excludedirectories", &opt.excludes, cmd_directory_vector },
107 { "excludedomains", &opt.exclude_domains, cmd_vector },
108 { "followftp", &opt.follow_ftp, cmd_boolean },
109 { "followtags", &opt.follow_tags, cmd_vector },
110 { "forcehtml", &opt.force_html, cmd_boolean },
111 { "ftpproxy", &opt.ftp_proxy, cmd_string },
112 { "glob", &opt.ftp_glob, cmd_boolean },
113 { "header", NULL, cmd_spec_header },
114 { "htmlextension", &opt.html_extension, cmd_boolean },
115 { "htmlify", NULL, cmd_spec_htmlify },
116 { "httppasswd", &opt.http_passwd, cmd_string },
117 { "httpproxy", &opt.http_proxy, cmd_string },
118 { "httpuser", &opt.http_user, cmd_string },
119 { "ignorelength", &opt.ignore_length, cmd_boolean },
120 { "ignoretags", &opt.ignore_tags, cmd_vector },
121 { "includedirectories", &opt.includes, cmd_directory_vector },
122 { "input", &opt.input_filename, cmd_string },
123 { "killlonger", &opt.kill_longer, cmd_boolean },
124 { "logfile", &opt.lfilename, cmd_string },
125 { "login", &opt.ftp_acc, cmd_string },
126 { "mirror", NULL, cmd_spec_mirror },
127 { "netrc", &opt.netrc, cmd_boolean },
128 { "noclobber", &opt.noclobber, cmd_boolean },
129 { "noparent", &opt.no_parent, cmd_boolean },
130 { "noproxy", &opt.no_proxy, cmd_vector },
131 { "numtries", &opt.ntry, cmd_number_inf },/* deprecated*/
132 { "outputdocument", NULL, cmd_spec_outputdocument },
133 { "pagerequisites", &opt.page_requisites, cmd_boolean },
134 { "passiveftp", &opt.ftp_pasv, cmd_boolean },
135 { "passwd", &opt.ftp_pass, cmd_string },
136 { "proxypasswd", &opt.proxy_passwd, cmd_string },
137 { "proxyuser", &opt.proxy_user, cmd_string },
138 { "quiet", &opt.quiet, cmd_boolean },
139 { "quota", &opt.quota, cmd_bytes },
140 { "reclevel", &opt.reclevel, cmd_number_inf },
141 { "recursive", NULL, cmd_spec_recursive },
142 { "referer", &opt.referer, cmd_string },
143 { "reject", &opt.rejects, cmd_vector },
144 { "relativeonly", &opt.relative_only, cmd_boolean },
145 { "removelisting", &opt.remove_listing, cmd_boolean },
146 { "retrsymlinks", &opt.retr_symlinks, cmd_boolean },
147 { "robots", &opt.use_robots, cmd_boolean },
148 { "saveheaders", &opt.save_headers, cmd_boolean },
149 { "serverresponse", &opt.server_response, cmd_boolean },
150 { "simplehostcheck", &opt.simple_check, cmd_boolean },
151 { "spanhosts", &opt.spanhost, cmd_boolean },
152 { "spider", &opt.spider, cmd_boolean },
153 { "timeout", &opt.timeout, cmd_time },
154 { "timestamping", &opt.timestamping, cmd_boolean },
155 { "tries", &opt.ntry, cmd_number_inf },
156 { "useproxy", &opt.use_proxy, cmd_boolean },
157 { "useragent", NULL, cmd_spec_useragent },
158 { "verbose", &opt.verbose, cmd_boolean },
159 { "wait", &opt.wait, cmd_time },
160 { "waitretry", &opt.waitretry, cmd_time }
163 /* Return the index of COM if it is a valid command, or -1 otherwise. COM
164 is looked up in `commands' using a binary search. */
166 comind (const char *com)
168 int min = 0, max = ARRAY_SIZE (commands);
172 int i = (min + max) / 2;
173 int cmp = strcasecmp (com, commands[i].name);
185 /* Reset the variables to default values. */
191 /* Most of the default values are 0. Just reset everything, and
192 fill in the non-zero values. Note that initializing pointers to
193 NULL this way is technically illegal, but porting Wget to a
194 machine where NULL is not all-zero bit pattern will be the least
195 of the implementors' worries. */
196 memset (&opt, 0, sizeof (opt));
199 opt.dir_prefix = xstrdup (".");
203 opt.ftp_acc = xstrdup ("anonymous");
204 /*opt.ftp_pass = xstrdup (ftp_getaddress ());*/
209 tmp = getenv ("no_proxy");
211 opt.no_proxy = sepstring (tmp);
219 opt.remove_listing = 1;
221 opt.dot_bytes = 1024;
222 opt.dot_spacing = 10;
223 opt.dots_in_line = 50;
226 /* Return the user's home directory (strdup-ed), or NULL if none is
231 char *home = getenv ("HOME");
236 /* If HOME is not defined, try getting it from the password
238 struct passwd *pwd = getpwuid (getuid ());
239 if (!pwd || !pwd->pw_dir)
244 /* #### Maybe I should grab home_dir from registry, but the best
245 that I could get from there is user's Start menu. It sucks! */
249 return home ? xstrdup (home) : NULL;
252 /* Return the path to the user's .wgetrc. This is either the value of
253 `WGETRC' environment variable, or `$HOME/.wgetrc'.
255 If the `WGETRC' variable exists but the file does not exist, the
256 function will exit(). */
258 wgetrc_file_name (void)
263 /* Try the environment. */
264 env = getenv ("WGETRC");
267 if (!file_exists_p (env))
269 fprintf (stderr, "%s: %s: %s.\n", exec_name, file, strerror (errno));
272 return xstrdup (env);
276 /* If that failed, try $HOME/.wgetrc. */
280 file = (char *)xmalloc (strlen (home) + 1 + strlen (".wgetrc") + 1);
281 sprintf (file, "%s/.wgetrc", home);
285 /* Under Windows, "home" is (for the purposes of this function) the
286 directory where `wget.exe' resides, and `wget.ini' will be used
287 as file name. SYSTEM_WGETRC should not be defined under WINDOWS.
289 It is not as trivial as I assumed, because on 95 argv[0] is full
290 path, but on NT you get what you typed in command line. --dbudor */
294 file = (char *)xmalloc (strlen (home) + strlen ("wget.ini") + 1);
295 sprintf (file, "%swget.ini", home);
301 if (!file_exists_p (file))
309 /* Initialize variables from a wgetrc file */
311 run_wgetrc (const char *file)
317 fp = fopen (file, "rb");
320 fprintf (stderr, _("%s: Cannot read %s (%s).\n"), exec_name,
321 file, strerror (errno));
324 /* Reset line number. */
326 while ((line = read_whole_line (fp)))
330 int length = strlen (line);
332 if (length && line[length - 1] == '\r')
333 line[length - 1] = '\0';
334 /* Parse the line. */
335 status = parse_line (line, &com, &val);
337 /* If everything is OK, set the value. */
340 if (!setval (com, val))
341 fprintf (stderr, _("%s: Error in %s at line %d.\n"), exec_name,
346 else if (status == 0)
347 fprintf (stderr, _("%s: Error in %s at line %d.\n"), exec_name,
354 /* Initialize the defaults and run the system wgetrc and user's own
361 /* Load the hard-coded defaults. */
364 /* If SYSTEM_WGETRC is defined, use it. */
366 if (file_exists_p (SYSTEM_WGETRC))
367 run_wgetrc (SYSTEM_WGETRC);
369 /* Override it with your own, if one exists. */
370 file = wgetrc_file_name ();
373 /* #### We should somehow canonicalize `file' and SYSTEM_WGETRC,
376 if (!strcmp (file, SYSTEM_WGETRC))
378 fprintf (stderr, _("\
379 %s: Warning: Both system and user wgetrc point to `%s'.\n"),
389 /* Parse the line pointed to by LINE, with the syntax:
390 <sp>* command <sp>* = <sp>* value <newline>
391 Uses malloc to allocate space for command and value.
392 If the line is invalid, data is freed and 0 is returned.
399 parse_line (const char *line, char **com, char **val)
401 const char *p = line;
402 const char *orig_comptr, *end;
406 while (*p == ' ' || *p == '\t')
409 /* Don't process empty lines. */
410 if (!*p || *p == '\n' || *p == '#')
413 for (orig_comptr = p; ISALPHA (*p) || *p == '_' || *p == '-'; p++)
415 /* The next char should be space or '='. */
416 if (!ISSPACE (*p) && (*p != '='))
418 *com = (char *)xmalloc (p - orig_comptr + 1);
419 for (new_comptr = *com; orig_comptr < p; orig_comptr++)
421 if (*orig_comptr == '_' || *orig_comptr == '-')
423 *new_comptr++ = *orig_comptr;
426 /* If the command is invalid, exit now. */
427 if (comind (*com) == -1)
433 /* Skip spaces before '='. */
434 for (; ISSPACE (*p); p++);
435 /* If '=' not found, bail out. */
441 /* Skip spaces after '='. */
442 for (++p; ISSPACE (*p); p++);
443 /* Get the ending position. */
444 for (end = p; *end && *end != '\n'; end++);
445 /* Allocate *val, and copy from line. */
446 *val = strdupdelim (p, end);
450 /* Set COM to VAL. This is the meat behind processing `.wgetrc'. No
451 fatals -- error signal prints a warning and resets to default
452 value. All error messages are printed to stderr, *not* to
453 opt.lfile, since opt.lfile wasn't even generated yet. */
455 setval (const char *com, const char *val)
464 /* #### Should I just abort()? */
466 fprintf (stderr, _("%s: BUG: unknown command `%s', value `%s'.\n"),
467 exec_name, com, val);
471 return ((*commands[ind].action) (com, val, commands[ind].closure));
474 /* Generic helper functions, for use with `commands'. */
476 static int myatoi PARAMS ((const char *s));
478 /* Store the boolean value from VAL to CLOSURE. COM is ignored,
479 except for error messages. */
481 cmd_boolean (const char *com, const char *val, void *closure)
485 if (!strcasecmp (val, "on")
486 || (*val == '1' && !*(val + 1)))
488 else if (!strcasecmp (val, "off")
489 || (*val == '0' && !*(val + 1)))
493 fprintf (stderr, _("%s: %s: Please specify on or off.\n"),
498 *(int *)closure = bool_value;
502 /* Set the non-negative integer value from VAL to CLOSURE. With
503 incorrect specification, the number remains unchanged. */
505 cmd_number (const char *com, const char *val, void *closure)
507 int num = myatoi (val);
511 fprintf (stderr, _("%s: %s: Invalid specification `%s'.\n"),
512 exec_name, com, val);
515 *(int *)closure = num;
519 /* Similar to cmd_number(), except that it also accepts `inf' as a synonym for 0. */
521 cmd_number_inf (const char *com, const char *val, void *closure)
523 if (!strcasecmp (val, "inf"))
528 return cmd_number (com, val, closure);
531 /* Copy (strdup) the string at VAL to a new location and store a pointer to
532 the copy in *CLOSURE, freeing any string previously stored there. */
534 cmd_string (const char *com, const char *val, void *closure)
536 char **pstring = (char **)closure;
538 FREE_MAYBE (*pstring);
539 *pstring = xstrdup (val);
543 /* Merge the vector (array of strings separated with `,') in VAL with
544 the vector (NULL-terminated array of strings) pointed to by
547 cmd_vector (const char *com, const char *val, void *closure)
549 char ***pvec = (char ***)closure;
552 *pvec = merge_vecs (*pvec, sepstring (val));
562 cmd_directory_vector (const char *com, const char *val, void *closure)
564 char ***pvec = (char ***)closure;
568 /* Strip the trailing slashes from directories. */
571 seps = sepstring (val);
572 for (t = seps; t && *t; t++)
574 int len = strlen (*t);
575 /* Skip degenerate case of root directory. */
578 if ((*t)[len - 1] == '/')
579 (*t)[len - 1] = '\0';
582 *pvec = merge_vecs (*pvec, seps);
592 /* Set the value stored in VAL to CLOSURE (which should point to a
593 long int), allowing several postfixes, with the following syntax
597 [0-9]+[kK] -> bytes * 1024
598 [0-9]+[mM] -> bytes * 1024 * 1024
601 Anything else is flagged as incorrect, and CLOSURE is unchanged. */
603 cmd_bytes (const char *com, const char *val, void *closure)
606 long *out = (long *)closure;
611 /* Check for "inf". */
612 if (p[0] == 'i' && p[1] == 'n' && p[2] == 'f' && p[3] == '\0')
617 /* Search for digits and construct result. */
618 for (; *p && ISDIGIT (*p); p++)
619 result = (10 * result) + (*p - '0');
620 /* If no digits were found, or more than one character is following
622 if (p == val || (*p != '\0' && *(p + 1) != '\0'))
624 printf (_("%s: Invalid specification `%s'\n"), com, val);
627 /* Search for a designator. */
628 switch (TOLOWER (*p))
639 result *= (long)1024 * 1024;
643 result *= (long)1024 * 1024 * 1024;
646 printf (_("%s: Invalid specification `%s'\n"), com, val);
653 /* Store the value of VAL to *OUT, allowing suffixes for minutes and
656 cmd_time (const char *com, const char *val, void *closure)
661 /* Search for digits and construct result. */
662 for (; *p && ISDIGIT (*p); p++)
663 result = (10 * result) + (*p - '0');
664 /* If no digits were found, or more than one character is following
666 if (p == val || (*p != '\0' && *(p + 1) != '\0'))
668 printf (_("%s: Invalid specification `%s'\n"), com, val);
671 /* Search for a suffix. */
672 switch (TOLOWER (*p))
686 /* Days (overflow on 16bit machines) */
694 printf (_("%s: Invalid specification `%s'\n"), com, val);
697 *(long *)closure = result;
701 /* Specialized helper functions, used by `commands' to handle some
702 options specially. */
704 static int check_user_specified_header PARAMS ((const char *));
707 cmd_spec_dirstruct (const char *com, const char *val, void *closure)
709 if (!cmd_boolean (com, val, &opt.dirstruct))
711 /* Since dirstruct behaviour is explicitly changed, no_dirstruct
712 must be affected inversely. */
714 opt.no_dirstruct = 0;
716 opt.no_dirstruct = 1;
721 cmd_spec_dotstyle (const char *com, const char *val, void *closure)
723 /* Retrieval styles. */
724 if (!strcasecmp (val, "default"))
726 /* Default style: 1K dots, 10 dots in a cluster, 50 dots in a
728 opt.dot_bytes = 1024;
729 opt.dot_spacing = 10;
730 opt.dots_in_line = 50;
732 else if (!strcasecmp (val, "binary"))
734 /* "Binary" retrieval: 8K dots, 16 dots in a cluster, 48 dots
736 opt.dot_bytes = 8192;
737 opt.dot_spacing = 16;
738 opt.dots_in_line = 48;
740 else if (!strcasecmp (val, "mega"))
742 /* "Mega" retrieval, for retrieving very long files; each dot is
743 64K, 8 dots in a cluster, 6 clusters (3M) in a line. */
744 opt.dot_bytes = 65536L;
746 opt.dots_in_line = 48;
748 else if (!strcasecmp (val, "giga"))
750 /* "Giga" retrieval, for retrieving very very *very* long files;
751 each dot is 1M, 8 dots in a cluster, 4 clusters (32M) in a
753 opt.dot_bytes = (1L << 20);
755 opt.dots_in_line = 32;
757 else if (!strcasecmp (val, "micro"))
759 /* "Micro" retrieval, for retrieving very small files (and/or
760 slow connections); each dot is 128 bytes, 8 dots in a
761 cluster, 6 clusters (6K) in a line. */
764 opt.dots_in_line = 48;
768 fprintf (stderr, _("%s: %s: Invalid specification `%s'.\n"),
769 exec_name, com, val);
776 cmd_spec_header (const char *com, const char *val, void *closure)
780 /* Empty header means reset headers. */
781 FREE_MAYBE (opt.user_header);
782 opt.user_header = NULL;
788 if (!check_user_specified_header (val))
790 fprintf (stderr, _("%s: %s: Invalid specification `%s'.\n"),
791 exec_name, com, val);
794 i = opt.user_header ? strlen (opt.user_header) : 0;
795 opt.user_header = (char *)xrealloc (opt.user_header, i + strlen (val)
797 strcpy (opt.user_header + i, val);
799 opt.user_header[i++] = '\r';
800 opt.user_header[i++] = '\n';
801 opt.user_header[i] = '\0';
807 cmd_spec_htmlify (const char *com, const char *val, void *closure)
809 int flag = cmd_boolean (com, val, &opt.htmlify);
810 if (flag && !opt.htmlify)
811 opt.remove_listing = 0;
816 cmd_spec_mirror (const char *com, const char *val, void *closure)
820 if (!cmd_boolean (com, val, &mirror))
825 if (!opt.no_dirstruct)
827 opt.timestamping = 1;
828 opt.reclevel = INFINITE_RECURSION;
829 opt.remove_listing = 0;
835 cmd_spec_outputdocument (const char *com, const char *val, void *closure)
837 FREE_MAYBE (opt.output_document);
838 opt.output_document = xstrdup (val);
844 cmd_spec_recursive (const char *com, const char *val, void *closure)
846 if (!cmd_boolean (com, val, &opt.recursive))
850 if (opt.recursive && !opt.no_dirstruct)
857 cmd_spec_useragent (const char *com, const char *val, void *closure)
859 /* Just check for empty string and newline, so we don't throw total
860 junk to the server. */
861 if (!*val || strchr (val, '\n'))
863 fprintf (stderr, _("%s: %s: Invalid specification `%s'.\n"),
864 exec_name, com, val);
867 opt.useragent = xstrdup (val);
871 /* Miscellaneous useful routines. */
873 /* Return the integer value of a positive integer written in S, or -1
874 if an error was encountered. */
876 myatoi (const char *s)
879 const char *orig = s;
881 for (res = 0; *s && ISDIGIT (*s); s++)
882 res = 10 * res + (*s - '0');
889 #define ISODIGIT(x) ((x) >= '0' && (x) <= '7')
892 check_user_specified_header (const char *s)
896 for (p = s; *p && *p != ':' && !ISSPACE (*p); p++);
897 /* The header MUST contain `:' preceded by at least one
898 non-whitespace character. */
899 if (*p != ':' || p == s)
901 /* The header MUST NOT contain newlines. */
902 if (strchr (s, '\n'))
907 /* Free the memory allocated by global variables. */
911 extern acc_t *netrc_list;
913 recursive_cleanup ();
915 free_netrc (netrc_list);
918 FREE_MAYBE (opt.lfilename);
919 free (opt.dir_prefix);
920 FREE_MAYBE (opt.input_filename);
921 FREE_MAYBE (opt.output_document);
922 free_vec (opt.accepts);
923 free_vec (opt.rejects);
924 free_vec (opt.excludes);
925 free_vec (opt.includes);
926 free_vec (opt.domains);
927 free_vec (opt.follow_tags);
928 free_vec (opt.ignore_tags);
931 FREE_MAYBE (opt.ftp_proxy);
932 FREE_MAYBE (opt.http_proxy);
933 free_vec (opt.no_proxy);
934 FREE_MAYBE (opt.useragent);
935 FREE_MAYBE (opt.referer);
936 FREE_MAYBE (opt.http_user);
937 FREE_MAYBE (opt.http_passwd);
938 FREE_MAYBE (opt.user_header);