1 /* Reading/parsing the initialization file.
2 Copyright (C) 1995, 1996, 1997, 1998 Free Software Foundation, Inc.
4 This file is part of Wget.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
24 #include <sys/types.h>
52 #define CMD_DECLARE(func) static int func \
53 PARAMS ((const char *, const char *, void *))
55 CMD_DECLARE (cmd_boolean);
56 CMD_DECLARE (cmd_boolean);
57 CMD_DECLARE (cmd_number);
58 CMD_DECLARE (cmd_number_inf);
59 CMD_DECLARE (cmd_string);
60 CMD_DECLARE (cmd_vector);
61 CMD_DECLARE (cmd_directory_vector);
62 CMD_DECLARE (cmd_bytes);
63 CMD_DECLARE (cmd_time);
65 CMD_DECLARE (cmd_spec_dirstruct);
66 CMD_DECLARE (cmd_spec_dotstyle);
67 CMD_DECLARE (cmd_spec_header);
68 CMD_DECLARE (cmd_spec_htmlify);
69 CMD_DECLARE (cmd_spec_mirror);
70 CMD_DECLARE (cmd_spec_outputdocument);
71 CMD_DECLARE (cmd_spec_recursive);
72 CMD_DECLARE (cmd_spec_useragent);
74 /* List of recognized commands, each consisting of name, closure and function.
75 When adding a new command, simply add it to the list, but be sure to keep the
76 list sorted alphabetically, as comind() depends on it. Also, be sure to add
77 any entries that allocate memory (e.g. cmd_string and cmd_vector guys) to the
78 cleanup() function below. */
82 int (*action) PARAMS ((const char *, const char *, void *));
84 { "accept", &opt.accepts, cmd_vector },
85 { "addhostdir", &opt.add_hostdir, cmd_boolean },
86 { "alwaysrest", &opt.always_rest, cmd_boolean }, /* deprecated */
87 { "background", &opt.background, cmd_boolean },
88 { "backupconverted", &opt.backup_converted, cmd_boolean },
89 { "backups", &opt.backups, cmd_number },
90 { "base", &opt.base_href, cmd_string },
91 { "cache", &opt.proxy_cache, cmd_boolean },
92 { "continue", &opt.always_rest, cmd_boolean },
93 { "convertlinks", &opt.convert_links, cmd_boolean },
94 { "cutdirs", &opt.cut_dirs, cmd_number },
96 { "debug", &opt.debug, cmd_boolean },
98 { "deleteafter", &opt.delete_after, cmd_boolean },
99 { "dirprefix", &opt.dir_prefix, cmd_string },
100 { "dirstruct", NULL, cmd_spec_dirstruct },
101 { "domains", &opt.domains, cmd_vector },
102 { "dotbytes", &opt.dot_bytes, cmd_bytes },
103 { "dotsinline", &opt.dots_in_line, cmd_number },
104 { "dotspacing", &opt.dot_spacing, cmd_number },
105 { "dotstyle", NULL, cmd_spec_dotstyle },
106 { "excludedirectories", &opt.excludes, cmd_directory_vector },
107 { "excludedomains", &opt.exclude_domains, cmd_vector },
108 { "followftp", &opt.follow_ftp, cmd_boolean },
109 { "followtags", &opt.follow_tags, cmd_vector },
110 { "forcehtml", &opt.force_html, cmd_boolean },
111 { "ftpproxy", &opt.ftp_proxy, cmd_string },
112 { "glob", &opt.ftp_glob, cmd_boolean },
113 { "header", NULL, cmd_spec_header },
114 { "htmlify", NULL, cmd_spec_htmlify },
115 { "httppasswd", &opt.http_passwd, cmd_string },
116 { "httpproxy", &opt.http_proxy, cmd_string },
117 { "httpuser", &opt.http_user, cmd_string },
118 { "ignorelength", &opt.ignore_length, cmd_boolean },
119 { "ignoretags", &opt.ignore_tags, cmd_vector },
120 { "includedirectories", &opt.includes, cmd_directory_vector },
121 { "input", &opt.input_filename, cmd_string },
122 { "killlonger", &opt.kill_longer, cmd_boolean },
123 { "logfile", &opt.lfilename, cmd_string },
124 { "login", &opt.ftp_acc, cmd_string },
125 { "mirror", NULL, cmd_spec_mirror },
126 { "netrc", &opt.netrc, cmd_boolean },
127 { "noclobber", &opt.noclobber, cmd_boolean },
128 { "noparent", &opt.no_parent, cmd_boolean },
129 { "noproxy", &opt.no_proxy, cmd_vector },
130 { "numtries", &opt.ntry, cmd_number_inf },/* deprecated*/
131 { "outputdocument", NULL, cmd_spec_outputdocument },
132 { "pagerequisites", &opt.page_requisites, cmd_boolean },
133 { "passiveftp", &opt.ftp_pasv, cmd_boolean },
134 { "passwd", &opt.ftp_pass, cmd_string },
135 { "proxypasswd", &opt.proxy_passwd, cmd_string },
136 { "proxyuser", &opt.proxy_user, cmd_string },
137 { "quiet", &opt.quiet, cmd_boolean },
138 { "quota", &opt.quota, cmd_bytes },
139 { "reclevel", &opt.reclevel, cmd_number_inf },
140 { "recursive", NULL, cmd_spec_recursive },
141 { "referer", &opt.referer, cmd_string },
142 { "reject", &opt.rejects, cmd_vector },
143 { "relativeonly", &opt.relative_only, cmd_boolean },
144 { "removelisting", &opt.remove_listing, cmd_boolean },
145 { "retrsymlinks", &opt.retr_symlinks, cmd_boolean },
146 { "robots", &opt.use_robots, cmd_boolean },
147 { "saveheaders", &opt.save_headers, cmd_boolean },
148 { "serverresponse", &opt.server_response, cmd_boolean },
149 { "simplehostcheck", &opt.simple_check, cmd_boolean },
150 { "spanhosts", &opt.spanhost, cmd_boolean },
151 { "spider", &opt.spider, cmd_boolean },
152 { "timeout", &opt.timeout, cmd_time },
153 { "timestamping", &opt.timestamping, cmd_boolean },
154 { "tries", &opt.ntry, cmd_number_inf },
155 { "useproxy", &opt.use_proxy, cmd_boolean },
156 { "useragent", NULL, cmd_spec_useragent },
157 { "verbose", &opt.verbose, cmd_boolean },
158 { "wait", &opt.wait, cmd_time },
159 { "waitretry", &opt.waitretry, cmd_time }
162 /* Return the index of COM if it is a valid command, or -1 otherwise. COM
163 is looked up in `commands' using a case-insensitive binary search. */
165 comind (const char *com)
167 int min = 0, max = ARRAY_SIZE (commands);
171 int i = (min + max) / 2;
172 int cmp = strcasecmp (com, commands[i].name);
184 /* Reset the variables to default values. */
190 /* Most of the default values are 0. Just reset everything, and
191 fill in the non-zero values. Note that initializing pointers to
192 NULL this way is technically illegal, but porting Wget to a
193 machine where NULL is not all-zero bit pattern will be the least
194 of the implementors' worries. */
195 memset (&opt, 0, sizeof (opt));
198 opt.dir_prefix = xstrdup (".");
202 opt.ftp_acc = xstrdup ("anonymous");
203 /*opt.ftp_pass = xstrdup (ftp_getaddress ());*/
208 tmp = getenv ("no_proxy");
210 opt.no_proxy = sepstring (tmp);
218 opt.remove_listing = 1;
220 opt.dot_bytes = 1024;
221 opt.dot_spacing = 10;
222 opt.dots_in_line = 50;
225 /* Return the user's home directory (strdup-ed), or NULL if none is
230 char *home = getenv ("HOME");
235 /* If HOME is not defined, try getting it from the password
237 struct passwd *pwd = getpwuid (getuid ());
238 if (!pwd || !pwd->pw_dir)
243 /* #### Maybe I should grab home_dir from registry, but the best
244 that I could get from there is user's Start menu. It sucks! */
248 return home ? xstrdup (home) : NULL;
251 /* Return the path to the user's .wgetrc. This is either the value of
252 `WGETRC' environment variable, or `$HOME/.wgetrc'.
254 If the `WGETRC' variable exists but the file does not exist, the
255 function will exit(). */
257 wgetrc_file_name (void)
262 /* Try the environment. */
263 env = getenv ("WGETRC");
266 if (!file_exists_p (env))
268 fprintf (stderr, "%s: %s: %s.\n", exec_name, env, strerror (errno)); /* ENV is the path that failed the existence check. */
271 return xstrdup (env);
275 /* If that failed, try $HOME/.wgetrc. */
279 file = (char *)xmalloc (strlen (home) + 1 + strlen (".wgetrc") + 1);
280 sprintf (file, "%s/.wgetrc", home);
284 /* Under Windows, "home" is (for the purposes of this function) the
285 directory where `wget.exe' resides, and `wget.ini' will be used
286 as file name. SYSTEM_WGETRC should not be defined under WINDOWS.
288 It is not as trivial as I assumed, because on 95 argv[0] is full
289 path, but on NT you get what you typed in command line. --dbudor */
293 file = (char *)xmalloc (strlen (home) + strlen ("wget.ini") + 1);
294 sprintf (file, "%swget.ini", home);
300 if (!file_exists_p (file))
308 /* Initialize variables from a wgetrc file */
310 run_wgetrc (const char *file)
316 fp = fopen (file, "rb");
319 fprintf (stderr, _("%s: Cannot read %s (%s).\n"), exec_name,
320 file, strerror (errno));
323 /* Reset line number. */
325 while ((line = read_whole_line (fp)))
329 int length = strlen (line);
331 if (length && line[length - 1] == '\r')
332 line[length - 1] = '\0';
333 /* Parse the line. */
334 status = parse_line (line, &com, &val);
336 /* If everything is OK, set the value. */
339 if (!setval (com, val))
340 fprintf (stderr, _("%s: Error in %s at line %d.\n"), exec_name,
345 else if (status == 0)
346 fprintf (stderr, _("%s: Error in %s at line %d.\n"), exec_name,
353 /* Initialize the defaults and run the system wgetrc and user's own
360 /* Load the hard-coded defaults. */
363 /* If SYSTEM_WGETRC is defined, use it. */
365 if (file_exists_p (SYSTEM_WGETRC))
366 run_wgetrc (SYSTEM_WGETRC);
368 /* Override it with your own, if one exists. */
369 file = wgetrc_file_name ();
372 /* #### We should somehow canonicalize `file' and SYSTEM_WGETRC,
375 if (!strcmp (file, SYSTEM_WGETRC))
377 fprintf (stderr, _("\
378 %s: Warning: Both system and user wgetrc point to `%s'.\n"),
388 /* Parse the line pointed to by LINE, with the syntax:
389 <sp>* command <sp>* = <sp>* value <newline>
390 Uses malloc to allocate space for command and value.
391 If the line is invalid, data is freed and 0 is returned.
398 parse_line (const char *line, char **com, char **val)
400 const char *p = line;
401 const char *orig_comptr, *end;
405 while (*p == ' ' || *p == '\t')
408 /* Don't process empty lines. */
409 if (!*p || *p == '\n' || *p == '#')
412 for (orig_comptr = p; ISALPHA (*p) || *p == '_' || *p == '-'; p++)
414 /* The next char should be space or '='. */
415 if (!ISSPACE (*p) && (*p != '='))
417 *com = (char *)xmalloc (p - orig_comptr + 1);
418 for (new_comptr = *com; orig_comptr < p; orig_comptr++)
420 if (*orig_comptr == '_' || *orig_comptr == '-')
422 *new_comptr++ = *orig_comptr;
425 /* If the command is invalid, exit now. */
426 if (comind (*com) == -1)
432 /* Skip spaces before '='. */
433 for (; ISSPACE (*p); p++);
434 /* If '=' not found, bail out. */
440 /* Skip spaces after '='. */
441 for (++p; ISSPACE (*p); p++);
442 /* Get the ending position. */
443 for (end = p; *end && *end != '\n'; end++);
444 /* Allocate *val, and copy from line. */
445 *val = strdupdelim (p, end);
449 /* Set COM to VAL. This is the meat behind processing `.wgetrc'. No
450 fatals -- error signal prints a warning and resets to default
451 value. All error messages are printed to stderr, *not* to
452 opt.lfile, since opt.lfile wasn't even generated yet. */
454 setval (const char *com, const char *val)
463 /* #### Should I just abort()? */
465 fprintf (stderr, _("%s: BUG: unknown command `%s', value `%s'.\n"),
466 exec_name, com, val);
470 return ((*commands[ind].action) (com, val, commands[ind].closure));
473 /* Generic helper functions, for use with `commands'. */
475 static int myatoi PARAMS ((const char *s));
477 /* Store the boolean value from VAL to CLOSURE. COM is ignored,
478 except for error messages. */
480 cmd_boolean (const char *com, const char *val, void *closure)
484 if (!strcasecmp (val, "on")
485 || (*val == '1' && !*(val + 1)))
487 else if (!strcasecmp (val, "off")
488 || (*val == '0' && !*(val + 1)))
492 fprintf (stderr, _("%s: %s: Please specify on or off.\n"),
497 *(int *)closure = bool_value;
501 /* Set the non-negative integer value from VAL to CLOSURE. With
502 incorrect specification, the number remains unchanged. */
504 cmd_number (const char *com, const char *val, void *closure)
506 int num = myatoi (val);
510 fprintf (stderr, _("%s: %s: Invalid specification `%s'.\n"),
511 exec_name, com, val);
514 *(int *)closure = num;
518 /* Similar to cmd_number(), but additionally accepts `inf' as a synonym for 0. */
520 cmd_number_inf (const char *com, const char *val, void *closure)
522 if (!strcasecmp (val, "inf"))
527 return cmd_number (com, val, closure);
530 /* Copy (strdup) the string at VAL to a new location and place a
531 pointer to it in *CLOSURE, freeing any string previously stored there. */
533 cmd_string (const char *com, const char *val, void *closure)
535 char **pstring = (char **)closure;
537 FREE_MAYBE (*pstring);
538 *pstring = xstrdup (val);
542 /* Merge the vector (array of strings separated with `,') in VAL with
543 the vector (NULL-terminated array of strings) pointed to by
546 cmd_vector (const char *com, const char *val, void *closure)
548 char ***pvec = (char ***)closure;
551 *pvec = merge_vecs (*pvec, sepstring (val));
561 cmd_directory_vector (const char *com, const char *val, void *closure)
563 char ***pvec = (char ***)closure;
567 /* Strip the trailing slashes from directories. */
570 seps = sepstring (val);
571 for (t = seps; t && *t; t++)
573 int len = strlen (*t);
574 /* Skip degenerate case of root directory. */
577 if ((*t)[len - 1] == '/')
578 (*t)[len - 1] = '\0';
581 *pvec = merge_vecs (*pvec, seps);
591 /* Set the value stored in VAL to CLOSURE (which should point to a
592 long int), allowing several postfixes, with the following syntax
596 [0-9]+[kK] -> bytes * 1024
597 [0-9]+[mM] -> bytes * 1024 * 1024
600 Anything else is flagged as incorrect, and CLOSURE is unchanged. */
602 cmd_bytes (const char *com, const char *val, void *closure)
605 long *out = (long *)closure;
610 /* Check for "inf". */
611 if (p[0] == 'i' && p[1] == 'n' && p[2] == 'f' && p[3] == '\0')
616 /* Search for digits and construct result. */
617 for (; *p && ISDIGIT (*p); p++)
618 result = (10 * result) + (*p - '0');
619 /* If no digits were found, or more than one character is following
621 if (p == val || (*p != '\0' && *(p + 1) != '\0'))
623 printf (_("%s: Invalid specification `%s'\n"), com, val);
626 /* Search for a designator. */
627 switch (TOLOWER (*p))
638 result *= (long)1024 * 1024;
642 result *= (long)1024 * 1024 * 1024;
645 printf (_("%s: Invalid specification `%s'\n"), com, val);
652 /* Store the value of VAL to *CLOSURE, allowing suffixes for minutes and
655 cmd_time (const char *com, const char *val, void *closure)
660 /* Search for digits and construct result. */
661 for (; *p && ISDIGIT (*p); p++)
662 result = (10 * result) + (*p - '0');
663 /* If no digits were found, or more than one character is following
665 if (p == val || (*p != '\0' && *(p + 1) != '\0'))
667 printf (_("%s: Invalid specification `%s'\n"), com, val);
670 /* Search for a suffix. */
671 switch (TOLOWER (*p))
685 /* Days (overflow on 16bit machines) */
693 printf (_("%s: Invalid specification `%s'\n"), com, val);
696 *(long *)closure = result;
700 /* Specialized helper functions, used by `commands' to handle some
701 options specially. */
703 static int check_user_specified_header PARAMS ((const char *));
706 cmd_spec_dirstruct (const char *com, const char *val, void *closure)
708 if (!cmd_boolean (com, val, &opt.dirstruct))
710 /* Since dirstruct behaviour is explicitly changed, no_dirstruct
711 must be affected inversely. */
713 opt.no_dirstruct = 0;
715 opt.no_dirstruct = 1;
720 cmd_spec_dotstyle (const char *com, const char *val, void *closure)
722 /* Retrieval styles. */
723 if (!strcasecmp (val, "default"))
725 /* Default style: 1K dots, 10 dots in a cluster, 50 dots in a
727 opt.dot_bytes = 1024;
728 opt.dot_spacing = 10;
729 opt.dots_in_line = 50;
731 else if (!strcasecmp (val, "binary"))
733 /* "Binary" retrieval: 8K dots, 16 dots in a cluster, 48 dots
735 opt.dot_bytes = 8192;
736 opt.dot_spacing = 16;
737 opt.dots_in_line = 48;
739 else if (!strcasecmp (val, "mega"))
741 /* "Mega" retrieval, for retrieving very long files; each dot is
742 64K, 8 dots in a cluster, 6 clusters (3M) in a line. */
743 opt.dot_bytes = 65536L;
745 opt.dots_in_line = 48;
747 else if (!strcasecmp (val, "giga"))
749 /* "Giga" retrieval, for retrieving very very *very* long files;
750 each dot is 1M, 8 dots in a cluster, 4 clusters (32M) in a
752 opt.dot_bytes = (1L << 20);
754 opt.dots_in_line = 32;
756 else if (!strcasecmp (val, "micro"))
758 /* "Micro" retrieval, for retrieving very small files (and/or
759 slow connections); each dot is 128 bytes, 8 dots in a
760 cluster, 6 clusters (6K) in a line. */
763 opt.dots_in_line = 48;
767 fprintf (stderr, _("%s: %s: Invalid specification `%s'.\n"),
768 exec_name, com, val);
775 cmd_spec_header (const char *com, const char *val, void *closure)
779 /* Empty header means reset headers. */
780 FREE_MAYBE (opt.user_header);
781 opt.user_header = NULL;
787 if (!check_user_specified_header (val))
789 fprintf (stderr, _("%s: %s: Invalid specification `%s'.\n"),
790 exec_name, com, val);
793 i = opt.user_header ? strlen (opt.user_header) : 0;
794 opt.user_header = (char *)xrealloc (opt.user_header, i + strlen (val)
796 strcpy (opt.user_header + i, val);
798 opt.user_header[i++] = '\r';
799 opt.user_header[i++] = '\n';
800 opt.user_header[i] = '\0';
806 cmd_spec_htmlify (const char *com, const char *val, void *closure)
808 int flag = cmd_boolean (com, val, &opt.htmlify);
809 if (flag && !opt.htmlify)
810 opt.remove_listing = 0;
815 cmd_spec_mirror (const char *com, const char *val, void *closure)
819 if (!cmd_boolean (com, val, &mirror))
824 if (!opt.no_dirstruct)
826 opt.timestamping = 1;
827 opt.reclevel = INFINITE_RECURSION;
828 opt.remove_listing = 0;
834 cmd_spec_outputdocument (const char *com, const char *val, void *closure)
836 FREE_MAYBE (opt.output_document);
837 opt.output_document = xstrdup (val);
843 cmd_spec_recursive (const char *com, const char *val, void *closure)
845 if (!cmd_boolean (com, val, &opt.recursive))
849 if (opt.recursive && !opt.no_dirstruct)
856 cmd_spec_useragent (const char *com, const char *val, void *closure)
858 /* Just check for empty string and newline, so we don't throw total
859 junk to the server. */
860 if (!*val || strchr (val, '\n'))
862 fprintf (stderr, _("%s: %s: Invalid specification `%s'.\n"),
863 exec_name, com, val);
866 opt.useragent = xstrdup (val);
870 /* Miscellaneous useful routines. */
872 /* Return the integer value of a positive integer written in S, or -1
873 if an error was encountered. */
875 myatoi (const char *s)
878 const char *orig = s;
880 for (res = 0; *s && ISDIGIT (*s); s++)
881 res = 10 * res + (*s - '0');
888 #define ISODIGIT(x) ((x) >= '0' && (x) <= '7')
891 check_user_specified_header (const char *s)
895 for (p = s; *p && *p != ':' && !ISSPACE (*p); p++);
896 /* The header MUST contain `:' preceded by at least one
897 non-whitespace character. */
898 if (*p != ':' || p == s)
900 /* The header MUST NOT contain newlines. */
901 if (strchr (s, '\n'))
906 /* Free the memory allocated by global variables. */
910 extern acc_t *netrc_list;
912 recursive_cleanup ();
914 free_netrc (netrc_list);
917 FREE_MAYBE (opt.lfilename);
918 free (opt.dir_prefix);
919 FREE_MAYBE (opt.input_filename);
920 FREE_MAYBE (opt.output_document);
921 free_vec (opt.accepts);
922 free_vec (opt.rejects);
923 free_vec (opt.excludes);
924 free_vec (opt.includes);
925 free_vec (opt.domains);
926 free_vec (opt.follow_tags);
927 free_vec (opt.ignore_tags);
930 FREE_MAYBE (opt.ftp_proxy);
931 FREE_MAYBE (opt.http_proxy);
932 free_vec (opt.no_proxy);
933 FREE_MAYBE (opt.useragent);
934 FREE_MAYBE (opt.referer);
935 FREE_MAYBE (opt.http_user);
936 FREE_MAYBE (opt.http_passwd);
937 FREE_MAYBE (opt.user_header);