+2001-11-04 Alan Eldridge <alane@geeksrus.net>
+
+ * wget.texi: Document --random-wait, randomwait=on/off.
+
2001-11-23 Hrvoje Niksic <hniksic@arsdigita.com>
* wget.texi (Download Options): Document the new `--progress'
Note that this option is turned on by default in the global
@file{wgetrc} file.
+@cindex wait, random
+@cindex random wait
+@item --random-wait
+Some web sites may perform log analysis to identify retrieval programs
+such as Wget by looking for statistically significant similarities in
+the time between requests. This option causes the time between requests
+to vary between 0 and 2 * @var{wait} seconds, where @var{wait} was
+specified using the @samp{-w} or @samp{--wait} options, in order to mask
+Wget's presence from such analysis.
+
+A recent article in a publication devoted to development on a popular
+consumer platform provided code to perform this analysis on the fly.
+Its author suggested blocking at the class C address level to ensure
+automated retrieval programs were blocked despite changing DHCP-supplied
+addresses.
+
+The @samp{--random-wait} option was inspired by this ill-advised
+recommendation to block many unrelated users from a web site due to the
+actions of one.
+
@cindex proxy
@item -Y on/off
@itemx --proxy=on/off
Wait up to @var{n} seconds between retries of failed retrievals
only---the same as @samp{--waitretry}. Note that this is turned on by
default in the global @file{wgetrc}.
+
+@item randomwait = on/off
+Turn random between-request wait times on or off. The same as
+@samp{--random-wait}.
@end table
@node Sample Wgetrc, , Wgetrc Commands, Startup File
+2001-11-04 Alan Eldridge <alane@geeksrus.net>
+
+ * config.h.in: added HAVE_RANDOM.
+
+ * options.h: added random_wait to struct options.
+
+ * main.c (print_help [HAVE_RANDOM], main): added arg parsing, help
+ for --random-wait.
+
+ * retr.c (sleep_between_retrievals) [HAVE_RANDOM]: added
+ implementation of random wait times.
+
+ * init.c (commands): added "randomwait" keyword.
+
2001-11-25 Hrvoje Niksic <hniksic@arsdigita.com>
* recur.c (descend_url_p): Be more conservative with blacklisting
{ "proxyuser", &opt.proxy_user, cmd_string },
{ "quiet", &opt.quiet, cmd_boolean },
{ "quota", &opt.quota, cmd_bytes },
+ { "randomwait", &opt.random_wait, cmd_boolean },
{ "reclevel", &opt.reclevel, cmd_number_inf },
{ "recursive", NULL, cmd_spec_recursive },
{ "referer", &opt.referer, cmd_string },
-T, --timeout=SECONDS set the read timeout to SECONDS.\n\
-w, --wait=SECONDS wait SECONDS between retrievals.\n\
--waitretry=SECONDS wait 1...SECONDS between retries of a retrieval.\n\
+ --random-wait wait from 0...2*WAIT secs between retrievals.\n\
-Y, --proxy=on/off turn proxy on or off.\n\
-Q, --quota=NUMBER set retrieval quota to NUMBER.\n\
\n"), stdout);
+#ifdef HAVE_RANDOM
+ fputs (_("\
+\n"), stdout);
+#endif
fputs (_("\
Directories:\n\
-nd --no-directories don\'t create directories.\n\
{ "passive-ftp", no_argument, NULL, 139 },
{ "page-requisites", no_argument, NULL, 'p' },
{ "quiet", no_argument, NULL, 'q' },
+ { "random-wait", no_argument, NULL, 165 },
{ "recursive", no_argument, NULL, 'r' },
{ "relative", no_argument, NULL, 'L' },
{ "retr-symlinks", no_argument, NULL, 137 },
case 156:
setval ("httpkeepalive", "off");
break;
+ case 165:
+ setval ("randomwait", "on");
+ break;
case 'b':
setval ("background", "on");
break;
long timeout; /* The value of read timeout in
seconds. */
#endif
+ int random_wait; /* vary the wait from 0 .. 2*wait secs by random()? */
long wait; /* The wait period between retrievals. */
long waitretry; /* The wait period between retries. - HEH */
int use_robots; /* Do we heed robots.txt? */
sleep (opt.waitretry);
}
else if (opt.wait)
- /* Otherwise, check if opt.wait is specified. If so, sleep. */
- sleep (opt.wait);
+ {
+ /* opt.wait is set: sleep exactly that long, or a random 0 .. 2*opt.wait secs. */
+ if (count > 1 || !opt.random_wait)
+ sleep (opt.wait);
+ else
+ {
+ int waitsecs = random() % (opt.wait * 2 + 1);
+ DEBUGP(("sleep_between_retrievals: norm=%ld,random=%ld,sleep=%d\n",
+ opt.wait, waitsecs - opt.wait, waitsecs));
+ sleep(waitsecs);
+ }
+ }
}
if (first_retrieval)
first_retrieval = 0;