From 2c41d783c62f1252701b8cb5a8adbcf8efbf0275 Mon Sep 17 00:00:00 2001 From: hniksic Date: Sun, 25 Nov 2001 13:23:15 -0800 Subject: [PATCH] [svn] New option --random-wait. Submitted by Alan Eldridge in <200111042106.fA4L63b75804@wwweasel.geeksrus.net>. --- doc/ChangeLog | 4 ++++ doc/wget.texi | 24 ++++++++++++++++++++++++ src/ChangeLog | 14 ++++++++++++++ src/init.c | 1 + src/main.c | 9 +++++++++ src/options.h | 1 + src/retr.c | 14 ++++++++++++-- 7 files changed, 65 insertions(+), 2 deletions(-) diff --git a/doc/ChangeLog b/doc/ChangeLog index 901aa4b4..6e9b2ac8 100644 --- a/doc/ChangeLog +++ b/doc/ChangeLog @@ -1,3 +1,7 @@ +2001-11-04 Alan Eldridge + + * wget.texi: Document --random-wait, randomwait=on/off. + 2001-11-23 Hrvoje Niksic * wget.texi (Download Options): Document the new `--progress' diff --git a/doc/wget.texi b/doc/wget.texi index 7419c0d1..9b964fde 100644 --- a/doc/wget.texi +++ b/doc/wget.texi @@ -701,6 +701,26 @@ seconds per file. Note that this option is turned on by default in the global @file{wgetrc} file. +@cindex wait, random +@cindex random wait +@itemx --random-wait +Some web sites may perform log analysis to identify retrieval programs +such as Wget by looking for statistically significant similarities in +the time between requests. This option causes the time between requests +to vary between 0 and 2 * @var{wait} seconds, where @var{wait} was +specified using the @samp{-w} or @samp{--wait} options, in order to mask +Wget's presence from such analysis. + +A recent article in a publication devoted to development on a popular +consumer platform provided code to perform this analysis on the fly. +Its author suggested blocking at the class C address level to ensure +automated retrieval programs were blocked despite changing DHCP-supplied +addresses. + +The @samp{--random-wait} option was inspired by this ill-advised +recommendation to block many unrelated users from a web site due to the +actions of one. + @cindex proxy @item -Y on/off @itemx --proxy=on/off @@ -2168,6 +2188,10 @@ Wait @var{n} seconds between retrievals---the same as @samp{-w}. Wait up to @var{n} seconds between retries of failed retrievals only---the same as @samp{--waitretry}. Note that this is turned on by default in the global @file{wgetrc}. + +@item randomwait = on/off +Turn random between-request wait times on or off. The same as +@samp{--random-wait}. @end table @node Sample Wgetrc, , Wgetrc Commands, Startup File diff --git a/src/ChangeLog b/src/ChangeLog index c7fdc753..c051e020 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,17 @@ +2001-11-04 Alan Eldridge + + * config.h.in: added HAVE_RANDOM. + + * options.h: added random_wait to struct options. + + * main.c (print_help [HAVE_RANDOM], main): added arg parsing, help + for --random-wait. + + * retr.c (sleep_between_retrievals) [HAVE_RANDOM]: added + implementation of random wait times. + + * init.c (commands): added "randomwait" keyword. + 2001-11-25 Hrvoje Niksic * recur.c (descend_url_p): Be more conservative with blacklisting diff --git a/src/init.c b/src/init.c index f0a4f222..0d61b668 100644 --- a/src/init.c +++ b/src/init.c @@ -160,6 +160,7 @@ static struct { { "proxyuser", &opt.proxy_user, cmd_string }, { "quiet", &opt.quiet, cmd_boolean }, { "quota", &opt.quota, cmd_bytes }, + { "randomwait", &opt.random_wait, cmd_boolean }, { "reclevel", &opt.reclevel, cmd_number_inf }, { "recursive", NULL, cmd_spec_recursive }, { "referer", &opt.referer, cmd_string }, diff --git a/src/main.c b/src/main.c index f5cf13c2..0bdecf38 100644 --- a/src/main.c +++ b/src/main.c @@ -161,9 +161,14 @@ Download:\n\ -T, --timeout=SECONDS set the read timeout to SECONDS.\n\ -w, --wait=SECONDS wait SECONDS between retrievals.\n\ --waitretry=SECONDS wait 1...SECONDS between retries of a retrieval.\n\ + --random-wait wait from 0...2*WAIT secs between retrievals.\n\ -Y, --proxy=on/off turn proxy on or off.\n\ -Q, --quota=NUMBER set retrieval quota to NUMBER.\n\ \n"), stdout); +#ifdef HAVE_RANDOM + fputs (_("\ +\n"), stdout); +#endif fputs (_("\ Directories:\n\ -nd --no-directories don\'t create directories.\n\ @@ -261,6 +266,7 @@ main (int argc, char *const *argv) { "passive-ftp", no_argument, NULL, 139 }, { "page-requisites", no_argument, NULL, 'p' }, { "quiet", no_argument, NULL, 'q' }, + { "random-wait", no_argument, NULL, 165 }, { "recursive", no_argument, NULL, 'r' }, { "relative", no_argument, NULL, 'L' }, { "retr-symlinks", no_argument, NULL, 137 }, @@ -395,6 +401,9 @@ hpVqvdkKsxmNWrHSLcFbEY:G:g:T:U:O:l:n:i:o:a:t:D:A:R:P:B:e:Q:X:I:w:C:", case 156: setval ("httpkeepalive", "off"); break; + case 165: + setval ("randomwait", "on"); + break; case 'b': setval ("background", "on"); break; diff --git a/src/options.h b/src/options.h index 4f75c8b9..8ce4c6f8 100644 --- a/src/options.h +++ b/src/options.h @@ -99,6 +99,7 @@ struct options long timeout; /* The value of read timeout in seconds. */ #endif + int random_wait; /* vary from 0 .. wait secs by random()? */ long wait; /* The wait period between retrievals. */ long waitretry; /* The wait period between retries. - HEH */ int use_robots; /* Do we heed robots.txt? */ diff --git a/src/retr.c b/src/retr.c index 93ef7db5..b8fb6c84 100644 --- a/src/retr.c +++ b/src/retr.c @@ -560,8 +560,18 @@ sleep_between_retrievals (int count) sleep (opt.waitretry); } else if (opt.wait) - /* Otherwise, check if opt.wait is specified. If so, sleep. */ - sleep (opt.wait); + { + /* Otherwise, check if opt.wait is specified. If so, sleep. */ + if (count > 1 || !opt.random_wait) + sleep (opt.wait); + else + { + int waitsecs = random() % (opt.wait * 2 + 1); + DEBUGP(("sleep_between_retrievals: norm=%ld,random=%ld,sleep=%d\n", + opt.wait, waitsecs - opt.wait, waitsecs)); + sleep(waitsecs); + } + } } if (first_retrieval) first_retrieval = 0; -- 2.39.2