[svn] Merge of fix for bugs 20341 and 20410.

[wget] / src / retr.c
diff --git a/src/retr.c b/src/retr.c

index 918fb5de6df611a4b328d501925a2fa8f11ac2fe..c531be59c35bae44344c14e9b661a8c9af05b558 100644 (file)
--- a/src/retr.c
+++ b/src/retr.c
@@ -1,11 +1,11 @@
  /* File retrieval.
-   Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001 Free Software Foundation, Inc.
+   Copyright (C) 1996-2006 Free Software Foundation, Inc.
  
  This file is part of GNU Wget.
  
  GNU Wget is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or (at
+the Free Software Foundation; either version 3 of the License, or (at
  your option) any later version.
  
  GNU Wget is distributed in the hope that it will be useful,
@@ -14,8 +14,7 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.
  
  You should have received a copy of the GNU General Public License
-along with Wget; if not, write to the Free Software
-Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+along with Wget.  If not, see <http://www.gnu.org/licenses/>.
  
  In addition, as a special exception, the Free Software Foundation
  gives permission to link the code of its release of Wget with the
@@ -55,7 +54,7 @@ so, delete this exception statement from your version.  */
  /* Total size of downloaded files.  Used to enforce quota.  */
  SUM_SIZE_INT total_downloaded_bytes;
  
-/* Total download time in milliseconds. */
+/* Total download time in seconds. */
  double total_download_time;
  
  /* If non-NULL, the stream to which output should be written.  This
@@ -75,9 +74,7 @@ static struct {
  static void
  limit_bandwidth_reset (void)
  {
-  limit_data.chunk_bytes = 0;
-  limit_data.chunk_start = 0;
-  limit_data.sleep_adjust = 0;
+  xzero (limit_data);
  }
  
  /* Limit the bandwidth by pausing the download for an amount of time.
@@ -95,25 +92,25 @@ limit_bandwidth (wgint bytes, struct ptimer *timer)
    /* Calculate the amount of time we expect downloading the chunk
       should take.  If in reality it took less time, sleep to
       compensate for the difference.  */
-  expected = 1000.0 * limit_data.chunk_bytes / opt.limit_rate;
+  expected = (double) limit_data.chunk_bytes / opt.limit_rate;
  
    if (expected > delta_t)
      {
        double slp = expected - delta_t + limit_data.sleep_adjust;
        double t0, t1;
-      if (slp < 200)
+      if (slp < 0.2)
         {
           DEBUGP (("deferring a %.2f ms sleep (%s/%.2f).\n",
-                  slp, number_to_static_string (limit_data.chunk_bytes),
+                  slp * 1000, number_to_static_string (limit_data.chunk_bytes),
                    delta_t));
           return;
         }
        DEBUGP (("\nsleeping %.2f ms for %s bytes, adjust %.2f ms\n",
-              slp, number_to_static_string (limit_data.chunk_bytes),
+              slp * 1000, number_to_static_string (limit_data.chunk_bytes),
                limit_data.sleep_adjust));
  
        t0 = ptimer_read (timer);
-      xsleep (slp / 1000);
+      xsleep (slp);
        t1 = ptimer_measure (timer);
  
        /* Due to scheduling, we probably slept slightly longer (or
@@ -123,10 +120,10 @@ limit_bandwidth (wgint bytes, struct ptimer *timer)
        limit_data.sleep_adjust = slp - (t1 - t0);
        /* If sleep_adjust is very large, it's likely due to suspension
          and not clock inaccuracy.  Don't enforce those.  */
-      if (limit_data.sleep_adjust > 500)
-       limit_data.sleep_adjust = 500;
-      else if (limit_data.sleep_adjust < -500)
-       limit_data.sleep_adjust = -500;
+      if (limit_data.sleep_adjust > 0.5)
+       limit_data.sleep_adjust = 0.5;
+      else if (limit_data.sleep_adjust < -0.5)
+       limit_data.sleep_adjust = -0.5;
      }
  
    limit_data.chunk_bytes = 0;
@@ -185,7 +182,7 @@ write_data (FILE *out, const char *buf, int bufsize, wgint *skip,
     is incremented by the amount of data read from the network.  If
     QTYWRITTEN is non-NULL, the value it points to is incremented by
     the amount of data written to disk.  The time it took to download
-   the data (in milliseconds) is stored to ELAPSED.
+   the data is stored to ELAPSED.
  
     The function exits and returns the amount of data read.  In case of
     error while reading data, -1 is returned.  In case of error while
@@ -267,7 +264,7 @@ fd_read_body (int fd, FILE *out, wgint toread, wgint startpos,
           if (opt.read_timeout)
             {
               double waittm;
-             waittm = (ptimer_read (timer) - last_successful_read_tm) / 1000;
+             waittm = ptimer_read (timer) - last_successful_read_tm;
               if (waittm + tmout > opt.read_timeout)
                 {
                   /* Don't let total idle time exceed read timeout. */
@@ -517,14 +514,18 @@ fd_read_line (int fd)
     the units appropriate for the download speed.  */
  
  const char *
-retr_rate (wgint bytes, double msecs)
+retr_rate (wgint bytes, double secs)
  {
    static char res[20];
    static const char *rate_names[] = {"B/s", "KB/s", "MB/s", "GB/s" };
-  int units = 0;
+  int units;
  
-  double dlrate = calc_rate (bytes, msecs, &units);
-  sprintf (res, "%.2f %s", dlrate, rate_names[units]);
+  double dlrate = calc_rate (bytes, secs, &units);
+  /* Use more digits for smaller numbers (regardless of unit used),
+     e.g. "1022", "247", "12.5", "2.38".  */
+  sprintf (res, "%.*f %s",
+          dlrate >= 99.95 ? 0 : dlrate >= 9.995 ? 1 : 2,
+          dlrate, rate_names[units]);
  
    return res;
  }
@@ -536,22 +537,23 @@ retr_rate (wgint bytes, double msecs)
  
     UNITS is zero for B/s, one for KB/s, two for MB/s, and three for
     GB/s.  */
+
  double
-calc_rate (wgint bytes, double msecs, int *units)
+calc_rate (wgint bytes, double secs, int *units)
  {
    double dlrate;
  
-  assert (msecs >= 0);
+  assert (secs >= 0);
    assert (bytes >= 0);
  
-  if (msecs == 0)
+  if (secs == 0)
      /* If elapsed time is exactly zero, it means we're under the
         resolution of the timer.  This can easily happen on systems
         that use time() for the timer.  Since the interval lies between
         0 and the timer's resolution, assume half the resolution.  */
-    msecs = ptimer_resolution () / 2.0;
+    secs = ptimer_resolution () / 2.0;
  
-  dlrate = 1000.0 * bytes / msecs;
+  dlrate = bytes / secs;
    if (dlrate < 1024.0)
      *units = 0;
    else if (dlrate < 1024.0 * 1024.0)
@@ -599,7 +601,7 @@ static char *getproxy (struct url *);
  
  uerr_t
  retrieve_url (const char *origurl, char **file, char **newloc,
-             const char *refurl, int *dt)
+             const char *refurl, int *dt, bool recursive)
  {
    uerr_t result;
    char *url;
@@ -681,13 +683,12 @@ retrieve_url (const char *origurl, char **file, char **newloc,
        /* If this is a redirection, temporarily turn off opt.ftp_glob
          and opt.recursive, both being undesirable when following
          redirects.  */
-      bool oldrec = opt.recursive, oldglob = opt.ftp_glob;
+      bool oldrec = recursive, glob = opt.ftp_glob;
        if (redirection_count)
-       opt.recursive = opt.ftp_glob = false;
+       oldrec = glob = false;
  
-      result = ftp_loop (u, dt, proxy_url);
-      opt.recursive = oldrec;
-      opt.ftp_glob = oldglob;
+      result = ftp_loop (u, dt, proxy_url, recursive, glob);
+      recursive = oldrec;
  
        /* There is a possibility of having HTTP being redirected to
          FTP.  In these cases we must decide whether the text is HTML
@@ -842,10 +843,20 @@ retrieve_from_file (const char *file, bool html, int *count)
           break;
         }
        if ((opt.recursive || opt.page_requisites)
-         && cur_url->url->scheme != SCHEME_FTP)
-       status = retrieve_tree (cur_url->url->url);
+         && (cur_url->url->scheme != SCHEME_FTP || getproxy (cur_url->url)))
+       {
+         int old_follow_ftp = opt.follow_ftp;
+
+         /* Turn opt.follow_ftp on in case of recursive FTP retrieval */
+         if (cur_url->url->scheme == SCHEME_FTP) 
+           opt.follow_ftp = 1;
+         
+         status = retrieve_tree (cur_url->url->url);
+
+         opt.follow_ftp = old_follow_ftp;
+       }
        else
-       status = retrieve_url (cur_url->url->url, &filename, &new_file, NULL, &dt);
+       status = retrieve_url (cur_url->url->url, &filename, &new_file, NULL, &dt, opt.recursive);
  
        if (filename && opt.delete_after && file_exists_p (filename))
         {
@@ -912,9 +923,9 @@ sleep_between_retrievals (int count)
        else
         {
           /* Sleep a random amount of time averaging in opt.wait
-            seconds.  The sleeping amount ranges from 0 to
-            opt.wait*2, inclusive.  */
-         double waitsecs = 2 * opt.wait * random_float ();
+            seconds.  The sleeping amount ranges from 0.5*opt.wait to
+            1.5*opt.wait.  */
+         double waitsecs = (0.5 + random_float ()) * opt.wait;
           DEBUGP (("sleep_between_retrievals: avg=%f,sleep=%f\n",
                    opt.wait, waitsecs));
           xsleep (waitsecs);
@@ -975,7 +986,7 @@ getproxy (struct url *u)
  
    if (!opt.use_proxy)
      return NULL;
-  if (!no_proxy_match (u->host, (const char **)opt.no_proxy))
+  if (no_proxy_match (u->host, (const char **)opt.no_proxy))
      return NULL;
  
    switch (u->scheme)
@@ -1010,12 +1021,26 @@ getproxy (struct url *u)
    return proxy;
  }
  
+/* Returns true if URL would be downloaded through a proxy; returns false if no proxy applies or if URL cannot be parsed.  */
+
+bool
+url_uses_proxy (const char *url)
+{
+  bool ret;
+  struct url *u = url_parse (url, NULL);
+  if (!u)
+    return false;
+  ret = getproxy (u) != NULL;
+  url_free (u);
+  return ret;
+}
+
  /* Returns true if HOST is matched by the NO_PROXY list (as decided by sufmatch), i.e. the proxy must be bypassed for it; returns false when NO_PROXY is NULL.  */
  static bool
  no_proxy_match (const char *host, const char **no_proxy)
  {
    if (!no_proxy)
-    return true;
+    return false;
    else
-    return !sufmatch (no_proxy, host);
+    return sufmatch (no_proxy, host);
  }