Ted Mielczarek's CSS wonder-patch, applied against the source from around the time...

[wget] / src / retr.c
diff --git a/src/retr.c b/src/retr.c

index f9f1f33a901dcdd5bc2173e6758d5c83186a50b7..245eb129f40d5f49644d0bb69de1e7489f54cf1d 100644 (file)
--- a/src/retr.c
+++ b/src/retr.c
@@ -1,5 +1,5 @@
  /* File retrieval.
-   Copyright (C) 1996-2005 Free Software Foundation, Inc.
+   Copyright (C) 1996-2006 Free Software Foundation, Inc.
  
  This file is part of GNU Wget.
  
@@ -51,11 +51,12 @@ so, delete this exception statement from your version.  */
  #include "hash.h"
  #include "convert.h"
  #include "ptimer.h"
+#include "html-url.h"
  
  /* Total size of downloaded files.  Used to enforce quota.  */
  SUM_SIZE_INT total_downloaded_bytes;
  
-/* Total download time in milliseconds. */
+/* Total download time in seconds. */
  double total_download_time;
  
  /* If non-NULL, the stream to which output should be written.  This
@@ -75,9 +76,7 @@ static struct {
  static void
  limit_bandwidth_reset (void)
  {
-  limit_data.chunk_bytes = 0;
-  limit_data.chunk_start = 0;
-  limit_data.sleep_adjust = 0;
+  xzero (limit_data);
  }
  
  /* Limit the bandwidth by pausing the download for an amount of time.
@@ -95,25 +94,25 @@ limit_bandwidth (wgint bytes, struct ptimer *timer)
    /* Calculate the amount of time we expect downloading the chunk
       should take.  If in reality it took less time, sleep to
       compensate for the difference.  */
-  expected = 1000.0 * limit_data.chunk_bytes / opt.limit_rate;
+  expected = (double) limit_data.chunk_bytes / opt.limit_rate;
  
    if (expected > delta_t)
      {
        double slp = expected - delta_t + limit_data.sleep_adjust;
        double t0, t1;
-      if (slp < 200)
+      if (slp < 0.2)
         {
           DEBUGP (("deferring a %.2f ms sleep (%s/%.2f).\n",
-                  slp, number_to_static_string (limit_data.chunk_bytes),
+                  slp * 1000, number_to_static_string (limit_data.chunk_bytes),
                    delta_t));
           return;
         }
        DEBUGP (("\nsleeping %.2f ms for %s bytes, adjust %.2f ms\n",
-              slp, number_to_static_string (limit_data.chunk_bytes),
+              slp * 1000, number_to_static_string (limit_data.chunk_bytes),
                limit_data.sleep_adjust));
  
        t0 = ptimer_read (timer);
-      xsleep (slp / 1000);
+      xsleep (slp);
        t1 = ptimer_measure (timer);
  
        /* Due to scheduling, we probably slept slightly longer (or
@@ -123,10 +122,10 @@ limit_bandwidth (wgint bytes, struct ptimer *timer)
        limit_data.sleep_adjust = slp - (t1 - t0);
        /* If sleep_adjust is very large, it's likely due to suspension
          and not clock inaccuracy.  Don't enforce those.  */
-      if (limit_data.sleep_adjust > 500)
-       limit_data.sleep_adjust = 500;
-      else if (limit_data.sleep_adjust < -500)
-       limit_data.sleep_adjust = -500;
+      if (limit_data.sleep_adjust > 0.5)
+       limit_data.sleep_adjust = 0.5;
+      else if (limit_data.sleep_adjust < -0.5)
+       limit_data.sleep_adjust = -0.5;
      }
  
    limit_data.chunk_bytes = 0;
@@ -185,7 +184,7 @@ write_data (FILE *out, const char *buf, int bufsize, wgint *skip,
     is incremented by the amount of data read from the network.  If
     QTYWRITTEN is non-NULL, the value it points to is incremented by
     the amount of data written to disk.  The time it took to download
-   the data (in milliseconds) is stored to ELAPSED.
+   the data is stored to ELAPSED.
  
     The function exits and returns the amount of data read.  In case of
     error while reading data, -1 is returned.  In case of error while
@@ -267,7 +266,7 @@ fd_read_body (int fd, FILE *out, wgint toread, wgint startpos,
           if (opt.read_timeout)
             {
               double waittm;
-             waittm = (ptimer_read (timer) - last_successful_read_tm) / 1000;
+             waittm = ptimer_read (timer) - last_successful_read_tm;
               if (waittm + tmout > opt.read_timeout)
                 {
                   /* Don't let total idle time exceed read timeout. */
@@ -517,13 +516,13 @@ fd_read_line (int fd)
     the units appropriate for the download speed.  */
  
  const char *
-retr_rate (wgint bytes, double msecs)
+retr_rate (wgint bytes, double secs)
  {
    static char res[20];
    static const char *rate_names[] = {"B/s", "KB/s", "MB/s", "GB/s" };
    int units;
  
-  double dlrate = calc_rate (bytes, msecs, &units);
+  double dlrate = calc_rate (bytes, secs, &units);
    /* Use more digits for smaller numbers (regardless of unit used),
       e.g. "1022", "247", "12.5", "2.38".  */
    sprintf (res, "%.*f %s",
@@ -540,22 +539,23 @@ retr_rate (wgint bytes, double msecs)
  
     UNITS is zero for B/s, one for KB/s, two for MB/s, and three for
     GB/s.  */
+
  double
-calc_rate (wgint bytes, double msecs, int *units)
+calc_rate (wgint bytes, double secs, int *units)
  {
    double dlrate;
  
-  assert (msecs >= 0);
+  assert (secs >= 0);
    assert (bytes >= 0);
  
-  if (msecs == 0)
+  if (secs == 0)
      /* If elapsed time is exactly zero, it means we're under the
         resolution of the timer.  This can easily happen on systems
         that use time() for the timer.  Since the interval lies between
         0 and the timer's resolution, assume half the resolution.  */
-    msecs = ptimer_resolution () / 2.0;
+    secs = ptimer_resolution () / 2.0;
  
-  dlrate = 1000.0 * bytes / msecs;
+  dlrate = bytes / secs;
    if (dlrate < 1024.0)
      *units = 0;
    else if (dlrate < 1024.0 * 1024.0)
@@ -603,7 +603,7 @@ static char *getproxy (struct url *);
  
  uerr_t
  retrieve_url (const char *origurl, char **file, char **newloc,
-             const char *refurl, int *dt)
+             const char *refurl, int *dt, bool recursive)
  {
    uerr_t result;
    char *url;
@@ -685,13 +685,12 @@ retrieve_url (const char *origurl, char **file, char **newloc,
        /* If this is a redirection, temporarily turn off opt.ftp_glob
          and opt.recursive, both being undesirable when following
          redirects.  */
-      bool oldrec = opt.recursive, oldglob = opt.ftp_glob;
+      bool oldrec = recursive, glob = opt.ftp_glob;
        if (redirection_count)
-       opt.recursive = opt.ftp_glob = false;
+       oldrec = glob = false;
  
-      result = ftp_loop (u, dt, proxy_url);
-      opt.recursive = oldrec;
-      opt.ftp_glob = oldglob;
+      result = ftp_loop (u, dt, proxy_url, recursive, glob);
+      recursive = oldrec;
  
        /* There is a possibility of having HTTP being redirected to
          FTP.  In these cases we must decide whether the text is HTML
@@ -786,6 +785,8 @@ retrieve_url (const char *origurl, char **file, char **newloc,
             register_redirection (origurl, u->url);
           if (*dt & TEXTHTML)
             register_html (u->url, local_file);
+         if (*dt & TEXTCSS)
+           register_css (u->url, local_file);
         }
      }
  
@@ -846,10 +847,20 @@ retrieve_from_file (const char *file, bool html, int *count)
           break;
         }
        if ((opt.recursive || opt.page_requisites)
-         && cur_url->url->scheme != SCHEME_FTP)
-       status = retrieve_tree (cur_url->url->url);
+         && (cur_url->url->scheme != SCHEME_FTP || getproxy (cur_url->url)))
+       {
+         int old_follow_ftp = opt.follow_ftp;
+
+         /* Turn opt.follow_ftp on in case of recursive FTP retrieval */
+         if (cur_url->url->scheme == SCHEME_FTP) 
+           opt.follow_ftp = 1;
+         
+         status = retrieve_tree (cur_url->url->url);
+
+         opt.follow_ftp = old_follow_ftp;
+       }
        else
-       status = retrieve_url (cur_url->url->url, &filename, &new_file, NULL, &dt);
+       status = retrieve_url (cur_url->url->url, &filename, &new_file, NULL, &dt, opt.recursive);
  
        if (filename && opt.delete_after && file_exists_p (filename))
         {
@@ -916,9 +927,9 @@ sleep_between_retrievals (int count)
        else
         {
           /* Sleep a random amount of time averaging in opt.wait
-            seconds.  The sleeping amount ranges from 0 to
-            opt.wait*2, inclusive.  */
-         double waitsecs = 2 * opt.wait * random_float ();
+            seconds.  The sleeping amount ranges from 0.5*opt.wait to
+            1.5*opt.wait.  */
+         double waitsecs = (0.5 + random_float ()) * opt.wait;
           DEBUGP (("sleep_between_retrievals: avg=%f,sleep=%f\n",
                    opt.wait, waitsecs));
           xsleep (waitsecs);
@@ -979,7 +990,7 @@ getproxy (struct url *u)
  
    if (!opt.use_proxy)
      return NULL;
-  if (!no_proxy_match (u->host, (const char **)opt.no_proxy))
+  if (no_proxy_match (u->host, (const char **)opt.no_proxy))
      return NULL;
  
    switch (u->scheme)
@@ -1014,12 +1025,26 @@ getproxy (struct url *u)
    return proxy;
  }
  
+/* Returns true if URL would be downloaded through a proxy, false otherwise. */
+
+bool
+url_uses_proxy (const char *url)
+{
+  bool ret;
+  struct url *u = url_parse (url, NULL);
+  if (!u)
+    return false;               /* no parsed URL to check: answer false */
+  ret = getproxy (u) != NULL;   /* getproxy returns NULL when no proxy is used */
+  url_free (u);                 /* release U; only the NULL test above is kept */
+  return ret;
+}
+
-/* Should a host be accessed through proxy, concerning no_proxy?  */
+/* Is HOST matched by NO_PROXY, so that it must not use a proxy?  */
  static bool
  no_proxy_match (const char *host, const char **no_proxy)
  {
    if (!no_proxy)
-    return true;
+    return false;
    else
-    return !sufmatch (no_proxy, host);
+    return sufmatch (no_proxy, host);
  }