/* File retrieval.
Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
- 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
+ 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation,
+ Inc.
This file is part of GNU Wget.
#include <stdio.h>
#include <stdlib.h>
-#ifdef HAVE_UNISTD_H
-# include <unistd.h>
-#endif /* HAVE_UNISTD_H */
+#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <assert.h>
/* Write data in BUF to OUT. However, if *SKIP is non-zero, skip that
amount of data and decrease SKIP. Increment *TOTAL by the amount
- of data written. */
+ of data written. If OUT2 is not NULL, also write BUF to OUT2.
+ In case of error writing to OUT, -1 is returned. In case of error
+ writing to OUT2, -2 is returned. If there is nothing to write (no
+ output streams, or the data falls entirely within *SKIP), 1 is
+ returned; 0 means success. */
static int
-write_data (FILE *out, const char *buf, int bufsize, wgint *skip,
- wgint *written)
+write_data (FILE *out, FILE *out2, const char *buf, int bufsize,
+ wgint *skip, wgint *written)
{
- if (!out)
+ if (out == NULL && out2 == NULL)
return 1;
if (*skip > bufsize)
{
return 1;
}
- fwrite (buf, 1, bufsize, out);
+ if (out != NULL)
+ fwrite (buf, 1, bufsize, out);
+ if (out2 != NULL)
+ fwrite (buf, 1, bufsize, out2);
*written += bufsize;
/* Immediately flush the downloaded data. This should not hinder
performance; the buffer size used here has no actual justification.
(Also, why 16K? Anyone test other values?)
*/
#ifndef __VMS
- fflush (out);
+ if (out != NULL)
+ fflush (out);
+ if (out2 != NULL)
+ fflush (out2);
#endif /* ndef __VMS */
- return !ferror (out);
+ if (out != NULL && ferror (out))
+ return -1;
+ else if (out2 != NULL && ferror (out2))
+ return -2;
+ else
+ return 0;
}
/* Read the contents of file descriptor FD until the connection
the amount of data written to disk. The time it took to download
the data is stored to ELAPSED.
+ If OUT2 is non-NULL, the contents are also written to OUT2.
+ OUT2 will get an exact copy of the response: if this is a chunked
+ response, everything -- including the chunk headers -- is written
+ to OUT2. (OUT will only get the unchunked response.)
+
The function exits and returns the amount of data read. In case of
error while reading data, -1 is returned. In case of error while
- writing data, -2 is returned. */
+ writing data to OUT, -2 is returned. In case of error while writing
+ data to OUT2, -3 is returned. */
int
fd_read_body (int fd, FILE *out, wgint toread, wgint startpos,
- wgint *qtyread, wgint *qtywritten, double *elapsed, int flags)
+ wgint *qtyread, wgint *qtywritten, double *elapsed, int flags,
+ FILE *out2)
{
int ret = 0;
-
- static char dlbuf[16384];
- int dlbufsize = sizeof (dlbuf);
+#undef max
+#define max(a,b) ((a) > (b) ? (a) : (b))
+ int dlbufsize = max (BUFSIZ, 8 * 1024);
+ char *dlbuf = xmalloc (dlbufsize);
struct ptimer *timer = NULL;
double last_successful_read_tm = 0;
bool progress_interactive = false;
bool exact = !!(flags & rb_read_exactly);
+
+ /* Used only by HTTP/HTTPS chunked transfer encoding. */
+ bool chunked = flags & rb_chunked_transfer_encoding;
wgint skip = 0;
/* How much data we've read/written. */
wgint sum_read = 0;
wgint sum_written = 0;
+ wgint remaining_chunk_size = 0;
if (flags & rb_skip_startpos)
skip = startpos;
should be read. */
while (!exact || (sum_read < toread))
{
- int rdsize = exact ? MIN (toread - sum_read, dlbufsize) : dlbufsize;
+ int rdsize;
double tmout = opt.read_timeout;
+
+ if (chunked)
+ {
+ if (remaining_chunk_size == 0)
+ {
+ char *line = fd_read_line (fd);
+ char *endl;
+ if (line == NULL)
+ {
+ ret = -1;
+ break;
+ }
+ else if (out2 != NULL)
+ fwrite (line, 1, strlen (line), out2);
+
+ remaining_chunk_size = strtol (line, &endl, 16);
+ xfree (line);
+
+ if (remaining_chunk_size == 0)
+ {
+ ret = 0;
+ line = fd_read_line (fd);
+ if (line == NULL)
+ ret = -1;
+ else
+ {
+ if (out2 != NULL)
+ fwrite (line, 1, strlen (line), out2);
+ xfree (line);
+ }
+ break;
+ }
+ }
+
+ rdsize = MIN (remaining_chunk_size, dlbufsize);
+ }
+ else
+ rdsize = exact ? MIN (toread - sum_read, dlbufsize) : dlbufsize;
+
if (progress_interactive)
{
/* For interactive progress gauges, always specify a ~1s
if (ret > 0)
{
sum_read += ret;
- if (!write_data (out, dlbuf, ret, &skip, &sum_written))
+ int write_res = write_data (out, out2, dlbuf, ret, &skip, &sum_written);
+ if (write_res != 0)
{
- ret = -2;
+ /* write_data () reports an error on OUT as -1 and an error on
+ OUT2 as -2; translate to this function's documented return
+ codes: -2 for OUT errors, -3 for OUT2 errors. (Testing for
+ -3 here would never match, since write_data never returns
+ -3, and OUT2 failures would be misreported as -2.) */
+ ret = (write_res == -2) ? -3 : -2;
goto out;
}
+ if (chunked)
+ {
+ remaining_chunk_size -= ret;
+ if (remaining_chunk_size == 0)
+ {
+ /* End of chunk: consume the CRLF that terminates the
+ chunk data, mirroring it to OUT2 which gets the raw
+ (still-chunked) response. */
+ char *line = fd_read_line (fd);
+ if (line == NULL)
+ {
+ ret = -1;
+ break;
+ }
+ else
+ {
+ if (out2 != NULL)
+ fwrite (line, 1, strlen (line), out2);
+ xfree (line);
+ }
+ }
+ }
}
if (opt.limit_rate)
if (qtywritten)
*qtywritten += sum_written;
+ free (dlbuf);
+
return ret;
}
\f
{
static char res[20];
static const char *rate_names[] = {"B/s", "KB/s", "MB/s", "GB/s" };
+ static const char *rate_names_bits[] = {"b/s", "Kb/s", "Mb/s", "Gb/s" };
int units;
double dlrate = calc_rate (bytes, secs, &units);
e.g. "1022", "247", "12.5", "2.38". */
sprintf (res, "%.*f %s",
dlrate >= 99.95 ? 0 : dlrate >= 9.995 ? 1 : 2,
- dlrate, rate_names[units]);
+ dlrate, !opt.report_bps ? rate_names[units]: rate_names_bits[units]);
return res;
}
calc_rate (wgint bytes, double secs, int *units)
{
double dlrate;
+ double bibyte = 1000.0;
+
+ if (!opt.report_bps)
+ bibyte = 1024.0;
+
assert (secs >= 0);
assert (bytes >= 0);
0 and the timer's resolution, assume half the resolution. */
secs = ptimer_resolution () / 2.0;
- dlrate = bytes / secs;
- if (dlrate < 1024.0)
+ dlrate = convert_to_bits (bytes) / secs;
+ if (dlrate < bibyte)
*units = 0;
- else if (dlrate < 1024.0 * 1024.0)
- *units = 1, dlrate /= 1024.0;
- else if (dlrate < 1024.0 * 1024.0 * 1024.0)
- *units = 2, dlrate /= (1024.0 * 1024.0);
+ else if (dlrate < (bibyte * bibyte))
+ *units = 1, dlrate /= bibyte;
+ else if (dlrate < (bibyte * bibyte * bibyte))
+ *units = 2, dlrate /= (bibyte * bibyte);
+
else
/* Maybe someone will need this, one day. */
- *units = 3, dlrate /= (1024.0 * 1024.0 * 1024.0);
+ *units = 3, dlrate /= (bibyte * bibyte * bibyte);
return dlrate;
}
#endif
|| (proxy_url && proxy_url->scheme == SCHEME_HTTP))
{
- result = http_loop (u, &mynewloc, &local_file, refurl, dt, proxy_url, iri);
+ result = http_loop (u, orig_parsed, &mynewloc, &local_file, refurl, dt,
+ proxy_url, iri);
}
else if (u->scheme == SCHEME_FTP)
{
proxy_url = NULL;
}
- location_changed = (result == NEWLOCATION);
+ location_changed = (result == NEWLOCATION || result == NEWLOCATION_KEEP_POST);
if (location_changed)
{
char *construced_newloc;
}
u = newloc_parsed;
- /* If we're being redirected from POST, we don't want to POST
+ /* If we're being redirected from POST, and we received a
+ redirect code different than 307, we don't want to POST
again. Many requests answer POST with a redirection to an
index page; that redirection is clearly a GET. We "suspend"
POST data for the duration of the redirections, and restore
- it when we're done. */
- if (!post_data_suspended)
+ it when we're done.
+
+ RFC2616 HTTP/1.1 introduces code 307 Temporary Redirect
+ specifically to preserve the method of the request.
+ */
+ if (result != NEWLOCATION_KEEP_POST && !post_data_suspended)
SUSPEND_POST_DATA;
goto redirected;
DEBUGP (("[Couldn't fallback to non-utf8 for %s\n", quote (url)));
}
- if (local_file && *dt & RETROKF)
+ if (local_file && u && *dt & RETROKF)
{
register_download (u->url, local_file);
- if (redirection_count && 0 != strcmp (origurl, u->url))
+
+ if (!opt.spider && redirection_count && 0 != strcmp (origurl, u->url))
register_redirection (origurl, u->url);
+
if (*dt & TEXTHTML)
- register_html (u->url, local_file);
- if (*dt & RETROKF)
- {
- register_download (u->url, local_file);
- if (redirection_count && 0 != strcmp (origurl, u->url))
- register_redirection (origurl, u->url);
- if (*dt & TEXTHTML)
- register_html (u->url, local_file);
- if (*dt & TEXTCSS)
- register_css (u->url, local_file);
- }
+ register_html (local_file);
+
+ if (*dt & TEXTCSS)
+ register_css (local_file);
}
if (file)
set_uri_encoding (iri, opt.locale, true);
set_content_encoding (iri, opt.locale);
- if (url_has_scheme (url))
+ if (url_valid_scheme (url))
{
int dt,url_err;
uerr_t status;
- struct url * url_parsed = url_parse(url, &url_err, iri, true);
-
+ struct url *url_parsed = url_parse (url, &url_err, iri, true);
if (!url_parsed)
{
char *error = url_error (url, url_err);
break;
}
- /* Need to reparse the url, since it didn't have iri information. */
- if (opt.enable_iri)
- parsed_url = url_parse (cur_url->url->url, NULL, tmpiri, true);
+ parsed_url = url_parse (cur_url->url->url, NULL, tmpiri, true);
if ((opt.recursive || opt.page_requisites)
&& (cur_url->url->scheme != SCHEME_FTP || getproxy (cur_url->url)))
else
*file = default_file;
}
+
+/* Return true if INPUT_FILE is itself a URL (has a scheme), but only on the first call; false on all later calls. */
+/* Report whether INPUT_FILE names a URL rather than a local file.
+ A static flag ensures only the first URL-looking argument ever
+ yields true; every subsequent call returns false. */
+bool
+input_file_url (const char *input_file)
+{
+ static bool already_seen = false;
+
+ if (already_seen || input_file == NULL || !url_has_scheme (input_file))
+ return false;
+
+ already_seen = true;
+ return true;
+}