Test for Content-Disposition in HTTP auth.

[wget] / src / retr.c
diff --git a/src/retr.c b/src/retr.c

index 0fd936d0d9f540061cf6274c8d869d19259ad823..edc482906312085e8b01e10d42f010627080b6fc 100644 (file)
--- a/src/retr.c
+++ b/src/retr.c
@@ -1,6 +1,6 @@
  /* File retrieval.
-   Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
-   2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+   Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
+   2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
  
  This file is part of GNU Wget.
  
@@ -39,6 +39,7 @@ as that of the covered work.  */
  #include <string.h>
  #include <assert.h>
  
+#include "exits.h"
  #include "utils.h"
  #include "retr.h"
  #include "progress.h"
@@ -52,6 +53,7 @@ as that of the covered work.  */
  #include "convert.h"
  #include "ptimer.h"
  #include "html-url.h"
+#include "iri.h"
  
  /* Total size of downloaded files.  Used to enforce quota.  */
  SUM_SIZE_INT total_downloaded_bytes;
@@ -167,7 +169,18 @@ write_data (FILE *out, const char *buf, int bufsize, wgint *skip,
       performance: fast downloads will arrive in large 16K chunks
       (which stdio would write out immediately anyway), and slow
       downloads wouldn't be limited by disk speed.  */
+
+  /* 2005-04-20 SMS.
+     Perhaps it shouldn't hinder performance, but it sure does, at least
+     on VMS (more than 2X).  Rather than speculate on what it should or
+     shouldn't do, it might make more sense to test it.  Even better, it
+     might be nice to explain what possible benefit it could offer, as
+     it appears to be a clear invitation to poor performance with no
+     actual justification.  (Also, why 16K?  Anyone test other values?)
+  */
+#ifndef __VMS
    fflush (out);
+#endif /* ndef __VMS */
    return !ferror (out);
  }
  
@@ -599,11 +612,12 @@ static char *getproxy (struct url *);
  uerr_t
  retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
                char **newloc, const char *refurl, int *dt, bool recursive,
-              struct iri *iri)
+              struct iri *iri, bool register_status)
  {
    uerr_t result;
    char *url;
    bool location_changed;
+  bool iri_fallbacked = 0;
    int dummy;
    char *mynewloc, *proxy;
    struct url *u = orig_parsed, *proxy_url;
@@ -627,15 +641,11 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
    if (file)
      *file = NULL;
  
- second_try:
-  DEBUGP (("[IRI Retrieving %s with %s (UTF-8=%d)\n", quote_n (0, url),
-           iri->uri_encoding ? quote_n (1, iri->uri_encoding) : "None",
-           iri->utf8_encode));
-
    if (!refurl)
      refurl = opt.referer;
  
   redirected:
+  /* (This label is also the re-entry point for the IRI fallback.) */
  
    result = NOCONERROR;
    mynewloc = NULL;
@@ -659,7 +669,8 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
            xfree (url);
            xfree (error);
            RESTORE_POST_DATA;
-          return PROXERR;
+          result = PROXERR;
+          goto bail;
          }
        if (proxy_url->scheme != SCHEME_HTTP && proxy_url->scheme != u->scheme)
          {
@@ -667,7 +678,8 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
            url_free (proxy_url);
            xfree (url);
            RESTORE_POST_DATA;
-          return PROXERR;
+          result = PROXERR;
+          goto bail;
          }
      }
  
@@ -748,7 +760,7 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
            xfree (mynewloc);
            xfree (error);
            RESTORE_POST_DATA;
-          return result;
+          goto bail;
          }
  
        /* Now mynewloc will become newloc_parsed->url, because if the
@@ -770,7 +782,8 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
            xfree (url);
            xfree (mynewloc);
            RESTORE_POST_DATA;
-          return WRONGCODE;
+          result = WRONGCODE;
+          goto bail;
          }
  
        xfree (url);
@@ -796,8 +809,20 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
    if (!(*dt & RETROKF) && iri->utf8_encode)
      {
        iri->utf8_encode = false;
-      DEBUGP (("[IRI fallbacking to non-utf8 for %s\n", quote (url)));
-      goto second_try;
+      if (orig_parsed != u)
+        {
+          url_free (u);
+        }
+      u = url_parse (origurl, NULL, iri, true);
+      if (u)
+        {
+          DEBUGP (("[IRI fallbacking to non-utf8 for %s\n", quote (url)));
+          url = xstrdup (u->url);
+          iri_fallbacked = 1;
+          goto redirected;
+        }
+      else
+          DEBUGP (("[Couldn't fallback to non-utf8 for %s\n", quote (url)));
      }
  
    if (local_file && *dt & RETROKF)
@@ -829,7 +854,7 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
        url_free (u);
      }
  
-  if (redirection_count)
+  if (redirection_count || iri_fallbacked)
      {
        if (newloc)
          *newloc = url;
@@ -845,6 +870,9 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
  
    RESTORE_POST_DATA;
  
+bail:
+  if (register_status)
+    inform_exit_status (result);
    return result;
  }
  
@@ -875,7 +903,7 @@ retrieve_from_file (const char *file, bool html, int *count)
      {
        int dt,url_err;
        uerr_t status;
-      struct url * url_parsed = url_parse(url, &url_err, NULL, true);
+      struct url * url_parsed = url_parse(url, &url_err, iri, true);
  
        if (!url_parsed)
          {
@@ -889,16 +917,22 @@ retrieve_from_file (const char *file, bool html, int *count)
          opt.base_href = xstrdup (url);
  
        status = retrieve_url (url_parsed, url, &input_file, NULL, NULL, &dt,
-                             false, iri);
+                             false, iri, true);
        if (status != RETROK)
          return status;
  
        if (dt & TEXTHTML)
          html = true;
  
-      /* If we have a found a content encoding, use it */
-      if (iri->content_encoding)
+      /* If we have found a content encoding, use it.
+       * (Pointer != is okay: we are checking for the identical object.) */
+      if (iri->content_encoding != opt.locale)
           set_uri_encoding (iri, iri->content_encoding, false);
+
+      /* Reset UTF-8 encode status */
+      iri->utf8_encode = opt.enable_iri;
+      xfree_null (iri->orig_url);
+      iri->orig_url = NULL;
      }
    else
      input_file = (char *) file;
@@ -910,6 +944,8 @@ retrieve_from_file (const char *file, bool html, int *count)
      {
        char *filename = NULL, *new_file = NULL;
        int dt;
+      struct iri *tmpiri = iri_dup (iri);
+      struct url *parsed_url = NULL;
  
        if (cur_url->ignore_when_downloading)
          continue;
@@ -920,10 +956,9 @@ retrieve_from_file (const char *file, bool html, int *count)
            break;
          }
  
-      /* Reset UTF-8 encode status */
-      iri->utf8_encode = opt.enable_iri;
-      xfree_null (iri->orig_url);
-      iri->orig_url = NULL;
+      /* Need to reparse the url, since it didn't have iri information. */
+      if (opt.enable_iri)
+          parsed_url = url_parse (cur_url->url->url, NULL, tmpiri, true);
  
        if ((opt.recursive || opt.page_requisites)
            && (cur_url->url->scheme != SCHEME_FTP || getproxy (cur_url->url)))
@@ -934,13 +969,19 @@ retrieve_from_file (const char *file, bool html, int *count)
            if (cur_url->url->scheme == SCHEME_FTP)
              opt.follow_ftp = 1;
  
-          status = retrieve_tree (cur_url->url, iri);
+          status = retrieve_tree (parsed_url ? parsed_url : cur_url->url,
+                                  tmpiri);
  
            opt.follow_ftp = old_follow_ftp;
          }
        else
-        status = retrieve_url (cur_url->url, cur_url->url->url, &filename,
-                               &new_file, NULL, &dt, opt.recursive, iri);
+        status = retrieve_url (parsed_url ? parsed_url : cur_url->url,
+                               cur_url->url->url, &filename,
+                               &new_file, NULL, &dt, opt.recursive, tmpiri,
+                               true);
+
+      if (parsed_url)
+          url_free (parsed_url);
  
        if (filename && opt.delete_after && file_exists_p (filename))
          {
@@ -954,6 +995,7 @@ Removing file due to --delete-after in retrieve_from_file():\n"));
  
        xfree_null (new_file);
        xfree_null (filename);
+      iri_free (tmpiri);
      }
  
    /* Free the linked list of URL-s.  */