X-Git-Url: http://sjero.net/git/?p=wget;a=blobdiff_plain;f=src%2Fretr.c;h=39627e4bbf5067e8ea563ac110c39c44e1cf087a;hp=2a9dec7bbb658aa6bdb12365813c0121142e7cdb;hb=d5e283b1a75c5f8249300b465b4e7b55130bec49;hpb=50e12521d63b8b42370a07d9c9971f56ca6dc58a

diff --git a/src/retr.c b/src/retr.c
index 2a9dec7b..39627e4b 100644
--- a/src/retr.c
+++ b/src/retr.c
@@ -52,6 +52,7 @@ as that of the covered work. */
 #include "convert.h"
 #include "ptimer.h"
 #include "html-url.h"
+#include "iri.h"
 
 /* Total size of downloaded files. Used to enforce quota. */
 SUM_SIZE_INT total_downloaded_bytes;
@@ -142,8 +143,10 @@ limit_bandwidth (wgint bytes, struct ptimer *timer)
 
 static int
 write_data (FILE *out, const char *buf, int bufsize, wgint *skip,
-            wgint *written)
+            wgint *written, int flags)
 {
+  static int cr_pending = 0;    /* Found CR in ASCII FTP data. */
+
   if (!out)
     return 1;
   if (*skip > bufsize)
@@ -160,14 +163,89 @@ write_data (FILE *out, const char *buf, int bufsize, wgint *skip,
         return 1;
     }
 
-  fwrite (buf, 1, bufsize, out);
-  *written += bufsize;
+/* Note: This code assumes that "\n" is the universal line ending
+   character, as on UNIX and VMS. If this is not true, then here's
+   where to change it.
+*/
+
+#if 1
+# define EOL_STRING "\n"
+#else /* 1 */
+# define EOL_STRING "\r\n"
+#endif /* 1 [else] */
+#define EOL_STRING_LEN (sizeof( EOL_STRING)- 1)
+
+  if (flags & rb_ftp_ascii)
+    {
+      const char *bufend;
+
+      /* ASCII transfer. Put out lines delimited by CRLF. */
+      bufend = buf+ bufsize;
+      while (buf < bufend)
+        {
+          /* If CR, put out any pending CR, then set CR-pending flag. */
+          if (*buf == '\r')
+            {
+              if (cr_pending)
+                {
+                  fwrite ("\r", 1, 1, out);
+                  *written += 1;
+                }
+              cr_pending = 1;
+              buf++;
+              continue;
+            }
+
+          if (cr_pending)
+            {
+              if (*buf == '\n')
+                {
+                  /* Found FTP EOL (CRLF). Put out local EOL. */
+                  fwrite (EOL_STRING, 1, EOL_STRING_LEN, out);
+                  *written += EOL_STRING_LEN;
+                }
+              else
+                {
+                  /* Normal character. Put out pending CR and it. */
+                  fwrite ("\r", 1, 1, out);
+                  fwrite (buf, 1, 1, out);
+                  *written += 2;
+                }
+              buf++;
+              cr_pending = 0;
+            }
+          else
+            {
+              /* Normal character. Put it out. */
+              fwrite (buf, 1, 1, out);
+              *written += 1;
+              buf++;
+            }
+        }
+    }
+  else
+    {
+      /* Image transfer. Put out buffer. */
+      fwrite (buf, 1, bufsize, out);
+      *written += bufsize;
+    }
 
   /* Immediately flush the downloaded data. This should not hinder
      performance: fast downloads will arrive in large 16K chunks
      (which stdio would write out immediately anyway), and slow
      downloads wouldn't be limited by disk speed. */
+
+  /* 2005-04-20 SMS.
+     Perhaps it shouldn't hinder performance, but it sure does, at least
+     on VMS (more than 2X). Rather than speculate on what it should or
+     shouldn't do, it might make more sense to test it. Even better, it
+     might be nice to explain what possible benefit it could offer, as
+     it appears to be a clear invitation to poor performance with no
+     actual justification. (Also, why 16K? Anyone test other values?)
+  */
+#ifndef __VMS
   fflush (out);
+#endif /* ndef __VMS */
   return !ferror (out);
 }
 
@@ -298,7 +376,7 @@ fd_read_body (int fd, FILE *out, wgint toread, wgint startpos,
       if (ret > 0)
         {
           sum_read += ret;
-          if (!write_data (out, dlbuf, ret, &skip, &sum_written))
+          if (!write_data (out, dlbuf, ret, &skip, &sum_written, flags))
             {
               ret = -2;
               goto out;
@@ -604,6 +682,7 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
   uerr_t result;
   char *url;
   bool location_changed;
+  bool iri_fallbacked = 0;
   int dummy;
   char *mynewloc, *proxy;
   struct url *u = orig_parsed, *proxy_url;
@@ -627,15 +706,11 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
   if (file)
     *file = NULL;
 
- second_try:
-  DEBUGP (("[IRI Retrieving %s with %s (UTF-8=%d)\n", quote_n (0, url),
-           iri->uri_encoding ? quote_n (1, iri->uri_encoding) : "None",
-           iri->utf8_encode));
-
   if (!refurl)
     refurl = opt.referer;
 
  redirected:
+  /* (also for IRI fallbacking) */
 
   result = NOCONERROR;
   mynewloc = NULL;
@@ -804,7 +879,9 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
           if (u)
             {
               DEBUGP (("[IRI fallbacking to non-utf8 for %s\n", quote (url)));
-              goto second_try;
+              url = xstrdup (u->url);
+              iri_fallbacked = 1;
+              goto redirected;
             }
           else
               DEBUGP (("[Couldn't fallback to non-utf8 for %s\n", quote (url)));
@@ -839,7 +916,7 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
       url_free (u);
     }
 
-  if (redirection_count)
+  if (redirection_count || iri_fallbacked)
     {
       if (newloc)
         *newloc = url;
@@ -885,7 +962,7 @@ retrieve_from_file (const char *file, bool html, int *count)
     {
       int dt,url_err;
       uerr_t status;
-      struct url * url_parsed = url_parse(url, &url_err, NULL, true);
+      struct url * url_parsed = url_parse(url, &url_err, iri, true);
 
       if (!url_parsed)
         {
@@ -906,9 +983,15 @@ retrieve_from_file (const char *file, bool html, int *count)
       if (dt & TEXTHTML)
         html = true;
 
-      /* If we have a found a content encoding, use it */
-      if (iri->content_encoding)
+      /* If we have a found a content encoding, use it.
+       * ( == is okay, because we're checking for identical object) */
+      if (iri->content_encoding != opt.locale)
         set_uri_encoding (iri, iri->content_encoding, false);
+
+      /* Reset UTF-8 encode status */
+      iri->utf8_encode = opt.enable_iri;
+      xfree_null (iri->orig_url);
+      iri->orig_url = NULL;
     }
   else
     input_file = (char *) file;
@@ -920,6 +1003,8 @@ retrieve_from_file (const char *file, bool html, int *count)
     {
       char *filename = NULL, *new_file = NULL;
       int dt;
+      struct iri *tmpiri = iri_dup (iri);
+      struct url *parsed_url = NULL;
 
       if (cur_url->ignore_when_downloading)
         continue;
@@ -930,10 +1015,9 @@ retrieve_from_file (const char *file, bool html, int *count)
           break;
         }
 
-      /* Reset UTF-8 encode status */
-      iri->utf8_encode = opt.enable_iri;
-      xfree_null (iri->orig_url);
-      iri->orig_url = NULL;
+      /* Need to reparse the url, since it didn't have iri information. */
+      if (opt.enable_iri)
+        parsed_url = url_parse (cur_url->url->url, NULL, tmpiri, true);
 
       if ((opt.recursive || opt.page_requisites)
          && (cur_url->url->scheme != SCHEME_FTP || getproxy (cur_url->url)))
@@ -944,13 +1028,18 @@ retrieve_from_file (const char *file, bool html, int *count)
           if (cur_url->url->scheme == SCHEME_FTP)
             opt.follow_ftp = 1;
 
-          status = retrieve_tree (cur_url->url, iri);
+          status = retrieve_tree (parsed_url ? parsed_url : cur_url->url,
+                                  tmpiri);
 
           opt.follow_ftp = old_follow_ftp;
         }
       else
-        status = retrieve_url (cur_url->url, cur_url->url->url, &filename,
-                               &new_file, NULL, &dt, opt.recursive, iri);
+        status = retrieve_url (parsed_url ? parsed_url : cur_url->url,
+                               cur_url->url->url, &filename,
+                               &new_file, NULL, &dt, opt.recursive, tmpiri);
+
+      if (parsed_url)
+          url_free (parsed_url);
 
       if (filename && opt.delete_after && file_exists_p (filename))
         {
@@ -964,6 +1053,7 @@ Removing file due to --delete-after in retrieve_from_file():\n"));
 
       xfree_null (new_file);
       xfree_null (filename);
+      iri_free (tmpiri);
     }
 
   /* Free the linked list of URL-s. */
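
The rb_ftp_ascii branch added to write_data() above is a small state machine: a CR that arrives at the very end of one buffer has to be remembered (cr_pending) until the next buffer shows whether an LF follows, so that a CRLF pair split across two reads still collapses to a single local EOL. Below is a minimal standalone sketch of that same logic, assuming a UNIX-style "\n" line ending; the function name convert_ascii_chunk and the test strings are illustrative only and are not part of the patch.

#include <stdio.h>
#include <string.h>

/* Local line ending, as in the patch: "\n" for UNIX and VMS. */
#define EOL_STRING      "\n"
#define EOL_STRING_LEN  (sizeof (EOL_STRING) - 1)

/* Illustrative stand-in for the rb_ftp_ascii branch of write_data():
   convert CRLF to the local EOL, carrying a pending CR across calls so
   that a CR/LF pair split between two buffers is still recognized. */
static void
convert_ascii_chunk (FILE *out, const char *buf, size_t bufsize,
                     int *cr_pending)
{
  const char *bufend = buf + bufsize;

  while (buf < bufend)
    {
      if (*buf == '\r')
        {
          if (*cr_pending)
            fwrite ("\r", 1, 1, out);   /* earlier CR turned out to be lone */
          *cr_pending = 1;
          buf++;
          continue;
        }

      if (*cr_pending)
        {
          if (*buf == '\n')
            fwrite (EOL_STRING, 1, EOL_STRING_LEN, out);  /* CRLF -> local EOL */
          else
            {
              fwrite ("\r", 1, 1, out); /* CR not followed by LF: keep both */
              fwrite (buf, 1, 1, out);
            }
          *cr_pending = 0;
        }
      else
        fwrite (buf, 1, 1, out);        /* ordinary character */
      buf++;
    }
}

int
main (void)
{
  /* "ab" CRLF "c" CRLF arrives split so the CR ends the first chunk. */
  const char *part1 = "ab\r";
  const char *part2 = "\nc\r\n";
  int cr_pending = 0;

  convert_ascii_chunk (stdout, part1, strlen (part1), &cr_pending);
  convert_ascii_chunk (stdout, part2, strlen (part2), &cr_pending);
  return 0;
}

The patch keeps cr_pending in a static variable inside write_data(); the sketch passes it explicitly and drops the *written bookkeeping, purely to keep the example self-contained.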
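
The 2005-04-20 comment asks for an actual measurement of the per-chunk fflush() rather than speculation. A rough, self-contained way to run that test is sketched below; the 16 KiB chunk size mirrors the buffer size mentioned in the comment, while the file name, chunk count and timing method are arbitrary choices made for the example, not anything taken from wget.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#define CHUNK   (16 * 1024)     /* chunk size discussed in the comment */
#define CHUNKS  (8 * 1024)      /* 128 MiB total; adjust to taste */

/* Write CHUNKS chunks of CHUNK bytes, optionally flushing after each
   one the way write_data() does, and return the elapsed seconds. */
static double
timed_write (const char *path, int flush_each)
{
  static char buf[CHUNK];
  FILE *out = fopen (path, "wb");
  struct timespec t0, t1;
  int i;

  if (!out)
    {
      perror (path);
      exit (EXIT_FAILURE);
    }
  memset (buf, 'x', sizeof buf);

  clock_gettime (CLOCK_MONOTONIC, &t0);
  for (i = 0; i < CHUNKS; i++)
    {
      fwrite (buf, 1, sizeof buf, out);
      if (flush_each)
        fflush (out);
    }
  fclose (out);
  clock_gettime (CLOCK_MONOTONIC, &t1);

  return (t1.tv_sec - t0.tv_sec) + (t1.tv_nsec - t0.tv_nsec) / 1e9;
}

int
main (void)
{
  printf ("no per-chunk fflush: %.3f s\n", timed_write ("flush-test.tmp", 0));
  printf ("fflush every chunk:  %.3f s\n", timed_write ("flush-test.tmp", 1));
  remove ("flush-test.tmp");
  return 0;
}

On systems where stdio pushes a 16 KiB write straight through anyway, the two timings should be close, which is the assumption the original "should not hinder performance" comment makes; the 2005 note reports a slowdown of more than 2X on VMS, which is the case the #ifndef __VMS guard above addresses.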
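
In retrieve_url(), the old second_try: label is gone: when a fetch fails while the URL had been UTF-8-encoded, the code now rebuilds the URL without UTF-8 encoding, sets iri_fallbacked, and jumps back to the same redirected: label a real redirect uses, so the caller is handed the final URL through *newloc just as it would be after a redirect. The sketch below only mirrors that control flow; fetch(), encode_url() and the hard-coded example URL are hypothetical stand-ins, not wget code.

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Hypothetical stand-in: pretend only the non-UTF-8 form of this
   particular URL can be fetched. */
static bool
fetch (const char *url)
{
  return strstr (url, "%C3%A9") == NULL;
}

/* Hypothetical stand-in for url_parse()/remote_to_utf8(): either
   percent-encode the e-acute as UTF-8 or leave the bytes alone. */
static char *
encode_url (const char *orig, bool utf8_encode)
{
  return strdup (utf8_encode ? "http://example.org/caf%C3%A9" : orig);
}

static int
retrieve (const char *origurl, char **newloc)
{
  bool utf8_encode = true;      /* plays the role of opt.enable_iri */
  bool iri_fallbacked = false;
  char *url = encode_url (origurl, utf8_encode);

 redirected:                    /* reused for the IRI fallback, as in the patch */
  if (!fetch (url))
    {
      if (utf8_encode)
        {
          /* Same shape as "iri_fallbacked = 1; goto redirected;". */
          utf8_encode = false;
          free (url);
          url = encode_url (origurl, utf8_encode);
          iri_fallbacked = true;
          goto redirected;
        }
      free (url);
      return -1;
    }

  /* As with "if (redirection_count || iri_fallbacked)": report the
     final URL whenever it no longer matches what the caller passed. */
  if (iri_fallbacked)
    *newloc = url;
  else
    {
      *newloc = NULL;
      free (url);
    }
  return 0;
}

int
main (void)
{
  char *newloc = NULL;

  if (retrieve ("http://example.org/caf\xc3\xa9", &newloc) == 0)
    printf ("fetched %s\n", newloc ? newloc : "(original URL)");
  free (newloc);
  return 0;
}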