[svn] Don't rewrite "https://host" to "ftp://https//host" when SSL is not used.

[wget] / src / url.c
diff --git a/src/url.c b/src/url.c

index ad8fc202e259a3d429ec0372e4cafed1da3982ef..d5bdcdc36d0352dc6541556434b2adfb8f33971a 100644 (file)
--- a/src/url.c
+++ b/src/url.c
@@ -255,29 +255,27 @@ url_escape_allow_passthrough (const char *s)
    return url_escape_1 (s, urlchr_unsafe, 1);
  }
  \f
-enum copy_method { cm_encode, cm_passthrough };
+/* Decide whether the char at position P needs to be encoded.  (It is
+   not enough to pass a single char *P because the function may need
+   to inspect the surrounding context.)
  
-/* Decide whether to encode or pass through the char at P.  This used
-   to be a macro, but it got a little too convoluted.  */
+   Return 1 if the char should be escaped as %XX, 0 otherwise.  */
  
-static inline enum copy_method
-decide_copy_method (const char *p)
+static inline int
+char_needs_escaping (const char *p)
  {
    if (*p == '%')
      {
        if (ISXDIGIT (*(p + 1)) && ISXDIGIT (*(p + 2)))
-       /* Prior to 1.10 this decoded %HH escapes corresponding to
-          "safe" chars, but that proved too obtrusive -- it's better
-          to always preserve the escapes found in the URL.  */
-       return cm_passthrough;
+       return 0;
        else
         /* Garbled %.. sequence: encode `%'. */
-       return cm_encode;
+       return 1;
      }
    else if (URL_UNSAFE_CHAR (*p) && !URL_RESERVED_CHAR (*p))
-    return cm_encode;
+    return 1;
    else
-    return cm_passthrough;
+    return 0;
  }
  
  /* Translate a %-escaped (but possibly non-conformant) input string S
@@ -299,15 +297,14 @@ decide_copy_method (const char *p)
  
         GET /abc%20def HTTP/1.0
  
-   It appears that the unsafe chars need to be quoted, for example
-   with url_escape.  But what if we're requested to download
+   It would appear that the unsafe chars need to be quoted, for
+   example with url_escape.  But what if we're requested to download
     `abc%20def'?  url_escape transforms "%" to "%25", which would leave
     us with `abc%2520def'.  This is incorrect -- since %-escapes are
     part of URL syntax, "%20" is the correct way to denote a literal
-   space on the Wget command line.  This leaves us in the conclusion
-   that in that case Wget should not call url_escape, but leave the
-   `%20' as is.  This is clearly contradictory, but it only gets
-   worse.
+   space on the Wget command line.  This leads to the conclusion that
+   in that case Wget should not call url_escape, but leave the `%20'
+   as is.  This is clearly contradictory, but it only gets worse.
  
     What if the requested URI is `abc%20 def'?  If we call url_escape,
     we end up with `/abc%2520%20def', which is almost certainly not
@@ -368,19 +365,11 @@ reencode_escapes (const char *s)
  
    int encode_count = 0;
  
-  /* First, pass through the string to see if there's anything to do,
+  /* First pass: inspect the string to see if there's anything to do,
       and to calculate the new length.  */
    for (p1 = s; *p1; p1++)
-    {
-      switch (decide_copy_method (p1))
-       {
-       case cm_encode:
-         ++encode_count;
-         break;
-       case cm_passthrough:
-         break;
-       }
-    }
+    if (char_needs_escaping (p1))
+      ++encode_count;
  
    if (!encode_count)
      /* The string is good as it is. */
@@ -391,25 +380,22 @@ reencode_escapes (const char *s)
    newlen = oldlen + 2 * encode_count;
    newstr = xmalloc (newlen + 1);
  
+  /* Second pass: copy the string to the destination address, encoding
+     chars when needed.  */
    p1 = s;
    p2 = newstr;
  
    while (*p1)
-    {
-      switch (decide_copy_method (p1))
-       {
-       case cm_encode:
-         {
-           unsigned char c = *p1++;
-           *p2++ = '%';
-           *p2++ = XNUM_TO_DIGIT (c >> 4);
-           *p2++ = XNUM_TO_DIGIT (c & 0xf);
-         }
-         break;
-       case cm_passthrough:
-         *p2++ = *p1++;
-       }
-    }
+    if (char_needs_escaping (p1))
+      {
+       unsigned char c = *p1++;
+       *p2++ = '%';
+       *p2++ = XNUM_TO_DIGIT (c >> 4);
+       *p2++ = XNUM_TO_DIGIT (c & 0xf);
+      }
+    else
+      *p2++ = *p1++;
+
    *p2 = '\0';
    assert (p2 - newstr == newlen);
    return newstr;
@@ -548,6 +534,12 @@ rewrite_shorthand_url (const char *url)
    if (p == url)
      return NULL;
  
+  /* If we're looking at "://", it means the URL uses a scheme we
+     don't support, which may include "https" when compiled without
+     SSL support.  Don't bogusly rewrite such URLs.  */
+  if (p[0] == ':' && p[1] == '/' && p[2] == '/')
+    return NULL;
+
    if (*p == ':')
      {
        const char *pp;
@@ -597,21 +589,21 @@ static void split_path PARAMS ((const char *, char **, char **));
  #define strpbrk_or_eos(s, accept) ({           \
    char *SOE_p = strpbrk (s, accept);           \
    if (!SOE_p)                                  \
-    SOE_p = (char *)s + strlen (s);            \
+    SOE_p = strchr (s, '\0');                  \
    SOE_p;                                       \
  })
  
  #else  /* not __GNUC__ */
  
-static char *
+static inline char *
  strpbrk_or_eos (const char *s, const char *accept)
  {
    char *p = strpbrk (s, accept);
    if (!p)
-    p = (char *)s + strlen (s);
+    p = strchr (s, '\0');
    return p;
  }
-#endif
+#endif /* not __GNUC__ */
  
  /* Turn STR into lowercase; return non-zero if a character was
     actually changed. */
@@ -888,7 +880,6 @@ url_parse (const char *url, int *error)
        else
         u->url = url_encoded;
      }
-  url_encoded = NULL;
  
    return u;
  
@@ -1081,7 +1072,7 @@ sync_path (struct url *u)
        *p++ = '/';
        memcpy (p, efile, filelen);
        p += filelen;
-      *p++ = '\0';
+      *p = '\0';
      }
  
    u->path = newpath;