X-Git-Url: http://sjero.net/git/?a=blobdiff_plain;f=src%2Furl.c;h=8cbb8923e2fed5898ca42967eefe0f479dcf671e;hb=857c3e2ca4ce9f87bc6b9bf71b80f63f6db08fef;hp=ad8fc202e259a3d429ec0372e4cafed1da3982ef;hpb=5aba2a585033945fe8c7d58c27eb7eb47a391eb9;p=wget

diff --git a/src/url.c b/src/url.c
index ad8fc202..8cbb8923 100644
--- a/src/url.c
+++ b/src/url.c
@@ -255,29 +255,27 @@ url_escape_allow_passthrough (const char *s)
   return url_escape_1 (s, urlchr_unsafe, 1);
 }
 
-enum copy_method { cm_encode, cm_passthrough };
+/* Decide whether the char at position P needs to be encoded.  (P is
+   a pointer rather than a single char value because a `%' must be
+   judged together with the two characters that follow it.)
 
-/* Decide whether to encode or pass through the char at P.  This used
-   to be a macro, but it got a little too convoluted.  */
+   Return 1 if the char should be escaped as %XX, 0 otherwise.  */
 
-static inline enum copy_method
-decide_copy_method (const char *p)
+static inline int
+char_needs_escaping (const char *p)
 {
   if (*p == '%')
     {
       if (ISXDIGIT (*(p + 1)) && ISXDIGIT (*(p + 2)))
-	/* Prior to 1.10 this decoded %HH escapes corresponding to
-	   "safe" chars, but that proved too obtrusive -- it's better
-	   to always preserve the escapes found in the URL.  */
-	return cm_passthrough;
+	return 0;
       else
 	/* Garbled %.. sequence: encode `%'. */
-	return cm_encode;
+	return 1;
     }
   else if (URL_UNSAFE_CHAR (*p) && !URL_RESERVED_CHAR (*p))
-    return cm_encode;
+    return 1;
   else
-    return cm_passthrough;
+    return 0;
 }
 
 /* Translate a %-escaped (but possibly non-conformant) input string S
@@ -299,15 +297,14 @@ decide_copy_method (const char *p)
 
        GET /abc%20def HTTP/1.0
 
-   It appears that the unsafe chars need to be quoted, for example
-   with url_escape.  But what if we're requested to download
+   It would appear that the unsafe chars need to be quoted, for
+   example with url_escape.  But what if we're requested to download
    `abc%20def'?  url_escape transforms "%" to "%25", which would leave
    us with `abc%2520def'.  This is incorrect -- since %-escapes are
    part of URL syntax, "%20" is the correct way to denote a literal
-   space on the Wget command line.  This leaves us in the conclusion
-   that in that case Wget should not call url_escape, but leave the
-   `%20' as is.  This is clearly contradictory, but it only gets
-   worse.
+   space on the Wget command line.  This leads to the conclusion that
+   in that case Wget should not call url_escape, but leave the `%20'
+   as is.  This is clearly contradictory, but it only gets worse.
 
    What if the requested URI is `abc%20 def'?  If we call url_escape,
    we end up with `/abc%2520%20def', which is almost certainly not
@@ -368,19 +365,11 @@ reencode_escapes (const char *s)
 
   int encode_count = 0;
 
-  /* First, pass through the string to see if there's anything to do,
+  /* First pass: inspect the string to see if there's anything to do,
      and to calculate the new length.  */
   for (p1 = s; *p1; p1++)
-    {
-      switch (decide_copy_method (p1))
-	{
-	case cm_encode:
-	  ++encode_count;
-	  break;
-	case cm_passthrough:
-	  break;
-	}
-    }
+    if (char_needs_escaping (p1))
+      ++encode_count;
 
   if (!encode_count)
     /* The string is good as it is. */
@@ -391,25 +380,22 @@ reencode_escapes (const char *s)
   newlen = oldlen + 2 * encode_count;
   newstr = xmalloc (newlen + 1);
 
+  /* Second pass: copy the string into the newly allocated buffer,
+     encoding chars when needed.  */
   p1 = s;
   p2 = newstr;
 
   while (*p1)
-    {
-      switch (decide_copy_method (p1))
-	{
-	case cm_encode:
-	  {
-	    unsigned char c = *p1++;
-	    *p2++ = '%';
-	    *p2++ = XNUM_TO_DIGIT (c >> 4);
-	    *p2++ = XNUM_TO_DIGIT (c & 0xf);
-	  }
-	  break;
-	case cm_passthrough:
-	  *p2++ = *p1++;
-	}
-    }
+    if (char_needs_escaping (p1))
+      {
+	unsigned char c = *p1++;
+	*p2++ = '%';
+	*p2++ = XNUM_TO_DIGIT (c >> 4);
+	*p2++ = XNUM_TO_DIGIT (c & 0xf);
+      }
+    else
+      *p2++ = *p1++;
+
   *p2 = '\0';
   assert (p2 - newstr == newlen);
   return newstr;
@@ -548,6 +534,12 @@ rewrite_shorthand_url (const char *url)
   if (p == url)
     return NULL;
 
+  /* If we're looking at "://", it means the URL uses a scheme we
+     don't support, which may include "https" when compiled without
+     SSL support.  Don't bogusly rewrite such URLs.  */
+  if (p[0] == ':' && p[1] == '/' && p[2] == '/')
+    return NULL;
+
   if (*p == ':')
     {
       const char *pp;
@@ -597,21 +589,21 @@ static void split_path PARAMS ((const char *, char **, char **));
 #define strpbrk_or_eos(s, accept) ({		\
   char *SOE_p = strpbrk (s, accept);		\
   if (!SOE_p)					\
-    SOE_p = (char *)s + strlen (s);		\
+    SOE_p = strchr (s, '\0');			\
   SOE_p;					\
 })
 
 #else  /* not __GNUC__ */
 
-static char *
+static inline char *
 strpbrk_or_eos (const char *s, const char *accept)
 {
   char *p = strpbrk (s, accept);
   if (!p)
-    p = (char *)s + strlen (s);
+    p = strchr (s, '\0');
   return p;
 }
-#endif
+#endif /* not __GNUC__ */
 
 /* Turn STR into lowercase; return non-zero if a character was
    actually changed. */
@@ -888,7 +880,6 @@ url_parse (const char *url, int *error)
       else
 	u->url = url_encoded;
     }
-  url_encoded = NULL;
 
   return u;
 
@@ -1081,7 +1072,7 @@ sync_path (struct url *u)
       *p++ = '/';
       memcpy (p, efile, filelen);
       p += filelen;
-      *p++ = '\0';
+      *p = '\0';
     }
 
   u->path = newpath;
@@ -1135,7 +1126,7 @@ url_free (struct url *url)
 }
 
 /* Create all the necessary directories for PATH (a file).  Calls
-   mkdirhier() internally.  */
+   make_directory internally.  */
 int
 mkalldirs (const char *path)
 {