X-Git-Url: http://sjero.net/git/?a=blobdiff_plain;f=src%2Furl.c;h=e89704d7b625ab21b6ea26b54270545235c2717a;hb=0967c21094580317353f0742c4836c5bbea34059;hp=56d5d9f967b8d2684cc6e45a2f22a5decff3b619;hpb=c36e9a5272e8ec394625dfa0f63bf9c1722eeaef;p=wget

diff --git a/src/url.c b/src/url.c
index 56d5d9f9..e89704d7 100644
--- a/src/url.c
+++ b/src/url.c
@@ -1,6 +1,5 @@
 /* URL handling.
-   Copyright (C) 1995, 1996, 1997, 2000, 2001, 2003, 2003
-   Free Software Foundation, Inc.
+   Copyright (C) 2005 Free Software Foundation, Inc.
 
 This file is part of GNU Wget.
 
@@ -88,13 +87,14 @@ static int path_simplify PARAMS ((char *));
    changing the meaning of the URL.  For example, you can't decode
    "/foo/%2f/bar" into "/foo///bar" because the number and contents of
    path components is different.  Non-reserved characters can be
-   changed, so "/foo/%78/bar" is safe to change to "/foo/x/bar".  Wget
-   uses the rfc1738 set of reserved characters, plus "$" and ",", as
-   recommended by rfc2396.
-
-   An unsafe characters is the one that should be encoded when URLs
-   are placed in foreign environments.  E.g. space and newline are
-   unsafe in HTTP contexts because HTTP uses them as separator and
+   changed, so "/foo/%78/bar" is safe to change to "/foo/x/bar".  The
+   unsafe characters are loosely based on rfc1738, plus "$" and ",",
+   as recommended by rfc2396, and minus "~", which is very frequently
+   used (and sometimes unrecognized as %7E by broken servers).
+
+   An unsafe character is one that should be encoded when URLs are
+   placed in foreign environments.  E.g. space and newline are unsafe
+   in HTTP contexts because HTTP uses them as separator and line
    terminator, so they must be encoded to %20 and %0A respectively.
    "*" is unsafe in shell context, etc.
 
@@ -118,7 +118,7 @@ enum {
 #define U  urlchr_unsafe
 #define RU R|U
 
-const static unsigned char urlchr_table[256] =
+static const unsigned char urlchr_table[256] =
 {
   U,  U,  U,  U,   U,  U,  U,  U,   /* NUL SOH STX ETX  EOT ENQ ACK BEL */
   U,  U,  U,  U,   U,  U,  U,  U,   /* BS  HT  LF  VT   FF  CR  SO  SI  */
@@ -135,7 +135,7 @@ const static unsigned char urlchr_table[256] =
   U,  0,  0,  0,   0,  0,  0,  0,   /* `   a   b   c    d   e   f   g   */
   0,  0,  0,  0,   0,  0,  0,  0,   /* h   i   j   k    l   m   n   o   */
   0,  0,  0,  0,   0,  0,  0,  0,   /* p   q   r   s    t   u   v   w   */
-  0,  0,  0,  U,   U,  U,  U,  U,   /* x   y   z   {    |   }   ~   DEL */
+  0,  0,  0,  U,   U,  U,  0,  U,   /* x   y   z   {    |   }   ~   DEL */
 
   U, U, U, U,  U, U, U, U,  U, U, U, U,  U, U, U, U,
   U, U, U, U,  U, U, U, U,  U, U, U, U,  U, U, U, U,
@@ -545,7 +545,7 @@ rewrite_shorthand_url (const char *url)
 {
   const char *p;
 
-  if (url_has_scheme (url))
+  if (url_scheme (url) != SCHEME_INVALID)
     return NULL;
 
   /* Look for a ':' or '/'.  The former signifies NcFTP syntax, the
@@ -688,7 +688,7 @@ url_parse (const char *url, int *error)
   if (scheme == SCHEME_INVALID)
     {
       error_code = PE_UNSUPPORTED_SCHEME;
-      goto error;
+      goto err;
     }
 
   url_encoded = reencode_escapes (url);
@@ -726,7 +726,7 @@ url_parse (const char *url, int *error)
       if (!host_e)
 	{
 	  error_code = PE_UNTERMINATED_IPV6_ADDRESS;
-	  goto error;
+	  goto err;
 	}
 
 #ifdef ENABLE_IPV6
@@ -734,14 +734,14 @@ url_parse (const char *url, int *error)
       if (!is_valid_ipv6_address(host_b, host_e))
 	{
 	  error_code = PE_INVALID_IPV6_ADDRESS;
-	  goto error;
+	  goto err;
 	}
 
       /* Continue parsing after the closing ']'. */
       p = host_e + 1;
 #else
       error_code = PE_IPV6_NOT_SUPPORTED;
-      goto error;
+      goto err;
 #endif
     }
   else
@@ -753,7 +753,7 @@ url_parse (const char *url, int *error)
   if (host_b == host_e)
     {
       error_code = PE_EMPTY_HOST;
-      goto error;
+      goto err;
     }
 
   port = scheme_default_port (scheme);
@@ -778,7 +778,7 @@ url_parse (const char *url, int *error)
 	 	  /* http://host:12randomgarbage/blah */
 		  /*               ^                  */
 		  error_code = PE_BAD_PORT_NUMBER;
-		  goto error;
+		  goto err;
 		}
 	      port = 10 * port + (*pp - '0');
 	      /* Check for too large port numbers here, before we have
@@ -786,7 +786,7 @@ url_parse (const char *url, int *error)
 	      if (port > 65535)
 		{
 		  error_code = PE_BAD_PORT_NUMBER;
-		  goto error;
+		  goto err;
 		}
 	    }
 	}
@@ -845,7 +845,7 @@ url_parse (const char *url, int *error)
       if (!parse_credentials (uname_b, uname_e - 1, &user, &passwd))
 	{
 	  error_code = PE_INVALID_USER_NAME;
-	  goto error;
+	  goto err;
 	}
     }
 
@@ -899,7 +899,7 @@ url_parse (const char *url, int *error)
 
   return u;
 
- error:
+ err:
   /* Cleanup in case of error: */
   if (url_encoded && url_encoded != url)
     xfree (url_encoded);
@@ -1017,35 +1017,41 @@ url_full_path (const struct url *url)
   return full_path;
 }
 
-/* Escape unsafe and reserved characters, except for the slash
-   characters.  */
+/* Unescape CHR in an otherwise escaped STR.  Used to selectively
+   undo the escaping of certain characters, such as "/" and ":".
+   STR is modified in place; nothing is returned.  */
 
-static char *
-url_escape_dir (const char *dir)
+static void
+unescape_single_char (char *str, char chr)
 {
-  char *newdir = url_escape_1 (dir, urlchr_unsafe | urlchr_reserved, 1);
-  char *h, *t;
-  if (newdir == dir)
-    return (char *)dir;
-
-  /* Unescape slashes in NEWDIR. */
-
-  h = newdir;			/* hare */
-  t = newdir;			/* tortoise */
-
+  const char c1 = XNUM_TO_DIGIT (chr >> 4);
+  const char c2 = XNUM_TO_DIGIT (chr & 0xf);
+  char *h = str;		/* hare */
+  char *t = str;		/* tortoise */
   for (; *h; h++, t++)
     {
-      /* url_escape_1 having converted '/' to "%2F" exactly. */
-      if (*h == '%' && h[1] == '2' && h[2] == 'F')
+      if (h[0] == '%' && h[1] == c1 && h[2] == c2)
 	{
-	  *t = '/';
+	  *t = chr;
 	  h += 2;
 	}
       else
 	*t = *h;
     }
   *t = '\0';
+}
+
+/* Escape unsafe and reserved characters, except for the slash
+   characters.  */
+
+static char *
+url_escape_dir (const char *dir)
+{
+  char *newdir = url_escape_1 (dir, urlchr_unsafe | urlchr_reserved, 1);
+  if (newdir == dir)
+    return (char *)dir;
 
+  unescape_single_char (newdir, '/');
   return newdir;
 }
 
@@ -1264,7 +1270,7 @@ enum {
    translate file name back to URL, this would become important
    crucial.  Right now, it's better to be minimal in escaping.  */
 
-const static unsigned char filechr_table[256] =
+static const unsigned char filechr_table[256] =
 {
 UWC,  C,  C,  C,   C,  C,  C,  C,   /* NUL SOH STX ETX  EOT ENQ ACK BEL */
   C,  C,  C,  C,   C,  C,  C,  C,   /* BS  HT  LF  VT   FF  CR  SO  SI  */
@@ -1845,7 +1851,7 @@ url_string (const struct url *url, int hide_password)
 {
   int size;
   char *result, *p;
-  char *quoted_user = NULL, *quoted_passwd = NULL;
+  char *quoted_host, *quoted_user = NULL, *quoted_passwd = NULL;
 
   int scheme_port  = supported_schemes[url->scheme].default_port;
   const char *scheme_str = supported_schemes[url->scheme].leading_string;
@@ -1868,12 +1874,19 @@ url_string (const struct url *url, int hide_password)
 	}
     }
 
-  /* Numeric IPv6 addresses can contain ':' and need to be quoted with
-     brackets.  */
-  brackets_around_host = strchr (url->host, ':') != NULL;
+  /* In the unlikely event that the host name contains non-printable
+     characters, quote it for displaying to the user.  */
+  quoted_host = url_escape_allow_passthrough (url->host);
+
+  /* Undo the quoting of colons that URL escaping performs.  IPv6
+     addresses may legally contain colons, and in that case must be
+     placed in square brackets.  */
+  if (quoted_host != url->host)
+    unescape_single_char (quoted_host, ':');
+  brackets_around_host = strchr (quoted_host, ':') != NULL;
 
   size = (strlen (scheme_str)
-	  + strlen (url->host)
+	  + strlen (quoted_host)
 	  + (brackets_around_host ? 2 : 0)
 	  + fplen
 	  + 1);
@@ -1902,7 +1915,7 @@ url_string (const struct url *url, int hide_password)
 
   if (brackets_around_host)
     *p++ = '[';
-  APPEND (p, url->host);
+  APPEND (p, quoted_host);
   if (brackets_around_host)
     *p++ = ']';
   if (url->port != scheme_port)
@@ -1919,9 +1932,10 @@ url_string (const struct url *url, int hide_password)
 
   if (quoted_user && quoted_user != url->user)
     xfree (quoted_user);
-  if (quoted_passwd && !hide_password
-      && quoted_passwd != url->passwd)
+  if (quoted_passwd && !hide_password && quoted_passwd != url->passwd)
     xfree (quoted_passwd);
+  if (quoted_host != url->host)
+    xfree (quoted_host);
 
   return result;
 }