changing the meaning of the URL. For example, you can't decode
"/foo/%2f/bar" into "/foo///bar" because the number and contents of
path components is different. Non-reserved characters can be
- changed, so "/foo/%78/bar" is safe to change to "/foo/x/bar". Wget
- uses the rfc1738 set of reserved characters, plus "$" and ",", as
- recommended by rfc2396.
-
- An unsafe characters is the one that should be encoded when URLs
- are placed in foreign environments. E.g. space and newline are
- unsafe in HTTP contexts because HTTP uses them as separator and
+ changed, so "/foo/%78/bar" is safe to change to "/foo/x/bar". The
+ unsafe characters are loosely based on rfc1738, plus "$" and ",",
+ as recommended by rfc2396, and minus "~", which is very frequently
+ used (and sometimes unrecognized as %7E by broken servers).
+
+ An unsafe character is one that should be encoded when URLs are
+ placed in foreign environments. E.g. space and newline are unsafe
+ in HTTP contexts because HTTP uses them as separator and line
terminator, so they must be encoded to %20 and %0A respectively.
"*" is unsafe in shell context, etc.
#define U urlchr_unsafe
#define RU R|U
-const static unsigned char urlchr_table[256] =
+static const unsigned char urlchr_table[256] =
{
U, U, U, U, U, U, U, U, /* NUL SOH STX ETX EOT ENQ ACK BEL */
U, U, U, U, U, U, U, U, /* BS HT LF VT FF CR SO SI */
U, 0, 0, 0, 0, 0, 0, 0, /* ` a b c d e f g */
0, 0, 0, 0, 0, 0, 0, 0, /* h i j k l m n o */
0, 0, 0, 0, 0, 0, 0, 0, /* p q r s t u v w */
- 0, 0, 0, U, U, U, U, U, /* x y z { | } ~ DEL */
+ 0, 0, 0, U, U, U, 0, U, /* x y z { | } ~ DEL */
U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U,
U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U,
{
const char *p;
- if (url_has_scheme (url))
+ if (url_scheme (url) != SCHEME_INVALID)
return NULL;
/* Look for a ':' or '/'. The former signifies NcFTP syntax, the
if (scheme == SCHEME_INVALID)
{
error_code = PE_UNSUPPORTED_SCHEME;
- goto error;
+ goto err;
}
url_encoded = reencode_escapes (url);
if (!host_e)
{
error_code = PE_UNTERMINATED_IPV6_ADDRESS;
- goto error;
+ goto err;
}
#ifdef ENABLE_IPV6
if (!is_valid_ipv6_address(host_b, host_e))
{
error_code = PE_INVALID_IPV6_ADDRESS;
- goto error;
+ goto err;
}
/* Continue parsing after the closing ']'. */
p = host_e + 1;
#else
error_code = PE_IPV6_NOT_SUPPORTED;
- goto error;
+ goto err;
#endif
}
else
if (host_b == host_e)
{
error_code = PE_EMPTY_HOST;
- goto error;
+ goto err;
}
port = scheme_default_port (scheme);
/* http://host:12randomgarbage/blah */
/* ^ */
error_code = PE_BAD_PORT_NUMBER;
- goto error;
+ goto err;
}
port = 10 * port + (*pp - '0');
/* Check for too large port numbers here, before we have
if (port > 65535)
{
error_code = PE_BAD_PORT_NUMBER;
- goto error;
+ goto err;
}
}
}
if (!parse_credentials (uname_b, uname_e - 1, &user, &passwd))
{
error_code = PE_INVALID_USER_NAME;
- goto error;
+ goto err;
}
}
return u;
- error:
+ err:
/* Cleanup in case of error: */
if (url_encoded && url_encoded != url)
xfree (url_encoded);
translate file name back to URL, this would become important
crucial. Right now, it's better to be minimal in escaping. */
-const static unsigned char filechr_table[256] =
+static const unsigned char filechr_table[256] =
{
UWC, C, C, C, C, C, C, C, /* NUL SOH STX ETX EOT ENQ ACK BEL */
C, C, C, C, C, C, C, C, /* BS HT LF VT FF CR SO SI */