/* URL handling.
- Copyright (C) 1995, 1996, 1997, 2000, 2001, 2003, 2003
- Free Software Foundation, Inc.
+ Copyright (C) 2005 Free Software Foundation, Inc.
This file is part of GNU Wget.
changing the meaning of the URL. For example, you can't decode
"/foo/%2f/bar" into "/foo///bar" because the number and contents of
path components is different. Non-reserved characters can be
- changed, so "/foo/%78/bar" is safe to change to "/foo/x/bar". Wget
- uses the rfc1738 set of reserved characters, plus "$" and ",", as
- recommended by rfc2396.
-
- An unsafe characters is the one that should be encoded when URLs
- are placed in foreign environments. E.g. space and newline are
- unsafe in HTTP contexts because HTTP uses them as separator and
+ changed, so "/foo/%78/bar" is safe to change to "/foo/x/bar". The
+ unsafe characters are loosely based on rfc1738, plus "$" and ",",
+ as recommended by rfc2396, and minus "~", which is very frequently
+ used (and sometimes unrecognized as %7E by broken servers).
+
+ An unsafe character is one that should be encoded when URLs are
+ placed in foreign environments. E.g. space and newline are unsafe
+ in HTTP contexts because HTTP uses them as separator and line
terminator, so they must be encoded to %20 and %0A respectively.
"*" is unsafe in shell context, etc.
#define U urlchr_unsafe
#define RU R|U
-const static unsigned char urlchr_table[256] =
+static const unsigned char urlchr_table[256] =
{
U, U, U, U, U, U, U, U, /* NUL SOH STX ETX EOT ENQ ACK BEL */
U, U, U, U, U, U, U, U, /* BS HT LF VT FF CR SO SI */
U, 0, 0, 0, 0, 0, 0, 0, /* ` a b c d e f g */
0, 0, 0, 0, 0, 0, 0, 0, /* h i j k l m n o */
0, 0, 0, 0, 0, 0, 0, 0, /* p q r s t u v w */
- 0, 0, 0, U, U, U, U, U, /* x y z { | } ~ DEL */
+ 0, 0, 0, U, U, U, 0, U, /* x y z { | } ~ DEL */
U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U,
U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U,
{
const char *p;
- if (url_has_scheme (url))
+ if (url_scheme (url) != SCHEME_INVALID)
return NULL;
/* Look for a ':' or '/'. The former signifies NcFTP syntax, the
if (scheme == SCHEME_INVALID)
{
error_code = PE_UNSUPPORTED_SCHEME;
- goto error;
+ goto err;
}
url_encoded = reencode_escapes (url);
if (!host_e)
{
error_code = PE_UNTERMINATED_IPV6_ADDRESS;
- goto error;
+ goto err;
}
#ifdef ENABLE_IPV6
if (!is_valid_ipv6_address(host_b, host_e))
{
error_code = PE_INVALID_IPV6_ADDRESS;
- goto error;
+ goto err;
}
/* Continue parsing after the closing ']'. */
p = host_e + 1;
#else
error_code = PE_IPV6_NOT_SUPPORTED;
- goto error;
+ goto err;
#endif
}
else
if (host_b == host_e)
{
error_code = PE_EMPTY_HOST;
- goto error;
+ goto err;
}
port = scheme_default_port (scheme);
/* http://host:12randomgarbage/blah */
/* ^ */
error_code = PE_BAD_PORT_NUMBER;
- goto error;
+ goto err;
}
port = 10 * port + (*pp - '0');
+ /* Check for too large port numbers here, before we have
+ a chance to overflow on bogus port values. */
+ if (port > 65535)
+ {
+ error_code = PE_BAD_PORT_NUMBER;
+ goto err;
+ }
}
}
}
if (!parse_credentials (uname_b, uname_e - 1, &user, &passwd))
{
error_code = PE_INVALID_USER_NAME;
- goto error;
+ goto err;
}
}
return u;
- error:
+ err:
/* Cleanup in case of error: */
if (url_encoded && url_encoded != url)
xfree (url_encoded);
url_full_path (const struct url *url)
{
int length = full_path_length (url);
- char *full_path = (char *)xmalloc(length + 1);
+ char *full_path = (char *) xmalloc (length + 1);
full_path_write (url, full_path);
full_path[length] = '\0';
return full_path;
}
-/* Escape unsafe and reserved characters, except for the slash
- characters. */
+/* Unescape CHR in an otherwise escaped STR. Used to selectively
+ undo the escaping of certain characters, such as "/" and ":".
+ STR is modified in place; nothing is returned. */
-static char *
-url_escape_dir (const char *dir)
+static void
+unescape_single_char (char *str, char chr)
{
- char *newdir = url_escape_1 (dir, urlchr_unsafe | urlchr_reserved, 1);
- char *h, *t;
- if (newdir == dir)
- return (char *)dir;
-
- /* Unescape slashes in NEWDIR. */
-
- h = newdir; /* hare */
- t = newdir; /* tortoise */
-
+ const char c1 = XNUM_TO_DIGIT (chr >> 4);
+ const char c2 = XNUM_TO_DIGIT (chr & 0xf);
+ char *h = str; /* hare */
+ char *t = str; /* tortoise */
for (; *h; h++, t++)
{
- /* url_escape_1 having converted '/' to "%2F" exactly. */
- if (*h == '%' && h[1] == '2' && h[2] == 'F')
+ if (h[0] == '%' && h[1] == c1 && h[2] == c2)
{
- *t = '/';
+ *t = chr;
h += 2;
}
else
*t = *h;
}
*t = '\0';
+}
+/* Escape unsafe and reserved characters, except for the slash
+ characters. */
+
+static char *
+url_escape_dir (const char *dir)
+{
+ char *newdir = url_escape_1 (dir, urlchr_unsafe | urlchr_reserved, 1);
+ if (newdir == dir)
+ return (char *)dir;
+
+ unescape_single_char (newdir, '/');
return newdir;
}
translate file name back to URL, this would become important
crucial. Right now, it's better to be minimal in escaping. */
-const static unsigned char filechr_table[256] =
+static const unsigned char filechr_table[256] =
{
UWC, C, C, C, C, C, C, C, /* NUL SOH STX ETX EOT ENQ ACK BEL */
C, C, C, C, C, C, C, C, /* BS HT LF VT FF CR SO SI */
{
int size;
char *result, *p;
- char *quoted_user = NULL, *quoted_passwd = NULL;
+ char *quoted_host, *quoted_user = NULL, *quoted_passwd = NULL;
int scheme_port = supported_schemes[url->scheme].default_port;
const char *scheme_str = supported_schemes[url->scheme].leading_string;
}
}
- /* Numeric IPv6 addresses can contain ':' and need to be quoted with
- brackets. */
- brackets_around_host = strchr (url->host, ':') != NULL;
+ /* In the unlikely event that the host name contains non-printable
+ characters, quote it for displaying to the user. */
+ quoted_host = url_escape_allow_passthrough (url->host);
+
+ /* Undo the quoting of colons that URL escaping performs. IPv6
+ addresses may legally contain colons, and in that case must be
+ placed in square brackets. */
+ if (quoted_host != url->host)
+ unescape_single_char (quoted_host, ':');
+ brackets_around_host = strchr (quoted_host, ':') != NULL;
size = (strlen (scheme_str)
- + strlen (url->host)
+ + strlen (quoted_host)
+ (brackets_around_host ? 2 : 0)
+ fplen
+ 1);
if (brackets_around_host)
*p++ = '[';
- APPEND (p, url->host);
+ APPEND (p, quoted_host);
if (brackets_around_host)
*p++ = ']';
if (url->port != scheme_port)
if (quoted_user && quoted_user != url->user)
xfree (quoted_user);
- if (quoted_passwd && !hide_password
- && quoted_passwd != url->passwd)
+ if (quoted_passwd && !hide_password && quoted_passwd != url->passwd)
xfree (quoted_passwd);
+ if (quoted_host != url->host)
+ xfree (quoted_host);
return result;
}