X-Git-Url: http://sjero.net/git/?a=blobdiff_plain;f=src%2Furl.c;h=e89704d7b625ab21b6ea26b54270545235c2717a;hb=0967c21094580317353f0742c4836c5bbea34059;hp=56d5d9f967b8d2684cc6e45a2f22a5decff3b619;hpb=c36e9a5272e8ec394625dfa0f63bf9c1722eeaef;p=wget diff --git a/src/url.c b/src/url.c index 56d5d9f9..e89704d7 100644 --- a/src/url.c +++ b/src/url.c @@ -1,6 +1,5 @@ /* URL handling. - Copyright (C) 1995, 1996, 1997, 2000, 2001, 2003, 2003 - Free Software Foundation, Inc. + Copyright (C) 2005 Free Software Foundation, Inc. This file is part of GNU Wget. @@ -88,13 +87,14 @@ static int path_simplify PARAMS ((char *)); changing the meaning of the URL. For example, you can't decode "/foo/%2f/bar" into "/foo///bar" because the number and contents of path components is different. Non-reserved characters can be - changed, so "/foo/%78/bar" is safe to change to "/foo/x/bar". Wget - uses the rfc1738 set of reserved characters, plus "$" and ",", as - recommended by rfc2396. - - An unsafe characters is the one that should be encoded when URLs - are placed in foreign environments. E.g. space and newline are - unsafe in HTTP contexts because HTTP uses them as separator and + changed, so "/foo/%78/bar" is safe to change to "/foo/x/bar". The + unsafe characters are loosely based on rfc1738, plus "$" and ",", + as recommended by rfc2396, and minus "~", which is very frequently + used (and sometimes unrecognized as %7E by broken servers). + + An unsafe character is the one that should be encoded when URLs are + placed in foreign environments. E.g. space and newline are unsafe + in HTTP contexts because HTTP uses them as separator and line terminator, so they must be encoded to %20 and %0A respectively. "*" is unsafe in shell context, etc. @@ -118,7 +118,7 @@ enum { #define U urlchr_unsafe #define RU R|U -const static unsigned char urlchr_table[256] = +static const unsigned char urlchr_table[256] = { U, U, U, U, U, U, U, U, /* NUL SOH STX ETX EOT ENQ ACK BEL */ U, U, U, U, U, U, U, U, /* BS HT LF VT FF CR SO SI */ @@ -135,7 +135,7 @@ const static unsigned char urlchr_table[256] = U, 0, 0, 0, 0, 0, 0, 0, /* ` a b c d e f g */ 0, 0, 0, 0, 0, 0, 0, 0, /* h i j k l m n o */ 0, 0, 0, 0, 0, 0, 0, 0, /* p q r s t u v w */ - 0, 0, 0, U, U, U, U, U, /* x y z { | } ~ DEL */ + 0, 0, 0, U, U, U, 0, U, /* x y z { | } ~ DEL */ U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, @@ -545,7 +545,7 @@ rewrite_shorthand_url (const char *url) { const char *p; - if (url_has_scheme (url)) + if (url_scheme (url) != SCHEME_INVALID) return NULL; /* Look for a ':' or '/'. The former signifies NcFTP syntax, the @@ -688,7 +688,7 @@ url_parse (const char *url, int *error) if (scheme == SCHEME_INVALID) { error_code = PE_UNSUPPORTED_SCHEME; - goto error; + goto err; } url_encoded = reencode_escapes (url); @@ -726,7 +726,7 @@ url_parse (const char *url, int *error) if (!host_e) { error_code = PE_UNTERMINATED_IPV6_ADDRESS; - goto error; + goto err; } #ifdef ENABLE_IPV6 @@ -734,14 +734,14 @@ url_parse (const char *url, int *error) if (!is_valid_ipv6_address(host_b, host_e)) { error_code = PE_INVALID_IPV6_ADDRESS; - goto error; + goto err; } /* Continue parsing after the closing ']'. */ p = host_e + 1; #else error_code = PE_IPV6_NOT_SUPPORTED; - goto error; + goto err; #endif } else @@ -753,7 +753,7 @@ url_parse (const char *url, int *error) if (host_b == host_e) { error_code = PE_EMPTY_HOST; - goto error; + goto err; } port = scheme_default_port (scheme); @@ -778,7 +778,7 @@ url_parse (const char *url, int *error) /* http://host:12randomgarbage/blah */ /* ^ */ error_code = PE_BAD_PORT_NUMBER; - goto error; + goto err; } port = 10 * port + (*pp - '0'); /* Check for too large port numbers here, before we have @@ -786,7 +786,7 @@ url_parse (const char *url, int *error) if (port > 65535) { error_code = PE_BAD_PORT_NUMBER; - goto error; + goto err; } } } @@ -845,7 +845,7 @@ url_parse (const char *url, int *error) if (!parse_credentials (uname_b, uname_e - 1, &user, &passwd)) { error_code = PE_INVALID_USER_NAME; - goto error; + goto err; } } @@ -899,7 +899,7 @@ url_parse (const char *url, int *error) return u; - error: + err: /* Cleanup in case of error: */ if (url_encoded && url_encoded != url) xfree (url_encoded); @@ -1017,35 +1017,41 @@ url_full_path (const struct url *url) return full_path; } -/* Escape unsafe and reserved characters, except for the slash - characters. */ +/* Unescape CHR in an otherwise escaped STR. Used to selectively + escaping of certain characters, such as "/" and ":". Returns a + count of unescaped chars. */ -static char * -url_escape_dir (const char *dir) +static void +unescape_single_char (char *str, char chr) { - char *newdir = url_escape_1 (dir, urlchr_unsafe | urlchr_reserved, 1); - char *h, *t; - if (newdir == dir) - return (char *)dir; - - /* Unescape slashes in NEWDIR. */ - - h = newdir; /* hare */ - t = newdir; /* tortoise */ - + const char c1 = XNUM_TO_DIGIT (chr >> 4); + const char c2 = XNUM_TO_DIGIT (chr & 0xf); + char *h = str; /* hare */ + char *t = str; /* tortoise */ for (; *h; h++, t++) { - /* url_escape_1 having converted '/' to "%2F" exactly. */ - if (*h == '%' && h[1] == '2' && h[2] == 'F') + if (h[0] == '%' && h[1] == c1 && h[2] == c2) { - *t = '/'; + *t = chr; h += 2; } else *t = *h; } *t = '\0'; +} + +/* Escape unsafe and reserved characters, except for the slash + characters. */ + +static char * +url_escape_dir (const char *dir) +{ + char *newdir = url_escape_1 (dir, urlchr_unsafe | urlchr_reserved, 1); + if (newdir == dir) + return (char *)dir; + unescape_single_char (newdir, '/'); return newdir; } @@ -1264,7 +1270,7 @@ enum { translate file name back to URL, this would become important crucial. Right now, it's better to be minimal in escaping. */ -const static unsigned char filechr_table[256] = +static const unsigned char filechr_table[256] = { UWC, C, C, C, C, C, C, C, /* NUL SOH STX ETX EOT ENQ ACK BEL */ C, C, C, C, C, C, C, C, /* BS HT LF VT FF CR SO SI */ @@ -1845,7 +1851,7 @@ url_string (const struct url *url, int hide_password) { int size; char *result, *p; - char *quoted_user = NULL, *quoted_passwd = NULL; + char *quoted_host, *quoted_user = NULL, *quoted_passwd = NULL; int scheme_port = supported_schemes[url->scheme].default_port; const char *scheme_str = supported_schemes[url->scheme].leading_string; @@ -1868,12 +1874,19 @@ url_string (const struct url *url, int hide_password) } } - /* Numeric IPv6 addresses can contain ':' and need to be quoted with - brackets. */ - brackets_around_host = strchr (url->host, ':') != NULL; + /* In the unlikely event that the host name contains non-printable + characters, quote it for displaying to the user. */ + quoted_host = url_escape_allow_passthrough (url->host); + + /* Undo the quoting of colons that URL escaping performs. IPv6 + addresses may legally contain colons, and in that case must be + placed in square brackets. */ + if (quoted_host != url->host) + unescape_single_char (quoted_host, ':'); + brackets_around_host = strchr (quoted_host, ':') != NULL; size = (strlen (scheme_str) - + strlen (url->host) + + strlen (quoted_host) + (brackets_around_host ? 2 : 0) + fplen + 1); @@ -1902,7 +1915,7 @@ url_string (const struct url *url, int hide_password) if (brackets_around_host) *p++ = '['; - APPEND (p, url->host); + APPEND (p, quoted_host); if (brackets_around_host) *p++ = ']'; if (url->port != scheme_port) @@ -1919,9 +1932,10 @@ url_string (const struct url *url, int hide_password) if (quoted_user && quoted_user != url->user) xfree (quoted_user); - if (quoted_passwd && !hide_password - && quoted_passwd != url->passwd) + if (quoted_passwd && !hide_password && quoted_passwd != url->passwd) xfree (quoted_passwd); + if (quoted_host != url->host) + xfree (quoted_host); return result; }