X-Git-Url: http://sjero.net/git/?a=blobdiff_plain;f=src%2Furl.c;h=e89704d7b625ab21b6ea26b54270545235c2717a;hb=0967c21094580317353f0742c4836c5bbea34059;hp=041001adf5d6a6586689903217134400cc5f3228;hpb=7c044778bc83f3714e8d91d31c992a76d78e42ad;p=wget diff --git a/src/url.c b/src/url.c index 041001ad..e89704d7 100644 --- a/src/url.c +++ b/src/url.c @@ -87,13 +87,14 @@ static int path_simplify PARAMS ((char *)); changing the meaning of the URL. For example, you can't decode "/foo/%2f/bar" into "/foo///bar" because the number and contents of path components is different. Non-reserved characters can be - changed, so "/foo/%78/bar" is safe to change to "/foo/x/bar". Wget - uses the rfc1738 set of reserved characters, plus "$" and ",", as - recommended by rfc2396. - - An unsafe characters is the one that should be encoded when URLs - are placed in foreign environments. E.g. space and newline are - unsafe in HTTP contexts because HTTP uses them as separator and + changed, so "/foo/%78/bar" is safe to change to "/foo/x/bar". The + unsafe characters are loosely based on rfc1738, plus "$" and ",", + as recommended by rfc2396, and minus "~", which is very frequently + used (and sometimes unrecognized as %7E by broken servers). + + An unsafe character is the one that should be encoded when URLs are + placed in foreign environments. E.g. space and newline are unsafe + in HTTP contexts because HTTP uses them as separator and line terminator, so they must be encoded to %20 and %0A respectively. "*" is unsafe in shell context, etc. @@ -117,7 +118,7 @@ enum { #define U urlchr_unsafe #define RU R|U -const static unsigned char urlchr_table[256] = +static const unsigned char urlchr_table[256] = { U, U, U, U, U, U, U, U, /* NUL SOH STX ETX EOT ENQ ACK BEL */ U, U, U, U, U, U, U, U, /* BS HT LF VT FF CR SO SI */ @@ -134,7 +135,7 @@ const static unsigned char urlchr_table[256] = U, 0, 0, 0, 0, 0, 0, 0, /* ` a b c d e f g */ 0, 0, 0, 0, 0, 0, 0, 0, /* h i j k l m n o */ 0, 0, 0, 0, 0, 0, 0, 0, /* p q r s t u v w */ - 0, 0, 0, U, U, U, U, U, /* x y z { | } ~ DEL */ + 0, 0, 0, U, U, U, 0, U, /* x y z { | } ~ DEL */ U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, @@ -544,7 +545,7 @@ rewrite_shorthand_url (const char *url) { const char *p; - if (url_has_scheme (url)) + if (url_scheme (url) != SCHEME_INVALID) return NULL; /* Look for a ':' or '/'. The former signifies NcFTP syntax, the @@ -687,7 +688,7 @@ url_parse (const char *url, int *error) if (scheme == SCHEME_INVALID) { error_code = PE_UNSUPPORTED_SCHEME; - goto error; + goto err; } url_encoded = reencode_escapes (url); @@ -725,7 +726,7 @@ url_parse (const char *url, int *error) if (!host_e) { error_code = PE_UNTERMINATED_IPV6_ADDRESS; - goto error; + goto err; } #ifdef ENABLE_IPV6 @@ -733,14 +734,14 @@ url_parse (const char *url, int *error) if (!is_valid_ipv6_address(host_b, host_e)) { error_code = PE_INVALID_IPV6_ADDRESS; - goto error; + goto err; } /* Continue parsing after the closing ']'. */ p = host_e + 1; #else error_code = PE_IPV6_NOT_SUPPORTED; - goto error; + goto err; #endif } else @@ -752,7 +753,7 @@ url_parse (const char *url, int *error) if (host_b == host_e) { error_code = PE_EMPTY_HOST; - goto error; + goto err; } port = scheme_default_port (scheme); @@ -777,7 +778,7 @@ url_parse (const char *url, int *error) /* http://host:12randomgarbage/blah */ /* ^ */ error_code = PE_BAD_PORT_NUMBER; - goto error; + goto err; } port = 10 * port + (*pp - '0'); /* Check for too large port numbers here, before we have @@ -785,7 +786,7 @@ url_parse (const char *url, int *error) if (port > 65535) { error_code = PE_BAD_PORT_NUMBER; - goto error; + goto err; } } } @@ -844,7 +845,7 @@ url_parse (const char *url, int *error) if (!parse_credentials (uname_b, uname_e - 1, &user, &passwd)) { error_code = PE_INVALID_USER_NAME; - goto error; + goto err; } } @@ -898,7 +899,7 @@ url_parse (const char *url, int *error) return u; - error: + err: /* Cleanup in case of error: */ if (url_encoded && url_encoded != url) xfree (url_encoded); @@ -1269,7 +1270,7 @@ enum { translate file name back to URL, this would become important crucial. Right now, it's better to be minimal in escaping. */ -const static unsigned char filechr_table[256] = +static const unsigned char filechr_table[256] = { UWC, C, C, C, C, C, C, C, /* NUL SOH STX ETX EOT ENQ ACK BEL */ C, C, C, C, C, C, C, C, /* BS HT LF VT FF CR SO SI */