X-Git-Url: http://sjero.net/git/?a=blobdiff_plain;f=src%2Furl.c;h=40482b75adf6bca8ecf3992f258f7cf26584d232;hb=5f0a2b3f0846dd4c2f72fc62e7171200d1fd6e06;hp=4a4f465b50675af577e7bbf139b6099f5beb7d9e;hpb=f78016fb95ee536d2c1d26c2dcb2aff9ebc0a230;p=wget diff --git a/src/url.c b/src/url.c index 4a4f465b..40482b75 100644 --- a/src/url.c +++ b/src/url.c @@ -198,8 +198,8 @@ url_escape_1 (const char *s, unsigned char mask, int allow_passthrough) { unsigned char c = *p1++; *p2++ = '%'; - *p2++ = XNUM_TO_digit (c >> 4); - *p2++ = XNUM_TO_digit (c & 0xf); + *p2++ = XNUM_TO_DIGIT (c >> 4); + *p2++ = XNUM_TO_DIGIT (c & 0xf); } else *p2++ = *p1++; @@ -618,76 +618,75 @@ lowercase_str (char *str) static char *parse_errors[] = { #define PE_NO_ERROR 0 - "No error", + N_("No error"), #define PE_UNSUPPORTED_SCHEME 1 - "Unsupported scheme", + N_("Unsupported scheme"), #define PE_EMPTY_HOST 2 - "Empty host", + N_("Empty host"), #define PE_BAD_PORT_NUMBER 3 - "Bad port number", + N_("Bad port number"), #define PE_INVALID_USER_NAME 4 - "Invalid user name", + N_("Invalid user name"), #define PE_UNTERMINATED_IPV6_ADDRESS 5 - "Unterminated IPv6 numeric address", + N_("Unterminated IPv6 numeric address"), #define PE_IPV6_NOT_SUPPORTED 6 - "IPv6 addresses not supported", + N_("IPv6 addresses not supported"), #define PE_INVALID_IPV6_ADDRESS 7 - "Invalid IPv6 numeric address" + N_("Invalid IPv6 numeric address") }; -#define SETERR(p, v) do { \ - if (p) \ - *(p) = (v); \ -} while (0) - #ifdef ENABLE_IPV6 /* The following two functions were adapted from glibc. */ static int is_valid_ipv4_address (const char *str, const char *end) { - int saw_digit, octets; - int val; - - saw_digit = 0; - octets = 0; - val = 0; + int saw_digit = 0; + int octets = 0; + int val = 0; - while (str < end) { - int ch = *str++; + while (str < end) + { + int ch = *str++; - if (ch >= '0' && ch <= '9') { - val = val * 10 + (ch - '0'); + if (ch >= '0' && ch <= '9') + { + val = val * 10 + (ch - '0'); - if (val > 255) - return 0; - if (saw_digit == 0) { - if (++octets > 4) - return 0; - saw_digit = 1; - } - } else if (ch == '.' && saw_digit == 1) { - if (octets == 4) - return 0; - val = 0; - saw_digit = 0; - } else - return 0; - } + if (val > 255) + return 0; + if (saw_digit == 0) + { + if (++octets > 4) + return 0; + saw_digit = 1; + } + } + else if (ch == '.' && saw_digit == 1) + { + if (octets == 4) + return 0; + val = 0; + saw_digit = 0; + } + else + return 0; + } if (octets < 4) return 0; return 1; } -static const int NS_INADDRSZ = 4; -static const int NS_IN6ADDRSZ = 16; -static const int NS_INT16SZ = 2; - static int is_valid_ipv6_address (const char *str, const char *end) { - static const char xdigits[] = "0123456789abcdef"; + enum { + NS_INADDRSZ = 4, + NS_IN6ADDRSZ = 16, + NS_INT16SZ = 2 + }; + const char *curtok; int tp; const char *colonp; @@ -712,62 +711,67 @@ is_valid_ipv6_address (const char *str, const char *end) saw_xdigit = 0; val = 0; - while (str < end) { - int ch = *str++; - const char *pch; + while (str < end) + { + int ch = *str++; - /* if ch is a number, add it to val. */ - pch = strchr(xdigits, ch); - if (pch != NULL) { - val <<= 4; - val |= (pch - xdigits); - if (val > 0xffff) - return 0; - saw_xdigit = 1; - continue; + /* if ch is a number, add it to val. */ + if (ISXDIGIT (ch)) + { + val <<= 4; + val |= XDIGIT_TO_NUM (ch); + if (val > 0xffff) + return 0; + saw_xdigit = 1; + continue; + } + + /* if ch is a colon ... */ + if (ch == ':') + { + curtok = str; + if (saw_xdigit == 0) + { + if (colonp != NULL) + return 0; + colonp = str + tp; + continue; + } + else if (str == end) + return 0; + if (tp > NS_IN6ADDRSZ - NS_INT16SZ) + return 0; + tp += NS_INT16SZ; + saw_xdigit = 0; + val = 0; + continue; + } + + /* if ch is a dot ... */ + if (ch == '.' && (tp <= NS_IN6ADDRSZ - NS_INADDRSZ) + && is_valid_ipv4_address (curtok, end) == 1) + { + tp += NS_INADDRSZ; + saw_xdigit = 0; + break; + } + + return 0; } - /* if ch is a colon ... */ - if (ch == ':') { - curtok = str; - if (saw_xdigit == 0) { - if (colonp != NULL) - return 0; - colonp = str + tp; - continue; - } else if (str == end) { - return 0; - } - if (tp > NS_IN6ADDRSZ - NS_INT16SZ) + if (saw_xdigit == 1) + { + if (tp > NS_IN6ADDRSZ - NS_INT16SZ) return 0; tp += NS_INT16SZ; - saw_xdigit = 0; - val = 0; - continue; } - /* if ch is a dot ... */ - if (ch == '.' && (tp <= NS_IN6ADDRSZ - NS_INADDRSZ) && - is_valid_ipv4_address(curtok, end) == 1) { - tp += NS_INADDRSZ; - saw_xdigit = 0; - break; + if (colonp != NULL) + { + if (tp == NS_IN6ADDRSZ) + return 0; + tp = NS_IN6ADDRSZ; } - - return 0; - } - - if (saw_xdigit == 1) { - if (tp > NS_IN6ADDRSZ - NS_INT16SZ) - return 0; - tp += NS_INT16SZ; - } - - if (colonp != NULL) { - if (tp == NS_IN6ADDRSZ) - return 0; - tp = NS_IN6ADDRSZ; - } if (tp != NS_IN6ADDRSZ) return 0; @@ -800,13 +804,15 @@ url_parse (const char *url, int *error) int port; char *user = NULL, *passwd = NULL; - char *url_encoded; + char *url_encoded = NULL; + + int error_code; scheme = url_scheme (url); if (scheme == SCHEME_INVALID) { - SETERR (error, PE_UNSUPPORTED_SCHEME); - return NULL; + error_code = PE_UNSUPPORTED_SCHEME; + goto error; } url_encoded = reencode_escapes (url); @@ -843,23 +849,23 @@ url_parse (const char *url, int *error) if (!host_e) { - SETERR (error, PE_UNTERMINATED_IPV6_ADDRESS); - return NULL; + error_code = PE_UNTERMINATED_IPV6_ADDRESS; + goto error; } #ifdef ENABLE_IPV6 /* Check if the IPv6 address is valid. */ if (!is_valid_ipv6_address(host_b, host_e)) { - SETERR (error, PE_INVALID_IPV6_ADDRESS); - return NULL; + error_code = PE_INVALID_IPV6_ADDRESS; + goto error; } /* Continue parsing after the closing ']'. */ p = host_e + 1; #else - SETERR (error, PE_IPV6_NOT_SUPPORTED); - return NULL; + error_code = PE_IPV6_NOT_SUPPORTED; + goto error; #endif } else @@ -870,8 +876,8 @@ url_parse (const char *url, int *error) if (host_b == host_e) { - SETERR (error, PE_EMPTY_HOST); - return NULL; + error_code = PE_EMPTY_HOST; + goto error; } port = scheme_default_port (scheme); @@ -890,8 +896,8 @@ url_parse (const char *url, int *error) { /* http://host:/whatever */ /* ^ */ - SETERR (error, PE_BAD_PORT_NUMBER); - return NULL; + error_code = PE_BAD_PORT_NUMBER; + goto error; } for (port = 0, pp = port_b; pp < port_e; pp++) @@ -900,8 +906,8 @@ url_parse (const char *url, int *error) { /* http://host:12randomgarbage/blah */ /* ^ */ - SETERR (error, PE_BAD_PORT_NUMBER); - return NULL; + error_code = PE_BAD_PORT_NUMBER; + goto error; } port = 10 * port + (*pp - '0'); @@ -960,14 +966,12 @@ url_parse (const char *url, int *error) /* uname_b uname_e */ if (!parse_credentials (uname_b, uname_e - 1, &user, &passwd)) { - SETERR (error, PE_INVALID_USER_NAME); - return NULL; + error_code = PE_INVALID_USER_NAME; + goto error; } } - u = (struct url *)xmalloc (sizeof (struct url)); - memset (u, 0, sizeof (*u)); - + u = xnew0 (struct url); u->scheme = scheme; u->host = strdupdelim (host_b, host_e); u->port = port; @@ -1007,13 +1011,27 @@ url_parse (const char *url, int *error) url_encoded = NULL; return u; + + error: + /* Cleanup in case of error: */ + if (url_encoded && url_encoded != url) + xfree (url_encoded); + + /* Transmit the error code to the caller, if the caller wants to + know. */ + if (error) + *error = error_code; + return NULL; } +/* Return the error message string from ERROR_CODE, which should have + been retrieved from url_parse. The error message is translated. */ + const char * url_error (int error_code) { assert (error_code >= 0 && error_code < countof (parse_errors)); - return parse_errors[error_code]; + return _(parse_errors[error_code]); } /* Split PATH into DIR and FILE. PATH comes from the URL and is @@ -1130,6 +1148,7 @@ url_escape_dir (const char *dir) for (; *h; h++, t++) { + /* url_escape_1 having converted '/' to "%2F" exactly. */ if (*h == '%' && h[1] == '2' && h[2] == 'F') { *t = '/'; @@ -1338,7 +1357,7 @@ append_char (char ch, struct growable *dest) enum { filechr_not_unix = 1, /* unusable on Unix, / and \0 */ filechr_not_windows = 2, /* unusable on Windows, one of \|/<>?:*" */ - filechr_control = 4, /* a control character, e.g. 0-31 */ + filechr_control = 4 /* a control character, e.g. 0-31 */ }; #define FILE_CHAR_TEST(c, mask) (filechr_table[(unsigned char)(c)] & (mask)) @@ -1406,14 +1425,15 @@ UWC, C, C, C, C, C, C, C, /* NUL SOH STX ETX EOT ENQ ACK BEL */ /* Quote path element, characters in [b, e), as file name, and append the quoted string to DEST. Each character is quoted as per - file_unsafe_char and the corresponding table. */ + file_unsafe_char and the corresponding table. + + If ESCAPED_P is non-zero, the path element is considered to be + URL-escaped and will be unescaped prior to inspection. */ static void -append_uri_pathel (const char *b, const char *e, struct growable *dest) +append_uri_pathel (const char *b, const char *e, int escaped_p, + struct growable *dest) { - char *pathel; - int pathlen; - const char *p; int quoted, outlen; @@ -1426,32 +1446,37 @@ append_uri_pathel (const char *b, const char *e, struct growable *dest) mask |= filechr_control; /* Copy [b, e) to PATHEL and URL-unescape it. */ - BOUNDED_TO_ALLOCA (b, e, pathel); - url_unescape (pathel); - pathlen = strlen (pathel); + if (escaped_p) + { + char *unescaped; + BOUNDED_TO_ALLOCA (b, e, unescaped); + url_unescape (unescaped); + b = unescaped; + e = unescaped + strlen (unescaped); + } - /* Go through PATHEL and check how many characters we'll need to - add for file quoting. */ + /* Walk the PATHEL string and check how many characters we'll need + to add for file quoting. */ quoted = 0; - for (p = pathel; *p; p++) + for (p = b; p < e; p++) if (FILE_CHAR_TEST (*p, mask)) ++quoted; - /* p - pathel is the string length. Each quoted char means two - additional characters in the string, hence 2*quoted. */ - outlen = (p - pathel) + (2 * quoted); + /* e-b is the string length. Each quoted char means two additional + characters in the string, hence 2*quoted. */ + outlen = (e - b) + (2 * quoted); GROW (dest, outlen); if (!quoted) { /* If there's nothing to quote, we don't need to go through the string the second time. */ - memcpy (TAIL (dest), pathel, outlen); + memcpy (TAIL (dest), b, outlen); } else { char *q = TAIL (dest); - for (p = pathel; *p; p++) + for (p = b; p < e; p++) { if (!FILE_CHAR_TEST (*p, mask)) *q++ = *p; @@ -1497,14 +1522,12 @@ append_dir_structure (const struct url *u, struct growable *dest) if (cut-- > 0) continue; if (pathel == next) - /* Ignore empty pathels. path_simplify should remove - occurrences of "//" from the path, but it has special cases - for starting / which generates an empty pathel here. */ + /* Ignore empty pathels. */ continue; if (dest->tail) append_char ('/', dest); - append_uri_pathel (pathel, next, dest); + append_uri_pathel (pathel, next, 1, dest); } } @@ -1553,14 +1576,14 @@ url_file_name (const struct url *u) if (fnres.tail) append_char ('/', &fnres); u_file = *u->file ? u->file : "index.html"; - append_uri_pathel (u_file, u_file + strlen (u_file), &fnres); + append_uri_pathel (u_file, u_file + strlen (u_file), 0, &fnres); /* Append "?query" to the file name. */ u_query = u->query && *u->query ? u->query : NULL; if (u_query) { append_char (FN_QUERY_SEP, &fnres); - append_uri_pathel (u_query, u_query + strlen (u_query), &fnres); + append_uri_pathel (u_query, u_query + strlen (u_query), 1, &fnres); } /* Zero-terminate the file name. */ @@ -1619,6 +1642,11 @@ find_last_char (const char *b, const char *e, char c) "back up one element". Single leading and trailing slashes are preserved. + This function does not handle URL escapes explicitly. If you're + passing paths from URLs, make sure to unquote "%2e" and "%2E" to + ".", so that this function can find the dots. (Wget's URL parser + calls reencode_escapes, which see.) + For example, "a/b/c/./../d/.." will yield "a/b/". More exhaustive test examples are provided below. If you change anything in this function, run test_path_simplify to make sure you haven't broken a @@ -1652,7 +1680,7 @@ path_simplify (char *path) element -- but not past beggining of PATH. */ if (t > path) { - /* Move backwards until B hits the beginning of the + /* Move backwards until T hits the beginning of the previous path element or the beginning of path. */ for (--t; t > path && t[-1] != '/'; t--) ;