/* Forward declarations: */
-static bool path_simplify (char *);
+static bool path_simplify (enum url_scheme, char *);
\f
/* Support for escaping and unescaping of URL strings. */
return url_escape_1 (s, urlchr_unsafe, false);
}
+/* URL-escape the unsafe and reserved characters (see urlchr_table) in
+ a given string, returning a freshly allocated string. */
+
+char *
+url_escape_unsafe_and_reserved (const char *s)
+{
+ return url_escape_1 (s, urlchr_unsafe|urlchr_reserved, false);
+}
+
/* URL-escape the unsafe characters (see urlchr_table) in a given
string. If no characters are unsafe, S is returned. */
#define PE_NO_ERROR 0
N_("No error"),
#define PE_UNSUPPORTED_SCHEME 1
- N_("Unsupported scheme"),
-#define PE_INVALID_HOST_NAME 2
+ N_("Unsupported scheme %s"), /* support for format token only here */
+#define PE_MISSING_SCHEME 2
+ N_("Scheme missing"),
+#define PE_INVALID_HOST_NAME 3
N_("Invalid host name"),
-#define PE_BAD_PORT_NUMBER 3
+#define PE_BAD_PORT_NUMBER 4
N_("Bad port number"),
-#define PE_INVALID_USER_NAME 4
+#define PE_INVALID_USER_NAME 5
N_("Invalid user name"),
-#define PE_UNTERMINATED_IPV6_ADDRESS 5
+#define PE_UNTERMINATED_IPV6_ADDRESS 6
N_("Unterminated IPv6 numeric address"),
-#define PE_IPV6_NOT_SUPPORTED 6
+#define PE_IPV6_NOT_SUPPORTED 7
N_("IPv6 addresses not supported"),
-#define PE_INVALID_IPV6_ADDRESS 7
+#define PE_INVALID_IPV6_ADDRESS 8
N_("Invalid IPv6 numeric address")
};
error, and if ERROR is not NULL, also set *ERROR to the appropriate
error code. */
struct url *
-url_parse (const char *url, int *error)
+url_parse (const char *url, int *error, struct iri *iri, bool percent_encode)
{
struct url *u;
const char *p;
int port;
char *user = NULL, *passwd = NULL;
- char *url_encoded = NULL;
+ const char *url_encoded = NULL;
+ char *new_url = NULL;
int error_code;
scheme = url_scheme (url);
if (scheme == SCHEME_INVALID)
{
- error_code = PE_UNSUPPORTED_SCHEME;
+ if (url_has_scheme (url))
+ error_code = PE_UNSUPPORTED_SCHEME;
+ else
+ error_code = PE_MISSING_SCHEME;
goto error;
}
- url_encoded = reencode_escapes (url);
+ if (iri && iri->utf8_encode)
+ {
+ iri->utf8_encode = remote_to_utf8 (iri, iri->orig_url ? iri->orig_url : url, (const char **) &new_url);
+ if (!iri->utf8_encode)
+ new_url = NULL;
+ else
+ iri->orig_url = xstrdup (url);
+ }
+
+ /* XXX XXX Could that change introduce (security) bugs ??? XXX XXX*/
+ if (percent_encode)
+ url_encoded = reencode_escapes (new_url ? new_url : url);
+ else
+ url_encoded = new_url ? new_url : url;
+
p = url_encoded;
+ if (new_url && url_encoded != new_url)
+ xfree (new_url);
+
p += strlen (supported_schemes[scheme].leading_string);
uname_b = p;
p = url_skip_credentials (p);
u->passwd = passwd;
u->path = strdupdelim (path_b, path_e);
- path_modified = path_simplify (u->path);
+ path_modified = path_simplify (scheme, u->path);
split_path (u->path, &u->dir, &u->file);
host_modified = lowercase_str (u->host);
{
url_unescape (u->host);
host_modified = true;
+
+ /* Apply IDNA regardless of iri->utf8_encode status */
+ if (opt.enable_iri && iri)
+ {
+ char *new = idn_encode (iri, u->host);
+ if (new)
+ {
+ xfree (u->host);
+ u->host = new;
+ host_modified = true;
+ }
+ }
}
if (params_b)
if (fragment_b)
u->fragment = strdupdelim (fragment_b, fragment_e);
- if (path_modified || u->fragment || host_modified || path_b == path_e)
+ if (opt.enable_iri || path_modified || u->fragment || host_modified || path_b == path_e)
{
/* If we suspect that a transformation has rendered what
url_string might return different from URL_ENCODED, rebuild
if (url_encoded == url)
u->url = xstrdup (url);
else
- u->url = url_encoded;
+ u->url = (char *) url_encoded;
}
return u;
error:
/* Cleanup in case of error: */
if (url_encoded && url_encoded != url)
- xfree (url_encoded);
+ xfree ((char *) url_encoded);
/* Transmit the error code to the caller, if the caller wants to
know. */
/* Return the error message string from ERROR_CODE, which should have
been retrieved from url_parse. The error message is translated. */
-const char *
-url_error (int error_code)
+char *
+url_error (const char *url, int error_code)
{
- assert (error_code >= 0 && error_code < countof (parse_errors));
- return _(parse_errors[error_code]);
+ assert (error_code >= 0 && ((size_t) error_code) < countof (parse_errors));
+
+ if (error_code == PE_UNSUPPORTED_SCHEME)
+ {
+ char *error, *p;
+ char *scheme = xstrdup (url);
+ assert (url_has_scheme (url));
+
+ if ((p = strchr (scheme, ':')))
+ *p = '\0';
+ if (!strcasecmp (scheme, "https"))
+ error = aprintf (_("HTTPS support not compiled in"));
+ else
+ error = aprintf (_(parse_errors[error_code]), quote (scheme));
+ xfree (scheme);
+
+ return error;
+ }
+ else
+ return xstrdup (_(parse_errors[error_code]));
}
/* Split PATH into DIR and FILE. PATH comes from the URL and is
filechr_control = 4 /* a control character, e.g. 0-31 */
};
-#define FILE_CHAR_TEST(c, mask) (filechr_table[(unsigned char)(c)] & (mask))
+#define FILE_CHAR_TEST(c, mask) \
+ ((opt.restrict_files_nonascii && !c_isascii ((unsigned char)(c))) || \
+ (filechr_table[(unsigned char)(c)] & (mask)))
/* Shorthands for the table: */
#define U filechr_not_unix
const char *u_file, *u_query;
char *fname, *unique;
+ char *index_filename = "index.html"; /* The default index file is index.html */
fnres.base = NULL;
fnres.size = 0;
fnres.tail = 0;
+ /* If an alternative index file was defined, change index_filename */
+ if (opt.default_page)
+ index_filename = opt.default_page;
+
+
/* Start with the directory prefix, if specified. */
if (opt.dir_prefix)
append_string (opt.dir_prefix, &fnres);
/* Add the file name. */
if (fnres.tail)
append_char ('/', &fnres);
- u_file = *u->file ? u->file : "index.html";
+ u_file = *u->file ? u->file : index_filename;
append_uri_pathel (u_file, u_file + strlen (u_file), false, &fnres);
/* Append "?query" to the file name. */
test case. */
static bool
-path_simplify (char *path)
+path_simplify (enum url_scheme scheme, char *path)
{
char *h = path; /* hare */
char *t = path; /* tortoise */
+ char *beg = path;
char *end = strchr (path, '\0');
while (h < end)
{
/* Handle "../" by retreating the tortoise by one path
element -- but not past beggining. */
- if (t > path)
+ if (t > beg)
{
/* Move backwards until T hits the beginning of the
previous path element or the beginning of path. */
- for (--t; t > path && t[-1] != '/'; t--)
+ for (--t; t > beg && t[-1] != '/'; t--)
;
}
+ else if (scheme == SCHEME_FTP)
+ {
+ /* If we're at the beginning, copy the "../" literally
+ and move the beginning so a later ".." doesn't remove
+ it. This violates RFC 3986; but we do it for FTP
+ anyway because there is otherwise no way to get at a
+ parent directory, when the FTP server drops us in a
+ non-root directory (which is not uncommon). */
+ beg = t + 3;
+ goto regular;
+ }
h += 3;
}
else
{
+ regular:
/* A regular path element. If H hasn't advanced past T,
simply skip to the next path element. Otherwise, copy
the path element until the next slash. */
\f
static int
getchar_from_escaped_string (const char *str, char *c)
-{
+{
const char *p = str;
assert (str && *str);
assert (c);
-
+
if (p[0] == '%')
{
if (!c_isxdigit(p[1]) || !c_isxdigit(p[2]))
p += pp;
q += qq;
}
-
+
return (*p == 0 && *q == 0 ? true : false);
}
\f
-#if 0
+#ifdef TESTING
/* Debugging and testing support for path_simplify. */
+#if 0
/* Debug: run path_simplify on PATH and return the result in a new
string. Useful for calling from the debugger. */
static char *
path_simplify (copy);
return copy;
}
+#endif
-static void
-run_test (char *test, char *expected_result, bool expected_change)
+static const char *
+run_test (char *test, char *expected_result, enum url_scheme scheme,
+ bool expected_change)
{
char *test_copy = xstrdup (test);
- bool modified = path_simplify (test_copy);
+ bool modified = path_simplify (scheme, test_copy);
if (0 != strcmp (test_copy, expected_result))
{
printf ("Failed path_simplify(\"%s\"): expected \"%s\", got \"%s\".\n",
test, expected_result, test_copy);
+ mu_assert ("", 0);
}
if (modified != expected_change)
{
test);
}
xfree (test_copy);
+ mu_assert ("", modified == expected_change);
+ return NULL;
}
-static void
+const char *
test_path_simplify (void)
{
static struct {
char *test, *result;
+ enum url_scheme scheme;
bool should_modify;
} tests[] = {
- { "", "", false },
- { ".", "", true },
- { "./", "", true },
- { "..", "", true },
- { "../", "", true },
- { "foo", "foo", false },
- { "foo/bar", "foo/bar", false },
- { "foo///bar", "foo///bar", false },
- { "foo/.", "foo/", true },
- { "foo/./", "foo/", true },
- { "foo./", "foo./", false },
- { "foo/../bar", "bar", true },
- { "foo/../bar/", "bar/", true },
- { "foo/bar/..", "foo/", true },
- { "foo/bar/../x", "foo/x", true },
- { "foo/bar/../x/", "foo/x/", true },
- { "foo/..", "", true },
- { "foo/../..", "", true },
- { "foo/../../..", "", true },
- { "foo/../../bar/../../baz", "baz", true },
- { "a/b/../../c", "c", true },
- { "./a/../b", "b", true }
+ { "", "", SCHEME_HTTP, false },
+ { ".", "", SCHEME_HTTP, true },
+ { "./", "", SCHEME_HTTP, true },
+ { "..", "", SCHEME_HTTP, true },
+ { "../", "", SCHEME_HTTP, true },
+ { "..", "..", SCHEME_FTP, false },
+ { "../", "../", SCHEME_FTP, false },
+ { "foo", "foo", SCHEME_HTTP, false },
+ { "foo/bar", "foo/bar", SCHEME_HTTP, false },
+ { "foo///bar", "foo///bar", SCHEME_HTTP, false },
+ { "foo/.", "foo/", SCHEME_HTTP, true },
+ { "foo/./", "foo/", SCHEME_HTTP, true },
+ { "foo./", "foo./", SCHEME_HTTP, false },
+ { "foo/../bar", "bar", SCHEME_HTTP, true },
+ { "foo/../bar/", "bar/", SCHEME_HTTP, true },
+ { "foo/bar/..", "foo/", SCHEME_HTTP, true },
+ { "foo/bar/../x", "foo/x", SCHEME_HTTP, true },
+ { "foo/bar/../x/", "foo/x/", SCHEME_HTTP, true },
+ { "foo/..", "", SCHEME_HTTP, true },
+ { "foo/../..", "", SCHEME_HTTP, true },
+ { "foo/../../..", "", SCHEME_HTTP, true },
+ { "foo/../../bar/../../baz", "baz", SCHEME_HTTP, true },
+ { "foo/../..", "..", SCHEME_FTP, true },
+ { "foo/../../..", "../..", SCHEME_FTP, true },
+ { "foo/../../bar/../../baz", "../../baz", SCHEME_FTP, true },
+ { "a/b/../../c", "c", SCHEME_HTTP, true },
+ { "./a/../b", "b", SCHEME_HTTP, true }
};
int i;
for (i = 0; i < countof (tests); i++)
{
+ const char *message;
char *test = tests[i].test;
char *expected_result = tests[i].result;
+ enum url_scheme scheme = tests[i].scheme;
bool expected_change = tests[i].should_modify;
- run_test (test, expected_result, expected_change);
+ message = run_test (test, expected_result, scheme, expected_change);
+ if (message) return message;
}
+ return NULL;
}
-#endif
-\f
-#ifdef TESTING
const char *
test_append_uri_pathel()
} test_array[] = {
{ "http://www.yoyodyne.com/path/", "somepage.html", false, "http://www.yoyodyne.com/path/somepage.html" },
};
-
+
for (i = 0; i < sizeof(test_array)/sizeof(test_array[0]); ++i)
{
struct growable dest;