/* URL handling.
Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
- 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
+ 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
This file is part of GNU Wget.
shall include the source code for the parts of OpenSSL used as well
as that of the covered work. */
-#include <config.h>
+#include "wget.h"
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <assert.h>
-#include "wget.h"
#include "utils.h"
#include "url.h"
#include "host.h" /* for is_valid_ipv6_address */
+#include "iri.h"
#ifdef TESTING
#include "test.h"
/* Forward declarations: */
-static bool path_simplify (char *);
+static bool path_simplify (enum url_scheme, char *);
\f
/* Support for escaping and unescaping of URL strings. */
{
char c;
/* Do nothing if '%' is not followed by two hex digits. */
- if (!h[1] || !h[2] || !(ISXDIGIT (h[1]) && ISXDIGIT (h[2])))
+ if (!h[1] || !h[2] || !(c_isxdigit (h[1]) && c_isxdigit (h[2])))
goto copychar;
c = X2DIGITS_TO_NUM (h[1], h[2]);
/* Don't unescape %00 because there is no way to insert it
{
if (*p == '%')
{
- if (ISXDIGIT (*(p + 1)) && ISXDIGIT (*(p + 2)))
+ if (c_isxdigit (*(p + 1)) && c_isxdigit (*(p + 2)))
return false;
else
/* Garbled %.. sequence: encode `%'. */
return SCHEME_INVALID;
}
-#define SCHEME_CHAR(ch) (ISALNUM (ch) || (ch) == '-' || (ch) == '+')
+#define SCHEME_CHAR(ch) (c_isalnum (ch) || (ch) == '-' || (ch) == '+')
/* Return 1 if the URL begins with any "scheme", 0 otherwise. As
currently implemented, it returns true if URL begins with
{
bool changed = false;
for (; *str; str++)
- if (ISUPPER (*str))
+ if (c_isupper (*str))
{
changed = true;
- *str = TOLOWER (*str);
+ *str = c_tolower (*str);
}
return changed;
}
goto error;
}
+ if (opt.enable_iri)
+ {
+ url_unescape ((char *) url);
+ url = locale_to_utf8(url);
+ }
+
url_encoded = reencode_escapes (url);
p = url_encoded;
if (port_b != port_e)
for (port = 0, pp = port_b; pp < port_e; pp++)
{
- if (!ISDIGIT (*pp))
+ if (!c_isdigit (*pp))
{
/* http://host:12randomgarbage/blah */
/* ^ */
u->passwd = passwd;
u->path = strdupdelim (path_b, path_e);
- path_modified = path_simplify (u->path);
+ path_modified = path_simplify (scheme, u->path);
split_path (u->path, &u->dir, &u->file);
host_modified = lowercase_str (u->host);
host_modified = true;
}
+ if (opt.enable_iri)
+ {
+ char *new = idn_encode (u->host);
+ if (new)
+ {
+ xfree (u->host);
+ u->host = new;
+ host_modified = true;
+ }
+ }
+
if (params_b)
u->params = strdupdelim (params_b, params_e);
if (query_b)
if (fragment_b)
u->fragment = strdupdelim (fragment_b, fragment_e);
- if (path_modified || u->fragment || host_modified || path_b == path_e)
+ if (opt.enable_iri || path_modified || u->fragment || host_modified || path_b == path_e)
{
/* If we suspect that a transformation has rendered what
url_string might return different from URL_ENCODED, rebuild
/* Return the message for ERROR_CODE: the parse_errors entry at that
   index, passed through _() (presumably the gettext translation
   macro -- confirm).  ERROR_CODE must be a valid index into
   parse_errors; this is enforced only by the assert below. */
const char *
url_error (int error_code)
{
/* The >= 0 test runs first, so the size_t cast in the new line never
   sees a negative value; the cast makes the comparison against
   countof (presumably a size_t expression) unsigned on both sides. */
- assert (error_code >= 0 && error_code < countof (parse_errors));
+ assert (error_code >= 0 && ((size_t) error_code) < countof (parse_errors));
return _(parse_errors[error_code]);
}
/* Classes of characters, as described per member below.  The values
   are distinct single bits (1, 2, 4) -- presumably so that several
   classes can be OR-ed into one mask; confirm at the use site. */
enum {
filechr_not_unix = 1, /* unusable on Unix, / and \0 */
- filechr_not_windows = 2, /* unusable on MSDOS/Windows, one of \|/<>?:*" */
+ filechr_not_windows = 2, /* unusable on Windows, one of \|/<>?:*" */
filechr_control = 4 /* a control character, e.g. 0-31 */
};
for (q = TAIL (dest); q < TAIL (dest) + outlen; ++q)
{
if (opt.restrict_files_case == restrict_lowercase)
- *q = TOLOWER (*q);
+ *q = c_tolower (*q);
else
- *q = TOUPPER (*q);
+ *q = c_toupper (*q);
}
}
test case. */
static bool
-path_simplify (char *path)
+path_simplify (enum url_scheme scheme, char *path)
{
char *h = path; /* hare */
char *t = path; /* tortoise */
+ char *beg = path;
char *end = strchr (path, '\0');
while (h < end)
{
/* Handle "../" by retreating the tortoise by one path
element -- but not past beginning. */
- if (t > path)
+ if (t > beg)
{
/* Move backwards until T hits the beginning of the
previous path element or the beginning of path. */
- for (--t; t > path && t[-1] != '/'; t--)
+ for (--t; t > beg && t[-1] != '/'; t--)
;
}
+ else if (scheme == SCHEME_FTP)
+ {
+ /* If we're at the beginning, copy the "../" literally
+ and move the beginning so a later ".." doesn't remove
+ it. This violates RFC 3986; but we do it for FTP
+ anyway because there is otherwise no way to get at a
+ parent directory, when the FTP server drops us in a
+ non-root directory (which is not uncommon). */
+ beg = t + 3;
+ goto regular;
+ }
h += 3;
}
else
{
+ regular:
/* A regular path element. If H hasn't advanced past T,
simply skip to the next path element. Otherwise, copy
the path element until the next slash. */
if (p[0] == '%')
{
- if (!ISXDIGIT(p[1]) || !ISXDIGIT(p[2]))
+ if (!c_isxdigit(p[1]) || !c_isxdigit(p[2]))
{
*c = '%';
return 1;
while (*p && *q
&& (pp = getchar_from_escaped_string (p, &ch1))
&& (qq = getchar_from_escaped_string (q, &ch2))
- && (TOLOWER(ch1) == TOLOWER(ch2)))
+ && (c_tolower(ch1) == c_tolower(ch2)))
{
p += pp;
q += qq;
return (*p == 0 && *q == 0 ? true : false);
}
\f
-#if 0
+#ifdef TESTING
/* Debugging and testing support for path_simplify. */
+#if 0
/* Debug: run path_simplify on PATH and return the result in a new
string. Useful for calling from the debugger. */
static char *
path_simplify (copy);
return copy;
}
+#endif
-static void
-run_test (char *test, char *expected_result, bool expected_change)
+static const char *
+run_test (char *test, char *expected_result, enum url_scheme scheme,
+ bool expected_change)
{
char *test_copy = xstrdup (test);
- bool modified = path_simplify (test_copy);
+ bool modified = path_simplify (scheme, test_copy);
if (0 != strcmp (test_copy, expected_result))
{
printf ("Failed path_simplify(\"%s\"): expected \"%s\", got \"%s\".\n",
test, expected_result, test_copy);
+ mu_assert ("", 0);
}
if (modified != expected_change)
{
test);
}
xfree (test_copy);
+ mu_assert ("", modified == expected_change);
+ return NULL;
}
+/* Run every path_simplify test case in the table below, each under
+ its own URL scheme.  Returns NULL when all cases pass, or the
+ message returned by run_test for the first failing case. */
-static void
+const char *
test_path_simplify (void)
{
static struct {
char *test, *result;
+ enum url_scheme scheme;
bool should_modify;
} tests[] = {
- { "", "", false },
- { ".", "", true },
- { "./", "", true },
- { "..", "", true },
- { "../", "", true },
- { "foo", "foo", false },
- { "foo/bar", "foo/bar", false },
- { "foo///bar", "foo///bar", false },
- { "foo/.", "foo/", true },
- { "foo/./", "foo/", true },
- { "foo./", "foo./", false },
- { "foo/../bar", "bar", true },
- { "foo/../bar/", "bar/", true },
- { "foo/bar/..", "foo/", true },
- { "foo/bar/../x", "foo/x", true },
- { "foo/bar/../x/", "foo/x/", true },
- { "foo/..", "", true },
- { "foo/../..", "", true },
- { "foo/../../..", "", true },
- { "foo/../../bar/../../baz", "baz", true },
- { "a/b/../../c", "c", true },
- { "./a/../b", "b", true }
+ { "", "", SCHEME_HTTP, false },
+ { ".", "", SCHEME_HTTP, true },
+ { "./", "", SCHEME_HTTP, true },
+ { "..", "", SCHEME_HTTP, true },
+ { "../", "", SCHEME_HTTP, true },
+ { "..", "..", SCHEME_FTP, false },
+ { "../", "../", SCHEME_FTP, false },
+ { "foo", "foo", SCHEME_HTTP, false },
+ { "foo/bar", "foo/bar", SCHEME_HTTP, false },
+ { "foo///bar", "foo///bar", SCHEME_HTTP, false },
+ { "foo/.", "foo/", SCHEME_HTTP, true },
+ { "foo/./", "foo/", SCHEME_HTTP, true },
+ { "foo./", "foo./", SCHEME_HTTP, false },
+ { "foo/../bar", "bar", SCHEME_HTTP, true },
+ { "foo/../bar/", "bar/", SCHEME_HTTP, true },
+ { "foo/bar/..", "foo/", SCHEME_HTTP, true },
+ { "foo/bar/../x", "foo/x", SCHEME_HTTP, true },
+ { "foo/bar/../x/", "foo/x/", SCHEME_HTTP, true },
+ { "foo/..", "", SCHEME_HTTP, true },
+ { "foo/../..", "", SCHEME_HTTP, true },
+ { "foo/../../..", "", SCHEME_HTTP, true },
+ { "foo/../../bar/../../baz", "baz", SCHEME_HTTP, true },
+ { "foo/../..", "..", SCHEME_FTP, true },
+ { "foo/../../..", "../..", SCHEME_FTP, true },
+ { "foo/../../bar/../../baz", "../../baz", SCHEME_FTP, true },
+ { "a/b/../../c", "c", SCHEME_HTTP, true },
+ { "./a/../b", "b", SCHEME_HTTP, true }
};
- int i;
+ size_t i; /* unsigned index: avoids a signed/unsigned comparison with countof */
for (i = 0; i < countof (tests); i++)
{
+ const char *message;
char *test = tests[i].test;
char *expected_result = tests[i].result;
+ enum url_scheme scheme = tests[i].scheme;
bool expected_change = tests[i].should_modify;
- run_test (test, expected_result, expected_change);
+ message = run_test (test, expected_result, scheme, expected_change);
+ if (message) return message;
}
+ return NULL;
}
-#endif
-\f
-#ifdef TESTING
const char *
test_append_uri_pathel()