/* URL handling.
Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
- 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
+ 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
This file is part of GNU Wget.
shall include the source code for the parts of OpenSSL used as well
as that of the covered work. */
-#include <config.h>
+#include "wget.h"
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <assert.h>
-#include "wget.h"
#include "utils.h"
#include "url.h"
#include "host.h" /* for is_valid_ipv6_address */
/* Forward declarations: */
-static bool path_simplify (char *);
+static bool path_simplify (enum url_scheme, char *);
\f
/* Support for escaping and unescaping of URL strings. */
{
char c;
/* Do nothing if '%' is not followed by two hex digits. */
- if (!h[1] || !h[2] || !(ISXDIGIT (h[1]) && ISXDIGIT (h[2])))
+ if (!h[1] || !h[2] || !(c_isxdigit (h[1]) && c_isxdigit (h[2])))
goto copychar;
c = X2DIGITS_TO_NUM (h[1], h[2]);
/* Don't unescape %00 because there is no way to insert it
{
if (*p == '%')
{
- if (ISXDIGIT (*(p + 1)) && ISXDIGIT (*(p + 2)))
+ if (c_isxdigit (*(p + 1)) && c_isxdigit (*(p + 2)))
return false;
else
/* Garbled %.. sequence: encode `%'. */
return SCHEME_INVALID;
}
-#define SCHEME_CHAR(ch) (ISALNUM (ch) || (ch) == '-' || (ch) == '+')
+#define SCHEME_CHAR(ch) (c_isalnum (ch) || (ch) == '-' || (ch) == '+')
/* Return 1 if the URL begins with any "scheme", 0 otherwise. As
currently implemented, it returns true if URL begins with
{
bool changed = false;
for (; *str; str++)
- if (ISUPPER (*str))
+ if (c_isupper (*str))
{
changed = true;
- *str = TOLOWER (*str);
+ *str = c_tolower (*str);
}
return changed;
}
#define PE_NO_ERROR 0
N_("No error"),
#define PE_UNSUPPORTED_SCHEME 1
- N_("Unsupported scheme"),
+ N_("Unsupported scheme %s"),
#define PE_INVALID_HOST_NAME 2
N_("Invalid host name"),
#define PE_BAD_PORT_NUMBER 3
if (port_b != port_e)
for (port = 0, pp = port_b; pp < port_e; pp++)
{
- if (!ISDIGIT (*pp))
+ if (!c_isdigit (*pp))
{
/* http://host:12randomgarbage/blah */
/* ^ */
u->passwd = passwd;
u->path = strdupdelim (path_b, path_e);
- path_modified = path_simplify (u->path);
+ path_modified = path_simplify (scheme, u->path);
split_path (u->path, &u->dir, &u->file);
host_modified = lowercase_str (u->host);
/* Return the error message string from ERROR_CODE, which should have
been retrieved from url_parse. The error message is translated. */
-const char *
-url_error (int error_code)
+char *
+url_error (const char *url, int error_code)
{
- assert (error_code >= 0 && error_code < countof (parse_errors));
- return _(parse_errors[error_code]);
+  /* Every path below returns a newly built string (asprintf or
+     xstrdup), never a pointer into the parse_errors table, so the
+     caller is expected to free the result.  URL is only examined
+     for the PE_UNSUPPORTED_SCHEME case.  */
+  assert (error_code >= 0 && ((size_t) error_code) < countof (parse_errors));
+
+  if (error_code == PE_UNSUPPORTED_SCHEME)
+    {
+      char *error = NULL, *p;
+      char *scheme;
+      int len;
+
+      /* Check the precondition before spending an allocation on the
+         copy of URL.  */
+      assert (url_has_scheme (url));
+      scheme = xstrdup (url);
+
+      /* Keep only the scheme name: cut the copy at the first ':'.  */
+      if ((p = strchr (scheme, ':')))
+        *p = '\0';
+      if (!strcasecmp (scheme, "https"))
+        /* Go through "%s" so the translated text is never interpreted
+           as a format string.  */
+        len = asprintf (&error, "%s", _("HTTPS support not compiled in"));
+      else
+        len = asprintf (&error, _(parse_errors[error_code]), quote (scheme));
+      xfree (scheme);
+
+      /* asprintf leaves ERROR unspecified when it fails; never hand
+         that pointer back to the caller.  */
+      if (len < 0)
+        error = xstrdup (_("Unsupported scheme"));
+
+      return error;
+    }
+  else
+    return xstrdup (_(parse_errors[error_code]));
}
/* Split PATH into DIR and FILE. PATH comes from the URL and is
enum {
filechr_not_unix = 1, /* unusable on Unix, / and \0 */
- filechr_not_windows = 2, /* unusable on MSDOS/Windows, one of \|/<>?:*" */
+ filechr_not_windows = 2, /* unusable on Windows, one of \|/<>?:*" */
filechr_control = 4 /* a control character, e.g. 0-31 */
};
for (q = TAIL (dest); q < TAIL (dest) + outlen; ++q)
{
if (opt.restrict_files_case == restrict_lowercase)
- *q = TOLOWER (*q);
+ *q = c_tolower (*q);
else
- *q = TOUPPER (*q);
+ *q = c_toupper (*q);
}
}
const char *u_file, *u_query;
char *fname, *unique;
+ char *index_filename = "index.html"; /* The default index file is index.html */
fnres.base = NULL;
fnres.size = 0;
fnres.tail = 0;
+ /* If an alternative index file was defined, change index_filename */
+ if (opt.default_page)
+ index_filename = opt.default_page;
+
+
/* Start with the directory prefix, if specified. */
if (opt.dir_prefix)
append_string (opt.dir_prefix, &fnres);
/* Add the file name. */
if (fnres.tail)
append_char ('/', &fnres);
- u_file = *u->file ? u->file : "index.html";
+ u_file = *u->file ? u->file : index_filename;
append_uri_pathel (u_file, u_file + strlen (u_file), false, &fnres);
/* Append "?query" to the file name. */
test case. */
static bool
-path_simplify (char *path)
+path_simplify (enum url_scheme scheme, char *path)
{
char *h = path; /* hare */
char *t = path; /* tortoise */
+ char *beg = path;
char *end = strchr (path, '\0');
while (h < end)
{
/* Handle "../" by retreating the tortoise by one path
element -- but not past the beginning. */
- if (t > path)
+ if (t > beg)
{
/* Move backwards until T hits the beginning of the
previous path element or the beginning of path. */
- for (--t; t > path && t[-1] != '/'; t--)
+ for (--t; t > beg && t[-1] != '/'; t--)
;
}
+ else if (scheme == SCHEME_FTP)
+ {
+ /* If we're at the beginning, copy the "../" literally
+ and move the beginning so a later ".." doesn't remove
+ it. This violates RFC 3986; but we do it for FTP
+ anyway because there is otherwise no way to get at a
+ parent directory, when the FTP server drops us in a
+ non-root directory (which is not uncommon). */
+ beg = t + 3;
+ goto regular;
+ }
h += 3;
}
else
{
+ regular:
/* A regular path element. If H hasn't advanced past T,
simply skip to the next path element. Otherwise, copy
the path element until the next slash. */
if (p[0] == '%')
{
- if (!ISXDIGIT(p[1]) || !ISXDIGIT(p[2]))
+ if (!c_isxdigit(p[1]) || !c_isxdigit(p[2]))
{
*c = '%';
return 1;
while (*p && *q
&& (pp = getchar_from_escaped_string (p, &ch1))
&& (qq = getchar_from_escaped_string (q, &ch2))
- && (TOLOWER(ch1) == TOLOWER(ch2)))
+ && (c_tolower(ch1) == c_tolower(ch2)))
{
p += pp;
q += qq;
return (*p == 0 && *q == 0 ? true : false);
}
\f
-#if 0
+#ifdef TESTING
/* Debugging and testing support for path_simplify. */
+#if 0
/* Debug: run path_simplify on PATH and return the result in a new
string. Useful for calling from the debugger. */
static char *
path_simplify (copy);
return copy;
}
+#endif
-static void
-run_test (char *test, char *expected_result, bool expected_change)
+static const char *
+run_test (char *test, char *expected_result, enum url_scheme scheme,
+ bool expected_change)
{
char *test_copy = xstrdup (test);
- bool modified = path_simplify (test_copy);
+ bool modified = path_simplify (scheme, test_copy);
if (0 != strcmp (test_copy, expected_result))
{
printf ("Failed path_simplify(\"%s\"): expected \"%s\", got \"%s\".\n",
test, expected_result, test_copy);
+ mu_assert ("", 0);
}
if (modified != expected_change)
{
test);
}
xfree (test_copy);
+ mu_assert ("", modified == expected_change);
+ return NULL;
}
-static void
+const char *
test_path_simplify (void)
{
static struct {
char *test, *result;
+ enum url_scheme scheme;
bool should_modify;
} tests[] = {
- { "", "", false },
- { ".", "", true },
- { "./", "", true },
- { "..", "", true },
- { "../", "", true },
- { "foo", "foo", false },
- { "foo/bar", "foo/bar", false },
- { "foo///bar", "foo///bar", false },
- { "foo/.", "foo/", true },
- { "foo/./", "foo/", true },
- { "foo./", "foo./", false },
- { "foo/../bar", "bar", true },
- { "foo/../bar/", "bar/", true },
- { "foo/bar/..", "foo/", true },
- { "foo/bar/../x", "foo/x", true },
- { "foo/bar/../x/", "foo/x/", true },
- { "foo/..", "", true },
- { "foo/../..", "", true },
- { "foo/../../..", "", true },
- { "foo/../../bar/../../baz", "baz", true },
- { "a/b/../../c", "c", true },
- { "./a/../b", "b", true }
+ { "", "", SCHEME_HTTP, false },
+ { ".", "", SCHEME_HTTP, true },
+ { "./", "", SCHEME_HTTP, true },
+ { "..", "", SCHEME_HTTP, true },
+ { "../", "", SCHEME_HTTP, true },
+ { "..", "..", SCHEME_FTP, false },
+ { "../", "../", SCHEME_FTP, false },
+ { "foo", "foo", SCHEME_HTTP, false },
+ { "foo/bar", "foo/bar", SCHEME_HTTP, false },
+ { "foo///bar", "foo///bar", SCHEME_HTTP, false },
+ { "foo/.", "foo/", SCHEME_HTTP, true },
+ { "foo/./", "foo/", SCHEME_HTTP, true },
+ { "foo./", "foo./", SCHEME_HTTP, false },
+ { "foo/../bar", "bar", SCHEME_HTTP, true },
+ { "foo/../bar/", "bar/", SCHEME_HTTP, true },
+ { "foo/bar/..", "foo/", SCHEME_HTTP, true },
+ { "foo/bar/../x", "foo/x", SCHEME_HTTP, true },
+ { "foo/bar/../x/", "foo/x/", SCHEME_HTTP, true },
+ { "foo/..", "", SCHEME_HTTP, true },
+ { "foo/../..", "", SCHEME_HTTP, true },
+ { "foo/../../..", "", SCHEME_HTTP, true },
+ { "foo/../../bar/../../baz", "baz", SCHEME_HTTP, true },
+ { "foo/../..", "..", SCHEME_FTP, true },
+ { "foo/../../..", "../..", SCHEME_FTP, true },
+ { "foo/../../bar/../../baz", "../../baz", SCHEME_FTP, true },
+ { "a/b/../../c", "c", SCHEME_HTTP, true },
+ { "./a/../b", "b", SCHEME_HTTP, true }
};
int i;
for (i = 0; i < countof (tests); i++)
{
+ const char *message;
char *test = tests[i].test;
char *expected_result = tests[i].result;
+ enum url_scheme scheme = tests[i].scheme;
bool expected_change = tests[i].should_modify;
- run_test (test, expected_result, expected_change);
+ message = run_test (test, expected_result, scheme, expected_change);
+ if (message) return message;
}
+ return NULL;
}
-#endif
-\f
-#ifdef TESTING
const char *
test_append_uri_pathel()