/* URL handling.
- Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
- 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+ Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
+ 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
This file is part of GNU Wget.
#include "url.h"
#include "host.h" /* for is_valid_ipv6_address */
+#ifdef __VMS
+#include "vms.h"
+#endif /* def __VMS */
+
#ifdef TESTING
#include "test.h"
#endif
return url_escape_1 (s, urlchr_unsafe, false);
}
+/* URL-escape the unsafe and reserved characters (see urlchr_table) in
+ a given string, returning a freshly allocated string.
+
+ This is a thin wrapper: it calls url_escape_1 on S with the combined
+ mask urlchr_unsafe|urlchr_reserved and `false' as the last argument,
+ and returns that result unchanged.
+
+ NOTE(review): the caller presumably owns the returned string and is
+ expected to free it -- confirm against url_escape_1. */
+
+char *
+url_escape_unsafe_and_reserved (const char *s)
+{
+ return url_escape_1 (s, urlchr_unsafe|urlchr_reserved, false);
+}
+
/* URL-escape the unsafe characters (see urlchr_table) in a given
string. If no characters are unsafe, S is returned. */
#define PE_NO_ERROR 0
N_("No error"),
#define PE_UNSUPPORTED_SCHEME 1
- N_("Unsupported scheme %s"),
-#define PE_INVALID_HOST_NAME 2
+ N_("Unsupported scheme %s"), /* support for format token only here */
+#define PE_MISSING_SCHEME 2
+ N_("Scheme missing"),
+#define PE_INVALID_HOST_NAME 3
N_("Invalid host name"),
-#define PE_BAD_PORT_NUMBER 3
+#define PE_BAD_PORT_NUMBER 4
N_("Bad port number"),
-#define PE_INVALID_USER_NAME 4
+#define PE_INVALID_USER_NAME 5
N_("Invalid user name"),
-#define PE_UNTERMINATED_IPV6_ADDRESS 5
+#define PE_UNTERMINATED_IPV6_ADDRESS 6
N_("Unterminated IPv6 numeric address"),
-#define PE_IPV6_NOT_SUPPORTED 6
+#define PE_IPV6_NOT_SUPPORTED 7
N_("IPv6 addresses not supported"),
-#define PE_INVALID_IPV6_ADDRESS 7
+#define PE_INVALID_IPV6_ADDRESS 8
N_("Invalid IPv6 numeric address")
};
error, and if ERROR is not NULL, also set *ERROR to the appropriate
error code. */
struct url *
-url_parse (const char *url, int *error, struct iri *iri)
+url_parse (const char *url, int *error, struct iri *iri, bool percent_encode)
{
struct url *u;
const char *p;
int port;
char *user = NULL, *passwd = NULL;
- char *url_encoded = NULL, *new_url = NULL;
+ const char *url_encoded = NULL;
+ char *new_url = NULL;
int error_code;
scheme = url_scheme (url);
if (scheme == SCHEME_INVALID)
{
- error_code = PE_UNSUPPORTED_SCHEME;
+ if (url_has_scheme (url))
+ error_code = PE_UNSUPPORTED_SCHEME;
+ else
+ error_code = PE_MISSING_SCHEME;
goto error;
}
if (iri && iri->utf8_encode)
{
- url_unescape ((char *) url);
- iri->utf8_encode = remote_to_utf8 (iri, url, (const char **) &new_url);
+ iri->utf8_encode = remote_to_utf8 (iri, iri->orig_url ? iri->orig_url : url, (const char **) &new_url);
if (!iri->utf8_encode)
new_url = NULL;
+ else
+ iri->orig_url = xstrdup (url);
}
- url_encoded = reencode_escapes (new_url ? new_url : url);
+ /* XXX: When PERCENT_ENCODE is false, the URL is used as-is, without
+ going through reencode_escapes. Could that introduce (security) bugs? */
+ if (percent_encode)
+ url_encoded = reencode_escapes (new_url ? new_url : url);
+ else
+ url_encoded = new_url ? new_url : url;
+
p = url_encoded;
if (new_url && url_encoded != new_url)
if (url_encoded == url)
u->url = xstrdup (url);
else
- u->url = url_encoded;
+ u->url = (char *) url_encoded;
}
return u;
error:
/* Cleanup in case of error: */
if (url_encoded && url_encoded != url)
- xfree (url_encoded);
+ xfree ((char *) url_encoded);
/* Transmit the error code to the caller, if the caller wants to
know. */
if ((p = strchr (scheme, ':')))
*p = '\0';
if (!strcasecmp (scheme, "https"))
- asprintf (&error, _("HTTPS support not compiled in"));
+ error = aprintf (_("HTTPS support not compiled in"));
else
- asprintf (&error, _(parse_errors[error_code]), quote (scheme));
+ error = aprintf (_(parse_errors[error_code]), quote (scheme));
xfree (scheme);
return error;
filechr_control = 4 /* a control character, e.g. 0-31 */
};
-#define FILE_CHAR_TEST(c, mask) (filechr_table[(unsigned char)(c)] & (mask))
+/* Evaluate to nonzero when either opt.restrict_files_nonascii is set
+ and C is not an ASCII character, or filechr_table has at least one
+ of the MASK bits set in the entry for C. C is cast to unsigned char
+ before every use. Note that C appears twice in the expansion, so
+ avoid passing an argument with side effects. */
+#define FILE_CHAR_TEST(c, mask) \
+ ((opt.restrict_files_nonascii && !c_isascii ((unsigned char)(c))) || \
+ (filechr_table[(unsigned char)(c)] & (mask)))
/* Shorthands for the table: */
#define U filechr_not_unix
if ((opt.noclobber || opt.always_rest || opt.timestamping || opt.dirstruct)
&& !(file_exists_p (fname) && !file_non_directory_p (fname)))
- return fname;
+ {
+ unique = fname;
+ }
+ else
+ {
+ unique = unique_name (fname, true);
+ if (unique != fname)
+ xfree (fname);
+ }
+
+/* On VMS, alter the name as required. */
+#ifdef __VMS
+ {
+ char *unique2;
+
+ unique2 = ods_conform( unique);
+ if (unique2 != unique)
+ {
+ xfree (unique);
+ unique = unique2;
+ }
+ }
+#endif /* def __VMS */
- unique = unique_name (fname, true);
- if (unique != fname)
- xfree (fname);
return unique;
}
\f
\f
static int
getchar_from_escaped_string (const char *str, char *c)
-{
+{
const char *p = str;
assert (str && *str);
assert (c);
-
+
if (p[0] == '%')
{
if (!c_isxdigit(p[1]) || !c_isxdigit(p[2]))
p += pp;
q += qq;
}
-
+
return (*p == 0 && *q == 0 ? true : false);
}
\f
} test_array[] = {
{ "http://www.yoyodyne.com/path/", "somepage.html", false, "http://www.yoyodyne.com/path/somepage.html" },
};
-
+
for (i = 0; i < sizeof(test_array)/sizeof(test_array[0]); ++i)
{
struct growable dest;