#include "wget.h"
#include "utils.h"
#include "url.h"
+#include "host.h" /* for is_valid_ipv6_address */
#ifndef errno
extern int errno;
supported_schemes[scheme].enabled = 0;
}
-/* Skip the username and password, if present here. The function
- should *not* be called with the complete URL, but with the part
- right after the scheme.
+/* Skip the username and password, if present in the URL. The
+ function should *not* be called with the complete URL, but with the
+ portion after the scheme.
- If no username and password are found, return 0. */
+ If no username and password are found, return URL unchanged;
+ otherwise return a pointer to the character just past the '@'. */
-static int
+static const char *
url_skip_credentials (const char *url)
{
/* Look for '@' that comes before terminators, such as '/', '?',
'#', or ';'. */
const char *p = (const char *)strpbrk (url, "@/?#;");
if (!p || *p != '@')
- return 0;
- return p + 1 - url;
+ return url;
+ return p + 1;
}
/* Parse credentials contained in [BEG, END). The region is expected
N_("Invalid IPv6 numeric address")
};
-#ifdef ENABLE_IPV6
-/* The following two functions were adapted from glibc. */
-
-static int
-is_valid_ipv4_address (const char *str, const char *end)
-{
- int saw_digit = 0;
- int octets = 0;
- int val = 0;
-
- while (str < end)
- {
- int ch = *str++;
-
- if (ch >= '0' && ch <= '9')
- {
- val = val * 10 + (ch - '0');
-
- if (val > 255)
- return 0;
- if (saw_digit == 0)
- {
- if (++octets > 4)
- return 0;
- saw_digit = 1;
- }
- }
- else if (ch == '.' && saw_digit == 1)
- {
- if (octets == 4)
- return 0;
- val = 0;
- saw_digit = 0;
- }
- else
- return 0;
- }
- if (octets < 4)
- return 0;
-
- return 1;
-}
-
-static int
-is_valid_ipv6_address (const char *str, const char *end)
-{
- enum {
- NS_INADDRSZ = 4,
- NS_IN6ADDRSZ = 16,
- NS_INT16SZ = 2
- };
-
- const char *curtok;
- int tp;
- const char *colonp;
- int saw_xdigit;
- unsigned int val;
-
- tp = 0;
- colonp = NULL;
-
- if (str == end)
- return 0;
-
- /* Leading :: requires some special handling. */
- if (*str == ':')
- {
- ++str;
- if (str == end || *str != ':')
- return 0;
- }
-
- curtok = str;
- saw_xdigit = 0;
- val = 0;
-
- while (str < end)
- {
- int ch = *str++;
-
- /* if ch is a number, add it to val. */
- if (ISXDIGIT (ch))
- {
- val <<= 4;
- val |= XDIGIT_TO_NUM (ch);
- if (val > 0xffff)
- return 0;
- saw_xdigit = 1;
- continue;
- }
-
- /* if ch is a colon ... */
- if (ch == ':')
- {
- curtok = str;
- if (saw_xdigit == 0)
- {
- if (colonp != NULL)
- return 0;
- colonp = str + tp;
- continue;
- }
- else if (str == end)
- return 0;
- if (tp > NS_IN6ADDRSZ - NS_INT16SZ)
- return 0;
- tp += NS_INT16SZ;
- saw_xdigit = 0;
- val = 0;
- continue;
- }
-
- /* if ch is a dot ... */
- if (ch == '.' && (tp <= NS_IN6ADDRSZ - NS_INADDRSZ)
- && is_valid_ipv4_address (curtok, end) == 1)
- {
- tp += NS_INADDRSZ;
- saw_xdigit = 0;
- break;
- }
-
- return 0;
- }
-
- if (saw_xdigit == 1)
- {
- if (tp > NS_IN6ADDRSZ - NS_INT16SZ)
- return 0;
- tp += NS_INT16SZ;
- }
-
- if (colonp != NULL)
- {
- if (tp == NS_IN6ADDRSZ)
- return 0;
- tp = NS_IN6ADDRSZ;
- }
-
- if (tp != NS_IN6ADDRSZ)
- return 0;
-
- return 1;
-}
-#endif
-
/* Parse a URL.
Return a new struct url if successful, NULL on error. In case of
p += strlen (supported_schemes[scheme].leading_string);
uname_b = p;
- p += url_skip_credentials (p);
+ p = url_skip_credentials (p);
uname_e = p;
/* scheme://user:pass@host[:port]... */
goto error;
}
port = 10 * port + (*pp - '0');
+ /* Check for too large port numbers here, before we have
+ a chance to overflow on bogus port values. */
+ if (port > 65535)
+ {
+ error_code = PE_BAD_PORT_NUMBER;
+ goto error;
+ }
}
}
}
host_modified = lowercase_str (u->host);
+ /* Decode %HH sequences in host name. This is important not so much
+ to support %HH sequences, but to support binary characters (which
+ will have been converted to %HH by reencode_escapes). */
+ if (strchr (u->host, '%'))
+ {
+ url_unescape (u->host);
+ host_modified = 1;
+ }
+
if (params_b)
u->params = strdupdelim (params_b, params_e);
if (query_b)
url_full_path (const struct url *url)
{
int length = full_path_length (url);
- char *full_path = (char *)xmalloc(length + 1);
+ char *full_path = (char *) xmalloc (length + 1);
full_path_write (url, full_path);
full_path[length] = '\0';
{
const char *p;
char *t;
- struct stat st;
+ struct_stat st;
int res;
p = path + strlen (path);
char *
url_file_name (const struct url *u)
{
- struct growable fnres;
+ struct growable fnres; /* stands for "file name result" */
const char *u_file, *u_query;
char *fname, *unique;
{
if (fnres.tail)
append_char ('/', &fnres);
- append_string (u->host, &fnres);
+ if (0 != strcmp (u->host, ".."))
+ append_string (u->host, &fnres);
+ else
+ /* Host name can come from the network; malicious DNS may
+ allow ".." to be resolved, causing us to write to
+ "../<file>". Defang such host names. */
+ append_string ("%2E%2E", &fnres);
if (u->port != scheme_default_port (u->scheme))
{
char portstr[24];