You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
-Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+In addition, as a special exception, the Free Software Foundation
+gives permission to link the code of its release of Wget with the
+OpenSSL project's "OpenSSL" library (or with modified versions of it
+that use the same license as the "OpenSSL" library), and distribute
+the linked executables. You must obey the GNU General Public License
+in all respects for all of the code used other than "OpenSSL". If you
+modify this file, you may extend this exception to your version of the
+file, but you are not obligated to do so. If you do not wish to do
+so, delete this exception statement from your version. */
#include <config.h>
/* Is X ".."? */
#define DDOTP(x) ((*(x) == '.') && (*(x + 1) == '.') && (!*(x + 2)))
+static const int NS_INADDRSZ = 4; /* bytes an embedded dotted-quad IPv4 tail adds to the IPv6 byte count */
+static const int NS_IN6ADDRSZ = 16; /* byte count a complete IPv6 address must reach */
+static const int NS_INT16SZ = 2; /* bytes each colon-separated hex group adds to the byte count */
+
+
struct scheme_data
{
char *leading_string;
RU, 0, 0, 0, 0, 0, 0, 0, /* @ A B C D E F G */
0, 0, 0, 0, 0, 0, 0, 0, /* H I J K L M N O */
0, 0, 0, 0, 0, 0, 0, 0, /* P Q R S T U V W */
- 0, 0, 0, U, U, U, U, 0, /* X Y Z [ \ ] ^ _ */
+ 0, 0, 0, RU, U, RU, U, 0, /* X Y Z [ \ ] ^ _ */
U, 0, 0, 0, 0, 0, 0, 0, /* ` a b c d e f g */
0, 0, 0, 0, 0, 0, 0, 0, /* h i j k l m n o */
0, 0, 0, 0, 0, 0, 0, 0, /* p q r s t u v w */
memcpy (*user, str, len);
(*user)[len] = '\0';
+ if (*user)
+ decode_string (*user);
+ if (*passwd)
+ decode_string (*passwd);
+
return 1;
}
}
static char *parse_errors[] = {
-#define PE_NO_ERROR 0
+#define PE_NO_ERROR 0 /* each PE_* code equals the position of the message listed right after it */
 "No error",
-#define PE_UNSUPPORTED_SCHEME 1
+#define PE_UNSUPPORTED_SCHEME 1
 "Unsupported scheme",
-#define PE_EMPTY_HOST 2
+#define PE_EMPTY_HOST 2
 "Empty host",
-#define PE_BAD_PORT_NUMBER 3
+#define PE_BAD_PORT_NUMBER 3
 "Bad port number",
-#define PE_INVALID_USER_NAME 4
- "Invalid user name"
+#define PE_INVALID_USER_NAME 4
+ "Invalid user name",
+#define PE_UNTERMINATED_IPV6_ADDRESS 5 /* host began with '[' but no ']' followed */
+ "Unterminated IPv6 numeric address",
+#define PE_IPV6_NOT_SUPPORTED 6 /* bracketed host given, but built without ENABLE_IPV6 */
+ "IPv6 addresses not supported",
+#define PE_INVALID_IPV6_ADDRESS 7 /* text inside the brackets failed is_valid_ipv6_address */
+ "Invalid IPv6 numeric address"
};
#define SETERR(p, v) do { \
*(p) = (v); \
} while (0)
+#ifdef ENABLE_IPV6
+/* The following two functions were adapted from glibc.
+
+   is_valid_ipv4_address: return 1 if the characters in [STR, END)
+   form a dotted-quad IPv4 address, and 0 otherwise.  A valid address
+   has exactly four decimal fields separated by single dots; every
+   field has at least one digit and a value no greater than 255.
+   Leading zeros in a field are accepted.  END is exclusive: nothing
+   at or beyond END is read, so the text need not be NUL-terminated
+   there.  */
+
+static int
+is_valid_ipv4_address (const char *str, const char *end)
+{
+ int saw_digit, octets; /* saw_digit: current field already has a digit; octets: fields started so far */
+ int val; /* numeric value of the field being read */
+
+ saw_digit = 0;
+ octets = 0;
+ val = 0;
+
+ while (str < end) {
+ int ch = *str++;
+
+ if (ch >= '0' && ch <= '9') {
+ val = val * 10 + (ch - '0');
+
+ if (val > 255) /* checked after every digit, so val can never grow large */
+ return 0;
+ if (saw_digit == 0) { /* first digit of a new field */
+ if (++octets > 4)
+ return 0;
+ saw_digit = 1;
+ }
+ } else if (ch == '.' && saw_digit == 1) { /* a dot is only legal right after a digit */
+ if (octets == 4) /* nothing may follow the fourth field */
+ return 0;
+ val = 0;
+ saw_digit = 0;
+ } else
+ return 0; /* any other character, or a dot with no digit before it */
+ }
+ if (octets < 4) /* fewer than four fields were started */
+ return 0;
+
+ return 1;
+}
+
+/* Return 1 if the characters in [STR, END) form a valid textual IPv6
+   address, 0 otherwise.  END is exclusive: nothing at or beyond END
+   is read, so the text need not be NUL-terminated there.
+
+   Accepted syntax: colon-separated groups of hexadecimal digits, in
+   either letter case, each with a value no larger than 0xffff; at
+   most one "::" standing for a run of zero groups; and optionally a
+   dotted-quad IPv4 address in place of the last two groups.  Without
+   "::" the groups must account for exactly NS_IN6ADDRSZ bytes; with
+   "::" they must account for fewer.  */
+static int
+is_valid_ipv6_address (const char *str, const char *end)
+{
+  const char *curtok;       /* start of the group currently being read */
+  int tp;                   /* address bytes accounted for so far */
+  int saw_double_colon;     /* non-zero once "::" has been seen */
+  int saw_xdigit;           /* current group has at least one digit */
+  unsigned int val;         /* value of the current group */
+
+  tp = 0;
+  saw_double_colon = 0;
+
+  if (str == end)
+    return 0;
+
+  /* Leading :: requires some special handling: a lone leading ':' is
+     invalid, so step over the first colon and insist on a second.  */
+  if (*str == ':')
+    {
+      ++str;
+      if (str == end || *str != ':')
+        return 0;
+    }
+
+  curtok = str;
+  saw_xdigit = 0;
+  val = 0;
+
+  while (str < end)
+    {
+      int ch = *str++;
+      int digit;
+
+      /* Decode a hex digit by range so that both letter cases are
+         accepted and a NUL byte is never mistaken for a digit.  */
+      if (ch >= '0' && ch <= '9')
+        digit = ch - '0';
+      else if (ch >= 'a' && ch <= 'f')
+        digit = ch - 'a' + 10;
+      else if (ch >= 'A' && ch <= 'F')
+        digit = ch - 'A' + 10;
+      else
+        digit = -1;
+
+      /* If ch is a hex digit, add it to val.  */
+      if (digit >= 0)
+        {
+          val <<= 4;
+          val |= (unsigned int) digit;
+          if (val > 0xffff)
+            return 0;
+          saw_xdigit = 1;
+          continue;
+        }
+
+      /* If ch is a colon ...  */
+      if (ch == ':')
+        {
+          curtok = str;
+          if (saw_xdigit == 0)
+            {
+              /* A colon with no digits before it is the second half
+                 of "::", which may appear only once.  */
+              if (saw_double_colon)
+                return 0;
+              saw_double_colon = 1;
+              continue;
+            }
+          else if (str == end)
+            {
+              /* A single trailing colon is not allowed.  */
+              return 0;
+            }
+          if (tp > NS_IN6ADDRSZ - NS_INT16SZ)
+            return 0;
+          tp += NS_INT16SZ;
+          saw_xdigit = 0;
+          val = 0;
+          continue;
+        }
+
+      /* If ch is a dot, the rest of the text starting at the current
+         group must be a complete IPv4 address that still fits.  */
+      if (ch == '.' && (tp <= NS_IN6ADDRSZ - NS_INADDRSZ)
+          && is_valid_ipv4_address (curtok, end) == 1)
+        {
+          tp += NS_INADDRSZ;
+          saw_xdigit = 0;
+          break;
+        }
+
+      return 0;
+    }
+
+  /* Account for a final hex group that was not followed by a colon.  */
+  if (saw_xdigit == 1)
+    {
+      if (tp > NS_IN6ADDRSZ - NS_INT16SZ)
+        return 0;
+      tp += NS_INT16SZ;
+    }
+
+  /* "::" must stand for at least one group; it then fills the address
+     out to its full size.  */
+  if (saw_double_colon)
+    {
+      if (tp == NS_IN6ADDRSZ)
+        return 0;
+      tp = NS_IN6ADDRSZ;
+    }
+
+  if (tp != NS_IN6ADDRSZ)
+    return 0;
+
+  return 1;
+}
+#endif
+
/* Parse a URL.
Return a new struct url if successful, NULL on error. In case of
fragment_b = fragment_e = NULL;
host_b = p;
- p = strpbrk_or_eos (p, ":/;?#");
- host_e = p;
+
+ if (*p == '[')
+ {
+ /* Handle IPv6 address inside square brackets. Ideally we'd
+ just look for the terminating ']', but rfc2732 mandates
+ rejecting invalid IPv6 addresses. */
+
+ /* The address begins after '['. */
+ host_b = p + 1;
+ host_e = strchr (host_b, ']');
+
+ if (!host_e)
+ {
+ SETERR (error, PE_UNTERMINATED_IPV6_ADDRESS);
+ return NULL;
+ }
+
+#ifdef ENABLE_IPV6
+ /* Check if the IPv6 address is valid. */
+ if (!is_valid_ipv6_address(host_b, host_e))
+ {
+ SETERR (error, PE_INVALID_IPV6_ADDRESS);
+ return NULL;
+ }
+
+ /* Continue parsing after the closing ']'. */
+ p = host_e + 1;
+#else
+ SETERR (error, PE_IPV6_NOT_SUPPORTED);
+ return NULL;
+#endif
+ }
+ else
+ {
+ p = strpbrk_or_eos (p, ":/;?#");
+ host_e = p;
+ }
if (host_b == host_e)
{
SETERR (error, PE_BAD_PORT_NUMBER);
return NULL;
}
+
port = 10 * port + (*pp - '0');
}
}
query_b = p;
p = strpbrk_or_eos (p, "#");
query_e = p;
+
+ /* Hack that allows users to use '?' (a wildcard character) in
+ FTP URLs without it being interpreted as a query string
+ delimiter. */
+ if (scheme == SCHEME_FTP)
+ {
+ query_b = query_e = NULL;
+ path_e = p;
+ }
}
if (*p == '#')
{
static char *
mkstruct (const struct url *u)
{
- char *dir, *dir_preencoding;
- char *file, *res, *dirpref;
- char *query = u->query && *u->query ? u->query : NULL;
+ char *dir, *file;
+ char *res, *dirpref;
int l;
if (opt.cut_dirs)
dir = newdir;
}
- dir_preencoding = dir;
- dir = reencode_string (dir_preencoding);
-
l = strlen (dir);
if (l && dir[l - 1] == '/')
dir[l - 1] = '\0';
/* Finally, construct the full name. */
res = (char *)xmalloc (strlen (dir) + 1 + strlen (file)
- + (query ? (1 + strlen (query)) : 0)
+ 1);
sprintf (res, "%s%s%s", dir, *dir ? "/" : "", file);
- if (query)
- {
- strcat (res, "?");
- strcat (res, query);
- }
- if (dir != dir_preencoding)
- xfree (dir);
+
return res;
}
url_filename (const struct url *u)
{
char *file, *name;
- int have_prefix = 0; /* whether we must prepend opt.dir_prefix */
+
+ char *query = u->query && *u->query ? u->query : NULL;
if (opt.dirstruct)
{
- file = mkstruct (u);
- have_prefix = 1;
+ char *base = mkstruct (u);
+ file = compose_file_name (base, query);
+ xfree (base);
}
else
{
char *base = *u->file ? u->file : "index.html";
- char *query = u->query && *u->query ? u->query : NULL;
file = compose_file_name (base, query);
- }
- if (!have_prefix)
- {
/* Check whether the prefix directory is something other than "."
before prepending it. */
if (!DOTP (opt.dir_prefix))
{
+ /* #### should just realloc FILE and prepend dir_prefix. */
char *nfile = (char *)xmalloc (strlen (opt.dir_prefix)
+ 1 + strlen (file) + 1);
sprintf (nfile, "%s/%s", opt.dir_prefix, file);
file = nfile;
}
}
+
/* DOS-ish file systems don't like `%' signs in them; we change it
to `@'. */
#ifdef WINDOWS
memcpy (constr + baselength, link, linklength);
constr[baselength + linklength] = '\0';
}
+ else if (linklength > 1 && *link == '/' && *(link + 1) == '/')
+ {
+ /* LINK begins with "//" and so is a net path: we need to
+ replace everything after (and including) the double slash
+ with LINK. */
+
+ /* uri_merge("foo", "//new/bar") -> "//new/bar" */
+ /* uri_merge("//old/foo", "//new/bar") -> "//new/bar" */
+ /* uri_merge("http://old/foo", "//new/bar") -> "http://new/bar" */
+
+ int span;
+ const char *slash;
+ const char *start_insert;
+
+ /* Look for first slash. */
+ slash = memchr (base, '/', end - base);
+ /* If found slash and it is a double slash, then replace
+ from this point, else default to replacing from the
+ beginning. */
+ if (slash && *(slash + 1) == '/')
+ start_insert = slash;
+ else
+ start_insert = base;
+
+ span = start_insert - base;
+ constr = (char *)xmalloc (span + linklength + 1);
+ if (span)
+ memcpy (constr, base, span);
+ memcpy (constr + span, link, linklength);
+ constr[span + linklength] = '\0';
+ }
else if (*link == '/')
{
/* LINK is an absolute path: we need to replace everything
char *scheme_str = supported_schemes[url->scheme].leading_string;
int fplen = full_path_length (url);
+ int brackets_around_host = 0;
+
assert (scheme_str != NULL);
/* Make sure the user name and password are quoted. */
}
}
+ if (strchr (url->host, ':'))
+ brackets_around_host = 1;
+
size = (strlen (scheme_str)
+ strlen (url->host)
+ + (brackets_around_host ? 2 : 0)
+ fplen
+ 1);
if (url->port != scheme_port)
*p++ = '@';
}
+ if (brackets_around_host)
+ *p++ = '[';
APPEND (p, url->host);
+ if (brackets_around_host)
+ *p++ = ']';
if (url->port != scheme_port)
{
*p++ = ':';
return result;
}
\f
-/* Returns proxy host address, in accordance with SCHEME. */
+/* Return the URL of the proxy appropriate for url U. */
char *
-getproxy (enum url_scheme scheme)
+getproxy (struct url *u)
{
char *proxy = NULL;
char *rewritten_url;
static char rewritten_storage[1024];
- switch (scheme)
+ if (!opt.use_proxy)
+ return NULL;
+ if (!no_proxy_match (u->host, (const char **)opt.no_proxy))
+ return NULL;
+
+ switch (u->scheme)
{
case SCHEME_HTTP:
proxy = opt.http_proxy ? opt.http_proxy : getenv ("http_proxy");
if (!proxy || !*proxy)
return NULL;
- /* Handle shorthands. */
+ /* Handle shorthands. `rewritten_storage' is a kludge to allow
+ getproxy() to return static storage. */
rewritten_url = rewrite_shorthand_url (proxy);
if (rewritten_url)
{
downloaded_files_hash = NULL;
}
}
+
+/* Tell whether schemes A and B belong to the same family; the result
+   is 1 when they do and 0 when they do not.
+
+   Identical schemes always match.  When Wget is built with SSL
+   support (HAVE_SSL), SCHEME_HTTP and SCHEME_HTTPS are additionally
+   treated as one family, in either order.  */
+int
+schemes_are_similar_p (enum url_scheme a, enum url_scheme b)
+{
+#ifdef HAVE_SSL
+  /* Fold https onto http so one comparison also covers the mixed pair.  */
+  enum url_scheme lhs = (a == SCHEME_HTTPS) ? SCHEME_HTTP : a;
+  enum url_scheme rhs = (b == SCHEME_HTTPS) ? SCHEME_HTTP : b;
+#else
+  enum url_scheme lhs = a;
+  enum url_scheme rhs = b;
+#endif
+
+  return lhs == rhs ? 1 : 0;
+}
\f
#if 0
/* Debugging and testing support for path_simplify. */