You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
-Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+In addition, as a special exception, the Free Software Foundation
+gives permission to link the code of its release of Wget with the
+OpenSSL project's "OpenSSL" library (or with modified versions of it
+that use the same license as the "OpenSSL" library), and distribute
+the linked executables. You must obey the GNU General Public License
+in all respects for all of the code used other than "OpenSSL". If you
+modify this file, you may extend this exception to your version of the
+file, but you are not obligated to do so. If you do not wish to do
+so, delete this exception statement from your version. */
#include <config.h>
RU, 0, 0, 0, 0, 0, 0, 0, /* @ A B C D E F G */
0, 0, 0, 0, 0, 0, 0, 0, /* H I J K L M N O */
0, 0, 0, 0, 0, 0, 0, 0, /* P Q R S T U V W */
- 0, 0, 0, U, U, U, U, 0, /* X Y Z [ \ ] ^ _ */
+ 0, 0, 0, RU, U, RU, U, 0, /* X Y Z [ \ ] ^ _ */
U, 0, 0, 0, 0, 0, 0, 0, /* ` a b c d e f g */
0, 0, 0, 0, 0, 0, 0, 0, /* h i j k l m n o */
0, 0, 0, 0, 0, 0, 0, 0, /* p q r s t u v w */
memcpy (*user, str, len);
(*user)[len] = '\0';
+ if (*user)
+ decode_string (*user);
+ if (*passwd)
+ decode_string (*passwd);
+
return 1;
}
}
static char *parse_errors[] = {
-#define PE_NO_ERROR 0
+#define PE_NO_ERROR 0
"No error",
-#define PE_UNSUPPORTED_SCHEME 1
+#define PE_UNSUPPORTED_SCHEME 1
"Unsupported scheme",
-#define PE_EMPTY_HOST 2
+#define PE_EMPTY_HOST 2
"Empty host",
-#define PE_BAD_PORT_NUMBER 3
+#define PE_BAD_PORT_NUMBER 3
"Bad port number",
-#define PE_INVALID_USER_NAME 4
- "Invalid user name"
+#define PE_INVALID_USER_NAME 4
+ "Invalid user name",
+#define PE_UNTERMINATED_IPV6_ADDRESS 5
+ "Unterminated IPv6 numeric address",
+#define PE_INVALID_IPV6_ADDRESS 6
+ "Invalid char in IPv6 numeric address"
};
#define SETERR(p, v) do { \
fragment_b = fragment_e = NULL;
host_b = p;
- p = strpbrk_or_eos (p, ":/;?#");
- host_e = p;
+
+ if (*p == '[')
+ {
+ /* Support http://[::1]/ used by IPv6. */
+ int invalid = 0;
+ ++p;
+ while (1)
+ {
+ char c = *p++;
+ switch (c)
+ {
+ case ']':
+ goto out;
+ case '\0':
+ SETERR (error, PE_UNTERMINATED_IPV6_ADDRESS);
+ return NULL;
+ case ':': case '.':
+ break;
+ default:
+ if (ISXDIGIT (c))
+ break;
+ invalid = 1;
+ }
+ }
+ out:
+ if (invalid)
+ {
+ SETERR (error, PE_INVALID_IPV6_ADDRESS);
+ return NULL;
+ }
+ /* Don't include brackets in [host_b, host_e). */
+ ++host_b;
+ host_e = p - 1;
+ }
+ else
+ {
+ p = strpbrk_or_eos (p, ":/;?#");
+ host_e = p;
+ }
if (host_b == host_e)
{
query_b = p;
p = strpbrk_or_eos (p, "#");
query_e = p;
+
+ /* Hack that allows users to use '?' (a wildcard character) in
+ FTP URLs without it being interpreted as a query string
+ delimiter. */
+ if (scheme == SCHEME_FTP)
+ {
+ query_b = query_e = NULL;
+ path_e = p;
+ }
}
if (*p == '#')
{
static char *
mkstruct (const struct url *u)
{
- char *dir, *dir_preencoding;
- char *file, *res, *dirpref;
- char *query = u->query && *u->query ? u->query : NULL;
+ char *dir, *file;
+ char *res, *dirpref;
int l;
if (opt.cut_dirs)
dir = newdir;
}
- dir_preencoding = dir;
- dir = reencode_string (dir_preencoding);
-
l = strlen (dir);
if (l && dir[l - 1] == '/')
dir[l - 1] = '\0';
/* Finally, construct the full name. */
res = (char *)xmalloc (strlen (dir) + 1 + strlen (file)
- + (query ? (1 + strlen (query)) : 0)
+ 1);
sprintf (res, "%s%s%s", dir, *dir ? "/" : "", file);
- if (query)
- {
- strcat (res, "?");
- strcat (res, query);
- }
- if (dir != dir_preencoding)
- xfree (dir);
+
return res;
}
url_filename (const struct url *u)
{
char *file, *name;
- int have_prefix = 0; /* whether we must prepend opt.dir_prefix */
+
+ char *query = u->query && *u->query ? u->query : NULL;
if (opt.dirstruct)
{
- file = mkstruct (u);
- have_prefix = 1;
+ char *base = mkstruct (u);
+ file = compose_file_name (base, query);
+ xfree (base);
}
else
{
char *base = *u->file ? u->file : "index.html";
- char *query = u->query && *u->query ? u->query : NULL;
file = compose_file_name (base, query);
- }
- if (!have_prefix)
- {
/* Check whether the prefix directory is something other than "."
before prepending it. */
if (!DOTP (opt.dir_prefix))
{
+ /* #### should just realloc FILE and prepend dir_prefix. */
char *nfile = (char *)xmalloc (strlen (opt.dir_prefix)
+ 1 + strlen (file) + 1);
sprintf (nfile, "%s/%s", opt.dir_prefix, file);
file = nfile;
}
}
+
/* DOS-ish file systems don't like `%' signs in them; we change it
to `@'. */
#ifdef WINDOWS
memcpy (constr + baselength, link, linklength);
constr[baselength + linklength] = '\0';
}
+ else if (linklength > 1 && *link == '/' && *(link + 1) == '/')
+ {
+ /* LINK begins with "//" and so is a net path: we need to
+ replace everything after (and including) the double slash
+ with LINK. */
+
+ /* uri_merge("foo", "//new/bar") -> "//new/bar" */
+ /* uri_merge("//old/foo", "//new/bar") -> "//new/bar" */
+ /* uri_merge("http://old/foo", "//new/bar") -> "http://new/bar" */
+
+ int span;
+ const char *slash;
+ const char *start_insert;
+
+ /* Look for first slash. */
+ slash = memchr (base, '/', end - base);
+ /* If found slash and it is a double slash, then replace
+ from this point, else default to replacing from the
+ beginning. */
+ if (slash && *(slash + 1) == '/')
+ start_insert = slash;
+ else
+ start_insert = base;
+
+ span = start_insert - base;
+ constr = (char *)xmalloc (span + linklength + 1);
+ if (span)
+ memcpy (constr, base, span);
+ memcpy (constr + span, link, linklength);
+ constr[span + linklength] = '\0';
+ }
else if (*link == '/')
{
/* LINK is an absolute path: we need to replace everything
char *scheme_str = supported_schemes[url->scheme].leading_string;
int fplen = full_path_length (url);
+ int brackets_around_host = 0;
+
assert (scheme_str != NULL);
/* Make sure the user name and password are quoted. */
}
}
+ if (strchr (url->host, ':'))
+ brackets_around_host = 1;
+
size = (strlen (scheme_str)
+ strlen (url->host)
+ + (brackets_around_host ? 2 : 0)
+ fplen
+ 1);
if (url->port != scheme_port)
*p++ = '@';
}
+ if (brackets_around_host)
+ *p++ = '[';
APPEND (p, url->host);
+ if (brackets_around_host)
+ *p++ = ']';
if (url->port != scheme_port)
{
*p++ = ':';
return result;
}
\f
-/* Returns proxy host address, in accordance with SCHEME. */
+/* Return the URL of the proxy appropriate for url U. */
char *
-getproxy (enum url_scheme scheme)
+getproxy (struct url *u)
{
char *proxy = NULL;
char *rewritten_url;
static char rewritten_storage[1024];
- switch (scheme)
+ if (!opt.use_proxy)
+ return NULL;
+ if (!no_proxy_match (u->host, (const char **)opt.no_proxy))
+ return NULL;
+
+ switch (u->scheme)
{
case SCHEME_HTTP:
proxy = opt.http_proxy ? opt.http_proxy : getenv ("http_proxy");
if (!proxy || !*proxy)
return NULL;
- /* Handle shorthands. */
+ /* Handle shorthands. `rewritten_storage' is a kludge to allow
+ getproxy() to return static storage. */
rewritten_url = rewrite_shorthand_url (proxy);
if (rewritten_url)
{
downloaded_files_hash = NULL;
}
}
+
+/* Decide whether schemes A and B can be treated as interchangeable.
+
+   The result is 1 when A and B are the same scheme.  In builds with
+   HAVE_SSL defined, the result is also 1 when one of them is
+   SCHEME_HTTP and the other is SCHEME_HTTPS, in either order.  In
+   every other case the result is 0.  */
+int
+schemes_are_similar_p (enum url_scheme a, enum url_scheme b)
+{
+  int similar = (a == b);
+#ifdef HAVE_SSL
+  if (!similar)
+    similar = ((a == SCHEME_HTTP && b == SCHEME_HTTPS)
+               || (b == SCHEME_HTTP && a == SCHEME_HTTPS));
+#endif
+  return similar;
+}
\f
#if 0
/* Debugging and testing support for path_simplify. */