similarity (SCHEME_HTTP and SCHEME_HTTPS are similar). Use it in recur.c
(download_child_p). Fixes a bug that caused the -H option to be ignored when
the child scheme differs from the parent scheme.
Published in <agn4eu8apduek7magfu9bfe63gto8i7cdh@farscape.privy.mev.co.uk>.
+2002-05-16 Ian Abbott <abbotti@mev.co.uk>
+
+ * url.c (schemes_are_similar_p): New function to test enumerated
+ scheme codes for similarity.
+
+ * url.h: Declare it.
+
+ * recur.c (download_child_p): Use it to compare schemes. This
+ also fixes a bug that allows hosts to be spanned (without the
+ -H option) when the parent scheme is https and the child's is
+ http or vice versa.
+
2002-05-14 Bill Richardson <bill@riverstonenet.com>
* ftp.c (getftp): Don't ftruncate stdout.
2002-05-14 Bill Richardson <bill@riverstonenet.com>
* ftp.c (getftp): Don't ftruncate stdout.
{
struct url *u = upos->url;
const char *url = u->url;
{
struct url *u = upos->url;
const char *url = u->url;
+ int u_scheme_like_http;
DEBUGP (("Deciding whether to enqueue \"%s\".\n", url));
DEBUGP (("Deciding whether to enqueue \"%s\".\n", url));
More time- and memory- consuming tests should be put later on
the list. */
More time- and memory- consuming tests should be put later on
the list. */
+ /* Determine whether URL under consideration has a HTTP-like scheme. */
+ u_scheme_like_http = schemes_are_similar_p (u->scheme, SCHEME_HTTP);
+
/* 1. Schemes other than HTTP are normally not recursed into. */
/* 1. Schemes other than HTTP are normally not recursed into. */
- if (u->scheme != SCHEME_HTTP
-#ifdef HAVE_SSL
- && u->scheme != SCHEME_HTTPS
-#endif
- && !(u->scheme == SCHEME_FTP && opt.follow_ftp))
+ if (!u_scheme_like_http && !(u->scheme == SCHEME_FTP && opt.follow_ftp))
{
DEBUGP (("Not following non-HTTP schemes.\n"));
goto out;
{
DEBUGP (("Not following non-HTTP schemes.\n"));
goto out;
/* 2. If it is an absolute link and they are not followed, throw it
out. */
/* 2. If it is an absolute link and they are not followed, throw it
out. */
- if (u->scheme == SCHEME_HTTP
-#ifdef HAVE_SSL
- || u->scheme == SCHEME_HTTPS
-#endif
- )
+ if (schemes_are_similar_p (u->scheme, SCHEME_HTTP))
if (opt.relative_only && !upos->link_relative_p)
{
DEBUGP (("It doesn't really look like a relative link.\n"));
if (opt.relative_only && !upos->link_relative_p)
{
DEBUGP (("It doesn't really look like a relative link.\n"));
opt.no_parent. Also ignore it for documents needed to display
the parent page when in -p mode. */
if (opt.no_parent
opt.no_parent. Also ignore it for documents needed to display
the parent page when in -p mode. */
if (opt.no_parent
- && u->scheme == start_url_parsed->scheme
+ && schemes_are_similar_p (u->scheme, start_url_parsed->scheme)
&& 0 == strcasecmp (u->host, start_url_parsed->host)
&& u->port == start_url_parsed->port
&& !(opt.page_requisites && upos->link_inline_p))
&& 0 == strcasecmp (u->host, start_url_parsed->host)
&& u->port == start_url_parsed->port
&& !(opt.page_requisites && upos->link_inline_p))
- if (u->scheme == parent->scheme)
+ if (schemes_are_similar_p (u->scheme, parent->scheme))
if (!opt.spanhost && 0 != strcasecmp (parent->host, u->host))
{
DEBUGP (("This is not the same hostname as the parent's (%s and %s).\n",
if (!opt.spanhost && 0 != strcasecmp (parent->host, u->host))
{
DEBUGP (("This is not the same hostname as the parent's (%s and %s).\n",
- if (opt.use_robots
- && (u->scheme == SCHEME_HTTP
-#ifdef HAVE_SSL
- || u->scheme == SCHEME_HTTPS
-#endif
- )
- )
+ if (opt.use_robots && u_scheme_like_http)
{
struct robot_specs *specs = res_get_specs (u->host, u->port);
if (!specs)
{
struct robot_specs *specs = res_get_specs (u->host, u->port);
if (!specs)
downloaded_files_hash = NULL;
}
}
downloaded_files_hash = NULL;
}
}
+
+/* Return 1 if scheme A is similar to scheme B, 0 otherwise.
+
+ Two schemes are similar when they are equal. When HAVE_SSL is
+ defined, SCHEME_HTTP and SCHEME_HTTPS are also similar to each
+ other, whichever of the two is passed as A and which as B. The
+ relation is therefore symmetric: swapping the arguments never
+ changes the result.
+
+ The HTTP/HTTPS comparison is compiled only under HAVE_SSL; without
+ it only exact equality counts. */
+int
+schemes_are_similar_p (enum url_scheme a, enum url_scheme b)
+{
+ if (a == b)
+ return 1;
+#ifdef HAVE_SSL
+ if ((a == SCHEME_HTTP && b == SCHEME_HTTPS)
+ || (a == SCHEME_HTTPS && b == SCHEME_HTTP))
+ return 1;
+#endif
+ return 0;
+}
\f
#if 0
/* Debugging and testing support for path_simplify. */
\f
#if 0
/* Debugging and testing support for path_simplify. */
char *rewrite_shorthand_url PARAMS ((const char *));
char *rewrite_shorthand_url PARAMS ((const char *));
+int schemes_are_similar_p PARAMS ((enum url_scheme a, enum url_scheme b));
+