From e863a6323b88d2cfddffc64aba84e0d9af7ba921 Mon Sep 17 00:00:00 2001 From: abbotti Date: Thu, 16 May 2002 10:22:24 -0700 Subject: [PATCH] [svn] New function schemes_are_similar_p to test enumerated scheme codes for similarity (SCHEME_HTTP and SCHEME_HTTPS are similar). Use it in recur.c (download_child_p). Fixes a bug that caused the -H option to be ignored when the child scheme differs from the parent scheme. Published in . --- src/ChangeLog | 12 ++++++++++++ src/recur.c | 28 +++++++++------------------- src/url.c | 18 ++++++++++++++++++ src/url.h | 2 ++ 4 files changed, 41 insertions(+), 19 deletions(-) diff --git a/src/ChangeLog b/src/ChangeLog index 95009b85..8d309055 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,15 @@ +2002-05-16 Ian Abbott + + * url.c (schemes_are_similar_p): New function to test enumerated + scheme codes for similarity. + + * url.h: Declare it. + + * recur.c (download_child_p): Use it to compare schemes. This + also fixes a bug that allows hosts to be spanned (without the + -H option) when the parent scheme is https and the child's is + http or vice versa. + 2002-05-14 Bill Richardson * ftp.c (getftp): Don't ftruncate stdout. diff --git a/src/recur.c b/src/recur.c index a77ff391..7339c365 100644 --- a/src/recur.c +++ b/src/recur.c @@ -415,6 +415,7 @@ download_child_p (const struct urlpos *upos, struct url *parent, int depth, { struct url *u = upos->url; const char *url = u->url; + int u_scheme_like_http; DEBUGP (("Deciding whether to enqueue \"%s\".\n", url)); @@ -445,12 +446,11 @@ download_child_p (const struct urlpos *upos, struct url *parent, int depth, More time- and memory- consuming tests should be put later on the list. */ + /* Determine whether URL under consideration has a HTTP-like scheme. */ + u_scheme_like_http = schemes_are_similar_p (u->scheme, SCHEME_HTTP); + /* 1. Schemes other than HTTP are normally not recursed into. 
*/ - if (u->scheme != SCHEME_HTTP -#ifdef HAVE_SSL - && u->scheme != SCHEME_HTTPS -#endif - && !(u->scheme == SCHEME_FTP && opt.follow_ftp)) + if (!u_scheme_like_http && !(u->scheme == SCHEME_FTP && opt.follow_ftp)) { DEBUGP (("Not following non-HTTP schemes.\n")); goto out; @@ -458,11 +458,7 @@ download_child_p (const struct urlpos *upos, struct url *parent, int depth, /* 2. If it is an absolute link and they are not followed, throw it out. */ - if (u->scheme == SCHEME_HTTP -#ifdef HAVE_SSL - || u->scheme == SCHEME_HTTPS -#endif - ) + if (schemes_are_similar_p (u->scheme, SCHEME_HTTP)) if (opt.relative_only && !upos->link_relative_p) { DEBUGP (("It doesn't really look like a relative link.\n")); @@ -483,7 +479,7 @@ download_child_p (const struct urlpos *upos, struct url *parent, int depth, opt.no_parent. Also ignore it for documents needed to display the parent page when in -p mode. */ if (opt.no_parent - && u->scheme == start_url_parsed->scheme + && schemes_are_similar_p (u->scheme, start_url_parsed->scheme) && 0 == strcasecmp (u->host, start_url_parsed->host) && u->port == start_url_parsed->port && !(opt.page_requisites && upos->link_inline_p)) @@ -526,7 +522,7 @@ download_child_p (const struct urlpos *upos, struct url *parent, int depth, } /* 7. */ - if (u->scheme == parent->scheme) + if (schemes_are_similar_p (u->scheme, parent->scheme)) if (!opt.spanhost && 0 != strcasecmp (parent->host, u->host)) { DEBUGP (("This is not the same hostname as the parent's (%s and %s).\n", @@ -535,13 +531,7 @@ download_child_p (const struct urlpos *upos, struct url *parent, int depth, } /* 8. 
*/ - if (opt.use_robots - && (u->scheme == SCHEME_HTTP -#ifdef HAVE_SSL - || u->scheme == SCHEME_HTTPS -#endif - ) - ) + if (opt.use_robots && u_scheme_like_http) { struct robot_specs *specs = res_get_specs (u->host, u->port); if (!specs) diff --git a/src/url.c b/src/url.c index 6bcaa39a..f68ee610 100644 --- a/src/url.c +++ b/src/url.c @@ -2472,6 +2472,24 @@ downloaded_files_free (void) downloaded_files_hash = NULL; } } + +/* Return non-zero if scheme a is similar to scheme b. + + Schemes are similar if they are equal. If SSL is supported, schemes + are also similar if one is http (SCHEME_HTTP) and the other is https + (SCHEME_HTTPS). */ +int +schemes_are_similar_p (enum url_scheme a, enum url_scheme b) +{ + if (a == b) + return 1; +#ifdef HAVE_SSL + if ((a == SCHEME_HTTP && b == SCHEME_HTTPS) + || (a == SCHEME_HTTPS && b == SCHEME_HTTP)) + return 1; +#endif + return 0; +} #if 0 /* Debugging and testing support for path_simplify. */ diff --git a/src/url.h b/src/url.h index 79f23814..bd482633 100644 --- a/src/url.h +++ b/src/url.h @@ -158,4 +158,6 @@ downloaded_file_t downloaded_file PARAMS ((downloaded_file_t, const char *)); char *rewrite_shorthand_url PARAMS ((const char *)); +int schemes_are_similar_p PARAMS ((enum url_scheme a, enum url_scheme b)); + #endif /* URL_H */ -- 2.39.2