From 92b269acadce8d498a8021f2cec0104547559b7e Mon Sep 17 00:00:00 2001 From: hniksic Date: Fri, 30 Nov 2001 21:08:03 -0800 Subject: [PATCH] [svn] Match hosts against domains per Netscape cookie "specification". Published in . --- src/ChangeLog | 5 +++ src/cookies.c | 102 +++++++++++++++++++------------------------------- src/utils.c | 8 ++-- src/utils.h | 1 + 4 files changed, 48 insertions(+), 68 deletions(-) diff --git a/src/ChangeLog b/src/ChangeLog index 45f06eab..782448dd 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,8 @@ +2001-12-01 Hrvoje Niksic + + * cookies.c (check_domain_match): Reimplement to match Netscape's + "preliminary specification" for cookies. + 2001-12-01 Hrvoje Niksic * url.c (replace_attr_refresh_hack): New function. diff --git a/src/cookies.c b/src/cookies.c index 5f374951..a0937e34 100644 --- a/src/cookies.c +++ b/src/cookies.c @@ -670,96 +670,70 @@ numeric_address_p (const char *addr) } /* Check whether COOKIE_DOMAIN is an appropriate domain for HOST. - This check is compliant with rfc2109. */ + Originally I tried to make the check compliant with rfc2109, but + the sites deviated too often, so I had to fall back to "tail + matching", as defined by Netscape's original cookie spec. */ static int check_domain_match (const char *cookie_domain, const char *host) { - int headlen; - const char *tail; + static char *special_toplevel_domains[] = { + ".com", ".edu", ".net", ".org", ".gov", ".mil", ".int" + }; + int i, required_dots; DEBUGP (("cdm: 1")); /* Numeric address requires exact match. It also requires HOST to - be an IP address. I suppose we *could* resolve HOST with - store_hostaddress (it would hit the hash table), but rfc2109 - doesn't require it, and it doesn't seem very useful, so we - don't. */ + be an IP address. */ if (numeric_address_p (cookie_domain)) - return !strcmp (cookie_domain, host); + return 0 == strcmp (cookie_domain, host); DEBUGP ((" 2")); - /* The domain must contain at least one embedded dot. 
*/ - { - const char *rest = cookie_domain; - int len = strlen (rest); - if (*rest == '.') - ++rest, --len; /* ignore first dot */ - if (len <= 0) - return 0; - if (rest[len - 1] == '.') - --len; /* ignore last dot */ - - if (!memchr (rest, '.', len)) - /* No dots. */ - return 0; - } - - DEBUGP ((" 3")); - /* For the sake of efficiency, check for exact match first. */ if (!strcasecmp (cookie_domain, host)) return 1; - DEBUGP ((" 4")); - - /* In rfc2109 terminology, HOST needs domain-match COOKIE_DOMAIN. - This means that COOKIE_DOMAIN needs to start with `.' and be an - FQDN, and that HOST must end with COOKIE_DOMAIN. */ - if (*cookie_domain != '.') - return 0; - - DEBUGP ((" 5")); - - /* Two proceed, we need to examine two parts of HOST: its head and - its tail. Head and tail are defined in terms of the length of - the domain, like this: - - HHHHTTTTTTTTTTTTTTT <- host - DDDDDDDDDDDDDDD <- domain - - That is, "head" is the part of the host before (dlen - hlen), and - "tail" is what follows. - - For the domain to match, two conditions need to be true: + DEBUGP ((" 3")); - 1. Tail must equal DOMAIN. - 2. Head must not contain an embedded dot. */ + required_dots = 3; + for (i = 0; i < ARRAY_SIZE (special_toplevel_domains); i++) + if (match_tail (cookie_domain, special_toplevel_domains[i])) + { + required_dots = 2; + break; + } - headlen = strlen (host) - strlen (cookie_domain); + /* If the domain does not start with '.', require one less dot. + This is so that domains like "altavista.com" (which should be + ".altavista.com") are accepted. */ + if (*cookie_domain != '.') + --required_dots; - if (headlen <= 0) - /* DOMAIN must be a proper subset of HOST. 
*/ + if (count_char (cookie_domain, '.') < required_dots) return 0; - tail = host + headlen; - DEBUGP ((" 6")); + DEBUGP ((" 4")); - /* (1) */ - if (strcasecmp (tail, cookie_domain)) + if (!match_tail (host, cookie_domain)) return 0; - DEBUGP ((" 7")); - - /* Test (2) is not part of the "domain-match" itself, but is - recommended by rfc2109 for reasons of privacy. */ + DEBUGP ((" 5")); - /* (2) */ - if (memchr (host, '.', headlen)) - return 0; + /* Don't allow domain "bar.com" to match host "foobar.com". */ + if (*cookie_domain != '.') + { + int dlen = strlen (cookie_domain); + int hlen = strlen (host); + /* hostname.foobar.com */ + /* bar.com */ + /* ^ <-- must be '.' for host */ + if (hlen > dlen && host[hlen - dlen - 1] != '.') + return 0; + } - DEBUGP ((" 8")); + DEBUGP ((" 6")); return 1; } diff --git a/src/utils.c b/src/utils.c index 18015b07..9edc0934 100644 --- a/src/utils.c +++ b/src/utils.c @@ -854,8 +854,8 @@ accdir (const char *directory, enum accd flags) match_backwards ("abc", "bc") -> 1 match_backwards ("abc", "ab") -> 0 match_backwards ("abc", "abc") -> 1 */ -static int -match_backwards (const char *string, const char *pattern) +int +match_tail (const char *string, const char *pattern) { int i, j; @@ -870,7 +870,7 @@ match_backwards (const char *string, const char *pattern) } /* Checks whether string S matches each element of ACCEPTS. A list - element are matched either with fnmatch() or match_backwards(), + element is matched either with fnmatch() or match_tail(), according to whether the element contains wildcards or not. 
If the BACKWARD is 0, don't do backward comparison -- just compare @@ -891,7 +891,7 @@ in_acclist (const char *const *accepts, const char *s, int backward) { if (backward) { - if (match_backwards (s, *accepts)) + if (match_tail (s, *accepts)) return 1; } else diff --git a/src/utils.h b/src/utils.h index 4f142ea9..d8e4481c 100644 --- a/src/utils.h +++ b/src/utils.h @@ -69,6 +69,7 @@ char *file_merge PARAMS ((const char *, const char *)); int acceptable PARAMS ((const char *)); int accdir PARAMS ((const char *s, enum accd)); char *suffix PARAMS ((const char *s)); +int match_tail PARAMS ((const char *, const char *)); char *read_whole_line PARAMS ((FILE *)); struct file_memory *read_file PARAMS ((const char *)); -- 2.39.2