/* Support for cookies.
Copyright (C) 2001 Free Software Foundation, Inc.
-This file is part of Wget.
+This file is part of GNU Wget.
-This program is free software; you can redistribute it and/or modify
+GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at
your option) any later version.
-This program is distributed in the hope that it will be useful, but
+GNU Wget is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
+along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
/* Written by Hrvoje Niksic. Parts are loosely inspired by cookie
/* If we don't know better, assume cookie is non-permanent and valid
for the entire session. */
- cookie->expiry_time = ~0UL;
+ cookie->expiry_time = ~(unsigned long)0;
/* Assume default port. */
cookie->port = 80;
xfree (cookie);
}
\f
-/* Functions for cookie-specific hash tables. These are regular hash
- tables, but with case-insensitive test and hash functions. */
-
-/* Like string_hash, but produces the same results regardless of the
- case. */
-
-static unsigned long
-unsigned_string_hash (const void *key)
-{
- const char *p = key;
- unsigned int h = TOLOWER (*p);
-
- if (h)
- for (p += 1; *p != '\0'; p++)
- h = (h << 5) - h + TOLOWER (*p);
-
- return h;
-}
-
-/* Front-end to strcasecmp. */
-
-static int
-unsigned_string_cmp (const void *s1, const void *s2)
-{
- return !strcasecmp ((const char *)s1, (const char *)s2);
-}
-
-/* Like make_string_hash_table, but uses unsigned_string_hash and
- unsigned_string_cmp. */
+/* Functions for storing cookies.
-static struct hash_table *
-make_unsigned_string_hash_table (int initial_size)
-{
- return hash_table_new (initial_size,
- unsigned_string_hash, unsigned_string_cmp);
-}
+ All cookies can be referenced through cookies_hash_table. The key
+ in that table is the "DOMAIN:PORT" string, and the value is a
+ linked list of all cookies from that domain and port. Every new
+ cookie is placed on the head of the list. */
-/* Write "HOST:PORT" to RESULT. RESULT should be a pointer, and the
- memory for the contents is allocated on the stack. Useful for
+/* Write "HOST:PORT" to a stack-allocated area and make RESULT point
+ to that area. RESULT should be a character pointer. Useful for
creating HOST:PORT strings, which are the keys in the hash
table. */
result = alloca (HP_len + 1 + numdigit (port) + 1); \
memcpy (result, host, HP_len); \
result[HP_len] = ':'; \
- long_to_string (result + HP_len + 1, port); \
+ number_to_string (result + HP_len + 1, port); \
} while (0)
/* Find cookie chain that corresponds to DOMAIN (exact) and PORT. */
SET_HOSTPORT (domain, port, key);
return hash_table_get (cookies_hash_table, key);
}
-\f
-/* Functions for storing cookies.
-
- All cookies can be referenced through cookies_hash_table. The key
- in that table is the domain name, and the value is a linked list of
- all cookies from that domain. Every new cookie is placed on the
- head of the list. */
/* Find and return the cookie whose domain, path, and attribute name
correspond to COOKIE. If found, PREVPTR will point to the location
if (!cookies_hash_table)
/* If the hash table is not initialized, do so now, because we'll
need to store things. */
- cookies_hash_table = make_unsigned_string_hash_table (0);
+ cookies_hash_table = make_nocase_string_hash_table (0);
/* Initialize hash table key. */
SET_HOSTPORT (cookie->domain, cookie->port, hostport);
#undef NAME_IS
/* Returns non-zero for characters that are legal in the name of an
- attribute. */
+ attribute. This used to allow only alphanumerics, '-', and '_',
+ but we need to be more lenient because a number of sites want to
+ use weirder attribute names. rfc2965 "informally specifies"
+ attribute name (token) as "a sequence of non-special, non-white
+ space characters". So we allow every printable, non-space ASCII
+ character except '"', '=', ';' and ',', i.e. everything but the
+ stuff we know could harm us. */
-#define ATTR_NAME_CHAR(c) (ISALNUM (c) || (c) == '-' || (c) == '_')
+#define ATTR_NAME_CHAR(c) ((c) > 32 && (c) < 127 \
+ && (c) != '"' && (c) != '=' \
+ && (c) != ';' && (c) != ',')
/* Fetch the next character without doing anything special if CH gets
set to 0. (The code executed next is expected to handle it.) */
}
/* Check whether COOKIE_DOMAIN is an appropriate domain for HOST.
- This check is compliant with rfc2109. */
+ Originally I tried to make the check compliant with rfc2109, but
+ the sites deviated too often, so I had to fall back to "tail
+ matching", as defined by Netscape's original cookie spec. */
static int
check_domain_match (const char *cookie_domain, const char *host)
{
- int headlen;
- const char *tail;
+ static char *special_toplevel_domains[] = {
+ ".com", ".edu", ".net", ".org", ".gov", ".mil", ".int"
+ };
+ int i, required_dots;
DEBUGP (("cdm: 1"));
/* Numeric address requires exact match. It also requires HOST to
- be an IP address. I suppose we *could* resolve HOST with
- store_hostaddress (it would hit the hash table), but rfc2109
- doesn't require it, and it doesn't seem very useful, so we
- don't. */
+ be an IP address. */
if (numeric_address_p (cookie_domain))
- return !strcmp (cookie_domain, host);
+ return 0 == strcmp (cookie_domain, host);
DEBUGP ((" 2"));
- /* The domain must contain at least one embedded dot. */
- {
- const char *rest = cookie_domain;
- int len = strlen (rest);
- if (*rest == '.')
- ++rest, --len; /* ignore first dot */
- if (len <= 0)
- return 0;
- if (rest[len - 1] == '.')
- --len; /* ignore last dot */
-
- if (!memchr (rest, '.', len))
- /* No dots. */
- return 0;
- }
-
- DEBUGP ((" 3"));
-
/* For the sake of efficiency, check for exact match first. */
if (!strcasecmp (cookie_domain, host))
return 1;
- DEBUGP ((" 4"));
-
- /* In rfc2109 terminology, HOST needs domain-match COOKIE_DOMAIN.
- This means that COOKIE_DOMAIN needs to start with `.' and be an
- FQDN, and that HOST must end with COOKIE_DOMAIN. */
- if (*cookie_domain != '.')
- return 0;
-
- DEBUGP ((" 5"));
-
- /* Two proceed, we need to examine two parts of HOST: its head and
- its tail. Head and tail are defined in terms of the length of
- the domain, like this:
-
- HHHHTTTTTTTTTTTTTTT <- host
- DDDDDDDDDDDDDDD <- domain
-
- That is, "head" is the part of the host before (dlen - hlen), and
- "tail" is what follows.
-
- For the domain to match, two conditions need to be true:
+ DEBUGP ((" 3"));
- 1. Tail must equal DOMAIN.
- 2. Head must not contain an embedded dot. */
+ required_dots = 3;
+ for (i = 0; i < ARRAY_SIZE (special_toplevel_domains); i++)
+ if (match_tail (cookie_domain, special_toplevel_domains[i]))
+ {
+ required_dots = 2;
+ break;
+ }
- headlen = strlen (host) - strlen (cookie_domain);
+ /* If the domain does not start with '.', require one less dot.
+ This is so that domains like "altavista.com" (which should be
+ ".altavista.com") are accepted. */
+ if (*cookie_domain != '.')
+ --required_dots;
- if (headlen <= 0)
- /* DOMAIN must be a proper subset of HOST. */
+ if (count_char (cookie_domain, '.') < required_dots)
return 0;
- tail = host + headlen;
- DEBUGP ((" 6"));
+ DEBUGP ((" 4"));
- /* (1) */
- if (strcasecmp (tail, cookie_domain))
+ if (!match_tail (host, cookie_domain))
return 0;
- DEBUGP ((" 7"));
-
- /* Test (2) is not part of the "domain-match" itself, but is
- recommended by rfc2109 for reasons of privacy. */
+ DEBUGP ((" 5"));
- /* (2) */
- if (memchr (host, '.', headlen))
- return 0;
+ /* Don't allow domain "bar.com" to match host "foobar.com". */
+ if (*cookie_domain != '.')
+ {
+ int dlen = strlen (cookie_domain);
+ int hlen = strlen (host);
+ /* cookie host:    hostname.foobar.com */
+ /* desired domain:             bar.com */
+ /* '.' must be here in host-> ^        */
+ if (hlen > dlen && host[hlen - dlen - 1] != '.')
+ return 0;
+ }
- DEBUGP ((" 8"));
+ DEBUGP ((" 6"));
return 1;
}
int
set_cookie_header_cb (const char *hdr, void *closure)
{
- struct urlinfo *u = (struct urlinfo *)closure;
+ struct url *u = (struct url *)closure;
struct cookie *cookie;
cookies_now = time (NULL);
previously stored cookies. Entry point is
`build_cookies_request'. */
-
-/* Count how many times CHR occurs in STRING. */
-
-static int
-count_char (const char *string, char chr)
-{
- const char *p;
- int count = 0;
- for (p = string; *p; p++)
- if (*p == chr)
- ++count;
- return count;
-}
-
/* Store CHAIN to STORE if there is room in STORE. If not, inrecement
COUNT anyway, so that when the function is done, we end up with the
exact count of how much place we actually need. */
SIZE matches are written; if more matches are present, return the
number of chains that would have been written. */
-int
+static int
find_matching_chains (const char *host, int port,
struct cookie *store[], int size)
{
static int
path_matches (const char *full_path, const char *prefix)
{
- int len = strlen (prefix);
- if (strncmp (full_path, prefix, len))
+ int len;
+
+ if (*prefix != '/')
+ /* Wget's HTTP paths (FULL_PATH) do not begin with '/' (the URL
+ code treats it as a separator), but the '/' is assumed when
+ matching against cookie paths. So a PREFIX that does not
+ begin with '/' cannot match at all. */
+ return 0;
+
+ ++prefix;
+ len = strlen (prefix);
+
+ if (0 != strncmp (full_path, prefix, len))
/* FULL_PATH doesn't begin with PREFIX. */
return 0;
/* Length of PREFIX determines the quality of the match. */
- return len;
+ return len + 1;
}
static int