/* Support for cookies.
Copyright (C) 2001 Free Software Foundation, Inc.
-This file is part of Wget.
+This file is part of GNU Wget.
-This program is free software; you can redistribute it and/or modify
+GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at
your option) any later version.
-This program is distributed in the hope that it will be useful, but
+GNU Wget is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
+along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
/* Written by Hrvoje Niksic. Parts are loosely inspired by cookie
xfree (cookie);
}
\f
-/* Functions for cookie-specific hash tables. These are regular hash
- tables, but with case-insensitive test and hash functions. */
-
-/* Like string_hash, but produces the same results regardless of the
- case. */
-
-static unsigned long
-unsigned_string_hash (const void *key)
-{
- const char *p = key;
- unsigned int h = TOLOWER (*p);
-
- if (h)
- for (p += 1; *p != '\0'; p++)
- h = (h << 5) - h + TOLOWER (*p);
-
- return h;
-}
-
-/* Front-end to strcasecmp. */
-
-static int
-unsigned_string_cmp (const void *s1, const void *s2)
-{
- return !strcasecmp ((const char *)s1, (const char *)s2);
-}
-
-/* Like make_string_hash_table, but uses unsigned_string_hash and
- unsigned_string_cmp. */
+/* Functions for storing cookies.
-static struct hash_table *
-make_unsigned_string_hash_table (int initial_size)
-{
- return hash_table_new (initial_size,
- unsigned_string_hash, unsigned_string_cmp);
-}
+ All cookies can be referenced through cookies_hash_table. The key
+ in that table is the "HOST:PORT" string built from the cookie's
+ domain and port, and the value is a linked list of all cookies
+ stored under that key. Every new cookie is placed on the head of
+ the list. */
-/* Write "HOST:PORT" to RESULT. RESULT should be a pointer, and the
- memory for the contents is allocated on the stack. Useful for
+/* Write "HOST:PORT" to a stack-allocated area and make RESULT point
+ to that area. RESULT should be a character pointer. Useful for
creating HOST:PORT strings, which are the keys in the hash
table. */
SET_HOSTPORT (domain, port, key);
return hash_table_get (cookies_hash_table, key);
}
-\f
-/* Functions for storing cookies.
-
- All cookies can be referenced through cookies_hash_table. The key
- in that table is the domain name, and the value is a linked list of
- all cookies from that domain. Every new cookie is placed on the
- head of the list. */
/* Find and return the cookie whose domain, path, and attribute name
correspond to COOKIE. If found, PREVPTR will point to the location
if (!cookies_hash_table)
/* If the hash table is not initialized, do so now, because we'll
need to store things. */
- cookies_hash_table = make_unsigned_string_hash_table (0);
+ cookies_hash_table = make_nocase_string_hash_table (0);
/* Initialize hash table key. */
SET_HOSTPORT (cookie->domain, cookie->port, hostport);
hash_table_put (cookies_hash_table, chain_key, cookie);
- DEBUGP (("\nStored cookie %s %d %s %d %s %s %s\n",
- cookie->domain, cookie->port, cookie->path, cookie->secure,
+ DEBUGP (("\nStored cookie %s %d %s %s %d %s %s %s\n",
+ cookie->domain, cookie->port, cookie->path,
+ cookie->permanent ? "permanent" : "nonpermanent",
+ cookie->secure,
asctime (localtime ((time_t *)&cookie->expiry_time)),
cookie->attr, cookie->value));
}
#undef NAME_IS
/* Returns non-zero for characters that are legal in the name of an
- attribute. */
+ attribute. This used to allow only alphanumerics, '-', and '_',
+ but we need to be more lenient because a number of sites want to
+ use weirder attribute names. rfc2965 "informally specifies"
+ attribute name (token) as "a sequence of non-special, non-white
+ space characters". So we allow everything except the stuff we know
+ could harm us. */
-#define ATTR_NAME_CHAR(c) (ISALNUM (c) || (c) == '-' || (c) == '_')
+#define ATTR_NAME_CHAR(c) ((c) > 32 && (c) < 127 \
+ && (c) != '"' && (c) != '=' \
+ && (c) != ';' && (c) != ',')
/* Fetch the next character without doing anything special if CH gets
set to 0. (The code executed next is expected to handle it.) */
static int
check_domain_match (const char *cookie_domain, const char *host)
{
- int i, headlen;
+ int headlen;
const char *tail;
+ DEBUGP (("cdm: 1"));
+
/* Numeric address requires exact match. It also requires HOST to
be an IP address. I suppose we *could* resolve HOST with
store_hostaddress (it would hit the hash table), but rfc2109
if (numeric_address_p (cookie_domain))
return !strcmp (cookie_domain, host);
+ DEBUGP ((" 2"));
+
/* The domain must contain at least one embedded dot. */
{
const char *rest = cookie_domain;
return 0;
}
+ DEBUGP ((" 3"));
+
/* For the sake of efficiency, check for exact match first. */
if (!strcasecmp (cookie_domain, host))
return 1;
+ DEBUGP ((" 4"));
+
/* In rfc2109 terminology, HOST needs domain-match COOKIE_DOMAIN.
This means that COOKIE_DOMAIN needs to start with `.' and be an
FQDN, and that HOST must end with COOKIE_DOMAIN. */
if (*cookie_domain != '.')
return 0;
+ DEBUGP ((" 5"));
+
/* To proceed, we need to examine two parts of HOST: its head and
its tail. Head and tail are defined in terms of the length of
the domain, like this:
return 0;
tail = host + headlen;
+ DEBUGP ((" 6"));
+
/* (1) */
if (strcasecmp (tail, cookie_domain))
return 0;
+ DEBUGP ((" 7"));
+
/* Test (2) is not part of the "domain-match" itself, but is
recommended by rfc2109 for reasons of privacy. */
if (memchr (host, '.', headlen))
return 0;
+ DEBUGP ((" 8"));
+
return 1;
}
int
set_cookie_header_cb (const char *hdr, void *closure)
{
- struct urlinfo *u = (struct urlinfo *)closure;
+ struct url *u = (struct url *)closure;
struct cookie *cookie;
cookies_now = time (NULL);
return namecmp ? namecmp : valuecmp;
}
+/* Eliminate duplicate cookies. "Duplicate cookies" are any two
+ cookies whose name (attr) and value are the same. Whenever a
+ duplicate pair is found, one of the cookies is removed.
+
+ OUTGOING is an array of COUNT entries. It is sorted and then
+ compacted in place, so the caller's array is reordered. The
+ return value is the number of entries left at the front of
+ OUTGOING once the duplicates have been dropped; entries past that
+ point should be ignored by the caller. */
+
+static int
+eliminate_dups (struct weighed_cookie *outgoing, int count)
+{
+ int i;
+
+ /* We deploy a simple uniquify algorithm: first sort the array
+ according to our sort criterion, then uniquify it by comparing
+ each cookie with its neighbor. The neighbor comparison below
+ relies on the sort having placed cookies with equal name and
+ value next to each other. */
+
+ qsort (outgoing, count, sizeof (struct weighed_cookie), equality_comparator);
+
+ for (i = 0; i < count - 1; i++)
+ {
+ struct cookie *c1 = outgoing[i].cookie;
+ struct cookie *c2 = outgoing[i + 1].cookie;
+ if (!strcmp (c1->attr, c2->attr) && !strcmp (c1->value, c2->value))
+ {
+ /* c1 and c2 are the same; get rid of one of them. Note that
+ the shift below overwrites slot i, so it is c1's entry that
+ disappears and c2's entry that survives in its place. */
+ if (count > i + 1)
+ /* move all entries from positions [i + 1, count) to i.
+ (The test above always holds here, because the loop
+ condition already guarantees i < count - 1.) */
+ memmove (outgoing + i, outgoing + i + 1,
+ (count - (i + 1)) * sizeof (struct weighed_cookie));
+ /* We decrement i to counter the i++ of the for loop. Remember
+ that we've just removed the element in front of us; we need
+ to remain in place to check whether outgoing[i] matches what
+ used to be outgoing[i + 2]. COUNT shrinks by one because
+ the array now holds one entry fewer. */
+ --i;
+ --count;
+ }
+ }
+ return count;
+}
+
/* Comparator used for sorting by quality. */
static int
return dgdiff ? dgdiff : pgdiff;
}
-/* Build a `Cookies' header for a request that goes to HOST:PORT and
- requests PATH from the server. Memory is allocated by `malloc',
- and the caller is responsible for freeing it. If no cookies
- pertain to this request, i.e. no cookie header should be generated,
- NULL is returned. */
+/* Build a `Cookie' header for a request that goes to HOST:PORT and
+ requests PATH from the server. The resulting string is allocated
+ with `malloc', and the caller is responsible for freeing it. If no
+ cookies pertain to this request, i.e. no cookie header should be
+ generated, NULL is returned. */
char *
build_cookies_request (const char *host, int port, const char *path,
if (chain_count > chain_store_size)
{
/* It's extremely unlikely that more than 20 chains will ever
- match. But in this case it's easy to not have the
- limitation, so we don't. */
+ match. But since find_matching_chains reports the exact size
+ it needs, it's easy to not have the limitation, so we
+ don't. */
all_chains = alloca (chain_count * sizeof (struct cookie *));
+ chain_store_size = chain_count;
goto again;
}
/* Allocate the array. */
outgoing = alloca (count * sizeof (struct weighed_cookie));
+ /* Fill the array with all the matching cookies from all the
+ matching chains. */
ocnt = 0;
for (i = 0; i < chain_count; i++)
for (cookie = all_chains[i]; cookie; cookie = cookie->next)
assert (ocnt == count);
/* Eliminate duplicate cookies; that is, those whose name and value
- are the same. We do it by first sorting the array, and then
- uniq'ing it. */
- qsort (outgoing, count, sizeof (struct weighed_cookie), equality_comparator);
- for (i = 0; i < count - 1; i++)
- {
- struct cookie *c1 = outgoing[i].cookie;
- struct cookie *c2 = outgoing[i + 1].cookie;
- if (!strcmp (c1->attr, c2->attr) && !strcmp (c1->value, c2->value))
- {
- /* c1 and c2 are the same; get rid of c2. */
- if (count > i + 1)
- /* move all ptrs from positions [i + 1, count) to i. */
- memmove (outgoing + i, outgoing + i + 1,
- (count - (i + 1)) * sizeof (struct weighed_cookie));
- /* We decrement i to counter the ++i above. Remember that
- we've just removed the element in front of us; we need to
- remain in place to check whether outgoing[i] what used to
- be outgoing[i + 2]. */
- --i;
- --count;
- }
- }
+ are the same. */
+ count = eliminate_dups (outgoing, count);
/* Sort the array so that best-matching domains come first, and
that, within one domain, best-matching paths come first. */
++p; \
} while (0)
-#define MARK_WORD(p, b, e) do { \
+#define SET_WORD_BOUNDARIES(p, b, e) do { \
SKIP_WS (p); \
b = p; \
/* skip non-ws */ \
/* empty line */
continue;
- MARK_WORD (p, domain_b, domain_e);
- MARK_WORD (p, ignore_b, ignore_e);
- MARK_WORD (p, path_b, path_e);
- MARK_WORD (p, secure_b, secure_e);
- MARK_WORD (p, expires_b, expires_e);
- MARK_WORD (p, name_b, name_e);
+ SET_WORD_BOUNDARIES (p, domain_b, domain_e);
+ SET_WORD_BOUNDARIES (p, ignore_b, ignore_e);
+ SET_WORD_BOUNDARIES (p, path_b, path_e);
+ SET_WORD_BOUNDARIES (p, secure_b, secure_e);
+ SET_WORD_BOUNDARIES (p, expires_b, expires_e);
+ SET_WORD_BOUNDARIES (p, name_b, name_e);
- /* Don't use MARK_WORD for value because it may contain
- whitespace itself. Instead, . */
- MARK_WORD (p, value_b, value_e);
+ /* Don't use SET_WORD_BOUNDARIES for value because it may
+ contain whitespace. Instead, set value_e to the end of line,
+ modulo trailing space (this will skip the line separator.) */
+ SKIP_WS (p);
+ value_b = p;
+ value_e = p + strlen (p);
+ while (value_e > value_b && ISSPACE (*(value_e - 1)))
+ --value_e;
+ if (value_b == value_e)
+ /* Hmm, should we check for empty value? I guess that's
+ legal, so I leave it. */
+ ;
cookie = cookie_new ();
cookie->domain = strdupdelim (domain_b, domain_e);
- /* Don't use MARK_WORD for value because it may contain
- whitespace itself. Instead, set name_e to the end of line,
- modulo trailing space (which includes the NL separator.) */
- SKIP_WS (p);
- name_b = p;
- name_e = p + strlen (p);
- while (name_e >= name_b && ISSPACE (*name_e))
- --name_e;
- if (name_b == name_e)
- /* Hmm, should we check for empty value? I guess that's
- legal, so I leave it. */
- ;
-
/* safe default in case EXPIRES field is garbled. */
cookie->expiry_time = cookies_now - 1;
logprintf (LOG_NOTQUIET, _("Error closing `%s': %s\n"),
file, strerror (errno));
- DEBUGP (("Done saving cookies.\n", file));
+ DEBUGP (("Done saving cookies.\n"));
}
\f
static int