/* Support for cookies.
Copyright (C) 2001 Free Software Foundation, Inc.
-This file is part of Wget.
+This file is part of GNU Wget.
-This program is free software; you can redistribute it and/or modify
+GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at
your option) any later version.
-This program is distributed in the hope that it will be useful, but
+GNU Wget is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
+along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
/* Written by Hrvoje Niksic. Parts are loosely inspired by cookie
functions don't need to call time() all the time. */
static time_t cookies_now;
+
+/* This should *really* be in a .h file! */
+time_t http_atotm PARAMS ((char *));
+
\f
/* Definition of `struct cookie' and the most basic functions. */
xfree (cookie);
}
\f
-/* Functions for cookie-specific hash tables. These are regular hash
- tables, but with case-insensitive test and hash functions. */
-
-/* Like string_hash, but produces the same results regardless of the
- case. */
-
-static unsigned long
-unsigned_string_hash (const void *sv)
-{
- unsigned int h = 0;
- unsigned const char *x = (unsigned const char *) sv;
-
- while (*x)
- {
- unsigned int g;
- unsigned char c = TOLOWER (*x);
- h = (h << 4) + c;
- if ((g = h & 0xf0000000) != 0)
- h = (h ^ (g >> 24)) ^ g;
- ++x;
- }
-
- return h;
-}
-
-/* Front-end to strcasecmp. */
-
-static int
-unsigned_string_cmp (const void *s1, const void *s2)
-{
- return !strcasecmp ((const char *)s1, (const char *)s2);
-}
-
-/* Like make_string_hash_table, but uses unsigned_string_hash and
- unsigned_string_cmp. */
+/* Functions for storing cookies.
-static struct hash_table *
-make_unsigned_string_hash_table (int initial_size)
-{
- return hash_table_new (initial_size,
- unsigned_string_hash, unsigned_string_cmp);
-}
+ All cookies can be referenced through cookies_hash_table. The key
+ in that table is the domain name, and the value is a linked list of
+ all cookies from that domain. Every new cookie is placed on the
+ head of the list. */
-/* Write "HOST:PORT" to RESULT. RESULT should be a pointer, and the
- memory for the contents is allocated on the stack. Useful for
+/* Write "HOST:PORT" to a stack-allocated area and make RESULT point
+ to that area. RESULT should be a character pointer. Useful for
creating HOST:PORT strings, which are the keys in the hash
table. */
SET_HOSTPORT (domain, port, key);
return hash_table_get (cookies_hash_table, key);
}
-\f
-/* Functions for storing cookies.
-
- All cookies can be referenced through cookies_hash_table. The key
- in that table is the domain name, and the value is a linked list of
- all cookies from that domain. Every new cookie is placed on the
- head of the list. */
/* Find and return the cookie whose domain, path, and attribute name
correspond to COOKIE. If found, PREVPTR will point to the location
if (!cookies_hash_table)
/* If the hash table is not initialized, do so now, because we'll
need to store things. */
- cookies_hash_table = make_unsigned_string_hash_table (0);
+ cookies_hash_table = make_nocase_string_hash_table (0);
/* Initialize hash table key. */
SET_HOSTPORT (cookie->domain, cookie->port, hostport);
hash_table_put (cookies_hash_table, chain_key, cookie);
- DEBUGP (("\nStored cookie %s %d %s %d %s %s %s\n",
- cookie->domain, cookie->port, cookie->path, cookie->secure,
+ DEBUGP (("\nStored cookie %s %d %s %s %d %s %s %s\n",
+ cookie->domain, cookie->port, cookie->path,
+ cookie->permanent ? "permanent" : "nonpermanent",
+ cookie->secure,
asctime (localtime ((time_t *)&cookie->expiry_time)),
cookie->attr, cookie->value));
}
#undef NAME_IS
/* Returns non-zero for characters that are legal in the name of an
- attribute. */
+ attribute. This used to allow only alphanumerics, '-', and '_',
+ but we need to be more lenient because a number of sites want to
+ use weirder attribute names. rfc2965 "informally specifies"
+ attribute name (token) as "a sequence of non-special, non-white
+ space characters". So we allow everything except the stuff we know
+ could harm us. */
-#define ATTR_NAME_CHAR(c) (ISALNUM (c) || (c) == '-' || (c) == '_')
+#define ATTR_NAME_CHAR(c) ((c) > 32 && (c) < 127 \
+ && (c) != '"' && (c) != '=' \
+ && (c) != ';' && (c) != ',')
/* Fetch the next character without doing anything special if CH gets
set to 0. (The code executed next is expected to handle it.) */
delete_cookie (cookie);
if (state == S_ERROR)
- logprintf (LOG_NOTQUIET, _("Error in Set-Cookie, at character `%c'.\n"), c);
+ logprintf (LOG_NOTQUIET, _("Syntax error in Set-Cookie at character `%c'.\n"), c);
else
abort ();
return NULL;
eof:
delete_cookie (cookie);
logprintf (LOG_NOTQUIET,
- _("Error in Set-Cookie: premature end of string.\n"));
+ _("Syntax error in Set-Cookie: premature end of string.\n"));
return NULL;
}
\f
static int
check_domain_match (const char *cookie_domain, const char *host)
{
- int i, headlen;
+ int headlen;
const char *tail;
+ DEBUGP (("cdm: 1"));
+
/* Numeric address requires exact match. It also requires HOST to
be an IP address. I suppose we *could* resolve HOST with
store_hostaddress (it would hit the hash table), but rfc2109
if (numeric_address_p (cookie_domain))
return !strcmp (cookie_domain, host);
+ DEBUGP ((" 2"));
+
/* The domain must contain at least one embedded dot. */
{
const char *rest = cookie_domain;
return 0;
}
+ DEBUGP ((" 3"));
+
/* For the sake of efficiency, check for exact match first. */
if (!strcasecmp (cookie_domain, host))
return 1;
+ DEBUGP ((" 4"));
+
/* In rfc2109 terminology, HOST needs domain-match COOKIE_DOMAIN.
This means that COOKIE_DOMAIN needs to start with `.' and be an
FQDN, and that HOST must end with COOKIE_DOMAIN. */
if (*cookie_domain != '.')
return 0;
+ DEBUGP ((" 5"));
+
/* Two proceed, we need to examine two parts of HOST: its head and
its tail. Head and tail are defined in terms of the length of
the domain, like this:
return 0;
tail = host + headlen;
+ DEBUGP ((" 6"));
+
/* (1) */
if (strcasecmp (tail, cookie_domain))
return 0;
+ DEBUGP ((" 7"));
+
/* Test (2) is not part of the "domain-match" itself, but is
recommended by rfc2109 for reasons of privacy. */
if (memchr (host, '.', headlen))
return 0;
+ DEBUGP ((" 8"));
+
return 1;
}
int
set_cookie_header_cb (const char *hdr, void *closure)
{
- struct urlinfo *u = (struct urlinfo *)closure;
+ struct url *u = (struct url *)closure;
struct cookie *cookie;
cookies_now = time (NULL);
return count;
}
-/* Return the head of the cookie chain that matches HOST. */
+/* Store CHAIN to STORE if there is room in STORE. If not, increment
+ COUNT anyway, so that when the function is done, we end up with the
+ exact count of how much space we actually need. */
-static struct cookie *
-find_cookie_chain (const char *host, int port)
+/* Note: the destination array is the ST_STORE argument (not a
+ variable that merely happens to be called `store' at the call
+ site). Arguments are evaluated more than once, so they must be
+ free of side effects; ST_COUNT must be a modifiable lvalue. */
+#define STORE_CHAIN(st_chain, st_store, st_size, st_count) do { \
+ if ((st_count) < (st_size)) \
+ (st_store)[(st_count)] = (st_chain); \
+ ++(st_count); \
+} while (0)
+
+/* Store cookie chains that match HOST, PORT. Since more than one
+ chain can match, the matches are written to STORE. No more than
+ SIZE matches are written; if more matches are present, return the
+ number of chains that would have been written. */
+
+int
+find_matching_chains (const char *host, int port,
+ struct cookie *store[], int size)
{
+ struct cookie *chain;
int dot_count;
char *hash_key;
- struct cookie *chain = NULL;
+ int count = 0;
if (!cookies_hash_table)
- return NULL;
+ return 0;
SET_HOSTPORT (host, port, hash_key);
/* Exact match. */
chain = hash_table_get (cookies_hash_table, hash_key);
if (chain)
- return chain;
+ STORE_CHAIN (chain, store, size, count);
dot_count = count_char (host, '.');
assert (p != NULL);
chain = hash_table_get (cookies_hash_table, p);
if (chain)
- return chain;
+ STORE_CHAIN (chain, store, size, count);
hash_key = p + 1;
}
- return NULL;
+ return count;
}
/* If FULL_PATH begins with PREFIX, return the length of PREFIX, zero
return len;
}
+static int
+matching_cookie (const struct cookie *cookie, const char *path,
+ int connection_secure_p, int *path_goodness)
+{
+ int pg;
+
+ if (cookie->expiry_time < cookies_now)
+ /* Ignore stale cookies. There is no need to unchain the cookie
+ at this point -- Wget is a relatively short-lived application,
+ and stale cookies will not be saved by `save_cookies'. */
+ return 0;
+ if (cookie->secure && !connection_secure_p)
+ /* Don't transmit secure cookies over an insecure connection. */
+ return 0;
+ pg = path_matches (path, cookie->path);
+ if (!pg)
+ return 0;
+
+ if (path_goodness)
+ /* If the caller requested path_goodness, we return it. This is
+ an optimization, so that the caller doesn't need to call
+ path_matches() again. */
+ *path_goodness = pg;
+ return 1;
+}
+
struct weighed_cookie {
struct cookie *cookie;
+ int domain_goodness;
int path_goodness;
};
+/* Comparator used for uniquifying the list. */
+
+static int
+equality_comparator (const void *p1, const void *p2)
+{
+ struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
+ struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
+
+ int namecmp = strcmp (wc1->cookie->attr, wc2->cookie->attr);
+ int valuecmp = strcmp (wc1->cookie->value, wc2->cookie->value);
+
+ /* We only really care whether both name and value are equal. We
+ return them in this order only for consistency... */
+ return namecmp ? namecmp : valuecmp;
+}
+
+/* Eliminate duplicate cookies. "Duplicate cookies" are any two
+ cookies whose name and value are the same. Whenever a duplicate
+ pair is found, one of the cookies is removed. */
+
+static int
+eliminate_dups (struct weighed_cookie *outgoing, int count)
+{
+ int i;
+
+ /* We deploy a simple uniquify algorithm: first sort the array
+ according to our sort criterion, then uniquify it by comparing
+ each cookie with its neighbor. */
+
+ qsort (outgoing, count, sizeof (struct weighed_cookie), equality_comparator);
+
+ for (i = 0; i < count - 1; i++)
+ {
+ struct cookie *c1 = outgoing[i].cookie;
+ struct cookie *c2 = outgoing[i + 1].cookie;
+ if (!strcmp (c1->attr, c2->attr) && !strcmp (c1->value, c2->value))
+ {
+ /* c1 and c2 are the same; get rid of c2. */
+ if (count > i + 1)
+ /* move all ptrs from positions [i + 1, count) to i. */
+ memmove (outgoing + i, outgoing + i + 1,
+ (count - (i + 1)) * sizeof (struct weighed_cookie));
+ /* We decrement i to counter the ++i above. Remember that
+ we've just removed the element in front of us; we need to
+ remain in place to check whether outgoing[i] matches what
+ used to be outgoing[i + 2]. */
+ --i;
+ --count;
+ }
+ }
+ return count;
+}
+
+/* Comparator used for sorting by quality. */
+
static int
goodness_comparator (const void *p1, const void *p2)
{
struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
- /* It's goodness2-goodness1 because we want a sort in *decreasing*
- order of goodness. */
- return wc2->path_goodness - wc1->path_goodness;
+
+ /* Subtractions take `wc2' as the first argument because we want a
+ sort in *decreasing* order of goodness. */
+ int dgdiff = wc2->domain_goodness - wc1->domain_goodness;
+ int pgdiff = wc2->path_goodness - wc1->path_goodness;
+
+ /* Sort by domain goodness; if these are the same, sort by path
+ goodness. (The sorting order isn't really specified; maybe it
+ should be the other way around.) */
+ return dgdiff ? dgdiff : pgdiff;
}
-/* Build a `Cookies' header for a request that goes to HOST:PORT and
- requests PATH from the server. Memory is allocated by `malloc',
- and the caller is responsible for freeing it. If no cookies
- pertain to this request, i.e. no cookie header should be generated,
- NULL is returned. */
+/* Build a `Cookie' header for a request that goes to HOST:PORT and
+ requests PATH from the server. The resulting string is allocated
+ with `malloc', and the caller is responsible for freeing it. If no
+ cookies pertain to this request, i.e. no cookie header should be
+ generated, NULL is returned. */
char *
build_cookies_request (const char *host, int port, const char *path,
int connection_secure_p)
{
- struct cookie *chain = find_cookie_chain (host, port);
+ struct cookie *chain_default_store[20];
+ struct cookie **all_chains = chain_default_store;
+ int chain_store_size = ARRAY_SIZE (chain_default_store);
+ int chain_count;
+
struct cookie *cookie;
struct weighed_cookie *outgoing;
- int count, i;
+ int count, i, ocnt;
char *result;
int result_size, pos;
- if (!chain)
+ again:
+ chain_count = find_matching_chains (host, port, all_chains, chain_store_size);
+ if (chain_count > chain_store_size)
+ {
+ /* It's extremely unlikely that more than 20 chains will ever
+ match. But since find_matching_chains reports the exact size
+ it needs, it's easy to not have the limitation, so we
+ don't. */
+ all_chains = alloca (chain_count * sizeof (struct cookie *));
+ chain_store_size = chain_count;
+ goto again;
+ }
+
+ if (!chain_count)
return NULL;
cookies_now = time (NULL);
/* Count the number of cookies whose path matches. */
count = 0;
- result_size = 0;
- for (cookie = chain; cookie; cookie = cookie->next)
- {
- if (cookie->expiry_time < cookies_now)
- /* Ignore stale cookies. There is no need to unchain the
- cookie at this point -- Wget is a relatively short-lived
- application, and stale cookies will not be saved by
- `save_cookies'. */
- continue;
- if (cookie->secure && !connection_secure_p)
- /* Don't transmit secure cookies over an insecure
- connection. */
- continue;
- if (path_matches (path, cookie->path))
- {
- ++count;
- /* name=value */
- result_size += strlen (cookie->attr) + 1 + strlen (cookie->value);
- }
- }
+ for (i = 0; i < chain_count; i++)
+ for (cookie = all_chains[i]; cookie; cookie = cookie->next)
+ if (matching_cookie (cookie, path, connection_secure_p, NULL))
+ ++count;
if (!count)
+ /* No matching cookies. */
return NULL;
/* Allocate the array. */
outgoing = alloca (count * sizeof (struct weighed_cookie));
- i = 0;
- for (cookie = chain; cookie; cookie = cookie->next)
- {
- int goodness;
- /* #### These two if's are repeated verbatim from the loop
- above. Should I put them in a separate function? */
- if (cookie->expiry_time < cookies_now)
- continue;
- if (cookie->secure && !connection_secure_p)
- /* Don't transmit secure cookies over an insecure
- connection. */
- continue;
- goodness = path_matches (path, cookie->path);
- if (!goodness)
- continue;
- outgoing[i].cookie = cookie;
- outgoing[i].path_goodness = goodness;
- ++i;
- }
- assert (i == count);
- /* Sort the array so that paths that match our path better come
- first. */
+ /* Fill the array with all the matching cookies from all the
+ matching chains. */
+ ocnt = 0;
+ for (i = 0; i < chain_count; i++)
+ for (cookie = all_chains[i]; cookie; cookie = cookie->next)
+ {
+ int pg;
+ if (!matching_cookie (cookie, path, connection_secure_p, &pg))
+ continue;
+ outgoing[ocnt].cookie = cookie;
+ outgoing[ocnt].domain_goodness = strlen (cookie->domain);
+ outgoing[ocnt].path_goodness = pg;
+ ++ocnt;
+ }
+ assert (ocnt == count);
+
+ /* Eliminate duplicate cookies; that is, those whose name and value
+ are the same. */
+ count = eliminate_dups (outgoing, count);
+
+ /* Sort the array so that best-matching domains come first, and
+ that, within one domain, best-matching paths come first. */
qsort (outgoing, count, sizeof (struct weighed_cookie), goodness_comparator);
+ /* Count the space the name=value pairs will take. */
+ result_size = 0;
+ for (i = 0; i < count; i++)
+ {
+ struct cookie *c = outgoing[i].cookie;
+ /* name=value */
+ result_size += strlen (c->attr) + 1 + strlen (c->value);
+ }
+
/* Allocate output buffer:
"Cookie: " -- 8
name=value pairs -- result_size
++p; \
} while (0)
-#define MARK_WORD(p, b, e) do { \
+#define SET_WORD_BOUNDARIES(p, b, e) do { \
SKIP_WS (p); \
b = p; \
/* skip non-ws */ \
/* empty line */
continue;
- MARK_WORD (p, domain_b, domain_e);
- MARK_WORD (p, ignore_b, ignore_e);
- MARK_WORD (p, path_b, path_e);
- MARK_WORD (p, secure_b, secure_e);
- MARK_WORD (p, expires_b, expires_e);
- MARK_WORD (p, name_b, name_e);
+ SET_WORD_BOUNDARIES (p, domain_b, domain_e);
+ SET_WORD_BOUNDARIES (p, ignore_b, ignore_e);
+ SET_WORD_BOUNDARIES (p, path_b, path_e);
+ SET_WORD_BOUNDARIES (p, secure_b, secure_e);
+ SET_WORD_BOUNDARIES (p, expires_b, expires_e);
+ SET_WORD_BOUNDARIES (p, name_b, name_e);
- /* Don't use MARK_WORD for value because it may contain
- whitespace itself. Instead, . */
- MARK_WORD (p, value_b, value_e);
+ /* Don't use SET_WORD_BOUNDARIES for value because it may
+ contain whitespace. Instead, set value_e to the end of line,
+ modulo trailing space (this will skip the line separator.) */
+ SKIP_WS (p);
+ value_b = p;
+ value_e = p + strlen (p);
+ while (value_e > value_b && ISSPACE (*(value_e - 1)))
+ --value_e;
+ if (value_b == value_e)
+ /* Hmm, should we check for empty value? I guess that's
+ legal, so I leave it. */
+ ;
cookie = cookie_new ();
cookie->domain = strdupdelim (domain_b, domain_e);
- /* Don't use MARK_WORD for value because it may contain
- whitespace itself. Instead, set name_e to the end of line,
- modulo trailing space (which includes the NL separator.) */
- SKIP_WS (p);
- name_b = p;
- name_e = p + strlen (p);
- while (name_e >= name_b && ISSPACE (*name_e))
- --name_e;
- if (name_b == name_e)
- /* Hmm, should we check for empty value? I guess that's
- legal, so I leave it. */
- ;
-
/* safe default in case EXPIRES field is garbled. */
cookie->expiry_time = cookies_now - 1;
/* no cookies stored; nothing to do. */
return;
+ DEBUGP (("Saving cookies to %s.\n", file));
+
cookies_now = time (NULL);
fp = fopen (file, "w");
if (fclose (fp) < 0)
logprintf (LOG_NOTQUIET, _("Error closing `%s': %s\n"),
file, strerror (errno));
+
+ DEBUGP (("Done saving cookies.\n"));
}
\f
static int