X-Git-Url: http://sjero.net/git/?a=blobdiff_plain;f=src%2Fcookies.c;h=766896c2e5e70741497219e1a68c85d879a182b3;hb=d5be8ecca466601bda9b81c28a79077fbda6ccde;hp=a7c1598c1871affc2ef66dbd1b7147b65ff2167d;hpb=cd1423a4780e8d7979b9be287a949c067e9559c1;p=wget diff --git a/src/cookies.c b/src/cookies.c index a7c1598c..766896c2 100644 --- a/src/cookies.c +++ b/src/cookies.c @@ -1,20 +1,20 @@ /* Support for cookies. Copyright (C) 2001 Free Software Foundation, Inc. -This file is part of Wget. +This file is part of GNU Wget. -This program is free software; you can redistribute it and/or modify +GNU Wget is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. -This program is distributed in the hope that it will be useful, but +GNU Wget is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software +along with Wget; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* Written by Hrvoje Niksic. Parts are loosely inspired by cookie @@ -104,51 +104,15 @@ delete_cookie (struct cookie *cookie) xfree (cookie); } -/* Functions for cookie-specific hash tables. These are regular hash - tables, but with case-insensitive test and hash functions. */ - -/* Like string_hash, but produces the same results regardless of the - case. */ - -static unsigned long -unsigned_string_hash (const void *sv) -{ - unsigned int h = 0; - unsigned const char *x = (unsigned const char *) sv; - - while (*x) - { - unsigned int g; - unsigned char c = TOLOWER (*x); - h = (h << 4) + c; - if ((g = h & 0xf0000000) != 0) - h = (h ^ (g >> 24)) ^ g; - ++x; - } - - return h; -} - -/* Front-end to strcasecmp. */ - -static int -unsigned_string_cmp (const void *s1, const void *s2) -{ - return !strcasecmp ((const char *)s1, (const char *)s2); -} - -/* Like make_string_hash_table, but uses unsigned_string_hash and - unsigned_string_cmp. */ +/* Functions for storing cookies. -static struct hash_table * -make_unsigned_string_hash_table (int initial_size) -{ - return hash_table_new (initial_size, - unsigned_string_hash, unsigned_string_cmp); -} + All cookies can be referenced through cookies_hash_table. The key + in that table is the domain name, and the value is a linked list of + all cookies from that domain. Every new cookie is placed on the + head of the list. */ -/* Write "HOST:PORT" to RESULT. RESULT should be a pointer, and the - memory for the contents is allocated on the stack. Useful for +/* Write "HOST:PORT" to a stack-allocated area and make RESULT point + to that area. RESULT should be a character pointer. Useful for creating HOST:PORT strings, which are the keys in the hash table. */ @@ -171,13 +135,6 @@ find_cookie_chain_exact (const char *domain, int port) SET_HOSTPORT (domain, port, key); return hash_table_get (cookies_hash_table, key); } - -/* Functions for storing cookies. - - All cookies can be referenced through cookies_hash_table. The key - in that table is the domain name, and the value is a linked list of - all cookies from that domain. Every new cookie is placed on the - head of the list. */ /* Find and return the cookie whose domain, path, and attribute name correspond to COOKIE. If found, PREVPTR will point to the location @@ -231,7 +188,7 @@ store_cookie (struct cookie *cookie) if (!cookies_hash_table) /* If the hash table is not initialized, do so now, because we'll need to store things. */ - cookies_hash_table = make_unsigned_string_hash_table (0); + cookies_hash_table = make_nocase_string_hash_table (0); /* Initialize hash table key. */ SET_HOSTPORT (cookie->domain, cookie->port, hostport); @@ -280,8 +237,10 @@ store_cookie (struct cookie *cookie) hash_table_put (cookies_hash_table, chain_key, cookie); - DEBUGP (("\nStored cookie %s %d %s %d %s %s %s\n", - cookie->domain, cookie->port, cookie->path, cookie->secure, + DEBUGP (("\nStored cookie %s %d %s %s %d %s %s %s\n", + cookie->domain, cookie->port, cookie->path, + cookie->permanent ? "permanent" : "nonpermanent", + cookie->secure, asctime (localtime ((time_t *)&cookie->expiry_time)), cookie->attr, cookie->value)); } @@ -459,9 +418,16 @@ update_cookie_field (struct cookie *cookie, #undef NAME_IS /* Returns non-zero for characters that are legal in the name of an - attribute. */ + attribute. This used to allow only alphanumerics, '-', and '_', + but we need to be more lenient because a number of sites wants to + use weirder attribute names. rfc2965 "informally specifies" + attribute name (token) as "a sequence of non-special, non-white + space characters". So we allow everything except the stuff we know + could harm us. */ -#define ATTR_NAME_CHAR(c) (ISALNUM (c) || (c) == '-' || (c) == '_') +#define ATTR_NAME_CHAR(c) ((c) > 32 && (c) < 127 \ + && (c) != '"' && (c) != '=' \ + && (c) != ';' && (c) != ',') /* Fetch the next character without doing anything special if CH gets set to 0. (The code executed next is expected to handle it.) */ @@ -709,9 +675,11 @@ numeric_address_p (const char *addr) static int check_domain_match (const char *cookie_domain, const char *host) { - int i, headlen; + int headlen; const char *tail; + DEBUGP (("cdm: 1")); + /* Numeric address requires exact match. It also requires HOST to be an IP address. I suppose we *could* resolve HOST with store_hostaddress (it would hit the hash table), but rfc2109 @@ -720,6 +688,8 @@ check_domain_match (const char *cookie_domain, const char *host) if (numeric_address_p (cookie_domain)) return !strcmp (cookie_domain, host); + DEBUGP ((" 2")); + /* The domain must contain at least one embedded dot. */ { const char *rest = cookie_domain; @@ -736,16 +706,22 @@ check_domain_match (const char *cookie_domain, const char *host) return 0; } + DEBUGP ((" 3")); + /* For the sake of efficiency, check for exact match first. */ if (!strcasecmp (cookie_domain, host)) return 1; + DEBUGP ((" 4")); + /* In rfc2109 terminology, HOST needs domain-match COOKIE_DOMAIN. This means that COOKIE_DOMAIN needs to start with `.' and be an FQDN, and that HOST must end with COOKIE_DOMAIN. */ if (*cookie_domain != '.') return 0; + DEBUGP ((" 5")); + /* Two proceed, we need to examine two parts of HOST: its head and its tail. Head and tail are defined in terms of the length of the domain, like this: @@ -768,10 +744,14 @@ check_domain_match (const char *cookie_domain, const char *host) return 0; tail = host + headlen; + DEBUGP ((" 6")); + /* (1) */ if (strcasecmp (tail, cookie_domain)) return 0; + DEBUGP ((" 7")); + /* Test (2) is not part of the "domain-match" itself, but is recommended by rfc2109 for reasons of privacy. */ @@ -779,6 +759,8 @@ check_domain_match (const char *cookie_domain, const char *host) if (memchr (host, '.', headlen)) return 0; + DEBUGP ((" 8")); + return 1; } @@ -798,7 +780,7 @@ check_path_match (const char *cookie_path, const char *path) int set_cookie_header_cb (const char *hdr, void *closure) { - struct urlinfo *u = (struct urlinfo *)closure; + struct url *u = (struct url *)closure; struct cookie *cookie; cookies_now = time (NULL); @@ -984,6 +966,43 @@ equality_comparator (const void *p1, const void *p2) return namecmp ? namecmp : valuecmp; } +/* Eliminate duplicate cookies. "Duplicate cookies" are any two + cookies whose name and value are the same. Whenever a duplicate + pair is found, one of the cookies is removed. */ + +static int +eliminate_dups (struct weighed_cookie *outgoing, int count) +{ + int i; + + /* We deploy a simple uniquify algorithm: first sort the array + according to our sort criterion, then uniquify it by comparing + each cookie with its neighbor. */ + + qsort (outgoing, count, sizeof (struct weighed_cookie), equality_comparator); + + for (i = 0; i < count - 1; i++) + { + struct cookie *c1 = outgoing[i].cookie; + struct cookie *c2 = outgoing[i + 1].cookie; + if (!strcmp (c1->attr, c2->attr) && !strcmp (c1->value, c2->value)) + { + /* c1 and c2 are the same; get rid of c2. */ + if (count > i + 1) + /* move all ptrs from positions [i + 1, count) to i. */ + memmove (outgoing + i, outgoing + i + 1, + (count - (i + 1)) * sizeof (struct weighed_cookie)); + /* We decrement i to counter the ++i above. Remember that + we've just removed the element in front of us; we need to + remain in place to check whether outgoing[i] matches what + used to be outgoing[i + 2]. */ + --i; + --count; + } + } + return count; +} + /* Comparator used for sorting by quality. */ static int @@ -1003,11 +1022,11 @@ goodness_comparator (const void *p1, const void *p2) return dgdiff ? dgdiff : pgdiff; } -/* Build a `Cookies' header for a request that goes to HOST:PORT and - requests PATH from the server. Memory is allocated by `malloc', - and the caller is responsible for freeing it. If no cookies - pertain to this request, i.e. no cookie header should be generated, - NULL is returned. */ +/* Build a `Cookie' header for a request that goes to HOST:PORT and + requests PATH from the server. The resulting string is allocated + with `malloc', and the caller is responsible for freeing it. If no + cookies pertain to this request, i.e. no cookie header should be + generated, NULL is returned. */ char * build_cookies_request (const char *host, int port, const char *path, @@ -1029,9 +1048,11 @@ build_cookies_request (const char *host, int port, const char *path, if (chain_count > chain_store_size) { /* It's extremely unlikely that more than 20 chains will ever - match. But in this case it's easy to not have the - limitation, so we don't. */ + match. But since find_matching_chains reports the exact size + it needs, it's easy to not have the limitation, so we + don't. */ all_chains = alloca (chain_count * sizeof (struct cookie *)); + chain_store_size = chain_count; goto again; } @@ -1053,6 +1074,8 @@ build_cookies_request (const char *host, int port, const char *path, /* Allocate the array. */ outgoing = alloca (count * sizeof (struct weighed_cookie)); + /* Fill the array with all the matching cookies from all the + matching chains. */ ocnt = 0; for (i = 0; i < chain_count; i++) for (cookie = all_chains[i]; cookie; cookie = cookie->next) @@ -1068,28 +1091,8 @@ build_cookies_request (const char *host, int port, const char *path, assert (ocnt == count); /* Eliminate duplicate cookies; that is, those whose name and value - are the same. We do it by first sorting the array, and then - uniq'ing it. */ - qsort (outgoing, count, sizeof (struct weighed_cookie), equality_comparator); - for (i = 0; i < count - 1; i++) - { - struct cookie *c1 = outgoing[i].cookie; - struct cookie *c2 = outgoing[i + 1].cookie; - if (!strcmp (c1->attr, c2->attr) && !strcmp (c1->value, c2->value)) - { - /* c1 and c2 are the same; get rid of c2. */ - if (count > i + 1) - /* move all ptrs from positions [i + 1, count) to i. */ - memmove (outgoing + i, outgoing + i + 1, - (count - (i + 1)) * sizeof (struct weighed_cookie)); - /* We decrement i to counter the ++i above. Remember that - we've just removed the element in front of us; we need to - remain in place to check whether outgoing[i] what used to - be outgoing[i + 2]. */ - --i; - --count; - } - } + are the same. */ + count = eliminate_dups (outgoing, count); /* Sort the array so that best-matching domains come first, and that, within one domain, best-matching paths come first. */ @@ -1198,7 +1201,7 @@ domain_port (const char *domain_b, const char *domain_e, ++p; \ } while (0) -#define MARK_WORD(p, b, e) do { \ +#define SET_WORD_BOUNDARIES(p, b, e) do { \ SKIP_WS (p); \ b = p; \ /* skip non-ws */ \ @@ -1245,16 +1248,25 @@ load_cookies (const char *file) /* empty line */ continue; - MARK_WORD (p, domain_b, domain_e); - MARK_WORD (p, ignore_b, ignore_e); - MARK_WORD (p, path_b, path_e); - MARK_WORD (p, secure_b, secure_e); - MARK_WORD (p, expires_b, expires_e); - MARK_WORD (p, name_b, name_e); + SET_WORD_BOUNDARIES (p, domain_b, domain_e); + SET_WORD_BOUNDARIES (p, ignore_b, ignore_e); + SET_WORD_BOUNDARIES (p, path_b, path_e); + SET_WORD_BOUNDARIES (p, secure_b, secure_e); + SET_WORD_BOUNDARIES (p, expires_b, expires_e); + SET_WORD_BOUNDARIES (p, name_b, name_e); - /* Don't use MARK_WORD for value because it may contain - whitespace itself. Instead, . */ - MARK_WORD (p, value_b, value_e); + /* Don't use SET_WORD_BOUNDARIES for value because it may + contain whitespace. Instead, set value_e to the end of line, + modulo trailing space (this will skip the line separator.) */ + SKIP_WS (p); + value_b = p; + value_e = p + strlen (p); + while (value_e > value_b && ISSPACE (*(value_e - 1))) + --value_e; + if (value_b == value_e) + /* Hmm, should we check for empty value? I guess that's + legal, so I leave it. */ + ; cookie = cookie_new (); @@ -1275,19 +1287,6 @@ load_cookies (const char *file) cookie->domain = strdupdelim (domain_b, domain_e); - /* Don't use MARK_WORD for value because it may contain - whitespace itself. Instead, set name_e to the end of line, - modulo trailing space (which includes the NL separator.) */ - SKIP_WS (p); - name_b = p; - name_e = p + strlen (p); - while (name_e >= name_b && ISSPACE (*name_e)) - --name_e; - if (name_b == name_e) - /* Hmm, should we check for empty value? I guess that's - legal, so I leave it. */ - ; - /* safe default in case EXPIRES field is garbled. */ cookie->expiry_time = cookies_now - 1; @@ -1376,7 +1375,7 @@ save_cookies (const char *file) logprintf (LOG_NOTQUIET, _("Error closing `%s': %s\n"), file, strerror (errno)); - DEBUGP (("Done saving cookies.\n", file)); + DEBUGP (("Done saving cookies.\n")); } static int