X-Git-Url: http://sjero.net/git/?a=blobdiff_plain;f=src%2Fcookies.c;h=07cfdc8ad14721b2353420424502cac6e041748c;hb=5f0a2b3f0846dd4c2f72fc62e7171200d1fd6e06;hp=55d03df6a884f205032fd0a7713070ecd76d0dd3;hpb=45164e05ccd4a968db94165bbe0bd7df4b5fcd84;p=wget diff --git a/src/cookies.c b/src/cookies.c index 55d03df6..07cfdc8a 100644 --- a/src/cookies.c +++ b/src/cookies.c @@ -30,11 +30,17 @@ so, delete this exception statement from your version. */ /* Written by Hrvoje Niksic. Parts are loosely inspired by cookie code submitted by Tomasz Wegrzanowski. - TODO: Implement limits on cookie-related sizes, such as max. cookie - size, max. number of cookies, etc. Add more "cookie jar" methods, - such as methods to over stored cookies, to clear temporary cookies, - to perform intelligent auto-saving, etc. Ultimately support - `Set-Cookie2' and `Cookie2' headers. */ + Ideas for future work: + + * Implement limits on cookie-related sizes, such as max. cookie + size, max. number of cookies, etc. + + * Add more "cookie jar" methods, such as methods to iterate over + stored cookies, to clear temporary cookies, to perform + intelligent auto-saving, etc. + + * Support `Set-Cookie2' and `Cookie2' headers? Does anyone really + use them? */ #include @@ -58,11 +64,21 @@ time_t http_atotm PARAMS ((const char *)); /* Declarations of `struct cookie' and the most basic functions. */ +/* Cookie jar serves as cookie storage and a means of retrieving + cookies efficiently. All cookies with the same domain are stored + in a linked list called "chain". A cookie chain can be reached by + looking up the domain in the cookie jar's chains_by_domain table. + + For example, to reach all the cookies under google.com, one must + execute hash_table_get(jar->chains_by_domain, "google.com"). Of + course, when sending a cookie to `www.google.com', one must search + for cookies that belong to either `www.google.com' or `google.com' + -- but the point is that the code doesn't need to go through *all* + the cookies. */ + struct cookie_jar { - /* Hash table that maps domain names to cookie chains. A "cookie - chain" is a linked list of cookies that belong to the same - domain. */ - struct hash_table *chains_by_domain; + /* Cookie chains indexed by domain. */ + struct hash_table *chains; int cookie_count; /* number of cookies in the jar. */ }; @@ -74,8 +90,8 @@ time_t cookies_now; struct cookie_jar * cookie_jar_new (void) { - struct cookie_jar *jar = xmalloc (sizeof (struct cookie_jar)); - jar->chains_by_domain = make_nocase_string_hash_table (0); + struct cookie_jar *jar = xnew (struct cookie_jar); + jar->chains = make_nocase_string_hash_table (0); jar->cookie_count = 0; return jar; } @@ -102,8 +118,6 @@ struct cookie { char *attr; /* cookie attribute name */ char *value; /* cookie attribute value */ - struct cookie_jar *jar; /* pointer back to the cookie jar, for - convenience. */ struct cookie *next; /* used for chaining of cookies in the same domain. */ }; @@ -116,8 +130,7 @@ struct cookie { static struct cookie * cookie_new (void) { - struct cookie *cookie = xmalloc (sizeof (struct cookie)); - memset (cookie, '\0', sizeof (struct cookie)); + struct cookie *cookie = xnew0 (struct cookie); /* Both cookie->permanent and cookie->expiry_time are now 0. By default, we assume that the cookie is non-permanent and valid @@ -141,10 +154,10 @@ delete_cookie (struct cookie *cookie) /* Functions for storing cookies. - All cookies can be reached beginning with jar->chains_by_domain. - The key in that table is the domain name, and the value is a linked - list of all cookies from that domain. Every new cookie is placed - on the head of the list. */ + All cookies can be reached beginning with jar->chains. The key in + that table is the domain name, and the value is a linked list of + all cookies from that domain. Every new cookie is placed on the + head of the list. */ /* Find and return a cookie in JAR whose domain, path, and attribute name correspond to COOKIE. If found, PREVPTR will point to the @@ -159,7 +172,7 @@ find_matching_cookie (struct cookie_jar *jar, struct cookie *cookie, { struct cookie *chain, *prev; - chain = hash_table_get (jar->chains_by_domain, cookie->domain); + chain = hash_table_get (jar->chains, cookie->domain); if (!chain) goto nomatch; @@ -193,7 +206,7 @@ store_cookie (struct cookie_jar *jar, struct cookie *cookie) struct cookie *chain_head; char *chain_key; - if (hash_table_get_pair (jar->chains_by_domain, cookie->domain, + if (hash_table_get_pair (jar->chains, cookie->domain, &chain_key, &chain_head)) { /* A chain of cookies in this domain already exists. Check for @@ -227,15 +240,16 @@ store_cookie (struct cookie_jar *jar, struct cookie *cookie) } else { - /* We are now creating the chain. Allocate the string that will - be used as a key. It is unsafe to use cookie->domain for - that, because it might get deallocated by the above code at - some point later. */ + /* We are now creating the chain. Use a copy of cookie->domain + as the key for the life-time of the chain. Using + cookie->domain would be unsafe because the life-time of the + chain may exceed the life-time of the cookie. (Cookies may + be deleted from the chain by this very function.) */ cookie->next = NULL; chain_key = xstrdup (cookie->domain); } - hash_table_put (jar->chains_by_domain, chain_key, cookie); + hash_table_put (jar->chains, chain_key, cookie); ++jar->cookie_count; DEBUGP (("\nStored cookie %s %d%s %s %s %d %s %s %s\n", @@ -260,7 +274,7 @@ discard_matching_cookie (struct cookie_jar *jar, struct cookie *cookie) { struct cookie *prev, *victim; - if (!hash_table_count (jar->chains_by_domain)) + if (!hash_table_count (jar->chains)) /* No elements == nothing to discard. */ return; @@ -277,18 +291,18 @@ discard_matching_cookie (struct cookie_jar *jar, struct cookie *cookie) char *chain_key = NULL; int res; - res = hash_table_get_pair (jar->chains_by_domain, victim->domain, + res = hash_table_get_pair (jar->chains, victim->domain, &chain_key, NULL); assert (res != 0); if (!victim->next) { /* VICTIM was the only cookie in the chain. Destroy the chain and deallocate the chain key. */ - hash_table_remove (jar->chains_by_domain, victim->domain); + hash_table_remove (jar->chains, victim->domain); xfree (chain_key); } else - hash_table_put (jar->chains_by_domain, chain_key, victim->next); + hash_table_put (jar->chains, chain_key, victim->next); } delete_cookie (victim); DEBUGP (("Discarded old cookie.\n")); @@ -298,7 +312,6 @@ discard_matching_cookie (struct cookie_jar *jar, struct cookie *cookie) /* Functions for parsing the `Set-Cookie' header, and creating new cookies from the wire. */ - #define NAME_IS(string_literal) \ BOUNDED_EQUAL_NO_CASE (name_b, name_e, string_literal) @@ -772,7 +785,8 @@ check_domain_match (const char *cookie_domain, const char *host) DEBUGP ((" 7")); - /* Don't allow domain "bar.com" to match host "foobar.com". */ + /* Don't allow the host "foobar.com" to set a cookie for domain + "bar.com". */ if (*cookie_domain != '.') { int dlen = strlen (cookie_domain); @@ -830,9 +844,11 @@ cookie_jar_process_set_cookie (struct cookie_jar *jar, logprintf (LOG_NOTQUIET, "Cookie coming from %s attempted to set domain to %s\n", host, cookie->domain); + xfree (cookie->domain); goto copy_domain; } } + if (!cookie->path) cookie->path = xstrdup (path); else @@ -863,24 +879,27 @@ cookie_jar_process_set_cookie (struct cookie_jar *jar, previously stored cookies. Entry point is `build_cookies_request'. */ -/* Find the cookie chains that match HOST and store them to DEST. +/* Find the cookie chains whose domains match HOST and store them to + DEST. - A cookie chain is the list of cookies declared under a domain. - Given HOST "img.search.xemacs.org", this function will store the - chains for "img.search.xemacs.org", "search.xemacs.org", and - "xemacs.org" -- those of them that exist (if any), that is. + A cookie chain is the head of a list of cookies that belong to a + host/domain. Given HOST "img.search.xemacs.org", this function + will return the chains for "img.search.xemacs.org", + "search.xemacs.org", and "xemacs.org" -- those of them that exist + (if any), that is. - No more than SIZE matches are written; if more matches are present, - return the number of chains that would have been written. */ + DEST should be large enough to accept (in the worst case) as many + elements as there are domain components of HOST. */ static int -find_matching_chains (struct cookie_jar *jar, const char *host, - struct cookie *dest[], int dest_size) +find_chains_of_host (struct cookie_jar *jar, const char *host, + struct cookie *dest[]) { int dest_count = 0; int passes, passcnt; - if (!hash_table_count (jar->chains_by_domain)) + /* Bail out quickly if there are no cookies in the jar. */ + if (!hash_table_count (jar->chains)) return 0; if (numeric_address_p (host)) @@ -900,13 +919,9 @@ find_matching_chains (struct cookie_jar *jar, const char *host, srk.fer.hr's, then fer.hr's. */ while (1) { - struct cookie *chain = hash_table_get (jar->chains_by_domain, host); + struct cookie *chain = hash_table_get (jar->chains, host); if (chain) - { - if (dest_count < dest_size) - dest[dest_count] = chain; - ++dest_count; - } + dest[dest_count++] = chain; if (++passcnt >= passes) break; host = strchr (host, '.') + 1; @@ -925,8 +940,8 @@ path_matches (const char *full_path, const char *prefix) if (*prefix != '/') /* Wget's HTTP paths do not begin with '/' (the URL code treats it - as a separator), but the '/' is assumed when matching against - the cookie stuff. */ + as a mere separator, inspired by rfc1808), but the '/' is + assumed when matching against the cookie stuff. */ return 0; ++prefix; @@ -940,17 +955,17 @@ path_matches (const char *full_path, const char *prefix) return len + 1; } -/* Return non-zero iff COOKIE matches the given HOST, PORT, PATH, and - SECFLAG. +/* Return non-zero iff COOKIE matches the provided parameters of the + URL being downloaded: HOST, PORT, PATH, and SECFLAG. If PATH_GOODNESS is non-NULL, store the "path goodness" value - there. That value is a measure of how well COOKIE matches PATH, + there. That value is a measure of how closely COOKIE matches PATH, used for ordering cookies. */ static int -matching_cookie (const struct cookie *cookie, - const char *host, int port, const char *path, - int secure, int *path_goodness) +cookie_matches_url (const struct cookie *cookie, + const char *host, int port, const char *path, + int secflag, int *path_goodness) { int pg; @@ -962,7 +977,7 @@ matching_cookie (const struct cookie *cookie, possible. */ return 0; - if (cookie->secure && !secure) + if (cookie->secure && !secflag) /* Don't transmit secure cookies over insecure connections. */ return 0; if (cookie->port != PORT_ANY && cookie->port != port) @@ -970,7 +985,7 @@ matching_cookie (const struct cookie *cookie, /* If exact domain match is required, verify that cookie's domain is equal to HOST. If not, assume success on the grounds of the - cookie's chain having been found by find_matching_chains. */ + cookie's chain having been found by find_chains_of_host. */ if (cookie->domain_exact && 0 != strcasecmp (host, cookie->domain)) return 0; @@ -1015,40 +1030,45 @@ equality_comparator (const void *p1, const void *p2) } /* Eliminate duplicate cookies. "Duplicate cookies" are any two - cookies whose name and value are the same. Whenever a duplicate + cookies with the same attr name and value. Whenever a duplicate pair is found, one of the cookies is removed. */ static int eliminate_dups (struct weighed_cookie *outgoing, int count) { - int i; + struct weighed_cookie *h; /* hare */ + struct weighed_cookie *t; /* tortoise */ + struct weighed_cookie *end = outgoing + count; /* We deploy a simple uniquify algorithm: first sort the array - according to our sort criteria, then uniquify it by comparing - each cookie with its neighbor. */ + according to our sort criteria, then copy it to itself, comparing + each cookie to its neighbor and ignoring the duplicates. */ qsort (outgoing, count, sizeof (struct weighed_cookie), equality_comparator); - for (i = 0; i < count - 1; i++) + /* "Hare" runs through all the entries in the array, followed by + "tortoise". If a duplicate is found, the hare skips it. + Non-duplicate entries are copied to the tortoise ptr. */ + + for (h = t = outgoing; h < end; h++) { - struct cookie *c1 = outgoing[i].cookie; - struct cookie *c2 = outgoing[i + 1].cookie; - if (!strcmp (c1->attr, c2->attr) && !strcmp (c1->value, c2->value)) + if (h != end - 1) { - /* c1 and c2 are the same; get rid of c2. */ - if (count > i + 1) - /* move all ptrs from positions [i + 1, count) to i. */ - memmove (outgoing + i, outgoing + i + 1, - (count - (i + 1)) * sizeof (struct weighed_cookie)); - /* We decrement i to counter the ++i above. Remember that - we've just removed the element in front of us; we need to - remain in place to check whether outgoing[i] matches what - used to be outgoing[i + 2]. */ - --i; - --count; + struct cookie *c0 = h[0].cookie; + struct cookie *c1 = h[1].cookie; + if (!strcmp (c0->attr, c1->attr) && !strcmp (c0->value, c1->value)) + continue; /* ignore the duplicate */ } + + /* If the hare has advanced past the tortoise (because of + previous dups), make sure the values get copied. Otherwise, + no copying is necessary. */ + if (h != t) + *t++ = *h; + else + t++; } - return count; + return t - outgoing; } /* Comparator used for sorting by quality. */ @@ -1081,9 +1101,7 @@ cookie_jar_generate_cookie_header (struct cookie_jar *jar, const char *host, int port, const char *path, int connection_secure_p) { - struct cookie *chain_default_store[20]; - struct cookie **chains = chain_default_store; - int chain_store_size = countof (chain_default_store); + struct cookie **chains; int chain_count; struct cookie *cookie; @@ -1092,20 +1110,15 @@ cookie_jar_generate_cookie_header (struct cookie_jar *jar, const char *host, char *result; int result_size, pos; - /* First, find the chains that match HOST. */ - again: - chain_count = find_matching_chains (jar, host, chains, chain_store_size); - if (chain_count > chain_store_size) - { - /* It's extremely unlikely that more than 20 chains will ever - match. But since find_matching_chains reports the exact size - it needs, it's easy to not have the limitation, so we - don't. */ - chains = alloca (chain_count * sizeof (struct cookie *)); - chain_store_size = chain_count; - goto again; - } + /* First, find the cookie chains whose domains match HOST. */ + + /* Allocate room for find_chains_of_host to write to. The number of + chains can at most equal the number of subdomains, hence + 1+. */ + chains = alloca_array (struct cookie *, 1 + count_char (host, '.')); + chain_count = find_chains_of_host (jar, host, chains); + /* No cookies for this host. */ if (!chain_count) return NULL; @@ -1119,13 +1132,14 @@ cookie_jar_generate_cookie_header (struct cookie_jar *jar, const char *host, count = 0; for (i = 0; i < chain_count; i++) for (cookie = chains[i]; cookie; cookie = cookie->next) - if (matching_cookie (cookie, host, port, path, connection_secure_p, NULL)) + if (cookie_matches_url (cookie, host, port, path, connection_secure_p, + NULL)) ++count; if (!count) return NULL; /* no cookies matched */ /* Allocate the array. */ - outgoing = alloca (count * sizeof (struct weighed_cookie)); + outgoing = alloca_array (struct weighed_cookie, count); /* Fill the array with all the matching cookies from the chains that match HOST. */ @@ -1134,8 +1148,8 @@ cookie_jar_generate_cookie_header (struct cookie_jar *jar, const char *host, for (cookie = chains[i]; cookie; cookie = cookie->next) { int pg; - if (!matching_cookie (cookie, host, port, path, - connection_secure_p, &pg)) + if (!cookie_matches_url (cookie, host, port, path, + connection_secure_p, &pg)) continue; outgoing[ocnt].cookie = cookie; outgoing[ocnt].domain_goodness = strlen (cookie->domain); @@ -1417,7 +1431,7 @@ cookie_jar_save (struct cookie_jar *jar, const char *file) fprintf (fp, "# Generated by Wget on %s.\n", datetime_str (NULL)); fputs ("# Edit at your own risk.\n\n", fp); - hash_table_map (jar->chains_by_domain, save_cookies_mapper, fp); + hash_table_map (jar->chains, save_cookies_mapper, fp); if (ferror (fp)) logprintf (LOG_NOTQUIET, _("Error writing to `%s': %s\n"), @@ -1443,7 +1457,7 @@ nuke_cookie_chain (void *value, void *key, void *arg) struct cookie_jar *jar = (struct cookie_jar *)arg; /* Remove the chain from the table and free the key. */ - hash_table_remove (jar->chains_by_domain, chain_key); + hash_table_remove (jar->chains, chain_key); xfree (chain_key); /* Then delete all the cookies in the chain. */ @@ -1463,8 +1477,8 @@ nuke_cookie_chain (void *value, void *key, void *arg) void cookie_jar_delete (struct cookie_jar *jar) { - hash_table_map (jar->chains_by_domain, nuke_cookie_chain, jar); - hash_table_destroy (jar->chains_by_domain); + hash_table_map (jar->chains, nuke_cookie_chain, jar); + hash_table_destroy (jar->chains); xfree (jar); }