X-Git-Url: http://sjero.net/git/?p=wget;a=blobdiff_plain;f=src%2Fcookies.c;h=6ba7b5a5a4ed9b482f3866124b9705e16310472e;hp=8c44267e126529d1acb6b15d5fc3c9a95f660f66;hb=38a7829dcb4eb5dba28dbf0f05c6a80fea9217f8;hpb=5fa53b5a1debb7cd33b260292060d7a45fed282a diff --git a/src/cookies.c b/src/cookies.c index 8c44267e..6ba7b5a5 100644 --- a/src/cookies.c +++ b/src/cookies.c @@ -1,206 +1,192 @@ /* Support for cookies. - Copyright (C) 2001 Free Software Foundation, Inc. + Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, + 2010, 2011 Free Software Foundation, Inc. -This file is part of Wget. +This file is part of GNU Wget. -This program is free software; you can redistribute it and/or modify +GNU Wget is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or (at +the Free Software Foundation; either version 3 of the License, or (at your option) any later version. -This program is distributed in the hope that it will be useful, but +GNU Wget is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +along with Wget. If not, see . -/* Written by Hrvoje Niksic. Parts are loosely inspired by cookie - code submitted by Tomasz Wegrzanowski. */ +Additional permission under GNU GPL version 3 section 7 -#include +If you modify this program, or any covered work, by linking or +combining it with the OpenSSL project's OpenSSL library (or a +modified version of that library), containing parts covered by the +terms of the OpenSSL or SSLeay licenses, the Free Software Foundation +grants you additional permission to convey the resulting work. +Corresponding Source for a non-source form of such a combination +shall include the source code for the parts of OpenSSL used as well +as that of the covered work. */ + +/* Written by Hrvoje Niksic. Parts are loosely inspired by the + cookie patch submitted by Tomasz Wegrzanowski. + + This implements the client-side cookie support, as specified + (loosely) by Netscape's "preliminary specification", currently + available at: + + http://wp.netscape.com/newsref/std/cookie_spec.html + + rfc2109 is not supported because of its incompatibilities with the + above widely-used specification. rfc2965 is entirely ignored, + since popular client software doesn't implement it, and even the + sites that do send Set-Cookie2 also emit Set-Cookie for + compatibility. */ + +#include "wget.h" #include -#ifdef HAVE_STRING_H -# include -#else -# include -#endif +#include #include #include #include - -#include "wget.h" +#include #include "utils.h" #include "hash.h" -#include "url.h" #include "cookies.h" +#include "http.h" /* for http_atotm */ + +/* Declarations of `struct cookie' and the most basic functions. */ -/* Hash table that maps domain names to cookie chains. */ +/* Cookie jar serves as cookie storage and a means of retrieving + cookies efficiently. All cookies with the same domain are stored + in a linked list called "chain". A cookie chain can be reached by + looking up the domain in the cookie jar's chains_by_domain table. -static struct hash_table *cookies_hash_table; + For example, to reach all the cookies under google.com, one must + execute hash_table_get(jar->chains_by_domain, "google.com"). Of + course, when sending a cookie to `www.google.com', one must search + for cookies that belong to either `www.google.com' or `google.com' + -- but the point is that the code doesn't need to go through *all* + the cookies. */ -/* This should be set by entry points in this file, so the low-level - functions don't need to call time() all the time. */ +struct cookie_jar { + /* Cookie chains indexed by domain. */ + struct hash_table *chains; + + int cookie_count; /* number of cookies in the jar. */ +}; +/* Value set by entry point functions, so that the low-level + routines don't need to call time() all the time. */ static time_t cookies_now; - -/* Definition of `struct cookie' and the most basic functions. */ + +struct cookie_jar * +cookie_jar_new (void) +{ + struct cookie_jar *jar = xnew (struct cookie_jar); + jar->chains = make_nocase_string_hash_table (0); + jar->cookie_count = 0; + return jar; +} struct cookie { - char *domain; /* domain of the cookie */ - int port; /* port number */ - char *path; /* path prefix of the cookie */ - int secure; /* whether cookie should be - transmitted over non-https - connections. */ - int permanent; /* whether the cookie should outlive - the session */ - unsigned long expiry_time; /* time when the cookie expires */ - int discard_requested; /* whether cookie was created to - request discarding another - cookie */ - - char *attr; /* cookie attribute name */ - char *value; /* cookie attribute value */ - - struct cookie *next; /* used for chaining of cookies in the - same domain. */ + char *domain; /* domain of the cookie */ + int port; /* port number */ + char *path; /* path prefix of the cookie */ + + unsigned discard_requested :1; /* whether cookie was created to + request discarding another + cookie. */ + + unsigned secure :1; /* whether cookie should be + transmitted over non-https + connections. */ + unsigned domain_exact :1; /* whether DOMAIN must match as a + whole. */ + + unsigned permanent :1; /* whether the cookie should outlive + the session. */ + time_t expiry_time; /* time when the cookie expires, 0 + means undetermined. */ + + char *attr; /* cookie attribute name */ + char *value; /* cookie attribute value */ + + struct cookie *next; /* used for chaining of cookies in the + same domain. */ }; +#define PORT_ANY (-1) + /* Allocate and return a new, empty cookie structure. */ static struct cookie * cookie_new (void) { - struct cookie *cookie = xmalloc (sizeof (struct cookie)); - memset (cookie, '\0', sizeof (struct cookie)); + struct cookie *cookie = xnew0 (struct cookie); - /* If we don't know better, assume cookie is non-permanent and valid - for the entire session. */ - cookie->expiry_time = ~0UL; - - /* Assume default port. */ - cookie->port = 80; + /* Both cookie->permanent and cookie->expiry_time are now 0. This + means that the cookie doesn't expire, but is only valid for this + session (i.e. not written out to disk). */ + cookie->port = PORT_ANY; return cookie; } -/* Deallocate COOKIE and its components. */ - -static void -delete_cookie (struct cookie *cookie) -{ - FREE_MAYBE (cookie->domain); - FREE_MAYBE (cookie->path); - FREE_MAYBE (cookie->attr); - FREE_MAYBE (cookie->value); - xfree (cookie); -} - -/* Functions for cookie-specific hash tables. These are regular hash - tables, but with case-insensitive test and hash functions. */ - -/* Like string_hash, but produces the same results regardless of the - case. */ - -static unsigned long -unsigned_string_hash (const void *sv) -{ - unsigned int h = 0; - unsigned const char *x = (unsigned const char *) sv; - - while (*x) - { - unsigned int g; - unsigned char c = TOLOWER (*x); - h = (h << 4) + c; - if ((g = h & 0xf0000000) != 0) - h = (h ^ (g >> 24)) ^ g; - ++x; - } - - return h; -} - -/* Front-end to strcasecmp. */ +/* Non-zero if the cookie has expired. Assumes cookies_now has been + set by one of the entry point functions. */ -static int -unsigned_string_cmp (const void *s1, const void *s2) +static bool +cookie_expired_p (const struct cookie *c) { - return !strcasecmp ((const char *)s1, (const char *)s2); + return c->expiry_time != 0 && c->expiry_time < cookies_now; } -/* Like make_string_hash_table, but uses unsigned_string_hash and - unsigned_string_cmp. */ - -static struct hash_table * -make_unsigned_string_hash_table (int initial_size) -{ - return hash_table_new (initial_size, - unsigned_string_hash, unsigned_string_cmp); -} - -/* Write "HOST:PORT" to RESULT. RESULT should be a pointer, and the - memory for the contents is allocated on the stack. Useful for - creating HOST:PORT strings, which are the keys in the hash - table. */ - -#define SET_HOSTPORT(host, port, result) do { \ - int HP_len = strlen (host); \ - result = alloca (HP_len + 1 + numdigit (port) + 1); \ - memcpy (result, host, HP_len); \ - result[HP_len] = ':'; \ - long_to_string (result + HP_len + 1, port); \ -} while (0) - -/* Find cookie chain that corresponds to DOMAIN (exact) and PORT. */ +/* Deallocate COOKIE and its components. */ -static struct cookie * -find_cookie_chain_exact (const char *domain, int port) +static void +delete_cookie (struct cookie *cookie) { - char *key; - if (!cookies_hash_table) - return NULL; - SET_HOSTPORT (domain, port, key); - return hash_table_get (cookies_hash_table, key); + xfree_null (cookie->domain); + xfree_null (cookie->path); + xfree_null (cookie->attr); + xfree_null (cookie->value); + xfree (cookie); } /* Functions for storing cookies. - All cookies can be referenced through cookies_hash_table. The key - in that table is the domain name, and the value is a linked list of + All cookies can be reached beginning with jar->chains. The key in + that table is the domain name, and the value is a linked list of all cookies from that domain. Every new cookie is placed on the head of the list. */ -/* Find and return the cookie whose domain, path, and attribute name - correspond to COOKIE. If found, PREVPTR will point to the location - of the cookie previous in chain, or NULL if the found cookie is the - head of a chain. +/* Find and return a cookie in JAR whose domain, path, and attribute + name correspond to COOKIE. If found, PREVPTR will point to the + location of the cookie previous in chain, or NULL if the found + cookie is the head of a chain. If no matching cookie is found, return NULL. */ static struct cookie * -find_matching_cookie (struct cookie *cookie, struct cookie **prevptr) +find_matching_cookie (struct cookie_jar *jar, struct cookie *cookie, + struct cookie **prevptr) { struct cookie *chain, *prev; - if (!cookies_hash_table) - goto nomatch; - - chain = find_cookie_chain_exact (cookie->domain, cookie->port); + chain = hash_table_get (jar->chains, cookie->domain); if (!chain) goto nomatch; prev = NULL; for (; chain; prev = chain, chain = chain->next) - if (!strcmp (cookie->path, chain->path) - && !strcmp (cookie->attr, chain->attr)) + if (0 == strcmp (cookie->path, chain->path) + && 0 == strcmp (cookie->attr, chain->attr) + && cookie->port == chain->port) { - *prevptr = prev; - return chain; + *prevptr = prev; + return chain; } nomatch: @@ -208,7 +194,7 @@ find_matching_cookie (struct cookie *cookie, struct cookie **prevptr) return NULL; } -/* Store COOKIE to memory. +/* Store COOKIE to the jar. This is done by placing COOKIE at the head of its chain. However, if COOKIE matches a cookie already in memory, as determined by @@ -218,116 +204,112 @@ find_matching_cookie (struct cookie *cookie, struct cookie **prevptr) first time; next hash_table_put's reuse the same key. */ static void -store_cookie (struct cookie *cookie) +store_cookie (struct cookie_jar *jar, struct cookie *cookie) { struct cookie *chain_head; - char *hostport; char *chain_key; - if (!cookies_hash_table) - /* If the hash table is not initialized, do so now, because we'll - need to store things. */ - cookies_hash_table = make_unsigned_string_hash_table (0); - - /* Initialize hash table key. */ - SET_HOSTPORT (cookie->domain, cookie->port, hostport); - - if (hash_table_get_pair (cookies_hash_table, hostport, - &chain_key, &chain_head)) + if (hash_table_get_pair (jar->chains, cookie->domain, + &chain_key, &chain_head)) { - /* There already exists a chain of cookies with this exact - domain. We need to check for duplicates -- if an existing - cookie exactly matches our domain, path and name, we replace - it. */ + /* A chain of cookies in this domain already exists. Check for + duplicates -- if an extant cookie exactly matches our domain, + port, path, and name, replace it. */ struct cookie *prev; - struct cookie *victim = find_matching_cookie (cookie, &prev); + struct cookie *victim = find_matching_cookie (jar, cookie, &prev); if (victim) - { - /* Remove VICTIM from the chain. COOKIE will be placed at - the head. */ - if (prev) - { - prev->next = victim->next; - cookie->next = chain_head; - } - else - { - /* prev is NULL; apparently VICTIM was at the head of - the chain. This place will be taken by COOKIE, so - all we need to do is: */ - cookie->next = victim->next; - } - delete_cookie (victim); - DEBUGP (("Deleted old cookie (to be replaced.)\n")); - } + { + /* Remove VICTIM from the chain. COOKIE will be placed at + the head. */ + if (prev) + { + prev->next = victim->next; + cookie->next = chain_head; + } + else + { + /* prev is NULL; apparently VICTIM was at the head of + the chain. This place will be taken by COOKIE, so + all we need to do is: */ + cookie->next = victim->next; + } + delete_cookie (victim); + --jar->cookie_count; + DEBUGP (("Deleted old cookie (to be replaced.)\n")); + } else - cookie->next = chain_head; + cookie->next = chain_head; } else { - /* We are now creating the chain. Allocate the string that will - be used as a key. It is unsafe to use cookie->domain for - that, because it might get deallocated by the above code at - some point later. */ + /* We are now creating the chain. Use a copy of cookie->domain + as the key for the life-time of the chain. Using + cookie->domain would be unsafe because the life-time of the + chain may exceed the life-time of the cookie. (Cookies may + be deleted from the chain by this very function.) */ cookie->next = NULL; - chain_key = xstrdup (hostport); + chain_key = xstrdup (cookie->domain); } - hash_table_put (cookies_hash_table, chain_key, cookie); + hash_table_put (jar->chains, chain_key, cookie); + ++jar->cookie_count; - DEBUGP (("\nStored cookie %s %d %s %d %s %s %s\n", - cookie->domain, cookie->port, cookie->path, cookie->secure, - asctime (localtime ((time_t *)&cookie->expiry_time)), - cookie->attr, cookie->value)); + IF_DEBUG + { + time_t exptime = cookie->expiry_time; + DEBUGP (("\nStored cookie %s %d%s %s <%s> <%s> [expiry %s] %s %s\n", + cookie->domain, cookie->port, + cookie->port == PORT_ANY ? " (ANY)" : "", + cookie->path, + cookie->permanent ? "permanent" : "session", + cookie->secure ? "secure" : "insecure", + cookie->expiry_time ? datetime_str (exptime) : "none", + cookie->attr, cookie->value)); + } } -/* Discard a cookie matching COOKIE's domain, path, and attribute - name. This gets called when we encounter a cookie whose expiry - date is in the past, or whose max-age is set to 0. The former - corresponds to netscape cookie spec, while the latter is specified - by rfc2109. */ +/* Discard a cookie matching COOKIE's domain, port, path, and + attribute name. This gets called when we encounter a cookie whose + expiry date is in the past, or whose max-age is set to 0. The + former corresponds to netscape cookie spec, while the latter is + specified by rfc2109. */ static void -discard_matching_cookie (struct cookie *cookie) +discard_matching_cookie (struct cookie_jar *jar, struct cookie *cookie) { struct cookie *prev, *victim; - if (!cookies_hash_table - || !hash_table_count (cookies_hash_table)) + if (!hash_table_count (jar->chains)) /* No elements == nothing to discard. */ return; - victim = find_matching_cookie (cookie, &prev); + victim = find_matching_cookie (jar, cookie, &prev); if (victim) { if (prev) - /* Simply unchain the victim. */ - prev->next = victim->next; + /* Simply unchain the victim. */ + prev->next = victim->next; else - { - /* VICTIM was head of its chain. We need to place a new - cookie at the head. */ - - char *hostport; - char *chain_key = NULL; - int res; - - SET_HOSTPORT (victim->domain, victim->port, hostport); - res = hash_table_get_pair (cookies_hash_table, hostport, - &chain_key, NULL); - assert (res != 0); - if (!victim->next) - { - /* VICTIM was the only cookie in the chain. Destroy the - chain and deallocate the chain key. */ - - hash_table_remove (cookies_hash_table, hostport); - xfree (chain_key); - } - else - hash_table_put (cookies_hash_table, chain_key, victim->next); - } + { + /* VICTIM was head of its chain. We need to place a new + cookie at the head. */ + char *chain_key = NULL; + int res; + + res = hash_table_get_pair (jar->chains, victim->domain, + &chain_key, NULL); + assert (res != 0); + if (!victim->next) + { + /* VICTIM was the only cookie in the chain. Destroy the + chain and deallocate the chain key. */ + hash_table_remove (jar->chains, victim->domain); + xfree (chain_key); + } + else + hash_table_put (jar->chains, chain_key, victim->next); + } delete_cookie (victim); DEBUGP (("Discarded old cookie.\n")); } @@ -336,143 +318,10 @@ discard_matching_cookie (struct cookie *cookie) /* Functions for parsing the `Set-Cookie' header, and creating new cookies from the wire. */ +#define TOKEN_IS(token, string_literal) \ + BOUNDED_EQUAL_NO_CASE (token.b, token.e, string_literal) -#define NAME_IS(string_literal) \ - BOUNDED_EQUAL_NO_CASE (name_b, name_e, string_literal) - -#define VALUE_EXISTS (value_b && value_e) - -#define VALUE_NON_EMPTY (VALUE_EXISTS && (value_b != value_e)) - -/* Update the appropriate cookie field. [name_b, name_e) are expected - to delimit the attribute name, while [value_b, value_e) (optional) - should delimit the attribute value. - - When called the first time, it will set the cookie's attribute name - and value. After that, it will check the attribute name for - special fields such as `domain', `path', etc. Where appropriate, - it will parse the values of the fields it recognizes and fill the - corresponding fields in COOKIE. - - Returns 1 on success. Returns zero in case a syntax error is - found; such a cookie should be discarded. */ - -static int -update_cookie_field (struct cookie *cookie, - const char *name_b, const char *name_e, - const char *value_b, const char *value_e) -{ - assert (name_b != NULL && name_e != NULL); - - if (!cookie->attr) - { - if (!VALUE_EXISTS) - return 0; - cookie->attr = strdupdelim (name_b, name_e); - cookie->value = strdupdelim (value_b, value_e); - return 1; - } - - if (NAME_IS ("domain")) - { - if (!VALUE_NON_EMPTY) - return 0; - FREE_MAYBE (cookie->domain); - cookie->domain = strdupdelim (value_b, value_e); - return 1; - } - else if (NAME_IS ("path")) - { - if (!VALUE_NON_EMPTY) - return 0; - FREE_MAYBE (cookie->path); - cookie->path = strdupdelim (value_b, value_e); - return 1; - } - else if (NAME_IS ("expires")) - { - char *value_copy; - time_t expires; - - if (!VALUE_NON_EMPTY) - return 0; - BOUNDED_TO_ALLOCA (value_b, value_e, value_copy); - - expires = http_atotm (value_copy); - if (expires != -1) - { - cookie->permanent = 1; - cookie->expiry_time = (unsigned long)expires; - } - else - /* Error in expiration spec. Assume default (cookie valid for - this session.) #### Should we return 0 and invalidate the - cookie? */ - ; - - /* According to netscape's specification, expiry time in the - past means that discarding of a matching cookie is - requested. */ - if (cookie->expiry_time < cookies_now) - cookie->discard_requested = 1; - - return 1; - } - else if (NAME_IS ("max-age")) - { - double maxage = -1; - char *value_copy; - - if (!VALUE_NON_EMPTY) - return 0; - BOUNDED_TO_ALLOCA (value_b, value_e, value_copy); - - sscanf (value_copy, "%lf", &maxage); - if (maxage == -1) - /* something is wrong. */ - return 0; - cookie->permanent = 1; - cookie->expiry_time = (unsigned long)cookies_now + (unsigned long)maxage; - - /* According to rfc2109, a cookie with max-age of 0 means that - discarding of a matching cookie is requested. */ - if (maxage == 0) - cookie->discard_requested = 1; - - return 1; - } - else if (NAME_IS ("secure")) - { - /* ignore value completely */ - cookie->secure = 1; - return 1; - } - else - /* Unrecognized attribute; ignore it. */ - return 1; -} - -#undef NAME_IS - -/* Returns non-zero for characters that are legal in the name of an - attribute. */ - -#define ATTR_NAME_CHAR(c) (ISALNUM (c) || (c) == '-' || (c) == '_') - -/* Fetch the next character without doing anything special if CH gets - set to 0. (The code executed next is expected to handle it.) */ - -#define FETCH1(ch, ptr) do { \ - ch = *ptr++; \ -} while (0) - -/* Like FETCH1, but jumps to `eof' label if CH gets set to 0. */ - -#define FETCH(ch, ptr) do { \ - FETCH1 (ch, ptr); \ - if (!ch) \ - goto eof; \ -} while (0) +#define TOKEN_NON_EMPTY(token) (token.b != NULL && token.b != token.e) /* Parse the contents of the `Set-Cookie' header. The header looks like this: @@ -482,376 +331,412 @@ update_cookie_field (struct cookie *cookie, Trailing semicolon is optional; spaces are allowed between all tokens. Additionally, values may be quoted. - A new cookie is returned upon success, NULL otherwise. The - function `update_cookie_field' is used to update the fields of the - newly created cookie structure. */ + A new cookie is returned upon success, NULL otherwise. + + The first name-value pair will be used to set the cookie's + attribute name and value. Subsequent parameters will be checked + against field names such as `domain', `path', etc. Recognized + fields will be parsed and the corresponding members of COOKIE + filled. */ static struct cookie * -parse_set_cookies (const char *sc) +parse_set_cookie (const char *set_cookie, bool silent) { + const char *ptr = set_cookie; struct cookie *cookie = cookie_new (); + param_token name, value; - enum { S_NAME_PRE, S_NAME, S_NAME_POST, - S_VALUE_PRE, S_VALUE, S_VALUE_TRAILSPACE_MAYBE, - S_QUOTED_VALUE, S_QUOTED_VALUE_POST, - S_ATTR_ACTION, - S_DONE, S_ERROR } state = S_NAME_PRE; + if (!extract_param (&ptr, &name, &value, ';', NULL)) + goto error; + if (!value.b) + goto error; - const char *p = sc; - char c; + /* If the value is quoted, do not modify it. */ + if (*(value.b - 1) == '"') + value.b--; + if (*value.e == '"') + value.e++; - const char *name_b = NULL, *name_e = NULL; - const char *value_b = NULL, *value_e = NULL; + cookie->attr = strdupdelim (name.b, name.e); + cookie->value = strdupdelim (value.b, value.e); - FETCH (c, p); - - while (state != S_DONE && state != S_ERROR) + while (extract_param (&ptr, &name, &value, ';', NULL)) { - switch (state) - { - case S_NAME_PRE: - if (ISSPACE (c)) - FETCH (c, p); - else if (ATTR_NAME_CHAR (c)) - { - name_b = p - 1; - FETCH1 (c, p); - state = S_NAME; - } - else - /* empty attr name not allowed */ - state = S_ERROR; - break; - case S_NAME: - if (ATTR_NAME_CHAR (c)) - FETCH1 (c, p); - else if (!c || c == ';' || c == '=' || ISSPACE (c)) - { - name_e = p - 1; - state = S_NAME_POST; - } - else - state = S_ERROR; - break; - case S_NAME_POST: - if (ISSPACE (c)) - FETCH1 (c, p); - else if (!c || c == ';') - { - value_b = value_e = NULL; - state = S_ATTR_ACTION; - } - else if (c == '=') - { - FETCH1 (c, p); - state = S_VALUE_PRE; - } - else - state = S_ERROR; - break; - case S_VALUE_PRE: - if (ISSPACE (c)) - FETCH1 (c, p); - else if (c == '"') - { - value_b = p; - FETCH (c, p); - state = S_QUOTED_VALUE; - } - else if (c == ';' || c == '\0') - { - value_b = value_e = p - 1; - state = S_ATTR_ACTION; - } - else - { - value_b = p - 1; - value_e = NULL; - state = S_VALUE; - } - break; - case S_VALUE: - if (c == ';' || c == '\0') - { - if (!value_e) - value_e = p - 1; - state = S_ATTR_ACTION; - } - else if (ISSPACE (c)) - { - value_e = p - 1; - FETCH1 (c, p); - state = S_VALUE_TRAILSPACE_MAYBE; - } - else - { - value_e = NULL; /* no trailing space */ - FETCH1 (c, p); - } - break; - case S_VALUE_TRAILSPACE_MAYBE: - if (ISSPACE (c)) - FETCH1 (c, p); - else - state = S_VALUE; - break; - case S_QUOTED_VALUE: - if (c == '"') - { - value_e = p - 1; - FETCH1 (c, p); - state = S_QUOTED_VALUE_POST; - } - else - FETCH (c, p); - break; - case S_QUOTED_VALUE_POST: - if (c == ';' || !c) - state = S_ATTR_ACTION; - else if (ISSPACE (c)) - FETCH1 (c, p); - else - state = S_ERROR; - break; - case S_ATTR_ACTION: - { - int legal = update_cookie_field (cookie, name_b, name_e, - value_b, value_e); - if (!legal) - { - char *name; - BOUNDED_TO_ALLOCA (name_b, name_e, name); - logprintf (LOG_NOTQUIET, - _("Error in Set-Cookie, field `%s'"), name); - state = S_ERROR; - break; - } - - if (c) - FETCH1 (c, p); - if (!c) - state = S_DONE; - else - state = S_NAME_PRE; - } - break; - case S_DONE: - case S_ERROR: - /* handled by loop condition */ - break; - } + if (TOKEN_IS (name, "domain")) + { + if (!TOKEN_NON_EMPTY (value)) + goto error; + xfree_null (cookie->domain); + /* Strictly speaking, we should set cookie->domain_exact if the + domain doesn't begin with a dot. But many sites set the + domain to "foo.com" and expect "subhost.foo.com" to get the + cookie, and it apparently works in browsers. */ + if (*value.b == '.') + ++value.b; + cookie->domain = strdupdelim (value.b, value.e); + } + else if (TOKEN_IS (name, "path")) + { + if (!TOKEN_NON_EMPTY (value)) + goto error; + xfree_null (cookie->path); + cookie->path = strdupdelim (value.b, value.e); + } + else if (TOKEN_IS (name, "expires")) + { + char *value_copy; + time_t expires; + + if (!TOKEN_NON_EMPTY (value)) + goto error; + BOUNDED_TO_ALLOCA (value.b, value.e, value_copy); + + /* Check if expiration spec is valid. + If not, assume default (cookie doesn't expire, but valid only for + this session.) */ + expires = http_atotm (value_copy); + if (expires != (time_t) -1) + { + cookie->permanent = 1; + cookie->expiry_time = expires; + /* According to netscape's specification, expiry time in + the past means that discarding of a matching cookie + is requested. */ + if (cookie->expiry_time < cookies_now) + cookie->discard_requested = 1; + } + } + else if (TOKEN_IS (name, "max-age")) + { + double maxage = -1; + char *value_copy; + + if (!TOKEN_NON_EMPTY (value)) + goto error; + BOUNDED_TO_ALLOCA (value.b, value.e, value_copy); + + sscanf (value_copy, "%lf", &maxage); + if (maxage == -1) + /* something went wrong. */ + goto error; + cookie->permanent = 1; + cookie->expiry_time = cookies_now + maxage; + + /* According to rfc2109, a cookie with max-age of 0 means that + discarding of a matching cookie is requested. */ + if (maxage == 0) + cookie->discard_requested = 1; + } + else if (TOKEN_IS (name, "secure")) + { + /* ignore value completely */ + cookie->secure = 1; + } + /* else: Ignore unrecognized attribute. */ } - if (state == S_DONE) - return cookie; + if (*ptr) + /* extract_param has encountered a syntax error */ + goto error; - delete_cookie (cookie); - if (state == S_ERROR) - logprintf (LOG_NOTQUIET, _("Syntax error in Set-Cookie at character `%c'.\n"), c); - else - abort (); - return NULL; + /* The cookie has been successfully constructed; return it. */ + return cookie; - eof: + error: + if (!silent) + logprintf (LOG_NOTQUIET, + _("Syntax error in Set-Cookie: %s at position %d.\n"), + quotearg_style (escape_quoting_style, set_cookie), + (int) (ptr - set_cookie)); delete_cookie (cookie); - logprintf (LOG_NOTQUIET, - _("Syntax error in Set-Cookie: premature end of string.\n")); return NULL; } + +#undef TOKEN_IS +#undef TOKEN_NON_EMPTY /* Sanity checks. These are important, otherwise it is possible for mailcious attackers to destroy important cookie information and/or violate your privacy. */ -#define REQUIRE_DIGITS(p) do { \ - if (!ISDIGIT (*p)) \ - return 0; \ - for (++p; ISDIGIT (*p); p++) \ - ; \ +#define REQUIRE_DIGITS(p) do { \ + if (!c_isdigit (*p)) \ + return false; \ + for (++p; c_isdigit (*p); p++) \ + ; \ } while (0) -#define REQUIRE_DOT(p) do { \ - if (*p++ != '.') \ - return 0; \ +#define REQUIRE_DOT(p) do { \ + if (*p++ != '.') \ + return false; \ } while (0) /* Check whether ADDR matches .... - We don't want to call network functions like inet_addr() because all - we need is a check, preferrably one that is small, fast, and - well-defined. */ + We don't want to call network functions like inet_addr() because + all we need is a check, preferrably one that is small, fast, and + well-defined. */ -static int +static bool numeric_address_p (const char *addr) { const char *p = addr; - REQUIRE_DIGITS (p); /* A */ - REQUIRE_DOT (p); /* . */ - REQUIRE_DIGITS (p); /* B */ - REQUIRE_DOT (p); /* . */ - REQUIRE_DIGITS (p); /* C */ - REQUIRE_DOT (p); /* . */ - REQUIRE_DIGITS (p); /* D */ + REQUIRE_DIGITS (p); /* A */ + REQUIRE_DOT (p); /* . */ + REQUIRE_DIGITS (p); /* B */ + REQUIRE_DOT (p); /* . */ + REQUIRE_DIGITS (p); /* C */ + REQUIRE_DOT (p); /* . */ + REQUIRE_DIGITS (p); /* D */ if (*p != '\0') - return 0; - return 1; + return false; + return true; } /* Check whether COOKIE_DOMAIN is an appropriate domain for HOST. - This check is compliant with rfc2109. */ + Originally I tried to make the check compliant with rfc2109, but + the sites deviated too often, so I had to fall back to "tail + matching", as defined by the original Netscape's cookie spec. */ -static int +static bool check_domain_match (const char *cookie_domain, const char *host) { - int i, headlen; - const char *tail; + DEBUGP (("cdm: 1")); /* Numeric address requires exact match. It also requires HOST to - be an IP address. I suppose we *could* resolve HOST with - store_hostaddress (it would hit the hash table), but rfc2109 - doesn't require it, and it doesn't seem very useful, so we - don't. */ + be an IP address. */ if (numeric_address_p (cookie_domain)) - return !strcmp (cookie_domain, host); + return 0 == strcmp (cookie_domain, host); - /* The domain must contain at least one embedded dot. */ + DEBUGP ((" 2")); + + /* For the sake of efficiency, check for exact match first. */ + if (0 == strcasecmp (cookie_domain, host)) + return true; + + DEBUGP ((" 3")); + + /* HOST must match the tail of cookie_domain. */ + if (!match_tail (host, cookie_domain, true)) + return false; + + /* We know that COOKIE_DOMAIN is a subset of HOST; however, we must + make sure that somebody is not trying to set the cookie for a + subdomain shared by many entities. For example, "company.co.uk" + must not be allowed to set a cookie for ".co.uk". On the other + hand, "sso.redhat.de" should be able to set a cookie for + ".redhat.de". + + The only marginally sane way to handle this I can think of is to + reject on the basis of the length of the second-level domain name + (but when the top-level domain is unknown), with the assumption + that those of three or less characters could be reserved. For + example: + + .co.org -> works because the TLD is known + .co.uk -> doesn't work because "co" is only two chars long + .com.au -> doesn't work because "com" is only 3 chars long + .cnn.uk -> doesn't work because "cnn" is also only 3 chars long (ugh) + .cnn.de -> doesn't work for the same reason (ugh!!) + .abcd.de -> works because "abcd" is 4 chars long + .img.cnn.de -> works because it's not trying to set the 2nd level domain + .cnn.co.uk -> works for the same reason + + That should prevent misuse, while allowing reasonable usage. If + someone knows of a better way to handle this, please let me + know. */ { - const char *rest = cookie_domain; - int len = strlen (rest); - if (*rest == '.') - ++rest, --len; /* ignore first dot */ - if (len <= 0) - return 0; - if (rest[len - 1] == '.') - --len; /* ignore last dot */ - - if (!memchr (rest, '.', len)) - /* No dots. */ - return 0; + const char *p = cookie_domain; + int dccount = 1; /* number of domain components */ + int ldcl = 0; /* last domain component length */ + int nldcl = 0; /* next to last domain component length */ + int out; + if (*p == '.') + /* Ignore leading period in this calculation. */ + ++p; + DEBUGP ((" 4")); + for (out = 0; !out; p++) + switch (*p) + { + case '\0': + out = 1; + break; + case '.': + if (ldcl == 0) + /* Empty domain component found -- the domain is invalid. */ + return false; + if (*(p + 1) == '\0') + { + /* Tolerate trailing '.' by not treating the domain as + one ending with an empty domain component. */ + out = 1; + break; + } + nldcl = ldcl; + ldcl = 0; + ++dccount; + break; + default: + ++ldcl; + } + + DEBUGP ((" 5")); + + if (dccount < 2) + return false; + + DEBUGP ((" 6")); + + if (dccount == 2) + { + size_t i; + int known_toplevel = false; + static const char *known_toplevel_domains[] = { + ".com", ".edu", ".net", ".org", ".gov", ".mil", ".int" + }; + for (i = 0; i < countof (known_toplevel_domains); i++) + if (match_tail (cookie_domain, known_toplevel_domains[i], true)) + { + known_toplevel = true; + break; + } + if (!known_toplevel && nldcl <= 3) + return false; + } } - /* For the sake of efficiency, check for exact match first. */ - if (!strcasecmp (cookie_domain, host)) - return 1; + DEBUGP ((" 7")); - /* In rfc2109 terminology, HOST needs domain-match COOKIE_DOMAIN. - This means that COOKIE_DOMAIN needs to start with `.' and be an - FQDN, and that HOST must end with COOKIE_DOMAIN. */ + /* Don't allow the host "foobar.com" to set a cookie for domain + "bar.com". */ if (*cookie_domain != '.') - return 0; - - /* Two proceed, we need to examine two parts of HOST: its head and - its tail. Head and tail are defined in terms of the length of - the domain, like this: - - HHHHTTTTTTTTTTTTTTT <- host - DDDDDDDDDDDDDDD <- domain - - That is, "head" is the part of the host before (dlen - hlen), and - "tail" is what follows. - - For the domain to match, two conditions need to be true: - - 1. Tail must equal DOMAIN. - 2. Head must not contain an embedded dot. */ - - headlen = strlen (host) - strlen (cookie_domain); - - if (headlen <= 0) - /* DOMAIN must be a proper subset of HOST. */ - return 0; - tail = host + headlen; - - /* (1) */ - if (strcasecmp (tail, cookie_domain)) - return 0; - - /* Test (2) is not part of the "domain-match" itself, but is - recommended by rfc2109 for reasons of privacy. */ + { + int dlen = strlen (cookie_domain); + int hlen = strlen (host); + /* cookie host: hostname.foobar.com */ + /* desired domain: bar.com */ + /* '.' must be here in host-> ^ */ + if (hlen > dlen && host[hlen - dlen - 1] != '.') + return false; + } - /* (2) */ - if (memchr (host, '.', headlen)) - return 0; + DEBUGP ((" 8")); - return 1; + return true; } -static int path_matches PARAMS ((const char *, const char *)); +static int path_matches (const char *, const char *); /* Check whether PATH begins with COOKIE_PATH. */ -static int +static bool check_path_match (const char *cookie_path, const char *path) { - return path_matches (path, cookie_path); + return path_matches (path, cookie_path) != 0; } + +/* Prepend '/' to string S. S is copied to fresh stack-allocated + space and its value is modified to point to the new location. */ + +#define PREPEND_SLASH(s) do { \ + char *PS_newstr = (char *) alloca (1 + strlen (s) + 1); \ + *PS_newstr = '/'; \ + strcpy (PS_newstr + 1, s); \ + s = PS_newstr; \ +} while (0) + -/* Parse the `Set-Cookie' header and, if the cookie is legal, store it - to memory. */ +/* Process the HTTP `Set-Cookie' header. This results in storing the + cookie or discarding a matching one, or ignoring it completely, all + depending on the contents. */ -int -set_cookie_header_cb (const char *hdr, void *closure) +void +cookie_handle_set_cookie (struct cookie_jar *jar, + const char *host, int port, + const char *path, const char *set_cookie) { - struct urlinfo *u = (struct urlinfo *)closure; struct cookie *cookie; - cookies_now = time (NULL); - cookie = parse_set_cookies (hdr); + /* Wget's paths don't begin with '/' (blame rfc1808), but cookie + usage assumes /-prefixed paths. Until the rest of Wget is fixed, + simply prepend slash to PATH. */ + PREPEND_SLASH (path); + + cookie = parse_set_cookie (set_cookie, false); if (!cookie) goto out; /* Sanitize parts of cookie. */ if (!cookie->domain) - cookie->domain = xstrdup (u->host); + { + cookie->domain = xstrdup (host); + cookie->domain_exact = 1; + /* Set the port, but only if it's non-default. */ + if (port != 80 && port != 443) + cookie->port = port; + } else { - if (!check_domain_match (cookie->domain, u->host)) - { - DEBUGP (("Attempt to fake the domain: %s, %s\n", - cookie->domain, u->host)); - goto out; - } + if (!check_domain_match (cookie->domain, host)) + { + logprintf (LOG_NOTQUIET, + _("Cookie coming from %s attempted to set domain to "), + quotearg_style (escape_quoting_style, host)); + logprintf (LOG_NOTQUIET, + _("%s\n"), + quotearg_style (escape_quoting_style, cookie->domain)); + cookie->discard_requested = true; + } } + if (!cookie->path) - cookie->path = xstrdup (u->path); + { + /* The cookie doesn't set path: set it to the URL path, sans the + file part ("/dir/file" truncated to "/dir/"). */ + char *trailing_slash = strrchr (path, '/'); + if (trailing_slash) + cookie->path = strdupdelim (path, trailing_slash + 1); + else + /* no slash in the string -- can this even happen? */ + cookie->path = xstrdup (path); + } else { - if (!check_path_match (cookie->path, u->path)) - { - DEBUGP (("Attempt to fake the path: %s, %s\n", - cookie->path, u->path)); - goto out; - } + /* The cookie sets its own path; verify that it is legal. */ + if (!check_path_match (cookie->path, path)) + { + DEBUGP (("Attempt to fake the path: %s, %s\n", + cookie->path, path)); + goto out; + } } - cookie->port = u->port; + /* Now store the cookie, or discard an existing cookie, if + discarding was requested. */ if (cookie->discard_requested) { - discard_matching_cookie (cookie); - delete_cookie (cookie); - return 1; + discard_matching_cookie (jar, cookie); + goto out; } - store_cookie (cookie); - return 1; + store_cookie (jar, cookie); + return; out: if (cookie) delete_cookie (cookie); - return 1; } /* Support for sending out cookies in HTTP requests, based on previously stored cookies. Entry point is `build_cookies_request'. */ - -/* Count how many times CHR occurs in STRING. */ +/* Return a count of how many times CHR occurs in STRING. */ static int count_char (const char *string, char chr) @@ -864,57 +749,55 @@ count_char (const char *string, char chr) return count; } -/* Store CHAIN to STORE if there is room in STORE. If not, inrecement - COUNT anyway, so that when the function is done, we end up with the - exact count of how much place we actually need. */ +/* Find the cookie chains whose domains match HOST and store them to + DEST. -#define STORE_CHAIN(st_chain, st_store, st_size, st_count) do { \ - if (st_count < st_size) \ - store[st_count] = st_chain; \ - ++st_count; \ -} while (0) + A cookie chain is the head of a list of cookies that belong to a + host/domain. Given HOST "img.search.xemacs.org", this function + will return the chains for "img.search.xemacs.org", + "search.xemacs.org", and "xemacs.org" -- those of them that exist + (if any), that is. -/* Store cookie chains that match HOST, PORT. Since more than one - chain can match, the matches are written to STORE. No more than - SIZE matches are written; if more matches are present, return the - number of chains that would have been written. */ + DEST should be large enough to accept (in the worst case) as many + elements as there are domain components of HOST. */ -int -find_matching_chains (const char *host, int port, - struct cookie *store[], int size) +static int +find_chains_of_host (struct cookie_jar *jar, const char *host, + struct cookie *dest[]) { - struct cookie *chain; - int dot_count; - char *hash_key; - int count = 0; + int dest_count = 0; + int passes, passcnt; - if (!cookies_hash_table) + /* Bail out quickly if there are no cookies in the jar. */ + if (!hash_table_count (jar->chains)) return 0; - SET_HOSTPORT (host, port, hash_key); - - /* Exact match. */ - chain = hash_table_get (cookies_hash_table, hash_key); - if (chain) - STORE_CHAIN (chain, store, size, count); - - dot_count = count_char (host, '.'); - - /* Match less and less specific domains. For instance, given - fly.srk.fer.hr, we match .srk.fer.hr, then .fer.hr. */ - while (dot_count-- > 1) + if (numeric_address_p (host)) + /* If host is an IP address, only check for the exact match. */ + passes = 1; + else + /* Otherwise, check all the subdomains except the top-level (last) + one. As a domain with N components has N-1 dots, the number of + passes equals the number of dots. */ + passes = count_char (host, '.'); + + passcnt = 0; + + /* Find chains that match HOST, starting with exact match and + progressing to less specific domains. For instance, given HOST + fly.srk.fer.hr, first look for fly.srk.fer.hr's chain, then + srk.fer.hr's, then fer.hr's. */ + while (1) { - /* Note: we operate directly on hash_key (in form host:port) - because we don't want to allocate new hash keys in a - loop. */ - char *p = strchr (hash_key, '.'); - assert (p != NULL); - chain = hash_table_get (cookies_hash_table, p); + struct cookie *chain = hash_table_get (jar->chains, host); if (chain) - STORE_CHAIN (chain, store, size, count); - hash_key = p + 1; + dest[dest_count++] = chain; + if (++passcnt >= passes) + break; + host = strchr (host, '.') + 1; } - return count; + + return dest_count; } /* If FULL_PATH begins with PREFIX, return the length of PREFIX, zero @@ -924,40 +807,67 @@ static int path_matches (const char *full_path, const char *prefix) { int len = strlen (prefix); - if (strncmp (full_path, prefix, len)) + + if (0 != strncmp (full_path, prefix, len)) /* FULL_PATH doesn't begin with PREFIX. */ return 0; /* Length of PREFIX determines the quality of the match. */ - return len; + return len + 1; } -static int -matching_cookie (const struct cookie *cookie, const char *path, - int connection_secure_p, int *path_goodness) +/* Return true iff COOKIE matches the provided parameters of the URL + being downloaded: HOST, PORT, PATH, and SECFLAG. + + If PATH_GOODNESS is non-NULL, store the "path goodness" value + there. That value is a measure of how closely COOKIE matches PATH, + used for ordering cookies. */ + +static bool +cookie_matches_url (const struct cookie *cookie, + const char *host, int port, const char *path, + bool secflag, int *path_goodness) { int pg; - if (cookie->expiry_time < cookies_now) - /* Ignore stale cookies. There is no need to unchain the cookie - at this point -- Wget is a relatively short-lived application, - and stale cookies will not be saved by `save_cookies'. */ - return 0; - if (cookie->secure && !connection_secure_p) - /* Don't transmit secure cookies over an insecure connection. */ - return 0; + if (cookie_expired_p (cookie)) + /* Ignore stale cookies. Don't bother unchaining the cookie at + this point -- Wget is a relatively short-lived application, and + stale cookies will not be saved by `save_cookies'. On the + other hand, this function should be as efficient as + possible. */ + return false; + + if (cookie->secure && !secflag) + /* Don't transmit secure cookies over insecure connections. */ + return false; + if (cookie->port != PORT_ANY && cookie->port != port) + return false; + + /* If exact domain match is required, verify that cookie's domain is + equal to HOST. If not, assume success on the grounds of the + cookie's chain having been found by find_chains_of_host. */ + if (cookie->domain_exact + && 0 != strcasecmp (host, cookie->domain)) + return false; + pg = path_matches (path, cookie->path); - if (!pg) - return 0; + if (pg == 0) + return false; if (path_goodness) /* If the caller requested path_goodness, we return it. This is an optimization, so that the caller doesn't need to call path_matches() again. */ *path_goodness = pg; - return 1; + return true; } +/* A structure that points to a cookie, along with the additional + information about the cookie's "goodness". This allows us to sort + the cookies when returning them to the server, as required by the + spec. */ + struct weighed_cookie { struct cookie *cookie; int domain_goodness; @@ -980,6 +890,48 @@ equality_comparator (const void *p1, const void *p2) return namecmp ? namecmp : valuecmp; } +/* Eliminate duplicate cookies. "Duplicate cookies" are any two + cookies with the same attr name and value. Whenever a duplicate + pair is found, one of the cookies is removed. */ + +static int +eliminate_dups (struct weighed_cookie *outgoing, int count) +{ + struct weighed_cookie *h; /* hare */ + struct weighed_cookie *t; /* tortoise */ + struct weighed_cookie *end = outgoing + count; + + /* We deploy a simple uniquify algorithm: first sort the array + according to our sort criteria, then copy it to itself, comparing + each cookie to its neighbor and ignoring the duplicates. */ + + qsort (outgoing, count, sizeof (struct weighed_cookie), equality_comparator); + + /* "Hare" runs through all the entries in the array, followed by + "tortoise". If a duplicate is found, the hare skips it. + Non-duplicate entries are copied to the tortoise ptr. */ + + for (h = t = outgoing; h < end; h++) + { + if (h != end - 1) + { + struct cookie *c0 = h[0].cookie; + struct cookie *c1 = h[1].cookie; + if (!strcmp (c0->attr, c1->attr) && !strcmp (c0->value, c1->value)) + continue; /* ignore the duplicate */ + } + + /* If the hare has advanced past the tortoise (because of + previous dups), make sure the values get copied. Otherwise, + no copying is necessary. */ + if (h != t) + *t++ = *h; + else + t++; + } + return t - outgoing; +} + /* Comparator used for sorting by quality. */ static int @@ -999,19 +951,17 @@ goodness_comparator (const void *p1, const void *p2) return dgdiff ? dgdiff : pgdiff; } -/* Build a `Cookies' header for a request that goes to HOST:PORT and - requests PATH from the server. Memory is allocated by `malloc', - and the caller is responsible for freeing it. If no cookies - pertain to this request, i.e. no cookie header should be generated, - NULL is returned. */ +/* Generate a `Cookie' header for a request that goes to HOST:PORT and + requests PATH from the server. The resulting string is allocated + with `malloc', and the caller is responsible for freeing it. If no + cookies pertain to this request, i.e. no cookie header should be + generated, NULL is returned. */ char * -build_cookies_request (const char *host, int port, const char *path, - int connection_secure_p) +cookie_header (struct cookie_jar *jar, const char *host, + int port, const char *path, bool secflag) { - struct cookie *chain_default_store[20]; - struct cookie **all_chains = chain_default_store; - int chain_store_size = ARRAY_SIZE (chain_default_store); + struct cookie **chains; int chain_count; struct cookie *cookie; @@ -1019,73 +969,57 @@ build_cookies_request (const char *host, int port, const char *path, int count, i, ocnt; char *result; int result_size, pos; + PREPEND_SLASH (path); /* see cookie_handle_set_cookie */ - again: - chain_count = find_matching_chains (host, port, all_chains, chain_store_size); - if (chain_count > chain_store_size) - { - /* It's extremely unlikely that more than 20 chains will ever - match. But in this case it's easy to not have the - limitation, so we don't. */ - all_chains = alloca (chain_count * sizeof (struct cookie *)); - goto again; - } + /* First, find the cookie chains whose domains match HOST. */ + /* Allocate room for find_chains_of_host to write to. The number of + chains can at most equal the number of subdomains, hence + 1+. */ + chains = alloca_array (struct cookie *, 1 + count_char (host, '.')); + chain_count = find_chains_of_host (jar, host, chains); + + /* No cookies for this host. */ if (!chain_count) return NULL; cookies_now = time (NULL); - /* Count the number of cookies whose path matches. */ + /* Now extract from the chains those cookies that match our host + (for domain_exact cookies), port (for cookies with port other + than PORT_ANY), etc. See matching_cookie for details. */ + + /* Count the number of matching cookies. */ count = 0; for (i = 0; i < chain_count; i++) - for (cookie = all_chains[i]; cookie; cookie = cookie->next) - if (matching_cookie (cookie, path, connection_secure_p, NULL)) - ++count; + for (cookie = chains[i]; cookie; cookie = cookie->next) + if (cookie_matches_url (cookie, host, port, path, secflag, NULL)) + ++count; if (!count) - /* No matching cookies. */ - return NULL; + return NULL; /* no cookies matched */ /* Allocate the array. */ - outgoing = alloca (count * sizeof (struct weighed_cookie)); + outgoing = alloca_array (struct weighed_cookie, count); + /* Fill the array with all the matching cookies from the chains that + match HOST. */ ocnt = 0; for (i = 0; i < chain_count; i++) - for (cookie = all_chains[i]; cookie; cookie = cookie->next) + for (cookie = chains[i]; cookie; cookie = cookie->next) { - int pg; - if (!matching_cookie (cookie, path, connection_secure_p, &pg)) - continue; - outgoing[ocnt].cookie = cookie; - outgoing[ocnt].domain_goodness = strlen (cookie->domain); - outgoing[ocnt].path_goodness = pg; - ++ocnt; + int pg; + if (!cookie_matches_url (cookie, host, port, path, secflag, &pg)) + continue; + outgoing[ocnt].cookie = cookie; + outgoing[ocnt].domain_goodness = strlen (cookie->domain); + outgoing[ocnt].path_goodness = pg; + ++ocnt; } assert (ocnt == count); /* Eliminate duplicate cookies; that is, those whose name and value - are the same. We do it by first sorting the array, and then - uniq'ing it. */ - qsort (outgoing, count, sizeof (struct weighed_cookie), equality_comparator); - for (i = 0; i < count - 1; i++) - { - struct cookie *c1 = outgoing[i].cookie; - struct cookie *c2 = outgoing[i + 1].cookie; - if (!strcmp (c1->attr, c2->attr) && !strcmp (c1->value, c2->value)) - { - /* c1 and c2 are the same; get rid of c2. */ - if (count > i + 1) - /* move all ptrs from positions [i + 1, count) to i. */ - memmove (outgoing + i, outgoing + i + 1, - (count - (i + 1)) * sizeof (struct weighed_cookie)); - /* We decrement i to counter the ++i above. Remember that - we've just removed the element in front of us; we need to - remain in place to check whether outgoing[i] what used to - be outgoing[i + 2]. */ - --i; - --count; - } - } + are the same. */ + count = eliminate_dups (outgoing, count); /* Sort the array so that best-matching domains come first, and that, within one domain, best-matching paths come first. */ @@ -1101,16 +1035,12 @@ build_cookies_request (const char *host, int port, const char *path, } /* Allocate output buffer: - "Cookie: " -- 8 name=value pairs -- result_size "; " separators -- (count - 1) * 2 - \r\n line ending -- 2 \0 terminator -- 1 */ - result_size = 8 + result_size + (count - 1) * 2 + 2 + 1; + result_size = result_size + (count - 1) * 2 + 1; result = xmalloc (result_size); pos = 0; - strcpy (result, "Cookie: "); - pos += 8; for (i = 0; i < count; i++) { struct cookie *c = outgoing[i].cookie; @@ -1123,21 +1053,20 @@ build_cookies_request (const char *host, int port, const char *path, memcpy (result + pos, c->value, vallen); pos += vallen; if (i < count - 1) - { - result[pos++] = ';'; - result[pos++] = ' '; - } + { + result[pos++] = ';'; + result[pos++] = ' '; + } } - result[pos++] = '\r'; - result[pos++] = '\n'; result[pos++] = '\0'; assert (pos == result_size); return result; } /* Support for loading and saving cookies. The format used for - loading and saving roughly matches the format of `cookies.txt' file - used by Netscape and Mozilla, at least the Unix versions. The + loading and saving should be the format of the `cookies.txt' file + used by Netscape and Mozilla, at least the Unix versions. + (Apparently IE can export cookies in that format as well.) The format goes like this: DOMAIN DOMAIN-FLAG PATH SECURE-FLAG TIMESTAMP ATTR-NAME ATTR-VALUE @@ -1150,22 +1079,18 @@ build_cookies_request (const char *host, int port, const char *path, ATTR-NAME -- name of the cookie attribute ATTR-VALUE -- value of the cookie attribute (empty if absent) - The fields are separated by TABs (but Wget's loader recognizes any - whitespace). All fields are mandatory, except for ATTR-VALUE. The - `-FLAG' fields are boolean, their legal values being "TRUE" and - "FALSE'. Empty lines, lines consisting of whitespace only, and - comment lines (beginning with # optionally preceded by whitespace) - are ignored. + The fields are separated by TABs. All fields are mandatory, except + for ATTR-VALUE. The `-FLAG' fields are boolean, their legal values + being "TRUE" and "FALSE'. Empty lines, lines consisting of + whitespace only, and comment lines (beginning with # optionally + preceded by whitespace) are ignored. Example line from cookies.txt (split in two lines for readability): - .google.com TRUE / FALSE 2147368447 \ - PREF ID=34bb47565bbcd47b:LD=en:NR=20:TM=985172580:LM=985739012 - - DOMAIN-FLAG is currently not honored by Wget. The cookies whose - domain begins with `.' are treated as if DOMAIN-FLAG were true, - while all other cookies are treated as if it were FALSE. */ + .google.com TRUE / FALSE 2147368447 \ + PREF ID=34bb47565bbcd47b:LD=en:NR=20:TM=985172580:LM=985739012 +*/ /* If the region [B, E) ends with :, parse the number, return it, and store new boundary (location of the `:') to DOMAIN_E_PTR. @@ -1173,14 +1098,14 @@ build_cookies_request (const char *host, int port, const char *path, static int domain_port (const char *domain_b, const char *domain_e, - const char **domain_e_ptr) + const char **domain_e_ptr) { int port = 0; const char *p; const char *colon = memchr (domain_b, ':', domain_e - domain_b); if (!colon) return 0; - for (p = colon + 1; p < domain_e && ISDIGIT (*p); p++) + for (p = colon + 1; p < domain_e && c_isdigit (*p); p++) port = 10 * port + (*p - '0'); if (p < domain_e) /* Garbage following port number. */ @@ -1189,162 +1114,140 @@ domain_port (const char *domain_b, const char *domain_e, return port; } -#define SKIP_WS(p) do { \ - while (*p && ISSPACE (*p)) \ - ++p; \ -} while (0) - -#define MARK_WORD(p, b, e) do { \ - SKIP_WS (p); \ - b = p; \ - /* skip non-ws */ \ - while (*p && !ISSPACE (*p)) \ - ++p; \ - e = p; \ - if (b == e) \ - goto next; \ +#define GET_WORD(p, b, e) do { \ + b = p; \ + while (*p && *p != '\t') \ + ++p; \ + e = p; \ + if (b == e || !*p) \ + goto next; \ + ++p; \ } while (0) /* Load cookies from FILE. */ void -load_cookies (const char *file) +cookie_jar_load (struct cookie_jar *jar, const char *file) { - char *line; + char *line = NULL; + size_t bufsize = 0; + FILE *fp = fopen (file, "r"); if (!fp) { - logprintf (LOG_NOTQUIET, "Cannot open cookies file `%s': %s\n", - file, strerror (errno)); + logprintf (LOG_NOTQUIET, _("Cannot open cookies file %s: %s\n"), + quote (file), strerror (errno)); return; } + cookies_now = time (NULL); - for (; ((line = read_whole_line (fp)) != NULL); xfree (line)) + while (getline (&line, &bufsize, fp) > 0) { struct cookie *cookie; char *p = line; + double expiry; int port; char *domain_b = NULL, *domain_e = NULL; - char *ignore_b = NULL, *ignore_e = NULL; + char *domflag_b = NULL, *domflag_e = NULL; char *path_b = NULL, *path_e = NULL; char *secure_b = NULL, *secure_e = NULL; char *expires_b = NULL, *expires_e = NULL; char *name_b = NULL, *name_e = NULL; char *value_b = NULL, *value_e = NULL; - SKIP_WS (p); - + /* Skip leading white-space. */ + while (*p && c_isspace (*p)) + ++p; + /* Ignore empty lines. */ if (!*p || *p == '#') - /* empty line */ - continue; - - MARK_WORD (p, domain_b, domain_e); - MARK_WORD (p, ignore_b, ignore_e); - MARK_WORD (p, path_b, path_e); - MARK_WORD (p, secure_b, secure_e); - MARK_WORD (p, expires_b, expires_e); - MARK_WORD (p, name_b, name_e); - - /* Don't use MARK_WORD for value because it may contain - whitespace itself. Instead, . */ - MARK_WORD (p, value_b, value_e); + continue; + + GET_WORD (p, domain_b, domain_e); + GET_WORD (p, domflag_b, domflag_e); + GET_WORD (p, path_b, path_e); + GET_WORD (p, secure_b, secure_e); + GET_WORD (p, expires_b, expires_e); + GET_WORD (p, name_b, name_e); + + /* Don't use GET_WORD for value because it ends with newline, + not TAB. */ + value_b = p; + value_e = p + strlen (p); + if (value_e > value_b && value_e[-1] == '\n') + --value_e; + if (value_e > value_b && value_e[-1] == '\r') + --value_e; + /* Empty values are legal (I think), so don't bother checking. */ cookie = cookie_new (); cookie->attr = strdupdelim (name_b, name_e); cookie->value = strdupdelim (value_b, value_e); cookie->path = strdupdelim (path_b, path_e); + cookie->secure = BOUNDED_EQUAL (secure_b, secure_e, "TRUE"); - if (BOUNDED_EQUAL (secure_b, secure_e, "TRUE")) - cookie->secure = 1; + /* Curl source says, quoting Andre Garcia: "flag: A TRUE/FALSE + value indicating if all machines within a given domain can + access the variable. This value is set automatically by the + browser, depending on the value set for the domain." */ + cookie->domain_exact = !BOUNDED_EQUAL (domflag_b, domflag_e, "TRUE"); /* DOMAIN needs special treatment because we might need to - extract the port. */ + extract the port. */ port = domain_port (domain_b, domain_e, (const char **)&domain_e); if (port) - cookie->port = port; - else - cookie->port = cookie->secure ? DEFAULT_HTTPS_PORT : DEFAULT_HTTP_PORT; + cookie->port = port; + if (*domain_b == '.') + ++domain_b; /* remove leading dot internally */ cookie->domain = strdupdelim (domain_b, domain_e); - /* Don't use MARK_WORD for value because it may contain - whitespace itself. Instead, set name_e to the end of line, - modulo trailing space (which includes the NL separator.) */ - SKIP_WS (p); - name_b = p; - name_e = p + strlen (p); - while (name_e >= name_b && ISSPACE (*name_e)) - --name_e; - if (name_b == name_e) - /* Hmm, should we check for empty value? I guess that's - legal, so I leave it. */ - ; - /* safe default in case EXPIRES field is garbled. */ - cookie->expiry_time = cookies_now - 1; + expiry = (double)cookies_now - 1; - /* I don't like changing the line, but it's completely safe. - (line is malloced.) */ + /* I don't like changing the line, but it's safe here. (line is + malloced.) */ *expires_e = '\0'; - sscanf (expires_b, "%lu", &cookie->expiry_time); - if (cookie->expiry_time < cookies_now) - /* ignore stale cookie. */ - goto abort; - cookie->permanent = 1; + sscanf (expires_b, "%lf", &expiry); + + if (expiry == 0) + { + /* EXPIRY can be 0 for session cookies saved because the + user specified `--keep-session-cookies' in the past. + They remain session cookies, and will be saved only if + the user has specified `keep-session-cookies' again. */ + } + else + { + if (expiry < cookies_now) + goto abort_cookie; /* ignore stale cookie. */ + cookie->expiry_time = expiry; + cookie->permanent = 1; + } - store_cookie (cookie); + store_cookie (jar, cookie); next: continue; - abort: + abort_cookie: delete_cookie (cookie); } - fclose (fp); -} - -/* Mapper for save_cookies callable by hash_table_map. VALUE points - to the head in a chain of cookies. The function prints the entire - chain. */ -static int -save_cookies_mapper (void *key, void *value, void *arg) -{ - FILE *fp = (FILE *)arg; - char *domain = (char *)key; - struct cookie *chain = (struct cookie *)value; - for (; chain; chain = chain->next) - { - if (!chain->permanent) - continue; - if (chain->expiry_time < cookies_now) - continue; - fprintf (fp, "%s\t%s\t%s\t%s\t%lu\t%s\t%s\n", - domain, *domain == '.' ? "TRUE" : "FALSE", - chain->path, chain->secure ? "TRUE" : "FALSE", - chain->expiry_time, - chain->attr, chain->value); - if (ferror (fp)) - return 1; /* stop mapping */ - } - return 0; + xfree(line); + fclose (fp); } /* Save cookies, in format described above, to FILE. */ void -save_cookies (const char *file) +cookie_jar_save (struct cookie_jar *jar, const char *file) { FILE *fp; - - if (!cookies_hash_table - || !hash_table_count (cookies_hash_table)) - /* no cookies stored; nothing to do. */ - return; + hash_table_iterator iter; DEBUGP (("Saving cookies to %s.\n", file)); @@ -1353,58 +1256,158 @@ save_cookies (const char *file) fp = fopen (file, "w"); if (!fp) { - logprintf (LOG_NOTQUIET, _("Cannot open cookies file `%s': %s\n"), - file, strerror (errno)); + logprintf (LOG_NOTQUIET, _("Cannot open cookies file %s: %s\n"), + quote (file), strerror (errno)); return; } fputs ("# HTTP cookie file.\n", fp); - fprintf (fp, "# Generated by Wget on %s.\n", datetime_str (NULL)); + fprintf (fp, "# Generated by Wget on %s.\n", datetime_str (cookies_now)); fputs ("# Edit at your own risk.\n\n", fp); - hash_table_map (cookies_hash_table, save_cookies_mapper, fp); - + for (hash_table_iterate (jar->chains, &iter); + hash_table_iter_next (&iter); + ) + { + const char *domain = iter.key; + struct cookie *cookie = iter.value; + for (; cookie; cookie = cookie->next) + { + if (!cookie->permanent && !opt.keep_session_cookies) + continue; + if (cookie_expired_p (cookie)) + continue; + if (!cookie->domain_exact) + fputc ('.', fp); + fputs (domain, fp); + if (cookie->port != PORT_ANY) + fprintf (fp, ":%d", cookie->port); + fprintf (fp, "\t%s\t%s\t%s\t%.0f\t%s\t%s\n", + cookie->domain_exact ? "FALSE" : "TRUE", + cookie->path, cookie->secure ? "TRUE" : "FALSE", + (double)cookie->expiry_time, + cookie->attr, cookie->value); + if (ferror (fp)) + goto out; + } + } + out: if (ferror (fp)) - logprintf (LOG_NOTQUIET, _("Error writing to `%s': %s\n"), - file, strerror (errno)); - + logprintf (LOG_NOTQUIET, _("Error writing to %s: %s\n"), + quote (file), strerror (errno)); if (fclose (fp) < 0) - logprintf (LOG_NOTQUIET, _("Error closing `%s': %s\n"), - file, strerror (errno)); + logprintf (LOG_NOTQUIET, _("Error closing %s: %s\n"), + quote (file), strerror (errno)); - DEBUGP (("Done saving cookies.\n", file)); + DEBUGP (("Done saving cookies.\n")); } -static int -delete_cookie_chain_mapper (void *value, void *key, void *arg_ignored) -{ - char *chain_key = (char *)value; - struct cookie *chain = (struct cookie *)key; - - /* Remove the chain from the table and free the key. */ - hash_table_remove (cookies_hash_table, chain_key); - xfree (chain_key); +/* Clean up cookie-related data. */ - /* Then delete all the cookies in the chain. */ - while (chain) +void +cookie_jar_delete (struct cookie_jar *jar) +{ + /* Iterate over chains (indexed by domain) and free them. */ + hash_table_iterator iter; + for (hash_table_iterate (jar->chains, &iter); hash_table_iter_next (&iter); ) { - struct cookie *next = chain->next; - delete_cookie (chain); - chain = next; + struct cookie *chain = iter.value; + xfree (iter.key); + /* Then all cookies in this chain. */ + while (chain) + { + struct cookie *next = chain->next; + delete_cookie (chain); + chain = next; + } } - - /* Keep mapping. */ - return 0; + hash_table_destroy (jar->chains); + xfree (jar); } + +/* Test cases. Currently this is only tests parse_set_cookies. To + use, recompile Wget with -DTEST_COOKIES and call test_cookies() + from main. */ -/* Clean up cookie-related data. */ - +#ifdef TEST_COOKIES void -cookies_cleanup (void) +test_cookies (void) { - if (!cookies_hash_table) - return; - hash_table_map (cookies_hash_table, delete_cookie_chain_mapper, NULL); - hash_table_destroy (cookies_hash_table); - cookies_hash_table = NULL; + /* Tests expected to succeed: */ + static struct { + const char *data; + const char *results[10]; + } tests_succ[] = { + { "arg=value", {"arg", "value", NULL} }, + { "arg1=value1;arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} }, + { "arg1=value1; arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} }, + { "arg1=value1; arg2=value2;", {"arg1", "value1", "arg2", "value2", NULL} }, + { "arg1=value1; arg2=value2; ", {"arg1", "value1", "arg2", "value2", NULL} }, + { "arg1=\"value1\"; arg2=\"\"", {"arg1", "value1", "arg2", "", NULL} }, + { "arg=", {"arg", "", NULL} }, + { "arg1=; arg2=", {"arg1", "", "arg2", "", NULL} }, + { "arg1 = ; arg2= ", {"arg1", "", "arg2", "", NULL} }, + }; + + /* Tests expected to fail: */ + static char *tests_fail[] = { + ";", + "arg=\"unterminated", + "=empty-name", + "arg1=;=another-empty-name", + }; + int i; + + for (i = 0; i < countof (tests_succ); i++) + { + int ind; + const char *data = tests_succ[i].data; + const char **expected = tests_succ[i].results; + struct cookie *c; + + c = parse_set_cookie (data, true); + if (!c) + { + printf ("NULL cookie returned for valid data: %s\n", data); + continue; + } + + /* Test whether extract_param handles these cases correctly. */ + { + param_token name, value; + const char *ptr = data; + int j = 0; + while (extract_param (&ptr, &name, &value, ';', NULL)) + { + char *n = strdupdelim (name.b, name.e); + char *v = strdupdelim (value.b, value.e); + if (!expected[j]) + { + printf ("Too many parameters for '%s'\n", data); + break; + } + if (0 != strcmp (expected[j], n)) + printf ("Invalid name %d for '%s' (expected '%s', got '%s')\n", + j / 2 + 1, data, expected[j], n); + if (0 != strcmp (expected[j + 1], v)) + printf ("Invalid value %d for '%s' (expected '%s', got '%s')\n", + j / 2 + 1, data, expected[j + 1], v); + j += 2; + free (n); + free (v); + } + if (expected[j]) + printf ("Too few parameters for '%s'\n", data); + } + } + + for (i = 0; i < countof (tests_fail); i++) + { + struct cookie *c; + char *data = tests_fail[i]; + c = parse_set_cookie (data, true); + if (c) + printf ("Failed to report error on invalid data: %s\n", data); + } } +#endif /* TEST_COOKIES */