X-Git-Url: http://sjero.net/git/?a=blobdiff_plain;f=src%2Fcookies.c;h=eb3f879f5716ab6fe071b4080c49a8ff414f43a6;hb=0967c21094580317353f0742c4836c5bbea34059;hp=7df96f7c9cde86d1f441313b170c519bfa538cd2;hpb=291693c3c25766a454f472415b22c2d1fbddcd29;p=wget diff --git a/src/cookies.c b/src/cookies.c index 7df96f7c..eb3f879f 100644 --- a/src/cookies.c +++ b/src/cookies.c @@ -15,16 +15,32 @@ General Public License for more details. You should have received a copy of the GNU General Public License along with Wget; if not, write to the Free Software -Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + +In addition, as a special exception, the Free Software Foundation +gives permission to link the code of its release of Wget with the +OpenSSL project's "OpenSSL" library (or with modified versions of it +that use the same license as the "OpenSSL" library), and distribute +the linked executables. You must obey the GNU General Public License +in all respects for all of the code used other than "OpenSSL". If you +modify this file, you may extend this exception to your version of the +file, but you are not obligated to do so. If you do not wish to do +so, delete this exception statement from your version. */ /* Written by Hrvoje Niksic. Parts are loosely inspired by cookie code submitted by Tomasz Wegrzanowski. - TODO: Implement limits on cookie-related sizes, such as max. cookie - size, max. number of cookies, etc. Add more "cookie jar" methods, - such as methods to over stored cookies, to clear temporary cookies, - to perform intelligent auto-saving, etc. Ultimately support - `Set-Cookie2' and `Cookie2' headers. */ + Ideas for future work: + + * Implement limits on cookie-related sizes, such as max. cookie + size, max. number of cookies, etc. + + * Add more "cookie jar" methods, such as methods to iterate over + stored cookies, to clear temporary cookies, to perform + intelligent auto-saving, etc. + + * Support `Set-Cookie2' and `Cookie2' headers? Does anyone really + use them? */ #include @@ -48,11 +64,21 @@ time_t http_atotm PARAMS ((const char *)); /* Declarations of `struct cookie' and the most basic functions. */ +/* Cookie jar serves as cookie storage and a means of retrieving + cookies efficiently. All cookies with the same domain are stored + in a linked list called "chain". A cookie chain can be reached by + looking up the domain in the cookie jar's chains_by_domain table. + + For example, to reach all the cookies under google.com, one must + execute hash_table_get(jar->chains_by_domain, "google.com"). Of + course, when sending a cookie to `www.google.com', one must search + for cookies that belong to either `www.google.com' or `google.com' + -- but the point is that the code doesn't need to go through *all* + the cookies. */ + struct cookie_jar { - /* Hash table that maps domain names to cookie chains. A "cookie - chain" is a linked list of cookies that belong to the same - domain. */ - struct hash_table *chains_by_domain; + /* Cookie chains indexed by domain. */ + struct hash_table *chains; int cookie_count; /* number of cookies in the jar. */ }; @@ -64,8 +90,8 @@ time_t cookies_now; struct cookie_jar * cookie_jar_new (void) { - struct cookie_jar *jar = xmalloc (sizeof (struct cookie_jar)); - jar->chains_by_domain = make_nocase_string_hash_table (0); + struct cookie_jar *jar = xnew (struct cookie_jar); + jar->chains = make_nocase_string_hash_table (0); jar->cookie_count = 0; return jar; } @@ -74,62 +100,73 @@ struct cookie { char *domain; /* domain of the cookie */ int port; /* port number */ char *path; /* path prefix of the cookie */ + int secure; /* whether cookie should be transmitted over non-https connections. */ + int domain_exact; /* whether DOMAIN must match as a + whole. */ + int permanent; /* whether the cookie should outlive - the session */ - time_t expiry_time; /* time when the cookie expires */ + the session. */ + time_t expiry_time; /* time when the cookie expires, 0 + means undetermined. */ + int discard_requested; /* whether cookie was created to request discarding another - cookie */ + cookie. */ char *attr; /* cookie attribute name */ char *value; /* cookie attribute value */ - struct cookie_jar *jar; /* pointer back to the cookie jar, for - convenience. */ struct cookie *next; /* used for chaining of cookies in the same domain. */ }; #define PORT_ANY (-1) -#define COOKIE_EXPIRED_P(c) ((c)->expiry_time != 0 && (c)->expiry_time < cookies_now) /* Allocate and return a new, empty cookie structure. */ static struct cookie * cookie_new (void) { - struct cookie *cookie = xmalloc (sizeof (struct cookie)); - memset (cookie, '\0', sizeof (struct cookie)); + struct cookie *cookie = xnew0 (struct cookie); - /* Both cookie->permanent and cookie->expiry_time are now 0. By - default, we assume that the cookie is non-permanent and valid - until the end of the session. */ + /* Both cookie->permanent and cookie->expiry_time are now 0. This + means that the cookie doesn't expire, but is only valid for this + session (i.e. not written out to disk). */ cookie->port = PORT_ANY; return cookie; } +/* Non-zero if the cookie has expired. Assumes cookies_now has been + set by one of the entry point functions. */ + +static int +cookie_expired_p (const struct cookie *c) +{ + return c->expiry_time != 0 && c->expiry_time < cookies_now; +} + /* Deallocate COOKIE and its components. */ static void delete_cookie (struct cookie *cookie) { - FREE_MAYBE (cookie->domain); - FREE_MAYBE (cookie->path); - FREE_MAYBE (cookie->attr); - FREE_MAYBE (cookie->value); + xfree_null (cookie->domain); + xfree_null (cookie->path); + xfree_null (cookie->attr); + xfree_null (cookie->value); xfree (cookie); } /* Functions for storing cookies. - All cookies can be reached beginning with jar->chains_by_domain. - The key in that table is the domain name, and the value is a linked - list of all cookies from that domain. Every new cookie is placed - on the head of the list. */ + All cookies can be reached beginning with jar->chains. The key in + that table is the domain name, and the value is a linked list of + all cookies from that domain. Every new cookie is placed on the + head of the list. */ /* Find and return a cookie in JAR whose domain, path, and attribute name correspond to COOKIE. If found, PREVPTR will point to the @@ -144,7 +181,7 @@ find_matching_cookie (struct cookie_jar *jar, struct cookie *cookie, { struct cookie *chain, *prev; - chain = hash_table_get (jar->chains_by_domain, cookie->domain); + chain = hash_table_get (jar->chains, cookie->domain); if (!chain) goto nomatch; @@ -178,7 +215,7 @@ store_cookie (struct cookie_jar *jar, struct cookie *cookie) struct cookie *chain_head; char *chain_key; - if (hash_table_get_pair (jar->chains_by_domain, cookie->domain, + if (hash_table_get_pair (jar->chains, cookie->domain, &chain_key, &chain_head)) { /* A chain of cookies in this domain already exists. Check for @@ -212,26 +249,32 @@ store_cookie (struct cookie_jar *jar, struct cookie *cookie) } else { - /* We are now creating the chain. Allocate the string that will - be used as a key. It is unsafe to use cookie->domain for - that, because it might get deallocated by the above code at - some point later. */ + /* We are now creating the chain. Use a copy of cookie->domain + as the key for the life-time of the chain. Using + cookie->domain would be unsafe because the life-time of the + chain may exceed the life-time of the cookie. (Cookies may + be deleted from the chain by this very function.) */ cookie->next = NULL; chain_key = xstrdup (cookie->domain); } - hash_table_put (jar->chains_by_domain, chain_key, cookie); + hash_table_put (jar->chains, chain_key, cookie); ++jar->cookie_count; - DEBUGP (("\nStored cookie %s %d%s %s %s %d %s %s %s\n", - cookie->domain, cookie->port, - cookie->port == PORT_ANY ? " (ANY)" : "", - cookie->path, - cookie->permanent ? "permanent" : "nonpermanent", - cookie->secure, - cookie->expiry_time - ? asctime (localtime (&cookie->expiry_time)) : "", - cookie->attr, cookie->value)); +#ifdef ENABLE_DEBUG + if (opt.debug) + { + time_t exptime = (time_t) cookie->expiry_time; + DEBUGP (("\nStored cookie %s %d%s %s <%s> <%s> [expiry %s] %s %s\n", + cookie->domain, cookie->port, + cookie->port == PORT_ANY ? " (ANY)" : "", + cookie->path, + cookie->permanent ? "permanent" : "session", + cookie->secure ? "secure" : "insecure", + cookie->expiry_time ? datetime_str (&exptime) : "none", + cookie->attr, cookie->value)); + } +#endif } /* Discard a cookie matching COOKIE's domain, port, path, and @@ -245,7 +288,7 @@ discard_matching_cookie (struct cookie_jar *jar, struct cookie *cookie) { struct cookie *prev, *victim; - if (!hash_table_count (jar->chains_by_domain)) + if (!hash_table_count (jar->chains)) /* No elements == nothing to discard. */ return; @@ -262,18 +305,18 @@ discard_matching_cookie (struct cookie_jar *jar, struct cookie *cookie) char *chain_key = NULL; int res; - res = hash_table_get_pair (jar->chains_by_domain, victim->domain, + res = hash_table_get_pair (jar->chains, victim->domain, &chain_key, NULL); assert (res != 0); if (!victim->next) { /* VICTIM was the only cookie in the chain. Destroy the chain and deallocate the chain key. */ - hash_table_remove (jar->chains_by_domain, victim->domain); + hash_table_remove (jar->chains, victim->domain); xfree (chain_key); } else - hash_table_put (jar->chains_by_domain, chain_key, victim->next); + hash_table_put (jar->chains, chain_key, victim->next); } delete_cookie (victim); DEBUGP (("Discarded old cookie.\n")); @@ -283,7 +326,6 @@ discard_matching_cookie (struct cookie_jar *jar, struct cookie *cookie) /* Functions for parsing the `Set-Cookie' header, and creating new cookies from the wire. */ - #define NAME_IS(string_literal) \ BOUNDED_EQUAL_NO_CASE (name_b, name_e, string_literal) @@ -324,7 +366,13 @@ update_cookie_field (struct cookie *cookie, { if (!VALUE_NON_EMPTY) return 0; - FREE_MAYBE (cookie->domain); + xfree_null (cookie->domain); + /* Strictly speaking, we should set cookie->domain_exact if the + domain doesn't begin with a dot. But many sites set the + domain to "foo.com" and expect "subhost.foo.com" to get the + cookie, and it apparently works. */ + if (*value_b == '.') + ++value_b; cookie->domain = strdupdelim (value_b, value_e); return 1; } @@ -332,7 +380,7 @@ update_cookie_field (struct cookie *cookie, { if (!VALUE_NON_EMPTY) return 0; - FREE_MAYBE (cookie->path); + xfree_null (cookie->path); cookie->path = strdupdelim (value_b, value_e); return 1; } @@ -352,8 +400,8 @@ update_cookie_field (struct cookie *cookie, cookie->expiry_time = (time_t)expires; } else - /* Error in expiration spec. Assume default (cookie valid for - this session.) */ + /* Error in expiration spec. Assume default (cookie doesn't + expire, but valid only for this session.) */ ; /* According to netscape's specification, expiry time in the @@ -412,21 +460,6 @@ update_cookie_field (struct cookie *cookie, && (c) != '"' && (c) != '=' \ && (c) != ';' && (c) != ',') -/* Fetch the next character without doing anything special if CH gets - set to 0. (The code executed next is expected to handle it.) */ - -#define FETCH1(ch, ptr) do { \ - ch = *ptr++; \ -} while (0) - -/* Like FETCH1, but jumps to `eof' label if CH gets set to 0. */ - -#define FETCH(ch, ptr) do { \ - FETCH1 (ch, ptr); \ - if (!ch) \ - goto eof; \ -} while (0) - /* Parse the contents of the `Set-Cookie' header. The header looks like this: @@ -436,19 +469,25 @@ update_cookie_field (struct cookie *cookie, tokens. Additionally, values may be quoted. A new cookie is returned upon success, NULL otherwise. The - function `update_cookie_field' is used to update the fields of the - newly created cookie structure. */ + specified CALLBACK function (normally `update_cookie_field' is used + to update the fields of the newly created cookie structure. */ static struct cookie * -parse_set_cookies (const char *sc) +parse_set_cookies (const char *sc, + int (*callback) (struct cookie *, + const char *, const char *, + const char *, const char *), + int silent) { struct cookie *cookie = cookie_new (); - enum { S_NAME_PRE, S_NAME, S_NAME_POST, - S_VALUE_PRE, S_VALUE, S_VALUE_TRAILSPACE_MAYBE, - S_QUOTED_VALUE, S_QUOTED_VALUE_POST, - S_ATTR_ACTION, - S_DONE, S_ERROR } state = S_NAME_PRE; + /* #### Hand-written DFAs are no fun to debug. We'de be better off + to rewrite this as an inline parser. */ + + enum { S_START, S_NAME, S_NAME_POST, + S_VALUE_PRE, S_VALUE, S_QUOTED_VALUE, S_VALUE_TRAILSPACE, + S_ATTR_ACTION, S_DONE, S_ERROR + } state = S_START; const char *p = sc; char c; @@ -456,19 +495,21 @@ parse_set_cookies (const char *sc) const char *name_b = NULL, *name_e = NULL; const char *value_b = NULL, *value_e = NULL; - FETCH (c, p); + c = *p; while (state != S_DONE && state != S_ERROR) { switch (state) { - case S_NAME_PRE: - if (ISSPACE (c)) - FETCH (c, p); + case S_START: + if (!c) + state = S_DONE; + else if (ISSPACE (c)) + /* Strip all whitespace preceding the name. */ + c = *++p; else if (ATTR_NAME_CHAR (c)) { - name_b = p - 1; - FETCH1 (c, p); + name_b = p; state = S_NAME; } else @@ -476,116 +517,112 @@ parse_set_cookies (const char *sc) state = S_ERROR; break; case S_NAME: - if (ATTR_NAME_CHAR (c)) - FETCH1 (c, p); - else if (!c || c == ';' || c == '=' || ISSPACE (c)) + if (!c || c == ';' || c == '=' || ISSPACE (c)) { - name_e = p - 1; + name_e = p; state = S_NAME_POST; } + else if (ATTR_NAME_CHAR (c)) + c = *++p; else state = S_ERROR; break; case S_NAME_POST: - if (ISSPACE (c)) - FETCH1 (c, p); - else if (!c || c == ';') + if (!c || c == ';') { value_b = value_e = NULL; + if (c == ';') + c = *++p; state = S_ATTR_ACTION; } else if (c == '=') { - FETCH1 (c, p); + c = *++p; state = S_VALUE_PRE; } + else if (ISSPACE (c)) + /* Ignore space and keep the state. */ + c = *++p; else state = S_ERROR; break; case S_VALUE_PRE: - if (ISSPACE (c)) - FETCH1 (c, p); + if (!c || c == ';') + { + value_b = value_e = p; + if (c == ';') + c = *++p; + state = S_ATTR_ACTION; + } else if (c == '"') { + c = *++p; value_b = p; - FETCH (c, p); state = S_QUOTED_VALUE; } - else if (c == ';' || c == '\0') - { - value_b = value_e = p - 1; - state = S_ATTR_ACTION; - } + else if (ISSPACE (c)) + c = *++p; else { - value_b = p - 1; + value_b = p; value_e = NULL; state = S_VALUE; } break; case S_VALUE: - if (c == ';' || c == '\0') - { - if (!value_e) - value_e = p - 1; - state = S_ATTR_ACTION; - } - else if (ISSPACE (c)) + if (!c || c == ';' || ISSPACE (c)) { - value_e = p - 1; - FETCH1 (c, p); - state = S_VALUE_TRAILSPACE_MAYBE; + value_e = p; + state = S_VALUE_TRAILSPACE; } else { value_e = NULL; /* no trailing space */ - FETCH1 (c, p); + c = *++p; } break; - case S_VALUE_TRAILSPACE_MAYBE: - if (ISSPACE (c)) - FETCH1 (c, p); - else - state = S_VALUE; - break; case S_QUOTED_VALUE: if (c == '"') { - value_e = p - 1; - FETCH1 (c, p); - state = S_QUOTED_VALUE_POST; + value_e = p; + c = *++p; + state = S_VALUE_TRAILSPACE; } + else if (!c) + state = S_ERROR; else - FETCH (c, p); + c = *++p; break; - case S_QUOTED_VALUE_POST: - if (c == ';' || !c) + case S_VALUE_TRAILSPACE: + if (c == ';') + { + c = *++p; + state = S_ATTR_ACTION; + } + else if (!c) state = S_ATTR_ACTION; else if (ISSPACE (c)) - FETCH1 (c, p); + c = *++p; else - state = S_ERROR; + state = S_VALUE; break; case S_ATTR_ACTION: { - int legal = update_cookie_field (cookie, name_b, name_e, - value_b, value_e); + int legal = callback (cookie, name_b, name_e, value_b, value_e); if (!legal) { - char *name; - BOUNDED_TO_ALLOCA (name_b, name_e, name); - logprintf (LOG_NOTQUIET, - _("Error in Set-Cookie, field `%s'"), name); + if (!silent) + { + char *name; + BOUNDED_TO_ALLOCA (name_b, name_e, name); + logprintf (LOG_NOTQUIET, + _("Error in Set-Cookie, field `%s'"), + escnonprint (name)); + } state = S_ERROR; break; } - - if (c) - FETCH1 (c, p); - if (!c) - state = S_DONE; - else - state = S_NAME_PRE; + state = S_START; } break; case S_DONE: @@ -598,16 +635,13 @@ parse_set_cookies (const char *sc) return cookie; delete_cookie (cookie); - if (state == S_ERROR) - logprintf (LOG_NOTQUIET, _("Syntax error in Set-Cookie at character `%c'.\n"), c); - else + if (state != S_ERROR) abort (); - return NULL; - eof: - delete_cookie (cookie); - logprintf (LOG_NOTQUIET, - _("Syntax error in Set-Cookie: premature end of string.\n")); + if (!silent) + logprintf (LOG_NOTQUIET, + _("Syntax error in Set-Cookie: %s at position %d.\n"), + escnonprint (sc), p - sc); return NULL; } @@ -630,9 +664,9 @@ parse_set_cookies (const char *sc) /* Check whether ADDR matches .... - We don't want to call network functions like inet_addr() because all - we need is a check, preferrably one that is small, fast, and - well-defined. */ + We don't want to call network functions like inet_addr() because + all we need is a check, preferrably one that is small, fast, and + well-defined. */ static int numeric_address_p (const char *addr) @@ -670,13 +704,13 @@ check_domain_match (const char *cookie_domain, const char *host) DEBUGP ((" 2")); /* For the sake of efficiency, check for exact match first. */ - if (!strcasecmp (cookie_domain, host)) + if (0 == strcasecmp (cookie_domain, host)) return 1; DEBUGP ((" 3")); /* HOST must match the tail of cookie_domain. */ - if (!match_tail (host, cookie_domain)) + if (!match_tail (host, cookie_domain, 1)) return 0; /* We know that COOKIE_DOMAIN is a subset of HOST; however, we must @@ -750,11 +784,11 @@ check_domain_match (const char *cookie_domain, const char *host) { int i; int known_toplevel = 0; - static char *known_toplevel_domains[] = { + static const char *known_toplevel_domains[] = { ".com", ".edu", ".net", ".org", ".gov", ".mil", ".int" }; - for (i = 0; i < ARRAY_SIZE (known_toplevel_domains); i++) - if (match_tail (cookie_domain, known_toplevel_domains[i])) + for (i = 0; i < countof (known_toplevel_domains); i++) + if (match_tail (cookie_domain, known_toplevel_domains[i], 1)) { known_toplevel = 1; break; @@ -766,7 +800,8 @@ check_domain_match (const char *cookie_domain, const char *host) DEBUGP ((" 7")); - /* Don't allow domain "bar.com" to match host "foobar.com". */ + /* Don't allow the host "foobar.com" to set a cookie for domain + "bar.com". */ if (*cookie_domain != '.') { int dlen = strlen (cookie_domain); @@ -798,14 +833,14 @@ check_path_match (const char *cookie_path, const char *path) depending on the contents. */ void -cookie_jar_process_set_cookie (struct cookie_jar *jar, - const char *host, int port, - const char *path, const char *set_cookie) +cookie_handle_set_cookie (struct cookie_jar *jar, + const char *host, int port, + const char *path, const char *set_cookie) { struct cookie *cookie; cookies_now = time (NULL); - cookie = parse_set_cookies (set_cookie); + cookie = parse_set_cookies (set_cookie, update_cookie_field, 0); if (!cookie) goto out; @@ -814,8 +849,13 @@ cookie_jar_process_set_cookie (struct cookie_jar *jar, if (!cookie->domain) { copy_domain: + /* If the domain was not provided, we use the one we're talking + to, and set exact match. */ cookie->domain = xstrdup (host); - cookie->port = port; + cookie->domain_exact = 1; + /* Set the port, but only if it's non-default. */ + if (port != 80 && port != 443) + cookie->port = port; } else { @@ -823,14 +863,26 @@ cookie_jar_process_set_cookie (struct cookie_jar *jar, { logprintf (LOG_NOTQUIET, "Cookie coming from %s attempted to set domain to %s\n", - host, cookie->domain); + escnonprint (host), escnonprint (cookie->domain)); + xfree (cookie->domain); goto copy_domain; } } + if (!cookie->path) - cookie->path = xstrdup (path); + { + /* The cookie doesn't set path: set it to the URL path, sans the + file part ("/dir/file" truncated to "/dir/"). */ + char *trailing_slash = strrchr (path, '/'); + if (trailing_slash) + cookie->path = strdupdelim (path, trailing_slash + 1); + else + /* no slash in the string -- can this even happen? */ + cookie->path = xstrdup (path); + } else { + /* The cookie sets its own path; verify that it is legal. */ if (!check_path_match (cookie->path, path)) { DEBUGP (("Attempt to fake the path: %s, %s\n", @@ -839,6 +891,9 @@ cookie_jar_process_set_cookie (struct cookie_jar *jar, } } + /* Now store the cookie, or discard an existing cookie, if + discarding was requested. */ + if (cookie->discard_requested) { discard_matching_cookie (jar, cookie); @@ -856,58 +911,69 @@ cookie_jar_process_set_cookie (struct cookie_jar *jar, /* Support for sending out cookies in HTTP requests, based on previously stored cookies. Entry point is `build_cookies_request'. */ - -/* Store CHAIN to STORE if there is room in STORE. If not, inrecement - COUNT anyway, so that when the function is done, we end up with the - exact count of how much place we actually need. */ - -#define STORE_CHAIN(st_chain, st_store, st_size, st_count) do { \ - if (st_count < st_size) \ - store[st_count] = st_chain; \ - ++st_count; \ -} while (0) - -/* Store cookie chains that match HOST. Since more than one chain can - match, the matches are written to STORE. No more than SIZE matches - are written; if more matches are present, return the number of - chains that would have been written. */ + +/* Return a count of how many times CHR occurs in STRING. */ static int -find_matching_chains (struct cookie_jar *jar, const char *host, - struct cookie *store[], int size) +count_char (const char *string, char chr) { - struct cookie *chain; - int dot_count; - char *hash_key; + const char *p; int count = 0; + for (p = string; *p; p++) + if (*p == chr) + ++count; + return count; +} - if (!hash_table_count (jar->chains_by_domain)) - return 0; +/* Find the cookie chains whose domains match HOST and store them to + DEST. - STRDUP_ALLOCA (hash_key, host); + A cookie chain is the head of a list of cookies that belong to a + host/domain. Given HOST "img.search.xemacs.org", this function + will return the chains for "img.search.xemacs.org", + "search.xemacs.org", and "xemacs.org" -- those of them that exist + (if any), that is. - /* Look for an exact match. */ - chain = hash_table_get (jar->chains_by_domain, hash_key); - if (chain) - STORE_CHAIN (chain, store, size, count); + DEST should be large enough to accept (in the worst case) as many + elements as there are domain components of HOST. */ + +static int +find_chains_of_host (struct cookie_jar *jar, const char *host, + struct cookie *dest[]) +{ + int dest_count = 0; + int passes, passcnt; - dot_count = count_char (host, '.'); + /* Bail out quickly if there are no cookies in the jar. */ + if (!hash_table_count (jar->chains)) + return 0; - /* Match less and less specific domains. For instance, given - fly.srk.fer.hr, we match .srk.fer.hr, then .fer.hr. */ - while (dot_count-- > 1) + if (numeric_address_p (host)) + /* If host is an IP address, only check for the exact match. */ + passes = 1; + else + /* Otherwise, check all the subdomains except the top-level (last) + one. As a domain with N components has N-1 dots, the number of + passes equals the number of dots. */ + passes = count_char (host, '.'); + + passcnt = 0; + + /* Find chains that match HOST, starting with exact match and + progressing to less specific domains. For instance, given HOST + fly.srk.fer.hr, first look for fly.srk.fer.hr's chain, then + srk.fer.hr's, then fer.hr's. */ + while (1) { - /* Note: we operate directly on hash_key (in form host:port) - because we don't want to allocate new hash keys in a - loop. */ - char *p = strchr (hash_key, '.'); - assert (p != NULL); - chain = hash_table_get (jar->chains_by_domain, p); + struct cookie *chain = hash_table_get (jar->chains, host); if (chain) - STORE_CHAIN (chain, store, size, count); - hash_key = p + 1; + dest[dest_count++] = chain; + if (++passcnt >= passes) + break; + host = strchr (host, '.') + 1; } - return count; + + return dest_count; } /* If FULL_PATH begins with PREFIX, return the length of PREFIX, zero @@ -920,8 +986,8 @@ path_matches (const char *full_path, const char *prefix) if (*prefix != '/') /* Wget's HTTP paths do not begin with '/' (the URL code treats it - as a separator), but the '/' is assumed when matching against - the cookie stuff. */ + as a mere separator, inspired by rfc1808), but the '/' is + assumed when matching against the cookie stuff. */ return 0; ++prefix; @@ -935,21 +1001,21 @@ path_matches (const char *full_path, const char *prefix) return len + 1; } -/* Return non-zero iff COOKIE matches the given PATH, PORT, and - security flag. HOST is not a flag because it is assumed that the - cookie comes from the correct chain. +/* Return non-zero iff COOKIE matches the provided parameters of the + URL being downloaded: HOST, PORT, PATH, and SECFLAG. - If PATH_GOODNESS is non-NULL, store the "path goodness" there. The - said goodness is a measure of how well COOKIE matches PATH. It is + If PATH_GOODNESS is non-NULL, store the "path goodness" value + there. That value is a measure of how closely COOKIE matches PATH, used for ordering cookies. */ static int -matching_cookie (const struct cookie *cookie, const char *path, int port, - int connection_secure_p, int *path_goodness) +cookie_matches_url (const struct cookie *cookie, + const char *host, int port, const char *path, + int secflag, int *path_goodness) { int pg; - if (COOKIE_EXPIRED_P (cookie)) + if (cookie_expired_p (cookie)) /* Ignore stale cookies. Don't bother unchaining the cookie at this point -- Wget is a relatively short-lived application, and stale cookies will not be saved by `save_cookies'. On the @@ -957,11 +1023,19 @@ matching_cookie (const struct cookie *cookie, const char *path, int port, possible. */ return 0; - if (cookie->secure && !connection_secure_p) - /* Don't transmit secure cookies over an insecure connection. */ + if (cookie->secure && !secflag) + /* Don't transmit secure cookies over insecure connections. */ return 0; if (cookie->port != PORT_ANY && cookie->port != port) return 0; + + /* If exact domain match is required, verify that cookie's domain is + equal to HOST. If not, assume success on the grounds of the + cookie's chain having been found by find_chains_of_host. */ + if (cookie->domain_exact + && 0 != strcasecmp (host, cookie->domain)) + return 0; + pg = path_matches (path, cookie->path); if (!pg) return 0; @@ -974,6 +1048,11 @@ matching_cookie (const struct cookie *cookie, const char *path, int port, return 1; } +/* A structure that points to a cookie, along with the additional + information about the cookie's "goodness". This allows us to sort + the cookies when returning them to the server, as required by the + spec. */ + struct weighed_cookie { struct cookie *cookie; int domain_goodness; @@ -997,40 +1076,45 @@ equality_comparator (const void *p1, const void *p2) } /* Eliminate duplicate cookies. "Duplicate cookies" are any two - cookies whose name and value are the same. Whenever a duplicate + cookies with the same attr name and value. Whenever a duplicate pair is found, one of the cookies is removed. */ static int eliminate_dups (struct weighed_cookie *outgoing, int count) { - int i; + struct weighed_cookie *h; /* hare */ + struct weighed_cookie *t; /* tortoise */ + struct weighed_cookie *end = outgoing + count; /* We deploy a simple uniquify algorithm: first sort the array - according to our sort criterion, then uniquify it by comparing - each cookie with its neighbor. */ + according to our sort criteria, then copy it to itself, comparing + each cookie to its neighbor and ignoring the duplicates. */ qsort (outgoing, count, sizeof (struct weighed_cookie), equality_comparator); - for (i = 0; i < count - 1; i++) + /* "Hare" runs through all the entries in the array, followed by + "tortoise". If a duplicate is found, the hare skips it. + Non-duplicate entries are copied to the tortoise ptr. */ + + for (h = t = outgoing; h < end; h++) { - struct cookie *c1 = outgoing[i].cookie; - struct cookie *c2 = outgoing[i + 1].cookie; - if (!strcmp (c1->attr, c2->attr) && !strcmp (c1->value, c2->value)) + if (h != end - 1) { - /* c1 and c2 are the same; get rid of c2. */ - if (count > i + 1) - /* move all ptrs from positions [i + 1, count) to i. */ - memmove (outgoing + i, outgoing + i + 1, - (count - (i + 1)) * sizeof (struct weighed_cookie)); - /* We decrement i to counter the ++i above. Remember that - we've just removed the element in front of us; we need to - remain in place to check whether outgoing[i] matches what - used to be outgoing[i + 2]. */ - --i; - --count; + struct cookie *c0 = h[0].cookie; + struct cookie *c1 = h[1].cookie; + if (!strcmp (c0->attr, c1->attr) && !strcmp (c0->value, c1->value)) + continue; /* ignore the duplicate */ } + + /* If the hare has advanced past the tortoise (because of + previous dups), make sure the values get copied. Otherwise, + no copying is necessary. */ + if (h != t) + *t++ = *h; + else + t++; } - return count; + return t - outgoing; } /* Comparator used for sorting by quality. */ @@ -1059,13 +1143,10 @@ goodness_comparator (const void *p1, const void *p2) generated, NULL is returned. */ char * -cookie_jar_generate_cookie_header (struct cookie_jar *jar, const char *host, - int port, const char *path, - int connection_secure_p) +cookie_header (struct cookie_jar *jar, const char *host, + int port, const char *path, int secflag) { - struct cookie *chain_default_store[20]; - struct cookie **all_chains = chain_default_store; - int chain_store_size = ARRAY_SIZE (chain_default_store); + struct cookie **chains; int chain_count; struct cookie *cookie; @@ -1074,45 +1155,44 @@ cookie_jar_generate_cookie_header (struct cookie_jar *jar, const char *host, char *result; int result_size, pos; - again: - chain_count = find_matching_chains (jar, host, all_chains, chain_store_size); - if (chain_count > chain_store_size) - { - /* It's extremely unlikely that more than 20 chains will ever - match. But since find_matching_chains reports the exact size - it needs, it's easy to not have the limitation, so we - don't. */ - all_chains = alloca (chain_count * sizeof (struct cookie *)); - chain_store_size = chain_count; - goto again; - } + /* First, find the cookie chains whose domains match HOST. */ + /* Allocate room for find_chains_of_host to write to. The number of + chains can at most equal the number of subdomains, hence + 1+. */ + chains = alloca_array (struct cookie *, 1 + count_char (host, '.')); + chain_count = find_chains_of_host (jar, host, chains); + + /* No cookies for this host. */ if (!chain_count) return NULL; cookies_now = time (NULL); - /* Count the number of cookies whose path matches. */ + /* Now extract from the chains those cookies that match our host + (for domain_exact cookies), port (for cookies with port other + than PORT_ANY), etc. See matching_cookie for details. */ + + /* Count the number of matching cookies. */ count = 0; for (i = 0; i < chain_count; i++) - for (cookie = all_chains[i]; cookie; cookie = cookie->next) - if (matching_cookie (cookie, path, port, connection_secure_p, NULL)) + for (cookie = chains[i]; cookie; cookie = cookie->next) + if (cookie_matches_url (cookie, host, port, path, secflag, NULL)) ++count; if (!count) - /* No matching cookies. */ - return NULL; + return NULL; /* no cookies matched */ /* Allocate the array. */ - outgoing = alloca (count * sizeof (struct weighed_cookie)); + outgoing = alloca_array (struct weighed_cookie, count); - /* Fill the array with all the matching cookies from all the - matching chains. */ + /* Fill the array with all the matching cookies from the chains that + match HOST. */ ocnt = 0; for (i = 0; i < chain_count; i++) - for (cookie = all_chains[i]; cookie; cookie = cookie->next) + for (cookie = chains[i]; cookie; cookie = cookie->next) { int pg; - if (!matching_cookie (cookie, path, port, connection_secure_p, &pg)) + if (!cookie_matches_url (cookie, host, port, path, secflag, &pg)) continue; outgoing[ocnt].cookie = cookie; outgoing[ocnt].domain_goodness = strlen (cookie->domain); @@ -1139,16 +1219,12 @@ cookie_jar_generate_cookie_header (struct cookie_jar *jar, const char *host, } /* Allocate output buffer: - "Cookie: " -- 8 name=value pairs -- result_size "; " separators -- (count - 1) * 2 - \r\n line ending -- 2 \0 terminator -- 1 */ - result_size = 8 + result_size + (count - 1) * 2 + 2 + 1; + result_size = result_size + (count - 1) * 2 + 1; result = xmalloc (result_size); pos = 0; - strcpy (result, "Cookie: "); - pos += 8; for (i = 0; i < count; i++) { struct cookie *c = outgoing[i].cookie; @@ -1166,16 +1242,15 @@ cookie_jar_generate_cookie_header (struct cookie_jar *jar, const char *host, result[pos++] = ' '; } } - result[pos++] = '\r'; - result[pos++] = '\n'; result[pos++] = '\0'; assert (pos == result_size); return result; } /* Support for loading and saving cookies. The format used for - loading and saving roughly matches the format of `cookies.txt' file - used by Netscape and Mozilla, at least the Unix versions. The + loading and saving should be the format of the `cookies.txt' file + used by Netscape and Mozilla, at least the Unix versions. + (Apparently IE can export cookies in that format as well.) The format goes like this: DOMAIN DOMAIN-FLAG PATH SECURE-FLAG TIMESTAMP ATTR-NAME ATTR-VALUE @@ -1188,22 +1263,18 @@ cookie_jar_generate_cookie_header (struct cookie_jar *jar, const char *host, ATTR-NAME -- name of the cookie attribute ATTR-VALUE -- value of the cookie attribute (empty if absent) - The fields are separated by TABs (but Wget's loader recognizes any - whitespace). All fields are mandatory, except for ATTR-VALUE. The - `-FLAG' fields are boolean, their legal values being "TRUE" and - "FALSE'. Empty lines, lines consisting of whitespace only, and - comment lines (beginning with # optionally preceded by whitespace) - are ignored. + The fields are separated by TABs. All fields are mandatory, except + for ATTR-VALUE. The `-FLAG' fields are boolean, their legal values + being "TRUE" and "FALSE'. Empty lines, lines consisting of + whitespace only, and comment lines (beginning with # optionally + preceded by whitespace) are ignored. Example line from cookies.txt (split in two lines for readability): .google.com TRUE / FALSE 2147368447 \ PREF ID=34bb47565bbcd47b:LD=en:NR=20:TM=985172580:LM=985739012 - DOMAIN-FLAG is currently not honored by Wget. The cookies whose - domain begins with `.' are treated as if DOMAIN-FLAG were true, - while all other cookies are treated as if it were FALSE. */ - +*/ /* If the region [B, E) ends with :, parse the number, return it, and store new boundary (location of the `:') to DOMAIN_E_PTR. @@ -1227,20 +1298,14 @@ domain_port (const char *domain_b, const char *domain_e, return port; } -#define SKIP_WS(p) do { \ - while (*p && ISSPACE (*p)) \ - ++p; \ -} while (0) - -#define SET_WORD_BOUNDARIES(p, b, e) do { \ - SKIP_WS (p); \ +#define GET_WORD(p, b, e) do { \ b = p; \ - /* skip non-ws */ \ - while (*p && !ISSPACE (*p)) \ + while (*p && *p != '\t') \ ++p; \ e = p; \ - if (b == e) \ + if (b == e || !*p) \ goto next; \ + ++p; \ } while (0) /* Load cookies from FILE. */ @@ -1267,77 +1332,89 @@ cookie_jar_load (struct cookie_jar *jar, const char *file) int port; char *domain_b = NULL, *domain_e = NULL; - char *ignore_b = NULL, *ignore_e = NULL; + char *domflag_b = NULL, *domflag_e = NULL; char *path_b = NULL, *path_e = NULL; char *secure_b = NULL, *secure_e = NULL; char *expires_b = NULL, *expires_e = NULL; char *name_b = NULL, *name_e = NULL; char *value_b = NULL, *value_e = NULL; - SKIP_WS (p); - + /* Skip leading white-space. */ + while (*p && ISSPACE (*p)) + ++p; + /* Ignore empty lines. */ if (!*p || *p == '#') - /* empty line */ continue; - SET_WORD_BOUNDARIES (p, domain_b, domain_e); - SET_WORD_BOUNDARIES (p, ignore_b, ignore_e); - SET_WORD_BOUNDARIES (p, path_b, path_e); - SET_WORD_BOUNDARIES (p, secure_b, secure_e); - SET_WORD_BOUNDARIES (p, expires_b, expires_e); - SET_WORD_BOUNDARIES (p, name_b, name_e); - - /* Don't use SET_WORD_BOUNDARIES for value because it may - contain whitespace. Instead, set value_e to the end of line, - modulo trailing space (this will skip the line separator.) */ - SKIP_WS (p); + GET_WORD (p, domain_b, domain_e); + GET_WORD (p, domflag_b, domflag_e); + GET_WORD (p, path_b, path_e); + GET_WORD (p, secure_b, secure_e); + GET_WORD (p, expires_b, expires_e); + GET_WORD (p, name_b, name_e); + + /* Don't use GET_WORD for value because it ends with newline, + not TAB. */ value_b = p; value_e = p + strlen (p); - while (value_e > value_b && ISSPACE (*(value_e - 1))) + if (value_e > value_b && value_e[-1] == '\n') --value_e; - if (value_b == value_e) - /* Hmm, should we check for empty value? I guess that's - legal, so I leave it. */ - ; + if (value_e > value_b && value_e[-1] == '\r') + --value_e; + /* Empty values are legal (I think), so don't bother checking. */ cookie = cookie_new (); cookie->attr = strdupdelim (name_b, name_e); cookie->value = strdupdelim (value_b, value_e); cookie->path = strdupdelim (path_b, path_e); + cookie->secure = BOUNDED_EQUAL (secure_b, secure_e, "TRUE"); - if (BOUNDED_EQUAL (secure_b, secure_e, "TRUE")) - cookie->secure = 1; + /* Curl source says, quoting Andre Garcia: "flag: A TRUE/FALSE + value indicating if all machines within a given domain can + access the variable. This value is set automatically by the + browser, depending on the value set for the domain." */ + cookie->domain_exact = !BOUNDED_EQUAL (domflag_b, domflag_e, "TRUE"); /* DOMAIN needs special treatment because we might need to extract the port. */ port = domain_port (domain_b, domain_e, (const char **)&domain_e); if (port) cookie->port = port; + + if (*domain_b == '.') + ++domain_b; /* remove leading dot internally */ cookie->domain = strdupdelim (domain_b, domain_e); /* safe default in case EXPIRES field is garbled. */ expiry = (double)cookies_now - 1; - /* I don't like changing the line, but it's completely safe. - (line is malloced.) */ + /* I don't like changing the line, but it's safe here. (line is + malloced.) */ *expires_e = '\0'; sscanf (expires_b, "%lf", &expiry); - if (expiry < cookies_now) - /* ignore stale cookie. */ - goto abort; - cookie->expiry_time = expiry; - /* If the cookie has survived being saved into an external file, - it is obviously permanent. */ - cookie->permanent = 1; + if (expiry == 0) + { + /* EXPIRY can be 0 for session cookies saved because the + user specified `--keep-session-cookies' in the past. + They remain session cookies, and will be saved only if + the user has specified `keep-session-cookies' again. */ + } + else + { + if (expiry < cookies_now) + goto abort_cookie; /* ignore stale cookie. */ + cookie->expiry_time = expiry; + cookie->permanent = 1; + } store_cookie (jar, cookie); next: continue; - abort: + abort_cookie: delete_cookie (cookie); } fclose (fp); @@ -1352,21 +1429,23 @@ save_cookies_mapper (void *key, void *value, void *arg) { FILE *fp = (FILE *)arg; char *domain = (char *)key; - struct cookie *chain = (struct cookie *)value; - for (; chain; chain = chain->next) + struct cookie *cookie = (struct cookie *)value; + for (; cookie; cookie = cookie->next) { - if (!chain->permanent) + if (!cookie->permanent && !opt.keep_session_cookies) continue; - if (COOKIE_EXPIRED_P (chain)) + if (cookie_expired_p (cookie)) continue; + if (!cookie->domain_exact) + fputc ('.', fp); fputs (domain, fp); - if (chain->port != PORT_ANY) - fprintf (fp, ":%d", chain->port); + if (cookie->port != PORT_ANY) + fprintf (fp, ":%d", cookie->port); fprintf (fp, "\t%s\t%s\t%s\t%.0f\t%s\t%s\n", - *domain == '.' ? "TRUE" : "FALSE", - chain->path, chain->secure ? "TRUE" : "FALSE", - (double)chain->expiry_time, - chain->attr, chain->value); + cookie->domain_exact ? "FALSE" : "TRUE", + cookie->path, cookie->secure ? "TRUE" : "FALSE", + (double)cookie->expiry_time, + cookie->attr, cookie->value); if (ferror (fp)) return 1; /* stop mapping */ } @@ -1393,15 +1472,14 @@ cookie_jar_save (struct cookie_jar *jar, const char *file) } fputs ("# HTTP cookie file.\n", fp); - fprintf (fp, "# Generated by Wget on %s.\n", datetime_str (NULL)); + fprintf (fp, "# Generated by Wget on %s.\n", datetime_str (&cookies_now)); fputs ("# Edit at your own risk.\n\n", fp); - hash_table_map (jar->chains_by_domain, save_cookies_mapper, fp); + hash_table_map (jar->chains, save_cookies_mapper, fp); if (ferror (fp)) logprintf (LOG_NOTQUIET, _("Error writing to `%s': %s\n"), file, strerror (errno)); - if (fclose (fp) < 0) logprintf (LOG_NOTQUIET, _("Error closing `%s': %s\n"), file, strerror (errno)); @@ -1422,7 +1500,7 @@ nuke_cookie_chain (void *value, void *key, void *arg) struct cookie_jar *jar = (struct cookie_jar *)arg; /* Remove the chain from the table and free the key. */ - hash_table_remove (jar->chains_by_domain, chain_key); + hash_table_remove (jar->chains, chain_key); xfree (chain_key); /* Then delete all the cookies in the chain. */ @@ -1442,7 +1520,95 @@ nuke_cookie_chain (void *value, void *key, void *arg) void cookie_jar_delete (struct cookie_jar *jar) { - hash_table_map (jar->chains_by_domain, nuke_cookie_chain, jar); - hash_table_destroy (jar->chains_by_domain); + hash_table_map (jar->chains, nuke_cookie_chain, jar); + hash_table_destroy (jar->chains); xfree (jar); } + +/* Test cases. Currently this is only tests parse_set_cookies. To + use, recompile Wget with -DTEST_COOKIES and call test_cookies() + from main. */ + +#ifdef TEST_COOKIES +int test_count; +char *test_results[10]; + +static int test_parse_cookies_callback (struct cookie *ignored, + const char *nb, const char *ne, + const char *vb, const char *ve) +{ + test_results[test_count++] = strdupdelim (nb, ne); + test_results[test_count++] = strdupdelim (vb, ve); + return 1; +} + +void +test_cookies (void) +{ + /* Tests expected to succeed: */ + static struct { + char *data; + char *results[10]; + } tests_succ[] = { + { "", {NULL} }, + { "arg=value", {"arg", "value", NULL} }, + { "arg1=value1;arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} }, + { "arg1=value1; arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} }, + { "arg1=value1; arg2=value2;", {"arg1", "value1", "arg2", "value2", NULL} }, + { "arg1=value1; arg2=value2; ", {"arg1", "value1", "arg2", "value2", NULL} }, + { "arg1=\"value1\"; arg2=\"\"", {"arg1", "value1", "arg2", "", NULL} }, + { "arg=", {"arg", "", NULL} }, + { "arg1=; arg2=", {"arg1", "", "arg2", "", NULL} }, + { "arg1 = ; arg2= ", {"arg1", "", "arg2", "", NULL} }, + }; + + /* Tests expected to fail: */ + static char *tests_fail[] = { + ";", + "arg=\"unterminated", + "=empty-name", + "arg1=;=another-empty-name", + }; + int i; + + for (i = 0; i < countof (tests_succ); i++) + { + int ind; + char *data = tests_succ[i].data; + char **expected = tests_succ[i].results; + struct cookie *c; + + test_count = 0; + c = parse_set_cookies (data, test_parse_cookies_callback, 1); + if (!c) + { + printf ("NULL cookie returned for valid data: %s\n", data); + continue; + } + + for (ind = 0; ind < test_count; ind += 2) + { + if (!expected[ind]) + break; + if (0 != strcmp (expected[ind], test_results[ind])) + printf ("Invalid name %d for '%s' (expected '%s', got '%s')\n", + ind / 2 + 1, data, expected[ind], test_results[ind]); + if (0 != strcmp (expected[ind + 1], test_results[ind + 1])) + printf ("Invalid value %d for '%s' (expected '%s', got '%s')\n", + ind / 2 + 1, data, expected[ind + 1], test_results[ind + 1]); + } + if (ind < test_count || expected[ind]) + printf ("Unmatched number of results: %s\n", data); + } + + for (i = 0; i < countof (tests_fail); i++) + { + struct cookie *c; + char *data = tests_fail[i]; + test_count = 0; + c = parse_set_cookies (data, test_parse_cookies_callback, 1); + if (c) + printf ("Failed to report error on invalid data: %s\n", data); + } +} +#endif /* TEST_COOKIES */