1 /* Support for cookies.
2 Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009,
3 2010, 2011 Free Software Foundation, Inc.
5 This file is part of GNU Wget.
7 GNU Wget is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or (at
10 your option) any later version.
12 GNU Wget is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with Wget. If not, see <http://www.gnu.org/licenses/>.
20 Additional permission under GNU GPL version 3 section 7
22 If you modify this program, or any covered work, by linking or
23 combining it with the OpenSSL project's OpenSSL library (or a
24 modified version of that library), containing parts covered by the
25 terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
26 grants you additional permission to convey the resulting work.
27 Corresponding Source for a non-source form of such a combination
28 shall include the source code for the parts of OpenSSL used as well
29 as that of the covered work. */
31 /* Written by Hrvoje Niksic. Parts are loosely inspired by the
32 cookie patch submitted by Tomasz Wegrzanowski.
34 This implements the client-side cookie support, as specified
35 (loosely) by Netscape's "preliminary specification", currently
38 http://wp.netscape.com/newsref/std/cookie_spec.html
40 rfc2109 is not supported because of its incompatibilities with the
41 above widely-used specification. rfc2965 is entirely ignored,
42 since popular client software doesn't implement it, and even the
43 sites that do send Set-Cookie2 also emit Set-Cookie for
60 #include "http.h" /* for http_atotm */
62 /* Declarations of `struct cookie' and the most basic functions. */
64 /* Cookie jar serves as cookie storage and a means of retrieving
65 cookies efficiently. All cookies with the same domain are stored
66 in a linked list called "chain". A cookie chain can be reached by
67 looking up the domain in the cookie jar's chains table.
69 For example, to reach all the cookies under google.com, one must
70 execute hash_table_get(jar->chains, "google.com"). Of
71 course, when sending a cookie to `www.google.com', one must search
72 for cookies that belong to either `www.google.com' or `google.com'
73 -- but the point is that the code doesn't need to go through *all*
77 /* Cookie chains indexed by domain. */
78 struct hash_table *chains;
80 int cookie_count; /* number of cookies in the jar. */
83 /* Value set by entry point functions, so that the low-level
84 routines don't need to call time() all the time. */
85 static time_t cookies_now;
90 struct cookie_jar *jar = xnew (struct cookie_jar);
91 jar->chains = make_nocase_string_hash_table (0);
92 jar->cookie_count = 0;
97 char *domain; /* domain of the cookie */
98 int port; /* port number */
99 char *path; /* path prefix of the cookie */
101 unsigned discard_requested :1;/* whether cookie was created to
102 request discarding another
105 unsigned secure :1; /* whether cookie should be
106 transmitted only over https
108 unsigned domain_exact :1; /* whether DOMAIN must match as a
111 unsigned permanent :1; /* whether the cookie should outlive
113 time_t expiry_time; /* time when the cookie expires, 0
114 means undetermined. */
116 char *attr; /* cookie attribute name */
117 char *value; /* cookie attribute value */
119 struct cookie *next; /* used for chaining of cookies in the
123 #define PORT_ANY (-1)
125 /* Allocate and return a new, empty cookie structure. */
127 static struct cookie *
130 struct cookie *cookie = xnew0 (struct cookie);
132 /* Both cookie->permanent and cookie->expiry_time are now 0. This
133 means that the cookie doesn't expire, but is only valid for this
134 session (i.e. not written out to disk). */
136 cookie->port = PORT_ANY;
140 /* Non-zero if the cookie has expired. Assumes cookies_now has been
141 set by one of the entry point functions. */
144 cookie_expired_p (const struct cookie *c)
146 return c->expiry_time != 0 && c->expiry_time < cookies_now;
149 /* Deallocate COOKIE and its components. */
152 delete_cookie (struct cookie *cookie)
154 xfree_null (cookie->domain);
155 xfree_null (cookie->path);
156 xfree_null (cookie->attr);
157 xfree_null (cookie->value);
161 /* Functions for storing cookies.
163 All cookies can be reached beginning with jar->chains. The key in
164 that table is the domain name, and the value is a linked list of
165 all cookies from that domain. Every new cookie is placed on the
168 /* Find and return a cookie in JAR whose domain, port, path, and attribute
169 name correspond to COOKIE. If found, PREVPTR will point to the
170 location of the cookie previous in chain, or NULL if the found
171 cookie is the head of a chain.
173 If no matching cookie is found, return NULL. */
175 static struct cookie *
176 find_matching_cookie (struct cookie_jar *jar, struct cookie *cookie,
177 struct cookie **prevptr)
179 struct cookie *chain, *prev;
181 chain = hash_table_get (jar->chains, cookie->domain);
186 for (; chain; prev = chain, chain = chain->next)
187 if (0 == strcmp (cookie->path, chain->path)
188 && 0 == strcmp (cookie->attr, chain->attr)
189 && cookie->port == chain->port)
200 /* Store COOKIE to the jar.
202 This is done by placing COOKIE at the head of its chain. However,
203 if COOKIE matches a cookie already in memory, as determined by
204 find_matching_cookie, the old cookie is unlinked and destroyed.
206 The key of each chain's hash table entry is allocated only the
207 first time; next hash_table_put's reuse the same key. */
210 store_cookie (struct cookie_jar *jar, struct cookie *cookie)
212 struct cookie *chain_head;
215 if (hash_table_get_pair (jar->chains, cookie->domain,
216 &chain_key, &chain_head))
218 /* A chain of cookies in this domain already exists. Check for
219 duplicates -- if an extant cookie exactly matches our domain,
220 port, path, and name, replace it. */
222 struct cookie *victim = find_matching_cookie (jar, cookie, &prev);
226 /* Remove VICTIM from the chain. COOKIE will be placed at
230 prev->next = victim->next;
231 cookie->next = chain_head;
235 /* prev is NULL; apparently VICTIM was at the head of
236 the chain. This place will be taken by COOKIE, so
237 all we need to do is: */
238 cookie->next = victim->next;
240 delete_cookie (victim);
242 DEBUGP (("Deleted old cookie (to be replaced.)\n"));
245 cookie->next = chain_head;
249 /* We are now creating the chain. Use a copy of cookie->domain
250 as the key for the life-time of the chain. Using
251 cookie->domain would be unsafe because the life-time of the
252 chain may exceed the life-time of the cookie. (Cookies may
253 be deleted from the chain by this very function.) */
255 chain_key = xstrdup (cookie->domain);
258 hash_table_put (jar->chains, chain_key, cookie);
263 time_t exptime = cookie->expiry_time;
264 DEBUGP (("\nStored cookie %s %d%s %s <%s> <%s> [expiry %s] %s %s\n",
265 cookie->domain, cookie->port,
266 cookie->port == PORT_ANY ? " (ANY)" : "",
268 cookie->permanent ? "permanent" : "session",
269 cookie->secure ? "secure" : "insecure",
270 cookie->expiry_time ? datetime_str (exptime) : "none",
271 cookie->attr, cookie->value));
275 /* Discard a cookie matching COOKIE's domain, port, path, and
276 attribute name. This gets called when we encounter a cookie whose
277 expiry date is in the past, or whose max-age is set to 0. The
278 former corresponds to netscape cookie spec, while the latter is
279 specified by rfc2109. */
282 discard_matching_cookie (struct cookie_jar *jar, struct cookie *cookie)
284 struct cookie *prev, *victim;
286 if (!hash_table_count (jar->chains))
287 /* No elements == nothing to discard. */
290 victim = find_matching_cookie (jar, cookie, &prev);
294 /* Simply unchain the victim. */
295 prev->next = victim->next;
298 /* VICTIM was head of its chain. We need to place a new
299 cookie at the head. */
300 char *chain_key = NULL;
303 res = hash_table_get_pair (jar->chains, victim->domain,
308 /* VICTIM was the only cookie in the chain. Destroy the
309 chain and deallocate the chain key. */
310 hash_table_remove (jar->chains, victim->domain);
314 hash_table_put (jar->chains, chain_key, victim->next);
316 delete_cookie (victim);
317 DEBUGP (("Discarded old cookie.\n"));
321 /* Functions for parsing the `Set-Cookie' header, and creating new
322 cookies from the wire. */
324 #define TOKEN_IS(token, string_literal) \
325 BOUNDED_EQUAL_NO_CASE (token.b, token.e, string_literal)
327 #define TOKEN_NON_EMPTY(token) (token.b != NULL && token.b != token.e)
329 /* Parse the contents of the `Set-Cookie' header. The header looks
332 name1=value1; name2=value2; ...
334 Trailing semicolon is optional; spaces are allowed between all
335 tokens. Additionally, values may be quoted.
337 A new cookie is returned upon success, NULL otherwise.
339 The first name-value pair will be used to set the cookie's
340 attribute name and value. Subsequent parameters will be checked
341 against field names such as `domain', `path', etc. Recognized
342 fields will be parsed and the corresponding members of COOKIE
345 static struct cookie *
346 parse_set_cookie (const char *set_cookie, bool silent)
348 const char *ptr = set_cookie;
349 struct cookie *cookie = cookie_new ();
350 param_token name, value;
352 if (!extract_param (&ptr, &name, &value, ';', NULL))
357 /* If the value is quoted, do not modify it. */
358 if (*(value.b - 1) == '"')
363 cookie->attr = strdupdelim (name.b, name.e);
364 cookie->value = strdupdelim (value.b, value.e);
366 while (extract_param (&ptr, &name, &value, ';', NULL))
368 if (TOKEN_IS (name, "domain"))
370 if (!TOKEN_NON_EMPTY (value))
372 xfree_null (cookie->domain);
373 /* Strictly speaking, we should set cookie->domain_exact if the
374 domain doesn't begin with a dot. But many sites set the
375 domain to "foo.com" and expect "subhost.foo.com" to get the
376 cookie, and it apparently works in browsers. */
379 cookie->domain = strdupdelim (value.b, value.e);
381 else if (TOKEN_IS (name, "path"))
383 if (!TOKEN_NON_EMPTY (value))
385 xfree_null (cookie->path);
386 cookie->path = strdupdelim (value.b, value.e);
388 else if (TOKEN_IS (name, "expires"))
393 if (!TOKEN_NON_EMPTY (value))
395 BOUNDED_TO_ALLOCA (value.b, value.e, value_copy);
397 /* Check if expiration spec is valid.
398 If not, assume default (cookie doesn't expire, but valid only for
400 expires = http_atotm (value_copy);
401 if (expires != (time_t) -1)
403 cookie->permanent = 1;
404 cookie->expiry_time = expires;
405 /* According to netscape's specification, expiry time in
406 the past means that discarding of a matching cookie
408 if (cookie->expiry_time < cookies_now)
409 cookie->discard_requested = 1;
412 else if (TOKEN_IS (name, "max-age"))
417 if (!TOKEN_NON_EMPTY (value))
419 BOUNDED_TO_ALLOCA (value.b, value.e, value_copy);
421 sscanf (value_copy, "%lf", &maxage);
423 /* something went wrong. */
425 cookie->permanent = 1;
426 cookie->expiry_time = cookies_now + maxage;
428 /* According to rfc2109, a cookie with max-age of 0 means that
429 discarding of a matching cookie is requested. */
431 cookie->discard_requested = 1;
433 else if (TOKEN_IS (name, "secure"))
435 /* ignore value completely */
438 /* else: Ignore unrecognized attribute. */
441 /* extract_param has encountered a syntax error */
444 /* The cookie has been successfully constructed; return it. */
449 logprintf (LOG_NOTQUIET,
450 _("Syntax error in Set-Cookie: %s at position %d.\n"),
451 quotearg_style (escape_quoting_style, set_cookie),
452 (int) (ptr - set_cookie));
453 delete_cookie (cookie);
458 #undef TOKEN_NON_EMPTY
460 /* Sanity checks. These are important, otherwise it is possible for
461 malicious attackers to destroy important cookie information and/or
462 violate your privacy. */
465 #define REQUIRE_DIGITS(p) do { \
466 if (!c_isdigit (*p)) \
468 for (++p; c_isdigit (*p); p++) \
472 #define REQUIRE_DOT(p) do { \
477 /* Check whether ADDR matches <digits>.<digits>.<digits>.<digits>.
479 We don't want to call network functions like inet_addr() because
480 all we need is a check, preferably one that is small, fast, and
484 numeric_address_p (const char *addr)
486 const char *p = addr;
488 REQUIRE_DIGITS (p); /* A */
489 REQUIRE_DOT (p); /* . */
490 REQUIRE_DIGITS (p); /* B */
491 REQUIRE_DOT (p); /* . */
492 REQUIRE_DIGITS (p); /* C */
493 REQUIRE_DOT (p); /* . */
494 REQUIRE_DIGITS (p); /* D */
501 /* Check whether COOKIE_DOMAIN is an appropriate domain for HOST.
502 Originally I tried to make the check compliant with rfc2109, but
503 the sites deviated too often, so I had to fall back to "tail
504 matching", as defined by the original Netscape's cookie spec. */
507 check_domain_match (const char *cookie_domain, const char *host)
512 const psl_ctx_t *psl;
515 if (!(psl = psl_builtin()))
517 DEBUGP (("\nlibpsl not built with a public suffix list. "
518 "Falling back to simple heuristics.\n"));
522 is_acceptable = psl_is_cookie_domain_acceptable (psl, host, cookie_domain);
523 return true ? (is_acceptable == 1) : false;
528 /* For efficiency make some elementary checks first */
531 /* For the sake of efficiency, check for exact match first. */
532 if (0 == strcasecmp (cookie_domain, host))
537 /* COOKIE_DOMAIN must match the tail of HOST. */
538 if (!match_tail (host, cookie_domain, true))
541 /* We know that COOKIE_DOMAIN is a subset of HOST; however, we must
542 make sure that somebody is not trying to set the cookie for a
543 subdomain shared by many entities. For example, "company.co.uk"
544 must not be allowed to set a cookie for ".co.uk". On the other
545 hand, "sso.redhat.de" should be able to set a cookie for
548 The only marginally sane way to handle this I can think of is to
549 reject on the basis of the length of the second-level domain name
550 (but only when the top-level domain is unknown), with the assumption
551 that those of three or fewer characters could be reserved. For
554 .co.org -> works because the TLD is known
555 .co.uk -> doesn't work because "co" is only two chars long
556 .com.au -> doesn't work because "com" is only 3 chars long
557 .cnn.uk -> doesn't work because "cnn" is also only 3 chars long (ugh)
558 .cnn.de -> doesn't work for the same reason (ugh!!)
559 .abcd.de -> works because "abcd" is 4 chars long
560 .img.cnn.de -> works because it's not trying to set the 2nd level domain
561 .cnn.co.uk -> works for the same reason
563 That should prevent misuse, while allowing reasonable usage. If
564 someone knows of a better way to handle this, please let me
567 const char *p = cookie_domain;
568 int dccount = 1; /* number of domain components */
569 int ldcl = 0; /* last domain component length */
570 int nldcl = 0; /* next to last domain component length */
573 /* Ignore leading period in this calculation. */
576 for (out = 0; !out; p++)
584 /* Empty domain component found -- the domain is invalid. */
586 if (*(p + 1) == '\0')
588 /* Tolerate trailing '.' by not treating the domain as
589 one ending with an empty domain component. */
611 int known_toplevel = false;
612 static const char *known_toplevel_domains[] = {
613 ".com", ".edu", ".net", ".org", ".gov", ".mil", ".int"
615 for (i = 0; i < countof (known_toplevel_domains); i++)
616 if (match_tail (cookie_domain, known_toplevel_domains[i], true))
618 known_toplevel = true;
621 if (!known_toplevel && nldcl <= 3)
628 /* Don't allow the host "foobar.com" to set a cookie for domain
630 if (*cookie_domain != '.')
632 int dlen = strlen (cookie_domain);
633 int hlen = strlen (host);
634 /* cookie host: hostname.foobar.com */
635 /* desired domain: bar.com */
636 /* '.' must be here in host-> ^ */
637 if (hlen > dlen && host[hlen - dlen - 1] != '.')
646 static int path_matches (const char *, const char *);
648 /* Check whether PATH begins with COOKIE_PATH. */
651 check_path_match (const char *cookie_path, const char *path)
653 return path_matches (path, cookie_path) != 0;
656 /* Prepend '/' to string S. S is copied to fresh stack-allocated
657 space and its value is modified to point to the new location. */
659 #define PREPEND_SLASH(s) do { \
660 char *PS_newstr = (char *) alloca (1 + strlen (s) + 1); \
662 strcpy (PS_newstr + 1, s); \
667 /* Process the HTTP `Set-Cookie' header. This results in storing the
668 cookie or discarding a matching one, or ignoring it completely, all
669 depending on the contents. */
672 cookie_handle_set_cookie (struct cookie_jar *jar,
673 const char *host, int port,
674 const char *path, const char *set_cookie)
676 struct cookie *cookie;
677 cookies_now = time (NULL);
679 /* Wget's paths don't begin with '/' (blame rfc1808), but cookie
680 usage assumes /-prefixed paths. Until the rest of Wget is fixed,
681 simply prepend slash to PATH. */
682 PREPEND_SLASH (path);
684 cookie = parse_set_cookie (set_cookie, false);
688 /* Sanitize parts of cookie. */
692 cookie->domain = xstrdup (host);
693 cookie->domain_exact = 1;
694 /* Set the port, but only if it's non-default. */
695 if (port != 80 && port != 443)
700 if (!check_domain_match (cookie->domain, host))
702 logprintf (LOG_NOTQUIET,
703 _("Cookie coming from %s attempted to set domain to "),
704 quotearg_style (escape_quoting_style, host));
705 logprintf (LOG_NOTQUIET,
707 quotearg_style (escape_quoting_style, cookie->domain));
708 cookie->discard_requested = true;
714 /* The cookie doesn't set path: set it to the URL path, sans the
715 file part ("/dir/file" truncated to "/dir/"). */
716 char *trailing_slash = strrchr (path, '/');
718 cookie->path = strdupdelim (path, trailing_slash + 1);
720 /* no slash in the string -- can this even happen? */
721 cookie->path = xstrdup (path);
725 /* The cookie sets its own path; verify that it is legal. */
726 if (!check_path_match (cookie->path, path))
728 DEBUGP (("Attempt to fake the path: %s, %s\n",
729 cookie->path, path));
734 /* Now store the cookie, or discard an existing cookie, if
735 discarding was requested. */
737 if (cookie->discard_requested)
739 discard_matching_cookie (jar, cookie);
743 store_cookie (jar, cookie);
748 delete_cookie (cookie);
751 /* Support for sending out cookies in HTTP requests, based on
752 previously stored cookies. Entry point is
753 `build_cookies_request'. */
755 /* Return a count of how many times CHR occurs in STRING. */
758 count_char (const char *string, char chr)
762 for (p = string; *p; p++)
768 /* Find the cookie chains whose domains match HOST and store them to
771 A cookie chain is the head of a list of cookies that belong to a
772 host/domain. Given HOST "img.search.xemacs.org", this function
773 will return the chains for "img.search.xemacs.org",
774 "search.xemacs.org", and "xemacs.org" -- those of them that exist
777 DEST should be large enough to accept (in the worst case) as many
778 elements as there are domain components of HOST. */
781 find_chains_of_host (struct cookie_jar *jar, const char *host,
782 struct cookie *dest[])
787 /* Bail out quickly if there are no cookies in the jar. */
788 if (!hash_table_count (jar->chains))
791 if (numeric_address_p (host))
792 /* If host is an IP address, only check for the exact match. */
795 /* Otherwise, check all the subdomains except the top-level (last)
796 one. As a domain with N components has N-1 dots, the number of
797 passes equals the number of dots. */
798 passes = count_char (host, '.');
802 /* Find chains that match HOST, starting with exact match and
803 progressing to less specific domains. For instance, given HOST
804 fly.srk.fer.hr, first look for fly.srk.fer.hr's chain, then
805 srk.fer.hr's, then fer.hr's. */
808 struct cookie *chain = hash_table_get (jar->chains, host);
810 dest[dest_count++] = chain;
811 if (++passcnt >= passes)
813 host = strchr (host, '.') + 1;
819 /* If FULL_PATH begins with PREFIX, return the length of PREFIX, zero
823 path_matches (const char *full_path, const char *prefix)
825 int len = strlen (prefix);
827 if (0 != strncmp (full_path, prefix, len))
828 /* FULL_PATH doesn't begin with PREFIX. */
831 /* Length of PREFIX determines the quality of the match. */
835 /* Return true iff COOKIE matches the provided parameters of the URL
836 being downloaded: HOST, PORT, PATH, and SECFLAG.
838 If PATH_GOODNESS is non-NULL, store the "path goodness" value
839 there. That value is a measure of how closely COOKIE matches PATH,
840 used for ordering cookies. */
843 cookie_matches_url (const struct cookie *cookie,
844 const char *host, int port, const char *path,
845 bool secflag, int *path_goodness)
849 if (cookie_expired_p (cookie))
850 /* Ignore stale cookies. Don't bother unchaining the cookie at
851 this point -- Wget is a relatively short-lived application, and
852 stale cookies will not be saved by `save_cookies'. On the
853 other hand, this function should be as efficient as
857 if (cookie->secure && !secflag)
858 /* Don't transmit secure cookies over insecure connections. */
860 if (cookie->port != PORT_ANY && cookie->port != port)
863 /* If exact domain match is required, verify that cookie's domain is
864 equal to HOST. If not, assume success on the grounds of the
865 cookie's chain having been found by find_chains_of_host. */
866 if (cookie->domain_exact
867 && 0 != strcasecmp (host, cookie->domain))
870 pg = path_matches (path, cookie->path);
875 /* If the caller requested path_goodness, we return it. This is
876 an optimization, so that the caller doesn't need to call
877 path_matches() again. */
882 /* A structure that points to a cookie, along with the additional
883 information about the cookie's "goodness". This allows us to sort
884 the cookies when returning them to the server, as required by the
887 struct weighed_cookie {
888 struct cookie *cookie;
893 /* Comparator used for uniquifying the list. */
896 equality_comparator (const void *p1, const void *p2)
898 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
899 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
901 int namecmp = strcmp (wc1->cookie->attr, wc2->cookie->attr);
902 int valuecmp = strcmp (wc1->cookie->value, wc2->cookie->value);
904 /* We only really care whether both name and value are equal. We
905 return them in this order only for consistency... */
906 return namecmp ? namecmp : valuecmp;
909 /* Eliminate duplicate cookies. "Duplicate cookies" are any two
910 cookies with the same attr name and value. Whenever a duplicate
911 pair is found, one of the cookies is removed. */
914 eliminate_dups (struct weighed_cookie *outgoing, int count)
916 struct weighed_cookie *h; /* hare */
917 struct weighed_cookie *t; /* tortoise */
918 struct weighed_cookie *end = outgoing + count;
920 /* We deploy a simple uniquify algorithm: first sort the array
921 according to our sort criteria, then copy it to itself, comparing
922 each cookie to its neighbor and ignoring the duplicates. */
924 qsort (outgoing, count, sizeof (struct weighed_cookie), equality_comparator);
926 /* "Hare" runs through all the entries in the array, followed by
927 "tortoise". If a duplicate is found, the hare skips it.
928 Non-duplicate entries are copied to the tortoise ptr. */
930 for (h = t = outgoing; h < end; h++)
934 struct cookie *c0 = h[0].cookie;
935 struct cookie *c1 = h[1].cookie;
936 if (!strcmp (c0->attr, c1->attr) && !strcmp (c0->value, c1->value))
937 continue; /* ignore the duplicate */
940 /* If the hare has advanced past the tortoise (because of
941 previous dups), make sure the values get copied. Otherwise,
942 no copying is necessary. */
951 /* Comparator used for sorting by quality. */
954 goodness_comparator (const void *p1, const void *p2)
956 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
957 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
959 /* Subtractions take `wc2' as the first argument because we want a
960 sort in *decreasing* order of goodness. */
961 int dgdiff = wc2->domain_goodness - wc1->domain_goodness;
962 int pgdiff = wc2->path_goodness - wc1->path_goodness;
964 /* Sort by domain goodness; if these are the same, sort by path
965 goodness. (The sorting order isn't really specified; maybe it
966 should be the other way around.) */
967 return dgdiff ? dgdiff : pgdiff;
970 /* Generate a `Cookie' header for a request that goes to HOST:PORT and
971 requests PATH from the server. The resulting string is allocated
972 with `malloc', and the caller is responsible for freeing it. If no
973 cookies pertain to this request, i.e. no cookie header should be
974 generated, NULL is returned. */
977 cookie_header (struct cookie_jar *jar, const char *host,
978 int port, const char *path, bool secflag)
980 struct cookie **chains;
983 struct cookie *cookie;
984 struct weighed_cookie *outgoing;
987 int result_size, pos;
988 PREPEND_SLASH (path); /* see cookie_handle_set_cookie */
990 /* First, find the cookie chains whose domains match HOST. */
992 /* Allocate room for find_chains_of_host to write to. The number of
993 chains can at most equal the number of subdomains, hence
994 1+<number of dots>. */
995 chains = alloca_array (struct cookie *, 1 + count_char (host, '.'));
996 chain_count = find_chains_of_host (jar, host, chains);
998 /* No cookies for this host. */
1002 cookies_now = time (NULL);
1004 /* Now extract from the chains those cookies that match our host
1005 (for domain_exact cookies), port (for cookies with port other
1006 than PORT_ANY), etc. See cookie_matches_url for details. */
1008 /* Count the number of matching cookies. */
1010 for (i = 0; i < chain_count; i++)
1011 for (cookie = chains[i]; cookie; cookie = cookie->next)
1012 if (cookie_matches_url (cookie, host, port, path, secflag, NULL))
1015 return NULL; /* no cookies matched */
1017 /* Allocate the array. */
1018 outgoing = alloca_array (struct weighed_cookie, count);
1020 /* Fill the array with all the matching cookies from the chains that
1023 for (i = 0; i < chain_count; i++)
1024 for (cookie = chains[i]; cookie; cookie = cookie->next)
1027 if (!cookie_matches_url (cookie, host, port, path, secflag, &pg))
1029 outgoing[ocnt].cookie = cookie;
1030 outgoing[ocnt].domain_goodness = strlen (cookie->domain);
1031 outgoing[ocnt].path_goodness = pg;
1034 assert (ocnt == count);
1036 /* Eliminate duplicate cookies; that is, those whose name and value
1038 count = eliminate_dups (outgoing, count);
1040 /* Sort the array so that best-matching domains come first, and
1041 that, within one domain, best-matching paths come first. */
1042 qsort (outgoing, count, sizeof (struct weighed_cookie), goodness_comparator);
1044 /* Count the space the name=value pairs will take. */
1046 for (i = 0; i < count; i++)
1048 struct cookie *c = outgoing[i].cookie;
1050 result_size += strlen (c->attr) + 1 + strlen (c->value);
1053 /* Allocate output buffer:
1054 name=value pairs -- result_size
1055 "; " separators -- (count - 1) * 2
1056 \0 terminator -- 1 */
1057 result_size = result_size + (count - 1) * 2 + 1;
1058 result = xmalloc (result_size);
1060 for (i = 0; i < count; i++)
1062 struct cookie *c = outgoing[i].cookie;
1063 int namlen = strlen (c->attr);
1064 int vallen = strlen (c->value);
1066 memcpy (result + pos, c->attr, namlen);
1068 result[pos++] = '=';
1069 memcpy (result + pos, c->value, vallen);
1073 result[pos++] = ';';
1074 result[pos++] = ' ';
1077 result[pos++] = '\0';
1078 assert (pos == result_size);
1082 /* Support for loading and saving cookies. The format used for
1083 loading and saving should be the format of the `cookies.txt' file
1084 used by Netscape and Mozilla, at least the Unix versions.
1085 (Apparently IE can export cookies in that format as well.) The
1086 format goes like this:
1088 DOMAIN DOMAIN-FLAG PATH SECURE-FLAG TIMESTAMP ATTR-NAME ATTR-VALUE
1090 DOMAIN -- cookie domain, optionally followed by :PORT
1091 DOMAIN-FLAG -- whether all hosts in the domain match
1093 SECURE-FLAG -- whether cookie requires secure connection
1094 TIMESTAMP -- expiry timestamp, number of seconds since epoch
1095 ATTR-NAME -- name of the cookie attribute
1096 ATTR-VALUE -- value of the cookie attribute (empty if absent)
1098 The fields are separated by TABs. All fields are mandatory, except
1099 for ATTR-VALUE. The `-FLAG' fields are boolean, their legal values
1100 being "TRUE" and "FALSE". Empty lines, lines consisting of
1101 whitespace only, and comment lines (beginning with # optionally
1102 preceded by whitespace) are ignored.
1104 Example line from cookies.txt (split in two lines for readability):
1106 .google.com TRUE / FALSE 2147368447 \
1107 PREF ID=34bb47565bbcd47b:LD=en:NR=20:TM=985172580:LM=985739012
1111 /* If the region [B, E) ends with :<digits>, parse the number, return
1112 it, and store new boundary (location of the `:') to DOMAIN_E_PTR.
1113 If port is not specified, return 0. */
1116 domain_port (const char *domain_b, const char *domain_e,
1117 const char **domain_e_ptr)
1121 const char *colon = memchr (domain_b, ':', domain_e - domain_b);
1124 for (p = colon + 1; p < domain_e && c_isdigit (*p); p++)
1125 port = 10 * port + (*p - '0');
1127 /* Garbage following port number. */
1129 *domain_e_ptr = colon;
1133 #define GET_WORD(p, b, e) do { \
1135 while (*p && *p != '\t') \
1138 if (b == e || !*p) \
1143 /* Load cookies from FILE. */
1146 cookie_jar_load (struct cookie_jar *jar, const char *file)
1151 FILE *fp = fopen (file, "r");
1154 logprintf (LOG_NOTQUIET, _("Cannot open cookies file %s: %s\n"),
1155 quote (file), strerror (errno));
1159 cookies_now = time (NULL);
1161 while (getline (&line, &bufsize, fp) > 0)
1163 struct cookie *cookie;
1169 char *domain_b = NULL, *domain_e = NULL;
1170 char *domflag_b = NULL, *domflag_e = NULL;
1171 char *path_b = NULL, *path_e = NULL;
1172 char *secure_b = NULL, *secure_e = NULL;
1173 char *expires_b = NULL, *expires_e = NULL;
1174 char *name_b = NULL, *name_e = NULL;
1175 char *value_b = NULL, *value_e = NULL;
1177 /* Skip leading white-space. */
1178 while (*p && c_isspace (*p))
1180 /* Ignore empty lines. */
1181 if (!*p || *p == '#')
1184 GET_WORD (p, domain_b, domain_e);
1185 GET_WORD (p, domflag_b, domflag_e);
1186 GET_WORD (p, path_b, path_e);
1187 GET_WORD (p, secure_b, secure_e);
1188 GET_WORD (p, expires_b, expires_e);
1189 GET_WORD (p, name_b, name_e);
1191 /* Don't use GET_WORD for value because it ends with newline,
1194 value_e = p + strlen (p);
1195 if (value_e > value_b && value_e[-1] == '\n')
1197 if (value_e > value_b && value_e[-1] == '\r')
1199 /* Empty values are legal (I think), so don't bother checking. */
1201 cookie = cookie_new ();
1203 cookie->attr = strdupdelim (name_b, name_e);
1204 cookie->value = strdupdelim (value_b, value_e);
1205 cookie->path = strdupdelim (path_b, path_e);
1206 cookie->secure = BOUNDED_EQUAL (secure_b, secure_e, "TRUE");
1208 /* Curl source says, quoting Andre Garcia: "flag: A TRUE/FALSE
1209 value indicating if all machines within a given domain can
1210 access the variable. This value is set automatically by the
1211 browser, depending on the value set for the domain." */
1212 cookie->domain_exact = !BOUNDED_EQUAL (domflag_b, domflag_e, "TRUE");
1214 /* DOMAIN needs special treatment because we might need to
1215 extract the port. */
1216 port = domain_port (domain_b, domain_e, (const char **)&domain_e);
1218 cookie->port = port;
1220 if (*domain_b == '.')
1221 ++domain_b; /* remove leading dot internally */
1222 cookie->domain = strdupdelim (domain_b, domain_e);
1224 /* safe default in case EXPIRES field is garbled. */
1225 expiry = (double)cookies_now - 1;
1227 /* I don't like changing the line, but it's safe here. (line is
1230 sscanf (expires_b, "%lf", &expiry);
1234 /* EXPIRY can be 0 for session cookies saved because the
1235 user specified `--keep-session-cookies' in the past.
1236 They remain session cookies, and will be saved only if
1237 the user has specified `keep-session-cookies' again. */
1241 if (expiry < cookies_now)
1242 goto abort_cookie; /* ignore stale cookie. */
1243 cookie->expiry_time = expiry;
1244 cookie->permanent = 1;
1247 store_cookie (jar, cookie);
1253 delete_cookie (cookie);
1260 /* Save cookies, in format described above, to FILE. */
/* Writes the cookies stored in JAR->chains to the file named FILE.
   FILE is opened with mode "w", so any previous contents are
   replaced.  The output begins with three comment lines starting
   with a hash sign, followed by one tab-separated line per cookie
   that is written.  Failures to open, write or close FILE are
   reported through logprintf at LOG_NOTQUIET.  */
1263 cookie_jar_save (struct cookie_jar *jar, const char *file)
1266 hash_table_iterator iter;
1268 DEBUGP (("Saving cookies to %s.\n", file));
/* Refresh cookies_now (declared outside this function); it is
   printed in the file header below.  NOTE(review): cookie_expired_p
   presumably compares against this value as well -- confirm.  */
1270 cookies_now = time (NULL);
/* Mode "w" creates FILE or truncates an existing one.  */
1272 fp = fopen (file, "w");
/* Reported when FILE could not be opened; the message carries the
   quoted file name and strerror (errno).  */
1275 logprintf (LOG_NOTQUIET, _("Cannot open cookies file %s: %s\n"),
1276 quote (file), strerror (errno));
/* Human-readable header.  Each header line starts with a hash sign,
   which the cookie-file parser earlier in this file treats as a line
   to ignore.  */
1280 fputs ("# HTTP cookie file.\n", fp);
1281 fprintf (fp, "# Generated by Wget on %s.\n", datetime_str (cookies_now));
1282 fputs ("# Edit at your own risk.\n\n", fp);
/* Walk every entry of the hash table in JAR->chains.  The entry key
   is used as the domain text and the entry value as the first cookie
   of a list linked through the next member.  */
1284 for (hash_table_iterate (jar->chains, &iter);
1285 hash_table_iter_next (&iter);
1288 const char *domain = iter.key;
1289 struct cookie *cookie = iter.value;
1290 for (; cookie; cookie = cookie->next)
/* Cookies that are not permanent are only of interest when
   opt.keep_session_cookies is set.  NOTE(review): this test and the
   expiry test after it presumably skip the current cookie --
   confirm.  */
1292 if (!cookie->permanent && !opt.keep_session_cookies)
1294 if (cookie_expired_p (cookie))
/* NOTE(review): presumably a leading dot is written in front of the
   domain when the match is not exact, mirroring the leading dot that
   is stripped when a cookie-file line is parsed -- confirm.  */
1296 if (!cookie->domain_exact)
/* A port other than PORT_ANY is written as a colon followed by the
   decimal port number.  */
1299 if (cookie->port != PORT_ANY)
1300 fprintf (fp, ":%d", cookie->port);
/* Remaining tab-separated columns, in order: domain flag (FALSE when
   domain_exact is set, TRUE otherwise), path, secure flag, expiry
   time printed as a number without decimals, cookie name (attr) and
   cookie value.  This is the same column order the cookie-file
   parser earlier in this file reads.  */
1301 fprintf (fp, "\t%s\t%s\t%s\t%.0f\t%s\t%s\n",
1302 cookie->domain_exact ? "FALSE" : "TRUE",
1303 cookie->path, cookie->secure ? "TRUE" : "FALSE",
1304 (double)cookie->expiry_time,
1305 cookie->attr, cookie->value);
/* Reported when writing to FILE failed.  NOTE(review): errno may no
   longer describe the failed write by the time it is read here --
   confirm.  */
1312 logprintf (LOG_NOTQUIET, _("Error writing to %s: %s\n"),
1313 quote (file), strerror (errno));
/* fclose flushes buffered output and can fail on its own, so its
   result is checked and reported separately.  */
1314 if (fclose (fp) < 0)
1315 logprintf (LOG_NOTQUIET, _("Error closing %s: %s\n"),
1316 quote (file), strerror (errno));
1318 DEBUGP (("Done saving cookies.\n"));
1321 /* Clean up cookie-related data. */
/* Releases the cookies held by JAR: every chain stored in
   JAR->chains is walked and each cookie is handed to delete_cookie,
   then the hash table itself is destroyed.  JAR->chains must not be
   used after this call.  NOTE(review): whether the hash keys and the
   jar structure itself are also released here should be confirmed
   against the full function.  */
1324 cookie_jar_delete (struct cookie_jar *jar)
1326 /* Iterate over chains (indexed by domain) and free them. */
1327 hash_table_iterator iter;
1328 for (hash_table_iterate (jar->chains, &iter); hash_table_iter_next (&iter); )
/* The entry value is the first cookie of the chain for this key.  */
1330 struct cookie *chain = iter.value;
1332 /* Then all cookies in this chain. */
/* Read the successor before calling delete_cookie; presumably CHAIN
   must not be dereferenced once it has been deleted.  */
1335 struct cookie *next = chain->next;
1336 delete_cookie (chain);
/* With the cookies gone, dispose of the table that indexed them.  */
1340 hash_table_destroy (jar->chains);
1344 /* Test cases. Currently this only tests parse_set_cookie. To
1345 use, recompile Wget with -DTEST_COOKIES and call test_cookies()
/* Self-test body: feeds fixed strings to parse_set_cookie and
   extract_param and reports every mismatch with printf.  No other
   reporting mechanism is visible here.  */
1352 /* Tests expected to succeed: */
/* Each entry pairs an input string with the expected parameters,
   stored as a flat list of alternating names and values that ends
   with NULL.  */
1355 const char *results[10];
1357 { "arg=value", {"arg", "value", NULL} },
1358 { "arg1=value1;arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
1359 { "arg1=value1; arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
1360 { "arg1=value1; arg2=value2;", {"arg1", "value1", "arg2", "value2", NULL} },
1361 { "arg1=value1; arg2=value2; ", {"arg1", "value1", "arg2", "value2", NULL} },
1362 { "arg1=\"value1\"; arg2=\"\"", {"arg1", "value1", "arg2", "", NULL} },
1363 { "arg=", {"arg", "", NULL} },
1364 { "arg1=; arg2=", {"arg1", "", "arg2", "", NULL} },
1365 { "arg1 = ; arg2= ", {"arg1", "", "arg2", "", NULL} },
1368 /* Tests expected to fail: */
/* Inputs for which parse_set_cookie is expected to return no
   cookie.  */
1369 static char *tests_fail[] = {
1371 "arg=\"unterminated",
1373 "arg1=;=another-empty-name",
/* First pass: every input in tests_succ must be accepted.  */
1377 for (i = 0; i < countof (tests_succ); i++)
1380 const char *data = tests_succ[i].data;
1381 const char **expected = tests_succ[i].results;
1384 c = parse_set_cookie (data, true);
1387 printf ("NULL cookie returned for valid data: %s\n", data);
1391 /* Test whether extract_param handles these cases correctly. */
/* The same input is tokenized again with extract_param, with a
   semicolon passed as what is presumably the separator argument.
   Each name/value pair it yields is compared against the expected
   list.  */
1393 param_token name, value;
1394 const char *ptr = data;
1396 while (extract_param (&ptr, &name, &value, ';', NULL))
/* Copy the delimited name and value so they can be compared with
   strcmp.  */
1398 char *n = strdupdelim (name.b, name.e);
1399 char *v = strdupdelim (value.b, value.e);
1402 printf ("Too many parameters for '%s'\n", data);
/* expected[j] is the name and expected[j + 1] the value of the
   current pair; j / 2 + 1 turns the index into a 1-based parameter
   number for the messages.  */
1405 if (0 != strcmp (expected[j], n))
1406 printf ("Invalid name %d for '%s' (expected '%s', got '%s')\n",
1407 j / 2 + 1, data, expected[j], n);
1408 if (0 != strcmp (expected[j + 1], v))
1409 printf ("Invalid value %d for '%s' (expected '%s', got '%s')\n",
1410 j / 2 + 1, data, expected[j + 1], v);
1416 printf ("Too few parameters for '%s'\n", data);
/* Second pass: every input in tests_fail must be rejected; the
   message below is printed when one is accepted instead.  */
1420 for (i = 0; i < countof (tests_fail); i++)
1423 char *data = tests_fail[i];
1424 c = parse_set_cookie (data, true);
1426 printf ("Failed to report error on invalid data: %s\n", data);
1429 #endif /* TEST_COOKIES */