/* Support for cookies.
   Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007 Free
   Software Foundation, Inc.

This file is part of GNU Wget.

GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at
your option) any later version.

GNU Wget is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with Wget.  If not, see <http://www.gnu.org/licenses/>.

In addition, as a special exception, the Free Software Foundation
gives permission to link the code of its release of Wget with the
OpenSSL project's "OpenSSL" library (or with modified versions of it
that use the same license as the "OpenSSL" library), and distribute
the linked executables.  You must obey the GNU General Public License
in all respects for all of the code used other than "OpenSSL".  If you
modify this file, you may extend this exception to your version of the
file, but you are not obligated to do so.  If you do not wish to do
so, delete this exception statement from your version.  */
/* Written by Hrvoje Niksic.  Parts are loosely inspired by the
   cookie patch submitted by Tomasz Wegrzanowski.

   This implements the client-side cookie support, as specified
   (loosely) by Netscape's "preliminary specification", currently
   available at:

       http://wp.netscape.com/newsref/std/cookie_spec.html

   rfc2109 is not supported because of its incompatibilities with the
   above widely-used specification.  rfc2965 is entirely ignored,
   since popular client software doesn't implement it, and even the
   sites that do send Set-Cookie2 also emit Set-Cookie for
   compatibility.  */
#include "http.h"               /* for http_atotm */
/* Declarations of `struct cookie' and the most basic functions. */

/* Cookie jar serves as cookie storage and a means of retrieving
   cookies efficiently.  All cookies with the same domain are stored
   in a linked list called "chain".  A cookie chain can be reached by
   looking up the domain in the cookie jar's `chains' table.

   For example, to reach all the cookies under google.com, one must
   execute hash_table_get(jar->chains, "google.com").  Of
   course, when sending a cookie to `www.google.com', one must search
   for cookies that belong to either `www.google.com' or `google.com'
   -- but the point is that the code doesn't need to go through *all*
   the cookies.  */
/* Storage for cookies: a table of per-domain cookie chains plus a
   running count of the cookies held.  */
struct cookie_jar {
  /* Cookie chains indexed by domain.  */
  struct hash_table *chains;

  int cookie_count;             /* number of cookies in the jar. */
};
/* Value set by entry point functions, so that the low-level
   routines don't need to call time() all the time.  */
static time_t cookies_now;
86 struct cookie_jar *jar = xnew (struct cookie_jar);
87 jar->chains = make_nocase_string_hash_table (0);
88 jar->cookie_count = 0;
/* A single cookie.  Cookies of one domain are linked through NEXT
   into a chain.  */
struct cookie {
  char *domain;                 /* domain of the cookie */
  int port;                     /* port number */
  char *path;                   /* path prefix of the cookie */

  unsigned discard_requested :1; /* whether cookie was created to
                                    request discarding another
                                    cookie. */

  unsigned secure :1;           /* whether cookie should be
                                   transmitted over non-https
                                   connections. */
  unsigned domain_exact :1;     /* whether DOMAIN must match as a
                                   whole. */

  unsigned permanent :1;        /* whether the cookie should outlive
                                   the session. */
  time_t expiry_time;           /* time when the cookie expires, 0
                                   means undetermined. */

  char *attr;                   /* cookie attribute name */
  char *value;                  /* cookie attribute value */

  struct cookie *next;          /* used for chaining of cookies in the
                                   same domain. */
};

/* Port value meaning that the cookie is not tied to a port.  */
#define PORT_ANY (-1)
121 /* Allocate and return a new, empty cookie structure. */
123 static struct cookie *
126 struct cookie *cookie = xnew0 (struct cookie);
128 /* Both cookie->permanent and cookie->expiry_time are now 0. This
129 means that the cookie doesn't expire, but is only valid for this
130 session (i.e. not written out to disk). */
132 cookie->port = PORT_ANY;
136 /* Non-zero if the cookie has expired. Assumes cookies_now has been
137 set by one of the entry point functions. */
140 cookie_expired_p (const struct cookie *c)
142 return c->expiry_time != 0 && c->expiry_time < cookies_now;
145 /* Deallocate COOKIE and its components. */
148 delete_cookie (struct cookie *cookie)
150 xfree_null (cookie->domain);
151 xfree_null (cookie->path);
152 xfree_null (cookie->attr);
153 xfree_null (cookie->value);
/* Functions for storing cookies.

   All cookies can be reached beginning with jar->chains.  The key in
   that table is the domain name, and the value is a linked list of
   all cookies from that domain.  Every new cookie is placed on the
   head of the list.  */
164 /* Find and return a cookie in JAR whose domain, path, and attribute
165 name correspond to COOKIE. If found, PREVPTR will point to the
166 location of the cookie previous in chain, or NULL if the found
167 cookie is the head of a chain.
169 If no matching cookie is found, return NULL. */
171 static struct cookie *
172 find_matching_cookie (struct cookie_jar *jar, struct cookie *cookie,
173 struct cookie **prevptr)
175 struct cookie *chain, *prev;
177 chain = hash_table_get (jar->chains, cookie->domain);
182 for (; chain; prev = chain, chain = chain->next)
183 if (0 == strcmp (cookie->path, chain->path)
184 && 0 == strcmp (cookie->attr, chain->attr)
185 && cookie->port == chain->port)
196 /* Store COOKIE to the jar.
198 This is done by placing COOKIE at the head of its chain. However,
199 if COOKIE matches a cookie already in memory, as determined by
200 find_matching_cookie, the old cookie is unlinked and destroyed.
202 The key of each chain's hash table entry is allocated only the
203 first time; next hash_table_put's reuse the same key. */
206 store_cookie (struct cookie_jar *jar, struct cookie *cookie)
208 struct cookie *chain_head;
211 if (hash_table_get_pair (jar->chains, cookie->domain,
212 &chain_key, &chain_head))
214 /* A chain of cookies in this domain already exists. Check for
215 duplicates -- if an extant cookie exactly matches our domain,
216 port, path, and name, replace it. */
218 struct cookie *victim = find_matching_cookie (jar, cookie, &prev);
222 /* Remove VICTIM from the chain. COOKIE will be placed at
226 prev->next = victim->next;
227 cookie->next = chain_head;
231 /* prev is NULL; apparently VICTIM was at the head of
232 the chain. This place will be taken by COOKIE, so
233 all we need to do is: */
234 cookie->next = victim->next;
236 delete_cookie (victim);
238 DEBUGP (("Deleted old cookie (to be replaced.)\n"));
241 cookie->next = chain_head;
245 /* We are now creating the chain. Use a copy of cookie->domain
246 as the key for the life-time of the chain. Using
247 cookie->domain would be unsafe because the life-time of the
248 chain may exceed the life-time of the cookie. (Cookies may
249 be deleted from the chain by this very function.) */
251 chain_key = xstrdup (cookie->domain);
254 hash_table_put (jar->chains, chain_key, cookie);
259 time_t exptime = cookie->expiry_time;
260 DEBUGP (("\nStored cookie %s %d%s %s <%s> <%s> [expiry %s] %s %s\n",
261 cookie->domain, cookie->port,
262 cookie->port == PORT_ANY ? " (ANY)" : "",
264 cookie->permanent ? "permanent" : "session",
265 cookie->secure ? "secure" : "insecure",
266 cookie->expiry_time ? datetime_str (exptime) : "none",
267 cookie->attr, cookie->value));
271 /* Discard a cookie matching COOKIE's domain, port, path, and
272 attribute name. This gets called when we encounter a cookie whose
273 expiry date is in the past, or whose max-age is set to 0. The
274 former corresponds to netscape cookie spec, while the latter is
275 specified by rfc2109. */
278 discard_matching_cookie (struct cookie_jar *jar, struct cookie *cookie)
280 struct cookie *prev, *victim;
282 if (!hash_table_count (jar->chains))
283 /* No elements == nothing to discard. */
286 victim = find_matching_cookie (jar, cookie, &prev);
290 /* Simply unchain the victim. */
291 prev->next = victim->next;
294 /* VICTIM was head of its chain. We need to place a new
295 cookie at the head. */
296 char *chain_key = NULL;
299 res = hash_table_get_pair (jar->chains, victim->domain,
304 /* VICTIM was the only cookie in the chain. Destroy the
305 chain and deallocate the chain key. */
306 hash_table_remove (jar->chains, victim->domain);
310 hash_table_put (jar->chains, chain_key, victim->next);
312 delete_cookie (victim);
313 DEBUGP (("Discarded old cookie.\n"));
/* Functions for parsing the `Set-Cookie' header, and creating new
   cookies from the wire.  */

/* True if TOKEN (something with `b' and `e' boundary members) equals
   STRING_LITERAL, ignoring case.  */
#define TOKEN_IS(token, string_literal) \
  BOUNDED_EQUAL_NO_CASE ((token).b, (token).e, string_literal)

/* True if TOKEN is present and spans at least one character.  */
#define TOKEN_NON_EMPTY(token) \
  ((token).b != NULL && (token).b != (token).e)
325 /* Parse the contents of the `Set-Cookie' header. The header looks
328 name1=value1; name2=value2; ...
330 Trailing semicolon is optional; spaces are allowed between all
331 tokens. Additionally, values may be quoted.
333 A new cookie is returned upon success, NULL otherwise.
335 The first name-value pair will be used to set the cookie's
336 attribute name and value. Subsequent parameters will be checked
337 against field names such as `domain', `path', etc. Recognized
338 fields will be parsed and the corresponding members of COOKIE
341 static struct cookie *
342 parse_set_cookie (const char *set_cookie, bool silent)
344 const char *ptr = set_cookie;
345 struct cookie *cookie = cookie_new ();
346 param_token name, value;
348 if (!extract_param (&ptr, &name, &value, ';'))
352 cookie->attr = strdupdelim (name.b, name.e);
353 cookie->value = strdupdelim (value.b, value.e);
355 while (extract_param (&ptr, &name, &value, ';'))
357 if (TOKEN_IS (name, "domain"))
359 if (!TOKEN_NON_EMPTY (value))
361 xfree_null (cookie->domain);
362 /* Strictly speaking, we should set cookie->domain_exact if the
363 domain doesn't begin with a dot. But many sites set the
364 domain to "foo.com" and expect "subhost.foo.com" to get the
365 cookie, and it apparently works in browsers. */
368 cookie->domain = strdupdelim (value.b, value.e);
370 else if (TOKEN_IS (name, "path"))
372 if (!TOKEN_NON_EMPTY (value))
374 xfree_null (cookie->path);
375 cookie->path = strdupdelim (value.b, value.e);
377 else if (TOKEN_IS (name, "expires"))
382 if (!TOKEN_NON_EMPTY (value))
384 BOUNDED_TO_ALLOCA (value.b, value.e, value_copy);
386 expires = http_atotm (value_copy);
387 if (expires != (time_t) -1)
389 cookie->permanent = 1;
390 cookie->expiry_time = expires;
391 /* According to netscape's specification, expiry time in
392 the past means that discarding of a matching cookie
394 if (cookie->expiry_time < cookies_now)
395 cookie->discard_requested = 1;
398 /* Error in expiration spec. Assume default (cookie doesn't
399 expire, but valid only for this session.) */
402 else if (TOKEN_IS (name, "max-age"))
407 if (!TOKEN_NON_EMPTY (value))
409 BOUNDED_TO_ALLOCA (value.b, value.e, value_copy);
411 sscanf (value_copy, "%lf", &maxage);
413 /* something went wrong. */
415 cookie->permanent = 1;
416 cookie->expiry_time = cookies_now + maxage;
418 /* According to rfc2109, a cookie with max-age of 0 means that
419 discarding of a matching cookie is requested. */
421 cookie->discard_requested = 1;
423 else if (TOKEN_IS (name, "secure"))
425 /* ignore value completely */
429 /* Ignore unrecognized attribute. */
433 /* extract_param has encountered a syntax error */
436 /* The cookie has been successfully constructed; return it. */
441 logprintf (LOG_NOTQUIET,
442 _("Syntax error in Set-Cookie: %s at position %d.\n"),
443 escnonprint (set_cookie), (int) (ptr - set_cookie));
444 delete_cookie (cookie);
449 #undef TOKEN_NON_EMPTY
/* Sanity checks.  These are important, otherwise it is possible for
   malicious attackers to destroy important cookie information and/or
   violate your privacy.  */
/* Advance P past one or more digits; make the enclosing function
   return false if P does not point to a digit.  */
#define REQUIRE_DIGITS(p) do {                  \
  if (!c_isdigit (*p))                          \
    return false;                               \
  for (++p; c_isdigit (*p); p++)                \
    ;                                           \
} while (0)

/* Advance P past a single '.'; make the enclosing function return
   false if P does not point to one.  */
#define REQUIRE_DOT(p) do {                     \
  if (*p++ != '.')                              \
    return false;                               \
} while (0)
/* Return a pointer just past the run of ASCII digits starting at P,
   or NULL if P does not point to a digit.  */

static const char *
skip_digit_run (const char *p)
{
  if (*p < '0' || *p > '9')
    return NULL;
  do
    ++p;
  while (*p >= '0' && *p <= '9');
  return p;
}

/* Check whether ADDR matches <digits>.<digits>.<digits>.<digits>.
   Only the shape is checked; the numbers are not range-checked.

   We don't want to call network functions like inet_addr() because
   all we need is a check, preferably one that is small and fast.  */

static bool
numeric_address_p (const char *addr)
{
  const char *p = addr;
  int group;

  for (group = 0; group < 4; group++)
    {
      /* Every group except the first is introduced by a dot.  */
      if (group > 0 && *p++ != '.')
        return false;
      p = skip_digit_run (p);
      if (!p)
        return false;
    }

  /* Nothing may follow the fourth group.  */
  return *p == '\0';
}
/* Check whether COOKIE_DOMAIN is an appropriate domain for HOST.
   Originally I tried to make the check compliant with rfc2109, but
   the sites deviated too often, so I had to fall back to "tail
   matching", as defined by the original Netscape's cookie spec.

   Returns true if HOST may set a cookie for COOKIE_DOMAIN.  */

static bool
check_domain_match (const char *cookie_domain, const char *host)
{
  /* Numeric address requires exact match.  It also requires HOST to
     be an IP address.  */
  if (numeric_address_p (cookie_domain))
    return 0 == strcmp (cookie_domain, host);

  /* For the sake of efficiency, check for exact match first. */
  if (0 == strcasecmp (cookie_domain, host))
    return true;

  /* HOST must match the tail of cookie_domain. */
  if (!match_tail (host, cookie_domain, true))
    return false;

  /* We know that COOKIE_DOMAIN is a subset of HOST; however, we must
     make sure that somebody is not trying to set the cookie for a
     subdomain shared by many entities.  For example, "company.co.uk"
     must not be allowed to set a cookie for ".co.uk".  On the other
     hand, "sso.redhat.de" should be able to set a cookie for
     ".redhat.de".

     The only marginally sane way to handle this I can think of is to
     reject on the basis of the length of the second-level domain name
     (but when the top-level domain is unknown), with the assumption
     that those of three or less characters could be reserved.  For
     example:

          .co.org -> works because the TLD is known
           .co.uk -> doesn't work because "co" is only two chars long
          .com.au -> doesn't work because "com" is only 3 chars long
          .cnn.uk -> doesn't work because "cnn" is also only 3 chars long (ugh)
          .cnn.de -> doesn't work for the same reason (ugh!!)
         .abcd.de -> works because "abcd" is 4 chars long
      .img.cnn.de -> works because it's not trying to set the 2nd level domain
       .cnn.co.uk -> works for the same reason

    That should prevent misuse, while allowing reasonable usage.  If
    someone knows of a better way to handle this, please let me
    know.  */
  {
    const char *p = cookie_domain;
    int dccount = 1;            /* number of domain components */
    int ldcl  = 0;              /* last domain component length */
    int nldcl = 0;              /* next to last domain component length */
    int out;
    if (*p == '.')
      /* Ignore leading period in this calculation. */
      ++p;
    for (out = 0; !out; p++)
      switch (*p)
        {
        case '\0':
          out = 1;
          break;
        case '.':
          if (ldcl == 0)
            /* Empty domain component found -- the domain is invalid. */
            return false;
          if (*(p + 1) == '\0')
            {
              /* Tolerate trailing '.' by not treating the domain as
                 one ending with an empty domain component.  */
              out = 1;
              break;
            }
          nldcl = ldcl;
          ldcl  = 0;
          ++dccount;
          break;
        default:
          ++ldcl;
        }

    /* A single-component domain can never be set.  */
    if (dccount < 2)
      return false;

    if (dccount == 2)
      {
        size_t i;
        bool known_toplevel = false;
        static const char *known_toplevel_domains[] = {
          ".com", ".edu", ".net", ".org", ".gov", ".mil", ".int"
        };
        for (i = 0; i < countof (known_toplevel_domains); i++)
          if (match_tail (cookie_domain, known_toplevel_domains[i], true))
            {
              known_toplevel = true;
              break;
            }
        if (!known_toplevel && nldcl <= 3)
          return false;
      }
  }

  /* Don't allow the host "foobar.com" to set a cookie for domain
     "bar.com".  */
  if (*cookie_domain != '.')
    {
      int dlen = strlen (cookie_domain);
      int hlen = strlen (host);
      /* cookie host:    hostname.foobar.com */
      /* desired domain:             bar.com */
      /* '.' must be here in host-> ^        */
      if (hlen > dlen && host[hlen - dlen - 1] != '.')
        return false;
    }

  return true;
}
static int path_matches (const char *, const char *);

/* Check whether PATH begins with COOKIE_PATH.  */

static bool
check_path_match (const char *cookie_path, const char *path)
{
  return path_matches (path, cookie_path) != 0;
}
/* Prepend '/' to string S.  S is copied to fresh stack-allocated
   space and its value is modified to point to the new location.  S
   must be an lvalue; the copy lives until the calling function
   returns.  */

#define PREPEND_SLASH(s) do {                                   \
  char *PS_newstr = (char *) alloca (1 + strlen (s) + 1);       \
  *PS_newstr = '/';                                             \
  strcpy (PS_newstr + 1, s);                                    \
  s = PS_newstr;                                                \
} while (0)
645 /* Process the HTTP `Set-Cookie' header. This results in storing the
646 cookie or discarding a matching one, or ignoring it completely, all
647 depending on the contents. */
650 cookie_handle_set_cookie (struct cookie_jar *jar,
651 const char *host, int port,
652 const char *path, const char *set_cookie)
654 struct cookie *cookie;
655 cookies_now = time (NULL);
657 /* Wget's paths don't begin with '/' (blame rfc1808), but cookie
658 usage assumes /-prefixed paths. Until the rest of Wget is fixed,
659 simply prepend slash to PATH. */
660 PREPEND_SLASH (path);
662 cookie = parse_set_cookie (set_cookie, false);
666 /* Sanitize parts of cookie. */
671 /* If the domain was not provided, we use the one we're talking
672 to, and set exact match. */
673 cookie->domain = xstrdup (host);
674 cookie->domain_exact = 1;
675 /* Set the port, but only if it's non-default. */
676 if (port != 80 && port != 443)
681 if (!check_domain_match (cookie->domain, host))
683 logprintf (LOG_NOTQUIET,
684 _("Cookie coming from %s attempted to set domain to %s\n"),
685 escnonprint (host), escnonprint (cookie->domain));
686 xfree (cookie->domain);
693 /* The cookie doesn't set path: set it to the URL path, sans the
694 file part ("/dir/file" truncated to "/dir/"). */
695 char *trailing_slash = strrchr (path, '/');
697 cookie->path = strdupdelim (path, trailing_slash + 1);
699 /* no slash in the string -- can this even happen? */
700 cookie->path = xstrdup (path);
704 /* The cookie sets its own path; verify that it is legal. */
705 if (!check_path_match (cookie->path, path))
707 DEBUGP (("Attempt to fake the path: %s, %s\n",
708 cookie->path, path));
713 /* Now store the cookie, or discard an existing cookie, if
714 discarding was requested. */
716 if (cookie->discard_requested)
718 discard_matching_cookie (jar, cookie);
722 store_cookie (jar, cookie);
727 delete_cookie (cookie);
/* Support for sending out cookies in HTTP requests, based on
   previously stored cookies.  Entry point is
   `build_cookies_request'.  */

/* Return a count of how many times CHR occurs in STRING.  The
   terminating null character is not counted.  */

static int
count_char (const char *string, char chr)
{
  int count = 0;
  for (; *string; string++)
    if (*string == chr)
      ++count;
  return count;
}
747 /* Find the cookie chains whose domains match HOST and store them to
750 A cookie chain is the head of a list of cookies that belong to a
751 host/domain. Given HOST "img.search.xemacs.org", this function
752 will return the chains for "img.search.xemacs.org",
753 "search.xemacs.org", and "xemacs.org" -- those of them that exist
756 DEST should be large enough to accept (in the worst case) as many
757 elements as there are domain components of HOST. */
760 find_chains_of_host (struct cookie_jar *jar, const char *host,
761 struct cookie *dest[])
766 /* Bail out quickly if there are no cookies in the jar. */
767 if (!hash_table_count (jar->chains))
770 if (numeric_address_p (host))
771 /* If host is an IP address, only check for the exact match. */
774 /* Otherwise, check all the subdomains except the top-level (last)
775 one. As a domain with N components has N-1 dots, the number of
776 passes equals the number of dots. */
777 passes = count_char (host, '.');
781 /* Find chains that match HOST, starting with exact match and
782 progressing to less specific domains. For instance, given HOST
783 fly.srk.fer.hr, first look for fly.srk.fer.hr's chain, then
784 srk.fer.hr's, then fer.hr's. */
787 struct cookie *chain = hash_table_get (jar->chains, host);
789 dest[dest_count++] = chain;
790 if (++passcnt >= passes)
792 host = strchr (host, '.') + 1;
/* If FULL_PATH begins with PREFIX, return the length of PREFIX plus
   one, zero otherwise.  The extra one keeps a match against an empty
   PREFIX distinguishable from a failed match.  */

static int
path_matches (const char *full_path, const char *prefix)
{
  int len = strlen (prefix);

  if (strncmp (full_path, prefix, len) != 0)
    /* FULL_PATH doesn't begin with PREFIX. */
    return 0;

  /* Length of PREFIX determines the quality of the match. */
  return len + 1;
}
814 /* Return true iff COOKIE matches the provided parameters of the URL
815 being downloaded: HOST, PORT, PATH, and SECFLAG.
817 If PATH_GOODNESS is non-NULL, store the "path goodness" value
818 there. That value is a measure of how closely COOKIE matches PATH,
819 used for ordering cookies. */
822 cookie_matches_url (const struct cookie *cookie,
823 const char *host, int port, const char *path,
824 bool secflag, int *path_goodness)
828 if (cookie_expired_p (cookie))
829 /* Ignore stale cookies. Don't bother unchaining the cookie at
830 this point -- Wget is a relatively short-lived application, and
831 stale cookies will not be saved by `save_cookies'. On the
832 other hand, this function should be as efficient as
836 if (cookie->secure && !secflag)
837 /* Don't transmit secure cookies over insecure connections. */
839 if (cookie->port != PORT_ANY && cookie->port != port)
842 /* If exact domain match is required, verify that cookie's domain is
843 equal to HOST. If not, assume success on the grounds of the
844 cookie's chain having been found by find_chains_of_host. */
845 if (cookie->domain_exact
846 && 0 != strcasecmp (host, cookie->domain))
849 pg = path_matches (path, cookie->path);
854 /* If the caller requested path_goodness, we return it. This is
855 an optimization, so that the caller doesn't need to call
856 path_matches() again. */
/* A structure that points to a cookie, along with the additional
   information about the cookie's "goodness".  This allows us to sort
   the cookies when returning them to the server, as required by the
   spec.  */

struct weighed_cookie {
  struct cookie *cookie;
  int domain_goodness;
  int path_goodness;
};
872 /* Comparator used for uniquifying the list. */
875 equality_comparator (const void *p1, const void *p2)
877 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
878 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
880 int namecmp = strcmp (wc1->cookie->attr, wc2->cookie->attr);
881 int valuecmp = strcmp (wc1->cookie->value, wc2->cookie->value);
883 /* We only really care whether both name and value are equal. We
884 return them in this order only for consistency... */
885 return namecmp ? namecmp : valuecmp;
888 /* Eliminate duplicate cookies. "Duplicate cookies" are any two
889 cookies with the same attr name and value. Whenever a duplicate
890 pair is found, one of the cookies is removed. */
893 eliminate_dups (struct weighed_cookie *outgoing, int count)
895 struct weighed_cookie *h; /* hare */
896 struct weighed_cookie *t; /* tortoise */
897 struct weighed_cookie *end = outgoing + count;
899 /* We deploy a simple uniquify algorithm: first sort the array
900 according to our sort criteria, then copy it to itself, comparing
901 each cookie to its neighbor and ignoring the duplicates. */
903 qsort (outgoing, count, sizeof (struct weighed_cookie), equality_comparator);
905 /* "Hare" runs through all the entries in the array, followed by
906 "tortoise". If a duplicate is found, the hare skips it.
907 Non-duplicate entries are copied to the tortoise ptr. */
909 for (h = t = outgoing; h < end; h++)
913 struct cookie *c0 = h[0].cookie;
914 struct cookie *c1 = h[1].cookie;
915 if (!strcmp (c0->attr, c1->attr) && !strcmp (c0->value, c1->value))
916 continue; /* ignore the duplicate */
919 /* If the hare has advanced past the tortoise (because of
920 previous dups), make sure the values get copied. Otherwise,
921 no copying is necessary. */
930 /* Comparator used for sorting by quality. */
933 goodness_comparator (const void *p1, const void *p2)
935 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
936 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
938 /* Subtractions take `wc2' as the first argument becauase we want a
939 sort in *decreasing* order of goodness. */
940 int dgdiff = wc2->domain_goodness - wc1->domain_goodness;
941 int pgdiff = wc2->path_goodness - wc1->path_goodness;
943 /* Sort by domain goodness; if these are the same, sort by path
944 goodness. (The sorting order isn't really specified; maybe it
945 should be the other way around.) */
946 return dgdiff ? dgdiff : pgdiff;
949 /* Generate a `Cookie' header for a request that goes to HOST:PORT and
950 requests PATH from the server. The resulting string is allocated
951 with `malloc', and the caller is responsible for freeing it. If no
952 cookies pertain to this request, i.e. no cookie header should be
953 generated, NULL is returned. */
956 cookie_header (struct cookie_jar *jar, const char *host,
957 int port, const char *path, bool secflag)
959 struct cookie **chains;
962 struct cookie *cookie;
963 struct weighed_cookie *outgoing;
966 int result_size, pos;
967 PREPEND_SLASH (path); /* see cookie_handle_set_cookie */
969 /* First, find the cookie chains whose domains match HOST. */
971 /* Allocate room for find_chains_of_host to write to. The number of
972 chains can at most equal the number of subdomains, hence
973 1+<number of dots>. */
974 chains = alloca_array (struct cookie *, 1 + count_char (host, '.'));
975 chain_count = find_chains_of_host (jar, host, chains);
977 /* No cookies for this host. */
981 cookies_now = time (NULL);
983 /* Now extract from the chains those cookies that match our host
984 (for domain_exact cookies), port (for cookies with port other
985 than PORT_ANY), etc. See matching_cookie for details. */
987 /* Count the number of matching cookies. */
989 for (i = 0; i < chain_count; i++)
990 for (cookie = chains[i]; cookie; cookie = cookie->next)
991 if (cookie_matches_url (cookie, host, port, path, secflag, NULL))
994 return NULL; /* no cookies matched */
996 /* Allocate the array. */
997 outgoing = alloca_array (struct weighed_cookie, count);
999 /* Fill the array with all the matching cookies from the chains that
1002 for (i = 0; i < chain_count; i++)
1003 for (cookie = chains[i]; cookie; cookie = cookie->next)
1006 if (!cookie_matches_url (cookie, host, port, path, secflag, &pg))
1008 outgoing[ocnt].cookie = cookie;
1009 outgoing[ocnt].domain_goodness = strlen (cookie->domain);
1010 outgoing[ocnt].path_goodness = pg;
1013 assert (ocnt == count);
1015 /* Eliminate duplicate cookies; that is, those whose name and value
1017 count = eliminate_dups (outgoing, count);
1019 /* Sort the array so that best-matching domains come first, and
1020 that, within one domain, best-matching paths come first. */
1021 qsort (outgoing, count, sizeof (struct weighed_cookie), goodness_comparator);
1023 /* Count the space the name=value pairs will take. */
1025 for (i = 0; i < count; i++)
1027 struct cookie *c = outgoing[i].cookie;
1029 result_size += strlen (c->attr) + 1 + strlen (c->value);
1032 /* Allocate output buffer:
1033 name=value pairs -- result_size
1034 "; " separators -- (count - 1) * 2
1035 \0 terminator -- 1 */
1036 result_size = result_size + (count - 1) * 2 + 1;
1037 result = xmalloc (result_size);
1039 for (i = 0; i < count; i++)
1041 struct cookie *c = outgoing[i].cookie;
1042 int namlen = strlen (c->attr);
1043 int vallen = strlen (c->value);
1045 memcpy (result + pos, c->attr, namlen);
1047 result[pos++] = '=';
1048 memcpy (result + pos, c->value, vallen);
1052 result[pos++] = ';';
1053 result[pos++] = ' ';
1056 result[pos++] = '\0';
1057 assert (pos == result_size);
/* Support for loading and saving cookies.  The format used for
   loading and saving should be the format of the `cookies.txt' file
   used by Netscape and Mozilla, at least the Unix versions.
   (Apparently IE can export cookies in that format as well.)  The
   format goes like this:

       DOMAIN DOMAIN-FLAG PATH SECURE-FLAG TIMESTAMP ATTR-NAME ATTR-VALUE

     DOMAIN      -- cookie domain, optionally followed by :PORT
     DOMAIN-FLAG -- whether all hosts in the domain match
     PATH        -- cookie path
     SECURE-FLAG -- whether cookie requires secure connection
     TIMESTAMP   -- expiry timestamp, number of seconds since epoch
     ATTR-NAME   -- name of the cookie attribute
     ATTR-VALUE  -- value of the cookie attribute (empty if absent)

   The fields are separated by TABs.  All fields are mandatory, except
   for ATTR-VALUE.  The `-FLAG' fields are boolean, their legal values
   being "TRUE" and "FALSE".  Empty lines, lines consisting of
   whitespace only, and comment lines (beginning with # optionally
   preceded by whitespace) are ignored.

   Example line from cookies.txt (split in two lines for readability):

       .google.com	TRUE	/	FALSE	2147368447	\
       PREF	ID=34bb47565bbcd47b:LD=en:NR=20:TM=985172580:LM=985739012

*/
/* If the region [DOMAIN_B, DOMAIN_E) ends with :<digits>, parse the
   number, return it, and store new boundary (location of the `:') to
   DOMAIN_E_PTR.  If port is not specified, return 0.

   Zero is also returned, and DOMAIN_E_PTR left untouched, when
   something other than digits follows the colon or when the number
   exceeds 65535, the largest valid port.  The range check also keeps
   the accumulator from overflowing on long digit strings.  */

static int
domain_port (const char *domain_b, const char *domain_e,
             const char **domain_e_ptr)
{
  int port = 0;
  const char *p;
  const char *colon = memchr (domain_b, ':', domain_e - domain_b);

  if (!colon)
    return 0;

  for (p = colon + 1; p < domain_e && *p >= '0' && *p <= '9'; p++)
    {
      port = 10 * port + (*p - '0');
      if (port > 65535)
        /* Not a usable port number. */
        return 0;
    }
  if (p < domain_e)
    /* Garbage following port number. */
    return 0;

  *domain_e_ptr = colon;
  return port;
}
/* Read one TAB-terminated field starting at P.  B and E receive the
   field's boundaries and P is advanced past the TAB.  If the field is
   empty or the line ends before a TAB, jump to the label `next',
   which the calling code must provide.  */
#define GET_WORD(p, b, e) do {                  \
  b = p;                                        \
  while (*p && *p != '\t')                      \
    ++p;                                        \
  e = p;                                        \
  if (b == e || !*p)                            \
    goto next;                                  \
  ++p;                                          \
} while (0)
1122 /* Load cookies from FILE. */
1125 cookie_jar_load (struct cookie_jar *jar, const char *file)
1128 FILE *fp = fopen (file, "r");
1131 logprintf (LOG_NOTQUIET, _("Cannot open cookies file `%s': %s\n"),
1132 file, strerror (errno));
1135 cookies_now = time (NULL);
1137 for (; ((line = read_whole_line (fp)) != NULL); xfree (line))
1139 struct cookie *cookie;
1145 char *domain_b = NULL, *domain_e = NULL;
1146 char *domflag_b = NULL, *domflag_e = NULL;
1147 char *path_b = NULL, *path_e = NULL;
1148 char *secure_b = NULL, *secure_e = NULL;
1149 char *expires_b = NULL, *expires_e = NULL;
1150 char *name_b = NULL, *name_e = NULL;
1151 char *value_b = NULL, *value_e = NULL;
1153 /* Skip leading white-space. */
1154 while (*p && c_isspace (*p))
1156 /* Ignore empty lines. */
1157 if (!*p || *p == '#')
1160 GET_WORD (p, domain_b, domain_e);
1161 GET_WORD (p, domflag_b, domflag_e);
1162 GET_WORD (p, path_b, path_e);
1163 GET_WORD (p, secure_b, secure_e);
1164 GET_WORD (p, expires_b, expires_e);
1165 GET_WORD (p, name_b, name_e);
1167 /* Don't use GET_WORD for value because it ends with newline,
1170 value_e = p + strlen (p);
1171 if (value_e > value_b && value_e[-1] == '\n')
1173 if (value_e > value_b && value_e[-1] == '\r')
1175 /* Empty values are legal (I think), so don't bother checking. */
1177 cookie = cookie_new ();
1179 cookie->attr = strdupdelim (name_b, name_e);
1180 cookie->value = strdupdelim (value_b, value_e);
1181 cookie->path = strdupdelim (path_b, path_e);
1182 cookie->secure = BOUNDED_EQUAL (secure_b, secure_e, "TRUE");
1184 /* Curl source says, quoting Andre Garcia: "flag: A TRUE/FALSE
1185 value indicating if all machines within a given domain can
1186 access the variable. This value is set automatically by the
1187 browser, depending on the value set for the domain." */
1188 cookie->domain_exact = !BOUNDED_EQUAL (domflag_b, domflag_e, "TRUE");
1190 /* DOMAIN needs special treatment because we might need to
1191 extract the port. */
1192 port = domain_port (domain_b, domain_e, (const char **)&domain_e);
1194 cookie->port = port;
1196 if (*domain_b == '.')
1197 ++domain_b; /* remove leading dot internally */
1198 cookie->domain = strdupdelim (domain_b, domain_e);
1200 /* safe default in case EXPIRES field is garbled. */
1201 expiry = (double)cookies_now - 1;
1203 /* I don't like changing the line, but it's safe here. (line is
1206 sscanf (expires_b, "%lf", &expiry);
1210 /* EXPIRY can be 0 for session cookies saved because the
1211 user specified `--keep-session-cookies' in the past.
1212 They remain session cookies, and will be saved only if
1213 the user has specified `keep-session-cookies' again. */
1217 if (expiry < cookies_now)
1218 goto abort_cookie; /* ignore stale cookie. */
1219 cookie->expiry_time = expiry;
1220 cookie->permanent = 1;
1223 store_cookie (jar, cookie);
1229 delete_cookie (cookie);
1234 /* Save cookies, in format described above, to FILE. */
1237 cookie_jar_save (struct cookie_jar *jar, const char *file)
1240 hash_table_iterator iter;
1242 DEBUGP (("Saving cookies to %s.\n", file));
1244 cookies_now = time (NULL);
1246 fp = fopen (file, "w");
1249 logprintf (LOG_NOTQUIET, _("Cannot open cookies file `%s': %s\n"),
1250 file, strerror (errno));
1254 fputs ("# HTTP cookie file.\n", fp);
1255 fprintf (fp, "# Generated by Wget on %s.\n", datetime_str (cookies_now));
1256 fputs ("# Edit at your own risk.\n\n", fp);
1258 for (hash_table_iterate (jar->chains, &iter);
1259 hash_table_iter_next (&iter);
1262 const char *domain = iter.key;
1263 struct cookie *cookie = iter.value;
1264 for (; cookie; cookie = cookie->next)
1266 if (!cookie->permanent && !opt.keep_session_cookies)
1268 if (cookie_expired_p (cookie))
1270 if (!cookie->domain_exact)
1273 if (cookie->port != PORT_ANY)
1274 fprintf (fp, ":%d", cookie->port);
1275 fprintf (fp, "\t%s\t%s\t%s\t%.0f\t%s\t%s\n",
1276 cookie->domain_exact ? "FALSE" : "TRUE",
1277 cookie->path, cookie->secure ? "TRUE" : "FALSE",
1278 (double)cookie->expiry_time,
1279 cookie->attr, cookie->value);
1286 logprintf (LOG_NOTQUIET, _("Error writing to `%s': %s\n"),
1287 file, strerror (errno));
1288 if (fclose (fp) < 0)
1289 logprintf (LOG_NOTQUIET, _("Error closing `%s': %s\n"),
1290 file, strerror (errno));
1292 DEBUGP (("Done saving cookies.\n"));
1295 /* Clean up cookie-related data. */
1298 cookie_jar_delete (struct cookie_jar *jar)
1300 /* Iterate over chains (indexed by domain) and free them. */
1301 hash_table_iterator iter;
1302 for (hash_table_iterate (jar->chains, &iter); hash_table_iter_next (&iter); )
1304 struct cookie *chain = iter.value;
1306 /* Then all cookies in this chain. */
1309 struct cookie *next = chain->next;
1310 delete_cookie (chain);
1314 hash_table_destroy (jar->chains);
/* Test cases.  Currently this only tests parse_set_cookie.  To
   use, recompile Wget with -DTEST_COOKIES and call test_cookies()
   from main.  Problems are reported on stdout.  */

#ifdef TEST_COOKIES
void
test_cookies (void)
{
  /* Tests expected to succeed: */
  static struct {
    const char *data;
    const char *results[10];
  } tests_succ[] = {
    { "arg=value", {"arg", "value", NULL} },
    { "arg1=value1;arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
    { "arg1=value1; arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
    { "arg1=value1;  arg2=value2;", {"arg1", "value1", "arg2", "value2", NULL} },
    { "arg1=value1;  arg2=value2;  ", {"arg1", "value1", "arg2", "value2", NULL} },
    { "arg1=\"value1\"; arg2=\"\"", {"arg1", "value1", "arg2", "", NULL} },
    { "arg=", {"arg", "", NULL} },
    { "arg1=; arg2=", {"arg1", "", "arg2", "", NULL} },
    { "arg1 = ; arg2= ", {"arg1", "", "arg2", "", NULL} },
  };

  /* Tests expected to fail: */
  static const char *tests_fail[] = {
    "arg=\"unterminated",
    "=empty-name",
    "arg1=;=another-empty-name",
  };
  size_t i;

  for (i = 0; i < countof (tests_succ); i++)
    {
      const char *data = tests_succ[i].data;
      const char **expected = tests_succ[i].results;
      struct cookie *c;

      c = parse_set_cookie (data, true);
      if (!c)
        {
          printf ("NULL cookie returned for valid data: %s\n", data);
          continue;
        }
      /* Only the fact that parsing succeeded matters here.  */
      delete_cookie (c);

      /* Test whether extract_param handles these cases correctly. */
      {
        param_token name, value;
        const char *ptr = data;
        int j = 0;
        while (extract_param (&ptr, &name, &value, ';'))
          {
            char *n = strdupdelim (name.b, name.e);
            char *v = strdupdelim (value.b, value.e);
            if (!expected[j])
              {
                printf ("Too many parameters for '%s'\n", data);
                free (n);
                free (v);
                break;
              }
            if (0 != strcmp (expected[j], n))
              printf ("Invalid name %d for '%s' (expected '%s', got '%s')\n",
                      j / 2 + 1, data, expected[j], n);
            if (0 != strcmp (expected[j + 1], v))
              printf ("Invalid value %d for '%s' (expected '%s', got '%s')\n",
                      j / 2 + 1, data, expected[j + 1], v);
            j += 2;
            free (n);
            free (v);
          }
        if (expected[j])
          printf ("Too few parameters for '%s'\n", data);
      }
    }

  for (i = 0; i < countof (tests_fail); i++)
    {
      struct cookie *c;
      const char *data = tests_fail[i];
      c = parse_set_cookie (data, true);
      if (c)
        {
          printf ("Failed to report error on invalid data: %s\n", data);
          delete_cookie (c);
        }
    }
}
#endif /* TEST_COOKIES */