1 /* Support for cookies.
2 Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009,
3 2010, 2011 Free Software Foundation, Inc.
5 This file is part of GNU Wget.
7 GNU Wget is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or (at
10 your option) any later version.
12 GNU Wget is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with Wget. If not, see <http://www.gnu.org/licenses/>.
20 Additional permission under GNU GPL version 3 section 7
22 If you modify this program, or any covered work, by linking or
23 combining it with the OpenSSL project's OpenSSL library (or a
24 modified version of that library), containing parts covered by the
25 terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
26 grants you additional permission to convey the resulting work.
27 Corresponding Source for a non-source form of such a combination
28 shall include the source code for the parts of OpenSSL used as well
29 as that of the covered work. */
31 /* Written by Hrvoje Niksic. Parts are loosely inspired by the
32 cookie patch submitted by Tomasz Wegrzanowski.
34 This implements the client-side cookie support, as specified
35 (loosely) by Netscape's "preliminary specification", currently
38 http://wp.netscape.com/newsref/std/cookie_spec.html
40 rfc2109 is not supported because of its incompatibilities with the
41 above widely-used specification. rfc2965 is entirely ignored,
42 since popular client software doesn't implement it, and even the
43 sites that do send Set-Cookie2 also emit Set-Cookie for
58 #include "http.h" /* for http_atotm */
60 /* Declarations of `struct cookie' and the most basic functions. */
62 /* Cookie jar serves as cookie storage and a means of retrieving
63 cookies efficiently. All cookies with the same domain are stored
64 in a linked list called "chain". A cookie chain can be reached by
65 looking up the domain in the cookie jar's `chains' hash table.
67 For example, to reach all the cookies under google.com, one must
68 execute hash_table_get(jar->chains, "google.com"). Of
69 course, when sending a cookie to `www.google.com', one must search
70 for cookies that belong to either `www.google.com' or `google.com'
71 -- but the point is that the code doesn't need to go through *all*
75 /* Cookie chains indexed by domain. */
76 struct hash_table *chains;
78 int cookie_count; /* number of cookies in the jar. */
81 /* Value set by entry point functions, so that the low-level
82 routines don't need to call time() all the time. */
83 static time_t cookies_now;
88 struct cookie_jar *jar = xnew (struct cookie_jar);
89 jar->chains = make_nocase_string_hash_table (0);
90 jar->cookie_count = 0;
95 char *domain; /* domain of the cookie */
96 int port; /* port number */
97 char *path; /* path prefix of the cookie */
99 unsigned discard_requested :1;/* whether cookie was created to
100 request discarding another
103 unsigned secure :1; /* whether cookie should be
104 transmitted only over secure (https)
106 unsigned domain_exact :1; /* whether DOMAIN must match as a
109 unsigned permanent :1; /* whether the cookie should outlive
111 time_t expiry_time; /* time when the cookie expires, 0
112 means undetermined. */
114 char *attr; /* cookie attribute name */
115 char *value; /* cookie attribute value */
117 struct cookie *next; /* used for chaining of cookies in the
121 #define PORT_ANY (-1)
123 /* Allocate and return a new, empty cookie structure. */
125 static struct cookie *
128 struct cookie *cookie = xnew0 (struct cookie);
130 /* Both cookie->permanent and cookie->expiry_time are now 0. This
131 means that the cookie doesn't expire, but is only valid for this
132 session (i.e. not written out to disk). */
134 cookie->port = PORT_ANY;
138 /* Non-zero if the cookie has expired. Assumes cookies_now has been
139 set by one of the entry point functions. */
142 cookie_expired_p (const struct cookie *c)
144 return c->expiry_time != 0 && c->expiry_time < cookies_now;
147 /* Deallocate COOKIE and its components. */
150 delete_cookie (struct cookie *cookie)
152 xfree_null (cookie->domain);
153 xfree_null (cookie->path);
154 xfree_null (cookie->attr);
155 xfree_null (cookie->value);
159 /* Functions for storing cookies.
161 All cookies can be reached beginning with jar->chains. The key in
162 that table is the domain name, and the value is a linked list of
163 all cookies from that domain. Every new cookie is placed on the
166 /* Find and return a cookie in JAR whose domain, path, and attribute
167 name correspond to COOKIE. If found, PREVPTR will point to the
168 location of the cookie previous in chain, or NULL if the found
169 cookie is the head of a chain.
171 If no matching cookie is found, return NULL. */
173 static struct cookie *
174 find_matching_cookie (struct cookie_jar *jar, struct cookie *cookie,
175 struct cookie **prevptr)
177 struct cookie *chain, *prev;
179 chain = hash_table_get (jar->chains, cookie->domain);
184 for (; chain; prev = chain, chain = chain->next)
185 if (0 == strcmp (cookie->path, chain->path)
186 && 0 == strcmp (cookie->attr, chain->attr)
187 && cookie->port == chain->port)
198 /* Store COOKIE to the jar.
200 This is done by placing COOKIE at the head of its chain. However,
201 if COOKIE matches a cookie already in memory, as determined by
202 find_matching_cookie, the old cookie is unlinked and destroyed.
204 The key of each chain's hash table entry is allocated only the
205 first time; next hash_table_put's reuse the same key. */
208 store_cookie (struct cookie_jar *jar, struct cookie *cookie)
210 struct cookie *chain_head;
213 if (hash_table_get_pair (jar->chains, cookie->domain,
214 &chain_key, &chain_head))
216 /* A chain of cookies in this domain already exists. Check for
217 duplicates -- if an extant cookie exactly matches our domain,
218 port, path, and name, replace it. */
220 struct cookie *victim = find_matching_cookie (jar, cookie, &prev);
224 /* Remove VICTIM from the chain. COOKIE will be placed at
228 prev->next = victim->next;
229 cookie->next = chain_head;
233 /* prev is NULL; apparently VICTIM was at the head of
234 the chain. This place will be taken by COOKIE, so
235 all we need to do is: */
236 cookie->next = victim->next;
238 delete_cookie (victim);
240 DEBUGP (("Deleted old cookie (to be replaced.)\n"));
243 cookie->next = chain_head;
247 /* We are now creating the chain. Use a copy of cookie->domain
248 as the key for the life-time of the chain. Using
249 cookie->domain would be unsafe because the life-time of the
250 chain may exceed the life-time of the cookie. (Cookies may
251 be deleted from the chain by this very function.) */
253 chain_key = xstrdup (cookie->domain);
256 hash_table_put (jar->chains, chain_key, cookie);
261 time_t exptime = cookie->expiry_time;
262 DEBUGP (("\nStored cookie %s %d%s %s <%s> <%s> [expiry %s] %s %s\n",
263 cookie->domain, cookie->port,
264 cookie->port == PORT_ANY ? " (ANY)" : "",
266 cookie->permanent ? "permanent" : "session",
267 cookie->secure ? "secure" : "insecure",
268 cookie->expiry_time ? datetime_str (exptime) : "none",
269 cookie->attr, cookie->value));
273 /* Discard a cookie matching COOKIE's domain, port, path, and
274 attribute name. This gets called when we encounter a cookie whose
275 expiry date is in the past, or whose max-age is set to 0. The
276 former corresponds to netscape cookie spec, while the latter is
277 specified by rfc2109. */
280 discard_matching_cookie (struct cookie_jar *jar, struct cookie *cookie)
282 struct cookie *prev, *victim;
284 if (!hash_table_count (jar->chains))
285 /* No elements == nothing to discard. */
288 victim = find_matching_cookie (jar, cookie, &prev);
292 /* Simply unchain the victim. */
293 prev->next = victim->next;
296 /* VICTIM was head of its chain. We need to place a new
297 cookie at the head. */
298 char *chain_key = NULL;
301 res = hash_table_get_pair (jar->chains, victim->domain,
306 /* VICTIM was the only cookie in the chain. Destroy the
307 chain and deallocate the chain key. */
308 hash_table_remove (jar->chains, victim->domain);
312 hash_table_put (jar->chains, chain_key, victim->next);
314 delete_cookie (victim);
315 DEBUGP (("Discarded old cookie.\n"));
319 /* Functions for parsing the `Set-Cookie' header, and creating new
320 cookies from the wire. */
322 #define TOKEN_IS(token, string_literal) \
323 BOUNDED_EQUAL_NO_CASE (token.b, token.e, string_literal)
325 #define TOKEN_NON_EMPTY(token) (token.b != NULL && token.b != token.e)
327 /* Parse the contents of the `Set-Cookie' header. The header looks
330 name1=value1; name2=value2; ...
332 Trailing semicolon is optional; spaces are allowed between all
333 tokens. Additionally, values may be quoted.
335 A new cookie is returned upon success, NULL otherwise.
337 The first name-value pair will be used to set the cookie's
338 attribute name and value. Subsequent parameters will be checked
339 against field names such as `domain', `path', etc. Recognized
340 fields will be parsed and the corresponding members of COOKIE
343 static struct cookie *
344 parse_set_cookie (const char *set_cookie, bool silent)
346 const char *ptr = set_cookie;
347 struct cookie *cookie = cookie_new ();
348 param_token name, value;
350 if (!extract_param (&ptr, &name, &value, ';', NULL))
355 /* If the value is quoted, do not modify it. */
356 if (*(value.b - 1) == '"')
361 cookie->attr = strdupdelim (name.b, name.e);
362 cookie->value = strdupdelim (value.b, value.e);
364 while (extract_param (&ptr, &name, &value, ';', NULL))
366 if (TOKEN_IS (name, "domain"))
368 if (!TOKEN_NON_EMPTY (value))
370 xfree_null (cookie->domain);
371 /* Strictly speaking, we should set cookie->domain_exact if the
372 domain doesn't begin with a dot. But many sites set the
373 domain to "foo.com" and expect "subhost.foo.com" to get the
374 cookie, and it apparently works in browsers. */
377 cookie->domain = strdupdelim (value.b, value.e);
379 else if (TOKEN_IS (name, "path"))
381 if (!TOKEN_NON_EMPTY (value))
383 xfree_null (cookie->path);
384 cookie->path = strdupdelim (value.b, value.e);
386 else if (TOKEN_IS (name, "expires"))
391 if (!TOKEN_NON_EMPTY (value))
393 BOUNDED_TO_ALLOCA (value.b, value.e, value_copy);
395 /* Check if expiration spec is valid.
396 If not, assume default (cookie doesn't expire, but valid only for
398 expires = http_atotm (value_copy);
399 if (expires != (time_t) -1)
401 cookie->permanent = 1;
402 cookie->expiry_time = expires;
403 /* According to netscape's specification, expiry time in
404 the past means that discarding of a matching cookie
406 if (cookie->expiry_time < cookies_now)
407 cookie->discard_requested = 1;
410 else if (TOKEN_IS (name, "max-age"))
415 if (!TOKEN_NON_EMPTY (value))
417 BOUNDED_TO_ALLOCA (value.b, value.e, value_copy);
419 sscanf (value_copy, "%lf", &maxage);
421 /* something went wrong. */
423 cookie->permanent = 1;
424 cookie->expiry_time = cookies_now + maxage;
426 /* According to rfc2109, a cookie with max-age of 0 means that
427 discarding of a matching cookie is requested. */
429 cookie->discard_requested = 1;
431 else if (TOKEN_IS (name, "secure"))
433 /* ignore value completely */
436 /* else: Ignore unrecognized attribute. */
439 /* extract_param has encountered a syntax error */
442 /* The cookie has been successfully constructed; return it. */
447 logprintf (LOG_NOTQUIET,
448 _("Syntax error in Set-Cookie: %s at position %d.\n"),
449 quotearg_style (escape_quoting_style, set_cookie),
450 (int) (ptr - set_cookie));
451 delete_cookie (cookie);
456 #undef TOKEN_NON_EMPTY
458 /* Sanity checks. These are important, otherwise it is possible for
459 malicious attackers to destroy important cookie information and/or
460 violate your privacy. */
463 #define REQUIRE_DIGITS(p) do { \
464 if (!c_isdigit (*p)) \
466 for (++p; c_isdigit (*p); p++) \
470 #define REQUIRE_DOT(p) do { \
475 /* Check whether ADDR matches <digits>.<digits>.<digits>.<digits>.
477 We don't want to call network functions like inet_addr() because
478 all we need is a check, preferably one that is small, fast, and
482 numeric_address_p (const char *addr)
484 const char *p = addr;
486 REQUIRE_DIGITS (p); /* A */
487 REQUIRE_DOT (p); /* . */
488 REQUIRE_DIGITS (p); /* B */
489 REQUIRE_DOT (p); /* . */
490 REQUIRE_DIGITS (p); /* C */
491 REQUIRE_DOT (p); /* . */
492 REQUIRE_DIGITS (p); /* D */
499 /* Check whether COOKIE_DOMAIN is an appropriate domain for HOST.
500 Originally I tried to make the check compliant with rfc2109, but
501 the sites deviated too often, so I had to fall back to "tail
502 matching", as defined by the original Netscape's cookie spec. */
505 check_domain_match (const char *cookie_domain, const char *host)
510 const psl_ctx_t *psl;
513 if (!(psl = psl_builtin()))
515 DEBUGP (("\nlibpsl not built with a public suffix list. "
516 "Falling back to simple heuristics.\n"));
520 is_acceptable = psl_is_cookie_domain_acceptable (psl, host, cookie_domain);
521 return true ? (is_acceptable == 1) : false;
526 /* For efficiency make some elementary checks first */
529 /* For the sake of efficiency, check for exact match first. */
530 if (0 == strcasecmp (cookie_domain, host))
535 /* COOKIE_DOMAIN must match the tail of HOST. */
536 if (!match_tail (host, cookie_domain, true))
539 /* We know that COOKIE_DOMAIN is a subset of HOST; however, we must
540 make sure that somebody is not trying to set the cookie for a
541 subdomain shared by many entities. For example, "company.co.uk"
542 must not be allowed to set a cookie for ".co.uk". On the other
543 hand, "sso.redhat.de" should be able to set a cookie for
546 The only marginally sane way to handle this I can think of is to
547 reject on the basis of the length of the second-level domain name
548 (but only when the top-level domain is unknown), with the assumption
549 that those of three or less characters could be reserved. For
552 .co.org -> works because the TLD is known
553 .co.uk -> doesn't work because "co" is only two chars long
554 .com.au -> doesn't work because "com" is only 3 chars long
555 .cnn.uk -> doesn't work because "cnn" is also only 3 chars long (ugh)
556 .cnn.de -> doesn't work for the same reason (ugh!!)
557 .abcd.de -> works because "abcd" is 4 chars long
558 .img.cnn.de -> works because it's not trying to set the 2nd level domain
559 .cnn.co.uk -> works for the same reason
561 That should prevent misuse, while allowing reasonable usage. If
562 someone knows of a better way to handle this, please let me
565 const char *p = cookie_domain;
566 int dccount = 1; /* number of domain components */
567 int ldcl = 0; /* last domain component length */
568 int nldcl = 0; /* next to last domain component length */
571 /* Ignore leading period in this calculation. */
574 for (out = 0; !out; p++)
582 /* Empty domain component found -- the domain is invalid. */
584 if (*(p + 1) == '\0')
586 /* Tolerate trailing '.' by not treating the domain as
587 one ending with an empty domain component. */
609 int known_toplevel = false;
610 static const char *known_toplevel_domains[] = {
611 ".com", ".edu", ".net", ".org", ".gov", ".mil", ".int"
613 for (i = 0; i < countof (known_toplevel_domains); i++)
614 if (match_tail (cookie_domain, known_toplevel_domains[i], true))
616 known_toplevel = true;
619 if (!known_toplevel && nldcl <= 3)
626 /* Don't allow the host "foobar.com" to set a cookie for domain
628 if (*cookie_domain != '.')
630 int dlen = strlen (cookie_domain);
631 int hlen = strlen (host);
632 /* cookie host: hostname.foobar.com */
633 /* desired domain: bar.com */
634 /* '.' must be here in host-> ^ */
635 if (hlen > dlen && host[hlen - dlen - 1] != '.')
644 static int path_matches (const char *, const char *);
646 /* Check whether PATH begins with COOKIE_PATH. */
649 check_path_match (const char *cookie_path, const char *path)
651 return path_matches (path, cookie_path) != 0;
654 /* Prepend '/' to string S. S is copied to fresh stack-allocated
655 space and its value is modified to point to the new location. */
657 #define PREPEND_SLASH(s) do { \
658 char *PS_newstr = (char *) alloca (1 + strlen (s) + 1); \
660 strcpy (PS_newstr + 1, s); \
665 /* Process the HTTP `Set-Cookie' header. This results in storing the
666 cookie or discarding a matching one, or ignoring it completely, all
667 depending on the contents. */
670 cookie_handle_set_cookie (struct cookie_jar *jar,
671 const char *host, int port,
672 const char *path, const char *set_cookie)
674 struct cookie *cookie;
675 cookies_now = time (NULL);
677 /* Wget's paths don't begin with '/' (blame rfc1808), but cookie
678 usage assumes /-prefixed paths. Until the rest of Wget is fixed,
679 simply prepend slash to PATH. */
680 PREPEND_SLASH (path);
682 cookie = parse_set_cookie (set_cookie, false);
686 /* Sanitize parts of cookie. */
690 cookie->domain = xstrdup (host);
691 cookie->domain_exact = 1;
692 /* Set the port, but only if it's non-default. */
693 if (port != 80 && port != 443)
698 if (!check_domain_match (cookie->domain, host))
700 logprintf (LOG_NOTQUIET,
701 _("Cookie coming from %s attempted to set domain to "),
702 quotearg_style (escape_quoting_style, host));
703 logprintf (LOG_NOTQUIET,
705 quotearg_style (escape_quoting_style, cookie->domain));
706 cookie->discard_requested = true;
712 /* The cookie doesn't set path: set it to the URL path, sans the
713 file part ("/dir/file" truncated to "/dir/"). */
714 char *trailing_slash = strrchr (path, '/');
716 cookie->path = strdupdelim (path, trailing_slash + 1);
718 /* no slash in the string -- can this even happen? */
719 cookie->path = xstrdup (path);
723 /* The cookie sets its own path; verify that it is legal. */
724 if (!check_path_match (cookie->path, path))
726 DEBUGP (("Attempt to fake the path: %s, %s\n",
727 cookie->path, path));
732 /* Now store the cookie, or discard an existing cookie, if
733 discarding was requested. */
735 if (cookie->discard_requested)
737 discard_matching_cookie (jar, cookie);
741 store_cookie (jar, cookie);
746 delete_cookie (cookie);
749 /* Support for sending out cookies in HTTP requests, based on
750 previously stored cookies. Entry point is
751 `cookie_header'. */
753 /* Return a count of how many times CHR occurs in STRING. */
756 count_char (const char *string, char chr)
760 for (p = string; *p; p++)
766 /* Find the cookie chains whose domains match HOST and store them to
769 A cookie chain is the head of a list of cookies that belong to a
770 host/domain. Given HOST "img.search.xemacs.org", this function
771 will return the chains for "img.search.xemacs.org",
772 "search.xemacs.org", and "xemacs.org" -- those of them that exist
775 DEST should be large enough to accept (in the worst case) as many
776 elements as there are domain components of HOST. */
779 find_chains_of_host (struct cookie_jar *jar, const char *host,
780 struct cookie *dest[])
785 /* Bail out quickly if there are no cookies in the jar. */
786 if (!hash_table_count (jar->chains))
789 if (numeric_address_p (host))
790 /* If host is an IP address, only check for the exact match. */
793 /* Otherwise, check all the subdomains except the top-level (last)
794 one. As a domain with N components has N-1 dots, the number of
795 passes equals the number of dots. */
796 passes = count_char (host, '.');
800 /* Find chains that match HOST, starting with exact match and
801 progressing to less specific domains. For instance, given HOST
802 fly.srk.fer.hr, first look for fly.srk.fer.hr's chain, then
803 srk.fer.hr's, then fer.hr's. */
806 struct cookie *chain = hash_table_get (jar->chains, host);
808 dest[dest_count++] = chain;
809 if (++passcnt >= passes)
811 host = strchr (host, '.') + 1;
817 /* If FULL_PATH begins with PREFIX, return the length of PREFIX, zero
821 path_matches (const char *full_path, const char *prefix)
823 int len = strlen (prefix);
825 if (0 != strncmp (full_path, prefix, len))
826 /* FULL_PATH doesn't begin with PREFIX. */
829 /* Length of PREFIX determines the quality of the match. */
833 /* Return true iff COOKIE matches the provided parameters of the URL
834 being downloaded: HOST, PORT, PATH, and SECFLAG.
836 If PATH_GOODNESS is non-NULL, store the "path goodness" value
837 there. That value is a measure of how closely COOKIE matches PATH,
838 used for ordering cookies. */
841 cookie_matches_url (const struct cookie *cookie,
842 const char *host, int port, const char *path,
843 bool secflag, int *path_goodness)
847 if (cookie_expired_p (cookie))
848 /* Ignore stale cookies. Don't bother unchaining the cookie at
849 this point -- Wget is a relatively short-lived application, and
850 stale cookies will not be saved by `save_cookies'. On the
851 other hand, this function should be as efficient as
855 if (cookie->secure && !secflag)
856 /* Don't transmit secure cookies over insecure connections. */
858 if (cookie->port != PORT_ANY && cookie->port != port)
861 /* If exact domain match is required, verify that cookie's domain is
862 equal to HOST. If not, assume success on the grounds of the
863 cookie's chain having been found by find_chains_of_host. */
864 if (cookie->domain_exact
865 && 0 != strcasecmp (host, cookie->domain))
868 pg = path_matches (path, cookie->path);
873 /* If the caller requested path_goodness, we return it. This is
874 an optimization, so that the caller doesn't need to call
875 path_matches() again. */
880 /* A structure that points to a cookie, along with the additional
881 information about the cookie's "goodness". This allows us to sort
882 the cookies when returning them to the server, as required by the
885 struct weighed_cookie {
886 struct cookie *cookie;
891 /* Comparator used for uniquifying the list. */
894 equality_comparator (const void *p1, const void *p2)
896 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
897 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
899 int namecmp = strcmp (wc1->cookie->attr, wc2->cookie->attr);
900 int valuecmp = strcmp (wc1->cookie->value, wc2->cookie->value);
902 /* We only really care whether both name and value are equal. We
903 return them in this order only for consistency... */
904 return namecmp ? namecmp : valuecmp;
907 /* Eliminate duplicate cookies. "Duplicate cookies" are any two
908 cookies with the same attr name and value. Whenever a duplicate
909 pair is found, one of the cookies is removed. */
912 eliminate_dups (struct weighed_cookie *outgoing, int count)
914 struct weighed_cookie *h; /* hare */
915 struct weighed_cookie *t; /* tortoise */
916 struct weighed_cookie *end = outgoing + count;
918 /* We deploy a simple uniquify algorithm: first sort the array
919 according to our sort criteria, then copy it to itself, comparing
920 each cookie to its neighbor and ignoring the duplicates. */
922 qsort (outgoing, count, sizeof (struct weighed_cookie), equality_comparator);
924 /* "Hare" runs through all the entries in the array, followed by
925 "tortoise". If a duplicate is found, the hare skips it.
926 Non-duplicate entries are copied to the tortoise ptr. */
928 for (h = t = outgoing; h < end; h++)
932 struct cookie *c0 = h[0].cookie;
933 struct cookie *c1 = h[1].cookie;
934 if (!strcmp (c0->attr, c1->attr) && !strcmp (c0->value, c1->value))
935 continue; /* ignore the duplicate */
938 /* If the hare has advanced past the tortoise (because of
939 previous dups), make sure the values get copied. Otherwise,
940 no copying is necessary. */
949 /* Comparator used for sorting by quality. */
952 goodness_comparator (const void *p1, const void *p2)
954 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
955 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
957 /* Subtractions take `wc2' as the first argument because we want a
958 sort in *decreasing* order of goodness. */
959 int dgdiff = wc2->domain_goodness - wc1->domain_goodness;
960 int pgdiff = wc2->path_goodness - wc1->path_goodness;
962 /* Sort by domain goodness; if these are the same, sort by path
963 goodness. (The sorting order isn't really specified; maybe it
964 should be the other way around.) */
965 return dgdiff ? dgdiff : pgdiff;
968 /* Generate a `Cookie' header for a request that goes to HOST:PORT and
969 requests PATH from the server. The resulting string is allocated
970 with `malloc', and the caller is responsible for freeing it. If no
971 cookies pertain to this request, i.e. no cookie header should be
972 generated, NULL is returned. */
975 cookie_header (struct cookie_jar *jar, const char *host,
976 int port, const char *path, bool secflag)
978 struct cookie **chains;
981 struct cookie *cookie;
982 struct weighed_cookie *outgoing;
985 int result_size, pos;
986 PREPEND_SLASH (path); /* see cookie_handle_set_cookie */
988 /* First, find the cookie chains whose domains match HOST. */
990 /* Allocate room for find_chains_of_host to write to. The number of
991 chains can at most equal the number of subdomains, hence
992 1+<number of dots>. */
993 chains = alloca_array (struct cookie *, 1 + count_char (host, '.'));
994 chain_count = find_chains_of_host (jar, host, chains);
996 /* No cookies for this host. */
1000 cookies_now = time (NULL);
1002 /* Now extract from the chains those cookies that match our host
1003 (for domain_exact cookies), port (for cookies with port other
1004 than PORT_ANY), etc. See cookie_matches_url for details. */
1006 /* Count the number of matching cookies. */
1008 for (i = 0; i < chain_count; i++)
1009 for (cookie = chains[i]; cookie; cookie = cookie->next)
1010 if (cookie_matches_url (cookie, host, port, path, secflag, NULL))
1013 return NULL; /* no cookies matched */
1015 /* Allocate the array. */
1016 outgoing = alloca_array (struct weighed_cookie, count);
1018 /* Fill the array with all the matching cookies from the chains that
1021 for (i = 0; i < chain_count; i++)
1022 for (cookie = chains[i]; cookie; cookie = cookie->next)
1025 if (!cookie_matches_url (cookie, host, port, path, secflag, &pg))
1027 outgoing[ocnt].cookie = cookie;
1028 outgoing[ocnt].domain_goodness = strlen (cookie->domain);
1029 outgoing[ocnt].path_goodness = pg;
1032 assert (ocnt == count);
1034 /* Eliminate duplicate cookies; that is, those whose name and value
1036 count = eliminate_dups (outgoing, count);
1038 /* Sort the array so that best-matching domains come first, and
1039 that, within one domain, best-matching paths come first. */
1040 qsort (outgoing, count, sizeof (struct weighed_cookie), goodness_comparator);
1042 /* Count the space the name=value pairs will take. */
1044 for (i = 0; i < count; i++)
1046 struct cookie *c = outgoing[i].cookie;
1048 result_size += strlen (c->attr) + 1 + strlen (c->value);
1051 /* Allocate output buffer:
1052 name=value pairs -- result_size
1053 "; " separators -- (count - 1) * 2
1054 \0 terminator -- 1 */
1055 result_size = result_size + (count - 1) * 2 + 1;
1056 result = xmalloc (result_size);
1058 for (i = 0; i < count; i++)
1060 struct cookie *c = outgoing[i].cookie;
1061 int namlen = strlen (c->attr);
1062 int vallen = strlen (c->value);
1064 memcpy (result + pos, c->attr, namlen);
1066 result[pos++] = '=';
1067 memcpy (result + pos, c->value, vallen);
1071 result[pos++] = ';';
1072 result[pos++] = ' ';
1075 result[pos++] = '\0';
1076 assert (pos == result_size);
1080 /* Support for loading and saving cookies. The format used for
1081 loading and saving should be the format of the `cookies.txt' file
1082 used by Netscape and Mozilla, at least the Unix versions.
1083 (Apparently IE can export cookies in that format as well.) The
1084 format goes like this:
1086 DOMAIN DOMAIN-FLAG PATH SECURE-FLAG TIMESTAMP ATTR-NAME ATTR-VALUE
1088 DOMAIN -- cookie domain, optionally followed by :PORT
1089 DOMAIN-FLAG -- whether all hosts in the domain match
1091 SECURE-FLAG -- whether cookie requires secure connection
1092 TIMESTAMP -- expiry timestamp, number of seconds since epoch
1093 ATTR-NAME -- name of the cookie attribute
1094 ATTR-VALUE -- value of the cookie attribute (empty if absent)
1096 The fields are separated by TABs. All fields are mandatory, except
1097 for ATTR-VALUE. The `-FLAG' fields are boolean, their legal values
1098 being "TRUE" and "FALSE". Empty lines, lines consisting of
1099 whitespace only, and comment lines (beginning with # optionally
1100 preceded by whitespace) are ignored.
1102 Example line from cookies.txt (split in two lines for readability):
1104 .google.com TRUE / FALSE 2147368447 \
1105 PREF ID=34bb47565bbcd47b:LD=en:NR=20:TM=985172580:LM=985739012
1109 /* If the region [B, E) ends with :<digits>, parse the number, return
1110 it, and store new boundary (location of the `:') to DOMAIN_E_PTR.
1111 If port is not specified, return 0. */
1114 domain_port (const char *domain_b, const char *domain_e,
1115 const char **domain_e_ptr)
1119 const char *colon = memchr (domain_b, ':', domain_e - domain_b);
1122 for (p = colon + 1; p < domain_e && c_isdigit (*p); p++)
1123 port = 10 * port + (*p - '0');
1125 /* Garbage following port number. */
1127 *domain_e_ptr = colon;
1131 #define GET_WORD(p, b, e) do { \
1133 while (*p && *p != '\t') \
1136 if (b == e || !*p) \
1141 /* Load cookies from FILE. */
1144 cookie_jar_load (struct cookie_jar *jar, const char *file)
1149 FILE *fp = fopen (file, "r");
1152 logprintf (LOG_NOTQUIET, _("Cannot open cookies file %s: %s\n"),
1153 quote (file), strerror (errno));
1157 cookies_now = time (NULL);
1159 while (getline (&line, &bufsize, fp) > 0)
1161 struct cookie *cookie;
1167 char *domain_b = NULL, *domain_e = NULL;
1168 char *domflag_b = NULL, *domflag_e = NULL;
1169 char *path_b = NULL, *path_e = NULL;
1170 char *secure_b = NULL, *secure_e = NULL;
1171 char *expires_b = NULL, *expires_e = NULL;
1172 char *name_b = NULL, *name_e = NULL;
1173 char *value_b = NULL, *value_e = NULL;
1175 /* Skip leading white-space. */
1176 while (*p && c_isspace (*p))
1178 /* Ignore empty lines. */
1179 if (!*p || *p == '#')
1182 GET_WORD (p, domain_b, domain_e);
1183 GET_WORD (p, domflag_b, domflag_e);
1184 GET_WORD (p, path_b, path_e);
1185 GET_WORD (p, secure_b, secure_e);
1186 GET_WORD (p, expires_b, expires_e);
1187 GET_WORD (p, name_b, name_e);
1189 /* Don't use GET_WORD for value because it ends with newline,
1192 value_e = p + strlen (p);
1193 if (value_e > value_b && value_e[-1] == '\n')
1195 if (value_e > value_b && value_e[-1] == '\r')
1197 /* Empty values are legal (I think), so don't bother checking. */
1199 cookie = cookie_new ();
1201 cookie->attr = strdupdelim (name_b, name_e);
1202 cookie->value = strdupdelim (value_b, value_e);
1203 cookie->path = strdupdelim (path_b, path_e);
1204 cookie->secure = BOUNDED_EQUAL (secure_b, secure_e, "TRUE");
1206 /* Curl source says, quoting Andre Garcia: "flag: A TRUE/FALSE
1207 value indicating if all machines within a given domain can
1208 access the variable. This value is set automatically by the
1209 browser, depending on the value set for the domain." */
1210 cookie->domain_exact = !BOUNDED_EQUAL (domflag_b, domflag_e, "TRUE");
1212 /* DOMAIN needs special treatment because we might need to
1213 extract the port. */
1214 port = domain_port (domain_b, domain_e, (const char **)&domain_e);
1216 cookie->port = port;
1218 if (*domain_b == '.')
1219 ++domain_b; /* remove leading dot internally */
1220 cookie->domain = strdupdelim (domain_b, domain_e);
1222 /* safe default in case EXPIRES field is garbled. */
1223 expiry = (double)cookies_now - 1;
1225 /* I don't like changing the line, but it's safe here. (line is
1228 sscanf (expires_b, "%lf", &expiry);
1232 /* EXPIRY can be 0 for session cookies saved because the
1233 user specified `--keep-session-cookies' in the past.
1234 They remain session cookies, and will be saved only if
1235 the user has specified `keep-session-cookies' again. */
1239 if (expiry < cookies_now)
1240 goto abort_cookie; /* ignore stale cookie. */
1241 cookie->expiry_time = expiry;
1242 cookie->permanent = 1;
1245 store_cookie (jar, cookie);
1251 delete_cookie (cookie);
1258 /* Save cookies, in format described above, to FILE.

   Walks every chain in JAR->chains and writes one tab-separated line
   per cookie after a three-line comment header.  FILE is opened with
   mode w, so existing contents are replaced.  Problems opening,
   writing or closing FILE are reported with logprintf at
   LOG_NOTQUIET.

   NOTE(review): several short lines (braces, local declarations, the
   bodies of some if statements) are absent from this listing; remarks
   below about them are assumptions to confirm against the full
   file.  */
1261 cookie_jar_save (struct cookie_jar *jar, const char *file)
1264 hash_table_iterator iter;
1266 DEBUGP (("Saving cookies to %s.\n", file));
/* Take a fresh timestamp; it is printed in the header below.
   Presumably cookie_expired_p also compares against it -- confirm.  */
1268 cookies_now = time (NULL);
1270 fp = fopen (file, "w");
/* Failure report for the fopen above; the guarding test and any early
   return are not visible in this listing.  */
1273 logprintf (LOG_NOTQUIET, _("Cannot open cookies file %s: %s\n"),
1274 quote (file), strerror (errno));
/* File header: three comment lines followed by an empty line.  */
1278 fputs ("# HTTP cookie file.\n", fp);
1279 fprintf (fp, "# Generated by Wget on %s.\n", datetime_str (cookies_now));
1280 fputs ("# Edit at your own risk.\n\n", fp);
/* Outer loop: one hash table entry at a time.  The key is used as the
   domain string and the value as the first cookie of a chain.  */
1282 for (hash_table_iterate (jar->chains, &iter);
1283 hash_table_iter_next (&iter);
1286 const char *domain = iter.key;
1287 struct cookie *cookie = iter.value;
/* Inner loop: follow the chain through the next pointers.  */
1288 for (; cookie; cookie = cookie->next)
/* Non-permanent (session) cookies are singled out unless
   opt.keep_session_cookies is set.  The statement taken here is not
   visible; presumably the cookie is skipped -- confirm.  */
1290 if (!cookie->permanent && !opt.keep_session_cookies)
/* Likewise for cookies that cookie_expired_p reports as expired.  */
1292 if (cookie_expired_p (cookie))
/* Cookies not bound to the exact domain get special treatment of the
   domain field.  Presumably a leading dot is written, mirroring the
   dot the loader strips, followed by DOMAIN itself; neither statement
   is visible here -- confirm.  */
1294 if (!cookie->domain_exact)
/* An explicit port is appended as :PORT.  */
1297 if (cookie->port != PORT_ANY)
1298 fprintf (fp, ":%d", cookie->port);
/* Remaining fields, tab-separated: domain flag (FALSE for an exact
   domain, TRUE otherwise), path, secure flag, expiry time printed
   with no fractional digits, cookie name, cookie value.  */
1299 fprintf (fp, "\t%s\t%s\t%s\t%.0f\t%s\t%s\n",
1300 cookie->domain_exact ? "FALSE" : "TRUE",
1301 cookie->path, cookie->secure ? "TRUE" : "FALSE",
1302 (double)cookie->expiry_time,
1303 cookie->attr, cookie->value);
/* Report a write failure; the condition guarding this call is not
   visible in this listing.  */
1310 logprintf (LOG_NOTQUIET, _("Error writing to %s: %s\n"),
1311 quote (file), strerror (errno));
/* fclose can fail as well, so its result is checked and reported.  */
1312 if (fclose (fp) < 0)
1313 logprintf (LOG_NOTQUIET, _("Error closing %s: %s\n"),
1314 quote (file), strerror (errno));
1316 DEBUGP (("Done saving cookies.\n"));
1319 /* Clean up cookie-related data.

   Passes every cookie reachable from JAR->chains to delete_cookie and
   then destroys the hash table.  NOTE(review): this listing omits
   some short lines (inner loop header, braces, possibly further
   frees), so whether the table keys and JAR itself are released
   cannot be told from here.  */
1322 cookie_jar_delete (struct cookie_jar *jar)
1324 /* Iterate over chains (indexed by domain) and free them. */
1325 hash_table_iterator iter;
1326 for (hash_table_iterate (jar->chains, &iter); hash_table_iter_next (&iter); )
/* Each table value is the head of a list of cookies linked through
   their next pointers.  */
1328 struct cookie *chain = iter.value;
1330 /* Then all cookies in this chain. */
/* Fetch the successor first: CHAIN is handed to delete_cookie on the
   following line and is presumably freed there, so it should not be
   read afterwards.  */
1333 struct cookie *next = chain->next;
1334 delete_cookie (chain);
/* Finally dispose of the table that held the chains.  */
1338 hash_table_destroy (jar->chains);
1342 /* Test cases. Currently this only tests parse_set_cookie and extract_param. To
1343 use, recompile Wget with -DTEST_COOKIES and call test_cookies()
1350 /* Tests expected to succeed: */
/* Each successful case pairs an input string with the names and
   values it should yield, stored alternately (name, value, ...) and
   ended by NULL.  NOTE(review): the declaration lines around this
   member are not visible in this listing.  */
1353 const char *results[10];
1355 { "arg=value", {"arg", "value", NULL} },
1356 { "arg1=value1;arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
1357 { "arg1=value1; arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
1358 { "arg1=value1; arg2=value2;", {"arg1", "value1", "arg2", "value2", NULL} },
1359 { "arg1=value1; arg2=value2; ", {"arg1", "value1", "arg2", "value2", NULL} },
1360 { "arg1=\"value1\"; arg2=\"\"", {"arg1", "value1", "arg2", "", NULL} },
1361 { "arg=", {"arg", "", NULL} },
1362 { "arg1=; arg2=", {"arg1", "", "arg2", "", NULL} },
1363 { "arg1 = ; arg2= ", {"arg1", "", "arg2", "", NULL} },
1366 /* Tests expected to fail: */
1367 static char *tests_fail[] = {
1369 "arg=\"unterminated",
1371 "arg1=;=another-empty-name",
/* Pass 1: every entry of tests_succ should parse and should split
   into exactly the expected name/value sequence.  */
1375 for (i = 0; i < countof (tests_succ); i++)
1378 const char *data = tests_succ[i].data;
1379 const char **expected = tests_succ[i].results;
/* NOTE(review): the meaning of the second argument (true) is not
   visible in this listing.  */
1382 c = parse_set_cookie (data, true);
/* Diagnostic for a valid input that produced no cookie; the guarding
   test is not visible here.  */
1385 printf ("NULL cookie returned for valid data: %s\n", data);
1389 /* Test whether extract_param handles these cases correctly. */
1391 param_token name, value;
1392 const char *ptr = data;
/* Pull parameters out of DATA one at a time; the semicolon argument
   is presumably the separator -- confirm against extract_param.  */
1394 while (extract_param (&ptr, &name, &value, ';', NULL))
/* Copies of the name and value spans; these are what strcmp compares
   against EXPECTED below.  */
1396 char *n = strdupdelim (name.b, name.e);
1397 char *v = strdupdelim (value.b, value.e);
/* Diagnostic for more parameters than EXPECTED lists; the guarding
   test is not visible here.  */
1400 printf ("Too many parameters for '%s'\n", data);
/* EXPECTED holds name/value pairs (name at j, value at j + 1), so
   j / 2 + 1 is the 1-based number of the parameter being checked.
   The statement that advances j is not visible in this listing.  */
1403 if (0 != strcmp (expected[j], n))
1404 printf ("Invalid name %d for '%s' (expected '%s', got '%s')\n",
1405 j / 2 + 1, data, expected[j], n);
1406 if (0 != strcmp (expected[j + 1], v))
1407 printf ("Invalid value %d for '%s' (expected '%s', got '%s')\n",
1408 j / 2 + 1, data, expected[j + 1], v);
/* Diagnostic for fewer parameters than EXPECTED lists; the guarding
   test is not visible here.  */
1414 printf ("Too few parameters for '%s'\n", data);
/* Pass 2: every entry of tests_fail should be rejected by
   parse_set_cookie.  */
1418 for (i = 0; i < countof (tests_fail); i++)
1421 char *data = tests_fail[i];
1422 c = parse_set_cookie (data, true);
/* Diagnostic for an invalid input that was accepted; the guarding
   test is not visible here.  */
1424 printf ("Failed to report error on invalid data: %s\n", data);
1427 #endif /* TEST_COOKIES */