1 /* Support for cookies.
2 Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009,
3 2010, 2011 Free Software Foundation, Inc.
5 This file is part of GNU Wget.
7 GNU Wget is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or (at
10 your option) any later version.
12 GNU Wget is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with Wget. If not, see <http://www.gnu.org/licenses/>.
20 Additional permission under GNU GPL version 3 section 7
22 If you modify this program, or any covered work, by linking or
23 combining it with the OpenSSL project's OpenSSL library (or a
24 modified version of that library), containing parts covered by the
25 terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
26 grants you additional permission to convey the resulting work.
27 Corresponding Source for a non-source form of such a combination
28 shall include the source code for the parts of OpenSSL used as well
29 as that of the covered work. */
31 /* Written by Hrvoje Niksic. Parts are loosely inspired by the
32 cookie patch submitted by Tomasz Wegrzanowski.
34 This implements the client-side cookie support, as specified
35 (loosely) by Netscape's "preliminary specification", currently available at:
38 http://wp.netscape.com/newsref/std/cookie_spec.html
40 rfc2109 is not supported because of its incompatibilities with the
41 above widely-used specification. rfc2965 is entirely ignored,
42 since popular client software doesn't implement it, and even the
43 sites that do send Set-Cookie2 also emit Set-Cookie for compatibility. */
57 #include "http.h" /* for http_atotm */
59 /* Declarations of `struct cookie' and the most basic functions. */
61 /* Cookie jar serves as cookie storage and a means of retrieving
62 cookies efficiently. All cookies with the same domain are stored
63 in a linked list called "chain". A cookie chain can be reached by
64 looking up the domain in the cookie jar's chains table.
66 For example, to reach all the cookies under google.com, one must
67 execute hash_table_get(jar->chains, "google.com"). Of
68 course, when sending a cookie to `www.google.com', one must search
69 for cookies that belong to either `www.google.com' or `google.com'
70 -- but the point is that the code doesn't need to go through *all* the cookies. */
/* The cookie jar: storage for all cookies known to the program.  */
struct cookie_jar {
  /* Cookie chains indexed by domain.  */
  struct hash_table *chains;

  int cookie_count;             /* number of cookies in the jar. */
};
80 /* Value set by entry point functions, so that the low-level
81 routines don't need to call time() all the time. */
82 static time_t cookies_now;
87 struct cookie_jar *jar = xnew (struct cookie_jar);
88 jar->chains = make_nocase_string_hash_table (0);
89 jar->cookie_count = 0;
/* A single cookie.  Cookies that share a domain are linked together
   through NEXT.  */
struct cookie {
  char *domain;                 /* domain of the cookie */
  int port;                     /* port number */
  char *path;                   /* path prefix of the cookie */

  unsigned discard_requested :1; /* whether cookie was created to
                                    request discarding another
                                    cookie. */

  unsigned secure :1;           /* whether the cookie may only be
                                   transmitted over secure (https)
                                   connections. */
  unsigned domain_exact :1;     /* whether DOMAIN must match as a
                                   whole. */

  unsigned permanent :1;        /* whether the cookie should outlive
                                   the session. */
  time_t expiry_time;           /* time when the cookie expires, 0
                                   means undetermined. */

  char *attr;                   /* cookie attribute name */
  char *value;                  /* cookie attribute value */

  struct cookie *next;          /* used for chaining of cookies in the
                                   same domain. */
};
120 #define PORT_ANY (-1)
122 /* Allocate and return a new, empty cookie structure. */
124 static struct cookie *
127 struct cookie *cookie = xnew0 (struct cookie);
129 /* Both cookie->permanent and cookie->expiry_time are now 0. This
130 means that the cookie doesn't expire, but is only valid for this
131 session (i.e. not written out to disk). */
133 cookie->port = PORT_ANY;
137 /* Non-zero if the cookie has expired. Assumes cookies_now has been
138 set by one of the entry point functions. */
141 cookie_expired_p (const struct cookie *c)
143 return c->expiry_time != 0 && c->expiry_time < cookies_now;
146 /* Deallocate COOKIE and its components. */
149 delete_cookie (struct cookie *cookie)
151 xfree_null (cookie->domain);
152 xfree_null (cookie->path);
153 xfree_null (cookie->attr);
154 xfree_null (cookie->value);
158 /* Functions for storing cookies.
160 All cookies can be reached beginning with jar->chains. The key in
161 that table is the domain name, and the value is a linked list of
162 all cookies from that domain. Every new cookie is placed on the head of its chain. */
165 /* Find and return a cookie in JAR whose domain, path, and attribute
166 name correspond to COOKIE. If found, PREVPTR will point to the
167 location of the cookie previous in chain, or NULL if the found
168 cookie is the head of a chain.
170 If no matching cookie is found, return NULL. */
172 static struct cookie *
173 find_matching_cookie (struct cookie_jar *jar, struct cookie *cookie,
174 struct cookie **prevptr)
176 struct cookie *chain, *prev;
178 chain = hash_table_get (jar->chains, cookie->domain);
183 for (; chain; prev = chain, chain = chain->next)
184 if (0 == strcmp (cookie->path, chain->path)
185 && 0 == strcmp (cookie->attr, chain->attr)
186 && cookie->port == chain->port)
197 /* Store COOKIE to the jar.
199 This is done by placing COOKIE at the head of its chain. However,
200 if COOKIE matches a cookie already in memory, as determined by
201 find_matching_cookie, the old cookie is unlinked and destroyed.
203 The key of each chain's hash table entry is allocated only the
204 first time; next hash_table_put's reuse the same key.

JAR's chains table is looked up by cookie->domain.  COOKIE itself
(not a copy) is put into the table as the new head of its chain,
while a replaced duplicate is freed with delete_cookie.  A DEBUGP
trace describing the stored cookie is emitted at the end. */
207 store_cookie (struct cookie_jar *jar, struct cookie *cookie)
209 struct cookie *chain_head;
212 if (hash_table_get_pair (jar->chains, cookie->domain,
213 &chain_key, &chain_head))
215 /* A chain of cookies in this domain already exists. Check for
216 duplicates -- if an extant cookie exactly matches our domain,
217 port, path, and name, replace it. */
219 struct cookie *victim = find_matching_cookie (jar, cookie, &prev);
223 /* Remove VICTIM from the chain. COOKIE will be placed at
227 prev->next = victim->next;
228 cookie->next = chain_head;
232 /* prev is NULL; apparently VICTIM was at the head of
233 the chain. This place will be taken by COOKIE, so
234 all we need to do is: */
235 cookie->next = victim->next;
237 delete_cookie (victim);
239 DEBUGP (("Deleted old cookie (to be replaced.)\n"));
242 cookie->next = chain_head;
246 /* We are now creating the chain. Use a copy of cookie->domain
247 as the key for the life-time of the chain. Using
248 cookie->domain would be unsafe because the life-time of the
249 chain may exceed the life-time of the cookie. (Cookies may
250 be deleted from the chain by this very function.) */
252 chain_key = xstrdup (cookie->domain);
255 hash_table_put (jar->chains, chain_key, cookie);
260 time_t exptime = cookie->expiry_time;
261 DEBUGP (("\nStored cookie %s %d%s %s <%s> <%s> [expiry %s] %s %s\n",
262 cookie->domain, cookie->port,
263 cookie->port == PORT_ANY ? " (ANY)" : "",
265 cookie->permanent ? "permanent" : "session",
266 cookie->secure ? "secure" : "insecure",
267 cookie->expiry_time ? datetime_str (exptime) : "none",
268 cookie->attr, cookie->value));
272 /* Discard a cookie matching COOKIE's domain, port, path, and
273 attribute name. This gets called when we encounter a cookie whose
274 expiry date is in the past, or whose max-age is set to 0. The
275 former corresponds to netscape cookie spec, while the latter is
276 specified by rfc2109.

COOKIE is handed to find_matching_cookie as the search key; the
cookie that gets unlinked and freed with delete_cookie is the
matching one already stored in JAR.  When that cookie headed its
chain, the hash table entry is either pointed at the next cookie
or, if it was the only one, removed. */
279 discard_matching_cookie (struct cookie_jar *jar, struct cookie *cookie)
281 struct cookie *prev, *victim;
283 if (!hash_table_count (jar->chains))
284 /* No elements == nothing to discard. */
287 victim = find_matching_cookie (jar, cookie, &prev);
291 /* Simply unchain the victim. */
292 prev->next = victim->next;
295 /* VICTIM was head of its chain. We need to place a new
296 cookie at the head. */
297 char *chain_key = NULL;
300 res = hash_table_get_pair (jar->chains, victim->domain,
305 /* VICTIM was the only cookie in the chain. Destroy the
306 chain and deallocate the chain key. */
307 hash_table_remove (jar->chains, victim->domain);
311 hash_table_put (jar->chains, chain_key, victim->next);
313 delete_cookie (victim);
314 DEBUGP (("Discarded old cookie.\n"));
318 /* Functions for parsing the `Set-Cookie' header, and creating new
319 cookies from the wire. */
/* True if TOKEN (a param_token with B and E bounds) matches
   STRING_LITERAL according to BOUNDED_EQUAL_NO_CASE.  TOKEN is
   parenthesized so that any expression, e.g. *ptr, can be passed.  */
#define TOKEN_IS(token, string_literal) \
  BOUNDED_EQUAL_NO_CASE ((token).b, (token).e, string_literal)

/* True if TOKEN has been set and spans at least one character.  */
#define TOKEN_NON_EMPTY(token) \
  ((token).b != NULL && (token).b != (token).e)
/* parse_set_cookie -- build a struct cookie from one Set-Cookie header.

   SET_COOKIE is the header text; it is scanned with extract_param
   using ';' as the separator.  The attribute names tested in the
   code below are domain, path, expires, max-age and secure; anything
   else is ignored.  On a syntax error the position is logged and the
   partially built cookie is freed with delete_cookie.

   NOTE(review): SILENT is not referenced in the lines shown here;
   presumably it suppresses the syntax-error message -- confirm.  */
326 /* Parse the contents of the `Set-Cookie' header. The header looks
329 name1=value1; name2=value2; ...
331 Trailing semicolon is optional; spaces are allowed between all
332 tokens. Additionally, values may be quoted.
334 A new cookie is returned upon success, NULL otherwise.
336 The first name-value pair will be used to set the cookie's
337 attribute name and value. Subsequent parameters will be checked
338 against field names such as `domain', `path', etc. Recognized
339 fields will be parsed and the corresponding members of COOKIE
342 static struct cookie *
343 parse_set_cookie (const char *set_cookie, bool silent)
345 const char *ptr = set_cookie;
346 struct cookie *cookie = cookie_new ();
347 param_token name, value;
349 if (!extract_param (&ptr, &name, &value, ';'))
354 /* If the value is quoted, do not modify it. */
355 if (*(value.b - 1) == '"')
360 cookie->attr = strdupdelim (name.b, name.e);
361 cookie->value = strdupdelim (value.b, value.e);
363 while (extract_param (&ptr, &name, &value, ';'))
365 if (TOKEN_IS (name, "domain"))
367 if (!TOKEN_NON_EMPTY (value))
369 xfree_null (cookie->domain);
370 /* Strictly speaking, we should set cookie->domain_exact if the
371 domain doesn't begin with a dot. But many sites set the
372 domain to "foo.com" and expect "subhost.foo.com" to get the
373 cookie, and it apparently works in browsers. */
376 cookie->domain = strdupdelim (value.b, value.e);
378 else if (TOKEN_IS (name, "path"))
380 if (!TOKEN_NON_EMPTY (value))
382 xfree_null (cookie->path);
383 cookie->path = strdupdelim (value.b, value.e);
385 else if (TOKEN_IS (name, "expires"))
390 if (!TOKEN_NON_EMPTY (value))
392 BOUNDED_TO_ALLOCA (value.b, value.e, value_copy);
394 /* Check if expiration spec is valid.
395 If not, assume default (cookie doesn't expire, but valid only for
397 expires = http_atotm (value_copy);
398 if (expires != (time_t) -1)
400 cookie->permanent = 1;
401 cookie->expiry_time = expires;
402 /* According to netscape's specification, expiry time in
403 the past means that discarding of a matching cookie
405 if (cookie->expiry_time < cookies_now)
406 cookie->discard_requested = 1;
409 else if (TOKEN_IS (name, "max-age"))
414 if (!TOKEN_NON_EMPTY (value))
416 BOUNDED_TO_ALLOCA (value.b, value.e, value_copy);
418 sscanf (value_copy, "%lf", &maxage);
420 /* something went wrong. */
422 cookie->permanent = 1;
423 cookie->expiry_time = cookies_now + maxage;
425 /* According to rfc2109, a cookie with max-age of 0 means that
426 discarding of a matching cookie is requested. */
428 cookie->discard_requested = 1;
430 else if (TOKEN_IS (name, "secure"))
432 /* ignore value completely */
435 /* else: Ignore unrecognized attribute. */
438 /* extract_param has encountered a syntax error */
441 /* The cookie has been successfully constructed; return it. */
446 logprintf (LOG_NOTQUIET,
447 _("Syntax error in Set-Cookie: %s at position %d.\n"),
448 quotearg_style (escape_quoting_style, set_cookie),
449 (int) (ptr - set_cookie));
450 delete_cookie (cookie);
455 #undef TOKEN_NON_EMPTY
457 /* Sanity checks. These are important, otherwise it is possible for
458 malicious attackers to destroy important cookie information and/or
459 violate your privacy. */
/* Helpers for numeric_address_p.  Each one advances P past the
   expected input and makes the enclosing function return false when
   that input is not there.  P must be a plain pointer variable.  */

#define REQUIRE_DIGITS(p) do {                  \
  if (!c_isdigit (*p))                          \
    return false;                               \
  for (++p; c_isdigit (*p); p++)                \
    ;                                           \
} while (0)

#define REQUIRE_DOT(p) do {                     \
  if (*p++ != '.')                              \
    return false;                               \
} while (0)

/* Check whether ADDR matches <digits>.<digits>.<digits>.<digits>.

   We don't want to call network functions like inet_addr() because
   all we need is a check, preferably one that is small, fast, and
   reliable.  Only the shape is verified; the numeric range of the
   components is not.  */

static bool
numeric_address_p (const char *addr)
{
  const char *p = addr;

  REQUIRE_DIGITS (p);           /* A */
  REQUIRE_DOT (p);              /* . */
  REQUIRE_DIGITS (p);           /* B */
  REQUIRE_DOT (p);              /* . */
  REQUIRE_DIGITS (p);           /* C */
  REQUIRE_DOT (p);              /* . */
  REQUIRE_DIGITS (p);           /* D */

  /* Anything left after the fourth component means no match.  */
  if (*p != '\0')
    return false;
  return true;
}
498 /* Check whether COOKIE_DOMAIN is an appropriate domain for HOST.
499 Originally I tried to make the check compliant with rfc2109, but
500 the sites deviated too often, so I had to fall back to "tail
501 matching", as defined by the original Netscape's cookie spec.

COOKIE_DOMAIN is the domain the cookie asks for and HOST is the
host it came from.  The tests below run in this order: a numeric
COOKIE_DOMAIN is compared to HOST with strcmp; a case-insensitive
exact comparison is tried; match_tail is applied to HOST and
COOKIE_DOMAIN; then come the domain-component length heuristic and
the check for a '.' in HOST just before the matched tail. */
504 check_domain_match (const char *cookie_domain, const char *host)
508 /* Numeric address requires exact match. It also requires HOST to
510 if (numeric_address_p (cookie_domain))
511 return 0 == strcmp (cookie_domain, host);
515 /* For the sake of efficiency, check for exact match first. */
516 if (0 == strcasecmp (cookie_domain, host))
521 /* HOST must match the tail of cookie_domain. */
522 if (!match_tail (host, cookie_domain, true))
525 /* We know that COOKIE_DOMAIN is a subset of HOST; however, we must
526 make sure that somebody is not trying to set the cookie for a
527 subdomain shared by many entities. For example, "company.co.uk"
528 must not be allowed to set a cookie for ".co.uk". On the other
529 hand, "sso.redhat.de" should be able to set a cookie for
532 The only marginally sane way to handle this I can think of is to
533 reject on the basis of the length of the second-level domain name
534 (but when the top-level domain is unknown), with the assumption
535 that those of three or less characters could be reserved. For
538 .co.org -> works because the TLD is known
539 .co.uk -> doesn't work because "co" is only two chars long
540 .com.au -> doesn't work because "com" is only 3 chars long
541 .cnn.uk -> doesn't work because "cnn" is also only 3 chars long (ugh)
542 .cnn.de -> doesn't work for the same reason (ugh!!)
543 .abcd.de -> works because "abcd" is 4 chars long
544 .img.cnn.de -> works because it's not trying to set the 2nd level domain
545 .cnn.co.uk -> works for the same reason
547 That should prevent misuse, while allowing reasonable usage. If
548 someone knows of a better way to handle this, please let me
551 const char *p = cookie_domain;
552 int dccount = 1; /* number of domain components */
553 int ldcl = 0; /* last domain component length */
554 int nldcl = 0; /* next to last domain component length */
557 /* Ignore leading period in this calculation. */
560 for (out = 0; !out; p++)
568 /* Empty domain component found -- the domain is invalid. */
570 if (*(p + 1) == '\0')
572 /* Tolerate trailing '.' by not treating the domain as
573 one ending with an empty domain component. */
595 int known_toplevel = false;
596 static const char *known_toplevel_domains[] = {
597 ".com", ".edu", ".net", ".org", ".gov", ".mil", ".int"
599 for (i = 0; i < countof (known_toplevel_domains); i++)
600 if (match_tail (cookie_domain, known_toplevel_domains[i], true))
602 known_toplevel = true;
605 if (!known_toplevel && nldcl <= 3)
612 /* Don't allow the host "foobar.com" to set a cookie for domain
614 if (*cookie_domain != '.')
616 int dlen = strlen (cookie_domain);
617 int hlen = strlen (host);
618 /* cookie host: hostname.foobar.com */
619 /* desired domain: bar.com */
620 /* '.' must be here in host-> ^ */
621 if (hlen > dlen && host[hlen - dlen - 1] != '.')
static int path_matches (const char *full_path, const char *prefix);

/* Check whether PATH begins with COOKIE_PATH.  Note that the argument
   order is the reverse of path_matches.  */

static bool
check_path_match (const char *cookie_path, const char *path)
{
  return path_matches (path, cookie_path) != 0;
}
/* Prepend '/' to string S.  S is copied to fresh stack-allocated
   space and its value is modified to point to the new location.

   S must be an assignable pointer variable.  Because the copy comes
   from alloca, it is valid only until the calling function returns.  */

#define PREPEND_SLASH(s) do {                                   \
  char *PS_newstr = (char *) alloca (1 + strlen (s) + 1);       \
  *PS_newstr = '/';                                             \
  strcpy (PS_newstr + 1, s);                                    \
  s = PS_newstr;                                                \
} while (0)
651 /* Process the HTTP `Set-Cookie' header. This results in storing the
652 cookie or discarding a matching one, or ignoring it completely, all
653 depending on the contents.

JAR is the jar to update; HOST, PORT and PATH describe the request
the header belongs to, and SET_COOKIE is the header text handed to
parse_set_cookie.  cookies_now is refreshed first.  A cookie
without a domain gets a copy of HOST with domain_exact set; a
cookie without a path gets PATH cut after its last '/'.  A domain
rejected by check_domain_match is reported with logprintf, a path
rejected by check_path_match with DEBUGP. */
656 cookie_handle_set_cookie (struct cookie_jar *jar,
657 const char *host, int port,
658 const char *path, const char *set_cookie)
660 struct cookie *cookie;
661 cookies_now = time (NULL);
663 /* Wget's paths don't begin with '/' (blame rfc1808), but cookie
664 usage assumes /-prefixed paths. Until the rest of Wget is fixed,
665 simply prepend slash to PATH. */
666 PREPEND_SLASH (path);
668 cookie = parse_set_cookie (set_cookie, false);
672 /* Sanitize parts of cookie. */
677 /* If the domain was not provided, we use the one we're talking
678 to, and set exact match. */
679 cookie->domain = xstrdup (host);
680 cookie->domain_exact = 1;
681 /* Set the port, but only if it's non-default. */
682 if (port != 80 && port != 443)
687 if (!check_domain_match (cookie->domain, host))
689 logprintf (LOG_NOTQUIET,
690 _("Cookie coming from %s attempted to set domain to %s\n"),
691 quotearg_style (escape_quoting_style, host),
692 quotearg_style (escape_quoting_style, cookie->domain));
693 xfree (cookie->domain);
700 /* The cookie doesn't set path: set it to the URL path, sans the
701 file part ("/dir/file" truncated to "/dir/"). */
702 char *trailing_slash = strrchr (path, '/');
704 cookie->path = strdupdelim (path, trailing_slash + 1);
706 /* no slash in the string -- can this even happen? */
707 cookie->path = xstrdup (path);
711 /* The cookie sets its own path; verify that it is legal. */
712 if (!check_path_match (cookie->path, path))
714 DEBUGP (("Attempt to fake the path: %s, %s\n",
715 cookie->path, path));
720 /* Now store the cookie, or discard an existing cookie, if
721 discarding was requested. */
723 if (cookie->discard_requested)
725 discard_matching_cookie (jar, cookie);
729 store_cookie (jar, cookie);
734 delete_cookie (cookie);
737 /* Support for sending out cookies in HTTP requests, based on
738 previously stored cookies. Entry point is
739 `cookie_header'. */
/* Return a count of how many times CHR occurs in STRING.  STRING
   must be NUL-terminated; the terminator itself is never counted.  */

static int
count_char (const char *string, char chr)
{
  const char *p;
  int count = 0;
  for (p = string; *p; p++)
    if (*p == chr)
      ++count;
  return count;
}
754 /* Find the cookie chains whose domains match HOST and store them to
757 A cookie chain is the head of a list of cookies that belong to a
758 host/domain. Given HOST "img.search.xemacs.org", this function
759 will return the chains for "img.search.xemacs.org",
760 "search.xemacs.org", and "xemacs.org" -- those of them that exist
763 DEST should be large enough to accept (in the worst case) as many
764 elements as there are domain components of HOST.

Chains are written to DEST starting with the one for HOST itself
and moving on to less specific domains.  cookie_header uses the
return value as the number of entries stored in DEST. */
767 find_chains_of_host (struct cookie_jar *jar, const char *host,
768 struct cookie *dest[])
773 /* Bail out quickly if there are no cookies in the jar. */
774 if (!hash_table_count (jar->chains))
777 if (numeric_address_p (host))
778 /* If host is an IP address, only check for the exact match. */
781 /* Otherwise, check all the subdomains except the top-level (last)
782 one. As a domain with N components has N-1 dots, the number of
783 passes equals the number of dots. */
784 passes = count_char (host, '.');
788 /* Find chains that match HOST, starting with exact match and
789 progressing to less specific domains. For instance, given HOST
790 fly.srk.fer.hr, first look for fly.srk.fer.hr's chain, then
791 srk.fer.hr's, then fer.hr's. */
794 struct cookie *chain = hash_table_get (jar->chains, host);
796 dest[dest_count++] = chain;
797 if (++passcnt >= passes)
799 host = strchr (host, '.') + 1;
/* If FULL_PATH begins with PREFIX, return a non-zero "goodness" value
   that grows with the length of PREFIX, zero otherwise.

   The value is the length of PREFIX plus one, so that even an empty
   PREFIX, which every path begins with, is reported as a match.  */

static int
path_matches (const char *full_path, const char *prefix)
{
  size_t len = strlen (prefix);

  if (0 != strncmp (full_path, prefix, len))
    /* FULL_PATH doesn't begin with PREFIX. */
    return 0;

  /* Length of PREFIX determines the quality of the match. */
  return (int) len + 1;
}
821 /* Return true iff COOKIE matches the provided parameters of the URL
822 being downloaded: HOST, PORT, PATH, and SECFLAG.
824 If PATH_GOODNESS is non-NULL, store the "path goodness" value
825 there. That value is a measure of how closely COOKIE matches PATH,
826 used for ordering cookies.

The tests below are made in this order: expiry (cookie_expired_p),
the secure flag against SECFLAG, the port unless it is PORT_ANY,
the exact-domain requirement, and finally the path prefix via
path_matches. */
829 cookie_matches_url (const struct cookie *cookie,
830 const char *host, int port, const char *path,
831 bool secflag, int *path_goodness)
835 if (cookie_expired_p (cookie))
836 /* Ignore stale cookies. Don't bother unchaining the cookie at
837 this point -- Wget is a relatively short-lived application, and
838 stale cookies will not be saved by `save_cookies'. On the
839 other hand, this function should be as efficient as
843 if (cookie->secure && !secflag)
844 /* Don't transmit secure cookies over insecure connections. */
846 if (cookie->port != PORT_ANY && cookie->port != port)
849 /* If exact domain match is required, verify that cookie's domain is
850 equal to HOST. If not, assume success on the grounds of the
851 cookie's chain having been found by find_chains_of_host. */
852 if (cookie->domain_exact
853 && 0 != strcasecmp (host, cookie->domain))
856 pg = path_matches (path, cookie->path);
861 /* If the caller requested path_goodness, we return it. This is
862 an optimization, so that the caller doesn't need to call
863 path_matches() again. */
/* A structure that points to a cookie, along with the additional
   information about the cookie's "goodness".  This allows us to sort
   the cookies before they are sent to the server: a higher value
   means a closer match of the cookie's domain or path.  */

struct weighed_cookie {
  struct cookie *cookie;
  int domain_goodness;
  int path_goodness;
};
879 /* Comparator used for uniquifying the list. */
882 equality_comparator (const void *p1, const void *p2)
884 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
885 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
887 int namecmp = strcmp (wc1->cookie->attr, wc2->cookie->attr);
888 int valuecmp = strcmp (wc1->cookie->value, wc2->cookie->value);
890 /* We only really care whether both name and value are equal. We
891 return them in this order only for consistency... */
892 return namecmp ? namecmp : valuecmp;
895 /* Eliminate duplicate cookies. "Duplicate cookies" are any two
896 cookies with the same attr name and value. Whenever a duplicate
897 pair is found, one of the cookies is removed.

OUTGOING holds COUNT entries and is sorted in place with
equality_comparator as a side effect.  cookie_header assigns the
return value back to its own count of outgoing cookies. */
900 eliminate_dups (struct weighed_cookie *outgoing, int count)
902 struct weighed_cookie *h; /* hare */
903 struct weighed_cookie *t; /* tortoise */
904 struct weighed_cookie *end = outgoing + count;
906 /* We deploy a simple uniquify algorithm: first sort the array
907 according to our sort criteria, then copy it to itself, comparing
908 each cookie to its neighbor and ignoring the duplicates. */
910 qsort (outgoing, count, sizeof (struct weighed_cookie), equality_comparator);
912 /* "Hare" runs through all the entries in the array, followed by
913 "tortoise". If a duplicate is found, the hare skips it.
914 Non-duplicate entries are copied to the tortoise ptr. */
916 for (h = t = outgoing; h < end; h++)
920 struct cookie *c0 = h[0].cookie;
921 struct cookie *c1 = h[1].cookie;
922 if (!strcmp (c0->attr, c1->attr) && !strcmp (c0->value, c1->value))
923 continue; /* ignore the duplicate */
926 /* If the hare has advanced past the tortoise (because of
927 previous dups), make sure the values get copied. Otherwise,
928 no copying is necessary. */
937 /* Comparator used for sorting by quality. */
940 goodness_comparator (const void *p1, const void *p2)
942 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
943 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
945 /* Subtractions take `wc2' as the first argument becauase we want a
946 sort in *decreasing* order of goodness. */
947 int dgdiff = wc2->domain_goodness - wc1->domain_goodness;
948 int pgdiff = wc2->path_goodness - wc1->path_goodness;
950 /* Sort by domain goodness; if these are the same, sort by path
951 goodness. (The sorting order isn't really specified; maybe it
952 should be the other way around.) */
953 return dgdiff ? dgdiff : pgdiff;
956 /* Generate a `Cookie' header for a request that goes to HOST:PORT and
957 requests PATH from the server. The resulting string is allocated
958 with `malloc', and the caller is responsible for freeing it. If no
959 cookies pertain to this request, i.e. no cookie header should be
960 generated, NULL is returned.

JAR is the jar to search.  SECFLAG is handed to cookie_matches_url,
which rejects cookies marked secure when it is false.  The result
consists of name=value pairs separated by "; ", ordered by
goodness_comparator after duplicates have been removed. */
963 cookie_header (struct cookie_jar *jar, const char *host,
964 int port, const char *path, bool secflag)
966 struct cookie **chains;
969 struct cookie *cookie;
970 struct weighed_cookie *outgoing;
973 int result_size, pos;
974 PREPEND_SLASH (path); /* see cookie_handle_set_cookie */
976 /* First, find the cookie chains whose domains match HOST. */
978 /* Allocate room for find_chains_of_host to write to. The number of
979 chains can at most equal the number of subdomains, hence
980 1+<number of dots>. */
981 chains = alloca_array (struct cookie *, 1 + count_char (host, '.'));
982 chain_count = find_chains_of_host (jar, host, chains);
984 /* No cookies for this host. */
988 cookies_now = time (NULL);
990 /* Now extract from the chains those cookies that match our host
991 (for domain_exact cookies), port (for cookies with port other
992 than PORT_ANY), etc. See cookie_matches_url for details. */
994 /* Count the number of matching cookies. */
996 for (i = 0; i < chain_count; i++)
997 for (cookie = chains[i]; cookie; cookie = cookie->next)
998 if (cookie_matches_url (cookie, host, port, path, secflag, NULL))
1001 return NULL; /* no cookies matched */
1003 /* Allocate the array. */
1004 outgoing = alloca_array (struct weighed_cookie, count);
1006 /* Fill the array with all the matching cookies from the chains that
1009 for (i = 0; i < chain_count; i++)
1010 for (cookie = chains[i]; cookie; cookie = cookie->next)
1013 if (!cookie_matches_url (cookie, host, port, path, secflag, &pg))
1015 outgoing[ocnt].cookie = cookie;
1016 outgoing[ocnt].domain_goodness = strlen (cookie->domain);
1017 outgoing[ocnt].path_goodness = pg;
1020 assert (ocnt == count);
1022 /* Eliminate duplicate cookies; that is, those whose name and value
1024 count = eliminate_dups (outgoing, count);
1026 /* Sort the array so that best-matching domains come first, and
1027 that, within one domain, best-matching paths come first. */
1028 qsort (outgoing, count, sizeof (struct weighed_cookie), goodness_comparator);
1030 /* Count the space the name=value pairs will take. */
1032 for (i = 0; i < count; i++)
1034 struct cookie *c = outgoing[i].cookie;
1036 result_size += strlen (c->attr) + 1 + strlen (c->value);
1039 /* Allocate output buffer:
1040 name=value pairs -- result_size
1041 "; " separators -- (count - 1) * 2
1042 \0 terminator -- 1 */
1043 result_size = result_size + (count - 1) * 2 + 1;
1044 result = xmalloc (result_size);
1046 for (i = 0; i < count; i++)
1048 struct cookie *c = outgoing[i].cookie;
1049 int namlen = strlen (c->attr);
1050 int vallen = strlen (c->value);
1052 memcpy (result + pos, c->attr, namlen);
1054 result[pos++] = '=';
1055 memcpy (result + pos, c->value, vallen);
1059 result[pos++] = ';';
1060 result[pos++] = ' ';
1063 result[pos++] = '\0';
1064 assert (pos == result_size);
1068 /* Support for loading and saving cookies. The format used for
1069 loading and saving should be the format of the `cookies.txt' file
1070 used by Netscape and Mozilla, at least the Unix versions.
1071 (Apparently IE can export cookies in that format as well.) The
1072 format goes like this:
1074 DOMAIN DOMAIN-FLAG PATH SECURE-FLAG TIMESTAMP ATTR-NAME ATTR-VALUE
1076 DOMAIN -- cookie domain, optionally followed by :PORT
1077 DOMAIN-FLAG -- whether all hosts in the domain match
1079 SECURE-FLAG -- whether cookie requires secure connection
1080 TIMESTAMP -- expiry timestamp, number of seconds since epoch
1081 ATTR-NAME -- name of the cookie attribute
1082 ATTR-VALUE -- value of the cookie attribute (empty if absent)
1084 The fields are separated by TABs. All fields are mandatory, except
1085 for ATTR-VALUE. The `-FLAG' fields are boolean, their legal values
1086 being "TRUE" and "FALSE". Empty lines, lines consisting of
1087 whitespace only, and comment lines (beginning with # optionally
1088 preceded by whitespace) are ignored.
1090 Example line from cookies.txt (split in two lines for readability):
1092 .google.com TRUE / FALSE 2147368447 \
1093 PREF ID=34bb47565bbcd47b:LD=en:NR=20:TM=985172580:LM=985739012
/* If the region [B, E) ends with :<digits>, parse the number, return
   it, and store new boundary (location of the `:') to DOMAIN_E_PTR.
   If port is not specified, return 0.

   0 is also returned, and DOMAIN_E_PTR left untouched, when something
   other than digits follows the colon or when the number is larger
   than 65535, the highest valid port.  */

static int
domain_port (const char *domain_b, const char *domain_e,
             const char **domain_e_ptr)
{
  int port = 0;
  const char *p;
  const char *colon = memchr (domain_b, ':', domain_e - domain_b);
  if (!colon)
    return 0;
  for (p = colon + 1; p < domain_e && c_isdigit (*p); p++)
    {
      port = 10 * port + (*p - '0');
      if (port > 65535)
        /* Out of range.  Stopping here also keeps PORT from
           overflowing on an arbitrarily long run of digits.  */
        return 0;
    }
  if (p < domain_e)
    /* Garbage following port number. */
    return 0;
  *domain_e_ptr = colon;
  return port;
}
/* Helper for cookie_jar_load, which applies it to each TAB-separated
   field of a line.  The loop below advances P until a TAB or the end
   of the string; the test that follows singles out an empty word
   (B == E) or a word that ran into the end of the string.
   NOTE(review): B and E presumably receive the start and end of the
   word -- confirm.  */
1119 #define GET_WORD(p, b, e) do { \
1121 while (*p && *p != '\t') \
1124 if (b == e || !*p) \
1129 /* Load cookies from FILE.

FILE is opened for reading; a failure to open it is reported with
logprintf.  Lines are obtained with read_whole_line.  Lines that
are empty or start with '#' after leading white-space are ignored.
Each remaining line is split into TAB-separated fields with
GET_WORD, the last field (the value) running to the end of the
line.  A cookie whose expiry time lies before cookies_now is
ignored as stale; the others are added to JAR with store_cookie. */
1132 cookie_jar_load (struct cookie_jar *jar, const char *file)
1135 FILE *fp = fopen (file, "r");
1138 logprintf (LOG_NOTQUIET, _("Cannot open cookies file %s: %s\n"),
1139 quote (file), strerror (errno));
1142 cookies_now = time (NULL);
1144 for (; ((line = read_whole_line (fp)) != NULL); xfree (line))
1146 struct cookie *cookie;
1152 char *domain_b = NULL, *domain_e = NULL;
1153 char *domflag_b = NULL, *domflag_e = NULL;
1154 char *path_b = NULL, *path_e = NULL;
1155 char *secure_b = NULL, *secure_e = NULL;
1156 char *expires_b = NULL, *expires_e = NULL;
1157 char *name_b = NULL, *name_e = NULL;
1158 char *value_b = NULL, *value_e = NULL;
1160 /* Skip leading white-space. */
1161 while (*p && c_isspace (*p))
1163 /* Ignore empty lines. */
1164 if (!*p || *p == '#')
1167 GET_WORD (p, domain_b, domain_e);
1168 GET_WORD (p, domflag_b, domflag_e);
1169 GET_WORD (p, path_b, path_e);
1170 GET_WORD (p, secure_b, secure_e);
1171 GET_WORD (p, expires_b, expires_e);
1172 GET_WORD (p, name_b, name_e);
1174 /* Don't use GET_WORD for value because it ends with newline,
1177 value_e = p + strlen (p);
1178 if (value_e > value_b && value_e[-1] == '\n')
1180 if (value_e > value_b && value_e[-1] == '\r')
1182 /* Empty values are legal (I think), so don't bother checking. */
1184 cookie = cookie_new ();
1186 cookie->attr = strdupdelim (name_b, name_e);
1187 cookie->value = strdupdelim (value_b, value_e);
1188 cookie->path = strdupdelim (path_b, path_e);
1189 cookie->secure = BOUNDED_EQUAL (secure_b, secure_e, "TRUE");
1191 /* Curl source says, quoting Andre Garcia: "flag: A TRUE/FALSE
1192 value indicating if all machines within a given domain can
1193 access the variable. This value is set automatically by the
1194 browser, depending on the value set for the domain." */
1195 cookie->domain_exact = !BOUNDED_EQUAL (domflag_b, domflag_e, "TRUE");
1197 /* DOMAIN needs special treatment because we might need to
1198 extract the port. */
1199 port = domain_port (domain_b, domain_e, (const char **)&domain_e);
1201 cookie->port = port;
1203 if (*domain_b == '.')
1204 ++domain_b; /* remove leading dot internally */
1205 cookie->domain = strdupdelim (domain_b, domain_e);
1207 /* safe default in case EXPIRES field is garbled. */
1208 expiry = (double)cookies_now - 1;
1210 /* I don't like changing the line, but it's safe here. (line is
1213 sscanf (expires_b, "%lf", &expiry);
1217 /* EXPIRY can be 0 for session cookies saved because the
1218 user specified `--keep-session-cookies' in the past.
1219 They remain session cookies, and will be saved only if
1220 the user has specified `keep-session-cookies' again. */
1224 if (expiry < cookies_now)
1225 goto abort_cookie; /* ignore stale cookie. */
1226 cookie->expiry_time = expiry;
1227 cookie->permanent = 1;
1230 store_cookie (jar, cookie);
1236 delete_cookie (cookie);
1241 /* Save the cookies held in JAR to FILE, in the format described above.

        JAR is the cookie jar whose chains hash table is walked.  FILE is
        the path handed to fopen with mode "w".

        Three header lines starting with '#' are written first.  For each
        cookie that is written, the visible output statements emit an
        optional ":PORT" suffix followed by the tab-separated fields
        domain flag, path, secure flag, expiry time, name and value.  This
        is the field order the loader reads back with GET_WORD (domain,
        domain flag, path, secure, expires, name, value).

        Problems opening, writing or closing FILE are reported through
        logprintf at LOG_NOTQUIET.

        NOTE(review): several lines of this function are not visible in
        this listing (return type, braces, the declaration of FP, the
        statements controlled by some of the if tests, and the statement
        that writes the domain itself).  Remarks about them below are
        assumptions to be confirmed against the full file.  */
1244 cookie_jar_save (struct cookie_jar *jar, const char *file)
1247 hash_table_iterator iter;
1249 DEBUGP (("Saving cookies to %s.\n", file));
     /* Refresh the file-wide current time.  It is printed in the header
        below; presumably cookie_expired_p also consults it -- TODO
        confirm.  */
1251 cookies_now = time (NULL);
1253 fp = fopen (file, "w");
     /* NOTE(review): the guard for this message is not visible here;
        presumably it is emitted only when fopen failed, followed by an
        early return -- confirm.  */
1256 logprintf (LOG_NOTQUIET, _("Cannot open cookies file %s: %s\n"),
1257 quote (file), strerror (errno));
     /* Header for human readers: every line starts with '#', and the
        last one is followed by an empty line.  */
1261 fputs ("# HTTP cookie file.\n", fp);
1262 fprintf (fp, "# Generated by Wget on %s.\n", datetime_str (cookies_now));
1263 fputs ("# Edit at your own risk.\n\n", fp);
     /* Walk every entry of the chains table.  The key of an entry is
        used as the domain, and its value is the first cookie of a list
        linked through the next member.  */
1265 for (hash_table_iterate (jar->chains, &iter);
1266 hash_table_iter_next (&iter);
1269 const char *domain = iter.key;
1270 struct cookie *cookie = iter.value;
1271 for (; cookie; cookie = cookie->next)
     /* A non-permanent cookie is of interest only when
        opt.keep_session_cookies is set.  NOTE(review): the controlled
        statement is not visible; presumably the cookie is skipped.  */
1273 if (!cookie->permanent && !opt.keep_session_cookies)
     /* NOTE(review): likewise, presumably an expired cookie is skipped.  */
1275 if (cookie_expired_p (cookie))
     /* NOTE(review): the statements that follow this test are not
        visible.  Presumably a leading '.' is written for non-exact
        domains (the loader strips one), then DOMAIN -- confirm.  */
1277 if (!cookie->domain_exact)
     /* The ":PORT" suffix is written only when the port differs from
        PORT_ANY.  */
1280 if (cookie->port != PORT_ANY)
1281 fprintf (fp, ":%d", cookie->port);
     /* Remaining fields, each preceded by a tab: domain flag (TRUE when
        the domain is not exact), path, secure flag, expiry time printed
        as a double with no fractional digits, name, value.  */
1282 fprintf (fp, "\t%s\t%s\t%s\t%.0f\t%s\t%s\n",
1283 cookie->domain_exact ? "FALSE" : "TRUE",
1284 cookie->path, cookie->secure ? "TRUE" : "FALSE",
1285 (double)cookie->expiry_time,
1286 cookie->attr, cookie->value);
     /* NOTE(review): the condition guarding this message is not visible;
        presumably it is a stream error check on FP -- confirm.  */
1293 logprintf (LOG_NOTQUIET, _("Error writing to %s: %s\n"),
1294 quote (file), strerror (errno));
     /* The result of fclose is checked too, and a failure there is
        reported with its own message.  */
1295 if (fclose (fp) < 0)
1296 logprintf (LOG_NOTQUIET, _("Error closing %s: %s\n"),
1297 quote (file), strerror (errno));
1299 DEBUGP (("Done saving cookies.\n"));
1302 /* Clean up cookie-related data: hand every cookie stored in the
        chains of JAR to delete_cookie, then pass the chains table to
        hash_table_destroy.

        JAR is the cookie jar being dismantled.

        NOTE(review): the return type, braces, the header of the inner
        loop and any statements after hash_table_destroy are not visible
        in this listing.  Whether the table keys and JAR itself are
        released here cannot be told from the visible lines -- confirm
        against the full file.  */
1305 cookie_jar_delete (struct cookie_jar *jar)
1307 /* Iterate over chains (indexed by domain) and free them. */
1308 hash_table_iterator iter;
1309 for (hash_table_iterate (jar->chains, &iter); hash_table_iter_next (&iter); )
     /* The value of each entry is the first cookie of a chain.  */
1311 struct cookie *chain = iter.value;
1313 /* Then all cookies in this chain. */
     /* The successor is read before CHAIN is handed to delete_cookie,
        so CHAIN is not dereferenced after that call.  NOTE(review):
        presumably this sits in a loop that then continues with NEXT;
        the loop header and the advance are not visible.  */
1316 struct cookie *next = chain->next;
1317 delete_cookie (chain);
     /* Finally dispose of the table that held the chains.  */
1321 hash_table_destroy (jar->chains);
1325 /* Test cases.  Currently this only tests parse_set_cookie and extract_param.  To
1326 use, recompile Wget with -DTEST_COOKIES and call test_cookies()
1333 /* Tests expected to succeed: */
     /* Each success case pairs an input string (read below through the
        data member) with the flat sequence NAME, VALUE, NAME, VALUE, ...
        expected from it, terminated by NULL.  NOTE(review): the function
        header and the declaration enclosing this member are not visible
        in this listing.  */
1336 const char *results[10];
1338 { "arg=value", {"arg", "value", NULL} },
1339 { "arg1=value1;arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
1340 { "arg1=value1; arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
1341 { "arg1=value1; arg2=value2;", {"arg1", "value1", "arg2", "value2", NULL} },
1342 { "arg1=value1; arg2=value2; ", {"arg1", "value1", "arg2", "value2", NULL} },
     /* Quoted values are expected back without the surrounding double
        quotes.  */
1343 { "arg1=\"value1\"; arg2=\"\"", {"arg1", "value1", "arg2", "", NULL} },
     /* Empty values are expected to be accepted and reported as empty
        strings, with or without blanks around the equals sign.  */
1344 { "arg=", {"arg", "", NULL} },
1345 { "arg1=; arg2=", {"arg1", "", "arg2", "", NULL} },
1346 { "arg1 = ; arg2= ", {"arg1", "", "arg2", "", NULL} },
1349 /* Tests expected to fail: */
     /* Inputs that parse_set_cookie is expected to report as invalid;
        see the message printed by the second loop below.  Only some of
        the entries are visible in this listing.  */
1350 static char *tests_fail[] = {
1352 "arg=\"unterminated",
1354 "arg1=;=another-empty-name",
     /* First pass: every success case must parse, and must split into
        exactly the expected name/value pairs.  */
1358 for (i = 0; i < countof (tests_succ); i++)
1361 const char *data = tests_succ[i].data;
1362 const char **expected = tests_succ[i].results;
1365 c = parse_set_cookie (data, true);
     /* NOTE(review): the guard for this message is not visible;
        presumably it is printed when C is NULL -- confirm.  */
1368 printf ("NULL cookie returned for valid data: %s\n", data);
1372 /* Test whether extract_param handles these cases correctly. */
1374 param_token name, value;
1375 const char *ptr = data;
     /* Walk the same input again with extract_param, using ';' as the
        separator, and compare each token pair against EXPECTED.  */
1377 while (extract_param (&ptr, &name, &value, ';'))
     /* Copies of the token ranges, so they can be compared with strcmp
        and printed with %s.  Presumably strdupdelim returns a freshly
        allocated NUL-terminated string -- confirm; any release of N and
        V is not visible in this listing.  */
1379 char *n = strdupdelim (name.b, name.e);
1380 char *v = strdupdelim (value.b, value.e);
     /* NOTE(review): guard not visible; presumably reached when
        EXPECTED has already run out at index J.  */
1383 printf ("Too many parameters for '%s'\n", data);
     /* expected[j] is the name and expected[j + 1] the value of the
        current pair, so j / 2 + 1 is the 1-based number of the pair.  */
1386 if (0 != strcmp (expected[j], n))
1387 printf ("Invalid name %d for '%s' (expected '%s', got '%s')\n",
1388 j / 2 + 1, data, expected[j], n);
1389 if (0 != strcmp (expected[j + 1], v))
1390 printf ("Invalid value %d for '%s' (expected '%s', got '%s')\n",
1391 j / 2 + 1, data, expected[j + 1], v);
     /* NOTE(review): guard not visible; presumably reached when
        EXPECTED still has entries left after extract_param stopped.  */
1397 printf ("Too few parameters for '%s'\n", data);
     /* Second pass: every failure case must be rejected.  */
1401 for (i = 0; i < countof (tests_fail); i++)
1404 char *data = tests_fail[i];
1405 c = parse_set_cookie (data, true);
     /* NOTE(review): guard not visible; presumably printed when a
        cookie was returned for the invalid input.  */
1407 printf ("Failed to report error on invalid data: %s\n", data);
1410 #endif /* TEST_COOKIES */