1 /* Support for cookies.
2 Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009,
3 2010, 2011 Free Software Foundation, Inc.
5 This file is part of GNU Wget.
7 GNU Wget is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or (at
10 your option) any later version.
12 GNU Wget is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with Wget. If not, see <http://www.gnu.org/licenses/>.
20 Additional permission under GNU GPL version 3 section 7
22 If you modify this program, or any covered work, by linking or
23 combining it with the OpenSSL project's OpenSSL library (or a
24 modified version of that library), containing parts covered by the
25 terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
26 grants you additional permission to convey the resulting work.
27 Corresponding Source for a non-source form of such a combination
28 shall include the source code for the parts of OpenSSL used as well
29 as that of the covered work. */
31 /* Written by Hrvoje Niksic. Parts are loosely inspired by the
32 cookie patch submitted by Tomasz Wegrzanowski.
34 This implements the client-side cookie support, as specified
35 (loosely) by Netscape's "preliminary specification", currently
38 http://wp.netscape.com/newsref/std/cookie_spec.html
40 rfc2109 is not supported because of its incompatibilities with the
41 above widely-used specification. rfc2965 is entirely ignored,
42 since popular client software doesn't implement it, and even the
43 sites that do send Set-Cookie2 also emit Set-Cookie for
57 #include "http.h" /* for http_atotm */
59 /* Declarations of `struct cookie' and the most basic functions. */
61 /* Cookie jar serves as cookie storage and a means of retrieving
62 cookies efficiently. All cookies with the same domain are stored
63 in a linked list called "chain". A cookie chain can be reached by
64 looking up the domain in the cookie jar's chains table.
66 For example, to reach all the cookies under google.com, one must
67 execute hash_table_get(jar->chains, "google.com"). Of
68 course, when sending a cookie to `www.google.com', one must search
69 for cookies that belong to either `www.google.com' or `google.com'
70 -- but the point is that the code doesn't need to go through *all*
74 /* Cookie chains indexed by domain. */
75 struct hash_table *chains;
77 int cookie_count; /* number of cookies in the jar. */
80 /* Value set by entry point functions, so that the low-level
81 routines don't need to call time() all the time. */
82 static time_t cookies_now;
87 struct cookie_jar *jar = xnew (struct cookie_jar);
88 jar->chains = make_nocase_string_hash_table (0);
89 jar->cookie_count = 0;
94 char *domain; /* domain of the cookie */
95 int port; /* port number */
96 char *path; /* path prefix of the cookie */
98 unsigned discard_requested :1; /* whether cookie was created to
99 request discarding another
102 unsigned secure :1; /* whether cookie should be
103 transmitted over non-https
105 unsigned domain_exact :1; /* whether DOMAIN must match as a
108 unsigned permanent :1; /* whether the cookie should outlive
110 time_t expiry_time; /* time when the cookie expires, 0
111 means undetermined. */
113 char *attr; /* cookie attribute name */
114 char *value; /* cookie attribute value */
116 struct cookie *next; /* used for chaining of cookies in the
120 #define PORT_ANY (-1)
122 /* Allocate and return a new, empty cookie structure. */
124 static struct cookie *
127 struct cookie *cookie = xnew0 (struct cookie);
129 /* Both cookie->permanent and cookie->expiry_time are now 0. This
130 means that the cookie doesn't expire, but is only valid for this
131 session (i.e. not written out to disk). */
133 cookie->port = PORT_ANY;
137 /* Non-zero if the cookie has expired. Assumes cookies_now has been
138 set by one of the entry point functions. */
141 cookie_expired_p (const struct cookie *c)
143 return c->expiry_time != 0 && c->expiry_time < cookies_now;
146 /* Deallocate COOKIE and its components. */
149 delete_cookie (struct cookie *cookie)
151 xfree_null (cookie->domain);
152 xfree_null (cookie->path);
153 xfree_null (cookie->attr);
154 xfree_null (cookie->value);
158 /* Functions for storing cookies.
160 All cookies can be reached beginning with jar->chains. The key in
161 that table is the domain name, and the value is a linked list of
162 all cookies from that domain. Every new cookie is placed on the
165 /* Find and return a cookie in JAR whose domain, path, and attribute
166 name correspond to COOKIE. If found, PREVPTR will point to the
167 location of the cookie previous in chain, or NULL if the found
168 cookie is the head of a chain.
170 If no matching cookie is found, return NULL. */
172 static struct cookie *
173 find_matching_cookie (struct cookie_jar *jar, struct cookie *cookie,
174 struct cookie **prevptr)
176 struct cookie *chain, *prev;
178 chain = hash_table_get (jar->chains, cookie->domain);
183 for (; chain; prev = chain, chain = chain->next)
184 if (0 == strcmp (cookie->path, chain->path)
185 && 0 == strcmp (cookie->attr, chain->attr)
186 && cookie->port == chain->port)
197 /* Store COOKIE to the jar.
199 This is done by placing COOKIE at the head of its chain. However,
200 if COOKIE matches a cookie already in memory, as determined by
201 find_matching_cookie, the old cookie is unlinked and destroyed.
203 The key of each chain's hash table entry is allocated only the
204 first time; next hash_table_put's reuse the same key. */
207 store_cookie (struct cookie_jar *jar, struct cookie *cookie)
209 struct cookie *chain_head;
212 if (hash_table_get_pair (jar->chains, cookie->domain,
213 &chain_key, &chain_head))
215 /* A chain of cookies in this domain already exists. Check for
216 duplicates -- if an extant cookie exactly matches our domain,
217 port, path, and name, replace it. */
219 struct cookie *victim = find_matching_cookie (jar, cookie, &prev);
223 /* Remove VICTIM from the chain. COOKIE will be placed at
227 prev->next = victim->next;
228 cookie->next = chain_head;
232 /* prev is NULL; apparently VICTIM was at the head of
233 the chain. This place will be taken by COOKIE, so
234 all we need to do is: */
235 cookie->next = victim->next;
237 delete_cookie (victim);
239 DEBUGP (("Deleted old cookie (to be replaced.)\n"));
242 cookie->next = chain_head;
246 /* We are now creating the chain. Use a copy of cookie->domain
247 as the key for the life-time of the chain. Using
248 cookie->domain would be unsafe because the life-time of the
249 chain may exceed the life-time of the cookie. (Cookies may
250 be deleted from the chain by this very function.) */
252 chain_key = xstrdup (cookie->domain);
255 hash_table_put (jar->chains, chain_key, cookie);
260 time_t exptime = cookie->expiry_time;
261 DEBUGP (("\nStored cookie %s %d%s %s <%s> <%s> [expiry %s] %s %s\n",
262 cookie->domain, cookie->port,
263 cookie->port == PORT_ANY ? " (ANY)" : "",
265 cookie->permanent ? "permanent" : "session",
266 cookie->secure ? "secure" : "insecure",
267 cookie->expiry_time ? datetime_str (exptime) : "none",
268 cookie->attr, cookie->value));
272 /* Discard a cookie matching COOKIE's domain, port, path, and
273 attribute name. This gets called when we encounter a cookie whose
274 expiry date is in the past, or whose max-age is set to 0. The
275 former corresponds to netscape cookie spec, while the latter is
276 specified by rfc2109. */
279 discard_matching_cookie (struct cookie_jar *jar, struct cookie *cookie)
281 struct cookie *prev, *victim;
283 if (!hash_table_count (jar->chains))
284 /* No elements == nothing to discard. */
287 victim = find_matching_cookie (jar, cookie, &prev);
291 /* Simply unchain the victim. */
292 prev->next = victim->next;
295 /* VICTIM was head of its chain. We need to place a new
296 cookie at the head. */
297 char *chain_key = NULL;
300 res = hash_table_get_pair (jar->chains, victim->domain,
305 /* VICTIM was the only cookie in the chain. Destroy the
306 chain and deallocate the chain key. */
307 hash_table_remove (jar->chains, victim->domain);
311 hash_table_put (jar->chains, chain_key, victim->next);
313 delete_cookie (victim);
314 DEBUGP (("Discarded old cookie.\n"));
318 /* Functions for parsing the `Set-Cookie' header, and creating new
319 cookies from the wire. */
321 #define TOKEN_IS(token, string_literal) \
322 BOUNDED_EQUAL_NO_CASE (token.b, token.e, string_literal)
324 #define TOKEN_NON_EMPTY(token) (token.b != NULL && token.b != token.e)
326 /* Parse the contents of the `Set-Cookie' header. The header looks
329 name1=value1; name2=value2; ...
331 Trailing semicolon is optional; spaces are allowed between all
332 tokens. Additionally, values may be quoted.
334 A new cookie is returned upon success, NULL otherwise.
336 The first name-value pair will be used to set the cookie's
337 attribute name and value. Subsequent parameters will be checked
338 against field names such as `domain', `path', etc. Recognized
339 fields will be parsed and the corresponding members of COOKIE
342 static struct cookie *
343 parse_set_cookie (const char *set_cookie, bool silent)
345 const char *ptr = set_cookie;
346 struct cookie *cookie = cookie_new ();
347 param_token name, value;
349 if (!extract_param (&ptr, &name, &value, ';'))
354 /* If the value is quoted, do not modify it. */
355 if (*(value.b - 1) == '"')
360 cookie->attr = strdupdelim (name.b, name.e);
361 cookie->value = strdupdelim (value.b, value.e);
363 while (extract_param (&ptr, &name, &value, ';'))
365 if (TOKEN_IS (name, "domain"))
367 if (!TOKEN_NON_EMPTY (value))
369 xfree_null (cookie->domain);
370 /* Strictly speaking, we should set cookie->domain_exact if the
371 domain doesn't begin with a dot. But many sites set the
372 domain to "foo.com" and expect "subhost.foo.com" to get the
373 cookie, and it apparently works in browsers. */
376 cookie->domain = strdupdelim (value.b, value.e);
378 else if (TOKEN_IS (name, "path"))
380 if (!TOKEN_NON_EMPTY (value))
382 xfree_null (cookie->path);
383 cookie->path = strdupdelim (value.b, value.e);
385 else if (TOKEN_IS (name, "expires"))
390 if (!TOKEN_NON_EMPTY (value))
392 BOUNDED_TO_ALLOCA (value.b, value.e, value_copy);
394 /* Check if expiration spec is valid.
395 If not, assume default (cookie doesn't expire, but valid only for
397 expires = http_atotm (value_copy);
398 if (expires != (time_t) -1)
400 cookie->permanent = 1;
401 cookie->expiry_time = expires;
402 /* According to netscape's specification, expiry time in
403 the past means that discarding of a matching cookie
405 if (cookie->expiry_time < cookies_now)
406 cookie->discard_requested = 1;
409 else if (TOKEN_IS (name, "max-age"))
414 if (!TOKEN_NON_EMPTY (value))
416 BOUNDED_TO_ALLOCA (value.b, value.e, value_copy);
418 sscanf (value_copy, "%lf", &maxage);
420 /* something went wrong. */
422 cookie->permanent = 1;
423 cookie->expiry_time = cookies_now + maxage;
425 /* According to rfc2109, a cookie with max-age of 0 means that
426 discarding of a matching cookie is requested. */
428 cookie->discard_requested = 1;
430 else if (TOKEN_IS (name, "secure"))
432 /* ignore value completely */
435 /* else: Ignore unrecognized attribute. */
438 /* extract_param has encountered a syntax error */
441 /* The cookie has been successfully constructed; return it. */
446 logprintf (LOG_NOTQUIET,
447 _("Syntax error in Set-Cookie: %s at position %d.\n"),
448 quotearg_style (escape_quoting_style, set_cookie),
449 (int) (ptr - set_cookie));
450 delete_cookie (cookie);
455 #undef TOKEN_NON_EMPTY
457 /* Sanity checks. These are important, otherwise it is possible for
458 malicious attackers to destroy important cookie information and/or
459 violate your privacy. */
462 #define REQUIRE_DIGITS(p) do { \
463 if (!c_isdigit (*p)) \
465 for (++p; c_isdigit (*p); p++) \
469 #define REQUIRE_DOT(p) do { \
474 /* Check whether ADDR matches <digits>.<digits>.<digits>.<digits>.
476 We don't want to call network functions like inet_addr() because
477 all we need is a check, preferably one that is small, fast, and
481 numeric_address_p (const char *addr)
483 const char *p = addr;
485 REQUIRE_DIGITS (p); /* A */
486 REQUIRE_DOT (p); /* . */
487 REQUIRE_DIGITS (p); /* B */
488 REQUIRE_DOT (p); /* . */
489 REQUIRE_DIGITS (p); /* C */
490 REQUIRE_DOT (p); /* . */
491 REQUIRE_DIGITS (p); /* D */
498 /* Check whether COOKIE_DOMAIN is an appropriate domain for HOST.
499 Originally I tried to make the check compliant with rfc2109, but
500 the sites deviated too often, so I had to fall back to "tail
501 matching", as defined by the original Netscape's cookie spec. */
504 check_domain_match (const char *cookie_domain, const char *host)
508 /* Numeric address requires exact match. It also requires HOST to
510 if (numeric_address_p (cookie_domain))
511 return 0 == strcmp (cookie_domain, host);
515 /* For the sake of efficiency, check for exact match first. */
516 if (0 == strcasecmp (cookie_domain, host))
521 /* COOKIE_DOMAIN must match the tail of HOST. */
522 if (!match_tail (host, cookie_domain, true))
525 /* We know that COOKIE_DOMAIN is a subset of HOST; however, we must
526 make sure that somebody is not trying to set the cookie for a
527 subdomain shared by many entities. For example, "company.co.uk"
528 must not be allowed to set a cookie for ".co.uk". On the other
529 hand, "sso.redhat.de" should be able to set a cookie for
532 The only marginally sane way to handle this I can think of is to
533 reject on the basis of the length of the second-level domain name
534 (but only when the top-level domain is unknown), with the assumption
535 that those of three or fewer characters could be reserved. For
538 .co.org -> works because the TLD is known
539 .co.uk -> doesn't work because "co" is only two chars long
540 .com.au -> doesn't work because "com" is only 3 chars long
541 .cnn.uk -> doesn't work because "cnn" is also only 3 chars long (ugh)
542 .cnn.de -> doesn't work for the same reason (ugh!!)
543 .abcd.de -> works because "abcd" is 4 chars long
544 .img.cnn.de -> works because it's not trying to set the 2nd level domain
545 .cnn.co.uk -> works for the same reason
547 That should prevent misuse, while allowing reasonable usage. If
548 someone knows of a better way to handle this, please let me
551 const char *p = cookie_domain;
552 int dccount = 1; /* number of domain components */
553 int ldcl = 0; /* last domain component length */
554 int nldcl = 0; /* next to last domain component length */
557 /* Ignore leading period in this calculation. */
560 for (out = 0; !out; p++)
568 /* Empty domain component found -- the domain is invalid. */
570 if (*(p + 1) == '\0')
572 /* Tolerate trailing '.' by not treating the domain as
573 one ending with an empty domain component. */
595 int known_toplevel = false;
596 static const char *known_toplevel_domains[] = {
597 ".com", ".edu", ".net", ".org", ".gov", ".mil", ".int"
599 for (i = 0; i < countof (known_toplevel_domains); i++)
600 if (match_tail (cookie_domain, known_toplevel_domains[i], true))
602 known_toplevel = true;
605 if (!known_toplevel && nldcl <= 3)
612 /* Don't allow the host "foobar.com" to set a cookie for domain
614 if (*cookie_domain != '.')
616 int dlen = strlen (cookie_domain);
617 int hlen = strlen (host);
618 /* cookie host: hostname.foobar.com */
619 /* desired domain: bar.com */
620 /* '.' must be here in host-> ^ */
621 if (hlen > dlen && host[hlen - dlen - 1] != '.')
630 static int path_matches (const char *, const char *);
632 /* Check whether PATH begins with COOKIE_PATH. */
635 check_path_match (const char *cookie_path, const char *path)
637 return path_matches (path, cookie_path) != 0;
640 /* Prepend '/' to string S. S is copied to fresh stack-allocated
641 space and its value is modified to point to the new location. */
643 #define PREPEND_SLASH(s) do { \
644 char *PS_newstr = (char *) alloca (1 + strlen (s) + 1); \
646 strcpy (PS_newstr + 1, s); \
651 /* Process the HTTP `Set-Cookie' header. This results in storing the
652 cookie or discarding a matching one, or ignoring it completely, all
653 depending on the contents. */
656 cookie_handle_set_cookie (struct cookie_jar *jar,
657 const char *host, int port,
658 const char *path, const char *set_cookie)
660 struct cookie *cookie;
661 cookies_now = time (NULL);
663 /* Wget's paths don't begin with '/' (blame rfc1808), but cookie
664 usage assumes /-prefixed paths. Until the rest of Wget is fixed,
665 simply prepend slash to PATH. */
666 PREPEND_SLASH (path);
668 cookie = parse_set_cookie (set_cookie, false);
672 /* Sanitize parts of cookie. */
676 cookie->domain = xstrdup (host);
677 cookie->domain_exact = 1;
678 /* Set the port, but only if it's non-default. */
679 if (port != 80 && port != 443)
684 if (!check_domain_match (cookie->domain, host))
686 logprintf (LOG_NOTQUIET,
687 _("Cookie coming from %s attempted to set domain to "),
688 quotearg_style (escape_quoting_style, host));
689 logprintf (LOG_NOTQUIET,
691 quotearg_style (escape_quoting_style, cookie->domain));
692 cookie->discard_requested = true;
698 /* The cookie doesn't set path: set it to the URL path, sans the
699 file part ("/dir/file" truncated to "/dir/"). */
700 char *trailing_slash = strrchr (path, '/');
702 cookie->path = strdupdelim (path, trailing_slash + 1);
704 /* no slash in the string -- can this even happen? */
705 cookie->path = xstrdup (path);
709 /* The cookie sets its own path; verify that it is legal. */
710 if (!check_path_match (cookie->path, path))
712 DEBUGP (("Attempt to fake the path: %s, %s\n",
713 cookie->path, path));
718 /* Now store the cookie, or discard an existing cookie, if
719 discarding was requested. */
721 if (cookie->discard_requested)
723 discard_matching_cookie (jar, cookie);
727 store_cookie (jar, cookie);
732 delete_cookie (cookie);
735 /* Support for sending out cookies in HTTP requests, based on
736 previously stored cookies. Entry point is
737 `cookie_header'. */
739 /* Return a count of how many times CHR occurs in STRING. */
742 count_char (const char *string, char chr)
746 for (p = string; *p; p++)
752 /* Find the cookie chains whose domains match HOST and store them to
755 A cookie chain is the head of a list of cookies that belong to a
756 host/domain. Given HOST "img.search.xemacs.org", this function
757 will return the chains for "img.search.xemacs.org",
758 "search.xemacs.org", and "xemacs.org" -- those of them that exist
761 DEST should be large enough to accept (in the worst case) as many
762 elements as there are domain components of HOST. */
765 find_chains_of_host (struct cookie_jar *jar, const char *host,
766 struct cookie *dest[])
771 /* Bail out quickly if there are no cookies in the jar. */
772 if (!hash_table_count (jar->chains))
775 if (numeric_address_p (host))
776 /* If host is an IP address, only check for the exact match. */
779 /* Otherwise, check all the subdomains except the top-level (last)
780 one. As a domain with N components has N-1 dots, the number of
781 passes equals the number of dots. */
782 passes = count_char (host, '.');
786 /* Find chains that match HOST, starting with exact match and
787 progressing to less specific domains. For instance, given HOST
788 fly.srk.fer.hr, first look for fly.srk.fer.hr's chain, then
789 srk.fer.hr's, then fer.hr's. */
792 struct cookie *chain = hash_table_get (jar->chains, host);
794 dest[dest_count++] = chain;
795 if (++passcnt >= passes)
797 host = strchr (host, '.') + 1;
803 /* If FULL_PATH begins with PREFIX, return the length of PREFIX, zero
807 path_matches (const char *full_path, const char *prefix)
809 int len = strlen (prefix);
811 if (0 != strncmp (full_path, prefix, len))
812 /* FULL_PATH doesn't begin with PREFIX. */
815 /* Length of PREFIX determines the quality of the match. */
819 /* Return true iff COOKIE matches the provided parameters of the URL
820 being downloaded: HOST, PORT, PATH, and SECFLAG.
822 If PATH_GOODNESS is non-NULL, store the "path goodness" value
823 there. That value is a measure of how closely COOKIE matches PATH,
824 used for ordering cookies. */
827 cookie_matches_url (const struct cookie *cookie,
828 const char *host, int port, const char *path,
829 bool secflag, int *path_goodness)
833 if (cookie_expired_p (cookie))
834 /* Ignore stale cookies. Don't bother unchaining the cookie at
835 this point -- Wget is a relatively short-lived application, and
836 stale cookies will not be saved by `save_cookies'. On the
837 other hand, this function should be as efficient as
841 if (cookie->secure && !secflag)
842 /* Don't transmit secure cookies over insecure connections. */
844 if (cookie->port != PORT_ANY && cookie->port != port)
847 /* If exact domain match is required, verify that cookie's domain is
848 equal to HOST. If not, assume success on the grounds of the
849 cookie's chain having been found by find_chains_of_host. */
850 if (cookie->domain_exact
851 && 0 != strcasecmp (host, cookie->domain))
854 pg = path_matches (path, cookie->path);
859 /* If the caller requested path_goodness, we return it. This is
860 an optimization, so that the caller doesn't need to call
861 path_matches() again. */
866 /* A structure that points to a cookie, along with the additional
867 information about the cookie's "goodness". This allows us to sort
868 the cookies when returning them to the server, as required by the
871 struct weighed_cookie {
872 struct cookie *cookie;
877 /* Comparator used for uniquifying the list. */
880 equality_comparator (const void *p1, const void *p2)
882 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
883 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
885 int namecmp = strcmp (wc1->cookie->attr, wc2->cookie->attr);
886 int valuecmp = strcmp (wc1->cookie->value, wc2->cookie->value);
888 /* We only really care whether both name and value are equal. We
889 return them in this order only for consistency... */
890 return namecmp ? namecmp : valuecmp;
893 /* Eliminate duplicate cookies. "Duplicate cookies" are any two
894 cookies with the same attr name and value. Whenever a duplicate
895 pair is found, one of the cookies is removed. */
898 eliminate_dups (struct weighed_cookie *outgoing, int count)
900 struct weighed_cookie *h; /* hare */
901 struct weighed_cookie *t; /* tortoise */
902 struct weighed_cookie *end = outgoing + count;
904 /* We deploy a simple uniquify algorithm: first sort the array
905 according to our sort criteria, then copy it to itself, comparing
906 each cookie to its neighbor and ignoring the duplicates. */
908 qsort (outgoing, count, sizeof (struct weighed_cookie), equality_comparator);
910 /* "Hare" runs through all the entries in the array, followed by
911 "tortoise". If a duplicate is found, the hare skips it.
912 Non-duplicate entries are copied to the tortoise ptr. */
914 for (h = t = outgoing; h < end; h++)
918 struct cookie *c0 = h[0].cookie;
919 struct cookie *c1 = h[1].cookie;
920 if (!strcmp (c0->attr, c1->attr) && !strcmp (c0->value, c1->value))
921 continue; /* ignore the duplicate */
924 /* If the hare has advanced past the tortoise (because of
925 previous dups), make sure the values get copied. Otherwise,
926 no copying is necessary. */
935 /* Comparator used for sorting by quality. */
938 goodness_comparator (const void *p1, const void *p2)
940 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
941 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
943 /* Subtractions take `wc2' as the first argument because we want a
944 sort in *decreasing* order of goodness. */
945 int dgdiff = wc2->domain_goodness - wc1->domain_goodness;
946 int pgdiff = wc2->path_goodness - wc1->path_goodness;
948 /* Sort by domain goodness; if these are the same, sort by path
949 goodness. (The sorting order isn't really specified; maybe it
950 should be the other way around.) */
951 return dgdiff ? dgdiff : pgdiff;
954 /* Generate a `Cookie' header for a request that goes to HOST:PORT and
955 requests PATH from the server. The resulting string is allocated
956 with `malloc', and the caller is responsible for freeing it. If no
957 cookies pertain to this request, i.e. no cookie header should be
958 generated, NULL is returned. */
961 cookie_header (struct cookie_jar *jar, const char *host,
962 int port, const char *path, bool secflag)
964 struct cookie **chains;
967 struct cookie *cookie;
968 struct weighed_cookie *outgoing;
971 int result_size, pos;
972 PREPEND_SLASH (path); /* see cookie_handle_set_cookie */
974 /* First, find the cookie chains whose domains match HOST. */
976 /* Allocate room for find_chains_of_host to write to. The number of
977 chains can at most equal the number of subdomains, hence
978 1+<number of dots>. */
979 chains = alloca_array (struct cookie *, 1 + count_char (host, '.'));
980 chain_count = find_chains_of_host (jar, host, chains);
982 /* No cookies for this host. */
986 cookies_now = time (NULL);
988 /* Now extract from the chains those cookies that match our host
989 (for domain_exact cookies), port (for cookies with port other
990 than PORT_ANY), etc. See cookie_matches_url for details. */
992 /* Count the number of matching cookies. */
994 for (i = 0; i < chain_count; i++)
995 for (cookie = chains[i]; cookie; cookie = cookie->next)
996 if (cookie_matches_url (cookie, host, port, path, secflag, NULL))
999 return NULL; /* no cookies matched */
1001 /* Allocate the array. */
1002 outgoing = alloca_array (struct weighed_cookie, count);
1004 /* Fill the array with all the matching cookies from the chains that
1007 for (i = 0; i < chain_count; i++)
1008 for (cookie = chains[i]; cookie; cookie = cookie->next)
1011 if (!cookie_matches_url (cookie, host, port, path, secflag, &pg))
1013 outgoing[ocnt].cookie = cookie;
1014 outgoing[ocnt].domain_goodness = strlen (cookie->domain);
1015 outgoing[ocnt].path_goodness = pg;
1018 assert (ocnt == count);
1020 /* Eliminate duplicate cookies; that is, those whose name and value
1022 count = eliminate_dups (outgoing, count);
1024 /* Sort the array so that best-matching domains come first, and
1025 that, within one domain, best-matching paths come first. */
1026 qsort (outgoing, count, sizeof (struct weighed_cookie), goodness_comparator);
1028 /* Count the space the name=value pairs will take. */
1030 for (i = 0; i < count; i++)
1032 struct cookie *c = outgoing[i].cookie;
1034 result_size += strlen (c->attr) + 1 + strlen (c->value);
1037 /* Allocate output buffer:
1038 name=value pairs -- result_size
1039 "; " separators -- (count - 1) * 2
1040 \0 terminator -- 1 */
1041 result_size = result_size + (count - 1) * 2 + 1;
1042 result = xmalloc (result_size);
1044 for (i = 0; i < count; i++)
1046 struct cookie *c = outgoing[i].cookie;
1047 int namlen = strlen (c->attr);
1048 int vallen = strlen (c->value);
1050 memcpy (result + pos, c->attr, namlen);
1052 result[pos++] = '=';
1053 memcpy (result + pos, c->value, vallen);
1057 result[pos++] = ';';
1058 result[pos++] = ' ';
1061 result[pos++] = '\0';
1062 assert (pos == result_size);
1066 /* Support for loading and saving cookies. The format used for
1067 loading and saving should be the format of the `cookies.txt' file
1068 used by Netscape and Mozilla, at least the Unix versions.
1069 (Apparently IE can export cookies in that format as well.) The
1070 format goes like this:
1072 DOMAIN DOMAIN-FLAG PATH SECURE-FLAG TIMESTAMP ATTR-NAME ATTR-VALUE
1074 DOMAIN -- cookie domain, optionally followed by :PORT
1075 DOMAIN-FLAG -- whether all hosts in the domain match
1077 SECURE-FLAG -- whether cookie requires secure connection
1078 TIMESTAMP -- expiry timestamp, number of seconds since epoch
1079 ATTR-NAME -- name of the cookie attribute
1080 ATTR-VALUE -- value of the cookie attribute (empty if absent)
1082 The fields are separated by TABs. All fields are mandatory, except
1083 for ATTR-VALUE. The `-FLAG' fields are boolean, their legal values
1084 being "TRUE" and "FALSE". Empty lines, lines consisting of
1085 whitespace only, and comment lines (beginning with # optionally
1086 preceded by whitespace) are ignored.
1088 Example line from cookies.txt (split in two lines for readability):
1090 .google.com TRUE / FALSE 2147368447 \
1091 PREF ID=34bb47565bbcd47b:LD=en:NR=20:TM=985172580:LM=985739012
1095 /* If the region [B, E) ends with :<digits>, parse the number, return
1096 it, and store new boundary (location of the `:') to DOMAIN_E_PTR.
1097 If port is not specified, return 0. */
1100 domain_port (const char *domain_b, const char *domain_e,
1101 const char **domain_e_ptr)
1105 const char *colon = memchr (domain_b, ':', domain_e - domain_b);
1108 for (p = colon + 1; p < domain_e && c_isdigit (*p); p++)
1109 port = 10 * port + (*p - '0');
1111 /* Garbage following port number. */
1113 *domain_e_ptr = colon;
1117 #define GET_WORD(p, b, e) do { \
1119 while (*p && *p != '\t') \
1122 if (b == e || !*p) \
1127 /* Load cookies from FILE. */
1130 cookie_jar_load (struct cookie_jar *jar, const char *file)
1135 FILE *fp = fopen (file, "r");
1138 logprintf (LOG_NOTQUIET, _("Cannot open cookies file %s: %s\n"),
1139 quote (file), strerror (errno));
1143 cookies_now = time (NULL);
1145 while (getline (&line, &bufsize, fp) > 0)
1147 struct cookie *cookie;
1153 char *domain_b = NULL, *domain_e = NULL;
1154 char *domflag_b = NULL, *domflag_e = NULL;
1155 char *path_b = NULL, *path_e = NULL;
1156 char *secure_b = NULL, *secure_e = NULL;
1157 char *expires_b = NULL, *expires_e = NULL;
1158 char *name_b = NULL, *name_e = NULL;
1159 char *value_b = NULL, *value_e = NULL;
1161 /* Skip leading white-space. */
1162 while (*p && c_isspace (*p))
1164 /* Ignore empty lines. */
1165 if (!*p || *p == '#')
1168 GET_WORD (p, domain_b, domain_e);
1169 GET_WORD (p, domflag_b, domflag_e);
1170 GET_WORD (p, path_b, path_e);
1171 GET_WORD (p, secure_b, secure_e);
1172 GET_WORD (p, expires_b, expires_e);
1173 GET_WORD (p, name_b, name_e);
1175 /* Don't use GET_WORD for value because it ends with newline,
1178 value_e = p + strlen (p);
1179 if (value_e > value_b && value_e[-1] == '\n')
1181 if (value_e > value_b && value_e[-1] == '\r')
1183 /* Empty values are legal (I think), so don't bother checking. */
1185 cookie = cookie_new ();
1187 cookie->attr = strdupdelim (name_b, name_e);
1188 cookie->value = strdupdelim (value_b, value_e);
1189 cookie->path = strdupdelim (path_b, path_e);
1190 cookie->secure = BOUNDED_EQUAL (secure_b, secure_e, "TRUE");
1192 /* Curl source says, quoting Andre Garcia: "flag: A TRUE/FALSE
1193 value indicating if all machines within a given domain can
1194 access the variable. This value is set automatically by the
1195 browser, depending on the value set for the domain." */
1196 cookie->domain_exact = !BOUNDED_EQUAL (domflag_b, domflag_e, "TRUE");
1198 /* DOMAIN needs special treatment because we might need to
1199 extract the port. */
1200 port = domain_port (domain_b, domain_e, (const char **)&domain_e);
1202 cookie->port = port;
1204 if (*domain_b == '.')
1205 ++domain_b; /* remove leading dot internally */
1206 cookie->domain = strdupdelim (domain_b, domain_e);
1208 /* safe default in case EXPIRES field is garbled. */
1209 expiry = (double)cookies_now - 1;
1211 /* I don't like changing the line, but it's safe here. (line is
1214 sscanf (expires_b, "%lf", &expiry);
1218 /* EXPIRY can be 0 for session cookies saved because the
1219 user specified `--keep-session-cookies' in the past.
1220 They remain session cookies, and will be saved only if
1221 the user has specified `--keep-session-cookies' again. */
1225 if (expiry < cookies_now)
1226 goto abort_cookie; /* ignore stale cookie. */
1227 cookie->expiry_time = expiry;
1228 cookie->permanent = 1;
1231 store_cookie (jar, cookie);
1237 delete_cookie (cookie);
1244 /* Save cookies, in format described above, to FILE. */
1247 cookie_jar_save (struct cookie_jar *jar, const char *file)
1250 hash_table_iterator iter;
1252 DEBUGP (("Saving cookies to %s.\n", file));
1254 cookies_now = time (NULL);
1256 fp = fopen (file, "w");
1259 logprintf (LOG_NOTQUIET, _("Cannot open cookies file %s: %s\n"),
1260 quote (file), strerror (errno));
1264 fputs ("# HTTP cookie file.\n", fp);
1265 fprintf (fp, "# Generated by Wget on %s.\n", datetime_str (cookies_now));
1266 fputs ("# Edit at your own risk.\n\n", fp);
1268 for (hash_table_iterate (jar->chains, &iter);
1269 hash_table_iter_next (&iter);
1272 const char *domain = iter.key;
1273 struct cookie *cookie = iter.value;
1274 for (; cookie; cookie = cookie->next)
1276 if (!cookie->permanent && !opt.keep_session_cookies)
1278 if (cookie_expired_p (cookie))
1280 if (!cookie->domain_exact)
1283 if (cookie->port != PORT_ANY)
1284 fprintf (fp, ":%d", cookie->port);
1285 fprintf (fp, "\t%s\t%s\t%s\t%.0f\t%s\t%s\n",
1286 cookie->domain_exact ? "FALSE" : "TRUE",
1287 cookie->path, cookie->secure ? "TRUE" : "FALSE",
1288 (double)cookie->expiry_time,
1289 cookie->attr, cookie->value);
1296 logprintf (LOG_NOTQUIET, _("Error writing to %s: %s\n"),
1297 quote (file), strerror (errno));
1298 if (fclose (fp) < 0)
1299 logprintf (LOG_NOTQUIET, _("Error closing %s: %s\n"),
1300 quote (file), strerror (errno));
1302 DEBUGP (("Done saving cookies.\n"));
1305 /* Clean up cookie-related data. */
1308 cookie_jar_delete (struct cookie_jar *jar)
1310 /* Iterate over chains (indexed by domain) and free them. */
1311 hash_table_iterator iter;
1312 for (hash_table_iterate (jar->chains, &iter); hash_table_iter_next (&iter); )
1314 struct cookie *chain = iter.value;
1316 /* Then all cookies in this chain. */
1319 struct cookie *next = chain->next;
1320 delete_cookie (chain);
1324 hash_table_destroy (jar->chains);
/* Test cases.  Currently this only tests parse_set_cookie and
   extract_param.  To use, recompile Wget with -DTEST_COOKIES and call
   test_cookies().

   Each problem found is reported with printf on standard output;
   nothing is returned.  */

#ifdef TEST_COOKIES
void
test_cookies (void)
{
  /* Tests expected to succeed.  RESULTS lists the expected parameters
     as name, value, name, value, ..., terminated by NULL.  */
  static struct {
    const char *data;
    const char *results[10];
  } tests_succ[] = {
    { "arg=value", {"arg", "value", NULL} },
    { "arg1=value1;arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
    { "arg1=value1; arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
    { "arg1=value1;  arg2=value2;", {"arg1", "value1", "arg2", "value2", NULL} },
    { "arg1=value1;  arg2=value2;  ", {"arg1", "value1", "arg2", "value2", NULL} },
    { "arg1=\"value1\"; arg2=\"\"", {"arg1", "value1", "arg2", "", NULL} },
    { "arg=", {"arg", "", NULL} },
    { "arg1=; arg2=", {"arg1", "", "arg2", "", NULL} },
    { "arg1 = ; arg2= ", {"arg1", "", "arg2", "", NULL} },
  };

  /* Tests expected to fail: */
  static const char *tests_fail[] = {
    ";",
    "arg=\"unterminated",
    "=empty-name",
    "arg1=;=another-empty-name",
  };
  int i;

  for (i = 0; i < countof (tests_succ); i++)
    {
      const char *data = tests_succ[i].data;
      const char **expected = tests_succ[i].results;
      struct cookie *c;

      c = parse_set_cookie (data, true);
      if (!c)
        {
          printf ("NULL cookie returned for valid data: %s\n", data);
          continue;
        }

      /* Test whether extract_param handles these cases correctly. */
      {
        param_token name, value;
        const char *ptr = data;
        int j = 0;
        while (extract_param (&ptr, &name, &value, ';'))
          {
            char *n;
            char *v;

            /* Check for overrun before copying anything, so that
               leaving the loop early has nothing to release.  */
            if (!expected[j])
              {
                printf ("Too many parameters for '%s'\n", data);
                break;
              }
            n = strdupdelim (name.b, name.e);
            v = strdupdelim (value.b, value.e);
            if (0 != strcmp (expected[j], n))
              printf ("Invalid name %d for '%s' (expected '%s', got '%s')\n",
                      j / 2 + 1, data, expected[j], n);
            if (0 != strcmp (expected[j + 1], v))
              printf ("Invalid value %d for '%s' (expected '%s', got '%s')\n",
                      j / 2 + 1, data, expected[j + 1], v);
            j += 2;
            xfree (n);
            xfree (v);
          }
        if (expected[j])
          printf ("Too few parameters for '%s'\n", data);
      }

      /* Only the fact that parsing succeeded is of interest; do not
         leak the cookie.  */
      delete_cookie (c);
    }

  for (i = 0; i < countof (tests_fail); i++)
    {
      struct cookie *c;
      const char *data = tests_fail[i];
      c = parse_set_cookie (data, true);
      if (c)
        {
          printf ("Failed to report error on invalid data: %s\n", data);
          delete_cookie (c);
        }
    }
}
#endif /* TEST_COOKIES */