1 /* Support for cookies.
2 Copyright (C) 2001-2006 Free Software Foundation, Inc.
4 This file is part of GNU Wget.
6 GNU Wget is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or (at
9 your option) any later version.
11 GNU Wget is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Wget; if not, write to the Free Software Foundation, Inc.,
18 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 In addition, as a special exception, the Free Software Foundation
21 gives permission to link the code of its release of Wget with the
22 OpenSSL project's "OpenSSL" library (or with modified versions of it
23 that use the same license as the "OpenSSL" library), and distribute
24 the linked executables. You must obey the GNU General Public License
25 in all respects for all of the code used other than "OpenSSL". If you
26 modify this file, you may extend this exception to your version of the
27 file, but you are not obligated to do so. If you do not wish to do
28 so, delete this exception statement from your version. */
30 /* Written by Hrvoje Niksic. Parts are loosely inspired by the
31 cookie patch submitted by Tomasz Wegrzanowski.
33 This implements the client-side cookie support, as specified
34 (loosely) by Netscape's "preliminary specification", currently
37 http://wp.netscape.com/newsref/std/cookie_spec.html
39 rfc2109 is not supported because of its incompatibilities with the
40 above widely-used specification. rfc2965 is entirely ignored,
41 since popular client software doesn't implement it, and even the
42 sites that do send Set-Cookie2 also emit Set-Cookie for
58 #include "http.h" /* for http_atotm */
60 /* Declarations of `struct cookie' and the most basic functions. */
62 /* Cookie jar serves as cookie storage and a means of retrieving
63 cookies efficiently. All cookies with the same domain are stored
64 in a linked list called "chain". A cookie chain can be reached by
65 looking up the domain in the cookie jar's chains table.
67 For example, to reach all the cookies under google.com, one must
68 execute hash_table_get(jar->chains, "google.com"). Of
69 course, when sending a cookie to `www.google.com', one must search
70 for cookies that belong to either `www.google.com' or `google.com'
71 -- but the point is that the code doesn't need to go through *all*
75 /* Cookie chains indexed by domain.  Each value stored in the table
   is the head of a linked list of cookies, which
   find_matching_cookie walks through the `next' member.
   NOTE(review): the `struct cookie_jar {' opener and the closing
   brace are not visible in this listing.  */
76 struct hash_table *chains;
78 int cookie_count; /* number of cookies in the jar. */
81 /* Value set by entry point functions, so that the low-level
82 routines don't need to call time() all the time.  It is assigned
   from time (NULL) in cookie_handle_set_cookie, cookie_header,
   cookie_jar_load and cookie_jar_save, and read by (among others)
   cookie_expired_p and parse_set_cookie. */
83 static time_t cookies_now;
/* Allocate a cookie jar whose chain table comes from
   make_nocase_string_hash_table and whose cookie count starts at 0.
   NOTE(review): the function header and the end of the body are not
   visible in this listing -- presumably this is the jar constructor;
   confirm against the full file.  */
88 struct cookie_jar *jar = xnew (struct cookie_jar);
89 jar->chains = make_nocase_string_hash_table (0);
90 jar->cookie_count = 0;
/* Members of `struct cookie', followed by the PORT_ANY constant that
   cookie_matches_url treats as "matches every port".
   NOTE(review): the struct opener/closer are not visible in this
   listing and several member comments below are cut off
   mid-sentence.  */
95 char *domain; /* domain of the cookie */
96 int port; /* port number */
97 char *path; /* path prefix of the cookie */
99 unsigned discard_requested :1; /* whether cookie was created to
100 request discarding another
103 unsigned secure :1; /* whether cookie must NOT be
104 transmitted over non-https
106 unsigned domain_exact :1; /* whether DOMAIN must match as a
109 unsigned permanent :1; /* whether the cookie should outlive
111 time_t expiry_time; /* time when the cookie expires, 0
112 means undetermined. */
114 char *attr; /* cookie attribute name */
115 char *value; /* cookie attribute value */
117 struct cookie *next; /* used for chaining of cookies in the
121 #define PORT_ANY (-1)
123 /* Allocate and return a new, empty cookie structure.  The memory
   comes from xnew0 and the port is preset to PORT_ANY.
   NOTE(review): the function-name line, the braces and the return
   statement are not visible in this listing.  */
125 static struct cookie *
128 struct cookie *cookie = xnew0 (struct cookie);
130 /* Both cookie->permanent and cookie->expiry_time are now 0. This
131 means that the cookie doesn't expire, but is only valid for this
132 session (i.e. not written out to disk). */
134 cookie->port = PORT_ANY;
138 /* Non-zero if the cookie has expired. Assumes cookies_now has been
139 set by one of the entry point functions.  An expiry_time of 0 is
   never treated as expired; otherwise the cookie is expired once
   expiry_time is strictly earlier than cookies_now. */
142 cookie_expired_p (const struct cookie *c)
144 return c->expiry_time != 0 && c->expiry_time < cookies_now;
147 /* Deallocate COOKIE and its components: the domain, path, attr and
   value strings are each released with xfree_null.
   NOTE(review): the line that frees COOKIE itself is not visible in
   this listing.  */
150 delete_cookie (struct cookie *cookie)
152 xfree_null (cookie->domain);
153 xfree_null (cookie->path);
154 xfree_null (cookie->attr);
155 xfree_null (cookie->value);
159 /* Functions for storing cookies.
161 All cookies can be reached beginning with jar->chains. The key in
162 that table is the domain name, and the value is a linked list of
163 all cookies from that domain. Every new cookie is placed on the
166 /* Find and return a cookie in JAR whose domain, port, path, and attribute
167 name correspond to COOKIE. If found, PREVPTR will point to the
168 location of the cookie previous in chain, or NULL if the found
169 cookie is the head of a chain.
171 If no matching cookie is found, return NULL.
   The chain is fetched from jar->chains using COOKIE's domain; path
   and attribute name are compared with strcmp and the port with ==.  */
173 static struct cookie *
174 find_matching_cookie (struct cookie_jar *jar, struct cookie *cookie,
175 struct cookie **prevptr)
177 struct cookie *chain, *prev;
179 chain = hash_table_get (jar->chains, cookie->domain);
184 for (; chain; prev = chain, chain = chain->next)
185 if (0 == strcmp (cookie->path, chain->path)
186 && 0 == strcmp (cookie->attr, chain->attr)
187 && cookie->port == chain->port)
198 /* Store COOKIE to the jar.
200 This is done by placing COOKIE at the head of its chain. However,
201 if COOKIE matches a cookie already in memory, as determined by
202 find_matching_cookie, the old cookie is unlinked and destroyed.
204 The key of each chain's hash table entry is allocated only the
205 first time; later hash_table_put calls reuse the same key.  That
   key is an xstrdup'ed copy of the domain, not cookie->domain itself.  */
208 store_cookie (struct cookie_jar *jar, struct cookie *cookie)
210 struct cookie *chain_head;
213 if (hash_table_get_pair (jar->chains, cookie->domain,
214 &chain_key, &chain_head))
216 /* A chain of cookies in this domain already exists. Check for
217 duplicates -- if an extant cookie exactly matches our domain,
218 port, path, and name, replace it. */
220 struct cookie *victim = find_matching_cookie (jar, cookie, &prev);
224 /* Remove VICTIM from the chain. COOKIE will be placed at
228 prev->next = victim->next;
229 cookie->next = chain_head;
233 /* prev is NULL; apparently VICTIM was at the head of
234 the chain. This place will be taken by COOKIE, so
235 all we need to do is: */
236 cookie->next = victim->next;
238 delete_cookie (victim);
240 DEBUGP (("Deleted old cookie (to be replaced.)\n"));
243 cookie->next = chain_head;
247 /* We are now creating the chain. Use a copy of cookie->domain
248 as the key for the life-time of the chain. Using
249 cookie->domain would be unsafe because the life-time of the
250 chain may exceed the life-time of the cookie. (Cookies may
251 be deleted from the chain by this very function.) */
253 chain_key = xstrdup (cookie->domain);
256 hash_table_put (jar->chains, chain_key, cookie);
261 time_t exptime = cookie->expiry_time;
262 DEBUGP (("\nStored cookie %s %d%s %s <%s> <%s> [expiry %s] %s %s\n",
263 cookie->domain, cookie->port,
264 cookie->port == PORT_ANY ? " (ANY)" : "",
266 cookie->permanent ? "permanent" : "session",
267 cookie->secure ? "secure" : "insecure",
268 cookie->expiry_time ? datetime_str (exptime) : "none",
269 cookie->attr, cookie->value));
273 /* Discard a cookie matching COOKIE's domain, port, path, and
274 attribute name. This gets called when we encounter a cookie whose
275 expiry date is in the past, or whose max-age is set to 0. The
276 former corresponds to netscape cookie spec, while the latter is
277 specified by rfc2109.
   The victim is located with find_matching_cookie.  When it heads its
   chain, the chain entry in jar->chains is either removed or
   re-pointed at the victim's successor, keeping the existing key.  */
280 discard_matching_cookie (struct cookie_jar *jar, struct cookie *cookie)
282 struct cookie *prev, *victim;
284 if (!hash_table_count (jar->chains))
285 /* No elements == nothing to discard. */
288 victim = find_matching_cookie (jar, cookie, &prev);
292 /* Simply unchain the victim. */
293 prev->next = victim->next;
296 /* VICTIM was head of its chain. We need to place a new
297 cookie at the head. */
298 char *chain_key = NULL;
301 res = hash_table_get_pair (jar->chains, victim->domain,
306 /* VICTIM was the only cookie in the chain. Destroy the
307 chain and deallocate the chain key. */
308 hash_table_remove (jar->chains, victim->domain);
312 hash_table_put (jar->chains, chain_key, victim->next);
314 delete_cookie (victim);
315 DEBUGP (("Discarded old cookie.\n"));
319 /* Functions for parsing the `Set-Cookie' header, and creating new
320 cookies from the wire. */
/* Compare the [b, e) span of TOKEN against STRING_LITERAL using
   BOUNDED_EQUAL_NO_CASE (presumably case-insensitive, going by the
   helper's name).  */
322 #define TOKEN_IS(token, string_literal) \
323 BOUNDED_EQUAL_NO_CASE (token.b, token.e, string_literal)
/* True when TOKEN has a non-NULL start and spans at least one
   character.  */
325 #define TOKEN_NON_EMPTY(token) (token.b != NULL && token.b != token.e)
327 /* Parse the contents of the `Set-Cookie' header. The header looks
330 name1=value1; name2=value2; ...
332 Trailing semicolon is optional; spaces are allowed between all
333 tokens. Additionally, values may be quoted.
335 A new cookie is returned upon success, NULL otherwise.
337 The first name-value pair will be used to set the cookie's
338 attribute name and value. Subsequent parameters will be checked
339 against field names such as `domain', `path', etc. Recognized
340 fields will be parsed and the corresponding members of COOKIE
   [NOTE(review): the end of this sentence and the closing of this
   comment are missing from this listing.]
   The fields visibly handled below are domain, path, expires, max-age
   and secure; any other attribute is ignored.  On a syntax error the
   cookie is released with delete_cookie.
343 static struct cookie *
344 parse_set_cookie (const char *set_cookie, bool silent)
346 const char *ptr = set_cookie;
347 struct cookie *cookie = cookie_new ();
348 param_token name, value;
350 if (!extract_param (&ptr, &name, &value, ';'))
354 cookie->attr = strdupdelim (name.b, name.e);
355 cookie->value = strdupdelim (value.b, value.e);
357 while (extract_param (&ptr, &name, &value, ';'))
359 if (TOKEN_IS (name, "domain"))
361 if (!TOKEN_NON_EMPTY (value))
363 xfree_null (cookie->domain);
364 /* Strictly speaking, we should set cookie->domain_exact if the
365 domain doesn't begin with a dot. But many sites set the
366 domain to "foo.com" and expect "subhost.foo.com" to get the
367 cookie, and it apparently works in browsers. */
370 cookie->domain = strdupdelim (value.b, value.e);
372 else if (TOKEN_IS (name, "path"))
374 if (!TOKEN_NON_EMPTY (value))
376 xfree_null (cookie->path);
377 cookie->path = strdupdelim (value.b, value.e);
379 else if (TOKEN_IS (name, "expires"))
384 if (!TOKEN_NON_EMPTY (value))
/* NOTE(review): BOUNDED_TO_ALLOCA presumably copies the value onto
   the stack; its length comes straight from the server's header --
   confirm that an oversized value cannot exhaust the stack.  */
386 BOUNDED_TO_ALLOCA (value.b, value.e, value_copy);
388 expires = http_atotm (value_copy);
389 if (expires != (time_t) -1)
391 cookie->permanent = 1;
392 cookie->expiry_time = expires;
393 /* According to netscape's specification, expiry time in
394 the past means that discarding of a matching cookie
396 if (cookie->expiry_time < cookies_now)
397 cookie->discard_requested = 1;
400 /* Error in expiration spec. Assume default (cookie doesn't
401 expire, but valid only for this session.) */
404 else if (TOKEN_IS (name, "max-age"))
409 if (!TOKEN_NON_EMPTY (value))
411 BOUNDED_TO_ALLOCA (value.b, value.e, value_copy);
/* NOTE(review): sscanf's result is not checked on the visible line;
   the "something went wrong" test below presumably relies on a
   sentinel assigned to MAXAGE on a line missing from this listing --
   confirm.  */
413 sscanf (value_copy, "%lf", &maxage);
415 /* something went wrong. */
417 cookie->permanent = 1;
418 cookie->expiry_time = cookies_now + maxage;
420 /* According to rfc2109, a cookie with max-age of 0 means that
421 discarding of a matching cookie is requested. */
423 cookie->discard_requested = 1;
425 else if (TOKEN_IS (name, "secure"))
427 /* ignore value completely */
431 /* Ignore unrecognized attribute. */
435 /* extract_param has encountered a syntax error */
438 /* The cookie has been successfully constructed; return it. */
443 logprintf (LOG_NOTQUIET,
444 _("Syntax error in Set-Cookie: %s at position %d.\n"),
445 escnonprint (set_cookie), (int) (ptr - set_cookie));
446 delete_cookie (cookie);
451 #undef TOKEN_NON_EMPTY
453 /* Sanity checks. These are important, otherwise it is possible for
454 malicious attackers to destroy important cookie information and/or
455 violate your privacy. */
/* Helper macros for numeric_address_p.  NOTE(review): both macro
   bodies are only partly visible in this listing; the one visible
   statement advances P across a run of ISDIGIT characters.  */
458 #define REQUIRE_DIGITS(p) do { \
461 for (++p; ISDIGIT (*p); p++) \
465 #define REQUIRE_DOT(p) do { \
470 /* Check whether ADDR matches <digits>.<digits>.<digits>.<digits>.
   Only the shape is tested: four digit runs separated by dots.  No
   range check on the individual numbers is visible in this listing.
472 We don't want to call network functions like inet_addr() because
473 all we need is a check, preferably one that is small, fast, and
477 numeric_address_p (const char *addr)
479 const char *p = addr;
481 REQUIRE_DIGITS (p); /* A */
482 REQUIRE_DOT (p); /* . */
483 REQUIRE_DIGITS (p); /* B */
484 REQUIRE_DOT (p); /* . */
485 REQUIRE_DIGITS (p); /* C */
486 REQUIRE_DOT (p); /* . */
487 REQUIRE_DIGITS (p); /* D */
494 /* Check whether COOKIE_DOMAIN is an appropriate domain for HOST.
495 Originally I tried to make the check compliant with rfc2109, but
496 the sites deviated too often, so I had to fall back to "tail
497 matching", as defined by the original Netscape's cookie spec.
   Numeric cookie domains are compared to HOST with strcmp; all other
   domains go through strcasecmp and match_tail.  */
500 check_domain_match (const char *cookie_domain, const char *host)
504 /* Numeric address requires exact match. It also requires HOST to
506 if (numeric_address_p (cookie_domain))
507 return 0 == strcmp (cookie_domain, host);
511 /* For the sake of efficiency, check for exact match first. */
512 if (0 == strcasecmp (cookie_domain, host))
517 /* COOKIE_DOMAIN must match the tail of HOST. */
518 if (!match_tail (host, cookie_domain, true))
521 /* We know that COOKIE_DOMAIN is a subset of HOST; however, we must
522 make sure that somebody is not trying to set the cookie for a
523 subdomain shared by many entities. For example, "company.co.uk"
524 must not be allowed to set a cookie for ".co.uk". On the other
525 hand, "sso.redhat.de" should be able to set a cookie for
528 The only marginally sane way to handle this I can think of is to
529 reject on the basis of the length of the second-level domain name
530 (but only when the top-level domain is unknown), with the assumption
531 that those of three or fewer characters could be reserved. For
534 .co.org -> works because the TLD is known
535 .co.uk -> doesn't work because "co" is only two chars long
536 .com.au -> doesn't work because "com" is only 3 chars long
537 .cnn.uk -> doesn't work because "cnn" is also only 3 chars long (ugh)
538 .cnn.de -> doesn't work for the same reason (ugh!!)
539 .abcd.de -> works because "abcd" is 4 chars long
540 .img.cnn.de -> works because it's not trying to set the 2nd level domain
541 .cnn.co.uk -> works for the same reason
543 That should prevent misuse, while allowing reasonable usage. If
544 someone knows of a better way to handle this, please let me
547 const char *p = cookie_domain;
548 int dccount = 1; /* number of domain components */
549 int ldcl = 0; /* last domain component length */
550 int nldcl = 0; /* next to last domain component length */
553 /* Ignore leading period in this calculation. */
556 for (out = 0; !out; p++)
564 /* Empty domain component found -- the domain is invalid. */
566 if (*(p + 1) == '\0')
568 /* Tolerate trailing '.' by not treating the domain as
569 one ending with an empty domain component. */
591 int known_toplevel = false;
592 static const char *known_toplevel_domains[] = {
593 ".com", ".edu", ".net", ".org", ".gov", ".mil", ".int"
595 for (i = 0; i < countof (known_toplevel_domains); i++)
596 if (match_tail (cookie_domain, known_toplevel_domains[i], true))
598 known_toplevel = true;
601 if (!known_toplevel && nldcl <= 3)
608 /* Don't allow the host "foobar.com" to set a cookie for domain
610 if (*cookie_domain != '.')
612 int dlen = strlen (cookie_domain);
613 int hlen = strlen (host);
614 /* cookie host: hostname.foobar.com */
615 /* desired domain: bar.com */
616 /* '.' must be here in host-> ^ */
617 if (hlen > dlen && host[hlen - dlen - 1] != '.')
/* Forward declaration; path_matches is defined further down.  */
626 static int path_matches (const char *, const char *);
628 /* Check whether PATH begins with COOKIE_PATH.  Implemented as a
   nonzero-result test on path_matches (PATH, COOKIE_PATH).  */
631 check_path_match (const char *cookie_path, const char *path)
633 return path_matches (path, cookie_path) != 0;
636 /* Prepend '/' to string S. S is copied to fresh stack-allocated
637 space and its value is modified to point to the new location.
   The space comes from alloca, and S appears more than once in the
   expansion, so pass a plain variable.
   NOTE(review): part of the macro body is not visible in this
   listing.  */
639 #define PREPEND_SLASH(s) do { \
640 char *PS_newstr = (char *) alloca (1 + strlen (s) + 1); \
642 strcpy (PS_newstr + 1, s); \
647 /* Process the HTTP `Set-Cookie' header. This results in storing the
648 cookie or discarding a matching one, or ignoring it completely, all
649 depending on the contents.
   HOST, PORT and PATH describe the URL the header came from;
   SET_COOKIE is the header value handed to parse_set_cookie.
   cookies_now is refreshed before anything else happens.  */
652 cookie_handle_set_cookie (struct cookie_jar *jar,
653 const char *host, int port,
654 const char *path, const char *set_cookie)
656 struct cookie *cookie;
657 cookies_now = time (NULL);
659 /* Wget's paths don't begin with '/' (blame rfc1808), but cookie
660 usage assumes /-prefixed paths. Until the rest of Wget is fixed,
661 simply prepend slash to PATH. */
662 PREPEND_SLASH (path);
664 cookie = parse_set_cookie (set_cookie, false);
668 /* Sanitize parts of cookie. */
673 /* If the domain was not provided, we use the one we're talking
674 to, and set exact match. */
675 cookie->domain = xstrdup (host);
676 cookie->domain_exact = 1;
677 /* Set the port, but only if it's non-default. */
678 if (port != 80 && port != 443)
683 if (!check_domain_match (cookie->domain, host))
685 logprintf (LOG_NOTQUIET,
686 _("Cookie coming from %s attempted to set domain to %s\n"),
687 escnonprint (host), escnonprint (cookie->domain));
688 xfree (cookie->domain);
695 /* The cookie doesn't set path: set it to the URL path, sans the
696 file part ("/dir/file" truncated to "/dir/"). */
697 char *trailing_slash = strrchr (path, '/');
699 cookie->path = strdupdelim (path, trailing_slash + 1);
701 /* no slash in the string -- can this even happen? */
702 cookie->path = xstrdup (path);
706 /* The cookie sets its own path; verify that it is legal. */
707 if (!check_path_match (cookie->path, path))
709 DEBUGP (("Attempt to fake the path: %s, %s\n",
710 cookie->path, path));
715 /* Now store the cookie, or discard an existing cookie, if
716 discarding was requested. */
718 if (cookie->discard_requested)
720 discard_matching_cookie (jar, cookie);
724 store_cookie (jar, cookie);
729 delete_cookie (cookie);
732 /* Support for sending out cookies in HTTP requests, based on
733 previously stored cookies. Entry point is
734 `cookie_header'. */
736 /* Return a count of how many times CHR occurs in STRING.  The scan
   stops at the terminating NUL.
   NOTE(review): the counter declaration, the comparison and the
   return statement are not visible in this listing.  */
739 count_char (const char *string, char chr)
743 for (p = string; *p; p++)
749 /* Find the cookie chains whose domains match HOST and store them to
752 A cookie chain is the head of a list of cookies that belong to a
753 host/domain. Given HOST "img.search.xemacs.org", this function
754 will return the chains for "img.search.xemacs.org",
755 "search.xemacs.org", and "xemacs.org" -- those of them that exist
758 DEST should be large enough to accept (in the worst case) as many
759 elements as there are domain components of HOST.
   The caller (cookie_header) uses the return value as the number of
   chains written to DEST.  */
762 find_chains_of_host (struct cookie_jar *jar, const char *host,
763 struct cookie *dest[])
768 /* Bail out quickly if there are no cookies in the jar. */
769 if (!hash_table_count (jar->chains))
772 if (numeric_address_p (host))
773 /* If host is an IP address, only check for the exact match. */
776 /* Otherwise, check all the subdomains except the top-level (last)
777 one. As a domain with N components has N-1 dots, the number of
778 passes equals the number of dots. */
779 passes = count_char (host, '.');
783 /* Find chains that match HOST, starting with exact match and
784 progressing to less specific domains. For instance, given HOST
785 fly.srk.fer.hr, first look for fly.srk.fer.hr's chain, then
786 srk.fer.hr's, then fer.hr's. */
789 struct cookie *chain = hash_table_get (jar->chains, host);
791 dest[dest_count++] = chain;
792 if (++passcnt >= passes)
/* Step to the text after the next dot.  NOTE(review): this relies on
   a dot still being present, which presumably holds because the pass
   limit checked just above was derived from the dot count --
   confirm, in particular for the numeric-address case.  */
794 host = strchr (host, '.') + 1;
800 /* If FULL_PATH begins with PREFIX, return the length of PREFIX, zero
   [NOTE(review): the rest of this sentence and the end of this
   comment are missing from this listing.  The test below is a plain
   strncmp over strlen (PREFIX) bytes; the value returned on a match
   is on a line that is not visible here -- confirm it against the
   full file before relying on "length of PREFIX".]
804 path_matches (const char *full_path, const char *prefix)
806 int len = strlen (prefix);
808 if (0 != strncmp (full_path, prefix, len))
809 /* FULL_PATH doesn't begin with PREFIX. */
812 /* Length of PREFIX determines the quality of the match. */
816 /* Return true iff COOKIE matches the provided parameters of the URL
817 being downloaded: HOST, PORT, PATH, and SECFLAG.
819 If PATH_GOODNESS is non-NULL, store the "path goodness" value
820 there. That value is a measure of how closely COOKIE matches PATH,
821 used for ordering cookies.
   The visible checks run in this order: expiry, secure flag, port,
   exact-domain requirement, and finally the path.  */
824 cookie_matches_url (const struct cookie *cookie,
825 const char *host, int port, const char *path,
826 bool secflag, int *path_goodness)
830 if (cookie_expired_p (cookie))
831 /* Ignore stale cookies. Don't bother unchaining the cookie at
832 this point -- Wget is a relatively short-lived application, and
833 stale cookies will not be saved by `cookie_jar_save'. On the
834 other hand, this function should be as efficient as
838 if (cookie->secure && !secflag)
839 /* Don't transmit secure cookies over insecure connections. */
841 if (cookie->port != PORT_ANY && cookie->port != port)
844 /* If exact domain match is required, verify that cookie's domain is
845 equal to HOST. If not, assume success on the grounds of the
846 cookie's chain having been found by find_chains_of_host. */
847 if (cookie->domain_exact
848 && 0 != strcasecmp (host, cookie->domain))
851 pg = path_matches (path, cookie->path);
856 /* If the caller requested path_goodness, we return it. This is
857 an optimization, so that the caller doesn't need to call
858 path_matches() again. */
/* NOTE(review): goodness_comparator reads `domain_goodness' and
   `path_goodness' members and cookie_header fills them in, but their
   declarations (and the closing brace) are not visible in this
   listing.  */
863 /* A structure that points to a cookie, along with the additional
864 information about the cookie's "goodness". This allows us to sort
865 the cookies when returning them to the server, as required by the
868 struct weighed_cookie {
869 struct cookie *cookie;
874 /* Comparator used for uniquifying the list.  P1 and P2 point to
   struct weighed_cookie elements; ordering is by attribute name and
   then by value, both via strcmp, so a result of 0 means the two
   cookies have the same name and value.  */
877 equality_comparator (const void *p1, const void *p2)
879 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
880 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
882 int namecmp = strcmp (wc1->cookie->attr, wc2->cookie->attr);
883 int valuecmp = strcmp (wc1->cookie->value, wc2->cookie->value);
885 /* We only really care whether both name and value are equal. We
886 return them in this order only for consistency... */
887 return namecmp ? namecmp : valuecmp;
890 /* Eliminate duplicate cookies. "Duplicate cookies" are any two
891 cookies with the same attr name and value. Whenever a duplicate
892 pair is found, one of the cookies is removed.
   OUTGOING is sorted in place with equality_comparator first.  The
   caller (cookie_header) uses the return value as the new element
   count.  */
895 eliminate_dups (struct weighed_cookie *outgoing, int count)
897 struct weighed_cookie *h; /* hare */
898 struct weighed_cookie *t; /* tortoise */
899 struct weighed_cookie *end = outgoing + count;
901 /* We deploy a simple uniquify algorithm: first sort the array
902 according to our sort criteria, then copy it to itself, comparing
903 each cookie to its neighbor and ignoring the duplicates. */
905 qsort (outgoing, count, sizeof (struct weighed_cookie), equality_comparator);
907 /* "Hare" runs through all the entries in the array, followed by
908 "tortoise". If a duplicate is found, the hare skips it.
909 Non-duplicate entries are copied to the tortoise ptr. */
911 for (h = t = outgoing; h < end; h++)
/* NOTE(review): h[1] is read just below; the guard that keeps that
   access inside the array is presumably on a line missing from this
   listing -- confirm.  */
915 struct cookie *c0 = h[0].cookie;
916 struct cookie *c1 = h[1].cookie;
917 if (!strcmp (c0->attr, c1->attr) && !strcmp (c0->value, c1->value))
918 continue; /* ignore the duplicate */
921 /* If the hare has advanced past the tortoise (because of
922 previous dups), make sure the values get copied. Otherwise,
923 no copying is necessary. */
932 /* Comparator used for sorting by quality.  P1 and P2 point to
   struct weighed_cookie elements.  */
935 goodness_comparator (const void *p1, const void *p2)
937 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
938 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
940 /* Subtractions take `wc2' as the first argument because we want a
941 sort in *decreasing* order of goodness. */
942 int dgdiff = wc2->domain_goodness - wc1->domain_goodness;
943 int pgdiff = wc2->path_goodness - wc1->path_goodness;
945 /* Sort by domain goodness; if these are the same, sort by path
946 goodness. (The sorting order isn't really specified; maybe it
947 should be the other way around.) */
948 return dgdiff ? dgdiff : pgdiff;
951 /* Generate a `Cookie' header for a request that goes to HOST:PORT and
952 requests PATH from the server. The resulting string is allocated
953 with xmalloc, and the caller is responsible for freeing it. If no
954 cookies pertain to this request, i.e. no cookie header should be
955 generated, NULL is returned.
   The string consists of name=value pairs joined by "; ".  */
958 cookie_header (struct cookie_jar *jar, const char *host,
959 int port, const char *path, bool secflag)
961 struct cookie **chains;
964 struct cookie *cookie;
965 struct weighed_cookie *outgoing;
968 int result_size, pos;
969 PREPEND_SLASH (path); /* see cookie_handle_set_cookie */
971 /* First, find the cookie chains whose domains match HOST. */
973 /* Allocate room for find_chains_of_host to write to. The number of
974 chains can at most equal the number of subdomains, hence
975 1+<number of dots>. */
976 chains = alloca_array (struct cookie *, 1 + count_char (host, '.'));
977 chain_count = find_chains_of_host (jar, host, chains);
979 /* No cookies for this host. */
983 cookies_now = time (NULL);
985 /* Now extract from the chains those cookies that match our host
986 (for domain_exact cookies), port (for cookies with port other
987 than PORT_ANY), etc. See cookie_matches_url for details. */
989 /* Count the number of matching cookies. */
991 for (i = 0; i < chain_count; i++)
992 for (cookie = chains[i]; cookie; cookie = cookie->next)
993 if (cookie_matches_url (cookie, host, port, path, secflag, NULL))
996 return NULL; /* no cookies matched */
998 /* Allocate the array. */
/* NOTE(review): alloca_array presumably allocates on the stack, so
   this array grows with the number of matching cookies -- confirm
   that this is bounded in practice.  */
999 outgoing = alloca_array (struct weighed_cookie, count);
1001 /* Fill the array with all the matching cookies from the chains that
1004 for (i = 0; i < chain_count; i++)
1005 for (cookie = chains[i]; cookie; cookie = cookie->next)
1008 if (!cookie_matches_url (cookie, host, port, path, secflag, &pg))
1010 outgoing[ocnt].cookie = cookie;
1011 outgoing[ocnt].domain_goodness = strlen (cookie->domain);
1012 outgoing[ocnt].path_goodness = pg;
1015 assert (ocnt == count);
1017 /* Eliminate duplicate cookies; that is, those whose name and value
1019 count = eliminate_dups (outgoing, count);
1021 /* Sort the array so that best-matching domains come first, and
1022 that, within one domain, best-matching paths come first. */
1023 qsort (outgoing, count, sizeof (struct weighed_cookie), goodness_comparator);
1025 /* Count the space the name=value pairs will take. */
1027 for (i = 0; i < count; i++)
1029 struct cookie *c = outgoing[i].cookie;
1031 result_size += strlen (c->attr) + 1 + strlen (c->value);
1034 /* Allocate output buffer:
1035 name=value pairs -- result_size
1036 "; " separators -- (count - 1) * 2
1037 \0 terminator -- 1 */
1038 result_size = result_size + (count - 1) * 2 + 1;
1039 result = xmalloc (result_size);
1041 for (i = 0; i < count; i++)
1043 struct cookie *c = outgoing[i].cookie;
1044 int namlen = strlen (c->attr);
1045 int vallen = strlen (c->value);
1047 memcpy (result + pos, c->attr, namlen);
1049 result[pos++] = '=';
1050 memcpy (result + pos, c->value, vallen);
1054 result[pos++] = ';';
1055 result[pos++] = ' ';
1058 result[pos++] = '\0';
1059 assert (pos == result_size);
1063 /* Support for loading and saving cookies. The format used for
1064 loading and saving should be the format of the `cookies.txt' file
1065 used by Netscape and Mozilla, at least the Unix versions.
1066 (Apparently IE can export cookies in that format as well.) The
1067 format goes like this:
1069 DOMAIN DOMAIN-FLAG PATH SECURE-FLAG TIMESTAMP ATTR-NAME ATTR-VALUE
1071 DOMAIN -- cookie domain, optionally followed by :PORT
1072 DOMAIN-FLAG -- whether all hosts in the domain match
1074 SECURE-FLAG -- whether cookie requires secure connection
1075 TIMESTAMP -- expiry timestamp, number of seconds since epoch
1076 ATTR-NAME -- name of the cookie attribute
1077 ATTR-VALUE -- value of the cookie attribute (empty if absent)
1079 The fields are separated by TABs. All fields are mandatory, except
1080 for ATTR-VALUE. The `-FLAG' fields are boolean, their legal values
1081 being "TRUE" and "FALSE". Empty lines, lines consisting of
1082 whitespace only, and comment lines (beginning with # optionally
1083 preceded by whitespace) are ignored.
1085 Example line from cookies.txt (split in two lines for readability):
1087 .google.com TRUE / FALSE 2147368447 \
1088 PREF ID=34bb47565bbcd47b:LD=en:NR=20:TM=985172580:LM=985739012
/* If the region [DOMAIN_B, DOMAIN_E) ends with :<digits>, parse the
   number, return it, and store the new boundary (the location of the
   `:') to *DOMAIN_E_PTR.

   Return 0 and leave *DOMAIN_E_PTR untouched if no port is specified,
   if anything other than digits follows the colon, or if the number
   does not fit a TCP port (greater than 65535).  A colon followed by
   nothing still moves the boundary to the colon and returns 0.  */

static int
domain_port (const char *domain_b, const char *domain_e,
             const char **domain_e_ptr)
{
  int port = 0;
  const char *p;
  const char *colon = memchr (domain_b, ':', domain_e - domain_b);

  if (!colon)
    return 0;

  /* Plain range test on ASCII digits, so the block does not depend on
     a project-local ctype wrapper.  */
  for (p = colon + 1; p < domain_e && *p >= '0' && *p <= '9'; p++)
    {
      port = 10 * port + (*p - '0');
      /* The field comes from a cookies file, so a long digit run must
         not be allowed to overflow the signed accumulator.  Anything
         above 65535 cannot be a port; treat it as "no port".  */
      if (port > 65535)
        return 0;
    }

  if (p < domain_e)
    /* Garbage following port number. */
    return 0;

  *domain_e_ptr = colon;
  return port;
}
/* Field scanner used by cookie_jar_load, which passes its line cursor
   P and a begin/end pointer pair (B, E) for each field.
   NOTE(review): only part of the macro body is visible in this
   listing; the visible lines loop while *P is neither NUL nor TAB and
   then test for `b == e' or end of string.  */
1114 #define GET_WORD(p, b, e) do { \
1116 while (*p && *p != '\t') \
1119 if (b == e || !*p) \
1124 /* Load cookies from FILE.  Lines are read with read_whole_line;
   empty lines and lines starting with `#' (after leading whitespace)
   are skipped, and entries whose expiry lies before cookies_now are
   dropped.  Every accepted entry is handed to store_cookie.
   NOTE(review): the fclose of FP is not visible in this listing --
   confirm it exists.  */
1127 cookie_jar_load (struct cookie_jar *jar, const char *file)
1130 FILE *fp = fopen (file, "r");
1133 logprintf (LOG_NOTQUIET, _("Cannot open cookies file `%s': %s\n"),
1134 file, strerror (errno));
1137 cookies_now = time (NULL);
1139 for (; ((line = read_whole_line (fp)) != NULL); xfree (line))
1141 struct cookie *cookie;
1147 char *domain_b = NULL, *domain_e = NULL;
1148 char *domflag_b = NULL, *domflag_e = NULL;
1149 char *path_b = NULL, *path_e = NULL;
1150 char *secure_b = NULL, *secure_e = NULL;
1151 char *expires_b = NULL, *expires_e = NULL;
1152 char *name_b = NULL, *name_e = NULL;
1153 char *value_b = NULL, *value_e = NULL;
1155 /* Skip leading white-space. */
1156 while (*p && ISSPACE (*p))
1158 /* Ignore empty lines. */
1159 if (!*p || *p == '#')
1162 GET_WORD (p, domain_b, domain_e);
1163 GET_WORD (p, domflag_b, domflag_e);
1164 GET_WORD (p, path_b, path_e);
1165 GET_WORD (p, secure_b, secure_e);
1166 GET_WORD (p, expires_b, expires_e);
1167 GET_WORD (p, name_b, name_e);
1169 /* Don't use GET_WORD for value because it ends with newline,
1172 value_e = p + strlen (p);
1173 if (value_e > value_b && value_e[-1] == '\n')
1175 if (value_e > value_b && value_e[-1] == '\r')
1177 /* Empty values are legal (I think), so don't bother checking. */
1179 cookie = cookie_new ();
1181 cookie->attr = strdupdelim (name_b, name_e);
1182 cookie->value = strdupdelim (value_b, value_e);
1183 cookie->path = strdupdelim (path_b, path_e);
1184 cookie->secure = BOUNDED_EQUAL (secure_b, secure_e, "TRUE");
1186 /* Curl source says, quoting Andre Garcia: "flag: A TRUE/FALSE
1187 value indicating if all machines within a given domain can
1188 access the variable. This value is set automatically by the
1189 browser, depending on the value set for the domain." */
1190 cookie->domain_exact = !BOUNDED_EQUAL (domflag_b, domflag_e, "TRUE");
1192 /* DOMAIN needs special treatment because we might need to
1193 extract the port. */
1194 port = domain_port (domain_b, domain_e, (const char **)&domain_e);
1196 cookie->port = port;
1198 if (*domain_b == '.')
1199 ++domain_b; /* remove leading dot internally */
1200 cookie->domain = strdupdelim (domain_b, domain_e);
1202 /* safe default in case EXPIRES field is garbled. */
1203 expiry = (double)cookies_now - 1;
1205 /* I don't like changing the line, but it's safe here. (line is
1208 sscanf (expires_b, "%lf", &expiry);
1212 /* EXPIRY can be 0 for session cookies saved because the
1213 user specified `--keep-session-cookies' in the past.
1214 They remain session cookies, and will be saved only if
1215 the user has specified `keep-session-cookies' again. */
1219 if (expiry < cookies_now)
1220 goto abort_cookie; /* ignore stale cookie. */
1221 cookie->expiry_time = expiry;
1222 cookie->permanent = 1;
1225 store_cookie (jar, cookie);
1231 delete_cookie (cookie);
1236 /* Save cookies, in format described above, to FILE.  Every chain
   in jar->chains is walked; the visible tests skip non-permanent
   cookies unless opt.keep_session_cookies is set, and skip cookies
   for which cookie_expired_p is true.  Failures to open, write or
   close FILE are reported through logprintf.  */
1239 cookie_jar_save (struct cookie_jar *jar, const char *file)
1242 hash_table_iterator iter;
1244 DEBUGP (("Saving cookies to %s.\n", file));
1246 cookies_now = time (NULL);
1248 fp = fopen (file, "w");
1251 logprintf (LOG_NOTQUIET, _("Cannot open cookies file `%s': %s\n"),
1252 file, strerror (errno));
1256 fputs ("# HTTP cookie file.\n", fp);
1257 fprintf (fp, "# Generated by Wget on %s.\n", datetime_str (cookies_now));
1258 fputs ("# Edit at your own risk.\n\n", fp);
1260 for (hash_table_iterate (jar->chains, &iter);
1261 hash_table_iter_next (&iter);
1264 const char *domain = iter.key;
1265 struct cookie *cookie = iter.value;
1266 for (; cookie; cookie = cookie->next)
1268 if (!cookie->permanent && !opt.keep_session_cookies)
1270 if (cookie_expired_p (cookie))
1272 if (!cookie->domain_exact)
1275 if (cookie->port != PORT_ANY)
1276 fprintf (fp, ":%d", cookie->port);
1277 fprintf (fp, "\t%s\t%s\t%s\t%.0f\t%s\t%s\n",
1278 cookie->domain_exact ? "FALSE" : "TRUE",
1279 cookie->path, cookie->secure ? "TRUE" : "FALSE",
1280 (double)cookie->expiry_time,
1281 cookie->attr, cookie->value);
1288 logprintf (LOG_NOTQUIET, _("Error writing to `%s': %s\n"),
1289 file, strerror (errno));
1290 if (fclose (fp) < 0)
1291 logprintf (LOG_NOTQUIET, _("Error closing `%s': %s\n"),
1292 file, strerror (errno));
1294 DEBUGP (("Done saving cookies.\n"));
1297 /* Clean up cookie-related data: every cookie of every chain in
   JAR is released with delete_cookie, then the chain table itself is
   destroyed.
   NOTE(review): freeing of the chain keys and of JAR itself is not
   visible in this listing -- confirm against the full file.  */
1300 cookie_jar_delete (struct cookie_jar *jar)
1302 /* Iterate over chains (indexed by domain) and free them. */
1303 hash_table_iterator iter;
1304 for (hash_table_iterate (jar->chains, &iter); hash_table_iter_next (&iter); )
1306 struct cookie *chain = iter.value;
1308 /* Then all cookies in this chain. */
1311 struct cookie *next = chain->next;
1312 delete_cookie (chain);
1316 hash_table_destroy (jar->chains);
1320 /* Test cases. Currently this only tests parse_set_cookie (and
   extract_param on the same inputs). To
1321 use, recompile Wget with -DTEST_COOKIES and call test_cookies()
1328 /* Tests expected to succeed: */
1331 const char *results[10];
1333 { "arg=value", {"arg", "value", NULL} },
1334 { "arg1=value1;arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
1335 { "arg1=value1; arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
1336 { "arg1=value1; arg2=value2;", {"arg1", "value1", "arg2", "value2", NULL} },
1337 { "arg1=value1; arg2=value2; ", {"arg1", "value1", "arg2", "value2", NULL} },
1338 { "arg1=\"value1\"; arg2=\"\"", {"arg1", "value1", "arg2", "", NULL} },
1339 { "arg=", {"arg", "", NULL} },
1340 { "arg1=; arg2=", {"arg1", "", "arg2", "", NULL} },
1341 { "arg1 = ; arg2= ", {"arg1", "", "arg2", "", NULL} },
1344 /* Tests expected to fail: */
1345 static char *tests_fail[] = {
1347 "arg=\"unterminated",
1349 "arg1=;=another-empty-name",
1353 for (i = 0; i < countof (tests_succ); i++)
1356 const char *data = tests_succ[i].data;
1357 const char **expected = tests_succ[i].results;
1360 c = parse_set_cookie (data, true);
1363 printf ("NULL cookie returned for valid data: %s\n", data);
1367 /* Test whether extract_param handles these cases correctly. */
1369 param_token name, value;
1370 const char *ptr = data;
1372 while (extract_param (&ptr, &name, &value, ';'))
/* NOTE(review): the releases of N and V (and of the parsed cookie C
   above) are not visible in this listing -- confirm they exist.  */
1374 char *n = strdupdelim (name.b, name.e);
1375 char *v = strdupdelim (value.b, value.e);
1378 printf ("Too many parameters for '%s'\n", data);
1381 if (0 != strcmp (expected[j], n))
1382 printf ("Invalid name %d for '%s' (expected '%s', got '%s')\n",
1383 j / 2 + 1, data, expected[j], n);
1384 if (0 != strcmp (expected[j + 1], v))
1385 printf ("Invalid value %d for '%s' (expected '%s', got '%s')\n",
1386 j / 2 + 1, data, expected[j + 1], v);
1392 printf ("Too few parameters for '%s'\n", data);
1396 for (i = 0; i < countof (tests_fail); i++)
1399 char *data = tests_fail[i];
1400 c = parse_set_cookie (data, true);
1402 printf ("Failed to report error on invalid data: %s\n", data);
1405 #endif /* TEST_COOKIES */