1 /* Support for cookies.
2 Copyright (C) 2001-2006 Free Software Foundation, Inc.
4 This file is part of GNU Wget.
6 GNU Wget is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or (at
9 your option) any later version.
11 GNU Wget is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Wget; if not, write to the Free Software Foundation, Inc.,
18 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 In addition, as a special exception, the Free Software Foundation
21 gives permission to link the code of its release of Wget with the
22 OpenSSL project's "OpenSSL" library (or with modified versions of it
23 that use the same license as the "OpenSSL" library), and distribute
24 the linked executables. You must obey the GNU General Public License
25 in all respects for all of the code used other than "OpenSSL". If you
26 modify this file, you may extend this exception to your version of the
27 file, but you are not obligated to do so. If you do not wish to do
28 so, delete this exception statement from your version. */
30 /* Written by Hrvoje Niksic. Parts are loosely inspired by the
31 cookie patch submitted by Tomasz Wegrzanowski.
33 This implements the client-side cookie support, as specified
34 (loosely) by Netscape's "preliminary specification", currently
37 http://wp.netscape.com/newsref/std/cookie_spec.html
39 rfc2109 is not supported because of its incompatibilities with the
40 above widely-used specification. rfc2965 is entirely ignored,
41 since popular client software doesn't implement it, and even the
42 sites that do send Set-Cookie2 also emit Set-Cookie for
58 #include "http.h" /* for http_atotm */
60 /* Declarations of `struct cookie' and the most basic functions. */
62 /* Cookie jar serves as cookie storage and a means of retrieving
63 cookies efficiently. All cookies with the same domain are stored
64 in a linked list called "chain". A cookie chain can be reached by
65 looking up the domain in the cookie jar's chains table.
67 For example, to reach all the cookies under google.com, one must
68 execute hash_table_get(jar->chains, "google.com"). Of
69 course, when sending a cookie to `www.google.com', one must search
70 for cookies that belong to either `www.google.com' or `google.com'
71 -- but the point is that the code doesn't need to go through *all*
75 /* Cookie chains indexed by domain. */
76 struct hash_table *chains;
78 int cookie_count; /* number of cookies in the jar. */
81 /* Value set by entry point functions, so that the low-level
82 routines don't need to call time() all the time. */
83 static time_t cookies_now;
88 struct cookie_jar *jar = xnew (struct cookie_jar);
89 jar->chains = make_nocase_string_hash_table (0);
90 jar->cookie_count = 0;
95 char *domain; /* domain of the cookie */
96 int port; /* port number */
97 char *path; /* path prefix of the cookie */
99 unsigned discard_requested :1; /* whether cookie was created to
100 request discarding another
103 unsigned secure :1; /* whether cookie should be
104 transmitted over non-https
106 unsigned domain_exact :1; /* whether DOMAIN must match as a
109 unsigned permanent :1; /* whether the cookie should outlive
111 time_t expiry_time; /* time when the cookie expires, 0
112 means undetermined. */
114 char *attr; /* cookie attribute name */
115 char *value; /* cookie attribute value */
117 struct cookie *next; /* used for chaining of cookies in the
121 #define PORT_ANY (-1)
123 /* Allocate and return a new, empty cookie structure. */
125 static struct cookie *
128 struct cookie *cookie = xnew0 (struct cookie);
130 /* Both cookie->permanent and cookie->expiry_time are now 0. This
131 means that the cookie doesn't expire, but is only valid for this
132 session (i.e. not written out to disk). */
134 cookie->port = PORT_ANY;
138 /* Non-zero if the cookie has expired. Assumes cookies_now has been
139 set by one of the entry point functions. */
142 cookie_expired_p (const struct cookie *c)
144 return c->expiry_time != 0 && c->expiry_time < cookies_now;
147 /* Deallocate COOKIE and its components. */
150 delete_cookie (struct cookie *cookie)
152 xfree_null (cookie->domain);
153 xfree_null (cookie->path);
154 xfree_null (cookie->attr);
155 xfree_null (cookie->value);
159 /* Functions for storing cookies.
161 All cookies can be reached beginning with jar->chains. The key in
162 that table is the domain name, and the value is a linked list of
163 all cookies from that domain. Every new cookie is placed on the
166 /* Find and return a cookie in JAR whose domain, port, path, and attribute
167 name correspond to COOKIE. If found, PREVPTR will point to the
168 location of the cookie previous in chain, or NULL if the found
169 cookie is the head of a chain.
171 If no matching cookie is found, return NULL. */
173 static struct cookie *
174 find_matching_cookie (struct cookie_jar *jar, struct cookie *cookie,
175 struct cookie **prevptr)
177 struct cookie *chain, *prev;
179 chain = hash_table_get (jar->chains, cookie->domain);
184 for (; chain; prev = chain, chain = chain->next)
185 if (0 == strcmp (cookie->path, chain->path)
186 && 0 == strcmp (cookie->attr, chain->attr)
187 && cookie->port == chain->port)
198 /* Store COOKIE to the jar.
200 This is done by placing COOKIE at the head of its chain. However,
201 if COOKIE matches a cookie already in memory, as determined by
202 find_matching_cookie, the old cookie is unlinked and destroyed.
204 The key of each chain's hash table entry is allocated only the
205 first time; next hash_table_put's reuse the same key. */
208 store_cookie (struct cookie_jar *jar, struct cookie *cookie)
210 struct cookie *chain_head;
213 if (hash_table_get_pair (jar->chains, cookie->domain,
214 &chain_key, &chain_head))
216 /* A chain of cookies in this domain already exists. Check for
217 duplicates -- if an extant cookie exactly matches our domain,
218 port, path, and name, replace it. */
220 struct cookie *victim = find_matching_cookie (jar, cookie, &prev);
224 /* Remove VICTIM from the chain. COOKIE will be placed at
228 prev->next = victim->next;
229 cookie->next = chain_head;
233 /* prev is NULL; apparently VICTIM was at the head of
234 the chain. This place will be taken by COOKIE, so
235 all we need to do is: */
236 cookie->next = victim->next;
238 delete_cookie (victim);
240 DEBUGP (("Deleted old cookie (to be replaced.)\n"));
243 cookie->next = chain_head;
247 /* We are now creating the chain. Use a copy of cookie->domain
248 as the key for the life-time of the chain. Using
249 cookie->domain would be unsafe because the life-time of the
250 chain may exceed the life-time of the cookie. (Cookies may
251 be deleted from the chain by this very function.) */
253 chain_key = xstrdup (cookie->domain);
256 hash_table_put (jar->chains, chain_key, cookie);
261 time_t exptime = cookie->expiry_time;
262 DEBUGP (("\nStored cookie %s %d%s %s <%s> <%s> [expiry %s] %s %s\n",
263 cookie->domain, cookie->port,
264 cookie->port == PORT_ANY ? " (ANY)" : "",
266 cookie->permanent ? "permanent" : "session",
267 cookie->secure ? "secure" : "insecure",
268 cookie->expiry_time ? datetime_str (exptime) : "none",
269 cookie->attr, cookie->value));
273 /* Discard a cookie matching COOKIE's domain, port, path, and
274 attribute name. This gets called when we encounter a cookie whose
275 expiry date is in the past, or whose max-age is set to 0. The
276 former corresponds to netscape cookie spec, while the latter is
277 specified by rfc2109. */
280 discard_matching_cookie (struct cookie_jar *jar, struct cookie *cookie)
282 struct cookie *prev, *victim;
284 if (!hash_table_count (jar->chains))
285 /* No elements == nothing to discard. */
288 victim = find_matching_cookie (jar, cookie, &prev);
292 /* Simply unchain the victim. */
293 prev->next = victim->next;
296 /* VICTIM was head of its chain. We need to place a new
297 cookie at the head. */
298 char *chain_key = NULL;
301 res = hash_table_get_pair (jar->chains, victim->domain,
306 /* VICTIM was the only cookie in the chain. Destroy the
307 chain and deallocate the chain key. */
308 hash_table_remove (jar->chains, victim->domain);
312 hash_table_put (jar->chains, chain_key, victim->next);
314 delete_cookie (victim);
315 DEBUGP (("Discarded old cookie.\n"));
319 /* Functions for parsing the `Set-Cookie' header, and creating new
320 cookies from the wire. */
322 #define TOKEN_IS(token, string_literal) \
323 BOUNDED_EQUAL_NO_CASE (token.b, token.e, string_literal)
325 #define TOKEN_NON_EMPTY(token) (token.b != NULL && token.b != token.e)
327 /* Parse the contents of the `Set-Cookie' header. The header looks
330 name1=value1; name2=value2; ...
332 Trailing semicolon is optional; spaces are allowed between all
333 tokens. Additionally, values may be quoted.
335 A new cookie is returned upon success, NULL otherwise.
337 The first name-value pair will be used to set the cookie's
338 attribute name and value. Subsequent parameters will be checked
339 against field names such as `domain', `path', etc. Recognized
340 fields will be parsed and the corresponding members of COOKIE
343 static struct cookie *
344 parse_set_cookie (const char *set_cookie, bool silent)
346 const char *ptr = set_cookie;
347 struct cookie *cookie = cookie_new ();
348 param_token name, value;
350 if (!extract_param (&ptr, &name, &value, ';'))
354 cookie->attr = strdupdelim (name.b, name.e);
355 cookie->value = strdupdelim (value.b, value.e);
357 while (extract_param (&ptr, &name, &value, ';'))
359 if (TOKEN_IS (name, "domain"))
361 if (!TOKEN_NON_EMPTY (value))
363 xfree_null (cookie->domain);
364 /* Strictly speaking, we should set cookie->domain_exact if the
365 domain doesn't begin with a dot. But many sites set the
366 domain to "foo.com" and expect "subhost.foo.com" to get the
367 cookie, and it apparently works in browsers. */
370 cookie->domain = strdupdelim (value.b, value.e);
372 else if (TOKEN_IS (name, "path"))
374 if (!TOKEN_NON_EMPTY (value))
376 xfree_null (cookie->path);
377 cookie->path = strdupdelim (value.b, value.e);
379 else if (TOKEN_IS (name, "expires"))
384 if (!TOKEN_NON_EMPTY (value))
386 BOUNDED_TO_ALLOCA (value.b, value.e, value_copy);
388 expires = http_atotm (value_copy);
389 if (expires != (time_t) -1)
391 cookie->permanent = 1;
392 cookie->expiry_time = expires;
395 /* Error in expiration spec. Assume default (cookie doesn't
396 expire, but valid only for this session.) */
399 /* According to netscape's specification, expiry time in the
400 past means that discarding of a matching cookie is
402 if (cookie->expiry_time < cookies_now)
403 cookie->discard_requested = 1;
405 else if (TOKEN_IS (name, "max-age"))
410 if (!TOKEN_NON_EMPTY (value))
412 BOUNDED_TO_ALLOCA (value.b, value.e, value_copy);
414 sscanf (value_copy, "%lf", &maxage);
416 /* something went wrong. */
418 cookie->permanent = 1;
419 cookie->expiry_time = cookies_now + maxage;
421 /* According to rfc2109, a cookie with max-age of 0 means that
422 discarding of a matching cookie is requested. */
424 cookie->discard_requested = 1;
426 else if (TOKEN_IS (name, "secure"))
428 /* ignore value completely */
432 /* Ignore unrecognized attribute. */
436 /* extract_param has encountered a syntax error */
439 /* The cookie has been successfully constructed; return it. */
444 logprintf (LOG_NOTQUIET,
445 _("Syntax error in Set-Cookie: %s at position %d.\n"),
446 escnonprint (set_cookie), (int) (ptr - set_cookie));
447 delete_cookie (cookie);
452 #undef TOKEN_NON_EMPTY
454 /* Sanity checks. These are important, otherwise it is possible for
455 malicious attackers to destroy important cookie information and/or
456 violate your privacy. */
459 #define REQUIRE_DIGITS(p) do { \
462 for (++p; ISDIGIT (*p); p++) \
466 #define REQUIRE_DOT(p) do { \
471 /* Check whether ADDR matches <digits>.<digits>.<digits>.<digits>.
473 We don't want to call network functions like inet_addr() because
474 all we need is a check, preferably one that is small, fast, and
478 numeric_address_p (const char *addr)
480 const char *p = addr;
482 REQUIRE_DIGITS (p); /* A */
483 REQUIRE_DOT (p); /* . */
484 REQUIRE_DIGITS (p); /* B */
485 REQUIRE_DOT (p); /* . */
486 REQUIRE_DIGITS (p); /* C */
487 REQUIRE_DOT (p); /* . */
488 REQUIRE_DIGITS (p); /* D */
495 /* Check whether COOKIE_DOMAIN is an appropriate domain for HOST.
496 Originally I tried to make the check compliant with rfc2109, but
497 the sites deviated too often, so I had to fall back to "tail
498 matching", as defined by the original Netscape's cookie spec. */
501 check_domain_match (const char *cookie_domain, const char *host)
505 /* Numeric address requires exact match. It also requires HOST to
507 if (numeric_address_p (cookie_domain))
508 return 0 == strcmp (cookie_domain, host);
512 /* For the sake of efficiency, check for exact match first. */
513 if (0 == strcasecmp (cookie_domain, host))
518 /* HOST must match the tail of cookie_domain. */
519 if (!match_tail (host, cookie_domain, true))
522 /* We know that COOKIE_DOMAIN is a subset of HOST; however, we must
523 make sure that somebody is not trying to set the cookie for a
524 subdomain shared by many entities. For example, "company.co.uk"
525 must not be allowed to set a cookie for ".co.uk". On the other
526 hand, "sso.redhat.de" should be able to set a cookie for
529 The only marginally sane way to handle this I can think of is to
530 reject on the basis of the length of the second-level domain name
531 (but only when the top-level domain is unknown), with the assumption
532 that those of three or fewer characters could be reserved. For
535 .co.org -> works because the TLD is known
536 .co.uk -> doesn't work because "co" is only two chars long
537 .com.au -> doesn't work because "com" is only 3 chars long
538 .cnn.uk -> doesn't work because "cnn" is also only 3 chars long (ugh)
539 .cnn.de -> doesn't work for the same reason (ugh!!)
540 .abcd.de -> works because "abcd" is 4 chars long
541 .img.cnn.de -> works because it's not trying to set the 2nd level domain
542 .cnn.co.uk -> works for the same reason
544 That should prevent misuse, while allowing reasonable usage. If
545 someone knows of a better way to handle this, please let me
548 const char *p = cookie_domain;
549 int dccount = 1; /* number of domain components */
550 int ldcl = 0; /* last domain component length */
551 int nldcl = 0; /* next to last domain component length */
554 /* Ignore leading period in this calculation. */
557 for (out = 0; !out; p++)
565 /* Empty domain component found -- the domain is invalid. */
567 if (*(p + 1) == '\0')
569 /* Tolerate trailing '.' by not treating the domain as
570 one ending with an empty domain component. */
592 int known_toplevel = false;
593 static const char *known_toplevel_domains[] = {
594 ".com", ".edu", ".net", ".org", ".gov", ".mil", ".int"
596 for (i = 0; i < countof (known_toplevel_domains); i++)
597 if (match_tail (cookie_domain, known_toplevel_domains[i], true))
599 known_toplevel = true;
602 if (!known_toplevel && nldcl <= 3)
609 /* Don't allow the host "foobar.com" to set a cookie for domain
611 if (*cookie_domain != '.')
613 int dlen = strlen (cookie_domain);
614 int hlen = strlen (host);
615 /* cookie host: hostname.foobar.com */
616 /* desired domain: bar.com */
617 /* '.' must be here in host-> ^ */
618 if (hlen > dlen && host[hlen - dlen - 1] != '.')
627 static int path_matches (const char *, const char *);
629 /* Check whether PATH begins with COOKIE_PATH. */
632 check_path_match (const char *cookie_path, const char *path)
634 return path_matches (path, cookie_path) != 0;
637 /* Prepend '/' to string S. S is copied to fresh stack-allocated
638 space and its value is modified to point to the new location. */
640 #define PREPEND_SLASH(s) do { \
641 char *PS_newstr = (char *) alloca (1 + strlen (s) + 1); \
643 strcpy (PS_newstr + 1, s); \
648 /* Process the HTTP `Set-Cookie' header. This results in storing the
649 cookie or discarding a matching one, or ignoring it completely, all
650 depending on the contents. */
653 cookie_handle_set_cookie (struct cookie_jar *jar,
654 const char *host, int port,
655 const char *path, const char *set_cookie)
657 struct cookie *cookie;
658 cookies_now = time (NULL);
660 /* Wget's paths don't begin with '/' (blame rfc1808), but cookie
661 usage assumes /-prefixed paths. Until the rest of Wget is fixed,
662 simply prepend slash to PATH. */
663 PREPEND_SLASH (path);
665 cookie = parse_set_cookie (set_cookie, false);
669 /* Sanitize parts of cookie. */
674 /* If the domain was not provided, we use the one we're talking
675 to, and set exact match. */
676 cookie->domain = xstrdup (host);
677 cookie->domain_exact = 1;
678 /* Set the port, but only if it's non-default. */
679 if (port != 80 && port != 443)
684 if (!check_domain_match (cookie->domain, host))
686 logprintf (LOG_NOTQUIET,
687 _("Cookie coming from %s attempted to set domain to %s\n"),
688 escnonprint (host), escnonprint (cookie->domain));
689 xfree (cookie->domain);
696 /* The cookie doesn't set path: set it to the URL path, sans the
697 file part ("/dir/file" truncated to "/dir/"). */
698 char *trailing_slash = strrchr (path, '/');
700 cookie->path = strdupdelim (path, trailing_slash + 1);
702 /* no slash in the string -- can this even happen? */
703 cookie->path = xstrdup (path);
707 /* The cookie sets its own path; verify that it is legal. */
708 if (!check_path_match (cookie->path, path))
710 DEBUGP (("Attempt to fake the path: %s, %s\n",
711 cookie->path, path));
716 /* Now store the cookie, or discard an existing cookie, if
717 discarding was requested. */
719 if (cookie->discard_requested)
721 discard_matching_cookie (jar, cookie);
725 store_cookie (jar, cookie);
730 delete_cookie (cookie);
733 /* Support for sending out cookies in HTTP requests, based on
734 previously stored cookies. Entry point is
735 `cookie_header'. */
/* Return a count of how many times CHR occurs in STRING.

   STRING must be a NUL-terminated string; the terminating NUL itself
   is never counted.  An empty STRING yields 0.  */

static int
count_char (const char *string, char chr)
{
  const char *p;
  int count = 0;

  for (p = string; *p; p++)
    if (*p == chr)
      ++count;

  return count;
}
750 /* Find the cookie chains whose domains match HOST and store them to
753 A cookie chain is the head of a list of cookies that belong to a
754 host/domain. Given HOST "img.search.xemacs.org", this function
755 will return the chains for "img.search.xemacs.org",
756 "search.xemacs.org", and "xemacs.org" -- those of them that exist
759 DEST should be large enough to accept (in the worst case) as many
760 elements as there are domain components of HOST. */
763 find_chains_of_host (struct cookie_jar *jar, const char *host,
764 struct cookie *dest[])
769 /* Bail out quickly if there are no cookies in the jar. */
770 if (!hash_table_count (jar->chains))
773 if (numeric_address_p (host))
774 /* If host is an IP address, only check for the exact match. */
777 /* Otherwise, check all the subdomains except the top-level (last)
778 one. As a domain with N components has N-1 dots, the number of
779 passes equals the number of dots. */
780 passes = count_char (host, '.');
784 /* Find chains that match HOST, starting with exact match and
785 progressing to less specific domains. For instance, given HOST
786 fly.srk.fer.hr, first look for fly.srk.fer.hr's chain, then
787 srk.fer.hr's, then fer.hr's. */
790 struct cookie *chain = hash_table_get (jar->chains, host);
792 dest[dest_count++] = chain;
793 if (++passcnt >= passes)
795 host = strchr (host, '.') + 1;
/* If FULL_PATH begins with PREFIX, return the length of PREFIX, zero
   otherwise.

   The returned length doubles as a measure of how good the match is:
   a longer PREFIX is a more specific match.  Note that an empty PREFIX
   yields 0 and is therefore indistinguishable from "no match".  */

static int
path_matches (const char *full_path, const char *prefix)
{
  size_t len = strlen (prefix);

  if (0 != strncmp (full_path, prefix, len))
    /* FULL_PATH doesn't begin with PREFIX. */
    return 0;

  /* Length of PREFIX determines the quality of the match. */
  return (int) len;
}
817 /* Return true iff COOKIE matches the provided parameters of the URL
818 being downloaded: HOST, PORT, PATH, and SECFLAG.
820 If PATH_GOODNESS is non-NULL, store the "path goodness" value
821 there. That value is a measure of how closely COOKIE matches PATH,
822 used for ordering cookies. */
825 cookie_matches_url (const struct cookie *cookie,
826 const char *host, int port, const char *path,
827 bool secflag, int *path_goodness)
831 if (cookie_expired_p (cookie))
832 /* Ignore stale cookies. Don't bother unchaining the cookie at
833 this point -- Wget is a relatively short-lived application, and
834 stale cookies will not be saved by `cookie_jar_save'. On the
835 other hand, this function should be as efficient as
839 if (cookie->secure && !secflag)
840 /* Don't transmit secure cookies over insecure connections. */
842 if (cookie->port != PORT_ANY && cookie->port != port)
845 /* If exact domain match is required, verify that cookie's domain is
846 equal to HOST. If not, assume success on the grounds of the
847 cookie's chain having been found by find_chains_of_host. */
848 if (cookie->domain_exact
849 && 0 != strcasecmp (host, cookie->domain))
852 pg = path_matches (path, cookie->path);
857 /* If the caller requested path_goodness, we return it. This is
858 an optimization, so that the caller doesn't need to call
859 path_matches() again. */
864 /* A structure that points to a cookie, along with the additional
865 information about the cookie's "goodness". This allows us to sort
866 the cookies when returning them to the server, as required by the
869 struct weighed_cookie {
870 struct cookie *cookie;
875 /* Comparator used for uniquifying the list. */
878 equality_comparator (const void *p1, const void *p2)
880 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
881 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
883 int namecmp = strcmp (wc1->cookie->attr, wc2->cookie->attr);
884 int valuecmp = strcmp (wc1->cookie->value, wc2->cookie->value);
886 /* We only really care whether both name and value are equal. We
887 return them in this order only for consistency... */
888 return namecmp ? namecmp : valuecmp;
891 /* Eliminate duplicate cookies. "Duplicate cookies" are any two
892 cookies with the same attr name and value. Whenever a duplicate
893 pair is found, one of the cookies is removed. */
896 eliminate_dups (struct weighed_cookie *outgoing, int count)
898 struct weighed_cookie *h; /* hare */
899 struct weighed_cookie *t; /* tortoise */
900 struct weighed_cookie *end = outgoing + count;
902 /* We deploy a simple uniquify algorithm: first sort the array
903 according to our sort criteria, then copy it to itself, comparing
904 each cookie to its neighbor and ignoring the duplicates. */
906 qsort (outgoing, count, sizeof (struct weighed_cookie), equality_comparator);
908 /* "Hare" runs through all the entries in the array, followed by
909 "tortoise". If a duplicate is found, the hare skips it.
910 Non-duplicate entries are copied to the tortoise ptr. */
912 for (h = t = outgoing; h < end; h++)
916 struct cookie *c0 = h[0].cookie;
917 struct cookie *c1 = h[1].cookie;
918 if (!strcmp (c0->attr, c1->attr) && !strcmp (c0->value, c1->value))
919 continue; /* ignore the duplicate */
922 /* If the hare has advanced past the tortoise (because of
923 previous dups), make sure the values get copied. Otherwise,
924 no copying is necessary. */
933 /* Comparator used for sorting by quality. */
936 goodness_comparator (const void *p1, const void *p2)
938 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
939 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
941 /* Subtractions take `wc2' as the first argument because we want a
942 sort in *decreasing* order of goodness. */
943 int dgdiff = wc2->domain_goodness - wc1->domain_goodness;
944 int pgdiff = wc2->path_goodness - wc1->path_goodness;
946 /* Sort by domain goodness; if these are the same, sort by path
947 goodness. (The sorting order isn't really specified; maybe it
948 should be the other way around.) */
949 return dgdiff ? dgdiff : pgdiff;
952 /* Generate a `Cookie' header for a request that goes to HOST:PORT and
953 requests PATH from the server. The resulting string is allocated
954 with `malloc', and the caller is responsible for freeing it. If no
955 cookies pertain to this request, i.e. no cookie header should be
956 generated, NULL is returned. */
959 cookie_header (struct cookie_jar *jar, const char *host,
960 int port, const char *path, bool secflag)
962 struct cookie **chains;
965 struct cookie *cookie;
966 struct weighed_cookie *outgoing;
969 int result_size, pos;
970 PREPEND_SLASH (path); /* see cookie_handle_set_cookie */
972 /* First, find the cookie chains whose domains match HOST. */
974 /* Allocate room for find_chains_of_host to write to. The number of
975 chains can at most equal the number of subdomains, hence
976 1+<number of dots>. */
977 chains = alloca_array (struct cookie *, 1 + count_char (host, '.'));
978 chain_count = find_chains_of_host (jar, host, chains);
980 /* No cookies for this host. */
984 cookies_now = time (NULL);
986 /* Now extract from the chains those cookies that match our host
987 (for domain_exact cookies), port (for cookies with port other
988 than PORT_ANY), etc. See cookie_matches_url for details. */
990 /* Count the number of matching cookies. */
992 for (i = 0; i < chain_count; i++)
993 for (cookie = chains[i]; cookie; cookie = cookie->next)
994 if (cookie_matches_url (cookie, host, port, path, secflag, NULL))
997 return NULL; /* no cookies matched */
999 /* Allocate the array. */
1000 outgoing = alloca_array (struct weighed_cookie, count);
1002 /* Fill the array with all the matching cookies from the chains that
1005 for (i = 0; i < chain_count; i++)
1006 for (cookie = chains[i]; cookie; cookie = cookie->next)
1009 if (!cookie_matches_url (cookie, host, port, path, secflag, &pg))
1011 outgoing[ocnt].cookie = cookie;
1012 outgoing[ocnt].domain_goodness = strlen (cookie->domain);
1013 outgoing[ocnt].path_goodness = pg;
1016 assert (ocnt == count);
1018 /* Eliminate duplicate cookies; that is, those whose name and value
1020 count = eliminate_dups (outgoing, count);
1022 /* Sort the array so that best-matching domains come first, and
1023 that, within one domain, best-matching paths come first. */
1024 qsort (outgoing, count, sizeof (struct weighed_cookie), goodness_comparator);
1026 /* Count the space the name=value pairs will take. */
1028 for (i = 0; i < count; i++)
1030 struct cookie *c = outgoing[i].cookie;
1032 result_size += strlen (c->attr) + 1 + strlen (c->value);
1035 /* Allocate output buffer:
1036 name=value pairs -- result_size
1037 "; " separators -- (count - 1) * 2
1038 \0 terminator -- 1 */
1039 result_size = result_size + (count - 1) * 2 + 1;
1040 result = xmalloc (result_size);
1042 for (i = 0; i < count; i++)
1044 struct cookie *c = outgoing[i].cookie;
1045 int namlen = strlen (c->attr);
1046 int vallen = strlen (c->value);
1048 memcpy (result + pos, c->attr, namlen);
1050 result[pos++] = '=';
1051 memcpy (result + pos, c->value, vallen);
1055 result[pos++] = ';';
1056 result[pos++] = ' ';
1059 result[pos++] = '\0';
1060 assert (pos == result_size);
1064 /* Support for loading and saving cookies. The format used for
1065 loading and saving should be the format of the `cookies.txt' file
1066 used by Netscape and Mozilla, at least the Unix versions.
1067 (Apparently IE can export cookies in that format as well.) The
1068 format goes like this:
1070 DOMAIN DOMAIN-FLAG PATH SECURE-FLAG TIMESTAMP ATTR-NAME ATTR-VALUE
1072 DOMAIN -- cookie domain, optionally followed by :PORT
1073 DOMAIN-FLAG -- whether all hosts in the domain match
1075 SECURE-FLAG -- whether cookie requires secure connection
1076 TIMESTAMP -- expiry timestamp, number of seconds since epoch
1077 ATTR-NAME -- name of the cookie attribute
1078 ATTR-VALUE -- value of the cookie attribute (empty if absent)
1080 The fields are separated by TABs. All fields are mandatory, except
1081 for ATTR-VALUE. The `-FLAG' fields are boolean, their legal values
1082 being "TRUE" and "FALSE". Empty lines, lines consisting of
1083 whitespace only, and comment lines (beginning with # optionally
1084 preceded by whitespace) are ignored.
1086 Example line from cookies.txt (split in two lines for readability):
1088 .google.com TRUE / FALSE 2147368447 \
1089 PREF ID=34bb47565bbcd47b:LD=en:NR=20:TM=985172580:LM=985739012
/* If the region [DOMAIN_B, DOMAIN_E) ends with :<digits>, parse the
   number, return it, and store the new boundary (location of the `:')
   to *DOMAIN_E_PTR.

   Return 0, leaving *DOMAIN_E_PTR untouched, if no port is specified,
   if anything other than digits follows the `:', or if the number
   exceeds 65535 (the largest valid port).  The range check also keeps
   the accumulation below from overflowing on long digit runs.  */

static int
domain_port (const char *domain_b, const char *domain_e,
             const char **domain_e_ptr)
{
  int port = 0;
  const char *p;
  const char *colon = memchr (domain_b, ':', (size_t) (domain_e - domain_b));

  if (!colon)
    return 0;

  /* Plain ASCII range test: the port is always written in ASCII
     digits, independent of locale.  */
  for (p = colon + 1; p < domain_e && *p >= '0' && *p <= '9'; p++)
    {
      port = 10 * port + (*p - '0');
      if (port > 65535)
        /* Not a valid port number; treat like garbage. */
        return 0;
    }

  if (p < domain_e)
    /* Garbage following port number. */
    return 0;

  *domain_e_ptr = colon;
  return port;
}
1115 #define GET_WORD(p, b, e) do { \
1117 while (*p && *p != '\t') \
1120 if (b == e || !*p) \
1125 /* Load cookies from FILE. */
1128 cookie_jar_load (struct cookie_jar *jar, const char *file)
1131 FILE *fp = fopen (file, "r");
1134 logprintf (LOG_NOTQUIET, _("Cannot open cookies file `%s': %s\n"),
1135 file, strerror (errno));
1138 cookies_now = time (NULL);
1140 for (; ((line = read_whole_line (fp)) != NULL); xfree (line))
1142 struct cookie *cookie;
1148 char *domain_b = NULL, *domain_e = NULL;
1149 char *domflag_b = NULL, *domflag_e = NULL;
1150 char *path_b = NULL, *path_e = NULL;
1151 char *secure_b = NULL, *secure_e = NULL;
1152 char *expires_b = NULL, *expires_e = NULL;
1153 char *name_b = NULL, *name_e = NULL;
1154 char *value_b = NULL, *value_e = NULL;
1156 /* Skip leading white-space. */
1157 while (*p && ISSPACE (*p))
1159 /* Ignore empty lines. */
1160 if (!*p || *p == '#')
1163 GET_WORD (p, domain_b, domain_e);
1164 GET_WORD (p, domflag_b, domflag_e);
1165 GET_WORD (p, path_b, path_e);
1166 GET_WORD (p, secure_b, secure_e);
1167 GET_WORD (p, expires_b, expires_e);
1168 GET_WORD (p, name_b, name_e);
1170 /* Don't use GET_WORD for value because it ends with newline,
1173 value_e = p + strlen (p);
1174 if (value_e > value_b && value_e[-1] == '\n')
1176 if (value_e > value_b && value_e[-1] == '\r')
1178 /* Empty values are legal (I think), so don't bother checking. */
1180 cookie = cookie_new ();
1182 cookie->attr = strdupdelim (name_b, name_e);
1183 cookie->value = strdupdelim (value_b, value_e);
1184 cookie->path = strdupdelim (path_b, path_e);
1185 cookie->secure = BOUNDED_EQUAL (secure_b, secure_e, "TRUE");
1187 /* Curl source says, quoting Andre Garcia: "flag: A TRUE/FALSE
1188 value indicating if all machines within a given domain can
1189 access the variable. This value is set automatically by the
1190 browser, depending on the value set for the domain." */
1191 cookie->domain_exact = !BOUNDED_EQUAL (domflag_b, domflag_e, "TRUE");
1193 /* DOMAIN needs special treatment because we might need to
1194 extract the port. */
1195 port = domain_port (domain_b, domain_e, (const char **)&domain_e);
1197 cookie->port = port;
1199 if (*domain_b == '.')
1200 ++domain_b; /* remove leading dot internally */
1201 cookie->domain = strdupdelim (domain_b, domain_e);
1203 /* safe default in case EXPIRES field is garbled. */
1204 expiry = (double)cookies_now - 1;
1206 /* I don't like changing the line, but it's safe here. (line is
1209 sscanf (expires_b, "%lf", &expiry);
1213 /* EXPIRY can be 0 for session cookies saved because the
1214 user specified `--keep-session-cookies' in the past.
1215 They remain session cookies, and will be saved only if
1216 the user has specified `keep-session-cookies' again. */
1220 if (expiry < cookies_now)
1221 goto abort_cookie; /* ignore stale cookie. */
1222 cookie->expiry_time = expiry;
1223 cookie->permanent = 1;
1226 store_cookie (jar, cookie);
1232 delete_cookie (cookie);
1237 /* Save cookies, in format described above, to FILE. */
1240 cookie_jar_save (struct cookie_jar *jar, const char *file)
1243 hash_table_iterator iter;
1245 DEBUGP (("Saving cookies to %s.\n", file));
1247 cookies_now = time (NULL);
1249 fp = fopen (file, "w");
1252 logprintf (LOG_NOTQUIET, _("Cannot open cookies file `%s': %s\n"),
1253 file, strerror (errno));
1257 fputs ("# HTTP cookie file.\n", fp);
1258 fprintf (fp, "# Generated by Wget on %s.\n", datetime_str (cookies_now));
1259 fputs ("# Edit at your own risk.\n\n", fp);
1261 for (hash_table_iterate (jar->chains, &iter);
1262 hash_table_iter_next (&iter);
1265 const char *domain = iter.key;
1266 struct cookie *cookie = iter.value;
1267 for (; cookie; cookie = cookie->next)
1269 if (!cookie->permanent && !opt.keep_session_cookies)
1271 if (cookie_expired_p (cookie))
1273 if (!cookie->domain_exact)
1276 if (cookie->port != PORT_ANY)
1277 fprintf (fp, ":%d", cookie->port);
1278 fprintf (fp, "\t%s\t%s\t%s\t%.0f\t%s\t%s\n",
1279 cookie->domain_exact ? "FALSE" : "TRUE",
1280 cookie->path, cookie->secure ? "TRUE" : "FALSE",
1281 (double)cookie->expiry_time,
1282 cookie->attr, cookie->value);
1289 logprintf (LOG_NOTQUIET, _("Error writing to `%s': %s\n"),
1290 file, strerror (errno));
1291 if (fclose (fp) < 0)
1292 logprintf (LOG_NOTQUIET, _("Error closing `%s': %s\n"),
1293 file, strerror (errno));
1295 DEBUGP (("Done saving cookies.\n"));
1298 /* Clean up cookie-related data. */
1301 cookie_jar_delete (struct cookie_jar *jar)
1303 /* Iterate over chains (indexed by domain) and free them. */
1304 hash_table_iterator iter;
1305 for (hash_table_iterate (jar->chains, &iter); hash_table_iter_next (&iter); )
1307 struct cookie *chain = iter.value;
1309 /* Then all cookies in this chain. */
1312 struct cookie *next = chain->next;
1313 delete_cookie (chain);
1317 hash_table_destroy (jar->chains);
1321 /* Test cases. Currently this only tests parse_set_cookie. To
1322 use, recompile Wget with -DTEST_COOKIES and call test_cookies()
1329 /* Tests expected to succeed: */
1332 const char *results[10];
1334 { "arg=value", {"arg", "value", NULL} },
1335 { "arg1=value1;arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
1336 { "arg1=value1; arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
1337 { "arg1=value1; arg2=value2;", {"arg1", "value1", "arg2", "value2", NULL} },
1338 { "arg1=value1; arg2=value2; ", {"arg1", "value1", "arg2", "value2", NULL} },
1339 { "arg1=\"value1\"; arg2=\"\"", {"arg1", "value1", "arg2", "", NULL} },
1340 { "arg=", {"arg", "", NULL} },
1341 { "arg1=; arg2=", {"arg1", "", "arg2", "", NULL} },
1342 { "arg1 = ; arg2= ", {"arg1", "", "arg2", "", NULL} },
1345 /* Tests expected to fail: */
1346 static char *tests_fail[] = {
1348 "arg=\"unterminated",
1350 "arg1=;=another-empty-name",
1354 for (i = 0; i < countof (tests_succ); i++)
1357 const char *data = tests_succ[i].data;
1358 const char **expected = tests_succ[i].results;
1361 c = parse_set_cookie (data, true);
1364 printf ("NULL cookie returned for valid data: %s\n", data);
1368 /* Test whether extract_param handles these cases correctly. */
1370 param_token name, value;
1371 const char *ptr = data;
1373 while (extract_param (&ptr, &name, &value, ';'))
1375 char *n = strdupdelim (name.b, name.e);
1376 char *v = strdupdelim (value.b, value.e);
1379 printf ("Too many parameters for '%s'\n", data);
1382 if (0 != strcmp (expected[j], n))
1383 printf ("Invalid name %d for '%s' (expected '%s', got '%s')\n",
1384 j / 2 + 1, data, expected[j], n);
1385 if (0 != strcmp (expected[j + 1], v))
1386 printf ("Invalid value %d for '%s' (expected '%s', got '%s')\n",
1387 j / 2 + 1, data, expected[j + 1], v);
1393 printf ("Too few parameters for '%s'\n", data);
1397 for (i = 0; i < countof (tests_fail); i++)
1400 char *data = tests_fail[i];
1401 c = parse_set_cookie (data, true);
1403 printf ("Failed to report error on invalid data: %s\n", data);
1406 #endif /* TEST_COOKIES */