1 /* Support for cookies.
2 Copyright (C) 2001, 2002 Free Software Foundation, Inc.
4 This file is part of GNU Wget.
6 GNU Wget is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or (at
9 your option) any later version.
11 GNU Wget is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Wget; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 In addition, as a special exception, the Free Software Foundation
21 gives permission to link the code of its release of Wget with the
22 OpenSSL project's "OpenSSL" library (or with modified versions of it
23 that use the same license as the "OpenSSL" library), and distribute
24 the linked executables. You must obey the GNU General Public License
25 in all respects for all of the code used other than "OpenSSL". If you
26 modify this file, you may extend this exception to your version of the
27 file, but you are not obligated to do so. If you do not wish to do
28 so, delete this exception statement from your version. */
30 /* Written by Hrvoje Niksic. Parts are loosely inspired by cookie
31 code submitted by Tomasz Wegrzanowski.
33 TODO: Implement limits on cookie-related sizes, such as max. cookie
34 size, max. number of cookies, etc. Add more "cookie jar" methods,
35 such as methods to iterate over stored cookies, to clear temporary cookies,
36 to perform intelligent auto-saving, etc. Ultimately support
37 `Set-Cookie2' and `Cookie2' headers. */
56 /* This should *really* be in a .h file! */
57 time_t http_atotm PARAMS ((const char *));
59 /* Declarations of `struct cookie' and the most basic functions. */
62 /* Hash table that maps domain names to cookie chains. A "cookie
63 chain" is a linked list of cookies that belong to the same
65 struct hash_table *chains_by_domain;
67 int cookie_count; /* number of cookies in the jar. */
70 /* Value set by entry point functions, so that the low-level
71 routines don't need to call time() all the time. */
77 struct cookie_jar *jar = xmalloc (sizeof (struct cookie_jar));
78 jar->chains_by_domain = make_nocase_string_hash_table (0);
79 jar->cookie_count = 0;
84 char *domain; /* domain of the cookie */
85 int port; /* port number */
86 char *path; /* path prefix of the cookie */
88 int secure; /* whether cookie should be
89 transmitted over non-https
91 int domain_exact; /* whether DOMAIN must match as a
94 int permanent; /* whether the cookie should outlive
96 time_t expiry_time; /* time when the cookie expires */
98 int discard_requested; /* whether cookie was created to
99 request discarding another
102 char *attr; /* cookie attribute name */
103 char *value; /* cookie attribute value */
105 struct cookie_jar *jar; /* pointer back to the cookie jar, for
107 struct cookie *next; /* used for chaining of cookies in the
111 #define PORT_ANY (-1) /* sentinel port value: the cookie is not restricted to one port (default set by cookie_new; skips the port comparison in matching_cookie) */
112 #define COOKIE_EXPIRED_P(c) ((c)->expiry_time != 0 && (c)->expiry_time < cookies_now) /* non-zero when C has an expiry time set (expiry_time != 0) that lies before cookies_now; an expiry_time of 0 never counts as expired */
114 /* Allocate and return a new, empty cookie structure. */
116 static struct cookie *
119 struct cookie *cookie = xmalloc (sizeof (struct cookie));
120 memset (cookie, '\0', sizeof (struct cookie));
122 /* Both cookie->permanent and cookie->expiry_time are now 0. By
123 default, we assume that the cookie is non-permanent and valid
124 until the end of the session. */
126 cookie->port = PORT_ANY;
130 /* Deallocate COOKIE and its components. */
133 delete_cookie (struct cookie *cookie)
135 FREE_MAYBE (cookie->domain);
136 FREE_MAYBE (cookie->path);
137 FREE_MAYBE (cookie->attr);
138 FREE_MAYBE (cookie->value);
142 /* Functions for storing cookies.
144 All cookies can be reached beginning with jar->chains_by_domain.
145 The key in that table is the domain name, and the value is a linked
146 list of all cookies from that domain. Every new cookie is placed
147 on the head of the list. */
149 /* Find and return a cookie in JAR whose domain, path, and attribute
150 name correspond to COOKIE. If found, PREVPTR will point to the
151 location of the cookie previous in chain, or NULL if the found
152 cookie is the head of a chain.
154 If no matching cookie is found, return NULL. */
156 static struct cookie *
157 find_matching_cookie (struct cookie_jar *jar, struct cookie *cookie,
158 struct cookie **prevptr)
160 struct cookie *chain, *prev;
162 chain = hash_table_get (jar->chains_by_domain, cookie->domain);
167 for (; chain; prev = chain, chain = chain->next)
168 if (0 == strcmp (cookie->path, chain->path)
169 && 0 == strcmp (cookie->attr, chain->attr)
170 && cookie->port == chain->port)
181 /* Store COOKIE to the jar.
183 This is done by placing COOKIE at the head of its chain. However,
184 if COOKIE matches a cookie already in memory, as determined by
185 find_matching_cookie, the old cookie is unlinked and destroyed.
187 The key of each chain's hash table entry is allocated only the
188 first time; next hash_table_put's reuse the same key. */
191 store_cookie (struct cookie_jar *jar, struct cookie *cookie)
193 struct cookie *chain_head;
196 if (hash_table_get_pair (jar->chains_by_domain, cookie->domain,
197 &chain_key, &chain_head))
199 /* A chain of cookies in this domain already exists. Check for
200 duplicates -- if an extant cookie exactly matches our domain,
201 port, path, and name, replace it. */
203 struct cookie *victim = find_matching_cookie (jar, cookie, &prev);
207 /* Remove VICTIM from the chain. COOKIE will be placed at
211 prev->next = victim->next;
212 cookie->next = chain_head;
216 /* prev is NULL; apparently VICTIM was at the head of
217 the chain. This place will be taken by COOKIE, so
218 all we need to do is: */
219 cookie->next = victim->next;
221 delete_cookie (victim);
223 DEBUGP (("Deleted old cookie (to be replaced.)\n"));
226 cookie->next = chain_head;
230 /* We are now creating the chain. Allocate the string that will
231 be used as a key. It is unsafe to use cookie->domain for
232 that, because it might get deallocated by the above code at
235 chain_key = xstrdup (cookie->domain);
238 hash_table_put (jar->chains_by_domain, chain_key, cookie);
241 DEBUGP (("\nStored cookie %s %d%s %s %s %d %s %s %s\n",
242 cookie->domain, cookie->port,
243 cookie->port == PORT_ANY ? " (ANY)" : "",
245 cookie->permanent ? "permanent" : "nonpermanent",
248 ? asctime (localtime (&cookie->expiry_time)) : "<undefined>",
249 cookie->attr, cookie->value));
252 /* Discard a cookie matching COOKIE's domain, port, path, and
253 attribute name. This gets called when we encounter a cookie whose
254 expiry date is in the past, or whose max-age is set to 0. The
255 former corresponds to netscape cookie spec, while the latter is
256 specified by rfc2109. */
259 discard_matching_cookie (struct cookie_jar *jar, struct cookie *cookie)
261 struct cookie *prev, *victim;
263 if (!hash_table_count (jar->chains_by_domain))
264 /* No elements == nothing to discard. */
267 victim = find_matching_cookie (jar, cookie, &prev);
271 /* Simply unchain the victim. */
272 prev->next = victim->next;
275 /* VICTIM was head of its chain. We need to place a new
276 cookie at the head. */
277 char *chain_key = NULL;
280 res = hash_table_get_pair (jar->chains_by_domain, victim->domain,
285 /* VICTIM was the only cookie in the chain. Destroy the
286 chain and deallocate the chain key. */
287 hash_table_remove (jar->chains_by_domain, victim->domain);
291 hash_table_put (jar->chains_by_domain, chain_key, victim->next);
293 delete_cookie (victim);
294 DEBUGP (("Discarded old cookie.\n"));
298 /* Functions for parsing the `Set-Cookie' header, and creating new
299 cookies from the wire. */
302 #define NAME_IS(string_literal) \
303 BOUNDED_EQUAL_NO_CASE (name_b, name_e, string_literal) /* tests the attribute name delimited by [name_b, name_e) against STRING_LITERAL; name_b and name_e must be in scope where the macro is used */
305 #define VALUE_EXISTS (value_b && value_e) /* non-zero when both value bounds are non-NULL; value_b and value_e must be in scope at the use site */
307 #define VALUE_NON_EMPTY (VALUE_EXISTS && (value_b != value_e)) /* non-zero when a value is present and its bounds differ, i.e. it spans at least one character */
309 /* Update the appropriate cookie field. [name_b, name_e) are expected
310 to delimit the attribute name, while [value_b, value_e) (optional)
311 should delimit the attribute value.
313 When called the first time, it will set the cookie's attribute name
314 and value. After that, it will check the attribute name for
315 special fields such as `domain', `path', etc. Where appropriate,
316 it will parse the values of the fields it recognizes and fill the
317 corresponding fields in COOKIE.
319 Returns 1 on success. Returns zero in case a syntax error is
320 found; such a cookie should be discarded. */
323 update_cookie_field (struct cookie *cookie,
324 const char *name_b, const char *name_e,
325 const char *value_b, const char *value_e)
327 assert (name_b != NULL && name_e != NULL);
333 cookie->attr = strdupdelim (name_b, name_e);
334 cookie->value = strdupdelim (value_b, value_e);
338 if (NAME_IS ("domain"))
340 if (!VALUE_NON_EMPTY)
342 FREE_MAYBE (cookie->domain);
343 /* Strictly speaking, we should set cookie->domain_exact if the
344 domain doesn't begin with a dot. But many sites set the
345 domain to "foo.com" and expect "subhost.foo.com" to get the
346 cookie, and it apparently works. */
349 cookie->domain = strdupdelim (value_b, value_e);
352 else if (NAME_IS ("path"))
354 if (!VALUE_NON_EMPTY)
356 FREE_MAYBE (cookie->path);
357 cookie->path = strdupdelim (value_b, value_e);
360 else if (NAME_IS ("expires"))
365 if (!VALUE_NON_EMPTY)
367 BOUNDED_TO_ALLOCA (value_b, value_e, value_copy);
369 expires = http_atotm (value_copy);
372 cookie->permanent = 1;
373 cookie->expiry_time = (time_t)expires;
376 /* Error in expiration spec. Assume default (cookie valid for
380 /* According to netscape's specification, expiry time in the
381 past means that discarding of a matching cookie is
383 if (cookie->expiry_time < cookies_now)
384 cookie->discard_requested = 1;
388 else if (NAME_IS ("max-age"))
393 if (!VALUE_NON_EMPTY)
395 BOUNDED_TO_ALLOCA (value_b, value_e, value_copy);
397 sscanf (value_copy, "%lf", &maxage);
399 /* something went wrong. */
401 cookie->permanent = 1;
402 cookie->expiry_time = cookies_now + maxage;
404 /* According to rfc2109, a cookie with max-age of 0 means that
405 discarding of a matching cookie is requested. */
407 cookie->discard_requested = 1;
411 else if (NAME_IS ("secure"))
413 /* ignore value completely */
418 /* Unrecognized attribute; ignore it. */
424 /* Returns non-zero for characters that are legal in the name of an
425 attribute. This used to allow only alphanumerics, '-', and '_',
426 but we need to be more lenient because a number of sites want to
427 use weirder attribute names. rfc2965 "informally specifies"
428 attribute name (token) as "a sequence of non-special, non-white
429 space characters". So we allow everything except the stuff we know
432 #define ATTR_NAME_CHAR(c) ((c) > 32 && (c) < 127 \
433 && (c) != '"' && (c) != '=' \
434 && (c) != ';' && (c) != ',') /* accepts character codes 33..126 except '"', '=', ';' and ','; note that C is evaluated several times */
436 /* Parse the contents of the `Set-Cookie' header. The header looks
439 name1=value1; name2=value2; ...
441 Trailing semicolon is optional; spaces are allowed between all
442 tokens. Additionally, values may be quoted.
444 A new cookie is returned upon success, NULL otherwise. The
445 specified CALLBACK function (normally `update_cookie_field') is used
446 to update the fields of the newly created cookie structure. */
448 static struct cookie *
449 parse_set_cookies (const char *sc,
450 int (*callback) (struct cookie *,
451 const char *, const char *,
452 const char *, const char *),
455 struct cookie *cookie = cookie_new ();
457 /* #### Hand-written DFAs are no fun to debug. We'd be better off
458 rewriting this as an inline parser. */
460 enum { S_START, S_NAME, S_NAME_POST,
461 S_VALUE_PRE, S_VALUE, S_QUOTED_VALUE, S_VALUE_TRAILSPACE,
462 S_ATTR_ACTION, S_DONE, S_ERROR
468 const char *name_b = NULL, *name_e = NULL;
469 const char *value_b = NULL, *value_e = NULL;
473 while (state != S_DONE && state != S_ERROR)
480 else if (ISSPACE (c))
481 /* Strip all whitespace preceding the name. */
483 else if (ATTR_NAME_CHAR (c))
489 /* empty attr name not allowed */
493 if (!c || c == ';' || c == '=' || ISSPACE (c))
498 else if (ATTR_NAME_CHAR (c))
506 value_b = value_e = NULL;
509 state = S_ATTR_ACTION;
516 else if (ISSPACE (c))
517 /* Ignore space and keep the state. */
525 value_b = value_e = p;
528 state = S_ATTR_ACTION;
534 state = S_QUOTED_VALUE;
536 else if (ISSPACE (c))
546 if (!c || c == ';' || ISSPACE (c))
549 state = S_VALUE_TRAILSPACE;
553 value_e = NULL; /* no trailing space */
562 state = S_VALUE_TRAILSPACE;
569 case S_VALUE_TRAILSPACE:
573 state = S_ATTR_ACTION;
576 state = S_ATTR_ACTION;
577 else if (ISSPACE (c))
584 int legal = callback (cookie, name_b, name_e, value_b, value_e);
590 BOUNDED_TO_ALLOCA (name_b, name_e, name);
591 logprintf (LOG_NOTQUIET,
592 _("Error in Set-Cookie, field `%s'"), name);
602 /* handled by loop condition */
609 delete_cookie (cookie);
610 if (state != S_ERROR)
614 logprintf (LOG_NOTQUIET,
615 _("Syntax error in Set-Cookie: %s at position %d.\n"),
620 /* Sanity checks. These are important, otherwise it is possible for
621 malicious attackers to destroy important cookie information and/or
622 violate your privacy. */
625 #define REQUIRE_DIGITS(p) do { \
628 for (++p; ISDIGIT (*p); p++) \
632 #define REQUIRE_DOT(p) do { \
637 /* Check whether ADDR matches <digits>.<digits>.<digits>.<digits>.
639 We don't want to call network functions like inet_addr() because all
640 we need is a check, preferably one that is small, fast, and
644 numeric_address_p (const char *addr)
646 const char *p = addr;
648 REQUIRE_DIGITS (p); /* A */
649 REQUIRE_DOT (p); /* . */
650 REQUIRE_DIGITS (p); /* B */
651 REQUIRE_DOT (p); /* . */
652 REQUIRE_DIGITS (p); /* C */
653 REQUIRE_DOT (p); /* . */
654 REQUIRE_DIGITS (p); /* D */
661 /* Check whether COOKIE_DOMAIN is an appropriate domain for HOST.
662 Originally I tried to make the check compliant with rfc2109, but
663 the sites deviated too often, so I had to fall back to "tail
664 matching", as defined by the original Netscape's cookie spec. */
667 check_domain_match (const char *cookie_domain, const char *host)
671 /* Numeric address requires exact match. It also requires HOST to
673 if (numeric_address_p (cookie_domain))
674 return 0 == strcmp (cookie_domain, host);
678 /* For the sake of efficiency, check for exact match first. */
679 if (0 == strcasecmp (cookie_domain, host))
684 /* HOST must match the tail of cookie_domain. */
685 if (!match_tail (host, cookie_domain, 1))
688 /* We know that COOKIE_DOMAIN is a tail (suffix) of HOST; however, we must
689 make sure that somebody is not trying to set the cookie for a
690 subdomain shared by many entities. For example, "company.co.uk"
691 must not be allowed to set a cookie for ".co.uk". On the other
692 hand, "sso.redhat.de" should be able to set a cookie for
695 The only marginally sane way to handle this I can think of is to
696 reject on the basis of the length of the second-level domain name
697 (but when the top-level domain is unknown), with the assumption
698 that those of three or less characters could be reserved. For
701 .co.org -> works because the TLD is known
702 .co.uk -> doesn't work because "co" is only two chars long
703 .com.au -> doesn't work because "com" is only 3 chars long
704 .cnn.uk -> doesn't work because "cnn" is also only 3 chars long (ugh)
705 .cnn.de -> doesn't work for the same reason (ugh!!)
706 .abcd.de -> works because "abcd" is 4 chars long
707 .img.cnn.de -> works because it's not trying to set the 2nd level domain
708 .cnn.co.uk -> works for the same reason
710 That should prevent misuse, while allowing reasonable usage. If
711 someone knows of a better way to handle this, please let me
714 const char *p = cookie_domain;
715 int dccount = 1; /* number of domain components */
716 int ldcl = 0; /* last domain component length */
717 int nldcl = 0; /* next to last domain component length */
720 /* Ignore leading period in this calculation. */
723 for (out = 0; !out; p++)
731 /* Empty domain component found -- the domain is invalid. */
733 if (*(p + 1) == '\0')
735 /* Tolerate trailing '.' by not treating the domain as
736 one ending with an empty domain component. */
758 int known_toplevel = 0;
759 static char *known_toplevel_domains[] = {
760 ".com", ".edu", ".net", ".org", ".gov", ".mil", ".int"
762 for (i = 0; i < countof (known_toplevel_domains); i++)
763 if (match_tail (cookie_domain, known_toplevel_domains[i], 1))
768 if (!known_toplevel && nldcl <= 3)
775 /* Don't allow domain "bar.com" to match host "foobar.com". */
776 if (*cookie_domain != '.')
778 int dlen = strlen (cookie_domain);
779 int hlen = strlen (host);
780 /* cookie host: hostname.foobar.com */
781 /* desired domain: bar.com */
782 /* '.' must be here in host-> ^ */
783 if (hlen > dlen && host[hlen - dlen - 1] != '.')
792 static int path_matches PARAMS ((const char *, const char *));
794 /* Check whether PATH begins with COOKIE_PATH. */
797 check_path_match (const char *cookie_path, const char *path)
799 return path_matches (path, cookie_path);
802 /* Process the HTTP `Set-Cookie' header. This results in storing the
803 cookie or discarding a matching one, or ignoring it completely, all
804 depending on the contents. */
807 cookie_jar_process_set_cookie (struct cookie_jar *jar,
808 const char *host, int port,
809 const char *path, const char *set_cookie)
811 struct cookie *cookie;
812 cookies_now = time (NULL);
814 cookie = parse_set_cookies (set_cookie, update_cookie_field, 0);
818 /* Sanitize parts of cookie. */
823 cookie->domain = xstrdup (host);
828 if (!check_domain_match (cookie->domain, host))
830 logprintf (LOG_NOTQUIET,
831 "Cookie coming from %s attempted to set domain to %s\n",
832 host, cookie->domain);
837 cookie->path = xstrdup (path);
840 if (!check_path_match (cookie->path, path))
842 DEBUGP (("Attempt to fake the path: %s, %s\n",
843 cookie->path, path));
848 if (cookie->discard_requested)
850 discard_matching_cookie (jar, cookie);
854 store_cookie (jar, cookie);
859 delete_cookie (cookie);
862 /* Support for sending out cookies in HTTP requests, based on
863 previously stored cookies. Entry point is
864 `build_cookies_request'. */
866 /* Find the cookie chains that match HOST and store them to DEST.
868 A cookie chain is the list of cookies declared under a domain.
869 Given HOST "img.search.xemacs.org", this function will store the
870 chains for "img.search.xemacs.org", "search.xemacs.org", and
871 "xemacs.org" -- those of them that exist (if any), that is.
873 No more than DEST_SIZE matches are written; if more matches are present,
874 return the number of chains that would have been written. */
877 find_matching_chains (struct cookie_jar *jar, const char *host,
878 struct cookie *dest[], int dest_size)
883 if (!hash_table_count (jar->chains_by_domain))
886 if (numeric_address_p (host))
887 /* If host is an IP address, only check for the exact match. */
890 /* Otherwise, check all the subdomains except the top-level (last)
891 one. As a domain with N components has N-1 dots, the number of
892 passes equals the number of dots. */
893 passes = count_char (host, '.');
897 /* Find chains that match HOST, starting with exact match and
898 progressing to less specific domains. For instance, given HOST
899 fly.srk.fer.hr, first look for fly.srk.fer.hr's chain, then
900 srk.fer.hr's, then fer.hr's. */
903 struct cookie *chain = hash_table_get (jar->chains_by_domain, host);
906 if (dest_count < dest_size)
907 dest[dest_count] = chain;
910 if (++passcnt >= passes)
912 host = strchr (host, '.') + 1;
918 /* If FULL_PATH begins with PREFIX, return the length of PREFIX, zero
922 path_matches (const char *full_path, const char *prefix)
927 /* Wget's HTTP paths do not begin with '/' (the URL code treats it
928 as a separator), but the '/' is assumed when matching against
933 len = strlen (prefix);
935 if (0 != strncmp (full_path, prefix, len))
936 /* FULL_PATH doesn't begin with PREFIX. */
939 /* Length of PREFIX determines the quality of the match. */
943 /* Return non-zero iff COOKIE matches the given HOST, PORT, PATH, and
946 If PATH_GOODNESS is non-NULL, store the "path goodness" value
947 there. That value is a measure of how well COOKIE matches PATH,
948 used for ordering cookies. */
951 matching_cookie (const struct cookie *cookie,
952 const char *host, int port, const char *path,
953 int secure, int *path_goodness)
957 if (COOKIE_EXPIRED_P (cookie))
958 /* Ignore stale cookies. Don't bother unchaining the cookie at
959 this point -- Wget is a relatively short-lived application, and
960 stale cookies will not be saved by `save_cookies'. On the
961 other hand, this function should be as efficient as
965 if (cookie->secure && !secure)
966 /* Don't transmit secure cookies over insecure connections. */
968 if (cookie->port != PORT_ANY && cookie->port != port)
971 /* If exact domain match is required, verify that cookie's domain is
972 equal to HOST. If not, assume success on the grounds of the
973 cookie's chain having been found by find_matching_chains. */
974 if (cookie->domain_exact
975 && 0 != strcasecmp (host, cookie->domain))
978 pg = path_matches (path, cookie->path);
983 /* If the caller requested path_goodness, we return it. This is
984 an optimization, so that the caller doesn't need to call
985 path_matches() again. */
990 /* A structure that points to a cookie, along with the additional
991 information about the cookie's "goodness". This allows us to sort
992 the cookies when returning them to the server, as required by the
995 struct weighed_cookie {
996 struct cookie *cookie;
1001 /* Comparator used for uniquifying the list. */
1004 equality_comparator (const void *p1, const void *p2)
1006 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
1007 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
1009 int namecmp = strcmp (wc1->cookie->attr, wc2->cookie->attr);
1010 int valuecmp = strcmp (wc1->cookie->value, wc2->cookie->value);
1012 /* We only really care whether both name and value are equal. We
1013 return them in this order only for consistency... */
1014 return namecmp ? namecmp : valuecmp;
1017 /* Eliminate duplicate cookies. "Duplicate cookies" are any two
1018 cookies whose name and value are the same. Whenever a duplicate
1019 pair is found, one of the cookies is removed. */
1022 eliminate_dups (struct weighed_cookie *outgoing, int count)
1026 /* We deploy a simple uniquify algorithm: first sort the array
1027 according to our sort criteria, then uniquify it by comparing
1028 each cookie with its neighbor. */
1030 qsort (outgoing, count, sizeof (struct weighed_cookie), equality_comparator);
1032 for (i = 0; i < count - 1; i++)
1034 struct cookie *c1 = outgoing[i].cookie;
1035 struct cookie *c2 = outgoing[i + 1].cookie;
1036 if (!strcmp (c1->attr, c2->attr) && !strcmp (c1->value, c2->value))
1038 /* c1 and c2 are the same; get rid of c2. */
1040 /* move all ptrs from positions [i + 1, count) to i. */
1041 memmove (outgoing + i, outgoing + i + 1,
1042 (count - (i + 1)) * sizeof (struct weighed_cookie));
1043 /* We decrement i to counter the ++i above. Remember that
1044 we've just removed the element in front of us; we need to
1045 remain in place to check whether outgoing[i] matches what
1046 used to be outgoing[i + 2]. */
1054 /* Comparator used for sorting by quality. */
1057 goodness_comparator (const void *p1, const void *p2)
1059 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
1060 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
1062 /* Subtractions take `wc2' as the first argument because we want a
1063 sort in *decreasing* order of goodness. */
1064 int dgdiff = wc2->domain_goodness - wc1->domain_goodness;
1065 int pgdiff = wc2->path_goodness - wc1->path_goodness;
1067 /* Sort by domain goodness; if these are the same, sort by path
1068 goodness. (The sorting order isn't really specified; maybe it
1069 should be the other way around.) */
1070 return dgdiff ? dgdiff : pgdiff;
1073 /* Generate a `Cookie' header for a request that goes to HOST:PORT and
1074 requests PATH from the server. The resulting string is allocated
1075 with `malloc', and the caller is responsible for freeing it. If no
1076 cookies pertain to this request, i.e. no cookie header should be
1077 generated, NULL is returned. */
1080 cookie_jar_generate_cookie_header (struct cookie_jar *jar, const char *host,
1081 int port, const char *path,
1082 int connection_secure_p)
1084 struct cookie *chain_default_store[20];
1085 struct cookie **chains = chain_default_store;
1086 int chain_store_size = countof (chain_default_store);
1089 struct cookie *cookie;
1090 struct weighed_cookie *outgoing;
1093 int result_size, pos;
1095 /* First, find the chains that match HOST. */
1097 chain_count = find_matching_chains (jar, host, chains, chain_store_size);
1098 if (chain_count > chain_store_size)
1100 /* It's extremely unlikely that more than 20 chains will ever
1101 match. But since find_matching_chains reports the exact size
1102 it needs, it's easy to not have the limitation, so we
1104 chains = alloca (chain_count * sizeof (struct cookie *));
1105 chain_store_size = chain_count;
1112 cookies_now = time (NULL);
1114 /* Now extract from the chains those cookies that match our host
1115 (for domain_exact cookies), port (for cookies with port other
1116 than PORT_ANY), etc. See matching_cookie for details. */
1118 /* Count the number of matching cookies. */
1120 for (i = 0; i < chain_count; i++)
1121 for (cookie = chains[i]; cookie; cookie = cookie->next)
1122 if (matching_cookie (cookie, host, port, path, connection_secure_p, NULL))
1125 return NULL; /* no cookies matched */
1127 /* Allocate the array. */
1128 outgoing = alloca (count * sizeof (struct weighed_cookie));
1130 /* Fill the array with all the matching cookies from the chains that
1133 for (i = 0; i < chain_count; i++)
1134 for (cookie = chains[i]; cookie; cookie = cookie->next)
1137 if (!matching_cookie (cookie, host, port, path,
1138 connection_secure_p, &pg))
1140 outgoing[ocnt].cookie = cookie;
1141 outgoing[ocnt].domain_goodness = strlen (cookie->domain);
1142 outgoing[ocnt].path_goodness = pg;
1145 assert (ocnt == count);
1147 /* Eliminate duplicate cookies; that is, those whose name and value
1149 count = eliminate_dups (outgoing, count);
1151 /* Sort the array so that best-matching domains come first, and
1152 that, within one domain, best-matching paths come first. */
1153 qsort (outgoing, count, sizeof (struct weighed_cookie), goodness_comparator);
1155 /* Count the space the name=value pairs will take. */
1157 for (i = 0; i < count; i++)
1159 struct cookie *c = outgoing[i].cookie;
1161 result_size += strlen (c->attr) + 1 + strlen (c->value);
1164 /* Allocate output buffer:
1166 name=value pairs -- result_size
1167 "; " separators -- (count - 1) * 2
1168 \r\n line ending -- 2
1169 \0 terminator -- 1 */
1170 result_size = 8 + result_size + (count - 1) * 2 + 2 + 1;
1171 result = xmalloc (result_size);
1173 strcpy (result, "Cookie: ");
1175 for (i = 0; i < count; i++)
1177 struct cookie *c = outgoing[i].cookie;
1178 int namlen = strlen (c->attr);
1179 int vallen = strlen (c->value);
1181 memcpy (result + pos, c->attr, namlen);
1183 result[pos++] = '=';
1184 memcpy (result + pos, c->value, vallen);
1188 result[pos++] = ';';
1189 result[pos++] = ' ';
1192 result[pos++] = '\r';
1193 result[pos++] = '\n';
1194 result[pos++] = '\0';
1195 assert (pos == result_size);
1199 /* Support for loading and saving cookies. The format used for
1200 loading and saving should be the format of the `cookies.txt' file
1201 used by Netscape and Mozilla, at least the Unix versions.
1202 (Apparently IE can export cookies in that format as well.) The
1203 format goes like this:
1205 DOMAIN DOMAIN-FLAG PATH SECURE-FLAG TIMESTAMP ATTR-NAME ATTR-VALUE
1207 DOMAIN -- cookie domain, optionally followed by :PORT
1208 DOMAIN-FLAG -- whether all hosts in the domain match
1210 SECURE-FLAG -- whether cookie requires secure connection
1211 TIMESTAMP -- expiry timestamp, number of seconds since epoch
1212 ATTR-NAME -- name of the cookie attribute
1213 ATTR-VALUE -- value of the cookie attribute (empty if absent)
1215 The fields are separated by TABs. All fields are mandatory, except
1216 for ATTR-VALUE. The `-FLAG' fields are boolean, their legal values
1217 being "TRUE" and "FALSE". Empty lines, lines consisting of
1218 whitespace only, and comment lines (beginning with # optionally
1219 preceded by whitespace) are ignored.
1221 Example line from cookies.txt (split in two lines for readability):
1223 .google.com TRUE / FALSE 2147368447 \
1224 PREF ID=34bb47565bbcd47b:LD=en:NR=20:TM=985172580:LM=985739012
1228 /* If the region [DOMAIN_B, DOMAIN_E) ends with :<digits>, parse the number, return
1229 it, and store new boundary (location of the `:') to DOMAIN_E_PTR.
1230 If port is not specified, return 0. */
1233 domain_port (const char *domain_b, const char *domain_e,
1234 const char **domain_e_ptr)
1238 const char *colon = memchr (domain_b, ':', domain_e - domain_b);
1241 for (p = colon + 1; p < domain_e && ISDIGIT (*p); p++)
1242 port = 10 * port + (*p - '0');
1244 /* Garbage following port number. */
1246 *domain_e_ptr = colon;
1250 #define GET_WORD(p, b, e) do { \
1252 while (*p && *p != '\t') \
1255 if (b == e || !*p) \
1260 /* Load cookies from FILE. */
1263 cookie_jar_load (struct cookie_jar *jar, const char *file)
1266 FILE *fp = fopen (file, "r");
1269 logprintf (LOG_NOTQUIET, "Cannot open cookies file `%s': %s\n",
1270 file, strerror (errno));
1273 cookies_now = time (NULL);
1275 for (; ((line = read_whole_line (fp)) != NULL); xfree (line))
1277 struct cookie *cookie;
1283 char *domain_b = NULL, *domain_e = NULL;
1284 char *domflag_b = NULL, *domflag_e = NULL;
1285 char *path_b = NULL, *path_e = NULL;
1286 char *secure_b = NULL, *secure_e = NULL;
1287 char *expires_b = NULL, *expires_e = NULL;
1288 char *name_b = NULL, *name_e = NULL;
1289 char *value_b = NULL, *value_e = NULL;
1291 /* Skip leading white-space. */
1292 while (*p && ISSPACE (*p))
1294 /* Ignore empty lines. */
1295 if (!*p || *p == '#')
1298 GET_WORD (p, domain_b, domain_e);
1299 GET_WORD (p, domflag_b, domflag_e);
1300 GET_WORD (p, path_b, path_e);
1301 GET_WORD (p, secure_b, secure_e);
1302 GET_WORD (p, expires_b, expires_e);
1303 GET_WORD (p, name_b, name_e);
1305 /* Don't use GET_WORD for value because it ends with newline,
1308 value_e = p + strlen (p);
1309 if (value_e > value_b && value_e[-1] == '\n')
1311 if (value_e > value_b && value_e[-1] == '\r')
1313 /* Empty values are legal (I think), so don't bother checking. */
1315 cookie = cookie_new ();
1317 cookie->attr = strdupdelim (name_b, name_e);
1318 cookie->value = strdupdelim (value_b, value_e);
1319 cookie->path = strdupdelim (path_b, path_e);
1320 cookie->secure = BOUNDED_EQUAL (secure_b, secure_e, "TRUE");
1322 /* Curl source says, quoting Andre Garcia: "flag: A TRUE/FALSE
1323 value indicating if all machines within a given domain can
1324 access the variable. This value is set automatically by the
1325 browser, depending on the value set for the domain." */
1326 cookie->domain_exact = !BOUNDED_EQUAL (domflag_b, domflag_e, "TRUE");
1328 /* DOMAIN needs special treatment because we might need to
1329 extract the port. */
1330 port = domain_port (domain_b, domain_e, (const char **)&domain_e);
1332 cookie->port = port;
1334 if (*domain_b == '.')
1335 ++domain_b; /* remove leading dot internally */
1336 cookie->domain = strdupdelim (domain_b, domain_e);
1338 /* safe default in case EXPIRES field is garbled. */
1339 expiry = (double)cookies_now - 1;
1341 /* I don't like changing the line, but it's safe here. (line is
1344 sscanf (expires_b, "%lf", &expiry);
1345 if (expiry < cookies_now)
1346 /* ignore stale cookie. */
1348 cookie->expiry_time = expiry;
1350 /* If the cookie has survived being saved into an external file,
1351 it is obviously permanent. */
1352 cookie->permanent = 1;
1354 store_cookie (jar, cookie);
1360 delete_cookie (cookie);
1365 /* Mapper for save_cookies callable by hash_table_map. VALUE points
1366 to the head in a chain of cookies. The function prints the entire
1370 save_cookies_mapper (void *key, void *value, void *arg)
1372 FILE *fp = (FILE *)arg;
1373 char *domain = (char *)key;
1374 struct cookie *cookie = (struct cookie *)value;
1375 for (; cookie; cookie = cookie->next)
1377 if (!cookie->permanent)
1379 if (COOKIE_EXPIRED_P (cookie))
1381 if (!cookie->domain_exact)
1384 if (cookie->port != PORT_ANY)
1385 fprintf (fp, ":%d", cookie->port);
1386 fprintf (fp, "\t%s\t%s\t%s\t%.0f\t%s\t%s\n",
1387 cookie->domain_exact ? "FALSE" : "TRUE",
1388 cookie->path, cookie->secure ? "TRUE" : "FALSE",
1389 (double)cookie->expiry_time,
1390 cookie->attr, cookie->value);
1392 return 1; /* stop mapping */
1397 /* Save cookies, in format described above, to FILE. */
1400 cookie_jar_save (struct cookie_jar *jar, const char *file)
1404 DEBUGP (("Saving cookies to %s.\n", file));
1406 cookies_now = time (NULL);
1408 fp = fopen (file, "w");
1411 logprintf (LOG_NOTQUIET, _("Cannot open cookies file `%s': %s\n"),
1412 file, strerror (errno));
1416 fputs ("# HTTP cookie file.\n", fp);
1417 fprintf (fp, "# Generated by Wget on %s.\n", datetime_str (NULL));
1418 fputs ("# Edit at your own risk.\n\n", fp);
1420 hash_table_map (jar->chains_by_domain, save_cookies_mapper, fp);
1423 logprintf (LOG_NOTQUIET, _("Error writing to `%s': %s\n"),
1424 file, strerror (errno));
1426 if (fclose (fp) < 0)
1427 logprintf (LOG_NOTQUIET, _("Error closing `%s': %s\n"),
1428 file, strerror (errno));
1430 DEBUGP (("Done saving cookies.\n"));
1433 /* Destroy all the elements in the chain and unhook it from the cookie
1434 jar. This is written in the form of a callback to hash_table_map
1435 and used by cookie_jar_delete to delete all the cookies in a
1439 nuke_cookie_chain (void *value, void *key, void *arg)
1441 char *chain_key = (char *)value;
1442 struct cookie *chain = (struct cookie *)key;
1443 struct cookie_jar *jar = (struct cookie_jar *)arg;
1445 /* Remove the chain from the table and free the key. */
1446 hash_table_remove (jar->chains_by_domain, chain_key);
1449 /* Then delete all the cookies in the chain. */
1452 struct cookie *next = chain->next;
1453 delete_cookie (chain);
1461 /* Clean up cookie-related data. */
1464 cookie_jar_delete (struct cookie_jar *jar)
1466 hash_table_map (jar->chains_by_domain, nuke_cookie_chain, jar);
1467 hash_table_destroy (jar->chains_by_domain);
1471 /* Test cases. Currently this is only tests parse_set_cookies. To
1472 use, recompile Wget with -DTEST_COOKIES and call test_cookies()
1477 char *test_results[10];
1479 static int test_parse_cookies_callback (struct cookie *ignored,
1480 const char *nb, const char *ne,
1481 const char *vb, const char *ve)
1483 test_results[test_count++] = strdupdelim (nb, ne);
1484 test_results[test_count++] = strdupdelim (vb, ve);
1491 /* Tests expected to succeed: */
1497 { "arg=value", {"arg", "value", NULL} },
1498 { "arg1=value1;arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
1499 { "arg1=value1; arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
1500 { "arg1=value1; arg2=value2;", {"arg1", "value1", "arg2", "value2", NULL} },
1501 { "arg1=value1; arg2=value2; ", {"arg1", "value1", "arg2", "value2", NULL} },
1502 { "arg1=\"value1\"; arg2=\"\"", {"arg1", "value1", "arg2", "", NULL} },
1503 { "arg=", {"arg", "", NULL} },
1504 { "arg1=; arg2=", {"arg1", "", "arg2", "", NULL} },
1505 { "arg1 = ; arg2= ", {"arg1", "", "arg2", "", NULL} },
1508 /* Tests expected to fail: */
1509 static char *tests_fail[] = {
1511 "arg=\"unterminated",
1513 "arg1=;=another-empty-name",
1517 for (i = 0; i < countof (tests_succ); i++)
1520 char *data = tests_succ[i].data;
1521 char **expected = tests_succ[i].results;
1525 c = parse_set_cookies (data, test_parse_cookies_callback, 1);
1528 printf ("NULL cookie returned for valid data: %s\n", data);
1532 for (ind = 0; ind < test_count; ind += 2)
1536 if (0 != strcmp (expected[ind], test_results[ind]))
1537 printf ("Invalid name %d for '%s' (expected '%s', got '%s')\n",
1538 ind / 2 + 1, data, expected[ind], test_results[ind]);
1539 if (0 != strcmp (expected[ind + 1], test_results[ind + 1]))
1540 printf ("Invalid value %d for '%s' (expected '%s', got '%s')\n",
1541 ind / 2 + 1, data, expected[ind + 1], test_results[ind + 1]);
1543 if (ind < test_count || expected[ind])
1544 printf ("Unmatched number of results: %s\n", data);
1547 for (i = 0; i < countof (tests_fail); i++)
1550 char *data = tests_fail[i];
1552 c = parse_set_cookies (data, test_parse_cookies_callback, 1);
1554 printf ("Failed to report error on invalid data: %s\n", data);
1557 #endif /* TEST_COOKIES */