1 /* Support for cookies.
2 Copyright (C) 2001, 2002 Free Software Foundation, Inc.
4 This file is part of GNU Wget.
6 GNU Wget is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or (at
9 your option) any later version.
11 GNU Wget is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Wget; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 In addition, as a special exception, the Free Software Foundation
21 gives permission to link the code of its release of Wget with the
22 OpenSSL project's "OpenSSL" library (or with modified versions of it
23 that use the same license as the "OpenSSL" library), and distribute
24 the linked executables. You must obey the GNU General Public License
25 in all respects for all of the code used other than "OpenSSL". If you
26 modify this file, you may extend this exception to your version of the
27 file, but you are not obligated to do so. If you do not wish to do
28 so, delete this exception statement from your version. */
30 /* Written by Hrvoje Niksic. Parts are loosely inspired by cookie
31 code submitted by Tomasz Wegrzanowski.
33 TODO: Implement limits on cookie-related sizes, such as max. cookie
34 size, max. number of cookies, etc. Add more "cookie jar" methods,
35 such as methods to iterate over stored cookies, to clear temporary cookies,
36 to perform intelligent auto-saving, etc. Ultimately support
37 `Set-Cookie2' and `Cookie2' headers. */
56 /* This should *really* be in a .h file! */
57 time_t http_atotm PARAMS ((const char *));
59 /* Declarations of `struct cookie' and the most basic functions. */
62 /* Hash table that maps domain names to cookie chains. A "cookie
63 chain" is a linked list of cookies that belong to the same domain. */
65 struct hash_table *chains_by_domain;
67 int cookie_count; /* number of cookies in the jar. */
70 /* Value set by entry point functions, so that the low-level
71 routines don't need to call time() all the time. */
77 struct cookie_jar *jar = xmalloc (sizeof (struct cookie_jar));
78 jar->chains_by_domain = make_nocase_string_hash_table (0);
79 jar->cookie_count = 0;
84 char *domain; /* domain of the cookie */
85 int port; /* port number */
86 char *path; /* path prefix of the cookie */
88 int secure; /* whether cookie should be
89 transmitted over non-https
91 int domain_exact; /* whether DOMAIN must match as a
94 int permanent; /* whether the cookie should outlive
96 time_t expiry_time; /* time when the cookie expires */
98 int discard_requested; /* whether cookie was created to
99 request discarding another
102 char *attr; /* cookie attribute name */
103 char *value; /* cookie attribute value */
105 struct cookie_jar *jar; /* pointer back to the cookie jar, for
107 struct cookie *next; /* used for chaining of cookies in the
/* Sentinel port value.  Freshly allocated cookies are given this port,
   and matching_cookie() only compares ports for cookies whose port is
   something other than PORT_ANY.  */
111 #define PORT_ANY (-1)
/* Non-zero when cookie C has a non-zero expiry_time that is earlier
   than cookies_now.  A zero expiry_time never counts as expired.  */
112 #define COOKIE_EXPIRED_P(c) ((c)->expiry_time != 0 && (c)->expiry_time < cookies_now)
114 /* Allocate and return a new, empty cookie structure. */
116 static struct cookie *
119 struct cookie *cookie = xmalloc (sizeof (struct cookie));
120 memset (cookie, '\0', sizeof (struct cookie));
122 /* Both cookie->permanent and cookie->expiry_time are now 0. By
123 default, we assume that the cookie is non-permanent and valid
124 until the end of the session. */
126 cookie->port = PORT_ANY;
130 /* Deallocate COOKIE and its components. */
133 delete_cookie (struct cookie *cookie)
135 FREE_MAYBE (cookie->domain);
136 FREE_MAYBE (cookie->path);
137 FREE_MAYBE (cookie->attr);
138 FREE_MAYBE (cookie->value);
142 /* Functions for storing cookies.
144 All cookies can be reached beginning with jar->chains_by_domain.
145 The key in that table is the domain name, and the value is a linked
146 list of all cookies from that domain. Every new cookie is placed
147 on the head of the list. */
149 /* Find and return a cookie in JAR whose domain, path, and attribute
150 name correspond to COOKIE. If found, PREVPTR will point to the
151 location of the cookie previous in chain, or NULL if the found
152 cookie is the head of a chain.
154 If no matching cookie is found, return NULL. */
156 static struct cookie *
157 find_matching_cookie (struct cookie_jar *jar, struct cookie *cookie,
158 struct cookie **prevptr)
160 struct cookie *chain, *prev;
162 chain = hash_table_get (jar->chains_by_domain, cookie->domain);
167 for (; chain; prev = chain, chain = chain->next)
168 if (0 == strcmp (cookie->path, chain->path)
169 && 0 == strcmp (cookie->attr, chain->attr)
170 && cookie->port == chain->port)
181 /* Store COOKIE to the jar.
183 This is done by placing COOKIE at the head of its chain. However,
184 if COOKIE matches a cookie already in memory, as determined by
185 find_matching_cookie, the old cookie is unlinked and destroyed.
187 The key of each chain's hash table entry is allocated only the
188 first time; subsequent calls to hash_table_put reuse the same key. */
191 store_cookie (struct cookie_jar *jar, struct cookie *cookie)
193 struct cookie *chain_head;
196 if (hash_table_get_pair (jar->chains_by_domain, cookie->domain,
197 &chain_key, &chain_head))
199 /* A chain of cookies in this domain already exists. Check for
200 duplicates -- if an extant cookie exactly matches our domain,
201 port, path, and name, replace it. */
203 struct cookie *victim = find_matching_cookie (jar, cookie, &prev);
207 /* Remove VICTIM from the chain. COOKIE will be placed at
211 prev->next = victim->next;
212 cookie->next = chain_head;
216 /* prev is NULL; apparently VICTIM was at the head of
217 the chain. This place will be taken by COOKIE, so
218 all we need to do is: */
219 cookie->next = victim->next;
221 delete_cookie (victim);
223 DEBUGP (("Deleted old cookie (to be replaced.)\n"));
226 cookie->next = chain_head;
230 /* We are now creating the chain. Allocate the string that will
231 be used as a key. It is unsafe to use cookie->domain for
232 that, because it might get deallocated by the above code at
235 chain_key = xstrdup (cookie->domain);
238 hash_table_put (jar->chains_by_domain, chain_key, cookie);
241 DEBUGP (("\nStored cookie %s %d%s %s %s %d %s %s %s\n",
242 cookie->domain, cookie->port,
243 cookie->port == PORT_ANY ? " (ANY)" : "",
245 cookie->permanent ? "permanent" : "nonpermanent",
248 ? asctime (localtime (&cookie->expiry_time)) : "<undefined>",
249 cookie->attr, cookie->value));
252 /* Discard a cookie matching COOKIE's domain, port, path, and
253 attribute name. This gets called when we encounter a cookie whose
254 expiry date is in the past, or whose max-age is set to 0. The
255 former corresponds to netscape cookie spec, while the latter is
256 specified by rfc2109. */
259 discard_matching_cookie (struct cookie_jar *jar, struct cookie *cookie)
261 struct cookie *prev, *victim;
263 if (!hash_table_count (jar->chains_by_domain))
264 /* No elements == nothing to discard. */
267 victim = find_matching_cookie (jar, cookie, &prev);
271 /* Simply unchain the victim. */
272 prev->next = victim->next;
275 /* VICTIM was head of its chain. We need to place a new
276 cookie at the head. */
277 char *chain_key = NULL;
280 res = hash_table_get_pair (jar->chains_by_domain, victim->domain,
285 /* VICTIM was the only cookie in the chain. Destroy the
286 chain and deallocate the chain key. */
287 hash_table_remove (jar->chains_by_domain, victim->domain);
291 hash_table_put (jar->chains_by_domain, chain_key, victim->next);
293 delete_cookie (victim);
294 DEBUGP (("Discarded old cookie.\n"));
298 /* Functions for parsing the `Set-Cookie' header, and creating new
299 cookies from the wire. */
/* Non-zero when the attribute name delimited by name_b and name_e
   (variables taken from the scope where the macro is expanded)
   compares equal to STRING_LITERAL under BOUNDED_EQUAL_NO_CASE.  */
302 #define NAME_IS(string_literal) \
303 BOUNDED_EQUAL_NO_CASE (name_b, name_e, string_literal)
/* Non-zero when both value boundaries are non-NULL, i.e. a value was
   supplied at all.  */
305 #define VALUE_EXISTS (value_b && value_e)
/* Non-zero when a value was supplied and its two boundaries differ,
   i.e. the delimited range is not empty.  */
307 #define VALUE_NON_EMPTY (VALUE_EXISTS && (value_b != value_e))
309 /* Update the appropriate cookie field. [name_b, name_e) are expected
310 to delimit the attribute name, while [value_b, value_e) (optional)
311 should delimit the attribute value.
313 When called the first time, it will set the cookie's attribute name
314 and value. After that, it will check the attribute name for
315 special fields such as `domain', `path', etc. Where appropriate,
316 it will parse the values of the fields it recognizes and fill the
317 corresponding fields in COOKIE.
319 Returns 1 on success. Returns zero in case a syntax error is
320 found; such a cookie should be discarded. */
323 update_cookie_field (struct cookie *cookie,
324 const char *name_b, const char *name_e,
325 const char *value_b, const char *value_e)
327 assert (name_b != NULL && name_e != NULL);
333 cookie->attr = strdupdelim (name_b, name_e);
334 cookie->value = strdupdelim (value_b, value_e);
338 if (NAME_IS ("domain"))
340 if (!VALUE_NON_EMPTY)
342 FREE_MAYBE (cookie->domain);
343 /* Strictly speaking, we should set cookie->domain_exact if the
344 domain doesn't begin with a dot. But many sites set the
345 domain to "foo.com" and expect "subhost.foo.com" to get the
346 cookie, and it apparently works. */
349 cookie->domain = strdupdelim (value_b, value_e);
352 else if (NAME_IS ("path"))
354 if (!VALUE_NON_EMPTY)
356 FREE_MAYBE (cookie->path);
357 cookie->path = strdupdelim (value_b, value_e);
360 else if (NAME_IS ("expires"))
365 if (!VALUE_NON_EMPTY)
367 BOUNDED_TO_ALLOCA (value_b, value_e, value_copy);
369 expires = http_atotm (value_copy);
372 cookie->permanent = 1;
373 cookie->expiry_time = (time_t)expires;
376 /* Error in expiration spec. Assume default (cookie valid for
380 /* According to netscape's specification, expiry time in the
381 past means that discarding of a matching cookie is
383 if (cookie->expiry_time < cookies_now)
384 cookie->discard_requested = 1;
388 else if (NAME_IS ("max-age"))
393 if (!VALUE_NON_EMPTY)
395 BOUNDED_TO_ALLOCA (value_b, value_e, value_copy);
397 sscanf (value_copy, "%lf", &maxage);
399 /* something went wrong. */
401 cookie->permanent = 1;
402 cookie->expiry_time = cookies_now + maxage;
404 /* According to rfc2109, a cookie with max-age of 0 means that
405 discarding of a matching cookie is requested. */
407 cookie->discard_requested = 1;
411 else if (NAME_IS ("secure"))
413 /* ignore value completely */
418 /* Unrecognized attribute; ignore it. */
424 /* Returns non-zero for characters that are legal in the name of an
425 attribute. This used to allow only alphanumerics, '-', and '_',
426 but we need to be more lenient because a number of sites want to
427 use weirder attribute names. rfc2965 "informally specifies"
428 attribute name (token) as "a sequence of non-special, non-white
429 space characters". So we allow everything except the stuff we know
/* Non-zero when C lies strictly between 32 and 127 and is none of
   '"', '=', ';' or ','.  C is evaluated several times, so pass an
   expression without side effects.  */
432 #define ATTR_NAME_CHAR(c) ((c) > 32 && (c) < 127 \
433 && (c) != '"' && (c) != '=' \
434 && (c) != ';' && (c) != ',')
436 /* Parse the contents of the `Set-Cookie' header. The header looks like this:
439 name1=value1; name2=value2; ...
441 Trailing semicolon is optional; spaces are allowed between all
442 tokens. Additionally, values may be quoted.
444 A new cookie is returned upon success, NULL otherwise. The
445 specified CALLBACK function (normally `update_cookie_field') is used
446 to update the fields of the newly created cookie structure. */
448 static struct cookie *
449 parse_set_cookies (const char *sc,
450 int (*callback) (struct cookie *,
451 const char *, const char *,
452 const char *, const char *),
455 struct cookie *cookie = cookie_new ();
457 /* #### Hand-written DFAs are no fun to debug. We'd be better off
458 rewriting this as an inline parser. */
460 enum { S_START, S_NAME, S_NAME_POST,
461 S_VALUE_PRE, S_VALUE, S_QUOTED_VALUE, S_VALUE_TRAILSPACE,
462 S_ATTR_ACTION, S_DONE, S_ERROR
468 const char *name_b = NULL, *name_e = NULL;
469 const char *value_b = NULL, *value_e = NULL;
473 while (state != S_DONE && state != S_ERROR)
480 else if (ISSPACE (c))
481 /* Strip all whitespace preceding the name. */
483 else if (ATTR_NAME_CHAR (c))
489 /* empty attr name not allowed */
493 if (!c || c == ';' || c == '=' || ISSPACE (c))
498 else if (ATTR_NAME_CHAR (c))
506 value_b = value_e = NULL;
509 state = S_ATTR_ACTION;
516 else if (ISSPACE (c))
517 /* Ignore space and keep the state. */
525 value_b = value_e = p;
528 state = S_ATTR_ACTION;
534 state = S_QUOTED_VALUE;
536 else if (ISSPACE (c))
546 if (!c || c == ';' || ISSPACE (c))
549 state = S_VALUE_TRAILSPACE;
553 value_e = NULL; /* no trailing space */
562 state = S_VALUE_TRAILSPACE;
569 case S_VALUE_TRAILSPACE:
573 state = S_ATTR_ACTION;
576 state = S_ATTR_ACTION;
577 else if (ISSPACE (c))
584 int legal = callback (cookie, name_b, name_e, value_b, value_e);
590 BOUNDED_TO_ALLOCA (name_b, name_e, name);
591 logprintf (LOG_NOTQUIET,
592 _("Error in Set-Cookie, field `%s'"), name);
602 /* handled by loop condition */
609 delete_cookie (cookie);
610 if (state != S_ERROR)
614 logprintf (LOG_NOTQUIET,
615 _("Syntax error in Set-Cookie: %s at position %d.\n"),
620 /* Sanity checks. These are important, otherwise it is possible for
621 malicious attackers to destroy important cookie information and/or
622 violate your privacy. */
625 #define REQUIRE_DIGITS(p) do { \
628 for (++p; ISDIGIT (*p); p++) \
632 #define REQUIRE_DOT(p) do { \
637 /* Check whether ADDR matches <digits>.<digits>.<digits>.<digits>.
639 We don't want to call network functions like inet_addr() because all
640 we need is a check, preferably one that is small, fast, and
644 numeric_address_p (const char *addr)
646 const char *p = addr;
648 REQUIRE_DIGITS (p); /* A */
649 REQUIRE_DOT (p); /* . */
650 REQUIRE_DIGITS (p); /* B */
651 REQUIRE_DOT (p); /* . */
652 REQUIRE_DIGITS (p); /* C */
653 REQUIRE_DOT (p); /* . */
654 REQUIRE_DIGITS (p); /* D */
661 /* Check whether COOKIE_DOMAIN is an appropriate domain for HOST.
662 Originally I tried to make the check compliant with rfc2109, but
663 the sites deviated too often, so I had to fall back to "tail
664 matching", as defined by the original Netscape's cookie spec. */
667 check_domain_match (const char *cookie_domain, const char *host)
671 /* Numeric address requires exact match. It also requires HOST to
673 if (numeric_address_p (cookie_domain))
674 return 0 == strcmp (cookie_domain, host);
678 /* For the sake of efficiency, check for exact match first. */
679 if (0 == strcasecmp (cookie_domain, host))
684 /* COOKIE_DOMAIN must match the tail of HOST. */
685 if (!match_tail (host, cookie_domain, 1))
688 /* We know that COOKIE_DOMAIN is a subset of HOST; however, we must
689 make sure that somebody is not trying to set the cookie for a
690 subdomain shared by many entities. For example, "company.co.uk"
691 must not be allowed to set a cookie for ".co.uk". On the other
692 hand, "sso.redhat.de" should be able to set a cookie for
695 The only marginally sane way to handle this I can think of is to
696 reject on the basis of the length of the second-level domain name
697 (but only when the top-level domain is unknown), with the assumption
698 that those of three or fewer characters could be reserved. For
701 .co.org -> works because the TLD is known
702 .co.uk -> doesn't work because "co" is only two chars long
703 .com.au -> doesn't work because "com" is only 3 chars long
704 .cnn.uk -> doesn't work because "cnn" is also only 3 chars long (ugh)
705 .cnn.de -> doesn't work for the same reason (ugh!!)
706 .abcd.de -> works because "abcd" is 4 chars long
707 .img.cnn.de -> works because it's not trying to set the 2nd level domain
708 .cnn.co.uk -> works for the same reason
710 That should prevent misuse, while allowing reasonable usage. If
711 someone knows of a better way to handle this, please let me
714 const char *p = cookie_domain;
715 int dccount = 1; /* number of domain components */
716 int ldcl = 0; /* last domain component length */
717 int nldcl = 0; /* next to last domain component length */
720 /* Ignore leading period in this calculation. */
723 for (out = 0; !out; p++)
731 /* Empty domain component found -- the domain is invalid. */
733 if (*(p + 1) == '\0')
735 /* Tolerate trailing '.' by not treating the domain as
736 one ending with an empty domain component. */
758 int known_toplevel = 0;
759 static char *known_toplevel_domains[] = {
760 ".com", ".edu", ".net", ".org", ".gov", ".mil", ".int"
762 for (i = 0; i < countof (known_toplevel_domains); i++)
763 if (match_tail (cookie_domain, known_toplevel_domains[i], 1))
768 if (!known_toplevel && nldcl <= 3)
775 /* Don't allow domain "bar.com" to match host "foobar.com". */
776 if (*cookie_domain != '.')
778 int dlen = strlen (cookie_domain);
779 int hlen = strlen (host);
780 /* cookie host: hostname.foobar.com */
781 /* desired domain: bar.com */
782 /* '.' must be here in host-> ^ */
783 if (hlen > dlen && host[hlen - dlen - 1] != '.')
792 static int path_matches PARAMS ((const char *, const char *));
794 /* Check whether PATH begins with COOKIE_PATH. */
797 check_path_match (const char *cookie_path, const char *path)
799 return path_matches (path, cookie_path);
802 /* Process the HTTP `Set-Cookie' header. This results in storing the
803 cookie or discarding a matching one, or ignoring it completely, all
804 depending on the contents. */
807 cookie_jar_process_set_cookie (struct cookie_jar *jar,
808 const char *host, int port,
809 const char *path, const char *set_cookie)
811 struct cookie *cookie;
812 cookies_now = time (NULL);
814 cookie = parse_set_cookies (set_cookie, update_cookie_field, 0);
818 /* Sanitize parts of cookie. */
823 cookie->domain = xstrdup (host);
828 if (!check_domain_match (cookie->domain, host))
830 logprintf (LOG_NOTQUIET,
831 "Cookie coming from %s attempted to set domain to %s\n",
832 host, cookie->domain);
837 cookie->path = xstrdup (path);
840 if (!check_path_match (cookie->path, path))
842 DEBUGP (("Attempt to fake the path: %s, %s\n",
843 cookie->path, path));
848 if (cookie->discard_requested)
850 discard_matching_cookie (jar, cookie);
854 store_cookie (jar, cookie);
859 delete_cookie (cookie);
862 /* Support for sending out cookies in HTTP requests, based on
863 previously stored cookies. Entry point is
864 `build_cookies_request'. */
866 /* Find the cookie chains that match HOST and store them to DEST.
868 A cookie chain is the list of cookies declared under a domain.
869 Given HOST "img.search.xemacs.org", this function will store the
870 chains for "img.search.xemacs.org", "search.xemacs.org", and
871 "xemacs.org" -- those of them that exist (if any), that is.
873 No more than DEST_SIZE matches are written; if more matches are present,
874 return the number of chains that would have been written. */
877 find_matching_chains (struct cookie_jar *jar, const char *host,
878 struct cookie *dest[], int dest_size)
883 if (!hash_table_count (jar->chains_by_domain))
886 if (numeric_address_p (host))
887 /* If host is an IP address, only check for the exact match. */
890 /* Otherwise, check all the subdomains except the top-level (last)
891 one. As a domain with N components has N-1 dots, the number of
892 passes equals the number of dots. */
893 passes = count_char (host, '.');
897 /* Find chains that match HOST, starting with exact match and
898 progressing to less specific domains. For instance, given HOST
899 fly.srk.fer.hr, first look for fly.srk.fer.hr's chain, then
900 srk.fer.hr's, then fer.hr's. */
903 struct cookie *chain = hash_table_get (jar->chains_by_domain, host);
906 if (dest_count < dest_size)
907 dest[dest_count] = chain;
910 if (++passcnt >= passes)
912 host = strchr (host, '.') + 1;
918 /* If FULL_PATH begins with PREFIX, return the length of PREFIX, zero
922 path_matches (const char *full_path, const char *prefix)
927 /* Wget's HTTP paths do not begin with '/' (the URL code treats it
928 as a separator), but the '/' is assumed when matching against
933 len = strlen (prefix);
935 if (0 != strncmp (full_path, prefix, len))
936 /* FULL_PATH doesn't begin with PREFIX. */
939 /* Length of PREFIX determines the quality of the match. */
943 /* Return non-zero iff COOKIE matches the given HOST, PORT, PATH, and
946 If PATH_GOODNESS is non-NULL, store the "path goodness" value
947 there. That value is a measure of how well COOKIE matches PATH,
948 used for ordering cookies. */
951 matching_cookie (const struct cookie *cookie,
952 const char *host, int port, const char *path,
953 int secure, int *path_goodness)
957 if (COOKIE_EXPIRED_P (cookie))
958 /* Ignore stale cookies. Don't bother unchaining the cookie at
959 this point -- Wget is a relatively short-lived application, and
960 stale cookies will not be saved by `save_cookies'. On the
961 other hand, this function should be as efficient as
965 if (cookie->secure && !secure)
966 /* Don't transmit secure cookies over insecure connections. */
968 if (cookie->port != PORT_ANY && cookie->port != port)
971 /* If exact domain match is required, verify that cookie's domain is
972 equal to HOST. If not, assume success on the grounds of the
973 cookie's chain having been found by find_matching_chains. */
974 if (cookie->domain_exact
975 && 0 != strcasecmp (host, cookie->domain))
978 pg = path_matches (path, cookie->path);
983 /* If the caller requested path_goodness, we return it. This is
984 an optimization, so that the caller doesn't need to call
985 path_matches() again. */
990 /* A structure that points to a cookie, along with the additional
991 information about the cookie's "goodness". This allows us to sort
992 the cookies when returning them to the server, as required by the
995 struct weighed_cookie {
996 struct cookie *cookie;
1001 /* Comparator used for uniquifying the list. */
1004 equality_comparator (const void *p1, const void *p2)
1006 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
1007 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
1009 int namecmp = strcmp (wc1->cookie->attr, wc2->cookie->attr);
1010 int valuecmp = strcmp (wc1->cookie->value, wc2->cookie->value);
1012 /* We only really care whether both name and value are equal. We
1013 return them in this order only for consistency... */
1014 return namecmp ? namecmp : valuecmp;
1017 /* Eliminate duplicate cookies. "Duplicate cookies" are any two
1018 cookies whose name and value are the same. Whenever a duplicate
1019 pair is found, one of the cookies is removed. */
1022 eliminate_dups (struct weighed_cookie *outgoing, int count)
1026 /* We deploy a simple uniquify algorithm: first sort the array
1027 according to our sort criteria, then uniquify it by comparing
1028 each cookie with its neighbor. */
1030 qsort (outgoing, count, sizeof (struct weighed_cookie), equality_comparator);
1032 for (i = 0; i < count - 1; i++)
1034 struct cookie *c1 = outgoing[i].cookie;
1035 struct cookie *c2 = outgoing[i + 1].cookie;
1036 if (!strcmp (c1->attr, c2->attr) && !strcmp (c1->value, c2->value))
1038 /* c1 and c2 are the same; get rid of c2. */
1040 /* move all ptrs from positions [i + 1, count) to i. */
1041 memmove (outgoing + i, outgoing + i + 1,
1042 (count - (i + 1)) * sizeof (struct weighed_cookie));
1043 /* We decrement i to counter the ++i above. Remember that
1044 we've just removed the element in front of us; we need to
1045 remain in place to check whether outgoing[i] matches what
1046 used to be outgoing[i + 2]. */
1054 /* Comparator used for sorting by quality. */
1057 goodness_comparator (const void *p1, const void *p2)
1059 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
1060 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
1062 /* Subtractions take `wc2' as the first argument because we want a
1063 sort in *decreasing* order of goodness. */
1064 int dgdiff = wc2->domain_goodness - wc1->domain_goodness;
1065 int pgdiff = wc2->path_goodness - wc1->path_goodness;
1067 /* Sort by domain goodness; if these are the same, sort by path
1068 goodness. (The sorting order isn't really specified; maybe it
1069 should be the other way around.) */
1070 return dgdiff ? dgdiff : pgdiff;
1073 /* Generate a `Cookie' header for a request that goes to HOST:PORT and
1074 requests PATH from the server. The resulting string is allocated
1075 with `malloc', and the caller is responsible for freeing it. If no
1076 cookies pertain to this request, i.e. no cookie header should be
1077 generated, NULL is returned. */
1080 cookie_jar_generate_cookie_header (struct cookie_jar *jar, const char *host,
1081 int port, const char *path,
1082 int connection_secure_p)
1084 struct cookie *chain_default_store[5];
1085 struct cookie **chains = chain_default_store;
1086 int chain_store_size = countof (chain_default_store);
1089 struct cookie *cookie;
1090 struct weighed_cookie *outgoing;
1093 int result_size, pos;
1095 /* First, find the chains that match HOST. */
1097 chain_count = find_matching_chains (jar, host, chains, chain_store_size);
1098 if (chain_count > chain_store_size)
1100 /* It's unlikely that more than 5 chains will ever match. But
1101 since find_matching_chains reports the exact size it needs,
1102 it's easy to not have the limitation, so we don't. */
1103 chains = alloca (chain_count * sizeof (struct cookie *));
1104 chain_store_size = chain_count;
1111 cookies_now = time (NULL);
1113 /* Now extract from the chains those cookies that match our host
1114 (for domain_exact cookies), port (for cookies with port other
1115 than PORT_ANY), etc. See matching_cookie for details. */
1117 /* Count the number of matching cookies. */
1119 for (i = 0; i < chain_count; i++)
1120 for (cookie = chains[i]; cookie; cookie = cookie->next)
1121 if (matching_cookie (cookie, host, port, path, connection_secure_p, NULL))
1124 return NULL; /* no cookies matched */
1126 /* Allocate the array. */
1127 outgoing = alloca (count * sizeof (struct weighed_cookie));
1129 /* Fill the array with all the matching cookies from the chains that
1132 for (i = 0; i < chain_count; i++)
1133 for (cookie = chains[i]; cookie; cookie = cookie->next)
1136 if (!matching_cookie (cookie, host, port, path,
1137 connection_secure_p, &pg))
1139 outgoing[ocnt].cookie = cookie;
1140 outgoing[ocnt].domain_goodness = strlen (cookie->domain);
1141 outgoing[ocnt].path_goodness = pg;
1144 assert (ocnt == count);
1146 /* Eliminate duplicate cookies; that is, those whose name and value
1148 count = eliminate_dups (outgoing, count);
1150 /* Sort the array so that best-matching domains come first, and
1151 that, within one domain, best-matching paths come first. */
1152 qsort (outgoing, count, sizeof (struct weighed_cookie), goodness_comparator);
1154 /* Count the space the name=value pairs will take. */
1156 for (i = 0; i < count; i++)
1158 struct cookie *c = outgoing[i].cookie;
1160 result_size += strlen (c->attr) + 1 + strlen (c->value);
1163 /* Allocate output buffer:
1165 name=value pairs -- result_size
1166 "; " separators -- (count - 1) * 2
1167 \r\n line ending -- 2
1168 \0 terminator -- 1 */
1169 result_size = 8 + result_size + (count - 1) * 2 + 2 + 1;
1170 result = xmalloc (result_size);
1172 strcpy (result, "Cookie: ");
1174 for (i = 0; i < count; i++)
1176 struct cookie *c = outgoing[i].cookie;
1177 int namlen = strlen (c->attr);
1178 int vallen = strlen (c->value);
1180 memcpy (result + pos, c->attr, namlen);
1182 result[pos++] = '=';
1183 memcpy (result + pos, c->value, vallen);
1187 result[pos++] = ';';
1188 result[pos++] = ' ';
1191 result[pos++] = '\r';
1192 result[pos++] = '\n';
1193 result[pos++] = '\0';
1194 assert (pos == result_size);
1198 /* Support for loading and saving cookies. The format used for
1199 loading and saving should be the format of the `cookies.txt' file
1200 used by Netscape and Mozilla, at least the Unix versions.
1201 (Apparently IE can export cookies in that format as well.) The
1202 format goes like this:
1204 DOMAIN DOMAIN-FLAG PATH SECURE-FLAG TIMESTAMP ATTR-NAME ATTR-VALUE
1206 DOMAIN -- cookie domain, optionally followed by :PORT
1207 DOMAIN-FLAG -- whether all hosts in the domain match
1209 SECURE-FLAG -- whether cookie requires secure connection
1210 TIMESTAMP -- expiry timestamp, number of seconds since epoch
1211 ATTR-NAME -- name of the cookie attribute
1212 ATTR-VALUE -- value of the cookie attribute (empty if absent)
1214 The fields are separated by TABs. All fields are mandatory, except
1215 for ATTR-VALUE. The `-FLAG' fields are boolean, their legal values
1216 being "TRUE" and "FALSE". Empty lines, lines consisting of
1217 whitespace only, and comment lines (beginning with # optionally
1218 preceded by whitespace) are ignored.
1220 Example line from cookies.txt (split in two lines for readability):
1222 .google.com TRUE / FALSE 2147368447 \
1223 PREF ID=34bb47565bbcd47b:LD=en:NR=20:TM=985172580:LM=985739012
1227 /* If the region [B, E) ends with :<digits>, parse the number, return
1228 it, and store new boundary (location of the `:') to DOMAIN_E_PTR.
1229 If port is not specified, return 0. */
1232 domain_port (const char *domain_b, const char *domain_e,
1233 const char **domain_e_ptr)
1237 const char *colon = memchr (domain_b, ':', domain_e - domain_b);
1240 for (p = colon + 1; p < domain_e && ISDIGIT (*p); p++)
1241 port = 10 * port + (*p - '0');
1243 /* Garbage following port number. */
1245 *domain_e_ptr = colon;
1249 #define GET_WORD(p, b, e) do { \
1251 while (*p && *p != '\t') \
1254 if (b == e || !*p) \
1259 /* Load cookies from FILE. */
1262 cookie_jar_load (struct cookie_jar *jar, const char *file)
1265 FILE *fp = fopen (file, "r");
1268 logprintf (LOG_NOTQUIET, "Cannot open cookies file `%s': %s\n",
1269 file, strerror (errno));
1272 cookies_now = time (NULL);
1274 for (; ((line = read_whole_line (fp)) != NULL); xfree (line))
1276 struct cookie *cookie;
1282 char *domain_b = NULL, *domain_e = NULL;
1283 char *domflag_b = NULL, *domflag_e = NULL;
1284 char *path_b = NULL, *path_e = NULL;
1285 char *secure_b = NULL, *secure_e = NULL;
1286 char *expires_b = NULL, *expires_e = NULL;
1287 char *name_b = NULL, *name_e = NULL;
1288 char *value_b = NULL, *value_e = NULL;
1290 /* Skip leading white-space. */
1291 while (*p && ISSPACE (*p))
1293 /* Ignore empty lines. */
1294 if (!*p || *p == '#')
1297 GET_WORD (p, domain_b, domain_e);
1298 GET_WORD (p, domflag_b, domflag_e);
1299 GET_WORD (p, path_b, path_e);
1300 GET_WORD (p, secure_b, secure_e);
1301 GET_WORD (p, expires_b, expires_e);
1302 GET_WORD (p, name_b, name_e);
1304 /* Don't use GET_WORD for value because it ends with newline,
1307 value_e = p + strlen (p);
1308 if (value_e > value_b && value_e[-1] == '\n')
1310 if (value_e > value_b && value_e[-1] == '\r')
1312 /* Empty values are legal (I think), so don't bother checking. */
1314 cookie = cookie_new ();
1316 cookie->attr = strdupdelim (name_b, name_e);
1317 cookie->value = strdupdelim (value_b, value_e);
1318 cookie->path = strdupdelim (path_b, path_e);
1319 cookie->secure = BOUNDED_EQUAL (secure_b, secure_e, "TRUE");
1321 /* Curl source says, quoting Andre Garcia: "flag: A TRUE/FALSE
1322 value indicating if all machines within a given domain can
1323 access the variable. This value is set automatically by the
1324 browser, depending on the value set for the domain." */
1325 cookie->domain_exact = !BOUNDED_EQUAL (domflag_b, domflag_e, "TRUE");
1327 /* DOMAIN needs special treatment because we might need to
1328 extract the port. */
1329 port = domain_port (domain_b, domain_e, (const char **)&domain_e);
1331 cookie->port = port;
1333 if (*domain_b == '.')
1334 ++domain_b; /* remove leading dot internally */
1335 cookie->domain = strdupdelim (domain_b, domain_e);
1337 /* safe default in case EXPIRES field is garbled. */
1338 expiry = (double)cookies_now - 1;
1340 /* I don't like changing the line, but it's safe here. (line is
1343 sscanf (expires_b, "%lf", &expiry);
1344 if (expiry < cookies_now)
1345 /* ignore stale cookie. */
1347 cookie->expiry_time = expiry;
1349 /* If the cookie has survived being saved into an external file,
1350 it is obviously permanent. */
1351 cookie->permanent = 1;
1353 store_cookie (jar, cookie);
1359 delete_cookie (cookie);
1364 /* Mapper for save_cookies callable by hash_table_map. VALUE points
1365 to the head in a chain of cookies. The function prints the entire
1369 save_cookies_mapper (void *key, void *value, void *arg)
1371 FILE *fp = (FILE *)arg;
1372 char *domain = (char *)key;
1373 struct cookie *cookie = (struct cookie *)value;
1374 for (; cookie; cookie = cookie->next)
1376 if (!cookie->permanent)
1378 if (COOKIE_EXPIRED_P (cookie))
1380 if (!cookie->domain_exact)
1383 if (cookie->port != PORT_ANY)
1384 fprintf (fp, ":%d", cookie->port);
1385 fprintf (fp, "\t%s\t%s\t%s\t%.0f\t%s\t%s\n",
1386 cookie->domain_exact ? "FALSE" : "TRUE",
1387 cookie->path, cookie->secure ? "TRUE" : "FALSE",
1388 (double)cookie->expiry_time,
1389 cookie->attr, cookie->value);
1391 return 1; /* stop mapping */
1396 /* Save cookies, in format described above, to FILE. */
1399 cookie_jar_save (struct cookie_jar *jar, const char *file)
1403 DEBUGP (("Saving cookies to %s.\n", file));
1405 cookies_now = time (NULL);
1407 fp = fopen (file, "w");
1410 logprintf (LOG_NOTQUIET, _("Cannot open cookies file `%s': %s\n"),
1411 file, strerror (errno));
1415 fputs ("# HTTP cookie file.\n", fp);
1416 fprintf (fp, "# Generated by Wget on %s.\n", datetime_str (NULL));
1417 fputs ("# Edit at your own risk.\n\n", fp);
1419 hash_table_map (jar->chains_by_domain, save_cookies_mapper, fp);
1422 logprintf (LOG_NOTQUIET, _("Error writing to `%s': %s\n"),
1423 file, strerror (errno));
1425 if (fclose (fp) < 0)
1426 logprintf (LOG_NOTQUIET, _("Error closing `%s': %s\n"),
1427 file, strerror (errno));
1429 DEBUGP (("Done saving cookies.\n"));
1432 /* Destroy all the elements in the chain and unhook it from the cookie
1433 jar. This is written in the form of a callback to hash_table_map
1434 and used by cookie_jar_delete to delete all the cookies in a
1438 nuke_cookie_chain (void *value, void *key, void *arg)
1440 char *chain_key = (char *)value;
1441 struct cookie *chain = (struct cookie *)key;
1442 struct cookie_jar *jar = (struct cookie_jar *)arg;
1444 /* Remove the chain from the table and free the key. */
1445 hash_table_remove (jar->chains_by_domain, chain_key);
1448 /* Then delete all the cookies in the chain. */
1451 struct cookie *next = chain->next;
1452 delete_cookie (chain);
1460 /* Clean up cookie-related data. */
1463 cookie_jar_delete (struct cookie_jar *jar)
1465 hash_table_map (jar->chains_by_domain, nuke_cookie_chain, jar);
1466 hash_table_destroy (jar->chains_by_domain);
1470 /* Test cases. Currently this is only tests parse_set_cookies. To
1471 use, recompile Wget with -DTEST_COOKIES and call test_cookies()
1476 char *test_results[10];
1478 static int test_parse_cookies_callback (struct cookie *ignored,
1479 const char *nb, const char *ne,
1480 const char *vb, const char *ve)
1482 test_results[test_count++] = strdupdelim (nb, ne);
1483 test_results[test_count++] = strdupdelim (vb, ve);
1490 /* Tests expected to succeed: */
1496 { "arg=value", {"arg", "value", NULL} },
1497 { "arg1=value1;arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
1498 { "arg1=value1; arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
1499 { "arg1=value1; arg2=value2;", {"arg1", "value1", "arg2", "value2", NULL} },
1500 { "arg1=value1; arg2=value2; ", {"arg1", "value1", "arg2", "value2", NULL} },
1501 { "arg1=\"value1\"; arg2=\"\"", {"arg1", "value1", "arg2", "", NULL} },
1502 { "arg=", {"arg", "", NULL} },
1503 { "arg1=; arg2=", {"arg1", "", "arg2", "", NULL} },
1504 { "arg1 = ; arg2= ", {"arg1", "", "arg2", "", NULL} },
1507 /* Tests expected to fail: */
1508 static char *tests_fail[] = {
1510 "arg=\"unterminated",
1512 "arg1=;=another-empty-name",
1516 for (i = 0; i < countof (tests_succ); i++)
1519 char *data = tests_succ[i].data;
1520 char **expected = tests_succ[i].results;
1524 c = parse_set_cookies (data, test_parse_cookies_callback, 1);
1527 printf ("NULL cookie returned for valid data: %s\n", data);
1531 for (ind = 0; ind < test_count; ind += 2)
1535 if (0 != strcmp (expected[ind], test_results[ind]))
1536 printf ("Invalid name %d for '%s' (expected '%s', got '%s')\n",
1537 ind / 2 + 1, data, expected[ind], test_results[ind]);
1538 if (0 != strcmp (expected[ind + 1], test_results[ind + 1]))
1539 printf ("Invalid value %d for '%s' (expected '%s', got '%s')\n",
1540 ind / 2 + 1, data, expected[ind + 1], test_results[ind + 1]);
1542 if (ind < test_count || expected[ind])
1543 printf ("Unmatched number of results: %s\n", data);
1546 for (i = 0; i < countof (tests_fail); i++)
1549 char *data = tests_fail[i];
1551 c = parse_set_cookies (data, test_parse_cookies_callback, 1);
1553 printf ("Failed to report error on invalid data: %s\n", data);
1556 #endif /* TEST_COOKIES */