1 /* Support for cookies.
2 Copyright (C) 2001, 2002 Free Software Foundation, Inc.
4 This file is part of GNU Wget.
6 GNU Wget is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or (at
9 your option) any later version.
11 GNU Wget is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Wget; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 In addition, as a special exception, the Free Software Foundation
21 gives permission to link the code of its release of Wget with the
22 OpenSSL project's "OpenSSL" library (or with modified versions of it
23 that use the same license as the "OpenSSL" library), and distribute
24 the linked executables. You must obey the GNU General Public License
25 in all respects for all of the code used other than "OpenSSL". If you
26 modify this file, you may extend this exception to your version of the
27 file, but you are not obligated to do so. If you do not wish to do
28 so, delete this exception statement from your version. */
30 /* Written by Hrvoje Niksic. Parts are loosely inspired by cookie
31 code submitted by Tomasz Wegrzanowski.
33 Ideas for future work:
35 * Implement limits on cookie-related sizes, such as max. cookie
36 size, max. number of cookies, etc.
38 * Add more "cookie jar" methods, such as methods to iterate over
39 stored cookies, to clear temporary cookies, to perform
40 intelligent auto-saving, etc.
42 * Support `Set-Cookie2' and `Cookie2' headers? Does anyone really
62 /* This should *really* be in a .h file! */
63 time_t http_atotm PARAMS ((const char *));
65 /* Declarations of `struct cookie' and the most basic functions. */
67 /* Cookie jar serves as cookie storage and a means of retrieving
68 cookies efficiently. All cookies with the same domain are stored
69 in a linked list called "chain". A cookie chain can be reached by
70 looking up the domain in the cookie jar's chains_by_domain table.
72 For example, to reach all the cookies under google.com, one must
73 execute hash_table_get(jar->chains_by_domain, "google.com"). Of
74 course, when sending a cookie to `www.google.com', one must search
75 for cookies that belong to either `www.google.com' or `google.com'
76 -- but the point is that the code doesn't need to go through *all*
80 /* Mapping between domains and their corresponding cookies. */
81 struct hash_table *chains_by_domain;
83 int cookie_count; /* number of cookies in the jar. */
86 /* Value set by entry point functions, so that the low-level
87 routines don't need to call time() all the time. */
93 struct cookie_jar *jar = xmalloc (sizeof (struct cookie_jar));
94 jar->chains_by_domain = make_nocase_string_hash_table (0);
95 jar->cookie_count = 0;
100 char *domain; /* domain of the cookie */
101 int port; /* port number */
102 char *path; /* path prefix of the cookie */
104 int secure; /* whether cookie should be
105 transmitted over non-https
107 int domain_exact; /* whether DOMAIN must match as a
110 int permanent; /* whether the cookie should outlive
112 time_t expiry_time; /* time when the cookie expires */
114 int discard_requested; /* whether cookie was created to
115 request discarding another
118 char *attr; /* cookie attribute name */
119 char *value; /* cookie attribute value */
121 struct cookie_jar *jar; /* pointer back to the cookie jar, for
123 struct cookie *next; /* used for chaining of cookies in the
127 #define PORT_ANY (-1) /* cookie->port value meaning the cookie is not restricted to a particular port */
128 #define COOKIE_EXPIRED_P(c) ((c)->expiry_time != 0 && (c)->expiry_time < cookies_now) /* non-zero iff C has an expiry time set and it is earlier than cookies_now; an expiry_time of 0 is never treated as expired */
130 /* Allocate and return a new, empty cookie structure. */
132 static struct cookie *
135 struct cookie *cookie = xmalloc (sizeof (struct cookie));
136 memset (cookie, '\0', sizeof (struct cookie));
138 /* Both cookie->permanent and cookie->expiry_time are now 0. By
139 default, we assume that the cookie is non-permanent and valid
140 until the end of the session. */
142 cookie->port = PORT_ANY;
146 /* Deallocate COOKIE and its components. */
149 delete_cookie (struct cookie *cookie)
151 FREE_MAYBE (cookie->domain);
152 FREE_MAYBE (cookie->path);
153 FREE_MAYBE (cookie->attr);
154 FREE_MAYBE (cookie->value);
158 /* Functions for storing cookies.
160 All cookies can be reached beginning with jar->chains_by_domain.
161 The key in that table is the domain name, and the value is a linked
162 list of all cookies from that domain. Every new cookie is placed
163 on the head of the list. */
165 /* Find and return a cookie in JAR whose domain, path, and attribute
166 name correspond to COOKIE. If found, PREVPTR will point to the
167 location of the cookie previous in chain, or NULL if the found
168 cookie is the head of a chain.
170 If no matching cookie is found, return NULL. */
172 static struct cookie *
173 find_matching_cookie (struct cookie_jar *jar, struct cookie *cookie,
174 struct cookie **prevptr)
176 struct cookie *chain, *prev;
178 chain = hash_table_get (jar->chains_by_domain, cookie->domain);
183 for (; chain; prev = chain, chain = chain->next)
184 if (0 == strcmp (cookie->path, chain->path)
185 && 0 == strcmp (cookie->attr, chain->attr)
186 && cookie->port == chain->port)
197 /* Store COOKIE to the jar.
199 This is done by placing COOKIE at the head of its chain. However,
200 if COOKIE matches a cookie already in memory, as determined by
201 find_matching_cookie, the old cookie is unlinked and destroyed.
203 The key of each chain's hash table entry is allocated only the
204 first time; next hash_table_put's reuse the same key. */
207 store_cookie (struct cookie_jar *jar, struct cookie *cookie)
209 struct cookie *chain_head;
212 if (hash_table_get_pair (jar->chains_by_domain, cookie->domain,
213 &chain_key, &chain_head))
215 /* A chain of cookies in this domain already exists. Check for
216 duplicates -- if an extant cookie exactly matches our domain,
217 port, path, and name, replace it. */
219 struct cookie *victim = find_matching_cookie (jar, cookie, &prev);
223 /* Remove VICTIM from the chain. COOKIE will be placed at
227 prev->next = victim->next;
228 cookie->next = chain_head;
232 /* prev is NULL; apparently VICTIM was at the head of
233 the chain. This place will be taken by COOKIE, so
234 all we need to do is: */
235 cookie->next = victim->next;
237 delete_cookie (victim);
239 DEBUGP (("Deleted old cookie (to be replaced.)\n"));
242 cookie->next = chain_head;
246 /* We are now creating the chain. Use a copy of cookie->domain
247 as the key for the life-time of the chain. Using
248 cookie->domain would be unsafe because the life-time of the
249 chain may exceed the life-time of the cookie. (Cookies may
250 be deleted from the chain by this very function.) */
252 chain_key = xstrdup (cookie->domain);
255 hash_table_put (jar->chains_by_domain, chain_key, cookie);
258 DEBUGP (("\nStored cookie %s %d%s %s %s %d %s %s %s\n",
259 cookie->domain, cookie->port,
260 cookie->port == PORT_ANY ? " (ANY)" : "",
262 cookie->permanent ? "permanent" : "nonpermanent",
265 ? asctime (localtime (&cookie->expiry_time)) : "<undefined>",
266 cookie->attr, cookie->value));
269 /* Discard a cookie matching COOKIE's domain, port, path, and
270 attribute name. This gets called when we encounter a cookie whose
271 expiry date is in the past, or whose max-age is set to 0. The
272 former corresponds to netscape cookie spec, while the latter is
273 specified by rfc2109. */
276 discard_matching_cookie (struct cookie_jar *jar, struct cookie *cookie)
278 struct cookie *prev, *victim;
280 if (!hash_table_count (jar->chains_by_domain))
281 /* No elements == nothing to discard. */
284 victim = find_matching_cookie (jar, cookie, &prev);
288 /* Simply unchain the victim. */
289 prev->next = victim->next;
292 /* VICTIM was head of its chain. We need to place a new
293 cookie at the head. */
294 char *chain_key = NULL;
297 res = hash_table_get_pair (jar->chains_by_domain, victim->domain,
302 /* VICTIM was the only cookie in the chain. Destroy the
303 chain and deallocate the chain key. */
304 hash_table_remove (jar->chains_by_domain, victim->domain);
308 hash_table_put (jar->chains_by_domain, chain_key, victim->next);
310 delete_cookie (victim);
311 DEBUGP (("Discarded old cookie.\n"));
315 /* Functions for parsing the `Set-Cookie' header, and creating new
316 cookies from the wire. */
318 #define NAME_IS(string_literal) \
319 BOUNDED_EQUAL_NO_CASE (name_b, name_e, string_literal) /* compares the region [name_b, name_e) against STRING_LITERAL (presumably ignoring case -- confirm against BOUNDED_EQUAL_NO_CASE); requires name_b and name_e to be in scope at the point of use */
321 #define VALUE_EXISTS (value_b && value_e) /* both value boundaries are non-NULL; requires value_b and value_e in scope */
323 #define VALUE_NON_EMPTY (VALUE_EXISTS && (value_b != value_e)) /* a value is present and the region [value_b, value_e) is not empty */
325 /* Update the appropriate cookie field. [name_b, name_e) are expected
326 to delimit the attribute name, while [value_b, value_e) (optional)
327 should delimit the attribute value.
329 When called the first time, it will set the cookie's attribute name
330 and value. After that, it will check the attribute name for
331 special fields such as `domain', `path', etc. Where appropriate,
332 it will parse the values of the fields it recognizes and fill the
333 corresponding fields in COOKIE.
335 Returns 1 on success. Returns zero in case a syntax error is
336 found; such a cookie should be discarded. */
339 update_cookie_field (struct cookie *cookie,
340 const char *name_b, const char *name_e,
341 const char *value_b, const char *value_e)
343 assert (name_b != NULL && name_e != NULL);
349 cookie->attr = strdupdelim (name_b, name_e);
350 cookie->value = strdupdelim (value_b, value_e);
354 if (NAME_IS ("domain"))
356 if (!VALUE_NON_EMPTY)
358 FREE_MAYBE (cookie->domain);
359 /* Strictly speaking, we should set cookie->domain_exact if the
360 domain doesn't begin with a dot. But many sites set the
361 domain to "foo.com" and expect "subhost.foo.com" to get the
362 cookie, and it apparently works. */
365 cookie->domain = strdupdelim (value_b, value_e);
368 else if (NAME_IS ("path"))
370 if (!VALUE_NON_EMPTY)
372 FREE_MAYBE (cookie->path);
373 cookie->path = strdupdelim (value_b, value_e);
376 else if (NAME_IS ("expires"))
381 if (!VALUE_NON_EMPTY)
383 BOUNDED_TO_ALLOCA (value_b, value_e, value_copy);
385 expires = http_atotm (value_copy);
388 cookie->permanent = 1;
389 cookie->expiry_time = (time_t)expires;
392 /* Error in expiration spec. Assume default (cookie valid for
396 /* According to netscape's specification, expiry time in the
397 past means that discarding of a matching cookie is
399 if (cookie->expiry_time < cookies_now)
400 cookie->discard_requested = 1;
404 else if (NAME_IS ("max-age"))
409 if (!VALUE_NON_EMPTY)
411 BOUNDED_TO_ALLOCA (value_b, value_e, value_copy);
413 sscanf (value_copy, "%lf", &maxage);
415 /* something went wrong. */
417 cookie->permanent = 1;
418 cookie->expiry_time = cookies_now + maxage;
420 /* According to rfc2109, a cookie with max-age of 0 means that
421 discarding of a matching cookie is requested. */
423 cookie->discard_requested = 1;
427 else if (NAME_IS ("secure"))
429 /* ignore value completely */
434 /* Unrecognized attribute; ignore it. */
440 /* Returns non-zero for characters that are legal in the name of an
441 attribute. This used to allow only alphanumerics, '-', and '_',
442 but we need to be more lenient because a number of sites wants to
443 use weirder attribute names. rfc2965 "informally specifies"
444 attribute name (token) as "a sequence of non-special, non-white
445 space characters". So we allow everything except the stuff we know
448 #define ATTR_NAME_CHAR(c) ((c) > 32 && (c) < 127 \
449 && (c) != '"' && (c) != '=' \
450 && (c) != ';' && (c) != ',') /* accepts character codes 33..126 except '"', '=', ';' and ','; C is evaluated several times, so it must not have side effects */
452 /* Parse the contents of the `Set-Cookie' header. The header looks
455 name1=value1; name2=value2; ...
457 Trailing semicolon is optional; spaces are allowed between all
458 tokens. Additionally, values may be quoted.
460 A new cookie is returned upon success, NULL otherwise. The
461 specified CALLBACK function (normally `update_cookie_field') is used
462 to update the fields of the newly created cookie structure. */
464 static struct cookie *
465 parse_set_cookies (const char *sc,
466 int (*callback) (struct cookie *,
467 const char *, const char *,
468 const char *, const char *),
471 struct cookie *cookie = cookie_new ();
473 /* #### Hand-written DFAs are no fun to debug. We'd be better off
474 to rewrite this as an inline parser. */
476 enum { S_START, S_NAME, S_NAME_POST,
477 S_VALUE_PRE, S_VALUE, S_QUOTED_VALUE, S_VALUE_TRAILSPACE,
478 S_ATTR_ACTION, S_DONE, S_ERROR
484 const char *name_b = NULL, *name_e = NULL;
485 const char *value_b = NULL, *value_e = NULL;
489 while (state != S_DONE && state != S_ERROR)
496 else if (ISSPACE (c))
497 /* Strip all whitespace preceding the name. */
499 else if (ATTR_NAME_CHAR (c))
505 /* empty attr name not allowed */
509 if (!c || c == ';' || c == '=' || ISSPACE (c))
514 else if (ATTR_NAME_CHAR (c))
522 value_b = value_e = NULL;
525 state = S_ATTR_ACTION;
532 else if (ISSPACE (c))
533 /* Ignore space and keep the state. */
541 value_b = value_e = p;
544 state = S_ATTR_ACTION;
550 state = S_QUOTED_VALUE;
552 else if (ISSPACE (c))
562 if (!c || c == ';' || ISSPACE (c))
565 state = S_VALUE_TRAILSPACE;
569 value_e = NULL; /* no trailing space */
578 state = S_VALUE_TRAILSPACE;
585 case S_VALUE_TRAILSPACE:
589 state = S_ATTR_ACTION;
592 state = S_ATTR_ACTION;
593 else if (ISSPACE (c))
600 int legal = callback (cookie, name_b, name_e, value_b, value_e);
606 BOUNDED_TO_ALLOCA (name_b, name_e, name);
607 logprintf (LOG_NOTQUIET,
608 _("Error in Set-Cookie, field `%s'"), name);
618 /* handled by loop condition */
625 delete_cookie (cookie);
626 if (state != S_ERROR)
630 logprintf (LOG_NOTQUIET,
631 _("Syntax error in Set-Cookie: %s at position %d.\n"),
636 /* Sanity checks. These are important, otherwise it is possible for
637 malicious attackers to destroy important cookie information and/or
638 violate your privacy. */
641 #define REQUIRE_DIGITS(p) do { \
644 for (++p; ISDIGIT (*p); p++) \
648 #define REQUIRE_DOT(p) do { \
653 /* Check whether ADDR matches <digits>.<digits>.<digits>.<digits>.
655 We don't want to call network functions like inet_addr() because all
656 we need is a check, preferably one that is small, fast, and
660 numeric_address_p (const char *addr)
662 const char *p = addr;
664 REQUIRE_DIGITS (p); /* A */
665 REQUIRE_DOT (p); /* . */
666 REQUIRE_DIGITS (p); /* B */
667 REQUIRE_DOT (p); /* . */
668 REQUIRE_DIGITS (p); /* C */
669 REQUIRE_DOT (p); /* . */
670 REQUIRE_DIGITS (p); /* D */
677 /* Check whether COOKIE_DOMAIN is an appropriate domain for HOST.
678 Originally I tried to make the check compliant with rfc2109, but
679 the sites deviated too often, so I had to fall back to "tail
680 matching", as defined by the original Netscape's cookie spec. */
683 check_domain_match (const char *cookie_domain, const char *host)
687 /* Numeric address requires exact match. It also requires HOST to
689 if (numeric_address_p (cookie_domain))
690 return 0 == strcmp (cookie_domain, host);
694 /* For the sake of efficiency, check for exact match first. */
695 if (0 == strcasecmp (cookie_domain, host))
700 /* HOST must match the tail of cookie_domain. */
701 if (!match_tail (host, cookie_domain, 1))
704 /* We know that COOKIE_DOMAIN is a subset of HOST; however, we must
705 make sure that somebody is not trying to set the cookie for a
706 subdomain shared by many entities. For example, "company.co.uk"
707 must not be allowed to set a cookie for ".co.uk". On the other
708 hand, "sso.redhat.de" should be able to set a cookie for
711 The only marginally sane way to handle this I can think of is to
712 reject on the basis of the length of the second-level domain name
713 (but only when the top-level domain is unknown), with the assumption
714 that those of three or fewer characters could be reserved. For
717 .co.org -> works because the TLD is known
718 .co.uk -> doesn't work because "co" is only two chars long
719 .com.au -> doesn't work because "com" is only 3 chars long
720 .cnn.uk -> doesn't work because "cnn" is also only 3 chars long (ugh)
721 .cnn.de -> doesn't work for the same reason (ugh!!)
722 .abcd.de -> works because "abcd" is 4 chars long
723 .img.cnn.de -> works because it's not trying to set the 2nd level domain
724 .cnn.co.uk -> works for the same reason
726 That should prevent misuse, while allowing reasonable usage. If
727 someone knows of a better way to handle this, please let me
730 const char *p = cookie_domain;
731 int dccount = 1; /* number of domain components */
732 int ldcl = 0; /* last domain component length */
733 int nldcl = 0; /* next to last domain component length */
736 /* Ignore leading period in this calculation. */
739 for (out = 0; !out; p++)
747 /* Empty domain component found -- the domain is invalid. */
749 if (*(p + 1) == '\0')
751 /* Tolerate trailing '.' by not treating the domain as
752 one ending with an empty domain component. */
774 int known_toplevel = 0;
775 static char *known_toplevel_domains[] = {
776 ".com", ".edu", ".net", ".org", ".gov", ".mil", ".int"
778 for (i = 0; i < countof (known_toplevel_domains); i++)
779 if (match_tail (cookie_domain, known_toplevel_domains[i], 1))
784 if (!known_toplevel && nldcl <= 3)
791 /* Don't allow the host "foobar.com" to set a cookie for domain
793 if (*cookie_domain != '.')
795 int dlen = strlen (cookie_domain);
796 int hlen = strlen (host);
797 /* cookie host: hostname.foobar.com */
798 /* desired domain: bar.com */
799 /* '.' must be here in host-> ^ */
800 if (hlen > dlen && host[hlen - dlen - 1] != '.')
809 static int path_matches PARAMS ((const char *, const char *));
811 /* Check whether PATH begins with COOKIE_PATH. */
814 check_path_match (const char *cookie_path, const char *path)
816 return path_matches (path, cookie_path);
819 /* Process the HTTP `Set-Cookie' header. This results in storing the
820 cookie or discarding a matching one, or ignoring it completely, all
821 depending on the contents. */
824 cookie_jar_process_set_cookie (struct cookie_jar *jar,
825 const char *host, int port,
826 const char *path, const char *set_cookie)
828 struct cookie *cookie;
829 cookies_now = time (NULL);
831 cookie = parse_set_cookies (set_cookie, update_cookie_field, 0);
835 /* Sanitize parts of cookie. */
840 cookie->domain = xstrdup (host);
845 if (!check_domain_match (cookie->domain, host))
847 logprintf (LOG_NOTQUIET,
848 "Cookie coming from %s attempted to set domain to %s\n",
849 host, cookie->domain);
850 xfree (cookie->domain);
856 cookie->path = xstrdup (path);
859 if (!check_path_match (cookie->path, path))
861 DEBUGP (("Attempt to fake the path: %s, %s\n",
862 cookie->path, path));
867 if (cookie->discard_requested)
869 discard_matching_cookie (jar, cookie);
873 store_cookie (jar, cookie);
878 delete_cookie (cookie);
881 /* Support for sending out cookies in HTTP requests, based on
882 previously stored cookies. Entry point is
883 `build_cookies_request'. */
885 /* Find the cookie chains whose domains match HOST and store them to
888 A cookie chain is the head of a list of cookies that belong to a
889 host/domain. Given HOST "img.search.xemacs.org", this function
890 will return the chains for "img.search.xemacs.org",
891 "search.xemacs.org", and "xemacs.org" -- those of them that exist
894 DEST should be large enough to accept (in the worst case) as many
895 elements as there are domain components of HOST. */
898 find_chains_of_host (struct cookie_jar *jar, const char *host,
899 struct cookie *dest[])
904 /* Bail out quickly if there are no cookies in the jar. */
905 if (!hash_table_count (jar->chains_by_domain))
908 if (numeric_address_p (host))
909 /* If host is an IP address, only check for the exact match. */
912 /* Otherwise, check all the subdomains except the top-level (last)
913 one. As a domain with N components has N-1 dots, the number of
914 passes equals the number of dots. */
915 passes = count_char (host, '.');
919 /* Find chains that match HOST, starting with exact match and
920 progressing to less specific domains. For instance, given HOST
921 fly.srk.fer.hr, first look for fly.srk.fer.hr's chain, then
922 srk.fer.hr's, then fer.hr's. */
925 struct cookie *chain = hash_table_get (jar->chains_by_domain, host);
927 dest[dest_count++] = chain;
928 if (++passcnt >= passes)
930 host = strchr (host, '.') + 1;
936 /* If FULL_PATH begins with PREFIX, return the length of PREFIX, zero
940 path_matches (const char *full_path, const char *prefix)
945 /* Wget's HTTP paths do not begin with '/' (the URL code treats it
946 as a mere separator, inspired by rfc1808), but the '/' is
947 assumed when matching against the cookie stuff. */
951 len = strlen (prefix);
953 if (0 != strncmp (full_path, prefix, len))
954 /* FULL_PATH doesn't begin with PREFIX. */
957 /* Length of PREFIX determines the quality of the match. */
961 /* Return non-zero iff COOKIE matches the provided parameters of the
962 URL being downloaded: HOST, PORT, PATH, and SECFLAG.
964 If PATH_GOODNESS is non-NULL, store the "path goodness" value
965 there. That value is a measure of how closely COOKIE matches PATH,
966 used for ordering cookies. */
969 cookie_matches_url (const struct cookie *cookie,
970 const char *host, int port, const char *path,
971 int secflag, int *path_goodness)
975 if (COOKIE_EXPIRED_P (cookie))
976 /* Ignore stale cookies. Don't bother unchaining the cookie at
977 this point -- Wget is a relatively short-lived application, and
978 stale cookies will not be saved by `save_cookies'. On the
979 other hand, this function should be as efficient as
983 if (cookie->secure && !secflag)
984 /* Don't transmit secure cookies over insecure connections. */
986 if (cookie->port != PORT_ANY && cookie->port != port)
989 /* If exact domain match is required, verify that cookie's domain is
990 equal to HOST. If not, assume success on the grounds of the
991 cookie's chain having been found by find_chains_of_host. */
992 if (cookie->domain_exact
993 && 0 != strcasecmp (host, cookie->domain))
996 pg = path_matches (path, cookie->path);
1001 /* If the caller requested path_goodness, we return it. This is
1002 an optimization, so that the caller doesn't need to call
1003 path_matches() again. */
1004 *path_goodness = pg;
1008 /* A structure that points to a cookie, along with the additional
1009 information about the cookie's "goodness". This allows us to sort
1010 the cookies when returning them to the server, as required by the
1013 struct weighed_cookie {
1014 struct cookie *cookie;
1015 int domain_goodness;
1019 /* Comparator used for uniquifying the list. */
1022 equality_comparator (const void *p1, const void *p2)
1024 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
1025 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
1027 int namecmp = strcmp (wc1->cookie->attr, wc2->cookie->attr);
1028 int valuecmp = strcmp (wc1->cookie->value, wc2->cookie->value);
1030 /* We only really care whether both name and value are equal. We
1031 return them in this order only for consistency... */
1032 return namecmp ? namecmp : valuecmp;
1035 /* Eliminate duplicate cookies. "Duplicate cookies" are any two
1036 cookies with the same attr name and value. Whenever a duplicate
1037 pair is found, one of the cookies is removed. */
1040 eliminate_dups (struct weighed_cookie *outgoing, int count)
1042 struct weighed_cookie *h; /* hare */
1043 struct weighed_cookie *t; /* tortoise */
1044 struct weighed_cookie *end = outgoing + count;
1046 /* We deploy a simple uniquify algorithm: first sort the array
1047 according to our sort criteria, then copy it to itself, comparing
1048 each cookie to its neighbor and ignoring the duplicates. */
1050 qsort (outgoing, count, sizeof (struct weighed_cookie), equality_comparator);
1052 /* "Hare" runs through all the entries in the array, followed by
1053 "tortoise". If a duplicate is found, the hare skips it.
1054 Non-duplicate entries are copied to the tortoise ptr. */
1056 for (h = t = outgoing; h < end; h++)
1060 struct cookie *c0 = h[0].cookie;
1061 struct cookie *c1 = h[1].cookie;
1062 if (!strcmp (c0->attr, c1->attr) && !strcmp (c0->value, c1->value))
1063 continue; /* ignore the duplicate */
1066 /* If the hare has advanced past the tortoise (because of
1067 previous dups), make sure the values get copied. Otherwise,
1068 no copying is necessary. */
1074 return t - outgoing;
1077 /* Comparator used for sorting by quality. */
1080 goodness_comparator (const void *p1, const void *p2)
1082 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
1083 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
1085 /* Subtractions take `wc2' as the first argument because we want a
1086 sort in *decreasing* order of goodness. */
1087 int dgdiff = wc2->domain_goodness - wc1->domain_goodness;
1088 int pgdiff = wc2->path_goodness - wc1->path_goodness;
1090 /* Sort by domain goodness; if these are the same, sort by path
1091 goodness. (The sorting order isn't really specified; maybe it
1092 should be the other way around.) */
1093 return dgdiff ? dgdiff : pgdiff;
1096 /* Generate a `Cookie' header for a request that goes to HOST:PORT and
1097 requests PATH from the server. The resulting string is allocated
1098 with `malloc', and the caller is responsible for freeing it. If no
1099 cookies pertain to this request, i.e. no cookie header should be
1100 generated, NULL is returned. */
1103 cookie_jar_generate_cookie_header (struct cookie_jar *jar, const char *host,
1104 int port, const char *path,
1105 int connection_secure_p)
1107 struct cookie **chains;
1110 struct cookie *cookie;
1111 struct weighed_cookie *outgoing;
1114 int result_size, pos;
1116 /* First, find the cookie chains whose domains match HOST. */
1118 /* Allocate room for find_chains_of_host to write to. The number of
1119 chains can at most equal the number of subdomains, hence
1120 1+<number of dots>. */
1121 chains = alloca_array (struct cookie *, 1 + count_char (host, '.'));
1122 chain_count = find_chains_of_host (jar, host, chains);
1124 /* No cookies for this host. */
1128 cookies_now = time (NULL);
1130 /* Now extract from the chains those cookies that match our host
1131 (for domain_exact cookies), port (for cookies with port other
1132 than PORT_ANY), etc. See cookie_matches_url for details. */
1134 /* Count the number of matching cookies. */
1136 for (i = 0; i < chain_count; i++)
1137 for (cookie = chains[i]; cookie; cookie = cookie->next)
1138 if (cookie_matches_url (cookie, host, port, path, connection_secure_p,
1142 return NULL; /* no cookies matched */
1144 /* Allocate the array. */
1145 outgoing = alloca_array (struct weighed_cookie, count);
1147 /* Fill the array with all the matching cookies from the chains that
1150 for (i = 0; i < chain_count; i++)
1151 for (cookie = chains[i]; cookie; cookie = cookie->next)
1154 if (!cookie_matches_url (cookie, host, port, path,
1155 connection_secure_p, &pg))
1157 outgoing[ocnt].cookie = cookie;
1158 outgoing[ocnt].domain_goodness = strlen (cookie->domain);
1159 outgoing[ocnt].path_goodness = pg;
1162 assert (ocnt == count);
1164 /* Eliminate duplicate cookies; that is, those whose name and value
1166 count = eliminate_dups (outgoing, count);
1168 /* Sort the array so that best-matching domains come first, and
1169 that, within one domain, best-matching paths come first. */
1170 qsort (outgoing, count, sizeof (struct weighed_cookie), goodness_comparator);
1172 /* Count the space the name=value pairs will take. */
1174 for (i = 0; i < count; i++)
1176 struct cookie *c = outgoing[i].cookie;
1178 result_size += strlen (c->attr) + 1 + strlen (c->value);
1181 /* Allocate output buffer:
1183 name=value pairs -- result_size
1184 "; " separators -- (count - 1) * 2
1185 \r\n line ending -- 2
1186 \0 terminator -- 1 */
1187 result_size = 8 + result_size + (count - 1) * 2 + 2 + 1;
1188 result = xmalloc (result_size);
1190 strcpy (result, "Cookie: ");
1192 for (i = 0; i < count; i++)
1194 struct cookie *c = outgoing[i].cookie;
1195 int namlen = strlen (c->attr);
1196 int vallen = strlen (c->value);
1198 memcpy (result + pos, c->attr, namlen);
1200 result[pos++] = '=';
1201 memcpy (result + pos, c->value, vallen);
1205 result[pos++] = ';';
1206 result[pos++] = ' ';
1209 result[pos++] = '\r';
1210 result[pos++] = '\n';
1211 result[pos++] = '\0';
1212 assert (pos == result_size);
1216 /* Support for loading and saving cookies. The format used for
1217 loading and saving should be the format of the `cookies.txt' file
1218 used by Netscape and Mozilla, at least the Unix versions.
1219 (Apparently IE can export cookies in that format as well.) The
1220 format goes like this:
1222 DOMAIN DOMAIN-FLAG PATH SECURE-FLAG TIMESTAMP ATTR-NAME ATTR-VALUE
1224 DOMAIN -- cookie domain, optionally followed by :PORT
1225 DOMAIN-FLAG -- whether all hosts in the domain match
1227 SECURE-FLAG -- whether cookie requires secure connection
1228 TIMESTAMP -- expiry timestamp, number of seconds since epoch
1229 ATTR-NAME -- name of the cookie attribute
1230 ATTR-VALUE -- value of the cookie attribute (empty if absent)
1232 The fields are separated by TABs. All fields are mandatory, except
1233 for ATTR-VALUE. The `-FLAG' fields are boolean, their legal values
1234 being "TRUE" and "FALSE". Empty lines, lines consisting of
1235 whitespace only, and comment lines (beginning with # optionally
1236 preceded by whitespace) are ignored.
1238 Example line from cookies.txt (split in two lines for readability):
1240 .google.com TRUE / FALSE 2147368447 \
1241 PREF ID=34bb47565bbcd47b:LD=en:NR=20:TM=985172580:LM=985739012
1245 /* If the region [B, E) ends with :<digits>, parse the number, return
1246 it, and store new boundary (location of the `:') to DOMAIN_E_PTR.
1247 If port is not specified, return 0. */
1250 domain_port (const char *domain_b, const char *domain_e,
1251 const char **domain_e_ptr)
1255 const char *colon = memchr (domain_b, ':', domain_e - domain_b);
1258 for (p = colon + 1; p < domain_e && ISDIGIT (*p); p++)
1259 port = 10 * port + (*p - '0');
1261 /* Garbage following port number. */
1263 *domain_e_ptr = colon;
1267 #define GET_WORD(p, b, e) do { \
1269 while (*p && *p != '\t') \
1272 if (b == e || !*p) \
1277 /* Load cookies from FILE. */
/* Each line read from FILE is split with GET_WORD into six fields --
   domain, domain flag, path, secure flag, expiry time and name -- and
   the rest of the line is taken as the value.  A cookie obtained from
   cookie_new is filled in from those fields and handed to
   store_cookie together with JAR.
   NOTE(review): several statements of this function are not visible
   in this excerpt (for instance whatever follows the failed-fopen
   message, the bodies of some tests, and the control flow at the end
   of the loop); confirm them against the full file.  */
1280 cookie_jar_load (struct cookie_jar *jar, const char *file)
1283 FILE *fp = fopen (file, "r");
/* Reported together with the system error text for errno.
   NOTE(review): unlike the messages in cookie_jar_save, this format
   string is not wrapped in _() -- confirm whether that is intended.  */
1286 logprintf (LOG_NOTQUIET, "Cannot open cookies file `%s': %s\n",
1287 file, strerror (errno));
/* Reference time that the expiry value read from each line is
   compared against further down.  */
1290 cookies_now = time (NULL);
/* One iteration per line returned by read_whole_line; every line is
   passed to xfree in the loop's increment expression.  */
1292 for (; ((line = read_whole_line (fp)) != NULL); xfree (line))
1294 struct cookie *cookie;
/* Begin/end pointer pairs, one pair per field of the line.  */
1300 char *domain_b = NULL, *domain_e = NULL;
1301 char *domflag_b = NULL, *domflag_e = NULL;
1302 char *path_b = NULL, *path_e = NULL;
1303 char *secure_b = NULL, *secure_e = NULL;
1304 char *expires_b = NULL, *expires_e = NULL;
1305 char *name_b = NULL, *name_e = NULL;
1306 char *value_b = NULL, *value_e = NULL;
1308 /* Skip leading white-space. */
1309 while (*p && ISSPACE (*p))
1311 /* Ignore empty lines and lines that start with '#'. */
1312 if (!*p || *p == '#')
/* The six TAB-terminated fields, in the order they appear on the
   line.  */
1315 GET_WORD (p, domain_b, domain_e);
1316 GET_WORD (p, domflag_b, domflag_e);
1317 GET_WORD (p, path_b, path_e);
1318 GET_WORD (p, secure_b, secure_e);
1319 GET_WORD (p, expires_b, expires_e);
1320 GET_WORD (p, name_b, name_e);
1322 /* Don't use GET_WORD for value because it ends with newline,
1325 value_e = p + strlen (p);
1326 if (value_e > value_b && value_e[-1] == '\n')
1328 if (value_e > value_b && value_e[-1] == '\r')
1330 /* Empty values are legal (I think), so don't bother checking. */
1332 cookie = cookie_new ();
/* Fill the cookie from the delimited fields.
   NOTE(review): strdupdelim presumably returns a fresh copy of the
   text between its two arguments, and BOUNDED_EQUAL presumably
   compares such a range with the given literal -- confirm.  */
1334 cookie->attr = strdupdelim (name_b, name_e);
1335 cookie->value = strdupdelim (value_b, value_e);
1336 cookie->path = strdupdelim (path_b, path_e);
1337 cookie->secure = BOUNDED_EQUAL (secure_b, secure_e, "TRUE");
1339 /* Curl source says, quoting Andre Garcia: "flag: A TRUE/FALSE
1340 value indicating if all machines within a given domain can
1341 access the variable. This value is set automatically by the
1342 browser, depending on the value set for the domain." */
/* Note the negation: a "TRUE" flag in the file means the domain is
   NOT matched exactly.  */
1343 cookie->domain_exact = !BOUNDED_EQUAL (domflag_b, domflag_e, "TRUE");
1345 /* DOMAIN needs special treatment because we might need to
1346 extract the port. */
/* domain_port receives the address of domain_e and may move it back
   to the colon, so the domain copied below excludes that suffix.  */
1347 port = domain_port (domain_b, domain_e, (const char **)&domain_e);
1349 cookie->port = port;
1351 if (*domain_b == '.')
1352 ++domain_b; /* remove leading dot internally */
1353 cookie->domain = strdupdelim (domain_b, domain_e);
1355 /* safe default in case EXPIRES field is garbled. */
1356 expiry = (double)cookies_now - 1;
1358 /* I don't like changing the line, but it's safe here. (line is
1361 sscanf (expires_b, "%lf", &expiry);
1362 if (expiry < cookies_now)
1363 /* ignore stale cookie. */
1365 cookie->expiry_time = expiry;
1367 /* If the cookie has survived being saved into an external file,
1368 it is obviously permanent. */
1369 cookie->permanent = 1;
1371 store_cookie (jar, cookie);
/* NOTE(review): the statements between store_cookie and
   delete_cookie are not visible here; presumably delete_cookie is
   reached only for cookies that were not stored (such as the stale
   ones detected above) -- confirm.  */
1377 delete_cookie (cookie);
1382 /* Mapper for save_cookies callable by hash_table_map. VALUE points
1383 to the head in a chain of cookies. The function prints the entire chain. */
/* save_cookies_mapper: write the cookies of one chain to the stdio
   stream passed as ARG.  KEY is cast to the domain string and VALUE
   to the first cookie of the chain, which is then walked through its
   `next' links.  For a cookie that is written, the visible code
   prints ":PORT" when the port is not PORT_ANY, followed by the
   TAB-separated fields domain flag, path, secure flag, expiry time,
   name and value, and a newline.
   NOTE(review): the statement that writes the domain itself and the
   actions attached to the first three tests in the loop are not
   visible in this excerpt.  */
1387 save_cookies_mapper (void *key, void *value, void *arg)
1389 FILE *fp = (FILE *)arg;
1390 char *domain = (char *)key;
1391 struct cookie *cookie = (struct cookie *)value;
1392 for (; cookie; cookie = cookie->next)
1394 if (!cookie->permanent)
1396 if (COOKIE_EXPIRED_P (cookie))
1398 if (!cookie->domain_exact)
1401 if (cookie->port != PORT_ANY)
1402 fprintf (fp, ":%d", cookie->port);
/* The domain flag is written as the inverse of domain_exact, and the
   expiry time is printed as a double with no fractional digits.  */
1403 fprintf (fp, "\t%s\t%s\t%s\t%.0f\t%s\t%s\n",
1404 cookie->domain_exact ? "FALSE" : "TRUE",
1405 cookie->path, cookie->secure ? "TRUE" : "FALSE",
1406 (double)cookie->expiry_time,
1407 cookie->attr, cookie->value);
1409 return 1; /* stop mapping */
1414 /* Save cookies, in format described above, to FILE. */
/* cookie_jar_save: open FILE for writing, emit three '#' comment
   lines and a blank line, then let hash_table_map call
   save_cookies_mapper on jar->chains_by_domain with the open stream
   as the callback argument.  Problems opening, writing to or closing
   FILE are reported through logprintf at LOG_NOTQUIET level together
   with the system error text for errno.
   NOTE(review): the tests guarding the "Cannot open" and "Error
   writing" messages, and what happens after them, are not visible in
   this excerpt.  */
1417 cookie_jar_save (struct cookie_jar *jar, const char *file)
1421 DEBUGP (("Saving cookies to %s.\n", file));
/* NOTE(review): presumably consulted by COOKIE_EXPIRED_P while the
   chains are written -- confirm.  */
1423 cookies_now = time (NULL);
1425 fp = fopen (file, "w");
1428 logprintf (LOG_NOTQUIET, _("Cannot open cookies file `%s': %s\n"),
1429 file, strerror (errno));
/* File header; the second line records the text returned by
   datetime_str (NULL).  */
1433 fputs ("# HTTP cookie file.\n", fp);
1434 fprintf (fp, "# Generated by Wget on %s.\n", datetime_str (NULL));
1435 fputs ("# Edit at your own risk.\n\n", fp);
1437 hash_table_map (jar->chains_by_domain, save_cookies_mapper, fp);
1440 logprintf (LOG_NOTQUIET, _("Error writing to `%s': %s\n"),
1441 file, strerror (errno));
/* The result of fclose is checked so that a failure while closing
   the stream is reported as well.  */
1443 if (fclose (fp) < 0)
1444 logprintf (LOG_NOTQUIET, _("Error closing `%s': %s\n"),
1445 file, strerror (errno));
1447 DEBUGP (("Done saving cookies.\n"));
1450 /* Destroy all the elements in the chain and unhook it from the cookie
1451 jar. This is written in the form of a callback to hash_table_map
1452 and used by cookie_jar_delete to delete all the cookies in a jar. */
/* nuke_cookie_chain: callback that cookie_jar_delete hands to
   hash_table_map, with the jar itself as ARG.  It removes one chain
   from jar->chains_by_domain and calls delete_cookie on the chain's
   cookies.
   Beware of the parameter names: the first parameter, although
   called VALUE, is cast to the chain's key string, and the second,
   called KEY, is cast to the head of the cookie chain.
   NOTE(review): the loop header, the release of the key mentioned in
   the comment below and the return statement are not visible in this
   excerpt.  */
1456 nuke_cookie_chain (void *value, void *key, void *arg)
1458 char *chain_key = (char *)value;
1459 struct cookie *chain = (struct cookie *)key;
1460 struct cookie_jar *jar = (struct cookie_jar *)arg;
1462 /* Remove the chain from the table and free the key. */
1463 hash_table_remove (jar->chains_by_domain, chain_key);
1466 /* Then delete all the cookies in the chain. */
/* The successor is read before delete_cookie is called on the
   current element.  */
1469 struct cookie *next = chain->next;
1470 delete_cookie (chain);
1478 /* Clean up cookie-related data. */
/* cookie_jar_delete: run nuke_cookie_chain over every entry of
   jar->chains_by_domain, passing JAR as the callback argument, and
   then destroy the hash table itself.
   NOTE(review): whether JAR itself is released afterwards is not
   visible in this excerpt.  */
1481 cookie_jar_delete (struct cookie_jar *jar)
1483 hash_table_map (jar->chains_by_domain, nuke_cookie_chain, jar);
1484 hash_table_destroy (jar->chains_by_domain);
1488 /* Test cases. Currently this only tests parse_set_cookies. To
1489 use, recompile Wget with -DTEST_COOKIES and call test_cookies(). */
/* Results collected by test_parse_cookies_callback, stored as
   alternating name and value entries.  */
1494 char *test_results[10];
/* Callback given to parse_set_cookies by the test code: stores the
   result of strdupdelim for the name range [NB, NE) and then for the
   value range [VB, VE) in the next two slots of test_results.  The
   cookie argument is not used in the visible code.
   NOTE(review): test_results has 10 slots and no bound check on
   test_count is visible here; the return statement is not visible
   either.  */
1496 static int test_parse_cookies_callback (struct cookie *ignored,
1497 const char *nb, const char *ne,
1498 const char *vb, const char *ve)
1500 test_results[test_count++] = strdupdelim (nb, ne);
1501 test_results[test_count++] = strdupdelim (vb, ve);
/* Self-test tables and driver loops.  Each tests_succ entry pairs an
   input string (`data') with a NULL-terminated list (`results') of
   the alternating names and values that parse_set_cookies is expected
   to report through test_parse_cookies_callback.  Each tests_fail
   entry is an input for which an error is expected.  Discrepancies
   are reported with printf.
   NOTE(review): the enclosing function header, the declaration of
   the tests_succ element type and some table entries are not visible
   in this excerpt.  */
1508 /* Tests expected to succeed: */
1514 { "arg=value", {"arg", "value", NULL} },
1515 { "arg1=value1;arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
1516 { "arg1=value1; arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
1517 { "arg1=value1; arg2=value2;", {"arg1", "value1", "arg2", "value2", NULL} },
1518 { "arg1=value1; arg2=value2; ", {"arg1", "value1", "arg2", "value2", NULL} },
1519 { "arg1=\"value1\"; arg2=\"\"", {"arg1", "value1", "arg2", "", NULL} },
1520 { "arg=", {"arg", "", NULL} },
1521 { "arg1=; arg2=", {"arg1", "", "arg2", "", NULL} },
1522 { "arg1 = ; arg2= ", {"arg1", "", "arg2", "", NULL} },
1525 /* Tests expected to fail: */
1526 static char *tests_fail[] = {
1528 "arg=\"unterminated",
1530 "arg1=;=another-empty-name",
/* First pass: inputs that must parse, checked against the expected
   name/value lists.  */
1534 for (i = 0; i < countof (tests_succ); i++)
1537 char *data = tests_succ[i].data;
1538 char **expected = tests_succ[i].results;
1542 c = parse_set_cookies (data, test_parse_cookies_callback, 1);
1545 printf ("NULL cookie returned for valid data: %s\n", data);
/* Results come in name/value pairs, hence the step of 2; the pair
   number shown in the messages is ind / 2 + 1.  */
1549 for (ind = 0; ind < test_count; ind += 2)
1553 if (0 != strcmp (expected[ind], test_results[ind]))
1554 printf ("Invalid name %d for '%s' (expected '%s', got '%s')\n",
1555 ind / 2 + 1, data, expected[ind], test_results[ind]);
1556 if (0 != strcmp (expected[ind + 1], test_results[ind + 1]))
1557 printf ("Invalid value %d for '%s' (expected '%s', got '%s')\n",
1558 ind / 2 + 1, data, expected[ind + 1], test_results[ind + 1]);
/* ind < test_count can only hold if the loop above was left early
   (the statement doing so is not visible here); a non-NULL
   expected[ind] means fewer results were collected than expected.  */
1560 if (ind < test_count || expected[ind])
1561 printf ("Unmatched number of results: %s\n", data);
/* Second pass: inputs for which parse_set_cookies is expected to
   report an error.  */
1564 for (i = 0; i < countof (tests_fail); i++)
1567 char *data = tests_fail[i];
1569 c = parse_set_cookies (data, test_parse_cookies_callback, 1);
1571 printf ("Failed to report error on invalid data: %s\n", data);
1574 #endif /* TEST_COOKIES */