1 /* Support for cookies.
2 Copyright (C) 2001, 2002 Free Software Foundation, Inc.
4 This file is part of GNU Wget.
6 GNU Wget is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or (at
9 your option) any later version.
11 GNU Wget is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Wget; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 In addition, as a special exception, the Free Software Foundation
21 gives permission to link the code of its release of Wget with the
22 OpenSSL project's "OpenSSL" library (or with modified versions of it
23 that use the same license as the "OpenSSL" library), and distribute
24 the linked executables. You must obey the GNU General Public License
25 in all respects for all of the code used other than "OpenSSL". If you
26 modify this file, you may extend this exception to your version of the
27 file, but you are not obligated to do so. If you do not wish to do
28 so, delete this exception statement from your version. */
30 /* Written by Hrvoje Niksic. Parts are loosely inspired by cookie
31 code submitted by Tomasz Wegrzanowski.
33 Ideas for future work:
35 * Implement limits on cookie-related sizes, such as max. cookie
36 size, max. number of cookies, etc.
38 * Add more "cookie jar" methods, such as methods to iterate over
39 stored cookies, to clear temporary cookies, to perform
40 intelligent auto-saving, etc.
42 * Support `Set-Cookie2' and `Cookie2' headers? Does anyone really
62 /* This should *really* be in a .h file! */
63 time_t http_atotm PARAMS ((const char *));
65 /* Declarations of `struct cookie' and the most basic functions. */
67 /* Cookie jar serves as cookie storage and a means of retrieving
68 cookies efficiently. All cookies with the same domain are stored
69 in a linked list called "chain". A cookie chain can be reached by
70 looking up the domain in the cookie jar's `chains' table.
72 For example, to reach all the cookies under google.com, one must
73 execute hash_table_get(jar->chains, "google.com"). Of
74 course, when sending a cookie to `www.google.com', one must search
75 for cookies that belong to either `www.google.com' or `google.com'
76 -- but the point is that the code doesn't need to go through *all*
80 /* Cookie chains indexed by domain. */
81 struct hash_table *chains;
83 int cookie_count; /* number of cookies in the jar. */
86 /* Value set by entry point functions, so that the low-level
87 routines don't need to call time() all the time. */
93 struct cookie_jar *jar = xnew (struct cookie_jar);
94 jar->chains = make_nocase_string_hash_table (0);
95 jar->cookie_count = 0;
100 char *domain; /* domain of the cookie */
101 int port; /* port number */
102 char *path; /* path prefix of the cookie */
104 int secure; /* whether cookie should be
105 transmitted over non-https
107 int domain_exact; /* whether DOMAIN must match as a
110 int permanent; /* whether the cookie should outlive
112 time_t expiry_time; /* time when the cookie expires, 0
113 means undetermined. */
115 int discard_requested; /* whether cookie was created to
116 request discarding another
119 char *attr; /* cookie attribute name */
120 char *value; /* cookie attribute value */
122 struct cookie *next; /* used for chaining of cookies in the
126 #define PORT_ANY (-1)
128 /* Allocate and return a new, empty cookie structure. */
130 static struct cookie *
133 struct cookie *cookie = xnew0 (struct cookie);
135 /* Both cookie->permanent and cookie->expiry_time are now 0. This
136 means that the cookie doesn't expire, but is only valid for this
137 session (i.e. not written out to disk). */
139 cookie->port = PORT_ANY;
143 /* Non-zero if the cookie has expired. Assumes cookies_now has been
144 set by one of the entry point functions. */
147 cookie_expired_p (const struct cookie *c)
149 return c->expiry_time != 0 && c->expiry_time < cookies_now;
152 /* Deallocate COOKIE and its components. */
155 delete_cookie (struct cookie *cookie)
157 xfree_null (cookie->domain);
158 xfree_null (cookie->path);
159 xfree_null (cookie->attr);
160 xfree_null (cookie->value);
164 /* Functions for storing cookies.
166 All cookies can be reached beginning with jar->chains. The key in
167 that table is the domain name, and the value is a linked list of
168 all cookies from that domain. Every new cookie is placed on the
171 /* Find and return a cookie in JAR whose domain, path, and attribute
172 name correspond to COOKIE. If found, PREVPTR will point to the
173 location of the cookie previous in chain, or NULL if the found
174 cookie is the head of a chain.
176 If no matching cookie is found, return NULL. */
178 static struct cookie *
179 find_matching_cookie (struct cookie_jar *jar, struct cookie *cookie,
180 struct cookie **prevptr)
182 struct cookie *chain, *prev;
184 chain = hash_table_get (jar->chains, cookie->domain);
189 for (; chain; prev = chain, chain = chain->next)
190 if (0 == strcmp (cookie->path, chain->path)
191 && 0 == strcmp (cookie->attr, chain->attr)
192 && cookie->port == chain->port)
203 /* Store COOKIE to the jar.
205 This is done by placing COOKIE at the head of its chain. However,
206 if COOKIE matches a cookie already in memory, as determined by
207 find_matching_cookie, the old cookie is unlinked and destroyed.
209 The key of each chain's hash table entry is allocated only the
210 first time; next hash_table_put's reuse the same key. */
213 store_cookie (struct cookie_jar *jar, struct cookie *cookie)
215 struct cookie *chain_head;
218 if (hash_table_get_pair (jar->chains, cookie->domain,
219 &chain_key, &chain_head))
221 /* A chain of cookies in this domain already exists. Check for
222 duplicates -- if an extant cookie exactly matches our domain,
223 port, path, and name, replace it. */
225 struct cookie *victim = find_matching_cookie (jar, cookie, &prev);
229 /* Remove VICTIM from the chain. COOKIE will be placed at
233 prev->next = victim->next;
234 cookie->next = chain_head;
238 /* prev is NULL; apparently VICTIM was at the head of
239 the chain. This place will be taken by COOKIE, so
240 all we need to do is: */
241 cookie->next = victim->next;
243 delete_cookie (victim);
245 DEBUGP (("Deleted old cookie (to be replaced.)\n"));
248 cookie->next = chain_head;
252 /* We are now creating the chain. Use a copy of cookie->domain
253 as the key for the life-time of the chain. Using
254 cookie->domain would be unsafe because the life-time of the
255 chain may exceed the life-time of the cookie. (Cookies may
256 be deleted from the chain by this very function.) */
258 chain_key = xstrdup (cookie->domain);
261 hash_table_put (jar->chains, chain_key, cookie);
267 time_t exptime = (time_t) cookie->expiry_time;
268 DEBUGP (("\nStored cookie %s %d%s %s <%s> <%s> [expiry %s] %s %s\n",
269 cookie->domain, cookie->port,
270 cookie->port == PORT_ANY ? " (ANY)" : "",
272 cookie->permanent ? "permanent" : "session",
273 cookie->secure ? "secure" : "insecure",
274 cookie->expiry_time ? datetime_str (&exptime) : "none",
275 cookie->attr, cookie->value));
280 /* Discard a cookie matching COOKIE's domain, port, path, and
281 attribute name. This gets called when we encounter a cookie whose
282 expiry date is in the past, or whose max-age is set to 0. The
283 former corresponds to netscape cookie spec, while the latter is
284 specified by rfc2109. */
287 discard_matching_cookie (struct cookie_jar *jar, struct cookie *cookie)
289 struct cookie *prev, *victim;
291 if (!hash_table_count (jar->chains))
292 /* No elements == nothing to discard. */
295 victim = find_matching_cookie (jar, cookie, &prev);
299 /* Simply unchain the victim. */
300 prev->next = victim->next;
303 /* VICTIM was head of its chain. We need to place a new
304 cookie at the head. */
305 char *chain_key = NULL;
308 res = hash_table_get_pair (jar->chains, victim->domain,
313 /* VICTIM was the only cookie in the chain. Destroy the
314 chain and deallocate the chain key. */
315 hash_table_remove (jar->chains, victim->domain);
319 hash_table_put (jar->chains, chain_key, victim->next);
321 delete_cookie (victim);
322 DEBUGP (("Discarded old cookie.\n"));
326 /* Functions for parsing the `Set-Cookie' header, and creating new
327 cookies from the wire. */
329 #define NAME_IS(string_literal) \
330 BOUNDED_EQUAL_NO_CASE (name_b, name_e, string_literal)
332 #define VALUE_EXISTS (value_b && value_e)
334 #define VALUE_NON_EMPTY (VALUE_EXISTS && (value_b != value_e))
336 /* Update the appropriate cookie field. [name_b, name_e) are expected
337 to delimit the attribute name, while [value_b, value_e) (optional)
338 should delimit the attribute value.
340 When called the first time, it will set the cookie's attribute name
341 and value. After that, it will check the attribute name for
342 special fields such as `domain', `path', etc. Where appropriate,
343 it will parse the values of the fields it recognizes and fill the
344 corresponding fields in COOKIE.
346 Returns 1 on success. Returns zero in case a syntax error is
347 found; such a cookie should be discarded. */
350 update_cookie_field (struct cookie *cookie,
351 const char *name_b, const char *name_e,
352 const char *value_b, const char *value_e)
354 assert (name_b != NULL && name_e != NULL);
360 cookie->attr = strdupdelim (name_b, name_e);
361 cookie->value = strdupdelim (value_b, value_e);
365 if (NAME_IS ("domain"))
367 if (!VALUE_NON_EMPTY)
369 xfree_null (cookie->domain);
370 /* Strictly speaking, we should set cookie->domain_exact if the
371 domain doesn't begin with a dot. But many sites set the
372 domain to "foo.com" and expect "subhost.foo.com" to get the
373 cookie, and it apparently works. */
376 cookie->domain = strdupdelim (value_b, value_e);
379 else if (NAME_IS ("path"))
381 if (!VALUE_NON_EMPTY)
383 xfree_null (cookie->path);
384 cookie->path = strdupdelim (value_b, value_e);
387 else if (NAME_IS ("expires"))
392 if (!VALUE_NON_EMPTY)
394 BOUNDED_TO_ALLOCA (value_b, value_e, value_copy);
396 expires = http_atotm (value_copy);
399 cookie->permanent = 1;
400 cookie->expiry_time = (time_t)expires;
403 /* Error in expiration spec. Assume default (cookie doesn't
404 expire, but valid only for this session.) */
407 /* According to netscape's specification, expiry time in the
408 past means that discarding of a matching cookie is
410 if (cookie->expiry_time < cookies_now)
411 cookie->discard_requested = 1;
415 else if (NAME_IS ("max-age"))
420 if (!VALUE_NON_EMPTY)
422 BOUNDED_TO_ALLOCA (value_b, value_e, value_copy);
424 sscanf (value_copy, "%lf", &maxage);
426 /* something went wrong. */
428 cookie->permanent = 1;
429 cookie->expiry_time = cookies_now + maxage;
431 /* According to rfc2109, a cookie with max-age of 0 means that
432 discarding of a matching cookie is requested. */
434 cookie->discard_requested = 1;
438 else if (NAME_IS ("secure"))
440 /* ignore value completely */
445 /* Unrecognized attribute; ignore it. */
451 /* Returns non-zero for characters that are legal in the name of an
452 attribute. This used to allow only alphanumerics, '-', and '_',
453 but we need to be more lenient because a number of sites want to
454 use weirder attribute names. rfc2965 "informally specifies"
455 attribute name (token) as "a sequence of non-special, non-white
456 space characters". So we allow everything except the stuff we know
459 #define ATTR_NAME_CHAR(c) ((c) > 32 && (c) < 127 \
460 && (c) != '"' && (c) != '=' \
461 && (c) != ';' && (c) != ',')
463 /* Parse the contents of the `Set-Cookie' header. The header looks
466 name1=value1; name2=value2; ...
468 Trailing semicolon is optional; spaces are allowed between all
469 tokens. Additionally, values may be quoted.
471 A new cookie is returned upon success, NULL otherwise. The
472 specified CALLBACK function (normally `update_cookie_field') is used
473 to update the fields of the newly created cookie structure. */
475 static struct cookie *
476 parse_set_cookies (const char *sc,
477 int (*callback) (struct cookie *,
478 const char *, const char *,
479 const char *, const char *),
482 struct cookie *cookie = cookie_new ();
484 /* #### Hand-written DFAs are no fun to debug. We'd be better off
485 to rewrite this as an inline parser. */
487 enum { S_START, S_NAME, S_NAME_POST,
488 S_VALUE_PRE, S_VALUE, S_QUOTED_VALUE, S_VALUE_TRAILSPACE,
489 S_ATTR_ACTION, S_DONE, S_ERROR
495 const char *name_b = NULL, *name_e = NULL;
496 const char *value_b = NULL, *value_e = NULL;
500 while (state != S_DONE && state != S_ERROR)
507 else if (ISSPACE (c))
508 /* Strip all whitespace preceding the name. */
510 else if (ATTR_NAME_CHAR (c))
516 /* empty attr name not allowed */
520 if (!c || c == ';' || c == '=' || ISSPACE (c))
525 else if (ATTR_NAME_CHAR (c))
533 value_b = value_e = NULL;
536 state = S_ATTR_ACTION;
543 else if (ISSPACE (c))
544 /* Ignore space and keep the state. */
552 value_b = value_e = p;
555 state = S_ATTR_ACTION;
561 state = S_QUOTED_VALUE;
563 else if (ISSPACE (c))
573 if (!c || c == ';' || ISSPACE (c))
576 state = S_VALUE_TRAILSPACE;
580 value_e = NULL; /* no trailing space */
589 state = S_VALUE_TRAILSPACE;
596 case S_VALUE_TRAILSPACE:
600 state = S_ATTR_ACTION;
603 state = S_ATTR_ACTION;
604 else if (ISSPACE (c))
611 int legal = callback (cookie, name_b, name_e, value_b, value_e);
617 BOUNDED_TO_ALLOCA (name_b, name_e, name);
618 logprintf (LOG_NOTQUIET,
619 _("Error in Set-Cookie, field `%s'"), name);
629 /* handled by loop condition */
636 delete_cookie (cookie);
637 if (state != S_ERROR)
641 logprintf (LOG_NOTQUIET,
642 _("Syntax error in Set-Cookie: %s at position %d.\n"),
647 /* Sanity checks. These are important, otherwise it is possible for
648 malicious attackers to destroy important cookie information and/or
649 violate your privacy. */
652 #define REQUIRE_DIGITS(p) do { \
655 for (++p; ISDIGIT (*p); p++) \
659 #define REQUIRE_DOT(p) do { \
664 /* Check whether ADDR matches <digits>.<digits>.<digits>.<digits>.
666 We don't want to call network functions like inet_addr() because
667 all we need is a check, preferrably one that is small, fast, and
671 numeric_address_p (const char *addr)
673 const char *p = addr;
675 REQUIRE_DIGITS (p); /* A */
676 REQUIRE_DOT (p); /* . */
677 REQUIRE_DIGITS (p); /* B */
678 REQUIRE_DOT (p); /* . */
679 REQUIRE_DIGITS (p); /* C */
680 REQUIRE_DOT (p); /* . */
681 REQUIRE_DIGITS (p); /* D */
688 /* Check whether COOKIE_DOMAIN is an appropriate domain for HOST.
689 Originally I tried to make the check compliant with rfc2109, but
690 the sites deviated too often, so I had to fall back to "tail
691 matching", as defined by the original Netscape's cookie spec. */
694 check_domain_match (const char *cookie_domain, const char *host)
698 /* Numeric address requires exact match. It also requires HOST to
700 if (numeric_address_p (cookie_domain))
701 return 0 == strcmp (cookie_domain, host);
705 /* For the sake of efficiency, check for exact match first. */
706 if (0 == strcasecmp (cookie_domain, host))
711 /* HOST must match the tail of cookie_domain. */
712 if (!match_tail (host, cookie_domain, 1))
715 /* We know that COOKIE_DOMAIN is a subset of HOST; however, we must
716 make sure that somebody is not trying to set the cookie for a
717 subdomain shared by many entities. For example, "company.co.uk"
718 must not be allowed to set a cookie for ".co.uk". On the other
719 hand, "sso.redhat.de" should be able to set a cookie for
722 The only marginally sane way to handle this I can think of is to
723 reject on the basis of the length of the second-level domain name
724 (but only when the top-level domain is unknown), with the assumption
725 that those of three or less characters could be reserved. For
728 .co.org -> works because the TLD is known
729 .co.uk -> doesn't work because "co" is only two chars long
730 .com.au -> doesn't work because "com" is only 3 chars long
731 .cnn.uk -> doesn't work because "cnn" is also only 3 chars long (ugh)
732 .cnn.de -> doesn't work for the same reason (ugh!!)
733 .abcd.de -> works because "abcd" is 4 chars long
734 .img.cnn.de -> works because it's not trying to set the 2nd level domain
735 .cnn.co.uk -> works for the same reason
737 That should prevent misuse, while allowing reasonable usage. If
738 someone knows of a better way to handle this, please let me
741 const char *p = cookie_domain;
742 int dccount = 1; /* number of domain components */
743 int ldcl = 0; /* last domain component length */
744 int nldcl = 0; /* next to last domain component length */
747 /* Ignore leading period in this calculation. */
750 for (out = 0; !out; p++)
758 /* Empty domain component found -- the domain is invalid. */
760 if (*(p + 1) == '\0')
762 /* Tolerate trailing '.' by not treating the domain as
763 one ending with an empty domain component. */
785 int known_toplevel = 0;
786 static const char *known_toplevel_domains[] = {
787 ".com", ".edu", ".net", ".org", ".gov", ".mil", ".int"
789 for (i = 0; i < countof (known_toplevel_domains); i++)
790 if (match_tail (cookie_domain, known_toplevel_domains[i], 1))
795 if (!known_toplevel && nldcl <= 3)
802 /* Don't allow the host "foobar.com" to set a cookie for domain
804 if (*cookie_domain != '.')
806 int dlen = strlen (cookie_domain);
807 int hlen = strlen (host);
808 /* cookie host: hostname.foobar.com */
809 /* desired domain: bar.com */
810 /* '.' must be here in host-> ^ */
811 if (hlen > dlen && host[hlen - dlen - 1] != '.')
820 static int path_matches PARAMS ((const char *, const char *));
822 /* Check whether PATH begins with COOKIE_PATH. */
825 check_path_match (const char *cookie_path, const char *path)
827 return path_matches (path, cookie_path);
830 /* Process the HTTP `Set-Cookie' header. This results in storing the
831 cookie or discarding a matching one, or ignoring it completely, all
832 depending on the contents. */
835 cookie_handle_set_cookie (struct cookie_jar *jar,
836 const char *host, int port,
837 const char *path, const char *set_cookie)
839 struct cookie *cookie;
840 cookies_now = time (NULL);
842 cookie = parse_set_cookies (set_cookie, update_cookie_field, 0);
846 /* Sanitize parts of cookie. */
851 /* If the domain was not provided, we use the one we're talking
852 to, and set exact match. */
853 cookie->domain = xstrdup (host);
854 cookie->domain_exact = 1;
855 /* Set the port, but only if it's non-default. */
856 if (port != 80 && port != 443)
861 if (!check_domain_match (cookie->domain, host))
863 logprintf (LOG_NOTQUIET,
864 "Cookie coming from %s attempted to set domain to %s\n",
865 host, cookie->domain);
866 xfree (cookie->domain);
872 cookie->path = xstrdup (path);
875 if (!check_path_match (cookie->path, path))
877 DEBUGP (("Attempt to fake the path: %s, %s\n",
878 cookie->path, path));
883 if (cookie->discard_requested)
885 discard_matching_cookie (jar, cookie);
889 store_cookie (jar, cookie);
894 delete_cookie (cookie);
897 /* Support for sending out cookies in HTTP requests, based on
898 previously stored cookies. Entry point is
899 `build_cookies_request'. */
901 /* Find the cookie chains whose domains match HOST and store them to
904 A cookie chain is the head of a list of cookies that belong to a
905 host/domain. Given HOST "img.search.xemacs.org", this function
906 will return the chains for "img.search.xemacs.org",
907 "search.xemacs.org", and "xemacs.org" -- those of them that exist
910 DEST should be large enough to accept (in the worst case) as many
911 elements as there are domain components of HOST. */
914 find_chains_of_host (struct cookie_jar *jar, const char *host,
915 struct cookie *dest[])
920 /* Bail out quickly if there are no cookies in the jar. */
921 if (!hash_table_count (jar->chains))
924 if (numeric_address_p (host))
925 /* If host is an IP address, only check for the exact match. */
928 /* Otherwise, check all the subdomains except the top-level (last)
929 one. As a domain with N components has N-1 dots, the number of
930 passes equals the number of dots. */
931 passes = count_char (host, '.');
935 /* Find chains that match HOST, starting with exact match and
936 progressing to less specific domains. For instance, given HOST
937 fly.srk.fer.hr, first look for fly.srk.fer.hr's chain, then
938 srk.fer.hr's, then fer.hr's. */
941 struct cookie *chain = hash_table_get (jar->chains, host);
943 dest[dest_count++] = chain;
944 if (++passcnt >= passes)
946 host = strchr (host, '.') + 1;
952 /* If FULL_PATH begins with PREFIX, return the length of PREFIX, zero
956 path_matches (const char *full_path, const char *prefix)
961 /* Wget's HTTP paths do not begin with '/' (the URL code treats it
962 as a mere separator, inspired by rfc1808), but the '/' is
963 assumed when matching against the cookie stuff. */
967 len = strlen (prefix);
969 if (0 != strncmp (full_path, prefix, len))
970 /* FULL_PATH doesn't begin with PREFIX. */
973 /* Length of PREFIX determines the quality of the match. */
977 /* Return non-zero iff COOKIE matches the provided parameters of the
978 URL being downloaded: HOST, PORT, PATH, and SECFLAG.
980 If PATH_GOODNESS is non-NULL, store the "path goodness" value
981 there. That value is a measure of how closely COOKIE matches PATH,
982 used for ordering cookies. */
985 cookie_matches_url (const struct cookie *cookie,
986 const char *host, int port, const char *path,
987 int secflag, int *path_goodness)
991 if (cookie_expired_p (cookie))
992 /* Ignore stale cookies. Don't bother unchaining the cookie at
993 this point -- Wget is a relatively short-lived application, and
994 stale cookies will not be saved by `save_cookies'. On the
995 other hand, this function should be as efficient as
999 if (cookie->secure && !secflag)
1000 /* Don't transmit secure cookies over insecure connections. */
1002 if (cookie->port != PORT_ANY && cookie->port != port)
1005 /* If exact domain match is required, verify that cookie's domain is
1006 equal to HOST. If not, assume success on the grounds of the
1007 cookie's chain having been found by find_chains_of_host. */
1008 if (cookie->domain_exact
1009 && 0 != strcasecmp (host, cookie->domain))
1012 pg = path_matches (path, cookie->path);
1017 /* If the caller requested path_goodness, we return it. This is
1018 an optimization, so that the caller doesn't need to call
1019 path_matches() again. */
1020 *path_goodness = pg;
1024 /* A structure that points to a cookie, along with the additional
1025 information about the cookie's "goodness". This allows us to sort
1026 the cookies when returning them to the server, as required by the
1029 struct weighed_cookie {
1030 struct cookie *cookie;
1031 int domain_goodness;
1035 /* Comparator used for uniquifying the list. */
1038 equality_comparator (const void *p1, const void *p2)
1040 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
1041 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
1043 int namecmp = strcmp (wc1->cookie->attr, wc2->cookie->attr);
1044 int valuecmp = strcmp (wc1->cookie->value, wc2->cookie->value);
1046 /* We only really care whether both name and value are equal. We
1047 return them in this order only for consistency... */
1048 return namecmp ? namecmp : valuecmp;
1051 /* Eliminate duplicate cookies. "Duplicate cookies" are any two
1052 cookies with the same attr name and value. Whenever a duplicate
1053 pair is found, one of the cookies is removed. */
1056 eliminate_dups (struct weighed_cookie *outgoing, int count)
1058 struct weighed_cookie *h; /* hare */
1059 struct weighed_cookie *t; /* tortoise */
1060 struct weighed_cookie *end = outgoing + count;
1062 /* We deploy a simple uniquify algorithm: first sort the array
1063 according to our sort criteria, then copy it to itself, comparing
1064 each cookie to its neighbor and ignoring the duplicates. */
1066 qsort (outgoing, count, sizeof (struct weighed_cookie), equality_comparator);
1068 /* "Hare" runs through all the entries in the array, followed by
1069 "tortoise". If a duplicate is found, the hare skips it.
1070 Non-duplicate entries are copied to the tortoise ptr. */
1072 for (h = t = outgoing; h < end; h++)
1076 struct cookie *c0 = h[0].cookie;
1077 struct cookie *c1 = h[1].cookie;
1078 if (!strcmp (c0->attr, c1->attr) && !strcmp (c0->value, c1->value))
1079 continue; /* ignore the duplicate */
1082 /* If the hare has advanced past the tortoise (because of
1083 previous dups), make sure the values get copied. Otherwise,
1084 no copying is necessary. */
1090 return t - outgoing;
1093 /* Comparator used for sorting by quality. */
1096 goodness_comparator (const void *p1, const void *p2)
1098 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
1099 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
1101 /* Subtractions take `wc2' as the first argument because we want a
1102 sort in *decreasing* order of goodness. */
1103 int dgdiff = wc2->domain_goodness - wc1->domain_goodness;
1104 int pgdiff = wc2->path_goodness - wc1->path_goodness;
1106 /* Sort by domain goodness; if these are the same, sort by path
1107 goodness. (The sorting order isn't really specified; maybe it
1108 should be the other way around.) */
1109 return dgdiff ? dgdiff : pgdiff;
1112 /* Generate a `Cookie' header for a request that goes to HOST:PORT and
1113 requests PATH from the server. The resulting string is allocated
1114 with `malloc', and the caller is responsible for freeing it. If no
1115 cookies pertain to this request, i.e. no cookie header should be
1116 generated, NULL is returned. */
1119 cookie_header (struct cookie_jar *jar, const char *host,
1120 int port, const char *path, int secflag)
1122 struct cookie **chains;
1125 struct cookie *cookie;
1126 struct weighed_cookie *outgoing;
1129 int result_size, pos;
1131 /* First, find the cookie chains whose domains match HOST. */
1133 /* Allocate room for find_chains_of_host to write to. The number of
1134 chains can at most equal the number of subdomains, hence
1135 1+<number of dots>. */
1136 chains = alloca_array (struct cookie *, 1 + count_char (host, '.'));
1137 chain_count = find_chains_of_host (jar, host, chains);
1139 /* No cookies for this host. */
1143 cookies_now = time (NULL);
1145 /* Now extract from the chains those cookies that match our host
1146 (for domain_exact cookies), port (for cookies with port other
1147 than PORT_ANY), etc. See cookie_matches_url for details. */
1149 /* Count the number of matching cookies. */
1151 for (i = 0; i < chain_count; i++)
1152 for (cookie = chains[i]; cookie; cookie = cookie->next)
1153 if (cookie_matches_url (cookie, host, port, path, secflag, NULL))
1156 return NULL; /* no cookies matched */
1158 /* Allocate the array. */
1159 outgoing = alloca_array (struct weighed_cookie, count);
1161 /* Fill the array with all the matching cookies from the chains that
1164 for (i = 0; i < chain_count; i++)
1165 for (cookie = chains[i]; cookie; cookie = cookie->next)
1168 if (!cookie_matches_url (cookie, host, port, path, secflag, &pg))
1170 outgoing[ocnt].cookie = cookie;
1171 outgoing[ocnt].domain_goodness = strlen (cookie->domain);
1172 outgoing[ocnt].path_goodness = pg;
1175 assert (ocnt == count);
1177 /* Eliminate duplicate cookies; that is, those whose name and value
1179 count = eliminate_dups (outgoing, count);
1181 /* Sort the array so that best-matching domains come first, and
1182 that, within one domain, best-matching paths come first. */
1183 qsort (outgoing, count, sizeof (struct weighed_cookie), goodness_comparator);
1185 /* Count the space the name=value pairs will take. */
1187 for (i = 0; i < count; i++)
1189 struct cookie *c = outgoing[i].cookie;
1191 result_size += strlen (c->attr) + 1 + strlen (c->value);
1194 /* Allocate output buffer:
1195 name=value pairs -- result_size
1196 "; " separators -- (count - 1) * 2
1197 \0 terminator -- 1 */
1198 result_size = result_size + (count - 1) * 2 + 1;
1199 result = xmalloc (result_size);
1201 for (i = 0; i < count; i++)
1203 struct cookie *c = outgoing[i].cookie;
1204 int namlen = strlen (c->attr);
1205 int vallen = strlen (c->value);
1207 memcpy (result + pos, c->attr, namlen);
1209 result[pos++] = '=';
1210 memcpy (result + pos, c->value, vallen);
1214 result[pos++] = ';';
1215 result[pos++] = ' ';
1218 result[pos++] = '\0';
1219 assert (pos == result_size);
1223 /* Support for loading and saving cookies. The format used for
1224 loading and saving should be the format of the `cookies.txt' file
1225 used by Netscape and Mozilla, at least the Unix versions.
1226 (Apparently IE can export cookies in that format as well.) The
1227 format goes like this:
1229 DOMAIN DOMAIN-FLAG PATH SECURE-FLAG TIMESTAMP ATTR-NAME ATTR-VALUE
1231 DOMAIN -- cookie domain, optionally followed by :PORT
1232 DOMAIN-FLAG -- whether all hosts in the domain match
1234 SECURE-FLAG -- whether cookie requires secure connection
1235 TIMESTAMP -- expiry timestamp, number of seconds since epoch
1236 ATTR-NAME -- name of the cookie attribute
1237 ATTR-VALUE -- value of the cookie attribute (empty if absent)
1239 The fields are separated by TABs. All fields are mandatory, except
1240 for ATTR-VALUE. The `-FLAG' fields are boolean, their legal values
1241 being "TRUE" and "FALSE". Empty lines, lines consisting of
1242 whitespace only, and comment lines (beginning with # optionally
1243 preceded by whitespace) are ignored.
1245 Example line from cookies.txt (split in two lines for readability):
1247 .google.com TRUE / FALSE 2147368447 \
1248 PREF ID=34bb47565bbcd47b:LD=en:NR=20:TM=985172580:LM=985739012
1252 /* If the region [B, E) ends with :<digits>, parse the number, return
1253 it, and store new boundary (location of the `:') to DOMAIN_E_PTR.
1254 If port is not specified, return 0. */
1257 domain_port (const char *domain_b, const char *domain_e,
1258 const char **domain_e_ptr)
1262 const char *colon = memchr (domain_b, ':', domain_e - domain_b);
1265 for (p = colon + 1; p < domain_e && ISDIGIT (*p); p++)
1266 port = 10 * port + (*p - '0');
1268 /* Garbage following port number. */
1270 *domain_e_ptr = colon;
1274 #define GET_WORD(p, b, e) do { \
1276 while (*p && *p != '\t') \
1279 if (b == e || !*p) \
1284 /* Load cookies from FILE into JAR.

   FILE is opened for reading with fopen.  The file is consumed one
   line at a time through read_whole_line, and every line is released
   with xfree after it has been processed.  Leading whitespace is
   skipped; empty lines and lines starting with # are ignored.  The
   fields are taken in this order: domain, domain flag, path, secure
   flag, expiry, name, and finally value, which extends to the end of
   the line.  A cookie whose expiry lies before the current time is
   not stored.

   NOTE(review): several statements of this function are not visible
   in this excerpt (the declarations of LINE, P, PORT and EXPIRY, the
   check that follows fopen, the labels targeted by goto, and whatever
   cleanup ends the function) -- confirm against the complete source.  */
1287 cookie_jar_load (struct cookie_jar *jar, const char *file)
1290 FILE *fp = fopen (file, "r");
/* Presumably reached only when fopen failed; the test is not visible. */
/* NOTE(review): unlike the matching message in cookie_jar_save, this format string is not wrapped in _(). */
1293 logprintf (LOG_NOTQUIET, "Cannot open cookies file `%s': %s\n",
1294 file, strerror (errno));
/* Remember the current time; each expiry is compared against it below. */
1297 cookies_now = time (NULL);
/* One iteration per line of the file; xfree releases the line afterwards. */
1299 for (; ((line = read_whole_line (fp)) != NULL); xfree (line))
1301 struct cookie *cookie;
/* Begin and end pointers delimiting every field inside the line. */
1307 char *domain_b = NULL, *domain_e = NULL;
1308 char *domflag_b = NULL, *domflag_e = NULL;
1309 char *path_b = NULL, *path_e = NULL;
1310 char *secure_b = NULL, *secure_e = NULL;
1311 char *expires_b = NULL, *expires_e = NULL;
1312 char *name_b = NULL, *name_e = NULL;
1313 char *value_b = NULL, *value_e = NULL;
1315 /* Skip leading white-space. */
1316 while (*p && ISSPACE (*p))
1318 /* Ignore empty lines. */
1319 if (!*p || *p == '#')
/* Pull out the six leading fields, in the order they appear in the file. */
1322 GET_WORD (p, domain_b, domain_e);
1323 GET_WORD (p, domflag_b, domflag_e);
1324 GET_WORD (p, path_b, path_e);
1325 GET_WORD (p, secure_b, secure_e);
1326 GET_WORD (p, expires_b, expires_e);
1327 GET_WORD (p, name_b, name_e);
1329 /* Don't use GET_WORD for value because it ends with newline,
1332 value_e = p + strlen (p);
1333 if (value_e > value_b && value_e[-1] == '\n')
1335 if (value_e > value_b && value_e[-1] == '\r')
1337 /* Empty values are legal (I think), so don't bother checking. */
1339 cookie = cookie_new ();
/* Fill in the cookie from the delimited fields. */
1341 cookie->attr = strdupdelim (name_b, name_e);
1342 cookie->value = strdupdelim (value_b, value_e);
1343 cookie->path = strdupdelim (path_b, path_e);
/* NOTE(review): BOUNDED_EQUAL presumably compares the region with the literal -- confirm. */
1344 cookie->secure = BOUNDED_EQUAL (secure_b, secure_e, "TRUE");
1346 /* Curl source says, quoting Andre Garcia: "flag: A TRUE/FALSE
1347 value indicating if all machines within a given domain can
1348 access the variable. This value is set automatically by the
1349 browser, depending on the value set for the domain." */
1350 cookie->domain_exact = !BOUNDED_EQUAL (domflag_b, domflag_e, "TRUE");
1352 /* DOMAIN needs special treatment because we might need to
1353 extract the port. */
/* domain_e is passed by address so that domain_port can move it back to the colon. */
1354 port = domain_port (domain_b, domain_e, (const char **)&domain_e);
1356 cookie->port = port;
1358 if (*domain_b == '.')
1359 ++domain_b; /* remove leading dot internally */
1360 cookie->domain = strdupdelim (domain_b, domain_e);
1362 /* safe default in case EXPIRES field is garbled. */
1363 expiry = (double)cookies_now - 1;
1365 /* I don't like changing the line, but it's safe here. (line is
1368 sscanf (expires_b, "%lf", &expiry);
1372 /* EXPIRY can be 0 for session cookies saved because the
1373 user specified `--keep-session-cookies' in the past.
1374 They remain session cookies, and will be saved only if
1375 the user has specified `keep-session-cookies' again. */
/* An expiry in the past marks the cookie as stale, so it is dropped. */
1379 if (expiry < cookies_now)
1380 goto abort; /* ignore stale cookie. */
1381 cookie->expiry_time = expiry;
1382 cookie->permanent = 1;
/* Hand the finished cookie over to the jar. */
1385 store_cookie (jar, cookie);
/* NOTE(review): presumably reached through the abort label used above; the label line is not visible here. */
1391 delete_cookie (cookie);
1396 /* Mapper for save_cookies callable by hash_table_map. VALUE points
1397 to the head in a chain of cookies. The function prints the entire chain. */
/* Callback handed to hash_table_map by cookie_jar_save; writes one
   chain of cookies to a stdio stream.

   KEY   -- cast to char * and kept in DOMAIN.
   VALUE -- first struct cookie of the chain, which is then followed
            through the next pointers.
   ARG   -- the FILE * to write to.

   For every cookie that is written, the TAB-separated tail of the
   line holds the domain flag, path, secure flag, expiry time, name
   and value.  Returning 1 stops the mapping.

   NOTE(review): the statements guarded by the three leading tests in
   the loop, the statement that prints the domain itself, the test
   guarding the return of 1, and the normal return value are not
   visible in this excerpt -- confirm against the complete source.  */
1401 save_cookies_mapper (void *key, void *value, void *arg)
1403 FILE *fp = (FILE *)arg;
1404 char *domain = (char *)key;
1405 struct cookie *cookie = (struct cookie *)value;
1406 for (; cookie; cookie = cookie->next)
/* Non-permanent cookies are singled out unless opt.keep_session_cookies is set. */
1408 if (!cookie->permanent && !opt.keep_session_cookies)
/* Expired cookies are singled out as well. */
1410 if (cookie_expired_p (cookie))
1412 if (!cookie->domain_exact)
/* The port is appended after a colon only when it is not PORT_ANY. */
1415 if (cookie->port != PORT_ANY)
1416 fprintf (fp, ":%d", cookie->port);
/* The domain flag is written as the inverse of domain_exact. */
1417 fprintf (fp, "\t%s\t%s\t%s\t%.0f\t%s\t%s\n",
1418 cookie->domain_exact ? "FALSE" : "TRUE",
1419 cookie->path, cookie->secure ? "TRUE" : "FALSE",
1420 (double)cookie->expiry_time,
1421 cookie->attr, cookie->value);
1423 return 1; /* stop mapping */
1428 /* Save cookies, in format described above, to FILE.

   JAR  -- the cookie jar whose chains table is written out.
   FILE -- name of the file; it is opened with mode "w".

   A three-line comment header is written first, then every chain in
   JAR is passed to save_cookies_mapper through hash_table_map.
   Failures are reported through logprintf with strerror (errno).

   NOTE(review): the test that follows fopen, the test guarding the
   write-error message, and the declaration of FP are not visible in
   this excerpt -- confirm against the complete source.  */
1431 cookie_jar_save (struct cookie_jar *jar, const char *file)
1435 DEBUGP (("Saving cookies to %s.\n", file));
/* The same timestamp is also printed in the file header below. */
1437 cookies_now = time (NULL);
1439 fp = fopen (file, "w");
/* Presumably reached only when fopen failed; the test is not visible. */
1442 logprintf (LOG_NOTQUIET, _("Cannot open cookies file `%s': %s\n"),
1443 file, strerror (errno));
/* Header lines; each starts with #. */
1447 fputs ("# HTTP cookie file.\n", fp);
1448 fprintf (fp, "# Generated by Wget on %s.\n", datetime_str (&cookies_now));
1449 fputs ("# Edit at your own risk.\n\n", fp);
/* Write every chain; FP travels to the callback as its third argument. */
1451 hash_table_map (jar->chains, save_cookies_mapper, fp);
/* Presumably guarded by a stream error test that is not visible here. */
1454 logprintf (LOG_NOTQUIET, _("Error writing to `%s': %s\n"),
1455 file, strerror (errno));
/* The result of fclose is checked as well, and a failure is logged. */
1456 if (fclose (fp) < 0)
1457 logprintf (LOG_NOTQUIET, _("Error closing `%s': %s\n"),
1458 file, strerror (errno));
1460 DEBUGP (("Done saving cookies.\n"));
1463 /* Destroy all the elements in the chain and unhook it from the cookie
1464 jar. This is written in the form of a callback to hash_table_map
1465 and used by cookie_jar_delete to delete all the cookies in a jar. */
/* Callback handed to hash_table_map by cookie_jar_delete.

   The first argument is used as the key of the chain in jar->chains
   and is removed from that table; the second argument is used as the
   head of the cookie chain, whose members are passed to delete_cookie.
   ARG is the struct cookie_jar that owns the table.

   NOTE(review): the parameter names look swapped relative to their
   use -- the one called VALUE is treated as the key and the one called
   KEY as the chain.  save_cookies_mapper, which is passed to the same
   hash_table_map, names them (key, value, arg).  Confirm and consider
   renaming.

   NOTE(review): the statement that frees the key, the loop header
   around the deletion, and the return value are not visible in this
   excerpt -- confirm against the complete source.  */
1469 nuke_cookie_chain (void *value, void *key, void *arg)
1471 char *chain_key = (char *)value;
1472 struct cookie *chain = (struct cookie *)key;
1473 struct cookie_jar *jar = (struct cookie_jar *)arg;
1475 /* Remove the chain from the table and free the key. */
1476 hash_table_remove (jar->chains, chain_key);
1479 /* Then delete all the cookies in the chain. */
/* The successor is saved before delete_cookie is called on the current cookie. */
1482 struct cookie *next = chain->next;
1483 delete_cookie (chain);
1491 /* Clean up cookie-related data.

   Every chain stored in jar->chains is first passed to
   nuke_cookie_chain through hash_table_map, with JAR as the callback
   argument; the table itself is then released with hash_table_destroy.

   NOTE(review): whether JAR itself is freed afterwards is not visible
   in this excerpt -- confirm against the complete source.  */
1494 cookie_jar_delete (struct cookie_jar *jar)
1496 hash_table_map (jar->chains, nuke_cookie_chain, jar);
1497 hash_table_destroy (jar->chains);
1501 /* Test cases. Currently this only tests parse_set_cookies. To
1502 use, recompile Wget with -DTEST_COOKIES and call test_cookies(). */
/* Scratch storage for the parse_set_cookies tests.  The callback
   below appends two entries per invocation, so the ten slots hold at
   most five name/value pairs.
   NOTE(review): the declaration of test_count is not visible in this
   excerpt, and nothing visible here bounds it against the array size
   or releases the stored strings -- confirm against the complete
   source.  */
1507 char *test_results[10];
/* Callback given to parse_set_cookies by the tests.  The first argument is ignored. */
/* [NB, NE) delimits the name and [VB, VE) the value; each is copied with strdupdelim. */
/* NOTE(review): the return statement is not visible in this excerpt. */
1509 static int test_parse_cookies_callback (struct cookie *ignored,
1510 const char *nb, const char *ne,
1511 const char *vb, const char *ve)
1513 test_results[test_count++] = strdupdelim (nb, ne);
1514 test_results[test_count++] = strdupdelim (vb, ve);
/* Body of the parse_set_cookies test driver.

   Every entry of tests_succ pairs an input string with the sequence
   of names and values expected from it, terminated by NULL.  Every
   entry of tests_fail is an input string on its own.  Both tables are
   run through parse_set_cookies with test_parse_cookies_callback, and
   mismatches are reported with printf.

   NOTE(review): the function header, the declarations of the tables
   and of the locals I, IND and C, some short table entries, and the
   tests guarding several printf calls are not visible in this
   excerpt -- confirm against the complete source.  */
1521 /* Tests expected to succeed: */
1527 { "arg=value", {"arg", "value", NULL} },
1528 { "arg1=value1;arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
1529 { "arg1=value1; arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
1530 { "arg1=value1; arg2=value2;", {"arg1", "value1", "arg2", "value2", NULL} },
1531 { "arg1=value1; arg2=value2; ", {"arg1", "value1", "arg2", "value2", NULL} },
1532 { "arg1=\"value1\"; arg2=\"\"", {"arg1", "value1", "arg2", "", NULL} },
1533 { "arg=", {"arg", "", NULL} },
1534 { "arg1=; arg2=", {"arg1", "", "arg2", "", NULL} },
1535 { "arg1 = ; arg2= ", {"arg1", "", "arg2", "", NULL} },
1538 /* Tests expected to fail: */
1539 static char *tests_fail[] = {
1541 "arg=\"unterminated",
1543 "arg1=;=another-empty-name",
/* First pass: inputs that are expected to parse. */
1547 for (i = 0; i < countof (tests_succ); i++)
1550 char *data = tests_succ[i].data;
1551 char **expected = tests_succ[i].results;
1555 c = parse_set_cookies (data, test_parse_cookies_callback, 1);
1558 printf ("NULL cookie returned for valid data: %s\n", data);
/* Results are stored as name, value, name, value, so the index advances by two. */
1562 for (ind = 0; ind < test_count; ind += 2)
1566 if (0 != strcmp (expected[ind], test_results[ind]))
1567 printf ("Invalid name %d for '%s' (expected '%s', got '%s')\n",
1568 ind / 2 + 1, data, expected[ind], test_results[ind]);
1569 if (0 != strcmp (expected[ind + 1], test_results[ind + 1]))
1570 printf ("Invalid value %d for '%s' (expected '%s', got '%s')\n",
1571 ind / 2 + 1, data, expected[ind + 1], test_results[ind + 1]);
/* Either results were left unchecked or expected entries remain. */
1573 if (ind < test_count || expected[ind])
1574 printf ("Unmatched number of results: %s\n", data);
/* Second pass: inputs that are expected to be rejected. */
1577 for (i = 0; i < countof (tests_fail); i++)
1580 char *data = tests_fail[i];
1582 c = parse_set_cookies (data, test_parse_cookies_callback, 1);
1584 printf ("Failed to report error on invalid data: %s\n", data);
1587 #endif /* TEST_COOKIES */