1 /* Support for cookies.
2 Copyright (C) 2001, 2002 Free Software Foundation, Inc.
4 This file is part of GNU Wget.
6 GNU Wget is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or (at
9 your option) any later version.
11 GNU Wget is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Wget; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 In addition, as a special exception, the Free Software Foundation
21 gives permission to link the code of its release of Wget with the
22 OpenSSL project's "OpenSSL" library (or with modified versions of it
23 that use the same license as the "OpenSSL" library), and distribute
24 the linked executables. You must obey the GNU General Public License
25 in all respects for all of the code used other than "OpenSSL". If you
26 modify this file, you may extend this exception to your version of the
27 file, but you are not obligated to do so. If you do not wish to do
28 so, delete this exception statement from your version. */
30 /* Written by Hrvoje Niksic. Parts are loosely inspired by cookie
31 code submitted by Tomasz Wegrzanowski.
33 Ideas for future work:
35 * Implement limits on cookie-related sizes, such as max. cookie
36 size, max. number of cookies, etc.
38 * Add more "cookie jar" methods, such as methods to iterate over
39 stored cookies, to clear temporary cookies, to perform
40 intelligent auto-saving, etc.
42 * Support `Set-Cookie2' and `Cookie2' headers? Does anyone really
62 /* This should *really* be in a .h file! */
63 time_t http_atotm PARAMS ((const char *));
65 /* Declarations of `struct cookie' and the most basic functions. */
67 /* Cookie jar serves as cookie storage and a means of retrieving
68 cookies efficiently. All cookies with the same domain are stored
69 in a linked list called "chain". A cookie chain can be reached by
70 looking up the domain in the cookie jar's `chains' hash table.
72 For example, to reach all the cookies under google.com, one must
73 execute hash_table_get(jar->chains, "google.com"). Of
74 course, when sending a cookie to `www.google.com', one must search
75 for cookies that belong to either `www.google.com' or `google.com'
76 -- but the point is that the code doesn't need to go through *all*
80 /* Cookie chains indexed by domain. */
81 struct hash_table *chains;
83 int cookie_count; /* number of cookies in the jar. */
86 /* Value set by entry point functions, so that the low-level
87 routines don't need to call time() all the time. */
93 struct cookie_jar *jar = xnew (struct cookie_jar);
94 jar->chains = make_nocase_string_hash_table (0);
95 jar->cookie_count = 0;
100 char *domain; /* domain of the cookie */
101 int port; /* port number */
102 char *path; /* path prefix of the cookie */
104 int secure; /* whether cookie should be
105 transmitted only over https, never over non-https
107 int domain_exact; /* whether DOMAIN must match as a
110 int permanent; /* whether the cookie should outlive
112 time_t expiry_time; /* time when the cookie expires, 0
113 means undetermined. */
115 int discard_requested; /* whether cookie was created to
116 request discarding another
119 char *attr; /* cookie attribute name */
120 char *value; /* cookie attribute value */
122 struct cookie *next; /* used for chaining of cookies in the
126 #define PORT_ANY (-1)
128 /* Allocate and return a new, empty cookie structure. */
130 static struct cookie *
133 struct cookie *cookie = xnew0 (struct cookie);
135 /* Both cookie->permanent and cookie->expiry_time are now 0. This
136 means that the cookie doesn't expire, but is only valid for this
137 session (i.e. not written out to disk). */
139 cookie->port = PORT_ANY;
143 /* Non-zero if the cookie has expired. Assumes cookies_now has been
144 set by one of the entry point functions. */
147 cookie_expired_p (const struct cookie *c)
149 return c->expiry_time != 0 && c->expiry_time < cookies_now;
152 /* Deallocate COOKIE and its components. */
155 delete_cookie (struct cookie *cookie)
157 xfree_null (cookie->domain);
158 xfree_null (cookie->path);
159 xfree_null (cookie->attr);
160 xfree_null (cookie->value);
164 /* Functions for storing cookies.
166 All cookies can be reached beginning with jar->chains. The key in
167 that table is the domain name, and the value is a linked list of
168 all cookies from that domain. Every new cookie is placed on the
171 /* Find and return a cookie in JAR whose domain, path, and attribute
172 name correspond to COOKIE. If found, PREVPTR will point to the
173 location of the cookie previous in chain, or NULL if the found
174 cookie is the head of a chain.
176 If no matching cookie is found, return NULL. */
178 static struct cookie *
179 find_matching_cookie (struct cookie_jar *jar, struct cookie *cookie,
180 struct cookie **prevptr)
182 struct cookie *chain, *prev;
184 chain = hash_table_get (jar->chains, cookie->domain);
189 for (; chain; prev = chain, chain = chain->next)
190 if (0 == strcmp (cookie->path, chain->path)
191 && 0 == strcmp (cookie->attr, chain->attr)
192 && cookie->port == chain->port)
203 /* Store COOKIE to the jar.
205 This is done by placing COOKIE at the head of its chain. However,
206 if COOKIE matches a cookie already in memory, as determined by
207 find_matching_cookie, the old cookie is unlinked and destroyed.
209 The key of each chain's hash table entry is allocated only the
210 first time; next hash_table_put's reuse the same key. */
213 store_cookie (struct cookie_jar *jar, struct cookie *cookie)
215 struct cookie *chain_head;
218 if (hash_table_get_pair (jar->chains, cookie->domain,
219 &chain_key, &chain_head))
221 /* A chain of cookies in this domain already exists. Check for
222 duplicates -- if an extant cookie exactly matches our domain,
223 port, path, and name, replace it. */
225 struct cookie *victim = find_matching_cookie (jar, cookie, &prev);
229 /* Remove VICTIM from the chain. COOKIE will be placed at
233 prev->next = victim->next;
234 cookie->next = chain_head;
238 /* prev is NULL; apparently VICTIM was at the head of
239 the chain. This place will be taken by COOKIE, so
240 all we need to do is: */
241 cookie->next = victim->next;
243 delete_cookie (victim);
245 DEBUGP (("Deleted old cookie (to be replaced.)\n"));
248 cookie->next = chain_head;
252 /* We are now creating the chain. Use a copy of cookie->domain
253 as the key for the life-time of the chain. Using
254 cookie->domain would be unsafe because the life-time of the
255 chain may exceed the life-time of the cookie. (Cookies may
256 be deleted from the chain by this very function.) */
258 chain_key = xstrdup (cookie->domain);
261 hash_table_put (jar->chains, chain_key, cookie);
267 time_t exptime = (time_t) cookie->expiry_time;
268 DEBUGP (("\nStored cookie %s %d%s %s <%s> <%s> [expiry %s] %s %s\n",
269 cookie->domain, cookie->port,
270 cookie->port == PORT_ANY ? " (ANY)" : "",
272 cookie->permanent ? "permanent" : "session",
273 cookie->secure ? "secure" : "insecure",
274 cookie->expiry_time ? datetime_str (&exptime) : "none",
275 cookie->attr, cookie->value));
280 /* Discard a cookie matching COOKIE's domain, port, path, and
281 attribute name. This gets called when we encounter a cookie whose
282 expiry date is in the past, or whose max-age is set to 0. The
283 former corresponds to netscape cookie spec, while the latter is
284 specified by rfc2109. */
287 discard_matching_cookie (struct cookie_jar *jar, struct cookie *cookie)
289 struct cookie *prev, *victim;
291 if (!hash_table_count (jar->chains))
292 /* No elements == nothing to discard. */
295 victim = find_matching_cookie (jar, cookie, &prev);
299 /* Simply unchain the victim. */
300 prev->next = victim->next;
303 /* VICTIM was head of its chain. We need to place a new
304 cookie at the head. */
305 char *chain_key = NULL;
308 res = hash_table_get_pair (jar->chains, victim->domain,
313 /* VICTIM was the only cookie in the chain. Destroy the
314 chain and deallocate the chain key. */
315 hash_table_remove (jar->chains, victim->domain);
319 hash_table_put (jar->chains, chain_key, victim->next);
321 delete_cookie (victim);
322 DEBUGP (("Discarded old cookie.\n"));
326 /* Functions for parsing the `Set-Cookie' header, and creating new
327 cookies from the wire. */
329 #define NAME_IS(string_literal) \
330 BOUNDED_EQUAL_NO_CASE (name_b, name_e, string_literal)
332 #define VALUE_EXISTS (value_b && value_e)
334 #define VALUE_NON_EMPTY (VALUE_EXISTS && (value_b != value_e))
336 /* Update the appropriate cookie field. [name_b, name_e) are expected
337 to delimit the attribute name, while [value_b, value_e) (optional)
338 should delimit the attribute value.
340 When called the first time, it will set the cookie's attribute name
341 and value. After that, it will check the attribute name for
342 special fields such as `domain', `path', etc. Where appropriate,
343 it will parse the values of the fields it recognizes and fill the
344 corresponding fields in COOKIE.
346 Returns 1 on success. Returns zero in case a syntax error is
347 found; such a cookie should be discarded. */
350 update_cookie_field (struct cookie *cookie,
351 const char *name_b, const char *name_e,
352 const char *value_b, const char *value_e)
354 assert (name_b != NULL && name_e != NULL);
360 cookie->attr = strdupdelim (name_b, name_e);
361 cookie->value = strdupdelim (value_b, value_e);
365 if (NAME_IS ("domain"))
367 if (!VALUE_NON_EMPTY)
369 xfree_null (cookie->domain);
370 /* Strictly speaking, we should set cookie->domain_exact if the
371 domain doesn't begin with a dot. But many sites set the
372 domain to "foo.com" and expect "subhost.foo.com" to get the
373 cookie, and it apparently works. */
376 cookie->domain = strdupdelim (value_b, value_e);
379 else if (NAME_IS ("path"))
381 if (!VALUE_NON_EMPTY)
383 xfree_null (cookie->path);
384 cookie->path = strdupdelim (value_b, value_e);
387 else if (NAME_IS ("expires"))
392 if (!VALUE_NON_EMPTY)
394 BOUNDED_TO_ALLOCA (value_b, value_e, value_copy);
396 expires = http_atotm (value_copy);
399 cookie->permanent = 1;
400 cookie->expiry_time = (time_t)expires;
403 /* Error in expiration spec. Assume default (cookie doesn't
404 expire, but valid only for this session.) */
407 /* According to netscape's specification, expiry time in the
408 past means that discarding of a matching cookie is
410 if (cookie->expiry_time < cookies_now)
411 cookie->discard_requested = 1;
415 else if (NAME_IS ("max-age"))
420 if (!VALUE_NON_EMPTY)
422 BOUNDED_TO_ALLOCA (value_b, value_e, value_copy);
424 sscanf (value_copy, "%lf", &maxage);
426 /* something went wrong. */
428 cookie->permanent = 1;
429 cookie->expiry_time = cookies_now + maxage;
431 /* According to rfc2109, a cookie with max-age of 0 means that
432 discarding of a matching cookie is requested. */
434 cookie->discard_requested = 1;
438 else if (NAME_IS ("secure"))
440 /* ignore value completely */
445 /* Unrecognized attribute; ignore it. */
451 /* Returns non-zero for characters that are legal in the name of an
452 attribute. This used to allow only alphanumerics, '-', and '_',
453 but we need to be more lenient because a number of sites want to
454 use weirder attribute names. rfc2965 "informally specifies"
455 attribute name (token) as "a sequence of non-special, non-white
456 space characters". So we allow everything except the stuff we know
459 #define ATTR_NAME_CHAR(c) ((c) > 32 && (c) < 127 \
460 && (c) != '"' && (c) != '=' \
461 && (c) != ';' && (c) != ',')
463 /* Parse the contents of the `Set-Cookie' header. The header looks
466 name1=value1; name2=value2; ...
468 Trailing semicolon is optional; spaces are allowed between all
469 tokens. Additionally, values may be quoted.
471 A new cookie is returned upon success, NULL otherwise. The
472 specified CALLBACK function (normally `update_cookie_field') is used
473 to update the fields of the newly created cookie structure. */
475 static struct cookie *
476 parse_set_cookies (const char *sc,
477 int (*callback) (struct cookie *,
478 const char *, const char *,
479 const char *, const char *),
482 struct cookie *cookie = cookie_new ();
484 /* #### Hand-written DFAs are no fun to debug. We'd be better off
485 to rewrite this as an inline parser. */
487 enum { S_START, S_NAME, S_NAME_POST,
488 S_VALUE_PRE, S_VALUE, S_QUOTED_VALUE, S_VALUE_TRAILSPACE,
489 S_ATTR_ACTION, S_DONE, S_ERROR
495 const char *name_b = NULL, *name_e = NULL;
496 const char *value_b = NULL, *value_e = NULL;
500 while (state != S_DONE && state != S_ERROR)
507 else if (ISSPACE (c))
508 /* Strip all whitespace preceding the name. */
510 else if (ATTR_NAME_CHAR (c))
516 /* empty attr name not allowed */
520 if (!c || c == ';' || c == '=' || ISSPACE (c))
525 else if (ATTR_NAME_CHAR (c))
533 value_b = value_e = NULL;
536 state = S_ATTR_ACTION;
543 else if (ISSPACE (c))
544 /* Ignore space and keep the state. */
552 value_b = value_e = p;
555 state = S_ATTR_ACTION;
561 state = S_QUOTED_VALUE;
563 else if (ISSPACE (c))
573 if (!c || c == ';' || ISSPACE (c))
576 state = S_VALUE_TRAILSPACE;
580 value_e = NULL; /* no trailing space */
589 state = S_VALUE_TRAILSPACE;
596 case S_VALUE_TRAILSPACE:
600 state = S_ATTR_ACTION;
603 state = S_ATTR_ACTION;
604 else if (ISSPACE (c))
611 int legal = callback (cookie, name_b, name_e, value_b, value_e);
617 BOUNDED_TO_ALLOCA (name_b, name_e, name);
618 logprintf (LOG_NOTQUIET,
619 _("Error in Set-Cookie, field `%s'"), name);
629 /* handled by loop condition */
636 delete_cookie (cookie);
637 if (state != S_ERROR)
641 logprintf (LOG_NOTQUIET,
642 _("Syntax error in Set-Cookie: %s at position %d.\n"),
647 /* Sanity checks. These are important, otherwise it is possible for
648 malicious attackers to destroy important cookie information and/or
649 violate your privacy. */
652 #define REQUIRE_DIGITS(p) do { \
655 for (++p; ISDIGIT (*p); p++) \
659 #define REQUIRE_DOT(p) do { \
664 /* Check whether ADDR matches <digits>.<digits>.<digits>.<digits>.
666 We don't want to call network functions like inet_addr() because
667 all we need is a check, preferably one that is small, fast, and
671 numeric_address_p (const char *addr)
673 const char *p = addr;
675 REQUIRE_DIGITS (p); /* A */
676 REQUIRE_DOT (p); /* . */
677 REQUIRE_DIGITS (p); /* B */
678 REQUIRE_DOT (p); /* . */
679 REQUIRE_DIGITS (p); /* C */
680 REQUIRE_DOT (p); /* . */
681 REQUIRE_DIGITS (p); /* D */
688 /* Check whether COOKIE_DOMAIN is an appropriate domain for HOST.
689 Originally I tried to make the check compliant with rfc2109, but
690 the sites deviated too often, so I had to fall back to "tail
691 matching", as defined by the original Netscape's cookie spec. */
694 check_domain_match (const char *cookie_domain, const char *host)
698 /* Numeric address requires exact match. It also requires HOST to
700 if (numeric_address_p (cookie_domain))
701 return 0 == strcmp (cookie_domain, host);
705 /* For the sake of efficiency, check for exact match first. */
706 if (0 == strcasecmp (cookie_domain, host))
711 /* HOST must match the tail of cookie_domain. */
712 if (!match_tail (host, cookie_domain, 1))
715 /* We know that COOKIE_DOMAIN is a subset of HOST; however, we must
716 make sure that somebody is not trying to set the cookie for a
717 subdomain shared by many entities. For example, "company.co.uk"
718 must not be allowed to set a cookie for ".co.uk". On the other
719 hand, "sso.redhat.de" should be able to set a cookie for
722 The only marginally sane way to handle this I can think of is to
723 reject on the basis of the length of the second-level domain name
724 (but only when the top-level domain is unknown), with the assumption
725 that those of three or fewer characters could be reserved. For
728 .co.org -> works because the TLD is known
729 .co.uk -> doesn't work because "co" is only two chars long
730 .com.au -> doesn't work because "com" is only 3 chars long
731 .cnn.uk -> doesn't work because "cnn" is also only 3 chars long (ugh)
732 .cnn.de -> doesn't work for the same reason (ugh!!)
733 .abcd.de -> works because "abcd" is 4 chars long
734 .img.cnn.de -> works because it's not trying to set the 2nd level domain
735 .cnn.co.uk -> works for the same reason
737 That should prevent misuse, while allowing reasonable usage. If
738 someone knows of a better way to handle this, please let me
741 const char *p = cookie_domain;
742 int dccount = 1; /* number of domain components */
743 int ldcl = 0; /* last domain component length */
744 int nldcl = 0; /* next to last domain component length */
747 /* Ignore leading period in this calculation. */
750 for (out = 0; !out; p++)
758 /* Empty domain component found -- the domain is invalid. */
760 if (*(p + 1) == '\0')
762 /* Tolerate trailing '.' by not treating the domain as
763 one ending with an empty domain component. */
785 int known_toplevel = 0;
786 static char *known_toplevel_domains[] = {
787 ".com", ".edu", ".net", ".org", ".gov", ".mil", ".int"
789 for (i = 0; i < countof (known_toplevel_domains); i++)
790 if (match_tail (cookie_domain, known_toplevel_domains[i], 1))
795 if (!known_toplevel && nldcl <= 3)
802 /* Don't allow the host "foobar.com" to set a cookie for domain
804 if (*cookie_domain != '.')
806 int dlen = strlen (cookie_domain);
807 int hlen = strlen (host);
808 /* cookie host: hostname.foobar.com */
809 /* desired domain: bar.com */
810 /* '.' must be here in host-> ^ */
811 if (hlen > dlen && host[hlen - dlen - 1] != '.')
820 static int path_matches PARAMS ((const char *, const char *));
822 /* Check whether PATH begins with COOKIE_PATH. */
825 check_path_match (const char *cookie_path, const char *path)
827 return path_matches (path, cookie_path);
830 /* Process the HTTP `Set-Cookie' header. This results in storing the
831 cookie or discarding a matching one, or ignoring it completely, all
832 depending on the contents. */
835 cookie_handle_set_cookie (struct cookie_jar *jar,
836 const char *host, int port,
837 const char *path, const char *set_cookie)
839 struct cookie *cookie;
840 cookies_now = time (NULL);
842 cookie = parse_set_cookies (set_cookie, update_cookie_field, 0);
846 /* Sanitize parts of cookie. */
851 cookie->domain = xstrdup (host);
856 if (!check_domain_match (cookie->domain, host))
858 logprintf (LOG_NOTQUIET,
859 "Cookie coming from %s attempted to set domain to %s\n",
860 host, cookie->domain);
861 xfree (cookie->domain);
867 cookie->path = xstrdup (path);
870 if (!check_path_match (cookie->path, path))
872 DEBUGP (("Attempt to fake the path: %s, %s\n",
873 cookie->path, path));
878 if (cookie->discard_requested)
880 discard_matching_cookie (jar, cookie);
884 store_cookie (jar, cookie);
889 delete_cookie (cookie);
892 /* Support for sending out cookies in HTTP requests, based on
893 previously stored cookies. Entry point is
894 `build_cookies_request'. */
896 /* Find the cookie chains whose domains match HOST and store them to
899 A cookie chain is the head of a list of cookies that belong to a
900 host/domain. Given HOST "img.search.xemacs.org", this function
901 will return the chains for "img.search.xemacs.org",
902 "search.xemacs.org", and "xemacs.org" -- those of them that exist
905 DEST should be large enough to accept (in the worst case) as many
906 elements as there are domain components of HOST. */
909 find_chains_of_host (struct cookie_jar *jar, const char *host,
910 struct cookie *dest[])
915 /* Bail out quickly if there are no cookies in the jar. */
916 if (!hash_table_count (jar->chains))
919 if (numeric_address_p (host))
920 /* If host is an IP address, only check for the exact match. */
923 /* Otherwise, check all the subdomains except the top-level (last)
924 one. As a domain with N components has N-1 dots, the number of
925 passes equals the number of dots. */
926 passes = count_char (host, '.');
930 /* Find chains that match HOST, starting with exact match and
931 progressing to less specific domains. For instance, given HOST
932 fly.srk.fer.hr, first look for fly.srk.fer.hr's chain, then
933 srk.fer.hr's, then fer.hr's. */
936 struct cookie *chain = hash_table_get (jar->chains, host);
938 dest[dest_count++] = chain;
939 if (++passcnt >= passes)
941 host = strchr (host, '.') + 1;
947 /* If FULL_PATH begins with PREFIX, return the length of PREFIX, zero
951 path_matches (const char *full_path, const char *prefix)
956 /* Wget's HTTP paths do not begin with '/' (the URL code treats it
957 as a mere separator, inspired by rfc1808), but the '/' is
958 assumed when matching against the cookie stuff. */
962 len = strlen (prefix);
964 if (0 != strncmp (full_path, prefix, len))
965 /* FULL_PATH doesn't begin with PREFIX. */
968 /* Length of PREFIX determines the quality of the match. */
972 /* Return non-zero iff COOKIE matches the provided parameters of the
973 URL being downloaded: HOST, PORT, PATH, and SECFLAG.
975 If PATH_GOODNESS is non-NULL, store the "path goodness" value
976 there. That value is a measure of how closely COOKIE matches PATH,
977 used for ordering cookies. */
980 cookie_matches_url (const struct cookie *cookie,
981 const char *host, int port, const char *path,
982 int secflag, int *path_goodness)
986 if (cookie_expired_p (cookie))
987 /* Ignore stale cookies. Don't bother unchaining the cookie at
988 this point -- Wget is a relatively short-lived application, and
989 stale cookies will not be saved by `save_cookies'. On the
990 other hand, this function should be as efficient as
994 if (cookie->secure && !secflag)
995 /* Don't transmit secure cookies over insecure connections. */
997 if (cookie->port != PORT_ANY && cookie->port != port)
1000 /* If exact domain match is required, verify that cookie's domain is
1001 equal to HOST. If not, assume success on the grounds of the
1002 cookie's chain having been found by find_chains_of_host. */
1003 if (cookie->domain_exact
1004 && 0 != strcasecmp (host, cookie->domain))
1007 pg = path_matches (path, cookie->path);
1012 /* If the caller requested path_goodness, we return it. This is
1013 an optimization, so that the caller doesn't need to call
1014 path_matches() again. */
1015 *path_goodness = pg;
1019 /* A structure that points to a cookie, along with the additional
1020 information about the cookie's "goodness". This allows us to sort
1021 the cookies when returning them to the server, as required by the
1024 struct weighed_cookie {
1025 struct cookie *cookie;
1026 int domain_goodness;
1030 /* Comparator used for uniquifying the list. */
1033 equality_comparator (const void *p1, const void *p2)
1035 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
1036 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
1038 int namecmp = strcmp (wc1->cookie->attr, wc2->cookie->attr);
1039 int valuecmp = strcmp (wc1->cookie->value, wc2->cookie->value);
1041 /* We only really care whether both name and value are equal. We
1042 return them in this order only for consistency... */
1043 return namecmp ? namecmp : valuecmp;
1046 /* Eliminate duplicate cookies. "Duplicate cookies" are any two
1047 cookies with the same attr name and value. Whenever a duplicate
1048 pair is found, one of the cookies is removed. */
1051 eliminate_dups (struct weighed_cookie *outgoing, int count)
1053 struct weighed_cookie *h; /* hare */
1054 struct weighed_cookie *t; /* tortoise */
1055 struct weighed_cookie *end = outgoing + count;
1057 /* We deploy a simple uniquify algorithm: first sort the array
1058 according to our sort criteria, then copy it to itself, comparing
1059 each cookie to its neighbor and ignoring the duplicates. */
1061 qsort (outgoing, count, sizeof (struct weighed_cookie), equality_comparator);
1063 /* "Hare" runs through all the entries in the array, followed by
1064 "tortoise". If a duplicate is found, the hare skips it.
1065 Non-duplicate entries are copied to the tortoise ptr. */
1067 for (h = t = outgoing; h < end; h++)
1071 struct cookie *c0 = h[0].cookie;
1072 struct cookie *c1 = h[1].cookie;
1073 if (!strcmp (c0->attr, c1->attr) && !strcmp (c0->value, c1->value))
1074 continue; /* ignore the duplicate */
1077 /* If the hare has advanced past the tortoise (because of
1078 previous dups), make sure the values get copied. Otherwise,
1079 no copying is necessary. */
1085 return t - outgoing;
1088 /* Comparator used for sorting by quality. */
1091 goodness_comparator (const void *p1, const void *p2)
1093 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
1094 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
1096 /* Subtractions take `wc2' as the first argument because we want a
1097 sort in *decreasing* order of goodness. */
1098 int dgdiff = wc2->domain_goodness - wc1->domain_goodness;
1099 int pgdiff = wc2->path_goodness - wc1->path_goodness;
1101 /* Sort by domain goodness; if these are the same, sort by path
1102 goodness. (The sorting order isn't really specified; maybe it
1103 should be the other way around.) */
1104 return dgdiff ? dgdiff : pgdiff;
1107 /* Generate a `Cookie' header for a request that goes to HOST:PORT and
1108 requests PATH from the server. The resulting string is allocated
1109 with `malloc', and the caller is responsible for freeing it. If no
1110 cookies pertain to this request, i.e. no cookie header should be
1111 generated, NULL is returned. */
1114 cookie_header (struct cookie_jar *jar, const char *host,
1115 int port, const char *path, int secflag)
1117 struct cookie **chains;
1120 struct cookie *cookie;
1121 struct weighed_cookie *outgoing;
1124 int result_size, pos;
1126 /* First, find the cookie chains whose domains match HOST. */
1128 /* Allocate room for find_chains_of_host to write to. The number of
1129 chains can at most equal the number of subdomains, hence
1130 1+<number of dots>. */
1131 chains = alloca_array (struct cookie *, 1 + count_char (host, '.'));
1132 chain_count = find_chains_of_host (jar, host, chains);
1134 /* No cookies for this host. */
1138 cookies_now = time (NULL);
1140 /* Now extract from the chains those cookies that match our host
1141 (for domain_exact cookies), port (for cookies with port other
1142 than PORT_ANY), etc. See cookie_matches_url for details. */
1144 /* Count the number of matching cookies. */
1146 for (i = 0; i < chain_count; i++)
1147 for (cookie = chains[i]; cookie; cookie = cookie->next)
1148 if (cookie_matches_url (cookie, host, port, path, secflag, NULL))
1151 return NULL; /* no cookies matched */
1153 /* Allocate the array. */
1154 outgoing = alloca_array (struct weighed_cookie, count);
1156 /* Fill the array with all the matching cookies from the chains that
1159 for (i = 0; i < chain_count; i++)
1160 for (cookie = chains[i]; cookie; cookie = cookie->next)
1163 if (!cookie_matches_url (cookie, host, port, path, secflag, &pg))
1165 outgoing[ocnt].cookie = cookie;
1166 outgoing[ocnt].domain_goodness = strlen (cookie->domain);
1167 outgoing[ocnt].path_goodness = pg;
1170 assert (ocnt == count);
1172 /* Eliminate duplicate cookies; that is, those whose name and value
1174 count = eliminate_dups (outgoing, count);
1176 /* Sort the array so that best-matching domains come first, and
1177 that, within one domain, best-matching paths come first. */
1178 qsort (outgoing, count, sizeof (struct weighed_cookie), goodness_comparator);
1180 /* Count the space the name=value pairs will take. */
1182 for (i = 0; i < count; i++)
1184 struct cookie *c = outgoing[i].cookie;
1186 result_size += strlen (c->attr) + 1 + strlen (c->value);
1189 /* Allocate output buffer:
1191 name=value pairs -- result_size
1192 "; " separators -- (count - 1) * 2
1193 \r\n line ending -- 2
1194 \0 terminator -- 1 */
1195 result_size = 8 + result_size + (count - 1) * 2 + 2 + 1;
1196 result = xmalloc (result_size);
1198 strcpy (result, "Cookie: ");
1200 for (i = 0; i < count; i++)
1202 struct cookie *c = outgoing[i].cookie;
1203 int namlen = strlen (c->attr);
1204 int vallen = strlen (c->value);
1206 memcpy (result + pos, c->attr, namlen);
1208 result[pos++] = '=';
1209 memcpy (result + pos, c->value, vallen);
1213 result[pos++] = ';';
1214 result[pos++] = ' ';
1217 result[pos++] = '\r';
1218 result[pos++] = '\n';
1219 result[pos++] = '\0';
1220 assert (pos == result_size);
1224 /* Support for loading and saving cookies. The format used for
1225 loading and saving should be the format of the `cookies.txt' file
1226 used by Netscape and Mozilla, at least the Unix versions.
1227 (Apparently IE can export cookies in that format as well.) The
1228 format goes like this:
1230 DOMAIN DOMAIN-FLAG PATH SECURE-FLAG TIMESTAMP ATTR-NAME ATTR-VALUE
1232 DOMAIN -- cookie domain, optionally followed by :PORT
1233 DOMAIN-FLAG -- whether all hosts in the domain match
1235 SECURE-FLAG -- whether cookie requires secure connection
1236 TIMESTAMP -- expiry timestamp, number of seconds since epoch
1237 ATTR-NAME -- name of the cookie attribute
1238 ATTR-VALUE -- value of the cookie attribute (empty if absent)
1240 The fields are separated by TABs. All fields are mandatory, except
1241 for ATTR-VALUE. The `-FLAG' fields are boolean, their legal values
1242 being "TRUE" and "FALSE". Empty lines, lines consisting of
1243 whitespace only, and comment lines (beginning with # optionally
1244 preceded by whitespace) are ignored.
1246 Example line from cookies.txt (split in two lines for readability):
1248 .google.com TRUE / FALSE 2147368447 \
1249 PREF ID=34bb47565bbcd47b:LD=en:NR=20:TM=985172580:LM=985739012
1253 /* If the region [B, E) ends with :<digits>, parse the number, return
1254 it, and store new boundary (location of the `:') to DOMAIN_E_PTR.
1255 If port is not specified, return 0. */
1258 domain_port (const char *domain_b, const char *domain_e,
1259 const char **domain_e_ptr)
1263 const char *colon = memchr (domain_b, ':', domain_e - domain_b);
1266 for (p = colon + 1; p < domain_e && ISDIGIT (*p); p++)
1267 port = 10 * port + (*p - '0');
1269 /* Garbage following port number. */
1271 *domain_e_ptr = colon;
1275 #define GET_WORD(p, b, e) do { \
1277 while (*p && *p != '\t') \
1280 if (b == e || !*p) \
1285 /* Load cookies from FILE. */
1288 cookie_jar_load (struct cookie_jar *jar, const char *file)
1291 FILE *fp = fopen (file, "r");
1294 logprintf (LOG_NOTQUIET, "Cannot open cookies file `%s': %s\n",
1295 file, strerror (errno));
1298 cookies_now = time (NULL);
1300 for (; ((line = read_whole_line (fp)) != NULL); xfree (line))
1302 struct cookie *cookie;
1308 char *domain_b = NULL, *domain_e = NULL;
1309 char *domflag_b = NULL, *domflag_e = NULL;
1310 char *path_b = NULL, *path_e = NULL;
1311 char *secure_b = NULL, *secure_e = NULL;
1312 char *expires_b = NULL, *expires_e = NULL;
1313 char *name_b = NULL, *name_e = NULL;
1314 char *value_b = NULL, *value_e = NULL;
1316 /* Skip leading white-space. */
1317 while (*p && ISSPACE (*p))
1319 /* Ignore empty lines. */
1320 if (!*p || *p == '#')
1323 GET_WORD (p, domain_b, domain_e);
1324 GET_WORD (p, domflag_b, domflag_e);
1325 GET_WORD (p, path_b, path_e);
1326 GET_WORD (p, secure_b, secure_e);
1327 GET_WORD (p, expires_b, expires_e);
1328 GET_WORD (p, name_b, name_e);
1330 /* Don't use GET_WORD for value because it ends with newline,
1333 value_e = p + strlen (p);
1334 if (value_e > value_b && value_e[-1] == '\n')
1336 if (value_e > value_b && value_e[-1] == '\r')
1338 /* Empty values are legal (I think), so don't bother checking. */
1340 cookie = cookie_new ();
1342 cookie->attr = strdupdelim (name_b, name_e);
1343 cookie->value = strdupdelim (value_b, value_e);
1344 cookie->path = strdupdelim (path_b, path_e);
1345 cookie->secure = BOUNDED_EQUAL (secure_b, secure_e, "TRUE");
1347 /* Curl source says, quoting Andre Garcia: "flag: A TRUE/FALSE
1348 value indicating if all machines within a given domain can
1349 access the variable. This value is set automatically by the
1350 browser, depending on the value set for the domain." */
1351 cookie->domain_exact = !BOUNDED_EQUAL (domflag_b, domflag_e, "TRUE");
1353 /* DOMAIN needs special treatment because we might need to
1354 extract the port. */
1355 port = domain_port (domain_b, domain_e, (const char **)&domain_e);
1357 cookie->port = port;
1359 if (*domain_b == '.')
1360 ++domain_b; /* remove leading dot internally */
1361 cookie->domain = strdupdelim (domain_b, domain_e);
1363 /* safe default in case EXPIRES field is garbled. */
1364 expiry = (double)cookies_now - 1;
1366 /* I don't like changing the line, but it's safe here. (line is
1369 sscanf (expires_b, "%lf", &expiry);
1373 /* EXPIRY can be 0 for session cookies saved because the
1374 user specified `--keep-session-cookies' in the past.
1375 They remain session cookies, and will be saved only if
1376 the user has specified `keep-session-cookies' again. */
1380 if (expiry < cookies_now)
1381 goto abort; /* ignore stale cookie. */
1382 cookie->expiry_time = expiry;
1383 cookie->permanent = 1;
1386 store_cookie (jar, cookie);
1392 delete_cookie (cookie);
1397 /* Mapper for save_cookies callable by hash_table_map. VALUE points
1398 to the head in a chain of cookies. The function prints the entire
1402 save_cookies_mapper (void *key, void *value, void *arg)
1404 FILE *fp = (FILE *)arg;
1405 char *domain = (char *)key;
1406 struct cookie *cookie = (struct cookie *)value;
1407 for (; cookie; cookie = cookie->next)
1409 if (!cookie->permanent && !opt.keep_session_cookies)
1411 if (cookie_expired_p (cookie))
1413 if (!cookie->domain_exact)
1416 if (cookie->port != PORT_ANY)
1417 fprintf (fp, ":%d", cookie->port);
1418 fprintf (fp, "\t%s\t%s\t%s\t%.0f\t%s\t%s\n",
1419 cookie->domain_exact ? "FALSE" : "TRUE",
1420 cookie->path, cookie->secure ? "TRUE" : "FALSE",
1421 (double)cookie->expiry_time,
1422 cookie->attr, cookie->value);
1424 return 1; /* stop mapping */
1429 /* Save cookies, in format described above, to FILE. */
1432 cookie_jar_save (struct cookie_jar *jar, const char *file)
1436 DEBUGP (("Saving cookies to %s.\n", file));
1438 cookies_now = time (NULL);
1440 fp = fopen (file, "w");
1443 logprintf (LOG_NOTQUIET, _("Cannot open cookies file `%s': %s\n"),
1444 file, strerror (errno));
1448 fputs ("# HTTP cookie file.\n", fp);
1449 fprintf (fp, "# Generated by Wget on %s.\n", datetime_str (&cookies_now));
1450 fputs ("# Edit at your own risk.\n\n", fp);
1452 hash_table_map (jar->chains, save_cookies_mapper, fp);
1455 logprintf (LOG_NOTQUIET, _("Error writing to `%s': %s\n"),
1456 file, strerror (errno));
1457 if (fclose (fp) < 0)
1458 logprintf (LOG_NOTQUIET, _("Error closing `%s': %s\n"),
1459 file, strerror (errno));
1461 DEBUGP (("Done saving cookies.\n"));
1464 /* Destroy all the elements in the chain and unhook it from the cookie
1465 jar. This is written in the form of a callback to hash_table_map
1466 and used by cookie_jar_delete to delete all the cookies in a
1470 nuke_cookie_chain (void *value, void *key, void *arg)
1472 char *chain_key = (char *)value;
1473 struct cookie *chain = (struct cookie *)key;
1474 struct cookie_jar *jar = (struct cookie_jar *)arg;
1476 /* Remove the chain from the table and free the key. */
1477 hash_table_remove (jar->chains, chain_key);
1480 /* Then delete all the cookies in the chain. */
1483 struct cookie *next = chain->next;
1484 delete_cookie (chain);
1492 /* Clean up cookie-related data. */
1495 cookie_jar_delete (struct cookie_jar *jar)
1497 hash_table_map (jar->chains, nuke_cookie_chain, jar);
1498 hash_table_destroy (jar->chains);
1502 /* Test cases. Currently this is only tests parse_set_cookies. To
1503 use, recompile Wget with -DTEST_COOKIES and call test_cookies()
1508 char *test_results[10];
1510 static int test_parse_cookies_callback (struct cookie *ignored,
1511 const char *nb, const char *ne,
1512 const char *vb, const char *ve)
1514 test_results[test_count++] = strdupdelim (nb, ne);
1515 test_results[test_count++] = strdupdelim (vb, ve);
1522 /* Tests expected to succeed: */
1528 { "arg=value", {"arg", "value", NULL} },
1529 { "arg1=value1;arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
1530 { "arg1=value1; arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
1531 { "arg1=value1; arg2=value2;", {"arg1", "value1", "arg2", "value2", NULL} },
1532 { "arg1=value1; arg2=value2; ", {"arg1", "value1", "arg2", "value2", NULL} },
1533 { "arg1=\"value1\"; arg2=\"\"", {"arg1", "value1", "arg2", "", NULL} },
1534 { "arg=", {"arg", "", NULL} },
1535 { "arg1=; arg2=", {"arg1", "", "arg2", "", NULL} },
1536 { "arg1 = ; arg2= ", {"arg1", "", "arg2", "", NULL} },
1539 /* Tests expected to fail: */
1540 static char *tests_fail[] = {
1542 "arg=\"unterminated",
1544 "arg1=;=another-empty-name",
1548 for (i = 0; i < countof (tests_succ); i++)
1551 char *data = tests_succ[i].data;
1552 char **expected = tests_succ[i].results;
1556 c = parse_set_cookies (data, test_parse_cookies_callback, 1);
1559 printf ("NULL cookie returned for valid data: %s\n", data);
1563 for (ind = 0; ind < test_count; ind += 2)
1567 if (0 != strcmp (expected[ind], test_results[ind]))
1568 printf ("Invalid name %d for '%s' (expected '%s', got '%s')\n",
1569 ind / 2 + 1, data, expected[ind], test_results[ind]);
1570 if (0 != strcmp (expected[ind + 1], test_results[ind + 1]))
1571 printf ("Invalid value %d for '%s' (expected '%s', got '%s')\n",
1572 ind / 2 + 1, data, expected[ind + 1], test_results[ind + 1]);
1574 if (ind < test_count || expected[ind])
1575 printf ("Unmatched number of results: %s\n", data);
1578 for (i = 0; i < countof (tests_fail); i++)
1581 char *data = tests_fail[i];
1583 c = parse_set_cookies (data, test_parse_cookies_callback, 1);
1585 printf ("Failed to report error on invalid data: %s\n", data);
1588 #endif /* TEST_COOKIES */