1 /* Support for cookies.
2 Copyright (C) 2001, 2002 Free Software Foundation, Inc.
4 This file is part of GNU Wget.
6 GNU Wget is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or (at
9 your option) any later version.
11 GNU Wget is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Wget; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 In addition, as a special exception, the Free Software Foundation
21 gives permission to link the code of its release of Wget with the
22 OpenSSL project's "OpenSSL" library (or with modified versions of it
23 that use the same license as the "OpenSSL" library), and distribute
24 the linked executables. You must obey the GNU General Public License
25 in all respects for all of the code used other than "OpenSSL". If you
26 modify this file, you may extend this exception to your version of the
27 file, but you are not obligated to do so. If you do not wish to do
28 so, delete this exception statement from your version. */
30 /* Written by Hrvoje Niksic. Parts are loosely inspired by cookie
31 code submitted by Tomasz Wegrzanowski.
33 Ideas for future work:
35 * Implement limits on cookie-related sizes, such as max. cookie
36 size, max. number of cookies, etc.
38 * Add more "cookie jar" methods, such as methods to iterate over
39 stored cookies, to clear temporary cookies, to perform
40 intelligent auto-saving, etc.
42 * Support `Set-Cookie2' and `Cookie2' headers? Does anyone really
62 /* This should *really* be in a .h file! */
63 time_t http_atotm PARAMS ((const char *));
65 /* Declarations of `struct cookie' and the most basic functions. */
67 /* Cookie jar serves as cookie storage and a means of retrieving
68 cookies efficiently. All cookies with the same domain are stored
69 in a linked list called "chain". A cookie chain can be reached by
70 looking up the domain in the cookie jar's `chains' table.
72 For example, to reach all the cookies under google.com, one must
73 execute hash_table_get(jar->chains, "google.com"). Of
74 course, when sending a cookie to `www.google.com', one must search
75 for cookies that belong to either `www.google.com' or `google.com'
76 -- but the point is that the code doesn't need to go through *all*
80 /* Cookie chains indexed by domain. */
81 struct hash_table *chains;
83 int cookie_count; /* number of cookies in the jar. */
86 /* Value set by entry point functions, so that the low-level
87 routines don't need to call time() all the time. */
93 struct cookie_jar *jar = xmalloc (sizeof (struct cookie_jar));
94 jar->chains = make_nocase_string_hash_table (0);
95 jar->cookie_count = 0;
100 char *domain; /* domain of the cookie */
101 int port; /* port number */
102 char *path; /* path prefix of the cookie */
104 int secure; /* whether cookie should be
105 transmitted over non-https
107 int domain_exact; /* whether DOMAIN must match as a
110 int permanent; /* whether the cookie should outlive
112 time_t expiry_time; /* time when the cookie expires */
114 int discard_requested; /* whether cookie was created to
115 request discarding another
118 char *attr; /* cookie attribute name */
119 char *value; /* cookie attribute value */
121 struct cookie *next; /* used for chaining of cookies in the
125 #define PORT_ANY (-1)
126 #define COOKIE_EXPIRED_P(c) ((c)->expiry_time != 0 && (c)->expiry_time < cookies_now)
128 /* Allocate and return a new, empty cookie structure. */
130 static struct cookie *
133 struct cookie *cookie = xmalloc (sizeof (struct cookie));
134 memset (cookie, '\0', sizeof (struct cookie));
136 /* Both cookie->permanent and cookie->expiry_time are now 0. By
137 default, we assume that the cookie is non-permanent and valid
138 until the end of the session. */
140 cookie->port = PORT_ANY;
144 /* Deallocate COOKIE and its components. */
147 delete_cookie (struct cookie *cookie)
149 FREE_MAYBE (cookie->domain);
150 FREE_MAYBE (cookie->path);
151 FREE_MAYBE (cookie->attr);
152 FREE_MAYBE (cookie->value);
156 /* Functions for storing cookies.
158 All cookies can be reached beginning with jar->chains. The key in
159 that table is the domain name, and the value is a linked list of
160 all cookies from that domain. Every new cookie is placed on the
163 /* Find and return a cookie in JAR whose domain, path, and attribute
164 name correspond to COOKIE. If found, PREVPTR will point to the
165 location of the cookie previous in chain, or NULL if the found
166 cookie is the head of a chain.
168 If no matching cookie is found, return NULL. */
170 static struct cookie *
171 find_matching_cookie (struct cookie_jar *jar, struct cookie *cookie,
172 struct cookie **prevptr)
174 struct cookie *chain, *prev;
176 chain = hash_table_get (jar->chains, cookie->domain);
181 for (; chain; prev = chain, chain = chain->next)
182 if (0 == strcmp (cookie->path, chain->path)
183 && 0 == strcmp (cookie->attr, chain->attr)
184 && cookie->port == chain->port)
195 /* Store COOKIE to the jar.
197 This is done by placing COOKIE at the head of its chain. However,
198 if COOKIE matches a cookie already in memory, as determined by
199 find_matching_cookie, the old cookie is unlinked and destroyed.
201 The key of each chain's hash table entry is allocated only the
202 first time; next hash_table_put's reuse the same key. */
205 store_cookie (struct cookie_jar *jar, struct cookie *cookie)
207 struct cookie *chain_head;
210 if (hash_table_get_pair (jar->chains, cookie->domain,
211 &chain_key, &chain_head))
213 /* A chain of cookies in this domain already exists. Check for
214 duplicates -- if an extant cookie exactly matches our domain,
215 port, path, and name, replace it. */
217 struct cookie *victim = find_matching_cookie (jar, cookie, &prev);
221 /* Remove VICTIM from the chain. COOKIE will be placed at
225 prev->next = victim->next;
226 cookie->next = chain_head;
230 /* prev is NULL; apparently VICTIM was at the head of
231 the chain. This place will be taken by COOKIE, so
232 all we need to do is: */
233 cookie->next = victim->next;
235 delete_cookie (victim);
237 DEBUGP (("Deleted old cookie (to be replaced.)\n"));
240 cookie->next = chain_head;
244 /* We are now creating the chain. Use a copy of cookie->domain
245 as the key for the life-time of the chain. Using
246 cookie->domain would be unsafe because the life-time of the
247 chain may exceed the life-time of the cookie. (Cookies may
248 be deleted from the chain by this very function.) */
250 chain_key = xstrdup (cookie->domain);
253 hash_table_put (jar->chains, chain_key, cookie);
256 DEBUGP (("\nStored cookie %s %d%s %s %s %d %s %s %s\n",
257 cookie->domain, cookie->port,
258 cookie->port == PORT_ANY ? " (ANY)" : "",
260 cookie->permanent ? "permanent" : "nonpermanent",
263 ? asctime (localtime (&cookie->expiry_time)) : "<undefined>",
264 cookie->attr, cookie->value));
267 /* Discard a cookie matching COOKIE's domain, port, path, and
268 attribute name. This gets called when we encounter a cookie whose
269 expiry date is in the past, or whose max-age is set to 0. The
270 former corresponds to netscape cookie spec, while the latter is
271 specified by rfc2109. */
274 discard_matching_cookie (struct cookie_jar *jar, struct cookie *cookie)
276 struct cookie *prev, *victim;
278 if (!hash_table_count (jar->chains))
279 /* No elements == nothing to discard. */
282 victim = find_matching_cookie (jar, cookie, &prev);
286 /* Simply unchain the victim. */
287 prev->next = victim->next;
290 /* VICTIM was head of its chain. We need to place a new
291 cookie at the head. */
292 char *chain_key = NULL;
295 res = hash_table_get_pair (jar->chains, victim->domain,
300 /* VICTIM was the only cookie in the chain. Destroy the
301 chain and deallocate the chain key. */
302 hash_table_remove (jar->chains, victim->domain);
306 hash_table_put (jar->chains, chain_key, victim->next);
308 delete_cookie (victim);
309 DEBUGP (("Discarded old cookie.\n"));
313 /* Functions for parsing the `Set-Cookie' header, and creating new
314 cookies from the wire. */
316 #define NAME_IS(string_literal) \
317 BOUNDED_EQUAL_NO_CASE (name_b, name_e, string_literal)
319 #define VALUE_EXISTS (value_b && value_e)
321 #define VALUE_NON_EMPTY (VALUE_EXISTS && (value_b != value_e))
323 /* Update the appropriate cookie field. [name_b, name_e) are expected
324 to delimit the attribute name, while [value_b, value_e) (optional)
325 should delimit the attribute value.
327 When called the first time, it will set the cookie's attribute name
328 and value. After that, it will check the attribute name for
329 special fields such as `domain', `path', etc. Where appropriate,
330 it will parse the values of the fields it recognizes and fill the
331 corresponding fields in COOKIE.
333 Returns 1 on success. Returns zero in case a syntax error is
334 found; such a cookie should be discarded. */
337 update_cookie_field (struct cookie *cookie,
338 const char *name_b, const char *name_e,
339 const char *value_b, const char *value_e)
341 assert (name_b != NULL && name_e != NULL);
347 cookie->attr = strdupdelim (name_b, name_e);
348 cookie->value = strdupdelim (value_b, value_e);
352 if (NAME_IS ("domain"))
354 if (!VALUE_NON_EMPTY)
356 FREE_MAYBE (cookie->domain);
357 /* Strictly speaking, we should set cookie->domain_exact if the
358 domain doesn't begin with a dot. But many sites set the
359 domain to "foo.com" and expect "subhost.foo.com" to get the
360 cookie, and it apparently works. */
363 cookie->domain = strdupdelim (value_b, value_e);
366 else if (NAME_IS ("path"))
368 if (!VALUE_NON_EMPTY)
370 FREE_MAYBE (cookie->path);
371 cookie->path = strdupdelim (value_b, value_e);
374 else if (NAME_IS ("expires"))
379 if (!VALUE_NON_EMPTY)
381 BOUNDED_TO_ALLOCA (value_b, value_e, value_copy);
383 expires = http_atotm (value_copy);
386 cookie->permanent = 1;
387 cookie->expiry_time = (time_t)expires;
390 /* Error in expiration spec. Assume default (cookie valid for
394 /* According to netscape's specification, expiry time in the
395 past means that discarding of a matching cookie is
397 if (cookie->expiry_time < cookies_now)
398 cookie->discard_requested = 1;
402 else if (NAME_IS ("max-age"))
407 if (!VALUE_NON_EMPTY)
409 BOUNDED_TO_ALLOCA (value_b, value_e, value_copy);
411 sscanf (value_copy, "%lf", &maxage);
413 /* something went wrong. */
415 cookie->permanent = 1;
416 cookie->expiry_time = cookies_now + maxage;
418 /* According to rfc2109, a cookie with max-age of 0 means that
419 discarding of a matching cookie is requested. */
421 cookie->discard_requested = 1;
425 else if (NAME_IS ("secure"))
427 /* ignore value completely */
432 /* Unrecognized attribute; ignore it. */
438 /* Returns non-zero for characters that are legal in the name of an
439 attribute. This used to allow only alphanumerics, '-', and '_',
440 but we need to be more lenient because a number of sites wants to
441 use weirder attribute names. rfc2965 "informally specifies"
442 attribute name (token) as "a sequence of non-special, non-white
443 space characters". So we allow everything except the stuff we know
446 #define ATTR_NAME_CHAR(c) ((c) > 32 && (c) < 127 \
447 && (c) != '"' && (c) != '=' \
448 && (c) != ';' && (c) != ',')
450 /* Parse the contents of the `Set-Cookie' header. The header looks
453 name1=value1; name2=value2; ...
455 Trailing semicolon is optional; spaces are allowed between all
456 tokens. Additionally, values may be quoted.
458 A new cookie is returned upon success, NULL otherwise. The
459 specified CALLBACK function (normally `update_cookie_field') is used
460 to update the fields of the newly created cookie structure. */
462 static struct cookie *
463 parse_set_cookies (const char *sc,
464 int (*callback) (struct cookie *,
465 const char *, const char *,
466 const char *, const char *),
469 struct cookie *cookie = cookie_new ();
471 /* #### Hand-written DFAs are no fun to debug. We'd be better off
472 to rewrite this as an inline parser. */
474 enum { S_START, S_NAME, S_NAME_POST,
475 S_VALUE_PRE, S_VALUE, S_QUOTED_VALUE, S_VALUE_TRAILSPACE,
476 S_ATTR_ACTION, S_DONE, S_ERROR
482 const char *name_b = NULL, *name_e = NULL;
483 const char *value_b = NULL, *value_e = NULL;
487 while (state != S_DONE && state != S_ERROR)
494 else if (ISSPACE (c))
495 /* Strip all whitespace preceding the name. */
497 else if (ATTR_NAME_CHAR (c))
503 /* empty attr name not allowed */
507 if (!c || c == ';' || c == '=' || ISSPACE (c))
512 else if (ATTR_NAME_CHAR (c))
520 value_b = value_e = NULL;
523 state = S_ATTR_ACTION;
530 else if (ISSPACE (c))
531 /* Ignore space and keep the state. */
539 value_b = value_e = p;
542 state = S_ATTR_ACTION;
548 state = S_QUOTED_VALUE;
550 else if (ISSPACE (c))
560 if (!c || c == ';' || ISSPACE (c))
563 state = S_VALUE_TRAILSPACE;
567 value_e = NULL; /* no trailing space */
576 state = S_VALUE_TRAILSPACE;
583 case S_VALUE_TRAILSPACE:
587 state = S_ATTR_ACTION;
590 state = S_ATTR_ACTION;
591 else if (ISSPACE (c))
598 int legal = callback (cookie, name_b, name_e, value_b, value_e);
604 BOUNDED_TO_ALLOCA (name_b, name_e, name);
605 logprintf (LOG_NOTQUIET,
606 _("Error in Set-Cookie, field `%s'"), name);
616 /* handled by loop condition */
623 delete_cookie (cookie);
624 if (state != S_ERROR)
628 logprintf (LOG_NOTQUIET,
629 _("Syntax error in Set-Cookie: %s at position %d.\n"),
634 /* Sanity checks. These are important, otherwise it is possible for
635 malicious attackers to destroy important cookie information and/or
636 violate your privacy. */
639 #define REQUIRE_DIGITS(p) do { \
642 for (++p; ISDIGIT (*p); p++) \
646 #define REQUIRE_DOT(p) do { \
651 /* Check whether ADDR matches <digits>.<digits>.<digits>.<digits>.
653 We don't want to call network functions like inet_addr() because all
654 we need is a check, preferably one that is small, fast, and
658 numeric_address_p (const char *addr)
660 const char *p = addr;
662 REQUIRE_DIGITS (p); /* A */
663 REQUIRE_DOT (p); /* . */
664 REQUIRE_DIGITS (p); /* B */
665 REQUIRE_DOT (p); /* . */
666 REQUIRE_DIGITS (p); /* C */
667 REQUIRE_DOT (p); /* . */
668 REQUIRE_DIGITS (p); /* D */
675 /* Check whether COOKIE_DOMAIN is an appropriate domain for HOST.
676 Originally I tried to make the check compliant with rfc2109, but
677 the sites deviated too often, so I had to fall back to "tail
678 matching", as defined by the original Netscape's cookie spec. */
681 check_domain_match (const char *cookie_domain, const char *host)
685 /* Numeric address requires exact match. It also requires HOST to
687 if (numeric_address_p (cookie_domain))
688 return 0 == strcmp (cookie_domain, host);
692 /* For the sake of efficiency, check for exact match first. */
693 if (0 == strcasecmp (cookie_domain, host))
698 /* The tail of HOST must match COOKIE_DOMAIN. */
699 if (!match_tail (host, cookie_domain, 1))
702 /* We know that COOKIE_DOMAIN is a subset of HOST; however, we must
703 make sure that somebody is not trying to set the cookie for a
704 subdomain shared by many entities. For example, "company.co.uk"
705 must not be allowed to set a cookie for ".co.uk". On the other
706 hand, "sso.redhat.de" should be able to set a cookie for
709 The only marginally sane way to handle this I can think of is to
710 reject on the basis of the length of the second-level domain name
711 (but only when the top-level domain is unknown), with the assumption
712 that those of three or fewer characters could be reserved. For
715 .co.org -> works because the TLD is known
716 .co.uk -> doesn't work because "co" is only two chars long
717 .com.au -> doesn't work because "com" is only 3 chars long
718 .cnn.uk -> doesn't work because "cnn" is also only 3 chars long (ugh)
719 .cnn.de -> doesn't work for the same reason (ugh!!)
720 .abcd.de -> works because "abcd" is 4 chars long
721 .img.cnn.de -> works because it's not trying to set the 2nd level domain
722 .cnn.co.uk -> works for the same reason
724 That should prevent misuse, while allowing reasonable usage. If
725 someone knows of a better way to handle this, please let me
728 const char *p = cookie_domain;
729 int dccount = 1; /* number of domain components */
730 int ldcl = 0; /* last domain component length */
731 int nldcl = 0; /* next to last domain component length */
734 /* Ignore leading period in this calculation. */
737 for (out = 0; !out; p++)
745 /* Empty domain component found -- the domain is invalid. */
747 if (*(p + 1) == '\0')
749 /* Tolerate trailing '.' by not treating the domain as
750 one ending with an empty domain component. */
772 int known_toplevel = 0;
773 static char *known_toplevel_domains[] = {
774 ".com", ".edu", ".net", ".org", ".gov", ".mil", ".int"
776 for (i = 0; i < countof (known_toplevel_domains); i++)
777 if (match_tail (cookie_domain, known_toplevel_domains[i], 1))
782 if (!known_toplevel && nldcl <= 3)
789 /* Don't allow the host "foobar.com" to set a cookie for domain
791 if (*cookie_domain != '.')
793 int dlen = strlen (cookie_domain);
794 int hlen = strlen (host);
795 /* cookie host: hostname.foobar.com */
796 /* desired domain: bar.com */
797 /* '.' must be here in host-> ^ */
798 if (hlen > dlen && host[hlen - dlen - 1] != '.')
807 static int path_matches PARAMS ((const char *, const char *));
809 /* Check whether PATH begins with COOKIE_PATH. */
812 check_path_match (const char *cookie_path, const char *path)
814 return path_matches (path, cookie_path);
817 /* Process the HTTP `Set-Cookie' header. This results in storing the
818 cookie or discarding a matching one, or ignoring it completely, all
819 depending on the contents. */
822 cookie_jar_process_set_cookie (struct cookie_jar *jar,
823 const char *host, int port,
824 const char *path, const char *set_cookie)
826 struct cookie *cookie;
827 cookies_now = time (NULL);
829 cookie = parse_set_cookies (set_cookie, update_cookie_field, 0);
833 /* Sanitize parts of cookie. */
838 cookie->domain = xstrdup (host);
843 if (!check_domain_match (cookie->domain, host))
845 logprintf (LOG_NOTQUIET,
846 "Cookie coming from %s attempted to set domain to %s\n",
847 host, cookie->domain);
848 xfree (cookie->domain);
854 cookie->path = xstrdup (path);
857 if (!check_path_match (cookie->path, path))
859 DEBUGP (("Attempt to fake the path: %s, %s\n",
860 cookie->path, path));
865 if (cookie->discard_requested)
867 discard_matching_cookie (jar, cookie);
871 store_cookie (jar, cookie);
876 delete_cookie (cookie);
879 /* Support for sending out cookies in HTTP requests, based on
880 previously stored cookies. Entry point is
881 `cookie_jar_generate_cookie_header'. */
883 /* Find the cookie chains whose domains match HOST and store them to
886 A cookie chain is the head of a list of cookies that belong to a
887 host/domain. Given HOST "img.search.xemacs.org", this function
888 will return the chains for "img.search.xemacs.org",
889 "search.xemacs.org", and "xemacs.org" -- those of them that exist
892 DEST should be large enough to accept (in the worst case) as many
893 elements as there are domain components of HOST. */
896 find_chains_of_host (struct cookie_jar *jar, const char *host,
897 struct cookie *dest[])
902 /* Bail out quickly if there are no cookies in the jar. */
903 if (!hash_table_count (jar->chains))
906 if (numeric_address_p (host))
907 /* If host is an IP address, only check for the exact match. */
910 /* Otherwise, check all the subdomains except the top-level (last)
911 one. As a domain with N components has N-1 dots, the number of
912 passes equals the number of dots. */
913 passes = count_char (host, '.');
917 /* Find chains that match HOST, starting with exact match and
918 progressing to less specific domains. For instance, given HOST
919 fly.srk.fer.hr, first look for fly.srk.fer.hr's chain, then
920 srk.fer.hr's, then fer.hr's. */
923 struct cookie *chain = hash_table_get (jar->chains, host);
925 dest[dest_count++] = chain;
926 if (++passcnt >= passes)
928 host = strchr (host, '.') + 1;
934 /* If FULL_PATH begins with PREFIX, return the length of PREFIX, zero
938 path_matches (const char *full_path, const char *prefix)
943 /* Wget's HTTP paths do not begin with '/' (the URL code treats it
944 as a mere separator, inspired by rfc1808), but the '/' is
945 assumed when matching against the cookie stuff. */
949 len = strlen (prefix);
951 if (0 != strncmp (full_path, prefix, len))
952 /* FULL_PATH doesn't begin with PREFIX. */
955 /* Length of PREFIX determines the quality of the match. */
959 /* Return non-zero iff COOKIE matches the provided parameters of the
960 URL being downloaded: HOST, PORT, PATH, and SECFLAG.
962 If PATH_GOODNESS is non-NULL, store the "path goodness" value
963 there. That value is a measure of how closely COOKIE matches PATH,
964 used for ordering cookies. */
967 cookie_matches_url (const struct cookie *cookie,
968 const char *host, int port, const char *path,
969 int secflag, int *path_goodness)
973 if (COOKIE_EXPIRED_P (cookie))
974 /* Ignore stale cookies. Don't bother unchaining the cookie at
975 this point -- Wget is a relatively short-lived application, and
976 stale cookies will not be saved by `cookie_jar_save'. On the
977 other hand, this function should be as efficient as
981 if (cookie->secure && !secflag)
982 /* Don't transmit secure cookies over insecure connections. */
984 if (cookie->port != PORT_ANY && cookie->port != port)
987 /* If exact domain match is required, verify that cookie's domain is
988 equal to HOST. If not, assume success on the grounds of the
989 cookie's chain having been found by find_chains_of_host. */
990 if (cookie->domain_exact
991 && 0 != strcasecmp (host, cookie->domain))
994 pg = path_matches (path, cookie->path);
999 /* If the caller requested path_goodness, we return it. This is
1000 an optimization, so that the caller doesn't need to call
1001 path_matches() again. */
1002 *path_goodness = pg;
1006 /* A structure that points to a cookie, along with the additional
1007 information about the cookie's "goodness". This allows us to sort
1008 the cookies when returning them to the server, as required by the
1011 struct weighed_cookie {
1012 struct cookie *cookie;
1013 int domain_goodness;
1017 /* Comparator used for uniquifying the list. */
1020 equality_comparator (const void *p1, const void *p2)
1022 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
1023 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
1025 int namecmp = strcmp (wc1->cookie->attr, wc2->cookie->attr);
1026 int valuecmp = strcmp (wc1->cookie->value, wc2->cookie->value);
1028 /* We only really care whether both name and value are equal. We
1029 return them in this order only for consistency... */
1030 return namecmp ? namecmp : valuecmp;
1033 /* Eliminate duplicate cookies. "Duplicate cookies" are any two
1034 cookies with the same attr name and value. Whenever a duplicate
1035 pair is found, one of the cookies is removed. */
1038 eliminate_dups (struct weighed_cookie *outgoing, int count)
1040 struct weighed_cookie *h; /* hare */
1041 struct weighed_cookie *t; /* tortoise */
1042 struct weighed_cookie *end = outgoing + count;
1044 /* We deploy a simple uniquify algorithm: first sort the array
1045 according to our sort criteria, then copy it to itself, comparing
1046 each cookie to its neighbor and ignoring the duplicates. */
1048 qsort (outgoing, count, sizeof (struct weighed_cookie), equality_comparator);
1050 /* "Hare" runs through all the entries in the array, followed by
1051 "tortoise". If a duplicate is found, the hare skips it.
1052 Non-duplicate entries are copied to the tortoise ptr. */
1054 for (h = t = outgoing; h < end; h++)
1058 struct cookie *c0 = h[0].cookie;
1059 struct cookie *c1 = h[1].cookie;
1060 if (!strcmp (c0->attr, c1->attr) && !strcmp (c0->value, c1->value))
1061 continue; /* ignore the duplicate */
1064 /* If the hare has advanced past the tortoise (because of
1065 previous dups), make sure the values get copied. Otherwise,
1066 no copying is necessary. */
1072 return t - outgoing;
1075 /* Comparator used for sorting by quality. */
1078 goodness_comparator (const void *p1, const void *p2)
1080 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
1081 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
1083 /* Subtractions take `wc2' as the first argument because we want a
1084 sort in *decreasing* order of goodness. */
1085 int dgdiff = wc2->domain_goodness - wc1->domain_goodness;
1086 int pgdiff = wc2->path_goodness - wc1->path_goodness;
1088 /* Sort by domain goodness; if these are the same, sort by path
1089 goodness. (The sorting order isn't really specified; maybe it
1090 should be the other way around.) */
1091 return dgdiff ? dgdiff : pgdiff;
1094 /* Generate a `Cookie' header for a request that goes to HOST:PORT and
1095 requests PATH from the server. The resulting string is allocated
1096 with `malloc', and the caller is responsible for freeing it. If no
1097 cookies pertain to this request, i.e. no cookie header should be
1098 generated, NULL is returned. */
1101 cookie_jar_generate_cookie_header (struct cookie_jar *jar, const char *host,
1102 int port, const char *path,
1103 int connection_secure_p)
1105 struct cookie **chains;
1108 struct cookie *cookie;
1109 struct weighed_cookie *outgoing;
1112 int result_size, pos;
1114 /* First, find the cookie chains whose domains match HOST. */
1116 /* Allocate room for find_chains_of_host to write to. The number of
1117 chains can at most equal the number of subdomains, hence
1118 1+<number of dots>. */
1119 chains = alloca_array (struct cookie *, 1 + count_char (host, '.'));
1120 chain_count = find_chains_of_host (jar, host, chains);
1122 /* No cookies for this host. */
1126 cookies_now = time (NULL);
1128 /* Now extract from the chains those cookies that match our host
1129 (for domain_exact cookies), port (for cookies with port other
1130 than PORT_ANY), etc. See cookie_matches_url for details. */
1132 /* Count the number of matching cookies. */
1134 for (i = 0; i < chain_count; i++)
1135 for (cookie = chains[i]; cookie; cookie = cookie->next)
1136 if (cookie_matches_url (cookie, host, port, path, connection_secure_p,
1140 return NULL; /* no cookies matched */
1142 /* Allocate the array. */
1143 outgoing = alloca_array (struct weighed_cookie, count);
1145 /* Fill the array with all the matching cookies from the chains that
1148 for (i = 0; i < chain_count; i++)
1149 for (cookie = chains[i]; cookie; cookie = cookie->next)
1152 if (!cookie_matches_url (cookie, host, port, path,
1153 connection_secure_p, &pg))
1155 outgoing[ocnt].cookie = cookie;
1156 outgoing[ocnt].domain_goodness = strlen (cookie->domain);
1157 outgoing[ocnt].path_goodness = pg;
1160 assert (ocnt == count);
1162 /* Eliminate duplicate cookies; that is, those whose name and value
1164 count = eliminate_dups (outgoing, count);
1166 /* Sort the array so that best-matching domains come first, and
1167 that, within one domain, best-matching paths come first. */
1168 qsort (outgoing, count, sizeof (struct weighed_cookie), goodness_comparator);
1170 /* Count the space the name=value pairs will take. */
1172 for (i = 0; i < count; i++)
1174 struct cookie *c = outgoing[i].cookie;
1176 result_size += strlen (c->attr) + 1 + strlen (c->value);
1179 /* Allocate output buffer:
1181 name=value pairs -- result_size
1182 "; " separators -- (count - 1) * 2
1183 \r\n line ending -- 2
1184 \0 terminator -- 1 */
1185 result_size = 8 + result_size + (count - 1) * 2 + 2 + 1;
1186 result = xmalloc (result_size);
1188 strcpy (result, "Cookie: ");
1190 for (i = 0; i < count; i++)
1192 struct cookie *c = outgoing[i].cookie;
1193 int namlen = strlen (c->attr);
1194 int vallen = strlen (c->value);
1196 memcpy (result + pos, c->attr, namlen);
1198 result[pos++] = '=';
1199 memcpy (result + pos, c->value, vallen);
1203 result[pos++] = ';';
1204 result[pos++] = ' ';
1207 result[pos++] = '\r';
1208 result[pos++] = '\n';
1209 result[pos++] = '\0';
1210 assert (pos == result_size);
1214 /* Support for loading and saving cookies. The format used for
1215 loading and saving should be the format of the `cookies.txt' file
1216 used by Netscape and Mozilla, at least the Unix versions.
1217 (Apparently IE can export cookies in that format as well.) The
1218 format goes like this:
1220 DOMAIN DOMAIN-FLAG PATH SECURE-FLAG TIMESTAMP ATTR-NAME ATTR-VALUE
1222 DOMAIN -- cookie domain, optionally followed by :PORT
1223 DOMAIN-FLAG -- whether all hosts in the domain match
1225 SECURE-FLAG -- whether cookie requires secure connection
1226 TIMESTAMP -- expiry timestamp, number of seconds since epoch
1227 ATTR-NAME -- name of the cookie attribute
1228 ATTR-VALUE -- value of the cookie attribute (empty if absent)
1230 The fields are separated by TABs. All fields are mandatory, except
1231 for ATTR-VALUE. The `-FLAG' fields are boolean, their legal values
1232 being "TRUE" and "FALSE". Empty lines, lines consisting of
1233 whitespace only, and comment lines (beginning with # optionally
1234 preceded by whitespace) are ignored.
1236 Example line from cookies.txt (split in two lines for readability):
1238 .google.com TRUE / FALSE 2147368447 \
1239 PREF ID=34bb47565bbcd47b:LD=en:NR=20:TM=985172580:LM=985739012
1243 /* If the region [B, E) ends with :<digits>, parse the number, return
1244 it, and store new boundary (location of the `:') to DOMAIN_E_PTR.
1245 If port is not specified, return 0. */
1248 domain_port (const char *domain_b, const char *domain_e,
1249 const char **domain_e_ptr)
1253 const char *colon = memchr (domain_b, ':', domain_e - domain_b);
1256 for (p = colon + 1; p < domain_e && ISDIGIT (*p); p++)
1257 port = 10 * port + (*p - '0');
1259 /* Garbage following port number. */
1261 *domain_e_ptr = colon;
1265 #define GET_WORD(p, b, e) do { \
1267 while (*p && *p != '\t') \
1270 if (b == e || !*p) \
1275 /* Load cookies from FILE. */
/* Read FILE line by line and hand each parsed cookie record to
   store_cookie for JAR.

   JAR  -- cookie jar passed through to store_cookie.
   FILE -- name of the cookie file; it is opened for reading.

   Each record is split into fields in this order (the order of the
   GET_WORD calls below): DOMAIN, DOMAIN-FLAG, PATH, SECURE-FLAG,
   EXPIRES, NAME, and finally VALUE, which runs to the end of the
   line.  After leading whitespace is skipped, a line that is empty or
   starts with a hash sign is tested for before any field is parsed.

   NOTE(review): the open-failure message below is passed to logprintf
   without the _() wrapper that cookie_jar_save uses for the same
   text -- looks like an i18n inconsistency.
   NOTE(review): no fclose of FP is visible in this excerpt; confirm
   the stream is closed before the function returns.  */
1278 cookie_jar_load (struct cookie_jar *jar, const char *file)
1281 FILE *fp = fopen (file, "r");
1284 logprintf (LOG_NOTQUIET, "Cannot open cookies file `%s': %s\n",
1285 file, strerror (errno));
/* Refresh the shared timestamp once per load; the staleness test on
   EXPIRY further down compares against it.  */
1288 cookies_now = time (NULL);
/* Each line obtained from read_whole_line is released by the xfree
   call in the loop increment expression.  */
1290 for (; ((line = read_whole_line (fp)) != NULL); xfree (line))
1292 struct cookie *cookie;
/* Begin/end pointer pairs delimiting each field inside LINE.  */
1298 char *domain_b = NULL, *domain_e = NULL;
1299 char *domflag_b = NULL, *domflag_e = NULL;
1300 char *path_b = NULL, *path_e = NULL;
1301 char *secure_b = NULL, *secure_e = NULL;
1302 char *expires_b = NULL, *expires_e = NULL;
1303 char *name_b = NULL, *name_e = NULL;
1304 char *value_b = NULL, *value_e = NULL;
1306 /* Skip leading white-space. */
1307 while (*p && ISSPACE (*p))
1309 /* Ignore empty lines. */
1310 if (!*p || *p == '#')
/* The six TAB-terminated fields are consumed in file order; VALUE is
   handled separately afterwards.  */
1313 GET_WORD (p, domain_b, domain_e);
1314 GET_WORD (p, domflag_b, domflag_e);
1315 GET_WORD (p, path_b, path_e);
1316 GET_WORD (p, secure_b, secure_e);
1317 GET_WORD (p, expires_b, expires_e);
1318 GET_WORD (p, name_b, name_e);
1320 /* Don't use GET_WORD for value because it ends with newline,
1323 value_e = p + strlen (p);
1324 if (value_e > value_b && value_e[-1] == '\n')
1326 if (value_e > value_b && value_e[-1] == '\r')
1328 /* Empty values are legal (I think), so don't bother checking. */
/* Build the cookie from the delimited fields.  */
1330 cookie = cookie_new ();
1332 cookie->attr = strdupdelim (name_b, name_e);
1333 cookie->value = strdupdelim (value_b, value_e);
1334 cookie->path = strdupdelim (path_b, path_e);
1335 cookie->secure = BOUNDED_EQUAL (secure_b, secure_e, "TRUE");
1337 /* Curl source says, quoting Andre Garcia: "flag: A TRUE/FALSE
1338 value indicating if all machines within a given domain can
1339 access the variable. This value is set automatically by the
1340 browser, depending on the value set for the domain." */
/* Note the negation: TRUE in the file means the domain match is not
   exact.  save_cookies_mapper applies the inverse mapping on output.  */
1341 cookie->domain_exact = !BOUNDED_EQUAL (domflag_b, domflag_e, "TRUE");
1343 /* DOMAIN needs special treatment because we might need to
1344 extract the port. */
/* The address of DOMAIN_E is passed so that domain_port can move the
   end of the domain back to the colon; the domain copied below then
   excludes the :PORT suffix.  */
1345 port = domain_port (domain_b, domain_e, (const char **)&domain_e);
1347 cookie->port = port;
1349 if (*domain_b == '.')
1350 ++domain_b; /* remove leading dot internally */
1351 cookie->domain = strdupdelim (domain_b, domain_e);
/* The result of the sscanf call below is not checked.  If no number
   can be converted, EXPIRY keeps the default assigned here, which is
   one less than cookies_now, so the record fails the staleness
   test.  */
1353 /* safe default in case EXPIRES field is garbled. */
1354 expiry = (double)cookies_now - 1;
1356 /* I don't like changing the line, but it's safe here. (line is
1359 sscanf (expires_b, "%lf", &expiry);
1360 if (expiry < cookies_now)
1361 /* ignore stale cookie. */
1363 cookie->expiry_time = expiry;
1365 /* If the cookie has survived being saved into an external file,
1366 it is obviously permanent. */
1367 cookie->permanent = 1;
1369 store_cookie (jar, cookie);
/* NOTE(review): the control flow separating the store_cookie call
   above from the delete_cookie call below is not visible in this
   excerpt; presumably delete_cookie runs only for records rejected
   earlier (for example stale ones) -- confirm.  */
1375 delete_cookie (cookie);
1380 /* Mapper for save_cookies callable by hash_table_map. VALUE points
1381 to the head in a chain of cookies. The function prints the entire chain. */
/* Callback for hash_table_map that writes one chain of cookies to a
   stdio stream.

   KEY   -- cast to the domain string shared by the chain.
   VALUE -- cast to the first cookie of the chain; the chain is walked
            through the next pointers.
   ARG   -- cast to the FILE * that receives the output.

   Each cookie is first tested for being permanent and for
   COOKIE_EXPIRED_P.  NOTE(review): the action taken by those tests is
   not visible in this excerpt; presumably such cookies are skipped.

   The record written per cookie is, TAB separated: an optional :PORT
   suffix (the domain text itself is emitted by lines not visible
   here), DOMAIN-FLAG, PATH, SECURE-FLAG, expiry time, name, value,
   followed by a newline.

   The one return visible here yields 1, which stops the mapping.  */
1385 save_cookies_mapper (void *key, void *value, void *arg)
1387 FILE *fp = (FILE *)arg;
1388 char *domain = (char *)key;
1389 struct cookie *cookie = (struct cookie *)value;
1390 for (; cookie; cookie = cookie->next)
1392 if (!cookie->permanent)
1394 if (COOKIE_EXPIRED_P (cookie))
1396 if (!cookie->domain_exact)
/* The port is written only when it is something other than
   PORT_ANY.  */
1399 if (cookie->port != PORT_ANY)
1400 fprintf (fp, ":%d", cookie->port);
/* DOMAIN-FLAG is the inverse of domain_exact, mirroring the negation
   applied when the file is loaded.  The expiry time is cast to double
   and printed with no fractional digits.  */
1401 fprintf (fp, "\t%s\t%s\t%s\t%.0f\t%s\t%s\n",
1402 cookie->domain_exact ? "FALSE" : "TRUE",
1403 cookie->path, cookie->secure ? "TRUE" : "FALSE",
1404 (double)cookie->expiry_time,
1405 cookie->attr, cookie->value);
1407 return 1; /* stop mapping */
1412 /* Save cookies, in format described above, to FILE. */
/* Write the cookies held in JAR to FILE.

   JAR  -- cookie jar whose chains table is mapped over.
   FILE -- name of the output file; it is opened with mode w, so any
           existing content is replaced.

   The output starts with three comment lines and a blank line, and
   the records themselves are produced by save_cookies_mapper.
   Problems opening, writing or closing the file are reported through
   logprintf at LOG_NOTQUIET with translated messages.  */
1415 cookie_jar_save (struct cookie_jar *jar, const char *file)
1419 DEBUGP (("Saving cookies to %s.\n", file));
/* Refresh the shared timestamp before mapping.  NOTE(review):
   presumably COOKIE_EXPIRED_P in the mapper reads it -- confirm.  */
1421 cookies_now = time (NULL);
1423 fp = fopen (file, "w");
1426 logprintf (LOG_NOTQUIET, _("Cannot open cookies file `%s': %s\n"),
1427 file, strerror (errno));
/* File header.  Each line starts with a hash sign, which the loader
   tests for when deciding whether to ignore a line.  */
1431 fputs ("# HTTP cookie file.\n", fp);
1432 fprintf (fp, "# Generated by Wget on %s.\n", datetime_str (NULL));
1433 fputs ("# Edit at your own risk.\n\n", fp);
/* The stream is passed as the callback argument; the mapper casts it
   back to FILE *.  */
1435 hash_table_map (jar->chains, save_cookies_mapper, fp);
/* NOTE(review): the condition guarding this message is not visible in
   this excerpt.  */
1438 logprintf (LOG_NOTQUIET, _("Error writing to `%s': %s\n"),
1439 file, strerror (errno));
/* The result of fclose is checked as well, since buffered output may
   only reach the file when the stream is closed.  */
1441 if (fclose (fp) < 0)
1442 logprintf (LOG_NOTQUIET, _("Error closing `%s': %s\n"),
1443 file, strerror (errno));
1445 DEBUGP (("Done saving cookies.\n"));
1448 /* Destroy all the elements in the chain and unhook it from the cookie
1449 jar. This is written in the form of a callback to hash_table_map
1450 and used by cookie_jar_delete to delete all the cookies in a jar. */
/* Callback for hash_table_map that removes one chain from the table
   of a jar and deletes the cookies in it.

   NOTE: the first two parameter names are swapped relative to how
   they are used.  The first argument (named VALUE) is cast to the
   chain key, and the second (named KEY) is cast to the head of the
   cookie chain.  save_cookies_mapper names the same two positions
   KEY and VALUE respectively.

   ARG -- cast to the cookie jar whose chains table holds the entry.

   NOTE(review): the entry is removed from the table while
   hash_table_map is iterating over that same table; confirm that
   hash_table_map permits removal from within the callback.
   NOTE(review): the loop around the deletion, the code that frees the
   key, and the return value are not visible in this excerpt.  */
1454 nuke_cookie_chain (void *value, void *key, void *arg)
1456 char *chain_key = (char *)value;
1457 struct cookie *chain = (struct cookie *)key;
1458 struct cookie_jar *jar = (struct cookie_jar *)arg;
1460 /* Remove the chain from the table and free the key. */
1461 hash_table_remove (jar->chains, chain_key);
1464 /* Then delete all the cookies in the chain. */
/* The successor is read before delete_cookie is called on the current
   node, so the link is never read from a deleted cookie.  */
1467 struct cookie *next = chain->next;
1468 delete_cookie (chain);
1476 /* Clean up cookie-related data. */
/* Tear down the contents of JAR in two steps: nuke_cookie_chain is
   mapped over jar->chains with JAR itself as the callback argument,
   and then the chains table is destroyed with hash_table_destroy.
   NOTE(review): freeing of JAR itself is not visible in this excerpt;
   confirm who releases it.  */
1479 cookie_jar_delete (struct cookie_jar *jar)
1481 hash_table_map (jar->chains, nuke_cookie_chain, jar);
1482 hash_table_destroy (jar->chains);
1486 /* Test cases. Currently this only tests parse_set_cookies. To
1487 use, recompile Wget with -DTEST_COOKIES and call test_cookies(). */
/* Strings captured by the callback below during one parse, stored as
   alternating name and value entries.  The array has room for 10
   entries, that is five name/value pairs.  */
1492 char *test_results[10];
/* Callback handed to parse_set_cookies by the test driver.  It copies
   the name delimited by NB and NE and the value delimited by VB and
   VE with strdupdelim and appends both to test_results, name first,
   advancing test_count by two.  The cookie argument is not used in
   the visible lines.
   NOTE(review): no bounds check against the size of test_results is
   visible here, and the return value is not visible in this
   excerpt.  */
1494 static int test_parse_cookies_callback (struct cookie *ignored,
1495 const char *nb, const char *ne,
1496 const char *vb, const char *ve)
1498 test_results[test_count++] = strdupdelim (nb, ne);
1499 test_results[test_count++] = strdupdelim (vb, ve);
/* Body of the parse_set_cookies test driver.  NOTE(review): the
   function header and the declaration of the tests_succ table are not
   visible in this excerpt.

   Each success case pairs an input string with the expected results:
   a NULL-terminated list of alternating names and values, in the same
   layout the callback uses for test_results.  */
1506 /* Tests expected to succeed: */
1512 { "arg=value", {"arg", "value", NULL} },
1513 { "arg1=value1;arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
1514 { "arg1=value1; arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
1515 { "arg1=value1; arg2=value2;", {"arg1", "value1", "arg2", "value2", NULL} },
1516 { "arg1=value1; arg2=value2; ", {"arg1", "value1", "arg2", "value2", NULL} },
1517 { "arg1=\"value1\"; arg2=\"\"", {"arg1", "value1", "arg2", "", NULL} },
1518 { "arg=", {"arg", "", NULL} },
1519 { "arg1=; arg2=", {"arg1", "", "arg2", "", NULL} },
1520 { "arg1 = ; arg2= ", {"arg1", "", "arg2", "", NULL} },
1523 /* Tests expected to fail: */
1524 static char *tests_fail[] = {
1526 "arg=\"unterminated",
1528 "arg1=;=another-empty-name",
/* First pass: every success case is parsed and the captured strings
   are compared with the expected list.
   NOTE(review): resetting test_count between cases and releasing the
   strings stored in test_results are not visible in this excerpt.  */
1532 for (i = 0; i < countof (tests_succ); i++)
1535 char *data = tests_succ[i].data;
1536 char **expected = tests_succ[i].results;
1540 c = parse_set_cookies (data, test_parse_cookies_callback, 1);
1543 printf ("NULL cookie returned for valid data: %s\n", data);
/* Results are stored as name/value pairs, hence the stride of two;
   ind / 2 + 1 in the messages is the 1-based number of the pair.  */
1547 for (ind = 0; ind < test_count; ind += 2)
1551 if (0 != strcmp (expected[ind], test_results[ind]))
1552 printf ("Invalid name %d for '%s' (expected '%s', got '%s')\n",
1553 ind / 2 + 1, data, expected[ind], test_results[ind]);
1554 if (0 != strcmp (expected[ind + 1], test_results[ind + 1]))
1555 printf ("Invalid value %d for '%s' (expected '%s', got '%s')\n",
1556 ind / 2 + 1, data, expected[ind + 1], test_results[ind + 1]);
/* A count mismatch is reported when captured results remain
   uncompared or when the expected list still has an entry at IND.  */
1558 if (ind < test_count || expected[ind])
1559 printf ("Unmatched number of results: %s\n", data);
/* Second pass: every failure case is parsed and a message is printed
   when the parser does not report an error for it.  */
1562 for (i = 0; i < countof (tests_fail); i++)
1565 char *data = tests_fail[i];
1567 c = parse_set_cookies (data, test_parse_cookies_callback, 1);
1569 printf ("Failed to report error on invalid data: %s\n", data);
1572 #endif /* TEST_COOKIES */