1 /* Support for cookies.
2 Copyright (C) 2001, 2002 Free Software Foundation, Inc.
4 This file is part of GNU Wget.
6 GNU Wget is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or (at
9 your option) any later version.
11 GNU Wget is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Wget; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 In addition, as a special exception, the Free Software Foundation
21 gives permission to link the code of its release of Wget with the
22 OpenSSL project's "OpenSSL" library (or with modified versions of it
23 that use the same license as the "OpenSSL" library), and distribute
24 the linked executables. You must obey the GNU General Public License
25 in all respects for all of the code used other than "OpenSSL". If you
26 modify this file, you may extend this exception to your version of the
27 file, but you are not obligated to do so. If you do not wish to do
28 so, delete this exception statement from your version. */
30 /* Written by Hrvoje Niksic. Parts are loosely inspired by cookie
31 code submitted by Tomasz Wegrzanowski.
33 Ideas for future work:
35 * Implement limits on cookie-related sizes, such as max. cookie
36 size, max. number of cookies, etc.
38 * Add more "cookie jar" methods, such as methods to iterate over
39 stored cookies, to clear temporary cookies, to perform
40 intelligent auto-saving, etc.
42 * Support `Set-Cookie2' and `Cookie2' headers? Does anyone really
62 /* This should *really* be in a .h file! */
63 time_t http_atotm PARAMS ((const char *));
65 /* Declarations of `struct cookie' and the most basic functions. */
67 /* Cookie jar serves as cookie storage and a means of retrieving
68 cookies efficiently. All cookies with the same domain are stored
69 in a linked list called "chain". A cookie chain can be reached by
70 looking up the domain in the cookie jar's `chains' table.
72 For example, to reach all the cookies under google.com, one must
73 execute hash_table_get(jar->chains, "google.com"). Of
74 course, when sending a cookie to `www.google.com', one must search
75 for cookies that belong to either `www.google.com' or `google.com'
76 -- but the point is that the code doesn't need to go through *all*
80 /* Cookie chains indexed by domain. */
81 struct hash_table *chains;
83 int cookie_count; /* number of cookies in the jar. */
86 /* Value set by entry point functions, so that the low-level
87 routines don't need to call time() all the time. */
93 struct cookie_jar *jar = xnew (struct cookie_jar);
94 jar->chains = make_nocase_string_hash_table (0);
95 jar->cookie_count = 0;
100 char *domain; /* domain of the cookie */
101 int port; /* port number */
102 char *path; /* path prefix of the cookie */
104 int secure; /* whether cookie must not be
105 transmitted over non-https
107 int domain_exact; /* whether DOMAIN must match as a
110 int permanent; /* whether the cookie should outlive
112 time_t expiry_time; /* time when the cookie expires */
114 int discard_requested; /* whether cookie was created to
115 request discarding another
118 char *attr; /* cookie attribute name */
119 char *value; /* cookie attribute value */
121 struct cookie *next; /* used for chaining of cookies in the
125 #define PORT_ANY (-1)
126 #define COOKIE_EXPIRED_P(c) ((c)->expiry_time != 0 && (c)->expiry_time < cookies_now)
128 /* Allocate and return a new, empty cookie structure. */
130 static struct cookie *
133 struct cookie *cookie = xnew0 (struct cookie);
135 /* Both cookie->permanent and cookie->expiry_time are now 0. By
136 default, we assume that the cookie is non-permanent and valid
137 until the end of the session. */
139 cookie->port = PORT_ANY;
143 /* Deallocate COOKIE and its components. */
146 delete_cookie (struct cookie *cookie)
/* Release the four string members.  cookie_new zero-fills the struct
   (xnew0), so any of them may still be NULL at this point; xfree_null
   presumably tolerates that -- TODO confirm against its definition.  */
148 xfree_null (cookie->domain);
149 xfree_null (cookie->path);
150 xfree_null (cookie->attr);
151 xfree_null (cookie->value);
155 /* Functions for storing cookies.
157 All cookies can be reached beginning with jar->chains. The key in
158 that table is the domain name, and the value is a linked list of
159 all cookies from that domain. Every new cookie is placed on the
162 /* Find and return a cookie in JAR whose domain, port, path, and
163 attribute name correspond to COOKIE. If found, PREVPTR will point to the
164 location of the cookie previous in chain, or NULL if the found
165 cookie is the head of a chain.
167 If no matching cookie is found, return NULL. */
169 static struct cookie *
170 find_matching_cookie (struct cookie_jar *jar, struct cookie *cookie,
171 struct cookie **prevptr)
173 struct cookie *chain, *prev;
/* The domain part of the match is done by the hash lookup itself.
   jar->chains is created with make_nocase_string_hash_table, so
   presumably this lookup ignores case -- TODO confirm.  */
175 chain = hash_table_get (jar->chains, cookie->domain);
180 for (; chain; prev = chain, chain = chain->next)
/* Within the chain, path and attribute name are compared with
   case-sensitive strcmp, and the port numbers must be equal.  */
181 if (0 == strcmp (cookie->path, chain->path)
182 && 0 == strcmp (cookie->attr, chain->attr)
183 && cookie->port == chain->port)
194 /* Store COOKIE to the jar.
196 This is done by placing COOKIE at the head of its chain. However,
197 if COOKIE matches a cookie already in memory, as determined by
198 find_matching_cookie, the old cookie is unlinked and destroyed.
200 The key of each chain's hash table entry is allocated only the
201 first time; next hash_table_put's reuse the same key. */
204 store_cookie (struct cookie_jar *jar, struct cookie *cookie)
206 struct cookie *chain_head;
209 if (hash_table_get_pair (jar->chains, cookie->domain,
210 &chain_key, &chain_head))
212 /* A chain of cookies in this domain already exists. Check for
213 duplicates -- if an extant cookie exactly matches our domain,
214 port, path, and name, replace it. */
216 struct cookie *victim = find_matching_cookie (jar, cookie, &prev);
220 /* Remove VICTIM from the chain. COOKIE will be placed at
224 prev->next = victim->next;
225 cookie->next = chain_head;
229 /* prev is NULL; apparently VICTIM was at the head of
230 the chain. This place will be taken by COOKIE, so
231 all we need to do is: */
232 cookie->next = victim->next;
234 delete_cookie (victim);
236 DEBUGP (("Deleted old cookie (to be replaced.)\n"));
239 cookie->next = chain_head;
243 /* We are now creating the chain. Use a copy of cookie->domain
244 as the key for the life-time of the chain. Using
245 cookie->domain would be unsafe because the life-time of the
246 chain may exceed the life-time of the cookie. (Cookies may
247 be deleted from the chain by this very function.) */
249 chain_key = xstrdup (cookie->domain);
/* Make COOKIE the head of its chain: the table entry for the domain
   now points at it.  When the chain already existed, CHAIN_KEY is the
   key handed back by hash_table_get_pair above, so no second copy of
   the domain string is made.  */
252 hash_table_put (jar->chains, chain_key, cookie);
/* NOTE(review): localtime may return a null pointer for a time it
   cannot convert, and that result is passed straight to asctime
   below.  asctime's result also ends with its own newline.  Confirm
   both are acceptable for debug output.  */
255 DEBUGP (("\nStored cookie %s %d%s %s %s %d %s %s %s\n",
256 cookie->domain, cookie->port,
257 cookie->port == PORT_ANY ? " (ANY)" : "",
259 cookie->permanent ? "permanent" : "nonpermanent",
262 ? asctime (localtime (&cookie->expiry_time)) : "<undefined>",
263 cookie->attr, cookie->value));
266 /* Discard a cookie matching COOKIE's domain, port, path, and
267 attribute name. This gets called when we encounter a cookie whose
268 expiry date is in the past, or whose max-age is set to 0. The
269 former corresponds to netscape cookie spec, while the latter is
270 specified by rfc2109. */
273 discard_matching_cookie (struct cookie_jar *jar, struct cookie *cookie)
275 struct cookie *prev, *victim;
277 if (!hash_table_count (jar->chains))
278 /* No elements == nothing to discard. */
/* PREV receives the predecessor of the match within its chain, or
   NULL when the match is the chain head (see find_matching_cookie).  */
281 victim = find_matching_cookie (jar, cookie, &prev);
285 /* Simply unchain the victim. */
286 prev->next = victim->next;
289 /* VICTIM was head of its chain. We need to place a new
290 cookie at the head. */
291 char *chain_key = NULL;
294 res = hash_table_get_pair (jar->chains, victim->domain,
299 /* VICTIM was the only cookie in the chain. Destroy the
300 chain and deallocate the chain key. */
301 hash_table_remove (jar->chains, victim->domain);
/* Otherwise VICTIM's successor becomes the new head, stored under
   CHAIN_KEY -- presumably the existing key fetched by the
   hash_table_get_pair call above; its remaining arguments are not
   visible in this excerpt.  */
305 hash_table_put (jar->chains, chain_key, victim->next);
307 delete_cookie (victim);
308 DEBUGP (("Discarded old cookie.\n"));
312 /* Functions for parsing the `Set-Cookie' header, and creating new
313 cookies from the wire. */
315 #define NAME_IS(string_literal) \
316 BOUNDED_EQUAL_NO_CASE (name_b, name_e, string_literal)
318 #define VALUE_EXISTS (value_b && value_e)
320 #define VALUE_NON_EMPTY (VALUE_EXISTS && (value_b != value_e))
322 /* Update the appropriate cookie field. [name_b, name_e) are expected
323 to delimit the attribute name, while [value_b, value_e) (optional)
324 should delimit the attribute value.
326 When called the first time, it will set the cookie's attribute name
327 and value. After that, it will check the attribute name for
328 special fields such as `domain', `path', etc. Where appropriate,
329 it will parse the values of the fields it recognizes and fill the
330 corresponding fields in COOKIE.
332 Returns 1 on success. Returns zero in case a syntax error is
333 found; such a cookie should be discarded. */
336 update_cookie_field (struct cookie *cookie,
337 const char *name_b, const char *name_e,
338 const char *value_b, const char *value_e)
340 assert (name_b != NULL && name_e != NULL);
346 cookie->attr = strdupdelim (name_b, name_e);
347 cookie->value = strdupdelim (value_b, value_e);
351 if (NAME_IS ("domain"))
353 if (!VALUE_NON_EMPTY)
355 xfree_null (cookie->domain);
356 /* Strictly speaking, we should set cookie->domain_exact if the
357 domain doesn't begin with a dot. But many sites set the
358 domain to "foo.com" and expect "subhost.foo.com" to get the
359 cookie, and it apparently works. */
362 cookie->domain = strdupdelim (value_b, value_e);
365 else if (NAME_IS ("path"))
367 if (!VALUE_NON_EMPTY)
369 xfree_null (cookie->path);
370 cookie->path = strdupdelim (value_b, value_e);
373 else if (NAME_IS ("expires"))
378 if (!VALUE_NON_EMPTY)
380 BOUNDED_TO_ALLOCA (value_b, value_e, value_copy);
382 expires = http_atotm (value_copy);
385 cookie->permanent = 1;
386 cookie->expiry_time = (time_t)expires;
389 /* Error in expiration spec. Assume default (cookie valid for
393 /* According to netscape's specification, expiry time in the
394 past means that discarding of a matching cookie is
396 if (cookie->expiry_time < cookies_now)
397 cookie->discard_requested = 1;
401 else if (NAME_IS ("max-age"))
406 if (!VALUE_NON_EMPTY)
408 BOUNDED_TO_ALLOCA (value_b, value_e, value_copy);
410 sscanf (value_copy, "%lf", &maxage);
412 /* something went wrong. */
414 cookie->permanent = 1;
415 cookie->expiry_time = cookies_now + maxage;
417 /* According to rfc2109, a cookie with max-age of 0 means that
418 discarding of a matching cookie is requested. */
420 cookie->discard_requested = 1;
424 else if (NAME_IS ("secure"))
426 /* ignore value completely */
431 /* Unrecognized attribute; ignore it. */
437 /* Returns non-zero for characters that are legal in the name of an
438 attribute. This used to allow only alphanumerics, '-', and '_',
439 but we need to be more lenient because a number of sites want to
440 use weirder attribute names. rfc2965 "informally specifies"
441 attribute name (token) as "a sequence of non-special, non-white
442 space characters". So we allow everything except the stuff we know
445 #define ATTR_NAME_CHAR(c) ((c) > 32 && (c) < 127 \
446 && (c) != '"' && (c) != '=' \
447 && (c) != ';' && (c) != ',')
449 /* Parse the contents of the `Set-Cookie' header. The header looks
452 name1=value1; name2=value2; ...
454 Trailing semicolon is optional; spaces are allowed between all
455 tokens. Additionally, values may be quoted.
457 A new cookie is returned upon success, NULL otherwise. The
458 specified CALLBACK function (normally `update_cookie_field') is used
459 to update the fields of the newly created cookie structure. */
461 static struct cookie *
462 parse_set_cookies (const char *sc,
463 int (*callback) (struct cookie *,
464 const char *, const char *,
465 const char *, const char *),
468 struct cookie *cookie = cookie_new ();
470 /* #### Hand-written DFAs are no fun to debug. We'd be better off
471 to rewrite this as an inline parser. */
473 enum { S_START, S_NAME, S_NAME_POST,
474 S_VALUE_PRE, S_VALUE, S_QUOTED_VALUE, S_VALUE_TRAILSPACE,
475 S_ATTR_ACTION, S_DONE, S_ERROR
481 const char *name_b = NULL, *name_e = NULL;
482 const char *value_b = NULL, *value_e = NULL;
486 while (state != S_DONE && state != S_ERROR)
493 else if (ISSPACE (c))
494 /* Strip all whitespace preceding the name. */
496 else if (ATTR_NAME_CHAR (c))
502 /* empty attr name not allowed */
506 if (!c || c == ';' || c == '=' || ISSPACE (c))
511 else if (ATTR_NAME_CHAR (c))
519 value_b = value_e = NULL;
522 state = S_ATTR_ACTION;
529 else if (ISSPACE (c))
530 /* Ignore space and keep the state. */
538 value_b = value_e = p;
541 state = S_ATTR_ACTION;
547 state = S_QUOTED_VALUE;
549 else if (ISSPACE (c))
559 if (!c || c == ';' || ISSPACE (c))
562 state = S_VALUE_TRAILSPACE;
566 value_e = NULL; /* no trailing space */
575 state = S_VALUE_TRAILSPACE;
582 case S_VALUE_TRAILSPACE:
586 state = S_ATTR_ACTION;
589 state = S_ATTR_ACTION;
590 else if (ISSPACE (c))
597 int legal = callback (cookie, name_b, name_e, value_b, value_e);
603 BOUNDED_TO_ALLOCA (name_b, name_e, name);
604 logprintf (LOG_NOTQUIET,
605 _("Error in Set-Cookie, field `%s'"), name);
615 /* handled by loop condition */
622 delete_cookie (cookie);
623 if (state != S_ERROR)
627 logprintf (LOG_NOTQUIET,
628 _("Syntax error in Set-Cookie: %s at position %d.\n"),
633 /* Sanity checks. These are important, otherwise it is possible for
634 malicious attackers to destroy important cookie information and/or
635 violate your privacy. */
638 #define REQUIRE_DIGITS(p) do { \
641 for (++p; ISDIGIT (*p); p++) \
645 #define REQUIRE_DOT(p) do { \
650 /* Check whether ADDR matches <digits>.<digits>.<digits>.<digits>.
652 We don't want to call network functions like inet_addr() because all
653 we need is a check, preferably one that is small, fast, and
657 numeric_address_p (const char *addr)
659 const char *p = addr;
661 REQUIRE_DIGITS (p); /* A */
662 REQUIRE_DOT (p); /* . */
663 REQUIRE_DIGITS (p); /* B */
664 REQUIRE_DOT (p); /* . */
665 REQUIRE_DIGITS (p); /* C */
666 REQUIRE_DOT (p); /* . */
667 REQUIRE_DIGITS (p); /* D */
674 /* Check whether COOKIE_DOMAIN is an appropriate domain for HOST.
675 Originally I tried to make the check compliant with rfc2109, but
676 the sites deviated too often, so I had to fall back to "tail
677 matching", as defined by the original Netscape's cookie spec. */
680 check_domain_match (const char *cookie_domain, const char *host)
684 /* Numeric address requires exact match. It also requires HOST to
686 if (numeric_address_p (cookie_domain))
687 return 0 == strcmp (cookie_domain, host);
691 /* For the sake of efficiency, check for exact match first. */
692 if (0 == strcasecmp (cookie_domain, host))
697 /* The tail of HOST must match COOKIE_DOMAIN. */
698 if (!match_tail (host, cookie_domain, 1))
701 /* We know that COOKIE_DOMAIN is a subset of HOST; however, we must
702 make sure that somebody is not trying to set the cookie for a
703 subdomain shared by many entities. For example, "company.co.uk"
704 must not be allowed to set a cookie for ".co.uk". On the other
705 hand, "sso.redhat.de" should be able to set a cookie for
708 The only marginally sane way to handle this I can think of is to
709 reject on the basis of the length of the second-level domain name
710 (but when the top-level domain is unknown), with the assumption
711 that those of three or fewer characters could be reserved. For
714 .co.org -> works because the TLD is known
715 .co.uk -> doesn't work because "co" is only two chars long
716 .com.au -> doesn't work because "com" is only 3 chars long
717 .cnn.uk -> doesn't work because "cnn" is also only 3 chars long (ugh)
718 .cnn.de -> doesn't work for the same reason (ugh!!)
719 .abcd.de -> works because "abcd" is 4 chars long
720 .img.cnn.de -> works because it's not trying to set the 2nd level domain
721 .cnn.co.uk -> works for the same reason
723 That should prevent misuse, while allowing reasonable usage. If
724 someone knows of a better way to handle this, please let me
727 const char *p = cookie_domain;
728 int dccount = 1; /* number of domain components */
729 int ldcl = 0; /* last domain component length */
730 int nldcl = 0; /* next to last domain component length */
733 /* Ignore leading period in this calculation. */
736 for (out = 0; !out; p++)
744 /* Empty domain component found -- the domain is invalid. */
746 if (*(p + 1) == '\0')
748 /* Tolerate trailing '.' by not treating the domain as
749 one ending with an empty domain component. */
771 int known_toplevel = 0;
772 static char *known_toplevel_domains[] = {
773 ".com", ".edu", ".net", ".org", ".gov", ".mil", ".int"
775 for (i = 0; i < countof (known_toplevel_domains); i++)
776 if (match_tail (cookie_domain, known_toplevel_domains[i], 1))
781 if (!known_toplevel && nldcl <= 3)
788 /* Don't allow the host "foobar.com" to set a cookie for domain
790 if (*cookie_domain != '.')
792 int dlen = strlen (cookie_domain);
793 int hlen = strlen (host);
794 /* cookie host: hostname.foobar.com */
795 /* desired domain: bar.com */
796 /* '.' must be here in host-> ^ */
797 if (hlen > dlen && host[hlen - dlen - 1] != '.')
806 static int path_matches PARAMS ((const char *, const char *));
808 /* Check whether PATH begins with COOKIE_PATH. */
811 check_path_match (const char *cookie_path, const char *path)
813 return path_matches (path, cookie_path);
816 /* Process the HTTP `Set-Cookie' header. This results in storing the
817 cookie or discarding a matching one, or ignoring it completely, all
818 depending on the contents. */
821 cookie_jar_process_set_cookie (struct cookie_jar *jar,
822 const char *host, int port,
823 const char *path, const char *set_cookie)
825 struct cookie *cookie;
/* Refresh the file-wide timestamp that update_cookie_field and
   COOKIE_EXPIRED_P compare expiry times against.  */
826 cookies_now = time (NULL);
828 cookie = parse_set_cookies (set_cookie, update_cookie_field, 0);
832 /* Sanitize parts of cookie. */
/* Fall back to the requesting host as the cookie's domain.  (The
   condition guarding this assignment is not visible in this
   excerpt.)  */
837 cookie->domain = xstrdup (host);
842 if (!check_domain_match (cookie->domain, host))
/* NOTE(review): unlike the Set-Cookie error messages emitted by
   parse_set_cookies, this string is not wrapped in _() -- confirm
   whether it is meant to be translatable.  */
844 logprintf (LOG_NOTQUIET,
845 "Cookie coming from %s attempted to set domain to %s\n",
846 host, cookie->domain);
847 xfree (cookie->domain);
853 cookie->path = xstrdup (path);
856 if (!check_path_match (cookie->path, path))
858 DEBUGP (("Attempt to fake the path: %s, %s\n",
859 cookie->path, path));
/* update_cookie_field sets discard_requested for an expiry time in
   the past or a max-age of 0; such a cookie removes the stored match
   rather than being stored itself.  */
864 if (cookie->discard_requested)
866 discard_matching_cookie (jar, cookie);
870 store_cookie (jar, cookie);
/* Presumably reached only on the paths where COOKIE was not handed
   to store_cookie -- the labels and jumps are not visible in this
   excerpt; TODO confirm.  */
875 delete_cookie (cookie);
878 /* Support for sending out cookies in HTTP requests, based on
879 previously stored cookies. Entry point is
880 `cookie_jar_generate_cookie_header'. */
882 /* Find the cookie chains whose domains match HOST and store them to
885 A cookie chain is the head of a list of cookies that belong to a
886 host/domain. Given HOST "img.search.xemacs.org", this function
887 will return the chains for "img.search.xemacs.org",
888 "search.xemacs.org", and "xemacs.org" -- those of them that exist
891 DEST should be large enough to accept (in the worst case) as many
892 elements as there are domain components of HOST. */
895 find_chains_of_host (struct cookie_jar *jar, const char *host,
896 struct cookie *dest[])
901 /* Bail out quickly if there are no cookies in the jar. */
902 if (!hash_table_count (jar->chains))
905 if (numeric_address_p (host))
906 /* If host is an IP address, only check for the exact match. */
909 /* Otherwise, check all the subdomains except the top-level (last)
910 one. As a domain with N components has N-1 dots, the number of
911 passes equals the number of dots. */
912 passes = count_char (host, '.');
916 /* Find chains that match HOST, starting with exact match and
917 progressing to less specific domains. For instance, given HOST
918 fly.srk.fer.hr, first look for fly.srk.fer.hr's chain, then
919 srk.fer.hr's, then fer.hr's. */
922 struct cookie *chain = hash_table_get (jar->chains, host);
924 dest[dest_count++] = chain;
925 if (++passcnt >= passes)
/* Drop the leading domain component.  Adding 1 to the strchr result
   is only safe because a dot is still present: the pass-count check
   above presumably leaves the loop (the break is not visible in this
   excerpt) once as many lookups have been made as HOST has dots.  */
927 host = strchr (host, '.') + 1;
933 /* If FULL_PATH begins with PREFIX, return the length of PREFIX, zero
937 path_matches (const char *full_path, const char *prefix)
942 /* Wget's HTTP paths do not begin with '/' (the URL code treats it
943 as a mere separator, inspired by rfc1808), but the '/' is
944 assumed when matching against the cookie stuff. */
948 len = strlen (prefix);
950 if (0 != strncmp (full_path, prefix, len))
951 /* FULL_PATH doesn't begin with PREFIX. */
954 /* Length of PREFIX determines the quality of the match. */
958 /* Return non-zero iff COOKIE matches the provided parameters of the
959 URL being downloaded: HOST, PORT, PATH, and SECFLAG.
961 If PATH_GOODNESS is non-NULL, store the "path goodness" value
962 there. That value is a measure of how closely COOKIE matches PATH,
963 used for ordering cookies. */
966 cookie_matches_url (const struct cookie *cookie,
967 const char *host, int port, const char *path,
968 int secflag, int *path_goodness)
972 if (COOKIE_EXPIRED_P (cookie))
973 /* Ignore stale cookies. Don't bother unchaining the cookie at
974 this point -- Wget is a relatively short-lived application, and
975 stale cookies will not be saved by `save_cookies'. On the
976 other hand, this function should be as efficient as
980 if (cookie->secure && !secflag)
981 /* Don't transmit secure cookies over insecure connections. */
983 if (cookie->port != PORT_ANY && cookie->port != port)
986 /* If exact domain match is required, verify that cookie's domain is
987 equal to HOST. If not, assume success on the grounds of the
988 cookie's chain having been found by find_chains_of_host. */
989 if (cookie->domain_exact
990 && 0 != strcasecmp (host, cookie->domain))
993 pg = path_matches (path, cookie->path);
998 /* If the caller requested path_goodness, we return it. This is
999 an optimization, so that the caller doesn't need to call
1000 path_matches() again. */
1001 *path_goodness = pg;
1005 /* A structure that points to a cookie, along with the additional
1006 information about the cookie's "goodness". This allows us to sort
1007 the cookies when returning them to the server, as required by the
1010 struct weighed_cookie {
1011 struct cookie *cookie;
1012 int domain_goodness;
1016 /* Comparator used for uniquifying the list. */
1019 equality_comparator (const void *p1, const void *p2)
/* P1 and P2 point at struct weighed_cookie elements; eliminate_dups
   passes this function to qsort.  NOTE(review): the casts drop the
   const qualifier although nothing is written through wc1/wc2.  */
1021 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
1022 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
/* Both comparisons are evaluated unconditionally, so attr and value
   must be valid strings for every element being sorted.  */
1024 int namecmp = strcmp (wc1->cookie->attr, wc2->cookie->attr);
1025 int valuecmp = strcmp (wc1->cookie->value, wc2->cookie->value);
1027 /* We only really care whether both name and value are equal. We
1028 return them in this order only for consistency... */
1029 return namecmp ? namecmp : valuecmp;
1032 /* Eliminate duplicate cookies. "Duplicate cookies" are any two
1033 cookies with the same attr name and value. Whenever a duplicate
1034 pair is found, one of the cookies is removed. */
1037 eliminate_dups (struct weighed_cookie *outgoing, int count)
1039 struct weighed_cookie *h; /* hare */
1040 struct weighed_cookie *t; /* tortoise */
1041 struct weighed_cookie *end = outgoing + count;
1043 /* We deploy a simple uniquify algorithm: first sort the array
1044 according to our sort criteria, then copy it to itself, comparing
1045 each cookie to its neighbor and ignoring the duplicates. */
1047 qsort (outgoing, count, sizeof (struct weighed_cookie), equality_comparator);
1049 /* "Hare" runs through all the entries in the array, followed by
1050 "tortoise". If a duplicate is found, the hare skips it.
1051 Non-duplicate entries are copied to the tortoise ptr. */
1053 for (h = t = outgoing; h < end; h++)
/* h[1] exists only while H is not the last element; presumably a
   guard precedes this comparison, but it is not visible in this
   excerpt -- TODO confirm.  An entry equal to its successor is the
   one skipped, so the last entry of each run of equals is kept.  */
1057 struct cookie *c0 = h[0].cookie;
1058 struct cookie *c1 = h[1].cookie;
1059 if (!strcmp (c0->attr, c1->attr) && !strcmp (c0->value, c1->value))
1060 continue; /* ignore the duplicate */
1063 /* If the hare has advanced past the tortoise (because of
1064 previous dups), make sure the values get copied. Otherwise,
1065 no copying is necessary. */
/* The tortoise's offset from the start of the array is returned as
   the new element count.  */
1071 return t - outgoing;
1074 /* Comparator used for sorting by quality. */
1077 goodness_comparator (const void *p1, const void *p2)
/* P1 and P2 point at struct weighed_cookie elements.  In
   cookie_jar_generate_cookie_header, domain_goodness is the length of
   the cookie's domain and path_goodness is the value reported by
   cookie_matches_url.  */
1079 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
1080 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
1082 /* Subtractions take `wc2' as the first argument because we want a
1083 sort in *decreasing* order of goodness. */
1084 int dgdiff = wc2->domain_goodness - wc1->domain_goodness;
1085 int pgdiff = wc2->path_goodness - wc1->path_goodness;
1087 /* Sort by domain goodness; if these are the same, sort by path
1088 goodness. (The sorting order isn't really specified; maybe it
1089 should be the other way around.) */
1090 return dgdiff ? dgdiff : pgdiff;
1093 /* Generate a `Cookie' header for a request that goes to HOST:PORT and
1094 requests PATH from the server. The resulting string is allocated
1095 with `malloc', and the caller is responsible for freeing it. If no
1096 cookies pertain to this request, i.e. no cookie header should be
1097 generated, NULL is returned. */
1100 cookie_jar_generate_cookie_header (struct cookie_jar *jar, const char *host,
1101 int port, const char *path,
1102 int connection_secure_p)
1104 struct cookie **chains;
1107 struct cookie *cookie;
1108 struct weighed_cookie *outgoing;
1111 int result_size, pos;
1113 /* First, find the cookie chains whose domains match HOST. */
1115 /* Allocate room for find_chains_of_host to write to. The number of
1116 chains can at most equal the number of subdomains, hence
1117 1+<number of dots>. */
/* NOTE(review): the element count is derived from HOST, and
   alloca_array presumably allocates on the stack -- confirm HOST's
   length is bounded by the caller.  */
1118 chains = alloca_array (struct cookie *, 1 + count_char (host, '.'));
1119 chain_count = find_chains_of_host (jar, host, chains);
1121 /* No cookies for this host. */
1125 cookies_now = time (NULL);
1127 /* Now extract from the chains those cookies that match our host
1128 (for domain_exact cookies), port (for cookies with port other
1129 than PORT_ANY), etc. See cookie_matches_url for details. */
1131 /* Count the number of matching cookies. */
1133 for (i = 0; i < chain_count; i++)
1134 for (cookie = chains[i]; cookie; cookie = cookie->next)
1135 if (cookie_matches_url (cookie, host, port, path, connection_secure_p,
1139 return NULL; /* no cookies matched */
1141 /* Allocate the array. */
1142 outgoing = alloca_array (struct weighed_cookie, count);
1144 /* Fill the array with all the matching cookies from the chains that
1147 for (i = 0; i < chain_count; i++)
1148 for (cookie = chains[i]; cookie; cookie = cookie->next)
1151 if (!cookie_matches_url (cookie, host, port, path,
1152 connection_secure_p, &pg))
1154 outgoing[ocnt].cookie = cookie;
1155 outgoing[ocnt].domain_goodness = strlen (cookie->domain);
1156 outgoing[ocnt].path_goodness = pg;
1159 assert (ocnt == count);
1161 /* Eliminate duplicate cookies; that is, those whose name and value
1163 count = eliminate_dups (outgoing, count);
1165 /* Sort the array so that best-matching domains come first, and
1166 that, within one domain, best-matching paths come first. */
1167 qsort (outgoing, count, sizeof (struct weighed_cookie), goodness_comparator);
1169 /* Count the space the name=value pairs will take. */
1171 for (i = 0; i < count; i++)
1173 struct cookie *c = outgoing[i].cookie;
1175 result_size += strlen (c->attr) + 1 + strlen (c->value);
1178 /* Allocate output buffer:
1180 name=value pairs -- result_size
1181 "; " separators -- (count - 1) * 2
1182 \r\n line ending -- 2
1183 \0 terminator -- 1 */
/* The leading 8 is the length of the "Cookie: " prefix copied in
   below.  The '=' of each pair was already counted by the `+ 1' in
   the loop above.  */
1184 result_size = 8 + result_size + (count - 1) * 2 + 2 + 1;
1185 result = xmalloc (result_size);
1187 strcpy (result, "Cookie: ");
1189 for (i = 0; i < count; i++)
1191 struct cookie *c = outgoing[i].cookie;
1192 int namlen = strlen (c->attr);
1193 int vallen = strlen (c->value);
1195 memcpy (result + pos, c->attr, namlen);
1197 result[pos++] = '=';
1198 memcpy (result + pos, c->value, vallen);
1202 result[pos++] = ';';
1203 result[pos++] = ' ';
1206 result[pos++] = '\r';
1207 result[pos++] = '\n';
1208 result[pos++] = '\0';
/* Cross-check: the number of bytes written must equal the size that
   was computed for the allocation.  */
1209 assert (pos == result_size);
1213 /* Support for loading and saving cookies. The format used for
1214 loading and saving should be the format of the `cookies.txt' file
1215 used by Netscape and Mozilla, at least the Unix versions.
1216 (Apparently IE can export cookies in that format as well.) The
1217 format goes like this:
1219 DOMAIN DOMAIN-FLAG PATH SECURE-FLAG TIMESTAMP ATTR-NAME ATTR-VALUE
1221 DOMAIN -- cookie domain, optionally followed by :PORT
1222 DOMAIN-FLAG -- whether all hosts in the domain match
1224 SECURE-FLAG -- whether cookie requires secure connection
1225 TIMESTAMP -- expiry timestamp, number of seconds since epoch
1226 ATTR-NAME -- name of the cookie attribute
1227 ATTR-VALUE -- value of the cookie attribute (empty if absent)
1229 The fields are separated by TABs. All fields are mandatory, except
1230 for ATTR-VALUE. The `-FLAG' fields are boolean, their legal values
1231 being "TRUE" and "FALSE". Empty lines, lines consisting of
1232 whitespace only, and comment lines (beginning with # optionally
1233 preceded by whitespace) are ignored.
1235 Example line from cookies.txt (split in two lines for readability):
1237 .google.com TRUE / FALSE 2147368447 \
1238 PREF ID=34bb47565bbcd47b:LD=en:NR=20:TM=985172580:LM=985739012
1242 /* If the region [B, E) ends with :<digits>, parse the number, return
1243 it, and store new boundary (location of the `:') to DOMAIN_E_PTR.
1244 If port is not specified, return 0. */
1247 domain_port (const char *domain_b, const char *domain_e,
1248 const char **domain_e_ptr)
/* Locate the first ':' inside [domain_b, domain_e).  */
1252 const char *colon = memchr (domain_b, ':', domain_e - domain_b);
/* Accumulate the decimal digits that follow the colon.
   NOTE(review): no limit on the number of digits is visible here, so
   a long digit run could overflow PORT (presumably an int, like
   cookie->port) -- TODO confirm and bound if so.  */
1255 for (p = colon + 1; p < domain_e && ISDIGIT (*p); p++)
1256 port = 10 * port + (*p - '0');
1258 /* Garbage following port number. */
/* Report the colon as the new end of the domain proper.  */
1260 *domain_e_ptr = colon;
1264 #define GET_WORD(p, b, e) do { \
1266 while (*p && *p != '\t') \
1269 if (b == e || !*p) \
1274 /* Load cookies from FILE. */
1277 cookie_jar_load (struct cookie_jar *jar, const char *file)
1280 FILE *fp = fopen (file, "r");
/* NOTE(review): the same message in cookie_jar_save is wrapped in
   _(), this one is not -- confirm whether it should be translatable
   too.  */
1283 logprintf (LOG_NOTQUIET, "Cannot open cookies file `%s': %s\n",
1284 file, strerror (errno));
1287 cookies_now = time (NULL);
1289 for (; ((line = read_whole_line (fp)) != NULL); xfree (line))
1291 struct cookie *cookie;
1297 char *domain_b = NULL, *domain_e = NULL;
1298 char *domflag_b = NULL, *domflag_e = NULL;
1299 char *path_b = NULL, *path_e = NULL;
1300 char *secure_b = NULL, *secure_e = NULL;
1301 char *expires_b = NULL, *expires_e = NULL;
1302 char *name_b = NULL, *name_e = NULL;
1303 char *value_b = NULL, *value_e = NULL;
1305 /* Skip leading white-space. */
1306 while (*p && ISSPACE (*p))
1308 /* Ignore empty lines. */
1309 if (!*p || *p == '#')
/* Split off the six mandatory TAB-separated fields.  GET_WORD scans
   up to the next TAB and tests for an empty field or a premature end
   of line; presumably it abandons the line in that case (the action
   is not visible in this excerpt).  */
1312 GET_WORD (p, domain_b, domain_e);
1313 GET_WORD (p, domflag_b, domflag_e);
1314 GET_WORD (p, path_b, path_e);
1315 GET_WORD (p, secure_b, secure_e);
1316 GET_WORD (p, expires_b, expires_e);
1317 GET_WORD (p, name_b, name_e);
1319 /* Don't use GET_WORD for value because it ends with newline,
1322 value_e = p + strlen (p);
1323 if (value_e > value_b && value_e[-1] == '\n')
1325 if (value_e > value_b && value_e[-1] == '\r')
1327 /* Empty values are legal (I think), so don't bother checking. */
1329 cookie = cookie_new ();
1331 cookie->attr = strdupdelim (name_b, name_e);
1332 cookie->value = strdupdelim (value_b, value_e);
1333 cookie->path = strdupdelim (path_b, path_e);
1334 cookie->secure = BOUNDED_EQUAL (secure_b, secure_e, "TRUE");
1336 /* Curl source says, quoting Andre Garcia: "flag: A TRUE/FALSE
1337 value indicating if all machines within a given domain can
1338 access the variable. This value is set automatically by the
1339 browser, depending on the value set for the domain." */
1340 cookie->domain_exact = !BOUNDED_EQUAL (domflag_b, domflag_e, "TRUE");
1342 /* DOMAIN needs special treatment because we might need to
1343 extract the port. */
1344 port = domain_port (domain_b, domain_e, (const char **)&domain_e);
/* NOTE(review): domain_port is documented to return 0 when no port
   is given, whereas cookie_new defaults the port to PORT_ANY.  The
   line deciding whether this assignment runs is not visible here --
   confirm a missing port does not overwrite PORT_ANY with 0.  */
1346 cookie->port = port;
1348 if (*domain_b == '.')
1349 ++domain_b; /* remove leading dot internally */
1350 cookie->domain = strdupdelim (domain_b, domain_e);
1352 /* safe default in case EXPIRES field is garbled. */
/* With this default, a field that sscanf cannot parse leaves EXPIRY
   below cookies_now, so the staleness test that follows rejects the
   cookie.  */
1353 expiry = (double)cookies_now - 1;
1355 /* I don't like changing the line, but it's safe here. (line is
1358 sscanf (expires_b, "%lf", &expiry);
1359 if (expiry < cookies_now)
1360 /* ignore stale cookie. */
1362 cookie->expiry_time = expiry;
1364 /* If the cookie has survived being saved into an external file,
1365 it is obviously permanent. */
1366 cookie->permanent = 1;
1368 store_cookie (jar, cookie);
1374 delete_cookie (cookie);
1379 /* Mapper for cookie_jar_save, callable by hash_table_map. VALUE points
1380 to the head in a chain of cookies. The function prints the entire chain. */
/* KEY is the domain string the chain is stored under, VALUE is the
   first cookie of that chain, and ARG is the FILE * to write to.
   Each cookie is tested for being permanent and for being expired
   before the output calls.

   NOTE(review): several short lines are missing from this listing
   (the statements controlled by the tests below, the output of the
   domain itself, the final return); confirm against the complete
   file.  */
1384 save_cookies_mapper (void *key, void *value, void *arg)
1386 FILE *fp = (FILE *)arg;
1387 char *domain = (char *)key;
1388 struct cookie *cookie = (struct cookie *)value;
/* Walk the whole chain.  */
1389 for (; cookie; cookie = cookie->next)
1391 if (!cookie->permanent)
1393 if (COOKIE_EXPIRED_P (cookie))
1395 if (!cookie->domain_exact)
/* A specific port is written as a :PORT suffix.  */
1398 if (cookie->port != PORT_ANY)
1399 fprintf (fp, ":%d", cookie->port);
/* Remaining fields, each preceded by a TAB: domain flag (TRUE when
   the match is not exact), path, secure flag, expiry time printed
   without decimals, name, value.  */
1400 fprintf (fp, "\t%s\t%s\t%s\t%.0f\t%s\t%s\n",
1401 cookie->domain_exact ? "FALSE" : "TRUE",
1402 cookie->path, cookie->secure ? "TRUE" : "FALSE",
1403 (double)cookie->expiry_time,
1404 cookie->attr, cookie->value);
/* NOTE(review): the condition under which mapping is stopped is not
   visible in this listing.  */
1406 return 1; /* stop mapping */
1411 /* Save cookies, in format described above, to FILE.

   FILE is opened for writing, a short header of # comment lines is
   written, and save_cookies_mapper is run over JAR->chains through
   hash_table_map.  Problems with opening, writing and closing are
   reported through logprintf.

   NOTE(review): short lines are missing from this listing (the
   declaration of fp and the tests that guard the first two error
   messages); confirm against the complete file.  */
1414 cookie_jar_save (struct cookie_jar *jar, const char *file)
1418 DEBUGP (("Saving cookies to %s.\n", file));
/* Current time; presumably what COOKIE_EXPIRED_P in the mapper
   compares against -- confirm.  */
1420 cookies_now = time (NULL);
1422 fp = fopen (file, "w");
1425 logprintf (LOG_NOTQUIET, _("Cannot open cookies file `%s': %s\n"),
1426 file, strerror (errno));
/* File header: three comment lines followed by a blank line.  */
1430 fputs ("# HTTP cookie file.\n", fp);
1431 fprintf (fp, "# Generated by Wget on %s.\n", datetime_str (NULL));
1432 fputs ("# Edit at your own risk.\n\n", fp);
/* Write the cookies, one chain at a time.  */
1434 hash_table_map (jar->chains, save_cookies_mapper, fp);
1437 logprintf (LOG_NOTQUIET, _("Error writing to `%s': %s\n"),
1438 file, strerror (errno));
/* The result of fclose is checked and a failure is reported.  */
1440 if (fclose (fp) < 0)
1441 logprintf (LOG_NOTQUIET, _("Error closing `%s': %s\n"),
1442 file, strerror (errno));
1444 DEBUGP (("Done saving cookies.\n"));
1447 /* Destroy all the elements in the chain and unhook it from the cookie
1448 jar. This is written in the form of a callback to hash_table_map
1449 and used by cookie_jar_delete to delete all the cookies in a jar. */
/* ARG is the cookie jar that owns the table being mapped.

   NOTE(review): the parameter names are swapped relative to
   save_cookies_mapper.  The first argument, named VALUE, is used as
   the chain key, and the second, named KEY, as the head of the
   chain.  The casts below are consistent with the key being passed
   first.

   NOTE(review): short lines are missing from this listing (the loop
   header, the step to the next cookie, the return, and whatever
   frees the key); confirm against the complete file.  */
1453 nuke_cookie_chain (void *value, void *key, void *arg)
1455 char *chain_key = (char *)value;
1456 struct cookie *chain = (struct cookie *)key;
1457 struct cookie_jar *jar = (struct cookie_jar *)arg;
1459 /* Remove the chain from the table and free the key. */
1460 hash_table_remove (jar->chains, chain_key);
1463 /* Then delete all the cookies in the chain. */
/* The successor is read before the cookie is deleted.  */
1466 struct cookie *next = chain->next;
1467 delete_cookie (chain);
1475 /* Clean up cookie-related data.

   Runs nuke_cookie_chain over every chain in JAR->chains, then
   destroys the table itself.
   NOTE(review): whether JAR itself is released is not visible in
   this listing; confirm against the complete file.  */
1478 cookie_jar_delete (struct cookie_jar *jar)
1480 hash_table_map (jar->chains, nuke_cookie_chain, jar);
1481 hash_table_destroy (jar->chains);
1485 /* Test cases. Currently this only tests parse_set_cookies. To
1486 use, recompile Wget with -DTEST_COOKIES and call test_cookies(). */
/* Strings captured by the callback below, stored flat as name,
   value, name, value, ...  There is room for ten entries, that is
   five pairs.  */
1491 char *test_results[10];
/* Callback handed to parse_set_cookies by the tests.  Copies the
   name delimited by NB and NE and the value delimited by VB and VE
   into the next two slots of test_results.  The cookie argument is
   not used.
   NOTE(review): no bounds check on test_count is visible, and the
   declaration of test_count and the return statement are missing
   from this listing.  */
1493 static int test_parse_cookies_callback (struct cookie *ignored,
1494 const char *nb, const char *ne,
1495 const char *vb, const char *ve)
1497 test_results[test_count++] = strdupdelim (nb, ne);
1498 test_results[test_count++] = strdupdelim (vb, ve);
/* Table-driven checks of parse_set_cookies.  Each success case pairs
   an input string with the NULL-terminated list of names and values
   the callback is expected to capture.  Each failure case is an
   input for which a message is printed when no error is reported.

   NOTE(review): the function header, the declaration of the success
   table and several short statements are missing from this listing;
   confirm against the complete file.  */
1505 /* Tests expected to succeed: */
1511 { "arg=value", {"arg", "value", NULL} },
1512 { "arg1=value1;arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
1513 { "arg1=value1; arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
1514 { "arg1=value1; arg2=value2;", {"arg1", "value1", "arg2", "value2", NULL} },
1515 { "arg1=value1; arg2=value2; ", {"arg1", "value1", "arg2", "value2", NULL} },
1516 { "arg1=\"value1\"; arg2=\"\"", {"arg1", "value1", "arg2", "", NULL} },
1517 { "arg=", {"arg", "", NULL} },
1518 { "arg1=; arg2=", {"arg1", "", "arg2", "", NULL} },
1519 { "arg1 = ; arg2= ", {"arg1", "", "arg2", "", NULL} },
1522 /* Tests expected to fail: */
1523 static char *tests_fail[] = {
1525 "arg=\"unterminated",
1527 "arg1=;=another-empty-name",
/* Run every success case and compare what the callback captured with
   the expected strings.  */
1531 for (i = 0; i < countof (tests_succ); i++)
1534 char *data = tests_succ[i].data;
1535 char **expected = tests_succ[i].results;
1539 c = parse_set_cookies (data, test_parse_cookies_callback, 1);
/* NOTE(review): the test guarding this message is not visible in
   this listing.  */
1542 printf ("NULL cookie returned for valid data: %s\n", data);
/* Captured strings come in pairs: an even index holds a name, the
   following odd index its value.  */
1546 for (ind = 0; ind < test_count; ind += 2)
1550 if (0 != strcmp (expected[ind], test_results[ind]))
1551 printf ("Invalid name %d for '%s' (expected '%s', got '%s')\n",
1552 ind / 2 + 1, data, expected[ind], test_results[ind]);
1553 if (0 != strcmp (expected[ind + 1], test_results[ind + 1]))
1554 printf ("Invalid value %d for '%s' (expected '%s', got '%s')\n",
1555 ind / 2 + 1, data, expected[ind + 1], test_results[ind + 1]);
/* Report a count mismatch: captured strings are left over, or
   expected strings remain.  */
1557 if (ind < test_count || expected[ind])
1558 printf ("Unmatched number of results: %s\n", data);
/* Run every failure case.  */
1561 for (i = 0; i < countof (tests_fail); i++)
1564 char *data = tests_fail[i];
1566 c = parse_set_cookies (data, test_parse_cookies_callback, 1);
/* NOTE(review): the test guarding this message is not visible in
   this listing.  */
1568 printf ("Failed to report error on invalid data: %s\n", data);
1571 #endif /* TEST_COOKIES */