1 /* Support for cookies.
2 Copyright (C) 2001, 2002 Free Software Foundation, Inc.
4 This file is part of GNU Wget.
6 GNU Wget is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or (at
9 your option) any later version.
11 GNU Wget is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Wget; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 In addition, as a special exception, the Free Software Foundation
21 gives permission to link the code of its release of Wget with the
22 OpenSSL project's "OpenSSL" library (or with modified versions of it
23 that use the same license as the "OpenSSL" library), and distribute
24 the linked executables. You must obey the GNU General Public License
25 in all respects for all of the code used other than "OpenSSL". If you
26 modify this file, you may extend this exception to your version of the
27 file, but you are not obligated to do so. If you do not wish to do
28 so, delete this exception statement from your version. */
30 /* Written by Hrvoje Niksic. Parts are loosely inspired by cookie
31 code submitted by Tomasz Wegrzanowski.
33 Ideas for future work:
35 * Implement limits on cookie-related sizes, such as max. cookie
36 size, max. number of cookies, etc.
38 * Add more "cookie jar" methods, such as methods to iterate over
39 stored cookies, to clear temporary cookies, to perform
40 intelligent auto-saving, etc.
42 * Support `Set-Cookie2' and `Cookie2' headers? Does anyone really
62 /* This should *really* be in a .h file! */
63 time_t http_atotm PARAMS ((const char *));
65 /* Declarations of `struct cookie' and the most basic functions. */
67 /* Cookie jar serves as cookie storage and a means of retrieving
68 cookies efficiently. All cookies with the same domain are stored
69 in a linked list called "chain". A cookie chain can be reached by
70 looking up the domain in the cookie jar's chains table.
72 For example, to reach all the cookies under google.com, one must
73 execute hash_table_get(jar->chains, "google.com"). Of
74 course, when sending a cookie to `www.google.com', one must search
75 for cookies that belong to either `www.google.com' or `google.com'
76 -- but the point is that the code doesn't need to go through *all*
80 /* Cookie chains indexed by domain. */
81 struct hash_table *chains;
83 int cookie_count; /* number of cookies in the jar. */
86 /* Value set by entry point functions, so that the low-level
87 routines don't need to call time() all the time. */
93 struct cookie_jar *jar = xnew (struct cookie_jar);
94 jar->chains = make_nocase_string_hash_table (0);
95 jar->cookie_count = 0;
100 char *domain; /* domain of the cookie */
101 int port; /* port number */
102 char *path; /* path prefix of the cookie */
104 int secure; /* whether cookie should be
105 transmitted over non-https
107 int domain_exact; /* whether DOMAIN must match as a
110 int permanent; /* whether the cookie should outlive
112 time_t expiry_time; /* time when the cookie expires, 0
113 means undetermined. */
115 int discard_requested; /* whether cookie was created to
116 request discarding another
119 char *attr; /* cookie attribute name */
120 char *value; /* cookie attribute value */
122 struct cookie *next; /* used for chaining of cookies in the
126 #define PORT_ANY (-1)
128 /* Allocate and return a new, empty cookie structure. */
130 static struct cookie *
133 struct cookie *cookie = xnew0 (struct cookie);
135 /* Both cookie->permanent and cookie->expiry_time are now 0. This
136 means that the cookie doesn't expire, but is only valid for this
137 session (i.e. not written out to disk). */
139 cookie->port = PORT_ANY;
143 /* Non-zero if the cookie has expired. Assumes cookies_now has been
144 set by one of the entry point functions. */
147 cookie_expired_p (const struct cookie *c)
149 return c->expiry_time != 0 && c->expiry_time < cookies_now;
152 /* Deallocate COOKIE and its components. */
155 delete_cookie (struct cookie *cookie)
157 xfree_null (cookie->domain);
158 xfree_null (cookie->path);
159 xfree_null (cookie->attr);
160 xfree_null (cookie->value);
164 /* Functions for storing cookies.
166 All cookies can be reached beginning with jar->chains. The key in
167 that table is the domain name, and the value is a linked list of
168 all cookies from that domain. Every new cookie is placed on the
171 /* Find and return a cookie in JAR whose domain, path, and attribute
172 name correspond to COOKIE. If found, PREVPTR will point to the
173 location of the cookie previous in chain, or NULL if the found
174 cookie is the head of a chain.
176 If no matching cookie is found, return NULL. */
178 static struct cookie *
179 find_matching_cookie (struct cookie_jar *jar, struct cookie *cookie,
180 struct cookie **prevptr)
182 struct cookie *chain, *prev;
184 chain = hash_table_get (jar->chains, cookie->domain);
189 for (; chain; prev = chain, chain = chain->next)
190 if (0 == strcmp (cookie->path, chain->path)
191 && 0 == strcmp (cookie->attr, chain->attr)
192 && cookie->port == chain->port)
203 /* Store COOKIE to the jar.
205 This is done by placing COOKIE at the head of its chain. However,
206 if COOKIE matches a cookie already in memory, as determined by
207 find_matching_cookie, the old cookie is unlinked and destroyed.
209 The key of each chain's hash table entry is allocated only the
210 first time; next hash_table_put's reuse the same key. */
213 store_cookie (struct cookie_jar *jar, struct cookie *cookie)
215 struct cookie *chain_head;
218 if (hash_table_get_pair (jar->chains, cookie->domain,
219 &chain_key, &chain_head))
221 /* A chain of cookies in this domain already exists. Check for
222 duplicates -- if an extant cookie exactly matches our domain,
223 port, path, and name, replace it. */
225 struct cookie *victim = find_matching_cookie (jar, cookie, &prev);
229 /* Remove VICTIM from the chain. COOKIE will be placed at
233 prev->next = victim->next;
234 cookie->next = chain_head;
238 /* prev is NULL; apparently VICTIM was at the head of
239 the chain. This place will be taken by COOKIE, so
240 all we need to do is: */
241 cookie->next = victim->next;
243 delete_cookie (victim);
245 DEBUGP (("Deleted old cookie (to be replaced.)\n"));
248 cookie->next = chain_head;
252 /* We are now creating the chain. Use a copy of cookie->domain
253 as the key for the life-time of the chain. Using
254 cookie->domain would be unsafe because the life-time of the
255 chain may exceed the life-time of the cookie. (Cookies may
256 be deleted from the chain by this very function.) */
258 chain_key = xstrdup (cookie->domain);
261 hash_table_put (jar->chains, chain_key, cookie);
267 time_t exptime = cookie->expiry_time;
268 DEBUGP (("\nStored cookie %s %d%s %s <%s> <%s> [expiry %s] %s %s\n",
269 cookie->domain, cookie->port,
270 cookie->port == PORT_ANY ? " (ANY)" : "",
272 cookie->permanent ? "permanent" : "session",
273 cookie->secure ? "secure" : "insecure",
274 cookie->expiry_time ? datetime_str (&exptime) : "none",
275 cookie->attr, cookie->value));
280 /* Discard a cookie matching COOKIE's domain, port, path, and
281 attribute name. This gets called when we encounter a cookie whose
282 expiry date is in the past, or whose max-age is set to 0. The
283 former corresponds to netscape cookie spec, while the latter is
284 specified by rfc2109. */
287 discard_matching_cookie (struct cookie_jar *jar, struct cookie *cookie)
289 struct cookie *prev, *victim;
291 if (!hash_table_count (jar->chains))
292 /* No elements == nothing to discard. */
295 victim = find_matching_cookie (jar, cookie, &prev);
299 /* Simply unchain the victim. */
300 prev->next = victim->next;
303 /* VICTIM was head of its chain. We need to place a new
304 cookie at the head. */
305 char *chain_key = NULL;
308 res = hash_table_get_pair (jar->chains, victim->domain,
313 /* VICTIM was the only cookie in the chain. Destroy the
314 chain and deallocate the chain key. */
315 hash_table_remove (jar->chains, victim->domain);
319 hash_table_put (jar->chains, chain_key, victim->next);
321 delete_cookie (victim);
322 DEBUGP (("Discarded old cookie.\n"));
326 /* Functions for parsing the `Set-Cookie' header, and creating new
327 cookies from the wire. */
329 #define NAME_IS(string_literal) \
330 BOUNDED_EQUAL_NO_CASE (name_b, name_e, string_literal)
332 #define VALUE_EXISTS (value_b && value_e)
334 #define VALUE_NON_EMPTY (VALUE_EXISTS && (value_b != value_e))
336 /* Update the appropriate cookie field. [name_b, name_e) are expected
337 to delimit the attribute name, while [value_b, value_e) (optional)
338 should delimit the attribute value.
340 When called the first time, it will set the cookie's attribute name
341 and value. After that, it will check the attribute name for
342 special fields such as `domain', `path', etc. Where appropriate,
343 it will parse the values of the fields it recognizes and fill the
344 corresponding fields in COOKIE.
346 Returns 1 on success. Returns zero in case a syntax error is
347 found; such a cookie should be discarded. */
350 update_cookie_field (struct cookie *cookie,
351 const char *name_b, const char *name_e,
352 const char *value_b, const char *value_e)
354 assert (name_b != NULL && name_e != NULL);
360 cookie->attr = strdupdelim (name_b, name_e);
361 cookie->value = strdupdelim (value_b, value_e);
365 if (NAME_IS ("domain"))
367 if (!VALUE_NON_EMPTY)
369 xfree_null (cookie->domain);
370 /* Strictly speaking, we should set cookie->domain_exact if the
371 domain doesn't begin with a dot. But many sites set the
372 domain to "foo.com" and expect "subhost.foo.com" to get the
373 cookie, and it apparently works. */
376 cookie->domain = strdupdelim (value_b, value_e);
379 else if (NAME_IS ("path"))
381 if (!VALUE_NON_EMPTY)
383 xfree_null (cookie->path);
384 cookie->path = strdupdelim (value_b, value_e);
387 else if (NAME_IS ("expires"))
392 if (!VALUE_NON_EMPTY)
394 BOUNDED_TO_ALLOCA (value_b, value_e, value_copy);
396 expires = http_atotm (value_copy);
397 if (expires != (time_t) -1)
399 cookie->permanent = 1;
400 cookie->expiry_time = expires;
403 /* Error in expiration spec. Assume default (cookie doesn't
404 expire, but valid only for this session.) */
407 /* According to netscape's specification, expiry time in the
408 past means that discarding of a matching cookie is
410 if (cookie->expiry_time < cookies_now)
411 cookie->discard_requested = 1;
415 else if (NAME_IS ("max-age"))
420 if (!VALUE_NON_EMPTY)
422 BOUNDED_TO_ALLOCA (value_b, value_e, value_copy);
424 sscanf (value_copy, "%lf", &maxage);
426 /* something went wrong. */
428 cookie->permanent = 1;
429 cookie->expiry_time = cookies_now + maxage;
431 /* According to rfc2109, a cookie with max-age of 0 means that
432 discarding of a matching cookie is requested. */
434 cookie->discard_requested = 1;
438 else if (NAME_IS ("secure"))
440 /* ignore value completely */
445 /* Unrecognized attribute; ignore it. */
451 /* Returns non-zero for characters that are legal in the name of an
452 attribute. This used to allow only alphanumerics, '-', and '_',
453 but we need to be more lenient because a number of sites want to
454 use weirder attribute names. rfc2965 "informally specifies"
455 attribute name (token) as "a sequence of non-special, non-white
456 space characters". So we allow everything except the stuff we know
459 #define ATTR_NAME_CHAR(c) ((c) > 32 && (c) < 127 \
460 && (c) != '"' && (c) != '=' \
461 && (c) != ';' && (c) != ',')
463 /* Parse the contents of the `Set-Cookie' header. The header looks
466 name1=value1; name2=value2; ...
468 Trailing semicolon is optional; spaces are allowed between all
469 tokens. Additionally, values may be quoted.
471 A new cookie is returned upon success, NULL otherwise. The
472 specified CALLBACK function (normally `update_cookie_field') is used
473 to update the fields of the newly created cookie structure. */
475 static struct cookie *
476 parse_set_cookies (const char *sc,
477 int (*callback) (struct cookie *,
478 const char *, const char *,
479 const char *, const char *),
482 struct cookie *cookie = cookie_new ();
484 /* #### Hand-written DFAs are no fun to debug. We'd be better off
485 to rewrite this as an inline parser. */
487 enum { S_START, S_NAME, S_NAME_POST,
488 S_VALUE_PRE, S_VALUE, S_QUOTED_VALUE, S_VALUE_TRAILSPACE,
489 S_ATTR_ACTION, S_DONE, S_ERROR
495 const char *name_b = NULL, *name_e = NULL;
496 const char *value_b = NULL, *value_e = NULL;
500 while (state != S_DONE && state != S_ERROR)
507 else if (ISSPACE (c))
508 /* Strip all whitespace preceding the name. */
510 else if (ATTR_NAME_CHAR (c))
516 /* empty attr name not allowed */
520 if (!c || c == ';' || c == '=' || ISSPACE (c))
525 else if (ATTR_NAME_CHAR (c))
533 value_b = value_e = NULL;
536 state = S_ATTR_ACTION;
543 else if (ISSPACE (c))
544 /* Ignore space and keep the state. */
552 value_b = value_e = p;
555 state = S_ATTR_ACTION;
561 state = S_QUOTED_VALUE;
563 else if (ISSPACE (c))
573 if (!c || c == ';' || ISSPACE (c))
576 state = S_VALUE_TRAILSPACE;
580 value_e = NULL; /* no trailing space */
589 state = S_VALUE_TRAILSPACE;
596 case S_VALUE_TRAILSPACE:
600 state = S_ATTR_ACTION;
603 state = S_ATTR_ACTION;
604 else if (ISSPACE (c))
611 int legal = callback (cookie, name_b, name_e, value_b, value_e);
617 BOUNDED_TO_ALLOCA (name_b, name_e, name);
618 logprintf (LOG_NOTQUIET,
619 _("Error in Set-Cookie, field `%s'"),
630 /* handled by loop condition */
637 delete_cookie (cookie);
638 if (state != S_ERROR)
642 logprintf (LOG_NOTQUIET,
643 _("Syntax error in Set-Cookie: %s at position %d.\n"),
644 escnonprint (sc), p - sc);
648 /* Sanity checks. These are important, otherwise it is possible for
649 malicious attackers to destroy important cookie information and/or
650 violate your privacy. */
653 #define REQUIRE_DIGITS(p) do { \
656 for (++p; ISDIGIT (*p); p++) \
660 #define REQUIRE_DOT(p) do { \
665 /* Check whether ADDR matches <digits>.<digits>.<digits>.<digits>.
667 We don't want to call network functions like inet_addr() because
668 all we need is a check, preferably one that is small, fast, and
672 numeric_address_p (const char *addr)
674 const char *p = addr;
676 REQUIRE_DIGITS (p); /* A */
677 REQUIRE_DOT (p); /* . */
678 REQUIRE_DIGITS (p); /* B */
679 REQUIRE_DOT (p); /* . */
680 REQUIRE_DIGITS (p); /* C */
681 REQUIRE_DOT (p); /* . */
682 REQUIRE_DIGITS (p); /* D */
689 /* Check whether COOKIE_DOMAIN is an appropriate domain for HOST.
690 Originally I tried to make the check compliant with rfc2109, but
691 the sites deviated too often, so I had to fall back to "tail
692 matching", as defined by the original Netscape's cookie spec. */
695 check_domain_match (const char *cookie_domain, const char *host)
699 /* Numeric address requires exact match. It also requires HOST to
701 if (numeric_address_p (cookie_domain))
702 return 0 == strcmp (cookie_domain, host);
706 /* For the sake of efficiency, check for exact match first. */
707 if (0 == strcasecmp (cookie_domain, host))
712 /* HOST must match the tail of cookie_domain. */
713 if (!match_tail (host, cookie_domain, 1))
716 /* We know that COOKIE_DOMAIN is a subset of HOST; however, we must
717 make sure that somebody is not trying to set the cookie for a
718 subdomain shared by many entities. For example, "company.co.uk"
719 must not be allowed to set a cookie for ".co.uk". On the other
720 hand, "sso.redhat.de" should be able to set a cookie for
723 The only marginally sane way to handle this I can think of is to
724 reject on the basis of the length of the second-level domain name
725 (but only when the top-level domain is unknown), with the assumption
726 that those of three or less characters could be reserved. For
729 .co.org -> works because the TLD is known
730 .co.uk -> doesn't work because "co" is only two chars long
731 .com.au -> doesn't work because "com" is only 3 chars long
732 .cnn.uk -> doesn't work because "cnn" is also only 3 chars long (ugh)
733 .cnn.de -> doesn't work for the same reason (ugh!!)
734 .abcd.de -> works because "abcd" is 4 chars long
735 .img.cnn.de -> works because it's not trying to set the 2nd level domain
736 .cnn.co.uk -> works for the same reason
738 That should prevent misuse, while allowing reasonable usage. If
739 someone knows of a better way to handle this, please let me
742 const char *p = cookie_domain;
743 int dccount = 1; /* number of domain components */
744 int ldcl = 0; /* last domain component length */
745 int nldcl = 0; /* next to last domain component length */
748 /* Ignore leading period in this calculation. */
751 for (out = 0; !out; p++)
759 /* Empty domain component found -- the domain is invalid. */
761 if (*(p + 1) == '\0')
763 /* Tolerate trailing '.' by not treating the domain as
764 one ending with an empty domain component. */
786 int known_toplevel = 0;
787 static const char *known_toplevel_domains[] = {
788 ".com", ".edu", ".net", ".org", ".gov", ".mil", ".int"
790 for (i = 0; i < countof (known_toplevel_domains); i++)
791 if (match_tail (cookie_domain, known_toplevel_domains[i], 1))
796 if (!known_toplevel && nldcl <= 3)
803 /* Don't allow the host "foobar.com" to set a cookie for domain
805 if (*cookie_domain != '.')
807 int dlen = strlen (cookie_domain);
808 int hlen = strlen (host);
809 /* cookie host: hostname.foobar.com */
810 /* desired domain: bar.com */
811 /* '.' must be here in host-> ^ */
812 if (hlen > dlen && host[hlen - dlen - 1] != '.')
821 static int path_matches PARAMS ((const char *, const char *));
823 /* Check whether PATH begins with COOKIE_PATH. */
826 check_path_match (const char *cookie_path, const char *path)
828 return path_matches (path, cookie_path);
831 /* Process the HTTP `Set-Cookie' header. This results in storing the
832 cookie or discarding a matching one, or ignoring it completely, all
833 depending on the contents. */
836 cookie_handle_set_cookie (struct cookie_jar *jar,
837 const char *host, int port,
838 const char *path, const char *set_cookie)
840 struct cookie *cookie;
841 cookies_now = time (NULL);
843 cookie = parse_set_cookies (set_cookie, update_cookie_field, 0);
847 /* Sanitize parts of cookie. */
852 /* If the domain was not provided, we use the one we're talking
853 to, and set exact match. */
854 cookie->domain = xstrdup (host);
855 cookie->domain_exact = 1;
856 /* Set the port, but only if it's non-default. */
857 if (port != 80 && port != 443)
862 if (!check_domain_match (cookie->domain, host))
864 logprintf (LOG_NOTQUIET,
865 "Cookie coming from %s attempted to set domain to %s\n",
866 escnonprint (host), escnonprint (cookie->domain));
867 xfree (cookie->domain);
874 /* The cookie doesn't set path: set it to the URL path, sans the
875 file part ("/dir/file" truncated to "/dir/"). */
876 char *trailing_slash = strrchr (path, '/');
878 cookie->path = strdupdelim (path, trailing_slash + 1);
880 /* no slash in the string -- can this even happen? */
881 cookie->path = xstrdup (path);
885 /* The cookie sets its own path; verify that it is legal. */
886 if (!check_path_match (cookie->path, path))
888 DEBUGP (("Attempt to fake the path: %s, %s\n",
889 cookie->path, path));
894 /* Now store the cookie, or discard an existing cookie, if
895 discarding was requested. */
897 if (cookie->discard_requested)
899 discard_matching_cookie (jar, cookie);
903 store_cookie (jar, cookie);
908 delete_cookie (cookie);
911 /* Support for sending out cookies in HTTP requests, based on
912 previously stored cookies. Entry point is
913 `build_cookies_request'. */
915 /* Return a count of how many times CHR occurs in STRING. */
918 count_char (const char *string, char chr)
922 for (p = string; *p; p++)
928 /* Find the cookie chains whose domains match HOST and store them to
931 A cookie chain is the head of a list of cookies that belong to a
932 host/domain. Given HOST "img.search.xemacs.org", this function
933 will return the chains for "img.search.xemacs.org",
934 "search.xemacs.org", and "xemacs.org" -- those of them that exist
937 DEST should be large enough to accept (in the worst case) as many
938 elements as there are domain components of HOST. */
941 find_chains_of_host (struct cookie_jar *jar, const char *host,
942 struct cookie *dest[])
947 /* Bail out quickly if there are no cookies in the jar. */
948 if (!hash_table_count (jar->chains))
951 if (numeric_address_p (host))
952 /* If host is an IP address, only check for the exact match. */
955 /* Otherwise, check all the subdomains except the top-level (last)
956 one. As a domain with N components has N-1 dots, the number of
957 passes equals the number of dots. */
958 passes = count_char (host, '.');
962 /* Find chains that match HOST, starting with exact match and
963 progressing to less specific domains. For instance, given HOST
964 fly.srk.fer.hr, first look for fly.srk.fer.hr's chain, then
965 srk.fer.hr's, then fer.hr's. */
968 struct cookie *chain = hash_table_get (jar->chains, host);
970 dest[dest_count++] = chain;
971 if (++passcnt >= passes)
973 host = strchr (host, '.') + 1;
979 /* If FULL_PATH begins with PREFIX, return the length of PREFIX, zero
983 path_matches (const char *full_path, const char *prefix)
988 /* Wget's HTTP paths do not begin with '/' (the URL code treats it
989 as a mere separator, inspired by rfc1808), but the '/' is
990 assumed when matching against the cookie stuff. */
994 len = strlen (prefix);
996 if (0 != strncmp (full_path, prefix, len))
997 /* FULL_PATH doesn't begin with PREFIX. */
1000 /* Length of PREFIX determines the quality of the match. */
1004 /* Return non-zero iff COOKIE matches the provided parameters of the
1005 URL being downloaded: HOST, PORT, PATH, and SECFLAG.
1007 If PATH_GOODNESS is non-NULL, store the "path goodness" value
1008 there. That value is a measure of how closely COOKIE matches PATH,
1009 used for ordering cookies. */
1012 cookie_matches_url (const struct cookie *cookie,
1013 const char *host, int port, const char *path,
1014 int secflag, int *path_goodness)
1018 if (cookie_expired_p (cookie))
1019 /* Ignore stale cookies. Don't bother unchaining the cookie at
1020 this point -- Wget is a relatively short-lived application, and
1021 stale cookies will not be saved by `save_cookies'. On the
1022 other hand, this function should be as efficient as
1026 if (cookie->secure && !secflag)
1027 /* Don't transmit secure cookies over insecure connections. */
1029 if (cookie->port != PORT_ANY && cookie->port != port)
1032 /* If exact domain match is required, verify that cookie's domain is
1033 equal to HOST. If not, assume success on the grounds of the
1034 cookie's chain having been found by find_chains_of_host. */
1035 if (cookie->domain_exact
1036 && 0 != strcasecmp (host, cookie->domain))
1039 pg = path_matches (path, cookie->path);
1044 /* If the caller requested path_goodness, we return it. This is
1045 an optimization, so that the caller doesn't need to call
1046 path_matches() again. */
1047 *path_goodness = pg;
1051 /* A structure that points to a cookie, along with the additional
1052 information about the cookie's "goodness". This allows us to sort
1053 the cookies when returning them to the server, as required by the
1056 struct weighed_cookie {
1057 struct cookie *cookie;
1058 int domain_goodness;
1062 /* Comparator used for uniquifying the list. */
1065 equality_comparator (const void *p1, const void *p2)
1067 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
1068 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
1070 int namecmp = strcmp (wc1->cookie->attr, wc2->cookie->attr);
1071 int valuecmp = strcmp (wc1->cookie->value, wc2->cookie->value);
1073 /* We only really care whether both name and value are equal. We
1074 return them in this order only for consistency... */
1075 return namecmp ? namecmp : valuecmp;
1078 /* Eliminate duplicate cookies. "Duplicate cookies" are any two
1079 cookies with the same attr name and value. Whenever a duplicate
1080 pair is found, one of the cookies is removed. */
1083 eliminate_dups (struct weighed_cookie *outgoing, int count)
1085 struct weighed_cookie *h; /* hare */
1086 struct weighed_cookie *t; /* tortoise */
1087 struct weighed_cookie *end = outgoing + count;
1089 /* We deploy a simple uniquify algorithm: first sort the array
1090 according to our sort criteria, then copy it to itself, comparing
1091 each cookie to its neighbor and ignoring the duplicates. */
1093 qsort (outgoing, count, sizeof (struct weighed_cookie), equality_comparator);
1095 /* "Hare" runs through all the entries in the array, followed by
1096 "tortoise". If a duplicate is found, the hare skips it.
1097 Non-duplicate entries are copied to the tortoise ptr. */
1099 for (h = t = outgoing; h < end; h++)
1103 struct cookie *c0 = h[0].cookie;
1104 struct cookie *c1 = h[1].cookie;
1105 if (!strcmp (c0->attr, c1->attr) && !strcmp (c0->value, c1->value))
1106 continue; /* ignore the duplicate */
1109 /* If the hare has advanced past the tortoise (because of
1110 previous dups), make sure the values get copied. Otherwise,
1111 no copying is necessary. */
1117 return t - outgoing;
1120 /* Comparator used for sorting by quality. */
1123 goodness_comparator (const void *p1, const void *p2)
1125 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
1126 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
1128 /* Subtractions take `wc2' as the first argument because we want a
1129 sort in *decreasing* order of goodness. */
1130 int dgdiff = wc2->domain_goodness - wc1->domain_goodness;
1131 int pgdiff = wc2->path_goodness - wc1->path_goodness;
1133 /* Sort by domain goodness; if these are the same, sort by path
1134 goodness. (The sorting order isn't really specified; maybe it
1135 should be the other way around.) */
1136 return dgdiff ? dgdiff : pgdiff;
1139 /* Generate a `Cookie' header for a request that goes to HOST:PORT and
1140 requests PATH from the server. The resulting string is allocated
1141 with `malloc', and the caller is responsible for freeing it. If no
1142 cookies pertain to this request, i.e. no cookie header should be
1143 generated, NULL is returned. */
1146 cookie_header (struct cookie_jar *jar, const char *host,
1147 int port, const char *path, int secflag)
1149 struct cookie **chains;
1152 struct cookie *cookie;
1153 struct weighed_cookie *outgoing;
1156 int result_size, pos;
1158 /* First, find the cookie chains whose domains match HOST. */
1160 /* Allocate room for find_chains_of_host to write to. The number of
1161 chains can at most equal the number of subdomains, hence
1162 1+<number of dots>. */
1163 chains = alloca_array (struct cookie *, 1 + count_char (host, '.'));
1164 chain_count = find_chains_of_host (jar, host, chains);
1166 /* No cookies for this host. */
1170 cookies_now = time (NULL);
1172 /* Now extract from the chains those cookies that match our host
1173 (for domain_exact cookies), port (for cookies with port other
1174 than PORT_ANY), etc. See cookie_matches_url for details. */
1176 /* Count the number of matching cookies. */
1178 for (i = 0; i < chain_count; i++)
1179 for (cookie = chains[i]; cookie; cookie = cookie->next)
1180 if (cookie_matches_url (cookie, host, port, path, secflag, NULL))
1183 return NULL; /* no cookies matched */
1185 /* Allocate the array. */
1186 outgoing = alloca_array (struct weighed_cookie, count);
1188 /* Fill the array with all the matching cookies from the chains that
1191 for (i = 0; i < chain_count; i++)
1192 for (cookie = chains[i]; cookie; cookie = cookie->next)
1195 if (!cookie_matches_url (cookie, host, port, path, secflag, &pg))
1197 outgoing[ocnt].cookie = cookie;
1198 outgoing[ocnt].domain_goodness = strlen (cookie->domain);
1199 outgoing[ocnt].path_goodness = pg;
1202 assert (ocnt == count);
1204 /* Eliminate duplicate cookies; that is, those whose name and value
1206 count = eliminate_dups (outgoing, count);
1208 /* Sort the array so that best-matching domains come first, and
1209 that, within one domain, best-matching paths come first. */
1210 qsort (outgoing, count, sizeof (struct weighed_cookie), goodness_comparator);
1212 /* Count the space the name=value pairs will take. */
1214 for (i = 0; i < count; i++)
1216 struct cookie *c = outgoing[i].cookie;
1218 result_size += strlen (c->attr) + 1 + strlen (c->value);
1221 /* Allocate output buffer:
1222 name=value pairs -- result_size
1223 "; " separators -- (count - 1) * 2
1224 \0 terminator -- 1 */
1225 result_size = result_size + (count - 1) * 2 + 1;
1226 result = xmalloc (result_size);
1228 for (i = 0; i < count; i++)
1230 struct cookie *c = outgoing[i].cookie;
1231 int namlen = strlen (c->attr);
1232 int vallen = strlen (c->value);
1234 memcpy (result + pos, c->attr, namlen);
1236 result[pos++] = '=';
1237 memcpy (result + pos, c->value, vallen);
1241 result[pos++] = ';';
1242 result[pos++] = ' ';
1245 result[pos++] = '\0';
1246 assert (pos == result_size);
1250 /* Support for loading and saving cookies. The format used for
1251 loading and saving should be the format of the `cookies.txt' file
1252 used by Netscape and Mozilla, at least the Unix versions.
1253 (Apparently IE can export cookies in that format as well.) The
1254 format goes like this:
1256 DOMAIN DOMAIN-FLAG PATH SECURE-FLAG TIMESTAMP ATTR-NAME ATTR-VALUE
1258 DOMAIN -- cookie domain, optionally followed by :PORT
1259 DOMAIN-FLAG -- whether all hosts in the domain match
1261 SECURE-FLAG -- whether cookie requires secure connection
1262 TIMESTAMP -- expiry timestamp, number of seconds since epoch
1263 ATTR-NAME -- name of the cookie attribute
1264 ATTR-VALUE -- value of the cookie attribute (empty if absent)
1266 The fields are separated by TABs. All fields are mandatory, except
1267 for ATTR-VALUE. The `-FLAG' fields are boolean, their legal values
1268 being "TRUE" and "FALSE". Empty lines, lines consisting of
1269 whitespace only, and comment lines (beginning with # optionally
1270 preceded by whitespace) are ignored.
1272 Example line from cookies.txt (split in two lines for readability):
1274 .google.com TRUE / FALSE 2147368447 \
1275 PREF ID=34bb47565bbcd47b:LD=en:NR=20:TM=985172580:LM=985739012
1279 /* If the region [B, E) ends with :<digits>, parse the number, return
1280 it, and store new boundary (location of the `:') to DOMAIN_E_PTR.
1281 If port is not specified, return 0. */
1284 domain_port (const char *domain_b, const char *domain_e,
1285 const char **domain_e_ptr)
1289 const char *colon = memchr (domain_b, ':', domain_e - domain_b);
1292 for (p = colon + 1; p < domain_e && ISDIGIT (*p); p++)
1293 port = 10 * port + (*p - '0');
1295 /* Garbage following port number. */
1297 *domain_e_ptr = colon;
1301 #define GET_WORD(p, b, e) do { \
1303 while (*p && *p != '\t') \
1306 if (b == e || !*p) \
1311 /* Load cookies from FILE. */
1314 cookie_jar_load (struct cookie_jar *jar, const char *file)
1317 FILE *fp = fopen (file, "r");
1320 logprintf (LOG_NOTQUIET, "Cannot open cookies file `%s': %s\n",
1321 file, strerror (errno));
1324 cookies_now = time (NULL);
1326 for (; ((line = read_whole_line (fp)) != NULL); xfree (line))
1328 struct cookie *cookie;
1334 char *domain_b = NULL, *domain_e = NULL;
1335 char *domflag_b = NULL, *domflag_e = NULL;
1336 char *path_b = NULL, *path_e = NULL;
1337 char *secure_b = NULL, *secure_e = NULL;
1338 char *expires_b = NULL, *expires_e = NULL;
1339 char *name_b = NULL, *name_e = NULL;
1340 char *value_b = NULL, *value_e = NULL;
1342 /* Skip leading white-space. */
1343 while (*p && ISSPACE (*p))
1345 /* Ignore empty lines. */
1346 if (!*p || *p == '#')
1349 GET_WORD (p, domain_b, domain_e);
1350 GET_WORD (p, domflag_b, domflag_e);
1351 GET_WORD (p, path_b, path_e);
1352 GET_WORD (p, secure_b, secure_e);
1353 GET_WORD (p, expires_b, expires_e);
1354 GET_WORD (p, name_b, name_e);
1356 /* Don't use GET_WORD for value because it ends with newline,
1359 value_e = p + strlen (p);
1360 if (value_e > value_b && value_e[-1] == '\n')
1362 if (value_e > value_b && value_e[-1] == '\r')
1364 /* Empty values are legal (I think), so don't bother checking. */
1366 cookie = cookie_new ();
1368 cookie->attr = strdupdelim (name_b, name_e);
1369 cookie->value = strdupdelim (value_b, value_e);
1370 cookie->path = strdupdelim (path_b, path_e);
1371 cookie->secure = BOUNDED_EQUAL (secure_b, secure_e, "TRUE");
1373 /* Curl source says, quoting Andre Garcia: "flag: A TRUE/FALSE
1374 value indicating if all machines within a given domain can
1375 access the variable. This value is set automatically by the
1376 browser, depending on the value set for the domain." */
1377 cookie->domain_exact = !BOUNDED_EQUAL (domflag_b, domflag_e, "TRUE");
1379 /* DOMAIN needs special treatment because we might need to
1380 extract the port. */
1381 port = domain_port (domain_b, domain_e, (const char **)&domain_e);
1383 cookie->port = port;
1385 if (*domain_b == '.')
1386 ++domain_b; /* remove leading dot internally */
1387 cookie->domain = strdupdelim (domain_b, domain_e);
1389 /* safe default in case EXPIRES field is garbled. */
1390 expiry = (double)cookies_now - 1;
1392 /* I don't like changing the line, but it's safe here. (line is
1395 sscanf (expires_b, "%lf", &expiry);
1399 /* EXPIRY can be 0 for session cookies saved because the
1400 user specified `--keep-session-cookies' in the past.
1401 They remain session cookies, and will be saved only if
1402 the user has specified `--keep-session-cookies' again. */
1406 if (expiry < cookies_now)
1407 goto abort_cookie; /* ignore stale cookie. */
1408 cookie->expiry_time = expiry;
1409 cookie->permanent = 1;
1412 store_cookie (jar, cookie);
1418 delete_cookie (cookie);
1423 /* Mapper for cookie_jar_save, callable by hash_table_map. VALUE points
1424 to the head in a chain of cookies. The function prints the entire
1428 save_cookies_mapper (void *key, void *value, void *arg)
1430 FILE *fp = (FILE *)arg;
1431 char *domain = (char *)key;
1432 struct cookie *cookie = (struct cookie *)value;
1433 for (; cookie; cookie = cookie->next)
1435 if (!cookie->permanent && !opt.keep_session_cookies)
1437 if (cookie_expired_p (cookie))
1439 if (!cookie->domain_exact)
1442 if (cookie->port != PORT_ANY)
1443 fprintf (fp, ":%d", cookie->port);
1444 fprintf (fp, "\t%s\t%s\t%s\t%.0f\t%s\t%s\n",
1445 cookie->domain_exact ? "FALSE" : "TRUE",
1446 cookie->path, cookie->secure ? "TRUE" : "FALSE",
1447 (double)cookie->expiry_time,
1448 cookie->attr, cookie->value);
1450 return 1; /* stop mapping */
1455 /* Save cookies, in format described above, to FILE. */
1458 cookie_jar_save (struct cookie_jar *jar, const char *file)
1462 DEBUGP (("Saving cookies to %s.\n", file));
1464 cookies_now = time (NULL);
1466 fp = fopen (file, "w");
1469 logprintf (LOG_NOTQUIET, _("Cannot open cookies file `%s': %s\n"),
1470 file, strerror (errno));
1474 fputs ("# HTTP cookie file.\n", fp);
1475 fprintf (fp, "# Generated by Wget on %s.\n", datetime_str (&cookies_now));
1476 fputs ("# Edit at your own risk.\n\n", fp);
1478 hash_table_map (jar->chains, save_cookies_mapper, fp);
1481 logprintf (LOG_NOTQUIET, _("Error writing to `%s': %s\n"),
1482 file, strerror (errno));
1483 if (fclose (fp) < 0)
1484 logprintf (LOG_NOTQUIET, _("Error closing `%s': %s\n"),
1485 file, strerror (errno));
1487 DEBUGP (("Done saving cookies.\n"));
1490 /* Destroy all the elements in the chain and unhook it from the cookie
1491 jar. This is written in the form of a callback to hash_table_map
1492 and used by cookie_jar_delete to delete all the cookies in a
1496 nuke_cookie_chain (void *value, void *key, void *arg)
1498 char *chain_key = (char *)value;
1499 struct cookie *chain = (struct cookie *)key;
1500 struct cookie_jar *jar = (struct cookie_jar *)arg;
1502 /* Remove the chain from the table and free the key. */
1503 hash_table_remove (jar->chains, chain_key);
1506 /* Then delete all the cookies in the chain. */
1509 struct cookie *next = chain->next;
1510 delete_cookie (chain);
1518 /* Clean up cookie-related data. */
1521 cookie_jar_delete (struct cookie_jar *jar)
1523 hash_table_map (jar->chains, nuke_cookie_chain, jar);
1524 hash_table_destroy (jar->chains);
1528 /* Test cases. Currently this only tests parse_set_cookies. To
1529 use, recompile Wget with -DTEST_COOKIES and call test_cookies()
1534 char *test_results[10];
1536 static int test_parse_cookies_callback (struct cookie *ignored,
1537 const char *nb, const char *ne,
1538 const char *vb, const char *ve)
1540 test_results[test_count++] = strdupdelim (nb, ne);
1541 test_results[test_count++] = strdupdelim (vb, ve);
1548 /* Tests expected to succeed: */
1554 { "arg=value", {"arg", "value", NULL} },
1555 { "arg1=value1;arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
1556 { "arg1=value1; arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
1557 { "arg1=value1; arg2=value2;", {"arg1", "value1", "arg2", "value2", NULL} },
1558 { "arg1=value1; arg2=value2; ", {"arg1", "value1", "arg2", "value2", NULL} },
1559 { "arg1=\"value1\"; arg2=\"\"", {"arg1", "value1", "arg2", "", NULL} },
1560 { "arg=", {"arg", "", NULL} },
1561 { "arg1=; arg2=", {"arg1", "", "arg2", "", NULL} },
1562 { "arg1 = ; arg2= ", {"arg1", "", "arg2", "", NULL} },
1565 /* Tests expected to fail: */
1566 static char *tests_fail[] = {
1568 "arg=\"unterminated",
1570 "arg1=;=another-empty-name",
1574 for (i = 0; i < countof (tests_succ); i++)
1577 char *data = tests_succ[i].data;
1578 char **expected = tests_succ[i].results;
1582 c = parse_set_cookies (data, test_parse_cookies_callback, 1);
1585 printf ("NULL cookie returned for valid data: %s\n", data);
1589 for (ind = 0; ind < test_count; ind += 2)
1593 if (0 != strcmp (expected[ind], test_results[ind]))
1594 printf ("Invalid name %d for '%s' (expected '%s', got '%s')\n",
1595 ind / 2 + 1, data, expected[ind], test_results[ind]);
1596 if (0 != strcmp (expected[ind + 1], test_results[ind + 1]))
1597 printf ("Invalid value %d for '%s' (expected '%s', got '%s')\n",
1598 ind / 2 + 1, data, expected[ind + 1], test_results[ind + 1]);
1600 if (ind < test_count || expected[ind])
1601 printf ("Unmatched number of results: %s\n", data);
1604 for (i = 0; i < countof (tests_fail); i++)
1607 char *data = tests_fail[i];
1609 c = parse_set_cookies (data, test_parse_cookies_callback, 1);
1611 printf ("Failed to report error on invalid data: %s\n", data);
1614 #endif /* TEST_COOKIES */