1 /* Support for cookies.
2 Copyright (C) 2001-2005 Free Software Foundation, Inc.
4 This file is part of GNU Wget.
6 GNU Wget is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or (at
9 your option) any later version.
11 GNU Wget is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Wget; if not, write to the Free Software Foundation, Inc.,
18 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 In addition, as a special exception, the Free Software Foundation
21 gives permission to link the code of its release of Wget with the
22 OpenSSL project's "OpenSSL" library (or with modified versions of it
23 that use the same license as the "OpenSSL" library), and distribute
24 the linked executables. You must obey the GNU General Public License
25 in all respects for all of the code used other than "OpenSSL". If you
26 modify this file, you may extend this exception to your version of the
27 file, but you are not obligated to do so. If you do not wish to do
28 so, delete this exception statement from your version. */
30 /* Written by Hrvoje Niksic. Parts are loosely inspired by the
31 cookie patch submitted by Tomasz Wegrzanowski.
33 This implements the client-side cookie support, as specified
34 (loosely) by Netscape's "preliminary specification", currently
37 http://wp.netscape.com/newsref/std/cookie_spec.html
39 rfc2109 is not supported because of its incompatibilities with the
40 above widely-used specification. rfc2965 is entirely ignored,
41 since popular client software doesn't implement it, and even the
42 sites that do send Set-Cookie2 also emit Set-Cookie for
58 #include "http.h" /* for http_atotm */
60 /* Declarations of `struct cookie' and the most basic functions. */
62 /* Cookie jar serves as cookie storage and a means of retrieving
63 cookies efficiently. All cookies with the same domain are stored
64 in a linked list called "chain". A cookie chain can be reached by
65 looking up the domain in the cookie jar's chains table.
67 For example, to reach all the cookies under google.com, one must
68 execute hash_table_get(jar->chains, "google.com"). Of
69 course, when sending a cookie to `www.google.com', one must search
70 for cookies that belong to either `www.google.com' or `google.com'
71 -- but the point is that the code doesn't need to go through *all*
75 /* Cookie chains indexed by domain. */
76 struct hash_table *chains;
78 int cookie_count; /* number of cookies in the jar. */
81 /* Value set by entry point functions, so that the low-level
82 routines don't need to call time() all the time. */
83 static time_t cookies_now;
88 struct cookie_jar *jar = xnew (struct cookie_jar);
89 jar->chains = make_nocase_string_hash_table (0);
90 jar->cookie_count = 0;
95 char *domain; /* domain of the cookie */
96 int port; /* port number */
97 char *path; /* path prefix of the cookie */
99 unsigned discard_requested :1; /* whether cookie was created to
100 request discarding another
103 unsigned secure :1; /* whether cookie should be
104 transmitted over non-https
106 unsigned domain_exact :1; /* whether DOMAIN must match as a
109 unsigned permanent :1; /* whether the cookie should outlive
111 time_t expiry_time; /* time when the cookie expires, 0
112 means undetermined. */
114 char *attr; /* cookie attribute name */
115 char *value; /* cookie attribute value */
117 struct cookie *next; /* used for chaining of cookies in the
121 #define PORT_ANY (-1)
123 /* Allocate and return a new, empty cookie structure. */
125 static struct cookie *
128 struct cookie *cookie = xnew0 (struct cookie);
130 /* Both cookie->permanent and cookie->expiry_time are now 0. This
131 means that the cookie doesn't expire, but is only valid for this
132 session (i.e. not written out to disk). */
134 cookie->port = PORT_ANY;
138 /* Non-zero if the cookie has expired. Assumes cookies_now has been
139 set by one of the entry point functions. */
142 cookie_expired_p (const struct cookie *c)
144 return c->expiry_time != 0 && c->expiry_time < cookies_now;
147 /* Deallocate COOKIE and its components. */
150 delete_cookie (struct cookie *cookie)
152 xfree_null (cookie->domain);
153 xfree_null (cookie->path);
154 xfree_null (cookie->attr);
155 xfree_null (cookie->value);
159 /* Functions for storing cookies.
161 All cookies can be reached beginning with jar->chains. The key in
162 that table is the domain name, and the value is a linked list of
163 all cookies from that domain. Every new cookie is placed on the
166 /* Find and return a cookie in JAR whose domain, path, and attribute
167 name correspond to COOKIE. If found, PREVPTR will point to the
168 location of the cookie previous in chain, or NULL if the found
169 cookie is the head of a chain.
171 If no matching cookie is found, return NULL. */
173 static struct cookie *
174 find_matching_cookie (struct cookie_jar *jar, struct cookie *cookie,
175 struct cookie **prevptr)
177 struct cookie *chain, *prev;
179 chain = hash_table_get (jar->chains, cookie->domain);
184 for (; chain; prev = chain, chain = chain->next)
185 if (0 == strcmp (cookie->path, chain->path)
186 && 0 == strcmp (cookie->attr, chain->attr)
187 && cookie->port == chain->port)
198 /* Store COOKIE to the jar.
200 This is done by placing COOKIE at the head of its chain. However,
201 if COOKIE matches a cookie already in memory, as determined by
202 find_matching_cookie, the old cookie is unlinked and destroyed.
204 The key of each chain's hash table entry is allocated only the
205 first time; next hash_table_put's reuse the same key. */
208 store_cookie (struct cookie_jar *jar, struct cookie *cookie)
210 struct cookie *chain_head;
213 if (hash_table_get_pair (jar->chains, cookie->domain,
214 &chain_key, &chain_head))
216 /* A chain of cookies in this domain already exists. Check for
217 duplicates -- if an extant cookie exactly matches our domain,
218 port, path, and name, replace it. */
220 struct cookie *victim = find_matching_cookie (jar, cookie, &prev);
224 /* Remove VICTIM from the chain. COOKIE will be placed at
228 prev->next = victim->next;
229 cookie->next = chain_head;
233 /* prev is NULL; apparently VICTIM was at the head of
234 the chain. This place will be taken by COOKIE, so
235 all we need to do is: */
236 cookie->next = victim->next;
238 delete_cookie (victim);
240 DEBUGP (("Deleted old cookie (to be replaced.)\n"));
243 cookie->next = chain_head;
247 /* We are now creating the chain. Use a copy of cookie->domain
248 as the key for the life-time of the chain. Using
249 cookie->domain would be unsafe because the life-time of the
250 chain may exceed the life-time of the cookie. (Cookies may
251 be deleted from the chain by this very function.) */
253 chain_key = xstrdup (cookie->domain);
256 hash_table_put (jar->chains, chain_key, cookie);
261 time_t exptime = cookie->expiry_time;
262 DEBUGP (("\nStored cookie %s %d%s %s <%s> <%s> [expiry %s] %s %s\n",
263 cookie->domain, cookie->port,
264 cookie->port == PORT_ANY ? " (ANY)" : "",
266 cookie->permanent ? "permanent" : "session",
267 cookie->secure ? "secure" : "insecure",
268 cookie->expiry_time ? datetime_str (&exptime) : "none",
269 cookie->attr, cookie->value));
273 /* Discard a cookie matching COOKIE's domain, port, path, and
274 attribute name. This gets called when we encounter a cookie whose
275 expiry date is in the past, or whose max-age is set to 0. The
276 former corresponds to netscape cookie spec, while the latter is
277 specified by rfc2109. */
280 discard_matching_cookie (struct cookie_jar *jar, struct cookie *cookie)
282 struct cookie *prev, *victim;
284 if (!hash_table_count (jar->chains))
285 /* No elements == nothing to discard. */
288 victim = find_matching_cookie (jar, cookie, &prev);
292 /* Simply unchain the victim. */
293 prev->next = victim->next;
296 /* VICTIM was head of its chain. We need to place a new
297 cookie at the head. */
298 char *chain_key = NULL;
301 res = hash_table_get_pair (jar->chains, victim->domain,
306 /* VICTIM was the only cookie in the chain. Destroy the
307 chain and deallocate the chain key. */
308 hash_table_remove (jar->chains, victim->domain);
312 hash_table_put (jar->chains, chain_key, victim->next);
314 delete_cookie (victim);
315 DEBUGP (("Discarded old cookie.\n"));
319 /* Functions for parsing the `Set-Cookie' header, and creating new
320 cookies from the wire. */
322 #define NAME_IS(string_literal) \
323 BOUNDED_EQUAL_NO_CASE (name_b, name_e, string_literal)
325 #define VALUE_EXISTS (value_b && value_e)
327 #define VALUE_NON_EMPTY (VALUE_EXISTS && (value_b != value_e))
329 /* Update the appropriate cookie field. [name_b, name_e) are expected
330 to delimit the attribute name, while [value_b, value_e) (optional)
331 should delimit the attribute value.
333 When called the first time, it will set the cookie's attribute name
334 and value. After that, it will check the attribute name for
335 special fields such as `domain', `path', etc. Where appropriate,
336 it will parse the values of the fields it recognizes and fill the
337 corresponding fields in COOKIE.
339 Returns true on success. Returns false in case a syntax error is
340 found; such a cookie should be discarded. */
343 update_cookie_field (struct cookie *cookie,
344 const char *name_b, const char *name_e,
345 const char *value_b, const char *value_e)
347 assert (name_b != NULL && name_e != NULL);
353 cookie->attr = strdupdelim (name_b, name_e);
354 cookie->value = strdupdelim (value_b, value_e);
358 if (NAME_IS ("domain"))
360 if (!VALUE_NON_EMPTY)
362 xfree_null (cookie->domain);
363 /* Strictly speaking, we should set cookie->domain_exact if the
364 domain doesn't begin with a dot. But many sites set the
365 domain to "foo.com" and expect "subhost.foo.com" to get the
366 cookie, and it apparently works. */
369 cookie->domain = strdupdelim (value_b, value_e);
372 else if (NAME_IS ("path"))
374 if (!VALUE_NON_EMPTY)
376 xfree_null (cookie->path);
377 cookie->path = strdupdelim (value_b, value_e);
380 else if (NAME_IS ("expires"))
385 if (!VALUE_NON_EMPTY)
387 BOUNDED_TO_ALLOCA (value_b, value_e, value_copy);
389 expires = http_atotm (value_copy);
390 if (expires != (time_t) -1)
392 cookie->permanent = 1;
393 cookie->expiry_time = expires;
396 /* Error in expiration spec. Assume default (cookie doesn't
397 expire, but valid only for this session.) */
400 /* According to netscape's specification, expiry time in the
401 past means that discarding of a matching cookie is
403 if (cookie->expiry_time < cookies_now)
404 cookie->discard_requested = 1;
408 else if (NAME_IS ("max-age"))
413 if (!VALUE_NON_EMPTY)
415 BOUNDED_TO_ALLOCA (value_b, value_e, value_copy);
417 sscanf (value_copy, "%lf", &maxage);
419 /* something went wrong. */
421 cookie->permanent = 1;
422 cookie->expiry_time = cookies_now + maxage;
424 /* According to rfc2109, a cookie with max-age of 0 means that
425 discarding of a matching cookie is requested. */
427 cookie->discard_requested = 1;
431 else if (NAME_IS ("secure"))
433 /* ignore value completely */
438 /* Unrecognized attribute; ignore it. */
444 /* Returns true for characters that are legal in the name of an
445 attribute. This used to allow only alphanumerics, '-', and '_',
446 but we need to be more lenient because a number of sites wants to
447 use weirder attribute names. rfc2965 "informally specifies"
448 attribute name (token) as "a sequence of non-special, non-white
449 space characters". So we allow everything except the stuff we know
452 #define ATTR_NAME_CHAR(c) ((c) > 32 && (c) < 127 \
453 && (c) != '"' && (c) != '=' \
454 && (c) != ';' && (c) != ',')
456 /* Parse the contents of the `Set-Cookie' header. The header looks
459 name1=value1; name2=value2; ...
461 Trailing semicolon is optional; spaces are allowed between all
462 tokens. Additionally, values may be quoted.
464 A new cookie is returned upon success, NULL otherwise. The
465 specified CALLBACK function (normally `update_cookie_field') is used
466 to update the fields of the newly created cookie structure. */
468 static struct cookie *
469 parse_set_cookies (const char *sc,
470 bool (*callback) (struct cookie *,
471 const char *, const char *,
472 const char *, const char *),
475 struct cookie *cookie = cookie_new ();
477 /* #### Hand-written DFAs are no fun to debug. We'd be better off
478 to rewrite this as an inline parser. */
480 enum { S_START, S_NAME, S_NAME_POST,
481 S_VALUE_PRE, S_VALUE, S_QUOTED_VALUE, S_VALUE_TRAILSPACE,
482 S_ATTR_ACTION, S_DONE, S_ERROR
488 const char *name_b = NULL, *name_e = NULL;
489 const char *value_b = NULL, *value_e = NULL;
493 while (state != S_DONE && state != S_ERROR)
500 else if (ISSPACE (c))
501 /* Strip all whitespace preceding the name. */
503 else if (ATTR_NAME_CHAR (c))
509 /* empty attr name not allowed */
513 if (!c || c == ';' || c == '=' || ISSPACE (c))
518 else if (ATTR_NAME_CHAR (c))
526 value_b = value_e = NULL;
529 state = S_ATTR_ACTION;
536 else if (ISSPACE (c))
537 /* Ignore space and keep the state. */
545 value_b = value_e = p;
548 state = S_ATTR_ACTION;
554 state = S_QUOTED_VALUE;
556 else if (ISSPACE (c))
566 if (!c || c == ';' || ISSPACE (c))
569 state = S_VALUE_TRAILSPACE;
573 value_e = NULL; /* no trailing space */
582 state = S_VALUE_TRAILSPACE;
589 case S_VALUE_TRAILSPACE:
593 state = S_ATTR_ACTION;
596 state = S_ATTR_ACTION;
597 else if (ISSPACE (c))
604 bool legal = callback (cookie, name_b, name_e, value_b, value_e);
610 BOUNDED_TO_ALLOCA (name_b, name_e, name);
611 logprintf (LOG_NOTQUIET,
612 _("Error in Set-Cookie, field `%s'"),
623 /* handled by loop condition */
630 delete_cookie (cookie);
631 if (state != S_ERROR)
635 logprintf (LOG_NOTQUIET,
636 _("Syntax error in Set-Cookie: %s at position %d.\n"),
637 escnonprint (sc), (int) (p - sc));
641 /* Sanity checks. These are important, otherwise it is possible for
642 malicious attackers to destroy important cookie information and/or
643 violate your privacy. */
646 #define REQUIRE_DIGITS(p) do { \
649 for (++p; ISDIGIT (*p); p++) \
653 #define REQUIRE_DOT(p) do { \
658 /* Check whether ADDR matches <digits>.<digits>.<digits>.<digits>.
660 We don't want to call network functions like inet_addr() because
661 all we need is a check, preferably one that is small, fast, and
665 numeric_address_p (const char *addr)
667 const char *p = addr;
669 REQUIRE_DIGITS (p); /* A */
670 REQUIRE_DOT (p); /* . */
671 REQUIRE_DIGITS (p); /* B */
672 REQUIRE_DOT (p); /* . */
673 REQUIRE_DIGITS (p); /* C */
674 REQUIRE_DOT (p); /* . */
675 REQUIRE_DIGITS (p); /* D */
682 /* Check whether COOKIE_DOMAIN is an appropriate domain for HOST.
683 Originally I tried to make the check compliant with rfc2109, but
684 the sites deviated too often, so I had to fall back to "tail
685 matching", as defined by the original Netscape's cookie spec. */
688 check_domain_match (const char *cookie_domain, const char *host)
692 /* Numeric address requires exact match. It also requires HOST to
694 if (numeric_address_p (cookie_domain))
695 return 0 == strcmp (cookie_domain, host);
699 /* For the sake of efficiency, check for exact match first. */
700 if (0 == strcasecmp (cookie_domain, host))
705 /* COOKIE_DOMAIN must match the tail of HOST. */
706 if (!match_tail (host, cookie_domain, true))
709 /* We know that COOKIE_DOMAIN is a subset of HOST; however, we must
710 make sure that somebody is not trying to set the cookie for a
711 subdomain shared by many entities. For example, "company.co.uk"
712 must not be allowed to set a cookie for ".co.uk". On the other
713 hand, "sso.redhat.de" should be able to set a cookie for
716 The only marginally sane way to handle this I can think of is to
717 reject on the basis of the length of the second-level domain name
718 (but when the top-level domain is unknown), with the assumption
719 that those of three or less characters could be reserved. For
722 .co.org -> works because the TLD is known
723 .co.uk -> doesn't work because "co" is only two chars long
724 .com.au -> doesn't work because "com" is only 3 chars long
725 .cnn.uk -> doesn't work because "cnn" is also only 3 chars long (ugh)
726 .cnn.de -> doesn't work for the same reason (ugh!!)
727 .abcd.de -> works because "abcd" is 4 chars long
728 .img.cnn.de -> works because it's not trying to set the 2nd level domain
729 .cnn.co.uk -> works for the same reason
731 That should prevent misuse, while allowing reasonable usage. If
732 someone knows of a better way to handle this, please let me
735 const char *p = cookie_domain;
736 int dccount = 1; /* number of domain components */
737 int ldcl = 0; /* last domain component length */
738 int nldcl = 0; /* next to last domain component length */
741 /* Ignore leading period in this calculation. */
744 for (out = 0; !out; p++)
752 /* Empty domain component found -- the domain is invalid. */
754 if (*(p + 1) == '\0')
756 /* Tolerate trailing '.' by not treating the domain as
757 one ending with an empty domain component. */
779 int known_toplevel = false;
780 static const char *known_toplevel_domains[] = {
781 ".com", ".edu", ".net", ".org", ".gov", ".mil", ".int"
783 for (i = 0; i < countof (known_toplevel_domains); i++)
784 if (match_tail (cookie_domain, known_toplevel_domains[i], true))
786 known_toplevel = true;
789 if (!known_toplevel && nldcl <= 3)
796 /* Don't allow the host "foobar.com" to set a cookie for domain
798 if (*cookie_domain != '.')
800 int dlen = strlen (cookie_domain);
801 int hlen = strlen (host);
802 /* cookie host: hostname.foobar.com */
803 /* desired domain: bar.com */
804 /* '.' must be here in host-> ^ */
805 if (hlen > dlen && host[hlen - dlen - 1] != '.')
814 static int path_matches (const char *, const char *);
816 /* Check whether PATH begins with COOKIE_PATH. */
819 check_path_match (const char *cookie_path, const char *path)
821 return path_matches (path, cookie_path) != 0;
824 /* Prepend '/' to string S. S is copied to fresh stack-allocated
825 space and its value is modified to point to the new location. */
827 #define PREPEND_SLASH(s) do { \
828 char *PS_newstr = (char *) alloca (1 + strlen (s) + 1); \
830 strcpy (PS_newstr + 1, s); \
835 /* Process the HTTP `Set-Cookie' header. This results in storing the
836 cookie or discarding a matching one, or ignoring it completely, all
837 depending on the contents. */
840 cookie_handle_set_cookie (struct cookie_jar *jar,
841 const char *host, int port,
842 const char *path, const char *set_cookie)
844 struct cookie *cookie;
845 cookies_now = time (NULL);
847 /* Wget's paths don't begin with '/' (blame rfc1808), but cookie
848 usage assumes /-prefixed paths. Until the rest of Wget is fixed,
849 simply prepend slash to PATH. */
850 PREPEND_SLASH (path);
852 cookie = parse_set_cookies (set_cookie, update_cookie_field, false);
856 /* Sanitize parts of cookie. */
861 /* If the domain was not provided, we use the one we're talking
862 to, and set exact match. */
863 cookie->domain = xstrdup (host);
864 cookie->domain_exact = 1;
865 /* Set the port, but only if it's non-default. */
866 if (port != 80 && port != 443)
871 if (!check_domain_match (cookie->domain, host))
873 logprintf (LOG_NOTQUIET,
874 _("Cookie coming from %s attempted to set domain to %s\n"),
875 escnonprint (host), escnonprint (cookie->domain));
876 xfree (cookie->domain);
883 /* The cookie doesn't set path: set it to the URL path, sans the
884 file part ("/dir/file" truncated to "/dir/"). */
885 char *trailing_slash = strrchr (path, '/');
887 cookie->path = strdupdelim (path, trailing_slash + 1);
889 /* no slash in the string -- can this even happen? */
890 cookie->path = xstrdup (path);
894 /* The cookie sets its own path; verify that it is legal. */
895 if (!check_path_match (cookie->path, path))
897 DEBUGP (("Attempt to fake the path: %s, %s\n",
898 cookie->path, path));
903 /* Now store the cookie, or discard an existing cookie, if
904 discarding was requested. */
906 if (cookie->discard_requested)
908 discard_matching_cookie (jar, cookie);
912 store_cookie (jar, cookie);
917 delete_cookie (cookie);
920 /* Support for sending out cookies in HTTP requests, based on
921 previously stored cookies. Entry point is
922 `build_cookies_request'. */
924 /* Return a count of how many times CHR occurs in STRING. */
927 count_char (const char *string, char chr)
931 for (p = string; *p; p++)
937 /* Find the cookie chains whose domains match HOST and store them to
940 A cookie chain is the head of a list of cookies that belong to a
941 host/domain. Given HOST "img.search.xemacs.org", this function
942 will return the chains for "img.search.xemacs.org",
943 "search.xemacs.org", and "xemacs.org" -- those of them that exist
946 DEST should be large enough to accept (in the worst case) as many
947 elements as there are domain components of HOST. */
950 find_chains_of_host (struct cookie_jar *jar, const char *host,
951 struct cookie *dest[])
956 /* Bail out quickly if there are no cookies in the jar. */
957 if (!hash_table_count (jar->chains))
960 if (numeric_address_p (host))
961 /* If host is an IP address, only check for the exact match. */
964 /* Otherwise, check all the subdomains except the top-level (last)
965 one. As a domain with N components has N-1 dots, the number of
966 passes equals the number of dots. */
967 passes = count_char (host, '.');
971 /* Find chains that match HOST, starting with exact match and
972 progressing to less specific domains. For instance, given HOST
973 fly.srk.fer.hr, first look for fly.srk.fer.hr's chain, then
974 srk.fer.hr's, then fer.hr's. */
977 struct cookie *chain = hash_table_get (jar->chains, host);
979 dest[dest_count++] = chain;
980 if (++passcnt >= passes)
982 host = strchr (host, '.') + 1;
988 /* If FULL_PATH begins with PREFIX, return the length of PREFIX, zero
992 path_matches (const char *full_path, const char *prefix)
994 int len = strlen (prefix);
996 if (0 != strncmp (full_path, prefix, len))
997 /* FULL_PATH doesn't begin with PREFIX. */
1000 /* Length of PREFIX determines the quality of the match. */
1004 /* Return true iff COOKIE matches the provided parameters of the URL
1005 being downloaded: HOST, PORT, PATH, and SECFLAG.
1007 If PATH_GOODNESS is non-NULL, store the "path goodness" value
1008 there. That value is a measure of how closely COOKIE matches PATH,
1009 used for ordering cookies. */
1012 cookie_matches_url (const struct cookie *cookie,
1013 const char *host, int port, const char *path,
1014 bool secflag, int *path_goodness)
1018 if (cookie_expired_p (cookie))
1019 /* Ignore stale cookies. Don't bother unchaining the cookie at
1020 this point -- Wget is a relatively short-lived application, and
1021 stale cookies will not be saved by `save_cookies'. On the
1022 other hand, this function should be as efficient as
1026 if (cookie->secure && !secflag)
1027 /* Don't transmit secure cookies over insecure connections. */
1029 if (cookie->port != PORT_ANY && cookie->port != port)
1032 /* If exact domain match is required, verify that cookie's domain is
1033 equal to HOST. If not, assume success on the grounds of the
1034 cookie's chain having been found by find_chains_of_host. */
1035 if (cookie->domain_exact
1036 && 0 != strcasecmp (host, cookie->domain))
1039 pg = path_matches (path, cookie->path);
1044 /* If the caller requested path_goodness, we return it. This is
1045 an optimization, so that the caller doesn't need to call
1046 path_matches() again. */
1047 *path_goodness = pg;
1051 /* A structure that points to a cookie, along with the additional
1052 information about the cookie's "goodness". This allows us to sort
1053 the cookies when returning them to the server, as required by the
1056 struct weighed_cookie {
1057 struct cookie *cookie;
1058 int domain_goodness;
1062 /* Comparator used for uniquifying the list. */
1065 equality_comparator (const void *p1, const void *p2)
1067 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
1068 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
1070 int namecmp = strcmp (wc1->cookie->attr, wc2->cookie->attr);
1071 int valuecmp = strcmp (wc1->cookie->value, wc2->cookie->value);
1073 /* We only really care whether both name and value are equal. We
1074 return them in this order only for consistency... */
1075 return namecmp ? namecmp : valuecmp;
1078 /* Eliminate duplicate cookies. "Duplicate cookies" are any two
1079 cookies with the same attr name and value. Whenever a duplicate
1080 pair is found, one of the cookies is removed. */
1083 eliminate_dups (struct weighed_cookie *outgoing, int count)
1085 struct weighed_cookie *h; /* hare */
1086 struct weighed_cookie *t; /* tortoise */
1087 struct weighed_cookie *end = outgoing + count;
1089 /* We deploy a simple uniquify algorithm: first sort the array
1090 according to our sort criteria, then copy it to itself, comparing
1091 each cookie to its neighbor and ignoring the duplicates. */
1093 qsort (outgoing, count, sizeof (struct weighed_cookie), equality_comparator);
1095 /* "Hare" runs through all the entries in the array, followed by
1096 "tortoise". If a duplicate is found, the hare skips it.
1097 Non-duplicate entries are copied to the tortoise ptr. */
1099 for (h = t = outgoing; h < end; h++)
1103 struct cookie *c0 = h[0].cookie;
1104 struct cookie *c1 = h[1].cookie;
1105 if (!strcmp (c0->attr, c1->attr) && !strcmp (c0->value, c1->value))
1106 continue; /* ignore the duplicate */
1109 /* If the hare has advanced past the tortoise (because of
1110 previous dups), make sure the values get copied. Otherwise,
1111 no copying is necessary. */
1117 return t - outgoing;
1120 /* Comparator used for sorting by quality. */
1123 goodness_comparator (const void *p1, const void *p2)
1125 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
1126 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
1128 /* Subtractions take `wc2' as the first argument becauase we want a
1129 sort in *decreasing* order of goodness. */
1130 int dgdiff = wc2->domain_goodness - wc1->domain_goodness;
1131 int pgdiff = wc2->path_goodness - wc1->path_goodness;
1133 /* Sort by domain goodness; if these are the same, sort by path
1134 goodness. (The sorting order isn't really specified; maybe it
1135 should be the other way around.) */
1136 return dgdiff ? dgdiff : pgdiff;
1139 /* Generate a `Cookie' header for a request that goes to HOST:PORT and
1140 requests PATH from the server. The resulting string is allocated
1141 with `malloc', and the caller is responsible for freeing it. If no
1142 cookies pertain to this request, i.e. no cookie header should be
1143 generated, NULL is returned. */
1146 cookie_header (struct cookie_jar *jar, const char *host,
1147 int port, const char *path, bool secflag)
1149 struct cookie **chains;
1152 struct cookie *cookie;
1153 struct weighed_cookie *outgoing;
1156 int result_size, pos;
1157 PREPEND_SLASH (path); /* see cookie_handle_set_cookie */
1159 /* First, find the cookie chains whose domains match HOST. */
1161 /* Allocate room for find_chains_of_host to write to. The number of
1162 chains can at most equal the number of subdomains, hence
1163 1+<number of dots>. */
1164 chains = alloca_array (struct cookie *, 1 + count_char (host, '.'));
1165 chain_count = find_chains_of_host (jar, host, chains);
1167 /* No cookies for this host. */
1171 cookies_now = time (NULL);
1173 /* Now extract from the chains those cookies that match our host
1174 (for domain_exact cookies), port (for cookies with port other
1175 than PORT_ANY), etc. See cookie_matches_url for details. */
1177 /* Count the number of matching cookies. */
1179 for (i = 0; i < chain_count; i++)
1180 for (cookie = chains[i]; cookie; cookie = cookie->next)
1181 if (cookie_matches_url (cookie, host, port, path, secflag, NULL))
1184 return NULL; /* no cookies matched */
1186 /* Allocate the array. */
1187 outgoing = alloca_array (struct weighed_cookie, count);
1189 /* Fill the array with all the matching cookies from the chains that
1192 for (i = 0; i < chain_count; i++)
1193 for (cookie = chains[i]; cookie; cookie = cookie->next)
1196 if (!cookie_matches_url (cookie, host, port, path, secflag, &pg))
1198 outgoing[ocnt].cookie = cookie;
1199 outgoing[ocnt].domain_goodness = strlen (cookie->domain);
1200 outgoing[ocnt].path_goodness = pg;
1203 assert (ocnt == count);
1205 /* Eliminate duplicate cookies; that is, those whose name and value
1207 count = eliminate_dups (outgoing, count);
1209 /* Sort the array so that best-matching domains come first, and
1210 that, within one domain, best-matching paths come first. */
1211 qsort (outgoing, count, sizeof (struct weighed_cookie), goodness_comparator);
1213 /* Count the space the name=value pairs will take. */
1215 for (i = 0; i < count; i++)
1217 struct cookie *c = outgoing[i].cookie;
1219 result_size += strlen (c->attr) + 1 + strlen (c->value);
1222 /* Allocate output buffer:
1223 name=value pairs -- result_size
1224 "; " separators -- (count - 1) * 2
1225 \0 terminator -- 1 */
1226 result_size = result_size + (count - 1) * 2 + 1;
1227 result = xmalloc (result_size);
1229 for (i = 0; i < count; i++)
1231 struct cookie *c = outgoing[i].cookie;
1232 int namlen = strlen (c->attr);
1233 int vallen = strlen (c->value);
1235 memcpy (result + pos, c->attr, namlen);
1237 result[pos++] = '=';
1238 memcpy (result + pos, c->value, vallen);
1242 result[pos++] = ';';
1243 result[pos++] = ' ';
1246 result[pos++] = '\0';
1247 assert (pos == result_size);
1251 /* Support for loading and saving cookies. The format used for
1252 loading and saving should be the format of the `cookies.txt' file
1253 used by Netscape and Mozilla, at least the Unix versions.
1254 (Apparently IE can export cookies in that format as well.) The
1255 format goes like this:
1257 DOMAIN DOMAIN-FLAG PATH SECURE-FLAG TIMESTAMP ATTR-NAME ATTR-VALUE
1259 DOMAIN -- cookie domain, optionally followed by :PORT
1260 DOMAIN-FLAG -- whether all hosts in the domain match
1262 SECURE-FLAG -- whether cookie requires secure connection
1263 TIMESTAMP -- expiry timestamp, number of seconds since epoch
1264 ATTR-NAME -- name of the cookie attribute
1265 ATTR-VALUE -- value of the cookie attribute (empty if absent)
1267 The fields are separated by TABs. All fields are mandatory, except
1268 for ATTR-VALUE. The `-FLAG' fields are boolean, their legal values
1269 being "TRUE" and "FALSE". Empty lines, lines consisting of
1270 whitespace only, and comment lines (beginning with # optionally
1271 preceded by whitespace) are ignored.
1273 Example line from cookies.txt (split in two lines for readability):
1275 .google.com TRUE / FALSE 2147368447 \
1276 PREF ID=34bb47565bbcd47b:LD=en:NR=20:TM=985172580:LM=985739012
/* If the region [DOMAIN_B, DOMAIN_E) ends with :<digits>, parse the
   number, return it, and store the new boundary (location of the `:')
   to DOMAIN_E_PTR.

   Return 0 and leave *DOMAIN_E_PTR untouched if the region contains
   no colon, if anything other than digits follows the colon, or if
   the number is larger than 65535 (the largest valid TCP port).  The
   range check also keeps the accumulator from overflowing on
   arbitrarily long digit runs read from a cookies file.  */

static int
domain_port (const char *domain_b, const char *domain_e,
             const char **domain_e_ptr)
{
  int port = 0;
  const char *p;
  const char *colon = memchr (domain_b, ':', domain_e - domain_b);

  if (!colon)
    return 0;

  for (p = colon + 1; p < domain_e && *p >= '0' && *p <= '9'; p++)
    {
      port = 10 * port + (*p - '0');
      if (port > 65535)
        /* Not a valid port number; stop before PORT can overflow.  */
        return 0;
    }

  if (p < domain_e)
    /* Garbage following port number. */
    return 0;

  *domain_e_ptr = colon;
  return port;
}
1302 #define GET_WORD(p, b, e) do { \
1304 while (*p && *p != '\t') \
1307 if (b == e || !*p) \
1312 /* Load cookies from FILE. */
/* JAR is the jar that store_cookie adds each parsed cookie to.  FILE is
   opened for reading with fopen.  Lines are obtained one at a time from
   read_whole_line and handed to xfree when the loop advances.  */
1315 cookie_jar_load (struct cookie_jar *jar, const char *file)
1318 FILE *fp = fopen (file, "r");
/* Diagnostic for a file that could not be opened; the test guarding it
   is not part of this listing.  */
1321 logprintf (LOG_NOTQUIET, _("Cannot open cookies file `%s': %s\n"),
1322 file, strerror (errno));
/* Reference time that the expiry comparison further down is made
   against.  */
1325 cookies_now = time (NULL);
1327 for (; ((line = read_whole_line (fp)) != NULL); xfree (line))
1329 struct cookie *cookie;
/* One begin/end pointer pair per field of the line.  The first six
   pairs are passed to GET_WORD below; the VALUE pair is set by hand.  */
1335 char *domain_b = NULL, *domain_e = NULL;
1336 char *domflag_b = NULL, *domflag_e = NULL;
1337 char *path_b = NULL, *path_e = NULL;
1338 char *secure_b = NULL, *secure_e = NULL;
1339 char *expires_b = NULL, *expires_e = NULL;
1340 char *name_b = NULL, *name_e = NULL;
1341 char *value_b = NULL, *value_e = NULL;
1343 /* Skip leading white-space. */
1344 while (*p && ISSPACE (*p))
1346 /* Ignore empty lines. */
/* The same test also matches comment lines, i.e. those whose first
   non-blank character is a hash sign.  */
1347 if (!*p || *p == '#')
/* Fields in file order: DOMAIN, DOMAIN-FLAG, PATH, SECURE-FLAG,
   TIMESTAMP, ATTR-NAME.  */
1350 GET_WORD (p, domain_b, domain_e);
1351 GET_WORD (p, domflag_b, domflag_e);
1352 GET_WORD (p, path_b, path_e);
1353 GET_WORD (p, secure_b, secure_e);
1354 GET_WORD (p, expires_b, expires_e);
1355 GET_WORD (p, name_b, name_e);
1357 /* Don't use GET_WORD for value because it ends with newline,
1360 value_e = p + strlen (p);
1361 if (value_e > value_b && value_e[-1] == '\n')
1363 if (value_e > value_b && value_e[-1] == '\r')
1365 /* Empty values are legal (I think), so don't bother checking. */
1367 cookie = cookie_new ();
/* Populate the cookie from the field boundaries collected above.  */
1369 cookie->attr = strdupdelim (name_b, name_e);
1370 cookie->value = strdupdelim (value_b, value_e);
1371 cookie->path = strdupdelim (path_b, path_e);
1372 cookie->secure = BOUNDED_EQUAL (secure_b, secure_e, "TRUE");
1374 /* Curl source says, quoting Andre Garcia: "flag: A TRUE/FALSE
1375 value indicating if all machines within a given domain can
1376 access the variable. This value is set automatically by the
1377 browser, depending on the value set for the domain." */
/* Note the negation: domain_exact is the inverse of the comparison of
   the flag field against TRUE.  cookie_jar_save writes the flag back
   with the same inversion.  */
1378 cookie->domain_exact = !BOUNDED_EQUAL (domflag_b, domflag_e, "TRUE");
1380 /* DOMAIN needs special treatment because we might need to
1381 extract the port. */
/* The address of domain_e is passed as the out-parameter, so when
   domain_port stores a new boundary the domain copied below no longer
   includes the port suffix.  */
1382 port = domain_port (domain_b, domain_e, (const char **)&domain_e);
1384 cookie->port = port;
1386 if (*domain_b == '.')
1387 ++domain_b; /* remove leading dot internally */
1388 cookie->domain = strdupdelim (domain_b, domain_e);
/* The default is one second before cookies_now, so if the sscanf below
   leaves EXPIRY untouched the value fails the staleness comparison.
   NOTE(review): the return value of sscanf is not examined on the
   visible line.  */
1390 /* safe default in case EXPIRES field is garbled. */
1391 expiry = (double)cookies_now - 1;
1393 /* I don't like changing the line, but it's safe here. (line is
1396 sscanf (expires_b, "%lf", &expiry);
1400 /* EXPIRY can be 0 for session cookies saved because the
1401 user specified `--keep-session-cookies' in the past.
1402 They remain session cookies, and will be saved only if
1403 the user has specified `keep-session-cookies' again. */
/* On the path shown here, a cookie whose expiry precedes cookies_now is
   abandoned via abort_cookie; otherwise the expiry is recorded and the
   cookie is marked permanent.  The branch structure around these lines
   (including the zero-expiry case described above) is not part of this
   listing.  */
1407 if (expiry < cookies_now)
1408 goto abort_cookie; /* ignore stale cookie. */
1409 cookie->expiry_time = expiry;
1410 cookie->permanent = 1;
1413 store_cookie (jar, cookie);
/* Presumably the abort_cookie target (the label itself is not part of
   this listing): the cookie is released instead of being stored.  */
1419 delete_cookie (cookie);
1424 /* Save cookies, in format described above, to FILE. */
/* JAR supplies the cookies through its chains table.  FILE is opened
   with mode w, written line by line, and closed before returning on the
   path shown.  Failures to open, write or close are reported through
   logprintf together with strerror (errno).  */
1427 cookie_jar_save (struct cookie_jar *jar, const char *file)
1430 hash_table_iterator iter;
1432 DEBUGP (("Saving cookies to %s.\n", file));
/* Timestamp used in the generated file header below.  */
1434 cookies_now = time (NULL);
1436 fp = fopen (file, "w");
1439 logprintf (LOG_NOTQUIET, _("Cannot open cookies file `%s': %s\n"),
1440 file, strerror (errno));
/* File header: three comment lines, the second carrying the generation
   time, followed by a blank line.  */
1444 fputs ("# HTTP cookie file.\n", fp);
1445 fprintf (fp, "# Generated by Wget on %s.\n", datetime_str (&cookies_now));
1446 fputs ("# Edit at your own risk.\n\n", fp);
/* Walk every entry of the chains table.  */
1448 for (hash_table_iterate (jar->chains, &iter);
1449 hash_table_iter_next (&iter);
/* The entry key is used as the domain string and the entry value as the
   head of a list of cookies linked through their next members.  */
1452 const char *domain = iter.key;
1453 struct cookie *cookie = iter.value;
1454 for (; cookie; cookie = cookie->next)
/* The next three tests look at, in order: a non-permanent cookie while
   opt.keep_session_cookies is off, an expired cookie, and a cookie that
   is not an exact-domain match.  The statements they control are not
   part of this listing.  */
1456 if (!cookie->permanent && !opt.keep_session_cookies)
1458 if (cookie_expired_p (cookie))
1460 if (!cookie->domain_exact)
/* A colon and port number are written only when the port is something
   other than PORT_ANY.  */
1463 if (cookie->port != PORT_ANY)
1464 fprintf (fp, ":%d", cookie->port);
/* Remaining TAB-separated fields: DOMAIN-FLAG, PATH, SECURE-FLAG,
   TIMESTAMP, ATTR-NAME, ATTR-VALUE.  DOMAIN-FLAG is the inverse of
   domain_exact, and the expiry time is printed as a double with no
   fractional digits.  */
1465 fprintf (fp, "\t%s\t%s\t%s\t%.0f\t%s\t%s\n",
1466 cookie->domain_exact ? "FALSE" : "TRUE",
1467 cookie->path, cookie->secure ? "TRUE" : "FALSE",
1468 (double)cookie->expiry_time,
1469 cookie->attr, cookie->value);
/* Write and close failures are reported separately.  The test guarding
   the write diagnostic is not part of this listing.  */
1476 logprintf (LOG_NOTQUIET, _("Error writing to `%s': %s\n"),
1477 file, strerror (errno));
1478 if (fclose (fp) < 0)
1479 logprintf (LOG_NOTQUIET, _("Error closing `%s': %s\n"),
1480 file, strerror (errno));
1482 DEBUGP (("Done saving cookies.\n"));
1485 /* Destroy all the elements in the chain and unhook it from the cookie
1486 jar. This is written in the form of a callback to
1487 hash_table_for_each and used by cookie_jar_delete to delete all the
1488 cookies in a jar. */
/* ARG is the cookie jar; cookie_jar_delete passes it as the last
   argument of hash_table_for_each.
   NOTE(review): the parameter names look swapped relative to their use.
   The first parameter, named VALUE, is treated as the key string of the
   chain, and the second, named KEY, as the chain of cookies.  Confirm
   the argument order hash_table_for_each uses for its callback and
   rename accordingly.  */
1491 nuke_cookie_chain (void *value, void *key, void *arg)
1493 char *chain_key = (char *)value;
1494 struct cookie *chain = (struct cookie *)key;
1495 struct cookie_jar *jar = (struct cookie_jar *)arg;
1497 /* Remove the chain from the table and free the key. */
1498 hash_table_remove (jar->chains, chain_key);
1501 /* Then delete all the cookies in the chain. */
/* The successor is read before delete_cookie is called on the current
   element.  */
1504 struct cookie *next = chain->next;
1505 delete_cookie (chain);
1513 /* Clean up cookie-related data. */
/* Runs nuke_cookie_chain over every entry of the chains table of JAR,
   with JAR itself as the callback argument, and then destroys the
   table.  Whether JAR itself is released is not visible in this
   listing.  */
1516 cookie_jar_delete (struct cookie_jar *jar)
1518 hash_table_for_each (jar->chains, nuke_cookie_chain, jar);
1519 hash_table_destroy (jar->chains);
/* test_results collects what the callback below is handed.  On each
   invocation the name region [NB, NE) and then the value region
   [VB, VE) are passed through strdupdelim and the results stored at
   successive test_count positions, so names land on even indices and
   values on odd ones.  The cookie argument is not used.
   NOTE(review): the array has room for 10 entries and no bound check is
   visible.  Each success case listed in this file yields at most 4
   entries, but where test_count is reset is not part of this listing.  */
1523 /* Test cases. Currently this only tests parse_set_cookies. To
1524 use, recompile Wget with -DTEST_COOKIES and call test_cookies()
1529 char *test_results[10];
1531 static bool test_parse_cookies_callback (struct cookie *ignored,
1532 const char *nb, const char *ne,
1533 const char *vb, const char *ve)
1535 test_results[test_count++] = strdupdelim (nb, ne);
1536 test_results[test_count++] = strdupdelim (vb, ve);
1543 /* Tests expected to succeed: */
/* Each entry pairs an input string with the NULL-terminated sequence of
   name, value, name, value, ... that the parser is expected to report.
   The cases cover separators with and without spaces, trailing
   separators, quoted values and empty values.  */
1549 { "arg=value", {"arg", "value", NULL} },
1550 { "arg1=value1;arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
1551 { "arg1=value1; arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
1552 { "arg1=value1; arg2=value2;", {"arg1", "value1", "arg2", "value2", NULL} },
1553 { "arg1=value1; arg2=value2; ", {"arg1", "value1", "arg2", "value2", NULL} },
1554 { "arg1=\"value1\"; arg2=\"\"", {"arg1", "value1", "arg2", "", NULL} },
1555 { "arg=", {"arg", "", NULL} },
1556 { "arg1=; arg2=", {"arg1", "", "arg2", "", NULL} },
1557 { "arg1 = ; arg2= ", {"arg1", "", "arg2", "", NULL} },
1560 /* Tests expected to fail: */
1561 static char *tests_fail[] = {
1563 "arg=\"unterminated",
1565 "arg1=;=another-empty-name",
/* Success cases: parse each input with the collecting callback, then
   compare what was collected against the expectation.  */
1569 for (i = 0; i < countof (tests_succ); i++)
1572 char *data = tests_succ[i].data;
1573 char **expected = tests_succ[i].results;
1577 c = parse_set_cookies (data, test_parse_cookies_callback, true);
1580 printf ("NULL cookie returned for valid data: %s\n", data);
/* IND steps over the collected results two at a time: the name at IND
   and the value at IND + 1.  Mismatches are reported with a 1-based
   pair number.  */
1584 for (ind = 0; ind < test_count; ind += 2)
1588 if (0 != strcmp (expected[ind], test_results[ind]))
1589 printf ("Invalid name %d for '%s' (expected '%s', got '%s')\n",
1590 ind / 2 + 1, data, expected[ind], test_results[ind]);
1591 if (0 != strcmp (expected[ind + 1], test_results[ind + 1]))
1592 printf ("Invalid value %d for '%s' (expected '%s', got '%s')\n",
1593 ind / 2 + 1, data, expected[ind + 1], test_results[ind + 1]);
/* Reports a count mismatch: collected results remain past IND, or the
   expectation list has not reached its NULL terminator.  */
1595 if (ind < test_count || expected[ind])
1596 printf ("Unmatched number of results: %s\n", data);
/* Failure cases: the same parser call is made on each invalid input and
   a message is printed when no error is reported.
   NOTE(review): the third argument is spelled 1 here but true in the
   success loop above; consider using the same spelling in both.  */
1599 for (i = 0; i < countof (tests_fail); i++)
1602 char *data = tests_fail[i];
1604 c = parse_set_cookies (data, test_parse_cookies_callback, 1);
1606 printf ("Failed to report error on invalid data: %s\n", data);
1609 #endif /* TEST_COOKIES */