1 /* Support for cookies.
2 Copyright (C) 2001, 2002 Free Software Foundation, Inc.
4 This file is part of GNU Wget.
6 GNU Wget is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or (at
9 your option) any later version.
11 GNU Wget is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Wget; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 In addition, as a special exception, the Free Software Foundation
21 gives permission to link the code of its release of Wget with the
22 OpenSSL project's "OpenSSL" library (or with modified versions of it
23 that use the same license as the "OpenSSL" library), and distribute
24 the linked executables. You must obey the GNU General Public License
25 in all respects for all of the code used other than "OpenSSL". If you
26 modify this file, you may extend this exception to your version of the
27 file, but you are not obligated to do so. If you do not wish to do
28 so, delete this exception statement from your version. */
30 /* Written by Hrvoje Niksic. Parts are loosely inspired by the
31 cookie patch submitted by Tomasz Wegrzanowski.
33 This implements the client-side cookie support, as specified
34 (loosely) by Netscape's "preliminary specification", currently
37 http://wp.netscape.com/newsref/std/cookie_spec.html
39 rfc2109 is not supported because of its incompatibilities with the
40 above widely-used specification. rfc2965 is entirely ignored,
41 since popular client software doesn't implement it, and even the
42 sites that do send Set-Cookie2 also emit Set-Cookie for
59 /* This should *really* be in a .h file! */
60 time_t http_atotm (const char *);
62 /* Declarations of `struct cookie' and the most basic functions. */
64 /* Cookie jar serves as cookie storage and a means of retrieving
65 cookies efficiently. All cookies with the same domain are stored
66 in a linked list called "chain". A cookie chain can be reached by
67 looking up the domain in the cookie jar's `chains' table.
69 For example, to reach all the cookies under google.com, one must
70 execute hash_table_get(jar->chains, "google.com"). Of
71 course, when sending a cookie to `www.google.com', one must search
72 for cookies that belong to either `www.google.com' or `google.com'
73 -- but the point is that the code doesn't need to go through *all*
77 /* Cookie chains indexed by domain. */
78 struct hash_table *chains;
80 int cookie_count; /* number of cookies in the jar. */
83 /* Value set by entry point functions, so that the low-level
84 routines don't need to call time() all the time. */
90 struct cookie_jar *jar = xnew (struct cookie_jar);
91 jar->chains = make_nocase_string_hash_table (0);
92 jar->cookie_count = 0;
97 char *domain; /* domain of the cookie */
98 int port; /* port number */
99 char *path; /* path prefix of the cookie */
101 unsigned discard_requested :1; /* whether cookie was created to
102 request discarding another
105 unsigned secure :1; /* whether cookie must not be
106 transmitted over non-https
108 unsigned domain_exact :1; /* whether DOMAIN must match as a
111 unsigned permanent :1; /* whether the cookie should outlive
113 time_t expiry_time; /* time when the cookie expires, 0
114 means undetermined. */
116 char *attr; /* cookie attribute name */
117 char *value; /* cookie attribute value */
119 struct cookie *next; /* used for chaining of cookies in the
/* Port value meaning "any port": a freshly allocated cookie gets it by
   default, and a cookie carrying it is not restricted to one port. */
123 #define PORT_ANY (-1)
125 /* Allocate and return a new, empty cookie structure. */
127 static struct cookie *
130 struct cookie *cookie = xnew0 (struct cookie);
132 /* Both cookie->permanent and cookie->expiry_time are now 0. This
133 means that the cookie doesn't expire, but is only valid for this
134 session (i.e. not written out to disk). */
136 cookie->port = PORT_ANY;
140 /* Non-zero if the cookie has expired. Assumes cookies_now has been
141 set by one of the entry point functions. */
144 cookie_expired_p (const struct cookie *c)
146 return c->expiry_time != 0 && c->expiry_time < cookies_now;
149 /* Deallocate COOKIE and its components. */
152 delete_cookie (struct cookie *cookie)
154 xfree_null (cookie->domain);
155 xfree_null (cookie->path);
156 xfree_null (cookie->attr);
157 xfree_null (cookie->value);
161 /* Functions for storing cookies.
163 All cookies can be reached beginning with jar->chains. The key in
164 that table is the domain name, and the value is a linked list of
165 all cookies from that domain. Every new cookie is placed on the
168 /* Find and return a cookie in JAR whose domain, path, and attribute
169 name correspond to COOKIE. If found, PREVPTR will point to the
170 location of the cookie previous in chain, or NULL if the found
171 cookie is the head of a chain.
173 If no matching cookie is found, return NULL. */
175 static struct cookie *
176 find_matching_cookie (struct cookie_jar *jar, struct cookie *cookie,
177 struct cookie **prevptr)
179 struct cookie *chain, *prev;
181 chain = hash_table_get (jar->chains, cookie->domain);
186 for (; chain; prev = chain, chain = chain->next)
187 if (0 == strcmp (cookie->path, chain->path)
188 && 0 == strcmp (cookie->attr, chain->attr)
189 && cookie->port == chain->port)
200 /* Store COOKIE to the jar.
202 This is done by placing COOKIE at the head of its chain. However,
203 if COOKIE matches a cookie already in memory, as determined by
204 find_matching_cookie, the old cookie is unlinked and destroyed.
206 The key of each chain's hash table entry is allocated only the
207 first time; next hash_table_put's reuse the same key. */
210 store_cookie (struct cookie_jar *jar, struct cookie *cookie)
212 struct cookie *chain_head;
215 if (hash_table_get_pair (jar->chains, cookie->domain,
216 &chain_key, &chain_head))
218 /* A chain of cookies in this domain already exists. Check for
219 duplicates -- if an extant cookie exactly matches our domain,
220 port, path, and name, replace it. */
222 struct cookie *victim = find_matching_cookie (jar, cookie, &prev);
226 /* Remove VICTIM from the chain. COOKIE will be placed at
230 prev->next = victim->next;
231 cookie->next = chain_head;
235 /* prev is NULL; apparently VICTIM was at the head of
236 the chain. This place will be taken by COOKIE, so
237 all we need to do is: */
238 cookie->next = victim->next;
240 delete_cookie (victim);
242 DEBUGP (("Deleted old cookie (to be replaced.)\n"));
245 cookie->next = chain_head;
249 /* We are now creating the chain. Use a copy of cookie->domain
250 as the key for the life-time of the chain. Using
251 cookie->domain would be unsafe because the life-time of the
252 chain may exceed the life-time of the cookie. (Cookies may
253 be deleted from the chain by this very function.) */
255 chain_key = xstrdup (cookie->domain);
258 hash_table_put (jar->chains, chain_key, cookie);
263 time_t exptime = cookie->expiry_time;
264 DEBUGP (("\nStored cookie %s %d%s %s <%s> <%s> [expiry %s] %s %s\n",
265 cookie->domain, cookie->port,
266 cookie->port == PORT_ANY ? " (ANY)" : "",
268 cookie->permanent ? "permanent" : "session",
269 cookie->secure ? "secure" : "insecure",
270 cookie->expiry_time ? datetime_str (&exptime) : "none",
271 cookie->attr, cookie->value));
275 /* Discard a cookie matching COOKIE's domain, port, path, and
276 attribute name. This gets called when we encounter a cookie whose
277 expiry date is in the past, or whose max-age is set to 0. The
278 former corresponds to netscape cookie spec, while the latter is
279 specified by rfc2109. */
282 discard_matching_cookie (struct cookie_jar *jar, struct cookie *cookie)
284 struct cookie *prev, *victim;
286 if (!hash_table_count (jar->chains))
287 /* No elements == nothing to discard. */
290 victim = find_matching_cookie (jar, cookie, &prev);
294 /* Simply unchain the victim. */
295 prev->next = victim->next;
298 /* VICTIM was head of its chain. We need to place a new
299 cookie at the head. */
300 char *chain_key = NULL;
303 res = hash_table_get_pair (jar->chains, victim->domain,
308 /* VICTIM was the only cookie in the chain. Destroy the
309 chain and deallocate the chain key. */
310 hash_table_remove (jar->chains, victim->domain);
314 hash_table_put (jar->chains, chain_key, victim->next);
316 delete_cookie (victim);
317 DEBUGP (("Discarded old cookie.\n"));
321 /* Functions for parsing the `Set-Cookie' header, and creating new
322 cookies from the wire. */
/* Compare the attribute name delimited by [name_b, name_e) against
   LITERAL (presumably ignoring case, going by the helper's name).
   Expects `name_b' and `name_e' to be in scope where it is used. */
#define NAME_IS(literal) BOUNDED_EQUAL_NO_CASE (name_b, name_e, literal)

/* Nonzero when both value boundaries, `value_b' and `value_e', are
   set. */
#define VALUE_EXISTS (NULL != value_b && NULL != value_e)

/* Nonzero when a value is present and spans at least one character. */
#define VALUE_NON_EMPTY (VALUE_EXISTS && (value_e != value_b))
331 /* Update the appropriate cookie field. [name_b, name_e) are expected
332 to delimit the attribute name, while [value_b, value_e) (optional)
333 should delimit the attribute value.
335 When called the first time, it will set the cookie's attribute name
336 and value. After that, it will check the attribute name for
337 special fields such as `domain', `path', etc. Where appropriate,
338 it will parse the values of the fields it recognizes and fill the
339 corresponding fields in COOKIE.
341 Returns true on success. Returns false in case a syntax error is
342 found; such a cookie should be discarded. */
345 update_cookie_field (struct cookie *cookie,
346 const char *name_b, const char *name_e,
347 const char *value_b, const char *value_e)
349 assert (name_b != NULL && name_e != NULL);
355 cookie->attr = strdupdelim (name_b, name_e);
356 cookie->value = strdupdelim (value_b, value_e);
360 if (NAME_IS ("domain"))
362 if (!VALUE_NON_EMPTY)
364 xfree_null (cookie->domain);
365 /* Strictly speaking, we should set cookie->domain_exact if the
366 domain doesn't begin with a dot. But many sites set the
367 domain to "foo.com" and expect "subhost.foo.com" to get the
368 cookie, and it apparently works. */
371 cookie->domain = strdupdelim (value_b, value_e);
374 else if (NAME_IS ("path"))
376 if (!VALUE_NON_EMPTY)
378 xfree_null (cookie->path);
379 cookie->path = strdupdelim (value_b, value_e);
382 else if (NAME_IS ("expires"))
387 if (!VALUE_NON_EMPTY)
389 BOUNDED_TO_ALLOCA (value_b, value_e, value_copy);
391 expires = http_atotm (value_copy);
392 if (expires != (time_t) -1)
394 cookie->permanent = 1;
395 cookie->expiry_time = expires;
398 /* Error in expiration spec. Assume default (cookie doesn't
399 expire, but valid only for this session.) */
402 /* According to netscape's specification, expiry time in the
403 past means that discarding of a matching cookie is
405 if (cookie->expiry_time < cookies_now)
406 cookie->discard_requested = 1;
410 else if (NAME_IS ("max-age"))
415 if (!VALUE_NON_EMPTY)
417 BOUNDED_TO_ALLOCA (value_b, value_e, value_copy);
419 sscanf (value_copy, "%lf", &maxage);
421 /* something went wrong. */
423 cookie->permanent = 1;
424 cookie->expiry_time = cookies_now + maxage;
426 /* According to rfc2109, a cookie with max-age of 0 means that
427 discarding of a matching cookie is requested. */
429 cookie->discard_requested = 1;
433 else if (NAME_IS ("secure"))
435 /* ignore value completely */
440 /* Unrecognized attribute; ignore it. */
446 /* Returns true for characters that are legal in the name of an
447 attribute. This used to allow only alphanumerics, '-', and '_',
448 but we need to be more lenient because a number of sites want to
449 use weirder attribute names. rfc2965 "informally specifies"
450 attribute name (token) as "a sequence of non-special, non-white
451 space characters". So we allow everything except the stuff we know
/* Nonzero for character codes 33 through 126, excluding the
   delimiters '"', '=', ';' and ','.  C is evaluated several times,
   so pass an expression without side effects. */
#define ATTR_NAME_CHAR(c) (32 < (c) && 127 > (c) \
  && !((c) == '"' || (c) == '=' \
       || (c) == ';' || (c) == ','))
458 /* Parse the contents of the `Set-Cookie' header. The header looks
461 name1=value1; name2=value2; ...
463 Trailing semicolon is optional; spaces are allowed between all
464 tokens. Additionally, values may be quoted.
466 A new cookie is returned upon success, NULL otherwise. The
467 specified CALLBACK function (normally `update_cookie_field') is used
468 to update the fields of the newly created cookie structure. */
470 static struct cookie *
471 parse_set_cookies (const char *sc,
472 bool (*callback) (struct cookie *,
473 const char *, const char *,
474 const char *, const char *),
477 struct cookie *cookie = cookie_new ();
479 /* #### Hand-written DFAs are no fun to debug. We'd be better off
480 rewriting this as an inline parser. */
482 enum { S_START, S_NAME, S_NAME_POST,
483 S_VALUE_PRE, S_VALUE, S_QUOTED_VALUE, S_VALUE_TRAILSPACE,
484 S_ATTR_ACTION, S_DONE, S_ERROR
490 const char *name_b = NULL, *name_e = NULL;
491 const char *value_b = NULL, *value_e = NULL;
495 while (state != S_DONE && state != S_ERROR)
502 else if (ISSPACE (c))
503 /* Strip all whitespace preceding the name. */
505 else if (ATTR_NAME_CHAR (c))
511 /* empty attr name not allowed */
515 if (!c || c == ';' || c == '=' || ISSPACE (c))
520 else if (ATTR_NAME_CHAR (c))
528 value_b = value_e = NULL;
531 state = S_ATTR_ACTION;
538 else if (ISSPACE (c))
539 /* Ignore space and keep the state. */
547 value_b = value_e = p;
550 state = S_ATTR_ACTION;
556 state = S_QUOTED_VALUE;
558 else if (ISSPACE (c))
568 if (!c || c == ';' || ISSPACE (c))
571 state = S_VALUE_TRAILSPACE;
575 value_e = NULL; /* no trailing space */
584 state = S_VALUE_TRAILSPACE;
591 case S_VALUE_TRAILSPACE:
595 state = S_ATTR_ACTION;
598 state = S_ATTR_ACTION;
599 else if (ISSPACE (c))
606 bool legal = callback (cookie, name_b, name_e, value_b, value_e);
612 BOUNDED_TO_ALLOCA (name_b, name_e, name);
613 logprintf (LOG_NOTQUIET,
614 _("Error in Set-Cookie, field `%s'"),
625 /* handled by loop condition */
632 delete_cookie (cookie);
633 if (state != S_ERROR)
637 logprintf (LOG_NOTQUIET,
638 _("Syntax error in Set-Cookie: %s at position %d.\n"),
639 escnonprint (sc), (int) (p - sc));
643 /* Sanity checks. These are important, otherwise it is possible for
644 malicious attackers to destroy important cookie information and/or
645 violate your privacy. */
648 #define REQUIRE_DIGITS(p) do { \
651 for (++p; ISDIGIT (*p); p++) \
655 #define REQUIRE_DOT(p) do { \
660 /* Check whether ADDR matches <digits>.<digits>.<digits>.<digits>.
662 We don't want to call network functions like inet_addr() because
663 all we need is a check, preferably one that is small, fast, and
667 numeric_address_p (const char *addr)
669 const char *p = addr;
671 REQUIRE_DIGITS (p); /* A */
672 REQUIRE_DOT (p); /* . */
673 REQUIRE_DIGITS (p); /* B */
674 REQUIRE_DOT (p); /* . */
675 REQUIRE_DIGITS (p); /* C */
676 REQUIRE_DOT (p); /* . */
677 REQUIRE_DIGITS (p); /* D */
684 /* Check whether COOKIE_DOMAIN is an appropriate domain for HOST.
685 Originally I tried to make the check compliant with rfc2109, but
686 the sites deviated too often, so I had to fall back to "tail
687 matching", as defined by the original Netscape's cookie spec. */
690 check_domain_match (const char *cookie_domain, const char *host)
694 /* Numeric address requires exact match. It also requires HOST to
696 if (numeric_address_p (cookie_domain))
697 return 0 == strcmp (cookie_domain, host);
701 /* For the sake of efficiency, check for exact match first. */
702 if (0 == strcasecmp (cookie_domain, host))
707 /* HOST must match the tail of cookie_domain. */
708 if (!match_tail (host, cookie_domain, true))
711 /* We know that COOKIE_DOMAIN is a subset of HOST; however, we must
712 make sure that somebody is not trying to set the cookie for a
713 subdomain shared by many entities. For example, "company.co.uk"
714 must not be allowed to set a cookie for ".co.uk". On the other
715 hand, "sso.redhat.de" should be able to set a cookie for
718 The only marginally sane way to handle this I can think of is to
719 reject on the basis of the length of the second-level domain name
720 (but only when the top-level domain is unknown), with the assumption
721 that those of three or fewer characters could be reserved. For
724 .co.org -> works because the TLD is known
725 .co.uk -> doesn't work because "co" is only two chars long
726 .com.au -> doesn't work because "com" is only 3 chars long
727 .cnn.uk -> doesn't work because "cnn" is also only 3 chars long (ugh)
728 .cnn.de -> doesn't work for the same reason (ugh!!)
729 .abcd.de -> works because "abcd" is 4 chars long
730 .img.cnn.de -> works because it's not trying to set the 2nd level domain
731 .cnn.co.uk -> works for the same reason
733 That should prevent misuse, while allowing reasonable usage. If
734 someone knows of a better way to handle this, please let me
737 const char *p = cookie_domain;
738 int dccount = 1; /* number of domain components */
739 int ldcl = 0; /* last domain component length */
740 int nldcl = 0; /* next to last domain component length */
743 /* Ignore leading period in this calculation. */
746 for (out = 0; !out; p++)
754 /* Empty domain component found -- the domain is invalid. */
756 if (*(p + 1) == '\0')
758 /* Tolerate trailing '.' by not treating the domain as
759 one ending with an empty domain component. */
781 int known_toplevel = false;
782 static const char *known_toplevel_domains[] = {
783 ".com", ".edu", ".net", ".org", ".gov", ".mil", ".int"
785 for (i = 0; i < countof (known_toplevel_domains); i++)
786 if (match_tail (cookie_domain, known_toplevel_domains[i], true))
788 known_toplevel = true;
791 if (!known_toplevel && nldcl <= 3)
798 /* Don't allow the host "foobar.com" to set a cookie for domain
800 if (*cookie_domain != '.')
802 int dlen = strlen (cookie_domain);
803 int hlen = strlen (host);
804 /* cookie host: hostname.foobar.com */
805 /* desired domain: bar.com */
806 /* '.' must be here in host-> ^ */
807 if (hlen > dlen && host[hlen - dlen - 1] != '.')
816 static int path_matches (const char *, const char *);
818 /* Check whether PATH begins with COOKIE_PATH. */
821 check_path_match (const char *cookie_path, const char *path)
823 return path_matches (path, cookie_path) != 0;
826 /* Prepend '/' to string S. S is copied to fresh stack-allocated
827 space and its value is modified to point to the new location. */
829 #define PREPEND_SLASH(s) do { \
830 char *PS_newstr = (char *) alloca (1 + strlen (s) + 1); \
832 strcpy (PS_newstr + 1, s); \
837 /* Process the HTTP `Set-Cookie' header. This results in storing the
838 cookie or discarding a matching one, or ignoring it completely, all
839 depending on the contents. */
842 cookie_handle_set_cookie (struct cookie_jar *jar,
843 const char *host, int port,
844 const char *path, const char *set_cookie)
846 struct cookie *cookie;
847 cookies_now = time (NULL);
849 /* Wget's paths don't begin with '/' (blame rfc1808), but cookie
850 usage assumes /-prefixed paths. Until the rest of Wget is fixed,
851 simply prepend slash to PATH. */
852 PREPEND_SLASH (path);
854 cookie = parse_set_cookies (set_cookie, update_cookie_field, false);
858 /* Sanitize parts of cookie. */
863 /* If the domain was not provided, we use the one we're talking
864 to, and set exact match. */
865 cookie->domain = xstrdup (host);
866 cookie->domain_exact = 1;
867 /* Set the port, but only if it's non-default. */
868 if (port != 80 && port != 443)
873 if (!check_domain_match (cookie->domain, host))
875 logprintf (LOG_NOTQUIET,
876 _("Cookie coming from %s attempted to set domain to %s\n"),
877 escnonprint (host), escnonprint (cookie->domain));
878 xfree (cookie->domain);
885 /* The cookie doesn't set path: set it to the URL path, sans the
886 file part ("/dir/file" truncated to "/dir/"). */
887 char *trailing_slash = strrchr (path, '/');
889 cookie->path = strdupdelim (path, trailing_slash + 1);
891 /* no slash in the string -- can this even happen? */
892 cookie->path = xstrdup (path);
896 /* The cookie sets its own path; verify that it is legal. */
897 if (!check_path_match (cookie->path, path))
899 DEBUGP (("Attempt to fake the path: %s, %s\n",
900 cookie->path, path));
905 /* Now store the cookie, or discard an existing cookie, if
906 discarding was requested. */
908 if (cookie->discard_requested)
910 discard_matching_cookie (jar, cookie);
914 store_cookie (jar, cookie);
919 delete_cookie (cookie);
922 /* Support for sending out cookies in HTTP requests, based on
923 previously stored cookies. Entry point is
924 `build_cookies_request'. */
926 /* Return a count of how many times CHR occurs in STRING. */
929 count_char (const char *string, char chr)
933 for (p = string; *p; p++)
939 /* Find the cookie chains whose domains match HOST and store them to
942 A cookie chain is the head of a list of cookies that belong to a
943 host/domain. Given HOST "img.search.xemacs.org", this function
944 will return the chains for "img.search.xemacs.org",
945 "search.xemacs.org", and "xemacs.org" -- those of them that exist
948 DEST should be large enough to accept (in the worst case) as many
949 elements as there are domain components of HOST. */
952 find_chains_of_host (struct cookie_jar *jar, const char *host,
953 struct cookie *dest[])
958 /* Bail out quickly if there are no cookies in the jar. */
959 if (!hash_table_count (jar->chains))
962 if (numeric_address_p (host))
963 /* If host is an IP address, only check for the exact match. */
966 /* Otherwise, check all the subdomains except the top-level (last)
967 one. As a domain with N components has N-1 dots, the number of
968 passes equals the number of dots. */
969 passes = count_char (host, '.');
973 /* Find chains that match HOST, starting with exact match and
974 progressing to less specific domains. For instance, given HOST
975 fly.srk.fer.hr, first look for fly.srk.fer.hr's chain, then
976 srk.fer.hr's, then fer.hr's. */
979 struct cookie *chain = hash_table_get (jar->chains, host);
981 dest[dest_count++] = chain;
982 if (++passcnt >= passes)
984 host = strchr (host, '.') + 1;
990 /* If FULL_PATH begins with PREFIX, return the length of PREFIX, zero
994 path_matches (const char *full_path, const char *prefix)
996 int len = strlen (prefix);
998 if (0 != strncmp (full_path, prefix, len))
999 /* FULL_PATH doesn't begin with PREFIX. */
1002 /* Length of PREFIX determines the quality of the match. */
1006 /* Return true iff COOKIE matches the provided parameters of the URL
1007 being downloaded: HOST, PORT, PATH, and SECFLAG.
1009 If PATH_GOODNESS is non-NULL, store the "path goodness" value
1010 there. That value is a measure of how closely COOKIE matches PATH,
1011 used for ordering cookies. */
1014 cookie_matches_url (const struct cookie *cookie,
1015 const char *host, int port, const char *path,
1016 bool secflag, int *path_goodness)
1020 if (cookie_expired_p (cookie))
1021 /* Ignore stale cookies. Don't bother unchaining the cookie at
1022 this point -- Wget is a relatively short-lived application, and
1023 stale cookies will not be saved by `save_cookies'. On the
1024 other hand, this function should be as efficient as
1028 if (cookie->secure && !secflag)
1029 /* Don't transmit secure cookies over insecure connections. */
1031 if (cookie->port != PORT_ANY && cookie->port != port)
1034 /* If exact domain match is required, verify that cookie's domain is
1035 equal to HOST. If not, assume success on the grounds of the
1036 cookie's chain having been found by find_chains_of_host. */
1037 if (cookie->domain_exact
1038 && 0 != strcasecmp (host, cookie->domain))
1041 pg = path_matches (path, cookie->path);
1046 /* If the caller requested path_goodness, we return it. This is
1047 an optimization, so that the caller doesn't need to call
1048 path_matches() again. */
1049 *path_goodness = pg;
1053 /* A structure that points to a cookie, along with the additional
1054 information about the cookie's "goodness". This allows us to sort
1055 the cookies when returning them to the server, as required by the
1058 struct weighed_cookie {
1059 struct cookie *cookie;
1060 int domain_goodness;
1064 /* Comparator used for uniquifying the list. */
1067 equality_comparator (const void *p1, const void *p2)
1069 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
1070 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
1072 int namecmp = strcmp (wc1->cookie->attr, wc2->cookie->attr);
1073 int valuecmp = strcmp (wc1->cookie->value, wc2->cookie->value);
1075 /* We only really care whether both name and value are equal. We
1076 return them in this order only for consistency... */
1077 return namecmp ? namecmp : valuecmp;
1080 /* Eliminate duplicate cookies. "Duplicate cookies" are any two
1081 cookies with the same attr name and value. Whenever a duplicate
1082 pair is found, one of the cookies is removed. */
1085 eliminate_dups (struct weighed_cookie *outgoing, int count)
1087 struct weighed_cookie *h; /* hare */
1088 struct weighed_cookie *t; /* tortoise */
1089 struct weighed_cookie *end = outgoing + count;
1091 /* We deploy a simple uniquify algorithm: first sort the array
1092 according to our sort criteria, then copy it to itself, comparing
1093 each cookie to its neighbor and ignoring the duplicates. */
1095 qsort (outgoing, count, sizeof (struct weighed_cookie), equality_comparator);
1097 /* "Hare" runs through all the entries in the array, followed by
1098 "tortoise". If a duplicate is found, the hare skips it.
1099 Non-duplicate entries are copied to the tortoise ptr. */
1101 for (h = t = outgoing; h < end; h++)
1105 struct cookie *c0 = h[0].cookie;
1106 struct cookie *c1 = h[1].cookie;
1107 if (!strcmp (c0->attr, c1->attr) && !strcmp (c0->value, c1->value))
1108 continue; /* ignore the duplicate */
1111 /* If the hare has advanced past the tortoise (because of
1112 previous dups), make sure the values get copied. Otherwise,
1113 no copying is necessary. */
1119 return t - outgoing;
1122 /* Comparator used for sorting by quality. */
1125 goodness_comparator (const void *p1, const void *p2)
1127 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
1128 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
1130 /* Subtractions take `wc2' as the first argument because we want a
1131 sort in *decreasing* order of goodness. */
1132 int dgdiff = wc2->domain_goodness - wc1->domain_goodness;
1133 int pgdiff = wc2->path_goodness - wc1->path_goodness;
1135 /* Sort by domain goodness; if these are the same, sort by path
1136 goodness. (The sorting order isn't really specified; maybe it
1137 should be the other way around.) */
1138 return dgdiff ? dgdiff : pgdiff;
1141 /* Generate a `Cookie' header for a request that goes to HOST:PORT and
1142 requests PATH from the server. The resulting string is allocated
1143 with `malloc', and the caller is responsible for freeing it. If no
1144 cookies pertain to this request, i.e. no cookie header should be
1145 generated, NULL is returned. */
1148 cookie_header (struct cookie_jar *jar, const char *host,
1149 int port, const char *path, bool secflag)
1151 struct cookie **chains;
1154 struct cookie *cookie;
1155 struct weighed_cookie *outgoing;
1158 int result_size, pos;
1159 PREPEND_SLASH (path); /* see cookie_handle_set_cookie */
1161 /* First, find the cookie chains whose domains match HOST. */
1163 /* Allocate room for find_chains_of_host to write to. The number of
1164 chains can at most equal the number of subdomains, hence
1165 1+<number of dots>. */
1166 chains = alloca_array (struct cookie *, 1 + count_char (host, '.'));
1167 chain_count = find_chains_of_host (jar, host, chains);
1169 /* No cookies for this host. */
1173 cookies_now = time (NULL);
1175 /* Now extract from the chains those cookies that match our host
1176 (for domain_exact cookies), port (for cookies with port other
1177 than PORT_ANY), etc. See cookie_matches_url for details. */
1179 /* Count the number of matching cookies. */
1181 for (i = 0; i < chain_count; i++)
1182 for (cookie = chains[i]; cookie; cookie = cookie->next)
1183 if (cookie_matches_url (cookie, host, port, path, secflag, NULL))
1186 return NULL; /* no cookies matched */
1188 /* Allocate the array. */
1189 outgoing = alloca_array (struct weighed_cookie, count);
1191 /* Fill the array with all the matching cookies from the chains that
1194 for (i = 0; i < chain_count; i++)
1195 for (cookie = chains[i]; cookie; cookie = cookie->next)
1198 if (!cookie_matches_url (cookie, host, port, path, secflag, &pg))
1200 outgoing[ocnt].cookie = cookie;
1201 outgoing[ocnt].domain_goodness = strlen (cookie->domain);
1202 outgoing[ocnt].path_goodness = pg;
1205 assert (ocnt == count);
1207 /* Eliminate duplicate cookies; that is, those whose name and value
1209 count = eliminate_dups (outgoing, count);
1211 /* Sort the array so that best-matching domains come first, and
1212 that, within one domain, best-matching paths come first. */
1213 qsort (outgoing, count, sizeof (struct weighed_cookie), goodness_comparator);
1215 /* Count the space the name=value pairs will take. */
1217 for (i = 0; i < count; i++)
1219 struct cookie *c = outgoing[i].cookie;
1221 result_size += strlen (c->attr) + 1 + strlen (c->value);
1224 /* Allocate output buffer:
1225 name=value pairs -- result_size
1226 "; " separators -- (count - 1) * 2
1227 \0 terminator -- 1 */
1228 result_size = result_size + (count - 1) * 2 + 1;
1229 result = xmalloc (result_size);
1231 for (i = 0; i < count; i++)
1233 struct cookie *c = outgoing[i].cookie;
1234 int namlen = strlen (c->attr);
1235 int vallen = strlen (c->value);
1237 memcpy (result + pos, c->attr, namlen);
1239 result[pos++] = '=';
1240 memcpy (result + pos, c->value, vallen);
1244 result[pos++] = ';';
1245 result[pos++] = ' ';
1248 result[pos++] = '\0';
1249 assert (pos == result_size);
1253 /* Support for loading and saving cookies. The format used for
1254 loading and saving should be the format of the `cookies.txt' file
1255 used by Netscape and Mozilla, at least the Unix versions.
1256 (Apparently IE can export cookies in that format as well.) The
1257 format goes like this:
1259 DOMAIN DOMAIN-FLAG PATH SECURE-FLAG TIMESTAMP ATTR-NAME ATTR-VALUE
1261 DOMAIN -- cookie domain, optionally followed by :PORT
1262 DOMAIN-FLAG -- whether all hosts in the domain match
1264 SECURE-FLAG -- whether cookie requires secure connection
1265 TIMESTAMP -- expiry timestamp, number of seconds since epoch
1266 ATTR-NAME -- name of the cookie attribute
1267 ATTR-VALUE -- value of the cookie attribute (empty if absent)
1269 The fields are separated by TABs. All fields are mandatory, except
1270 for ATTR-VALUE. The `-FLAG' fields are boolean, their legal values
1271 being "TRUE" and "FALSE". Empty lines, lines consisting of
1272 whitespace only, and comment lines (beginning with # optionally
1273 preceded by whitespace) are ignored.
1275 Example line from cookies.txt (split in two lines for readability):
1277 .google.com TRUE / FALSE 2147368447 \
1278 PREF ID=34bb47565bbcd47b:LD=en:NR=20:TM=985172580:LM=985739012
1282 /* If the region [B, E) ends with :<digits>, parse the number, return
1283 it, and store new boundary (location of the `:') to DOMAIN_E_PTR.
1284 If port is not specified, return 0. */
1287 domain_port (const char *domain_b, const char *domain_e,
1288 const char **domain_e_ptr)
1292 const char *colon = memchr (domain_b, ':', domain_e - domain_b);
1295 for (p = colon + 1; p < domain_e && ISDIGIT (*p); p++)
1296 port = 10 * port + (*p - '0');
1298 /* Garbage following port number. */
1300 *domain_e_ptr = colon;
1304 #define GET_WORD(p, b, e) do { \
1306 while (*p && *p != '\t') \
1309 if (b == e || !*p) \
1314 /* Load cookies from FILE. */
1317 cookie_jar_load (struct cookie_jar *jar, const char *file)
1320 FILE *fp = fopen (file, "r");
1323 logprintf (LOG_NOTQUIET, _("Cannot open cookies file `%s': %s\n"),
1324 file, strerror (errno));
1327 cookies_now = time (NULL);
1329 for (; ((line = read_whole_line (fp)) != NULL); xfree (line))
1331 struct cookie *cookie;
1337 char *domain_b = NULL, *domain_e = NULL;
1338 char *domflag_b = NULL, *domflag_e = NULL;
1339 char *path_b = NULL, *path_e = NULL;
1340 char *secure_b = NULL, *secure_e = NULL;
1341 char *expires_b = NULL, *expires_e = NULL;
1342 char *name_b = NULL, *name_e = NULL;
1343 char *value_b = NULL, *value_e = NULL;
1345 /* Skip leading white-space. */
1346 while (*p && ISSPACE (*p))
1348 /* Ignore empty lines. */
1349 if (!*p || *p == '#')
1352 GET_WORD (p, domain_b, domain_e);
1353 GET_WORD (p, domflag_b, domflag_e);
1354 GET_WORD (p, path_b, path_e);
1355 GET_WORD (p, secure_b, secure_e);
1356 GET_WORD (p, expires_b, expires_e);
1357 GET_WORD (p, name_b, name_e);
1359 /* Don't use GET_WORD for value because it ends with newline,
1362 value_e = p + strlen (p);
1363 if (value_e > value_b && value_e[-1] == '\n')
1365 if (value_e > value_b && value_e[-1] == '\r')
1367 /* Empty values are legal (I think), so don't bother checking. */
1369 cookie = cookie_new ();
1371 cookie->attr = strdupdelim (name_b, name_e);
1372 cookie->value = strdupdelim (value_b, value_e);
1373 cookie->path = strdupdelim (path_b, path_e);
1374 cookie->secure = BOUNDED_EQUAL (secure_b, secure_e, "TRUE");
1376 /* Curl source says, quoting Andre Garcia: "flag: A TRUE/FALSE
1377 value indicating if all machines within a given domain can
1378 access the variable. This value is set automatically by the
1379 browser, depending on the value set for the domain." */
1380 cookie->domain_exact = !BOUNDED_EQUAL (domflag_b, domflag_e, "TRUE");
1382 /* DOMAIN needs special treatment because we might need to
1383 extract the port. */
1384 port = domain_port (domain_b, domain_e, (const char **)&domain_e);
1386 cookie->port = port;
1388 if (*domain_b == '.')
1389 ++domain_b; /* remove leading dot internally */
1390 cookie->domain = strdupdelim (domain_b, domain_e);
1392 /* safe default in case EXPIRES field is garbled. */
1393 expiry = (double)cookies_now - 1;
1395 /* I don't like changing the line, but it's safe here. (line is
1398 sscanf (expires_b, "%lf", &expiry);
1402 /* EXPIRY can be 0 for session cookies saved because the
1403 user specified `--keep-session-cookies' in the past.
1404 They remain session cookies, and will be saved only if
1405 the user has specified `keep-session-cookies' again. */
1409 if (expiry < cookies_now)
1410 goto abort_cookie; /* ignore stale cookie. */
1411 cookie->expiry_time = expiry;
1412 cookie->permanent = 1;
1415 store_cookie (jar, cookie);
1421 delete_cookie (cookie);
1426 /* Mapper for save_cookies callable by hash_table_map. VALUE points
1427 to the head in a chain of cookies. The function prints the entire
1431 save_cookies_mapper (void *key, void *value, void *arg)
1433 FILE *fp = (FILE *)arg;
1434 char *domain = (char *)key;
1435 struct cookie *cookie = (struct cookie *)value;
1436 for (; cookie; cookie = cookie->next)
1438 if (!cookie->permanent && !opt.keep_session_cookies)
1440 if (cookie_expired_p (cookie))
1442 if (!cookie->domain_exact)
1445 if (cookie->port != PORT_ANY)
1446 fprintf (fp, ":%d", cookie->port);
1447 fprintf (fp, "\t%s\t%s\t%s\t%.0f\t%s\t%s\n",
1448 cookie->domain_exact ? "FALSE" : "TRUE",
1449 cookie->path, cookie->secure ? "TRUE" : "FALSE",
1450 (double)cookie->expiry_time,
1451 cookie->attr, cookie->value);
1453 return 1; /* stop mapping */
1458 /* Save cookies, in format described above, to FILE. */
1461 cookie_jar_save (struct cookie_jar *jar, const char *file)
1465 DEBUGP (("Saving cookies to %s.\n", file));
1467 cookies_now = time (NULL);
1469 fp = fopen (file, "w");
1472 logprintf (LOG_NOTQUIET, _("Cannot open cookies file `%s': %s\n"),
1473 file, strerror (errno));
1477 fputs ("# HTTP cookie file.\n", fp);
1478 fprintf (fp, "# Generated by Wget on %s.\n", datetime_str (&cookies_now));
1479 fputs ("# Edit at your own risk.\n\n", fp);
1481 hash_table_map (jar->chains, save_cookies_mapper, fp);
1484 logprintf (LOG_NOTQUIET, _("Error writing to `%s': %s\n"),
1485 file, strerror (errno));
1486 if (fclose (fp) < 0)
1487 logprintf (LOG_NOTQUIET, _("Error closing `%s': %s\n"),
1488 file, strerror (errno));
1490 DEBUGP (("Done saving cookies.\n"));
1493 /* Destroy all the elements in the chain and unhook it from the cookie
1494 jar. This is written in the form of a callback to hash_table_map
1495 and used by cookie_jar_delete to delete all the cookies in a
1499 nuke_cookie_chain (void *value, void *key, void *arg)
1501 char *chain_key = (char *)value;
1502 struct cookie *chain = (struct cookie *)key;
1503 struct cookie_jar *jar = (struct cookie_jar *)arg;
1505 /* Remove the chain from the table and free the key. */
1506 hash_table_remove (jar->chains, chain_key);
1509 /* Then delete all the cookies in the chain. */
1512 struct cookie *next = chain->next;
1513 delete_cookie (chain);
1521 /* Clean up cookie-related data. */
1524 cookie_jar_delete (struct cookie_jar *jar)
1526 hash_table_map (jar->chains, nuke_cookie_chain, jar);
1527 hash_table_destroy (jar->chains);
/* Test cases.  Currently this only tests parse_set_cookies.  To use,
   recompile Wget with -DTEST_COOKIES and call test_cookies() from
   somewhere.  */

#ifdef TEST_COOKIES
/* Results of the most recent parse: test_count strings, stored as
   consecutive name/value pairs.  */
int test_count;
char *test_results[10];

/* Callback handed to parse_set_cookies: record a copy of the name
   [NB, NE) and of the value [VB, VE) in test_results.  The cookie
   being built is not looked at.  */
static bool test_parse_cookies_callback (struct cookie *ignored,
                                         const char *nb, const char *ne,
                                         const char *vb, const char *ve)
{
  test_results[test_count++] = strdupdelim (nb, ne);
  test_results[test_count++] = strdupdelim (vb, ve);
  return true;
}

/* Release the strings recorded by the callback and reset the
   counter.  NOTE(review): assumes strdupdelim returns storage that
   xfree may release -- confirm against its definition.  */
static void
test_release_results (void)
{
  int i;
  for (i = 0; i < test_count; i++)
    xfree (test_results[i]);
  test_count = 0;
}

/* Run parse_set_cookies over inputs that must parse (comparing the
   reported name/value pairs with the expected ones) and over inputs
   that must be rejected.  Every mismatch is reported on stdout;
   nothing is printed when all cases behave as expected.  */
void
test_cookies (void)
{
  /* Tests expected to succeed: */
  static struct {
    char *data;
    char *results[10];
  } tests_succ[] = {
    { "", {NULL} },
    { "arg=value", {"arg", "value", NULL} },
    { "arg1=value1;arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
    { "arg1=value1; arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
    { "arg1=value1;  arg2=value2;", {"arg1", "value1", "arg2", "value2", NULL} },
    { "arg1=value1;  arg2=value2;  ", {"arg1", "value1", "arg2", "value2", NULL} },
    { "arg1=\"value1\"; arg2=\"\"", {"arg1", "value1", "arg2", "", NULL} },
    { "arg=", {"arg", "", NULL} },
    { "arg1=; arg2=", {"arg1", "", "arg2", "", NULL} },
    { "arg1 = ; arg2= ", {"arg1", "", "arg2", "", NULL} },
  };

  /* Tests expected to fail: */
  static char *tests_fail[] = {
    ";",
    "arg=\"unterminated",
    "=empty-name",
    "arg1=;=another-empty-name",
  };
  int i;

  for (i = 0; i < countof (tests_succ); i++)
    {
      int ind;
      char *data = tests_succ[i].data;
      char **expected = tests_succ[i].results;
      struct cookie *c;

      test_count = 0;
      c = parse_set_cookies (data, test_parse_cookies_callback, true);
      if (!c)
        {
          printf ("NULL cookie returned for valid data: %s\n", data);
          test_release_results ();
          continue;
        }

      for (ind = 0; ind < test_count; ind += 2)
        {
          /* More pairs reported than expected; flagged below.  */
          if (!expected[ind])
            break;
          if (0 != strcmp (expected[ind], test_results[ind]))
            printf ("Invalid name %d for '%s' (expected '%s', got '%s')\n",
                    ind / 2 + 1, data, expected[ind], test_results[ind]);
          if (0 != strcmp (expected[ind + 1], test_results[ind + 1]))
            printf ("Invalid value %d for '%s' (expected '%s', got '%s')\n",
                    ind / 2 + 1, data, expected[ind + 1], test_results[ind + 1]);
        }
      /* Either side having entries left over is a mismatch.  */
      if (ind < test_count || expected[ind])
        printf ("Unmatched number of results: %s\n", data);

      test_release_results ();
      delete_cookie (c);
    }

  for (i = 0; i < countof (tests_fail); i++)
    {
      struct cookie *c;
      char *data = tests_fail[i];
      test_count = 0;
      c = parse_set_cookies (data, test_parse_cookies_callback, true);
      if (c)
        {
          printf ("Failed to report error on invalid data: %s\n", data);
          delete_cookie (c);
        }
      /* Pairs seen before the parser gave up were still recorded.  */
      test_release_results ();
    }
}
#endif /* TEST_COOKIES */