1 /* Support for cookies.
2 Copyright (C) 2001, 2002 Free Software Foundation, Inc.
4 This file is part of GNU Wget.
6 GNU Wget is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or (at
9 your option) any later version.
11 GNU Wget is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Wget; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 In addition, as a special exception, the Free Software Foundation
21 gives permission to link the code of its release of Wget with the
22 OpenSSL project's "OpenSSL" library (or with modified versions of it
23 that use the same license as the "OpenSSL" library), and distribute
24 the linked executables. You must obey the GNU General Public License
25 in all respects for all of the code used other than "OpenSSL". If you
26 modify this file, you may extend this exception to your version of the
27 file, but you are not obligated to do so. If you do not wish to do
28 so, delete this exception statement from your version. */
30 /* Written by Hrvoje Niksic. Parts are loosely inspired by the
31 cookie patch submitted by Tomasz Wegrzanowski.
33 This implements the client-side cookie support, as specified
34 (loosely) by Netscape's "preliminary specification", currently
37 http://wp.netscape.com/newsref/std/cookie_spec.html
39 rfc2109 is not supported because of its incompatibilities with the
40 above widely-used specification. rfc2965 is entirely ignored,
41 since popular client software doesn't implement it, and even the
42 sites that do send Set-Cookie2 also emit Set-Cookie for
59 /* This should *really* be in a .h file! */
60 time_t http_atotm (const char *);
62 /* Declarations of `struct cookie' and the most basic functions. */
64 /* Cookie jar serves as cookie storage and a means of retrieving
65 cookies efficiently. All cookies with the same domain are stored
66 in a linked list called "chain". A cookie chain can be reached by
67 looking up the domain in the cookie jar's `chains' table.
69 For example, to reach all the cookies under google.com, one must
70 execute hash_table_get(jar->chains, "google.com"). Of
71 course, when sending a cookie to `www.google.com', one must search
72 for cookies that belong to either `www.google.com' or `google.com'
73 -- but the point is that the code doesn't need to go through *all*
77 /* Cookie chains indexed by domain. */
78 struct hash_table *chains;
80 int cookie_count; /* number of cookies in the jar. */
83 /* Value set by entry point functions, so that the low-level
84 routines don't need to call time() all the time. */
90 struct cookie_jar *jar = xnew (struct cookie_jar);
91 jar->chains = make_nocase_string_hash_table (0);
92 jar->cookie_count = 0;
97 char *domain; /* domain of the cookie */
98 int port; /* port number */
99 char *path; /* path prefix of the cookie */
101 unsigned discard_requested :1; /* whether cookie was created to
102 request discarding another
105 unsigned secure :1; /* whether cookie should be
106 transmitted over non-https
108 unsigned domain_exact :1; /* whether DOMAIN must match as a
111 int permanent :1; /* whether the cookie should outlive
113 time_t expiry_time; /* time when the cookie expires, 0
114 means undetermined. */
116 char *attr; /* cookie attribute name */
117 char *value; /* cookie attribute value */
119 struct cookie *next; /* used for chaining of cookies in the
123 #define PORT_ANY (-1)
125 /* Allocate and return a new, empty cookie structure. */
127 static struct cookie *
130 struct cookie *cookie = xnew0 (struct cookie);
132 /* Both cookie->permanent and cookie->expiry_time are now 0. This
133 means that the cookie doesn't expire, but is only valid for this
134 session (i.e. not written out to disk). */
136 cookie->port = PORT_ANY;
140 /* Non-zero if the cookie has expired. Assumes cookies_now has been
141 set by one of the entry point functions. */
144 cookie_expired_p (const struct cookie *c)
146 return c->expiry_time != 0 && c->expiry_time < cookies_now;
149 /* Deallocate COOKIE and its components. */
152 delete_cookie (struct cookie *cookie)
154 xfree_null (cookie->domain);
155 xfree_null (cookie->path);
156 xfree_null (cookie->attr);
157 xfree_null (cookie->value);
161 /* Functions for storing cookies.
163 All cookies can be reached beginning with jar->chains. The key in
164 that table is the domain name, and the value is a linked list of
165 all cookies from that domain. Every new cookie is placed on the
168 /* Find and return a cookie in JAR whose domain, path, and attribute
169 name correspond to COOKIE. If found, PREVPTR will point to the
170 location of the cookie previous in chain, or NULL if the found
171 cookie is the head of a chain.
173 If no matching cookie is found, return NULL. */
175 static struct cookie *
176 find_matching_cookie (struct cookie_jar *jar, struct cookie *cookie,
177 struct cookie **prevptr)
179 struct cookie *chain, *prev;
181 chain = hash_table_get (jar->chains, cookie->domain);
186 for (; chain; prev = chain, chain = chain->next)
187 if (0 == strcmp (cookie->path, chain->path)
188 && 0 == strcmp (cookie->attr, chain->attr)
189 && cookie->port == chain->port)
200 /* Store COOKIE to the jar.
202 This is done by placing COOKIE at the head of its chain. However,
203 if COOKIE matches a cookie already in memory, as determined by
204 find_matching_cookie, the old cookie is unlinked and destroyed.
206 The key of each chain's hash table entry is allocated only the
207 first time; next hash_table_put's reuse the same key. */
210 store_cookie (struct cookie_jar *jar, struct cookie *cookie)
212 struct cookie *chain_head;
215 if (hash_table_get_pair (jar->chains, cookie->domain,
216 &chain_key, &chain_head))
218 /* A chain of cookies in this domain already exists. Check for
219 duplicates -- if an extant cookie exactly matches our domain,
220 port, path, and name, replace it. */
222 struct cookie *victim = find_matching_cookie (jar, cookie, &prev);
226 /* Remove VICTIM from the chain. COOKIE will be placed at
230 prev->next = victim->next;
231 cookie->next = chain_head;
235 /* prev is NULL; apparently VICTIM was at the head of
236 the chain. This place will be taken by COOKIE, so
237 all we need to do is: */
238 cookie->next = victim->next;
240 delete_cookie (victim);
242 DEBUGP (("Deleted old cookie (to be replaced.)\n"));
245 cookie->next = chain_head;
249 /* We are now creating the chain. Use a copy of cookie->domain
250 as the key for the life-time of the chain. Using
251 cookie->domain would be unsafe because the life-time of the
252 chain may exceed the life-time of the cookie. (Cookies may
253 be deleted from the chain by this very function.) */
255 chain_key = xstrdup (cookie->domain);
258 hash_table_put (jar->chains, chain_key, cookie);
263 time_t exptime = cookie->expiry_time;
264 DEBUGP (("\nStored cookie %s %d%s %s <%s> <%s> [expiry %s] %s %s\n",
265 cookie->domain, cookie->port,
266 cookie->port == PORT_ANY ? " (ANY)" : "",
268 cookie->permanent ? "permanent" : "session",
269 cookie->secure ? "secure" : "insecure",
270 cookie->expiry_time ? datetime_str (&exptime) : "none",
271 cookie->attr, cookie->value));
275 /* Discard a cookie matching COOKIE's domain, port, path, and
276 attribute name. This gets called when we encounter a cookie whose
277 expiry date is in the past, or whose max-age is set to 0. The
278 former corresponds to netscape cookie spec, while the latter is
279 specified by rfc2109. */
282 discard_matching_cookie (struct cookie_jar *jar, struct cookie *cookie)
284 struct cookie *prev, *victim;
286 if (!hash_table_count (jar->chains))
287 /* No elements == nothing to discard. */
290 victim = find_matching_cookie (jar, cookie, &prev);
294 /* Simply unchain the victim. */
295 prev->next = victim->next;
298 /* VICTIM was head of its chain. We need to place a new
299 cookie at the head. */
300 char *chain_key = NULL;
303 res = hash_table_get_pair (jar->chains, victim->domain,
308 /* VICTIM was the only cookie in the chain. Destroy the
309 chain and deallocate the chain key. */
310 hash_table_remove (jar->chains, victim->domain);
314 hash_table_put (jar->chains, chain_key, victim->next);
316 delete_cookie (victim);
317 DEBUGP (("Discarded old cookie.\n"));
321 /* Functions for parsing the `Set-Cookie' header, and creating new
322 cookies from the wire. */
/* True when the attribute name delimited by [name_b, name_e) equals
   STRING_LITERAL, ignoring case.  Relies on the caller's local
   variables name_b and name_e.  */
#define NAME_IS(string_literal)						\
  BOUNDED_EQUAL_NO_CASE (name_b, name_e, string_literal)

/* True when both value boundaries are set, i.e. a value was supplied
   (possibly an empty one).  Relies on the caller's value_b/value_e.  */
#define VALUE_EXISTS (value_b != NULL && value_e != NULL)

/* True when a value was supplied and it contains at least one
   character.  */
#define VALUE_NON_EMPTY (VALUE_EXISTS && (value_e != value_b))
331 /* Update the appropriate cookie field. [name_b, name_e) are expected
332 to delimit the attribute name, while [value_b, value_e) (optional)
333 should delimit the attribute value.
335 When called the first time, it will set the cookie's attribute name
336 and value. After that, it will check the attribute name for
337 special fields such as `domain', `path', etc. Where appropriate,
338 it will parse the values of the fields it recognizes and fill the
339 corresponding fields in COOKIE.
341 Returns 1 on success. Returns zero in case a syntax error is
342 found; such a cookie should be discarded. */
345 update_cookie_field (struct cookie *cookie,
346 const char *name_b, const char *name_e,
347 const char *value_b, const char *value_e)
349 assert (name_b != NULL && name_e != NULL);
355 cookie->attr = strdupdelim (name_b, name_e);
356 cookie->value = strdupdelim (value_b, value_e);
360 if (NAME_IS ("domain"))
362 if (!VALUE_NON_EMPTY)
364 xfree_null (cookie->domain);
365 /* Strictly speaking, we should set cookie->domain_exact if the
366 domain doesn't begin with a dot. But many sites set the
367 domain to "foo.com" and expect "subhost.foo.com" to get the
368 cookie, and it apparently works. */
371 cookie->domain = strdupdelim (value_b, value_e);
374 else if (NAME_IS ("path"))
376 if (!VALUE_NON_EMPTY)
378 xfree_null (cookie->path);
379 cookie->path = strdupdelim (value_b, value_e);
382 else if (NAME_IS ("expires"))
387 if (!VALUE_NON_EMPTY)
389 BOUNDED_TO_ALLOCA (value_b, value_e, value_copy);
391 expires = http_atotm (value_copy);
392 if (expires != (time_t) -1)
394 cookie->permanent = 1;
395 cookie->expiry_time = expires;
398 /* Error in expiration spec. Assume default (cookie doesn't
399 expire, but valid only for this session.) */
402 /* According to netscape's specification, expiry time in the
403 past means that discarding of a matching cookie is
405 if (cookie->expiry_time < cookies_now)
406 cookie->discard_requested = 1;
410 else if (NAME_IS ("max-age"))
415 if (!VALUE_NON_EMPTY)
417 BOUNDED_TO_ALLOCA (value_b, value_e, value_copy);
419 sscanf (value_copy, "%lf", &maxage);
421 /* something went wrong. */
423 cookie->permanent = 1;
424 cookie->expiry_time = cookies_now + maxage;
426 /* According to rfc2109, a cookie with max-age of 0 means that
427 discarding of a matching cookie is requested. */
429 cookie->discard_requested = 1;
433 else if (NAME_IS ("secure"))
435 /* ignore value completely */
440 /* Unrecognized attribute; ignore it. */
446 /* Returns non-zero for characters that are legal in the name of an
447 attribute. This used to allow only alphanumerics, '-', and '_',
448 but we need to be more lenient because a number of sites want to
449 use weirder attribute names. rfc2965 "informally specifies"
450 attribute name (token) as "a sequence of non-special, non-white
451 space characters". So we allow everything except the stuff we know
/* Non-zero when C lies strictly between 32 and 127 (so space and
   DEL are excluded) and is none of the four separator characters
   '"', '=', ';' and ','.  C is evaluated more than once, so do not
   pass an expression with side effects.  */
#define ATTR_NAME_CHAR(c)						\
  ((c) > 32 && (c) < 127						\
   && !((c) == '"' || (c) == '=' || (c) == ';' || (c) == ','))
458 /* Parse the contents of the `Set-Cookie' header. The header looks
461 name1=value1; name2=value2; ...
463 Trailing semicolon is optional; spaces are allowed between all
464 tokens. Additionally, values may be quoted.
466 A new cookie is returned upon success, NULL otherwise. The
467 specified CALLBACK function (normally `update_cookie_field') is used
468 to update the fields of the newly created cookie structure. */
470 static struct cookie *
471 parse_set_cookies (const char *sc,
472 int (*callback) (struct cookie *,
473 const char *, const char *,
474 const char *, const char *),
477 struct cookie *cookie = cookie_new ();
479 /* #### Hand-written DFAs are no fun to debug. We'd be better off
480 rewriting this as an inline parser. */
482 enum { S_START, S_NAME, S_NAME_POST,
483 S_VALUE_PRE, S_VALUE, S_QUOTED_VALUE, S_VALUE_TRAILSPACE,
484 S_ATTR_ACTION, S_DONE, S_ERROR
490 const char *name_b = NULL, *name_e = NULL;
491 const char *value_b = NULL, *value_e = NULL;
495 while (state != S_DONE && state != S_ERROR)
502 else if (ISSPACE (c))
503 /* Strip all whitespace preceding the name. */
505 else if (ATTR_NAME_CHAR (c))
511 /* empty attr name not allowed */
515 if (!c || c == ';' || c == '=' || ISSPACE (c))
520 else if (ATTR_NAME_CHAR (c))
528 value_b = value_e = NULL;
531 state = S_ATTR_ACTION;
538 else if (ISSPACE (c))
539 /* Ignore space and keep the state. */
547 value_b = value_e = p;
550 state = S_ATTR_ACTION;
556 state = S_QUOTED_VALUE;
558 else if (ISSPACE (c))
568 if (!c || c == ';' || ISSPACE (c))
571 state = S_VALUE_TRAILSPACE;
575 value_e = NULL; /* no trailing space */
584 state = S_VALUE_TRAILSPACE;
591 case S_VALUE_TRAILSPACE:
595 state = S_ATTR_ACTION;
598 state = S_ATTR_ACTION;
599 else if (ISSPACE (c))
606 int legal = callback (cookie, name_b, name_e, value_b, value_e);
612 BOUNDED_TO_ALLOCA (name_b, name_e, name);
613 logprintf (LOG_NOTQUIET,
614 _("Error in Set-Cookie, field `%s'"),
625 /* handled by loop condition */
632 delete_cookie (cookie);
633 if (state != S_ERROR)
637 logprintf (LOG_NOTQUIET,
638 _("Syntax error in Set-Cookie: %s at position %d.\n"),
639 escnonprint (sc), p - sc);
643 /* Sanity checks. These are important, otherwise it is possible for
644 malicious attackers to destroy important cookie information and/or
645 violate your privacy. */
648 #define REQUIRE_DIGITS(p) do { \
651 for (++p; ISDIGIT (*p); p++) \
655 #define REQUIRE_DOT(p) do { \
660 /* Check whether ADDR matches <digits>.<digits>.<digits>.<digits>.
662 We don't want to call network functions like inet_addr() because
663 all we need is a check, preferably one that is small, fast, and
667 numeric_address_p (const char *addr)
669 const char *p = addr;
671 REQUIRE_DIGITS (p); /* A */
672 REQUIRE_DOT (p); /* . */
673 REQUIRE_DIGITS (p); /* B */
674 REQUIRE_DOT (p); /* . */
675 REQUIRE_DIGITS (p); /* C */
676 REQUIRE_DOT (p); /* . */
677 REQUIRE_DIGITS (p); /* D */
684 /* Check whether COOKIE_DOMAIN is an appropriate domain for HOST.
685 Originally I tried to make the check compliant with rfc2109, but
686 the sites deviated too often, so I had to fall back to "tail
687 matching", as defined by the original Netscape's cookie spec. */
690 check_domain_match (const char *cookie_domain, const char *host)
694 /* Numeric address requires exact match. It also requires HOST to
696 if (numeric_address_p (cookie_domain))
697 return 0 == strcmp (cookie_domain, host);
701 /* For the sake of efficiency, check for exact match first. */
702 if (0 == strcasecmp (cookie_domain, host))
707 /* COOKIE_DOMAIN must match the tail of HOST. */
708 if (!match_tail (host, cookie_domain, 1))
711 /* We know that COOKIE_DOMAIN is a subset of HOST; however, we must
712 make sure that somebody is not trying to set the cookie for a
713 subdomain shared by many entities. For example, "company.co.uk"
714 must not be allowed to set a cookie for ".co.uk". On the other
715 hand, "sso.redhat.de" should be able to set a cookie for
718 The only marginally sane way to handle this I can think of is to
719 reject on the basis of the length of the second-level domain name
720 (but only when the top-level domain is unknown), with the assumption
721 that those of three or fewer characters could be reserved. For
724 .co.org -> works because the TLD is known
725 .co.uk -> doesn't work because "co" is only two chars long
726 .com.au -> doesn't work because "com" is only 3 chars long
727 .cnn.uk -> doesn't work because "cnn" is also only 3 chars long (ugh)
728 .cnn.de -> doesn't work for the same reason (ugh!!)
729 .abcd.de -> works because "abcd" is 4 chars long
730 .img.cnn.de -> works because it's not trying to set the 2nd level domain
731 .cnn.co.uk -> works for the same reason
733 That should prevent misuse, while allowing reasonable usage. If
734 someone knows of a better way to handle this, please let me
737 const char *p = cookie_domain;
738 int dccount = 1; /* number of domain components */
739 int ldcl = 0; /* last domain component length */
740 int nldcl = 0; /* next to last domain component length */
743 /* Ignore leading period in this calculation. */
746 for (out = 0; !out; p++)
754 /* Empty domain component found -- the domain is invalid. */
756 if (*(p + 1) == '\0')
758 /* Tolerate trailing '.' by not treating the domain as
759 one ending with an empty domain component. */
781 int known_toplevel = 0;
782 static const char *known_toplevel_domains[] = {
783 ".com", ".edu", ".net", ".org", ".gov", ".mil", ".int"
785 for (i = 0; i < countof (known_toplevel_domains); i++)
786 if (match_tail (cookie_domain, known_toplevel_domains[i], 1))
791 if (!known_toplevel && nldcl <= 3)
798 /* Don't allow the host "foobar.com" to set a cookie for domain
800 if (*cookie_domain != '.')
802 int dlen = strlen (cookie_domain);
803 int hlen = strlen (host);
804 /* cookie host: hostname.foobar.com */
805 /* desired domain: bar.com */
806 /* '.' must be here in host-> ^ */
807 if (hlen > dlen && host[hlen - dlen - 1] != '.')
816 static int path_matches (const char *, const char *);
818 /* Check whether PATH begins with COOKIE_PATH. */
static int
check_path_match (const char *cookie_path, const char *path)
{
  /* path_matches takes the full path first and the prefix second, so
     the arguments are swapped relative to this function's own
     parameter order.  Its result is passed back unchanged.  */
  int prefix_match = path_matches (path, cookie_path);

  return prefix_match;
}
826 /* Process the HTTP `Set-Cookie' header. This results in storing the
827 cookie or discarding a matching one, or ignoring it completely, all
828 depending on the contents. */
831 cookie_handle_set_cookie (struct cookie_jar *jar,
832 const char *host, int port,
833 const char *path, const char *set_cookie)
835 struct cookie *cookie;
836 cookies_now = time (NULL);
838 cookie = parse_set_cookies (set_cookie, update_cookie_field, 0);
842 /* Sanitize parts of cookie. */
847 /* If the domain was not provided, we use the one we're talking
848 to, and set exact match. */
849 cookie->domain = xstrdup (host);
850 cookie->domain_exact = 1;
851 /* Set the port, but only if it's non-default. */
852 if (port != 80 && port != 443)
857 if (!check_domain_match (cookie->domain, host))
859 logprintf (LOG_NOTQUIET,
860 _("Cookie coming from %s attempted to set domain to %s\n"),
861 escnonprint (host), escnonprint (cookie->domain));
862 xfree (cookie->domain);
869 /* The cookie doesn't set path: set it to the URL path, sans the
870 file part ("/dir/file" truncated to "/dir/"). */
871 char *trailing_slash = strrchr (path, '/');
873 cookie->path = strdupdelim (path, trailing_slash + 1);
875 /* no slash in the string -- can this even happen? */
876 cookie->path = xstrdup (path);
880 /* The cookie sets its own path; verify that it is legal. */
881 if (!check_path_match (cookie->path, path))
883 DEBUGP (("Attempt to fake the path: %s, %s\n",
884 cookie->path, path));
889 /* Now store the cookie, or discard an existing cookie, if
890 discarding was requested. */
892 if (cookie->discard_requested)
894 discard_matching_cookie (jar, cookie);
898 store_cookie (jar, cookie);
903 delete_cookie (cookie);
906 /* Support for sending out cookies in HTTP requests, based on
907 previously stored cookies. Entry point is
908 `cookie_header'. */
910 /* Return a count of how many times CHR occurs in STRING. */
913 count_char (const char *string, char chr)
917 for (p = string; *p; p++)
923 /* Find the cookie chains whose domains match HOST and store them to
926 A cookie chain is the head of a list of cookies that belong to a
927 host/domain. Given HOST "img.search.xemacs.org", this function
928 will return the chains for "img.search.xemacs.org",
929 "search.xemacs.org", and "xemacs.org" -- those of them that exist
932 DEST should be large enough to accept (in the worst case) as many
933 elements as there are domain components of HOST. */
936 find_chains_of_host (struct cookie_jar *jar, const char *host,
937 struct cookie *dest[])
942 /* Bail out quickly if there are no cookies in the jar. */
943 if (!hash_table_count (jar->chains))
946 if (numeric_address_p (host))
947 /* If host is an IP address, only check for the exact match. */
950 /* Otherwise, check all the subdomains except the top-level (last)
951 one. As a domain with N components has N-1 dots, the number of
952 passes equals the number of dots. */
953 passes = count_char (host, '.');
957 /* Find chains that match HOST, starting with exact match and
958 progressing to less specific domains. For instance, given HOST
959 fly.srk.fer.hr, first look for fly.srk.fer.hr's chain, then
960 srk.fer.hr's, then fer.hr's. */
963 struct cookie *chain = hash_table_get (jar->chains, host);
965 dest[dest_count++] = chain;
966 if (++passcnt >= passes)
968 host = strchr (host, '.') + 1;
974 /* If FULL_PATH begins with PREFIX, return the length of PREFIX, zero
978 path_matches (const char *full_path, const char *prefix)
983 /* Wget's HTTP paths do not begin with '/' (the URL code treats it
984 as a mere separator, inspired by rfc1808), but the '/' is
985 assumed when matching against the cookie stuff. */
989 len = strlen (prefix);
991 if (0 != strncmp (full_path, prefix, len))
992 /* FULL_PATH doesn't begin with PREFIX. */
995 /* Length of PREFIX determines the quality of the match. */
999 /* Return non-zero iff COOKIE matches the provided parameters of the
1000 URL being downloaded: HOST, PORT, PATH, and SECFLAG.
1002 If PATH_GOODNESS is non-NULL, store the "path goodness" value
1003 there. That value is a measure of how closely COOKIE matches PATH,
1004 used for ordering cookies. */
1007 cookie_matches_url (const struct cookie *cookie,
1008 const char *host, int port, const char *path,
1009 int secflag, int *path_goodness)
1013 if (cookie_expired_p (cookie))
1014 /* Ignore stale cookies. Don't bother unchaining the cookie at
1015 this point -- Wget is a relatively short-lived application, and
1016 stale cookies will not be saved by `save_cookies'. On the
1017 other hand, this function should be as efficient as
1021 if (cookie->secure && !secflag)
1022 /* Don't transmit secure cookies over insecure connections. */
1024 if (cookie->port != PORT_ANY && cookie->port != port)
1027 /* If exact domain match is required, verify that cookie's domain is
1028 equal to HOST. If not, assume success on the grounds of the
1029 cookie's chain having been found by find_chains_of_host. */
1030 if (cookie->domain_exact
1031 && 0 != strcasecmp (host, cookie->domain))
1034 pg = path_matches (path, cookie->path);
1039 /* If the caller requested path_goodness, we return it. This is
1040 an optimization, so that the caller doesn't need to call
1041 path_matches() again. */
1042 *path_goodness = pg;
1046 /* A structure that points to a cookie, along with the additional
1047 information about the cookie's "goodness". This allows us to sort
1048 the cookies when returning them to the server, as required by the
1051 struct weighed_cookie {
1052 struct cookie *cookie;
1053 int domain_goodness;
1057 /* Comparator used for uniquifying the list. */
1060 equality_comparator (const void *p1, const void *p2)
1062 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
1063 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
1065 int namecmp = strcmp (wc1->cookie->attr, wc2->cookie->attr);
1066 int valuecmp = strcmp (wc1->cookie->value, wc2->cookie->value);
1068 /* We only really care whether both name and value are equal. We
1069 return them in this order only for consistency... */
1070 return namecmp ? namecmp : valuecmp;
1073 /* Eliminate duplicate cookies. "Duplicate cookies" are any two
1074 cookies with the same attr name and value. Whenever a duplicate
1075 pair is found, one of the cookies is removed. */
1078 eliminate_dups (struct weighed_cookie *outgoing, int count)
1080 struct weighed_cookie *h; /* hare */
1081 struct weighed_cookie *t; /* tortoise */
1082 struct weighed_cookie *end = outgoing + count;
1084 /* We deploy a simple uniquify algorithm: first sort the array
1085 according to our sort criteria, then copy it to itself, comparing
1086 each cookie to its neighbor and ignoring the duplicates. */
1088 qsort (outgoing, count, sizeof (struct weighed_cookie), equality_comparator);
1090 /* "Hare" runs through all the entries in the array, followed by
1091 "tortoise". If a duplicate is found, the hare skips it.
1092 Non-duplicate entries are copied to the tortoise ptr. */
1094 for (h = t = outgoing; h < end; h++)
1098 struct cookie *c0 = h[0].cookie;
1099 struct cookie *c1 = h[1].cookie;
1100 if (!strcmp (c0->attr, c1->attr) && !strcmp (c0->value, c1->value))
1101 continue; /* ignore the duplicate */
1104 /* If the hare has advanced past the tortoise (because of
1105 previous dups), make sure the values get copied. Otherwise,
1106 no copying is necessary. */
1112 return t - outgoing;
1115 /* Comparator used for sorting by quality. */
1118 goodness_comparator (const void *p1, const void *p2)
1120 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
1121 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
1123 /* Subtractions take `wc2' as the first argument becauase we want a
1124 sort in *decreasing* order of goodness. */
1125 int dgdiff = wc2->domain_goodness - wc1->domain_goodness;
1126 int pgdiff = wc2->path_goodness - wc1->path_goodness;
1128 /* Sort by domain goodness; if these are the same, sort by path
1129 goodness. (The sorting order isn't really specified; maybe it
1130 should be the other way around.) */
1131 return dgdiff ? dgdiff : pgdiff;
1134 /* Generate a `Cookie' header for a request that goes to HOST:PORT and
1135 requests PATH from the server. The resulting string is allocated
1136 with `malloc', and the caller is responsible for freeing it. If no
1137 cookies pertain to this request, i.e. no cookie header should be
1138 generated, NULL is returned. */
1141 cookie_header (struct cookie_jar *jar, const char *host,
1142 int port, const char *path, int secflag)
1144 struct cookie **chains;
1147 struct cookie *cookie;
1148 struct weighed_cookie *outgoing;
1151 int result_size, pos;
1153 /* First, find the cookie chains whose domains match HOST. */
1155 /* Allocate room for find_chains_of_host to write to. The number of
1156 chains can at most equal the number of subdomains, hence
1157 1+<number of dots>. */
1158 chains = alloca_array (struct cookie *, 1 + count_char (host, '.'));
1159 chain_count = find_chains_of_host (jar, host, chains);
1161 /* No cookies for this host. */
1165 cookies_now = time (NULL);
1167 /* Now extract from the chains those cookies that match our host
1168 (for domain_exact cookies), port (for cookies with port other
1169 than PORT_ANY), etc. See cookie_matches_url for details. */
1171 /* Count the number of matching cookies. */
1173 for (i = 0; i < chain_count; i++)
1174 for (cookie = chains[i]; cookie; cookie = cookie->next)
1175 if (cookie_matches_url (cookie, host, port, path, secflag, NULL))
1178 return NULL; /* no cookies matched */
1180 /* Allocate the array. */
1181 outgoing = alloca_array (struct weighed_cookie, count);
1183 /* Fill the array with all the matching cookies from the chains that
1186 for (i = 0; i < chain_count; i++)
1187 for (cookie = chains[i]; cookie; cookie = cookie->next)
1190 if (!cookie_matches_url (cookie, host, port, path, secflag, &pg))
1192 outgoing[ocnt].cookie = cookie;
1193 outgoing[ocnt].domain_goodness = strlen (cookie->domain);
1194 outgoing[ocnt].path_goodness = pg;
1197 assert (ocnt == count);
1199 /* Eliminate duplicate cookies; that is, those whose name and value
1201 count = eliminate_dups (outgoing, count);
1203 /* Sort the array so that best-matching domains come first, and
1204 that, within one domain, best-matching paths come first. */
1205 qsort (outgoing, count, sizeof (struct weighed_cookie), goodness_comparator);
1207 /* Count the space the name=value pairs will take. */
1209 for (i = 0; i < count; i++)
1211 struct cookie *c = outgoing[i].cookie;
1213 result_size += strlen (c->attr) + 1 + strlen (c->value);
1216 /* Allocate output buffer:
1217 name=value pairs -- result_size
1218 "; " separators -- (count - 1) * 2
1219 \0 terminator -- 1 */
1220 result_size = result_size + (count - 1) * 2 + 1;
1221 result = xmalloc (result_size);
1223 for (i = 0; i < count; i++)
1225 struct cookie *c = outgoing[i].cookie;
1226 int namlen = strlen (c->attr);
1227 int vallen = strlen (c->value);
1229 memcpy (result + pos, c->attr, namlen);
1231 result[pos++] = '=';
1232 memcpy (result + pos, c->value, vallen);
1236 result[pos++] = ';';
1237 result[pos++] = ' ';
1240 result[pos++] = '\0';
1241 assert (pos == result_size);
1245 /* Support for loading and saving cookies. The format used for
1246 loading and saving should be the format of the `cookies.txt' file
1247 used by Netscape and Mozilla, at least the Unix versions.
1248 (Apparently IE can export cookies in that format as well.) The
1249 format goes like this:
1251 DOMAIN DOMAIN-FLAG PATH SECURE-FLAG TIMESTAMP ATTR-NAME ATTR-VALUE
1253 DOMAIN -- cookie domain, optionally followed by :PORT
1254 DOMAIN-FLAG -- whether all hosts in the domain match
1256 SECURE-FLAG -- whether cookie requires secure connection
1257 TIMESTAMP -- expiry timestamp, number of seconds since epoch
1258 ATTR-NAME -- name of the cookie attribute
1259 ATTR-VALUE -- value of the cookie attribute (empty if absent)
1261 The fields are separated by TABs. All fields are mandatory, except
1262 for ATTR-VALUE. The `-FLAG' fields are boolean, their legal values
1263 being "TRUE" and "FALSE". Empty lines, lines consisting of
1264 whitespace only, and comment lines (beginning with # optionally
1265 preceded by whitespace) are ignored.
1267 Example line from cookies.txt (split in two lines for readability):
1269 .google.com TRUE / FALSE 2147368447 \
1270 PREF ID=34bb47565bbcd47b:LD=en:NR=20:TM=985172580:LM=985739012
1274 /* If the region [B, E) ends with :<digits>, parse the number, return
1275 it, and store new boundary (location of the `:') to DOMAIN_E_PTR.
1276 If port is not specified, return 0. */
1279 domain_port (const char *domain_b, const char *domain_e,
1280 const char **domain_e_ptr)
1284 const char *colon = memchr (domain_b, ':', domain_e - domain_b);
1287 for (p = colon + 1; p < domain_e && ISDIGIT (*p); p++)
1288 port = 10 * port + (*p - '0');
1290 /* Garbage following port number. */
1292 *domain_e_ptr = colon;
1296 #define GET_WORD(p, b, e) do { \
1298 while (*p && *p != '\t') \
1301 if (b == e || !*p) \
1306 /* Load cookies from FILE. */
1309 cookie_jar_load (struct cookie_jar *jar, const char *file)
1312 FILE *fp = fopen (file, "r");
1315 logprintf (LOG_NOTQUIET, _("Cannot open cookies file `%s': %s\n"),
1316 file, strerror (errno));
1319 cookies_now = time (NULL);
1321 for (; ((line = read_whole_line (fp)) != NULL); xfree (line))
1323 struct cookie *cookie;
1329 char *domain_b = NULL, *domain_e = NULL;
1330 char *domflag_b = NULL, *domflag_e = NULL;
1331 char *path_b = NULL, *path_e = NULL;
1332 char *secure_b = NULL, *secure_e = NULL;
1333 char *expires_b = NULL, *expires_e = NULL;
1334 char *name_b = NULL, *name_e = NULL;
1335 char *value_b = NULL, *value_e = NULL;
1337 /* Skip leading white-space. */
1338 while (*p && ISSPACE (*p))
1340 /* Ignore empty lines. */
1341 if (!*p || *p == '#')
1344 GET_WORD (p, domain_b, domain_e);
1345 GET_WORD (p, domflag_b, domflag_e);
1346 GET_WORD (p, path_b, path_e);
1347 GET_WORD (p, secure_b, secure_e);
1348 GET_WORD (p, expires_b, expires_e);
1349 GET_WORD (p, name_b, name_e);
1351 /* Don't use GET_WORD for value because it ends with newline,
1354 value_e = p + strlen (p);
1355 if (value_e > value_b && value_e[-1] == '\n')
1357 if (value_e > value_b && value_e[-1] == '\r')
1359 /* Empty values are legal (I think), so don't bother checking. */
1361 cookie = cookie_new ();
1363 cookie->attr = strdupdelim (name_b, name_e);
1364 cookie->value = strdupdelim (value_b, value_e);
1365 cookie->path = strdupdelim (path_b, path_e);
1366 cookie->secure = BOUNDED_EQUAL (secure_b, secure_e, "TRUE");
1368 /* Curl source says, quoting Andre Garcia: "flag: A TRUE/FALSE
1369 value indicating if all machines within a given domain can
1370 access the variable. This value is set automatically by the
1371 browser, depending on the value set for the domain." */
1372 cookie->domain_exact = !BOUNDED_EQUAL (domflag_b, domflag_e, "TRUE");
1374 /* DOMAIN needs special treatment because we might need to
1375 extract the port. */
1376 port = domain_port (domain_b, domain_e, (const char **)&domain_e);
1378 cookie->port = port;
1380 if (*domain_b == '.')
1381 ++domain_b; /* remove leading dot internally */
1382 cookie->domain = strdupdelim (domain_b, domain_e);
1384 /* safe default in case EXPIRES field is garbled. */
1385 expiry = (double)cookies_now - 1;
1387 /* I don't like changing the line, but it's safe here. (line is
1390 sscanf (expires_b, "%lf", &expiry);
1394 /* EXPIRY can be 0 for session cookies saved because the
1395 user specified `--keep-session-cookies' in the past.
1396 They remain session cookies, and will be saved only if
1397 the user has specified `keep-session-cookies' again. */
1401 if (expiry < cookies_now)
1402 goto abort_cookie; /* ignore stale cookie. */
1403 cookie->expiry_time = expiry;
1404 cookie->permanent = 1;
1407 store_cookie (jar, cookie);
1413 delete_cookie (cookie);
1418 /* Mapper for save_cookies callable by hash_table_map. VALUE points
1419 to the head in a chain of cookies. The function prints the entire
1423 save_cookies_mapper (void *key, void *value, void *arg)
1425 FILE *fp = (FILE *)arg;
1426 char *domain = (char *)key;
1427 struct cookie *cookie = (struct cookie *)value;
1428 for (; cookie; cookie = cookie->next)
1430 if (!cookie->permanent && !opt.keep_session_cookies)
1432 if (cookie_expired_p (cookie))
1434 if (!cookie->domain_exact)
1437 if (cookie->port != PORT_ANY)
1438 fprintf (fp, ":%d", cookie->port);
1439 fprintf (fp, "\t%s\t%s\t%s\t%.0f\t%s\t%s\n",
1440 cookie->domain_exact ? "FALSE" : "TRUE",
1441 cookie->path, cookie->secure ? "TRUE" : "FALSE",
1442 (double)cookie->expiry_time,
1443 cookie->attr, cookie->value);
1445 return 1; /* stop mapping */
1450 /* Save cookies, in format described above, to FILE. */
1453 cookie_jar_save (struct cookie_jar *jar, const char *file)
1457 DEBUGP (("Saving cookies to %s.\n", file));
1459 cookies_now = time (NULL);
1461 fp = fopen (file, "w");
1464 logprintf (LOG_NOTQUIET, _("Cannot open cookies file `%s': %s\n"),
1465 file, strerror (errno));
1469 fputs ("# HTTP cookie file.\n", fp);
1470 fprintf (fp, "# Generated by Wget on %s.\n", datetime_str (&cookies_now));
1471 fputs ("# Edit at your own risk.\n\n", fp);
1473 hash_table_map (jar->chains, save_cookies_mapper, fp);
1476 logprintf (LOG_NOTQUIET, _("Error writing to `%s': %s\n"),
1477 file, strerror (errno));
1478 if (fclose (fp) < 0)
1479 logprintf (LOG_NOTQUIET, _("Error closing `%s': %s\n"),
1480 file, strerror (errno));
1482 DEBUGP (("Done saving cookies.\n"));
1485 /* Destroy all the elements in the chain and unhook it from the cookie
1486 jar. This is written in the form of a callback to hash_table_map
1487 and used by cookie_jar_delete to delete all the cookies in a
1491 nuke_cookie_chain (void *value, void *key, void *arg)
1493 char *chain_key = (char *)value;
1494 struct cookie *chain = (struct cookie *)key;
1495 struct cookie_jar *jar = (struct cookie_jar *)arg;
1497 /* Remove the chain from the table and free the key. */
1498 hash_table_remove (jar->chains, chain_key);
1501 /* Then delete all the cookies in the chain. */
1504 struct cookie *next = chain->next;
1505 delete_cookie (chain);
1513 /* Clean up cookie-related data. */
1516 cookie_jar_delete (struct cookie_jar *jar)
1518 hash_table_map (jar->chains, nuke_cookie_chain, jar);
1519 hash_table_destroy (jar->chains);
1523 /* Test cases. Currently this is only tests parse_set_cookies. To
1524 use, recompile Wget with -DTEST_COOKIES and call test_cookies()
1529 char *test_results[10];
1531 static int test_parse_cookies_callback (struct cookie *ignored,
1532 const char *nb, const char *ne,
1533 const char *vb, const char *ve)
1535 test_results[test_count++] = strdupdelim (nb, ne);
1536 test_results[test_count++] = strdupdelim (vb, ve);
1543 /* Tests expected to succeed: */
1549 { "arg=value", {"arg", "value", NULL} },
1550 { "arg1=value1;arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
1551 { "arg1=value1; arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
1552 { "arg1=value1; arg2=value2;", {"arg1", "value1", "arg2", "value2", NULL} },
1553 { "arg1=value1; arg2=value2; ", {"arg1", "value1", "arg2", "value2", NULL} },
1554 { "arg1=\"value1\"; arg2=\"\"", {"arg1", "value1", "arg2", "", NULL} },
1555 { "arg=", {"arg", "", NULL} },
1556 { "arg1=; arg2=", {"arg1", "", "arg2", "", NULL} },
1557 { "arg1 = ; arg2= ", {"arg1", "", "arg2", "", NULL} },
1560 /* Tests expected to fail: */
1561 static char *tests_fail[] = {
1563 "arg=\"unterminated",
1565 "arg1=;=another-empty-name",
1569 for (i = 0; i < countof (tests_succ); i++)
1572 char *data = tests_succ[i].data;
1573 char **expected = tests_succ[i].results;
1577 c = parse_set_cookies (data, test_parse_cookies_callback, 1);
1580 printf ("NULL cookie returned for valid data: %s\n", data);
1584 for (ind = 0; ind < test_count; ind += 2)
1588 if (0 != strcmp (expected[ind], test_results[ind]))
1589 printf ("Invalid name %d for '%s' (expected '%s', got '%s')\n",
1590 ind / 2 + 1, data, expected[ind], test_results[ind]);
1591 if (0 != strcmp (expected[ind + 1], test_results[ind + 1]))
1592 printf ("Invalid value %d for '%s' (expected '%s', got '%s')\n",
1593 ind / 2 + 1, data, expected[ind + 1], test_results[ind + 1]);
1595 if (ind < test_count || expected[ind])
1596 printf ("Unmatched number of results: %s\n", data);
1599 for (i = 0; i < countof (tests_fail); i++)
1602 char *data = tests_fail[i];
1604 c = parse_set_cookies (data, test_parse_cookies_callback, 1);
1606 printf ("Failed to report error on invalid data: %s\n", data);
1609 #endif /* TEST_COOKIES */