1 /* Support for cookies.
2 Copyright (C) 2001, 2002 Free Software Foundation, Inc.
4 This file is part of GNU Wget.
6 GNU Wget is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or (at
9 your option) any later version.
11 GNU Wget is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Wget; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 In addition, as a special exception, the Free Software Foundation
21 gives permission to link the code of its release of Wget with the
22 OpenSSL project's "OpenSSL" library (or with modified versions of it
23 that use the same license as the "OpenSSL" library), and distribute
24 the linked executables. You must obey the GNU General Public License
25 in all respects for all of the code used other than "OpenSSL". If you
26 modify this file, you may extend this exception to your version of the
27 file, but you are not obligated to do so. If you do not wish to do
28 so, delete this exception statement from your version. */
30 /* Written by Hrvoje Niksic. Parts are loosely inspired by the
31 cookie patch submitted by Tomasz Wegrzanowski.
33 This implements the client-side cookie support, as specified
34 (loosely) by Netscape's "preliminary specification", currently
37 http://wp.netscape.com/newsref/std/cookie_spec.html
39 rfc2109 is not supported because of its incompatibilities with the
40 above widely-used specification. rfc2965 is entirely ignored,
41 since popular client software doesn't implement it, and even the
42 sites that do send Set-Cookie2 also emit Set-Cookie for
59 /* This should *really* be in a .h file! */
60 time_t http_atotm (const char *);
62 /* Declarations of `struct cookie' and the most basic functions. */
64 /* Cookie jar serves as cookie storage and a means of retrieving
65 cookies efficiently. All cookies with the same domain are stored
66 in a linked list called "chain". A cookie chain can be reached by
67 looking up the domain in the cookie jar's `chains' table.
69 For example, to reach all the cookies under google.com, one must
70 execute hash_table_get (jar->chains, "google.com"). Of
71 course, when sending a cookie to `www.google.com', one must search
72 for cookies that belong to either `www.google.com' or `google.com'
73 -- but the point is that the code doesn't need to go through *all*
77 /* Cookie chains indexed by domain. */
78 struct hash_table *chains;
80 int cookie_count; /* number of cookies in the jar. */
83 /* Value set by entry point functions, so that the low-level
84 routines don't need to call time() all the time. */
90 struct cookie_jar *jar = xnew (struct cookie_jar);
91 jar->chains = make_nocase_string_hash_table (0);
92 jar->cookie_count = 0;
97 char *domain; /* domain of the cookie */
98 int port; /* port number */
99 char *path; /* path prefix of the cookie */
101 int secure; /* whether cookie should be
102 transmitted over non-https
104 int domain_exact; /* whether DOMAIN must match as a
107 int permanent; /* whether the cookie should outlive
109 time_t expiry_time; /* time when the cookie expires, 0
110 means undetermined. */
112 int discard_requested; /* whether cookie was created to
113 request discarding another
116 char *attr; /* cookie attribute name */
117 char *value; /* cookie attribute value */
119 struct cookie *next; /* used for chaining of cookies in the
123 #define PORT_ANY (-1)
125 /* Allocate and return a new, empty cookie structure. */
127 static struct cookie *
130 struct cookie *cookie = xnew0 (struct cookie);
132 /* Both cookie->permanent and cookie->expiry_time are now 0. This
133 means that the cookie doesn't expire, but is only valid for this
134 session (i.e. not written out to disk). */
136 cookie->port = PORT_ANY;
140 /* Non-zero if the cookie has expired. Assumes cookies_now has been
141 set by one of the entry point functions. */
144 cookie_expired_p (const struct cookie *c)
146 return c->expiry_time != 0 && c->expiry_time < cookies_now;
149 /* Deallocate COOKIE and its components. */
152 delete_cookie (struct cookie *cookie)
154 xfree_null (cookie->domain);
155 xfree_null (cookie->path);
156 xfree_null (cookie->attr);
157 xfree_null (cookie->value);
161 /* Functions for storing cookies.
163 All cookies can be reached beginning with jar->chains. The key in
164 that table is the domain name, and the value is a linked list of
165 all cookies from that domain. Every new cookie is placed on the
168 /* Find and return a cookie in JAR whose domain, port, path, and
169 attribute name correspond to COOKIE. If found, *PREVPTR is set to
170 the cookie that precedes it in the chain, or to NULL if the found
171 cookie is the head of a chain.
173 If no matching cookie is found, return NULL. */
175 static struct cookie *
176 find_matching_cookie (struct cookie_jar *jar, struct cookie *cookie,
177 struct cookie **prevptr)
179 struct cookie *chain, *prev;
181 chain = hash_table_get (jar->chains, cookie->domain);
186 for (; chain; prev = chain, chain = chain->next)
187 if (0 == strcmp (cookie->path, chain->path)
188 && 0 == strcmp (cookie->attr, chain->attr)
189 && cookie->port == chain->port)
200 /* Store COOKIE to the jar.
202 This is done by placing COOKIE at the head of its chain. However,
203 if COOKIE matches a cookie already in memory, as determined by
204 find_matching_cookie, the old cookie is unlinked and destroyed.
206 The key of each chain's hash table entry is allocated only the
207 first time; subsequent hash_table_put calls reuse the same key. */
210 store_cookie (struct cookie_jar *jar, struct cookie *cookie)
212 struct cookie *chain_head;
215 if (hash_table_get_pair (jar->chains, cookie->domain,
216 &chain_key, &chain_head))
218 /* A chain of cookies in this domain already exists. Check for
219 duplicates -- if an extant cookie exactly matches our domain,
220 port, path, and name, replace it. */
222 struct cookie *victim = find_matching_cookie (jar, cookie, &prev);
226 /* Remove VICTIM from the chain. COOKIE will be placed at
230 prev->next = victim->next;
231 cookie->next = chain_head;
235 /* prev is NULL; apparently VICTIM was at the head of
236 the chain. This place will be taken by COOKIE, so
237 all we need to do is: */
238 cookie->next = victim->next;
240 delete_cookie (victim);
242 DEBUGP (("Deleted old cookie (to be replaced.)\n"));
245 cookie->next = chain_head;
249 /* We are now creating the chain. Use a copy of cookie->domain
250 as the key for the life-time of the chain. Using
251 cookie->domain would be unsafe because the life-time of the
252 chain may exceed the life-time of the cookie. (Cookies may
253 be deleted from the chain by this very function.) */
255 chain_key = xstrdup (cookie->domain);
258 hash_table_put (jar->chains, chain_key, cookie);
264 time_t exptime = cookie->expiry_time;
265 DEBUGP (("\nStored cookie %s %d%s %s <%s> <%s> [expiry %s] %s %s\n",
266 cookie->domain, cookie->port,
267 cookie->port == PORT_ANY ? " (ANY)" : "",
269 cookie->permanent ? "permanent" : "session",
270 cookie->secure ? "secure" : "insecure",
271 cookie->expiry_time ? datetime_str (&exptime) : "none",
272 cookie->attr, cookie->value));
277 /* Discard a cookie matching COOKIE's domain, port, path, and
278 attribute name. This gets called when we encounter a cookie whose
279 expiry date is in the past, or whose max-age is set to 0. The
280 former corresponds to netscape cookie spec, while the latter is
281 specified by rfc2109. */
284 discard_matching_cookie (struct cookie_jar *jar, struct cookie *cookie)
286 struct cookie *prev, *victim;
288 if (!hash_table_count (jar->chains))
289 /* No elements == nothing to discard. */
292 victim = find_matching_cookie (jar, cookie, &prev);
296 /* Simply unchain the victim. */
297 prev->next = victim->next;
300 /* VICTIM was head of its chain. We need to place a new
301 cookie at the head. */
302 char *chain_key = NULL;
305 res = hash_table_get_pair (jar->chains, victim->domain,
310 /* VICTIM was the only cookie in the chain. Destroy the
311 chain and deallocate the chain key. */
312 hash_table_remove (jar->chains, victim->domain);
316 hash_table_put (jar->chains, chain_key, victim->next);
318 delete_cookie (victim);
319 DEBUGP (("Discarded old cookie.\n"));
323 /* Functions for parsing the `Set-Cookie' header, and creating new
324 cookies from the wire. */
326 #define NAME_IS(string_literal) \
327 BOUNDED_EQUAL_NO_CASE (name_b, name_e, string_literal)
329 #define VALUE_EXISTS (value_b && value_e)
331 #define VALUE_NON_EMPTY (VALUE_EXISTS && (value_b != value_e))
333 /* Update the appropriate cookie field. [name_b, name_e) are expected
334 to delimit the attribute name, while [value_b, value_e) (optional)
335 should delimit the attribute value.
337 When called the first time, it will set the cookie's attribute name
338 and value. After that, it will check the attribute name for
339 special fields such as `domain', `path', etc. Where appropriate,
340 it will parse the values of the fields it recognizes and fill the
341 corresponding fields in COOKIE.
343 Returns 1 on success. Returns zero in case a syntax error is
344 found; such a cookie should be discarded. */
347 update_cookie_field (struct cookie *cookie,
348 const char *name_b, const char *name_e,
349 const char *value_b, const char *value_e)
351 assert (name_b != NULL && name_e != NULL);
357 cookie->attr = strdupdelim (name_b, name_e);
358 cookie->value = strdupdelim (value_b, value_e);
362 if (NAME_IS ("domain"))
364 if (!VALUE_NON_EMPTY)
366 xfree_null (cookie->domain);
367 /* Strictly speaking, we should set cookie->domain_exact if the
368 domain doesn't begin with a dot. But many sites set the
369 domain to "foo.com" and expect "subhost.foo.com" to get the
370 cookie, and it apparently works. */
373 cookie->domain = strdupdelim (value_b, value_e);
376 else if (NAME_IS ("path"))
378 if (!VALUE_NON_EMPTY)
380 xfree_null (cookie->path);
381 cookie->path = strdupdelim (value_b, value_e);
384 else if (NAME_IS ("expires"))
389 if (!VALUE_NON_EMPTY)
391 BOUNDED_TO_ALLOCA (value_b, value_e, value_copy);
393 expires = http_atotm (value_copy);
394 if (expires != (time_t) -1)
396 cookie->permanent = 1;
397 cookie->expiry_time = expires;
400 /* Error in expiration spec. Assume default (cookie doesn't
401 expire, but valid only for this session.) */
404 /* According to netscape's specification, expiry time in the
405 past means that discarding of a matching cookie is
407 if (cookie->expiry_time < cookies_now)
408 cookie->discard_requested = 1;
412 else if (NAME_IS ("max-age"))
417 if (!VALUE_NON_EMPTY)
419 BOUNDED_TO_ALLOCA (value_b, value_e, value_copy);
421 sscanf (value_copy, "%lf", &maxage);
423 /* something went wrong. */
425 cookie->permanent = 1;
426 cookie->expiry_time = cookies_now + maxage;
428 /* According to rfc2109, a cookie with max-age of 0 means that
429 discarding of a matching cookie is requested. */
431 cookie->discard_requested = 1;
435 else if (NAME_IS ("secure"))
437 /* ignore value completely */
442 /* Unrecognized attribute; ignore it. */
448 /* Returns non-zero for characters that are legal in the name of an
449 attribute. This used to allow only alphanumerics, '-', and '_',
450 but we need to be more lenient because a number of sites want to
451 use weirder attribute names. rfc2965 "informally specifies"
452 attribute name (token) as "a sequence of non-special, non-white
453 space characters". So we allow everything except the stuff we know
456 #define ATTR_NAME_CHAR(c) ((c) > 32 && (c) < 127 \
457 && (c) != '"' && (c) != '=' \
458 && (c) != ';' && (c) != ',')
460 /* Parse the contents of the `Set-Cookie' header. The header looks
463 name1=value1; name2=value2; ...
465 Trailing semicolon is optional; spaces are allowed between all
466 tokens. Additionally, values may be quoted.
468 A new cookie is returned upon success, NULL otherwise. The
469 specified CALLBACK function (normally `update_cookie_field') is used
470 to update the fields of the newly created cookie structure. */
472 static struct cookie *
473 parse_set_cookies (const char *sc,
474 int (*callback) (struct cookie *,
475 const char *, const char *,
476 const char *, const char *),
479 struct cookie *cookie = cookie_new ();
481 /* #### Hand-written DFAs are no fun to debug. We'd be better off
482 rewriting this as an inline parser. */
484 enum { S_START, S_NAME, S_NAME_POST,
485 S_VALUE_PRE, S_VALUE, S_QUOTED_VALUE, S_VALUE_TRAILSPACE,
486 S_ATTR_ACTION, S_DONE, S_ERROR
492 const char *name_b = NULL, *name_e = NULL;
493 const char *value_b = NULL, *value_e = NULL;
497 while (state != S_DONE && state != S_ERROR)
504 else if (ISSPACE (c))
505 /* Strip all whitespace preceding the name. */
507 else if (ATTR_NAME_CHAR (c))
513 /* empty attr name not allowed */
517 if (!c || c == ';' || c == '=' || ISSPACE (c))
522 else if (ATTR_NAME_CHAR (c))
530 value_b = value_e = NULL;
533 state = S_ATTR_ACTION;
540 else if (ISSPACE (c))
541 /* Ignore space and keep the state. */
549 value_b = value_e = p;
552 state = S_ATTR_ACTION;
558 state = S_QUOTED_VALUE;
560 else if (ISSPACE (c))
570 if (!c || c == ';' || ISSPACE (c))
573 state = S_VALUE_TRAILSPACE;
577 value_e = NULL; /* no trailing space */
586 state = S_VALUE_TRAILSPACE;
593 case S_VALUE_TRAILSPACE:
597 state = S_ATTR_ACTION;
600 state = S_ATTR_ACTION;
601 else if (ISSPACE (c))
608 int legal = callback (cookie, name_b, name_e, value_b, value_e);
614 BOUNDED_TO_ALLOCA (name_b, name_e, name);
615 logprintf (LOG_NOTQUIET,
616 _("Error in Set-Cookie, field `%s'"),
627 /* handled by loop condition */
634 delete_cookie (cookie);
635 if (state != S_ERROR)
639 logprintf (LOG_NOTQUIET,
640 _("Syntax error in Set-Cookie: %s at position %d.\n"),
641 escnonprint (sc), p - sc);
645 /* Sanity checks. These are important, otherwise it is possible for
646 malicious attackers to destroy important cookie information and/or
647 violate your privacy. */
650 #define REQUIRE_DIGITS(p) do { \
653 for (++p; ISDIGIT (*p); p++) \
657 #define REQUIRE_DOT(p) do { \
662 /* Check whether ADDR matches <digits>.<digits>.<digits>.<digits>.
664 We don't want to call network functions like inet_addr() because
665 all we need is a check, preferably one that is small, fast, and
669 numeric_address_p (const char *addr)
671 const char *p = addr;
673 REQUIRE_DIGITS (p); /* A */
674 REQUIRE_DOT (p); /* . */
675 REQUIRE_DIGITS (p); /* B */
676 REQUIRE_DOT (p); /* . */
677 REQUIRE_DIGITS (p); /* C */
678 REQUIRE_DOT (p); /* . */
679 REQUIRE_DIGITS (p); /* D */
686 /* Check whether COOKIE_DOMAIN is an appropriate domain for HOST.
687 Originally I tried to make the check compliant with rfc2109, but
688 the sites deviated too often, so I had to fall back to "tail
689 matching", as defined by the original Netscape's cookie spec. */
692 check_domain_match (const char *cookie_domain, const char *host)
696 /* Numeric address requires exact match. It also requires HOST to
698 if (numeric_address_p (cookie_domain))
699 return 0 == strcmp (cookie_domain, host);
703 /* For the sake of efficiency, check for exact match first. */
704 if (0 == strcasecmp (cookie_domain, host))
709 /* COOKIE_DOMAIN must match the tail of HOST. */
710 if (!match_tail (host, cookie_domain, 1))
713 /* We know that COOKIE_DOMAIN is a subset of HOST; however, we must
714 make sure that somebody is not trying to set the cookie for a
715 subdomain shared by many entities. For example, "company.co.uk"
716 must not be allowed to set a cookie for ".co.uk". On the other
717 hand, "sso.redhat.de" should be able to set a cookie for
720 The only marginally sane way to handle this I can think of is to
721 reject on the basis of the length of the second-level domain name
722 (but only when the top-level domain is unknown), with the assumption
723 that those of three or fewer characters could be reserved. For
726 .co.org -> works because the TLD is known
727 .co.uk -> doesn't work because "co" is only two chars long
728 .com.au -> doesn't work because "com" is only 3 chars long
729 .cnn.uk -> doesn't work because "cnn" is also only 3 chars long (ugh)
730 .cnn.de -> doesn't work for the same reason (ugh!!)
731 .abcd.de -> works because "abcd" is 4 chars long
732 .img.cnn.de -> works because it's not trying to set the 2nd level domain
733 .cnn.co.uk -> works for the same reason
735 That should prevent misuse, while allowing reasonable usage. If
736 someone knows of a better way to handle this, please let me
739 const char *p = cookie_domain;
740 int dccount = 1; /* number of domain components */
741 int ldcl = 0; /* last domain component length */
742 int nldcl = 0; /* next to last domain component length */
745 /* Ignore leading period in this calculation. */
748 for (out = 0; !out; p++)
756 /* Empty domain component found -- the domain is invalid. */
758 if (*(p + 1) == '\0')
760 /* Tolerate trailing '.' by not treating the domain as
761 one ending with an empty domain component. */
783 int known_toplevel = 0;
784 static const char *known_toplevel_domains[] = {
785 ".com", ".edu", ".net", ".org", ".gov", ".mil", ".int"
787 for (i = 0; i < countof (known_toplevel_domains); i++)
788 if (match_tail (cookie_domain, known_toplevel_domains[i], 1))
793 if (!known_toplevel && nldcl <= 3)
800 /* Don't allow the host "foobar.com" to set a cookie for domain
802 if (*cookie_domain != '.')
804 int dlen = strlen (cookie_domain);
805 int hlen = strlen (host);
806 /* cookie host: hostname.foobar.com */
807 /* desired domain: bar.com */
808 /* '.' must be here in host-> ^ */
809 if (hlen > dlen && host[hlen - dlen - 1] != '.')
818 static int path_matches (const char *, const char *);
820 /* Check whether PATH begins with COOKIE_PATH. */
823 check_path_match (const char *cookie_path, const char *path)
825 return path_matches (path, cookie_path);
828 /* Process the HTTP `Set-Cookie' header. This results in storing the
829 cookie or discarding a matching one, or ignoring it completely, all
830 depending on the contents. */
833 cookie_handle_set_cookie (struct cookie_jar *jar,
834 const char *host, int port,
835 const char *path, const char *set_cookie)
837 struct cookie *cookie;
838 cookies_now = time (NULL);
840 cookie = parse_set_cookies (set_cookie, update_cookie_field, 0);
844 /* Sanitize parts of cookie. */
849 /* If the domain was not provided, we use the one we're talking
850 to, and set exact match. */
851 cookie->domain = xstrdup (host);
852 cookie->domain_exact = 1;
853 /* Set the port, but only if it's non-default. */
854 if (port != 80 && port != 443)
859 if (!check_domain_match (cookie->domain, host))
861 logprintf (LOG_NOTQUIET,
862 _("Cookie coming from %s attempted to set domain to %s\n"),
863 escnonprint (host), escnonprint (cookie->domain));
864 xfree (cookie->domain);
871 /* The cookie doesn't set path: set it to the URL path, sans the
872 file part ("/dir/file" truncated to "/dir/"). */
873 char *trailing_slash = strrchr (path, '/');
875 cookie->path = strdupdelim (path, trailing_slash + 1);
877 /* no slash in the string -- can this even happen? */
878 cookie->path = xstrdup (path);
882 /* The cookie sets its own path; verify that it is legal. */
883 if (!check_path_match (cookie->path, path))
885 DEBUGP (("Attempt to fake the path: %s, %s\n",
886 cookie->path, path));
891 /* Now store the cookie, or discard an existing cookie, if
892 discarding was requested. */
894 if (cookie->discard_requested)
896 discard_matching_cookie (jar, cookie);
900 store_cookie (jar, cookie);
905 delete_cookie (cookie);
908 /* Support for sending out cookies in HTTP requests, based on
909 previously stored cookies. Entry point is
910 `build_cookies_request'. */
912 /* Return a count of how many times CHR occurs in STRING. */
915 count_char (const char *string, char chr)
919 for (p = string; *p; p++)
925 /* Find the cookie chains whose domains match HOST and store them to
928 A cookie chain is the head of a list of cookies that belong to a
929 host/domain. Given HOST "img.search.xemacs.org", this function
930 will return the chains for "img.search.xemacs.org",
931 "search.xemacs.org", and "xemacs.org" -- those of them that exist
934 DEST should be large enough to accept (in the worst case) as many
935 elements as there are domain components of HOST. */
938 find_chains_of_host (struct cookie_jar *jar, const char *host,
939 struct cookie *dest[])
944 /* Bail out quickly if there are no cookies in the jar. */
945 if (!hash_table_count (jar->chains))
948 if (numeric_address_p (host))
949 /* If host is an IP address, only check for the exact match. */
952 /* Otherwise, check all the subdomains except the top-level (last)
953 one. As a domain with N components has N-1 dots, the number of
954 passes equals the number of dots. */
955 passes = count_char (host, '.');
959 /* Find chains that match HOST, starting with exact match and
960 progressing to less specific domains. For instance, given HOST
961 fly.srk.fer.hr, first look for fly.srk.fer.hr's chain, then
962 srk.fer.hr's, then fer.hr's. */
965 struct cookie *chain = hash_table_get (jar->chains, host);
967 dest[dest_count++] = chain;
968 if (++passcnt >= passes)
970 host = strchr (host, '.') + 1;
976 /* If FULL_PATH begins with PREFIX, return the length of PREFIX, zero
980 path_matches (const char *full_path, const char *prefix)
985 /* Wget's HTTP paths do not begin with '/' (the URL code treats it
986 as a mere separator, inspired by rfc1808), but the '/' is
987 assumed when matching against the cookie stuff. */
991 len = strlen (prefix);
993 if (0 != strncmp (full_path, prefix, len))
994 /* FULL_PATH doesn't begin with PREFIX. */
997 /* Length of PREFIX determines the quality of the match. */
1001 /* Return non-zero iff COOKIE matches the provided parameters of the
1002 URL being downloaded: HOST, PORT, PATH, and SECFLAG.
1004 If PATH_GOODNESS is non-NULL, store the "path goodness" value
1005 there. That value is a measure of how closely COOKIE matches PATH,
1006 used for ordering cookies. */
1009 cookie_matches_url (const struct cookie *cookie,
1010 const char *host, int port, const char *path,
1011 int secflag, int *path_goodness)
1015 if (cookie_expired_p (cookie))
1016 /* Ignore stale cookies. Don't bother unchaining the cookie at
1017 this point -- Wget is a relatively short-lived application, and
1018 stale cookies will not be saved by `save_cookies'. On the
1019 other hand, this function should be as efficient as
1023 if (cookie->secure && !secflag)
1024 /* Don't transmit secure cookies over insecure connections. */
1026 if (cookie->port != PORT_ANY && cookie->port != port)
1029 /* If exact domain match is required, verify that cookie's domain is
1030 equal to HOST. If not, assume success on the grounds of the
1031 cookie's chain having been found by find_chains_of_host. */
1032 if (cookie->domain_exact
1033 && 0 != strcasecmp (host, cookie->domain))
1036 pg = path_matches (path, cookie->path);
1041 /* If the caller requested path_goodness, we return it. This is
1042 an optimization, so that the caller doesn't need to call
1043 path_matches() again. */
1044 *path_goodness = pg;
1048 /* A structure that points to a cookie, along with the additional
1049 information about the cookie's "goodness". This allows us to sort
1050 the cookies when returning them to the server, as required by the
1053 struct weighed_cookie {
1054 struct cookie *cookie;
1055 int domain_goodness;
1059 /* Comparator used for uniquifying the list. */
1062 equality_comparator (const void *p1, const void *p2)
1064 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
1065 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
1067 int namecmp = strcmp (wc1->cookie->attr, wc2->cookie->attr);
1068 int valuecmp = strcmp (wc1->cookie->value, wc2->cookie->value);
1070 /* We only really care whether both name and value are equal. We
1071 return them in this order only for consistency... */
1072 return namecmp ? namecmp : valuecmp;
1075 /* Eliminate duplicate cookies. "Duplicate cookies" are any two
1076 cookies with the same attr name and value. Whenever a duplicate
1077 pair is found, one of the cookies is removed. */
1080 eliminate_dups (struct weighed_cookie *outgoing, int count)
1082 struct weighed_cookie *h; /* hare */
1083 struct weighed_cookie *t; /* tortoise */
1084 struct weighed_cookie *end = outgoing + count;
1086 /* We deploy a simple uniquify algorithm: first sort the array
1087 according to our sort criteria, then copy it to itself, comparing
1088 each cookie to its neighbor and ignoring the duplicates. */
1090 qsort (outgoing, count, sizeof (struct weighed_cookie), equality_comparator);
1092 /* "Hare" runs through all the entries in the array, followed by
1093 "tortoise". If a duplicate is found, the hare skips it.
1094 Non-duplicate entries are copied to the tortoise ptr. */
1096 for (h = t = outgoing; h < end; h++)
1100 struct cookie *c0 = h[0].cookie;
1101 struct cookie *c1 = h[1].cookie;
1102 if (!strcmp (c0->attr, c1->attr) && !strcmp (c0->value, c1->value))
1103 continue; /* ignore the duplicate */
1106 /* If the hare has advanced past the tortoise (because of
1107 previous dups), make sure the values get copied. Otherwise,
1108 no copying is necessary. */
1114 return t - outgoing;
1117 /* Comparator used for sorting by quality. */
1120 goodness_comparator (const void *p1, const void *p2)
1122 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
1123 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
1125 /* Subtractions take `wc2' as the first argument because we want a
1126 sort in *decreasing* order of goodness. */
1127 int dgdiff = wc2->domain_goodness - wc1->domain_goodness;
1128 int pgdiff = wc2->path_goodness - wc1->path_goodness;
1130 /* Sort by domain goodness; if these are the same, sort by path
1131 goodness. (The sorting order isn't really specified; maybe it
1132 should be the other way around.) */
1133 return dgdiff ? dgdiff : pgdiff;
1136 /* Generate a `Cookie' header for a request that goes to HOST:PORT and
1137 requests PATH from the server. The resulting string is allocated
1138 with `malloc', and the caller is responsible for freeing it. If no
1139 cookies pertain to this request, i.e. no cookie header should be
1140 generated, NULL is returned. */
1143 cookie_header (struct cookie_jar *jar, const char *host,
1144 int port, const char *path, int secflag)
1146 struct cookie **chains;
1149 struct cookie *cookie;
1150 struct weighed_cookie *outgoing;
1153 int result_size, pos;
1155 /* First, find the cookie chains whose domains match HOST. */
1157 /* Allocate room for find_chains_of_host to write to. The number of
1158 chains can at most equal the number of subdomains, hence
1159 1+<number of dots>. */
1160 chains = alloca_array (struct cookie *, 1 + count_char (host, '.'));
1161 chain_count = find_chains_of_host (jar, host, chains);
1163 /* No cookies for this host. */
1167 cookies_now = time (NULL);
1169 /* Now extract from the chains those cookies that match our host
1170 (for domain_exact cookies), port (for cookies with port other
1171 than PORT_ANY), etc. See cookie_matches_url for details. */
1173 /* Count the number of matching cookies. */
1175 for (i = 0; i < chain_count; i++)
1176 for (cookie = chains[i]; cookie; cookie = cookie->next)
1177 if (cookie_matches_url (cookie, host, port, path, secflag, NULL))
1180 return NULL; /* no cookies matched */
1182 /* Allocate the array. */
1183 outgoing = alloca_array (struct weighed_cookie, count);
1185 /* Fill the array with all the matching cookies from the chains that
1188 for (i = 0; i < chain_count; i++)
1189 for (cookie = chains[i]; cookie; cookie = cookie->next)
1192 if (!cookie_matches_url (cookie, host, port, path, secflag, &pg))
1194 outgoing[ocnt].cookie = cookie;
1195 outgoing[ocnt].domain_goodness = strlen (cookie->domain);
1196 outgoing[ocnt].path_goodness = pg;
1199 assert (ocnt == count);
1201 /* Eliminate duplicate cookies; that is, those whose name and value
1203 count = eliminate_dups (outgoing, count);
1205 /* Sort the array so that best-matching domains come first, and
1206 that, within one domain, best-matching paths come first. */
1207 qsort (outgoing, count, sizeof (struct weighed_cookie), goodness_comparator);
1209 /* Count the space the name=value pairs will take. */
1211 for (i = 0; i < count; i++)
1213 struct cookie *c = outgoing[i].cookie;
1215 result_size += strlen (c->attr) + 1 + strlen (c->value);
1218 /* Allocate output buffer:
1219 name=value pairs -- result_size
1220 "; " separators -- (count - 1) * 2
1221 \0 terminator -- 1 */
1222 result_size = result_size + (count - 1) * 2 + 1;
1223 result = xmalloc (result_size);
1225 for (i = 0; i < count; i++)
1227 struct cookie *c = outgoing[i].cookie;
1228 int namlen = strlen (c->attr);
1229 int vallen = strlen (c->value);
1231 memcpy (result + pos, c->attr, namlen);
1233 result[pos++] = '=';
1234 memcpy (result + pos, c->value, vallen);
1238 result[pos++] = ';';
1239 result[pos++] = ' ';
1242 result[pos++] = '\0';
1243 assert (pos == result_size);
1247 /* Support for loading and saving cookies. The format used for
1248 loading and saving should be the format of the `cookies.txt' file
1249 used by Netscape and Mozilla, at least the Unix versions.
1250 (Apparently IE can export cookies in that format as well.) The
1251 format goes like this:
1253 DOMAIN DOMAIN-FLAG PATH SECURE-FLAG TIMESTAMP ATTR-NAME ATTR-VALUE
1255 DOMAIN -- cookie domain, optionally followed by :PORT
1256 DOMAIN-FLAG -- whether all hosts in the domain match
1258 SECURE-FLAG -- whether cookie requires secure connection
1259 TIMESTAMP -- expiry timestamp, number of seconds since epoch
1260 ATTR-NAME -- name of the cookie attribute
1261 ATTR-VALUE -- value of the cookie attribute (empty if absent)
1263 The fields are separated by TABs. All fields are mandatory, except
1264 for ATTR-VALUE. The `-FLAG' fields are boolean, their legal values
1265 being "TRUE" and "FALSE". Empty lines, lines consisting of
1266 whitespace only, and comment lines (beginning with # optionally
1267 preceded by whitespace) are ignored.
1269 Example line from cookies.txt (split in two lines for readability):
1271 .google.com TRUE / FALSE 2147368447 \
1272 PREF ID=34bb47565bbcd47b:LD=en:NR=20:TM=985172580:LM=985739012
1276 /* If the region [B, E) ends with :<digits>, parse the number, return
1277 it, and store new boundary (location of the `:') to DOMAIN_E_PTR.
1278 If port is not specified, return 0. */
1281 domain_port (const char *domain_b, const char *domain_e,
1282 const char **domain_e_ptr)
1286 const char *colon = memchr (domain_b, ':', domain_e - domain_b);
1289 for (p = colon + 1; p < domain_e && ISDIGIT (*p); p++)
1290 port = 10 * port + (*p - '0');
1292 /* Garbage following port number. */
1294 *domain_e_ptr = colon;
1298 #define GET_WORD(p, b, e) do { \
1300 while (*p && *p != '\t') \
1303 if (b == e || !*p) \
1308 /* Load cookies from FILE. */
/* Reads FILE line by line and hands each cookie that parses and has
   not yet expired to store_cookie for JAR.  A line is expected to hold
   the TAB-separated fields DOMAIN, DOMAIN-FLAG, PATH, SECURE-FLAG,
   EXPIRES, ATTR-NAME and ATTR-VALUE, in that order.  */
1311 cookie_jar_load (struct cookie_jar *jar, const char *file)
/* Open for reading.  The message below reports a failed open
   (presumably guarded by a NULL check on FP, which is not visible
   here).  */
1314 FILE *fp = fopen (file, "r");
1317 logprintf (LOG_NOTQUIET, _("Cannot open cookies file `%s': %s\n"),
1318 file, strerror (errno));
/* Reference time for the staleness check further down.  */
1321 cookies_now = time (NULL);
/* One iteration per line; the increment expression releases the line
   with xfree.  */
1323 for (; ((line = read_whole_line (fp)) != NULL); xfree (line))
1325 struct cookie *cookie;
/* Start and end pointers of each field within the current line.  */
1331 char *domain_b = NULL, *domain_e = NULL;
1332 char *domflag_b = NULL, *domflag_e = NULL;
1333 char *path_b = NULL, *path_e = NULL;
1334 char *secure_b = NULL, *secure_e = NULL;
1335 char *expires_b = NULL, *expires_e = NULL;
1336 char *name_b = NULL, *name_e = NULL;
1337 char *value_b = NULL, *value_e = NULL;
1339 /* Skip leading white-space. */
1340 while (*p && ISSPACE (*p))
1342 /* Ignore empty lines. */
/* The same test also catches lines whose first non-blank character is
   `#', i.e. comment lines.  */
1343 if (!*p || *p == '#')
/* The first six fields are delimited with GET_WORD, in file order.  */
1346 GET_WORD (p, domain_b, domain_e);
1347 GET_WORD (p, domflag_b, domflag_e);
1348 GET_WORD (p, path_b, path_e);
1349 GET_WORD (p, secure_b, secure_e);
1350 GET_WORD (p, expires_b, expires_e);
1351 GET_WORD (p, name_b, name_e);
1353 /* Don't use GET_WORD for value because it ends with newline,
1356 value_e = p + strlen (p);
1357 if (value_e > value_b && value_e[-1] == '\n')
1359 if (value_e > value_b && value_e[-1] == '\r')
1361 /* Empty values are legal (I think), so don't bother checking. */
/* The value extends to the end of the line; a trailing newline and a
   trailing carriage return are tested for above.  */
1363 cookie = cookie_new ();
/* The cookie receives its own copies of the name, value and path
   substrings.  */
1365 cookie->attr = strdupdelim (name_b, name_e);
1366 cookie->value = strdupdelim (value_b, value_e);
1367 cookie->path = strdupdelim (path_b, path_e);
/* Presumably any text other than the literal "TRUE" yields false here
   -- depends on BOUNDED_EQUAL, which is defined elsewhere.  */
1368 cookie->secure = BOUNDED_EQUAL (secure_b, secure_e, "TRUE");
1370 /* Curl source says, quoting Andre Garcia: "flag: A TRUE/FALSE
1371 value indicating if all machines within a given domain can
1372 access the variable. This value is set automatically by the
1373 browser, depending on the value set for the domain." */
/* The stored flag is the inverse of domain_exact: "TRUE" in the file
   means the cookie is not restricted to the exact host.  */
1374 cookie->domain_exact = !BOUNDED_EQUAL (domflag_b, domflag_e, "TRUE");
1376 /* DOMAIN needs special treatment because we might need to
1377 extract the port. */
/* DOMAIN_E is passed by address so that domain_port can move it back
   to the `:' when a port suffix is present.  */
1378 port = domain_port (domain_b, domain_e, (const char **)&domain_e);
1380 cookie->port = port;
1382 if (*domain_b == '.')
1383 ++domain_b; /* remove leading dot internally */
1384 cookie->domain = strdupdelim (domain_b, domain_e);
1386 /* safe default in case EXPIRES field is garbled. */
1387 expiry = (double)cookies_now - 1;
1389 /* I don't like changing the line, but it's safe here. (line is
1392 sscanf (expires_b, "%lf", &expiry);
1396 /* EXPIRY can be 0 for session cookies saved because the
1397 user specified `--keep-session-cookies' in the past.
1398 They remain session cookies, and will be saved only if
1399 the user has specified `keep-session-cookies' again. */
/* A cookie whose expiry lies in the past is abandoned; otherwise the
   expiry is recorded and the cookie is marked permanent.  */
1403 if (expiry < cookies_now)
1404 goto abort_cookie; /* ignore stale cookie. */
1405 cookie->expiry_time = expiry;
1406 cookie->permanent = 1;
1409 store_cookie (jar, cookie);
/* Disposal of a rejected cookie (presumably under the abort_cookie
   label, which is not visible here).  */
1415 delete_cookie (cookie);
1420 /* Mapper for save_cookies callable by hash_table_map. VALUE points
1421 to the head in a chain of cookies. The function prints the entire chain. */
/* hash_table_map callback that writes one chain of cookies to a file.
   KEY is the domain string the chain is stored under, VALUE is the
   first cookie of the chain and ARG is the FILE * to write to.  Each
   cookie is written as one TAB-separated line: domain (with optional
   ":port"), domain flag, path, secure flag, expiry time, name, value.  */
1425 save_cookies_mapper (void *key, void *value, void *arg)
1427 FILE *fp = (FILE *)arg;
1428 char *domain = (char *)key;
1429 struct cookie *cookie = (struct cookie *)value;
1430 for (; cookie; cookie = cookie->next)
/* Session (non-permanent) cookies are singled out unless the user
   asked to keep them; so are expired cookies.  Presumably both kinds
   are skipped -- the statements under these tests are not visible
   here.  */
1432 if (!cookie->permanent && !opt.keep_session_cookies)
1434 if (cookie_expired_p (cookie))
/* Non-exact domains get special treatment before the domain is
   written; presumably the leading dot that cookie_jar_load strips is
   put back here.  */
1436 if (!cookie->domain_exact)
/* The port is appended only when the cookie is bound to a specific
   one.  */
1439 if (cookie->port != PORT_ANY)
1440 fprintf (fp, ":%d", cookie->port);
/* The domain flag is written as the inverse of domain_exact, mirroring
   how cookie_jar_load reads it.  The expiry is printed as a whole
   number via %.0f.  */
1441 fprintf (fp, "\t%s\t%s\t%s\t%.0f\t%s\t%s\n",
1442 cookie->domain_exact ? "FALSE" : "TRUE",
1443 cookie->path, cookie->secure ? "TRUE" : "FALSE",
1444 (double)cookie->expiry_time,
1445 cookie->attr, cookie->value);
/* A non-zero return asks hash_table_map to stop; the condition that
   triggers it is not visible here.  */
1447 return 1; /* stop mapping */
1452 /* Save cookies, in format described above, to FILE. */
/* Writes a short `#' comment header to FILE and then runs
   save_cookies_mapper over every chain in JAR.  Failures to open,
   write or close the file are reported through logprintf.  */
1455 cookie_jar_save (struct cookie_jar *jar, const char *file)
1459 DEBUGP (("Saving cookies to %s.\n", file));
/* Refresh the shared reference time; it is also stamped into the
   header below.  */
1461 cookies_now = time (NULL);
/* Mode "w" truncates an existing file.  The message that follows
   reports a failed open (the guarding test is not visible here).  */
1463 fp = fopen (file, "w");
1466 logprintf (LOG_NOTQUIET, _("Cannot open cookies file `%s': %s\n"),
1467 file, strerror (errno));
1471 fputs ("# HTTP cookie file.\n", fp);
1472 fprintf (fp, "# Generated by Wget on %s.\n", datetime_str (&cookies_now));
1473 fputs ("# Edit at your own risk.\n\n", fp);
/* FP is handed to save_cookies_mapper as its ARG.  */
1475 hash_table_map (jar->chains, save_cookies_mapper, fp);
/* Write errors and close errors are reported separately; the test
   guarding the write-error message is not visible here.  */
1478 logprintf (LOG_NOTQUIET, _("Error writing to `%s': %s\n"),
1479 file, strerror (errno));
1480 if (fclose (fp) < 0)
1481 logprintf (LOG_NOTQUIET, _("Error closing `%s': %s\n"),
1482 file, strerror (errno));
1484 DEBUGP (("Done saving cookies.\n"));
1487 /* Destroy all the elements in the chain and unhook it from the cookie
1488 jar. This is written in the form of a callback to hash_table_map
1489 and used by cookie_jar_delete to delete all the cookies in a jar. */
/* hash_table_map callback; ARG is the cookie jar that owns the chain.

   NOTE(review): the first two parameter names are swapped relative to
   save_cookies_mapper (key, value, arg).  The first argument, named
   VALUE, is used as the chain's hash key and the second, named KEY,
   as the head of the cookie chain.  The casts below compensate, so the
   function agrees with the (key, value, arg) calling order, but the
   names are misleading.  */
1493 nuke_cookie_chain (void *value, void *key, void *arg)
1495 char *chain_key = (char *)value;
1496 struct cookie *chain = (struct cookie *)key;
1497 struct cookie_jar *jar = (struct cookie_jar *)arg;
1499 /* Remove the chain from the table and free the key. */
/* NOTE(review): the entry is removed while hash_table_map is iterating
   over the same table -- confirm that hash_table_map permits removing
   the entry currently being visited.  */
1500 hash_table_remove (jar->chains, chain_key);
1503 /* Then delete all the cookies in the chain. */
/* The successor is read before delete_cookie disposes of the current
   node.  */
1506 struct cookie *next = chain->next;
1507 delete_cookie (chain);
1515 /* Clean up cookie-related data. */
/* Runs nuke_cookie_chain over every chain stored in JAR (JAR itself is
   passed as the callback's ARG) and then destroys the chain table.
   Whether JAR itself is released afterwards is not visible here.  */
1518 cookie_jar_delete (struct cookie_jar *jar)
1520 hash_table_map (jar->chains, nuke_cookie_chain, jar);
1521 hash_table_destroy (jar->chains);
1525 /* Test cases. Currently this only tests parse_set_cookies. To
1526 use, recompile Wget with -DTEST_COOKIES and call test_cookies(). */
/* Flat list of the strings collected by test_parse_cookies_callback:
   two consecutive slots (name, then value) per parsed attribute.  No
   bounds check is visible, so one test string must not produce more
   than five attributes.  */
1531 char *test_results[10];
/* Callback handed to parse_set_cookies by the tests.  Stores a copy of
   the name [NB, NE) and of the value [VB, VE) in the next two
   test_results slots.  The cookie argument is not used.  */
1533 static int test_parse_cookies_callback (struct cookie *ignored,
1534 const char *nb, const char *ne,
1535 const char *vb, const char *ve)
1537 test_results[test_count++] = strdupdelim (nb, ne);
1538 test_results[test_count++] = strdupdelim (vb, ve);
1545 /* Tests expected to succeed: */
/* Each entry pairs an input string with the NULL-terminated list of
   name, value, name, value, ... strings the callback is expected to
   collect for it.  */
1551 { "arg=value", {"arg", "value", NULL} },
1552 { "arg1=value1;arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
1553 { "arg1=value1; arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
1554 { "arg1=value1; arg2=value2;", {"arg1", "value1", "arg2", "value2", NULL} },
1555 { "arg1=value1; arg2=value2; ", {"arg1", "value1", "arg2", "value2", NULL} },
1556 { "arg1=\"value1\"; arg2=\"\"", {"arg1", "value1", "arg2", "", NULL} },
1557 { "arg=", {"arg", "", NULL} },
1558 { "arg1=; arg2=", {"arg1", "", "arg2", "", NULL} },
1559 { "arg1 = ; arg2= ", {"arg1", "", "arg2", "", NULL} },
1562 /* Tests expected to fail: */
/* Input strings for which parse_set_cookies is expected to report an
   error.  */
1563 static char *tests_fail[] = {
1565 "arg=\"unterminated",
1567 "arg1=;=another-empty-name",
/* Positive cases: parse each string and compare what the callback
   collected against the expected list.  */
1571 for (i = 0; i < countof (tests_succ); i++)
1574 char *data = tests_succ[i].data;
1575 char **expected = tests_succ[i].results;
1579 c = parse_set_cookies (data, test_parse_cookies_callback, 1);
1582 printf ("NULL cookie returned for valid data: %s\n", data);
/* Results are compared pairwise: even indices hold names, odd indices
   hold values.  */
1586 for (ind = 0; ind < test_count; ind += 2)
1590 if (0 != strcmp (expected[ind], test_results[ind]))
1591 printf ("Invalid name %d for '%s' (expected '%s', got '%s')\n",
1592 ind / 2 + 1, data, expected[ind], test_results[ind]);
1593 if (0 != strcmp (expected[ind + 1], test_results[ind + 1]))
1594 printf ("Invalid value %d for '%s' (expected '%s', got '%s')\n",
1595 ind / 2 + 1, data, expected[ind + 1], test_results[ind + 1]);
/* A mismatch in count: either not every collected result was compared,
   or the expected list holds more entries than were collected.  */
1597 if (ind < test_count || expected[ind])
1598 printf ("Unmatched number of results: %s\n", data);
/* Negative cases: the message below reports a string that did not
   produce an error (the guarding test is not visible here).  */
1601 for (i = 0; i < countof (tests_fail); i++)
1604 char *data = tests_fail[i];
1606 c = parse_set_cookies (data, test_parse_cookies_callback, 1);
1608 printf ("Failed to report error on invalid data: %s\n", data);
1611 #endif /* TEST_COOKIES */