1 /* Support for cookies.
2 Copyright (C) 2001, 2002 Free Software Foundation, Inc.
4 This file is part of GNU Wget.
6 GNU Wget is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or (at
9 your option) any later version.
11 GNU Wget is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Wget; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 In addition, as a special exception, the Free Software Foundation
21 gives permission to link the code of its release of Wget with the
22 OpenSSL project's "OpenSSL" library (or with modified versions of it
23 that use the same license as the "OpenSSL" library), and distribute
24 the linked executables. You must obey the GNU General Public License
25 in all respects for all of the code used other than "OpenSSL". If you
26 modify this file, you may extend this exception to your version of the
27 file, but you are not obligated to do so. If you do not wish to do
28 so, delete this exception statement from your version. */
30 /* Written by Hrvoje Niksic. Parts are loosely inspired by cookie
31 code submitted by Tomasz Wegrzanowski.
33 Ideas for future work:
35 * Implement limits on cookie-related sizes, such as max. cookie
36 size, max. number of cookies, etc.
38 * Add more "cookie jar" methods, such as methods to iterate over
39 stored cookies, to clear temporary cookies, to perform
40 intelligent auto-saving, etc.
42 * Support `Set-Cookie2' and `Cookie2' headers? Does anyone really
62 /* This should *really* be in a .h file! */
63 time_t http_atotm PARAMS ((const char *));
65 /* Declarations of `struct cookie' and the most basic functions. */
67 /* Cookie jar serves as cookie storage and a means of retrieving
68 cookies efficiently. All cookies with the same domain are stored
69 in a linked list called "chain". A cookie chain can be reached by
70 looking up the domain in the cookie jar's chains_by_domain table.
72 For example, to reach all the cookies under google.com, one must
73 execute hash_table_get(jar->chains_by_domain, "google.com"). Of
74 course, when sending a cookie to `www.google.com', one must search
75 for cookies that belong to either `www.google.com' or `google.com'
76 -- but the point is that the code doesn't need to go through *all*
80 /* Cookie chains indexed by domain. */
81 struct hash_table *chains;
83 int cookie_count; /* number of cookies in the jar. */
86 /* Value set by entry point functions, so that the low-level
87 routines don't need to call time() all the time. */
93 struct cookie_jar *jar = xnew (struct cookie_jar);
94 jar->chains = make_nocase_string_hash_table (0);
95 jar->cookie_count = 0;
100 char *domain; /* domain of the cookie */
101 int port; /* port number */
102 char *path; /* path prefix of the cookie */
104 int secure; /* whether cookie should be
105 transmitted over non-https
107 int domain_exact; /* whether DOMAIN must match as a
110 int permanent; /* whether the cookie should outlive
112 time_t expiry_time; /* time when the cookie expires, 0
113 means undetermined. */
115 int discard_requested; /* whether cookie was created to
116 request discarding another
119 char *attr; /* cookie attribute name */
120 char *value; /* cookie attribute value */
122 struct cookie *next; /* used for chaining of cookies in the
126 #define PORT_ANY (-1)
128 /* Allocate and return a new, empty cookie structure. */
130 static struct cookie *
133 struct cookie *cookie = xnew0 (struct cookie);
135 /* Both cookie->permanent and cookie->expiry_time are now 0. This
136 means that the cookie doesn't expire, but is only valid for this
137 session (i.e. not written out to disk). */
139 cookie->port = PORT_ANY;
143 /* Non-zero if the cookie has expired. Assumes cookies_now has been
144 set by one of the entry point functions. */
/* An expiry_time of 0 never counts as expired.  Any other value counts
   as expired when it is strictly earlier than cookies_now.  The clock
   is not sampled here; the comparison uses whatever value cookies_now
   currently holds.  */
147 cookie_expired_p (const struct cookie *c)
149 return c->expiry_time != 0 && c->expiry_time < cookies_now;
152 /* Deallocate COOKIE and its components. */
/* The string members DOMAIN, PATH, ATTR and VALUE are released through
   xfree_null (presumably a NULL-tolerant free -- confirm against its
   definition), which would allow a partially filled cookie to be
   passed in.
   NOTE(review): the statement that releases the COOKIE structure
   itself is not among the lines visible here -- confirm it exists.  */
155 delete_cookie (struct cookie *cookie)
157 xfree_null (cookie->domain);
158 xfree_null (cookie->path);
159 xfree_null (cookie->attr);
160 xfree_null (cookie->value);
164 /* Functions for storing cookies.
166 All cookies can be reached beginning with jar->chains. The key in
167 that table is the domain name, and the value is a linked list of
168 all cookies from that domain. Every new cookie is placed on the
171 /* Find and return a cookie in JAR whose domain, path, and attribute
172 name correspond to COOKIE. If found, PREVPTR will point to the
173 location of the cookie previous in chain, or NULL if the found
174 cookie is the head of a chain.
176 If no matching cookie is found, return NULL. */
/* How the match is decided in the code below: the chain is fetched from
   jar->chains using COOKIE's domain as the key, so domain equality is
   whatever that table's key comparison implements.  Each entry on the
   chain is then compared by path and by attribute name (both with
   strcmp, i.e. case-sensitively) and by port, which must be equal as
   an integer.  PREV trails one node behind CHAIN while the list is
   walked.  */
178 static struct cookie *
179 find_matching_cookie (struct cookie_jar *jar, struct cookie *cookie,
180 struct cookie **prevptr)
182 struct cookie *chain, *prev;
184 chain = hash_table_get (jar->chains, cookie->domain);
189 for (; chain; prev = chain, chain = chain->next)
190 if (0 == strcmp (cookie->path, chain->path)
191 && 0 == strcmp (cookie->attr, chain->attr)
192 && cookie->port == chain->port)
203 /* Store COOKIE to the jar.
205 This is done by placing COOKIE at the head of its chain. However,
206 if COOKIE matches a cookie already in memory, as determined by
207 find_matching_cookie, the old cookie is unlinked and destroyed.
209 The key of each chain's hash table entry is allocated only the
210 first time; next hash_table_put's reuse the same key. */
/* Outline of the visible steps: hash_table_get_pair looks up the chain
   for COOKIE's domain and yields both the stored key and the current
   head.  When a chain exists, find_matching_cookie is asked for a
   duplicate; a duplicate (VICTIM) is unlinked and handed to
   delete_cookie, and COOKIE is linked in ahead of the remaining
   entries.  When no chain exists, a private copy of the domain string
   becomes the key.  hash_table_put then records COOKIE as the value
   stored under that key.  */
213 store_cookie (struct cookie_jar *jar, struct cookie *cookie)
215 struct cookie *chain_head;
218 if (hash_table_get_pair (jar->chains, cookie->domain,
219 &chain_key, &chain_head))
221 /* A chain of cookies in this domain already exists. Check for
222 duplicates -- if an extant cookie exactly matches our domain,
223 port, path, and name, replace it. */
225 struct cookie *victim = find_matching_cookie (jar, cookie, &prev);
229 /* Remove VICTIM from the chain. COOKIE will be placed at
233 prev->next = victim->next;
234 cookie->next = chain_head;
238 /* prev is NULL; apparently VICTIM was at the head of
239 the chain. This place will be taken by COOKIE, so
240 all we need to do is: */
241 cookie->next = victim->next;
243 delete_cookie (victim);
245 DEBUGP (("Deleted old cookie (to be replaced.)\n"));
248 cookie->next = chain_head;
252 /* We are now creating the chain. Use a copy of cookie->domain
253 as the key for the life-time of the chain. Using
254 cookie->domain would be unsafe because the life-time of the
255 chain may exceed the life-time of the cookie. (Cookies may
256 be deleted from the chain by this very function.) */
258 chain_key = xstrdup (cookie->domain);
261 hash_table_put (jar->chains, chain_key, cookie);
/* Debug trace of the cookie just stored.  EXPTIME is a time_t copy of
   expiry_time so that its address can be passed to datetime_str; the
   expiry is printed as "none" when expiry_time is 0.  */
267 time_t exptime = (time_t) cookie->expiry_time;
268 DEBUGP (("\nStored cookie %s %d%s %s <%s> <%s> [expiry %s] %s %s\n",
269 cookie->domain, cookie->port,
270 cookie->port == PORT_ANY ? " (ANY)" : "",
272 cookie->permanent ? "permanent" : "session",
273 cookie->secure ? "secure" : "insecure",
274 cookie->expiry_time ? datetime_str (&exptime) : "none",
275 cookie->attr, cookie->value));
280 /* Discard a cookie matching COOKIE's domain, port, path, and
281 attribute name. This gets called when we encounter a cookie whose
282 expiry date is in the past, or whose max-age is set to 0. The
283 former corresponds to netscape cookie spec, while the latter is
284 specified by rfc2109. */
/* COOKIE itself only describes what to look for; the stored cookie that
   find_matching_cookie returns (VICTIM) is the one that gets unlinked
   and passed to delete_cookie.  Two unlink cases are visible below: a
   victim with a predecessor is bypassed through prev->next, while a
   victim at the head of its chain requires the hash table entry to be
   either removed or re-pointed at victim->next.  */
287 discard_matching_cookie (struct cookie_jar *jar, struct cookie *cookie)
289 struct cookie *prev, *victim;
291 if (!hash_table_count (jar->chains))
292 /* No elements == nothing to discard. */
295 victim = find_matching_cookie (jar, cookie, &prev);
299 /* Simply unchain the victim. */
300 prev->next = victim->next;
303 /* VICTIM was head of its chain. We need to place a new
304 cookie at the head. */
305 char *chain_key = NULL;
/* Fetch the key string the table actually stores for this domain, so
   the same key can be reused (or released) below.  */
308 res = hash_table_get_pair (jar->chains, victim->domain,
313 /* VICTIM was the only cookie in the chain. Destroy the
314 chain and deallocate the chain key. */
315 hash_table_remove (jar->chains, victim->domain);
319 hash_table_put (jar->chains, chain_key, victim->next);
321 delete_cookie (victim);
322 DEBUGP (("Discarded old cookie.\n"));
326 /* Functions for parsing the `Set-Cookie' header, and creating new
327 cookies from the wire. */
/* Helper macros for update_cookie_field.  All three expand to
   expressions that refer to variables named name_b, name_e, value_b
   and value_e, so they can only be used where those names are in
   scope.  NAME_IS compares the [name_b, name_e) region with a string
   literal via BOUNDED_EQUAL_NO_CASE (case-insensitive, judging by the
   macro's name).  */
329 #define NAME_IS(string_literal) \
330 BOUNDED_EQUAL_NO_CASE (name_b, name_e, string_literal)
/* True when both value boundaries are non-NULL, i.e. a value was
   supplied at all.  */
332 #define VALUE_EXISTS (value_b && value_e)
/* True when a value was supplied and its two boundaries differ, i.e.
   the value has at least one character.  */
334 #define VALUE_NON_EMPTY (VALUE_EXISTS && (value_b != value_e))
336 /* Update the appropriate cookie field. [name_b, name_e) are expected
337 to delimit the attribute name, while [value_b, value_e) (optional)
338 should delimit the attribute value.
340 When called the first time, it will set the cookie's attribute name
341 and value. After that, it will check the attribute name for
342 special fields such as `domain', `path', etc. Where appropriate,
343 it will parse the values of the fields it recognizes and fill the
344 corresponding fields in COOKIE.
346 Returns 1 on success. Returns zero in case a syntax error is
347 found; such a cookie should be discarded. */
/* The attribute names tested for below are "domain", "path", "expires",
   "max-age" and "secure"; anything else is ignored.  Each of the first
   four branches begins by testing VALUE_NON_EMPTY.  The name
   boundaries must be non-NULL (asserted); the value boundaries may be
   NULL when the attribute had no value.  */
350 update_cookie_field (struct cookie *cookie,
351 const char *name_b, const char *name_e,
352 const char *value_b, const char *value_e)
354 assert (name_b != NULL && name_e != NULL);
/* The cookie's own name and value are stored as fresh copies made by
   strdupdelim.  */
360 cookie->attr = strdupdelim (name_b, name_e);
361 cookie->value = strdupdelim (value_b, value_e);
365 if (NAME_IS ("domain"))
367 if (!VALUE_NON_EMPTY)
/* Release any domain stored by an earlier `domain' attribute before
   installing the new copy.  */
369 xfree_null (cookie->domain);
370 /* Strictly speaking, we should set cookie->domain_exact if the
371 domain doesn't begin with a dot. But many sites set the
372 domain to "foo.com" and expect "subhost.foo.com" to get the
373 cookie, and it apparently works. */
376 cookie->domain = strdupdelim (value_b, value_e);
379 else if (NAME_IS ("path"))
381 if (!VALUE_NON_EMPTY)
383 xfree_null (cookie->path);
384 cookie->path = strdupdelim (value_b, value_e);
387 else if (NAME_IS ("expires"))
392 if (!VALUE_NON_EMPTY)
/* http_atotm takes a plain C string, so the bounded value is first
   copied into VALUE_COPY with BOUNDED_TO_ALLOCA (stack storage, going
   by the macro's name).  */
394 BOUNDED_TO_ALLOCA (value_b, value_e, value_copy);
396 expires = http_atotm (value_copy);
399 cookie->permanent = 1;
400 cookie->expiry_time = (time_t)expires;
403 /* Error in expiration spec. Assume default (cookie doesn't
404 expire, but valid only for this session.) */
407 /* According to netscape's specification, expiry time in the
408 past means that discarding of a matching cookie is
410 if (cookie->expiry_time < cookies_now)
411 cookie->discard_requested = 1;
415 else if (NAME_IS ("max-age"))
420 if (!VALUE_NON_EMPTY)
422 BOUNDED_TO_ALLOCA (value_b, value_e, value_copy);
424 sscanf (value_copy, "%lf", &maxage);
426 /* something went wrong. */
/* Max-Age is relative: the absolute expiry is cookies_now plus the
   parsed number of seconds.  */
428 cookie->permanent = 1;
429 cookie->expiry_time = cookies_now + maxage;
431 /* According to rfc2109, a cookie with max-age of 0 means that
432 discarding of a matching cookie is requested. */
434 cookie->discard_requested = 1;
438 else if (NAME_IS ("secure"))
440 /* ignore value completely */
445 /* Unrecognized attribute; ignore it. */
451 /* Returns non-zero for characters that are legal in the name of an
452 attribute. This used to allow only alphanumerics, '-', and '_',
453 but we need to be more lenient because a number of sites wants to
454 use weirder attribute names. rfc2965 "informally specifies"
455 attribute name (token) as "a sequence of non-special, non-white
456 space characters". So we allow everything except the stuff we know
459 #define ATTR_NAME_CHAR(c) ((c) > 32 && (c) < 127 \
460 && (c) != '"' && (c) != '=' \
461 && (c) != ';' && (c) != ',')
463 /* Parse the contents of the `Set-Cookie' header. The header looks
466 name1=value1; name2=value2; ...
468 Trailing semicolon is optional; spaces are allowed between all
469 tokens. Additionally, values may be quoted.
471 A new cookie is returned upon success, NULL otherwise. The
472 specified CALLBACK function (normally `update_cookie_field') is used
473 to update the fields of the newly created cookie structure. */
/* ATTR_NAME_CHAR accepts character codes 33 through 126 other than the
   double quote, '=', ';' and ','.  Its argument is evaluated several
   times, so it must not have side effects.

   parse_set_cookies is written as a state machine that runs until it
   reaches S_DONE or S_ERROR.  [name_b, name_e) and [value_b, value_e)
   track the attribute currently being scanned; both value pointers are
   NULL for an attribute that has no value.  CALLBACK is called with
   those four boundaries, and its result is kept in LEGAL.  */
475 static struct cookie *
476 parse_set_cookies (const char *sc,
477 int (*callback) (struct cookie *,
478 const char *, const char *,
479 const char *, const char *),
482 struct cookie *cookie = cookie_new ();
484 /* #### Hand-written DFAs are no fun to debug. We'd be better off
485 to rewrite this as an inline parser. */
487 enum { S_START, S_NAME, S_NAME_POST,
488 S_VALUE_PRE, S_VALUE, S_QUOTED_VALUE, S_VALUE_TRAILSPACE,
489 S_ATTR_ACTION, S_DONE, S_ERROR
495 const char *name_b = NULL, *name_e = NULL;
496 const char *value_b = NULL, *value_e = NULL;
500 while (state != S_DONE && state != S_ERROR)
507 else if (ISSPACE (c))
508 /* Strip all whitespace preceding the name. */
510 else if (ATTR_NAME_CHAR (c))
516 /* empty attr name not allowed */
520 if (!c || c == ';' || c == '=' || ISSPACE (c))
525 else if (ATTR_NAME_CHAR (c))
533 value_b = value_e = NULL;
536 state = S_ATTR_ACTION;
543 else if (ISSPACE (c))
544 /* Ignore space and keep the state. */
552 value_b = value_e = p;
555 state = S_ATTR_ACTION;
561 state = S_QUOTED_VALUE;
563 else if (ISSPACE (c))
573 if (!c || c == ';' || ISSPACE (c))
576 state = S_VALUE_TRAILSPACE;
580 value_e = NULL; /* no trailing space */
589 state = S_VALUE_TRAILSPACE;
596 case S_VALUE_TRAILSPACE:
600 state = S_ATTR_ACTION;
603 state = S_ATTR_ACTION;
604 else if (ISSPACE (c))
/* One complete attribute has been delimited; let CALLBACK record it in
   COOKIE.  The offending attribute name is copied out for the log
   message below.  */
611 int legal = callback (cookie, name_b, name_e, value_b, value_e);
617 BOUNDED_TO_ALLOCA (name_b, name_e, name);
618 logprintf (LOG_NOTQUIET,
619 _("Error in Set-Cookie, field `%s'"), name);
629 /* handled by loop condition */
/* Failure path: the half-built cookie is released before the function
   reports the problem.  */
636 delete_cookie (cookie);
637 if (state != S_ERROR)
641 logprintf (LOG_NOTQUIET,
642 _("Syntax error in Set-Cookie: %s at position %d.\n"),
647 /* Sanity checks. These are important, otherwise it is possible for
648 malicious attackers to destroy important cookie information and/or
649 violate your privacy. */
/* REQUIRE_DIGITS and REQUIRE_DOT are the building blocks used by
   numeric_address_p.  Both take the scanning pointer P by name and
   advance it; the visible loop in REQUIRE_DIGITS steps P forward while
   ISDIGIT (*p) holds.  Only part of each macro body is visible here.  */
652 #define REQUIRE_DIGITS(p) do { \
655 for (++p; ISDIGIT (*p); p++) \
659 #define REQUIRE_DOT(p) do { \
664 /* Check whether ADDR matches <digits>.<digits>.<digits>.<digits>.
   The check is purely syntactic: four digit groups separated by three
   dots are consumed in order through REQUIRE_DIGITS and REQUIRE_DOT.
   NOTE(review): nothing visible here limits the length or numeric
   range of a digit group -- confirm whether callers rely on that.
666 We don't want to call network functions like inet_addr() because
667 all we need is a check, preferably one that is small, fast, and
671 numeric_address_p (const char *addr)
673 const char *p = addr;
675 REQUIRE_DIGITS (p); /* A */
676 REQUIRE_DOT (p); /* . */
677 REQUIRE_DIGITS (p); /* B */
678 REQUIRE_DOT (p); /* . */
679 REQUIRE_DIGITS (p); /* C */
680 REQUIRE_DOT (p); /* . */
681 REQUIRE_DIGITS (p); /* D */
688 /* Check whether COOKIE_DOMAIN is an appropriate domain for HOST.
689 Originally I tried to make the check compliant with rfc2109, but
690 the sites deviated too often, so I had to fall back to "tail
691 matching", as defined by the original Netscape's cookie spec. */
/* The checks run in this order: numeric cookie domains (compared with
   strcmp), a case-insensitive exact match, tail matching, the
   heuristic on the number and length of domain components, and finally
   the test that a dot precedes the matched tail inside HOST.  */
694 check_domain_match (const char *cookie_domain, const char *host)
698 /* Numeric address requires exact match. It also requires HOST to
700 if (numeric_address_p (cookie_domain))
701 return 0 == strcmp (cookie_domain, host);
705 /* For the sake of efficiency, check for exact match first. */
706 if (0 == strcasecmp (cookie_domain, host))
711 /* COOKIE_DOMAIN must match the tail of HOST. */
712 if (!match_tail (host, cookie_domain, 1))
715 /* We know that COOKIE_DOMAIN is a subset of HOST; however, we must
716 make sure that somebody is not trying to set the cookie for a
717 subdomain shared by many entities. For example, "company.co.uk"
718 must not be allowed to set a cookie for ".co.uk". On the other
719 hand, "sso.redhat.de" should be able to set a cookie for
722 The only marginally sane way to handle this I can think of is to
723 reject on the basis of the length of the second-level domain name
724 (but when the top-level domain is unknown), with the assumption
725 that those of three or less characters could be reserved. For
728 .co.org -> works because the TLD is known
729 .co.uk -> doesn't work because "co" is only two chars long
730 .com.au -> doesn't work because "com" is only 3 chars long
731 .cnn.uk -> doesn't work because "cnn" is also only 3 chars long (ugh)
732 .cnn.de -> doesn't work for the same reason (ugh!!)
733 .abcd.de -> works because "abcd" is 4 chars long
734 .img.cnn.de -> works because it's not trying to set the 2nd level domain
735 .cnn.co.uk -> works for the same reason
737 That should prevent misuse, while allowing reasonable usage. If
738 someone knows of a better way to handle this, please let me
741 const char *p = cookie_domain;
742 int dccount = 1; /* number of domain components */
743 int ldcl = 0; /* last domain component length */
744 int nldcl = 0; /* next to last domain component length */
747 /* Ignore leading period in this calculation. */
/* Single pass over COOKIE_DOMAIN; OUT is the loop's exit flag.  */
750 for (out = 0; !out; p++)
758 /* Empty domain component found -- the domain is invalid. */
760 if (*(p + 1) == '\0')
762 /* Tolerate trailing '.' by not treating the domain as
763 one ending with an empty domain component. */
/* A short next-to-last component is tolerated only under one of the
   generic top-level domains listed here.  */
785 int known_toplevel = 0;
786 static char *known_toplevel_domains[] = {
787 ".com", ".edu", ".net", ".org", ".gov", ".mil", ".int"
789 for (i = 0; i < countof (known_toplevel_domains); i++)
790 if (match_tail (cookie_domain, known_toplevel_domains[i], 1))
795 if (!known_toplevel && nldcl <= 3)
802 /* Don't allow the host "foobar.com" to set a cookie for domain
804 if (*cookie_domain != '.')
806 int dlen = strlen (cookie_domain);
807 int hlen = strlen (host);
808 /* cookie host: hostname.foobar.com */
809 /* desired domain: bar.com */
810 /* '.' must be here in host-> ^ */
811 if (hlen > dlen && host[hlen - dlen - 1] != '.')
820 static int path_matches PARAMS ((const char *, const char *));
822 /* Check whether PATH begins with COOKIE_PATH. */
/* Thin wrapper over path_matches with the argument order swapped: the
   request path is passed first and the cookie's path second.  The
   return value is whatever path_matches returns.  */
825 check_path_match (const char *cookie_path, const char *path)
827 return path_matches (path, cookie_path);
830 /* Process the HTTP `Set-Cookie' header. This results in storing the
831 cookie or discarding a matching one, or ignoring it completely, all
832 depending on the contents. */
/* JAR is the cookie store to update; HOST, PORT and PATH describe the
   request that produced the header; SET_COOKIE is the header's value.
   cookies_now is refreshed first because the parsing callback and the
   expiry checks compare against it.  */
835 cookie_handle_set_cookie (struct cookie_jar *jar,
836 const char *host, int port,
837 const char *path, const char *set_cookie)
839 struct cookie *cookie;
840 cookies_now = time (NULL);
842 cookie = parse_set_cookies (set_cookie, update_cookie_field, 0);
846 /* Sanitize parts of cookie. */
851 /* If the domain was not provided, we use the one we're talking
852 to, and set exact match. */
853 cookie->domain = xstrdup (host);
854 cookie->domain_exact = 1;
855 /* Set the port, but only if it's non-default. */
856 if (port != 80 && port != 443)
/* A domain supplied by the server must pass check_domain_match against
   the host we actually talked to; a failure is logged.  */
861 if (!check_domain_match (cookie->domain, host))
863 logprintf (LOG_NOTQUIET,
864 "Cookie coming from %s attempted to set domain to %s\n",
865 host, cookie->domain);
866 xfree (cookie->domain);
872 cookie->path = xstrdup (path);
/* Likewise, the cookie's path is checked against the request path with
   check_path_match.  */
875 if (!check_path_match (cookie->path, path))
877 DEBUGP (("Attempt to fake the path: %s, %s\n",
878 cookie->path, path));
/* A cookie flagged discard_requested (set by update_cookie_field) is
   passed to discard_matching_cookie, which removes a stored match.  */
883 if (cookie->discard_requested)
885 discard_matching_cookie (jar, cookie);
889 store_cookie (jar, cookie);
894 delete_cookie (cookie);
897 /* Support for sending out cookies in HTTP requests, based on
898 previously stored cookies. Entry point is
899 `build_cookies_request'. */
/* NOTE(review): no function named build_cookies_request is visible in
   this part of the file; cookie_header appears to be the routine that
   builds the request header -- confirm and update the comment above.  */
901 /* Find the cookie chains whose domains match HOST and store them to
904 A cookie chain is the head of a list of cookies that belong to a
905 host/domain. Given HOST "img.search.xemacs.org", this function
906 will return the chains for "img.search.xemacs.org",
907 "search.xemacs.org", and "xemacs.org" -- those of them that exist
910 DEST should be large enough to accept (in the worst case) as many
911 elements as there are domain components of HOST. */
914 find_chains_of_host (struct cookie_jar *jar, const char *host,
915 struct cookie *dest[])
920 /* Bail out quickly if there are no cookies in the jar. */
921 if (!hash_table_count (jar->chains))
924 if (numeric_address_p (host))
925 /* If host is an IP address, only check for the exact match. */
928 /* Otherwise, check all the subdomains except the top-level (last)
929 one. As a domain with N components has N-1 dots, the number of
930 passes equals the number of dots. */
931 passes = count_char (host, '.');
935 /* Find chains that match HOST, starting with exact match and
936 progressing to less specific domains. For instance, given HOST
937 fly.srk.fer.hr, first look for fly.srk.fer.hr's chain, then
938 srk.fer.hr's, then fer.hr's. */
941 struct cookie *chain = hash_table_get (jar->chains, host);
943 dest[dest_count++] = chain;
944 if (++passcnt >= passes)
/* Drop the leading component of HOST for the next pass.  In the
   non-numeric case PASSES is the number of dots in HOST, so a dot is
   still present whenever this statement is reached.  */
946 host = strchr (host, '.') + 1;
952 /* If FULL_PATH begins with PREFIX, return the length of PREFIX, zero
956 path_matches (const char *full_path, const char *prefix)
961 /* Wget's HTTP paths do not begin with '/' (the URL code treats it
962 as a mere separator, inspired by rfc1808), but the '/' is
963 assumed when matching against the cookie stuff. */
/* The comparison is a byte-wise, case-sensitive strncmp over the first
   LEN (= strlen of PREFIX) characters of FULL_PATH.  */
967 len = strlen (prefix);
969 if (0 != strncmp (full_path, prefix, len))
970 /* FULL_PATH doesn't begin with PREFIX. */
973 /* Length of PREFIX determines the quality of the match. */
977 /* Return non-zero iff COOKIE matches the provided parameters of the
978 URL being downloaded: HOST, PORT, PATH, and SECFLAG.
980 If PATH_GOODNESS is non-NULL, store the "path goodness" value
981 there. That value is a measure of how closely COOKIE matches PATH,
982 used for ordering cookies. */
/* The tests are applied in this order: expiry (against cookies_now, via
   cookie_expired_p), the secure flag, the port (PORT_ANY matches every
   port), the exact-domain requirement, and finally the path.  */
985 cookie_matches_url (const struct cookie *cookie,
986 const char *host, int port, const char *path,
987 int secflag, int *path_goodness)
991 if (cookie_expired_p (cookie))
992 /* Ignore stale cookies. Don't bother unchaining the cookie at
993 this point -- Wget is a relatively short-lived application, and
994 stale cookies will not be saved by `save_cookies'. On the
995 other hand, this function should be as efficient as
999 if (cookie->secure && !secflag)
1000 /* Don't transmit secure cookies over insecure connections. */
1002 if (cookie->port != PORT_ANY && cookie->port != port)
1005 /* If exact domain match is required, verify that cookie's domain is
1006 equal to HOST. If not, assume success on the grounds of the
1007 cookie's chain having been found by find_chains_of_host. */
1008 if (cookie->domain_exact
1009 && 0 != strcasecmp (host, cookie->domain))
/* PG is the value returned by path_matches for the request path and
   the cookie's path.  */
1012 pg = path_matches (path, cookie->path);
1017 /* If the caller requested path_goodness, we return it. This is
1018 an optimization, so that the caller doesn't need to call
1019 path_matches() again. */
1020 *path_goodness = pg;
1024 /* A structure that points to a cookie, along with the additional
1025 information about the cookie's "goodness". This allows us to sort
1026 the cookies when returning them to the server, as required by the
1029 struct weighed_cookie {
1030 struct cookie *cookie;
1031 int domain_goodness;
1035 /* Comparator used for uniquifying the list. */
/* qsort-style comparator over struct weighed_cookie elements.  Orders
   by attribute name first and by value second, both with strcmp, so
   entries whose name and value are both equal compare as 0 and end up
   next to each other after sorting.  */
1038 equality_comparator (const void *p1, const void *p2)
1040 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
1041 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
1043 int namecmp = strcmp (wc1->cookie->attr, wc2->cookie->attr);
1044 int valuecmp = strcmp (wc1->cookie->value, wc2->cookie->value);
1046 /* We only really care whether both name and value are equal. We
1047 return them in this order only for consistency... */
1048 return namecmp ? namecmp : valuecmp;
1051 /* Eliminate duplicate cookies. "Duplicate cookies" are any two
1052 cookies with the same attr name and value. Whenever a duplicate
1053 pair is found, one of the cookies is removed. */
/* OUTGOING is reordered in place (it is sorted with
   equality_comparator) and compacted.  The return value is the number
   of entries that remain at the front of the array, computed as the
   distance from OUTGOING to the tortoise pointer.  */
1056 eliminate_dups (struct weighed_cookie *outgoing, int count)
1058 struct weighed_cookie *h; /* hare */
1059 struct weighed_cookie *t; /* tortoise */
1060 struct weighed_cookie *end = outgoing + count;
1062 /* We deploy a simple uniquify algorithm: first sort the array
1063 according to our sort criteria, then copy it to itself, comparing
1064 each cookie to its neighbor and ignoring the duplicates. */
1066 qsort (outgoing, count, sizeof (struct weighed_cookie), equality_comparator);
1068 /* "Hare" runs through all the entries in the array, followed by
1069 "tortoise". If a duplicate is found, the hare skips it.
1070 Non-duplicate entries are copied to the tortoise ptr. */
1072 for (h = t = outgoing; h < end; h++)
/* Compare the current entry with its right-hand neighbor.
   NOTE(review): h[1] is only valid when H is not the last element; the
   guard for that is not among the lines visible here -- confirm.  */
1076 struct cookie *c0 = h[0].cookie;
1077 struct cookie *c1 = h[1].cookie;
1078 if (!strcmp (c0->attr, c1->attr) && !strcmp (c0->value, c1->value))
1079 continue; /* ignore the duplicate */
1082 /* If the hare has advanced past the tortoise (because of
1083 previous dups), make sure the values get copied. Otherwise,
1084 no copying is necessary. */
1090 return t - outgoing;
1093 /* Comparator used for sorting by quality. */
/* qsort-style comparator over struct weighed_cookie elements: higher
   domain_goodness sorts first, and path_goodness breaks ties, again
   with higher values first.  */
1096 goodness_comparator (const void *p1, const void *p2)
1098 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
1099 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
1101 /* Subtractions take `wc2' as the first argument because we want a
1102 sort in *decreasing* order of goodness. */
1103 int dgdiff = wc2->domain_goodness - wc1->domain_goodness;
1104 int pgdiff = wc2->path_goodness - wc1->path_goodness;
1106 /* Sort by domain goodness; if these are the same, sort by path
1107 goodness. (The sorting order isn't really specified; maybe it
1108 should be the other way around.) */
1109 return dgdiff ? dgdiff : pgdiff;
1112 /* Generate a `Cookie' header for a request that goes to HOST:PORT and
1113 requests PATH from the server. The resulting string is allocated
1114 with `malloc', and the caller is responsible for freeing it. If no
1115 cookies pertain to this request, i.e. no cookie header should be
1116 generated, NULL is returned. */
/* SECFLAG is forwarded to cookie_matches_url, which refuses cookies
   marked secure when the flag is zero.  The work proceeds in stages:
   collect candidate chains, count the matching cookies, copy them into
   a temporary array, drop duplicates, sort by goodness, and format the
   header.  The temporary arrays come from alloca_array (stack storage,
   going by the name); only the returned string is heap-allocated, via
   xmalloc.  */
1119 cookie_header (struct cookie_jar *jar, const char *host,
1120 int port, const char *path, int secflag)
1122 struct cookie **chains;
1125 struct cookie *cookie;
1126 struct weighed_cookie *outgoing;
1129 int result_size, pos;
1131 /* First, find the cookie chains whose domains match HOST. */
1133 /* Allocate room for find_chains_of_host to write to. The number of
1134 chains can at most equal the number of subdomains, hence
1135 1+<number of dots>. */
1136 chains = alloca_array (struct cookie *, 1 + count_char (host, '.'));
1137 chain_count = find_chains_of_host (jar, host, chains);
1139 /* No cookies for this host. */
1143 cookies_now = time (NULL);
1145 /* Now extract from the chains those cookies that match our host
1146 (for domain_exact cookies), port (for cookies with port other
1147 than PORT_ANY), etc. See cookie_matches_url for details. */
1149 /* Count the number of matching cookies. */
1151 for (i = 0; i < chain_count; i++)
1152 for (cookie = chains[i]; cookie; cookie = cookie->next)
1153 if (cookie_matches_url (cookie, host, port, path, secflag, NULL))
1156 return NULL; /* no cookies matched */
1158 /* Allocate the array. */
1159 outgoing = alloca_array (struct weighed_cookie, count);
1161 /* Fill the array with all the matching cookies from the chains that
1164 for (i = 0; i < chain_count; i++)
1165 for (cookie = chains[i]; cookie; cookie = cookie->next)
1168 if (!cookie_matches_url (cookie, host, port, path, secflag, &pg))
1170 outgoing[ocnt].cookie = cookie;
1171 outgoing[ocnt].domain_goodness = strlen (cookie->domain);
1172 outgoing[ocnt].path_goodness = pg;
1175 assert (ocnt == count);
1177 /* Eliminate duplicate cookies; that is, those whose name and value
1179 count = eliminate_dups (outgoing, count);
1181 /* Sort the array so that best-matching domains come first, and
1182 that, within one domain, best-matching paths come first. */
1183 qsort (outgoing, count, sizeof (struct weighed_cookie), goodness_comparator);
1185 /* Count the space the name=value pairs will take. */
1187 for (i = 0; i < count; i++)
1189 struct cookie *c = outgoing[i].cookie;
1191 result_size += strlen (c->attr) + 1 + strlen (c->value);
1194 /* Allocate output buffer:
1196 name=value pairs -- result_size
1197 "; " separators -- (count - 1) * 2
1198 \r\n line ending -- 2
1199 \0 terminator -- 1 */
/* The leading 8 is the length of the "Cookie: " prefix copied in
   below.  */
1200 result_size = 8 + result_size + (count - 1) * 2 + 2 + 1;
1201 result = xmalloc (result_size);
1203 strcpy (result, "Cookie: ");
1205 for (i = 0; i < count; i++)
1207 struct cookie *c = outgoing[i].cookie;
1208 int namlen = strlen (c->attr);
1209 int vallen = strlen (c->value);
1211 memcpy (result + pos, c->attr, namlen);
1213 result[pos++] = '=';
1214 memcpy (result + pos, c->value, vallen);
1218 result[pos++] = ';';
1219 result[pos++] = ' ';
1222 result[pos++] = '\r';
1223 result[pos++] = '\n';
1224 result[pos++] = '\0';
/* Cross-check: the number of bytes written must equal the size that
   was computed up front.  */
1225 assert (pos == result_size);
1229 /* Support for loading and saving cookies. The format used for
1230 loading and saving should be the format of the `cookies.txt' file
1231 used by Netscape and Mozilla, at least the Unix versions.
1232 (Apparently IE can export cookies in that format as well.) The
1233 format goes like this:
1235 DOMAIN DOMAIN-FLAG PATH SECURE-FLAG TIMESTAMP ATTR-NAME ATTR-VALUE
1237 DOMAIN -- cookie domain, optionally followed by :PORT
1238 DOMAIN-FLAG -- whether all hosts in the domain match
1240 SECURE-FLAG -- whether cookie requires secure connection
1241 TIMESTAMP -- expiry timestamp, number of seconds since epoch
1242 ATTR-NAME -- name of the cookie attribute
1243 ATTR-VALUE -- value of the cookie attribute (empty if absent)
1245 The fields are separated by TABs. All fields are mandatory, except
1246 for ATTR-VALUE. The `-FLAG' fields are boolean, their legal values
1247 being "TRUE" and "FALSE". Empty lines, lines consisting of
1248 whitespace only, and comment lines (beginning with # optionally
1249 preceded by whitespace) are ignored.
1251 Example line from cookies.txt (split in two lines for readability):
1253 .google.com TRUE / FALSE 2147368447 \
1254 PREF ID=34bb47565bbcd47b:LD=en:NR=20:TM=985172580:LM=985739012
1258 /* If the region [B, E) ends with :<digits>, parse the number, return
1259 it, and store new boundary (location of the `:') to DOMAIN_E_PTR.
1260 If port is not specified, return 0. */
/* The region is searched for ':' with memchr, so it does not need to
   be NUL-terminated.  Digits after the colon are accumulated in base
   10.  NOTE(review): no upper bound on the accumulated value is
   visible here, so a very long digit run could overflow the int --
   confirm whether the input is trusted.  */
1263 domain_port (const char *domain_b, const char *domain_e,
1264 const char **domain_e_ptr)
1268 const char *colon = memchr (domain_b, ':', domain_e - domain_b);
1271 for (p = colon + 1; p < domain_e && ISDIGIT (*p); p++)
1272 port = 10 * port + (*p - '0');
1274 /* Garbage following port number. */
1276 *domain_e_ptr = colon;
/* Field scanner used by cookie_jar_load.  P is the cursor into the
   line; B and E are the variables in which the macro is given the
   field's boundaries.  The visible loop condition stops at a TAB or at
   the end of the string, and the visible test afterwards detects an
   empty field (b == e) or a line that ended early (!*p).  Only part of
   the macro body is visible here.  */
1280 #define GET_WORD(p, b, e) do { \
1282 while (*p && *p != '\t') \
1285 if (b == e || !*p) \
1290 /* Load cookies from FILE. */
/* FILE is opened for reading in text mode; a failure to open it is
   logged together with strerror (errno).  Each line obtained from
   read_whole_line is split into fields with GET_WORD, turned into a
   new cookie and passed to store_cookie for JAR.  The loop header
   releases every line with xfree after it has been processed.  */
1293 cookie_jar_load (struct cookie_jar *jar, const char *file)
1296 FILE *fp = fopen (file, "r");
1299 logprintf (LOG_NOTQUIET, "Cannot open cookies file `%s': %s\n",
1300 file, strerror (errno));
1303 cookies_now = time (NULL);
1305 for (; ((line = read_whole_line (fp)) != NULL); xfree (line))
1307 struct cookie *cookie;
/* Begin/end pointers for each field of the line.  */
1313 char *domain_b = NULL, *domain_e = NULL;
1314 char *domflag_b = NULL, *domflag_e = NULL;
1315 char *path_b = NULL, *path_e = NULL;
1316 char *secure_b = NULL, *secure_e = NULL;
1317 char *expires_b = NULL, *expires_e = NULL;
1318 char *name_b = NULL, *name_e = NULL;
1319 char *value_b = NULL, *value_e = NULL;
1321 /* Skip leading white-space. */
1322 while (*p && ISSPACE (*p))
1324 /* Ignore empty lines. */
1325 if (!*p || *p == '#')
1328 GET_WORD (p, domain_b, domain_e);
1329 GET_WORD (p, domflag_b, domflag_e);
1330 GET_WORD (p, path_b, path_e);
1331 GET_WORD (p, secure_b, secure_e);
1332 GET_WORD (p, expires_b, expires_e);
1333 GET_WORD (p, name_b, name_e);
1335 /* Don't use GET_WORD for value because it ends with newline,
1338 value_e = p + strlen (p);
1339 if (value_e > value_b && value_e[-1] == '\n')
1341 if (value_e > value_b && value_e[-1] == '\r')
1343 /* Empty values are legal (I think), so don't bother checking. */
1345 cookie = cookie_new ();
1347 cookie->attr = strdupdelim (name_b, name_e);
1348 cookie->value = strdupdelim (value_b, value_e);
1349 cookie->path = strdupdelim (path_b, path_e);
1350 cookie->secure = BOUNDED_EQUAL (secure_b, secure_e, "TRUE");
1352 /* Curl source says, quoting Andre Garcia: "flag: A TRUE/FALSE
1353 value indicating if all machines within a given domain can
1354 access the variable. This value is set automatically by the
1355 browser, depending on the value set for the domain." */
1356 cookie->domain_exact = !BOUNDED_EQUAL (domflag_b, domflag_e, "TRUE");
1358 /* DOMAIN needs special treatment because we might need to
1359 extract the port. */
1360 port = domain_port (domain_b, domain_e, (const char **)&domain_e);
1362 cookie->port = port;
1364 if (*domain_b == '.')
1365 ++domain_b; /* remove leading dot internally */
1366 cookie->domain = strdupdelim (domain_b, domain_e);
1368 /* safe default in case EXPIRES field is garbled. */
1369 expiry = (double)cookies_now - 1;
1371 /* I don't like changing the line, but it's safe here. (line is
1374 sscanf (expires_b, "%lf", &expiry);
1378 /* EXPIRY can be 0 for session cookies saved because the
1379 user specified `--keep-session-cookies' in the past.
1380 They remain session cookies, and will be saved only if
1381 the user has specified `keep-session-cookies' again. */
1385 if (expiry < cookies_now)
1386 goto abort; /* ignore stale cookie. */
1387 cookie->expiry_time = expiry;
1388 cookie->permanent = 1;
1391 store_cookie (jar, cookie);
/* Reached through `goto abort' for lines that are rejected: the
   partially built cookie is released instead of being stored.  */
1397 delete_cookie (cookie);
1402 /* Mapper for cookie_jar_save, callable by hash_table_map. VALUE points
1403 to the head in a chain of cookies. The function prints the entire chain to the FILE passed as ARG. */
1407 save_cookies_mapper (void *key, void *value, void *arg)
/* ARG carries the output stream, KEY the domain string, and VALUE the
   first cookie of the chain. */
1409 FILE *fp = (FILE *)arg;
1410 char *domain = (char *)key;
1411 struct cookie *cookie = (struct cookie *)value;
/* Walk the chain through the NEXT links until it ends. */
1412 for (; cookie; cookie = cookie->next)
/* Test for a session (non-permanent) cookie while
   opt.keep_session_cookies is off.
   NOTE(review): presumably such cookies, and the expired ones caught
   by the next test, are skipped -- confirm. */
1414 if (!cookie->permanent && !opt.keep_session_cookies)
1416 if (cookie_expired_p (cookie))
1418 if (!cookie->domain_exact)
/* A `:port' suffix is written only when the cookie's port differs
   from PORT_ANY. */
1421 if (cookie->port != PORT_ANY)
1422 fprintf (fp, ":%d", cookie->port);
/* Remaining fields, each preceded by a TAB: domain flag, path, secure
   flag, expiry time, name, value.  The domain flag is written as the
   inverse of domain_exact, and "%.0f" prints the expiry time without
   a fractional part. */
1423 fprintf (fp, "\t%s\t%s\t%s\t%.0f\t%s\t%s\n",
1424 cookie->domain_exact ? "FALSE" : "TRUE",
1425 cookie->path, cookie->secure ? "TRUE" : "FALSE",
1426 (double)cookie->expiry_time,
1427 cookie->attr, cookie->value);
1429 return 1; /* stop mapping */
1434 /* Save cookies, in format described above, to FILE.  The cookies
   are taken from JAR; FILE is opened in "w" mode.  Failures to open,
   write or close FILE are reported through logprintf. */
1437 cookie_jar_save (struct cookie_jar *jar, const char *file)
1441 DEBUGP (("Saving cookies to %s.\n", file));
/* The current time is printed in the "Generated by" header line
   below. */
1443 cookies_now = time (NULL);
1445 fp = fopen (file, "w");
1448 logprintf (LOG_NOTQUIET, _("Cannot open cookies file `%s': %s\n"),
1449 file, strerror (errno));
/* Three `#' header lines followed by an empty line. */
1453 fputs ("# HTTP cookie file.\n", fp);
1454 fprintf (fp, "# Generated by Wget on %s.\n", datetime_str (&cookies_now));
1455 fputs ("# Edit at your own risk.\n\n", fp);
/* Visit every chain in the jar; save_cookies_mapper receives FP as
   its ARG and prints the cookies of each chain. */
1457 hash_table_map (jar->chains, save_cookies_mapper, fp);
1460 logprintf (LOG_NOTQUIET, _("Error writing to `%s': %s\n"),
1461 file, strerror (errno));
/* fclose flushes buffered output and can therefore fail as well, so
   its result is checked and reported separately. */
1462 if (fclose (fp) < 0)
1463 logprintf (LOG_NOTQUIET, _("Error closing `%s': %s\n"),
1464 file, strerror (errno));
1466 DEBUGP (("Done saving cookies.\n"));
1469 /* Destroy all the elements in the chain and unhook it from the cookie
1470 jar. This is written in the form of a callback to hash_table_map
1471 and used by cookie_jar_delete to delete all the cookies in a jar. */
1475 nuke_cookie_chain (void *value, void *key, void *arg)
/* NOTE(review): the parameter names are swapped relative to their
   use.  The first argument, named VALUE, is treated as the hash key,
   and the second, named KEY, as the head of the cookie chain.  By
   position this matches save_cookies_mapper, which takes the key
   first and the value second.  ARG is the cookie jar. */
1477 char *chain_key = (char *)value;
1478 struct cookie *chain = (struct cookie *)key;
1479 struct cookie_jar *jar = (struct cookie_jar *)arg;
1481 /* Remove the chain from the table and free the key. */
1482 hash_table_remove (jar->chains, chain_key);
1485 /* Then delete all the cookies in the chain. */
/* Remember the successor before the current cookie is deleted. */
1488 struct cookie *next = chain->next;
1489 delete_cookie (chain);
1497 /* Clean up cookie-related data: run nuke_cookie_chain over every
   chain stored in JAR (JAR itself is passed as the callback argument),
   then destroy the hash table that held the chains. */
1500 cookie_jar_delete (struct cookie_jar *jar)
1502 hash_table_map (jar->chains, nuke_cookie_chain, jar);
1503 hash_table_destroy (jar->chains);
1507 /* Test cases. Currently this only tests parse_set_cookies. To
1508 use, recompile Wget with -DTEST_COOKIES and call test_cookies(). */
/* Flat list of the strings collected by the callback below: each call
   appends a name followed by its value, so names sit at even indices
   and values at odd ones.
   NOTE(review): no check against the 10-slot capacity is visible in
   the callback; the inputs in the test table produce at most four
   entries each. */
1513 char *test_results[10];
/* Callback handed to parse_set_cookies by the tests.  [NB, NE) and
   [VB, VE) delimit the name and the value; each is copied with
   strdupdelim and appended to TEST_RESULTS.  The cookie argument is
   not used. */
1515 static int test_parse_cookies_callback (struct cookie *ignored,
1516 const char *nb, const char *ne,
1517 const char *vb, const char *ve)
1519 test_results[test_count++] = strdupdelim (nb, ne);
1520 test_results[test_count++] = strdupdelim (vb, ve);
1527 /* Tests expected to succeed: */
/* Each entry pairs an input string with the NULL-terminated list of
   names and values, alternating, that parsing it should produce. */
1533 { "arg=value", {"arg", "value", NULL} },
1534 { "arg1=value1;arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
1535 { "arg1=value1; arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
1536 { "arg1=value1; arg2=value2;", {"arg1", "value1", "arg2", "value2", NULL} },
1537 { "arg1=value1; arg2=value2; ", {"arg1", "value1", "arg2", "value2", NULL} },
1538 { "arg1=\"value1\"; arg2=\"\"", {"arg1", "value1", "arg2", "", NULL} },
1539 { "arg=", {"arg", "", NULL} },
1540 { "arg1=; arg2=", {"arg1", "", "arg2", "", NULL} },
1541 { "arg1 = ; arg2= ", {"arg1", "", "arg2", "", NULL} },
1544 /* Tests expected to fail: */
1545 static char *tests_fail[] = {
1547 "arg=\"unterminated",
1549 "arg1=;=another-empty-name",
/* First pass: every valid input must be parsed and must yield exactly
   the expected sequence of names and values. */
1553 for (i = 0; i < countof (tests_succ); i++)
1556 char *data = tests_succ[i].data;
1557 char **expected = tests_succ[i].results;
1561 c = parse_set_cookies (data, test_parse_cookies_callback, 1);
1564 printf ("NULL cookie returned for valid data: %s\n", data);
/* TEST_RESULTS holds alternating names and values, hence the step of
   two: IND indexes a name and IND + 1 its value. */
1568 for (ind = 0; ind < test_count; ind += 2)
1572 if (0 != strcmp (expected[ind], test_results[ind]))
1573 printf ("Invalid name %d for '%s' (expected '%s', got '%s')\n",
1574 ind / 2 + 1, data, expected[ind], test_results[ind]);
1575 if (0 != strcmp (expected[ind + 1], test_results[ind + 1]))
1576 printf ("Invalid value %d for '%s' (expected '%s', got '%s')\n",
1577 ind / 2 + 1, data, expected[ind + 1], test_results[ind + 1]);
/* After the loop, a leftover result (IND < TEST_COUNT) or a leftover
   expectation (EXPECTED[IND] is not NULL) means the two lists differ
   in length. */
1579 if (ind < test_count || expected[ind])
1580 printf ("Unmatched number of results: %s\n", data);
/* Second pass: every invalid input must be reported as an error. */
1583 for (i = 0; i < countof (tests_fail); i++)
1586 char *data = tests_fail[i];
1588 c = parse_set_cookies (data, test_parse_cookies_callback, 1);
1590 printf ("Failed to report error on invalid data: %s\n", data);
1593 #endif /* TEST_COOKIES */