1 /* Support for cookies.
2 Copyright (C) 2001, 2002 Free Software Foundation, Inc.
4 This file is part of GNU Wget.
6 GNU Wget is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or (at
9 your option) any later version.
11 GNU Wget is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Wget; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 In addition, as a special exception, the Free Software Foundation
21 gives permission to link the code of its release of Wget with the
22 OpenSSL project's "OpenSSL" library (or with modified versions of it
23 that use the same license as the "OpenSSL" library), and distribute
24 the linked executables. You must obey the GNU General Public License
25 in all respects for all of the code used other than "OpenSSL". If you
26 modify this file, you may extend this exception to your version of the
27 file, but you are not obligated to do so. If you do not wish to do
28 so, delete this exception statement from your version. */
30 /* Written by Hrvoje Niksic. Parts are loosely inspired by cookie
31 code submitted by Tomasz Wegrzanowski.
33 Ideas for future work:
35 * Implement limits on cookie-related sizes, such as max. cookie
36 size, max. number of cookies, etc.
38 * Add more "cookie jar" methods, such as methods to iterate over
39 stored cookies, to clear temporary cookies, to perform
40 intelligent auto-saving, etc.
42 * Support `Set-Cookie2' and `Cookie2' headers? Does anyone really
62 /* Prototype of the date parser that is used further down to convert
the text of an `expires' attribute into a time value.
This should *really* be in a .h file! */
63 time_t http_atotm PARAMS ((const char *));
65 /* Declarations of `struct cookie' and the most basic functions. */
67 /* Cookie jar serves as cookie storage and a means of retrieving
68 cookies efficiently. All cookies with the same domain are stored
69 in a linked list called "chain". A cookie chain can be reached by
70 looking up the domain in the cookie jar's `chains' table.
72 For example, to reach all the cookies under google.com, one must
73 execute hash_table_get(jar->chains, "google.com"). Of
74 course, when sending a cookie to `www.google.com', one must search
75 for cookies that belong to either `www.google.com' or `google.com'
76 -- but the point is that the code doesn't need to go through *all*
80 /* Cookie chains indexed by domain. */
81 struct hash_table *chains;
83 int cookie_count; /* number of cookies in the jar. */
86 /* Value set by entry point functions, so that the low-level
87 routines don't need to call time() all the time. */
/* Allocate the jar structure itself. */
93 struct cookie_jar *jar = xnew (struct cookie_jar);
/* The domain -> chain table.  Judging by the constructor's name, its
   string keys are compared without regard to case -- TODO confirm. */
94 jar->chains = make_nocase_string_hash_table (0);
/* A fresh jar holds no cookies. */
95 jar->cookie_count = 0;
100 char *domain; /* domain of the cookie */
101 int port; /* port number, or PORT_ANY when any port matches */
102 char *path; /* path prefix of the cookie */
104 int secure; /* whether cookie should be
105 transmitted over non-https
107 int domain_exact; /* whether DOMAIN must match as a
110 int permanent; /* whether the cookie should outlive
112 time_t expiry_time; /* time when the cookie expires, 0
113 means undetermined. */
115 int discard_requested; /* whether cookie was created to
116 request discarding another
119 char *attr; /* name of the cookie, sent as `name=value' */
120 char *value; /* value of the cookie */
122 struct cookie *next; /* used for chaining of cookies in the
/* Port value of a cookie that is not tied to one particular port;
   such a cookie is never rejected on account of the request's port. */
126 #define PORT_ANY (-1)
128 /* Allocate and return a new, empty cookie structure.  The caller
owns the result; it is released with delete_cookie. */
130 static struct cookie *
133 struct cookie *cookie = xnew0 (struct cookie);
135 /* Both cookie->permanent and cookie->expiry_time are now 0. This
136 means that the cookie doesn't expire, but is only valid for this
137 session (i.e. not written out to disk). */
/* Until a specific port is assigned, the cookie applies to any port. */
139 cookie->port = PORT_ANY;
143 /* Non-zero if the cookie has expired. Assumes cookies_now has been
144 set by one of the entry point functions.  A cookie whose expiry_time
is 0 (undetermined) is never reported as expired. */
147 cookie_expired_p (const struct cookie *c)
149 return c->expiry_time != 0 && c->expiry_time < cookies_now;
152 /* Deallocate COOKIE and its components.  The string fields are
released with xfree_null -- presumably because any of them may still
be NULL in a partially built cookie; confirm against xfree_null. */
155 delete_cookie (struct cookie *cookie)
157 xfree_null (cookie->domain);
158 xfree_null (cookie->path);
159 xfree_null (cookie->attr);
160 xfree_null (cookie->value);
164 /* Functions for storing cookies.
166 All cookies can be reached beginning with jar->chains. The key in
167 that table is the domain name, and the value is a linked list of
168 all cookies from that domain. Every new cookie is placed on the
171 /* Find and return a cookie in JAR whose domain, port, path, and attribute
172 name correspond to COOKIE. If found, PREVPTR will point to the
173 location of the cookie previous in chain, or NULL if the found
174 cookie is the head of a chain.
The chain is looked up in JAR by COOKIE's domain; within the chain,
path and attribute name are compared with strcmp (case-sensitively)
and the port numbers must be equal.
176 If no matching cookie is found, return NULL. */
178 static struct cookie *
179 find_matching_cookie (struct cookie_jar *jar, struct cookie *cookie,
180 struct cookie **prevptr)
182 struct cookie *chain, *prev;
/* Fetch the chain that holds all cookies of COOKIE's domain. */
184 chain = hash_table_get (jar->chains, cookie->domain);
/* Walk the chain, remembering the predecessor of the current cookie. */
189 for (; chain; prev = chain, chain = chain->next)
190 if (0 == strcmp (cookie->path, chain->path)
191 && 0 == strcmp (cookie->attr, chain->attr)
192 && cookie->port == chain->port)
203 /* Store COOKIE to the jar.
205 This is done by placing COOKIE at the head of its chain. However,
206 if COOKIE matches a cookie already in memory, as determined by
207 find_matching_cookie, the old cookie is unlinked and destroyed.
COOKIE itself (not a copy) is linked into the chain.
209 The key of each chain's hash table entry is allocated only the
210 first time; subsequent calls to hash_table_put reuse the same key. */
213 store_cookie (struct cookie_jar *jar, struct cookie *cookie)
215 struct cookie *chain_head;
218 if (hash_table_get_pair (jar->chains, cookie->domain,
219 &chain_key, &chain_head))
221 /* A chain of cookies in this domain already exists. Check for
222 duplicates -- if an extant cookie exactly matches our domain,
223 port, path, and name, replace it. */
225 struct cookie *victim = find_matching_cookie (jar, cookie, &prev);
229 /* Remove VICTIM from the chain. COOKIE will be placed at
233 prev->next = victim->next;
234 cookie->next = chain_head;
238 /* prev is NULL; apparently VICTIM was at the head of
239 the chain. This place will be taken by COOKIE, so
240 all we need to do is: */
241 cookie->next = victim->next;
243 delete_cookie (victim);
245 DEBUGP (("Deleted old cookie (to be replaced.)\n"));
248 cookie->next = chain_head;
252 /* We are now creating the chain. Use a copy of cookie->domain
253 as the key for the life-time of the chain. Using
254 cookie->domain would be unsafe because the life-time of the
255 chain may exceed the life-time of the cookie. (Cookies may
256 be deleted from the chain by this very function.) */
258 chain_key = xstrdup (cookie->domain);
/* Register COOKIE as the head of the chain stored under CHAIN_KEY. */
261 hash_table_put (jar->chains, chain_key, cookie);
/* Debug trace describing the cookie that was just stored. */
267 time_t exptime = (time_t) cookie->expiry_time;
268 DEBUGP (("\nStored cookie %s %d%s %s <%s> <%s> [expiry %s] %s %s\n",
269 cookie->domain, cookie->port,
270 cookie->port == PORT_ANY ? " (ANY)" : "",
272 cookie->permanent ? "permanent" : "session",
273 cookie->secure ? "secure" : "insecure",
274 cookie->expiry_time ? datetime_str (&exptime) : "none",
275 cookie->attr, cookie->value));
280 /* Discard a cookie matching COOKIE's domain, port, path, and
281 attribute name. This gets called when we encounter a cookie whose
282 expiry date is in the past, or whose max-age is set to 0. The
283 former corresponds to netscape cookie spec, while the latter is
284 specified by rfc2109.
COOKIE only serves as the search pattern; the cookie that gets
unlinked and freed here is the matching one stored in JAR. */
287 discard_matching_cookie (struct cookie_jar *jar, struct cookie *cookie)
289 struct cookie *prev, *victim;
291 if (!hash_table_count (jar->chains))
292 /* No elements == nothing to discard. */
295 victim = find_matching_cookie (jar, cookie, &prev);
299 /* Simply unchain the victim. */
300 prev->next = victim->next;
303 /* VICTIM was head of its chain. We need to place a new
304 cookie at the head. */
305 char *chain_key = NULL;
/* Look up the key under which the chain is stored, so that it can be
   either reused or deallocated below. */
308 res = hash_table_get_pair (jar->chains, victim->domain,
313 /* VICTIM was the only cookie in the chain. Destroy the
314 chain and deallocate the chain key. */
315 hash_table_remove (jar->chains, victim->domain);
/* Re-register the chain under the same key, with VICTIM's successor
   as its new head. */
319 hash_table_put (jar->chains, chain_key, victim->next);
321 delete_cookie (victim);
322 DEBUGP (("Discarded old cookie.\n"));
326 /* Functions for parsing the `Set-Cookie' header, and creating new
327 cookies from the wire. */
/* True if the attribute name delimited by [name_b, name_e) equals
   STRING_LITERAL; going by the helper's name the comparison ignores
   case.  The macro relies on name_b and name_e being in scope at the
   point of use. */
329 #define NAME_IS(string_literal) \
330 BOUNDED_EQUAL_NO_CASE (name_b, name_e, string_literal)
/* True if a value range was supplied at all; it may still be empty.
   Relies on value_b and value_e being in scope. */
332 #define VALUE_EXISTS (value_b && value_e)
/* True if a value was supplied and holds at least one character. */
334 #define VALUE_NON_EMPTY (VALUE_EXISTS && (value_b != value_e))
336 /* Update the appropriate cookie field. [name_b, name_e) are expected
337 to delimit the attribute name, while [value_b, value_e) (optional)
338 should delimit the attribute value.  The name bounds must not be
NULL (this is asserted).
340 When called the first time, it will set the cookie's attribute name
341 and value. After that, it will check the attribute name for
342 special fields such as `domain', `path', etc. Where appropriate,
343 it will parse the values of the fields it recognizes and fill the
344 corresponding fields in COOKIE.
346 Returns 1 on success. Returns zero in case a syntax error is
347 found; such a cookie should be discarded. */
350 update_cookie_field (struct cookie *cookie,
351 const char *name_b, const char *name_e,
352 const char *value_b, const char *value_e)
354 assert (name_b != NULL && name_e != NULL);
/* The cookie's own name and value are stored as copies. */
360 cookie->attr = strdupdelim (name_b, name_e);
361 cookie->value = strdupdelim (value_b, value_e);
/* Special attributes.  `domain', `path', `expires' and `max-age' are
   each checked for a non-empty value; the value of `secure' is
   ignored. */
365 if (NAME_IS ("domain"))
367 if (!VALUE_NON_EMPTY)
/* Drop any domain stored by an earlier `domain' attribute. */
369 xfree_null (cookie->domain);
370 /* Strictly speaking, we should set cookie->domain_exact if the
371 domain doesn't begin with a dot. But many sites set the
372 domain to "foo.com" and expect "subhost.foo.com" to get the
373 cookie, and it apparently works. */
376 cookie->domain = strdupdelim (value_b, value_e);
379 else if (NAME_IS ("path"))
381 if (!VALUE_NON_EMPTY)
/* Drop any path stored by an earlier `path' attribute. */
383 xfree_null (cookie->path);
384 cookie->path = strdupdelim (value_b, value_e);
387 else if (NAME_IS ("expires"))
392 if (!VALUE_NON_EMPTY)
/* Copy the bounded value into VALUE_COPY so that it can be handed to
   http_atotm as an ordinary string. */
394 BOUNDED_TO_ALLOCA (value_b, value_e, value_copy);
396 expires = http_atotm (value_copy);
399 cookie->permanent = 1;
400 cookie->expiry_time = (time_t)expires;
403 /* Error in expiration spec. Assume default (cookie doesn't
404 expire, but valid only for this session.) */
407 /* According to netscape's specification, expiry time in the
408 past means that discarding of a matching cookie is
410 if (cookie->expiry_time < cookies_now)
411 cookie->discard_requested = 1;
415 else if (NAME_IS ("max-age"))
420 if (!VALUE_NON_EMPTY)
422 BOUNDED_TO_ALLOCA (value_b, value_e, value_copy);
424 sscanf (value_copy, "%lf", &maxage);
426 /* something went wrong. */
/* A max-age makes the cookie permanent; its expiry time is cookies_now
   plus the parsed max-age. */
428 cookie->permanent = 1;
429 cookie->expiry_time = cookies_now + maxage;
431 /* According to rfc2109, a cookie with max-age of 0 means that
432 discarding of a matching cookie is requested. */
434 cookie->discard_requested = 1;
438 else if (NAME_IS ("secure"))
440 /* ignore value completely */
445 /* Unrecognized attribute; ignore it. */
451 /* Returns non-zero for characters that are legal in the name of an
452 attribute. This used to allow only alphanumerics, '-', and '_',
453 but we need to be more lenient because a number of sites want to
454 use weirder attribute names. rfc2965 "informally specifies"
455 attribute name (token) as "a sequence of non-special, non-white
456 space characters". So we allow everything except the stuff we know
/* Accepts character codes 33 through 126 other than the double quote,
   '=', ';' and ','.  The argument is evaluated several times, so it
   must not have side effects. */
459 #define ATTR_NAME_CHAR(c) ((c) > 32 && (c) < 127 \
460 && (c) != '"' && (c) != '=' \
461 && (c) != ';' && (c) != ',')
463 /* Parse the contents of the `Set-Cookie' header. The header looks
466 name1=value1; name2=value2; ...
468 Trailing semicolon is optional; spaces are allowed between all
469 tokens. Additionally, values may be quoted.
471 A new cookie is returned upon success, NULL otherwise. The
472 specified CALLBACK function (normally `update_cookie_field') is used
473 to update the fields of the newly created cookie structure. */
475 static struct cookie *
476 parse_set_cookies (const char *sc,
477 int (*callback) (struct cookie *,
478 const char *, const char *,
479 const char *, const char *),
482 struct cookie *cookie = cookie_new ();
484 /* #### Hand-written DFAs are no fun to debug. We'd be better off
485 to rewrite this as an inline parser. */
/* States of the scanner.  S_ATTR_ACTION is where a delimited
   name/value pair is handed to CALLBACK; S_DONE and S_ERROR end the
   main loop. */
487 enum { S_START, S_NAME, S_NAME_POST,
488 S_VALUE_PRE, S_VALUE, S_QUOTED_VALUE, S_VALUE_TRAILSPACE,
489 S_ATTR_ACTION, S_DONE, S_ERROR
/* Boundaries of the attribute name and value currently being scanned;
   the value bounds are reset to NULL for an attribute without a value. */
495 const char *name_b = NULL, *name_e = NULL;
496 const char *value_b = NULL, *value_e = NULL;
500 while (state != S_DONE && state != S_ERROR)
507 else if (ISSPACE (c))
508 /* Strip all whitespace preceding the name. */
510 else if (ATTR_NAME_CHAR (c))
516 /* empty attr name not allowed */
520 if (!c || c == ';' || c == '=' || ISSPACE (c))
525 else if (ATTR_NAME_CHAR (c))
533 value_b = value_e = NULL;
536 state = S_ATTR_ACTION;
543 else if (ISSPACE (c))
544 /* Ignore space and keep the state. */
552 value_b = value_e = p;
555 state = S_ATTR_ACTION;
561 state = S_QUOTED_VALUE;
563 else if (ISSPACE (c))
573 if (!c || c == ';' || ISSPACE (c))
576 state = S_VALUE_TRAILSPACE;
580 value_e = NULL; /* no trailing space */
589 state = S_VALUE_TRAILSPACE;
596 case S_VALUE_TRAILSPACE:
600 state = S_ATTR_ACTION;
603 state = S_ATTR_ACTION;
604 else if (ISSPACE (c))
/* Hand the delimited name and (possibly absent) value to CALLBACK; its
   result says whether the field was acceptable. */
611 int legal = callback (cookie, name_b, name_e, value_b, value_e);
617 BOUNDED_TO_ALLOCA (name_b, name_e, name);
618 logprintf (LOG_NOTQUIET,
619 _("Error in Set-Cookie, field `%s'"),
630 /* handled by loop condition */
/* Failure path: the partially built cookie is released. */
637 delete_cookie (cookie);
638 if (state != S_ERROR)
/* NOTE(review): `p - sc' is a pointer difference (ptrdiff_t) but is
   printed with %d; a cast to int (or a matching conversion) would make
   the argument type agree with the format. */
642 logprintf (LOG_NOTQUIET,
643 _("Syntax error in Set-Cookie: %s at position %d.\n"),
644 escnonprint (sc), p - sc);
648 /* Sanity checks. These are important, otherwise it is possible for
649 malicious attackers to destroy important cookie information and/or
650 violate your privacy. */
/* Helper macros used by numeric_address_p to step P across one
   <digits> group and one '.' respectively. */
653 #define REQUIRE_DIGITS(p) do { \
656 for (++p; ISDIGIT (*p); p++) \
660 #define REQUIRE_DOT(p) do { \
665 /* Check whether ADDR matches <digits>.<digits>.<digits>.<digits>.
667 We don't want to call network functions like inet_addr() because
668 all we need is a check, preferably one that is small, fast, and
672 numeric_address_p (const char *addr)
674 const char *p = addr;
676 REQUIRE_DIGITS (p); /* A */
677 REQUIRE_DOT (p); /* . */
678 REQUIRE_DIGITS (p); /* B */
679 REQUIRE_DOT (p); /* . */
680 REQUIRE_DIGITS (p); /* C */
681 REQUIRE_DOT (p); /* . */
682 REQUIRE_DIGITS (p); /* D */
689 /* Check whether COOKIE_DOMAIN is an appropriate domain for HOST.
690 Originally I tried to make the check compliant with rfc2109, but
691 the sites deviated too often, so I had to fall back to "tail
692 matching", as defined by the original Netscape's cookie spec. */
695 check_domain_match (const char *cookie_domain, const char *host)
699 /* Numeric address requires exact match. It also requires HOST to
701 if (numeric_address_p (cookie_domain))
702 return 0 == strcmp (cookie_domain, host);
706 /* For the sake of efficiency, check for exact match first. */
707 if (0 == strcasecmp (cookie_domain, host))
712 /* COOKIE_DOMAIN must match the tail of HOST. */
713 if (!match_tail (host, cookie_domain, 1))
716 /* We know that COOKIE_DOMAIN is a suffix of HOST; however, we must
717 make sure that somebody is not trying to set the cookie for a
718 subdomain shared by many entities. For example, "company.co.uk"
719 must not be allowed to set a cookie for ".co.uk". On the other
720 hand, "sso.redhat.de" should be able to set a cookie for
723 The only marginally sane way to handle this I can think of is to
724 reject on the basis of the length of the second-level domain name
725 (but only when the top-level domain is unknown), with the assumption
726 that those of three or fewer characters could be reserved. For
729 .co.org -> works because the TLD is known
730 .co.uk -> doesn't work because "co" is only two chars long
731 .com.au -> doesn't work because "com" is only 3 chars long
732 .cnn.uk -> doesn't work because "cnn" is also only 3 chars long (ugh)
733 .cnn.de -> doesn't work for the same reason (ugh!!)
734 .abcd.de -> works because "abcd" is 4 chars long
735 .img.cnn.de -> works because it's not trying to set the 2nd level domain
736 .cnn.co.uk -> works for the same reason
738 That should prevent misuse, while allowing reasonable usage. If
739 someone knows of a better way to handle this, please let me
742 const char *p = cookie_domain;
743 int dccount = 1; /* number of domain components */
744 int ldcl = 0; /* last domain component length */
745 int nldcl = 0; /* next to last domain component length */
748 /* Ignore leading period in this calculation. */
751 for (out = 0; !out; p++)
759 /* Empty domain component found -- the domain is invalid. */
761 if (*(p + 1) == '\0')
763 /* Tolerate trailing '.' by not treating the domain as
764 one ending with an empty domain component. */
/* The length restriction on the next-to-last component only applies
   when COOKIE_DOMAIN does not end in one of these top-level domains. */
786 int known_toplevel = 0;
787 static const char *known_toplevel_domains[] = {
788 ".com", ".edu", ".net", ".org", ".gov", ".mil", ".int"
790 for (i = 0; i < countof (known_toplevel_domains); i++)
791 if (match_tail (cookie_domain, known_toplevel_domains[i], 1))
796 if (!known_toplevel && nldcl <= 3)
803 /* Don't allow the host "foobar.com" to set a cookie for domain
805 if (*cookie_domain != '.')
807 int dlen = strlen (cookie_domain);
808 int hlen = strlen (host);
809 /* cookie host: hostname.foobar.com */
810 /* desired domain: bar.com */
811 /* '.' must be here in host-> ^ */
812 if (hlen > dlen && host[hlen - dlen - 1] != '.')
/* Forward declaration; path_matches is defined further down. */
821 static int path_matches PARAMS ((const char *, const char *));
823 /* Check whether PATH begins with COOKIE_PATH.  This simply forwards
to path_matches with the two arguments swapped and returns its result. */
826 check_path_match (const char *cookie_path, const char *path)
828 return path_matches (path, cookie_path);
831 /* Process the HTTP `Set-Cookie' header. This results in storing the
832 cookie or discarding a matching one, or ignoring it completely, all
833 depending on the contents.
SET_COOKIE is the text of the header.  HOST, PORT and PATH describe
the request it came from; they are used to fill in and to validate
the cookie's domain, port and path. */
836 cookie_handle_set_cookie (struct cookie_jar *jar,
837 const char *host, int port,
838 const char *path, const char *set_cookie)
840 struct cookie *cookie;
/* Record the current time once; the expiry checks done while parsing
   compare against cookies_now. */
841 cookies_now = time (NULL);
843 cookie = parse_set_cookies (set_cookie, update_cookie_field, 0);
847 /* Sanitize parts of cookie. */
852 /* If the domain was not provided, we use the one we're talking
853 to, and set exact match. */
854 cookie->domain = xstrdup (host);
855 cookie->domain_exact = 1;
856 /* Set the port, but only if it's non-default. */
857 if (port != 80 && port != 443)
/* Reject a domain that HOST is not entitled to set. */
862 if (!check_domain_match (cookie->domain, host))
864 logprintf (LOG_NOTQUIET,
865 "Cookie coming from %s attempted to set domain to %s\n",
866 escnonprint (host), escnonprint (cookie->domain));
867 xfree (cookie->domain);
/* The cookie stores its own copy of the request path. */
873 cookie->path = xstrdup (path);
876 if (!check_path_match (cookie->path, path))
878 DEBUGP (("Attempt to fake the path: %s, %s\n",
879 cookie->path, path));
/* A cookie flagged discard_requested causes the matching stored
   cookie to be removed from the jar. */
884 if (cookie->discard_requested)
886 discard_matching_cookie (jar, cookie);
890 store_cookie (jar, cookie);
/* Presumably the cleanup for cookies that were not handed over to
   the jar -- confirm against the surrounding control flow. */
895 delete_cookie (cookie);
898 /* Support for sending out cookies in HTTP requests, based on
899 previously stored cookies. Entry point is
900 `cookie_header'. */
902 /* Return a count of how many times CHR occurs in STRING.  STRING is
scanned up to, but not including, its terminating NUL. */
905 count_char (const char *string, char chr)
909 for (p = string; *p; p++)
915 /* Find the cookie chains whose domains match HOST and store them to
DEST.  The number of chains stored is what the caller receives back.
918 A cookie chain is the head of a list of cookies that belong to a
919 host/domain. Given HOST "img.search.xemacs.org", this function
920 will return the chains for "img.search.xemacs.org",
921 "search.xemacs.org", and "xemacs.org" -- those of them that exist
924 DEST should be large enough to accept (in the worst case) as many
925 elements as there are domain components of HOST. */
928 find_chains_of_host (struct cookie_jar *jar, const char *host,
929 struct cookie *dest[])
934 /* Bail out quickly if there are no cookies in the jar. */
935 if (!hash_table_count (jar->chains))
938 if (numeric_address_p (host))
939 /* If host is an IP address, only check for the exact match. */
942 /* Otherwise, check all the subdomains except the top-level (last)
943 one. As a domain with N components has N-1 dots, the number of
944 passes equals the number of dots. */
945 passes = count_char (host, '.');
949 /* Find chains that match HOST, starting with exact match and
950 progressing to less specific domains. For instance, given HOST
951 fly.srk.fer.hr, first look for fly.srk.fer.hr's chain, then
952 srk.fer.hr's, then fer.hr's. */
955 struct cookie *chain = hash_table_get (jar->chains, host);
957 dest[dest_count++] = chain;
958 if (++passcnt >= passes)
/* Strip the leading component.  This relies on HOST still containing
   a dot, which holds as long as the number of passes does not exceed
   the number of dots counted above. */
960 host = strchr (host, '.') + 1;
966 /* If FULL_PATH begins with PREFIX, return the length of PREFIX, zero
970 path_matches (const char *full_path, const char *prefix)
975 /* Wget's HTTP paths do not begin with '/' (the URL code treats it
976 as a mere separator, inspired by rfc1808), but the '/' is
977 assumed when matching against the cookie stuff. */
981 len = strlen (prefix);
/* Case-sensitive comparison of the first LEN characters only. */
983 if (0 != strncmp (full_path, prefix, len))
984 /* FULL_PATH doesn't begin with PREFIX. */
987 /* Length of PREFIX determines the quality of the match. */
991 /* Return non-zero iff COOKIE matches the provided parameters of the
992 URL being downloaded: HOST, PORT, PATH, and SECFLAG.  SECFLAG is
non-zero for a secure connection; cookies marked secure are only
matched then.  Expired cookies never match.
994 If PATH_GOODNESS is non-NULL, store the "path goodness" value
995 there. That value is a measure of how closely COOKIE matches PATH,
996 used for ordering cookies. */
999 cookie_matches_url (const struct cookie *cookie,
1000 const char *host, int port, const char *path,
1001 int secflag, int *path_goodness)
1005 if (cookie_expired_p (cookie))
1006 /* Ignore stale cookies. Don't bother unchaining the cookie at
1007 this point -- Wget is a relatively short-lived application, and
1008 stale cookies will not be saved by `save_cookies'. On the
1009 other hand, this function should be as efficient as
1013 if (cookie->secure && !secflag)
1014 /* Don't transmit secure cookies over insecure connections. */
/* A cookie tied to a specific port only matches requests to that port. */
1016 if (cookie->port != PORT_ANY && cookie->port != port)
1019 /* If exact domain match is required, verify that cookie's domain is
1020 equal to HOST. If not, assume success on the grounds of the
1021 cookie's chain having been found by find_chains_of_host. */
1022 if (cookie->domain_exact
1023 && 0 != strcasecmp (host, cookie->domain))
/* The path goodness is whatever path_matches reports for the request
   path against the cookie's path. */
1026 pg = path_matches (path, cookie->path);
1031 /* If the caller requested path_goodness, we return it. This is
1032 an optimization, so that the caller doesn't need to call
1033 path_matches() again. */
1034 *path_goodness = pg;
1038 /* A structure that points to a cookie, along with the additional
1039 information about the cookie's "goodness". This allows us to sort
1040 the cookies when returning them to the server, as required by the
1043 struct weighed_cookie {
1044 struct cookie *cookie; /* the stored cookie this entry refers to */
1045 int domain_goodness; /* cookie_header sets this to the length of the cookie's domain */
1049 /* Comparator used for uniquifying the list.  It is passed to qsort
by eliminate_dups; P1 and P2 point to `struct weighed_cookie' elements.
Cookies are ordered by name and, for equal names, by value, so that
cookies with identical name and value end up adjacent. */
1052 equality_comparator (const void *p1, const void *p2)
1054 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
1055 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
1057 int namecmp = strcmp (wc1->cookie->attr, wc2->cookie->attr);
1058 int valuecmp = strcmp (wc1->cookie->value, wc2->cookie->value);
1060 /* We only really care whether both name and value are equal. We
1061 return them in this order only for consistency... */
1062 return namecmp ? namecmp : valuecmp;
1065 /* Eliminate duplicate cookies. "Duplicate cookies" are any two
1066 cookies with the same attr name and value. Whenever a duplicate
1067 pair is found, one of the cookies is removed.
OUTGOING holds COUNT entries; it is sorted and compacted in place.
The value returned is the distance from the start of the array to the
final tortoise position, which the caller uses as the new count. */
1070 eliminate_dups (struct weighed_cookie *outgoing, int count)
1072 struct weighed_cookie *h; /* hare */
1073 struct weighed_cookie *t; /* tortoise */
1074 struct weighed_cookie *end = outgoing + count;
1076 /* We deploy a simple uniquify algorithm: first sort the array
1077 according to our sort criteria, then copy it to itself, comparing
1078 each cookie to its neighbor and ignoring the duplicates. */
1080 qsort (outgoing, count, sizeof (struct weighed_cookie), equality_comparator);
1082 /* "Hare" runs through all the entries in the array, followed by
1083 "tortoise". If a duplicate is found, the hare skips it.
1084 Non-duplicate entries are copied to the tortoise ptr. */
1086 for (h = t = outgoing; h < end; h++)
/* Compare the current entry with the one that follows it in sorted
   order. */
1090 struct cookie *c0 = h[0].cookie;
1091 struct cookie *c1 = h[1].cookie;
1092 if (!strcmp (c0->attr, c1->attr) && !strcmp (c0->value, c1->value))
1093 continue; /* ignore the duplicate */
1096 /* If the hare has advanced past the tortoise (because of
1097 previous dups), make sure the values get copied. Otherwise,
1098 no copying is necessary. */
1104 return t - outgoing;
1107 /* Comparator used for sorting by quality.  It is passed to qsort by
cookie_header; P1 and P2 point to `struct weighed_cookie' elements. */
1110 goodness_comparator (const void *p1, const void *p2)
1112 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
1113 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
1115 /* Subtractions take `wc2' as the first argument because we want a
1116 sort in *decreasing* order of goodness. */
1117 int dgdiff = wc2->domain_goodness - wc1->domain_goodness;
1118 int pgdiff = wc2->path_goodness - wc1->path_goodness;
1120 /* Sort by domain goodness; if these are the same, sort by path
1121 goodness. (The sorting order isn't really specified; maybe it
1122 should be the other way around.) */
1123 return dgdiff ? dgdiff : pgdiff;
1126 /* Generate a `Cookie' header for a request that goes to HOST:PORT and
1127 requests PATH from the server. The resulting string is allocated
1128 with `xmalloc', and the caller is responsible for freeing it. If no
1129 cookies pertain to this request, i.e. no cookie header should be
1130 generated, NULL is returned.
SECFLAG is passed on to cookie_matches_url, which skips cookies
marked secure when it is zero.  As the size computation further down
shows, the buffer is sized for the `name=value' pairs, the "; "
separators between them and the terminating NUL only. */
1133 cookie_header (struct cookie_jar *jar, const char *host,
1134 int port, const char *path, int secflag)
1136 struct cookie **chains;
1139 struct cookie *cookie;
1140 struct weighed_cookie *outgoing;
1143 int result_size, pos;
1145 /* First, find the cookie chains whose domains match HOST. */
1147 /* Allocate room for find_chains_of_host to write to. The number of
1148 chains can at most equal the number of subdomains, hence
1149 1+<number of dots>. */
1150 chains = alloca_array (struct cookie *, 1 + count_char (host, '.'));
1151 chain_count = find_chains_of_host (jar, host, chains);
1153 /* No cookies for this host. */
/* Refresh the timestamp that the expiry checks compare against. */
1157 cookies_now = time (NULL);
1159 /* Now extract from the chains those cookies that match our host
1160 (for domain_exact cookies), port (for cookies with port other
1161 than PORT_ANY), etc. See cookie_matches_url for details. */
1163 /* Count the number of matching cookies. */
1165 for (i = 0; i < chain_count; i++)
1166 for (cookie = chains[i]; cookie; cookie = cookie->next)
1167 if (cookie_matches_url (cookie, host, port, path, secflag, NULL))
1170 return NULL; /* no cookies matched */
1172 /* Allocate the array. */
1173 outgoing = alloca_array (struct weighed_cookie, count);
1175 /* Fill the array with all the matching cookies from the chains that
1178 for (i = 0; i < chain_count; i++)
1179 for (cookie = chains[i]; cookie; cookie = cookie->next)
1182 if (!cookie_matches_url (cookie, host, port, path, secflag, &pg))
1184 outgoing[ocnt].cookie = cookie;
1185 outgoing[ocnt].domain_goodness = strlen (cookie->domain);
1186 outgoing[ocnt].path_goodness = pg;
1189 assert (ocnt == count);
1191 /* Eliminate duplicate cookies; that is, those whose name and value
1193 count = eliminate_dups (outgoing, count);
1195 /* Sort the array so that best-matching domains come first, and
1196 that, within one domain, best-matching paths come first. */
1197 qsort (outgoing, count, sizeof (struct weighed_cookie), goodness_comparator);
1199 /* Count the space the name=value pairs will take. */
1201 for (i = 0; i < count; i++)
1203 struct cookie *c = outgoing[i].cookie;
1205 result_size += strlen (c->attr) + 1 + strlen (c->value);
1208 /* Allocate output buffer:
1209 name=value pairs -- result_size
1210 "; " separators -- (count - 1) * 2
1211 \0 terminator -- 1 */
1212 result_size = result_size + (count - 1) * 2 + 1;
1213 result = xmalloc (result_size);
/* Write the pairs out in sorted order. */
1215 for (i = 0; i < count; i++)
1217 struct cookie *c = outgoing[i].cookie;
1218 int namlen = strlen (c->attr);
1219 int vallen = strlen (c->value);
1221 memcpy (result + pos, c->attr, namlen);
1223 result[pos++] = '=';
1224 memcpy (result + pos, c->value, vallen);
1228 result[pos++] = ';';
1229 result[pos++] = ' ';
1232 result[pos++] = '\0';
/* Sanity check: exactly the precomputed number of bytes was written. */
1233 assert (pos == result_size);
1237 /* Support for loading and saving cookies. The format used for
1238 loading and saving should be the format of the `cookies.txt' file
1239 used by Netscape and Mozilla, at least the Unix versions.
1240 (Apparently IE can export cookies in that format as well.) The
1241 format goes like this:
1243 DOMAIN DOMAIN-FLAG PATH SECURE-FLAG TIMESTAMP ATTR-NAME ATTR-VALUE
1245 DOMAIN -- cookie domain, optionally followed by :PORT
1246 DOMAIN-FLAG -- whether all hosts in the domain match
1248 SECURE-FLAG -- whether cookie requires secure connection
1249 TIMESTAMP -- expiry timestamp, number of seconds since epoch
1250 ATTR-NAME -- name of the cookie attribute
1251 ATTR-VALUE -- value of the cookie attribute (empty if absent)
1253 The fields are separated by TABs. All fields are mandatory, except
1254 for ATTR-VALUE. The `-FLAG' fields are boolean, their legal values
1255 being "TRUE" and "FALSE". Empty lines, lines consisting of
1256 whitespace only, and comment lines (beginning with # optionally
1257 preceded by whitespace) are ignored.
1259 Example line from cookies.txt (split in two lines for readability):
1261 .google.com TRUE / FALSE 2147368447 \
1262 PREF ID=34bb47565bbcd47b:LD=en:NR=20:TM=985172580:LM=985739012
1266 /* If the region [DOMAIN_B, DOMAIN_E) ends with :<digits>, parse the number, return
1267 it, and store new boundary (location of the `:') to *DOMAIN_E_PTR.
1268 If port is not specified, return 0. */
1271 domain_port (const char *domain_b, const char *domain_e,
1272 const char **domain_e_ptr)
/* Locate the first ':' inside the region; memchr yields NULL when
   there is none. */
1276 const char *colon = memchr (domain_b, ':', domain_e - domain_b);
/* Accumulate the decimal digits that follow the colon.
   NOTE(review): no range check is visible here, so a very long digit
   string could overflow PORT -- confirm. */
1279 for (p = colon + 1; p < domain_e && ISDIGIT (*p); p++)
1280 port = 10 * port + (*p - '0');
1282 /* Garbage following port number. */
1284 *domain_e_ptr = colon;
/* Used by cookie_jar_load to pick out the next TAB-delimited field of
   a line.  P is advanced to the next TAB or to the end of the string;
   an empty field (B == E) or a field that is not followed by a TAB is
   singled out -- presumably rejected; confirm against the full macro
   body. */
1288 #define GET_WORD(p, b, e) do { \
1290 while (*p && *p != '\t') \
1293 if (b == e || !*p) \
1298 /* Load cookies from FILE.  The file is read line by line; each line
that is neither empty nor a comment is split into TAB-separated fields
from which a cookie is built and stored in JAR.  If FILE cannot be
opened, a message is logged. */
1301 cookie_jar_load (struct cookie_jar *jar, const char *file)
1304 FILE *fp = fopen (file, "r");
1307 logprintf (LOG_NOTQUIET, "Cannot open cookies file `%s': %s\n",
1308 file, strerror (errno));
/* Timestamp against which the expiry of the loaded cookies is judged. */
1311 cookies_now = time (NULL);
1313 for (; ((line = read_whole_line (fp)) != NULL); xfree (line))
1315 struct cookie *cookie;
/* [x_b, x_e) delimit the individual fields within the current line. */
1321 char *domain_b = NULL, *domain_e = NULL;
1322 char *domflag_b = NULL, *domflag_e = NULL;
1323 char *path_b = NULL, *path_e = NULL;
1324 char *secure_b = NULL, *secure_e = NULL;
1325 char *expires_b = NULL, *expires_e = NULL;
1326 char *name_b = NULL, *name_e = NULL;
1327 char *value_b = NULL, *value_e = NULL;
1329 /* Skip leading white-space. */
1330 while (*p && ISSPACE (*p))
1332 /* Ignore empty lines and comment lines. */
1333 if (!*p || *p == '#')
1336 GET_WORD (p, domain_b, domain_e);
1337 GET_WORD (p, domflag_b, domflag_e);
1338 GET_WORD (p, path_b, path_e);
1339 GET_WORD (p, secure_b, secure_e);
1340 GET_WORD (p, expires_b, expires_e);
1341 GET_WORD (p, name_b, name_e);
1343 /* Don't use GET_WORD for value because it ends with newline,
1346 value_e = p + strlen (p);
1347 if (value_e > value_b && value_e[-1] == '\n')
1349 if (value_e > value_b && value_e[-1] == '\r')
1351 /* Empty values are legal (I think), so don't bother checking. */
1353 cookie = cookie_new ();
1355 cookie->attr = strdupdelim (name_b, name_e);
1356 cookie->value = strdupdelim (value_b, value_e);
1357 cookie->path = strdupdelim (path_b, path_e);
/* Only the word "TRUE" marks the cookie as secure (presumably compared
   case-sensitively -- confirm against BOUNDED_EQUAL). */
1358 cookie->secure = BOUNDED_EQUAL (secure_b, secure_e, "TRUE");
1360 /* Curl source says, quoting Andre Garcia: "flag: A TRUE/FALSE
1361 value indicating if all machines within a given domain can
1362 access the variable. This value is set automatically by the
1363 browser, depending on the value set for the domain." */
1364 cookie->domain_exact = !BOUNDED_EQUAL (domflag_b, domflag_e, "TRUE");
1366 /* DOMAIN needs special treatment because we might need to
1367 extract the port. */
1368 port = domain_port (domain_b, domain_e, (const char **)&domain_e);
1370 cookie->port = port;
1372 if (*domain_b == '.')
1373 ++domain_b; /* remove leading dot internally */
1374 cookie->domain = strdupdelim (domain_b, domain_e);
1376 /* safe default in case EXPIRES field is garbled. */
1377 expiry = (double)cookies_now - 1;
1379 /* I don't like changing the line, but it's safe here. (line is
1382 sscanf (expires_b, "%lf", &expiry);
1386 /* EXPIRY can be 0 for session cookies saved because the
1387 user specified `--keep-session-cookies' in the past.
1388 They remain session cookies, and will be saved only if
1389 the user has specified `--keep-session-cookies' again. */
1393 if (expiry < cookies_now)
1394 goto abort; /* ignore stale cookie. */
1395 cookie->expiry_time = expiry;
1396 cookie->permanent = 1;
/* Hand the finished cookie over to the jar. */
1399 store_cookie (jar, cookie);
/* Presumably reached only for cookies that were not stored (see the
   `goto abort' above) -- confirm against the surrounding labels. */
1405 delete_cookie (cookie);
/* save_cookies_mapper: callback given to hash_table_map by
   cookie_jar_save, which passes the output FILE * as ARG.  KEY is
   used as the domain string and VALUE as the head of a `next'-linked
   chain of cookies.  For each cookie it consults `permanent',
   opt.keep_session_cookies and cookie_expired_p, then writes one
   TAB-separated record: `:PORT' after the domain when the port is
   not PORT_ANY, the domain flag ("FALSE" for domain_exact cookies,
   "TRUE" otherwise), the path, the secure flag, the expiry time
   printed with %.0f, the name and the value.  The visible `return 1'
   is annotated as stopping the mapping.
   NOTE(review): the statements run when the filter tests succeed,
   the code that prints the domain itself, and the condition guarding
   `return 1' are not visible in this excerpt.  */
1410 /* Mapper for save_cookies callable by hash_table_map. VALUE points
1411 to the head in a chain of cookies. The function prints the entire
1415 save_cookies_mapper (void *key, void *value, void *arg)
1417 FILE *fp = (FILE *)arg;
1418 char *domain = (char *)key;
1419 struct cookie *cookie = (struct cookie *)value;
1420 for (; cookie; cookie = cookie->next)
1422 if (!cookie->permanent && !opt.keep_session_cookies)
1424 if (cookie_expired_p (cookie))
1426 if (!cookie->domain_exact)
1429 if (cookie->port != PORT_ANY)
1430 fprintf (fp, ":%d", cookie->port);
1431 fprintf (fp, "\t%s\t%s\t%s\t%.0f\t%s\t%s\n",
1432 cookie->domain_exact ? "FALSE" : "TRUE",
1433 cookie->path, cookie->secure ? "TRUE" : "FALSE",
1434 (double)cookie->expiry_time,
1435 cookie->attr, cookie->value);
1437 return 1; /* stop mapping */
/* cookie_jar_save: write the cookies held in JAR to the file named
   FILE, which is opened with mode "w".  The output starts with a
   three-line `#' header followed by a blank line; the cookie records
   themselves are produced by save_cookies_mapper, run over
   jar->chains.  Open, write and close problems are reported through
   logprintf at LOG_NOTQUIET with strerror (errno).
   NOTE(review): the tests guarding the `Cannot open' and `Error
   writing' messages are not visible in this excerpt.  */
1442 /* Save cookies, in format described above, to FILE. */
1445 cookie_jar_save (struct cookie_jar *jar, const char *file)
1449 DEBUGP (("Saving cookies to %s.\n", file));
/* Refresh the shared timestamp; it is also printed in the header. */
1451 cookies_now = time (NULL);
1453 fp = fopen (file, "w");
1456 logprintf (LOG_NOTQUIET, _("Cannot open cookies file `%s': %s\n"),
1457 file, strerror (errno));
/* File header; every header line begins with `#'. */
1461 fputs ("# HTTP cookie file.\n", fp);
1462 fprintf (fp, "# Generated by Wget on %s.\n", datetime_str (&cookies_now));
1463 fputs ("# Edit at your own risk.\n\n", fp);
/* Emit the cookie records, one hash-table entry (chain) at a time. */
1465 hash_table_map (jar->chains, save_cookies_mapper, fp);
1468 logprintf (LOG_NOTQUIET, _("Error writing to `%s': %s\n"),
1469 file, strerror (errno));
/* Closing a stream opened for writing can fail too, so the result of
   fclose is checked and reported separately. */
1470 if (fclose (fp) < 0)
1471 logprintf (LOG_NOTQUIET, _("Error closing `%s': %s\n"),
1472 file, strerror (errno));
1474 DEBUGP (("Done saving cookies.\n"));
/* nuke_cookie_chain: callback given to hash_table_map by
   cookie_jar_delete, with the jar as ARG.  It removes the chain's
   entry from jar->chains and calls delete_cookie on the chain's
   cookies, reading each cookie's `next' pointer before that cookie
   is deleted.
   NOTE(review): the parameter names look swapped.  The first
   parameter (`value') is used as the hash key and the second (`key')
   as the cookie chain, whereas save_cookies_mapper -- passed to the
   same hash_table_map -- names them (key, value, arg).  Only the
   names differ; the positional use appears consistent.  Confirm
   against hash_table_map's callback contract before renaming.
   NOTE(review): the loop around the deletion, the freeing of the key
   and the return value are not visible in this excerpt.  */
1477 /* Destroy all the elements in the chain and unhook it from the cookie
1478 jar. This is written in the form of a callback to hash_table_map
1479 and used by cookie_jar_delete to delete all the cookies in a
1483 nuke_cookie_chain (void *value, void *key, void *arg)
1485 char *chain_key = (char *)value;
1486 struct cookie *chain = (struct cookie *)key;
1487 struct cookie_jar *jar = (struct cookie_jar *)arg;
1489 /* Remove the chain from the table and free the key. */
1490 hash_table_remove (jar->chains, chain_key);
1493 /* Then delete all the cookies in the chain. */
/* The successor is fetched first, presumably because CHAIN must not
   be dereferenced once delete_cookie has been called on it. */
1496 struct cookie *next = chain->next;
1497 delete_cookie (chain);
/* cookie_jar_delete: dispose of the contents of JAR.  Every chain in
   jar->chains is handed to nuke_cookie_chain, after which the hash
   table itself is destroyed.
   NOTE(review): whether JAR itself is freed is not visible in this
   excerpt.  */
1505 /* Clean up cookie-related data. */
1508 cookie_jar_delete (struct cookie_jar *jar)
/* Let the callback unhook and delete each chain (JAR is its ARG)... */
1510 hash_table_map (jar->chains, nuke_cookie_chain, jar);
/* ...and only then tear down the table that held them. */
1511 hash_table_destroy (jar->chains);
/* Test scaffolding for parse_set_cookies.  test_results collects the
   strings reported through test_parse_cookies_callback: each call
   appends a copy of the name [NB, NE) followed by a copy of the value
   [VB, VE), advancing test_count by two.  The cookie argument is
   unused (it is named `ignored').
   NOTE(review): test_results holds 10 pointers and no bounds check is
   visible in the callback, so a single parse must not report more
   than five name/value pairs.  The copies made by strdupdelim are
   not visibly released anywhere in this excerpt -- confirm whether
   that matters for the test build.  */
1515 /* Test cases. Currently this is only tests parse_set_cookies. To
1516 use, recompile Wget with -DTEST_COOKIES and call test_cookies()
1521 char *test_results[10];
1523 static int test_parse_cookies_callback (struct cookie *ignored,
1524 const char *nb, const char *ne,
1525 const char *vb, const char *ve)
1527 test_results[test_count++] = strdupdelim (nb, ne);
1528 test_results[test_count++] = strdupdelim (vb, ve);
/* Body of the parse_set_cookies self-test.  Two tables drive it:
   tests_succ pairs an input string with the NULL-terminated list of
   name/value strings the parser is expected to report, and tests_fail
   lists inputs that are expected to be rejected.  Mismatches are
   reported with printf.
   NOTE(review): the function header, the declaration of tests_succ
   and several statements (resetting test_count, the tests on C) are
   not visible in this excerpt.  */
1535 /* Tests expected to succeed: */
1541 { "arg=value", {"arg", "value", NULL} },
1542 { "arg1=value1;arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
1543 { "arg1=value1; arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
1544 { "arg1=value1; arg2=value2;", {"arg1", "value1", "arg2", "value2", NULL} },
1545 { "arg1=value1; arg2=value2; ", {"arg1", "value1", "arg2", "value2", NULL} },
1546 { "arg1=\"value1\"; arg2=\"\"", {"arg1", "value1", "arg2", "", NULL} },
1547 { "arg=", {"arg", "", NULL} },
1548 { "arg1=; arg2=", {"arg1", "", "arg2", "", NULL} },
1549 { "arg1 = ; arg2= ", {"arg1", "", "arg2", "", NULL} },
1552 /* Tests expected to fail: */
1553 static char *tests_fail[] = {
1555 "arg=\"unterminated",
1557 "arg1=;=another-empty-name",
/* First pass: every tests_succ entry must parse and report exactly
   the listed names and values, in order. */
1561 for (i = 0; i < countof (tests_succ); i++)
1564 char *data = tests_succ[i].data;
1565 char **expected = tests_succ[i].results;
1569 c = parse_set_cookies (data, test_parse_cookies_callback, 1);
1572 printf ("NULL cookie returned for valid data: %s\n", data);
/* Results are stored as name, value, name, value, ... so the index
   advances by two and IND / 2 + 1 is the 1-based pair number. */
1576 for (ind = 0; ind < test_count; ind += 2)
1580 if (0 != strcmp (expected[ind], test_results[ind]))
1581 printf ("Invalid name %d for '%s' (expected '%s', got '%s')\n",
1582 ind / 2 + 1, data, expected[ind], test_results[ind]);
1583 if (0 != strcmp (expected[ind + 1], test_results[ind + 1]))
1584 printf ("Invalid value %d for '%s' (expected '%s', got '%s')\n",
1585 ind / 2 + 1, data, expected[ind + 1], test_results[ind + 1]);
/* IND stopping short of test_count, or an expected entry still
   present at IND, both mean the two lists differ in length. */
1587 if (ind < test_count || expected[ind])
1588 printf ("Unmatched number of results: %s\n", data);
/* Second pass: the parser is expected to reject every tests_fail
   entry. */
1591 for (i = 0; i < countof (tests_fail); i++)
1594 char *data = tests_fail[i];
1596 c = parse_set_cookies (data, test_parse_cookies_callback, 1);
1598 printf ("Failed to report error on invalid data: %s\n", data);
1601 #endif /* TEST_COOKIES */