1 /* Support for cookies.
2 Copyright (C) 2001, 2002 Free Software Foundation, Inc.
4 This file is part of GNU Wget.
6 GNU Wget is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or (at
9 your option) any later version.
11 GNU Wget is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Wget; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 In addition, as a special exception, the Free Software Foundation
21 gives permission to link the code of its release of Wget with the
22 OpenSSL project's "OpenSSL" library (or with modified versions of it
23 that use the same license as the "OpenSSL" library), and distribute
24 the linked executables. You must obey the GNU General Public License
25 in all respects for all of the code used other than "OpenSSL". If you
26 modify this file, you may extend this exception to your version of the
27 file, but you are not obligated to do so. If you do not wish to do
28 so, delete this exception statement from your version. */
30 /* Written by Hrvoje Niksic. Parts are loosely inspired by cookie
31 code submitted by Tomasz Wegrzanowski.
33 TODO: Implement limits on cookie-related sizes, such as max. cookie
34 size, max. number of cookies, etc. Add more "cookie jar" methods,
35 such as methods to iterate over stored cookies, to clear temporary cookies,
36 to perform intelligent auto-saving, etc. Ultimately support
37 `Set-Cookie2' and `Cookie2' headers. */
56 /* This should *really* be in a .h file! */
/* Hand-written prototype: no header visible here supplies it.  In this
   file the function is given the text of a cookie's `expires' attribute
   and its time_t result is used as the cookie's expiry time.  How it
   signals an unparsable date is not visible from here -- check the
   definition before relying on a particular error value.  */
57 time_t http_atotm PARAMS ((const char *));
59 /* Declarations of `struct cookie' and the most basic functions. */
62 /* Hash table that maps domain names to cookie chains. A "cookie
63 chain" is a linked list of cookies that belong to the same
65 struct hash_table *chains_by_domain;
67 int cookie_count; /* number of cookies in the jar. */
70 /* Value set by entry point functions, so that the low-level
71 routines don't need to call time() all the time. */
77 struct cookie_jar *jar = xmalloc (sizeof (struct cookie_jar));
78 jar->chains_by_domain = make_nocase_string_hash_table (0);
79 jar->cookie_count = 0;
84 char *domain; /* domain of the cookie */
85 int port; /* port number */
86 char *path; /* path prefix of the cookie */
87 int secure; /* whether cookie should be
88 transmitted over non-https
90 int permanent; /* whether the cookie should outlive
92 time_t expiry_time; /* time when the cookie expires */
93 int discard_requested; /* whether cookie was created to
94 request discarding another
97 char *attr; /* cookie attribute name */
98 char *value; /* cookie attribute value */
100 struct cookie_jar *jar; /* pointer back to the cookie jar, for
102 struct cookie *next; /* used for chaining of cookies in the
/* Port value meaning "no particular port".  Freshly allocated cookies
   start out with it, and a cookie carrying it is accepted for any
   request port when cookies are matched against a request.  */
106 #define PORT_ANY (-1)
/* Non-zero if cookie C has an expiry time set (expiry_time != 0) and
   that time lies before `cookies_now'.  A zero expiry_time never counts
   as expired.  C is evaluated twice, so it must be free of side
   effects.  */
107 #define COOKIE_EXPIRED_P(c) ((c)->expiry_time != 0 && (c)->expiry_time < cookies_now)
109 /* Allocate and return a new, empty cookie structure. */
111 static struct cookie *
114 struct cookie *cookie = xmalloc (sizeof (struct cookie));
115 memset (cookie, '\0', sizeof (struct cookie));
117 /* Both cookie->permanent and cookie->expiry_time are now 0. By
118 default, we assume that the cookie is non-permanent and valid
119 until the end of the session. */
121 cookie->port = PORT_ANY;
125 /* Deallocate COOKIE and its components. */
128 delete_cookie (struct cookie *cookie)
130 FREE_MAYBE (cookie->domain);
131 FREE_MAYBE (cookie->path);
132 FREE_MAYBE (cookie->attr);
133 FREE_MAYBE (cookie->value);
137 /* Functions for storing cookies.
139 All cookies can be reached beginning with jar->chains_by_domain.
140 The key in that table is the domain name, and the value is a linked
141 list of all cookies from that domain. Every new cookie is placed
142 on the head of the list. */
144 /* Find and return a cookie in JAR whose domain, path, and attribute
145 name correspond to COOKIE. If found, PREVPTR will point to the
146 location of the cookie previous in chain, or NULL if the found
147 cookie is the head of a chain.
149 If no matching cookie is found, return NULL. */
151 static struct cookie *
152 find_matching_cookie (struct cookie_jar *jar, struct cookie *cookie,
153 struct cookie **prevptr)
155 struct cookie *chain, *prev;
157 chain = hash_table_get (jar->chains_by_domain, cookie->domain);
162 for (; chain; prev = chain, chain = chain->next)
163 if (0 == strcmp (cookie->path, chain->path)
164 && 0 == strcmp (cookie->attr, chain->attr)
165 && cookie->port == chain->port)
176 /* Store COOKIE to the jar.
178 This is done by placing COOKIE at the head of its chain. However,
179 if COOKIE matches a cookie already in memory, as determined by
180 find_matching_cookie, the old cookie is unlinked and destroyed.
182 The key of each chain's hash table entry is allocated only the
183 first time; next hash_table_put's reuse the same key. */
186 store_cookie (struct cookie_jar *jar, struct cookie *cookie)
188 struct cookie *chain_head;
191 if (hash_table_get_pair (jar->chains_by_domain, cookie->domain,
192 &chain_key, &chain_head))
194 /* A chain of cookies in this domain already exists. Check for
195 duplicates -- if an extant cookie exactly matches our domain,
196 port, path, and name, replace it. */
198 struct cookie *victim = find_matching_cookie (jar, cookie, &prev);
202 /* Remove VICTIM from the chain. COOKIE will be placed at
206 prev->next = victim->next;
207 cookie->next = chain_head;
211 /* prev is NULL; apparently VICTIM was at the head of
212 the chain. This place will be taken by COOKIE, so
213 all we need to do is: */
214 cookie->next = victim->next;
216 delete_cookie (victim);
218 DEBUGP (("Deleted old cookie (to be replaced.)\n"));
221 cookie->next = chain_head;
225 /* We are now creating the chain. Allocate the string that will
226 be used as a key. It is unsafe to use cookie->domain for
227 that, because it might get deallocated by the above code at
230 chain_key = xstrdup (cookie->domain);
233 hash_table_put (jar->chains_by_domain, chain_key, cookie);
236 DEBUGP (("\nStored cookie %s %d%s %s %s %d %s %s %s\n",
237 cookie->domain, cookie->port,
238 cookie->port == PORT_ANY ? " (ANY)" : "",
240 cookie->permanent ? "permanent" : "nonpermanent",
243 ? asctime (localtime (&cookie->expiry_time)) : "<undefined>",
244 cookie->attr, cookie->value));
247 /* Discard a cookie matching COOKIE's domain, port, path, and
248 attribute name. This gets called when we encounter a cookie whose
249 expiry date is in the past, or whose max-age is set to 0. The
250 former corresponds to netscape cookie spec, while the latter is
251 specified by rfc2109. */
254 discard_matching_cookie (struct cookie_jar *jar, struct cookie *cookie)
256 struct cookie *prev, *victim;
258 if (!hash_table_count (jar->chains_by_domain))
259 /* No elements == nothing to discard. */
262 victim = find_matching_cookie (jar, cookie, &prev);
266 /* Simply unchain the victim. */
267 prev->next = victim->next;
270 /* VICTIM was head of its chain. We need to place a new
271 cookie at the head. */
272 char *chain_key = NULL;
275 res = hash_table_get_pair (jar->chains_by_domain, victim->domain,
280 /* VICTIM was the only cookie in the chain. Destroy the
281 chain and deallocate the chain key. */
282 hash_table_remove (jar->chains_by_domain, victim->domain);
286 hash_table_put (jar->chains_by_domain, chain_key, victim->next);
288 delete_cookie (victim);
289 DEBUGP (("Discarded old cookie.\n"));
293 /* Functions for parsing the `Set-Cookie' header, and creating new
294 cookies from the wire. */
/* Helper macros for update_cookie_field.  They are not self-contained:
   each one refers directly to that function's name_b/name_e or
   value_b/value_e parameters, so they are only usable where variables
   with those names are in scope.  */

/* Compare the attribute name delimited by [name_b, name_e) with
   STRING_LITERAL.  The comparison is delegated to BOUNDED_EQUAL_NO_CASE
   (defined elsewhere; presumably case-insensitive, judging by its
   name).  */
297 #define NAME_IS(string_literal) \
298 BOUNDED_EQUAL_NO_CASE (name_b, name_e, string_literal)
/* Non-zero if a value was supplied at all, i.e. both boundary pointers
   are non-NULL.  */
300 #define VALUE_EXISTS (value_b && value_e)
/* Non-zero if a value was supplied and it is at least one character
   long (the boundaries differ).  */
302 #define VALUE_NON_EMPTY (VALUE_EXISTS && (value_b != value_e))
304 /* Update the appropriate cookie field. [name_b, name_e) are expected
305 to delimit the attribute name, while [value_b, value_e) (optional)
306 should delimit the attribute value.
308 When called the first time, it will set the cookie's attribute name
309 and value. After that, it will check the attribute name for
310 special fields such as `domain', `path', etc. Where appropriate,
311 it will parse the values of the fields it recognizes and fill the
312 corresponding fields in COOKIE.
314 Returns 1 on success. Returns zero in case a syntax error is
315 found; such a cookie should be discarded. */
318 update_cookie_field (struct cookie *cookie,
319 const char *name_b, const char *name_e,
320 const char *value_b, const char *value_e)
322 assert (name_b != NULL && name_e != NULL);
328 cookie->attr = strdupdelim (name_b, name_e);
329 cookie->value = strdupdelim (value_b, value_e);
333 if (NAME_IS ("domain"))
335 if (!VALUE_NON_EMPTY)
337 FREE_MAYBE (cookie->domain);
338 cookie->domain = strdupdelim (value_b, value_e);
341 else if (NAME_IS ("path"))
343 if (!VALUE_NON_EMPTY)
345 FREE_MAYBE (cookie->path);
346 cookie->path = strdupdelim (value_b, value_e);
349 else if (NAME_IS ("expires"))
354 if (!VALUE_NON_EMPTY)
356 BOUNDED_TO_ALLOCA (value_b, value_e, value_copy);
358 expires = http_atotm (value_copy);
361 cookie->permanent = 1;
362 cookie->expiry_time = (time_t)expires;
365 /* Error in expiration spec. Assume default (cookie valid for
369 /* According to netscape's specification, expiry time in the
370 past means that discarding of a matching cookie is
372 if (cookie->expiry_time < cookies_now)
373 cookie->discard_requested = 1;
377 else if (NAME_IS ("max-age"))
382 if (!VALUE_NON_EMPTY)
384 BOUNDED_TO_ALLOCA (value_b, value_e, value_copy);
386 sscanf (value_copy, "%lf", &maxage);
388 /* something went wrong. */
390 cookie->permanent = 1;
391 cookie->expiry_time = cookies_now + maxage;
393 /* According to rfc2109, a cookie with max-age of 0 means that
394 discarding of a matching cookie is requested. */
396 cookie->discard_requested = 1;
400 else if (NAME_IS ("secure"))
402 /* ignore value completely */
407 /* Unrecognized attribute; ignore it. */
413 /* Returns non-zero for characters that are legal in the name of an
414 attribute. This used to allow only alphanumerics, '-', and '_',
415 but we need to be more lenient because a number of sites wants to
416 use weirder attribute names. rfc2965 "informally specifies"
417 attribute name (token) as "a sequence of non-special, non-white
418 space characters". So we allow everything except the stuff we know
/* Non-zero if C may appear in an attribute name: any character with a
   code strictly between 32 and 127, except the double quote, '=', ';'
   and ','.  C is evaluated up to six times, so the argument must not
   have side effects.  */
421 #define ATTR_NAME_CHAR(c) ((c) > 32 && (c) < 127 \
422 && (c) != '"' && (c) != '=' \
423 && (c) != ';' && (c) != ',')
425 /* Parse the contents of the `Set-Cookie' header. The header looks
428 name1=value1; name2=value2; ...
430 Trailing semicolon is optional; spaces are allowed between all
431 tokens. Additionally, values may be quoted.
433 A new cookie is returned upon success, NULL otherwise. The
434 specified CALLBACK function (normally `update_cookie_field') is used
435 to update the fields of the newly created cookie structure. */
437 static struct cookie *
438 parse_set_cookies (const char *sc,
439 int (*callback) (struct cookie *,
440 const char *, const char *,
441 const char *, const char *),
444 struct cookie *cookie = cookie_new ();
446 /* #### Hand-written DFAs are no fun to debug. We'd be better off
447 to rewrite this as an inline parser. */
449 enum { S_START, S_NAME, S_NAME_POST,
450 S_VALUE_PRE, S_VALUE, S_QUOTED_VALUE, S_VALUE_TRAILSPACE,
451 S_ATTR_ACTION, S_DONE, S_ERROR
457 const char *name_b = NULL, *name_e = NULL;
458 const char *value_b = NULL, *value_e = NULL;
462 while (state != S_DONE && state != S_ERROR)
469 else if (ISSPACE (c))
470 /* Strip all whitespace preceding the name. */
472 else if (ATTR_NAME_CHAR (c))
478 /* empty attr name not allowed */
482 if (!c || c == ';' || c == '=' || ISSPACE (c))
487 else if (ATTR_NAME_CHAR (c))
495 value_b = value_e = NULL;
498 state = S_ATTR_ACTION;
505 else if (ISSPACE (c))
506 /* Ignore space and keep the state. */
514 value_b = value_e = p;
517 state = S_ATTR_ACTION;
523 state = S_QUOTED_VALUE;
525 else if (ISSPACE (c))
535 if (!c || c == ';' || ISSPACE (c))
538 state = S_VALUE_TRAILSPACE;
542 value_e = NULL; /* no trailing space */
551 state = S_VALUE_TRAILSPACE;
558 case S_VALUE_TRAILSPACE:
562 state = S_ATTR_ACTION;
565 state = S_ATTR_ACTION;
566 else if (ISSPACE (c))
573 int legal = callback (cookie, name_b, name_e, value_b, value_e);
579 BOUNDED_TO_ALLOCA (name_b, name_e, name);
580 logprintf (LOG_NOTQUIET,
581 _("Error in Set-Cookie, field `%s'"), name);
591 /* handled by loop condition */
598 delete_cookie (cookie);
599 if (state != S_ERROR)
603 logprintf (LOG_NOTQUIET,
604 _("Syntax error in Set-Cookie: %s at position %d.\n"),
609 /* Sanity checks. These are important, otherwise it is possible for
610 malicious attackers to destroy important cookie information and/or
611 violate your privacy. */
614 #define REQUIRE_DIGITS(p) do { \
617 for (++p; ISDIGIT (*p); p++) \
621 #define REQUIRE_DOT(p) do { \
626 /* Check whether ADDR matches <digits>.<digits>.<digits>.<digits>.
628 We don't want to call network functions like inet_addr() because all
629 we need is a check, preferably one that is small, fast, and
633 numeric_address_p (const char *addr)
635 const char *p = addr;
637 REQUIRE_DIGITS (p); /* A */
638 REQUIRE_DOT (p); /* . */
639 REQUIRE_DIGITS (p); /* B */
640 REQUIRE_DOT (p); /* . */
641 REQUIRE_DIGITS (p); /* C */
642 REQUIRE_DOT (p); /* . */
643 REQUIRE_DIGITS (p); /* D */
650 /* Check whether COOKIE_DOMAIN is an appropriate domain for HOST.
651 Originally I tried to make the check compliant with rfc2109, but
652 the sites deviated too often, so I had to fall back to "tail
653 matching", as defined by the original Netscape's cookie spec. */
656 check_domain_match (const char *cookie_domain, const char *host)
660 /* Numeric address requires exact match. It also requires HOST to
662 if (numeric_address_p (cookie_domain))
663 return 0 == strcmp (cookie_domain, host);
667 /* For the sake of efficiency, check for exact match first. */
668 if (!strcasecmp (cookie_domain, host))
673 /* HOST must match the tail of cookie_domain. */
674 if (!match_tail (host, cookie_domain, 1))
677 /* We know that COOKIE_DOMAIN is a subset of HOST; however, we must
678 make sure that somebody is not trying to set the cookie for a
679 subdomain shared by many entities. For example, "company.co.uk"
680 must not be allowed to set a cookie for ".co.uk". On the other
681 hand, "sso.redhat.de" should be able to set a cookie for
684 The only marginally sane way to handle this I can think of is to
685 reject on the basis of the length of the second-level domain name
686 (but only when the top-level domain is unknown), with the assumption
687 that those of three or less characters could be reserved. For
690 .co.org -> works because the TLD is known
691 .co.uk -> doesn't work because "co" is only two chars long
692 .com.au -> doesn't work because "com" is only 3 chars long
693 .cnn.uk -> doesn't work because "cnn" is also only 3 chars long (ugh)
694 .cnn.de -> doesn't work for the same reason (ugh!!)
695 .abcd.de -> works because "abcd" is 4 chars long
696 .img.cnn.de -> works because it's not trying to set the 2nd level domain
697 .cnn.co.uk -> works for the same reason
699 That should prevent misuse, while allowing reasonable usage. If
700 someone knows of a better way to handle this, please let me
703 const char *p = cookie_domain;
704 int dccount = 1; /* number of domain components */
705 int ldcl = 0; /* last domain component length */
706 int nldcl = 0; /* next to last domain component length */
709 /* Ignore leading period in this calculation. */
712 for (out = 0; !out; p++)
720 /* Empty domain component found -- the domain is invalid. */
722 if (*(p + 1) == '\0')
724 /* Tolerate trailing '.' by not treating the domain as
725 one ending with an empty domain component. */
747 int known_toplevel = 0;
748 static char *known_toplevel_domains[] = {
749 ".com", ".edu", ".net", ".org", ".gov", ".mil", ".int"
751 for (i = 0; i < countof (known_toplevel_domains); i++)
752 if (match_tail (cookie_domain, known_toplevel_domains[i], 1))
757 if (!known_toplevel && nldcl <= 3)
764 /* Don't allow domain "bar.com" to match host "foobar.com". */
765 if (*cookie_domain != '.')
767 int dlen = strlen (cookie_domain);
768 int hlen = strlen (host);
769 /* cookie host: hostname.foobar.com */
770 /* desired domain: bar.com */
771 /* '.' must be here in host-> ^ */
772 if (hlen > dlen && host[hlen - dlen - 1] != '.')
/* Forward declaration: check_path_match, right below, calls
   path_matches, which is only defined further down in this file.  */
781 static int path_matches PARAMS ((const char *, const char *));
783 /* Check whether PATH begins with COOKIE_PATH. */
786 check_path_match (const char *cookie_path, const char *path)
788 return path_matches (path, cookie_path);
791 /* Process the HTTP `Set-Cookie' header. This results in storing the
792 cookie or discarding a matching one, or ignoring it completely, all
793 depending on the contents. */
796 cookie_jar_process_set_cookie (struct cookie_jar *jar,
797 const char *host, int port,
798 const char *path, const char *set_cookie)
800 struct cookie *cookie;
801 cookies_now = time (NULL);
803 cookie = parse_set_cookies (set_cookie, update_cookie_field, 0);
807 /* Sanitize parts of cookie. */
812 cookie->domain = xstrdup (host);
817 if (!check_domain_match (cookie->domain, host))
819 logprintf (LOG_NOTQUIET,
820 "Cookie coming from %s attempted to set domain to %s\n",
821 host, cookie->domain);
826 cookie->path = xstrdup (path);
829 if (!check_path_match (cookie->path, path))
831 DEBUGP (("Attempt to fake the path: %s, %s\n",
832 cookie->path, path));
837 if (cookie->discard_requested)
839 discard_matching_cookie (jar, cookie);
843 store_cookie (jar, cookie);
848 delete_cookie (cookie);
851 /* Support for sending out cookies in HTTP requests, based on
852 previously stored cookies. Entry point is
853 `build_cookies_request'. */
855 /* Store CHAIN to STORE if there is room in STORE. If not, increment
856 COUNT anyway, so that when the function is done, we end up with the
857 exact count of how much place we actually need. */
/* Record ST_CHAIN in slot ST_COUNT of the array ST_STORE, provided
   that slot exists (ST_COUNT < ST_SIZE), and then advance ST_COUNT
   whether or not anything was stored.  After a series of invocations
   ST_COUNT therefore holds the number of slots that would have been
   needed, even if that exceeds ST_SIZE.

   The macro writes through its ST_STORE argument; it does not depend
   on the caller's array being named `store'.  ST_COUNT must be a
   modifiable lvalue.  ST_COUNT is evaluated more than once, so it
   must not have side effects.  The do/while wrapper makes the
   expansion a single statement that is safe in unbraced if/else.  */
#define STORE_CHAIN(st_chain, st_store, st_size, st_count) do {	\
  if ((st_count) < (st_size))					\
    (st_store)[st_count] = (st_chain);				\
  ++(st_count);							\
} while (0)
865 /* Store cookie chains that match HOST. Since more than one chain can
866 match, the matches are written to STORE. No more than SIZE matches
867 are written; if more matches are present, return the number of
868 chains that would have been written. */
871 find_matching_chains (struct cookie_jar *jar, const char *host,
872 struct cookie *store[], int size)
874 struct cookie *chain;
879 if (!hash_table_count (jar->chains_by_domain))
882 STRDUP_ALLOCA (hash_key, host);
884 /* Look for an exact match. */
885 chain = hash_table_get (jar->chains_by_domain, hash_key);
887 STORE_CHAIN (chain, store, size, count);
889 dot_count = count_char (host, '.');
891 /* Match less and less specific domains. For instance, given
892 fly.srk.fer.hr, we match .srk.fer.hr, then .fer.hr. */
893 while (dot_count-- > 1)
895 /* Note: we operate directly on hash_key (our own copy of HOST)
896 because we don't want to allocate new hash keys in a
898 char *p = strchr (hash_key, '.');
900 chain = hash_table_get (jar->chains_by_domain, p);
902 STORE_CHAIN (chain, store, size, count);
908 /* If FULL_PATH begins with PREFIX, return the length of PREFIX, zero
912 path_matches (const char *full_path, const char *prefix)
917 /* Wget's HTTP paths do not begin with '/' (the URL code treats it
918 as a separator), but the '/' is assumed when matching against
923 len = strlen (prefix);
925 if (0 != strncmp (full_path, prefix, len))
926 /* FULL_PATH doesn't begin with PREFIX. */
929 /* Length of PREFIX determines the quality of the match. */
933 /* Return non-zero iff COOKIE matches the given PATH, PORT, and
934 security flag. HOST is not a parameter because it is assumed that the
935 cookie comes from the correct chain.
937 If PATH_GOODNESS is non-NULL, store the "path goodness" there. The
938 said goodness is a measure of how well COOKIE matches PATH. It is
939 used for ordering cookies. */
942 matching_cookie (const struct cookie *cookie, const char *path, int port,
943 int connection_secure_p, int *path_goodness)
947 if (COOKIE_EXPIRED_P (cookie))
948 /* Ignore stale cookies. Don't bother unchaining the cookie at
949 this point -- Wget is a relatively short-lived application, and
950 stale cookies will not be saved by `save_cookies'. On the
951 other hand, this function should be as efficient as
955 if (cookie->secure && !connection_secure_p)
956 /* Don't transmit secure cookies over an insecure connection. */
958 if (cookie->port != PORT_ANY && cookie->port != port)
960 pg = path_matches (path, cookie->path);
965 /* If the caller requested path_goodness, we return it. This is
966 an optimization, so that the caller doesn't need to call
967 path_matches() again. */
972 struct weighed_cookie {
973 struct cookie *cookie;
978 /* Comparator used for uniquifying the list. */
981 equality_comparator (const void *p1, const void *p2)
983 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
984 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
986 int namecmp = strcmp (wc1->cookie->attr, wc2->cookie->attr);
987 int valuecmp = strcmp (wc1->cookie->value, wc2->cookie->value);
989 /* We only really care whether both name and value are equal. We
990 return them in this order only for consistency... */
991 return namecmp ? namecmp : valuecmp;
994 /* Eliminate duplicate cookies. "Duplicate cookies" are any two
995 cookies whose name and value are the same. Whenever a duplicate
996 pair is found, one of the cookies is removed. */
999 eliminate_dups (struct weighed_cookie *outgoing, int count)
1003 /* We deploy a simple uniquify algorithm: first sort the array
1004 according to our sort criterion, then uniquify it by comparing
1005 each cookie with its neighbor. */
1007 qsort (outgoing, count, sizeof (struct weighed_cookie), equality_comparator);
1009 for (i = 0; i < count - 1; i++)
1011 struct cookie *c1 = outgoing[i].cookie;
1012 struct cookie *c2 = outgoing[i + 1].cookie;
1013 if (!strcmp (c1->attr, c2->attr) && !strcmp (c1->value, c2->value))
1015 /* c1 and c2 are the same; get rid of c2. */
1017 /* move all ptrs from positions [i + 1, count) to i. */
1018 memmove (outgoing + i, outgoing + i + 1,
1019 (count - (i + 1)) * sizeof (struct weighed_cookie));
1020 /* We decrement i to counter the ++i above. Remember that
1021 we've just removed the element in front of us; we need to
1022 remain in place to check whether outgoing[i] matches what
1023 used to be outgoing[i + 2]. */
1031 /* Comparator used for sorting by quality. */
1034 goodness_comparator (const void *p1, const void *p2)
1036 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
1037 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
1039 /* Subtractions take `wc2' as the first argument because we want a
1040 sort in *decreasing* order of goodness. */
1041 int dgdiff = wc2->domain_goodness - wc1->domain_goodness;
1042 int pgdiff = wc2->path_goodness - wc1->path_goodness;
1044 /* Sort by domain goodness; if these are the same, sort by path
1045 goodness. (The sorting order isn't really specified; maybe it
1046 should be the other way around.) */
1047 return dgdiff ? dgdiff : pgdiff;
1050 /* Generate a `Cookie' header for a request that goes to HOST:PORT and
1051 requests PATH from the server. The resulting string is allocated
1052 with `malloc', and the caller is responsible for freeing it. If no
1053 cookies pertain to this request, i.e. no cookie header should be
1054 generated, NULL is returned. */
1057 cookie_jar_generate_cookie_header (struct cookie_jar *jar, const char *host,
1058 int port, const char *path,
1059 int connection_secure_p)
1061 struct cookie *chain_default_store[20];
1062 struct cookie **all_chains = chain_default_store;
1063 int chain_store_size = countof (chain_default_store);
1066 struct cookie *cookie;
1067 struct weighed_cookie *outgoing;
1070 int result_size, pos;
1073 chain_count = find_matching_chains (jar, host, all_chains, chain_store_size);
1074 if (chain_count > chain_store_size)
1076 /* It's extremely unlikely that more than 20 chains will ever
1077 match. But since find_matching_chains reports the exact size
1078 it needs, it's easy to not have the limitation, so we
1080 all_chains = alloca (chain_count * sizeof (struct cookie *));
1081 chain_store_size = chain_count;
1088 cookies_now = time (NULL);
1090 /* Count the number of cookies whose path matches. */
1092 for (i = 0; i < chain_count; i++)
1093 for (cookie = all_chains[i]; cookie; cookie = cookie->next)
1094 if (matching_cookie (cookie, path, port, connection_secure_p, NULL))
1097 /* No matching cookies. */
1100 /* Allocate the array. */
1101 outgoing = alloca (count * sizeof (struct weighed_cookie));
1103 /* Fill the array with all the matching cookies from all the
1106 for (i = 0; i < chain_count; i++)
1107 for (cookie = all_chains[i]; cookie; cookie = cookie->next)
1110 if (!matching_cookie (cookie, path, port, connection_secure_p, &pg))
1112 outgoing[ocnt].cookie = cookie;
1113 outgoing[ocnt].domain_goodness = strlen (cookie->domain);
1114 outgoing[ocnt].path_goodness = pg;
1117 assert (ocnt == count);
1119 /* Eliminate duplicate cookies; that is, those whose name and value
1121 count = eliminate_dups (outgoing, count);
1123 /* Sort the array so that best-matching domains come first, and
1124 that, within one domain, best-matching paths come first. */
1125 qsort (outgoing, count, sizeof (struct weighed_cookie), goodness_comparator);
1127 /* Count the space the name=value pairs will take. */
1129 for (i = 0; i < count; i++)
1131 struct cookie *c = outgoing[i].cookie;
1133 result_size += strlen (c->attr) + 1 + strlen (c->value);
1136 /* Allocate output buffer:
1138 name=value pairs -- result_size
1139 "; " separators -- (count - 1) * 2
1140 \r\n line ending -- 2
1141 \0 terminator -- 1 */
1142 result_size = 8 + result_size + (count - 1) * 2 + 2 + 1;
1143 result = xmalloc (result_size);
1145 strcpy (result, "Cookie: ");
1147 for (i = 0; i < count; i++)
1149 struct cookie *c = outgoing[i].cookie;
1150 int namlen = strlen (c->attr);
1151 int vallen = strlen (c->value);
1153 memcpy (result + pos, c->attr, namlen);
1155 result[pos++] = '=';
1156 memcpy (result + pos, c->value, vallen);
1160 result[pos++] = ';';
1161 result[pos++] = ' ';
1164 result[pos++] = '\r';
1165 result[pos++] = '\n';
1166 result[pos++] = '\0';
1167 assert (pos == result_size);
1171 /* Support for loading and saving cookies. The format used for
1172 loading and saving roughly matches the format of `cookies.txt' file
1173 used by Netscape and Mozilla, at least the Unix versions. The
1174 format goes like this:
1176 DOMAIN DOMAIN-FLAG PATH SECURE-FLAG TIMESTAMP ATTR-NAME ATTR-VALUE
1178 DOMAIN -- cookie domain, optionally followed by :PORT
1179 DOMAIN-FLAG -- whether all hosts in the domain match
1181 SECURE-FLAG -- whether cookie requires secure connection
1182 TIMESTAMP -- expiry timestamp, number of seconds since epoch
1183 ATTR-NAME -- name of the cookie attribute
1184 ATTR-VALUE -- value of the cookie attribute (empty if absent)
1186 The fields are separated by TABs (but Wget's loader recognizes any
1187 whitespace). All fields are mandatory, except for ATTR-VALUE. The
1188 `-FLAG' fields are boolean, their legal values being "TRUE" and
1189 "FALSE". Empty lines, lines consisting of whitespace only, and
1190 comment lines (beginning with # optionally preceded by whitespace)
1193 Example line from cookies.txt (split in two lines for readability):
1195 .google.com TRUE / FALSE 2147368447 \
1196 PREF ID=34bb47565bbcd47b:LD=en:NR=20:TM=985172580:LM=985739012
1198 DOMAIN-FLAG is currently not honored by Wget. The cookies whose
1199 domain begins with `.' are treated as if DOMAIN-FLAG were true,
1200 while all other cookies are treated as if it were FALSE. */
1203 /* If the region [B, E) ends with :<digits>, parse the number, return
1204 it, and store new boundary (location of the `:') to DOMAIN_E_PTR.
1205 If port is not specified, return 0. */
1208 domain_port (const char *domain_b, const char *domain_e,
1209 const char **domain_e_ptr)
1213 const char *colon = memchr (domain_b, ':', domain_e - domain_b);
1216 for (p = colon + 1; p < domain_e && ISDIGIT (*p); p++)
1217 port = 10 * port + (*p - '0');
1219 /* Garbage following port number. */
1221 *domain_e_ptr = colon;
1225 #define SKIP_WS(p) do { \
1226 while (*p && ISSPACE (*p)) \
1230 #define SET_WORD_BOUNDARIES(p, b, e) do { \
1234 while (*p && !ISSPACE (*p)) \
1241 /* Load cookies from FILE. */
1244 cookie_jar_load (struct cookie_jar *jar, const char *file)
1247 FILE *fp = fopen (file, "r");
1250 logprintf (LOG_NOTQUIET, "Cannot open cookies file `%s': %s\n",
1251 file, strerror (errno));
1254 cookies_now = time (NULL);
1256 for (; ((line = read_whole_line (fp)) != NULL); xfree (line))
1258 struct cookie *cookie;
1264 char *domain_b = NULL, *domain_e = NULL;
1265 char *ignore_b = NULL, *ignore_e = NULL;
1266 char *path_b = NULL, *path_e = NULL;
1267 char *secure_b = NULL, *secure_e = NULL;
1268 char *expires_b = NULL, *expires_e = NULL;
1269 char *name_b = NULL, *name_e = NULL;
1270 char *value_b = NULL, *value_e = NULL;
1274 if (!*p || *p == '#')
1278 SET_WORD_BOUNDARIES (p, domain_b, domain_e);
1279 SET_WORD_BOUNDARIES (p, ignore_b, ignore_e);
1280 SET_WORD_BOUNDARIES (p, path_b, path_e);
1281 SET_WORD_BOUNDARIES (p, secure_b, secure_e);
1282 SET_WORD_BOUNDARIES (p, expires_b, expires_e);
1283 SET_WORD_BOUNDARIES (p, name_b, name_e);
1285 /* Don't use SET_WORD_BOUNDARIES for value because it may
1286 contain whitespace. Instead, set value_e to the end of line,
1287 modulo trailing space (this will skip the line separator.) */
1290 value_e = p + strlen (p);
1291 while (value_e > value_b && ISSPACE (*(value_e - 1)))
1293 if (value_b == value_e)
1294 /* Hmm, should we check for empty value? I guess that's
1295 legal, so I leave it. */
1298 cookie = cookie_new ();
1300 cookie->attr = strdupdelim (name_b, name_e);
1301 cookie->value = strdupdelim (value_b, value_e);
1302 cookie->path = strdupdelim (path_b, path_e);
1304 if (BOUNDED_EQUAL (secure_b, secure_e, "TRUE"))
1307 /* DOMAIN needs special treatment because we might need to
1308 extract the port. */
1309 port = domain_port (domain_b, domain_e, (const char **)&domain_e);
1311 cookie->port = port;
1312 cookie->domain = strdupdelim (domain_b, domain_e);
1314 /* safe default in case EXPIRES field is garbled. */
1315 expiry = (double)cookies_now - 1;
1317 /* I don't like changing the line, but it's completely safe.
1318 (line is malloced.) */
1320 sscanf (expires_b, "%lf", &expiry);
1321 if (expiry < cookies_now)
1322 /* ignore stale cookie. */
1324 cookie->expiry_time = expiry;
1326 /* If the cookie has survived being saved into an external file,
1327 it is obviously permanent. */
1328 cookie->permanent = 1;
1330 store_cookie (jar, cookie);
1336 delete_cookie (cookie);
1341 /* Mapper for save_cookies callable by hash_table_map. VALUE points
1342 to the head in a chain of cookies. The function prints the entire
1346 save_cookies_mapper (void *key, void *value, void *arg)
1348 FILE *fp = (FILE *)arg;
1349 char *domain = (char *)key;
1350 struct cookie *chain = (struct cookie *)value;
1351 for (; chain; chain = chain->next)
1353 if (!chain->permanent)
1355 if (COOKIE_EXPIRED_P (chain))
1358 if (chain->port != PORT_ANY)
1359 fprintf (fp, ":%d", chain->port);
1360 fprintf (fp, "\t%s\t%s\t%s\t%.0f\t%s\t%s\n",
1361 *domain == '.' ? "TRUE" : "FALSE",
1362 chain->path, chain->secure ? "TRUE" : "FALSE",
1363 (double)chain->expiry_time,
1364 chain->attr, chain->value);
1366 return 1; /* stop mapping */
1371 /* Save cookies, in format described above, to FILE. */
1374 cookie_jar_save (struct cookie_jar *jar, const char *file)
1378 DEBUGP (("Saving cookies to %s.\n", file));
1380 cookies_now = time (NULL);
1382 fp = fopen (file, "w");
1385 logprintf (LOG_NOTQUIET, _("Cannot open cookies file `%s': %s\n"),
1386 file, strerror (errno));
1390 fputs ("# HTTP cookie file.\n", fp);
1391 fprintf (fp, "# Generated by Wget on %s.\n", datetime_str (NULL));
1392 fputs ("# Edit at your own risk.\n\n", fp);
1394 hash_table_map (jar->chains_by_domain, save_cookies_mapper, fp);
1397 logprintf (LOG_NOTQUIET, _("Error writing to `%s': %s\n"),
1398 file, strerror (errno));
1400 if (fclose (fp) < 0)
1401 logprintf (LOG_NOTQUIET, _("Error closing `%s': %s\n"),
1402 file, strerror (errno));
1404 DEBUGP (("Done saving cookies.\n"));
1407 /* Destroy all the elements in the chain and unhook it from the cookie
1408 jar. This is written in the form of a callback to hash_table_map
1409 and used by cookie_jar_delete to delete all the cookies in a
1413 nuke_cookie_chain (void *value, void *key, void *arg)
1415 char *chain_key = (char *)value;
1416 struct cookie *chain = (struct cookie *)key;
1417 struct cookie_jar *jar = (struct cookie_jar *)arg;
1419 /* Remove the chain from the table and free the key. */
1420 hash_table_remove (jar->chains_by_domain, chain_key);
1423 /* Then delete all the cookies in the chain. */
1426 struct cookie *next = chain->next;
1427 delete_cookie (chain);
1435 /* Clean up cookie-related data. */
1438 cookie_jar_delete (struct cookie_jar *jar)
1440 hash_table_map (jar->chains_by_domain, nuke_cookie_chain, jar);
1441 hash_table_destroy (jar->chains_by_domain);
1445 /* Test cases. Currently this only tests parse_set_cookies. To
1446 use, recompile Wget with -DTEST_COOKIES and call test_cookies()
1451 char *test_results[10];
1453 static int test_parse_cookies_callback (struct cookie *ignored,
1454 const char *nb, const char *ne,
1455 const char *vb, const char *ve)
1457 test_results[test_count++] = strdupdelim (nb, ne);
1458 test_results[test_count++] = strdupdelim (vb, ve);
1465 /* Tests expected to succeed: */
1471 { "arg=value", {"arg", "value", NULL} },
1472 { "arg1=value1;arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
1473 { "arg1=value1; arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
1474 { "arg1=value1; arg2=value2;", {"arg1", "value1", "arg2", "value2", NULL} },
1475 { "arg1=value1; arg2=value2; ", {"arg1", "value1", "arg2", "value2", NULL} },
1476 { "arg1=\"value1\"; arg2=\"\"", {"arg1", "value1", "arg2", "", NULL} },
1477 { "arg=", {"arg", "", NULL} },
1478 { "arg1=; arg2=", {"arg1", "", "arg2", "", NULL} },
1479 { "arg1 = ; arg2= ", {"arg1", "", "arg2", "", NULL} },
1482 /* Tests expected to fail: */
1483 static char *tests_fail[] = {
1485 "arg=\"unterminated",
1487 "arg1=;=another-empty-name",
1491 for (i = 0; i < countof (tests_succ); i++)
1494 char *data = tests_succ[i].data;
1495 char **expected = tests_succ[i].results;
1499 c = parse_set_cookies (data, test_parse_cookies_callback, 1);
1502 printf ("NULL cookie returned for valid data: %s\n", data);
1506 for (ind = 0; ind < test_count; ind += 2)
1510 if (0 != strcmp (expected[ind], test_results[ind]))
1511 printf ("Invalid name %d for '%s' (expected '%s', got '%s')\n",
1512 ind / 2 + 1, data, expected[ind], test_results[ind]);
1513 if (0 != strcmp (expected[ind + 1], test_results[ind + 1]))
1514 printf ("Invalid value %d for '%s' (expected '%s', got '%s')\n",
1515 ind / 2 + 1, data, expected[ind + 1], test_results[ind + 1]);
1517 if (ind < test_count || expected[ind])
1518 printf ("Unmatched number of results: %s\n", data);
1521 for (i = 0; i < countof (tests_fail); i++)
1524 char *data = tests_fail[i];
1526 c = parse_set_cookies (data, test_parse_cookies_callback, 1);
1528 printf ("Failed to report error on invalid data: %s\n", data);
1531 #endif /* TEST_COOKIES */