1 /* Support for cookies.
2 Copyright (C) 2001, 2002 Free Software Foundation, Inc.
4 This file is part of GNU Wget.
6 GNU Wget is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or (at
9 your option) any later version.
11 GNU Wget is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Wget; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
20 /* Written by Hrvoje Niksic. Parts are loosely inspired by cookie
21 code submitted by Tomasz Wegrzanowski.
23 TODO: Implement limits on cookie-related sizes, such as max. cookie
24 size, max. number of cookies, etc. Add more "cookie jar" methods,
25 such as methods to iterate over stored cookies, to clear temporary cookies,
26 to perform intelligent auto-saving, etc. Ultimately support
27 `Set-Cookie2' and `Cookie2' headers. */
46 /* This should *really* be in a .h file! */
47 time_t http_atotm PARAMS ((const char *));
49 /* Declarations of `struct cookie' and the most basic functions. */
52 /* Hash table that maps domain names to cookie chains. A "cookie
53 chain" is a linked list of cookies that belong to the same
55 struct hash_table *chains_by_domain;
57 int cookie_count; /* number of cookies in the jar. */
60 /* Value set by entry point functions, so that the low-level
61 routines don't need to call time() all the time. */
67 struct cookie_jar *jar = xmalloc (sizeof (struct cookie_jar));
68 jar->chains_by_domain = make_nocase_string_hash_table (0);
69 jar->cookie_count = 0;
74 char *domain; /* domain of the cookie */
75 int port; /* port number */
76 char *path; /* path prefix of the cookie */
77 int secure; /* whether cookie should be
78 transmitted over non-https
80 int permanent; /* whether the cookie should outlive
82 time_t expiry_time; /* time when the cookie expires */
83 int discard_requested; /* whether cookie was created to
84 request discarding another
87 char *attr; /* cookie attribute name */
88 char *value; /* cookie attribute value */
90 struct cookie_jar *jar; /* pointer back to the cookie jar, for
92 struct cookie *next; /* used for chaining of cookies in the
97 #define COOKIE_EXPIRED_P(c) ((c)->expiry_time != 0 && (c)->expiry_time < cookies_now)
99 /* Allocate and return a new, empty cookie structure. */
101 static struct cookie *
104 struct cookie *cookie = xmalloc (sizeof (struct cookie));
105 memset (cookie, '\0', sizeof (struct cookie));
107 /* Both cookie->permanent and cookie->expiry_time are now 0. By
108 default, we assume that the cookie is non-permanent and valid
109 until the end of the session. */
111 cookie->port = PORT_ANY;
115 /* Deallocate COOKIE and its components. */
118 delete_cookie (struct cookie *cookie)
120 FREE_MAYBE (cookie->domain);
121 FREE_MAYBE (cookie->path);
122 FREE_MAYBE (cookie->attr);
123 FREE_MAYBE (cookie->value);
127 /* Functions for storing cookies.
129 All cookies can be reached beginning with jar->chains_by_domain.
130 The key in that table is the domain name, and the value is a linked
131 list of all cookies from that domain. Every new cookie is placed
132 on the head of the list. */
134 /* Find and return a cookie in JAR whose domain, path, and attribute
135 name correspond to COOKIE. If found, PREVPTR will point to the
136 location of the cookie previous in chain, or NULL if the found
137 cookie is the head of a chain.
139 If no matching cookie is found, return NULL. */
141 static struct cookie *
142 find_matching_cookie (struct cookie_jar *jar, struct cookie *cookie,
143 struct cookie **prevptr)
145 struct cookie *chain, *prev;
147 chain = hash_table_get (jar->chains_by_domain, cookie->domain);
152 for (; chain; prev = chain, chain = chain->next)
153 if (0 == strcmp (cookie->path, chain->path)
154 && 0 == strcmp (cookie->attr, chain->attr)
155 && cookie->port == chain->port)
166 /* Store COOKIE to the jar.
168 This is done by placing COOKIE at the head of its chain. However,
169 if COOKIE matches a cookie already in memory, as determined by
170 find_matching_cookie, the old cookie is unlinked and destroyed.
172 The key of each chain's hash table entry is allocated only the
173 first time; next hash_table_put's reuse the same key. */
176 store_cookie (struct cookie_jar *jar, struct cookie *cookie)
178 struct cookie *chain_head;
181 if (hash_table_get_pair (jar->chains_by_domain, cookie->domain,
182 &chain_key, &chain_head))
184 /* A chain of cookies in this domain already exists. Check for
185 duplicates -- if an extant cookie exactly matches our domain,
186 port, path, and name, replace it. */
188 struct cookie *victim = find_matching_cookie (jar, cookie, &prev);
192 /* Remove VICTIM from the chain. COOKIE will be placed at
196 prev->next = victim->next;
197 cookie->next = chain_head;
201 /* prev is NULL; apparently VICTIM was at the head of
202 the chain. This place will be taken by COOKIE, so
203 all we need to do is: */
204 cookie->next = victim->next;
206 delete_cookie (victim);
208 DEBUGP (("Deleted old cookie (to be replaced.)\n"));
211 cookie->next = chain_head;
215 /* We are now creating the chain. Allocate the string that will
216 be used as a key. It is unsafe to use cookie->domain for
217 that, because it might get deallocated by the above code at
220 chain_key = xstrdup (cookie->domain);
223 hash_table_put (jar->chains_by_domain, chain_key, cookie);
226 DEBUGP (("\nStored cookie %s %d%s %s %s %d %s %s %s\n",
227 cookie->domain, cookie->port,
228 cookie->port == PORT_ANY ? " (ANY)" : "",
230 cookie->permanent ? "permanent" : "nonpermanent",
233 ? asctime (localtime (&cookie->expiry_time)) : "<indefinitely>",
234 cookie->attr, cookie->value));
237 /* Discard a cookie matching COOKIE's domain, port, path, and
238 attribute name. This gets called when we encounter a cookie whose
239 expiry date is in the past, or whose max-age is set to 0. The
240 former corresponds to netscape cookie spec, while the latter is
241 specified by rfc2109. */
244 discard_matching_cookie (struct cookie_jar *jar, struct cookie *cookie)
246 struct cookie *prev, *victim;
248 if (!hash_table_count (jar->chains_by_domain))
249 /* No elements == nothing to discard. */
252 victim = find_matching_cookie (jar, cookie, &prev);
256 /* Simply unchain the victim. */
257 prev->next = victim->next;
260 /* VICTIM was head of its chain. We need to place a new
261 cookie at the head. */
262 char *chain_key = NULL;
265 res = hash_table_get_pair (jar->chains_by_domain, victim->domain,
270 /* VICTIM was the only cookie in the chain. Destroy the
271 chain and deallocate the chain key. */
272 hash_table_remove (jar->chains_by_domain, victim->domain);
276 hash_table_put (jar->chains_by_domain, chain_key, victim->next);
278 delete_cookie (victim);
279 DEBUGP (("Discarded old cookie.\n"));
283 /* Functions for parsing the `Set-Cookie' header, and creating new
284 cookies from the wire. */
287 #define NAME_IS(string_literal) \
288 BOUNDED_EQUAL_NO_CASE (name_b, name_e, string_literal)
290 #define VALUE_EXISTS (value_b && value_e)
292 #define VALUE_NON_EMPTY (VALUE_EXISTS && (value_b != value_e))
294 /* Update the appropriate cookie field. [name_b, name_e) are expected
295 to delimit the attribute name, while [value_b, value_e) (optional)
296 should delimit the attribute value.
298 When called the first time, it will set the cookie's attribute name
299 and value. After that, it will check the attribute name for
300 special fields such as `domain', `path', etc. Where appropriate,
301 it will parse the values of the fields it recognizes and fill the
302 corresponding fields in COOKIE.
304 Returns 1 on success. Returns zero in case a syntax error is
305 found; such a cookie should be discarded. */
308 update_cookie_field (struct cookie *cookie,
309 const char *name_b, const char *name_e,
310 const char *value_b, const char *value_e)
312 assert (name_b != NULL && name_e != NULL);
318 cookie->attr = strdupdelim (name_b, name_e);
319 cookie->value = strdupdelim (value_b, value_e);
323 if (NAME_IS ("domain"))
325 if (!VALUE_NON_EMPTY)
327 FREE_MAYBE (cookie->domain);
328 cookie->domain = strdupdelim (value_b, value_e);
331 else if (NAME_IS ("path"))
333 if (!VALUE_NON_EMPTY)
335 FREE_MAYBE (cookie->path);
336 cookie->path = strdupdelim (value_b, value_e);
339 else if (NAME_IS ("expires"))
344 if (!VALUE_NON_EMPTY)
346 BOUNDED_TO_ALLOCA (value_b, value_e, value_copy);
348 expires = http_atotm (value_copy);
351 cookie->permanent = 1;
352 cookie->expiry_time = (time_t)expires;
355 /* Error in expiration spec. Assume default (cookie valid for
359 /* According to netscape's specification, expiry time in the
360 past means that discarding of a matching cookie is
362 if (cookie->expiry_time < cookies_now)
363 cookie->discard_requested = 1;
367 else if (NAME_IS ("max-age"))
372 if (!VALUE_NON_EMPTY)
374 BOUNDED_TO_ALLOCA (value_b, value_e, value_copy);
376 sscanf (value_copy, "%lf", &maxage);
378 /* something went wrong. */
380 cookie->permanent = 1;
381 cookie->expiry_time = cookies_now + maxage;
383 /* According to rfc2109, a cookie with max-age of 0 means that
384 discarding of a matching cookie is requested. */
386 cookie->discard_requested = 1;
390 else if (NAME_IS ("secure"))
392 /* ignore value completely */
397 /* Unrecognized attribute; ignore it. */
403 /* Returns non-zero for characters that are legal in the name of an
404 attribute. This used to allow only alphanumerics, '-', and '_',
405 but we need to be more lenient because a number of sites want to
406 use weirder attribute names. rfc2965 "informally specifies"
407 attribute name (token) as "a sequence of non-special, non-white
408 space characters". So we allow everything except the stuff we know
411 #define ATTR_NAME_CHAR(c) ((c) > 32 && (c) < 127 \
412 && (c) != '"' && (c) != '=' \
413 && (c) != ';' && (c) != ',')
415 /* Fetch the next character without doing anything special if CH gets
416 set to 0. (The code executed next is expected to handle it.) */
418 #define FETCH1(ch, ptr) do { \
422 /* Like FETCH1, but jumps to `eof' label if CH gets set to 0. */
424 #define FETCH(ch, ptr) do { \
430 /* Parse the contents of the `Set-Cookie' header. The header looks
433 name1=value1; name2=value2; ...
435 Trailing semicolon is optional; spaces are allowed between all
436 tokens. Additionally, values may be quoted.
438 A new cookie is returned upon success, NULL otherwise. The
439 function `update_cookie_field' is used to update the fields of the
440 newly created cookie structure. */
442 static struct cookie *
443 parse_set_cookies (const char *sc)
445 struct cookie *cookie = cookie_new ();
447 enum { S_NAME_PRE, S_NAME, S_NAME_POST,
448 S_VALUE_PRE, S_VALUE, S_VALUE_TRAILSPACE_MAYBE,
449 S_QUOTED_VALUE, S_QUOTED_VALUE_POST,
451 S_DONE, S_ERROR } state = S_NAME_PRE;
456 const char *name_b = NULL, *name_e = NULL;
457 const char *value_b = NULL, *value_e = NULL;
461 while (state != S_DONE && state != S_ERROR)
468 else if (ATTR_NAME_CHAR (c))
475 /* empty attr name not allowed */
479 if (ATTR_NAME_CHAR (c))
481 else if (!c || c == ';' || c == '=' || ISSPACE (c))
492 else if (!c || c == ';')
494 value_b = value_e = NULL;
495 state = S_ATTR_ACTION;
512 state = S_QUOTED_VALUE;
514 else if (c == ';' || c == '\0')
516 value_b = value_e = p - 1;
517 state = S_ATTR_ACTION;
527 if (c == ';' || c == '\0')
531 state = S_ATTR_ACTION;
533 else if (ISSPACE (c))
537 state = S_VALUE_TRAILSPACE_MAYBE;
541 value_e = NULL; /* no trailing space */
545 case S_VALUE_TRAILSPACE_MAYBE:
556 state = S_QUOTED_VALUE_POST;
561 case S_QUOTED_VALUE_POST:
563 state = S_ATTR_ACTION;
564 else if (ISSPACE (c))
571 int legal = update_cookie_field (cookie, name_b, name_e,
576 BOUNDED_TO_ALLOCA (name_b, name_e, name);
577 logprintf (LOG_NOTQUIET,
578 _("Error in Set-Cookie, field `%s'"), name);
593 /* handled by loop condition */
600 delete_cookie (cookie);
601 if (state == S_ERROR)
602 logprintf (LOG_NOTQUIET, _("Syntax error in Set-Cookie at character `%c'.\n"), c);
608 delete_cookie (cookie);
609 logprintf (LOG_NOTQUIET,
610 _("Syntax error in Set-Cookie: premature end of string.\n"));
614 /* Sanity checks. These are important, otherwise it is possible for
615 malicious attackers to destroy important cookie information and/or
616 violate your privacy. */
619 #define REQUIRE_DIGITS(p) do { \
622 for (++p; ISDIGIT (*p); p++) \
626 #define REQUIRE_DOT(p) do { \
631 /* Check whether ADDR matches <digits>.<digits>.<digits>.<digits>.
633 We don't want to call network functions like inet_addr() because all
634 we need is a check, preferably one that is small, fast, and
638 numeric_address_p (const char *addr)
640 const char *p = addr;
642 REQUIRE_DIGITS (p); /* A */
643 REQUIRE_DOT (p); /* . */
644 REQUIRE_DIGITS (p); /* B */
645 REQUIRE_DOT (p); /* . */
646 REQUIRE_DIGITS (p); /* C */
647 REQUIRE_DOT (p); /* . */
648 REQUIRE_DIGITS (p); /* D */
655 /* Check whether COOKIE_DOMAIN is an appropriate domain for HOST.
656 Originally I tried to make the check compliant with rfc2109, but
657 the sites deviated too often, so I had to fall back to "tail
658 matching", as defined by the original Netscape's cookie spec. */
661 check_domain_match (const char *cookie_domain, const char *host)
665 /* Numeric address requires exact match. It also requires HOST to
667 if (numeric_address_p (cookie_domain))
668 return 0 == strcmp (cookie_domain, host);
672 /* For the sake of efficiency, check for exact match first. */
673 if (!strcasecmp (cookie_domain, host))
678 /* COOKIE_DOMAIN must match the tail of HOST. */
679 if (!match_tail (host, cookie_domain))
682 /* We know that COOKIE_DOMAIN is a subset of HOST; however, we must
683 make sure that somebody is not trying to set the cookie for a
684 subdomain shared by many entities. For example, "company.co.uk"
685 must not be allowed to set a cookie for ".co.uk". On the other
686 hand, "sso.redhat.de" should be able to set a cookie for
689 The only marginally sane way to handle this I can think of is to
690 reject on the basis of the length of the second-level domain name
691 (but only when the top-level domain is unknown), with the assumption
692 that those of three or less characters could be reserved. For
695 .co.org -> works because the TLD is known
696 .co.uk -> doesn't work because "co" is only two chars long
697 .com.au -> doesn't work because "com" is only 3 chars long
698 .cnn.uk -> doesn't work because "cnn" is also only 3 chars long (ugh)
699 .cnn.de -> doesn't work for the same reason (ugh!!)
700 .abcd.de -> works because "abcd" is 4 chars long
701 .img.cnn.de -> works because it's not trying to set the 2nd level domain
702 .cnn.co.uk -> works for the same reason
704 That should prevent misuse, while allowing reasonable usage. If
705 someone knows of a better way to handle this, please let me
708 const char *p = cookie_domain;
709 int dccount = 1; /* number of domain components */
710 int ldcl = 0; /* last domain component length */
711 int nldcl = 0; /* next to last domain component length */
714 /* Ignore leading period in this calculation. */
717 for (out = 0; !out; p++)
725 /* Empty domain component found -- the domain is invalid. */
727 if (*(p + 1) == '\0')
729 /* Tolerate trailing '.' by not treating the domain as
730 one ending with an empty domain component. */
752 int known_toplevel = 0;
753 static char *known_toplevel_domains[] = {
754 ".com", ".edu", ".net", ".org", ".gov", ".mil", ".int"
756 for (i = 0; i < ARRAY_SIZE (known_toplevel_domains); i++)
757 if (match_tail (cookie_domain, known_toplevel_domains[i]))
762 if (!known_toplevel && nldcl <= 3)
769 /* Don't allow domain "bar.com" to match host "foobar.com". */
770 if (*cookie_domain != '.')
772 int dlen = strlen (cookie_domain);
773 int hlen = strlen (host);
774 /* cookie host: hostname.foobar.com */
775 /* desired domain: bar.com */
776 /* '.' must be here in host-> ^ */
777 if (hlen > dlen && host[hlen - dlen - 1] != '.')
786 static int path_matches PARAMS ((const char *, const char *));
788 /* Check whether PATH begins with COOKIE_PATH. */
791 check_path_match (const char *cookie_path, const char *path)
793 return path_matches (path, cookie_path);
796 /* Process the HTTP `Set-Cookie' header. This results in storing the
797 cookie or discarding a matching one, or ignoring it completely, all
798 depending on the contents. */
801 cookie_jar_process_set_cookie (struct cookie_jar *jar,
802 const char *host, int port,
803 const char *path, const char *set_cookie)
805 struct cookie *cookie;
806 cookies_now = time (NULL);
808 cookie = parse_set_cookies (set_cookie);
812 /* Sanitize parts of cookie. */
817 cookie->domain = xstrdup (host);
822 if (!check_domain_match (cookie->domain, host))
824 logprintf (LOG_NOTQUIET,
825 "Cookie coming from %s attempted to set domain to %s\n",
826 host, cookie->domain);
831 cookie->path = xstrdup (path);
834 if (!check_path_match (cookie->path, path))
836 DEBUGP (("Attempt to fake the path: %s, %s\n",
837 cookie->path, path));
842 if (cookie->discard_requested)
844 discard_matching_cookie (jar, cookie);
848 store_cookie (jar, cookie);
853 delete_cookie (cookie);
856 /* Support for sending out cookies in HTTP requests, based on
857 previously stored cookies. Entry point is
858 `build_cookies_request'. */
860 /* Store CHAIN to STORE if there is room in STORE. If not, increment
861 COUNT anyway, so that when the function is done, we end up with the
862 exact count of how much place we actually need. */
864 #define STORE_CHAIN(st_chain, st_store, st_size, st_count) do { \
865 if (st_count < st_size) \
866 store[st_count] = st_chain; \
870 /* Store cookie chains that match HOST. Since more than one chain can
871 match, the matches are written to STORE. No more than SIZE matches
872 are written; if more matches are present, return the number of
873 chains that would have been written. */
876 find_matching_chains (struct cookie_jar *jar, const char *host,
877 struct cookie *store[], int size)
879 struct cookie *chain;
884 if (!hash_table_count (jar->chains_by_domain))
887 STRDUP_ALLOCA (hash_key, host);
889 /* Look for an exact match. */
890 chain = hash_table_get (jar->chains_by_domain, hash_key);
892 STORE_CHAIN (chain, store, size, count);
894 dot_count = count_char (host, '.');
896 /* Match less and less specific domains. For instance, given
897 fly.srk.fer.hr, we match .srk.fer.hr, then .fer.hr. */
898 while (dot_count-- > 1)
900 /* Note: we operate directly on hash_key (a copy of HOST)
901 because we don't want to allocate new hash keys in a
903 char *p = strchr (hash_key, '.');
905 chain = hash_table_get (jar->chains_by_domain, p);
907 STORE_CHAIN (chain, store, size, count);
913 /* If FULL_PATH begins with PREFIX, return the length of PREFIX, zero
917 path_matches (const char *full_path, const char *prefix)
922 /* Wget's HTTP paths do not begin with '/' (the URL code treats it
923 as a separator), but the '/' is assumed when matching against
928 len = strlen (prefix);
930 if (0 != strncmp (full_path, prefix, len))
931 /* FULL_PATH doesn't begin with PREFIX. */
934 /* Length of PREFIX determines the quality of the match. */
938 /* Return non-zero iff COOKIE matches the given PATH, PORT, and
939 security flag. HOST is not a parameter because it is assumed that the
940 cookie comes from the correct chain.
942 If PATH_GOODNESS is non-NULL, store the "path goodness" there. The
943 said goodness is a measure of how well COOKIE matches PATH. It is
944 used for ordering cookies. */
947 matching_cookie (const struct cookie *cookie, const char *path, int port,
948 int connection_secure_p, int *path_goodness)
952 if (COOKIE_EXPIRED_P (cookie))
953 /* Ignore stale cookies. Don't bother unchaining the cookie at
954 this point -- Wget is a relatively short-lived application, and
955 stale cookies will not be saved by `save_cookies'. On the
956 other hand, this function should be as efficient as
960 if (cookie->secure && !connection_secure_p)
961 /* Don't transmit secure cookies over an insecure connection. */
963 if (cookie->port != PORT_ANY && cookie->port != port)
965 pg = path_matches (path, cookie->path);
970 /* If the caller requested path_goodness, we return it. This is
971 an optimization, so that the caller doesn't need to call
972 path_matches() again. */
977 struct weighed_cookie {
978 struct cookie *cookie;
983 /* Comparator used for uniquifying the list. */
986 equality_comparator (const void *p1, const void *p2)
988 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
989 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
991 int namecmp = strcmp (wc1->cookie->attr, wc2->cookie->attr);
992 int valuecmp = strcmp (wc1->cookie->value, wc2->cookie->value);
994 /* We only really care whether both name and value are equal. We
995 return them in this order only for consistency... */
996 return namecmp ? namecmp : valuecmp;
999 /* Eliminate duplicate cookies. "Duplicate cookies" are any two
1000 cookies whose name and value are the same. Whenever a duplicate
1001 pair is found, one of the cookies is removed. */
1004 eliminate_dups (struct weighed_cookie *outgoing, int count)
1008 /* We deploy a simple uniquify algorithm: first sort the array
1009 according to our sort criterion, then uniquify it by comparing
1010 each cookie with its neighbor. */
1012 qsort (outgoing, count, sizeof (struct weighed_cookie), equality_comparator);
1014 for (i = 0; i < count - 1; i++)
1016 struct cookie *c1 = outgoing[i].cookie;
1017 struct cookie *c2 = outgoing[i + 1].cookie;
1018 if (!strcmp (c1->attr, c2->attr) && !strcmp (c1->value, c2->value))
1020 /* c1 and c2 are the same; get rid of c2. */
1022 /* move all ptrs from positions [i + 1, count) to i. */
1023 memmove (outgoing + i, outgoing + i + 1,
1024 (count - (i + 1)) * sizeof (struct weighed_cookie));
1025 /* We decrement i to counter the ++i above. Remember that
1026 we've just removed the element in front of us; we need to
1027 remain in place to check whether outgoing[i] matches what
1028 used to be outgoing[i + 2]. */
1036 /* Comparator used for sorting by quality. */
1039 goodness_comparator (const void *p1, const void *p2)
1041 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
1042 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
1044 /* Subtractions take `wc2' as the first argument because we want a
1045 sort in *decreasing* order of goodness. */
1046 int dgdiff = wc2->domain_goodness - wc1->domain_goodness;
1047 int pgdiff = wc2->path_goodness - wc1->path_goodness;
1049 /* Sort by domain goodness; if these are the same, sort by path
1050 goodness. (The sorting order isn't really specified; maybe it
1051 should be the other way around.) */
1052 return dgdiff ? dgdiff : pgdiff;
1055 /* Generate a `Cookie' header for a request that goes to HOST:PORT and
1056 requests PATH from the server. The resulting string is allocated
1057 with `malloc', and the caller is responsible for freeing it. If no
1058 cookies pertain to this request, i.e. no cookie header should be
1059 generated, NULL is returned. */
1062 cookie_jar_generate_cookie_header (struct cookie_jar *jar, const char *host,
1063 int port, const char *path,
1064 int connection_secure_p)
1066 struct cookie *chain_default_store[20];
1067 struct cookie **all_chains = chain_default_store;
1068 int chain_store_size = ARRAY_SIZE (chain_default_store);
1071 struct cookie *cookie;
1072 struct weighed_cookie *outgoing;
1075 int result_size, pos;
1078 chain_count = find_matching_chains (jar, host, all_chains, chain_store_size);
1079 if (chain_count > chain_store_size)
1081 /* It's extremely unlikely that more than 20 chains will ever
1082 match. But since find_matching_chains reports the exact size
1083 it needs, it's easy to not have the limitation, so we
1085 all_chains = alloca (chain_count * sizeof (struct cookie *));
1086 chain_store_size = chain_count;
1093 cookies_now = time (NULL);
1095 /* Count the number of cookies whose path matches. */
1097 for (i = 0; i < chain_count; i++)
1098 for (cookie = all_chains[i]; cookie; cookie = cookie->next)
1099 if (matching_cookie (cookie, path, port, connection_secure_p, NULL))
1102 /* No matching cookies. */
1105 /* Allocate the array. */
1106 outgoing = alloca (count * sizeof (struct weighed_cookie));
1108 /* Fill the array with all the matching cookies from all the
1111 for (i = 0; i < chain_count; i++)
1112 for (cookie = all_chains[i]; cookie; cookie = cookie->next)
1115 if (!matching_cookie (cookie, path, port, connection_secure_p, &pg))
1117 outgoing[ocnt].cookie = cookie;
1118 outgoing[ocnt].domain_goodness = strlen (cookie->domain);
1119 outgoing[ocnt].path_goodness = pg;
1122 assert (ocnt == count);
1124 /* Eliminate duplicate cookies; that is, those whose name and value
1126 count = eliminate_dups (outgoing, count);
1128 /* Sort the array so that best-matching domains come first, and
1129 that, within one domain, best-matching paths come first. */
1130 qsort (outgoing, count, sizeof (struct weighed_cookie), goodness_comparator);
1132 /* Count the space the name=value pairs will take. */
1134 for (i = 0; i < count; i++)
1136 struct cookie *c = outgoing[i].cookie;
1138 result_size += strlen (c->attr) + 1 + strlen (c->value);
1141 /* Allocate output buffer:
1143 name=value pairs -- result_size
1144 "; " separators -- (count - 1) * 2
1145 \r\n line ending -- 2
1146 \0 terminator -- 1 */
1147 result_size = 8 + result_size + (count - 1) * 2 + 2 + 1;
1148 result = xmalloc (result_size);
1150 strcpy (result, "Cookie: ");
1152 for (i = 0; i < count; i++)
1154 struct cookie *c = outgoing[i].cookie;
1155 int namlen = strlen (c->attr);
1156 int vallen = strlen (c->value);
1158 memcpy (result + pos, c->attr, namlen);
1160 result[pos++] = '=';
1161 memcpy (result + pos, c->value, vallen);
1165 result[pos++] = ';';
1166 result[pos++] = ' ';
1169 result[pos++] = '\r';
1170 result[pos++] = '\n';
1171 result[pos++] = '\0';
1172 assert (pos == result_size);
1176 /* Support for loading and saving cookies. The format used for
1177 loading and saving roughly matches the format of `cookies.txt' file
1178 used by Netscape and Mozilla, at least the Unix versions. The
1179 format goes like this:
1181 DOMAIN DOMAIN-FLAG PATH SECURE-FLAG TIMESTAMP ATTR-NAME ATTR-VALUE
1183 DOMAIN -- cookie domain, optionally followed by :PORT
1184 DOMAIN-FLAG -- whether all hosts in the domain match
1186 SECURE-FLAG -- whether cookie requires secure connection
1187 TIMESTAMP -- expiry timestamp, number of seconds since epoch
1188 ATTR-NAME -- name of the cookie attribute
1189 ATTR-VALUE -- value of the cookie attribute (empty if absent)
1191 The fields are separated by TABs (but Wget's loader recognizes any
1192 whitespace). All fields are mandatory, except for ATTR-VALUE. The
1193 `-FLAG' fields are boolean, their legal values being "TRUE" and
1194 "FALSE". Empty lines, lines consisting of whitespace only, and
1195 comment lines (beginning with # optionally preceded by whitespace)
1198 Example line from cookies.txt (split in two lines for readability):
1200 .google.com TRUE / FALSE 2147368447 \
1201 PREF ID=34bb47565bbcd47b:LD=en:NR=20:TM=985172580:LM=985739012
1203 DOMAIN-FLAG is currently not honored by Wget. The cookies whose
1204 domain begins with `.' are treated as if DOMAIN-FLAG were true,
1205 while all other cookies are treated as if it were FALSE. */
1208 /* If the region [B, E) ends with :<digits>, parse the number, return
1209 it, and store new boundary (location of the `:') to DOMAIN_E_PTR.
1210 If port is not specified, return 0. */
1213 domain_port (const char *domain_b, const char *domain_e,
1214 const char **domain_e_ptr)
1218 const char *colon = memchr (domain_b, ':', domain_e - domain_b);
1221 for (p = colon + 1; p < domain_e && ISDIGIT (*p); p++)
1222 port = 10 * port + (*p - '0');
1224 /* Garbage following port number. */
1226 *domain_e_ptr = colon;
1230 #define SKIP_WS(p) do { \
1231 while (*p && ISSPACE (*p)) \
1235 #define SET_WORD_BOUNDARIES(p, b, e) do { \
1239 while (*p && !ISSPACE (*p)) \
1246 /* Load cookies from FILE. */
1249 cookie_jar_load (struct cookie_jar *jar, const char *file)
1252 FILE *fp = fopen (file, "r");
1255 logprintf (LOG_NOTQUIET, "Cannot open cookies file `%s': %s\n",
1256 file, strerror (errno));
1259 cookies_now = time (NULL);
1261 for (; ((line = read_whole_line (fp)) != NULL); xfree (line))
1263 struct cookie *cookie;
1269 char *domain_b = NULL, *domain_e = NULL;
1270 char *ignore_b = NULL, *ignore_e = NULL;
1271 char *path_b = NULL, *path_e = NULL;
1272 char *secure_b = NULL, *secure_e = NULL;
1273 char *expires_b = NULL, *expires_e = NULL;
1274 char *name_b = NULL, *name_e = NULL;
1275 char *value_b = NULL, *value_e = NULL;
1279 if (!*p || *p == '#')
1283 SET_WORD_BOUNDARIES (p, domain_b, domain_e);
1284 SET_WORD_BOUNDARIES (p, ignore_b, ignore_e);
1285 SET_WORD_BOUNDARIES (p, path_b, path_e);
1286 SET_WORD_BOUNDARIES (p, secure_b, secure_e);
1287 SET_WORD_BOUNDARIES (p, expires_b, expires_e);
1288 SET_WORD_BOUNDARIES (p, name_b, name_e);
1290 /* Don't use SET_WORD_BOUNDARIES for value because it may
1291 contain whitespace. Instead, set value_e to the end of line,
1292 modulo trailing space (this will skip the line separator.) */
1295 value_e = p + strlen (p);
1296 while (value_e > value_b && ISSPACE (*(value_e - 1)))
1298 if (value_b == value_e)
1299 /* Hmm, should we check for empty value? I guess that's
1300 legal, so I leave it. */
1303 cookie = cookie_new ();
1305 cookie->attr = strdupdelim (name_b, name_e);
1306 cookie->value = strdupdelim (value_b, value_e);
1307 cookie->path = strdupdelim (path_b, path_e);
1309 if (BOUNDED_EQUAL (secure_b, secure_e, "TRUE"))
1312 /* DOMAIN needs special treatment because we might need to
1313 extract the port. */
1314 port = domain_port (domain_b, domain_e, (const char **)&domain_e);
1316 cookie->port = port;
1317 cookie->domain = strdupdelim (domain_b, domain_e);
1319 /* safe default in case EXPIRES field is garbled. */
1320 expiry = (double)cookies_now - 1;
1322 /* I don't like changing the line, but it's completely safe.
1323 (line is malloced.) */
1325 sscanf (expires_b, "%lf", &expiry);
1326 if (expiry < cookies_now)
1327 /* ignore stale cookie. */
1329 cookie->expiry_time = expiry;
1331 /* If the cookie has survived being saved into an external file,
1332 it is obviously permanent. */
1333 cookie->permanent = 1;
1335 store_cookie (jar, cookie);
1341 delete_cookie (cookie);
1346 /* Mapper for save_cookies callable by hash_table_map. VALUE points
1347 to the head in a chain of cookies. The function prints the entire
1351 save_cookies_mapper (void *key, void *value, void *arg)
1353 FILE *fp = (FILE *)arg;
1354 char *domain = (char *)key;
1355 struct cookie *chain = (struct cookie *)value;
1356 for (; chain; chain = chain->next)
1358 if (!chain->permanent)
1360 if (COOKIE_EXPIRED_P (chain))
1363 if (chain->port != PORT_ANY)
1364 fprintf (fp, ":%d", chain->port);
1365 fprintf (fp, "\t%s\t%s\t%s\t%.0f\t%s\t%s\n",
1366 *domain == '.' ? "TRUE" : "FALSE",
1367 chain->path, chain->secure ? "TRUE" : "FALSE",
1368 (double)chain->expiry_time,
1369 chain->attr, chain->value);
1371 return 1; /* stop mapping */
1376 /* Save cookies, in format described above, to FILE. */
1379 cookie_jar_save (struct cookie_jar *jar, const char *file)
1383 DEBUGP (("Saving cookies to %s.\n", file));
1385 cookies_now = time (NULL);
1387 fp = fopen (file, "w");
1390 logprintf (LOG_NOTQUIET, _("Cannot open cookies file `%s': %s\n"),
1391 file, strerror (errno));
1395 fputs ("# HTTP cookie file.\n", fp);
1396 fprintf (fp, "# Generated by Wget on %s.\n", datetime_str (NULL));
1397 fputs ("# Edit at your own risk.\n\n", fp);
1399 hash_table_map (jar->chains_by_domain, save_cookies_mapper, fp);
1402 logprintf (LOG_NOTQUIET, _("Error writing to `%s': %s\n"),
1403 file, strerror (errno));
1405 if (fclose (fp) < 0)
1406 logprintf (LOG_NOTQUIET, _("Error closing `%s': %s\n"),
1407 file, strerror (errno));
1409 DEBUGP (("Done saving cookies.\n"));
1412 /* Destroy all the elements in the chain and unhook it from the cookie
1413 jar. This is written in the form of a callback to hash_table_map
1414 and used by cookie_jar_delete to delete all the cookies in a
1418 nuke_cookie_chain (void *value, void *key, void *arg)
1420 char *chain_key = (char *)value;
1421 struct cookie *chain = (struct cookie *)key;
1422 struct cookie_jar *jar = (struct cookie_jar *)arg;
1424 /* Remove the chain from the table and free the key. */
1425 hash_table_remove (jar->chains_by_domain, chain_key);
1428 /* Then delete all the cookies in the chain. */
1431 struct cookie *next = chain->next;
1432 delete_cookie (chain);
1440 /* Clean up cookie-related data. */
1443 cookie_jar_delete (struct cookie_jar *jar)
1445 hash_table_map (jar->chains_by_domain, nuke_cookie_chain, jar);
1446 hash_table_destroy (jar->chains_by_domain);