1 /* Support for cookies.
2 Copyright (C) 2001 Free Software Foundation, Inc.
4 This file is part of Wget.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or (at
9 your option) any later version.
11 This program is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
20 /* Written by Hrvoje Niksic. Parts are loosely inspired by cookie
21 code submitted by Tomasz Wegrzanowski. */
41 /* Hash table that maps domain names to cookie chains. */
43 static struct hash_table *cookies_hash_table;
45 /* This should be set by entry points in this file, so the low-level
46 functions don't need to call time() all the time. */
48 static time_t cookies_now;
50 /* This should *really* be in a .h file! */
51 time_t http_atotm PARAMS ((char *));
54 /* Definition of `struct cookie' and the most basic functions. */
57 char *domain; /* domain of the cookie */
58 int port; /* port number */
59 char *path; /* path prefix of the cookie */
60 int secure; /* whether cookie should be
61 transmitted over non-https
63 int permanent; /* whether the cookie should outlive
65 unsigned long expiry_time; /* time when the cookie expires */
66 int discard_requested; /* whether cookie was created to
67 request discarding another
70 char *attr; /* cookie attribute name */
71 char *value; /* cookie attribute value */
73 struct cookie *next; /* used for chaining of cookies in the
77 /* Allocate and return a new, empty cookie structure. */
79 static struct cookie *
82 struct cookie *cookie = xmalloc (sizeof (struct cookie));
83 memset (cookie, '\0', sizeof (struct cookie));
85 /* If we don't know better, assume cookie is non-permanent and valid
86 for the entire session. */
87 cookie->expiry_time = ~0UL;
89 /* Assume default port. */
95 /* Deallocate COOKIE and its components. */
98 delete_cookie (struct cookie *cookie)
100 FREE_MAYBE (cookie->domain);
101 FREE_MAYBE (cookie->path);
102 FREE_MAYBE (cookie->attr);
103 FREE_MAYBE (cookie->value);
107 /* Functions for cookie-specific hash tables. These are regular hash
108 tables, but with case-insensitive test and hash functions. */
110 /* Like string_hash, but produces the same results regardless of case. */
114 unsigned_string_hash (const void *key)
117 unsigned int h = TOLOWER (*p);
120 for (p += 1; *p != '\0'; p++)
121 h = (h << 5) - h + TOLOWER (*p);
126 /* Front-end to strcasecmp. */
129 unsigned_string_cmp (const void *s1, const void *s2)
131 return !strcasecmp ((const char *)s1, (const char *)s2);
134 /* Like make_string_hash_table, but uses unsigned_string_hash and
135 unsigned_string_cmp. */
137 static struct hash_table *
138 make_unsigned_string_hash_table (int initial_size)
140 return hash_table_new (initial_size,
141 unsigned_string_hash, unsigned_string_cmp);
144 /* Write "HOST:PORT" to RESULT. RESULT should be a pointer, and the
145 memory for the contents is allocated on the stack. Useful for
146 creating HOST:PORT strings, which are the keys in the hash table. */
149 #define SET_HOSTPORT(host, port, result) do { \
150 int HP_len = strlen (host); \
151 result = alloca (HP_len + 1 + numdigit (port) + 1); \
152 memcpy (result, host, HP_len); \
153 result[HP_len] = ':'; \
154 long_to_string (result + HP_len + 1, port); \
157 /* Find cookie chain that corresponds to DOMAIN (exact) and PORT. */
159 static struct cookie *
160 find_cookie_chain_exact (const char *domain, int port)
163 if (!cookies_hash_table)
165 SET_HOSTPORT (domain, port, key);
166 return hash_table_get (cookies_hash_table, key);
169 /* Functions for storing cookies.
171 All cookies can be referenced through cookies_hash_table. The key
172 in that table is the DOMAIN:PORT string, and the value is a linked list
173 of all cookies stored under that key. Every new cookie is placed on the head of its chain. */
176 /* Find and return the cookie whose domain, path, and attribute name
177 correspond to COOKIE. If found, PREVPTR will point to the location
178 of the cookie previous in chain, or NULL if the found cookie is the head of its chain.
181 If no matching cookie is found, return NULL. */
183 static struct cookie *
184 find_matching_cookie (struct cookie *cookie, struct cookie **prevptr)
186 struct cookie *chain, *prev;
188 if (!cookies_hash_table)
191 chain = find_cookie_chain_exact (cookie->domain, cookie->port);
196 for (; chain; prev = chain, chain = chain->next)
197 if (!strcmp (cookie->path, chain->path)
198 && !strcmp (cookie->attr, chain->attr))
209 /* Store COOKIE to memory.
211 This is done by placing COOKIE at the head of its chain. However,
212 if COOKIE matches a cookie already in memory, as determined by
213 find_matching_cookie, the old cookie is unlinked and destroyed.
215 The key of each chain's hash table entry is allocated only the
216 first time; next hash_table_put's reuse the same key. */
219 store_cookie (struct cookie *cookie)
221 struct cookie *chain_head;
225 if (!cookies_hash_table)
226 /* If the hash table is not initialized, do so now, because we'll
227 need to store things. */
228 cookies_hash_table = make_unsigned_string_hash_table (0);
230 /* Initialize hash table key. */
231 SET_HOSTPORT (cookie->domain, cookie->port, hostport);
233 if (hash_table_get_pair (cookies_hash_table, hostport,
234 &chain_key, &chain_head))
236 /* There already exists a chain of cookies with this exact
237 domain. We need to check for duplicates -- if an existing
238 cookie exactly matches our domain, path and name, we replace
241 struct cookie *victim = find_matching_cookie (cookie, &prev);
245 /* Remove VICTIM from the chain. COOKIE will be placed at
249 prev->next = victim->next;
250 cookie->next = chain_head;
254 /* prev is NULL; apparently VICTIM was at the head of
255 the chain. This place will be taken by COOKIE, so
256 all we need to do is: */
257 cookie->next = victim->next;
259 delete_cookie (victim);
260 DEBUGP (("Deleted old cookie (to be replaced.)\n"));
263 cookie->next = chain_head;
267 /* We are now creating the chain. Allocate the string that will
268 be used as a key. It is unsafe to use cookie->domain for
269 that, because it might get deallocated by the above code at
272 chain_key = xstrdup (hostport);
275 hash_table_put (cookies_hash_table, chain_key, cookie);
277 DEBUGP (("\nStored cookie %s %d %s %s %d %s %s %s\n",
278 cookie->domain, cookie->port, cookie->path,
279 cookie->permanent ? "permanent" : "nonpermanent",
281 asctime (localtime ((time_t *)&cookie->expiry_time)),
282 cookie->attr, cookie->value));
285 /* Discard a cookie matching COOKIE's domain, path, and attribute
286 name. This gets called when we encounter a cookie whose expiry
287 date is in the past, or whose max-age is set to 0. The former
288 corresponds to netscape cookie spec, while the latter is specified by rfc2109. */
292 discard_matching_cookie (struct cookie *cookie)
294 struct cookie *prev, *victim;
296 if (!cookies_hash_table
297 || !hash_table_count (cookies_hash_table))
298 /* No elements == nothing to discard. */
301 victim = find_matching_cookie (cookie, &prev);
305 /* Simply unchain the victim. */
306 prev->next = victim->next;
309 /* VICTIM was head of its chain. We need to place a new
310 cookie at the head. */
313 char *chain_key = NULL;
316 SET_HOSTPORT (victim->domain, victim->port, hostport);
317 res = hash_table_get_pair (cookies_hash_table, hostport,
322 /* VICTIM was the only cookie in the chain. Destroy the
323 chain and deallocate the chain key. */
325 hash_table_remove (cookies_hash_table, hostport);
329 hash_table_put (cookies_hash_table, chain_key, victim->next);
331 delete_cookie (victim);
332 DEBUGP (("Discarded old cookie.\n"));
336 /* Functions for parsing the `Set-Cookie' header, and creating new
337 cookies from the wire. */
340 #define NAME_IS(string_literal) \
341 BOUNDED_EQUAL_NO_CASE (name_b, name_e, string_literal)
343 #define VALUE_EXISTS (value_b && value_e)
345 #define VALUE_NON_EMPTY (VALUE_EXISTS && (value_b != value_e))
347 /* Update the appropriate cookie field. [name_b, name_e) are expected
348 to delimit the attribute name, while [value_b, value_e) (optional)
349 should delimit the attribute value.
351 When called the first time, it will set the cookie's attribute name
352 and value. After that, it will check the attribute name for
353 special fields such as `domain', `path', etc. Where appropriate,
354 it will parse the values of the fields it recognizes and fill the
355 corresponding fields in COOKIE.
357 Returns 1 on success. Returns zero in case a syntax error is
358 found; such a cookie should be discarded. */
361 update_cookie_field (struct cookie *cookie,
362 const char *name_b, const char *name_e,
363 const char *value_b, const char *value_e)
365 assert (name_b != NULL && name_e != NULL);
371 cookie->attr = strdupdelim (name_b, name_e);
372 cookie->value = strdupdelim (value_b, value_e);
376 if (NAME_IS ("domain"))
378 if (!VALUE_NON_EMPTY)
380 FREE_MAYBE (cookie->domain);
381 cookie->domain = strdupdelim (value_b, value_e);
384 else if (NAME_IS ("path"))
386 if (!VALUE_NON_EMPTY)
388 FREE_MAYBE (cookie->path);
389 cookie->path = strdupdelim (value_b, value_e);
392 else if (NAME_IS ("expires"))
397 if (!VALUE_NON_EMPTY)
399 BOUNDED_TO_ALLOCA (value_b, value_e, value_copy);
401 expires = http_atotm (value_copy);
404 cookie->permanent = 1;
405 cookie->expiry_time = (unsigned long)expires;
408 /* Error in expiration spec. Assume default (cookie valid for
409 this session.) #### Should we return 0 and invalidate the
413 /* According to netscape's specification, expiry time in the
414 past means that discarding of a matching cookie is
416 if (cookie->expiry_time < cookies_now)
417 cookie->discard_requested = 1;
421 else if (NAME_IS ("max-age"))
426 if (!VALUE_NON_EMPTY)
428 BOUNDED_TO_ALLOCA (value_b, value_e, value_copy);
430 sscanf (value_copy, "%lf", &maxage);
432 /* something is wrong. */
434 cookie->permanent = 1;
435 cookie->expiry_time = (unsigned long)cookies_now + (unsigned long)maxage;
437 /* According to rfc2109, a cookie with max-age of 0 means that
438 discarding of a matching cookie is requested. */
440 cookie->discard_requested = 1;
444 else if (NAME_IS ("secure"))
446 /* ignore value completely */
451 /* Unrecognized attribute; ignore it. */
457 /* Returns non-zero for characters that are legal in the name of an attribute. */
460 #define ATTR_NAME_CHAR(c) (ISALNUM (c) || (c) == '-' || (c) == '_')
462 /* Fetch the next character without doing anything special if CH gets
463 set to 0. (The code executed next is expected to handle it.) */
465 #define FETCH1(ch, ptr) do { \
469 /* Like FETCH1, but jumps to `eof' label if CH gets set to 0. */
471 #define FETCH(ch, ptr) do { \
477 /* Parse the contents of the `Set-Cookie' header. The header looks like this:
480 name1=value1; name2=value2; ...
482 Trailing semicolon is optional; spaces are allowed between all
483 tokens. Additionally, values may be quoted.
485 A new cookie is returned upon success, NULL otherwise. The
486 function `update_cookie_field' is used to update the fields of the
487 newly created cookie structure. */
489 static struct cookie *
490 parse_set_cookies (const char *sc)
492 struct cookie *cookie = cookie_new ();
494 enum { S_NAME_PRE, S_NAME, S_NAME_POST,
495 S_VALUE_PRE, S_VALUE, S_VALUE_TRAILSPACE_MAYBE,
496 S_QUOTED_VALUE, S_QUOTED_VALUE_POST,
498 S_DONE, S_ERROR } state = S_NAME_PRE;
503 const char *name_b = NULL, *name_e = NULL;
504 const char *value_b = NULL, *value_e = NULL;
508 while (state != S_DONE && state != S_ERROR)
515 else if (ATTR_NAME_CHAR (c))
522 /* empty attr name not allowed */
526 if (ATTR_NAME_CHAR (c))
528 else if (!c || c == ';' || c == '=' || ISSPACE (c))
539 else if (!c || c == ';')
541 value_b = value_e = NULL;
542 state = S_ATTR_ACTION;
559 state = S_QUOTED_VALUE;
561 else if (c == ';' || c == '\0')
563 value_b = value_e = p - 1;
564 state = S_ATTR_ACTION;
574 if (c == ';' || c == '\0')
578 state = S_ATTR_ACTION;
580 else if (ISSPACE (c))
584 state = S_VALUE_TRAILSPACE_MAYBE;
588 value_e = NULL; /* no trailing space */
592 case S_VALUE_TRAILSPACE_MAYBE:
603 state = S_QUOTED_VALUE_POST;
608 case S_QUOTED_VALUE_POST:
610 state = S_ATTR_ACTION;
611 else if (ISSPACE (c))
618 int legal = update_cookie_field (cookie, name_b, name_e,
623 BOUNDED_TO_ALLOCA (name_b, name_e, name);
624 logprintf (LOG_NOTQUIET,
625 _("Error in Set-Cookie, field `%s'"), name);
640 /* handled by loop condition */
647 delete_cookie (cookie);
648 if (state == S_ERROR)
649 logprintf (LOG_NOTQUIET, _("Syntax error in Set-Cookie at character `%c'.\n"), c);
655 delete_cookie (cookie);
656 logprintf (LOG_NOTQUIET,
657 _("Syntax error in Set-Cookie: premature end of string.\n"));
661 /* Sanity checks. These are important, otherwise it is possible for
662 malicious attackers to destroy important cookie information and/or
663 violate your privacy. */
666 #define REQUIRE_DIGITS(p) do { \
669 for (++p; ISDIGIT (*p); p++) \
673 #define REQUIRE_DOT(p) do { \
678 /* Check whether ADDR matches <digits>.<digits>.<digits>.<digits>.
680 We don't want to call network functions like inet_addr() because all
681 we need is a check, preferably one that is small and fast. */
685 numeric_address_p (const char *addr)
687 const char *p = addr;
689 REQUIRE_DIGITS (p); /* A */
690 REQUIRE_DOT (p); /* . */
691 REQUIRE_DIGITS (p); /* B */
692 REQUIRE_DOT (p); /* . */
693 REQUIRE_DIGITS (p); /* C */
694 REQUIRE_DOT (p); /* . */
695 REQUIRE_DIGITS (p); /* D */
702 /* Check whether COOKIE_DOMAIN is an appropriate domain for HOST.
703 This check is compliant with rfc2109. */
706 check_domain_match (const char *cookie_domain, const char *host)
713 /* Numeric address requires exact match. It also requires HOST to
714 be an IP address. I suppose we *could* resolve HOST with
715 store_hostaddress (it would hit the hash table), but rfc2109
716 doesn't require it, and it doesn't seem very useful, so we
718 if (numeric_address_p (cookie_domain))
719 return !strcmp (cookie_domain, host);
723 /* The domain must contain at least one embedded dot. */
725 const char *rest = cookie_domain;
726 int len = strlen (rest);
728 ++rest, --len; /* ignore first dot */
731 if (rest[len - 1] == '.')
732 --len; /* ignore last dot */
734 if (!memchr (rest, '.', len))
741 /* For the sake of efficiency, check for exact match first. */
742 if (!strcasecmp (cookie_domain, host))
747 /* In rfc2109 terminology, HOST needs domain-match COOKIE_DOMAIN.
748 This means that COOKIE_DOMAIN needs to start with `.' and be an
749 FQDN, and that HOST must end with COOKIE_DOMAIN. */
750 if (*cookie_domain != '.')
755 /* To proceed, we need to examine two parts of HOST: its head and
756 its tail. Head and tail are defined in terms of the length of
757 the domain, like this:
759 HHHHTTTTTTTTTTTTTTT <- host
760 DDDDDDDDDDDDDDD <- domain
762 That is, "head" is the part of the host before (hlen - dlen), and
763 "tail" is what follows.
765 For the domain to match, two conditions need to be true:
767 1. Tail must equal DOMAIN.
768 2. Head must not contain an embedded dot. */
770 headlen = strlen (host) - strlen (cookie_domain);
773 /* DOMAIN must be a proper subset of HOST. */
775 tail = host + headlen;
780 if (strcasecmp (tail, cookie_domain))
785 /* Test (2) is not part of the "domain-match" itself, but is
786 recommended by rfc2109 for reasons of privacy. */
789 if (memchr (host, '.', headlen))
797 static int path_matches PARAMS ((const char *, const char *));
799 /* Check whether PATH begins with COOKIE_PATH. */
802 check_path_match (const char *cookie_path, const char *path)
804 return path_matches (path, cookie_path);
807 /* Parse the `Set-Cookie' header and, if the cookie is legal, store it in memory. */
811 set_cookie_header_cb (const char *hdr, void *closure)
813 struct urlinfo *u = (struct urlinfo *)closure;
814 struct cookie *cookie;
816 cookies_now = time (NULL);
818 cookie = parse_set_cookies (hdr);
822 /* Sanitize parts of cookie. */
825 cookie->domain = xstrdup (u->host);
828 if (!check_domain_match (cookie->domain, u->host))
830 DEBUGP (("Attempt to fake the domain: %s, %s\n",
831 cookie->domain, u->host));
836 cookie->path = xstrdup (u->path);
839 if (!check_path_match (cookie->path, u->path))
841 DEBUGP (("Attempt to fake the path: %s, %s\n",
842 cookie->path, u->path));
847 cookie->port = u->port;
849 if (cookie->discard_requested)
851 discard_matching_cookie (cookie);
852 delete_cookie (cookie);
856 store_cookie (cookie);
861 delete_cookie (cookie);
865 /* Support for sending out cookies in HTTP requests, based on
866 previously stored cookies. Entry point is
867 `build_cookies_request'. */
870 /* Count how many times CHR occurs in STRING. */
873 count_char (const char *string, char chr)
877 for (p = string; *p; p++)
883 /* Store CHAIN to STORE if there is room in STORE. If not, increment
884 COUNT anyway, so that when the function is done, we end up with the
885 exact count of how much place we actually need. */
887 #define STORE_CHAIN(st_chain, st_store, st_size, st_count) do { \
888 if (st_count < st_size) \
889 store[st_count] = st_chain; \
893 /* Store cookie chains that match HOST, PORT. Since more than one
894 chain can match, the matches are written to STORE. No more than
895 SIZE matches are written; if more matches are present, return the
896 number of chains that would have been written. */
899 find_matching_chains (const char *host, int port,
900 struct cookie *store[], int size)
902 struct cookie *chain;
907 if (!cookies_hash_table)
910 SET_HOSTPORT (host, port, hash_key);
913 chain = hash_table_get (cookies_hash_table, hash_key);
915 STORE_CHAIN (chain, store, size, count);
917 dot_count = count_char (host, '.');
919 /* Match less and less specific domains. For instance, given
920 fly.srk.fer.hr, we match .srk.fer.hr, then .fer.hr. */
921 while (dot_count-- > 1)
923 /* Note: we operate directly on hash_key (in form host:port)
924 because we don't want to allocate new hash keys in a
926 char *p = strchr (hash_key, '.');
928 chain = hash_table_get (cookies_hash_table, p);
930 STORE_CHAIN (chain, store, size, count);
936 /* If FULL_PATH begins with PREFIX, return the length of PREFIX, zero otherwise. */
940 path_matches (const char *full_path, const char *prefix)
942 int len = strlen (prefix);
943 if (strncmp (full_path, prefix, len))
944 /* FULL_PATH doesn't begin with PREFIX. */
947 /* Length of PREFIX determines the quality of the match. */
952 matching_cookie (const struct cookie *cookie, const char *path,
953 int connection_secure_p, int *path_goodness)
957 if (cookie->expiry_time < cookies_now)
958 /* Ignore stale cookies. There is no need to unchain the cookie
959 at this point -- Wget is a relatively short-lived application,
960 and stale cookies will not be saved by `save_cookies'. */
962 if (cookie->secure && !connection_secure_p)
963 /* Don't transmit secure cookies over an insecure connection. */
965 pg = path_matches (path, cookie->path);
970 /* If the caller requested path_goodness, we return it. This is
971 an optimization, so that the caller doesn't need to call
972 path_matches() again. */
977 struct weighed_cookie {
978 struct cookie *cookie;
983 /* Comparator used for uniquifying the list. */
986 equality_comparator (const void *p1, const void *p2)
988 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
989 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
991 int namecmp = strcmp (wc1->cookie->attr, wc2->cookie->attr);
992 int valuecmp = strcmp (wc1->cookie->value, wc2->cookie->value);
994 /* We only really care whether both name and value are equal. We
995 return them in this order only for consistency... */
996 return namecmp ? namecmp : valuecmp;
999 /* Eliminate duplicate cookies. "Duplicate cookies" are any two
1000 cookies whose name and value are the same. Whenever a duplicate
1001 pair is found, one of the cookies is removed. */
1004 eliminate_dups (struct weighed_cookie *outgoing, int count)
1008 /* We deploy a simple uniquify algorithm: first sort the array
1009 according to our sort criterion, then uniquify it by comparing
1010 each cookie with its neighbor. */
1012 qsort (outgoing, count, sizeof (struct weighed_cookie), equality_comparator);
1014 for (i = 0; i < count - 1; i++)
1016 struct cookie *c1 = outgoing[i].cookie;
1017 struct cookie *c2 = outgoing[i + 1].cookie;
1018 if (!strcmp (c1->attr, c2->attr) && !strcmp (c1->value, c2->value))
1020 /* c1 and c2 are the same; get rid of c2. */
1022 /* move all ptrs from positions [i + 1, count) to i. */
1023 memmove (outgoing + i, outgoing + i + 1,
1024 (count - (i + 1)) * sizeof (struct weighed_cookie));
1025 /* We decrement i to counter the ++i above. Remember that
1026 we've just removed the element in front of us; we need to
1027 remain in place to check whether outgoing[i] matches what
1028 used to be outgoing[i + 2]. */
1036 /* Comparator used for sorting by quality. */
1039 goodness_comparator (const void *p1, const void *p2)
1041 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
1042 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
1044 /* Subtractions take `wc2' as the first argument because we want a
1045 sort in *decreasing* order of goodness. */
1046 int dgdiff = wc2->domain_goodness - wc1->domain_goodness;
1047 int pgdiff = wc2->path_goodness - wc1->path_goodness;
1049 /* Sort by domain goodness; if these are the same, sort by path
1050 goodness. (The sorting order isn't really specified; maybe it
1051 should be the other way around.) */
1052 return dgdiff ? dgdiff : pgdiff;
1055 /* Build a `Cookie' header for a request that goes to HOST:PORT and
1056 requests PATH from the server. The resulting string is allocated
1057 with `malloc', and the caller is responsible for freeing it. If no
1058 cookies pertain to this request, i.e. no cookie header should be
1059 generated, NULL is returned. */
1062 build_cookies_request (const char *host, int port, const char *path,
1063 int connection_secure_p)
1065 struct cookie *chain_default_store[20];
1066 struct cookie **all_chains = chain_default_store;
1067 int chain_store_size = ARRAY_SIZE (chain_default_store);
1070 struct cookie *cookie;
1071 struct weighed_cookie *outgoing;
1074 int result_size, pos;
1077 chain_count = find_matching_chains (host, port, all_chains, chain_store_size);
1078 if (chain_count > chain_store_size)
1080 /* It's extremely unlikely that more than 20 chains will ever
1081 match. But since find_matching_chains reports the exact size
1082 it needs, it's easy to not have the limitation, so we
1084 all_chains = alloca (chain_count * sizeof (struct cookie *));
1085 chain_store_size = chain_count;
1092 cookies_now = time (NULL);
1094 /* Count the number of cookies whose path matches. */
1096 for (i = 0; i < chain_count; i++)
1097 for (cookie = all_chains[i]; cookie; cookie = cookie->next)
1098 if (matching_cookie (cookie, path, connection_secure_p, NULL))
1101 /* No matching cookies. */
1104 /* Allocate the array. */
1105 outgoing = alloca (count * sizeof (struct weighed_cookie));
1107 /* Fill the array with all the matching cookies from all the
1110 for (i = 0; i < chain_count; i++)
1111 for (cookie = all_chains[i]; cookie; cookie = cookie->next)
1114 if (!matching_cookie (cookie, path, connection_secure_p, &pg))
1116 outgoing[ocnt].cookie = cookie;
1117 outgoing[ocnt].domain_goodness = strlen (cookie->domain);
1118 outgoing[ocnt].path_goodness = pg;
1121 assert (ocnt == count);
1123 /* Eliminate duplicate cookies; that is, those whose name and value
1125 count = eliminate_dups (outgoing, count);
1127 /* Sort the array so that best-matching domains come first, and
1128 that, within one domain, best-matching paths come first. */
1129 qsort (outgoing, count, sizeof (struct weighed_cookie), goodness_comparator);
1131 /* Count the space the name=value pairs will take. */
1133 for (i = 0; i < count; i++)
1135 struct cookie *c = outgoing[i].cookie;
1137 result_size += strlen (c->attr) + 1 + strlen (c->value);
1140 /* Allocate output buffer:
1142 name=value pairs -- result_size
1143 "; " separators -- (count - 1) * 2
1144 \r\n line ending -- 2
1145 \0 terminator -- 1 */
1146 result_size = 8 + result_size + (count - 1) * 2 + 2 + 1;
1147 result = xmalloc (result_size);
1149 strcpy (result, "Cookie: ");
1151 for (i = 0; i < count; i++)
1153 struct cookie *c = outgoing[i].cookie;
1154 int namlen = strlen (c->attr);
1155 int vallen = strlen (c->value);
1157 memcpy (result + pos, c->attr, namlen);
1159 result[pos++] = '=';
1160 memcpy (result + pos, c->value, vallen);
1164 result[pos++] = ';';
1165 result[pos++] = ' ';
1168 result[pos++] = '\r';
1169 result[pos++] = '\n';
1170 result[pos++] = '\0';
1171 assert (pos == result_size);
1175 /* Support for loading and saving cookies. The format used for
1176 loading and saving roughly matches the format of `cookies.txt' file
1177 used by Netscape and Mozilla, at least the Unix versions. The
1178 format goes like this:
1180 DOMAIN DOMAIN-FLAG PATH SECURE-FLAG TIMESTAMP ATTR-NAME ATTR-VALUE
1182 DOMAIN -- cookie domain, optionally followed by :PORT
1183 DOMAIN-FLAG -- whether all hosts in the domain match
1184 PATH -- path prefix of the cookie
1185 SECURE-FLAG -- whether cookie requires secure connection
1186 TIMESTAMP -- expiry timestamp, number of seconds since epoch
1187 ATTR-NAME -- name of the cookie attribute
1188 ATTR-VALUE -- value of the cookie attribute (empty if absent)
1190 The fields are separated by TABs (but Wget's loader recognizes any
1191 whitespace). All fields are mandatory, except for ATTR-VALUE. The
1192 `-FLAG' fields are boolean, their legal values being "TRUE" and
1193 "FALSE". Empty lines, lines consisting of whitespace only, and
1194 comment lines (beginning with # optionally preceded by whitespace) are ignored.
1197 Example line from cookies.txt (split in two lines for readability):
1199 .google.com TRUE / FALSE 2147368447 \
1200 PREF ID=34bb47565bbcd47b:LD=en:NR=20:TM=985172580:LM=985739012
1202 DOMAIN-FLAG is currently not honored by Wget. The cookies whose
1203 domain begins with `.' are treated as if DOMAIN-FLAG were true,
1204 while all other cookies are treated as if it were FALSE. */
1207 /* If the region [B, E) ends with :<digits>, parse the number, return
1208 it, and store new boundary (location of the `:') to DOMAIN_E_PTR.
1209 If port is not specified, return 0. */
1212 domain_port (const char *domain_b, const char *domain_e,
1213 const char **domain_e_ptr)
1217 const char *colon = memchr (domain_b, ':', domain_e - domain_b);
1220 for (p = colon + 1; p < domain_e && ISDIGIT (*p); p++)
1221 port = 10 * port + (*p - '0');
1223 /* Garbage following port number. */
1225 *domain_e_ptr = colon;
1229 #define SKIP_WS(p) do { \
1230 while (*p && ISSPACE (*p)) \
1234 #define SET_WORD_BOUNDARIES(p, b, e) do { \
1238 while (*p && !ISSPACE (*p)) \
1245 /* Load cookies from FILE. */
1248 load_cookies (const char *file)
1251 FILE *fp = fopen (file, "r");
1254 logprintf (LOG_NOTQUIET, "Cannot open cookies file `%s': %s\n",
1255 file, strerror (errno));
1258 cookies_now = time (NULL);
1260 for (; ((line = read_whole_line (fp)) != NULL); xfree (line))
1262 struct cookie *cookie;
1267 char *domain_b = NULL, *domain_e = NULL;
1268 char *ignore_b = NULL, *ignore_e = NULL;
1269 char *path_b = NULL, *path_e = NULL;
1270 char *secure_b = NULL, *secure_e = NULL;
1271 char *expires_b = NULL, *expires_e = NULL;
1272 char *name_b = NULL, *name_e = NULL;
1273 char *value_b = NULL, *value_e = NULL;
1277 if (!*p || *p == '#')
1281 SET_WORD_BOUNDARIES (p, domain_b, domain_e);
1282 SET_WORD_BOUNDARIES (p, ignore_b, ignore_e);
1283 SET_WORD_BOUNDARIES (p, path_b, path_e);
1284 SET_WORD_BOUNDARIES (p, secure_b, secure_e);
1285 SET_WORD_BOUNDARIES (p, expires_b, expires_e);
1286 SET_WORD_BOUNDARIES (p, name_b, name_e);
1288 /* Don't use SET_WORD_BOUNDARIES for value because it may
1289 contain whitespace. Instead, set value_e to the end of line,
1290 modulo trailing space (this will skip the line separator.) */
1293 value_e = p + strlen (p);
1294 while (value_e > value_b && ISSPACE (*(value_e - 1)))
1296 if (value_b == value_e)
1297 /* Hmm, should we check for empty value? I guess that's
1298 legal, so I leave it. */
1301 cookie = cookie_new ();
1303 cookie->attr = strdupdelim (name_b, name_e);
1304 cookie->value = strdupdelim (value_b, value_e);
1305 cookie->path = strdupdelim (path_b, path_e);
1307 if (BOUNDED_EQUAL (secure_b, secure_e, "TRUE"))
1310 /* DOMAIN needs special treatment because we might need to
1311 extract the port. */
1312 port = domain_port (domain_b, domain_e, (const char **)&domain_e);
1314 cookie->port = port;
1316 cookie->port = cookie->secure ? DEFAULT_HTTPS_PORT : DEFAULT_HTTP_PORT;
1318 cookie->domain = strdupdelim (domain_b, domain_e);
1320 /* safe default in case EXPIRES field is garbled. */
1321 cookie->expiry_time = cookies_now - 1;
1323 /* I don't like changing the line, but it's completely safe.
1324 (line is malloced.) */
1326 sscanf (expires_b, "%lu", &cookie->expiry_time);
1327 if (cookie->expiry_time < cookies_now)
1328 /* ignore stale cookie. */
1330 cookie->permanent = 1;
1332 store_cookie (cookie);
1338 delete_cookie (cookie);
1343 /* Mapper for save_cookies callable by hash_table_map. VALUE points
1344 to the head in a chain of cookies. The function prints the entire chain. */
1348 save_cookies_mapper (void *key, void *value, void *arg)
1350 FILE *fp = (FILE *)arg;
1351 char *domain = (char *)key;
1352 struct cookie *chain = (struct cookie *)value;
1353 for (; chain; chain = chain->next)
1355 if (!chain->permanent)
1357 if (chain->expiry_time < cookies_now)
1359 fprintf (fp, "%s\t%s\t%s\t%s\t%lu\t%s\t%s\n",
1360 domain, *domain == '.' ? "TRUE" : "FALSE",
1361 chain->path, chain->secure ? "TRUE" : "FALSE",
1363 chain->attr, chain->value);
1365 return 1; /* stop mapping */
1370 /* Save cookies, in format described above, to FILE. */
1373 save_cookies (const char *file)
1377 if (!cookies_hash_table
1378 || !hash_table_count (cookies_hash_table))
1379 /* no cookies stored; nothing to do. */
1382 DEBUGP (("Saving cookies to %s.\n", file));
1384 cookies_now = time (NULL);
1386 fp = fopen (file, "w");
1389 logprintf (LOG_NOTQUIET, _("Cannot open cookies file `%s': %s\n"),
1390 file, strerror (errno));
1394 fputs ("# HTTP cookie file.\n", fp);
1395 fprintf (fp, "# Generated by Wget on %s.\n", datetime_str (NULL));
1396 fputs ("# Edit at your own risk.\n\n", fp);
1398 hash_table_map (cookies_hash_table, save_cookies_mapper, fp);
1401 logprintf (LOG_NOTQUIET, _("Error writing to `%s': %s\n"),
1402 file, strerror (errno));
1404 if (fclose (fp) < 0)
1405 logprintf (LOG_NOTQUIET, _("Error closing `%s': %s\n"),
1406 file, strerror (errno));
1408 DEBUGP (("Done saving cookies.\n"));
1412 delete_cookie_chain_mapper (void *value, void *key, void *arg_ignored)
1414 char *chain_key = (char *)value;
1415 struct cookie *chain = (struct cookie *)key;
1417 /* Remove the chain from the table and free the key. */
1418 hash_table_remove (cookies_hash_table, chain_key);
1421 /* Then delete all the cookies in the chain. */
1424 struct cookie *next = chain->next;
1425 delete_cookie (chain);
1433 /* Clean up cookie-related data. */
1436 cookies_cleanup (void)
1438 if (!cookies_hash_table)
1440 hash_table_map (cookies_hash_table, delete_cookie_chain_mapper, NULL);
1441 hash_table_destroy (cookies_hash_table);
1442 cookies_hash_table = NULL;