1 /* Support for cookies.
2 Copyright (C) 2001 Free Software Foundation, Inc.
4 This file is part of Wget.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or (at
9 your option) any later version.
11 This program is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
20 /* Written by Hrvoje Niksic. Parts are loosely inspired by cookie
21 code submitted by Tomasz Wegrzanowski. */
41 /* Hash table that maps domain names to cookie chains. */
43 static struct hash_table *cookies_hash_table;
45 /* This should be set by entry points in this file, so the low-level
46 functions don't need to call time() all the time. */
48 static time_t cookies_now;
50 /* This should *really* be in a .h file! */
51 time_t http_atotm PARAMS ((char *));
/* Definition of `struct cookie' and the most basic functions. */

/* A single cookie as kept in memory.  Cookies are chained through
   NEXT; each chain is stored in cookies_hash_table.  */
struct cookie {
  char *domain;                 /* domain of the cookie */
  int port;                     /* port number */
  char *path;                   /* path prefix of the cookie */
  int secure;                   /* non-zero if the cookie must not be
                                   transmitted over a non-https
                                   connection */
  int permanent;                /* whether the cookie should outlive
                                   the session */
  unsigned long expiry_time;    /* time when the cookie expires */
  int discard_requested;        /* whether cookie was created to
                                   request discarding another
                                   cookie */

  char *attr;                   /* cookie attribute name */
  char *value;                  /* cookie attribute value */

  struct cookie *next;          /* used for chaining of cookies in the
                                   same chain */
};
77 /* Allocate and return a new, empty cookie structure. */
79 static struct cookie *
82 struct cookie *cookie = xmalloc (sizeof (struct cookie));
83 memset (cookie, '\0', sizeof (struct cookie));
85 /* If we don't know better, assume cookie is non-permanent and valid
86 for the entire session. */
87 cookie->expiry_time = ~0UL;
89 /* Assume default port. */
95 /* Deallocate COOKIE and its components. */
98 delete_cookie (struct cookie *cookie)
100 FREE_MAYBE (cookie->domain);
101 FREE_MAYBE (cookie->path);
102 FREE_MAYBE (cookie->attr);
103 FREE_MAYBE (cookie->value);
107 /* Functions for cookie-specific hash tables. These are regular hash
108 tables, but with case-insensitive test and hash functions. */
/* Like string_hash, but produces the same results regardless of the
   case of the characters in KEY.  KEY is a NUL-terminated string.  */

static unsigned long
unsigned_string_hash (const void *key)
{
  const char *p = key;
  unsigned int h = TOLOWER (*p);

  /* H is zero here only for an empty key; in that case there is
     nothing more to read.  */
  if (h)
    for (p += 1; *p != '\0'; p++)
      h = (h << 5) - h + TOLOWER (*p);

  return h;
}
/* Case-insensitive equality test for hash table keys.  Return
   non-zero if S1 and S2 are equal ignoring case, zero otherwise.
   Note that this is the negation of strcasecmp's result, not a
   three-way comparison.  */

static int
unsigned_string_cmp (const void *s1, const void *s2)
{
  return !strcasecmp ((const char *)s1, (const char *)s2);
}
134 /* Like make_string_hash_table, but uses unsigned_string_hash and
135 unsigned_string_cmp. */
137 static struct hash_table *
138 make_unsigned_string_hash_table (int initial_size)
140 return hash_table_new (initial_size,
141 unsigned_string_hash, unsigned_string_cmp);
/* Write "HOST:PORT" to RESULT.  RESULT should be a pointer, and the
   memory for the contents is allocated on the stack with alloca.
   Useful for creating HOST:PORT strings, which are the keys in the
   hash table.  HOST, PORT and RESULT are each evaluated more than
   once, so pass plain variables.  */

#define SET_HOSTPORT(host, port, result) do {                     \
  int HP_len = strlen (host);                                     \
  (result) = alloca (HP_len + 1 + numdigit (port) + 1);           \
  memcpy ((result), (host), HP_len);                              \
  (result)[HP_len] = ':';                                         \
  long_to_string ((result) + HP_len + 1, (port));                 \
} while (0)
157 /* Find cookie chain that corresponds to DOMAIN (exact) and PORT. */
159 static struct cookie *
160 find_cookie_chain_exact (const char *domain, int port)
163 if (!cookies_hash_table)
165 SET_HOSTPORT (domain, port, key);
166 return hash_table_get (cookies_hash_table, key);
/* Functions for storing cookies.

   All cookies can be referenced through cookies_hash_table.  The key
   in that table is the "HOST:PORT" string built from the cookie's
   domain and port, and the value is a linked list of all cookies
   stored under that key.  Every new cookie is placed on the head of
   its list.  */
176 /* Find and return the cookie whose domain, path, and attribute name
177 correspond to COOKIE. If found, PREVPTR will point to the location
178 of the cookie previous in chain, or NULL if the found cookie is the
181 If no matching cookie is found, return NULL. */
183 static struct cookie *
184 find_matching_cookie (struct cookie *cookie, struct cookie **prevptr)
186 struct cookie *chain, *prev;
188 if (!cookies_hash_table)
191 chain = find_cookie_chain_exact (cookie->domain, cookie->port);
196 for (; chain; prev = chain, chain = chain->next)
197 if (!strcmp (cookie->path, chain->path)
198 && !strcmp (cookie->attr, chain->attr))
209 /* Store COOKIE to memory.
211 This is done by placing COOKIE at the head of its chain. However,
212 if COOKIE matches a cookie already in memory, as determined by
213 find_matching_cookie, the old cookie is unlinked and destroyed.
215 The key of each chain's hash table entry is allocated only the
216 first time; next hash_table_put's reuse the same key. */
219 store_cookie (struct cookie *cookie)
221 struct cookie *chain_head;
225 if (!cookies_hash_table)
226 /* If the hash table is not initialized, do so now, because we'll
227 need to store things. */
228 cookies_hash_table = make_unsigned_string_hash_table (0);
230 /* Initialize hash table key. */
231 SET_HOSTPORT (cookie->domain, cookie->port, hostport);
233 if (hash_table_get_pair (cookies_hash_table, hostport,
234 &chain_key, &chain_head))
236 /* There already exists a chain of cookies with this exact
237 domain. We need to check for duplicates -- if an existing
238 cookie exactly matches our domain, path and name, we replace
241 struct cookie *victim = find_matching_cookie (cookie, &prev);
245 /* Remove VICTIM from the chain. COOKIE will be placed at
249 prev->next = victim->next;
250 cookie->next = chain_head;
254 /* prev is NULL; apparently VICTIM was at the head of
255 the chain. This place will be taken by COOKIE, so
256 all we need to do is: */
257 cookie->next = victim->next;
259 delete_cookie (victim);
260 DEBUGP (("Deleted old cookie (to be replaced.)\n"));
263 cookie->next = chain_head;
267 /* We are now creating the chain. Allocate the string that will
268 be used as a key. It is unsafe to use cookie->domain for
269 that, because it might get deallocated by the above code at
272 chain_key = xstrdup (hostport);
275 hash_table_put (cookies_hash_table, chain_key, cookie);
277 DEBUGP (("\nStored cookie %s %d %s %d %s %s %s\n",
278 cookie->domain, cookie->port, cookie->path, cookie->secure,
279 asctime (localtime ((time_t *)&cookie->expiry_time)),
280 cookie->attr, cookie->value));
/* Discard a cookie matching COOKIE's domain, path, and attribute
   name.  This gets called when we encounter a cookie whose expiry
   date is in the past, or whose max-age is set to 0.  The former
   corresponds to netscape cookie spec, while the latter is specified
   by rfc2109.  */
290 discard_matching_cookie (struct cookie *cookie)
292 struct cookie *prev, *victim;
294 if (!cookies_hash_table
295 || !hash_table_count (cookies_hash_table))
296 /* No elements == nothing to discard. */
299 victim = find_matching_cookie (cookie, &prev);
303 /* Simply unchain the victim. */
304 prev->next = victim->next;
307 /* VICTIM was head of its chain. We need to place a new
308 cookie at the head. */
311 char *chain_key = NULL;
314 SET_HOSTPORT (victim->domain, victim->port, hostport);
315 res = hash_table_get_pair (cookies_hash_table, hostport,
320 /* VICTIM was the only cookie in the chain. Destroy the
321 chain and deallocate the chain key. */
323 hash_table_remove (cookies_hash_table, hostport);
327 hash_table_put (cookies_hash_table, chain_key, victim->next);
329 delete_cookie (victim);
330 DEBUGP (("Discarded old cookie.\n"));
334 /* Functions for parsing the `Set-Cookie' header, and creating new
335 cookies from the wire. */
/* Helpers for update_cookie_field.  They refer to that function's
   name_b/name_e and value_b/value_e parameters directly, so they are
   usable only where variables with those names are in scope.  */

/* Non-zero if the attribute name [name_b, name_e) equals
   STRING_LITERAL, as decided by BOUNDED_EQUAL_NO_CASE.  */
#define NAME_IS(string_literal)                                 \
  BOUNDED_EQUAL_NO_CASE (name_b, name_e, string_literal)

/* Non-zero if both value boundaries are set.  */
#define VALUE_EXISTS (value_b && value_e)

/* Non-zero if a value is present and spans at least one character.  */
#define VALUE_NON_EMPTY (VALUE_EXISTS && (value_b != value_e))
345 /* Update the appropriate cookie field. [name_b, name_e) are expected
346 to delimit the attribute name, while [value_b, value_e) (optional)
347 should delimit the attribute value.
349 When called the first time, it will set the cookie's attribute name
350 and value. After that, it will check the attribute name for
351 special fields such as `domain', `path', etc. Where appropriate,
352 it will parse the values of the fields it recognizes and fill the
353 corresponding fields in COOKIE.
355 Returns 1 on success. Returns zero in case a syntax error is
356 found; such a cookie should be discarded. */
359 update_cookie_field (struct cookie *cookie,
360 const char *name_b, const char *name_e,
361 const char *value_b, const char *value_e)
363 assert (name_b != NULL && name_e != NULL);
369 cookie->attr = strdupdelim (name_b, name_e);
370 cookie->value = strdupdelim (value_b, value_e);
374 if (NAME_IS ("domain"))
376 if (!VALUE_NON_EMPTY)
378 FREE_MAYBE (cookie->domain);
379 cookie->domain = strdupdelim (value_b, value_e);
382 else if (NAME_IS ("path"))
384 if (!VALUE_NON_EMPTY)
386 FREE_MAYBE (cookie->path);
387 cookie->path = strdupdelim (value_b, value_e);
390 else if (NAME_IS ("expires"))
395 if (!VALUE_NON_EMPTY)
397 BOUNDED_TO_ALLOCA (value_b, value_e, value_copy);
399 expires = http_atotm (value_copy);
402 cookie->permanent = 1;
403 cookie->expiry_time = (unsigned long)expires;
406 /* Error in expiration spec. Assume default (cookie valid for
407 this session.) #### Should we return 0 and invalidate the
411 /* According to netscape's specification, expiry time in the
412 past means that discarding of a matching cookie is
414 if (cookie->expiry_time < cookies_now)
415 cookie->discard_requested = 1;
419 else if (NAME_IS ("max-age"))
424 if (!VALUE_NON_EMPTY)
426 BOUNDED_TO_ALLOCA (value_b, value_e, value_copy);
428 sscanf (value_copy, "%lf", &maxage);
430 /* something is wrong. */
432 cookie->permanent = 1;
433 cookie->expiry_time = (unsigned long)cookies_now + (unsigned long)maxage;
435 /* According to rfc2109, a cookie with max-age of 0 means that
436 discarding of a matching cookie is requested. */
438 cookie->discard_requested = 1;
442 else if (NAME_IS ("secure"))
444 /* ignore value completely */
449 /* Unrecognized attribute; ignore it. */
/* Returns non-zero for characters that are legal in the name of an
   attribute: alphanumerics (per ISALNUM), `-' and `_'.  C may be
   evaluated more than once.  */

#define ATTR_NAME_CHAR(c) (ISALNUM (c) || (c) == '-' || (c) == '_')
460 /* Fetch the next character without doing anything special if CH gets
461 set to 0. (The code executed next is expected to handle it.) */
463 #define FETCH1(ch, ptr) do { \
467 /* Like FETCH1, but jumps to `eof' label if CH gets set to 0. */
469 #define FETCH(ch, ptr) do { \
/* Parse the contents of the `Set-Cookie' header.  The header looks
   like this:

   name1=value1; name2=value2; ...

   Trailing semicolon is optional; spaces are allowed between all
   tokens.  Additionally, values may be quoted.

   A new cookie is returned upon success, NULL otherwise.  The
   function `update_cookie_field' is used to update the fields of the
   newly created cookie structure.  */
487 static struct cookie *
488 parse_set_cookies (const char *sc)
490 struct cookie *cookie = cookie_new ();
492 enum { S_NAME_PRE, S_NAME, S_NAME_POST,
493 S_VALUE_PRE, S_VALUE, S_VALUE_TRAILSPACE_MAYBE,
494 S_QUOTED_VALUE, S_QUOTED_VALUE_POST,
496 S_DONE, S_ERROR } state = S_NAME_PRE;
501 const char *name_b = NULL, *name_e = NULL;
502 const char *value_b = NULL, *value_e = NULL;
506 while (state != S_DONE && state != S_ERROR)
513 else if (ATTR_NAME_CHAR (c))
520 /* empty attr name not allowed */
524 if (ATTR_NAME_CHAR (c))
526 else if (!c || c == ';' || c == '=' || ISSPACE (c))
537 else if (!c || c == ';')
539 value_b = value_e = NULL;
540 state = S_ATTR_ACTION;
557 state = S_QUOTED_VALUE;
559 else if (c == ';' || c == '\0')
561 value_b = value_e = p - 1;
562 state = S_ATTR_ACTION;
572 if (c == ';' || c == '\0')
576 state = S_ATTR_ACTION;
578 else if (ISSPACE (c))
582 state = S_VALUE_TRAILSPACE_MAYBE;
586 value_e = NULL; /* no trailing space */
590 case S_VALUE_TRAILSPACE_MAYBE:
601 state = S_QUOTED_VALUE_POST;
606 case S_QUOTED_VALUE_POST:
608 state = S_ATTR_ACTION;
609 else if (ISSPACE (c))
616 int legal = update_cookie_field (cookie, name_b, name_e,
621 BOUNDED_TO_ALLOCA (name_b, name_e, name);
622 logprintf (LOG_NOTQUIET,
623 _("Error in Set-Cookie, field `%s'"), name);
638 /* handled by loop condition */
645 delete_cookie (cookie);
646 if (state == S_ERROR)
647 logprintf (LOG_NOTQUIET, _("Syntax error in Set-Cookie at character `%c'.\n"), c);
653 delete_cookie (cookie);
654 logprintf (LOG_NOTQUIET,
655 _("Syntax error in Set-Cookie: premature end of string.\n"));
/* Sanity checks.  These are important, otherwise it is possible for
   malicious attackers to destroy important cookie information and/or
   violate your privacy.  */
/* Check whether ADDR matches <digits>.<digits>.<digits>.<digits>.
   Return 1 if it does, 0 otherwise.  Only the shape is checked: each
   group must hold at least one digit, but its numeric value is not
   examined.

   We don't want to call network functions like inet_addr() because all
   we need is a check, preferably one that is small, fast, and
   self-contained.  The digit test is written as an ASCII range for
   the same reason.  */

static int
numeric_address_p (const char *addr)
{
  const char *p = addr;
  int group;

  for (group = 0; group < 4; group++)
    {
      /* Every group after the first is introduced by a dot.  */
      if (group > 0 && *p++ != '.')
        return 0;

      /* Require one digit, then skip the rest of the run.  */
      if (!(*p >= '0' && *p <= '9'))
        return 0;
      while (*p >= '0' && *p <= '9')
        p++;
    }

  /* Nothing may follow the fourth group.  */
  return *p == '\0';
}
/* Check whether COOKIE_DOMAIN is an appropriate domain for HOST.
   This check is compliant with rfc2109.  Return 1 if COOKIE_DOMAIN is
   acceptable for HOST, 0 otherwise.  */

static int
check_domain_match (const char *cookie_domain, const char *host)
{
  size_t hostlen, domainlen, headlen;
  const char *tail;

  /* Numeric address requires exact match.  It also requires HOST to
     be an IP address.  I suppose we *could* resolve HOST with
     store_hostaddress (it would hit the hash table), but rfc2109
     doesn't require it, and it doesn't seem very useful, so we
     don't.  */
  if (numeric_address_p (cookie_domain))
    return !strcmp (cookie_domain, host);

  /* The domain must contain at least one embedded dot. */
  {
    const char *rest = cookie_domain;
    int len = strlen (rest);
    if (*rest == '.')
      ++rest, --len;            /* ignore first dot */
    if (len <= 0)
      return 0;
    if (rest[len - 1] == '.')
      --len;                    /* ignore last dot */

    if (!memchr (rest, '.', len))
      /* No dots. */
      return 0;
  }

  /* For the sake of efficiency, check for exact match first. */
  if (!strcasecmp (cookie_domain, host))
    return 1;

  /* In rfc2109 terminology, HOST needs domain-match COOKIE_DOMAIN.
     This means that COOKIE_DOMAIN needs to start with `.' and be an
     FQDN, and that HOST must end with COOKIE_DOMAIN. */
  if (*cookie_domain != '.')
    return 0;

  /* To proceed, we need to examine two parts of HOST: its head and
     its tail.  Head and tail are defined in terms of the length of
     the domain, like this:

       HHHHTTTTTTTTTTTTTTT  <- host
           DDDDDDDDDDDDDDD  <- domain

     That is, "head" is the part of the host before (hlen - dlen), and
     "tail" is what follows.

     For the domain to match, two conditions need to be true:

     1. Tail must equal DOMAIN.
     2. Head must not contain an embedded dot. */

  hostlen = strlen (host);
  domainlen = strlen (cookie_domain);
  if (hostlen <= domainlen)
    /* DOMAIN must be a proper subset of HOST. */
    return 0;
  headlen = hostlen - domainlen;
  tail = host + headlen;

  /* (1) */
  if (strcasecmp (tail, cookie_domain))
    return 0;

  /* Test (2) is not part of the "domain-match" itself, but is
     recommended by rfc2109 for reasons of privacy. */

  /* (2) */
  if (memchr (host, '.', headlen))
    return 0;

  return 1;
}
779 static int path_matches PARAMS ((const char *, const char *));
781 /* Check whether PATH begins with COOKIE_PATH. */
784 check_path_match (const char *cookie_path, const char *path)
786 return path_matches (path, cookie_path);
789 /* Parse the `Set-Cookie' header and, if the cookie is legal, store it
793 set_cookie_header_cb (const char *hdr, void *closure)
795 struct urlinfo *u = (struct urlinfo *)closure;
796 struct cookie *cookie;
798 cookies_now = time (NULL);
800 cookie = parse_set_cookies (hdr);
804 /* Sanitize parts of cookie. */
807 cookie->domain = xstrdup (u->host);
810 if (!check_domain_match (cookie->domain, u->host))
812 DEBUGP (("Attempt to fake the domain: %s, %s\n",
813 cookie->domain, u->host));
818 cookie->path = xstrdup (u->path);
821 if (!check_path_match (cookie->path, u->path))
823 DEBUGP (("Attempt to fake the path: %s, %s\n",
824 cookie->path, u->path));
829 cookie->port = u->port;
831 if (cookie->discard_requested)
833 discard_matching_cookie (cookie);
834 delete_cookie (cookie);
838 store_cookie (cookie);
843 delete_cookie (cookie);
847 /* Support for sending out cookies in HTTP requests, based on
848 previously stored cookies. Entry point is
849 `build_cookies_request'. */
/* Count how many times CHR occurs in STRING.  STRING must be
   NUL-terminated; the terminator itself is never counted.  */

static int
count_char (const char *string, char chr)
{
  const char *p;
  int count = 0;

  for (p = string; *p; p++)
    if (*p == chr)
      ++count;

  return count;
}
/* Store CHAIN to STORE if there is room in STORE.  If not, increment
   COUNT anyway, so that when the function is done, we end up with the
   exact count of how much place we actually need.  ST_COUNT must be
   an lvalue; it is evaluated more than once.  */

#define STORE_CHAIN(st_chain, st_store, st_size, st_count) do {   \
  if ((st_count) < (st_size))                                     \
    (st_store)[(st_count)] = (st_chain);                          \
  ++(st_count);                                                   \
} while (0)
875 /* Store cookie chains that match HOST, PORT. Since more than one
876 chain can match, the matches are written to STORE. No more than
877 SIZE matches are written; if more matches are present, return the
878 number of chains that would have been written. */
881 find_matching_chains (const char *host, int port,
882 struct cookie *store[], int size)
884 struct cookie *chain;
889 if (!cookies_hash_table)
892 SET_HOSTPORT (host, port, hash_key);
895 chain = hash_table_get (cookies_hash_table, hash_key);
897 STORE_CHAIN (chain, store, size, count);
899 dot_count = count_char (host, '.');
901 /* Match less and less specific domains. For instance, given
902 fly.srk.fer.hr, we match .srk.fer.hr, then .fer.hr. */
903 while (dot_count-- > 1)
905 /* Note: we operate directly on hash_key (in form host:port)
906 because we don't want to allocate new hash keys in a
908 char *p = strchr (hash_key, '.');
910 chain = hash_table_get (cookies_hash_table, p);
912 STORE_CHAIN (chain, store, size, count);
/* If FULL_PATH begins with PREFIX, return the length of PREFIX, zero
   otherwise.  The comparison is a plain case-sensitive prefix test.
   An empty PREFIX therefore yields 0 as well.  */

static int
path_matches (const char *full_path, const char *prefix)
{
  size_t len = strlen (prefix);

  if (strncmp (full_path, prefix, len) != 0)
    /* FULL_PATH doesn't begin with PREFIX. */
    return 0;

  /* Length of PREFIX determines the quality of the match. */
  return (int) len;
}
934 matching_cookie (const struct cookie *cookie, const char *path,
935 int connection_secure_p, int *path_goodness)
939 if (cookie->expiry_time < cookies_now)
940 /* Ignore stale cookies. There is no need to unchain the cookie
941 at this point -- Wget is a relatively short-lived application,
942 and stale cookies will not be saved by `save_cookies'. */
944 if (cookie->secure && !connection_secure_p)
945 /* Don't transmit secure cookies over an insecure connection. */
947 pg = path_matches (path, cookie->path);
952 /* If the caller requested path_goodness, we return it. This is
953 an optimization, so that the caller doesn't need to call
954 path_matches() again. */
/* A cookie selected for an outgoing request, together with the two
   scores build_cookies_request sorts by.  */
struct weighed_cookie {
  struct cookie *cookie;
  int domain_goodness;          /* set to the length of the cookie's
                                   domain */
  int path_goodness;            /* set to the path_matches result */
};
965 /* Comparator used for uniquifying the list. */
968 equality_comparator (const void *p1, const void *p2)
970 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
971 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
973 int namecmp = strcmp (wc1->cookie->attr, wc2->cookie->attr);
974 int valuecmp = strcmp (wc1->cookie->value, wc2->cookie->value);
976 /* We only really care whether both name and value are equal. We
977 return them in this order only for consistency... */
978 return namecmp ? namecmp : valuecmp;
981 /* Comparator used for sorting by quality. */
984 goodness_comparator (const void *p1, const void *p2)
986 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
987 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
989 /* Subtractions take `wc2' as the first argument becauase we want a
990 sort in *decreasing* order of goodness. */
991 int dgdiff = wc2->domain_goodness - wc1->domain_goodness;
992 int pgdiff = wc2->path_goodness - wc1->path_goodness;
994 /* Sort by domain goodness; if these are the same, sort by path
995 goodness. (The sorting order isn't really specified; maybe it
996 should be the other way around.) */
997 return dgdiff ? dgdiff : pgdiff;
/* Build a `Cookie' header for a request that goes to HOST:PORT and
   requests PATH from the server.  Memory is allocated by `xmalloc',
   and the caller is responsible for freeing it.  If no cookies
   pertain to this request, i.e. no cookie header should be generated,
   NULL is returned.  */
1007 build_cookies_request (const char *host, int port, const char *path,
1008 int connection_secure_p)
1010 struct cookie *chain_default_store[20];
1011 struct cookie **all_chains = chain_default_store;
1012 int chain_store_size = ARRAY_SIZE (chain_default_store);
1015 struct cookie *cookie;
1016 struct weighed_cookie *outgoing;
1019 int result_size, pos;
1022 chain_count = find_matching_chains (host, port, all_chains, chain_store_size);
1023 if (chain_count > chain_store_size)
1025 /* It's extremely unlikely that more than 20 chains will ever
1026 match. But in this case it's easy to not have the
1027 limitation, so we don't. */
1028 all_chains = alloca (chain_count * sizeof (struct cookie *));
1035 cookies_now = time (NULL);
1037 /* Count the number of cookies whose path matches. */
1039 for (i = 0; i < chain_count; i++)
1040 for (cookie = all_chains[i]; cookie; cookie = cookie->next)
1041 if (matching_cookie (cookie, path, connection_secure_p, NULL))
1044 /* No matching cookies. */
1047 /* Allocate the array. */
1048 outgoing = alloca (count * sizeof (struct weighed_cookie));
1051 for (i = 0; i < chain_count; i++)
1052 for (cookie = all_chains[i]; cookie; cookie = cookie->next)
1055 if (!matching_cookie (cookie, path, connection_secure_p, &pg))
1057 outgoing[ocnt].cookie = cookie;
1058 outgoing[ocnt].domain_goodness = strlen (cookie->domain);
1059 outgoing[ocnt].path_goodness = pg;
1062 assert (ocnt == count);
1064 /* Eliminate duplicate cookies; that is, those whose name and value
1065 are the same. We do it by first sorting the array, and then
1067 qsort (outgoing, count, sizeof (struct weighed_cookie), equality_comparator);
1068 for (i = 0; i < count - 1; i++)
1070 struct cookie *c1 = outgoing[i].cookie;
1071 struct cookie *c2 = outgoing[i + 1].cookie;
1072 if (!strcmp (c1->attr, c2->attr) && !strcmp (c1->value, c2->value))
1074 /* c1 and c2 are the same; get rid of c2. */
1076 /* move all ptrs from positions [i + 1, count) to i. */
1077 memmove (outgoing + i, outgoing + i + 1,
1078 (count - (i + 1)) * sizeof (struct weighed_cookie));
1079 /* We decrement i to counter the ++i above. Remember that
1080 we've just removed the element in front of us; we need to
1081 remain in place to check whether outgoing[i] what used to
1082 be outgoing[i + 2]. */
1088 /* Sort the array so that best-matching domains come first, and
1089 that, within one domain, best-matching paths come first. */
1090 qsort (outgoing, count, sizeof (struct weighed_cookie), goodness_comparator);
1092 /* Count the space the name=value pairs will take. */
1094 for (i = 0; i < count; i++)
1096 struct cookie *c = outgoing[i].cookie;
1098 result_size += strlen (c->attr) + 1 + strlen (c->value);
1101 /* Allocate output buffer:
1103 name=value pairs -- result_size
1104 "; " separators -- (count - 1) * 2
1105 \r\n line ending -- 2
1106 \0 terminator -- 1 */
1107 result_size = 8 + result_size + (count - 1) * 2 + 2 + 1;
1108 result = xmalloc (result_size);
1110 strcpy (result, "Cookie: ");
1112 for (i = 0; i < count; i++)
1114 struct cookie *c = outgoing[i].cookie;
1115 int namlen = strlen (c->attr);
1116 int vallen = strlen (c->value);
1118 memcpy (result + pos, c->attr, namlen);
1120 result[pos++] = '=';
1121 memcpy (result + pos, c->value, vallen);
1125 result[pos++] = ';';
1126 result[pos++] = ' ';
1129 result[pos++] = '\r';
1130 result[pos++] = '\n';
1131 result[pos++] = '\0';
1132 assert (pos == result_size);
/* Support for loading and saving cookies.  The format used for
   loading and saving roughly matches the format of `cookies.txt' file
   used by Netscape and Mozilla, at least the Unix versions.  The
   format goes like this:

       DOMAIN DOMAIN-FLAG PATH SECURE-FLAG TIMESTAMP ATTR-NAME ATTR-VALUE

     DOMAIN      -- cookie domain, optionally followed by :PORT
     DOMAIN-FLAG -- whether all hosts in the domain match
     PATH        -- cookie path
     SECURE-FLAG -- whether cookie requires secure connection
     TIMESTAMP   -- expiry timestamp, number of seconds since epoch
     ATTR-NAME   -- name of the cookie attribute
     ATTR-VALUE  -- value of the cookie attribute (empty if absent)

   The fields are separated by TABs (but Wget's loader recognizes any
   whitespace).  All fields are mandatory, except for ATTR-VALUE.  The
   `-FLAG' fields are boolean, their legal values being "TRUE" and
   "FALSE".  Empty lines, lines consisting of whitespace only, and
   comment lines (beginning with # optionally preceded by whitespace)
   are ignored.

   Example line from cookies.txt (split in two lines for readability):

       .google.com      TRUE    /       FALSE   2147368447      \
       PREF     ID=34bb47565bbcd47b:LD=en:NR=20:TM=985172580:LM=985739012

   DOMAIN-FLAG is currently not honored by Wget.  The cookies whose
   domain begins with `.' are treated as if DOMAIN-FLAG were true,
   while all other cookies are treated as if it were FALSE.  */
/* If the region [DOMAIN_B, DOMAIN_E) ends with :<digits>, parse the
   number, return it, and store new boundary (location of the `:') to
   DOMAIN_E_PTR.  If port is not specified, return 0.

   0 is also returned, and *DOMAIN_E_PTR left untouched, when
   something other than digits follows the colon or when the number
   exceeds 65535.  Rejecting oversized numbers keeps the accumulator
   from overflowing on a long run of digits.  */

static int
domain_port (const char *domain_b, const char *domain_e,
             const char **domain_e_ptr)
{
  int port = 0;
  const char *p;
  const char *colon = memchr (domain_b, ':', domain_e - domain_b);

  if (!colon)
    return 0;

  for (p = colon + 1; p < domain_e && *p >= '0' && *p <= '9'; p++)
    {
      port = 10 * port + (*p - '0');
      if (port > 65535)
        /* Not a valid port number.  */
        return 0;
    }

  if (p < domain_e)
    /* Garbage following port number. */
    return 0;

  *domain_e_ptr = colon;
  return port;
}
1190 #define SKIP_WS(p) do { \
1191 while (*p && ISSPACE (*p)) \
1195 #define MARK_WORD(p, b, e) do { \
1199 while (*p && !ISSPACE (*p)) \
1206 /* Load cookies from FILE. */
1209 load_cookies (const char *file)
1212 FILE *fp = fopen (file, "r");
1215 logprintf (LOG_NOTQUIET, "Cannot open cookies file `%s': %s\n",
1216 file, strerror (errno));
1219 cookies_now = time (NULL);
1221 for (; ((line = read_whole_line (fp)) != NULL); xfree (line))
1223 struct cookie *cookie;
1228 char *domain_b = NULL, *domain_e = NULL;
1229 char *ignore_b = NULL, *ignore_e = NULL;
1230 char *path_b = NULL, *path_e = NULL;
1231 char *secure_b = NULL, *secure_e = NULL;
1232 char *expires_b = NULL, *expires_e = NULL;
1233 char *name_b = NULL, *name_e = NULL;
1234 char *value_b = NULL, *value_e = NULL;
1238 if (!*p || *p == '#')
1242 MARK_WORD (p, domain_b, domain_e);
1243 MARK_WORD (p, ignore_b, ignore_e);
1244 MARK_WORD (p, path_b, path_e);
1245 MARK_WORD (p, secure_b, secure_e);
1246 MARK_WORD (p, expires_b, expires_e);
1247 MARK_WORD (p, name_b, name_e);
1249 /* Don't use MARK_WORD for value because it may contain
1250 whitespace itself. Instead, . */
1251 MARK_WORD (p, value_b, value_e);
1253 cookie = cookie_new ();
1255 cookie->attr = strdupdelim (name_b, name_e);
1256 cookie->value = strdupdelim (value_b, value_e);
1257 cookie->path = strdupdelim (path_b, path_e);
1259 if (BOUNDED_EQUAL (secure_b, secure_e, "TRUE"))
1262 /* DOMAIN needs special treatment because we might need to
1263 extract the port. */
1264 port = domain_port (domain_b, domain_e, (const char **)&domain_e);
1266 cookie->port = port;
1268 cookie->port = cookie->secure ? DEFAULT_HTTPS_PORT : DEFAULT_HTTP_PORT;
1270 cookie->domain = strdupdelim (domain_b, domain_e);
1272 /* Don't use MARK_WORD for value because it may contain
1273 whitespace itself. Instead, set name_e to the end of line,
1274 modulo trailing space (which includes the NL separator.) */
1277 name_e = p + strlen (p);
1278 while (name_e >= name_b && ISSPACE (*name_e))
1280 if (name_b == name_e)
1281 /* Hmm, should we check for empty value? I guess that's
1282 legal, so I leave it. */
1285 /* safe default in case EXPIRES field is garbled. */
1286 cookie->expiry_time = cookies_now - 1;
1288 /* I don't like changing the line, but it's completely safe.
1289 (line is malloced.) */
1291 sscanf (expires_b, "%lu", &cookie->expiry_time);
1292 if (cookie->expiry_time < cookies_now)
1293 /* ignore stale cookie. */
1295 cookie->permanent = 1;
1297 store_cookie (cookie);
1303 delete_cookie (cookie);
1308 /* Mapper for save_cookies callable by hash_table_map. VALUE points
1309 to the head in a chain of cookies. The function prints the entire
1313 save_cookies_mapper (void *key, void *value, void *arg)
1315 FILE *fp = (FILE *)arg;
1316 char *domain = (char *)key;
1317 struct cookie *chain = (struct cookie *)value;
1318 for (; chain; chain = chain->next)
1320 if (!chain->permanent)
1322 if (chain->expiry_time < cookies_now)
1324 fprintf (fp, "%s\t%s\t%s\t%s\t%lu\t%s\t%s\n",
1325 domain, *domain == '.' ? "TRUE" : "FALSE",
1326 chain->path, chain->secure ? "TRUE" : "FALSE",
1328 chain->attr, chain->value);
1330 return 1; /* stop mapping */
1335 /* Save cookies, in format described above, to FILE. */
1338 save_cookies (const char *file)
1342 if (!cookies_hash_table
1343 || !hash_table_count (cookies_hash_table))
1344 /* no cookies stored; nothing to do. */
1347 DEBUGP (("Saving cookies to %s.\n", file));
1349 cookies_now = time (NULL);
1351 fp = fopen (file, "w");
1354 logprintf (LOG_NOTQUIET, _("Cannot open cookies file `%s': %s\n"),
1355 file, strerror (errno));
1359 fputs ("# HTTP cookie file.\n", fp);
1360 fprintf (fp, "# Generated by Wget on %s.\n", datetime_str (NULL));
1361 fputs ("# Edit at your own risk.\n\n", fp);
1363 hash_table_map (cookies_hash_table, save_cookies_mapper, fp);
1366 logprintf (LOG_NOTQUIET, _("Error writing to `%s': %s\n"),
1367 file, strerror (errno));
1369 if (fclose (fp) < 0)
1370 logprintf (LOG_NOTQUIET, _("Error closing `%s': %s\n"),
1371 file, strerror (errno));
1373 DEBUGP (("Done saving cookies.\n"));
1377 delete_cookie_chain_mapper (void *value, void *key, void *arg_ignored)
1379 char *chain_key = (char *)value;
1380 struct cookie *chain = (struct cookie *)key;
1382 /* Remove the chain from the table and free the key. */
1383 hash_table_remove (cookies_hash_table, chain_key);
1386 /* Then delete all the cookies in the chain. */
1389 struct cookie *next = chain->next;
1390 delete_cookie (chain);
1398 /* Clean up cookie-related data. */
1401 cookies_cleanup (void)
1403 if (!cookies_hash_table)
1405 hash_table_map (cookies_hash_table, delete_cookie_chain_mapper, NULL);
1406 hash_table_destroy (cookies_hash_table);
1407 cookies_hash_table = NULL;