1 /* Support for cookies.
2 Copyright (C) 2001, 2002 Free Software Foundation, Inc.
4 This file is part of GNU Wget.
6 GNU Wget is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or (at
9 your option) any later version.
11 GNU Wget is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Wget; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
20 /* Written by Hrvoje Niksic. Parts are loosely inspired by cookie
21 code submitted by Tomasz Wegrzanowski.
23 TODO: Implement limits on cookie-related sizes, such as max. cookie
24 size, max. number of cookies, etc. Add more "cookie jar" methods,
25 such as methods to iterate over stored cookies, to clear temporary cookies,
26 to perform intelligent auto-saving, etc. Ultimately support
27 `Set-Cookie2' and `Cookie2' headers. */
46 /* This should *really* be in a .h file! */
47 time_t http_atotm PARAMS ((const char *));
49 /* Declarations of `struct cookie' and the most basic functions. */
52 /* Hash table that maps domain names to cookie chains. A "cookie
53 chain" is a linked list of cookies that belong to the same
55 struct hash_table *chains_by_domain;
57 int cookie_count; /* number of cookies in the jar. */
60 /* Value set by entry point functions, so that the low-level
61 routines don't need to call time() all the time. */
67 struct cookie_jar *jar = xmalloc (sizeof (struct cookie_jar));
68 jar->chains_by_domain = make_nocase_string_hash_table (0);
69 jar->cookie_count = 0;
74 char *domain; /* domain of the cookie */
75 int port; /* port number */
76 char *path; /* path prefix of the cookie */
77 int secure; /* whether cookie should be
78 transmitted over non-https
80 int permanent; /* whether the cookie should outlive
82 time_t expiry_time; /* time when the cookie expires */
83 int discard_requested; /* whether cookie was created to
84 request discarding another
87 char *attr; /* cookie attribute name */
88 char *value; /* cookie attribute value */
90 struct cookie_jar *jar; /* pointer back to the cookie jar, for
92 struct cookie *next; /* used for chaining of cookies in the
97 #define COOKIE_EXPIRED_P(c) ((c)->expiry_time != 0 && (c)->expiry_time < cookies_now)
99 /* Allocate and return a new, empty cookie structure. */
101 static struct cookie *
104 struct cookie *cookie = xmalloc (sizeof (struct cookie));
105 memset (cookie, '\0', sizeof (struct cookie));
107 /* Both cookie->permanent and cookie->expiry_time are now 0. By
108 default, we assume that the cookie is non-permanent and valid
109 until the end of the session. */
111 cookie->port = PORT_ANY;
115 /* Deallocate COOKIE and its components. */
118 delete_cookie (struct cookie *cookie)
120 FREE_MAYBE (cookie->domain);
121 FREE_MAYBE (cookie->path);
122 FREE_MAYBE (cookie->attr);
123 FREE_MAYBE (cookie->value);
127 /* Functions for storing cookies.
129 All cookies can be reached beginning with jar->chains_by_domain.
130 The key in that table is the domain name, and the value is a linked
131 list of all cookies from that domain. Every new cookie is placed
132 on the head of the list. */
134 /* Find and return a cookie in JAR whose domain, path, and attribute
135 name correspond to COOKIE. If found, PREVPTR will point to the
136 location of the cookie previous in chain, or NULL if the found
137 cookie is the head of a chain.
139 If no matching cookie is found, return NULL. */
141 static struct cookie *
142 find_matching_cookie (struct cookie_jar *jar, struct cookie *cookie,
143 struct cookie **prevptr)
145 struct cookie *chain, *prev;
147 chain = hash_table_get (jar->chains_by_domain, cookie->domain);
152 for (; chain; prev = chain, chain = chain->next)
153 if (0 == strcmp (cookie->path, chain->path)
154 && 0 == strcmp (cookie->attr, chain->attr)
155 && cookie->port == chain->port)
166 /* Store COOKIE to the jar.
168 This is done by placing COOKIE at the head of its chain. However,
169 if COOKIE matches a cookie already in memory, as determined by
170 find_matching_cookie, the old cookie is unlinked and destroyed.
172 The key of each chain's hash table entry is allocated only the
173 first time; next hash_table_put's reuse the same key. */
176 store_cookie (struct cookie_jar *jar, struct cookie *cookie)
178 struct cookie *chain_head;
181 if (hash_table_get_pair (jar->chains_by_domain, cookie->domain,
182 &chain_key, &chain_head))
184 /* A chain of cookies in this domain already exists. Check for
185 duplicates -- if an extant cookie exactly matches our domain,
186 port, path, and name, replace it. */
188 struct cookie *victim = find_matching_cookie (jar, cookie, &prev);
192 /* Remove VICTIM from the chain. COOKIE will be placed at
196 prev->next = victim->next;
197 cookie->next = chain_head;
201 /* prev is NULL; apparently VICTIM was at the head of
202 the chain. This place will be taken by COOKIE, so
203 all we need to do is: */
204 cookie->next = victim->next;
206 delete_cookie (victim);
208 DEBUGP (("Deleted old cookie (to be replaced.)\n"));
211 cookie->next = chain_head;
215 /* We are now creating the chain. Allocate the string that will
216 be used as a key. It is unsafe to use cookie->domain for
217 that, because it might get deallocated by the above code at
220 chain_key = xstrdup (cookie->domain);
223 hash_table_put (jar->chains_by_domain, chain_key, cookie);
226 DEBUGP (("\nStored cookie %s %d%s %s %s %d %s %s %s\n",
227 cookie->domain, cookie->port,
228 cookie->port == PORT_ANY ? " (ANY)" : "",
230 cookie->permanent ? "permanent" : "nonpermanent",
233 ? asctime (localtime (&cookie->expiry_time)) : "<indefinitely>",
234 cookie->attr, cookie->value));
237 /* Discard a cookie matching COOKIE's domain, port, path, and
238 attribute name. This gets called when we encounter a cookie whose
239 expiry date is in the past, or whose max-age is set to 0. The
240 former corresponds to netscape cookie spec, while the latter is
241 specified by rfc2109. */
244 discard_matching_cookie (struct cookie_jar *jar, struct cookie *cookie)
246 struct cookie *prev, *victim;
248 if (!hash_table_count (jar->chains_by_domain))
249 /* No elements == nothing to discard. */
252 victim = find_matching_cookie (jar, cookie, &prev);
256 /* Simply unchain the victim. */
257 prev->next = victim->next;
260 /* VICTIM was head of its chain. We need to place a new
261 cookie at the head. */
262 char *chain_key = NULL;
265 res = hash_table_get_pair (jar->chains_by_domain, victim->domain,
270 /* VICTIM was the only cookie in the chain. Destroy the
271 chain and deallocate the chain key. */
272 hash_table_remove (jar->chains_by_domain, victim->domain);
276 hash_table_put (jar->chains_by_domain, chain_key, victim->next);
278 delete_cookie (victim);
279 DEBUGP (("Discarded old cookie.\n"));
283 /* Functions for parsing the `Set-Cookie' header, and creating new
284 cookies from the wire. */
287 #define NAME_IS(string_literal) \
288 BOUNDED_EQUAL_NO_CASE (name_b, name_e, string_literal)
290 #define VALUE_EXISTS (value_b && value_e)
292 #define VALUE_NON_EMPTY (VALUE_EXISTS && (value_b != value_e))
294 /* Update the appropriate cookie field. [name_b, name_e) are expected
295 to delimit the attribute name, while [value_b, value_e) (optional)
296 should delimit the attribute value.
298 When called the first time, it will set the cookie's attribute name
299 and value. After that, it will check the attribute name for
300 special fields such as `domain', `path', etc. Where appropriate,
301 it will parse the values of the fields it recognizes and fill the
302 corresponding fields in COOKIE.
304 Returns 1 on success. Returns zero in case a syntax error is
305 found; such a cookie should be discarded. */
308 update_cookie_field (struct cookie *cookie,
309 const char *name_b, const char *name_e,
310 const char *value_b, const char *value_e)
312 assert (name_b != NULL && name_e != NULL);
318 cookie->attr = strdupdelim (name_b, name_e);
319 cookie->value = strdupdelim (value_b, value_e);
323 if (NAME_IS ("domain"))
325 if (!VALUE_NON_EMPTY)
327 FREE_MAYBE (cookie->domain);
328 cookie->domain = strdupdelim (value_b, value_e);
331 else if (NAME_IS ("path"))
333 if (!VALUE_NON_EMPTY)
335 FREE_MAYBE (cookie->path);
336 cookie->path = strdupdelim (value_b, value_e);
339 else if (NAME_IS ("expires"))
344 if (!VALUE_NON_EMPTY)
346 BOUNDED_TO_ALLOCA (value_b, value_e, value_copy);
348 expires = http_atotm (value_copy);
351 cookie->permanent = 1;
352 cookie->expiry_time = (time_t)expires;
355 /* Error in expiration spec. Assume default (cookie valid for
359 /* According to netscape's specification, expiry time in the
360 past means that discarding of a matching cookie is
362 if (cookie->expiry_time < cookies_now)
363 cookie->discard_requested = 1;
367 else if (NAME_IS ("max-age"))
372 if (!VALUE_NON_EMPTY)
374 BOUNDED_TO_ALLOCA (value_b, value_e, value_copy);
376 sscanf (value_copy, "%lf", &maxage);
378 /* something went wrong. */
380 cookie->permanent = 1;
381 cookie->expiry_time = cookies_now + maxage;
383 /* According to rfc2109, a cookie with max-age of 0 means that
384 discarding of a matching cookie is requested. */
386 cookie->discard_requested = 1;
390 else if (NAME_IS ("secure"))
392 /* ignore value completely */
397 /* Unrecognized attribute; ignore it. */
403 /* Returns non-zero for characters that are legal in the name of an
404 attribute. This used to allow only alphanumerics, '-', and '_',
405 but we need to be more lenient because a number of sites want to
406 use weirder attribute names. rfc2965 "informally specifies"
407 attribute name (token) as "a sequence of non-special, non-white
408 space characters". So we allow everything except the stuff we know
411 #define ATTR_NAME_CHAR(c) ((c) > 32 && (c) < 127 \
412 && (c) != '"' && (c) != '=' \
413 && (c) != ';' && (c) != ',')
415 /* Fetch the next character without doing anything special if CH gets
416 set to 0. (The code executed next is expected to handle it.) */
418 #define FETCH1(ch, ptr) do { \
422 /* Like FETCH1, but jumps to `eof' label if CH gets set to 0. */
424 #define FETCH(ch, ptr) do { \
430 /* Parse the contents of the `Set-Cookie' header. The header looks
433 name1=value1; name2=value2; ...
435 Trailing semicolon is optional; spaces are allowed between all
436 tokens. Additionally, values may be quoted.
438 A new cookie is returned upon success, NULL otherwise. The
439 function `update_cookie_field' is used to update the fields of the
440 newly created cookie structure. */
442 static struct cookie *
443 parse_set_cookies (const char *sc)
445 struct cookie *cookie = cookie_new ();
447 enum { S_NAME_PRE, S_NAME, S_NAME_POST,
448 S_VALUE_PRE, S_VALUE, S_VALUE_TRAILSPACE_MAYBE,
449 S_QUOTED_VALUE, S_QUOTED_VALUE_POST,
451 S_DONE, S_ERROR } state = S_NAME_PRE;
456 const char *name_b = NULL, *name_e = NULL;
457 const char *value_b = NULL, *value_e = NULL;
461 while (state != S_DONE && state != S_ERROR)
468 else if (ATTR_NAME_CHAR (c))
475 /* empty attr name not allowed */
479 if (ATTR_NAME_CHAR (c))
481 else if (!c || c == ';' || c == '=' || ISSPACE (c))
492 else if (!c || c == ';')
494 value_b = value_e = NULL;
495 state = S_ATTR_ACTION;
512 state = S_QUOTED_VALUE;
514 else if (c == ';' || c == '\0')
516 value_b = value_e = p - 1;
517 state = S_ATTR_ACTION;
527 if (c == ';' || c == '\0')
531 state = S_ATTR_ACTION;
533 else if (ISSPACE (c))
537 state = S_VALUE_TRAILSPACE_MAYBE;
541 value_e = NULL; /* no trailing space */
545 case S_VALUE_TRAILSPACE_MAYBE:
556 state = S_QUOTED_VALUE_POST;
561 case S_QUOTED_VALUE_POST:
563 state = S_ATTR_ACTION;
564 else if (ISSPACE (c))
571 int legal = update_cookie_field (cookie, name_b, name_e,
576 BOUNDED_TO_ALLOCA (name_b, name_e, name);
577 logprintf (LOG_NOTQUIET,
578 _("Error in Set-Cookie, field `%s'"), name);
593 /* handled by loop condition */
600 delete_cookie (cookie);
601 if (state == S_ERROR)
602 logprintf (LOG_NOTQUIET, _("Syntax error in Set-Cookie at character `%c'.\n"), c);
608 delete_cookie (cookie);
609 logprintf (LOG_NOTQUIET,
610 _("Syntax error in Set-Cookie: premature end of string.\n"));
614 /* Sanity checks. These are important, otherwise it is possible for
615 malicious attackers to destroy important cookie information and/or
616 violate your privacy. */
619 #define REQUIRE_DIGITS(p) do { \
622 for (++p; ISDIGIT (*p); p++) \
626 #define REQUIRE_DOT(p) do { \
631 /* Check whether ADDR matches <digits>.<digits>.<digits>.<digits>.
633 We don't want to call network functions like inet_addr() because all
634 we need is a check, preferably one that is small, fast, and
638 numeric_address_p (const char *addr)
640 const char *p = addr;
642 REQUIRE_DIGITS (p); /* A */
643 REQUIRE_DOT (p); /* . */
644 REQUIRE_DIGITS (p); /* B */
645 REQUIRE_DOT (p); /* . */
646 REQUIRE_DIGITS (p); /* C */
647 REQUIRE_DOT (p); /* . */
648 REQUIRE_DIGITS (p); /* D */
655 /* Check whether COOKIE_DOMAIN is an appropriate domain for HOST.
656 Originally I tried to make the check compliant with rfc2109, but
657 the sites deviated too often, so I had to fall back to "tail
658 matching", as defined by the original Netscape's cookie spec. */
661 check_domain_match (const char *cookie_domain, const char *host)
663 static char *special_toplevel_domains[] = {
664 /* This is a total crock of shit, but we're living with it until
665 something better is devised. */
666 ".com", ".edu", ".net", ".org", ".gov", ".mil", ".int",
669 int i, required_dots;
673 /* Numeric address requires exact match. It also requires HOST to
675 if (numeric_address_p (cookie_domain))
676 return 0 == strcmp (cookie_domain, host);
680 /* For the sake of efficiency, check for exact match first. */
681 if (!strcasecmp (cookie_domain, host))
687 for (i = 0; i < ARRAY_SIZE (special_toplevel_domains); i++)
688 if (match_tail (cookie_domain, special_toplevel_domains[i]))
694 /* If the domain does not start with '.', require one less dot.
695 This is so that domains like "altavista.com" (which should be
696 ".altavista.com") are accepted. */
697 if (*cookie_domain != '.')
700 if (count_char (cookie_domain, '.') < required_dots)
705 if (!match_tail (host, cookie_domain))
710 /* Don't allow domain "bar.com" to match host "foobar.com". */
711 if (*cookie_domain != '.')
713 int dlen = strlen (cookie_domain);
714 int hlen = strlen (host);
715 /* cookie host: hostname.foobar.com */
716 /* desired domain: bar.com */
717 /* '.' must be here in host-> ^ */
718 if (hlen > dlen && host[hlen - dlen - 1] != '.')
727 static int path_matches PARAMS ((const char *, const char *));
729 /* Check whether PATH begins with COOKIE_PATH. */
732 check_path_match (const char *cookie_path, const char *path)
734 return path_matches (path, cookie_path);
737 /* Process the HTTP `Set-Cookie' header. This results in storing the
738 cookie or discarding a matching one, or ignoring it completely, all
739 depending on the contents. */
742 cookie_jar_process_set_cookie (struct cookie_jar *jar,
743 const char *host, int port,
744 const char *path, const char *set_cookie)
746 struct cookie *cookie;
747 cookies_now = time (NULL);
749 cookie = parse_set_cookies (set_cookie);
753 /* Sanitize parts of cookie. */
758 cookie->domain = xstrdup (host);
763 if (!check_domain_match (cookie->domain, host))
765 logprintf (LOG_NOTQUIET,
766 "Cookie coming from %s attempted to set domain to %s\n",
767 host, cookie->domain);
772 cookie->path = xstrdup (path);
775 if (!check_path_match (cookie->path, path))
777 DEBUGP (("Attempt to fake the path: %s, %s\n",
778 cookie->path, path));
783 if (cookie->discard_requested)
785 discard_matching_cookie (jar, cookie);
786 delete_cookie (cookie);
789 store_cookie (jar, cookie);
794 delete_cookie (cookie);
797 /* Support for sending out cookies in HTTP requests, based on
798 previously stored cookies. Entry point is
799 `build_cookies_request'. */
801 /* Store CHAIN to STORE if there is room in STORE. If not, increment
802 COUNT anyway, so that when the function is done, we end up with the
803 exact count of how much place we actually need. */
805 #define STORE_CHAIN(st_chain, st_store, st_size, st_count) do { \
806 if (st_count < st_size) \
807 store[st_count] = st_chain; \
811 /* Store cookie chains that match HOST. Since more than one chain can
812 match, the matches are written to STORE. No more than SIZE matches
813 are written; if more matches are present, return the number of
814 chains that would have been written. */
817 find_matching_chains (struct cookie_jar *jar, const char *host,
818 struct cookie *store[], int size)
820 struct cookie *chain;
825 if (!hash_table_count (jar->chains_by_domain))
828 STRDUP_ALLOCA (hash_key, host);
830 /* Look for an exact match. */
831 chain = hash_table_get (jar->chains_by_domain, hash_key);
833 STORE_CHAIN (chain, store, size, count);
835 dot_count = count_char (host, '.');
837 /* Match less and less specific domains. For instance, given
838 fly.srk.fer.hr, we match .srk.fer.hr, then .fer.hr. */
839 while (dot_count-- > 1)
841 /* Note: we operate directly on hash_key (a copy of HOST)
842 because we don't want to allocate new hash keys in a
844 char *p = strchr (hash_key, '.');
846 chain = hash_table_get (jar->chains_by_domain, p);
848 STORE_CHAIN (chain, store, size, count);
854 /* If FULL_PATH begins with PREFIX, return the length of PREFIX, zero
858 path_matches (const char *full_path, const char *prefix)
863 /* Wget's HTTP paths do not begin with '/' (the URL code treats it
864 as a separator), but the '/' is assumed when matching against
869 len = strlen (prefix);
871 if (0 != strncmp (full_path, prefix, len))
872 /* FULL_PATH doesn't begin with PREFIX. */
875 /* Length of PREFIX determines the quality of the match. */
879 /* Return non-zero iff COOKIE matches the given PATH, PORT, and
880 security flag. HOST is not a parameter because it is assumed that the
881 cookie comes from the correct chain.
883 If PATH_GOODNESS is non-NULL, store the "path goodness" there. The
884 said goodness is a measure of how well COOKIE matches PATH. It is
885 used for ordering cookies. */
888 matching_cookie (const struct cookie *cookie, const char *path, int port,
889 int connection_secure_p, int *path_goodness)
893 if (COOKIE_EXPIRED_P (cookie))
894 /* Ignore stale cookies. Don't bother unchaining the cookie at
895 this point -- Wget is a relatively short-lived application, and
896 stale cookies will not be saved by `save_cookies'. On the
897 other hand, this function should be as efficient as
901 if (cookie->secure && !connection_secure_p)
902 /* Don't transmit secure cookies over an insecure connection. */
904 if (cookie->port != PORT_ANY && cookie->port != port)
906 pg = path_matches (path, cookie->path);
911 /* If the caller requested path_goodness, we return it. This is
912 an optimization, so that the caller doesn't need to call
913 path_matches() again. */
918 struct weighed_cookie {
919 struct cookie *cookie;
924 /* Comparator used for uniquifying the list. */
927 equality_comparator (const void *p1, const void *p2)
929 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
930 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
932 int namecmp = strcmp (wc1->cookie->attr, wc2->cookie->attr);
933 int valuecmp = strcmp (wc1->cookie->value, wc2->cookie->value);
935 /* We only really care whether both name and value are equal. We
936 return them in this order only for consistency... */
937 return namecmp ? namecmp : valuecmp;
940 /* Eliminate duplicate cookies. "Duplicate cookies" are any two
941 cookies whose name and value are the same. Whenever a duplicate
942 pair is found, one of the cookies is removed. */
945 eliminate_dups (struct weighed_cookie *outgoing, int count)
949 /* We deploy a simple uniquify algorithm: first sort the array
950 according to our sort criterion, then uniquify it by comparing
951 each cookie with its neighbor. */
953 qsort (outgoing, count, sizeof (struct weighed_cookie), equality_comparator);
955 for (i = 0; i < count - 1; i++)
957 struct cookie *c1 = outgoing[i].cookie;
958 struct cookie *c2 = outgoing[i + 1].cookie;
959 if (!strcmp (c1->attr, c2->attr) && !strcmp (c1->value, c2->value))
961 /* c1 and c2 are the same; get rid of c1 (c2 slides into its slot). */
963 /* move all ptrs from positions [i + 1, count) to i. */
964 memmove (outgoing + i, outgoing + i + 1,
965 (count - (i + 1)) * sizeof (struct weighed_cookie));
966 /* We decrement i to counter the ++i above. Remember that
967 we've just removed the element in front of us; we need to
968 remain in place to check whether outgoing[i] matches what
969 used to be outgoing[i + 2]. */
977 /* Comparator used for sorting by quality. */
980 goodness_comparator (const void *p1, const void *p2)
982 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
983 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
985 /* Subtractions take `wc2' as the first argument because we want a
986 sort in *decreasing* order of goodness. */
987 int dgdiff = wc2->domain_goodness - wc1->domain_goodness;
988 int pgdiff = wc2->path_goodness - wc1->path_goodness;
990 /* Sort by domain goodness; if these are the same, sort by path
991 goodness. (The sorting order isn't really specified; maybe it
992 should be the other way around.) */
993 return dgdiff ? dgdiff : pgdiff;
996 /* Generate a `Cookie' header for a request that goes to HOST:PORT and
997 requests PATH from the server. The resulting string is allocated
998 with `malloc', and the caller is responsible for freeing it. If no
999 cookies pertain to this request, i.e. no cookie header should be
1000 generated, NULL is returned. */
1003 cookie_jar_generate_cookie_header (struct cookie_jar *jar, const char *host,
1004 int port, const char *path,
1005 int connection_secure_p)
1007 struct cookie *chain_default_store[20];
1008 struct cookie **all_chains = chain_default_store;
1009 int chain_store_size = ARRAY_SIZE (chain_default_store);
1012 struct cookie *cookie;
1013 struct weighed_cookie *outgoing;
1016 int result_size, pos;
1019 chain_count = find_matching_chains (jar, host, all_chains, chain_store_size);
1020 if (chain_count > chain_store_size)
1022 /* It's extremely unlikely that more than 20 chains will ever
1023 match. But since find_matching_chains reports the exact size
1024 it needs, it's easy to not have the limitation, so we
1026 all_chains = alloca (chain_count * sizeof (struct cookie *));
1027 chain_store_size = chain_count;
1034 cookies_now = time (NULL);
1036 /* Count the number of cookies whose path matches. */
1038 for (i = 0; i < chain_count; i++)
1039 for (cookie = all_chains[i]; cookie; cookie = cookie->next)
1040 if (matching_cookie (cookie, path, port, connection_secure_p, NULL))
1043 /* No matching cookies. */
1046 /* Allocate the array. */
1047 outgoing = alloca (count * sizeof (struct weighed_cookie));
1049 /* Fill the array with all the matching cookies from all the
1052 for (i = 0; i < chain_count; i++)
1053 for (cookie = all_chains[i]; cookie; cookie = cookie->next)
1056 if (!matching_cookie (cookie, path, port, connection_secure_p, &pg))
1058 outgoing[ocnt].cookie = cookie;
1059 outgoing[ocnt].domain_goodness = strlen (cookie->domain);
1060 outgoing[ocnt].path_goodness = pg;
1063 assert (ocnt == count);
1065 /* Eliminate duplicate cookies; that is, those whose name and value
1067 count = eliminate_dups (outgoing, count);
1069 /* Sort the array so that best-matching domains come first, and
1070 that, within one domain, best-matching paths come first. */
1071 qsort (outgoing, count, sizeof (struct weighed_cookie), goodness_comparator);
1073 /* Count the space the name=value pairs will take. */
1075 for (i = 0; i < count; i++)
1077 struct cookie *c = outgoing[i].cookie;
1079 result_size += strlen (c->attr) + 1 + strlen (c->value);
1082 /* Allocate output buffer:
1084 name=value pairs -- result_size
1085 "; " separators -- (count - 1) * 2
1086 \r\n line ending -- 2
1087 \0 terminator -- 1 */
1088 result_size = 8 + result_size + (count - 1) * 2 + 2 + 1;
1089 result = xmalloc (result_size);
1091 strcpy (result, "Cookie: ");
1093 for (i = 0; i < count; i++)
1095 struct cookie *c = outgoing[i].cookie;
1096 int namlen = strlen (c->attr);
1097 int vallen = strlen (c->value);
1099 memcpy (result + pos, c->attr, namlen);
1101 result[pos++] = '=';
1102 memcpy (result + pos, c->value, vallen);
1106 result[pos++] = ';';
1107 result[pos++] = ' ';
1110 result[pos++] = '\r';
1111 result[pos++] = '\n';
1112 result[pos++] = '\0';
1113 assert (pos == result_size);
1117 /* Support for loading and saving cookies. The format used for
1118 loading and saving roughly matches the format of `cookies.txt' file
1119 used by Netscape and Mozilla, at least the Unix versions. The
1120 format goes like this:
1122 DOMAIN DOMAIN-FLAG PATH SECURE-FLAG TIMESTAMP ATTR-NAME ATTR-VALUE
1124 DOMAIN -- cookie domain, optionally followed by :PORT
1125 DOMAIN-FLAG -- whether all hosts in the domain match
1127 SECURE-FLAG -- whether cookie requires secure connection
1128 TIMESTAMP -- expiry timestamp, number of seconds since epoch
1129 ATTR-NAME -- name of the cookie attribute
1130 ATTR-VALUE -- value of the cookie attribute (empty if absent)
1132 The fields are separated by TABs (but Wget's loader recognizes any
1133 whitespace). All fields are mandatory, except for ATTR-VALUE. The
1134 `-FLAG' fields are boolean, their legal values being "TRUE" and
1135 "FALSE". Empty lines, lines consisting of whitespace only, and
1136 comment lines (beginning with # optionally preceded by whitespace)
1139 Example line from cookies.txt (split in two lines for readability):
1141 .google.com TRUE / FALSE 2147368447 \
1142 PREF ID=34bb47565bbcd47b:LD=en:NR=20:TM=985172580:LM=985739012
1144 DOMAIN-FLAG is currently not honored by Wget. The cookies whose
1145 domain begins with `.' are treated as if DOMAIN-FLAG were true,
1146 while all other cookies are treated as if it were FALSE. */
1149 /* If the region [B, E) ends with :<digits>, parse the number, return
1150 it, and store new boundary (location of the `:') to DOMAIN_E_PTR.
1151 If port is not specified, return 0. */
1154 domain_port (const char *domain_b, const char *domain_e,
1155 const char **domain_e_ptr)
1159 const char *colon = memchr (domain_b, ':', domain_e - domain_b);
1162 for (p = colon + 1; p < domain_e && ISDIGIT (*p); p++)
1163 port = 10 * port + (*p - '0');
1165 /* Garbage following port number. */
1167 *domain_e_ptr = colon;
1171 #define SKIP_WS(p) do { \
1172 while (*p && ISSPACE (*p)) \
1176 #define SET_WORD_BOUNDARIES(p, b, e) do { \
1180 while (*p && !ISSPACE (*p)) \
1187 /* Load cookies from FILE. */
1190 cookie_jar_load (struct cookie_jar *jar, const char *file)
1193 FILE *fp = fopen (file, "r");
1196 logprintf (LOG_NOTQUIET, "Cannot open cookies file `%s': %s\n",
1197 file, strerror (errno));
1200 cookies_now = time (NULL);
1202 for (; ((line = read_whole_line (fp)) != NULL); xfree (line))
1204 struct cookie *cookie;
1210 char *domain_b = NULL, *domain_e = NULL;
1211 char *ignore_b = NULL, *ignore_e = NULL;
1212 char *path_b = NULL, *path_e = NULL;
1213 char *secure_b = NULL, *secure_e = NULL;
1214 char *expires_b = NULL, *expires_e = NULL;
1215 char *name_b = NULL, *name_e = NULL;
1216 char *value_b = NULL, *value_e = NULL;
1220 if (!*p || *p == '#')
1224 SET_WORD_BOUNDARIES (p, domain_b, domain_e);
1225 SET_WORD_BOUNDARIES (p, ignore_b, ignore_e);
1226 SET_WORD_BOUNDARIES (p, path_b, path_e);
1227 SET_WORD_BOUNDARIES (p, secure_b, secure_e);
1228 SET_WORD_BOUNDARIES (p, expires_b, expires_e);
1229 SET_WORD_BOUNDARIES (p, name_b, name_e);
1231 /* Don't use SET_WORD_BOUNDARIES for value because it may
1232 contain whitespace. Instead, set value_e to the end of line,
1233 modulo trailing space (this will skip the line separator.) */
1236 value_e = p + strlen (p);
1237 while (value_e > value_b && ISSPACE (*(value_e - 1)))
1239 if (value_b == value_e)
1240 /* Hmm, should we check for empty value? I guess that's
1241 legal, so I leave it. */
1244 cookie = cookie_new ();
1246 cookie->attr = strdupdelim (name_b, name_e);
1247 cookie->value = strdupdelim (value_b, value_e);
1248 cookie->path = strdupdelim (path_b, path_e);
1250 if (BOUNDED_EQUAL (secure_b, secure_e, "TRUE"))
1253 /* DOMAIN needs special treatment because we might need to
1254 extract the port. */
1255 port = domain_port (domain_b, domain_e, (const char **)&domain_e);
1257 cookie->port = port;
1258 cookie->domain = strdupdelim (domain_b, domain_e);
1260 /* safe default in case EXPIRES field is garbled. */
1261 expiry = (double)cookies_now - 1;
1263 /* I don't like changing the line, but it's completely safe.
1264 (line is malloced.) */
1266 sscanf (expires_b, "%lf", &expiry);
1267 if (expiry < cookies_now)
1268 /* ignore stale cookie. */
1270 cookie->expiry_time = expiry;
1272 /* If the cookie has survived being saved into an external file,
1273 it is obviously permanent. */
1274 cookie->permanent = 1;
1276 store_cookie (jar, cookie);
1282 delete_cookie (cookie);
1287 /* Mapper for save_cookies callable by hash_table_map. VALUE points
1288 to the head in a chain of cookies. The function prints the entire
1292 save_cookies_mapper (void *key, void *value, void *arg)
1294 FILE *fp = (FILE *)arg;
1295 char *domain = (char *)key;
1296 struct cookie *chain = (struct cookie *)value;
1297 for (; chain; chain = chain->next)
1299 if (!chain->permanent)
1301 if (COOKIE_EXPIRED_P (chain))
1304 if (chain->port != PORT_ANY)
1305 fprintf (fp, ":%d", chain->port);
1306 fprintf (fp, "\t%s\t%s\t%s\t%.0f\t%s\t%s\n",
1307 *domain == '.' ? "TRUE" : "FALSE",
1308 chain->path, chain->secure ? "TRUE" : "FALSE",
1309 (double)chain->expiry_time,
1310 chain->attr, chain->value);
1312 return 1; /* stop mapping */
1317 /* Save cookies, in format described above, to FILE. */
1320 cookie_jar_save (struct cookie_jar *jar, const char *file)
1324 DEBUGP (("Saving cookies to %s.\n", file));
1326 cookies_now = time (NULL);
1328 fp = fopen (file, "w");
1331 logprintf (LOG_NOTQUIET, _("Cannot open cookies file `%s': %s\n"),
1332 file, strerror (errno));
1336 fputs ("# HTTP cookie file.\n", fp);
1337 fprintf (fp, "# Generated by Wget on %s.\n", datetime_str (NULL));
1338 fputs ("# Edit at your own risk.\n\n", fp);
1340 hash_table_map (jar->chains_by_domain, save_cookies_mapper, fp);
1343 logprintf (LOG_NOTQUIET, _("Error writing to `%s': %s\n"),
1344 file, strerror (errno));
1346 if (fclose (fp) < 0)
1347 logprintf (LOG_NOTQUIET, _("Error closing `%s': %s\n"),
1348 file, strerror (errno));
1350 DEBUGP (("Done saving cookies.\n"));
1353 /* Destroy all the elements in the chain and unhook it from the cookie
1354 jar. This is written in the form of a callback to hash_table_map
1355 and used by cookie_jar_delete to delete all the cookies in a
1359 nuke_cookie_chain (void *value, void *key, void *arg)
1361 char *chain_key = (char *)value;
1362 struct cookie *chain = (struct cookie *)key;
1363 struct cookie_jar *jar = (struct cookie_jar *)arg;
1365 /* Remove the chain from the table and free the key. */
1366 hash_table_remove (jar->chains_by_domain, chain_key);
1369 /* Then delete all the cookies in the chain. */
1372 struct cookie *next = chain->next;
1373 delete_cookie (chain);
1381 /* Clean up cookie-related data. */
1384 cookie_jar_delete (struct cookie_jar *jar)
1386 hash_table_map (jar->chains_by_domain, nuke_cookie_chain, jar);
1387 hash_table_destroy (jar->chains_by_domain);