1 /* Support for cookies.
2 Copyright (C) 2001 Free Software Foundation, Inc.
4 This file is part of GNU Wget.
6 GNU Wget is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or (at
9 your option) any later version.
11 GNU Wget is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Wget; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
20 /* Written by Hrvoje Niksic. Parts are loosely inspired by cookie
21 code submitted by Tomasz Wegrzanowski. */
41 /* Hash table that maps domain names to cookie chains. */
43 static struct hash_table *cookies_hash_table;
45 /* This should be set by entry points in this file, so the low-level
46 functions don't need to call time() all the time. */
48 static time_t cookies_now;
50 /* This should *really* be in a .h file! */
51 time_t http_atotm PARAMS ((char *));
54 /* Definition of `struct cookie' and the most basic functions. */
57 char *domain; /* domain of the cookie */
58 int port; /* port number */
59 char *path; /* path prefix of the cookie */
60 int secure; /* whether cookie should be
61 transmitted over non-https
63 int permanent; /* whether the cookie should outlive
65 unsigned long expiry_time; /* time when the cookie expires */
66 int discard_requested; /* whether cookie was created to
67 request discarding another
70 char *attr; /* cookie attribute name */
71 char *value; /* cookie attribute value */
73 struct cookie *next; /* used for chaining of cookies in the
77 /* Allocate and return a new, empty cookie structure. */
79 static struct cookie *
82 struct cookie *cookie = xmalloc (sizeof (struct cookie));
83 memset (cookie, '\0', sizeof (struct cookie));
85 /* If we don't know better, assume cookie is non-permanent and valid
86 for the entire session. */
87 cookie->expiry_time = ~0UL;
89 /* Assume default port. */
95 /* Deallocate COOKIE and its components. */
98 delete_cookie (struct cookie *cookie)
100 FREE_MAYBE (cookie->domain);
101 FREE_MAYBE (cookie->path);
102 FREE_MAYBE (cookie->attr);
103 FREE_MAYBE (cookie->value);
107 /* Functions for storing cookies.
109 All cookies can be referenced through cookies_hash_table. The key
110 in that table is the domain name, and the value is a linked list of
111 all cookies from that domain. Every new cookie is placed on the head of its chain. */
114 /* Write "HOST:PORT" to a stack-allocated area and make RESULT point
115 to that area. RESULT should be a character pointer. Useful for
116 creating HOST:PORT strings, which are the keys in the hash table. */
119 #define SET_HOSTPORT(host, port, result) do { \
120 int HP_len = strlen (host); \
121 result = alloca (HP_len + 1 + numdigit (port) + 1); \
122 memcpy (result, host, HP_len); \
123 result[HP_len] = ':'; \
124 long_to_string (result + HP_len + 1, port); \
127 /* Find cookie chain that corresponds to DOMAIN (exact) and PORT. */
129 static struct cookie *
130 find_cookie_chain_exact (const char *domain, int port)
133 if (!cookies_hash_table)
135 SET_HOSTPORT (domain, port, key);
136 return hash_table_get (cookies_hash_table, key);
139 /* Find and return the cookie whose domain, path, and attribute name
140 correspond to COOKIE. If found, PREVPTR will point to the location
141 of the cookie previous in chain, or NULL if the found cookie is the head of its chain.
144 If no matching cookie is found, return NULL. */
146 static struct cookie *
147 find_matching_cookie (struct cookie *cookie, struct cookie **prevptr)
149 struct cookie *chain, *prev;
151 if (!cookies_hash_table)
154 chain = find_cookie_chain_exact (cookie->domain, cookie->port);
159 for (; chain; prev = chain, chain = chain->next)
160 if (!strcmp (cookie->path, chain->path)
161 && !strcmp (cookie->attr, chain->attr))
172 /* Store COOKIE to memory.
174 This is done by placing COOKIE at the head of its chain. However,
175 if COOKIE matches a cookie already in memory, as determined by
176 find_matching_cookie, the old cookie is unlinked and destroyed.
178 The key of each chain's hash table entry is allocated only the
179 first time; next hash_table_put's reuse the same key. */
182 store_cookie (struct cookie *cookie)
184 struct cookie *chain_head;
188 if (!cookies_hash_table)
189 /* If the hash table is not initialized, do so now, because we'll
190 need to store things. */
191 cookies_hash_table = make_nocase_string_hash_table (0);
193 /* Initialize hash table key. */
194 SET_HOSTPORT (cookie->domain, cookie->port, hostport);
196 if (hash_table_get_pair (cookies_hash_table, hostport,
197 &chain_key, &chain_head))
199 /* There already exists a chain of cookies with this exact
200 domain. We need to check for duplicates -- if an existing
201 cookie exactly matches our domain, path and name, we replace
204 struct cookie *victim = find_matching_cookie (cookie, &prev);
208 /* Remove VICTIM from the chain. COOKIE will be placed at the head. */
212 prev->next = victim->next;
213 cookie->next = chain_head;
217 /* prev is NULL; apparently VICTIM was at the head of
218 the chain. This place will be taken by COOKIE, so
219 all we need to do is: */
220 cookie->next = victim->next;
222 delete_cookie (victim);
223 DEBUGP (("Deleted old cookie (to be replaced.)\n"));
226 cookie->next = chain_head;
230 /* We are now creating the chain. Allocate the string that will
231 be used as a key. It is unsafe to use cookie->domain for
232 that, because it might get deallocated by the above code at
235 chain_key = xstrdup (hostport);
238 hash_table_put (cookies_hash_table, chain_key, cookie);
240 DEBUGP (("\nStored cookie %s %d %s %s %d %s %s %s\n",
241 cookie->domain, cookie->port, cookie->path,
242 cookie->permanent ? "permanent" : "nonpermanent",
244 asctime (localtime ((time_t *)&cookie->expiry_time)),
245 cookie->attr, cookie->value));
248 /* Discard a cookie matching COOKIE's domain, path, and attribute
249 name. This gets called when we encounter a cookie whose expiry
250 date is in the past, or whose max-age is set to 0. The former
251 corresponds to netscape cookie spec, while the latter is specified by rfc2109. */
255 discard_matching_cookie (struct cookie *cookie)
257 struct cookie *prev, *victim;
259 if (!cookies_hash_table
260 || !hash_table_count (cookies_hash_table))
261 /* No elements == nothing to discard. */
264 victim = find_matching_cookie (cookie, &prev);
268 /* Simply unchain the victim. */
269 prev->next = victim->next;
272 /* VICTIM was head of its chain. We need to place a new
273 cookie at the head. */
276 char *chain_key = NULL;
279 SET_HOSTPORT (victim->domain, victim->port, hostport);
280 res = hash_table_get_pair (cookies_hash_table, hostport,
285 /* VICTIM was the only cookie in the chain. Destroy the
286 chain and deallocate the chain key. */
288 hash_table_remove (cookies_hash_table, hostport);
292 hash_table_put (cookies_hash_table, chain_key, victim->next);
294 delete_cookie (victim);
295 DEBUGP (("Discarded old cookie.\n"));
299 /* Functions for parsing the `Set-Cookie' header, and creating new
300 cookies from the wire. */
303 #define NAME_IS(string_literal) \
304 BOUNDED_EQUAL_NO_CASE (name_b, name_e, string_literal)
306 #define VALUE_EXISTS (value_b && value_e)
308 #define VALUE_NON_EMPTY (VALUE_EXISTS && (value_b != value_e))
310 /* Update the appropriate cookie field. [name_b, name_e) are expected
311 to delimit the attribute name, while [value_b, value_e) (optional)
312 should delimit the attribute value.
314 When called the first time, it will set the cookie's attribute name
315 and value. After that, it will check the attribute name for
316 special fields such as `domain', `path', etc. Where appropriate,
317 it will parse the values of the fields it recognizes and fill the
318 corresponding fields in COOKIE.
320 Returns 1 on success. Returns zero in case a syntax error is
321 found; such a cookie should be discarded. */
324 update_cookie_field (struct cookie *cookie,
325 const char *name_b, const char *name_e,
326 const char *value_b, const char *value_e)
328 assert (name_b != NULL && name_e != NULL);
334 cookie->attr = strdupdelim (name_b, name_e);
335 cookie->value = strdupdelim (value_b, value_e);
339 if (NAME_IS ("domain"))
341 if (!VALUE_NON_EMPTY)
343 FREE_MAYBE (cookie->domain);
344 cookie->domain = strdupdelim (value_b, value_e);
347 else if (NAME_IS ("path"))
349 if (!VALUE_NON_EMPTY)
351 FREE_MAYBE (cookie->path);
352 cookie->path = strdupdelim (value_b, value_e);
355 else if (NAME_IS ("expires"))
360 if (!VALUE_NON_EMPTY)
362 BOUNDED_TO_ALLOCA (value_b, value_e, value_copy);
364 expires = http_atotm (value_copy);
367 cookie->permanent = 1;
368 cookie->expiry_time = (unsigned long)expires;
371 /* Error in expiration spec. Assume default (cookie valid for
372 this session.) #### Should we return 0 and invalidate the
376 /* According to netscape's specification, expiry time in the
377 past means that discarding of a matching cookie is requested. */
379 if (cookie->expiry_time < cookies_now)
380 cookie->discard_requested = 1;
384 else if (NAME_IS ("max-age"))
389 if (!VALUE_NON_EMPTY)
391 BOUNDED_TO_ALLOCA (value_b, value_e, value_copy);
393 sscanf (value_copy, "%lf", &maxage);
395 /* something is wrong. */
397 cookie->permanent = 1;
398 cookie->expiry_time = (unsigned long)cookies_now + (unsigned long)maxage;
400 /* According to rfc2109, a cookie with max-age of 0 means that
401 discarding of a matching cookie is requested. */
403 cookie->discard_requested = 1;
407 else if (NAME_IS ("secure"))
409 /* ignore value completely */
414 /* Unrecognized attribute; ignore it. */
420 /* Returns non-zero for characters that are legal in the name of an
421 attribute. This used to allow only alphanumerics, '-', and '_',
422 but we need to be more lenient because a number of sites want to
423 use weirder attribute names. rfc2965 "informally specifies"
424 attribute name (token) as "a sequence of non-special, non-white
425 space characters". So we allow everything except the stuff we know
428 #define ATTR_NAME_CHAR(c) ((c) > 32 && (c) < 127 \
429 && (c) != '"' && (c) != '=' \
430 && (c) != ';' && (c) != ',')
432 /* Fetch the next character without doing anything special if CH gets
433 set to 0. (The code executed next is expected to handle it.) */
435 #define FETCH1(ch, ptr) do { \
439 /* Like FETCH1, but jumps to `eof' label if CH gets set to 0. */
441 #define FETCH(ch, ptr) do { \
447 /* Parse the contents of the `Set-Cookie' header. The header looks
450 name1=value1; name2=value2; ...
452 Trailing semicolon is optional; spaces are allowed between all
453 tokens. Additionally, values may be quoted.
455 A new cookie is returned upon success, NULL otherwise. The
456 function `update_cookie_field' is used to update the fields of the
457 newly created cookie structure. */
459 static struct cookie *
460 parse_set_cookies (const char *sc)
462 struct cookie *cookie = cookie_new ();
464 enum { S_NAME_PRE, S_NAME, S_NAME_POST,
465 S_VALUE_PRE, S_VALUE, S_VALUE_TRAILSPACE_MAYBE,
466 S_QUOTED_VALUE, S_QUOTED_VALUE_POST,
468 S_DONE, S_ERROR } state = S_NAME_PRE;
473 const char *name_b = NULL, *name_e = NULL;
474 const char *value_b = NULL, *value_e = NULL;
478 while (state != S_DONE && state != S_ERROR)
485 else if (ATTR_NAME_CHAR (c))
492 /* empty attr name not allowed */
496 if (ATTR_NAME_CHAR (c))
498 else if (!c || c == ';' || c == '=' || ISSPACE (c))
509 else if (!c || c == ';')
511 value_b = value_e = NULL;
512 state = S_ATTR_ACTION;
529 state = S_QUOTED_VALUE;
531 else if (c == ';' || c == '\0')
533 value_b = value_e = p - 1;
534 state = S_ATTR_ACTION;
544 if (c == ';' || c == '\0')
548 state = S_ATTR_ACTION;
550 else if (ISSPACE (c))
554 state = S_VALUE_TRAILSPACE_MAYBE;
558 value_e = NULL; /* no trailing space */
562 case S_VALUE_TRAILSPACE_MAYBE:
573 state = S_QUOTED_VALUE_POST;
578 case S_QUOTED_VALUE_POST:
580 state = S_ATTR_ACTION;
581 else if (ISSPACE (c))
588 int legal = update_cookie_field (cookie, name_b, name_e,
593 BOUNDED_TO_ALLOCA (name_b, name_e, name);
594 logprintf (LOG_NOTQUIET,
595 _("Error in Set-Cookie, field `%s'"), name);
610 /* handled by loop condition */
617 delete_cookie (cookie);
618 if (state == S_ERROR)
619 logprintf (LOG_NOTQUIET, _("Syntax error in Set-Cookie at character `%c'.\n"), c);
625 delete_cookie (cookie);
626 logprintf (LOG_NOTQUIET,
627 _("Syntax error in Set-Cookie: premature end of string.\n"));
631 /* Sanity checks. These are important, otherwise it is possible for
632 malicious attackers to destroy important cookie information and/or
633 violate your privacy. */
636 #define REQUIRE_DIGITS(p) do { \
639 for (++p; ISDIGIT (*p); p++) \
643 #define REQUIRE_DOT(p) do { \
648 /* Check whether ADDR matches <digits>.<digits>.<digits>.<digits>.
650 We don't want to call network functions like inet_addr() because all
651 we need is a check, preferrably one that is small, fast, and
655 numeric_address_p (const char *addr)
657 const char *p = addr;
659 REQUIRE_DIGITS (p); /* A */
660 REQUIRE_DOT (p); /* . */
661 REQUIRE_DIGITS (p); /* B */
662 REQUIRE_DOT (p); /* . */
663 REQUIRE_DIGITS (p); /* C */
664 REQUIRE_DOT (p); /* . */
665 REQUIRE_DIGITS (p); /* D */
672 /* Check whether COOKIE_DOMAIN is an appropriate domain for HOST.
673 This check is compliant with rfc2109. */
676 check_domain_match (const char *cookie_domain, const char *host)
683 /* Numeric address requires exact match. It also requires HOST to
684 be an IP address. I suppose we *could* resolve HOST with
685 store_hostaddress (it would hit the hash table), but rfc2109
686 doesn't require it, and it doesn't seem very useful, so we
688 if (numeric_address_p (cookie_domain))
689 return !strcmp (cookie_domain, host);
693 /* The domain must contain at least one embedded dot. */
695 const char *rest = cookie_domain;
696 int len = strlen (rest);
698 ++rest, --len; /* ignore first dot */
701 if (rest[len - 1] == '.')
702 --len; /* ignore last dot */
704 if (!memchr (rest, '.', len))
711 /* For the sake of efficiency, check for exact match first. */
712 if (!strcasecmp (cookie_domain, host))
717 /* In rfc2109 terminology, HOST needs domain-match COOKIE_DOMAIN.
718 This means that COOKIE_DOMAIN needs to start with `.' and be an
719 FQDN, and that HOST must end with COOKIE_DOMAIN. */
720 if (*cookie_domain != '.')
725 /* To proceed, we need to examine two parts of HOST: its head and
726 its tail. Head and tail are defined in terms of the length of
727 the domain, like this:
729 HHHHTTTTTTTTTTTTTTT <- host
730 DDDDDDDDDDDDDDD <- domain
732 That is, "head" is the part of the host before (hlen - dlen), and
733 "tail" is what follows.
735 For the domain to match, two conditions need to be true:
737 1. Tail must equal DOMAIN.
738 2. Head must not contain an embedded dot. */
740 headlen = strlen (host) - strlen (cookie_domain);
743 /* DOMAIN must be a proper subset of HOST. */
745 tail = host + headlen;
750 if (strcasecmp (tail, cookie_domain))
755 /* Test (2) is not part of the "domain-match" itself, but is
756 recommended by rfc2109 for reasons of privacy. */
759 if (memchr (host, '.', headlen))
767 static int path_matches PARAMS ((const char *, const char *));
769 /* Check whether PATH begins with COOKIE_PATH. */
772 check_path_match (const char *cookie_path, const char *path)
774 return path_matches (path, cookie_path);
777 /* Parse the `Set-Cookie' header and, if the cookie is legal, store it
781 set_cookie_header_cb (const char *hdr, void *closure)
783 struct url *u = (struct url *)closure;
784 struct cookie *cookie;
786 cookies_now = time (NULL);
788 cookie = parse_set_cookies (hdr);
792 /* Sanitize parts of cookie. */
795 cookie->domain = xstrdup (u->host);
798 if (!check_domain_match (cookie->domain, u->host))
800 DEBUGP (("Attempt to fake the domain: %s, %s\n",
801 cookie->domain, u->host));
806 cookie->path = xstrdup (u->path);
809 if (!check_path_match (cookie->path, u->path))
811 DEBUGP (("Attempt to fake the path: %s, %s\n",
812 cookie->path, u->path));
817 cookie->port = u->port;
819 if (cookie->discard_requested)
821 discard_matching_cookie (cookie);
822 delete_cookie (cookie);
826 store_cookie (cookie);
831 delete_cookie (cookie);
835 /* Support for sending out cookies in HTTP requests, based on
836 previously stored cookies. Entry point is
837 `build_cookies_request'. */
840 /* Count how many times CHR occurs in STRING. */
843 count_char (const char *string, char chr)
847 for (p = string; *p; p++)
853 /* Store CHAIN to STORE if there is room in STORE. If not, increment
854 COUNT anyway, so that when the function is done, we end up with the
855 exact count of how much place we actually need. */
857 #define STORE_CHAIN(st_chain, st_store, st_size, st_count) do { \
858 if (st_count < st_size) \
859 store[st_count] = st_chain; \
863 /* Store cookie chains that match HOST, PORT. Since more than one
864 chain can match, the matches are written to STORE. No more than
865 SIZE matches are written; if more matches are present, return the
866 number of chains that would have been written. */
869 find_matching_chains (const char *host, int port,
870 struct cookie *store[], int size)
872 struct cookie *chain;
877 if (!cookies_hash_table)
880 SET_HOSTPORT (host, port, hash_key);
883 chain = hash_table_get (cookies_hash_table, hash_key);
885 STORE_CHAIN (chain, store, size, count);
887 dot_count = count_char (host, '.');
889 /* Match less and less specific domains. For instance, given
890 fly.srk.fer.hr, we match .srk.fer.hr, then .fer.hr. */
891 while (dot_count-- > 1)
893 /* Note: we operate directly on hash_key (in form host:port)
894 because we don't want to allocate new hash keys in a
896 char *p = strchr (hash_key, '.');
898 chain = hash_table_get (cookies_hash_table, p);
900 STORE_CHAIN (chain, store, size, count);
906 /* If FULL_PATH begins with PREFIX, return the length of PREFIX, zero otherwise. */
910 path_matches (const char *full_path, const char *prefix)
912 int len = strlen (prefix);
913 if (strncmp (full_path, prefix, len))
914 /* FULL_PATH doesn't begin with PREFIX. */
917 /* Length of PREFIX determines the quality of the match. */
922 matching_cookie (const struct cookie *cookie, const char *path,
923 int connection_secure_p, int *path_goodness)
927 if (cookie->expiry_time < cookies_now)
928 /* Ignore stale cookies. There is no need to unchain the cookie
929 at this point -- Wget is a relatively short-lived application,
930 and stale cookies will not be saved by `save_cookies'. */
932 if (cookie->secure && !connection_secure_p)
933 /* Don't transmit secure cookies over an insecure connection. */
935 pg = path_matches (path, cookie->path);
940 /* If the caller requested path_goodness, we return it. This is
941 an optimization, so that the caller doesn't need to call
942 path_matches() again. */
947 struct weighed_cookie {
948 struct cookie *cookie;
953 /* Comparator used for uniquifying the list. */
956 equality_comparator (const void *p1, const void *p2)
958 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
959 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
961 int namecmp = strcmp (wc1->cookie->attr, wc2->cookie->attr);
962 int valuecmp = strcmp (wc1->cookie->value, wc2->cookie->value);
964 /* We only really care whether both name and value are equal. We
965 return them in this order only for consistency... */
966 return namecmp ? namecmp : valuecmp;
969 /* Eliminate duplicate cookies. "Duplicate cookies" are any two
970 cookies whose name and value are the same. Whenever a duplicate
971 pair is found, one of the cookies is removed. */
974 eliminate_dups (struct weighed_cookie *outgoing, int count)
978 /* We deploy a simple uniquify algorithm: first sort the array
979 according to our sort criterion, then uniquify it by comparing
980 each cookie with its neighbor. */
982 qsort (outgoing, count, sizeof (struct weighed_cookie), equality_comparator);
984 for (i = 0; i < count - 1; i++)
986 struct cookie *c1 = outgoing[i].cookie;
987 struct cookie *c2 = outgoing[i + 1].cookie;
988 if (!strcmp (c1->attr, c2->attr) && !strcmp (c1->value, c2->value))
990 /* c1 and c2 are the same; get rid of c2. */
992 /* move all ptrs from positions [i + 1, count) to i. */
993 memmove (outgoing + i, outgoing + i + 1,
994 (count - (i + 1)) * sizeof (struct weighed_cookie));
995 /* We decrement i to counter the ++i above. Remember that
996 we've just removed the element in front of us; we need to
997 remain in place to check whether outgoing[i] matches what
998 used to be outgoing[i + 2]. */
1006 /* Comparator used for sorting by quality. */
1009 goodness_comparator (const void *p1, const void *p2)
1011 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
1012 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
1014 /* Subtractions take `wc2' as the first argument because we want a
1015 sort in *decreasing* order of goodness. */
1016 int dgdiff = wc2->domain_goodness - wc1->domain_goodness;
1017 int pgdiff = wc2->path_goodness - wc1->path_goodness;
1019 /* Sort by domain goodness; if these are the same, sort by path
1020 goodness. (The sorting order isn't really specified; maybe it
1021 should be the other way around.) */
1022 return dgdiff ? dgdiff : pgdiff;
1025 /* Build a `Cookie' header for a request that goes to HOST:PORT and
1026 requests PATH from the server. The resulting string is allocated
1027 with `malloc', and the caller is responsible for freeing it. If no
1028 cookies pertain to this request, i.e. no cookie header should be
1029 generated, NULL is returned. */
1032 build_cookies_request (const char *host, int port, const char *path,
1033 int connection_secure_p)
1035 struct cookie *chain_default_store[20];
1036 struct cookie **all_chains = chain_default_store;
1037 int chain_store_size = ARRAY_SIZE (chain_default_store);
1040 struct cookie *cookie;
1041 struct weighed_cookie *outgoing;
1044 int result_size, pos;
1047 chain_count = find_matching_chains (host, port, all_chains, chain_store_size);
1048 if (chain_count > chain_store_size)
1050 /* It's extremely unlikely that more than 20 chains will ever
1051 match. But since find_matching_chains reports the exact size
1052 it needs, it's easy to not have the limitation, so we
1054 all_chains = alloca (chain_count * sizeof (struct cookie *));
1055 chain_store_size = chain_count;
1062 cookies_now = time (NULL);
1064 /* Count the number of cookies whose path matches. */
1066 for (i = 0; i < chain_count; i++)
1067 for (cookie = all_chains[i]; cookie; cookie = cookie->next)
1068 if (matching_cookie (cookie, path, connection_secure_p, NULL))
1071 /* No matching cookies. */
1074 /* Allocate the array. */
1075 outgoing = alloca (count * sizeof (struct weighed_cookie));
1077 /* Fill the array with all the matching cookies from all the
1080 for (i = 0; i < chain_count; i++)
1081 for (cookie = all_chains[i]; cookie; cookie = cookie->next)
1084 if (!matching_cookie (cookie, path, connection_secure_p, &pg))
1086 outgoing[ocnt].cookie = cookie;
1087 outgoing[ocnt].domain_goodness = strlen (cookie->domain);
1088 outgoing[ocnt].path_goodness = pg;
1091 assert (ocnt == count);
1093 /* Eliminate duplicate cookies; that is, those whose name and value are the same. */
1095 count = eliminate_dups (outgoing, count);
1097 /* Sort the array so that best-matching domains come first, and
1098 that, within one domain, best-matching paths come first. */
1099 qsort (outgoing, count, sizeof (struct weighed_cookie), goodness_comparator);
1101 /* Count the space the name=value pairs will take. */
1103 for (i = 0; i < count; i++)
1105 struct cookie *c = outgoing[i].cookie;
1107 result_size += strlen (c->attr) + 1 + strlen (c->value);
1110 /* Allocate output buffer:
1112 name=value pairs -- result_size
1113 "; " separators -- (count - 1) * 2
1114 \r\n line ending -- 2
1115 \0 terminator -- 1 */
1116 result_size = 8 + result_size + (count - 1) * 2 + 2 + 1;
1117 result = xmalloc (result_size);
1119 strcpy (result, "Cookie: ");
1121 for (i = 0; i < count; i++)
1123 struct cookie *c = outgoing[i].cookie;
1124 int namlen = strlen (c->attr);
1125 int vallen = strlen (c->value);
1127 memcpy (result + pos, c->attr, namlen);
1129 result[pos++] = '=';
1130 memcpy (result + pos, c->value, vallen);
1134 result[pos++] = ';';
1135 result[pos++] = ' ';
1138 result[pos++] = '\r';
1139 result[pos++] = '\n';
1140 result[pos++] = '\0';
1141 assert (pos == result_size);
1145 /* Support for loading and saving cookies. The format used for
1146 loading and saving roughly matches the format of `cookies.txt' file
1147 used by Netscape and Mozilla, at least the Unix versions. The
1148 format goes like this:
1150 DOMAIN DOMAIN-FLAG PATH SECURE-FLAG TIMESTAMP ATTR-NAME ATTR-VALUE
1152 DOMAIN -- cookie domain, optionally followed by :PORT
1153 DOMAIN-FLAG -- whether all hosts in the domain match
1155 SECURE-FLAG -- whether cookie requires secure connection
1156 TIMESTAMP -- expiry timestamp, number of seconds since epoch
1157 ATTR-NAME -- name of the cookie attribute
1158 ATTR-VALUE -- value of the cookie attribute (empty if absent)
1160 The fields are separated by TABs (but Wget's loader recognizes any
1161 whitespace). All fields are mandatory, except for ATTR-VALUE. The
1162 `-FLAG' fields are boolean, their legal values being "TRUE" and
1163 "FALSE". Empty lines, lines consisting of whitespace only, and
1164 comment lines (beginning with # optionally preceded by whitespace)
1167 Example line from cookies.txt (split in two lines for readability):
1169 .google.com TRUE / FALSE 2147368447 \
1170 PREF ID=34bb47565bbcd47b:LD=en:NR=20:TM=985172580:LM=985739012
1172 DOMAIN-FLAG is currently not honored by Wget. The cookies whose
1173 domain begins with `.' are treated as if DOMAIN-FLAG were true,
1174 while all other cookies are treated as if it were FALSE. */
1177 /* If the region [B, E) ends with :<digits>, parse the number, return
1178 it, and store new boundary (location of the `:') to DOMAIN_E_PTR.
1179 If port is not specified, return 0. */
1182 domain_port (const char *domain_b, const char *domain_e,
1183 const char **domain_e_ptr)
1187 const char *colon = memchr (domain_b, ':', domain_e - domain_b);
1190 for (p = colon + 1; p < domain_e && ISDIGIT (*p); p++)
1191 port = 10 * port + (*p - '0');
1193 /* Garbage following port number. */
1195 *domain_e_ptr = colon;
1199 #define SKIP_WS(p) do { \
1200 while (*p && ISSPACE (*p)) \
1204 #define SET_WORD_BOUNDARIES(p, b, e) do { \
1208 while (*p && !ISSPACE (*p)) \
1215 /* Load cookies from FILE. */
1218 load_cookies (const char *file)
1221 FILE *fp = fopen (file, "r");
1224 logprintf (LOG_NOTQUIET, "Cannot open cookies file `%s': %s\n",
1225 file, strerror (errno));
1228 cookies_now = time (NULL);
1230 for (; ((line = read_whole_line (fp)) != NULL); xfree (line))
1232 struct cookie *cookie;
1237 char *domain_b = NULL, *domain_e = NULL;
1238 char *ignore_b = NULL, *ignore_e = NULL;
1239 char *path_b = NULL, *path_e = NULL;
1240 char *secure_b = NULL, *secure_e = NULL;
1241 char *expires_b = NULL, *expires_e = NULL;
1242 char *name_b = NULL, *name_e = NULL;
1243 char *value_b = NULL, *value_e = NULL;
1247 if (!*p || *p == '#')
1251 SET_WORD_BOUNDARIES (p, domain_b, domain_e);
1252 SET_WORD_BOUNDARIES (p, ignore_b, ignore_e);
1253 SET_WORD_BOUNDARIES (p, path_b, path_e);
1254 SET_WORD_BOUNDARIES (p, secure_b, secure_e);
1255 SET_WORD_BOUNDARIES (p, expires_b, expires_e);
1256 SET_WORD_BOUNDARIES (p, name_b, name_e);
1258 /* Don't use SET_WORD_BOUNDARIES for value because it may
1259 contain whitespace. Instead, set value_e to the end of line,
1260 modulo trailing space (this will skip the line separator.) */
1263 value_e = p + strlen (p);
1264 while (value_e > value_b && ISSPACE (*(value_e - 1)))
1266 if (value_b == value_e)
1267 /* Hmm, should we check for empty value? I guess that's
1268 legal, so I leave it. */
1271 cookie = cookie_new ();
1273 cookie->attr = strdupdelim (name_b, name_e);
1274 cookie->value = strdupdelim (value_b, value_e);
1275 cookie->path = strdupdelim (path_b, path_e);
1277 if (BOUNDED_EQUAL (secure_b, secure_e, "TRUE"))
1280 /* DOMAIN needs special treatment because we might need to
1281 extract the port. */
1282 port = domain_port (domain_b, domain_e, (const char **)&domain_e);
1284 cookie->port = port;
1286 cookie->port = cookie->secure ? DEFAULT_HTTPS_PORT : DEFAULT_HTTP_PORT;
1288 cookie->domain = strdupdelim (domain_b, domain_e);
1290 /* safe default in case EXPIRES field is garbled. */
1291 cookie->expiry_time = cookies_now - 1;
1293 /* I don't like changing the line, but it's completely safe.
1294 (line is malloced.) */
1296 sscanf (expires_b, "%lu", &cookie->expiry_time);
1297 if (cookie->expiry_time < cookies_now)
1298 /* ignore stale cookie. */
1300 cookie->permanent = 1;
1302 store_cookie (cookie);
1308 delete_cookie (cookie);
1313 /* Mapper for save_cookies callable by hash_table_map. VALUE points
1314 to the head in a chain of cookies. The function prints the entire
1318 save_cookies_mapper (void *key, void *value, void *arg)
1320 FILE *fp = (FILE *)arg;
1321 char *domain = (char *)key;
1322 struct cookie *chain = (struct cookie *)value;
1323 for (; chain; chain = chain->next)
1325 if (!chain->permanent)
1327 if (chain->expiry_time < cookies_now)
1329 fprintf (fp, "%s\t%s\t%s\t%s\t%lu\t%s\t%s\n",
1330 domain, *domain == '.' ? "TRUE" : "FALSE",
1331 chain->path, chain->secure ? "TRUE" : "FALSE",
1333 chain->attr, chain->value);
1335 return 1; /* stop mapping */
1340 /* Save cookies, in format described above, to FILE. */
1343 save_cookies (const char *file)
1347 if (!cookies_hash_table
1348 || !hash_table_count (cookies_hash_table))
1349 /* no cookies stored; nothing to do. */
1352 DEBUGP (("Saving cookies to %s.\n", file));
1354 cookies_now = time (NULL);
1356 fp = fopen (file, "w");
1359 logprintf (LOG_NOTQUIET, _("Cannot open cookies file `%s': %s\n"),
1360 file, strerror (errno));
1364 fputs ("# HTTP cookie file.\n", fp);
1365 fprintf (fp, "# Generated by Wget on %s.\n", datetime_str (NULL));
1366 fputs ("# Edit at your own risk.\n\n", fp);
1368 hash_table_map (cookies_hash_table, save_cookies_mapper, fp);
1371 logprintf (LOG_NOTQUIET, _("Error writing to `%s': %s\n"),
1372 file, strerror (errno));
1374 if (fclose (fp) < 0)
1375 logprintf (LOG_NOTQUIET, _("Error closing `%s': %s\n"),
1376 file, strerror (errno));
1378 DEBUGP (("Done saving cookies.\n"));
1382 delete_cookie_chain_mapper (void *value, void *key, void *arg_ignored)
1384 char *chain_key = (char *)value;
1385 struct cookie *chain = (struct cookie *)key;
1387 /* Remove the chain from the table and free the key. */
1388 hash_table_remove (cookies_hash_table, chain_key);
1391 /* Then delete all the cookies in the chain. */
1394 struct cookie *next = chain->next;
1395 delete_cookie (chain);
1403 /* Clean up cookie-related data. */
1406 cookies_cleanup (void)
1408 if (!cookies_hash_table)
1410 hash_table_map (cookies_hash_table, delete_cookie_chain_mapper, NULL);
1411 hash_table_destroy (cookies_hash_table);
1412 cookies_hash_table = NULL;