From 6d2dc0a1b3511ed0336752251a94c974a006edee Mon Sep 17 00:00:00 2001 From: hniksic Date: Tue, 28 Feb 2006 12:50:37 -0800 Subject: [PATCH] [svn] Use extract_params to parse the Set-Cookies header. --- src/ChangeLog | 6 + src/cookies.c | 458 +++++++++++++++----------------------------------- src/http.c | 14 +- src/http.h | 7 + 4 files changed, 155 insertions(+), 330 deletions(-) diff --git a/src/ChangeLog b/src/ChangeLog index 54452b5b..d2cf9308 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,9 @@ +2006-02-28 Hrvoje Niksic + + * http.c (extract_param): Declare extern so it can be used from + other files. + (extract_param): Return error for empty name. + 2006-02-28 Hrvoje Niksic * url.c (find_last_char): Define in terms of memrchr. diff --git a/src/cookies.c b/src/cookies.c index b731b856..b12e7e83 100644 --- a/src/cookies.c +++ b/src/cookies.c @@ -319,139 +319,10 @@ discard_matching_cookie (struct cookie_jar *jar, struct cookie *cookie) /* Functions for parsing the `Set-Cookie' header, and creating new cookies from the wire. */ -#define NAME_IS(string_literal) \ - BOUNDED_EQUAL_NO_CASE (name_b, name_e, string_literal) +#define TOKEN_IS(token, string_literal) \ + BOUNDED_EQUAL_NO_CASE (token.b, token.e, string_literal) -#define VALUE_EXISTS (value_b && value_e) - -#define VALUE_NON_EMPTY (VALUE_EXISTS && (value_b != value_e)) - -/* Update the appropriate cookie field. [name_b, name_e) are expected - to delimit the attribute name, while [value_b, value_e) (optional) - should delimit the attribute value. - - When called the first time, it will set the cookie's attribute name - and value. After that, it will check the attribute name for - special fields such as `domain', `path', etc. Where appropriate, - it will parse the values of the fields it recognizes and fill the - corresponding fields in COOKIE. - - Returns true on success. Returns false in case a syntax error is - found; such a cookie should be discarded. */ - -static bool -update_cookie_field (struct cookie *cookie, - const char *name_b, const char *name_e, - const char *value_b, const char *value_e) -{ - assert (name_b != NULL && name_e != NULL); - - if (!cookie->attr) - { - if (!VALUE_EXISTS) - return false; - cookie->attr = strdupdelim (name_b, name_e); - cookie->value = strdupdelim (value_b, value_e); - return true; - } - - if (NAME_IS ("domain")) - { - if (!VALUE_NON_EMPTY) - return false; - xfree_null (cookie->domain); - /* Strictly speaking, we should set cookie->domain_exact if the - domain doesn't begin with a dot. But many sites set the - domain to "foo.com" and expect "subhost.foo.com" to get the - cookie, and it apparently works. */ - if (*value_b == '.') - ++value_b; - cookie->domain = strdupdelim (value_b, value_e); - return true; - } - else if (NAME_IS ("path")) - { - if (!VALUE_NON_EMPTY) - return false; - xfree_null (cookie->path); - cookie->path = strdupdelim (value_b, value_e); - return true; - } - else if (NAME_IS ("expires")) - { - char *value_copy; - time_t expires; - - if (!VALUE_NON_EMPTY) - return false; - BOUNDED_TO_ALLOCA (value_b, value_e, value_copy); - - expires = http_atotm (value_copy); - if (expires != (time_t) -1) - { - cookie->permanent = 1; - cookie->expiry_time = expires; - } - else - /* Error in expiration spec. Assume default (cookie doesn't - expire, but valid only for this session.) */ - ; - - /* According to netscape's specification, expiry time in the - past means that discarding of a matching cookie is - requested. */ - if (cookie->expiry_time < cookies_now) - cookie->discard_requested = 1; - - return true; - } - else if (NAME_IS ("max-age")) - { - double maxage = -1; - char *value_copy; - - if (!VALUE_NON_EMPTY) - return false; - BOUNDED_TO_ALLOCA (value_b, value_e, value_copy); - - sscanf (value_copy, "%lf", &maxage); - if (maxage == -1) - /* something went wrong. */ - return false; - cookie->permanent = 1; - cookie->expiry_time = cookies_now + maxage; - - /* According to rfc2109, a cookie with max-age of 0 means that - discarding of a matching cookie is requested. */ - if (maxage == 0) - cookie->discard_requested = 1; - - return true; - } - else if (NAME_IS ("secure")) - { - /* ignore value completely */ - cookie->secure = 1; - return true; - } - else - /* Unrecognized attribute; ignore it. */ - return true; -} - -#undef NAME_IS - -/* Returns true for characters that are legal in the name of an - attribute. This used to allow only alphanumerics, '-', and '_', - but we need to be more lenient because a number of sites wants to - use weirder attribute names. rfc2965 "informally specifies" - attribute name (token) as "a sequence of non-special, non-white - space characters". So we allow everything except the stuff we know - could harm us. */ - -#define ATTR_NAME_CHAR(c) ((c) > 32 && (c) < 127 \ - && (c) != '"' && (c) != '=' \ - && (c) != ';' && (c) != ',') +#define TOKEN_NON_EMPTY(token) (token.b != NULL && token.b != token.e) /* Parse the contents of the `Set-Cookie' header. The header looks like this: @@ -461,182 +332,124 @@ update_cookie_field (struct cookie *cookie, Trailing semicolon is optional; spaces are allowed between all tokens. Additionally, values may be quoted. - A new cookie is returned upon success, NULL otherwise. The - specified CALLBACK function (normally `update_cookie_field' is used - to update the fields of the newly created cookie structure. */ + A new cookie is returned upon success, NULL otherwise. + + The first name-value pair will be used to set the cookie's + attribute name and value. Subsequent parameters will be checked + against field names such as `domain', `path', etc. Recognized + fields will be parsed and the corresponding members of COOKIE + filled. */ static struct cookie * -parse_set_cookies (const char *sc, - bool (*callback) (struct cookie *, - const char *, const char *, - const char *, const char *), - bool silent) +parse_set_cookie (const char *set_cookie, bool silent) { + const char *ptr = set_cookie; struct cookie *cookie = cookie_new (); + param_token name, value; - /* #### Hand-written DFAs are no fun to debug. We'de be better off - to rewrite this as an inline parser. */ - - enum { S_START, S_NAME, S_NAME_POST, - S_VALUE_PRE, S_VALUE, S_QUOTED_VALUE, S_VALUE_TRAILSPACE, - S_ATTR_ACTION, S_DONE, S_ERROR - } state = S_START; + if (!extract_param (&ptr, &name, &value)) + goto error; + if (!value.b) + goto error; + cookie->attr = strdupdelim (name.b, name.e); + cookie->value = strdupdelim (value.b, value.e); - const char *p = sc; - char c; - - const char *name_b = NULL, *name_e = NULL; - const char *value_b = NULL, *value_e = NULL; - - c = *p; - - while (state != S_DONE && state != S_ERROR) + while (extract_param (&ptr, &name, &value)) { - switch (state) + if (TOKEN_IS (name, "domain")) { - case S_START: - if (!c) - state = S_DONE; - else if (ISSPACE (c)) - /* Strip all whitespace preceding the name. */ - c = *++p; - else if (ATTR_NAME_CHAR (c)) - { - name_b = p; - state = S_NAME; - } - else - /* empty attr name not allowed */ - state = S_ERROR; - break; - case S_NAME: - if (!c || c == ';' || c == '=' || ISSPACE (c)) - { - name_e = p; - state = S_NAME_POST; - } - else if (ATTR_NAME_CHAR (c)) - c = *++p; - else - state = S_ERROR; - break; - case S_NAME_POST: - if (!c || c == ';') - { - value_b = value_e = NULL; - if (c == ';') - c = *++p; - state = S_ATTR_ACTION; - } - else if (c == '=') - { - c = *++p; - state = S_VALUE_PRE; - } - else if (ISSPACE (c)) - /* Ignore space and keep the state. */ - c = *++p; - else - state = S_ERROR; - break; - case S_VALUE_PRE: - if (!c || c == ';') - { - value_b = value_e = p; - if (c == ';') - c = *++p; - state = S_ATTR_ACTION; - } - else if (c == '"') - { - c = *++p; - value_b = p; - state = S_QUOTED_VALUE; - } - else if (ISSPACE (c)) - c = *++p; - else - { - value_b = p; - value_e = NULL; - state = S_VALUE; - } - break; - case S_VALUE: - if (!c || c == ';' || ISSPACE (c)) - { - value_e = p; - state = S_VALUE_TRAILSPACE; - } - else - { - value_e = NULL; /* no trailing space */ - c = *++p; - } - break; - case S_QUOTED_VALUE: - if (c == '"') - { - value_e = p; - c = *++p; - state = S_VALUE_TRAILSPACE; - } - else if (!c) - state = S_ERROR; - else - c = *++p; - break; - case S_VALUE_TRAILSPACE: - if (c == ';') + if (!TOKEN_NON_EMPTY (value)) + goto error; + xfree_null (cookie->domain); + /* Strictly speaking, we should set cookie->domain_exact if the + domain doesn't begin with a dot. But many sites set the + domain to "foo.com" and expect "subhost.foo.com" to get the + cookie, and it apparently works in browsers. */ + if (*value.b == '.') + ++value.b; + cookie->domain = strdupdelim (value.b, value.e); + } + else if (TOKEN_IS (name, "path")) + { + if (!TOKEN_NON_EMPTY (value)) + goto error; + xfree_null (cookie->path); + cookie->path = strdupdelim (value.b, value.e); + } + else if (TOKEN_IS (name, "expires")) + { + char *value_copy; + time_t expires; + + if (!TOKEN_NON_EMPTY (value)) + goto error; + BOUNDED_TO_ALLOCA (value.b, value.e, value_copy); + + expires = http_atotm (value_copy); + if (expires != (time_t) -1) { - c = *++p; - state = S_ATTR_ACTION; + cookie->permanent = 1; + cookie->expiry_time = expires; } - else if (!c) - state = S_ATTR_ACTION; - else if (ISSPACE (c)) - c = *++p; else - state = S_VALUE; - break; - case S_ATTR_ACTION: - { - bool legal = callback (cookie, name_b, name_e, value_b, value_e); - if (!legal) - { - if (!silent) - { - char *name; - BOUNDED_TO_ALLOCA (name_b, name_e, name); - logprintf (LOG_NOTQUIET, - _("Error in Set-Cookie, field `%s'"), - escnonprint (name)); - } - state = S_ERROR; - break; - } - state = S_START; - } - break; - case S_DONE: - case S_ERROR: - /* handled by loop condition */ - break; + /* Error in expiration spec. Assume default (cookie doesn't + expire, but valid only for this session.) */ + ; + + /* According to netscape's specification, expiry time in the + past means that discarding of a matching cookie is + requested. */ + if (cookie->expiry_time < cookies_now) + cookie->discard_requested = 1; + } + else if (TOKEN_IS (name, "max-age")) + { + double maxage = -1; + char *value_copy; + + if (!TOKEN_NON_EMPTY (value)) + goto error; + BOUNDED_TO_ALLOCA (value.b, value.e, value_copy); + + sscanf (value_copy, "%lf", &maxage); + if (maxage == -1) + /* something went wrong. */ + goto error; + cookie->permanent = 1; + cookie->expiry_time = cookies_now + maxage; + + /* According to rfc2109, a cookie with max-age of 0 means that + discarding of a matching cookie is requested. */ + if (maxage == 0) + cookie->discard_requested = 1; + } + else if (TOKEN_IS (name, "secure")) + { + /* ignore value completely */ + cookie->secure = 1; } + else + /* Ignore unrecognized attribute. */ + ; } - if (state == S_DONE) - return cookie; + if (*ptr) + /* extract_param has encountered a syntax error */ + goto error; - delete_cookie (cookie); - if (state != S_ERROR) - abort (); + /* The cookie has been successfully constructed; return it. */ + return cookie; + error: if (!silent) logprintf (LOG_NOTQUIET, _("Syntax error in Set-Cookie: %s at position %d.\n"), - escnonprint (sc), (int) (p - sc)); + escnonprint (set_cookie), (int) (ptr - set_cookie)); + delete_cookie (cookie); return NULL; } + +#undef TOKEN_IS +#undef TOKEN_NON_EMPTY /* Sanity checks. These are important, otherwise it is possible for mailcious attackers to destroy important cookie information and/or @@ -849,7 +662,7 @@ cookie_handle_set_cookie (struct cookie_jar *jar, simply prepend slash to PATH. */ PREPEND_SLASH (path); - cookie = parse_set_cookies (set_cookie, update_cookie_field, false); + cookie = parse_set_cookie (set_cookie, false); if (!cookie) goto out; @@ -1510,27 +1323,14 @@ cookie_jar_delete (struct cookie_jar *jar) from main. */ #ifdef TEST_COOKIES -int test_count; -char *test_results[10]; - -static bool test_parse_cookies_callback (struct cookie *ignored, - const char *nb, const char *ne, - const char *vb, const char *ve) -{ - test_results[test_count++] = strdupdelim (nb, ne); - test_results[test_count++] = strdupdelim (vb, ve); - return true; -} - void test_cookies (void) { /* Tests expected to succeed: */ static struct { - char *data; - char *results[10]; + const char *data; + const char *results[10]; } tests_succ[] = { - { "", {NULL} }, { "arg=value", {"arg", "value", NULL} }, { "arg1=value1;arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} }, { "arg1=value1; arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} }, @@ -1554,39 +1354,51 @@ test_cookies (void) for (i = 0; i < countof (tests_succ); i++) { int ind; - char *data = tests_succ[i].data; - char **expected = tests_succ[i].results; + const char *data = tests_succ[i].data; + const char **expected = tests_succ[i].results; struct cookie *c; - test_count = 0; - c = parse_set_cookies (data, test_parse_cookies_callback, true); + c = parse_set_cookie (data, true); if (!c) { printf ("NULL cookie returned for valid data: %s\n", data); continue; } - for (ind = 0; ind < test_count; ind += 2) - { - if (!expected[ind]) - break; - if (0 != strcmp (expected[ind], test_results[ind])) - printf ("Invalid name %d for '%s' (expected '%s', got '%s')\n", - ind / 2 + 1, data, expected[ind], test_results[ind]); - if (0 != strcmp (expected[ind + 1], test_results[ind + 1])) - printf ("Invalid value %d for '%s' (expected '%s', got '%s')\n", - ind / 2 + 1, data, expected[ind + 1], test_results[ind + 1]); - } - if (ind < test_count || expected[ind]) - printf ("Unmatched number of results: %s\n", data); + /* Test whether extract_param handles these cases correctly. */ + { + param_token name, value; + const char *ptr = data; + int j = 0; + while (extract_param (&ptr, &name, &value)) + { + char *n = strdupdelim (name.b, name.e); + char *v = strdupdelim (value.b, value.e); + if (!expected[j]) + { + printf ("Too many parameters for '%s'\n", data); + break; + } + if (0 != strcmp (expected[j], n)) + printf ("Invalid name %d for '%s' (expected '%s', got '%s')\n", + j / 2 + 1, data, expected[j], n); + if (0 != strcmp (expected[j + 1], v)) + printf ("Invalid value %d for '%s' (expected '%s', got '%s')\n", + j / 2 + 1, data, expected[j + 1], v); + j += 2; + free (n); + free (v); + } + if (expected[j]) + printf ("Too few parameters for '%s'\n", data); + } } for (i = 0; i < countof (tests_fail); i++) { struct cookie *c; char *data = tests_fail[i]; - test_count = 0; - c = parse_set_cookies (data, test_parse_cookies_callback, 1); + c = parse_set_cookie (data, true); if (c) printf ("Failed to report error on invalid data: %s\n", data); } diff --git a/src/http.c b/src/http.c index 72a5c12e..36680a5e 100644 --- a/src/http.c +++ b/src/http.c @@ -855,11 +855,6 @@ skip_short_body (int fd, wgint contlen) return true; } -typedef struct { - /* A token consists of characters in the [b, e) range. */ - const char *b, *e; -} param_token; - /* Extract a parameter from the HTTP header at *SOURCE and advance *SOURCE to the next parameter. Return false when there are no more parameters to extract. The name of the parameter is returned in @@ -872,19 +867,24 @@ typedef struct { return the token named "filename" and value "foo bar". The third call will return false, indicating no more valid tokens. */ -static bool +bool extract_param (const char **source, param_token *name, param_token *value) { const char *p = *source; while (ISSPACE (*p)) ++p; if (!*p) - return false; /* nothing more to extract */ + { + *source = p; + return false; /* no error; nothing more to extract */ + } /* Extract name. */ name->b = p; while (*p && !ISSPACE (*p) && *p != '=' && *p != ';') ++p; name->e = p; + if (name->b == name->e) + return false; /* empty name: error */ while (ISSPACE (*p)) ++p; if (*p == ';' || !*p) /* no value */ { diff --git a/src/http.h b/src/http.h index bb3def46..4974481a 100644 --- a/src/http.h +++ b/src/http.h @@ -38,4 +38,11 @@ void save_cookies (void); void http_cleanup (void); time_t http_atotm (const char *); +typedef struct { + /* A token consists of characters in the [b, e) range. */ + const char *b, *e; +} param_token; +bool extract_param (const char **, param_token *, param_token *); + + #endif /* HTTP_H */ -- 2.39.2