- attr_raw_value_begin = attr_value_begin = attr_name_begin;
- attr_raw_value_end = attr_value_end = attr_name_end;
- }
- else if (*p == '=')
- {
- ADVANCE (p);
- SKIP_WS (p);
- if (*p == '\"' || *p == '\'')
- {
- int newline_seen = 0;
- char quote_char = *p;
- attr_raw_value_begin = p;
- ADVANCE (p);
- attr_value_begin = p; /* <foo bar="baz"> */
- /* ^ */
- while (*p != quote_char)
- {
- if (!newline_seen && *p == '\n')
- {
- /* If a newline is seen within the quotes, it
- is most likely that someone forgot to close
- the quote. In that case, we back out to
- the value beginning, and terminate the tag
- at either `>' or the delimiter, whichever
- comes first. Such a tag terminated at `>'
- is discarded. */
- p = attr_value_begin;
- newline_seen = 1;
- continue;
- }
- else if (newline_seen && *p == '>')
- break;
- ADVANCE (p);
- }
- attr_value_end = p; /* <foo bar="baz"> */
- /* ^ */
- if (*p == quote_char)
- ADVANCE (p);
- else
- goto look_for_tag;
- attr_raw_value_end = p; /* <foo bar="baz"> */
- /* ^ */
- operation = AP_DECODE_ENTITIES;
- if (flags & MHT_TRIM_VALUES)
- operation |= AP_TRIM_BLANKS;
- }
- else
- {
- attr_value_begin = p; /* <foo bar=baz> */
- /* ^ */
- /* According to SGML, a name token should consist only
- of alphanumerics, . and -. However, this is often
- violated by, for instance, `%' in `width=75%'.
- We'll be liberal and allow just about anything as
- an attribute value. */
- while (!ISSPACE (*p) && *p != '>')
- ADVANCE (p);
- attr_value_end = p; /* <foo bar=baz qux=quix> */
- /* ^ */
- if (attr_value_begin == attr_value_end)
- /* <foo bar=> */
- /* ^ */
- goto backout_tag;
- attr_raw_value_begin = attr_value_begin;
- attr_raw_value_end = attr_value_end;
- operation = AP_DECODE_ENTITIES;
- }
- }
- else
- {
- /* We skipped the whitespace and found something that is
- neither `=' nor the beginning of the next attribute's
- name. Back out. */
- goto backout_tag; /* <foo bar [... */
- /* ^ */
- }
-
- /* If we're not interested in the tag, don't bother with any
+ attr_raw_value_begin = attr_value_begin = attr_name_begin;
+ attr_raw_value_end = attr_value_end = attr_name_end;
+ }
+ else if (*p == '=')
+ {
+ ADVANCE (p);
+ SKIP_WS (p);
+ if (*p == '\"' || *p == '\'')
+ {
+ bool newline_seen = false;
+ char quote_char = *p;
+ attr_raw_value_begin = p;
+ ADVANCE (p);
+ attr_value_begin = p; /* <foo bar="baz"> */
+ /* ^ */
+ while (*p != quote_char)
+ {
+ if (!newline_seen && *p == '\n')
+ {
+ /* If a newline is seen within the quotes, it
+ is most likely that someone forgot to close
+ the quote. In that case, we back out to
+ the value beginning, and terminate the tag
+ at `<', `>' or the delimiter, whichever
+ comes first. Such a tag terminated at `<'
+ or `>' is discarded. */
+ p = attr_value_begin;
+ newline_seen = true;
+ continue;
+ }
+ else if (newline_seen && (*p == '<' || *p == '>'))
+ break;
+ ADVANCE (p);
+ }
+ attr_value_end = p; /* <foo bar="baz"> */
+ /* ^ */
+ if (*p == quote_char)
+ ADVANCE (p);
+ else
+ goto look_for_tag;
+ attr_raw_value_end = p; /* <foo bar="baz"> */
+ /* ^ */
+ operation = AP_DECODE_ENTITIES;
+ if (flags & MHT_TRIM_VALUES)
+ operation |= AP_TRIM_BLANKS;
+ }
+ else
+ {
+ attr_value_begin = p; /* <foo bar=baz> */
+ /* ^ */
+ /* According to SGML, a name token should consist only
+ of alphanumerics, . and -. However, this is often
+ violated by, for instance, `%' in `width=75%'.
+ We'll be liberal and allow just about anything as
+ an attribute value. */
+ while (!c_isspace (*p) && *p != '<' && *p != '>')
+ ADVANCE (p);
+ attr_value_end = p; /* <foo bar=baz qux=quix> */
+ /* ^ */
+ if (attr_value_begin == attr_value_end)
+ /* <foo bar=> */
+ /* ^ */
+ goto backout_tag;
+ attr_raw_value_begin = attr_value_begin;
+ attr_raw_value_end = attr_value_end;
+ operation = AP_DECODE_ENTITIES;
+ }
+ }
+ else
+ {
+ /* We skipped the whitespace and found something that is
+ neither `=' nor the beginning of the next attribute's
+ name. Back out. */
+ goto backout_tag; /* <foo bar [... */
+ /* ^ */
+ }
+
+ /* If we're not interested in the tag, don't bother with any