You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
-Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+In addition, as a special exception, the Free Software Foundation
+gives permission to link the code of its release of Wget with the
+OpenSSL project's "OpenSSL" library (or with modified versions of it
+that use the same license as the "OpenSSL" library), and distribute
+the linked executables. You must obey the GNU General Public License
+in all respects for all of the code used other than "OpenSSL". If you
+modify this file, you may extend this exception to your version of the
+file, but you are not obligated to do so. If you do not wish to do
+so, delete this exception statement from your version. */
/* The only entry point to this module is map_html_tags(), which see. */
return 1;
}
\f
-/* RFC1866: name [of attribute or tag] consists of letters, digits,
- periods, or hyphens. We also allow _, for compatibility with
- brain-damaged generators. */
-#define NAME_CHAR_P(x) (ISALNUM (x) || (x) == '.' || (x) == '-' || (x) == '_')
+/* Originally we adhered to RFC1866 here, and allowed only letters,
+ digits, periods, and hyphens in names (of tags or attributes).
+ However, this broke too many pages which used proprietary or
+ strange attributes, e.g. <img src="a.gif" v:shapes="whatever">.
+
+ So now we allow any character except:
+ * whitespace
+ * 8-bit and control chars
+ * characters that clearly cannot be part of a name:
+ '=', '>', '/'.
+
+ This only affects attribute and tag names; attribute values allow
+ an even greater variety of characters.
+
+ Note that NAME_CHAR_P evaluates its argument several times, so the
+ argument must not have side effects. */
+
+#define NAME_CHAR_P(x) ((x) > 32 && (x) < 127 \
+ && (x) != '=' && (x) != '>' && (x) != '/')
/* States while advancing through comments. */
#define AC_S_DONE 0
}
break;
case AC_S_DCLNAME:
- if (NAME_CHAR_P (ch))
- ch = *p++;
- else if (ch == '-')
+ if (ch == '-')
state = AC_S_DASH1;
+ else if (NAME_CHAR_P (ch))
+ ch = *p++;
else
state = AC_S_DEFAULT;
break;