/* HTML parser for Wget.
- Copyright (C) 1998-2006 Free Software Foundation, Inc.
+ Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
+ 2007 Free Software Foundation, Inc.
This file is part of GNU Wget.
You should have received a copy of the GNU General Public License
along with Wget. If not, see <http://www.gnu.org/licenses/>.
-In addition, as a special exception, the Free Software Foundation
-gives permission to link the code of its release of Wget with the
-OpenSSL project's "OpenSSL" library (or with modified versions of it
-that use the same license as the "OpenSSL" library), and distribute
-the linked executables. You must obey the GNU General Public License
-in all respects for all of the code used other than "OpenSSL". If you
-modify this file, you may extend this exception to your version of the
-file, but you are not obligated to do so. If you do not wish to do
-so, delete this exception statement from your version. */
+Additional permission under GNU GPL version 3 section 7
+
+If you modify this program, or any covered work, by linking or
+combining it with the OpenSSL project's OpenSSL library (or a
+modified version of that library), containing parts covered by the
+terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
+grants you additional permission to convey the resulting work.
+Corresponding Source for a non-source form of such a combination
+shall include the source code for the parts of OpenSSL used as well
+as that of the covered work. */
/* The only entry point to this module is map_html_tags(), which see. */
/* To test as standalone, compile with `-DSTANDALONE -I.'. You'll
still need Wget headers to compile. */
-#include <config.h>
+#include "wget.h"
#ifdef STANDALONE
# define I_REALLY_WANT_CTYPE_MACROS
#include <string.h>
#include <assert.h>
-#include "wget.h"
#include "html-parse.h"
#ifdef STANDALONE
# define xrealloc realloc
# define xfree free
-# undef ISSPACE
-# undef ISDIGIT
-# undef ISXDIGIT
-# undef ISALPHA
-# undef ISALNUM
-# undef TOLOWER
-# undef TOUPPER
-
-# define ISSPACE(x) isspace (x)
-# define ISDIGIT(x) isdigit (x)
-# define ISXDIGIT(x) isxdigit (x)
-# define ISALPHA(x) isalpha (x)
-# define ISALNUM(x) isalnum (x)
-# define TOLOWER(x) tolower (x)
-# define TOUPPER(x) toupper (x)
+# undef c_isspace
+# undef c_isdigit
+# undef c_isxdigit
+# undef c_isalpha
+# undef c_isalnum
+# undef c_tolower
+# undef c_toupper
+
+# define c_isspace(x) isspace (x)
+# define c_isdigit(x) isdigit (x)
+# define c_isxdigit(x) isxdigit (x)
+# define c_isalpha(x) isalpha (x)
+# define c_isalnum(x) isalnum (x)
+# define c_tolower(x) tolower (x)
+# define c_toupper(x) toupper (x)
struct hash_table {
int dummy;
However, "&lt;foo" will work, as will "&lt!foo", "&lt", etc. In
other words an entity needs to be terminated by either a
non-alphanumeric or the end of string. */
-#define FITS(p, n) (p + n == end || (p + n < end && !ISALNUM (p[n])))
+#define FITS(p, n) (p + n == end || (p + n < end && !c_isalnum (p[n])))
/* Macros that test entity names by returning true if P is followed by
the specified characters. */
int digits = 0;
value = 0;
if (*p == 'x')
- for (++p; value < 256 && p < end && ISXDIGIT (*p); p++, digits++)
+ for (++p; value < 256 && p < end && c_isxdigit (*p); p++, digits++)
value = (value << 4) + XDIGIT_TO_NUM (*p);
else
- for (; value < 256 && p < end && ISDIGIT (*p); p++, digits++)
+ for (; value < 256 && p < end && c_isdigit (*p); p++, digits++)
value = (value * 10) + (*p - '0');
if (!digits)
return -1;
` '. */
if (flags & AP_TRIM_BLANKS)
{
- while (beg < end && ISSPACE (*beg))
+ while (beg < end && c_isspace (*beg))
++beg;
- while (end > beg && ISSPACE (end[-1]))
+ while (end > beg && c_isspace (end[-1]))
--end;
}
{
char *p = pool->contents + old_tail;
for (; *p; p++)
- *p = TOLOWER (*p);
+ *p = c_tolower (*p);
}
}
\f
/* Skip whitespace, if any. */
#define SKIP_WS(p) do { \
- while (ISSPACE (*p)) { \
+ while (c_isspace (*p)) { \
ADVANCE (p); \
} \
} while (0)
/* Skip non-whitespace, if any. */
#define SKIP_NON_WS(p) do { \
- while (!ISSPACE (*p)) { \
+ while (!c_isspace (*p)) { \
ADVANCE (p); \
} \
} while (0)
violated by, for instance, `%' in `width=75%'.
We'll be liberal and allow just about anything as
an attribute value. */
- while (!ISSPACE (*p) && *p != '>')
+ while (!c_isspace (*p) && *p != '>')
ADVANCE (p);
attr_value_end = p; /* <foo bar=baz qux=quix> */
/* ^ */