[svn] Remove K&R support.

[wget] / src / html-parse.c
diff --git a/src/html-parse.c b/src/html-parse.c

index 2a09ff09c0e6cd1c9e8e2cc86867d261e31b16e3..4a0a525771a97d173ee10bd713741e2c5f7a11f4 100644 (file)
--- a/src/html-parse.c
+++ b/src/html-parse.c
@@ -96,11 +96,7 @@ so, delete this exception statement from your version.  */
  
  #include <stdio.h>
  #include <stdlib.h>
-#ifdef HAVE_STRING_H
-# include <string.h>
-#else
-# include <strings.h>
-#endif
+#include <string.h>
  #include <assert.h>
  
  #include "wget.h"
@@ -360,17 +356,16 @@ enum {
       the ASCII range when copying the string.
  
     * AP_TRIM_BLANKS -- ignore blanks at the beginning and at the end
-     of text.  */
+     of text, as well as embedded newlines.  */
  
  static void
  convert_and_copy (struct pool *pool, const char *beg, const char *end, int flags)
  {
    int old_tail = pool->tail;
-  int size;
  
-  /* First, skip blanks if required.  We must do this before entities
-     are processed, so that blanks can still be inserted as, for
-     instance, `&#32;'.  */
+  /* Skip blanks if required.  We must do this before entities are
+     processed, so that blanks can still be inserted as, for instance,
+     `&#32;'.  */
    if (flags & AP_TRIM_BLANKS)
      {
        while (beg < end && ISSPACE (*beg))
@@ -378,7 +373,6 @@ convert_and_copy (struct pool *pool, const char *beg, const char *end, int flags
        while (end > beg && ISSPACE (end[-1]))
         --end;
      }
-  size = end - beg;
  
    if (flags & AP_DECODE_ENTITIES)
      {
@@ -391,15 +385,14 @@ convert_and_copy (struct pool *pool, const char *beg, const char *end, int flags
          never lengthen it.  */
        const char *from = beg;
        char *to;
+      int squash_newlines = flags & AP_TRIM_BLANKS;
  
        POOL_GROW (pool, end - beg);
        to = pool->contents + pool->tail;
  
        while (from < end)
         {
-         if (*from != '&')
-           *to++ = *from++;
-         else
+         if (*from == '&')
             {
               int entity = decode_entity (&from, end);
               if (entity != -1)
@@ -407,6 +400,10 @@ convert_and_copy (struct pool *pool, const char *beg, const char *end, int flags
               else
                 *to++ = *from++;
             }
+         else if ((*from == '\n' || *from == '\r') && squash_newlines)
+           ++from;
+         else
+           *to++ = *from++;
         }
        /* Verify that we haven't exceeded the original size.  (It
          shouldn't happen, hence the assert.)  */
@@ -729,17 +726,15 @@ static int tag_backout_count;
     MAPFUN will be called with two arguments: pointer to an initialized
     struct taginfo, and MAPARG.
  
-   ALLOWED_TAG_NAMES should be a NULL-terminated array of tag names to
-   be processed by this function.  If it is NULL, all the tags are
-   allowed.  The same goes for attributes and ALLOWED_ATTRIBUTE_NAMES.
+   ALLOWED_TAGS and ALLOWED_ATTRIBUTES are hash tables the keys of
+   which are the tags and attribute names that this function should
+   use.  If ALLOWED_TAGS is NULL, all tags are processed; if
+   ALLOWED_ATTRIBUTES is NULL, all attributes are returned.
  
     (Obviously, the caller can filter out unwanted tags and attributes
     just as well, but this is just an optimization designed to avoid
-   unnecessary copying for tags/attributes which the caller doesn't
-   want to know about.  These lists are searched linearly; therefore,
-   if you're interested in a large number of tags or attributes, you'd
-   better set these to NULL and filter them out yourself with a
-   hashing process most appropriate for your application.)  */
+   unnecessary copying of tags/attributes which the caller doesn't
+   care about.)  */
  
  void
  map_html_tags (const char *text, int size,
@@ -1015,8 +1010,7 @@ map_html_tags (const char *text, int size,
        taginfo.attrs = pairs;
        taginfo.start_position = tag_start_position;
        taginfo.end_position   = p + 1;
-      /* Ta-dam! */
-      (*mapfun) (&taginfo, maparg);
+      mapfun (&taginfo, maparg);
        ADVANCE (p);
      }
      goto look_for_tag;