[svn] Minor improvements to fd_read_hunk.

author hniksic <devnull@localhost>

Fri, 1 Jul 2005 23:37:50 +0000 (16:37 -0700)

committer hniksic <devnull@localhost>

Fri, 1 Jul 2005 23:37:50 +0000 (16:37 -0700)
author hniksic <devnull@localhost>
Fri, 1 Jul 2005 23:37:50 +0000 (16:37 -0700)
committer hniksic <devnull@localhost>
Fri, 1 Jul 2005 23:37:50 +0000 (16:37 -0700)
diff --git a/src/ChangeLog b/src/ChangeLog

index a0fabf772e5c3fe9d39433f9b6cd63607f636137..6777851db985e6a06a895072663b9de30ce9914e 100644 (file)
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,11 @@
+2005-07-02  Hrvoje Niksic  <hniksic@xemacs.org>
+
+       * http.c (response_head_terminator): Minor optimization.
+
+       * retr.c (fd_read_hunk): Call terminator with pointer to the start
+       of the data and the pointer to the current data.  Changed all
+       callers.
+
  2005-07-01  Hrvoje Niksic  <hniksic@xemacs.org>
  
         * url.c (url_parse): Make sure u->params is not initialized for
  2005-07-01  Hrvoje Niksic  <hniksic@xemacs.org>
  
         * url.c (url_parse): Make sure u->params is not initialized for
diff --git a/src/http.c b/src/http.c

index 4d1c16ebc917f72dbb3f94876f4abe98b9d2dbc4..e281cd5232f1f00944c2f200e6608d4ce0c79cfa 100644 (file)
--- a/src/http.c
+++ b/src/http.c
@@ -416,40 +416,51 @@ post_file (int sock, const char *file_name, wgint promised_size)
    return 0;
  }
  \f
    return 0;
  }
  \f
+/* Determine whether [START, PEEKED + PEEKLEN) contains an empty line.
+   If so, return the pointer to the position after the line, otherwise
+   return NULL.  This is used as callback to fd_read_hunk.  The data
+   between START and PEEKED has been read and cannot be "unread"; the
+   data after PEEKED has only been peeked.  */
+
  static const char *
  static const char *
-response_head_terminator (const char *hunk, int oldlen, int peeklen)
+response_head_terminator (const char *start, const char *peeked, int peeklen)
  {
  {
-  const char *start, *end;
+  const char *p, *end;
  
    /* If at first peek, verify whether HUNK starts with "HTTP".  If
       not, this is a HTTP/0.9 request and we must bail out without
       reading anything.  */
  
    /* If at first peek, verify whether HUNK starts with "HTTP".  If
       not, this is a HTTP/0.9 request and we must bail out without
       reading anything.  */
-  if (oldlen == 0 && 0 != memcmp (hunk, "HTTP", MIN (peeklen, 4)))
-    return hunk;
-
-  if (oldlen < 4)
-    start = hunk;
-  else
-    start = hunk + oldlen - 4;
-  end = hunk + oldlen + peeklen;
-
-  for (; start < end - 1; start++)
-    if (*start == '\n')
+  if (start == peeked && 0 != memcmp (start, "HTTP", MIN (peeklen, 4)))
+    return start;
+
+  /* Look for "\n[\r]\n", and return the following position if found.
+     Start two chars before the current to cover the possibility that
+     part of the terminator (e.g. "\n\r") arrived in the previous
+     batch.  */
+  p = peeked - start < 2 ? start : peeked - 2;
+  end = peeked + peeklen;
+
+  /* Check for \n\r\n or \n\n anywhere in [p, end-2). */
+  for (; p < end - 2; p++)
+    if (*p == '\n')
        {
        {
-       if (start < end - 2
-           && start[1] == '\r'
-           && start[2] == '\n')
-         return start + 3;
-       if (start[1] == '\n')
-         return start + 2;
+       if (p[1] == '\r' && p[2] == '\n')
+         return p + 3;
+       else if (p[1] == '\n')
+         return p + 2;
        }
        }
+  /* p==end-2: check for \n\n directly preceding END. */
+  if (p[0] == '\n' && p[1] == '\n')
+    return p + 2;
+
    return NULL;
  }
  
    return NULL;
  }
  
-/* The maximum size of a single HTTP response we care to read.  This
-   is not meant to impose an arbitrary limit, but to protect the user
-   from Wget slurping up available memory upon encountering malicious
-   or buggy server output.  Define it to 0 to remove the limit.  */
+/* The maximum size of a single HTTP response we care to read.  Rather
+   than being a limit of the reader implementation, this limit
+   prevents Wget from slurping all available memory upon encountering
+   malicious or buggy server output, thus protecting the user.  Define
+   it to 0 to remove the limit.  */
  
  #define HTTP_RESPONSE_MAX_SIZE 65536
  
  
  #define HTTP_RESPONSE_MAX_SIZE 65536
  
diff --git a/src/retr.c b/src/retr.c

index 450619360b639bdac49a4353a7c8a52ee32e01e0..918fb5de6df611a4b328d501925a2fa8f11ac2fe 100644 (file)
--- a/src/retr.c
+++ b/src/retr.c
@@ -336,22 +336,35 @@ fd_read_body (int fd, FILE *out, wgint toread, wgint startpos,
    return ret;
  }
  \f
    return ret;
  }
  \f
-/* Read a hunk of data from FD, up until a terminator.  The terminator
-   is whatever the TERMINATOR function determines it to be; for
-   example, it can be a line of data, or the head of an HTTP response.
-   The function returns the data read allocated with malloc.
-
-   In case of error, NULL is returned.  In case of EOF and no data
-   read, NULL is returned and errno set to 0.  In case of EOF with
-   data having been read, the data is returned, but it will
-   (obviously) not contain the terminator.
+/* Read a hunk of data from FD, up until a terminator.  The hunk is
+   limited by whatever the TERMINATOR callback chooses as its
+   terminator.  For example, if terminator stops at newline, the hunk
+   will consist of a line of data; if terminator stops at two
+   newlines, it can be used to read the head of an HTTP response.
+   Upon determining the boundary, the function returns the data (up to
+   the terminator) in malloc-allocated storage.
+
+   In case of read error, NULL is returned.  In case of EOF and no
+   data read, NULL is returned and errno set to 0.  In case of having
+   read some data, but encountering EOF before seeing the terminator,
+   the data that has been read is returned, but it will (obviously)
+   not contain the terminator.
+
+   The TERMINATOR function is called with three arguments: the
+   beginning of the data read so far, the beginning of the current
+   block of peeked-at data, and the length of the current block.
+   Depending on its needs, the function is free to choose whether to
+   analyze all data or just the newly arrived data.  If TERMINATOR
+   returns NULL, it means that the terminator has not been seen.
+   Otherwise it should return a pointer to the character immediately
+   following the terminator.
  
     The idea is to be able to read a line of input, or otherwise a hunk
     of text, such as the head of an HTTP request, without crossing the
     boundary, so that the next call to fd_read etc. reads the data
     after the hunk.  To achieve that, this function does the following:
  
  
     The idea is to be able to read a line of input, or otherwise a hunk
     of text, such as the head of an HTTP request, without crossing the
     boundary, so that the next call to fd_read etc. reads the data
     after the hunk.  To achieve that, this function does the following:
  
-   1. Peek at available data.
+   1. Peek at incoming data.
  
     2. Determine whether the peeked data, along with the previously
        read data, includes the terminator.
  
     2. Determine whether the peeked data, along with the previously
        read data, includes the terminator.
@@ -396,12 +409,13 @@ fd_read_hunk (int fd, hunk_terminator_t terminator, long sizehint, long maxsize)
           xfree (hunk);
           return NULL;
         }
           xfree (hunk);
           return NULL;
         }
-      end = terminator (hunk, tail, pklen);
+      end = terminator (hunk, hunk + tail, pklen);
        if (end)
         {
           /* The data contains the terminator: we'll drain the data up
              to the end of the terminator.  */
           remain = end - (hunk + tail);
        if (end)
         {
           /* The data contains the terminator: we'll drain the data up
              to the end of the terminator.  */
           remain = end - (hunk + tail);
+         assert (remain >= 0);
           if (remain == 0)
             {
               /* No more data needs to be read. */
           if (remain == 0)
             {
               /* No more data needs to be read. */
@@ -471,11 +485,11 @@ fd_read_hunk (int fd, hunk_terminator_t terminator, long sizehint, long maxsize)
  }
  
  static const char *
  }
  
  static const char *
-line_terminator (const char *hunk, int oldlen, int peeklen)
+line_terminator (const char *start, const char *peeked, int peeklen)
  {
  {
-  const char *p = memchr (hunk + oldlen, '\n', peeklen);
+  const char *p = memchr (peeked, '\n', peeklen);
    if (p)
    if (p)
-    /* p+1 because we want the line to include '\n' */
+    /* p+1 because the line must include '\n' */
      return p + 1;
    return NULL;
  }
      return p + 1;
    return NULL;
  }
diff --git a/src/retr.h b/src/retr.h

index 305ced4f94bee212d871a750144e2eb26dae48e0..441471ac14ea11fdf7905f956e71d7a3a0bb4e11 100644 (file)
--- a/src/retr.h
+++ b/src/retr.h
@@ -45,7 +45,7 @@ enum {
  
  int fd_read_body (int, FILE *, wgint, wgint, wgint *, wgint *, double *, int);
  
  
  int fd_read_body (int, FILE *, wgint, wgint, wgint *, wgint *, double *, int);
  
-typedef const char *(*hunk_terminator_t) (const char *, int, int);
+typedef const char *(*hunk_terminator_t) (const char *, const char *, int);
  
  char *fd_read_hunk (int, hunk_terminator_t, long, long);
  char *fd_read_line (int);
  
  char *fd_read_hunk (int, hunk_terminator_t, long, long);
  char *fd_read_line (int);
author	hniksic <devnull@localhost>
	Fri, 1 Jul 2005 23:37:50 +0000 (16:37 -0700)
committer	hniksic <devnull@localhost>
	Fri, 1 Jul 2005 23:37:50 +0000 (16:37 -0700)
src/ChangeLog		patch \| blob \| history
src/http.c		patch \| blob \| history
src/retr.c		patch \| blob \| history
src/retr.h		patch \| blob \| history