return 0;
}
\f
+/* Determine whether [START, PEEKED + PEEKLEN) contains an empty line.
+ If so, return the pointer to the position after the line, otherwise
+ return NULL. This is used as a callback to fd_read_hunk. The data
+ between START and PEEKED has been read and cannot be "unread"; the
+ data after PEEKED has only been peeked. As a special case, if the
+ first peeked data does not begin with "HTTP", START is returned so
+ that nothing is read. */
+
static const char *
-response_head_terminator (const char *hunk, int oldlen, int peeklen)
+response_head_terminator (const char *start, const char *peeked, int peeklen)
{
- const char *start, *end;
+ const char *p, *end;
/* If at first peek, verify whether the data starts with "HTTP". If
not, this is an HTTP/0.9 response and we must bail out without
reading anything. */
- if (oldlen == 0 && 0 != memcmp (hunk, "HTTP", MIN (peeklen, 4)))
- return hunk;
-
- if (oldlen < 4)
- start = hunk;
- else
- start = hunk + oldlen - 4;
- end = hunk + oldlen + peeklen;
-
- for (; start < end - 1; start++)
- if (*start == '\n')
+ if (start == peeked && 0 != memcmp (start, "HTTP", MIN (peeklen, 4)))
+ return start;
+
+ /* Look for "\n[\r]\n", and return the following position if found.
+ Start two chars before the current to cover the possibility that
+ part of the terminator (e.g. "\n\r") arrived in the previous
+ batch. */
+ p = peeked - start < 2 ? start : peeked - 2;
+ end = peeked + peeklen;
+
+ /* Check for \n\r\n or \n\n anywhere in [p, end-2). */
+ for (; p < end - 2; p++)
+ if (*p == '\n')
{
- if (start < end - 2
- && start[1] == '\r'
- && start[2] == '\n')
- return start + 3;
- if (start[1] == '\n')
- return start + 2;
+ if (p[1] == '\r' && p[2] == '\n')
+ return p + 3;
+ else if (p[1] == '\n')
+ return p + 2;
}
+ /* p==end-2: check for \n\n directly preceding END. */
+ if (p[0] == '\n' && p[1] == '\n')
+ return p + 2;
+
return NULL;
}
-/* The maximum size of a single HTTP response we care to read. This
- is not meant to impose an arbitrary limit, but to protect the user
- from Wget slurping up available memory upon encountering malicious
- or buggy server output. Define it to 0 to remove the limit. */
+/* The maximum size of a single HTTP response we care to read. Rather
+ than being a limit of the reader implementation, this limit
+ prevents Wget from slurping all available memory upon encountering
+ malicious or buggy server output, thus protecting the user. Define
+ it to 0 to remove the limit. */
#define HTTP_RESPONSE_MAX_SIZE 65536
return ret;
}
\f
-/* Read a hunk of data from FD, up until a terminator. The terminator
- is whatever the TERMINATOR function determines it to be; for
- example, it can be a line of data, or the head of an HTTP response.
- The function returns the data read allocated with malloc.
-
- In case of error, NULL is returned. In case of EOF and no data
- read, NULL is returned and errno set to 0. In case of EOF with
- data having been read, the data is returned, but it will
- (obviously) not contain the terminator.
+/* Read a hunk of data from FD, up until a terminator. The hunk is
+ limited by whatever the TERMINATOR callback chooses as its
+ terminator. For example, if terminator stops at newline, the hunk
+ will consist of a line of data; if terminator stops at two
+ newlines, it can be used to read the head of an HTTP response.
+ Upon determining the boundary, the function returns the data (up to
+ the terminator) in malloc-allocated storage.
+
+ In case of read error, NULL is returned. In case of EOF and no
+ data read, NULL is returned and errno set to 0. In case of having
+ read some data, but encountering EOF before seeing the terminator,
+ the data that has been read is returned, but it will (obviously)
+ not contain the terminator.
+
+ The TERMINATOR function is called with three arguments: the
+ beginning of the data read so far, the beginning of the current
+ block of peeked-at data, and the length of the current block.
+ Depending on its needs, the function is free to choose whether to
+ analyze all data or just the newly arrived data. If TERMINATOR
+ returns NULL, it means that the terminator has not been seen.
+ Otherwise it should return a pointer to the character immediately
+ following the terminator.
The idea is to be able to read a line of input, or otherwise a hunk
of text, such as the head of an HTTP request, without crossing the
boundary, so that the next call to fd_read etc. reads the data
after the hunk. To achieve that, this function does the following:
- 1. Peek at available data.
+ 1. Peek at incoming data.
2. Determine whether the peeked data, along with the previously
read data, includes the terminator.
xfree (hunk);
return NULL;
}
- end = terminator (hunk, tail, pklen);
+ end = terminator (hunk, hunk + tail, pklen);
if (end)
{
/* The data contains the terminator: we'll drain the data up
to the end of the terminator. */
remain = end - (hunk + tail);
+ assert (remain >= 0);
if (remain == 0)
{
/* No more data needs to be read. */
}
static const char *
-line_terminator (const char *hunk, int oldlen, int peeklen)
+line_terminator (const char *start, const char *peeked, int peeklen)
{
- const char *p = memchr (hunk + oldlen, '\n', peeklen);
+ const char *p = memchr (peeked, '\n', peeklen);
if (p)
- /* p+1 because we want the line to include '\n' */
+ /* p+1 because the line must include '\n' */
return p + 1;
return NULL;
}