[svn] Minor improvements to fd_read_hunk.

[wget] / src / http.c
diff --git a/src/http.c b/src/http.c

index a5f5673c5f0cb935ec4570d916cd8f938004fa19..e281cd5232f1f00944c2f200e6608d4ce0c79cfa 100644 (file)
--- a/src/http.c
+++ b/src/http.c
@@ -416,40 +416,51 @@ post_file (int sock, const char *file_name, wgint promised_size)
    return 0;
  }
  \f
+/* Determine whether [START, PEEKED + PEEKLEN) contains an empty line.
+   If so, return the pointer to the position after the line, otherwise
+   return NULL.  This is used as callback to fd_read_hunk.  The data
+   between START and PEEKED has been read and cannot be "unread"; the
+   data after PEEKED has only been peeked.  */
+
  static const char *
-response_head_terminator (const char *hunk, int oldlen, int peeklen)
+response_head_terminator (const char *start, const char *peeked, int peeklen)
  {
-  const char *start, *end;
+  const char *p, *end;
  
    /* If at first peek, verify whether the data starts with "HTTP".  If
       not, this is an HTTP/0.9 response and we must bail out without
       reading anything.  */
-  if (oldlen == 0 && 0 != memcmp (hunk, "HTTP", MIN (peeklen, 4)))
-    return hunk;
-
-  if (oldlen < 4)
-    start = hunk;
-  else
-    start = hunk + oldlen - 4;
-  end = hunk + oldlen + peeklen;
-
-  for (; start < end - 1; start++)
-    if (*start == '\n')
+  if (start == peeked && 0 != memcmp (start, "HTTP", MIN (peeklen, 4)))
+    return start;
+
+  /* Look for "\n[\r]\n", and return the following position if found.
+     Start two chars before the current to cover the possibility that
+     part of the terminator (e.g. "\n\r") arrived in the previous
+     batch.  */
+  p = peeked - start < 2 ? start : peeked - 2;
+  end = peeked + peeklen;
+
+  /* Check for \n\r\n or \n\n anywhere in [p, end-2). */
+  for (; p < end - 2; p++)
+    if (*p == '\n')
        {
-       if (start < end - 2
-           && start[1] == '\r'
-           && start[2] == '\n')
-         return start + 3;
-       if (start[1] == '\n')
-         return start + 2;
+       if (p[1] == '\r' && p[2] == '\n')
+         return p + 3;
+       else if (p[1] == '\n')
+         return p + 2;
        }
+  /* p==end-2 (if >= 2 bytes seen): check for \n\n directly preceding END. */
+  if (p[0] == '\n' && p[1] == '\n')
+    return p + 2;
+
    return NULL;
  }
  
-/* The maximum size of a single HTTP response we care to read.  This
-   is not meant to impose an arbitrary limit, but to protect the user
-   from Wget slurping up available memory upon encountering malicious
-   or buggy server output.  Define it to 0 to remove the limit.  */
+/* The maximum size of a single HTTP response we care to read.  Rather
+   than being a limit of the reader implementation, this limit
+   prevents Wget from slurping all available memory upon encountering
+   malicious or buggy server output, thus protecting the user.  Define
+   it to 0 to remove the limit.  */
  
  #define HTTP_RESPONSE_MAX_SIZE 65536
  
@@ -1308,20 +1319,25 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
         request_set_header (req, "Proxy-Authorization", proxyauth, rel_value);
      }
  
+  /* Generate the Host header, HOST:PORT.  Take into account that:
+
+     - Broken server-side software often doesn't recognize the PORT
+       argument, so we must generate "Host: www.server.com" instead of
+       "Host: www.server.com:80" (and likewise for https port).
+
+     - IPv6 addresses contain ":", so "Host: 3ffe:8100:200:2::2:1234"
+       becomes ambiguous and needs to be rewritten as "Host:
+       [3ffe:8100:200:2::2]:1234".  */
    {
-    /* Whether we need to print the host header with braces around
-       host, e.g. "Host: [3ffe:8100:200:2::2]:1234" instead of the
-       usual "Host: symbolic-name:1234". */
-    bool squares = strchr (u->host, ':') != NULL;
-    if (u->port == scheme_default_port (u->scheme))
-      request_set_header (req, "Host",
-                         aprintf (squares ? "[%s]" : "%s", u->host),
-                         rel_value);
-    else
-      request_set_header (req, "Host",
-                         aprintf (squares ? "[%s]:%d" : "%s:%d",
-                                  u->host, u->port),
-                         rel_value);
+    /* Formats arranged for hfmt[add_port][add_squares].  */
+    static const char *hfmt[][2] = {
+      { "%s", "[%s]" }, { "%s:%d", "[%s]:%d" }
+    };
+    int add_port = u->port != scheme_default_port (u->scheme);
+    int add_squares = strchr (u->host, ':') != NULL;
+    request_set_header (req, "Host",
+                       aprintf (hfmt[add_port][add_squares], u->host, u->port),
+                       rel_value);
    }
  
    if (!inhibit_keep_alive)