[svn] Don't cast return type of malloc/realloc. Assume ANSI C signal handlers.

[wget] / src / connect.c
diff --git a/src/connect.c b/src/connect.c

index d309b27264a759c6f450e8958aaf90e29acdd963..436258c3f0d2a07fef5082e627e6051901f2573a 100644 (file)
--- a/src/connect.c
+++ b/src/connect.c
@@ -31,7 +31,6 @@ so, delete this exception statement from your version.  */
  
  #include <stdio.h>
  #include <stdlib.h>
-#include <sys/types.h>
  #ifdef HAVE_UNISTD_H
  # include <unistd.h>
  #endif
@@ -47,11 +46,7 @@ so, delete this exception statement from your version.  */
  #endif /* not WINDOWS */
  
  #include <errno.h>
-#ifdef HAVE_STRING_H
-# include <string.h>
-#else
-# include <strings.h>
-#endif /* HAVE_STRING_H */
+#include <string.h>
  #ifdef HAVE_SYS_SELECT_H
  # include <sys/select.h>
  #endif /* HAVE_SYS_SELECT_H */
@@ -62,11 +57,15 @@ so, delete this exception statement from your version.  */
  #include "connect.h"
  #include "hash.h"
  
-#ifndef errno
-extern int errno;
-#endif
+/* Define sockaddr_storage where unavailable (presumably on IPv4-only
+   hosts).  */
+
+#ifndef ENABLE_IPV6
+# ifndef HAVE_STRUCT_SOCKADDR_STORAGE
+#  define sockaddr_storage sockaddr_in
+# endif
+#endif /* ENABLE_IPV6 */
  
-\f
  /* Fill SA as per the data in IP and PORT.  SA should point to struct
     sockaddr_storage if ENABLE_IPV6 is defined, to struct sockaddr_in
     otherwise.  */
@@ -79,6 +78,7 @@ sockaddr_set_data (struct sockaddr *sa, const ip_address *ip, int port)
      case IPV4_ADDRESS:
        {
         struct sockaddr_in *sin = (struct sockaddr_in *)sa;
+       xzero (*sin);
         sin->sin_family = AF_INET;
         sin->sin_port = htons (port);
         sin->sin_addr = ADDRESS_IPV4_IN_ADDR (ip);
@@ -88,6 +88,7 @@ sockaddr_set_data (struct sockaddr *sa, const ip_address *ip, int port)
      case IPV6_ADDRESS:
        {
         struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa;
+       xzero (*sin6);
         sin6->sin6_family = AF_INET6;
         sin6->sin6_port = htons (port);
         sin6->sin6_addr = ADDRESS_IPV6_IN6_ADDR (ip);
@@ -161,7 +162,6 @@ sockaddr_size (const struct sockaddr *sa)
  #endif
      default:
        abort ();
-      return 0;                        /* so the compiler shuts up. */
      }
  }
  \f
@@ -241,14 +241,17 @@ connect_with_timeout (int fd, const struct sockaddr *addr, socklen_t addrlen,
    return ctx.result;
  }
  \f
-/* Connect to a remote endpoint whose IP address is known.  */
+/* Connect via TCP to the specified address and port.
+
+   If PRINT is non-NULL, it is the host name to show in the
+   "Connecting to..." message.  */
  
  int
  connect_to_ip (const ip_address *ip, int port, const char *print)
  {
    struct sockaddr_storage ss;
    struct sockaddr *sa = (struct sockaddr *)&ss;
-  int sock = -1;
+  int sock;
  
    /* If PRINT is non-NULL, print the "Connecting to..." line, with
       PRINT being the host name we're connecting to.  */
@@ -256,8 +259,8 @@ connect_to_ip (const ip_address *ip, int port, const char *print)
      {
        const char *txt_addr = pretty_print_address (ip);
        if (print && 0 != strcmp (print, txt_addr))
-       logprintf (LOG_VERBOSE,
-                  _("Connecting to %s|%s|:%d... "), print, txt_addr, port);
+       logprintf (LOG_VERBOSE, _("Connecting to %s|%s|:%d... "),
+                  escnonprint (print), txt_addr, port);
        else
         logprintf (LOG_VERBOSE, _("Connecting to %s:%d... "), txt_addr, port);
      }
@@ -270,6 +273,18 @@ connect_to_ip (const ip_address *ip, int port, const char *print)
    if (sock < 0)
      goto err;
  
+#if defined(ENABLE_IPV6) && defined(IPV6_V6ONLY)
+  if (opt.ipv6_only) {
+    int on = 1;
+    /* In case of error, we will go on anyway... */
+    int err = setsockopt (sock, IPPROTO_IPV6, IPV6_V6ONLY, &on, sizeof (on));
+#ifdef ENABLE_DEBUG
+    if (err < 0) 
+      DEBUGP (("Failed setting IPV6_V6ONLY: %s", strerror (errno)));
+#endif
+  }
+#endif
+
    /* For very small rate limits, set the buffer size (and hence,
       hopefully, the kernel's TCP window size) to the per-second limit.
       That way we should never have to sleep for more than 1s between
@@ -318,26 +333,29 @@ connect_to_ip (const ip_address *ip, int port, const char *print)
         logprintf.  */
      int save_errno = errno;
      if (sock >= 0)
-      xclose (sock);
+      fd_close (sock);
      if (print)
-      logprintf (LOG_VERBOSE, "failed: %s.\n", strerror (errno));
+      logprintf (LOG_VERBOSE, _("failed: %s.\n"), strerror (errno));
      errno = save_errno;
      return -1;
    }
  }
  
-/* Connect to a remote endpoint specified by host name.  */
+/* Connect via TCP to a remote host on the specified port.
+
+   HOST is resolved as an Internet host name.  If HOST resolves to
+   more than one IP address, they are tried in the order returned by
+   DNS until connecting to one of them succeeds.  */
  
  int
  connect_to_host (const char *host, int port)
  {
    int i, start, end;
-  struct address_list *al;
-  int lh_flags = 0;
-  int sock = -1;
+  int sock;
+
+  struct address_list *al = lookup_host (host, 0);
  
- again:
-  al = lookup_host (host, lh_flags);
+ retry:
    if (!al)
      return E_HOST;
  
@@ -347,64 +365,34 @@ connect_to_host (const char *host, int port)
        const ip_address *ip = address_list_address_at (al, i);
        sock = connect_to_ip (ip, port, host);
        if (sock >= 0)
-       /* Success. */
-       break;
-
-      address_list_set_faulty (al, i);
+       {
+         /* Success. */
+         address_list_set_connected (al);
+         address_list_release (al);
+         return sock;
+       }
  
        /* The attempt to connect has failed.  Continue with the loop
          and try next address. */
-    }
-  address_list_release (al);
-
-  if (sock >= 0)
-    /* Mark a successful connection to one of the addresses. */
-    address_list_set_connected (al);
  
-  if (sock < 0 && address_list_connected_p (al))
-    {
-      /* We are unable to connect to any of HOST's addresses, although
-        we were previously able to connect to HOST.  That might
-        indicate that HOST is under dynamic DNS and the addresses
-        we're connecting to have expired.  Resolve it again.  */
-      lh_flags |= LH_REFRESH;
-      goto again;
+      address_list_set_faulty (al, i);
      }
  
-  return sock;
-}
-
-int
-test_socket_open (int sock)
-{
-#ifdef HAVE_SELECT
-  fd_set check_set;
-  struct timeval to;
-
-  /* Check if we still have a valid (non-EOF) connection.  From Andrew
-   * Maholski's code in the Unix Socket FAQ.  */
-
-  FD_ZERO (&check_set);
-  FD_SET (sock, &check_set);
+  /* Failed to connect to any of the addresses in AL. */
  
-  /* Wait one microsecond */
-  to.tv_sec = 0;
-  to.tv_usec = 1;
-
-  /* If we get a timeout, then that means still connected */
-  if (select (sock + 1, &check_set, NULL, NULL, &to) == 0)
+  if (address_list_connected_p (al))
      {
-      /* Connection is valid (not EOF), so continue */
-      return 1;
+      /* We connected to AL before, but cannot do so now.  That might
+        indicate that our DNS cache entry for HOST has expired.  */
+      address_list_release (al);
+      al = lookup_host (host, LH_REFRESH);
+      goto retry;
      }
-  else
-    return 0;
-#else
-  /* Without select, it's hard to know for sure. */
-  return 1;
-#endif
-}
+  address_list_release (al);
  
+  return -1;
+}
+\f
  /* Create a socket, bind it to local interface BIND_ADDRESS on port
     *PORT, set up a listen backlog, and return the resulting socket, or
     -1 in case of error.
@@ -444,18 +432,11 @@ bind_local (const ip_address *bind_address, int *port)
    setsockopt (sock, SOL_SOCKET, SO_REUSEADDR, setopt_ptr, setopt_size);
  #endif
  
-#ifdef ENABLE_IPV6
-# ifdef HAVE_IPV6_V6ONLY
-  if (family == AF_INET6)
-    setsockopt (sock, IPPROTO_IPV6, IPV6_V6ONLY, setopt_ptr, setopt_size);
-# endif
-#endif
-
    xzero (ss);
    sockaddr_set_data (sa, bind_address, *port);
    if (bind (sock, sa, sockaddr_size (sa)) < 0)
      {
-      xclose (sock);
+      fd_close (sock);
        return -1;
      }
    DEBUGP (("Local socket fd %d bound.\n", sock));
@@ -463,14 +444,14 @@ bind_local (const ip_address *bind_address, int *port)
    /* If *PORT is 0, find out which port we've bound to.  */
    if (*port == 0)
      {
-      socklen_t sa_len = sockaddr_size (sa);
-      if (getsockname (sock, sa, &sa_len) < 0)
+      socklen_t addrlen = sockaddr_size (sa);
+      if (getsockname (sock, sa, &addrlen) < 0)
         {
           /* If we can't find out the socket's local address ("name"),
              something is seriously wrong with the socket, and it's
              unusable for us anyway because we must know the chosen
              port.  */
-         xclose (sock);
+         fd_close (sock);
           return -1;
         }
        sockaddr_get_data (sa, NULL, port);
@@ -479,7 +460,7 @@ bind_local (const ip_address *bind_address, int *port)
      }
    if (listen (sock, 1) < 0)
      {
-      xclose (sock);
+      fd_close (sock);
        return -1;
      }
    return sock;
@@ -506,7 +487,6 @@ accept_connection (int local_sock)
    struct sockaddr *sa = (struct sockaddr *)&ss;
    socklen_t addrlen = sizeof (ss);
  
-#ifdef HAVE_SELECT
    if (opt.connect_timeout)
      {
        int test = select_fd (local_sock, opt.connect_timeout, WAIT_FOR_READ);
@@ -515,7 +495,6 @@ accept_connection (int local_sock)
        if (test <= 0)
         return -1;
      }
-#endif
    sock = accept (local_sock, sa, &addrlen);
    DEBUGP (("Accepted client at socket %d.\n", sock));
    return sock;
@@ -571,8 +550,6 @@ socket_ip_address (int sock, ip_address *ip, int endpoint)
      default:
        abort ();
      }
-
-  return 0;
  }
  
  /* Return non-zero if the error from the connect code can be
@@ -608,25 +585,33 @@ retryable_socket_connect_error (int err)
        )
      return 0;
  
-  if (err == ECONNREFUSED && !opt.retry_connrefused)
-    return 0;
+  if (!opt.retry_connrefused)
+    if (err == ECONNREFUSED
+#ifdef ENETUNREACH
+       || err == ENETUNREACH   /* network is unreachable */
+#endif
+#ifdef EHOSTUNREACH
+       || err == EHOSTUNREACH  /* host is unreachable */
+#endif
+       )
+      return 0;
  
    return 1;
  }
  
-#ifdef HAVE_SELECT
-
-/* Wait for file descriptor FD to be readable or writable or both,
-   timing out after MAXTIME seconds.  Returns 1 if FD is available, 0
-   for timeout and -1 for error.  The argument WAIT_FOR can be a
-   combination of WAIT_READ and WAIT_WRITE.
+/* Wait for a single descriptor to become available, timing out after
+   MAXTIME seconds.  Returns 1 if FD is available, 0 for timeout and
+   -1 for error.  The argument WAIT_FOR can be a combination of
+   WAIT_FOR_READ and WAIT_FOR_WRITE.
  
     This is a mere convenience wrapper around the select call, and
-   should be taken as such.  */
+   should be taken as such (for example, it doesn't implement Wget's
+   0-timeout-means-no-timeout semantics.)  */
  
  int
  select_fd (int fd, double maxtime, int wait_for)
  {
+#ifdef HAVE_SELECT
    fd_set fdset;
    fd_set *rd = NULL, *wr = NULL;
    struct timeval tmout;
@@ -640,16 +625,56 @@ select_fd (int fd, double maxtime, int wait_for)
      wr = &fdset;
  
    tmout.tv_sec = (long) maxtime;
-  tmout.tv_usec = 1000000L * (maxtime - (long) maxtime);
+  tmout.tv_usec = 1000000 * (maxtime - (long) maxtime);
  
    do
      result = select (fd + 1, rd, wr, NULL, &tmout);
    while (result < 0 && errno == EINTR);
  
    return result;
+
+#else  /* not HAVE_SELECT */
+
+  /* If select() unavailable, just return 1.  In most usages in Wget,
+     this is the appropriate response -- "if we can't poll, go ahead
+     with the blocking operation".  If a specific part of code needs
+     different behavior, it can use #ifdef HAVE_SELECT to test whether
+     polling really occurs.  */
+  return 1;
+
+#endif /* not HAVE_SELECT */
  }
  
-#endif /* HAVE_SELECT */
+int
+test_socket_open (int sock)
+{
+#ifdef HAVE_SELECT
+  fd_set check_set;
+  struct timeval to;
+
+  /* Check if we still have a valid (non-EOF) connection.  From Andrew
+   * Maholski's code in the Unix Socket FAQ.  */
+
+  FD_ZERO (&check_set);
+  FD_SET (sock, &check_set);
+
+  /* Wait one microsecond */
+  to.tv_sec = 0;
+  to.tv_usec = 1;
+
+  /* If we get a timeout, then that means still connected */
+  if (select (sock + 1, &check_set, NULL, NULL, &to) == 0)
+    {
+      /* Connection is valid (not EOF), so continue */
+      return 1;
+    }
+  else
+    return 0;
+#else
+  /* Without select, it's hard to know for sure. */
+  return 1;
+#endif
+}
  \f
  /* Basic socket operations, mostly EINTR wrappers.  */
  
@@ -677,7 +702,7 @@ sock_read (int fd, char *buf, int bufsize)
  static int
  sock_write (int fd, char *buf, int bufsize)
  {
-  int res = 0;
+  int res;
    do
      res = write (fd, buf, bufsize);
    while (res == -1 && errno == EINTR);
@@ -687,11 +712,17 @@ sock_write (int fd, char *buf, int bufsize)
  static int
  sock_poll (int fd, double timeout, int wait_for)
  {
-#ifdef HAVE_SELECT
    return select_fd (fd, timeout, wait_for);
-#else
-  return 1;
-#endif
+}
+
+static int
+sock_peek (int fd, char *buf, int bufsize)
+{
+  int res;
+  do
+    res = recv (fd, buf, bufsize, MSG_PEEK);
+  while (res == -1 && errno == EINTR);
+  return res;
  }
  
  static void
@@ -709,17 +740,18 @@ sock_close (int fd)
     that are not mere file descriptors under the hood, such as SSL
     sockets.
  
-   That way the user code can call xread(fd, ...) and we'll run read
+   That way the user code can call fd_read(fd, ...) and we'll run read
     or SSL_read or whatever is necessary.  */
  
  static struct hash_table *transport_map;
  static int transport_map_modified_tick;
  
  struct transport_info {
-  xreader_t reader;
-  xwriter_t writer;
-  xpoller_t poller;
-  xcloser_t closer;
+  fd_reader_t reader;
+  fd_writer_t writer;
+  fd_poller_t poller;
+  fd_peeker_t peeker;
+  fd_closer_t closer;
    void *ctx;
  };
  
@@ -731,13 +763,14 @@ struct transport_info {
     call getpeername, etc.  */
  
  void
-register_transport (int fd, xreader_t reader, xwriter_t writer,
-                   xpoller_t poller, xcloser_t closer, void *ctx)
+fd_register_transport (int fd, fd_reader_t reader, fd_writer_t writer,
+                      fd_poller_t poller, fd_peeker_t peeker,
+                      fd_closer_t closer, void *ctx)
  {
    struct transport_info *info;
  
    /* The file descriptor must be non-negative to be registered.
-     Negative values are ignored by xclose(), and -1 cannot be used as
+     Negative values are ignored by fd_close(), and -1 cannot be used as
       hash key.  */
    assert (fd >= 0);
  
@@ -745,6 +778,7 @@ register_transport (int fd, xreader_t reader, xwriter_t writer,
    info->reader = reader;
    info->writer = writer;
    info->poller = poller;
+  info->peeker = peeker;
    info->closer = closer;
    info->ctx = ctx;
    if (!transport_map)
@@ -753,7 +787,18 @@ register_transport (int fd, xreader_t reader, xwriter_t writer,
    ++transport_map_modified_tick;
  }
  
-/* When xread/xwrite are called multiple times in a loop, they should
+/* Return context of the transport registered with
+   fd_register_transport.  This assumes fd_register_transport was
+   previously called on FD.  */
+
+void *
+fd_transport_context (int fd)
+{
+  struct transport_info *info = hash_table_get (transport_map, (void *) fd);
+  return info->ctx;
+}
+
+/* When fd_read/fd_write are called multiple times in a loop, they should
     remember the INFO pointer instead of fetching it every time.  It is
     not enough to compare FD to LAST_FD because FD might have been
     closed and reopened.  modified_tick ensures that changes to
@@ -778,67 +823,88 @@ register_transport (int fd, xreader_t reader, xwriter_t writer,
      }                                                                  \
  } while (0)
  
-/* Read no more than BUFSIZE bytes of data from FD, storing them to
-   BUF.  If TIMEOUT is non-zero, the operation aborts if no data is
-   received after that many seconds.  If TIMEOUT is -1, the value of
-   opt.timeout is used for TIMEOUT.  */
-
-int
-xread (int fd, char *buf, int bufsize, double timeout)
+static int
+poll_internal (int fd, struct transport_info *info, int wf, double timeout)
  {
-  struct transport_info *info;
-  LAZY_RETRIEVE_INFO (info);
    if (timeout == -1)
      timeout = opt.read_timeout;
    if (timeout)
      {
        int test;
        if (info && info->poller)
-       test = info->poller (fd, timeout, WAIT_FOR_READ, info->ctx);
+       test = info->poller (fd, timeout, wf, info->ctx);
        else
-       test = sock_poll (fd, timeout, WAIT_FOR_READ);
+       test = sock_poll (fd, timeout, wf);
        if (test == 0)
         errno = ETIMEDOUT;
        if (test <= 0)
-       return -1;
+       return 0;
      }
+  return 1;
+}
+
+/* Read no more than BUFSIZE bytes of data from FD, storing them to
+   BUF.  If TIMEOUT is non-zero, the operation aborts if no data is
+   received after that many seconds.  If TIMEOUT is -1, the value of
+   opt.read_timeout is used for TIMEOUT.  */
+
+int
+fd_read (int fd, char *buf, int bufsize, double timeout)
+{
+  struct transport_info *info;
+  LAZY_RETRIEVE_INFO (info);
+  if (!poll_internal (fd, info, WAIT_FOR_READ, timeout))
+    return -1;
    if (info && info->reader)
      return info->reader (fd, buf, bufsize, info->ctx);
    else
      return sock_read (fd, buf, bufsize);
  }
  
+/* Like fd_read, except it provides a "preview" of the data that will
+   be read by subsequent calls to fd_read.  Specifically, it copies no
+   more than BUFSIZE bytes of the currently available data to BUF and
+   returns the number of bytes copied.  Return values and timeout
+   semantics are the same as those of fd_read.
+
+   CAVEAT: Do not assume that the first subsequent call to fd_read
+   will retrieve the same amount of data.  Reading can return more or
+   less data, depending on the TCP implementation and other
+   circumstances.  However, barring an error, it can be expected that
+   all the peeked data will eventually be read by fd_read.  */
+
+int
+fd_peek (int fd, char *buf, int bufsize, double timeout)
+{
+  struct transport_info *info;
+  LAZY_RETRIEVE_INFO (info);
+  if (!poll_internal (fd, info, WAIT_FOR_READ, timeout))
+    return -1;
+  if (info && info->peeker)
+    return info->peeker (fd, buf, bufsize, info->ctx);
+  else
+    return sock_peek (fd, buf, bufsize);
+}
+
  /* Write the entire contents of BUF to FD.  If TIMEOUT is non-zero,
     the operation aborts if FD does not become writable within that
     many seconds.  If TIMEOUT is -1, the value of opt.read_timeout is
     used for TIMEOUT.  */
  
  int
-xwrite (int fd, char *buf, int bufsize, double timeout)
+fd_write (int fd, char *buf, int bufsize, double timeout)
  {
    int res;
    struct transport_info *info;
    LAZY_RETRIEVE_INFO (info);
-  if (timeout == -1)
-    timeout = opt.read_timeout;
  
    /* `write' may write less than LEN bytes, thus the loop keeps trying
       it until all was written, or an error occurred.  */
    res = 0;
    while (bufsize > 0)
      {
-      if (timeout)
-       {
-         int test;
-         if (info && info->poller)
-           test = info->poller (fd, timeout, WAIT_FOR_WRITE, info->ctx);
-         else
-           test = sock_poll (fd, timeout, WAIT_FOR_WRITE);
-         if (test == 0)
-           errno = ETIMEDOUT;
-         if (test <= 0)
-           return -1;
-       }
+      if (!poll_internal (fd, info, WAIT_FOR_WRITE, timeout))
+       return -1;
        if (info && info->writer)
         res = info->writer (fd, buf, bufsize, info->ctx);
        else
@@ -854,13 +920,13 @@ xwrite (int fd, char *buf, int bufsize, double timeout)
  /* Close the file descriptor FD.  */
  
  void
-xclose (int fd)
+fd_close (int fd)
  {
    struct transport_info *info;
    if (fd < 0)
      return;
  
-  /* Don't use LAZY_RETRIEVE_INFO because xclose() is only called once
+  /* Don't use LAZY_RETRIEVE_INFO because fd_close() is only called once
       per socket, so that particular optimization wouldn't work.  */
    info = NULL;
    if (transport_map)