[svn] Don't cast return type of malloc/realloc. Assume ANSI C signal handlers.

[wget] / src / url.c
diff --git a/src/url.c b/src/url.c

index ad8fc202e259a3d429ec0372e4cafed1da3982ef..5f722c1a9dbb0b2b352e9070b1731fa3affcf9ff 100644 (file)
--- a/src/url.c
+++ b/src/url.c
@@ -31,12 +31,7 @@ so, delete this exception statement from your version.  */
  
  #include <stdio.h>
  #include <stdlib.h>
-#ifdef HAVE_STRING_H
-# include <string.h>
-#else
-# include <strings.h>
-#endif
-#include <sys/types.h>
+#include <string.h>
  #ifdef HAVE_UNISTD_H
  # include <unistd.h>
  #endif
@@ -48,10 +43,6 @@ so, delete this exception statement from your version.  */
  #include "url.h"
  #include "host.h"  /* for is_valid_ipv6_address */
  
-#ifndef errno
-extern int errno;
-#endif
-
  struct scheme_data
  {
    const char *name;
@@ -75,7 +66,7 @@ static struct scheme_data supported_schemes[] =
  
  /* Forward declarations: */
  
-static int path_simplify PARAMS ((char *));
+static int path_simplify (char *);
  \f
  /* Support for escaping and unescaping of URL strings.  */
  
@@ -214,7 +205,7 @@ url_escape_1 (const char *s, unsigned char mask, int allow_passthrough)
      return allow_passthrough ? (char *)s : xstrdup (s);
  
    newlen = (p1 - s) + addition;
-  newstr = (char *)xmalloc (newlen + 1);
+  newstr = xmalloc (newlen + 1);
  
    p1 = s;
    p2 = newstr;
@@ -255,29 +246,27 @@ url_escape_allow_passthrough (const char *s)
    return url_escape_1 (s, urlchr_unsafe, 1);
  }
  \f
-enum copy_method { cm_encode, cm_passthrough };
+/* Decide whether the char at position P needs to be encoded.  (The
+   pointer P is passed rather than the char value *P because the
+   function may need to inspect the chars that follow it.)
  
-/* Decide whether to encode or pass through the char at P.  This used
-   to be a macro, but it got a little too convoluted.  */
+   Return 1 if the char should be escaped as %XX, 0 otherwise.  */
  
-static inline enum copy_method
-decide_copy_method (const char *p)
+static inline int
+char_needs_escaping (const char *p)
  {
    if (*p == '%')
      {
        if (ISXDIGIT (*(p + 1)) && ISXDIGIT (*(p + 2)))
-       /* Prior to 1.10 this decoded %HH escapes corresponding to
-          "safe" chars, but that proved too obtrusive -- it's better
-          to always preserve the escapes found in the URL.  */
-       return cm_passthrough;
+       return 0;
        else
         /* Garbled %.. sequence: encode `%'. */
-       return cm_encode;
+       return 1;
      }
    else if (URL_UNSAFE_CHAR (*p) && !URL_RESERVED_CHAR (*p))
-    return cm_encode;
+    return 1;
    else
-    return cm_passthrough;
+    return 0;
  }
  
  /* Translate a %-escaped (but possibly non-conformant) input string S
@@ -299,15 +288,14 @@ decide_copy_method (const char *p)
  
         GET /abc%20def HTTP/1.0
  
-   It appears that the unsafe chars need to be quoted, for example
-   with url_escape.  But what if we're requested to download
+   It would appear that the unsafe chars need to be quoted, for
+   example with url_escape.  But what if we're requested to download
     `abc%20def'?  url_escape transforms "%" to "%25", which would leave
     us with `abc%2520def'.  This is incorrect -- since %-escapes are
     part of URL syntax, "%20" is the correct way to denote a literal
-   space on the Wget command line.  This leaves us in the conclusion
-   that in that case Wget should not call url_escape, but leave the
-   `%20' as is.  This is clearly contradictory, but it only gets
-   worse.
+   space on the Wget command line.  This leads to the conclusion that
+   in that case Wget should not call url_escape, but leave the `%20'
+   as is.  This is clearly contradictory, but it only gets worse.
  
     What if the requested URI is `abc%20 def'?  If we call url_escape,
     we end up with `/abc%2520%20def', which is almost certainly not
@@ -368,19 +356,11 @@ reencode_escapes (const char *s)
  
    int encode_count = 0;
  
-  /* First, pass through the string to see if there's anything to do,
+  /* First pass: inspect the string to see if there's anything to do,
       and to calculate the new length.  */
    for (p1 = s; *p1; p1++)
-    {
-      switch (decide_copy_method (p1))
-       {
-       case cm_encode:
-         ++encode_count;
-         break;
-       case cm_passthrough:
-         break;
-       }
-    }
+    if (char_needs_escaping (p1))
+      ++encode_count;
  
    if (!encode_count)
      /* The string is good as it is. */
@@ -391,25 +371,22 @@ reencode_escapes (const char *s)
    newlen = oldlen + 2 * encode_count;
    newstr = xmalloc (newlen + 1);
  
+  /* Second pass: copy the string to the destination address, encoding
+     chars when needed.  */
    p1 = s;
    p2 = newstr;
  
    while (*p1)
-    {
-      switch (decide_copy_method (p1))
-       {
-       case cm_encode:
-         {
-           unsigned char c = *p1++;
-           *p2++ = '%';
-           *p2++ = XNUM_TO_DIGIT (c >> 4);
-           *p2++ = XNUM_TO_DIGIT (c & 0xf);
-         }
-         break;
-       case cm_passthrough:
-         *p2++ = *p1++;
-       }
-    }
+    if (char_needs_escaping (p1))
+      {
+       unsigned char c = *p1++;
+       *p2++ = '%';
+       *p2++ = XNUM_TO_DIGIT (c >> 4);
+       *p2++ = XNUM_TO_DIGIT (c & 0xf);
+      }
+    else
+      *p2++ = *p1++;
+
    *p2 = '\0';
    assert (p2 - newstr == newlen);
    return newstr;
@@ -548,6 +525,12 @@ rewrite_shorthand_url (const char *url)
    if (p == url)
      return NULL;
  
+  /* If we're looking at "://", it means the URL uses a scheme we
+     don't support, which may include "https" when compiled without
+     SSL support.  Don't bogusly rewrite such URLs.  */
+  if (p[0] == ':' && p[1] == '/' && p[2] == '/')
+    return NULL;
+
    if (*p == ':')
      {
        const char *pp;
@@ -578,7 +561,7 @@ rewrite_shorthand_url (const char *url)
      }
  }
  \f
-static void split_path PARAMS ((const char *, char **, char **));
+static void split_path (const char *, char **, char **);
  
  /* Like strpbrk, with the exception that it returns the pointer to the
     terminating zero (end-of-string aka "eos") if no matching character
@@ -592,26 +575,26 @@ static void split_path PARAMS ((const char *, char **, char **));
     help because the check for literal accept is in the
     preprocessor.)  */
  
-#ifdef __GNUC__
+#if defined(__GNUC__) && __GNUC__ >= 3
  
  #define strpbrk_or_eos(s, accept) ({           \
    char *SOE_p = strpbrk (s, accept);           \
    if (!SOE_p)                                  \
-    SOE_p = (char *)s + strlen (s);            \
+    SOE_p = strchr (s, '\0');                  \
    SOE_p;                                       \
  })
  
-#else  /* not __GNUC__ */
+#else  /* not __GNUC__ or old gcc */
  
-static char *
+static inline char *
  strpbrk_or_eos (const char *s, const char *accept)
  {
    char *p = strpbrk (s, accept);
    if (!p)
-    p = (char *)s + strlen (s);
+    p = strchr (s, '\0');
    return p;
  }
-#endif
+#endif /* not __GNUC__ or old gcc */
  
  /* Turn STR into lowercase; return non-zero if a character was
     actually changed. */
@@ -680,7 +663,7 @@ url_parse (const char *url, int *error)
    if (scheme == SCHEME_INVALID)
      {
        error_code = PE_UNSUPPORTED_SCHEME;
-      goto err;
+      goto error;
      }
  
    url_encoded = reencode_escapes (url);
@@ -718,7 +701,7 @@ url_parse (const char *url, int *error)
        if (!host_e)
         {
           error_code = PE_UNTERMINATED_IPV6_ADDRESS;
-         goto err;
+         goto error;
         }
  
  #ifdef ENABLE_IPV6
@@ -726,14 +709,14 @@ url_parse (const char *url, int *error)
        if (!is_valid_ipv6_address(host_b, host_e))
         {
           error_code = PE_INVALID_IPV6_ADDRESS;
-         goto err;
+         goto error;
         }
  
        /* Continue parsing after the closing ']'. */
        p = host_e + 1;
  #else
        error_code = PE_IPV6_NOT_SUPPORTED;
-      goto err;
+      goto error;
  #endif
      }
    else
@@ -745,7 +728,7 @@ url_parse (const char *url, int *error)
    if (host_b == host_e)
      {
        error_code = PE_EMPTY_HOST;
-      goto err;
+      goto error;
      }
  
    port = scheme_default_port (scheme);
@@ -770,7 +753,7 @@ url_parse (const char *url, int *error)
                   /* http://host:12randomgarbage/blah */
                   /*               ^                  */
                   error_code = PE_BAD_PORT_NUMBER;
-                 goto err;
+                 goto error;
                 }
               port = 10 * port + (*pp - '0');
               /* Check for too large port numbers here, before we have
@@ -778,7 +761,7 @@ url_parse (const char *url, int *error)
               if (port > 65535)
                 {
                   error_code = PE_BAD_PORT_NUMBER;
-                 goto err;
+                 goto error;
                 }
             }
         }
@@ -837,7 +820,7 @@ url_parse (const char *url, int *error)
        if (!parse_credentials (uname_b, uname_e - 1, &user, &passwd))
         {
           error_code = PE_INVALID_USER_NAME;
-         goto err;
+         goto error;
         }
      }
  
@@ -888,11 +871,10 @@ url_parse (const char *url, int *error)
        else
         u->url = url_encoded;
      }
-  url_encoded = NULL;
  
    return u;
  
- err:
+ error:
    /* Cleanup in case of error: */
    if (url_encoded && url_encoded != url)
      xfree (url_encoded);
@@ -1002,7 +984,7 @@ char *
  url_full_path (const struct url *url)
  {
    int length = full_path_length (url);
-  char *full_path = (char *) xmalloc (length + 1);
+  char *full_path = xmalloc (length + 1);
  
    full_path_write (url, full_path);
    full_path[length] = '\0';
@@ -1081,7 +1063,7 @@ sync_path (struct url *u)
        *p++ = '/';
        memcpy (p, efile, filelen);
        p += filelen;
-      *p++ = '\0';
+      *p = '\0';
      }
  
    u->path = newpath;
@@ -1135,7 +1117,7 @@ url_free (struct url *url)
  }
  \f
  /* Create all the necessary directories for PATH (a file).  Calls
-   mkdirhier() internally.  */
+   make_directory internally.  */
  int
  mkalldirs (const char *path)
  {
@@ -1710,7 +1692,7 @@ uri_merge (const char *base, const char *link)
         start_insert = base;
  
        span = start_insert - base;
-      merge = (char *)xmalloc (span + linklength + 1);
+      merge = xmalloc (span + linklength + 1);
        if (span)
         memcpy (merge, base, span);
        memcpy (merge + span, link, linklength);
@@ -1765,7 +1747,7 @@ uri_merge (const char *base, const char *link)
         start_insert = slash;
  
        span = start_insert - base;
-      merge = (char *)xmalloc (span + linklength + 1);
+      merge = xmalloc (span + linklength + 1);
        if (span)
         memcpy (merge, base, span);
        memcpy (merge + span, link, linklength);
@@ -1803,7 +1785,7 @@ uri_merge (const char *base, const char *link)
         }
  
        span = start_insert - base;
-      merge = (char *)xmalloc (span + linklength + 1);
+      merge = xmalloc (span + linklength + 1);
        if (span)
         memcpy (merge, base, span);
        if (need_explicit_slash)