Merging to bring en@*.po back in (and to remove them from .hgignore).

[wget] / src / url.c
diff --git a/src/url.c b/src/url.c

index 04ecb3a4f3429170bf0c574df531264edcb77023..2f6dc784939eaf27dd469d7f0169301b1ec4384c 100644 (file)
--- a/src/url.c
+++ b/src/url.c
@@ -1,5 +1,6 @@
  /* URL handling.
-   Copyright (C) 1996-2007 Free Software Foundation, Inc.
+   Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
+   2004, 2005, 2006, 2007 Free Software Foundation, Inc.
  
  This file is part of GNU Wget.
  
@@ -26,7 +27,7 @@ modify this file, you may extend this exception to your version of the
  file, but you are not obligated to do so.  If you do not wish to do
  so, delete this exception statement from your version.  */
  
-#include <config.h>
+#include "wget.h"
  
  #include <stdio.h>
  #include <stdlib.h>
@@ -37,7 +38,6 @@ so, delete this exception statement from your version.  */
  #include <errno.h>
  #include <assert.h>
  
-#include "wget.h"
  #include "utils.h"
  #include "url.h"
  #include "host.h"  /* for is_valid_ipv6_address */
@@ -182,7 +182,7 @@ url_unescape (char *s)
          {
            char c;
            /* Do nothing if '%' is not followed by two hex digits. */
-          if (!h[1] || !h[2] || !(ISXDIGIT (h[1]) && ISXDIGIT (h[2])))
+          if (!h[1] || !h[2] || !(c_isxdigit (h[1]) && c_isxdigit (h[2])))
              goto copychar;
            c = X2DIGITS_TO_NUM (h[1], h[2]);
            /* Don't unescape %00 because there is no way to insert it
@@ -271,7 +271,7 @@ char_needs_escaping (const char *p)
  {
    if (*p == '%')
      {
-      if (ISXDIGIT (*(p + 1)) && ISXDIGIT (*(p + 2)))
+      if (c_isxdigit (*(p + 1)) && c_isxdigit (*(p + 2)))
          return false;
        else
          /* Garbled %.. sequence: encode `%'. */
@@ -427,7 +427,7 @@ url_scheme (const char *url)
    return SCHEME_INVALID;
  }
  
-#define SCHEME_CHAR(ch) (ISALNUM (ch) || (ch) == '-' || (ch) == '+')
+#define SCHEME_CHAR(ch) (c_isalnum (ch) || (ch) == '-' || (ch) == '+')
  
  /* Return 1 if the URL begins with any "scheme", 0 otherwise.  As
     currently implemented, it returns true if URL begins with
@@ -589,10 +589,10 @@ lowercase_str (char *str)
  {
    bool changed = false;
    for (; *str; str++)
-    if (ISUPPER (*str))
+    if (c_isupper (*str))
        {
          changed = true;
-        *str = TOLOWER (*str);
+        *str = c_tolower (*str);
        }
    return changed;
  }
@@ -768,7 +768,7 @@ url_parse (const char *url, int *error)
        if (port_b != port_e)
          for (port = 0, pp = port_b; pp < port_e; pp++)
            {
-            if (!ISDIGIT (*pp))
+            if (!c_isdigit (*pp))
                {
                  /* http://host:12randomgarbage/blah */
                  /*               ^                  */
@@ -1372,9 +1372,9 @@ append_uri_pathel (const char *b, const char *e, bool escaped,
        for (q = TAIL (dest); q < TAIL (dest) + outlen; ++q)
          {
            if (opt.restrict_files_case == restrict_lowercase)
-            *q = TOLOWER (*q);
+            *q = c_tolower (*q);
            else
-            *q = TOUPPER (*q);
+            *q = c_toupper (*q);
          }
      }
            
@@ -1939,10 +1939,7 @@ getchar_from_escaped_string (const char *str, char *c)
    
    if (p[0] == '%')
      {
-      if (p[1] == 0)
-        return 0; /* error: invalid string */
-
-      if (p[1] == '%')
+      if (!c_isxdigit(p[1]) || !c_isxdigit(p[2]))
          {
            *c = '%';
            return 1;
@@ -1953,8 +1950,13 @@ getchar_from_escaped_string (const char *str, char *c)
              return 0; /* error: invalid string */
  
            *c = X2DIGITS_TO_NUM (p[1], p[2]);
-
-          return 3;
+          if (URL_RESERVED_CHAR(*c))
+            {
+              *c = '%';
+              return 1;
+            }
+          else
+            return 3;
          }
      }
    else
@@ -1971,14 +1973,15 @@ are_urls_equal (const char *u1, const char *u2)
    const char *p, *q;
    int pp, qq;
    char ch1, ch2;
+  assert(u1 && u2);
  
    p = u1;
    q = u2;
  
-  while (*p 
+  while (*p && *q
           && (pp = getchar_from_escaped_string (p, &ch1))
           && (qq = getchar_from_escaped_string (q, &ch2))
-         && (TOLOWER(ch1) == TOLOWER(ch2)))
+         && (c_tolower(ch1) == c_tolower(ch2)))
      {
        p += pp;
        q += qq;
@@ -2111,6 +2114,8 @@ test_are_urls_equal()
      { "http://www.adomain.com/apath/", "http://www.adomain.com/anotherpath/", false },
      { "http://www.adomain.com/apath/", "http://www.anotherdomain.com/path/",  false },
      { "http://www.adomain.com/~path/", "http://www.adomain.com/%7epath/",     true },
+    { "http://www.adomain.com/longer-path/", "http://www.adomain.com/path/",  false },
+    { "http://www.adomain.com/path%2f", "http://www.adomain.com/path/",       false },
    };
    
    for (i = 0; i < sizeof(test_array)/sizeof(test_array[0]); ++i)