Automated merge.

author Micah Cowan <micah@cowan.name>

Mon, 1 Dec 2008 15:05:29 +0000 (07:05 -0800)

committer Micah Cowan <micah@cowan.name>

Mon, 1 Dec 2008 15:05:29 +0000 (07:05 -0800)
author Micah Cowan <micah@cowan.name>
Mon, 1 Dec 2008 15:05:29 +0000 (07:05 -0800)
committer Micah Cowan <micah@cowan.name>
Mon, 1 Dec 2008 15:05:29 +0000 (07:05 -0800)
diff --git a/src/html-url.c b/src/html-url.c

index c954cb97191b83f87b27a149696c1bcb66982001..e6ab232461d8e01170920e4db7f66a4676daeb90 100644 (file)
--- a/src/html-url.c
+++ b/src/html-url.c
@@ -288,7 +288,7 @@ append_url (const char *link_uri, int position, int size,
            return NULL;
          }
  
-      url = url_parse (link_uri, NULL, NULL);
+      url = url_parse (link_uri, NULL, NULL, false);
        if (!url)
          {
            DEBUGP (("%s: link \"%s\" doesn't parse.\n",
@@ -307,7 +307,7 @@ append_url (const char *link_uri, int position, int size,
        DEBUGP (("%s: merge(\"%s\", \"%s\") -> %s\n",
                 ctx->document_file, base, link_uri, complete_uri));
  
-      url = url_parse (complete_uri, NULL, NULL);
+      url = url_parse (complete_uri, NULL, NULL, false);
        if (!url)
          {
            DEBUGP (("%s: merged link \"%s\" doesn't parse.\n",
@@ -752,7 +752,7 @@ get_urls_file (const char *file)
            url_text = merged;
          }
  
-      url = url_parse (url_text, &up_error_code, NULL);
+      url = url_parse (url_text, &up_error_code, NULL, false);
        if (!url)
          {
            char *error = url_error (url_text, up_error_code);
diff --git a/src/iri.c b/src/iri.c

index e3909d50bb71f6767b8c23d9c0566f5098363b20..b1e0bf89cd9bc2a9caf55e2e533f1cc2b6a23d12 100644 (file)
--- a/src/iri.c
+++ b/src/iri.c
@@ -298,6 +298,7 @@ iri_new (void)
    struct iri *i = xmalloc (sizeof (struct iri));
    i->uri_encoding = opt.encoding_remote ? xstrdup (opt.encoding_remote) : NULL;
    i->content_encoding = NULL;
+  i->orig_url = NULL;
    i->utf8_encode = opt.enable_iri;
    return i;
  }
@@ -308,6 +309,7 @@ iri_free (struct iri *i)
  {
    xfree_null (i->uri_encoding);
    xfree_null (i->content_encoding);
+  xfree_null (i->orig_url);
    xfree (i);
  }
  
diff --git a/src/iri.h b/src/iri.h

index c024de724dcaf74a515c2a35464fb60945047a62..6ad2becfde1ce4ad1ebda6cd87109a0b6ff508f6 100644 (file)
--- a/src/iri.h
+++ b/src/iri.h
@@ -33,6 +33,7 @@ as that of the covered work.  */
  struct iri {
    char *uri_encoding;      /* Encoding of the uri to fetch */
    char *content_encoding;  /* Encoding of links inside the fetched file */
+  char *orig_url;          /* Copy of the URL saved by url_parse when it converts to UTF-8 */
    bool utf8_encode;        /* Will/Is the current url encoded in utf8 */
  };
  
diff --git a/src/recur.c b/src/recur.c

index 786824583c1553fd66254ac3c4fae055f8799e56..95581486b28e5e06a9d43ff023e73b7cf3e4a8ba 100644 (file)
--- a/src/recur.c
+++ b/src/recur.c
@@ -214,7 +214,7 @@ retrieve_tree (const char *start_url, struct iri *pi)
      set_uri_encoding (i, opt.locale, true);
  #undef COPYSTR
  
-  start_url_parsed = url_parse (start_url, &up_error_code, i);
+  start_url_parsed = url_parse (start_url, &up_error_code, i, true);
    if (!start_url_parsed)
      {
        char *error = url_error (start_url, up_error_code);
@@ -381,7 +381,7 @@ retrieve_tree (const char *start_url, struct iri *pi)
            if (children)
              {
                struct urlpos *child = children;
-              struct url *url_parsed = url_parse (url, NULL, i);
+              struct url *url_parsed = url_parse (url, NULL, i, false);
                struct iri *ci;
                char *referer_url = url;
                bool strip_auth = (url_parsed != NULL
@@ -694,10 +694,10 @@ descend_redirect_p (const char *redirected, const char *original, int depth,
    struct urlpos *upos;
    bool success;
  
-  orig_parsed = url_parse (original, NULL, NULL);
+  orig_parsed = url_parse (original, NULL, NULL, false);
    assert (orig_parsed != NULL);
  
-  new_parsed = url_parse (redirected, NULL, NULL);
+  new_parsed = url_parse (redirected, NULL, NULL, false);
    assert (new_parsed != NULL);
  
    upos = xnew0 (struct urlpos);
diff --git a/src/retr.c b/src/retr.c

index e3d62978f0b1ee4ea38adb1e36358794f4f2f82b..1d9d74782126dcd9e134a0f7e421e2edd903e384 100644 (file)
--- a/src/retr.c
+++ b/src/retr.c
@@ -626,7 +626,7 @@ retrieve_url (const char *origurl, char **file, char **newloc,
      *file = NULL;
  
   second_try:
-  u = url_parse (url, &up_error_code, iri);
+  u = url_parse (url, &up_error_code, iri, true);
    if (!u)
      {
        char *error = url_error (url, up_error_code);
@@ -658,7 +658,7 @@ retrieve_url (const char *origurl, char **file, char **newloc,
        pi->utf8_encode = false;
  
        /* Parse the proxy URL.  */
-      proxy_url = url_parse (proxy, &up_error_code, NULL);
+      proxy_url = url_parse (proxy, &up_error_code, NULL, true);
        if (!proxy_url)
          {
            char *error = url_error (proxy, up_error_code);
@@ -739,9 +739,12 @@ retrieve_url (const char *origurl, char **file, char **newloc,
           the content encoding. */
        iri->utf8_encode = opt.enable_iri;
        set_content_encoding (iri, NULL);
+      /* Forget the freed URL; url_parse below reads iri->orig_url.  */
+      xfree_null (iri->orig_url);
+      iri->orig_url = NULL;
  
        /* Now, see if this new location makes sense. */
-      newloc_parsed = url_parse (mynewloc, &up_error_code, iri);
+      newloc_parsed = url_parse (mynewloc, &up_error_code, iri, true);
        if (!newloc_parsed)
          {
            char *error = url_error (mynewloc, up_error_code);
@@ -794,7 +795,7 @@ retrieve_url (const char *origurl, char **file, char **newloc,
    if (!(*dt & RETROKF) && iri->utf8_encode)
      {
        iri->utf8_encode = false;
-      DEBUGP (("[IRI Fallbacking to non-utf8 for %s\n", quote (url)));
+      DEBUGP (("[IRI fallbacking to non-utf8 for %s\n", quote (url)));
        goto second_try;
      }
  
@@ -907,6 +908,8 @@ retrieve_from_file (const char *file, bool html, int *count)
  
        /* Reset UTF-8 encode status */
        iri->utf8_encode = opt.enable_iri;
+      xfree_null (iri->orig_url);
+      iri->orig_url = NULL;
  
        if ((opt.recursive || opt.page_requisites)
            && (cur_url->url->scheme != SCHEME_FTP || getproxy (cur_url->url)))
@@ -1100,7 +1103,7 @@ url_uses_proxy (const char *url)
    struct iri *i = iri_new();
    /* url was given in the command line, so use locale as encoding */
    set_uri_encoding (i, opt.locale, true);
-  u= url_parse (url, NULL, i);
+  u= url_parse (url, NULL, i, false);
    if (!u)
      return false;
    ret = getproxy (u) != NULL;
diff --git a/src/url.c b/src/url.c

index 31614794cfd3411614b708939264653a1d284546..86d099a7190b80aaec8bc1cf6771c49892364602 100644 (file)
--- a/src/url.c
+++ b/src/url.c
@@ -649,7 +649,7 @@ static const char *parse_errors[] = {
     error, and if ERROR is not NULL, also set *ERROR to the appropriate
     error code. */
  struct url *
-url_parse (const char *url, int *error, struct iri *iri)
+url_parse (const char *url, int *error, struct iri *iri, bool percent_encode)
  {
    struct url *u;
    const char *p;
@@ -681,13 +681,26 @@ url_parse (const char *url, int *error, struct iri *iri)
  
    if (iri && iri->utf8_encode)
      {
-      url_unescape ((char *) url);
-      iri->utf8_encode = remote_to_utf8 (iri, url, (const char **) &new_url);
+      iri->utf8_encode = remote_to_utf8 (iri, iri->orig_url ? iri->orig_url : url, (const char **) &new_url);
        if (!iri->utf8_encode)
          new_url = NULL;
+      else
+        {
+          /* Release any URL remembered by an earlier call before
+             storing the new copy, so it is not leaked.  */
+          xfree_null (iri->orig_url);
+          iri->orig_url = xstrdup (url);
+        }
      }
  
-  url_encoded = reencode_escapes (new_url ? new_url : url);
+  /* XXX: when PERCENT_ENCODE is false the URL is used as-is, without
+     going through reencode_escapes.  Could that introduce (security)
+     bugs?  */
+  if (percent_encode)
+    url_encoded = reencode_escapes (new_url ? new_url : url);
+  else
+    url_encoded = new_url ? new_url : url;
+
    p = url_encoded;
  
    if (new_url && url_encoded != new_url)
@@ -2001,12 +2007,12 @@ schemes_are_similar_p (enum url_scheme a, enum url_scheme b)
  \f
  static int
  getchar_from_escaped_string (const char *str, char *c)
-{  
+{
    const char *p = str;
  
    assert (str && *str);
    assert (c);
-  
+
    if (p[0] == '%')
      {
        if (!c_isxdigit(p[1]) || !c_isxdigit(p[2]))
@@ -2056,7 +2062,7 @@ are_urls_equal (const char *u1, const char *u2)
        p += pp;
        q += qq;
      }
-  
+
    return (*p == 0 && *q == 0 ? true : false);
  }
  \f
@@ -2165,7 +2171,7 @@ test_append_uri_pathel()
    } test_array[] = {
      { "http://www.yoyodyne.com/path/", "somepage.html", false, "http://www.yoyodyne.com/path/somepage.html" },
    };
-  
+
    for (i = 0; i < sizeof(test_array)/sizeof(test_array[0]); ++i) 
      {
        struct growable dest;
diff --git a/src/url.h b/src/url.h

index badd92522b90fc39e2b956fd111be9497a431d78..38eafca4b50997aebba6a5f92574c34a93ad0248 100644 (file)
--- a/src/url.h
+++ b/src/url.h
@@ -85,7 +85,7 @@ struct url
  char *url_escape (const char *);
  char *url_escape_unsafe_and_reserved (const char *);
  
-struct url *url_parse (const char *, int *, struct iri *iri);
+struct url *url_parse (const char *, int *, struct iri *iri, bool percent_encode);
  char *url_error (const char *, int);
  char *url_full_path (const struct url *);
  void url_set_dir (struct url *, const char *);
diff --git a/tests/Test-iri.px b/tests/Test-iri.px

index 662019e794a3d63ddbe02967ae8c37ba041b93c8..738c304a95fa82ceef274c40b2a6fe5e1661dcef 100755 (executable)
--- a/tests/Test-iri.px
+++ b/tests/Test-iri.px
@@ -215,9 +215,9 @@ my %expected_downloaded_files = (
  ###############################################################################
  
  my $the_test = HTTPTest->new (name => "Test-iri",
-                              input => \%urls, 
-                              cmdline => $cmdline, 
-                              errcode => $expected_error_code, 
+                              input => \%urls,
+                              cmdline => $cmdline,
+                              errcode => $expected_error_code,
                                output => \%expected_downloaded_files);
  exit $the_test->run();
author	Micah Cowan <micah@cowan.name>
	Mon, 1 Dec 2008 15:05:29 +0000 (07:05 -0800)
committer	Micah Cowan <micah@cowan.name>
	Mon, 1 Dec 2008 15:05:29 +0000 (07:05 -0800)
src/html-url.c		patch \| blob \| history
src/iri.c		patch \| blob \| history
src/iri.h		patch \| blob \| history
src/recur.c		patch \| blob \| history
src/retr.c		patch \| blob \| history
src/url.c		patch \| blob \| history
src/url.h		patch \| blob \| history
tests/Test-iri.px		patch \| blob \| history