uerr_t err, ret = TRYLIMEXC;
time_t tmr = -1; /* remote time-stamp */
struct http_stat hstat; /* HTTP status */
- struct_stat st;
+ struct_stat st;
bool send_head_first = false;
+ char *file_name;
/* Assert that no value for *LOCAL_FILE was passed. */
assert (local_file == NULL || *local_file == NULL);
/* Send preliminary HEAD request if -N is given and we have an existing
* destination file. */
- if (opt.timestamping
+ file_name = url_file_name (u);
+ if (opt.timestamping
&& !opt.content_disposition
- && file_exists_p (url_file_name (u)))
+ && file_exists_p (file_name))
send_head_first = true;
-
+ xfree (file_name);
+
/* THE loop */
do
{
for (t = url; *t; t++)
{
char *filename = NULL, *redirected_URL = NULL;
- int dt;
+ int dt, url_err;
- struct url *url_parsed = url_parse (*t, &url_err);
++ struct url *url_parsed = url_parse (*t, &url_err, NULL, false);
- if ((opt.recursive || opt.page_requisites)
- && (url_scheme (*t) != SCHEME_FTP || url_uses_proxy (*t)))
+ if (!url_parsed)
{
- int old_follow_ftp = opt.follow_ftp;
-
- /* Turn opt.follow_ftp on in case of recursive FTP retrieval */
- if (url_scheme (*t) == SCHEME_FTP)
- opt.follow_ftp = 1;
-
- status = retrieve_tree (*t, NULL);
-
- opt.follow_ftp = old_follow_ftp;
+ char *error = url_error (*t, url_err);
+ logprintf (LOG_NOTQUIET, "%s: %s.\n", *t, error);
+ xfree (error);
+ status = URLERROR;
}
else
{
- struct iri *i = iri_new ();
- set_uri_encoding (i, opt.locale, true);
- status = retrieve_url (*t, &filename, &redirected_URL, NULL, &dt,
- opt.recursive, i);
- iri_free (i);
- }
+ if ((opt.recursive || opt.page_requisites)
+ && (url_scheme (*t) != SCHEME_FTP || url_uses_proxy (url_parsed)))
+ {
+ int old_follow_ftp = opt.follow_ftp;
- if (opt.delete_after && file_exists_p(filename))
- {
- DEBUGP (("Removing file due to --delete-after in main():\n"));
- logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
- if (unlink (filename))
- logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
- }
+ /* Turn opt.follow_ftp on in case of recursive FTP retrieval */
+ if (url_scheme (*t) == SCHEME_FTP)
+ opt.follow_ftp = 1;
+
- status = retrieve_tree (url_parsed);
++ status = retrieve_tree (url_parsed, NULL);
- xfree_null (redirected_URL);
- xfree_null (filename);
+ opt.follow_ftp = old_follow_ftp;
+ }
+ else
- status = retrieve_url (url_parsed, *t, &filename, &redirected_URL, NULL, &dt, opt.recursive);
++ {
++ struct iri *i = iri_new ();
++ set_uri_encoding (i, opt.locale, true);
++ status = retrieve_url (url_parsed, *t, &filename, &redirected_URL,
++ NULL, &dt, opt.recursive, i);
++ iri_free (i);
++ }
+
+ if (opt.delete_after && file_exists_p(filename))
+ {
+ DEBUGP (("Removing file due to --delete-after in main():\n"));
+ logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
+ if (unlink (filename))
+ logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
+ }
+ xfree_null (redirected_URL);
+ xfree_null (filename);
+ url_free (url_parsed);
+ }
}
/* And then from the input file, if any. */
}
\f
static bool download_child_p (const struct urlpos *, struct url *, int,
- struct url *, struct hash_table *);
+ struct url *, struct hash_table *, struct iri *);
-static bool descend_redirect_p (const char *, const char *, int,
+static bool descend_redirect_p (const char *, struct url *, int,
- struct url *, struct hash_table *);
+ struct url *, struct hash_table *, struct iri *);
/* Retrieve a part of the web beginning with START_URL. This used to
options, add it to the queue. */
uerr_t
- retrieve_tree (struct url *start_url_parsed)
-retrieve_tree (const char *start_url, struct iri *pi)
++retrieve_tree (struct url *start_url_parsed, struct iri *pi)
{
uerr_t status = RETROK;
the queue, but haven't been downloaded yet. */
struct hash_table *blacklist;
- struct url *start_url_parsed;
+ int up_error_code;
- start_url_parsed = url_parse (start_url, &up_error_code, i, true);
- if (!start_url_parsed)
- {
- char *error = url_error (start_url, up_error_code);
- logprintf (LOG_NOTQUIET, "%s: %s.\n", start_url, error);
- xfree (error);
- return URLERROR;
- }
-
+ struct iri *i = iri_new ();
+
+ #define COPYSTR(x) ((x) ? xstrdup (x) : NULL)
+ /* Duplicate pi struct if not NULL */
+ if (pi)
+ {
+ i->uri_encoding = COPYSTR (pi->uri_encoding);
+ i->content_encoding = COPYSTR (pi->content_encoding);
+ i->utf8_encode = pi->utf8_encode;
+ }
+ else
+ set_uri_encoding (i, opt.locale, true);
+ #undef COPYSTR
+
queue = url_queue_new ();
blacklist = make_string_hash_table (0);
}
else
{
- int dt = 0;
+ int dt = 0, url_err;
char *redirected = NULL;
- struct url *url_parsed = url_parse (url, &url_err);
++ struct url *url_parsed = url_parse (url, &url_err, i, false);
- if (!url_parsed)
- {
- char *error = url_error (url, url_err);
- logprintf (LOG_NOTQUIET, "%s: %s.\n", url, error);
- xfree (error);
- status = URLERROR;
- }
- else
- {
- status = retrieve_url (url_parsed, url, &file, &redirected,
- referer, &dt, false);
- }
- status = retrieve_url (url, &file, &redirected, referer, &dt,
- false, i);
++ status = retrieve_url (url_parsed, url, &file, &redirected, referer,
++ &dt, false, i);
if (html_allowed && file && status == RETROK
&& (dt & RETROKF) && (dt & TEXTHTML))
want to follow it. */
if (descend)
{
- if (!descend_redirect_p (redirected, url, depth,
+ if (!descend_redirect_p (redirected, url_parsed, depth,
- start_url_parsed, blacklist))
+ start_url_parsed, blacklist, i))
descend = false;
else
/* Make sure that the old pre-redirect form gets
it is merely a simple-minded wrapper around download_child_p. */
static bool
-descend_redirect_p (const char *redirected, const char *original, int depth,
+descend_redirect_p (const char *redirected, struct url *orig_parsed, int depth,
- struct url *start_url_parsed, struct hash_table *blacklist)
+ struct url *start_url_parsed, struct hash_table *blacklist,
+ struct iri *iri)
{
- struct url *orig_parsed, *new_parsed;
+ struct url *new_parsed;
struct urlpos *upos;
bool success;
- orig_parsed = url_parse (original, NULL, NULL, false);
assert (orig_parsed != NULL);
- new_parsed = url_parse (redirected, NULL);
+ new_parsed = url_parse (redirected, NULL, NULL, false);
assert (new_parsed != NULL);
upos = xnew0 (struct urlpos);
upos->url = new_parsed;
success = download_child_p (upos, orig_parsed, depth,
- start_url_parsed, blacklist);
+ start_url_parsed, blacklist, iri);
- url_free (orig_parsed);
url_free (new_parsed);
xfree (upos);
struct urlpos;
void recursive_cleanup (void);
- uerr_t retrieve_tree (struct url *);
-uerr_t retrieve_tree (const char *, struct iri *);
++uerr_t retrieve_tree (struct url *, struct iri *);
#endif /* RECUR_H */
uerr_t err;
char *robots_url = uri_merge (url, RES_SPECS_LOCATION);
int saved_ts_val = opt.timestamping;
- int saved_sp_val = opt.spider;
+ int saved_sp_val = opt.spider, url_err;
+ struct url * url_parsed;
+ /* Copy server URI encoding for a possible IDNA transformation, no need to
+ encode the full URI in UTF-8 because "robots.txt" is plain ASCII */
+ set_uri_encoding (i, iri->uri_encoding, false);
+ i->utf8_encode = false;
+
logputs (LOG_VERBOSE, _("Loading robots.txt; please ignore errors.\n"));
*file = NULL;
opt.timestamping = false;
opt.spider = false;
- err = retrieve_url (robots_url, file, NULL, NULL, NULL, false, i);
+
- url_parsed = url_parse (robots_url, &url_err);
++ url_parsed = url_parse (robots_url, &url_err, iri, true);
+ if (!url_parsed)
+ {
+ char *error = url_error (robots_url, url_err);
+ logprintf (LOG_NOTQUIET, "%s: %s.\n", robots_url, error);
+ xfree (error);
+ err = URLERROR;
+ }
+ else
+ {
+ err = retrieve_url (url_parsed, robots_url, file, NULL, NULL, NULL,
- false);
++ false, i);
+ url_free(url_parsed);
+ }
+
opt.timestamping = saved_ts_val;
- opt.spider = saved_sp_val;
+ opt.spider = saved_sp_val;
xfree (robots_url);
+ iri_free (i);
if (err != RETROK && *file != NULL)
{
multiple points. */
uerr_t
-retrieve_url (const char *origurl, char **file, char **newloc,
- const char *refurl, int *dt, bool recursive, struct iri *iri)
+retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
- char **newloc, const char *refurl, int *dt, bool recursive)
++ char **newloc, const char *refurl, int *dt, bool recursive,
++ struct iri *iri)
{
uerr_t result;
char *url;
if (file)
*file = NULL;
- u = url_parse (url, &up_error_code, iri, true);
- if (!u)
- {
- char *error = url_error (url, up_error_code);
- logprintf (LOG_NOTQUIET, "%s: %s.\n", url, error);
- xfree (url);
- xfree (error);
- return URLERROR;
- }
-
+ second_try:
+ DEBUGP (("[IRI Retrieving %s with %s (UTF-8=%d)\n", quote_n (0, url),
+ iri->uri_encoding ? quote_n (1, iri->uri_encoding) : "None",
+ iri->utf8_encode));
+
if (!refurl)
refurl = opt.referer;
status = RETROK; /* Suppose everything is OK. */
*count = 0; /* Reset the URL count. */
-
+
+ /* sXXXav : Assume filename and links in the file are in the locale */
+ set_uri_encoding (iri, opt.locale, true);
+ set_content_encoding (iri, opt.locale);
+
if (url_has_scheme (url))
{
- int dt;
+ int dt, url_err;
uerr_t status;
- struct url * url_parsed = url_parse(url, &url_err);
++ struct url * url_parsed = url_parse(url, &url_err, NULL, true);
+
+ if (!url_parsed)
+ {
+ char *error = url_error (url, url_err);
+ logprintf (LOG_NOTQUIET, "%s: %s.\n", url, error);
+ xfree (error);
+ return URLERROR;
+ }
if (!opt.base_href)
opt.base_href = xstrdup (url);
- status = retrieve_url (url_parsed, url, &input_file, NULL, NULL, &dt, false);
- status = retrieve_url (url, &input_file, NULL, NULL, &dt, false, iri);
++ status = retrieve_url (url_parsed, url, &input_file, NULL, NULL, &dt,
++ false, iri);
if (status != RETROK)
return status;
int old_follow_ftp = opt.follow_ftp;
/* Turn opt.follow_ftp on in case of recursive FTP retrieval */
- if (cur_url->url->scheme == SCHEME_FTP)
+ if (cur_url->url->scheme == SCHEME_FTP)
opt.follow_ftp = 1;
-
- status = retrieve_tree (cur_url->url);
+
- status = retrieve_tree (cur_url->url->url, iri);
++ status = retrieve_tree (cur_url->url, iri);
opt.follow_ftp = old_follow_ftp;
}
else
- {
- status = retrieve_url (cur_url->url, cur_url->url->url, &filename,
- &new_file, NULL, &dt, opt.recursive);
- }
- status = retrieve_url (cur_url->url->url, &filename, &new_file, NULL,
- &dt, opt.recursive, iri);
++ status = retrieve_url (cur_url->url, cur_url->url->url, &filename,
++ &new_file, NULL, &dt, opt.recursive, iri);
if (filename && opt.delete_after && file_exists_p (filename))
{
char *fd_read_hunk (int, hunk_terminator_t, long, long);
char *fd_read_line (int);
- uerr_t retrieve_url (struct url *, const char *, char **, char **, const char *, int *, bool);
-uerr_t retrieve_url (const char *, char **, char **, const char *, int *,
- bool, struct iri *);
++uerr_t retrieve_url (struct url *, const char *, char **, char **,
++ const char *, int *, bool, struct iri *);
uerr_t retrieve_from_file (const char *, bool, int *);
const char *retr_rate (wgint, double);
int port;
char *user = NULL, *passwd = NULL;
- char *url_encoded = NULL;
- char *url_encoded = NULL, *new_url = NULL;
++ const char *url_encoded = NULL;
++ char *new_url = NULL;
int error_code;
if (url_encoded == url)
u->url = xstrdup (url);
else
-- u->url = url_encoded;
++ u->url = (char *) url_encoded;
}
return u;
error:
/* Cleanup in case of error: */
if (url_encoded && url_encoded != url)
-- xfree (url_encoded);
++ xfree ((char *) url_encoded);
/* Transmit the error code to the caller, if the caller wants to
know. */
+ 2008-12-04 Micah Cowan <micah@cowan.name> (not copyrightable)
+
+ * run-px, Test-idn-robots.px: Added test for robots-file
+ downloads.
+
+ * Test-idn-cmd.px, Test-idn-meta.px, Test-idn-headers.px:
+ Fix test names.
+
+ 2008-11-26 Micah Cowan <micah@cowan.name> (not copyrightable)
+
+ * Test-ftp-iri-disabled.px, Test-ftp-iri-fallback.px,
+ Test-ftp-iri.px, Test-idn-cmd.px, Test-idn-headers.px,
+ Test-idn-meta.px, Test-iri-disabled.px,
+ Test-iri-forced-remote.px, Test-iri-list.px, Test-iri.px: More
+ module-scope warnings.
+
+2009-06-14 Micah Cowan <micah@cowan.name>
+
+ * Makefile.am (EXTRA_DIST): Include all the tests, run-px, and
+ certs/, to make distcheck happy.
+
+2009-06-11 Benjamin Wolsey <bwy@benjaminwolsey.de>
+
+ * Test-proxied-https-auth.px: Take an optional argument for the
+ top source directory, so we can find the cert and key.
+
+ * run-px: Provide the top source directory as an argument, so
+ scripts can find their way around.
+
+2009-04-11 Steven Schubiger <stsc@member.fsf.org>
+
+ * run-px: Skip testing with real rc files by setting
+ SYSTEM_WGETRC and WGETRC to /dev/null.
+
+2009-02-25 Benjamin Wolsey <bwy@benjaminwolsey.de>
+
+ * Makefile.am (run-px-tests): Ensure run-px is run from srcdir.
+
+ * run-px: Include modules from srcdir.
+
2008-11-25 Steven Schubiger <stsc@members.fsf.org>
* WgetTest.pm.in: Remove the magic interpreter line;