X-Git-Url: http://sjero.net/git/?p=wget;a=blobdiff_plain;f=src%2Fretr.c;h=ae8ef3ef1f772366efd446311cccf446b2168a64;hp=3234286baa8ea48c16a97d62ec52a49561ccc821;hb=ccd62071dcbdfc0269813746b9f51ff9c23261db;hpb=5dd09d9ba51f039acb217bf2fd5c7fdd340ac946
diff --git a/src/retr.c b/src/retr.c
index 3234286b..ae8ef3ef 100644
--- a/src/retr.c
+++ b/src/retr.c
@@ -1,6 +1,6 @@
/* File retrieval.
Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
- 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
+ 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
This file is part of GNU Wget.
@@ -17,17 +17,18 @@ GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Wget. If not, see <http://www.gnu.org/licenses/>.
-In addition, as a special exception, the Free Software Foundation
-gives permission to link the code of its release of Wget with the
-OpenSSL project's "OpenSSL" library (or with modified versions of it
-that use the same license as the "OpenSSL" library), and distribute
-the linked executables. You must obey the GNU General Public License
-in all respects for all of the code used other than "OpenSSL". If you
-modify this file, you may extend this exception to your version of the
-file, but you are not obligated to do so. If you do not wish to do
-so, delete this exception statement from your version. */
+Additional permission under GNU GPL version 3 section 7
-#include
+If you modify this program, or any covered work, by linking or
+combining it with the OpenSSL project's OpenSSL library (or a
+modified version of that library), containing parts covered by the
+terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
+grants you additional permission to convey the resulting work.
+Corresponding Source for a non-source form of such a combination
+shall include the source code for the parts of OpenSSL used as well
+as that of the covered work. */
+
+#include "wget.h"
#include
#include
@@ -38,7 +39,6 @@ so, delete this exception statement from your version. */
#include
#include
-#include "wget.h"
#include "utils.h"
#include "retr.h"
#include "progress.h"
@@ -51,6 +51,8 @@ so, delete this exception statement from your version. */
#include "hash.h"
#include "convert.h"
#include "ptimer.h"
+#include "iri.h"
+#include "html-url.h"
/* Total size of downloaded files. Used to enforce quota. */
SUM_SIZE_INT total_downloaded_bytes;
@@ -596,7 +598,7 @@ static char *getproxy (struct url *);
uerr_t
retrieve_url (const char *origurl, char **file, char **newloc,
- const char *refurl, int *dt, bool recursive)
+ const char *refurl, int *dt, bool recursive, struct iri *iri)
{
uerr_t result;
char *url;
@@ -624,7 +626,8 @@ retrieve_url (const char *origurl, char **file, char **newloc,
if (file)
*file = NULL;
- u = url_parse (url, &up_error_code);
+ second_try:
+ u = url_parse (url, &up_error_code, iri);
if (!u)
{
logprintf (LOG_NOTQUIET, "%s: %s.\n", url, url_error (up_error_code));
@@ -632,6 +635,8 @@ retrieve_url (const char *origurl, char **file, char **newloc,
return URLERROR;
}
+ printf ("[Retrieving %s with %s (UTF-8=%d)\n", url, iri->uri_encoding, iri->utf8_encode);
+
if (!refurl)
refurl = opt.referer;
@@ -645,8 +650,13 @@ retrieve_url (const char *origurl, char **file, char **newloc,
proxy = getproxy (u);
if (proxy)
{
+ /* sXXXav : could a proxy include a path ??? */
+ struct iri *pi = iri_new ();
+ set_uri_encoding (pi, opt.locale);
+ pi->utf8_encode = false;
+
/* Parse the proxy URL. */
- proxy_url = url_parse (proxy, &up_error_code);
+ proxy_url = url_parse (proxy, &up_error_code, NULL);
if (!proxy_url)
{
logprintf (LOG_NOTQUIET, _("Error parsing proxy URL %s: %s.\n"),
@@ -671,7 +681,7 @@ retrieve_url (const char *origurl, char **file, char **newloc,
#endif
|| (proxy_url && proxy_url->scheme == SCHEME_HTTP))
{
- result = http_loop (u, &mynewloc, &local_file, refurl, dt, proxy_url);
+ result = http_loop (u, &mynewloc, &local_file, refurl, dt, proxy_url, iri);
}
else if (u->scheme == SCHEME_FTP)
{
@@ -721,8 +731,13 @@ retrieve_url (const char *origurl, char **file, char **newloc,
xfree (mynewloc);
mynewloc = construced_newloc;
+ /* Reset UTF-8 encoding state, keep the URI encoding and reset
+ the content encoding. */
+ iri->utf8_encode = opt.enable_iri;
+ set_content_encoding (iri, NULL);
+
/* Now, see if this new location makes sense. */
- newloc_parsed = url_parse (mynewloc, &up_error_code);
+ newloc_parsed = url_parse (mynewloc, &up_error_code, iri);
if (!newloc_parsed)
{
logprintf (LOG_NOTQUIET, "%s: %s.\n", escnonprint_uri (mynewloc),
@@ -769,8 +784,21 @@ retrieve_url (const char *origurl, char **file, char **newloc,
goto redirected;
}
- if (local_file)
+ /* Try to not encode in UTF-8 if fetching failed */
+ if (!(*dt & RETROKF) && iri->utf8_encode)
{
+ iri->utf8_encode = false;
+ printf ("[Fallbacking to non-utf8 for `%s'\n", url);
+ goto second_try;
+ }
+
+ if (local_file && *dt & RETROKF)
+ {
+ register_download (u->url, local_file);
+ if (redirection_count && 0 != strcmp (origurl, u->url))
+ register_redirection (origurl, u->url);
+ if (*dt & TEXTHTML)
+ register_html (u->url, local_file);
if (*dt & RETROKF)
{
register_download (u->url, local_file);
@@ -778,6 +806,8 @@ retrieve_url (const char *origurl, char **file, char **newloc,
register_redirection (origurl, u->url);
if (*dt & TEXTHTML)
register_html (u->url, local_file);
+ if (*dt & TEXTCSS)
+ register_css (u->url, local_file);
}
}
@@ -818,12 +848,38 @@ retrieve_from_file (const char *file, bool html, int *count)
{
uerr_t status;
struct urlpos *url_list, *cur_url;
+ struct iri *iri = iri_new();
+
+ char *input_file = NULL;
+ const char *url = file;
- url_list = (html ? get_urls_html (file, NULL, NULL)
- : get_urls_file (file));
status = RETROK; /* Suppose everything is OK. */
*count = 0; /* Reset the URL count. */
+ /* sXXXav : Assume filename and links in the file are in the locale */
+ set_content_encoding (iri, opt.locale);
+
+ if (url_has_scheme (url))
+ {
+ int dt;
+ uerr_t status;
+
+ if (!opt.base_href)
+ opt.base_href = xstrdup (url);
+
+ status = retrieve_url (url, &input_file, NULL, NULL, &dt, false, iri);
+ if (status != RETROK)
+ return status;
+
+ if (dt & TEXTHTML)
+ html = true;
+ }
+ else
+ input_file = (char *) file;
+
+ url_list = (html ? get_urls_html (input_file, NULL, NULL, iri)
+ : get_urls_file (input_file));
+
for (cur_url = url_list; cur_url; cur_url = cur_url->next, ++*count)
{
char *filename = NULL, *new_file = NULL;
@@ -843,15 +899,16 @@ retrieve_from_file (const char *file, bool html, int *count)
int old_follow_ftp = opt.follow_ftp;
/* Turn opt.follow_ftp on in case of recursive FTP retrieval */
- if (cur_url->url->scheme == SCHEME_FTP)
+ if (cur_url->url->scheme == SCHEME_FTP)
opt.follow_ftp = 1;
-
+
status = retrieve_tree (cur_url->url->url);
opt.follow_ftp = old_follow_ftp;
}
else
- status = retrieve_url (cur_url->url->url, &filename, &new_file, NULL, &dt, opt.recursive);
+ status = retrieve_url (cur_url->url->url, &filename, &new_file, NULL,
+ &dt, opt.recursive, iri);
if (filename && opt.delete_after && file_exists_p (filename))
{
@@ -1022,7 +1079,11 @@ bool
url_uses_proxy (const char *url)
{
bool ret;
- struct url *u = url_parse (url, NULL);
+ struct url *u;
+ struct iri *i = iri_new();
+ /* url was given in the command line, so use locale as encoding */
+ set_uri_encoding (i, opt.locale);
+ u= url_parse (url, NULL, i);
if (!u)
return false;
ret = getproxy (u) != NULL;