X-Git-Url: http://sjero.net/git/?a=blobdiff_plain;f=src%2Fretr.c;h=fe176eafcfad22b5d539d2d82516c31d62d2b499;hb=84395897ad2d1c107be470946daba744b2e7ebe8;hp=3234286baa8ea48c16a97d62ec52a49561ccc821;hpb=e7d78dd2a75d0ea21a63ac4968df6079b8018cd5;p=wget diff --git a/src/retr.c b/src/retr.c index 3234286b..fe176eaf 100644 --- a/src/retr.c +++ b/src/retr.c @@ -1,6 +1,6 @@ /* File retrieval. Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, - 2004, 2005, 2006, 2007 Free Software Foundation, Inc. + 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc. This file is part of GNU Wget. @@ -17,17 +17,18 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License along with Wget. If not, see . -In addition, as a special exception, the Free Software Foundation -gives permission to link the code of its release of Wget with the -OpenSSL project's "OpenSSL" library (or with modified versions of it -that use the same license as the "OpenSSL" library), and distribute -the linked executables. You must obey the GNU General Public License -in all respects for all of the code used other than "OpenSSL". If you -modify this file, you may extend this exception to your version of the -file, but you are not obligated to do so. If you do not wish to do -so, delete this exception statement from your version. */ +Additional permission under GNU GPL version 3 section 7 -#include +If you modify this program, or any covered work, by linking or +combining it with the OpenSSL project's OpenSSL library (or a +modified version of that library), containing parts covered by the +terms of the OpenSSL or SSLeay licenses, the Free Software Foundation +grants you additional permission to convey the resulting work. +Corresponding Source for a non-source form of such a combination +shall include the source code for the parts of OpenSSL used as well +as that of the covered work. */ + +#include "wget.h" #include #include @@ -38,7 +39,6 @@ so, delete this exception statement from your version. */ #include #include -#include "wget.h" #include "utils.h" #include "retr.h" #include "progress.h" @@ -51,6 +51,7 @@ so, delete this exception statement from your version. */ #include "hash.h" #include "convert.h" #include "ptimer.h" +#include "html-url.h" /* Total size of downloaded files. Used to enforce quota. */ SUM_SIZE_INT total_downloaded_bytes; @@ -596,7 +597,7 @@ static char *getproxy (struct url *); uerr_t retrieve_url (const char *origurl, char **file, char **newloc, - const char *refurl, int *dt, bool recursive) + const char *refurl, int *dt, bool recursive, struct iri *iri) { uerr_t result; char *url; @@ -624,7 +625,8 @@ retrieve_url (const char *origurl, char **file, char **newloc, if (file) *file = NULL; - u = url_parse (url, &up_error_code); + second_try: + u = url_parse (url, &up_error_code, iri); if (!u) { logprintf (LOG_NOTQUIET, "%s: %s.\n", url, url_error (up_error_code)); @@ -632,6 +634,10 @@ retrieve_url (const char *origurl, char **file, char **newloc, return URLERROR; } + DEBUGP (("[IRI Retrieving %s with %s (UTF-8=%d)\n", quote (url), + iri->uri_encoding ? quote (iri->uri_encoding) : "None", + iri->utf8_encode)); + if (!refurl) refurl = opt.referer; @@ -645,8 +651,13 @@ retrieve_url (const char *origurl, char **file, char **newloc, proxy = getproxy (u); if (proxy) { + /* sXXXav : could a proxy include a path ??? */ + struct iri *pi = iri_new (); + set_uri_encoding (pi, opt.locale, true); + pi->utf8_encode = false; + /* Parse the proxy URL. */ - proxy_url = url_parse (proxy, &up_error_code); + proxy_url = url_parse (proxy, &up_error_code, NULL); if (!proxy_url) { logprintf (LOG_NOTQUIET, _("Error parsing proxy URL %s: %s.\n"), @@ -671,7 +682,7 @@ retrieve_url (const char *origurl, char **file, char **newloc, #endif || (proxy_url && proxy_url->scheme == SCHEME_HTTP)) { - result = http_loop (u, &mynewloc, &local_file, refurl, dt, proxy_url); + result = http_loop (u, &mynewloc, &local_file, refurl, dt, proxy_url, iri); } else if (u->scheme == SCHEME_FTP) { @@ -721,8 +732,13 @@ retrieve_url (const char *origurl, char **file, char **newloc, xfree (mynewloc); mynewloc = construced_newloc; + /* Reset UTF-8 encoding state, keep the URI encoding and reset + the content encoding. */ + iri->utf8_encode = opt.enable_iri; + set_content_encoding (iri, NULL); + /* Now, see if this new location makes sense. */ - newloc_parsed = url_parse (mynewloc, &up_error_code); + newloc_parsed = url_parse (mynewloc, &up_error_code, iri); if (!newloc_parsed) { logprintf (LOG_NOTQUIET, "%s: %s.\n", escnonprint_uri (mynewloc), @@ -769,8 +785,21 @@ retrieve_url (const char *origurl, char **file, char **newloc, goto redirected; } - if (local_file) + /* Try to not encode in UTF-8 if fetching failed */ + if (!(*dt & RETROKF) && iri->utf8_encode) { + iri->utf8_encode = false; + DEBUGP (("[IRI Fallbacking to non-utf8 for %s\n", quote (url))); + goto second_try; + } + + if (local_file && *dt & RETROKF) + { + register_download (u->url, local_file); + if (redirection_count && 0 != strcmp (origurl, u->url)) + register_redirection (origurl, u->url); + if (*dt & TEXTHTML) + register_html (u->url, local_file); if (*dt & RETROKF) { register_download (u->url, local_file); @@ -778,6 +807,8 @@ retrieve_url (const char *origurl, char **file, char **newloc, register_redirection (origurl, u->url); if (*dt & TEXTHTML) register_html (u->url, local_file); + if (*dt & TEXTCSS) + register_css (u->url, local_file); } } @@ -818,12 +849,38 @@ retrieve_from_file (const char *file, bool html, int *count) { uerr_t status; struct urlpos *url_list, *cur_url; + struct iri *iri = iri_new(); + + char *input_file = NULL; + const char *url = file; - url_list = (html ? get_urls_html (file, NULL, NULL) - : get_urls_file (file)); status = RETROK; /* Suppose everything is OK. */ *count = 0; /* Reset the URL count. */ + /* sXXXav : Assume filename and links in the file are in the locale */ + set_content_encoding (iri, opt.locale); + + if (url_has_scheme (url)) + { + int dt; + uerr_t status; + + if (!opt.base_href) + opt.base_href = xstrdup (url); + + status = retrieve_url (url, &input_file, NULL, NULL, &dt, false, iri); + if (status != RETROK) + return status; + + if (dt & TEXTHTML) + html = true; + } + else + input_file = (char *) file; + + url_list = (html ? get_urls_html (input_file, NULL, NULL, iri) + : get_urls_file (input_file)); + for (cur_url = url_list; cur_url; cur_url = cur_url->next, ++*count) { char *filename = NULL, *new_file = NULL; @@ -843,15 +900,16 @@ retrieve_from_file (const char *file, bool html, int *count) int old_follow_ftp = opt.follow_ftp; /* Turn opt.follow_ftp on in case of recursive FTP retrieval */ - if (cur_url->url->scheme == SCHEME_FTP) + if (cur_url->url->scheme == SCHEME_FTP) opt.follow_ftp = 1; - + status = retrieve_tree (cur_url->url->url); opt.follow_ftp = old_follow_ftp; } else - status = retrieve_url (cur_url->url->url, &filename, &new_file, NULL, &dt, opt.recursive); + status = retrieve_url (cur_url->url->url, &filename, &new_file, NULL, + &dt, opt.recursive, iri); if (filename && opt.delete_after && file_exists_p (filename)) { @@ -1022,7 +1080,11 @@ bool url_uses_proxy (const char *url) { bool ret; - struct url *u = url_parse (url, NULL); + struct url *u; + struct iri *i = iri_new(); + /* url was given in the command line, so use locale as encoding */ + set_uri_encoding (i, opt.locale, true); + u= url_parse (url, NULL, i); if (!u) return false; ret = getproxy (u) != NULL;