From: Saint Xavier Date: Wed, 23 Jul 2008 22:58:10 +0000 (+0200) Subject: Automated merge. X-Git-Tag: v1.13~338^2~7^2~6^2~13 X-Git-Url: http://sjero.net/git/?p=wget;a=commitdiff_plain;h=ccd62071dcbdfc0269813746b9f51ff9c23261db;hp=-c Automated merge. --- ccd62071dcbdfc0269813746b9f51ff9c23261db diff --combined src/ChangeLog index 3e08d0d8,02bc331b..fd86c51c --- a/src/ChangeLog +++ b/src/ChangeLog @@@ -1,17 -1,24 +1,33 @@@ +2008-07-17 Steven Schubiger + + * retr.c (retrieve_from_file): When given an URL as input file, + use it as baseref if none was specified and treat the input file + as HTML if its content type is text/html. + + * init.c (cleanup): Free the memory associated with the base + option (when DEBUG_MALLOC is defined). + + 2008-07-02 Xavier Saint + + * iri.c, iri.h : New function idn_decode() to decode ASCII + encoded hostname to the locale. + + * host.c : Show hostname to be resolved both in locale and + ASCII encoded. + 2008-06-28 Steven Schubiger * retr.c (retrieve_from_file): Allow for reading the links from an external file (HTTP/FTP). + 2008-06-26 Xavier Saint + + * iri.c, iri.h : New functions locale_to_utf8() and + idn_encode() adding basic capabilities of IRI/IDN. + + * url.c : Convert URLs from locale to UTF-8 allowing a basic + support of IRI/IDN + 2008-06-25 Steven Schubiger * ftp.c (getftp): When spidering a FTP URL, emit a diagnostic @@@ -36,7 -43,7 +52,7 @@@ * http.c: Make -nv --spider include the file's name when it exists. - + 2008-06-22 Micah Cowan * Makefile.am (version.c): Fixed version string invocation so it @@@ -44,12 -51,57 +60,57 @@@ string vars pointers-to-const, and moved line lengths below 80 (in Makefile.am, not in version.c). + 2008-06-19 Xavier Saint + + * iri.c, iri.h : New function check_encoding_name() as + a preliminary encoding name check. + + * main.c, iri.c : Make use of check_encoding_name(). + + 2008-06-19 Xavier Saint + + * iri.c : Include missing stringprep.h file and add a + cast. 
+ + * init.c : set a default initial value for opt.enable_iri, + opt.locale and opt.encoding_remote. + + 2008-06-19 Xavier Saint + + * iri.c, iri.h : Add a new function find_locale() to find + out the local system encoding. + + * main.c : Make use of find_locale(). + + 2008-06-19 Xavier Saint + + * html-url.c : Add "content-type" meta tag parsing for + retrieving page encoding. + + * iri.h : Make no-op version of parse_charset() return + NULL. + 2008-06-16 Micah Cowan * http.c (http_loop): When hstat.len is higher than the successfully completed content's length, but it's because we _set_ it that way, don't abort. + 2008-06-14 Xavier Saint + + * iri.c, iri.h : New files. + + * Makefile.am : Add files iri.h and conditional iri.c. + + * build_info.c : Add compiled feature "iri". + + * http.c : include iri.h and parse charset from Content-Type + header. + + * init.c, main.c, options.h : if an option isn't supported + at compile time, don't get rid of it and show a dummy + message instead if it is used. + 2008-06-13 Micah Cowan * build_info.c: ENABLE_NTLM, not HAVE_NTLM; distinguish OpenSSL @@@ -93,11 -145,11 +154,11 @@@ default. 2008-05-17 Kenny Parnell - + (cmd_spec_prefer_family): Initialize prefer_family to prefer_none. 2008-05-17 Micah Cowan - + * main.c (main): Handle Ctrl-D on command-line. 2008-05-15 Steven Schubiger @@@ -136,7 -188,7 +197,7 @@@ * options.h: Add an according boolean member to the options struct. - + * sysdep.h: Comment the defines __EXTENSIONS__ and _GNU_SOURCE out, because they're now defined independently by config.h. 
diff --combined src/init.c index d4fc10e3,f56aa652..d01a1c80 --- a/src/init.c +++ b/src/init.c @@@ -181,9 -181,11 +181,11 @@@ static const struct { "inet6only", &opt.ipv6_only, cmd_boolean }, #endif { "input", &opt.input_filename, cmd_file }, + { "iri", &opt.enable_iri, cmd_boolean }, { "keepsessioncookies", &opt.keep_session_cookies, cmd_boolean }, { "limitrate", &opt.limit_rate, cmd_bytes }, { "loadcookies", &opt.cookies_input, cmd_file }, + { "locale", &opt.locale, cmd_string }, { "logfile", &opt.lfilename, cmd_file }, { "login", &opt.ftp_user, cmd_string },/* deprecated*/ { "maxredirect", &opt.max_redirect, cmd_number }, @@@ -223,6 -225,7 +225,7 @@@ { "referer", &opt.referer, cmd_string }, { "reject", &opt.rejects, cmd_vector }, { "relativeonly", &opt.relative_only, cmd_boolean }, + { "remoteencoding", &opt.encoding_remote, cmd_string }, { "removelisting", &opt.remove_listing, cmd_boolean }, { "restrictfilenames", NULL, cmd_spec_restrict_file_names }, { "retrsymlinks", &opt.retr_symlinks, cmd_boolean }, @@@ -330,6 -333,14 +333,14 @@@ defaults (void opt.restrict_files_case = restrict_no_case_restriction; opt.max_redirect = 20; + + #ifdef ENABLE_IRI + opt.enable_iri = true; + #else + opt.enable_iri = false; + #endif + opt.locale = NULL; + opt.encoding_remote = NULL; } /* Return the user's home directory (strdup-ed), or NULL if none is @@@ -1548,8 -1559,6 +1559,8 @@@ cleanup (void xfree_null (opt.cookies_output); xfree_null (opt.user); xfree_null (opt.passwd); + xfree_null (opt.base_href); + #endif /* DEBUG_MALLOC */ } diff --combined src/retr.c index 0fc46837,e70f6e6e..ae8ef3ef --- a/src/retr.c +++ b/src/retr.c @@@ -51,6 -51,7 +51,7 @@@ as that of the covered work. * #include "hash.h" #include "convert.h" #include "ptimer.h" + #include "iri.h" #include "html-url.h" /* Total size of downloaded files. Used to enforce quota. 
*/ @@@ -597,7 -598,7 +598,7 @@@ static char *getproxy (struct url *) uerr_t retrieve_url (const char *origurl, char **file, char **newloc, - const char *refurl, int *dt, bool recursive) + const char *refurl, int *dt, bool recursive, struct iri *iri) { uerr_t result; char *url; @@@ -625,7 -626,8 +626,8 @@@ if (file) *file = NULL; - u = url_parse (url, &up_error_code); + second_try: + u = url_parse (url, &up_error_code, iri); if (!u) { logprintf (LOG_NOTQUIET, "%s: %s.\n", url, url_error (up_error_code)); @@@ -633,6 -635,8 +635,8 @@@ return URLERROR; } + printf ("[Retrieving %s with %s (UTF-8=%d)\n", url, iri->uri_encoding, iri->utf8_encode); + if (!refurl) refurl = opt.referer; @@@ -646,8 -650,13 +650,13 @@@ proxy = getproxy (u); if (proxy) { + /* sXXXav : could a proxy include a path ??? */ + struct iri *pi = iri_new (); + set_uri_encoding (pi, opt.locale); + pi->utf8_encode = false; + /* Parse the proxy URL. */ - proxy_url = url_parse (proxy, &up_error_code); + proxy_url = url_parse (proxy, &up_error_code, NULL); if (!proxy_url) { logprintf (LOG_NOTQUIET, _("Error parsing proxy URL %s: %s.\n"), @@@ -672,7 -681,7 +681,7 @@@ #endif || (proxy_url && proxy_url->scheme == SCHEME_HTTP)) { - result = http_loop (u, &mynewloc, &local_file, refurl, dt, proxy_url); + result = http_loop (u, &mynewloc, &local_file, refurl, dt, proxy_url, iri); } else if (u->scheme == SCHEME_FTP) { @@@ -722,8 -731,13 +731,13 @@@ xfree (mynewloc); mynewloc = construced_newloc; + /* Reset UTF-8 encoding state, keep the URI encoding and reset + the content encoding. */ + iri->utf8_encode = opt.enable_iri; + set_content_encoding (iri, NULL); + /* Now, see if this new location makes sense. 
*/ - newloc_parsed = url_parse (mynewloc, &up_error_code); + newloc_parsed = url_parse (mynewloc, &up_error_code, iri); if (!newloc_parsed) { logprintf (LOG_NOTQUIET, "%s: %s.\n", escnonprint_uri (mynewloc), @@@ -770,8 -784,21 +784,21 @@@ goto redirected; } - if (local_file) + /* Try to not encode in UTF-8 if fetching failed */ + if (!(*dt & RETROKF) && iri->utf8_encode) { + iri->utf8_encode = false; + printf ("[Fallbacking to non-utf8 for `%s'\n", url); + goto second_try; + } + + if (local_file && *dt & RETROKF) + { + register_download (u->url, local_file); + if (redirection_count && 0 != strcmp (origurl, u->url)) + register_redirection (origurl, u->url); + if (*dt & TEXTHTML) + register_html (u->url, local_file); if (*dt & RETROKF) { register_download (u->url, local_file); @@@ -821,32 -848,28 +848,36 @@@ retrieve_from_file (const char *file, b { uerr_t status; struct urlpos *url_list, *cur_url; + struct iri *iri = iri_new(); char *input_file = NULL; const char *url = file; status = RETROK; /* Suppose everything is OK. */ *count = 0; /* Reset the URL count. */ - + + /* sXXXav : Assume filename and links in the file are in the locale */ + set_content_encoding (iri, opt.locale); + if (url_has_scheme (url)) { + int dt; uerr_t status; - status = retrieve_url (url, &input_file, NULL, NULL, NULL, false, iri); + + if (!opt.base_href) + opt.base_href = xstrdup (url); + - status = retrieve_url (url, &input_file, NULL, NULL, &dt, false); ++ status = retrieve_url (url, &input_file, NULL, NULL, &dt, false, iri); if (status != RETROK) return status; + + if (dt & TEXTHTML) + html = true; } else input_file = (char *) file; - url_list = (html ? get_urls_html (input_file, NULL, NULL) + url_list = (html ? 
get_urls_html (input_file, NULL, NULL, iri) : get_urls_file (input_file)); for (cur_url = url_list; cur_url; cur_url = cur_url->next, ++*count) @@@ -868,15 -891,16 +899,16 @@@ int old_follow_ftp = opt.follow_ftp; /* Turn opt.follow_ftp on in case of recursive FTP retrieval */ - if (cur_url->url->scheme == SCHEME_FTP) + if (cur_url->url->scheme == SCHEME_FTP) opt.follow_ftp = 1; - + status = retrieve_tree (cur_url->url->url); opt.follow_ftp = old_follow_ftp; } else - status = retrieve_url (cur_url->url->url, &filename, &new_file, NULL, &dt, opt.recursive); + status = retrieve_url (cur_url->url->url, &filename, &new_file, NULL, + &dt, opt.recursive, iri); if (filename && opt.delete_after && file_exists_p (filename)) { @@@ -1047,7 -1071,11 +1079,11 @@@ boo url_uses_proxy (const char *url) { bool ret; - struct url *u = url_parse (url, NULL); + struct url *u; + struct iri *i = iri_new(); + /* url was given in the command line, so use locale as encoding */ + set_uri_encoding (i, opt.locale); + u= url_parse (url, NULL, i); if (!u) return false; ret = getproxy (u) != NULL;