From: Saint Xavier Date: Sun, 20 Jul 2008 19:45:09 +0000 (+0200) Subject: Automated merge. X-Git-Tag: v1.13~338^2~7^2~6^2~13^2~2 X-Git-Url: http://sjero.net/git/?p=wget;a=commitdiff_plain;h=b30a0dd817886f77a64be9218c5e5399bcbc2e67 Automated merge. --- b30a0dd817886f77a64be9218c5e5399bcbc2e67 diff --cc ChangeLog index 89898414,d96ce355..21d380b2 --- a/ChangeLog +++ b/ChangeLog @@@ -1,11 -1,9 +1,17 @@@ + 2008-06-30 Micah Cowan + + * NEWS: Entries for 1.11.4. + + * AUTHORS: Added Steven Schubiger. + +2008-06-26 Xavier Saint + + * configure.ac : IRIs support required libiconv, check it. + +2008-06-14 Xavier Saint + + * configure.ac: Add support for IRIs + 2008-05-29 Micah Cowan * po/*.po: Updated from TP (the 1.11.3 set). diff --cc src/ChangeLog index 7aca0527,e551f1c9..02bc331b --- a/src/ChangeLog +++ b/src/ChangeLog @@@ -1,19 -1,13 +1,29 @@@ +2008-07-02 Xavier Saint + + * iri.c, iri.h : New function idn_decode() to decode ASCII + encoded hostname to the locale. + + * host.c : Show hostname to be resolved both in locale and + ASCII encoded. + + 2008-06-28 Steven Schubiger + + * retr.c (retrieve_from_file): Allow for reading the links from + an external file (HTTP/FTP). + +2008-06-26 Xavier Saint + + * iri.c, iri.h : New functions locale_to_utf8() and + idn_encode() adding basic capabilities of IRI/IDN. + + * url.c : Convert URLs from locale to UTF-8 allowing a basic + support of IRI/IDN + + 2008-06-25 Steven Schubiger + + * ftp.c (getftp): When spidering a FTP URL, emit a diagnostic + message if the remote file exists. + 2008-06-24 Steven Schubiger * http.c (http_loop): Replace escnonprint() occurence with diff --cc src/Makefile.am index 6ae5805d,6db4ac17..edbb592e --- a/src/Makefile.am +++ b/src/Makefile.am @@@ -43,10 -40,10 +44,10 @@@ wget_SOURCES = build_info.c cmpt.c conn ftp-basic.c ftp-ls.c hash.c host.c html-parse.c html-url.c \ http.c init.c log.c main.c netrc.c progress.c ptimer.c \ recur.c res.c retr.c snprintf.c spider.c url.c \ - utils.c \ - css-url.h connect.h convert.h cookies.h \ + utils.c $(IRI_OBJ) \ - connect.h convert.h cookies.h \ - ftp.h gen-md5.h hash.h host.h html-parse.h \ - http.h http-ntlm.h init.h iri.h log.h mswindows.h netrc.h \ ++ css-url.h connect.h convert.h cookies.h \ + ftp.h gen-md5.h hash.h host.h html-parse.h html-url.h \ + http.h http-ntlm.h init.h log.h mswindows.h netrc.h \ options.h progress.h ptimer.h recur.h res.h retr.h \ spider.h ssl.h sysdep.h url.h utils.h wget.h nodist_wget_SOURCES = version.c diff --cc src/html-url.c index 5a0682d3,75bec7d9..ef93a7e4 --- a/src/html-url.c +++ b/src/html-url.c @@@ -41,11 -41,10 +41,11 @@@ as that of the covered work. * #include "utils.h" #include "hash.h" #include "convert.h" - #include "recur.h" /* declaration of get_urls_html */ + #include "recur.h" + #include "html-url.h" + #include "css-url.h" +#include "iri.h" - struct map_context; - typedef void (*tag_handler_t) (int, struct taginfo *, struct map_context *); #define DECLARE_TAG_HANDLER(fun) \ diff --cc src/recur.c index 6f5da2ae,729a14e9..24b80ad4 --- a/src/recur.c +++ b/src/recur.c @@@ -48,9 -48,10 +48,11 @@@ as that of the covered work. * #include "hash.h" #include "res.h" #include "convert.h" + #include "html-url.h" + #include "css-url.h" #include "spider.h" - +#include "iri.h" + /* Functions for maintaining the URL queue. */ struct queue_element { @@@ -59,7 -60,8 +61,9 @@@ int depth; /* the depth */ bool html_allowed; /* whether the document is allowed to be treated as HTML. */ + char *remote_encoding; + bool css_allowed; /* whether the document is allowed to + be treated as CSS. */ struct queue_element *next; /* next element in queue */ }; @@@ -92,21 -94,17 +96,23 @@@ url_queue_delete (struct url_queue *que static void url_enqueue (struct url_queue *queue, - const char *url, const char *referer, int depth, bool html_allowed) + const char *url, const char *referer, int depth, + bool html_allowed, bool css_allowed) { struct queue_element *qel = xnew (struct queue_element); + char *charset = get_current_charset (); qel->url = url; qel->referer = referer; qel->depth = depth; qel->html_allowed = html_allowed; + qel->css_allowed = css_allowed; qel->next = NULL; + if (charset) + qel->remote_encoding = xstrdup (charset); + else + qel->remote_encoding = NULL; + ++queue->count; if (queue->count > queue->maxcount) queue->maxcount = queue->count; diff --cc src/retr.c index dd4978a7,58e00d2f..7a28ea32 --- a/src/retr.c +++ b/src/retr.c @@@ -51,7 -51,7 +51,8 @@@ as that of the covered work. * #include "hash.h" #include "convert.h" #include "ptimer.h" +#include "iri.h" + #include "html-url.h" /* Total size of downloaded files. Used to enforce quota. */ SUM_SIZE_INT total_downloaded_bytes; @@@ -780,21 -770,18 +781,31 @@@ retrieve_url (const char *origurl, cha goto redirected; } - if (local_file) + /* Try to not encode in UTF-8 if fetching failed */ + if (!(*dt & RETROKF) && get_utf8_encode ()) { + set_utf8_encode (false); + /*printf ("[Fallbacking to non-utf8 for `%s'\n", url);*/ + goto second_try; + } + + if (local_file && *dt & RETROKF) + { + register_download (u->url, local_file); + if (redirection_count && 0 != strcmp (origurl, u->url)) + register_redirection (origurl, u->url); + if (*dt & TEXTHTML) + register_html (u->url, local_file); + if (*dt & RETROKF) + { + register_download (u->url, local_file); + if (redirection_count && 0 != strcmp (origurl, u->url)) + register_redirection (origurl, u->url); + if (*dt & TEXTHTML) + register_html (u->url, local_file); + if (*dt & TEXTCSS) + register_css (u->url, local_file); + } } if (file)