X-Git-Url: http://sjero.net/git/?p=wget;a=blobdiff_plain;f=src%2Fres.c;fp=src%2Fres.c;h=4b0ff82ba5b5a15ca4cae87e607ea2ac37f016e6;hp=20ffe1c8de45947b1d8cd9c262823151703b6bcd;hb=4f3dd6817348433eafde04a3c2946f43364de7ef;hpb=789f7e135333779ec5e95e46b3be68090c7ec5df diff --git a/src/res.c b/src/res.c index 20ffe1c8..4b0ff82b 100644 --- a/src/res.c +++ b/src/res.c @@ -532,20 +532,26 @@ res_get_specs (const char *host, int port) Return true if robots were retrieved OK, false otherwise. */ bool -res_retrieve_file (const char *url, char **file) +res_retrieve_file (const char *url, char **file, struct iri *iri) { + struct iri *i = iri_new (); uerr_t err; char *robots_url = uri_merge (url, RES_SPECS_LOCATION); int saved_ts_val = opt.timestamping; int saved_sp_val = opt.spider, url_err; struct url * url_parsed; + /* Copy server URI encoding for a possible IDNA transformation, no need to + encode the full URI in UTF-8 because "robots.txt" is plain ASCII */ + set_uri_encoding (i, iri->uri_encoding, false); + i->utf8_encode = false; + logputs (LOG_VERBOSE, _("Loading robots.txt; please ignore errors.\n")); *file = NULL; opt.timestamping = false; opt.spider = false; - url_parsed = url_parse (robots_url, &url_err); + url_parsed = url_parse (robots_url, &url_err, iri, true); if (!url_parsed) { char *error = url_error (robots_url, url_err); @@ -556,13 +562,14 @@ res_retrieve_file (const char *url, char **file) else { err = retrieve_url (url_parsed, robots_url, file, NULL, NULL, NULL, - false); + false, i); url_free(url_parsed); } opt.timestamping = saved_ts_val; - opt.spider = saved_sp_val; + opt.spider = saved_sp_val; xfree (robots_url); + iri_free (i); if (err != RETROK && *file != NULL) {