X-Git-Url: http://sjero.net/git/?a=blobdiff_plain;f=src%2Fres.c;h=1260e7dac19d15f0787f6a823d8650fb62e871b4;hb=7415b33b6c35134f72aa6b2a76ef99b9598234af;hp=0320d034246cfce5639397522a1a41f9789fdbb8;hpb=090f1596ae2eb446c265a62849f59657a9ee9e07;p=wget diff --git a/src/res.c b/src/res.c index 0320d034..1260e7da 100644 --- a/src/res.c +++ b/src/res.c @@ -1,5 +1,6 @@ /* Support for Robot Exclusion Standard (RES). - Copyright (C) 2001, 2006, 2007, 2008 Free Software Foundation, Inc. + Copyright (C) 2001, 2006, 2007, 2008, 2009 Free Software Foundation, + Inc. This file is part of Wget. @@ -538,7 +539,8 @@ res_retrieve_file (const char *url, char **file, struct iri *iri) uerr_t err; char *robots_url = uri_merge (url, RES_SPECS_LOCATION); int saved_ts_val = opt.timestamping; - int saved_sp_val = opt.spider; + int saved_sp_val = opt.spider, url_err; + struct url * url_parsed; /* Copy server URI encoding for a possible IDNA transformation, no need to encode the full URI in UTF-8 because "robots.txt" is plain ASCII */ @@ -549,7 +551,22 @@ res_retrieve_file (const char *url, char **file, struct iri *iri) *file = NULL; opt.timestamping = false; opt.spider = false; - err = retrieve_url (robots_url, file, NULL, NULL, NULL, false, i); + + url_parsed = url_parse (robots_url, &url_err, iri, true); + if (!url_parsed) + { + char *error = url_error (robots_url, url_err); + logprintf (LOG_NOTQUIET, "%s: %s.\n", robots_url, error); + xfree (error); + err = URLERROR; + } + else + { + err = retrieve_url (url_parsed, robots_url, file, NULL, NULL, NULL, + false, i, false); + url_free(url_parsed); + } + opt.timestamping = saved_ts_val; opt.spider = saved_sp_val; xfree (robots_url);