X-Git-Url: http://sjero.net/git/?p=wget;a=blobdiff_plain;f=src%2Frecur.c;fp=src%2Frecur.c;h=24b80ad475878203680ec355f9ea818758e00b0c;hp=729a14e91d9cc2e57dbdd54894a954bfcca41d5f;hb=b30a0dd817886f77a64be9218c5e5399bcbc2e67;hpb=b28a6abfe66e03dae1f749d8215f4ba2b7303e5a

diff --git a/src/recur.c b/src/recur.c
index 729a14e9..24b80ad4 100644
--- a/src/recur.c
+++ b/src/recur.c
@@ -51,7 +51,8 @@ as that of the covered work. */
 #include "html-url.h"
 #include "css-url.h"
 #include "spider.h"
-
+#include "iri.h"
+
 /* Functions for maintaining the URL queue. */
 
 struct queue_element {
@@ -60,6 +61,7 @@ struct queue_element {
   int depth;                    /* the depth */
   bool html_allowed;            /* whether the document is allowed to be
                                    treated as HTML. */
+  char *remote_encoding;
   bool css_allowed;             /* whether the document is allowed to be
                                    treated as CSS. */
   struct queue_element *next;   /* next element in queue */
@@ -98,6 +100,7 @@ url_enqueue (struct url_queue *queue,
              bool html_allowed, bool css_allowed)
 {
   struct queue_element *qel = xnew (struct queue_element);
+  char *charset = get_current_charset ();
   qel->url = url;
   qel->referer = referer;
   qel->depth = depth;
@@ -105,6 +108,11 @@ url_enqueue (struct url_queue *queue,
   qel->css_allowed = css_allowed;
   qel->next = NULL;
 
+  if (charset)
+    qel->remote_encoding = xstrdup (charset);
+  else
+    qel->remote_encoding = NULL;
+
   ++queue->count;
   if (queue->count > queue->maxcount)
     queue->maxcount = queue->count;
@@ -112,6 +120,8 @@ url_enqueue (struct url_queue *queue,
   DEBUGP (("Enqueuing %s at depth %d\n", url, depth));
   DEBUGP (("Queue count %d, maxcount %d.\n", queue->count, queue->maxcount));
 
+  /*printf ("[Enqueuing %s with %s\n", url, qel->remote_encoding);*/
+
   if (queue->tail)
     queue->tail->next = qel;
   queue->tail = qel;
@@ -137,6 +147,10 @@ url_dequeue (struct url_queue *queue,
   if (!queue->head)
     queue->tail = NULL;
 
+  set_remote_charset (qel->remote_encoding);
+  if (qel->remote_encoding)
+    xfree (qel->remote_encoding);
+
   *url = qel->url;
   *referer = qel->referer;
   *depth = qel->depth;
@@ -192,8 +206,11 @@ retrieve_tree (const char *start_url)
   struct hash_table *blacklist;
 
   int up_error_code;
-  struct url *start_url_parsed = url_parse (start_url, &up_error_code);
+  struct url *start_url_parsed;
 
+  set_ugly_no_encode (true);
+  start_url_parsed= url_parse (start_url, &up_error_code);
+  set_ugly_no_encode (false);
   if (!start_url_parsed)
     {
       logprintf (LOG_NOTQUIET, "%s: %s.\n", start_url,
@@ -357,7 +374,9 @@ retrieve_tree (const char *start_url)
       if (children)
         {
           struct urlpos *child = children;
-          struct url *url_parsed = url_parsed = url_parse (url, NULL);
+          set_ugly_no_encode (true);
+          struct url *url_parsed = url_parse (url, NULL);
+          set_ugly_no_encode (false);
           char *referer_url = url;
           bool strip_auth = (url_parsed != NULL
                              && url_parsed->user != NULL);
@@ -394,18 +413,18 @@ retrieve_tree (const char *start_url)
             }
         }
 
-      if (file 
-          && (opt.delete_after 
+      if (file
+          && (opt.delete_after
               || opt.spider /* opt.recursive is implicitely true */
               || !acceptable (file)))
         {
           /* Either --delete-after was specified, or we loaded this
-             (otherwise unneeded because of --spider or rejected by -R) 
-             HTML file just to harvest its hyperlinks -- in either case, 
+             (otherwise unneeded because of --spider or rejected by -R)
+             HTML file just to harvest its hyperlinks -- in either case,
              delete the local file. */
           DEBUGP (("Removing file due to %s in recursive_retrieve():\n",
                    opt.delete_after ? "--delete-after" :
-                   (opt.spider ? "--spider" : 
+                   (opt.spider ? "--spider" :
                     "recursive rejection criteria")));
           logprintf (LOG_VERBOSE,
                      (opt.delete_after || opt.spider
@@ -470,7 +489,7 @@ download_child_p (const struct urlpos *upos, struct url *parent, int depth,
 
   if (string_set_contains (blacklist, url))
     {
-      if (opt.spider) 
+      if (opt.spider)
         {
           char *referrer = url_string (parent, URL_AUTH_HIDE_PASSWD);
           DEBUGP (("download_child_p: parent->url is: %s\n", quote (parent->url)));
@@ -662,11 +681,13 @@ descend_redirect_p (const char *redirected, const char *original, int depth,
   struct urlpos *upos;
   bool success;
 
+  set_ugly_no_encode (true);
   orig_parsed = url_parse (original, NULL);
   assert (orig_parsed != NULL);
 
   new_parsed = url_parse (redirected, NULL);
   assert (new_parsed != NULL);
+  set_ugly_no_encode (false);
 
   upos = xnew0 (struct urlpos);
   upos->url = new_parsed;