/* File retrieval.
- Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
- 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+ Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
+ 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
This file is part of GNU Wget.
#include <string.h>
#include <assert.h>
+#include "exits.h"
#include "utils.h"
#include "retr.h"
#include "progress.h"
#include "convert.h"
#include "ptimer.h"
#include "html-url.h"
+#include "iri.h"
/* Total size of downloaded files. Used to enforce quota. */
SUM_SIZE_INT total_downloaded_bytes;
performance: fast downloads will arrive in large 16K chunks
(which stdio would write out immediately anyway), and slow
downloads wouldn't be limited by disk speed. */
+
+ /* 2005-04-20 SMS.
+ Perhaps it shouldn't hinder performance, but it sure does, at least
+ on VMS (more than 2X). Rather than speculate on what it should or
+ shouldn't do, it might make more sense to test it. Even better, it
+ might be nice to explain what possible benefit it could offer, as
+ it appears to be a clear invitation to poor performance with no
+ actual justification. (Also, why 16K? Anyone test other values?)
+ */
+#ifndef __VMS
fflush (out);
+#endif /* ndef __VMS */
return !ferror (out);
}
uerr_t
retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
char **newloc, const char *refurl, int *dt, bool recursive,
- struct iri *iri)
+ struct iri *iri, bool register_status)
{
uerr_t result;
char *url;
bool location_changed;
+ bool iri_fallbacked = 0;
int dummy;
char *mynewloc, *proxy;
struct url *u = orig_parsed, *proxy_url;
if (file)
*file = NULL;
- second_try:
- DEBUGP (("[IRI Retrieving %s with %s (UTF-8=%d)\n", quote_n (0, url),
- iri->uri_encoding ? quote_n (1, iri->uri_encoding) : "None",
- iri->utf8_encode));
-
if (!refurl)
refurl = opt.referer;
redirected:
+ /* (this label is also the re-entry point for the IRI fallback retry) */
result = NOCONERROR;
mynewloc = NULL;
xfree (url);
xfree (error);
RESTORE_POST_DATA;
- return PROXERR;
+ result = PROXERR;
+ goto bail;
}
if (proxy_url->scheme != SCHEME_HTTP && proxy_url->scheme != u->scheme)
{
url_free (proxy_url);
xfree (url);
RESTORE_POST_DATA;
- return PROXERR;
+ result = PROXERR;
+ goto bail;
}
}
xfree (mynewloc);
xfree (error);
RESTORE_POST_DATA;
- return result;
+ goto bail;
}
/* Now mynewloc will become newloc_parsed->url, because if the
xfree (url);
xfree (mynewloc);
RESTORE_POST_DATA;
- return WRONGCODE;
+ result = WRONGCODE;
+ goto bail;
}
xfree (url);
if (!(*dt & RETROKF) && iri->utf8_encode)
{
iri->utf8_encode = false;
- DEBUGP (("[IRI fallbacking to non-utf8 for %s\n", quote (url)));
- goto second_try;
+ if (orig_parsed != u)
+ {
+ url_free (u);
+ }
+ u = url_parse (origurl, NULL, iri, true);
+ if (u)
+ {
+ DEBUGP (("[IRI fallbacking to non-utf8 for %s\n", quote (url)));
+ url = xstrdup (u->url);
+ iri_fallbacked = 1;
+ goto redirected;
+ }
+ else
+ DEBUGP (("[Couldn't fallback to non-utf8 for %s\n", quote (url)));
}
if (local_file && *dt & RETROKF)
url_free (u);
}
- if (redirection_count)
+ if (redirection_count || iri_fallbacked)
{
if (newloc)
*newloc = url;
RESTORE_POST_DATA;
+bail:
+ if (register_status)
+ inform_exit_status (result);
return result;
}
{
int dt,url_err;
uerr_t status;
- struct url * url_parsed = url_parse(url, &url_err, NULL, true);
+ struct url * url_parsed = url_parse(url, &url_err, iri, true);
if (!url_parsed)
{
opt.base_href = xstrdup (url);
status = retrieve_url (url_parsed, url, &input_file, NULL, NULL, &dt,
- false, iri);
+ false, iri, true);
if (status != RETROK)
return status;
if (dt & TEXTHTML)
html = true;
- /* If we have a found a content encoding, use it */
- if (iri->content_encoding)
+ /* If we have found a content encoding, use it.
+ * (pointer comparison is okay, because we're checking for identical object) */
+ if (iri->content_encoding != opt.locale)
set_uri_encoding (iri, iri->content_encoding, false);
+
+ /* Reset UTF-8 encode status */
+ iri->utf8_encode = opt.enable_iri;
+ xfree_null (iri->orig_url);
+ iri->orig_url = NULL;
}
else
input_file = (char *) file;
{
char *filename = NULL, *new_file = NULL;
int dt;
+ struct iri *tmpiri = iri_dup (iri);
+ struct url *parsed_url = NULL;
if (cur_url->ignore_when_downloading)
continue;
break;
}
- /* Reset UTF-8 encode status */
- iri->utf8_encode = opt.enable_iri;
- xfree_null (iri->orig_url);
- iri->orig_url = NULL;
+ /* Need to reparse the url, since it didn't have iri information. */
+ if (opt.enable_iri)
+ parsed_url = url_parse (cur_url->url->url, NULL, tmpiri, true);
if ((opt.recursive || opt.page_requisites)
&& (cur_url->url->scheme != SCHEME_FTP || getproxy (cur_url->url)))
if (cur_url->url->scheme == SCHEME_FTP)
opt.follow_ftp = 1;
- status = retrieve_tree (cur_url->url, iri);
+ status = retrieve_tree (parsed_url ? parsed_url : cur_url->url,
+ tmpiri);
opt.follow_ftp = old_follow_ftp;
}
else
- status = retrieve_url (cur_url->url, cur_url->url->url, &filename,
- &new_file, NULL, &dt, opt.recursive, iri);
+ status = retrieve_url (parsed_url ? parsed_url : cur_url->url,
+ cur_url->url->url, &filename,
+ &new_file, NULL, &dt, opt.recursive, tmpiri,
+ true);
+
+ if (parsed_url)
+ url_free (parsed_url);
if (filename && opt.delete_after && file_exists_p (filename))
{
xfree_null (new_file);
xfree_null (filename);
+ iri_free (tmpiri);
}
/* Free the linked list of URL-s. */