#include "hash.h"
#include "convert.h"
#include "ptimer.h"
-#include "iri.h"
#include "html-url.h"
+#include "iri.h"
/* Total size of downloaded files. Used to enforce quota. */
SUM_SIZE_INT total_downloaded_bytes;
performance: fast downloads will arrive in large 16K chunks
(which stdio would write out immediately anyway), and slow
downloads wouldn't be limited by disk speed. */
+
+ /* 2005-04-20 SMS.
+ Perhaps it shouldn't hinder performance, but it sure does, at least
+ on VMS (more than 2X). Rather than speculate on what it should or
+ shouldn't do, it might make more sense to test it. Even better, it
+ might be nice to explain what possible benefit it could offer, as
+ it appears to be a clear invitation to poor performance with no
+ actual justification. (Also, why 16K? Anyone test other values?)
+ */
+#ifndef __VMS
fflush (out);
+#endif /* ndef __VMS */
return !ferror (out);
}
/* If we're skipping STARTPOS bytes, pass 0 as the INITIAL
argument to progress_create because the indicator doesn't
(yet) know about "skipping" data. */
- progress = progress_create (skip ? 0 : startpos, startpos + toread);
+ wgint start = skip ? 0 : startpos;
+ progress = progress_create (start, start + toread);
progress_interactive = progress_interactive_p (progress);
}
char *hunk = xmalloc (bufsize);
int tail = 0; /* tail position in HUNK */
- assert (maxsize >= bufsize);
+ assert (!maxsize || maxsize >= bufsize);
while (1)
{
multiple points. */
uerr_t
-retrieve_url (const char *origurl, char **file, char **newloc,
- const char *refurl, int *dt, bool recursive, struct iri *iri)
+retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
+ char **newloc, const char *refurl, int *dt, bool recursive,
+ struct iri *iri)
{
uerr_t result;
char *url;
bool location_changed;
+ bool iri_fallbacked = 0;
int dummy;
char *mynewloc, *proxy;
- struct url *u, *proxy_url;
+ struct url *u = orig_parsed, *proxy_url;
int up_error_code; /* url parse error code */
char *local_file;
int redirection_count = 0;
if (file)
*file = NULL;
- second_try:
- u = url_parse (url, &up_error_code, iri);
- if (!u)
- {
- logprintf (LOG_NOTQUIET, "%s: %s.\n", url, url_error (up_error_code));
- xfree (url);
- return URLERROR;
- }
-
- DEBUGP (("[IRI Retrieving %s with %s (UTF-8=%d)\n", quote (url),
- iri->uri_encoding ? quote (iri->uri_encoding) : "None",
- iri->utf8_encode));
-
if (!refurl)
refurl = opt.referer;
redirected:
+  /* (this label is also the re-entry point when falling back from IRI) */
result = NOCONERROR;
mynewloc = NULL;
proxy = getproxy (u);
if (proxy)
{
- /* sXXXav : could a proxy include a path ??? */
struct iri *pi = iri_new ();
set_uri_encoding (pi, opt.locale, true);
pi->utf8_encode = false;
/* Parse the proxy URL. */
- proxy_url = url_parse (proxy, &up_error_code, NULL);
+ proxy_url = url_parse (proxy, &up_error_code, NULL, true);
if (!proxy_url)
{
+ char *error = url_error (proxy, up_error_code);
logprintf (LOG_NOTQUIET, _("Error parsing proxy URL %s: %s.\n"),
- proxy, url_error (up_error_code));
+ proxy, error);
xfree (url);
+ xfree (error);
RESTORE_POST_DATA;
return PROXERR;
}
the content encoding. */
iri->utf8_encode = opt.enable_iri;
set_content_encoding (iri, NULL);
+ xfree_null (iri->orig_url);
/* Now, see if this new location makes sense. */
- newloc_parsed = url_parse (mynewloc, &up_error_code, iri);
+ newloc_parsed = url_parse (mynewloc, &up_error_code, iri, true);
if (!newloc_parsed)
{
+ char *error = url_error (mynewloc, up_error_code);
logprintf (LOG_NOTQUIET, "%s: %s.\n", escnonprint_uri (mynewloc),
- url_error (up_error_code));
- url_free (u);
+ error);
+ if (orig_parsed != u)
+ {
+ url_free (u);
+ }
xfree (url);
xfree (mynewloc);
+ xfree (error);
RESTORE_POST_DATA;
return result;
}
logprintf (LOG_NOTQUIET, _("%d redirections exceeded.\n"),
opt.max_redirect);
url_free (newloc_parsed);
- url_free (u);
+ if (orig_parsed != u)
+ {
+ url_free (u);
+ }
xfree (url);
xfree (mynewloc);
RESTORE_POST_DATA;
xfree (url);
url = mynewloc;
- url_free (u);
+ if (orig_parsed != u)
+ {
+ url_free (u);
+ }
u = newloc_parsed;
/* If we're being redirected from POST, we don't want to POST
if (!(*dt & RETROKF) && iri->utf8_encode)
{
iri->utf8_encode = false;
- DEBUGP (("[IRI Fallbacking to non-utf8 for %s\n", quote (url)));
- goto second_try;
+ if (orig_parsed != u)
+ {
+ url_free (u);
+ }
+ u = url_parse (origurl, NULL, iri, true);
+ if (u)
+ {
+ DEBUGP (("[IRI fallbacking to non-utf8 for %s\n", quote (url)));
+ url = xstrdup (u->url);
+ iri_fallbacked = 1;
+ goto redirected;
+ }
+ else
+ DEBUGP (("[Couldn't fallback to non-utf8 for %s\n", quote (url)));
}
if (local_file && *dt & RETROKF)
else
xfree_null (local_file);
- url_free (u);
+ if (orig_parsed != u)
+ {
+ url_free (u);
+ }
- if (redirection_count)
+ if (redirection_count || iri_fallbacked)
{
if (newloc)
*newloc = url;
*count = 0; /* Reset the URL count. */
/* sXXXav : Assume filename and links in the file are in the locale */
+ set_uri_encoding (iri, opt.locale, true);
set_content_encoding (iri, opt.locale);
if (url_has_scheme (url))
{
- int dt;
+ int dt,url_err;
uerr_t status;
+ struct url * url_parsed = url_parse(url, &url_err, iri, true);
+
+ if (!url_parsed)
+ {
+ char *error = url_error (url, url_err);
+ logprintf (LOG_NOTQUIET, "%s: %s.\n", url, error);
+ xfree (error);
+ return URLERROR;
+ }
if (!opt.base_href)
opt.base_href = xstrdup (url);
- status = retrieve_url (url, &input_file, NULL, NULL, &dt, false, iri);
+ status = retrieve_url (url_parsed, url, &input_file, NULL, NULL, &dt,
+ false, iri);
if (status != RETROK)
return status;
if (dt & TEXTHTML)
html = true;
+
+  /* If we have found a content encoding, use it.
+ * ( == is okay, because we're checking for identical object) */
+ if (iri->content_encoding != opt.locale)
+ set_uri_encoding (iri, iri->content_encoding, false);
+
+ /* Reset UTF-8 encode status */
+ iri->utf8_encode = opt.enable_iri;
+ xfree_null (iri->orig_url);
+ iri->orig_url = NULL;
}
else
input_file = (char *) file;
{
char *filename = NULL, *new_file = NULL;
int dt;
+ struct iri *tmpiri = iri_dup (iri);
+ struct url *parsed_url = NULL;
if (cur_url->ignore_when_downloading)
continue;
status = QUOTEXC;
break;
}
+
+ /* Need to reparse the url, since it didn't have iri information. */
+ if (opt.enable_iri)
+ parsed_url = url_parse (cur_url->url->url, NULL, tmpiri, true);
+
if ((opt.recursive || opt.page_requisites)
&& (cur_url->url->scheme != SCHEME_FTP || getproxy (cur_url->url)))
{
if (cur_url->url->scheme == SCHEME_FTP)
opt.follow_ftp = 1;
- status = retrieve_tree (cur_url->url->url);
+ status = retrieve_tree (parsed_url ? parsed_url : cur_url->url,
+ tmpiri);
opt.follow_ftp = old_follow_ftp;
}
else
- status = retrieve_url (cur_url->url->url, &filename, &new_file, NULL,
- &dt, opt.recursive, iri);
+ status = retrieve_url (parsed_url ? parsed_url : cur_url->url,
+ cur_url->url->url, &filename,
+ &new_file, NULL, &dt, opt.recursive, tmpiri);
+
+ if (parsed_url)
+ url_free (parsed_url);
if (filename && opt.delete_after && file_exists_p (filename))
{
xfree_null (new_file);
xfree_null (filename);
+ iri_free (tmpiri);
}
/* Free the linked list of URL-s. */
free_urlpos (url_list);
+ iri_free (iri);
+
return status;
}
/* Returns true if URL would be downloaded through a proxy. */
+/* NOTE(review): interface change -- U is now an already-parsed URL.
+   The caller owns U (this function no longer parses the string nor
+   frees the struct); NULL is accepted and yields false.  */
bool
-url_uses_proxy (const char *url)
+url_uses_proxy (struct url * u)
{
  bool ret;
-  struct url *u;
-  struct iri *i = iri_new();
-  /* url was given in the command line, so use locale as encoding */
-  set_uri_encoding (i, opt.locale, true);
-  u= url_parse (url, NULL, i);
  if (!u)
    return false;
  ret = getproxy (u) != NULL;
-  url_free (u);
  return ret;
}
else
return sufmatch (no_proxy, host);
}
+
+/* Set the file parameter to point to the local file string.
+
+   FILE is an out-parameter.  When the user requested a specific
+   output document (opt.output_document, i.e. -O) and the output
+   stream is a regular file, *FILE is pointed at opt.output_document;
+   if -O was given but the stream is not regular, *FILE is left
+   untouched.  Otherwise *FILE is set to DEFAULT_FILE.  No string is
+   copied, so the caller must not free *FILE.  */
+void
+set_local_file (const char **file, const char *default_file)
+{
+  if (opt.output_document)
+    {
+      if (output_stream_regular)
+        *file = opt.output_document;
+    }
+  else
+    *file = default_file;
+}