/* File retrieval.
- Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
- 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+ Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
+ 2005, 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
This file is part of GNU Wget.
#include <string.h>
#include <assert.h>
+#include "exits.h"
#include "utils.h"
#include "retr.h"
#include "progress.h"
performance: fast downloads will arrive in large 16K chunks
(which stdio would write out immediately anyway), and slow
downloads wouldn't be limited by disk speed. */
+
+ /* 2005-04-20 SMS.
+ Perhaps it shouldn't hinder performance, but it sure does, at least
+ on VMS (more than 2X). Rather than speculate on what it should or
+ shouldn't do, it might make more sense to test it. Even better, it
+ might be nice to explain what possible benefit it could offer, as
+ it appears to be a clear invitation to poor performance with no
+ actual justification. (Also, why 16K? Anyone test other values?)
+ */
+#ifndef __VMS
fflush (out);
+#endif /* ndef __VMS */
return !ferror (out);
}
bool progress_interactive = false;
bool exact = !!(flags & rb_read_exactly);
+
+ /* Used only by HTTP/HTTPS chunked transfer encoding. */
+ bool chunked = flags & rb_chunked_transfer_encoding;
wgint skip = 0;
/* How much data we've read/written. */
wgint sum_read = 0;
wgint sum_written = 0;
+ wgint remaining_chunk_size = 0;
if (flags & rb_skip_startpos)
skip = startpos;
should be read. */
while (!exact || (sum_read < toread))
{
- int rdsize = exact ? MIN (toread - sum_read, dlbufsize) : dlbufsize;
+ int rdsize;
double tmout = opt.read_timeout;
+
+      /* Work out how many bytes to request on this iteration.  In chunked
+         mode the request never crosses a chunk boundary; otherwise it is
+         bounded by the download buffer (and by TOREAD when reading
+         exactly).  */
+      if (chunked)
+        {
+          if (remaining_chunk_size == 0)
+            {
+              /* Start of a new chunk: the next line carries its size in
+                 hexadecimal.  */
+              char *line = fd_read_line (fd);
+              char *endl;
+              if (line == NULL)
+                {
+                  ret = -1;
+                  break;
+                }
+
+              remaining_chunk_size = strtol (line, &endl, 16);
+              /* The line buffer is no longer needed once parsed; release
+                 it so that it is not leaked on every chunk.  */
+              xfree (line);
+
+              /* A negative size can only come from a malformed or hostile
+                 response.  Letting it through would yield a negative
+                 read size below, so treat it as a read error.  */
+              if (remaining_chunk_size < 0)
+                {
+                  ret = -1;
+                  break;
+                }
+
+              if (remaining_chunk_size == 0)
+                {
+                  /* Zero-sized chunk: end of the body.  Consume the line
+                     that follows it and stop reading.  */
+                  ret = 0;
+                  line = fd_read_line (fd);
+                  if (line == NULL)
+                    ret = -1;
+                  else
+                    xfree (line);
+                  break;
+                }
+            }
+
+          rdsize = MIN (remaining_chunk_size, dlbufsize);
+        }
+      else
+        rdsize = exact ? MIN (toread - sum_read, dlbufsize) : dlbufsize;
+
if (progress_interactive)
{
/* For interactive progress gauges, always specify a ~1s
else if (ret <= 0)
break; /* EOF or read error */
- if (progress || opt.limit_rate)
+ if (progress || opt.limit_rate || elapsed)
{
ptimer_measure (timer);
if (ret > 0)
ret = -2;
goto out;
}
+          if (chunked)
+            {
+              /* Account for the bytes just read from the current chunk.  */
+              remaining_chunk_size -= ret;
+              if (remaining_chunk_size == 0)
+                {
+                  /* The chunk is complete: consume the line that
+                     terminates it.  The returned buffer is released
+                     here so that it is not leaked once per chunk.  */
+                  char *line = fd_read_line (fd);
+                  if (line == NULL)
+                    {
+                      ret = -1;
+                      break;
+                    }
+                  xfree (line);
+                }
+            }
}
if (opt.limit_rate)
uerr_t
retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
char **newloc, const char *refurl, int *dt, bool recursive,
- struct iri *iri)
+ struct iri *iri, bool register_status)
{
uerr_t result;
char *url;
bool location_changed;
+ bool iri_fallbacked = 0;
int dummy;
char *mynewloc, *proxy;
struct url *u = orig_parsed, *proxy_url;
if (file)
*file = NULL;
- second_try:
- DEBUGP (("[IRI Retrieving %s with %s (UTF-8=%d)\n", quote_n (0, url),
- iri->uri_encoding ? quote_n (1, iri->uri_encoding) : "None",
- iri->utf8_encode));
-
if (!refurl)
refurl = opt.referer;
redirected:
+  /* (This label is also re-entered when falling back from IRI/UTF-8
+     encoding to the non-UTF-8 URL.)  */
result = NOCONERROR;
mynewloc = NULL;
xfree (url);
xfree (error);
RESTORE_POST_DATA;
- return PROXERR;
+ result = PROXERR;
+ goto bail;
}
if (proxy_url->scheme != SCHEME_HTTP && proxy_url->scheme != u->scheme)
{
url_free (proxy_url);
xfree (url);
RESTORE_POST_DATA;
- return PROXERR;
+ result = PROXERR;
+ goto bail;
}
}
#endif
|| (proxy_url && proxy_url->scheme == SCHEME_HTTP))
{
- result = http_loop (u, &mynewloc, &local_file, refurl, dt, proxy_url, iri);
+ result = http_loop (u, orig_parsed, &mynewloc, &local_file, refurl, dt,
+ proxy_url, iri);
}
else if (u->scheme == SCHEME_FTP)
{
if (redirection_count)
oldrec = glob = false;
- result = ftp_loop (u, dt, proxy_url, recursive, glob);
+ result = ftp_loop (u, &local_file, dt, proxy_url, recursive, glob);
recursive = oldrec;
/* There is a possibility of having HTTP being redirected to
xfree (mynewloc);
xfree (error);
RESTORE_POST_DATA;
- return result;
+ goto bail;
}
/* Now mynewloc will become newloc_parsed->url, because if the
xfree (url);
xfree (mynewloc);
RESTORE_POST_DATA;
- return WRONGCODE;
+ result = WRONGCODE;
+ goto bail;
}
xfree (url);
if (u)
{
DEBUGP (("[IRI fallbacking to non-utf8 for %s\n", quote (url)));
- goto second_try;
+ url = xstrdup (u->url);
+ iri_fallbacked = 1;
+ goto redirected;
}
else
DEBUGP (("[Couldn't fallback to non-utf8 for %s\n", quote (url)));
url_free (u);
}
- if (redirection_count)
+ if (redirection_count || iri_fallbacked)
{
if (newloc)
*newloc = url;
RESTORE_POST_DATA;
+bail:
+ if (register_status)
+ inform_exit_status (result);
return result;
}
struct urlpos *url_list, *cur_url;
struct iri *iri = iri_new();
- char *input_file = NULL;
+ char *input_file, *url_file = NULL;
const char *url = file;
status = RETROK; /* Suppose everything is OK. */
set_uri_encoding (iri, opt.locale, true);
set_content_encoding (iri, opt.locale);
- if (url_has_scheme (url))
+ if (url_valid_scheme (url))
{
int dt,url_err;
uerr_t status;
if (!opt.base_href)
opt.base_href = xstrdup (url);
- status = retrieve_url (url_parsed, url, &input_file, NULL, NULL, &dt,
- false, iri);
- if (status != RETROK)
+ status = retrieve_url (url_parsed, url, &url_file, NULL, NULL, &dt,
+ false, iri, true);
+ url_free (url_parsed);
+
+ if (!url_file || (status != RETROK))
return status;
if (dt & TEXTHTML)
iri->utf8_encode = opt.enable_iri;
xfree_null (iri->orig_url);
iri->orig_url = NULL;
+
+ input_file = url_file;
}
else
input_file = (char *) file;
url_list = (html ? get_urls_html (input_file, NULL, NULL, iri)
: get_urls_file (input_file));
+ xfree_null (url_file);
+
for (cur_url = url_list; cur_url; cur_url = cur_url->next, ++*count)
{
char *filename = NULL, *new_file = NULL;
else
status = retrieve_url (parsed_url ? parsed_url : cur_url->url,
cur_url->url->url, &filename,
- &new_file, NULL, &dt, opt.recursive, tmpiri);
+ &new_file, NULL, &dt, opt.recursive, tmpiri,
+ true);
if (parsed_url)
url_free (parsed_url);
else
*file = default_file;
}
+
+/* Report whether INPUT_FILE is the input file's own URL.
+
+   Returns true at most once per process: only on the first call for
+   which INPUT_FILE is non-NULL and url_has_scheme accepts it.  Every
+   other call returns false.  */
+bool
+input_file_url (const char *input_file)
+{
+  /* Remembers whether the single "true" answer is still available.  */
+  static bool first = true;
+
+  if (!input_file)
+    return false;
+
+  if (!url_has_scheme (input_file))
+    return false;
+
+  if (!first)
+    return false;
+
+  first = false;
+  return true;
+}