X-Git-Url: http://sjero.net/git/?p=wget;a=blobdiff_plain;f=src%2Fretr.c;h=1c587a2cb4bc0dd372915df3ad6a13ac696b03de;hp=8752dce71510d3ef3819bdbbb4ba7a0906aa1c4a;hb=2f6aa1d7417df1dfc58597777686fbd77179b9fd;hpb=a9da78c6d8f9e2699107fcde81efeb607dde1cca diff --git a/src/retr.c b/src/retr.c index 8752dce7..1c587a2c 100644 --- a/src/retr.c +++ b/src/retr.c @@ -1,6 +1,7 @@ /* File retrieval. - Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, - 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc. + Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, + 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, + Inc. This file is part of GNU Wget. @@ -32,13 +33,12 @@ as that of the covered work. */ #include #include -#ifdef HAVE_UNISTD_H -# include -#endif /* HAVE_UNISTD_H */ +#include #include #include #include +#include "exits.h" #include "utils.h" #include "retr.h" #include "progress.h" @@ -168,7 +168,18 @@ write_data (FILE *out, const char *buf, int bufsize, wgint *skip, performance: fast downloads will arrive in large 16K chunks (which stdio would write out immediately anyway), and slow downloads wouldn't be limited by disk speed. */ + + /* 2005-04-20 SMS. + Perhaps it shouldn't hinder performance, but it sure does, at least + on VMS (more than 2X). Rather than speculate on what it should or + shouldn't do, it might make more sense to test it. Even better, it + might be nice to explain what possible benefit it could offer, as + it appears to be a clear invitation to poor performance with no + actual justification. (Also, why 16K? Anyone test other values?) + */ +#ifndef __VMS fflush (out); +#endif /* ndef __VMS */ return !ferror (out); } @@ -197,8 +208,8 @@ fd_read_body (int fd, FILE *out, wgint toread, wgint startpos, { int ret = 0; - static char dlbuf[16384]; - int dlbufsize = sizeof (dlbuf); + int dlbufsize = BUFSIZ; + char *dlbuf = xmalloc (BUFSIZ); struct ptimer *timer = NULL; double last_successful_read_tm = 0; @@ -213,11 +224,15 @@ fd_read_body (int fd, FILE *out, wgint toread, wgint startpos, bool progress_interactive = false; bool exact = !!(flags & rb_read_exactly); + + /* Used only by HTTP/HTTPS chunked transfer encoding. */ + bool chunked = flags & rb_chunked_transfer_encoding; wgint skip = 0; /* How much data we've read/written. */ wgint sum_read = 0; wgint sum_written = 0; + wgint remaining_chunk_size = 0; if (flags & rb_skip_startpos) skip = startpos; @@ -257,8 +272,36 @@ fd_read_body (int fd, FILE *out, wgint toread, wgint startpos, should be read. */ while (!exact || (sum_read < toread)) { - int rdsize = exact ? MIN (toread - sum_read, dlbufsize) : dlbufsize; + int rdsize; double tmout = opt.read_timeout; + + if (chunked) + { + if (remaining_chunk_size == 0) + { + char *line = fd_read_line (fd); + char *endl; + if (line == NULL) + { + ret = -1; + break; + } + + remaining_chunk_size = strtol (line, &endl, 16); + if (remaining_chunk_size == 0) + { + ret = 0; + if (fd_read_line (fd) == NULL) + ret = -1; + break; + } + } + + rdsize = MIN (remaining_chunk_size, dlbufsize); + } + else + rdsize = exact ? MIN (toread - sum_read, dlbufsize) : dlbufsize; + if (progress_interactive) { /* For interactive progress gauges, always specify a ~1s @@ -289,7 +332,7 @@ fd_read_body (int fd, FILE *out, wgint toread, wgint startpos, else if (ret <= 0) break; /* EOF or read error */ - if (progress || opt.limit_rate) + if (progress || opt.limit_rate || elapsed) { ptimer_measure (timer); if (ret > 0) @@ -304,6 +347,16 @@ fd_read_body (int fd, FILE *out, wgint toread, wgint startpos, ret = -2; goto out; } + if (chunked) + { + remaining_chunk_size -= ret; + if (remaining_chunk_size == 0) + if (fd_read_line (fd) == NULL) + { + ret = -1; + break; + } + } } if (opt.limit_rate) @@ -334,6 +387,8 @@ fd_read_body (int fd, FILE *out, wgint toread, wgint startpos, if (qtywritten) *qtywritten += sum_written; + free (dlbuf); + return ret; } @@ -600,11 +655,12 @@ static char *getproxy (struct url *); uerr_t retrieve_url (struct url * orig_parsed, const char *origurl, char **file, char **newloc, const char *refurl, int *dt, bool recursive, - struct iri *iri) + struct iri *iri, bool register_status) { uerr_t result; char *url; bool location_changed; + bool iri_fallbacked = 0; int dummy; char *mynewloc, *proxy; struct url *u = orig_parsed, *proxy_url; @@ -628,15 +684,11 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file, if (file) *file = NULL; - second_try: - DEBUGP (("[IRI Retrieving %s with %s (UTF-8=%d)\n", quote_n (0, url), - iri->uri_encoding ? quote_n (1, iri->uri_encoding) : "None", - iri->utf8_encode)); - if (!refurl) refurl = opt.referer; redirected: + /* (also for IRI fallbacking) */ result = NOCONERROR; mynewloc = NULL; @@ -660,7 +712,8 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file, xfree (url); xfree (error); RESTORE_POST_DATA; - return PROXERR; + result = PROXERR; + goto bail; } if (proxy_url->scheme != SCHEME_HTTP && proxy_url->scheme != u->scheme) { @@ -668,7 +721,8 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file, url_free (proxy_url); xfree (url); RESTORE_POST_DATA; - return PROXERR; + result = PROXERR; + goto bail; } } @@ -678,7 +732,8 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file, #endif || (proxy_url && proxy_url->scheme == SCHEME_HTTP)) { - result = http_loop (u, &mynewloc, &local_file, refurl, dt, proxy_url, iri); + result = http_loop (u, orig_parsed, &mynewloc, &local_file, refurl, dt, + proxy_url, iri); } else if (u->scheme == SCHEME_FTP) { @@ -689,7 +744,7 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file, if (redirection_count) oldrec = glob = false; - result = ftp_loop (u, dt, proxy_url, recursive, glob); + result = ftp_loop (u, &local_file, dt, proxy_url, recursive, glob); recursive = oldrec; /* There is a possibility of having HTTP being redirected to @@ -749,7 +804,7 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file, xfree (mynewloc); xfree (error); RESTORE_POST_DATA; - return result; + goto bail; } /* Now mynewloc will become newloc_parsed->url, because if the @@ -771,7 +826,8 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file, xfree (url); xfree (mynewloc); RESTORE_POST_DATA; - return WRONGCODE; + result = WRONGCODE; + goto bail; } xfree (url); @@ -805,29 +861,26 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file, if (u) { DEBUGP (("[IRI fallbacking to non-utf8 for %s\n", quote (url))); - goto second_try; + url = xstrdup (u->url); + iri_fallbacked = 1; + goto redirected; } else DEBUGP (("[Couldn't fallback to non-utf8 for %s\n", quote (url))); } - if (local_file && *dt & RETROKF) + if (local_file && u && *dt & RETROKF) { register_download (u->url, local_file); + if (redirection_count && 0 != strcmp (origurl, u->url)) register_redirection (origurl, u->url); + if (*dt & TEXTHTML) register_html (u->url, local_file); - if (*dt & RETROKF) - { - register_download (u->url, local_file); - if (redirection_count && 0 != strcmp (origurl, u->url)) - register_redirection (origurl, u->url); - if (*dt & TEXTHTML) - register_html (u->url, local_file); - if (*dt & TEXTCSS) - register_css (u->url, local_file); - } + + if (*dt & TEXTCSS) + register_css (u->url, local_file); } if (file) @@ -840,7 +893,7 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file, url_free (u); } - if (redirection_count) + if (redirection_count || iri_fallbacked) { if (newloc) *newloc = url; @@ -856,6 +909,9 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file, RESTORE_POST_DATA; +bail: + if (register_status) + inform_exit_status (result); return result; } @@ -872,7 +928,7 @@ retrieve_from_file (const char *file, bool html, int *count) struct urlpos *url_list, *cur_url; struct iri *iri = iri_new(); - char *input_file = NULL; + char *input_file, *url_file = NULL; const char *url = file; status = RETROK; /* Suppose everything is OK. */ @@ -882,7 +938,7 @@ retrieve_from_file (const char *file, bool html, int *count) set_uri_encoding (iri, opt.locale, true); set_content_encoding (iri, opt.locale); - if (url_has_scheme (url)) + if (url_valid_scheme (url)) { int dt,url_err; uerr_t status; @@ -899,9 +955,11 @@ retrieve_from_file (const char *file, bool html, int *count) if (!opt.base_href) opt.base_href = xstrdup (url); - status = retrieve_url (url_parsed, url, &input_file, NULL, NULL, &dt, - false, iri); - if (status != RETROK) + status = retrieve_url (url_parsed, url, &url_file, NULL, NULL, &dt, + false, iri, true); + url_free (url_parsed); + + if (!url_file || (status != RETROK)) return status; if (dt & TEXTHTML) @@ -916,6 +974,8 @@ retrieve_from_file (const char *file, bool html, int *count) iri->utf8_encode = opt.enable_iri; xfree_null (iri->orig_url); iri->orig_url = NULL; + + input_file = url_file; } else input_file = (char *) file; @@ -923,6 +983,8 @@ retrieve_from_file (const char *file, bool html, int *count) url_list = (html ? get_urls_html (input_file, NULL, NULL, iri) : get_urls_file (input_file)); + xfree_null (url_file); + for (cur_url = url_list; cur_url; cur_url = cur_url->next, ++*count) { char *filename = NULL, *new_file = NULL; @@ -960,7 +1022,8 @@ retrieve_from_file (const char *file, bool html, int *count) else status = retrieve_url (parsed_url ? parsed_url : cur_url->url, cur_url->url->url, &filename, - &new_file, NULL, &dt, opt.recursive, tmpiri); + &new_file, NULL, &dt, opt.recursive, tmpiri, + true); if (parsed_url) url_free (parsed_url); @@ -1165,3 +1228,20 @@ set_local_file (const char **file, const char *default_file) else *file = default_file; } + +/* Return true for an input file's own URL, false otherwise. */ +bool +input_file_url (const char *input_file) +{ + static bool first = true; + + if (input_file + && url_has_scheme (input_file) + && first) + { + first = false; + return true; + } + else + return false; +}