X-Git-Url: http://sjero.net/git/?p=wget;a=blobdiff_plain;f=src%2Fretr.c;h=39627e4bbf5067e8ea563ac110c39c44e1cf087a;hp=2a9dec7bbb658aa6bdb12365813c0121142e7cdb;hb=d5e283b1a75c5f8249300b465b4e7b55130bec49;hpb=50e12521d63b8b42370a07d9c9971f56ca6dc58a

diff --git a/src/retr.c b/src/retr.c
index 2a9dec7b..39627e4b 100644
--- a/src/retr.c
+++ b/src/retr.c
@@ -52,6 +52,7 @@ as that of the covered work. */
 #include "convert.h"
 #include "ptimer.h"
 #include "html-url.h"
+#include "iri.h"
 
 /* Total size of downloaded files. Used to enforce quota. */
 SUM_SIZE_INT total_downloaded_bytes;
@@ -142,8 +143,10 @@ limit_bandwidth (wgint bytes, struct ptimer *timer)
 
 static int
 write_data (FILE *out, const char *buf, int bufsize, wgint *skip,
-            wgint *written)
+            wgint *written, int flags)
 {
+  static int cr_pending = 0;    /* Found CR in ASCII FTP data. */
+
   if (!out)
     return 1;
   if (*skip > bufsize)
@@ -160,14 +163,89 @@ write_data (FILE *out, const char *buf, int bufsize, wgint *skip,
         return 1;
     }
 
-  fwrite (buf, 1, bufsize, out);
-  *written += bufsize;
+/* Note: This code assumes that "\n" is the universal line ending
+   character, as on UNIX and VMS. If this is not true, then here's
+   where to change it.
+*/
+
+#if 1
+# define EOL_STRING "\n"
+#else /* 1 */
+# define EOL_STRING "\r\n"
+#endif /* 1 [else] */
+#define EOL_STRING_LEN (sizeof( EOL_STRING)- 1)
+
+  if (flags & rb_ftp_ascii)
+    {
+      const char *bufend;
+
+      /* ASCII transfer. Put out lines delimited by CRLF. */
+      bufend = buf+ bufsize;
+      while (buf < bufend)
+        {
+          /* If CR, put out any pending CR, then set CR-pending flag. */
+          if (*buf == '\r')
+            {
+              if (cr_pending)
+                {
+                  fwrite ("\r", 1, 1, out);
+                  *written += 1;
+                }
+              cr_pending = 1;
+              buf++;
+              continue;
+            }
+
+          if (cr_pending)
+            {
+              if (*buf == '\n')
+                {
+                  /* Found FTP EOL (CRLF). Put out local EOL. */
+                  fwrite (EOL_STRING, 1, EOL_STRING_LEN, out);
+                  *written += EOL_STRING_LEN;
+                }
+              else
+                {
+                  /* Normal character. Put out pending CR and it. */
+                  fwrite ("\r", 1, 1, out);
+                  fwrite (buf, 1, 1, out);
+                  *written += 2;
+                }
+              buf++;
+              cr_pending = 0;
+            }
+          else
+            {
+              /* Normal character. Put it out. */
+              fwrite (buf, 1, 1, out);
+              *written += 1;
+              buf++;
+            }
+        }
+    }
+  else
+    {
+      /* Image transfer. Put out buffer. */
+      fwrite (buf, 1, bufsize, out);
+      *written += bufsize;
+    }
 
   /* Immediately flush the downloaded data. This should not hinder
      performance: fast downloads will arrive in large 16K chunks
      (which stdio would write out immediately anyway), and slow
      downloads wouldn't be limited by disk speed. */
+
+  /* 2005-04-20 SMS.
+     Perhaps it shouldn't hinder performance, but it sure does, at least
+     on VMS (more than 2X). Rather than speculate on what it should or
+     shouldn't do, it might make more sense to test it. Even better, it
+     might be nice to explain what possible benefit it could offer, as
+     it appears to be a clear invitation to poor performance with no
+     actual justification. (Also, why 16K? Anyone test other values?)
+  */
+#ifndef __VMS
   fflush (out);
+#endif /* ndef __VMS */
   return !ferror (out);
 }
 
@@ -298,7 +376,7 @@ fd_read_body (int fd, FILE *out, wgint toread, wgint startpos,
       if (ret > 0)
         {
           sum_read += ret;
-          if (!write_data (out, dlbuf, ret, &skip, &sum_written))
+          if (!write_data (out, dlbuf, ret, &skip, &sum_written, flags))
             {
               ret = -2;
               goto out;
@@ -604,6 +682,7 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
   uerr_t result;
   char *url;
   bool location_changed;
+  bool iri_fallbacked = 0;
   int dummy;
   char *mynewloc, *proxy;
   struct url *u = orig_parsed, *proxy_url;
@@ -627,15 +706,11 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
   if (file)
     *file = NULL;
 
- second_try:
-  DEBUGP (("[IRI Retrieving %s with %s (UTF-8=%d)\n", quote_n (0, url),
-           iri->uri_encoding ? quote_n (1, iri->uri_encoding) : "None",
-           iri->utf8_encode));
-
   if (!refurl)
     refurl = opt.referer;
 
  redirected:
+  /* (also for IRI fallbacking) */
 
   result = NOCONERROR;
   mynewloc = NULL;
@@ -804,7 +879,9 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
           if (u)
             {
               DEBUGP (("[IRI fallbacking to non-utf8 for %s\n", quote (url)));
-              goto second_try;
+              url = xstrdup (u->url);
+              iri_fallbacked = 1;
+              goto redirected;
             }
           else
               DEBUGP (("[Couldn't fallback to non-utf8 for %s\n", quote (url)));
@@ -839,7 +916,7 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
       url_free (u);
     }
 
-  if (redirection_count)
+  if (redirection_count || iri_fallbacked)
     {
       if (newloc)
         *newloc = url;
@@ -885,7 +962,7 @@ retrieve_from_file (const char *file, bool html, int *count)
     {
       int dt,url_err;
       uerr_t status;
-      struct url * url_parsed = url_parse(url, &url_err, NULL, true);
+      struct url * url_parsed = url_parse(url, &url_err, iri, true);
 
       if (!url_parsed)
         {
@@ -906,9 +983,15 @@ retrieve_from_file (const char *file, bool html, int *count)
       if (dt & TEXTHTML)
         html = true;
 
-      /* If we have a found a content encoding, use it */
-      if (iri->content_encoding)
+      /* If we have a found a content encoding, use it.
+       * ( == is okay, because we're checking for identical object) */
+      if (iri->content_encoding != opt.locale)
         set_uri_encoding (iri, iri->content_encoding, false);
+
+      /* Reset UTF-8 encode status */
+      iri->utf8_encode = opt.enable_iri;
+      xfree_null (iri->orig_url);
+      iri->orig_url = NULL;
     }
   else
     input_file = (char *) file;
@@ -920,6 +1003,8 @@ retrieve_from_file (const char *file, bool html, int *count)
     {
       char *filename = NULL, *new_file = NULL;
       int dt;
+      struct iri *tmpiri = iri_dup (iri);
+      struct url *parsed_url = NULL;
 
       if (cur_url->ignore_when_downloading)
         continue;
@@ -930,10 +1015,9 @@ retrieve_from_file (const char *file, bool html, int *count)
           break;
         }
 
-      /* Reset UTF-8 encode status */
-      iri->utf8_encode = opt.enable_iri;
-      xfree_null (iri->orig_url);
-      iri->orig_url = NULL;
+      /* Need to reparse the url, since it didn't have iri information. */
+      if (opt.enable_iri)
+        parsed_url = url_parse (cur_url->url->url, NULL, tmpiri, true);
 
       if ((opt.recursive || opt.page_requisites)
          && (cur_url->url->scheme != SCHEME_FTP || getproxy (cur_url->url)))
@@ -944,13 +1028,18 @@ retrieve_from_file (const char *file, bool html, int *count)
           if (cur_url->url->scheme == SCHEME_FTP)
             opt.follow_ftp = 1;
 
-          status = retrieve_tree (cur_url->url, iri);
+          status = retrieve_tree (parsed_url ? parsed_url : cur_url->url,
+                                  tmpiri);
 
           opt.follow_ftp = old_follow_ftp;
         }
       else
-        status = retrieve_url (cur_url->url, cur_url->url->url, &filename,
-                               &new_file, NULL, &dt, opt.recursive, iri);
+        status = retrieve_url (parsed_url ? parsed_url : cur_url->url,
+                               cur_url->url->url, &filename,
+                               &new_file, NULL, &dt, opt.recursive, tmpiri);
+
+      if (parsed_url)
+          url_free (parsed_url);
 
       if (filename && opt.delete_after && file_exists_p (filename))
         {
@@ -964,6 +1053,7 @@ Removing file due to --delete-after in retrieve_from_file():\n"));
 
       xfree_null (new_file);
       xfree_null (filename);
+      iri_free (tmpiri);
     }
 
   /* Free the linked list of URL-s. */
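
The rb_ftp_ascii branch added to write_data() above is a small state machine: a CR that arrives at the very end of one buffer has to be remembered (cr_pending) until the next buffer shows whether an LF follows, so that a CRLF pair split across two reads still collapses to a single local EOL. Below is a minimal standalone sketch of that same logic, assuming a UNIX-style "\n" line ending; the function name convert_ascii_chunk and the test strings are illustrative only and are not part of the patch.

#include <stdio.h>
#include <string.h>

/* Local line ending, as in the patch: "\n" for UNIX and VMS. */
#define EOL_STRING      "\n"
#define EOL_STRING_LEN  (sizeof (EOL_STRING) - 1)

/* Illustrative stand-in for the rb_ftp_ascii branch of write_data():
   convert CRLF to the local EOL, carrying a pending CR across calls so
   that a CR/LF pair split between two buffers is still recognized. */
static void
convert_ascii_chunk (FILE *out, const char *buf, size_t bufsize,
                     int *cr_pending)
{
  const char *bufend = buf + bufsize;

  while (buf < bufend)
    {
      if (*buf == '\r')
        {
          if (*cr_pending)
            fwrite ("\r", 1, 1, out);   /* earlier CR turned out to be lone */
          *cr_pending = 1;
          buf++;
          continue;
        }

      if (*cr_pending)
        {
          if (*buf == '\n')
            fwrite (EOL_STRING, 1, EOL_STRING_LEN, out);  /* CRLF -> local EOL */
          else
            {
              fwrite ("\r", 1, 1, out); /* CR not followed by LF: keep both */
              fwrite (buf, 1, 1, out);
            }
          *cr_pending = 0;
        }
      else
        fwrite (buf, 1, 1, out);        /* ordinary character */
      buf++;
    }
}

int
main (void)
{
  /* "ab" CRLF "c" CRLF arrives split so the CR ends the first chunk. */
  const char *part1 = "ab\r";
  const char *part2 = "\nc\r\n";
  int cr_pending = 0;

  convert_ascii_chunk (stdout, part1, strlen (part1), &cr_pending);
  convert_ascii_chunk (stdout, part2, strlen (part2), &cr_pending);
  return 0;
}

The patch keeps cr_pending in a static variable inside write_data(); the sketch passes it explicitly and drops the *written bookkeeping, purely to keep the example self-contained.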
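
The 2005-04-20 comment asks for an actual measurement of the per-chunk fflush() rather than speculation. A rough, self-contained way to run that test is sketched below; the 16 KiB chunk size mirrors the buffer size mentioned in the comment, while the file name, chunk count and timing method are arbitrary choices made for the example, not anything taken from wget.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#define CHUNK   (16 * 1024)     /* chunk size discussed in the comment */
#define CHUNKS  (8 * 1024)      /* 128 MiB total; adjust to taste */

/* Write CHUNKS chunks of CHUNK bytes, optionally flushing after each
   one the way write_data() does, and return the elapsed seconds. */
static double
timed_write (const char *path, int flush_each)
{
  static char buf[CHUNK];
  FILE *out = fopen (path, "wb");
  struct timespec t0, t1;
  int i;

  if (!out)
    {
      perror (path);
      exit (EXIT_FAILURE);
    }
  memset (buf, 'x', sizeof buf);

  clock_gettime (CLOCK_MONOTONIC, &t0);
  for (i = 0; i < CHUNKS; i++)
    {
      fwrite (buf, 1, sizeof buf, out);
      if (flush_each)
        fflush (out);
    }
  fclose (out);
  clock_gettime (CLOCK_MONOTONIC, &t1);

  return (t1.tv_sec - t0.tv_sec) + (t1.tv_nsec - t0.tv_nsec) / 1e9;
}

int
main (void)
{
  printf ("no per-chunk fflush: %.3f s\n", timed_write ("flush-test.tmp", 0));
  printf ("fflush every chunk:  %.3f s\n", timed_write ("flush-test.tmp", 1));
  remove ("flush-test.tmp");
  return 0;
}

On systems where stdio pushes a 16 KiB write straight through anyway, the two timings should be close, which is the assumption the original "should not hinder performance" comment makes; the 2005 note reports a slowdown of more than 2X on VMS, which is the case the #ifndef __VMS guard above addresses.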
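
In retrieve_url(), the old second_try: label is gone: when a fetch fails while the URL had been UTF-8-encoded, the code now rebuilds the URL without UTF-8 encoding, sets iri_fallbacked, and jumps back to the same redirected: label a real redirect uses, so the caller is handed the final URL through *newloc just as it would be after a redirect. The sketch below only mirrors that control flow; fetch(), encode_url() and the hard-coded example URL are hypothetical stand-ins, not wget code.

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Hypothetical stand-in: pretend only the non-UTF-8 form of this
   particular URL can be fetched. */
static bool
fetch (const char *url)
{
  return strstr (url, "%C3%A9") == NULL;
}

/* Hypothetical stand-in for url_parse()/remote_to_utf8(): either
   percent-encode the e-acute as UTF-8 or leave the bytes alone. */
static char *
encode_url (const char *orig, bool utf8_encode)
{
  return strdup (utf8_encode ? "http://example.org/caf%C3%A9" : orig);
}

static int
retrieve (const char *origurl, char **newloc)
{
  bool utf8_encode = true;      /* plays the role of opt.enable_iri */
  bool iri_fallbacked = false;
  char *url = encode_url (origurl, utf8_encode);

 redirected:                    /* reused for the IRI fallback, as in the patch */
  if (!fetch (url))
    {
      if (utf8_encode)
        {
          /* Same shape as "iri_fallbacked = 1; goto redirected;". */
          utf8_encode = false;
          free (url);
          url = encode_url (origurl, utf8_encode);
          iri_fallbacked = true;
          goto redirected;
        }
      free (url);
      return -1;
    }

  /* As with "if (redirection_count || iri_fallbacked)": report the
     final URL whenever it no longer matches what the caller passed. */
  if (iri_fallbacked)
    *newloc = url;
  else
    {
      *newloc = NULL;
      free (url);
    }
  return 0;
}

int
main (void)
{
  char *newloc = NULL;

  if (retrieve ("http://example.org/caf\xc3\xa9", &newloc) == 0)
    printf ("fetched %s\n", newloc ? newloc : "(original URL)");
  free (newloc);
  return 0;
}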