X-Git-Url: http://sjero.net/git/?a=blobdiff_plain;f=src%2Fretr.c;h=8c8cdf5b4d6016ca1d919eec87aaf5074b40b385;hb=6d67d793f51af4e0a5a840751c15308ab76ba8b6;hp=1c587a2cb4bc0dd372915df3ad6a13ac696b03de;hpb=2f6aa1d7417df1dfc58597777686fbd77179b9fd;p=wget diff --git a/src/retr.c b/src/retr.c index 1c587a2c..8c8cdf5b 100644 --- a/src/retr.c +++ b/src/retr.c @@ -139,13 +139,16 @@ limit_bandwidth (wgint bytes, struct ptimer *timer) /* Write data in BUF to OUT. However, if *SKIP is non-zero, skip that amount of data and decrease SKIP. Increment *TOTAL by the amount - of data written. */ + of data written. If OUT2 is not NULL, also write BUF to OUT2. + In case of error writing to OUT, -1 is returned. In case of error + writing to OUT2, -2 is returned. In case of any other error, + 1 is returned. */ static int -write_data (FILE *out, const char *buf, int bufsize, wgint *skip, - wgint *written) +write_data (FILE *out, FILE *out2, const char *buf, int bufsize, + wgint *skip, wgint *written) { - if (!out) + if (out == NULL && out2 == NULL) return 1; if (*skip > bufsize) { @@ -161,7 +164,10 @@ write_data (FILE *out, const char *buf, int bufsize, wgint *skip, return 1; } - fwrite (buf, 1, bufsize, out); + if (out != NULL) + fwrite (buf, 1, bufsize, out); + if (out2 != NULL) + fwrite (buf, 1, bufsize, out2); *written += bufsize; /* Immediately flush the downloaded data. This should not hinder @@ -178,9 +184,17 @@ write_data (FILE *out, const char *buf, int bufsize, wgint *skip, actual justification. (Also, why 16K? Anyone test other values?) */ #ifndef __VMS - fflush (out); + if (out != NULL) + fflush (out); + if (out2 != NULL) + fflush (out2); #endif /* ndef __VMS */ - return !ferror (out); + if (out != NULL && ferror (out)) + return -1; + else if (out2 != NULL && ferror (out2)) + return -2; + else + return 0; } /* Read the contents of file descriptor FD until it the connection @@ -198,18 +212,26 @@ write_data (FILE *out, const char *buf, int bufsize, wgint *skip, the amount of data written to disk. The time it took to download the data is stored to ELAPSED. + If OUT2 is non-NULL, the contents is also written to OUT2. + OUT2 will get an exact copy of the response: if this is a chunked + response, everything -- including the chunk headers -- is written + to OUT2. (OUT will only get the unchunked response.) + The function exits and returns the amount of data read. In case of error while reading data, -1 is returned. In case of error while - writing data, -2 is returned. */ + writing data to OUT, -2 is returned. In case of error while writing + data to OUT2, -3 is returned. */ int fd_read_body (int fd, FILE *out, wgint toread, wgint startpos, - wgint *qtyread, wgint *qtywritten, double *elapsed, int flags) + wgint *qtyread, wgint *qtywritten, double *elapsed, int flags, + FILE *out2) { int ret = 0; - - int dlbufsize = BUFSIZ; - char *dlbuf = xmalloc (BUFSIZ); +#undef max +#define max(a,b) ((a) > (b) ? (a) : (b)) + int dlbufsize = max (BUFSIZ, 8 * 1024); + char *dlbuf = xmalloc (dlbufsize); struct ptimer *timer = NULL; double last_successful_read_tm = 0; @@ -286,13 +308,24 @@ fd_read_body (int fd, FILE *out, wgint toread, wgint startpos, ret = -1; break; } + else if (out2 != NULL) + fwrite (line, 1, strlen (line), out2); remaining_chunk_size = strtol (line, &endl, 16); + xfree (line); + if (remaining_chunk_size == 0) { ret = 0; - if (fd_read_line (fd) == NULL) + line = fd_read_line (fd); + if (line == NULL) ret = -1; + else + { + if (out2 != NULL) + fwrite (line, 1, strlen (line), out2); + xfree (line); + } break; } } @@ -342,20 +375,30 @@ fd_read_body (int fd, FILE *out, wgint toread, wgint startpos, if (ret > 0) { sum_read += ret; - if (!write_data (out, dlbuf, ret, &skip, &sum_written)) + int write_res = write_data (out, out2, dlbuf, ret, &skip, &sum_written); + if (write_res != 0) { - ret = -2; + ret = (write_res == -3) ? -3 : -2; goto out; } if (chunked) { remaining_chunk_size -= ret; if (remaining_chunk_size == 0) - if (fd_read_line (fd) == NULL) - { - ret = -1; - break; - } + { + char *line = fd_read_line (fd); + if (line == NULL) + { + ret = -1; + break; + } + else + { + if (out2 != NULL) + fwrite (line, 1, strlen (line), out2); + xfree (line); + } + } } } @@ -764,7 +807,7 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file, proxy_url = NULL; } - location_changed = (result == NEWLOCATION); + location_changed = (result == NEWLOCATION || result == NEWLOCATION_KEEP_POST); if (location_changed) { char *construced_newloc; @@ -838,12 +881,17 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file, } u = newloc_parsed; - /* If we're being redirected from POST, we don't want to POST + /* If we're being redirected from POST, and we received a + redirect code different than 307, we don't want to POST again. Many requests answer POST with a redirection to an index page; that redirection is clearly a GET. We "suspend" POST data for the duration of the redirections, and restore - it when we're done. */ - if (!post_data_suspended) + it when we're done. + + RFC2616 HTTP/1.1 introduces code 307 Temporary Redirect + specifically to preserve the method of the request. + */ + if (result != NEWLOCATION_KEEP_POST && !post_data_suspended) SUSPEND_POST_DATA; goto redirected; @@ -873,7 +921,7 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file, { register_download (u->url, local_file); - if (redirection_count && 0 != strcmp (origurl, u->url)) + if (!opt.spider && redirection_count && 0 != strcmp (origurl, u->url)) register_redirection (origurl, u->url); if (*dt & TEXTHTML) @@ -942,8 +990,7 @@ retrieve_from_file (const char *file, bool html, int *count) { int dt,url_err; uerr_t status; - struct url * url_parsed = url_parse(url, &url_err, iri, true); - + struct url *url_parsed = url_parse (url, &url_err, iri, true); if (!url_parsed) { char *error = url_error (url, url_err); @@ -1001,9 +1048,7 @@ retrieve_from_file (const char *file, bool html, int *count) break; } - /* Need to reparse the url, since it didn't have iri information. */ - if (opt.enable_iri) - parsed_url = url_parse (cur_url->url->url, NULL, tmpiri, true); + parsed_url = url_parse (cur_url->url->url, NULL, tmpiri, true); if ((opt.recursive || opt.page_requisites) && (cur_url->url->scheme != SCHEME_FTP || getproxy (cur_url->url)))