X-Git-Url: http://sjero.net/git/?p=wget;a=blobdiff_plain;f=src%2Fretr.c;h=683c8117fcb131c71eb9f2b9d1732e4847a2ac6c;hp=f1b8f955a36f3f2361f7142b1366fd78aaba72c5;hb=320cfdcb658e8d6556ae9dfd902c2db1db866a6b;hpb=b014f8fae9291e7504c0cca2dd8b9a0035466c03 diff --git a/src/retr.c b/src/retr.c index f1b8f955..683c8117 100644 --- a/src/retr.c +++ b/src/retr.c @@ -1,6 +1,7 @@ /* File retrieval. - Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, - 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc. + Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, + 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, + Inc. This file is part of GNU Wget. @@ -32,12 +33,13 @@ as that of the covered work. */ #include #include -#ifdef HAVE_UNISTD_H -# include -#endif /* HAVE_UNISTD_H */ +#include #include #include #include +#ifdef VMS +# include /* For delete(). */ +#endif #include "exits.h" #include "utils.h" @@ -140,13 +142,16 @@ limit_bandwidth (wgint bytes, struct ptimer *timer) /* Write data in BUF to OUT. However, if *SKIP is non-zero, skip that amount of data and decrease SKIP. Increment *TOTAL by the amount - of data written. */ + of data written. If OUT2 is not NULL, also write BUF to OUT2. + In case of error writing to OUT, -1 is returned. In case of error + writing to OUT2, -2 is returned. Return 1 if the whole BUF was + skipped. */ static int -write_data (FILE *out, const char *buf, int bufsize, wgint *skip, - wgint *written) +write_data (FILE *out, FILE *out2, const char *buf, int bufsize, + wgint *skip, wgint *written) { - if (!out) + if (out == NULL && out2 == NULL) return 1; if (*skip > bufsize) { @@ -162,7 +167,10 @@ write_data (FILE *out, const char *buf, int bufsize, wgint *skip, return 1; } - fwrite (buf, 1, bufsize, out); + if (out != NULL) + fwrite (buf, 1, bufsize, out); + if (out2 != NULL) + fwrite (buf, 1, bufsize, out2); *written += bufsize; /* Immediately flush the downloaded data. This should not hinder @@ -179,9 +187,17 @@ write_data (FILE *out, const char *buf, int bufsize, wgint *skip, actual justification. (Also, why 16K? Anyone test other values?) */ #ifndef __VMS - fflush (out); + if (out != NULL) + fflush (out); + if (out2 != NULL) + fflush (out2); #endif /* ndef __VMS */ - return !ferror (out); + if (out != NULL && ferror (out)) + return -1; + else if (out2 != NULL && ferror (out2)) + return -2; + else + return 0; } /* Read the contents of file descriptor FD until it the connection @@ -199,18 +215,26 @@ write_data (FILE *out, const char *buf, int bufsize, wgint *skip, the amount of data written to disk. The time it took to download the data is stored to ELAPSED. + If OUT2 is non-NULL, the contents is also written to OUT2. + OUT2 will get an exact copy of the response: if this is a chunked + response, everything -- including the chunk headers -- is written + to OUT2. (OUT will only get the unchunked response.) + The function exits and returns the amount of data read. In case of error while reading data, -1 is returned. In case of error while - writing data, -2 is returned. */ + writing data to OUT, -2 is returned. In case of error while writing + data to OUT2, -3 is returned. */ int fd_read_body (int fd, FILE *out, wgint toread, wgint startpos, - wgint *qtyread, wgint *qtywritten, double *elapsed, int flags) + wgint *qtyread, wgint *qtywritten, double *elapsed, int flags, + FILE *out2) { int ret = 0; - - static char dlbuf[16384]; - int dlbufsize = sizeof (dlbuf); +#undef max +#define max(a,b) ((a) > (b) ? (a) : (b)) + int dlbufsize = max (BUFSIZ, 8 * 1024); + char *dlbuf = xmalloc (dlbufsize); struct ptimer *timer = NULL; double last_successful_read_tm = 0; @@ -225,11 +249,15 @@ fd_read_body (int fd, FILE *out, wgint toread, wgint startpos, bool progress_interactive = false; bool exact = !!(flags & rb_read_exactly); + + /* Used only by HTTP/HTTPS chunked transfer encoding. */ + bool chunked = flags & rb_chunked_transfer_encoding; wgint skip = 0; /* How much data we've read/written. */ wgint sum_read = 0; wgint sum_written = 0; + wgint remaining_chunk_size = 0; if (flags & rb_skip_startpos) skip = startpos; @@ -269,8 +297,47 @@ fd_read_body (int fd, FILE *out, wgint toread, wgint startpos, should be read. */ while (!exact || (sum_read < toread)) { - int rdsize = exact ? MIN (toread - sum_read, dlbufsize) : dlbufsize; + int rdsize; double tmout = opt.read_timeout; + + if (chunked) + { + if (remaining_chunk_size == 0) + { + char *line = fd_read_line (fd); + char *endl; + if (line == NULL) + { + ret = -1; + break; + } + else if (out2 != NULL) + fwrite (line, 1, strlen (line), out2); + + remaining_chunk_size = strtol (line, &endl, 16); + xfree (line); + + if (remaining_chunk_size == 0) + { + ret = 0; + line = fd_read_line (fd); + if (line == NULL) + ret = -1; + else + { + if (out2 != NULL) + fwrite (line, 1, strlen (line), out2); + xfree (line); + } + break; + } + } + + rdsize = MIN (remaining_chunk_size, dlbufsize); + } + else + rdsize = exact ? MIN (toread - sum_read, dlbufsize) : dlbufsize; + if (progress_interactive) { /* For interactive progress gauges, always specify a ~1s @@ -301,7 +368,7 @@ fd_read_body (int fd, FILE *out, wgint toread, wgint startpos, else if (ret <= 0) break; /* EOF or read error */ - if (progress || opt.limit_rate) + if (progress || opt.limit_rate || elapsed) { ptimer_measure (timer); if (ret > 0) @@ -311,11 +378,31 @@ fd_read_body (int fd, FILE *out, wgint toread, wgint startpos, if (ret > 0) { sum_read += ret; - if (!write_data (out, dlbuf, ret, &skip, &sum_written)) + int write_res = write_data (out, out2, dlbuf, ret, &skip, &sum_written); + if (write_res < 0) { - ret = -2; + ret = (write_res == -3) ? -3 : -2; goto out; } + if (chunked) + { + remaining_chunk_size -= ret; + if (remaining_chunk_size == 0) + { + char *line = fd_read_line (fd); + if (line == NULL) + { + ret = -1; + break; + } + else + { + if (out2 != NULL) + fwrite (line, 1, strlen (line), out2); + xfree (line); + } + } + } } if (opt.limit_rate) @@ -346,6 +433,8 @@ fd_read_body (int fd, FILE *out, wgint toread, wgint startpos, if (qtywritten) *qtywritten += sum_written; + free (dlbuf); + return ret; } @@ -534,6 +623,7 @@ retr_rate (wgint bytes, double secs) { static char res[20]; static const char *rate_names[] = {"B/s", "KB/s", "MB/s", "GB/s" }; + static const char *rate_names_bits[] = {"b/s", "Kb/s", "Mb/s", "Gb/s" }; int units; double dlrate = calc_rate (bytes, secs, &units); @@ -541,7 +631,7 @@ retr_rate (wgint bytes, double secs) e.g. "1022", "247", "12.5", "2.38". */ sprintf (res, "%.*f %s", dlrate >= 99.95 ? 0 : dlrate >= 9.995 ? 1 : 2, - dlrate, rate_names[units]); + dlrate, !opt.report_bps ? rate_names[units]: rate_names_bits[units]); return res; } @@ -558,6 +648,11 @@ double calc_rate (wgint bytes, double secs, int *units) { double dlrate; + double bibyte = 1000.0; + + if (!opt.report_bps) + bibyte = 1024.0; + assert (secs >= 0); assert (bytes >= 0); @@ -569,35 +664,39 @@ calc_rate (wgint bytes, double secs, int *units) 0 and the timer's resolution, assume half the resolution. */ secs = ptimer_resolution () / 2.0; - dlrate = bytes / secs; - if (dlrate < 1024.0) + dlrate = convert_to_bits (bytes) / secs; + if (dlrate < bibyte) *units = 0; - else if (dlrate < 1024.0 * 1024.0) - *units = 1, dlrate /= 1024.0; - else if (dlrate < 1024.0 * 1024.0 * 1024.0) - *units = 2, dlrate /= (1024.0 * 1024.0); + else if (dlrate < (bibyte * bibyte)) + *units = 1, dlrate /= bibyte; + else if (dlrate < (bibyte * bibyte * bibyte)) + *units = 2, dlrate /= (bibyte * bibyte); + else /* Maybe someone will need this, one day. */ - *units = 3, dlrate /= (1024.0 * 1024.0 * 1024.0); + *units = 3, dlrate /= (bibyte * bibyte * bibyte); return dlrate; } -#define SUSPEND_POST_DATA do { \ - post_data_suspended = true; \ - saved_post_data = opt.post_data; \ - saved_post_file_name = opt.post_file_name; \ - opt.post_data = NULL; \ - opt.post_file_name = NULL; \ +#define SUSPEND_METHOD do { \ + method_suspended = true; \ + saved_body_data = opt.body_data; \ + saved_body_file_name = opt.body_file; \ + saved_method = opt.method; \ + opt.body_data = NULL; \ + opt.body_file = NULL; \ + opt.method = NULL; \ } while (0) -#define RESTORE_POST_DATA do { \ - if (post_data_suspended) \ +#define RESTORE_METHOD do { \ + if (method_suspended) \ { \ - opt.post_data = saved_post_data; \ - opt.post_file_name = saved_post_file_name; \ - post_data_suspended = false; \ + opt.body_data = saved_body_data; \ + opt.body_file = saved_body_file_name; \ + opt.method = saved_method; \ + method_suspended = false; \ } \ } while (0) @@ -625,9 +724,10 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file, char *local_file; int redirection_count = 0; - bool post_data_suspended = false; - char *saved_post_data = NULL; - char *saved_post_file_name = NULL; + bool method_suspended = false; + char *saved_body_data = NULL; + char *saved_method = NULL; + char *saved_body_file_name = NULL; /* If dt is NULL, use local storage. */ if (!dt) @@ -668,7 +768,7 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file, proxy, error); xfree (url); xfree (error); - RESTORE_POST_DATA; + RESTORE_METHOD; result = PROXERR; goto bail; } @@ -677,7 +777,7 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file, logprintf (LOG_NOTQUIET, _("Error in proxy URL %s: Must be HTTP.\n"), proxy); url_free (proxy_url); xfree (url); - RESTORE_POST_DATA; + RESTORE_METHOD; result = PROXERR; goto bail; } @@ -689,7 +789,8 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file, #endif || (proxy_url && proxy_url->scheme == SCHEME_HTTP)) { - result = http_loop (u, &mynewloc, &local_file, refurl, dt, proxy_url, iri); + result = http_loop (u, orig_parsed, &mynewloc, &local_file, refurl, dt, + proxy_url, iri); } else if (u->scheme == SCHEME_FTP) { @@ -700,7 +801,7 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file, if (redirection_count) oldrec = glob = false; - result = ftp_loop (u, dt, proxy_url, recursive, glob); + result = ftp_loop (u, &local_file, dt, proxy_url, recursive, glob); recursive = oldrec; /* There is a possibility of having HTTP being redirected to @@ -720,7 +821,7 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file, proxy_url = NULL; } - location_changed = (result == NEWLOCATION); + location_changed = (result == NEWLOCATION || result == NEWLOCATION_KEEP_POST); if (location_changed) { char *construced_newloc; @@ -744,6 +845,7 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file, iri->utf8_encode = opt.enable_iri; set_content_encoding (iri, NULL); xfree_null (iri->orig_url); + iri->orig_url = NULL; /* Now, see if this new location makes sense. */ newloc_parsed = url_parse (mynewloc, &up_error_code, iri, true); @@ -759,7 +861,7 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file, xfree (url); xfree (mynewloc); xfree (error); - RESTORE_POST_DATA; + RESTORE_METHOD; goto bail; } @@ -781,7 +883,7 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file, } xfree (url); xfree (mynewloc); - RESTORE_POST_DATA; + RESTORE_METHOD; result = WRONGCODE; goto bail; } @@ -794,13 +896,18 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file, } u = newloc_parsed; - /* If we're being redirected from POST, we don't want to POST + /* If we're being redirected from POST, and we received a + redirect code different than 307, we don't want to POST again. Many requests answer POST with a redirection to an index page; that redirection is clearly a GET. We "suspend" POST data for the duration of the redirections, and restore - it when we're done. */ - if (!post_data_suspended) - SUSPEND_POST_DATA; + it when we're done. + + RFC2616 HTTP/1.1 introduces code 307 Temporary Redirect + specifically to preserve the method of the request. + */ + if (result != NEWLOCATION_KEEP_POST && !method_suspended) + SUSPEND_METHOD; goto redirected; } @@ -825,23 +932,18 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file, DEBUGP (("[Couldn't fallback to non-utf8 for %s\n", quote (url))); } - if (local_file && *dt & RETROKF) + if (local_file && u && *dt & RETROKF) { register_download (u->url, local_file); - if (redirection_count && 0 != strcmp (origurl, u->url)) + + if (!opt.spider && redirection_count && 0 != strcmp (origurl, u->url)) register_redirection (origurl, u->url); + if (*dt & TEXTHTML) - register_html (u->url, local_file); - if (*dt & RETROKF) - { - register_download (u->url, local_file); - if (redirection_count && 0 != strcmp (origurl, u->url)) - register_redirection (origurl, u->url); - if (*dt & TEXTHTML) - register_html (u->url, local_file); - if (*dt & TEXTCSS) - register_css (u->url, local_file); - } + register_html (local_file); + + if (*dt & TEXTCSS) + register_css (local_file); } if (file) @@ -868,7 +970,7 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file, xfree (url); } - RESTORE_POST_DATA; + RESTORE_METHOD; bail: if (register_status) @@ -889,7 +991,7 @@ retrieve_from_file (const char *file, bool html, int *count) struct urlpos *url_list, *cur_url; struct iri *iri = iri_new(); - char *input_file = NULL; + char *input_file, *url_file = NULL; const char *url = file; status = RETROK; /* Suppose everything is OK. */ @@ -899,12 +1001,11 @@ retrieve_from_file (const char *file, bool html, int *count) set_uri_encoding (iri, opt.locale, true); set_content_encoding (iri, opt.locale); - if (url_has_scheme (url)) + if (url_valid_scheme (url)) { int dt,url_err; uerr_t status; - struct url * url_parsed = url_parse(url, &url_err, iri, true); - + struct url *url_parsed = url_parse (url, &url_err, iri, true); if (!url_parsed) { char *error = url_error (url, url_err); @@ -916,9 +1017,11 @@ retrieve_from_file (const char *file, bool html, int *count) if (!opt.base_href) opt.base_href = xstrdup (url); - status = retrieve_url (url_parsed, url, &input_file, NULL, NULL, &dt, + status = retrieve_url (url_parsed, url, &url_file, NULL, NULL, &dt, false, iri, true); - if (status != RETROK) + url_free (url_parsed); + + if (!url_file || (status != RETROK)) return status; if (dt & TEXTHTML) @@ -933,6 +1036,8 @@ retrieve_from_file (const char *file, bool html, int *count) iri->utf8_encode = opt.enable_iri; xfree_null (iri->orig_url); iri->orig_url = NULL; + + input_file = url_file; } else input_file = (char *) file; @@ -940,6 +1045,8 @@ retrieve_from_file (const char *file, bool html, int *count) url_list = (html ? get_urls_html (input_file, NULL, NULL, iri) : get_urls_file (input_file)); + xfree_null (url_file); + for (cur_url = url_list; cur_url; cur_url = cur_url->next, ++*count) { char *filename = NULL, *new_file = NULL; @@ -956,9 +1063,7 @@ retrieve_from_file (const char *file, bool html, int *count) break; } - /* Need to reparse the url, since it didn't have iri information. */ - if (opt.enable_iri) - parsed_url = url_parse (cur_url->url->url, NULL, tmpiri, true); + parsed_url = url_parse (cur_url->url->url, NULL, tmpiri, true); if ((opt.recursive || opt.page_requisites) && (cur_url->url->scheme != SCHEME_FTP || getproxy (cur_url->url))) @@ -1080,7 +1185,16 @@ free_urlpos (struct urlpos *l) void rotate_backups(const char *fname) { - int maxlen = strlen (fname) + 1 + numdigit (opt.backups) + 1; +#ifdef __VMS +# define SEP "_" +# define AVS ";*" /* All-version suffix. */ +# define AVSL (sizeof (AVS) - 1) +#else +# define SEP "." +# define AVSL 0 +#endif + + int maxlen = strlen (fname) + sizeof (SEP) + numdigit (opt.backups) + AVSL; char *from = (char *)alloca (maxlen); char *to = (char *)alloca (maxlen); struct_stat sb; @@ -1092,12 +1206,24 @@ rotate_backups(const char *fname) for (i = opt.backups; i > 1; i--) { - sprintf (from, "%s.%d", fname, i - 1); - sprintf (to, "%s.%d", fname, i); +#ifdef VMS + /* Delete (all versions of) any existing max-suffix file, to avoid + * creating multiple versions of it. (On VMS, rename() will + * create a new version of an existing destination file, not + * destroy/overwrite it.) + */ + if (i == opt.backups) + { + sprintf (to, "%s%s%d%s", fname, SEP, i, AVS); + delete (to); + } +#endif + sprintf (to, "%s%s%d", fname, SEP, i); + sprintf (from, "%s%s%d", fname, SEP, i - 1); rename (from, to); } - sprintf (to, "%s.%d", fname, 1); + sprintf (to, "%s%s%d", fname, SEP, 1); rename(fname, to); } @@ -1183,3 +1309,20 @@ set_local_file (const char **file, const char *default_file) else *file = default_file; } + +/* Return true for an input file's own URL, false otherwise. */ +bool +input_file_url (const char *input_file) +{ + static bool first = true; + + if (input_file + && url_has_scheme (input_file) + && first) + { + first = false; + return true; + } + else + return false; +}