2 Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
3 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation,
6 This file is part of GNU Wget.
8 GNU Wget is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at
11 your option) any later version.
13 GNU Wget is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with Wget. If not, see <http://www.gnu.org/licenses/>.
21 Additional permission under GNU GPL version 3 section 7
23 If you modify this program, or any covered work, by linking or
24 combining it with the OpenSSL project's OpenSSL library (or a
25 modified version of that library), containing parts covered by the
26 terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
27 grants you additional permission to convey the resulting work.
28 Corresponding Source for a non-source form of such a combination
29 shall include the source code for the parts of OpenSSL used as well
30 as that of the covered work. */
41 # include <unixio.h> /* For delete(). */
60 /* Total size of downloaded files. Used to enforce quota. */
61 SUM_SIZE_INT total_downloaded_bytes;
63 /* Total download time in seconds. */
64 double total_download_time;
66 /* If non-NULL, the stream to which output should be written. This
67 stream is initialized when `-O' is used. */
70 /* Whether output_document is a regular file we can manipulate,
71 i.e. not `-' or a device file. */
72 bool output_stream_regular;
81 limit_bandwidth_reset (void)
86 /* Limit the bandwidth by pausing the download for an amount of time.
87 BYTES is the number of bytes received from the network, and TIMER
88 is the timer that started at the beginning of download. */
91 limit_bandwidth (wgint bytes, struct ptimer *timer)
93 double delta_t = ptimer_read (timer) - limit_data.chunk_start;
96 limit_data.chunk_bytes += bytes;
98 /* Calculate the amount of time we expect downloading the chunk
99 should take. If in reality it took less time, sleep to
100 compensate for the difference. */
101 expected = (double) limit_data.chunk_bytes / opt.limit_rate;
103 if (expected > delta_t)
105 double slp = expected - delta_t + limit_data.sleep_adjust;
109 DEBUGP (("deferring a %.2f ms sleep (%s/%.2f).\n",
110 slp * 1000, number_to_static_string (limit_data.chunk_bytes),
114 DEBUGP (("\nsleeping %.2f ms for %s bytes, adjust %.2f ms\n",
115 slp * 1000, number_to_static_string (limit_data.chunk_bytes),
116 limit_data.sleep_adjust));
118 t0 = ptimer_read (timer);
120 t1 = ptimer_measure (timer);
122 /* Due to scheduling, we probably slept slightly longer (or
123 shorter) than desired. Calculate the difference between the
124 desired and the actual sleep, and adjust the next sleep by
126 limit_data.sleep_adjust = slp - (t1 - t0);
127 /* If sleep_adjust is very large, it's likely due to suspension
128 and not clock inaccuracy. Don't enforce those. */
129 if (limit_data.sleep_adjust > 0.5)
130 limit_data.sleep_adjust = 0.5;
131 else if (limit_data.sleep_adjust < -0.5)
132 limit_data.sleep_adjust = -0.5;
135 limit_data.chunk_bytes = 0;
136 limit_data.chunk_start = ptimer_read (timer);
140 # define MIN(i, j) ((i) <= (j) ? (i) : (j))
143 /* Write data in BUF to OUT. However, if *SKIP is non-zero, skip that
144 amount of data and decrease SKIP. Increment *WRITTEN by the amount
145 of data written. If OUT2 is not NULL, also write BUF to OUT2.
146 In case of error writing to OUT, -1 is returned. In case of error
147 writing to OUT2, -2 is returned. Return 1 if the whole BUF was
151 write_data (FILE *out, FILE *out2, const char *buf, int bufsize,
152 wgint *skip, wgint *written)
154 if (out == NULL && out2 == NULL)
171 fwrite (buf, 1, bufsize, out);
173 fwrite (buf, 1, bufsize, out2);
176 /* Immediately flush the downloaded data. This should not hinder
177 performance: fast downloads will arrive in large 16K chunks
178 (which stdio would write out immediately anyway), and slow
179 downloads wouldn't be limited by disk speed. */
182 Perhaps it shouldn't hinder performance, but it sure does, at least
183 on VMS (more than 2X). Rather than speculate on what it should or
184 shouldn't do, it might make more sense to test it. Even better, it
185 might be nice to explain what possible benefit it could offer, as
186 it appears to be a clear invitation to poor performance with no
187 actual justification. (Also, why 16K? Anyone test other values?)
194 #endif /* ndef __VMS */
195 if (out != NULL && ferror (out))
197 else if (out2 != NULL && ferror (out2))
203 /* Read the contents of file descriptor FD until the connection
204 terminates or a read error occurs. The data is read in portions of
205 up to max (BUFSIZ, 8K) bytes and written to OUT as it arrives. If opt.show_progress is set,
206 the progress is shown.
208 TOREAD is the amount of data expected to arrive, normally only used
209 by the progress gauge.
211 STARTPOS is the position from which the download starts, used by
212 the progress gauge. If QTYREAD is non-NULL, the value it points to
213 is incremented by the amount of data read from the network. If
214 QTYWRITTEN is non-NULL, the value it points to is incremented by
215 the amount of data written to disk. The time it took to download
216 the data is stored to ELAPSED.
218 If OUT2 is non-NULL, the contents is also written to OUT2.
219 OUT2 will get an exact copy of the response: if this is a chunked
220 response, everything -- including the chunk headers -- is written
221 to OUT2. (OUT will only get the unchunked response.)
223 The function exits and returns the amount of data read. In case of
224 error while reading data, -1 is returned. In case of error while
225 writing data to OUT, -2 is returned. In case of error while writing
226 data to OUT2, -3 is returned. */
229 fd_read_body (const char *downloaded_filename, int fd, FILE *out, wgint toread, wgint startpos,
231 wgint *qtyread, wgint *qtywritten, double *elapsed, int flags,
236 #define max(a,b) ((a) > (b) ? (a) : (b))
237 int dlbufsize = max (BUFSIZ, 8 * 1024);
238 char *dlbuf = xmalloc (dlbufsize);
240 struct ptimer *timer = NULL;
241 double last_successful_read_tm = 0;
243 /* The progress gauge, set according to the user preferences. */
244 void *progress = NULL;
246 /* Non-zero if the progress gauge is interactive, i.e. if it can
247 continually update the display. When true, smaller timeout
248 values are used so that the gauge can update the display when
249 data arrives slowly. */
250 bool progress_interactive = false;
252 bool exact = !!(flags & rb_read_exactly);
254 /* Used only by HTTP/HTTPS chunked transfer encoding. */
255 bool chunked = flags & rb_chunked_transfer_encoding;
258 /* How much data we've read/written. */
260 wgint sum_written = 0;
261 wgint remaining_chunk_size = 0;
263 if (flags & rb_skip_startpos)
266 if (opt.show_progress)
268 /* If we're skipping STARTPOS bytes, pass 0 as the INITIAL
269 argument to progress_create because the indicator doesn't
270 (yet) know about "skipping" data. */
271 wgint start = skip ? 0 : startpos;
272 progress = progress_create (downloaded_filename, start, start + toread);
273 progress_interactive = progress_interactive_p (progress);
277 limit_bandwidth_reset ();
279 /* A timer is needed for tracking progress, for throttling, and for
280 tracking elapsed time. If any of these is requested, start
282 if (progress || opt.limit_rate || elapsed)
284 timer = ptimer_new ();
285 last_successful_read_tm = 0;
288 /* Use a smaller buffer for low requested bandwidths. For example,
289 with --limit-rate=2k, it doesn't make sense to slurp in 16K of
290 data and then sleep for 8s. With buffer size equal to the limit,
291 we never have to sleep for more than one second. */
292 if (opt.limit_rate && opt.limit_rate < dlbufsize)
293 dlbufsize = opt.limit_rate;
295 /* Read from FD while there is data to read. Normally toread==0
296 means that it is unknown how much data is to arrive. However, if
297 EXACT is set, then toread==0 means what it says: that no data
299 while (!exact || (sum_read < toread))
302 double tmout = opt.read_timeout;
306 if (remaining_chunk_size == 0)
308 char *line = fd_read_line (fd);
315 else if (out2 != NULL)
316 fwrite (line, 1, strlen (line), out2);
318 remaining_chunk_size = strtol (line, &endl, 16);
321 if (remaining_chunk_size == 0)
324 line = fd_read_line (fd);
330 fwrite (line, 1, strlen (line), out2);
337 rdsize = MIN (remaining_chunk_size, dlbufsize);
340 rdsize = exact ? MIN (toread - sum_read, dlbufsize) : dlbufsize;
342 if (progress_interactive)
344 /* For interactive progress gauges, always specify a ~1s
345 timeout, so that the gauge can be updated regularly even
346 when the data arrives very slowly or stalls. */
348 if (opt.read_timeout)
351 waittm = ptimer_read (timer) - last_successful_read_tm;
352 if (waittm + tmout > opt.read_timeout)
354 /* Don't let total idle time exceed read timeout. */
355 tmout = opt.read_timeout - waittm;
358 /* We've already exceeded the timeout. */
359 ret = -1, errno = ETIMEDOUT;
365 ret = fd_read (fd, dlbuf, rdsize, tmout);
367 if (progress_interactive && ret < 0 && errno == ETIMEDOUT)
368 ret = 0; /* interactive timeout, handled above */
370 break; /* EOF or read error */
372 if (progress || opt.limit_rate || elapsed)
374 ptimer_measure (timer);
376 last_successful_read_tm = ptimer_read (timer);
382 int write_res = write_data (out, out2, dlbuf, ret, &skip, &sum_written);
385 ret = (write_res == -3) ? -3 : -2;
390 remaining_chunk_size -= ret;
391 if (remaining_chunk_size == 0)
393 char *line = fd_read_line (fd);
402 fwrite (line, 1, strlen (line), out2);
410 limit_bandwidth (ret, timer);
413 progress_update (progress, ret, ptimer_read (timer));
415 if (toread > 0 && opt.show_progress)
416 ws_percenttitle (100.0 *
417 (startpos + sum_read) / (startpos + toread));
425 progress_finish (progress, ptimer_read (timer));
428 *elapsed = ptimer_read (timer);
430 ptimer_destroy (timer);
433 *qtyread += sum_read;
435 *qtywritten += sum_written;
442 /* Read a hunk of data from FD, up until a terminator. The hunk is
443 limited by whatever the TERMINATOR callback chooses as its
444 terminator. For example, if terminator stops at newline, the hunk
445 will consist of a line of data; if terminator stops at two
446 newlines, it can be used to read the head of an HTTP response.
447 Upon determining the boundary, the function returns the data (up to
448 the terminator) in malloc-allocated storage.
450 In case of read error, NULL is returned. In case of EOF and no
451 data read, NULL is returned and errno set to 0. In case of having
452 read some data, but encountering EOF before seeing the terminator,
453 the data that has been read is returned, but it will (obviously)
454 not contain the terminator.
456 The TERMINATOR function is called with three arguments: the
457 beginning of the data read so far, the beginning of the current
458 block of peeked-at data, and the length of the current block.
459 Depending on its needs, the function is free to choose whether to
460 analyze all data or just the newly arrived data. If TERMINATOR
461 returns NULL, it means that the terminator has not been seen.
462 Otherwise it should return a pointer to the character immediately
463 following the terminator.
465 The idea is to be able to read a line of input, or otherwise a hunk
466 of text, such as the head of an HTTP request, without crossing the
467 boundary, so that the next call to fd_read etc. reads the data
468 after the hunk. To achieve that, this function does the following:
470 1. Peek at incoming data.
472 2. Determine whether the peeked data, along with the previously
473 read data, includes the terminator.
475 2a. If yes, read the data until the end of the terminator, and
478 2b. If no, read the peeked data and goto 1.
480 The function is careful to assume as little as possible about the
481 implementation of peeking. For example, every peek is followed by
482 a read. If the read returns a different amount of data, the
483 process is retried until all data arrives safely.
485 SIZEHINT is the buffer size sufficient to hold all the data in the
486 typical case (it is used as the initial buffer size). MAXSIZE is
487 the maximum amount of memory this function is allowed to allocate,
488 or 0 if no upper limit is to be enforced.
490 This function should be used as a building block for other
491 functions -- see fd_read_line as a simple example. */
494 fd_read_hunk (int fd, hunk_terminator_t terminator, long sizehint, long maxsize)
496 long bufsize = sizehint;
497 char *hunk = xmalloc (bufsize);
498 int tail = 0; /* tail position in HUNK */
500 assert (!maxsize || maxsize >= bufsize);
505 int pklen, rdlen, remain;
507 /* First, peek at the available data. */
509 pklen = fd_peek (fd, hunk + tail, bufsize - 1 - tail, -1);
515 end = terminator (hunk, hunk + tail, pklen);
518 /* The data contains the terminator: we'll drain the data up
519 to the end of the terminator. */
520 remain = end - (hunk + tail);
521 assert (remain >= 0);
524 /* No more data needs to be read. */
528 if (bufsize - 1 < tail + remain)
530 bufsize = tail + remain + 1;
531 hunk = xrealloc (hunk, bufsize);
535 /* No terminator: simply read the data we know is (or should
539 /* Now, read the data. Note that we make no assumptions about
540 how much data we'll get. (Some TCP stacks are notorious for
541 read returning less data than the previous MSG_PEEK.) */
543 rdlen = fd_read (fd, hunk + tail, remain, 0);
556 /* EOF without anything having been read */
562 /* EOF seen: return the data we've read. */
565 if (end && rdlen == remain)
566 /* The terminator was seen and the remaining data drained --
567 we got what we came for. */
570 /* Keep looping until all the data arrives. */
572 if (tail == bufsize - 1)
574 /* Double the buffer size, but refuse to allocate more than
576 if (maxsize && bufsize >= maxsize)
583 if (maxsize && bufsize > maxsize)
585 hunk = xrealloc (hunk, bufsize);
591 line_terminator (const char *start _GL_UNUSED, const char *peeked, int peeklen)
593 const char *p = memchr (peeked, '\n', peeklen);
595 /* p+1 because the line must include '\n' */
600 /* The maximum size of the single line we agree to accept. This is
601 not meant to impose an arbitrary limit, but to protect the user
602 from Wget slurping up available memory upon encountering malicious
603 or buggy server output. Define it to 0 to remove the limit. */
604 #define FD_READ_LINE_MAX 4096
606 /* Read one line from FD and return it. The line is allocated using
607 malloc, but is never larger than FD_READ_LINE_MAX.
609 If an error occurs, or if no data can be read, NULL is returned.
610 In the former case errno indicates the error condition, and in the
611 latter case, errno is 0. */
614 fd_read_line (int fd)
616 return fd_read_hunk (fd, line_terminator, 128, FD_READ_LINE_MAX);
619 /* Return a printed representation of the download rate, along with
620 the units appropriate for the download speed. */
623 retr_rate (wgint bytes, double secs)
626 static const char *rate_names[] = {"B/s", "KB/s", "MB/s", "GB/s" };
627 static const char *rate_names_bits[] = {"b/s", "Kb/s", "Mb/s", "Gb/s" };
630 double dlrate = calc_rate (bytes, secs, &units);
631 /* Use more digits for smaller numbers (regardless of unit used),
632 e.g. "1022", "247", "12.5", "2.38". */
633 sprintf (res, "%.*f %s",
634 dlrate >= 99.95 ? 0 : dlrate >= 9.995 ? 1 : 2,
635 dlrate, !opt.report_bps ? rate_names[units]: rate_names_bits[units]);
640 /* Calculate the download rate and trim it as appropriate for the
641 speed. Appropriate means that if rate is greater than 1K/s,
642 kilobytes are used, and if rate is greater than 1MB/s, megabytes
645 UNITS is zero for B/s, one for KB/s, two for MB/s, and three for
649 calc_rate (wgint bytes, double secs, int *units)
652 double bibyte = 1000.0;
662 /* If elapsed time is exactly zero, it means we're under the
663 resolution of the timer. This can easily happen on systems
664 that use time() for the timer. Since the interval lies between
665 0 and the timer's resolution, assume half the resolution. */
666 secs = ptimer_resolution () / 2.0;
668 dlrate = convert_to_bits (bytes) / secs;
671 else if (dlrate < (bibyte * bibyte))
672 *units = 1, dlrate /= bibyte;
673 else if (dlrate < (bibyte * bibyte * bibyte))
674 *units = 2, dlrate /= (bibyte * bibyte);
677 /* Maybe someone will need this, one day. */
678 *units = 3, dlrate /= (bibyte * bibyte * bibyte);
684 #define SUSPEND_METHOD do { \
685 method_suspended = true; \
686 saved_body_data = opt.body_data; \
687 saved_body_file_name = opt.body_file; \
688 saved_method = opt.method; \
689 opt.body_data = NULL; \
690 opt.body_file = NULL; \
694 #define RESTORE_METHOD do { \
695 if (method_suspended) \
697 opt.body_data = saved_body_data; \
698 opt.body_file = saved_body_file_name; \
699 opt.method = saved_method; \
700 method_suspended = false; \
704 static char *getproxy (struct url *);
706 /* Retrieve the given URL. Decides which loop to call -- HTTP, FTP,
709 /* #### This function should be rewritten so it doesn't return from
713 retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
714 char **newloc, const char *refurl, int *dt, bool recursive,
715 struct iri *iri, bool register_status)
719 bool location_changed;
720 bool iri_fallbacked = 0;
722 char *mynewloc, *proxy;
723 struct url *u = orig_parsed, *proxy_url;
724 int up_error_code; /* url parse error code */
726 int redirection_count = 0;
728 bool method_suspended = false;
729 char *saved_body_data = NULL;
730 char *saved_method = NULL;
731 char *saved_body_file_name = NULL;
733 /* If dt is NULL, use local storage. */
739 url = xstrdup (origurl);
746 refurl = opt.referer;
749 /* (also for IRI fallbacking) */
756 proxy = getproxy (u);
759 struct iri *pi = iri_new ();
760 set_uri_encoding (pi, opt.locale, true);
761 pi->utf8_encode = false;
763 /* Parse the proxy URL. */
764 proxy_url = url_parse (proxy, &up_error_code, NULL, true);
767 char *error = url_error (proxy, up_error_code);
768 logprintf (LOG_NOTQUIET, _("Error parsing proxy URL %s: %s.\n"),
776 if (proxy_url->scheme != SCHEME_HTTP && proxy_url->scheme != u->scheme)
778 logprintf (LOG_NOTQUIET, _("Error in proxy URL %s: Must be HTTP.\n"), proxy);
779 url_free (proxy_url);
787 if (u->scheme == SCHEME_HTTP
789 || u->scheme == SCHEME_HTTPS
791 || (proxy_url && proxy_url->scheme == SCHEME_HTTP))
793 result = http_loop (u, orig_parsed, &mynewloc, &local_file, refurl, dt,
796 else if (u->scheme == SCHEME_FTP)
798 /* If this is a redirection, temporarily turn off opt.ftp_glob
799 and opt.recursive, both being undesirable when following
801 bool oldrec = recursive, glob = opt.ftp_glob;
802 if (redirection_count)
803 oldrec = glob = false;
805 result = ftp_loop (u, &local_file, dt, proxy_url, recursive, glob);
808 /* There is a possibility of having HTTP being redirected to
809 FTP. In these cases we must decide whether the text is HTML
810 according to the suffix. The HTML suffixes are `.html',
811 `.htm' and a few others, case-insensitive. */
812 if (redirection_count && local_file && u->scheme == SCHEME_FTP)
814 if (has_html_suffix_p (local_file))
821 url_free (proxy_url);
825 location_changed = (result == NEWLOCATION || result == NEWLOCATION_KEEP_POST);
826 if (location_changed)
828 char *construced_newloc;
829 struct url *newloc_parsed;
831 assert (mynewloc != NULL);
836 /* The HTTP specs only allow absolute URLs to appear in
837 redirects, but a ton of boneheaded webservers and CGIs out
838 there break the rules and use relative URLs, and popular
839 browsers are lenient about this, so wget should be too. */
840 construced_newloc = uri_merge (url, mynewloc);
842 mynewloc = construced_newloc;
844 /* Reset UTF-8 encoding state, keep the URI encoding and reset
845 the content encoding. */
846 iri->utf8_encode = opt.enable_iri;
847 set_content_encoding (iri, NULL);
848 xfree_null (iri->orig_url);
849 iri->orig_url = NULL;
851 /* Now, see if this new location makes sense. */
852 newloc_parsed = url_parse (mynewloc, &up_error_code, iri, true);
855 char *error = url_error (mynewloc, up_error_code);
856 logprintf (LOG_NOTQUIET, "%s: %s.\n", escnonprint_uri (mynewloc),
858 if (orig_parsed != u)
869 /* Now mynewloc will become newloc_parsed->url, because if the
870 Location contained relative paths like .././something, we
871 don't want that propagating as url. */
873 mynewloc = xstrdup (newloc_parsed->url);
875 /* Check for max. number of redirections. */
876 if (++redirection_count > opt.max_redirect)
878 logprintf (LOG_NOTQUIET, _("%d redirections exceeded.\n"),
880 url_free (newloc_parsed);
881 if (orig_parsed != u)
894 if (orig_parsed != u)
900 /* If we're being redirected from POST, and we received a
901 redirect code different than 307, we don't want to POST
902 again. Many requests answer POST with a redirection to an
903 index page; that redirection is clearly a GET. We "suspend"
904 POST data for the duration of the redirections, and restore
907 RFC2616 HTTP/1.1 introduces code 307 Temporary Redirect
908 specifically to preserve the method of the request.
910 if (result != NEWLOCATION_KEEP_POST && !method_suspended)
916 /* Try to not encode in UTF-8 if fetching failed */
917 if (!(*dt & RETROKF) && iri->utf8_encode)
919 iri->utf8_encode = false;
920 if (orig_parsed != u)
924 u = url_parse (origurl, NULL, iri, true);
927 DEBUGP (("[IRI fallbacking to non-utf8 for %s\n", quote (url)));
928 url = xstrdup (u->url);
933 DEBUGP (("[Couldn't fallback to non-utf8 for %s\n", quote (url)));
936 if (local_file && u && *dt & RETROKF)
938 register_download (u->url, local_file);
940 if (!opt.spider && redirection_count && 0 != strcmp (origurl, u->url))
941 register_redirection (origurl, u->url);
944 register_html (local_file);
947 register_css (local_file);
951 *file = local_file ? local_file : NULL;
953 xfree_null (local_file);
955 if (orig_parsed != u)
960 if (redirection_count || iri_fallbacked)
978 inform_exit_status (result);
982 /* Find the URLs in the file and call retrieve_url() for each of them.
983 If HTML is true, treat the file as HTML, and construct the URLs
986 If opt.recursive is set, call retrieve_tree() for each file. */
989 retrieve_from_file (const char *file, bool html, int *count)
992 struct urlpos *url_list, *cur_url;
993 struct iri *iri = iri_new();
995 char *input_file, *url_file = NULL;
996 const char *url = file;
998 status = RETROK; /* Suppose everything is OK. */
999 *count = 0; /* Reset the URL count. */
1001 /* sXXXav : Assume filename and links in the file are in the locale */
1002 set_uri_encoding (iri, opt.locale, true);
1003 set_content_encoding (iri, opt.locale);
1005 if (url_valid_scheme (url))
1008 struct url *url_parsed = url_parse (url, &url_err, iri, true);
1011 char *error = url_error (url, url_err);
1012 logprintf (LOG_NOTQUIET, "%s: %s.\n", url, error);
1018 opt.base_href = xstrdup (url);
1020 status = retrieve_url (url_parsed, url, &url_file, NULL, NULL, &dt,
1022 url_free (url_parsed);
1024 if (!url_file || (status != RETROK))
1030 /* If we have found a content encoding, use it.
1031 * ( == is okay, because we're checking for identical object) */
1032 if (iri->content_encoding != opt.locale)
1033 set_uri_encoding (iri, iri->content_encoding, false);
1035 /* Reset UTF-8 encode status */
1036 iri->utf8_encode = opt.enable_iri;
1037 xfree_null (iri->orig_url);
1038 iri->orig_url = NULL;
1040 input_file = url_file;
1043 input_file = (char *) file;
1045 url_list = (html ? get_urls_html (input_file, NULL, NULL, iri)
1046 : get_urls_file (input_file));
1048 xfree_null (url_file);
1050 for (cur_url = url_list; cur_url; cur_url = cur_url->next, ++*count)
1052 char *filename = NULL, *new_file = NULL;
1054 struct iri *tmpiri = iri_dup (iri);
1055 struct url *parsed_url = NULL;
1057 if (cur_url->ignore_when_downloading)
1060 if (opt.quota && total_downloaded_bytes > opt.quota)
1066 parsed_url = url_parse (cur_url->url->url, NULL, tmpiri, true);
1068 if ((opt.recursive || opt.page_requisites)
1069 && (cur_url->url->scheme != SCHEME_FTP || getproxy (cur_url->url)))
1071 int old_follow_ftp = opt.follow_ftp;
1073 /* Turn opt.follow_ftp on in case of recursive FTP retrieval */
1074 if (cur_url->url->scheme == SCHEME_FTP)
1077 status = retrieve_tree (parsed_url ? parsed_url : cur_url->url,
1080 opt.follow_ftp = old_follow_ftp;
1083 status = retrieve_url (parsed_url ? parsed_url : cur_url->url,
1084 cur_url->url->url, &filename,
1085 &new_file, NULL, &dt, opt.recursive, tmpiri,
1089 url_free (parsed_url);
1091 if (filename && opt.delete_after && file_exists_p (filename))
1094 Removing file due to --delete-after in retrieve_from_file():\n"));
1095 logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
1096 if (unlink (filename))
1097 logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
1101 xfree_null (new_file);
1102 xfree_null (filename);
1106 /* Free the linked list of URL-s. */
1107 free_urlpos (url_list);
1114 /* Print `giving up', or `retrying', depending on the impending
1115 action. N1 and N2 are the attempt number and the attempt limit. */
1117 printwhat (int n1, int n2)
1119 logputs (LOG_VERBOSE, (n1 == n2) ? _("Giving up.\n\n") : _("Retrying.\n\n"));
1122 /* If opt.wait or opt.waitretry are specified, and if certain
1123 conditions are met, sleep the appropriate number of seconds. See
1124 the documentation of --wait and --waitretry for more information.
1126 COUNT is the count of current retrieval, beginning with 1. */
1129 sleep_between_retrievals (int count)
1131 static bool first_retrieval = true;
1133 if (first_retrieval)
1135 /* Don't sleep before the very first retrieval. */
1136 first_retrieval = false;
1140 if (opt.waitretry && count > 1)
1142 /* If opt.waitretry is specified and this is a retry, wait for
1143 COUNT-1 number of seconds, or for opt.waitretry seconds. */
1144 if (count <= opt.waitretry)
1147 xsleep (opt.waitretry);
1151 if (!opt.random_wait || count > 1)
1152 /* If random-wait is not specified, or if we are sleeping
1153 between retries of the same download, sleep the fixed
1158 /* Sleep a random amount of time averaging in opt.wait
1159 seconds. The sleeping amount ranges from 0.5*opt.wait to
1161 double waitsecs = (0.5 + random_float ()) * opt.wait;
1162 DEBUGP (("sleep_between_retrievals: avg=%f,sleep=%f\n",
1163 opt.wait, waitsecs));
1169 /* Free the linked list of urlpos. */
1171 free_urlpos (struct urlpos *l)
1175 struct urlpos *next = l->next;
1178 xfree_null (l->local_name);
1184 /* Rotate FNAME opt.backups times */
1186 rotate_backups(const char *fname)
1190 # define AVS ";*" /* All-version suffix. */
1191 # define AVSL (sizeof (AVS) - 1)
1197 int maxlen = strlen (fname) + sizeof (SEP) + numdigit (opt.backups) + AVSL;
1198 char *from = (char *)alloca (maxlen);
1199 char *to = (char *)alloca (maxlen);
1203 if (stat (fname, &sb) == 0)
1204 if (S_ISREG (sb.st_mode) == 0)
1207 for (i = opt.backups; i > 1; i--)
1210 /* Delete (all versions of) any existing max-suffix file, to avoid
1211 * creating multiple versions of it. (On VMS, rename() will
1212 * create a new version of an existing destination file, not
1213 * destroy/overwrite it.)
1215 if (i == opt.backups)
1217 sprintf (to, "%s%s%d%s", fname, SEP, i, AVS);
1221 sprintf (to, "%s%s%d", fname, SEP, i);
1222 sprintf (from, "%s%s%d", fname, SEP, i - 1);
1226 sprintf (to, "%s%s%d", fname, SEP, 1);
1230 static bool no_proxy_match (const char *, const char **);
1232 /* Return the URL of the proxy appropriate for url U. */
1235 getproxy (struct url *u)
1238 char *rewritten_url;
1239 static char rewritten_storage[1024];
1243 if (no_proxy_match (u->host, (const char **)opt.no_proxy))
1249 proxy = opt.http_proxy ? opt.http_proxy : getenv ("http_proxy");
1253 proxy = opt.https_proxy ? opt.https_proxy : getenv ("https_proxy");
1257 proxy = opt.ftp_proxy ? opt.ftp_proxy : getenv ("ftp_proxy");
1259 case SCHEME_INVALID:
1262 if (!proxy || !*proxy)
1265 /* Handle shorthands. `rewritten_storage' is a kludge to allow
1266 getproxy() to return static storage. */
1267 rewritten_url = rewrite_shorthand_url (proxy);
1270 strncpy (rewritten_storage, rewritten_url, sizeof (rewritten_storage));
1271 rewritten_storage[sizeof (rewritten_storage) - 1] = '\0';
1272 proxy = rewritten_storage;
1278 /* Returns true if URL would be downloaded through a proxy. */
1281 url_uses_proxy (struct url * u)
1286 ret = getproxy (u) != NULL;
1290 /* Should a host be accessed through proxy, concerning no_proxy? */
1292 no_proxy_match (const char *host, const char **no_proxy)
1297 return sufmatch (no_proxy, host);
1300 /* Set the file parameter to point to the local file string. */
1302 set_local_file (const char **file, const char *default_file)
1304 if (opt.output_document)
1306 if (output_stream_regular)
1307 *file = opt.output_document;
1310 *file = default_file;
1313 /* Return true for an input file's own URL, false otherwise. */
1315 input_file_url (const char *input_file)
1317 static bool first = true;
1320 && url_has_scheme (input_file)