2 Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001 Free Software Foundation, Inc.
4 This file is part of GNU Wget.
6 GNU Wget is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or (at
9 your option) any later version.
11 GNU Wget is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Wget; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 In addition, as a special exception, the Free Software Foundation
21 gives permission to link the code of its release of Wget with the
22 OpenSSL project's "OpenSSL" library (or with modified versions of it
23 that use the same license as the "OpenSSL" library), and distribute
24 the linked executables. You must obey the GNU General Public License
25 in all respects for all of the code used other than "OpenSSL". If you
26 modify this file, you may extend this exception to your version of the
27 file, but you are not obligated to do so. If you do not wish to do
28 so, delete this exception statement from your version. */
34 #include <sys/types.h>
37 #endif /* HAVE_UNISTD_H */
43 #endif /* HAVE_STRING_H */
58 # include "gen_sslfunc.h" /* for ssl_iread */
65 /* See the comment in gethttp() why this is needed. */
/* Session-wide count of completed retrievals; incremented at the end
   of retrieve_url() below.  */
66 int global_download_count;
/* Reset the bandwidth-throttling accounting: forget the bytes counted
   so far in the current chunk and restart its clock.  Called before a
   new download begins (see get_contents below).  */
76 limit_bandwidth_reset (void)
78 limit_data.chunk_bytes = 0;
79 limit_data.chunk_start = 0;
82 /* Limit the bandwidth by pausing the download for an amount of time.
83 BYTES is the number of bytes received from the network, and DELTA
84 is the number of milliseconds it took to receive them. */
/* NOTE(review): this listing elides several original lines; the
   declarations of `expected', `t0' and `t1' are among those not
   shown here.  */
87 limit_bandwidth (long bytes, double *dltime, struct wget_timer *timer)
/* Milliseconds elapsed since the current accounting chunk started.  */
89 double delta_t = *dltime - limit_data.chunk_start;
92 limit_data.chunk_bytes += bytes;
94 /* Calculate the amount of time we expect downloading the chunk
95 should take. If in reality it took less time, sleep to
96 compensate for the difference. */
/* opt.limit_rate is bytes per second; multiply by 1000 to get the
   expected duration in milliseconds.  */
97 expected = 1000.0 * limit_data.chunk_bytes / opt.limit_rate;
99 if (expected > delta_t)
/* Fold in the measured error of the previous sleep so throttling
   stays accurate over time.  */
101 double slp = expected - delta_t + limit_data.sleep_adjust;
105 DEBUGP (("deferring a %.2f ms sleep (%ld/%.2f).\n",
106 slp, limit_data.chunk_bytes, delta_t));
109 DEBUGP (("\nsleeping %.2f ms for %ld bytes, adjust %.2f ms\n",
110 slp, limit_data.chunk_bytes, limit_data.sleep_adjust));
/* slp is in milliseconds; usleep wants microseconds.  */
113 usleep ((unsigned long) (1000 * slp));
114 t1 = wtimer_elapsed (timer);
116 /* Due to scheduling, we probably slept slightly longer (or
117 shorter) than desired. Calculate the difference between the
118 desired and the actual sleep, and adjust the next sleep by
120 limit_data.sleep_adjust = slp - (t1 - t0);
122 /* Since we've called wtimer_elapsed, we might as well update
123 the caller's dltime. */
/* Begin a fresh accounting chunk at the (updated) current time.  */
127 limit_data.chunk_bytes = 0;
128 limit_data.chunk_start = *dltime;
/* Smaller of two values.  Equivalent to the classic conditional form;
   note each argument may be evaluated twice, so avoid side effects.  */
#define MIN(i, j) ((j) < (i) ? (j) : (i))
133 /* Reads the contents of file descriptor FD, until it is closed, or a
134 read error occurs. The data is read in 8K chunks, and stored to
135 stream fp, which should have been open for writing. If BUF is
136 non-NULL and its file descriptor is equal to FD, flush RBUF first.
137 This function will *not* use the rbuf_* functions!
139 The EXPECTED argument is passed to show_progress() unchanged, but
142 If opt.verbose is set, the progress is also shown. RESTVAL
143 represents a value from which to start downloading (which will be
144 shown accordingly). If RESTVAL is non-zero, the stream should have
145 been open for appending.
147 The function exits and returns codes of 0, -1 and -2 if the
148 connection was closed, there was a read error, or if it could not
149 write to the output stream, respectively.
151 IMPORTANT: The function flushes the contents of the buffer in
152 rbuf_flush() before actually reading from fd. If you wish to read
153 from fd immediately, flush or discard the buffer. */
/* NOTE(review): several original lines (error handling, the `res'/`sz'
   declarations, loop braces) are elided from this listing.  */
155 get_contents (int fd, FILE *fp, long *len, long restval, long expected,
156 struct rbuf *rbuf, int use_expected, double *elapsed)
/* Static download buffer: shared by all calls, not reentrant.  */
160 static char dlbuf[16384];
161 int dlbufsize = sizeof (dlbuf);
163 void *progress = NULL;
164 struct wget_timer *timer = wtimer_allocate ();
170 progress = progress_create (restval, expected);
/* First drain any data already buffered in RBUF for this fd.  */
172 if (rbuf && RBUF_FD (rbuf) == fd)
175 while ((res = rbuf_flush (rbuf, dlbuf, sizeof (dlbuf))) != 0)
177 fwrite (dlbuf, 1, res, fp);
189 progress_update (progress, sz, 0);
/* Start throttling accounting and timing from here, after the
   buffered data has been written out.  */
193 limit_bandwidth_reset ();
194 wtimer_reset (timer);
196 /* If we're limiting the download, set our buffer size to the
/* Read at most one second's worth of data per iteration when
   throttled.  */
198 if (opt.limit_rate && opt.limit_rate < dlbufsize)
199 dlbufsize = opt.limit_rate;
201 /* Read from fd while there is available data.
203 Normally, if expected is 0, it means that it is not known how
204 much data is expected. However, if use_expected is specified,
205 then expected being zero means exactly that. */
206 while (!use_expected || (*len < expected))
/* Never read past the expected length when it is known.  */
208 int amount_to_read = (use_expected
209 ? MIN (expected - *len, dlbufsize) : dlbufsize);
212 res = ssl_iread (rbuf->ssl, dlbuf, amount_to_read);
214 #endif /* HAVE_SSL */
215 res = iread (fd, dlbuf, amount_to_read);
220 fwrite (dlbuf, 1, res, fp);
221 /* Always flush the contents of the network packet. This should
222 not hinder performance: fast downloads will be received in
223 16K chunks (which stdio would write out anyway), and slow
224 downloads won't be limited with disk performance. */
232 dltime = wtimer_elapsed (timer);
234 limit_bandwidth (res, &dltime, timer);
237 progress_update (progress, res, dltime);
245 progress_finish (progress, dltime);
248 wtimer_delete (timer);
253 /* Return a printed representation of the download rate, as
254 appropriate for the speed. If PAD is non-zero, strings will be
255 padded to the width of 7 characters (xxxx.xx). */
/* NOTE(review): the declarations of `res' and `units' are elided from
   this listing; `res' is presumably a static buffer, making the
   returned string non-reentrant -- confirm against full source.  */
257 retr_rate (long bytes, double msecs, int pad)
/* Unit suffix indexed by the `units' value calc_rate fills in.  */
260 static char *rate_names[] = {"B/s", "KB/s", "MB/s", "GB/s" };
263 double dlrate = calc_rate (bytes, msecs, &units);
264 sprintf (res, pad ? "%7.2f %s" : "%.2f %s", dlrate, rate_names[units]);
269 /* Calculate the download rate and trim it as appropriate for the
270 speed. Appropriate means that if rate is greater than 1K/s,
271 kilobytes are used, and if rate is greater than 1MB/s, megabytes
274 UNITS is zero for B/s, one for KB/s, two for MB/s, and three for
/* Returns the scaled rate; *UNITS receives the index of the matching
   suffix in retr_rate's rate_names[] table.  */
277 calc_rate (long bytes, double msecs, int *units)
285 /* If elapsed time is exactly zero, it means we're under the
286 granularity of the timer. This often happens on systems that
287 use time() for the timer. */
288 msecs = wtimer_granularity ();
/* Bytes per millisecond, scaled by 1000 to bytes per second.  */
290 dlrate = (double)1000 * bytes / msecs;
293 else if (dlrate < 1024.0 * 1024.0)
294 *units = 1, dlrate /= 1024.0;
295 else if (dlrate < 1024.0 * 1024.0 * 1024.0)
296 *units = 2, dlrate /= (1024.0 * 1024.0);
298 /* Maybe someone will need this, one day. */
299 *units = 3, dlrate /= (1024.0 * 1024.0 * 1024.0);
304 /* Maximum number of allowed redirections. 20 was chosen as a
305 "reasonable" value, which is low enough to not cause havoc, yet
306 high enough to guarantee that normal retrievals will not be hurt by
/* Enforced in retrieve_url(): exceeding it aborts the redirect chain.  */
309 #define MAX_REDIRECTIONS 20
/* Temporarily stash opt.post_data / opt.post_file_name in locals and
   clear them, so a redirected request is issued as a GET rather than
   re-POSTing.  Pairs with RESTORE_POST_DATA below.  Expects the locals
   post_data_suspended, saved_post_data and saved_post_file_name to be
   in scope (see retrieve_url).  */
311 #define SUSPEND_POST_DATA do { \
312 post_data_suspended = 1; \
313 saved_post_data = opt.post_data; \
314 saved_post_file_name = opt.post_file_name; \
315 opt.post_data = NULL; \
316 opt.post_file_name = NULL; \
/* Undo SUSPEND_POST_DATA: put the saved POST fields back into opt and
   clear the suspended flag.  A no-op if nothing was suspended.  */
319 #define RESTORE_POST_DATA do { \
320 if (post_data_suspended) \
322 opt.post_data = saved_post_data; \
323 opt.post_file_name = saved_post_file_name; \
324 post_data_suspended = 0; \
328 /* Retrieve the given URL. Decides which loop to call -- HTTP, FTP,
331 /* #### This function should be rewritten so it doesn't return from
/* NOTE(review): many original lines are elided from this listing
   (declarations of `result', `url', `local_file', braces, gotos and
   several error paths).  Comments below describe only what is
   visible.  */
335 retrieve_url (const char *origurl, char **file, char **newloc,
336 const char *refurl, int *dt)
340 int location_changed, dummy;
341 char *mynewloc, *proxy;
342 struct url *u, *proxy_url;
343 int up_error_code; /* url parse error code */
/* Redirect-chain bookkeeping, checked against MAX_REDIRECTIONS.  */
345 int redirection_count = 0;
/* State used by SUSPEND_POST_DATA / RESTORE_POST_DATA.  */
347 int post_data_suspended = 0;
348 char *saved_post_data = NULL;
349 char *saved_post_file_name = NULL;
351 /* If dt is NULL, just ignore it. */
/* Work on a private copy so redirections can rewrite `url'.  */
354 url = xstrdup (origurl);
360 u = url_parse (url, &up_error_code);
363 logprintf (LOG_NOTQUIET, "%s: %s.\n", url, url_error (up_error_code));
369 refurl = opt.referer;
378 proxy = getproxy (u);
381 /* Parse the proxy URL. */
382 proxy_url = url_parse (proxy, &up_error_code);
385 logprintf (LOG_NOTQUIET, _("Error parsing proxy URL %s: %s.\n"),
386 proxy, url_error (up_error_code));
/* Only an HTTP proxy, or one matching the target scheme, is usable.  */
391 if (proxy_url->scheme != SCHEME_HTTP && proxy_url->scheme != u->scheme)
393 logprintf (LOG_NOTQUIET, _("Error in proxy URL %s: Must be HTTP.\n"), proxy);
394 url_free (proxy_url);
/* Dispatch: HTTP(S), or anything going through an HTTP proxy, is
   handled by http_loop; plain FTP by ftp_loop.  */
401 if (u->scheme == SCHEME_HTTP
403 || u->scheme == SCHEME_HTTPS
405 || (proxy_url && proxy_url->scheme == SCHEME_HTTP))
407 result = http_loop (u, &mynewloc, &local_file, refurl, dt, proxy_url);
409 else if (u->scheme == SCHEME_FTP)
411 /* If this is a redirection, we must not allow recursive FTP
412 retrieval, so we save recursion to oldrec, and restore it
414 int oldrec = opt.recursive;
415 if (redirection_count)
417 result = ftp_loop (u, dt, proxy_url);
418 opt.recursive = oldrec;
420 /* There is a possibility of having HTTP being redirected to
421 FTP. In these cases we must decide whether the text is HTML
422 according to the suffix. The HTML suffixes are `.html',
423 `.htm' and a few others, case-insensitive. */
424 if (redirection_count && local_file && u->scheme == SCHEME_FTP)
426 if (has_html_suffix_p (local_file))
433 url_free (proxy_url);
437 location_changed = (result == NEWLOCATION);
438 if (location_changed)
440 char *construced_newloc;
441 struct url *newloc_parsed;
443 assert (mynewloc != NULL);
448 /* The HTTP specs only allow absolute URLs to appear in
449 redirects, but a ton of boneheaded webservers and CGIs out
450 there break the rules and use relative URLs, and popular
451 browsers are lenient about this, so wget should be too. */
452 construced_newloc = uri_merge (url, mynewloc);
454 mynewloc = construced_newloc;
456 /* Now, see if this new location makes sense. */
457 newloc_parsed = url_parse (mynewloc, &up_error_code);
460 logprintf (LOG_NOTQUIET, "%s: %s.\n", mynewloc,
461 url_error (up_error_code));
469 /* Now mynewloc will become newloc_parsed->url, because if the
470 Location contained relative paths like .././something, we
471 don't want that propagating as url. */
473 mynewloc = xstrdup (newloc_parsed->url);
475 /* Check for max. number of redirections. */
476 if (++redirection_count > MAX_REDIRECTIONS)
478 logprintf (LOG_NOTQUIET, _("%d redirections exceeded.\n"),
480 url_free (newloc_parsed);
493 /* If we're being redirected from POST, we don't want to POST
494 again. Many requests answer POST with a redirection to an
495 index page; that redirection is clearly a GET. We "suspend"
496 POST data for the duration of the redirections, and restore
497 it when we're done. */
498 if (!post_data_suspended)
/* Record the final download (and the redirect mapping, if any) for
   later conversion/bookkeeping.  */
508 register_download (u->url, local_file);
509 if (redirection_count && 0 != strcmp (origurl, u->url))
510 register_redirection (origurl, u->url);
512 register_html (u->url, local_file);
/* Hand the local file name back to the caller if requested;
   otherwise free it.  */
517 *file = local_file ? local_file : NULL;
519 FREE_MAYBE (local_file);
523 if (redirection_count)
537 ++global_download_count;
543 /* Find the URLs in the file and call retrieve_url() for each of
544 them. If HTML is non-zero, treat the file as HTML, and construct
545 the URLs accordingly.
547 If opt.recursive is set, call recursive_retrieve() for each file. */
/* NOTE(review): braces and the declarations of `status' and `dt' are
   elided from this listing.  */
549 retrieve_from_file (const char *file, int html, int *count)
552 struct urlpos *url_list, *cur_url;
/* Extract URLs either from HTML markup or from a plain URL-per-line
   file, depending on HTML.  */
554 url_list = (html ? get_urls_html (file, NULL, NULL)
555 : get_urls_file (file));
556 status = RETROK; /* Suppose everything is OK. */
557 *count = 0; /* Reset the URL count. */
/* COUNT reflects every list entry visited, including skipped ones.  */
559 for (cur_url = url_list; cur_url; cur_url = cur_url->next, ++*count)
561 char *filename = NULL, *new_file = NULL;
564 if (cur_url->ignore_when_downloading)
/* Stop early once the --quota limit has been reached.  */
567 if (downloaded_exceeds_quota ())
/* Recursive retrieval for non-FTP URLs; single retrieval otherwise.  */
572 if (opt.recursive && cur_url->url->scheme != SCHEME_FTP)
573 status = retrieve_tree (cur_url->url->url);
575 status = retrieve_url (cur_url->url->url, &filename, &new_file, NULL, &dt);
/* --delete-after: remove the file immediately after retrieval.  */
577 if (filename && opt.delete_after && file_exists_p (filename))
579 DEBUGP (("Removing file due to --delete-after in"
580 " retrieve_from_file():\n"));
581 logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
582 if (unlink (filename))
583 logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
587 FREE_MAYBE (new_file);
588 FREE_MAYBE (filename);
591 /* Free the linked list of URL-s. */
592 free_urlpos (url_list);
597 /* Print `giving up', or `retrying', depending on the impending
598 action. N1 and N2 are the attempt number and the attempt limit. */
/* Logged only at verbose level; "Giving up" when the final attempt
   (N1 == N2) has just failed, "Retrying" otherwise.  */
600 printwhat (int n1, int n2)
602 logputs (LOG_VERBOSE, (n1 == n2) ? _("Giving up.\n\n") : _("Retrying.\n\n"));
605 /* Increment opt.downloaded by BY_HOW_MUCH. If an overflow occurs,
606 set opt.downloaded_overflow to 1. */
/* NOTE(review): the declaration of `old' and the early return when
   overflow has already occurred are elided from this listing.  */
608 downloaded_increase (unsigned long by_how_much)
611 if (opt.downloaded_overflow)
613 old = opt.downloaded;
614 opt.downloaded += by_how_much;
/* Unsigned wrap-around: a result smaller than the old value means
   the addition overflowed.  */
615 if (opt.downloaded < old) /* carry flag, where are you when I
619 opt.downloaded_overflow = 1;
/* Saturate the counter at the maximum representable value.  */
620 opt.downloaded = ~((VERY_LONG_TYPE)0);
624 /* Return non-zero if the downloaded amount of bytes exceeds the
625 desired quota. If quota is not set or if the amount overflowed, 0
/* NOTE(review): the quota-unset check and braces are elided from
   this listing.  */
628 downloaded_exceeds_quota (void)
632 if (opt.downloaded_overflow)
633 /* We don't really know. (Wildly) assume not. */
636 return opt.downloaded > opt.quota;
639 /* If opt.wait or opt.waitretry are specified, and if certain
640 conditions are met, sleep the appropriate number of seconds. See
641 the documentation of --wait and --waitretry for more information.
643 COUNT is the count of current retrieval, beginning with 1. */
646 sleep_between_retrievals (int count)
648 static int first_retrieval = 1;
652 /* Don't sleep before the very first retrieval. */
657 if (opt.waitretry && count > 1)
659 /* If opt.waitretry is specified and this is a retry, wait for
660 COUNT-1 number of seconds, or for opt.waitretry seconds. */
661 if (count <= opt.waitretry)
664 usleep (1000000L * opt.waitretry);
668 if (!opt.random_wait || count > 1)
669 /* If random-wait is not specified, or if we are sleeping
670 between retries of the same download, sleep the fixed
672 usleep (1000000L * opt.wait);
675 /* Sleep a random amount of time averaging in opt.wait
676 seconds. The sleeping amount ranges from 0 to
677 opt.wait*2, inclusive. */
678 double waitsecs = 2 * opt.wait * random_float ();
679 DEBUGP (("sleep_between_retrievals: avg=%f,sleep=%f\n",
680 opt.wait, waitsecs));
681 usleep (1000000L * waitsecs);