2 Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001 Free Software Foundation, Inc.
4 This file is part of GNU Wget.
6 GNU Wget is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or (at
9 your option) any later version.
11 GNU Wget is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Wget; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 In addition, as a special exception, the Free Software Foundation
21 gives permission to link the code of its release of Wget with the
22 OpenSSL project's "OpenSSL" library (or with modified versions of it
23 that use the same license as the "OpenSSL" library), and distribute
24 the linked executables. You must obey the GNU General Public License
25 in all respects for all of the code used other than "OpenSSL". If you
26 modify this file, you may extend this exception to your version of the
27 file, but you are not obligated to do so. If you do not wish to do
28 so, delete this exception statement from your version. */
34 #include <sys/types.h>
37 #endif /* HAVE_UNISTD_H */
43 #endif /* HAVE_STRING_H */
58 # include "gen_sslfunc.h" /* for ssl_iread */
/* Counter of completed retrievals in this session; incremented once
   at the end of each retrieve_url() call (see below).  */
65 /* See the comment in gethttp() why this is needed. */
66 int global_download_count;
/* Reset the --limit-rate accounting state before starting a new
   download.  NOTE(review): this listing is elided; the matching reset
   of limit_data.bytes presumably happens on a line not shown here --
   confirm against the full source.  */
75 limit_bandwidth_reset (void)
78 limit_data.dltime = 0;
81 /* Limit the bandwidth by pausing the download for an amount of time.
82    BYTES is the number of bytes just received from the network; DELTA
83    is how long receiving them took, in milliseconds.  Running totals
84    are accumulated in limit_data across calls. */
87 limit_bandwidth (long bytes, double delta)
91 limit_data.bytes += bytes;
92 limit_data.dltime += delta;
/* EXPECTED is how long, in milliseconds, the accumulated byte count
   *should* have taken at the user-requested rate opt.limit_rate
   (bytes per second; hence the factor of 1000).  */
94 expected = 1000.0 * limit_data.bytes / opt.limit_rate;
/* If the transfer is running ahead of schedule, sleep off the
   difference.  */
96 if (expected > limit_data.dltime)
98 double slp = expected - limit_data.dltime;
101 DEBUGP (("deferring a %.2f ms sleep (%ld/%.2f).\n",
102 slp, limit_data.bytes, limit_data.dltime));
105 DEBUGP (("sleeping %.2f ms\n", slp));
/* usleep() takes microseconds; SLP is in milliseconds.  */
106 usleep ((unsigned long) (1000 * slp));
/* Start a fresh accounting period after sleeping.  */
109 limit_data.bytes = 0;
110 limit_data.dltime = 0;
/* Return the smaller of I and J.  Beware: one argument is evaluated
   twice, so avoid side effects such as MIN (i++, j).  */
113 #define MIN(i, j) ((i) <= (j) ? (i) : (j))
115 /* Reads the contents of file descriptor FD, until it is closed, or a
116 read error occurs. The data is read in 16K chunks (sizeof dlbuf), and stored to
117 stream fp, which should have been open for writing. If BUF is
118 non-NULL and its file descriptor is equal to FD, flush RBUF first.
119 This function will *not* use the rbuf_* functions!
121 The EXPECTED argument is passed to show_progress() unchanged, but
124 If opt.verbose is set, the progress is also shown. RESTVAL
125 represents a value from which to start downloading (which will be
126 shown accordingly). If RESTVAL is non-zero, the stream should have
127 been open for appending.
129 The function exits and returns codes of 0, -1 and -2 if the
130 connection was closed, there was a read error, or if it could not
131 write to the output stream, respectively.
133 IMPORTANT: The function flushes the contents of the buffer in
134 rbuf_flush() before actually reading from fd. If you wish to read
135 from fd immediately, flush or discard the buffer. */
137 get_contents (int fd, FILE *fp, long *len, long restval, long expected,
138 struct rbuf *rbuf, int use_expected, double *elapsed)
142 static char dlbuf[16384];
143 int dlbufsize = sizeof (dlbuf);
145 void *progress = NULL;
146 struct wget_timer *timer = wtimer_allocate ();
147 double dltime = 0, last_dltime = 0;
152 progress = progress_create (restval, expected);
/* Drain any data already buffered in RBUF for this descriptor before
   reading from the socket itself.  */
154 if (rbuf && RBUF_FD (rbuf) == fd)
157 while ((res = rbuf_flush (rbuf, dlbuf, sizeof (dlbuf))) != 0)
159 fwrite (dlbuf, 1, res, fp);
171 progress_update (progress, sz, 0);
175 limit_bandwidth_reset ();
176 wtimer_reset (timer);
/* Shrink the read chunk so each read is small enough for the rate
   limiter to act on; a full 16K read could overshoot a low limit.  */
178 /* If we're limiting the download, set our buffer size to the
180 if (opt.limit_rate && opt.limit_rate < dlbufsize)
181 dlbufsize = opt.limit_rate;
183 /* Read from fd while there is available data.
185 Normally, if expected is 0, it means that it is not known how
186 much data is expected. However, if use_expected is specified,
187 then expected being zero means exactly that. */
188 while (!use_expected || (*len < expected))
/* When a byte count is known, never read past EXPECTED bytes.  */
190 int amount_to_read = (use_expected
191 ? MIN (expected - *len, dlbufsize) : dlbufsize);
194 res = ssl_iread (rbuf->ssl, dlbuf, amount_to_read);
196 #endif /* HAVE_SSL */
197 res = iread (fd, dlbuf, amount_to_read);
201 fwrite (dlbuf, 1, res, fp);
202 /* Always flush the contents of the network packet. This
203 should not be adverse to performance, as the network
204 packets typically won't be too tiny anyway. */
212 /* If bandwidth is not limited, one call to wtimer_elapsed
214 dltime = wtimer_elapsed (timer);
/* Pass only the time spent since the previous chunk, then re-read
   the timer so the sleep inside limit_bandwidth() is accounted for
   before the next iteration.  */
217 limit_bandwidth (res, dltime - last_dltime);
218 dltime = wtimer_elapsed (timer);
219 last_dltime = dltime;
223 progress_update (progress, res, dltime);
234 progress_finish (progress, dltime);
237 wtimer_delete (timer);
242 /* Return a printed representation of the download rate, as
243 appropriate for the speed. If PAD is non-zero, strings will be
244 padded to the width of 7 characters (xxxx.xx). */
246 retr_rate (long bytes, double msecs, int pad)
249 static char *rate_names[] = {"B/s", "KB/s", "MB/s", "GB/s" };
/* UNITS indexes rate_names; set by calc_rate() below.  */
252 double dlrate = calc_rate (bytes, msecs, &units);
/* NOTE(review): RES is declared on an elided line -- presumably a
   static buffer whose address is returned, so the result is
   overwritten by the next call.  Confirm in the full source.  */
253 sprintf (res, pad ? "%7.2f %s" : "%.2f %s", dlrate, rate_names[units]);
258 /* Calculate the download rate and trim it as appropriate for the
259 speed. Appropriate means that if rate is greater than 1K/s,
260 kilobytes are used, and if rate is greater than 1MB/s, megabytes
263 UNITS is zero for B/s, one for KB/s, two for MB/s, and three for
266 calc_rate (long bytes, double msecs, int *units)
274 /* If elapsed time is exactly zero, it means we're under the
275 granularity of the timer. This often happens on systems that
276 use time() for the timer. */
277 msecs = wtimer_granularity ();
/* MSECS is milliseconds, so scale by 1000 to get bytes per second.  */
279 dlrate = (double)1000 * bytes / msecs;
282 else if (dlrate < 1024.0 * 1024.0)
283 *units = 1, dlrate /= 1024.0;
284 else if (dlrate < 1024.0 * 1024.0 * 1024.0)
285 *units = 2, dlrate /= (1024.0 * 1024.0);
287 /* Maybe someone will need this one day. More realistically, it
288 will get tickled by buggy timers. */
289 *units = 3, dlrate /= (1024.0 * 1024.0 * 1024.0);
294 /* Maximum number of allowed redirections. 20 was chosen as a
295 "reasonable" value, which is low enough to not cause havoc, yet
296 high enough to guarantee that normal retrievals will not be hurt by
/* Checked in retrieve_url() each time a Location header is followed.  */
299 #define MAX_REDIRECTIONS 20
/* Stash opt.post_data / opt.post_file_name aside and clear them, so
   that requests issued while following a redirection are plain GETs.
   Expects locals post_data_suspended, saved_post_data and
   saved_post_file_name in the invoking scope (see retrieve_url).  */
301 #define SUSPEND_POST_DATA do { \
302 post_data_suspended = 1; \
303 saved_post_data = opt.post_data; \
304 saved_post_file_name = opt.post_file_name; \
305 opt.post_data = NULL; \
306 opt.post_file_name = NULL; \
/* Undo SUSPEND_POST_DATA; a no-op if nothing was suspended.  */
309 #define RESTORE_POST_DATA do { \
310 if (post_data_suspended) \
312 opt.post_data = saved_post_data; \
313 opt.post_file_name = saved_post_file_name; \
314 post_data_suspended = 0; \
318 /* Retrieve the given URL. Decides which loop to call -- HTTP, FTP,
321 /* #### This function should be rewritten so it doesn't return from
325 retrieve_url (const char *origurl, char **file, char **newloc,
326 const char *refurl, int *dt)
330 int location_changed, dummy;
331 char *mynewloc, *proxy;
332 struct url *u, *proxy_url;
333 int up_error_code; /* url parse error code */
335 int redirection_count = 0;
/* POST bookkeeping for SUSPEND_POST_DATA / RESTORE_POST_DATA.  */
337 int post_data_suspended = 0;
338 char *saved_post_data = NULL;
339 char *saved_post_file_name = NULL;
341 /* If dt is NULL, just ignore it. */
/* Work on a private copy of the URL; redirections rewrite it.  */
344 url = xstrdup (origurl);
350 u = url_parse (url, &up_error_code);
353 logprintf (LOG_NOTQUIET, "%s: %s.\n", url, url_error (up_error_code));
359 refurl = opt.referer;
/* Resolve a proxy for this URL, if one is configured.  */
368 proxy = getproxy (u);
371 /* Parse the proxy URL. */
372 proxy_url = url_parse (proxy, &up_error_code);
375 logprintf (LOG_NOTQUIET, _("Error parsing proxy URL %s: %s.\n"),
376 proxy, url_error (up_error_code));
/* Only HTTP proxies (or a proxy of the same scheme) are supported.  */
381 if (proxy_url->scheme != SCHEME_HTTP && proxy_url->scheme != u->scheme)
383 logprintf (LOG_NOTQUIET, _("Error in proxy URL %s: Must be HTTP.\n"), proxy);
384 url_free (proxy_url);
/* Dispatch: HTTP(S), or any scheme going through an HTTP proxy, is
   handled by http_loop; plain FTP by ftp_loop.  */
391 if (u->scheme == SCHEME_HTTP
393 || u->scheme == SCHEME_HTTPS
395 || (proxy_url && proxy_url->scheme == SCHEME_HTTP))
397 result = http_loop (u, &mynewloc, &local_file, refurl, dt, proxy_url);
399 else if (u->scheme == SCHEME_FTP)
401 /* If this is a redirection, we must not allow recursive FTP
402 retrieval, so we save recursion to oldrec, and restore it
404 int oldrec = opt.recursive;
405 if (redirection_count)
407 result = ftp_loop (u, dt, proxy_url);
408 opt.recursive = oldrec;
410 /* There is a possibility of having HTTP being redirected to
411 FTP. In these cases we must decide whether the text is HTML
412 according to the suffix. The HTML suffixes are `.html',
413 `.htm' and a few others, case-insensitive. */
414 if (redirection_count && local_file && u->scheme == SCHEME_FTP)
416 if (has_html_suffix_p (local_file))
423 url_free (proxy_url);
427 location_changed = (result == NEWLOCATION);
428 if (location_changed)
430 char *construced_newloc;
431 struct url *newloc_parsed;
433 assert (mynewloc != NULL);
438 /* The HTTP specs only allow absolute URLs to appear in
439 redirects, but a ton of boneheaded webservers and CGIs out
440 there break the rules and use relative URLs, and popular
441 browsers are lenient about this, so wget should be too. */
442 construced_newloc = uri_merge (url, mynewloc);
444 mynewloc = construced_newloc;
446 /* Now, see if this new location makes sense. */
447 newloc_parsed = url_parse (mynewloc, &up_error_code);
450 logprintf (LOG_NOTQUIET, "%s: %s.\n", mynewloc,
451 url_error (up_error_code));
459 /* Now mynewloc will become newloc_parsed->url, because if the
460 Location contained relative paths like .././something, we
461 don't want that propagating as url. */
463 mynewloc = xstrdup (newloc_parsed->url);
465 /* Check for max. number of redirections. */
466 if (++redirection_count > MAX_REDIRECTIONS)
468 logprintf (LOG_NOTQUIET, _("%d redirections exceeded.\n"),
470 url_free (newloc_parsed);
483 /* If we're being redirected from POST, we don't want to POST
484 again. Many requests answer POST with a redirection to an
485 index page; that redirection is clearly a GET. We "suspend"
486 POST data for the duration of the redirections, and restore
487 it when we're done. */
488 if (!post_data_suspended)
/* Record what was downloaded so later passes (e.g. link conversion)
   can map URLs to local files.  */
498 register_download (u->url, local_file);
499 if (redirection_count && 0 != strcmp (origurl, u->url))
500 register_redirection (origurl, u->url);
502 register_html (u->url, local_file);
507 *file = local_file ? local_file : NULL;
509 FREE_MAYBE (local_file);
513 if (redirection_count)
527 ++global_download_count;
533 /* Find the URLs in the file and call retrieve_url() for each of
534 them. If HTML is non-zero, treat the file as HTML, and construct
535 the URLs accordingly.
537 If opt.recursive is set, call recursive_retrieve() for each file. */
539 retrieve_from_file (const char *file, int html, int *count)
542 struct urlpos *url_list, *cur_url;
/* Extract the URL list: parse FILE as HTML when requested, otherwise
   treat it as one URL per line.  */
544 url_list = (html ? get_urls_html (file, NULL, NULL)
545 : get_urls_file (file));
546 status = RETROK; /* Suppose everything is OK. */
547 *count = 0; /* Reset the URL count. */
549 for (cur_url = url_list; cur_url; cur_url = cur_url->next, ++*count)
551 char *filename = NULL, *new_file = NULL;
554 if (cur_url->ignore_when_downloading)
/* Stop early once the --quota limit has been reached.  */
557 if (downloaded_exceeds_quota ())
/* Recursive retrieval is only meaningful for non-FTP schemes here.  */
562 if (opt.recursive && cur_url->url->scheme != SCHEME_FTP)
563 status = retrieve_tree (cur_url->url->url);
565 status = retrieve_url (cur_url->url->url, &filename, &new_file, NULL, &dt);
567 if (filename && opt.delete_after && file_exists_p (filename))
569 DEBUGP (("Removing file due to --delete-after in"
570 " retrieve_from_file():\n"));
571 logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
572 if (unlink (filename))
573 logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
577 FREE_MAYBE (new_file);
578 FREE_MAYBE (filename);
581 /* Free the linked list of URL-s. */
582 free_urlpos (url_list);
587 /* Print `giving up', or `retrying', depending on the impending
588 action. N1 and N2 are the attempt number and the attempt limit. */
590 printwhat (int n1, int n2)
/* N1 == N2 means this was the final allowed attempt.  */
592 logputs (LOG_VERBOSE, (n1 == n2) ? _("Giving up.\n\n") : _("Retrying.\n\n"));
595 /* Increment opt.downloaded by BY_HOW_MUCH. If an overflow occurs,
596 set opt.downloaded_overflow to 1. */
598 downloaded_increase (unsigned long by_how_much)
/* Once overflowed, the counter is no longer meaningful; stop updating.  */
601 if (opt.downloaded_overflow)
603 old = opt.downloaded;
604 opt.downloaded += by_how_much;
/* Unsigned wrap-around: the sum is smaller than the old value iff the
   addition overflowed.  */
605 if (opt.downloaded < old) /* carry flag, where are you when I
609 opt.downloaded_overflow = 1;
/* Clamp to the maximum representable value.  */
610 opt.downloaded = ~((VERY_LONG_TYPE)0);
614 /* Return non-zero if the downloaded amount of bytes exceeds the
615 desired quota. If quota is not set or if the amount overflowed, 0
618 downloaded_exceeds_quota (void)
622 if (opt.downloaded_overflow)
623 /* We don't really know. (Wildly) assume not. */
626 return opt.downloaded > opt.quota;
629 /* If opt.wait or opt.waitretry are specified, and if certain
630 conditions are met, sleep the appropriate number of seconds. See
631 the documentation of --wait and --waitretry for more information.
633 COUNT is the count of current retrieval, beginning with 1. */
636 sleep_between_retrievals (int count)
638 static int first_retrieval = 1;
642 /* Don't sleep before the very first retrieval. */
647 if (opt.waitretry && count > 1)
649 /* If opt.waitretry is specified and this is a retry, wait for
650 COUNT-1 number of seconds, or for opt.waitretry seconds. */
651 if (count <= opt.waitretry)
654 sleep (opt.waitretry);
658 if (!opt.random_wait || count > 1)
659 /* If random-wait is not specified, or if we are sleeping
660 between retries of the same download, sleep the fixed
665 /* Sleep a random amount of time averaging in opt.wait
666 seconds. The sleeping amount ranges from 0 to
667 opt.wait*2, inclusive. */
668 int waitsecs = random_number (opt.wait * 2 + 1);
670 DEBUGP (("sleep_between_retrievals: norm=%ld,fuzz=%ld,sleep=%d\n",
671 opt.wait, waitsecs - opt.wait, waitsecs));