2 Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001 Free Software Foundation, Inc.
4 This file is part of GNU Wget.
6 GNU Wget is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 GNU Wget is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Wget; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
24 #include <sys/types.h>
27 #endif /* HAVE_UNISTD_H */
33 #endif /* HAVE_STRING_H */
48 # include "gen_sslfunc.h" /* for ssl_iread */
/* Number of retrievals performed so far in this session; incremented
   at the bottom of retrieve_url() below.  See the comment in
   gethttp() for why this is needed.  NOTE(review): readers of this
   counter are outside this view -- presumably http.c; confirm.  */
int global_download_count;
65 limit_bandwidth_reset (void)
68 limit_data.dltime = 0;
71 /* Limit the bandwidth by pausing the download for an amount of time.
72 BYTES is the number of bytes received from the network, DELTA is
73 how long it took to receive them, DLTIME the current download time,
74 TIMER the timer, and ADJUSTMENT the previous. */
77 limit_bandwidth (long bytes, long delta)
81 limit_data.bytes += bytes;
82 limit_data.dltime += delta;
84 expected = (long)(1000.0 * limit_data.bytes / opt.limit_rate);
86 if (expected > limit_data.dltime)
88 long slp = expected - limit_data.dltime;
91 DEBUGP (("deferring a %ld ms sleep (%ld/%ld) until later.\n",
92 slp, limit_data.bytes, limit_data.dltime));
95 DEBUGP (("sleeping %ld ms\n", slp));
100 limit_data.dltime = 0;
/* Smaller of I and J.  NOTE: function-like macro -- one argument is
   evaluated twice, so never pass expressions with side effects; also
   beware mixed signed/unsigned operands (e.g. long vs. size_t), which
   promote to unsigned in the comparison.  */
#define MIN(i, j) ((i) <= (j) ? (i) : (j))
/* Reads the contents of file descriptor FD, until it is closed, or a
   read error occurs.  The data is read in 8K chunks, and stored to
   stream fp, which should have been open for writing.  If RBUF is
   non-NULL and its file descriptor is equal to FD, flush RBUF first.
   This function will *not* use the rbuf_* functions!

   The EXPECTED argument is passed to show_progress() unchanged; when
   USE_EXPECTED is non-zero it also bounds how much data is read (see
   the main loop below).

   If opt.verbose is set, the progress is also shown.  RESTVAL
   represents a value from which to start downloading (which will be
   shown accordingly).  If RESTVAL is non-zero, the stream should have
   been open for appending.

   The function exits and returns codes of 0, -1 and -2 if the
   connection was closed, there was a read error, or if it could not
   write to the output stream, respectively.

   IMPORTANT: The function flushes the contents of the buffer in
   rbuf_flush() before actually reading from fd.  If you wish to read
   from fd immediately, flush or discard the buffer.  */
get_contents (int fd, FILE *fp, long *len, long restval, long expected,
	      struct rbuf *rbuf, int use_expected, long *elapsed)
  /* Opaque progress-display handle; stays NULL when the gauge is not
     created.  */
  void *progress = NULL;
  /* Timer measuring download time, for the progress display and for
     bandwidth limiting.  */
  struct wget_timer *timer = wtimer_allocate ();
  long dltime = 0, last_dltime = 0;

      progress = progress_create (restval, expected);

  /* If RBUF wraps the same descriptor, drain whatever it already
     buffered into FP before reading from FD directly.  */
  if (rbuf && RBUF_FD (rbuf) == fd)
      while ((res = rbuf_flush (rbuf, c, sizeof (c))) != 0)
	  fwrite (c, sizeof (char), res, fp);
	/* Account the pre-buffered bytes with zero elapsed time.  */
	progress_update (progress, sz, 0);

      limit_bandwidth_reset ();
  wtimer_reset (timer);

  /* Read from fd while there is available data.

     Normally, if expected is 0, it means that it is not known how
     much data is expected.  However, if use_expected is specified,
     then expected being zero means exactly that.  */
  while (!use_expected || (*len < expected))
      /* Never read past EXPECTED when it is authoritative.  */
      int amount_to_read = (use_expected
			    ? MIN (expected - *len, sizeof (c))
	res = ssl_iread (rbuf->ssl, c, amount_to_read);
#endif /* HAVE_SSL */
	res = iread (fd, c, amount_to_read);

      fwrite (c, sizeof (char), res, fp);
      /* Always flush the contents of the network packet.  This
	 should not be adverse to performance, as the network
	 packets typically won't be too tiny anyway.  */

      /* If bandwidth is not limited, one call to wtimer_elapsed
	 per iteration suffices.  */
      dltime = wtimer_elapsed (timer);
	  /* Sleep off any excess speed, then re-read the clock so the
	     pause is reflected in DLTIME.  */
	  limit_bandwidth (res, dltime - last_dltime);
	  dltime = wtimer_elapsed (timer);
	  last_dltime = dltime;

	progress_update (progress, res, dltime);

      progress_finish (progress, dltime);
  wtimer_delete (timer);
/* Return a printed representation of the download rate, as
   appropriate for the speed.  If PAD is non-zero, strings will be
   padded to the width of 7 characters (xxxx.xx).

   Returns a pointer to a static buffer: NOT reentrant, and the
   result is overwritten by the next call.  */
char *
retr_rate (long bytes, long msecs, int pad)
{
  /* Sized generously, and the formatting below is bounded: a rate
     produced by a buggy timer could previously overflow the old
     20-byte buffer through unbounded sprintf.  */
  static char res[40];
  static char *rate_names[] = {"B/s", "KB/s", "MB/s", "GB/s" };
  int units = 0;

  double dlrate = calc_rate (bytes, msecs, &units);
  /* snprintf guarantees NUL-termination and never writes past the
     buffer, unlike the sprintf it replaces.  */
  snprintf (res, sizeof (res), pad ? "%7.2f %s" : "%.2f %s",
	    dlrate, rate_names[units]);

  return res;
}
/* Calculate the download rate and trim it as appropriate for the
   speed.  Appropriate means that if rate is greater than 1K/s,
   kilobytes are used, and if rate is greater than 1MB/s, megabytes
   are used.

   UNITS is zero for B/s, one for KB/s, two for MB/s, and three for
   GB/s.  Returns the rate scaled to the chosen unit.  */
double
calc_rate (long bytes, long msecs, int *units)
{
  double dlrate;

  assert (msecs >= 0);
  assert (bytes >= 0);

  if (msecs == 0)
    /* If elapsed time is 0, it means we're under the granularity of
       the timer.  This often happens on systems that use time() for
       the timer.  Substitute the timer's granularity so we don't
       divide by zero.  */
    msecs = wtimer_granularity ();

  dlrate = (double)1000 * bytes / msecs;
  if (dlrate < 1024.0)
    *units = 0;
  else if (dlrate < 1024.0 * 1024.0)
    *units = 1, dlrate /= 1024.0;
  else if (dlrate < 1024.0 * 1024.0 * 1024.0)
    *units = 2, dlrate /= (1024.0 * 1024.0);
  else
    /* Maybe someone will need this one day.  More realistically, it
       will get tickled by buggy timers.  */
    *units = 3, dlrate /= (1024.0 * 1024.0 * 1024.0);

  return dlrate;
}
/* Non-zero if a proxy should be used for URL U: proxy use must be
   enabled, a proxy must be configured for U's scheme, and U's host
   must not be excluded by the no_proxy list.  NOTE: macro evaluates U
   more than once -- pass only side-effect-free expressions.  */
#define USE_PROXY_P(u) (opt.use_proxy && getproxy((u)->scheme)	\
			&& no_proxy_match((u)->host,			\
					  (const char **)opt.no_proxy))

/* Maximum number of allowed redirections.  20 was chosen as a
   "reasonable" value, which is low enough to not cause havoc, yet
   high enough to guarantee that normal retrievals will not be hurt by
   the check.  */

#define MAX_REDIRECTIONS 20
/* Retrieve the given URL.  Decides which loop to call -- HTTP, FTP,
   etc. -- based on the parsed scheme, following redirections up to
   MAX_REDIRECTIONS times.  The downloaded file (if any) is registered
   and reported back through *FILE; the final location through
   *NEWLOC.  REFURL, if non-NULL, is used as the referer; DT receives
   the document-type flags.  */
retrieve_url (const char *origurl, char **file, char **newloc,
	      const char *refurl, int *dt)
  int location_changed, dummy;
  char *mynewloc, *proxy;
  int up_error_code;		/* url parse error code */
  int redirection_count = 0;

  /* If dt is NULL, just ignore it.  */

  /* Work on a private copy of the URL; redirections rewrite it.  */
  url = xstrdup (origurl);

  u = url_parse (url, &up_error_code);
      logprintf (LOG_NOTQUIET, "%s: %s.\n", url, url_error (up_error_code));

    refurl = opt.referer;

  use_proxy = USE_PROXY_P (u);
      struct url *proxy_url;

      /* Get the proxy server for the current scheme.  */
      proxy = getproxy (u->scheme);
	  logputs (LOG_NOTQUIET, _("Could not find proxy host.\n"));

      /* Parse the proxy URL.  */
      proxy_url = url_parse (proxy, &up_error_code);
	  logprintf (LOG_NOTQUIET, _("Error parsing proxy URL %s: %s.\n"),
		     proxy, url_error (up_error_code));
      /* Only HTTP proxies are supported by this path.  */
      if (proxy_url->scheme != SCHEME_HTTP)
	  logprintf (LOG_NOTQUIET, _("Error in proxy URL %s: Must be HTTP.\n"), proxy);
	  url_free (proxy_url);

      /* Proxied retrieval goes through the HTTP loop regardless of
	 the target scheme.  */
      result = http_loop (u, &mynewloc, &local_file, refurl, dt, proxy_url);
      url_free (proxy_url);
  else if (u->scheme == SCHEME_HTTP
	   || u->scheme == SCHEME_HTTPS
    result = http_loop (u, &mynewloc, &local_file, refurl, dt, NULL);
  else if (u->scheme == SCHEME_FTP)
      /* If this is a redirection, we must not allow recursive FTP
	 retrieval, so we save recursion to oldrec, and restore it
	 afterwards.  */
      int oldrec = opt.recursive;
      if (redirection_count)
      result = ftp_loop (u, dt);
      opt.recursive = oldrec;

      /* There is a possibility of having HTTP being redirected to
	 FTP.  In these cases we must decide whether the text is HTML
	 according to the suffix.  The HTML suffixes are `.html' and
	 `.htm', case-insensitive.  */
      if (redirection_count && local_file && u->scheme == SCHEME_FTP)
	  char *suf = suffix (local_file);
	  if (suf && (!strcasecmp (suf, "html") || !strcasecmp (suf, "htm")))

  location_changed = (result == NEWLOCATION);
  if (location_changed)
      char *construced_newloc;
      struct url *newloc_parsed;

      assert (mynewloc != NULL);

      /* The HTTP specs only allow absolute URLs to appear in
	 redirects, but a ton of boneheaded webservers and CGIs out
	 there break the rules and use relative URLs, and popular
	 browsers are lenient about this, so wget should be too.  */
      construced_newloc = uri_merge (url, mynewloc);
      mynewloc = construced_newloc;

      /* Now, see if this new location makes sense.  */
      newloc_parsed = url_parse (mynewloc, &up_error_code);
	  logprintf (LOG_NOTQUIET, "%s: %s.\n", mynewloc,
		     url_error (up_error_code));

      /* Now mynewloc will become newloc_parsed->url, because if the
	 Location contained relative paths like .././something, we
	 don't want that propagating as url.  */
      mynewloc = xstrdup (newloc_parsed->url);

      /* Check for max. number of redirections.  */
      if (++redirection_count > MAX_REDIRECTIONS)
	  logprintf (LOG_NOTQUIET, _("%d redirections exceeded.\n"),
	  url_free (newloc_parsed);

  /* Record the download in the bookkeeping tables so -k and friends
     can find it later.  */
  register_download (u->url, local_file);
  if (redirection_count && 0 != strcmp (origurl, u->url))
    register_redirection (origurl, u->url);
    register_html (u->url, local_file);

  *file = local_file ? local_file : NULL;
  FREE_MAYBE (local_file);

  if (redirection_count)

  ++global_download_count;
490 /* Find the URLs in the file and call retrieve_url() for each of
491 them. If HTML is non-zero, treat the file as HTML, and construct
492 the URLs accordingly.
494 If opt.recursive is set, call recursive_retrieve() for each file. */
496 retrieve_from_file (const char *file, int html, int *count)
499 struct urlpos *url_list, *cur_url;
501 url_list = (html ? get_urls_html (file, NULL, NULL)
502 : get_urls_file (file));
503 status = RETROK; /* Suppose everything is OK. */
504 *count = 0; /* Reset the URL count. */
506 for (cur_url = url_list; cur_url; cur_url = cur_url->next, ++*count)
508 char *filename = NULL, *new_file = NULL;
511 if (cur_url->ignore_when_downloading)
514 if (downloaded_exceeds_quota ())
519 if (opt.recursive && cur_url->url->scheme != SCHEME_FTP)
520 status = retrieve_tree (cur_url->url->url);
522 status = retrieve_url (cur_url->url->url, &filename, &new_file, NULL, &dt);
524 if (filename && opt.delete_after && file_exists_p (filename))
526 DEBUGP (("Removing file due to --delete-after in"
527 " retrieve_from_file():\n"));
528 logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
529 if (unlink (filename))
530 logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
534 FREE_MAYBE (new_file);
535 FREE_MAYBE (filename);
538 /* Free the linked list of URL-s. */
539 free_urlpos (url_list);
544 /* Print `giving up', or `retrying', depending on the impending
545 action. N1 and N2 are the attempt number and the attempt limit. */
547 printwhat (int n1, int n2)
549 logputs (LOG_VERBOSE, (n1 == n2) ? _("Giving up.\n\n") : _("Retrying.\n\n"));
552 /* Increment opt.downloaded by BY_HOW_MUCH. If an overflow occurs,
553 set opt.downloaded_overflow to 1. */
555 downloaded_increase (unsigned long by_how_much)
558 if (opt.downloaded_overflow)
560 old = opt.downloaded;
561 opt.downloaded += by_how_much;
562 if (opt.downloaded < old) /* carry flag, where are you when I
566 opt.downloaded_overflow = 1;
567 opt.downloaded = ~((VERY_LONG_TYPE)0);
571 /* Return non-zero if the downloaded amount of bytes exceeds the
572 desired quota. If quota is not set or if the amount overflowed, 0
575 downloaded_exceeds_quota (void)
579 if (opt.downloaded_overflow)
580 /* We don't really know. (Wildly) assume not. */
583 return opt.downloaded > opt.quota;
586 /* If opt.wait or opt.waitretry are specified, and if certain
587 conditions are met, sleep the appropriate number of seconds. See
588 the documentation of --wait and --waitretry for more information.
590 COUNT is the count of current retrieval, beginning with 1. */
593 sleep_between_retrievals (int count)
595 static int first_retrieval = 1;
599 /* Don't sleep before the very first retrieval. */
604 if (opt.waitretry && count > 1)
606 /* If opt.waitretry is specified and this is a retry, wait for
607 COUNT-1 number of seconds, or for opt.waitretry seconds. */
608 if (count <= opt.waitretry)
611 sleep (opt.waitretry);
615 if (!opt.random_wait || count > 1)
616 /* If random-wait is not specified, or if we are sleeping
617 between retries of the same download, sleep the fixed
622 /* Sleep a random amount of time averaging in opt.wait
623 seconds. The sleeping amount ranges from 0 to
624 opt.wait*2, inclusive. */
625 int waitsecs = random_number (opt.wait * 2 + 1);
627 DEBUGP (("sleep_between_retrievals: norm=%ld,fuzz=%ld,sleep=%d\n",
628 opt.wait, waitsecs - opt.wait, waitsecs));