2 Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001 Free Software Foundation, Inc.
4 This file is part of GNU Wget.
6 GNU Wget is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 GNU Wget is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Wget; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
24 #include <sys/types.h>
27 #endif /* HAVE_UNISTD_H */
33 #endif /* HAVE_STRING_H */
48 # include "gen_sslfunc.h" /* for ssl_iread */
55 /* See the comment in gethttp() why this is needed. */
/* Running count of completed retrievals; incremented at the end of
   retrieve_url() below.  NOTE(review): gethttp() is not visible in
   this excerpt — confirm its comment for the full rationale.  */
56 int global_download_count;
/* Reset the accounting state used by limit_bandwidth() so that a new
   download (or a restarted timer) starts from a clean slate.
   NOTE(review): this excerpt is missing interior lines; presumably
   limit_data.bytes is zeroed as well — confirm against the full
   source.  */
65 limit_bandwidth_reset (void)
68 limit_data.dltime = 0;
71 /* Limit the bandwidth by pausing the download for an amount of time.
72 BYTES is the number of bytes received from the network, DELTA is
73 how long it took to receive them, DLTIME the current download time,
74 TIMER the timer, and ADJUSTMENT the previous sleep adjustment. */
/* NOTE(review): interior lines (braces, the sleep call itself, and a
   short-sleep deferral branch) are missing from this excerpt.  */
77 limit_bandwidth (long bytes, long delta)
/* Accumulate totals so the limit applies to the average rate, not to
   each individual network read.  */
81 limit_data.bytes += bytes;
82 limit_data.dltime += delta;
/* How long (in ms) the transfer SHOULD have taken so far at
   opt.limit_rate bytes per second.  */
84 expected = (long)(1000.0 * limit_data.bytes / opt.limit_rate);
/* We are ahead of schedule: sleep off the difference.  */
86 if (expected > limit_data.dltime)
88 long slp = expected - limit_data.dltime;
/* Too-small sleeps are deferred until they add up to something
   worthwhile (branch body not fully visible here).  */
91 DEBUGP (("deferring a %ld ms sleep (%ld/%ld) until later.\n",
92 slp, limit_data.bytes, limit_data.dltime));
95 DEBUGP (("sleeping %ld ms\n", slp));
/* NOTE(review): dltime is reset here; presumably limit_data.bytes is
   reset on a missing adjacent line — confirm.  */
100 limit_data.dltime = 0;
/* Return the smaller of I and J.  Beware: both arguments are
   evaluated twice, so they must not carry side effects.  */
#define MIN(i, j) ((j) < (i) ? (j) : (i))
105 /* Reads the contents of file descriptor FD, until it is closed, or a
106 read error occurs. The data is read in 8K chunks, and stored to
107 stream fp, which should have been open for writing. If BUF is
108 non-NULL and its file descriptor is equal to FD, flush RBUF first.
109 This function will *not* use the rbuf_* functions!
111 The EXPECTED argument is passed to show_progress() unchanged, but
otherwise ignored.
114 If opt.verbose is set, the progress is also shown. RESTVAL
115 represents a value from which to start downloading (which will be
116 shown accordingly). If RESTVAL is non-zero, the stream should have
117 been open for appending.
119 The function exits and returns codes of 0, -1 and -2 if the
120 connection was closed, there was a read error, or if it could not
121 write to the output stream, respectively.
123 IMPORTANT: The function flushes the contents of the buffer in
124 rbuf_flush() before actually reading from fd. If you wish to read
125 from fd immediately, flush or discard the buffer. */
/* NOTE(review): this excerpt is missing many interior lines of the
   function (local declarations such as `res', `c' and `sz', braces,
   error handling, and the *len/*elapsed updates).  Comments below
   describe only what the visible lines establish.  */
127 get_contents (int fd, FILE *fp, long *len, long restval, long expected,
128 struct rbuf *rbuf, int use_expected, long *elapsed)
132 void *progress = NULL;
133 struct wget_timer *timer = wtimer_allocate ();
/* dltime: elapsed download time in ms; last_dltime: value at the
   previous bandwidth-limit adjustment.  */
134 long dltime = 0, last_dltime = 0;
139 progress = progress_create (restval, expected);
/* First drain any data already buffered in RBUF for this fd, writing
   it straight to the output stream.  */
141 if (rbuf && RBUF_FD (rbuf) == fd)
144 while ((res = rbuf_flush (rbuf, c, sizeof (c))) != 0)
146 fwrite (c, sizeof (char), res, fp);
/* Account the flushed bytes to the progress display with zero
   elapsed time.  */
158 progress_update (progress, sz, 0);
/* Start bandwidth-limit accounting and timing from here.  */
162 limit_bandwidth_reset ();
163 wtimer_reset (timer);
165 /* Read from fd while there is available data.
167 Normally, if expected is 0, it means that it is not known how
168 much data is expected. However, if use_expected is specified,
169 then expected being zero means exactly that. */
170 while (!use_expected || (*len < expected))
/* Never read past the expected length when it is authoritative.  */
172 int amount_to_read = (use_expected
173 ? MIN (expected - *len, sizeof (c))
/* SSL reads bypass the plain fd (conditional lines missing here).  */
177 res = ssl_iread (rbuf->ssl, c, amount_to_read);
179 #endif /* HAVE_SSL */
180 res = iread (fd, c, amount_to_read);
184 fwrite (c, sizeof (char), res, fp);
185 /* Always flush the contents of the network packet. This
186 should not be adverse to performance, as the network
187 packets typically won't be too tiny anyway. */
195 /* If bandwidth is not limited, one call to wtimer_elapsed
is sufficient. */
197 dltime = wtimer_elapsed (timer);
/* Under --limit-rate, charge only the bytes/time since the last
   adjustment, then re-read the clock since we may have slept.  */
200 limit_bandwidth (res, dltime - last_dltime);
201 dltime = wtimer_elapsed (timer);
202 last_dltime = dltime;
206 progress_update (progress, res, dltime);
217 progress_finish (progress, dltime);
220 wtimer_delete (timer);
225 /* Return a printed representation of the download rate, as
226 appropriate for the speed. If PAD is non-zero, strings will be
227 padded to the width of 7 characters (xxxx.xx). */
/* NOTE(review): the return type, the `res' buffer and the `units'
   declaration are on lines missing from this excerpt; `res' is
   presumably a static buffer (the function returns it) — confirm.
   Returned string is overwritten by the next call.  */
229 retr_rate (long bytes, long msecs, int pad)
232 static char *rate_names[] = {"B/s", "KB/s", "MB/s", "GB/s" };
/* calc_rate() scales the rate and selects the unit index.  */
235 double dlrate = calc_rate (bytes, msecs, &units);
236 sprintf (res, pad ? "%7.2f %s" : "%.2f %s", dlrate, rate_names[units]);
241 /* Calculate the download rate and trim it as appropriate for the
242 speed. Appropriate means that if rate is greater than 1K/s,
243 kilobytes are used, and if rate is greater than 1MB/s, megabytes
are used.
246 UNITS is zero for B/s, one for KB/s, two for MB/s, and three for
GB/s. */
/* NOTE(review): return type, assertions and the *units = 0 branch
   are on lines missing from this excerpt.  */
249 calc_rate (long bytes, long msecs, int *units)
257 /* If elapsed time is 0, it means we're under the granularity of
258 the timer. This often happens on systems that use time() for
timing; substitute the timer's granularity to avoid dividing
by zero. */
260 msecs = wtimer_granularity ();
/* Bytes per second (msecs -> seconds conversion).  */
262 dlrate = (double)1000 * bytes / msecs;
265 else if (dlrate < 1024.0 * 1024.0)
266 *units = 1, dlrate /= 1024.0;
267 else if (dlrate < 1024.0 * 1024.0 * 1024.0)
268 *units = 2, dlrate /= (1024.0 * 1024.0);
270 /* Maybe someone will need this one day. More realistically, it
271 will get tickled by buggy timers. */
272 *units = 3, dlrate /= (1024.0 * 1024.0 * 1024.0);
277 /* Maximum number of allowed redirections. 20 was chosen as a
278 "reasonable" value, which is low enough to not cause havoc, yet
279 high enough to guarantee that normal retrievals will not be hurt by
the check. */
282 #define MAX_REDIRECTIONS 20
284 /* Retrieve the given URL. Decides which loop to call -- HTTP, FTP,
or proxy -- and follows HTTP redirections up to MAX_REDIRECTIONS.
   On success the downloaded file name is returned through FILE and
   the final location (after redirects) through NEWLOC; DT receives
   document-type flags.  NOTE(review): large parts of this function
   (the redirect `redirected:' loop structure, error returns, and
   cleanup) are on lines missing from this excerpt.  */
288 retrieve_url (const char *origurl, char **file, char **newloc,
289 const char *refurl, int *dt)
293 int location_changed, dummy;
294 char *mynewloc, *proxy;
295 struct url *u, *proxy_url;
296 int up_error_code; /* url parse error code */
298 int redirection_count = 0;
300 /* If dt is NULL, just ignore it. */
/* Work on a private copy of the URL; it gets replaced on redirects. */
303 url = xstrdup (origurl);
309 u = url_parse (url, &up_error_code);
/* Parse failure: report and (on missing lines) bail out. */
312 logprintf (LOG_NOTQUIET, "%s: %s.\n", url, url_error (up_error_code));
/* Fall back to the --referer option when no referrer was passed. */
318 refurl = opt.referer;
327 proxy = getproxy (u);
330 /* Parse the proxy URL. */
331 proxy_url = url_parse (proxy, &up_error_code);
334 logprintf (LOG_NOTQUIET, _("Error parsing proxy URL %s: %s.\n"),
335 proxy, url_error (up_error_code));
/* Only HTTP proxies (or same-scheme proxies) are supported. */
339 if (proxy_url->scheme != SCHEME_HTTP && proxy_url->scheme != u->scheme)
341 logprintf (LOG_NOTQUIET, _("Error in proxy URL %s: Must be HTTP.\n"), proxy);
342 url_free (proxy_url);
/* Dispatch: HTTP(S), or anything going through an HTTP proxy, uses
   http_loop(); plain FTP uses ftp_loop(). */
348 if (u->scheme == SCHEME_HTTP
350 || u->scheme == SCHEME_HTTPS
352 || (proxy_url && proxy_url->scheme == SCHEME_HTTP))
354 result = http_loop (u, &mynewloc, &local_file, refurl, dt, proxy_url);
356 else if (u->scheme == SCHEME_FTP)
358 /* If this is a redirection, we must not allow recursive FTP
359 retrieval, so we save recursion to oldrec, and restore it
later. */
361 int oldrec = opt.recursive;
362 if (redirection_count)
364 result = ftp_loop (u, dt, proxy_url);
365 opt.recursive = oldrec;
367 /* There is a possibility of having HTTP being redirected to
368 FTP. In these cases we must decide whether the text is HTML
369 according to the suffix. The HTML suffixes are `.html',
370 `.htm' and a few others, case-insensitive. */
371 if (redirection_count && local_file && u->scheme == SCHEME_FTP)
373 if (has_html_suffix_p (local_file))
380 url_free (proxy_url);
/* NEWLOCATION means the server sent an HTTP redirect. */
384 location_changed = (result == NEWLOCATION);
385 if (location_changed)
387 char *construced_newloc;
388 struct url *newloc_parsed;
390 assert (mynewloc != NULL);
395 /* The HTTP specs only allow absolute URLs to appear in
396 redirects, but a ton of boneheaded webservers and CGIs out
397 there break the rules and use relative URLs, and popular
398 browsers are lenient about this, so wget should be too. */
399 construced_newloc = uri_merge (url, mynewloc);
401 mynewloc = construced_newloc;
403 /* Now, see if this new location makes sense. */
404 newloc_parsed = url_parse (mynewloc, &up_error_code);
407 logprintf (LOG_NOTQUIET, "%s: %s.\n", mynewloc,
408 url_error (up_error_code));
415 /* Now mynewloc will become newloc_parsed->url, because if the
416 Location contained relative paths like .././something, we
417 don't want that propagating as url. */
419 mynewloc = xstrdup (newloc_parsed->url);
421 /* Check for max. number of redirections. */
422 if (++redirection_count > MAX_REDIRECTIONS)
424 logprintf (LOG_NOTQUIET, _("%d redirections exceeded.\n"),
426 url_free (newloc_parsed);
/* Record the download for -N/-k bookkeeping; also remember any
   redirection mapping from the original URL. */
444 register_download (u->url, local_file);
445 if (redirection_count && 0 != strcmp (origurl, u->url))
446 register_redirection (origurl, u->url);
448 register_html (u->url, local_file);
/* Hand results back to the caller (missing lines handle newloc). */
453 *file = local_file ? local_file : NULL;
455 FREE_MAYBE (local_file);
459 if (redirection_count)
473 ++global_download_count;
478 /* Find the URLs in the file and call retrieve_url() for each of
479 them. If HTML is non-zero, treat the file as HTML, and construct
480 the URLs accordingly.
482 If opt.recursive is set, call recursive_retrieve() for each file. */
/* NOTE(review): the return type, `status' and `dt' declarations,
   braces and the quota-exceeded break are on lines missing from this
   excerpt.  *COUNT is set to the number of URLs processed.  */
484 retrieve_from_file (const char *file, int html, int *count)
487 struct urlpos *url_list, *cur_url;
/* Extract URLs: parse as HTML or as a plain one-URL-per-line file. */
489 url_list = (html ? get_urls_html (file, NULL, NULL)
490 : get_urls_file (file));
491 status = RETROK; /* Suppose everything is OK. */
492 *count = 0; /* Reset the URL count. */
494 for (cur_url = url_list; cur_url; cur_url = cur_url->next, ++*count)
496 char *filename = NULL, *new_file = NULL;
/* Skip entries marked as not-for-download (e.g. <base> href). */
499 if (cur_url->ignore_when_downloading)
/* Stop the whole run once --quota is exceeded. */
502 if (downloaded_exceeds_quota ())
/* Recursive mode hands non-FTP URLs to the tree retriever. */
507 if (opt.recursive && cur_url->url->scheme != SCHEME_FTP)
508 status = retrieve_tree (cur_url->url->url);
510 status = retrieve_url (cur_url->url->url, &filename, &new_file, NULL, &dt);
/* Honor --delete-after: remove the file right after download. */
512 if (filename && opt.delete_after && file_exists_p (filename))
514 DEBUGP (("Removing file due to --delete-after in"
515 " retrieve_from_file():\n"));
516 logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
517 if (unlink (filename))
518 logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
522 FREE_MAYBE (new_file);
523 FREE_MAYBE (filename);
526 /* Free the linked list of URL-s. */
527 free_urlpos (url_list);
532 /* Print `giving up', or `retrying', depending on the impending
533 action. N1 and N2 are the attempt number and the attempt limit. */
/* When the last allowed attempt has been used (N1 == N2) we are
   giving up; otherwise another retry follows.  */
535 printwhat (int n1, int n2)
537 logputs (LOG_VERBOSE, (n1 == n2) ? _("Giving up.\n\n") : _("Retrying.\n\n"));
540 /* Increment opt.downloaded by BY_HOW_MUCH. If an overflow occurs,
541 set opt.downloaded_overflow to 1. */
/* NOTE(review): return type, the declaration of `old' and braces are
   on lines missing from this excerpt.  */
543 downloaded_increase (unsigned long by_how_much)
/* Already saturated: nothing more to account.  */
546 if (opt.downloaded_overflow)
548 old = opt.downloaded;
549 opt.downloaded += by_how_much;
/* Unsigned wrap-around detection: the sum got smaller.  */
550 if (opt.downloaded < old) /* carry flag, where are you when I need you? */
554 opt.downloaded_overflow = 1;
/* Saturate at the maximum representable value.  */
555 opt.downloaded = ~((VERY_LONG_TYPE)0);
559 /* Return non-zero if the downloaded amount of bytes exceeds the
560 desired quota. If quota is not set or if the amount overflowed, 0
is returned. */
/* NOTE(review): the quota-not-set early return is on lines missing
   from this excerpt.  */
563 downloaded_exceeds_quota (void)
567 if (opt.downloaded_overflow)
568 /* We don't really know. (Wildly) assume not. */
571 return opt.downloaded > opt.quota;
574 /* If opt.wait or opt.waitretry are specified, and if certain
575 conditions are met, sleep the appropriate number of seconds. See
576 the documentation of --wait and --waitretry for more information.
578 COUNT is the count of current retrieval, beginning with 1. */
/* NOTE(review): this excerpt ends before the function does; the
   trailing sleep call and the first_retrieval reset are not
   visible.  */
581 sleep_between_retrievals (int count)
/* Remembers whether any retrieval has happened yet in this run.  */
583 static int first_retrieval = 1;
587 /* Don't sleep before the very first retrieval. */
/* --waitretry applies only to retries (count > 1) of one URL.  */
592 if (opt.waitretry && count > 1)
594 /* If opt.waitretry is specified and this is a retry, wait for
595 COUNT-1 number of seconds, or for opt.waitretry seconds. */
/* Linear backoff, capped at opt.waitretry seconds.  */
596 if (count <= opt.waitretry)
599 sleep (opt.waitretry);
603 if (!opt.random_wait || count > 1)
604 /* If random-wait is not specified, or if we are sleeping
605 between retries of the same download, sleep the fixed
interval. */
610 /* Sleep a random amount of time averaging in opt.wait
611 seconds. The sleeping amount ranges from 0 to
612 opt.wait*2, inclusive. */
613 int waitsecs = random_number (opt.wait * 2 + 1);
615 DEBUGP (("sleep_between_retrievals: norm=%ld,fuzz=%ld,sleep=%d\n",
616 opt.wait, waitsecs - opt.wait, waitsecs));