   Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001 Free Software Foundation, Inc.

This file is part of GNU Wget.

GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
24 #include <sys/types.h>
27 #endif /* HAVE_UNISTD_H */
33 #endif /* HAVE_STRING_H */
48 # include "gen_sslfunc.h" /* for ssl_iread */
/* Count of URLs successfully downloaded in this run.  See the comment
   in gethttp() for why this global is needed.  */
int global_download_count;
65 limit_bandwidth_reset (void)
68 limit_data.dltime = 0;
71 /* Limit the bandwidth by pausing the download for an amount of time.
72 BYTES is the number of bytes received from the network, DELTA is
73 how long it took to receive them, DLTIME the current download time,
74 TIMER the timer, and ADJUSTMENT the previous. */
/* NOTE(review): the comment above names DLTIME/TIMER/ADJUSTMENT, but the
   visible signature takes only BYTES and DELTA -- the doc comment may be
   stale; confirm against the full source.  This extract is also missing
   interleaved lines (return type, braces, the sleep call itself) and is
   not compilable as-is.  */
77 limit_bandwidth (long bytes, long delta)
/* Accumulate the byte count and elapsed time for the current window.  */
81 limit_data.bytes += bytes;
82 limit_data.dltime += delta;
/* Milliseconds the transfer *should* have taken at opt.limit_rate.  */
84 expected = (long)(1000.0 * limit_data.bytes / opt.limit_rate);
/* Ahead of schedule: a sleep of (expected - dltime) ms is due.  Short
   sleeps are apparently deferred to a later call (see DEBUGP below) --
   TODO confirm the threshold in the full source.  */
86 if (expected > limit_data.dltime)
88 long slp = expected - limit_data.dltime;
91 DEBUGP (("deferring a %ld ms sleep (%ld/%ld) until later.\n",
92 slp, limit_data.bytes, limit_data.dltime));
95 DEBUGP (("sleeping %ld ms\n", slp));
/* Reset the accounting window after (possibly) sleeping.  */
100 limit_data.dltime = 0;
/* Return the smaller of I and J.  Beware: each argument is evaluated
   more than once, so avoid side effects (e.g. MIN (i++, j)).  */
#define MIN(i, j) ((i) <= (j) ? (i) : (j))
105 /* Reads the contents of file descriptor FD, until it is closed, or a
106 read error occurs. The data is read in 8K chunks, and stored to
107 stream fp, which should have been open for writing. If BUF is
108 non-NULL and its file descriptor is equal to FD, flush RBUF first.
109 This function will *not* use the rbuf_* functions!
111 The EXPECTED argument is passed to show_progress() unchanged, but
114 If opt.verbose is set, the progress is also shown. RESTVAL
115 represents a value from which to start downloading (which will be
116 shown accordingly). If RESTVAL is non-zero, the stream should have
117 been open for appending.
119 The function exits and returns codes of 0, -1 and -2 if the
120 connection was closed, there was a read error, or if it could not
121 write to the output stream, respectively.
123 IMPORTANT: The function flushes the contents of the buffer in
124 rbuf_flush() before actually reading from fd. If you wish to read
125 from fd immediately, flush or discard the buffer. */
/* NOTE(review): this extract omits many interleaved lines (braces, the
   local buffer `c', `res'/`sz' declarations, error returns); it is not
   compilable as-is.  Comments below annotate only the visible logic.  */
127 get_contents (int fd, FILE *fp, long *len, long restval, long expected,
128 struct rbuf *rbuf, int use_expected, long *elapsed)
132 void *progress = NULL;
133 struct wget_timer *timer = wtimer_allocate ();
134 long dltime = 0, last_dltime = 0;
/* Progress gauge is presumably created only when opt.verbose is set --
   the guarding condition is not visible here; confirm.  */
139 progress = progress_create (restval, expected);
/* Drain any data already buffered in RBUF for this fd before reading
   from the descriptor itself.  */
141 if (rbuf && RBUF_FD (rbuf) == fd)
144 while ((res = rbuf_flush (rbuf, c, sizeof (c))) != 0)
146 fwrite (c, sizeof (char), res, fp);
158 progress_update (progress, sz, 0);
/* Start the clock (and the bandwidth-limit accounting) fresh.  */
162 limit_bandwidth_reset ();
163 wtimer_reset (timer);
165 /* Read from fd while there is available data.
167 Normally, if expected is 0, it means that it is not known how
168 much data is expected. However, if use_expected is specified,
169 then expected being zero means exactly that. */
170 while (!use_expected || (*len < expected))
/* Never read past the expected length when it is known.  */
172 int amount_to_read = (use_expected
173 ? MIN (expected - *len, sizeof (c))
/* SSL path vs. plain read -- the #ifdef HAVE_SSL opener is among the
   dropped lines; only the #endif survives below.  */
177 res = ssl_iread (rbuf->ssl, c, amount_to_read);
179 #endif /* HAVE_SSL */
180 res = iread (fd, c, amount_to_read);
184 fwrite (c, sizeof (char), res, fp);
185 /* Always flush the contents of the network packet. This
186 should not be adverse to performance, as the network
187 packets typically won't be too tiny anyway. */
195 /* If bandwidth is not limited, one call to wtimer_elapsed
197 dltime = wtimer_elapsed (timer);
/* With --limit-rate, throttle using the per-chunk delta, then re-read
   the clock since limit_bandwidth() may have slept.  */
200 limit_bandwidth (res, dltime - last_dltime);
201 dltime = wtimer_elapsed (timer);
202 last_dltime = dltime;
206 progress_update (progress, res, dltime);
217 progress_finish (progress, dltime);
220 wtimer_delete (timer);
/* Return a printed representation of the download rate, as
   appropriate for the speed.  If PAD is non-zero, strings will be
   padded to the width of 7 characters (xxxx.xx).

   Returns a pointer to a static buffer: the string is valid only
   until the next call, and callers must not free it.  */
char *
retr_rate (long bytes, long msecs, int pad)
{
  static char res[20];
  static char *rate_names[] = {"B/s", "KB/s", "MB/s", "GB/s" };
  int units = 0;

  /* calc_rate scales DLRATE and picks the matching unit index.  */
  double dlrate = calc_rate (bytes, msecs, &units);
  sprintf (res, pad ? "%7.2f %s" : "%.2f %s", dlrate, rate_names[units]);

  return res;
}
/* Calculate the download rate and trim it as appropriate for the
   speed.  Appropriate means that if rate is greater than 1K/s,
   kilobytes are used, and if rate is greater than 1MB/s, megabytes
   are used.

   UNITS is set to zero for B/s, one for KB/s, two for MB/s, and three
   for GB/s; the return value is the rate scaled to that unit.  */
double
calc_rate (long bytes, long msecs, int *units)
{
  double dlrate;

  if (msecs == 0)
    /* If elapsed time is 0, it means we're under the granularity of
       the timer.  This often happens on systems that use time() for
       the timer.  Substitute the timer's granularity to avoid a
       division by zero.  */
    msecs = wtimer_granularity ();

  dlrate = (double)1000 * bytes / msecs;
  if (dlrate < 1024.0)
    *units = 0;
  else if (dlrate < 1024.0 * 1024.0)
    *units = 1, dlrate /= 1024.0;
  else if (dlrate < 1024.0 * 1024.0 * 1024.0)
    *units = 2, dlrate /= (1024.0 * 1024.0);
  else
    /* Maybe someone will need this one day.  More realistically, it
       will get tickled by buggy timers. */
    *units = 3, dlrate /= (1024.0 * 1024.0 * 1024.0);

  return dlrate;
}
/* Mapper for register_all_redirections(): register a redirection from
   KEY to ARG (the final URL) unless the two are identical.  VALUE is
   the hash-table value and is unused.  Returns 0 so that
   hash_table_map() keeps iterating.  */
static int
register_redirections_mapper (void *key, void *value, void *arg)
{
  const char *redirected_from = (const char *)key;
  const char *redirected_to = (const char *)arg;
  if (0 != strcmp (redirected_from, redirected_to))
    register_redirection (redirected_from, redirected_to);
  return 0;
}
287 /* Register the redirections that lead to the successful download of
288 this URL. This is necessary so that the link converter can convert
289 redirected URLs to the local file. */
292 register_all_redirections (struct hash_table *redirections, const char *final)
294 hash_table_map (redirections, register_redirections_mapper, (void *)final);
/* Non-zero if proxy use is enabled, a proxy is configured for U's
   scheme, and U's host is not excluded by the --no-proxy list.  */
#define USE_PROXY_P(u) (opt.use_proxy && getproxy((u)->scheme)		\
			&& no_proxy_match((u)->host,			\
					  (const char **)opt.no_proxy))
301 /* Retrieve the given URL. Decides which loop to call -- HTTP(S), FTP,
302 or simply copy it with file:// (#### the latter not yet
/* NOTE(review): this extract omits many interleaved lines (return type,
   braces, `url'/`u'/`result'/`local_file' declarations, gotos/returns,
   the `redirected:' label the location_changed path presumably loops
   back to); it is not compilable as-is.  Comments below annotate only
   the visible logic.  */
305 retrieve_url (const char *origurl, char **file, char **newloc,
306 const char *refurl, int *dt)
310 int location_changed, dummy;
312 char *mynewloc, *proxy;
314 int up_error_code; /* url parse error code */
/* Set of URLs already visited via redirects; allocated lazily on the
   first redirection so the common (no-redirect) case pays nothing.  */
316 struct hash_table *redirections = NULL;
318 /* If dt is NULL, just ignore it. */
/* Work on a private copy of the URL; the original stays untouched.  */
321 url = xstrdup (origurl);
327 u = url_parse (url, &up_error_code);
330 logprintf (LOG_NOTQUIET, "%s: %s.\n", url, url_error (up_error_code));
332 string_set_free (redirections);
/* Fall back to the --referer option when no referrer was passed.  */
338 refurl = opt.referer;
346 use_proxy = USE_PROXY_P (u);
349 struct url *proxy_url;
351 /* Get the proxy server for the current scheme. */
352 proxy = getproxy (u->scheme);
355 logputs (LOG_NOTQUIET, _("Could not find proxy host.\n"));
358 string_set_free (redirections);
363 /* Parse the proxy URL. */
364 proxy_url = url_parse (proxy, &up_error_code);
367 logprintf (LOG_NOTQUIET, _("Error parsing proxy URL %s: %s.\n"),
368 proxy, url_error (up_error_code));
370 string_set_free (redirections);
/* Only HTTP proxies are supported, even for FTP/HTTPS targets.  */
374 if (proxy_url->scheme != SCHEME_HTTP)
376 logprintf (LOG_NOTQUIET, _("Error in proxy URL %s: Must be HTTP.\n"), proxy);
377 url_free (proxy_url);
379 string_set_free (redirections);
/* Dispatch through the proxy via the HTTP machinery.  */
384 result = http_loop (u, &mynewloc, &local_file, refurl, dt, proxy_url);
385 url_free (proxy_url);
387 else if (u->scheme == SCHEME_HTTP
389 || u->scheme == SCHEME_HTTPS
393 result = http_loop (u, &mynewloc, &local_file, refurl, dt, NULL);
395 else if (u->scheme == SCHEME_FTP)
397 /* If this is a redirection, we must not allow recursive FTP
398 retrieval, so we save recursion to oldrec, and restore it
400 int oldrec = opt.recursive;
403 result = ftp_loop (u, dt);
404 opt.recursive = oldrec;
406 /* There is a possibility of having HTTP being redirected to
407 FTP. In these cases we must decide whether the text is HTML
408 according to the suffix. The HTML suffixes are `.html' and
409 `.htm', case-insensitive. */
410 if (redirections && u->local && (u->scheme == SCHEME_FTP))
412 char *suf = suffix (u->local);
413 if (suf && (!strcasecmp (suf, "html") || !strcasecmp (suf, "htm")))
418 location_changed = (result == NEWLOCATION);
419 if (location_changed)
421 char *construced_newloc;
422 struct url *newloc_parsed;
/* The loop that set NEWLOCATION must have supplied the new URL.  */
424 assert (mynewloc != NULL);
429 /* The HTTP specs only allow absolute URLs to appear in
430 redirects, but a ton of boneheaded webservers and CGIs out
431 there break the rules and use relative URLs, and popular
432 browsers are lenient about this, so wget should be too. */
433 construced_newloc = uri_merge (url, mynewloc);
435 mynewloc = construced_newloc;
437 /* Now, see if this new location makes sense. */
438 newloc_parsed = url_parse (mynewloc, &up_error_code);
441 logprintf (LOG_NOTQUIET, "%s: %s.\n", mynewloc,
442 url_error (up_error_code));
445 string_set_free (redirections);
451 /* Now mynewloc will become newloc_parsed->url, because if the
452 Location contained relative paths like .././something, we
453 don't want that propagating as url. */
455 mynewloc = xstrdup (newloc_parsed->url);
/* First redirection: create the visited-set now.  */
459 redirections = make_string_hash_table (0);
460 /* Add current URL immediately so we can detect it as soon
461 as possible in case of a cycle. */
462 string_set_add (redirections, u->url);
465 /* The new location is OK. Check for redirection cycle by
466 peeking through the history of redirections. */
467 if (string_set_contains (redirections, newloc_parsed->url))
469 logprintf (LOG_NOTQUIET, _("%s: Redirection cycle detected.\n"),
471 url_free (newloc_parsed);
474 string_set_free (redirections);
479 string_set_add (redirections, newloc_parsed->url);
/* Download succeeded: record the URL->file mapping (and redirects)
   for the link converter.  */
492 register_download (url, local_file);
494 register_all_redirections (redirections, url);
496 register_html (url, local_file);
/* Hand the local file name to the caller, or free it if unwanted.  */
501 *file = local_file ? local_file : NULL;
503 FREE_MAYBE (local_file);
509 string_set_free (redirections);
522 ++global_download_count;
527 /* Find the URLs in the file and call retrieve_url() for each of
528 them. If HTML is non-zero, treat the file as HTML, and construct
529 the URLs accordingly.
531 If opt.recursive is set, call recursive_retrieve() for each file. */
/* NOTE(review): this extract omits interleaved lines (return type,
   `status'/`dt' declarations, braces, `continue'/`break' statements);
   it is not compilable as-is.  Comments annotate only visible logic.  */
533 retrieve_from_file (const char *file, int html, int *count)
536 struct urlpos *url_list, *cur_url;
/* Extract the URL list either from HTML markup or one-per-line.  */
538 url_list = (html ? get_urls_html (file, NULL, NULL)
539 : get_urls_file (file));
540 status = RETROK; /* Suppose everything is OK. */
541 *count = 0; /* Reset the URL count. */
543 for (cur_url = url_list; cur_url; cur_url = cur_url->next, ++*count)
545 char *filename = NULL, *new_file = NULL;
548 if (cur_url->ignore_when_downloading)
/* Stop early once --quota is exceeded.  */
551 if (downloaded_exceeds_quota ())
/* Recursive retrieval for non-FTP schemes; plain retrieval otherwise.  */
556 if (opt.recursive && cur_url->url->scheme != SCHEME_FTP)
557 status = retrieve_tree (cur_url->url->url);
559 status = retrieve_url (cur_url->url->url, &filename, &new_file, NULL, &dt);
561 if (filename && opt.delete_after && file_exists_p (filename))
563 DEBUGP (("Removing file due to --delete-after in"
564 " retrieve_from_file():\n"));
565 logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
566 if (unlink (filename))
567 logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
571 FREE_MAYBE (new_file);
572 FREE_MAYBE (filename);
575 /* Free the linked list of URL-s. */
576 free_urlpos (url_list);
581 /* Print `giving up', or `retrying', depending on the impending
582 action. N1 and N2 are the attempt number and the attempt limit. */
584 printwhat (int n1, int n2)
586 logputs (LOG_VERBOSE, (n1 == n2) ? _("Giving up.\n\n") : _("Retrying.\n\n"));
589 /* Increment opt.downloaded by BY_HOW_MUCH. If an overflow occurs,
590 set opt.downloaded_overflow to 1. */
592 downloaded_increase (unsigned long by_how_much)
595 if (opt.downloaded_overflow)
597 old = opt.downloaded;
598 opt.downloaded += by_how_much;
599 if (opt.downloaded < old) /* carry flag, where are you when I
603 opt.downloaded_overflow = 1;
604 opt.downloaded = ~((VERY_LONG_TYPE)0);
608 /* Return non-zero if the downloaded amount of bytes exceeds the
609 desired quota. If quota is not set or if the amount overflowed, 0
612 downloaded_exceeds_quota (void)
616 if (opt.downloaded_overflow)
617 /* We don't really know. (Wildly) assume not. */
620 return opt.downloaded > opt.quota;
623 /* If opt.wait or opt.waitretry are specified, and if certain
624 conditions are met, sleep the appropriate number of seconds. See
625 the documentation of --wait and --waitretry for more information.
627 COUNT is the count of current retrieval, beginning with 1. */
/* NOTE(review): this extract omits interleaved lines (return type,
   braces, the srand() seeding call, the linear-backoff sleep, the
   `first_retrieval = 0' reset, and the tail of the function beyond the
   visible lines); it is not compilable as-is.  Comments annotate only
   the visible logic.  */
630 sleep_between_retrievals (int count)
/* Static flag: true only for the very first retrieval of the run.  */
632 static int first_retrieval = 1;
634 if (first_retrieval && opt.random_wait)
635 /* --random-wait uses the RNG, so seed it. */
/* Never sleep before the first retrieval of the run.  */
638 if (!first_retrieval && (opt.wait || opt.waitretry))
640 if (opt.waitretry && count > 1)
642 /* If opt.waitretry is specified and this is a retry, wait
643 for COUNT-1 number of seconds, or for opt.waitretry
645 if (count <= opt.waitretry)
/* Backoff capped at opt.waitretry seconds.  */
648 sleep (opt.waitretry);
652 /* Otherwise, check if opt.wait is specified. If so, sleep. */
653 if (count > 1 || !opt.random_wait)
/* --random-wait: sleep a uniform random time in [0, 2*opt.wait),
   which averages out to opt.wait per retrieval.  */
657 int waitmax = 2 * opt.wait;
658 /* This is equivalent to rand() % waitmax, but uses the
659 high-order bits for better randomness. */
660 int waitsecs = (double)waitmax * rand () / (RAND_MAX + 1.0);
662 DEBUGP (("sleep_between_retrievals: norm=%ld,fuzz=%ld,sleep=%d\n",
663 opt.wait, waitsecs - opt.wait, waitsecs));