2 Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001 Free Software Foundation, Inc.
4 This file is part of GNU Wget.
6 GNU Wget is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 GNU Wget is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Wget; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
24 #include <sys/types.h>
27 #endif /* HAVE_UNISTD_H */
33 #endif /* HAVE_STRING_H */
51 /* See the comment in gethttp() why this is needed. */
/* Count of URLs successfully processed this session; incremented at
   the end of retrieve_url().  NOTE(review): the leading numbers on
   these lines are extraction artifacts -- source is elided here.  */
52 int global_download_count;
/* Reset the accumulated state used by limit_bandwidth() below.
   NOTE(review): return type, braces, and (presumably) the reset of
   limit_data.bytes are elided from this excerpt -- confirm against
   the full source before editing.  */
61 limit_bandwidth_reset (void)
64 limit_data.dltime = 0;
67 /* Limit the bandwidth by pausing the download for an amount of time.
68 BYTES is the number of bytes received from the network, DELTA is
69 how long it took to receive them, DLTIME the current download time,
70 TIMER the timer, and ADJUSTMENT the previous. */
73 limit_bandwidth (long bytes, long delta)
/* Accumulate bytes transferred and elapsed ms since the last reset.  */
77 limit_data.bytes += bytes;
78 limit_data.dltime += delta;
/* Expected elapsed time (ms) had we been downloading at exactly
   opt.limit_rate bytes/second.  */
80 expected = (long)(1000.0 * limit_data.bytes / opt.limit_rate);
/* If we are ahead of schedule, sleep off the difference.  */
82 if (expected > limit_data.dltime)
84 long slp = expected - limit_data.dltime;
/* NOTE(review): lines are elided here -- presumably small sleeps are
   deferred (accumulated) rather than performed; confirm in full source.  */
87 DEBUGP (("deferring a %ld ms sleep (%ld/%ld) until later.\n",
88 slp, limit_data.bytes, limit_data.dltime));
91 DEBUGP (("sleeping %ld ms\n", slp));
/* After sleeping, restart the accounting window.  */
96 limit_data.dltime = 0;
/* Return the smaller of I and J.  Beware: each argument may be
   evaluated twice, so operands must be free of side effects (current
   callers pass simple expressions only).  */
#define MIN(i, j) ((i) <= (j) ? (i) : (j))
101 /* Reads the contents of file descriptor FD, until it is closed, or a
102 read error occurs. The data is read in 8K chunks, and stored to
103 stream fp, which should have been open for writing. If BUF is
104 non-NULL and its file descriptor is equal to FD, flush RBUF first.
105 This function will *not* use the rbuf_* functions!
107 The EXPECTED argument is passed to show_progress() unchanged, but
110 If opt.verbose is set, the progress is also shown. RESTVAL
111 represents a value from which to start downloading (which will be
112 shown accordingly). If RESTVAL is non-zero, the stream should have
113 been open for appending.
115 The function exits and returns codes of 0, -1 and -2 if the
116 connection was closed, there was a read error, or if it could not
117 write to the output stream, respectively.
119 IMPORTANT: The function flushes the contents of the buffer in
120 rbuf_flush() before actually reading from fd. If you wish to read
121 from fd immediately, flush or discard the buffer. */
/* NOTE(review): this excerpt is elided -- declarations of res, sz and
   the read buffer c, the error-handling branches, and both closing
   braces are missing.  Comments below annotate only what is visible.  */
123 get_contents (int fd, FILE *fp, long *len, long restval, long expected,
124 struct rbuf *rbuf, int use_expected, long *elapsed)
128 void *progress = NULL;
129 struct wget_timer *timer = wtimer_allocate ();
130 long dltime = 0, last_dltime = 0;
/* Progress display is created only conditionally (condition elided).  */
135 progress = progress_create (restval, expected);
/* First drain any data already buffered in RBUF for this fd.  */
137 if (rbuf && RBUF_FD (rbuf) == fd)
140 while ((res = rbuf_flush (rbuf, c, sizeof (c))) != 0)
142 fwrite (c, sizeof (char), res, fp);
154 progress_update (progress, sz, 0);
/* Start bandwidth accounting and timing from a clean slate.  */
158 limit_bandwidth_reset ();
159 wtimer_reset (timer);
161 /* Read from fd while there is available data.
163 Normally, if expected is 0, it means that it is not known how
164 much data is expected. However, if use_expected is specified,
165 then expected being zero means exactly that. */
166 while (!use_expected || (*len < expected))
/* Never read past the expected length when it is known.  */
168 int amount_to_read = (use_expected
169 ? MIN (expected - *len, sizeof (c))
/* SSL path vs. plain read -- the #ifdef HAVE_SSL opener is elided.  */
173 res = ssl_iread (rbuf->ssl, c, amount_to_read);
175 #endif /* HAVE_SSL */
176 res = iread (fd, c, amount_to_read);
180 fwrite (c, sizeof (char), res, fp);
181 /* Always flush the contents of the network packet. This
182 should not be adverse to performance, as the network
183 packets typically won't be too tiny anyway. */
191 /* If bandwidth is not limited, one call to wtimer_elapsed
/* Throttle, then re-read the clock so DLTIME includes the sleep.  */
193 dltime = wtimer_elapsed (timer);
196 limit_bandwidth (res, dltime - last_dltime);
197 dltime = wtimer_elapsed (timer);
198 last_dltime = dltime;
202 progress_update (progress, res, dltime);
213 progress_finish (progress, dltime);
216 wtimer_delete (timer);
221 /* Return a printed representation of the download rate, as
222 appropriate for the speed. If PAD is non-zero, strings will be
223 padded to the width of 7 characters (xxxx.xx). */
/* NOTE(review): the declaration of the result buffer `res' and the
   return statement are elided; in the full source `res' is a static
   buffer, which makes this function non-reentrant -- callers must
   copy the result before calling again.  */
225 retr_rate (long bytes, long msecs, int pad)
228 static char *rate_names[] = {"B/s", "KB/s", "MB/s", "GB/s" };
/* calc_rate() scales DLRATE and sets units to 0..3, indexing
   rate_names above.  */
231 double dlrate = calc_rate (bytes, msecs, &units);
232 sprintf (res, pad ? "%7.2f %s" : "%.2f %s", dlrate, rate_names[units]);
237 /* Calculate the download rate and trim it as appropriate for the
238 speed. Appropriate means that if rate is greater than 1K/s,
239 kilobytes are used, and if rate is greater than 1MB/s, megabytes
242 UNITS is zero for B/s, one for KB/s, two for MB/s, and three for
/* NOTE(review): opening brace, local declarations, the msecs==0 test
   and the *units = 0 branch are elided from this excerpt.  */
245 calc_rate (long bytes, long msecs, int *units)
253 /* If elapsed time is 0, it means we're under the granularity of
254 the timer. This often happens on systems that use time() for
/* Substitute the timer's granularity to avoid division by zero.  */
256 msecs = wtimer_granularity ();
258 dlrate = (double)1000 * bytes / msecs;
/* Pick the largest unit that keeps the mantissa below 1024.  */
261 else if (dlrate < 1024.0 * 1024.0)
262 *units = 1, dlrate /= 1024.0;
263 else if (dlrate < 1024.0 * 1024.0 * 1024.0)
264 *units = 2, dlrate /= (1024.0 * 1024.0);
266 /* Maybe someone will need this one day. More realistically, it
267 will get tickled by buggy timers. */
268 *units = 3, dlrate /= (1024.0 * 1024.0 * 1024.0);
/* Hash-table mapper: register one redirection KEY -> ARG, skipping the
   self-mapping.  VALUE (the hash value) is unused.  NOTE(review):
   return type and braces are elided in this excerpt.  */
274 register_redirections_mapper (void *key, void *value, void *arg)
276 const char *redirected_from = (const char *)key;
277 const char *redirected_to = (const char *)arg;
/* Don't record a URL as redirecting to itself.  */
278 if (0 != strcmp (redirected_from, redirected_to))
279 register_redirection (redirected_from, redirected_to);
283 /* Register the redirections that lead to the successful download of
284 this URL. This is necessary so that the link converter can convert
285 redirected URLs to the local file. */
/* Maps every key in REDIRECTIONS to FINAL via the mapper above.  */
288 register_all_redirections (struct hash_table *redirections, const char *final)
290 hash_table_map (redirections, register_redirections_mapper, (void *)final);
/* Nonzero when URL U should be fetched through a proxy: proxy use must
   be enabled, a proxy must be configured for U's scheme, and U's host
   must not be excluded by the no_proxy list.  Evaluates U more than
   once -- pass a side-effect-free expression.  */
#define USE_PROXY_P(u) (opt.use_proxy && getproxy((u)->scheme)		\
			&& no_proxy_match((u)->host,			\
					  (const char **)opt.no_proxy))
297 /* Retrieve the given URL. Decides which loop to call -- HTTP(S), FTP,
298 or simply copy it with file:// (#### the latter not yet
/* NOTE(review): this function is heavily elided in this excerpt --
   return type, braces, many declarations (url, u, result, local_file,
   use_proxy), the `redirected:' loop label and several early-return
   paths are missing.  Comments below annotate only visible lines.  */
301 retrieve_url (const char *origurl, char **file, char **newloc,
302 const char *refurl, int *dt)
306 int location_changed, dummy;
308 char *mynewloc, *proxy;
310 int up_error_code; /* url parse error code */
/* Lazily-created set of URLs seen in this redirection chain; used for
   cycle detection and later link-conversion registration.  */
312 struct hash_table *redirections = NULL;
314 /* If dt is NULL, just ignore it. */
/* Work on a private copy: URL is rewritten on each redirect.  */
317 url = xstrdup (origurl);
323 u = url_parse (url, &up_error_code);
/* Parse failure: report, clean up, and (elided) bail out.  */
326 logprintf (LOG_NOTQUIET, "%s: %s.\n", url, url_error (up_error_code));
328 string_set_free (redirections);
/* Fall back to the configured referer when none was passed.  */
334 refurl = opt.referer;
342 use_proxy = USE_PROXY_P (u);
345 struct url *proxy_url;
347 /* Get the proxy server for the current scheme. */
348 proxy = getproxy (u->scheme);
351 logputs (LOG_NOTQUIET, _("Could not find proxy host.\n"));
354 string_set_free (redirections);
359 /* Parse the proxy URL. */
360 proxy_url = url_parse (proxy, &up_error_code);
363 logprintf (LOG_NOTQUIET, _("Error parsing proxy URL %s: %s.\n"),
364 proxy, url_error (up_error_code));
366 string_set_free (redirections);
/* Only HTTP proxies are supported here.  */
370 if (proxy_url->scheme != SCHEME_HTTP)
372 logprintf (LOG_NOTQUIET, _("Error in proxy URL %s: Must be HTTP.\n"), proxy);
373 url_free (proxy_url);
375 string_set_free (redirections);
/* Proxied retrieval goes through the HTTP loop regardless of scheme.  */
380 result = http_loop (u, &mynewloc, &local_file, refurl, dt, proxy_url);
381 url_free (proxy_url);
383 else if (u->scheme == SCHEME_HTTP
385 || u->scheme == SCHEME_HTTPS
389 result = http_loop (u, &mynewloc, &local_file, refurl, dt, NULL);
391 else if (u->scheme == SCHEME_FTP)
393 /* If this is a redirection, we must not allow recursive FTP
394 retrieval, so we save recursion to oldrec, and restore it
396 int oldrec = opt.recursive;
399 result = ftp_loop (u, dt);
400 opt.recursive = oldrec;
402 /* There is a possibility of having HTTP being redirected to
403 FTP. In these cases we must decide whether the text is HTML
404 according to the suffix. The HTML suffixes are `.html' and
405 `.htm', case-insensitive. */
406 if (redirections && u->local && (u->scheme == SCHEME_FTP))
408 char *suf = suffix (u->local);
409 if (suf && (!strcasecmp (suf, "html") || !strcasecmp (suf, "htm")))
/* --- Redirect handling: loop back (label elided) until no
   NEWLOCATION result is returned.  --- */
414 location_changed = (result == NEWLOCATION);
415 if (location_changed)
417 char *construced_newloc;
418 struct url *newloc_parsed;
420 assert (mynewloc != NULL);
425 /* The HTTP specs only allow absolute URLs to appear in
426 redirects, but a ton of boneheaded webservers and CGIs out
427 there break the rules and use relative URLs, and popular
428 browsers are lenient about this, so wget should be too. */
429 construced_newloc = uri_merge (url, mynewloc);
431 mynewloc = construced_newloc;
433 /* Now, see if this new location makes sense. */
434 newloc_parsed = url_parse (mynewloc, &up_error_code);
437 logprintf (LOG_NOTQUIET, "%s: %s.\n", mynewloc,
438 url_error (up_error_code));
441 string_set_free (redirections);
447 /* Now mynewloc will become newloc_parsed->url, because if the
448 Location contained relative paths like .././something, we
449 don't want that propagating as url. */
451 mynewloc = xstrdup (newloc_parsed->url);
/* First redirect: create the history set on demand.  */
455 redirections = make_string_hash_table (0);
456 /* Add current URL immediately so we can detect it as soon
457 as possible in case of a cycle. */
458 string_set_add (redirections, u->url);
461 /* The new location is OK. Check for redirection cycle by
462 peeking through the history of redirections. */
463 if (string_set_contains (redirections, newloc_parsed->url))
465 logprintf (LOG_NOTQUIET, _("%s: Redirection cycle detected.\n"),
467 url_free (newloc_parsed);
470 string_set_free (redirections);
475 string_set_add (redirections, newloc_parsed->url);
/* --- Success path: record the download for link conversion.  --- */
488 register_download (url, local_file);
490 register_all_redirections (redirections, url);
492 register_html (url, local_file);
/* Hand ownership of local_file to the caller, or free it.  */
497 *file = local_file ? local_file : NULL;
499 FREE_MAYBE (local_file);
505 string_set_free (redirections);
518 ++global_download_count;
523 /* Find the URLs in the file and call retrieve_url() for each of
524 them. If HTML is non-zero, treat the file as HTML, and construct
525 the URLs accordingly.
527 If opt.recursive is set, call recursive_retrieve() for each file. */
/* NOTE(review): return type, braces, the `status' and `dt'
   declarations and the loop-exit breaks are elided in this excerpt.  */
529 retrieve_from_file (const char *file, int html, int *count)
532 struct urlpos *url_list, *cur_url;
/* Extract the URL list either from HTML markup or a plain list file.  */
534 url_list = (html ? get_urls_html (file, NULL, FALSE, NULL)
535 : get_urls_file (file));
536 status = RETROK; /* Suppose everything is OK. */
537 *count = 0; /* Reset the URL count. */
539 for (cur_url = url_list; cur_url; cur_url = cur_url->next, ++*count)
541 char *filename = NULL, *new_file;
/* Skip entries marked as not-to-download (e.g. <base> href).  */
544 if (cur_url->ignore_when_downloading)
/* Stop early once the --quota limit has been hit (break elided).  */
547 if (downloaded_exceeds_quota ())
/* Recursive HTTP(S) goes through retrieve_tree; everything else
   through a plain retrieve_url.  */
552 if (opt.recursive && cur_url->url->scheme != SCHEME_FTP)
553 status = retrieve_tree (cur_url->url->url);
555 status = retrieve_url (cur_url->url->url, &filename, &new_file, NULL, &dt);
557 if (filename && opt.delete_after && file_exists_p (filename))
559 DEBUGP (("Removing file due to --delete-after in"
560 " retrieve_from_file():\n"));
561 logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
562 if (unlink (filename))
563 logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
567 FREE_MAYBE (new_file);
568 FREE_MAYBE (filename);
571 /* Free the linked list of URL-s. */
572 free_urlpos (url_list);
577 /* Print `giving up', or `retrying', depending on the impending
578 action. N1 and N2 are the attempt number and the attempt limit. */
/* NOTE(review): return type and braces elided in this excerpt.  */
580 printwhat (int n1, int n2)
582 logputs (LOG_VERBOSE, (n1 == n2) ? _("Giving up.\n\n") : _("Retrying.\n\n"));
585 /* Increment opt.downloaded by BY_HOW_MUCH. If an overflow occurs,
586 set opt.downloaded_overflow to 1. */
/* NOTE(review): return type, braces, the `old' declaration and the
   early return when overflow was already flagged are elided.  */
588 downloaded_increase (unsigned long by_how_much)
591 if (opt.downloaded_overflow)
593 old = opt.downloaded;
594 opt.downloaded += by_how_much;
/* Unsigned wrap-around detection: sum smaller than an operand means
   the addition overflowed.  */
595 if (opt.downloaded < old) /* carry flag, where are you when I
/* Saturate the counter at the maximum representable value.  */
599 opt.downloaded_overflow = 1;
600 opt.downloaded = ~((VERY_LONG_TYPE)0);
604 /* Return non-zero if the downloaded amount of bytes exceeds the
605 desired quota. If quota is not set or if the amount overflowed, 0
/* NOTE(review): return type, braces, and the "quota unset" early
   return are elided in this excerpt.  */
608 downloaded_exceeds_quota (void)
612 if (opt.downloaded_overflow)
613 /* We don't really know. (Wildly) assume not. */
616 return opt.downloaded > opt.quota;
619 /* If opt.wait or opt.waitretry are specified, and if certain
620 conditions are met, sleep the appropriate number of seconds. See
621 the documentation of --wait and --waitretry for more information.
623 COUNT is the count of current retrieval, beginning with 1. */
626 sleep_between_retrievals (int count)
628 static int first_retrieval = 1;
630 if (!first_retrieval && (opt.wait || opt.waitretry))
632 if (opt.waitretry && count > 1)
634 /* If opt.waitretry is specified and this is a retry, wait
635 for COUNT-1 number of seconds, or for opt.waitretry
637 if (count <= opt.waitretry)
640 sleep (opt.waitretry);
644 /* Otherwise, check if opt.wait is specified. If so, sleep. */
645 if (count > 1 || !opt.random_wait)
649 int waitsecs = random() % (opt.wait * 2 + 1);
650 DEBUGP(("sleep_between_retrievals: norm=%ld,random=%ld,sleep=%d\n",
651 opt.wait, waitsecs - opt.wait, waitsecs));