/* Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001 Free Software Foundation, Inc.

This file is part of GNU Wget.

GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at
your option) any later version.

GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

In addition, as a special exception, the Free Software Foundation
gives permission to link the code of its release of Wget with the
OpenSSL project's "OpenSSL" library (or with modified versions of it
that use the same license as the "OpenSSL" library), and distribute
the linked executables.  You must obey the GNU General Public License
in all respects for all of the code used other than "OpenSSL".  If you
modify this file, you may extend this exception to your version of the
file, but you are not obligated to do so.  If you do not wish to do
so, delete this exception statement from your version. */
#include <sys/types.h>
#ifdef HAVE_UNISTD_H
# include <unistd.h>
#endif /* HAVE_UNISTD_H */
#ifdef HAVE_STRING_H
# include <string.h>
#endif /* HAVE_STRING_H */

#ifdef HAVE_SSL
# include "gen_sslfunc.h"	/* for ssl_iread */
#endif
/* See the comment in gethttp() for why this is needed. */
int global_download_count;
static void
limit_bandwidth_reset (void)
{
  limit_data.bytes = 0;
  limit_data.dltime = 0;
}
/* Limit the bandwidth by pausing the download for an amount of time.
   BYTES is the number of bytes received from the network, and DELTA
   is how long it took to receive them. */

static void
limit_bandwidth (long bytes, double delta)
{
  double expected;

  limit_data.bytes += bytes;
  limit_data.dltime += delta;

  expected = 1000.0 * limit_data.bytes / opt.limit_rate;

  if (expected > limit_data.dltime)
    {
      double slp = expected - limit_data.dltime;
      if (slp < 200)
        {
          DEBUGP (("deferring a %.2f ms sleep (%ld/%.2f).\n",
                   slp, limit_data.bytes, limit_data.dltime));
          return;
        }
      DEBUGP (("sleeping %.2f ms\n", slp));
      usleep ((unsigned long) (1000 * slp));
    }

  limit_data.bytes = 0;
  limit_data.dltime = 0;
}
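/* A hypothetical worked example of the arithmetic above (numbers are
   illustrative, not from the source): with opt.limit_rate == 25600
   (25 KB/s), receiving 51200 bytes in 1000 ms gives
   expected = 1000.0 * 51200 / 25600 = 2000 ms, so slp = 1000 ms and
   usleep() pauses for roughly one second, pulling the average rate
   back down to the requested limit. */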
#define MIN(i, j) ((i) <= (j) ? (i) : (j))

/* Reads the contents of file descriptor FD until it is closed, or a
   read error occurs.  The data is read in 16K chunks and stored to
   stream FP, which should have been opened for writing.  If RBUF is
   non-NULL and its file descriptor is equal to FD, flush RBUF first.
   This function will *not* use the rbuf_* functions to read data!

   The EXPECTED argument is passed to progress_create() unchanged; if
   USE_EXPECTED is non-zero, it also bounds how much data is read (see
   the loop below).

   If opt.verbose is set, the progress is also shown.  RESTVAL
   represents a value from which to start downloading (which will be
   shown accordingly).  If RESTVAL is non-zero, the stream should have
   been opened for appending.

   The function returns 0 if the connection was closed, -1 on a read
   error, and -2 if it could not write to the output stream.

   IMPORTANT: The function flushes the contents of the buffer in
   rbuf_flush() before actually reading from FD.  If you wish to read
   from FD immediately, flush or discard the buffer. */
int
get_contents (int fd, FILE *fp, long *len, long restval, long expected,
              struct rbuf *rbuf, int use_expected, double *elapsed)
{
  int res = 0;
  static char dlbuf[16384];
  int dlbufsize = sizeof (dlbuf);
  void *progress = NULL;
  struct wget_timer *timer = wtimer_allocate ();
  double dltime = 0, last_dltime = 0;

  *len = restval;
  progress = progress_create (restval, expected);

  if (rbuf && RBUF_FD (rbuf) == fd)
    {
      int sz = 0;
      while ((res = rbuf_flush (rbuf, dlbuf, sizeof (dlbuf))) != 0)
        {
          fwrite (dlbuf, 1, res, fp);
          sz += res;
        }
      progress_update (progress, sz, 0);
    }

  limit_bandwidth_reset ();
  wtimer_reset (timer);

  /* If we're limiting the download, set our buffer size to the
     limit, so that a single read never exceeds it. */
  if (opt.limit_rate && opt.limit_rate < dlbufsize)
    dlbufsize = opt.limit_rate;

  /* Read from fd while there is available data.

     Normally, if expected is 0, it means that it is not known how
     much data is expected.  However, if use_expected is specified,
     then expected being zero means exactly that. */
  while (!use_expected || (*len < expected))
    {
      int amount_to_read = (use_expected
                            ? MIN (expected - *len, dlbufsize) : dlbufsize);
#ifdef HAVE_SSL
      if (rbuf->ssl != NULL)
        res = ssl_iread (rbuf->ssl, dlbuf, amount_to_read);
      else
#endif /* HAVE_SSL */
        res = iread (fd, dlbuf, amount_to_read);

      if (res <= 0)
        break;

      fwrite (dlbuf, 1, res, fp);
      /* Always flush the contents of the network packet.  This should
         not hinder performance: fast downloads will be received in
         16K chunks (which stdio would write out anyway), and slow
         downloads won't be limited by disk performance. */
      fflush (fp);

      /* If bandwidth is not limited, one call to wtimer_elapsed is
         sufficient. */
      dltime = wtimer_elapsed (timer);
      if (opt.limit_rate)
        {
          limit_bandwidth (res, dltime - last_dltime);
          dltime = wtimer_elapsed (timer);
          last_dltime = dltime;
        }

      progress_update (progress, res, dltime);
      *len += res;
    }

  progress_finish (progress, dltime);
  if (elapsed)
    *elapsed = dltime;
  wtimer_delete (timer);

  return res;
}
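/* A sketch of a typical call, with hypothetical variable names (sock,
   fp, contlen, rbuf); the real callers live elsewhere in Wget, so this
   is illustrative only.  The return value follows the comment above:
   0 for a closed connection, -1 for a read error, -2 for a write error
   on the output stream. */
#if 0
  long received = 0;
  double elapsed_ms;
  int err = get_contents (sock, fp, &received, 0L /* restval */,
                          contlen /* expected */, &rbuf,
                          1 /* use_expected */, &elapsed_ms);
#endif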
/* Return a printed representation of the download rate, as
   appropriate for the speed.  If PAD is non-zero, strings will be
   padded to the width of 7 characters (xxxx.xx). */
char *
retr_rate (long bytes, double msecs, int pad)
{
  static char res[20];
  static char *rate_names[] = {"B/s", "KB/s", "MB/s", "GB/s" };
  int units = 0;

  double dlrate = calc_rate (bytes, msecs, &units);
  sprintf (res, pad ? "%7.2f %s" : "%.2f %s", dlrate, rate_names[units]);

  return res;
}
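/* Example output (illustrative numbers): 1048576 bytes in 2000 ms is
   524288 B/s, i.e. 512 KB/s, so retr_rate (1048576, 2000.0, 0) yields
   "512.00 KB/s"; with PAD non-zero the "%7.2f" conversion left-pads
   the number to seven characters, giving " 512.00 KB/s". */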
/* Calculate the download rate and trim it as appropriate for the
   speed.  Appropriate means that if rate is greater than 1K/s,
   kilobytes are used, and if rate is greater than 1MB/s, megabytes
   are used.

   UNITS is zero for B/s, one for KB/s, two for MB/s, and three for
   GB/s. */
double
calc_rate (long bytes, double msecs, int *units)
{
  double dlrate;

  if (msecs == 0)
    /* If elapsed time is exactly zero, it means we're under the
       granularity of the timer.  This often happens on systems that
       use time() for the timer. */
    msecs = wtimer_granularity ();

  dlrate = (double)1000 * bytes / msecs;
  if (dlrate < 1024.0)
    *units = 0;
  else if (dlrate < 1024.0 * 1024.0)
    *units = 1, dlrate /= 1024.0;
  else if (dlrate < 1024.0 * 1024.0 * 1024.0)
    *units = 2, dlrate /= (1024.0 * 1024.0);
  else
    /* Maybe someone will need this, one day. */
    *units = 3, dlrate /= (1024.0 * 1024.0 * 1024.0);

  return dlrate;
}
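/* A minimal, self-contained sketch of the unit selection above, using
   the same 1024-based thresholds; it is independent of the rest of
   Wget and kept out of the build with #if 0. */
#if 0
#include <stdio.h>

int
main (void)
{
  static const char *names[] = { "B/s", "KB/s", "MB/s", "GB/s" };
  double rate = 1000.0 * 1048576 / 2000;   /* 1 MiB in 2 s -> 524288 B/s */
  int units = 0;

  while (units < 3 && rate >= 1024.0)
    ++units, rate /= 1024.0;
  printf ("%.2f %s\n", rate, names[units]);   /* prints "512.00 KB/s" */
  return 0;
}
#endif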
/* Maximum number of allowed redirections.  20 was chosen as a
   "reasonable" value, which is low enough to not cause havoc, yet
   high enough to guarantee that normal retrievals will not be hurt by
   the check. */

#define MAX_REDIRECTIONS 20
#define SUSPEND_POST_DATA do {                          \
  post_data_suspended = 1;                              \
  saved_post_data = opt.post_data;                      \
  saved_post_file_name = opt.post_file_name;            \
  opt.post_data = NULL;                                 \
  opt.post_file_name = NULL;                            \
} while (0)

#define RESTORE_POST_DATA do {                          \
  if (post_data_suspended)                              \
    {                                                   \
      opt.post_data = saved_post_data;                  \
      opt.post_file_name = saved_post_file_name;        \
      post_data_suspended = 0;                          \
    }                                                   \
} while (0)
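/* A sketch of how the two macros are meant to pair up around a
   redirection (simplified, hypothetical control flow, not the original
   code): the POST data is put aside before the Location header is
   followed and put back before returning to the caller, so the
   redirected request is retried as a plain GET. */
#if 0
  if (!post_data_suspended)
    SUSPEND_POST_DATA;
  /* ... follow the redirection and retry the download ... */
  RESTORE_POST_DATA;
#endif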
/* Retrieve the given URL.  Decides which loop to call -- HTTP, FTP,
   or HTTP through a proxy -- and hands the retrieval off to it. */

/* #### This function should be rewritten so it doesn't return from
   multiple points. */

retrieve_url (const char *origurl, char **file, char **newloc,
              const char *refurl, int *dt)
  int location_changed, dummy;
  char *mynewloc, *proxy;
  struct url *u, *proxy_url;
  int up_error_code;		/* url parse error code */
  int redirection_count = 0;

  int post_data_suspended = 0;
  char *saved_post_data = NULL;
  char *saved_post_file_name = NULL;

  /* If dt is NULL, just ignore it. */

  url = xstrdup (origurl);

  u = url_parse (url, &up_error_code);
  logprintf (LOG_NOTQUIET, "%s: %s.\n", url, url_error (up_error_code));

  refurl = opt.referer;
  proxy = getproxy (u);

  /* Parse the proxy URL. */
  proxy_url = url_parse (proxy, &up_error_code);
  logprintf (LOG_NOTQUIET, _("Error parsing proxy URL %s: %s.\n"),
             proxy, url_error (up_error_code));

  if (proxy_url->scheme != SCHEME_HTTP && proxy_url->scheme != u->scheme)
    logprintf (LOG_NOTQUIET, _("Error in proxy URL %s: Must be HTTP.\n"), proxy);
  url_free (proxy_url);

  if (u->scheme == SCHEME_HTTP
      || u->scheme == SCHEME_HTTPS
      || (proxy_url && proxy_url->scheme == SCHEME_HTTP))
    result = http_loop (u, &mynewloc, &local_file, refurl, dt, proxy_url);
  else if (u->scheme == SCHEME_FTP)
      /* If this is a redirection, we must not allow recursive FTP
         retrieval, so we save recursion to oldrec, and restore it
         later. */
      int oldrec = opt.recursive;
      if (redirection_count)
        opt.recursive = 0;
      result = ftp_loop (u, dt, proxy_url);
      opt.recursive = oldrec;

      /* There is a possibility of having HTTP being redirected to
         FTP.  In these cases we must decide whether the text is HTML
         according to the suffix.  The HTML suffixes are `.html',
         `.htm' and a few others, case-insensitive. */
      if (redirection_count && local_file && u->scheme == SCHEME_FTP)
        if (has_html_suffix_p (local_file))
          *dt |= TEXTHTML;

  url_free (proxy_url);
  location_changed = (result == NEWLOCATION);
  if (location_changed)
      char *constructed_newloc;
      struct url *newloc_parsed;

      assert (mynewloc != NULL);

      /* The HTTP specs only allow absolute URLs to appear in
         redirects, but a ton of boneheaded webservers and CGIs out
         there break the rules and use relative URLs, and popular
         browsers are lenient about this, so wget should be too. */
      constructed_newloc = uri_merge (url, mynewloc);
      mynewloc = constructed_newloc;
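      /* For illustration (hypothetical values): a "Location:
         ../other/index.html" reply to a request for
         "http://example.com/a/b/c.html" merges to
         "http://example.com/a/other/index.html". */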
      /* Now, see if this new location makes sense. */
      newloc_parsed = url_parse (mynewloc, &up_error_code);
      logprintf (LOG_NOTQUIET, "%s: %s.\n", mynewloc,
                 url_error (up_error_code));
      /* Now mynewloc will become newloc_parsed->url, because if the
         Location contained relative paths like .././something, we
         don't want that propagating as url. */
      mynewloc = xstrdup (newloc_parsed->url);

      /* Check for max. number of redirections. */
      if (++redirection_count > MAX_REDIRECTIONS)
        logprintf (LOG_NOTQUIET, _("%d redirections exceeded.\n"),
                   redirection_count);
      url_free (newloc_parsed);

      /* If we're being redirected from POST, we don't want to POST
         again.  Many requests answer POST with a redirection to an
         index page; that redirection is clearly a GET.  We "suspend"
         POST data for the duration of the redirections, and restore
         it when we're done. */
      if (!post_data_suspended)
        SUSPEND_POST_DATA;

  register_download (u->url, local_file);
  if (redirection_count && 0 != strcmp (origurl, u->url))
    register_redirection (origurl, u->url);
  register_html (u->url, local_file);

  *file = local_file ? local_file : NULL;
  FREE_MAYBE (local_file);

  if (redirection_count)

  ++global_download_count;
/* Find the URLs in the file and call retrieve_url() for each of
   them.  If HTML is non-zero, treat the file as HTML, and construct
   the URLs accordingly.

   If opt.recursive is set, call retrieve_tree() for each file. */
uerr_t
retrieve_from_file (const char *file, int html, int *count)
{
  uerr_t status;
  struct urlpos *url_list, *cur_url;

  url_list = (html ? get_urls_html (file, NULL, NULL)
              : get_urls_file (file));
  status = RETROK;		/* Suppose everything is OK. */
  *count = 0;			/* Reset the URL count. */

  for (cur_url = url_list; cur_url; cur_url = cur_url->next, ++*count)
    {
      char *filename = NULL, *new_file = NULL;
      int dt;

      if (cur_url->ignore_when_downloading)
        continue;
      if (downloaded_exceeds_quota ())
        {
          status = QUOTEXC;
          break;
        }
      if (opt.recursive && cur_url->url->scheme != SCHEME_FTP)
        status = retrieve_tree (cur_url->url->url);
      else
        status = retrieve_url (cur_url->url->url, &filename, &new_file, NULL, &dt);

      if (filename && opt.delete_after && file_exists_p (filename))
        {
          DEBUGP (("Removing file due to --delete-after in"
                   " retrieve_from_file():\n"));
          logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
          if (unlink (filename))
            logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
        }

      FREE_MAYBE (new_file);
      FREE_MAYBE (filename);
    }

  /* Free the linked list of URL-s. */
  free_urlpos (url_list);

  return status;
}
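/* A sketch of a call as it might be made for a plain (non-HTML) input
   file listing one URL per line; the file name and variables are
   hypothetical. */
#if 0
  int count;
  uerr_t status = retrieve_from_file ("urls.txt", 0 /* html */, &count);
#endif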
/* Print `giving up', or `retrying', depending on the impending
   action.  N1 and N2 are the attempt number and the attempt limit. */
void
printwhat (int n1, int n2)
{
  logputs (LOG_VERBOSE, (n1 == n2) ? _("Giving up.\n\n") : _("Retrying.\n\n"));
}
/* Increment opt.downloaded by BY_HOW_MUCH.  If an overflow occurs,
   set opt.downloaded_overflow to 1. */
void
downloaded_increase (unsigned long by_how_much)
{
  VERY_LONG_TYPE old;
  if (opt.downloaded_overflow)
    return;
  old = opt.downloaded;
  opt.downloaded += by_how_much;
  if (opt.downloaded < old)	/* carry flag, where are you when I
				   need you? */
    {
      opt.downloaded_overflow = 1;
      opt.downloaded = ~((VERY_LONG_TYPE)0);
    }
}
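/* The wraparound test above in isolation (illustrative, self-contained,
   and kept out of the build with #if 0): unsigned arithmetic wraps
   around, so after an overflow the new value compares smaller than the
   old one.  A 32-bit width is forced here only to make the numbers
   concrete; Wget itself uses VERY_LONG_TYPE. */
#if 0
#include <stdio.h>

int
main (void)
{
  unsigned long old = 0xfffffff0UL;
  unsigned long now = (old + 0x20UL) & 0xffffffffUL;   /* wraps to 0x10 */

  printf ("overflow detected: %s\n", now < old ? "yes" : "no");
  return 0;
}
#endif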
/* Return non-zero if the downloaded amount of bytes exceeds the
   desired quota.  If quota is not set or if the amount overflowed, 0
   is returned. */
int
downloaded_exceeds_quota (void)
{
  if (!opt.quota)
    return 0;
  if (opt.downloaded_overflow)
    /* We don't really know.  (Wildly) assume not. */
    return 0;

  return opt.downloaded > opt.quota;
}
/* If opt.wait or opt.waitretry are specified, and if certain
   conditions are met, sleep the appropriate number of seconds.  See
   the documentation of --wait and --waitretry for more information.

   COUNT is the count of current retrieval, beginning with 1. */
void
sleep_between_retrievals (int count)
{
  static int first_retrieval = 1;

  if (first_retrieval)
    {
      /* Don't sleep before the very first retrieval. */
      first_retrieval = 0;
      return;
    }

  if (opt.waitretry && count > 1)
    {
      /* If opt.waitretry is specified and this is a retry, wait for
         COUNT-1 number of seconds, or for opt.waitretry seconds. */
      if (count <= opt.waitretry)
        sleep (count - 1);
      else
        sleep (opt.waitretry);
    }
  else if (opt.wait)
    {
      if (!opt.random_wait || count > 1)
        /* If random-wait is not specified, or if we are sleeping
           between retries of the same download, sleep the fixed
           interval. */
        sleep (opt.wait);
      else
        {
          /* Sleep a random amount of time averaging in opt.wait
             seconds.  The sleeping amount ranges from 0 to
             opt.wait*2, inclusive. */
          int waitsecs = random_number (opt.wait * 2 + 1);
          DEBUGP (("sleep_between_retrievals: norm=%ld,fuzz=%ld,sleep=%d\n",
                   opt.wait, waitsecs - opt.wait, waitsecs));
          sleep (waitsecs);
        }
    }
}
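/* A worked example of the fuzzed wait (hypothetical numbers): with
   opt.wait == 5, random_number (11) yields an integer in 0..10, so
   individual retrievals sleep anywhere from 0 to 10 seconds and
   average roughly the configured 5; a draw of 8 would be reported as
   norm=5, fuzz=3, sleep=8 by the DEBUGP call above. */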