2 Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001 Free Software Foundation, Inc.
4 This file is part of Wget.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
24 #include <sys/types.h>
27 #endif /* HAVE_UNISTD_H */
33 #endif /* HAVE_STRING_H */
/* Timestamp that the Windows timer code in this file fills in from a
   FILETIME and later subtracts from the current time.  */
52 LARGE_INTEGER internal_time;
54 /* Internal variables used by the timer. */
55 static long internal_secs, internal_msecs;
/* logflush() and http_loop() are only declared here; their definitions
   are not in the visible part of this file.  */
58 void logflush PARAMS ((void));
61 uerr_t http_loop PARAMS ((struct urlinfo *, char **, int *));
63 /* Flags for show_progress(). */
64 enum spflags { SP_NONE, SP_INIT, SP_FINISH };
66 static int show_progress PARAMS ((long, long, enum spflags));
/* Smaller of I and J.  Each argument appears twice in the expansion, so
   do not pass expressions with side effects.  */
68 #define MIN(i, j) ((i) <= (j) ? (i) : (j))
70 /* Reads the contents of file descriptor FD, until it is closed, or a
71 read error occurs. The data is read in 8K chunks, and stored to
72 stream fp, which should have been open for writing. If RBUF is
73 non-NULL and its file descriptor is equal to FD, flush RBUF first.
74 This function will *not* use the rbuf_* functions!
76 The EXPECTED argument is passed to show_progress() unchanged. When
USE_EXPECTED is non-zero, it also bounds how many bytes are read.
79 If opt.verbose is set, the progress is also shown. RESTVAL
80 represents a value from which to start downloading (which will be
81 shown accordingly). If RESTVAL is non-zero, the stream should have
82 been open for appending.
84 The function exits and returns codes of 0, -1 and -2 if the
85 connection was closed, there was a read error, or if it could not
86 write to the output stream, respectively.
88 IMPORTANT: The function flushes the contents of the buffer in
89 rbuf_flush() before actually reading from fd. If you wish to read
90 from fd immediately, flush or discard the buffer. */
/* NOTE(review): this copy of get_contents() is missing its return type,
   braces, local declarations and several statements; the comments added
   below describe only the lines that are present.  */
92 get_contents (int fd, FILE *fp, long *len, long restval, long expected,
93 struct rbuf *rbuf, int use_expected)
/* Start the progress display.  RESTVAL is passed as the amount that
   show_progress() treats as already retrieved.  */
100 show_progress (restval, expected, SP_INIT);
/* If RBUF belongs to this descriptor, drain it before touching FD.  */
101 if (rbuf && RBUF_FD (rbuf) == fd)
103 while ((res = rbuf_flush (rbuf, c, sizeof (c))) != 0)
/* A short count from fwrite() is the write-failure condition; how it is
   handled is not visible in this copy.  */
105 if (fwrite (c, sizeof (char), res, fp) < res)
/* show_progress() returns non-zero when it drew dots; what is done in
   that case is not visible in this copy.  */
109 if (show_progress (res, expected, SP_NONE))
115 /* Read from fd while there is available data.
117 Normally, if expected is 0, it means that it is not known how
118 much data is expected. However, if use_expected is specified,
119 then expected being zero means exactly that. */
/* Without USE_EXPECTED this condition is always true, so the loop has to
   be left from inside its body; with it, reading stops once *LEN reaches
   EXPECTED.  */
120 while (!use_expected || (*len < expected))
/* With USE_EXPECTED, ask for no more than the bytes still outstanding,
   capped at the size of the buffer C.  */
122 int amount_to_read = (use_expected
123 ? MIN (expected - *len, sizeof (c))
/* NOTE(review): RBUF is dereferenced here without the NULL check used
   before the rbuf_flush() loop above -- confirm that callers never pass
   a NULL RBUF when HAVE_SSL is defined.  */
126 if (rbuf->ssl!=NULL) {
127 res = ssl_iread (rbuf->ssl, c, amount_to_read);
129 #endif /* HAVE_SSL */
130 res = iread (fd, c, amount_to_read);
133 #endif /* HAVE_SSL */
136 if (fwrite (c, sizeof (char), res, fp) < res)
140 if (show_progress (res, expected, SP_NONE))
/* Let the progress display close out its final row.  */
151 show_progress (0, expected, SP_FINISH);
/* Log, at LOG_VERBOSE level, BYTES as a percentage of EXPECTED.  The
   value is truncated to an int and printed right-aligned in three
   columns, followed by a percent sign.
   NOTE(review): EXPECTED is used as a divisor and no zero check is
   visible in this function -- confirm that callers guard against
   expected == 0.  (Return type and braces are missing from this copy.)  */
156 print_percentage (long bytes, long expected)
158 int percentage = (int)(100.0 * bytes / expected);
159 logprintf (LOG_VERBOSE, "%3d%%", percentage);
162 /* Show the dotted progress report of file loading. Called with
163 length and a flag to tell it whether to reset or not. It keeps the
164 offset information in static local variables.
166 Return value: 1 or 0, designating whether any dots have been drawn.
168 If the init argument is set, the routine will initialize.
170 If the res is non-zero, res/line_bytes lines are skipped
171 (meaning the appropriate number of kilobytes), and the number of
172 "dots" fitting on the first line are drawn as ','. */
/* NOTE(review): this copy of show_progress() is missing its return type,
   braces and a number of statements; the comments added below describe
   only the lines that are present.  */
174 show_progress (long res, long expected, enum spflags flags)
/* All bookkeeping is kept in statics, so it persists between calls.  */
/* line_bytes: number of bytes represented by one full row of dots.  */
176 static long line_bytes;
/* offs: bytes received but not yet drawn as dots.  initial_skip is
   subtracted from byte counts before a rate is computed; where it is
   assigned is not visible in this copy.  */
177 static long offs, initial_skip;
/* ndot is compared against opt.dots_in_line to detect a full row; nrow
   scales line_bytes to give the bytes covered by earlier rows.  */
178 static int ndot, nrow;
/* last_timer: elapsed_time() reading taken at init and advanced after
   each full row.  time_offset: time measured since last_timer.  */
179 static long last_timer, time_offset;
/* Final call: the visible lines build a string in TMPSTR for the dot
   positions not yet used on the current row, log it, and then print the
   percentage and the rate for that row.  */
182 if (flags == SP_FINISH)
/* TMPSTR is taken from the stack and sized at two characters per dot
   position plus one.  */
187 char *tmpstr = (char *)alloca (2 * opt.dots_in_line + 1);
189 time_offset = elapsed_time () - last_timer;
/* What is stored into TMPSTR for each position is not visible in this
   copy.  */
190 for (; dot < opt.dots_in_line; dot++)
192 if (!(dot % opt.dot_spacing))
197 logputs (LOG_VERBOSE, tmpstr);
/* Total so far = earlier rows + dots on this row + bytes not yet drawn.  */
198 print_percentage (nrow * line_bytes + ndot * opt.dot_bytes + offs,
200 logprintf (LOG_VERBOSE, " @%s",
201 rate (ndot * opt.dot_bytes + offs - initial_skip,
204 logputs (LOG_VERBOSE, "\n\n");
208 /* Temporarily disable flushing. */
210 /* init set means initialization. If res is set, it also means that
211 the retrieval is *not* done from the beginning. The part that
212 was already retrieved is not shown again. */
213 if (flags == SP_INIT)
215 /* Generic initialization of static variables. */
218 line_bytes = (long)opt.dots_in_line * opt.dot_bytes;
219 last_timer = elapsed_time ();
/* RES already covers at least one whole row: account for those rows
   with a "skipping" message instead of drawing them.  */
224 if (res >= line_bytes)
226 nrow = res / line_bytes;
228 logprintf (LOG_VERBOSE,
229 _("\n [ skipping %dK ]"),
230 (int) ((nrow * line_bytes) / 1024));
/* Row label: the offset, in kilobytes, at which the row starts.  */
234 logprintf (LOG_VERBOSE, "\n%5ldK", nrow * line_bytes / 1024);
236 /* Offset gets incremented by current value. */
238 /* While offset is >= opt.dot_bytes, print dots, taking care to
239 precede every 50th dot with a status message. */
240 for (; offs >= opt.dot_bytes; offs -= opt.dot_bytes)
/* A space separates groups of opt.dot_spacing dots.  */
242 if (!(ndot % opt.dot_spacing))
243 logputs (LOG_VERBOSE, " ");
/* Dots drawn during the SP_INIT call are shown as ',' instead of '.'.  */
245 logputs (LOG_VERBOSE, flags == SP_INIT ? "," : ".");
/* Row complete: measure the time it took, move the reference time
   forward, print percentage and rate, and start the next row label.  */
247 if (ndot == opt.dots_in_line)
249 time_offset = elapsed_time () - last_timer;
250 last_timer += time_offset;
256 print_percentage (nrow * line_bytes, expected);
257 logprintf (LOG_VERBOSE, " @%s",
258 rate (line_bytes - initial_skip, time_offset, 1));
261 logprintf (LOG_VERBOSE, "\n%5ldK", nrow * line_bytes / 1024);
264 /* Reenable flushing. */
267 /* Force flush. #### Oh, what a kludge! */
/* NOTE(review): the function header, its local declarations and the
   preprocessor lines that select between the Unix and the Windows
   variant are missing from this copy.  The name reset_timer() is taken
   from the comment on the companion routine that follows.  */
272 /* Reset the internal timer. */
277 /* Under Unix, the preferred way to measure the passage of time is
278 through gettimeofday() because of its granularity. However, on
279 some old or weird systems, gettimeofday() might not be available.
280 There we use the simple time(). */
281 # ifdef HAVE_GETTIMEOFDAY
283 gettimeofday (&t, NULL);
/* Keep the whole seconds and the millisecond part separately.  */
284 internal_secs = t.tv_sec;
285 internal_msecs = t.tv_usec / 1000;
286 # else /* not HAVE_GETTIMEOFDAY */
/* Only whole-second resolution is available on this path.  */
287 internal_secs = time (NULL);
289 # endif /* not HAVE_GETTIMEOFDAY */
291 /* Under Windows, use Windows-specific APIs. */
/* Convert ST to a FILETIME and store its high and low parts in
   internal_time.  How ST is obtained is not visible in this copy.  */
295 SystemTimeToFileTime(&st,&ft);
296 internal_time.HighPart = ft.dwHighDateTime;
297 internal_time.LowPart = ft.dwLowDateTime;
301 /* Return the time elapsed from the last call to reset_timer(), in milliseconds. */
307 # ifdef HAVE_GETTIMEOFDAY
309 gettimeofday (&t, NULL);
310 return ((t.tv_sec - internal_secs) * 1000
311 + (t.tv_usec / 1000 - internal_msecs));
312 # else /* not HAVE_GETTIMEOFDAY */
/* NOTE(review): the function header and local declarations are missing
   from this copy; this is presumably the body of elapsed_time().  Both
   Unix paths yield milliseconds: the gettimeofday() path above scales
   the seconds difference by 1000 and adds the difference of the
   millisecond parts, while the time() path below has one-second
   granularity.  */
313 return 1000 * ((long)time (NULL) - internal_secs);
314 # endif /* not HAVE_GETTIMEOFDAY */
/* Windows path: rebuild a 64-bit value from the FILETIME halves and
   subtract the value saved in internal_time.  How ST is obtained is not
   visible in this copy.  */
320 SystemTimeToFileTime(&st,&ft);
321 li.HighPart = ft.dwHighDateTime;
322 li.LowPart = ft.dwLowDateTime;
/* FILETIME values count 100-nanosecond intervals (per the Win32
   documentation), so dividing the difference by 1e4 gives milliseconds.  */
323 return (long) ((li.QuadPart - internal_time.QuadPart) / 1e4);
327 /* Print out the appropriate download rate. Appropriate means that if
328 rate is > 1024 bytes per second, kilobytes are used, and if rate >
329 1024 * 1024 bps, megabytes are used.
331 If PAD is non-zero, strings will be padded to the width of 7
332 characters (xxxx.xx). */
/* NOTE(review): the return type, braces, the declarations of RES and
   DLRATE and the condition of the first branch are missing from this
   copy; the comments added below describe only the lines present.  */
334 rate (long bytes, long msecs, int pad)
/* Convert to bytes per second.
   NOTE(review): MSECS is used as a divisor and no zero guard is visible
   here -- confirm that one exists on a line missing from this copy.  */
341 dlrate = (double)1000 * bytes / msecs;
/* Each branch formats DLRATE into RES with a unit suffix; PAD selects
   the fixed-width "%7.2f" number format over plain "%.2f".  */
343 sprintf (res, pad ? "%7.2f B/s" : "%.2f B/s", dlrate);
344 else if (dlrate < 1024.0 * 1024.0)
345 sprintf (res, pad ? "%7.2f KB/s" : "%.2f KB/s", dlrate / 1024.0);
347 sprintf (res, pad ? "%7.2f MB/s" : "%.2f MB/s", dlrate / (1024.0 * 1024.0));
/* Non-zero when all three hold for the URL structure U: opt.use_proxy is
   set, getproxy() returns non-zero for U's protocol, and
   no_proxy_match() returns non-zero for U's host checked against
   opt.no_proxy.  U appears twice in the expansion, so do not pass an
   expression with side effects.  */
351 #define USE_PROXY_P(u) (opt.use_proxy && getproxy((u)->proto) \
352 && no_proxy_match((u)->host, \
353 (const char **)opt.no_proxy))
355 /* Retrieve the given URL. Decides which loop to call -- HTTP(S), FTP,
356 or simply copy it with file:// (#### the latter not yet implemented). */
359 retrieve_url (const char *origurl, char **file, char **newloc,
360 const char *refurl, int *dt)
364 int location_changed, dummy;
366 char *mynewloc, *proxy;
368 struct hash_table *redirections = NULL;
370 /* If dt is NULL, just ignore it. */
/* NOTE(review): this copy of retrieve_url() is missing its return type,
   braces, several declarations and many statements; the comments added
   below describe only the lines that are present.  */
/* Work on a private copy of the caller's URL string.  */
373 url = xstrdup (origurl);
381 result = parseurl (url, u, 0);
/* Error path for a URL that does not parse: the message is logged and
   REDIRECTIONS is released.  */
384 logprintf (LOG_NOTQUIET, "%s: %s.\n", url, uerrmsg (result));
387 string_set_free (redirections);
394 /* Set the referer. */
/* Two sources are visible, the REFURL argument and opt.referer; the
   conditions that choose between them are not shown in this copy.  */
396 u->referer = xstrdup (refurl);
400 u->referer = xstrdup (opt.referer);
405 local_use_proxy = USE_PROXY_P (u);
/* Proxy setup (the guarding condition is not visible): the parsed
   target is moved into PU and U is cleared so that it can be filled
   with the proxy's own URL further down.  */
408 struct urlinfo *pu = newurl ();
410 /* Copy the original URL to new location. */
411 memcpy (pu, u, sizeof (*u));
412 pu->proxy = NULL; /* A minor correction :) */
413 /* Initialize u to nil. */
414 memset (u, 0, sizeof (*u));
/* NOTE(review): the comment starting on the next line has lost its
   closing delimiter in this copy.  */
416 /* Get the appropriate proxy server, appropriate for the
418 proxy = getproxy (pu->proto);
421 logputs (LOG_NOTQUIET, _("Could not find proxy host.\n"));
424 string_set_free (redirections);
428 /* Parse the proxy URL. */
429 result = parseurl (proxy, u, 0);
/* The proxy URL has to parse cleanly and use HTTP; the two messages
   below distinguish a parse error from a non-HTTP proxy.  */
430 if (result != URLOK || u->proto != URLHTTP)
432 if (u->proto == URLHTTP)
433 logprintf (LOG_NOTQUIET, "Proxy %s: %s.\n", proxy, uerrmsg(result));
435 logprintf (LOG_NOTQUIET, _("Proxy %s: Must be HTTP.\n"), proxy);
438 string_set_free (redirections);
445 assert (u->proto != URLFILE); /* #### Implement me! */
/* Dispatch on protocol.  The two HTTP tests below appear to be
   alternatives selected by HAVE_SSL (the opening preprocessor lines are
   not visible); either way the URL is handed to http_loop().  */
449 if (u->proto == URLHTTP || u->proto == URLHTTPS )
451 if (u->proto == URLHTTP)
452 #endif /* HAVE_SSL */
453 result = http_loop (u, &mynewloc, dt);
454 else if (u->proto == URLFTP)
/* NOTE(review): the comment starting on the next line has lost its
   closing delimiter in this copy.  */
456 /* If this is a redirection, we must not allow recursive FTP
457 retrieval, so we save recursion to oldrec, and restore it
459 int oldrec = opt.recursive;
462 result = ftp_loop (u, dt);
463 opt.recursive = oldrec;
464 /* There is a possibility of having HTTP being redirected to
465 FTP. In these cases we must decide whether the text is HTML
466 according to the suffix. The HTML suffixes are `.html' and
467 `.htm', case-insensitive.
469 #### All of this is, of course, crap. These types should be
470 determined through mailcap. */
471 if (redirections && u->local && (u->proto == URLFTP ))
473 char *suf = suffix (u->local);
474 if (suf && (!strcasecmp (suf, "html") || !strcasecmp (suf, "htm")))
/* A result of NEWLOCATION starts the redirect handling below.  */
479 location_changed = (result == NEWLOCATION);
480 if (location_changed)
482 char *construced_newloc;
483 uerr_t newloc_result;
484 struct urlinfo *newloc_struct;
486 assert (mynewloc != NULL);
488 /* The HTTP specs only allow absolute URLs to appear in
489 redirects, but a ton of boneheaded webservers and CGIs out
490 there break the rules and use relative URLs, and popular
491 browsers are lenient about this, so wget should be too. */
/* Combine the current URL with the (possibly relative) redirect target.  */
492 construced_newloc = url_concat (url, mynewloc);
494 mynewloc = construced_newloc;
496 /* Now, see if this new location makes sense. */
497 newloc_struct = newurl ();
498 newloc_result = parseurl (mynewloc, newloc_struct, 1);
499 if (newloc_result != URLOK)
501 logprintf (LOG_NOTQUIET, "%s: %s.\n", mynewloc, uerrmsg (newloc_result));
502 freeurl (newloc_struct, 1);
505 string_set_free (redirections);
511 /* Now mynewloc will become newloc_struct->url, because if the
512 Location contained relative paths like .././something, we
513 don't want that propagating as url. */
515 mynewloc = xstrdup (newloc_struct->url);
/* The set of already-visited URLs is created here; the guard that
   decides when this happens is not visible in this copy.  */
519 redirections = make_string_hash_table (0);
520 /* Add current URL immediately so we can detect it as soon
521 as possible in case of a cycle. */
522 string_set_add (redirections, u->url);
525 /* The new location is OK. Let's check for redirection cycle by
526 peeking through the history of redirections. */
527 if (string_set_exists (redirections, newloc_struct->url))
529 logprintf (LOG_NOTQUIET, _("%s: Redirection cycle detected.\n"),
531 freeurl (newloc_struct, 1);
534 string_set_free (redirections);
/* Record the new target so that a later redirect back to it is caught
   by the check above.  */
539 string_set_add (redirections, newloc_struct->url);
/* *FILE receives a copy of u->local made with xstrdup(); the condition
   guarding this assignment is not visible in this copy.  */
551 *file = xstrdup (u->local);
557 string_set_free (redirections);
567 /* Find the URLs in the file and call retrieve_url() for each of
568 them. If HTML is non-zero, treat the file as HTML, and construct
569 the URLs accordingly.
571 If opt.recursive is set, call recursive_retrieve() for each file. */
/* NOTE(review): this copy of retrieve_from_file() is missing its return
   type, braces, the declarations of STATUS and DT and some statements;
   the comments added below describe only the lines that are present.  */
573 retrieve_from_file (const char *file, int html, int *count)
576 urlpos *url_list, *cur_url;
/* Build the URL list with get_urls_html() or get_urls_file(), depending
   on HTML.  */
578 url_list = (html ? get_urls_html (file, NULL, FALSE, NULL)
579 : get_urls_file (file));
580 status = RETROK; /* Suppose everything is OK. */
581 *count = 0; /* Reset the URL count. */
/* *COUNT is incremented together with the list cursor, i.e. once per
   iteration that runs to the end of the loop body.  */
583 for (cur_url = url_list; cur_url; cur_url = cur_url->next, ++*count)
585 char *filename, *new_file;
/* The quota is checked before each download; what happens when it is
   exceeded is not visible in this copy.  */
588 if (downloaded_exceeds_quota ())
593 status = retrieve_url (cur_url->url, &filename, &new_file, NULL, &dt);
/* Recurse only when recursion is enabled, the download succeeded and
   the TEXTHTML bit is set in DT.  */
594 if (opt.recursive && status == RETROK && (dt & TEXTHTML))
595 status = recursive_retrieve (filename, new_file ? new_file
/* With opt.delete_after set, remove the file just fetched if it exists;
   an unlink() failure is logged.  */
598 if (filename && opt.delete_after && file_exists_p (filename))
600 DEBUGP (("Removing file due to --delete-after in"
601 " retrieve_from_file():\n"));
602 logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
603 if (unlink (filename))
604 logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
608 FREE_MAYBE (new_file);
609 FREE_MAYBE (filename);
612 /* Free the linked list of URL-s. */
613 free_urlpos (url_list);
618 /* Print `giving up', or `retrying', depending on the impending
619 action. N1 and N2 are the attempt number and the attempt limit. */
/* Logs at LOG_VERBOSE level.  "Giving up." is chosen only when N1 equals
   N2 exactly; every other combination yields "Retrying.".  (The return
   type and braces are missing from this copy.)  */
621 printwhat (int n1, int n2)
623 logputs (LOG_VERBOSE, (n1 == n2) ? _("Giving up.\n\n") : _("Retrying.\n\n"));
626 /* Increment opt.downloaded by BY_HOW_MUCH. If an overflow occurs,
627 set opt.downloaded_overflow to 1. */
/* NOTE(review): the return type, braces, the declaration of OLD and the
   body of the first test are missing from this copy.  */
629 downloaded_increase (unsigned long by_how_much)
/* The overflow flag is consulted before the total is touched; what is
   done when it is already set is not visible in this copy.  */
632 if (opt.downloaded_overflow)
634 old = opt.downloaded;
635 opt.downloaded += by_how_much;
/* A sum smaller than the previous total means the addition wrapped
   around (this relies on opt.downloaded being unsigned -- its
   declaration is not visible here).  The lines after the test set the
   overflow flag and pin the counter at the all-ones value.
   NOTE(review): the trailing comment on the next line has lost its
   ending in this copy.  */
636 if (opt.downloaded < old) /* carry flag, where are you when I
640 opt.downloaded_overflow = 1;
641 opt.downloaded = ~((VERY_LONG_TYPE)0);
645 /* Return non-zero if the downloaded amount of bytes exceeds the
646 desired quota. If quota is not set or if the amount overflowed, 0 is returned. */
649 downloaded_exceeds_quota (void)
653 if (opt.downloaded_overflow)
654 /* We don't really know. (Wildly) assume not. */
/* Strict comparison: a total exactly equal to opt.quota does not count
   as exceeding it.  (The return type, braces and the statements for the
   early exits are missing from this copy.)  */
657 return opt.downloaded > opt.quota;
660 /* If opt.wait or opt.waitretry are specified, and if certain
661 conditions are met, sleep the appropriate number of seconds. See
662 the documentation of --wait and --waitretry for more information.
664 COUNT is the count of current retrieval, beginning with 1. */
667 sleep_between_retrievals (int count)
669 static int first_retrieval = 1;
671 if (!first_retrieval && (opt.wait || opt.waitretry))
673 if (opt.waitretry && count > 1)
675 /* If opt.waitretry is specified and this is a retry, wait
676 for COUNT-1 number of seconds, or for opt.waitretry
678 if (count <= opt.waitretry)
681 sleep (opt.waitretry);
684 /* Otherwise, check if opt.wait is specified. If so, sleep. */