2 Copyright (C) 1995, 1996, 1997, 1998, 2000 Free Software Foundation, Inc.
4 This file is part of Wget.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
24 #include <sys/types.h>
27 #endif /* HAVE_UNISTD_H */
33 #endif /* HAVE_STRING_H */
52 LARGE_INTEGER internal_time;
54 /* Internal variables used by the timer. */
55 static long internal_secs, internal_msecs;
58 void logflush PARAMS ((void));
61 uerr_t http_loop PARAMS ((struct urlinfo *, char **, int *));
63 /* Flags for show_progress(). */
64 enum spflags { SP_NONE, SP_INIT, SP_FINISH };
66 static int show_progress PARAMS ((long, long, enum spflags));
68 #define MIN(i, j) ((i) <= (j) ? (i) : (j))
70 /* Reads the contents of file descriptor FD, until it is closed, or a
71 read error occurs. The data is read in 8K chunks, and stored to
72 stream fp, which should have been opened for writing. If RBUF is
73 non-NULL and its file descriptor is equal to FD, flush RBUF first.
74 This function will *not* use the rbuf_* functions!
76 The EXPECTED argument is passed to show_progress() unchanged.
79 If opt.verbose is set, the progress is also shown. RESTVAL
80 represents a value from which to start downloading (which will be
81 shown accordingly). If RESTVAL is non-zero, the stream should have
82 been open for appending.
84 The function exits and returns codes of 0, -1 and -2 if the
85 connection was closed, there was a read error, or if it could not
86 write to the output stream, respectively.
88 IMPORTANT: The function flushes the contents of the buffer in
89 rbuf_flush() before actually reading from fd. If you wish to read
90 from fd immediately, flush or discard the buffer. */
92 get_contents (int fd, FILE *fp, long *len, long restval, long expected,
93 struct rbuf *rbuf, int use_expected)
100 show_progress (restval, expected, SP_INIT);
101 if (rbuf && RBUF_FD (rbuf) == fd)
103 while ((res = rbuf_flush (rbuf, c, sizeof (c))) != 0)
105 if (fwrite (c, sizeof (char), res, fp) < res)
109 if (show_progress (res, expected, SP_NONE))
115 /* Read from fd while there is available data.
117 Normally, if expected is 0, it means that it is not known how
118 much data is expected. However, if use_expected is specified,
119 then expected being zero means exactly that. */
120 while (!use_expected || (*len < expected))
122 int amount_to_read = (use_expected
123 ? MIN (expected - *len, sizeof (c))
126 if (rbuf->ssl!=NULL) {
127 res = ssl_iread (rbuf->ssl, c, amount_to_read);
129 #endif /* HAVE_SSL */
130 res = iread (fd, c, amount_to_read);
133 #endif /* HAVE_SSL */
136 if (fwrite (c, sizeof (char), res, fp) < res)
140 if (show_progress (res, expected, SP_NONE))
151 show_progress (0, expected, SP_FINISH);
156 print_percentage (long bytes, long expected)
158 int percentage = (int)(100.0 * bytes / expected);
159 logprintf (LOG_VERBOSE, " [%3d%%]", percentage);
162 /* Show the dotted progress report of file loading. Called with
163 length and a flag to tell it whether to reset or not. It keeps the
164 offset information in static local variables.
166 Return value: 1 or 0, designating whether any dots have been drawn.
168 If the init argument is set, the routine will initialize.
170 If the res is non-zero, res/line_bytes lines are skipped
171 (meaning the appropriate number of kilobytes), and the number of
172 "dots" fitting on the first line are drawn as ','. */
174 show_progress (long res, long expected, enum spflags flags)
176 static long line_bytes;
178 static int ndot, nrow;
181 if (flags == SP_FINISH)
186 char *tmpstr = (char *)alloca (2 * opt.dots_in_line + 1);
188 for (; dot < opt.dots_in_line; dot++)
190 if (!(dot % opt.dot_spacing))
195 logputs (LOG_VERBOSE, tmpstr);
196 print_percentage (nrow * line_bytes + ndot * opt.dot_bytes + offs,
199 logputs (LOG_VERBOSE, "\n\n");
203 /* Temporarily disable flushing. */
205 /* init set means initialization. If res is set, it also means that
206 the retrieval is *not* done from the beginning. The part that
207 was already retrieved is not shown again. */
208 if (flags == SP_INIT)
210 /* Generic initialization of static variables. */
213 line_bytes = (long)opt.dots_in_line * opt.dot_bytes;
216 if (res >= line_bytes)
218 nrow = res / line_bytes;
220 logprintf (LOG_VERBOSE,
221 _("\n [ skipping %dK ]"),
222 (int) ((nrow * line_bytes) / 1024));
226 logprintf (LOG_VERBOSE, "\n%5ldK ->", nrow * line_bytes / 1024);
228 /* Offset gets incremented by current value. */
230 /* While offset is >= opt.dot_bytes, print dots, taking care to
231 follow each full row of opt.dots_in_line dots with a status message. */
232 for (; offs >= opt.dot_bytes; offs -= opt.dot_bytes)
234 if (!(ndot % opt.dot_spacing))
235 logputs (LOG_VERBOSE, " ");
237 logputs (LOG_VERBOSE, flags == SP_INIT ? "," : ".");
239 if (ndot == opt.dots_in_line)
244 print_percentage (nrow * line_bytes, expected);
245 logprintf (LOG_VERBOSE, "\n%5ldK ->", nrow * line_bytes / 1024);
248 /* Reenable flushing. */
251 /* Force flush. #### Oh, what a kludge! */
256 /* Reset the internal timer. */
261 /* Under Unix, the preferred way to measure the passage of time is
262 through gettimeofday() because of its granularity. However, on
263 some old or weird systems, gettimeofday() might not be available.
264 There we use the simple time(). */
265 # ifdef HAVE_GETTIMEOFDAY
267 gettimeofday (&t, NULL);
268 internal_secs = t.tv_sec;
269 internal_msecs = t.tv_usec / 1000;
270 # else /* not HAVE_GETTIMEOFDAY */
271 internal_secs = time (NULL);
273 # endif /* not HAVE_GETTIMEOFDAY */
275 /* Under Windows, use Windows-specific APIs. */
279 SystemTimeToFileTime(&st,&ft);
280 internal_time.HighPart = ft.dwHighDateTime;
281 internal_time.LowPart = ft.dwLowDateTime;
285 /* Return the time elapsed from the last call to reset_timer(), in milliseconds. */
291 # ifdef HAVE_GETTIMEOFDAY
293 gettimeofday (&t, NULL);
294 return ((t.tv_sec - internal_secs) * 1000
295 + (t.tv_usec / 1000 - internal_msecs));
296 # else /* not HAVE_GETTIMEOFDAY */
297 return 1000 * ((long)time (NULL) - internal_secs);
298 # endif /* not HAVE_GETTIMEOFDAY */
304 SystemTimeToFileTime(&st,&ft);
305 li.HighPart = ft.dwHighDateTime;
306 li.LowPart = ft.dwLowDateTime;
307 return (long) ((li.QuadPart - internal_time.QuadPart) / 1e4);
311 /* Print out the appropriate download rate. Appropriate means that if
312 rate is > 1024 bytes per second, kilobytes are used, and if rate >
313 1024 * 1024 bytes per second, megabytes are used. */
315 rate (long bytes, long msecs)
322 dlrate = (double)1000 * bytes / msecs;
323 /* #### Should these strings be translatable? */
325 sprintf (res, "%.2f B/s", dlrate);
326 else if (dlrate < 1024.0 * 1024.0)
327 sprintf (res, "%.2f KB/s", dlrate / 1024.0);
329 sprintf (res, "%.2f MB/s", dlrate / (1024.0 * 1024.0));
333 #define USE_PROXY_P(u) (opt.use_proxy && getproxy((u)->proto) \
334 && no_proxy_match((u)->host, \
335 (const char **)opt.no_proxy))
337 /* Retrieve the given URL. Decides which loop to call -- HTTP(S), FTP,
338 or simply copy it with file:// (#### the latter not yet implemented). */
341 retrieve_url (const char *origurl, char **file, char **newloc,
342 const char *refurl, int *dt)
346 int location_changed, dummy;
348 char *mynewloc, *proxy;
350 struct hash_table *redirections = NULL;
352 /* If dt is NULL, just ignore it. */
355 url = xstrdup (origurl);
363 result = parseurl (url, u, 0);
366 logprintf (LOG_NOTQUIET, "%s: %s.\n", url, uerrmsg (result));
369 string_set_free (redirections);
376 /* Set the referer. */
378 u->referer = xstrdup (refurl);
382 u->referer = xstrdup (opt.referer);
387 local_use_proxy = USE_PROXY_P (u);
390 struct urlinfo *pu = newurl ();
392 /* Copy the original URL to new location. */
393 memcpy (pu, u, sizeof (*u));
394 pu->proxy = NULL; /* A minor correction :) */
395 /* Initialize u to nil. */
396 memset (u, 0, sizeof (*u));
398 /* Get the proxy server appropriate for the URL's protocol. */
400 proxy = getproxy (pu->proto);
403 logputs (LOG_NOTQUIET, _("Could not find proxy host.\n"));
406 string_set_free (redirections);
410 /* Parse the proxy URL. */
411 result = parseurl (proxy, u, 0);
412 if (result != URLOK || u->proto != URLHTTP)
414 if (u->proto == URLHTTP)
415 logprintf (LOG_NOTQUIET, "Proxy %s: %s.\n", proxy, uerrmsg(result));
417 logprintf (LOG_NOTQUIET, _("Proxy %s: Must be HTTP.\n"), proxy);
420 string_set_free (redirections);
427 assert (u->proto != URLFILE); /* #### Implement me! */
431 if (u->proto == URLHTTP || u->proto == URLHTTPS )
433 if (u->proto == URLHTTP)
434 #endif /* HAVE_SSL */
435 result = http_loop (u, &mynewloc, dt);
436 else if (u->proto == URLFTP)
438 /* If this is a redirection, we must not allow recursive FTP
439 retrieval, so we save recursion to oldrec, and restore it after ftp_loop() returns. */
441 int oldrec = opt.recursive;
444 result = ftp_loop (u, dt);
445 opt.recursive = oldrec;
446 /* There is a possibility of having HTTP being redirected to
447 FTP. In these cases we must decide whether the text is HTML
448 according to the suffix. The HTML suffixes are `.html' and
449 `.htm', case-insensitive.
451 #### All of this is, of course, crap. These types should be
452 determined through mailcap. */
453 if (redirections && u->local && (u->proto == URLFTP ))
455 char *suf = suffix (u->local);
456 if (suf && (!strcasecmp (suf, "html") || !strcasecmp (suf, "htm")))
461 location_changed = (result == NEWLOCATION);
462 if (location_changed)
464 char *construced_newloc;
465 uerr_t newloc_result;
466 struct urlinfo *newloc_struct;
468 assert (mynewloc != NULL);
470 /* The HTTP specs only allow absolute URLs to appear in
471 redirects, but a ton of boneheaded webservers and CGIs out
472 there break the rules and use relative URLs, and popular
473 browsers are lenient about this, so wget should be too. */
474 construced_newloc = url_concat (url, mynewloc);
476 mynewloc = construced_newloc;
478 /* Now, see if this new location makes sense. */
479 newloc_struct = newurl ();
480 newloc_result = parseurl (mynewloc, newloc_struct, 1);
481 if (newloc_result != URLOK)
483 logprintf (LOG_NOTQUIET, "%s: %s.\n", mynewloc, uerrmsg (newloc_result));
484 freeurl (newloc_struct, 1);
487 string_set_free (redirections);
493 /* Now mynewloc will become newloc_struct->url, because if the
494 Location contained relative paths like .././something, we
495 don't want that propagating as url. */
497 mynewloc = xstrdup (newloc_struct->url);
501 redirections = make_string_hash_table (0);
502 /* Add current URL immediately so we can detect it as soon
503 as possible in case of a cycle. */
504 string_set_add (redirections, u->url);
507 /* The new location is OK. Let's check for redirection cycle by
508 peeking through the history of redirections. */
509 if (string_set_exists (redirections, newloc_struct->url))
511 logprintf (LOG_NOTQUIET, _("%s: Redirection cycle detected.\n"),
513 freeurl (newloc_struct, 1);
516 string_set_free (redirections);
521 string_set_add (redirections, newloc_struct->url);
533 *file = xstrdup (u->local);
539 string_set_free (redirections);
549 /* Find the URLs in the file and call retrieve_url() for each of
550 them. If HTML is non-zero, treat the file as HTML, and construct
551 the URLs accordingly.
553 If opt.recursive is set, call recursive_retrieve() for each file. */
555 retrieve_from_file (const char *file, int html, int *count)
558 urlpos *url_list, *cur_url;
560 url_list = (html ? get_urls_html (file, NULL, FALSE, NULL)
561 : get_urls_file (file));
562 status = RETROK; /* Suppose everything is OK. */
563 *count = 0; /* Reset the URL count. */
565 for (cur_url = url_list; cur_url; cur_url = cur_url->next, ++*count)
567 char *filename, *new_file;
570 if (downloaded_exceeds_quota ())
575 status = retrieve_url (cur_url->url, &filename, &new_file, NULL, &dt);
576 if (opt.recursive && status == RETROK && (dt & TEXTHTML))
577 status = recursive_retrieve (filename, new_file ? new_file
580 if (filename && opt.delete_after && file_exists_p (filename))
582 DEBUGP (("Removing file due to --delete-after in"
583 " retrieve_from_file():\n"));
584 logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
585 if (unlink (filename))
586 logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
590 FREE_MAYBE (new_file);
591 FREE_MAYBE (filename);
594 /* Free the linked list of URL-s. */
595 free_urlpos (url_list);
600 /* Print `giving up', or `retrying', depending on the impending
601 action. N1 and N2 are the attempt number and the attempt limit. */
603 printwhat (int n1, int n2)
605 logputs (LOG_VERBOSE, (n1 == n2) ? _("Giving up.\n\n") : _("Retrying.\n\n"));
608 /* Increment opt.downloaded by BY_HOW_MUCH. If an overflow occurs,
609 set opt.downloaded_overflow to 1. */
611 downloaded_increase (unsigned long by_how_much)
614 if (opt.downloaded_overflow)
616 old = opt.downloaded;
617 opt.downloaded += by_how_much;
618 if (opt.downloaded < old) /* carry flag, where are you when I
622 opt.downloaded_overflow = 1;
623 opt.downloaded = ~((VERY_LONG_TYPE)0);
627 /* Return non-zero if the downloaded amount of bytes exceeds the
628 desired quota. If quota is not set or if the amount overflowed, 0 is returned. */
631 downloaded_exceeds_quota (void)
635 if (opt.downloaded_overflow)
636 /* We don't really know. (Wildly) assume not. */
639 return opt.downloaded > opt.quota;
642 /* If opt.wait or opt.waitretry are specified, and if certain
643 conditions are met, sleep the appropriate number of seconds. See
644 the documentation of --wait and --waitretry for more information.
646 COUNT is the count of current retrieval, beginning with 1. */
649 sleep_between_retrievals (int count)
651 static int first_retrieval = 1;
653 if (!first_retrieval && (opt.wait || opt.waitretry))
655 if (opt.waitretry && count > 1)
657 /* If opt.waitretry is specified and this is a retry, wait
658 for COUNT-1 number of seconds, or for opt.waitretry seconds. */
660 if (count <= opt.waitretry)
663 sleep (opt.waitretry);
666 /* Otherwise, check if opt.wait is specified. If so, sleep. */