2 Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001 Free Software Foundation, Inc.
4 This file is part of Wget.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
24 #include <sys/types.h>
27 #endif /* HAVE_UNISTD_H */
33 #endif /* HAVE_STRING_H */
52 LARGE_INTEGER internal_time;
54 /* Internal variables used by the timer. */
55 static long internal_secs, internal_msecs;
58 void logflush PARAMS ((void));
61 uerr_t http_loop PARAMS ((struct urlinfo *, char **, int *));
63 /* Flags for show_progress().  SP_INIT is passed to start a report,
   SP_NONE accompanies each further amount of data, and SP_FINISH is
   passed to end the report. */
64 enum spflags { SP_NONE, SP_INIT, SP_FINISH };
66 static int show_progress PARAMS ((long, long, enum spflags));
/* Yield the smaller of I and J (I when they compare equal).  Each
   argument appears twice in the expansion, so an argument with side
   effects may be evaluated more than once. */
68 #define MIN(i, j) ((i) <= (j) ? (i) : (j))
70 /* Reads the contents of file descriptor FD, until it is closed, or a
71 read error occurs. The data is read in 8K chunks, and stored to
72 stream fp, which should have been opened for writing. If RBUF is
73 non-NULL and its file descriptor is equal to FD, flush RBUF first.
74 This function will *not* use the rbuf_* functions!
76 The EXPECTED argument is passed to show_progress() unchanged, but
79 If opt.verbose is set, the progress is also shown. RESTVAL
80 represents a value from which to start downloading (which will be
81 shown accordingly). If RESTVAL is non-zero, the stream should have
82 been open for appending.
84 The function exits and returns codes of 0, -1 and -2 if the
85 connection was closed, there was a read error, or if it could not
86 write to the output stream, respectively.
88 IMPORTANT: The function flushes the contents of the buffer in
89 rbuf_flush() before actually reading from fd. If you wish to read
90 from fd immediately, flush or discard the buffer. */
92 get_contents (int fd, FILE *fp, long *len, long restval, long expected,
93 struct rbuf *rbuf, int use_expected)
100 show_progress (restval, expected, SP_INIT);
101 if (rbuf && RBUF_FD (rbuf) == fd)
103 while ((res = rbuf_flush (rbuf, c, sizeof (c))) != 0)
105 if (fwrite (c, sizeof (char), res, fp) < res)
109 if (show_progress (res, expected, SP_NONE))
115 /* Read from fd while there is available data.
117 Normally, if expected is 0, it means that it is not known how
118 much data is expected. However, if use_expected is specified,
119 then expected being zero means exactly that. */
120 while (!use_expected || (*len < expected))
122 int amount_to_read = (use_expected
123 ? MIN (expected - *len, sizeof (c))
126 if (rbuf->ssl!=NULL) {
127 res = ssl_iread (rbuf->ssl, c, amount_to_read);
129 #endif /* HAVE_SSL */
130 res = iread (fd, c, amount_to_read);
133 #endif /* HAVE_SSL */
136 if (fwrite (c, sizeof (char), res, fp) < res)
140 if (show_progress (res, expected, SP_NONE))
151 show_progress (0, expected, SP_FINISH);
156 print_percentage (long bytes, long expected)
158 int percentage = (int)(100.0 * bytes / expected);
159 logprintf (LOG_VERBOSE, "%3d%%", percentage);
162 /* Show the dotted progress report of file loading. Called with
163 length and a flag to tell it whether to reset or not. It keeps the
164 offset information in static local variables.
166 Return value: 1 or 0, designating whether any dots have been drawn.
168 If FLAGS is SP_INIT, the routine will initialize its static state.
170 If the res is non-zero, res/line_bytes lines are skipped
171 (meaning the appropriate number of kilobytes), and the number of
172 "dots" fitting on the first line are drawn as ','. */
174 show_progress (long res, long expected, enum spflags flags)
176 static long line_bytes;
177 static long offs, initial_skip;
178 static int ndot, nrow;
179 static long last_timer, time_offset;
182 if (flags == SP_FINISH)
187 char *tmpstr = (char *)alloca (2 * opt.dots_in_line + 1);
189 time_offset = elapsed_time () - last_timer;
190 for (; dot < opt.dots_in_line; dot++)
192 if (!(dot % opt.dot_spacing))
197 logputs (LOG_VERBOSE, tmpstr);
198 print_percentage (nrow * line_bytes + ndot * opt.dot_bytes + offs,
200 logprintf (LOG_VERBOSE, " @%s",
201 rate (ndot * opt.dot_bytes
202 + offs - (initial_skip % line_bytes),
205 logputs (LOG_VERBOSE, "\n\n");
209 /* Temporarily disable flushing. */
211 /* init set means initialization. If res is set, it also means that
212 the retrieval is *not* done from the beginning. The part that
213 was already retrieved is not shown again. */
214 if (flags == SP_INIT)
216 /* Generic initialization of static variables. */
219 line_bytes = (long)opt.dots_in_line * opt.dot_bytes;
220 last_timer = elapsed_time ();
225 if (res >= line_bytes)
227 nrow = res / line_bytes;
229 logprintf (LOG_VERBOSE,
230 _("\n [ skipping %dK ]"),
231 (int) ((nrow * line_bytes) / 1024));
235 logprintf (LOG_VERBOSE, "\n%5ldK", nrow * line_bytes / 1024);
237 /* Offset gets incremented by current value. */
239 /* While offset is >= opt.dot_bytes, print dots, taking care to
240 follow every full row of opt.dots_in_line dots with a status message. */
241 for (; offs >= opt.dot_bytes; offs -= opt.dot_bytes)
243 if (!(ndot % opt.dot_spacing))
244 logputs (LOG_VERBOSE, " ");
246 logputs (LOG_VERBOSE, flags == SP_INIT ? "," : ".");
248 if (ndot == opt.dots_in_line)
250 time_offset = elapsed_time () - last_timer;
251 last_timer += time_offset;
257 print_percentage (nrow * line_bytes, expected);
258 logprintf (LOG_VERBOSE, " @%s",
259 rate (line_bytes - (initial_skip % line_bytes),
263 logprintf (LOG_VERBOSE, "\n%5ldK", nrow * line_bytes / 1024);
266 /* Reenable flushing. */
269 /* Force flush. #### Oh, what a kludge! */
274 /* Reset the internal timer. */
279 /* Under Unix, the preferred way to measure the passage of time is
280 through gettimeofday() because of its granularity. However, on
281 some old or weird systems, gettimeofday() might not be available.
282 There we use the simple time(). */
283 # ifdef HAVE_GETTIMEOFDAY
285 gettimeofday (&t, NULL);
286 internal_secs = t.tv_sec;
287 internal_msecs = t.tv_usec / 1000;
288 # else /* not HAVE_GETTIMEOFDAY */
289 internal_secs = time (NULL);
291 # endif /* not HAVE_GETTIMEOFDAY */
293 /* Under Windows, use Windows-specific APIs. */
297 SystemTimeToFileTime(&st,&ft);
298 internal_time.HighPart = ft.dwHighDateTime;
299 internal_time.LowPart = ft.dwLowDateTime;
303 /* Return the time elapsed from the last call to reset_timer(), in
309 # ifdef HAVE_GETTIMEOFDAY
311 gettimeofday (&t, NULL);
312 return ((t.tv_sec - internal_secs) * 1000
313 + (t.tv_usec / 1000 - internal_msecs));
314 # else /* not HAVE_GETTIMEOFDAY */
315 return 1000 * ((long)time (NULL) - internal_secs);
316 # endif /* not HAVE_GETTIMEOFDAY */
322 SystemTimeToFileTime(&st,&ft);
323 li.HighPart = ft.dwHighDateTime;
324 li.LowPart = ft.dwLowDateTime;
325 return (long) ((li.QuadPart - internal_time.QuadPart) / 1e4);
329 /* Print out the appropriate download rate. Appropriate means that if
330 rate is > 1024 bytes per second, kilobytes are used, and if rate >
331 1024 * 1024 bytes per second, megabytes are used.
333 If PAD is non-zero, strings will be padded to the width of 7
334 characters (xxxx.xx). */
336 rate (long bytes, long msecs, int pad)
343 dlrate = (double)1000 * bytes / msecs;
345 sprintf (res, pad ? "%7.2f B/s" : "%.2f B/s", dlrate);
346 else if (dlrate < 1024.0 * 1024.0)
347 sprintf (res, pad ? "%7.2f KB/s" : "%.2f KB/s", dlrate / 1024.0);
349 sprintf (res, pad ? "%7.2f MB/s" : "%.2f MB/s", dlrate / (1024.0 * 1024.0));
/* Non-zero when all three hold: opt.use_proxy is set, getproxy()
   yields a proxy for U's protocol, and no_proxy_match() returns
   non-zero for U's host checked against the opt.no_proxy list.
   U appears twice in the expansion, so it should have no side effects. */
353 #define USE_PROXY_P(u) (opt.use_proxy && getproxy((u)->proto) \
354 && no_proxy_match((u)->host, \
355 (const char **)opt.no_proxy))
357 /* Retrieve the given URL. Decides which loop to call -- HTTP(S), FTP,
358 or simply copy it with file:// (#### the latter not yet
361 retrieve_url (const char *origurl, char **file, char **newloc,
362 const char *refurl, int *dt)
366 int location_changed, dummy;
368 char *mynewloc, *proxy;
370 struct hash_table *redirections = NULL;
372 /* If dt is NULL, just ignore it. */
375 url = xstrdup (origurl);
383 result = parseurl (url, u, 0);
386 logprintf (LOG_NOTQUIET, "%s: %s.\n", url, uerrmsg (result));
389 string_set_free (redirections);
396 /* Set the referer. */
398 u->referer = xstrdup (refurl);
402 u->referer = xstrdup (opt.referer);
407 local_use_proxy = USE_PROXY_P (u);
410 struct urlinfo *pu = newurl ();
412 /* Copy the original URL to new location. */
413 memcpy (pu, u, sizeof (*u));
414 pu->proxy = NULL; /* A minor correction :) */
415 /* Initialize u to nil. */
416 memset (u, 0, sizeof (*u));
418 /* Get the appropriate proxy server, appropriate for the
420 proxy = getproxy (pu->proto);
423 logputs (LOG_NOTQUIET, _("Could not find proxy host.\n"));
426 string_set_free (redirections);
430 /* Parse the proxy URL. */
431 result = parseurl (proxy, u, 0);
432 if (result != URLOK || u->proto != URLHTTP)
434 if (u->proto == URLHTTP)
435 logprintf (LOG_NOTQUIET, "Proxy %s: %s.\n", proxy, uerrmsg(result));
437 logprintf (LOG_NOTQUIET, _("Proxy %s: Must be HTTP.\n"), proxy);
440 string_set_free (redirections);
447 assert (u->proto != URLFILE); /* #### Implement me! */
451 if (u->proto == URLHTTP || u->proto == URLHTTPS )
453 if (u->proto == URLHTTP)
454 #endif /* HAVE_SSL */
455 result = http_loop (u, &mynewloc, dt);
456 else if (u->proto == URLFTP)
458 /* If this is a redirection, we must not allow recursive FTP
459 retrieval, so we save recursion to oldrec, and restore it
461 int oldrec = opt.recursive;
464 result = ftp_loop (u, dt);
465 opt.recursive = oldrec;
466 /* There is a possibility of having HTTP being redirected to
467 FTP. In these cases we must decide whether the text is HTML
468 according to the suffix. The HTML suffixes are `.html' and
469 `.htm', case-insensitive.
471 #### All of this is, of course, crap. These types should be
472 determined through mailcap. */
473 if (redirections && u->local && (u->proto == URLFTP ))
475 char *suf = suffix (u->local);
476 if (suf && (!strcasecmp (suf, "html") || !strcasecmp (suf, "htm")))
481 location_changed = (result == NEWLOCATION);
482 if (location_changed)
484 char *construced_newloc;
485 uerr_t newloc_result;
486 struct urlinfo *newloc_struct;
488 assert (mynewloc != NULL);
490 /* The HTTP specs only allow absolute URLs to appear in
491 redirects, but a ton of boneheaded webservers and CGIs out
492 there break the rules and use relative URLs, and popular
493 browsers are lenient about this, so wget should be too. */
494 construced_newloc = url_concat (url, mynewloc);
496 mynewloc = construced_newloc;
498 /* Now, see if this new location makes sense. */
499 newloc_struct = newurl ();
500 newloc_result = parseurl (mynewloc, newloc_struct, 1);
501 if (newloc_result != URLOK)
503 logprintf (LOG_NOTQUIET, "%s: %s.\n", mynewloc, uerrmsg (newloc_result));
504 freeurl (newloc_struct, 1);
507 string_set_free (redirections);
513 /* Now mynewloc will become newloc_struct->url, because if the
514 Location contained relative paths like .././something, we
515 don't want that propagating as url. */
517 mynewloc = xstrdup (newloc_struct->url);
521 redirections = make_string_hash_table (0);
522 /* Add current URL immediately so we can detect it as soon
523 as possible in case of a cycle. */
524 string_set_add (redirections, u->url);
527 /* The new location is OK. Let's check for redirection cycle by
528 peeking through the history of redirections. */
529 if (string_set_exists (redirections, newloc_struct->url))
531 logprintf (LOG_NOTQUIET, _("%s: Redirection cycle detected.\n"),
533 freeurl (newloc_struct, 1);
536 string_set_free (redirections);
541 string_set_add (redirections, newloc_struct->url);
553 *file = xstrdup (u->local);
559 string_set_free (redirections);
569 /* Find the URLs in the file and call retrieve_url() for each of
570 them. If HTML is non-zero, treat the file as HTML, and construct
571 the URLs accordingly.
573 If opt.recursive is set, call recursive_retrieve() for each file. */
575 retrieve_from_file (const char *file, int html, int *count)
578 urlpos *url_list, *cur_url;
580 url_list = (html ? get_urls_html (file, NULL, FALSE, NULL)
581 : get_urls_file (file));
582 status = RETROK; /* Suppose everything is OK. */
583 *count = 0; /* Reset the URL count. */
585 for (cur_url = url_list; cur_url; cur_url = cur_url->next, ++*count)
587 char *filename, *new_file;
590 if (downloaded_exceeds_quota ())
595 status = retrieve_url (cur_url->url, &filename, &new_file, NULL, &dt);
596 if (opt.recursive && status == RETROK && (dt & TEXTHTML))
597 status = recursive_retrieve (filename, new_file ? new_file
600 if (filename && opt.delete_after && file_exists_p (filename))
602 DEBUGP (("Removing file due to --delete-after in"
603 " retrieve_from_file():\n"));
604 logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
605 if (unlink (filename))
606 logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
610 FREE_MAYBE (new_file);
611 FREE_MAYBE (filename);
614 /* Free the linked list of URL-s. */
615 free_urlpos (url_list);
620 /* Print `giving up', or `retrying', depending on the impending
621 action. N1 and N2 are the attempt number and the attempt limit. */
623 printwhat (int n1, int n2)
625 logputs (LOG_VERBOSE, (n1 == n2) ? _("Giving up.\n\n") : _("Retrying.\n\n"));
628 /* Increment opt.downloaded by BY_HOW_MUCH. If an overflow occurs,
629 set opt.downloaded_overflow to 1. */
631 downloaded_increase (unsigned long by_how_much)
634 if (opt.downloaded_overflow)
636 old = opt.downloaded;
637 opt.downloaded += by_how_much;
638 if (opt.downloaded < old) /* carry flag, where are you when I
642 opt.downloaded_overflow = 1;
643 opt.downloaded = ~((VERY_LONG_TYPE)0);
647 /* Return non-zero if the downloaded amount of bytes exceeds the
648 desired quota. If quota is not set or if the amount overflowed, 0
651 downloaded_exceeds_quota (void)
655 if (opt.downloaded_overflow)
656 /* We don't really know. (Wildly) assume not. */
659 return opt.downloaded > opt.quota;
662 /* If opt.wait or opt.waitretry are specified, and if certain
663 conditions are met, sleep the appropriate number of seconds. See
664 the documentation of --wait and --waitretry for more information.
666 COUNT is the count of current retrieval, beginning with 1. */
669 sleep_between_retrievals (int count)
671 static int first_retrieval = 1;
673 if (!first_retrieval && (opt.wait || opt.waitretry))
675 if (opt.waitretry && count > 1)
677 /* If opt.waitretry is specified and this is a retry, wait
678 for COUNT-1 number of seconds, or for opt.waitretry
680 if (count <= opt.waitretry)
683 sleep (opt.waitretry);
686 /* Otherwise, check if opt.wait is specified. If so, sleep. */