2 Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001 Free Software Foundation, Inc.
4 This file is part of Wget.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
24 #include <sys/types.h>
27 #endif /* HAVE_UNISTD_H */
33 #endif /* HAVE_STRING_H */
51 LARGE_INTEGER internal_time;
53 /* Internal variables used by the timer. */
54 static long internal_secs, internal_msecs;
57 /* See the comment in gethttp() for why this is needed. */
58 int global_download_count;
60 void logflush PARAMS ((void));
63 uerr_t http_loop PARAMS ((struct urlinfo *, char **, int *));
65 /* Flags for show_progress(). */
66 enum spflags { SP_NONE, SP_INIT, SP_FINISH };
68 static int show_progress PARAMS ((long, long, enum spflags));
70 #define MIN(i, j) ((i) <= (j) ? (i) : (j))
72 /* Reads the contents of file descriptor FD, until it is closed, or a
73 read error occurs. The data is read in 8K chunks, and stored to
74 stream fp, which should have been opened for writing. If RBUF is
75 non-NULL and its file descriptor is equal to FD, flush RBUF first.
76 This function will *not* use the rbuf_* functions!
78 The EXPECTED argument is passed to show_progress() unchanged, but
81 If opt.verbose is set, the progress is also shown. RESTVAL
82 represents a value from which to start downloading (which will be
83 shown accordingly). If RESTVAL is non-zero, the stream should have
84 been open for appending.
86 The function exits and returns codes of 0, -1 and -2 if the
87 connection was closed, there was a read error, or if it could not
88 write to the output stream, respectively.
90 IMPORTANT: The function flushes the contents of the buffer in
91 rbuf_flush() before actually reading from fd. If you wish to read
92 from fd immediately, flush or discard the buffer. */
94 get_contents (int fd, FILE *fp, long *len, long restval, long expected,
95 struct rbuf *rbuf, int use_expected)
102 show_progress (restval, expected, SP_INIT);
103 if (rbuf && RBUF_FD (rbuf) == fd)
105 while ((res = rbuf_flush (rbuf, c, sizeof (c))) != 0)
107 if (fwrite (c, sizeof (char), res, fp) < res)
111 if (show_progress (res, expected, SP_NONE))
117 /* Read from fd while there is available data.
119 Normally, if expected is 0, it means that it is not known how
120 much data is expected. However, if use_expected is specified,
121 then expected being zero means exactly that. */
122 while (!use_expected || (*len < expected))
124 int amount_to_read = (use_expected
125 ? MIN (expected - *len, sizeof (c))
128 if (rbuf->ssl!=NULL) {
129 res = ssl_iread (rbuf->ssl, c, amount_to_read);
131 #endif /* HAVE_SSL */
132 res = iread (fd, c, amount_to_read);
135 #endif /* HAVE_SSL */
138 if (fwrite (c, sizeof (char), res, fp) < res)
142 if (show_progress (res, expected, SP_NONE))
153 show_progress (0, expected, SP_FINISH);
158 print_percentage (long bytes, long expected)
160 int percentage = (int)(100.0 * bytes / expected);
161 logprintf (LOG_VERBOSE, "%3d%%", percentage);
164 /* Show the dotted progress report of file loading. Called with
165 length and a flag to tell it whether to reset or not. It keeps the
166 offset information in static local variables.
168 Return value: 1 or 0, designating whether any dots have been drawn.
170 If FLAGS is SP_INIT, the routine will initialize.
172 If the res is non-zero, res/line_bytes lines are skipped
173 (meaning the appropriate number of kilobytes), and the number of
174 "dots" fitting on the first line are drawn as ','. */
176 show_progress (long res, long expected, enum spflags flags)
178 static long line_bytes;
179 static long offs, initial_skip;
180 static int ndot, nrow;
181 static long last_timer, time_offset;
184 if (flags == SP_FINISH)
187 char *tmpstr = (char *)alloca (2 * opt.dots_in_line + 1);
189 time_offset = elapsed_time () - last_timer;
190 for (; dot < opt.dots_in_line; dot++)
192 if (!(dot % opt.dot_spacing))
197 logputs (LOG_VERBOSE, tmpstr);
199 print_percentage (nrow * line_bytes + ndot * opt.dot_bytes + offs,
201 logprintf (LOG_VERBOSE, " @%s",
202 rate (ndot * opt.dot_bytes
203 + offs - (initial_skip % line_bytes),
205 logputs (LOG_VERBOSE, "\n\n");
209 /* Temporarily disable flushing. */
211 /* SP_INIT means initialization. If res is set, it also means that
212 the retrieval is *not* done from the beginning. The part that
213 was already retrieved is not shown again. */
214 if (flags == SP_INIT)
216 /* Generic initialization of static variables. */
219 line_bytes = (long)opt.dots_in_line * opt.dot_bytes;
220 last_timer = elapsed_time ();
225 if (res >= line_bytes)
227 nrow = res / line_bytes;
229 logprintf (LOG_VERBOSE,
230 _("\n [ skipping %dK ]"),
231 (int) ((nrow * line_bytes) / 1024));
235 logprintf (LOG_VERBOSE, "\n%5ldK", nrow * line_bytes / 1024);
237 /* Offset gets incremented by current value. */
239 /* While offset is >= opt.dot_bytes, print dots, taking care to
240 finish every row of opt.dots_in_line dots with a status message. */
241 for (; offs >= opt.dot_bytes; offs -= opt.dot_bytes)
243 if (!(ndot % opt.dot_spacing))
244 logputs (LOG_VERBOSE, " ");
246 logputs (LOG_VERBOSE, flags == SP_INIT ? "," : ".");
248 if (ndot == opt.dots_in_line)
250 time_offset = elapsed_time () - last_timer;
251 last_timer += time_offset;
256 print_percentage (nrow * line_bytes, expected);
257 logprintf (LOG_VERBOSE, " @%s",
258 rate (line_bytes - (initial_skip % line_bytes),
261 logprintf (LOG_VERBOSE, "\n%5ldK", nrow * line_bytes / 1024);
264 /* Reenable flushing. */
267 /* Force flush. #### Oh, what a kludge! */
272 /* Reset the internal timer. */
277 /* Under Unix, the preferred way to measure the passage of time is
278 through gettimeofday() because of its granularity. However, on
279 some old or weird systems, gettimeofday() might not be available.
280 There we use the simple time(). */
281 # ifdef HAVE_GETTIMEOFDAY
283 gettimeofday (&t, NULL);
284 internal_secs = t.tv_sec;
285 internal_msecs = t.tv_usec / 1000;
286 # else /* not HAVE_GETTIMEOFDAY */
287 internal_secs = time (NULL);
289 # endif /* not HAVE_GETTIMEOFDAY */
291 /* Under Windows, use Windows-specific APIs. */
295 SystemTimeToFileTime(&st,&ft);
296 internal_time.HighPart = ft.dwHighDateTime;
297 internal_time.LowPart = ft.dwLowDateTime;
301 /* Return the time elapsed from the last call to reset_timer(), in
307 # ifdef HAVE_GETTIMEOFDAY
309 gettimeofday (&t, NULL);
310 return ((t.tv_sec - internal_secs) * 1000
311 + (t.tv_usec / 1000 - internal_msecs));
312 # else /* not HAVE_GETTIMEOFDAY */
313 return 1000 * ((long)time (NULL) - internal_secs);
314 # endif /* not HAVE_GETTIMEOFDAY */
320 SystemTimeToFileTime(&st,&ft);
321 li.HighPart = ft.dwHighDateTime;
322 li.LowPart = ft.dwLowDateTime;
323 return (long) ((li.QuadPart - internal_time.QuadPart) / 1e4);
327 /* Print out the appropriate download rate. Appropriate means that if
328 rate is > 1024 bytes per second, kilobytes are used, and if rate >
329 1024 * 1024 bytes per second, megabytes are used.
331 If PAD is non-zero, strings will be padded to the width of 7
332 characters (xxxx.xx). */
334 rate (long bytes, long msecs, int pad)
341 dlrate = (double)1000 * bytes / msecs;
343 sprintf (res, pad ? "%7.2f B/s" : "%.2f B/s", dlrate);
344 else if (dlrate < 1024.0 * 1024.0)
345 sprintf (res, pad ? "%7.2f KB/s" : "%.2f KB/s", dlrate / 1024.0);
347 sprintf (res, pad ? "%7.2f MB/s" : "%.2f MB/s", dlrate / (1024.0 * 1024.0));
351 #define USE_PROXY_P(u) (opt.use_proxy && getproxy((u)->proto) \
352 && no_proxy_match((u)->host, \
353 (const char **)opt.no_proxy))
355 /* Retrieve the given URL. Decides which loop to call -- HTTP(S), FTP,
356 or simply copy it with file:// (#### the latter not yet
359 retrieve_url (const char *origurl, char **file, char **newloc,
360 const char *refurl, int *dt)
364 int location_changed, dummy;
366 char *mynewloc, *proxy;
368 struct hash_table *redirections = NULL;
370 /* If dt is NULL, just ignore it. */
373 url = xstrdup (origurl);
381 result = parseurl (url, u, 0);
384 logprintf (LOG_NOTQUIET, "%s: %s.\n", url, uerrmsg (result));
387 string_set_free (redirections);
394 /* Set the referer. */
396 u->referer = xstrdup (refurl);
400 u->referer = xstrdup (opt.referer);
405 local_use_proxy = USE_PROXY_P (u);
408 struct urlinfo *pu = newurl ();
410 /* Copy the original URL to new location. */
411 memcpy (pu, u, sizeof (*u));
412 pu->proxy = NULL; /* A minor correction :) */
413 /* Initialize u to nil. */
414 memset (u, 0, sizeof (*u));
416 /* Get the appropriate proxy server, appropriate for the
418 proxy = getproxy (pu->proto);
421 logputs (LOG_NOTQUIET, _("Could not find proxy host.\n"));
424 string_set_free (redirections);
428 /* Parse the proxy URL. */
429 result = parseurl (proxy, u, 0);
430 if (result != URLOK || u->proto != URLHTTP)
432 if (u->proto == URLHTTP)
433 logprintf (LOG_NOTQUIET, "Proxy %s: %s.\n", proxy, uerrmsg(result));
435 logprintf (LOG_NOTQUIET, _("Proxy %s: Must be HTTP.\n"), proxy);
438 string_set_free (redirections);
445 assert (u->proto != URLFILE); /* #### Implement me! */
448 if (u->proto == URLHTTP
450 || u->proto == URLHTTPS
453 result = http_loop (u, &mynewloc, dt);
454 else if (u->proto == URLFTP)
456 /* If this is a redirection, we must not allow recursive FTP
457 retrieval, so we save recursion to oldrec, and restore it
459 int oldrec = opt.recursive;
462 result = ftp_loop (u, dt);
463 opt.recursive = oldrec;
464 /* There is a possibility of having HTTP being redirected to
465 FTP. In these cases we must decide whether the text is HTML
466 according to the suffix. The HTML suffixes are `.html' and
467 `.htm', case-insensitive.
469 #### All of this is, of course, crap. These types should be
470 determined through mailcap. */
471 if (redirections && u->local && (u->proto == URLFTP ))
473 char *suf = suffix (u->local);
474 if (suf && (!strcasecmp (suf, "html") || !strcasecmp (suf, "htm")))
479 location_changed = (result == NEWLOCATION);
480 if (location_changed)
482 char *construced_newloc;
483 uerr_t newloc_result;
484 struct urlinfo *newloc_struct;
486 assert (mynewloc != NULL);
488 /* The HTTP specs only allow absolute URLs to appear in
489 redirects, but a ton of boneheaded webservers and CGIs out
490 there break the rules and use relative URLs, and popular
491 browsers are lenient about this, so wget should be too. */
492 construced_newloc = uri_merge (url, mynewloc);
494 mynewloc = construced_newloc;
496 /* Now, see if this new location makes sense. */
497 newloc_struct = newurl ();
498 newloc_result = parseurl (mynewloc, newloc_struct, 1);
499 if (newloc_result != URLOK)
501 logprintf (LOG_NOTQUIET, "%s: %s.\n", mynewloc, uerrmsg (newloc_result));
502 freeurl (newloc_struct, 1);
505 string_set_free (redirections);
511 /* Now mynewloc will become newloc_struct->url, because if the
512 Location contained relative paths like .././something, we
513 don't want that propagating as url. */
515 mynewloc = xstrdup (newloc_struct->url);
519 redirections = make_string_hash_table (0);
520 /* Add current URL immediately so we can detect it as soon
521 as possible in case of a cycle. */
522 string_set_add (redirections, u->url);
525 /* The new location is OK. Let's check for redirection cycle by
526 peeking through the history of redirections. */
527 if (string_set_exists (redirections, newloc_struct->url))
529 logprintf (LOG_NOTQUIET, _("%s: Redirection cycle detected.\n"),
531 freeurl (newloc_struct, 1);
534 string_set_free (redirections);
539 string_set_add (redirections, newloc_struct->url);
552 register_download (url, u->local);
554 register_html (url, u->local);
561 *file = xstrdup (u->local);
567 string_set_free (redirections);
574 ++global_download_count;
579 /* Find the URLs in the file and call retrieve_url() for each of
580 them. If HTML is non-zero, treat the file as HTML, and construct
581 the URLs accordingly.
583 If opt.recursive is set, call recursive_retrieve() for each file. */
585 retrieve_from_file (const char *file, int html, int *count)
588 urlpos *url_list, *cur_url;
590 url_list = (html ? get_urls_html (file, NULL, FALSE, NULL)
591 : get_urls_file (file));
592 status = RETROK; /* Suppose everything is OK. */
593 *count = 0; /* Reset the URL count. */
595 for (cur_url = url_list; cur_url; cur_url = cur_url->next, ++*count)
597 char *filename, *new_file;
600 if (downloaded_exceeds_quota ())
605 status = retrieve_url (cur_url->url, &filename, &new_file, NULL, &dt);
606 if (opt.recursive && status == RETROK && (dt & TEXTHTML))
607 status = recursive_retrieve (filename, new_file ? new_file
610 if (filename && opt.delete_after && file_exists_p (filename))
612 DEBUGP (("Removing file due to --delete-after in"
613 " retrieve_from_file():\n"));
614 logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
615 if (unlink (filename))
616 logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
620 FREE_MAYBE (new_file);
621 FREE_MAYBE (filename);
624 /* Free the linked list of URLs. */
625 free_urlpos (url_list);
630 /* Print `giving up', or `retrying', depending on the impending
631 action. N1 and N2 are the attempt number and the attempt limit. */
633 printwhat (int n1, int n2)
635 logputs (LOG_VERBOSE, (n1 == n2) ? _("Giving up.\n\n") : _("Retrying.\n\n"));
638 /* Increment opt.downloaded by BY_HOW_MUCH. If an overflow occurs,
639 set opt.downloaded_overflow to 1. */
641 downloaded_increase (unsigned long by_how_much)
644 if (opt.downloaded_overflow)
646 old = opt.downloaded;
647 opt.downloaded += by_how_much;
648 if (opt.downloaded < old) /* carry flag, where are you when I
652 opt.downloaded_overflow = 1;
653 opt.downloaded = ~((VERY_LONG_TYPE)0);
657 /* Return non-zero if the downloaded amount of bytes exceeds the
658 desired quota. If quota is not set or if the amount overflowed, 0
661 downloaded_exceeds_quota (void)
665 if (opt.downloaded_overflow)
666 /* We don't really know. (Wildly) assume not. */
669 return opt.downloaded > opt.quota;
672 /* If opt.wait or opt.waitretry are specified, and if certain
673 conditions are met, sleep the appropriate number of seconds. See
674 the documentation of --wait and --waitretry for more information.
676 COUNT is the count of current retrieval, beginning with 1. */
679 sleep_between_retrievals (int count)
681 static int first_retrieval = 1;
683 if (!first_retrieval && (opt.wait || opt.waitretry))
685 if (opt.waitretry && count > 1)
687 /* If opt.waitretry is specified and this is a retry, wait
688 for COUNT-1 number of seconds, or for opt.waitretry
690 if (count <= opt.waitretry)
693 sleep (opt.waitretry);
696 /* Otherwise, check if opt.wait is specified. If so, sleep. */