2 Copyright (C) 1995, 1996, 1997, 1998, 2000 Free Software Foundation, Inc.
4 This file is part of Wget.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
24 #include <sys/types.h>
27 #endif /* HAVE_UNISTD_H */
33 #endif /* HAVE_STRING_H */
/* Timer start stamp for the Windows code path: the timer reset code
   fills its HighPart/LowPart from a FILETIME, and the elapsed-time
   code subtracts its QuadPart from the current stamp. */
48 LARGE_INTEGER internal_time;
50 /* Internal variables used by the timer. */
/* Start of the measured interval on the non-Windows code path, as
   whole seconds plus a millisecond remainder. */
51 static long internal_secs, internal_msecs;
/* Prototypes for routines whose definitions are not part of the
   visible listing. */
54 void logflush PARAMS ((void));
57 uerr_t http_loop PARAMS ((struct urlinfo *, char **, int *));
59 /* Flags for show_progress(). */
/* SP_INIT starts a new progress display, SP_FINISH closes the current
   one, SP_NONE is passed for ordinary updates in between. */
60 enum spflags { SP_NONE, SP_INIT, SP_FINISH };
62 static int show_progress PARAMS ((long, long, enum spflags));
/* Smaller of I and J. Each argument is evaluated twice, so do not
   pass expressions with side effects. */
64 #define MIN(i, j) ((i) <= (j) ? (i) : (j))
66 /* Reads the contents of file descriptor FD, until it is closed, or a
67 read error occurs. The data is read in 8K chunks, and stored to
68 stream fp, which should have been open for writing. If RBUF is
69 non-NULL and its file descriptor is equal to FD, flush RBUF first.
70 This function will *not* use the rbuf_* functions!
72 The EXPECTED argument is passed to show_progress() unchanged. It is
   otherwise consulted only when USE_EXPECTED is non-zero: reading
   then stops once *LEN reaches EXPECTED, and no single read asks for
   more than EXPECTED - *LEN bytes.
75 If opt.verbose is set, the progress is also shown. RESTVAL
76 represents a value from which to start downloading (which will be
77 shown accordingly). If RESTVAL is non-zero, the stream should have
78 been open for appending.
80 The function exits and returns codes of 0, -1 and -2 if the
81 connection was closed, there was a read error, or if it could not
82 write to the output stream, respectively.
84 IMPORTANT: The function flushes the contents of the buffer in
85 rbuf_flush() before actually reading from fd. If you wish to read
86 from fd immediately, flush or discard the buffer. */
88 get_contents (int fd, FILE *fp, long *len, long restval, long expected,
89 struct rbuf *rbuf, int use_expected)
96 show_progress (restval, expected, SP_INIT);
/* Drain whatever is already buffered in RBUF before touching FD. */
97 if (rbuf && RBUF_FD (rbuf) == fd)
99 while ((res = rbuf_flush (rbuf, c, sizeof (c))) != 0)
/* A short write means the output stream could not take the data. */
101 if (fwrite (c, sizeof (char), res, fp) < res)
105 if (show_progress (res, expected, SP_NONE))
111 /* Read from fd while there is available data.
113 Normally, if expected is 0, it means that it is not known how
114 much data is expected. However, if use_expected is specified,
115 then expected being zero means exactly that. */
116 while (!use_expected || (*len < expected))
118 int amount_to_read = (use_expected
119 ? MIN (expected - *len, sizeof (c))
/* NOTE(review): rbuf is tested for NULL before the flush loop above
   but is dereferenced unconditionally here. Confirm that callers
   never pass a NULL rbuf when the SSL branch is compiled in. */
122 if (rbuf->ssl!=NULL) {
123 res = ssl_iread (rbuf->ssl, c, amount_to_read);
125 #endif /* HAVE_SSL */
126 res = iread (fd, c, amount_to_read);
129 #endif /* HAVE_SSL */
132 if (fwrite (c, sizeof (char), res, fp) < res)
136 if (show_progress (res, expected, SP_NONE))
/* Close the progress display once reading is over. */
147 show_progress (0, expected, SP_FINISH);
/* Log, at LOG_VERBOSE level, which percentage of EXPECTED the value
   BYTES represents. The ratio is computed in floating point and then
   cast to int, and is printed in a bracketed, three-column field.

   NOTE(review): with EXPECTED == 0 the quotient is not finite and its
   conversion to int is undefined. No guard is visible in this block;
   confirm that every caller checks EXPECTED first. */
152 print_percentage (long bytes, long expected)
154 int percentage = (int)(100.0 * bytes / expected);
155 logprintf (LOG_VERBOSE, " [%3d%%]", percentage);
158 /* Show the dotted progress report of file loading. Called with
159 length and a flag to tell it whether to reset or not. It keeps the
160 offset information in static local variables.
162 Return value: 1 or 0, designating whether any dots have been drawn.
164 If FLAGS is SP_INIT, the routine will initialize.
166 If the res is non-zero, res/line_bytes lines are skipped
167 (meaning the appropriate number of kilobytes), and the number of
168 "dots" fitting on the first line are drawn as ','.
   If FLAGS is SP_FINISH, the current row is closed: a filler string
   sized for the remaining dot positions is logged, followed by the
   percentage and a blank line.
   EXPECTED is only handed on to print_percentage(). */
170 show_progress (long res, long expected, enum spflags flags)
/* Bytes represented by one full row of dots; set during SP_INIT. */
172 static long line_bytes;
/* Dots drawn on the current row, and number of completed rows. */
174 static int ndot, nrow;
177 if (flags == SP_FINISH)
/* Stack-allocated scratch string, released when the function returns. */
182 char *tmpstr = (char *)alloca (2 * opt.dots_in_line + 1);
184 for (; dot < opt.dots_in_line; dot++)
186 if (!(dot % opt.dot_spacing))
191 logputs (LOG_VERBOSE, tmpstr);
/* Bytes accounted for so far: full rows, dots on this row, remainder. */
192 print_percentage (nrow * line_bytes + ndot * opt.dot_bytes + offs,
195 logputs (LOG_VERBOSE, "\n\n");
199 /* Temporarily disable flushing. */
201 /* FLAGS == SP_INIT means initialization. If res is set, it also means
202 that the retrieval is *not* done from the beginning. The part that
203 was already retrieved is not shown again. */
204 if (flags == SP_INIT)
206 /* Generic initialization of static variables. */
209 line_bytes = (long)opt.dots_in_line * opt.dot_bytes;
/* At least one whole row was already retrieved: skip those rows. */
212 if (res >= line_bytes)
214 nrow = res / line_bytes;
216 logprintf (LOG_VERBOSE,
217 _("\n [ skipping %dK ]"),
218 (int) ((nrow * line_bytes) / 1024));
/* Row header: offset of the row start, in kilobytes. */
222 logprintf (LOG_VERBOSE, "\n%5ldK ->", nrow * line_bytes / 1024);
224 /* Offset gets incremented by current value. */
226 /* While offset is >= opt.dot_bytes, print dots. After every
227 opt.dots_in_line dots the row is closed with a percentage and the
   next row is started with its kilobyte offset. */
228 for (; offs >= opt.dot_bytes; offs -= opt.dot_bytes)
/* A blank separates groups of opt.dot_spacing dots. */
230 if (!(ndot % opt.dot_spacing))
231 logputs (LOG_VERBOSE, " ");
/* Dots drawn during SP_INIT stand for data retrieved earlier. */
233 logputs (LOG_VERBOSE, flags == SP_INIT ? "," : ".");
235 if (ndot == opt.dots_in_line)
240 print_percentage (nrow * line_bytes, expected);
241 logprintf (LOG_VERBOSE, "\n%5ldK ->", nrow * line_bytes / 1024);
244 /* Reenable flushing. */
247 /* Force flush. #### Oh, what a kludge! */
252 /* Reset the internal timer: store the current time in the file-scope
   timer variables so that a later elapsed-time query measures from
   this instant. */
257 /* Under Unix, the preferred way to measure the passage of time is
258 through gettimeofday() because of its granularity. However, on
259 some old or weird systems, gettimeofday() might not be available.
260 There we use the simple time(). */
261 # ifdef HAVE_GETTIMEOFDAY
263 gettimeofday (&t, NULL);
264 internal_secs = t.tv_sec;
/* tv_usec holds microseconds; keep whole milliseconds only. */
265 internal_msecs = t.tv_usec / 1000;
266 # else /* not HAVE_GETTIMEOFDAY */
/* time() only offers one-second resolution. */
267 internal_secs = time (NULL);
269 # endif /* not HAVE_GETTIMEOFDAY */
271 /* Under Windows, use Windows-specific APIs. */
275 SystemTimeToFileTime(&st,&ft);
/* Copy both halves of the FILETIME into the LARGE_INTEGER stamp. */
276 internal_time.HighPart = ft.dwHighDateTime;
277 internal_time.LowPart = ft.dwLowDateTime;
281 /* Return the time elapsed from the last call to reset_timer(), in milliseconds. */
/* Compute the milliseconds that have passed since the timer variables
   were last set. Each branch mirrors the clock source used when the
   timer was reset. */
287 # ifdef HAVE_GETTIMEOFDAY
289 gettimeofday (&t, NULL);
/* Whole seconds scaled to milliseconds, plus the difference of the
   millisecond remainders (which may be negative). */
290 return ((t.tv_sec - internal_secs) * 1000
291 + (t.tv_usec / 1000 - internal_msecs));
292 # else /* not HAVE_GETTIMEOFDAY */
/* time() has one-second resolution, so the result is a multiple of 1000. */
293 return 1000 * ((long)time (NULL) - internal_secs);
294 # endif /* not HAVE_GETTIMEOFDAY */
300 SystemTimeToFileTime(&st,&ft);
301 li.HighPart = ft.dwHighDateTime;
302 li.LowPart = ft.dwLowDateTime;
/* FILETIME counts 100-nanosecond intervals; 1e4 of them make up one
   millisecond. */
303 return (long) ((li.QuadPart - internal_time.QuadPart) / 1e4);
307 /* Format the download rate for BYTES transferred in MSECS
308 milliseconds into the buffer RES, with two decimals. Rates below
309 1024 * 1024 bytes per second are shown in KB/s, anything at or
   above that in MB/s; the B/s form is used for the smallest rates
   (the test selecting it is not visible in this listing).

   NOTE(review): MSECS is used as a divisor. No guard against
   MSECS == 0 is visible here; confirm one exists. */
311 rate (long bytes, long msecs)
/* Bytes per second, computed in floating point. */
318 dlrate = (double)1000 * bytes / msecs;
319 /* #### Should these strings be translatable? */
321 sprintf (res, "%.2f B/s", dlrate);
322 else if (dlrate < 1024.0 * 1024.0)
323 sprintf (res, "%.2f KB/s", dlrate / 1024.0);
325 sprintf (res, "%.2f MB/s", dlrate / (1024.0 * 1024.0));
/* Non-zero when the URL structure U should be retrieved through a
   proxy. All three conditions must hold: opt.use_proxy is set,
   getproxy() returns a proxy for the protocol of U, and
   no_proxy_match() returns non-zero for the host of U checked
   against opt.no_proxy.
   NOTE(review): presumably a non-zero no_proxy_match() result means
   the host is not exempted from proxying; confirm against its
   definition. U is expanded twice, so pass a side-effect-free
   expression. */
329 #define USE_PROXY_P(u) (opt.use_proxy && getproxy((u)->proto) \
330 && no_proxy_match((u)->host, \
331 (const char **)opt.no_proxy))
333 /* Retrieve the given URL. Decides which loop to call -- HTTP(S), FTP,
334 or simply copy it with file:// (#### the latter is not yet implemented). */
/* Parameters and flow, as far as the visible lines show:

   ORIGURL is copied and parsed; a parse failure is logged.
   FILE receives a fresh xstrdup copy of the local file name.
   NEWLOC is not referenced in the visible lines; presumably it
   receives the final redirected URL (confirm).
   REFURL, or else opt.referer, is copied into the referer field.
   DT is handed to http_loop() and ftp_loop().

   When USE_PROXY_P holds, the parsed URL is moved aside and the proxy
   URL is parsed in its place; the proxy must be HTTP. HTTP and HTTPS
   go to http_loop(), FTP goes to ftp_loop(). A NEWLOCATION result is
   resolved against the current URL, re-parsed, and checked against
   the set of already visited URLs to detect redirection cycles. */
337 retrieve_url (const char *origurl, char **file, char **newloc,
338 const char *refurl, int *dt)
342 int location_changed, dummy;
344 char *mynewloc, *proxy;
/* Set of URLs seen while following redirects; NULL until needed. */
346 struct hash_table *redirections = NULL;
348 /* If dt is NULL, just ignore it. */
351 url = xstrdup (origurl);
359 result = parseurl (url, u, 0);
362 logprintf (LOG_NOTQUIET, "%s: %s.\n", url, uerrmsg (result));
365 string_set_free (redirections);
372 /* Set the referer. */
374 u->referer = xstrdup (refurl);
378 u->referer = xstrdup (opt.referer);
/* Decide whether this retrieval goes through a proxy. */
383 local_use_proxy = USE_PROXY_P (u);
386 struct urlinfo *pu = newurl ();
388 /* Copy the original URL to new location. */
389 memcpy (pu, u, sizeof (*u));
390 pu->proxy = NULL; /* A minor correction :) */
391 /* Initialize u to nil. */
392 memset (u, 0, sizeof (*u));
394 /* Get the appropriate proxy server, appropriate for the
396 proxy = getproxy (pu->proto);
399 logputs (LOG_NOTQUIET, _("Could not find proxy host.\n"));
402 string_set_free (redirections);
406 /* Parse the proxy URL. */
407 result = parseurl (proxy, u, 0);
/* Reject a proxy URL that fails to parse or is not plain HTTP. */
408 if (result != URLOK || u->proto != URLHTTP)
410 if (u->proto == URLHTTP)
411 logprintf (LOG_NOTQUIET, "Proxy %s: %s.\n", proxy, uerrmsg(result));
413 logprintf (LOG_NOTQUIET, _("Proxy %s: Must be HTTP.\n"), proxy);
416 string_set_free (redirections);
423 assert (u->proto != URLFILE); /* #### Implement me! */
/* Dispatch on protocol. */
427 if (u->proto == URLHTTP || u->proto == URLHTTPS )
429 if (u->proto == URLHTTP)
430 #endif /* HAVE_SSL */
431 result = http_loop (u, &mynewloc, dt);
432 else if (u->proto == URLFTP)
434 /* If this is a redirection, we must not allow recursive FTP
435 retrieval, so we save recursion to oldrec, and restore it
437 int oldrec = opt.recursive;
440 result = ftp_loop (u, dt);
441 opt.recursive = oldrec;
442 /* There is a possibility of having HTTP being redirected to
443 FTP. In these cases we must decide whether the text is HTML
444 according to the suffix. The HTML suffixes are `.html' and
445 `.htm', case-insensitive.
447 #### All of this is, of course, crap. These types should be
448 determined through mailcap. */
449 if (redirections && u->local && (u->proto == URLFTP ))
451 char *suf = suffix (u->local);
452 if (suf && (!strcasecmp (suf, "html") || !strcasecmp (suf, "htm")))
/* A NEWLOCATION result means the server redirected us. */
457 location_changed = (result == NEWLOCATION);
458 if (location_changed)
460 char *construced_newloc;
461 uerr_t newloc_result;
462 struct urlinfo *newloc_struct;
464 assert (mynewloc != NULL);
466 /* The HTTP specs only allow absolute URLs to appear in
467 redirects, but a ton of boneheaded webservers and CGIs out
468 there break the rules and use relative URLs, and popular
469 browsers are lenient about this, so wget should be too. */
470 construced_newloc = url_concat (url, mynewloc);
472 mynewloc = construced_newloc;
474 /* Now, see if this new location makes sense. */
475 newloc_struct = newurl ();
476 newloc_result = parseurl (mynewloc, newloc_struct, 1);
477 if (newloc_result != URLOK)
479 logprintf (LOG_NOTQUIET, "%s: %s.\n", mynewloc, uerrmsg (newloc_result));
480 freeurl (newloc_struct, 1);
483 string_set_free (redirections);
489 /* Now mynewloc will become newloc_struct->url, because if the
490 Location contained relative paths like .././something, we
491 don't want that propagating as url. */
493 mynewloc = xstrdup (newloc_struct->url);
/* Create the set of visited URLs (presumably only on the first
   redirect; the guarding condition is not visible in this listing). */
497 redirections = make_string_hash_table (0);
498 /* Add current URL immediately so we can detect it as soon
499 as possible in case of a cycle. */
500 string_set_add (redirections, u->url);
503 /* The new location is OK. Let's check for redirection cycle by
504 peeking through the history of redirections. */
505 if (string_set_exists (redirections, newloc_struct->url))
507 logprintf (LOG_NOTQUIET, _("%s: Redirection cycle detected.\n"),
509 freeurl (newloc_struct, 1);
512 string_set_free (redirections);
/* Remember the new location so that a later redirect back to it is
   caught. */
517 string_set_add (redirections, newloc_struct->url);
/* Give the caller its own copy of the local file name. */
529 *file = xstrdup (u->local);
535 string_set_free (redirections);
545 /* Find the URLs in the file and call retrieve_url() for each of
546 them. If HTML is non-zero, the URLs are extracted with
547 get_urls_html(), otherwise with get_urls_file().
   *COUNT is reset to zero and incremented once per URL processed.
549 recursive_retrieve() is called for a URL only when opt.recursive is
   set, its retrieval returned RETROK and the document type has the
   TEXTHTML bit. With opt.delete_after set, a retrieved file that
   exists is unlinked again. */
551 retrieve_from_file (const char *file, int html, int *count)
554 urlpos *url_list, *cur_url;
556 url_list = (html ? get_urls_html (file, NULL, FALSE, NULL)
557 : get_urls_file (file));
558 status = RETROK; /* Suppose everything is OK. */
559 *count = 0; /* Reset the URL count. */
561 for (cur_url = url_list; cur_url; cur_url = cur_url->next, ++*count)
563 char *filename, *new_file;
/* The download quota is checked before each URL. */
566 if (downloaded_exceeds_quota ())
571 status = retrieve_url (cur_url->url, &filename, &new_file, NULL, &dt);
572 if (opt.recursive && status == RETROK && (dt & TEXTHTML))
573 status = recursive_retrieve (filename, new_file ? new_file
576 if (filename && opt.delete_after && file_exists_p (filename))
578 DEBUGP (("Removing file due to --delete-after in"
579 " retrieve_from_file():\n"));
580 logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
/* A failed unlink is reported but does not change STATUS here. */
581 if (unlink (filename))
582 logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
/* Both strings handed back by retrieve_url() are released here. */
586 FREE_MAYBE (new_file);
587 FREE_MAYBE (filename);
590 /* Free the linked list of URL-s. */
591 free_urlpos (url_list);
596 /* Log `Giving up.' or `Retrying.' at LOG_VERBOSE level, depending on
597 the impending action. N1 and N2 are the attempt number and the
   attempt limit; `Giving up.' is chosen only when they are equal. */
599 printwhat (int n1, int n2)
601 logputs (LOG_VERBOSE, (n1 == n2) ? _("Giving up.\n\n") : _("Retrying.\n\n"));
604 /* Increment opt.downloaded by BY_HOW_MUCH. If an overflow occurs,
605 set opt.downloaded_overflow to 1 and pin opt.downloaded at the
   all-ones value of VERY_LONG_TYPE. Overflow is detected by the sum
   coming out smaller than the previous total, which presumably
   relies on VERY_LONG_TYPE being unsigned (confirm). The overflow
   flag is tested first; what follows that test is not visible in
   this listing. */
607 downloaded_increase (unsigned long by_how_much)
610 if (opt.downloaded_overflow)
612 old = opt.downloaded;
613 opt.downloaded += by_how_much;
614 if (opt.downloaded < old) /* carry flag, where are you when I
618 opt.downloaded_overflow = 1;
619 opt.downloaded = ~((VERY_LONG_TYPE)0);
623 /* Return non-zero if the downloaded amount of bytes exceeds the
624 desired quota. If quota is not set or if the amount overflowed, 0 is returned. */
/* Quota test based on opt.downloaded, opt.quota and the overflow flag
   opt.downloaded_overflow. */
627 downloaded_exceeds_quota (void)
631 if (opt.downloaded_overflow)
632 /* We don't really know. (Wildly) assume not. */
/* Strict comparison: a total exactly equal to opt.quota does not
   count as exceeding it. */
635 return opt.downloaded > opt.quota;