2 Copyright (C) 1995, 1996, 1997, 1998, 2000 Free Software Foundation, Inc.
4 This file is part of Wget.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
24 #include <sys/types.h>
27 #endif /* HAVE_UNISTD_H */
33 #endif /* HAVE_STRING_H */
48 LARGE_INTEGER internal_time;
50 /* Internal variables used by the timer. */
51 static long internal_secs, internal_msecs;
54 void logflush PARAMS ((void));
57 uerr_t http_loop PARAMS ((struct urlinfo *, char **, int *));
59 /* Flags for show_progress(). */
60 enum spflags { SP_NONE, SP_INIT, SP_FINISH };
62 static int show_progress PARAMS ((long, long, enum spflags));
64 #define MIN(i, j) ((i) <= (j) ? (i) : (j))
66 /* Reads the contents of file descriptor FD, until it is closed, or a
67 read error occurs. The data is read in 8K chunks, and stored to
68 stream fp, which should have been open for writing. If RBUF is
69 non-NULL and its file descriptor is equal to FD, flush RBUF first.
70 This function will *not* use the rbuf_* functions!
72 The EXPECTED argument is passed to show_progress() unchanged; it also bounds how much is read, but only when USE_EXPECTED is non-zero.
75 If opt.verbose is set, the progress is also shown. RESTVAL
76 represents a value from which to start downloading (which will be
77 shown accordingly). If RESTVAL is non-zero, the stream should have
78 been open for appending.
80 The function exits and returns codes of 0, -1 and -2 if the
81 connection was closed, there was a read error, or if it could not
82 write to the output stream, respectively.
84 IMPORTANT: The function flushes the contents of the buffer in
85 rbuf_flush() before actually reading from fd. If you wish to read
86 from fd immediately, flush or discard the buffer. */
88 get_contents (int fd, FILE *fp, long *len, long restval, long expected,
89 struct rbuf *rbuf, int use_expected)
96 show_progress (restval, expected, SP_INIT);
97 if (rbuf && RBUF_FD (rbuf) == fd)
99 while ((res = rbuf_flush (rbuf, c, sizeof (c))) != 0)
101 if (fwrite (c, sizeof (char), res, fp) < res)
105 if (show_progress (res, expected, SP_NONE))
111 /* Read from fd while there is available data.
113 Normally, if expected is 0, it means that it is not known how
114 much data is expected. However, if use_expected is specified,
115 then expected being zero means exactly that. */
116 while (!use_expected || (*len < expected))
118 int amount_to_read = (use_expected
119 ? MIN (expected - *len, sizeof (c))
121 res = iread (fd, c, amount_to_read);
124 if (fwrite (c, sizeof (char), res, fp) < res)
128 if (show_progress (res, expected, SP_NONE))
139 show_progress (0, expected, SP_FINISH);
144 print_percentage (long bytes, long expected)
146 int percentage = (int)(100.0 * bytes / expected);
147 logprintf (LOG_VERBOSE, " [%3d%%]", percentage);
150 /* Show the dotted progress report of file loading. Called with
151 length and a flag to tell it whether to reset or not. It keeps the
152 offset information in static local variables.
154 Return value: 1 or 0, designating whether any dots have been drawn.
156 If FLAGS is SP_INIT, the routine will initialize.
158 If the res is non-zero, res/line_bytes lines are skipped
159 (meaning the appropriate number of kilobytes), and the number of
160 "dots" fitting on the first line are drawn as ','. */
162 show_progress (long res, long expected, enum spflags flags)
164 static long line_bytes;
166 static int ndot, nrow;
169 if (flags == SP_FINISH)
174 char *tmpstr = (char *)alloca (2 * opt.dots_in_line + 1);
176 for (; dot < opt.dots_in_line; dot++)
178 if (!(dot % opt.dot_spacing))
183 logputs (LOG_VERBOSE, tmpstr);
184 print_percentage (nrow * line_bytes + ndot * opt.dot_bytes + offs,
187 logputs (LOG_VERBOSE, "\n\n");
191 /* Temporarily disable flushing. */
193 /* SP_INIT means initialization. If res is non-zero, it also means that
194 the retrieval is *not* done from the beginning. The part that
195 was already retrieved is not shown again. */
196 if (flags == SP_INIT)
198 /* Generic initialization of static variables. */
201 line_bytes = (long)opt.dots_in_line * opt.dot_bytes;
204 if (res >= line_bytes)
206 nrow = res / line_bytes;
208 logprintf (LOG_VERBOSE,
209 _("\n [ skipping %dK ]"),
210 (int) ((nrow * line_bytes) / 1024));
214 logprintf (LOG_VERBOSE, "\n%5ldK ->", nrow * line_bytes / 1024);
216 /* Offset gets incremented by current value. */
218 /* While offset is >= opt.dot_bytes, print dots, taking care to
219 start a new row with a status message after every opt.dots_in_line dots. */
220 for (; offs >= opt.dot_bytes; offs -= opt.dot_bytes)
222 if (!(ndot % opt.dot_spacing))
223 logputs (LOG_VERBOSE, " ");
225 logputs (LOG_VERBOSE, flags == SP_INIT ? "," : ".");
227 if (ndot == opt.dots_in_line)
232 print_percentage (nrow * line_bytes, expected);
233 logprintf (LOG_VERBOSE, "\n%5ldK ->", nrow * line_bytes / 1024);
236 /* Reenable flushing. */
239 /* Force flush. #### Oh, what a kludge! */
244 /* Reset the internal timer. */
249 /* Under Unix, the preferred way to measure the passage of time is
250 through gettimeofday() because of its granularity. However, on
251 some old or weird systems, gettimeofday() might not be available.
252 There we use the simple time(). */
253 # ifdef HAVE_GETTIMEOFDAY
255 gettimeofday (&t, NULL);
256 internal_secs = t.tv_sec;
257 internal_msecs = t.tv_usec / 1000;
258 # else /* not HAVE_GETTIMEOFDAY */
259 internal_secs = time (NULL);
261 # endif /* not HAVE_GETTIMEOFDAY */
263 /* Under Windows, use Windows-specific APIs. */
267 SystemTimeToFileTime(&st,&ft);
268 internal_time.HighPart = ft.dwHighDateTime;
269 internal_time.LowPart = ft.dwLowDateTime;
273 /* Return the time elapsed from the last call to reset_timer(), in milliseconds.
279 # ifdef HAVE_GETTIMEOFDAY
281 gettimeofday (&t, NULL);
282 return ((t.tv_sec - internal_secs) * 1000
283 + (t.tv_usec / 1000 - internal_msecs));
284 # else /* not HAVE_GETTIMEOFDAY */
285 return 1000 * ((long)time (NULL) - internal_secs);
286 # endif /* not HAVE_GETTIMEOFDAY */
292 SystemTimeToFileTime(&st,&ft);
293 li.HighPart = ft.dwHighDateTime;
294 li.LowPart = ft.dwLowDateTime;
295 return (long) ((li.QuadPart - internal_time.QuadPart) / 1e4);
299 /* Print out the appropriate download rate. Appropriate means that if
300 rate is > 1024 bytes per second, kilobytes are used, and if rate >
301 1024 * 1024 bytes per second, megabytes are used. */
303 rate (long bytes, long msecs)
310 dlrate = (double)1000 * bytes / msecs;
311 /* #### Should these strings be translatable? */
313 sprintf (res, "%.2f B/s", dlrate);
314 else if (dlrate < 1024.0 * 1024.0)
315 sprintf (res, "%.2f KB/s", dlrate / 1024.0);
317 sprintf (res, "%.2f MB/s", dlrate / (1024.0 * 1024.0));
321 #define USE_PROXY_P(u) (opt.use_proxy && getproxy((u)->proto) \
322 && no_proxy_match((u)->host, \
323 (const char **)opt.no_proxy))
325 /* Retrieve the given URL. Decides which loop to call -- HTTP, FTP,
326 or simply copy it with file:// (#### the latter is not yet implemented).
329 retrieve_url (const char *origurl, char **file, char **newloc,
330 const char *refurl, int *dt)
334 int location_changed, dummy;
336 char *mynewloc, *proxy;
338 struct hash_table *redirections = NULL;
340 /* If dt is NULL, just ignore it. */
343 url = xstrdup (origurl);
351 result = parseurl (url, u, 0);
354 logprintf (LOG_NOTQUIET, "%s: %s.\n", url, uerrmsg (result));
357 string_set_free (redirections);
364 /* Set the referer. */
366 u->referer = xstrdup (refurl);
370 u->referer = xstrdup (opt.referer);
375 local_use_proxy = USE_PROXY_P (u);
378 struct urlinfo *pu = newurl ();
380 /* Copy the original URL to new location. */
381 memcpy (pu, u, sizeof (*u));
382 pu->proxy = NULL; /* A minor correction :) */
383 /* Initialize u to nil. */
384 memset (u, 0, sizeof (*u));
386 /* Get the appropriate proxy server, appropriate for the
388 proxy = getproxy (pu->proto);
391 logputs (LOG_NOTQUIET, _("Could not find proxy host.\n"));
394 string_set_free (redirections);
398 /* Parse the proxy URL. */
399 result = parseurl (proxy, u, 0);
400 if (result != URLOK || u->proto != URLHTTP)
402 if (u->proto == URLHTTP)
403 logprintf (LOG_NOTQUIET, "Proxy %s: %s.\n", proxy, uerrmsg(result));
405 logprintf (LOG_NOTQUIET, _("Proxy %s: Must be HTTP.\n"), proxy);
408 string_set_free (redirections);
415 assert (u->proto != URLFILE); /* #### Implement me! */
418 if (u->proto == URLHTTP)
419 result = http_loop (u, &mynewloc, dt);
420 else if (u->proto == URLFTP)
422 /* If this is a redirection, we must not allow recursive FTP
423 retrieval, so we save recursion to oldrec, and restore it
425 int oldrec = opt.recursive;
428 result = ftp_loop (u, dt);
429 opt.recursive = oldrec;
430 /* There is a possibility of having HTTP being redirected to
431 FTP. In these cases we must decide whether the text is HTML
432 according to the suffix. The HTML suffixes are `.html' and
433 `.htm', case-insensitive.
435 #### All of this is, of course, crap. These types should be
436 determined through mailcap. */
437 if (redirections && u->local && (u->proto == URLFTP ))
439 char *suf = suffix (u->local);
440 if (suf && (!strcasecmp (suf, "html") || !strcasecmp (suf, "htm")))
445 location_changed = (result == NEWLOCATION);
446 if (location_changed)
448 char *construced_newloc;
449 uerr_t newloc_result;
450 struct urlinfo *newloc_struct;
452 assert (mynewloc != NULL);
454 /* The HTTP specs only allow absolute URLs to appear in
455 redirects, but a ton of boneheaded webservers and CGIs out
456 there break the rules and use relative URLs, and popular
457 browsers are lenient about this, so wget should be too. */
458 construced_newloc = url_concat (url, mynewloc);
460 mynewloc = construced_newloc;
462 /* Now, see if this new location makes sense. */
463 newloc_struct = newurl ();
464 newloc_result = parseurl (mynewloc, newloc_struct, 1);
465 if (newloc_result != URLOK)
467 logprintf (LOG_NOTQUIET, "%s: %s.\n", mynewloc, uerrmsg (newloc_result));
468 freeurl (newloc_struct, 1);
471 string_set_free (redirections);
477 /* Now mynewloc will become newloc_struct->url, because if the
478 Location contained relative paths like .././something, we
479 don't want that propagating as url. */
481 mynewloc = xstrdup (newloc_struct->url);
485 redirections = make_string_hash_table (0);
486 /* Add current URL immediately so we can detect it as soon
487 as possible in case of a cycle. */
488 string_set_add (redirections, u->url);
491 /* The new location is OK. Let's check for redirection cycle by
492 peeking through the history of redirections. */
493 if (string_set_exists (redirections, newloc_struct->url))
495 logprintf (LOG_NOTQUIET, _("%s: Redirection cycle detected.\n"),
497 freeurl (newloc_struct, 1);
500 string_set_free (redirections);
505 string_set_add (redirections, newloc_struct->url);
517 *file = xstrdup (u->local);
523 string_set_free (redirections);
533 /* Find the URLs in the file and call retrieve_url() for each of
534 them. If HTML is non-zero, treat the file as HTML, and construct
535 the URLs accordingly.
537 If opt.recursive is set, call recursive_retrieve() for each file. */
539 retrieve_from_file (const char *file, int html, int *count)
542 urlpos *url_list, *cur_url;
544 url_list = (html ? get_urls_html (file, NULL, FALSE, NULL)
545 : get_urls_file (file));
546 status = RETROK; /* Suppose everything is OK. */
547 *count = 0; /* Reset the URL count. */
549 for (cur_url = url_list; cur_url; cur_url = cur_url->next, ++*count)
551 char *filename, *new_file;
554 if (downloaded_exceeds_quota ())
559 status = retrieve_url (cur_url->url, &filename, &new_file, NULL, &dt);
560 if (opt.recursive && status == RETROK && (dt & TEXTHTML))
561 status = recursive_retrieve (filename, new_file ? new_file
564 if (filename && opt.delete_after && file_exists_p (filename))
566 DEBUGP (("Removing file due to --delete-after in"
567 " retrieve_from_file():\n"));
568 logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
569 if (unlink (filename))
570 logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
574 FREE_MAYBE (new_file);
575 FREE_MAYBE (filename);
578 /* Free the linked list of URL-s. */
579 free_urlpos (url_list);
584 /* Print `giving up', or `retrying', depending on the impending
585 action. N1 and N2 are the attempt number and the attempt limit. */
587 printwhat (int n1, int n2)
589 logputs (LOG_VERBOSE, (n1 == n2) ? _("Giving up.\n\n") : _("Retrying.\n\n"));
592 /* Increment opt.downloaded by BY_HOW_MUCH. If an overflow occurs,
593 set opt.downloaded_overflow to 1. */
595 downloaded_increase (unsigned long by_how_much)
598 if (opt.downloaded_overflow)
600 old = opt.downloaded;
601 opt.downloaded += by_how_much;
602 if (opt.downloaded < old) /* carry flag, where are you when I
606 opt.downloaded_overflow = 1;
607 opt.downloaded = ~((VERY_LONG_TYPE)0);
611 /* Return non-zero if the downloaded amount of bytes exceeds the
612 desired quota. If quota is not set or if the amount overflowed, 0
615 downloaded_exceeds_quota (void)
619 if (opt.downloaded_overflow)
620 /* We don't really know. (Wildly) assume not. */
623 return opt.downloaded > opt.quota;