2 Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001 Free Software Foundation, Inc.
4 This file is part of Wget.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
24 #include <sys/types.h>
27 #endif /* HAVE_UNISTD_H */
33 #endif /* HAVE_STRING_H */
/* Reference timestamp for the Windows timer code: its HighPart and
   LowPart are filled from a FILETIME, and elapsed time is computed by
   subtracting its QuadPart from a later reading. */
51 LARGE_INTEGER internal_time;
53 /* Internal variables used by the timer: the reference time in whole
   seconds and its millisecond remainder. */
54 static long internal_secs, internal_msecs;
57 void logflush PARAMS ((void));
60 uerr_t http_loop PARAMS ((struct urlinfo *, char **, int *));
62 /* Flags for show_progress(): SP_INIT starts a new report, SP_FINISH
   closes it, SP_NONE is an ordinary progress update. */
63 enum spflags { SP_NONE, SP_INIT, SP_FINISH };
65 static int show_progress PARAMS ((long, long, enum spflags));
/* Smaller of I and J.  Each argument is evaluated twice, so do not
   pass expressions with side effects. */
67 #define MIN(i, j) ((i) <= (j) ? (i) : (j))
69 /* Reads the contents of file descriptor FD, until it is closed, or a
70 read error occurs. The data is read in 8K chunks, and stored to
71 stream fp, which should have been open for writing. If RBUF is
72 non-NULL and its file descriptor is equal to FD, flush RBUF first.
73 This function will *not* use the rbuf_* functions!
75 The EXPECTED argument is passed to show_progress() unchanged. When
USE_EXPECTED is non-zero it additionally bounds the read loop: reading
continues only while *LEN is below EXPECTED, and no single read asks
for more than the bytes still outstanding.
78 If opt.verbose is set, the progress is also shown. RESTVAL
79 represents a value from which to start downloading (which will be
80 shown accordingly). If RESTVAL is non-zero, the stream should have
81 been open for appending.
83 The function exits and returns codes of 0, -1 and -2 if the
84 connection was closed, there was a read error, or if it could not
85 write to the output stream, respectively.
87 IMPORTANT: The function flushes the contents of the buffer in
88 rbuf_flush() before actually reading from fd. If you wish to read
89 from fd immediately, flush or discard the buffer. */
91 get_contents (int fd, FILE *fp, long *len, long restval, long expected,
92 struct rbuf *rbuf, int use_expected)
99 show_progress (restval, expected, SP_INIT);
/* Drain whatever RBUF already holds, but only when RBUF wraps this
   very descriptor. */
100 if (rbuf && RBUF_FD (rbuf) == fd)
102 while ((res = rbuf_flush (rbuf, c, sizeof (c))) != 0)
/* fwrite returning fewer items than requested means the output
   stream did not accept all of the data. */
104 if (fwrite (c, sizeof (char), res, fp) < res)
108 if (show_progress (res, expected, SP_NONE))
114 /* Read from fd while there is available data.
116 Normally, if expected is 0, it means that it is not known how
117 much data is expected. However, if use_expected is specified,
118 then expected being zero means exactly that. */
119 while (!use_expected || (*len < expected))
/* With use_expected set, cap the request at the bytes still missing
   so the read never runs past EXPECTED. */
121 int amount_to_read = (use_expected
122 ? MIN (expected - *len, sizeof (c))
/* Read through the SSL layer when RBUF carries an SSL handle,
   otherwise straight from the descriptor.
   NOTE(review): rbuf is dereferenced here without the NULL test used
   for the flush above -- confirm callers always pass a non-NULL RBUF
   on SSL builds. */
125 if (rbuf->ssl!=NULL) {
126 res = ssl_iread (rbuf->ssl, c, amount_to_read);
128 #endif /* HAVE_SSL */
129 res = iread (fd, c, amount_to_read);
132 #endif /* HAVE_SSL */
/* Same short-write check as for the flushed data above. */
135 if (fwrite (c, sizeof (char), res, fp) < res)
139 if (show_progress (res, expected, SP_NONE))
/* Close the progress report. */
150 show_progress (0, expected, SP_FINISH);
/* Log BYTES as a whole-number percentage of EXPECTED at LOG_VERBOSE
   level, right-aligned in three columns and followed by '%'.  The
   fraction is computed in floating point and truncated to int.
   NOTE(review): EXPECTED is the divisor and no zero check is visible
   in this excerpt -- confirm that callers guard against 0. */
155 print_percentage (long bytes, long expected)
157 int percentage = (int)(100.0 * bytes / expected);
158 logprintf (LOG_VERBOSE, "%3d%%", percentage);
161 /* Show the dotted progress report of file loading. Called with
162 length and a flag to tell it whether to reset or not. It keeps the
163 offset information in static local variables.
165 Return value: 1 or 0, designating whether any dots have been drawn.
167 If FLAGS is SP_INIT, the routine will initialize.
169 If the res is non-zero, res/line_bytes lines are skipped
170 (meaning the appropriate number of kilobytes), and the number of
171 "dots" fitting on the first line are drawn as ','.
If FLAGS is SP_FINISH, the unfinished row is padded out and the final
percentage and download rate are logged. */
173 show_progress (long res, long expected, enum spflags flags)
175 static long line_bytes;
176 static long offs, initial_skip;
177 static int ndot, nrow;
178 static long last_timer, time_offset;
/* SP_FINISH: complete the current row and print the closing
   statistics. */
181 if (flags == SP_FINISH)
/* Scratch string for the filler that stands in for the dots that
   were never drawn on the last row. */
186 char *tmpstr = (char *)alloca (2 * opt.dots_in_line + 1);
/* Time spent on the (partial) last row. */
188 time_offset = elapsed_time () - last_timer;
189 for (; dot < opt.dots_in_line; dot++)
191 if (!(dot % opt.dot_spacing))
196 logputs (LOG_VERBOSE, tmpstr);
197 print_percentage (nrow * line_bytes + ndot * opt.dot_bytes + offs,
199 logprintf (LOG_VERBOSE, " @%s",
200 rate (ndot * opt.dot_bytes
201 + offs - (initial_skip % line_bytes),
204 logputs (LOG_VERBOSE, "\n\n");
208 /* Temporarily disable flushing. */
210 /* init set means initialization. If res is set, it also means that
211 the retrieval is *not* done from the beginning. The part that
212 was already retrieved is not shown again. */
213 if (flags == SP_INIT)
215 /* Generic initialization of static variables. */
/* Number of bytes represented by one full row of dots. */
218 line_bytes = (long)opt.dots_in_line * opt.dot_bytes;
219 last_timer = elapsed_time ();
/* Whole rows that are already covered by RES are not drawn; they
   are summarized by the "skipping" message instead. */
224 if (res >= line_bytes)
226 nrow = res / line_bytes;
228 logprintf (LOG_VERBOSE,
229 _("\n [ skipping %dK ]"),
230 (int) ((nrow * line_bytes) / 1024));
234 logprintf (LOG_VERBOSE, "\n%5ldK", nrow * line_bytes / 1024);
236 /* Offset gets incremented by current value. */
238 /* While offset is >= opt.dot_bytes, print dots, taking care to
239 emit a status message after every opt.dots_in_line dots. */
240 for (; offs >= opt.dot_bytes; offs -= opt.dot_bytes)
/* A blank separates groups of opt.dot_spacing dots. */
242 if (!(ndot % opt.dot_spacing))
243 logputs (LOG_VERBOSE, " ");
/* Dots drawn during SP_INIT stand for data that was skipped, so they
   are shown as ',' rather than '.'. */
245 logputs (LOG_VERBOSE, flags == SP_INIT ? "," : ".");
/* Row is full: measure the time it took and restart the row timer
   from now. */
247 if (ndot == opt.dots_in_line)
249 time_offset = elapsed_time () - last_timer;
250 last_timer += time_offset;
256 print_percentage (nrow * line_bytes, expected);
257 logprintf (LOG_VERBOSE, " @%s",
258 rate (line_bytes - (initial_skip % line_bytes),
/* Start the next row with its offset in kilobytes. */
262 logprintf (LOG_VERBOSE, "\n%5ldK", nrow * line_bytes / 1024);
265 /* Reenable flushing. */
268 /* Force flush. #### Oh, what a kludge! */
273 /* Reset the internal timer.  The current time is stored as the
   reference point: internal_secs/internal_msecs in the first branch
   below, internal_time in the Windows branch. */
278 /* Under Unix, the preferred way to measure the passage of time is
279 through gettimeofday() because of its granularity. However, on
280 some old or weird systems, gettimeofday() might not be available.
281 There we use the simple time(). */
282 # ifdef HAVE_GETTIMEOFDAY
/* Keep whole seconds and the millisecond remainder separately. */
284 gettimeofday (&t, NULL);
285 internal_secs = t.tv_sec;
286 internal_msecs = t.tv_usec / 1000;
287 # else /* not HAVE_GETTIMEOFDAY */
/* Fallback: only the seconds value from time() is recorded here. */
288 internal_secs = time (NULL);
290 # endif /* not HAVE_GETTIMEOFDAY */
292 /* Under Windows, use Windows-specific APIs. */
/* Convert the system time in st to a FILETIME and keep its high and
   low parts in internal_time. */
296 SystemTimeToFileTime(&st,&ft);
297 internal_time.HighPart = ft.dwHighDateTime;
298 internal_time.LowPart = ft.dwLowDateTime;
/* Elapsed-time query.  The gettimeofday() and time() branches below
   both produce milliseconds relative to the stored reference
   (seconds difference times 1000, plus the millisecond difference
   where available). */
302 /* Return the time elapsed from the last call to reset_timer(), in
308 # ifdef HAVE_GETTIMEOFDAY
310 gettimeofday (&t, NULL);
311 return ((t.tv_sec - internal_secs) * 1000
312 + (t.tv_usec / 1000 - internal_msecs));
313 # else /* not HAVE_GETTIMEOFDAY */
314 return 1000 * ((long)time (NULL) - internal_secs);
315 # endif /* not HAVE_GETTIMEOFDAY */
/* Windows branch: take a fresh FILETIME reading and subtract the
   reference kept in internal_time. */
321 SystemTimeToFileTime(&st,&ft);
322 li.HighPart = ft.dwHighDateTime;
323 li.LowPart = ft.dwLowDateTime;
/* NOTE(review): dividing by 1e4 presumably converts 100-nanosecond
   FILETIME ticks into milliseconds -- confirm against the FILETIME
   documentation. */
324 return (long) ((li.QuadPart - internal_time.QuadPart) / 1e4);
328 /* Print out the appropriate download rate. Appropriate means that if
329 rate is > 1024 bytes per second, kilobytes are used, and if rate >
330 1024 * 1024 bps, megabytes are used.
332 If PAD is non-zero, strings will be padded to the width of 7
333 characters (xxxx.xx).
BYTES is the amount transferred and MSECS the time it took, in
milliseconds; the text is formatted into RES with sprintf. */
335 rate (long bytes, long msecs, int pad)
/* Bytes per second: BYTES scaled by 1000 over the elapsed
   milliseconds.
   NOTE(review): MSECS is the divisor and no zero guard is visible in
   this excerpt -- confirm one exists. */
342 dlrate = (double)1000 * bytes / msecs;
344 sprintf (res, pad ? "%7.2f B/s" : "%.2f B/s", dlrate);
/* Below 1024 * 1024 bytes per second the figure is shown in KB/s,
   anything faster in MB/s. */
345 else if (dlrate < 1024.0 * 1024.0)
346 sprintf (res, pad ? "%7.2f KB/s" : "%.2f KB/s", dlrate / 1024.0);
348 sprintf (res, pad ? "%7.2f MB/s" : "%.2f MB/s", dlrate / (1024.0 * 1024.0));
/* Non-zero when U should be fetched through a proxy: proxy use is
   enabled (opt.use_proxy), getproxy() yields a proxy for U's
   protocol, and no_proxy_match() returns non-zero for U's host
   against the opt.no_proxy list.  U is evaluated more than once. */
352 #define USE_PROXY_P(u) (opt.use_proxy && getproxy((u)->proto) \
353 && no_proxy_match((u)->host, \
354 (const char **)opt.no_proxy))
/* retrieve_url: overview of the visible steps.
   - A copy of ORIGURL is parsed with parseurl(); a parse error is
     logged.
   - The referer is taken from REFURL or from opt.referer.
   - When USE_PROXY_P says so, the parsed URL is moved aside into a
     second urlinfo and U is re-parsed from the proxy URL, which must
     be an HTTP URL.
   - The request is dispatched to http_loop() or ftp_loop() according
     to u->proto.
   - A NEWLOCATION result is treated as a redirect: the new location
     is resolved against the current URL, parsed, and checked against
     the set of already-visited URLs to detect cycles.
   - *FILE receives a freshly allocated copy of u->local.  */
356 /* Retrieve the given URL. Decides which loop to call -- HTTP(S), FTP,
357 or simply copy it with file:// (#### the latter not yet
360 retrieve_url (const char *origurl, char **file, char **newloc,
361 const char *refurl, int *dt)
365 int location_changed, dummy;
367 char *mynewloc, *proxy;
369 struct hash_table *redirections = NULL;
371 /* If dt is NULL, just ignore it. */
/* Work on a private copy of the caller's URL string. */
374 url = xstrdup (origurl);
382 result = parseurl (url, u, 0);
385 logprintf (LOG_NOTQUIET, "%s: %s.\n", url, uerrmsg (result));
388 string_set_free (redirections);
395 /* Set the referer. */
397 u->referer = xstrdup (refurl);
401 u->referer = xstrdup (opt.referer);
/* Decide whether this URL is to be fetched through a proxy. */
406 local_use_proxy = USE_PROXY_P (u);
409 struct urlinfo *pu = newurl ();
411 /* Copy the original URL to new location. */
412 memcpy (pu, u, sizeof (*u));
413 pu->proxy = NULL; /* A minor correction :) */
414 /* Initialize u to nil. */
415 memset (u, 0, sizeof (*u));
417 /* Get the appropriate proxy server, appropriate for the
419 proxy = getproxy (pu->proto);
422 logputs (LOG_NOTQUIET, _("Could not find proxy host.\n"));
425 string_set_free (redirections);
429 /* Parse the proxy URL. */
430 result = parseurl (proxy, u, 0);
/* Only a successfully parsed HTTP URL is accepted as a proxy; the
   message depends on which of the two checks failed. */
431 if (result != URLOK || u->proto != URLHTTP)
433 if (u->proto == URLHTTP)
434 logprintf (LOG_NOTQUIET, "Proxy %s: %s.\n", proxy, uerrmsg(result));
436 logprintf (LOG_NOTQUIET, _("Proxy %s: Must be HTTP.\n"), proxy);
439 string_set_free (redirections);
446 assert (u->proto != URLFILE); /* #### Implement me! */
/* HTTP (and, in the first variant of the test, HTTPS) is handled by
   http_loop(); FTP by ftp_loop(). */
450 if (u->proto == URLHTTP || u->proto == URLHTTPS )
452 if (u->proto == URLHTTP)
453 #endif /* HAVE_SSL */
454 result = http_loop (u, &mynewloc, dt);
455 else if (u->proto == URLFTP)
457 /* If this is a redirection, we must not allow recursive FTP
458 retrieval, so we save recursion to oldrec, and restore it
460 int oldrec = opt.recursive;
463 result = ftp_loop (u, dt);
464 opt.recursive = oldrec;
465 /* There is a possibility of having HTTP being redirected to
466 FTP. In these cases we must decide whether the text is HTML
467 according to the suffix. The HTML suffixes are `.html' and
468 `.htm', case-insensitive.
470 #### All of this is, of course, crap. These types should be
471 determined through mailcap. */
/* The suffix test applies only to FTP results that were reached
   through at least one redirection and have a local file name. */
472 if (redirections && u->local && (u->proto == URLFTP ))
474 char *suf = suffix (u->local);
475 if (suf && (!strcasecmp (suf, "html") || !strcasecmp (suf, "htm")))
/* NEWLOCATION from the protocol loop signals a redirect. */
480 location_changed = (result == NEWLOCATION);
481 if (location_changed)
483 char *construced_newloc;
484 uerr_t newloc_result;
485 struct urlinfo *newloc_struct;
487 assert (mynewloc != NULL);
489 /* The HTTP specs only allow absolute URLs to appear in
490 redirects, but a ton of boneheaded webservers and CGIs out
491 there break the rules and use relative URLs, and popular
492 browsers are lenient about this, so wget should be too. */
493 construced_newloc = url_concat (url, mynewloc);
495 mynewloc = construced_newloc;
497 /* Now, see if this new location makes sense. */
498 newloc_struct = newurl ();
499 newloc_result = parseurl (mynewloc, newloc_struct, 1);
500 if (newloc_result != URLOK)
502 logprintf (LOG_NOTQUIET, "%s: %s.\n", mynewloc, uerrmsg (newloc_result));
503 freeurl (newloc_struct, 1);
506 string_set_free (redirections);
512 /* Now mynewloc will become newloc_struct->url, because if the
513 Location contained relative paths like .././something, we
514 don't want that propagating as url. */
516 mynewloc = xstrdup (newloc_struct->url);
520 redirections = make_string_hash_table (0);
521 /* Add current URL immediately so we can detect it as soon
522 as possible in case of a cycle. */
523 string_set_add (redirections, u->url);
526 /* The new location is OK. Let's check for redirection cycle by
527 peeking through the history of redirections. */
528 if (string_set_exists (redirections, newloc_struct->url))
530 logprintf (LOG_NOTQUIET, _("%s: Redirection cycle detected.\n"),
532 freeurl (newloc_struct, 1);
535 string_set_free (redirections);
/* Remember the new location so that a later redirect back to it is
   recognized as a cycle. */
540 string_set_add (redirections, newloc_struct->url);
/* The caller gets its own copy of the local file name. */
552 *file = xstrdup (u->local);
558 string_set_free (redirections);
568 /* Find the URLs in the file and call retrieve_url() for each of
569 them. If HTML is non-zero, treat the file as HTML, and construct
570 the URLs accordingly.
572 If opt.recursive is set, call recursive_retrieve() for each file.
*COUNT is reset to zero and incremented once per URL the loop steps
past. */
574 retrieve_from_file (const char *file, int html, int *count)
577 urlpos *url_list, *cur_url;
/* Pick the HTML or the plain-file URL extractor according to HTML. */
579 url_list = (html ? get_urls_html (file, NULL, FALSE, NULL)
580 : get_urls_file (file));
581 status = RETROK; /* Suppose everything is OK. */
582 *count = 0; /* Reset the URL count. */
584 for (cur_url = url_list; cur_url; cur_url = cur_url->next, ++*count)
586 char *filename, *new_file;
/* The quota is checked before every retrieval. */
589 if (downloaded_exceeds_quota ())
594 status = retrieve_url (cur_url->url, &filename, &new_file, NULL, &dt);
/* Recurse only when opt.recursive is set, the retrieval succeeded,
   and the result is flagged as HTML text. */
595 if (opt.recursive && status == RETROK && (dt & TEXTHTML))
596 status = recursive_retrieve (filename, new_file ? new_file
/* --delete-after: remove the downloaded file if it exists; a failed
   unlink is only logged. */
599 if (filename && opt.delete_after && file_exists_p (filename))
601 DEBUGP (("Removing file due to --delete-after in"
602 " retrieve_from_file():\n"));
603 logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
604 if (unlink (filename))
605 logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
/* Release the names obtained from retrieve_url(). */
609 FREE_MAYBE (new_file);
610 FREE_MAYBE (filename);
613 /* Free the linked list of URL-s. */
614 free_urlpos (url_list);
619 /* Print `giving up', or `retrying', depending on the impending
620 action. N1 and N2 are the attempt number and the attempt limit.
"Giving up" is chosen only when N1 equals N2; every other combination
reports a retry. The message is written at LOG_VERBOSE level. */
622 printwhat (int n1, int n2)
624 logputs (LOG_VERBOSE, (n1 == n2) ? _("Giving up.\n\n") : _("Retrying.\n\n"));
627 /* Increment opt.downloaded by BY_HOW_MUCH. If an overflow occurs,
628 set opt.downloaded_overflow to 1. On overflow the counter itself is
set to the all-ones value of VERY_LONG_TYPE. */
630 downloaded_increase (unsigned long by_how_much)
/* The overflow flag is tested before anything is added. */
633 if (opt.downloaded_overflow)
/* Keep the previous total so that wrap-around can be detected after
   the addition. */
635 old = opt.downloaded;
636 opt.downloaded += by_how_much;
/* A sum smaller than the previous total means the addition wrapped:
   record the overflow and pin the counter to all-ones. */
637 if (opt.downloaded < old) /* carry flag, where are you when I
641 opt.downloaded_overflow = 1;
642 opt.downloaded = ~((VERY_LONG_TYPE)0);
646 /* Return non-zero if the downloaded amount of bytes exceeds the
647 desired quota. If quota is not set or if the amount overflowed, 0 is returned.
650 downloaded_exceeds_quota (void)
654 if (opt.downloaded_overflow)
655 /* We don't really know. (Wildly) assume not. */
/* Strict comparison: reaching the quota exactly does not count as
   exceeding it. */
658 return opt.downloaded > opt.quota;
661 /* If opt.wait or opt.waitretry are specified, and if certain
662 conditions are met, sleep the appropriate number of seconds. See
663 the documentation of --wait and --waitretry for more information.
665 COUNT is the count of current retrieval, beginning with 1. */
668 sleep_between_retrievals (int count)
670 static int first_retrieval = 1;
672 if (!first_retrieval && (opt.wait || opt.waitretry))
674 if (opt.waitretry && count > 1)
676 /* If opt.waitretry is specified and this is a retry, wait
677 for COUNT-1 number of seconds, or for opt.waitretry
679 if (count <= opt.waitretry)
682 sleep (opt.waitretry);
685 /* Otherwise, check if opt.wait is specified. If so, sleep. */