2 Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001 Free Software Foundation, Inc.
4 This file is part of Wget.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
24 #include <sys/types.h>
27 #endif /* HAVE_UNISTD_H */
33 #endif /* HAVE_STRING_H */
51 LARGE_INTEGER internal_time;
53 /* Internal variables used by the timer. */
54 static long internal_secs, internal_msecs;
57 /* See the comment in gethttp() for why this is needed. */
58 int global_download_count;
60 void logflush PARAMS ((void));
63 uerr_t http_loop PARAMS ((struct urlinfo *, char **, int *));
65 /* Flags for show_progress(). */
66 enum spflags { SP_NONE, SP_INIT, SP_FINISH };
68 static int show_progress PARAMS ((long, long, enum spflags));
/* Yield the smaller of I and J (I when the two compare equal).  The
   selected argument is expanded twice, so do not pass expressions
   with side effects. */
70 #define MIN(i, j) ((i) <= (j) ? (i) : (j))
72 /* Reads the contents of file descriptor FD, until it is closed, or a
73 read error occurs. The data is read in 8K chunks, and stored to
74 stream fp, which should have been open for writing. If RBUF is
75 non-NULL and its file descriptor is equal to FD, flush RBUF first.
76 This function will *not* use the rbuf_* functions!
78 The EXPECTED argument is passed to show_progress() unchanged and, when USE_EXPECTED is non-zero, also limits how much is read from FD.
81 If opt.verbose is set, the progress is also shown. RESTVAL
82 represents a value from which to start downloading (which will be
83 shown accordingly). If RESTVAL is non-zero, the stream should have
84 been open for appending.
86 The function exits and returns codes of 0, -1 and -2 if the
87 connection was closed, there was a read error, or if it could not
88 write to the output stream, respectively.
90 IMPORTANT: The function flushes the contents of the buffer in
91 rbuf_flush() before actually reading from fd. If you wish to read
92 from fd immediately, flush or discard the buffer. */
94 get_contents (int fd, FILE *fp, long *len, long restval, long expected,
95 struct rbuf *rbuf, int use_expected)
102 show_progress (restval, expected, SP_INIT);
103 if (rbuf && RBUF_FD (rbuf) == fd)
105 while ((res = rbuf_flush (rbuf, c, sizeof (c))) != 0)
107 if (fwrite (c, sizeof (char), res, fp) < res)
111 if (show_progress (res, expected, SP_NONE))
117 /* Read from fd while there is available data.
119 Normally, if expected is 0, it means that it is not known how
120 much data is expected. However, if use_expected is specified,
121 then expected being zero means exactly that. */
122 while (!use_expected || (*len < expected))
124 int amount_to_read = (use_expected
125 ? MIN (expected - *len, sizeof (c))
128 if (rbuf->ssl!=NULL) {
129 res = ssl_iread (rbuf->ssl, c, amount_to_read);
131 #endif /* HAVE_SSL */
132 res = iread (fd, c, amount_to_read);
135 #endif /* HAVE_SSL */
138 if (fwrite (c, sizeof (char), res, fp) < res)
142 if (show_progress (res, expected, SP_NONE))
153 show_progress (0, expected, SP_FINISH);
158 print_percentage (long bytes, long expected)
160 int percentage = (int)(100.0 * bytes / expected);
161 logprintf (LOG_VERBOSE, "%3d%%", percentage);
164 /* Show the dotted progress report of file loading. Called with
165 length and a flag to tell it whether to reset or not. It keeps the
166 offset information in static local variables.
168 Return value: 1 or 0, designating whether any dots have been drawn.
170 If FLAGS is SP_INIT, the routine will initialize.
172 If the res is non-zero, res/line_bytes lines are skipped
173 (meaning the appropriate number of kilobytes), and the number of
174 "dots" fitting on the first line are drawn as ','. */
176 show_progress (long res, long expected, enum spflags flags)
178 static long line_bytes;
179 static long offs, initial_skip;
180 static int ndot, nrow;
181 static long last_timer, time_offset;
184 if (flags == SP_FINISH)
189 char *tmpstr = (char *)alloca (2 * opt.dots_in_line + 1);
191 time_offset = elapsed_time () - last_timer;
192 for (; dot < opt.dots_in_line; dot++)
194 if (!(dot % opt.dot_spacing))
199 logputs (LOG_VERBOSE, tmpstr);
200 print_percentage (nrow * line_bytes + ndot * opt.dot_bytes + offs,
202 logprintf (LOG_VERBOSE, " @%s",
203 rate (ndot * opt.dot_bytes
204 + offs - (initial_skip % line_bytes),
207 logputs (LOG_VERBOSE, "\n\n");
211 /* Temporarily disable flushing. */
213 /* SP_INIT means initialization. If res is set, it also means that
214 the retrieval is *not* done from the beginning. The part that
215 was already retrieved is not shown again. */
216 if (flags == SP_INIT)
218 /* Generic initialization of static variables. */
221 line_bytes = (long)opt.dots_in_line * opt.dot_bytes;
222 last_timer = elapsed_time ();
227 if (res >= line_bytes)
229 nrow = res / line_bytes;
231 logprintf (LOG_VERBOSE,
232 _("\n [ skipping %dK ]"),
233 (int) ((nrow * line_bytes) / 1024));
237 logprintf (LOG_VERBOSE, "\n%5ldK", nrow * line_bytes / 1024);
239 /* Offset gets incremented by current value. */
241 /* While offset is >= opt.dot_bytes, print dots, taking care to
242 end every full line of opt.dots_in_line dots with a status message. */
243 for (; offs >= opt.dot_bytes; offs -= opt.dot_bytes)
245 if (!(ndot % opt.dot_spacing))
246 logputs (LOG_VERBOSE, " ");
248 logputs (LOG_VERBOSE, flags == SP_INIT ? "," : ".");
250 if (ndot == opt.dots_in_line)
252 time_offset = elapsed_time () - last_timer;
253 last_timer += time_offset;
259 print_percentage (nrow * line_bytes, expected);
260 logprintf (LOG_VERBOSE, " @%s",
261 rate (line_bytes - (initial_skip % line_bytes),
265 logprintf (LOG_VERBOSE, "\n%5ldK", nrow * line_bytes / 1024);
268 /* Reenable flushing. */
271 /* Force flush. #### Oh, what a kludge! */
276 /* Reset the internal timer. */
281 /* Under Unix, the preferred way to measure the passage of time is
282 through gettimeofday() because of its granularity. However, on
283 some old or weird systems, gettimeofday() might not be available.
284 There we use the simple time(). */
285 # ifdef HAVE_GETTIMEOFDAY
287 gettimeofday (&t, NULL);
288 internal_secs = t.tv_sec;
289 internal_msecs = t.tv_usec / 1000;
290 # else /* not HAVE_GETTIMEOFDAY */
291 internal_secs = time (NULL);
293 # endif /* not HAVE_GETTIMEOFDAY */
295 /* Under Windows, use Windows-specific APIs. */
299 SystemTimeToFileTime(&st,&ft);
300 internal_time.HighPart = ft.dwHighDateTime;
301 internal_time.LowPart = ft.dwLowDateTime;
305 /* Return the time elapsed from the last call to reset_timer(), in milliseconds. */
311 # ifdef HAVE_GETTIMEOFDAY
313 gettimeofday (&t, NULL);
314 return ((t.tv_sec - internal_secs) * 1000
315 + (t.tv_usec / 1000 - internal_msecs));
316 # else /* not HAVE_GETTIMEOFDAY */
317 return 1000 * ((long)time (NULL) - internal_secs);
318 # endif /* not HAVE_GETTIMEOFDAY */
324 SystemTimeToFileTime(&st,&ft);
325 li.HighPart = ft.dwHighDateTime;
326 li.LowPart = ft.dwLowDateTime;
327 return (long) ((li.QuadPart - internal_time.QuadPart) / 1e4);
331 /* Print out the appropriate download rate. Appropriate means that if
332 rate is > 1024 bytes per second, kilobytes are used, and if rate >
333 1024 * 1024 bytes per second, megabytes are used.
335 If PAD is non-zero, strings will be padded to the width of 7
336 characters (xxxx.xx). */
338 rate (long bytes, long msecs, int pad)
345 dlrate = (double)1000 * bytes / msecs;
347 sprintf (res, pad ? "%7.2f B/s" : "%.2f B/s", dlrate);
348 else if (dlrate < 1024.0 * 1024.0)
349 sprintf (res, pad ? "%7.2f KB/s" : "%.2f KB/s", dlrate / 1024.0);
351 sprintf (res, pad ? "%7.2f MB/s" : "%.2f MB/s", dlrate / (1024.0 * 1024.0));
/* Non-zero when all of the following hold: opt.use_proxy is set,
   getproxy() returns a non-null result for U's protocol, and
   no_proxy_match() returns non-zero for U's host checked against
   opt.no_proxy.  U is expanded twice, so pass an expression without
   side effects.
   NOTE(review): no_proxy_match() is defined elsewhere; presumably a
   non-zero result means the host is not excluded from proxying --
   confirm. */
355 #define USE_PROXY_P(u) (opt.use_proxy && getproxy((u)->proto) \
356 && no_proxy_match((u)->host, \
357 (const char **)opt.no_proxy))
359 /* Retrieve the given URL. Decides which loop to call -- HTTP(S), FTP,
360 or simply copy it with file:// (#### the latter not yet
363 retrieve_url (const char *origurl, char **file, char **newloc,
364 const char *refurl, int *dt)
368 int location_changed, dummy;
370 char *mynewloc, *proxy;
372 struct hash_table *redirections = NULL;
374 /* If dt is NULL, just ignore it. */
377 url = xstrdup (origurl);
385 result = parseurl (url, u, 0);
388 logprintf (LOG_NOTQUIET, "%s: %s.\n", url, uerrmsg (result));
391 string_set_free (redirections);
398 /* Set the referer. */
400 u->referer = xstrdup (refurl);
404 u->referer = xstrdup (opt.referer);
409 local_use_proxy = USE_PROXY_P (u);
412 struct urlinfo *pu = newurl ();
414 /* Copy the original URL to new location. */
415 memcpy (pu, u, sizeof (*u));
416 pu->proxy = NULL; /* A minor correction :) */
417 /* Initialize u to nil. */
418 memset (u, 0, sizeof (*u));
420 /* Get the appropriate proxy server, appropriate for the
422 proxy = getproxy (pu->proto);
425 logputs (LOG_NOTQUIET, _("Could not find proxy host.\n"));
428 string_set_free (redirections);
432 /* Parse the proxy URL. */
433 result = parseurl (proxy, u, 0);
434 if (result != URLOK || u->proto != URLHTTP)
436 if (u->proto == URLHTTP)
437 logprintf (LOG_NOTQUIET, "Proxy %s: %s.\n", proxy, uerrmsg(result));
439 logprintf (LOG_NOTQUIET, _("Proxy %s: Must be HTTP.\n"), proxy);
442 string_set_free (redirections);
449 assert (u->proto != URLFILE); /* #### Implement me! */
452 if (u->proto == URLHTTP
454 || u->proto == URLHTTPS
457 result = http_loop (u, &mynewloc, dt);
458 else if (u->proto == URLFTP)
460 /* If this is a redirection, we must not allow recursive FTP
461 retrieval, so we save recursion to oldrec, and restore it
463 int oldrec = opt.recursive;
466 result = ftp_loop (u, dt);
467 opt.recursive = oldrec;
468 /* There is a possibility of having HTTP being redirected to
469 FTP. In these cases we must decide whether the text is HTML
470 according to the suffix. The HTML suffixes are `.html' and
471 `.htm', case-insensitive.
473 #### All of this is, of course, crap. These types should be
474 determined through mailcap. */
475 if (redirections && u->local && (u->proto == URLFTP ))
477 char *suf = suffix (u->local);
478 if (suf && (!strcasecmp (suf, "html") || !strcasecmp (suf, "htm")))
483 location_changed = (result == NEWLOCATION);
484 if (location_changed)
486 char *construced_newloc;
487 uerr_t newloc_result;
488 struct urlinfo *newloc_struct;
490 assert (mynewloc != NULL);
492 /* The HTTP specs only allow absolute URLs to appear in
493 redirects, but a ton of boneheaded webservers and CGIs out
494 there break the rules and use relative URLs, and popular
495 browsers are lenient about this, so wget should be too. */
496 construced_newloc = uri_merge (url, mynewloc);
498 mynewloc = construced_newloc;
500 /* Now, see if this new location makes sense. */
501 newloc_struct = newurl ();
502 newloc_result = parseurl (mynewloc, newloc_struct, 1);
503 if (newloc_result != URLOK)
505 logprintf (LOG_NOTQUIET, "%s: %s.\n", mynewloc, uerrmsg (newloc_result));
506 freeurl (newloc_struct, 1);
509 string_set_free (redirections);
515 /* Now mynewloc will become newloc_struct->url, because if the
516 Location contained relative paths like .././something, we
517 don't want that propagating as url. */
519 mynewloc = xstrdup (newloc_struct->url);
523 redirections = make_string_hash_table (0);
524 /* Add current URL immediately so we can detect it as soon
525 as possible in case of a cycle. */
526 string_set_add (redirections, u->url);
529 /* The new location is OK. Let's check for redirection cycle by
530 peeking through the history of redirections. */
531 if (string_set_exists (redirections, newloc_struct->url))
533 logprintf (LOG_NOTQUIET, _("%s: Redirection cycle detected.\n"),
535 freeurl (newloc_struct, 1);
538 string_set_free (redirections);
543 string_set_add (redirections, newloc_struct->url);
556 register_download (url, u->local);
558 register_html (url, u->local);
565 *file = xstrdup (u->local);
571 string_set_free (redirections);
578 ++global_download_count;
583 /* Find the URLs in the file and call retrieve_url() for each of
584 them. If HTML is non-zero, treat the file as HTML, and construct
585 the URLs accordingly.
587 If opt.recursive is set, call recursive_retrieve() for each file. */
589 retrieve_from_file (const char *file, int html, int *count)
592 urlpos *url_list, *cur_url;
594 url_list = (html ? get_urls_html (file, NULL, FALSE, NULL)
595 : get_urls_file (file));
596 status = RETROK; /* Suppose everything is OK. */
597 *count = 0; /* Reset the URL count. */
599 for (cur_url = url_list; cur_url; cur_url = cur_url->next, ++*count)
601 char *filename, *new_file;
604 if (downloaded_exceeds_quota ())
609 status = retrieve_url (cur_url->url, &filename, &new_file, NULL, &dt);
610 if (opt.recursive && status == RETROK && (dt & TEXTHTML))
611 status = recursive_retrieve (filename, new_file ? new_file
614 if (filename && opt.delete_after && file_exists_p (filename))
616 DEBUGP (("Removing file due to --delete-after in"
617 " retrieve_from_file():\n"));
618 logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
619 if (unlink (filename))
620 logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
624 FREE_MAYBE (new_file);
625 FREE_MAYBE (filename);
628 /* Free the linked list of URL-s. */
629 free_urlpos (url_list);
634 /* Print `giving up', or `retrying', depending on the impending
635 action. N1 and N2 are the attempt number and the attempt limit. */
637 printwhat (int n1, int n2)
639 logputs (LOG_VERBOSE, (n1 == n2) ? _("Giving up.\n\n") : _("Retrying.\n\n"));
642 /* Increment opt.downloaded by BY_HOW_MUCH. If an overflow occurs,
643 set opt.downloaded_overflow to 1. */
645 downloaded_increase (unsigned long by_how_much)
648 if (opt.downloaded_overflow)
650 old = opt.downloaded;
651 opt.downloaded += by_how_much;
652 if (opt.downloaded < old) /* carry flag, where are you when I
656 opt.downloaded_overflow = 1;
657 opt.downloaded = ~((VERY_LONG_TYPE)0);
661 /* Return non-zero if the downloaded amount of bytes exceeds the
662 desired quota. If quota is not set or if the amount overflowed, 0
665 downloaded_exceeds_quota (void)
669 if (opt.downloaded_overflow)
670 /* We don't really know. (Wildly) assume not. */
673 return opt.downloaded > opt.quota;
676 /* If opt.wait or opt.waitretry are specified, and if certain
677 conditions are met, sleep the appropriate number of seconds. See
678 the documentation of --wait and --waitretry for more information.
680 COUNT is the count of current retrieval, beginning with 1. */
683 sleep_between_retrievals (int count)
685 static int first_retrieval = 1;
687 if (!first_retrieval && (opt.wait || opt.waitretry))
689 if (opt.waitretry && count > 1)
691 /* If opt.waitretry is specified and this is a retry, wait
692 for COUNT-1 number of seconds, or for opt.waitretry
694 if (count <= opt.waitretry)
697 sleep (opt.waitretry);
700 /* Otherwise, check if opt.wait is specified. If so, sleep. */