2 Copyright (C) 1995, 1996, 1997, 1998 Free Software Foundation, Inc.
4 This file is part of Wget.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
24 #include <sys/types.h>
27 #endif /* HAVE_UNISTD_H */
33 #endif /* HAVE_STRING_H */
/* Time stamp stored by the Windows branch of the timer-reset code; the
   elapsed-time code subtracts it from the current time. */
47 LARGE_INTEGER internal_time;
49 /* Internal variables used by the timer: the seconds and milliseconds
   recorded by the non-Windows branch when the timer was last reset. */
50 static long internal_secs, internal_msecs;
53 void logflush PARAMS ((void));
56 uerr_t http_loop PARAMS ((struct urlinfo *, char **, int *));
58 /* Flags for show_progress(): SP_INIT starts a new progress report,
   SP_FINISH completes the last row of one, and SP_NONE is an ordinary
   update. */
59 enum spflags { SP_NONE, SP_INIT, SP_FINISH };
61 static int show_progress PARAMS ((long, long, enum spflags));
63 /* Reads the contents of file descriptor FD, until it is closed, or a
64 read error occurs. The data is read in 8K chunks, and stored to
65 stream fp, which should have been opened for writing. If RBUF is
66 non-NULL and its file descriptor is equal to FD, flush RBUF first.
67 This function will *not* use the rbuf_* functions!
69 The EXPECTED argument is passed to show_progress() unchanged; no
other use of it is visible in this function.
72 If opt.verbose is set, the progress is also shown. RESTVAL
73 represents a value from which to start downloading (which will be
74 shown accordingly). If RESTVAL is non-zero, the stream should have
75 been open for appending.
77 The function exits and returns codes of 0, -1 and -2 if the
78 connection was closed, there was a read error, or if it could not
79 write to the output stream, respectively.
81 IMPORTANT: The function flushes the contents of the buffer in
82 rbuf_flush() before actually reading from fd. If you wish to read
83 from fd immediately, flush or discard the buffer. */
85 get_contents (int fd, FILE *fp, long *len, long restval, long expected,
93 show_progress (restval, expected, SP_INIT);
/* Data already buffered in RBUF for this descriptor is written out
   before FD itself is read. */
94 if (rbuf && RBUF_FD (rbuf) == fd)
96 while ((res = rbuf_flush (rbuf, c, sizeof (c))) != 0)
/* fwrite() returning fewer than RES items means the output stream did
   not accept all of the data. */
98 if (fwrite (c, sizeof (char), res, fp) < res)
102 if (show_progress (res, expected, SP_NONE))
108 /* Read from fd while there is available data. */
111 res = iread (fd, c, sizeof (c));
/* Same short-write check as for the buffered data. */
114 if (fwrite (c, sizeof (char), res, fp) < res)
118 if (show_progress (res, expected, SP_NONE))
/* Complete the progress report. */
127 show_progress (0, expected, SP_FINISH);
/* Log, at LOG_VERBOSE level, what percentage of EXPECTED the value
   BYTES represents, as an integer inside square brackets.
   NOTE(review): EXPECTED is used as a divisor and no zero check is
   visible here -- confirm that callers never pass 0. */
132 print_percentage (long bytes, long expected)
134 int percentage = (int)(100.0 * bytes / expected);
135 logprintf (LOG_VERBOSE, " [%3d%%]", percentage);
138 /* Show the dotted progress report of file loading. Called with
139 length and a flag to tell it whether to reset or not. It keeps the
140 offset information in static local variables.
142 Return value: 1 or 0, designating whether any dots have been drawn.
144 If FLAGS is SP_INIT, the routine will initialize.
146 If the res is non-zero, res/line_bytes lines are skipped
147 (meaning the appropriate number of kilobytes), and the number of
148 "dots" fitting on the first line are drawn as ','. */
150 show_progress (long res, long expected, enum spflags flags)
/* State kept between calls: the number of bytes in one full row, and
   the current dot and row position. */
152 static long line_bytes;
154 static int ndot, nrow;
/* SP_FINISH: a string covering the dot positions left on the current
   row is logged (presumably padding -- TODO confirm against the code
   that fills TMPSTR), followed by the final percentage. */
157 if (flags == SP_FINISH)
/* NOTE(review): the size looks like one character plus one separator
   per dot position, plus the terminator -- confirm. */
162 char *tmpstr = (char *)alloca (2 * opt.dots_in_line + 1);
164 for (; dot < opt.dots_in_line; dot++)
166 if (!(dot % opt.dot_spacing))
171 logputs (LOG_VERBOSE, tmpstr);
172 print_percentage (nrow * line_bytes + ndot * opt.dot_bytes + offs,
175 logputs (LOG_VERBOSE, "\n\n");
179 /* Temporarily disable flushing. */
181 /* SP_INIT means initialization. If res is non-zero, it also means that
182 the retrieval is *not* done from the beginning. The part that
183 was already retrieved is not shown again. */
184 if (flags == SP_INIT)
186 /* Generic initialization of static variables. */
/* Number of bytes represented by one full row of dots. */
189 line_bytes = (long)opt.dots_in_line * opt.dot_bytes;
/* Whole rows that were already retrieved are not drawn; they are
   reported with a single "skipping" line instead. */
192 if (res >= line_bytes)
194 nrow = res / line_bytes;
196 logprintf (LOG_VERBOSE,
197 _("\n [ skipping %dK ]"),
198 (int) ((nrow * line_bytes) / 1024));
202 logprintf (LOG_VERBOSE, "\n%5ldK ->", nrow * line_bytes / 1024);
204 /* Offset gets incremented by current value. */
206 /* While offset is >= opt.dot_bytes, print dots, taking care to end
207 each row of opt.dots_in_line dots with the percentage and to start
the next row with its kilobyte offset. */
208 for (; offs >= opt.dot_bytes; offs -= opt.dot_bytes)
210 if (!(ndot % opt.dot_spacing))
211 logputs (LOG_VERBOSE, " ");
/* Dots drawn during SP_INIT are shown as commas. */
213 logputs (LOG_VERBOSE, flags == SP_INIT ? "," : ".");
215 if (ndot == opt.dots_in_line)
220 print_percentage (nrow * line_bytes, expected);
221 logprintf (LOG_VERBOSE, "\n%5ldK ->", nrow * line_bytes / 1024);
224 /* Reenable flushing. */
227 /* Force flush. #### Oh, what a kludge! */
232 /* Reset the internal timer: store the current time in the file-level
timer variables. */
237 /* Under Unix, the preferred way to measure the passage of time is
238 through gettimeofday() because of its granularity. However, on
239 some old or weird systems, gettimeofday() might not be available.
240 There we use the simple time(). */
241 # ifdef HAVE_GETTIMEOFDAY
243 gettimeofday (&t, NULL);
244 internal_secs = t.tv_sec;
/* Microseconds are reduced to milliseconds. */
245 internal_msecs = t.tv_usec / 1000;
246 # else /* not HAVE_GETTIMEOFDAY */
247 internal_secs = time (NULL);
249 # endif /* not HAVE_GETTIMEOFDAY */
251 /* Under Windows, use Windows-specific APIs. */
255 SystemTimeToFileTime(&st,&ft);
/* Both halves of the FILETIME are copied into the 64-bit time stamp. */
256 internal_time.HighPart = ft.dwHighDateTime;
257 internal_time.LowPart = ft.dwLowDateTime;
261 /* Return the time elapsed from the last call to reset_timer(), in
267 # ifdef HAVE_GETTIMEOFDAY
269 gettimeofday (&t, NULL);
270 return ((t.tv_sec - internal_secs) * 1000
271 + (t.tv_usec / 1000 - internal_msecs));
272 # else /* not HAVE_GETTIMEOFDAY */
/* time() counts whole seconds, so this result is always a multiple of
   1000. */
273 return 1000 * ((long)time (NULL) - internal_secs);
274 # endif /* not HAVE_GETTIMEOFDAY */
/* Windows branch: the current time is converted to a FILETIME and the
   stamp saved when the timer was reset is subtracted from it. */
280 SystemTimeToFileTime(&st,&ft);
281 li.HighPart = ft.dwHighDateTime;
282 li.LowPart = ft.dwLowDateTime;
/* FILETIME counts 100-nanosecond intervals, so dividing by 1e4 gives
   milliseconds. */
283 return (long) ((li.QuadPart - internal_time.QuadPart) / 1e4);
287 /* Print out the appropriate download rate. Appropriate means that if
288 rate is > 1024 bytes per second, kilobytes are used, and if rate >
289 1024 * 1024 bytes per second, megabytes are used. */
291 rate (long bytes, long msecs)
/* Bytes per second: BYTES transferred in MSECS milliseconds, scaled by
   1000. NOTE(review): no guard against MSECS == 0 is visible here --
   confirm that one precedes this division. */
298 dlrate = (double)1000 * bytes / msecs;
299 /* #### Should these strings be translatable? */
/* The rate is formatted into RES with two decimals, in B/s, KB/s or
   MB/s. NOTE(review): sprintf() does not bound the write -- confirm
   that RES is large enough for the longest possible result. */
301 sprintf (res, "%.2f B/s", dlrate);
302 else if (dlrate < 1024.0 * 1024.0)
303 sprintf (res, "%.2f KB/s", dlrate / 1024.0);
305 sprintf (res, "%.2f MB/s", dlrate / (1024.0 * 1024.0));
/* Non-zero when the URL structure U should be retrieved through a
   proxy: opt.use_proxy is set, getproxy() yields a proxy for the
   protocol of U, and no_proxy_match() returns non-zero for the host of
   U checked against opt.no_proxy.
   NOTE(review): this presumes no_proxy_match() returns non-zero when
   the host is NOT covered by the no-proxy list -- confirm. */
309 #define USE_PROXY_P(u) (opt.use_proxy && getproxy((u)->proto) \
310 && no_proxy_match((u)->host, \
311 (const char **)opt.no_proxy))
313 /* Retrieve the given URL. Decides which loop to call -- HTTP, FTP,
314 or simply copy it with file:// (#### the latter not yet
317 retrieve_url (const char *origurl, char **file, char **newloc,
318 const char *refurl, int *dt)
322 int location_changed, dummy;
324 char *mynewloc, *proxy;
328 /* If dt is NULL, just ignore it. */
/* Work on a private copy of ORIGURL. */
331 url = xstrdup (origurl);
341 result = parseurl (url, u, 0);
344 logprintf (LOG_NOTQUIET, "%s: %s.\n", url, uerrmsg (result));
346 free_slist (redirections);
352 /* Set the referer. */
354 u->referer = xstrdup (refurl);
358 u->referer = xstrdup (opt.referer);
/* USE_PROXY_P decides whether this URL is fetched through a proxy. */
363 local_use_proxy = USE_PROXY_P (u);
/* When a proxy is used, PU takes over the data of the original URL and
   U is filled in again from the proxy URL. */
366 struct urlinfo *pu = newurl ();
368 /* Copy the original URL to new location. */
369 memcpy (pu, u, sizeof (*u));
370 pu->proxy = NULL; /* A minor correction :) */
371 /* Initialize u to nil. */
372 memset (u, 0, sizeof (*u));
374 /* Get the appropriate proxy server, appropriate for the
376 proxy = getproxy (pu->proto);
379 logputs (LOG_NOTQUIET, _("Could not find proxy host.\n"));
381 free_slist (redirections);
384 /* Parse the proxy URL. */
385 result = parseurl (proxy, u, 0);
/* The proxy URL must parse cleanly and must itself be an HTTP URL. */
386 if (result != URLOK || u->proto != URLHTTP)
388 if (u->proto == URLHTTP)
389 logprintf (LOG_NOTQUIET, "Proxy %s: %s.\n", proxy, uerrmsg(result));
391 logprintf (LOG_NOTQUIET, _("Proxy %s: Must be HTTP.\n"), proxy);
393 free_slist (redirections);
399 assert (u->proto != URLFILE); /* #### Implement me! */
/* The transfer is handed to the loop that matches the protocol of U. */
402 if (u->proto == URLHTTP)
403 result = http_loop (u, &mynewloc, dt);
404 else if (u->proto == URLFTP)
406 /* If this is a redirection, we must not allow recursive FTP
407 retrieval, so we save recursion to oldrec, and restore it
409 int oldrec = opt.recursive;
412 result = ftp_loop (u, dt);
413 opt.recursive = oldrec;
414 /* There is a possibility of having HTTP being redirected to
415 FTP. In these cases we must decide whether the text is HTML
416 according to the suffix. The HTML suffixes are `.html' and
417 `.htm', case-insensitive.
419 #### All of this is, of course, crap. These types should be
420 determined through mailcap. */
421 if (redirections && u->local && (u->proto == URLFTP ))
423 char *suf = suffix (u->local);
424 if (suf && (!strcasecmp (suf, "html") || !strcasecmp (suf, "htm")))
/* A NEWLOCATION result means that a redirect was received. */
429 location_changed = (result == NEWLOCATION);
430 if (location_changed)
/* NOTE(review): construced_newloc is presumably a misspelling of
   constructed_newloc. */
432 char *construced_newloc;
433 uerr_t newloc_result;
434 struct urlinfo *newloc_struct;
436 assert (mynewloc != NULL);
438 /* The HTTP specs only allow absolute URLs to appear in
439 redirects, but a ton of boneheaded webservers and CGIs out
440 there break the rules and use relative URLs, and popular
441 browsers are lenient about this, so wget should be too. */
442 construced_newloc = url_concat (url, mynewloc);
444 mynewloc = construced_newloc;
446 /* Now, see if this new location makes sense. */
447 newloc_struct = newurl ();
448 newloc_result = parseurl (mynewloc, newloc_struct, 1);
449 if (newloc_result != URLOK)
451 logprintf (LOG_NOTQUIET, "%s: %s.\n", mynewloc, uerrmsg (newloc_result));
452 freeurl (newloc_struct, 1);
454 free_slist (redirections);
458 /* Now mynewloc will become newloc_struct->url, because if the
459 Location contained relative paths like .././something, we
460 don't want that propagating as url. */
462 mynewloc = xstrdup (newloc_struct->url);
464 /* Check for redirection back to itself. */
465 if (!strcmp (u->url, newloc_struct->url))
467 logprintf (LOG_NOTQUIET, _("%s: Redirection to itself.\n"),
469 freeurl (newloc_struct, 1);
471 free_slist (redirections);
475 /* The new location is OK. Let's check for redirection cycle by
476 peeking through the history of redirections. */
477 if (in_slist (redirections, newloc_struct->url))
479 logprintf (LOG_NOTQUIET, _("%s: Redirection cycle detected.\n"),
481 freeurl (newloc_struct, 1);
483 free_slist (redirections);
/* The new location is added to the history that the cycle check
   consults. */
487 redirections = add_slist (redirections, newloc_struct->url, NOSORT);
/* The caller receives its own copy of the local file name. */
499 *file = xstrdup (u->local);
504 free_slist (redirections);
514 /* Find the URLs in the file and call retrieve_url() for each of
515 them. If HTML is non-zero, treat the file as HTML, and construct
516 the URLs accordingly.
518 If opt.recursive is set, call recursive_retrieve() for each file. */
520 retrieve_from_file (const char *file, int html, int *count)
523 urlpos *url_list, *cur_url;
525 /* If spider-mode is on, we do not want get_urls_html barfing
526 errors on baseless links. */
527 url_list = (html ? get_urls_html (file, NULL, opt.spider, FALSE)
528 : get_urls_file (file));
529 status = RETROK; /* Suppose everything is OK. */
530 *count = 0; /* Reset the URL count. */
/* One pass per URL in the list; *COUNT is incremented after each
   pass. */
532 for (cur_url = url_list; cur_url; cur_url = cur_url->next, ++*count)
534 char *filename, *new_file;
/* The quota is checked before each retrieval. */
537 if (downloaded_exceeds_quota ())
542 status = retrieve_url (cur_url->url, &filename, &new_file, NULL, &dt);
/* In recursive mode, a successfully retrieved HTML document is passed
   on to recursive_retrieve(). */
543 if (opt.recursive && status == RETROK && (dt & TEXTHTML))
544 status = recursive_retrieve (filename, new_file ? new_file
547 if (filename && opt.delete_after && file_exists_p (filename))
/* --delete-after: the downloaded file is removed again here. */
549 DEBUGP (("Removing file due to --delete-after in"
550 " retrieve_from_file():\n"));
551 logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
552 if (unlink (filename))
553 logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
/* The per-URL strings are released at the end of every pass
   (FREE_MAYBE presumably tolerates NULL -- confirm). */
557 FREE_MAYBE (new_file);
558 FREE_MAYBE (filename);
561 /* Free the linked list of URL-s. */
562 free_urlpos (url_list);
567 /* Print `giving up', or `retrying', depending on the impending
568 action. N1 and N2 are the attempt number and the attempt limit. */
570 printwhat (int n1, int n2)
/* Equal values mean the attempt limit has been reached, so no further
   retry follows. */
572 logputs (LOG_VERBOSE, (n1 == n2) ? _("Giving up.\n\n") : _("Retrying.\n\n"));
575 /* Increment opt.downloaded by BY_HOW_MUCH. If an overflow occurs,
576 set opt.downloaded_overflow to 1. */
578 downloaded_increase (unsigned long by_how_much)
/* An overflow recorded by an earlier call is checked first
   (presumably the counter is then left alone -- TODO confirm). */
581 if (opt.downloaded_overflow)
/* The previous value is kept so that wrap-around of the addition can be
   detected: a sum smaller than the old value means the counter
   overflowed. In that case the overflow flag is set and the counter
   is pinned at the all-ones value. */
583 old = opt.downloaded;
584 opt.downloaded += by_how_much;
585 if (opt.downloaded < old) /* carry flag, where are you when I
589 opt.downloaded_overflow = 1;
590 opt.downloaded = ~((VERY_LONG_TYPE)0);
594 /* Return non-zero if the downloaded amount of bytes exceeds the
595 desired quota. If quota is not set or if the amount overflowed, 0
598 downloaded_exceeds_quota (void)
602 if (opt.downloaded_overflow)
603 /* We don't really know. (Wildly) assume not. */
/* Strictly greater: a total exactly equal to the quota does not count
   as exceeding it. */
606 return opt.downloaded > opt.quota;