2 Copyright (C) 1995, 1996, 1997, 1998 Free Software Foundation, Inc.
4 This file is part of Wget.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
24 #include <sys/types.h>
27 #endif /* HAVE_UNISTD_H */
33 #endif /* HAVE_STRING_H */
47 LARGE_INTEGER internal_time;
49 /* Internal variables used by the timer. */
50 static long internal_secs, internal_msecs;
53 void logflush PARAMS ((void));
56 uerr_t http_loop PARAMS ((struct urlinfo *, char **, int *));
58 /* Flags for show_progress(). */
59 enum spflags { SP_NONE, SP_INIT, SP_FINISH };
61 static int show_progress PARAMS ((long, long, enum spflags));
63 /* Reads the contents of file descriptor FD, until it is closed, or a
64 read error occurs. The data is read in 8K chunks, and stored to
65 stream fp, which should have been opened for writing. If RBUF is
66 non-NULL and its file descriptor is equal to FD, flush RBUF first.
67 This function will *not* use the rbuf_* functions!
69 The EXPECTED argument is passed to show_progress() unchanged, but
72 If opt.verbose is set, the progress is also shown. RESTVAL
73 represents a value from which to start downloading (which will be
74 shown accordingly). If RESTVAL is non-zero, the stream should have
75 been open for appending.
77 The function exits and returns codes of 0, -1 and -2 if the
78 connection was closed, there was a read error, or if it could not
79 write to the output stream, respectively.
81 IMPORTANT: The function flushes the contents of the buffer in
82 rbuf_flush() before actually reading from fd. If you wish to read
83 from fd immediately, flush or discard the buffer. */
85 get_contents (int fd, FILE *fp, long *len, long restval, long expected,
93 show_progress (restval, expected, SP_INIT);
94 if (rbuf && RBUF_FD (rbuf) == fd)
96 while ((res = rbuf_flush (rbuf, c, sizeof (c))) != 0)
98 if (fwrite (c, sizeof (char), res, fp) < res)
102 if (show_progress (res, expected, SP_NONE))
108 /* Read from fd while there is available data. */
111 res = iread (fd, c, sizeof (c));
114 if (fwrite (c, sizeof (char), res, fp) < res)
118 if (show_progress (res, expected, SP_NONE))
127 show_progress (0, expected, SP_FINISH);
132 print_percentage (long bytes, long expected)
134 int percentage = (int)(100.0 * bytes / expected);
135 logprintf (LOG_VERBOSE, " [%3d%%]", percentage);
138 /* Show the dotted progress report of file loading. Called with
139 length and a flag to tell it whether to reset or not. It keeps the
140 offset information in static local variables.
142 Return value: 1 or 0, designating whether any dots have been drawn.
144 If FLAGS is SP_INIT, the routine will initialize.
146 If the res is non-zero, res/line_bytes lines are skipped
147 (meaning the appropriate number of kilobytes), and the number of
148 "dots" fitting on the first line are drawn as ','. */
150 show_progress (long res, long expected, enum spflags flags)
152 static long line_bytes;
154 static int ndot, nrow;
157 if (flags == SP_FINISH)
162 char *tmpstr = (char *)alloca (2 * opt.dots_in_line + 1);
164 for (; dot < opt.dots_in_line; dot++)
166 if (!(dot % opt.dot_spacing))
171 logputs (LOG_VERBOSE, tmpstr);
172 print_percentage (nrow * line_bytes + ndot * opt.dot_bytes + offs,
175 logputs (LOG_VERBOSE, "\n\n");
179 /* Temporarily disable flushing. */
181 /* SP_INIT means initialization. If res is non-zero, it also means that
182 the retrieval is *not* done from the beginning. The part that
183 was already retrieved is not shown again. */
184 if (flags == SP_INIT)
186 /* Generic initialization of static variables. */
189 line_bytes = (long)opt.dots_in_line * opt.dot_bytes;
192 if (res >= line_bytes)
194 nrow = res / line_bytes;
196 logprintf (LOG_VERBOSE,
197 _("\n [ skipping %dK ]"),
198 (int) ((nrow * line_bytes) / 1024));
202 logprintf (LOG_VERBOSE, "\n%5ldK ->", nrow * line_bytes / 1024);
204 /* Offset gets incremented by current value. */
206 /* While offset is >= opt.dot_bytes, print dots, taking care to
207 emit a status message after every opt.dots_in_line dots. */
208 for (; offs >= opt.dot_bytes; offs -= opt.dot_bytes)
210 if (!(ndot % opt.dot_spacing))
211 logputs (LOG_VERBOSE, " ");
213 logputs (LOG_VERBOSE, flags == SP_INIT ? "," : ".");
215 if (ndot == opt.dots_in_line)
220 print_percentage (nrow * line_bytes, expected);
221 logprintf (LOG_VERBOSE, "\n%5ldK ->", nrow * line_bytes / 1024);
224 /* Reenable flushing. */
227 /* Force flush. #### Oh, what a kludge! */
232 /* Reset the internal timer. */
237 /* Under Unix, the preferred way to measure the passage of time is
238 through gettimeofday() because of its granularity. However, on
239 some old or weird systems, gettimeofday() might not be available.
240 There we use the simple time(). */
241 # ifdef HAVE_GETTIMEOFDAY
243 gettimeofday (&t, NULL);
244 internal_secs = t.tv_sec;
245 internal_msecs = t.tv_usec / 1000;
246 # else /* not HAVE_GETTIMEOFDAY */
247 internal_secs = time (NULL);
249 # endif /* not HAVE_GETTIMEOFDAY */
251 /* Under Windows, use Windows-specific APIs. */
255 SystemTimeToFileTime(&st,&ft);
256 internal_time.HighPart = ft.dwHighDateTime;
257 internal_time.LowPart = ft.dwLowDateTime;
261 /* Return the time elapsed from the last call to reset_timer(), in
267 # ifdef HAVE_GETTIMEOFDAY
269 gettimeofday (&t, NULL);
270 return ((t.tv_sec - internal_secs) * 1000
271 + (t.tv_usec / 1000 - internal_msecs));
272 # else /* not HAVE_GETTIMEOFDAY */
273 return 1000 * ((long)time (NULL) - internal_secs);
274 # endif /* not HAVE_GETTIMEOFDAY */
280 SystemTimeToFileTime(&st,&ft);
281 li.HighPart = ft.dwHighDateTime;
282 li.LowPart = ft.dwLowDateTime;
283 return (long) ((li.QuadPart - internal_time.QuadPart) / 1e4);
287 /* Print out the appropriate download rate. Appropriate means that if
288 rate is > 1024 bytes per second, kilobytes are used, and if rate >
289 1024 * 1024 bytes per second, megabytes are used. */
291 rate (long bytes, long msecs)
298 dlrate = (double)1000 * bytes / msecs;
299 /* #### Should these strings be translatable? */
301 sprintf (res, "%.2f B/s", dlrate);
302 else if (dlrate < 1024.0 * 1024.0)
303 sprintf (res, "%.2f KB/s", dlrate / 1024.0);
305 sprintf (res, "%.2f MB/s", dlrate / (1024.0 * 1024.0));
309 #define USE_PROXY_P(u) (opt.use_proxy && getproxy((u)->proto) \
310 && no_proxy_match((u)->host, \
311 (const char **)opt.no_proxy))
313 /* Retrieve the given URL. Decides which loop to call -- HTTP, FTP,
314 or simply copy it with file:// (#### the latter not yet
317 retrieve_url (const char *origurl, char **file, char **newloc,
318 const char *refurl, int *dt)
322 int location_changed, already_redirected, dummy;
324 char *mynewloc, *proxy;
328 /* If dt is NULL, just ignore it. */
331 url = xstrdup (origurl);
336 already_redirected = 0;
341 result = parseurl (url, u, already_redirected);
345 logprintf (LOG_NOTQUIET, "%s: %s.\n", url, uerrmsg (result));
349 /* Set the referer. */
351 u->referer = xstrdup (refurl);
355 u->referer = xstrdup (opt.referer);
360 local_use_proxy = USE_PROXY_P (u);
363 struct urlinfo *pu = newurl ();
365 /* Copy the original URL to new location. */
366 memcpy (pu, u, sizeof (*u));
367 pu->proxy = NULL; /* A minor correction :) */
368 /* Initialize u to nil. */
369 memset (u, 0, sizeof (*u));
371 /* Get the appropriate proxy server, appropriate for the
373 proxy = getproxy (pu->proto);
376 logputs (LOG_NOTQUIET, _("Could not find proxy host.\n"));
380 /* Parse the proxy URL. */
381 result = parseurl (proxy, u, 0);
382 if (result != URLOK || u->proto != URLHTTP)
384 if (u->proto == URLHTTP)
385 logprintf (LOG_NOTQUIET, "Proxy %s: %s.\n", proxy, uerrmsg(result));
387 logprintf (LOG_NOTQUIET, _("Proxy %s: Must be HTTP.\n"), proxy);
394 assert (u->proto != URLFILE); /* #### Implement me! */
397 if (u->proto == URLHTTP)
398 result = http_loop (u, &mynewloc, dt);
399 else if (u->proto == URLFTP)
401 /* If this is a redirection, we must not allow recursive FTP
402 retrieval, so we save recursion to oldrec, and restore it
404 int oldrec = opt.recursive;
405 if (already_redirected)
407 result = ftp_loop (u, dt);
408 opt.recursive = oldrec;
409 /* There is a possibility of having HTTP being redirected to
410 FTP. In these cases we must decide whether the text is HTML
411 according to the suffix. The HTML suffixes are `.html' and
412 `.htm', case-insensitive.
414 #### All of this is, of course, crap. These types should be
415 determined through mailcap. */
416 if (already_redirected && u->local && (u->proto == URLFTP ))
418 char *suf = suffix (u->local);
419 if (suf && (!strcasecmp (suf, "html") || !strcasecmp (suf, "htm")))
424 location_changed = (result == NEWLOCATION);
425 if (location_changed)
429 /* The HTTP specs only allow absolute URLs to appear in
430 redirects, but a ton of boneheaded webservers and CGIs
431 out there break the rules and use relative URLs, and
432 popular browsers are lenient about this, so wget should
434 char *construced_newloc = url_concat (url, mynewloc);
436 mynewloc = construced_newloc;
438 /* Check for redirection back to itself. */
439 if (url_equal (url, mynewloc))
441 logprintf (LOG_NOTQUIET, _("%s: Redirection to itself.\n"),
448 already_redirected = 1;
454 *file = xstrdup (u->local);
468 /* Find the URLs in the file and call retrieve_url() for each of
469 them. If HTML is non-zero, treat the file as HTML, and construct
470 the URLs accordingly.
472 If opt.recursive is set, call recursive_retrieve() for each file. */
474 retrieve_from_file (const char *file, int html, int *count)
477 urlpos *url_list, *cur_url;
479 /* If spider-mode is on, we do not want get_urls_html barfing
480 errors on baseless links. */
481 url_list = (html ? get_urls_html (file, NULL, opt.spider, FALSE)
482 : get_urls_file (file));
483 status = RETROK; /* Suppose everything is OK. */
484 *count = 0; /* Reset the URL count. */
486 for (cur_url = url_list; cur_url; cur_url = cur_url->next, ++*count)
488 char *filename, *new_file;
491 if (opt.quota && opt.downloaded > opt.quota)
496 status = retrieve_url (cur_url->url, &filename, &new_file, NULL, &dt);
497 if (opt.recursive && status == RETROK && (dt & TEXTHTML))
498 status = recursive_retrieve (filename, new_file ? new_file
501 if (filename && opt.delete_after && file_exists_p (filename))
503 DEBUGP (("Removing file due to --delete-after in"
504 " retrieve_from_file():\n"));
505 logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
506 if (unlink (filename))
507 logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
511 FREE_MAYBE (new_file);
512 FREE_MAYBE (filename);
515 /* Free the linked list of URL-s. */
516 free_urlpos (url_list);
521 /* Print `giving up', or `retrying', depending on the impending
522 action. N1 and N2 are the attempt number and the attempt limit. */
524 printwhat (int n1, int n2)
526 logputs (LOG_VERBOSE, (n1 == n2) ? _("Giving up.\n\n") : _("Retrying.\n\n"));