2 Copyright (C) 1995, 1996, 1997, 1998 Free Software Foundation, Inc.
4 This file is part of Wget.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
24 #include <sys/types.h>
27 #endif /* HAVE_UNISTD_H */
33 #endif /* HAVE_STRING_H */
/* Timer reference point used by the Windows code in this file: it is
   filled from a FILETIME when the timer is reset and subtracted from
   the current time when the elapsed time is computed. */
47 LARGE_INTEGER internal_time;
49 /* Internal variables used by the timer: the seconds and the
   milliseconds recorded when the timer was last reset. */
50 static long internal_secs, internal_msecs;
/* Prototypes for functions whose definitions are not in the visible
   part of this file. */
53 void logflush PARAMS ((void));
56 uerr_t http_loop PARAMS ((struct urlinfo *, char **, int *));
58 /* Flags for show_progress(): SP_INIT starts a report, SP_FINISH
   completes the last line, SP_NONE is an ordinary update. */
59 enum spflags { SP_NONE, SP_INIT, SP_FINISH };
61 static int show_progress PARAMS ((long, long, enum spflags));
63 /* Reads the contents of file descriptor FD, until it is closed, or a
64 read error occurs. The data is read in 8K chunks, and stored to
65 stream fp, which should have been open for writing. If RBUF is
66 non-NULL and its file descriptor is equal to FD, flush RBUF first.
67 This function will *not* use the rbuf_* functions!
69 The EXPECTED argument is passed to show_progress() unchanged, but
is not otherwise used by this function.
72 If opt.verbose is set, the progress is also shown. RESTVAL
73 represents a value from which to start downloading (which will be
74 shown accordingly). If RESTVAL is non-zero, the stream should have
75 been open for appending.
77 The function exits and returns codes of 0, -1 and -2 if the
78 connection was closed, there was a read error, or if it could not
79 write to the output stream, respectively.
81 IMPORTANT: The function flushes the contents of the buffer in
82 rbuf_flush() before actually reading from fd. If you wish to read
83 from fd immediately, flush or discard the buffer. */
/* Copy the data available on FD to the stream FP, driving the dotted
   progress display as it goes.  Bytes still held in RBUF are written
   out first when RBUF buffers the same descriptor. */
85 get_contents (int fd, FILE *fp, long *len, long restval, long expected,
/* Start the progress display at the restart offset RESTVAL. */
93 show_progress (restval, expected, SP_INIT);
/* Drain RBUF only when it belongs to this descriptor. */
94 if (rbuf && RBUF_FD (rbuf) == fd)
/* rbuf_flush() is given room for at most sizeof (c) bytes per call;
   a return value of 0 ends the loop. */
96 while ((res = rbuf_flush (rbuf, c, sizeof (c))) != 0)
/* fwrite() returning fewer items than requested is treated as a
   failure to write to the output stream. */
98 if (fwrite (c, sizeof (char), res, fp) < res)
/* show_progress() returns non-zero when it has drawn dots. */
102 if (show_progress (res, expected, SP_NONE))
108 /* Read from fd while there is available data. */
111 res = iread (fd, c, sizeof (c));
/* Same short-write check as for the buffered data above. */
114 if (fwrite (c, sizeof (char), res, fp) < res)
118 if (show_progress (res, expected, SP_NONE))
/* Complete the last line of the progress display. */
127 show_progress (0, expected, SP_FINISH);
/* Log, at verbose level, the share of EXPECTED that BYTES represents,
   formatted as " [NNN%]".
   NOTE(review): EXPECTED is used as a divisor and is not checked
   against zero in this function -- confirm that every caller passes a
   non-zero value. */
132 print_percentage (long bytes, long expected)
/* The ratio is computed in floating point and then truncated to int. */
134 int percentage = (int)(100.0 * bytes / expected);
135 logprintf (LOG_VERBOSE, " [%3d%%]", percentage);
138 /* Show the dotted progress report of file loading. Called with
139 length and a flag to tell it whether to reset or not. It keeps the
140 offset information in static local variables.
142 Return value: 1 or 0, designating whether any dots have been drawn.
144 If FLAGS is SP_INIT, the routine will initialize.
146 If the res is non-zero, res/line_bytes lines are skipped
147 (meaning the appropriate number of kilobytes), and the number of
148 "dots" fitting on the first line are drawn as ','. */
/* Draw the dotted progress display.  RES is the number of bytes to
   account for in this call, EXPECTED is handed to print_percentage(),
   and FLAGS selects between initialization (SP_INIT), an ordinary
   update (SP_NONE) and completion of the last line (SP_FINISH). */
150 show_progress (long res, long expected, enum spflags flags)
/* Display state kept between calls: bytes represented by one full
   line, dots drawn on the current line, and completed lines. */
152 static long line_bytes;
154 static int ndot, nrow;
/* SP_FINISH: log a padding string for the unfinished line, then the
   percentage for the bytes counted so far, then a blank line. */
157 if (flags == SP_FINISH)
/* Room for two characters per dot position plus a terminator. */
162 char *tmpstr = (char *)alloca (2 * opt.dots_in_line + 1);
164 for (; dot < opt.dots_in_line; dot++)
/* True at the start of each group of opt.dot_spacing dots. */
166 if (!(dot % opt.dot_spacing))
171 logputs (LOG_VERBOSE, tmpstr);
/* Bytes counted so far: full lines, dots on the current line, and the
   remainder that has not yet produced a dot. */
172 print_percentage (nrow * line_bytes + ndot * opt.dot_bytes + offs,
175 logputs (LOG_VERBOSE, "\n\n");
179 /* Temporarily disable flushing. */
181 /* init set means initialization. If res is set, it also means that
182 the retrieval is *not* done from the beginning. The part that
183 was already retrieved is not shown again. */
184 if (flags == SP_INIT)
186 /* Generic initialization of static variables. */
/* The cast makes the multiplication happen in long arithmetic. */
189 line_bytes = (long)opt.dots_in_line * opt.dot_bytes;
/* RES covers at least one full line: count the whole lines it spans
   and report them as skipped rather than drawing them. */
192 if (res >= line_bytes)
194 nrow = res / line_bytes;
196 logprintf (LOG_VERBOSE,
197 _("\n [ skipping %dK ]"),
198 (int) ((nrow * line_bytes) / 1024));
/* Line header: the offset, in kilobytes, at which this line starts. */
202 logprintf (LOG_VERBOSE, "\n%5ldK ->", nrow * line_bytes / 1024);
204 /* Offset gets incremented by current value. */
206 /* While offset is >= opt.dot_bytes, print dots, taking care to
207 precede every 50th dot with a status message. */
/* One dot is drawn for every opt.dot_bytes bytes accumulated. */
208 for (; offs >= opt.dot_bytes; offs -= opt.dot_bytes)
/* A space separates groups of opt.dot_spacing dots. */
210 if (!(ndot % opt.dot_spacing))
211 logputs (LOG_VERBOSE, " ");
/* Dots drawn during initialization are shown as ','; dots drawn in
   later calls are shown as '.'. */
213 logputs (LOG_VERBOSE, flags == SP_INIT ? "," : ".");
/* The line is full: print the percentage for the completed lines and
   start the next line with its kilobyte offset. */
215 if (ndot == opt.dots_in_line)
220 print_percentage (nrow * line_bytes, expected);
221 logprintf (LOG_VERBOSE, "\n%5ldK ->", nrow * line_bytes / 1024);
224 /* Reenable flushing. */
227 /* Force flush. #### Oh, what a kludge! */
232 /* Reset the internal timer.  The current time is stored in
   internal_secs / internal_msecs, or in internal_time on Windows, as
   the reference point for later elapsed-time computations. */
237 /* Under Unix, the preferred way to measure the passage of time is
238 through gettimeofday() because of its granularity. However, on
239 some old or weird systems, gettimeofday() might not be available.
240 There we use the simple time(). */
241 # ifdef HAVE_GETTIMEOFDAY
243 gettimeofday (&t, NULL);
244 internal_secs = t.tv_sec;
/* Microseconds are reduced to milliseconds, the unit in which the
   elapsed time is later computed. */
245 internal_msecs = t.tv_usec / 1000;
246 # else /* not HAVE_GETTIMEOFDAY */
/* time() provides whole seconds only. */
247 internal_secs = time (NULL);
249 # endif /* not HAVE_GETTIMEOFDAY */
251 /* Under Windows, use Windows-specific APIs. */
255 SystemTimeToFileTime(&st,&ft);
/* Copy the two halves of the FILETIME into the 64-bit reference. */
256 internal_time.HighPart = ft.dwHighDateTime;
257 internal_time.LowPart = ft.dwLowDateTime;
261 /* Return the time elapsed from the last call to reset_timer(), in
milliseconds. */
267 # ifdef HAVE_GETTIMEOFDAY
269 gettimeofday (&t, NULL);
270 return ((t.tv_sec - internal_secs) * 1000
271 + (t.tv_usec / 1000 - internal_msecs));
272 # else /* not HAVE_GETTIMEOFDAY */
/* The gettimeofday() branch above adds the difference in seconds,
   scaled to milliseconds, to the difference in milliseconds.  This
   fallback uses time(), which counts whole seconds, so its result is
   always a multiple of 1000. */
273 return 1000 * ((long)time (NULL) - internal_secs);
274 # endif /* not HAVE_GETTIMEOFDAY */
/* Windows variant: load the current FILETIME into a 64-bit value and
   subtract the reference stored in internal_time.  FILETIME counts
   100-nanosecond intervals, so dividing the difference by 1e4 gives
   milliseconds. */
280 SystemTimeToFileTime(&st,&ft);
281 li.HighPart = ft.dwHighDateTime;
282 li.LowPart = ft.dwLowDateTime;
283 return (long) ((li.QuadPart - internal_time.QuadPart) / 1e4);
287 /* Print out the appropriate download rate. Appropriate means that if
288 rate is > 1024 bytes per second, kilobytes are used, and if rate >
289 1024 * 1024 bps, megabytes are used. */
/* Format into RES the transfer rate for BYTES bytes moved in MSECS
   milliseconds, using B/s, KB/s or MB/s depending on the magnitude.
   NOTE(review): MSECS is used as a divisor; no zero check is visible
   in these lines -- confirm that one exists before the division. */
291 rate (long bytes, long msecs)
/* Bytes per second: scaled by 1000 because MSECS is in milliseconds. */
298 dlrate = (double)1000 * bytes / msecs;
299 /* #### Should these strings be translatable? */
301 sprintf (res, "%.2f B/s", dlrate);
/* Below 1024 * 1024 bytes per second the rate is shown in kilobytes. */
302 else if (dlrate < 1024.0 * 1024.0)
303 sprintf (res, "%.2f KB/s", dlrate / 1024.0);
/* Anything larger is shown in megabytes. */
305 sprintf (res, "%.2f MB/s", dlrate / (1024.0 * 1024.0));
/* Non-zero when the URL described by U is to be fetched through a
   proxy: proxy use is enabled in the options, getproxy() yields a
   proxy for U's protocol, and no_proxy_match() returns non-zero for
   U's host checked against opt.no_proxy.  U is evaluated more than
   once, so the argument must not have side effects. */
309 #define USE_PROXY_P(u) (opt.use_proxy && getproxy((u)->proto) \
310 && no_proxy_match((u)->host, \
311 (const char **)opt.no_proxy))
313 /* Retrieve the given URL. Decides which loop to call -- HTTP, FTP,
314 or simply copy it with file:// (#### the latter not yet
implemented). */
/* Retrieve ORIGURL, following redirections.
   ORIGURL is duplicated into a working copy before parsing.  REFURL,
   when used, becomes the referer of the request; otherwise
   opt.referer is used.  DT is handed to http_loop() / ftp_loop().
   On the path shown at the end of the function, *FILE receives a
   freshly allocated copy of the local file name. */
317 retrieve_url (const char *origurl, char **file, char **newloc,
318 const char *refurl, int *dt)
322 int location_changed, already_redirected, dummy;
324 char *mynewloc, *proxy;
328 /* If dt is NULL, just ignore it. */
331 url = xstrdup (origurl);
336 already_redirected = 0;
340 /* Parse the URL. RFC2068 requires `Location' to contain an
341 absoluteURI, but many sites break this requirement. #### We
342 should be liberal and accept a relative location, too. */
343 result = parseurl (url, u, already_redirected);
347 logprintf (LOG_NOTQUIET, "%s: %s.\n", url, uerrmsg (result));
351 /* Set the referer. */
353 u->referer = xstrdup (refurl);
357 u->referer = xstrdup (opt.referer);
362 local_use_proxy = USE_PROXY_P (u);
365 struct urlinfo *pu = newurl ();
367 /* Copy the original URL to new location. */
368 memcpy (pu, u, sizeof (*u));
369 pu->proxy = NULL; /* A minor correction :) */
370 /* Initialize u to nil. */
371 memset (u, 0, sizeof (*u));
373 /* Get the appropriate proxy server, appropriate for the
protocol of the original URL. */
375 proxy = getproxy (pu->proto);
378 logputs (LOG_NOTQUIET, _("Could not find proxy host.\n"));
382 /* Parse the proxy URL. */
383 result = parseurl (proxy, u, 0);
/* Only a proxy URL that parses cleanly and is HTTP is accepted. */
384 if (result != URLOK || u->proto != URLHTTP)
386 if (u->proto == URLHTTP)
387 logprintf (LOG_NOTQUIET, "Proxy %s: %s.\n", proxy, uerrmsg(result));
389 logprintf (LOG_NOTQUIET, _("Proxy %s: Must be HTTP.\n"), proxy);
396 assert (u->proto != URLFILE); /* #### Implement me! */
/* Dispatch on the protocol of the URL being fetched. */
399 if (u->proto == URLHTTP)
400 result = http_loop (u, &mynewloc, dt);
401 else if (u->proto == URLFTP)
403 /* If this is a redirection, we must not allow recursive FTP
404 retrieval, so we save recursion to oldrec, and restore it
after ftp_loop() returns. */
406 int oldrec = opt.recursive;
407 if (already_redirected)
409 result = ftp_loop (u, dt);
410 opt.recursive = oldrec;
411 /* There is a possibility of having HTTP being redirected to
412 FTP. In these cases we must decide whether the text is HTML
413 according to the suffix. The HTML suffixes are `.html' and
414 `.htm', case-insensitive.
416 #### All of this is, of course, crap. These types should be
417 determined through mailcap. */
418 if (already_redirected && u->local && (u->proto == URLFTP ))
420 char *suf = suffix (u->local);
421 if (suf && (!strcasecmp (suf, "html") || !strcasecmp (suf, "htm")))
426 location_changed = (result == NEWLOCATION);
427 if (location_changed)
429 /* Check for redirection to oneself. */
430 if (url_equal (url, mynewloc))
432 logprintf (LOG_NOTQUIET, _("%s: Redirection to itself.\n"),
438 /* The HTTP specs only allow absolute URLs to appear in redirects, but
439 a ton of boneheaded webservers and CGIs out there break the rules
440 and use relative URLs, and popular browsers are lenient about this,
441 so wget should be too. */
442 if (strstr(mynewloc, "://") == NULL)
443 /* Doesn't look like an absolute URL (this check will incorrectly
444 think that rare relative URLs containing "://" later in the
445 string are absolute). */
/* The buffer must hold both strings, the '/' that the relative case
   inserts between them, and the terminating NUL: hence + 2.  With
   + 1 the "%s/%s" case wrote one byte past the end of the buffer.
   NOTE(review): the malloc() result is used without a NULL check.
   NOTE(review): both cases append to the whole of URL, not to its
   host or directory part -- confirm that this is the intended
   absolute form. */
447 char *temp = malloc(strlen(url) + strlen(mynewloc) + 2);
449 if (mynewloc[0] == '/')
450 /* "Hostless absolute" URL. Convert to absolute. */
451 sprintf(temp,"%s%s", url, mynewloc);
453 /* Relative URL. Convert to absolute. */
454 sprintf(temp,"%s/%s", url, mynewloc);
/* Remember that the next pass through the loop is a redirection. */
464 already_redirected = 1;
/* The caller receives its own copy of the local file name. */
470 *file = xstrdup (u->local);
484 /* Find the URLs in the file and call retrieve_url() for each of
485 them. If HTML is non-zero, treat the file as HTML, and construct
486 the URLs accordingly.
488 If opt.recursive is set, call recursive_retrieve() for each file. */
/* Retrieve every URL listed in FILE.  HTML selects how the list is
   extracted: get_urls_html() when non-zero, get_urls_file() otherwise.
   *COUNT is reset to zero and incremented once per list entry
   processed. */
490 retrieve_from_file (const char *file, int html, int *count)
493 urlpos *url_list, *cur_url;
495 /* If spider-mode is on, we do not want get_urls_html barfing
496 errors on baseless links. */
497 url_list = (html ? get_urls_html (file, NULL, opt.spider, FALSE)
498 : get_urls_file (file));
499 status = RETROK; /* Suppose everything is OK. */
500 *count = 0; /* Reset the URL count. */
/* Walk the list; the count is advanced together with the cursor. */
502 for (cur_url = url_list; cur_url; cur_url = cur_url->next, ++*count)
504 char *filename, *new_file;
/* A quota of zero means no limit; otherwise test whether the amount
   downloaded so far exceeds it. */
507 if (opt.quota && opt.downloaded > opt.quota)
/* No referer is passed for URLs taken from a file. */
512 status = retrieve_url (cur_url->url, &filename, &new_file, NULL, &dt);
/* Recurse only into documents that were retrieved successfully and
   were flagged as HTML. */
513 if (opt.recursive && status == RETROK && (dt & TEXTHTML))
514 status = recursive_retrieve (filename, new_file ? new_file
/* With opt.delete_after set, remove the downloaded file if it
   exists. */
517 if (filename && opt.delete_after && file_exists_p (filename))
519 DEBUGP (("Removing file due to --delete-after in"
520 " retrieve_from_file():\n"));
521 logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
/* unlink() returns non-zero on failure; the reason is logged. */
522 if (unlink (filename))
523 logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
/* Release the strings handed back by retrieve_url(). */
527 FREE_MAYBE (new_file);
528 FREE_MAYBE (filename);
531 /* Free the linked list of URL-s. */
532 free_urlpos (url_list);
537 /* Print `giving up', or `retrying', depending on the impending
538 action. N1 and N2 are the attempt number and the attempt limit. */
/* Log, at verbose level, "Giving up." when N1 equals N2 and
   "Retrying." otherwise. */
540 printwhat (int n1, int n2)
542 logputs (LOG_VERBOSE, (n1 == n2) ? _("Giving up.\n\n") : _("Retrying.\n\n"));