/* File retrieval.
- Copyright (C) 1995, 1996, 1997, 1998 Free Software Foundation, Inc.
+ Copyright (C) 1995, 1996, 1997, 1998, 2000 Free Software Foundation, Inc.
This file is part of Wget.
#include "ftp.h"
#include "host.h"
#include "connect.h"
+#include "hash.h"
+
+#ifndef errno
+extern int errno;
+#endif
#ifdef WINDOWS
LARGE_INTEGER internal_time;
static int show_progress PARAMS ((long, long, enum spflags));
+#define MIN(i, j) ((i) <= (j) ? (i) : (j))
+
/* Reads the contents of file descriptor FD, until it is closed, or a
read error occurs. The data is read in 8K chunks, and stored to
stream fp, which should have been open for writing. If BUF is
from fd immediately, flush or discard the buffer. */
int
get_contents (int fd, FILE *fp, long *len, long restval, long expected,
- struct rbuf *rbuf)
+ struct rbuf *rbuf, int use_expected)
{
- int res;
+ int res = 0;
static char c[8192];
*len = restval;
*len += res;
}
}
- /* Read from fd while there is available data. */
- do
+ /* Read from fd while there is available data.
+
+ Normally, if expected is 0, it means that it is not known how
+ much data is expected. However, if use_expected is specified,
+ then an expected value of zero means that exactly zero bytes
+ are expected. */
+ while (!use_expected || (*len < expected))
{
- res = iread (fd, c, sizeof (c));
+ int amount_to_read = (use_expected
+ ? MIN (expected - *len, sizeof (c))
+ : sizeof (c));
+#ifdef HAVE_SSL
+ if (rbuf->ssl!=NULL) {
+ res = ssl_iread (rbuf->ssl, c, amount_to_read);
+ } else {
+#endif /* HAVE_SSL */
+ res = iread (fd, c, amount_to_read);
+#ifdef HAVE_SSL
+ }
+#endif /* HAVE_SSL */
if (res > 0)
{
if (fwrite (c, sizeof (char), res, fp) < res)
}
*len += res;
}
- } while (res > 0);
+ else
+ break;
+ }
if (res < -1)
res = -1;
if (opt.verbose)
&& no_proxy_match((u)->host, \
(const char **)opt.no_proxy))
-/* Retrieve the given URL. Decides which loop to call -- HTTP, FTP,
+/* Retrieve the given URL. Decides which loop to call -- HTTP(S), FTP,
or simply copy it with file:// (#### the latter not yet
implemented!). */
uerr_t
{
uerr_t result;
char *url;
- int location_changed, already_redirected, dummy;
+ int location_changed, dummy;
int local_use_proxy;
char *mynewloc, *proxy;
struct urlinfo *u;
-
+ struct hash_table *redirections = NULL;
/* If dt is NULL, just ignore it. */
if (!dt)
*newloc = NULL;
if (file)
*file = NULL;
- already_redirected = 0;
- again:
u = newurl ();
/* Parse the URL. */
- result = parseurl (url, u, already_redirected);
+ result = parseurl (url, u, 0);
if (result != URLOK)
{
- freeurl (u, 1);
logprintf (LOG_NOTQUIET, "%s: %s.\n", url, uerrmsg (result));
+ freeurl (u, 1);
+ if (redirections)
+ string_set_free (redirections);
+ xfree (url);
return result;
}
+ redirected:
+
/* Set the referer. */
if (refurl)
u->referer = xstrdup (refurl);
{
logputs (LOG_NOTQUIET, _("Could not find proxy host.\n"));
freeurl (u, 1);
+ if (redirections)
+ string_set_free (redirections);
+ xfree (url);
return PROXERR;
}
/* Parse the proxy URL. */
else
logprintf (LOG_NOTQUIET, _("Proxy %s: Must be HTTP.\n"), proxy);
freeurl (u, 1);
+ if (redirections)
+ string_set_free (redirections);
+ xfree (url);
return PROXERR;
}
u->proto = URLHTTP;
assert (u->proto != URLFILE); /* #### Implement me! */
mynewloc = NULL;
+#ifdef HAVE_SSL
+ if (u->proto == URLHTTP || u->proto == URLHTTPS )
+#else
if (u->proto == URLHTTP)
+#endif /* HAVE_SSL */
result = http_loop (u, &mynewloc, dt);
else if (u->proto == URLFTP)
{
retrieval, so we save recursion to oldrec, and restore it
later. */
int oldrec = opt.recursive;
- if (already_redirected)
+ if (redirections)
opt.recursive = 0;
result = ftp_loop (u, dt);
opt.recursive = oldrec;
#### All of this is, of course, crap. These types should be
determined through mailcap. */
- if (already_redirected && u->local && (u->proto == URLFTP ))
+ if (redirections && u->local && (u->proto == URLFTP ))
{
char *suf = suffix (u->local);
if (suf && (!strcasecmp (suf, "html") || !strcasecmp (suf, "htm")))
location_changed = (result == NEWLOCATION);
if (location_changed)
{
- if (mynewloc)
+ char *construced_newloc;
+ uerr_t newloc_result;
+ struct urlinfo *newloc_struct;
+
+ assert (mynewloc != NULL);
+
+ /* The HTTP specs only allow absolute URLs to appear in
+ redirects, but a ton of boneheaded webservers and CGIs out
+ there break the rules and use relative URLs, and popular
+ browsers are lenient about this, so wget should be too. */
+ construced_newloc = url_concat (url, mynewloc);
+ xfree (mynewloc);
+ mynewloc = construced_newloc;
+
+ /* Now, see if this new location makes sense. */
+ newloc_struct = newurl ();
+ newloc_result = parseurl (mynewloc, newloc_struct, 1);
+ if (newloc_result != URLOK)
{
- /* The HTTP specs only allow absolute URLs to appear in
- redirects, but a ton of boneheaded webservers and CGIs
- out there break the rules and use relative URLs, and
- popular browsers are lenient about this, so wget should
- be too. */
- char *construced_newloc = url_concat (url, mynewloc);
- free (mynewloc);
- mynewloc = construced_newloc;
+ logprintf (LOG_NOTQUIET, "%s: %s.\n", mynewloc, uerrmsg (newloc_result));
+ freeurl (newloc_struct, 1);
+ freeurl (u, 1);
+ if (redirections)
+ string_set_free (redirections);
+ xfree (url);
+ xfree (mynewloc);
+ return result;
+ }
+
+ /* Now mynewloc will become newloc_struct->url, because if the
+ Location contained relative paths like .././something, we
+ don't want that propagating as url. */
+ xfree (mynewloc);
+ mynewloc = xstrdup (newloc_struct->url);
+
+ if (!redirections)
+ {
+ redirections = make_string_hash_table (0);
+ /* Add current URL immediately so we can detect it as soon
+ as possible in case of a cycle. */
+ string_set_add (redirections, u->url);
}
- /* Check for redirection to back to itself. */
- if (url_equal (url, mynewloc))
+
+ /* The new location is OK. Let's check for a redirection cycle by
+ peeking through the history of redirections. */
+ if (string_set_exists (redirections, newloc_struct->url))
{
- logprintf (LOG_NOTQUIET, _("%s: Redirection to itself.\n"),
+ logprintf (LOG_NOTQUIET, _("%s: Redirection cycle detected.\n"),
mynewloc);
+ freeurl (newloc_struct, 1);
+ freeurl (u, 1);
+ if (redirections)
+ string_set_free (redirections);
+ xfree (url);
+ xfree (mynewloc);
return WRONGCODE;
}
- free (url);
+ string_set_add (redirections, newloc_struct->url);
+
+ xfree (url);
url = mynewloc;
freeurl (u, 1);
- already_redirected = 1;
- goto again;
+ u = newloc_struct;
+ goto redirected;
}
+
if (file)
{
if (u->local)
*file = NULL;
}
freeurl (u, 1);
+ if (redirections)
+ string_set_free (redirections);
if (newloc)
*newloc = url;
else
- free (url);
+ xfree (url);
return result;
}
uerr_t status;
urlpos *url_list, *cur_url;
- /* If spider-mode is on, we do not want get_urls_html barfing
- errors on baseless links. */
- url_list = (html ? get_urls_html (file, NULL, opt.spider, FALSE)
+ url_list = (html ? get_urls_html (file, NULL, FALSE, NULL)
: get_urls_file (file));
status = RETROK; /* Suppose everything is OK. */
*count = 0; /* Reset the URL count. */
char *filename, *new_file;
int dt;
- if (opt.quota && opt.downloaded > opt.quota)
+ if (downloaded_exceeds_quota ())
{
status = QUOTEXC;
break;
{
logputs (LOG_VERBOSE, (n1 == n2) ? _("Giving up.\n\n") : _("Retrying.\n\n"));
}
+
+/* Increment opt.downloaded by BY_HOW_MUCH. If an overflow occurs,
+ set opt.downloaded_overflow to 1. */
+void
+downloaded_increase (unsigned long by_how_much)
+{
+ VERY_LONG_TYPE old;
+ /* Once the counter has overflowed, its value is meaningless;
+ stop counting rather than accumulate garbage. */
+ if (opt.downloaded_overflow)
+ return;
+ old = opt.downloaded;
+ opt.downloaded += by_how_much;
+ /* Unsigned arithmetic wraps around on overflow, so a result
+ smaller than the old value means the addition wrapped. */
+ if (opt.downloaded < old) /* carry flag, where are you when I
+ need you? */
+ {
+ /* Overflow. Mark it and saturate the counter at the maximum
+ representable value. */
+ opt.downloaded_overflow = 1;
+ opt.downloaded = ~((VERY_LONG_TYPE)0);
+ }
+}
+
+/* Return non-zero if the downloaded amount of bytes exceeds the
+ desired quota. If quota is not set or if the amount overflowed, 0
+ is returned. */
+int
+downloaded_exceeds_quota (void)
+{
+ /* opt.quota == 0 means no quota was requested. */
+ if (!opt.quota)
+ return 0;
+ if (opt.downloaded_overflow)
+ /* We don't really know. (Wildly) assume not. */
+ return 0;
+
+ return opt.downloaded > opt.quota;
+}
+
+/* If opt.wait or opt.waitretry are specified, and if certain
+ conditions are met, sleep the appropriate number of seconds. See
+ the documentation of --wait and --waitretry for more information.
+
+ COUNT is the count of current retrieval, beginning with 1. */
+
+void
+sleep_between_retrievals (int count)
+{
+ /* Persists across calls: ensures we never sleep before the very
+ first retrieval of the session. Note: file-static state, so this
+ function is not reentrant. */
+ static int first_retrieval = 1;
+
+ if (!first_retrieval && (opt.wait || opt.waitretry))
+ {
+ if (opt.waitretry && count > 1)
+ {
+ /* If opt.waitretry is specified and this is a retry, wait
+ for COUNT-1 number of seconds, or for opt.waitretry
+ seconds. (Linear backoff, capped at opt.waitretry.) */
+ if (count <= opt.waitretry)
+ sleep (count - 1);
+ else
+ sleep (opt.waitretry);
+ }
+ else if (opt.wait)
+ /* Otherwise, check if opt.wait is specified. If so, sleep. */
+ sleep (opt.wait);
+ }
+ if (first_retrieval)
+ first_retrieval = 0;
+}