/* File retrieval.
- Copyright (C) 1995, 1996, 1997, 1998 Free Software Foundation, Inc.
+ Copyright (C) 1995, 1996, 1997, 1998, 2000 Free Software Foundation, Inc.
This file is part of Wget.
#include "ftp.h"
#include "host.h"
#include "connect.h"
+#include "hash.h"
+
+#ifndef errno
+extern int errno;
+#endif
#ifdef WINDOWS
LARGE_INTEGER internal_time;
static int show_progress PARAMS ((long, long, enum spflags));
+#define MIN(i, j) ((i) <= (j) ? (i) : (j))
+
/* Reads the contents of file descriptor FD, until it is closed, or a
read error occurs. The data is read in 8K chunks, and stored to
stream fp, which should have been open for writing. If BUF is
from fd immediately, flush or discard the buffer. */
int
get_contents (int fd, FILE *fp, long *len, long restval, long expected,
- struct rbuf *rbuf)
+ struct rbuf *rbuf, int use_expected)
{
- int res;
+ int res = 0;
static char c[8192];
*len = restval;
*len += res;
}
}
- /* Read from fd while there is available data. */
- do
+ /* Read from fd while there is available data.
+
+ Normally, if expected is 0, it means that it is not known how
+ much data is expected. However, if use_expected is specified,
+ then expected being zero means exactly that. */
+ while (!use_expected || (*len < expected))
{
- res = iread (fd, c, sizeof (c));
+ int amount_to_read = (use_expected
+ ? MIN (expected - *len, sizeof (c))
+ : sizeof (c));
+#ifdef HAVE_SSL
+ if (rbuf->ssl!=NULL) {
+ res = ssl_iread (rbuf->ssl, c, amount_to_read);
+ } else {
+#endif /* HAVE_SSL */
+ res = iread (fd, c, amount_to_read);
+#ifdef HAVE_SSL
+ }
+#endif /* HAVE_SSL */
if (res > 0)
{
if (fwrite (c, sizeof (char), res, fp) < res)
}
*len += res;
}
- } while (res > 0);
+ else
+ break;
+ }
if (res < -1)
res = -1;
if (opt.verbose)
&& no_proxy_match((u)->host, \
(const char **)opt.no_proxy))
-/* Retrieve the given URL. Decides which loop to call -- HTTP, FTP,
+/* Retrieve the given URL. Decides which loop to call -- HTTP(S), FTP,
or simply copy it with file:// (#### the latter not yet
implemented!). */
uerr_t
int local_use_proxy;
char *mynewloc, *proxy;
struct urlinfo *u;
- slist *redirections;
+ struct hash_table *redirections = NULL;
/* If dt is NULL, just ignore it. */
if (!dt)
if (file)
*file = NULL;
- redirections = NULL;
-
u = newurl ();
/* Parse the URL. */
result = parseurl (url, u, 0);
{
logprintf (LOG_NOTQUIET, "%s: %s.\n", url, uerrmsg (result));
freeurl (u, 1);
- free_slist (redirections);
+ if (redirections)
+ string_set_free (redirections);
+ xfree (url);
return result;
}
{
logputs (LOG_NOTQUIET, _("Could not find proxy host.\n"));
freeurl (u, 1);
- free_slist (redirections);
+ if (redirections)
+ string_set_free (redirections);
+ xfree (url);
return PROXERR;
}
/* Parse the proxy URL. */
else
logprintf (LOG_NOTQUIET, _("Proxy %s: Must be HTTP.\n"), proxy);
freeurl (u, 1);
- free_slist (redirections);
+ if (redirections)
+ string_set_free (redirections);
+ xfree (url);
return PROXERR;
}
u->proto = URLHTTP;
assert (u->proto != URLFILE); /* #### Implement me! */
mynewloc = NULL;
+#ifdef HAVE_SSL
+ if (u->proto == URLHTTP || u->proto == URLHTTPS )
+#else
if (u->proto == URLHTTP)
+#endif /* HAVE_SSL */
result = http_loop (u, &mynewloc, dt);
else if (u->proto == URLFTP)
{
there break the rules and use relative URLs, and popular
browsers are lenient about this, so wget should be too. */
construced_newloc = url_concat (url, mynewloc);
- free (mynewloc);
+ xfree (mynewloc);
mynewloc = construced_newloc;
/* Now, see if this new location makes sense. */
logprintf (LOG_NOTQUIET, "%s: %s.\n", mynewloc, uerrmsg (newloc_result));
freeurl (newloc_struct, 1);
freeurl (u, 1);
- free_slist (redirections);
+ if (redirections)
+ string_set_free (redirections);
+ xfree (url);
+ xfree (mynewloc);
return result;
}
/* Now mynewloc will become newloc_struct->url, because if the
Location contained relative paths like .././something, we
don't want that propagating as url. */
- free (mynewloc);
+ xfree (mynewloc);
mynewloc = xstrdup (newloc_struct->url);
- /* Check for redirection to back to itself. */
- if (!strcmp (u->url, newloc_struct->url))
+ if (!redirections)
{
- logprintf (LOG_NOTQUIET, _("%s: Redirection to itself.\n"),
- mynewloc);
- freeurl (newloc_struct, 1);
- freeurl (u, 1);
- free_slist (redirections);
- return WRONGCODE;
+ redirections = make_string_hash_table (0);
+ /* Add current URL immediately so we can detect it as soon
+ as possible in case of a cycle. */
+ string_set_add (redirections, u->url);
}
/* The new location is OK. Let's check for redirection cycle by
peeking through the history of redirections. */
- if (in_slist (redirections, newloc_struct->url))
+ if (string_set_exists (redirections, newloc_struct->url))
{
logprintf (LOG_NOTQUIET, _("%s: Redirection cycle detected.\n"),
mynewloc);
freeurl (newloc_struct, 1);
freeurl (u, 1);
- free_slist (redirections);
+ if (redirections)
+ string_set_free (redirections);
+ xfree (url);
+ xfree (mynewloc);
return WRONGCODE;
}
+ string_set_add (redirections, newloc_struct->url);
- redirections = add_slist (redirections, newloc_struct->url, NOSORT);
-
- free (url);
+ xfree (url);
url = mynewloc;
freeurl (u, 1);
u = newloc_struct;
*file = NULL;
}
freeurl (u, 1);
- free_slist (redirections);
+ if (redirections)
+ string_set_free (redirections);
if (newloc)
*newloc = url;
else
- free (url);
+ xfree (url);
return result;
}
uerr_t status;
urlpos *url_list, *cur_url;
- /* If spider-mode is on, we do not want get_urls_html barfing
- errors on baseless links. */
- url_list = (html ? get_urls_html (file, NULL, opt.spider, FALSE)
+ url_list = (html ? get_urls_html (file, NULL, FALSE, NULL)
: get_urls_file (file));
status = RETROK; /* Suppose everything is OK. */
*count = 0; /* Reset the URL count. */
if (!opt.quota)
return 0;
if (opt.downloaded_overflow)
- /* We don't really no. (Wildly) assume not. */
+ /* We don't really know. (Wildly) assume not. */
return 0;
return opt.downloaded > opt.quota;
}
+
+/* If opt.wait or opt.waitretry are specified, and if certain
+ conditions are met, sleep the appropriate number of seconds. See
+ the documentation of --wait and --waitretry for more information.
+
+ COUNT is the count of current retrieval, beginning with 1. */
+
+void
+sleep_between_retrievals (int count)
+{
+ /* Static flag: nonzero only until the first call completes, so the
+ very first retrieval of the whole session never pauses. */
+ static int first_retrieval = 1;
+
+ if (!first_retrieval && (opt.wait || opt.waitretry))
+ {
+ if (opt.waitretry && count > 1)
+ {
+ /* If opt.waitretry is specified and this is a retry, wait
+ for COUNT-1 number of seconds, or for opt.waitretry
+ seconds. */
+ /* i.e. linear backoff (1s, 2s, 3s, ...) capped at
+ opt.waitretry seconds per retry. */
+ if (count <= opt.waitretry)
+ sleep (count - 1);
+ else
+ sleep (opt.waitretry);
+ }
+ else if (opt.wait)
+ /* Otherwise, check if opt.wait is specified. If so, sleep. */
+ sleep (opt.wait);
+ }
+ /* Clear the flag unconditionally so every subsequent call is
+ subject to the waiting logic above. */
+ if (first_retrieval)
+ first_retrieval = 0;
+}