/* File retrieval.
- Copyright (C) 1995, 1996, 1997, 1998 Free Software Foundation, Inc.
+ Copyright (C) 1995, 1996, 1997, 1998, 2000 Free Software Foundation, Inc.
This file is part of Wget.
#include "ftp.h"
#include "host.h"
#include "connect.h"
+#include "hash.h"
#ifdef WINDOWS
LARGE_INTEGER internal_time;
static int show_progress PARAMS ((long, long, enum spflags));
+#define MIN(i, j) ((i) <= (j) ? (i) : (j))
+
/* Reads the contents of file descriptor FD, until it is closed, or a
read error occurs. The data is read in 8K chunks, and stored to
stream fp, which should have been open for writing. If BUF is
from fd immediately, flush or discard the buffer. */
int
get_contents (int fd, FILE *fp, long *len, long restval, long expected,
- struct rbuf *rbuf)
+ struct rbuf *rbuf, int use_expected)
{
- int res;
+ int res = 0;
static char c[8192];
*len = restval;
*len += res;
}
}
- /* Read from fd while there is available data. */
- do
+ /* Read from fd while there is available data.
+
+ Normally, if expected is 0, it means that it is not known how
+ much data is expected. However, if use_expected is nonzero,
+ then expected being zero means that exactly zero bytes are expected. */
+ while (!use_expected || (*len < expected))
{
- res = iread (fd, c, sizeof (c));
+ int amount_to_read = (use_expected
+ ? MIN (expected - *len, sizeof (c))
+ : sizeof (c));
+ res = iread (fd, c, amount_to_read);
if (res > 0)
{
if (fwrite (c, sizeof (char), res, fp) < res)
}
*len += res;
}
- } while (res > 0);
+ else
+ break;
+ }
if (res < -1)
res = -1;
if (opt.verbose)
int local_use_proxy;
char *mynewloc, *proxy;
struct urlinfo *u;
- slist *redirections;
+ struct hash_table *redirections = NULL;
/* If dt is NULL, just ignore it. */
if (!dt)
if (file)
*file = NULL;
- redirections = NULL;
-
u = newurl ();
/* Parse the URL. */
result = parseurl (url, u, 0);
{
logprintf (LOG_NOTQUIET, "%s: %s.\n", url, uerrmsg (result));
freeurl (u, 1);
- free_slist (redirections);
- free (url);
+ if (redirections)
+ string_set_free (redirections);
+ xfree (url);
return result;
}
{
logputs (LOG_NOTQUIET, _("Could not find proxy host.\n"));
freeurl (u, 1);
- free_slist (redirections);
- free (url);
+ if (redirections)
+ string_set_free (redirections);
+ xfree (url);
return PROXERR;
}
/* Parse the proxy URL. */
else
logprintf (LOG_NOTQUIET, _("Proxy %s: Must be HTTP.\n"), proxy);
freeurl (u, 1);
- free_slist (redirections);
- free (url);
+ if (redirections)
+ string_set_free (redirections);
+ xfree (url);
return PROXERR;
}
u->proto = URLHTTP;
there break the rules and use relative URLs, and popular
browsers are lenient about this, so wget should be too. */
construced_newloc = url_concat (url, mynewloc);
- free (mynewloc);
+ xfree (mynewloc);
mynewloc = construced_newloc;
/* Now, see if this new location makes sense. */
logprintf (LOG_NOTQUIET, "%s: %s.\n", mynewloc, uerrmsg (newloc_result));
freeurl (newloc_struct, 1);
freeurl (u, 1);
- free_slist (redirections);
- free (url);
- free (mynewloc);
+ if (redirections)
+ string_set_free (redirections);
+ xfree (url);
+ xfree (mynewloc);
return result;
}
/* Now mynewloc will become newloc_struct->url, because if the
Location contained relative paths like .././something, we
don't want that propagating as url. */
- free (mynewloc);
+ xfree (mynewloc);
mynewloc = xstrdup (newloc_struct->url);
- /* Check for redirection to back to itself. */
- if (!strcmp (u->url, newloc_struct->url))
+ if (!redirections)
{
- logprintf (LOG_NOTQUIET, _("%s: Redirection to itself.\n"),
- mynewloc);
- freeurl (newloc_struct, 1);
- freeurl (u, 1);
- free_slist (redirections);
- free (url);
- free (mynewloc);
- return WRONGCODE;
+ redirections = make_string_hash_table (0);
+ /* Add current URL immediately so we can detect it as soon
+ as possible in case of a cycle. */
+ string_set_add (redirections, u->url);
}
/* The new location is OK. Let's check for redirection cycle by
peeking through the history of redirections. */
- if (in_slist (redirections, newloc_struct->url))
+ if (string_set_exists (redirections, newloc_struct->url))
{
logprintf (LOG_NOTQUIET, _("%s: Redirection cycle detected.\n"),
mynewloc);
freeurl (newloc_struct, 1);
freeurl (u, 1);
- free_slist (redirections);
- free (url);
- free (mynewloc);
+ if (redirections)
+ string_set_free (redirections);
+ xfree (url);
+ xfree (mynewloc);
return WRONGCODE;
}
+ string_set_add (redirections, newloc_struct->url);
- redirections = add_slist (redirections, newloc_struct->url, NOSORT);
-
- free (url);
+ xfree (url);
url = mynewloc;
freeurl (u, 1);
u = newloc_struct;
*file = NULL;
}
freeurl (u, 1);
- free_slist (redirections);
+ if (redirections)
+ string_set_free (redirections);
if (newloc)
*newloc = url;
else
- free (url);
+ xfree (url);
return result;
}
uerr_t status;
urlpos *url_list, *cur_url;
- /* If spider-mode is on, we do not want get_urls_html barfing
- errors on baseless links. */
- url_list = (html ? get_urls_html (file, NULL, opt.spider, FALSE)
+ url_list = (html ? get_urls_html (file, NULL, FALSE, NULL)
: get_urls_file (file));
status = RETROK; /* Suppose everything is OK. */
*count = 0; /* Reset the URL count. */