/* File retrieval.
Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
- 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
+ 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
This file is part of GNU Wget.
shall include the source code for the parts of OpenSSL used as well
as that of the covered work. */
-#include <config.h>
+#include "wget.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
-#include "wget.h"
#include "utils.h"
#include "retr.h"
#include "progress.h"
#include "hash.h"
#include "convert.h"
#include "ptimer.h"
+#include "html-url.h"
/* Total size of downloaded files. Used to enforce quota. */
SUM_SIZE_INT total_downloaded_bytes;
/* If we're skipping STARTPOS bytes, pass 0 as the INITIAL
argument to progress_create because the indicator doesn't
(yet) know about "skipping" data. */
- progress = progress_create (skip ? 0 : startpos, startpos + toread);
+ wgint start = skip ? 0 : startpos;
+ progress = progress_create (start, start + toread);
progress_interactive = progress_interactive_p (progress);
}
char *hunk = xmalloc (bufsize);
int tail = 0; /* tail position in HUNK */
- assert (maxsize >= bufsize);
+ assert (!maxsize || maxsize >= bufsize);
while (1)
{
multiple points. */
uerr_t
-retrieve_url (const char *origurl, char **file, char **newloc,
- const char *refurl, int *dt, bool recursive)
+retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
+ char **newloc, const char *refurl, int *dt, bool recursive)
{
uerr_t result;
char *url;
bool location_changed;
int dummy;
char *mynewloc, *proxy;
- struct url *u, *proxy_url;
+ struct url *u = orig_parsed, *proxy_url;
int up_error_code; /* url parse error code */
char *local_file;
int redirection_count = 0;
if (file)
*file = NULL;
- u = url_parse (url, &up_error_code);
- if (!u)
- {
- logprintf (LOG_NOTQUIET, "%s: %s.\n", url, url_error (up_error_code));
- xfree (url);
- return URLERROR;
- }
-
if (!refurl)
refurl = opt.referer;
proxy_url = url_parse (proxy, &up_error_code);
if (!proxy_url)
{
+ char *error = url_error (proxy, up_error_code);
logprintf (LOG_NOTQUIET, _("Error parsing proxy URL %s: %s.\n"),
- proxy, url_error (up_error_code));
+ proxy, error);
xfree (url);
+ xfree (error);
RESTORE_POST_DATA;
return PROXERR;
}
newloc_parsed = url_parse (mynewloc, &up_error_code);
if (!newloc_parsed)
{
+ char *error = url_error (mynewloc, up_error_code);
logprintf (LOG_NOTQUIET, "%s: %s.\n", escnonprint_uri (mynewloc),
- url_error (up_error_code));
- url_free (u);
+ error);
+ if (orig_parsed != u)
+ {
+ url_free (u);
+ }
xfree (url);
xfree (mynewloc);
+ xfree (error);
RESTORE_POST_DATA;
return result;
}
logprintf (LOG_NOTQUIET, _("%d redirections exceeded.\n"),
opt.max_redirect);
url_free (newloc_parsed);
- url_free (u);
+ if (orig_parsed != u)
+ {
+ url_free (u);
+ }
xfree (url);
xfree (mynewloc);
RESTORE_POST_DATA;
xfree (url);
url = mynewloc;
- url_free (u);
+ if (orig_parsed != u)
+ {
+ url_free (u);
+ }
u = newloc_parsed;
/* If we're being redirected from POST, we don't want to POST
register_redirection (origurl, u->url);
if (*dt & TEXTHTML)
register_html (u->url, local_file);
+ if (*dt & TEXTCSS)
+ register_css (u->url, local_file);
}
}
else
xfree_null (local_file);
- url_free (u);
+ if (orig_parsed != u)
+ {
+ url_free (u);
+ }
if (redirection_count)
{
uerr_t status;
struct urlpos *url_list, *cur_url;
- url_list = (html ? get_urls_html (file, NULL, NULL)
- : get_urls_file (file));
+ char *input_file = NULL;
+ const char *url = file;
+
status = RETROK; /* Suppose everything is OK. */
*count = 0; /* Reset the URL count. */
+
+ if (url_has_scheme (url))
+ {
+ int dt,url_err;
+ uerr_t status;
+ struct url * url_parsed = url_parse(url, &url_err);
+
+ if (!url_parsed)
+ {
+ char *error = url_error (url, url_err);
+ logprintf (LOG_NOTQUIET, "%s: %s.\n", url, error);
+ xfree (error);
+ return URLERROR;
+ }
+
+ if (!opt.base_href)
+ opt.base_href = xstrdup (url);
+
+ status = retrieve_url (url_parsed, url, &input_file, NULL, NULL, &dt, false);
+ if (status != RETROK)
+ return status;
+
+ if (dt & TEXTHTML)
+ html = true;
+ }
+ else
+ input_file = (char *) file;
+
+ url_list = (html ? get_urls_html (input_file, NULL, NULL)
+ : get_urls_file (input_file));
for (cur_url = url_list; cur_url; cur_url = cur_url->next, ++*count)
{
if (cur_url->url->scheme == SCHEME_FTP)
opt.follow_ftp = 1;
- status = retrieve_tree (cur_url->url->url);
+ status = retrieve_tree (cur_url->url);
opt.follow_ftp = old_follow_ftp;
}
else
- status = retrieve_url (cur_url->url->url, &filename, &new_file, NULL, &dt, opt.recursive);
+ {
+ status = retrieve_url (cur_url->url, cur_url->url->url, &filename,
+ &new_file, NULL, &dt, opt.recursive);
+ }
if (filename && opt.delete_after && file_exists_p (filename))
{
/* Returns true if URL would be downloaded through a proxy. */
bool
-url_uses_proxy (const char *url)
+url_uses_proxy (struct url * u)
{
bool ret;
- struct url *u = url_parse (url, NULL);
if (!u)
return false;
ret = getproxy (u) != NULL;
- url_free (u);
return ret;
}
else
return sufmatch (no_proxy, host);
}
+
+/* Set the file parameter to point to the local file string. */
+void
+set_local_file (const char **file, const char *default_file)
+{
+ if (opt.output_document)
+ {
+ if (output_stream_regular)
+ *file = opt.output_document;
+ }
+ else
+ *file = default_file;
+}