/* File retrieval.
Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
- 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
+ 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
This file is part of GNU Wget.
shall include the source code for the parts of OpenSSL used as well
as that of the covered work. */
-#include <config.h>
+#include "wget.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
-#include "wget.h"
#include "utils.h"
#include "retr.h"
#include "progress.h"
#include "hash.h"
#include "convert.h"
#include "ptimer.h"
+#include "iri.h"
+#include "html-url.h"
/* Total size of downloaded files. Used to enforce quota. */
SUM_SIZE_INT total_downloaded_bytes;
if (file)
*file = NULL;
+ reset_utf8_encode ();
+
+ second_try:
u = url_parse (url, &up_error_code);
if (!u)
{
return URLERROR;
}
+ /*printf ("[Retrieving %s with %s (UTF-8=%d)\n", url, get_remote_charset (), utf8_encoded);*/
+
if (!refurl)
refurl = opt.referer;
proxy = getproxy (u);
if (proxy)
{
+ /* sXXXav : support IRI for proxy */
/* Parse the proxy URL. */
+ set_ugly_no_encode (true);
proxy_url = url_parse (proxy, &up_error_code);
+ set_ugly_no_encode (false);
if (!proxy_url)
{
logprintf (LOG_NOTQUIET, _("Error parsing proxy URL %s: %s.\n"),
xfree (mynewloc);
mynewloc = construced_newloc;
+ reset_utf8_encode ();
+
/* Now, see if this new location makes sense. */
newloc_parsed = url_parse (mynewloc, &up_error_code);
if (!newloc_parsed)
goto redirected;
}
- if (local_file)
+ /* Try to not encode in UTF-8 if fetching failed */
+ if (!(*dt & RETROKF) && get_utf8_encode ())
+ {
+ set_utf8_encode (false);
+ /*printf ("[Fallbacking to non-utf8 for `%s'\n", url);*/
+ goto second_try;
+ }
+
+ if (local_file && *dt & RETROKF)
{
+ register_download (u->url, local_file);
+ if (redirection_count && 0 != strcmp (origurl, u->url))
+ register_redirection (origurl, u->url);
+ if (*dt & TEXTHTML)
+ register_html (u->url, local_file);
if (*dt & RETROKF)
{
register_download (u->url, local_file);
register_redirection (origurl, u->url);
if (*dt & TEXTHTML)
register_html (u->url, local_file);
+ if (*dt & TEXTCSS)
+ register_css (u->url, local_file);
}
}
uerr_t status;
struct urlpos *url_list, *cur_url;
- url_list = (html ? get_urls_html (file, NULL, NULL)
- : get_urls_file (file));
+ char *input_file = NULL;
+ const char *url = file;
+
status = RETROK; /* Suppose everything is OK. */
*count = 0; /* Reset the URL count. */
+
+ if (url_has_scheme (url))
+ {
+ uerr_t status;
+ status = retrieve_url (url, &input_file, NULL, NULL, NULL, false);
+ if (status != RETROK)
+ return status;
+ }
+ else
+ input_file = (char *) file;
+
+ url_list = (html ? get_urls_html (input_file, NULL, NULL)
+ : get_urls_file (input_file));
for (cur_url = url_list; cur_url; cur_url = cur_url->next, ++*count)
{
int old_follow_ftp = opt.follow_ftp;
/* Turn opt.follow_ftp on in case of recursive FTP retrieval */
- if (cur_url->url->scheme == SCHEME_FTP)
+ if (cur_url->url->scheme == SCHEME_FTP)
opt.follow_ftp = 1;
-
+
status = retrieve_tree (cur_url->url->url);
opt.follow_ftp = old_follow_ftp;
url_uses_proxy (const char *url)
{
bool ret;
- struct url *u = url_parse (url, NULL);
+ struct url *u;
+ set_ugly_no_encode(true);
+ u= url_parse (url, NULL);
+ set_ugly_no_encode(false);
if (!u)
return false;
ret = getproxy (u) != NULL;