/* Conversion of links to local files.
- Copyright (C) 2003, 2004, 2005, 2006, 2007,
- 2008 Free Software Foundation, Inc.
+ Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
+ Free Software Foundation, Inc.
This file is part of GNU Wget.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#ifdef HAVE_UNISTD_H
-# include <unistd.h>
-#endif /* HAVE_UNISTD_H */
+#include <unistd.h>
#include <errno.h>
#include <assert.h>
#include "convert.h"
#include "res.h"
#include "html-url.h"
#include "css-url.h"
+#include "iri.h"
/* NOTE(review): no use of this table is visible in this excerpt; the
   name suggests it maps local file names to the URLs they came from --
   confirm against the registration code.  */
static struct hash_table *dl_file_url_map;
/* Queried with a URL string as the key (see the hash_table_get call in
   convert_links_in_hashtable); the value found is used there as the
   local file name for that URL.  Not static, so it has external
   linkage.  */
struct hash_table *dl_url_file_map;
/* Forward declaration; the definition appears further down in this
   file.  */
static void convert_links (const char *, struct urlpos *);
-void
+static void
convert_links_in_hashtable (struct hash_table *downloaded_set,
int is_css,
int *file_count)
for (cur_url = urls; cur_url; cur_url = cur_url->next)
{
char *local_name;
- struct url *u = cur_url->url;
+ struct url *u;
+ struct iri *pi;
if (cur_url->link_base_p)
{
/* We decide the direction of conversion according to whether
a URL was downloaded. Downloaded URLs will be converted
ABS2REL, whereas non-downloaded will be converted REL2ABS. */
+
+ pi = iri_new ();
+ set_uri_encoding (pi, opt.locale, true);
+
+ u = url_parse (cur_url->url->url, NULL, pi, true);
+ if (!u)
+   {
+     /* The url_free/iri_free pair at the bottom of the loop body is
+        skipped by `continue', so release PI here or it leaks once
+        per link whose URL cannot be parsed.  */
+     iri_free (pi);
+     continue;
+   }
+
local_name = hash_table_get (dl_url_file_map, u->url);
/* Decide on the conversion type. */
cur_url->local_name = NULL;
DEBUGP (("will convert url %s to complete\n", u->url));
}
+
+ url_free (u);
+ iri_free (pi);
}
/* Convert the links in the file. */
static const char *replace_attr (const char *, int, FILE *, const char *);
static const char *replace_attr_refresh_hack (const char *, int, FILE *,
const char *, int);
-static char *local_quote_string (const char *);
+static char *local_quote_string (const char *, bool);
static char *construct_relative (const char *, const char *);
/* Change the links in one file. LINKS is a list of links in the
}
}
- fm = read_file (file);
+ fm = wget_read_file (file);
if (!fm)
{
logprintf (LOG_NOTQUIET, _("Cannot convert links in %s: %s\n"),
{
logprintf (LOG_NOTQUIET, _("Unable to delete %s: %s\n"),
quote (file), strerror (errno));
- read_file_free (fm);
+ wget_read_file_free (fm);
return;
}
/* Now open the file for writing. */
{
logprintf (LOG_NOTQUIET, _("Cannot convert links in %s: %s\n"),
file, strerror (errno));
- read_file_free (fm);
+ wget_read_file_free (fm);
return;
}
/* Convert absolute URL to relative. */
{
char *newname = construct_relative (file, link->local_name);
- char *quoted_newname = local_quote_string (newname);
+ char *quoted_newname = local_quote_string (newname,
+ link->link_css_p);
if (link->link_css_p)
p = replace_plain (p, link->size, fp, quoted_newname);
char *quoted_newlink = html_quote_string (newlink);
if (link->link_css_p)
- p = replace_plain (p, link->size, fp, quoted_newlink);
+ p = replace_plain (p, link->size, fp, newlink);
else if (!link->link_refresh_p)
p = replace_attr (p, link->size, fp, quoted_newlink);
else
if (p - fm->content < fm->length)
fwrite (p, 1, fm->length - (p - fm->content), fp);
fclose (fp);
- read_file_free (fm);
+ wget_read_file_free (fm);
logprintf (LOG_VERBOSE, "%d-%d\n", to_file_count, to_url_count);
}
"index.html?foo=bar.html" to "index.html%3Ffoo=bar.html" should be
safe for both local and HTTP-served browsing.
- We always quote "#" as "%23" and "%" as "%25" because those
- characters have special meanings in URLs. */
+ We always quote "#" as "%23", "%" as "%25" and ";" as "%3B"
+ because those characters have special meanings in URLs. */
static char *
-local_quote_string (const char *file)
+local_quote_string (const char *file, bool no_html_quote)
{
const char *from;
char *newname, *to;
- char *any = strpbrk (file, "?#%");
+ char *any = strpbrk (file, "?#%;");
if (!any)
- return html_quote_string (file);
+ return no_html_quote ? strdup (file) : html_quote_string (file);
/* Allocate space assuming the worst-case scenario, each character
having to be quoted. */
*to++ = '2';
*to++ = '3';
break;
+ case ';':
+ *to++ = '%';
+ *to++ = '3';
+ *to++ = 'B';
+ break;
case '?':
if (opt.adjust_extension)
{
}
*to = '\0';
- return html_quote_string (newname);
+ return no_html_quote ? strdup (newname) : html_quote_string (newname);
}
\f
/* Book-keeping code for dl_file_url_map, dl_url_file_map,
/* Register that FILE is an HTML file that has been downloaded. */
void
-register_html (const char *url, const char *file)
+register_html (const char *file)
{
if (!downloaded_html_set)
downloaded_html_set = make_string_hash_table (0);
/* Register that FILE is a CSS file that has been downloaded. */
void
-register_css (const char *url, const char *file)
+register_css (const char *file)
{
if (!downloaded_css_set)
downloaded_css_set = make_string_hash_table (0);
However, our hash tables only accept pointers for keys and values.
So when we need a pointer, we use the address of a
downloaded_file_t variable of static storage. */
-
+
static downloaded_file_t *
downloaded_mode_to_ptr (downloaded_file_t mode)
{