#include <stdio.h>
#include <stdlib.h>
-#ifdef HAVE_STRING_H
-# include <string.h>
-#else
-# include <strings.h>
-#endif /* HAVE_STRING_H */
+#include <string.h>
#ifdef HAVE_UNISTD_H
# include <unistd.h>
#endif /* HAVE_UNISTD_H */
#include <errno.h>
#include <assert.h>
-#include <sys/types.h>
#include "wget.h"
#include "convert.h"
#include "recur.h"
#include "utils.h"
#include "hash.h"
+#include "ptimer.h"
static struct hash_table *dl_file_url_map;
struct hash_table *dl_url_file_map;
conversion after Wget is done. */
struct hash_table *downloaded_html_set;
-static void convert_links PARAMS ((const char *, struct urlpos *));
+static void convert_links (const char *, struct urlpos *);
/* This function is called when the retrieval is done to convert the
links that have been downloaded. It has to be called at the end of
double secs;
int file_count = 0;
- struct wget_timer *timer = wtimer_new ();
+ struct ptimer *timer = ptimer_new ();
int cnt;
char **file_array;
free_urlpos (urls);
}
- wtimer_update (timer);
- secs = wtimer_read (timer) / 1000;
- wtimer_delete (timer);
- logprintf (LOG_VERBOSE, _("Converted %d files in %.*f seconds.\n"),
- file_count, secs < 10 ? 3 : 1, secs);
+ secs = ptimer_measure (timer) / 1000;
+ ptimer_destroy (timer);
+ logprintf (LOG_VERBOSE, _("Converted %d files in %s seconds.\n"),
+ file_count, print_decimal (secs));
}
-static void write_backup_file PARAMS ((const char *, downloaded_file_t));
-static const char *replace_attr PARAMS ((const char *, int, FILE *,
- const char *));
-static const char *replace_attr_refresh_hack PARAMS ((const char *, int, FILE *,
- const char *, int));
-static char *local_quote_string PARAMS ((const char *));
-static char *construct_relative PARAMS ((const char *, const char *));
+static void write_backup_file (const char *, downloaded_file_t);
+static const char *replace_attr (const char *, int, FILE *, const char *);
+static const char *replace_attr_refresh_hack (const char *, int, FILE *,
+ const char *, int);
+static char *local_quote_string (const char *);
+static char *construct_relative (const char *, const char *);
/* Change the links in one HTML file. LINKS is a list of links in the
document, along with their positions and the desired direction of
}
/* Construct LINK as explained above. */
- link = (char *)xmalloc (3 * basedirs + strlen (linkfile) + 1);
+ link = xmalloc (3 * basedirs + strlen (linkfile) + 1);
for (i = 0; i < basedirs; i++)
memcpy (link + 3 * i, "../", 3);
strcpy (link + 3 * i, linkfile);
return link;
}
+/* Used by write_backup_file to remember which files have been
+ written. */
+static struct hash_table *converted_files;
+
static void
write_backup_file (const char *file, downloaded_file_t downloaded_file_return)
{
/* Construct the backup filename as the original name plus ".orig". */
size_t filename_len = strlen (file);
char* filename_plus_orig_suffix;
- static struct hash_table *converted_files;
if (downloaded_file_return == FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED)
{
}
}
-static int find_fragment PARAMS ((const char *, int, const char **,
- const char **));
+static bool find_fragment (const char *, int, const char **, const char **);
/* Replace an attribute's original text with NEW_TEXT. */
static const char *
replace_attr (const char *p, int size, FILE *fp, const char *new_text)
{
- int quote_flag = 0;
+ bool quote_flag = false;
char quote_char = '\"'; /* use "..." for quoting, unless the
original value is quoted, in which
case reuse its quoting char. */
if (*p == '\"' || *p == '\'')
{
quote_char = *p;
- quote_flag = 1;
+ quote_flag = true;
++p;
size -= 2; /* disregard opening and closing quote */
}
/* Find the first occurrence of '#' in [BEG, BEG+SIZE) that is not
preceded by '&'. If the character is not found, return zero. If
- the character is found, return 1 and set BP and EP to point to the
- beginning and end of the region.
+ the character is found, return true and set BP and EP to point to
+ the beginning and end of the region.
This is used for finding the fragment identifiers in URLs. */
-static int
+static bool
find_fragment (const char *beg, int size, const char **bp, const char **ep)
{
const char *end = beg + size; /* one past the last byte to examine */
- int saw_amp = 0;
+ bool saw_amp = false; /* true only when the previous character was '&' */
for (; beg < end; beg++)
{
switch (*beg)
{
case '&':
- saw_amp = 1;
+ saw_amp = true;
break;
case '#':
if (!saw_amp) /* a '#' directly after '&' is not reported (presumably a numeric entity such as "&#32;") */
{
*bp = beg; /* region starts at the '#' itself */
*ep = end; /* and extends to the end of the scanned range */
- return 1;
+ return true;
}
/* fallthrough: a '#' that directly follows '&' is skipped and clears
   the flag like any other character */
default:
- saw_amp = 0;
+ saw_amp = false;
}
}
- return 0;
+ return false;
}
/* Quote FILE for use as local reference to an HTML file.
dl_url_file_map = make_string_hash_table (0); \
} while (0)
-/* Return 1 if S1 and S2 are the same, except for "/index.html". The
- three cases in which it returns one are (substitute any substring
- for "foo"):
+/* Return true if S1 and S2 are the same, except for "/index.html".
+ The four cases in which it returns true are (substitute any
+ substring for "foo"):
m("foo/index.html", "foo/") ==> 1
m("foo/", "foo/index.html") ==> 1
m("foo", "foo/") ==> 1
m("foo", "foo") ==> 1 */
-static int
+static bool
match_except_index (const char *s1, const char *s2)
{
int i;
/* Strings differ at the very beginning -- bail out. We need to
check this explicitly to avoid `lng - 1' reading outside the
array. */
- return 0;
+ return false;
if (!*s1 && !*s2)
/* Both strings hit EOF -- strings are equal. */
- return 1;
+ return true;
else if (*s1 && *s2)
/* Strings are randomly different, e.g. "/foo/bar" and "/foo/qux". */
- return 0;
+ return false;
else if (*s1)
/* S1 is the longer one. */
lng = s1;
if (*lng == '/' && *(lng + 1) == '\0')
/* foo */
/* foo/ */
- return 1;
+ return true;
return 0 == strcmp (lng, "/index.html");
}
string_set_add (downloaded_html_set, file);
}
-/* Cleanup the data structures associated with recursive retrieving
- (the variables above). */
+static void downloaded_files_free (void);
+
+/* Cleanup the data structures associated with this file. */
+
void
convert_cleanup (void)
{
}
if (downloaded_html_set)
string_set_free (downloaded_html_set);
+ downloaded_files_free ();
+ if (converted_files)
+ string_set_free (converted_files);
}
\f
/* Book-keeping code for downloaded files that enables extension
return 0;
}
-void
+static void
downloaded_files_free (void)
{
if (downloaded_files_hash)
downloaded_files_hash = NULL;
}
}
+\f
+/* The function returns the pointer to the malloc-ed quoted version of
+ string s. It will recognize and quote numeric and special graphic
+ entities, as per RFC1866:
+
+ `&' -> `&amp;'
+ `<' -> `&lt;'
+ `>' -> `&gt;'
+ `"' -> `&quot;'
+ SP -> `&#32;'
+
+ No other entities are recognized or replaced. */
+char *
+html_quote_string (const char *s)
+{
+ const char *b = s; /* remember the start; S is advanced by the counting pass */
+ char *p, *res; /* P is the write cursor into RES */
+ int i; /* length of the quoted result, not counting the terminating NUL */
+
+ /* First pass: compute the size of the quoted string.  Every input
+    character contributes one byte through the loop's own i++; the
+    branches below add the extra bytes of the entity text that
+    follows the leading `&'. */
+ for (i = 0; *s; s++, i++)
+ {
+ if (*s == '&')
+ i += 4; /* `amp;' */
+ else if (*s == '<' || *s == '>')
+ i += 3; /* `lt;' and `gt;' */
+ else if (*s == '\"')
+ i += 5; /* `quot;' */
+ else if (*s == ' ')
+ i += 4; /* `#32;' */
+ }
+ res = xmalloc (i + 1); /* one extra byte for the terminating NUL */
+ s = b; /* rewind to the start for the copying pass */
+ for (p = res; *s; s++)
+ {
+ switch (*s)
+ {
+ case '&':
+ *p++ = '&';
+ *p++ = 'a';
+ *p++ = 'm';
+ *p++ = 'p';
+ *p++ = ';';
+ break;
+ case '<': case '>':
+ *p++ = '&';
+ *p++ = (*s == '<' ? 'l' : 'g'); /* selects "&lt;" or "&gt;" */
+ *p++ = 't';
+ *p++ = ';';
+ break;
+ case '\"':
+ *p++ = '&';
+ *p++ = 'q';
+ *p++ = 'u';
+ *p++ = 'o';
+ *p++ = 't';
+ *p++ = ';';
+ break;
+ case ' ':
+ *p++ = '&';
+ *p++ = '#';
+ *p++ = '3';
+ *p++ = '2';
+ *p++ = ';';
+ break;
+ default:
+ *p++ = *s; /* every other character is copied unchanged */
+ }
+ }
+ *p = '\0';
+ return res; /* buffer obtained from xmalloc above; it is not freed here */
+}