+2001-12-01 Hrvoje Niksic <hniksic@arsdigita.com>
+
+ * url.c (replace_attr_refresh_hack): New function.
+ (convert_links): Call replace_attr_refresh_hack for Refresh
+ links. It will add the "TMOUT; URL=" junk before the link.
+
+ * html-url.c (collect_tags_mapper): Set ID to the ID of the
+ "content" attribute, not "http-equiv".
+ (collect_tags_mapper): Don't use OFFSET to hack the raw_* values;
+ instead, store the information that this entry belongs to a
+ "refresh" link.
+
2001-12-01 Hrvoje Niksic <hniksic@arsdigita.com>
* recur.c (retrieve_tree): Allow -p retrievals to exceed maximum
So we just need to skip past the "NUMBER; URL=" garbage
to get to the URL. */
{
- int id;
char *name = find_attr (tag, "name", NULL);
- char *http_equiv = find_attr (tag, "http-equiv", &id);
+ char *http_equiv = find_attr (tag, "http-equiv", NULL);
if (http_equiv && !strcasecmp (http_equiv, "refresh"))
{
- char *refresh = find_attr (tag, "content", NULL);
- char *p = refresh;
- int offset;
- while (ISDIGIT (*p))
- ++p;
+ struct urlpos *entry;
+
+ int id;
+ char *p, *refresh = find_attr (tag, "content", &id);
+ int timeout = 0;
+
+ for (p = refresh; ISDIGIT (*p); p++)
+ timeout = 10 * timeout + *p - '0';
if (*p++ != ';')
return;
+
while (ISSPACE (*p))
++p;
if (!(TOUPPER (*p) == 'U'
p += 4;
while (ISSPACE (*p))
++p;
- offset = p - refresh;
- tag->attrs[id].value_raw_beginning += offset;
- tag->attrs[id].value_raw_size -= offset;
- handle_link (closure, p, tag, id);
+
+ entry = handle_link (closure, p, tag, id);
+ if (entry)
+ {
+ entry->link_refresh_p = 1;
+ entry->refresh_timeout = timeout;
+ }
}
else if (name && !strcasecmp (name, "robots"))
{
}
\f
static void write_backup_file PARAMS ((const char *, downloaded_file_t));
-static const char *replace_attr PARAMS ((const char *, int, FILE *, const char *));
+static const char *replace_attr PARAMS ((const char *, int, FILE *,
+ const char *));
+static const char *replace_attr_refresh_hack PARAMS ((const char *, int, FILE *,
+ const char *, int));
static char *local_quote_string PARAMS ((const char *));
/* Change the links in one HTML file. LINKS is a list of links in the
{
char *newname = construct_relative (file, link->local_name);
char *quoted_newname = local_quote_string (newname);
- p = replace_attr (p, link->size, fp, quoted_newname);
+
+ if (!link->link_refresh_p)
+ p = replace_attr (p, link->size, fp, quoted_newname);
+ else
+ p = replace_attr_refresh_hack (p, link->size, fp, quoted_newname,
+ link->refresh_timeout);
+
DEBUGP (("TO_RELATIVE: %s to %s at position %d in %s.\n",
link->url->url, newname, link->pos, file));
xfree (newname);
{
char *newlink = link->url->url;
char *quoted_newlink = html_quote_string (newlink);
- p = replace_attr (p, link->size, fp, quoted_newlink);
+
+ if (!link->link_refresh_p)
+ p = replace_attr (p, link->size, fp, quoted_newlink);
+ else
+ p = replace_attr_refresh_hack (p, link->size, fp, quoted_newlink,
+ link->refresh_timeout);
+
DEBUGP (("TO_COMPLETE: <something> to %s at position %d in %s.\n",
newlink, link->pos, file));
xfree (quoted_newlink);
return p;
}
+/* The same as REPLACE_ATTR, but used when replacing
+   <meta http-equiv=refresh content="new_text"> because we need to
+   prepend "timeout_value; URL=" to NEW_TEXT.
+
+   P, SIZE and FP are handed to replace_attr unchanged; TIMEOUT is
+   the number printed in front of "; URL=".  Returns whatever
+   replace_attr returns.  */
+
+static const char *
+replace_attr_refresh_hack (const char *p, int size, FILE *fp,
+			   const char *new_text, int timeout)
+{
+  /* "0; URL=..." */
+  /* Size the numeric field from the width of int rather than from
+     the value itself: one byte never needs more than three decimal
+     digits, plus one for a minus sign and one spare.  This keeps the
+     sprintf below in bounds for every possible int, including a
+     negative TIMEOUT (which could result if the number parsed from
+     the document overflowed).  */
+  char *new_with_timeout = (char *)alloca (3 * sizeof (int) + 2
+					   + 6 /* "; URL=" */
+					   + strlen (new_text)
+					   + 1);
+  sprintf (new_with_timeout, "%d; URL=%s", timeout, new_text);
+
+  return replace_attr (p, size, fp, new_with_timeout);
+}
+
/* Find the first occurrence of '#' in [BEG, BEG+SIZE) that is not
preceded by '&'. If the character is not found, return zero. If
the character is found, return 1 and set BP and EP to point to the