/* Collect URLs from HTML source.
Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
- 2007, 2008 Free Software Foundation, Inc.
+ 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
This file is part of GNU Wget.
else if (link_has_scheme)
newel->link_complete_p = 1;
- if (ctx->tail)
+ /* Append the new URL maintaining the order by position. */
+ if (ctx->head == NULL)
+ ctx->head = newel;
+ else
{
- ctx->tail->next = newel;
- ctx->tail = newel;
+ struct urlpos *it, *prev = NULL;
+
+ it = ctx->head;
+ while (it && position > it->pos)
+ {
+ prev = it;
+ it = it->next;
+ }
+
+ newel->next = it;
+
+ if (prev)
+ prev->next = newel;
+ else
+ ctx->head = newel;
}
- else
- ctx->tail = ctx->head = newel;
return newel;
}
{
while (*content)
{
- /* Find the next occurrence of ',' or the end of
- the string. */
- char *end = strchr (content, ',');
- if (end)
- ++end;
- else
- end = content + strlen (content);
+ char *end;
+ /* Skip any initial whitespace. */
+ content += strspn (content, " \f\n\r\t\v");
+ /* Find the next occurrence of ',' or whitespace,
+ * or the end of the string. */
+ end = content + strcspn (content, ", \f\n\r\t\v");
- if (!strncasecmp (content, "nofollow", end - content))
+ /* The token must be exactly as long as "nofollow". Without
+ the length check, strncasecmp over an empty token (e.g.
+ after a trailing ", " or between two commas) or over a
+ proper prefix such as "no" returns 0 and would set the
+ flag by mistake. */
+ if ((size_t) (end - content) == strlen ("nofollow")
+ && !strncasecmp (content, "nofollow", end - content))
ctx->nofollow = true;
+ /* Skip past the next comma, if any. */
+ if (*end == ',')
+ ++end;
+ else
+ {
+ end = strchr (end, ',');
+ if (end)
+ ++end;
+ else
+ end = content + strlen (content);
+ }
content = end;
}
}
/* Find the tag in our table of tags. This must not fail because
map_html_tags only returns tags found in interesting_tags.
-
+
I've changed this for now, I'm passing NULL as interesting_tags
to map_html_tags. This way we can check all tags for a style
attribute.
int flags;
/* Load the file. */
- fm = read_file (file);
+ fm = wget_read_file (file);
if (!fm)
{
logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
DEBUGP (("Loaded %s (size %s).\n", file, number_to_static_string (fm->length)));
ctx.text = fm->content;
- ctx.head = ctx.tail = NULL;
+ ctx.head = NULL;
ctx.base = NULL;
ctx.parent_base = url ? url : opt.base_href;
ctx.document_file = file;
*meta_disallow_follow = ctx.nofollow;
xfree_null (ctx.base);
- read_file_free (fm);
+ wget_read_file_free (fm);
return ctx.head;
}
const char *text, *text_end;
/* Load the file. */
- fm = read_file (file);
+ fm = wget_read_file (file);
if (!fm)
{
logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
tail->next = entry;
tail = entry;
}
- read_file_free (fm);
+ wget_read_file_free (fm);
return head;
}