/* Collect URLs from HTML source.
Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
- 2007, 2008 Free Software Foundation, Inc.
+ 2007, 2008, 2009 Free Software Foundation, Inc.
This file is part of GNU Wget.
return NULL;
}
- url = url_parse (link_uri, NULL, NULL);
+ url = url_parse (link_uri, NULL, NULL, false);
if (!url)
{
DEBUGP (("%s: link \"%s\" doesn't parse.\n",
char *complete_uri = uri_merge (base, link_uri);
- DEBUGP (("%s: merge(\"%s\", \"%s\") -> %s\n",
- ctx->document_file, base, link_uri, complete_uri));
+ DEBUGP (("%s: merge(%s, %s) -> %s\n",
+ quotearg_n_style (0, escape_quoting_style, ctx->document_file),
+ quote_n (1, base),
+ quote_n (2, link_uri),
+ quotearg_n_style (3, escape_quoting_style, complete_uri)));
- url = url_parse (complete_uri, NULL, NULL);
+ url = url_parse (complete_uri, NULL, NULL, false);
if (!url)
{
DEBUGP (("%s: merged link \"%s\" doesn't parse.\n",
xfree (complete_uri);
}
- DEBUGP (("appending \"%s\" to urlpos.\n", url->url));
+ DEBUGP (("appending %s to urlpos.\n", quote (url->url)));
newel = xnew0 (struct urlpos);
newel->url = url;
{
while (*content)
{
- /* Find the next occurrence of ',' or the end of
- the string. */
- char *end = strchr (content, ',');
- if (end)
- ++end;
- else
- end = content + strlen (content);
+ char *end;
+ /* Skip any initial whitespace. */
+ content += strspn (content, " \f\n\r\t\v");
+ /* Find the next occurrence of ',' or whitespace,
+ * or the end of the string. */
+ end = content + strcspn (content, ", \f\n\r\t\v");
if (!strncasecmp (content, "nofollow", end - content))
ctx->nofollow = true;
+ /* Skip past the next comma, if any. */
+ if (*end == ',')
+ ++end;
+ else
+ {
+ end = strchr (end, ',');
+ if (end)
+ ++end;
+ else
+ end = content + strlen (content);
+ }
content = end;
}
}
url_text = merged;
}
- url = url_parse (url_text, &up_error_code, NULL);
+ url = url_parse (url_text, &up_error_code, NULL, false);
if (!url)
{
char *error = url_error (url_text, up_error_code);