X-Git-Url: http://sjero.net/git/?a=blobdiff_plain;f=src%2Fhtml-url.c;h=3c7c409e82e4c9f48e490abee7f4131a9113da0f;hb=4d7c5e087b2bc82c9f503dff003916d1047903ce;hp=2a162bd7e4a346df9445fa9c8ceeff44173bb850;hpb=1fab70a664245d67260b4e367b21cb5a77d50711;p=wget diff --git a/src/html-url.c b/src/html-url.c index 2a162bd7..3c7c409e 100644 --- a/src/html-url.c +++ b/src/html-url.c @@ -1,11 +1,11 @@ /* Collect URLs from HTML source. - Copyright (C) 1998, 2000, 2001, 2002, 2003 Free Software Foundation, Inc. + Copyright (C) 1998-2006 Free Software Foundation, Inc. This file is part of GNU Wget. GNU Wget is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or +the Free Software Foundation; either version 3 of the License, or (at your option) any later version. GNU Wget is distributed in the hope that it will be useful, @@ -14,8 +14,7 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License -along with Wget; if not, write to the Free Software -Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +along with Wget. If not, see . In addition, as a special exception, the Free Software Foundation gives permission to link the code of its release of Wget with the @@ -30,11 +29,7 @@ so, delete this exception statement from your version. */ #include #include -#ifdef HAVE_STRING_H -# include -#else -# include -#endif +#include #include #include #include @@ -47,17 +42,12 @@ so, delete this exception statement from your version. */ #include "convert.h" #include "recur.h" /* declaration of get_urls_html */ -#ifndef errno -extern int errno; -#endif - struct map_context; -typedef void (*tag_handler_t) PARAMS ((int, struct taginfo *, - struct map_context *)); +typedef void (*tag_handler_t) (int, struct taginfo *, struct map_context *); -#define DECLARE_TAG_HANDLER(fun) \ - static void fun PARAMS ((int, struct taginfo *, struct map_context *)) +#define DECLARE_TAG_HANDLER(fun) \ + static void fun (int, struct taginfo *, struct map_context *) DECLARE_TAG_HANDLER (tag_find_urls); DECLARE_TAG_HANDLER (tag_handle_base); @@ -179,8 +169,8 @@ static const char *additional_attributes[] = { "action" /* used by tag_handle_form */ }; -struct hash_table *interesting_tags; -struct hash_table *interesting_attributes; +static struct hash_table *interesting_tags; +static struct hash_table *interesting_attributes; static void init_interesting (void) @@ -261,7 +251,7 @@ struct map_context { changed through . */ const char *parent_base; /* Base of the current document. */ const char *document_file; /* File name of this document. */ - int nofollow; /* whether NOFOLLOW was specified in a + bool nofollow; /* whether NOFOLLOW was specified in a tag. */ struct urlpos *head, *tail; /* List of URLs that is being @@ -481,6 +471,10 @@ tag_handle_link (int tagid, struct taginfo *tag, struct map_context *ctx) && (0 == strcasecmp (rel, "stylesheet") || 0 == strcasecmp (rel, "shortcut icon"))) up->link_inline_p = 1; + else + /* The external ones usually point to HTML pages, such as + */ + up->link_expect_html = 1; } } } @@ -546,7 +540,7 @@ tag_handle_meta (int tagid, struct taginfo *tag, struct map_context *ctx) if (!content) return; if (!strcasecmp (content, "none")) - ctx->nofollow = 1; + ctx->nofollow = true; else { while (*content) @@ -559,7 +553,7 @@ tag_handle_meta (int tagid, struct taginfo *tag, struct map_context *ctx) else end = content + strlen (content); if (!strncasecmp (content, "nofollow", end - content)) - ctx->nofollow = 1; + ctx->nofollow = true; content = end; } } @@ -587,7 +581,7 @@ collect_tags_mapper (struct taginfo *tag, void *arg) and does the right thing. */ struct urlpos * -get_urls_html (const char *file, const char *url, int *meta_disallow_follow) +get_urls_html (const char *file, const char *url, bool *meta_disallow_follow) { struct file_memory *fm; struct map_context ctx; @@ -607,7 +601,7 @@ get_urls_html (const char *file, const char *url, int *meta_disallow_follow) ctx.base = NULL; ctx.parent_base = url ? url : opt.base_href; ctx.document_file = file; - ctx.nofollow = 0; + ctx.nofollow = false; if (!interesting_tags) init_interesting (); @@ -698,7 +692,7 @@ get_urls_file (const char *file) url = url_parse (url_text, &up_error_code); if (!url) { - logprintf (LOG_NOTQUIET, "%s: Invalid URL %s: %s\n", + logprintf (LOG_NOTQUIET, _("%s: Invalid URL %s: %s\n"), file, url_text, url_error (up_error_code)); xfree (url_text); continue; @@ -706,7 +700,6 @@ get_urls_file (const char *file) xfree (url_text); entry = xnew0 (struct urlpos); - entry->next = NULL; entry->url = url; if (!head)