You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+In addition, as a special exception, the Free Software Foundation
+gives permission to link the code of its release of Wget with the
+OpenSSL project's "OpenSSL" library (or with modified versions of it
+that use the same license as the "OpenSSL" library), and distribute
+the linked executables. You must obey the GNU General Public License
+in all respects for all of the code used other than "OpenSSL". If you
+modify this file, you may extend this exception to your version of the
+file, but you are not obligated to do so. If you do not wish to do
+so, delete this exception statement from your version. */
/* This file implements the Robot Exclusion Standard (RES).
#include <stdio.h>
#include <stdlib.h>
-#ifdef HAVE_STRING_H
-# include <string.h>
-#else
-# include <strings.h>
-#endif /* HAVE_STRING_H */
+#include <string.h>
#include <errno.h>
#include <assert.h>
int allowedp, int exactp)
{
struct path_info pp;
+ if (path_b < path_e && *path_b == '/')
+ /* Our path representation doesn't use a leading slash, so remove
+ one from theirs. */
+ ++path_b;
pp.path = strdupdelim (path_b, path_e);
pp.allowedp = allowedp;
pp.user_agent_exact_p = exactp;
for (i = 0; i < specs->count; i++)
if (specs->paths[i].user_agent_exact_p)
++cnt;
- newpaths = xmalloc (cnt * sizeof (struct path_info));
+ newpaths = xnew_array (struct path_info, cnt);
for (i = 0, j = 0; i < specs->count; i++)
if (specs->paths[i].user_agent_exact_p)
newpaths[j++] = specs->paths[i];
the last `user-agent' instructions. */
int record_count = 0;
- struct robot_specs *specs = xmalloc (sizeof (struct robot_specs));
- memset (specs, '\0', sizeof (struct robot_specs));
+ struct robot_specs *specs = xnew0 (struct robot_specs);
while (1)
{
struct file_memory *fm = read_file (filename);
if (!fm)
{
- logprintf (LOG_NOTQUIET, "Cannot open %s: %s",
+ logprintf (LOG_NOTQUIET, _("Cannot open %s: %s"),
filename, strerror (errno));
return NULL;
}
static void
free_specs (struct robot_specs *specs)
{
- FREE_MAYBE (specs->paths);
+ int i;
+ for (i = 0; i < specs->count; i++) /* release every entry's path string first */
+ xfree (specs->paths[i].path);
+ xfree_null (specs->paths); /* then the paths array itself */
xfree (specs);
}
\f
that number is not a numerical representation of '/', decode C and
advance the pointer. */
-#define DECODE_MAYBE(c, ptr) do { \
- if (c == '%' && ISXDIGIT (ptr[1]) && ISXDIGIT (ptr[2])) \
- { \
- char decoded \
- = (XCHAR_TO_XDIGIT (ptr[1]) << 4) + XCHAR_TO_XDIGIT (ptr[2]); \
- if (decoded != '/') \
- { \
- c = decoded; \
- ptr += 2; \
- } \
- } \
+#define DECODE_MAYBE(c, ptr) do { \
+ if (c == '%' && ISXDIGIT (ptr[1]) && ISXDIGIT (ptr[2])) \
+ { \
+ char decoded = X2DIGITS_TO_NUM (ptr[1], ptr[2]); \
+ if (decoded != '/') /* an escaped '/' is left encoded */ \
+ { \
+ c = decoded; \
+ ptr += 2; /* advance PTR by the two hex digits */ \
+ } \
+ } \
} while (0)
/* The inner matching engine: return non-zero if RECORD_PATH matches
URL_PATH. The rules for matching are described at
- <http://info.webcrawler.com/mak/projects/robots/norobots-rfc.html>,
- section 3.2.2. */
+ <http://www.robotstxt.org/wc/norobots-rfc.txt>, section 3.2.2. */
static int
matches (const char *record_path, const char *url_path)
\f
/* Registering the specs. */
-struct hash_table *registered_specs;
+static struct hash_table *registered_specs; /* Created on first use with
+   make_nocase_string_hash_table; looked up by "host:port" strings, with
+   values that are released through free_specs.  res_cleanup destroys
+   the table and resets this pointer to NULL.  */
/* Stolen from cookies.c. */
#define SET_HOSTPORT(host, port, result) do { \
result = alloca (HP_len + 1 + numdigit (port) + 1); \
memcpy (result, host, HP_len); \
result[HP_len] = ':'; \
- long_to_string (result + HP_len + 1, port); \
+ number_to_string (result + HP_len + 1, port); \
} while (0)
/* Register RES specs that belong to server on HOST:PORT. They will
if (!registered_specs)
registered_specs = make_nocase_string_hash_table (0);
- if (hash_table_get_pair (registered_specs, hp, hp_old, old))
+ if (hash_table_get_pair (registered_specs, hp, &hp_old, &old))
{
if (old)
free_specs (old);
}
return err == RETROK;
}
+\f
+static int
+cleanup_hash_table_mapper (void *key, void *value, void *arg_ignored)
+{ /* Callback handed to hash_table_map by res_cleanup: frees one
+     entry's KEY with xfree and its VALUE with free_specs.  The third
+     argument is unused.  Always returns 0.  */
+ xfree (key);
+ free_specs (value);
+ return 0; /* presumably lets hash_table_map keep iterating -- confirm in hash.c */
+}
+
+void
+res_cleanup (void)
+{ /* Releases everything held in registered_specs and resets the
+     pointer to NULL.  Does nothing if the table was never created.  */
+ if (registered_specs)
+ {
+ hash_table_map (registered_specs, cleanup_hash_table_mapper, NULL); /* free each key and its specs */
+ hash_table_destroy (registered_specs); /* then the table itself */
+ registered_specs = NULL;
+ }
+}