X-Git-Url: http://sjero.net/git/?a=blobdiff_plain;f=src%2Fres.c;h=8c35f0e1cfae726a877e23e12b4ac44256d940ff;hb=123f5c39669abc055987d69a311785c861494c87;hp=103bc4e7c8aedcbca3037d7cbce95ded305f2f84;hpb=60c88ee992b501590aeed111a669e99fbff7ef82;p=wget diff --git a/src/res.c b/src/res.c index 103bc4e7..8c35f0e1 100644 --- a/src/res.c +++ b/src/res.c @@ -1,11 +1,11 @@ /* Support for Robot Exclusion Standard (RES). - Copyright (C) 2001 Free Software Foundation, Inc. + Copyright (C) 2001, 2006, 2007, 2008 Free Software Foundation, Inc. This file is part of Wget. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or (at +the Free Software Foundation; either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but @@ -14,18 +14,18 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License -along with Wget; if not, write to the Free Software Foundation, Inc., -51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - -In addition, as a special exception, the Free Software Foundation -gives permission to link the code of its release of Wget with the -OpenSSL project's "OpenSSL" library (or with modified versions of it -that use the same license as the "OpenSSL" library), and distribute -the linked executables. You must obey the GNU General Public License -in all respects for all of the code used other than "OpenSSL". If you -modify this file, you may extend this exception to your version of the -file, but you are not obligated to do so. If you do not wish to do -so, delete this exception statement from your version. */ +along with Wget. If not, see <http://www.gnu.org/licenses/>. 
+ +Additional permission under GNU GPL version 3 section 7 + +If you modify this program, or any covered work, by linking or +combining it with the OpenSSL project's OpenSSL library (or a +modified version of that library), containing parts covered by the +terms of the OpenSSL or SSLeay licenses, the Free Software Foundation +grants you additional permission to convey the resulting work. +Corresponding Source for a non-source form of such a combination +shall include the source code for the parts of OpenSSL used as well +as that of the covered work. */ /* This file implements the Robot Exclusion Standard (RES). @@ -67,9 +67,7 @@ so, delete this exception statement from your version. */ res_match_path, res_register_specs, res_get_specs, and res_retrieve_file. */ -#ifdef HAVE_CONFIG_H -# include <config.h> -#endif +#include "wget.h" #include #include @@ -77,7 +75,6 @@ so, delete this exception statement from your version. */ #include #include -#include "wget.h" #include "utils.h" #include "hash.h" #include "url.h" @@ -181,7 +178,7 @@ prune_non_exact (struct robot_specs *specs) #define EOL(p) ((p) >= lineend) #define SKIP_SPACE(p) do { \ - while (!EOL (p) && ISSPACE (*p)) \ + while (!EOL (p) && c_isspace (*p)) \ ++p; \ } while (0) @@ -267,18 +264,18 @@ res_parse (const char *source, int length) lineend to a location preceding the first comment. Real line ending remains in lineend_real. */ for (lineend = p; lineend < lineend_real; lineend++) - if ((lineend == p || ISSPACE (*(lineend - 1))) + if ((lineend == p || c_isspace (*(lineend - 1))) && *lineend == '#') break; /* Ignore trailing whitespace in the same way. */ - while (lineend > p && ISSPACE (*(lineend - 1))) + while (lineend > p && c_isspace (*(lineend - 1))) --lineend; assert (!EOL (p)); field_b = p; - while (!EOL (p) && (ISALNUM (*p) || *p == '-')) + while (!EOL (p) && (c_isalnum (*p) || *p == '-')) ++p; field_e = p; @@ -416,7 +413,7 @@ free_specs (struct robot_specs *specs) advance the pointer. 
*/ #define DECODE_MAYBE(c, ptr) do { \ - if (c == '%' && ISXDIGIT (ptr[1]) && ISXDIGIT (ptr[2])) \ + if (c == '%' && c_isxdigit (ptr[1]) && c_isxdigit (ptr[2])) \ { \ char decoded = X2DIGITS_TO_NUM (ptr[1], ptr[2]); \ if (decoded != '/') \ @@ -466,9 +463,9 @@ res_match_path (const struct robot_specs *specs, const char *path) if (matches (specs->paths[i].path, path)) { bool allowedp = specs->paths[i].allowedp; - DEBUGP (("%s path %s because of rule `%s'.\n", + DEBUGP (("%s path %s because of rule %s.\n", allowedp ? "Allowing" : "Rejecting", - path, specs->paths[i].path)); + path, quote (specs->paths[i].path))); return allowedp; } return true; @@ -539,10 +536,16 @@ res_retrieve_file (const char *url, char **file) { uerr_t err; char *robots_url = uri_merge (url, RES_SPECS_LOCATION); + int saved_ts_val = opt.timestamping; + int saved_sp_val = opt.spider; logputs (LOG_VERBOSE, _("Loading robots.txt; please ignore errors.\n")); *file = NULL; + opt.timestamping = false; + opt.spider = false; err = retrieve_url (robots_url, file, NULL, NULL, NULL, false); + opt.timestamping = saved_ts_val; + opt.spider = saved_sp_val; xfree (robots_url); if (err != RETROK && *file != NULL)