Fix build when libpsl is not available

[wget] / src / res.c
diff --git a/src/res.c b/src/res.c

index 94d9769ddabeb7d53f5878de71f4a8dc3ff1ebba..3038229bf074891e3dcdee5c383aeef77d264df3 100644 (file)
--- a/src/res.c
+++ b/src/res.c
@@ -1,5 +1,6 @@
  /* Support for Robot Exclusion Standard (RES).
-   Copyright (C) 2001, 2006, 2007, 2008 Free Software Foundation, Inc.
+   Copyright (C) 2001, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+   Foundation, Inc.
  
  This file is part of Wget.
  
@@ -67,8 +68,6 @@ as that of the covered work.  */
     res_match_path, res_register_specs, res_get_specs, and
     res_retrieve_file.  */
  
-#define USE_GNULIB_ALLOC
-
  #include "wget.h"
  
  #include <stdio.h>
@@ -284,7 +283,7 @@ res_parse (const char *source, int length)
        SKIP_SPACE (p);
        if (field_b == field_e || EOL (p) || *p != ':')
          {
-          DEBUGP (("Ignoring malformed line %d", line_count));
+          DEBUGP (("Ignoring malformed line %d\n", line_count));
            goto next;
          }
        ++p;                      /* skip ':' */
@@ -352,7 +351,7 @@ res_parse (const char *source, int length)
          }
        else
          {
-          DEBUGP (("Ignoring unknown field at line %d", line_count));
+          DEBUGP (("Ignoring unknown field at line %d\n", line_count));
            goto next;
          }
  
@@ -386,7 +385,7 @@ struct robot_specs *
  res_parse_from_file (const char *filename)
  {
    struct robot_specs *specs;
-  struct file_memory *fm = read_file (filename);
+  struct file_memory *fm = wget_read_file (filename);
    if (!fm)
      {
        logprintf (LOG_NOTQUIET, _("Cannot open %s: %s"),
@@ -394,7 +393,7 @@ res_parse_from_file (const char *filename)
        return NULL;
      }
    specs = res_parse (fm->content, fm->length);
-  read_file_free (fm);
+  wget_read_file_free (fm);
    return specs;
  }
  
@@ -534,21 +533,44 @@ res_get_specs (const char *host, int port)
     Return true if robots were retrieved OK, false otherwise.  */
  
  bool
-res_retrieve_file (const char *url, char **file)
+res_retrieve_file (const char *url, char **file, struct iri *iri)
  {
+  struct iri *i = iri_new ();
    uerr_t err;
    char *robots_url = uri_merge (url, RES_SPECS_LOCATION);
    int saved_ts_val = opt.timestamping;
-  int saved_sp_val = opt.spider;
+  int saved_sp_val = opt.spider, url_err;
+  struct url * url_parsed;
+
+  /* Copy server URI encoding for a possible IDNA transformation, no need to
+     encode the full URI in UTF-8 because "robots.txt" is plain ASCII */
+  set_uri_encoding (i, iri->uri_encoding, false);
+  i->utf8_encode = false;
  
    logputs (LOG_VERBOSE, _("Loading robots.txt; please ignore errors.\n"));
    *file = NULL;
    opt.timestamping = false;
    opt.spider       = false;
-  err = retrieve_url (robots_url, file, NULL, NULL, NULL, false);
+
+  url_parsed = url_parse (robots_url, &url_err, i, true);
+  if (!url_parsed)
+    {
+      char *error = url_error (robots_url, url_err);
+      logprintf (LOG_NOTQUIET, "%s: %s.\n", robots_url, error);
+      xfree (error);
+      err = URLERROR;
+    }
+  else
+    {
+      err = retrieve_url (url_parsed, robots_url, file, NULL, NULL, NULL,
+                          false, i, false);
+      url_free(url_parsed);
+    }
+
    opt.timestamping = saved_ts_val;
-  opt.spider       = saved_sp_val;  
+  opt.spider       = saved_sp_val;
    xfree (robots_url);
+  iri_free (i);
  
    if (err != RETROK && *file != NULL)
      {
@@ -568,7 +590,7 @@ is_robots_txt_url (const char *url)
    bool ret = are_urls_equal (url, robots_url);
  
    xfree (robots_url);
-  
+
    return ret;
  }
  \f
@@ -593,21 +615,21 @@ res_cleanup (void)
  #ifdef TESTING
  
  const char *
-test_is_robots_txt_url()
+test_is_robots_txt_url(void)
  {
-  int i;
-  struct {
-    char *url;
+  unsigned i;
+  static const struct {
+    const char *url;
      bool expected_result;
    } test_array[] = {
      { "http://www.yoyodyne.com/robots.txt", true },
      { "http://www.yoyodyne.com/somepath/", false },
      { "http://www.yoyodyne.com/somepath/robots.txt", false },
    };
-  
-  for (i = 0; i < sizeof(test_array)/sizeof(test_array[0]); ++i) 
+
+  for (i = 0; i < countof(test_array); ++i)
      {
-      mu_assert ("test_is_robots_txt_url: wrong result", 
+      mu_assert ("test_is_robots_txt_url: wrong result",
                   is_robots_txt_url (test_array[i].url) == test_array[i].expected_result);
      }