[svn] Better version of read_whole_line().

[wget] / src / recur.c
diff --git a/src/recur.c b/src/recur.c

index dc58c6458ee46198dfa0fe4f583e85164f535550..52cc8e12385fc72b427c6ea88b023a4e766c8c49 100644 (file)
--- a/src/recur.c
+++ b/src/recur.c
@@ -1,5 +1,5 @@
  /* Handling of recursive HTTP retrieving.
-   Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.
+   Copyright (C) 1995, 1996, 1997, 2000 Free Software Foundation, Inc.
  
  This file is part of Wget.
  
@@ -127,7 +127,7 @@ recursive_retrieve (const char *file, const char *this_url)
    assert (this_url != NULL);
    assert (file != NULL);
    /* If quota was exceeded earlier, bail out.  */
-  if (opt.quota && (opt.downloaded > opt.quota))
+  if (downloaded_exceeds_quota ())
      return QUOTEXC;
    /* Cache the current URL in the list.  */
    if (first_time)
@@ -198,7 +198,7 @@ recursive_retrieve (const char *file, const char *this_url)
    for (cur_url = url_list; cur_url; cur_url = cur_url->next)
      {
        /* If quota was exceeded earlier, bail out.  */
-      if (opt.quota && (opt.downloaded > opt.quota))
+      if (downloaded_exceeds_quota ())
         break;
        /* Parse the URL for convenient use in other functions, as well
          as to get the optimized form.  It also checks URL integrity.  */
@@ -455,10 +455,15 @@ recursive_retrieve (const char *file, const char *this_url)
           else
             DEBUGP (("%s is not text/html so we don't chase.\n",
                      filename ? filename: "(null)"));
-         /* If an suffix-rejected file was loaded only because it was HTML,
-            undo the error now */
+
           if (opt.delete_after || (filename && !acceptable (filename)))
+           /* Either --delete-after was specified, or we loaded this otherwise
+              rejected (e.g. by -R) HTML file just so we could harvest its
+              hyperlinks -- in either case, delete the local file. */
             {
+             DEBUGP (("Removing file due to %s in recursive_retrieve():\n",
+                      opt.delete_after ? "--delete-after" :
+                      "recursive rejection criteria"));
               logprintf (LOG_VERBOSE,
                          (opt.delete_after ? _("Removing %s.\n")
                           : _("Removing %s since it should be rejected.\n")),
@@ -467,6 +472,7 @@ recursive_retrieve (const char *file, const char *this_url)
                 logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
               dt &= ~RETROKF;
             }
+
           /* If everything was OK, and links are to be converted, let's
              store the local filename.  */
           if (opt.convert_links && (dt & RETROKF) && (filename != NULL))
@@ -482,7 +488,7 @@ recursive_retrieve (const char *file, const char *this_url)
        freeurl (u, 1);
        /* Increment the pbuf for the appropriate size.  */
      }
-  if (opt.convert_links)
+  if (opt.convert_links && !opt.delete_after)
      convert_links (file, url_list);
    /* Free the linked list of URL-s.  */
    free_urlpos (url_list);
@@ -490,7 +496,7 @@ recursive_retrieve (const char *file, const char *this_url)
    FREE_MAYBE (canon_this_url);
    /* Decrement the recursion depth.  */
    --depth;
-  if (opt.quota && (opt.downloaded > opt.quota))
+  if (downloaded_exceeds_quota ())
      return QUOTEXC;
    else
      return RETROK;
@@ -696,9 +702,11 @@ parse_robots (const char *robots_filename)
    while ((line = read_whole_line (fp)))
      {
        len = strlen (line);
-      /* Destroy <CR> if there is one.  */
+      /* Destroy <CR><LF> if present.  */
+      if (len && line[len - 1] == '\n')
+       line[--len] = '\0';
        if (len && line[len - 1] == '\r')
-       line[len - 1] = '\0';
+       line[--len] = '\0';
        /* According to specifications, optional space may be at the
          end...  */
        DEBUGP (("Line: %s\n", line));