sjero.net Git - wget/blob - src/recur.c

   1 /* Handling of recursive HTTP retrieving.
   2    Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
   3    2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
   4
   5 This file is part of GNU Wget.
   6
   7 GNU Wget is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 3 of the License, or
  10  (at your option) any later version.
  11
  12 GNU Wget is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with Wget.  If not, see <http://www.gnu.org/licenses/>.
  19
  20 Additional permission under GNU GPL version 3 section 7
  21
  22 If you modify this program, or any covered work, by linking or
  23 combining it with the OpenSSL project's OpenSSL library (or a
  24 modified version of that library), containing parts covered by the
  25 terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
  26 grants you additional permission to convey the resulting work.
  27 Corresponding Source for a non-source form of such a combination
  28 shall include the source code for the parts of OpenSSL used as well
  29 as that of the covered work.  */
  30
  31 #include "wget.h"
  32
  33 #include <stdio.h>
  34 #include <stdlib.h>
  35 #include <string.h>
  36 #ifdef HAVE_UNISTD_H
  37 # include <unistd.h>
  38 #endif /* HAVE_UNISTD_H */
  39 #include <errno.h>
  40 #include <assert.h>
  41
  42 #include "url.h"
  43 #include "recur.h"
  44 #include "utils.h"
  45 #include "retr.h"
  46 #include "ftp.h"
  47 #include "host.h"
  48 #include "hash.h"
  49 #include "res.h"
  50 #include "convert.h"
  51 #include "html-url.h"
  52 #include "css-url.h"
  53 #include "spider.h"
  54 \f
  55 /* Functions for maintaining the URL queue.  */
  56
  57 struct queue_element {
  58   const char *url;              /* the URL to download */
  59   const char *referer;          /* the referring document */
  60   int depth;                    /* the depth */
  61   bool html_allowed;            /* whether the document is allowed to
  62                                    be treated as HTML. */
  63   struct iri *iri;                /* sXXXav */
  64   bool css_allowed;             /* whether the document is allowed to
  65                                    be treated as CSS. */
  66   struct queue_element *next;   /* next element in queue */
  67 };
  68
  69 struct url_queue {
  70   struct queue_element *head;
  71   struct queue_element *tail;
  72   int count, maxcount;
  73 };
  74
  75 /* Create a URL queue. */
  76
  77 static struct url_queue *
  78 url_queue_new (void)
  79 {
  80   struct url_queue *queue = xnew0 (struct url_queue);
  81   return queue;
  82 }
  83
  84 /* Delete a URL queue. */
  85
  86 static void
  87 url_queue_delete (struct url_queue *queue)
  88 {
  89   xfree (queue);
  90 }
  91
  92 /* Enqueue a URL in the queue.  The queue is FIFO: the items will be
  93    retrieved ("dequeued") from the queue in the order they were placed
  94    into it.  */
  95
  96 static void
  97 url_enqueue (struct url_queue *queue, struct iri *i,
  98              const char *url, const char *referer, int depth,
  99              bool html_allowed, bool css_allowed)
 100 {
 101   struct queue_element *qel = xnew (struct queue_element);
 102   qel->iri = i;
 103   qel->url = url;
 104   qel->referer = referer;
 105   qel->depth = depth;
 106   qel->html_allowed = html_allowed;
 107   qel->css_allowed = css_allowed;
 108   qel->next = NULL;
 109
 110   ++queue->count;
 111   if (queue->count > queue->maxcount)
 112     queue->maxcount = queue->count;
 113
 114   DEBUGP (("Enqueuing %s at depth %d\n", url, depth));
 115   DEBUGP (("Queue count %d, maxcount %d.\n", queue->count, queue->maxcount));
 116
 117   if (i)
 118     DEBUGP (("[IRI Enqueuing %s with %s\n", quote (url),
 119              i->uri_encoding ? quote (i->uri_encoding) : "None"));
 120
 121   if (queue->tail)
 122     queue->tail->next = qel;
 123   queue->tail = qel;
 124
 125   if (!queue->head)
 126     queue->head = queue->tail;
 127 }
 128
 129 /* Take a URL out of the queue.  Return true if this operation
 130    succeeded, or false if the queue is empty.  */
 131
 132 static bool
 133 url_dequeue (struct url_queue *queue, struct iri **i,
 134              const char **url, const char **referer, int *depth,
 135              bool *html_allowed, bool *css_allowed)
 136 {
 137   struct queue_element *qel = queue->head;
 138
 139   if (!qel)
 140     return false;
 141
 142   queue->head = queue->head->next;
 143   if (!queue->head)
 144     queue->tail = NULL;
 145
 146   *i = qel->iri;
 147   *url = qel->url;
 148   *referer = qel->referer;
 149   *depth = qel->depth;
 150   *html_allowed = qel->html_allowed;
 151   *css_allowed = qel->css_allowed;
 152
 153   --queue->count;
 154
 155   DEBUGP (("Dequeuing %s at depth %d\n", qel->url, qel->depth));
 156   DEBUGP (("Queue count %d, maxcount %d.\n", queue->count, queue->maxcount));
 157
 158   xfree (qel);
 159   return true;
 160 }
 161 \f
 162 static bool download_child_p (const struct urlpos *, struct url *, int,
 163                               struct url *, struct hash_table *, struct iri *);
 164 static bool descend_redirect_p (const char *, const char *, int,
 165                                 struct url *, struct hash_table *, struct iri *);
 166
 167
 168 /* Retrieve a part of the web beginning with START_URL.  This used to
 169    be called "recursive retrieval", because the old function was
 170    recursive and implemented depth-first search.  retrieve_tree on the
 171    other hand implements breadth-search traversal of the tree, which
 172    results in much nicer ordering of downloads.
 173
 174    The algorithm this function uses is simple:
 175
 176    1. put START_URL in the queue.
 177    2. while there are URLs in the queue:
 178
 179      3. get next URL from the queue.
 180      4. download it.
 181      5. if the URL is HTML and its depth does not exceed maximum depth,
 182         get the list of URLs embedded therein.
 183      6. for each of those URLs do the following:
 184
 185        7. if the URL is not one of those downloaded before, and if it
 186           satisfies the criteria specified by the various command-line
 187           options, add it to the queue. */
 188
 189 uerr_t
 190 retrieve_tree (const char *start_url, struct iri *pi)
 191 {
 192   uerr_t status = RETROK;
 193
 194   /* The queue of URLs we need to load. */
 195   struct url_queue *queue;
 196
 197   /* The URLs we do not wish to enqueue, because they are already in
 198      the queue, but haven't been downloaded yet.  */
 199   struct hash_table *blacklist;
 200
 201   int up_error_code;
 202   struct url *start_url_parsed;
 203   struct iri *i = iri_new ();
 204
 205 #define COPYSTR(x)  (x) ? xstrdup(x) : NULL;
 206   /* Duplicate pi struct if not NULL */
 207   if (pi)
 208     {
 209       i->uri_encoding = COPYSTR (pi->uri_encoding);
 210       i->content_encoding = COPYSTR (pi->content_encoding);
 211       i->utf8_encode = pi->utf8_encode;
 212     }
 213   else
 214     set_uri_encoding (i, opt.locale, true);
 215 #undef COPYSTR
 216
 217   start_url_parsed = url_parse (start_url, &up_error_code, i);
 218   if (!start_url_parsed)
 219     {
 220       char *error = url_error (start_url, up_error_code);
 221       logprintf (LOG_NOTQUIET, "%s: %s.\n", start_url, error);
 222       xfree (error);
 223       return URLERROR;
 224     }
 225
 226   queue = url_queue_new ();
 227   blacklist = make_string_hash_table (0);
 228
 229   /* Enqueue the starting URL.  Use start_url_parsed->url rather than
 230      just URL so we enqueue the canonical form of the URL.  */
 231   url_enqueue (queue, i, xstrdup (start_url_parsed->url), NULL, 0, true,
 232                false);
 233   string_set_add (blacklist, start_url_parsed->url);
 234
 235   while (1)
 236     {
 237       bool descend = false;
 238       char *url, *referer, *file = NULL;
 239       int depth;
 240       bool html_allowed, css_allowed;
 241       bool is_css = false;
 242       bool dash_p_leaf_HTML = false;
 243
 244       if (opt.quota && total_downloaded_bytes > opt.quota)
 245         break;
 246       if (status == FWRITEERR)
 247         break;
 248
 249       /* Get the next URL from the queue... */
 250
 251       if (!url_dequeue (queue, (struct iri **) &i,
 252                         (const char **)&url, (const char **)&referer,
 253                         &depth, &html_allowed, &css_allowed))
 254         break;
 255
 256       /* ...and download it.  Note that this download is in most cases
 257          unconditional, as download_child_p already makes sure a file
 258          doesn't get enqueued twice -- and yet this check is here, and
 259          not in download_child_p.  This is so that if you run `wget -r
 260          URL1 URL2', and a random URL is encountered once under URL1
 261          and again under URL2, but at a different (possibly smaller)
 262          depth, we want the URL's children to be taken into account
 263          the second time.  */
 264       if (dl_url_file_map && hash_table_contains (dl_url_file_map, url))
 265         {
 266           file = xstrdup (hash_table_get (dl_url_file_map, url));
 267
 268           DEBUGP (("Already downloaded \"%s\", reusing it from \"%s\".\n",
 269                    url, file));
 270
 271           /* this sucks, needs to be combined! */
 272           if (html_allowed
 273               && downloaded_html_set
 274               && string_set_contains (downloaded_html_set, file))
 275             {
 276               descend = true;
 277               is_css = false;
 278             }
 279           if (css_allowed
 280               && downloaded_css_set
 281               && string_set_contains (downloaded_css_set, file))
 282             {
 283               descend = true;
 284               is_css = true;
 285             }
 286         }
 287       else
 288         {
 289           int dt = 0;
 290           char *redirected = NULL;
 291
 292           status = retrieve_url (url, &file, &redirected, referer, &dt,
 293                                  false, i);
 294
 295           if (html_allowed && file && status == RETROK
 296               && (dt & RETROKF) && (dt & TEXTHTML))
 297             {
 298               descend = true;
 299               is_css = false;
 300             }
 301
 302           /* a little different, css_allowed can override content type
 303              lots of web servers serve css with an incorrect content type
 304           */
 305           if (file && status == RETROK
 306               && (dt & RETROKF) &&
 307               ((dt & TEXTCSS) || css_allowed))
 308             {
 309               descend = true;
 310               is_css = true;
 311             }
 312
 313           if (redirected)
 314             {
 315               /* We have been redirected, possibly to another host, or
 316                  different path, or wherever.  Check whether we really
 317                  want to follow it.  */
 318               if (descend)
 319                 {
 320                   if (!descend_redirect_p (redirected, url, depth,
 321                                            start_url_parsed, blacklist, i))
 322                     descend = false;
 323                   else
 324                     /* Make sure that the old pre-redirect form gets
 325                        blacklisted. */
 326                     string_set_add (blacklist, url);
 327                 }
 328
 329               xfree (url);
 330               url = redirected;
 331             }
 332         }
 333
 334       if (opt.spider)
 335         {
 336           visited_url (url, referer);
 337         }
 338
 339       if (descend
 340           && depth >= opt.reclevel && opt.reclevel != INFINITE_RECURSION)
 341         {
 342           if (opt.page_requisites
 343               && (depth == opt.reclevel || depth == opt.reclevel + 1))
 344             {
 345               /* When -p is specified, we are allowed to exceed the
 346                  maximum depth, but only for the "inline" links,
 347                  i.e. those that are needed to display the page.
 348                  Originally this could exceed the depth at most by
 349                  one, but we allow one more level so that the leaf
 350                  pages that contain frames can be loaded
 351                  correctly.  */
 352               dash_p_leaf_HTML = true;
 353             }
 354           else
 355             {
 356               /* Either -p wasn't specified or it was and we've
 357                  already spent the two extra (pseudo-)levels that it
 358                  affords us, so we need to bail out. */
 359               DEBUGP (("Not descending further; at depth %d, max. %d.\n",
 360                        depth, opt.reclevel));
 361               descend = false;
 362             }
 363         }
 364
 365       /* If the downloaded document was HTML or CSS, parse it and enqueue the
 366          links it contains. */
 367
 368       if (descend)
 369         {
 370           bool meta_disallow_follow = false;
 371           struct urlpos *children
 372             = is_css ? get_urls_css_file (file, url) :
 373                        get_urls_html (file, url, &meta_disallow_follow, i);
 374
 375           if (opt.use_robots && meta_disallow_follow)
 376             {
 377               free_urlpos (children);
 378               children = NULL;
 379             }
 380
 381           if (children)
 382             {
 383               struct urlpos *child = children;
 384               struct url *url_parsed = url_parse (url, NULL, i);
 385               struct iri *ci;
 386               char *referer_url = url;
 387               bool strip_auth = (url_parsed != NULL
 388                                  && url_parsed->user != NULL);
 389               assert (url_parsed != NULL);
 390
 391               /* Strip auth info if present */
 392               if (strip_auth)
 393                 referer_url = url_string (url_parsed, URL_AUTH_HIDE);
 394
 395               for (; child; child = child->next)
 396                 {
 397                   if (child->ignore_when_downloading)
 398                     continue;
 399                   if (dash_p_leaf_HTML && !child->link_inline_p)
 400                     continue;
 401                   if (download_child_p (child, url_parsed, depth, start_url_parsed,
 402                                         blacklist, i))
 403                     {
 404                       ci = iri_new ();
 405                       set_uri_encoding (ci, i->content_encoding, false);
 406                       url_enqueue (queue, ci, xstrdup (child->url->url),
 407                                    xstrdup (referer_url), depth + 1,
 408                                    child->link_expect_html,
 409                                    child->link_expect_css);
 410                       /* We blacklist the URL we have enqueued, because we
 411                          don't want to enqueue (and hence download) the
 412                          same URL twice.  */
 413                       string_set_add (blacklist, child->url->url);
 414                     }
 415                 }
 416
 417               if (strip_auth)
 418                 xfree (referer_url);
 419               url_free (url_parsed);
 420               free_urlpos (children);
 421             }
 422         }
 423
 424       if (file
 425           && (opt.delete_after
 426               || opt.spider /* opt.recursive is implicitely true */
 427               || !acceptable (file)))
 428         {
 429           /* Either --delete-after was specified, or we loaded this
 430              (otherwise unneeded because of --spider or rejected by -R)
 431              HTML file just to harvest its hyperlinks -- in either case,
 432              delete the local file. */
 433           DEBUGP (("Removing file due to %s in recursive_retrieve():\n",
 434                    opt.delete_after ? "--delete-after" :
 435                    (opt.spider ? "--spider" :
 436                     "recursive rejection criteria")));
 437           logprintf (LOG_VERBOSE,
 438                      (opt.delete_after || opt.spider
 439                       ? _("Removing %s.\n")
 440                       : _("Removing %s since it should be rejected.\n")),
 441                      file);
 442           if (unlink (file))
 443             logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
 444           logputs (LOG_VERBOSE, "\n");
 445           register_delete_file (file);
 446         }
 447
 448       xfree (url);
 449       xfree_null (referer);
 450       xfree_null (file);
 451       iri_free (i);
 452     }
 453
 454   /* If anything is left of the queue due to a premature exit, free it
 455      now.  */
 456   {
 457     char *d1, *d2;
 458     int d3;
 459     bool d4, d5;
 460     struct iri *d6;
 461     while (url_dequeue (queue, (struct iri **)&d6,
 462                         (const char **)&d1, (const char **)&d2, &d3, &d4, &d5))
 463       {
 464         iri_free (d6);
 465         xfree (d1);
 466         xfree_null (d2);
 467       }
 468   }
 469   url_queue_delete (queue);
 470
 471   if (start_url_parsed)
 472     url_free (start_url_parsed);
 473   string_set_free (blacklist);
 474
 475   if (opt.quota && total_downloaded_bytes > opt.quota)
 476     return QUOTEXC;
 477   else if (status == FWRITEERR)
 478     return FWRITEERR;
 479   else
 480     return RETROK;
 481 }
 482
 483 /* Based on the context provided by retrieve_tree, decide whether a
 484    URL is to be descended to.  This is only ever called from
 485    retrieve_tree, but is in a separate function for clarity.
 486
 487    The most expensive checks (such as those for robots) are memoized
 488    by storing these URLs to BLACKLIST.  This may or may not help.  It
 489    will help if those URLs are encountered many times.  */
 490
 491 static bool
 492 download_child_p (const struct urlpos *upos, struct url *parent, int depth,
 493                   struct url *start_url_parsed, struct hash_table *blacklist,
 494                   struct iri *iri)
 495 {
 496   struct url *u = upos->url;
 497   const char *url = u->url;
 498   bool u_scheme_like_http;
 499
 500   DEBUGP (("Deciding whether to enqueue \"%s\".\n", url));
 501
 502   if (string_set_contains (blacklist, url))
 503     {
 504       if (opt.spider)
 505         {
 506           char *referrer = url_string (parent, URL_AUTH_HIDE_PASSWD);
 507           DEBUGP (("download_child_p: parent->url is: %s\n", quote (parent->url)));
 508           visited_url (url, referrer);
 509           xfree (referrer);
 510         }
 511       DEBUGP (("Already on the black list.\n"));
 512       goto out;
 513     }
 514
 515   /* Several things to check for:
 516      1. if scheme is not http, and we don't load it
 517      2. check for relative links (if relative_only is set)
 518      3. check for domain
 519      4. check for no-parent
 520      5. check for excludes && includes
 521      6. check for suffix
 522      7. check for same host (if spanhost is unset), with possible
 523      gethostbyname baggage
 524      8. check for robots.txt
 525
 526      Addendum: If the URL is FTP, and it is to be loaded, only the
 527      domain and suffix settings are "stronger".
 528
 529      Note that .html files will get loaded regardless of suffix rules
 530      (but that is remedied later with unlink) unless the depth equals
 531      the maximum depth.
 532
 533      More time- and memory- consuming tests should be put later on
 534      the list.  */
 535
 536   /* Determine whether URL under consideration has a HTTP-like scheme. */
 537   u_scheme_like_http = schemes_are_similar_p (u->scheme, SCHEME_HTTP);
 538
 539   /* 1. Schemes other than HTTP are normally not recursed into. */
 540   if (!u_scheme_like_http && !(u->scheme == SCHEME_FTP && opt.follow_ftp))
 541     {
 542       DEBUGP (("Not following non-HTTP schemes.\n"));
 543       goto out;
 544     }
 545
 546   /* 2. If it is an absolute link and they are not followed, throw it
 547      out.  */
 548   if (u_scheme_like_http)
 549     if (opt.relative_only && !upos->link_relative_p)
 550       {
 551         DEBUGP (("It doesn't really look like a relative link.\n"));
 552         goto out;
 553       }
 554
 555   /* 3. If its domain is not to be accepted/looked-up, chuck it
 556      out.  */
 557   if (!accept_domain (u))
 558     {
 559       DEBUGP (("The domain was not accepted.\n"));
 560       goto out;
 561     }
 562
 563   /* 4. Check for parent directory.
 564
 565      If we descended to a different host or changed the scheme, ignore
 566      opt.no_parent.  Also ignore it for documents needed to display
 567      the parent page when in -p mode.  */
 568   if (opt.no_parent
 569       && schemes_are_similar_p (u->scheme, start_url_parsed->scheme)
 570       && 0 == strcasecmp (u->host, start_url_parsed->host)
 571       && u->port == start_url_parsed->port
 572       && !(opt.page_requisites && upos->link_inline_p))
 573     {
 574       if (!subdir_p (start_url_parsed->dir, u->dir))
 575         {
 576           DEBUGP (("Going to \"%s\" would escape \"%s\" with no_parent on.\n",
 577                    u->dir, start_url_parsed->dir));
 578           goto out;
 579         }
 580     }
 581
 582   /* 5. If the file does not match the acceptance list, or is on the
 583      rejection list, chuck it out.  The same goes for the directory
 584      exclusion and inclusion lists.  */
 585   if (opt.includes || opt.excludes)
 586     {
 587       if (!accdir (u->dir))
 588         {
 589           DEBUGP (("%s (%s) is excluded/not-included.\n", url, u->dir));
 590           goto out;
 591         }
 592     }
 593
 594   /* 6. Check for acceptance/rejection rules.  We ignore these rules
 595      for directories (no file name to match) and for non-leaf HTMLs,
 596      which can lead to other files that do need to be downloaded.  (-p
 597      automatically implies non-leaf because with -p we can, if
 598      necesary, overstep the maximum depth to get the page requisites.)  */
 599   if (u->file[0] != '\0'
 600       && !(has_html_suffix_p (u->file)
 601            /* The exception only applies to non-leaf HTMLs (but -p
 602               always implies non-leaf because we can overstep the
 603               maximum depth to get the requisites): */
 604            && (/* non-leaf */
 605                opt.reclevel == INFINITE_RECURSION
 606                /* also non-leaf */
 607                || depth < opt.reclevel - 1
 608                /* -p, which implies non-leaf (see above) */
 609                || opt.page_requisites)))
 610     {
 611       if (!acceptable (u->file))
 612         {
 613           DEBUGP (("%s (%s) does not match acc/rej rules.\n",
 614                    url, u->file));
 615           goto out;
 616         }
 617     }
 618
 619   /* 7. */
 620   if (schemes_are_similar_p (u->scheme, parent->scheme))
 621     if (!opt.spanhost && 0 != strcasecmp (parent->host, u->host))
 622       {
 623         DEBUGP (("This is not the same hostname as the parent's (%s and %s).\n",
 624                  u->host, parent->host));
 625         goto out;
 626       }
 627
 628   /* 8. */
 629   if (opt.use_robots && u_scheme_like_http)
 630     {
 631       struct robot_specs *specs = res_get_specs (u->host, u->port);
 632       if (!specs)
 633         {
 634           char *rfile;
 635           if (res_retrieve_file (url, &rfile, iri))
 636             {
 637               specs = res_parse_from_file (rfile);
 638
 639               /* Delete the robots.txt file if we chose to either delete the
 640                  files after downloading or we're just running a spider. */
 641               if (opt.delete_after || opt.spider)
 642                 {
 643                   logprintf (LOG_VERBOSE, "Removing %s.\n", rfile);
 644                   if (unlink (rfile))
 645                       logprintf (LOG_NOTQUIET, "unlink: %s\n",
 646                                  strerror (errno));
 647                 }
 648
 649               xfree (rfile);
 650             }
 651           else
 652             {
 653               /* If we cannot get real specs, at least produce
 654                  dummy ones so that we can register them and stop
 655                  trying to retrieve them.  */
 656               specs = res_parse ("", 0);
 657             }
 658           res_register_specs (u->host, u->port, specs);
 659         }
 660
 661       /* Now that we have (or don't have) robots.txt specs, we can
 662          check what they say.  */
 663       if (!res_match_path (specs, u->path))
 664         {
 665           DEBUGP (("Not following %s because robots.txt forbids it.\n", url));
 666           string_set_add (blacklist, url);
 667           goto out;
 668         }
 669     }
 670
 671   /* The URL has passed all the tests.  It can be placed in the
 672      download queue. */
 673   DEBUGP (("Decided to load it.\n"));
 674
 675   return true;
 676
 677  out:
 678   DEBUGP (("Decided NOT to load it.\n"));
 679
 680   return false;
 681 }
 682
 683 /* This function determines whether we will consider downloading the
 684    children of a URL whose download resulted in a redirection,
 685    possibly to another host, etc.  It is needed very rarely, and thus
 686    it is merely a simple-minded wrapper around download_child_p.  */
 687
 688 static bool
 689 descend_redirect_p (const char *redirected, const char *original, int depth,
 690                     struct url *start_url_parsed, struct hash_table *blacklist,
 691                     struct iri *iri)
 692 {
 693   struct url *orig_parsed, *new_parsed;
 694   struct urlpos *upos;
 695   bool success;
 696
 697   orig_parsed = url_parse (original, NULL, NULL);
 698   assert (orig_parsed != NULL);
 699
 700   new_parsed = url_parse (redirected, NULL, NULL);
 701   assert (new_parsed != NULL);
 702
 703   upos = xnew0 (struct urlpos);
 704   upos->url = new_parsed;
 705
 706   success = download_child_p (upos, orig_parsed, depth,
 707                               start_url_parsed, blacklist, iri);
 708
 709   url_free (orig_parsed);
 710   url_free (new_parsed);
 711   xfree (upos);
 712
 713   if (!success)
 714     DEBUGP (("Redirection \"%s\" failed the test.\n", redirected));
 715
 716   return success;
 717 }
 718
 719 /* vim:set sts=2 sw=2 cino+={s: */