- assert (u->url != NULL);
- constr = xstrdup (u->url);
-
- /* Several checks determine whether a file is acceptable to load:
- 1. check if URL is ftp, and we don't load it
- 2. check for relative links (if relative_only is set)
- 3. check for domain
- 4. check for no-parent
- 5. check for excludes && includes
- 6. check for suffix
- 7. check for same host (if spanhost is unset), with possible
- gethostbyname baggage
- 8. check for robots.txt
-
- Addendum: If the URL is FTP, and it is to be loaded, only the
- domain and suffix settings are "stronger".
-
- Note that .html and (yuck) .htm will get loaded regardless of
- suffix rules (but that is remedied later with unlink) unless
- the depth equals the maximum depth.
-
- More time- and memory- consuming tests should be put later on
- the list. */
-
- /* inl is set if the URL we are working on (constr) is stored in
- ulist. Using it is crucial to avoid the incessant calls to
- in_slist, which is quite slow. */
- inl = in_slist (ulist, constr);
-
- /* If it is FTP, and FTP is not followed, chuck it out. */
- if (!inl)
- if (u->proto == URLFTP && !opt.follow_ftp && !this_url_ftp)
- {
- DEBUGP (("Uh, it is FTP but i'm not in the mood to follow FTP.\n"));
- ulist = add_slist (ulist, constr, 0);
- inl = 1;
- }
- /* If it is an absolute link and absolute links are not followed,
- chuck it out. */
- if (!inl && u->proto != URLFTP)
- if (opt.relative_only && !(cur_url->flags & URELATIVE))
- {
- DEBUGP (("It doesn't really look like a relative link.\n"));
- ulist = add_slist (ulist, constr, 0);
- inl = 1;
- }
- /* If its domain is not to be accepted/looked-up, chuck it out. */
- if (!inl)
- if (!accept_domain (u))
- {
- DEBUGP (("I don't like the smell of that domain.\n"));
- ulist = add_slist (ulist, constr, 0);
- inl = 1;
- }
- /* Check for parent directory. */
- if (!inl && opt.no_parent
- /* If the new URL is FTP and the old was not, ignore
- opt.no_parent. */
- && !(!this_url_ftp && u->proto == URLFTP))