- {
- int meta_disallow_follow = 0;
- struct urlpos *children
- = get_urls_html (file, url, &meta_disallow_follow);
-
- if (opt.use_robots && meta_disallow_follow)
- {
- free_urlpos (children);
- children = NULL;
- }
-
- if (children)
- {
- struct urlpos *child = children;
- struct url *url_parsed = url_parsed = url_parse (url, NULL);
- assert (url_parsed != NULL);
-
- for (; child; child = child->next)
- {
- if (child->ignore_when_downloading)
- continue;
- if (dash_p_leaf_HTML && !child->link_inline_p)
- continue;
- if (download_child_p (child, url_parsed, depth, start_url_parsed,
- blacklist))
- {
- url_enqueue (queue, xstrdup (child->url->url),
- xstrdup (url), depth + 1,
- child->link_expect_html);
- /* We blacklist the URL we have enqueued, because we
- don't want to enqueue (and hence download) the
- same URL twice. */
- string_set_add (blacklist, child->url->url);
- }
- }
-
- url_free (url_parsed);
- free_urlpos (children);
- }
- }
-
- if (opt.delete_after || (file && !acceptable (file)))
- {
- /* Either --delete-after was specified, or we loaded this
- otherwise rejected (e.g. by -R) HTML file just so we
- could harvest its hyperlinks -- in either case, delete
- the local file. */
- DEBUGP (("Removing file due to %s in recursive_retrieve():\n",
- opt.delete_after ? "--delete-after" :
- "recursive rejection criteria"));
- logprintf (LOG_VERBOSE,
- (opt.delete_after
- ? _("Removing %s.\n")
- : _("Removing %s since it should be rejected.\n")),
- file);
- if (unlink (file))
- logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
- register_delete_file (file);
- }
+ {
+ bool meta_disallow_follow = false;
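+ /* Harvest this document's links: CSS files go through the
+    dedicated CSS extractor, HTML through get_urls_html (), which
+    also reports whether a <meta name="robots"> tag forbids
+    following links. */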
+ struct urlpos *children
+ = is_css ? get_urls_css_file (file, url) :
+ get_urls_html (file, url, &meta_disallow_follow, i);
+
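+ /* When robots support is enabled, a "nofollow" in the page
+    itself discards every link we just extracted. */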
+ if (opt.use_robots && meta_disallow_follow)
+ {
+ free_urlpos (children);
+ children = NULL;
+ }
+
+ if (children)
+ {
+ struct urlpos *child = children;
+ struct url *url_parsed = url_parse (url, NULL, i, true);
+ struct iri *ci;
+ char *referer_url = url;
+ bool strip_auth = (url_parsed != NULL
+ && url_parsed->user != NULL);
+ assert (url_parsed != NULL);
+
+ /* Strip auth info, if present, so user/password are not carried
+    along in the referer */
+ if (strip_auth)
+ referer_url = url_string (url_parsed, URL_AUTH_HIDE);
+
+ for (; child; child = child->next)
+ {
+ if (child->ignore_when_downloading)
+ continue;
+ if (dash_p_leaf_HTML && !child->link_inline_p)
+ continue;
+ if (download_child_p (child, url_parsed, depth, start_url_parsed,
+ blacklist, i))
+ {
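+ /* Give the child its own IRI, seeded with the parent
+    document's content encoding, before queuing it. */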
+ ci = iri_new ();
+ set_uri_encoding (ci, i->content_encoding, false);
+ url_enqueue (queue, ci, xstrdup (child->url->url),
+ xstrdup (referer_url), depth + 1,
+ child->link_expect_html,
+ child->link_expect_css);
+ /* We blacklist the URL we have enqueued, because we
+ don't want to enqueue (and hence download) the
+ same URL twice. */
+ string_set_add (blacklist, child->url->url);
+ }
+ }
+
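+ /* referer_url was allocated by url_string () only when auth
+    info was stripped; otherwise it still points at `url'. */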
+ if (strip_auth)
+ xfree (referer_url);
+ url_free (url_parsed);
+ free_urlpos (children);
+ }
+ }
+
+ if (file
+ && (opt.delete_after
+ || opt.spider /* opt.recursive is implicitly true */
+ || !acceptable (file)))
+ {
+ /* Either --delete-after was specified, or we loaded this
+ (otherwise unneeded because of --spider or rejected by -R)
+ HTML file just to harvest its hyperlinks -- in either case,
+ delete the local file. */
+ DEBUGP (("Removing file due to %s in recursive_retrieve():\n",
+ opt.delete_after ? "--delete-after" :
+ (opt.spider ? "--spider" :
+ "recursive rejection criteria")));
+ logprintf (LOG_VERBOSE,
+ (opt.delete_after || opt.spider
+ ? _("Removing %s.\n")
+ : _("Removing %s since it should be rejected.\n")),
+ file);
+ if (unlink (file))
+ logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
+ logputs (LOG_VERBOSE, "\n");
+ register_delete_file (file);
+ }