int dt, inl, dash_p_leaf_HTML = FALSE;
int meta_disallow_follow;
int this_url_ftp; /* See below the explanation */
- uerr_t err;
urlpos *url_list, *cur_url;
- struct urlinfo *u;
+ struct url *u;
assert (this_url != NULL);
assert (file != NULL);
hash_table_clear (undesirable_urls);
string_set_add (undesirable_urls, this_url);
/* Enter this_url to the hash table, in original and "enhanced" form. */
- u = newurl ();
- err = parseurl (this_url, u, 0);
- if (err == URLOK)
+ u = url_parse (this_url, NULL);
+ if (u)
{
string_set_add (undesirable_urls, u->url);
if (opt.no_parent)
DEBUGP (("Double yuck! The *base* URL is broken.\n"));
base_dir = NULL;
}
- freeurl (u, 1);
+ url_free (u);
depth = 1;
first_time = 0;
}
that the retrieval is done through proxy. In that case, FTP
links will be followed by default and recursion will not be
turned off when following them. */
- this_url_ftp = (urlproto (this_url) == URLFTP);
+ this_url_ftp = (url_scheme (this_url) == SCHEME_FTP);
/* Get the URL-s from an HTML file: */
url_list = get_urls_html (file, canon_this_url ? canon_this_url : this_url,
break;
/* Parse the URL for convenient use in other functions, as well
as to get the optimized form. It also checks URL integrity. */
- u = newurl ();
- if (parseurl (cur_url->url, u, 0) != URLOK)
+ u = url_parse (cur_url->url, NULL);
+ if (!u)
{
DEBUGP (("Yuck! A bad URL.\n"));
- freeurl (u, 1);
- continue;
- }
- if (u->proto == URLFILE)
- {
- DEBUGP (("Nothing to do with file:// around here.\n"));
- freeurl (u, 1);
continue;
}
assert (u->url != NULL);
/* If it is FTP, and FTP is not followed, chuck it out. */
if (!inl)
- if (u->proto == URLFTP && !opt.follow_ftp && !this_url_ftp)
+ if (u->scheme == SCHEME_FTP && !opt.follow_ftp && !this_url_ftp)
{
DEBUGP (("Uh, it is FTP but i'm not in the mood to follow FTP.\n"));
string_set_add (undesirable_urls, constr);
}
/* If it is absolute link and they are not followed, chuck it
out. */
- if (!inl && u->proto != URLFTP)
+ if (!inl && u->scheme != SCHEME_FTP)
if (opt.relative_only && !cur_url->link_relative_p)
{
DEBUGP (("It doesn't really look like a relative link.\n"));
if (!inl && opt.no_parent
/* If the new URL is FTP and the old was not, ignore
opt.no_parent. */
- && !(!this_url_ftp && u->proto == URLFTP))
+ && !(!this_url_ftp && u->scheme == SCHEME_FTP))
{
/* Check for base_dir first. */
if (!(base_dir && frontcmp (base_dir, u->dir)))
{
/* Failing that, check for parent dir. */
- struct urlinfo *ut = newurl ();
- if (parseurl (this_url, ut, 0) != URLOK)
+ struct url *ut = url_parse (this_url, NULL);
+ if (!ut)
DEBUGP (("Double yuck! The *base* URL is broken.\n"));
else if (!frontcmp (ut->dir, u->dir))
{
string_set_add (undesirable_urls, constr);
inl = 1;
}
- freeurl (ut, 1);
+ url_free (ut);
}
}
/* If the file does not match the acceptance list, or is on the
if (!inl)
{
if (!opt.simple_check)
- opt_url (u);
+ {
+ /* Find the "true" host. */
+ char *host = realhost (u->host);
+ xfree (u->host);
+ u->host = host;
+
+ /* Refresh the printed representation of the URL. */
+ xfree (u->url);
+ u->url = url_string (u, 0);
+ }
else
{
char *p;
for (p = u->host; *p; p++)
*p = TOLOWER (*p);
xfree (u->url);
- u->url = str_url (u, 0);
+ u->url = url_string (u, 0);
}
xfree (constr);
constr = xstrdup (u->url);
/* This line is bogus. */
/*string_set_add (undesirable_urls, constr);*/
- if (!inl && !((u->proto == URLFTP) && !this_url_ftp))
+ if (!inl && !((u->scheme == SCHEME_FTP) && !this_url_ftp))
if (!opt.spanhost && this_url && !same_host (this_url, constr))
{
DEBUGP (("This is not the same hostname as the parent's.\n"));
}
}
/* What about robots.txt? */
- if (!inl && opt.use_robots && u->proto == URLHTTP)
+ if (!inl && opt.use_robots && u->scheme == SCHEME_HTTP)
{
struct robot_specs *specs = res_get_specs (u->host, u->port);
if (!specs)
string_set_add (undesirable_urls, constr);
/* Automatically followed FTPs will *not* be downloaded
recursively. */
- if (u->proto == URLFTP)
+ if (u->scheme == SCHEME_FTP)
{
/* Don't you adore side-effects? */
opt.recursive = 0;
/* Retrieve it. */
retrieve_url (constr, &filename, &newloc,
canon_this_url ? canon_this_url : this_url, &dt);
- if (u->proto == URLFTP)
+ if (u->scheme == SCHEME_FTP)
{
/* Restore... */
opt.recursive = 1;
/* Free filename and constr. */
FREE_MAYBE (filename);
FREE_MAYBE (constr);
- freeurl (u, 1);
+ url_free (u);
/* Increment the pbuf for the appropriate size. */
}
if (opt.convert_links && !opt.delete_after)
char *local_name;
/* The URL must be in canonical form to be compared. */
- struct urlinfo *u = newurl ();
- uerr_t res = parseurl (cur_url->url, u, 0);
- if (res != URLOK)
- {
- freeurl (u, 1);
- continue;
- }
+ struct url *u = url_parse (cur_url->url, NULL);
+ if (!u)
+ continue;
/* We decide the direction of conversion according to whether
a URL was downloaded. Downloaded URLs will be converted
ABS2REL, whereas non-downloaded will be converted REL2ABS. */
cur_url->convert = CO_CONVERT_TO_COMPLETE;
cur_url->local_name = NULL;
}
- freeurl (u, 1);
+ url_free (u);
}
/* Convert the links in the file. */
convert_links (html->string, urls);