X-Git-Url: http://sjero.net/git/?a=blobdiff_plain;f=src%2Frecur.c;h=11c30a2157e00ba2f433cba0e98045b3bb9bb2a4;hb=d5be8ecca466601bda9b81c28a79077fbda6ccde;hp=a159f119430948995f0f17b108af214aa8803247;hpb=f178e6c61367309bef8ba5789a025d7c2aa05775;p=wget diff --git a/src/recur.c b/src/recur.c index a159f119..11c30a21 100644 --- a/src/recur.c +++ b/src/recur.c @@ -120,9 +120,8 @@ recursive_retrieve (const char *file, const char *this_url) int dt, inl, dash_p_leaf_HTML = FALSE; int meta_disallow_follow; int this_url_ftp; /* See below the explanation */ - uerr_t err; urlpos *url_list, *cur_url; - struct urlinfo *u; + struct url *u; assert (this_url != NULL); assert (file != NULL); @@ -140,9 +139,8 @@ recursive_retrieve (const char *file, const char *this_url) hash_table_clear (undesirable_urls); string_set_add (undesirable_urls, this_url); /* Enter this_url to the hash table, in original and "enhanced" form. */ - u = newurl (); - err = parseurl (this_url, u, 0); - if (err == URLOK) + u = url_parse (this_url, NULL); + if (u) { string_set_add (undesirable_urls, u->url); if (opt.no_parent) @@ -156,7 +154,7 @@ recursive_retrieve (const char *file, const char *this_url) DEBUGP (("Double yuck! The *base* URL is broken.\n")); base_dir = NULL; } - freeurl (u, 1); + url_free (u); depth = 1; first_time = 0; } @@ -210,11 +208,10 @@ recursive_retrieve (const char *file, const char *this_url) break; /* Parse the URL for convenient use in other functions, as well as to get the optimized form. It also checks URL integrity. */ - u = newurl (); - if (parseurl (cur_url->url, u, 0) != URLOK) + u = url_parse (cur_url->url, NULL); + if (!u) { DEBUGP (("Yuck! A bad URL.\n")); - freeurl (u, 1); continue; } assert (u->url != NULL); @@ -281,8 +278,8 @@ recursive_retrieve (const char *file, const char *this_url) if (!(base_dir && frontcmp (base_dir, u->dir))) { /* Failing that, check for parent dir. */ - struct urlinfo *ut = newurl (); - if (parseurl (this_url, ut, 0) != URLOK) + struct url *ut = url_parse (this_url, NULL); + if (!ut) DEBUGP (("Double yuck! The *base* URL is broken.\n")); else if (!frontcmp (ut->dir, u->dir)) { @@ -291,7 +288,7 @@ recursive_retrieve (const char *file, const char *this_url) string_set_add (undesirable_urls, constr); inl = 1; } - freeurl (ut, 1); + url_free (ut); } } /* If the file does not match the acceptance list, or is on the @@ -343,7 +340,16 @@ recursive_retrieve (const char *file, const char *this_url) if (!inl) { if (!opt.simple_check) - opt_url (u); + { + /* Find the "true" host. */ + char *host = realhost (u->host); + xfree (u->host); + u->host = host; + + /* Refresh the printed representation of the URL. */ + xfree (u->url); + u->url = url_string (u, 0); + } else { char *p; @@ -351,7 +357,7 @@ recursive_retrieve (const char *file, const char *this_url) for (p = u->host; *p; p++) *p = TOLOWER (*p); xfree (u->url); - u->url = str_url (u, 0); + u->url = url_string (u, 0); } xfree (constr); constr = xstrdup (u->url); @@ -473,7 +479,7 @@ recursive_retrieve (const char *file, const char *this_url) /* Free filename and constr. */ FREE_MAYBE (filename); FREE_MAYBE (constr); - freeurl (u, 1); + url_free (u); /* Increment the pbuf for the appropriate size. */ } if (opt.convert_links && !opt.delete_after) @@ -573,13 +579,9 @@ convert_all_links (void) char *local_name; /* The URL must be in canonical form to be compared. */ - struct urlinfo *u = newurl (); - uerr_t res = parseurl (cur_url->url, u, 0); - if (res != URLOK) - { - freeurl (u, 1); - continue; - } + struct url *u = url_parse (cur_url->url, NULL); + if (!u) + continue; /* We decide the direction of conversion according to whether a URL was downloaded. Downloaded URLs will be converted ABS2REL, whereas non-downloaded will be converted REL2ABS. */ @@ -608,7 +610,7 @@ convert_all_links (void) cur_url->convert = CO_CONVERT_TO_COMPLETE; cur_url->local_name = NULL; } - freeurl (u, 1); + url_free (u); } /* Convert the links in the file. */ convert_links (html->string, urls);