From 728584d072cf071a434751602db6848765807d67 Mon Sep 17 00:00:00 2001
From: hniksic
Date: Fri, 30 Mar 2001 18:05:54 -0800
Subject: [PATCH] [svn] Record downloaded files and downloaded HTML files in
 all cases.

Published under the subject "Link conversion fix" in .
---
 src/ChangeLog | 10 +++++++++
 src/recur.c   | 62 +++++++++++++++++++++++++--------------------------
 src/recur.h   |  2 ++
 src/retr.c    | 18 +++++++++++----
 4 files changed, 56 insertions(+), 36 deletions(-)

diff --git a/src/ChangeLog b/src/ChangeLog
index e7a8c594..7d6cb1da 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,13 @@
+2001-03-31  Hrvoje Niksic
+
+	* retr.c (retrieve_url): Call register_download() for downloaded
+	files and register_html() for downloaded HTML files.
+
+	* recur.c (register_download): New function; register here that a
+	file has been downloaded, rather than in recursive_retrieve().
+	(register_html): New function; enqueue the location of HTML files
+	here rather than in recursive_retrieve().
+
 2001-03-31  Hrvoje Niksic
 
 	* main.c (print_help): Use multiple fputs instead of a single ugly
diff --git a/src/recur.c b/src/recur.c
index 99ebde56..ba2115cf 100644
--- a/src/recur.c
+++ b/src/recur.c
@@ -54,8 +54,9 @@ extern char *version_string;
 static struct hash_table *dl_file_url_map;
 static struct hash_table *dl_url_file_map;
 
-/* List of HTML URLs.  */
-static slist *urls_html;
+/* List of HTML files downloaded in this Wget run.  Used for link
+   conversion after Wget is done.  */
+static slist *downloaded_html_files;
 
 /* List of undesirable-to-load URLs.  */
 static struct hash_table *undesirable_urls;
@@ -106,8 +107,8 @@ recursive_cleanup (void)
   undesirable_urls = NULL;
   free_vec (forbidden);
   forbidden = NULL;
-  slist_free (urls_html);
-  urls_html = NULL;
+  slist_free (downloaded_html_files);
+  downloaded_html_files = NULL;
   FREE_MAYBE (base_dir);
   FREE_MAYBE (robots_host);
   first_time = 1;
@@ -153,25 +154,17 @@ recursive_retrieve (const char *file, const char *this_url)
      run.  They should probably be at a different location.  */
   if (!undesirable_urls)
     undesirable_urls = make_string_hash_table (0);
-  if (!dl_file_url_map)
-    dl_file_url_map = make_string_hash_table (0);
-  if (!dl_url_file_map)
-    dl_url_file_map = make_string_hash_table (0);
 
   hash_table_clear (undesirable_urls);
   string_set_add (undesirable_urls, this_url);
   hash_table_clear (dl_file_url_map);
   hash_table_clear (dl_url_file_map);
-  urls_html = NULL;
   /* Enter this_url to the hash table, in original and "enhanced" form.  */
   u = newurl ();
   err = parseurl (this_url, u, 0);
   if (err == URLOK)
     {
       string_set_add (undesirable_urls, u->url);
-      hash_table_put (dl_file_url_map, xstrdup (file), xstrdup (u->url));
-      hash_table_put (dl_url_file_map, xstrdup (u->url), xstrdup (file));
-      urls_html = slist_prepend (urls_html, file);
       if (opt.no_parent)
 	base_dir = xstrdup (u->dir); /* Set the base dir.  */
       /* Set the canonical this_url to be sent as referer.  This
@@ -469,22 +462,6 @@ recursive_retrieve (const char *file, const char *this_url)
 	  xfree (constr);
 	  constr = newloc;
 	}
-      /* In case of convert_links: If there was no error, add it to
-	 the list of downloaded URLs.  We might need it for
-	 conversion.  */
-      if (opt.convert_links && filename)
-	{
-	  if (dt & RETROKF)
-	    {
-	      hash_table_put (dl_file_url_map,
-			      xstrdup (filename), xstrdup (constr));
-	      hash_table_put (dl_url_file_map,
-			      xstrdup (constr), xstrdup (filename));
-	      /* If the URL is HTML, note it.  */
-	      if (dt & TEXTHTML)
-		urls_html = slist_prepend (urls_html, filename);
-	    }
-	}
       /* If there was no error, and the type is text/html, parse it
 	 recursively.  */
       if (dt & TEXTHTML)
@@ -547,6 +524,27 @@ recursive_retrieve (const char *file, const char *this_url)
   return RETROK;
 }
 
+void
+register_download (const char *url, const char *file)
+{
+  if (!opt.convert_links)
+    return;
+  if (!dl_file_url_map)
+    dl_file_url_map = make_string_hash_table (0);
+  hash_table_put (dl_file_url_map, xstrdup (file), xstrdup (url));
+  if (!dl_url_file_map)
+    dl_url_file_map = make_string_hash_table (0);
+  hash_table_put (dl_url_file_map, xstrdup (url), xstrdup (file));
+}
+
+void
+register_html (const char *url, const char *file)
+{
+  if (!opt.convert_links)
+    return;
+  downloaded_html_files = slist_prepend (downloaded_html_files, file);
+}
+
 /* convert_links() is called from recursive_retrieve() after we're
    done with an HTML file.  This call to convert_links is not complete
    because it converts only the downloaded files, and Wget cannot know
@@ -570,7 +568,7 @@ recursive_retrieve (const char *file, const char *this_url)
    convert_all_links to go once more through the entire list of
    retrieved HTMLs, and re-convert them.
 
-   All the downloaded HTMLs are kept in urls_html, and downloaded URLs
+   All the downloaded HTMLs are kept in downloaded_html_files, and downloaded URLs
    in urls_downloaded.  From these two lists information is extracted.  */
 
 void
@@ -578,11 +576,11 @@ convert_all_links (void)
 {
   slist *html;
 
-  /* Destructively reverse urls_html to get it in the right order.
+  /* Destructively reverse downloaded_html_files to get it in the right order.
      recursive_retrieve() used slist_prepend() consistently.  */
-  urls_html = slist_nreverse (urls_html);
+  downloaded_html_files = slist_nreverse (downloaded_html_files);
 
-  for (html = urls_html; html; html = html->next)
+  for (html = downloaded_html_files; html; html = html->next)
     {
       urlpos *urls, *cur_url;
       char *url;
diff --git a/src/recur.h b/src/recur.h
index 6e906c5b..d26ed3e0 100644
--- a/src/recur.h
+++ b/src/recur.h
@@ -24,6 +24,8 @@ void recursive_cleanup PARAMS ((void));
 void recursive_reset PARAMS ((void));
 uerr_t recursive_retrieve PARAMS ((const char *, const char *));
 
+void register_download PARAMS ((const char *, const char *));
+void register_html PARAMS ((const char *, const char *));
 void convert_all_links PARAMS ((void));
 
 #endif /* RECUR_H */
diff --git a/src/retr.c b/src/retr.c
index 97a67246..5207e54d 100644
--- a/src/retr.c
+++ b/src/retr.c
@@ -446,11 +446,11 @@ retrieve_url (const char *origurl, char **file, char **newloc,
   assert (u->proto != URLFILE);	/* #### Implement me!  */
   mynewloc = NULL;
 
+  if (u->proto == URLHTTP
 #ifdef HAVE_SSL
-  if (u->proto == URLHTTP || u->proto == URLHTTPS )
-#else
-  if (u->proto == URLHTTP)
-#endif /* HAVE_SSL */
+      || u->proto == URLHTTPS
+#endif
+      )
     result = http_loop (u, &mynewloc, dt);
   else if (u->proto == URLFTP)
     {
@@ -546,6 +546,16 @@ retrieve_url (const char *origurl, char **file, char **newloc,
       goto redirected;
     }
 
+  if (u->local)
+    {
+      if (*dt & RETROKF)
+	{
+	  register_download (url, u->local);
+	  if (*dt & TEXTHTML)
+	    register_html (url, u->local);
+	}
+    }
+
   if (file)
     {
       if (u->local)
-- 
2.39.2
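
To see the bookkeeping scheme the patch introduces in isolation, here is a
minimal, self-contained C sketch.  The pair/snode linked lists, the
pair_put() helper, and the convert_links flag are simplified, hypothetical
stand-ins for Wget's internal hash_table, slist, and opt.convert_links; this
illustrates the registration pattern only and is not Wget's actual code.

/* Sketch of the download registry introduced by the patch above.
   The types and helpers here are simplified stand-ins, NOT the real
   Wget implementation.  */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Stand-in for opt.convert_links.  */
static int convert_links = 1;

/* Key/value lists standing in for dl_file_url_map and
   dl_url_file_map (Wget uses real hash tables).  */
struct pair { char *key; char *val; struct pair *next; };
static struct pair *file_to_url;
static struct pair *url_to_file;

/* Stand-in for the downloaded_html_files slist.  */
struct snode { char *str; struct snode *next; };
static struct snode *html_files;

static void
pair_put (struct pair **map, const char *key, const char *val)
{
  struct pair *p = malloc (sizeof *p);
  p->key = strdup (key);
  p->val = strdup (val);
  p->next = *map;
  *map = p;
}

/* Analogue of register_download(): record the file<->URL mapping in
   both directions, but only when link conversion is requested.  */
static void
register_download (const char *url, const char *file)
{
  if (!convert_links)
    return;
  pair_put (&file_to_url, file, url);
  pair_put (&url_to_file, url, file);
}

/* Analogue of register_html(): remember which local files hold HTML
   so a later conversion pass can revisit them.  The list is prepended
   to, and would be reversed before use, as in the patch.  */
static void
register_html (const char *url, const char *file)
{
  struct snode *n;
  (void) url;                   /* unused, kept for symmetry */
  if (!convert_links)
    return;
  n = malloc (sizeof *n);
  n->str = strdup (file);
  n->next = html_files;
  html_files = n;
}

int
main (void)
{
  struct snode *n;

  /* Mimic what retrieve_url() now does after each successful fetch.  */
  register_download ("http://example.com/index.html", "index.html");
  register_html ("http://example.com/index.html", "index.html");
  register_download ("http://example.com/logo.png", "logo.png");

  for (n = html_files; n; n = n->next)
    printf ("HTML file to convert later: %s\n", n->str);
  return 0;
}

The point of the patch is visible in main(): registration now happens once
per successful retrieval in the common path (retrieve_url()), so files
fetched outside the recursive engine are recorded for link conversion too,
instead of only those passing through recursive_retrieve().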