1 /* Conversion of links to local files.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007,
3 2008 Free Software Foundation, Inc.
5 This file is part of GNU Wget.
7 GNU Wget is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 GNU Wget is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with Wget. If not, see <http://www.gnu.org/licenses/>.
20 Additional permission under GNU GPL version 3 section 7
22 If you modify this program, or any covered work, by linking or
23 combining it with the OpenSSL project's OpenSSL library (or a
24 modified version of that library), containing parts covered by the
25 terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
26 grants you additional permission to convey the resulting work.
27 Corresponding Source for a non-source form of such a combination
28 shall include the source code for the parts of OpenSSL used as well
29 as that of the covered work. */
38 #endif /* HAVE_UNISTD_H */
/* Maps a downloaded local file name to the URL it was fetched from.
   File-private; maintained by register_download() below.  */
51 static struct hash_table *dl_file_url_map;
/* Maps a URL to the local file it was saved as.  Non-static: other
   modules consult it to learn whether/where a URL was downloaded.  */
52 struct hash_table *dl_url_file_map;
54 /* Set of HTML/CSS files downloaded in this Wget run, used for link
55 conversion after Wget is done. */
56 struct hash_table *downloaded_html_set;
57 struct hash_table *downloaded_css_set;
/* Forward declaration: rewrites the links of one file in place.  */
59 static void convert_links (const char *, struct urlpos *);
/* Scan every file recorded in DOWNLOADED_SET for links, decide for
   each link which conversion it needs (relative, complete, or base
   nullification), and hand the annotated link list to convert_links.
   NOTE(review): listing is elided here; the full parameter list
   (including the is_css flag used below) is not visible.  */
63 convert_links_in_hashtable (struct hash_table *downloaded_set,
/* Snapshot the hash-table keys into a flat array for iteration.  */
74 cnt = hash_table_count (downloaded_set);
77 file_array = alloca_array (char *, cnt);
78 string_set_to_array (downloaded_set, file_array);
80 for (i = 0; i < cnt; i++)
82 struct urlpos *urls, *cur_url;
84 char *file = file_array[i];
86 /* Determine the URL of the file. get_urls_{html,css} will need
/* Look up the URL this file came from; a missing mapping means the
   file was apparently removed, and it is skipped (debug line below).  */
88 url = hash_table_get (dl_file_url_map, file);
91 DEBUGP (("Apparently %s has been removed.\n", file));
95 DEBUGP (("Scanning %s (from %s)\n", file, url));
97 /* Parse the file... */
/* is_css selects between the CSS and the HTML link extractor.  */
98 urls = is_css ? get_urls_css_file (file, url) :
99 get_urls_html (file, url, NULL, NULL);
101 /* We don't respect meta_disallow_follow here because, even if
102 the file is not followed, we might still want to convert the
103 links that have been followed from other files. */
105 for (cur_url = urls; cur_url; cur_url = cur_url->next)
108 struct url *u = cur_url->url;
110 if (cur_url->link_base_p)
112 /* Base references have been resolved by our parser, so
113 we turn the base URL into an empty string. (Perhaps
114 we should remove the tag entirely?) */
115 cur_url->convert = CO_NULLIFY_BASE;
119 /* We decide the direction of conversion according to whether
120 a URL was downloaded. Downloaded URLs will be converted
121 ABS2REL, whereas non-downloaded will be converted REL2ABS. */
122 local_name = hash_table_get (dl_url_file_map, u->url);
124 /* Decide on the conversion type. */
127 /* We've downloaded this URL. Convert it to relative
128 form. We do this even if the URL already is in
129 relative form, because our directory structure may
130 not be identical to that on the server (think `-nd',
131 `--cut-dirs', etc.) */
132 cur_url->convert = CO_CONVERT_TO_RELATIVE;
/* Private copy: LOCAL_NAME is owned by dl_url_file_map.  */
133 cur_url->local_name = xstrdup (local_name);
134 DEBUGP (("will convert url %s to local %s\n", u->url, local_name));
138 /* We haven't downloaded this URL. If it's not already
139 complete (including a full host name), convert it to
140 that form, so it can be reached while browsing this
142 if (!cur_url->link_complete_p)
143 cur_url->convert = CO_CONVERT_TO_COMPLETE;
144 cur_url->local_name = NULL;
145 DEBUGP (("will convert url %s to complete\n", u->url));
149 /* Convert the links in the file. */
150 convert_links (file, urls);
158 /* This function is called when the retrieval is done to convert the
159 links that have been downloaded. It has to be called at the end of
160 the retrieval, because only then does Wget know conclusively which
161 URLs have been downloaded, and which not, so it can tell which
162 direction to convert to.
164 The "direction" means that the URLs to the files that have been
165 downloaded get converted to the relative URL which will point to
166 that file. And the other URLs get converted to the remote URL on
169 All the downloaded HTMLs are kept in downloaded_html_files, and
170 downloaded URLs in urls_downloaded. All the information is
171 extracted from these two lists. */
174 convert_all_links (void)
/* Time the whole conversion pass for the summary log line below.  */
179 struct ptimer *timer = ptimer_new ();
/* Second argument is the is_css flag: 0 for HTML files, 1 for CSS.  */
181 convert_links_in_hashtable (downloaded_html_set, 0, &file_count);
182 convert_links_in_hashtable (downloaded_css_set, 1, &file_count);
184 secs = ptimer_measure (timer);
185 logprintf (LOG_VERBOSE, _("Converted %d files in %s seconds.\n"),
186 file_count, print_decimal (secs));
188 ptimer_destroy (timer);
/* Forward declarations for the helpers convert_links relies on:
   backup creation, the three text-replacement strategies, local-name
   quoting, and relative-link construction (all defined below).  */
191 static void write_backup_file (const char *, downloaded_file_t);
192 static const char *replace_plain (const char*, int, FILE*, const char *);
193 static const char *replace_attr (const char *, int, FILE *, const char *);
194 static const char *replace_attr_refresh_hack (const char *, int, FILE *,
196 static char *local_quote_string (const char *);
197 static char *construct_relative (const char *, const char *);
199 /* Change the links in one file. LINKS is a list of links in the
200 document, along with their positions and the desired direction of
203 convert_links (const char *file, struct urlpos *links)
205 struct file_memory *fm;
208 downloaded_file_t downloaded_file_return;
211 int to_url_count = 0, to_file_count = 0;
213 logprintf (LOG_VERBOSE, _("Converting %s... "), file);
216 /* First we do a "dry run": go through the list L and see whether
217 any URL needs to be converted in the first place. If not, just
218 leave the file alone. */
221 for (dry = links; dry; dry = dry->next)
222 if (dry->convert != CO_NOCONVERT)
226 logputs (LOG_VERBOSE, _("nothing to do.\n"));
/* Read the whole file into memory (possibly mmaped -- see below).  */
231 fm = read_file (file);
234 logprintf (LOG_NOTQUIET, _("Cannot convert links in %s: %s\n"),
235 file, strerror (errno));
/* Only back up files we actually downloaded in this run.  */
239 downloaded_file_return = downloaded_file (CHECK_FOR_FILE, file);
240 if (opt.backup_converted && downloaded_file_return)
241 write_backup_file (file, downloaded_file_return);
243 /* Before opening the file for writing, unlink the file. This is
244 important if the data in FM is mmaped. In such case, nulling the
245 file, which is what fopen() below does, would make us read all
246 zeroes from the mmaped region. */
247 if (unlink (file) < 0 && errno != ENOENT)
249 logprintf (LOG_NOTQUIET, _("Unable to delete %s: %s\n"),
250 quote (file), strerror (errno));
254 /* Now open the file for writing. */
255 fp = fopen (file, "wb");
258 logprintf (LOG_NOTQUIET, _("Cannot convert links in %s: %s\n"),
259 file, strerror (errno));
264 /* Here we loop through all the URLs in file, replacing those of
265 them that are downloaded with relative references. */
267 for (link = links; link; link = link->next)
/* link->pos is the byte offset of the link within the file image.  */
269 char *url_start = fm->content + link->pos;
/* Sanity check: a position past EOF means the link data is bogus.  */
271 if (link->pos >= fm->length)
273 DEBUGP (("Something strange is going on. Please investigate."));
276 /* If the URL is not to be converted, skip it. */
277 if (link->convert == CO_NOCONVERT)
279 DEBUGP (("Skipping %s at position %d.\n", link->url->url, link->pos));
283 /* Echo the file contents, up to the offending URL's opening
284 quote, to the outfile. */
285 fwrite (p, 1, url_start - p, fp);
288 switch (link->convert)
290 case CO_CONVERT_TO_RELATIVE:
291 /* Convert absolute URL to relative. */
293 char *newname = construct_relative (file, link->local_name);
294 char *quoted_newname = local_quote_string (newname);
/* CSS links are replaced verbatim; HTML attribute values need
   quote handling, and <meta refresh> needs the timeout hack.  */
296 if (link->link_css_p)
297 p = replace_plain (p, link->size, fp, quoted_newname);
298 else if (!link->link_refresh_p)
299 p = replace_attr (p, link->size, fp, quoted_newname);
301 p = replace_attr_refresh_hack (p, link->size, fp, quoted_newname,
302 link->refresh_timeout);
304 DEBUGP (("TO_RELATIVE: %s to %s at position %d in %s.\n",
305 link->url->url, newname, link->pos, file));
307 xfree (quoted_newname);
311 case CO_CONVERT_TO_COMPLETE:
312 /* Convert the link to absolute URL. */
314 char *newlink = link->url->url;
315 char *quoted_newlink = html_quote_string (newlink);
317 if (link->link_css_p)
318 p = replace_plain (p, link->size, fp, quoted_newlink);
319 else if (!link->link_refresh_p)
320 p = replace_attr (p, link->size, fp, quoted_newlink);
322 p = replace_attr_refresh_hack (p, link->size, fp, quoted_newlink,
323 link->refresh_timeout);
325 DEBUGP (("TO_COMPLETE: <something> to %s at position %d in %s.\n",
326 newlink, link->pos, file));
327 xfree (quoted_newlink);
331 case CO_NULLIFY_BASE:
332 /* Change the base href to "". */
333 p = replace_attr (p, link->size, fp, "");
341 /* Output the rest of the file. */
342 if (p - fm->content < fm->length)
343 fwrite (p, 1, fm->length - (p - fm->content), fp);
/* Summary: "<files converted>-<urls completed>" counts.  */
347 logprintf (LOG_VERBOSE, "%d-%d\n", to_file_count, to_url_count);
350 /* Construct and return a link that points from BASEFILE to LINKFILE.
351 Both files should be local file names, BASEFILE of the referring
352 file, and LINKFILE of the referred file.
356 cr("foo", "bar") -> "bar"
357 cr("A/foo", "A/bar") -> "bar"
358 cr("A/foo", "A/B/bar") -> "B/bar"
359 cr("A/X/foo", "A/Y/bar") -> "../Y/bar"
360 cr("X/", "Y/bar") -> "../Y/bar" (trailing slash does matter in BASE)
362 Both files should be absolute or relative, otherwise strange
363 results might ensue. The function makes no special efforts to
364 handle "." and ".." in links, so make sure they're not there
365 (e.g. using path_simplify). */
368 construct_relative (const char *basefile, const char *linkfile)
375 /* First, skip the initial directory components common to both
378 for (b = basefile, l = linkfile; *b == *l && *b != '\0'; ++b, ++l)
381 start = (b - basefile) + 1;
386 /* With common directories out of the way, the situation we have is
388 b - b1/b2/[...]/bfile
389 l - l1/l2/[...]/lfile
391 The link we're constructing needs to be:
392 lnk - ../../l1/l2/[...]/lfile
394 Where the number of ".."'s equals the number of bN directory
397 /* Count the directory components in B. */
399 for (b = basefile; *b; b++)
405 /* Construct LINK as explained above. */
/* Worst case: one "../" (3 bytes) per remaining BASEFILE directory,
   plus the link file name and the terminating NUL.  */
406 link = xmalloc (3 * basedirs + strlen (linkfile) + 1);
407 for (i = 0; i < basedirs; i++)
408 memcpy (link + 3 * i, "../", 3);
409 strcpy (link + 3 * i, linkfile);
413 /* Used by write_backup_file to remember which files have been
415 static struct hash_table *converted_files;
/* Rename FILE to FILE.orig (or s/html$/orig/ when -E added ".html")
   so the pre-conversion content survives link conversion.  Runs at
   most once per file, tracked via the converted_files set.  */
418 write_backup_file (const char *file, downloaded_file_t downloaded_file_return)
420 /* Rather than just writing over the original .html file with the
421 converted version, save the former to *.orig. Note we only do
422 this for files we've _successfully_ downloaded, so we don't
423 clobber .orig files sitting around from previous invocations. */
425 /* Construct the backup filename as the original name plus ".orig". */
426 size_t filename_len = strlen (file);
427 char* filename_plus_orig_suffix;
429 /* TODO: hack this to work with css files */
430 if (downloaded_file_return == FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED)
432 /* Just write "orig" over "html". We need to do it this way
433 because when we're checking to see if we've downloaded the
434 file before (to see if we can skip downloading it), we don't
435 know if it's a text/html file. Therefore we don't know yet
436 at that stage that -E is going to cause us to tack on
437 ".html", so we need to compare vs. the original URL plus
438 ".orig", not the original URL plus ".html.orig". */
439 filename_plus_orig_suffix = alloca (filename_len + 1);
440 strcpy (filename_plus_orig_suffix, file);
/* Overwrite the trailing "html" (4 chars) with "orig" in place.  */
441 strcpy ((filename_plus_orig_suffix + filename_len) - 4, "orig");
443 else /* downloaded_file_return == FILE_DOWNLOADED_NORMALLY */
445 /* Append ".orig" to the name. */
446 filename_plus_orig_suffix = alloca (filename_len + sizeof (".orig"));
447 strcpy (filename_plus_orig_suffix, file);
448 strcpy (filename_plus_orig_suffix + filename_len, ".orig");
451 if (!converted_files)
452 converted_files = make_string_hash_table (0);
454 /* We can get called twice on the same URL thanks to the
455 convert_all_links() call in main(). If we write the .orig file
456 each time in such a case, it'll end up containing the first-pass
457 conversion, not the original file. So, see if we've already been
458 called on this file. */
459 if (!string_set_contains (converted_files, file))
461 /* Rename <file> to <file>.orig before former gets written over. */
462 if (rename (file, filename_plus_orig_suffix) != 0)
463 logprintf (LOG_NOTQUIET, _("Cannot back up %s as %s: %s\n"),
464 file, filename_plus_orig_suffix, strerror (errno));
466 /* Remember that we've already written a .orig backup for this file.
467 Note that we never free this memory since we need it till the
468 convert_all_links() call, which is one of the last things the
469 program does before terminating. BTW, I'm not sure if it would be
470 safe to just set 'converted_file_ptr->string' to 'file' below,
471 rather than making a copy of the string... Another note is that I
472 thought I could just add a field to the urlpos structure saying
473 that we'd written a .orig file for this URL, but that didn't work,
474 so I had to make this separate list.
475 -- Dan Harkless <wget@harkless.org>
477 This [adding a field to the urlpos structure] didn't work
478 because convert_file() is called from convert_all_links at
479 the end of the retrieval with a freshly built new urlpos
481 -- Hrvoje Niksic <hniksic@xemacs.org>
483 string_set_add (converted_files, file);
487 static bool find_fragment (const char *, int, const char **, const char **);
489 /* Replace a string with NEW_TEXT. Ignore quoting. */
/* Used for CSS links, where the value is not an attribute and needs
   no quote handling -- just emit the replacement text.  */
491 replace_plain (const char *p, int size, FILE *fp, const char *new_text)
493 fputs (new_text, fp);
498 /* Replace an attribute's original text with NEW_TEXT. */
501 replace_attr (const char *p, int size, FILE *fp, const char *new_text)
503 bool quote_flag = false;
504 char quote_char = '\"'; /* use "..." for quoting, unless the
505 original value is quoted, in which
506 case reuse its quoting char. */
507 const char *frag_beg, *frag_end;
509 /* Structure of our string is:
511 <--- size ---> (with quotes)
514 <--- size --> (no quotes) */
/* Detect whether the original value carried quotes; if so, reuse
   its quote character and exclude the quotes from SIZE.  */
516 if (*p == '\"' || *p == '\'')
521 size -= 2; /* disregard opening and closing quote */
523 putc (quote_char, fp);
524 fputs (new_text, fp);
526 /* Look for fragment identifier, if any. */
/* Preserve any "#fragment" from the original value by copying it
   verbatim after the replacement text.  */
527 if (find_fragment (p, size, &frag_beg, &frag_end))
528 fwrite (frag_beg, 1, frag_end - frag_beg, fp);
532 putc (quote_char, fp);
537 /* The same as REPLACE_ATTR, but used when replacing
538 <meta http-equiv=refresh content="new_text"> because we need to
539 append "timeout_value; URL=" before the next_text. */
542 replace_attr_refresh_hack (const char *p, int size, FILE *fp,
543 const char *new_text, int timeout)
/* Build "<timeout>; URL=<new_text>" on the stack, then delegate the
   actual attribute replacement to replace_attr.  */
546 char *new_with_timeout = (char *)alloca (numdigit (timeout)
550 sprintf (new_with_timeout, "%d; URL=%s", timeout, new_text);
552 return replace_attr (p, size, fp, new_with_timeout);
555 /* Find the first occurrence of '#' in [BEG, BEG+SIZE) that is not
556 preceded by '&'. If the character is not found, return zero. If
557 the character is found, return true and set BP and EP to point to
558 the beginning and end of the region.
560 This is used for finding the fragment identifiers in URLs. */
563 find_fragment (const char *beg, int size, const char **bp, const char **ep)
565 const char *end = beg + size;
/* saw_amp tracks a preceding '&' so that '#' inside an entity
   reference (e.g. "&#38;") is not mistaken for a fragment start.  */
566 bool saw_amp = false;
567 for (; beg < end; beg++)
589 /* Quote FILE for use as local reference to an HTML file.
591 We quote ? as %3F to avoid passing part of the file name as the
592 parameter when browsing the converted file through HTTP. However,
593 it is safe to do this only when `--html-extension' is turned on.
594 This is because converting "index.html?foo=bar" to
595 "index.html%3Ffoo=bar" would break local browsing, as the latter
596 isn't even recognized as an HTML file! However, converting
597 "index.html?foo=bar.html" to "index.html%3Ffoo=bar.html" should be
598 safe for both local and HTTP-served browsing.
600 We always quote "#" as "%23" and "%" as "%25" because those
601 characters have special meanings in URLs. */
604 local_quote_string (const char *file)
/* Fast path: nothing to percent-escape, only HTML-quote the name.  */
609 char *any = strpbrk (file, "?#%");
611 return html_quote_string (file);
613 /* Allocate space assuming the worst-case scenario, each character
614 having to be quoted. */
/* Each escaped character expands to three bytes ("%XX").  */
615 to = newname = (char *)alloca (3 * strlen (file) + 1);
616 for (from = file; *from; from++)
/* '?' is escaped only under --html-extension (see comment above).  */
630 if (opt.html_extension)
643 return html_quote_string (newname);
646 /* Book-keeping code for dl_file_url_map, dl_url_file_map,
647 downloaded_html_list, and downloaded_html_set. Other code calls
648 these functions to let us know that a file has been downloaded. */
/* Lazily create both mapping tables; called at the top of every
   register_* function that touches them.  */
650 #define ENSURE_TABLES_EXIST do { \
651 if (!dl_file_url_map) \
652 dl_file_url_map = make_string_hash_table (0); \
653 if (!dl_url_file_map) \
654 dl_url_file_map = make_string_hash_table (0); \
657 /* Return true if S1 and S2 are the same, except for "/index.html".
658 The three cases in which it returns one are (substitute any
659 substring for "foo"):
661 m("foo/index.html", "foo/") ==> 1
662 m("foo/", "foo/index.html") ==> 1
663 m("foo", "foo/index.html") ==> 1
664 m("foo", "foo/") ==> 1
665 m("foo", "foo") ==> 1 */
668 match_except_index (const char *s1, const char *s2)
673 /* Skip common substring. */
674 for (i = 0; *s1 && *s2 && *s1 == *s2; s1++, s2++, i++)
677 /* Strings differ at the very beginning -- bail out. We need to
678 check this explicitly to avoid `lng - 1' reading outside the
683 /* Both strings hit EOF -- strings are equal. */
686 /* Strings are randomly different, e.g. "/foo/bar" and "/foo/qux". */
689 /* S1 is the longer one. */
692 /* S2 is the longer one. */
696 /* foo/index.html */ /* or */ /* foo/index.html */
700 /* The right-hand case. */
/* A bare trailing slash also counts as matching "index.html".  */
703 if (*lng == '/' && *(lng + 1) == '\0')
708 return 0 == strcmp (lng, "/index.html");
/* Hash-table mapper: remove from dl_url_file_map every URL whose
   value equals the FILE passed via ARG.  Frees the removed value.  */
712 dissociate_urls_from_file_mapper (void *key, void *value, void *arg)
714 char *mapping_url = (char *)key;
715 char *mapping_file = (char *)value;
716 char *file = (char *)arg;
718 if (0 == strcmp (mapping_file, file))
720 hash_table_remove (dl_url_file_map, mapping_url);
722 xfree (mapping_file);
725 /* Continue mapping. */
729 /* Remove all associations from various URLs to FILE from dl_url_file_map. */
732 dissociate_urls_from_file (const char *file)
734 /* Can't use hash_table_iter_* because the table mutates while mapping. */
735 hash_table_for_each (dl_url_file_map, dissociate_urls_from_file_mapper,
739 /* Register that URL has been successfully downloaded to FILE. This
740 is used by the link conversion code to convert references to URLs
741 to references to local files. It is also being used to check if a
742 URL has already been downloaded. */
745 register_download (const char *url, const char *file)
747 char *old_file, *old_url;
751 /* With some forms of retrieval, it is possible, although not likely
752 or particularly desirable. If both are downloaded, the second
753 download will override the first one. When that happens,
754 dissociate the old file name from the URL. */
/* FILE was already registered once: decide whether to keep the old
   mapping (same URL, or index.html-equivalent URL) or replace it.  */
756 if (hash_table_get_pair (dl_file_url_map, file, &old_file, &old_url))
758 if (0 == strcmp (url, old_url))
759 /* We have somehow managed to download the same URL twice.
763 if (match_except_index (url, old_url)
764 && !hash_table_contains (dl_url_file_map, url))
765 /* The two URLs differ only in the "index.html" ending. For
766 example, one is "http://www.server.com/", and the other is
767 "http://www.server.com/index.html". Don't remove the old
768 one, just add the new one as a non-canonical entry. */
771 hash_table_remove (dl_file_url_map, file);
775 /* Remove all the URLs that point to this file. Yes, there can
776 be more than one such URL, because we store redirections as
777 multiple entries in dl_url_file_map. For example, if URL1
778 redirects to URL2 which gets downloaded to FILE, we map both
779 URL1 and URL2 to FILE in dl_url_file_map. (dl_file_url_map
780 only points to URL2.) When another URL gets loaded to FILE,
781 we want both URL1 and URL2 dissociated from it.
783 This is a relatively expensive operation because it performs
784 a linear search of the whole hash table, but it should be
785 called very rarely, only when two URLs resolve to the same
786 file name, *and* the "<file>.1" extensions are turned off.
787 In other words, almost never. */
788 dissociate_urls_from_file (file);
/* The table owns its keys and values, hence the xstrdup copies.  */
791 hash_table_put (dl_file_url_map, xstrdup (file), xstrdup (url));
794 /* A URL->FILE mapping is not possible without a FILE->URL mapping.
795 If the latter were present, it should have been removed by the
796 above `if'. So we could write:
798 assert (!hash_table_contains (dl_url_file_map, url));
800 The above is correct when running in recursive mode where the
801 same URL always resolves to the same file. But if you do
806 then the first URL will resolve to "FILE", and the other to
807 "FILE.1". In that case, FILE.1 will not be found in
808 dl_file_url_map, but URL will still point to FILE in
810 if (hash_table_get_pair (dl_url_file_map, url, &old_url, &old_file))
812 hash_table_remove (dl_url_file_map, url);
817 hash_table_put (dl_url_file_map, xstrdup (url), xstrdup (file));
820 /* Register that FROM has been redirected to TO. This assumes that TO
821 is successfully downloaded and already registered using
822 register_download() above. */
825 register_redirection (const char *from, const char *to)
/* TO must already be mapped -- enforced by the assert below.  */
831 file = hash_table_get (dl_url_file_map, to);
832 assert (file != NULL);
/* Add FROM as an extra alias for the same local file, unless it is
   already mapped (first mapping wins).  */
833 if (!hash_table_contains (dl_url_file_map, from))
834 hash_table_put (dl_url_file_map, xstrdup (from), xstrdup (file));
837 /* Register that the file has been deleted. */
840 register_delete_file (const char *file)
842 char *old_url, *old_file;
/* Nothing to do if the file was never registered.  */
846 if (!hash_table_get_pair (dl_file_url_map, file, &old_file, &old_url))
849 hash_table_remove (dl_file_url_map, file);
/* Also drop every URL alias that still points at this file.  */
852 dissociate_urls_from_file (file);
855 /* Register that FILE is an HTML file that has been downloaded. */
/* NOTE(review): the URL parameter is unused here -- only FILE is
   recorded, for the link-conversion pass at the end of the run.  */
858 register_html (const char *url, const char *file)
860 if (!downloaded_html_set)
861 downloaded_html_set = make_string_hash_table (0);
862 string_set_add (downloaded_html_set, file);
865 /* Register that FILE is a CSS file that has been downloaded. */
/* Mirror of register_html for CSS files; URL is likewise unused.  */
868 register_css (const char *url, const char *file)
870 if (!downloaded_css_set)
871 downloaded_css_set = make_string_hash_table (0);
872 string_set_add (downloaded_css_set, file);
875 static void downloaded_files_free (void);
877 /* Cleanup the data structures associated with this file. */
/* Tear down every table this module owns: the two URL/file maps
   (keys and values are heap copies, hence free_keys_and_values),
   the HTML set, the downloaded-files hash, and the converted set.  */
880 convert_cleanup (void)
884 free_keys_and_values (dl_file_url_map);
885 hash_table_destroy (dl_file_url_map);
886 dl_file_url_map = NULL;
890 free_keys_and_values (dl_url_file_map);
891 hash_table_destroy (dl_url_file_map);
892 dl_url_file_map = NULL;
894 if (downloaded_html_set)
895 string_set_free (downloaded_html_set);
896 downloaded_files_free ();
898 string_set_free (converted_files);
901 /* Book-keeping code for downloaded files that enables extension
904 /* This table should really be merged with dl_file_url_map and
905 downloaded_html_files. This was originally a list, but I changed
906 it to a hash table because it was actually taking a lot of time to
907 find things in it. */
909 static struct hash_table *downloaded_files_hash;
911 /* We're storing "modes" of type downloaded_file_t in the hash table.
912 However, our hash tables only accept pointers for keys and values.
913 So when we need a pointer, we use the address of a
914 downloaded_file_t variable of static storage. */
916 static downloaded_file_t *
917 downloaded_mode_to_ptr (downloaded_file_t mode)
/* One static cell per mode; their addresses double as stable,
   comparable sentinel pointers for the hash-table values.  */
919 static downloaded_file_t
920 v1 = FILE_NOT_ALREADY_DOWNLOADED,
921 v2 = FILE_DOWNLOADED_NORMALLY,
922 v3 = FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED,
927 case FILE_NOT_ALREADY_DOWNLOADED:
929 case FILE_DOWNLOADED_NORMALLY:
931 case FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED:
939 /* Remembers which files have been downloaded. In the standard case,
940 should be called with mode == FILE_DOWNLOADED_NORMALLY for each
941 file we actually download successfully (i.e. not for ones we have
942 failures on or that we skip due to -N).
944 When we've downloaded a file and tacked on a ".html" extension due
945 to -E, call this function with
946 FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED rather than
947 FILE_DOWNLOADED_NORMALLY.
949 If you just want to check if a file has been previously added
950 without adding it, call with mode == CHECK_FOR_FILE. Please be
951 sure to call this function with local filenames, not remote
955 downloaded_file (downloaded_file_t mode, const char *file)
957 downloaded_file_t *ptr;
/* Query path: report the recorded mode without modifying the table.  */
959 if (mode == CHECK_FOR_FILE)
961 if (!downloaded_files_hash)
962 return FILE_NOT_ALREADY_DOWNLOADED;
963 ptr = hash_table_get (downloaded_files_hash, file);
965 return FILE_NOT_ALREADY_DOWNLOADED;
/* Record path: create the table lazily and remember MODE for FILE.  */
969 if (!downloaded_files_hash)
970 downloaded_files_hash = make_string_hash_table (0);
972 ptr = hash_table_get (downloaded_files_hash, file);
/* Value is a static sentinel pointer (see downloaded_mode_to_ptr),
   so only the key needs to be duplicated.  */
976 ptr = downloaded_mode_to_ptr (mode);
977 hash_table_put (downloaded_files_hash, xstrdup (file), ptr);
979 return FILE_NOT_ALREADY_DOWNLOADED;
/* Free the downloaded-files hash.  Keys are xstrdup'ed file names
   freed during iteration; values are static sentinels (see
   downloaded_mode_to_ptr) and are not freed.  */
983 downloaded_files_free (void)
985 if (downloaded_files_hash)
987 hash_table_iterator iter;
988 for (hash_table_iterate (downloaded_files_hash, &iter);
989 hash_table_iter_next (&iter);
992 hash_table_destroy (downloaded_files_hash);
993 downloaded_files_hash = NULL;
997 /* The function returns the pointer to the malloc-ed quoted version of
998 string s. It will recognize and quote numeric and special graphic
999 entities, as per RFC1866:
1007 No other entities are recognized or replaced. */
1009 html_quote_string (const char *s)
1015 /* Pass through the string, and count the new size. */
1016 for (i = 0; *s; s++, i++)
1019 i += 4; /* `amp;' */
1020 else if (*s == '<' || *s == '>')
1021 i += 3; /* `lt;' and `gt;' */
1022 else if (*s == '\"')
1023 i += 5; /* `quot;' */
1027 res = xmalloc (i + 1);
1029 for (p = res; *s; s++)
1042 *p++ = (*s == '<' ? 'l' : 'g');