From: hniksic Date: Sun, 25 Nov 2001 17:44:28 +0000 (-0800) Subject: [svn] Attempt to quote '?' as "%3F" when linking to local files. X-Git-Tag: v1.13~2031 X-Git-Url: http://sjero.net/git/?p=wget;a=commitdiff_plain;h=2e6e3f21f83f73312e2a9a4436ae84dbd4bbb0a7 [svn] Attempt to quote '?' as "%3F" when linking to local files. Given up on the attempt, as it breaks local browsing. --- diff --git a/TODO b/TODO index 9632add7..a9cb902e 100644 --- a/TODO +++ b/TODO @@ -55,10 +55,6 @@ changes. * Make -K compare X.orig to X and move the former on top of the latter if they're the same, rather than leaving identical .orig files laying around. -* If CGI output is saved to a file, e.g. cow.cgi?param, -k needs to change the - '?' to a "%3F" in links to that file to avoid passing part of the filename as - a parameter. - * Make `-k' convert too. * Make `-k' check for files that were downloaded in the past and convert links diff --git a/src/ChangeLog b/src/ChangeLog index 3cf327fe..58ed40e7 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,9 @@ +2001-11-25 Hrvoje Niksic + + * url.c (convert_links): Attempt to quote '?' as "%3F" when + linking to local files. Given up on the attempt, as it breaks + local browsing. + 2001-11-25 Hrvoje Niksic * main.c (private_initialize): Removed. diff --git a/src/cookies.c b/src/cookies.c index 766896c2..5f374951 100644 --- a/src/cookies.c +++ b/src/cookies.c @@ -836,20 +836,6 @@ set_cookie_header_cb (const char *hdr, void *closure) previously stored cookies. Entry point is `build_cookies_request'. */ - -/* Count how many times CHR occurs in STRING. */ - -static int -count_char (const char *string, char chr) -{ - const char *p; - int count = 0; - for (p = string; *p; p++) - if (*p == chr) - ++count; - return count; -} - /* Store CHAIN to STORE if there is room in STORE. If not, inrecement COUNT anyway, so that when the function is done, we end up with the exact count of how much place we actually need. */ diff --git a/src/url.c b/src/url.c index 74547d90..26642e59 100644 --- a/src/url.c +++ b/src/url.c @@ -1699,16 +1699,20 @@ no_proxy_match (const char *host, const char **no_proxy) static void write_backup_file PARAMS ((const char *, downloaded_file_t)); static void replace_attr PARAMS ((const char **, int, FILE *, const char *)); +static char *local_quote_string PARAMS ((const char *)); -/* Change the links in an HTML document. Accepts a structure that - defines the positions of all the links. */ +/* Change the links in one HTML file. LINKS is a list of links in the + document, along with their positions and the desired direction of + the conversion. */ void -convert_links (const char *file, struct urlpos *l) +convert_links (const char *file, struct urlpos *links) { struct file_memory *fm; - FILE *fp; - const char *p; - downloaded_file_t downloaded_file_return; + FILE *fp; + const char *p; + downloaded_file_t downloaded_file_return; + + struct urlpos *link; int to_url_count = 0, to_file_count = 0; logprintf (LOG_VERBOSE, _("Converting %s... "), file); @@ -1718,8 +1722,8 @@ convert_links (const char *file, struct urlpos *l) any URL needs to be converted in the first place. If not, just leave the file alone. */ int dry_count = 0; - struct urlpos *dry = l; - for (dry = l; dry; dry = dry->next) + struct urlpos *dry = links; + for (dry = links; dry; dry = dry->next) if (dry->convert != CO_NOCONVERT) ++dry_count; if (!dry_count) @@ -1764,19 +1768,19 @@ convert_links (const char *file, struct urlpos *l) /* Here we loop through all the URLs in file, replacing those of them that are downloaded with relative references. */ p = fm->content; - for (; l; l = l->next) + for (link = links; link; link = link->next) { - char *url_start = fm->content + l->pos; + char *url_start = fm->content + link->pos; - if (l->pos >= fm->length) + if (link->pos >= fm->length) { DEBUGP (("Something strange is going on. Please investigate.")); break; } /* If the URL is not to be converted, skip it. */ - if (l->convert == CO_NOCONVERT) + if (link->convert == CO_NOCONVERT) { - DEBUGP (("Skipping %s at position %d.\n", l->url->url, l->pos)); + DEBUGP (("Skipping %s at position %d.\n", link->url->url, link->pos)); continue; } @@ -1784,26 +1788,26 @@ convert_links (const char *file, struct urlpos *l) quote, to the outfile. */ fwrite (p, 1, url_start - p, fp); p = url_start; - if (l->convert == CO_CONVERT_TO_RELATIVE) + if (link->convert == CO_CONVERT_TO_RELATIVE) { /* Convert absolute URL to relative. */ - char *newname = construct_relative (file, l->local_name); - char *quoted_newname = html_quote_string (newname); - replace_attr (&p, l->size, fp, quoted_newname); + char *newname = construct_relative (file, link->local_name); + char *quoted_newname = local_quote_string (newname); + replace_attr (&p, link->size, fp, quoted_newname); DEBUGP (("TO_RELATIVE: %s to %s at position %d in %s.\n", - l->url->url, newname, l->pos, file)); + link->url->url, newname, link->pos, file)); xfree (newname); xfree (quoted_newname); ++to_file_count; } - else if (l->convert == CO_CONVERT_TO_COMPLETE) + else if (link->convert == CO_CONVERT_TO_COMPLETE) { /* Convert the link to absolute URL. */ - char *newlink = l->url->url; + char *newlink = link->url->url; char *quoted_newlink = html_quote_string (newlink); - replace_attr (&p, l->size, fp, quoted_newlink); + replace_attr (&p, link->size, fp, quoted_newlink); DEBUGP (("TO_COMPLETE: to %s at position %d in %s.\n", - newlink, l->pos, file)); + newlink, link->pos, file)); xfree (quoted_newlink); ++to_url_count; } @@ -1967,10 +1971,10 @@ replace_attr (const char **pp, int raw_size, FILE *fp, const char *new_str) /* Structure of our string is: "...old-contents..." - <--- l->size ---> (with quotes) + <--- size ---> (with quotes) OR: ...old-contents... - <--- l->size --> (no quotes) */ + <--- size --> (no quotes) */ if (*p == '\"' || *p == '\'') { @@ -2026,6 +2030,66 @@ find_fragment (const char *beg, int size, const char **bp, const char **ep) return 0; } +/* The idea here was to quote ? as %3F to avoid passing part of the + file name as the parameter when browsing the converted file through + HTTP. However, actually doing that breaks local browsing because + "index.html%3Ffoo=bar" isn't even recognized as an HTML file! + Perhaps this should be controlled by an option, but for now I'm + leaving the question marks. + + This is the original docstring of this function: + + FILE should be a relative link to a local file. It should be + quoted as HTML because it will be used in HTML context. However, + we need to quote ? as %3F to avoid passing part of the file name as + the parameter. (This is not a problem when viewing locally, but is + if the downloaded and converted tree is served by an HTTP + server.) */ + +/* Quote string as HTML. */ + +static char * +local_quote_string (const char *file) +{ + return html_quote_string (file); + +#if 0 + const char *file_sans_qmark; + int qm = count_char (file, '?'); + + if (qm) + { + const char *from = file; + char *to, *newname; + + /* qm * 2 because we replace each question mark with "%3F", + i.e. replace one char with three, hence two more. */ + int fsqlen = strlen (file) + qm * 2; + + to = newname = (char *)alloca (fsqlen + 1); + for (; *from; from++) + { + if (*from != '?') + *to++ = *from; + else + { + *to++ = '%'; + *to++ = '3'; + *to++ = 'F'; + } + } + assert (to - newname == fsqlen); + *to = '\0'; + + file_sans_qmark = newname; + } + else + file_sans_qmark = file; + + return html_quote_string (file_sans_qmark); +#endif +} + /* We're storing "modes" of type downloaded_file_t in the hash table. However, our hash tables only accept pointers for keys and values. So when we need a pointer, we use the address of a diff --git a/src/utils.c b/src/utils.c index 444455a5..f602f046 100644 --- a/src/utils.c +++ b/src/utils.c @@ -319,6 +319,19 @@ xstrdup_lower (const char *s) return copy; } +/* Return a count of how many times CHR occurs in STRING. */ + +int +count_char (const char *string, char chr) +{ + const char *p; + int count = 0; + for (p = string; *p; p++) + if (*p == chr) + ++count; + return count; +} + /* Copy the string formed by two pointers (one on the beginning, other on the char after the last char) to a new, malloc-ed location. 0-terminate it. */ diff --git a/src/utils.h b/src/utils.h index ae7f47a3..4f142ea9 100644 --- a/src/utils.h +++ b/src/utils.h @@ -49,6 +49,8 @@ void print_malloc_debug_stats (); #endif char *xstrdup_lower PARAMS ((const char *)); +int count_char PARAMS ((const char *, char)); + char *strdupdelim PARAMS ((const char *, const char *)); char **sepstring PARAMS ((const char *)); int frontcmp PARAMS ((const char *, const char *));