X-Git-Url: http://sjero.net/git/?a=blobdiff_plain;f=src%2Furl.c;h=8cbb8923e2fed5898ca42967eefe0f479dcf671e;hb=857c3e2ca4ce9f87bc6b9bf71b80f63f6db08fef;hp=ad8fc202e259a3d429ec0372e4cafed1da3982ef;hpb=5aba2a585033945fe8c7d58c27eb7eb47a391eb9;p=wget diff --git a/src/url.c b/src/url.c index ad8fc202..8cbb8923 100644 --- a/src/url.c +++ b/src/url.c @@ -255,29 +255,27 @@ url_escape_allow_passthrough (const char *s) return url_escape_1 (s, urlchr_unsafe, 1); } -enum copy_method { cm_encode, cm_passthrough }; +/* Decide whether the char at position P needs to be encoded. (It is + not enough to pass a single char *P because the function may need + to inspect the surrounding context.) -/* Decide whether to encode or pass through the char at P. This used - to be a macro, but it got a little too convoluted. */ + Return 1 if the char should be escaped as %XX, 0 otherwise. */ -static inline enum copy_method -decide_copy_method (const char *p) +static inline int +char_needs_escaping (const char *p) { if (*p == '%') { if (ISXDIGIT (*(p + 1)) && ISXDIGIT (*(p + 2))) - /* Prior to 1.10 this decoded %HH escapes corresponding to - "safe" chars, but that proved too obtrusive -- it's better - to always preserve the escapes found in the URL. */ - return cm_passthrough; + return 0; else /* Garbled %.. sequence: encode `%'. */ - return cm_encode; + return 1; } else if (URL_UNSAFE_CHAR (*p) && !URL_RESERVED_CHAR (*p)) - return cm_encode; + return 1; else - return cm_passthrough; + return 0; } /* Translate a %-escaped (but possibly non-conformant) input string S @@ -299,15 +297,14 @@ decide_copy_method (const char *p) GET /abc%20def HTTP/1.0 - It appears that the unsafe chars need to be quoted, for example - with url_escape. But what if we're requested to download + It would appear that the unsafe chars need to be quoted, for + example with url_escape. But what if we're requested to download `abc%20def'? url_escape transforms "%" to "%25", which would leave us with `abc%2520def'. This is incorrect -- since %-escapes are part of URL syntax, "%20" is the correct way to denote a literal - space on the Wget command line. This leaves us in the conclusion - that in that case Wget should not call url_escape, but leave the - `%20' as is. This is clearly contradictory, but it only gets - worse. + space on the Wget command line. This leads to the conclusion that + in that case Wget should not call url_escape, but leave the `%20' + as is. This is clearly contradictory, but it only gets worse. What if the requested URI is `abc%20 def'? If we call url_escape, we end up with `/abc%2520%20def', which is almost certainly not @@ -368,19 +365,11 @@ reencode_escapes (const char *s) int encode_count = 0; - /* First, pass through the string to see if there's anything to do, + /* First pass: inspect the string to see if there's anything to do, and to calculate the new length. */ for (p1 = s; *p1; p1++) - { - switch (decide_copy_method (p1)) - { - case cm_encode: - ++encode_count; - break; - case cm_passthrough: - break; - } - } + if (char_needs_escaping (p1)) + ++encode_count; if (!encode_count) /* The string is good as it is. */ @@ -391,25 +380,22 @@ reencode_escapes (const char *s) newlen = oldlen + 2 * encode_count; newstr = xmalloc (newlen + 1); + /* Second pass: copy the string to the destination address, encoding + chars when needed. */ p1 = s; p2 = newstr; while (*p1) - { - switch (decide_copy_method (p1)) - { - case cm_encode: - { - unsigned char c = *p1++; - *p2++ = '%'; - *p2++ = XNUM_TO_DIGIT (c >> 4); - *p2++ = XNUM_TO_DIGIT (c & 0xf); - } - break; - case cm_passthrough: - *p2++ = *p1++; - } - } + if (char_needs_escaping (p1)) + { + unsigned char c = *p1++; + *p2++ = '%'; + *p2++ = XNUM_TO_DIGIT (c >> 4); + *p2++ = XNUM_TO_DIGIT (c & 0xf); + } + else + *p2++ = *p1++; + *p2 = '\0'; assert (p2 - newstr == newlen); return newstr; @@ -548,6 +534,12 @@ rewrite_shorthand_url (const char *url) if (p == url) return NULL; + /* If we're looking at "://", it means the URL uses a scheme we + don't support, which may include "https" when compiled without + SSL support. Don't bogusly rewrite such URLs. */ + if (p[0] == ':' && p[1] == '/' && p[2] == '/') + return NULL; + if (*p == ':') { const char *pp; @@ -597,21 +589,21 @@ static void split_path PARAMS ((const char *, char **, char **)); #define strpbrk_or_eos(s, accept) ({ \ char *SOE_p = strpbrk (s, accept); \ if (!SOE_p) \ - SOE_p = (char *)s + strlen (s); \ + SOE_p = strchr (s, '\0'); \ SOE_p; \ }) #else /* not __GNUC__ */ -static char * +static inline char * strpbrk_or_eos (const char *s, const char *accept) { char *p = strpbrk (s, accept); if (!p) - p = (char *)s + strlen (s); + p = strchr (s, '\0'); return p; } -#endif +#endif /* not __GNUC__ */ /* Turn STR into lowercase; return non-zero if a character was actually changed. */ @@ -888,7 +880,6 @@ url_parse (const char *url, int *error) else u->url = url_encoded; } - url_encoded = NULL; return u; @@ -1081,7 +1072,7 @@ sync_path (struct url *u) *p++ = '/'; memcpy (p, efile, filelen); p += filelen; - *p++ = '\0'; + *p = '\0'; } u->path = newpath; @@ -1135,7 +1126,7 @@ url_free (struct url *url) } /* Create all the necessary directories for PATH (a file). Calls - mkdirhier() internally. */ + make_directory internally. */ int mkalldirs (const char *path) {