}
/* Used by main.c: detect URLs written using the "shorthand" URL forms
- popularized by Netscape and NcFTP. HTTP shorthands look like this:
+ originally popularized by Netscape and NcFTP. HTTP shorthands look
+ like this:
www.foo.com[:port]/dir/file -> http://www.foo.com[:port]/dir/file
www.foo.com[:port] -> http://www.foo.com[:port]
rewrite_shorthand_url (const char *url)
{
const char *p;
+ char *ret;
if (url_scheme (url) != SCHEME_INVALID)
return NULL;
/* Look for a ':' or '/'. The former signifies NcFTP syntax, the
latter Netscape. */
- for (p = url; *p && *p != ':' && *p != '/'; p++)
- ;
-
+ p = strpbrk (url, ":/");
if (p == url)
return NULL;
/* If we're looking at "://", it means the URL uses a scheme we
don't support, which may include "https" when compiled without
SSL support. Don't bogusly rewrite such URLs. */
- if (p[0] == ':' && p[1] == '/' && p[2] == '/')
+ if (p && p[0] == ':' && p[1] == '/' && p[2] == '/')
return NULL;
- if (*p == ':')
+ if (p && *p == ':')
{
- const char *pp;
- char *res;
- /* If the characters after the colon and before the next slash
- or end of string are all digits, it's HTTP. */
- int digits = 0;
- for (pp = p + 1; ISDIGIT (*pp); pp++)
- ++digits;
- if (digits > 0 && (*pp == '/' || *pp == '\0'))
+ /* Colon indicates ftp, as in foo.bar.com:path. Check for
+ special case of http port number ("localhost:10000"). */
+ int digits = strspn (p + 1, "0123456789");
+ if (digits && (p[1 + digits] == '/' || p[1 + digits] == '\0'))
goto http;
- /* Prepend "ftp://" to the entire URL... */
- res = xmalloc (6 + strlen (url) + 1);
- sprintf (res, "ftp://%s", url);
- /* ...and replace ':' with '/'. */
- res[6 + (p - url)] = '/';
- return res;
+ /* Turn "foo.bar.com:path" to "ftp://foo.bar.com/path". */
+ ret = aprintf ("ftp://%s", url);
+ ret[6 + (p - url)] = '/';
}
else
{
- char *res;
http:
- /* Just prepend "http://" to what we have. */
- res = xmalloc (7 + strlen (url) + 1);
- sprintf (res, "http://%s", url);
- return res;
+ /* Just prepend "http://" to URL. */
+ ret = aprintf ("http://%s", url);
}
+ return ret;
}
\f
static void split_path (const char *, char **, char **);
/* Like strpbrk, with the exception that it returns the pointer to the
terminating zero (end-of-string aka "eos") if no matching character
- is found.
-
- Although I normally balk at Gcc-specific optimizations, it probably
- makes sense here: glibc has optimizations that detect strpbrk being
- called with literal string as ACCEPT and inline the search. That
- optimization is defeated if strpbrk is hidden within the call to
- another function. (And no, making strpbrk_or_eos inline doesn't
- help because the check for literal accept is in the
- preprocessor.) */
-
-#if defined(__GNUC__) && __GNUC__ >= 3
-
-#define strpbrk_or_eos(s, accept) ({ \
- char *SOE_p = strpbrk (s, accept); \
- if (!SOE_p) \
- SOE_p = strchr (s, '\0'); \
- SOE_p; \
-})
-
-#else /* not __GNUC__ or old gcc */
+ is found. */
static inline char *
strpbrk_or_eos (const char *s, const char *accept)
p = strchr (s, '\0');
return p;
}
-#endif /* not __GNUC__ or old gcc */
/* Turn STR into lowercase; return true if a character was actually
changed. */
}
\f
/* Return the length of URL's path. Path is considered to be
- terminated by one of '?', ';', '#', or by the end of the
- string. */
+ terminated by one or more of the ?query or ;params or #fragment,
+ depending on the scheme. */
-static int
-path_length (const char *url)
+static const char *
+path_end (const char *url)
{
- const char *q = strpbrk_or_eos (url, "?;#");
- return q - url;
+ enum url_scheme scheme = url_scheme (url);
+ const char *seps;
+ if (scheme == SCHEME_INVALID)
+ scheme = SCHEME_HTTP; /* use http semantics for rel links */
+ /* +2 to ignore the first two separators ':' and '/' */
+ seps = init_seps (scheme) + 2;
+ return strpbrk_or_eos (url, seps);
}
/* Find the last occurrence of character C in the range [b, e), or
return xstrdup (link);
/* We may not examine BASE past END. */
- end = base + path_length (base);
+ end = path_end (base);
linklength = strlen (link);
if (!*link)