/* A list of unsafe characters for encoding, as per RFC1738. '@' and
':' (not listed in RFC) were added because of user/password
- encoding, and \033 for safe printing. */
+ encoding. */
#ifndef WINDOWS
-# define URL_UNSAFE " <>\"#%{}|\\^~[]`@:\033"
+# define URL_UNSAFE_CHARS "<>\"#%{}|\\^~[]`@:"
#else /* WINDOWS */
-# define URL_UNSAFE " <>\"%{}|\\^[]`\033"
+# define URL_UNSAFE_CHARS "<>\"%{}|\\^[]`"
#endif /* WINDOWS */
+#define UNSAFE_CHAR(c) ( ((unsigned char)(c) <= ' ') /* controls and space, ASCII 0-32 */ \
+ || ((unsigned char)(c) > '~') /* above '~' (ASCII 126): DEL and 8-bit bytes */ \
+ || strchr (URL_UNSAFE_CHARS, c))
+
/* If S contains unsafe characters, free it and replace it with a
version that doesn't. */
#define URL_CLEANSE(s) do \
{
int i;
- if (toupper (url[0]) == 'U'
- && toupper (url[1]) == 'R'
- && toupper (url[2]) == 'L'
+ if (TOUPPER (url[0]) == 'U'
+ && TOUPPER (url[1]) == 'R'
+ && TOUPPER (url[2]) == 'L'
&& url[3] == ':')
{
/* Skip blanks. */
contains_unsafe (const char *s)
{
for (; *s; s++)
- if (strchr (URL_UNSAFE, *s))
+ if (UNSAFE_CHAR (*s))
return 1;
return 0;
}
*p = '\0';
}
-/* Encodes the unsafe characters (listed in URL_UNSAFE) in a given
- string, returning a malloc-ed %XX encoded string. */
+/* Encode the unsafe characters (as determined by UNSAFE_CHAR) in a
+ given string, returning a malloc-ed %XX encoded string. */
char *
encode_string (const char *s)
{
b = s;
for (i = 0; *s; s++, i++)
- if (strchr (URL_UNSAFE, *s))
+ if (UNSAFE_CHAR (*s))
i += 2; /* Two more characters (hex digits) */
res = (char *)xmalloc (i + 1);
s = b;
for (p = res; *s; s++)
- if (strchr (URL_UNSAFE, *s))
+ if (UNSAFE_CHAR (*s))
{
const unsigned char c = *s;
*p++ = '%';
{
u->ftp_type = process_ftp_type (u->path);
/* #### We don't handle type `d' correctly yet. */
- if (!u->ftp_type || toupper (u->ftp_type) == 'D')
+ if (!u->ftp_type || TOUPPER (u->ftp_type) == 'D')
u->ftp_type = 'I';
}
DEBUGP (("opath %s -> ", u->path));
{
char *res, *host, *user, *passwd, *proto_name, *dir, *file;
int i, l, ln, lu, lh, lp, lf, ld;
+ unsigned short proto_default_port;
/* Look for the protocol name. */
for (i = 0; i < ARRAY_SIZE (sup_protos); i++)
if (i == ARRAY_SIZE (sup_protos))
return NULL;
proto_name = sup_protos[i].name;
+ proto_default_port = sup_protos[i].port;
host = CLEANDUP (u->host);
dir = CLEANDUP (u->dir);
file = CLEANDUP (u->file);
}
memcpy (res + l, host, lh);
l += lh;
- res[l++] = ':';
- long_to_string (res + l, (long)u->port);
- l += numdigit (u->port);
+ if (u->port != proto_default_port)
+ {
+ res[l++] = ':';
+ long_to_string (res + l, (long)u->port);
+ l += numdigit (u->port);
+ }
res[l++] = '/';
memcpy (res + l, dir, ld);
l += ld;
If SILENT is non-zero, do not barf on baseless relative links. */
urlpos *
-get_urls_html (const char *file, const char *this_url, int silent)
+get_urls_html (const char *file, const char *this_url, int silent,
+ int dash_p_leaf_HTML)
{
long nread;
FILE *fp;
first_time = 1;
/* Iterate over the URLs in BUF, picked by htmlfindurl(). */
for (buf = orig_buf;
- (buf = htmlfindurl (buf, nread - (buf - orig_buf), &step, first_time));
+ (buf = htmlfindurl (buf, nread - (buf - orig_buf), &step, first_time,
+ dash_p_leaf_HTML));
buf += step)
{
int i, no_proto;