Published in <sxswv0n7h7s.fsf@florida.arsdigita.de>.
+2001-11-19 Hrvoje Niksic <hniksic@arsdigita.com>
+
+ * url.c: Clean up handling of URL schemes.
+
2001-05-13 Hrvoje Niksic <hniksic@arsdigita.com>
* url.c: Get rid of `protostrings'.
2001-05-13 Hrvoje Niksic <hniksic@arsdigita.com>
* url.c: Get rid of `protostrings'.
char *real1, *real2;
/* Skip protocol, if present. */
char *real1, *real2;
/* Skip protocol, if present. */
- u1 += skip_proto (u1);
- u2 += skip_proto (u2);
+ u1 += url_skip_scheme (u1);
+ u2 += url_skip_scheme (u2);
/* Skip username ans password, if present. */
/* Skip username ans password, if present. */
- u1 += skip_uname (u1);
- u2 += skip_uname (u2);
+ u1 += url_skip_uname (u1);
+ u2 += url_skip_uname (u2);
for (s = u1; *u1 && *u1 != '/' && *u1 != ':'; u1++);
p1 = strdupdelim (s, u1);
for (s = u1; *u1 && *u1 != '/' && *u1 != ':'; u1++);
p1 = strdupdelim (s, u1);
handle_link (struct collect_urls_closure *closure, const char *link_uri,
struct taginfo *tag, int attrid)
{
handle_link (struct collect_urls_closure *closure, const char *link_uri,
struct taginfo *tag, int attrid)
{
- int no_proto = !has_proto (link_uri);
+ int no_scheme = !url_has_scheme (link_uri);
urlpos *newel;
const char *base = closure->base ? closure->base : closure->parent_base;
urlpos *newel;
const char *base = closure->base ? closure->base : closure->parent_base;
- /* We have no base, and the link does not have a protocol or
- a host attached to it. Nothing we can do. */
+ /* We have no base, and the link does not have a host
+ attached to it. Nothing we can do. */
/* #### Should we print a warning here? Wget 1.5.x used to. */
return;
}
/* #### Should we print a warning here? Wget 1.5.x used to. */
return;
}
newel->pos = tag->attrs[attrid].value_raw_beginning - closure->text;
newel->size = tag->attrs[attrid].value_raw_size;
newel->pos = tag->attrs[attrid].value_raw_beginning - closure->text;
newel->size = tag->attrs[attrid].value_raw_size;
- /* A URL is relative if the host and protocol are not named, and the
- name does not start with `/'. */
- if (no_proto && *link_uri != '/')
+ /* A URL is relative if the host is not named, and the name does not
+ start with `/'. */
+ if (no_scheme && *link_uri != '/')
newel->link_relative_p = 1;
newel->link_relative_p = 1;
newel->link_complete_p = 1;
if (closure->tail)
newel->link_complete_p = 1;
if (closure->tail)
#ifndef HAVE_SSL
!persistent_available_p (u->host, u->port)
#else
#ifndef HAVE_SSL
!persistent_available_p (u->host, u->port)
#else
- !persistent_available_p (u->host, u->port, (u->proto==URLHTTPS ? 1 : 0))
+ !persistent_available_p (u->host, u->port, u->scheme == SCHEME_HTTPS)
#endif /* HAVE_SSL */
)
{
#endif /* HAVE_SSL */
)
{
- if (u->proto == URLHTTPS)
+ if (u->scheme == SCHEME_HTTPS)
if (connect_ssl (&ssl, ssl_ctx,sock) != 0)
{
logputs (LOG_VERBOSE, "\n");
if (connect_ssl (&ssl, ssl_ctx,sock) != 0)
{
logputs (LOG_VERBOSE, "\n");
port_maybe = NULL;
if (1
#ifdef HAVE_SSL
port_maybe = NULL;
if (1
#ifdef HAVE_SSL
- && remport != (u->proto == URLHTTPS
+ && remport != (u->scheme == SCHEME_HTTPS
? DEFAULT_HTTPS_PORT : DEFAULT_HTTP_PORT)
#else
&& remport != DEFAULT_HTTP_PORT
? DEFAULT_HTTPS_PORT : DEFAULT_HTTP_PORT)
#else
&& remport != DEFAULT_HTTP_PORT
if (opt.cookies)
cookies = build_cookies_request (ou->host, ou->port, ou->path,
if (opt.cookies)
cookies = build_cookies_request (ou->host, ou->port, ou->path,
- ou->proto == URLHTTPS);
+#ifdef HAVE_SSL
+ ou->scheme == SCHEME_HTTPS
+#else
+ 0
+#endif
+ );
/* Allocate the memory for the request. */
request = (char *)alloca (strlen (command) + strlen (path)
/* Allocate the memory for the request. */
request = (char *)alloca (strlen (command) + strlen (path)
/* Send the request to server. */
#ifdef HAVE_SSL
/* Send the request to server. */
#ifdef HAVE_SSL
- if (u->proto == URLHTTPS)
+ if (u->scheme == SCHEME_HTTPS)
num_written = ssl_iwrite (ssl, request, strlen (request));
else
#endif /* HAVE_SSL */
num_written = ssl_iwrite (ssl, request, strlen (request));
else
#endif /* HAVE_SSL */
/* Before reading anything, initialize the rbuf. */
rbuf_initialize (&rbuf, sock);
#ifdef HAVE_SSL
/* Before reading anything, initialize the rbuf. */
rbuf_initialize (&rbuf, sock);
#ifdef HAVE_SSL
- if (u->proto == URLHTTPS)
+ if (u->scheme == SCHEME_HTTPS)
rbuf.ssl = ssl;
else
rbuf.ssl = NULL;
rbuf.ssl = ssl;
else
rbuf.ssl = NULL;
that the retrieval is done through proxy. In that case, FTP
links will be followed by default and recursion will not be
turned off when following them. */
that the retrieval is done through proxy. In that case, FTP
links will be followed by default and recursion will not be
turned off when following them. */
- this_url_ftp = (urlproto (this_url) == URLFTP);
+ this_url_ftp = (url_scheme (this_url) == SCHEME_FTP);
/* Get the URL-s from an HTML file: */
url_list = get_urls_html (file, canon_this_url ? canon_this_url : this_url,
/* Get the URL-s from an HTML file: */
url_list = get_urls_html (file, canon_this_url ? canon_this_url : this_url,
freeurl (u, 1);
continue;
}
freeurl (u, 1);
continue;
}
- if (u->proto == URLFILE)
- {
- DEBUGP (("Nothing to do with file:// around here.\n"));
- freeurl (u, 1);
- continue;
- }
assert (u->url != NULL);
constr = xstrdup (u->url);
assert (u->url != NULL);
constr = xstrdup (u->url);
/* If it is FTP, and FTP is not followed, chuck it out. */
if (!inl)
/* If it is FTP, and FTP is not followed, chuck it out. */
if (!inl)
- if (u->proto == URLFTP && !opt.follow_ftp && !this_url_ftp)
+ if (u->scheme == SCHEME_FTP && !opt.follow_ftp && !this_url_ftp)
{
DEBUGP (("Uh, it is FTP but i'm not in the mood to follow FTP.\n"));
string_set_add (undesirable_urls, constr);
{
DEBUGP (("Uh, it is FTP but i'm not in the mood to follow FTP.\n"));
string_set_add (undesirable_urls, constr);
}
/* If it is absolute link and they are not followed, chuck it
out. */
}
/* If it is absolute link and they are not followed, chuck it
out. */
- if (!inl && u->proto != URLFTP)
+ if (!inl && u->scheme != SCHEME_FTP)
if (opt.relative_only && !cur_url->link_relative_p)
{
DEBUGP (("It doesn't really look like a relative link.\n"));
if (opt.relative_only && !cur_url->link_relative_p)
{
DEBUGP (("It doesn't really look like a relative link.\n"));
if (!inl && opt.no_parent
/* If the new URL is FTP and the old was not, ignore
opt.no_parent. */
if (!inl && opt.no_parent
/* If the new URL is FTP and the old was not, ignore
opt.no_parent. */
- && !(!this_url_ftp && u->proto == URLFTP))
+ && !(!this_url_ftp && u->scheme == SCHEME_FTP))
{
/* Check for base_dir first. */
if (!(base_dir && frontcmp (base_dir, u->dir)))
{
/* Check for base_dir first. */
if (!(base_dir && frontcmp (base_dir, u->dir)))
/* This line is bogus. */
/*string_set_add (undesirable_urls, constr);*/
/* This line is bogus. */
/*string_set_add (undesirable_urls, constr);*/
- if (!inl && !((u->proto == URLFTP) && !this_url_ftp))
+ if (!inl && !((u->scheme == SCHEME_FTP) && !this_url_ftp))
if (!opt.spanhost && this_url && !same_host (this_url, constr))
{
DEBUGP (("This is not the same hostname as the parent's.\n"));
if (!opt.spanhost && this_url && !same_host (this_url, constr))
{
DEBUGP (("This is not the same hostname as the parent's.\n"));
}
}
/* What about robots.txt? */
}
}
/* What about robots.txt? */
- if (!inl && opt.use_robots && u->proto == URLHTTP)
+ if (!inl && opt.use_robots && u->scheme == SCHEME_FTP)
{
struct robot_specs *specs = res_get_specs (u->host, u->port);
if (!specs)
{
struct robot_specs *specs = res_get_specs (u->host, u->port);
if (!specs)
string_set_add (undesirable_urls, constr);
/* Automatically followed FTPs will *not* be downloaded
recursively. */
string_set_add (undesirable_urls, constr);
/* Automatically followed FTPs will *not* be downloaded
recursively. */
- if (u->proto == URLFTP)
+ if (u->scheme == SCHEME_FTP)
{
/* Don't you adore side-effects? */
opt.recursive = 0;
{
/* Don't you adore side-effects? */
opt.recursive = 0;
/* Retrieve it. */
retrieve_url (constr, &filename, &newloc,
canon_this_url ? canon_this_url : this_url, &dt);
/* Retrieve it. */
retrieve_url (constr, &filename, &newloc,
canon_this_url ? canon_this_url : this_url, &dt);
- if (u->proto == URLFTP)
+ if (u->scheme == SCHEME_FTP)
{
/* Restore... */
opt.recursive = 1;
{
/* Restore... */
opt.recursive = 1;
-#define USE_PROXY_P(u) (opt.use_proxy && getproxy((u)->proto) \
+#define USE_PROXY_P(u) (opt.use_proxy && getproxy((u)->scheme) \
&& no_proxy_match((u)->host, \
(const char **)opt.no_proxy))
&& no_proxy_match((u)->host, \
(const char **)opt.no_proxy))
memset (u, 0, sizeof (*u));
u->proxy = pu;
/* Get the appropriate proxy server, appropriate for the
memset (u, 0, sizeof (*u));
u->proxy = pu;
/* Get the appropriate proxy server, appropriate for the
- current protocol. */
- proxy = getproxy (pu->proto);
+ current scheme. */
+ proxy = getproxy (pu->scheme);
if (!proxy)
{
logputs (LOG_NOTQUIET, _("Could not find proxy host.\n"));
if (!proxy)
{
logputs (LOG_NOTQUIET, _("Could not find proxy host.\n"));
}
/* Parse the proxy URL. */
result = parseurl (proxy, u, 0);
}
/* Parse the proxy URL. */
result = parseurl (proxy, u, 0);
- if (result != URLOK || u->proto != URLHTTP)
+ if (result != URLOK || u->scheme != SCHEME_HTTP)
- if (u->proto == URLHTTP)
+ if (u->scheme == SCHEME_HTTP)
logprintf (LOG_NOTQUIET, "Proxy %s: %s.\n", proxy, uerrmsg(result));
else
logprintf (LOG_NOTQUIET, _("Proxy %s: Must be HTTP.\n"), proxy);
logprintf (LOG_NOTQUIET, "Proxy %s: %s.\n", proxy, uerrmsg(result));
else
logprintf (LOG_NOTQUIET, _("Proxy %s: Must be HTTP.\n"), proxy);
xfree (url);
return PROXERR;
}
xfree (url);
return PROXERR;
}
+ u->scheme = SCHEME_HTTP;
- assert (u->proto != URLFILE); /* #### Implement me! */
- if (u->proto == URLHTTP
+ if (u->scheme == SCHEME_HTTP
- || u->proto == URLHTTPS
+ || u->scheme == SCHEME_HTTPS
#endif
)
result = http_loop (u, &mynewloc, dt);
#endif
)
result = http_loop (u, &mynewloc, dt);
- else if (u->proto == URLFTP)
+ else if (u->scheme == SCHEME_FTP)
{
/* If this is a redirection, we must not allow recursive FTP
retrieval, so we save recursion to oldrec, and restore it
{
/* If this is a redirection, we must not allow recursive FTP
retrieval, so we save recursion to oldrec, and restore it
#### All of this is, of course, crap. These types should be
determined through mailcap. */
#### All of this is, of course, crap. These types should be
determined through mailcap. */
- if (redirections && u->local && (u->proto == URLFTP ))
+ if (redirections && u->local && (u->scheme == SCHEME_FTP))
{
char *suf = suffix (u->local);
if (suf && (!strcasecmp (suf, "html") || !strcasecmp (suf, "htm")))
{
char *suf = suffix (u->local);
if (suf && (!strcasecmp (suf, "html") || !strcasecmp (suf, "htm")))
static int urlpath_length PARAMS ((const char *));
static int urlpath_length PARAMS ((const char *));
- char *name;
- uerr_t ind;
- unsigned short port;
+ enum url_scheme scheme;
+ char *leading_string;
+ int default_port;
-/* Supported protocols: */
-static struct proto sup_protos[] =
+/* Supported schemes: */
+static struct scheme_data supported_schemes[] =
- { "http://", URLHTTP, DEFAULT_HTTP_PORT },
+ { SCHEME_HTTP, "http://", DEFAULT_HTTP_PORT },
- { "https://",URLHTTPS, DEFAULT_HTTPS_PORT},
+ { SCHEME_HTTPS, "https://", DEFAULT_HTTPS_PORT },
- { "ftp://", URLFTP, DEFAULT_FTP_PORT }
+ { SCHEME_FTP, "ftp://", DEFAULT_FTP_PORT }
};
static void parse_dir PARAMS ((const char *, char **, char **));
};
static void parse_dir PARAMS ((const char *, char **, char **));
-/* Returns the protocol type if URL's protocol is supported, or
- URLUNKNOWN if not. */
-uerr_t
-urlproto (const char *url)
+/* Returns the scheme type if the scheme is supported, or
+ SCHEME_INVALID if not. */
+enum url_scheme
+url_scheme (const char *url)
- for (i = 0; i < ARRAY_SIZE (sup_protos); i++)
- if (!strncasecmp (url, sup_protos[i].name, strlen (sup_protos[i].name)))
- return sup_protos[i].ind;
- for (i = 0; url[i] && url[i] != ':' && url[i] != '/'; i++);
- if (url[i] == ':')
- {
- for (++i; url[i] && url[i] != '/'; i++)
- if (!ISDIGIT (url[i]))
- return URLBADPORT;
- if (url[i - 1] == ':')
- return URLFTP;
- else
- return URLHTTP;
- }
- else
- return URLHTTP;
+ for (i = 0; i < ARRAY_SIZE (supported_schemes); i++)
+ if (!strncasecmp (url, supported_schemes[i].leading_string,
+ strlen (supported_schemes[i].leading_string)))
+ return supported_schemes[i].scheme;
+ return SCHEME_INVALID;
-/* Skip the protocol part of the URL, e.g. `http://'. If no protocol
- part is found, returns 0. */
+/* Return the number of characters needed to skip the scheme part of
+ the URL, e.g. `http://'. If no scheme is found, returns 0. */
-skip_proto (const char *url)
+url_skip_scheme (const char *url)
- /* Skip protocol name. We allow `-' and `+' because of `whois++',
+ /* Skip the scheme name. We allow `-' and `+' because of `whois++',
etc. */
while (ISALNUM (*p) || *p == '-' || *p == '+')
++p;
etc. */
while (ISALNUM (*p) || *p == '-' || *p == '+')
++p;
-/* Returns 1 if the URL begins with a protocol (supported or
+/* Returns 1 if the URL begins with a scheme (supported or
unsupported), 0 otherwise. */
int
unsupported), 0 otherwise. */
int
-has_proto (const char *url)
+url_has_scheme (const char *url)
{
const char *p = url;
while (ISALNUM (*p) || *p == '-' || *p == '+')
{
const char *p = url;
while (ISALNUM (*p) || *p == '-' || *p == '+')
/* Skip the username and password, if present here. The function
should be called *not* with the complete URL, but with the part
/* Skip the username and password, if present here. The function
should be called *not* with the complete URL, but with the part
- right after the protocol.
+ right after the scheme.
If no username and password are found, return 0. */
int
If no username and password are found, return 0. */
int
-skip_uname (const char *url)
+url_skip_uname (const char *url)
{
const char *p;
const char *q = NULL;
{
const char *p;
const char *q = NULL;
u = (struct urlinfo *)xmalloc (sizeof (struct urlinfo));
memset (u, 0, sizeof (*u));
u = (struct urlinfo *)xmalloc (sizeof (struct urlinfo));
memset (u, 0, sizeof (*u));
+ u->scheme = SCHEME_INVALID;
+enum url_parse_error {
+ PE_UNRECOGNIZED_SCHEME, PE_BAD_PORT
+};
+
/* Extract the given URL of the form
(http:|ftp:)// (user (:password)?@)?hostname (:port)? (/path)?
1. hostname (terminated with `/' or `:')
/* Extract the given URL of the form
(http:|ftp:)// (user (:password)?@)?hostname (:port)? (/path)?
1. hostname (terminated with `/' or `:')
- 2. port number (terminated with `/'), or chosen for the protocol
+ 2. port number (terminated with `/'), or chosen for the scheme
3. dirname (everything after hostname)
Most errors are handled. No allocation is done, you must supply
pointers to allocated memory.
3. dirname (everything after hostname)
Most errors are handled. No allocation is done, you must supply
pointers to allocated memory.
{
int i, l, abs_ftp;
int recognizable; /* Recognizable URL is the one where
{
int i, l, abs_ftp;
int recognizable; /* Recognizable URL is the one where
- the protocol name was explicitly
- named, i.e. it wasn't deduced from
- the URL format. */
+ the scheme was explicitly named,
+ i.e. it wasn't deduced from the URL
+ format. */
uerr_t type;
DEBUGP (("parseurl (\"%s\") -> ", url));
uerr_t type;
DEBUGP (("parseurl (\"%s\") -> ", url));
- recognizable = has_proto (url);
+ recognizable = url_has_scheme (url);
if (strict && !recognizable)
return URLUNKNOWN;
if (strict && !recognizable)
return URLUNKNOWN;
- for (i = 0, l = 0; i < ARRAY_SIZE (sup_protos); i++)
+ for (i = 0, l = 0; i < ARRAY_SIZE (supported_schemes); i++)
- l = strlen (sup_protos[i].name);
- if (!strncasecmp (sup_protos[i].name, url, l))
+ l = strlen (supported_schemes[i].leading_string);
+ if (!strncasecmp (supported_schemes[i].leading_string, url, l))
- /* If protocol is recognizable, but unsupported, bail out, else
+ /* If scheme is recognizable, but unsupported, bail out, else
- if (recognizable && i == ARRAY_SIZE (sup_protos))
+ if (recognizable && i == ARRAY_SIZE (supported_schemes))
- else if (i == ARRAY_SIZE (sup_protos))
+ else if (i == ARRAY_SIZE (supported_schemes))
- u->proto = type = sup_protos[i].ind;
+ u->scheme = type = supported_schemes[i].scheme;
if (type == URLUNKNOWN)
l = 0;
/* Allow a username and password to be specified (i.e. just skip
them for now). */
if (recognizable)
if (type == URLUNKNOWN)
l = 0;
/* Allow a username and password to be specified (i.e. just skip
them for now). */
if (recognizable)
- l += skip_uname (url + l);
+ l += url_skip_uname (url + l);
for (i = l; url[i] && url[i] != ':' && url[i] != '/'; i++);
if (i == l)
return URLBADHOST;
for (i = l; url[i] && url[i] != ':' && url[i] != '/'; i++);
if (i == l)
return URLBADHOST;
if (ISDIGIT (url[++i])) /* A port number */
{
if (type == URLUNKNOWN)
if (ISDIGIT (url[++i])) /* A port number */
{
if (type == URLUNKNOWN)
- u->proto = type = URLHTTP;
+ {
+ type = URLHTTP;
+ u->scheme = SCHEME_HTTP;
+ }
for (; url[i] && url[i] != '/'; i++)
if (ISDIGIT (url[i]))
u->port = 10 * u->port + (url[i] - '0');
for (; url[i] && url[i] != '/'; i++)
if (ISDIGIT (url[i]))
u->port = 10 * u->port + (url[i] - '0');
DEBUGP (("port %hu -> ", u->port));
}
else if (type == URLUNKNOWN) /* or a directory */
DEBUGP (("port %hu -> ", u->port));
}
else if (type == URLUNKNOWN) /* or a directory */
- u->proto = type = URLFTP;
+ {
+ type = URLFTP;
+ u->scheme = SCHEME_FTP;
+ }
else /* or just a misformed port number */
return URLBADPORT;
}
else if (type == URLUNKNOWN)
else /* or just a misformed port number */
return URLBADPORT;
}
else if (type == URLUNKNOWN)
- u->proto = type = URLHTTP;
+ {
+ type = URLHTTP;
+ u->scheme = SCHEME_HTTP;
+ }
- for (ind = 0; ind < ARRAY_SIZE (sup_protos); ind++)
- if (sup_protos[ind].ind == type)
+ for (ind = 0; ind < ARRAY_SIZE (supported_schemes); ind++)
+ if (supported_schemes[ind].scheme == u->scheme)
- if (ind == ARRAY_SIZE (sup_protos))
+ if (ind == ARRAY_SIZE (supported_schemes))
- u->port = sup_protos[ind].port;
+ u->port = supported_schemes[ind].default_port;
}
/* Some delimiter troubles... */
if (url[i] == '/' && url[i - 1] != ':')
}
/* Some delimiter troubles... */
if (url[i] == '/' && url[i - 1] != ':')
if (l > 1 && u->dir[l - 1] == '/')
u->dir[l - 1] = '\0';
/* Re-create the path: */
if (l > 1 && u->dir[l - 1] == '/')
u->dir[l - 1] = '\0';
/* Re-create the path: */
- abs_ftp = (u->proto == URLFTP && *u->dir == '/');
+ abs_ftp = (u->scheme == SCHEME_FTP && *u->dir == '/');
/* sprintf (u->path, "%s%s%s%s", abs_ftp ? "%2F": "/",
abs_ftp ? (u->dir + 1) : u->dir, *u->dir ? "/" : "", u->file); */
strcpy (u->path, abs_ftp ? "%2F" : "/");
/* sprintf (u->path, "%s%s%s%s", abs_ftp ? "%2F": "/",
abs_ftp ? (u->dir + 1) : u->dir, *u->dir ? "/" : "", u->file); */
strcpy (u->path, abs_ftp ? "%2F" : "/");
*user = NULL;
*passwd = NULL;
*user = NULL;
*passwd = NULL;
- /* Look for the end of the protocol string. */
- l = skip_proto (url);
+ /* Look for the end of the scheme identifier. */
+ l = url_skip_scheme (url);
if (!l)
return URLUNKNOWN;
if (!l)
return URLUNKNOWN;
- /* Add protocol offset. */
url += l;
/* Is there an `@' character? */
for (p = url; *p && *p != '/'; p++)
url += l;
/* Is there an `@' character? */
for (p = url; *p && *p != '/'; p++)
-/* Return the URL as fine-formed string, with a proper protocol, optional port
- number, directory and optional user/password. If `hide' is non-zero (as it
- is when we're calling this on a URL we plan to print, but not when calling it
- to canonicalize a URL for use within the program), password will be hidden.
- The forbidden characters in the URL will be cleansed. */
+/* Recreate the URL string from the data in urlinfo. This can be used
+ to create a "canonical" representation of the URL. If `hide' is
+ non-zero (as it is when we're calling this on a URL we plan to
+ print, but not when calling it to canonicalize a URL for use within
+ the program), password will be hidden. The forbidden characters in
+ the URL will be cleansed. */
char *
str_url (const struct urlinfo *u, int hide)
{
char *
str_url (const struct urlinfo *u, int hide)
{
- char *res, *host, *user, *passwd, *proto_name, *dir, *file;
+ char *res, *host, *user, *passwd, *scheme_name, *dir, *file;
int i, l, ln, lu, lh, lp, lf, ld;
int i, l, ln, lu, lh, lp, lf, ld;
- unsigned short proto_default_port;
+ unsigned short default_port;
- /* Look for the protocol name. */
- for (i = 0; i < ARRAY_SIZE (sup_protos); i++)
- if (sup_protos[i].ind == u->proto)
+ /* Look for the scheme. */
+ for (i = 0; i < ARRAY_SIZE (supported_schemes); i++)
+ if (supported_schemes[i].scheme == u->scheme)
- if (i == ARRAY_SIZE (sup_protos))
+ if (i == ARRAY_SIZE (supported_schemes))
- proto_name = sup_protos[i].name;
- proto_default_port = sup_protos[i].port;
+ scheme_name = supported_schemes[i].leading_string;
+ default_port = supported_schemes[i].default_port;
host = encode_string (u->host);
dir = encode_string (u->dir);
file = encode_string (u->file);
host = encode_string (u->host);
dir = encode_string (u->dir);
file = encode_string (u->file);
else
passwd = encode_string (u->passwd);
}
else
passwd = encode_string (u->passwd);
}
- if (u->proto == URLFTP && *dir == '/')
+ if (u->scheme == SCHEME_FTP && *dir == '/')
{
char *tmp = (char *)xmalloc (strlen (dir) + 3);
/*sprintf (tmp, "%%2F%s", dir + 1);*/
{
char *tmp = (char *)xmalloc (strlen (dir) + 3);
/*sprintf (tmp, "%%2F%s", dir + 1);*/
- ln = strlen (proto_name);
+ ln = strlen (scheme_name);
lu = user ? strlen (user) : 0;
lp = passwd ? strlen (passwd) : 0;
lh = strlen (host);
ld = strlen (dir);
lf = strlen (file);
res = (char *)xmalloc (ln + lu + lp + lh + ld + lf + 20); /* safe sex */
lu = user ? strlen (user) : 0;
lp = passwd ? strlen (passwd) : 0;
lh = strlen (host);
ld = strlen (dir);
lf = strlen (file);
res = (char *)xmalloc (ln + lu + lp + lh + ld + lf + 20); /* safe sex */
- /* sprintf (res, "%s%s%s%s%s%s:%d/%s%s%s", proto_name,
+ /* sprintf (res, "%s%s%s%s%s%s:%d/%s%s%s", scheme_name,
(user ? user : ""), (passwd ? ":" : ""),
(passwd ? passwd : ""), (user ? "@" : ""),
host, u->port, dir, *dir ? "/" : "", file); */
l = 0;
(user ? user : ""), (passwd ? ":" : ""),
(passwd ? passwd : ""), (user ? "@" : ""),
host, u->port, dir, *dir ? "/" : "", file); */
l = 0;
- memcpy (res, proto_name, ln);
+ memcpy (res, scheme_name, ln);
}
memcpy (res + l, host, lh);
l += lh;
}
memcpy (res + l, host, lh);
l += lh;
- if (u->port != proto_default_port)
+ if (u->port != default_port)
{
res[l++] = ':';
long_to_string (res + l, (long)u->port);
{
res[l++] = ':';
long_to_string (res + l, (long)u->port);
Either of the URIs may be absolute or relative, complete with the
host name, or path only. This tries to behave "reasonably" in all
foreseeable cases. It employs little specific knowledge about
Either of the URIs may be absolute or relative, complete with the
host name, or path only. This tries to behave "reasonably" in all
foreseeable cases. It employs little specific knowledge about
- protocols or URL-specific stuff -- it just works on strings.
+ schemes or URL-specific stuff -- it just works on strings.
The parameters LINKLENGTH is useful if LINK is not zero-terminated.
See uri_merge for a gentler interface to this functionality.
The parameters LINKLENGTH is useful if LINK is not zero-terminated.
See uri_merge for a gentler interface to this functionality.
#### This function should handle `./' and `../' so that the evil
path_simplify can go. */
static char *
#### This function should handle `./' and `../' so that the evil
path_simplify can go. */
static char *
-uri_merge_1 (const char *base, const char *link, int linklength, int no_proto)
+uri_merge_1 (const char *base, const char *link, int linklength, int no_scheme)
{
const char *end = base + urlpath_length (base);
{
const char *end = base + urlpath_length (base);
constr[span + linklength] = '\0';
}
}
constr[span + linklength] = '\0';
}
}
{
constr = strdupdelim (link, link + linklength);
}
{
constr = strdupdelim (link, link + linklength);
}
char *
uri_merge (const char *base, const char *link)
{
char *
uri_merge (const char *base, const char *link)
{
- return uri_merge_1 (base, link, strlen (link), !has_proto (link));
+ return uri_merge_1 (base, link, strlen (link), !url_has_scheme (link));
}
\f
/* Optimize URL by host, destructively replacing u->host with realhost
}
\f
/* Optimize URL by host, destructively replacing u->host with realhost
u->url = str_url (u, 0);
}
\f
u->url = str_url (u, 0);
}
\f
-/* Returns proxy host address, in accordance with PROTO. */
+/* Returns proxy host address, in accordance with SCHEME. */
+getproxy (enum url_scheme scheme)
- if (proto == URLHTTP)
- proxy = opt.http_proxy ? opt.http_proxy : getenv ("http_proxy");
- else if (proto == URLFTP)
- proxy = opt.ftp_proxy ? opt.ftp_proxy : getenv ("ftp_proxy");
+ switch (scheme)
+ {
+ case SCHEME_HTTP:
+ proxy = opt.http_proxy ? opt.http_proxy : getenv ("http_proxy");
+ break;
- else if (proto == URLHTTPS)
- proxy = opt.https_proxy ? opt.https_proxy : getenv ("https_proxy");
-#endif /* HAVE_SSL */
- else
- proxy = NULL;
+ case SCHEME_HTTPS:
+ proxy = opt.https_proxy ? opt.https_proxy : getenv ("https_proxy");
+ break;
+#endif
+ case SCHEME_FTP:
+ proxy = opt.ftp_proxy ? opt.ftp_proxy : getenv ("ftp_proxy");
+ break;
+ case SCHEME_INVALID:
+ break;
+ }
if (!proxy || !*proxy)
return NULL;
return proxy;
if (!proxy || !*proxy)
return NULL;
return proxy;
#define DEFAULT_FTP_PORT 21
#define DEFAULT_HTTPS_PORT 443
#define DEFAULT_FTP_PORT 21
#define DEFAULT_HTTPS_PORT 443
+enum url_scheme {
+ SCHEME_HTTP,
+#ifdef HAVE_SSL
+ SCHEME_HTTPS,
+#endif
+ SCHEME_FTP,
+ SCHEME_INVALID
+};
/* Structure containing info on a URL. */
struct urlinfo
{
char *url; /* Unchanged URL */
/* Structure containing info on a URL. */
struct urlinfo
{
char *url; /* Unchanged URL */
- uerr_t proto; /* URL protocol */
+ enum url_scheme scheme; /* URL scheme */
+
char *host; /* Extracted hostname */
unsigned short port;
char ftp_type;
char *host; /* Extracted hostname */
unsigned short port;
char ftp_type;
struct urlinfo *newurl PARAMS ((void));
void freeurl PARAMS ((struct urlinfo *, int));
struct urlinfo *newurl PARAMS ((void));
void freeurl PARAMS ((struct urlinfo *, int));
-uerr_t urlproto PARAMS ((const char *));
-int skip_proto PARAMS ((const char *));
-int has_proto PARAMS ((const char *));
-int skip_uname PARAMS ((const char *));
+enum url_scheme url_detect_scheme PARAMS ((const char *));
+int url_skip_scheme PARAMS ((const char *));
+int url_has_scheme PARAMS ((const char *));
+int url_skip_uname PARAMS ((const char *));
uerr_t parseurl PARAMS ((const char *, struct urlinfo *, int));
char *str_url PARAMS ((const struct urlinfo *, int));
uerr_t parseurl PARAMS ((const char *, struct urlinfo *, int));
char *str_url PARAMS ((const struct urlinfo *, int));