[svn] Clean up handling of schemes.

author hniksic <devnull@localhost>

Mon, 19 Nov 2001 00:12:05 +0000 (16:12 -0800)

committer hniksic <devnull@localhost>

Mon, 19 Nov 2001 00:12:05 +0000 (16:12 -0800)
author hniksic <devnull@localhost>
Mon, 19 Nov 2001 00:12:05 +0000 (16:12 -0800)
committer hniksic <devnull@localhost>
Mon, 19 Nov 2001 00:12:05 +0000 (16:12 -0800)
diff --git a/src/ChangeLog b/src/ChangeLog

index 7f85c6ac234482d8ba9480c9d55e69987e93dddb..c37a399cf9c4cd60d295244a88c3ab63a5bcb2bf 100644 (file)
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,7 @@
+2001-11-19  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+       * url.c: Clean up handling of URL schemes.
+
  2001-05-13  Hrvoje Niksic  <hniksic@arsdigita.com>
  
         * url.c: Get rid of `protostrings'.
diff --git a/src/host.c b/src/host.c

index b35c8bd3b704748e69221a290cb89aec9f9361fb..087a90c97ec1cb03d1d5c7ad7bc968f642ee19ae 100644 (file)
--- a/src/host.c
+++ b/src/host.c
@@ -278,12 +278,12 @@ same_host (const char *u1, const char *u2)
    char *real1, *real2;
  
    /* Skip protocol, if present.  */
-  u1 += skip_proto (u1);
-  u2 += skip_proto (u2);
+  u1 += url_skip_scheme (u1);
+  u2 += url_skip_scheme (u2);
  
    /* Skip username ans password, if present.  */
-  u1 += skip_uname (u1);
-  u2 += skip_uname (u2);
+  u1 += url_skip_uname (u1);
+  u2 += url_skip_uname (u2);
  
    for (s = u1; *u1 && *u1 != '/' && *u1 != ':'; u1++);
    p1 = strdupdelim (s, u1);
diff --git a/src/html-url.c b/src/html-url.c

index 03290853a1762c7ab7d70056d48eac43a589dd4d..6ab54a9f992761528e7aa6cf1031a8375a2669b3 100644 (file)
--- a/src/html-url.c
+++ b/src/html-url.c
@@ -301,7 +301,7 @@ static void
  handle_link (struct collect_urls_closure *closure, const char *link_uri,
              struct taginfo *tag, int attrid)
  {
-  int no_proto = !has_proto (link_uri);
+  int no_scheme = !url_has_scheme (link_uri);
    urlpos *newel;
  
    const char *base = closure->base ? closure->base : closure->parent_base;
@@ -324,10 +324,10 @@ handle_link (struct collect_urls_closure *closure, const char *link_uri,
  
    if (!base)
      {
-      if (no_proto)
+      if (no_scheme)
         {
-         /* We have no base, and the link does not have a protocol or
-             a host attached to it.  Nothing we can do.  */
+         /* We have no base, and the link does not have a host
+            attached to it.  Nothing we can do.  */
           /* #### Should we print a warning here?  Wget 1.5.x used to.  */
           return;
         }
@@ -349,11 +349,11 @@ handle_link (struct collect_urls_closure *closure, const char *link_uri,
    newel->pos = tag->attrs[attrid].value_raw_beginning - closure->text;
    newel->size = tag->attrs[attrid].value_raw_size;
  
-  /* A URL is relative if the host and protocol are not named, and the
-     name does not start with `/'.  */
-  if (no_proto && *link_uri != '/')
+  /* A URL is relative if the host is not named, and the name does not
+     start with `/'.  */
+  if (no_scheme && *link_uri != '/')
      newel->link_relative_p = 1;
-  else if (!no_proto)
+  else if (!no_scheme)
      newel->link_complete_p = 1;
  
    if (closure->tail)
diff --git a/src/http.c b/src/http.c

index f95a53f531a02e4e2f15a7ac544d2266a8a5c5ca..c4f661b5bd4b8e5ed9cd74aaa9cb6c3a6d6021db 100644 (file)
--- a/src/http.c
+++ b/src/http.c
@@ -614,7 +614,7 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
  #ifndef HAVE_SSL
        !persistent_available_p (u->host, u->port)
  #else
-      !persistent_available_p (u->host, u->port, (u->proto==URLHTTPS ? 1 : 0))
+      !persistent_available_p (u->host, u->port, u->scheme == SCHEME_HTTPS)
  #endif /* HAVE_SSL */
        )
      {
@@ -653,7 +653,7 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
           break;
         }
  #ifdef HAVE_SSL
-     if (u->proto == URLHTTPS)
+     if (u->scheme == SCHEME_HTTPS)
         if (connect_ssl (&ssl, ssl_ctx,sock) != 0)
          {
            logputs (LOG_VERBOSE, "\n");
@@ -786,7 +786,7 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
    port_maybe = NULL;
    if (1
  #ifdef HAVE_SSL
-      && remport != (u->proto == URLHTTPS
+      && remport != (u->scheme == SCHEME_HTTPS
                      ? DEFAULT_HTTPS_PORT : DEFAULT_HTTP_PORT)
  #else
        && remport != DEFAULT_HTTP_PORT
@@ -804,7 +804,12 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
  
    if (opt.cookies)
      cookies = build_cookies_request (ou->host, ou->port, ou->path,
-                                    ou->proto == URLHTTPS);
+#ifdef HAVE_SSL
+                                    ou->scheme == SCHEME_HTTPS
+#else
+                                    0
+#endif
+                                    );
  
    /* Allocate the memory for the request.  */
    request = (char *)alloca (strlen (command) + strlen (path)
@@ -848,7 +853,7 @@ Accept: %s\r\n\
  
    /* Send the request to server.  */
  #ifdef HAVE_SSL
-  if (u->proto == URLHTTPS)
+  if (u->scheme == SCHEME_HTTPS)
      num_written = ssl_iwrite (ssl, request, strlen (request));
    else
  #endif /* HAVE_SSL */
@@ -871,7 +876,7 @@ Accept: %s\r\n\
    /* Before reading anything, initialize the rbuf.  */
    rbuf_initialize (&rbuf, sock);
  #ifdef HAVE_SSL
-  if (u->proto == URLHTTPS)
+  if (u->scheme == SCHEME_HTTPS)
      rbuf.ssl = ssl;
    else
      rbuf.ssl = NULL;
diff --git a/src/recur.c b/src/recur.c

index 497c455fe2a8a39bd9002e1eb1de737d5a357e92..a159f119430948995f0f17b108af214aa8803247 100644 (file)
--- a/src/recur.c
+++ b/src/recur.c
@@ -187,7 +187,7 @@ recursive_retrieve (const char *file, const char *this_url)
       that the retrieval is done through proxy.  In that case, FTP
       links will be followed by default and recursion will not be
       turned off when following them.  */
-  this_url_ftp = (urlproto (this_url) == URLFTP);
+  this_url_ftp = (url_scheme (this_url) == SCHEME_FTP);
  
    /* Get the URL-s from an HTML file: */
    url_list = get_urls_html (file, canon_this_url ? canon_this_url : this_url,
@@ -217,12 +217,6 @@ recursive_retrieve (const char *file, const char *this_url)
           freeurl (u, 1);
           continue;
         }
-      if (u->proto == URLFILE)
-       {
-         DEBUGP (("Nothing to do with file:// around here.\n"));
-         freeurl (u, 1);
-         continue;
-       }
        assert (u->url != NULL);
        constr = xstrdup (u->url);
  
@@ -254,7 +248,7 @@ recursive_retrieve (const char *file, const char *this_url)
  
        /* If it is FTP, and FTP is not followed, chuck it out.  */
        if (!inl)
-       if (u->proto == URLFTP && !opt.follow_ftp && !this_url_ftp)
+       if (u->scheme == SCHEME_FTP && !opt.follow_ftp && !this_url_ftp)
           {
             DEBUGP (("Uh, it is FTP but i'm not in the mood to follow FTP.\n"));
             string_set_add (undesirable_urls, constr);
@@ -262,7 +256,7 @@ recursive_retrieve (const char *file, const char *this_url)
           }
        /* If it is absolute link and they are not followed, chuck it
          out.  */
-      if (!inl && u->proto != URLFTP)
+      if (!inl && u->scheme != SCHEME_FTP)
         if (opt.relative_only && !cur_url->link_relative_p)
           {
             DEBUGP (("It doesn't really look like a relative link.\n"));
@@ -281,7 +275,7 @@ recursive_retrieve (const char *file, const char *this_url)
        if (!inl && opt.no_parent
           /* If the new URL is FTP and the old was not, ignore
               opt.no_parent.  */
-         && !(!this_url_ftp && u->proto == URLFTP))
+         && !(!this_url_ftp && u->scheme == SCHEME_FTP))
         {
           /* Check for base_dir first.  */
           if (!(base_dir && frontcmp (base_dir, u->dir)))
@@ -368,7 +362,7 @@ recursive_retrieve (const char *file, const char *this_url)
           /* This line is bogus. */
           /*string_set_add (undesirable_urls, constr);*/
  
-         if (!inl && !((u->proto == URLFTP) && !this_url_ftp))
+         if (!inl && !((u->scheme == SCHEME_FTP) && !this_url_ftp))
             if (!opt.spanhost && this_url && !same_host (this_url, constr))
               {
                 DEBUGP (("This is not the same hostname as the parent's.\n"));
@@ -377,7 +371,7 @@ recursive_retrieve (const char *file, const char *this_url)
               }
         }
        /* What about robots.txt?  */
-      if (!inl && opt.use_robots && u->proto == URLHTTP)
+      if (!inl && opt.use_robots && u->scheme == SCHEME_FTP)
         {
           struct robot_specs *specs = res_get_specs (u->host, u->port);
           if (!specs)
@@ -418,7 +412,7 @@ recursive_retrieve (const char *file, const char *this_url)
           string_set_add (undesirable_urls, constr);
           /* Automatically followed FTPs will *not* be downloaded
              recursively.  */
-         if (u->proto == URLFTP)
+         if (u->scheme == SCHEME_FTP)
             {
               /* Don't you adore side-effects?  */
               opt.recursive = 0;
@@ -428,7 +422,7 @@ recursive_retrieve (const char *file, const char *this_url)
           /* Retrieve it.  */
           retrieve_url (constr, &filename, &newloc,
                        canon_this_url ? canon_this_url : this_url, &dt);
-         if (u->proto == URLFTP)
+         if (u->scheme == SCHEME_FTP)
             {
               /* Restore...  */
               opt.recursive = 1;
diff --git a/src/retr.c b/src/retr.c

index 457938e4c966e043b8d13551561c9289d4067f9b..3251b3d08f0b2bf84ce9d7c4cea6924972116a1e 100644 (file)
--- a/src/retr.c
+++ b/src/retr.c
@@ -300,7 +300,7 @@ rate (long bytes, long msecs, int pad)
    return res;
  }
  \f
-#define USE_PROXY_P(u) (opt.use_proxy && getproxy((u)->proto)          \
+#define USE_PROXY_P(u) (opt.use_proxy && getproxy((u)->scheme)         \
                         && no_proxy_match((u)->host,                    \
                                           (const char **)opt.no_proxy))
  
@@ -366,8 +366,8 @@ retrieve_url (const char *origurl, char **file, char **newloc,
        memset (u, 0, sizeof (*u));
        u->proxy = pu;
        /* Get the appropriate proxy server, appropriate for the
-        current protocol.  */
-      proxy = getproxy (pu->proto);
+        current scheme.  */
+      proxy = getproxy (pu->scheme);
        if (!proxy)
         {
           logputs (LOG_NOTQUIET, _("Could not find proxy host.\n"));
@@ -379,9 +379,9 @@ retrieve_url (const char *origurl, char **file, char **newloc,
         }
        /* Parse the proxy URL.  */
        result = parseurl (proxy, u, 0);
-      if (result != URLOK || u->proto != URLHTTP)
+      if (result != URLOK || u->scheme != SCHEME_HTTP)
         {
-         if (u->proto == URLHTTP)
+         if (u->scheme == SCHEME_HTTP)
             logprintf (LOG_NOTQUIET, "Proxy %s: %s.\n", proxy, uerrmsg(result));
           else
             logprintf (LOG_NOTQUIET, _("Proxy %s: Must be HTTP.\n"), proxy);
@@ -391,19 +391,18 @@ retrieve_url (const char *origurl, char **file, char **newloc,
           xfree (url);
           return PROXERR;
         }
-      u->proto = URLHTTP;
+      u->scheme = SCHEME_HTTP;
      }
  
-  assert (u->proto != URLFILE);        /* #### Implement me!  */
    mynewloc = NULL;
  
-  if (u->proto == URLHTTP
+  if (u->scheme == SCHEME_HTTP
  #ifdef HAVE_SSL
-      || u->proto == URLHTTPS
+      || u->scheme == SCHEME_HTTPS
  #endif
        )
      result = http_loop (u, &mynewloc, dt);
-  else if (u->proto == URLFTP)
+  else if (u->scheme == SCHEME_FTP)
      {
        /* If this is a redirection, we must not allow recursive FTP
          retrieval, so we save recursion to oldrec, and restore it
@@ -420,7 +419,7 @@ retrieve_url (const char *origurl, char **file, char **newloc,
  
          #### All of this is, of course, crap.  These types should be
          determined through mailcap.  */
-      if (redirections && u->local && (u->proto == URLFTP ))
+      if (redirections && u->local && (u->scheme == SCHEME_FTP))
         {
           char *suf = suffix (u->local);
           if (suf && (!strcasecmp (suf, "html") || !strcasecmp (suf, "htm")))
diff --git a/src/url.c b/src/url.c

index 582023e8a6fa98ff254311b733a755f01dea8e1b..c1c2b59632ee3621975407ce4aa6f9f8627c0975 100644 (file)
--- a/src/url.c
+++ b/src/url.c
@@ -49,21 +49,21 @@ extern int errno;
  
  static int urlpath_length PARAMS ((const char *));
  
-struct proto
+struct scheme_data
  {
-  char *name;
-  uerr_t ind;
-  unsigned short port;
+  enum url_scheme scheme;
+  char *leading_string;
+  int default_port;
  };
  
-/* Supported protocols: */
-static struct proto sup_protos[] =
+/* Supported schemes: */
+static struct scheme_data supported_schemes[] =
  {
-  { "http://", URLHTTP, DEFAULT_HTTP_PORT },
+  { SCHEME_HTTP,  "http://",  DEFAULT_HTTP_PORT },
  #ifdef HAVE_SSL
-  { "https://",URLHTTPS, DEFAULT_HTTPS_PORT},
+  { SCHEME_HTTPS, "https://", DEFAULT_HTTPS_PORT },
  #endif
-  { "ftp://", URLFTP, DEFAULT_FTP_PORT }
+  { SCHEME_FTP,   "ftp://",   DEFAULT_FTP_PORT }
  };
  
  static void parse_dir PARAMS ((const char *, char **, char **));
@@ -229,39 +229,28 @@ encode_string (const char *s)
      }                                          \
  } while (0)
  \f
-/* Returns the protocol type if URL's protocol is supported, or
-   URLUNKNOWN if not.  */
-uerr_t
-urlproto (const char *url)
+/* Returns the scheme type if the scheme is supported, or
+   SCHEME_INVALID if not.  */
+enum url_scheme
+url_scheme (const char *url)
  {
    int i;
  
-  for (i = 0; i < ARRAY_SIZE (sup_protos); i++)
-    if (!strncasecmp (url, sup_protos[i].name, strlen (sup_protos[i].name)))
-      return sup_protos[i].ind;
-  for (i = 0; url[i] && url[i] != ':' && url[i] != '/'; i++);
-  if (url[i] == ':')
-    {
-      for (++i; url[i] && url[i] != '/'; i++)
-       if (!ISDIGIT (url[i]))
-         return URLBADPORT;
-      if (url[i - 1] == ':')
-       return URLFTP;
-      else
-       return URLHTTP;
-    }
-  else
-    return URLHTTP;
+  for (i = 0; i < ARRAY_SIZE (supported_schemes); i++)
+    if (!strncasecmp (url, supported_schemes[i].leading_string,
+                     strlen (supported_schemes[i].leading_string)))
+      return supported_schemes[i].scheme;
+  return SCHEME_INVALID;
  }
  
-/* Skip the protocol part of the URL, e.g. `http://'.  If no protocol
-   part is found, returns 0.  */
+/* Return the number of characters needed to skip the scheme part of
+   the URL, e.g. `http://'.  If no scheme is found, returns 0.  */
  int
-skip_proto (const char *url)
+url_skip_scheme (const char *url)
  {
    const char *p = url;
  
-  /* Skip protocol name.  We allow `-' and `+' because of `whois++',
+  /* Skip the scheme name.  We allow `-' and `+' because of `whois++',
       etc. */
    while (ISALNUM (*p) || *p == '-' || *p == '+')
      ++p;
@@ -277,10 +266,10 @@ skip_proto (const char *url)
    return p - url;
  }
  
-/* Returns 1 if the URL begins with a protocol (supported or
+/* Returns 1 if the URL begins with a scheme (supported or
     unsupported), 0 otherwise.  */
  int
-has_proto (const char *url)
+url_has_scheme (const char *url)
  {
    const char *p = url;
    while (ISALNUM (*p) || *p == '-' || *p == '+')
@@ -290,11 +279,11 @@ has_proto (const char *url)
  
  /* Skip the username and password, if present here.  The function
     should be called *not* with the complete URL, but with the part
-   right after the protocol.
+   right after the scheme.
  
     If no username and password are found, return 0.  */
  int
-skip_uname (const char *url)
+url_skip_uname (const char *url)
  {
    const char *p;
    const char *q = NULL;
@@ -317,7 +306,7 @@ newurl (void)
  
    u = (struct urlinfo *)xmalloc (sizeof (struct urlinfo));
    memset (u, 0, sizeof (*u));
-  u->proto = URLUNKNOWN;
+  u->scheme = SCHEME_INVALID;
    return u;
  }
  
@@ -344,10 +333,14 @@ freeurl (struct urlinfo *u, int complete)
    return;
  }
  \f
+enum url_parse_error {
+  PE_UNRECOGNIZED_SCHEME, PE_BAD_PORT
+};
+
  /* Extract the given URL of the form
     (http:|ftp:)// (user (:password)?@)?hostname (:port)? (/path)?
     1. hostname (terminated with `/' or `:')
-   2. port number (terminated with `/'), or chosen for the protocol
+   2. port number (terminated with `/'), or chosen for the scheme
     3. dirname (everything after hostname)
     Most errors are handled.  No allocation is done, you must supply
     pointers to allocated memory.
@@ -367,36 +360,36 @@ parseurl (const char *url, struct urlinfo *u, int strict)
  {
    int i, l, abs_ftp;
    int recognizable;            /* Recognizable URL is the one where
-                                 the protocol name was explicitly
-                                 named, i.e. it wasn't deduced from
-                                 the URL format.  */
+                                 the scheme was explicitly named,
+                                 i.e. it wasn't deduced from the URL
+                                 format.  */
    uerr_t type;
  
    DEBUGP (("parseurl (\"%s\") -> ", url));
-  recognizable = has_proto (url);
+  recognizable = url_has_scheme (url);
    if (strict && !recognizable)
      return URLUNKNOWN;
-  for (i = 0, l = 0; i < ARRAY_SIZE (sup_protos); i++)
+  for (i = 0, l = 0; i < ARRAY_SIZE (supported_schemes); i++)
      {
-      l = strlen (sup_protos[i].name);
-      if (!strncasecmp (sup_protos[i].name, url, l))
+      l = strlen (supported_schemes[i].leading_string);
+      if (!strncasecmp (supported_schemes[i].leading_string, url, l))
         break;
      }
-  /* If protocol is recognizable, but unsupported, bail out, else
+  /* If scheme is recognizable, but unsupported, bail out, else
       suppose unknown.  */
-  if (recognizable && i == ARRAY_SIZE (sup_protos))
+  if (recognizable && i == ARRAY_SIZE (supported_schemes))
      return URLUNKNOWN;
-  else if (i == ARRAY_SIZE (sup_protos))
+  else if (i == ARRAY_SIZE (supported_schemes))
      type = URLUNKNOWN;
    else
-    u->proto = type = sup_protos[i].ind;
+    u->scheme = type = supported_schemes[i].scheme;
  
    if (type == URLUNKNOWN)
      l = 0;
    /* Allow a username and password to be specified (i.e. just skip
       them for now).  */
    if (recognizable)
-    l += skip_uname (url + l);
+    l += url_skip_uname (url + l);
    for (i = l; url[i] && url[i] != ':' && url[i] != '/'; i++);
    if (i == l)
      return URLBADHOST;
@@ -413,7 +406,10 @@ parseurl (const char *url, struct urlinfo *u, int strict)
        if (ISDIGIT (url[++i]))    /* A port number */
         {
           if (type == URLUNKNOWN)
-           u->proto = type = URLHTTP;
+           {
+             type = URLHTTP;
+             u->scheme = SCHEME_HTTP;
+           }
           for (; url[i] && url[i] != '/'; i++)
             if (ISDIGIT (url[i]))
               u->port = 10 * u->port + (url[i] - '0');
@@ -424,21 +420,27 @@ parseurl (const char *url, struct urlinfo *u, int strict)
           DEBUGP (("port %hu -> ", u->port));
         }
        else if (type == URLUNKNOWN) /* or a directory */
-       u->proto = type = URLFTP;
+       {
+         type = URLFTP;
+         u->scheme = SCHEME_FTP;
+       }
        else                      /* or just a misformed port number */
         return URLBADPORT;
      }
    else if (type == URLUNKNOWN)
-    u->proto = type = URLHTTP;
+    {
+      type = URLHTTP;
+      u->scheme = SCHEME_HTTP;
+    }
    if (!u->port)
      {
        int ind;
-      for (ind = 0; ind < ARRAY_SIZE (sup_protos); ind++)
-       if (sup_protos[ind].ind == type)
+      for (ind = 0; ind < ARRAY_SIZE (supported_schemes); ind++)
+       if (supported_schemes[ind].scheme == u->scheme)
           break;
-      if (ind == ARRAY_SIZE (sup_protos))
+      if (ind == ARRAY_SIZE (supported_schemes))
         return URLUNKNOWN;
-      u->port = sup_protos[ind].port;
+      u->port = supported_schemes[ind].default_port;
      }
    /* Some delimiter troubles...  */
    if (url[i] == '/' && url[i - 1] != ':')
@@ -480,7 +482,7 @@ parseurl (const char *url, struct urlinfo *u, int strict)
    if (l > 1 && u->dir[l - 1] == '/')
      u->dir[l - 1] = '\0';
    /* Re-create the path: */
-  abs_ftp = (u->proto == URLFTP && *u->dir == '/');
+  abs_ftp = (u->scheme == SCHEME_FTP && *u->dir == '/');
    /*  sprintf (u->path, "%s%s%s%s", abs_ftp ? "%2F": "/",
        abs_ftp ? (u->dir + 1) : u->dir, *u->dir ? "/" : "", u->file); */
    strcpy (u->path, abs_ftp ? "%2F" : "/");
@@ -574,11 +576,10 @@ parse_uname (const char *url, char **user, char **passwd)
    *user = NULL;
    *passwd = NULL;
  
-  /* Look for the end of the protocol string.  */
-  l = skip_proto (url);
+  /* Look for the end of the scheme identifier.  */
+  l = url_skip_scheme (url);
    if (!l)
      return URLUNKNOWN;
-  /* Add protocol offset.  */
    url += l;
    /* Is there an `@' character?  */
    for (p = url; *p && *p != '/'; p++)
@@ -623,26 +624,27 @@ process_ftp_type (char *path)
      return '\0';
  }
  \f
-/* Return the URL as fine-formed string, with a proper protocol, optional port
-   number, directory and optional user/password.  If `hide' is non-zero (as it
-   is when we're calling this on a URL we plan to print, but not when calling it
-   to canonicalize a URL for use within the program), password will be hidden.
-   The forbidden characters in the URL will be cleansed.  */
+/* Recreate the URL string from the data in urlinfo.  This can be used
+   to create a "canonical" representation of the URL.  If `hide' is
+   non-zero (as it is when we're calling this on a URL we plan to
+   print, but not when calling it to canonicalize a URL for use within
+   the program), password will be hidden.  The forbidden characters in
+   the URL will be cleansed.  */
  char *
  str_url (const struct urlinfo *u, int hide)
  {
-  char *res, *host, *user, *passwd, *proto_name, *dir, *file;
+  char *res, *host, *user, *passwd, *scheme_name, *dir, *file;
    int i, l, ln, lu, lh, lp, lf, ld;
-  unsigned short proto_default_port;
+  unsigned short default_port;
  
-  /* Look for the protocol name.  */
-  for (i = 0; i < ARRAY_SIZE (sup_protos); i++)
-    if (sup_protos[i].ind == u->proto)
+  /* Look for the scheme.  */
+  for (i = 0; i < ARRAY_SIZE (supported_schemes); i++)
+    if (supported_schemes[i].scheme == u->scheme)
        break;
-  if (i == ARRAY_SIZE (sup_protos))
+  if (i == ARRAY_SIZE (supported_schemes))
      return NULL;
-  proto_name = sup_protos[i].name;
-  proto_default_port = sup_protos[i].port;
+  scheme_name = supported_schemes[i].leading_string;
+  default_port = supported_schemes[i].default_port;
    host = encode_string (u->host);
    dir = encode_string (u->dir);
    file = encode_string (u->file);
@@ -660,7 +662,7 @@ str_url (const struct urlinfo *u, int hide)
        else
         passwd = encode_string (u->passwd);
      }
-  if (u->proto == URLFTP && *dir == '/')
+  if (u->scheme == SCHEME_FTP && *dir == '/')
      {
        char *tmp = (char *)xmalloc (strlen (dir) + 3);
        /*sprintf (tmp, "%%2F%s", dir + 1);*/
@@ -672,19 +674,19 @@ str_url (const struct urlinfo *u, int hide)
        dir = tmp;
      }
  
-  ln = strlen (proto_name);
+  ln = strlen (scheme_name);
    lu = user ? strlen (user) : 0;
    lp = passwd ? strlen (passwd) : 0;
    lh = strlen (host);
    ld = strlen (dir);
    lf = strlen (file);
    res = (char *)xmalloc (ln + lu + lp + lh + ld + lf + 20); /* safe sex */
-  /* sprintf (res, "%s%s%s%s%s%s:%d/%s%s%s", proto_name,
+  /* sprintf (res, "%s%s%s%s%s%s:%d/%s%s%s", scheme_name,
       (user ? user : ""), (passwd ? ":" : ""),
       (passwd ? passwd : ""), (user ? "@" : ""),
       host, u->port, dir, *dir ? "/" : "", file); */
    l = 0;
-  memcpy (res, proto_name, ln);
+  memcpy (res, scheme_name, ln);
    l += ln;
    if (user)
      {
@@ -700,7 +702,7 @@ str_url (const struct urlinfo *u, int hide)
      }
    memcpy (res + l, host, lh);
    l += lh;
-  if (u->port != proto_default_port)
+  if (u->port != default_port)
      {
        res[l++] = ':';
        long_to_string (res + l, (long)u->port);
@@ -1123,7 +1125,7 @@ find_last_char (const char *b, const char *e, char c)
     Either of the URIs may be absolute or relative, complete with the
     host name, or path only.  This tries to behave "reasonably" in all
     foreseeable cases.  It employs little specific knowledge about
-   protocols or URL-specific stuff -- it just works on strings.
+   schemes or URL-specific stuff -- it just works on strings.
  
     The parameters LINKLENGTH is useful if LINK is not zero-terminated.
     See uri_merge for a gentler interface to this functionality.
@@ -1131,11 +1133,11 @@ find_last_char (const char *b, const char *e, char c)
     #### This function should handle `./' and `../' so that the evil
     path_simplify can go.  */
  static char *
-uri_merge_1 (const char *base, const char *link, int linklength, int no_proto)
+uri_merge_1 (const char *base, const char *link, int linklength, int no_scheme)
  {
    char *constr;
  
-  if (no_proto)
+  if (no_scheme)
      {
        const char *end = base + urlpath_length (base);
  
@@ -1252,7 +1254,7 @@ uri_merge_1 (const char *base, const char *link, int linklength, int no_proto)
           constr[span + linklength] = '\0';
         }
      }
-  else /* !no_proto */
+  else /* !no_scheme */
      {
        constr = strdupdelim (link, link + linklength);
      }
@@ -1265,7 +1267,7 @@ uri_merge_1 (const char *base, const char *link, int linklength, int no_proto)
  char *
  uri_merge (const char *base, const char *link)
  {
-  return uri_merge_1 (base, link, strlen (link), !has_proto (link));
+  return uri_merge_1 (base, link, strlen (link), !url_has_scheme (link));
  }
  \f
  /* Optimize URL by host, destructively replacing u->host with realhost
@@ -1283,22 +1285,28 @@ opt_url (struct urlinfo *u)
    u->url = str_url (u, 0);
  }
  \f
-/* Returns proxy host address, in accordance with PROTO.  */
+/* Returns proxy host address, in accordance with SCHEME.  */
  char *
-getproxy (uerr_t proto)
+getproxy (enum url_scheme scheme)
  {
-  char *proxy;
+  char *proxy = NULL;
  
-  if (proto == URLHTTP)
-    proxy = opt.http_proxy ? opt.http_proxy : getenv ("http_proxy");
-  else if (proto == URLFTP)
-    proxy = opt.ftp_proxy ? opt.ftp_proxy : getenv ("ftp_proxy");
+  switch (scheme)
+    {
+    case SCHEME_HTTP:
+      proxy = opt.http_proxy ? opt.http_proxy : getenv ("http_proxy");
+      break;
  #ifdef HAVE_SSL
-  else if (proto == URLHTTPS)
-    proxy = opt.https_proxy ? opt.https_proxy : getenv ("https_proxy");
-#endif /* HAVE_SSL */
-  else
-    proxy = NULL;
+    case SCHEME_HTTPS:
+      proxy = opt.https_proxy ? opt.https_proxy : getenv ("https_proxy");
+      break;
+#endif
+    case SCHEME_FTP:
+      proxy = opt.ftp_proxy ? opt.ftp_proxy : getenv ("ftp_proxy");
+      break;
+    case SCHEME_INVALID:
+      break;
+    }
    if (!proxy || !*proxy)
      return NULL;
    return proxy;
diff --git a/src/url.h b/src/url.h

index c47da58c58366f7d98563222d7f5effb46fc4afa..3c74a0e244226c1217cd8780aca1a7593f89e608 100644 (file)
--- a/src/url.h
+++ b/src/url.h
@@ -25,12 +25,21 @@ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
  #define DEFAULT_FTP_PORT 21
  #define DEFAULT_HTTPS_PORT 443
  
+enum url_scheme {
+  SCHEME_HTTP,
+#ifdef HAVE_SSL
+  SCHEME_HTTPS,
+#endif
+  SCHEME_FTP,
+  SCHEME_INVALID
+};
  
  /* Structure containing info on a URL.  */
  struct urlinfo
  {
    char *url;                   /* Unchanged URL */
-  uerr_t proto;                        /* URL protocol */
+  enum url_scheme scheme;      /* URL scheme */
+
    char *host;                  /* Extracted hostname */
    unsigned short port;
    char ftp_type;
@@ -97,10 +106,10 @@ char *encode_string PARAMS ((const char *));
  
  struct urlinfo *newurl PARAMS ((void));
  void freeurl PARAMS ((struct urlinfo *, int));
-uerr_t urlproto PARAMS ((const char *));
-int skip_proto PARAMS ((const char *));
-int has_proto PARAMS ((const char *));
-int skip_uname PARAMS ((const char *));
+enum url_scheme url_detect_scheme PARAMS ((const char *));
+int url_skip_scheme PARAMS ((const char *));
+int url_has_scheme PARAMS ((const char *));
+int url_skip_uname PARAMS ((const char *));
  
  uerr_t parseurl PARAMS ((const char *, struct urlinfo *, int));
  char *str_url PARAMS ((const struct urlinfo *, int));
author	hniksic <devnull@localhost>
	Mon, 19 Nov 2001 00:12:05 +0000 (16:12 -0800)
committer	hniksic <devnull@localhost>
	Mon, 19 Nov 2001 00:12:05 +0000 (16:12 -0800)
src/ChangeLog		patch \| blob \| history
src/host.c		patch \| blob \| history
src/html-url.c		patch \| blob \| history
src/http.c		patch \| blob \| history
src/recur.c		patch \| blob \| history
src/retr.c		patch \| blob \| history
src/url.c		patch \| blob \| history
src/url.h		patch \| blob \| history