From: hniksic Date: Mon, 19 Nov 2001 01:14:14 +0000 (-0800) Subject: [svn] Rewrite shorthand URLs in a step separate from parsing. X-Git-Tag: v1.13~2064 X-Git-Url: http://sjero.net/git/?p=wget;a=commitdiff_plain;h=e8e87978737f88a536f6606b9029ece7b1001b4e [svn] Rewrite shorthand URLs in a step separate from parsing. Published in . --- diff --git a/src/ChangeLog b/src/ChangeLog index c37a399c..f375f50f 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,9 @@ +2001-11-19 Hrvoje Niksic + + * main.c (main): Use it. + + * url.c (rewrite_url_maybe): New function. + 2001-11-19 Hrvoje Niksic * url.c: Clean up handling of URL schemes. diff --git a/src/main.c b/src/main.c index 6a79e0b0..092a455f 100644 --- a/src/main.c +++ b/src/main.c @@ -50,6 +50,7 @@ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include "recur.h" #include "host.h" #include "cookies.h" +#include "url.h" /* On GNU system this will include system-wide getopt.h. */ #include "getopt.h" @@ -739,9 +740,14 @@ Can't timestamp and not clobber old files at the same time.\n")); /* Fill in the arguments. */ for (i = 0; i < nurl; i++, optind++) { - char *irix4_cc_needs_this; - STRDUP_ALLOCA (irix4_cc_needs_this, argv[optind]); - url[i] = irix4_cc_needs_this; + char *rewritten = rewrite_url_maybe (argv[optind]); + if (rewritten) + { + printf ("Converted %s to %s\n", argv[optind], rewritten); + url[i] = rewritten; + } + else + url[i] = xstrdup (argv[optind]); } url[i] = NULL; @@ -853,6 +859,8 @@ Can't timestamp and not clobber old files at the same time.\n")); convert_all_links (); } log_close (); + for (i = 0; i < nurl; i++) + free (url[i]); cleanup (); #ifdef DEBUG_MALLOC print_malloc_debug_stats (); diff --git a/src/url.c b/src/url.c index c1c2b596..2159bb91 100644 --- a/src/url.c +++ b/src/url.c @@ -296,6 +296,66 @@ url_skip_uname (const char *url) else return 0; } + +/* Used by main.c: detect URLs written using the "shorthand" URL forms + popularized by Netscape and NcFTP. HTTP shorthands look like this: + + www.foo.com[:port]/dir/file -> http://www.foo.com[:port]/dir/file + www.foo.com[:port] -> http://www.foo.com[:port] + + FTP shorthands look like this: + + foo.bar.com:dir/file -> ftp://foo.bar.com/dir/file + foo.bar.com:/absdir/file -> ftp://foo.bar.com//absdir/file + + If the URL needs not or cannot be rewritten, return NULL. */ +char * +rewrite_url_maybe (const char *url) +{ + const char *p; + + if (url_has_scheme (url)) + return NULL; + + /* Look for a ':' or '/'. The former signifies NcFTP syntax, the + latter Netscape. */ + for (p = url; *p && *p != ':' && *p != '/'; p++) + ; + + if (p == url) + return NULL; + + if (*p == ':') + { + const char *pp, *path; + char *res; + /* If the characters after the colon and before the next slash + or end of string are all digits, it's HTTP. */ + int digits = 0; + for (pp = p + 1; ISDIGIT (*pp); pp++) + ++digits; + if (digits > 0 + && (*pp == '/' || *pp == '\0')) + goto http; + + /* Prepend "ftp://" to the entire URL... */ + path = p + 1; + res = xmalloc (6 + strlen (url) + 1); + sprintf (res, "ftp://%s", url); + /* ...and replace ':' with '/'. */ + res[6 + (p - url)] = '/'; + return res; + } + else + { + char *res; + http: + /* Just prepend "http://" to what we have. */ + res = xmalloc (7 + strlen (url) + 1); + sprintf (res, "http://%s", url); + return res; + } +} /* Allocate a new urlinfo structure, fill it with default values and return a pointer to it. */ diff --git a/src/url.h b/src/url.h index 3c74a0e2..e9bcc379 100644 --- a/src/url.h +++ b/src/url.h @@ -137,4 +137,6 @@ urlpos *add_url PARAMS ((urlpos *, const char *, const char *)); downloaded_file_t downloaded_file PARAMS ((downloaded_file_t, const char *)); +char *rewrite_url_maybe PARAMS ((const char *)); + #endif /* URL_H */