From 0e72aa6e137b5c4e4b14ffd24a319f00b7b6acbf Mon Sep 17 00:00:00 2001 From: hniksic Date: Sun, 9 Dec 2001 17:58:37 -0800 Subject: [PATCH] [svn] Rewrite path_simplify. Published in . --- src/ChangeLog | 5 + src/utils.c | 315 +++++++++++++++++++++++++++++--------------------- 2 files changed, 190 insertions(+), 130 deletions(-) diff --git a/src/ChangeLog b/src/ChangeLog index 20373dd2..6be54d17 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,8 @@ +2001-12-10 Hrvoje Niksic + + * utils.c (path_simplify): Rewrite, with better comments, and + without the use of strcpy to move overlapping blocks. + 2001-12-09 Hrvoje Niksic * init.c (cmd_spec_progress): Resurrect. Check whether VAL is a diff --git a/src/utils.c b/src/utils.c index 32c17b4b..fd283710 100644 --- a/src/utils.c +++ b/src/utils.c @@ -473,163 +473,125 @@ fork_to_background (void) } #endif /* not WINDOWS */ -#if 0 -/* debug */ -char * -ps (char *orig) -{ - char *r = xstrdup (orig); - path_simplify (r); - return r; -} -#endif +/* Resolve "." and ".." elements of PATH by destructively modifying + PATH. "." is resolved by removing that path element, and ".." is + resolved by removing the preceding path element. Leading and + trailing slashes are preserved. -/* Canonicalize PATH, and return a new path. The new path differs from PATH - in that: - Multple `/'s are collapsed to a single `/'. - Leading `./'s and trailing `/.'s are removed. - Trailing `/'s are removed. - Non-leading `../'s and trailing `..'s are handled by removing - portions of the path. - - E.g. "a/b/c/./../d/.." will yield "a/b/". This function originates - from GNU Bash and has been mutilated to unrecognition for use in - Wget. - - Changes for Wget: - Always use '/' as stub_char. - Don't check for local things using canon_stat. - Change the original string instead of strdup-ing. - React correctly when beginning with `./' and `../'. - Don't zip out trailing slashes. - Return a value indicating whether any modifications took place. - - If you dare change this function, take a careful look at the test - cases below, and make sure that they pass. */ + Return non-zero if any changes have been made. + + For example, "a/b/c/./../d/.." will yield "a/b/". More exhaustive + test examples are provided below. If you change anything in this + function, run test_path_simplify to make sure you haven't broken a + test case. + + A previous version of this function was based on path_simplify() + from GNU Bash, but it has been rewritten for Wget 1.8.1. */ int path_simplify (char *path) { - register int i, start; - int changes = 0; - - if (!*path) - return 0; + int change = 0; + char *p, *end; if (path[0] == '/') - /* Preserve initial '/'. */ - ++path; + ++path; /* preserve the leading '/'. */ - /* Nix out leading `.' or `..' with. */ - if ((path[0] == '.' && path[1] == '\0') - || (path[0] == '.' && path[1] == '.' && path[2] == '\0')) - { - path[0] = '\0'; - changes = 1; - return changes; - } + p = path; + end = p + strlen (p) + 1; /* position past the terminating zero. */ - /* Walk along PATH looking for things to compact. */ - i = 0; while (1) { - if (!path[i]) - break; - - while (path[i] && path[i] != '/') - i++; + again: + /* P should point to the beginning of a path element. */ - start = i++; - - /* If we didn't find any slashes, then there is nothing left to do. */ - if (!path[start]) - break; - - /* Handle multiple `/'s in a row. */ - while (path[i] == '/') - i++; - - if ((start + 1) != i) + if (*p == '.' && (*(p + 1) == '/' || *(p + 1) == '\0')) { - strcpy (path + start + 1, path + i); - i = start + 1; - changes = 1; + /* Handle "./foo" by moving "foo" two characters to the + left. */ + if (*(p + 1) == '/') + { + change = 1; + memmove (p, p + 2, end - p); + end -= 2; + goto again; + } + else + { + change = 1; + *p = '\0'; + break; + } } - - /* Check for `../', `./' or trailing `.' by itself. */ - if (path[i] == '.') + else if (*p == '.' && *(p + 1) == '.' + && (*(p + 2) == '/' || *(p + 2) == '\0')) { - /* Handle trailing `.' by itself. */ - if (!path[i + 1]) + /* Handle "../foo" by moving "foo" one path element to the + left. */ + char *b = p; + + /* Backtrack by one path element, but not past the beginning + of PATH. */ + + /* foo/bar/../baz */ + /* ^ p */ + /* ^ b */ + + if (b > path + 1) { - path[--i] = '\0'; - changes = 1; - break; + /* Find the character preceded by slash or by the + beginning of path. */ + for (--b; b > path && *(b - 1) != '/'; b--) + ; } - /* Handle `./'. */ - if (path[i + 1] == '/') + change = 1; + if (*(p + 2) == '/') { - strcpy (path + i, path + i + 1); - i = (start < 0) ? 0 : start; - changes = 1; - continue; + memmove (b, p + 3, end - (p + 3)); + end -= (p + 3) - b; + p = b; + } + else + { + *b = '\0'; + break; } - /* Handle `../' or trailing `..' by itself. */ - if (path[i + 1] == '.' && - (path[i + 2] == '/' || !path[i + 2])) + goto again; + } + else if (*p == '/') + { + /* Remove empty path elements. Not mandated by rfc1808 et + al, but empty path elements are not all that useful, and + the rest of Wget might not deal with them well. */ + char *q = p; + while (*q == '/') + ++q; + change = 1; + if (*q == '\0') { - while (--start > -1 && path[start] != '/'); - strcpy (path + start + 1, path + i + 2 + (start == -1 && path[i + 2])); - i = (start < 0) ? 0 : start; - changes = 1; - continue; + *p = '\0'; + break; } - } /* path == '.' */ - } /* while */ + memmove (p, q, end - q); + end -= q - p; + goto again; + } - /* Addition: Remove all `./'-s and `../'-s preceding the string. */ - i = 0; - while (1) - { - if (path[i] == '.' && path[i + 1] == '/') - i += 2; - else if (path[i] == '.' && path[i + 1] == '.' && path[i + 2] == '/') - i += 3; - else + /* Skip to the next path element. */ + while (*p && *p != '/') + ++p; + if (*p == '\0') break; - } - if (i) - { - strcpy (path, path + i - 0); - changes = 1; + + /* Make sure P points to the beginning of the next path element, + which is location after the slash. */ + ++p; } - return changes; -} - -/* Test cases: - ps("") -> "" - ps("/") -> "/" - ps(".") -> "" - ps("..") -> "" - ps("/.") -> "/" - ps("/..") -> "/" - ps("foo") -> "foo" - ps("foo/bar") -> "foo/bar" - ps("foo//bar") -> "foo/bar" (possibly a bug) - ps("foo/../bar") -> "bar" - ps("foo/bar/..") -> "foo/" - ps("foo/bar/../x") -> "foo/x" - ps("foo/bar/../x/") -> "foo/x/" - ps("foo/..") -> "" - ps("/foo/..") -> "/" - ps("a/b/../../c") -> "c" - ps("/a/b/../../c") -> "/c" - ps("./a/../b") -> "b" - ps("/./a/../b") -> "/b" -*/ + return change; +} /* "Touch" FILE, i.e. make its atime and mtime equal to the time specified with TM. */ @@ -1846,3 +1808,96 @@ debug_test_md5 (char *buf) return res; } #endif + +#if 0 +/* Debugging and testing support for path_simplify. */ + +/* Debug: run path_simplify on PATH and return the result in a new + string. Useful for calling from the debugger. */ +static char * +ps (char *path) +{ + char *copy = xstrdup (path); + path_simplify (copy); + return copy; +} + +static void +run_test (char *test, char *expected_result, int expected_change) +{ + char *test_copy = xstrdup (test); + int modified = path_simplify (test_copy); + + if (0 != strcmp (test_copy, expected_result)) + { + printf ("Failed path_simplify(\"%s\"): expected \"%s\", got \"%s\".\n", + test, expected_result, test_copy); + } + if (modified != expected_change) + { + if (expected_change == 1) + printf ("Expected no modification with path_simplify(\"%s\").\n", + test); + else + printf ("Expected modification with path_simplify(\"%s\").\n", + test); + } + xfree (test_copy); +} + +static void +test_path_simplify (void) +{ + static struct { + char *test, *result; + int should_modify; + } tests[] = { + { "", "", 0 }, + { ".", "", 1 }, + { "..", "", 1 }, + { "foo", "foo", 0 }, + { "foo/bar", "foo/bar", 0 }, + { "foo///bar", "foo/bar", 1 }, + { "foo/.", "foo/", 1 }, + { "foo/./", "foo/", 1 }, + { "foo./", "foo./", 0 }, + { "foo/../bar", "bar", 1 }, + { "foo/../bar/", "bar/", 1 }, + { "foo/bar/..", "foo/", 1 }, + { "foo/bar/../x", "foo/x", 1 }, + { "foo/bar/../x/", "foo/x/", 1 }, + { "foo/..", "", 1 }, + { "foo/../..", "", 1 }, + { "a/b/../../c", "c", 1 }, + { "./a/../b", "b", 1 } + }; + int i; + + for (i = 0; i < ARRAY_SIZE (tests); i++) + { + char *test = tests[i].test; + char *expected_result = tests[i].result; + int expected_change = tests[i].should_modify; + run_test (test, expected_result, expected_change); + } + + /* Now run all the tests with a leading slash before the test case, + to prove that the slash is being preserved. */ + for (i = 0; i < ARRAY_SIZE (tests); i++) + { + char *test, *expected_result; + int expected_change = tests[i].should_modify; + + test = xmalloc (1 + strlen (tests[i].test) + 1); + sprintf (test, "/%s", tests[i].test); + + expected_result = xmalloc (1 + strlen (tests[i].result) + 1); + sprintf (expected_result, "/%s", tests[i].result); + + run_test (test, expected_result, expected_change); + + xfree (test); + xfree (expected_result); + } +} +#endif -- 2.39.2