2 * Copyright (c) 2001-2007 Sun Microsystems, Inc. All rights reserved.
4 * The Sun Project JXTA(TM) Software License
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
9 * 1. Redistributions of source code must retain the above copyright notice,
10 * this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright notice,
13 * this list of conditions and the following disclaimer in the documentation
14 * and/or other materials provided with the distribution.
16 * 3. The end-user documentation included with the redistribution, if any, must
17 * include the following acknowledgment: "This product includes software
18 * developed by Sun Microsystems, Inc. for JXTA(TM) technology."
19 * Alternately, this acknowledgment may appear in the software itself, if
20 * and wherever such third-party acknowledgments normally appear.
22 * 4. The names "Sun", "Sun Microsystems, Inc.", "JXTA" and "Project JXTA" must
23 * not be used to endorse or promote products derived from this software
24 * without prior written permission. For written permission, please contact
25 * Project JXTA at http://www.jxta.org.
27 * 5. Products derived from this software may not be called "JXTA", nor may
28 * "JXTA" appear in their name, without prior written permission of Sun.
30 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES,
31 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
32 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SUN
33 * MICROSYSTEMS OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
34 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
35 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
36 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
37 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
38 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
39 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
41 * JXTA is a registered trademark of Sun Microsystems, Inc. in the United
42 * States and other countries.
44 * Please see the license information page at :
45 * <http://www.jxta.org/project/www/license.html> for instructions on use of
46 * the license in source files.
48 * ====================================================================
50 * This software consists of voluntary contributions made by many individuals
51 * on behalf of Project JXTA. For more information on Project JXTA, please see
52 * http://www.jxta.org.
54 * This license is based on the BSD license adopted by the Apache Foundation.
57 package net.jxta.impl.document;
60 import java.io.Writer;
62 import java.util.ArrayList;
63 import java.util.Collections;
64 import java.util.Enumeration;
65 import java.util.Iterator;
66 import java.util.List;
67 import java.util.logging.Level;
68 import java.util.logging.Logger;
70 import java.io.IOException;
72 import net.jxta.document.Attribute;
73 import net.jxta.document.XMLElement;
74 import net.jxta.logging.Logging;
78 * An element of a <CODE>StructuredDocument</CODE>. <CODE>StructuredDocument</CODE>s
79 * are made up of hierarchies of elements. LiteXMLElement is part of an implementation
80 * which makes use of XML-style document conventions, but without the overhead of a
83 public class LiteXMLElement implements XMLElement<LiteXMLElement> {
86 * Defines a range of characters, probably within a string. The range is
87 * deemed to be invalid if 'start' is -1. A zero length range is, by
88 * convention, described by an 'end' value of 'start' - 1.
90 protected static class charRange implements Comparable<charRange> {
93 * Contains the start position of this range.
98 * Contains the end position of this range. one weird thing: if end == start -1,
99 * then the item is of zero length beginning at start.
104 * Constructor for a null charRange.
112 * Constructor for which the bounds are specified.
114 public charRange(int start, int end) {
123 public boolean equals(Object aRange) {
124 if (this == aRange) {
128 if (!(aRange instanceof charRange)) {
132 charRange someRange = (charRange) aRange;
134 return (start == someRange.start) && (end == someRange.end);
140 public int compareTo(charRange someRange) {
141 if (this == someRange) {
145 if (start < someRange.start) {
149 if (start > someRange.start) {
153 if (end < someRange.end) {
157 if (end > someRange.end) {
168 public String toString() {
169 return "[" + start + "," + end + "]";
173 * Returns true if the <CODE>charRange</CODE> specified by someRange is
174 * contained within this range.
176 * @param someRange The range which must be contained within this range.
177 * @return true if the specified range is contained with this range otherwise false.
179 public boolean contains(charRange someRange) {
180 return (isValid() && someRange.isValid() && (start <= someRange.start) && (end >= someRange.end));
184 * Returns true if the <CODE>tagRange</CODE> specified by someRange is
185 * contained within this range.
187 * @param someRange The range which must be contained within this range.
188 * @return true if the specified range is contained with this range otherwise false.
190 public boolean contains(tagRange someRange) {
191 return (isValid() && someRange.isValid() && (start <= someRange.startTag.start) && (end >= someRange.endTag.end));
195 * Returns true if the location specified is contained in this range.
197 * @param someLoc the location which is to be tested.
198 * @return true if the location is in this range, otherwise false.
200 public boolean contains(int someLoc) {
201 return (isValid() && (someLoc >= 0) && (start <= someLoc) && (end >= someLoc));
205 * Returns true if the range is both non-null and has a length of greater
206 * than or equal to zero.
208 * @return true if the range is a valid one, otherwise false.
210 public boolean isValid() {
211 return length() >= 0;
215 * Returns the length of this range.
217 * @return The length of the range or -1 if the range is null.
219 public int length() {
220 if ((-1 == start) || (-1 == end)) {
224 return (end - start + 1);
230 * A tagRange is a collection of char ranges useful for describing XML
234 * <dt><code>startTag</code></dt>
235 * <dd>The range of the opening tag, ie. <tag></dd>
236 * <dt><code>body</code></dt>
237 * <dd>Everything between <code>startTag</code> and <code>endTag</code>.</dd>
238 * <dt><code>endTag</code></dt>
239 * <dd>The range of the terminating tag, ie. </tag>.</dd>
242 * <p/>For empty-element tags the <code>startTag</code>, <code>body</code>
243 * and <code>endTag</code> will be equal.
245 protected static class tagRange implements Comparable<tagRange> {
246 public charRange startTag;
247 public charRange body;
248 public charRange endTag;
251 startTag = new charRange();
252 body = new charRange();
253 endTag = new charRange();
256 public tagRange(charRange startTag, charRange body, charRange endTag) {
257 this.startTag = startTag;
259 this.endTag = endTag;
266 public boolean equals(Object aRange) {
267 if (this == aRange) {
271 if (!(aRange instanceof tagRange)) {
275 tagRange likeMe = (tagRange) aRange;
277 return startTag.equals(likeMe.startTag) && body.equals(likeMe.body) && endTag.equals(likeMe.endTag);
283 public int compareTo(tagRange someRange) {
284 if (this == someRange) {
288 int compared = startTag.compareTo(someRange.startTag);
294 return endTag.compareTo(someRange.endTag);
301 public String toString() {
302 return startTag + ":" + body + ":" + endTag;
306 * Returns true if the <CODE>tagRange</CODE> specified by someRange is
307 * contained within the body portion of this range.
309 * @param someRange The range which must be contained within this range.
310 * @return true if the specified range is contained with this range
313 public boolean contains(tagRange someRange) {
314 return (isValid() && someRange.isValid() && (body.start <= someRange.startTag.start)
315 && (body.end >= someRange.endTag.end));
319 * Returns true if the <CODE>charRange</CODE> specified by someRange is
320 * contained within the body portion of this range.
322 * @param someRange The range which must be contained within this range.
323 * @return true if the specified range is contained with this range
326 public boolean contains(charRange someRange) {
327 return (isValid() && someRange.isValid() && (body.start <= someRange.start) && (body.end >= someRange.end));
331 * @return <code>true</code> if this tagRange represents and empty
334 public boolean isEmptyElement() {
335 return isValid() && startTag.equals(body) && startTag.equals(endTag);
339 * @return true if valid
341 public boolean isValid() {
342 return (null != startTag) && (null != body) && (null != endTag) && startTag.isValid() && body.isValid()
350 private final static transient Logger LOG = Logger.getLogger(LiteXMLElement.class.getName());
353 * If true then every operation which modifies the state of the document will
354 * perform a consistency check. This is a deadly performance killer but
355 * helps a lot in isolating bugs.
357 protected final static transient boolean paranoidConsistencyChecking = false;
360 * The document associated with this Element.
362 protected final transient LiteXMLDocument doc;
365 * Identifies the element which is the parent of this element. If <code>
366 * this.parent == this</code> then this element is the root of the document.
367 * If <code>null == parent</code> then this element has not yet been
368 * inserted into the document.
370 protected transient LiteXMLElement parent;
373 * The portion of the source XML associated with this node
375 protected transient tagRange loc;
378 * If this node has yet to be inserted into the document then will contain
379 * the String value of this node, otherwise null.
381 private transient StringBuilder uninserted = null;
384 * The child elements associated with this element
386 private transient List<LiteXMLElement> children;
/**
 * Creates a new LiteXMLElement which describes content already present
 * in the document.
 *
 * @param doc The {@link LiteXMLDocument} which is the root of the document.
 * @param loc The location of the element within the document.
 */
protected LiteXMLElement(LiteXMLDocument doc, tagRange loc) {
    this.doc = doc;
    this.loc = loc;
}
400 * Creates new LiteElement
402 * @param doc The {@link LiteXMLDocument} which is the root of the document.
403 * @param name The name of the element being created.
404 * @param val The value of the element being created or null if there is no
405 * content to the element.
407 public LiteXMLElement(LiteXMLDocument doc, final String name, final String val) {
408 this(doc, new tagRange());
410 for (int eachChar = name.length() - 1; eachChar >= 0; eachChar--) {
411 if (Character.isWhitespace(name.charAt(eachChar))) {
412 throw new IllegalArgumentException("Element names may not contain spaces.");
416 if ((null == val) || (0 == val.length())) {
417 uninserted = new StringBuilder("<" + name + "/>");
419 uninserted = new StringBuilder(val);
420 encodeEscaped(uninserted);
421 uninserted.insert(0, "<" + name + ">");
422 uninserted.append("</").append(name).append(">");
430 public boolean equals(Object element) {
431 if (this == element) {
435 if (!(element instanceof LiteXMLElement)) {
439 LiteXMLElement liteElement = (LiteXMLElement) element;
441 if (getDocument() != liteElement.getDocument()) {
445 if (!getName().equals(liteElement.getName())) {
451 if (null != uninserted) {
452 val1 = uninserted.toString();
454 val1 = getTextValue();
457 String val2 = liteElement.getTextValue();
459 if ((null == val1) && (null == val2)) {
463 return null != val1 && null != val2 && val1.equals(val2);
470 * <p/>A toString implementation for debugging purposes.
473 public String toString() {
474 if (paranoidConsistencyChecking) {
478 String name = getName();
481 name = "<<null name>>";
483 String value = getTextValue();
486 value = "<<null value>>";
489 if ((value.length() + name.length()) >= 60) {
490 int len = Math.max(20, 60 - name.length());
492 value = value.substring(0, Math.min(len, value.length()));
495 // FIXME 20021125 bondolo@jxta.org should remove carriage control.
497 return super.toString() + " / " + name + " = " + value;
503 public LiteXMLDocument getRoot() {
504 return getDocument();
510 public LiteXMLElement getParent() {
517 public Enumeration<LiteXMLElement> getChildren() {
518 if (null != uninserted) {
519 throw new IllegalStateException("This element has not been added.");
522 if (null == children) {
523 List<LiteXMLElement> empty = Collections.emptyList();
525 return Collections.enumeration(empty);
527 return Collections.enumeration(children);
534 public String getName() {
535 if (null != uninserted) {
536 throw new IllegalStateException("This element has not been added.");
539 if (paranoidConsistencyChecking) {
543 int current = loc.startTag.start + 1;
545 while (current <= loc.startTag.end) {
546 char inTagName = getDocument().docContent.charAt(current);
548 if (Character.isWhitespace(inTagName) || ('/' == inTagName) || ('>' == inTagName)) {
555 return getDocument().docContent.substring(loc.startTag.start + 1, current);
559 * Get the name associated with an element.
561 * @return A string containing the key of this element.
563 public String getKey() {
568 * Get the value (if any) associated with an element.
570 * @return A string containing the value of this element, if any, otherwise null.
572 public String getValue() {
573 return getTextValue();
579 public void appendChild(LiteXMLElement element) {
580 if (element.getDocument() != getDocument()) {
581 throw new IllegalArgumentException("Wrong document");
584 if (null != element.parent) {
585 throw new IllegalArgumentException("New element is already in document");
588 if (null != uninserted) {
589 throw new IllegalStateException("This element has not been added.");
592 if (paranoidConsistencyChecking) {
596 // If uninserted then this new element contains content which needs to
597 // be added to the document. If uninserted is null then the child
598 // element's content is already in the document, but merely needs to
599 // be recognized as a child.
600 if (null != element.uninserted) {
601 if (loc.startTag.equals(loc.endTag)) {
602 getDocument().docContent.deleteCharAt(loc.endTag.end - 1); // delete the /
603 loc.startTag.end -= 1;
605 // skip past the name portion
606 int current = loc.startTag.start + 1;
608 while (current <= loc.startTag.end) {
609 char inTagName = getDocument().docContent.charAt(current);
611 if (Character.isWhitespace(inTagName) || ('>' == inTagName)) {
618 String tagName = getDocument().docContent.substring(loc.startTag.start + 1, current);
620 getDocument().docContent.insert(loc.startTag.end + 1, "</" + tagName + ">");
621 getDocument().adjustLocations(loc.startTag.end + 1, tagName.length() + 2);
622 loc.endTag = new charRange(loc.startTag.end + 1, loc.startTag.end + 3 + tagName.length());
623 loc.body = new charRange(loc.startTag.end + 1, loc.startTag.end);
626 getDocument().docContent.insert(loc.endTag.start, element.uninserted);
628 element.loc.startTag.start = loc.endTag.start;
629 element.loc.startTag.end = getDocument().docContent.indexOf(">", element.loc.startTag.start);
631 if ('/' != element.uninserted.charAt(element.uninserted.length() - 2)) {
632 element.loc.body.start = element.loc.startTag.end + 1;
634 element.loc.endTag.end = element.loc.startTag.start + element.uninserted.length() - 1;
635 element.loc.endTag.start = getDocument().docContent.lastIndexOf("<", element.loc.endTag.end);
637 element.loc.body.end = element.loc.endTag.start - 1;
639 element.loc.body = new charRange(element.loc.startTag.start, element.loc.startTag.end);
640 element.loc.endTag = new charRange(element.loc.startTag.start, element.loc.startTag.end);
643 if (0 != loc.body.length()) {
644 getDocument().adjustLocations(loc.endTag.start, element.uninserted.length());
647 getDocument().adjustLocations(loc.endTag.start, element.uninserted.length());
651 loc.body.end += element.uninserted.length();
653 element.uninserted = null;
656 element.parent = this;
658 if (null == children) {
659 children = new ArrayList<LiteXMLElement>();
662 children.add(element);
664 if (paranoidConsistencyChecking) {
670 * Returns an enumeration of the immediate children of this element whose
671 * name match the specified string.
673 * @param key The key which will be matched against.
674 * @return enumeration containing all of the children of this element.
676 public Enumeration<LiteXMLElement> getChildren(Object key) {
677 if (key instanceof String)
678 return getChildren((String) key);
680 throw new ClassCastException(key.getClass().getName() + " not supported by getChildren.");
686 public Enumeration<LiteXMLElement> getChildren(String name) {
687 if (null != uninserted) {
688 throw new IllegalStateException("This element has not been added.");
691 if (paranoidConsistencyChecking) {
695 if (null == children) {
696 List<LiteXMLElement> empty = Collections.emptyList();
698 return Collections.enumeration(empty);
701 List<LiteXMLElement> result = new ArrayList<LiteXMLElement>();
703 for (LiteXMLElement aChild : children) {
704 if (name.equals(aChild.getName())) {
709 return Collections.enumeration(result);
715 public String getTextValue() {
716 return getTextValue(false, true);
720 * Get the value (if any) associated with an element.
722 * @param getEncoded if true then the contents will be encoded such that
723 * the contents will not be interpreted as XML. see
724 * {@link <a href="http://www.w3.org/TR/REC-xml#syntax">W3C XML 1.0 Specification</a>}
725 * ie. < -> < & -> &
726 * @param trim if true trims prefix and suffix white space
727 * @return A string containing the value of this element, if any, otherwise null.
729 protected String getTextValue(boolean getEncoded, boolean trim) {
730 if (null != uninserted) {
731 throw new IllegalStateException("This element has not been added.");
734 if (paranoidConsistencyChecking) {
738 StringBuilder building = new StringBuilder();
740 List<charRange> ranges = new ArrayList<charRange>();
743 * insert the ranges of the children in order. insertion method is ok
744 * because the number of children is usually less than 10 or so.
746 for (Enumeration<LiteXMLElement> eachChild = getChildren(); eachChild.hasMoreElements();) {
747 LiteXMLElement aChild = eachChild.nextElement();
748 charRange childsRange = new charRange(aChild.loc.startTag.start, aChild.loc.endTag.end);
750 // find where to insert.
751 for (int eachRange = 0; eachRange < ranges.size(); eachRange++) {
752 charRange rangeChild = ranges.get(eachRange);
754 if (1 == rangeChild.compareTo(childsRange)) {
755 ranges.set(eachRange, childsRange);
756 childsRange = rangeChild;
759 ranges.add(childsRange);
762 int current = loc.body.start;
764 // add all the text not part of some child
765 for (charRange aRange : ranges) {
766 building.append(getDocument().docContent.substring(current, aRange.start));
768 current = aRange.end + 1;
772 building.append(getDocument().docContent.substring(current, loc.endTag.start));
775 building = decodeEscaped(building);
779 int firstNonWhiteSpace = 0;
780 int lastNonWhiteSpace = building.length() - 1;
783 while (firstNonWhiteSpace < building.length()) {
784 char possibleSpace = building.charAt(firstNonWhiteSpace);
786 if (!Character.isWhitespace(possibleSpace)) {
790 firstNonWhiteSpace++;
793 // did we find no non-whitespace?
794 if (firstNonWhiteSpace >= building.length()) {
798 while (lastNonWhiteSpace >= firstNonWhiteSpace) {
799 char possibleSpace = building.charAt(lastNonWhiteSpace);
801 if (!Character.isWhitespace(possibleSpace)) {
809 String result = building.substring(firstNonWhiteSpace, lastNonWhiteSpace + 1);
815 * Write the contents of this element and optionally its children. The
816 * writing is done to a provided <code>java.io.Writer</code>. The writing
817 * can optionally be indented.
819 * @param into The java.io.Writer that the output will be sent to.
820 * @param indent the number of tabs which will be inserted before each
822 * @param recurse if true then also print the children of this element.
823 * @throws java.io.IOException if an io error occurs
825 protected void printNice(Writer into, int indent, boolean recurse) throws IOException {
826 if (null != uninserted) {
827 throw new IllegalStateException("This element has not been added.");
830 if (paranoidConsistencyChecking) {
835 StringBuilder start = new StringBuilder();
839 for (int eachTab = 0; eachTab < indent; eachTab++) {
844 start.append(getDocument().docContent.substring(loc.startTag.start, loc.startTag.end + 1));
850 into.write(start.toString());
852 // print the rest if this was not an empty element.
853 if (!loc.startTag.equals(loc.endTag)) {
854 String itsValue = getTextValue(true, (-1 != indent));
857 if (null != itsValue) {
860 for (int eachTab = 0; eachTab < indent + 1; eachTab++) {
865 into.write(itsValue);
876 Enumeration<LiteXMLElement> childrens = getChildren();
878 Attribute space = getAttribute("xml:space");
881 if ("preserve".equals(space.getValue())) {
884 childIndent = indent + 1;
888 childIndent = indent + 1;
894 while (childrens.hasMoreElements()) {
895 LiteXMLElement aChild = childrens.nextElement();
897 aChild.printNice(into, childIndent, recurse);
902 StringBuilder end = new StringBuilder();
906 for (int eachTab = 0; eachTab < indent; eachTab++) {
911 end.append(getDocument().docContent.substring(loc.endTag.start, loc.endTag.end + 1));
917 into.write(end.toString());
922 * Given a source string, an optional tag and a range with in the source
923 * find either the tag specified or the next tag.
925 * The search consists of 4 phases :
926 * 0. If no tag was specified, determine if a tag can be found and
928 * 1. Search for the start of the named tag.
929 * 2. Search for the end tag. Each time we think we have found a tag
930 * which might be the end tag we make sure it is not the end tag
931 * of another element with the same name as our tag.
932 * 3. Calculate the position of the body of the tag given the locations
933 * of the start and end.
935 * @param source the string to search
936 * @param tag the tag to search for in the source string. If this tag is
937 * empty or null then we will search for the next tag.
938 * @param range describes the range of character locations in the source
939 * string to which the search will be limited.
940 * @return tagRange containing the ranges of the found tag.
943 protected tagRange getTagRanges(final StringBuilder source, String tag, final charRange range) {
945 // FIXME bondolo@jxta.org 20010327 Does not handle XML comments. ie. <!-- -->
946 if (null != uninserted) {
947 throw new IllegalStateException("This element has not been added to the document.");
950 tagRange result = new tagRange();
951 int start = range.start;
952 int end = source.length() - 1;
954 boolean foundStartTag = false;
955 boolean foundEndTag = false;
956 boolean emptyTag = (null == tag) || (0 == tag.length());
958 // check for bogosity
959 if ((-1 == start) || (start >= end)) {
960 throw new IllegalArgumentException("Illegal start value");
963 // adjust end of range
964 if ((-1 != range.end) && (end > range.end)) {
968 // check for empty tag and assign empty string
973 if (Logging.SHOW_FINER && LOG.isLoggable(Level.FINER)) {
974 LOG.finer("Searching for \"" + tag + "\" in range [" + start + "," + end + "]");
979 // Begin Phase 0 : Search for any tag.
982 int foundTagText = source.indexOf("<", current);
984 // was it not found? if not then quit
985 if (-1 == foundTagText) {
986 if (Logging.SHOW_FINER && LOG.isLoggable(Level.FINER)) {
987 LOG.finer("No Tags Found");
992 // this part is about setting the tag if necessary
995 int afterTagText = foundTagText;
997 while (afterTagText <= end) {
998 char inTagName = source.charAt(afterTagText);
1000 if (!Character.isWhitespace(inTagName) && ('/' != inTagName) && ('>' != inTagName)) {
1005 tag = source.substring(foundTagText, afterTagText);
1006 emptyTag = (null == tag) || (0 == tag.length());
1011 // it better not be still empty
1013 if (Logging.SHOW_FINER && LOG.isLoggable(Level.FINER)) {
1014 LOG.finer("No tag found");
1020 if (Logging.SHOW_FINER && LOG.isLoggable(Level.FINER)) {
1021 LOG.finer("Search for \"" + tag + "\" [" + start + "," + end + "]");
1024 // Begin Phase 1: Search for the Start Tag
1026 while (!foundStartTag && (current < end)) {
1027 int foundTagText = source.indexOf(tag, current + 1); // first loc is one past current location
1028 int foundTagTerminator;
1029 int foundNextTagStart;
1030 int afterTagText = foundTagText + tag.length();
1033 if ((-1 == foundTagText) || (afterTagText > end)) {
1034 if (Logging.SHOW_FINER && LOG.isLoggable(Level.FINER)) {
1035 LOG.finer("Tag \"" + tag + "\" Not Found(1)");
1040 char checkChar = source.charAt(afterTagText);
1042 // check to see if it is the start tag
1043 if (('<' != source.charAt(foundTagText - 1)) || // it has the open tag delimiter before it
1044 (!Character.isWhitespace(checkChar) && ('/' != checkChar) && ('>' != checkChar))) { // is immediately followed by a delimiter
1045 current = afterTagText;
1049 foundTagTerminator = source.indexOf(">", afterTagText);
1050 foundNextTagStart = source.indexOf("<", afterTagText + 1);
1052 if ((-1 == foundTagTerminator) || // the tag has no terminator
1053 (foundTagTerminator > end) || // it is past the valid range
1054 ((-1 != foundNextTagStart) && // there is another tag start
1055 (foundNextTagStart < foundTagTerminator))) { // and it is before the terminator we found. very bad
1056 current = afterTagText;
1060 foundStartTag = true;
1061 result.startTag.start = foundTagText - 1;
1062 result.startTag.end = foundTagTerminator;
1065 if (!foundStartTag) {
1066 if (Logging.SHOW_FINER && LOG.isLoggable(Level.FINER)) {
1067 LOG.finer("Tag \"" + tag + "\" Not Found(2)");
1072 // is this an empty element declaration?
1073 if ('/' == source.charAt(result.startTag.end - 1)) {
1074 // end is the start and there is no body
1075 result.body = new charRange(result.startTag.start, result.startTag.end);
1076 result.endTag = new charRange(result.startTag.start, result.startTag.end);
1077 if (Logging.SHOW_FINER && LOG.isLoggable(Level.FINER)) {
1078 LOG.finer("Empty Element \"" + tag + "\" Start : " + result.startTag);
1083 current = result.startTag.end + 1;
1085 // if current is past the end then our end tag is not found.
1086 if (current >= end) {
1087 if (Logging.SHOW_FINER && LOG.isLoggable(Level.FINER)) {
1088 LOG.finer("End not found \"" + tag + "\" Start : " + result.startTag);
1093 // Begin Phase 2 : Search for the end tag
1095 String endTag = "</" + tag + ">";
1096 int searchFrom = result.startTag.end + 1;
1098 while (!foundEndTag && (current < end) && (searchFrom < end)) {
1099 if (Logging.SHOW_FINER && LOG.isLoggable(Level.FINER)) {
1100 LOG.finer("Searching for \"" + endTag + "\" in range [" + current + "," + end + "]");
1103 int foundTagText = source.indexOf(endTag, current);
1105 // was it not found or not in bounds?
1106 if ((-1 == foundTagText) || ((foundTagText + endTag.length() - 1) > end)) {
1108 } // it was not found
1110 if (Logging.SHOW_FINER && LOG.isLoggable(Level.FINER)) {
1112 "Prospective tag pair for \"" + tag + "\" " + result.startTag + ":[" + foundTagText + ","
1113 + (foundTagText + endTag.length() - 1) + "]");
1116 // We recurse here in order to exclude the end tags of any sub elements with the same name
1117 charRange subRange = new charRange(searchFrom, foundTagText - 1);
1119 if (Logging.SHOW_FINER && LOG.isLoggable(Level.FINER)) {
1120 LOG.finer("Recursing to search for \"" + tag + "\" in " + subRange);
1123 tagRange subElement = getTagRanges(source, tag, subRange);
1125 if (Logging.SHOW_FINER && LOG.isLoggable(Level.FINER)) {
1126 LOG.finer("Recursion result \"" + tag + "\" " + subElement);
1129 // if there was an incomplete sub-tag with the same name, skip past it
1130 if (subElement.startTag.isValid()) {
1131 if (Logging.SHOW_FINER && LOG.isLoggable(Level.FINER)) {
1132 LOG.finer("Found sub-tag \"" + tag + "\" at " + subElement + " within " + subRange);
1135 if (subElement.endTag.isValid()) {
1136 if (Logging.SHOW_FINER && LOG.isLoggable(Level.FINER)) {
1137 LOG.finer("Complete sub-tag \"" + tag + "\" at " + subElement + " within " + subRange);
1139 current = subElement.endTag.end + 1;
1140 searchFrom = subElement.endTag.end + 1;
1142 current = foundTagText + endTag.length();
1149 result.endTag.start = foundTagText;
1150 result.endTag.end = foundTagText + endTag.length() - 1;
1152 if (Logging.SHOW_FINER && LOG.isLoggable(Level.FINER)) {
1153 LOG.finer("Prospective tag \"" + tag + "\" " + result.endTag + " is confirmed.");
1157 // Begin Phase 3 : Calculate the location of the body.
1159 result.body.start = result.startTag.end + 1;
1162 result.body.end = result.endTag.start - 1;
1164 result.body.end = end;
1167 if (Logging.SHOW_FINER && LOG.isLoggable(Level.FINER)) {
1168 LOG.finer("Found element : \"" + tag + "\" " + result);
1175 * Parse a charRange and add any tags found as content as children of a
1176 * specified element. This process is repeated recursivly.
1178 * @param scanRange the range to be parsed for sub-tags
1179 * @param addTo the element to add any discovered children to.
1181 protected void addChildTags(final charRange scanRange, LiteXMLElement addTo) {
1182 if (null != uninserted) {
1183 throw new IllegalStateException("This element has not been added to the document.");
1186 int current = scanRange.start;
1188 if (Logging.SHOW_FINER && LOG.isLoggable(Level.FINER)) {
1189 LOG.finer("Scanning for children in range " + scanRange);
1193 // scan for any tag.
1194 tagRange aSubtag = getTagRanges(getDocument().docContent, null, new charRange(current, scanRange.end));
1197 if (aSubtag.isValid()) {
1198 LiteXMLElement newChild = getDocument().createElement(aSubtag);
1200 if (Logging.SHOW_FINER && LOG.isLoggable(Level.FINER)) {
1202 "Adding child tag \""
1203 + getDocument().docContent.substring(aSubtag.endTag.start + 2, aSubtag.endTag.end) + "\" "
1207 addTo.appendChild(newChild);
1209 if (paranoidConsistencyChecking) {
1213 if (!aSubtag.startTag.equals(aSubtag.endTag)) {
1214 addChildTags(aSubtag.body, newChild); // recurse into the new tag
1217 // all done this tag, move on
1218 current = aSubtag.endTag.end + 1;
1220 current = -1; // all done!
1222 } while ((-1 != current) && (current < scanRange.end));
1224 if (paranoidConsistencyChecking) {
1230 * For this element and all its children adjust the location of its ranges
1231 * by the amount specified.
1233 * @param beginningAt adjust all locations which are at or past this
1235 * @param by amount to adjust all matching locations.
1237 protected void adjustLocations(final int beginningAt, final int by) {
1238 if (null != uninserted) {
1239 throw new IllegalStateException("This element has not been added.");
1242 // Check that this element is not entirely to the left of the shift
1243 // zone. NB: end can be < start if len is 0.
1244 if (loc.endTag.end < beginningAt && loc.endTag.start < beginningAt) {
1248 if ((loc.startTag.end >= beginningAt)
1249 || ((loc.startTag.start >= beginningAt) && ((loc.startTag.end + 1) == loc.startTag.start))) {
1250 loc.startTag.end += by;
1253 if (loc.startTag.start >= beginningAt) {
1254 loc.startTag.start += by;
1257 if ((loc.body.end >= beginningAt) || ((loc.body.start >= beginningAt) && ((loc.body.end + 1) == loc.body.start))) {
1261 if (loc.body.start >= beginningAt) {
1262 loc.body.start += by;
1265 if ((loc.endTag.end >= beginningAt) || ((loc.endTag.start >= beginningAt) && ((loc.endTag.end + 1) == loc.endTag.start))) {
1266 loc.endTag.end += by;
1269 if (loc.endTag.start >= beginningAt) {
1270 loc.endTag.start += by;
1273 for (Enumeration<LiteXMLElement> eachChild = getChildren(); eachChild.hasMoreElements();) {
1274 LiteXMLElement aChild = eachChild.nextElement();
1276 aChild.adjustLocations(beginningAt, by);
1279 if (paranoidConsistencyChecking) {
1285 * Given a StringBuilder find all occurrences of escaped characters which
1286 * must be decoded and convert them back to their non-escaped equivalents.
1288 * <p/>Also does end of line folding per: <a href="http://www.w3.org/TR/REC-xml#sec-line-ends"/>
1290 * @param target The StringBuilder which will be decoded.
1291 * @return The decoded version of the StringBuilder.
1293 protected StringBuilder decodeEscaped(StringBuilder target) {
1297 StringBuilder result = new StringBuilder(target.length());
1299 while (current < target.length()) {
1300 // FIXME bondolo@jxta.org 20010422 Should process xml comments out here.
1302 // fold 0x0D and 0x0D 0x0A to 0x0A
1303 if ('\r' == target.charAt(current)) {
1304 result.append('\n');
1306 if ((current < target.length()) && ('\n' == target.charAt(current))) {
1312 if ('&' != target.charAt(current)) {
1313 result.append(target.charAt(current));
1318 int terminusAt = current + 1;
1320 while ((terminusAt < target.length()) && // dont go past end
1321 ((terminusAt - current) < 6) && // only look 6 chars away.
1322 (';' != target.charAt(terminusAt))) { // must be a ;
1326 if ((terminusAt >= target.length()) || (';' != target.charAt(terminusAt))) {
1327 // if we get here then we didnt find the terminal we needed
1328 // so we just leave ampersand as it was, the document is
1329 // ill-formed but why make things worse?
1330 result.append(target.charAt(current));
1335 char[] sub = new char[terminusAt - current + 1];
1337 target.getChars(current, terminusAt + 1, sub, 0);
1338 String escaped = new String(sub);
1340 if ("&".equals(escaped)) {
1343 } else if ("<".equals(escaped)) {
1346 } else if (">".equals(escaped)) { // for compatibility with SGML. We dont encode these
1349 } else if (escaped.startsWith("&#")) {
1350 String numericChar = escaped.substring(2, escaped.length() - 1);
1353 if (numericChar.length() < 1) {
1354 result.append(target.charAt(current));
1359 // is it hex numeric
1360 if (numericChar.charAt(0) == 'x') {
1361 numericChar = numericChar.substring(1);
1364 if (numericChar.length() < 1) {
1365 result.append(target.charAt(current));
1371 char asChar = (char) Integer.parseInt(numericChar.toLowerCase(), 16);
1373 result.append(asChar);
1374 current += escaped.length();
1375 } catch (NumberFormatException badref) {
1376 // it was bad, we will just skip it.
1377 result.append(target.charAt(current));
1385 char asChar = (char) Integer.parseInt(numericChar, 10);
1387 result.append(asChar);
1388 current += escaped.length();
1389 } catch (NumberFormatException badref) {
1390 // it was bad, we will just skip it.
1391 result.append(target.charAt(current));
1396 // if we get here then we didn't know what to do with the
1397 // entity. so we just send it unchanged.
1398 result.append(target.charAt(current));
1410 * Given a StringBuilder find all occurrences of characters which must be
1411 * escaped and convert them to their escaped equivalents.
1413 * @param target The StringBuilder which will be encoded in place.
1415 protected void encodeEscaped(StringBuilder target) {
1419 while (current < target.length()) {
1420 if ('&' == target.charAt(current)) {
1421 target.insert(current + 1, "amp;");
1423 } else if ('<' == target.charAt(current)) {
1424 target.setCharAt(current, '&');
1425 target.insert(current + 1, "lt;");
1434 * Returns an enumerations of the attributes associated with this object.
1435 * Each element is of type Attribute.
1437 * @return Enumeration the attributes associated with this object.
1439 public Enumeration<Attribute> getAttributes() {
1440 List<Attribute> results = new ArrayList<Attribute>();
1442 if (null != uninserted) {
1443 throw new IllegalStateException("This element has not been added.");
1446 if (paranoidConsistencyChecking) {
1450 // find the start of the first attribute
1451 int current = loc.startTag.start + 1;
1453 while (current <= loc.startTag.end) {
1454 char inTagName = getDocument().docContent.charAt(current);
1456 if (Character.isWhitespace(inTagName) || ('/' == inTagName) || ('>' == inTagName)) {
1462 // loop and add attributes to the vector
1463 while (current < loc.startTag.end) {
1464 tagRange nextAttr = getAttributeLoc(null, new charRange(current, loc.startTag.end));
1466 if (!nextAttr.isValid()) {
1471 new Attribute(this, getDocument().docContent.substring(nextAttr.startTag.start, nextAttr.startTag.end + 1)
1473 getDocument().docContent.substring(nextAttr.body.start, nextAttr.body.end + 1)));
1475 current = nextAttr.endTag.end + 1;
1478 return Collections.enumeration(results);
1482 * Returns the tagRange of the next attribute contained in the range
1483 * provided. The tag range returned consists of the startTag indicating
1484 * the location of the name, body indicating the location of the value and
1485 * endTag indicating the location of the final quote delimiter.
1487 * @param name Name to match. null means match any name.
1488 * @param inRange the limits of the locations to scan.
1489 * @return tagRange containing the location of the next attribute
1491 protected tagRange getAttributeLoc(String name, charRange inRange) {
1492 tagRange result = new tagRange();
1493 int current = inRange.start;
1496 // skip the whitespace
1498 while (current <= inRange.end) {
1499 char inTagName = getDocument().docContent.charAt(current);
1501 if (!Character.isWhitespace(inTagName) && ('/' != inTagName) && ('>' != inTagName)) {
1507 int equalsAt = getDocument().docContent.indexOf("=", current);
1509 // make sure there is an equals
1510 if ((-1 == equalsAt) || (equalsAt >= inRange.end)) {
1515 result.startTag.start = current;
1516 result.startTag.end = equalsAt - 1;
1518 // get the quote char we must match
1519 String requiredQuote = getDocument().docContent.substring(equalsAt + 1, equalsAt + 2);
1521 // make sure its a valid quote
1522 if (('\'' != requiredQuote.charAt(0)) && ('\"' != requiredQuote.charAt(0))) {
1526 // find the next occurance of this quote
1527 int nextQuote = getDocument().docContent.indexOf(requiredQuote, equalsAt + 2);
1529 // make sure the quote is in a good spot.
1530 if ((-1 == nextQuote) || (nextQuote >= inRange.end)) {
1534 result.body.start = equalsAt + 2;
1535 result.body.end = nextQuote - 1;
1537 result.endTag.start = nextQuote;
1538 result.endTag.end = nextQuote;
1540 // check if the name matches.
1541 if ((null != name) && !name.equals(getDocument().docContent.substring(result.startTag.start, result.startTag.end + 1))) {
1542 result.startTag.start = -1;
1545 current = nextQuote + 1;
1546 } while ((current < inRange.end) && (!result.isValid()));
1554 public String addAttribute(String name, String value) {
1555 if (null != uninserted) {
1556 throw new IllegalStateException("This element has not been added.");
1560 throw new IllegalArgumentException("name must not be null");
1563 if (null == value) {
1564 throw new IllegalArgumentException("value must not be null");
1567 for (int eachChar = name.length() - 1; eachChar >= 0; eachChar--) {
1568 if (Character.isWhitespace(name.charAt(eachChar))) {
1569 throw new IllegalArgumentException("Attribute names may not contain spaces.");
1573 if (paranoidConsistencyChecking) {
1577 // skip past the name portion
1578 int current = loc.startTag.start + 1;
1580 while (current <= loc.startTag.end) {
1581 char inTagName = getDocument().docContent.charAt(current);
1583 if (Character.isWhitespace(inTagName) || ('/' == inTagName) || ('>' == inTagName)) {
1590 // find out if there was a previous value for this name
1591 String oldValue = null;
1592 tagRange oldAttr = getAttributeLoc(name, new charRange(current, loc.startTag.end));
1594 // choose which kind of quote to use
1595 char usingQuote = (-1 != value.indexOf('"')) ? '\'' : '\"';
1597 // make sure we can use it.
1598 if (('\'' == usingQuote) && (-1 != value.indexOf('\''))) {
1599 throw new IllegalArgumentException("Value contains both \" and \'");
1602 // build the new attribute string
1603 StringBuilder newStuff = new StringBuilder(" ");
1605 newStuff.append(name);
1606 newStuff.append("=");
1607 newStuff.append(usingQuote);
1608 newStuff.append(value);
1609 newStuff.append(usingQuote);
1612 if (!oldAttr.isValid()) {
1613 // we aren't replacing an existing value
1614 getDocument().docContent.insert(current, newStuff.toString());
1616 // move all doc locations which follow this one based on how much we
1618 getDocument().adjustLocations(current, newStuff.length());
1620 // we are replacing an existing value
1621 oldValue = getDocument().docContent.substring(oldAttr.body.start, oldAttr.body.end + 1);
1623 getDocument().docContent.delete(oldAttr.body.start, oldAttr.body.end + 1);
1624 getDocument().docContent.insert(oldAttr.body.start, value);
1626 int delta = value.length() - (oldAttr.body.end - oldAttr.body.start + 1);
1628 // move all doc locations which follow this one based on how much we
1629 // inserted or deleted.
1630 getDocument().adjustLocations(loc.startTag.start + 1, delta);
1633 if (paranoidConsistencyChecking) {
1643 public String addAttribute(Attribute newAttrib) {
1644 return addAttribute(newAttrib.getName(), newAttrib.getValue());
1650 public Attribute getAttribute(String name) {
1651 if (null != uninserted) {
1652 throw new IllegalStateException("This element has not been added.");
1655 if (paranoidConsistencyChecking) {
1659 // skip past the name portion
1660 int current = loc.startTag.start + 1;
1662 while (current <= loc.startTag.end) {
1663 char inTagName = getDocument().docContent.charAt(current);
1665 if (Character.isWhitespace(inTagName) || ('/' == inTagName) || ('>' == inTagName)) {
1672 // find the attribute matching this name
1673 tagRange attr = getAttributeLoc(name, new charRange(current, loc.startTag.end));
1675 if (!attr.isValid()) {
1680 return new Attribute(this, getDocument().docContent.substring(attr.startTag.start, attr.startTag.end + 1)
1682 getDocument().docContent.substring(attr.body.start, attr.body.end + 1));
1685 protected boolean checkConsistency() {
1686 assert loc.isValid();
1688 charRange elementRange = new charRange(loc.startTag.start, loc.endTag.end);
1690 assert elementRange.contains(loc.startTag);
1691 assert elementRange.contains(loc.body);
1692 assert elementRange.contains(loc.endTag);
1694 if (null != children) {
1695 Iterator<LiteXMLElement> eachChild = children.iterator();
1696 Iterator<LiteXMLElement> nextChilds = children.iterator();
1698 if (nextChilds.hasNext()) {
1702 while (eachChild.hasNext()) {
1703 LiteXMLElement aChild = eachChild.next();
1705 assert loc.contains(aChild.loc);
1707 if (nextChilds.hasNext()) {
1708 LiteXMLElement nextChild = nextChilds.next();
1710 assert aChild.loc.compareTo(nextChild.loc) < 0;
1712 assert !eachChild.hasNext();
1715 aChild.checkConsistency();
1722 * The document we are a part of.
1724 * @return The document we are a part of.
1726 LiteXMLDocument getDocument() {