package com.srbenoit.xml;

import java.text.ParseException;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Deque;
import java.util.List;
import com.srbenoit.log.LoggedObject;

/**
 * A very basic parser that can extract structural information from XML and can detect some XML
 * syntax problems.
 */
public class XmlParser extends LoggedObject {

    /** the list of tag spans */
    private final transient List<TagSpan> tags;

    /** a stack of open tags (must be matched by close tags in proper order) */
    private final transient Deque<NonemptyElement> openTags;

    /**
     * Constructs a new <code>XmlParser</code>.
     */
    public XmlParser() {

        super();

        this.tags = new ArrayList<TagSpan>(20);
        this.openTags = new ArrayDeque<NonemptyElement>(20);
    }

    /**
     * Parses some XML into an object tree.
     *
     * @param   xml          the XML to parse
     * @param   ignoreCData  <code>true</code> to ignore any CData in the XML, resulting in a node
     *                       tree containing only <code>EmptyElement</code> and <code>
     *                       NonemptyElement</code> nodes.
     * @return  the list of top-level nodes, of which any <code>Element</code> nodes can contain
     *          child nodes
     * @throws  ParseException  if an error is detected in the XML
     */
    public List<Node> parse(final String xml, final boolean ignoreCData) throws ParseException {

        List<Node> nodes;
        int len;
        int pos;
        int index;
        TagSpan span;
        String tag;

        // Construct the list of tags (open, close, and empty) in the XML
        this.tags.clear();
        buildSpansList(xml);

        nodes = new ArrayList<Node>(20);

        // Build the set of nodes from the list of spans
        len = xml.length();
        pos = 0;
        index = 0;

        while ((pos < len) && (index < this.tags.size())) {

            // Get the next tag
            span = this.tags.get(index);
            index++;

            // If there are characters before the tag, add a CData node
            if (pos < span.start) {

                if (!ignoreCData) {
                    makeCdata(pos, span.start, nodes);
                }

                pos = span.start;
            }

            if (span.isOpen) {

                if (span.isClose) {
                    makeEmpty(xml, span, nodes);
                } else {
                    makeNonempty(xml, span, nodes);
                }
            } else {

                if (this.openTags.isEmpty()) {
                    throw new ParseException("Unmatched close tag: '" + getTag(xml, span) + "'",
                        span.start);
                }

                // This is a non-empty element's close tag
                tag = getTag(xml, span);

                if (!this.openTags.pop().tagName.equals(tag)) {
                    throw new ParseException("Mismatched close tag: '" + tag + "'", span.start);
                }
            }

            pos = span.end + 1;
        }

        if ((pos < len) && (!ignoreCData)) {
            makeCdata(pos, len, nodes);
        }

        return nodes;
    }

    /**
     * Builds a CDATA element and adds it to the node list.
     *
     * @param  start  the offset of the start of the data
     * @param  end    the offset of the end of the data
     * @param  nodes  the list of nodes to which to add the element
     */
    private void makeCdata(final int start, final int end, final List<Node> nodes) {

        CData cdata;

        cdata = new CData(start, end);

        if (this.openTags.isEmpty()) {
            nodes.add(cdata);
        } else {
            this.openTags.getFirst().children.add(cdata);
        }
    }

    /**
     * Builds an empty element and adds it to the node list.
     *
     * @param   xml    the source XML
     * @param   span   the tag span describing the empty element
     * @param   nodes  the list of nodes to which to add the element
     * @throws  ParseException  if an error is detected in the XML
     */
    private void makeEmpty(final String xml, final TagSpan span, final List<Node> nodes)
        throws ParseException {

        String tag;
        EmptyElement empty;

        tag = getTag(xml, span);
        empty = new EmptyElement(tag, span);
        extractAttribtues(xml, span, empty);

        if (this.openTags.isEmpty()) {
            nodes.add(empty);
        } else {
            this.openTags.getFirst().children.add(empty);
        }
    }

    /**
     * Builds a nonempty element and adds it to the node list.
     *
     * @param   xml    the source XML
     * @param   span   the tag span describing the empty element
     * @param   nodes  the list of nodes to which to add the element
     * @throws  ParseException  if an error is detected in the XML
     */
    private void makeNonempty(final String xml, final TagSpan span, final List<Node> nodes)
        throws ParseException {

        String tag;
        NonemptyElement nonempty;

        // This is a non-empty element's open tag
        tag = getTag(xml, span);
        nonempty = new NonemptyElement(tag, span);
        extractAttribtues(xml, span, nonempty);

        if (this.openTags.isEmpty()) {
            nodes.add(nonempty);
        } else {
            this.openTags.getFirst().children.add(nonempty);
        }

        this.openTags.push(nonempty);
    }

    /**
     * Scans through the XML looking for '&lt;' and '&gt;' characters and assembles them into an
     * ordered list of spans.
     *
     * @param   xml  the XML to parse
     * @throws  ParseException  if there is an invalid or unterminated tag
     */
    private void buildSpansList(final String xml) throws ParseException {

        int start;
        int end;

        // Scan through the XML accumulating a list of '<', '>', '</', '/>'
        // and assembling them into tag spans
        start = xml.indexOf('<');

        while (start >= 0) {

            end = xml.indexOf('>', start + 1);

            if (end == -1) {
                throw new ParseException("Unterminated tag (missing '>')", start);
            }

            // Detect pathological "<>" and "</>" cases.
            if (end == (start + 1)) {
                throw new ParseException("Invalid tag (<>)", start);
            }

            if ((end == (start + 2)) && (xml.charAt(start + 1) == '/')) {
                throw new ParseException("Invalid tag (</>)", start);
            }

            validateTag(xml, start, end);

            start = xml.indexOf('<', end + 1);
        }
    }

    /**
     * Examine a tag delimited by a start and end position, and add it to the list of tags.
     *
     * @param   xml    the source XML
     * @param   start  the start position of the tag
     * @param   end    the end position of the tag
     * @throws  ParseException  if there is an invalid or unterminated tag
     */
    private void validateTag(final String xml, final int start, final int end)
        throws ParseException {

        TagSpan span;
        char ch1;
        char ch2;
        int nameStart;
        int nameEnd;

        ch1 = xml.charAt(start + 1);
        ch2 = xml.charAt(end - 1);

        // Start of tag name is first non-whitespace after tag start
        nameStart = scanWhitespace(xml, (ch1 == '/') ? (start + 2) : (start + 1), end, false);

        // Test for no tag name, as in "<  >" or "<  />"
        if ((nameStart == end) || ((nameStart == (end - 1)) && (ch2 == '/'))) {
            throw new ParseException("Missing tag name", start);
        }

        // End of tag name is first whitespace after tag name start,
        // excepting the possible '/' at the end of an empty tag
        nameEnd = scanWhitespace(xml, nameStart + 1, end, true);

        if ((nameEnd == end) && (ch2 == '/')) {
            nameEnd--;
        }

        span = new TagSpan(start, end, (ch1 != '/'), ((ch1 == '/') || (ch2 == '/')), nameStart,
                nameEnd);
        this.tags.add(span);
    }

    /**
     * Scans the source XML for the next character that is either whitespace or not whitespace,
     * starting at a specified starting index and limiting the search to a specified ending index.
     *
     * @param   xml           the source XML
     * @param   start         the index at which to begin searching
     * @param   end           the index at which to end searching
     * @param   isWhitespace  <code>true</code> to search for the next whitespace, <code>
     *                        false</code> to search for the next non-whitespace
     * @return  the index of the first matching character in the designated range, or the end index
     *          if no non-whitespace was found
     */
    private int scanWhitespace(final String xml, final int start, final int end,
        final boolean isWhitespace) {

        int pos;

        for (pos = start; pos < end; pos++) {

            if (Character.isWhitespace(xml.charAt(pos)) == isWhitespace) {
                break;
            }
        }

        return pos;
    }

    /**
     * Returns the tag name from a tag span.
     *
     * @param   xml   the source XML
     * @param   span  the tag span
     * @return  the extracted tag name
     */
    private String getTag(final String xml, final TagSpan span) {

        String raw;
        String name;

        raw = xml.substring(span.nameStart, span.nameEnd);
        name = XmlEscaper.unescape(raw);

        return name;
    }

    /**
     * Extracts a list of attributes from a tag.
     *
     * @param   xml      the source XML
     * @param   span     the tag span defining the tag from which to extract attributes
     * @param   element  the element to which to add extracted attributes
     * @throws  ParseException  if there is an error parsing attributes
     */
    private void extractAttribtues(final String xml, final TagSpan span, final ElementBase element)
        throws ParseException {

        char chr;
        int end;
        int pos;

        // Figure out where tag content ends
        end = (xml.charAt(span.end - 1) == '/') ? (span.end - 1) : span.end;

        // Scan for attribute name='value' pairs
        pos = span.nameEnd;

        while (pos < end) {
            chr = xml.charAt(pos);

            // Skip whitespace before attribute name
            if (Character.isWhitespace(chr)) {
                pos++;
            } else if (chr == '=') {

                // Leading '=' not valid as attribute name
                throw new ParseException("Missing attribute name", pos);
            } else {
                pos = processAttribute(xml, pos, end, element);
            }
        }
    }

    /**
     * Extracts a single attribute from the XML stream.
     *
     * @param   xml      the source XMl
     * @param   start    the start position of the attribute
     * @param   end      the end of the current tag
     * @param   element  the element to which to add the attribute
     * @return  the position immediately following the end of the attribute
     * @throws  ParseException  if there is an error parsing the attribute
     */
    private int processAttribute(final String xml, final int start, final int end,
        final ElementBase element) throws ParseException {

        int pos;
        char chr;
        int attrEnd;
        int equals;
        int valueEnd;
        char match;

        // Find the end of the attribute name and the equals sign
        for (attrEnd = start; attrEnd < end; attrEnd++) {
            chr = xml.charAt(attrEnd);

            if (Character.isWhitespace(chr) || (chr == '=')) {
                break;
            }
        }

        equals = scanWhitespace(xml, attrEnd, end, false);

        if ((equals == end) || (xml.charAt(equals) != '=')) {
            throw new ParseException("Invalid attribute", start);
        }

        pos = scanWhitespace(xml, equals + 1, end, false);

        // Attribute must be wrapped in ' or "
        match = xml.charAt(pos);

        if ("'\"".indexOf(match) == -1) {
            throw new ParseException("Invalid attribute value", pos);
        }

        valueEnd = xml.indexOf(match, pos + 1);

        if ((valueEnd == -1) || (valueEnd >= end)) {
            throw new ParseException("Unterminated attribute value", pos);
        }

        addAttr(xml, start, attrEnd, pos + 1, valueEnd, element);

        return valueEnd + 1;
    }

    /**
     * Decodes an attribute name and value and adds the attribute to an element.
     *
     * @param  xml         the source XML
     * @param  attrStart   the index of the start of the attribute name
     * @param  attrEnd     the index of the end of the attribute name
     * @param  valueStart  the index of the start of the attribute value
     * @param  valueEnd    the index of the end of the attribute value
     * @param  element     the element to which to add the attribute
     */
    private void addAttr(final String xml, final int attrStart, final int attrEnd,
        final int valueStart, final int valueEnd, final ElementBase element) {

        String raw;
        String name;
        String value;

        raw = xml.substring(attrStart, attrEnd).trim();
        name = XmlEscaper.unescape(raw);

        raw = xml.substring(valueStart, valueEnd).trim();
        value = XmlEscaper.unescape(raw);

        element.put(name, value);
    }
}
