/*
 * CharacterSetElement.cs
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 3
 * of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free
 * Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
 * MA 02111-1307, USA.
 *
 * Copyright (c) 2003-2009 Per Cederberg. All rights reserved.
 */

using System;
using System.Collections;
using System.IO;
using System.Text;

using PerCederberg.Grammatica.Runtime;

namespace PerCederberg.Grammatica.Runtime.RE {

    /**
     * A regular expression character set element. This element
     * matches a single character inside (or outside) a character set.
     * The character set is user defined and may contain ranges of
     * characters. The set may also be inverted, meaning that only
     * characters not inside the set will be considered to match.
     *
     * @author   Per Cederberg, <per at percederberg dot net>
     * @version  1.5
     */
    internal class CharacterSetElement : Element {

        /*
         * NOTE: the predefined sets below are recognised by reference
         * identity (see InSet() and ToString()), so the fields are
         * read-only: re-assigning one of them would silently change
         * how existing elements match.
         */

        /**
         * The dot ('.') character set. This element matches a single
         * character that is not a line terminator, i.e. anything
         * except '\n', '\r', U+0085, U+2028 and U+2029.
         */
        public static readonly CharacterSetElement DOT =
            new CharacterSetElement(false);

        /**
         * The digit character set. This element matches a single
         * numeric character in the range '0' to '9'.
         */
        public static readonly CharacterSetElement DIGIT =
            new CharacterSetElement(false);

        /**
         * The non-digit character set. This element matches a single
         * character outside the range '0' to '9'.
         */
        public static readonly CharacterSetElement NON_DIGIT =
            new CharacterSetElement(true);

        /**
         * The whitespace character set. This element matches a single
         * space, tab, newline, form feed, carriage return or vertical
         * tab character.
         */
        public static readonly CharacterSetElement WHITESPACE =
            new CharacterSetElement(false);

        /**
         * The non-whitespace character set. This element matches a
         * single character that is not in the whitespace set.
         */
        public static readonly CharacterSetElement NON_WHITESPACE =
            new CharacterSetElement(true);

        /**
         * The word character set. This element matches a single
         * character in 'a'-'z', 'A'-'Z', '0'-'9' or an underscore.
         */
        public static readonly CharacterSetElement WORD =
            new CharacterSetElement(false);

        /**
         * The non-word character set. This element matches a single
         * character that is not in the word set.
         */
        public static readonly CharacterSetElement NON_WORD =
            new CharacterSetElement(true);

        /**
         * The inverted character set flag. It is assigned once in the
         * constructor and never changed afterwards.
         */
        private readonly bool inverted;

        /**
         * The character set content. This list may contain boxed
         * char values, Range objects and nested CharacterSetElement
         * objects (see the Add methods). The list reference itself is
         * never replaced; only its content grows.
         */
        private readonly ArrayList contents = new ArrayList();

        /**
         * Creates a new, empty character set element. If the inverted
         * character set flag is set, only characters NOT in the set
         * will match.
         *
         * @param inverted       the inverted character set flag
         */
        public CharacterSetElement(bool inverted) {
            this.inverted = inverted;
        }

        /**
         * Adds a single character to this character set. The
         * character is stored exactly as given; no case conversion
         * is applied by this method.
         *
         * @param c              the character to add
         */
        public void AddCharacter(char c) {
            // The char is boxed, as the content list is non-generic
            contents.Add(c);
        }

        /**
         * Adds every character of a string to this character set,
         * one at a time and in string order.
         *
         * @param str            the string with characters to add
         */
        public void AddCharacters(string str) {
            foreach (char ch in str) {
                AddCharacter(ch);
            }
        }

        /**
         * Adds every character of a string element to this character
         * set. The characters are taken from the string returned by
         * the element.
         *
         * @param elem           the string element with characters to add
         */
        public void AddCharacters(StringElement elem) {
            string  text = elem.GetString();

            AddCharacters(text);
        }

        /**
         * Adds a character range to this character set. Both bounds
         * are inclusive when the range is tested.
         *
         * @param min            the minimum character value
         * @param max            the maximum character value
         */
        public void AddRange(char min, char max) {
            Range  range = new Range(min, max);

            contents.Add(range);
        }

        /**
         * Adds a character subset to this character set. The subset
         * element is stored by reference and is consulted through its
         * own set check (which takes its own inverted flag into
         * account) when a character is tested against this set.
         *
         * @param elem           the character set to add
         */
        public void AddCharacterSet(CharacterSetElement elem) {
            contents.Add(elem);
        }

        /**
         * Returns this element itself instead of a copy, as the
         * character set shouldn't be modified after creation. This
         * partially breaks the contract of Clone(), but as new
         * characters are not added to the character set after
         * creation, sharing the instance will work correctly.
         *
         * @return this character set element
         */
        public override object Clone() {
            // No copy is made; the same instance is shared
            return this;
        }

        /**
         * Returns the length of a matching string starting at the
         * specified position. A character set can only ever match a
         * single character in a single way, so any skip count other
         * than zero (0) results in a failed match.
         *
         * @param m              the matcher being used
         * @param buffer         the input character buffer to match
         * @param start          the starting position
         * @param skip           the number of matches to skip
         *
         * @return 1 if the character at the position matches, or
         *         -1 if no match was found
         *
         * @throws IOException if an I/O error occurred
         */
        public override int Match(Matcher m,
                                  ReaderBuffer buffer,
                                  int start,
                                  int skip) {

            // There are no alternative matches to skip to
            if (skip != 0) {
                return -1;
            }

            // A negative value is reported to the matcher as having
            // read the end of the input
            int  peeked = buffer.Peek(start);
            if (peeked < 0) {
                m.SetReadEndOfString();
                return -1;
            }

            // NOTE(review): Char.ToLower uses the current culture;
            // confirm that the set content is lower-cased the same
            // way wherever case-insensitive patterns are created.
            bool  ignoreCase = m.IsCaseInsensitive();
            char  candidate = (char) peeked;
            if (ignoreCase) {
                candidate = Char.ToLower(candidate);
            }

            if (InSet(candidate)) {
                return 1;
            }
            return -1;
        }

        /**
         * Checks if the specified character matches this character
         * set. The predefined sets are recognised by reference and
         * dispatched to their dedicated checks; all other elements
         * use the user-defined content. The inverted flag is applied
         * to every result except the one for the dot set.
         *
         * @param c               the character to check
         *
         * @return true if the character matches, or
         *         false otherwise
         */
        private bool InSet(char c) {
            bool  present;

            // The dot set is never inverted
            if (this == DOT) {
                return InDotSet(c);
            }

            if (this == DIGIT || this == NON_DIGIT) {
                present = InDigitSet(c);
            } else if (this == WHITESPACE || this == NON_WHITESPACE) {
                present = InWhitespaceSet(c);
            } else if (this == WORD || this == NON_WORD) {
                present = InWordSet(c);
            } else {
                present = InUserSet(c);
            }
            return present != inverted;
        }

        /**
         * Checks if the specified character is present in the 'dot'
         * set, i.e. that it is none of the line terminators '\n',
         * '\r', U+0085, U+2028 or U+2029. This method does not
         * consider the inverted flag.
         *
         * @param c               the character to check
         *
         * @return true if the character is present, or
         *         false otherwise
         */
        private bool InDotSet(char c) {
            bool  lineTerminator = c == '\n'
                                || c == '\r'
                                || c == '\u0085'
                                || c == '\u2028'
                                || c == '\u2029';

            return !lineTerminator;
        }

        /**
         * Checks if the specified character is a digit in the range
         * '0' to '9' (both inclusive). This method does not consider
         * the inverted flag.
         *
         * @param c               the character to check
         *
         * @return true if the character is a digit, or
         *         false otherwise
         */
        private bool InDigitSet(char c) {
            if (c < '0') {
                return false;
            }
            return c <= '9';
        }

        /**
         * Checks if the specified character is a whitespace
         * character, i.e. a space, tab, newline, form feed, carriage
         * return or vertical tab (character code 11). This method
         * does not consider the inverted flag.
         *
         * @param c               the character to check
         *
         * @return true if the character is a whitespace character, or
         *         false otherwise
         */
        private bool InWhitespaceSet(char c) {
            return c == ' '
                || c == '\t'
                || c == '\n'
                || c == '\f'
                || c == '\r'
                || c == '\v';
        }

        /**
         * Checks if the specified character is a word character,
         * i.e. an underscore or a character in one of the ranges
         * '0'-'9', 'A'-'Z' or 'a'-'z'. This method does not consider
         * the inverted flag.
         *
         * @param c               the character to check
         *
         * @return true if the character is a word character, or
         *         false otherwise
         */
        private bool InWordSet(char c) {
            if (c == '_') {
                return true;
            }
            if (c >= '0' && c <= '9') {
                return true;
            }
            if (c >= 'A' && c <= 'Z') {
                return true;
            }
            return c >= 'a' && c <= 'z';
        }

        /**
         * Checks if the specified character is present in the user-
         * defined set. Each content entry is tested in insertion
         * order: single characters by equality, ranges by inclusion
         * and nested character sets through their own set check. The
         * search stops at the first entry that accepts the character.
         * This method does not consider the inverted flag of this
         * element.
         *
         * @param value           the character to check
         *
         * @return true if the character is present, or
         *         false otherwise
         */
        private bool InUserSet(char value) {
            foreach (object item in contents) {
                if (item is char) {
                    if ((char) item == value) {
                        return true;
                    }
                    continue;
                }

                Range  range = item as Range;
                if (range != null) {
                    if (range.Inside(value)) {
                        return true;
                    }
                    continue;
                }

                // Nested sets apply their own inverted flag
                if (item is CharacterSetElement) {
                    if (((CharacterSetElement) item).InSet(value)) {
                        return true;
                    }
                }
            }
            return false;
        }

        /**
         * Prints this element to the specified output stream, as a
         * single line consisting of the indentation followed by the
         * string description of this character set.
         *
         * @param output         the output stream to use
         * @param indent         the current indentation
         */
        public override void PrintTo(TextWriter output, string indent) {
            string  line = indent + ToString();

            output.WriteLine(line);
        }

        /**
         * Returns a string description of this character set. The
         * predefined sets are printed with their regular expression
         * shorthand (".", "\d", "\D", "\s", "\S", "\w" or "\W"). A
         * user-defined set is printed as its content entries inside
         * square brackets, with a leading '^' inside the brackets if
         * the set is inverted (e.g. "[^a-z]").
         *
         * @return a string description of this character set
         */
        public override string ToString() {
            StringBuilder  buffer;

            // Handle predefined character sets
            if (this == DOT) {
                return ".";
            } else if (this == DIGIT) {
                return "\\d";
            } else if (this == NON_DIGIT) {
                return "\\D";
            } else if (this == WHITESPACE) {
                return "\\s";
            } else if (this == NON_WHITESPACE) {
                return "\\S";
            } else if (this == WORD) {
                return "\\w";
            } else if (this == NON_WORD) {
                return "\\W";
            }

            // Handle user-defined character sets. The negation caret
            // belongs inside the opening bracket in regular expression
            // syntax, so an inverted set is printed as "[^...]".
            buffer = new StringBuilder();
            buffer.Append(inverted ? "[^" : "[");
            foreach (object item in contents) {
                buffer.Append(item);
            }
            buffer.Append("]");

            return buffer.ToString();
        }


        /**
         * A character range class. A range is a pair of inclusive
         * lower and upper character bounds.
         */
        private class Range {

            /**
             * The minimum (lowest) character value in the range.
             */
            private char min;

            /**
             * The maximum (highest) character value in the range.
             */
            private char max;

            /**
             * Creates a new character range. The bounds are stored
             * as given, without any check of their relative order.
             *
             * @param min        the minimum character value
             * @param max        the maximum character value
             */
            public Range(char min, char max) {
                this.min = min;
                this.max = max;
            }

            /**
             * Checks if the specified character is inside the range,
             * with both bounds being inclusive.
             *
             * @param c          the character to check
             *
             * @return true if the character is in the range, or
             *         false otherwise
             */
            public bool Inside(char c) {
                if (c < min) {
                    return false;
                }
                return c <= max;
            }

            /**
             * Returns a string representation of this object, in the
             * form of the two bounds separated by a dash.
             *
             * @return a string representation of this object
             */
            public override string ToString() {
                return String.Concat(min.ToString(), "-", max.ToString());
            }
        }
    }
}