/*
* Parser.cs
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 3
* of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free
* Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307, USA.
*
* Copyright (c) 2003-2009 Per Cederberg. All rights reserved.
*/
using System;
using System.Collections;
using System.IO;
using System.Text;
namespace PerCederberg.Grammatica.Runtime {
/**
* A base parser class. This class provides the standard parser
* interface, as well as token handling.
*
* @author Per Cederberg, <per at percederberg dot net>
* @version 1.5
*/
public abstract class Parser {
/**
* The parser initialization flag.
*/
private bool initialized = false;
/**
* The tokenizer to use.
*/
private Tokenizer tokenizer;
/**
* The analyzer to use for callbacks.
*/
private Analyzer analyzer;
/**
* The list of production patterns.
*/
private ArrayList patterns = new ArrayList();
/**
* The map with production patterns and their id:s. This map
* contains the production patterns indexed by their id:s.
*/
private Hashtable patternIds = new Hashtable();
/**
* The list of buffered tokens. This list will contain tokens that
* have been read from the tokenizer, but not yet consumed.
*/
private ArrayList tokens = new ArrayList();
/**
* The error log. All parse errors will be added to this log as
* the parser attempts to recover from the error. If the error
* count is higher than zero (0), this log will be thrown as the
* result from the parse() method.
*/
private ParserLogException errorLog = new ParserLogException();
/**
* The error recovery counter. This counter is initially set to a
* negative value to indicate that no error requiring recovery
* has been encountered. When a parse error is found, the counter
* is set to three (3), and is then decreased by one for each
* correctly read token until it reaches zero (0).
*/
private int errorRecovery = -1;
/**
* Creates a new parser.
*
* @param input the input stream to read from
*
* @throws ParserCreationException if the tokenizer couldn't be
* initialized correctly
*
* @since 1.5
*/
internal Parser(TextReader input) : this(input, null) {
}
/**
* Creates a new parser.
*
* @param input the input stream to read from
* @param analyzer the analyzer callback to use
*
* @throws ParserCreationException if the tokenizer couldn't be
* initialized correctly
*
* @since 1.5
*/
internal Parser(TextReader input, Analyzer analyzer) {
this.tokenizer = NewTokenizer(input);
this.analyzer = (analyzer == null) ? NewAnalyzer() : analyzer;
}
/**
* Creates a new parser.
*
* @param tokenizer the tokenizer to use
*/
internal Parser(Tokenizer tokenizer) : this(tokenizer, null) {
}
/**
* Creates a new parser.
*
* @param tokenizer the tokenizer to use
* @param analyzer the analyzer callback to use
*/
internal Parser(Tokenizer tokenizer, Analyzer analyzer) {
this.tokenizer = tokenizer;
this.analyzer = (analyzer == null) ? NewAnalyzer() : analyzer;
}
/**
* Creates a new tokenizer for this parser. Can be overridden by
* a subclass to provide a custom implementation.
*
* @param in the input stream to read from
*
* @return the tokenizer created
*
* @throws ParserCreationException if the tokenizer couldn't be
* initialized correctly
*
* @since 1.5
*/
protected virtual Tokenizer NewTokenizer(TextReader input) {
// TODO: This method should really be abstract, but it isn't in this
// version due to backwards compatibility requirements.
return new Tokenizer(input);
}
/**
* Creates a new analyzer for this parser. Can be overridden by a
* subclass to provide a custom implementation.
*
* @return the analyzer created
*
* @since 1.5
*/
protected virtual Analyzer NewAnalyzer() {
// TODO: This method should really be abstract, but it isn't in this
// version due to backwards compatibility requirements.
return new Analyzer();
}
/**
* The tokenizer property (read-only). This property contains
* the tokenizer in use by this parser.
*
* @since 1.5
*/
public Tokenizer Tokenizer {
get {
return tokenizer;
}
}
/**
* The analyzer property (read-only). This property contains
* the analyzer in use by this parser.
*
* @since 1.5
*/
public Analyzer Analyzer {
get {
return analyzer;
}
}
/**
* Returns the tokenizer in use by this parser.
*
* @return the tokenizer in use by this parser
*
* @since 1.4
*
* @see #Tokenizer
*
* @deprecated Use the Tokenizer property instead.
*/
public Tokenizer GetTokenizer() {
return Tokenizer;
}
/**
* Returns the analyzer in use by this parser.
*
* @return the analyzer in use by this parser
*
* @since 1.4
*
* @see #Analyzer
*
* @deprecated Use the Analyzer property instead.
*/
public Analyzer GetAnalyzer() {
return Analyzer;
}
/**
* Sets the parser initialized flag. Normally this flag is set by
* the prepare() method, but this method allows further
* modifications to it.
*
* @param initialized the new initialized flag
*/
internal void SetInitialized(bool initialized) {
this.initialized = initialized;
}
/**
* Adds a new production pattern to the parser. The first pattern
* added is assumed to be the starting point in the grammar. The
* patterns added may be validated to some extent.
*
* @param pattern the pattern to add
*
* @throws ParserCreationException if the pattern couldn't be
* added correctly to the parser
*/
public virtual void AddPattern(ProductionPattern pattern) {
if (pattern.Count <= 0) {
throw new ParserCreationException(
ParserCreationException.ErrorType.INVALID_PRODUCTION,
pattern.Name,
"no production alternatives are present (must have at " +
"least one)");
}
if (patternIds.ContainsKey(pattern.Id)) {
throw new ParserCreationException(
ParserCreationException.ErrorType.INVALID_PRODUCTION,
pattern.Name,
"another pattern with the same id (" + pattern.Id +
") has already been added");
}
patterns.Add(pattern);
patternIds.Add(pattern.Id, pattern);
SetInitialized(false);
}
/**
* Initializes the parser. All the added production patterns will
* be analyzed for ambiguities and errors. This method also
* initializes internal data structures used during the parsing.
*
* @throws ParserCreationException if the parser couldn't be
* initialized correctly
*/
public virtual void Prepare() {
if (patterns.Count <= 0) {
throw new ParserCreationException(
ParserCreationException.ErrorType.INVALID_PARSER,
"no production patterns have been added");
}
for (int i = 0; i < patterns.Count; i++) {
CheckPattern((ProductionPattern) patterns[i]);
}
SetInitialized(true);
}
/**
* Checks a production pattern for completeness. If some rule
* in the pattern referenced an production pattern not added
* to this parser, a parser creation exception will be thrown.
*
* @param pattern the production pattern to check
*
* @throws ParserCreationException if the pattern referenced a
* pattern not added to this parser
*/
private void CheckPattern(ProductionPattern pattern) {
for (int i = 0; i < pattern.Count; i++) {
CheckAlternative(pattern.Name, pattern[i]);
}
}
/**
* Checks a production pattern alternative for completeness.
* If some element in the alternative referenced a production
* pattern not added to this parser, a parser creation
* exception will be thrown.
*
* @param name the name of the pattern being checked
* @param alt the production pattern alternative
*
* @throws ParserCreationException if the alternative
* referenced a pattern not added to this parser
*/
private void CheckAlternative(string name,
ProductionPatternAlternative alt) {
for (int i = 0; i < alt.Count; i++) {
CheckElement(name, alt[i]);
}
}
/**
* Checks a production pattern element for completeness. If
* the element references a production pattern not added to
* this parser, a parser creation exception will be thrown.
*
* @param name the name of the pattern being checked
* @param elem the production pattern element to check
*
* @throws ParserCreationException if the element referenced a
* pattern not added to this parser
*/
private void CheckElement(string name,
ProductionPatternElement elem) {
if (elem.IsProduction() && GetPattern(elem.Id) == null) {
throw new ParserCreationException(
ParserCreationException.ErrorType.INVALID_PRODUCTION,
name,
"an undefined production pattern id (" + elem.Id +
") is referenced");
}
}
/**
* Resets this parser for usage with another input stream. The
* associated tokenizer and analyzer will also be reset. This
* method will clear all the internal state and the error log in
* the parser. It is normally called in order to reuse a parser
* and tokenizer pair with multiple input streams, thereby
* avoiding the cost of re-analyzing the grammar structures.
*
* @param input the new input stream to read
*
* @see Tokenizer#Reset
* @see Analyzer#Reset
*
* @since 1.5
*/
public void Reset(TextReader input) {
this.tokenizer.Reset(input);
this.analyzer.Reset();
}
/**
* Parses the token stream and returns a parse tree. This
* method will call Prepare() if not previously called. It
* will also call the Reset() method, to make sure that only
* the Tokenizer.Reset() method must be explicitly called in
* order to reuse a parser for multiple input streams. In case
* of a parse error, the parser will attempt to recover and
* throw all the errors found in a parser log exception in the
* end.
*
* @return the parse tree
*
* @throws ParserCreationException if the parser couldn't be
* initialized correctly
* @throws ParserLogException if the input couldn't be parsed
* correctly
*
* @see #Prepare
* @see #Reset
* @see Tokenizer#Reset
*/
public Node Parse() {
Node root = null;
// Initialize parser
if (!initialized) {
Prepare();
}
this.tokens.Clear();
this.errorLog = new ParserLogException();
this.errorRecovery = -1;
// Parse input
try {
root = ParseStart();
} catch (ParseException e) {
AddError(e, true);
}
// Check for errors
if (errorLog.Count > 0) {
throw errorLog;
}
return root;
}
/**
* Parses the token stream and returns a parse tree.
*
* @return the parse tree
*
* @throws ParseException if the input couldn't be parsed
* correctly
*/
protected abstract Node ParseStart();
/**
* Factory method to create a new production node. This method
* can be overridden to provide other production implementations
* than the default one.
*
* @param pattern the production pattern
*
* @return the new production node
*
* @since 1.5
*/
protected virtual Production NewProduction(ProductionPattern pattern) {
return analyzer.NewProduction(pattern);
}
/**
* Adds an error to the error log. If the parser is in error
* recovery mode, the error will not be added to the log. If the
* recovery flag is set, this method will set the error recovery
* counter thus enter error recovery mode. Only lexical or
* syntactical errors require recovery, so this flag shouldn't be
* set otherwise.
*
* @param e the error to add
* @param recovery the recover flag
*/
internal void AddError(ParseException e, bool recovery) {
if (errorRecovery <= 0) {
errorLog.AddError(e);
}
if (recovery) {
errorRecovery = 3;
}
}
/**
* Returns the production pattern with the specified id.
*
* @param id the production pattern id
*
* @return the production pattern found, or
* null if non-existent
*/
internal ProductionPattern GetPattern(int id) {
return (ProductionPattern) patternIds[id];
}
/**
* Returns the production pattern for the starting production.
*
* @return the start production pattern, or
* null if no patterns have been added
*/
internal ProductionPattern GetStartPattern() {
if (patterns.Count <= 0) {
return null;
} else {
return (ProductionPattern) patterns[0];
}
}
/**
* Returns the ordered set of production patterns.
*
* @return the ordered set of production patterns
*/
internal ICollection GetPatterns() {
return patterns;
}
/**
* Handles the parser entering a production. This method calls the
* appropriate analyzer callback if the node is not hidden. Note
* that this method will not call any callback if an error
* requiring recovery has ocurred.
*
* @param node the parse tree node
*/
internal void EnterNode(Node node) {
if (!node.IsHidden() && errorRecovery < 0) {
try {
analyzer.Enter(node);
} catch (ParseException e) {
AddError(e, false);
}
}
}
/**
* Handles the parser leaving a production. This method calls the
* appropriate analyzer callback if the node is not hidden, and
* returns the result. Note that this method will not call any
* callback if an error requiring recovery has ocurred.
*
* @param node the parse tree node
*
* @return the parse tree node, or
* null if no parse tree should be created
*/
internal Node ExitNode(Node node) {
if (!node.IsHidden() && errorRecovery < 0) {
try {
return analyzer.Exit(node);
} catch (ParseException e) {
AddError(e, false);
}
}
return node;
}
/**
* Handles the parser adding a child node to a production. This
* method calls the appropriate analyzer callback. Note that this
* method will not call any callback if an error requiring
* recovery has ocurred.
*
* @param node the parent parse tree node
* @param child the child parse tree node, or null
*/
internal void AddNode(Production node, Node child) {
if (errorRecovery >= 0) {
// Do nothing
} else if (node.IsHidden()) {
node.AddChild(child);
} else if (child != null && child.IsHidden()) {
for (int i = 0; i < child.Count; i++) {
AddNode(node, child[i]);
}
} else {
try {
analyzer.Child(node, child);
} catch (ParseException e) {
AddError(e, false);
}
}
}
/**
* Reads and consumes the next token in the queue. If no token
* was available for consumation, a parse error will be
* thrown.
*
* @return the token consumed
*
* @throws ParseException if the input stream couldn't be read or
* parsed correctly
*/
internal Token NextToken() {
Token token = PeekToken(0);
if (token != null) {
tokens.RemoveAt(0);
return token;
} else {
throw new ParseException(
ParseException.ErrorType.UNEXPECTED_EOF,
null,
tokenizer.GetCurrentLine(),
tokenizer.GetCurrentColumn());
}
}
/**
* Reads and consumes the next token in the queue. If no token was
* available for consumation, a parse error will be thrown. A
* parse error will also be thrown if the token id didn't match
* the specified one.
*
* @param id the expected token id
*
* @return the token consumed
*
* @throws ParseException if the input stream couldn't be parsed
* correctly, or if the token wasn't expected
*/
internal Token NextToken(int id) {
Token token = NextToken();
ArrayList list;
if (token.Id == id) {
if (errorRecovery > 0) {
errorRecovery--;
}
return token;
} else {
list = new ArrayList(1);
list.Add(tokenizer.GetPatternDescription(id));
throw new ParseException(
ParseException.ErrorType.UNEXPECTED_TOKEN,
token.ToShortString(),
list,
token.StartLine,
token.StartColumn);
}
}
/**
* Returns a token from the queue. This method is used to check
* coming tokens before they have been consumed. Any number of
* tokens forward can be checked.
*
* @param steps the token queue number, zero (0) for first
*
* @return the token in the queue, or
* null if no more tokens in the queue
*/
internal Token PeekToken(int steps) {
Token token;
while (steps >= tokens.Count) {
try {
token = tokenizer.Next();
if (token == null) {
return null;
} else {
tokens.Add(token);
}
} catch (ParseException e) {
AddError(e, true);
}
}
return (Token) tokens[steps];
}
/**
* Returns a string representation of this parser. The string will
* contain all the production definitions and various additional
* information.
*
* @return a detailed string representation of this parser
*/
public override string ToString() {
StringBuilder buffer = new StringBuilder();
for (int i = 0; i < patterns.Count; i++) {
buffer.Append(ToString((ProductionPattern) patterns[i]));
buffer.Append("\n");
}
return buffer.ToString();
}
/**
* Returns a string representation of a production pattern.
*
* @param prod the production pattern
*
* @return a detailed string representation of the pattern
*/
private string ToString(ProductionPattern prod) {
StringBuilder buffer = new StringBuilder();
StringBuilder indent = new StringBuilder();
LookAheadSet set;
int i;
buffer.Append(prod.Name);
buffer.Append(" (");
buffer.Append(prod.Id);
buffer.Append(") ");
for (i = 0; i < buffer.Length; i++) {
indent.Append(" ");
}
buffer.Append("= ");
indent.Append("| ");
for (i = 0; i < prod.Count; i++) {
if (i > 0) {
buffer.Append(indent);
}
buffer.Append(ToString(prod[i]));
buffer.Append("\n");
}
for (i = 0; i < prod.Count; i++) {
set = prod[i].LookAhead;
if (set.GetMaxLength() > 1) {
buffer.Append("Using ");
buffer.Append(set.GetMaxLength());
buffer.Append(" token look-ahead for alternative ");
buffer.Append(i + 1);
buffer.Append(": ");
buffer.Append(set.ToString(tokenizer));
buffer.Append("\n");
}
}
return buffer.ToString();
}
/**
* Returns a string representation of a production pattern
* alternative.
*
* @param alt the production pattern alternative
*
* @return a detailed string representation of the alternative
*/
private string ToString(ProductionPatternAlternative alt) {
StringBuilder buffer = new StringBuilder();
for (int i = 0; i < alt.Count; i++) {
if (i > 0) {
buffer.Append(" ");
}
buffer.Append(ToString(alt[i]));
}
return buffer.ToString();
}
/**
* Returns a string representation of a production pattern
* element.
*
* @param elem the production pattern element
*
* @return a detailed string representation of the element
*/
private string ToString(ProductionPatternElement elem) {
StringBuilder buffer = new StringBuilder();
int min = elem.MinCount;
int max = elem.MaxCount;
if (min == 0 && max == 1) {
buffer.Append("[");
}
if (elem.IsToken()) {
buffer.Append(GetTokenDescription(elem.Id));
} else {
buffer.Append(GetPattern(elem.Id).Name);
}
if (min == 0 && max == 1) {
buffer.Append("]");
} else if (min == 0 && max == Int32.MaxValue) {
buffer.Append("*");
} else if (min == 1 && max == Int32.MaxValue) {
buffer.Append("+");
} else if (min != 1 || max != 1) {
buffer.Append("{");
buffer.Append(min);
buffer.Append(",");
buffer.Append(max);
buffer.Append("}");
}
return buffer.ToString();
}
/**
* Returns a token description for a specified token.
*
* @param token the token to describe
*
* @return the token description
*/
internal string GetTokenDescription(int token) {
if (tokenizer == null) {
return "";
} else {
return tokenizer.GetPatternDescription(token);
}
}
}
}