This is the twenty second part of the SQLxD series. For your convenience you can find other parts in the table of contents in Part 1 – XML Transformation

For parsing queries we use IronPython. Let’s begin with C# code for executing PLY engine:

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Reflection;
using IronPython.Hosting;
using IronPython.Modules;
using Microsoft.Scripting.Hosting;
using QueryLogic.Filtering;
using QueryLogic.Joins.Abstraction;
using QueryLogic.RelationProviding;
using QueryLogic.Selecting;
using QueryLogic.Selectors;

namespace QueryParser.Parsing
{
    public class Parser
    {
        private static dynamic _ipy;
        private Dictionary _aliasedSelectors;

        public Parser()
        {
            _ipy = _ipy ?? CreateEngine();
        }

        private dynamic CreateEngine()
        {
            ScriptRuntimeSetup setup = Python.CreateRuntimeSetup(GetRuntimeOptions());
            var pyRuntime = new ScriptRuntime(setup);
            ScriptEngine engineInstance = Python.GetEngine(pyRuntime);

            AddPythonLibrariesToSysMetaPath(engineInstance);

            dynamic ipy = pyRuntime.UseFile(@"Parsing\Parser.py");
            ipy.initialize(GetPlyPath());

            return ipy;
        }

        private void AddPythonLibrariesToSysMetaPath(ScriptEngine engineInstance)
        {
            Assembly asm = GetType().Assembly;
            IEnumerable resQuery =
                from name in asm.GetManifestResourceNames()
                where name.ToLowerInvariant().EndsWith("python27.zip")
                select name;
            string resName = resQuery.Single();
            var importer = new ResourceMetaPathImporter(asm, resName);
            dynamic sys = engineInstance.GetSysModule();
            sys.meta_path.append(importer);
            sys.path.append(importer);
        }

        private static Dictionary GetRuntimeOptions()
        {
            var options = new Dictionary();
            options["Debug"] = false;
            return options;
        }

        private static string GetPlyPath()
        {
            return Path.Combine(Environment.CurrentDirectory, "Lib", "ply");
        }

        public Tuple Parse(string content)
        {
            Tuple result;
            try
            {
                result = _ipy.parse(content);
            }
            catch (Exception e)
            {
                return Tuple.Create(null, new[]
                {
                    string.Format("{0}\n{1}\n{2}",e.Message, e.InnerException, e.StackTrace)
                });
            }
            FixSelectors(result.Item1);
            result = Tuple.Create(result.Item1, result.Item2);

            return result;
        }

        private void FixSelectors(Select result)
        {
            if (result == null)
            {
                return;
            }

            _aliasedSelectors = new Dictionary();
            IRelationProvider sourceProvider = result.GroupBy.SourceRelationProvider;
            FixSelectorInSourceRelationProvider(sourceProvider);
        }

        private void FixSelectorInSourceRelationProvider(IRelationProvider sourceRelationProvider)
        {
            if (sourceRelationProvider is From)
            {
                FixSelectorInFrom(sourceRelationProvider as From);
            }
            else if (sourceRelationProvider is IJoin)
            {
                FixSelectorInJoin(sourceRelationProvider as IJoin);
            }
            else if (sourceRelationProvider is Where)
            {
                FixSelectorInWhere(sourceRelationProvider as Where);
            }
        }

        private void FixSelectorInWhere(Where @where)
        {
            FixSelectorInSourceRelationProvider(@where.SourceRelation);
        }

        private void FixSelectorInJoin(IJoin @join)
        {
            FixSelectorInSourceRelationProvider(@join.FirstRelation);
            FixSelectorInSourceRelationProvider(@join.SecondRelation);
        }

        private void FixSelectorInFrom(From sourceProvider)
        {
            ISelector selector = sourceProvider.Selector;
            if (selector is NodeSelector)
            {
                sourceProvider.Selector = GetRealSelectorForNodeSelector(selector);
            }
            else if (selector is ChainedSelector)
            {
                sourceProvider.Selector = FixChainedSelector(selector as ChainedSelector);
            }

            _aliasedSelectors.Add(sourceProvider.Alias, sourceProvider.Selector);
        }

        private ChainedSelector FixChainedSelector(ChainedSelector chainedSelector)
        {
            return new ChainedSelector(GetRealSelectorForNodeSelector(chainedSelector.First), chainedSelector.Second);
        }

        private ISelector GetRealSelectorForNodeSelector(ISelector selector)
        {
            var nodeSelector = selector as NodeSelector;
            if (nodeSelector == null)
            {
                return selector;
            }

            ISelector aliasedSelector;
            _aliasedSelectors.TryGetValue(nodeSelector.Name, out aliasedSelector);

            return aliasedSelector ?? new TopLevelSelector(nodeSelector.Name);
        }
    }
}

And now goes the parser:

# -*- coding: utf-8 -*- 

def makeCaseInsensitive(s):
    return ''.join(sum(map(lambda c: ["[", c.lower(), c.upper(), "]"], s), []))

keywords = {
   'SELECT' : 'SELECT',
   'DISTINCT' : 'DISTINCT',
   'ALL' : 'ALL',
   'AS' : 'AS',
   'FROM' : 'FROM',
   'CROSS' : 'CROSS',
   'JOIN' : 'JOIN',
   'NATURAL' : 'NATURAL',
   'INNER' : 'INNER',
   'LEFT' : 'LEFT',
   'RIGHT' : 'RIGHT',
   'FULL' : 'FULL',
   'OUTER' : 'OUTER',
   'OR' : 'OR',
   'AND' : 'AND',
   'NOT' : 'NOT',
   'LIKE' : 'LIKE',
   'ESCAPE' : 'ESCAPE',
   'ON' : 'ON',
   'COUNT' : 'COUNT',
   'AVG' : 'AVG',
   'MAX' : 'MAX',
   'MIN' : 'MIN',
   'SUM' : 'SUM',
   'WHERE' : 'WHERE',
   'IS' : 'IS',
   'NULL' : 'NULL',
   'GROUP' : 'GROUP',
   'ORDER' : 'ORDER',
   'BY' : 'BY',
   'DESC' : 'DESC',
   'SKIP' : 'SKIP',
   'FETCH' : 'FETCH',
   'LEN' : 'LEN',
   'TRIM' : 'TRIM',
   'SUBSTR' : 'SUBSTR',
   'YEAR' : 'YEAR',
   'MONTH' : 'MONTH',
   'DAY' : 'DAY',
   'HOUR' : 'HOUR',
   'MINUTE' : 'MINUTE',
   'SECOND' : 'SECOND',
   'CATENATE' : 'CATENATE',
}

tokens = [
    'IDENTIFIER',
    'ASTERISK',
    'PERIOD',
    'QUESTION_MARK',
    'LEFT_PAREN',
    'RIGHT_PAREN',
    'EQUALS_OPERATOR',
    'NOT_EQUALS_OPERATOR',
    'LESS_THAN_OPERATOR',
    'GREATER_THAN_OPERATOR',
    'LESS_THAN_OR_EQUALS_OPERATOR',
    'GREATER_THAN_OR_EQUALS_OPERATOR',
    'QUOTTED_STRING',
    'COMMA',
    'NUMBER',
] + list(keywords.values())

errors = []

precedence = (
        ("right", 'NATURAL'),
)

t_ignore = ' \t\n\r'

def t_IDENTIFIER(t):
    r"[#a-zA-Z][#a-zA-Z0-9_]*"
    t.type = keywords.get(t.value.upper(),'IDENTIFIER') 
    return t

def t_ASTERISK(t):
    r"[*]"
    return t

def t_PERIOD(t):
    r"[.]"
    return t

def t_QUESTION_MARK(t):
    r"[?]"
    return t

def t_LEFT_PAREN(t):
    r"[(]"
    return t

def t_RIGHT_PAREN(t):
    r"[)]"
    return t

def t_EQUALS_OPERATOR(t):
    r"[=]"
    return t

def t_NOT_EQUALS_OPERATOR(t):
    r"[< ][>]"
    return t

def t_LESS_THAN_OR_EQUALS_OPERATOR(t):
    r"[< ][=]"
    return t

def t_GREATER_THAN_OR_EQUALS_OPERATOR(t):
    r"[>][=]"
    return t

def t_LESS_THAN_OPERATOR(t):
    r"[< ]"
    return t

def t_GREATER_THAN_OPERATOR(t):
    r"[>]"
    return t

def t_COMMA(t):
    r"[,]"
    return t

def t_NUMBER(t):
    r"[0-9]+"
    return t

def t_QUOTTED_STRING(t):
    r"['][^']*?[']"
    t.value = t.value[1:-1]
    return t


def t_error(t):
    global errors
    errors.append("Line {0:3}:\tIllegal character '{1}'".format(t.lexer.lineno, t.value[0]))
    t.lexer.skip(1)

def p_error(p):
    global errors
    if p:
        errors.append("Line {0:3}:\tSyntax error - unexpected '{1}' ".format(p.lineno, str(p.value)))
    else:
        errors.append("Syntax error - unexpected EOF ")
    print errors

def p_start(p):
    """start : querySpecification"""
    p[0] = p[1]


#< query specification>    ::= 
#          SELECT  [ < set quantifier> ] < select list> < table expression>
def p_querySpecification_without_set_quantifier(p):
    """querySpecification : SELECT selectList tableExpression"""
    p[0] = QueryLogic.Selecting.Select(p[3][0], p[3][1])
    p[0].AddRowTransformer(p[2][0])
    for aggregate in p[2][1]:
        p[0].AddAggregate(aggregate)

def p_querySpecification_with_set_quantifier(p):
    """querySpecification : SELECT setQuantifier selectList tableExpression"""
    pass

#< set quantifier>    ::=    DISTINCT  |  ALL
def p_setQuantifier_distinct(p):
    """setQuantifier : DISTINCT"""
    pass

def p_setQuantifier_all(p):
    """setQuantifier : ALL"""
    pass

#< select list>    ::= 
#         < asterisk>
#     |     < select sublist> [ { < comma> < select sublist> }... ]
def p_selectList_asterisk(p):
    """selectList : ASTERISK"""
    p[0] = (QueryLogic.Transformers.RowTransformers.IdentityRowTransformer(), [])

def p_selectList_selectListSelectSublist(p):
    """selectList : selectListSelectSublist"""
    cellTransformers = filter(lambda c: isinstance(c, QueryLogic.Transformers.CellTransformers.ICellTransformer), p[1])
    rowTransformer = QueryLogic.Transformers.RowTransformers.CellsRowTransformer(System.Array[QueryLogic.Transformers.CellTransformers.ICellTransformer](cellTransformers))
    aggregates = filter(lambda c: isinstance(c, QueryLogic.Aggregates.IAggregate), p[1])
    p[0] = (rowTransformer, aggregates)

def p_selectListSelectSublist_first(p):
    """selectListSelectSublist : selectSublist"""
    p[0] = p[1]

def p_selectListSelectSublist_next(p):
    """selectListSelectSublist : selectSublist COMMA selectListSelectSublist"""
    p[0] = p[1] + p[3]

#< select sublist>    ::=   < derived column> 
def p_selectSublist(p):
    """selectSublist : derivedColumn"""
    p[0] = [p[1]]

#< derived column>    ::=   < value expression> [ < as clause> ]
def p_derivedColumn_without_as_clause(p):
    """derivedColumn : valueExpression"""
    if isinstance(p[1], Model.ColumnHeader):
        p[0] = QueryLogic.Transformers.CellTransformers.CellTransformer(QueryLogic.Expressions.CellExpressions.GetOriginalCellCellExpression(p[1]))
    else:
        #p[0] = createAggregate(p[1], '')
        p[0] = parseFunctionOrAggregateStack(p[1], '')

def p_derivedColumn_with_as_clause(p):
    """derivedColumn : valueExpression asClause"""
    if isinstance(p[1], Model.ColumnHeader):
        p[0] = QueryLogic.Transformers.CellTransformers.CellTransformer(QueryLogic.Expressions.CellExpressions.GetOriginalCellCellExpression(p[1]), p[2])
    else:
        #p[0] = createAggregate(p[1], p[2])
        p[0] = parseFunctionOrAggregateStack(p[1], p[2])

def parseFunctionOrAggregateStack(stack, alias):
    innerFunctions, outerFunctions, aggregate = parseWholeStack(stack, [], [], None)

    if aggregate is None:
        expression = CreateChainedCellExpression(innerFunctions)
        result = QueryLogic.Transformers.CellTransformers.CellTransformer(expression, alias)
    elif aggregate == 'COUNT_*':
            result = QueryLogic.Aggregates.CountAggregate(QueryLogic.Expressions.CellExpressions.GetOriginalCellCellExpression(Model.ColumnHeader("", alias)), alias)
    else:
        innerExpression = CreateChainedCellExpression(innerFunctions)
        outerExpression = CreateChainedCellExpression(outerFunctions + [QueryLogic.Expressions.CellExpressions.GetOriginalCellCellExpression(Model.ColumnHeader("", alias))])
        if aggregate == 'AVG':
            result = QueryLogic.Aggregates.AverageAggregate(innerExpression, outerExpression, alias)
        elif aggregate == 'MAX':
            result = QueryLogic.Aggregates.MaximumAggregate(innerExpression, outerExpression, alias)
        elif aggregate == 'MIN':
            result = QueryLogic.Aggregates.MinimumAggregate(innerExpression, outerExpression, alias)
        elif aggregate == 'SUM':
            result = QueryLogic.Aggregates.SumAggregate(innerExpression, outerExpression, alias)
        elif aggregate == 'COUNT':
            result = QueryLogic.Aggregates.CountColumnAggregate(innerExpression, outerExpression, alias)
        else:
            result = None

    return result

def CreateChainedCellExpression(functions):
    if len(functions) > 1:
        selected = functions[0]
        if selected == 'LEN':
            inner = CreateChainedCellExpression(functions[1:])
            return QueryLogic.Expressions.CellExpressions.ChainedCellExpression(QueryLogic.Expressions.CellExpressions.GetLengthCellExpression(inner.Source), inner)
        elif selected == 'TRIM':
            inner = CreateChainedCellExpression(functions[1:])
            return QueryLogic.Expressions.CellExpressions.ChainedCellExpression(QueryLogic.Expressions.CellExpressions.GetTrimmedCellExpression(inner.Source), inner)
        elif selected == 'YEAR':
            inner = CreateChainedCellExpression(functions[1:])
            return QueryLogic.Expressions.CellExpressions.ChainedCellExpression(QueryLogic.Expressions.CellExpressions.GetDatePartCellExpression(inner.Source, QueryLogic.Expressions.CellExpressions.DatePart.Year), inner)
        elif selected == 'MONTH':
            inner = CreateChainedCellExpression(functions[1:])
            return QueryLogic.Expressions.CellExpressions.ChainedCellExpression(QueryLogic.Expressions.CellExpressions.GetDatePartCellExpression(inner.Source, QueryLogic.Expressions.CellExpressions.DatePart.Month), inner)
        elif selected == 'DAY':
            inner = CreateChainedCellExpression(functions[1:])
            return QueryLogic.Expressions.CellExpressions.ChainedCellExpression(QueryLogic.Expressions.CellExpressions.GetDatePartCellExpression(inner.Source, QueryLogic.Expressions.CellExpressions.DatePart.Day), inner)
        elif selected == 'HOUR':
            inner = CreateChainedCellExpression(functions[1:])
            return QueryLogic.Expressions.CellExpressions.ChainedCellExpression(QueryLogic.Expressions.CellExpressions.GetDatePartCellExpression(inner.Source, QueryLogic.Expressions.CellExpressions.DatePart.Hour), inner)
        elif selected == 'MINUTE':
            inner = CreateChainedCellExpression(functions[1:])
            return QueryLogic.Expressions.CellExpressions.ChainedCellExpression(QueryLogic.Expressions.CellExpressions.GetDatePartCellExpression(inner.Source, QueryLogic.Expressions.CellExpressions.DatePart.Minute), inner)
        elif selected == 'SECOND':
            inner = CreateChainedCellExpression(functions[1:])
            return QueryLogic.Expressions.CellExpressions.ChainedCellExpression(QueryLogic.Expressions.CellExpressions.GetDatePartCellExpression(inner.Source, QueryLogic.Expressions.CellExpressions.DatePart.Second), inner)

        elif type(selected) is tuple:
            if selected[0] == 'SUBSTR':
                inner = CreateChainedCellExpression(functions[1:])
                return QueryLogic.Expressions.CellExpressions.ChainedCellExpression(QueryLogic.Expressions.CellExpressions.GetSubstringCellExpression(inner.Source, selected[1], selected[2]), inner)
            if selected[0] == 'CATENATE':
                inner = CreateChainedCellExpression(functions[1:])
                return QueryLogic.Expressions.CellExpressions.ChainedCellExpression(QueryLogic.Expressions.CellExpressions.CatenateStringCellExpression(inner.Source, selected[1]), inner)
        else:
            return None
    else:
        return functions[0]

    return None

def parseWholeStack(stack, innerFunctions, outerFunctions, aggregate):
    if isinstance(stack, Model.ColumnHeader):
        return (innerFunctions + [QueryLogic.Expressions.CellExpressions.GetOriginalCellCellExpression(stack)], outerFunctions, aggregate)

    function, parameters = stack
    if aggregate is None:
        if function == 'AVG' or function == 'MAX' or function == 'MIN' or function == 'SUM' or function == 'COUNT':
            return parseInnerFunctions(parameters[0], [], innerFunctions, function)
        elif function == 'COUNT_*':
            return (innerFunctions, outerFunctions, function)
        
    if function == 'LEN' or function == 'TRIM' or function == 'YEAR' or function == 'MONTH' or function == 'DAY' or function == 'HOUR' or function == 'MINUTE' or function == 'SECOND':
        return parseWholeStack(parameters[0], innerFunctions + [function], outerFunctions, aggregate)
    elif function == 'SUBSTR':
        return parseWholeStack(parameters[0], innerFunctions + [(function, int(parameters[1]), int(parameters[2]))], outerFunctions, aggregate)
    elif function == 'CATENATE':
        return parseWholeStack(parameters[0], innerFunctions + [(function, parameters[1])], outerFunctions, aggregate)
    else:
        return None

def parseInnerFunctions(stack, innerFunctions, outerFunctions, aggregate):
    if isinstance(stack, Model.ColumnHeader):
        return (innerFunctions + [QueryLogic.Expressions.CellExpressions.GetOriginalCellCellExpression(stack)], outerFunctions, aggregate)
    
    function, parameters = stack
    if function == 'LEN' or function == 'TRIM' or function == 'YEAR' or function == 'MONTH' or function == 'DAY' or function == 'HOUR' or function == 'MINUTE' or function == 'SECOND':
        return parseInnerFunctions(parameters[0], innerFunctions + [function], outerFunctions, aggregate)
    elif function == 'SUBSTR':
        return parseInnerFunctions(parameters[0], innerFunctions + [(function, int(parameters[1]), int(parameters[2]))], outerFunctions, aggregate)
    elif function == 'CATENATE':
        return parseInnerFunctions(parameters[0], innerFunctions + [(function, parameters[1])], outerFunctions, aggregate)
    else:
        return None

#< value expression>    ::=      < string value expression>
def p_valueExpression_stringValueExpression(p):
    """valueExpression : stringValueExpression"""
    p[0] = p[1]

#< string value expression>    ::=   < character value expression> | < bit value expression>
def p_stringValueExpression_characterValueExpression(p):
    """stringValueExpression : characterValueExpression"""
    p[0] = p[1]

#< character value expression>    ::=   < character factor>
def p_characterValueExpression_characterFactor(p):
    """characterValueExpression : characterFactor"""
    p[0] = p[1]

#< character factor>    ::=   < character primary>
def p_characterFactor_characterPrimary(p):
    """characterFactor : characterPrimary"""
    p[0] = p[1]

#< character primary>    ::=   < value expression primary>
def p_characterPrimary_valueExpression_Primary(p):
    """characterPrimary : valueExpressionPrimary"""
    p[0] = p[1]

#< value expression primary>    ::= 
#       < unsigned value specification>
#   | < column reference>
#   | < set function specification>
def p_valueExpressionPrimary_unsignedValueSpecification(p):
    """valueExpressionPrimary : unsignedValueSpecification"""
    p[0] = p[1]

def p_valueExpressionPrimary_columnReference(p):
    """valueExpressionPrimary : columnReference"""
    p[0] = p[1]

def p_valueExpressionPrimary_setFunctionSpecification(p):
    """valueExpressionPrimary : setFunctionSpecification"""
    p[0] = p[1]

#< set function specification>    ::= 
#          COUNT < left paren> < asterisk> < right paren>
#     | < general set function>
#< general set function>    ::= 
#         < set function type> < left paren> < value expression> < right paren>
def p_setFunctionSpecification_generalSetFunction(p):
    """setFunctionSpecification : generalSetFunction"""
    p[0] = p[1]

def p_generalSetFunction_valueExpression(p):
    """generalSetFunction : setFunctionType LEFT_PAREN manyValueExpressions RIGHT_PAREN"""
    p[0] = (p[1], p[3])

def p_manyValueExpressions_first(p):
    """manyValueExpressions : valueExpression"""
    p[0] = [p[1]]

def p_manyValueExpressions_mext(p):
    """manyValueExpressions : manyValueExpressions COMMA valueExpression"""
    p[0] = p[1] + [p[3]]


def p_generalSetFunction_ASTERISK(p):
    """generalSetFunction : setFunctionType LEFT_PAREN ASTERISK RIGHT_PAREN"""
    p[0] = ('COUNT_*', p[3])

#< set function type>    ::=    AVG  |  MAX  |  MIN  |  SUM  |  COUNT
def p_setFunctionType(p):
    """setFunctionType : AVG  
        | MAX  
        | MIN  
        | SUM  
        | COUNT
        | LEN
        | TRIM
        | SUBSTR
        | YEAR
        | MONTH
        | DAY
        | HOUR
        | MINUTE
        | SECOND
        | CATENATE"""
    p[0] = p[1]

#< column reference>    ::=   < qualifier> < period> < column name> [ < period> < column name> ... ]
def p_columnReference_columnNames(p):
    """columnReference : qualifier PERIOD columnNames"""
    p[0] = Model.ColumnHeader(p[1], p[3])

def p_columnNames_first(p):
    """columnNames : columnName"""
    p[0] = p[1]

def p_columnNames_next(p):
    """columnNames : columnName PERIOD columnNames"""
    p[0] = p[1] + '.' + p[3]

#< column name>    ::=   < identifier>
def p_columnName(p):
    """columnName : identifier"""
    p[0] = p[1]

#< as clause>    ::=   [  AS  ] < column name>
def p_asClause_without_as_keyword(p):
    """asClause : columnName"""
    p[0] = p[1]

def p_asClause_with_as_keyword(p):
    """asClause : AS columnName"""
    p[0] = p[2]

#< unsigned value specification>    ::=   < unsigned literal>
def p_unsignedValueSpecification(p):
    """unsignedValueSpecification : unsignedLiteral"""
    p[0] = p[1]

#< unsigned literal>    ::=    < unsigned numeric literal> | < general literal>
def p_unsignedLiteral_generalLiteral(p):
    """unsignedLiteral : generalLiteral"""
    p[0] = p[1]

def p_unsignedLiteral_unsignedNumericLiteral(p):
    """unsignedLiteral : unsignedNumericLiteral"""
    p[0] = p[1]

#< unsigned numeric literal>    ::= 
#         < exact numeric literal>
def p_unsignedNumericLiteral_exactNumericLiteral(p):
    """unsignedNumericLiteral : exactNumericLiteral"""
    p[0] = p[1]

#< exact numeric literal>    ::= 
#         < unsigned integer>
def p_exactNumericLiteral_unsignedInteger(p):
    """exactNumericLiteral : unsignedInteger"""
    p[0] = p[1]

#< exact numeric literal>    ::= 
#         < unsigned integer>
def p_unsignedInteger(p):
    """unsignedInteger : NUMBER"""
    p[0] = p[1]

#< general literal>    ::= 
#         < character string literal>
def p_generalLiteral(p):
    """generalLiteral : characterStringLiteral"""
    p[0] = p[1]

#< character string literal>    ::= 
#     QUOTE [ < character representation> ... ] QUOTE
#< character representation>    ::=   < nonquote character>
def p_characterStringLiteral(p):
    """characterStringLiteral : QUOTTED_STRING"""
    p[0] = p[1]

#< identifier>    ::=   < actual identifier>
def p_identifier(p):
    """identifier : actualIdentifier"""
    p[0] = p[1]

#< actual identifier>    ::=   < regular identifier>
def p_actualIdentifier(p):
    """actualIdentifier : regularIdentifier"""
    p[0] = p[1]

#< regular identifier>    ::=   < identifier body>
def p_regularIdentifier(p):
    """regularIdentifier : identifierBody"""
    p[0] = p[1]

#< identifier body>    ::=   < identifier start> [ { < underscore> | < identifier part> } ... ]
#< identifier start>    ::=   !! See the Syntax rules
#< identifier part>    ::=   < identifier start> | < digit>
def p_identifierBody(p):
    """identifierBody : IDENTIFIER"""
    p[0] = p[1]

#< table expression>    ::= 
#         < from clause>
#         [ < where clause> ]
#         [ < group by clause> ]
#         [ < order by clause]
def p_tableExpression_only_fromClause(p):
    """tableExpression : fromClause"""
    p[0] = (p[1], None)

def p_tableExpression_fromClause_whereClause(p):
    """tableExpression : fromClause whereClause"""
    p[0] = (QueryLogic.Filtering.Where(p[1], p[2]), None)

def p_tableExpression_fromClause_groupByClause(p):
    """tableExpression : fromClause groupByClause"""
    p[0] = (QueryLogic.Grouping.GroupBy(p[1], p[2]), None)

def p_tableExpression_fromClause_whereClause_groupByClause(p):
    """tableExpression : fromClause whereClause groupByClause"""
    p[0] = (QueryLogic.Grouping.GroupBy(QueryLogic.Filtering.Where(p[1], p[2]), p[3]), None)

def p_tableExpression_fromClause_orderByClause(p):
    """tableExpression : fromClause orderByClause"""
    p[0] = (p[1], p[2])

def p_tableExpression_fromClause_groupByClause_orderByClause(p):
    """tableExpression : fromClause groupByClause orderByClause"""
    p[0] = (QueryLogic.Grouping.GroupBy(p[1], p[2]), p[3])

def p_tableExpression_fromClause_whereClause_orderByClause(p):
    """tableExpression : fromClause whereClause orderByClause"""
    p[0] = (QueryLogic.Filtering.Where(p[1], p[2]), p[3])

def p_tableExpression_fromClause_whereClause_groupByClause_orderByClause(p):
    """tableExpression : fromClause whereClause groupByClause orderByClause"""
    p[0] = (QueryLogic.Grouping.GroupBy(QueryLogic.Filtering.Where(p[1], p[2]), p[3]), p[4])

#< from clause>    ::=    FROM < table reference>
def p_fromClause(p):
    """fromClause : FROM tableReference"""
    p[0] = p[2]

#< table reference>    ::= 
#         < table name>  < correlation specification> 
#     | < joined table>
def p_tableReference_tableReferenceFrom(p):
    """tableReference : tableReferenceFrom"""
    p[0] = p[1]

def p_tableReference_joinedTable(p):
    """tableReference : joinedTable"""
    p[0] = p[1]

def p_tableReferenceFrom_tableName(p):
    """tableReferenceFrom : tableName correlationSpecification"""
    p[0] = QueryLogic.RelationProviding.From(p[1], p[2])


#< table name>    ::=   < qualified name>
def p_tableName(p):
    """tableName : qualifiedName"""
    p[0] = p[1]

#< qualified name>    ::=    < qualified identifier> [ PERIOD < qualified identifier> ...]
def p_qalifiedName(p):
    """qualifiedName : qualifiedIdentifiers"""
    p[0] = p[1]

def p_qalifiedName_first(p):
    """qualifiedIdentifiers : qualifiedIdentifier"""
    p[0] = p[1]

def p_qalifiedName_next(p):
    """qualifiedIdentifiers : qualifiedIdentifier PERIOD qualifiedIdentifiers"""
    p[0] = QueryLogic.Selectors.ChainedSelector(p[1], p[3])

#< qualified identifier>    ::=   < identifier> | QUESTION_MARK | ASTERISK
def p_qalifiedIdentifier_identifier(p):
    """qualifiedIdentifier : identifier"""
    p[0] = QueryLogic.Selectors.NodeSelector(p[1])

def p_qalifiedIdentifier_QUESTION_MARK(p):
    """qualifiedIdentifier : QUESTION_MARK"""
    p[0] = QueryLogic.Selectors.LevelSelector()

def p_qalifiedIdentifier_ASTERISK(p):
    """qualifiedIdentifier : ASTERISK"""
    p[0] = QueryLogic.Selectors.AnySelector()


#< correlation specification>    ::= 
#          AS   < correlation name> 
#< correlation name>    ::=   < identifier>
def p_correlationSpecification(p):
    """correlationSpecification : AS identifier"""
    p[0] = p[2]

#< joined table>    ::= 
#         < cross join>
#     | < qualified join>
def p_joinedTable_crossJoin(p):
    """joinedTable : crossJoin"""
    p[0] = p[1]

def p_joinedTable_qualifiedJoin(p):
    """joinedTable : qualifiedJoin"""
    p[0] = p[1]

#< cross join>    ::= 
#         < table reference>  CROSS  JOIN < table reference>
def p_crossJoin(p):
    """crossJoin : tableReference CROSS JOIN tableReference"""
    p[0] = QueryLogic.Joins.Implementation.CrossJoin(p[1], p[4])

#< qualified join>    ::= 
#         < table reference>  < join type> JOIN < table reference> [ < join specification> ]
#< join type>    ::= 
#		NATURAL
#	  | INNER 
#     | < outer join type> [  OUTER  ]
#< outer join type>    ::=    LEFT  |  RIGHT  |  FULL
def p_qualifiedJoin_natural(p):
    """qualifiedJoin : tableReference NATURAL JOIN tableReferenceFrom"""
    p[0] = QueryLogic.Joins.Implementation.NaturalJoin(p[1], p[4])

def p_qualifiedJoin_inner(p):
    """qualifiedJoin : tableReference INNER JOIN tableReference joinSpecification"""
    p[0] = QueryLogic.Joins.Implementation.InnerJoin(p[1], p[4], p[5])

def p_qualifiedJoin_left_outer(p):
    """qualifiedJoin : tableReference LEFT OUTER JOIN tableReference joinSpecification"""
    p[0] = QueryLogic.Joins.Implementation.LeftOuterJoin(p[1], p[5], p[6])

def p_qualifiedJoin_left(p):
    """qualifiedJoin : tableReference LEFT JOIN tableReference joinSpecification"""
    p[0] = QueryLogic.Joins.Implementation.LeftOuterJoin(p[1], p[4], p[5])

def p_qualifiedJoin_right_outer(p):
    """qualifiedJoin : tableReference RIGHT OUTER JOIN tableReference joinSpecification"""
    p[0] = QueryLogic.Joins.Implementation.RightOuterJoin(p[1], p[5], p[6])

def p_qualifiedJoin_right(p):
    """qualifiedJoin : tableReference RIGHT JOIN tableReference joinSpecification"""
    p[0] = QueryLogic.Joins.Implementation.RightOuterJoin(p[1], p[4], p[5])

def p_qualifiedJoin_full_outer(p):
    """qualifiedJoin : tableReference FULL OUTER JOIN tableReference joinSpecification"""
    p[0] = QueryLogic.Joins.Implementation.FullOuterJoin(p[1], p[5], p[6])

def p_qualifiedJoin_full(p):
    """qualifiedJoin : tableReference FULL JOIN tableReference joinSpecification"""
    p[0] = QueryLogic.Joins.Implementation.FullOuterJoin(p[1], p[4], p[5])

#< join specification>    ::=   < join condition>
def p_joinSpecification_joinCondition(p):
    """joinSpecification : joinCondition"""
    p[0] = p[1]

#< join condition>    ::=    ON < search condition>
def p_joinCondition_searchCondition(p):
    """joinCondition : ON searchCondition"""
    p[0] = p[2]

#< search condition>    ::= 
#         < boolean term>
#     | < search condition>  OR < boolean term>
def p_searchCondition_booleanTerm(p):
    """searchCondition : booleanTerm"""
    p[0] = p[1]

def p_searchCondition_searchCondition_OR_booleanTerm(p):
    """searchCondition : booleanTerm OR searchCondition"""
    p[0] = QueryLogic.Predicates.Complex.OrPredicate(p[1], p[3])

#< boolean term>    ::= 
#         < boolean factor>
#     | < boolean term>  AND < boolean factor>
def p_booleanTerm_booleanFactor(p):
    """booleanTerm : booleanFactor"""
    p[0] = p[1]

def p_booleanTerm_booleanTerm_AND_booleanFactor(p):
    """booleanTerm : booleanFactor AND booleanTerm"""
    p[0] = QueryLogic.Predicates.Complex.AndPredicate(p[1], p[3])

#< boolean factor>    ::=   [  NOT  ] < boolean test>
def p_booleanFactor_booleanTest(p):
    """booleanFactor : booleanTest"""
    p[0] = p[1]

def p_booleanFactor_NOT_booleanTest(p):
    """booleanFactor : NOT booleanTest"""
    p[0] = QueryLogic.Predicates.Complex.NotPredicate(p[2])

#< boolean test>    ::=   < boolean primary> 
def p_booleanTest_booleanPrimary(p):
    """booleanTest : booleanPrimary"""
    p[0] = p[1]

#< boolean primary>    ::=   < predicate> | < left paren> < search condition> < right paren>
def p_booleanPrimary_predicate(p):
    """booleanPrimary : predicate"""
    p[0] = p[1]

def p_booleanPrimary_LEFT_PAREN_searchCondition_rightParen(p):
    """booleanPrimary : LEFT_PAREN searchCondition RIGHT_PAREN"""
    p[0] = p[2]

#< predicate>    ::= 
#         < comparison predicate>
#     | < like predicate>
#     | < null predicate>
def p_predicate_comparisonPredicate(p):
    """predicate : comparisonPredicate"""
    p[0] = p[1]

def p_predicate_likePredicate(p):
    """predicate : likePredicate"""
    p[0] = p[1]

def p_predicate_nullPredicate(p):
    """predicate : nullPredicate"""
    p[0] = p[1]

#< comparison predicate>    ::=   < row value constructor> < comp op> < row value constructor>
def p_comparisonPredicate(p):
    """comparisonPredicate : rowValueConstructor compOp rowValueConstructor"""
    left = p[1]
    right = p[3]
    operator = p[2]
    if operator == '=':
        predicate = QueryLogic.Predicates.Simple.EqualPredicate(left, right)
    elif operator == '< >':
        predicate = QueryLogic.Predicates.Simple.NotEqualPredicate(left, right)
    elif operator == '>':
        predicate = QueryLogic.Predicates.Simple.GreaterThanPredicate(left, right)
    elif operator == '< ':
        predicate = QueryLogic.Predicates.Simple.LessThanPredicate(left, right)
    elif operator == '>=':
        predicate = QueryLogic.Predicates.Simple.GreaterEqualPredicate(left, right)
    elif operator == '< =':
        predicate = QueryLogic.Predicates.Simple.LessEqualPredicate(left, right)
    else:
        predicate = None
    p[0] = predicate

#< row value constructor>    ::= 
#         < row value constructor element>
def p_rowValueConstructor_rowValueConstructorElement(p):
    """rowValueConstructor : rowValueConstructorElement"""
    p[0] = p[1]

def p_rowValueConstructorElement_valueExpression(p):
    """rowValueConstructorElement : valueExpression"""
    if isinstance(p[1], Model.ColumnHeader):
        p[0] = QueryLogic.Expressions.RowExpressions.GetCellRowExpression(p[1])
    else:
        p[0] = QueryLogic.Expressions.RowExpressions.ConstantRowExpression(p[1])

#< like predicate>    ::=   < match value> [  NOT  ]  LIKE < pattern> [  ESCAPE < escape character> ]
def p_likePredicate_matchValue_LIKE_pattern(p):
    """likePredicate : matchValue LIKE pattern"""
    p[0] = createLike(p[1], p[3], '\0')

def p_likePredicate_matchValue_NOT_LIKE_pattern(p):
    """likePredicate : matchValue NOT LIKE pattern"""
    p[0] = QueryLogic.Predicates.Complex.NotPredicate(createLike(p[1], p[4], '\0'))

def p_likePredicate_matchValue_LIKE_pattern_ESCAPE_escapeCharacter(p):
    """likePredicate : matchValue LIKE pattern ESCAPE escapeCharacter"""
    p[0] = createLike(p[1], p[3], p[5])

def p_likePredicate_matchValue_NOT_LIKE_pattern_ESCAPE_escapeCharacter(p):
    """likePredicate : matchValue NOT LIKE pattern ESCAPE escapeCharacter"""
    p[0] = QueryLogic.Predicates.Complex.NotPredicate(createLike(p[1], p[4], p[6]))

def createLike(value, pattern, escapeCharacter):
    return QueryLogic.Predicates.Simple.LikePredicate(QueryLogic.Expressions.RowExpressions.GetCellRowExpression(value),
                                                      QueryLogic.Expressions.RowExpressions.ConstantRowExpression(pattern),
                                                      escapeCharacter)

#< match value>    ::=   < character value expression>
def p_matchValue(p):
    """matchValue : characterValueExpression"""
    p[0] = p[1]

#< qualifier>    ::=   < table name> | < correlation name>
def p_qualifier_tableName(p):
    """qualifier : identifier"""
    p[0] = p[1]

#< pattern>    ::=   < character value expression>
def p_pattern(p):
    """pattern : characterValueExpression"""
    p[0] = p[1]

#< escape character>    ::=   < character value expression>
def p_escapeCharacter(p):
    """escapeCharacter : characterValueExpression"""
    p[0] = p[1]

#< null predicate>    ::=    IS  [  NOT  ]  NULL
#[AF] Tutaj jak dla mnie jest bug, powinno być < row value constructor> IS [ NOT ] NULL
def p_nullPredicate_isNull(p):
    """nullPredicate : rowValueConstructor IS NULL"""
    p[0] = QueryLogic.Predicates.Simple.IsNullPredicate(p[1])

#< where clause>    ::=    WHERE < search condition>
def p_whereClause(p):
    """whereClause : WHERE searchCondition"""
    p[0] = p[2]

#< group by clause>    ::=    GROUP  BY < grouping column reference list>
def p_groupByClause(p):
    """groupByClause : GROUP BY groupingColumnReferenceList"""
    p[0] = System.Array[QueryLogic.Grouping.GroupingSet]([QueryLogic.Grouping.GroupingSet(System.Array[Model.ColumnHeader](p[3]))])

#< grouping column reference list>    ::=
#         < grouping column reference> [ { < comma> < grouping column reference> }... ]
def p_groupingColumnReferenceList_next(p):
    """groupingColumnReferenceList : groupingColumnReference COMMA groupingColumnReferenceList"""
    p[0] = [p[1]] + p[3]

def p_groupingColumnReferenceList_first(p):
    """groupingColumnReferenceList : groupingColumnReference"""
    p[0] = [p[1]]
    
#< grouping column reference>    ::=   < column reference> 
def p_groupingColumnReference(p):
    """groupingColumnReference : columnReference"""
    p[0] = p[1]

#< order by clause>  ::=     ORDER BY < ordering column reference list> [ SKIP < unsigned integer> ] [ FETCH < unsigned integer> ]
def p_orderByClause_orderingColumnReferenceList(p):
    """orderByClause : ORDER BY orderingColumnReferenceList"""
    p[0] = createOrderBy(p[3], None, None)

def p_orderByClause_orderingColumnReferenceList_skip(p):
    """orderByClause : ORDER BY orderingColumnReferenceList SKIP unsignedInteger"""
    p[0] = createOrderBy(p[3], p[5], None)

def p_orderByClause_orderingColumnReferenceList_fetch(p):
    """orderByClause : ORDER BY orderingColumnReferenceList FETCH unsignedInteger"""
    p[0] = createOrderBy(p[3], None, p[5])

def p_orderByClause_orderingColumnReferenceList_skip_fetch(p):
    """orderByClause : ORDER BY orderingColumnReferenceList SKIP unsignedInteger FETCH unsignedInteger"""
    p[0] = createOrderBy(p[3], p[5], p[7])

def createOrderBy(columns, skip, fetch):
    asc = QueryLogic.Ordering.OrderDirection.Ascending
    desc = QueryLogic.Ordering.OrderDirection.Descending
    skip = castToNullableInt(skip)
    fetch = castToNullableInt(fetch)
    columns = System.Array[QueryLogic.Ordering.ColumnOrdering](
        map(lambda column: QueryLogic.Ordering.ColumnOrdering(column[0], asc if column[1] == 'ASC' else desc),
            columns))
    return QueryLogic.Ordering.OrderBy(columns, skip, fetch)

def castToNullableInt(value):
    return None if value is None else System.Nullable[int](int(value))

#< ordering column reference list>    ::=
#         < ordering column reference> [ { < comma> < ordering column reference> }... ]
def p_orderingColumnReferenceList_next(p):
    """orderingColumnReferenceList : orderingColumnReference COMMA orderingColumnReferenceList"""
    p[0] = [p[1]] + p[3]

def p_orderingColumnReferenceList_first(p):
    """orderingColumnReferenceList : orderingColumnReference"""
    p[0] = [p[1]]
    
#< ordering column reference>    ::=   < column reference> [ DESC ]
def p_orderingColumnReference(p):
    """orderingColumnReference : columnReference"""
    p[0] = (p[1], 'ASC')

def p_orderingColumnReference_DESC(p):
    """orderingColumnReference : columnReference DESC"""
    p[0] = (p[1], 'DESC')

#< comp op>    ::= 
#         < equals operator>
#     | < not equals operator>
#     | < less than operator>
#     | < greater than operator>
#     | < less than or equals operator>
#     | < greater than or equals operator>
def p_compOp_EQUALS_OPERATOR(p):
    """compOp : EQUALS_OPERATOR"""
    p[0] = p[1]

def p_compOp_NOT_EQUALS_OPERATOR(p):
    """compOp : NOT_EQUALS_OPERATOR"""
    p[0] = p[1]

def p_compOp_LESS_THAN_OPERATOR(p):
    """compOp : LESS_THAN_OPERATOR"""
    p[0] = p[1]

def p_compOp_GREATER_THAN_OPERATOR(p):
    """compOp : GREATER_THAN_OPERATOR"""
    p[0] = p[1]

def p_compOp_LESS_THAN_OR_EQUALS_OPERATOR(p):
    """compOp : LESS_THAN_OR_EQUALS_OPERATOR"""
    p[0] = p[1]

def p_compOp_GREATER_THAN_OR_EQUALS_OPERATOR(p):
    """compOp : GREATER_THAN_OR_EQUALS_OPERATOR"""
    p[0] = p[1]


def initialize(plyBasePath):
    global yacc
    global lex
    global sys
    global clr
    global parser
    global lexer
    global System
    global Model
    global QueryLogic

    import imp
    import sys
    import clr

    lex = imp.load_source('ply.lex', plyBasePath + '\\lex.py')
    yacc = imp.load_source('ply.yacc',  plyBasePath + '\\yacc.py')
    lexer = lex.lex(module = sys.modules[__name__], debug=1)
    parser = yacc.yacc(module = sys.modules[__name__])

    clr.AddReference("Model")
    clr.AddReference("QueryLogic")
    clr.AddReference("System")
    import System
    import Model
    import QueryLogic


def parse(text):
    if lex == 0 or yacc == 0:
        raise RuntimeError("Not initialized")

    global errors
    errors = []

    parsedObject = parser.parse(text, lexer=lexer)
    return System.Tuple.Create[QueryLogic.Selecting.Select, System.Array[System.String]](parsedObject, System.Array[System.String](errors))

This is simply an implementation of grammar parsing with some amendments for handling muli-part identifiers and with non-standard construct SKIP FETCH instead of OFFSET FETCH. If you understand PLY then this code should be pretty straightforward. In the next part we will implement the tests.