This is the fifth part of the SQLxD series. For your convenience you can find other parts in the table of contents in Part 1 – XML Transformation

We already have the model, can parse XML document, and know how to refer to specific node in it. It is time to transform node into row.

As described in XML Transformation we map single node (optionally containing nested nodes) into single row (just like in SQL database). For that, we flatten all nested properties and choose the first value for each of them. We store result in the following manner:

using System;
using System.Collections.Generic;
using System.Linq;

namespace Model
{
    public class Row : IEquatable< Row>
    {
        private readonly IList< Cell> _cells;
        private IList< GuidCell> _guids;

        public Row()
        {
            _cells = new List< Cell>();
            _guids = new List< GuidCell>();
        }

        public Row(IEnumerable< Cell> cells)
            : this()
        {
            AddCells(cells);
        }

        public IEnumerable< ColumnHeader> Columns
        {
            get { return _cells.Select(c => c.ColumnHeader); }
        }

        public IEnumerable< Cell> Cells
        {
            get { return _cells; }
        }

        public IEnumerable< GuidCell> Guids
        {
            get { return _guids; }
        }

        public bool Equals(Row other)
        {
            if (ReferenceEquals(null, other)) return false;
            if (ReferenceEquals(this, other)) return true;

            return
                _cells.OrderBy(c => c.ColumnHeader.ToString())
                    .SequenceEqual(other._cells.OrderBy(c => c.ColumnHeader.ToString()));
        }

        public void AddCell(Cell cell)
        {
            _cells.Add(cell);
        }

        public void AddCells(IEnumerable< Cell> cells)
        {
            foreach (Cell cell in cells)
            {
                AddCell(cell);
            }
        }

        public void AddGuid(GuidCell cell)
        {
            _guids.Add(cell);
        }

        public void AddGuids(IEnumerable< GuidCell> cells)
        {
            foreach (GuidCell cell in cells)
            {
                AddGuid(cell);
            }
        }

        public bool HasColumn(ColumnHeader columnHeader)
        {
            return Columns.Contains(columnHeader);
        }

        public string GetCellValue(ColumnHeader columnHeader)
        {
            return GetCell(columnHeader).Value;
        }

        public Cell GetCell(ColumnHeader columnHeader)
        {
            Cell cell = _cells.Single(c => Equals(c.ColumnHeader, columnHeader));
            return cell;
        }

        public override bool Equals(object obj)
        {
            if (ReferenceEquals(null, obj)) return false;
            if (ReferenceEquals(this, obj)) return true;
            if (obj.GetType() != GetType()) return false;
            return Equals((Row)obj);
        }

        public override int GetHashCode()
        {
            unchecked
            {
                return ((_cells != null ? _cells.GetHashCode() : 0) * 397);
            }
        }

        public override string ToString()
        {
            return string.Join("   ", Cells.Select(c => string.Format("{0} = {1}", c.ColumnHeader, c.Value ?? "(null)")));
        }

        public GuidCell GetGuid(string schema, string name)
        {
            return _guids.SingleOrDefault(g => g.GuidHeader.Name == name && g.GuidHeader.Schema == schema);
        }

        public void RemoveDuplicateGuidsByName()
        {
            _guids = _guids.GroupBy(g => g.GuidHeader.Name).Select(cells => cells.First()).ToList();
        }
    }
}

Each row has cells and guids. Cells represent column values, whereas guids represents row origin.

What is origin? We have notion of natural join in our database. In typical SQL databases natural join uses columns with the same names in both tables as a predicate. However, in SQLxD natural join means connecting by parent-child relationship. Since we are allowed to rename columns when performing selection we cannot rely on mechanism comparing only column names (even using some fancy logic with stripping prefixes etc). However, during parsing we assign guid to each node so we can store them in rows to remember row’s origin.

Rest of the model is as follows:

using System;

namespace Model
{
    public class Cell : IEquatable< Cell>, IConvertible
    {
        public Cell(ColumnHeader header, string value)
        {
            ColumnHeader = header;
            Value = value;
        }

        public Cell(string schema, string name, string value)
            : this(new ColumnHeader(schema, name), value)
        {
        }

        public ColumnHeader ColumnHeader { get; private set; }

        public string Value { get; private set; }

        public TypeCode GetTypeCode()
        {
            bool b;
            if (Boolean.TryParse(Value, out b))
            {
                return TypeCode.Boolean;
            }

            DateTime dt;
            if (DateTime.TryParse(Value, out dt))
            {
                return TypeCode.DateTime;
            }

            double d;
            if (Double.TryParse(Value, out d))
            {
                return TypeCode.Double;
            }

            return TypeCode.String;
        }

        public bool ToBoolean(IFormatProvider provider)
        {
            return Boolean.Parse(Value);
        }

        public double ToDouble(IFormatProvider provider)
        {
            return Double.Parse(Value);
        }

        public DateTime ToDateTime(IFormatProvider provider)
        {
            return DateTime.Parse(Value);
        }

        public string ToString(IFormatProvider provider)
        {
            return Value;
        }

        public bool Equals(Cell other)
        {
            if (ReferenceEquals(null, other)) return false;
            if (ReferenceEquals(this, other)) return true;
            return Equals(ColumnHeader, other.ColumnHeader) && string.Equals(Value, other.Value);
        }

        public override bool Equals(object obj)
        {
            if (ReferenceEquals(null, obj)) return false;
            if (ReferenceEquals(this, obj)) return true;
            if (obj.GetType() != GetType()) return false;
            return Equals((Cell)obj);
        }

        public override int GetHashCode()
        {
            unchecked
            {
                return ((ColumnHeader != null ? ColumnHeader.GetHashCode() : 0) * 397) ^
                       (Value != null ? Value.GetHashCode() : 0);
            }
        }

        #region Invalid Cast Exception

        public char ToChar(IFormatProvider provider)
        {
            throw new InvalidCastException();
        }

        public sbyte ToSByte(IFormatProvider provider)
        {
            throw new InvalidCastException();
        }

        public byte ToByte(IFormatProvider provider)
        {
            throw new InvalidCastException();
        }

        public short ToInt16(IFormatProvider provider)
        {
            throw new InvalidCastException();
        }

        public ushort ToUInt16(IFormatProvider provider)
        {
            throw new InvalidCastException();
        }

        public int ToInt32(IFormatProvider provider)
        {
            throw new InvalidCastException();
        }

        public uint ToUInt32(IFormatProvider provider)
        {
            throw new InvalidCastException();
        }

        public long ToInt64(IFormatProvider provider)
        {
            throw new InvalidCastException();
        }

        public ulong ToUInt64(IFormatProvider provider)
        {
            throw new InvalidCastException();
        }

        public float ToSingle(IFormatProvider provider)
        {
            throw new InvalidCastException();
        }

        public decimal ToDecimal(IFormatProvider provider)
        {
            throw new InvalidCastException();
        }

        public object ToType(Type conversionType, IFormatProvider provider)
        {
            throw new InvalidCastException();
        }

        #endregion
    }
}
using System;

namespace Model
{
    public class ColumnHeader : IEquatable< ColumnHeader>
    {
        public ColumnHeader(string schema, string name)
        {
            Name = name;
            Schema = schema;
        }

        public string Name { get; private set; }
        public string Schema { get; private set; }

        public bool Equals(ColumnHeader other)
        {
            if (ReferenceEquals(null, other)) return false;
            if (ReferenceEquals(this, other)) return true;
            return string.Equals(Name, other.Name) && string.Equals(Schema, other.Schema);
        }

        public override string ToString()
        {
            return string.Format("{0}.{1}", Schema, Name);
        }

        public override bool Equals(object obj)
        {
            if (ReferenceEquals(null, obj)) return false;
            if (ReferenceEquals(this, obj)) return true;
            if (obj.GetType() != GetType()) return false;
            return Equals((ColumnHeader)obj);
        }

        public override int GetHashCode()
        {
            return (Name != null ? Name.GetHashCode() : 0);
        }
    }
}
using System;

namespace Model
{
    public class GuidCell
    {
        public GuidCell(string schema, string name, Guid guid)
        {
            GuidHeader = new GuidHeader(schema, name);
            Guid = guid;
        }

        public GuidHeader GuidHeader { get; private set; }

        public Guid Guid { get; private set; }

        protected bool Equals(GuidCell other)
        {
            return Equals(GuidHeader, other.GuidHeader) && Guid.Equals(other.Guid);
        }

        public override bool Equals(object obj)
        {
            if (ReferenceEquals(null, obj)) return false;
            if (ReferenceEquals(this, obj)) return true;
            if (obj.GetType() != GetType()) return false;
            return Equals((GuidCell)obj);
        }

        public override int GetHashCode()
        {
            unchecked
            {
                return ((GuidHeader != null ? GuidHeader.GetHashCode() : 0) * 397) ^ Guid.GetHashCode();
            }
        }
    }
}
namespace Model
{
    public class GuidHeader
    {
        public GuidHeader(string schema, string name)
        {
            Schema = schema;
            Name = name;
        }

        public string Schema { get; private set; }
        public string Name { get; private set; }

        protected bool Equals(GuidHeader other)
        {
            return string.Equals(Schema, other.Schema) && string.Equals(Name, other.Name);
        }

        public override bool Equals(object obj)
        {
            if (ReferenceEquals(null, obj)) return false;
            if (ReferenceEquals(this, obj)) return true;
            if (obj.GetType() != GetType()) return false;
            return Equals((GuidHeader)obj);
        }

        public override int GetHashCode()
        {
            unchecked
            {
                return ((Schema != null ? Schema.GetHashCode() : 0) * 397) ^ (Name != null ? Name.GetHashCode() : 0);
            }
        }
    }
}

Building rows

Below goes the code for creating row. First, we extract all column names in order to find common schema for all resulting rows (since different documents might have different set of values we need to insert nulls in case of missing value). Next, we create rows.

using System.Collections.Generic;
using System.Linq;

namespace Model
{
    public sealed class RowBuilder
    {
        private const char CompartmentsSeparator = '.';
        private const char TextNodeName = '#';
        private const char AttributeNodeNamePrefix = '#';

        public static IEnumerable< Row> BuildRows(Node[] source, string alias)
        {
            IEnumerable< string> columnNames = source.SelectMany(n => GetNodeColumnNames(n, "")).Distinct();
            return source.Select(n => Build(n, alias, columnNames));
        }

        private static IEnumerable< string> GetNodeColumnNames(Node node, string currentName)
        {
            currentName = CreateNewColumnName(node, currentName);

            return new[] { currentName }.Union(node.InnerNodes.SelectMany(n => GetNodeColumnNames(n, currentName)));
        }

        private static Row Build(Node source, string alias, IEnumerable< string> columnNames)
        {
            var row = new Row();
            const string startColumnName = "";

            CreateColumnsFromNode(source, startColumnName, alias, false, row);
            AddGuidsForNode(source, row, alias);
            AddMissingColumns(row, columnNames, alias);

            return row;
        }

        private static void AddMissingColumns(Row row, IEnumerable< string> columnNames, string alias)
        {
            IEnumerable< string> existingNames = row.Cells.Select(c => c.ColumnHeader.Name).ToArray();
            IEnumerable< string> missingNames = columnNames.Except(existingNames);
            IEnumerable< Cell> newCells = missingNames.Select(n => new Cell(alias, n, null));
            row.AddCells(newCells);
        }

        private static void AddGuidsForNode(Node source, Row row, string alias)
        {
            IEnumerable< Node> parents = GetAncestors(source).Reverse();
            string name = string.Empty;

            foreach (Node parent in parents)
            {
                name += parent.Name;

                row.AddGuid(new GuidCell(alias, name, parent.Guid));

                name += ".";
            }
        }

        private static IEnumerable< Node> GetAncestors(Node source)
        {
            Node current = source;
            while (current != null)
            {
                yield return current;
                current = current.Parent;
            }
        }

        private static void CreateColumnsFromNode(Node node, string columnName, string alias,
            bool sameSubtreeAlreadyParsed, Row row)
        {
            columnName = CreateNewColumnName(node, columnName);
            Cell cell = CreateBaseCellForNode(node, columnName, alias, sameSubtreeAlreadyParsed);

            if (row.HasColumn(cell.ColumnHeader))
            {
                sameSubtreeAlreadyParsed = true;
            }
            else
            {
                row.AddCell(cell);
            }

            CreateColumnsFromChildren(node, columnName, alias, sameSubtreeAlreadyParsed, row);
        }

        private static void CreateColumnsFromChildren(Node node, string columnName, string alias, bool elementExist,
            Row row)
        {
            foreach (Node innerNode in node.InnerNodes)
            {
                CreateColumnsFromNode(innerNode, columnName, alias, elementExist, row);
            }
        }

        private static string CreateNewColumnName(Node node, string columnName)
        {
            return (columnName + CreateColumnNameForNode(node)).TrimStart(CompartmentsSeparator);
        }

        private static string ExtractValueForColumn(Node node)
        {
            string value = node.Value;
            if (node.Type != NodeType.Element)
            {
                return value;
            }

            Node tmp = node.InnerNodes.FirstOrDefault(n => n.Type == NodeType.Text);
            value = (tmp != null) ? tmp.Value : value;
            return value;
        }

        private static string CreateColumnNameForNode(Node node)
        {
            string name = null;
            switch (node.Type)
            {
                case NodeType.Text:
                    name = string.Concat(CompartmentsSeparator, TextNodeName);
                    break;
                case NodeType.Attribute:
                    name = string.Concat(CompartmentsSeparator, AttributeNodeNamePrefix, node.Name);
                    break;
                case NodeType.Element:
                    name = string.Concat(CompartmentsSeparator, node.Name);
                    break;
            }

            return name;
        }

        private static Cell CreateBaseCellForNode(Node node, string columnName, string schema, bool nullValue)
        {
            var column = new ColumnHeader(schema, columnName);
            string value = nullValue ? null : ExtractValueForColumn(node);

            return new Cell(column, value);
        }
    }
}

That’s all. Now we have our nodes transformed into rows so we can start implementing query logic. Most of the operations described in next parts will be rather straightforward — after all they are just relational operations.