This is the thirtieth part of the SQLxD series. For your convenience you can find other parts in the table of contents in Part 1 – XML Transformation
We mentioned the natural join a few times and it finally the part when we implement it. Here we go:
using System; using System.Collections.Generic; using System.Linq; using Model; using QueryLogic.Exceptions; using QueryLogic.Expressions.RowExpressions; using QueryLogic.Predicates; using QueryLogic.Predicates.Complex; using QueryLogic.Predicates.Simple; using QueryLogic.RelationProviding; namespace QueryLogic.Joins.Implementation { public class NaturalJoin : InnerJoin { public NaturalJoin(IRelationProvider firstRelation, IRelationProvider secondRelation) : base(firstRelation, secondRelation, null) { } protected bool Equals(NaturalJoin other) { return base.Equals(other); } public override bool Equals(object obj) { if (ReferenceEquals(null, obj)) return false; if (ReferenceEquals(this, obj)) return true; if (obj.GetType() != GetType()) return false; return Equals((NaturalJoin)obj); } public override int GetHashCode() { return 0; } public override Relation CreateRelation(Node source) { var firstRelation = FirstRelation.CreateRelation(source); var secondRelation = SecondRelation.CreateRelation(source); return new Relation(firstRelation.Rows.SelectMany(firstRow => { return secondRelation.Rows.Select(secondRow => { return MatchRows(firstRelation, secondRelation, firstRow, secondRow); }); }).Where(r => r != null)); } private Row MatchRows(Relation firstRelation, Relation secondRelation, Row firstRow, Row secondRow) { if (firstRow.Guids.Count() < secondRow.Guids.Count()) { return MatchParentToChild(firstRelation, secondRelation, firstRow, secondRow); } else { return MatchParentToChild(secondRelation, firstRelation, secondRow, firstRow); } } private Row MatchParentToChild(Relation firstRelation, Relation secondRelation, Row firstRow, Row secondRow) { ThrowIfAmbiguousGuidsInRelation(firstRelation, firstRow.Guids.Select(g => g.GuidHeader).OrderBy(g => g.Name)); ThrowIfAmbiguousGuidsInRelation(secondRelation, secondRow.Guids.Select(g => g.GuidHeader).OrderBy(g => g.Name)); foreach (GuidCell firstGuid in firstRow.Guids) { var anySecondGuid = secondRow.Guids.FirstOrDefault(); var secondSchema = anySecondGuid != null ? anySecondGuid.GuidHeader.Schema : null; ; var secondGuid = secondRow.GetGuid(secondSchema, firstGuid.GuidHeader.Name); if (secondGuid == null || firstGuid.Guid != secondGuid.Guid) { return null; } } var newRow = new Row(); newRow.AddCells(firstRow.Cells); newRow.AddCells(secondRow.Cells); newRow.AddGuids(firstRow.Guids); newRow.AddGuids(secondRow.Guids); newRow.RemoveDuplicateGuidsByName(); return newRow; } private void ThrowIfAmbiguousGuidsInRelation(Relation source, IOrderedEnumerablecolumns) { IEnumerable > pairs = columns.Zip(columns.Skip(1), Tuple.Create); foreach (var pair in pairs) { if (pair.Item1.Name == pair.Item2.Name) { throw new AmbiguousGuidsInRelationException(source, pair.Item1.Name); } } } } }
First, we compare guid collection lengths. This is for checking whether it is possible for the relations to be in parent-child relation. Next, we find candidate for parent and start comparing guids. If all guids match then we can join rows.
And here are tests for this mechanism:
using System; using System.Collections.Generic; using System.Linq; using Model; using NUnit.Framework; using QueryLogic.Exceptions; using QueryLogic.Joins.Implementation; using QueryLogic.Test.Mocks; namespace QueryLogic.Test.Joins.Implementation { [TestFixture] public class NaturalJoinTests { [Test] [ExpectedException(typeof(AmbiguousGuidsInRelationException))] public void CreateRelation_AmbiguousGuidsPassed_ShouldThrowAmbiguousGuidsInRelationException() { // Arrange var row11 = new Row(); row11.AddGuids(new List{ new GuidCell("schema1", "a", Guid.NewGuid()), new GuidCell("schema2", "a", Guid.NewGuid()), }); var firstRelation = new Relation(); firstRelation.AddRow(row11); var row21 = new Row(); row21.AddGuids(new List { new GuidCell("schema1", "b", Guid.NewGuid()), }); var secondRelation = new Relation(); secondRelation.AddRow(row21); // Act // ReSharper disable once UnusedVariable Relation actual = new NaturalJoin(new DummyRelationProvider(firstRelation), new DummyRelationProvider(secondRelation)) .CreateRelation(null); } [Test] public void CreateRelation_MatchingGuidsPassed_ShouldReturnRelationWithMatchingGuids() { // Arrange Guid matchingGuid = Guid.NewGuid(); var row11 = new Row(); row11.AddGuids(new List { new GuidCell("schema1", "a", matchingGuid) }); row11.AddCells(new List { new Cell(new ColumnHeader("schema", "a"), "a") }); var row12 = new Row(); row12.AddGuids(new List | { new GuidCell("schema1", "a", Guid.NewGuid()) }); var firstRelation = new Relation(); firstRelation.AddRow(row11); firstRelation.AddRow(row12); var row21 = new Row(); row21.AddGuids(new List { new GuidCell("schema2", "a", matchingGuid) }); row21.AddCells(new List { new Cell(new ColumnHeader("schema2", "a"), "a") }); var row22 = new Row(); row22.AddGuids(new List | { new GuidCell("schema2", "a", Guid.NewGuid()) }); var secondRelation = new Relation(); secondRelation.AddRow(row21); secondRelation.AddRow(row22); var expectedRow = new Row(); expectedRow.AddCells(row11.Cells); expectedRow.AddCells(row21.Cells); var expectedRelation = new Relation(); expectedRelation.AddRow(expectedRow); IEnumerable expected = expectedRelation.Rows; // Act IEnumerable
actual = new NaturalJoin(new DummyRelationProvider(firstRelation), new DummyRelationProvider(secondRelation)) .CreateRelation(null).Rows; // Assert CollectionAssert.AreEquivalent(expected, actual); } [Test] public void CreateRelation_MatchingGuids_ShouldReturnRelationWithoutGuidsDuplicates() { // Arrange Guid matchingGuid = Guid.NewGuid(); var row11 = new Row(); row11.AddGuids(new List
{ new GuidCell("schema1", "a", matchingGuid) }); row11.AddCells(new List { new Cell(new ColumnHeader("schema", "a"), "a") }); var firstRelation = new Relation(); firstRelation.AddRow(row11); var row21 = new Row(); row21.AddGuids(new List | { new GuidCell("schema2", "a", matchingGuid) }); row21.AddCells(new List { new Cell(new ColumnHeader("schema2", "a"), "a") }); var secondRelation = new Relation(); secondRelation.AddRow(row21); const int expected = 1; // Act int actual = new NaturalJoin(new DummyRelationProvider(firstRelation), new DummyRelationProvider(secondRelation)) .CreateRelation(null).Guids.Count(); // Assert Assert.That(actual, Is.EqualTo(expected)); } [Test] public void CreateRelation_RelationWithoutMatchingGuidsPassed_ShouldReturnEmptyRelation() { // Arrange var row11 = new Row(); row11.AddGuids(new List | { new GuidCell("schema1", "a", Guid.NewGuid()) }); var firstRelation = new Relation(); firstRelation.AddRow(row11); var row21 = new Row(); row21.AddGuids(new List { new GuidCell("schema1", "b", Guid.NewGuid()) }); var secondRelation = new Relation(); secondRelation.AddRow(row21); // Act Relation actual = new NaturalJoin(new DummyRelationProvider(firstRelation), new DummyRelationProvider(secondRelation)) .CreateRelation(null); // Assert Assert.That(actual.Rows, Is.Empty); } [Test] public void CreateRelation_NodesOnMayLevels_ShouldMatchCorrectDescendants() { // Arrange var guidSzef = Guid.NewGuid(); var guidOsoba1 = Guid.NewGuid(); var guidOsoba2 = Guid.NewGuid(); var guidOsoba3 = Guid.NewGuid(); var guidOsoba4 = Guid.NewGuid(); var row11 = new Row(); row11.AddGuids(new List { new GuidCell("schema1", "dok.osoba",guidOsoba1) }); var row12 = new Row(); row12.AddGuids(new List { new GuidCell("schema1", "dok.osoba", guidOsoba1), new GuidCell("schema1", "dok.osoba.osoba", guidOsoba2) }); var row13 = new Row(); row13.AddGuids(new List { new GuidCell("schema1", "dok.osoba", guidOsoba1), new GuidCell("schema1", "dok.osoba.osoba", guidOsoba2), new GuidCell("schema1", "dok.osoba.osoba.osoba", guidOsoba3) }); var row14 = new Row(); row14.AddGuids(new List { new GuidCell("schema1", "dok.szef", guidSzef), new GuidCell("schema1", "dok.szef.osoba", guidOsoba4) }); var firstRelation = new Relation(); firstRelation.AddRow(row11); firstRelation.AddRow(row12); firstRelation.AddRow(row13); firstRelation.AddRow(row14); var guidMail1 = Guid.NewGuid(); var guidMail2 = Guid.NewGuid(); var guidMail3 = Guid.NewGuid(); var guidMail4 = Guid.NewGuid(); var row21 = new Row(); row21.AddGuids(new List { new GuidCell("schema1", "dok.osoba", guidOsoba1), new GuidCell("schema1", "dok.osoba.mail", guidMail1) }); var row22 = new Row(); row22.AddGuids(new List { new GuidCell("schema1", "dok.osoba", guidOsoba1), new GuidCell("schema1", "dok.osoba.osoba", guidOsoba2), new GuidCell("schema1", "dok.osoba.osoba.mail", guidMail2), }); var row23 = new Row(); row23.AddGuids(new List { new GuidCell("schema1", "dok.osoba", guidOsoba1), new GuidCell("schema1", "dok.osoba.osoba", guidOsoba2), new GuidCell("schema1", "dok.osoba.osoba.osoba", guidOsoba3), new GuidCell("schema1", "dok.osoba.osoba.osoba.mail", guidMail3), }); var row24 = new Row(); row24.AddGuids(new List { new GuidCell("schema1", "dok.szef", guidSzef), new GuidCell("schema1", "dok.szef.osoba", guidOsoba4), new GuidCell("schema1", "dok.szef.osoba.mail", guidMail4), }); var secondRelation = new Relation(); secondRelation.AddRow(row21); secondRelation.AddRow(row22); secondRelation.AddRow(row23); secondRelation.AddRow(row24); // Act Relation actual = new NaturalJoin(new DummyRelationProvider(firstRelation), new DummyRelationProvider(secondRelation)) .CreateRelation(null); // Assert Assert.That(actual.Rows.Count(), Is.EqualTo(7)); } } }
This concludes our JOIN journey. Next time we are going to implement grouping.