This is the fifty first part of the ILP series. For your convenience you can find other parts in the table of contents in Part 1 – Boolean algebra

This is the fifth part of the ML series. For your convenience you can find other parts in the table of contents in Part 1 – Linear regression in MXNet

Today we are going to implement linear regression with ILP. Let’s go!

var solver = new CplexMilpSolver(10);

var samples = new object[][] {
	new object[]{5.1,3.5,1.4,0.2,"setosa"},
	new object[]{4.9,3,1.4,0.2,"setosa"},
	new object[]{4.7,3.2,1.3,0.2,"setosa"},
	new object[]{4.6,3.1,1.5,0.2,"setosa"},
	new object[]{5,3.6,1.4,0.2,"setosa"},
	new object[]{5.4,3.9,1.7,0.4,"setosa"},
	new object[]{4.6,3.4,1.4,0.3,"setosa"},
	new object[]{5,3.4,1.5,0.2,"setosa"},
	new object[]{4.4,2.9,1.4,0.2,"setosa"},
	new object[]{4.9,3.1,1.5,0.1,"setosa"},
	new object[]{5.4,3.7,1.5,0.2,"setosa"},
	new object[]{4.8,3.4,1.6,0.2,"setosa"},
	new object[]{4.8,3,1.4,0.1,"setosa"},
	new object[]{4.3,3,1.1,0.1,"setosa"},
	new object[]{5.8,4,1.2,0.2,"setosa"},
	new object[]{5.7,4.4,1.5,0.4,"setosa"},
	new object[]{5.4,3.9,1.3,0.4,"setosa"},
	new object[]{5.1,3.5,1.4,0.3,"setosa"},
	new object[]{5.7,3.8,1.7,0.3,"setosa"},
	new object[]{5.1,3.8,1.5,0.3,"setosa"},
	new object[]{5.4,3.4,1.7,0.2,"setosa"},
	new object[]{5.1,3.7,1.5,0.4,"setosa"},
	new object[]{4.6,3.6,1,0.2,"setosa"},
	new object[]{5.1,3.3,1.7,0.5,"setosa"},
	new object[]{4.8,3.4,1.9,0.2,"setosa"},
	new object[]{5,3,1.6,0.2,"setosa"},
	new object[]{5,3.4,1.6,0.4,"setosa"},
	new object[]{5.2,3.5,1.5,0.2,"setosa"},
	new object[]{5.2,3.4,1.4,0.2,"setosa"},
	new object[]{4.7,3.2,1.6,0.2,"setosa"},
	new object[]{4.8,3.1,1.6,0.2,"setosa"},
	new object[]{5.4,3.4,1.5,0.4,"setosa"},
	new object[]{5.2,4.1,1.5,0.1,"setosa"},
	new object[]{5.5,4.2,1.4,0.2,"setosa"},
	new object[]{4.9,3.1,1.5,0.1,"setosa"},
	new object[]{5,3.2,1.2,0.2,"setosa"},
	new object[]{5.5,3.5,1.3,0.2,"setosa"},
	new object[]{4.9,3.1,1.5,0.1,"setosa"},
	new object[]{4.4,3,1.3,0.2,"setosa"},
	new object[]{5.1,3.4,1.5,0.2,"setosa"},
	new object[]{5,3.5,1.3,0.3,"setosa"},
	new object[]{4.5,2.3,1.3,0.3,"setosa"},
	new object[]{4.4,3.2,1.3,0.2,"setosa"},
	new object[]{5,3.5,1.6,0.6,"setosa"},
	new object[]{5.1,3.8,1.9,0.4,"setosa"},
	new object[]{4.8,3,1.4,0.3,"setosa"},
	new object[]{5.1,3.8,1.6,0.2,"setosa"},
	new object[]{4.6,3.2,1.4,0.2,"setosa"},
	new object[]{5.3,3.7,1.5,0.2,"setosa"},
	new object[]{5,3.3,1.4,0.2,"setosa"},
	new object[]{7,3.2,4.7,1.4,"versicolor"},
	new object[]{6.4,3.2,4.5,1.5,"versicolor"},
	new object[]{6.9,3.1,4.9,1.5,"versicolor"},
	new object[]{5.5,2.3,4,1.3,"versicolor"},
	new object[]{6.5,2.8,4.6,1.5,"versicolor"},
	new object[]{5.7,2.8,4.5,1.3,"versicolor"},
	new object[]{6.3,3.3,4.7,1.6,"versicolor"},
	new object[]{4.9,2.4,3.3,1,"versicolor"},
	new object[]{6.6,2.9,4.6,1.3,"versicolor"},
	new object[]{5.2,2.7,3.9,1.4,"versicolor"},
	new object[]{5,2,3.5,1,"versicolor"},
	new object[]{5.9,3,4.2,1.5,"versicolor"},
	new object[]{6,2.2,4,1,"versicolor"},
	new object[]{6.1,2.9,4.7,1.4,"versicolor"},
	new object[]{5.6,2.9,3.6,1.3,"versicolor"},
	new object[]{6.7,3.1,4.4,1.4,"versicolor"},
	new object[]{5.6,3,4.5,1.5,"versicolor"},
	new object[]{5.8,2.7,4.1,1,"versicolor"},
	new object[]{6.2,2.2,4.5,1.5,"versicolor"},
	new object[]{5.6,2.5,3.9,1.1,"versicolor"},
	new object[]{5.9,3.2,4.8,1.8,"versicolor"},
	new object[]{6.1,2.8,4,1.3,"versicolor"},
	new object[]{6.3,2.5,4.9,1.5,"versicolor"},
	new object[]{6.1,2.8,4.7,1.2,"versicolor"},
	new object[]{6.4,2.9,4.3,1.3,"versicolor"},
	new object[]{6.6,3,4.4,1.4,"versicolor"},
	new object[]{6.8,2.8,4.8,1.4,"versicolor"},
	new object[]{6.7,3,5,1.7,"versicolor"},
	new object[]{6,2.9,4.5,1.5,"versicolor"},
	new object[]{5.7,2.6,3.5,1,"versicolor"},
	new object[]{5.5,2.4,3.8,1.1,"versicolor"},
	new object[]{5.5,2.4,3.7,1,"versicolor"},
	new object[]{5.8,2.7,3.9,1.2,"versicolor"},
	new object[]{6,2.7,5.1,1.6,"versicolor"},
	new object[]{5.4,3,4.5,1.5,"versicolor"},
	new object[]{6,3.4,4.5,1.6,"versicolor"},
	new object[]{6.7,3.1,4.7,1.5,"versicolor"},
	new object[]{6.3,2.3,4.4,1.3,"versicolor"},
	new object[]{5.6,3,4.1,1.3,"versicolor"},
	new object[]{5.5,2.5,4,1.3,"versicolor"},
	new object[]{5.5,2.6,4.4,1.2,"versicolor"},
	new object[]{6.1,3,4.6,1.4,"versicolor"},
	new object[]{5.8,2.6,4,1.2,"versicolor"},
	new object[]{5,2.3,3.3,1,"versicolor"},
	new object[]{5.6,2.7,4.2,1.3,"versicolor"},
	new object[]{5.7,3,4.2,1.2,"versicolor"},
	new object[]{5.7,2.9,4.2,1.3,"versicolor"},
	new object[]{6.2,2.9,4.3,1.3,"versicolor"},
	new object[]{5.1,2.5,3,1.1,"versicolor"},
	new object[]{5.7,2.8,4.1,1.3,"versicolor"},
	new object[]{6.3,3.3,6,2.5,"virginica"},
	new object[]{5.8,2.7,5.1,1.9,"virginica"},
	new object[]{7.1,3,5.9,2.1,"virginica"},
	new object[]{6.3,2.9,5.6,1.8,"virginica"},
	new object[]{6.5,3,5.8,2.2,"virginica"},
	new object[]{7.6,3,6.6,2.1,"virginica"},
	new object[]{4.9,2.5,4.5,1.7,"virginica"},
	new object[]{7.3,2.9,6.3,1.8,"virginica"},
	new object[]{6.7,2.5,5.8,1.8,"virginica"},
	new object[]{7.2,3.6,6.1,2.5,"virginica"},
	new object[]{6.5,3.2,5.1,2,"virginica"},
	new object[]{6.4,2.7,5.3,1.9,"virginica"},
	new object[]{6.8,3,5.5,2.1,"virginica"},
	new object[]{5.7,2.5,5,2,"virginica"},
	new object[]{5.8,2.8,5.1,2.4,"virginica"},
	new object[]{6.4,3.2,5.3,2.3,"virginica"},
	new object[]{6.5,3,5.5,1.8,"virginica"},
	new object[]{7.7,3.8,6.7,2.2,"virginica"},
	new object[]{7.7,2.6,6.9,2.3,"virginica"},
	new object[]{6,2.2,5,1.5,"virginica"},
	new object[]{6.9,3.2,5.7,2.3,"virginica"},
	new object[]{5.6,2.8,4.9,2,"virginica"},
	new object[]{7.7,2.8,6.7,2,"virginica"},
	new object[]{6.3,2.7,4.9,1.8,"virginica"},
	new object[]{6.7,3.3,5.7,2.1,"virginica"},
	new object[]{7.2,3.2,6,1.8,"virginica"},
	new object[]{6.2,2.8,4.8,1.8,"virginica"},
	new object[]{6.1,3,4.9,1.8,"virginica"},
	new object[]{6.4,2.8,5.6,2.1,"virginica"},
	new object[]{7.2,3,5.8,1.6,"virginica"},
	new object[]{7.4,2.8,6.1,1.9,"virginica"},
	new object[]{7.9,3.8,6.4,2,"virginica"},
	new object[]{6.4,2.8,5.6,2.2,"virginica"},
	new object[]{6.3,2.8,5.1,1.5,"virginica"},
	new object[]{6.1,2.6,5.6,1.4,"virginica"},
	new object[]{7.7,3,6.1,2.3,"virginica"},
	new object[]{6.3,3.4,5.6,2.4,"virginica"},
	new object[]{6.4,3.1,5.5,1.8,"virginica"},
	new object[]{6,3,4.8,1.8,"virginica"},
	new object[]{6.9,3.1,5.4,2.1,"virginica"},
	new object[]{6.7,3.1,5.6,2.4,"virginica"},
	new object[]{6.9,3.1,5.1,2.3,"virginica"},
	new object[]{5.8,2.7,5.1,1.9,"virginica"},
	new object[]{6.8,3.2,5.9,2.3,"virginica"},
	new object[]{6.7,3.3,5.7,2.5,"virginica"},
	new object[]{6.7,3,5.2,2.3,"virginica"},
	new object[]{6.3,2.5,5,1.9,"virginica"},
	new object[]{6.5,3,5.2,2,"virginica"},
	new object[]{6.2,3.4,5.4,2.3,"virginica"},
	new object[]{5.9,3,5.1,1.8,"virginica"}
};

var weights = Enumerable.Range(0, 5).Select(x => solver.Create(string.Format("w_{0}", x), Domain.AnyReal)).ToArray();
foreach (var w in weights)
{
	w.Set(ConstraintType.LessOrEqual, solver.FromConstant(10000)).Set(ConstraintType.GreaterOrEqual, solver.FromConstant(-10000));
}

var b = solver.Create("b", Domain.AnyReal);
b.Set(ConstraintType.LessOrEqual, solver.FromConstant(10000)).Set(ConstraintType.GreaterOrEqual, solver.FromConstant(-10000));

var predicted = samples.Select(s => {
	var features = new double[] {
		double.Parse(s[1].ToString()),
		double.Parse(s[2].ToString()),
		double.Parse(s[3].ToString()),
		s[4].ToString() == "setosa" ? 1.0 : 0.0,
		s[4].ToString() == "virginica" ? 1.0 : 0.0
	};

	return solver.Operation(OperationType.Addition, Enumerable.Range(0, features.Length).Select(x => solver.FromConstant(features[x]).Operation(OperationType.Multiplication, weights[x])).ToArray()).Operation(OperationType.Addition, b);
}).ToArray();

var distances = Enumerable.Range(0, samples.Length).Select(i => {
	var target = double.Parse(samples[i][0].ToString());
	return predicted[i].Operation(OperationType.Subtraction, solver.FromConstant(target)).Operation(OperationType.AbsoluteValue);
}).ToArray();

// Sum of distances
var error = solver.Operation(OperationType.Addition, distances.Select(d => d).ToArray());

// Sum of squared distances - not supported
//var error = solver.Operation(OperationType.Addition, distances.Select(d => d.Operation(OperationType.Multiplication, d)).ToArray());

// Average of distances
//var error = solver.Operation(OperationType.Addition, distances.Select(d => d).ToArray()).Operation(OperationType.Division, solver.FromConstant(samples.Length));

// Sum of approximated squared distances
//var error = solver.Operation(OperationType.Addition, distances.Select(d => {
//    var casted = solver.CreateAnonymous(Domain.PositiveOrZeroInteger);
//    casted.Set(ConstraintType.LessOrEqual, d);
//    casted.Operation(OperationType.Addition, solver.FromConstant(1)).Set(ConstraintType.GreaterOrEqual, d);
//    return casted.Operation(OperationType.Multiplication, casted);
//}).ToArray());

solver.AddGoal("Goal", error.Operation(OperationType.Negation));
solver.Solve();

foreach (var w in weights)
{
	Console.WriteLine("W: " + solver.GetValue(w));
}

Console.WriteLine("B: " + solver.GetValue(b));

Console.WriteLine("Error: " + solver.GetValue(error));

We start with defining the samples set. Next, we create real variables for linear regression coefficients. We add arbitrary bounds. Next, we calculate the predicted value in an obvious way.

Next, we calculate distances and error.

The error is the hard part. We can easily calculate sum of distances or approximated average of them, or even sum of approximated squared distances. But we don’t know how to multiply two real variables so we can’t calculate the real MSE.

Finally, we minimize te error and solve the problem. Here goes the solution:

Total (root+branch&cut) =    0.44 sec. (250.99 ticks)
W: 0.464135021097045
W: 0.915611814345992
W: -0.388185654008438
W: 0.868776371308019
W: -0.375949367088609
B: 1.2957805907173
Error: 35.9801687763713

As you can see, it is very fast (around 0.44 second). You can see the error and coefficients. Calculation time for sum of approximated squares is worse but not terrible: Total (root+branch&cut) = 8.28 sec. (7154.17 ticks)