C# – Random IT Utensils https://blog.adamfurmanek.pl IT, operating systems, maths, and more. Fri, 25 Jul 2025 13:57:38 +0000 en-US hourly 1 https://wordpress.org/?v=6.7.1 SAT Part 4 — Solving 3CNF with C++ templates https://blog.adamfurmanek.pl/2025/07/25/sat-part-4/ https://blog.adamfurmanek.pl/2025/07/25/sat-part-4/#respond Fri, 25 Jul 2025 13:57:38 +0000 https://blog.adamfurmanek.pl/?p=5125 Continue reading SAT Part 4 — Solving 3CNF with C++ templates]]>

This is the fourth part of the SAT series. For your convenience you can find other parts in the table of contents in Part 1 — Boolean logic

Let’s use C++ templates to solve 3CNF SAT in compile-time.

The solution

First, let’s start with the code:

#include <iostream>

using namespace std;

// ============================

class NullType {};

// ============================

template <class T, class U>
struct Typelist
{
	typedef T Head;
	typedef U Tail;
};

// ============================

template <class TList, unsigned int index> struct TypeAt;
template <class Head, class Tail>
struct TypeAt<Typelist<Head, Tail>, 0>
{
	typedef Head Result;
};
template <class Head, class Tail, unsigned int i>
struct TypeAt<Typelist<Head, Tail>, i>
{
	typedef typename TypeAt<Tail, i - 1>::Result Result;
};

// ============================

template <class TList, class TAggregated> struct ReversedTypelist;
template<class TAggregated>
struct ReversedTypelist<NullType, TAggregated>{
	typedef TAggregated Result;
};
template <class Head, class Tail, class TAggregated>
struct ReversedTypelist<Typelist<Head, Tail>, TAggregated>
{
	typedef typename ReversedTypelist<Tail, Typelist<Head, TAggregated> >::Result Result;
};

// ============================

template<int v> struct Literal {
	enum {
		value = v
	};
};

struct LiteralTrue {
	enum {
		value = 1
	};
};

struct LiteralFalse {
	enum {
		value = 0
	};
};

// ============================


template<typename T> struct Id { typedef T Result; };
template<typename T> struct Negated {};
template<> struct Negated<LiteralTrue> { typedef LiteralFalse Result; };
template<> struct Negated<LiteralFalse> { typedef LiteralTrue Result; };




// ============================

template<int i>
struct Variable {};

template<>
struct Variable<0> {
	typedef LiteralFalse value1;
	typedef LiteralFalse value2;
};

template<>
struct Variable<1> {
	typedef LiteralTrue value1;
	typedef LiteralTrue value2;
};

template<>
struct Variable<2> {
	typedef LiteralFalse value1;
	typedef LiteralTrue value2;
};

// ============================

template<typename Constraint, typename BoundVariables> struct Conjunction { };
template<typename BoundVariables> struct Conjunction<LiteralFalse, BoundVariables> { typedef NullType Result; };
template<typename BoundVariables> struct Conjunction<LiteralTrue, BoundVariables> { typedef BoundVariables Result; };

template<typename BoundVariables1, typename BoundVariables2> struct Disjunction { typedef BoundVariables1 Result; };
template<> struct Disjunction<NullType, NullType> { typedef NullType Result; };
template<typename BoundVariables1> struct Disjunction<BoundVariables1, NullType> { typedef BoundVariables1 Result; };
template<typename BoundVariables2> struct Disjunction<NullType, BoundVariables2> { typedef BoundVariables2 Result; };

// ============================


// ============================
// Constraints with actual calculation
// This is where we check if SAT clauses (Variables) really satisfy a particular constraint
template<typename AOrNegated, typename BOrNegated, typename COrNegated> struct Constraint {};

template<typename A, typename B, typename C> struct ThreeClause { };
template<> struct ThreeClause<LiteralFalse, LiteralFalse, LiteralFalse> { typedef LiteralFalse value; };
template<> struct ThreeClause<LiteralFalse, LiteralFalse, LiteralTrue> { typedef LiteralTrue value; };
template<> struct ThreeClause<LiteralFalse, LiteralTrue, LiteralFalse> { typedef LiteralTrue value; };
template<> struct ThreeClause<LiteralFalse, LiteralTrue, LiteralTrue> { typedef LiteralTrue value; };
template<> struct ThreeClause<LiteralTrue, LiteralFalse, LiteralFalse> { typedef LiteralTrue value; };
template<> struct ThreeClause<LiteralTrue, LiteralFalse, LiteralTrue> { typedef LiteralTrue value; };
template<> struct ThreeClause<LiteralTrue, LiteralTrue, LiteralFalse> { typedef LiteralTrue value; };
template<> struct ThreeClause<LiteralTrue, LiteralTrue, LiteralTrue> { typedef LiteralTrue value; };

// ============================




// ============================


// ============================
// BoundConstraints
// Extracts referenced Variables and calculates literal representing the result of logical operation


template<typename ConstraintType, typename BoundVariables> struct BoundConstraint {};
template<int A, int B, int C, typename BoundVariables>
struct BoundConstraint<Constraint<Id<Literal<A> >, Id<Literal<B> >, Id<Literal<C> > >, BoundVariables> {
	typedef typename ThreeClause<
		typename Id<typename TypeAt<BoundVariables, A>::Result>::Result,
		typename Id<typename TypeAt<BoundVariables, B>::Result>::Result,
		typename Id<typename TypeAt<BoundVariables, C>::Result>::Result
	>::value value;
};
template<int A, int B, int C, typename BoundVariables>
struct BoundConstraint<Constraint<Id<Literal<A> >, Id<Literal<B> >, Negated<Literal<C> > >, BoundVariables> {
	typedef typename ThreeClause<
		typename Id<typename TypeAt<BoundVariables, A>::Result>::Result,
		typename Id<typename TypeAt<BoundVariables, B>::Result>::Result,
		typename Negated<typename TypeAt<BoundVariables, C>::Result>::Result
	>::value value;
};
template<int A, int B, int C, typename BoundVariables>
struct BoundConstraint<Constraint<Id<Literal<A> >, Negated<Literal<B> >, Id<Literal<C> > >, BoundVariables> {
	typedef typename ThreeClause<
		typename Id<typename TypeAt<BoundVariables, A>::Result>::Result,
		typename Negated<typename TypeAt<BoundVariables, B>::Result>::Result,
		typename Id<typename TypeAt<BoundVariables, C>::Result>::Result
	>::value value;
};
template<int A, int B, int C, typename BoundVariables>
struct BoundConstraint<Constraint<Id<Literal<A> >, Negated<Literal<B> >, Negated<Literal<C> > >, BoundVariables> {
	typedef typename ThreeClause<
		typename Id<typename TypeAt<BoundVariables, A>::Result>::Result,
		typename Negated<typename TypeAt<BoundVariables, B>::Result>::Result,
		typename Negated<typename TypeAt<BoundVariables, C>::Result>::Result
	>::value value;
};
template<int A, int B, int C, typename BoundVariables>
struct BoundConstraint<Constraint<Negated<Literal<A> >, Id<Literal<B> >, Id<Literal<C> > >, BoundVariables> {
	typedef typename ThreeClause<
		typename Negated<typename TypeAt<BoundVariables, A>::Result>::Result,
		typename Id<typename TypeAt<BoundVariables, B>::Result>::Result,
		typename Id<typename TypeAt<BoundVariables, C>::Result>::Result
	>::value value;
};
template<int A, int B, int C, typename BoundVariables>
struct BoundConstraint<Constraint<Negated<Literal<A> >, Id<Literal<B> >, Negated<Literal<C> > >, BoundVariables> {
	typedef typename ThreeClause<
		typename Negated<typename TypeAt<BoundVariables, A>::Result>::Result,
		typename Id<typename TypeAt<BoundVariables, B>::Result>::Result,
		typename Negated<typename TypeAt<BoundVariables, C>::Result>::Result
	>::value value;
};
template<int A, int B, int C, typename BoundVariables>
struct BoundConstraint<Constraint<Negated<Literal<A> >, Negated<Literal<B> >, Id<Literal<C> > >, BoundVariables> {
	typedef typename ThreeClause<
		typename Negated<typename TypeAt<BoundVariables, A>::Result>::Result,
		typename Negated<typename TypeAt<BoundVariables, B>::Result>::Result,
		typename Id<typename TypeAt<BoundVariables, C>::Result>::Result
	>::value value;
};
template<int A, int B, int C, typename BoundVariables>
struct BoundConstraint<Constraint<Negated<Literal<A> >, Negated<Literal<B> >, Negated<Literal<C> > >, BoundVariables> {
	typedef typename ThreeClause<
		typename Negated<typename TypeAt<BoundVariables, A>::Result>::Result,
		typename Negated<typename TypeAt<BoundVariables, B>::Result>::Result,
		typename Negated<typename TypeAt<BoundVariables, C>::Result>::Result
	>::value value;
};

// ============================


// ============================
// Model

template<typename Variables, typename BoundVariables, typename Constraints, typename BoundConstraints> struct Model {};

// Model with no BoundConstraints just returns the bound variables
template<typename BoundVariables>
struct Model<NullType, BoundVariables, NullType, NullType>{
	typedef BoundVariables Result;
};

// Recursively calculates AND between current BoundConstraint and the rest of BoundConstraints
// Effectively removes one element from BoundConstraints, and returns Conjunction of the removed BoundConstraint and the rest
template<typename BoundVariables, typename Head, typename Tail>
struct Model<NullType, BoundVariables, NullType, Typelist<Head, Tail> > {
	typedef typename Conjunction<
		typename Head::value,
		typename Model<NullType, BoundVariables, NullType, Tail>::Result
	>::Result Result;
};

// Recursively bounds Constraint with BoundVariables
// Effectively removes one element from Constraints, and adds one element to BoundConstraints
template<typename BoundVariables, typename Head, typename Tail, typename BoundConstraints>
struct Model<NullType, BoundVariables, Typelist<Head, Tail>, BoundConstraints> {
	typedef typename Model<NullType, BoundVariables, Tail, Typelist<BoundConstraint<Head, BoundVariables>, BoundConstraints> >::Result Result;
};

// Takes first Variable, branches into two trees for the first value of the variable and the second value
// Effectively removes one element from Variables, and adds one element to BoundVariables
template<typename Head, typename Tail, typename BoundVariables, typename Constraints, typename BoundConstraints>
struct Model<Typelist<Head, Tail>, BoundVariables, Constraints, BoundConstraints> {
	typedef typename Disjunction<
		typename Model<Tail, Typelist<typename Head::value1, BoundVariables>, Constraints, BoundConstraints>::Result,
		typename Model<Tail, Typelist<typename Head::value2, BoundVariables>, Constraints, BoundConstraints>::Result
	>::Result Result;
};

template<typename Variables, typename Constraints>
struct FullModel {
	typedef typename Model<typename ReversedTypelist<Variables, NullType>::Result, NullType, Constraints, NullType>::Result Result;
};

// ============================


// ============================
// Print helpers

template<typename T> struct Print {};
template<typename Head, typename Tail>
struct Print<Typelist<Head, Tail> >{
	void print(){
		std::cout<<Head::value<<std::endl;
		Print<Tail>().print();
	}
};
template<>
struct Print<NullType>{
	void print(){
		std::cout<<"NullType"<<std::endl;
	}
};

// ============================


int main() {
	// XOR
	Print<typename FullModel< 
		Typelist<Variable<1>, Typelist<Variable<0>, Typelist<Variable<2>, NullType> > >,
			Typelist<Constraint<Negated<Literal<0> >, Id<Literal<1> >, Id<Literal<2> > >,
			Typelist<Constraint<Id<Literal<0> >, Negated<Literal<1> >, Id<Literal<2> > >,
			Typelist<Constraint<Id<Literal<0> >, Id<Literal<1> >, Negated<Literal<2> > >,
			Typelist<Constraint<Negated<Literal<0> >, Negated<Literal<1> >, Negated<Literal<2> > >,
				NullType> > > >
	>::Result>().print();
}

Problem definition

Let’s start with the main function which shows how to calculate exclusive or (XOR) using this approach:

Print<typename FullModel< 
	Typelist<Variable<1>, Typelist<Variable<0>, Typelist<Variable<2>, NullType> > >,
		Typelist<Constraint<Negated<Literal<0> >, Id<Literal<1> >, Id<Literal<2> > >,
		Typelist<Constraint<Id<Literal<0> >, Negated<Literal<1> >, Id<Literal<2> > >,
		Typelist<Constraint<Id<Literal<0> >, Id<Literal<1> >, Negated<Literal<2> > >,
		Typelist<Constraint<Negated<Literal<0> >, Negated<Literal<1> >, Negated<Literal<2> > >,
			NullType> > > >
>::Result>().print();

We will create an instance of FullModel template that will encapsulate our parameters.

As a first parameter, we are going to pass a list of variables. Each variable can be either fixed (Variable<0> is false, Variable<1> is true) or free (Variable<2>). Fixed variables represent clauses for which we know the value. Free variables need to be solved by the solver. We’ll later refer to this list as list of variables and we’ll get its elements by the 0-based indices.

The second parameter is a list of constraints. Each constraint represents a disjunction of three variables. Each variable is either negated or left intact. Let’s take this line as an example:

Constraint<Negated<Literal<0> >, Id<Literal<1> >, Id<Literal<2> >

This creates a constraint in which we take variables with numbers 0, 1, and 2. The first variable (with index 0) is negated, other ones are left intact. This constraint represents the following formula:

    \[\neg A \vee B \vee C\]

Helpers

We need some helper structures to represent concepts.

class NullType {};

This represents a null type. It’s basically a sentinel to indicate end of list or missing values.

Next, we need a definition of linked list. We build it the typical way, with head and tail:

template <class T, class U>
struct Typelist
{
	typedef T Head;
	typedef U Tail;
};

We need some helpers to operate on this list. We start with indexing:

template <class TList, unsigned int index> struct TypeAt;
template <class Head, class Tail>
struct TypeAt<Typelist<Head, Tail>, 0>
{
	typedef Head Result;
};
template <class Head, class Tail, unsigned int i>
struct TypeAt<Typelist<Head, Tail>, i>
{
	typedef typename TypeAt<Tail, i - 1>::Result Result;
};

We also need a way to reverse the list:

template <class TList, class TAggregated> struct ReversedTypelist;
template<class TAggregated>
struct ReversedTypelist<NullType, TAggregated>{
	typedef TAggregated Result;
};
template <class Head, class Tail, class TAggregated>
struct ReversedTypelist<Typelist<Head, Tail>, TAggregated>
{
	typedef typename ReversedTypelist<Tail, Typelist<Head, TAggregated> >::Result Result;
};

We assume that each list will end with a null type and will not have any null type in the middle.

Now, we need a literal that represents a number used for indexing inside the list:

template<int v> struct Literal {
	enum {
		value = v
	};
};

In a similar way, we need two literals to represent true and false. They are used to represent the actual variable values:

struct LiteralTrue {
	enum {
		value = 1
	};
};

struct LiteralFalse {
	enum {
		value = 0
	};
};

Finally, we need to have a way to negate the variable value or leave it intact.

template<typename T> struct Id { typedef T Result; };
template<typename T> struct Negated {};
template<> struct Negated<LiteralTrue> { typedef LiteralFalse Result; };
template<> struct Negated<LiteralFalse> { typedef LiteralTrue Result; };

Representing variables

Let’s now define actual variables used in the model. They represent logical clauses that can be either true or false.

Let’s now sketch the general idea for how we’re going to solve the model. We’ll try to substitute each clause with the allowed values. For fixed variables, only one value is allowed (either 0 or 1). For free variables, we need to consider both values (0 and 1). Therefore, each variable will have two fields to store the available values. For fixed variables, both fields will have the same value. For free variables, the fields will have two different values.

template<int i>
struct Variable {};

template<>
struct Variable<0> {
	typedef LiteralFalse value1;
	typedef LiteralFalse value2;
};

template<>
struct Variable<1> {
	typedef LiteralTrue value1;
	typedef LiteralTrue value2;
};

template<>
struct Variable<2> {
	typedef LiteralFalse value1;
	typedef LiteralTrue value2;
};

Constraints

When solving the model, we’ll effectively examine each possible valuation. Therefore, constraints need to be resolved after valuating variables. Therefore, we’ll have a two-stage process: first, we’ll define constraints as which variables they should incorporate. In the second stage, the constraints will be bound with valuated variables.

To do that, we will need to calculate the CNF. Therefore, we’ll need to calculate the conjunction of all constraints, and disjunction of possible valuations. We define these as follows:

template<typename Constraint, typename BoundVariables> struct Conjunction { };
template<typename BoundVariables> struct Conjunction<LiteralFalse, BoundVariables> { typedef NullType Result; };
template<typename BoundVariables> struct Conjunction<LiteralTrue, BoundVariables> { typedef BoundVariables Result; };

template<typename BoundVariables1, typename BoundVariables2> struct Disjunction { typedef BoundVariables1 Result; };
template<> struct Disjunction<NullType, NullType> { typedef NullType Result; };
template<typename BoundVariables1> struct Disjunction<BoundVariables1, NullType> { typedef BoundVariables1 Result; };
template<typename BoundVariables2> struct Disjunction<NullType, BoundVariables2> { typedef BoundVariables2 Result; };

Conceptually, we’ll use them as following:

Disjunction(first valuation, Disjunction(second valuation, Disjunction(third valuation, ...)))

For each valuation, we’ll calculate the following:

Conjunction(first constraint, Conjunction(second constraint, Conjunction(third constraint, ...)))

This will effectively create a tree of valuations (thanks to disjunctions), and for each specific valuation we’ll verify if all 3CNF clauses are met (thanks to conjunction).

Notice that both conjunction and disjunction return a set of bound variables. We use that to retrieve the particular result. If a given valuation doesn’t meet the constraints, then the null type is returned.

Now, we need to actually be able to check if a particular 3CNF is solved. We do it like this:

template<typename AOrNegated, typename BOrNegated, typename COrNegated> struct Constraint {};

template<typename A, typename B, typename C> struct ThreeClause { };
template<> struct ThreeClause<LiteralFalse, LiteralFalse, LiteralFalse> { typedef LiteralFalse value; };
template<> struct ThreeClause<LiteralFalse, LiteralFalse, LiteralTrue> { typedef LiteralTrue value; };
template<> struct ThreeClause<LiteralFalse, LiteralTrue, LiteralFalse> { typedef LiteralTrue value; };
template<> struct ThreeClause<LiteralFalse, LiteralTrue, LiteralTrue> { typedef LiteralTrue value; };
template<> struct ThreeClause<LiteralTrue, LiteralFalse, LiteralFalse> { typedef LiteralTrue value; };
template<> struct ThreeClause<LiteralTrue, LiteralFalse, LiteralTrue> { typedef LiteralTrue value; };
template<> struct ThreeClause<LiteralTrue, LiteralTrue, LiteralFalse> { typedef LiteralTrue value; };
template<> struct ThreeClause<LiteralTrue, LiteralTrue, LiteralTrue> { typedef LiteralTrue value; };

This is quite straightforward. We take the 3CNF and check if any variable is true.

Now, we need to bound the constraints with the variables. To do that, we take the variable by its index, negate it if needed, and use in the 3CNF:

template<typename ConstraintType, typename BoundVariables> struct BoundConstraint {};
template<int A, int B, int C, typename BoundVariables>
struct BoundConstraint<Constraint<Id<Literal<A> >, Id<Literal<B> >, Id<Literal<C> > >, BoundVariables> {
	typedef typename ThreeClause<
		typename Id<typename TypeAt<BoundVariables, A>::Result>::Result,
		typename Id<typename TypeAt<BoundVariables, B>::Result>::Result,
		typename Id<typename TypeAt<BoundVariables, C>::Result>::Result
	>::value value;
};
template<int A, int B, int C, typename BoundVariables>
struct BoundConstraint<Constraint<Id<Literal<A> >, Id<Literal<B> >, Negated<Literal<C> > >, BoundVariables> {
	typedef typename ThreeClause<
		typename Id<typename TypeAt<BoundVariables, A>::Result>::Result,
		typename Id<typename TypeAt<BoundVariables, B>::Result>::Result,
		typename Negated<typename TypeAt<BoundVariables, C>::Result>::Result
	>::value value;
};
template<int A, int B, int C, typename BoundVariables>
struct BoundConstraint<Constraint<Id<Literal<A> >, Negated<Literal<B> >, Id<Literal<C> > >, BoundVariables> {
	typedef typename ThreeClause<
		typename Id<typename TypeAt<BoundVariables, A>::Result>::Result,
		typename Negated<typename TypeAt<BoundVariables, B>::Result>::Result,
		typename Id<typename TypeAt<BoundVariables, C>::Result>::Result
	>::value value;
};
template<int A, int B, int C, typename BoundVariables>
struct BoundConstraint<Constraint<Id<Literal<A> >, Negated<Literal<B> >, Negated<Literal<C> > >, BoundVariables> {
	typedef typename ThreeClause<
		typename Id<typename TypeAt<BoundVariables, A>::Result>::Result,
		typename Negated<typename TypeAt<BoundVariables, B>::Result>::Result,
		typename Negated<typename TypeAt<BoundVariables, C>::Result>::Result
	>::value value;
};
template<int A, int B, int C, typename BoundVariables>
struct BoundConstraint<Constraint<Negated<Literal<A> >, Id<Literal<B> >, Id<Literal<C> > >, BoundVariables> {
	typedef typename ThreeClause<
		typename Negated<typename TypeAt<BoundVariables, A>::Result>::Result,
		typename Id<typename TypeAt<BoundVariables, B>::Result>::Result,
		typename Id<typename TypeAt<BoundVariables, C>::Result>::Result
	>::value value;
};
template<int A, int B, int C, typename BoundVariables>
struct BoundConstraint<Constraint<Negated<Literal<A> >, Id<Literal<B> >, Negated<Literal<C> > >, BoundVariables> {
	typedef typename ThreeClause<
		typename Negated<typename TypeAt<BoundVariables, A>::Result>::Result,
		typename Id<typename TypeAt<BoundVariables, B>::Result>::Result,
		typename Negated<typename TypeAt<BoundVariables, C>::Result>::Result
	>::value value;
};
template<int A, int B, int C, typename BoundVariables>
struct BoundConstraint<Constraint<Negated<Literal<A> >, Negated<Literal<B> >, Id<Literal<C> > >, BoundVariables> {
	typedef typename ThreeClause<
		typename Negated<typename TypeAt<BoundVariables, A>::Result>::Result,
		typename Negated<typename TypeAt<BoundVariables, B>::Result>::Result,
		typename Id<typename TypeAt<BoundVariables, C>::Result>::Result
	>::value value;
};
template<int A, int B, int C, typename BoundVariables>
struct BoundConstraint<Constraint<Negated<Literal<A> >, Negated<Literal<B> >, Negated<Literal<C> > >, BoundVariables> {
	typedef typename ThreeClause<
		typename Negated<typename TypeAt<BoundVariables, A>::Result>::Result,
		typename Negated<typename TypeAt<BoundVariables, B>::Result>::Result,
		typename Negated<typename TypeAt<BoundVariables, C>::Result>::Result
	>::value value;
};

I imagine this part can be simplified somehow (with even more templates and higher-kinded templates).

Model

We now have everything to define the model. A model is a structure with 4 elements: list of variables, list of bound variables (variables with valuation), list of constraints, and a list of bound constraints:

template<typename Variables, typename BoundVariables, typename Constraints, typename BoundConstraints> struct Model {};

We don’t want the user to provide all the values, as we just need the variables and constraints. Therefore, we have the following wrapper:

template<typename Variables, typename Constraints>
struct FullModel {
	typedef typename Model<typename ReversedTypelist<Variables, NullType>::Result, NullType, Constraints, NullType>::Result Result;
};

Now, the plan of action. For the exclusive or example, we start with something like this:

FullModel(
  [fixed variable 0 with value true; fixed variable 1 with value false; free variable 2],
  [
    Constraint(!0, 1, 2),
    Constraint(0, !1, 2),
    Constraint(0, 1, !2),
    Constraint(!0, !1, !2)
  ]
)

First, we add two more empty parameters. We also reverse the list of variables because we’ll reverse it later when evaluating:

Model(
  [free variable 2, fixed variable 1 with value false, fixed variable 0 with value true],
  [], // valuated variables
  [
    Constraint(!0, 1, 2),
    Constraint(0, !1, 2),
    Constraint(0, 1, !2),
    Constraint(!0, !1, !2)
  ],
  [] // bound constraints
)

Now, we want to iterate over variables until the list is empty. For each iteration, we want to use disjunction to split into two subtrees:

template<typename Head, typename Tail, typename BoundVariables, typename Constraints, typename BoundConstraints>
struct Model<Typelist<Head, Tail>, BoundVariables, Constraints, BoundConstraints> {
	typedef typename Disjunction<
		typename Model<Tail, Typelist<typename Head::value1, BoundVariables>, Constraints, BoundConstraints>::Result,
		typename Model<Tail, Typelist<typename Head::value2, BoundVariables>, Constraints, BoundConstraints>::Result
	>::Result Result;
};

So, the very first step of evaluation will take first variable and generate this first subtree:

Model(
  [fixed variable 1 with value false, fixed variable 0 with value true],
  [bound variable 2 with value false], // valuated variables
  [
    Constraint(!0, 1, 2),
    Constraint(0, !1, 2),
    Constraint(0, 1, !2),
    Constraint(!0, !1, !2)
  ],
  [] // bound constraints
)

You can see that we removed one unbound variable (from the first list) and added one bound variable (to the second list). We also need to do the same for the other possible value of the variable, so we generate another subtree:

Model(
  [fixed variable 1 with value false, fixed variable 0 with value true],
  [bound variable 2 with value true], // valuated variables
  [
    Constraint(!0, 1, 2),
    Constraint(0, !1, 2),
    Constraint(0, 1, !2),
    Constraint(!0, !1, !2)
  ],
  [] // bound constraints
)

Next steps are similar, but both generated subtrees will be the same (as variables are fixed). Ultimately, we end up with something like this:

Model(
  [],
  [variable 0 with value true, variable 1 with value false, variable 2 with value false], // valuated variables
  [
    Constraint(!0, 1, 2),
    Constraint(0, !1, 2),
    Constraint(0, 1, !2),
    Constraint(!0, !1, !2)
  ],
  [] // bound constraints
)

We bound all the variables. Now, we need to do the same for constraints:

template<typename BoundVariables, typename Head, typename Tail, typename BoundConstraints>
struct Model<NullType, BoundVariables, Typelist<Head, Tail>, BoundConstraints> {
	typedef typename Model<NullType, BoundVariables, Tail, Typelist<BoundConstraint<Head, BoundVariables>, BoundConstraints> >::Result Result;
};

In the first step, we take the first constraint and bound it with the variables to get something like this:

Model(
  [],
  [bound variable 0 with value true, bound variable 1 with value false, bound variable 2 with value false], // valuated variables
  [    
    Constraint(0, !1, 2),
    Constraint(0, 1, !2),
    Constraint(!0, !1, !2)
  ],
  [
    BoundConstraint(!0, 1, 2) with bound variables(true, false, false),
  ] // bound constraints
)

We do the same for all the constraints, and finally end up with this:

Model(
  [],
  [bound variable 0 with value true, bound variable 1 with value false, bound variable 2 with value false], // valuated variables
  [],
  [
    BoundConstraint(0, !1, 2) with bound variables(true, false, false),
    BoundConstraint(0, 1, !2) with bound variables(true, false, false),
    BoundConstraint(!0, !1, !2) with bound variables(true, false, false),
    BoundConstraint(!0, 1, 2) with bound variables(true, false, false),
  ] // bound constraints
)

We repeat a lot, but that’s not a problem.

Finally, we can start evaluating the constraints:

template<typename BoundVariables, typename Head, typename Tail>
struct Model<NullType, BoundVariables, NullType, Typelist<Head, Tail> > {
	typedef typename Conjunction<
		typename Head::value,
		typename Model<NullType, BoundVariables, NullType, Tail>::Result
	>::Result Result;
};

We unwrap the first constraint and turn it into a conjunction. Effectively, we’ll build a structure of this:

Conjunction(BoundConstraint(0, !1, 2) with bound variables(true, false, false), Conjunction(BoundConstraint(0, 1, !2) with bound variables(true, false, false), Conjunction(BoundConstraint(!0, !1, !2) with bound variables(true, false, false), Conjunction(BoundConstraint(!0, 1, 2) with bound variables(true, false, false), Model([],  [bound variable 0 with value true, bound variable 1 with value false, bound variable 2 with value false], [], [])))))

And now we can calculate the conjunctions. If the second parameter is empty model, then we simply return bound variables:

template<typename BoundVariables>
struct Model<NullType, BoundVariables, NullType, NullType>{
	typedef BoundVariables Result;
};

Now, we need to refer back to the definition of conjunction we already saw:

template<typename BoundVariables> struct Conjunction<LiteralFalse, BoundVariables> { typedef NullType Result; };
template<typename BoundVariables> struct Conjunction<LiteralTrue, BoundVariables> { typedef BoundVariables Result; };

You can see that we’ll get some variables as a second parameter, and we’ll need to evaluate the first parameter which is BoundConstraint. We already saw how it’s done:

template<int A, int B, int C, typename BoundVariables>
struct BoundConstraint<Constraint<Negated<Literal<A> >, Id<Literal<B> >, Id<Literal<C> > >, BoundVariables> {
	typedef typename ThreeClause<
		typename Negated<typename TypeAt<BoundVariables, A>::Result>::Result,
		typename Id<typename TypeAt<BoundVariables, B>::Result>::Result,
		typename Id<typename TypeAt<BoundVariables, C>::Result>::Result
	>::value value;
};

And here is where the magic happens. We extract variables from the list, we negate them accordingly, and then use the 3CNF logic:

template<> struct ThreeClause<LiteralFalse, LiteralFalse, LiteralFalse> { typedef LiteralFalse value; };

And we can see this constraint isn’t met. We’ll therefore return LiteralFalse back to Conjunction above and this will in turn return NullType up the tree. You can trace the recursive calls further to figure out what happens next.

Printing

Last thing is printing. It’s rather straighforward:

template<typename T> struct Print {};
template<typename Head, typename Tail>
struct Print<Typelist<Head, Tail> >{
	void print(){
		std::cout<<Head::value<<std::endl;
		Print<Tail>().print();
	}
};
template<>
struct Print<NullType>{
	void print(){
		std::cout<<"NullType"<<std::endl;
	}
};

This simply prints the structure. The solution to SAT is calculated in the compilation time. Printing happens in runtime, so you need to run the application to get the precalculated result.

Summary

We already saw how to reduce ILP to 0-1 ILP, then to 3CNF SAT, and now we can calculate that using C++ templates. Therefore, we can solve our ILP problems using C++ compiler.

]]>
https://blog.adamfurmanek.pl/2025/07/25/sat-part-4/feed/ 0
.NET Inside Out Part 30 – Conditional types in C# https://blog.adamfurmanek.pl/2023/01/20/net-inside-out-part-30-conditional-types-in-c/ https://blog.adamfurmanek.pl/2023/01/20/net-inside-out-part-30-conditional-types-in-c/#respond Fri, 20 Jan 2023 09:00:35 +0000 https://blog.adamfurmanek.pl/?p=4829 Continue reading .NET Inside Out Part 30 – Conditional types in C#]]>

This is the thirtieth part of the .NET Inside Out series. For your convenience you can find other parts in the table of contents in Part 1 – Virtual and non-virtual calls in C#

TypeScript has a very nice feature called conditional types. Can we mimic something like this in C#? Let’s say that we have one API endpoint that should return different output depending on the type of the input.

We can start with the following code:

using System;
					
public class Program
{
	public static void Main()
	{
		Console.WriteLine(GetForInput(new ScenarioAInput()).GetType());
		Console.WriteLine(GetForInput(new ScenarioBInput()).GetType());
	}
	
	public static Output<T> GetForInput<T>(Input<T> input) where T: Scenario{
		if(typeof(Input<ScenarioA>).IsAssignableFrom(input.GetType())){
			return (Output<T>)(object)GetForInputA((ScenarioAInput)(object)input);
		}
		if(typeof(Input<ScenarioB>).IsAssignableFrom(input.GetType())){
			return (Output<T>)(object)GetForInputB((ScenarioBInput)(object)input);
		}
		
		throw new Exception("Bang!");
	}
	
	public static ScenarioAOutput GetForInputA(ScenarioAInput input){
		return new ScenarioAOutput();
	}
	
	public static ScenarioBOutput GetForInputB(ScenarioBInput input){
		return new ScenarioBOutput();
	}
}

public abstract class Scenario {
}

public class ScenarioA : Scenario{
}

public class ScenarioB: Scenario{
}

public abstract class Input<T> where T: Scenario{
}

public class ScenarioAInput : Input<ScenarioA> {
}

public class ScenarioBInput : Input<ScenarioB> {
}

public abstract class Output<T> where T: Scenario{
}

public class ScenarioAOutput: Output<ScenarioA> {
}

public class ScenarioBOutput: Output<ScenarioB>{
}

This works but is a little bit cumbersome. When we need to add new implementation, we need to add another if condition. Can we do better? Well, a little:

using System;
using System.Linq;
using System.Collections.Generic;
					
public class Program
{
	public static void Main()
	{
		Console.WriteLine(GetForInput(new ScenarioAInput()).GetType());
		Console.WriteLine(GetForInput(new ScenarioBInput()).GetType());
	}
	
	public static Output<T> GetForInput<T>(Input<T> input) where T: Scenario<T> {
		List<Scenario> implementations = new List<Scenario>(){
			new ScenarioA(),
			new ScenarioB()
		};
		
		foreach(var implementation in implementations){
			if(implementation.GetType().GetInterfaces().First().GetGenericArguments().First() == typeof(T)){
				return ((Scenario<T>)implementation).Calculate(input);
			}
		}
		
		throw new Exception("Didn't work");
	}
}

public interface Scenario{
}

public interface Scenario<T> : Scenario where T: Scenario<T> {
	Output<T> Calculate(Input<T> input);
}

public class ScenarioA : Scenario<ScenarioA> {
	public Output<ScenarioA> Calculate(Input<ScenarioA> input){
		return new ScenarioAOutput();
	}
}

public class ScenarioB: Scenario<ScenarioB> {
	public Output<ScenarioB> Calculate(Input<ScenarioB> input){
		return new ScenarioBOutput();
	}
}

public abstract class Input<T> where T: Scenario<T> {
}

public class ScenarioAInput : Input<ScenarioA> {
}

public class ScenarioBInput : Input<ScenarioB> {
}

public abstract class Output<T> where T: Scenario<T> {
}

public class ScenarioAOutput: Output<ScenarioA> {
}

public class ScenarioBOutput: Output<ScenarioB> {
}

Now adding new type requires adding new instance to the collection. However, this time we can scan all the types and crate them on the fly as needed with DI or whatever other mechanism.

]]>
https://blog.adamfurmanek.pl/2023/01/20/net-inside-out-part-30-conditional-types-in-c/feed/ 0
Async Wandering Part 14 — Async with Fibers reimplemented in .NET Core https://blog.adamfurmanek.pl/2022/07/02/async-wandering-part-14/ https://blog.adamfurmanek.pl/2022/07/02/async-wandering-part-14/#respond Sat, 02 Jul 2022 08:00:21 +0000 https://blog.adamfurmanek.pl/?p=4544 Continue reading Async Wandering Part 14 — Async with Fibers reimplemented in .NET Core]]>

This is the fourteenth part of the Async Wandering series. For your convenience you can find other parts in the table of contents in Part 1 – Why creating Form from WinForms in unit tests breaks async?

We already implemented Loom in C# with Fibers. This time, a piece of code which uses P/Invoke (instead of C++/CLI) and works in .NET 5 (on Windows).

using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Runtime.InteropServices;
using System.Threading;

namespace AsyncWithFibers
{
    class Program
    {
        static void Main(string[] args)
        {
            HKTMonadFiberAsync.Run();
        }

        public static void WhereAmI(string what)
        {
            Console.WriteLine($"Thread {Thread.CurrentThread.ManagedThreadId} Time {DateTime.Now}: {what}");
        }
    }

    public class FiberHelper
    {
        [DllImport("kernel32.dll")]
        static extern IntPtr ConvertThreadToFiber(IntPtr lpParameter);

        [DllImport("kernel32.dll")]
        static extern IntPtr CreateFiber(uint dwStackSize, IntPtr lpStartAddress, IntPtr lpParameter);

        [DllImport("kernel32.dll")]
        static extern IntPtr SwitchToFiber(IntPtr lpStartAddress);

        [DllImport("kernel32.dll")]
        static extern IntPtr DeleteFiber(IntPtr lpStartAddress);

        Dictionary<int, IntPtr> actions;
        public IntPtr fiberCaller;

        public FiberHelper()
        {
            actions = new Dictionary<int, IntPtr>();
        }

        public void Convert()
        {
            actions.Add(0, ConvertThreadToFiber(IntPtr.Zero));
        }

        public void Create(int action)
        {
            actions.Add(action, CreateFiber(1024 * 1024, fiberCaller, (IntPtr)action));
        }

        public void Switch(int action)
        {
            Thread.Sleep(100);
            IntPtr param = actions[action];
            SwitchToFiber(param);
        }

        public void Delete(int action)
        {
            DeleteFiber(actions[action]);
        }
    }

    public class HKTMonadFiberAsync
    {
        public static ConcurrentDictionary<int, byte> readyToGo = new ConcurrentDictionary<int, byte>();
        public static ConcurrentDictionary<int, Action> allJobs = new ConcurrentDictionary<int, Action>();
        public static FiberHelper helper = new FiberHelper();
        public static int current;
        public static bool done;

        public static int StartFiber(int actionId)
        {
            allJobs[actionId]();
            if (actionId != 0)
            {
                HKTMonadFiberAsync.done = true;
                HKTMonadFiberAsync.helper.Switch(0);
            }

            return 0;
        }

        delegate int StartFiberDelegate(int actionId);

        public static void Run()
        {
            helper.fiberCaller = Marshal.GetFunctionPointerForDelegate((StartFiberDelegate)StartFiber);

            helper.Convert();

            allJobs.TryAdd(1, RunInternal);
            readyToGo.TryAdd(1, 0);
            helper.Create(1);

            allJobs.TryAdd(2, SideJob);
            readyToGo.TryAdd(2, 0);
            helper.Create(2);


            while (true)
            {
                done = false;
                var keys = readyToGo.Keys.GetEnumerator();
                while (keys.MoveNext())
                {
                    current = keys.Current;
                    helper.Switch(current);
                    if (done)
                    {
                        helper.Delete(current);
                        Action action;
                        allJobs.TryRemove(current, out action);
                        byte b;
                        readyToGo.TryRemove(current, out b);
                    }
                }

                if (allJobs.IsEmpty)
                {
                    break;
                }

                Thread.Sleep(1);
            }
        }

        private static void RunInternal()
        {
            Program.WhereAmI("\tBefore nesting");

            RunInternalNested<AsyncBuilder>();
            //RunInternalNested<IdBuilder>();

            Program.WhereAmI("\tAfter nesting");
        }

        private static void RunInternalNested<T>() where T : Builder, new()
        {
            Program.WhereAmI("\t\tBefore creating delay");

            Delay<T>(2000);

            Program.WhereAmI("\t\tAfter sleeping");

            var data = Data<T, string>("Some string");

            Program.WhereAmI($"\t\tAfter creating data {data}");
        }

        private static void Delay<T>(int timeout) where T : Builder, new()
        {
            var context = new T().Build<object>();
            var timer = new Timer(_ => context.Complete(new object()), null, timeout, Timeout.Infinite);
            GC.KeepAlive(timer);
            context.Map((object)null, _ => timeout);
        }

        private static U Data<T, U>(U d) where T : Builder, new()
        {
            var context = new T().Build<U>();
            return context.Map(d, _ => d);
        }

        private static void SideJob()
        {
            Program.WhereAmI("\tSide job");
        }
    }

    public abstract class Builder
    {
        public abstract Monad<T> Build<T>();
    }

    public class IdBuilder : Builder
    {
        public override Monad<T> Build<T>()
        {
            return new Id<T>();
        }
    }

    public class AsyncBuilder : Builder
    {
        public override Monad<T> Build<T>()
        {
            return new Async<T>();
        }
    }

    public interface Monad<T>
    {
        U Map<U>(T value, Func<T, U> lambda);
        void Complete(T t);
    }

    public class Id<T> : Monad<T>
    {
        private T t;

        public U Map<U>(T value, Func<T, U> lambda)
        {
            this.t = value;
            lock (this)
            {
                while (t == null)
                {
                    Monitor.Wait(this);
                }
            }

            return lambda(this.t);
        }

        public void Complete(T t)
        {
            lock (this)
            {
                this.t = t;
                Monitor.PulseAll(this);
            }
        }
    }

    public class Async<T> : Monad<T>
    {
        private T t;
        private int current;

        public U Map<U>(T value, Func<T, U> lambda)
        {
            this.t = value;
            if (t == null)
            {
                this.current = HKTMonadFiberAsync.current;
                byte b;
                HKTMonadFiberAsync.readyToGo.TryRemove(this.current, out b);
                HKTMonadFiberAsync.helper.Switch(0);
            }

            return lambda(this.t);
        }

        public void Complete(T t)
        {
            this.t = t;
            HKTMonadFiberAsync.readyToGo.TryAdd(this.current, 0);
        }
    }
}

Crucial differences are in lines 22-65. We use P/Invoke and pass things as IntPtrs so they work in x86 and in x64. Also, notice the Marshal.GetFunctionPointerForDelegate in line 91 which allows us to call the managed code from the WinAPI.

]]>
https://blog.adamfurmanek.pl/2022/07/02/async-wandering-part-14/feed/ 0
.NET Inside Out Part 29 – Terminating some existing thread with jumps https://blog.adamfurmanek.pl/2022/03/26/net-inside-out-part-29/ https://blog.adamfurmanek.pl/2022/03/26/net-inside-out-part-29/#respond Sat, 26 Mar 2022 09:00:06 +0000 https://blog.adamfurmanek.pl/?p=4413 Continue reading .NET Inside Out Part 29 – Terminating some existing thread with jumps]]>

This is the twentieth ninth part of the .NET Inside Out series. For your convenience you can find other parts in the table of contents in Part 1 – Virtual and non-virtual calls in C#

Last time we discussed ways to terminate a thread. We mentioned a method based on unwinding the stack and preserving the registers. Today we’re going to implement this solution. I’m using .NET 5 on Windows 10 x64.

We start with the following code:

using System;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Reflection;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Threading;

namespace RerouteThreadWithDebugger
{
    class Program
    {
        static long[] registersHolderArray = new long[9];
        static int[] entryPointHolderArray = new int[3];

        static void Main(string[] args)
        {
            HackEntryPoint();

            var infinite = new Thread(InfiniteThread);
            infinite.Start();
            Thread.Sleep(2000);

            Console.WriteLine("Doing magic");

            TerminateThread(infinite);

            infinite.Join();
            Console.WriteLine("Finished!");
        }

        static void InfiniteThread()
        {
            while (true)
            {
                Console.WriteLine($"{Thread.CurrentThread.ManagedThreadId} - Still looping");
                Thread.Sleep(1000);
            }
        }

        //...
    }
}

We create a thread which runs forever and we want to terminate it.

First, we need to modify the entry point of the thread and capture registers once it starts. Let’s do it:

static void HackEntryPoint()
{
	// Get address of array to store registers
	var registersArrayAddress = GetArrayPosition(registersHolderArray);

	// Get address of calling the function
	var sourceMethod = typeof(Program).GetMethod(nameof(InfiniteThread), BindingFlags.Static | BindingFlags.NonPublic);
	RuntimeHelpers.PrepareMethod(sourceMethod.MethodHandle);

	IntPtr entryPointAddress = Marshal.ReadIntPtr(sourceMethod.MethodHandle.Value, 2 * IntPtr.Size);
	UnlockPage(entryPointAddress);

	// Preserve preamble of the entry point
	Marshal.Copy(entryPointAddress, entryPointHolderArray, 0, entryPointHolderArray.Length);

	// Create a code for new entry point
	var newConstructor = new byte[0]
		// --------------------------------------------
		.Concat(new byte[] {
			0x48, 0xB8,                                                     // movabs rax, address_of_array
		}).Concat(BitConverter.GetBytes((long)registersArrayAddress)).Concat(new byte[] {
			0x48, 0x89, 0x20,                                               // mov QWORD PTR [rax], rsp
			0x48, 0x89, 0x68, 0x8,                                          // mov QWORD PTR [rax+0x8], rbp
			0x48, 0x89, 0x70, 0x10,                                         // mov QWORD PTR [rax+0x10], rsi
			0x48, 0x89, 0x78, 0x18,                                         // mov QWORD PTR [rax+0x18], rdi
			0x48, 0x89, 0x58, 0x20,                                         // mov QWORD PTR [rax+0x20], rbx
			0x4C, 0x89, 0x60, 0x28,                                         // mov QWORD PTR [rax+0x28], r12
			0x4C, 0x89, 0x68, 0x30,                                         // mov QWORD PTR [rax+0x30], r13
			0x4C, 0x89, 0x70, 0x38,                                         // mov QWORD PTR [rax+0x38], r14
			0x4C, 0x89, 0x78, 0x40,                                         // mov QWORD PTR [rax+0x40], r15
			0x48, 0xB8                                                      // movabs rax, address_of_thread_constructor
		}).Concat(BitConverter.GetBytes((long)entryPointAddress)).Concat(new byte[] {
			0xC7, 0x00                                                      // mov DWORD PTR [rax], 0-4 bytes of constructor function
		}).Concat(BitConverter.GetBytes((int)entryPointHolderArray[0])).Concat(new byte[] {
			0xC7, 0x40, 0x04,                                               // mov DWORD PTR [rax+0x4], 5-8 bytes of constructor function
		}).Concat(BitConverter.GetBytes((int)entryPointHolderArray[1])).Concat(new byte[] {
			0xC7, 0x40, 0x08,                                               // mov DWORD PTR [rax+0x8], 9-12 bytes of constructor function
		}).Concat(BitConverter.GetBytes((int)entryPointHolderArray[2])).Concat(new byte[] {
			0x50,                                                           // push rax
			0xC3                                                            // ret
		}).ToArray();

	var newEntryPointAddress = GetArrayPosition(newConstructor);

	Console.WriteLine($"Holder addres {registersArrayAddress.ToString("X")}\n" +
		$"Entrypoint code {entryPointAddress.ToString("X")}\n" +
		$"New entrypoint code {newEntryPointAddress.ToString("X")}");

	HijackMethod(entryPointAddress, newEntryPointAddress);
}


private static IntPtr GetArrayPosition(object array)
{
	GCHandle handle = GCHandle.Alloc(array, GCHandleType.Pinned);
	return handle.AddrOfPinnedObject();
}


public static void HijackMethod(IntPtr sourceAddress, IntPtr targetAddress)
{
	byte[] instruction = new byte[] {
		0x48, 0xB8 // mov rax <value>
	}
	.Concat(BitConverter.GetBytes((long)targetAddress))
	.Concat(new byte[] {
		0x50, // push rax
		0xC3  // ret
	}).ToArray();

	UnlockPage(sourceAddress);
	UnlockPage(targetAddress);

	Marshal.Copy(instruction, 0, sourceAddress, instruction.Length);
}

[DllImport("kernel32.dll", SetLastError = true)]
static extern bool VirtualProtect(IntPtr lpAddress, uint dwSize, uint flNewProtect, out uint lpflOldProtect);

private static void UnlockPage(IntPtr address)
{
	uint pageSize = 4096;
	uint pageExecuteReadWrite = 0x40;
	VirtualProtect(address, pageSize, pageExecuteReadWrite, out _);
}

static string DBG_PATH = @"PATH TO CDB";
static string LOAD_SOS = @".load PATH TO SOS";

First, we use a helper method GetArrayPosition to extract the address of array’s content. We first use it to get the address of an array to store registers which need to be preserved in x64 calling convention (these are rsp, rbp, rsi, rdi, rbx, r12, r13, r14, r15).

Next, we get the address of the entrypoint of the thread (lines 6-11). This is user’s code.

Now, we need to modify the entrypoint. We want to make it jump to our code so we need an array to hold the preamble. 12 bytes are enough (line 14).

Next, we craft the machine code to create a set jump callsite. Let’s go through it line by line:

Lines 20-21: we store the address of the array holding registers in the rax register so we can use it.
Lines 22-30: we copy registers one by one to the array.
Lines 31-32: we copy the address of the original entrypoint to the rax register so we can restore the entrypoint.
Lines 33-38: we copy 12 bytes from the array to the original entrypoint.
Lines 39-40: we jump back to the original (now restored) entrypoint.

Where do these 12 bytes come from? It’s in the HijackMethod which pushes address of the jump target (lines 62-65) and then jumps (lines 67-68). This is 12 bytes in total.

Okay, we have the entrypoint. We now go to the termination method:

static void TerminateThread(Thread thread)
{
	var processId = Environment.ProcessId;

	var threadsCommand = @$"-p {processId} -noio -logo cdb.log -c """ +
		string.Join("; ",
			LOAD_SOS,
			$@"!threads",
			$@"qd"
		) +
		@$"""";

	var process = Process.Start(
		DBG_PATH,
		threadsCommand
	);
	process.WaitForExit();

	var threadsOutput = File.ReadAllText("cdb.log");

	var nativeThreadToKill = threadsOutput
		.Substring(threadsOutput.IndexOf("Hosted Runtime"))
		.Split("\r", StringSplitOptions.RemoveEmptyEntries).Where(line =>
		{
			var parts = line.Trim().Split(" ", StringSplitOptions.RemoveEmptyEntries);
			return parts.Length > 2 && parts[1] == $"{thread.ManagedThreadId}";
		}).FirstOrDefault().Trim().Split(" ", StringSplitOptions.RemoveEmptyEntries)[0];

	var stackCommand = @$"-p {processId} -noio -logo cdb.log -c """ +
		string.Join("; ",
			LOAD_SOS,
			$@"~{nativeThreadToKill}s",
			$@"!clrstack",
			$@"qd"
		) +
		@$"""";

	process = Process.Start(
		DBG_PATH,
		stackCommand
	);
	process.WaitForExit();

	var stackOutput = File.ReadAllText("cdb.log");

	var stackReturnAddress = stackOutput
		.Split("\r", StringSplitOptions.RemoveEmptyEntries)
		.Where(line => line.Contains("System.Threading.ThreadHelper.ThreadStart_Context"))
		.FirstOrDefault()
		.Trim()
		.Split(" ", StringSplitOptions.RemoveEmptyEntries)[1];

	var rerouteCommand = @$"-p {processId} -noio -c """ +
		string.Join("; ",
			$@"~{nativeThreadToKill}s",
			$@"r rip=0x{stackReturnAddress}",
			$@"r rsp=0x{(registersHolderArray[0] + IntPtr.Size).ToString("X")}",
			$@"r rbp=0x{registersHolderArray[1].ToString("X")}",
			$@"r rsi=0x{registersHolderArray[2].ToString("X")}",
			$@"r rdi=0x{registersHolderArray[3].ToString("X")}",
			$@"r rbx=0x{registersHolderArray[4].ToString("X")}",
			$@"r r12=0x{registersHolderArray[5].ToString("X")}",
			$@"r r13=0x{registersHolderArray[6].ToString("X")}",
			$@"r r14=0x{registersHolderArray[7].ToString("X")}",
			$@"r r15=0x{registersHolderArray[8].ToString("X")}",
			$@"qd"
		) +
		@$"""";

	Process.Start(
		DBG_PATH,
		rerouteCommand
	);
}

First, we attach the debugger and dump threads (lines 5-17). We then extract the native thread id of the thread we want to kill (lines 21-27).

Next, we want to dump the stack to extract the return address (lines 29-51). What we do is we dump the output of !clrstack command, get line showing the ThreadHelper.ThreadStart_Context method which calls our entrypoint, and get its return address.

Finally, we create a command which restores all the addresses. Note that we can hardcode values in the command as we can read them from the array just like that. Notice also that we need to increase the stack pointer to drop the return address of the original entrypoint from it (line 57)

Code works and makes the thread to terminate gracefully. However, it doesn’t call any finally blocks. It just disappears. Combine this with our code for handling stack overflow issue and we now have a pretty robust test runner which can run your unit tests without risking StackOverflowException or other infinite loops.

]]>
https://blog.adamfurmanek.pl/2022/03/26/net-inside-out-part-29/feed/ 0
.NET Inside Out Part 28 – Terminating some existing thread https://blog.adamfurmanek.pl/2022/03/19/net-inside-out-part-28/ https://blog.adamfurmanek.pl/2022/03/19/net-inside-out-part-28/#respond Sat, 19 Mar 2022 09:00:53 +0000 https://blog.adamfurmanek.pl/?p=4410 Continue reading .NET Inside Out Part 28 – Terminating some existing thread]]>

This is the twentieth eighth part of the .NET Inside Out series. For your convenience you can find other parts in the table of contents in Part 1 – Virtual and non-virtual calls in C#

Last time we learned how to reroute an existing thread to some other code. Today we want to terminate the thread.

First method: there is a method Thread.Abort which throws PlatformNotSupportedException starting with .NET Core. Too bad.

Second method: we can go with TerminateThread. The problem is there is no clear equivalent in non-Windows world. It also has some drawback which we’ll cover later.

Third method: we can go with ExitThread or pthread_exit To do that we need to reroute the thread the same way we did in the last part and call this method. However, it suffers from the same issue as method 2 — .NET doesn’t understand what happened. Once we exit the thread this way .NET just cannot handle it correctly anymore. It means that if you try calling terminatedThread.join you get a deadlock. You could work that around by setting IsBackground = true so it doesn’t stop the process from exiting but it’s still bad.

Fourth method: we can simulate exiting. We suspend the thread, take its stack, examine it and exit all the things. That’s nearly impossible as we’d need to analyze the whole code with all dependencies. Even reading the machine code without symbols may be impossible in x86, not to mention that we might need to solve the halting problem.

Fifth method: we unwind the stack and then clear the thread. This is the only working method I’m aware of.

Unwinding stack

First, we need to hijack the thread constructor. Just before it starts executing the thread function we take all important registers (rbp, rsp, rsi, rbx, rdx) and save them on the side (so we do the “set jump” part). Next, we call the thread function.

Next, once we want to stop the thread, we restore all important registers and let it carry on. This way we remove all the stack frames from the stack and continue as if the main function finished.

]]>
https://blog.adamfurmanek.pl/2022/03/19/net-inside-out-part-28/feed/ 0
.NET Inside Out Part 27 – Rerouting a running thread to some other method https://blog.adamfurmanek.pl/2022/03/12/net-inside-out-part-27/ https://blog.adamfurmanek.pl/2022/03/12/net-inside-out-part-27/#respond Sat, 12 Mar 2022 09:00:22 +0000 https://blog.adamfurmanek.pl/?p=4408 Continue reading .NET Inside Out Part 27 – Rerouting a running thread to some other method]]>

This is the twentieth seventh part of the .NET Inside Out series. For your convenience you can find other parts in the table of contents in Part 1 – Virtual and non-virtual calls in C#

Today we are going to grab an existing running thread and make it run some other code.

Let’s take this:

using System;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Reflection;
using System.Runtime.CompilerServices;
using System.Threading;

namespace RerouteThreadWithDebugger
{
    class Program
    {
        static void Main(string[] args)
        {
            var infinite = new Thread(() => InfiniteThread());
            infinite.Start();
            Thread.Sleep(2000);
            HackThread(infinite);
            infinite.Join();
            Console.WriteLine("Finished!");
        }

        static void InfiniteThread()
        {
            while (true)
            {
                Console.WriteLine($"{Thread.CurrentThread.ManagedThreadId} - Still looping");
                Thread.Sleep(1000);
            }
        }

        public static void InfiniteThreadReplacement()
        {
            Console.WriteLine($"{Thread.CurrentThread.ManagedThreadId} - Replaced.");
            throw new Exception("Killed");
        }
        //...
    }
}

We have a thread which executes some infinite loop. We’d like to make it jump out of that loop and do something else. How can we do that?

The trick is to modify its instruction pointer register. We can’t do it from the same process directly as we need to debug it so we’ll use CDB for that. Obviously, this can be done manually if needed:

static void HackThread(Thread thread)
{
	var method = typeof(Program).GetMethod(nameof(InfiniteThreadReplacement), BindingFlags.Static | BindingFlags.Public);
	RuntimeHelpers.PrepareMethod(method.MethodHandle);

	var methodAddress = method.MethodHandle.GetFunctionPointer();

	var processId = Environment.ProcessId;

	var threadsCommand = @$"-p {processId} -noio -logo cdb.log -c """ +
		string.Join("; ",
			LOAD_SOS,
			$@"!threads",
			$@"qd"
		) +
		@$"""";
	
	var process = Process.Start(
		DBG_PATH,
		threadsCommand
	);
	process.WaitForExit();

	var threadsOutput = File.ReadAllText("cdb.log");

	var nativeThreadToKill = threadsOutput
		.Substring(threadsOutput.IndexOf("Hosted Runtime"))
		.Split("\r", StringSplitOptions.RemoveEmptyEntries).Where(line =>
	{
		var parts = line.Trim().Split(" ", StringSplitOptions.RemoveEmptyEntries);
		return parts.Length > 2 && parts[1] == $"{thread.ManagedThreadId}";
	}).Select(line => line.Trim().Split(" ", StringSplitOptions.RemoveEmptyEntries)[0])
	.FirstOrDefault();
	
	var rerouteCommand = @$"-p {processId} -noio -c """ +
		string.Join("; ",
			$@"~{nativeThreadToKill}s",
			$@"r rip=0x{methodAddress.ToString("X")}", // Reroute a thread
			$@"qd"
		) +
		@$"""";

	Process.Start(
		DBG_PATH,
		rerouteCommand
	);
}

static string DBG_PATH = @"path_to_cdb.exe";
static string LOAD_SOS = @"path_to_sos.dll";

How does it work? We first run CDB and dump threads to get the native thread id from the output of the !threads command. There are other ways to do so but this one is the most reliable.

Next, we run CDB again. This time it switches to thread, modifies its rip register and then exists.

The problem with this approach is that once we modify the thread it becomes unreliable. We don’t know if the stack is correct or whether we can safely exit the method. Ideally, we’d like to kill the thread. We’ll discuss this aspect in some later part.

]]>
https://blog.adamfurmanek.pl/2022/03/12/net-inside-out-part-27/feed/ 0
Custom memory allocation in C# Part 18 — Hijacking methods on .NET 5 with modifying machine code https://blog.adamfurmanek.pl/2021/12/11/custom-memory-allocation-in-c-part-18/ https://blog.adamfurmanek.pl/2021/12/11/custom-memory-allocation-in-c-part-18/#respond Sat, 11 Dec 2021 09:00:40 +0000 https://blog.adamfurmanek.pl/?p=4272 Continue reading Custom memory allocation in C# Part 18 — Hijacking methods on .NET 5 with modifying machine code]]>

This is the eighteenth part of the Custom memory allocation series. For your convenience you can find other parts in the table of contents in Part 1 — Allocating object on a stack

Today we are going to see a rewritten way of hijacking method with machine code. It works in Windows and Linux, for both Debug and Release using .NET 5.

using System;
using System.Linq;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Threading;

namespace MethodHijackerNetCore
{
    class Program
    {
        static void Main(string[] args)
        {
            Console.WriteLine($"Calling StaticString method before hacking:\t{TestClass.StaticString()}");
            HijackMethod(typeof(TestClass), nameof(TestClass.StaticString), typeof(Program), nameof(StaticStringHijacked));
            Console.WriteLine($"Calling StaticString method after hacking:\t{TestClass.StaticString()}");

            Console.WriteLine();

            var instance = new TestClass();
            Console.WriteLine($"Calling InstanceString method before hacking:\t{instance.InstanceString()}");
            HijackMethod(typeof(TestClass), nameof(TestClass.InstanceString), typeof(Program), nameof(InstanceStringHijacked));
            Console.WriteLine($"Calling InstanceString method after hacking:\t{instance.InstanceString()}");

            Console.WriteLine();

            Vector2 v = new Vector2(9.856331f, -2.2437377f);
            for (int i = 1; i <= 35 ; i++)
            {
                MultiTieredClass.Test(v, i);
                Thread.Sleep(100);
            }
        }

        [MethodImpl(MethodImplOptions.NoInlining)]
        public static string StaticStringHijacked()
        {
            return "Static string hijacked";
        }

        [MethodImpl(MethodImplOptions.NoInlining)]
        public string InstanceStringHijacked()
        {
            return "Instance string hijacked";
        }

        public static void HijackMethod(Type sourceType, string sourceMethod, Type targetType, string targetMethod)
        {
            var source = sourceType.GetMethod(sourceMethod);
            var target = targetType.GetMethod(targetMethod);

            RuntimeHelpers.PrepareMethod(source.MethodHandle);
            RuntimeHelpers.PrepareMethod(target.MethodHandle);


            var offset = 2 * IntPtr.Size;
            IntPtr sourceAddress = Marshal.ReadIntPtr(source.MethodHandle.Value, offset);
            IntPtr targetAddress = Marshal.ReadIntPtr(target.MethodHandle.Value, offset);

            var is32Bit = IntPtr.Size == 4;
            byte[] instruction;

            if (is32Bit)
            {
                instruction = new byte[] {
                    0x68, // push <value>
                }
                 .Concat(BitConverter.GetBytes((int)targetAddress))
                 .Concat(new byte[] {
                    0xC3 //ret
                 }).ToArray();
            }
            else
            {
                instruction = new byte[] {
                    0x48, 0xB8 // mov rax <value>
                }
                .Concat(BitConverter.GetBytes((long)targetAddress))
                .Concat(new byte[] {
                    0x50, // push rax
                    0xC3  // ret
                }).ToArray();
            }

            Marshal.Copy(instruction, 0, sourceAddress, instruction.Length);
        }
    }

    class TestClass
    {
        [MethodImpl(MethodImplOptions.NoInlining)]
        public static string StaticString()
        {
            return "Static string";
        }

        [MethodImpl(MethodImplOptions.NoInlining)]
        public string InstanceString()
        {
            return "Instance string";
        }
    }

    class MultiTieredClass
    {
        [MethodImpl(MethodImplOptions.NoInlining)]
        public static void Test(Vector2 v, int i)
        {
            v = Vector2.Normalize(v);
            Console.WriteLine($"Vector iteration {i:0000}:\t{v}\t{TestClass.StaticString()}");
        }
    }
}

Tested with .NET 5.0.102 on Windows and .NET 5.0.401 on WSL2 Ubuntu 20.04. This is the output:

Calling StaticString method before hacking:     Static string
Calling StaticString method after hacking:      Static string hijacked

Calling InstanceString method before hacking:   Instance string
Calling InstanceString method after hacking:    Instance string hijacked

Vector iteration 0001:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0002:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0003:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0004:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0005:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0006:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0007:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0008:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0009:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0010:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0011:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0012:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0013:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0014:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0015:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0016:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0017:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0018:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0019:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0020:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0021:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0022:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0023:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0024:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0025:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0026:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0027:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0028:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0029:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0030:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0031:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0032:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0033:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0034:  <0.97505456, -0.22196563>       Static string
Vector iteration 0035:  <0.97505456, -0.22196563>       Static string
Examine MethodDescriptor: 7FFA3ACF5218

So we clearly see that the hacking works and that after multitiered compilation kicks in it no longer calls method but inlines it.

]]>
https://blog.adamfurmanek.pl/2021/12/11/custom-memory-allocation-in-c-part-18/feed/ 0
Custom memory allocation in C# Part 17 — Hijacking methods on .NET 5 with modifying metadata curious thing https://blog.adamfurmanek.pl/2021/12/04/custom-memory-allocation-in-c-part-17/ https://blog.adamfurmanek.pl/2021/12/04/custom-memory-allocation-in-c-part-17/#respond Sat, 04 Dec 2021 09:00:48 +0000 https://blog.adamfurmanek.pl/?p=4270 Continue reading Custom memory allocation in C# Part 17 — Hijacking methods on .NET 5 with modifying metadata curious thing]]>

This is the seventeenth part of the Custom memory allocation series. For your convenience you can find other parts in the table of contents in Part 1 — Allocating object on a stack

I was rewriting my method hijacking samples to .NET 5 and I found an interesting behavior. Let’s take this code:

using System;
using System.Linq;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Threading;

namespace OverridingSealedMethodNetCore
{
    class Program
    {
        static void Main(string[] args)
        {
            Console.WriteLine($"Calling StaticString method before hacking:\t{TestClass.StaticString()}");
            HijackMethod(typeof(TestClass), nameof(TestClass.StaticString), typeof(Program), nameof(StaticStringHijacked));
            Console.WriteLine($"Calling StaticString method after hacking:\t{TestClass.StaticString()}");

            Console.WriteLine();

            var instance = new TestClass();
            Console.WriteLine($"Calling InstanceString method before hacking:\t{instance.InstanceString()}");
            HijackMethod(typeof(TestClass), nameof(TestClass.InstanceString), typeof(Program), nameof(InstanceStringHijacked));
            Console.WriteLine($"Calling InstanceString method after hacking:\t{instance.InstanceString()}");

            Console.WriteLine();

            Vector2 v = new Vector2(9.856331f, -2.2437377f);
            for (int i = 1; i <= 35; i++)
            {
                MultiTieredClass.Test(v, i);
                Thread.Sleep(100);
            }
        }

        public static void HijackMethod(Type sourceType, string sourceMethod, Type targetType, string targetMethod)
        {
            // Get methods using reflection
            var source = sourceType.GetMethod(sourceMethod);
            var target = targetType.GetMethod(targetMethod);

            // Prepare methods to get machine code (not needed in this example, though)
            RuntimeHelpers.PrepareMethod(source.MethodHandle);
            RuntimeHelpers.PrepareMethod(target.MethodHandle);

            var sourceMethodDescriptorAddress = source.MethodHandle.Value;
            var targetMethodMachineCodeAddress = target.MethodHandle.GetFunctionPointer();

            // Pointer is two pointers from the beginning of the method descriptor
            Marshal.WriteIntPtr(sourceMethodDescriptorAddress, 2 * IntPtr.Size, targetMethodMachineCodeAddress);
        }

        [MethodImpl(MethodImplOptions.NoInlining)]
        public static string StaticStringHijacked()
        {
            return "Static string hijacked";
        }

        [MethodImpl(MethodImplOptions.NoInlining)]
        public string InstanceStringHijacked()
        {
             return "Instance string hijacked";
        }
    }

    class TestClass
    {
        [MethodImpl(MethodImplOptions.NoInlining)]
        public static string StaticString()
        {
            return "Static string";
        }

        [MethodImpl(MethodImplOptions.NoInlining)]
        public string InstanceString()
        {
            return "Instance string";
        }
    }

    class MultiTieredClass
    {
        [MethodImpl(MethodImplOptions.NoInlining)]
        public static void Test(Vector2 v, int i)
        {
            v = Vector2.Normalize(v);
            Console.WriteLine($"Vector iteration {i:0000}:\t{v}\t{TestClass.StaticString()}");
        }
    }
}

If you follow my blog then there is nothing new here. We try to hijack method by modifying its runtime metadata. The MultiTiered part is only to show recompilation of the code. I’m running this on W10 x64 in Release x64 mode and I’m getting this output:

Calling StaticString method before hacking:     Static string
Calling StaticString method after hacking:      Static string

Calling InstanceString method before hacking:   Instance string
Calling InstanceString method after hacking:    Instance string

Vector iteration 0001:  <0.9750545, -0.22196561>        Static string
Vector iteration 0002:  <0.9750545, -0.22196561>        Static string
Vector iteration 0003:  <0.9750545, -0.22196561>        Static string
Vector iteration 0004:  <0.9750545, -0.22196561>        Static string
Vector iteration 0005:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0006:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0007:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0008:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0009:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0010:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0011:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0012:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0013:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0014:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0015:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0016:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0017:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0018:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0019:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0020:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0021:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0022:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0023:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0024:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0025:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0026:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0027:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0028:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0029:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0030:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0031:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0032:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0033:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0034:  <0.9750545, -0.22196561>        Static string hijacked
Vector iteration 0035:  <0.97505456, -0.22196563>       Static string

And this is nice. Notice that first two lines of the output show that even though we hacked the method, we’re still not getting the new behavior. That’s first why.

Next, we see that we start calling the example of multitiered compilation method and first 4 instances are consistent. However, in fifth one we see that a hijacked method was called instead of the original one. That’s second why. This lasts until iteration 35 when multitiered compilation kicks in and recompiles things.

I don’t know the answer why it works this way but I presume there is this new code cache thing which was implemented around .NET Core 2.1 to support multitiered compilation. I may be wrong, though.

]]>
https://blog.adamfurmanek.pl/2021/12/04/custom-memory-allocation-in-c-part-17/feed/ 0
Custom memory allocation in C# Part 16 — Hijacking new on Linux with .NET 5 https://blog.adamfurmanek.pl/2021/09/25/custom-memory-allocation-in-c-part-16/ https://blog.adamfurmanek.pl/2021/09/25/custom-memory-allocation-in-c-part-16/#comments Sat, 25 Sep 2021 08:00:20 +0000 https://blog.adamfurmanek.pl/?p=4019 Continue reading Custom memory allocation in C# Part 16 — Hijacking new on Linux with .NET 5]]>

This is the sixteenth part of the Custom memory allocation series. For your convenience you can find other parts in the table of contents in Part 1 — Allocating object on a stack

I was recently asked if it’s possible to hijack the new operator in Linux. We’ve already seen that we can do it in both .NET Framework and .NET Core, in both x86_32 and x86_64, now it’s time to do it in .NET 5 on Linux.

Docker configuration

I’m going to use Ubuntu 20.04 on WSL2 running on Windows 10:

afish@ubuntu:/home/af$ uname -r
4.19.128-microsoft-standard

I’m going to use Docker for installing .NET and everything around. So I do this to create a container based on .NET 5:

docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --security-opt apparmor=unconfined --rm -i -v /home/afish/makeref:/makeref mcr.microsoft.com/dotnet/sdk:5.0 bash

You can see I’m mapping directory /home/afish/makeref and enabling some security flags to be able to debug application and modify page protection.

lldb and SOS

First thing I want to do is to install lldb and SOS:

apt-get update

Get:1 http://security.debian.org/debian-security buster/updates InRelease [65.4 kB]
Get:2 http://deb.debian.org/debian buster InRelease [121 kB]
Get:3 http://deb.debian.org/debian buster-updates InRelease [51.9 kB]
Get:4 http://security.debian.org/debian-security buster/updates/main amd64 Packages [258 kB]
Get:5 http://deb.debian.org/debian buster/main amd64 Packages [7907 kB]
Get:6 http://deb.debian.org/debian buster-updates/main amd64 Packages [7860 B]
Fetched 8412 kB in 2s (3467 kB/s)
Reading package lists...

apt-get install -y lldb

Reading package lists...
Building dependency tree...
Reading state information...
The following additional packages will be installed:
  binfmt-support bzip2 file libbsd0 libc-dev-bin libc6-dev libedit2 libffi-dev
  libgpm2 liblldb-7 libllvm7 libmagic-mgc libmagic1 libncurses-dev libncurses6
  libpipeline1 libpython-stdlib libpython2-stdlib libpython2.7
  libpython2.7-minimal libpython2.7-stdlib libreadline7 libsqlite3-0
  libtinfo-dev linux-libc-dev lldb-7 llvm-7 llvm-7-dev llvm-7-runtime lsb-base
  manpages manpages-dev mime-support python python-lldb-7 python-minimal
  python-six python2 python2-minimal python2.7 python2.7-minimal
  readline-common xz-utils
Suggested packages:
  bzip2-doc glibc-doc gpm ncurses-doc llvm-7-doc man-browser python-doc
  python-tk python2-doc python2.7-doc binutils readline-doc
The following NEW packages will be installed:
  binfmt-support bzip2 file libbsd0 libc-dev-bin libc6-dev libedit2 libffi-dev
  libgpm2 liblldb-7 libllvm7 libmagic-mgc libmagic1 libncurses-dev libncurses6
  libpipeline1 libpython-stdlib libpython2-stdlib libpython2.7
  libpython2.7-minimal libpython2.7-stdlib libreadline7 libsqlite3-0
  libtinfo-dev linux-libc-dev lldb lldb-7 llvm-7 llvm-7-dev llvm-7-runtime
  lsb-base manpages manpages-dev mime-support python python-lldb-7
  python-minimal python-six python2 python2-minimal python2.7
  python2.7-minimal readline-common xz-utils
0 upgraded, 44 newly installed, 0 to remove and 1 not upgraded.
Need to get 71.3 MB of archives.
After this operation, 369 MB of additional disk space will be used.
Get:1 http://deb.debian.org/debian buster/main amd64 libpython2.7-minimal amd64 2.7.16-2+deb10u1 [395 kB]
Get:2 http://deb.debian.org/debian buster/main amd64 python2.7-minimal amd64 2.7.16-2+deb10u1 [1369 kB]
Get:3 http://deb.debian.org/debian buster/main amd64 python2-minimal amd64 2.7.16-1 [41.4 kB]
Get:4 http://deb.debian.org/debian buster/main amd64 python-minimal amd64 2.7.16-1 [21.0 kB]
Get:5 http://deb.debian.org/debian buster/main amd64 mime-support all 3.62 [37.2 kB]
Get:6 http://deb.debian.org/debian buster/main amd64 readline-common all 7.0-5 [70.6 kB]
Get:7 http://deb.debian.org/debian buster/main amd64 libreadline7 amd64 7.0-5 [151 kB]
Get:8 http://deb.debian.org/debian buster/main amd64 libsqlite3-0 amd64 3.27.2-3+deb10u1 [641 kB]
Get:9 http://deb.debian.org/debian buster/main amd64 libpython2.7-stdlib amd64 2.7.16-2+deb10u1 [1912 kB]
Get:10 http://deb.debian.org/debian buster/main amd64 python2.7 amd64 2.7.16-2+deb10u1 [305 kB]
Get:11 http://deb.debian.org/debian buster/main amd64 libpython2-stdlib amd64 2.7.16-1 [20.8 kB]
Get:12 http://deb.debian.org/debian buster/main amd64 libpython-stdlib amd64 2.7.16-1 [20.8 kB]
Get:13 http://deb.debian.org/debian buster/main amd64 python2 amd64 2.7.16-1 [41.6 kB]
Get:14 http://deb.debian.org/debian buster/main amd64 python amd64 2.7.16-1 [22.8 kB]
Get:15 http://deb.debian.org/debian buster/main amd64 bzip2 amd64 1.0.6-9.2~deb10u1 [48.4 kB]
Get:16 http://deb.debian.org/debian buster/main amd64 libmagic-mgc amd64 1:5.35-4+deb10u1 [242 kB]
Get:17 http://deb.debian.org/debian buster/main amd64 libmagic1 amd64 1:5.35-4+deb10u1 [117 kB]
Get:18 http://deb.debian.org/debian buster/main amd64 file amd64 1:5.35-4+deb10u1 [66.4 kB]
Get:19 http://deb.debian.org/debian buster/main amd64 manpages all 4.16-2 [1295 kB]
Get:20 http://deb.debian.org/debian buster/main amd64 xz-utils amd64 5.2.4-1 [183 kB]
Get:21 http://deb.debian.org/debian buster/main amd64 libpipeline1 amd64 1.5.1-2 [31.2 kB]
Get:22 http://deb.debian.org/debian buster/main amd64 lsb-base all 10.2019051400 [28.4 kB]
Get:23 http://deb.debian.org/debian buster/main amd64 binfmt-support amd64 2.2.0-2 [70.0 kB]
Get:24 http://deb.debian.org/debian buster/main amd64 libbsd0 amd64 0.9.1-2 [99.5 kB]
Get:25 http://deb.debian.org/debian buster/main amd64 libc-dev-bin amd64 2.28-10 [275 kB]
Get:26 http://deb.debian.org/debian buster/main amd64 linux-libc-dev amd64 4.19.160-2 [1416 kB]
Get:27 http://deb.debian.org/debian buster/main amd64 libc6-dev amd64 2.28-10 [2691 kB]
Get:28 http://deb.debian.org/debian buster/main amd64 libedit2 amd64 3.1-20181209-1 [94.0 kB]
Get:29 http://deb.debian.org/debian buster/main amd64 libffi-dev amd64 3.2.1-9 [156 kB]
Get:30 http://deb.debian.org/debian buster/main amd64 libgpm2 amd64 1.20.7-5 [35.1 kB]
Get:31 http://deb.debian.org/debian buster/main amd64 libllvm7 amd64 1:7.0.1-8+deb10u2 [13.1 MB]
Get:32 http://deb.debian.org/debian buster/main amd64 libncurses6 amd64 6.1+20181013-2+deb10u2 [102 kB]
Get:33 http://deb.debian.org/debian buster/main amd64 libpython2.7 amd64 2.7.16-2+deb10u1 [1036 kB]
Get:34 http://deb.debian.org/debian buster/main amd64 liblldb-7 amd64 1:7.0.1-8+deb10u2 [7938 kB]
Get:35 http://deb.debian.org/debian buster/main amd64 libncurses-dev amd64 6.1+20181013-2+deb10u2 [333 kB]
Get:36 http://deb.debian.org/debian buster/main amd64 libtinfo-dev amd64 6.1+20181013-2+deb10u2 [940 B]
Get:37 http://deb.debian.org/debian buster/main amd64 llvm-7-runtime amd64 1:7.0.1-8+deb10u2 [190 kB]
Get:38 http://deb.debian.org/debian buster/main amd64 llvm-7 amd64 1:7.0.1-8+deb10u2 [4554 kB]
Get:39 http://deb.debian.org/debian buster/main amd64 llvm-7-dev amd64 1:7.0.1-8+deb10u2 [21.3 MB]
Get:40 http://deb.debian.org/debian buster/main amd64 python-six all 1.12.0-1 [15.7 kB]
Get:41 http://deb.debian.org/debian buster/main amd64 python-lldb-7 amd64 1:7.0.1-8+deb10u2 [122 kB]
Get:42 http://deb.debian.org/debian buster/main amd64 lldb-7 amd64 1:7.0.1-8+deb10u2 [8459 kB]
Get:43 http://deb.debian.org/debian buster/main amd64 lldb amd64 1:7.0-47 [7176 B]
Get:44 http://deb.debian.org/debian buster/main amd64 manpages-dev all 4.16-2 [2232 kB]
debconf: delaying package configuration, since apt-utils is not installed
Fetched 71.3 MB in 2s (29.9 MB/s)
Selecting previously unselected package libpython2.7-minimal:amd64.
(Reading database ... 9877 files and directories currently installed.)
Preparing to unpack .../00-libpython2.7-minimal_2.7.16-2+deb10u1_amd64.deb ...
Unpacking libpython2.7-minimal:amd64 (2.7.16-2+deb10u1) ...
Selecting previously unselected package python2.7-minimal.
Preparing to unpack .../01-python2.7-minimal_2.7.16-2+deb10u1_amd64.deb ...
Unpacking python2.7-minimal (2.7.16-2+deb10u1) ...
Selecting previously unselected package python2-minimal.
Preparing to unpack .../02-python2-minimal_2.7.16-1_amd64.deb ...
Unpacking python2-minimal (2.7.16-1) ...
Selecting previously unselected package python-minimal.
Preparing to unpack .../03-python-minimal_2.7.16-1_amd64.deb ...
Unpacking python-minimal (2.7.16-1) ...
Selecting previously unselected package mime-support.
Preparing to unpack .../04-mime-support_3.62_all.deb ...
Unpacking mime-support (3.62) ...
Selecting previously unselected package readline-common.
Preparing to unpack .../05-readline-common_7.0-5_all.deb ...
Unpacking readline-common (7.0-5) ...
Selecting previously unselected package libreadline7:amd64.
Preparing to unpack .../06-libreadline7_7.0-5_amd64.deb ...
Unpacking libreadline7:amd64 (7.0-5) ...
Selecting previously unselected package libsqlite3-0:amd64.
Preparing to unpack .../07-libsqlite3-0_3.27.2-3+deb10u1_amd64.deb ...
Unpacking libsqlite3-0:amd64 (3.27.2-3+deb10u1) ...
Selecting previously unselected package libpython2.7-stdlib:amd64.
Preparing to unpack .../08-libpython2.7-stdlib_2.7.16-2+deb10u1_amd64.deb ...
Unpacking libpython2.7-stdlib:amd64 (2.7.16-2+deb10u1) ...
Selecting previously unselected package python2.7.
Preparing to unpack .../09-python2.7_2.7.16-2+deb10u1_amd64.deb ...
Unpacking python2.7 (2.7.16-2+deb10u1) ...
Selecting previously unselected package libpython2-stdlib:amd64.
Preparing to unpack .../10-libpython2-stdlib_2.7.16-1_amd64.deb ...
Unpacking libpython2-stdlib:amd64 (2.7.16-1) ...
Selecting previously unselected package libpython-stdlib:amd64.
Preparing to unpack .../11-libpython-stdlib_2.7.16-1_amd64.deb ...
Unpacking libpython-stdlib:amd64 (2.7.16-1) ...
Setting up libpython2.7-minimal:amd64 (2.7.16-2+deb10u1) ...
Setting up python2.7-minimal (2.7.16-2+deb10u1) ...
Linking and byte-compiling packages for runtime python2.7...
Setting up python2-minimal (2.7.16-1) ...
Selecting previously unselected package python2.
(Reading database ... 10694 files and directories currently installed.)
Preparing to unpack .../python2_2.7.16-1_amd64.deb ...
Unpacking python2 (2.7.16-1) ...
Setting up python-minimal (2.7.16-1) ...
Selecting previously unselected package python.
(Reading database ... 10727 files and directories currently installed.)
Preparing to unpack .../00-python_2.7.16-1_amd64.deb ...
Unpacking python (2.7.16-1) ...
Selecting previously unselected package bzip2.
Preparing to unpack .../01-bzip2_1.0.6-9.2~deb10u1_amd64.deb ...
Unpacking bzip2 (1.0.6-9.2~deb10u1) ...
Selecting previously unselected package libmagic-mgc.
Preparing to unpack .../02-libmagic-mgc_1%3a5.35-4+deb10u1_amd64.deb ...
Unpacking libmagic-mgc (1:5.35-4+deb10u1) ...
Selecting previously unselected package libmagic1:amd64.
Preparing to unpack .../03-libmagic1_1%3a5.35-4+deb10u1_amd64.deb ...
Unpacking libmagic1:amd64 (1:5.35-4+deb10u1) ...
Selecting previously unselected package file.
Preparing to unpack .../04-file_1%3a5.35-4+deb10u1_amd64.deb ...
Unpacking file (1:5.35-4+deb10u1) ...
Selecting previously unselected package manpages.
Preparing to unpack .../05-manpages_4.16-2_all.deb ...
Unpacking manpages (4.16-2) ...
Selecting previously unselected package xz-utils.
Preparing to unpack .../06-xz-utils_5.2.4-1_amd64.deb ...
Unpacking xz-utils (5.2.4-1) ...
Selecting previously unselected package libpipeline1:amd64.
Preparing to unpack .../07-libpipeline1_1.5.1-2_amd64.deb ...
Unpacking libpipeline1:amd64 (1.5.1-2) ...
Selecting previously unselected package lsb-base.
Preparing to unpack .../08-lsb-base_10.2019051400_all.deb ...
Unpacking lsb-base (10.2019051400) ...
Selecting previously unselected package binfmt-support.
Preparing to unpack .../09-binfmt-support_2.2.0-2_amd64.deb ...
Unpacking binfmt-support (2.2.0-2) ...
Selecting previously unselected package libbsd0:amd64.
Preparing to unpack .../10-libbsd0_0.9.1-2_amd64.deb ...
Unpacking libbsd0:amd64 (0.9.1-2) ...
Selecting previously unselected package libc-dev-bin.
Preparing to unpack .../11-libc-dev-bin_2.28-10_amd64.deb ...
Unpacking libc-dev-bin (2.28-10) ...
Selecting previously unselected package linux-libc-dev:amd64.
Preparing to unpack .../12-linux-libc-dev_4.19.160-2_amd64.deb ...
Unpacking linux-libc-dev:amd64 (4.19.160-2) ...
Selecting previously unselected package libc6-dev:amd64.
Preparing to unpack .../13-libc6-dev_2.28-10_amd64.deb ...
Unpacking libc6-dev:amd64 (2.28-10) ...
Selecting previously unselected package libedit2:amd64.
Preparing to unpack .../14-libedit2_3.1-20181209-1_amd64.deb ...
Unpacking libedit2:amd64 (3.1-20181209-1) ...
Selecting previously unselected package libffi-dev:amd64.
Preparing to unpack .../15-libffi-dev_3.2.1-9_amd64.deb ...
Unpacking libffi-dev:amd64 (3.2.1-9) ...
Selecting previously unselected package libgpm2:amd64.
Preparing to unpack .../16-libgpm2_1.20.7-5_amd64.deb ...
Unpacking libgpm2:amd64 (1.20.7-5) ...
Selecting previously unselected package libllvm7:amd64.
Preparing to unpack .../17-libllvm7_1%3a7.0.1-8+deb10u2_amd64.deb ...
Unpacking libllvm7:amd64 (1:7.0.1-8+deb10u2) ...
Selecting previously unselected package libncurses6:amd64.
Preparing to unpack .../18-libncurses6_6.1+20181013-2+deb10u2_amd64.deb ...
Unpacking libncurses6:amd64 (6.1+20181013-2+deb10u2) ...
dSelecting previously unselected package libpython2.7:amd64.
Preparing to unpack .../19-libpython2.7_2.7.16-2+deb10u1_amd64.deb ...
Unpacking libpython2.7:amd64 (2.7.16-2+deb10u1) ...
irSelecting previously unselected package liblldb-7.
Preparing to unpack .../20-liblldb-7_1%3a7.0.1-8+deb10u2_amd64.deb ...
Unpacking liblldb-7 (1:7.0.1-8+deb10u2) ...
Selecting previously unselected package libncurses-dev:amd64.
Preparing to unpack .../21-libncurses-dev_6.1+20181013-2+deb10u2_amd64.deb ...
Unpacking libncurses-dev:amd64 (6.1+20181013-2+deb10u2) ...
Selecting previously unselected package libtinfo-dev:amd64.
Preparing to unpack .../22-libtinfo-dev_6.1+20181013-2+deb10u2_amd64.deb ...
Unpacking libtinfo-dev:amd64 (6.1+20181013-2+deb10u2) ...
Selecting previously unselected package llvm-7-runtime.
Preparing to unpack .../23-llvm-7-runtime_1%3a7.0.1-8+deb10u2_amd64.deb ...
Unpacking llvm-7-runtime (1:7.0.1-8+deb10u2) ...
Selecting previously unselected package llvm-7.
Preparing to unpack .../24-llvm-7_1%3a7.0.1-8+deb10u2_amd64.deb ...
Unpacking llvm-7 (1:7.0.1-8+deb10u2) ...
Selecting previously unselected package llvm-7-dev.
Preparing to unpack .../25-llvm-7-dev_1%3a7.0.1-8+deb10u2_amd64.deb ...
Unpacking llvm-7-dev (1:7.0.1-8+deb10u2) ...
Selecting previously unselected package python-six.
Preparing to unpack .../26-python-six_1.12.0-1_all.deb ...
Unpacking python-six (1.12.0-1) ...
Selecting previously unselected package python-lldb-7.
Preparing to unpack .../27-python-lldb-7_1%3a7.0.1-8+deb10u2_amd64.deb ...
Unpacking python-lldb-7 (1:7.0.1-8+deb10u2) ...
Selecting previously unselected package lldb-7.
Preparing to unpack .../28-lldb-7_1%3a7.0.1-8+deb10u2_amd64.deb ...
Unpacking lldb-7 (1:7.0.1-8+deb10u2) ...
Selecting previously unselected package lldb.
Preparing to unpack .../29-lldb_1%3a7.0-47_amd64.deb ...
Unpacking lldb (1:7.0-47) ...
Selecting previously unselected package manpages-dev.
Preparing to unpack .../30-manpages-dev_4.16-2_all.deb ...
Unpacking manpages-dev (4.16-2) ...
Setting up libpipeline1:amd64 (1.5.1-2) ...
Setting up lsb-base (10.2019051400) ...
Setting up libgpm2:amd64 (1.20.7-5) ...
Setting up mime-support (3.62) ...
Setting up libmagic-mgc (1:5.35-4+deb10u1) ...
Setting up manpages (4.16-2) ...
Setting up libsqlite3-0:amd64 (3.27.2-3+deb10u1) ...
Setting up libmagic1:amd64 (1:5.35-4+deb10u1) ...
Setting up linux-libc-dev:amd64 (4.19.160-2) ...
Setting up file (1:5.35-4+deb10u1) ...
Setting up bzip2 (1.0.6-9.2~deb10u1) ...
Setting up libffi-dev:amd64 (3.2.1-9) ...
Setting up libncurses6:amd64 (6.1+20181013-2+deb10u2) ...
Setting up xz-utils (5.2.4-1) ...
update-alternatives: using /usr/bin/xz to provide /usr/bin/lzma (lzma) in auto mode
update-alternatives: warning: skip creation of /usr/share/man/man1/lzma.1.gz because associated file /usr/share/man/man1/xz.1.gz (of link group lzma) doesn't exist
update-alternatives: warning: skip creation of /usr/share/man/man1/unlzma.1.gz because associated file /usr/share/man/man1/unxz.1.gz (of link group lzma) doesn't exist
update-alternatives: warning: skip creation of /usr/share/man/man1/lzcat.1.gz because associated file /usr/share/man/man1/xzcat.1.gz (of link group lzma) doesn't exist
update-alternatives: warning: skip creation of /usr/share/man/man1/lzmore.1.gz because associated file /usr/share/man/man1/xzmore.1.gz (of link group lzma) doesn't exist
update-alternatives: warning: skip creation of /usr/share/man/man1/lzless.1.gz because associated file /usr/share/man/man1/xzless.1.gz (of link group lzma) doesn't exist
update-alternatives: warning: skip creation of /usr/share/man/man1/lzdiff.1.gz because associated file /usr/share/man/man1/xzdiff.1.gz (of link group lzma) doesn't exist
update-alternatives: warning: skip creation of /usr/share/man/man1/lzcmp.1.gz because associated file /usr/share/man/man1/xzcmp.1.gz (of link group lzma) doesn't exist
update-alternatives: warning: skip creation of /usr/share/man/man1/lzgrep.1.gz because associated file /usr/share/man/man1/xzgrep.1.gz (of link group lzma) doesn't exist
update-alternatives: warning: skip creation of /usr/share/man/man1/lzegrep.1.gz because associated file /usr/share/man/man1/xzegrep.1.gz (of link group lzma) doesn't exist
update-alternatives: warning: skip creation of /usr/share/man/man1/lzfgrep.1.gz because associated file /usr/share/man/man1/xzfgrep.1.gz (of link group lzma) doesn't exist
Setting up binfmt-support (2.2.0-2) ...
invoke-rc.d: could not determine current runlevel
invoke-rc.d: policy-rc.d denied execution of start.
Setting up libc-dev-bin (2.28-10) ...
Setting up libbsd0:amd64 (0.9.1-2) ...
Setting up readline-common (7.0-5) ...
Setting up libreadline7:amd64 (7.0-5) ...
Setting up manpages-dev (4.16-2) ...
Setting up libedit2:amd64 (3.1-20181209-1) ...
Setting up libpython2.7-stdlib:amd64 (2.7.16-2+deb10u1) ...
Setting up libllvm7:amd64 (1:7.0.1-8+deb10u2) ...
Setting up libc6-dev:amd64 (2.28-10) ...
Setting up libpython2.7:amd64 (2.7.16-2+deb10u1) ...
Setting up libncurses-dev:amd64 (6.1+20181013-2+deb10u2) ...
Setting up llvm-7-runtime (1:7.0.1-8+deb10u2) ...
Setting up python2.7 (2.7.16-2+deb10u1) ...
Setting up llvm-7 (1:7.0.1-8+deb10u2) ...
Setting up libpython2-stdlib:amd64 (2.7.16-1) ...
Setting up python2 (2.7.16-1) ...
Setting up libpython-stdlib:amd64 (2.7.16-1) ...
Setting up python (2.7.16-1) ...
Setting up libtinfo-dev:amd64 (6.1+20181013-2+deb10u2) ...
Setting up liblldb-7 (1:7.0.1-8+deb10u2) ...
Setting up llvm-7-dev (1:7.0.1-8+deb10u2) ...
Setting up python-six (1.12.0-1) ...
Setting up python-lldb-7 (1:7.0.1-8+deb10u2) ...
Setting up lldb-7 (1:7.0.1-8+deb10u2) ...
Setting up lldb (1:7.0-47) ...
Processing triggers for libc-bin (2.28-10) ...

dotnet tool install --global dotnet-sos

Tools directory '/root/.dotnet/tools' is not currently on the PATH environment variable.
If you are using bash, you can add it to your profile by running the following command:

cat << \EOF >> ~/.bash_profile
# Add .NET Core SDK tools
export PATH="$PATH:/root/.dotnet/tools"
EOF

You can add it to the current session by running the following command:

export PATH="$PATH:/root/.dotnet/tools"

You can invoke the tool using the following command: dotnet-sos
Tool 'dotnet-sos' (version '5.0.160202') was successfully installed.

/root/.dotnet/tools/dotnet-sos install

Installing SOS to /root/.dotnet/sos from /root/.dotnet/tools/.store/dotnet-sos/5.0.160202/dotnet-sos/5.0.160202/tools/netcoreapp2.1/any/linux-x64
Creating installation directory...
Copying files...
Creating new /root/.lldbinit file - LLDB will load SOS automatically at startup
SOS install succeeded

Project configuration

Okay. Now we can create new project with dotnet new console -lang C# makeref, enter the directory and modify the csproj file:

<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>net5.0</TargetFramework>
    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
  </PropertyGroup>

</Project>

Notice that I added AllowUnsafeBlocks to enable unsafe code (which we already know is not needed but just to keep it simple).

Application

Finally, the application code:

using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Reflection;


namespace HijackingNewOperatorNetCore
{
    class Program
    {
        static void Main(string[] args)
        {
            var allocator = new GenericMemoryAllocator();

            // Allocate object through allocator
            var customlyAlocated = allocator.Allocate<TestClass>();
            // Allocate ordinary object
            var ordinary = new object();

            // Hijack method and allocate object
            HijackNew();
            System.Diagnostics.Debugger.Break();
            var hijacked = new object();

            // Observe that hijacked objects are in generation 2
            Console.WriteLine($"Object customly allocated by hand: {GC.GetGeneration(customlyAlocated)}");
            Console.WriteLine($"Object created normally: {GC.GetGeneration(ordinary)}");
            Console.WriteLine($"Object with hijacked newobj: {GC.GetGeneration(hijacked)}");
        }

        public static void HijackNew()
        {
            var methodHandle = typeof(GenericMemoryAllocator).GetMethod(nameof(GenericMemoryAllocator.RawAllocate)).MethodHandle;
            RuntimeHelpers.PrepareMethod(methodHandle);

            var myAllocAddress = Marshal.ReadIntPtr(methodHandle.Value, 8);
            var defaultAllocAddress = GenericMemoryAllocator.GetAllocMethodAddress();


            int offset = (int)((long)myAllocAddress - defaultAllocAddress - 4 - 1); // 4 bytes for relative address and one byte for opcode
            byte[] instruction = {
                0xE9, // Long jump instruction
                (byte)(offset & 0xFF),
                (byte)((offset >> 8) & 0xFF),
                (byte)((offset >> 16) & 0xFF),
                (byte)((offset >> 24) & 0xFF)
            };

            GenericMemoryAllocator.UnlockPage((IntPtr)defaultAllocAddress);
            Marshal.Copy(instruction, 0, (IntPtr)defaultAllocAddress, instruction.Length);
        }
    }

    class TestClass
    {
        public int a, b, c, d;
    }
}

namespace HijackingNewOperatorNetCore
{
    class GenericMemoryAllocator
    {
        public T Allocate<T>()
        {
            var methodTable = typeof(T).TypeHandle.Value; // Get handle to the method table
            RawAllocate(methodTable); // Allocate the object and set the field, also JIT-compile the method
            return (T)Dummy;
        }

        // Method needs to be static in order to maintain the calling convention
        public static unsafe IntPtr RawAllocate(IntPtr methodTable)
        {
            // Calculate the object size by extracting it from method table and dividing by int size.
            // We assume that the size starts 4 bytes after the beginning of method table (works from .NET 3.5 to .NET Core 3.1)
            int objectSize = Marshal.ReadInt32(methodTable, 4) / sizeof(int);
            // Skip sizeof(int) bytes for syncblock
            _currentOffset++;
            // Write the address to method table
            Memory[_currentOffset] = methodTable;

            // Get the handle for the newly created object
            TypedReference newObjectReference = __makeref(Dummy);
            // Get the handle for the memory
            TypedReference memoryReference = __makeref(Memory);
            // Calculate the address of  the spawned object. We need to add 2 since we need to skip the method table of the array and the array size
            var spawnedObjectAddress = *(IntPtr*)*(IntPtr*)&memoryReference + (_currentOffset + 2) * sizeof(IntPtr);

            // Modify the handle for the new object using the address of the existing memory
            *(IntPtr*)*(IntPtr*)&newObjectReference = spawnedObjectAddress;

            // Move within the memory
            _currentOffset += objectSize;

            return *(IntPtr*)*(IntPtr*)&newObjectReference;
        }

        // Fields needs to be static in order to be accessible from RawAllocate
        private static bool Is64 = IntPtr.Size == sizeof(long);
        // Array big enough to be stored in Generation 2
        private static IntPtr[] Memory = new IntPtr[102400];
        private static int _currentOffset;
        private static object Dummy = new object();

        // This method is used to find the address of the CLR allocation function
        [MethodImpl(MethodImplOptions.NoOptimization)]
        private void CreateObject()
        {
            new object();
        }

        public static long GetAllocMethodAddress()
        {
            // Get the handle to the method creating the object
            var methodHandle = typeof(GenericMemoryAllocator).GetMethod(nameof(CreateObject), BindingFlags.NonPublic | BindingFlags.Instance).MethodHandle;

            // JIT-compile methods
            RuntimeHelpers.PrepareMethod(methodHandle);

            // Get the address of the jitted method
            IntPtr methodAddress = Marshal.ReadIntPtr(methodHandle.Value, 16);

            // Call to internal function differs between architectures, builds etc
            int offset = 51;

            // Read the jump offset
            int jumpOffset = 0;
            for (int i = 1; i < 5; ++i)
            {
                jumpOffset = jumpOffset + (Marshal.ReadByte(methodAddress, offset + i) << (i - 1) * 8);
            }
            // Calculate the absolute address
            long absoluteAddress = (long)methodAddress + offset + jumpOffset + 1 + 4; // 1 byte for jmp instruction, 4 bytes for relative address

            return absoluteAddress;
        }

        // Method to unlock the page for executing
        [DllImport("libc", SetLastError = true)]
        static extern int mprotect(IntPtr lpAddress, uint dwSize, uint flags);

        // Unlocks the page for executing
        public static void UnlockPage(IntPtr address)
        {
              long newAddress = ((long)address) & (long)(~0 << 12);
              IntPtr na = (IntPtr)newAddress;
              long length = ((long)address) + 6 - newAddress;
              // 1 for read, 2 for write, 4 for execute
              mprotect(na, (uint)length, 1 | 2 | 4);
        }
    }
}

This should look familiar to you. There are three main differences from Windows solution.

First, the CreateObject method in line 107 is now assembled differently. Machine code looks like this (and let’s see lldb in action at the same time):

dotnet --version

5.0.101

dotnet build

Microsoft (R) Build Engine version 16.8.0+126527ff1 for .NET
Copyright (C) Microsoft Corporation. All rights reserved.

  Determining projects to restore...
  All projects are up-to-date for restore.
/makeref/makeref/Program.cs(56,26): warning CS0649: Field 'TestClass.c' is never assigned to, and will always have its default value 0 [/makeref/makeref/makeref.csproj]
/makeref/makeref/Program.cs(56,23): warning CS0649: Field 'TestClass.b' is never assigned to, and will always have its default value 0 [/makeref/makeref/makeref.csproj]
/makeref/makeref/Program.cs(56,20): warning CS0649: Field 'TestClass.a' is never assigned to, and will always have its default value 0 [/makeref/makeref/makeref.csproj]
/makeref/makeref/Program.cs(56,29): warning CS0649: Field 'TestClass.d' is never assigned to, and will always have its default value 0 [/makeref/makeref/makeref.csproj]
  makeref -> /makeref/makeref/bin/Debug/net5.0/makeref.dll

Build succeeded.

/makeref/makeref/Program.cs(56,26): warning CS0649: Field 'TestClass.c' is never assigned to, and will always have its default value 0 [/makeref/makeref/makeref.csproj]
/makeref/makeref/Program.cs(56,23): warning CS0649: Field 'TestClass.b' is never assigned to, and will always have its default value 0 [/makeref/makeref/makeref.csproj]
/makeref/makeref/Program.cs(56,20): warning CS0649: Field 'TestClass.a' is never assigned to, and will always have its default value 0 [/makeref/makeref/makeref.csproj]
/makeref/makeref/Program.cs(56,29): warning CS0649: Field 'TestClass.d' is never assigned to, and will always have its default value 0 [/makeref/makeref/makeref.csproj]
    4 Warning(s)
    0 Error(s)

Time Elapsed 00:00:04.77

lldb bin/Debug/net5.0/makeref

(lldb) target create "bin/Debug/net5.0/makeref"
Current executable set to 'bin/Debug/net5.0/makeref' (x86_64).

r

(lldb) r
Process 1181 launched: '/makeref/makeref/bin/Debug/net5.0/makeref' (x86_64)
Process 1181 stopped
* thread #1, name = 'makeref', stop reason = signal SIGTRAP
    frame #0: 0x00007ffff73ee1ed libcoreclr.so`___lldb_unnamed_symbol15306$$libcoreclr.so + 1
libcoreclr.so`___lldb_unnamed_symbol15306$$libcoreclr.so:
->  0x7ffff73ee1ed <+1>: retq
    0x7ffff73ee1ee <+2>: nop

libcoreclr.so`___lldb_unnamed_symbol15307$$libcoreclr.so:
    0x7ffff73ee1f0 <+0>: pushq  %rbp
    0x7ffff73ee1f1 <+1>: movq   0xd8(%rdi), %r12

sos Name2EE makeref.dll HijackingNewOperatorNetCore.GenericMemoryAllocator.CreateObject

(lldb) sos Name2EE makeref.dll HijackingNewOperatorNetCore.GenericMemoryAllocator.CreateObject
Module:      00007fff7de42788
Assembly:    makeref.dll
Token:       0000000006000007
MethodDesc:  00007fff7deb6ea0
Name:        HijackingNewOperatorNetCore.GenericMemoryAllocator.CreateObject()
JITTED Code Address: 00007fff7dda9030

sos u 00007fff7dda9030

(lldb) sos u 00007fff7dda9030
Normal JIT generated code
HijackingNewOperatorNetCore.GenericMemoryAllocator.CreateObject()
ilAddr is 00007FFFF3D4F463 pImport is 00000000014B3BF0
Begin 00007FFF7DDA9030, size 4d

/makeref/makeref/Program.cs @ 113:
>>> 00007fff7dda9030 55                   push    rbp
00007fff7dda9031 4883ec10             sub     rsp, 0x10
00007fff7dda9035 488d6c2410           lea     rbp, [rsp + 0x10]
00007fff7dda903a 33c0                 xor     eax, eax
00007fff7dda903c 488945f0             mov     qword ptr [rbp - 0x10], rax
00007fff7dda9040 48897df8             mov     qword ptr [rbp - 0x8], rdi
00007fff7dda9044 48b8082ce47dff7f0000 movabs  rax, 0x7fff7de42c08
00007fff7dda904e 833800               cmp     dword ptr [rax], 0x0
00007fff7dda9051 7405                 je      0x7fff7dda9058
00007fff7dda9053 e828213879           call    0x7ffff712b180 (JitHelp: CORINFO_HELP_DBG_IS_JUST_MY_CODE)
00007fff7dda9058 90                   nop

/makeref/makeref/Program.cs @ 114:
00007fff7dda9059 48bf000cd67dff7f0000 movabs  rdi, 0x7fff7dd60c00
00007fff7dda9063 e8c8953779           call    0x7ffff7122630 (HijackingNewOperatorNetCore.GenericMemoryAllocator.RawAllocate(IntPtr), mdToken: 0000000006000006)
00007fff7dda9068 488945f0             mov     qword ptr [rbp - 0x10], rax
00007fff7dda906c 488b7df0             mov     rdi, qword ptr [rbp - 0x10]
00007fff7dda9070 e81370feff           call    0x7fff7dd90088 (System.Object..ctor(), mdToken: 000000000600045E)
00007fff7dda9075 90                   nop

/makeref/makeref/Program.cs @ 115:
00007fff7dda9076 90                   nop
00007fff7dda9077 488d6500             lea     rsp, [rbp]
00007fff7dda907b 5d                   pop     rbp
00007fff7dda907c c3                   ret

If you count all bytes you’ll find out that the offset is now 51.

Second difference is the method descriptor. Function address used to be 8 bytes from the beginning, now it’s 16 (in line 121):

memory read -count 64 00007fff7deb6ea0

(lldb) memory read -count 64 00007fff7deb6ea0
0x7fff7deb6ea0: 07 00 08 03 08 00 28 00 28 5b da 7d ff 7f 00 00  ......(.([.}....
0x7fff7deb6eb0: 30 90 da 7d ff 7f 00 00 08 00 0b 03 09 00 a8 00  0..}............
0x7fff7deb6ec0: 30 5b da 7d ff 7f 00 00 80 8a da 7d ff 7f 00 00  0[.}.......}....
0x7fff7deb6ed0: 09 00 0e 03 0a 00 8a 00 00 00 00 00 00 00 00 00  ................

Finally, we cannot use VirtualProtectEx anymore as we’re on Linux. We need to go with mprotect:

[DllImport("libc", SetLastError = true)]
static extern int mprotect(IntPtr lpAddress, uint dwSize, uint flags);

public static void UnlockPage(IntPtr address)
{
	  long newAddress = ((long)address) & (long)(~0 << 12);
	  IntPtr na = (IntPtr)newAddress;
	  long length = ((long)address) + 6 - newAddress;
	  // 1 for read, 2 for write, 4 for execute
	  mprotect(na, (uint)length, 1 | 2 | 4);
}

mprotect requires the address to be aligned to a page boundary (which is 4096 bytes on my machine) so I clear lowest 12 bits (line 6 in the listing above). Next, I calculate new offset of the method (I’m actually not sure if that’s needed). Finally, I enable all permissions for the page in line 10.

And just for the sake of completeness, final output:

dotnet run

Object customly allocated by hand: 2
Object created normally: 0
Object with hijacked newobj: 2

Final notes

As you can see, there is no magic in this approach, it’s just a bunch of bytes which we can modify in the same way as long as we’re on the same architecture. However, keep in mind the following:

  • I do not recommend using this in production code. I do use things like these in real applications but this is always risky and requires good understanding of all internals
  • This is just one of multiple allocation methods provided by .NET. If you want it to be “production ready” then you need to update all of them
  • Since you override the method globally, you can’t control easily when it’s called. In other words, .NET will use your logic as well so you need to take care of all memory management (or do some fancy juggling to call .NET methods when you actually need to allocate some new memory)
  • Keep in mind that .NET scans the heap and requires it to be parseable. Be careful with what you allocate and how. Also, make sure your objects are pinned or that you have good concurrency management (since GC can kick in anytime and move objects around)

Have fun!

]]>
https://blog.adamfurmanek.pl/2021/09/25/custom-memory-allocation-in-c-part-16/feed/ 1
Custom memory allocation in C# Part 15 — Allocating object on a stack without unsafe https://blog.adamfurmanek.pl/2021/03/06/custom-memory-allocation-in-c-part-15/ https://blog.adamfurmanek.pl/2021/03/06/custom-memory-allocation-in-c-part-15/#respond Sat, 06 Mar 2021 09:00:37 +0000 https://blog.adamfurmanek.pl/?p=3789 Continue reading Custom memory allocation in C# Part 15 — Allocating object on a stack without unsafe]]>

This is the fifteenth part of the Custom memory allocation series. For your convenience you can find other parts in the table of contents in Part 1 — Allocating object on a stack

Last time we saw how to do unsafe operations without the unsafe keyword. This time we’ll allocate some reference type on a stack in a similar manner.

Let’s this code:

using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

namespace Makeref_Safe_OnStack
{
    public class Program
	{
		public static void Main(string[] args)
		{
			Structure structure = new Structure();
			structure.syncBlock = 0xBADF00D;
			structure.methodHandle = 0xBADF00D;
			structure.field = 0xBADF00D;

			var method = typeof(Program).GetMethod("GetStackAddress", System.Reflection.BindingFlags.Static | System.Reflection.BindingFlags.NonPublic);
			RuntimeHelpers.PrepareMethod(method.MethodHandle);
			var codeAddress = method.MethodHandle.GetFunctionPointer();

			Console.WriteLine(codeAddress.ToString("X"));

			Marshal.Copy(addressGetter, 0, codeAddress, addressGetter.Length);

			var structureAddress = GetStackAddress() + 132;
			Console.WriteLine(structureAddress.ToString("X"));

			structure.syncBlock = 0;
			structure.methodHandle = (int)typeof(Klass).TypeHandle.Value;
			structure.field = 123;

			Holder<Klass> holder = new Holder<Klass>();
			GCHandle holderHandle = GCHandle.Alloc(holder);
			var holderAddress = Marshal.ReadIntPtr(GCHandle.ToIntPtr(holderHandle));
			Marshal.WriteIntPtr(holderAddress, 4, structureAddress + 4); // Skip first integer for sync block, assumes x86

			Console.WriteLine(holder.reference.GetType());
			Console.WriteLine(holder.reference.Field);

			structure.field = 456;
			Console.WriteLine(holder.reference.Field);
		}

		static byte[] addressGetter = new byte[] 
		{
			0x89, 0xE0, // mov eax, esp
			0xC3        // ret
		};

		static IntPtr GetStackAddress()
		{
			Console.WriteLine("Some dummy code to be replaced");
			return IntPtr.Zero;
		}
	}

	class Holder<T>
	{
		public T reference;
	}

	class Klass
	{
		public int Field;
	}

	struct Structure
	{
		public int syncBlock;
		public int methodHandle;
		public int field;
	}
}

In line 43 we have a machine code for getting the esp register. In line 22 we modify the GetStackAddress method with our machine code.

We allocate some structure on the stack which we’ll later override with a reference type. We do it in line 34.

Finally, you can see how we take type in line 36 and then modify reference instance field by using structure. This confirms the object is exactly in the stack.

Output:

438F1F0
88BE8E0
Makeref_Safe_OnStack.Klass
123
456

]]>
https://blog.adamfurmanek.pl/2021/03/06/custom-memory-allocation-in-c-part-15/feed/ 0