diff --git a/cmake/modules/FindAE108_PETSc.cmake b/cmake/modules/FindAE108_PETSc.cmake
index 75de1f8b256ec74ff0c53f87a0c5a9af557bc1da..1330a22935e777c5b0c59c33cee0148eb29086aa 100755
--- a/cmake/modules/FindAE108_PETSc.cmake
+++ b/cmake/modules/FindAE108_PETSc.cmake
@@ -51,6 +51,7 @@ if(AE108_PETSc_FOUND AND NOT TARGET ae108::external::petsc)
     endif()
 
     try_compile_with_petsc("-DAE108_PETSC_COMPLEX")
+    set(AE108_PETSC_COMPLEX ${AE108_PETSc_COMPILE_RESULT} CACHE BOOL "ae108: PETSc with complex scalar type")
     if(AE108_PETSc_COMPILE_RESULT)
         target_compile_definitions(ae108::external::petsc
             INTERFACE AE108_PETSC_COMPLEX=1
diff --git a/cpppetsc/src/include/ae108/cpppetsc/TAOSolver.h b/cpppetsc/src/include/ae108/cpppetsc/TAOSolver.h
index 3e343fc2739a01c1fdf16ad4361818065d139771..b44ad10b57333beafe32c28b1f96aa92c352d1dd 100644
--- a/cpppetsc/src/include/ae108/cpppetsc/TAOSolver.h
+++ b/cpppetsc/src/include/ae108/cpppetsc/TAOSolver.h
@@ -1,4 +1,4 @@
-// Â© 2020 ETH Zurich, Mechanics and Materials Lab
+// Â© 2020, 2022 ETH Zurich, Mechanics and Materials Lab
 // Â© 2020 California Institute of Technology
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
@@ -25,6 +25,7 @@
 #include "ae108/cpppetsc/UniqueEntity.h"
 #include "ae108/cpppetsc/Vector.h"
 #include <functional>
+#include <optional>
 #include <petscmat.h>
 #include <petscmath.h>
 #include <petscsys.h>
@@ -71,12 +72,17 @@ public:
     gpcg,
     nm,
     pounders,
+    brgn,
     lcl,
     ssils,
     ssfls,
     asils,
     asfls,
-    ipm
+    ipm,
+    pdipm,
+    shell,
+    admm,
+    almm,
   };
 
   /**
@@ -132,6 +138,29 @@ public:
   void setBounds(const distributed<vector_type> &lowerBounds,
                  const distributed<vector_type> &upperBounds);
 
+  struct EqualityConstraints {
+    /**
+     * @brief The solver attempts to reach a residual of zero.
+     */
+    GradientFunctor residual;
+
+    /**
+     * @brief The jacobian of the residual.
+     */
+    HessianFunctor jacobian;
+
+    /**
+     * @brief Used to store residuals and jacobians internally.
+     */
+    std::pair<distributed<vector_type>, matrix_type> buffer;
+  };
+
+  /**
+   * @brief Set the equality constraints of the problem.
+   * @note Not all solvers consider these constraints.
+   */
+  void setConstraints(EqualityConstraints constraints);
+
   /**
    * @brief Minimizes the objective function using optimization.
    *
@@ -164,6 +193,8 @@ private:
   UniqueEntity<Tao> _tao;
 
   matrix_type _buffer_matrix;
+
+  std::optional<EqualityConstraints> _constraints;
 };
 
 template <class Policy>
@@ -288,6 +319,10 @@ template <class Policy> void TAOSolver<Policy>::setType(const Type type) {
     taoType = TAOPOUNDERS;
     break;
   }
+  case Type::brgn: {
+    taoType = TAOBRGN;
+    break;
+  }
   case Type::lcl: {
     taoType = TAOLCL;
     break;
@@ -312,6 +347,22 @@ template <class Policy> void TAOSolver<Policy>::setType(const Type type) {
     taoType = TAOIPM;
     break;
   }
+  case Type::pdipm: {
+    taoType = TAOPDIPM;
+    break;
+  }
+  case Type::shell: {
+    taoType = TAOSHELL;
+    break;
+  }
+  case Type::admm: {
+    taoType = TAOADMM;
+    break;
+  }
+  case Type::almm: {
+    taoType = TAOALMM;
+    break;
+  }
   }
   Policy::handleError(TaoSetType(_tao.get(), taoType));
 }
@@ -323,6 +374,19 @@ void TAOSolver<Policy>::setBounds(const distributed<vector_type> &lowerBounds,
       _tao.get(), lowerBounds.unwrap().data(), upperBounds.unwrap().data()));
 }
 
+template <class Policy>
+void TAOSolver<Policy>::setConstraints(EqualityConstraints constraints) {
+  _constraints.emplace(std::move(constraints));
+
+  Policy::handleError(TaoSetEqualityConstraintsRoutine(
+      _tao.get(), _constraints->buffer.first.unwrap().data(),
+      &TAOSolver::gradientAdapter, &_constraints->residual));
+  Policy::handleError(TaoSetJacobianEqualityRoutine(
+      _tao.get(), _constraints->buffer.second.data(),
+      _constraints->buffer.second.data(), &TAOSolver::hessianAdapter,
+      &_constraints->jacobian));
+}
+
 template <class Policy> void TAOSolver<Policy>::checkConvergence() const {
   const auto errorCode = [this]() {
     auto reason = TaoConvergedReason{};
diff --git a/cpppetsc/test/TAOSolver_Test.cc b/cpppetsc/test/TAOSolver_Test.cc
index e54a1275a29f81012b9203dbe8554789ef966ad3..107e656dbb8eae244caa68e8ee138d3e0d56174c 100644
--- a/cpppetsc/test/TAOSolver_Test.cc
+++ b/cpppetsc/test/TAOSolver_Test.cc
@@ -1,4 +1,4 @@
-// Â© 2020 ETH Zurich, Mechanics and Materials Lab
+// Â© 2020, 2022 ETH Zurich, Mechanics and Materials Lab
 // Â© 2020 California Institute of Technology
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
@@ -26,6 +26,7 @@
 
 using ae108::cppptest::isLocal;
 using ae108::cppptest::ScalarEqIfLocal;
+using ae108::cppptest::ScalarNearIfLocal;
 using testing::Test;
 using testing::Types;
 
@@ -140,6 +141,54 @@ TYPED_TEST(TAOSolver_Test, raises_exception_on_nonconvergence) {
   EXPECT_THROW(callSolve(), TAOSolverDivergedException);
 }
 
+TYPED_TEST(TAOSolver_Test, minimizes_with_equality_constraints) {
+  using real_type = typename TestFixture::real_type;
+  using vector_type = typename TestFixture::vector_type;
+  using matrix_type = typename TestFixture::matrix_type;
+
+  typename TestFixture::solver_type solver(
+      matrix_type(2, 2), TestFixture::solver_type::Type::almm);
+
+  const auto value = 5.;
+
+  solver.setConstraints({
+      [&](const distributed<vector_type> &input,
+          distributed<vector_type> *const output) {
+        const auto full = vector_type::fromDistributed(input);
+        output->unwrap().setZero();
+        output->unwrap().replace().element(0, full(0) - value);
+      },
+      [](const distributed<vector_type> &, matrix_type *const output) {
+        output->setZero();
+        const auto replacer =
+            output->assemblyView().replace().element(0, 0, 1.);
+      },
+      {tag<DistributedTag>(vector_type(1)), matrix_type(1, 2)},
+  });
+
+  const auto solution = solver.solve(
+      [](const distributed<vector_type> &input, real_type *const output) {
+        const auto full = vector_type::fromDistributed(input);
+        *output = std::pow(full(0) - 1., 2.) + std::pow(full(1) - 2., 2.);
+      },
+      [](const distributed<vector_type> &input,
+         distributed<vector_type> *const output) {
+        const auto full = vector_type::fromDistributed(input);
+        const auto replacer = output->unwrap().replace();
+        replacer(0) = 2. * (full(0) - 1.);
+        replacer(1) = 2. * (full(1) - 2.);
+      },
+      [](const distributed<vector_type> &, matrix_type *const output) {
+        const auto replacer = output->assemblyView().replace();
+        replacer(0, 0) = 2.;
+        replacer(1, 1) = 2.;
+      },
+      tag<DistributedTag>(vector_type::fromList({3., 4.})));
+
+  EXPECT_THAT(solution.unwrap(), ScalarNearIfLocal(0, value, 1e-7));
+  EXPECT_THAT(solution.unwrap(), ScalarEqIfLocal(1, 2.));
+}
+
 template <class Policy> struct TAOSolver_BoundsTest : Test {
   using solver_type = TAOSolver<Policy>;
   using value_type = typename solver_type::value_type;
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index 921a4ac2e90a8eff8fd471945d77b2d1fcd9fa50..356c5b08e104bf4fb0efa6b7ad774ee77bb8c936 100644
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -126,4 +126,13 @@ target_link_libraries(${PROJECT_NAME}-Bar
                       PRIVATE ae108::solve
                       PRIVATE ae108::assembly
                       PRIVATE ae108::elements
-)
\ No newline at end of file
+)
+
+if (NOT ${AE108_PETSC_COMPLEX})
+    add_executable(${PROJECT_NAME}-NonlinearBC NonlinearBC.cc)
+    target_link_libraries(${PROJECT_NAME}-NonlinearBC
+                          PRIVATE ae108::solve
+                          PRIVATE ae108::assembly
+                          PRIVATE ae108::elements
+    )
+endif()
\ No newline at end of file
diff --git a/examples/NonlinearBC.cc b/examples/NonlinearBC.cc
new file mode 100644
index 0000000000000000000000000000000000000000..a0acf5c0bf701b8719f347bfa0a29f1474184e28
--- /dev/null
+++ b/examples/NonlinearBC.cc
@@ -0,0 +1,271 @@
+// Â© 2020, 2022 ETH Zurich, Mechanics and Materials Lab
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "ae108/assembly/Assembler.h"
+#include "ae108/cpppetsc/Context.h"
+#include "ae108/cpppetsc/Mesh.h"
+#include "ae108/cpppetsc/ParallelComputePolicy.h"
+#include "ae108/cpppetsc/Vector.h"
+#include "ae108/elements/CoreElement.h"
+#include "ae108/elements/embedding/IsoparametricEmbedding.h"
+#include "ae108/elements/integrator/IsoparametricIntegrator.h"
+#include "ae108/elements/materialmodels/Hookean.h"
+#include "ae108/elements/quadrature/Quadrature.h"
+#include "ae108/elements/shape/Quad4.h"
+#include "ae108/solve/GeneralizedTAOSolver.h"
+#include <array>
+#include <unordered_map>
+
+using namespace ae108;
+
+using Policy = cpppetsc::ParallelComputePolicy;
+using Context = cpppetsc::Context<Policy>;
+using Mesh = cpppetsc::Mesh<Policy>;
+using Vector = cpppetsc::Vector<Policy>;
+using Matrix = cpppetsc::Matrix<Policy>;
+
+namespace embedding = ae108::elements::embedding;
+namespace integrator = ae108::elements::integrator;
+namespace materialmodels = ae108::elements::materialmodels;
+namespace quadrature = ae108::elements::quadrature;
+namespace shape = ae108::elements::shape;
+
+// In this example we will simulate two elements, A and B.
+// Each of these elements has four vertices.
+
+//  3 ------- 2 ------- 5
+//  |         |         |
+//  |    A    |    B    |
+//  0---------1---------4
+
+// Let's specify the parameters of this mesh.
+
+constexpr auto number_of_vertices_per_element = Mesh::size_type{4};
+constexpr auto number_of_elements = Mesh::size_type{2};
+constexpr auto number_of_vertices = Mesh::size_type{6};
+constexpr auto dimension = Mesh::size_type{2};
+constexpr auto dof_per_vertex = Mesh::size_type{2};
+
+// The connectivity specifies the vertex indices per Element.
+
+using Connectivity =
+    std::array<std::array<Mesh::size_type, number_of_vertices_per_element>,
+               number_of_elements>;
+constexpr auto connectivity = Connectivity{{
+    {{0, 1, 2, 3}}, // vertices of element A
+    {{1, 4, 5, 2}}, // vertices of element B
+}};
+
+// Each of the vertices is located at the following coordinates in physical
+// space.
+
+using VertexPositions =
+    std::array<std::array<Mesh::real_type, dimension>, number_of_vertices>;
+constexpr auto vertex_positions = VertexPositions{{
+    {{0., 0.}},
+    {{1., 0.}},
+    {{1., 1.}},
+    {{0., 1.}},
+    {{2., 0.}},
+    {{2., 1.}},
+}};
+
+// Let's specify how the elements behave when deformed.
+
+// First, we select a Hookean (linear elastic) material model in 2D.
+
+using MaterialModel =
+    materialmodels::Hookean<dimension, Vector::value_type, Vector::real_type>;
+
+// We'll choose the Quad4 shape functions (4 shape functions) that are defined
+// in 2D reference space.
+
+using Shape = shape::Quad4;
+
+// Since also our physical space is 2D we may use the isoparametric embedding.
+
+using Embedding = embedding::IsoparametricEmbedding<Shape>;
+
+// Let's choose a simple quadrature rule in reference space.
+
+using Quadrature = quadrature::Quadrature<quadrature::QuadratureType::Cube,
+                                          dimension, 1 /* order */>;
+
+// We use an "Integrator" to integrate in physical space. Of course, it depends
+// on the selected quadrature rule and the embedding. In the case of the
+// isoparametric embedding, this embedding is specified by the "Shape".
+
+using Integrator =
+    integrator::IsoparametricIntegrator<Shape, Quadrature, Vector::value_type,
+                                        Vector::real_type>;
+
+// Now we are ready to select our element type.
+
+using Element = elements::CoreElement<MaterialModel, Integrator,
+                                      Vector::value_type, Vector::real_type>;
+
+// We will assemble e.g. energy using a collection of elements. This is done by
+// the assembler. (The list DefaultFeaturePlugins contain the features (e.g.
+// energy) that most elements support.)
+
+using Assembler =
+    assembly::Assembler<Element, assembly::DefaultFeaturePlugins, Policy>;
+
+// Our goals is to minimize the energy. This is done by the solver.
+
+using Solver = solve::GeneralizedTAOSolver<Assembler>;
+using BoundaryCondition = Solver::BoundaryConditionContainer;
+
+int main(int argc, char **argv) {
+  // MPI/PETSc/cpppetsc must be initialized before using it.
+
+  const auto context = Context(&argc, &argv);
+
+  // Let's create the mesh, an assembler, and a material model.
+
+  const auto mesh = Mesh::fromConnectivity(dimension, connectivity,
+                                           number_of_vertices, dof_per_vertex);
+  auto assembler = Assembler();
+  const auto model = MaterialModel(1.0, 0.);
+
+  // Depending on whether we use MPI, our mesh may be distributed and not all
+  // elements are present on this computational node.
+
+  // Let's add those elements that are "local" to the assembler.
+
+  for (const auto &element : mesh.localElements()) {
+    assembler.emplaceElement(
+        element, model,
+        Integrator(Embedding(Embedding::Collection<Embedding::PhysicalPoint>{{
+            vertex_positions.at(connectivity.at(element.index()).at(0)),
+            vertex_positions.at(connectivity.at(element.index()).at(1)),
+            vertex_positions.at(connectivity.at(element.index()).at(2)),
+            vertex_positions.at(connectivity.at(element.index()).at(3)),
+        }})));
+  }
+
+  // We need to create a solver. We do not use the time, so we can set it to
+  // zero.
+
+  const auto solver = Solver(&mesh);
+  const auto time = Element::Time{0.};
+
+  // Before we can produce meaningful results, we need to specify boundary
+  // conditions. Let's fix the nodes at x=0 and pull on the nodes at x=2.
+
+  // For the sake of this example we will specify those boundary conditions
+  // via nonlinear equations. More precisely, we will ask the solver to minimize
+  // the sin of the difference.
+
+  // We will create a residual function with 8-dimensional output, each
+  // representing the sin of the difference for one degree of freedom. Let's
+  // decide on a first row in this output for each vertex.
+
+  const auto vertex_to_row = [](const auto vertex) -> Mesh::size_type {
+    switch (vertex.index()) {
+    case 0:
+      return 0;
+    case 3:
+      return 2;
+    case 4:
+      return 4;
+    case 5:
+      return 6;
+    default:
+      throw std::out_of_range("The vertex index does not correspond to a row.");
+    }
+  };
+
+  // To simplify writing the residual function, we create two helper functions
+  // that identify unconstrained and pulled vertices.
+
+  const auto is_unconstrained_vertex = [](const auto vertex) {
+    return vertex.index() == 1 || vertex.index() == 2;
+  };
+
+  const auto is_pulled_vertex = [](const auto vertex) {
+    return vertex.index() == 4 || vertex.index() == 5;
+  };
+
+  const auto boundary_conditions = BoundaryCondition{
+      8,
+      [&](const cpppetsc::distributed<Vector> &in,
+          cpppetsc::distributed<Vector> *const out) {
+        out->unwrap().setZero();
+        auto replacer = out->unwrap().replace();
+        const auto full = Vector::fromDistributed(in);
+        for (const auto &vertex : mesh.localVertices()) {
+          if (is_unconstrained_vertex(vertex))
+            continue;
+          // The rows corresponding to the vertex contain the sin of the
+          // difference between the degree of freedoms and the prescribed
+          // values.
+          replacer(vertex_to_row(vertex)) =
+              std::sin(full(vertex.globalDofLineRange().first) +
+                       (is_pulled_vertex(vertex) ? -.5 : 0.));
+          replacer(vertex_to_row(vertex) + 1) =
+              std::sin(full(vertex.globalDofLineRange().first + 1));
+        }
+      },
+      [&](const cpppetsc::distributed<Vector> &in, Matrix *const out) {
+        out->setZero();
+        const auto full = Vector::fromDistributed(in);
+        auto replacer = out->preallocatedAssemblyView(1).replace();
+        for (const auto &vertex : mesh.localVertices()) {
+          if (is_unconstrained_vertex(vertex))
+            continue;
+          // The rows corresponding to the vertex contain the cos of the
+          // difference between the degree of freedoms and the prescribed
+          // values.
+          replacer(vertex_to_row(vertex), vertex.globalDofLineRange().first) =
+              std::cos(full(vertex.globalDofLineRange().first) +
+                       (is_pulled_vertex(vertex) ? -.5 : 0.));
+          replacer(vertex_to_row(vertex) + 1,
+                   vertex.globalDofLineRange().first + 1) =
+              std::cos(full(vertex.globalDofLineRange().first + 1));
+        }
+      },
+  };
+
+  // We are ready to minimize the energy.
+
+  const auto result = solver.computeSolution(
+      boundary_conditions, Vector::fromGlobalMesh(mesh), time, &assembler);
+
+  // As a final step, we print the result.
+  // We could use: result.unwrap().print();
+
+  // However, the output of this command changes when running the application
+  // in MPI-parallel mode. This happens because the vector "result" is
+  // distributed between the ranks in this case.
+
+  // So we gather all the results locally in the canonical data format first:
+  // [ vertex-0-dof-0, vertex-0-dof-1, vertex-1-dof-0, vertex-1-dof-1, ...].
+
+  const auto global_result =
+      Vector::fromDistributedInCanonicalOrder(result, mesh);
+
+  // Then we print this global vector only on the primary rank.
+  // We expect the degrees of freedom in x direction to be between 0 and .5,
+  // and the degrees of freedom in y direction to be 0:
+  // Vertex 0: (0.0, 0.0)
+  // Vertex 1: (.25, 0.0)
+  // Vertex 2: (.25, 0.0)
+  // Vertex 3: (0.0, 0.0)
+  // Vertex 4: (0.5, 0.0)
+  // Vertex 5: (0.5, 0.0)
+
+  if (Policy::isPrimaryRank())
+    global_result.unwrap().print();
+}
diff --git a/solve/src/CMakeLists.txt b/solve/src/CMakeLists.txt
index 3d3f8eddf0ebfabd5c3e41ca5f550db67c886ce4..9c8e60841fc8863da8588d22ce98f2b64521af9b 100644
--- a/solve/src/CMakeLists.txt
+++ b/solve/src/CMakeLists.txt
@@ -21,6 +21,7 @@ add_library(${PROJECT_NAME}-solve
             ./DynamicSolver.cc
             ./DynamicState.cc
             ./GeneralizedNonlinearSolver.cc
+            ./GeneralizedTAOSolver.cc
             ./InconsistentBoundaryConditionsException.cc
             ./InvalidVertexException.cc
             ./LeastSquaresSolver.cc
diff --git a/solve/src/GeneralizedTAOSolver.cc b/solve/src/GeneralizedTAOSolver.cc
new file mode 100644
index 0000000000000000000000000000000000000000..45795ca19c7b5187ecf2b73739b0c8c63d4427ef
--- /dev/null
+++ b/solve/src/GeneralizedTAOSolver.cc
@@ -0,0 +1,15 @@
+// Â© 2022 ETH Zurich, Mechanics and Materials Lab
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "ae108/solve/GeneralizedTAOSolver.h"
\ No newline at end of file
diff --git a/solve/src/include/ae108/solve/GeneralizedTAOSolver.h b/solve/src/include/ae108/solve/GeneralizedTAOSolver.h
new file mode 100644
index 0000000000000000000000000000000000000000..073b34e8f2d053923b1695dcccf4ed8926a79868
--- /dev/null
+++ b/solve/src/include/ae108/solve/GeneralizedTAOSolver.h
@@ -0,0 +1,311 @@
+// Â© 2020, 2022 ETH Zurich, Mechanics and Materials Lab
+// Â© 2020 California Institute of Technology
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#ifndef AE108_PETSC_COMPLEX
+
+#include "ae108/assembly/AssemblerTypeTraits.h"
+#include "ae108/cpppetsc/MeshBoundaryCondition.h"
+#include "ae108/cpppetsc/TAOSolver.h"
+#include "ae108/cpppetsc/TaggedVector.h"
+#include <cassert>
+#include <functional>
+#include <mpi.h>
+#include <petscsys.h>
+#include <vector>
+
+namespace ae108 {
+namespace solve {
+
+template <class Assembler> class GeneralizedTAOSolver {
+public:
+  using policy_type = typename assembly::PolicyTypeTrait<Assembler>::type;
+  using mesh_type = typename assembly::MeshTypeTrait<Assembler>::type;
+  using size_type = typename mesh_type::size_type;
+  using value_type = typename mesh_type::value_type;
+  using real_type = typename mesh_type::real_type;
+  using vector_type = typename mesh_type::vector_type;
+  using matrix_type = typename mesh_type::matrix_type;
+
+  /**
+   * @param mesh A valid nonzero pointer to a mesh_type instance.
+   */
+  explicit GeneralizedTAOSolver(const mesh_type *mesh);
+
+  struct BoundaryConditionContainer {
+    /**
+     * @brief The dimension of the output of `residual`.
+     */
+    size_type size;
+
+    /**
+     * @brief The solver attempts to reach a residual of zero.
+     */
+    typename cpppetsc::TAOSolver<policy_type>::GradientFunctor residual;
+
+    /**
+     * @brief The jacobian of the residual.
+     */
+    typename cpppetsc::TAOSolver<policy_type>::HessianFunctor jacobian;
+  };
+
+  /**
+   * @brief Calls computeSolution with the default assembler calls (ie. calls
+   * assembleEnergy, assembleForceVector or assembleStiffnessMatrix). Passes on
+   * the rest of the arguments.
+   *
+   * @param boundaryConditions The nonlinear boundary conditions to apply.
+   * @param initialGuess The global vector to start iterating at.
+   * @param time This will be used to configure the assembler.
+   * @param assembler Valid nonzero pointer.
+   */
+  cpppetsc::distributed<vector_type>
+  computeSolution(const BoundaryConditionContainer &boundaryConditions,
+                  cpppetsc::distributed<vector_type> initialGuess,
+                  const double time, const Assembler *const assembler) const;
+
+  using LocalEnergyAssembler = std::function<void(
+      const cpppetsc::local<vector_type> &, double, real_type *)>;
+
+  using LocalForceVectorAssembler =
+      std::function<void(const cpppetsc::local<vector_type> &, double,
+                         cpppetsc::local<vector_type> *)>;
+
+  using LocalStiffnessMatrixAssembler = std::function<void(
+      const cpppetsc::local<vector_type> &, double, matrix_type *)>;
+
+  /**
+   * @brief Calls computeSolution wrapping up the local assembler calls into
+   * distributed assembler calls. Passes on the rest of the arguments.
+   *
+   * @param boundaryConditions The nonlinear boundary conditions to apply.
+   * Note that empty functions (residual, jacobian) are interpreted as functions
+   * that do not change the output.
+   * @param initialGuess The global vector to start iterating at.
+   * @param time This will be used to configure the assembler.
+   * @param assembleEnergy A valid callable. It will be called to assemble
+   * the local energy.
+   * @param assembleForceVector A valid callable. It will be called to assemble
+   * the local force vector.
+   * @param assembleStiffnessMatrix A valid callable. It will be called to
+   * assemble the stiffness matrix.
+   */
+  cpppetsc::distributed<vector_type>
+  computeSolution(const BoundaryConditionContainer &boundaryConditions,
+                  cpppetsc::distributed<vector_type> initialGuess,
+                  const double time, LocalEnergyAssembler assembleEnergy,
+                  LocalForceVectorAssembler assembleForceVector,
+                  LocalStiffnessMatrixAssembler assemblerStiffnessMatrix) const;
+
+  using DistributedEnergyAssembler = std::function<void(
+      const cpppetsc::distributed<vector_type> &, double, real_type *)>;
+
+  using DistributedForceVectorAssembler =
+      std::function<void(const cpppetsc::distributed<vector_type> &, double,
+                         cpppetsc::distributed<vector_type> *)>;
+
+  using DistributedStiffnessMatrixAssembler = std::function<void(
+      const cpppetsc::distributed<vector_type> &, double, matrix_type *)>;
+
+  /**
+   * @brief The goal of this function is to solve E(x) = min under the condition
+   * that the residual is equal to zero.
+   *
+   * @remark Does not update internal variables.
+   *
+   * @param boundaryConditions The nonlinear boundary conditions to apply.
+   * @param initialGuess The global vector to start iterating at.
+   * @param time This will be used to configure the assembler.
+   * @param assembleEnergy A valid callable. It will be called to assemble
+   * the distributed energy.
+   * @param assembleForceVector A valid callable. It will be called to assemble
+   * the distributed force vector.
+   * @param assembleStiffnessMatrix A valid callable. It will be called to
+   * assemble the stiffness matrix.
+   */
+  cpppetsc::distributed<vector_type> computeSolution(
+      const BoundaryConditionContainer &boundaryConditions,
+      cpppetsc::distributed<vector_type> initialGuess, const double time,
+      DistributedEnergyAssembler assembleEnergy,
+      DistributedForceVectorAssembler assembleForceVector,
+      DistributedStiffnessMatrixAssembler assemblerStiffnessMatrix) const;
+
+private:
+  const mesh_type *_mesh;
+}; // namespace solve
+} // namespace solve
+} // namespace ae108
+
+/********************************************************************
+ *  implementations
+ *******************************************************************/
+
+namespace ae108 {
+namespace solve {
+template <class Assembler>
+GeneralizedTAOSolver<Assembler>::GeneralizedTAOSolver(const mesh_type *mesh)
+    : _mesh(mesh) {}
+
+template <class Assembler>
+cpppetsc::distributed<typename GeneralizedTAOSolver<Assembler>::vector_type>
+GeneralizedTAOSolver<Assembler>::computeSolution(
+    const BoundaryConditionContainer &boundaryConditions,
+    cpppetsc::distributed<vector_type> initialGuess, const double time,
+    const Assembler *const assembler) const {
+  assert(assembler);
+  return computeSolution(
+      boundaryConditions, std::move(initialGuess), time,
+      LocalEnergyAssembler(
+          [assembler](const cpppetsc::local<vector_type> &localDisplacements,
+                      const double time, real_type *const localEnergy) {
+            assembler->assembleEnergy(localDisplacements, time, localEnergy);
+          }),
+      LocalForceVectorAssembler(
+          [assembler](const cpppetsc::local<vector_type> &localDisplacements,
+                      const double time,
+                      cpppetsc::local<vector_type> *const localForces) {
+            assembler->assembleForceVector(localDisplacements, time,
+                                           localForces);
+          }),
+      LocalStiffnessMatrixAssembler(
+          [assembler](const cpppetsc::local<vector_type> &localDisplacements,
+                      const double time, matrix_type *const output) {
+            assembler->assembleStiffnessMatrix(localDisplacements, time,
+                                               output);
+          }));
+}
+
+template <class Assembler>
+cpppetsc::distributed<typename GeneralizedTAOSolver<Assembler>::vector_type>
+GeneralizedTAOSolver<Assembler>::computeSolution(
+    const BoundaryConditionContainer &boundaryConditions,
+    cpppetsc::distributed<vector_type> initialGuess, const double time,
+    LocalEnergyAssembler assembleEnergy,
+    LocalForceVectorAssembler assembleForceVector,
+    LocalStiffnessMatrixAssembler assembleStiffnessMatrix) const {
+  assert(assembleEnergy);
+  assert(assembleForceVector);
+  assert(assembleStiffnessMatrix);
+
+  const auto &mesh = *_mesh;
+
+  auto localDisplacements = vector_type::fromLocalMesh(mesh);
+  auto localForces = vector_type::fromLocalMesh(mesh);
+
+  const auto distributedEnergyAssembler =
+      [&assembleEnergy, &mesh,
+       &localDisplacements](const cpppetsc::distributed<vector_type> &input,
+                            const double time, real_type *const output) {
+        mesh.copyToLocalVector(input, &localDisplacements);
+
+        *output = real_type{};
+        assembleEnergy(localDisplacements, time, output);
+
+        policy_type::handleError(MPI_Allreduce(MPI_IN_PLACE, output, 1,
+                                               MPIU_REAL, MPIU_SUM,
+                                               policy_type::communicator()));
+      };
+
+  const auto distributedForceVectorAssembler =
+      [&assembleForceVector, &mesh, &localDisplacements, &localForces](
+          const cpppetsc::distributed<vector_type> &input, const double time,
+          cpppetsc::distributed<vector_type> *const output) {
+        mesh.copyToLocalVector(input, &localDisplacements);
+
+        localForces.unwrap().setZero();
+        assembleForceVector(localDisplacements, time, &localForces);
+
+        mesh.addToGlobalVector(localForces, output);
+      };
+
+  const auto distributedStiffnessMatrixAssembler =
+      [&assembleStiffnessMatrix, &mesh,
+       &localDisplacements](const cpppetsc::distributed<vector_type> &input,
+                            const double time, matrix_type *const output) {
+        mesh.copyToLocalVector(input, &localDisplacements);
+
+        assembleStiffnessMatrix(localDisplacements, time, output);
+        output->finalize();
+      };
+
+  return computeSolution(boundaryConditions, std::move(initialGuess), time,
+                         distributedEnergyAssembler,
+                         distributedForceVectorAssembler,
+                         distributedStiffnessMatrixAssembler);
+}
+
+template <class Assembler>
+cpppetsc::distributed<typename GeneralizedTAOSolver<Assembler>::vector_type>
+GeneralizedTAOSolver<Assembler>::computeSolution(
+    const BoundaryConditionContainer &boundaryConditions,
+    cpppetsc::distributed<vector_type> initialGuess, const double time,
+    DistributedEnergyAssembler assembleEnergy,
+    DistributedForceVectorAssembler assembleForceVector,
+    DistributedStiffnessMatrixAssembler assembleStiffnessMatrix) const {
+  assert(assembleEnergy);
+  assert(assembleForceVector);
+  assert(assembleStiffnessMatrix);
+
+  using Solver = cpppetsc::TAOSolver<policy_type>;
+  auto matrix = matrix_type::fromMesh(*_mesh);
+
+  const auto global = matrix.size();
+  const auto local = matrix.localSize();
+
+  Solver solver{std::move(matrix), Solver::Type::almm};
+
+  solver.setConstraints(typename Solver::EqualityConstraints{
+      boundaryConditions.residual
+          ? boundaryConditions.residual
+          : [](const cpppetsc::distributed<vector_type> &,
+               cpppetsc::distributed<vector_type> *) {},
+      boundaryConditions.jacobian
+          ? boundaryConditions.jacobian
+          : [](const cpppetsc::distributed<vector_type> &, matrix_type *) {},
+      {
+          cpppetsc::distributed<vector_type>(boundaryConditions.size),
+          matrix_type(typename matrix_type::LocalRows(PETSC_DECIDE),
+                      typename matrix_type::LocalCols(local.second),
+                      typename matrix_type::GlobalRows(boundaryConditions.size),
+                      typename matrix_type::GlobalCols(global.second)),
+      },
+  });
+
+  return solver.solve(
+      [time, &assembleEnergy](const cpppetsc::distributed<vector_type> &input,
+                              double *const output) {
+        *output = real_type{};
+        assembleEnergy(input, time, output);
+      },
+      [time,
+       &assembleForceVector](const cpppetsc::distributed<vector_type> &input,
+                             cpppetsc::distributed<vector_type> *const output) {
+        output->unwrap().setZero();
+        assembleForceVector(input, time, output);
+      },
+      [time, &assembleStiffnessMatrix](
+          const cpppetsc::distributed<vector_type> &input,
+          matrix_type *const output) {
+        output->setZero();
+        assembleStiffnessMatrix(input, time, output);
+      },
+      std::move(initialGuess));
+}
+
+} // namespace solve
+} // namespace ae108
+
+#endif
\ No newline at end of file
diff --git a/solve/test/CMakeLists.txt b/solve/test/CMakeLists.txt
index d86645660457acbdc13d5194460e12ec1362436d..ff34b6e7b3e1f7af60e61d8b854069626897fc36 100644
--- a/solve/test/CMakeLists.txt
+++ b/solve/test/CMakeLists.txt
@@ -19,6 +19,7 @@ add_executable(${PROJECT_NAME}-SolveTests
                ./AffineTransform_Test.cc
                ./DynamicSolver_Test.cc
                ./GeneralizedNonlinearSolver_Test.cc
+               ./GeneralizedTAOSolver_Test.cc
                ./InconsistentBoundaryConditionsException_Test.cc
                ./InvalidVertexException_Test.cc
                ./LeastSquaresSolver_Test.cc
diff --git a/solve/test/GeneralizedTAOSolver_Test.cc b/solve/test/GeneralizedTAOSolver_Test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..9949dca1b64cd13adc45cf955b86247708e1efeb
--- /dev/null
+++ b/solve/test/GeneralizedTAOSolver_Test.cc
@@ -0,0 +1,193 @@
+// Â© 2022 ETH Zurich, Mechanics and Materials Lab
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef AE108_PETSC_COMPLEX
+
+#include "ae108/cpppetsc/ParallelComputePolicy.h"
+#include "ae108/cpppetsc/SequentialComputePolicy.h"
+#include "ae108/cppptest/Matchers.h"
+#include "ae108/solve/GeneralizedTAOSolver.h"
+#include "ae108/solve/test/Solver_Test.h"
+#include <gmock/gmock.h>
+
+using ae108::cppptest::ScalarEqIfLocal;
+using ae108::cppptest::ScalarNearIfLocal;
+using testing::Types;
+
+namespace ae108 {
+namespace solve {
+namespace {
+
+template <class Policy> struct Configuration {
+  using policy_type = Policy;
+  template <class T> using solver_type = GeneralizedTAOSolver<T>;
+};
+
+template <class Configuration>
+struct GeneralizedTAOSolver_Test : test::Solver_Test<Configuration> {};
+
+using Configurations = Types<Configuration<cpppetsc::SequentialComputePolicy>,
+                             Configuration<cpppetsc::ParallelComputePolicy>>;
+TYPED_TEST_CASE(GeneralizedTAOSolver_Test, Configurations);
+
+TYPED_TEST(GeneralizedTAOSolver_Test, solves_unconstrained_problem) {
+  using assembler_type = typename TestFixture::assembler_type;
+  using vector_type = typename TestFixture::vector_type;
+
+  const auto solution = this->solver.computeSolution(
+      {}, vector_type::fromGlobalMesh(this->mesh), assembler_type::constantTime,
+      &this->assembler);
+
+  EXPECT_THAT(solution.unwrap(), ScalarEqIfLocal(0, 1.));
+  EXPECT_THAT(solution.unwrap(), ScalarEqIfLocal(1, 2.));
+}
+
+TYPED_TEST(GeneralizedTAOSolver_Test, solves_partially_constrained_problem) {
+  using assembler_type = typename TestFixture::assembler_type;
+  using matrix_type = typename TestFixture::matrix_type;
+  using vector_type = typename TestFixture::vector_type;
+
+  const auto solution = this->solver.computeSolution(
+      {
+          1,
+          [](const cpppetsc::distributed<vector_type> &in,
+             cpppetsc::distributed<vector_type> *const out) {
+            const auto full = vector_type::fromDistributed(in);
+            out->unwrap().setZero();
+            out->unwrap().replace().element(0, full(0) - full(1));
+          },
+          [](const cpppetsc::distributed<vector_type> &,
+             matrix_type *const out) {
+            out->setZero();
+            out->preallocatedAssemblyView(2)
+                .replace()
+                .element(0, 0, 1.)
+                .element(0, 1, -1.);
+          },
+      },
+      vector_type::fromGlobalMesh(this->mesh), assembler_type::constantTime,
+      &this->assembler);
+
+  EXPECT_THAT(solution.unwrap(), ScalarNearIfLocal(0, 3. / 2., 1e-6));
+  EXPECT_THAT(solution.unwrap(), ScalarNearIfLocal(1, 3. / 2., 1e-6));
+}
+
+TYPED_TEST(GeneralizedTAOSolver_Test, solves_fully_constrained_problem) {
+  using assembler_type = typename TestFixture::assembler_type;
+  using matrix_type = typename TestFixture::matrix_type;
+  using vector_type = typename TestFixture::vector_type;
+
+  const auto solution = this->solver.computeSolution(
+      {
+          2,
+          [](const cpppetsc::distributed<vector_type> &in,
+             cpppetsc::distributed<vector_type> *const out) {
+            const auto full = vector_type::fromDistributed(in);
+            out->unwrap().setZero();
+            out->unwrap()
+                .replace()
+                .element(0, full(0) - 3.)
+                .element(1, full(1) - 4.);
+          },
+          [](const cpppetsc::distributed<vector_type> &,
+             matrix_type *const out) {
+            out->setZero();
+            out->preallocatedAssemblyView(1)
+                .replace()
+                .element(0, 0, 1.)
+                .element(1, 1, 1.);
+          },
+      },
+      vector_type::fromGlobalMesh(this->mesh), assembler_type::constantTime,
+      &this->assembler);
+
+  EXPECT_THAT(solution.unwrap(), ScalarNearIfLocal(0, 3., 1e-6));
+  EXPECT_THAT(solution.unwrap(), ScalarNearIfLocal(1, 4., 1e-6));
+}
+
+TYPED_TEST(GeneralizedTAOSolver_Test,
+           assembler_can_be_replaced_by_local_assembly_functions) {
+  using solver_type = typename TestFixture::solver_type;
+  using matrix_type = typename TestFixture::matrix_type;
+  using vector_type = typename TestFixture::vector_type;
+
+  const auto solution = this->solver.computeSolution(
+      {}, vector_type::fromGlobalMesh(this->mesh),
+      TestFixture::assembler_type::constantTime,
+      [assembler = &this->assembler](
+          const cpppetsc::local<vector_type> &in, const double time,
+          typename solver_type::real_type *const out) {
+        assembler->assembleEnergy(in, time, out);
+      },
+      [assembler = &this->assembler](const cpppetsc::local<vector_type> &in,
+                                     const double time,
+                                     cpppetsc::local<vector_type> *const out) {
+        assembler->assembleForceVector(in, time, out);
+      },
+      [assembler = &this->assembler](const cpppetsc::local<vector_type> &in,
+                                     const double time,
+                                     matrix_type *const out) {
+        assembler->assembleStiffnessMatrix(in, time, out);
+      });
+
+  EXPECT_THAT(solution.unwrap(), ScalarEqIfLocal(0, 1.));
+  EXPECT_THAT(solution.unwrap(), ScalarEqIfLocal(1, 2.));
+}
+
+TYPED_TEST(GeneralizedTAOSolver_Test,
+           assembler_can_be_replaced_by_distributed_assembly_functions) {
+  using policy_type = typename TestFixture::policy_type;
+  using vector_type = typename TestFixture::vector_type;
+  using matrix_type = typename TestFixture::matrix_type;
+  using real_type = typename TestFixture::solver_type::real_type;
+
+  const auto solution = this->solver.computeSolution(
+      {}, vector_type::fromGlobalMesh(this->mesh),
+      TestFixture::assembler_type::constantTime,
+      [assembler = &this->assembler,
+       mesh = &this->mesh](const cpppetsc::distributed<vector_type> &in,
+                           const double time, real_type *const out) {
+        auto local = vector_type::fromLocalMesh(*mesh);
+        mesh->copyToLocalVector(in, &local);
+        assembler->assembleEnergy(local, time, out);
+        policy_type::handleError(MPI_Allreduce(MPI_IN_PLACE, out, 1, MPIU_REAL,
+                                               MPIU_SUM,
+                                               policy_type::communicator()));
+      },
+      [assembler = &this->assembler, mesh = &this->mesh](
+          const cpppetsc::distributed<vector_type> &in, const double time,
+          cpppetsc::distributed<vector_type> *const out) {
+        auto localIn = vector_type::fromLocalMesh(*mesh);
+        auto localOut = vector_type::fromLocalMesh(*mesh);
+        mesh->copyToLocalVector(in, &localIn);
+        assembler->assembleForceVector(localIn, time, &localOut);
+        mesh->addToGlobalVector(localOut, out);
+      },
+      [assembler = &this->assembler,
+       mesh = &this->mesh](const cpppetsc::distributed<vector_type> &in,
+                           const double time, matrix_type *const out) {
+        auto local = vector_type::fromLocalMesh(*mesh);
+        mesh->copyToLocalVector(in, &local);
+        assembler->assembleStiffnessMatrix(local, time, out);
+        out->finalize();
+      });
+
+  EXPECT_THAT(solution.unwrap(), ScalarEqIfLocal(0, 1.));
+  EXPECT_THAT(solution.unwrap(), ScalarEqIfLocal(1, 2.));
+}
+} // namespace
+} // namespace solve
+} // namespace ae108
+
+#endif
\ No newline at end of file
diff --git a/tests/examples/nonlinearbc/definition.json b/tests/examples/nonlinearbc/definition.json
new file mode 100644
index 0000000000000000000000000000000000000000..e9f78d0ea60f26e8ec1870c1dde39e01cf925b85
--- /dev/null
+++ b/tests/examples/nonlinearbc/definition.json
@@ -0,0 +1,11 @@
+{
+  "executable": [
+    "examples",
+    "ae108-examples-NonlinearBC"
+  ],
+  "compare_stdout": "numeric",
+  "mpi_processes": [
+    1,
+    3
+  ]
+}
\ No newline at end of file
diff --git a/tests/examples/nonlinearbc/references/stdout.txt b/tests/examples/nonlinearbc/references/stdout.txt
new file mode 100644
index 0000000000000000000000000000000000000000..b63e010232622ec6f46afed12a95e518d7c9a69a
--- /dev/null
+++ b/tests/examples/nonlinearbc/references/stdout.txt
@@ -0,0 +1,14 @@
+Vec Object: 1 MPI processes
+  type: seq
+5.47984e-07
+-6.7861e-19
+0.25
+-9.60278e-18
+0.25
+-9.60278e-18
+5.47984e-07
+-6.7861e-19
+0.499998
+-7.44116e-19
+0.499998
+-7.44116e-19
diff --git a/tests/run.py b/tests/run.py
index 3944fbd21399d096097f094af5c72fd94ccd16f5..bf2fd51446138c8f79770f163546da3d069cd695 100755
--- a/tests/run.py
+++ b/tests/run.py
@@ -394,26 +394,24 @@ def load_tests(
                     ROOT_DIRECTORY / "tests" / "schema.json", "r", encoding="utf-8"
                 ) as schema:
                     jsonschema.validate(instance=instance, schema=json.load(schema))
+                executable_path = pathlib.Path.cwd() / pathlib.Path(
+                    *instance["executable"]
+                )
+                if not executable_path.exists():
+                    print(
+                        f"Warning: Test '{path.parent}' will be skipped. "
+                        f"The executable '{executable_path}' does not exist.",
+                        file=sys.stderr,
+                    )
+                    continue
                 test_case_definitions = as_test_case_definitions(path.parent, instance)
             except jsonschema.ValidationError as error:
                 print(
-                    f"Warning: Test definition '{path}' is invalid. {error.message}.",
+                    f"Warning: Test '{path.parent}' will be skipped. "
+                    f"The test definition is invalid: {error.message}.",
                     file=sys.stderr,
                 )
-                test_case_definitions = (
-                    definition
-                    for definition in [
-                        TestCaseDefinition(
-                            executable=pathlib.Path(),
-                            references=pathlib.Path(),
-                            args=[],
-                            mpi_processes=1,
-                            compare_stdout=ComparisonType.NONE,
-                            ae108_output=[],
-                        )
-                    ]
-                )
-
+                continue
             testcase = type(
                 group_name,
                 (unittest.TestCase,),