Commit d056efa3 authored by Nicolas Winkler
Browse files

fixing kokkos scaling

parent e6de8282
......@@ -26,15 +26,49 @@ struct TrsmTask
bool transposed;
Kokkos::View<double **> lu;
Kokkos::View<double **> matrix;
bool split_up;
KOKKOS_INLINE_FUNCTION
TrsmTask(Kokkos::View<double **> lu, Kokkos::View<double **> matrix, bool transposed) : lu{lu}, matrix{matrix}, transposed{transposed} {}
// Constructs a task from the `lu` and `matrix` views and the `transposed` flag.
// `split_up` starts out false, so the first run of operator() does not hit the
// `if (split_up) return;` early exit.
// The initializers are written in member declaration order (transposed, lu,
// matrix, split_up): C++ initializes members in declaration order regardless
// of how the list is written, so keeping the two in sync avoids -Wreorder
// warnings and a misleading reading order.
TrsmTask(Kokkos::View<double **> lu, Kokkos::View<double **> matrix, bool transposed)
    : transposed{transposed}, lu{lu}, matrix{matrix}, split_up{false} {}
KOKKOS_INLINE_FUNCTION
void operator()(typename Scheduler::member_type &member, value_type &result)
{
if (transposed)
{
auto &scheduler = member.scheduler();
if (split_up) {
return;
}
int task_size = transposed ? matrix.extent(0) : matrix.extent(1);
if (task_size > block_size * 32) {
int splitpt = task_size / 2;
Kokkos::View<double **> M1;
Kokkos::View<double **> M2;
if (transposed) {
M1 = Kokkos::subview(matrix, IndexPair{ 0, splitpt }, Kokkos::ALL);
M2 = Kokkos::subview(matrix, IndexPair{ splitpt, task_size }, Kokkos::ALL);
}
else {
M1 = Kokkos::subview(matrix, Kokkos::ALL, IndexPair{ 0, splitpt });
M2 = Kokkos::subview(matrix, Kokkos::ALL, IndexPair{ splitpt, task_size });
}
future_type blocks[2];
blocks[0] = Kokkos::task_spawn(
Kokkos::TaskSingle(scheduler),
TrsmTask<Scheduler>{ lu, M1, transposed });
blocks[1] = Kokkos::task_spawn(
Kokkos::TaskSingle(scheduler),
TrsmTask<Scheduler>{ lu, M2, transposed });
auto all_done = scheduler.when_all(blocks, 2);
split_up = true;
Kokkos::respawn(this, all_done);
}
if (transposed) {
trans_trsm(lu, matrix);
result = matrix;
}
......@@ -67,8 +101,7 @@ struct MmMinusTask
void operator()(typename Scheduler::member_type &member, value_type &result)
{
auto &scheduler = member.scheduler();
if (split_up)
{
if (split_up) {
return;
}
......
......@@ -53,6 +53,14 @@ int bench_kokkos_dag(int run_id, BenchUtil &bench, int num_runs, int matrix_size
int main(int argc, char **argv)
{
int num_threads = stoi(argv[1]);
omp_set_dynamic(-1);
omp_set_num_threads(num_threads);
char envset[64];
sprintf(envset, "OMP_NUM_THREADS=%d", num_threads);
putenv(envset);
// init
BenchUtil bench("lu_kokkos_dag");
Kokkos::initialize(argc, argv);
......@@ -62,11 +70,6 @@ int main(int argc, char **argv)
printf("incorrect number of args\n");
return -1;
}
int num_threads = stoi(argv[1]);
omp_set_dynamic(0);
omp_set_num_threads(num_threads);
int num_runs = stoi(argv[2]);
int configs[(argc - 2) / 2][2];
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment