Commit 6ced3abf authored by fabianw's avatar fabianw
Browse files

finished MPI implementation of iso extractor

parent a383ae33
......@@ -9,6 +9,7 @@
#include <cassert>
#include <vector>
#include <cmath>
#include <functional>
#include <mpi.h>
#include "Interpolator.h"
......@@ -161,24 +162,51 @@ private:
void _load_wavelet_MPI(ArgumentParser& p);
// halo handling
using func_t = std::function<void(const int, const int, const int)>;
void _fetch_halo(const MPI_Comm comm);
void _send_faces(const MPI_Comm comm, const MPInbr& nbrs, const Faces& f, std::vector<MPI_Request>& f_req) const;
void _recv_faces(const MPI_Comm comm, const MPInbr& nbrs, Faces& f, std::vector<MPI_Request>& f_req) const;
void _pack_faces(Faces& f, const MPInbr& nbr) const;
void _pack_faces(Faces& f, const MPInbr& nbr);
void _unpack_faces(const Faces& f, const MPInbr& nbr);
inline void _pack_faceX(Face_t& f, const int _s) const;
inline void _pack_faceY(Face_t& f, const int _s) const;
inline void _pack_faceZ(Face_t& f, const int _s) const;
inline void _upack_faceX(const Face_t& f, const int _d);
inline void _upack_faceY(const Face_t& f, const int _d);
inline void _upack_faceZ(const Face_t& f, const int _d);
inline void _set_halo_faceX(const int _s, const int _d);
inline void _set_halo_faceY(const int _s, const int _d);
inline void _set_halo_faceZ(const int _s, const int _d);
void _send_edges();
void _send_corners();
inline void _process_faceX(func_t& eval);
inline void _process_faceY(func_t& eval);
inline void _process_faceZ(func_t& eval);
inline void _pack_faceX(Face_t& f, const int _s0);
inline void _pack_faceY(Face_t& f, const int _s0);
inline void _pack_faceZ(Face_t& f, const int _s0);
inline void _upack_faceX(const Face_t& f, const int _d0);
inline void _upack_faceY(const Face_t& f, const int _d0);
inline void _upack_faceZ(const Face_t& f, const int _d0);
inline void _halo_faceX(const int _s0, const int _d0);
inline void _halo_faceY(const int _s0, const int _d0);
inline void _halo_faceZ(const int _s0, const int _d0);
void _send_edges(const MPI_Comm comm, const MPInbr& nbrs, const Edges& e, std::vector<MPI_Request>& e_req) const;
void _recv_edges(const MPI_Comm comm, const MPInbr& nbrs, Edges& e, std::vector<MPI_Request>& e_req) const;
void _pack_edges(Edges& f, const MPInbr& nbr);
void _unpack_edges(const Edges& e, const MPInbr& nbr);
inline void _process_edgeX(func_t& eval);
inline void _process_edgeY(func_t& eval);
inline void _process_edgeZ(func_t& eval);
inline void _pack_edgeX(Edge_t& e, const int _s0, const int _s1);
inline void _pack_edgeY(Edge_t& e, const int _s0, const int _s1);
inline void _pack_edgeZ(Edge_t& e, const int _s0, const int _s1);
inline void _upack_edgeX(const Edge_t& e, const int _d0, const int _d1);
inline void _upack_edgeY(const Edge_t& e, const int _d0, const int _d1);
inline void _upack_edgeZ(const Edge_t& e, const int _d0, const int _d1);
inline void _halo_edgeX(const int _s0, const int _s1, const int _d0, const int _d1);
inline void _halo_edgeY(const int _s0, const int _s1, const int _d0, const int _d1);
inline void _halo_edgeZ(const int _s0, const int _s1, const int _d0, const int _d1);
void _send_corners(const MPI_Comm comm, const MPInbr& nbrs, const Corners& c, std::vector<MPI_Request>& c_req) const;
void _recv_corners(const MPI_Comm comm, const MPInbr& nbrs, Corners& c, std::vector<MPI_Request>& c_req) const;
void _pack_corners(Corners& c, const MPInbr& nbr);
void _unpack_corners(const Corners& c, const MPInbr& nbr);
inline void _process_corner(func_t& eval);
inline void _pack_corner(Corner_t& c, const int _s0, const int _s1, const int _s2);
inline void _upack_corner(const Corner_t& c, const int _d0, const int _d1, const int _d2);
inline void _halo_corner(const int _s0, const int _s1, const int _s2, const int _d0, const int _d1, const int _d2);
};
......@@ -302,71 +330,79 @@ void InterpolatorMPI<Tinterp>::_load_wavelet_MPI(ArgumentParser& p)
template <typename Tinterp>
void InterpolatorMPI<Tinterp>::_fetch_halo(const MPI_Comm comm)
{
int myrank;
MPI_Comm_rank(comm, &myrank);
MPInbr mynbrs(comm);
// concept:
// 0.) prepare neighbor info
// 1.) pack buffers and send asynchronous messages
// 2.) unpack buffers
// int myrank;
// int size;
// MPI_Comm_rank(comm, &myrank);
// MPI_Comm_size(comm, &size);
// for (int i=0; i<size; ++i)
// {
// if (i==myrank)
// {
// std::cout << "Rank " << myrank << " nbrs" << std::endl;
// for (int i=0; i<27; ++i)
// {
// int off[3] = {(i%3)-1, ((i/3)%3)-1, (i/9)-1};
// std::cout << mynbrs(off[0], off[1], off[2]) << " ";
// }
// std::cout << std::endl << std::endl;
// }
// MPI_Barrier(comm);
// }
// 0.)
MPInbr mynbrs(comm);
// 1.)
const int Nx = this->m_data.Nx();
const int Ny = this->m_data.Ny();
const int Nz = this->m_data.Nz();
// recv faces
// faces
std::vector<MPI_Request> fpend_recv;
Faces frecv(Ny,Nz, Nx,Nz, Nx,Ny);
_recv_faces(comm, mynbrs, frecv, fpend_recv);
// send faces
std::vector<MPI_Request> fpend_send;
Faces fsend(Ny,Nz, Nx,Nz, Nx,Ny);
_pack_faces(fsend, mynbrs);
_send_faces(comm, mynbrs, fsend, fpend_send);
// edges
std::vector<MPI_Request> epend_recv;
Edges erecv(Nx, Ny, Nz);
_recv_edges(comm, mynbrs, erecv, epend_recv);
std::vector<MPI_Request> epend_send;
Edges esend(Nx, Ny, Nz);
_pack_edges(esend, mynbrs);
_send_edges(comm, mynbrs, esend, epend_send);
// corners
std::vector<MPI_Request> cpend_recv;
Corners crecv;
_recv_corners(comm, mynbrs, crecv, cpend_recv);
std::vector<MPI_Request> cpend_send;
Corners csend;
_pack_corners(csend, mynbrs);
_send_corners(comm, mynbrs, csend, cpend_send);
// 2.)
std::vector<MPI_Status> fstat_recv(fpend_recv.size());
MPI_Waitall(fpend_recv.size(), fpend_recv.data(), fstat_recv.data());
_unpack_faces(frecv, mynbrs);
// // send edges
// Edges esend;
std::vector<MPI_Status> estat_recv(epend_recv.size());
MPI_Waitall(epend_recv.size(), epend_recv.data(), estat_recv.data());
_unpack_edges(erecv, mynbrs);
// // send corners
// Corners csend;
std::vector<MPI_Status> cstat_recv(cpend_recv.size());
MPI_Waitall(cpend_recv.size(), cpend_recv.data(), cstat_recv.data());
_unpack_corners(crecv, mynbrs);
}
template <typename Tinterp>
void InterpolatorMPI<Tinterp>::_send_faces(const MPI_Comm comm, const MPInbr& nbrs, const Faces& f, std::vector<MPI_Request>& f_req) const
{
const int me = nbrs(0,0,0);
const int dst[6] = {nbrs(-1,0,0), nbrs(1,0,0), nbrs(0,-1,0), nbrs(0,1,0), nbrs(0,0,-1), nbrs(0,0,1)};
const Real* const data[6] = {f.x0.data, f.x1.data, f.y0.data, f.y1.data, f.z0.data, f.z1.data};
const int Nelem[6] = {f.x0._Nelem, f.x1._Nelem, f.y0._Nelem, f.y1._Nelem, f.z0._Nelem, f.z1._Nelem};
const int tagbase = 0;
for (int i = 0; i < 6; ++i)
{
const int dir = i/2;
const int side = i%2;
if (dst[i] >= 0)
{
MPI_Request req;
MPI_Isend(data[i], Nelem[i], MPIREAL, dst[i], me, comm, &req);
MPI_Isend(data[i], Nelem[i], MPIREAL, dst[i], tagbase+dir*2+side, comm, &req);
f_req.push_back(req);
}
}
......@@ -379,12 +415,15 @@ void InterpolatorMPI<Tinterp>::_recv_faces(const MPI_Comm comm, const MPInbr& nb
const int src[6] = {nbrs(-1,0,0), nbrs(1,0,0), nbrs(0,-1,0), nbrs(0,1,0), nbrs(0,0,-1), nbrs(0,0,1)};
Real* const data[6] = {f.x0.data, f.x1.data, f.y0.data, f.y1.data, f.z0.data, f.z1.data};
const int Nelem[6] = {f.x0._Nelem, f.x1._Nelem, f.y0._Nelem, f.y1._Nelem, f.z0._Nelem, f.z1._Nelem};
const int tagbase = 0;
for (int i = 0; i < 6; ++i)
{
const int dir = i/2;
const int side = (i+1)%2;
if (src[i] >= 0)
{
MPI_Request req;
MPI_Irecv(data[i], Nelem[i], MPIREAL, src[i], src[i], comm, &req);
MPI_Irecv(data[i], Nelem[i], MPIREAL, src[i], tagbase+dir*2+side, comm, &req);
f_req.push_back(req);
}
}
......@@ -392,7 +431,125 @@ void InterpolatorMPI<Tinterp>::_recv_faces(const MPI_Comm comm, const MPInbr& nb
template <typename Tinterp>
void InterpolatorMPI<Tinterp>::_pack_faces(Faces& f, const MPInbr& nbr) const
void InterpolatorMPI<Tinterp>::_send_edges(const MPI_Comm comm, const MPInbr& nbrs, const Edges& e, std::vector<MPI_Request>& e_req) const
{
const int dstX[4] = {nbrs(0,-1,-1), nbrs(0,1,-1), nbrs(0,1,1), nbrs(0,-1,1)};
const int dstY[4] = {nbrs(-1,0,-1), nbrs(-1,0,1), nbrs(1,0,1), nbrs(1,0,-1)};
const int dstZ[4] = {nbrs(-1,-1,0), nbrs(1,-1,0), nbrs(1,1,0), nbrs(-1,1,0)};
const Real* const dataX[4] = {e.x0.data, e.x1.data, e.x2.data, e.x3.data};
const Real* const dataY[4] = {e.y0.data, e.y1.data, e.y2.data, e.y3.data};
const Real* const dataZ[4] = {e.z0.data, e.z1.data, e.z2.data, e.z3.data};
const int NelemX[4] = {e.x0._Nelem, e.x1._Nelem, e.x2._Nelem, e.x3._Nelem};
const int NelemY[4] = {e.y0._Nelem, e.y1._Nelem, e.y2._Nelem, e.y3._Nelem};
const int NelemZ[4] = {e.z0._Nelem, e.z1._Nelem, e.z2._Nelem, e.z3._Nelem};
const int tagbase = 6;
for (int i = 0; i < 4; ++i)
{
const int edge = i;
if (dstX[i] >= 0)
{
MPI_Request req;
MPI_Isend(dataX[i], NelemX[i], MPIREAL, dstX[i], tagbase+0*4+edge, comm, &req);
e_req.push_back(req);
}
if (dstY[i] >= 0)
{
MPI_Request req;
MPI_Isend(dataY[i], NelemY[i], MPIREAL, dstY[i], tagbase+1*4+edge, comm, &req);
e_req.push_back(req);
}
if (dstZ[i] >= 0)
{
MPI_Request req;
MPI_Isend(dataZ[i], NelemZ[i], MPIREAL, dstZ[i], tagbase+2*4+edge, comm, &req);
e_req.push_back(req);
}
}
}
template <typename Tinterp>
void InterpolatorMPI<Tinterp>::_recv_edges(const MPI_Comm comm, const MPInbr& nbrs, Edges& e, std::vector<MPI_Request>& e_req) const
{
const int srcX[4] = {nbrs(0,-1,-1), nbrs(0,1,-1), nbrs(0,1,1), nbrs(0,-1,1)};
const int srcY[4] = {nbrs(-1,0,-1), nbrs(-1,0,1), nbrs(1,0,1), nbrs(1,0,-1)};
const int srcZ[4] = {nbrs(-1,-1,0), nbrs(1,-1,0), nbrs(1,1,0), nbrs(-1,1,0)};
Real* const dataX[4] = {e.x0.data, e.x1.data, e.x2.data, e.x3.data};
Real* const dataY[4] = {e.y0.data, e.y1.data, e.y2.data, e.y3.data};
Real* const dataZ[4] = {e.z0.data, e.z1.data, e.z2.data, e.z3.data};
const int NelemX[4] = {e.x0._Nelem, e.x1._Nelem, e.x2._Nelem, e.x3._Nelem};
const int NelemY[4] = {e.y0._Nelem, e.y1._Nelem, e.y2._Nelem, e.y3._Nelem};
const int NelemZ[4] = {e.z0._Nelem, e.z1._Nelem, e.z2._Nelem, e.z3._Nelem};
const int tagbase = 6;
for (int i = 0; i < 4; ++i)
{
const int edge = (i+2)%4;
if (srcX[i] >= 0)
{
MPI_Request req;
MPI_Irecv(dataX[i], NelemX[i], MPIREAL, srcX[i], tagbase+0*4+edge, comm, &req);
e_req.push_back(req);
}
if (srcY[i] >= 0)
{
MPI_Request req;
MPI_Irecv(dataY[i], NelemY[i], MPIREAL, srcY[i], tagbase+1*4+edge, comm, &req);
e_req.push_back(req);
}
if (srcZ[i] >= 0)
{
MPI_Request req;
MPI_Irecv(dataZ[i], NelemZ[i], MPIREAL, srcZ[i], tagbase+2*4+edge, comm, &req);
e_req.push_back(req);
}
}
}
template <typename Tinterp>
void InterpolatorMPI<Tinterp>::_send_corners(const MPI_Comm comm, const MPInbr& nbrs, const Corners& c, std::vector<MPI_Request>& c_req) const
{
const int dst[8] = {nbrs(-1,-1,-1), nbrs(-1,1,-1), nbrs(-1,1,1), nbrs(-1,-1,1), nbrs(1,-1,-1), nbrs(1,1,-1), nbrs(1,1,1), nbrs(1,-1,1)};
const Real* const data[8] = {c.x00.data, c.x01.data, c.x02.data, c.x03.data, c.x10.data, c.x11.data, c.x12.data, c.x13.data};
const int Nelem[8] = {c.x00._Nelem, c.x01._Nelem, c.x02._Nelem, c.x03._Nelem, c.x10._Nelem, c.x11._Nelem, c.x12._Nelem, c.x13._Nelem};
const int tagbase = 18;
for (int i = 0; i < 8; ++i)
{
const int cycl = i%4;
const int side = i/4;
if (dst[i] >= 0)
{
MPI_Request req;
MPI_Isend(data[i], Nelem[i], MPIREAL, dst[i], tagbase+side*4+cycl, comm, &req);
c_req.push_back(req);
}
}
}
template <typename Tinterp>
void InterpolatorMPI<Tinterp>::_recv_corners(const MPI_Comm comm, const MPInbr& nbrs, Corners& c, std::vector<MPI_Request>& c_req) const
{
const int src[8] = {nbrs(-1,-1,-1), nbrs(-1,1,-1), nbrs(-1,1,1), nbrs(-1,-1,1), nbrs(1,-1,-1), nbrs(1,1,-1), nbrs(1,1,1), nbrs(1,-1,1)};
Real* const data[8] = {c.x00.data, c.x01.data, c.x02.data, c.x03.data, c.x10.data, c.x11.data, c.x12.data, c.x13.data};
const int Nelem[8] = {c.x00._Nelem, c.x01._Nelem, c.x02._Nelem, c.x03._Nelem, c.x10._Nelem, c.x11._Nelem, c.x12._Nelem, c.x13._Nelem};
const int tagbase = 18;
for (int i = 0; i < 8; ++i)
{
const int cycl = (i+2)%4;
const int side = (7-i)/4;
if (src[i] >= 0)
{
MPI_Request req;
MPI_Irecv(data[i], Nelem[i], MPIREAL, src[i], tagbase+side*4+cycl, comm, &req);
c_req.push_back(req);
}
}
}
template <typename Tinterp>
void InterpolatorMPI<Tinterp>::_pack_faces(Faces& f, const MPInbr& nbr)
{
const int Nx = this->m_data.Nx();
const int Ny = this->m_data.Ny();
......@@ -417,19 +574,125 @@ void InterpolatorMPI<Tinterp>::_unpack_faces(const Faces& f, const MPInbr& nbr)
const int Nz = this->m_data.Nz();
if (nbr(-1,0,0) >= 0) _upack_faceX(f.x0, -Face_t::_P);
else _set_halo_faceX(0, -Face_t::_P);
else _halo_faceX(0, -Face_t::_P);
if (nbr(1,0,0) >= 0) _upack_faceX(f.x1, Nx);
else _set_halo_faceX(Nx-1, Nx);
else _halo_faceX(Nx-1, Nx);
if (nbr(0,-1,0) >= 0) _upack_faceY(f.y0, -Face_t::_P);
else _set_halo_faceY(0, -Face_t::_P);
else _halo_faceY(0, -Face_t::_P);
if (nbr(0,1,0) >= 0) _upack_faceY(f.y1, Ny);
else _set_halo_faceY(Ny-1, Ny);
else _halo_faceY(Ny-1, Ny);
if (nbr(0,0,-1) >= 0) _upack_faceZ(f.z0, -Face_t::_P);
else _set_halo_faceZ(0, -Face_t::_P);
else _halo_faceZ(0, -Face_t::_P);
if (nbr(0,0,1) >= 0) _upack_faceZ(f.z1, Nz);
else _set_halo_faceZ(Nz-1, Nz);
else _halo_faceZ(Nz-1, Nz);
}
template <typename Tinterp>
void InterpolatorMPI<Tinterp>::_pack_edges(Edges& e, const MPInbr& nbr)
{
const int Nx = this->m_data.Nx();
const int Ny = this->m_data.Ny();
const int Nz = this->m_data.Nz();
if (nbr(0,-1,-1) >= 0) _pack_edgeX(e.x0, 0, 0);
if (nbr(0,1,-1) >= 0) _pack_edgeX(e.x1, Ny-Edge_t::_P, 0);
if (nbr(0,1,1) >= 0) _pack_edgeX(e.x2, Ny-Edge_t::_P, Nz-Edge_t::_Q);
if (nbr(0,-1,1) >= 0) _pack_edgeX(e.x3, 0, Nz-Edge_t::_Q);
if (nbr(-1,0,-1) >= 0) _pack_edgeY(e.y0, 0, 0);
if (nbr(-1,0,1) >= 0) _pack_edgeY(e.y1, 0, Nz-Edge_t::_Q);
if (nbr(1,0,1) >= 0) _pack_edgeY(e.y2, Nx-Edge_t::_P, Nz-Edge_t::_Q);
if (nbr(1,0,-1) >= 0) _pack_edgeY(e.y3, Nx-Edge_t::_P, 0);
if (nbr(-1,-1,0) >= 0) _pack_edgeZ(e.z0, 0, 0);
if (nbr(1,-1,0) >= 0) _pack_edgeZ(e.z1, Nx-Edge_t::_P, 0);
if (nbr(1,1,0) >= 0) _pack_edgeZ(e.z2, Nx-Edge_t::_P, Ny-Edge_t::_Q);
if (nbr(-1,1,0) >= 0) _pack_edgeZ(e.z3, 0, Ny-Edge_t::_Q);
}
template <typename Tinterp>
void InterpolatorMPI<Tinterp>::_unpack_edges(const Edges& e, const MPInbr& nbr)
{
const int Nx = this->m_data.Nx();
const int Ny = this->m_data.Ny();
const int Nz = this->m_data.Nz();
if (nbr(0,-1,-1) >= 0) _upack_edgeX(e.x0, -Edge_t::_P, -Edge_t::_Q);
else _halo_edgeX(0, 0, -Edge_t::_P, -Edge_t::_Q);
if (nbr(0,1,-1) >= 0) _upack_edgeX(e.x1, Ny, -Edge_t::_Q);
else _halo_edgeX(Ny-1, 0, Ny, -Edge_t::_Q);
if (nbr(0,1,1) >= 0) _upack_edgeX(e.x2, Ny, Nz);
else _halo_edgeX(Ny-1, Nz-1, Ny, Nz);
if (nbr(0,-1,1) >= 0) _upack_edgeX(e.x3, -Edge_t::_P, Nz);
else _halo_edgeX(0, Nz-1, -Edge_t::_P, Nz);
if (nbr(-1,0,-1) >= 0) _upack_edgeY(e.y0, -Edge_t::_P, -Edge_t::_Q);
else _halo_edgeY(0, 0, -Edge_t::_P, -Edge_t::_Q);
if (nbr(-1,0,1) >= 0) _upack_edgeY(e.y1, -Edge_t::_P, Nz);
else _halo_edgeY(0, Nz-1, -Edge_t::_P, Nz);
if (nbr(1,0,1) >= 0) _upack_edgeY(e.y2, Nx, Nz);
else _halo_edgeY(Nx-1, Nz-1, Nx, Nz);
if (nbr(1,0,-1) >= 0) _upack_edgeY(e.y3, Nx, -Edge_t::_Q);
else _halo_edgeY(Nx-1, 0, Nx, -Edge_t::_Q);
if (nbr(-1,-1,0) >= 0) _upack_edgeZ(e.z0, -Edge_t::_P, -Edge_t::_Q);
else _halo_edgeZ(0, 0, -Edge_t::_P, -Edge_t::_Q);
if (nbr(1,-1,0) >= 0) _upack_edgeZ(e.z1, Nx, -Edge_t::_Q);
else _halo_edgeZ(Nx-1, 0, Nx, -Edge_t::_Q);
if (nbr(1,1,0) >= 0) _upack_edgeZ(e.z2, Nx, Ny);
else _halo_edgeZ(Nx-1, Ny-1, Nx, Ny);
if (nbr(-1,1,0) >= 0) _upack_edgeZ(e.z3, -Edge_t::_P, Ny);
else _halo_edgeZ(0, Ny-1, -Edge_t::_P, Ny);
}
template <typename Tinterp>
void InterpolatorMPI<Tinterp>::_pack_corners(Corners& c, const MPInbr& nbr)
{
const int Nx = this->m_data.Nx();
const int Ny = this->m_data.Ny();
const int Nz = this->m_data.Nz();
if (nbr(-1,-1,-1) >= 0) _pack_corner(c.x00, 0, 0, 0);
if (nbr(-1,1,-1) >= 0) _pack_corner(c.x01, 0, Ny-Corner_t::_Q, 0);
if (nbr(-1,1,1) >= 0) _pack_corner(c.x02, 0, Ny-Corner_t::_Q, Nz-Corner_t::_R);
if (nbr(-1,-1,1) >= 0) _pack_corner(c.x03, 0, 0, Nz-Corner_t::_R);
if (nbr(1,-1,-1) >= 0) _pack_corner(c.x10, Nx-Corner_t::_P, 0, 0);
if (nbr(1,1,-1) >= 0) _pack_corner(c.x11, Nx-Corner_t::_P, Ny-Corner_t::_Q, 0);
if (nbr(1,1,1) >= 0) _pack_corner(c.x12, Nx-Corner_t::_P, Ny-Corner_t::_Q, Nz-Corner_t::_R);
if (nbr(1,-1,1) >= 0) _pack_corner(c.x13, Nx-Corner_t::_P, 0, Nz-Corner_t::_R);
}
template <typename Tinterp>
void InterpolatorMPI<Tinterp>::_unpack_corners(const Corners& c, const MPInbr& nbr)
{
const int Nx = this->m_data.Nx();
const int Ny = this->m_data.Ny();
const int Nz = this->m_data.Nz();
if (nbr(-1,-1,-1) >= 0) _upack_corner(c.x00, -Corner_t::_P, -Corner_t::_Q, -Corner_t::_R);
else _halo_corner(0, 0, 0, -Corner_t::_P, -Corner_t::_Q, -Corner_t::_R);
if (nbr(-1,1,-1) >= 0) _upack_corner(c.x01, -Corner_t::_P, Ny, -Corner_t::_R);
else _halo_corner(0, Ny-1, 0, -Corner_t::_P, Ny, -Corner_t::_R);
if (nbr(-1,1,1) >= 0) _upack_corner(c.x02, -Corner_t::_P, Ny, Nz);
else _halo_corner(0, Ny-1, Nz-1, -Corner_t::_P, Ny, Nz);
if (nbr(-1,-1,1) >= 0) _upack_corner(c.x03, -Corner_t::_P, -Corner_t::_Q, Nz);
else _halo_corner(0, 0, Nz-1, -Corner_t::_P, -Corner_t::_Q, Nz);
if (nbr(1,-1,-1) >= 0) _upack_corner(c.x10, Nx, -Corner_t::_Q, -Corner_t::_R);
else _halo_corner(Nx-1, 0, 0, Nx, -Corner_t::_Q, -Corner_t::_R);
if (nbr(1,1,-1) >= 0) _upack_corner(c.x11, Nx, Ny, -Corner_t::_R);
else _halo_corner(Nx-1, Ny-1, 0, Nx, Ny, -Corner_t::_R);
if (nbr(1,1,1) >= 0) _upack_corner(c.x12, Nx, Ny, Nz);
else _halo_corner(Nx-1, Ny-1, Nz-1, Nx, Ny, Nz);
if (nbr(1,-1,1) >= 0) _upack_corner(c.x13, Nx, -Corner_t::_Q, Nz);
else _halo_corner(Nx-1, 0, Nz-1, Nx, -Corner_t::_Q, Nz);
}
// PUP kernels
......
......@@ -6,85 +6,223 @@
#ifndef INTERPOLATORPUPMPI_H_EPMRY0VT
#define INTERPOLATORPUPMPI_H_EPMRY0VT
///////////////////////////////////////////////////////////////////////////////
// FACES
///////////////////////////////////////////////////////////////////////////////
template <typename Tinterp>
void InterpolatorMPI<Tinterp>::_pack_faceX(Face_t& f, const int _s) const
void InterpolatorMPI<Tinterp>::_process_faceX(func_t& eval)
{
for (int iz=0; iz<this->m_data.Nz(); ++iz)
for (int iy=0; iy<this->m_data.Ny(); ++iy)
for (int ix=0; ix<Face_t::_P; ++ix)
f(iy,iz,ix) = this->m_data(ix+_s,iy,iz);
eval(ix,iy,iz);
}
template <typename Tinterp>
void InterpolatorMPI<Tinterp>::_pack_faceY(Face_t& f, const int _s) const
void InterpolatorMPI<Tinterp>::_process_faceY(func_t& eval)
{
for (int iz=0; iz<this->m_data.Nz(); ++iz)
for (int iy=0; iy<Face_t::_P; ++iy)
for (int ix=0; ix<this->m_data.Nx(); ++ix)
f(ix,iz,iy) = this->m_data(ix,iy+_s,iz);
eval(ix,iy,iz);
}
template <typename Tinterp>
void InterpolatorMPI<Tinterp>::_pack_faceZ(Face_t& f, const int _s) const
void InterpolatorMPI<Tinterp>::_process_faceZ(func_t& eval)
{
for (int iz=0; iz<Face_t::_P; ++iz)
for (int iy=0; iy<this->m_data.Ny(); ++iy)
for (int ix=0; ix<this->m_data.Nx(); ++ix)
f(ix,iy,iz) = this->m_data(ix,iy,iz+_s);
eval(ix,iy,iz);
}
template <typename Tinterp>
void InterpolatorMPI<Tinterp>::_upack_faceX(const Face_t& f, const int _d)
void InterpolatorMPI<Tinterp>::_pack_faceX(Face_t& f, const int _s0)
{
for (int iz=0; iz<this->m_data.Nz(); ++iz)
for (int iy=0; iy<this->m_data.Ny(); ++iy)
for (int ix=0; ix<Face_t::_P; ++ix)
this->m_data(ix+_d,iy,iz) = f(iy,iz,ix);
func_t pack = [this, &f, _s0](const int ix, const int iy, const int iz) { f(iy,iz,ix) = this->m_data(ix+_s0,iy,iz); };
_process_faceX(pack);
}
template <typename Tinterp>
void InterpolatorMPI<Tinterp>::_upack_faceY(const Face_t& f, const int _d)
void InterpolatorMPI<Tinterp>::_pack_faceY(Face_t& f, const int _s0)
{
for (int iz=0; iz<this->m_data.Nz(); ++iz)
for (int iy=0; iy<Face_t::_P; ++iy)
for (int ix=0; ix<this->m_data.Nx(); ++ix)
this->m_data(ix,iy+_d,iz) = f(ix,iz,iy);
func_t pack = [this, &f, _s0](const int ix, const int iy, const int iz) { f(ix,iz,iy) = this->m_data(ix,iy+_s0,iz); };
_process_faceY(pack);
}
template <typename Tinterp>
void InterpolatorMPI<Tinterp>::_upack_faceZ(const Face_t& f, const int _d)
void InterpolatorMPI<Tinterp>::_pack_faceZ(Face_t& f, const int _s0)
{
for (int iz=0; iz<Face_t::_P; ++iz)
for (int iy=0; iy<this->m_data.Ny(); ++iy)
func_t pack = [this, &f, _s0](const int ix, const int iy, const int iz) { f(ix,iy,iz) = this->m_data(ix,iy,iz+_s0); };
_process_faceZ(pack);
}
template <typename Tinterp>
void InterpolatorMPI<Tinterp>::_upack_faceX(const Face_t& f, const int _d0)
{
func_t unpack = [this, &f, _d0](const int ix, const int iy, const int iz) { this->m_data(ix+_d0,iy,iz) = f(iy,iz,ix); };
_process_faceX(unpack);
}
template <typename Tinterp>
void InterpolatorMPI<Tinterp>::_upack_faceY(const Face_t& f, const int _d0)
{
func_t unpack = [this, &f, _d0](const int ix, const int iy, const int iz) { this->m_data(ix,iy+_d0,iz) = f(ix,iz,iy); };
_process_faceY(unpack);
}
template <typename Tinterp>
void InterpolatorMPI<Tinterp>::_upack_faceZ(const Face_t& f, const int _d0)
{
func_t unpack = [this, &f, _d0](const int ix, const int iy, const int iz) { this->m_data(ix,iy,iz+_d0) = f(ix,iy,iz); };
_process_faceZ(unpack);