Commit a7b35095 authored by fabianw
Browse files

fixed shared memory issue in wavelet loaders

parent c6c54438
......@@ -236,25 +236,28 @@ void Interpolator<TKernel>::_load_wavelet(ArgumentParser& p)
const size_t num_elem = static_cast<size_t>(maxDim[0]) * maxDim[1] * maxDim[2];
Real* const data = new Real[num_elem];
Real blockdata[_BLOCKSIZE_][_BLOCKSIZE_][_BLOCKSIZE_];
const int nBlocks = NBX * NBY * NBZ;
#pragma omp parallel for
for (int i = 0; i < nBlocks; ++i)
#pragma omp parallel
{
const int ix = i%NBX;
const int iy = (i/NBX)%NBY;
const int iz = (i/(NBX*NBY))%NBZ;
const double zratio = myreader.load_block2(ix, iy, iz, blockdata);
Real blockdata[_BLOCKSIZE_][_BLOCKSIZE_][_BLOCKSIZE_];
#pragma omp for
for (int i = 0; i < nBlocks; ++i)
{
const int ix = i%NBX;
const int iy = (i/NBX)%NBY;
const int iz = (i/(NBX*NBY))%NBZ;
const double zratio = myreader.load_block2(ix, iy, iz, blockdata);
for (int z=0; z < _BLOCKSIZE_; ++z)
for (int y=0; y < _BLOCKSIZE_; ++y)
{
assert(iy*_BLOCKSIZE_+y < maxDim[1]);
assert(iz*_BLOCKSIZE_+z < maxDim[2]);
const size_t offset = _BLOCKSIZE_*(static_cast<size_t>(ix) + NBX*(y+iy*_BLOCKSIZE_ + (z+static_cast<size_t>(iz)*_BLOCKSIZE_)*NBY*_BLOCKSIZE_));
Real* const dst = data + offset;
memcpy(dst, &blockdata[z][y][0], _BLOCKSIZE_*sizeof(Real));
}
for (int z=0; z < _BLOCKSIZE_; ++z)
for (int y=0; y < _BLOCKSIZE_; ++y)
{
assert(iy*_BLOCKSIZE_+y < maxDim[1]);
assert(iz*_BLOCKSIZE_+z < maxDim[2]);
const size_t offset = _BLOCKSIZE_*(static_cast<size_t>(ix) + NBX*(y+iy*_BLOCKSIZE_ + (z+static_cast<size_t>(iz)*_BLOCKSIZE_)*NBY*_BLOCKSIZE_));
Real* const dst = data + offset;
memcpy(dst, &blockdata[z][y][0], _BLOCKSIZE_*sizeof(Real));
}
}
}
this->m_data = std::move(Matrix_t(maxDim[0], maxDim[1], maxDim[2], data));
......
......@@ -384,29 +384,32 @@ void InterpolatorMPI<Tinterp>::_load_wavelet_MPI(ArgumentParser& p)
const size_t num_elem = static_cast<size_t>(maxDim[0]) * maxDim[1] * maxDim[2];
Real* const data = new Real[num_elem];
Real blockdata[_BLOCKSIZE_][_BLOCKSIZE_][_BLOCKSIZE_];
const int nBlocks = myNBX * myNBY * myNBZ;
#pragma omp parallel for
for (int i = 0; i < nBlocks; ++i)
#pragma omp parallel
{
const int ix = i%myNBX;
const int iy = (i/myNBX)%myNBY;
const int iz = (i/(myNBX*myNBY))%myNBZ;
const double zratio = myreader.load_block2(
ix+myNBX*m_myPEIndex[0],
iy+myNBY*m_myPEIndex[1],
iz+myNBZ*m_myPEIndex[2],
blockdata);
for (int z=0; z < _BLOCKSIZE_; ++z)
for (int y=0; y < _BLOCKSIZE_; ++y)
{
assert(iy*_BLOCKSIZE_+y < maxDim[1]);
assert(iz*_BLOCKSIZE_+z < maxDim[2]);
const size_t offset = _BLOCKSIZE_*(static_cast<size_t>(ix) + myNBX*(y+iy*_BLOCKSIZE_ + (z+static_cast<size_t>(iz)*_BLOCKSIZE_)*myNBY*_BLOCKSIZE_));
Real* const dst = data + offset;
memcpy(dst, &blockdata[z][y][0], _BLOCKSIZE_*sizeof(Real));
}
Real blockdata[_BLOCKSIZE_][_BLOCKSIZE_][_BLOCKSIZE_];
#pragma omp for
for (int i = 0; i < nBlocks; ++i)
{
const int ix = i%myNBX;
const int iy = (i/myNBX)%myNBY;
const int iz = (i/(myNBX*myNBY))%myNBZ;
const double zratio = myreader.load_block2(
ix+myNBX*m_myPEIndex[0],
iy+myNBY*m_myPEIndex[1],
iz+myNBZ*m_myPEIndex[2],
blockdata);
for (int z=0; z < _BLOCKSIZE_; ++z)
for (int y=0; y < _BLOCKSIZE_; ++y)
{
assert(iy*_BLOCKSIZE_+y < maxDim[1]);
assert(iz*_BLOCKSIZE_+z < maxDim[2]);
const size_t offset = _BLOCKSIZE_*(static_cast<size_t>(ix) + myNBX*(y+iy*_BLOCKSIZE_ + (z+static_cast<size_t>(iz)*_BLOCKSIZE_)*myNBY*_BLOCKSIZE_));
Real* const dst = data + offset;
memcpy(dst, &blockdata[z][y][0], _BLOCKSIZE_*sizeof(Real));
}
}
}
this->m_data = std::move(Matrix_t(maxDim[0], maxDim[1], maxDim[2], data));
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment