Commit 9ab57e26 authored by kicici

Remove _BLOCKSIZE*_ macros

Instead of using _BLOCKSIZE*_ macros, use Block::sizeX/Y/Z.
The slice dumper is currently not implemented for non-cubic blocks.
The pack/unpack functions now take the block size as a runtime argument,
which shouldn't really affect performance.

In tests, avoid _Capitalized identifiers, as names that begin with an underscore followed by an uppercase letter are reserved by C++.
parent b76d62c8
......@@ -27,6 +27,10 @@ CUBISM_NAMESPACE_BEGIN
template <typename Block, template<typename X> class allocator=std::allocator>
class Grid
{
// Here we actually want to ensure asap that Block::sizeX/Y/Z are defined.
static_assert(Block::sizeX > 0, "Block size should be a positive integer.");
static_assert(Block::sizeY > 0, "Block size should be a positive integer.");
static_assert(Block::sizeZ > 0, "Block size should be a positive integer.");
Block * m_blocks;
std::vector<BlockInfo> m_vInfo;
......
......@@ -25,6 +25,13 @@ namespace SliceTypes
template <typename TGrid>
class Slice
{
// To generalize, check all occurrences of BS and replace with
// the appropriate X/Y/Z.
static_assert(TGrid::BlockType::sizeX == TGrid::BlockType::sizeY
&& TGrid::BlockType::sizeX == TGrid::BlockType::sizeZ,
"Only cubic block type implemented so far.");
static constexpr int BS = TGrid::BlockType::sizeX;
public:
template <typename TSlice>
static std::vector<TSlice> getEntities(ArgumentParser& parser, TGrid& grid)
......@@ -120,8 +127,8 @@ namespace SliceTypes
std::vector<BlockInfo> bInfo_local = m_grid->getBlocksInfo();
for (size_t i = 0; i < bInfo_local.size(); ++i)
{
const int start = bInfo_local[i].index[m_dir] * _BLOCKSIZE_;
if (start <= m_idx && m_idx < (start+_BLOCKSIZE_))
const int start = bInfo_local[i].index[m_dir] * BS;
if (start <= m_idx && m_idx < (start + BS))
m_intersecting_blocks.push_back(bInfo_local[i]);
}
......@@ -191,7 +198,7 @@ namespace SliceTypes
template <typename TStreamer, typename hdf5Real>
void _YZ(hdf5Real * const data) const
{
const int ix = m_idx % _BLOCKSIZE_;
const int ix = m_idx % BS;
const unsigned int NCHANNELS = TStreamer::NCHANNELS;
#pragma omp parallel for
......@@ -224,7 +231,7 @@ namespace SliceTypes
template <typename TStreamer, typename hdf5Real>
void _XZ(hdf5Real * const data) const
{
const int iy = m_idx % _BLOCKSIZE_;
const int iy = m_idx % BS;
const unsigned int NCHANNELS = TStreamer::NCHANNELS;
#pragma omp parallel for
......@@ -257,7 +264,7 @@ namespace SliceTypes
template <typename TStreamer, typename hdf5Real>
void _YX(hdf5Real * const data) const
{
const int iz = m_idx % _BLOCKSIZE_;
const int iz = m_idx % BS;
const unsigned int NCHANNELS = TStreamer::NCHANNELS;
#pragma omp parallel for
......
......@@ -20,6 +20,13 @@ namespace SliceTypesMPI
template <typename TGrid>
class Slice : public SliceTypes::Slice<TGrid>
{
// To generalize, check all occurrences of BS and replace with
// the appropriate X/Y/Z.
static_assert(TGrid::BlockType::sizeX == TGrid::BlockType::sizeY
&& TGrid::BlockType::sizeX == TGrid::BlockType::sizeZ,
"Only cubic block type implemented so far.");
static constexpr int BS = TGrid::BlockType::sizeX;
public:
template <typename TSlice>
static std::vector<TSlice> getEntities(ArgumentParser& parser, TGrid& grid)
......@@ -63,8 +70,8 @@ namespace SliceTypesMPI
this->m_intersecting_blocks.swap(clean);
for (size_t i = 0; i < bInfo_local.size(); ++i)
{
const int start = bInfo_local[i].index[this->m_dir] * _BLOCKSIZE_;
if (start <= this->m_idx && this->m_idx < (start+_BLOCKSIZE_))
const int start = bInfo_local[i].index[this->m_dir] * BS;
if (start <= this->m_idx && this->m_idx < (start + BS))
this->m_intersecting_blocks.push_back(bInfo_local[i]);
}
......
......@@ -19,13 +19,14 @@ inline void pack(const Real * const srcbase, Real * const dst,
const unsigned int gptfloats,
int * selected_components, const int ncomponents,
const int xstart, const int ystart, const int zstart,
const int xend, const int yend, const int zend)
const int xend, const int yend, const int zend,
const int BSX, const int BSY)
{
for(int idst=0, iz=zstart; iz<zend; ++iz)
for(int iy=ystart; iy<yend; ++iy)
for(int ix=xstart; ix<xend; ++ix)
{
const Real * src = srcbase + gptfloats*(ix + _BLOCKSIZEX_*(iy + _BLOCKSIZEY_*iz));
const Real * src = srcbase + gptfloats*(ix + BSX*(iy + BSY*iz));
// bgq: s_c[ic] = ic! -> memcpy or stripes...
for(int ic=0; ic<ncomponents; ic++, idst++)
......@@ -38,13 +39,14 @@ inline void pack_stripes1(const Real * const srcbase, Real * const dst,
const unsigned int gptfloats,
const int selstart, const int selend,
const int xstart, const int ystart, const int zstart,
const int xend, const int yend, const int zend)
const int xend, const int yend, const int zend,
const int BSX, const int BSY)
{
for(int idst=0, iz=zstart; iz<zend; ++iz)
for(int iy=ystart; iy<yend; ++iy)
for(int ix=xstart; ix<xend; ++ix)
{
const Real * src = srcbase + gptfloats*(ix + _BLOCKSIZEX_*(iy + _BLOCKSIZEY_*iz));
const Real * src = srcbase + gptfloats*(ix + BSX*(iy + BSY*iz));
for(int ic=selstart; ic<selend; ic++, idst++)
dst[idst] = src[ic];
......@@ -56,7 +58,8 @@ inline void pack_stripes_(const Real * const srcbase, Real * const dst,
const unsigned int gptfloats,
const int selstart, const int selend,
const int xstart, const int ystart, const int zstart,
const int xend, const int yend, const int zend)
const int xend, const int yend, const int zend,
const int BSX, const int BSY)
{
const int seldiff = selend - selstart;
const int nbytes = seldiff*sizeof(Real);
......@@ -66,7 +69,7 @@ inline void pack_stripes_(const Real * const srcbase, Real * const dst,
{
for(int ix=xstart; ix<xend; ++ix)
{
const Real * src = srcbase + gptfloats*(ix + _BLOCKSIZEX_*(iy + _BLOCKSIZEY_*iz));
const Real * src = srcbase + gptfloats*(ix + BSX*(iy + BSY*iz));
memcpy(&dst[idst], &src[selstart], nbytes);
idst += seldiff;
......@@ -80,18 +83,19 @@ inline void pack_stripes_x(const Real * const srcbase, Real * const dst,
const unsigned int gptfloats,
const int selstart, const int selend,
const int xstart, const int ystart, const int zstart,
const int xend, const int yend, const int zend)
const int xend, const int yend, const int zend,
const int BSX, const int BSY)
{
const int seldiff = selend - selstart;
const int nbytes = seldiff*sizeof(Real);
const int _BS_XY_ = _BLOCKSIZEX_*_BLOCKSIZEY_;
const int _BS_XY_ = BSX*BSY;
int idst = 0;
for(int iz=zstart; iz<zend; ++iz)
{
const int iz_off = _BS_XY_*iz;
for(int iy=ystart; iy<yend; ++iy)
{
const int iy_off = _BLOCKSIZEX_*iy;
const int iy_off = BSX*iy;
for(int ix=xstart; ix<xend; ++ix)
{
const Real * src = srcbase + gptfloats*(ix + iy_off + iz_off);
......@@ -115,7 +119,8 @@ inline void pack_stripes_unroll0(const Real * const srcbase, Real * const dst,
const unsigned int gptfloats,
const int selstart, const int selend,
const int xstart, const int ystart, const int zstart,
const int xend, const int yend, const int zend)
const int xend, const int yend, const int zend,
const int BSX, const int BSY)
{
const int seldiff = selend - selstart;
const int nbytes = seldiff*sizeof(Real);
......@@ -131,7 +136,7 @@ inline void pack_stripes_unroll0(const Real * const srcbase, Real * const dst,
int ix = xstart;
while (repeat-- > 0)
{
const Real * src0 = srcbase + gptfloats*(ix + _BLOCKSIZEX_*(iy + _BLOCKSIZEY_*iz));
const Real * src0 = srcbase + gptfloats*(ix + BSX*(iy + BSY*iz));
const Real * src1 = src0 + gptfloats;
const Real * src2 = src1 + gptfloats;
const Real * src3 = src2 + gptfloats;
......@@ -146,7 +151,7 @@ inline void pack_stripes_unroll0(const Real * const srcbase, Real * const dst,
if (left == 3)
{
const Real * src0 = srcbase + gptfloats*(ix + _BLOCKSIZEX_*(iy + _BLOCKSIZEY_*iz));
const Real * src0 = srcbase + gptfloats*(ix + BSX*(iy + BSY*iz));
const Real * src1 = src0 + gptfloats;
const Real * src2 = src1 + gptfloats;
......@@ -158,7 +163,7 @@ inline void pack_stripes_unroll0(const Real * const srcbase, Real * const dst,
}
else if (left == 2)
{
const Real * src0 = srcbase + gptfloats*(ix + _BLOCKSIZEX_*(iy + _BLOCKSIZEY_*iz));
const Real * src0 = srcbase + gptfloats*(ix + BSX*(iy + BSY*iz));
const Real * src1 = src0 + gptfloats;
memcpy2((char *)&dst[idst+0*seldiff], (char *)&src0[selstart], nbytes);
......@@ -168,7 +173,7 @@ inline void pack_stripes_unroll0(const Real * const srcbase, Real * const dst,
}
else /* left == 1 */
{
const Real * src0 = srcbase + gptfloats*(ix + _BLOCKSIZEX_*(iy + _BLOCKSIZEY_*iz));
const Real * src0 = srcbase + gptfloats*(ix + BSX*(iy + BSY*iz));
memcpy2((char *)&dst[idst+0*seldiff], (char *)&src0[selstart], nbytes);
idst += 1*seldiff;
......@@ -184,7 +189,8 @@ inline void pack_stripesxx(const Real * const srcbase, Real * const dst,
const unsigned int gptfloats,
const int selstart, const int selend,
const int xstart, const int ystart, const int zstart,
const int xend, const int yend, const int zend)
const int xend, const int yend, const int zend,
const int BSX, const int BSY)
{
// printf("xstart/end = (%d, %d)\n", xstart, xend);
const int seldiff = selend - selstart;
......@@ -197,7 +203,7 @@ inline void pack_stripesxx(const Real * const srcbase, Real * const dst,
{
for(int ix=xstart; ix<xend; ++ix)
{
const Real * src = srcbase + gptfloats*(ix + _BLOCKSIZEX_*(iy + _BLOCKSIZEY_*iz));
const Real * src = srcbase + gptfloats*(ix + BSX*(iy + BSY*iz));
memcpy2((char *)&dst[idst], (char *)&src[selstart], nbytes);
idst += seldiff;
......@@ -207,7 +213,7 @@ inline void pack_stripesxx(const Real * const srcbase, Real * const dst,
{
for(int ix=xstart; ix<xend; ix+=8)
{
const Real * src0 = srcbase + gptfloats*(ix + _BLOCKSIZEX_*(iy + _BLOCKSIZEY_*iz));
const Real * src0 = srcbase + gptfloats*(ix + BSX*(iy + BSY*iz));
const Real * src1 = src0 + gptfloats;
const Real * src2 = src1 + gptfloats;
const Real * src3 = src2 + gptfloats;
......@@ -231,7 +237,7 @@ inline void pack_stripesxx(const Real * const srcbase, Real * const dst,
{
for(int ix=xstart; ix<xend; ix+=4)
{
const Real * src0 = srcbase + gptfloats*(ix + _BLOCKSIZEX_*(iy + _BLOCKSIZEY_*iz));
const Real * src0 = srcbase + gptfloats*(ix + BSX*(iy + BSY*iz));
const Real * src1 = src0 + gptfloats;
const Real * src2 = src1 + gptfloats;
const Real * src3 = src2 + gptfloats;
......@@ -252,12 +258,13 @@ inline void pack_stripes(const Real * const srcbase, Real * const dst,
const unsigned int gptfloats,
const int selstart, const int selend,
const int xstart, const int ystart, const int zstart,
const int xend, const int yend, const int zend)
const int xend, const int yend, const int zend,
const int BSX, const int BSY)
{
const int seldiff = selend - selstart;
const int nbytes = seldiff*sizeof(Real);
if ((xend - xstart) == _BLOCKSIZEX_)
if ((xend - xstart) == BSX)
{
for(int idst=0, iz=zstart; iz<zend; ++iz)
{
......@@ -265,7 +272,7 @@ inline void pack_stripes(const Real * const srcbase, Real * const dst,
{
for(int ix=xstart; ix<xend; ix+=4)
{
const Real * src0 = srcbase + gptfloats*(ix + _BLOCKSIZEX_*(iy + _BLOCKSIZEY_*iz));
const Real * src0 = srcbase + gptfloats*(ix + BSX*(iy + BSY*iz));
const Real * src1 = src0 + gptfloats;
const Real * src2 = src1 + gptfloats;
const Real * src3 = src2 + gptfloats;
......@@ -288,7 +295,7 @@ inline void pack_stripes(const Real * const srcbase, Real * const dst,
int ix = xstart;
//for(int ix=xstart; ix<xend; ++ix)
{
const Real * src0 = srcbase + gptfloats*(ix + _BLOCKSIZEX_*(iy + _BLOCKSIZEY_*iz));
const Real * src0 = srcbase + gptfloats*(ix + BSX*(iy + BSY*iz));
const Real * src1 = src0 + gptfloats;
const Real * src2 = src1 + gptfloats;
......
......@@ -775,7 +775,9 @@ public:
for(int i=0; i<N; ++i)
{
PackInfo info = send_packinfos[i];
pack(info.block, info.pack, gptfloats, &selcomponents.front(), NC, info.sx, info.sy, info.sz, info.ex, info.ey, info.ez);
pack(info.block, info.pack, gptfloats, &selcomponents.front(), NC,
info.sx, info.sy, info.sz, info.ex, info.ey, info.ez,
blocksize[0], blocksize[1]);
}
}
else
......@@ -787,7 +789,9 @@ public:
for(int i=0; i<N; ++i)
{
PackInfo info = send_packinfos[i];
pack_stripes(info.block, info.pack, gptfloats, selstart, selend, info.sx, info.sy, info.sz, info.ex, info.ey, info.ez);
pack_stripes(info.block, info.pack, gptfloats, selstart, selend,
info.sx, info.sy, info.sz, info.ex, info.ey, info.ez,
blocksize[0], blocksize[1]);
}
}
}
......@@ -972,7 +976,9 @@ public:
for(int i=0; i<N; ++i)
{
PackInfo info = send_packinfos[i];
pack(info.block, info.pack, gptfloats, &selcomponents.front(), NC, info.sx, info.sy, info.sz, info.ex, info.ey, info.ez);
pack(info.block, info.pack, gptfloats, &selcomponents.front(), NC,
info.sx, info.sy, info.sz, info.ex, info.ey, info.ez,
blocksize[0], blocksize[1]);
}
}
else
......@@ -984,7 +990,9 @@ public:
for(int i=0; i<N; ++i)
{
PackInfo info = send_packinfos[i];
pack_stripes(info.block, info.pack, gptfloats, selstart, selend, info.sx, info.sy, info.sz, info.ex, info.ey, info.ez);
pack_stripes(info.block, info.pack, gptfloats, selstart, selend,
info.sx, info.sy, info.sz, info.ex, info.ey, info.ez,
blocksize[0], blocksize[1]);
}
}
......
......@@ -12,10 +12,7 @@ using MyReal = double;
using MyReal = float;
#endif /* _DOUBLE_ */
#define _BLOCKSIZE_ 16
#define _BLOCKSIZEX_ _BLOCKSIZE_
#define _BLOCKSIZEY_ _BLOCKSIZE_
#define _BLOCKSIZEZ_ _BLOCKSIZE_
constexpr int BLOCK_SIZE = 16;
#include <cassert>
#include <mpi.h>
......@@ -34,30 +31,32 @@ struct Block
typedef TReal ElementType;
typedef TReal element_type;
typedef TReal Real;
static const size_t sizeX = _BLOCKSIZE_;
static const size_t sizeY = _BLOCKSIZE_;
static const size_t sizeZ = _BLOCKSIZE_;
static const size_t members = _AOSmembers;
static constexpr size_t sizeX = BLOCK_SIZE;
static constexpr size_t sizeY = BLOCK_SIZE;
static constexpr size_t sizeZ = BLOCK_SIZE;
static constexpr size_t members = _AOSmembers;
inline void clear() { memset(&m_data[0][0][0][0], 0, _BLOCKSIZE_*_BLOCKSIZE_*_BLOCKSIZE_*_AOSmembers*sizeof(TReal)); }
inline void clear() {
memset(&m_data[0][0][0][0], 0, BLOCK_SIZE * BLOCK_SIZE * BLOCK_SIZE * _AOSmembers * sizeof(TReal));
}
inline const TReal (&operator()(const size_t ix, const size_t iy, const size_t iz) const)[_AOSmembers]
{
assert(ix<_BLOCKSIZE_);
assert(iy<_BLOCKSIZE_);
assert(iz<_BLOCKSIZE_);
assert(ix<BLOCK_SIZE);
assert(iy<BLOCK_SIZE);
assert(iz<BLOCK_SIZE);
return this->m_data[iz][iy][ix];
}
inline TReal (&operator()(const size_t ix, const size_t iy, const size_t iz))[_AOSmembers]
{
assert(ix<_BLOCKSIZE_);
assert(iy<_BLOCKSIZE_);
assert(iz<_BLOCKSIZE_);
assert(ix<BLOCK_SIZE);
assert(iy<BLOCK_SIZE);
assert(iz<BLOCK_SIZE);
return this->m_data[iz][iy][ix];
}
TReal m_data[_BLOCKSIZE_][_BLOCKSIZE_][_BLOCKSIZE_][_AOSmembers];
TReal m_data[BLOCK_SIZE][BLOCK_SIZE][BLOCK_SIZE][_AOSmembers];
};
template <size_t _comp=0>
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment