Commit ec1f40fa authored by fabianw
Browse files

added barrier after iso-extraction

parent 562a2725
......@@ -469,7 +469,7 @@ void InterpolatorMPI<Tinterp>::_load_wavelet_MPI(ArgumentParser& p, const std::s
#pragma omp critical
{
t_load += t;
t_load += t/nthreads;
}
}
......
......@@ -66,13 +66,21 @@ public:
assert(origin[2] >= 0.0);
this->m_interp.setOrigin(origin);
std::chrono::time_point<std::chrono::system_clock> start, end;
start = std::chrono::system_clock::now();
this->_extractIso(isoval, Nx, Ny, Nz, chx, chy, chz, Ox, Oy, Oz, this->m_interp.isroot());
const MPI_Comm comm = this->m_interp.getComm();
MPI_Barrier(comm);
end = std::chrono::system_clock::now();
std::chrono::duration<double> elapsed_seconds = end-start;
if (this->m_interp.isroot())
std::cout << "Iso-extraction done (all ranks). Elapsed time = " << elapsed_seconds.count() << "s" << std::endl;
std::chrono::time_point<std::chrono::system_clock> start, end;
start = std::chrono::system_clock::now();
_syncMesh();
end = std::chrono::system_clock::now();
std::chrono::duration<double> elapsed_seconds = end-start;
elapsed_seconds = end-start;
if (this->m_interp.isroot())
std::cout << "Mesh sync done. Elapsed time = " << elapsed_seconds.count() << "s" << std::endl;
......@@ -273,6 +281,8 @@ private:
MPInbr nbrs(comm);
Timer t1, t2;
// order is important
const int myDST[7] = { nbrs(1,1,1), nbrs(0,1,1), nbrs(1,0,1), nbrs(0,0,1), nbrs(1,1,0), nbrs(0,1,0), nbrs(1,0,0) };
const int mySRC[7] = { nbrs(-1,-1,-1), nbrs(0,-1,-1), nbrs(-1,0,-1), nbrs(0,0,-1), nbrs(-1,-1,0), nbrs(0,-1,0), nbrs(-1,0,0) };
......@@ -282,10 +292,19 @@ private:
MPI_Comm_rank(comm, &rank);
// 1.) get initial number of vertices on all ranks
t1.start();
std::vector<unsigned long long> rankVertices0(size);
unsigned long long myVertices = mymesh.vertices().size();
MPI_Allgather(&myVertices, 1, MPI_UNSIGNED_LONG_LONG, rankVertices0.data(), 1, MPI_UNSIGNED_LONG_LONG, comm);
const double t_allg0 = t1.stop();
double tg_allg0 = 0;
MPI_Reduce(&t_allg0, &tg_allg0, 1, MPI_DOUBLE, MPI_SUM, 0, comm);
if (0 == rank)
{
std::cout << "Total time spent in Allgather/offset verts: " << tg_allg0 << "s" << std::endl;
}
// 2.) request the recv buffer size (non-blocking)
std::vector<MPI_Request> recvBufSize_req(7, MPI_REQUEST_NULL);
std::vector<unsigned long long> recvBufSize(7,0);
......@@ -307,7 +326,15 @@ private:
sendBufVerts[i] = new TMesh::VertVec(0);
sendBufVertsIdx[i] = new std::vector<TMesh::Int>(0);
}
t1.start();
_extract_send_buffers(mymesh, sendBufVerts, sendBufVertsIdx);
const double t_extraction = t1.stop();
double tg_extraction = 0;
MPI_Reduce(&t_extraction, &tg_extraction, 1, MPI_DOUBLE, MPI_SUM, 0, comm);
if (0 == rank)
{
std::cout << "Total time spent in extraction: " << tg_extraction << "s" << std::endl;
}
// 4.) send buffer sizes and data to neighbors
unsigned long long bs[7];
......@@ -332,6 +359,8 @@ private:
for (size_t j = 0; j < rankVertices0.size(); ++j)
max0 = std::max(max0, static_cast<TMesh::Int>(rankVertices0[j]));
double t_fenc = 0;
t1.start();
for (int i = 0; i < 7; ++i)
{
if (mySRC[i] >= 0)
......@@ -343,12 +372,24 @@ private:
MPI_Status stat0;
MPI_Recv(&recvVerts.front().x, recvBufSize[i]*sizeof(TMesh::Point_t), MPI_CHAR, mySRC[i], 10+i, comm, &stat0);
t2.start();
_substitute_duplicates_encode(mymesh, recvVerts, sendBufVertsIdx, i*max0);
t_fenc += t2.stop();
}
}
}
const double t_enc = t1.stop();
double tg_fenc = 0;
double tg_enc = 0;
MPI_Reduce(&t_fenc, &tg_fenc, 1, MPI_DOUBLE, MPI_SUM, 0, comm);
MPI_Reduce(&t_enc, &tg_enc, 1, MPI_DOUBLE, MPI_SUM, 0, comm);
if (0 == rank)
{
std::cout << "Total time spent in substitution/encode: " << tg_enc << "s (in function " << tg_fenc << "s)" << std::endl;
}
// 6.) vertex ID correction step and send to neighbors
t1.start();
std::vector<unsigned long long> rankVertices1(size);
myVertices = mymesh.vertices().size();
MPI_Allgather(&myVertices, 1, MPI_UNSIGNED_LONG_LONG, rankVertices1.data(), 1, MPI_UNSIGNED_LONG_LONG, comm);
......@@ -357,6 +398,15 @@ private:
myOffset += rankVertices1[i];
_offset_verts(mymesh, sendBufVertsIdx, myOffset);
const double t_allg1 = t1.stop();
double tg_allg1 = 0;
MPI_Reduce(&t_allg1, &tg_allg1, 1, MPI_DOUBLE, MPI_SUM, 0, comm);
if (0 == rank)
{
std::cout << "Total time spent in Allgather/offset verts: " << tg_allg1 << "s" << std::endl;
}
for (int i = 0; i < 7; ++i)
{
if (myDST[i] >= 0 && bs[i] > 0)
......@@ -368,6 +418,8 @@ private:
}
// 7.) correct substituted ID's
double t_fdec = 0;
t1.start();
for (int i = 0; i < 7; ++i)
{
if (mySRC[i] >= 0 && recvBufSize[i] > 0)
......@@ -376,17 +428,36 @@ private:
std::vector<TMesh::Int> recvVertsIdx(recvBufSize[i]);
MPI_Recv(&recvVertsIdx.front(), recvBufSize[i]*sizeof(TMesh::Int), MPI_CHAR, mySRC[i], 20+i, comm, &stat1);
t2.start();
_decode_vertex(mymesh, recvVertsIdx, i*max0);
t_fdec += t2.stop();
}
}
const double t_dec = t1.stop();
double tg_fdec = 0;
double tg_dec = 0;
MPI_Reduce(&t_fdec, &tg_fdec, 1, MPI_DOUBLE, MPI_SUM, 0, comm);
MPI_Reduce(&t_dec, &tg_dec, 1, MPI_DOUBLE, MPI_SUM, 0, comm);
if (0 == rank)
{
std::cout << "Total time spent in decode: " << tg_dec << "s (in function " << tg_fdec << "s)" << std::endl;
}
// 8.) clean up
MPI_Barrier(comm);
t1.start();
for (int i = 0; i < 7; ++i)
{
delete sendBufVerts[i];
delete sendBufVertsIdx[i];
}
const double t_free = t1.stop();
double tg_free = 0;
MPI_Reduce(&t_free, &tg_free, 1, MPI_DOUBLE, MPI_SUM, 0, comm);
if (0 == rank)
{
std::cout << "Total time spent in free: " << tg_free << "s" << std::endl;
}
#ifdef DEBUG
TMesh::ConnectVec& tv = mymesh.triangles();
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment