To receive notifications about scheduled maintenance, please subscribe to the mailing-list gitlab-operations@sympa.ethz.ch. You can subscribe to the mailing-list at https://sympa.ethz.ch

Commit d3ee30ee authored by Dmitry Alexeev's avatar Dmitry Alexeev
Browse files

solution for ex 9

parent bcd3b076
CXX?=g++
MPICXX?=mpic++
perf?=1
ifeq "$(perf)" "1"
CFLAGS += -D_PERF_
endif
# these are for running on Euler!
CFLAGS+=-Wall -O3 -std=c++11
CFLAGS_THREADS=$(CFLAGS) -fopenmp
all: diffusion2d_serial diffusion2d_openmp diffusion2d_mpi
diffusion2d_serial: diffusion2d_openmp.cpp
$(CXX) $(CFLAGS) -o $@ $<
diffusion2d_openmp: diffusion2d_openmp.cpp
$(CXX) $(CFLAGS_THREADS) -o $@ $<
diffusion2d_mpi: diffusion2d_mpi.cpp
$(MPICXX) $(CFLAGS) -o $@ $<
clean:
rm -f diffusion2d_serial diffusion2d_openmp diffusion2d_mpi
#include <iostream>
#include <algorithm>
#include <string>
#include <fstream>
#include <cassert>
#include <vector>
#include <cmath>
#include <omp.h>
#include <mpi.h>
#include <stdio.h>
#include "timer.hpp"
typedef double value_type;
#define MPI_VALUE_TYPE MPI_DOUBLE
typedef std::size_t size_type;
struct Diagnostics
{
value_type time;
value_type heat;
value_type mu2;
Diagnostics(value_type time, value_type heat, value_type mu2) : time(time), heat(heat), mu2(mu2) {}
};
class Diffusion2D_MPI {
public:
Diffusion2D_MPI(const value_type D,
const value_type L,
const size_type N,
const value_type dt,
const int rank,
const int procs)
: D_(D), L_(L), N_(N), dt_(dt), rank_(rank), procs_(procs)
{
// initialize to zero
t_ = 0.0;
/// real space grid spacing
dr_ = L_ / (N_ - 1);
/// stencil factor
fac_ = dt_ * D_ / (dr_ * dr_);
// number of rows per process
local_N_ = N_ / procs_;
// small correction for the last process
if (rank_ == procs - 1) local_N_ += (N_ % procs_);
// actual dimension of a row (+2 for the ghosts)
real_N_ = N_ + 2;
Ntot = (local_N_ + 2) * (N_+2);
rho_.resize(Ntot, 0.);
rho_tmp.resize(Ntot, 0.);
// check that the timestep satisfies the restriction for stability
if (rank_ == 0)
std::cout << "timestep from stability condition is " << dr_*dr_/(4.*D_) << std::endl;
initialize_density();
}
value_type advance()
{
MPI_Request req[4];
MPI_Status status[4];
int prev_rank = rank_ - 1;
int next_rank = rank_ + 1;
if (prev_rank >= 0) {
MPI_Irecv(&rho_[ 0*real_N_+1], N_, MPI_VALUE_TYPE, prev_rank, 100, MPI_COMM_WORLD, &req[0]);
MPI_Isend(&rho_[ 1*real_N_+1], N_, MPI_VALUE_TYPE, prev_rank, 100, MPI_COMM_WORLD, &req[1]);
} else {
req[0] = MPI_REQUEST_NULL;
req[1] = MPI_REQUEST_NULL;
}
if (next_rank < procs_) {
MPI_Irecv(&rho_[(local_N_+1)*real_N_+1], N_, MPI_VALUE_TYPE, next_rank, 100, MPI_COMM_WORLD, &req[2]);
MPI_Isend(&rho_[ local_N_*real_N_+1], N_, MPI_VALUE_TYPE, next_rank, 100, MPI_COMM_WORLD, &req[3]);
} else {
req[2] = MPI_REQUEST_NULL;
req[3] = MPI_REQUEST_NULL;
}
/// Dirichlet boundaries; central differences in space, forward Euler
/// in time
// update the interior rows
for(size_type i = 2; i < local_N_; ++i) {
for(size_type j = 1; j <= N_; ++j) {
rho_tmp[i*real_N_ + j] = rho_[i*real_N_ + j] +
fac_
*
(
rho_[i*real_N_ + (j+1)]
+
rho_[i*real_N_ + (j-1)]
+
rho_[(i+1)*real_N_ + j]
+
rho_[(i-1)*real_N_ + j]
-
4.*rho_[i*real_N_ + j]
);
}
}
// ensure boundaries have arrived
MPI_Waitall(4, req, status);
// update the first and the last rows
for(size_type i = 1; i <= local_N_; i += (local_N_-1)) {
for(size_type j = 1; j <= N_; ++j) {
rho_tmp[i*real_N_ + j] = rho_[i*real_N_ + j] +
fac_
*
(
rho_[i*real_N_ + (j+1)]
+
rho_[i*real_N_ + (j-1)]
+
rho_[(i+1)*real_N_ + j]
+
rho_[(i-1)*real_N_ + j]
-
4.*rho_[i*real_N_ + j]
);
}
}
/// use swap instead of rho_=rho_tmp. this is much more efficient, because it does not have to copy
/// element by element.
using std::swap;
swap(rho_tmp, rho_);
t_ += dt_;
return t_;
}
void compute_diagnostics()
{
value_type heat=0, mu2=0;
for(size_type i = 1; i <= local_N_; ++i) {
size_type gi = rank_ * (N_ / procs_) + i; // convert local index to global index
for(size_type j = 1; j <= N_; ++j)
{
const value_type x2 = ((gi-1)*dr_ - L_/2.);
const value_type y2 = ((j-1)*dr_ - L_/2.);
heat += rho_[i*real_N_ + j] * dr_ * dr_;
mu2 += rho_[i*real_N_ + j] * dr_ * dr_ * (x2*x2 + y2*y2);
}
}
MPI_Reduce(rank_ == 0? MPI_IN_PLACE: &heat, &heat, 1, MPI_VALUE_TYPE, MPI_SUM, 0, MPI_COMM_WORLD);
MPI_Reduce(rank_ == 0? MPI_IN_PLACE: &mu2, &mu2, 1, MPI_VALUE_TYPE, MPI_SUM, 0, MPI_COMM_WORLD);
if (rank_ == 0) {
#if DBG
std::cout << "t = " << t_ << " heat = " << heat << " mu2 = " << mu2 << std::endl;
#endif
diag.push_back(Diagnostics(t_,heat,mu2));
}
}
void write_diagnostics(std::string const& filename) const
{
std::ofstream out_file(filename, std::ios::out);
for (Diagnostics d : diag)
out_file << d.time << '\t' << d.heat << '\t' << d.mu2 << "\n";
out_file.close();
}
private:
void initialize_density()
{
size_type gi;
/// initialize rho(x,y,t=0)
value_type bound = 1/2.;
for (size_type i = 1; i <= local_N_; ++i) {
gi = rank_ * (N_ / procs_) + i; // convert local index to global index
for (size_type j = 1; j <= N_; ++j) {
if (std::abs((gi-1)*dr_ - L_/2.) < bound && std::abs((j-1)*dr_ - L_/2.) < bound) {
rho_[i*real_N_ + j] = 1;
} else {
rho_[i*real_N_ + j] = 0;
}
}
}
}
value_type D_, L_;
size_type N_, Ntot, local_N_, real_N_;
value_type dr_, dt_, t_, fac_;
int rank_, procs_;
std::vector<value_type> rho_, rho_tmp;
std::vector<Diagnostics> diag;
};
int main(int argc, char* argv[])
{
if (argc < 5) {
std::cerr << "Usage: " << argv[0] << " D L N dt" << std::endl;
return 1;
}
MPI_Init(&argc, &argv);
int rank, procs;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &procs);
if (rank == 0)
std::cout << "Running with " << procs << " MPI processes" << std::endl;
const value_type D = std::stod(argv[1]);
const value_type L = std::stod(argv[2]);
const size_type N = std::stoul(argv[3]);
const value_type dt = std::stod(argv[4]);
Diffusion2D_MPI system(D, L, N, dt, rank, procs);
#if DBG
system.compute_diagnostics();
#endif
const value_type tmax = 10000 * dt;
value_type time = 0;
timer t;
int i = 0;
t.start();
while (time < tmax) {
time = system.advance();
#ifndef _PERF_
system.compute_diagnostics();
#endif
i++;
if (i == 100) break;
}
t.stop();
if (rank == 0)
std::cout << "Timing: " << N << " " << t.get_timing() << std::endl;
#ifndef _PERF_
if (rank == 0)
system.write_diagnostics("diagnostics_mpi.dat");
#endif
MPI_Finalize();
return 0;
}
#include <iostream>
#include <algorithm>
#include <string>
#include <fstream>
#include <cassert>
#include <vector>
#include <cmath>
#include <omp.h>
#include "timer.hpp"
typedef double value_type;
typedef std::size_t size_type;
struct Diagnostics
{
value_type time;
value_type heat;
value_type mu2;
Diagnostics(value_type time, value_type heat, value_type mu2) : time(time), heat(heat), mu2(mu2) {}
};
class Diffusion2D {
public:
Diffusion2D(const value_type D,
const value_type L,
const size_type N,
const value_type dt,
const int rank)
: D_(D), L_(L), N_(N), Ntot((N_+2)*(N_+2)), dt_(dt), rank_(rank)
{
/// real space grid spacing
dr_ = L_ / (N_ - 1);
/// stencil factor
fac_ = dt_ * D_ / (dr_ * dr_);
rho_.resize(Ntot, 0.);
rho_tmp.resize(Ntot, 0.);
// check that the timestep satisfies the restriction for stability
std::cout << "timestep from stability condition is " << dr_*dr_/(4.*D_) << std::endl;
initialize_density();
}
value_type advance()
{
/// Dirichlet boundaries; central differences in space, forward Euler
/// in time
#pragma omp parallel for collapse(2)
for(size_type i = 1; i <= N_; ++i) {
for(size_type j = 1; j <= N_; ++j) {
rho_tmp[i*N_ + j] = rho_[i*N_ + j] +
fac_
*
(
rho_[i*N_ + (j+1)]
+
rho_[i*N_ + (j-1)]
+
rho_[(i+1)*N_ + j]
+
rho_[(i-1)*N_ + j]
-
4.*rho_[i*N_ + j]
);
}
}
/// use swap instead of rho_=rho_tmp. this is much more efficient, because it does not have to copy
/// element by element.
using std::swap;
swap(rho_tmp, rho_);
t_ += dt_;
return t_;
}
void compute_diagnostics()
{
value_type heat=0, mu2=0;
for(size_type i = 1; i <= N_; ++i)
for(size_type j = 1; j <= N_; ++j)
{
const value_type x2 = ((i-1)*dr_ - L_/2.);
const value_type y2 = ((j-1)*dr_ - L_/2.);
heat += rho_[i*N_ + j] * dr_ * dr_;
mu2 += rho_[i*N_ + j] * dr_ * dr_ * (x2*x2 + y2*y2);
}
#if DBG
std::cout << "t = " << t_ << " heat = " << heat << " mu2 = " << mu2 << std::endl;
#endif
diag.push_back(Diagnostics(t_,heat,mu2));
}
void write_diagnostics(std::string const& filename) const
{
std::ofstream out_file(filename, std::ios::out);
for (Diagnostics d : diag)
out_file << d.time << '\t' << d.heat << '\t' << d.mu2 << "\n";
out_file.close();
}
private:
void initialize_density()
{
/// initialize rho(x,y,t=0)
value_type bound = 1/2.;
#pragma omp parallel for collapse(2)
for (size_type i = 1; i <= N_; ++i) {
for (size_type j = 1; j <= N_; ++j) {
if (std::abs((i-1)*dr_ - L_/2.) < bound && std::abs((j-1)*dr_ - L_/2.) < bound) {
rho_[i*N_ + j] = 1;
} else {
rho_[i*N_ + j] = 0;
}
}
}
}
value_type D_, L_;
size_type N_, Ntot;
value_type dr_, dt_, t_, fac_;
int rank_;
std::vector<value_type> rho_, rho_tmp;
std::vector<Diagnostics> diag;
};
#if !defined(_OPENMP)
int omp_get_num_threads() { return 1; }
#endif
int main(int argc, char* argv[])
{
if (argc < 5) {
std::cerr << "Usage: " << argv[0] << " D L N dt" << std::endl;
return 1;
}
#pragma omp parallel
{
#pragma omp master
std::cout << "Running with " << omp_get_num_threads() << " threads" << std::endl;
}
const value_type D = std::stod(argv[1]);
const value_type L = std::stod(argv[2]);
const size_type N = std::stoul(argv[3]);
const value_type dt = std::stod(argv[4]);
Diffusion2D system(D, L, N, dt, 0);
const value_type tmax = 10000 * dt;
value_type time = 0;
timer t;
int i = 0;
t.start();
while (time < tmax) {
time = system.advance();
#ifndef _PERF_
system.compute_diagnostics();
#endif
i++;
if (i == 100) break;
}
t.stop();
std::cout << "Timing: " << N << " " << t.get_timing() << std::endl;
#ifndef _PERF_
system.write_diagnostics("diagnostics_openmp.dat");
#endif
return 0;
}
// Timer for the HPCSE I course
//
#ifndef HPCSE15_TIMER_HPP
#define HPCSE15_TIMER_HPP
#include <sys/time.h>
class timer {
public:
timer() {
start_time.tv_sec = 0;
start_time.tv_usec = 0;
stop_time.tv_sec = 0;
stop_time.tv_usec = 0;
}
inline void start() {
gettimeofday(&start_time, NULL);
}
inline void stop() {
gettimeofday(&stop_time, NULL);
}
double get_timing() const {
return (stop_time.tv_sec - start_time.tv_sec) + (stop_time.tv_usec - start_time.tv_usec)*1e-6;
}
private:
struct timeval start_time, stop_time;
};
#endif //HPCSE15_TIMER_HPP
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment