To receive notifications about scheduled maintenance, please subscribe to the mailing-list gitlab-operations@sympa.ethz.ch. You can subscribe to the mailing-list at https://sympa.ethz.ch

Commit 564d164c authored by Dmitry Alexeev's avatar Dmitry Alexeev

solution 2

parent a28900d7
CXX?=g++
CFLAGS=-Wall -O3 -std=c++11
CFLAGS_THREADS=$(CFLAGS) -pthread
all: barrier_test diffusion2d_serial diffusion2d_threaded diffusion2d_barrier
barrier_test: barrier_test.cpp inc/timer.hpp
$(CXX) $(CFLAGS_THREADS) -o $@ $< -Iinc
diffusion2d_serial: diffusion2d_serial.cpp inc/timer.hpp
$(CXX) $(CFLAGS) -o $@ $< -Iinc
diffusion2d_threaded: diffusion2d_threaded.cpp inc/timer.hpp
$(CXX) $(CFLAGS_THREADS) -o $@ $< -Iinc
diffusion2d_barrier: diffusion2d_barrier.cpp inc/barrier.hpp inc/timer.hpp
$(CXX) $(CFLAGS_THREADS) -o $@ $< -Iinc
clean:
rm -f barrier_test diffusion2d_serial diffusion2d_threaded diffusion2d_barrier
#include <iostream>
#include <vector>
#include <thread>
#include <mutex>
#include "timer.hpp"
#include "barrier.hpp"
typedef std::size_t size_type;
int main()
{
const size_type max_threads = std::thread::hardware_concurrency();
const size_type nrep = 500;
timer t;
for (size_type nthreads=1; nthreads<max_threads; nthreads++) {
t.start();
std::vector<std::thread> threads(nthreads);
barrier b(nthreads);
for (size_type n = 0; n < nthreads; ++n)
threads[n] = std::thread([&]() {
long ii = nrep;
while( ii-- )
b.wait();
});
for (auto & t : threads)
t.join();
t.stop();
std::cout << "Timing : " << nthreads << " " << t.get_timing() << std::endl;
}
return 0;
}
#include <iostream>
#include <algorithm>
#include <string>
#include <fstream>
#include <cassert>
#include <vector>
#include <cmath>
#include <thread>
#include <mutex>
#include "timer.hpp"
#include "barrier.hpp"
typedef double value_type;
typedef std::size_t size_type;
class Diffusion2D {
public:
Diffusion2D(const value_type D,
const value_type L,
const size_type N,
const value_type dt,
const size_type nthreads)
: D_(D), L_(L), N_(N), Ntot(N_*N_), dt_(dt), nthreads_(nthreads), b_(nthreads_)
{
/// real space grid spacing
dr_ = L_ / (N_ - 1);
/// stencil factor
fac_ = dt_ * D_ / (dr_ * dr_);
rho_.resize(Ntot, 0.);
rho_tmp.resize(Ntot, 0.);
initialize_density();
}
void advance(const size_type start, const size_type end)
{
/// Open boundaries; central differences in space, forward Euler
/// in time
for(size_type i = start; i < end; ++i) {
for(size_type j = 0; j < N_; ++j) {
rho_tmp[i*N_ + j] = rho_[i*N_ + j] +
fac_
*
(
(j == N_-1 ? 0. : rho_[i*N_ + (j+1)])
+
(j == 0 ? 0. : rho_[i*N_ + (j-1)])
+
(i == N_-1 ? 0. : rho_[(i+1)*N_ + j])
+
(i == 0 ? 0. : rho_[(i-1)*N_ + j])
-
4.*rho_[i*N_ + j]
);
}
}
/// wait for all threads before swapping the data
b_.wait();
/// use swap instead of rho_=rho_tmp. this is much more efficient, because it does not have to copy
/// element by element.
if (start == 0) std::swap(rho_tmp, rho_); /// only one thread is doing the swap!!
/// make sure that no thread is leaving the iteration before the data has been swapped,
/// otherwise some threads could start the next iteration using the old data
b_.wait();
}
void write_density(std::string const& filename) const
{
std::ofstream out_file(filename, std::ios::out);
for(size_type i = 0; i < N_; ++i) {
for(size_type j = 0; j < N_; ++j)
out_file << (i*dr_ - L_/2.) << '\t' << (j*dr_ - L_/2.) << '\t' << rho_[i*N_ + j] << "\n";
out_file << "\n";
}
out_file.close();
}
private:
void initialize_density()
{
/// initialize rho(x,y,t=0)
value_type bound = 1/2.;
for (size_type i = 0; i < N_; ++i) {
for (size_type j = 0; j < N_; ++j) {
if (std::abs(i*dr_ - L_/2.) < bound && std::abs(j*dr_ - L_/2.) < bound) {
rho_[i*N_ + j] = 1;
} else {
rho_[i*N_ + j] = 0;
}
}
}
}
value_type D_, L_;
size_type N_, Ntot;
value_type dr_, dt_, fac_;
size_type nthreads_;
barrier b_;
std::vector<value_type> rho_, rho_tmp;
};
int main(int argc, char* argv[])
{
if (argc < 6) {
std::cerr << "Usage: " << argv[0] << " D L N dt nthreads" << std::endl;
return 1;
}
const value_type D = std::stod(argv[1]);
const value_type L = std::stod(argv[2]);
const size_type N = std::stoul(argv[3]);
const value_type dt = std::stod(argv[4]);
const size_type nthreads = std::stoul(argv[5]);
Diffusion2D system(D, L, N, dt, nthreads);
const value_type tmax = 10000 * dt;
timer t;
t.start();
std::vector<std::thread> threads(nthreads);
for (size_type n = 0; n < nthreads; ++n)
threads[n] = std::thread([&, n] () {
const size_type begin = n * N / nthreads;
const size_type end = (n+1) * N / nthreads;
value_type time = 0;
while (time < tmax) {
system.advance(begin, end);
time += dt;
}
});
for (auto & t : threads)
t.join();
t.stop();
std::cout << "Timing : " << N << " " << nthreads << " " << t.get_timing() << std::endl;
system.write_density("density_barrier.dat");
return 0;
}
#include <iostream>
#include <algorithm>
#include <string>
#include <fstream>
#include <cassert>
#include <vector>
#include <cmath>
#include "timer.hpp"
typedef double value_type;
typedef std::size_t size_type;
class Diffusion2D {
public:
Diffusion2D(const value_type D,
const value_type L,
const size_type N,
const value_type dt)
: D_(D), L_(L), N_(N), Ntot(N_*N_), dt_(dt)
{
/// real space grid spacing
dr_ = L_ / (N_ - 1);
/// stencil factor
fac_ = dt_ * D_ / (dr_ * dr_);
rho_.resize(Ntot, 0.);
rho_tmp.resize(Ntot, 0.);
initialize_density();
}
void advance()
{
/// Open boundaries; central differences in space, forward Euler
/// in time
for(size_type i = 0; i < N_; ++i)
for(size_type j = 0; j < N_; ++j) {
rho_tmp[i*N_ + j] = rho_[i*N_ + j] +
fac_
*
(
(j == N_-1 ? 0. : rho_[i*N_ + (j+1)]) +
(j == 0 ? 0. : rho_[i*N_ + (j-1)]) +
(i == N_-1 ? 0. : rho_[(i+1)*N_ + j]) +
(i == 0 ? 0. : rho_[(i-1)*N_ + j]) -
4.*rho_[i*N_ + j]
);
}
/// use swap instead of rho_=rho_tmp. this is much more efficient, because it does not have to copy
/// element by element.
std::swap(rho_tmp, rho_);
}
void write_density(std::string const& filename) const
{
std::ofstream out_file(filename, std::ios::out);
for(size_type i = 0; i < N_; ++i) {
for(size_type j = 0; j < N_; ++j)
out_file << (i*dr_ - L_/2.) << '\t' << (j*dr_ - L_/2.) << '\t' << rho_[i*N_ + j] << "\n";
out_file << "\n";
}
out_file.close();
}
private:
void initialize_density()
{
/// initialize rho(x,y,t=0)
value_type bound = 1/2.;
for (size_type i = 0; i < N_; ++i) {
for (size_type j = 0; j < N_; ++j) {
if (std::abs(i*dr_ - L_/2.) < bound && std::abs(j*dr_ - L_/2.) < bound) {
rho_[i*N_ + j] = 1;
} else {
rho_[i*N_ + j] = 0;
}
}
}
}
value_type D_, L_;
size_type N_, Ntot;
value_type dr_, dt_, fac_;
std::vector<value_type> rho_, rho_tmp;
};
int main(int argc, char* argv[])
{
if (argc < 5) {
std::cerr << "Usage: " << argv[0] << " D L N dt" << std::endl;
return 1;
}
const value_type D = std::stod(argv[1]);
const value_type L = std::stod(argv[2]);
const size_type N = std::stoul(argv[3]);
const value_type dt = std::stod(argv[4]);
Diffusion2D system(D, L, N, dt);
system.write_density("density_000.dat");
const value_type tmax = 10000 * dt;
value_type time = 0;
timer t;
t.start();
while (time < tmax) {
system.advance();
time += dt;
}
t.stop();
std::cout << "Timing : " << N << " " << 1 << " " << t.get_timing() << std::endl;
system.write_density("density_serial.dat");
return 0;
}
#include <iostream>
#include <algorithm>
#include <string>
#include <fstream>
#include <cassert>
#include <vector>
#include <cmath>
#include <thread>
#include <mutex>
#include "timer.hpp"
typedef double value_type;
typedef std::size_t size_type;
class Diffusion2D {
public:
Diffusion2D(const value_type D,
const value_type L,
const size_type N,
const value_type dt,
const size_type nthreads)
: D_(D), L_(L), N_(N), Ntot(N_*N_), dt_(dt), nthreads_(nthreads)
{
/// real space grid spacing
dr_ = L_ / (N_ - 1);
/// stencil factor
fac_ = dt_ * D_ / (dr_ * dr_);
rho_.resize(Ntot, 0.);
rho_tmp.resize(Ntot, 0.);
initialize_density();
}
void advance()
{
/// Open boundaries; central differences in space, forward Euler
/// in time
std::vector<std::thread> threads(nthreads_);
for (size_type n = 0; n < nthreads_; ++n)
threads[n] = std::thread([n,this] () {
const size_type begin = n * N_ / nthreads_;
const size_type end = (n+1) * N_ / nthreads_;
for(size_type i = begin; i < end; ++i) {
for(size_type j = 0; j < N_; ++j) {
rho_tmp[i*N_ + j] = rho_[i*N_ + j] +
fac_
*
(
(j == N_-1 ? 0. : rho_[i*N_ + (j+1)])
+
(j == 0 ? 0. : rho_[i*N_ + (j-1)])
+
(i == N_-1 ? 0. : rho_[(i+1)*N_ + j])
+
(i == 0 ? 0. : rho_[(i-1)*N_ + j])
-
4.*rho_[i*N_ + j]
);
}
}
} );
/// after join() the code will run sequentially again, no sync problems
for (auto & t : threads)
t.join();
/// use swap instead of rho_=rho_tmp. this is much more efficient, because it does not have to copy
/// element by element.
std::swap(rho_tmp, rho_);
}
void write_density(std::string const& filename) const
{
std::ofstream out_file(filename, std::ios::out);
for(size_type i = 0; i < N_; ++i) {
for(size_type j = 0; j < N_; ++j)
out_file << (i*dr_ - L_/2.) << '\t' << (j*dr_ - L_/2.) << '\t' << rho_[i*N_ + j] << "\n";
out_file << "\n";
}
out_file.close();
}
private:
void initialize_density()
{
/// initialize rho(x,y,t=0)
value_type bound = 1/2.;
for (size_type i = 0; i < N_; ++i) {
for (size_type j = 0; j < N_; ++j) {
if (std::abs(i*dr_ - L_/2.) < bound && std::abs(j*dr_ - L_/2.) < bound) {
rho_[i*N_ + j] = 1;
} else {
rho_[i*N_ + j] = 0;
}
}
}
}
value_type D_, L_;
size_type N_, Ntot;
value_type dr_, dt_, fac_;
size_type nthreads_;
std::vector<value_type> rho_, rho_tmp;
};
int main(int argc, char* argv[])
{
if (argc < 6) {
std::cerr << "Usage: " << argv[0] << " D L N dt nthreads" << std::endl;
return 1;
}
const value_type D = std::stod(argv[1]);
const value_type L = std::stod(argv[2]);
const size_type N = std::stoul(argv[3]);
const value_type dt = std::stod(argv[4]);
const size_type nthreads = std::stoul(argv[5]);
Diffusion2D system(D, L, N, dt, nthreads);
const value_type tmax = 10000 * dt;
value_type time = 0;
timer t;
t.start();
while (time < tmax) {
system.advance();
time += dt;
}
t.stop();
std::cout << "Timing : " << N << " " << nthreads << " " << t.get_timing() << std::endl;
system.write_density("density_threaded.dat");
return 0;
}
#ifndef HPCSE_BARRIER_HPP
#define HPCSE_BARRIER_HPP
#include <thread>
#include <mutex>
#include <cassert>
#include <limits>
class barrier
{
public:
barrier(unsigned int count)
: m_total(count)
, m_count(count)
, m_generation(0)
{
assert(count != 0);
}
void wait()
{
std::unique_lock<std::mutex> lock(m_mutex);
unsigned int gen = m_generation;
// decrease the count
if (--m_count==0) {
// if done reset to new generation of wait
m_count = m_total;
m_generation++;
}
else {
lock.unlock();
while (true) {
lock.lock();
if (gen != m_generation)
break;
lock.unlock();
}
}
}
unsigned int num_waiting() const
{
std::unique_lock<std::mutex> lock(m_mutex);
return m_count;
}
private:
mutable std::mutex m_mutex;
unsigned long const m_total;
unsigned long m_count;
unsigned long m_generation;
};
#endif //HPCSE_BARRIER_HPP
// Timer for the HPCSE I course
//
#ifndef HPCSE_TIMER_HPP
#define HPCSE_TIMER_HPP
#include <sys/time.h>
class timer {
public:
timer() {
start_time.tv_sec = 0;
start_time.tv_usec = 0;
stop_time.tv_sec = 0;
stop_time.tv_usec = 0;
}
inline void start() {
gettimeofday(&start_time, NULL);
}
inline void stop() {
gettimeofday(&stop_time, NULL);
}
double get_timing() const {
return (stop_time.tv_sec - start_time.tv_sec) + (stop_time.tv_usec - start_time.tv_usec)*1e-6;
}
private:
struct timeval start_time, stop_time;
};
#endif //HPCSE_TIMER_HPP
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment