To receive notifications about scheduled maintenance, please subscribe to the mailing-list gitlab-operations@sympa.ethz.ch. You can subscribe to the mailing-list at https://sympa.ethz.ch

Commit cde7972d authored by ahuegli's avatar ahuegli

Merge branch 'master' of gitlab.ethz.ch:hpcse20/exercise

parents 34cd8a29 032d7d21
......@@ -14,8 +14,9 @@ exercise_conf = {
'Name' : 'Homework 3',
'Questions' : {
'Question 1': {'Total Points': 20},
'Question 2': {'Total Points': 20},
'Question 3': {'Total Points': 20}
'Question 2': {'Total Points': 40},
'Question 3': {'Total Points': 20},
'Question 4': {'Total Points': 20}
}
}
......
# Build and run rules for the MPI + OpenMP wave-equation solver.
CXX=mpicxx #h5pcc
CXXFLAGS = -Wpedantic -Wall -Wextra -std=c++11 -lstdc++ -g -O3 -fopenmp

all: main

# Compile every translation unit separately.
%.o: %.cpp
	$(CXX) -c -o $@ $< $(CXXFLAGS)

clean:
	rm -rf *.o *.xmf *h5 *txt main

# Link step. The space before -I. matters: without it the include flag is
# glued onto the last entry of CXXFLAGS.
main: main.o auxiliary.o wave.o
	$(CXX) $(CXXFLAGS) -I. -o main $^

# The run targets rebuild from scratch. $(MAKE) keeps flags of the parent make
# and && stops the chain as soon as a step fails.
run_mpi:
	$(MAKE) clean && $(MAKE) && mpirun -n 64 ./main 512 4 0.25

run_hybrid_v1:
	export OMP_NUM_THREADS=8 ; $(MAKE) clean && $(MAKE) && mpirun -n 8 --map-by ppr:3:node ./main 512 2 0.25

run_hybrid_v2:
	export OMP_NUM_THREADS=8 ; $(MAKE) clean && $(MAKE) && mpirun -n 8 --map-by ppr:2:node ./main 512 2 0.25

develop:
	export OMP_NUM_THREADS=8 ; $(MAKE) clean && $(MAKE) && mpirun -n 8 ./main 256 2 0.1

.PHONY: all clean run_mpi run_hybrid_v1 run_hybrid_v2 develop
This diff is collapsed.
#include "wave.h"
// Entry point of the wave-equation solver.
//
// Usage: mpirun -n x ./main N y t
//   N  number of grid points per direction
//   y  number of ranks per direction (x must equal y*y*y)
//   t  final time of the simulation
//
// The command line, the rank layout and the OpenMP thread count are validated
// first; every failed check aborts all ranks with its own error code (1-4).
// Afterwards the WaveEquation object is built and run until t.
int main(int argc, char **argv) {
  int provided;
  MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);

  int rank, size;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  // MPI_THREAD_MULTIPLE is only a request; tell the user when the library
  // grants a lower thread support level.
  if (provided < MPI_THREAD_MULTIPLE && rank == 0)
    std::cout << " Warning: MPI_THREAD_MULTIPLE is not provided by this MPI "
                 "library.\n";

  if (argc != 4) {
    if (rank == 0) {
      std::cout << " Incorrect number of inputs. Run as:\n";
      std::cout << " mpirun -n x ./main N y t \n";
      std::cout << " where \n";
      std::cout << " N= number of grid points per direction\n";
      std::cout << " y= number of ranks per direction (=(x)^(1/3))\n";
      std::cout << " t= final time of simulation \n";
      std::cout << " Aborting...\n";
    }
    int err = 1;
    MPI_Abort(MPI_COMM_WORLD, err);
    return err; // never index argv below if MPI_Abort should return
  }

  int points = std::stoi(argv[1]);
  int procs_per_dim = std::stoi(argv[2]);
  // stod instead of stof: parse the final time directly in double precision
  // rather than rounding it to float first.
  double t_end = std::stod(argv[3]);

  if (size != procs_per_dim * procs_per_dim * procs_per_dim) {
    if (rank == 0)
      std::cout << " Incorrect number of ranks per direction. Aborting... \n";
    int err = 2;
    MPI_Abort(MPI_COMM_WORLD, err);
    return err;
  }

  if (points % procs_per_dim != 0) {
    if (rank == 0)
      std::cout << " Grid points per direction must be divisible by the number "
                   "of ranks per direction. Aborting... \n";
    int err = 3;
    MPI_Abort(MPI_COMM_WORLD, err);
    return err;
  }

  // Round the cube root to the nearest integer, then verify that it really
  // is an exact cube root of the thread count.
  int nthreads = omp_get_max_threads();
  int threads_per_dim = static_cast<int>(std::pow(nthreads, 1.0 / 3.0) + 0.5);
  if (nthreads != threads_per_dim * threads_per_dim * threads_per_dim) {
    if (rank == 0)
      std::cout
          << " Number of OPENMP threads must be a cubic number. Aborting... \n";
    int err = 4;
    MPI_Abort(MPI_COMM_WORLD, err);
    return err;
  }

  WaveEquation Simulation(points, procs_per_dim);
  Simulation.run(t_end);

  MPI_Finalize();
  return 0;
}
This diff is collapsed.
#pragma once
#include <cassert>
#include <cmath>
#include <iomanip>
#include <iostream>
#include <mpi.h>
#include <omp.h>
#include <sstream>
#include <string>
#include <vector>
//#define USE_HDF5 // comment this line if you do not want hdf5 file output
// (suitable for visualization with Paraview)
#ifdef USE_HDF5
#include <hdf5.h>
#endif
#define L 1.0 // domain size (unit cube) with Ntot x Ntot x Ntot grid points
// Per-rank state and operations of the wave-equation solver.
// Only declarations are visible in this header; all member functions are
// defined elsewhere.
// NOTE(review): the struct holds raw pointers and declares a destructor but no
// copy constructor / copy assignment - presumably it is never copied; confirm.
struct WaveEquation {
int N; // grid points per direction for this rank
double h; // grid spacing (dx = dy = dz = h)
double dt; // timestep
double t; // current time
double *u; // solution vector
double *u_old; // presumably the solution of the previous timestep - confirm in the implementation
double *u_new; // presumably the solution being computed for the next timestep - confirm
int Ntot; // total grid points per direction
int procs_per_dim; // ranks per direction
int nthreads; // presumably the number of OpenMP threads - confirm where it is set
int threads_per_dim; // presumably threads per direction (main() requires a cubic thread count)
int size; // =total ranks = procs_per_dim*procs_per_dim*procs_per_dim
int rank;
int rank_plus[3]; // neighboring ranks
int rank_minus[3]; //
MPI_Comm cart_comm; // presumably a Cartesian communicator (judging by the name) - confirm
double origin[3]; // physical space (x0,x1,x2) coordinates of 1st grid point
// of this rank
int coords[3]; // index space coordinates of this rank
double aux; // NOTE(review): purpose is not visible from this header
~WaveEquation();
// a_N: grid points per direction, a_procs_per_dim: ranks per direction
// (main() passes its N and y command line arguments in this order).
WaveEquation(int a_N, int a_procs_per_dim);
void FindCoordinates();
double Initial_Condition(double x0, double x1, double x2);
void UpdateGridPoint(int i0, int i1, int i2);
void Print(int kt);
// Face helpers: all three arrays have one entry per dimension.
// NOTE(review): the parameter names suggest the sizes/subsizes/starts
// convention of MPI subarrays - confirm against the implementation.
void pack_face(double *pack, int array_of_sizes[3], int array_of_subsizes[3],
int array_of_starts[3]);
void unpack_face(double *pack, int array_of_sizes[3],
int array_of_subsizes[3], int array_of_starts[3]);
// Runs the simulation; main() passes the final time as t_end.
void run(double t_end);
double **pack; // presumably one send buffer per face - confirm
double **unpack; // presumably one receive buffer per face - confirm
void pack_all();
void unpack_all();
// Writes results through the reference parameters ti0, ti1, ti2; presumably
// the per-direction index of thread tid - confirm.
void thread_coordinates(int tid, int threads_per_dim, int &ti0, int &ti1,
int &ti2);
};
#!/usr/bin/env python
# File : grade.py
# Description: Generate grading submission file
# Copyright 2020 ETH Zurich. All Rights Reserved.
'''
Example:
python grade.py -q1 7 -c1 "This is why I scored 7 points" -q2 20 -q3 15 -c3 "Second comment here" -c3 "More comments"
'''
#Modify this, according to a given homework
# 'Questions' must use the consecutive keys 'Question 1' .. 'Question N':
# the code below derives the -q<i>/-c<i> options from the number of entries
# and looks every question up as 'Question <i>'.
# 'Total Points' is the upper bound a score for that question is clamped to.
exercise_conf = {
'Name' : 'Homework 4',
'Questions' : {
'Question 1': {'Total Points': 25},
'Question 2': {'Total Points': 25},
'Question 3': {'Total Points': 20}
}
}
'''
==========================================
Do not modify anything below this comment
==========================================
'''
import argparse
import datetime
import sys
def parse_args():
    """Build the command line interface and return the parsed options.

    Two options are registered for every question ``i`` configured in
    ``exercise_conf``:

    * ``-q<i>`` / ``--question<i>``: integer score, defaults to 0
    * ``-c<i>`` / ``--comment<i>``: may be repeated, every occurrence takes
      zero or more strings

    Returns:
        dict: option name (``question<i>``, ``comment<i>``) -> parsed value.
    """
    cli = argparse.ArgumentParser()
    question_count = len(exercise_conf['Questions'])
    for number in range(1, question_count + 1):
        score_flags = ('-q{:d}'.format(number), '--question{:d}'.format(number))
        cli.add_argument(
            *score_flags,
            type=int, default=0,
            help='Scored points for Question {:d}'.format(number))

        comment_flags = ('-c{:d}'.format(number), '--comment{:d}'.format(number))
        cli.add_argument(
            *comment_flags,
            type=str, action='append', nargs='*',
            help='Comments for Question {:d} (you can add multiple comments)'.format(number))

    parsed = cli.parse_args()
    return vars(parsed)
if __name__ == "__main__":
    # Writes grade.txt: a header, one section per question (score plus
    # optional comments) framed by lines of '*', and the final grade.
    args = parse_args()

    def grade(scored, maximum):
        """Map scored/maximum linearly onto the grade scale 2.0 .. 6.0."""
        return 2.0 + (6.0 - 2.0) * float(scored) / maximum

    # Sections are kept in a list so the output order never depends on the
    # ordering guarantees of dict (only insertion-ordered since Python 3.7).
    summary = []
    score = 0
    maxpoints = 0

    header = '{name:s}: {date:s}\n'.format(
        name=exercise_conf['Name'], date=str(datetime.datetime.now()))
    # width tracks the longest printed line; it sets the length of the frame.
    width = len(header.rstrip())
    summary.append([header])

    for i in range(1, len(exercise_conf['Questions']) + 1):
        qmax = exercise_conf['Questions']['Question {:d}'.format(i)]['Total Points']
        # Clamp the entered score into [0, qmax].
        qscore = max(0, min(args['question{:d}'.format(i)], qmax))

        content = ['Question {id:d}: {score:d}/{max:d}\n'.format(
            id=i, score=qscore, max=qmax)]

        comments = args['comment{:d}'.format(i)]
        if comments is not None:
            # Every -c<i> occurrence yields its own list; flatten them.
            flattened = [text for group in comments for text in group]
            for j, comment in enumerate(flattened):
                content.append(' -Comment {id:d}: {issue:s}\n'.format(
                    id=j + 1, issue=comment.strip()))

        width = max([width] + [len(line.rstrip()) for line in content])
        score += qscore
        maxpoints += qmax
        summary.append(content)

    assert maxpoints > 0

    with open('grade.txt', 'w') as out:
        out.write(width*'*'+'\n')
        for lines in summary:
            for line in lines:
                out.write(line)
        out.write(width*'*'+'\n')
        out.write('Grade: {:.2f}'.format(grade(score, maxpoints)))
ssa
cmaes
*.o
*.txt
*.swp
*.orig
_korali_result
# Builds the SSA program (ssa) and the Korali based CMA-ES program (cmaes).
CC=g++
LD=$(CC)

# Compiler and linker flags reported by the installed Korali python module.
KORALICFLAGS=`python3 -m korali.cxx --cflags`
KORALILIBS=`python3 -m korali.cxx --libs`

CFLAGS = -Wall -Wfatal-errors -std=c++14 -I ./includes/
CFLAGS += -O3
CFLAGS += -fopenmp -D _OPENMP

OBJECTS = main_ssa.o SSA_CPU.o

# `all` is the goal of a plain `make`; all and clean never name real files.
.DEFAULT_GOAL := all
.PHONY: all clean

all: ssa cmaes

ssa: $(OBJECTS) SSA_CPU.cpp ./includes/SSA_CPU.hpp
	$(CC) $(CFLAGS) -c SSA_CPU.cpp -o SSA_CPU.o
	$(LD) $(OBJECTS) -o ssa $(CFLAGS)

cmaes: $(OBJECTS) main_cmaes.cpp ./includes/objective.hpp
	$(CC) -c main_cmaes.cpp -o main_cmaes.o $(KORALICFLAGS) $(KORALILIBS) $(CFLAGS)
	$(LD) main_cmaes.o SSA_CPU.o -o cmaes $(KORALICFLAGS) $(KORALILIBS) $(CFLAGS)

# $< is the source file only, so prerequisites added later (e.g. headers) are
# never passed to the compiler.
%.o: %.cpp
	$(CC) $(CFLAGS) -c $< -o $@

clean:
	rm -f *.o
	rm -f ssa
	rm -f cmaes
HOW TO RUN
----------
To compile and run the code on Euler, remove all previously loaded modules (purge) and load the following modules:
module purge
module load new
module load gcc/6.3.0
module load python/3.7.1
or run `source modules.src`
Make sure that these modules were also loaded when you compiled Korali.
Request interactive shell
-------------------------
bsub -n 4 -R -W 04:00 -Is bash
and compile with `make`, respectively `make ssa` or `make cmaes` for partial builds.
You may also run with more or fewer nodes.
Run
-------------------------
export OMP_NUM_THREADS=4
./ssa
./cmaes
You may want to set the OMP_NUM_THREADS variable to different values.
If interested feel free to use input arguments and study the behaviour of the system for different inputs, e.g.
./ssa -omega 1 -samples 5000
Lower quantities usually increase the stochastic effects and require more simulations/samples.
WHAT TO MODIFY
--------------
You only need to change the indicated sections in
- SSA_CPU.cpp
- includes/objective.hpp
and
- main_cmaes.cpp
if you wish to modify the population size of CMA-ES.
CODE OUTPUT
-----------
When the ./ssa code runs, it prints
the averaged time for 1 SSA simulation, the FLOPs, Byte Transfers, and the performance.
Note that the latter values depend on your implementation of SSA::getTransfers() and SSA::getFlops() and are wrong estimates if not (correctly) implemented.
#include "SSA_CPU.hpp"
#ifdef _OPENMP
#include <omp.h>
#endif
#include <cmath>
// Runs numSamples SSA trajectories up to time tend, accumulates the species
// counts into the time bins trajS1 / trajS2 / ntraj and finally averages them.
//
// Outline of the code as written here:
//  1. allocate and zero the random number buffer r48 and the per-sample state
//     (curT, x0, x1);
//  2. while bNotDone: refill r48 with drand48(), advance every sample by at
//     most niters steps into per-thread bins, then add the per-thread bins to
//     the member bins (only this part is between startTiming()/stopTiming());
//  3. free the buffers and call normalize_bins().
//
// The propensity, tau and reaction sampling steps are still TODO placeholders
// (Task 1a): every step currently advances time by the dummy value 0.1 and
// leaves Sa and Sb unchanged.
void SSA_CPU::operator()()
{
// number of reactions
const int m = 4;
// number of species
const int n = 2;
// initial conditions
const int S0[n] = {4*omega,0};
// upper bound on the number of steps a sample takes per pass
const int niters = static_cast<int>(tend*1000);
// r48 provides two random numbers per (sample, iteration) pair
double * const r48 = new double[2*niters*numSamples];
// per-sample state carried from one pass to the next: time and both species
double * const curT = new double[numSamples];
double * const x0 = new double[numSamples];
double * const x1 = new double[numSamples];
// NUMA aware initialization (first touch)
#ifdef _OPENMP
#pragma omp parallel for
#endif
for (int s=0; s<numSamples; s++)
{
curT[s] = 0.0;
x0[s] = 0.0;
x1[s] = 0.0;
for (int iter=0; iter<niters; iter++)
{
r48[2*s*niters + iter*2 ] = 0.;
r48[2*s*niters + iter*2 + 1] = 0.;
}
}
bool bNotDone = true;
pass = 0;
while (bNotDone)
{
// Draw fresh random numbers for this pass (not part of the timed section).
// NOTE(review): drand48() is called concurrently from the parallel loop and
// presumably shares one global generator state - confirm this is acceptable.
// NOTE(review): if numSamples is an int, niters*2*numSamples is evaluated in
// int - verify it cannot overflow for the intended sample counts.
#ifdef _OPENMP
#pragma omp parallel for
#endif
for (int i=0; i<niters*2*numSamples; i++)
r48[i] = drand48();
startTiming();
// Query the size of the thread team once; it sizes the per-thread bins below.
#ifdef _OPENMP
int num_threads;
#pragma omp parallel
#pragma omp single
{
num_threads = omp_get_num_threads();
}
#else
const int num_threads = 1;
#endif
const int nbins = trajS1.size();
// One private copy of every bin array per thread (thread t owns the slice
// starting at t*nbins), so the sample loop can accumulate without locking.
double * const trajS1L = new double[nbins*num_threads];
double * const trajS2L = new double[nbins*num_threads];
int * const ntrajL = new int[nbins*num_threads];
// NUMA aware initialization (first touch)
#ifdef _OPENMP
#pragma omp parallel for
#endif
for(int t=0; t <num_threads; ++t)
{
for(int b=0; b <nbins; ++b)
{
trajS1L[t*nbins+b] = 0.0;
trajS2L[t*nbins+b] = 0.0;
ntrajL[t*nbins+b] = 0;
}
}
// Advance every sample; iterations are distributed over the threads.
#ifdef _OPENMP
#pragma omp parallel for
#endif
for(int s = 0; s < numSamples; ++s)
{
#ifdef _OPENMP
const int thread_no = omp_get_thread_num();
#else
const int thread_no = 0;
#endif
// local version of trajectory bins
// (shadows the nbins declared before the loop; same value)
const int nbins = trajS1.size();
// init
double time;
double Sa;
double Sb;
// After the first pass a sample resumes from its stored state.
// NOTE(review): bNotDone is also assigned at the end of every iteration of
// this parallel loop, so this read may observe the result of another sample.
if (pass>0 && bNotDone)
{
time = curT[s];
Sa = x0[s];
Sb = x1[s];
}
else
{
time = 0.0;
Sa = S0[0];
Sb = S0[1];
}
// propensities
double a[m];
// time stepping
int iter = 0;
while (time <= tend && iter<niters)
{
// store trajectory
// NOTE(review): ib is not range-checked against nbins; this relies on
// tend / bin_dt staying below trajS1.size() - confirm.
const int ib = static_cast<int>(time / bin_dt); // 1 FLOP
trajS1L[ib+thread_no*nbins] += Sa;
trajS2L[ib+thread_no*nbins] += Sb; // 2 FLOP, 2 WRITE
++ntrajL[ib+thread_no*nbins]; // 1 WRITE
// TODO: Task 1a) (STEP 0)
// - compute propensities a[0], a[1], .., a[3] and a0
// - use values Sa and Sb, and values stored in k[4], check initialization in SSA_CPU.hpp
a[0] = 0.0;
a[1] = 0.0;
a[2] = 0.0;
a[3] = 0.0;
double a0 = 0.0;
// TODO: Task 1a) (STEP 1)
// - sample tau using the inverse sampling method and increment time, use uniform random numbers initialized in r48
time += 0.1; // 0.1 is a dummy
// TODO: Task 1a) (STEP 2)
// - sample a reaction, use uniform random numbers initialized in r48
// TODO: Task 1a) (STEP 3)
// - increment Sa, Sb
Sa += 0;
Sb += 0;
iter++;
}
// Save the state so that a further pass can resume this sample.
curT[s] = time;
x0[s] = Sa;
x1[s] = Sb;
// NOTE(review): shared flag written by all loop iterations without
// synchronization; the surviving value is that of whichever sample wrote
// last - confirm the intended termination criterion.
bNotDone = time <= tend && Sa!=0 && Sb!=0;
}
// Sequentially add the per-thread bins to the member bins.
for(int t = 0; t < num_threads; ++t)
{
for (int i = 0; i < nbins; ++i) {
trajS1[i] += trajS1L[i+t*nbins];
trajS2[i] += trajS2L[i+t*nbins];
ntraj[i] += ntrajL[i+t*nbins]; // bins * (3 FLOP, 3 READ, 3 WRITE) (assuming trajS1L, trajS2L, ntrajL) in cache
}
}
delete[] ntrajL;
delete[] trajS2L;
delete[] trajS1L;
stopTiming();
pass++;
}
delete[] x1;
delete[] x0;
delete[] curT;
delete[] r48;
// Convert the accumulated bin sums into averages.
normalize_bins();
}
// Turns the accumulated per-bin sums into per-bin averages.
//
// Every entry of trajS1 and trajS2 is divided by the number of contributions
// counted for that bin in ntraj. A bin that received no contribution is left
// untouched instead of being divided by zero.
void SSA_CPU::normalize_bins()
{
    assert( trajS2.size() == trajS1.size() );
    assert( ntraj.size() == trajS1.size() );
    const int nbins = trajS1.size();
#ifdef _OPENMP
#pragma omp parallel for
#endif
    for(int i=0; i < nbins; ++i)
    {
        // empty bin: nothing to average
        if (ntraj[i] != 0)
        {
            trajS1[i]/=ntraj[i];
            trajS2[i]/=ntraj[i]; // 2 FLOP, 3 READ, 2 WRITE
        }
    }
}
// Amount of data read and written, in bytes.
// Placeholder: returns the constant 1.0 until the optional Task 1c is
// implemented.
double SSA_CPU::getTransfers() const
{
// TODO: (Optional) Task 1c)
// - return number of read writes in [BYTES]
return 1.0;
}
// Number of floating point operations.
// Placeholder: returns the constant 1.0 until the optional Task 1c is
// implemented.
double SSA_CPU::getFlops() const
{
// TODO: (Optional) Task 1c)
// - return number of floating point operations
return 1.0;
}
#pragma once
#include <cstdlib>
#include <cstring>
#include <limits>
#include <map>
#include <string>
#include <vector>
using namespace std;
// String-backed option value with typed accessors.
//
// An empty string means "not set": every accessor then returns the default
// passed by the caller instead of converting the text.
class Value
{
private:
    std::string content;

public:
    Value() = default;
    // Intentionally not explicit so that a string converts to a Value.
    Value(const std::string& content_) : content(content_) { }

    // Returns the leading floating point number of the text (0.0 if the text
    // does not start with a number), or def if the value is not set.
    double asDouble(double def=0) const
    {
        if (content.empty()) return def;
        return std::strtod(content.c_str(), nullptr);
    }

    // Returns the leading decimal integer of the text (0 if the text does not
    // start with a number), or def if the value is not set. Numbers outside
    // the range of int saturate at the nearest limit.
    int asInt(int def=0) const
    {
        if (content.empty()) return def;
        const long parsed = std::strtol(content.c_str(), nullptr, 10);
        if (parsed > std::numeric_limits<int>::max()) return std::numeric_limits<int>::max();
        if (parsed < std::numeric_limits<int>::min()) return std::numeric_limits<int>::min();
        return static_cast<int>(parsed);
    }

    // Returns false for exactly "0" and "false", true for any other non-empty
    // text, or def if the value is not set.
    bool asBool(bool def=false) const
    {
        if (content.empty()) return def;
        return !(content == "0" || content == "false");
    }

    // Returns the stored text, or def if the value is not set.
    std::string asString(const std::string& def="") const
    {
        if (content.empty()) return def;
        return content;
    }
};
class ArgumentParser
{
private:
map<string,Value> mapArguments;
const int iArgC;
const char** vArgV;
bool bStrictMode;
public:
Value operator()(const string arg)
{
if (bStrictMode)
{
map<string,Value>::const_iterator it = mapArguments.find(arg);
if (it == mapArguments.end())
{