To receive notifications about scheduled maintenance, please subscribe to the mailing-list gitlab-operations@sympa.ethz.ch. You can subscribe to the mailing-list at https://sympa.ethz.ch

Commit 06d576d7 authored by Michail Chatzimanolakis's avatar Michail Chatzimanolakis

Merge branch 'master' of gitlab.ethz.ch:hpcse20/exercise

parents 0f79dc19 c290e64e
ssa
cmaes
*.o
*.txt
*.swp
*.orig
_korali_result
CC=g++
LD=$(CC)
KORALICFLAGS=`python3 -m korali.cxx --cflags`
KORALILIBS=`python3 -m korali.cxx --libs`
CFLAGS = -Wall -Wfatal-errors -std=c++14 -I ./includes/
CFLAGS += -O3
#CFLAGS += -fopenmp -D _OPENMP
OBJECTS = main_ssa.o SSA_CPU.o
.DEFAULT: all
all: ssa cmaes
ssa: $(OBJECTS) SSA_CPU.cpp ./includes/SSA_CPU.hpp
$(CC) $(CFLAGS) -c SSA_CPU.cpp -o SSA_CPU.o
$(LD) $(OBJECTS) -o ssa $(CFLAGS)
cmaes: $(OBJECTS) main_cmaes.cpp ./includes/objective.hpp
$(CC) -c main_cmaes.cpp -o main_cmaes.o $(KORALICFLAGS) $(KORALILIBS) $(CFLAGS)
$(LD) main_cmaes.o SSA_CPU.o -o cmaes $(KORALICFLAGS) $(KORALILIBS) $(CFLAGS)
%.o: %.cpp
$(CC) $(CFLAGS) -c $^ -o $@
clean:
rm -f *.o
rm -f ssa
rm -f cmaes
HOW TO RUN
----------
To compile and run the code on Euler remove all previously loaded nodules (purge) and load the following modules:
module purge
module load new
module load gcc/6.3.0
module load python/3.7.1
or run `source modules.src`
Make sure that you loaded this files during compilation of Korali.
Request interactive shell
-------------------------
bsub -n 4 -R -W 04:00 -Is bash
and compile with `make`, respectively `make ssa` or `make cmaes` for partial builds.
You may also run with more or less nodes.
Run
-------------------------
export OMP_NUM_THREADS=4
./ssa
./cmaes
You may want to set the OMP_NUM_THREADS variables to different values.
WHAT TO MODIFY
--------------
You only need to change the indicated sections in
- SSA_CPU.cpp
- includes/objective.hpp
and
- main_cmaes.cpp
if you wish to modify the population size of CMA-ES.
CODE OUTPUT
-----------
When the ./ssa code runs, it prints
the averaged time for 1 SSA simulation, the FLOPs, Byte Transfers, and the performance.
Note that the latter values depend on your implementation of SSA::getTransfers() and SSA::getFlops() and are wrong estimates if not (correctly) implemented.
#include "SSA_CPU.hpp"
#ifdef _OPENMP
#include <omp.h>
#endif
#include <cmath>
void SSA_CPU::operator()()
{
// number of reactions
const int m = 4;
// number of species
const int n = 2;
// initial conditions
const int S0[n] = {4*omega,0};
const int niters = static_cast<int>(tend*1000);
double * const r48 = new double[2*niters*numSamples];
double * const curT = new double[numSamples];
double * const x0 = new double[numSamples];
double * const x1 = new double[numSamples];
// NUMA aware initialization (first touch)
#ifdef _OPENMP
#pragma omp parallel for
#endif
for (int s=0; s<numSamples; s++)
{
curT[s] = 0.0;
x0[s] = 0.0;
x1[s] = 0.0;
for (int iter=0; iter<niters; iter++)
{
r48[2*s*niters + iter*2 ] = 0.;
r48[2*s*niters + iter*2 + 1] = 0.;
}
}
bool bNotDone = true;
pass = 0;
while (bNotDone)
{
#ifdef _OPENMP
#pragma omp parallel for
#endif
for (int i=0; i<niters*2*numSamples; i++)
r48[i] = drand48();
startTiming();
#ifdef _OPENMP
int num_threads;
#pragma omp parallel
#pragma omp single
{
num_threads = omp_get_num_threads();
}
#else
const int num_threads = 1;
#endif
const int nbins = trajS1.size();
double * const trajS1L = new double[nbins*num_threads];
double * const trajS2L = new double[nbins*num_threads];
int * const ntrajL = new int[nbins*num_threads];
// NUMA aware initialization (first touch)
#ifdef _OPENMP
#pragma omp parallel for
#endif
for(int t=0; t <num_threads; ++t)
{
for(int b=0; b <nbins; ++b)
{
trajS1L[t*nbins+b] = 0.0;
trajS2L[t*nbins+b] = 0.0;
ntrajL[t*nbins+b] = 0;
}
}
#ifdef _OPENMP
#pragma omp parallel for
#endif
for(int s = 0; s < numSamples; ++s)
{
#ifdef _OPENMP
const int thread_no = omp_get_thread_num();
#else
const int thread_no = 0;
#endif
// local version of trajectory bins
const int nbins = trajS1.size();
// init
double time;
double Sa;
double Sb;
if (pass>0 && bNotDone)
{
time = curT[s];
Sa = x0[s];
Sb = x1[s];
}
else
{
time = 0.0;
Sa = S0[0];
Sb = S0[1];
}
// propensities
double a[m];
// time stepping
int iter = 0;
while (time <= tend && iter<niters)
{
// store trajectory
const int ib = static_cast<int>(time / bin_dt); // 1 FLOP
trajS1L[ib+thread_no*nbins] += Sa;
trajS2L[ib+thread_no*nbins] += Sb; // 2 FLOP, 2 WRITE
++ntrajL[ib+thread_no*nbins]; // 1 WRITE
// TODO: Task 1a) (STEP 0)
// - compute propensities a[0], a[1], .., a[3] and a0
// - use values Sa and Sb, and values stored in k[4], check initialization in SSA_CPU.hpp
a[0] = 0.0;
a[1] = 0.0;
a[2] = 0.0;
a[3] = 0.0;
double a0 = 0.0;
// TODO: Task 1a) (STEP 1)
// - sample tau using the inverse sampling method and increment time, use uniform random numbers initialized in r48
time += 0.1; // 0.1 is a dummy
// TODO: Task 1a) (STEP 2)
// - sample a reaction, use uniform random numbers initialized in r48
// - increment Sa, Sb
// TODO: Task 1a) (STEP 3)
// - increment Sa, Sb
Sa += 0;
Sb += 0;
iter++;
}
curT[s] = time;
x0[s] = Sa;
x1[s] = Sb;
bNotDone = time <= tend && Sa!=0 && Sb!=0;
}
for(int t = 0; t < num_threads; ++t)
{
for (int i = 0; i < nbins; ++i) {
trajS1[i] += trajS1L[i+t*nbins];
trajS2[i] += trajS2L[i+t*nbins];
ntraj[i] += ntrajL[i+t*nbins]; // bins * (3 FLOP, 3 READ, 3 WRITE) (assuming trajS1L, trajS2L, ntrajL) in cache
}
}
delete[] ntrajL;
delete[] trajS2L;
delete[] trajS1L;
stopTiming();
pass++;
}
delete[] x1;
delete[] x0;
delete[] curT;
delete[] r48;
normalize_bins();
}
void SSA_CPU::normalize_bins()
{
assert( trajS2.size() == trajS1.size() );
assert( ntraj.size() == trajS1.size() );
const int nbins = trajS1.size();
#ifdef _OPENMP
#pragma omp parallel for
#endif
for(int i=0; i < nbins; ++i)
{
trajS1[i]/=ntraj[i];
trajS2[i]/=ntraj[i]; // 2 FLOP, 3 READ, 2 WRITE
}
}
double SSA_CPU::getTransfers() const
{
// TODO: (Optional) Task 1c)
// - return number of read writes in [BYTES]
return 1.0;
}
double SSA_CPU::getFlops() const
{
// TODO: (Optional) Task 1c)
// - return number of floating point operations
return 1.0;
}
#pragma once
#include <cstring>
#include <map>
#include <vector>
#include <string>
using namespace std;
class Value
{
private:
string content;
public:
Value() : content("") {}
Value(string content_) : content(content_) { }
double asDouble(double def=0) const
{
if (content == "") return def;
return (double) atof(content.c_str());
}
int asInt(int def=0) const
{
if (content == "") return def;
return atoi(content.c_str());
}
bool asBool(bool def=false) const
{
if (content == "") return def;
if (content == "0") return false;
if (content == "false") return false;
return true;
}
string asString(string def="") const
{
if (content == "") return def;
return content;
}
};
class ArgumentParser
{
private:
map<string,Value> mapArguments;
const int iArgC;
const char** vArgV;
bool bStrictMode;
public:
Value operator()(const string arg)
{
if (bStrictMode)
{
map<string,Value>::const_iterator it = mapArguments.find(arg);
if (it == mapArguments.end())
{
printf("Runtime option NOT SPECIFIED! ABORTING! name: %s\n",arg.data());
abort();
}
}
return mapArguments[arg];
}
ArgumentParser(const int argc, const char ** argv) : mapArguments(), iArgC(argc), vArgV(argv), bStrictMode(false)
{
for (int i=1; i<argc; i++)
if (argv[i][0] == '-')
{
string values = "";
int itemCount = 0;
for (int j=i+1; j<argc; j++)
if (argv[j][0] == '-')
break;
else
{
if (strcmp(values.c_str(), ""))
values += ' ';
values += argv[j];
itemCount++;
}
if (itemCount == 0)
values += '1';
mapArguments[argv[i]] = Value(values);
i += itemCount;
}
}
int getargc() const {
return iArgC;
}
const char** getargv() const {
return vArgV;
}
void set_strict_mode()
{
bStrictMode = true;
}
void unset_strict_mode()
{
bStrictMode = false;
}
void save_options(string path=".")
{
string options;
for(map<string,Value>::const_iterator it=mapArguments.begin(); it!=mapArguments.end(); it++)
{
options+= it->first + " " + it->second.asString() + " ";
}
string filepath = (path + "/" + string("argumentparser.log"));
FILE * f = fopen(filepath.data(), "a");
if (f == NULL)
{
printf("impossible to write %s.\n", filepath.data());
return;
}
fprintf(f, "%s\n", options.data());
fclose(f);
}
};
#pragma once
#include <stdio.h>
#include <stdlib.h>
#include <cmath>
#include <assert.h>
#include <algorithm>
#include <vector>
#include <fstream>
#include <iomanip>
#include <iostream>
class SSABase
{
protected:
/** Multiplier for number of molecules. */
const int omega;
/** Number of samples of stochastic algo to run. */
const int numSamples;
/** End time for algorithm. */
const double tend;
/** Width of trajectory bin. */
const double bin_dt;
/** Trajectories (binned) for S1*omega and S2*omega (trajS(i) = avg of S for t in [i,i+1]*bin_dt). */
std::vector<double> trajS1;
std::vector<double> trajS2;
std::vector<int> ntraj;
/** Store timing result here. */
double time;
public:
SSABase(int _omega, int num_samples, double T, double bin_dt)
: omega(_omega)
, numSamples(num_samples)
, tend(T)
, bin_dt(bin_dt)
, trajS1(static_cast<unsigned int>(std::ceil(T/bin_dt)))
, trajS2(static_cast<unsigned int>(std::ceil(T/bin_dt)))
, ntraj(static_cast<unsigned int>(std::ceil(T/bin_dt)))
, time(0.0)
{ }
/** Return time taken by start/stopTiming. */
double getTime() {
return time;
}
/** Get omega. */
double getOmega() {
return omega;
}
/**
* Dump trajectories to file (nbins rows and 2 columns).
*/
void dumpTrajToFile(std::string filename) {
// Open file stream
std::ofstream outfile(filename.c_str());
// Dump it
const int nbins = trajS1.size();
int totalevals = 0;
for (int i = 0; i < nbins; ++i) {
// must rescale
outfile << i*bin_dt+bin_dt/2 << " "
<< (trajS1[i] / omega) << " "
<< (trajS2[i] / omega) << std::endl;
totalevals += ntraj[i];
}
std::cout << "Average number of time steps per sample: " << double(totalevals)/numSamples << std::endl;
}
};
#include <iostream>
#include <fstream>
#include <cmath>
class SSABenchmark
{
public:
template< typename SSAType>
static void timeIt(int omega, int num_samples, double T, double dt, int runs)
{
#ifdef _OPENMP
#pragma omp parallel
#endif
{
printf ("Printing one line per active thread....\n");
}
// init
vector<double> times(runs);
vector<double> flops(runs);
vector<double> trfs(runs);
vector<double> perfs(runs);
vector<double> rs(runs);
printf("\n==========================================================================\n");
for (int i=0; i<runs; i++)
{
SSAType ssa(omega, num_samples, T, dt);
// run it
ssa();
ssa.dumpTrajToFile("output.txt");
// check timing and performance
const double time = ssa.getTime();
const double flop = ssa.getFlops();
const double trf = ssa.getTransfers();
const double r = flop/trf;
times[i] = time;
flops[i] = flop;
trfs[i] = trf;
perfs[i] = (flop/time/1e9);
rs[i] = r;
std::cerr<<"time: "<< time <<", "<< flop <<" FLOP, Trf: "<< trf <<" Byte, R: "<<r<<" FLOP/Byte, perf: "<<flop/time/1e9<<" GFLOP/s"<<std::endl;
}
sort(times.begin(),times.end());
sort(flops.begin(),flops.end());
sort(trfs.begin(),trfs.end());
sort(perfs.begin(),perfs.end());
sort(rs.begin(),rs.end());
const int i05 = static_cast<int>(runs*.05);
const int i40 = static_cast<int>(runs*.4);
const int i60 = static_cast<int>(runs*.6);
const int i95 = static_cast<int>(runs*.95);
std::cerr<<"==========================================================================\n";
std::cerr<<"(05th perc.) time: "<< times[i05] <<", "<< flops[i05] <<" FLOP, Trf: "<< trfs[i05] <<" Byte, R: "<<rs[i05]<<" FLOP/Byte, perf: "<<perfs[i05]<<" GFLOP/s\n";
std::cerr<<"(40th perc.) time: "<< times[i40] <<", "<< flops[i40] <<" FLOP, Trf: "<< trfs[i40] <<" Byte, R: "<<rs[i40]<<" FLOP/Byte, perf: "<<perfs[i40]<<" GFLOP/s\n";
std::cerr<<"(60th perc.) time: "<< times[i60] <<", "<< flops[i60] <<" FLOP, Trf: "<< trfs[i60] <<" Byte, R: "<<rs[i60]<<" FLOP/Byte, perf: "<<perfs[i60]<<" GFLOP/s\n";
std::cerr<<"(95th perc.) time: "<< times[i95] <<", "<< flops[i95] <<" FLOP, Trf: "<< trfs[i95] <<" Byte, R: "<<rs[i95]<<" FLOP/Byte, perf: "<<perfs[i95]<<" GFLOP/s\n";
std::cerr<<"=========================================================================="<<std::endl;
}
};
/*
* Execute dimetization with 0815 SSA on CPU.
*/
#pragma once
#include <numeric>
#include <vector>
#include "SSABase.hpp"
#include "Timer.hpp"
using namespace std;
class SSA_CPU : public SSABase
{
private:
double k[4]; // rates
protected:
Timer timer;
int pass;
public:
SSA_CPU(const int omega, const int num_samples, const double T, const double bin_dt)
: SSABase(omega,num_samples,T,bin_dt)
,pass(0)
{
k[0] = 1;
k[1] = 1;
k[2] = 0.2/omega;
k[3] = 20.0*omega;
}
void operator()();
void normalize_bins();
void setRates(double k1, double k2, double k3, double k4) {
k[0] = k1;
k[1] = k2;
k[2] = k3/omega;
k[3] = k4*omega;
}
double getS1() const {