Commit d7c2d3ee authored by Donjan Rodic's avatar Donjan Rodic
Browse files

added ex05 solution

parent d9703c10
......@@ -16,7 +16,7 @@ int main(int argc, char *argv[])
{
double a = 1.0;
double b = 4.0;
unsigned long n = 200UL*35389440UL;
unsigned long n = 1e8;
unsigned int nthreads = 1;
// Get number of threads from program arguments (if provided)
......
/*******************************************************************************
*
* Argument parser v1.2
* (C) 2012-2014 Donjan Rodic <donjan@dyx.ch>
* Licensed under the WTFPL (http://sam.zoy.org/wtfpl/)
*
* Simple argument parser that fetches command line arguments without previous
* declaration, assuming the programmer plans to have defaults for all named
* arguments.
*
*
* REFERENCE
*
* class ArgumentParser {
* ArgumentParser(int, char **);
* T get(char, T);
* T get(std::string, T);
* T get(unsigned int);
* size_t argcFree();
* class RangeError : public std::runtime_error;
* };
*
*
* USAGE
*
* ArgumentParser arg(argc, argv); // early in main
* int answer = arg.get('d', 42); // named argument
* char gender = arg.get("gender", 'm'); // named argument
* std::string targetfile = arg.get(1); // "free" argument
* try{ arg.get(999); } catch(ArgumentParser::RangeError e) {}
* int freeCount = arg.argcFree();
*
* The first argument of get() can be a char, string literal or std::string.
* The second argument defaults the value if not given on the command line.
* "Free" (unnamed) arguments are retrieved with the single-argument get().
* For type casting, check the NOTES ON TYPE CONVERSION below.
* The zero element get(0) is the name of the executable.
* Upon out-of-bounds accessing free arguments, an ArgumentParser::RangeError
* will be thrown.
* The number of free arguments accessible is given by the argcFree() method and
* does not count the zero element: get(argcFree()) is always successful.
*
*
* COMMAND LINE BEHAVIOUR
*
* Both the single-dash (-o) and the double-dash (--out) syntax are supported.
* Double-dash arguments like --out are stored as "out".
* Single-dash arguments are always 1 character wide: -out is stored as 'o' with
* parameter value "ut", enabling the common UNIX CLI syntax -n3 (besides -n 3).
* Arguments followed by either a dash-parameter or nothing (example: -a -b) are
* assumed to be flags and set to "1" or true (in this case both 'a' and 'b').
*
* Negative values can be passed as -n-56, but not -n -56, because the latter
* assumes that -n is a flag and -56 sets the argument '5' to the value "6".
* Currently no negative parameters can be passed to double-dash arguments.
* On duplicate arguments, ArgumentParser will issue a warning to stdout and
* save only the newer (further right) one.
*
*
* NOTES ON TYPE CONVERSION
*
* The return type of get() is defaulted to the type of the second parameter.
* The optional template argument can be omitted if the user is confident that
* automatic type casting is appropriate (which it generally is).
* For example if no argument -d via command line is given:
*
* int res = arg.get ("d", 9.7); // res == 9
* int res = arg.get<int> ("d", 9.7); // res == 9
* int res = arg.get<double>("d", 9.7); // res == 9
* double res = arg.get ("d", 9.7); // res == 9.7
* double res = arg.get<int> ("d", 9.7); // res == 9.0
* double res = arg.get<double>("d", 9.7); // res == 9.7
*
* A special case are second arguments of type char* and const char*:
*
* arg.get('f', "default.out");
*
* and
*
* char defout[] = "default.out";
* arg.get('f', defout);
*
* are both internally converted to
*
* arg.get<std::string>('f', std::string("default.out"));
*
* unless specifically called with a <char*> or <const char*> template parameter.
* Likewise, all unnamed/free arguments are treated as std::string unless called
* with a template parameter:
*
* std::string str = arg.get(1); // str receives a std::string
* std::string str = arg.get<int>(1); // error
* int val = arg.get(1); // error
* int val = arg.get<int>(1); // val receives an integer
*
******************************************************************************/
#pragma once
#include <iostream>
#include <sstream>
#include <cstring>
#include <map>
#include <vector>
#include <stdexcept>
/**
 * Command line parser that collects named and free arguments from argv
 * without requiring them to be declared beforehand.
 *
 * Named arguments ("-a", "-n3", "--name value") are stored as strings keyed by
 * their name without the leading dashes. All other arguments are stored, in
 * order, as free arguments. Values are converted on retrieval by get().
 */
class ArgumentParser {

private:

    // Named arguments: name (without leading dashes) -> raw value string
    std::map<std::string, std::string> arguments;
    // Unnamed ("free") arguments in command line order
    std::vector<std::string> freeArguments;
    // Number of free arguments found at argv positions other than 0
    size_t _argcFree;

    template<typename T> struct fwdtype { };  // simulate method specialisation

public:

    // Thrown by the single-argument get() when the index is out of bounds
    class RangeError : public std::runtime_error {
    public:
        template<typename T>
        RangeError(T param) : std::runtime_error(param) {}
    };

    /**
     * Parses argv[0..argc-1].
     *
     * An argument starting with '-' is a named argument:
     *   --name        stored under "name"
     *   -n            stored under "n"
     *   -n30          stored under "n" with value "30"
     * Otherwise the value is the next argument unless that one starts with a
     * dash or does not exist, in which case the argument is a flag set to "1".
     */
    ArgumentParser(int argc, const char ** argv) : _argcFree(0) {
        for(int i = 0; i < argc; ++i) {
            const std::string str(argv[i]);

            if(str[0] != '-') {                        // free string
                freeArguments.push_back(str);
                if(i != 0) ++_argcFree;                // position 0 is not counted
                continue;
            }

            const bool doubleDash = (str[1] == '-');
            const std::string argname = doubleDash ? str.substr(2)              // --arg
                                                   : std::string(1, str[1]);    // -a

            std::string argval;
            if(!doubleDash && str.size() >= 3)         // we have -n30
                argval = str.substr(2);
            else if(i < argc-1 && argv[i+1][0] != '-') // there is a parameter
                argval = argv[++i];                    // assign and skip one i
            else                                       // it's a flag
                argval = "1";                          // set it to "true"

            check(argname) = argval;
        }

        // Diagnostics
        //~ std::cout << "\nNamed arguments:";
        //~ for(std::map<std::string, std::string>::iterator it = arguments.begin();
        //~     it != arguments.end(); ++it)
        //~   std::cout << "\n" << it->first << " : " << it->second;
        //~ std::cout << "\nFree floating arguments:";
        //~ for(unsigned int k = 0; k < freeArguments.size(); ++k)
        //~   std::cout << "\n" << k << " : " << freeArguments[k];
        //~ std::cout << "\n";
    }

    // Overload for the conventional `int main(int argc, char * argv[])`
    // signature; adding const to the pointee is always safe.
    ArgumentParser(int argc, char ** argv)
        : ArgumentParser(argc, const_cast<const char **>(argv)) {}

    // Number of free arguments, not counting argv position 0
    size_t argcFree() const { return _argcFree; }

private:

    // Return the storage slot for argument s; print a warning to stdout if the
    // argument is already present (the caller then overwrites the old value)
    std::string & check(const std::string & s) {
        if(arguments.find(s) != arguments.end())
            std::cout << "\nArgumentParser duplicate warning for argument: "
                      << s << std::endl;
        return arguments[s];
    }
    std::string & check(const char & c) {
        return check(std::string(1,c));
    }

public:

    // Simulate template specialisation with overloaded implementation methods

    // Named argument lookup: returns the value stored under key converted to
    // T, or dfault if no such argument was given
    template <typename T>
    T get(const std::string & key, const T dfault) const {
        return get_impl(key, dfault, fwdtype<T>());
    }
    template <typename T>
    T get(const char key, const T dfault) const {
        return get(std::string(1, key), dfault);
    }
    // Free argument lookup converted to T; throws RangeError if k is out of
    // bounds
    template <typename T>
    T get(const unsigned int k) const {
        return get_impl(k, fwdtype<T>());
    }

    // Shortcuts for handling constant character strings as default parameters
    std::string get(const std::string & key, const char * dfault) const {
        return get(key, std::string(dfault));
    }
    std::string get(const std::string & key, char * dfault) const {
        return get(key, std::string(dfault));
    }
    std::string get(const char key, const char * dfault) const {
        return get(key, std::string(dfault));
    }
    std::string get(const char key, char * dfault) const {
        return get(key, std::string(dfault));
    }
    // Free arguments are returned as std::string unless a type is requested
    std::string get(const unsigned int k) const {
        return get<std::string>(k);
    }

private:

    // Implementation for named arguments
    template <typename T>
    T get_impl(const std::string & key, const T & dfault, fwdtype<T>) const {
        const std::map<std::string, std::string>::const_iterator found =
            arguments.find(key);
        if(found == arguments.end())
            return dfault;
        T ret = T();
        std::stringstream ss(found->second);
        ss >> ret;
        return ret;
    }

    // Method "specialisation", needed to preserve strings because stringstream
    // stops at whitespaces ("foo bar" gets split up)
    std::string get_impl(const std::string & key, const std::string & dfault,
                         fwdtype<std::string>) const {
        const std::map<std::string, std::string>::const_iterator found =
            arguments.find(key);
        return (found != arguments.end()) ? found->second : dfault;
    }

    // Implementation for free arguments
    template <typename T>
    T get_impl(const unsigned int k, fwdtype<T>) const {
        if(k >= freeArguments.size())
            throw RangeError("Out of bounds access.");
        std::stringstream ss(freeArguments[k]);
        T ret = T();
        ss >> ret;
        return ret;
    }

    // Method "specialisation", needed to preserve strings because stringstream
    // stops at whitespaces ("foo bar" gets split up).
    // This must be a non-template overload: a template parameter that appears
    // only in the return type cannot be deduced, so a templated version would
    // never be selected.
    std::string get_impl(const unsigned int k, fwdtype<std::string>) const {
        if(k >= freeArguments.size())
            throw RangeError("Out of bounds access.");
        return freeArguments[k];
    }

};
# Makefile
# Builds the two benchmark executables `computepower` and `bandwidth`, each
# from the .cpp file of the same name.
SHELL := /bin/bash
#######################
# USER SETUP
#
# All switches below can be overridden on the command line, e.g.
#   make omp=0 numa=0 config=debug
#   omp    = 1 : compile with -fopenmp
#   numa   = 1 : define _USE_NUMA_ and link against libnuma
#   triadd = 1 : define _TRIADD_
#   range  = 1 : define _RANGE_
#   config = release : -O3 -march=native; any other value : -g
CC = g++
omp ?= 1
numa ?= 1
triadd ?= 0
range ?= 0
config ?= release
#######################
# Warning flags are always enabled
CFLAGS = -Wall -Wextra -Wpedantic
# Optimised build for "release", debug symbols otherwise
ifeq "$(config)" "release"
CFLAGS += -O3 -march=native
else
CFLAGS += -g
endif
ifeq "$(omp)" "1"
CFLAGS += -fopenmp
endif
ifeq "$(numa)" "1"
CFLAGS += -D_USE_NUMA_
endif
ifeq "$(triadd)" "1"
CFLAGS += -D_TRIADD_
endif
ifeq "$(range)" "1"
CFLAGS += -D_RANGE_
endif
# Libraries appended to every link line; libnuma only when numa=1
LIBS = -lstdc++ -lm
ifeq "$(numa)" "1"
LIBS += -lnuma
endif
# define paths for source files
VPATH = .
.PHONY: all
all: computepower bandwidth
# Pattern rule: compile and link <name>.cpp into the executable <name>.
# INCLUDES is not set in this file and expands to nothing unless supplied by
# the caller.
# NOTE(review): make requires recipe lines to start with a TAB character;
# confirm that the indentation of the recipe lines below is intact.
%: %.cpp
$(CC) $(INCLUDES) $(CFLAGS) $^ -o $@ $(LIBS)
# Remove object files and the two executables
.PHONY: clean
clean:
rm -f *.o
rm -f computepower bandwidth
/*
* bandwidth.cpp
* Copyright 2016 ETH Zurich. All rights reserved.
*
*/
#include <iostream>
#include <cstdio>
#include <cstdlib>
#include <cmath>
#include <cstring>
#include <map>
#include <algorithm>
#include <limits>
#include <vector>
#include <string>
#include <cassert>
#include <sys/time.h>
#if defined(_OPENMP)
#include <omp.h>
#endif
#include <numa.h>
#include "ArgumentParser.hpp"
using namespace std;
//========================================================================================
//
// Benchmark
//
//========================================================================================
// Returns a wall-clock timestamp in seconds as a double.
// With OpenMP enabled the value comes from omp_get_wtime(); otherwise it is
// built from the seconds and microseconds reported by gettimeofday().
double mysecond() {
#if defined(_OPENMP)
    return omp_get_wtime();
#else
    struct timeval tp;
    // The timezone argument of gettimeofday() is obsolete; pass a null pointer.
    gettimeofday(&tp, nullptr);
    return static_cast<double>(tp.tv_sec) + static_cast<double>(tp.tv_usec) * 1.e-6;
#endif
}
// Copy kernel: writes a[k] into b[k] for every k in [0, N).
// The loop is an OpenMP parallel-for unless _RANGE_ is defined.
void mycopy(const float* const a, float* const b, const int N) {
#if !defined(_RANGE_)
    #pragma omp parallel for
#endif
    for (int k = 0; k < N; ++k) {
        b[k] = a[k];
    }
}
// Add kernel: writes a[k] + c[k] into b[k] for every k in [0, N).
// The loop is an OpenMP parallel-for unless _RANGE_ is defined.
void myadd(const float* const a, float* const b, float* const c, const int N) {
#if !defined(_RANGE_)
    #pragma omp parallel for
#endif
    for (int k = 0; k < N; ++k) {
        const float sum = a[k] + c[k];
        b[k] = sum;
    }
}
vector<double> benchmark(int N, size_t NTIMES) {
#if !defined(_USE_NUMA_)
#define numa_alloc_interleaved malloc
#define numa_free(x,y) free(x)
#endif
float * a = (float*)numa_alloc_interleaved(sizeof(float)*N);
float * b = (float*)numa_alloc_interleaved(sizeof(float)*N);
#if defined(_TRIADD_)
float * c = (float*)numa_alloc_interleaved(sizeof(float)*N);
#endif
// check that the arrays are correctly allocated
if (a == NULL || ((size_t)a & 0xf) != 0) {
cout << "a is NULL! aborting" << endl;
abort();
}
if (b == NULL || ((size_t)b & 0xf) != 0) {
cout << "b is NULL! aborting" << endl;
abort();
}
#if defined(_TRIADD_)
if (c == NULL || ((size_t)c & 0xf) != 0) {
cout << "c is NULL! aborting" << endl;
abort();
}
#endif
// fill the arrays
#if !defined(_RANGE_)
#pragma omp parallel for
#endif
for (int j=0; j<N; j++) {
a[j] = 2.0;
b[j] = 1.0;
#if defined(_TRIADD_)
c[j] = 1.0;
#endif
}
// vector containing timing of each sample
vector<double> result(NTIMES);
// run benchmark
for (uint j=0; j<NTIMES; ++j) {
const double tstart = mysecond();
#if defined(_TRIADD_)
myadd(a, b, c, N);
#else
mycopy(a, b, N);
#endif
const double tend = mysecond();
result[j] = tend - tstart;
swap(a, b);
}
sort(result.begin(), result.end());
numa_free(a, sizeof(float)*N);
numa_free(b, sizeof(float)*N);
#if defined(_TRIADD_)
numa_free(c, sizeof(float)*N);
#endif
return result;
}
// Runs the benchmark for arrays of N elements with NTIMES samples and prints
// one result line: memory footprint in KB, N, and the bandwidth in GB/s
// derived from the median (50th percentile) sample. A column header is printed
// before the first result line only.
//
// The footprint counts two arrays of N elements of `wordsize` bytes, or three
// arrays when _TRIADD_ is defined. The program aborts if no samples can be
// collected.
void driver(const int N, int NTIMES, const int wordsize) {
    // Reject non-positive sample counts before they are converted to size_t
    if (NTIMES < 1) {
        std::cout << "Error: 0 Samples collected. Aborting.\n";
        abort();
    }

    // running benchmarks
    const std::vector<double> timings = benchmark(N, NTIMES);
    if (timings.size() == 0) {
        std::cout << "Error: 0 Samples collected. Aborting.\n";
        abort();
    }

#if defined(_TRIADD_)
    const double arrays = 3.;
#else
    const double arrays = 2.;
#endif
    // Both quantities are computed in double precision: the integer product
    // arrays*wordsize*N would overflow an int for large N.
    const double denom = 1.0E-9 * arrays * wordsize * N;
    const double footprintKB = arrays * wordsize * N / 1024.;

    static bool first = true;
    if (first) {
        std::cout << "Footprint [KB]\t\tN\t\tBandwidth[GB/s]\n";
        first = false;
    }

    // we look at the 50th percentile
    const size_t i2 = timings.size() / 2;
    std::cout << footprintKB << "\t\t" << N << "\t\t" << denom/(timings[i2]) << std::endl;
}
// Entry point of the bandwidth benchmark.
//
// Command line options (all optional):
//   --range       run a sweep of growing array sizes instead of a single size
//   --size <n>    number of array elements (default 40'000'000)
//   --iter <n>    number of timing samples per size (default 100)
//
// Array sizes handed to driver() are rounded to a multiple of 16 elements.
int main(int argc, const char * argv[]) {
    ArgumentParser arg(argc,argv);
    const bool RANGE = arg.get("range", false);
    const int N = arg.get("size", 40'000'000); // ticks require c++14
    const int NTIMES = arg.get("iter", 100);
    const int wordsize = sizeof(float);

    // A non-positive sample count would wrap around when used as a size
    if (NTIMES < 1) {
        std::cout << "The number of iterations must be positive! aborting. " << NTIMES << "\n";
        abort();
    }

    const float KB = 2.*wordsize*N/1024.;
    if (KB < 2) {
        std::cout << "This size is to small! aborting. " << KB << " KB\n" ;
        abort();
    }

    // Largest element count for which rounding up to a multiple of 16 still
    // fits into an int
    const int NMAX = std::numeric_limits<int>::max() - 15;
    if (N > NMAX) {
        std::cout << "This size is too large! aborting. " << N << "\n";
        abort();
    }

    // running benchmarks
    if(!RANGE)
        driver(16*((N+15)/16), NTIMES, wordsize);
    else {
        double myN = N;
        const double NEND = N*5e5;
        // Stop the sweep before the element count leaves the range of int
        while (myN < NEND && myN <= NMAX) {
            driver(16*static_cast<int>((myN+15)/16), NTIMES, wordsize);
            // this factor is arbitrary and can be tuned
            myN *= 1.05;
        }
    }
    return 0;
}
/*
* computepower.cpp
* Copyright 2016 ETH Zurich. All rights reserved.
*
*/
#include <iostream>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <map>
#include <vector>
#include <string>
#include <cassert>
#include <algorithm>
#include <sys/time.h>
#if defined(_OPENMP)
#include <omp.h>
#endif
#include "ArgumentParser.hpp"
// b * (x + a). Every argument is parenthesised so that compound expressions
// passed as arguments keep their intended precedence after expansion.
#define MULADD(b, x, a) ((b)*((x)+(a)))
// Sum of eight values, added pairwise: ((x0+x1)+(x2+x3)) + ((x4+x5)+(x6+x7)).
#define SUM8(x0, x1, x2, x3, x4, x5, x6, x7) ((((x0) + (x1)) + ((x2) + (x3))) + (((x4) + (x5)) + ((x6) + (x7))))
//========================================================================================
//
// Helper Functions
//
//========================================================================================
// timer method
// Returns a wall-clock timestamp in seconds as a double.
// With OpenMP enabled the value comes from omp_get_wtime(); otherwise it is
// built from the seconds and microseconds reported by gettimeofday().
double mysecond() {
#if defined(_OPENMP)
    return omp_get_wtime();
#else
    struct timeval tp;
    // The timezone argument of gettimeofday() is obsolete; pass a null pointer.
    gettimeofday(&tp, nullptr);
    return static_cast<double>(tp.tv_sec) + static_cast<double>(tp.tv_usec) * 1.e-6;
#endif
}
void ComputePower(double * s, int iSize) {
const int static N = iSize; {
// store coefficients in registers
const double a0 = 0.01f;
const double a1 = 0.035f;
const double a2 = 0.001f;
const double a3 = 0.15f;
const double b0 = 0.012f;
const double b1 = 0.067f;
const double b2 = 0.02f;
const double b3 = 0.21f;
// 8 independent streams of computations
// this helps filling the processing pipeline
// 8 data reads
double x0 = s[0];
double y0 = s[0];
double z0 = s[0];
double w0 = s[0];
double r0 = s[0];
double t0 = s[0];
double u0 = s[0];