Commit f5e9cba2 authored by lmeinen's avatar lmeinen
Browse files

implemented sequential lu

parent 000e02e5
#include <fstream>
#include <sys/types.h>
#include <dirent.h>
#include <iostream>
#include <ctime>
#include <iomanip>
#include "bench_util.h"
#ifdef _WIN32
#include <Windows.h>
#else
#include <unistd.h>
#endif
/// Create a recorder; @p projname becomes the prefix of the result file name.
BenchUtil::BenchUtil(string projname)
    : projname(projname)
{
}
/**
 * @brief Count the entries of the current working directory whose name
 *        starts with @p prefix.
 *
 * Every directory entry returned by readdir() is considered, including "."
 * and "..". If the directory cannot be opened, an error is reported through
 * perror() and 0 is returned.
 *
 * @param prefix leading text an entry name must have to be counted
 * @return number of matching entries
 */
unsigned int get_number_of_files(string prefix)
{
    DIR *dir = opendir("./");
    if (dir == NULL)
    {
        perror("Couldn't open the directory");
        return 0;
    }

    unsigned int count = 0;
    for (struct dirent *entry = readdir(dir); entry != NULL; entry = readdir(dir))
    {
        const string filename(entry->d_name);
        // compare() anchored at offset 0 is a true prefix test; find() would
        // also count names that merely contain the text somewhere inside.
        if (filename.compare(0, prefix.size(), prefix) == 0)
        {
            ++count;
        }
    }
    (void)closedir(dir);
    return count;
}
/// Width a column needs so that both its header text and its value text fit.
unsigned int get_column_width(string col_name, string col_value)
{
    const string::size_type name_len = col_name.size();
    const string::size_type value_len = col_value.size();
    return (name_len < value_len) ? value_len : name_len;
}
void BenchUtil::bench_finalize()
{
// output data to file
string output_filename = projname + "-result-";
unsigned int num_output_files = get_number_of_files(output_filename);
ofstream outputfile;
outputfile.open(output_filename + to_string(num_output_files) + ".txt", ios::out | ios::trunc);
printf("Outputting results to file '%s'\n", (output_filename + to_string(num_output_files) + ".txt").c_str());
if (outputfile.is_open())
{
char hostname[256];
if (gethostname(hostname, sizeof(hostname)) != -1)
{
outputfile << "# Node: " << hostname << endl;
}
time_t now = time(0);
char *dt = ctime(&now);
tm *gmtm = gmtime(&now);
outputfile << "# UTC date and time: " << asctime(gmtm);
outputfile << "# Local date and time: " << dt;
outputfile << "# Measurements are in microseconds" << endl;
// set column names
for (list<tuple<string, string>>::iterator param_it = parameters.begin()->second.begin(); param_it != parameters.begin()->second.end(); ++param_it)
{
outputfile << setw(get_column_width(get<0>(*param_it) + " ", get<1>(*param_it) + " ")) << right << " " + get<0>(*param_it);
}
map<unsigned int, list<tuple<string, double>>>::iterator measurement_it = measurements.begin();
auto id = measurement_it->first;
outputfile << setw(8) << right << " id";
for (list<tuple<string, double>>::iterator region_measurement = measurement_it->second.begin(); region_measurement != measurement_it->second.end(); ++region_measurement)
{
string region = " " + get<0>(*region_measurement);
outputfile << setw(max((int)region.size(), 20)) << right << region;
}
outputfile << endl;
// fill columns
for (map<unsigned int, list<tuple<string, double>>>::iterator it = measurements.begin(); it != measurements.end(); ++it)
{
unsigned int id = it->first;
for (list<tuple<string, string>>::iterator param_it = parameters[id].begin(); param_it != parameters[id].end(); ++param_it)
{
outputfile << setw(get_column_width(get<0>(*param_it) + " ", get<1>(*param_it) + " ")) << right << " " + get<1>(*param_it);
}
outputfile << setw(8) << right << id;
for (list<tuple<string, double>>::iterator measurement = it->second.begin(); measurement != it->second.end(); ++measurement)
{
string region = " " + get<0>(*measurement);
double duration_ns = get<1>(*measurement);
outputfile << setw(max((int)region.size(), 20)) << setprecision(6) << fixed << right << duration_ns / 1000;
}
outputfile << endl;
}
outputfile.close();
}
}
/// Take the start timestamp, then remember which run and region it belongs to.
void BenchUtil::bench_start(unsigned int id, string region)
{
    // NOTE(review): the raw tick count is treated as nanoseconds elsewhere in
    // this class; that only holds if the clock's period is 1 ns - confirm.
    const auto now = chrono::high_resolution_clock::now();
    start_ns = now.time_since_epoch().count();
    curr_measurement_id = id;
    curr_region = region;
}
/// Close the region opened by bench_start(), store its duration and return it.
double BenchUtil::bench_stop()
{
    const auto now = chrono::high_resolution_clock::now();
    const auto end_ns = now.time_since_epoch().count();
    // difference of two clock tick counts, taken in double precision
    const double elapsed = end_ns - start_ns;
    measurements[curr_measurement_id].emplace_back(curr_region, elapsed);
    return elapsed;
}
/// Append the (key, value) pair to the parameter list of run @p id.
void BenchUtil::bench_param(unsigned int id, string key, string value)
{
    list<tuple<string, string>> &run_params = parameters[id];
    run_params.emplace_back(key, value);
}
/*
example usage:
int example(int argc, char *argv[])
{
BenchUtil benchUtil("test");
benchUtil.bench_param(0, "key", "val_0");
benchUtil.bench_start(0, "test_region_0");
sleep(1);
auto duration_ns = benchUtil.bench_stop();
printf("slept for %f ms\n", duration_ns / 1000000);
benchUtil.bench_start(0, "test_region_1");
sleep(1); // sleep() takes whole seconds; a fractional argument such as 1.5 is truncated to 1
duration_ns = benchUtil.bench_stop();
printf("slept for %f ms\n", duration_ns / 1000000);
benchUtil.bench_param(1, "key", "val_1");
benchUtil.bench_start(1, "test_region_0");
sleep(2);
duration_ns = benchUtil.bench_stop();
printf("slept for %f ms\n", duration_ns / 1000000);
benchUtil.bench_start(1, "test_region_1");
sleep(1);
duration_ns = benchUtil.bench_stop();
printf("slept for %f ms\n", duration_ns / 1000000);
benchUtil.bench_finalize();
}
*/
\ No newline at end of file
#ifndef BENCH_UTIL_H
#define BENCH_UTIL_H
#include <map>
#include <list>
#include <tuple>
#include <chrono>
using namespace std;
/**
 * @brief Records named parameters and timed regions per experiment run and
 *        writes them to a result file.
 */
class BenchUtil
{
private:
    /// Project name; used as the prefix of the result file name
    string projname;
    /// Timestamp taken by bench_start(), stored as a clock tick count
    double start_ns = 0.0;
    /// Parameters recorded per run id, in insertion order
    map<unsigned int, list<tuple<string, string>>> parameters;
    /// (region, duration) pairs recorded per run id, in insertion order
    map<unsigned int, list<tuple<string, double>>> measurements;
    /// Run id passed to the most recent bench_start()
    unsigned int curr_measurement_id = 0;
    /// Region name passed to the most recent bench_start()
    string curr_region;

public:
    /**
     * @brief Construct a new Bench Util object
     *
     * @param projname Project name to be used in experiment output
     */
    BenchUtil(string projname);
    /// Output all measurement data to file in pretty format
    void bench_finalize();
    /// NOTE(review): no definition of this method is visible next to the
    /// other members; calling it will not link unless one exists elsewhere.
    void bench_debug();
    /**
     * @brief Set the start time of a particular region of an experiment run.
     *
     * The implementation assumes for convenience that each id offers the same regions
     *
     * @param id identifier of the experiment run
     * @param region name of the region being measured
     */
    void bench_start(unsigned int id, string region);
    /**
     * @brief End the current measurement and store duration
     *
     * Note that nested regions are not supported in this implementation
     *
     * @return double measured execution time
     */
    double bench_stop();
    /**
     * @brief Set a parameter for the current experiment
     *
     * The implementation assumes for convenience that each id offers the same parameters
     *
     * @param id identifier of the experiment run
     * @param key name of the parameter to store
     * @param value value of the parameter to store
     */
    void bench_param(unsigned int id, string key, string value);
};
#endif
\ No newline at end of file
/**
* This version is stamped on May 10, 2016
*
* Contact:
* Louis-Noel Pouchet <pouchet.ohio-state.edu>
* Tomofumi Yuki <tomofumi.yuki.fr>
*
* Web address: http://polybench.sourceforge.net
*/
/* lu.c: this file is part of PolyBench/C */
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <math.h>
/* Include polybench common header. */
#include <polybench.h>
/* Include benchmark-specific header. */
#include "lu.h"
/* Array initialization.
   Fills A with a lower-triangular pattern that has a unit diagonal, then
   replaces A by the product of that matrix with its own transpose. */
static
void init_array (int n,
                 DATA_TYPE POLYBENCH_2D(A,N,N,n,n))
{
  int row, col, inner;

  for (row = 0; row < n; row++)
    {
      /* Lower triangle, diagonal included (the diagonal is overwritten below). */
      for (col = 0; col <= row; col++)
        {
          A[row][col] = (DATA_TYPE)(-col % n) / n + 1;
        }
      /* Strict upper triangle. */
      for (col = row + 1; col < n; col++)
        {
          A[row][col] = 0;
        }
      A[row][row] = 1;
    }

  /* Make the matrix positive semi-definite. */
  /* not necessary for LU, but using same code as cholesky */
  POLYBENCH_2D_ARRAY_DECL(B, DATA_TYPE, N, N, n, n);

  /* B = 0 */
  for (row = 0; row < n; ++row)
    {
      for (col = 0; col < n; ++col)
        {
          (POLYBENCH_ARRAY(B))[row][col] = 0;
        }
    }

  /* B += A * transpose(A), accumulated one column of A at a time */
  for (inner = 0; inner < n; ++inner)
    {
      for (row = 0; row < n; ++row)
        {
          for (col = 0; col < n; ++col)
            {
              (POLYBENCH_ARRAY(B))[row][col] += A[row][inner] * A[col][inner];
            }
        }
    }

  /* A = B */
  for (row = 0; row < n; ++row)
    {
      for (col = 0; col < n; ++col)
        {
          A[row][col] = (POLYBENCH_ARRAY(B))[row][col];
        }
    }

  POLYBENCH_FREE_ARRAY(B);
}
/* DCE code. Must scan the entire live-out data.
   Can be used also to check the correctness of the output.
   Dumps every element of A in row-major order to POLYBENCH_DUMP_TARGET. */
static
void print_array(int n,
                 DATA_TYPE POLYBENCH_2D(A,N,N,n,n))
{
  int row, col;

  POLYBENCH_DUMP_START;
  POLYBENCH_DUMP_BEGIN("A");
  for (row = 0; row < n; row++)
    {
      for (col = 0; col < n; col++)
        {
          /* Start a new output line before every 20th element. */
          const int flat_index = row * n + col;
          if (flat_index % 20 == 0)
            {
              fprintf (POLYBENCH_DUMP_TARGET, "\n");
            }
          fprintf (POLYBENCH_DUMP_TARGET, DATA_PRINTF_MODIFIER, A[row][col]);
        }
    }
  POLYBENCH_DUMP_END("A");
  POLYBENCH_DUMP_FINISH;
}
/* Main computational kernel. The whole function will be timed,
including the call and return.

In-place LU factorisation of A, processed row by row and without pivoting:
- for j < i, A[i][j] is reduced by the products A[i][k] * A[k][j] (k < j)
  and then divided by the diagonal element A[j][j];
- for j >= i, A[i][j] is reduced by the products A[i][k] * A[k][j] (k < i).
The division uses A[j][j] exactly as stored; a zero diagonal element is not
checked for. */
static
void kernel_lu(int n,
DATA_TYPE POLYBENCH_2D(A,N,N,n,n))
{
int i, j, k;
#pragma scop
for (i = 0; i < _PB_N; i++) {
for (j = 0; j <i; j++) {
for (k = 0; k < j; k++) {
A[i][j] -= A[i][k] * A[k][j];
}
A[i][j] /= A[j][j];
}
for (j = i; j < _PB_N; j++) {
for (k = 0; k < i; k++) {
A[i][j] -= A[i][k] * A[k][j];
}
}
}
#pragma endscop
}
/* Benchmark driver: allocates an N x N array, initialises it, runs kernel_lu
   between the PolyBench timer macros, passes the result to print_array
   through polybench_prevent_dce, and frees the array. argc and argv are not
   used. */
int main(int argc, char** argv)
{
/* Retrieve problem size. */
int n = N;
/* Variable declaration/allocation. */
POLYBENCH_2D_ARRAY_DECL(A, DATA_TYPE, N, N, n, n);
/* Initialize array(s). */
init_array (n, POLYBENCH_ARRAY(A));
/* Start timer. */
polybench_start_instruments;
/* Run kernel. */
kernel_lu (n, POLYBENCH_ARRAY(A));
/* Stop and print timer. */
polybench_stop_instruments;
polybench_print_instruments;
/* Prevent dead-code elimination. All live-out data must be printed
by the function call in argument. */
polybench_prevent_dce(print_array(n, POLYBENCH_ARRAY(A)));
/* Be clean. */
POLYBENCH_FREE_ARRAY(A);
return 0;
}
#include <iostream>
#include <random>
#include <chrono>
#include <iomanip>
#include "matrix_operations.h"
// Element type selection: with DOUBLE_PRECISION defined (as it is on the next
// line) Scalar is double, otherwise float.
// NOTE(review): the visible part of Matrix stores a double*, so switching
// Scalar to float looks incompatible with passing Scalar buffers to Matrix -
// confirm before removing the define.
#define DOUBLE_PRECISION
#ifdef DOUBLE_PRECISION
using Scalar = double;
#else
using Scalar = float;
#endif
// Declared here only; the definitions live in the translation unit that
// includes this file. Neither variable is read by the functions below.
extern int matrix_size;
extern int block_size;
/**
 * @brief Fill an n-by-n matrix with random values drawn uniformly from [-1, 1).
 *
 * The generator is seeded from std::random_device on every call, so the
 * contents differ between calls.
 *
 * @param n matrix dimension; nothing is written when n is not positive
 * @param A buffer with room for at least n * n elements
 */
void randomize_matrix(int n, Scalar *A)
{
    if (n <= 0)
    {
        return;
    }
    // widen before multiplying: n * n evaluated in int overflows for large n
    const std::size_t count = static_cast<std::size_t>(n) * static_cast<std::size_t>(n);

    std::random_device rd;
    std::mt19937 mt(rd());
    std::uniform_real_distribution<Scalar> dist(-1, 1);
    for (std::size_t i = 0; i < count; ++i)
    {
        A[i] = dist(mt);
    }
}
/**
 * @brief In-place LU factorisation of A, row by row and without pivoting.
 *
 * For j < i the entry A(i, j) is reduced by A(i, k) * A(k, j) for k < j and
 * then divided by the diagonal entry A(j, j); for j >= i it is reduced by
 * A(i, k) * A(k, j) for k < i. Diagonal entries are used as stored, so a zero
 * on the diagonal leads to a division by zero. Only A.n is used as the
 * dimension. Despite its name, the loop nest does no blocking.
 *
 * @param A matrix to factorise; overwritten with the result
 */
void blocked_lu(Matrix &A)
{
    // indices share the unsigned type of A.n and of Matrix::operator()
    const size_t n = A.n;
#pragma scop
    for (size_t i = 0; i < n; i++)
    {
        for (size_t j = 0; j < i; j++)
        {
            for (size_t k = 0; k < j; k++)
            {
                A(i, j) -= A(i, k) * A(k, j);
            }
            A(i, j) /= A(j, j);
        }
        for (size_t j = i; j < n; j++)
        {
            for (size_t k = 0; k < i; k++)
            {
                A(i, j) -= A(i, k) * A(k, j);
            }
        }
    }
#pragma endscop
}
/**
* This version is stamped on May 10, 2016
*
* Contact:
* Louis-Noel Pouchet <pouchet.ohio-state.edu>
* Tomofumi Yuki <tomofumi.yuki.fr>
*
* Web address: http://polybench.sourceforge.net
*/
/* Include guard for the benchmark-specific configuration of lu. */
#ifndef _LU_H
# define _LU_H
/* Default to LARGE_DATASET. */
# if !defined(MINI_DATASET) && !defined(SMALL_DATASET) && !defined(MEDIUM_DATASET) && !defined(LARGE_DATASET) && !defined(EXTRALARGE_DATASET)
# define LARGE_DATASET
# endif
/* N is the matrix dimension; a value supplied from outside takes precedence
   over the dataset presets below. */
# if !defined(N)
/* Define sample dataset sizes. */
# ifdef MINI_DATASET
# define N 40
# endif
# ifdef SMALL_DATASET
# define N 120
# endif
# ifdef MEDIUM_DATASET
# define N 400
# endif
# ifdef LARGE_DATASET
# define N 2000
# endif
# ifdef EXTRALARGE_DATASET
# define N 4000
# endif
#endif /* !(N) */
/* Loop bound used inside the kernel; POLYBENCH_LOOP_BOUND is provided by the
   PolyBench common header. */
# define _PB_N POLYBENCH_LOOP_BOUND(N,n)
/* Default data type */
# if !defined(DATA_TYPE_IS_INT) && !defined(DATA_TYPE_IS_FLOAT) && !defined(DATA_TYPE_IS_DOUBLE)
# define DATA_TYPE_IS_DOUBLE
# endif
/* Integer elements: only the type and its printf format are defined; the
   SCALAR_VAL / *_FUN helpers exist for the floating-point types only. */
#ifdef DATA_TYPE_IS_INT
# define DATA_TYPE int
# define DATA_PRINTF_MODIFIER "%d "
#endif
/* Single-precision elements and the matching math function wrappers. */
#ifdef DATA_TYPE_IS_FLOAT
# define DATA_TYPE float
# define DATA_PRINTF_MODIFIER "%0.2f "
# define SCALAR_VAL(x) x##f
# define SQRT_FUN(x) sqrtf(x)
# define EXP_FUN(x) expf(x)
# define POW_FUN(x,y) powf(x,y)
# endif
/* Double-precision elements and the matching math function wrappers. */
#ifdef DATA_TYPE_IS_DOUBLE
# define DATA_TYPE double
# define DATA_PRINTF_MODIFIER "%0.2lf "
# define SCALAR_VAL(x) x
# define SQRT_FUN(x) sqrt(x)
# define EXP_FUN(x) exp(x)
# define POW_FUN(x,y) pow(x,y)
# endif
#endif /* !_LU_H */
#include <stdio.h>
#include <exception>
#include <string>
#include <vector>
#include "matrix_operations.h"
#include "lu.cpp"
#include "bench_util.h"
// Configuration of the measurement currently running; main() assigns all
// three before each call to bench_sequential().
// matrix_size and block_size are the definitions behind the extern
// declarations in lu.cpp.
int matrix_size;
int block_size;
// Number of blocks per matrix dimension, rounded up; written by main() and
// not read anywhere else in this file.
int blocks_n;
/**
 * @brief Benchmark for Sequential: time blocked_lu() on freshly randomised matrices.
 *
 * Each of the num_runs repetitions gets its own run id, records its
 * parameters ("type", "num_threads", "run", "matrix_size") and one
 * "execution_time" region that covers only the blocked_lu() call. A dot is
 * printed to stdout per repetition as a progress indicator.
 *
 * @param run_id first run id to use
 * @param bench recorder that receives parameters and measurements
 * @param num_runs number of repetitions
 * @param matrix_size dimension of the square matrix; expected to be positive
 * @param num_threads recorded as a parameter only
 * @return the first run id not used by this call
 */
int bench_sequential(int run_id, BenchUtil &bench, int num_runs, int matrix_size, int num_threads)
{
    // widen before multiplying so the element count is not computed in int
    const size_t element_count = static_cast<size_t>(matrix_size) * static_cast<size_t>(matrix_size);
    for (int i = 0; i < num_runs; i++)
    {
        printf(".");
        fflush(stdout);
        // generate matrix
        std::vector<Scalar> matrix_data(element_count);
        randomize_matrix(matrix_size, matrix_data.data());
        Matrix A(matrix_size, matrix_data.data());
        bench.bench_param(run_id, "type", "Sequential");
        bench.bench_param(run_id, "num_threads", std::to_string(num_threads));
        bench.bench_param(run_id, "run", std::to_string(i));
        bench.bench_param(run_id, "matrix_size", std::to_string(matrix_size));
        bench.bench_start(run_id, "execution_time");
        blocked_lu(A);
        /* Record and complete the current measure */
        bench.bench_stop();
        ++run_id;
    }
    return run_id;
}
/// call as: <binary> num_threads num_runs matrix_size_0 block_size_0 [matrix_size_1 block_size_1 ...]
///
/// Parses and validates every argument before the first measurement starts,
/// runs bench_sequential() once per (matrix_size, block_size) pair and writes
/// the collected results through BenchUtil::bench_finalize().
/// Returns 0 on success and -1 when the arguments are unusable.
int main(int argc, char **argv)
{
    // init
    BenchUtil bench("lu_sequential");
    // program name + two fixed arguments + at least one complete size pair
    if (argc < 5 || argc % 2 != 1)
    {
        printf("incorrect number of args\n");
        return -1;
    }

    struct Config
    {
        int matrix_size;
        int block_size;
    };
    int num_threads = 0;
    int num_runs = 0;
    std::vector<Config> configs;
    try
    {
        num_threads = std::stoi(argv[1]);
        num_runs = std::stoi(argv[2]);
        for (int i = 3; i + 1 < argc; i += 2)
        {
            const Config config = {std::stoi(argv[i]), std::stoi(argv[i + 1])};
            configs.push_back(config);
        }
    }
    catch (const std::exception &e)
    {
        // std::stoi throws for non-numeric or out-of-range text
        printf("invalid numeric argument: %s\n", e.what());
        return -1;
    }
    for (const Config &config : configs)
    {
        // block_size is used as a divisor below, matrix_size as an allocation size
        if (config.matrix_size <= 0 || config.block_size <= 0)
        {
            printf("matrix and block sizes must be positive\n");
            return -1;
        }
    }

    int run_id = 0;
    /* Perform the measurements */
    for (const Config &config : configs)
    {
        printf("Running measurements for matrix size %d", config.matrix_size);
        fflush(stdout);
        matrix_size = config.matrix_size;
        block_size = config.block_size;
        blocks_n = (matrix_size + block_size - 1) / block_size;
        run_id = bench_sequential(run_id, bench, num_runs, matrix_size, num_threads);
        printf("done\n");
    }
    bench.bench_finalize();
    return 0;
}
# Toolchain and flags for the sequential LU benchmark.
CC := gcc
CXX := g++
OPTS := -std=c++11 -mavx

.PHONY: all lu_bench clean

all: lu_bench

# Convenience alias: the executable itself is written to lu_bench.o, so the
# real file target below is what decides whether a rebuild is needed.
lu_bench: lu_bench.o

# lu.cpp is not passed to the compiler because lu_bench.cpp #includes it.
lu_bench.o: lu.cpp bench_util.cpp lu_bench.cpp bench_util.h matrix_operations.h
	$(CXX) bench_util.cpp lu_bench.cpp -O3 $(OPTS) -o lu_bench.o

clean:
	rm -f *.o
#ifndef MATRIX_OPERATIONS_H_
#define MATRIX_OPERATIONS_H_
///
/// Matrix in row-major
///
struct Matrix {
double* data;
size_t n, m;
size_t stride;
Matrix(void) = default;
inline Matrix(size_t n, double* data) :
data{ data }, n{ n }, m{ n }, stride{ n } {}
inline const double& operator() (size_t i, size_t j) const {
return data[i*stride + j];
}
inline double& operator() (size_t i, size_t j) {
return data[i*stride + j];
}