Commit 663299fb authored by Donjan Rodic's avatar Donjan Rodic
Browse files

added exercise 07

parent d8413444
CXX?= c++
CXXFLAGS?= -std=c++11 -Wall -Wextra -pedantic -O3 -march=native
LIBS=
all: gemm
gemm: gemm.cpp matrix.hpp aligned_allocator.hpp
$(CXX) $(CXXFLAGS) gemm.cpp $(LIBS) -o gemm
// Example codes for HPCSE course
// (c) 2012 Andreas Hehn, ETH Zurich
#ifndef HPCSE_ALIGNED_ALLOCATOR_HPP
#define HPCSE_ALIGNED_ALLOCATOR_HPP
#ifdef _WIN32
#include <malloc.h>
#else
#include <cstdlib>
#endif
#if __cplusplus >= 201103L
#define NOEXCEPT_SPEC noexcept
#else
#define NOEXCEPT_SPEC
#endif
namespace hpcse {
// Alignment must be a power of 2 !
template <typename T, unsigned int Alignment>
class aligned_allocator {
public:
typedef T* pointer;
typedef T const* const_pointer;
typedef T& reference;
typedef T const& const_reference;
typedef T value_type;
typedef std::size_t size_type;
typedef std::ptrdiff_t difference_type;
template <typename U>
struct rebind {
typedef aligned_allocator<U,Alignment> other;
};
aligned_allocator() NOEXCEPT_SPEC {
}
aligned_allocator(aligned_allocator const&) NOEXCEPT_SPEC {
}
template <typename U>
aligned_allocator(aligned_allocator<U,Alignment> const& b) NOEXCEPT_SPEC {
}
pointer allocate(size_type n) {
pointer p;
#ifdef _WIN32
p = reinterpret_cast<pointer>(_aligned_malloc(n*sizeof(T), Alignment));
if(p == 0)
throw std::bad_alloc();
#else
if(posix_memalign(reinterpret_cast<void**>(&p), Alignment, n * sizeof(T) ))
throw std::bad_alloc();
#endif
return p;
}
void deallocate(pointer p, size_type) NOEXCEPT_SPEC {
#ifdef _WIN32
_aligned_free(p);
#else
std::free(p);
#endif
}
size_type max_size() const NOEXCEPT_SPEC {
std::allocator<T> a;
return a.max_size();
}
#if __cplusplus >= 201103L
template <typename C, class... Args>
void construct(C* c, Args&&... args) {
new ((void*)c) C(std::forward<Args>(args)...);
}
#else
void construct(pointer p, const_reference t) {
new((void *)p) T(t);
}
#endif
template <typename C>
void destroy(C* c) {
c->~C();
}
bool operator == (aligned_allocator const& a2) const NOEXCEPT_SPEC {
return true;
}
bool operator != (aligned_allocator const& a2) const NOEXCEPT_SPEC {
return false;
}
template <typename U, unsigned int UAlignment>
bool operator == (aligned_allocator<U,UAlignment> const& b) const NOEXCEPT_SPEC {
return false;
}
template <typename U, unsigned int UAlignment>
bool operator != (aligned_allocator<U,UAlignment> const& b) const NOEXCEPT_SPEC {
return true;
}
};
}
#undef NOEXPECT_SPEC
#endif //HPCSE_ALIGNED_ALLOCATOR_HPP
#include <iostream>
#include <cassert>
#include <algorithm>
#include <chrono>
#include "matrix.hpp"
typedef hpcse::matrix<double,hpcse::column_major> matrix_type;
// This function does a naive matrix multiplication c = a*b
// We assume column_major ordering
void gemm(
matrix_type const& a
, matrix_type const& b
, matrix_type& c
){
// Assumtions on the sizes of the matrices
assert(num_rows(a) == num_cols(b));
assert(num_rows(c) == num_rows(a));
assert(num_cols(c) == num_cols(b));
for(unsigned int i=0; i < num_rows(a); ++i) {
for(unsigned int j=0; j < num_cols(b); ++j) {
c(i,j) = 0.0;
for(unsigned int k=0; k < num_cols(a); ++k) {
c(i,j) += a(i,k) * b(k,j);
}
}
}
}
int main() {
matrix_type c(1536,1536);
matrix_type b(1536,1536);
matrix_type a(1536,1536);
// Fill matrices a and b with some values
double x = 0.0;
std::generate_n( a.data(), num_rows(a)*num_cols(a), [&x]() -> double { x+=0.1; return x; });
std::generate_n( b.data(), num_rows(b)*num_cols(b), [&x]() -> double { x-=0.15; return x; });
std::chrono::time_point<std::chrono::high_resolution_clock> start, end;
start = std::chrono::high_resolution_clock::now();
// Do gemm
gemm(a,b,c);
end = std::chrono::high_resolution_clock::now();
double elapsed_seconds = std::chrono::duration<double>(end-start).count();
std::cout <<"GEMM ran " << elapsed_seconds << "s" <<std::endl;
return 0;
}
// Example codes for HPC course
// (c) 2012 Andreas Hehn and Matthias Troyer, ETH Zurich
// a stripped-down matrix class for the examples of the HPC course
#ifndef HPCSE_MATRIX_HPP
#define HPCSE_MATRIX_HPP
#include <cassert>
#include <iostream>
#include <vector>
#include "aligned_allocator.hpp"
namespace hpcse
{
struct column_major {
static unsigned int size1(unsigned int n_rows, unsigned int) {
return n_rows;
}
static unsigned int size2(unsigned int, unsigned int n_cols) {
return n_cols;
}
static std::size_t index(unsigned int n_rows, unsigned int, unsigned int i, unsigned int j)
{
return n_rows*j+i;
}
};
struct row_major{
static unsigned int size1(unsigned int, unsigned int n_cols) {
return n_cols;
}
static unsigned int size2(unsigned int n_rows, unsigned int) {
return n_rows;
}
static std::size_t index(unsigned int, unsigned int n_cols, unsigned int i, unsigned int j)
{
return n_cols*i+j;
}
};
template <typename T, typename Ordering = column_major, typename Allocator = hpcse::aligned_allocator<T,64> >
class matrix {
public:
typedef T value_type;
explicit matrix(unsigned int rows = 0, unsigned int cols = 0, T init = T())
: data_(rows*cols,init)
, n_rows_(rows)
, n_cols_(cols)
{
}
unsigned int num_rows() const {
return n_rows_;
}
unsigned int num_cols() const {
return n_cols_;
}
unsigned int size1() const {
return Ordering::size1(n_rows_,n_cols_);
}
unsigned int size2() const {
return Ordering::size2(n_rows_,n_cols_);
}
unsigned int leading_dimension() const {
return size1();
}
// Element access
value_type& operator()(unsigned int i, unsigned int j) {
assert( i < n_rows_);
assert( j < n_cols_);
return data_[Ordering::index(n_rows_,n_cols_,i,j)];
}
value_type const& operator()(unsigned int i, unsigned int j) const {
assert( i < n_rows_);
assert( j < n_cols_);
return data_[Ordering::index(n_rows_,n_cols_,i,j)];
}
value_type const* data() const {
return data_.empty() ? 0 : &data_.front();
}
value_type* data() {
return data_.empty() ? 0 : &data_.front();
}
friend void swap(matrix& a, matrix& b) {
using std::swap;
swap(a.data_, b.data_);
swap(a.n_rows_, b.n_rows_);
swap(a.n_cols_, b.n_cols_);
}
private:
std::vector<value_type,Allocator> data_;
unsigned int n_rows_;
unsigned int n_cols_;
};
// Get number of rows/colums
template <typename T, typename Ordering, typename Allocator>
unsigned int num_rows(matrix<T,Ordering,Allocator> const& m) {
return m.num_rows();
}
template <typename T, typename Ordering, typename Allocator>
unsigned int num_cols(matrix<T,Ordering,Allocator> const& m) {
return m.num_cols();
}
template <typename T, typename Ordering, typename Allocator>
unsigned int leading_dimension(matrix<T,Ordering,Allocator> const& m) {
return m.leading_dimension();
}
// A nice operator for the output (to write std::cout << matrix )
template <typename T, typename Ordering, typename Allocator>
std::ostream& operator << (std::ostream& os, matrix<T,Ordering,Allocator> const& m) {
std::cout << "[";
for(unsigned int i=0; i < num_rows(m); ++i) {
if(i > 0)
std::cout << "," << std::endl;
std::cout << "[";
for(unsigned int j=0; j < num_cols(m); ++j) {
if(j > 0)
std::cout << ", ";
std::cout << m(i,j);
}
std::cout << "]";
}
std::cout << "]" << std::endl;
return os;
}
} // end namespace hpcse
#endif //HPCSE_MATRIX_HPP
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment