Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
lecture
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Incidents
Environments
Packages & Registries
Packages & Registries
Package Registry
Container Registry
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
pt1_hs20
lecture
Commits
663d0203
Commit
663d0203
authored
Nov 26, 2020
by
Ignacio Labarca Figueroa
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[ex10] Solutions
parent
3477d708
Changes
10
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
368 additions
and
0 deletions
+368
-0
exercises/ex10/solutions/matrix_multiplication/CMakeLists.txt
...cises/ex10/solutions/matrix_multiplication/CMakeLists.txt
+20
-0
exercises/ex10/solutions/matrix_multiplication/main.cpp
exercises/ex10/solutions/matrix_multiplication/main.cpp
+115
-0
exercises/ex10/solutions/matrix_multiplication/matrix_multiplication.hpp
...solutions/matrix_multiplication/matrix_multiplication.hpp
+21
-0
exercises/ex10/solutions/matrix_multiplication/mm0.cpp
exercises/ex10/solutions/matrix_multiplication/mm0.cpp
+18
-0
exercises/ex10/solutions/matrix_multiplication/mm1.cpp
exercises/ex10/solutions/matrix_multiplication/mm1.cpp
+20
-0
exercises/ex10/solutions/matrix_multiplication/mm2.cpp
exercises/ex10/solutions/matrix_multiplication/mm2.cpp
+30
-0
exercises/ex10/solutions/matrix_multiplication/mm3.cpp
exercises/ex10/solutions/matrix_multiplication/mm3.cpp
+62
-0
exercises/ex10/solutions/matrix_multiplication/mm_blas.cpp
exercises/ex10/solutions/matrix_multiplication/mm_blas.cpp
+27
-0
exercises/ex10/solutions/matrix_multiplication/mm_eigen.cpp
exercises/ex10/solutions/matrix_multiplication/mm_eigen.cpp
+15
-0
exercises/ex10/solutions/matrix_multiplication/plot.py
exercises/ex10/solutions/matrix_multiplication/plot.py
+40
-0
No files found.
exercises/ex10/solutions/matrix_multiplication/CMakeLists.txt
0 → 100644
View file @
663d0203
# Programming Techniques for Scientific Simulations I
# HS 2020
# Exercise 10

cmake_minimum_required(VERSION 3.15)

project(ex10-matrix-multiplication)

# The executable is a benchmark: without an explicit build type CMake passes no
# optimization flags at all, which makes the measured timings meaningless.
# Default to Release for single-configuration generators unless the user chose
# something else on the command line.
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
  set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type (default: Release)" FORCE)
endif()

# Strict ISO C++17, no compiler extensions.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED TRUE)
set(CMAKE_CXX_EXTENSIONS FALSE)

# -march=native enables the AVX intrinsics used by mm3.cpp on the build host.
add_compile_options(-Wall -Wextra -Wpedantic -march=native)

add_executable(main main.cpp mm0.cpp mm1.cpp mm2.cpp mm3.cpp mm_blas.cpp mm_eigen.cpp)

# Reference BLAS (dgemm_) used by mm_blas.cpp.
find_package(BLAS REQUIRED)
target_link_libraries(main PRIVATE ${BLAS_LINKER_FLAGS} ${BLAS_LIBRARIES})

# Eigen used by mm_eigen.cpp.
find_package(Eigen3 REQUIRED NO_MODULE)
target_link_libraries(main PRIVATE Eigen3::Eigen)
exercises/ex10/solutions/matrix_multiplication/main.cpp
0 → 100644
View file @
663d0203
/*
* Programming Techniques for Scientific Simulations I
* HS 2020
* Exercise 10
*/
#include <iostream>
#include <chrono>
#include <cstdlib>
#include <random>
#include "matrix_multiplication.hpp"
/**
 * Time a single invocation of a matrix-multiplication kernel.
 *
 * The first N * N entries of C are reset to zero before the clock starts, so
 * kernels that accumulate into C (C += A * B) start from a clean output and
 * the reset itself is not part of the measurement.
 *
 * @param f  kernel to run
 * @param A  left operand, passed through to f
 * @param B  right operand, passed through to f
 * @param C  output matrix; must hold at least N * N elements
 * @param N  matrix dimension passed through to f
 * @return   elapsed wall-clock time of the call f(A, B, C, N) in seconds
 */
double benchmark(function_t f, matrix_t const& A, matrix_t const& B, matrix_t& C,
                 std::size_t N) noexcept {
  for (std::size_t i = 0; i < N * N; ++i) {
    C[i] = 0;
  }

  // steady_clock is guaranteed monotonic; high_resolution_clock may alias the
  // adjustable system clock and is therefore unsuitable for measuring intervals.
  using clock_type = std::chrono::steady_clock;

  auto const start = clock_type::now();
  f(A, B, C, N);
  auto const stop = clock_type::now();

  return std::chrono::duration<double>(stop - start).count();
}
int
main
()
{
std
::
mt19937
gen
;
std
::
uniform_real_distribution
<
double
>
dis
(
0
,
1
);
{
constexpr
int
N
=
16
;
constexpr
double
tolerance
=
1e-6
;
matrix_t
A
(
N
*
N
);
matrix_t
B
(
N
*
N
);
for
(
std
::
size_t
j
=
0
;
j
<
N
;
++
j
)
{
for
(
std
::
size_t
i
=
0
;
i
<
N
;
++
i
)
{
A
[
i
+
j
*
N
]
=
dis
(
gen
);
B
[
i
+
j
*
N
]
=
dis
(
gen
);
}
}
#define TEST(f) do { \
matrix_t C(N * N); \
for (int i = 0; i < N * N; ++i) { \
C[i] = 0; \
} \
f(A, B, C, N); \
for (int i = 0; i < N; ++i) { \
for (int j = 0; j < N; ++j) { \
if (std::abs(C0[i + j * N] - C[i + j * N]) > tolerance) { \
std::cerr << #f << " incorrect" << '\n' << "C0" << '\n'; \
for (int j = 0; j < N; ++j) { \
for (int i = 0; i < N; ++i) { \
std::cerr << C0[i + j * N] << ' '; \
} \
std::cerr << '\n'; \
} \
std::cerr << "C" << '\n'; \
for (int j = 0; j < N; ++j) { \
for (int i = 0; i < N; ++i) { \
std::cerr << C[i + j * N] << ' '; \
} \
std::cerr << '\n'; \
} \
exit(1); \
} \
} \
} \
} while (false)
matrix_t
C0
(
N
*
N
);
mm0
(
A
,
B
,
C0
,
N
);
TEST
(
mm0
);
TEST
(
mm1
);
TEST
(
mm2
);
TEST
(
mm3
);
TEST
(
mm_blas
);
TEST
(
mm_eigen
);
}
constexpr
int
runs
=
10
;
#define BENCHMARK(f) do { \
for (int i = 0; i < runs; ++i) { \
auto time = benchmark(f, A, B, C, N); \
std::cout << #f << ',' << N << ',' << time << std::endl; \
} \
} while (false)
std
::
cout
<<
"function,size,time"
<<
std
::
endl
;
for
(
int
N
=
4
;
N
<=
1024
;
N
*=
2
)
{
matrix_t
A
(
N
*
N
);
matrix_t
B
(
N
*
N
);
matrix_t
C
(
N
*
N
);
for
(
std
::
size_t
i
=
0
;
i
<
N
*
N
;
++
i
)
{
A
[
i
]
=
dis
(
gen
);
B
[
i
]
=
dis
(
gen
);
}
BENCHMARK
(
mm0
);
BENCHMARK
(
mm1
);
BENCHMARK
(
mm2
);
BENCHMARK
(
mm3
);
BENCHMARK
(
mm_blas
);
BENCHMARK
(
mm_eigen
);
}
#undef BENCHMARK
}
exercises/ex10/solutions/matrix_multiplication/matrix_multiplication.hpp
0 → 100644
View file @
663d0203
/*
* Programming Techniques for Scientific Simulations I
* HS 2020
* Exercise 10
*/
#pragma once
#include <cstddef>
#include <cstdint>
#include <vector>
/// Scalar element type of all matrices.
using value_t = double;

/// Square matrix stored as one contiguous array; the kernels address the
/// element (i, j) of an N x N matrix as [i + j * N].
using matrix_t = std::vector<value_t>;

/// Common signature of all multiplication kernels: (A, B, C, N), where A and B
/// are the inputs, C is the output and N is the matrix dimension.
using function_t = void(*)(matrix_t const&, matrix_t const&, matrix_t&, std::size_t N);

// mm0 .. mm3 accumulate the product into C (C += A * B), so the caller has to
// zero C beforehand; mm_blas and mm_eigen overwrite C with A * B.

/// Trivial triple-loop implementation.
void mm0(matrix_t const& A, matrix_t const& B, matrix_t& C, std::size_t N) noexcept;

/// Loop order inverted for locality, with the B element lifted out of the
/// innermost loop.
void mm1(matrix_t const& A, matrix_t const& B, matrix_t& C, std::size_t N) noexcept;

/// Blocked (tiled) implementation.
void mm2(matrix_t const& A, matrix_t const& B, matrix_t& C, std::size_t N) noexcept;

/// Blocked implementation, manually vectorized with AVX intrinsics.
void mm3(matrix_t const& A, matrix_t const& B, matrix_t& C, std::size_t N) noexcept;

/// Forwards to the BLAS routine dgemm_.
void mm_blas(matrix_t const& A, matrix_t const& B, matrix_t& C, std::size_t N) noexcept;

/// Computes the product with Eigen on mapped views of the three vectors.
void mm_eigen(matrix_t const& A, matrix_t const& B, matrix_t& C, std::size_t N) noexcept;
exercises/ex10/solutions/matrix_multiplication/mm0.cpp
0 → 100644
View file @
663d0203
/*
* Programming Techniques for Scientific Simulations I
* HS 2020
* Exercise 10
*/
#include "matrix_multiplication.hpp"
// Trivial implementation
//
// Accumulates the product of the N x N matrices A and B into C, i.e.
// C[i + j * N] += sum over k of A[i + k * N] * B[k + j * N].
// C is not cleared here; the caller decides what it starts from.
void mm0(matrix_t const& A, matrix_t const& B, matrix_t& C, std::size_t N) noexcept {
  for (std::size_t row = 0; row != N; ++row) {
    for (std::size_t col = 0; col != N; ++col) {
      // Update the output element in place on every step of the reduction.
      value_t& c_rc = C[row + col * N];
      for (std::size_t inner = 0; inner != N; ++inner) {
        c_rc += A[row + inner * N] * B[inner + col * N];
      }
    }
  }
}
exercises/ex10/solutions/matrix_multiplication/mm1.cpp
0 → 100644
View file @
663d0203
/*
* Programming Techniques for Scientific Simulations I
* HS 2020
* Exercise 10
*/
#include "matrix_multiplication.hpp"
// Invert loop order for locality
// Lift B out of innermost loop
//
// Accumulates A * B into C. The innermost loop runs over the fastest-varying
// index i of both C[i + j * N] and A[i + k * N], so it walks contiguous
// memory; the B element is the same for the whole inner loop and is read once.
void mm1(matrix_t const& A, matrix_t const& B, matrix_t& C, std::size_t N) noexcept {
  for (std::size_t col = 0; col != N; ++col) {
    std::size_t const c_offset = col * N;
    for (std::size_t inner = 0; inner != N; ++inner) {
      std::size_t const a_offset = inner * N;
      value_t const scale = B[inner + c_offset];
      for (std::size_t row = 0; row != N; ++row) {
        C[row + c_offset] += A[row + a_offset] * scale;
      }
    }
  }
}
exercises/ex10/solutions/matrix_multiplication/mm2.cpp
0 → 100644
View file @
663d0203
/*
* Programming Techniques for Scientific Simulations I
* HS 2020
* Exercise 10
*/
#include "matrix_multiplication.hpp"
#include <cmath> // for std::min
namespace {

/// Largest r with r * r <= x. std::sqrt is not a constant expression in
/// standard C++17, so the block size is derived with this helper instead.
constexpr std::size_t floor_sqrt(std::size_t x) noexcept {
  std::size_t r = 0;
  while ((r + 1) * (r + 1) <= x) {
    ++r;
  }
  return r;
}

}  // namespace

// Blocking
//
// Accumulates A * B into C (C is not cleared here), processing the matrices in
// n x n tiles. n is chosen such that three tiles of doubles (one each of A, B
// and C) fit into a cache of L1 bytes.
void mm2(matrix_t const& A, matrix_t const& B, matrix_t& C, std::size_t N) noexcept {
  constexpr std::size_t L1 = 1 << 15;  // assumed L1 data cache size in bytes
  constexpr std::size_t n = floor_sqrt(L1 / (3 * sizeof(double)));
  static_assert(n > 0, "block size must be positive");

  for (std::size_t j = 0; j < N; j += n) {
    std::size_t const j_end = (j + n < N) ? j + n : N;  // clip the last tile
    for (std::size_t k = 0; k < N; k += n) {
      std::size_t const k_end = (k + n < N) ? k + n : N;
      for (std::size_t i = 0; i < N; i += n) {
        std::size_t const i_end = (i + n < N) ? i + n : N;

        // Macro-kernel: same loop order as mm1, restricted to one tile.
        for (std::size_t jj = j; jj < j_end; ++jj) {
          for (std::size_t kk = k; kk < k_end; ++kk) {
            auto const b = B[kk + jj * N];
            for (std::size_t ii = i; ii < i_end; ++ii) {
              C[ii + jj * N] += A[ii + kk * N] * b;
            }
          }
        }
      }
    }
  }
}
exercises/ex10/solutions/matrix_multiplication/mm3.cpp
0 → 100644
View file @
663d0203
/*
* Programming Techniques for Scientific Simulations I
* HS 2020
* Exercise 10
*/
#include "matrix_multiplication.hpp"
#include <cmath> // for std::min
#include <immintrin.h>
// Manual vectorization using intrinsics
void
mm3
(
matrix_t
const
&
A
,
matrix_t
const
&
B
,
matrix_t
&
C
,
std
::
size_t
N
)
noexcept
{
constexpr
std
::
size_t
L1
=
1
<<
15
;
constexpr
std
::
size_t
n
=
std
::
sqrt
(
L1
/
(
3.0
*
sizeof
(
double
)));
constexpr
int
v
=
4
;
for
(
std
::
size_t
j
=
0
;
j
<
N
;
j
+=
n
)
{
for
(
std
::
size_t
k
=
0
;
k
<
N
;
k
+=
n
)
{
for
(
std
::
size_t
i
=
0
;
i
<
N
;
i
+=
n
)
{
// Macro-kernel
for
(
std
::
size_t
jj
=
j
;
jj
<
std
::
min
(
j
+
n
,
N
);
jj
+=
v
)
{
for
(
std
::
size_t
kk
=
k
;
kk
<
std
::
min
(
k
+
n
,
N
);
kk
+=
v
)
{
for
(
std
::
size_t
ii
=
i
;
ii
<
std
::
min
(
i
+
n
,
N
);
ii
+=
v
)
{
// Micro-kernel
auto
a
=
A
.
data
()
+
ii
+
kk
*
N
;
auto
b
=
B
.
data
()
+
kk
+
jj
*
N
;
auto
c
=
C
.
data
()
+
ii
+
jj
*
N
;
auto
a0_
=
_mm256_loadu_pd
(
a
+
0
*
N
);
auto
a1_
=
_mm256_loadu_pd
(
a
+
1
*
N
);
auto
a2_
=
_mm256_loadu_pd
(
a
+
2
*
N
);
auto
a3_
=
_mm256_loadu_pd
(
a
+
3
*
N
);
for
(
int
cc
=
0
;
cc
<
4
;
++
cc
)
{
auto
b00
=
_mm256_broadcast_sd
(
b
+
0
+
cc
*
N
);
auto
b01
=
_mm256_broadcast_sd
(
b
+
1
+
cc
*
N
);
auto
b02
=
_mm256_broadcast_sd
(
b
+
2
+
cc
*
N
);
auto
b03
=
_mm256_broadcast_sd
(
b
+
3
+
cc
*
N
);
auto
c0_
=
_mm256_loadu_pd
(
c
+
cc
*
N
);
c0_
=
_mm256_add_pd
(
c0_
,
_mm256_add_pd
(
_mm256_add_pd
(
_mm256_mul_pd
(
a0_
,
b00
),
_mm256_mul_pd
(
a1_
,
b01
)
),
_mm256_add_pd
(
_mm256_mul_pd
(
a2_
,
b02
),
_mm256_mul_pd
(
a3_
,
b03
)
)
)
);
_mm256_storeu_pd
(
c
+
cc
*
N
,
c0_
);
}
}
}
}
}
}
}
}
exercises/ex10/solutions/matrix_multiplication/mm_blas.cpp
0 → 100644
View file @
663d0203
/*
* Programming Techniques for Scientific Simulations I
* HS 2020
* Exercise 10
*/
#include "matrix_multiplication.hpp"
// Fortran BLAS routine DGEMM: C := alpha * op(A) * op(B) + beta * C.
// Fortran passes every argument by address, which is expressed here with
// references for the scalar arguments and pointers for the arrays.
extern "C" void dgemm_(char const& TRANSA, char const& TRANSB,
                       int const& M, int const& N, int const& K,
                       double const& alpha,
                       double const* A, int const& LDA,
                       double const* B, int const& LDB,
                       double const& beta,
                       double* C, int const& LDC);

// Multiply via BLAS: C = 1 * A * B + 0 * C for square N x N matrices whose
// leading dimension equals N. Because beta is zero, the previous contents of
// C do not enter the result.
void mm_blas(matrix_t const& A, matrix_t const& B, matrix_t& C, std::size_t N) noexcept {
  char const no_transpose = 'N';
  int const dim = static_cast<int>(N);  // BLAS takes default INTEGER dimensions
  double const alpha = 1;
  double const beta = 0;

  dgemm_(no_transpose, no_transpose,
         dim, dim, dim,
         alpha,
         A.data(), dim,
         B.data(), dim,
         beta,
         C.data(), dim);
}
exercises/ex10/solutions/matrix_multiplication/mm_eigen.cpp
0 → 100644
View file @
663d0203
/*
* Programming Techniques for Scientific Simulations I
* HS 2020
* Exercise 10
*/
#include "matrix_multiplication.hpp"
#include <Eigen/Core>
// Multiply with Eigen: C = A * B.
//
// The three vectors are viewed in place as N x N Eigen matrices (no copies);
// Eigen::MatrixXd is column-major, which matches the [i + j * N] addressing
// used by the other kernels. C is overwritten, its previous contents are
// ignored.
void mm_eigen(matrix_t const& A, matrix_t const& B, matrix_t& C, std::size_t N) noexcept {
  using const_view_t = Eigen::Map<Eigen::MatrixXd const>;
  using view_t = Eigen::Map<Eigen::MatrixXd>;

  const_view_t const Ae(A.data(), N, N);
  const_view_t const Be(B.data(), N, N);
  view_t Ce(C.data(), N, N);

  // A plain `Ce = Ae * Be` makes Eigen assume the destination may alias an
  // operand: the product is evaluated into a temporary and copied afterwards.
  // C is a distinct buffer from A and B, so write the result directly.
  Ce.noalias() = Ae * Be;
}
exercises/ex10/solutions/matrix_multiplication/plot.py
0 → 100755
View file @
663d0203
#!/usr/bin/env python3
# Programming Techniques for Scientific Simulations I
# HS 2020
# Exercise 10
import
sys
import
pandas
as
pd
import
matplotlib.pyplot
as
plt
import
seaborn
as
sns
# Apply seaborn's default theme to every figure created below.
sns.set()
def main():
    """Plot the benchmark timings from a CSV file.

    Command line: ``plot.py input_file [output_file]``. The input must have
    the columns ``function``, ``size`` and ``time``. One line per function is
    drawn over the matrix size on log2-log2 axes. The figure is written to
    ``output_file`` if given, otherwise it is shown interactively. Exits with
    status 1 after printing a usage message if the argument count is wrong.
    """
    if not 2 <= len(sys.argv) <= 3:
        print('Usage: plot.py input_file [output_file]')
        # sys.exit instead of the interactive-only builtin exit()
        sys.exit(1)

    input_file = sys.argv[1]
    output_file = sys.argv[2] if len(sys.argv) == 3 else None

    data = pd.read_csv(input_file)

    # sns.barplot(x='function', y='time', data=data)
    sns.lineplot(x='size', y='time', hue='function', data=data)

    axes = plt.gca()
    # 'basex'/'basey' were removed from Matplotlib; 'base' is the
    # replacement keyword for both axes.
    axes.set_xscale('log', base=2)
    axes.set_yscale('log', base=2)

    plt.title('Matrix-multiplication benchmark')
    # The x axis shows the 'size' column, not the function name.
    plt.xlabel('Matrix size N')
    # Horizontal y label placed at the top-left corner of the axes.
    plt.ylabel('Time in seconds', rotation=0, horizontalalignment='left')
    axes.yaxis.set_label_coords(0, 1)

    if output_file is not None:
        plt.savefig(output_file)
    else:
        plt.show()
# Run the plot only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment