Commit 47fdb3ff authored by Roger Kaeppeli's avatar Roger Kaeppeli

Add week 10

parent e44ead7f
cmake_minimum_required(VERSION 2.8)
project(week10)
set(CMAKE_CXX_STANDARD 11)
# needs BLAS!
find_package(BLAS)
if ( BLAS_FOUND )
add_executable(blas_dot blas_dot.cpp)
target_link_libraries(blas_dot ${BLAS_LIBRARIES})
add_executable(blas_levels blas_levels.cpp timer.cpp)
target_link_libraries(blas_levels ${BLAS_LIBRARIES})
else()
message("BLAS not found")
endif()
# needs LAPACK!
find_package(LAPACK)
if ( LAPACK_FOUND )
add_executable(lapack_dgesv lapack_dgesv.cpp)
target_link_libraries(lapack_dgesv ${LAPACK_LIBRARIES})
else()
message("LAPACK not found")
endif()
# needs GSL!
find_package(GSL)
if ( GSL_FOUND )
add_executable(gsl_demo gsl_demo.cpp)
target_include_directories(gsl_demo PUBLIC ${GSL_INCLUDE_DIRS})
target_link_libraries(gsl_demo ${GSL_LIBRARIES})
else()
message("GSL not found")
endif()
int main() {
asm ("movq $60, %rax\n\t" // the exit syscall number on Linux
"movq $2, %rdi\n\t" // this program returns 2
"syscall");
}
#include <cstdlib>
#include <iostream>
extern "C"
double ddot_(const int& N, const double* X, int& INCX, double* Y, int& INCY);
int main() {
// vectors
int N = 16384;
double* a = new double[N];
double* b = new double[N];
// initialize vectors with random numbers
srand48(42);
for (int i=0; i<N; ++i) {
a[i] = drand48();
b[i] = drand48();
}
// compute scalar product
int inc = 1;
double d = ddot_(N,&a[0],inc,&b[0],inc);
std::cout << "a . b = " << d << std::endl;
// deallocate
delete[] a;
delete[] b;
}
#include "timer.hpp"
#include <iostream>
extern "C" {
void dgemm_(const char& TRANSA, const char& TRANSB,
const int& M, const int& N, const int& K,
const double& ALPHA, const double* A, const int& LDA,
const double* B, const int& LDB,
const double& BETA, double* C, const int& LDC);
void dgemv_(const char& TRANS, const int& M, const int& N,
const double& ALPHA, const double* A, const int& LDA,
const double* X, const int& INCX,
const double& BETA, double* Y, const int& INCY);
double ddot_(const int& N, const double* X, int& INCX, double* Y,
int& INCY);
}
void print_matrix(const int N, const double* A) {
for (int i=0; i<N; ++i) {
for (int j=0; j<N; ++j) {
std::cout << A[i*N + j] << " ";
}
std::cout << '\n';
}
}
int main() {
// declare some variables
bool print = false;
Timer timer;
int incx, incy;
double alpha, beta;
double speed;
// matrices
int N = 1000;
double* A = new double[N*N];
double* B = new double[N*N];
double* C = new double[N*N];
// initialize matrices (note: we choose column major order!)
for (int i=0; i<N; ++i) {
for (int j=0; j<N; ++j) {
A[i + j*N] = 1.;
B[i + j*N] = 2.;
C[i + j*N] = 3.;
}
}
// blas level 1
incx = 1;
incy = 1;
timer.start();
for (int i=0; i<N; ++i) {
for (int j=0; j<N; ++j) {
C[i + j*N] = ddot_(N, &A[i], incx, &B[j*N], incy);
}
}
timer.stop();
speed = 2.*N*N*N/timer.duration()*1.e-9;
std::cout << "BLAS LEVEL 1: ddot ---\n";
std::cout << "SPEED GFLOPs/sec: " << speed << '\n';
if ( print ) print_matrix(N, C);
// blas level 2
incx = 1;
incy = 1;
alpha = 1.;
beta = 0.;
timer.start();
for (int j=0; j<N; ++j) {
dgemv_('N', N, N, alpha, &A[0], N, &B[j*N], incx, beta, &C[j*N], incy);
}
timer.stop();
speed = 2.*N*N*N/timer.duration()*1.e-9;
std::cout << "BLAS LEVEL 2: dgemv ---\n";
std::cout << "SPEED GFLOPs/sec: " << speed << '\n';
if ( print ) print_matrix(N, C);
// blas level 3
alpha = 1.;
beta = 0.;
timer.start();
dgemm_('N', 'N', N, N, N, alpha, &A[0], N, &B[0], N, beta , &C[0], N);
timer.stop();
speed = 2.*N*N*N/timer.duration()*1.e-9;
std::cout << "BLAS LEVEL 3: dgemm ---\n";
std::cout << "SPEED GFLOPs/sec: " << speed << '\n';
if ( print ) print_matrix(N, C);
// deallocate matrices
delete[] A;
delete[] B;
delete[] C;
}
#include <cstdio>
#include <gsl/gsl_sf_bessel.h>
int main() {
double x = 5.0;
double y = gsl_sf_bessel_J0 (x);
std::printf ("J0(%g) = %.18e\n", x, y);
}
// compile with sse4 enabled
// E.g. gnu/clang: g++ -msse4 intrinsic.cpp
#include <iostream>
#include <nmmintrin.h> // intrinsics header
int bitcount(unsigned int x) {
int cnt = 0;
for (int i=0 ; i<32 ;++i) {
if ( x & 1 ) ++cnt; // check lower bit
x = (x >> 1); // shift bits by one to the right
}
return cnt;
}
int main() {
unsigned int x;
std::cout << "Enter x: ";
std::cin >> x;
int cnt = _mm_popcnt_u32(x);
std::cout << x << " has " << cnt << " bits set [bitcount]\n";
std::cout << x << " has " << bitcount(x) << " bits set [_mm_popcnt_u32]\n";
}
#include <iostream>
#include <vector>
extern "C" {
void dgesv_( const int* N // The number of linear equations.
, const int* NRHS // The number of right hand sides.
, double* A // On entry, the N-by-N coefficient matrix A.
// On exit, the factors L and U from the
// factorization.
, const int* LDA // The leading dimension of the array A.
, int* IPIV // The pivot indices that define the
// permutation matrix P.
, double* B // On entry, the N-by-NRHS matrix of right hand
// side matrix B.
// On exit, if INFO = 0, the N-by-NRHS solution
// matrix X.
, const int* LDB // The leading dimension of the array B.
, int* INFO // = 0: successful exit.
);
}
void print_matrix(const int N, const double* A) {
for (int i=0; i<N; ++i) {
for (int j=0; j<N; ++j) {
std::cout << A[i + j*N] << " ";
}
std::cout << '\n';
}
}
int main() {
const int N = 2;
std::vector<double> A(N*N);
std::vector<double> b(N);
std::vector<int> ipiv(N);
const int nrhs = 1;
int info;
// initialize matrix A (note: we choose column-major order!)
A[0 + 0*N] = 6.; // A_{1,1}
A[1 + 0*N] = 4.; // A_{2,1}
A[0 + 1*N] = 3.; // A_{1,2}
A[1 + 1*N] = 3.; // A_{2,2}
// initialize right-hand ide vector b
b[0] = 1.;
b[1] = 2.;
// print A
std::cout << "A = \n";
print_matrix(N, &A.front());
// solve
dgesv_(&N, &nrhs, &A.front(), &N, &ipiv.front(), &b.front(), &N, &info);
// print L/U
std::cout << "LU = \n";
print_matrix(N, &A.front());
// print the solution
std::cout << "b = " << b[0] << " " << b[1] << '\n';
}
#include <iostream>
double function1() {
int i;
double retval = 0.;
for (i=1; i<10000000; ++i) {
retval += 1./i;
}
return retval;
}
double function2() {
int i;
double retval = 0.;
for (i=1; i<100000000; ++i) {
retval += 1./(i + 1.);
}
return retval;
}
void function3() {
return;
}
int main() {
int i;
std::cout << "function1 result: " << function1() << std::endl;
std::cout << "function2 result: " << function2() << std::endl;
if (false) function3();
}
#include <iostream>
#include <vector>
std::size_t insert_begin() {
std::vector<int> v;
for (int i=0;i<100000;++i) {
v.insert(v.begin(), 1);
}
return v.size();
}
std::size_t insert_end() {
std::vector<int> v;
for (int i=0;i<100000;++i) {
v.push_back(1);
}
return v.size();
}
int main() {
for (int i=0;i<50;++i) {
std::cout << insert_begin() << std::endl;
std::cout << insert_end() << std::endl;
}
}
#include "timer.hpp"
#include <chrono>
Timer::Timer() {
tstart_ = std::chrono::high_resolution_clock::now();
}
void Timer::start() {
tstart_ = std::chrono::high_resolution_clock::now();
}
void Timer::stop() {
tend_ = std::chrono::high_resolution_clock::now();
}
double Timer::duration() const {
return std::chrono::duration< double >(tend_ - tstart_).count();
}
#ifndef TIMER_HPP
#define TIMER_HPP
#include <chrono>
class Timer {
public:
Timer();
void start();
void stop();
double duration() const;
private:
using time_point_t = std::chrono::high_resolution_clock::time_point;
time_point_t tstart_, tend_;
};
#endif // !defined TIMER_HPP
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment