To receive notifications about scheduled maintenance, please subscribe to the mailing-list gitlab-operations@sympa.ethz.ch. You can subscribe to the mailing-list at https://sympa.ethz.ch

Commit b2f9790a authored by ahuegli's avatar ahuegli

finished p2

parent 882738f9
......@@ -6,6 +6,21 @@ __global__ void computeForcesKernel(int N, const double3 *p, double3 *f) {
return;
// TODO: Copy the code from `nbody_0.cu` and fix the redundant memory accesses.
// Position of the particle this thread is responsible for; it does not
// change during the loop, so read it from p once.
const double3 self = p[idx];

// Running force sum, kept in a local so that f[idx] is written only once.
double3 acc = double3{0.0, 0.0, 0.0};
for (int j = 0; j < N; ++j) {
    const double3 other = p[j];
    const double dx = other.x - self.x;
    const double dy = other.y - self.y;
    const double dz = other.z - self.z;
    // The j == idx term is not skipped: the 1e-150 offset keeps the distance
    // non-zero, and since dx == dy == dz == 0 there, the term contributes
    // exactly 0.0 to the sum.
    const double dist = sqrt(1e-150 + dx * dx + dy * dy + dz * dz);
    const double inv_dist = 1 / dist;
    acc.x += dx * inv_dist * inv_dist * inv_dist;
    acc.y += dy * inv_dist * inv_dist * inv_dist;
    acc.z += dz * inv_dist * inv_dist * inv_dist;
}
f[idx] = acc;
}
void computeForces(int N, const double3 *p, double3 *f) {
......
......@@ -6,6 +6,22 @@ __global__ void computeForcesKernel(int N, const double3 *p, double3 *f) {
return;
// TODO: Copy the code from `nbody_a.cu` and fix the redundant arithmetic operations.
// Position of the particle this thread is responsible for; it does not
// change during the loop, so read it from p once.
const double3 self = p[idx];

// Running force sum, kept in a local so that f[idx] is written only once.
double3 acc = double3{0.0, 0.0, 0.0};
for (int j = 0; j < N; ++j) {
    const double3 other = p[j];
    const double dx = other.x - self.x;
    const double dy = other.y - self.y;
    const double dz = other.z - self.z;
    // The j == idx term is not skipped: the 1e-150 offset keeps the distance
    // non-zero, and since dx == dy == dz == 0 there, the term contributes
    // exactly 0.0 to the sum.
    const double dist = sqrt(1e-150 + dx * dx + dy * dy + dz * dz);
    const double inv_dist = 1 / dist;
    // 1 / dist^3 is shared by all three components, so compute it once.
    const double inv_dist_cubed = inv_dist * inv_dist * inv_dist;
    acc.x += dx * inv_dist_cubed;
    acc.y += dy * inv_dist_cubed;
    acc.z += dz * inv_dist_cubed;
}
f[idx] = acc;
}
void computeForces(int N, const double3 *p, double3 *f) {
......
......@@ -6,6 +6,22 @@ __global__ void computeForcesKernel(int N, const double3 *p, double3 *f) {
return;
// TODO: Copy the code from `nbody_b.cu` and utilize rsqrt.
// Position of the particle this thread is responsible for; it does not
// change during the loop, so read it from p once.
const double3 self = p[idx];

// Running force sum, kept in a local so that f[idx] is written only once.
double3 f_tmp = double3{0.0, 0.0, 0.0};
for (int i = 0; i < N; ++i) {
    const double3 other = p[i];
    const double dx = other.x - self.x;
    const double dy = other.y - self.y;
    const double dz = other.z - self.z;
    // Instead of skipping the i == idx case, add 1e-150 to avoid division
    // by zero. (dx * inv_r_tmp will be exactly 0.0)
    //
    // rsqrt(x) already evaluates 1 / sqrt(x), so its result IS the inverse
    // distance. It must not be inverted a second time: doing so yields the
    // distance itself and scales the force by r^3 instead of 1 / r^3.
    const double inv_r = rsqrt(1e-150 + dx * dx + dy * dy + dz * dz);
    const double inv_r_tmp = inv_r * inv_r * inv_r;
    f_tmp.x += dx * inv_r_tmp;
    f_tmp.y += dy * inv_r_tmp;
    f_tmp.z += dz * inv_r_tmp;
}
f[idx] = f_tmp;
}
void computeForces(int N, const double3 *p, double3 *f) {
......
......@@ -2,6 +2,41 @@
// Computes, for every particle idx, the sum over all particles j of
//     (p[j] - p[idx]) / |p[j] - p[idx]|^3
// and stores it in f[idx]. Positions are staged through shared memory one
// tile of blockDim.x particles at a time, so each position is read from p
// once per block instead of once per thread.
//
// Launch requirements:
//   - Only the x dimension of the grid/block is used for indexing, and each
//     thread handles one particle, so gridDim.x * blockDim.x must be >= N.
//   - blockDim.x * sizeof(double3) bytes of dynamic shared memory.
//
// Parameters:
//   N  number of particles.
//   p  positions of the N particles (read only).
//   f  output forces, one per particle; f[idx] is overwritten.
__global__ void computeForcesKernel(int N, const double3 *p, double3 *f) {
    extern __shared__ double3 sPos[];

    const int tid = threadIdx.x;
    const int tileSize = blockDim.x;
    const int idx = blockIdx.x * tileSize + tid;

    // Threads past the end of the array must NOT return early: they still
    // have to help load tiles and must reach every __syncthreads() below.
    // They simply skip the force accumulation and the final store.
    const bool active = idx < N;

    double3 self = double3{0.0, 0.0, 0.0};
    if (active)
        self = p[idx];

    // Running force sum, kept in a local so that f[idx] is written only once.
    double3 f_tmp = double3{0.0, 0.0, 0.0};

    for (int tileStart = 0; tileStart < N; tileStart += tileSize) {
        // Each thread stages one position of the current tile; the last tile
        // may be partial, so guard against reading past the end of p.
        const int src = tileStart + tid;
        if (src < N)
            sPos[tid] = p[src];
        // Make the whole tile visible to every thread before it is read.
        __syncthreads();

        if (active) {
            // Number of valid entries in this (possibly partial) tile.
            const int remaining = N - tileStart;
            const int tileLen = remaining < tileSize ? remaining : tileSize;
            for (int k = 0; k < tileLen; ++k) {
                const double3 other = sPos[k];
                const double dx = other.x - self.x;
                const double dy = other.y - self.y;
                const double dz = other.z - self.z;
                // Instead of skipping the self-interaction, add 1e-150 to
                // avoid division by zero. (dx * inv_r_tmp will be exactly 0.0)
                // rsqrt already returns 1 / sqrt(x), i.e. the inverse
                // distance, so no further inversion is needed.
                const double inv_r =
                    rsqrt(1e-150 + dx * dx + dy * dy + dz * dz);
                const double inv_r_tmp = inv_r * inv_r * inv_r;
                f_tmp.x += dx * inv_r_tmp;
                f_tmp.y += dy * inv_r_tmp;
                f_tmp.z += dz * inv_r_tmp;
            }
        }
        // Do not let any thread overwrite sPos with the next tile while
        // others are still reading the current one.
        __syncthreads();
    }

    // Assign rather than accumulate so the result does not depend on the
    // previous contents of f.
    if (active)
        f[idx] = f_tmp;
}
void computeForces(int N, const double3 *p, double3 *f) {
......@@ -10,5 +45,6 @@ void computeForces(int N, const double3 *p, double3 *f) {
// TODO: Set the required shared memory size.
// Don't bother with checking errors here.
// The kernel indexes the dynamic shared array sPos with threadIdx.x, so it
// needs one double3 per thread of the block. Launch it exactly once, with
// that size as the third launch parameter; a launch without it would leave
// sPos with zero bytes of backing storage.
const size_t sharedMem = numThreads * sizeof(double3);
computeForcesKernel<<<numBlocks, numThreads, sharedMem>>>(N, p, f);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment