To receive notifications about scheduled maintenance, please subscribe to the mailing-list gitlab-operations@sympa.ethz.ch. You can subscribe to the mailing-list at https://sympa.ethz.ch

Commit e1468820 authored by ahuegli's avatar ahuegli

finished p3

parent b2f9790a
......@@ -8,20 +8,44 @@
__global__ void jacobiStepKernel(int N, double hh, double invhh,
const double *rho, const double *phik, double *phik1) {
// TODO: Task 3b) Compute phik1[iy * N + ix]. Don't forget about 0 boundary conditions!
int ix = threadIdx.x + blockIdx.x*blockIdx.x;
int iy = threadIdx.y + blockIdx.y*blockIdx.y;
if(ix < N && iy < N){
double tmpA = phik[iy*N + (ix+1)] + phik[iy*N + (ix-1)] + phik[(iy+1)*N + ix] + phik[(iy-1)*N + ix];
phik1[iy * N +ix] = -0.25 * hh * ( - rho[iy*N + ix] - (invhh*tmpA));
}
}
void jacobiStep(int N, double h, const double *rhoDev, const double *phikDev, double *phik1Dev) {
/// TODO: Task 3b) Invoke the kernel jacobiSetKernel. Consider using dim3 as number
/// of blocks and number of threads!
dim3 numBlocks(N/32, N/32);
dim3 numThreads(32, 32);
double hh = h*h;
jacobiStepKernel<<<numBlocks, numThreads>>>(N, hh, 1.0/hh, rhoDev, phikDev, phik1Dev);
}
__global__ void computeAphiKernel(int N, double invhh, const double *phi, double *Aphi) {
// TODO: Task 3d) Compute Aphi[iy * N + ix]. Don't forget about 0 boundary conditions!
int ix = threadIdx.x + blockIdx.x*blockIdx.x;
int iy = threadIdx.y + blockIdx.y*blockIdx.y;
if(ix < N && iy < N){
double tmpA = phi[iy*N + (ix+1)] + phi[iy*N + (ix-1)] + phi[(iy+1)*N + ix] + phi[(iy-1)*N + ix] - 4*phi[iy*N + ix];
Aphi[iy * N +ix] = invhh * tmpA;
}
}
void computeAphi(int N, double h, const double *xDev, double *AphiDev) {
/// TODO: Task 3d) Invoke the kernel `computeAphiKernel`.
dim3 numBlocks(N/32, N/32);
dim3 numThreads(32, 32);
computeAphiKernel<<<numBlocks, numThreads>>>(N, 1.0/(h*h), xDev, AphiDev);
}
// Print L1 and L2 error. Do not edit!
......@@ -74,10 +98,18 @@ int main() {
// TODO: Task 3b) Allocate buffers of N^2 doubles.
// (You might need additional temporary buffers to complete all tasks.)
cudaMalloc(&rhoDev, N2*sizeof(double));
cudaMalloc(&phikDev, N2*sizeof(double));
cudaMalloc(&phik1Dev, N2*sizeof(double));
cudaMalloc(&AphiDev, N2*sizeof(double));
cudaMallocHost(&rhoHost, N2*sizeof(double));
cudaMallocHost(&AphiHost, N2*sizeof(double));
cudaMallocHost(&xHost, N2*sizeof(double));
// TODO: Task 3b) Remove this.
printf("Implement allocation first\n");
return;
// // TODO: Task 3b) Remove this.
// printf("Implement allocation first\n");
// return;
// RHS with three non-zero elements at (x, y)=(0.3, 0.1), (0.4, 0.1) and (0.5, 0.6).
for (int i = 0; i < N * N; ++i)
......@@ -87,14 +119,32 @@ int main() {
rhoHost[(6 * N / 10) * N + (5 * N / 10)] = -2.0;
// TODO: Task 3b) Upload rhoHost to rhoDev.
cudaMemcpy(rhoDev, rhoHost, N2*sizeof(double), cudaMemcpyHostToDevice);
// Initial guess x^(0)_i = 0.
// TODO: Task 3b) Memset phikDev to 0.
cudaMemset(phikDev, 0, N2*sizeof(double));
// TODO: Task 3c) Run the jacobiStep numIterations times.
// TODO: Task 3d) Call computeAphi, download the result and call printL1L2.
// Ensure that L1 and L2 drop over time.
for(int i = 0; i < numIterations; ++i){
jacobiStep(N, h, rhoDev, phikDev, phik1Dev);
computeAphi(N, h, phikDev, AphiDev);
cudaMemcpy(AphiHost, AphiDev, N2*sizeof(double), cudaMemcpyDeviceToHost);
}
// TODO: (OPTIONAL) Download the vector phik1 (or phik) and call dumpCSV for visualization.
jacobiStep(N, h, rhoDev, phikDev, phik1Dev);
//jacobiStep(N, h, rhoDev, phikDev, phik1Dev);
// TODO: Task 3a) Deallocate buffers.
cudaFree(rhoDev);
cudaFree(phikDev);
cudaFree(phik1Dev);
cudaFree(rhoHost);
cudaFree(AphiDev);
cudaFree(AphiHost);
cudaFree(xHost);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment