From 1f78a17ea19b31c38d16338cfbac1d287eb837b0 Mon Sep 17 00:00:00 2001 From: wadaniel Date: Mon, 23 Mar 2020 09:27:14 +0100 Subject: [PATCH] hw2 solution code added --- hw2/solution/p1/Makefile | 19 + hw2/solution/p1/README | 33 ++ hw2/solution/p1/auxiliary.cpp | 244 +++++++++++++ hw2/solution/p1/main.cpp | 61 ++++ hw2/solution/p1/solution.cpp | 485 ++++++++++++++++++++++++++ hw2/solution/p1/wave.cpp | 151 ++++++++ hw2/solution/p1/wave.h | 58 +++ hw2/solution/p2/.gitignore | 2 + hw2/solution/p2/Makefile | 26 ++ hw2/solution/p2/auxiliar/Makefile | 18 + hw2/solution/p2/auxiliar/auxiliar.cpp | 46 +++ hw2/solution/p2/auxiliar/auxiliar.hpp | 8 + hw2/solution/p2/cannon.cpp | 114 ++++++ hw2/solution/p3/particle.cpp | 88 +++++ 14 files changed, 1353 insertions(+) create mode 100644 hw2/solution/p1/Makefile create mode 100644 hw2/solution/p1/README create mode 100644 hw2/solution/p1/auxiliary.cpp create mode 100644 hw2/solution/p1/main.cpp create mode 100644 hw2/solution/p1/solution.cpp create mode 100644 hw2/solution/p1/wave.cpp create mode 100644 hw2/solution/p1/wave.h create mode 100644 hw2/solution/p2/.gitignore create mode 100755 hw2/solution/p2/Makefile create mode 100644 hw2/solution/p2/auxiliar/Makefile create mode 100644 hw2/solution/p2/auxiliar/auxiliar.cpp create mode 100644 hw2/solution/p2/auxiliar/auxiliar.hpp create mode 100755 hw2/solution/p2/cannon.cpp create mode 100644 hw2/solution/p3/particle.cpp diff --git a/hw2/solution/p1/Makefile b/hw2/solution/p1/Makefile new file mode 100644 index 0000000..4a7d586 --- /dev/null +++ b/hw2/solution/p1/Makefile @@ -0,0 +1,19 @@ +CXX=h5pcc +CXXFLAGS = -Wpedantic -Wall -Wextra -std=c++11 -lstdc++ -O3 -g + +all: main + +%.o: %.cpp + $(CXX) -c -o $@ $< $(CXXFLAGS) +clean: + rm -rf *.o *.xmf *h5 *txt main + +main: main.o auxiliary.o wave.o + $(CXX) $(CXXFLAGS)-I. -o main $^ + +run: + make clean ; make ; mpirun -n 8 ./main 100 2 1.0 +run1: + make clean ; make ; mpirun -n 1 ./main 100 1 1.0 + +.PHONY: all clean run run1 diff --git a/hw2/solution/p1/README b/hw2/solution/p1/README new file mode 100644 index 0000000..08d42db --- /dev/null +++ b/hw2/solution/p1/README @@ -0,0 +1,33 @@ +HOW TO RUN +---------- +To run the code on Euler remove all previously loaded nodules (purge) and load the following modules: + +module purge; +module load new; +module load gcc; +module load open_mpi/3.0.0; +module load hdf5 + +You can request 8 cores and an interative shell to work with: +bsub -n 8 -W 04:00 -Is bash + +Then compile and run by typing +make run + + +WHAT TO MODIFY +-------------- +You only need to change the code in wave.cpp +When the code runs, it prints a checksum. This number should be the same for the solution you provide. + + +CODE OUTPUT +----------- +You are not asked to visualize any results in this question, but we thought I'd be nice if you can see what you are actually solving. +Other outputs are .h5 files which can be used to visualize the solution. Each .h5 file corresponds to an .xmf file. +You can use Paraview to open the .xmf files and visualize the results. +Alternatively, you may comment out the line +#define USE_HDF5 +with '//' in wave.h. In this case, the code will output simple .txt files with the solution, saved as +x y z u +... diff --git a/hw2/solution/p1/auxiliary.cpp b/hw2/solution/p1/auxiliary.cpp new file mode 100644 index 0000000..ef943ba --- /dev/null +++ b/hw2/solution/p1/auxiliary.cpp @@ -0,0 +1,244 @@ +#include "wave.h" + +WaveEquation::WaveEquation(int a_N, int a_procs_per_dim) +{ + MPI_Comm_size(MPI_COMM_WORLD, &size); + procs_per_dim = a_procs_per_dim; + Ntot = a_N; + h = L / a_N; + N = a_N / procs_per_dim; + + // the chosen numerical method is stable if dt <= h/sqrt(3) + dt = h / sqrt(3.0); + + FindCoordinates(); + + origin[0] = coords[0] * N * h; + origin[1] = coords[1] * N * h; + origin[2] = coords[2] * N * h; + u = new double[(N + 2) * (N + 2) * (N + 2)]; + for (int i0 = 0; i0 < N; i0++) + { + double x0 = origin[0] + i0 * h + 0.5 * h; + for (int i1 = 0; i1 < N; i1++) + { + double x1 = origin[1] + i1 * h + 0.5 * h; + for (int i2 = 0; i2 < N; i2++) + { + double x2 = origin[2] + i2 * h + 0.5 * h; + u[(i0 + 1) * (N + 2) * (N + 2) + (i1 + 1) * (N + 2) + (i2 + 1)] = + Initial_Condition(x0, x1, x2); + } + } + } + + u_old = new double[(N + 2) * (N + 2) * (N + 2)]; + u_new = new double[(N + 2) * (N + 2) * (N + 2)]; + + for (int i0 = 1; i0 <= N; i0++) + for (int i1 = 1; i1 <= N; i1++) + for (int i2 = 1; i2 <= N; i2++) + { + int m = i2 + i1 * (N + 2) + i0 * (N + 2) * (N + 2); + u_new[m] = u[m]; + u_old[m] = u[m]; + // assuming that u_old = u is equivalent to du/dt(t=0) = 0 + } + + aux = dt * dt / h / h; +} + +WaveEquation::~WaveEquation() +{ + delete[] u; + delete[] u_old; + delete[] u_new; +} + +double WaveEquation::Initial_Condition(double x0, double x1, double x2) +{ + double r = + (x0 - 0.5) * (x0 - 0.5) + (x1 - 0.5) * (x1 - 0.5) + (x2-0.5)*(x2-0.5); + return exp(-r / 0.1); +} + +// do not change this function +void WaveEquation::Print(int kt = 0) +{ +#ifdef USE_HDF5 + + std::string name = "Wave__" + std::to_string(kt); + if (rank == 0) + { + FILE *xmf = 0; + xmf = fopen((name + ".xmf").c_str(), "w"); + + std::stringstream s_head; + + s_head << "\n"; + s_head << "\n"; + s_head << "\n"; + s_head << " \n"; + + s_head << " \n"; + s_tail << "\n"; + + fprintf(xmf, (s_tail.str()).c_str()); + fclose(xmf); + } + + hid_t file_id, dataset_id, fspace_id, fapl_id, mspace_id; + + H5open(); + fapl_id = H5Pcreate(H5P_FILE_ACCESS); + + H5Pset_fapl_mpio(fapl_id, cart_comm, MPI_INFO_NULL); + + file_id = + H5Fcreate((name + ".h5").c_str(), H5F_ACC_TRUNC, H5P_DEFAULT, fapl_id); + + H5Pclose(fapl_id); + + double *array_all = new double[N * N * N]; + + hsize_t count[4] = {static_cast(N), + static_cast(N), + static_cast(N), 1}; + hsize_t dims[4] = {static_cast(Ntot), + static_cast(Ntot), + static_cast(Ntot), 1}; + hsize_t offset[4] = {static_cast(coords[0] * N), + static_cast(coords[1] * N), + static_cast(coords[2] * N), 0}; + + for (int i0 = 1; i0 <= N; i0++) + { + for (int i1 = 1; i1 <= N; i1++) + { + for (int i2 = 1; i2 <= N; i2++) + { + array_all[(i2 - 1) + (i1 - 1) * N + (i0 - 1) * N * N] = + u[i2 + i1 * (N + 2) + i0 * (N + 2) * (N + 2)]; + } + } + } + + fapl_id = H5Pcreate(H5P_DATASET_XFER); + H5Pset_dxpl_mpio(fapl_id, H5FD_MPIO_COLLECTIVE); + fspace_id = H5Screate_simple(4, dims, NULL); + + dataset_id = H5Dcreate(file_id, "data", H5T_NATIVE_DOUBLE, fspace_id, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + + fspace_id = H5Dget_space(dataset_id); + + H5Sselect_hyperslab(fspace_id, H5S_SELECT_SET, offset, NULL, count, NULL); + + mspace_id = H5Screate_simple(4, count, NULL); + + H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, mspace_id, fspace_id, fapl_id, + array_all); + + H5Sclose(mspace_id); + H5Sclose(fspace_id); + H5Dclose(dataset_id); + H5Pclose(fapl_id); + H5Fclose(file_id); + H5close(); + + delete[] array_all; + +#else + + std::string name = "Wave__" + std::to_string(kt) + ".txt"; + MPI_File file; + MPI_File_open(cart_comm, name.c_str(), MPI_MODE_RDWR | MPI_MODE_CREATE, + MPI_INFO_NULL, &file); + + std::stringstream ss; + + for (int i0 = 0; i0 < N; i0++) + { + double x0 = origin[0] + i0 * h + 0.5 * h; + for (int i1 = 0; i1 < N; i1++) + { + double x1 = origin[1] + i1 * h + 0.5 * h; + for (int i2 = 0; i2 < N; i2++) + { + double x2 = origin[2] + i2 * h + 0.5 * h; + ss << x0 << " " << x1 << " " << x2 << " " + << u[(i2 + 1) + (i1 + 1) * (N + 2) + (i0 + 1) * (N + 2) * (N + 2)] + << "\n"; + } + } + } + + std::string asciiData = ss.str(); + + MPI_Offset my_length = asciiData.size() * sizeof(char); + + MPI_Offset offset; + MPI_Exscan(&my_length, &offset, 1, MPI_OFFSET, MPI_SUM, cart_comm); + + MPI_File_write_at_all(file, offset, asciiData.data(), asciiData.size(), + MPI_CHAR, MPI_STATUS_IGNORE); + MPI_File_close(&file); + +#endif +} + +// do not change this function +void WaveEquation::UpdateGridPoint(int i0, int i1, int i2) +{ + int m = i0 * (N + 2) * (N + 2) + i1 * (N + 2) + i2; + int ip1 = (i0 + 1) * (N + 2) * (N + 2) + (i1) * (N + 2) + (i2); + int im1 = (i0 - 1) * (N + 2) * (N + 2) + (i1) * (N + 2) + (i2); + int jp1 = (i0) * (N + 2) * (N + 2) + (i1 + 1) * (N + 2) + (i2); + int jm1 = (i0) * (N + 2) * (N + 2) + (i1 - 1) * (N + 2) + (i2); + int kp1 = (i0) * (N + 2) * (N + 2) + (i1) * (N + 2) + (i2 + 1); + int km1 = (i0) * (N + 2) * (N + 2) + (i1) * (N + 2) + (i2 - 1); + u_new[m] = + 2.0 * u[m] - u_old[m] + + aux * (u[ip1] + u[im1] + u[jp1] + u[jm1] + u[kp1] + u[km1] - 6.0 * u[m]); +} diff --git a/hw2/solution/p1/main.cpp b/hw2/solution/p1/main.cpp new file mode 100644 index 0000000..be8512f --- /dev/null +++ b/hw2/solution/p1/main.cpp @@ -0,0 +1,61 @@ +#include "wave.h" + +int main(int argc, char **argv) +{ + MPI_Init(&argc, &argv); + + int rank, size; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); + + if (argc != 4) + { + if (rank == 0) + { + std::cout << " Incorrect number of inputs. Run as:\n"; + std::cout << " mpirun -n x ./main N y t \n"; + std::cout << " where \n"; + std::cout << " N= number of grid points per direction\n"; + std::cout << " y= number of ranks per direction (=(x)^(1/3))\n"; + std::cout << " t= final time of simulation \n"; + std::cout << " Aborting...\n"; + } + int err = 1; + MPI_Abort(MPI_COMM_WORLD, err); + } + + int points = atoi(argv[1]); + int procs_per_dim = atoi(argv[2]); + double t_end = atof(argv[3]); + + if (size != procs_per_dim * procs_per_dim * procs_per_dim) + { + if (rank == 0) + std::cout << " Incorrect number of ranks per direction. Aborting... \n"; + int err = 2; + MPI_Abort(MPI_COMM_WORLD, err); + } + + if (points % procs_per_dim != 0) + { + if (rank == 0) + std::cout << " Grid points per direction must be divisible by the number " + "of ranks per direction. Aborting... \n"; + int err = 3; + MPI_Abort(MPI_COMM_WORLD, err); + } + + WaveEquation *Simulation = new WaveEquation(points, procs_per_dim); + + double start = MPI_Wtime(); + Simulation->run(t_end); + double finish = MPI_Wtime(); + + if (rank == 0) + std::cout << "Total time = " << finish - start << "\n"; + + delete Simulation; + + MPI_Finalize(); + return 0; +} \ No newline at end of file diff --git a/hw2/solution/p1/solution.cpp b/hw2/solution/p1/solution.cpp new file mode 100644 index 0000000..9f86233 --- /dev/null +++ b/hw2/solution/p1/solution.cpp @@ -0,0 +1,485 @@ +#include +#include +#include +#include +#include +#include + +#define USE_HDF5 + +#ifdef USE_HDF5 +#include +#endif + +using namespace std; + +#define L 1.0 + +struct WaveEquation +{ + int N; //grid points per dimension + double h; //grid spacing (dx = dy = dz = h) + double dt; //timestep + double t; //current time + double * u; //solution vector + double * u_old; + double * u_new; + + int Ntot; + int procs_per_dim; + + int size; + int rank; + + int rank_plus [3]; + int rank_minus [3]; + + MPI_Comm cart_comm; + + double origin[3]; + double aux; + int coords[3]; + + ~WaveEquation() + { + delete [] u; + delete [] u_old; + delete [] u_new; + MPI_Comm_free(&cart_comm); + } + + WaveEquation (int a_N, int a_procs_per_dim) + { + MPI_Comm_size(MPI_COMM_WORLD,&size); + + + procs_per_dim = a_procs_per_dim; + Ntot = a_N; + + h = L / a_N; + N = a_N / procs_per_dim; + + dt = h / sqrt(3.0); + + int nums [3] = {procs_per_dim,procs_per_dim,procs_per_dim}; + int periodic[3] = {true,true,true}; + //int periodic[3] = {false,false,false}; + + MPI_Cart_create(MPI_COMM_WORLD, 3, nums, periodic, true, &cart_comm); + MPI_Comm_rank(cart_comm,&rank); + + MPI_Cart_shift(cart_comm, 0, 1, &rank_plus[0], &rank_minus[0]); + MPI_Cart_shift(cart_comm, 1, 1, &rank_plus[1], &rank_minus[1]); + MPI_Cart_shift(cart_comm, 2, 1, &rank_plus[2], &rank_minus[2]); + + MPI_Cart_coords(cart_comm,rank,3,&coords[0]); + + + origin[0] = coords[0] * N * h; + origin[1] = coords[1] * N * h; + origin[2] = coords[2] * N * h; + + u = new double [(N+2)*(N+2)*(N+2)]; + + for (int i0=0; i0\n"; + s_head << "\n"; + s_head << "\n"; + s_head << " \n"; + + s_head << " \n"; + s_tail << "\n"; + + fprintf(xmf, (s_tail.str()).c_str()); + fclose(xmf); + } + + hid_t file_id, dataset_id, fspace_id, fapl_id, mspace_id; + + H5open(); + fapl_id = H5Pcreate(H5P_FILE_ACCESS); + + H5Pset_fapl_mpio(fapl_id, cart_comm, MPI_INFO_NULL); + + file_id = H5Fcreate((name + ".h5").c_str(), H5F_ACC_TRUNC, H5P_DEFAULT, fapl_id); + + H5Pclose(fapl_id); + + double * array_all = new double[N * N * N]; + + hsize_t count [4] = {static_cast(N) , static_cast(N) , static_cast(N) , 1}; + hsize_t dims [4] = {static_cast(Ntot) , static_cast(Ntot) , static_cast(Ntot) , 1}; + hsize_t offset[4] = {static_cast(coords[0] * N), static_cast(coords[1] * N), static_cast(coords[2] * N), 0}; + + for(int i0=1; i0<=N; i0++) + { + for(int i1=1; i1<=N; i1++) + { + for(int i2=1; i2<=N; i2++) + { + array_all[(i2-1)+(i1-1)*N + (i0-1)*N*N] = u[i2 + i1*(N+2)+i0*(N+2)*(N+2)]; + } + } + } + + fapl_id = H5Pcreate(H5P_DATASET_XFER); + H5Pset_dxpl_mpio(fapl_id, H5FD_MPIO_COLLECTIVE); + fspace_id = H5Screate_simple(4, dims, NULL); + + dataset_id = H5Dcreate(file_id, "data", H5T_NATIVE_DOUBLE, fspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + + fspace_id = H5Dget_space(dataset_id); + + H5Sselect_hyperslab(fspace_id, H5S_SELECT_SET, offset, NULL, count, NULL); + + mspace_id = H5Screate_simple(4, count, NULL); + + + H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, mspace_id, fspace_id, fapl_id, array_all); + + H5Sclose(mspace_id ); + H5Sclose(fspace_id ); + H5Dclose(dataset_id); + H5Pclose(fapl_id ); + H5Fclose(file_id ); + H5close(); + + delete [] array_all; + + #else + + string a_name = "Wave__" + to_string(kt); + char name [30] = a_name.c_str(); + MPI_File file; + MPI_File_open( cart_comm, name, MPI_MODE_RDWR | MPI_MODE_CREATE , MPI_INFO_NULL, &file); + + stringstream ss; + + + for (int i0=0; i0run(t_end); + double finish = MPI_Wtime(); + + if (rank==0)std::cout << "Total time = " << finish - start << "\n"; + + delete Simulation; + + MPI_Finalize(); + return 0; +} \ No newline at end of file diff --git a/hw2/solution/p1/wave.cpp b/hw2/solution/p1/wave.cpp new file mode 100644 index 0000000..d3a2ded --- /dev/null +++ b/hw2/solution/p1/wave.cpp @@ -0,0 +1,151 @@ +#include "wave.h" + +/********************************************************************/ +/* Subquestion a: change the following function and use a Cartesian */ +/* topology to find coords[3], rank_plus[3] and rank_minus[3] */ +/********************************************************************/ +void WaveEquation::FindCoordinates() +{ + int nums [3] = {procs_per_dim,procs_per_dim,procs_per_dim}; + int periodic[3] = {true,true,true}; + //int periodic[3] = {false,false,false}; + + MPI_Cart_create(MPI_COMM_WORLD, 3, nums, periodic, true, &cart_comm); + MPI_Comm_rank(cart_comm,&rank); + + MPI_Cart_shift(cart_comm, 0, 1, &rank_plus[0], &rank_minus[0]); + MPI_Cart_shift(cart_comm, 1, 1, &rank_plus[1], &rank_minus[1]); + MPI_Cart_shift(cart_comm, 2, 1, &rank_plus[2], &rank_minus[2]); + + MPI_Cart_coords(cart_comm,rank,3,&coords[0]); +} + +void WaveEquation::run(double t_end) +{ + t = 0; + + /********************************************************************/ + /* Subquestion b: you need to define 12 custom datatypes. */ + /* For sending data, six datatypes (one per face) are required. */ + /* For receiving data, six more datatypes are required. */ + /* You should use MPI_Type_create_subarray for those datatypes. */ + /********************************************************************/ + + /* Subquestion b: Create and commit custom datatypes here */ + /************************************************************************************************/ + MPI_Datatype SEND_FACE_PLUS[3]; + MPI_Datatype SEND_FACE_MINUS[3]; + + MPI_Datatype RECV_FACE_PLUS[3]; + MPI_Datatype RECV_FACE_MINUS[3]; + + int ndims = 3; + int order = MPI_ORDER_C; + int array_of_sizes [3] = {N+2,N+2,N+2}; + int array_of_subsizes [3]; + int array_of_starts [3]; + for (int d0 = 0; d0 < 3; d0++) + { + int d1 = (d0+1)%3; + int d2 = (d0+2)%3; + + array_of_subsizes[d0] = 1; + array_of_subsizes[d1] = N; + array_of_subsizes[d2] = N; + + array_of_starts[d0] = 1; + array_of_starts[d1] = 1; + array_of_starts[d2] = 1; + MPI_Type_create_subarray(ndims,array_of_sizes,array_of_subsizes,array_of_starts,order,MPI_DOUBLE,&SEND_FACE_MINUS[d0]); + + array_of_starts[d0] = N; + array_of_starts[d1] = 1; + array_of_starts[d2] = 1; + MPI_Type_create_subarray(ndims,array_of_sizes,array_of_subsizes,array_of_starts,order,MPI_DOUBLE,&SEND_FACE_PLUS[d0]); + + + array_of_starts[d0] = 0; + array_of_starts[d1] = 1; + array_of_starts[d2] = 1; + MPI_Type_create_subarray(ndims,array_of_sizes,array_of_subsizes,array_of_starts,order,MPI_DOUBLE,&RECV_FACE_MINUS[d0]); + + array_of_starts[d0] = N+1; + array_of_starts[d1] = 1; + array_of_starts[d2] = 1; + MPI_Type_create_subarray(ndims,array_of_sizes,array_of_subsizes,array_of_starts,order,MPI_DOUBLE,&RECV_FACE_PLUS[d0]); + + MPI_Type_commit(&SEND_FACE_PLUS [d0]); MPI_Type_commit(&RECV_FACE_PLUS [d0]); + MPI_Type_commit(&SEND_FACE_MINUS[d0]); MPI_Type_commit(&RECV_FACE_MINUS[d0]); + } + /************************************************************************************************/ + + int count = 0; + do + { + if (count % 5 == 0) + { + if (rank == 0) + std::cout << count << " t=" << t << "\n"; + Print(count); + } + + MPI_Request request[12]; + + /* Subquestion b: Replace the sends and receives with ones that correspond + * to custom datatypes*/ + /**********************************************************************************************/ + for (int d=0; d<3; d++) + { + MPI_Irecv(u,1,RECV_FACE_MINUS[d],rank_minus[d],100*d ,cart_comm,&request[2*d ]); + MPI_Irecv(u,1,RECV_FACE_PLUS [d],rank_plus [d],100*d+1,cart_comm,&request[2*d+1]); + } + + for (int d=0; d<3; d++) + { + MPI_Isend(u,1,SEND_FACE_PLUS [d],rank_plus [d],100*d ,cart_comm,&request[6+2*d ]); + MPI_Isend(u,1,SEND_FACE_MINUS[d],rank_minus[d],100*d+1,cart_comm,&request[6+2*d+1]); + } + /**********************************************************************************************/ + + // Wait for communication to finish + MPI_Waitall(12, &request[0], MPI_STATUSES_IGNORE); + + + for (int i0 = 1; i0 <= N; i0++) + for (int i1 = 1; i1 <= N; i1++) + for (int i2 = 1; i2 <= N; i2++) + UpdateGridPoint(i0, i1, i2); + + double *temp = u_old; + u_old = u; + u = u_new; + u_new = temp; + t += dt; + count++; + } while (t < t_end); + + double s = 0; + double Checksum = 0; + for (int k = 1; k <= N; k++) + for (int j = 1; j <= N; j++) + for (int i = 1; i <= N; i++) + { + int m = k + j * (N + 2) + i * (N + 2) * (N + 2); + s += u[m] * u[m]; + } + + MPI_Reduce(&s, &Checksum, 1, MPI_DOUBLE, MPI_SUM, 0, cart_comm); + if (rank == 0) + std::cout << "Checksum = " << Checksum << "\n"; + + /* Subquestion b: You should free the custom datatypes and the communicator + * here. */ + for (int d=0; d<3; d++) + { + MPI_Type_free(&RECV_FACE_PLUS[d]) ; + MPI_Type_free(&RECV_FACE_MINUS[d]); + MPI_Type_free(&SEND_FACE_PLUS[d]) ; + MPI_Type_free(&SEND_FACE_MINUS[d]); + } + MPI_Comm_free(&cart_comm); +} \ No newline at end of file diff --git a/hw2/solution/p1/wave.h b/hw2/solution/p1/wave.h new file mode 100644 index 0000000..b613b6a --- /dev/null +++ b/hw2/solution/p1/wave.h @@ -0,0 +1,58 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +#define USE_HDF5 // comment this line if you do not want hdf5 file output + // (suitable for visualization with Paraview) + +#ifdef USE_HDF5 +#include +#endif + + +#define L 1.0 // domain size (unit cube) with Ntot x Ntot x Ntot grid points + +struct WaveEquation +{ + int N; // grid points per direction for this rank + double h; // grid spacing (dx = dy = dz = h) + double dt; // timestep + double t; // current time + double *u; // solution vector + double *u_old; + double *u_new; + + int Ntot; // total grid points per direction + int procs_per_dim; // ranks per direction + + int size; // =total ranks = procs_per_dim*procs_per_dim*procs_per_dim + int rank; + + int rank_plus[3]; // neighboring ranks + int rank_minus[3]; // + + MPI_Comm cart_comm; // should be a Cartesian topology communicator + + double origin[3]; // physical space (x0,x1,x2) coordinates of 1st grid point + // of this rank + int coords[3]; // index space coordinates of this rank + + double aux; + + ~WaveEquation(); + WaveEquation(int a_N, int a_procs_per_dim); + void FindCoordinates(); + double Initial_Condition(double x0, double x1, double x2); + void UpdateGridPoint(int i0, int i1, int i2); + void Print(int kt); + void pack_face(double *pack, int array_of_sizes[3], int array_of_subsizes[3], + int array_of_starts[3]); + void unpack_face(double *pack, int array_of_sizes[3], + int array_of_subsizes[3], int array_of_starts[3]); + void run(double t_end); +}; diff --git a/hw2/solution/p2/.gitignore b/hw2/solution/p2/.gitignore new file mode 100644 index 0000000..1585d5a --- /dev/null +++ b/hw2/solution/p2/.gitignore @@ -0,0 +1,2 @@ +cannon +*.o diff --git a/hw2/solution/p2/Makefile b/hw2/solution/p2/Makefile new file mode 100755 index 0000000..19cb8a8 --- /dev/null +++ b/hw2/solution/p2/Makefile @@ -0,0 +1,26 @@ +CFLAGS = -O3 +LDFLAGS = -lopenblas +CC =$(shell which mpicxx) + +.SECONDARY: +BINARIES = cannon + +.PHONY: all auxiliar + +all: auxiliar $(BINARIES) + +auxiliar: + $(MAKE) -C auxiliar + +cannon: cannon.o auxiliar/auxiliar.o + $(CC) -o $@ $^ $(LDFLAGS) + +%.o: %.cpp + $(CC) -c $(CFLAGS) $< + +.PHONY: clean +clean: + $(RM) $(BINARIES) auxiliar/*.o *.o *.ti + $(MAKE) -C auxiliar clean + + diff --git a/hw2/solution/p2/auxiliar/Makefile b/hw2/solution/p2/auxiliar/Makefile new file mode 100644 index 0000000..728e3e1 --- /dev/null +++ b/hw2/solution/p2/auxiliar/Makefile @@ -0,0 +1,18 @@ +CFLAGS = -O3 +LDFLAGS = -lblas +CC =$(shell which mpicxx) + +.SECONDARY: +OBJECTS = auxiliar.o + +.PHONY: all +all: $(OBJECTS) + +%.o: %.cpp + $(CC) -c $(CFLAGS) $< + +.PHONY: clean +clean: + $(RM) $(BINARIES) *.o *.ti + + diff --git a/hw2/solution/p2/auxiliar/auxiliar.cpp b/hw2/solution/p2/auxiliar/auxiliar.cpp new file mode 100644 index 0000000..a90492f --- /dev/null +++ b/hw2/solution/p2/auxiliar/auxiliar.cpp @@ -0,0 +1,46 @@ +#include "auxiliar.hpp" +#include +#include + +void initializeMatrices(double* A, double* B, const int n, const int N, const int rankx, const int ranky) +{ + //initialize value and align for A, B + double *aptr, *bptr; + aptr = A; bptr = B; + for(int i = n*rankx; i < n*(rankx+1); i++) + for(int j = n*ranky; j < n*(ranky+1); j++) + { + *aptr = 1.0/((i +ranky*n)%N + j +1); + *bptr = 1.0/(i+ (j+rankx*n)%N +1); + aptr++; bptr++; + } +} + +void verifyMatMulResults(double* C, const int n, const int N, const int rankx, const int ranky, double execTime) +{ + if (rankx == 0 && ranky == 0) printf("Verifying Result... "); + + double tolerance = 1e-6; + int error = 0; + double* cptr = C; + for(int i = n*rankx; i < n*(rankx+1) && !error; i++) + for(int j = n*ranky; j < n*(ranky+1) && !error; j++, cptr++) + { + double tmp = 0; + for(int k = 0; k < N; k++) tmp += 1.0/((i+k+1)*(k+j+1)); + error = fabs(*cptr-tmp) > tolerance; + } + int tempErr = error; + MPI_Reduce(&tempErr, &error, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD); + + if (rankx == 0 && ranky == 0) + { + if (error) { printf("\n[Error] Verification Failed!\n"); exit(-1); } + + double gflops = (((2e-9)*N)*N)*N/execTime; + printf("Passed! \n"); + printf("Execution time: %.3fs \n",execTime); + printf("GFlop/s: %.4f \n",gflops); + } +} + diff --git a/hw2/solution/p2/auxiliar/auxiliar.hpp b/hw2/solution/p2/auxiliar/auxiliar.hpp new file mode 100644 index 0000000..bb5d400 --- /dev/null +++ b/hw2/solution/p2/auxiliar/auxiliar.hpp @@ -0,0 +1,8 @@ +#ifndef _HPCSE2_AUXILIAR_ +#define _HPCSE2_AUXILIAR_ +#include +#include +void verifyMatMulResults(double* C, const int n, const int N, const int rankx, const int ranky, double execTime); +void initializeMatrices(double* A, double* B, const int n, const int N, const int rankx, const int ranky); + +#endif diff --git a/hw2/solution/p2/cannon.cpp b/hw2/solution/p2/cannon.cpp new file mode 100755 index 0000000..f148349 --- /dev/null +++ b/hw2/solution/p2/cannon.cpp @@ -0,0 +1,114 @@ +#include +#include +#include +#include +#include +#include "auxiliar/auxiliar.hpp" + +extern "C" void dgemm_(const char *transa, const char *transb, const int *m, const int *n, const int *k, const double *alpha, const double *a, const int *lda, const double *b, const int *ldb, const double *beta, double *c, const int *ldc); + +int main(int argc, char* argv[]) +{ + /********************************************************** + * Initial Setup + **********************************************************/ + size_t N = 1024; + double one = 1.0; + double *A,*B,*C, *tmpA, *tmpB; + MPI_Request request[2]; + + int rank, size; + MPI_Init(&argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); + + /********************************************************** + * Determining rank geometry < You need to improve this > + **********************************************************/ + + int p = sqrt(size); + if (size != p*p) { printf("[Error] Number of processors must be a square integer.\n"); MPI_Abort(MPI_COMM_WORLD, -1); } + + int coords[2] = {0, 0}; + int nums[2] = {p, p}; + int periodic[2] = {true, true}; + + MPI_Comm cannonComm; + MPI_Cart_create(MPI_COMM_WORLD, 2, nums, periodic, true, &cannonComm); + + int up, down, left, right; + MPI_Cart_shift(cannonComm, 0, 1, &left, &right); + MPI_Cart_shift(cannonComm, 1, 1, &up, &down); + + MPI_Cart_coords(cannonComm, rank, 2, coords); + int rankx = coords[0]; + int ranky = coords[1]; + + /******************************************** + * Initializing Matrices + *******************************************/ + + // Determining local matrix side size (n) + const int n = N/p; + + // Allocating space for local matrices A, B, C + A = (double*) calloc (n*n, sizeof(double)); + B = (double*) calloc (n*n, sizeof(double)); + C = (double*) calloc (n*n, sizeof(double)); + + // Allocating space for recieve buffers for A and B local matrices + tmpA = (double*) calloc (n*n, sizeof(double)); + tmpB = (double*) calloc (n*n, sizeof(double)); + + // Initializing values for input matrices A and B + initializeMatrices(A, B, n, N, rankx, ranky); + + /******************************************************* + * Creating Contiguous Datatype + ******************************************************/ + + MPI_Datatype subMatrixType; + MPI_Type_contiguous(n*n, MPI_DOUBLE, &subMatrixType); + MPI_Type_commit(&subMatrixType); + + /************************************************************** + * Running Cannon's Matrix-Matrix Multiplication Algorithm + **************************************************************/ + + if (rank == 0) printf("Running Matrix-Matrix Multiplication...\n"); + + // Registering initial time. It's important to precede this with a barrier to make sure + // all ranks are ready to start when we take the time. + MPI_Barrier(MPI_COMM_WORLD); + double execTime = -MPI_Wtime(); + + dgemm_("N", "N", &n, &n, &n, &one, A, &n, B, &n, &one, C, &n); + for(int step =1; step < p; step++) + { + MPI_Irecv(tmpA, 1, subMatrixType, right, 0, cannonComm, &request[0]); + MPI_Irecv(tmpB, 1, subMatrixType, down, 1, cannonComm, &request[1]); + + MPI_Send(A, 1, subMatrixType, left, 0, cannonComm); + MPI_Send(B, 1, subMatrixType, up, 1, cannonComm); + + MPI_Waitall(2, request, MPI_STATUS_IGNORE); + + double* holdA= A; A = tmpA; tmpA = holdA; + double* holdB= B; B = tmpB; tmpB = holdB; + + dgemm_("N", "N", &n, &n, &n, &one, A, &n, B, &n, &one, C, &n); + } + + // Registering final time. It's important to precede this with a barrier to make sure all ranks have finished before we take the time. + MPI_Barrier(MPI_COMM_WORLD); + execTime += MPI_Wtime(); + + /************************************************************** + * Verification Stage + **************************************************************/ + + verifyMatMulResults(C, n, N, rankx, ranky, execTime); + free(A); free(B); free(C); free(tmpA); free(tmpB); + MPI_Comm_free(&cannonComm); + return MPI_Finalize(); +} diff --git a/hw2/solution/p3/particle.cpp b/hw2/solution/p3/particle.cpp new file mode 100644 index 0000000..8584012 --- /dev/null +++ b/hw2/solution/p3/particle.cpp @@ -0,0 +1,88 @@ +#include +#include +#include + + +struct particle +{ + int id; + double x[3]; + bool state; + double gamma; +}; + + + +int main(int argc, char ** argv) +{ + MPI_Init(&argc, &argv); + + int rank,size; + MPI_Comm_rank(MPI_COMM_WORLD,&rank); + MPI_Comm_size(MPI_COMM_WORLD,&size); + + particle p; + + MPI_Datatype MPI_PARTICLE; + int count = 4; + int array_of_blocklengths [4] = {1,3,1,1}; + MPI_Aint array_of_displacements [4]; + MPI_Datatype array_of_types [4] = {MPI_INT,MPI_DOUBLE,MPI_C_BOOL,MPI_DOUBLE}; + + + MPI_Aint base; + MPI_Aint id_address; + MPI_Aint x_address; + MPI_Aint state_address; + MPI_Aint gamma_address; + + MPI_Get_address(&p,&base); + MPI_Get_address(&p.id,&id_address); + MPI_Get_address(&p.x,&x_address); + MPI_Get_address(&p.state,&state_address); + MPI_Get_address(&p.gamma,&gamma_address); + + array_of_displacements[0] = id_address - base; + array_of_displacements[1] = x_address - base; + array_of_displacements[2] = state_address - base; + array_of_displacements[3] = gamma_address - base; + + + MPI_Type_create_struct(count,array_of_blocklengths,array_of_displacements,array_of_types,&MPI_PARTICLE); + MPI_Type_commit(&MPI_PARTICLE); + + + + if (rank == 0) + { + p.id = 1; + p.x[0] = 3.14159265359; + p.x[1] = 2.71828182846; + p.x[2] = ( 1.0 + sqrt(5.0) ) / 2.0; + p.state = false; + p.gamma = 0.57721566490; + + MPI_Send(&p,1,MPI_PARTICLE,1,100,MPI_COMM_WORLD); + } + else if (rank == 1) + { + MPI_Recv(&p,1,MPI_PARTICLE,0,100,MPI_COMM_WORLD,MPI_STATUS_IGNORE); + + printf("%d \n", p.id ); + printf("%10.8f \n", p.x[0] ); + printf("%10.8f \n", p.x[1] ); + printf("%10.8f \n", p.x[2] ); + printf("%d \n" , p.state ); + printf("%10.8f \n", p.gamma ); + } + else + { + printf("Run with exactly two ranks!\n"); + int err = 1; + MPI_Abort(MPI_COMM_WORLD,err); + } + MPI_Type_free(&MPI_PARTICLE); + + MPI_Finalize(); + return 0; +} -- GitLab