// 2013-05-07/np-jacobi.cpp
#include <cassert>
#include <cmath>
#include <cstdlib>
#include <iostream>
#include <sstream>

using namespace std;

// single Jacobi iteration step
double single_jacobi_iteration(double** A, double** B, int n, int m) {
   for (int i = 1; i <= n; ++i) {
      for (int j = 1; j <= m; ++j) {
         B[i][j] = 0.25 *
            (A[i-1][j] + A[i][j-1] + A[i][j+1] + A[i+1][j]);
      }
   }
   double maxdiff = 0;
   for (int i = 1; i <= n; ++i) {
      for (int j = 1; j <= m; ++j) {
         double diff = fabs(A[i][j] - B[i][j]);
         if (diff > maxdiff) maxdiff = diff;
         A[i][j] = B[i][j];
      }
   }
   return maxdiff;
}

void initialize_A(double& Aij, int i, int j, int N) {
   const static double E_POWER_MINUS_PI = pow(M_E, -M_PI);
   if (j == 0) {
      Aij = sin(M_PI * ((double)i/(N-1)));
   } else if (j == N-1) {
      Aij = sin(M_PI * ((double)i/(N-1))) * E_POWER_MINUS_PI;
   } else {
      Aij = 0;
   }
}

double** run_jacobi_iteration(int N, double eps) {
   int n = N-2;
   double** A = new double*[N]; assert(A);
   for (int i = 0; i < N; ++i) {
      A[i] = new double[N]; assert(A[i]);
      for (int j = 0; j < N; ++j) {
         initialize_A(A[i][j], i, j, N);
      }
   }
   double** B = new double*[N-1];
   for (int i = 1; i < N-1; ++i) {
      B[i] = new double[N-1]; assert(B[i]);
   }
   double maxdiff;
   do {
      maxdiff = single_jacobi_iteration(A, B, n, n);
   } while (maxdiff > eps);
   for (int i = 1; i < N-1; ++i) {
      delete[] B[i];
   }
   delete[] B;
   return A;
}

char* cmdname;

void usage() {
   cerr << "Usage: " << cmdname << " [N [eps]] " << endl;
   exit(1);
}

int main(int argc, char** argv) {
   int N = 10;
   double eps = 1e-6;

   cmdname = *argv++; --argc;
   if (argc > 2) usage();
   if (argc > 0) {
      istringstream arg(*argv++); --argc;
      if (!(arg >> N) || N < 3) usage();
   }
   if (argc > 0) {
      istringstream arg(*argv++); --argc;
      if (!(arg >> eps) || eps <= 0) usage();
   }

   cout << N << endl;
   double** A = run_jacobi_iteration(N, eps);
   for (int i = 0; i < N; ++i) {
      for (int j = 0; j < N; ++j) {
         cout << " " << A[i][j];
      }
      cout << endl;
      delete[] A[i];
   }
   delete[] A;
}

// 2013-05-07/mpi-deadlock.cpp
#include <cassert>
#include <iostream>
#include <mpi.h>

using namespace std;

int main(int argc, char** argv) {
   MPI_Init(&argc, &argv);

   int nofprocesses; MPI_Comm_size(MPI_COMM_WORLD, &nofprocesses);
   int rank; MPI_Comm_rank(MPI_COMM_WORLD, &rank);
   assert(nofprocesses == 2);
   const int other = 1 - rank;

   const unsigned int maxsize = 8192;
   double* bigbuf = new double[maxsize];
   for (int len = 1; len <= maxsize; len *= 2) {
      MPI_Send(bigbuf, len, MPI_DOUBLE, other, 0, MPI_COMM_WORLD);
      MPI_Status status;
      MPI_Recv(bigbuf, len, MPI_DOUBLE, other, 0, MPI_COMM_WORLD, &status);
      if (rank == 0) {
         cout << "len = " << len << " survived" << endl;
      }
   }

   MPI_Finalize();
}

// 2013-05-07/mpi-sendrecv.cpp
#include <cassert>
#include <iostream>
#include <mpi.h>

using namespace std;

int main(int argc, char** argv) {
   MPI_Init(&argc, &argv);

   int nofprocesses; MPI_Comm_size(MPI_COMM_WORLD, &nofprocesses);
   int rank; MPI_Comm_rank(MPI_COMM_WORLD, &rank);
   assert(nofprocesses == 2);
   const int other = 1 - rank;

   const unsigned int maxsize = 8192;
   double* bigbuf[2] = {new double[maxsize], new double[maxsize]};
   for (int len = 1; len <= maxsize; len *= 2) {
      MPI_Status status;
      MPI_Sendrecv(
         bigbuf[rank], len, MPI::DOUBLE, other, 0,
         bigbuf[other], len, MPI::DOUBLE, other, 0,
         MPI_COMM_WORLD, &status);
      if (rank == 0) {
         cout << "len = " << len << " survived" << endl;
      }
   }

   MPI_Finalize();
}
// 2013-05-07/mpi-jacobi.cpp
#include <cassert>
#include <cmath>
#include <cstring>
#include <iostream>
#include <sstream>
#include <mpi.h>

using namespace std;

// single Jacobi iteration step
double single_jacobi_iteration(double** A, double** B, int n, int m) {
   for (int i = 1; i <= n; ++i) {
      for (int j = 1; j <= m; ++j) {
         B[i][j] = 0.25 *
            (A[i-1][j] + A[i][j-1] + A[i][j+1] + A[i+1][j]);
      }
   }
   double maxdiff = 0;
   for (int i = 1; i <= n; ++i) {
      for (int j = 1; j <= m; ++j) {
         double diff = fabs(A[i][j] - B[i][j]);
         if (diff > maxdiff) maxdiff = diff;
         A[i][j] = B[i][j];
      }
   }
   return maxdiff;
}

void initialize_A(double& Aij, int i, int j, int N) {
   const static double E_POWER_MINUS_PI = pow(M_E, -M_PI);
   if (j == 0) {
      Aij = sin(M_PI * ((double)i/(N-1)));
   } else if (j == N-1) {
      Aij = sin(M_PI * ((double)i/(N-1))) * E_POWER_MINUS_PI;
   } else {
      Aij = 0;
   }
}

// 1D-partitioned task
double** run_jacobi_iteration(int rank, int nofprocesses, int N, double eps) {
   int n = N-2;
   assert(nofprocesses <= n);
   int nofrows = n / nofprocesses;
   int remainder = n % nofprocesses;
   int first_row = rank * nofrows + 1;
   if (rank < remainder) {
      ++nofrows;
      if (rank > 0) first_row += rank;
   } else {
      first_row += remainder;
   }
   int last_row = first_row + nofrows - 1;

   // A[i] holds global row first_row + i - 1;
   // A[0] and A[nofrows+1] are the ghost rows shared with our neighbors
   double** A = new double*[nofrows+2];
   for (int i = 0; i <= nofrows+1; ++i) {
      A[i] = new double[N];
      for (int j = 0; j < N; ++j) {
         initialize_A(A[i][j], i + first_row - 1, j, N);
      }
   }
   double** B = new double*[nofrows+1];
   for (int i = 1; i <= nofrows; ++i) {
      B[i] = new double[N-1];
   }

   int previous = rank == 0? MPI_PROC_NULL: rank-1;
   int next = rank == nofprocesses-1? MPI_PROC_NULL: rank+1;

   for(;;) {
      double maxdiff = single_jacobi_iteration(A, B, nofrows, n);
      double global_max;
      MPI_Reduce(&maxdiff, &global_max, 1, MPI_DOUBLE,
         MPI_MAX, 0, MPI_COMM_WORLD);
      MPI_Bcast(&global_max, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
      if (global_max < eps) break;

      MPI_Status status;
      // send highest row to the process which is next in rank
      MPI_Sendrecv(
         A[nofrows] + 1, n, MPI_DOUBLE, next, 0,
         A[0] + 1, n, MPI_DOUBLE, previous, 0,
         MPI_COMM_WORLD, &status);
      // send lowest row to the process which is previous in rank
      MPI_Sendrecv(
         A[1] + 1, n, MPI_DOUBLE, previous, 0,
         A[nofrows+1] + 1, n, MPI_DOUBLE, next, 0,
         MPI_COMM_WORLD, &status);
   }

   // collect results in process 0
   double** Result = 0;
   if (rank == 0) {
      Result = new double*[N]; assert(Result);
      for (int i = 0; i < N; ++i) {
         Result[i] = new double[N]; assert(Result[i]);
         for (int j = 0; j < N; ++j) {
            initialize_A(Result[i][j], i, j, N);
         }
      }
      for (int i = 1; i <= last_row; ++i) {
         memcpy(Result[i] + 1, A[i] + 1, n * sizeof(double));
      }
      for (int i = last_row+1; i <= n; ++i) {
         MPI_Status status;
         MPI_Recv(Result[i] + 1, n, MPI_DOUBLE,
            MPI_ANY_SOURCE, i, MPI_COMM_WORLD, &status);
      }
   } else {
      for (int i = 1; i <= nofrows; ++i) {
         MPI_Send(A[i] + 1, n, MPI_DOUBLE, 0,
            first_row + i - 1, MPI_COMM_WORLD);
      }
   }
   return Result;
}

char* cmdname;

void usage() {
   cerr << "Usage: " << cmdname << " [N [eps]] " << endl;
   MPI_Abort(MPI_COMM_WORLD, 1);
}

int main(int argc, char** argv) {
   int N = 10;
   double eps = 1e-6;

   MPI_Init(&argc, &argv);
   int rank; MPI_Comm_rank(MPI_COMM_WORLD, &rank);
   int nofprocesses; MPI_Comm_size(MPI_COMM_WORLD, &nofprocesses);

   if (rank == 0) {
      cmdname = *argv++; --argc;
      if (argc > 2) usage();
      if (argc > 0) {
         istringstream arg(*argv++); --argc;
         if (!(arg >> N) || N < 3 || nofprocesses > N-2) usage();
      }
      if (argc > 0) {
         istringstream arg(*argv++); --argc;
         if (!(arg >> eps) || eps <= 0) usage();
      }
   }
   MPI_Bcast(&N, 1, MPI_INT, 0, MPI_COMM_WORLD);
   MPI_Bcast(&eps, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);

   double** A = run_jacobi_iteration(rank, nofprocesses, N, eps);

   if (rank == 0) {
      cout << N << endl;
      for (int i = 0; i < N; ++i) {
         for (int j = 0; j < N; ++j) {
            cout << " " << A[i][j];
         }
         cout << endl;
         delete[] A[i];
      }
      delete[] A;
   }
   MPI_Finalize();
}

// 2013-05-07/mpi-jacobi-nb.cpp
#include <cassert>
#include <cmath>
#include <cstring>
#include <iostream>
#include <sstream>
#include <mpi.h>

using namespace std;

void initialize_A(double& Aij, int i, int j, int N) {
   const static double E_POWER_MINUS_PI = pow(M_E, -M_PI);
   if (j == 0) {
      Aij = sin(M_PI * ((double)i/(N-1)));
   } else if (j == N-1) {
      Aij = sin(M_PI * ((double)i/(N-1))) * E_POWER_MINUS_PI;
   } else {
      Aij = 0;
   }
}

// 1D-partitioned task
double** run_jacobi_iteration(int rank, int nofprocesses, int N, double eps) {
   int n = N-2;
   assert(nofprocesses <= n);
   int nofrows = n / nofprocesses;
   int remainder = n % nofprocesses;
   int first_row = rank * nofrows + 1;
   if (rank < remainder) {
      ++nofrows;
      if (rank > 0) first_row += rank;
   } else {
      first_row += remainder;
   }
   int last_row = first_row + nofrows - 1;

   // A[i] holds global row first_row + i - 1;
   // A[0] and A[nofrows+1] are the ghost rows shared with our neighbors
   double** A = new double*[nofrows+2];
   for (int i = 0; i <= nofrows+1; ++i) {
      A[i] = new double[N];
      for (int j = 0; j < N; ++j) {
         initialize_A(A[i][j], i + first_row - 1, j, N);
      }
   }
   double** B = new double*[nofrows+1];
   for (int i = 1; i <= nofrows; ++i) {
      B[i] = new double[N-1];
   }

   int previous = rank == 0? MPI_PROC_NULL: rank-1;
   int next = rank == nofprocesses-1? MPI_PROC_NULL: rank+1;

   double global_max;
   do {
      // compute border zones
      for (int j = 1; j <= n; ++j) {
         B[1][j] = 0.25 *
            (A[0][j] + A[1][j-1] + A[1][j+1] + A[2][j]);
         B[nofrows][j] = 0.25 *
            (A[nofrows-1][j] + A[nofrows][j-1] +
             A[nofrows][j+1] + A[nofrows+1][j]);
      }
      // initiate non-blocking communication
      MPI_Request req[4];
      MPI_Irecv(A[0] + 1, n, MPI_DOUBLE, previous, 0,
         MPI_COMM_WORLD, &req[0]);
      MPI_Irecv(A[nofrows+1] + 1, n, MPI_DOUBLE, next, 0,
         MPI_COMM_WORLD, &req[1]);
      MPI_Isend(B[1] + 1, n, MPI_DOUBLE, previous, 0,
         MPI_COMM_WORLD, &req[2]);
      MPI_Isend(B[nofrows] + 1, n, MPI_DOUBLE, next, 0,
         MPI_COMM_WORLD, &req[3]);
      // compute inner zone
      for (int i = 2; i < nofrows; ++i) {
         for (int j = 1; j <= n; ++j) {
            B[i][j] = 0.25 *
               (A[i-1][j] + A[i][j-1] + A[i][j+1] + A[i+1][j]);
         }
      }
      // prepare next iteration and compute maxdiff
      double maxdiff = 0;
      for (int i = 1; i <= nofrows; ++i) {
         for (int j = 1; j <= n; ++j) {
            double diff = fabs(A[i][j] - B[i][j]);
            if (diff > maxdiff) maxdiff = diff;
            A[i][j] = B[i][j];
         }
      }
      MPI_Reduce(&maxdiff, &global_max, 1, MPI_DOUBLE,
         MPI_MAX, 0, MPI_COMM_WORLD);
      MPI_Bcast(&global_max, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
      // block until initiated communication is finished
      for (int i = 0; i < 4; ++i) {
         MPI_Status status;
         MPI_Wait(&req[i], &status);
      }
   } while (global_max > eps);

   // collect results in process 0
   double** Result = 0;
   if (rank == 0) {
      Result = new double*[N]; assert(Result);
      for (int i = 0; i < N; ++i) {
         Result[i] = new double[N]; assert(Result[i]);
         for (int j = 0; j < N; ++j) {
            initialize_A(Result[i][j], i, j, N);
         }
      }
      for (int i = 1; i <= last_row; ++i) {
         memcpy(Result[i] + 1, A[i] + 1, n * sizeof(double));
      }
      for (int i = last_row+1; i <= n; ++i) {
         MPI_Status status;
         MPI_Recv(Result[i] + 1, n, MPI_DOUBLE,
            MPI_ANY_SOURCE, i, MPI_COMM_WORLD, &status);
      }
   } else {
      for (int i = 1; i <= nofrows; ++i) {
         MPI_Send(A[i] + 1, n, MPI_DOUBLE, 0,
            first_row + i - 1, MPI_COMM_WORLD);
      }
   }
   return Result;
}

char* cmdname;

void usage() {
   cerr << "Usage: " << cmdname << " [N [eps]] " << endl;
   MPI_Abort(MPI_COMM_WORLD, 1);
}

int main(int argc, char** argv) {
   int N = 10;
   double eps = 1e-6;

   MPI_Init(&argc, &argv);
   int rank; MPI_Comm_rank(MPI_COMM_WORLD, &rank);
   int nofprocesses; MPI_Comm_size(MPI_COMM_WORLD, &nofprocesses);

   if (rank == 0) {
      cmdname = *argv++; --argc;
      if (argc > 2) usage();
      if (argc > 0) {
         istringstream arg(*argv++); --argc;
         if (!(arg >> N) || N < 3 || nofprocesses > N-2) usage();
      }
      if (argc > 0) {
         istringstream arg(*argv++); --argc;
         if (!(arg >> eps) || eps <= 0) usage();
      }
   }
   MPI_Bcast(&N, 1, MPI_INT, 0, MPI_COMM_WORLD);
   MPI_Bcast(&eps, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);

   double** A = run_jacobi_iteration(rank, nofprocesses, N, eps);

   if (rank == 0) {
      cout << N << endl;
      for (int i = 0; i < N; ++i) {
         for (int j = 0; j < N; ++j) {
            cout << " " << A[i][j];
         }
         cout << endl;
         delete[] A[i];
      }
      delete[] A;
   }
   MPI_Finalize();
}

// 2013-05-07/mpi-jacobi-2d.cpp
#include <cassert>
#include <cmath>
#include <iostream>
#include <sstream>
#include <mpi.h>
#include <flens/flens.h> // FLENS; header name may differ between FLENS versions

// M_E and M_PI are not part of ISO C++
#ifndef M_E
#define M_E 2.7182818284590452354
#endif
#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif

using namespace std;

// row-major storage keeps matrix rows contiguous, matching the strides
// of the MPI vector and matrix types constructed below
typedef flens::GeMatrix<flens::FullStorage<double, flens::RowMajor> > Matrix;

void get_partition(int len, int nofprocesses, int rank,
      int& start, int& locallen) {
   locallen = (len - rank - 1) / nofprocesses + 1;
   int share = len / nofprocesses;
   int remainder = len % nofprocesses;
   start = rank * share + (rank < remainder? rank: remainder);
}

void get_submatrix(const MPI_Comm& grid, int* dims, int n, int rank,
      int& first_row, int& first_col, int& nof_rows, int& nof_cols) {
   int coords[2];
   MPI_Cart_coords(grid, rank, 2, coords); // retrieve our position
   get_partition(n, dims[0], coords[0], first_row, nof_rows);
   get_partition(n, dims[1], coords[1], first_col, nof_cols);
}

void initialize_A(double& Aij, int i, int j, int N) {
   const static double E_POWER_MINUS_PI = pow(M_E, -M_PI);
   if (j == 0) {
      Aij = sin(M_PI * ((double)i/(N-1)));
   } else if (j == N-1) {
      Aij = sin(M_PI * ((double)i/(N-1))) * E_POWER_MINUS_PI;
   } else {
      Aij = 0;
   }
}

// single Jacobi iteration step
double single_jacobi_iteration(Matrix& A, Matrix& B) {
   for (int i = B.firstRow(); i <= B.lastRow(); ++i) {
      for (int j = B.firstCol(); j <= B.lastCol(); ++j) {
         B(i,j) = 0.25 * (A(i-1,j) + A(i,j-1) + A(i,j+1) + A(i+1,j));
      }
   }
   double maxdiff = 0;
   for (int i = B.firstRow(); i <= B.lastRow(); ++i) {
      for (int j = B.firstCol(); j <= B.lastCol(); ++j) {
         double diff = fabs(A(i,j) - B(i,j));
         if (diff > maxdiff) maxdiff = diff;
         A(i,j) = B(i,j);
      }
   }
   return maxdiff;
}

MPI_Datatype vector_type(int len, int stride) {
   MPI_Datatype datatype;
   MPI_Type_vector(
      /* count = */ len,
      /* blocklength = */ 1,
      /* stride = */ stride,
      /* element type = */ MPI_DOUBLE,
      /* newly created type = */ &datatype);
   MPI_Type_commit(&datatype);
   return datatype;
}

MPI_Datatype matrix_type(const Matrix::View& submatrix) {
   MPI_Datatype datatype;
   MPI_Type_vector(
      /* count = */ submatrix.numRows(),
      /* blocklength = */ submatrix.numCols(),
      /* stride = */ submatrix.engine().leadingDimension(),
      /* element type = */ MPI_DOUBLE,
      /* newly created type = */ &datatype);
   MPI_Type_commit(&datatype);
   return datatype;
}

// 2D-partitioned task
Matrix* run_jacobi_iteration(int rank, int nofprocesses, int N, double eps) {
   int n = N - 2; // without the surrounding border

   // create two-dimensional Cartesian grid
   int dims[2] = {0, 0};
   int periods[2] = {false, false};
   MPI_Dims_create(nofprocesses, 2, dims);
   MPI_Comm grid;
   MPI_Cart_create(MPI_COMM_WORLD,
      2,       // number of dimensions
      dims,    // actual dimensions
      periods, // both dimensions are non-periodical
      true,    // reorder is permitted
      &grid    // newly created communication domain
   );
   MPI_Comm_rank(MPI_COMM_WORLD, &rank); // update rank (could have changed)

   // locate our own submatrix
   int first_row, nof_rows, first_col, nof_cols;
   get_submatrix(grid, dims, n, rank, first_row, first_col, nof_rows, nof_cols);

   Matrix A(nof_rows + 2, nof_cols + 2, first_row, first_col);
   Matrix B(nof_rows, nof_cols, first_row + 1, first_col + 1);
   for (int i = A.firstRow(); i <= A.lastRow(); ++i) {
      for (int j = A.firstCol(); j <= A.lastCol(); ++j) {
         initialize_A(A(i, j), i, j, N);
      }
   }

   // create the associated vector views
   struct buffer { double* buf; MPI_Datatype type; };
   struct buffer in_vectors[] = {
      {&A(A.firstRow(), A.firstCol() + 1), vector_type(nof_cols, 1)},
      {&A(A.lastRow(), A.firstCol() + 1), vector_type(nof_cols, 1)},
      {&A(A.firstRow() + 1, A.firstCol()), vector_type(nof_rows, nof_cols + 2)},
      {&A(A.firstRow() + 1, A.lastCol()), vector_type(nof_rows, nof_cols + 2)}
   };
   struct buffer out_vectors[] = {
      {&B(B.lastRow(), B.firstCol()), vector_type(nof_cols, 1)},
      {&B(B.firstRow(), B.firstCol()), vector_type(nof_cols, 1)},
      {&B(B.firstRow(), B.lastCol()), vector_type(nof_rows, nof_cols)},
      {&B(B.firstRow(), B.firstCol()), vector_type(nof_rows, nof_cols)}
   };

   // get the process numbers of our neighbors
   int left, right, upper, lower;
   MPI_Cart_shift(grid, 0, 1, &upper, &lower);
   MPI_Cart_shift(grid, 1, 1, &left, &right);
   int in_neighbor[] = {upper, lower, left, right};
   int out_neighbor[] = {lower, upper, right, left};

   for(;;) {
      double maxdiff = single_jacobi_iteration(A, B);
      double global_max;
      MPI_Reduce(&maxdiff, &global_max, 1, MPI_DOUBLE,
         MPI_MAX, 0, MPI_COMM_WORLD);
      MPI_Bcast(&global_max, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
      if (global_max < eps) break;

      // exchange borders with our neighbors
      for (int dir = 0; dir < 4; ++dir) {
         MPI_Status status;
         MPI_Sendrecv(
            out_vectors[dir].buf, 1, out_vectors[dir].type,
               out_neighbor[dir], 0,
            in_vectors[dir].buf, 1, in_vectors[dir].type,
               in_neighbor[dir], 0,
            MPI_COMM_WORLD, &status
         );
      }
   }

   // collect results in process 0
   Matrix* Result = 0;
   if (rank == 0) {
      Result = new Matrix(N, N, 0, 0); assert(Result);
      for (int i = 0; i < N; ++i) {
         for (int j = 0; j < N; ++j) {
            initialize_A((*Result)(i,j), i, j, N);
         }
      }
      for (int p = 0; p < nofprocesses; ++p) {
         int first_row, first_col, nof_rows, nof_cols;
         get_submatrix(grid, dims, n, p,
            first_row, first_col, nof_rows, nof_cols);
         ++first_row; ++first_col;
         Matrix::View submatrix(Result->engine().view(
            first_row, first_col,
            first_row + nof_rows - 1, first_col + nof_cols - 1,
            first_row, first_col));
         if (p == 0) {
            submatrix = B;
         } else {
            MPI_Status status;
            MPI_Recv(
               &submatrix(submatrix.firstRow(), submatrix.firstCol()),
               1, matrix_type(submatrix), p, 0, MPI_COMM_WORLD, &status);
         }
      }
   } else {
      MPI_Send(&B(B.firstRow(), B.firstCol()),
         B.numRows() * B.numCols(), MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
   }
   return Result;
}

char* cmdname;

void usage() {
   cerr << "Usage: " << cmdname << " [N [eps]] " << endl;
   MPI_Abort(MPI_COMM_WORLD, 1);
}

int main(int argc, char** argv) {
   int N = 10;
   double eps = 1e-6;

   MPI_Init(&argc, &argv);
   int rank; MPI_Comm_rank(MPI_COMM_WORLD, &rank);
   int nofprocesses; MPI_Comm_size(MPI_COMM_WORLD, &nofprocesses);

   if (rank == 0) {
      cmdname = *argv++; --argc;
      if (argc > 2) usage();
      if (argc > 0) {
         istringstream arg(*argv++); --argc;
         if (!(arg >> N) || N < 3 || nofprocesses > (N-2)*(N-2)) usage();
      }
      if (argc > 0) {
         istringstream arg(*argv++); --argc;
         if (!(arg >> eps) || eps <= 0) usage();
      }
   }
   MPI_Bcast(&N, 1, MPI_INT, 0, MPI_COMM_WORLD);
   MPI_Bcast(&eps, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);

   Matrix* A = run_jacobi_iteration(rank, nofprocesses, N, eps);

   if (rank == 0) {
      cout << N << endl;
      for (int i = 0; i < N; ++i) {
         for (int j = 0; j < N; ++j) {
            cout << " " << (*A)(i,j);
         }
         cout << endl;
      }
      delete A;
   }
   MPI_Finalize();
}

// 2013-05-07/mpi-jacobi-2d-nb.cpp
#include <cassert>
#include <cmath>
#include <iostream>
#include <sstream>
#include <mpi.h>
#include <flens/flens.h> // FLENS; header name may differ between FLENS versions

// M_E and M_PI are not part of ISO C++
#ifndef M_E
#define M_E 2.7182818284590452354
#endif
#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif

using namespace std;

// row-major storage keeps matrix rows contiguous, matching the strides
// of the MPI vector and matrix types constructed below
typedef flens::GeMatrix<flens::FullStorage<double, flens::RowMajor> > Matrix;

void get_partition(int len, int nofprocesses, int rank,
      int& start, int& locallen) {
   locallen = (len - rank - 1) / nofprocesses + 1;
   int share = len / nofprocesses;
   int remainder = len % nofprocesses;
   start = rank * share + (rank < remainder? rank: remainder);
}

void get_submatrix(const MPI_Comm& grid, int* dims, int n, int rank,
      int& first_row, int& first_col, int& nof_rows, int& nof_cols) {
   int coords[2];
   MPI_Cart_coords(grid, rank, 2, coords); // retrieve our position
   get_partition(n, dims[0], coords[0], first_row, nof_rows);
   get_partition(n, dims[1], coords[1], first_col, nof_cols);
}

void initialize_A(double& Aij, int i, int j, int N) {
   const static double E_POWER_MINUS_PI = pow(M_E, -M_PI);
   if (j == 0) {
      Aij = sin(M_PI * ((double)i/(N-1)));
   } else if (j == N-1) {
      Aij = sin(M_PI * ((double)i/(N-1))) * E_POWER_MINUS_PI;
   } else {
      Aij = 0;
   }
}

MPI_Datatype vector_type(int len, int stride) {
   MPI_Datatype datatype;
   MPI_Type_vector(
      /* count = */ len,
      /* blocklength = */ 1,
      /* stride = */ stride,
      /* element type = */ MPI_DOUBLE,
      /* newly created type = */ &datatype);
   MPI_Type_commit(&datatype);
   return datatype;
}

MPI_Datatype matrix_type(const Matrix::View& submatrix) {
   MPI_Datatype datatype;
   MPI_Type_vector(
      /* count = */ submatrix.numRows(),
      /* blocklength = */ submatrix.numCols(),
      /* stride = */ submatrix.engine().leadingDimension(),
      /* element type = */ MPI_DOUBLE,
      /* newly created type = */ &datatype);
   MPI_Type_commit(&datatype);
   return datatype;
}

// 2D-partitioned task
Matrix* run_jacobi_iteration(int rank, int nofprocesses, int N, double eps) {
   int n = N - 2; // without the surrounding border

   // create two-dimensional Cartesian grid
   int dims[2] = {0, 0};
   int periods[2] = {false, false};
   MPI_Dims_create(nofprocesses, 2, dims);
   MPI_Comm grid;
   MPI_Cart_create(MPI_COMM_WORLD,
      2,       // number of dimensions
      dims,    // actual dimensions
      periods, // both dimensions are non-periodical
      true,    // reorder is permitted
      &grid    // newly created communication domain
   );
   MPI_Comm_rank(MPI_COMM_WORLD, &rank); // update rank (could have changed)

   // locate our own submatrix
   int first_row, nof_rows, first_col, nof_cols;
   get_submatrix(grid, dims, n, rank, first_row, first_col, nof_rows, nof_cols);

   Matrix A(nof_rows + 2, nof_cols + 2, first_row, first_col);
   Matrix B(nof_rows, nof_cols, first_row + 1, first_col + 1);
   for (int i = A.firstRow(); i <= A.lastRow(); ++i) {
      for (int j = A.firstCol(); j <= A.lastCol(); ++j) {
         initialize_A(A(i, j), i, j, N);
      }
   }

   // create the associated vector views
   struct buffer { double* buf; MPI_Datatype type; };
   struct buffer in_vectors[] = {
      {&A(A.firstRow(), A.firstCol() + 1), vector_type(nof_cols, 1)},
      {&A(A.lastRow(), A.firstCol() + 1), vector_type(nof_cols, 1)},
      {&A(A.firstRow() + 1, A.firstCol()), vector_type(nof_rows, nof_cols + 2)},
      {&A(A.firstRow() + 1, A.lastCol()), vector_type(nof_rows, nof_cols + 2)}
   };
   struct buffer out_vectors[] = {
      {&B(B.lastRow(), B.firstCol()), vector_type(nof_cols, 1)},
      {&B(B.firstRow(), B.firstCol()), vector_type(nof_cols, 1)},
      {&B(B.firstRow(), B.lastCol()), vector_type(nof_rows, nof_cols)},
      {&B(B.firstRow(), B.firstCol()), vector_type(nof_rows, nof_cols)}
   };

   // get the process numbers of our neighbors
   int left, right, upper, lower;
   MPI_Cart_shift(grid, 0, 1, &upper, &lower);
   MPI_Cart_shift(grid, 1, 1, &left, &right);
   int in_neighbor[] = {upper, lower, left, right};
   int out_neighbor[] = {lower, upper, right, left};

   double global_max;
   do {
      // compute border zones
      for (int j = B.firstCol(); j <= B.lastCol(); ++j) {
         int i = B.firstRow();
         B(i,j) = 0.25 * (A(i-1,j) + A(i,j-1) + A(i,j+1) + A(i+1,j));
         i = B.lastRow();
         B(i,j) = 0.25 * (A(i-1,j) + A(i,j-1) + A(i,j+1) + A(i+1,j));
      }
      for (int i = B.firstRow(); i <= B.lastRow(); ++i) {
         int j = B.firstCol();
         B(i,j) = 0.25 * (A(i-1,j) + A(i,j-1) + A(i,j+1) + A(i+1,j));
         j = B.lastCol();
         B(i,j) = 0.25 * (A(i-1,j) + A(i,j-1) + A(i,j+1) + A(i+1,j));
      }
      // exchange borders with our neighbors
      MPI_Request req[8];
      for (int dir = 0; dir < 4; ++dir) {
         MPI_Isend(out_vectors[dir].buf, 1, out_vectors[dir].type,
            out_neighbor[dir], 0, MPI_COMM_WORLD, &req[dir*2]);
         MPI_Irecv(in_vectors[dir].buf, 1, in_vectors[dir].type,
            in_neighbor[dir], 0, MPI_COMM_WORLD, &req[dir*2+1]);
      }
      // compute inner region
      for (int i = B.firstRow() + 1; i < B.lastRow(); ++i) {
         for (int j = B.firstCol() + 1; j < B.lastCol(); ++j) {
            B(i,j) = 0.25 * (A(i-1,j) + A(i,j-1) + A(i,j+1) + A(i+1,j));
         }
      }
      double maxdiff = 0;
      for (int i = B.firstRow(); i <= B.lastRow(); ++i) {
         for (int j = B.firstCol(); j <= B.lastCol(); ++j) {
            double diff = fabs(A(i,j) - B(i,j));
            A(i,j) = B(i,j);
            if (diff > maxdiff) maxdiff = diff;
         }
      }
      // block until initiated communication is finished
      for (int i = 0; i < 8; ++i) {
         MPI_Status status;
         MPI_Wait(&req[i], &status);
      }
      // check remaining error
      MPI_Reduce(&maxdiff, &global_max, 1, MPI_DOUBLE,
         MPI_MAX, 0, MPI_COMM_WORLD);
      MPI_Bcast(&global_max, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
   } while (global_max > eps);

   // collect results in process 0
   Matrix* Result = 0;
   if (rank == 0) {
      Result = new Matrix(N, N, 0, 0); assert(Result);
      for (int i = 0; i < N; ++i) {
         for (int j = 0; j < N; ++j) {
            initialize_A((*Result)(i,j), i, j, N);
         }
      }
      for (int p = 0; p < nofprocesses; ++p) {
         int first_row, first_col, nof_rows, nof_cols;
         get_submatrix(grid, dims, n, p,
            first_row, first_col, nof_rows, nof_cols);
         ++first_row; ++first_col;
         Matrix::View submatrix(Result->engine().view(
            first_row, first_col,
            first_row + nof_rows - 1, first_col + nof_cols - 1,
            first_row, first_col));
         if (p == 0) {
            submatrix = B;
         } else {
            MPI_Status status;
            MPI_Recv(
               &submatrix(submatrix.firstRow(), submatrix.firstCol()),
               1, matrix_type(submatrix), p, 0, MPI_COMM_WORLD, &status);
         }
      }
   } else {
      MPI_Send(&B(B.firstRow(), B.firstCol()),
         B.numRows() * B.numCols(), MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
   }
   return Result;
}

char* cmdname;

void usage() {
   cerr << "Usage: " << cmdname << " [N [eps]] " << endl;
   MPI_Abort(MPI_COMM_WORLD, 1);
}

int main(int argc, char** argv) {
   int N = 10;
   double eps = 1e-6;

   MPI_Init(&argc, &argv);
   int rank; MPI_Comm_rank(MPI_COMM_WORLD, &rank);
   int nofprocesses; MPI_Comm_size(MPI_COMM_WORLD, &nofprocesses);

   if (rank == 0) {
      cmdname = *argv++; --argc;
      if (argc > 2) usage();
      if (argc > 0) {
         istringstream arg(*argv++); --argc;
         if (!(arg >> N) || N < 3 || nofprocesses > (N-2)*(N-2)) usage();
      }
      if (argc > 0) {
         istringstream arg(*argv++); --argc;
         if (!(arg >> eps) || eps <= 0) usage();
      }
   }
   MPI_Bcast(&N, 1, MPI_INT, 0, MPI_COMM_WORLD);
   MPI_Bcast(&eps, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);

   Matrix* A = run_jacobi_iteration(rank, nofprocesses, N, eps);

   if (rank == 0) {
      cout << N << endl;
      for (int i = 0; i < N; ++i) {
         for (int j = 0; j < N; ++j) {
            cout << " " << (*A)(i,j);
         }
         cout << endl;
      }
      delete A;
   }
   MPI_Finalize();
}