In this step we present the results of running the serial and parallel algorithms, as well as the program listings.
Below are the results of the computational experiments carried out to estimate the execution time of the parallel Fox matrix multiplication algorithm considered earlier (Table 1).
| Matrix size | Serial algorithm | Parallel algorithm, 1 processor | Parallel algorithm, 4 processors |
|---|---|---|---|
| 10 | 0.0427 | 0.1433 | 0.1076 |
| 20 | 0.3137 | 0.4715 | 0.3075 |
| 30 | 1.0805 | 1.2658 | 0.8514 |
| 40 | 2.5363 | 3.1452 | 1.8859 |
| 50 | 4.8992 | 5.5382 | 3.4271 |
| 60 | 8.8941 | 9.4643 | 5.8842 |
| 70 | 14.372 | 15.6952 | 9.0321 |
| 80 | 21.687 | 22.4523 | 13.3906 |
| 90 | 33.618 | 32.6518 | 18.8856 |
| 100 | 44.388 | 48.1497 | 26.6772 |
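From the figures in Table 1 one can estimate the speedup S = T_serial / T_parallel and the efficiency E = S / p of the 4-processor run. Below is a minimal sketch of this calculation for the 100×100 case; it only restates the timings from the last row of the table, and the program itself is illustrative, not part of the original experiment.

```cpp
#include <cstdio>

// Speedup and efficiency of the 4-processor run for the 100x100 matrices,
// computed from the last row of Table 1.
int main() {
    const double tSerial   = 44.388;   // serial algorithm
    const double tParallel = 26.6772;  // parallel algorithm, 4 processors
    const int    procCount = 4;

    double speedup    = tSerial / tParallel;    // ~1.66
    double efficiency = speedup / procCount;    // ~0.42

    printf("Speedup:    %.4f\n", speedup);
    printf("Efficiency: %.4f\n", efficiency);
    return 0;
}
```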
Below are the implementations of the serial and parallel algorithms.
Serial algorithm:
#include "stdafx.h" #include <stdio.h> #include <stdlib.h> #include <conio.h> #include <time.h> #include <windows.h> void RandomDataInitialization (double* pAMatrix, double* pBMatrix, int Size) { int i, j; srand(unsigned(clock())); for (i=0; i<Size; i++) for (j=0; j<Size; j++) { pAMatrix[i*Size+j] = rand()/double(1000); pBMatrix[i*Size+j] = rand()/double(1000); } } void ProcessInitialization (double* &pAMatrix, double* &pBMatrix, double* &pCMatrix, int &Size) { printf("\nEnter size: "); scanf("%d", &Size); pAMatrix = new double [Size*Size]; pBMatrix = new double [Size*Size]; pCMatrix = new double [Size*Size]; RandomDataInitialization(pAMatrix, pBMatrix, Size); for (int i=0; i<Size*Size; i++) { pCMatrix[i] = 0; } } void PrintMatrix (double* pMatrix, int RowCount, int ColCount) { int i, j; for (i=0; i<RowCount; i++) { for (j=0; j<ColCount; j++) printf("%7.4f ", pMatrix[i*RowCount+j]); printf("\n"); } } void SerialResultCalculation(double* pAMatrix, double* pBMatrix, double* pCMatrix, int Size) { int i, j, k; for (i=0; i<Size; i++) { for (j=0; j<Size; j++) for (k=0; k<Size; k++) pCMatrix[i*Size+j] += pAMatrix[i*Size+k]*pBMatrix[k*Size+j]; } } void ProcessTermination (double* pAMatrix, double* pBMatrix, double* pCMatrix) { delete [] pAMatrix; delete [] pBMatrix; delete [] pCMatrix; } void main() { double* pAMatrix; double* pBMatrix; double* pCMatrix; int Size; double start, finish, duration; printf("Serial matrix multiplication program\n"); ProcessInitialization(pAMatrix, pBMatrix, pCMatrix, Size); printf ("\n Matrix A: \n"); PrintMatrix(pAMatrix, Size, Size); printf("\n Matrix B: \n"); PrintMatrix(pBMatrix, Size, Size); start = GetTickCount(); SerialResultCalculation(pAMatrix, pBMatrix, pCMatrix, Size); finish = GetTickCount(); duration = (finish-start)*1000; printf ("\n Result Matrix: \n"); PrintMatrix(pCMatrix, Size, Size); printf("\n Time: %10.5f\n", duration); ProcessTermination(pAMatrix, pBMatrix, pCMatrix); getch(); }
Parallel algorithm:
#include "stdafx.h" #include <stdio.h> #include <stdlib.h> #include <time.h> #include <conio.h> #include <math.h> #include <mpi.h> int ProcNum = 0; int ProcRank = 0; int GridSize; int GridCoords[2]; MPI_Comm GridComm; MPI_Comm ColComm; MPI_Comm RowComm; void RandomDataInitialization (double* pAMatrix, double* pBMatrix, int Size) { int i, j; srand(unsigned(clock())); for (i=0; i<Size; i++) for (j=0; j<Size; j++) { pAMatrix[i*Size+j] = rand()/double(1000); pBMatrix[i*Size+j] = rand()/double(1000); } } void PrintMatrix (double* pMatrix, int RowCount, int ColCount) { int i, j; for (i=0; i<RowCount; i++) { for (j=0; j<ColCount; j++) printf("%7.4f ", pMatrix[i*ColCount+j]); printf("\n"); } } void PrintTime(double x1, double x2) { printf("\nTime: "); printf("%15.10f ",(x2-x1)*1000); } void SerialResultCalculation(double* pAMatrix, double* pBMatrix, double* pCMatrix, int Size) { int i, j, k; for (i=0; i<Size; i++) { for (j=0; j<Size; j++) for (k=0; k<Size; k++) pCMatrix[i*Size+j] += pAMatrix[i*Size+k]*pBMatrix[k*Size+j]; } } void BlockMultiplication(double* pAblock, double* pBblock, double* pCblock, int Size) { SerialResultCalculation(pAblock, pBblock, pCblock, Size); } void CreateGridCommunicators() { int DimSize[2]; int Periodic[2]; int Subdims[2]; DimSize[0] = GridSize; DimSize[1] = GridSize; Periodic[0] = 0; Periodic[1] = 0; MPI_Cart_create(MPI_COMM_WORLD, 2, DimSize, Periodic, 1, &GridComm); MPI_Cart_coords(GridComm, ProcRank, 2, GridCoords); Subdims[0] = 0; Subdims[1] = 1; MPI_Cart_sub(GridComm, Subdims, &RowComm); Subdims[0] = 1; Subdims[1] = 0; MPI_Cart_sub(GridComm, Subdims, &ColComm); } void ProcessInitialization (double* &pAMatrix, double* &pBMatrix, double* &pCMatrix, double* &pAblock, double* &pBblock, double* &pCblock, double* &pTemporaryAblock, int &Size, int &BlockSize ) { if (ProcRank == 0) { do { printf("\nEnter size of the initial objects: "); scanf("%d", &Size); if (Size%GridSize != 0) { printf ("Size of matricies must be divisible by the grid size! 
\n"); } } while (Size%GridSize != 0); } MPI_Bcast(&Size, 1, MPI_INT, 0, MPI_COMM_WORLD); BlockSize = Size/GridSize; pAblock = new double [BlockSize*BlockSize]; pBblock = new double [BlockSize*BlockSize]; pCblock = new double [BlockSize*BlockSize]; pTemporaryAblock = new double [BlockSize*BlockSize]; for (int i=0; i<BlockSize*BlockSize; i++) { pCblock[i] = 0; } if (ProcRank == 0) { pAMatrix = new double [Size*Size]; pBMatrix = new double [Size*Size]; pCMatrix = new double [Size*Size]; RandomDataInitialization(pAMatrix, pBMatrix, Size); } } void CheckerboardMatrixScatter(double* pMatrix, double* pMatrixBlock, int Size, int BlockSize) { double * MatrixRow = new double [BlockSize*Size]; if (GridCoords[1] == 0) { MPI_Scatter(pMatrix, BlockSize*Size, MPI_DOUBLE, MatrixRow, BlockSize*Size, MPI_DOUBLE, 0, ColComm); } for (int i=0; i<BlockSize; i++) { MPI_Scatter(&MatrixRow[i*Size], BlockSize, MPI_DOUBLE, &(pMatrixBlock[i*BlockSize]), BlockSize, MPI_DOUBLE, 0, RowComm); } delete [] MatrixRow; } void DataDistribution(double* pAMatrix, double* pBMatrix, double* pMatrixAblock, double* pBblock, int Size, int BlockSize) { CheckerboardMatrixScatter(pAMatrix, pMatrixAblock, Size, BlockSize); CheckerboardMatrixScatter(pBMatrix, pBblock, Size, BlockSize); } void ResultCollection (double* pCMatrix, double* pCblock, int Size, int BlockSize) { double * pResultRow = new double [Size*BlockSize]; for (int i=0; i<BlockSize; i++) { MPI_Gather( &pCblock[i*BlockSize], BlockSize, MPI_DOUBLE, &pResultRow[i*Size], BlockSize, MPI_DOUBLE, 0, RowComm); } if (GridCoords[1] == 0) { MPI_Gather(pResultRow, BlockSize*Size, MPI_DOUBLE, pCMatrix, BlockSize*Size, MPI_DOUBLE, 0, ColComm); } delete [] pResultRow; } void ABlockCommunication (int iter, double *pAblock, double* pMatrixAblock, int BlockSize) { int Pivot = (GridCoords[0] + iter) % GridSize; if (GridCoords[1] == Pivot) { for (int i=0; i<BlockSize*BlockSize; i++) pAblock[i] = pMatrixAblock[i]; } MPI_Bcast(pAblock, BlockSize*BlockSize, MPI_DOUBLE, Pivot, RowComm); } void BblockCommunication (double *pBblock, int BlockSize) { MPI_Status Status; int NextProc = GridCoords[0] + 1; if ( GridCoords[0] == GridSize-1 ) NextProc = 0; int PrevProc = GridCoords[0] - 1; if ( GridCoords[0] == 0 ) PrevProc = GridSize-1; MPI_Sendrecv_replace( pBblock, BlockSize*BlockSize, MPI_DOUBLE, NextProc, 0, PrevProc, 0, ColComm, &Status); } void ParallelResultCalculation(double* pAblock, double* pMatrixAblock, double* pBblock, double* pCblock, int BlockSize) { for (int iter = 0; iter < GridSize; iter ++) { ABlockCommunication (iter, pAblock, pMatrixAblock, BlockSize); BlockMultiplication(pAblock, pBblock, pCblock, BlockSize); BblockCommunication(pBblock, BlockSize); } } void TestBlocks (double* pBlock, int BlockSize, char str[]) { MPI_Barrier(MPI_COMM_WORLD); if (ProcRank == 0) { printf("%s \n", str); } for (int i=0; i<ProcNum; i++) { if (ProcRank == i) { printf ("ProcRank = %d \n", ProcRank); PrintMatrix(pBlock, BlockSize, BlockSize); } MPI_Barrier(MPI_COMM_WORLD); } } void TestDistribution(double* pAMatrix, double* pBMatrix,double* pCMatrix, int Size, double t1, double t2) { if (ProcRank == 0) { printf("\nMatrix A: \n"); PrintMatrix(pAMatrix, Size, Size); printf("\nMatrix B: \n"); PrintMatrix(pBMatrix, Size, Size); printf("\n\nResult Matrix: \n"); PrintMatrix(pCMatrix, Size, Size); PrintTime(t1,t2); } } void ProcessTermination (double* pAMatrix, double* pBMatrix, double* pCMatrix, double* pAblock, double* pBblock, double* pCblock, double* pMatrixAblock) { if (ProcRank == 0) { delete [] pAMatrix; 
delete [] pBMatrix; delete [] pCMatrix; } delete [] pAblock; delete [] pBblock; delete [] pCblock; delete [] pMatrixAblock; } void main(int argc, char* argv[]) { double* pAMatrix; double* pBMatrix; double* pCMatrix; int Size; int BlockSize; double *pAblock; double *pBblock; double *pCblock; double *pMatrixAblock; double t1, t2; setvbuf(stdout, 0, _IONBF, 0); MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &ProcNum); MPI_Comm_rank(MPI_COMM_WORLD, &ProcRank); GridSize = sqrt((double)ProcNum); if (ProcNum != GridSize*GridSize) { if (ProcRank == 0) { printf ("Number of processes must be a perfect square \n"); } } else { if (ProcRank == 0) printf("Parallel matrix multiplication program\n"); CreateGridCommunicators(); ProcessInitialization ( pAMatrix, pBMatrix, pCMatrix, pAblock, pBblock, pCblock, pMatrixAblock, Size, BlockSize ); t1 = MPI_Wtime(); DataDistribution(pAMatrix, pBMatrix, pMatrixAblock, pBblock, Size, BlockSize); ParallelResultCalculation(pAblock, pMatrixAblock, pBblock, pCblock, BlockSize); ResultCollection(pCMatrix, pCblock, Size, BlockSize); t2 = MPI_Wtime(); TestDistribution(pAMatrix, pBMatrix, pCMatrix, Size, t1, t2); ProcessTermination (pAMatrix, pBMatrix, pCMatrix, pAblock, pBblock, pCblock, pMatrixAblock); } MPI_Finalize(); getch(); }
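A brief usage note: with a typical MPI installation (the exact build and launch commands depend on the MPI distribution, so treat them as an assumption) the program is started through the MPI launcher, for example `mpiexec -n 4 <program>`. As the checks in `main` and `ProcessInitialization` above show, the number of processes must be a perfect square (4, 9, 16, ...), and the entered matrix size must be divisible by the grid size `GridSize`; process 0 keeps re-prompting until a suitable size is given.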
In the next step we will consider parallel algorithms for solving systems of linear equations.