In this step we present the results of running the serial and parallel algorithms, together with the full source code of both programs.
Table 1 shows the results of computational experiments for the parallel Gauss-Seidel algorithm with the row-band (striped) data partitioning scheme.
| Grid size | Serial algorithm | Parallel algorithm, 1 processor | Parallel algorithm, 4 processors |
|---|---|---|---|
| 10 | 1.2705 | 0.8445 | 0.9808 |
| 20 | 24.8506 | 0.9442 | 1.0185 |
| 30 | 133.5828 | 1.0568 | 1.1588 |
| 40 | 414.2733 | 1.3211 | 1.3392 |
| 50 | 1018.9573 | 1.5879 | 1.4809 |
| 60 | 1815.1841 | 1.9773 | 1.6736 |
| 70 | 3248.9905 | 2.3852 | 1.8963 |
| 80 | 5346.5328 | 2.9093 | 2.1706 |
| 90 | 8147.7365 | 3.5306 | 2.4327 |
| 100 | 12041.8961 | 4.5866 | 2.8534 |
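When comparing measurements of this kind, the speedup of a parallel algorithm is conventionally defined as the ratio of the serial execution time to the execution time on $p$ processors (note that the serial and parallel timings in the table were obtained with different timers, so the columns should be compared with care):

$$S_p = \frac{T_1}{T_p}$$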
Below are the implementations of the serial and parallel algorithms.
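Both programs solve the Dirichlet problem on a square grid: each sweep replaces every interior node by the average of its four neighbors, reusing values already updated during the current sweep (the Gauss-Seidel scheme), and the iteration stops once the largest change of a node value drops below the required accuracy $\varepsilon$:

$$u_{i,j} \leftarrow \frac{1}{4}\left(u_{i-1,j} + u_{i+1,j} + u_{i,j-1} + u_{i,j+1}\right), \qquad \max_{i,j}\,\bigl|u_{i,j}^{\text{new}} - u_{i,j}^{\text{old}}\bigr| < \varepsilon.$$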
The serial algorithm:
#include "stdafx.h" #include <stdio.h> #include <stdlib.h> #include <conio.h> #include <time.h> #include <math.h> #include <windows.h> void PrintMatrix (double* pMatrix, int RowCount, int ColCount) { int i, j; for (i=0; i<RowCount; i++) { for (j=0; j<ColCount; j++) printf("%7.4f ", pMatrix[i*ColCount+j]); printf("\n"); } } void ResultCalculation(double* pMatrix, int Size, double &Eps, int &Iterations) { int i, j; double dm, dmax,temp; Iterations = 0; do { dmax = 0; for (i = 1; i < Size - 1; i++) for(j = 1; j < Size - 1; j++) { temp = pMatrix[Size * i + j]; pMatrix[Size * i + j] = 0.25 * (pMatrix[Size * i + j + 1] + pMatrix[Size * i + j - 1] + pMatrix[Size * (i + 1) + j] + pMatrix[Size * (i - 1) + j]); dm = fabs(pMatrix[Size * i + j] - temp); if (dmax < dm) dmax = dm; } Iterations++; } while (dmax > Eps); } void ProcessTermination(double* pMatrix) { delete [] pMatrix; } void RandomDataInitialization (double* pMatrix, int Size) { int i, j; srand(unsigned(clock())); double h = 1.0 / (Size - 1); for (i=0; i<Size; i++) { for (j=0; j<Size; j++) if ((i==0) || (i== Size-1) || (j==0) || (j==Size-1)) pMatrix[i*Size+j] = 100; else pMatrix[i*Size+j] = rand()/double(1000); } } void ProcessInitialization (double* &pMatrix, int &Size, double &Eps) { do { printf("\nEnter the grid size of the initial objects: "); scanf("%d", &Size); if (Size <= 2) printf("\nSize of grid must be greater than 2!\n"); } while (Size <= 2); do { printf("\nEnter the required accuracy: "); scanf("%lf", &Eps); printf("\nChosen accuracy = %lf", Eps); if (Eps <= 2) printf("\nAccuracy must be greater than 0!\n"); } while (Eps <= 0); pMatrix = new double [Size*Size]; RandomDataInitialization(pMatrix, Size); } void main() { double* pMatrix; int Size; double Eps; int Iterations; double t1, t2, dt; printf ("Serial Gauss - Seidel algorithm\n"); ProcessInitialization(pMatrix, Size, Eps); printf ("\nInitial Matrix: \n"); PrintMatrix (pMatrix, Size, Size); t1=GetTickCount(); ResultCalculation(pMatrix, Size, Eps, Iterations); t2=GetTickCount(); printf ("\nResult matrix: \n"); PrintMatrix (pMatrix, Size, Size); dt = (t2-t1)*1000; printf("\nTime: %10.5f\n", dt); ProcessTermination(pMatrix); getch(); }
The parallel algorithm:
#include "stdafx.h" #include <stdio.h> #include <stdlib.h> #include <conio.h> #include <time.h> #include <math.h> #include <mpi.h> #include <comutil.h> #include <algorithm> using namespace std; static int ProcNum = 0; static int ProcRank = -1; void RandomDataInitialization (double* pMatrix, int Size) { int i, j; srand(unsigned(clock())); for (i=0; i<Size; i++) { for (j=0; j<Size; j++) if ((i==0) || (i== Size-1) || (j==0) || (j==Size-1)) pMatrix[i*Size+j] = 100; else pMatrix[i*Size+j] = rand()/double(1000); } } void DataDistribution(double* pMatrix, double* pProcRows, int RowNum, int Size) { int *pSendNum; int *pSendInd; int RestRows=Size; pSendInd = new int [ProcNum]; pSendNum = new int [ProcNum]; RowNum = (Size-2)/ProcNum+2; pSendNum[0] = RowNum*Size; pSendInd[0] = 0; for (int i=1; i<ProcNum; i++) { RestRows = RestRows - RowNum + 1; RowNum = (RestRows-2)/(ProcNum-i)+2; pSendNum[i] = (RowNum)*Size; pSendInd[i] = pSendInd[i-1]+pSendNum[i-1]-Size; } MPI_Scatterv(pMatrix , pSendNum, pSendInd, MPI_DOUBLE, pProcRows, pSendNum[ProcRank], MPI_DOUBLE, 0, MPI_COMM_WORLD); delete []pSendInd; delete []pSendNum; } void ProcessTermination (double* pMatrix, double* pProcRows) { if (ProcRank == 0) delete [] pMatrix; delete [] pProcRows; } void PrintMatrix(double *pMatrix, int RowCount, int ColCount){ int i,j; for(i=0; i < RowCount; i++) { for(j=0; j < ColCount; j++) printf("%7.4f ", pMatrix[i*ColCount+j]); printf("\n"); } } double IterationCalculation(double* pProcRows, int Size, int RowNum) { int i, j; double dm, dmax,temp; dmax = 0; for (i = 1; i < RowNum-1; i++) for(j = 1; j < Size-1; j++) { temp = pProcRows[Size * i + j]; pProcRows[Size * i + j] = 0.25 * (pProcRows[Size * i + j + 1] + pProcRows[Size * i + j - 1] + pProcRows[Size * (i + 1) + j] + pProcRows[Size * (i - 1) + j]); dm = fabs(pProcRows[Size * i + j] - temp); if (dmax < dm) dmax = dm; } return dmax; } void TestDistribution(double* pMatrix, double* pProcRows, int Size, int RowNum) { if (ProcRank == 0) { printf("\nInitial Matrix: \n"); PrintMatrix(pMatrix, Size, Size); } } void ProcessInitialization (double* &pMatrix, double* &pProcRows, int &Size, int &RowNum, double &Eps) { int RestRows; if (ProcRank == 0) { do { printf("\nEnter the grid size: "); scanf("%d", &Size); if (Size <= 2) { printf("\n Size of grid must be greater than 2! \n"); } if (Size < ProcNum) { printf("Size of the objects must be greater than number of processes! \n "); } if ((Size-2)%ProcNum != 0) { printf("Number of processes must be multiple of size of objects! \n"); } } while ( (Size <= 2) || (Size < ProcNum) || ((Size-2)%ProcNum != 0)); do { printf("\nEnter the required accuracy: "); scanf("%lf", &Eps); if (Eps <= 0) printf("\nAccuracy must be greater than 0!\n"); } while (Eps <= 0); } MPI_Bcast(&Size, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(&Eps, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); RestRows = Size; for (int i=0; i<ProcRank; i++) RestRows = RestRows-RestRows/(ProcNum-i); RowNum = (RestRows-2)/(ProcNum-ProcRank)+2; pProcRows = new double [RowNum*Size]; if (ProcRank == 0) { pMatrix = new double [Size*Size]; RandomDataInitialization(pMatrix, Size); } } void ExchangeData(double* pProcRows, int Size, int RowNum) { MPI_Status status; int NextProcNum = (ProcRank == ProcNum - 1)? MPI_PROC_NULL : ProcRank + 1; int PrevProcNum = (ProcRank == 0)? 
MPI_PROC_NULL : ProcRank - 1; MPI_Sendrecv(pProcRows + Size * (RowNum - 2),Size, MPI_DOUBLE, NextProcNum, 4, pProcRows, Size, MPI_DOUBLE, PrevProcNum, 4, MPI_COMM_WORLD, &status); MPI_Sendrecv(pProcRows + Size, Size, MPI_DOUBLE, PrevProcNum, 5, pProcRows + (RowNum - 1) * Size, Size,MPI_DOUBLE, NextProcNum, 5, MPI_COMM_WORLD, &status); } void ParallelResultCalculation(double *pProcRows, int Size, int RowNum, double Eps, int &Iterations) { double ProcDelta,Delta; Iterations=0; do { Iterations++; ExchangeData(pProcRows, Size,RowNum); ProcDelta = IterationCalculation(pProcRows, Size, RowNum); MPI_Allreduce(&ProcDelta, &Delta, 1,MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);} while (Delta>Eps); } void ResultCollection(double *pMatrix, double* pProcRows, int Size, int RowNum) { int i; int *pReceiveNum; int *pReceiveInd; int RestRows = Size; pReceiveNum = new int [ProcNum]; pReceiveInd = new int [ProcNum]; pReceiveInd[0] = 0; RowNum = (Size-2)/ProcNum+2; pReceiveNum[0] = RowNum*Size; for ( i=1; i < ProcNum; i++){ RestRows = RestRows - RowNum + 1; RowNum = (RestRows-2)/(ProcNum-i)+2; pReceiveNum[i] = RowNum*Size; pReceiveInd[i] = pReceiveInd[i-1]+pReceiveNum[i-1]-Size; } MPI_Allgatherv(pProcRows, pReceiveNum[ProcRank], MPI_DOUBLE, pMatrix, pReceiveNum, pReceiveInd, MPI_DOUBLE, MPI_COMM_WORLD); delete [] pReceiveNum; delete [] pReceiveInd; } void main(int argc, char* argv[]) { double* pMatrix; double* pProcRows; int Size; double Eps; int Iterations; int RowNum; double t1,t2,dt; setvbuf(stdout, 0, _IONBF, 0); MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &ProcNum); MPI_Comm_rank(MPI_COMM_WORLD, &ProcRank); if(ProcRank == 0) { printf("Parallel Gauss - Seidel algorithm \n"); fflush(stdout); } ProcessInitialization (pMatrix,pProcRows,Size,RowNum,Eps); TestDistribution(pMatrix, pProcRows, Size,RowNum); t1 = MPI_Wtime(); DataDistribution(pMatrix, pProcRows, Size,RowNum); ParallelResultCalculation(pProcRows, Size,RowNum,Eps, Iterations); ResultCollection(pMatrix, pProcRows,Size, RowNum); t2 = MPI_Wtime(); printf("\nResult matrix: \n"); if (ProcRank==0) { PrintMatrix(pMatrix,Size,Size); } dt = (t2-t1)*1000; if(ProcRank == 0) printf("\nTime: %10.5f\n", dt); ProcessTermination(pMatrix, pProcRows); MPI_Finalize(); getch(); }
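To give an idea of how the parallel version is launched: with a typical MPI distribution (MPICH or Open MPI) the program could be built and run on four processes roughly as follows. The file name is only an example, and on a non-Windows system the stdafx.h/conio.h includes and the getch() call would have to be removed first:

```
mpicxx -O2 gauss_seidel_mpi.cpp -o gauss_seidel_mpi
mpiexec -n 4 ./gauss_seidel_mpi
```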
This concludes our presentation of the fundamentals of parallel algorithm design. We hope you have found the material useful.