矩阵乘法非常慢C++

0sgqnhkj  于 2022-11-27  发布在  其他
关注(0)|答案(2)|浏览(261)

我不知道为什么,但是我的矩阵乘法非常慢,我需要优化它。而且矩阵(1000X1000)的打印也需要很长时间。该函数的目的是计算矩阵指数,但我的主要问题是,这两个操作对于像1000X1000这样的大型矩阵来说非常慢。
这两个动作分别在poweMat()函数和printeResult()函数中实现,代码如下:

#define M 1000
    #define e 2.71828182845904523536;
    
    //declaration of the functions
    void sumMatrices(vector<vector<double> >& mat1, vector<vector<double> >& mat2);
    void printResult(vector<vector<double> >&matRes);
    void mulMatWithFactorial(long factorialValue);
    long factorialCalculate(int n);
    void initializeMatrix();
    void initializeIdenticalMatrix();
    void checkIfTheMatrixIsDiagonal();
    void calculateExpoMatrixWithDiagonalMatrix();
    void readMatrixFromFile();
    void powerMat(vector<vector<double> >& mat, int powNum);
    
    //declaration of the variables
    vector<vector<double>> inputMatrix(M, vector<double>(M));
    vector<vector<double>> sumMatrixResult(M, vector<double>(M));
    vector<vector<double>> powerMatrixResult(M, vector<double>(M));
    vector<vector<double>> mulFactorialMatrixResult(M, vector<double>(M));
    vector<vector<double>> finalMatrixResult(M, vector<double>(M));
    vector<vector<double>> identicalMatrix(M, vector<double>(M));
    vector<vector<vector<double>>> listOfMatrices;
    bool matrixIsNilpotent = false;
    int diagonaMatrixlFlag = 1;
    
    int main() {
        //variables
        long factorialValue;
        
    initializeIdenticalMatrix();
    readMatrixFromFile();

    //check if the matrix is diagonal - so we will have easier and faster compute
    checkIfTheMatrixIsDiagonal();
    if (diagonaMatrixlFlag == 1) {
        calculateExpoMatrixWithDiagonalMatrix();
        goto endOfLoop;
    }
    
    //loop for taylor series
    for (int i = 0; i < 5; i++) {
        if (i == 0) { // first we add identical matrix when the power is 0
            sumMatrices(finalMatrixResult, identicalMatrix); // summarize between this 2 matrices
            finalMatrixResult = sumMatrixResult; //copy matrices
        }
        
        if (i == 1) { // we add the matrix itself because the power is 1
            sumMatrices(finalMatrixResult, inputMatrix);
            finalMatrixResult = sumMatrixResult; //copy matrices
        }
        if (i > 1 ) {
            powerMat(inputMatrix, i);
            if (matrixIsNilpotent) { // it means that A^i is 0 for some integer, so the series terminates after a finite number
                goto endOfLoop;
            }
            factorialValue = factorialCalculate(i); // calculate the factorial of i
            mulMatWithFactorial(factorialValue); // multiply (1/i) * matrix^i - like in the algorithm
            sumMatrices(finalMatrixResult, mulFactorialMatrixResult); // summarize it with the previous result
            finalMatrixResult = sumMatrixResult; //copy matrices
        }
    }
    
    endOfLoop:
    printResult(finalMatrixResult); // print the final result - e^M
    return 0;
}
    
//Summarize matrices
void sumMatrices(vector<vector<double> >& mat1, vector<vector<double> >& mat2) {
    for (int i = 0; i < M; i++)
        for (int j = 0; j < M; j++)
            sumMatrixResult[i][j] = mat1[i][j] + mat2[i][j];
}

//Print matrix
void printResult(vector<vector<double> >& matRes) {
    for (int i = 0; i < M; i++) {
        for (int j = 0; j < M; j++) {
            printf("%f ", matRes[i][j]);
            if (j == M - 1) {
                printf("\n");
            }
        }
    }
}

//Calculate the factorial of n
long factorialCalculate(int n) {
    long factorial = 1.0;
    for (int i = 1; i <= n; ++i) {
        factorial *= i;
    }
    return factorial;
}

// mutiply the matrix with scalar
void mulMatWithFactorial(long factorialValue) {
    for (int i = 0; i < M; i++) {
        for (int j = 0; j < M; j++) {
            mulFactorialMatrixResult[i][j] = powerMatrixResult[i][j] * 1/factorialValue;
        }
    }
}

//initialize matrix
void initializeMatrix() {
    for (int i = 0; i < M; i++) {
        for (int j = 0; j < M; j++) {
            powerMatrixResult[i][j] = 0;
        }
    }
}

void checkIfTheMatrixIsDiagonal() {
    for (int i = 0; i < M; i++) {
        for (int j = 0; j < M; j++) {
            if (i == j)
            {
                if (inputMatrix[i][j] == 0) {
                    diagonaMatrixlFlag = 0;
                    goto endOfLoop;
                }

            }
            else
            {
                if (inputMatrix[i][j] != 0) {
                    diagonaMatrixlFlag = 0;
                    goto endOfLoop;
                }

            }
        }
    }
    endOfLoop:
    return;
}

void calculateExpoMatrixWithDiagonalMatrix() {
    for (int i = 0; i < M; i++) {
        for (int j = 0; j < M; j++) {
            if (i == j)
            {
                for (int k = 0; k < inputMatrix[i][j]; ++k)// loop to calculate the pow of e^alpha
                {
                    finalMatrixResult[i][j] *= e;
                }
            }
            else
            {
                finalMatrixResult[i][j] = 0;
            }
        }
    }
}

void readMatrixFromFile() {
    ifstream f("inv_matrix(1000x1000).txt");
    for (int i = 0; i < M; i++)
        for (int j = 0; j < M; j++) {
            f >> inputMatrix[i][j];
            if (f.peek() == ',')
                f.ignore();
        }
    listOfMatrices.push_back(inputMatrix);
}

void initializeIdenticalMatrix() {
    for (int i = 0; i < M; i++) {
        for (int k = 0; k < M; k++) {
            if (i == k) {
                identicalMatrix[i][k] = 1;
            }
            else {
                identicalMatrix[i][k] = 0;
            }
        }
    }
}

void powerMat(vector<vector<double> >& mat, int powNum) {
    int counterForNilpotent = 0;

    initializeMatrix();
    auto start = high_resolution_clock::now();

    for (int i = 0; i < M; i++) {
        for (int k = 0; k < M; k++) {
            for (int j = 0; j < M; j++) {
                powerMatrixResult[i][j] += mat[i][k] * listOfMatrices[powNum-2][k][j];
            }
        }
    }
    auto stop = high_resolution_clock::now();
    auto duration = duration_cast<seconds>(stop - start);
    cout << duration.count() << " seconds" << endl; // checking run time

    listOfMatrices.push_back(powerMatrixResult);

    // check if after we we did A^i , the matrix is equal to 0
    for (int i = 0; i < M; i++) {
        for (int j = 0; j < M; j++) {
            if (powerMatrixResult[i][j] == 0) {
                counterForNilpotent++;
            }
        }
    }

    if (counterForNilpotent == M * M) {
        matrixIsNilpotent = true;
    }
}
w8f9ii69

w8f9ii691#

遍历n个数组中的每个元素的计算效率为O(n^2),这意味着对于大型数组来说,这需要一段时间,但不会是“宇宙生命周期”的时间长度。
通常要对这样的大规模数组进行运算,首先要对它们进行某种形式的约简,这样计算就可以更接近O(n),或者更好地使用矩阵约简形式的一些真理。

因此,更快的矩阵乘法实现将从两个矩阵上的某个rref()函数开始,然后仅计算那些在列和行中具有对象的矩阵部分。

这里有一些很好的地方复习/学习(免费)线性代数:
“3b 1b(2016年):线性代数的本质”= https://www.youtube.com/watch?v=kjBOesZCoqc&list=PL0-GT3co4r2y2YErbmuJw2L5tW4Ew2O5B
“麻省理工学院开放式课程(2009):线性代数”= https://www.youtube.com/watch?v=ZK3O402wf1c&list=PL49CF3715CB9EF31D&index=1

yks3o0rb

yks3o0rb2#

使用SSE2。它不是一个库。它是一个使用cpu矢量硬件的方法。
将操作设置为并行运行。
https://en.wikipedia.org/wiki/SSE2

相关问题