线性方程组求解-高斯消元

2009年4月22日阅读(426)

4    5
1.000000    4.000000    -2.000000    3.000000    6.000000
2.000000    2.000000    0.000000    4.000000    2.000000
3.000000    0.000000    -1.000000    2.000000    1.000000
1.000000    2.000000    2.000000    -3.000000    8.000000

一.基本实现

#include <cstdio>
#include <cstdlib>
#include <cstring>
using namespace std;
void environment_init();
typedef double data_t;
void cblas_factor(data_t *vec,int size,data_t factor);
void cblas_sub(data_t *minuend,data_t *subtrahend,int size);
data_t * cblas_max(data_t *vec,int size);
//N行，M列，M = N+1
int N,M;
//X的系数矩阵
data_t * matrix_A;
//y向量
data_t * B;
//x的解向量
data_t * X;
//通过A(i,j)得到，x系数矩阵中[i,j]元素的地址
data_t * A(int i,int j){
    return matrix_A+i*N+j;
}
void print_matrix(){
    printf("matrix:\n");
    for(int i = 0 ; i < N ; i++){
        for(int j = 0 ; j < N ; j++){
                printf("%lf ",*A(i,j));
        }
        printf("%lf\n",*(B+i));
    }

}
int main()
{
    FILE * f = fopen("data.txt","r");
    fscanf(f,"%d%d",&N,&M);
    matrix_A = (data_t *)malloc(sizeof(data_t)*N*N);
    B = (data_t *)malloc(sizeof(data_t)*N);
    X = (data_t *)malloc(sizeof(data_t)*N);
    int *shift_table = (int*)malloc(sizeof(int)*N);

    //get matrix from file
    for(int i = 0 ; i < N ; i++){
        for(int j = 0 ; j < N ; j++){
            fscanf(f,"%lf",A(i,j));
            printf("%lf ",*A(i,j));
        }
        fscanf(f,"%lf",B+i);
        printf("%lf\n",*(B+i));
    }
    //init shift table
    for(int i = 0 ; i < N ; i++){
        shift_table[i] = i;
    }
    //computer
    for(int i = 0 ; i < N-1 ; i++){
        //选主元
        int index = cblas_max(A(i,i),N-i)-A(i,0);
        //列交换
        if(index != i){

    //错误！！！
           shift_table[i] = index;
           shift_table[index] = i;

           for(int row = 0 ; row < N ; row++){
               data_t temp = *A(row,i);
               *(A(row,i)) = *A(row,index);
               *A(row,index) = temp;
           }
        }
        //主元归一化
        // 消元

    //错误！！！ *A(row,i)!= 0会让消元提前结束
        for(int row = i+1 ; *A(row,i)!= 0 && row < N ; row++){
            data_t factor = *A(i,i)/ *A(row,i);
            cblas_factor(A(row,i),N-i,factor);
            cblas_sub(A(row,i),A(i,i),N-i);
            *(B+row) *= factor;
            *(B+row) = *(B+row)-*(B+i);
        }
        print_matrix();

    }
    //回代，i代表了Xi,col代表了列号，用来计算前面已经计算的xi值的sum
    for(int i = N-1 ; i >= 0 ; i–){
        data_t sum = 0;
        for(int col = i+1 ; col < N ; col++){
            sum += *A(i,col)*X[col];
        }
        X[i] = (*(B+i) – sum) / *A(i,i);
    }
    //恢复,其中shift table保存了原来的列号
    for(int i = 0 ; i < N ; i++){
        printf("x%d:%lf ",shift_table[i],X[i]);
    }

}

//向量操作，对所有的数*factor
void cblas_factor(data_t *vec,int size,data_t factor){
    for(int i = 0 ; i < size ; i++){
        *(vec+i) = *(vec+i)*factor;
    }
}
//两个向量的减
void cblas_sub(data_t *minuend,data_t *subtrahend,int size){
    for(int i = 0 ; i < size ; i++){
        *(minuend+i) = *(minuend+i) – *(subtrahend+i);
    }

}
//寻找向量中的最大值
data_t * cblas_max(data_t *vec,int size){
    data_t * index = vec;
    data_t max = *vec;
    for(int i = 0 ; i < size ; i++){
        if(*(vec+i) > max){
            max = *(vec+i);
            index = vec+i;
        }
    }
    return index;

}

二.使用blas上的cell实现

blas参考文档：http://www.netlib.org/blas/blast-forum/blas-report.pdf

基本blas实现：

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <cblas.h>
#include "malloc_align.h"

void environment_init();
typedef double data_t;
int N,M;//N行，M列，M = N+1
data_t * matrix_A;//X的系数矩阵
data_t * B;//y向量
data_t * X;//x的解向量
int *shift_table;

//通过A(i,j)得到，x系数矩阵中[i,j]元素的地址
data_t * A(int i,int j){
return matrix_A+i*N+j;
}
void print_matrix();

int main()
{
    FILE * f = fopen("data.txt","r");
    int i = 0,j = 0,index = 0,row = 0,temp = 0;

    fscanf(f,"%d%d",&N,&M);
    matrix_A = (data_t *)_malloc_align(sizeof(data_t)*N*N,7);
    B = (data_t *)_malloc_align(sizeof(data_t)*N,7);
    X = (data_t *)_malloc_align(sizeof(data_t)*N,7);
    shift_table = (int*)_malloc_align(sizeof(int)*N,7);

    //get matrix from file
    for(i = 0 ; i < N ; i++){
        for(j = 0 ; j < N ; j++){
            fscanf(f,"%lf",A(i,j));
            printf("%lf ",*A(i,j));
        }
        fscanf(f,"%lf",B+i);
        printf("%lf\n",*(B+i));
    }
    //init shift table
    for(i = 0 ; i < N ; i++){
        shift_table[i] = i;
    }
    //computer
    for(i = 0 ; i < N-1 ; i++){
        //选主元
    index = cblas_idamax(N-i,A(i,i),1)+i;//cblas_idamax 选择最大绝对值的blas库
    printf("index:%d\n",index);
        //列交换
        if(index != i){
           temp = shift_table[i];
       shift_table[i] = shift_table[index];
           shift_table[index] = temp;
           for(row = 0 ; row < N ; row++){
               data_t temp = *A(row,i);
               *(A(row,i)) = *A(row,index);
               *A(row,index) = temp;
           }
        }
    for(row = 0 ; row < N ; row++)
        printf("%d ",shift_table[row]);
    printf("\n");
        //主元归一化
        // 消元
        for(row = i+1 ; row < N ; row++){
        if(*A(row,i) == 0) continue;
        data_t factor = -1*(*A(row,i)/ *A(i,i));
        cblas_daxpy(N-i,factor,A(i,i),1,A(row,i),1);//cblas_daxpy( N, alpha,X,incX, Y, incY); Y=Y+alpha*X
        *(B+row) = *(B+row) + *(B+i)*factor;
        }
        print_matrix();

    }
    //回代，i代表了Xi,col代表了列号，用来计算前面已经计算的xi值的sum
    for(i = N-1 ; i >= 0 ; i–){
        data_t sum = 0;
        for( j = i+1 ; j < N ; j++){
            sum += *A(i,j)*X[j];
        }
        X[i] = (*(B+i) – sum) / *A(i,i);
    }
    //恢复,其中shift table保存了原来的列号
    for(i = 0 ; i < N ; i++){
        printf("x%d:%lf ",shift_table[i],X[i]);
    }

}
void print_matrix(){
    int i,j;
    printf("matrix:\n");
    for(i = 0 ; i < N ; i++){
        for(j = 0 ; j < N ; j++){
                printf("%lf ",*A(i,j));
        }
        printf("%lf\n",*(B+i));
    }

}

三.使用mpi的基本实现

四.使用mpi的cell实现

五.使用mpi的异构实现

六.使用mpi的异构适用性实现

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <cblas.h>
#include "mpi.h"
#include "malloc_align.h"

void environment_init();
typedef double data_t;
int N,M;//N行，M列，M = N+1
data_t * matrix_A;//X的系数矩阵
data_t * B;//y向量
data_t * X;//x的解向量
int *shift_table;
MPI_Status status;

//通过A(i,j)得到，x系数矩阵中[i,j]元素的地址
data_t * A(int i,int j){
return matrix_A+i*M+j;
}
void print_matrix();

int main(int argc,char **argv)
{
    FILE * f;
    int i = 0,j = 0,index = 0,row = 0;
    int group_size;
    int *row_rank_table;
    int process_rownum;
    int myrank;
    data_t * mainrow_space;
    data_t ** row_address_table;

    MPI_Init(&argc,&argv);
    MPI_Comm_size(MPI_COMM_WORLD,&group_size);
    MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
    //printf("mpi:%d %d\n",group_size,myrank);

    if(myrank == 0){
       f= fopen("/home/duanple/CBLAS/data.txt","r");
       fscanf(f,"%d%d",&N,&M);
       matrix_A = (data_t *)_malloc_align(sizeof(data_t)*N*M,7);
       B = (data_t *)_malloc_align(sizeof(data_t)*N,7);
       X = (data_t *)_malloc_align(sizeof(data_t)*N,7);
       //get matrix from file
       for(i = 0 ; i < N ; i++){
             for(j = 0 ; j < N ; j++){
               fscanf(f,"%lf",A(i,j));
        }
        fscanf(f,"%lf",A(i,j));
   *(B+i) = *A(i,j);
       }
   //print_matrix();
    }
    //broadcast the process row to be in,and rows num in every process
    MPI_Bcast(&N,1,MPI_INT,0,MPI_COMM_WORLD);
    M = N+1;
    //printf("rank:%d m:%d\n",myrank,M);
    //malloc the memory for the row of matrix
    shift_table = (int*)_malloc_align(sizeof(int)*N,7);
    row_rank_table = (int *)_malloc_align(sizeof(int)*N,7);
    row_address_table = (data_t**)_malloc_align(sizeof(data_t*)*N,7);
    //init shift table
    for(i = 0 ; i < N ; i++){
        shift_table[i] = i;
    }

    //divide matrix
    if(myrank == 0){
   for(i = 0,j = 0 ; i < N ; i++,j++){
        row_rank_table[i] = j%group_size;
   }
    }
    MPI_Bcast(row_rank_table,N,MPI_INT,0,MPI_COMM_WORLD);

//上面的条件，可能造成同一个进程上的其他行不被处理。

           //选主元
       //printf("myrank:%d\n",myrank);for(j = 0 ; j < M ; j++){printf("%lf ",*(row_address_table[i]+j));}printf("\n");
       index = cblas_idamax(N-i,row_address_table[i]+i,1)+i;
           //列交换
                if(index != i){
                        int temp = shift_table[i];
              shift_table[i] = shift_table[index];
                  shift_table[index] = temp;

           data_t data_temp = *(row_address_table[i]+i);
           *(row_address_table[i]+i) = *(row_address_table[i]+index);
           *(row_address_table[i]+index) = data_temp;
           }
       //拷贝主行元素到临时空间
       //printf("myrank:%d\n",myrank);for(j = 0 ; j < M ; j++){printf("%lf ",*(row_address_table[i]+j));}printf("\n");
       memcpy(mainrow_space,row_address_table[i],sizeof(data_t)*M);
       //printf("myrank:%d\n",myrank);for(j = 0 ; j < M ; j++){printf("%lf ",*(row_address_table[i]+j));}printf("\n");
   }
   //其他行的列交换
       MPI_Bcast(&index,1,MPI_INT,row_rank_table[i],MPI_COMM_WORLD);
   if(myrank != row_rank_table[i]){

//上面的条件，可能造成同一个进程上的其他行不被处理。
                if(index != i){
                        int temp = shift_table[i];
              shift_table[i] = shift_table[index];
                  shift_table[index] = temp;
           //swap the other rows
           for(j = 0 ; j < N ; j++){
                if(myrank == row_rank_table[j]){
               data_t data_temp = *(row_address_table[j]+i);
               *(row_address_table[j]+i) = *(row_address_table[j]+index);
               *(row_address_table[j]+index) = data_temp;
                }
           }//end of for
           }

   }

   //发送主行元素
       MPI_Bcast(mainrow_space,M,MPI_DOUBLE,row_rank_table[i],MPI_COMM_WORLD);
        //主元归一化
   if(myrank == row_rank_table[i]){

//注意，上面这个条件是错的，因为当前rank上还有其他行，这样会导致其他行不被处理。
//if(*(row_address_table[i]+i) == 0){continue;}
//printf("myrank:%d\n",myrank);for(j = 0 ; j < M ; j++){printf("%lf ",mainrow_space[j]);}printf("\n");

   }else{
        for(j = 0 ; j < N ; j++){
       //处理该进程所分配的行。
       if(j != i && myrank == row_rank_table[j]){
           if(*(row_address_table[j]+i) == 0){continue;}
              // 消元
               data_t factor = -1*(*(row_address_table[j]+i)/mainrow_space[i]);
           cblas_daxpy(N,factor,mainrow_space,1,row_address_table[j],1);
           *(row_address_table[j]+N) = *(row_address_table[j]+N) +mainrow_space[N]*factor;//elem of B
       }

}

}
}

   //

   //printf the revecive result.
   /*
   for(i = 0 ; i < N ; i++){
   if(myrank != row_rank_table[i]) continue;
    printf("rank%d-row%d:\n",myrank,row_rank_table[i]);
   for(j = 0 ; j < M ; j++){
       printf("%lf ",*(row_address_table[i]+j));
   }
   printf("\n");
    }   */

    //回代
    data_t sum = 0;

//注意一个进程不只一个行，因此只安排一个sum也是不够的。
    for(i = N-1 ; i>= 0 ; i–){
   if(myrank == row_rank_table[i]){
        mainrow_space[i] = (*(row_address_table[i]+N)-sum)/(*(row_address_table[i]+i));//(*(B+i) – sum) / *A(i,i);
   }
       MPI_Bcast(mainrow_space+i,1,MPI_DOUBLE,row_rank_table[i],MPI_COMM_WORLD);
   for(j = 0 ; j < i ; j++){
   //处理该进程所分配的行。
        if(myrank == row_rank_table[j]){
          sum += *(row_address_table[j]+i)*mainrow_space[i];
        }

   }


    }
    //printf res
    for(i = 0 ; myrank == 0 && i < N ; i++){
        printf("x%d:%lf ",shift_table[i],mainrow_space[i]);
    }

    /*
    //computer
    for(i = 0 ; i < N-1 ; i++){
        //选主元
   index = cblas_idamax(N-i,A(i,i),1)+i;//cblas_idamax 选择最大绝对值的blas库
   //printf("index:%d\n",index);
        //列交换
        if(index != i){
           temp = shift_table[i];
       shift_table[i] = shift_table[index];
           shift_table[index] = temp;
           for(row = 0 ; row < N ; row++){
               data_t temp = *A(row,i);
               *(A(row,i)) = *A(row,index);
               *A(row,index) = temp;
           }
        }
   for(row = 0 ; row < N ; row++)
       printf("%d ",shift_table[row]);
   printf("\n");
        //主元归一化
        // 消元
        for(row = i+1 ; row < N ; row++){
        if(*A(row,i) == 0) continue;
        data_t factor = -1*(*A(row,i)/ *A(i,i));
        cblas_daxpy(N-i,factor,A(i,i),1,A(row,i),1);//cblas_daxpy( N, alpha,X,incX, Y, incY); Y=Y+alpha*X
        *(B+row) = *(B+row) + *(B+i)*factor;
        }
        //print_matrix();

    }
    //回代，i代表了Xi,col代表了列号，用来计算前面已经计算的xi值的sum
    for(i = N-1 ; i >= 0 ; i–){
        data_t sum = 0;
        for( j = i+1 ; j < N ; j++){
            sum += *A(i,j)*X[j];
        }
        X[i] = (*(B+i) – sum) / *A(i,i);
    }
    //恢复,其中shift table保存了原来的列号
    for(i = 0 ; i < N ; i++){
        printf("x%d:%lf ",shift_table[i],X[i]);
    }
               */
    MPI_Finalize();

}

修改后的实现：

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <cblas.h>
#include "mpi.h"
#include "malloc_align.h"

//通过A(i,j)得到，x系数矩阵中[i,j]元素的地址
data_t * A(int i,int j){
return matrix_A+i*M+j;
}
void print_matrix();

    MPI_Init(&argc,&argv);
    MPI_Comm_size(MPI_COMM_WORLD,&group_size);
    MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
    //printf("mpi:%d %d\n",group_size,myrank);

    if(myrank == 0){
        f= fopen("/home/duanple/CBLAS/data.txt","r");
        fscanf(f,"%d%d",&N,&M);
        matrix_A = (data_t *)_malloc_align(sizeof(data_t)*N*M,7);
        B = (data_t *)_malloc_align(sizeof(data_t)*N,7);
        X = (data_t *)_malloc_align(sizeof(data_t)*N,7);
        //get matrix from file
        for(i = 0 ; i < N ; i++){
             for(j = 0 ; j < N ; j++){
                fscanf(f,"%lf",A(i,j));
        }
        fscanf(f,"%lf",A(i,j));//A(i,N) also stores B[i]
    *(B+i) = *A(i,j);
        }
    //print_matrix();
    }
    //broadcast the process row to be in,and rows num in every process
    MPI_Bcast(&N,1,MPI_INT,0,MPI_COMM_WORLD);
    M = N+1;
    //printf("rank:%d m:%d\n",myrank,M);
    //malloc the memory for the row of matrix
    shift_table = (int*)_malloc_align(sizeof(int)*N,7);
    row_rank_table = (int *)_malloc_align(sizeof(int)*N,7);
    row_address_table = (data_t**)_malloc_align(sizeof(data_t*)*N,7);
    //init shift table
    for(i = 0 ; i < N ; i++){
        shift_table[i] = i;
    }

    //divide matrix
    if(myrank == 0){
    for(i = 0,j = 0 ; i < N ; i++,j++){
        row_rank_table[i] = j%group_size;
    }
    }
    MPI_Bcast(row_rank_table,N,MPI_INT,0,MPI_COMM_WORLD);

    process_rownum = 0;
    for(i = 0 ; i < N ; i++){
    //printf("rank:%d row%d\n",myrank,row_rank_table[i]);
    if(row_rank_table[i] == myrank){
        row_address_table[i] = (data_t *)_malloc_align(sizeof(data_t)*M,7);
        process_rownum++;
    }
    }
    //
    //printf("rank:%d rows:%d\n",myrank,process_rownum);
    //root send the data to the processes.
    if(myrank == 0){//send
        for(i = 0 ; i < N ; i++){
       if(row_rank_table[i] == 0){
        row_address_table[i] = A(i,0);
       }else{
            MPI_Send(A(i,0),N,MPI_DOUBLE,row_rank_table[i],i,MPI_COMM_WORLD);
            MPI_Send(B+i,1,MPI_DOUBLE,row_rank_table[i],i,MPI_COMM_WORLD);
       }
        }
    }else{//receive
        for(i = 0 ; i < N ; i++){
       if(myrank !=0 && myrank == row_rank_table[i]){
           MPI_Recv(row_address_table[i],N,MPI_DOUBLE,0,i,MPI_COMM_WORLD,&status);
           MPI_Recv(row_address_table[i]+N,1,MPI_DOUBLE,0,i,MPI_COMM_WORLD,&status);
       }
        }
    }

   //printf the revecive result.
   /*
   for(i = 0 ; i < N ; i++){
    if(myrank != row_rank_table[i]) continue;
       printf("rank%d-row%d:\n",myrank,row_rank_table[i]);
    for(j = 0 ; j < M ; j++){
       printf("%lf ",*(row_address_table[i]+j));
    }
    printf("\n");
    }    */
    //computer
    //消元
    mainrow_space = (data_t *)_malloc_align(sizeof(data_t)*M,7);//存放主行的临时空间
    for(i = 0 ; i < N-1 ; i++){
    if(myrank == row_rank_table[i]){
            //选主元
        //printf("myrank:%d\n",myrank);for(j = 0 ; j < M ; j++){printf("%lf ",*(row_address_table[i]+j));}printf("\n");
        index = cblas_idamax(N-i,row_address_table[i]+i,1)+i;
            //列交换
                if(index != i){
                        int temp = shift_table[i];
               shift_table[i] = shift_table[index];
                   shift_table[index] = temp;

            data_t data_temp = *(row_address_table[i]+i);
            *(row_address_table[i]+i) = *(row_address_table[i]+index);
            *(row_address_table[i]+index) = data_temp;
            }
        //拷贝主行元素到临时空间
        //printf("myrank:%d\n",myrank);for(j = 0 ; j < M ; j++){printf("%lf ",*(row_address_table[i]+j));}printf("\n");
        memcpy(mainrow_space,row_address_table[i],sizeof(data_t)*M);
        //printf("myrank:%d\n",myrank);for(j = 0 ; j < M ; j++){printf("%lf ",*(row_address_table[i]+j));}printf("\n");
    }
    //其他行的列交换
        MPI_Bcast(&index,1,MPI_INT,row_rank_table[i],MPI_COMM_WORLD);
    //修改列交换标志，以进程为单位，不是以行
    if(myrank != row_rank_table[i]){
                  int temp = shift_table[i];
           shift_table[i] = shift_table[index];
               shift_table[index] = temp;
    }
    for(j = 0 ; j < N ; j++){
        if(j != i && myrank == row_rank_table[j]){
            data_t data_temp = *(row_address_table[j]+i);
            *(row_address_table[j]+i) = *(row_address_table[j]+index);
            *(row_address_table[j]+index) = data_temp;
        }
    }

    //发送主行元素
        MPI_Bcast(mainrow_space,M,MPI_DOUBLE,row_rank_table[i],MPI_COMM_WORLD);
    //printf("myrank:%d\n",myrank);for(j = 0 ; j < M ; j++){printf("%lf ",mainrow_space[j]);}printf("\n");
        //主元归一化
    for(j = i+1 ; j < N ; j++){
    //处理该进程所分配的行。
        if(myrank == row_rank_table[j]){
            if(*(row_address_table[j]+i) == 0){continue;}
               // 消元
            data_t factor = -1*(*(row_address_table[j]+i)/mainrow_space[i]);
            cblas_daxpy(N,factor,mainrow_space,1,row_address_table[j],1);
            *(row_address_table[j]+N) = *(row_address_table[j]+N) +mainrow_space[N]*factor;//elem of B
        }

}

   //

   //printf the result.
   /*
   for(i = 0 ; i < N ; i++){
    if(myrank != row_rank_table[i]) continue;
       printf("rank%d-row%d:\n",myrank,row_rank_table[i]);
    for(j = 0 ; j < M ; j++){
       printf("%lf ",*(row_address_table[i]+j));
    }
    printf("\n");
    }
    */
    //回代
    for(i = N-1 ; i>= 0 ; i–){
    if(myrank == row_rank_table[i]){
        mainrow_space[i] = *(row_address_table[i]+N)/(*(row_address_table[i]+i));//(*(B+i) – sum) / *A(i,i);
    }
        MPI_Bcast(mainrow_space+i,1,MPI_DOUBLE,row_rank_table[i],MPI_COMM_WORLD);
    for(j = 0 ; j < i ; j++){
    //处理该进程所分配的行。
        if(myrank == row_rank_table[j]){
          *(row_address_table[j]+N) -= *(row_address_table[j]+i)*mainrow_space[i];
        }

    }


    }
    //printf res

    for(i = 0 ; myrank == 0 && i < N ; i++){
        printf("x%d:%lf ",shift_table[i],mainrow_space[i]);
    }
    MPI_Finalize();

}
void print_matrix(){
    int i,j;
    printf("matrix:\n");
    for(i = 0 ; i < N ; i++){
        for(j = 0 ; j < N ; j++){
                printf("%lf ",*A(i,j));
        }
        printf("%lf\n",*(B+i));
    }

}

#############################################################################

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <cblas.h>
#include "mpi.h"
#include "malloc_align.h"

//通过A(i,j)得到，x系数矩阵中[i,j]元素的地址
data_t * A(int i,int j){
return matrix_A+i*M+j;
}
void print_matrix();
double * processor_test(MPI_Comm comm,void test(),long loops,double comp_size,int master_node);
void data_malloc(double * processor_descrip,int * rank_table,int matrix_size);
void linear_solver(MPI_Comm comm);

void test(){
    double a = 1.0;
    double b = 2.44;
    double c = a*b;

}
int main(int argc,char **argv)
{

    MPI_Init(&argc,&argv);
    processor_test(MPI_COMM_WORLD,test,1000,1.0,0);
    //linear_solver(MPI_COMM_WORLD);
    MPI_Finalize();

}

//test the computer ability of processors
double * processor_test(MPI_Comm comm,void test(),long loops,double comp_size,int master_node){
    int myrank;
    int group_size,i;
    double start_time,end_time,time;
    double * processor_able;
    MPI_Comm_size(comm,&group_size);
    MPI_Comm_rank(comm,&myrank);
    if(myrank == master_node){
        processor_able = (double *)_malloc_align(sizeof(double)*group_size+2,7);
        processor_able[0] = group_size;
        processor_able[1] = comp_size*loops;
    } else
        processor_able = NULL;

    start_time = MPI_Wtime();
    for(i = 0; i < loops ; i++)
        test();
    end_time = MPI_Wtime();
    time = end_time-start_time;
    printf("rank%d %lf\n",myrank,time);

    MPI_Gather(&time,1,MPI_DOUBLE,processor_able+2,1,MPI_DOUBLE,master_node,comm);

    //print the test result.
    if(myrank == master_node){
        for(i = 0 ; i < group_size ; i++){
            printf("%d %lf ",i,processor_able[i+2]);
        }

}

return processor_able;

}
void data_malloc(double * processor_descrip,int * rank_table,int matrix_size){
int p_num =(int)processor_descrip[0];

}
void linear_solver(MPI_Comm comm){

    FILE * f;
    int i = 0,j = 0,index = 0,row = 0;
    int group_size;
    int *row_rank_table;
    int process_rownum;
    int myrank;
    data_t * mainrow_space;
    data_t ** row_address_table;

    MPI_Comm_size(comm,&group_size);
    MPI_Comm_rank(comm,&myrank);
    //printf("mpi:%d %d\n",group_size,myrank);

    if(myrank == 0){
        f= fopen("/home/duanple/CBLAS/data.txt","r");
        fscanf(f,"%d%d",&N,&M);
        matrix_A = (data_t *)_malloc_align(sizeof(data_t)*N*M,7);
        B = (data_t *)_malloc_align(sizeof(data_t)*N,7);
        X = (data_t *)_malloc_align(sizeof(data_t)*N,7);
        //get matrix from file
        for(i = 0 ; i < N ; i++){
             for(j = 0 ; j < N ; j++){
                fscanf(f,"%lf",A(i,j));
        }
        fscanf(f,"%lf",A(i,j));//A(i,N) also stores B[i]
    *(B+i) = *A(i,j);
        }
    //print_matrix();
    }
    //broadcast the process row to be in,and rows num in every process
    MPI_Bcast(&N,1,MPI_INT,0,comm);
    M = N+1;
    //printf("rank:%d m:%d\n",myrank,M);
    //malloc the memory for the row of matrix
    shift_table = (int*)_malloc_align(sizeof(int)*N,7);
    row_rank_table = (int *)_malloc_align(sizeof(int)*N,7);
    row_address_table = (data_t**)_malloc_align(sizeof(data_t*)*N,7);
    //init shift table
    for(i = 0 ; i < N ; i++){
        shift_table[i] = i;
    }

    //divide matrix
    if(myrank == 0){
    for(i = 0,j = 0 ; i < N ; i++,j++){
        row_rank_table[i] = j%group_size;
    }
    }
    MPI_Bcast(row_rank_table,N,MPI_INT,0,comm);

高性能计算

线性方程组求解-高斯消元

You Might Also Like

在Ubuntu 8.10上安装MPICH2并行计算环境(zz)

多机mpi并行配置

cuda安装 nvidia.ko错误

duanple

c++静态对象的初始化

cell下blas库的使用－程序编译及模拟运行