高性能计算

线性方程组求解-高斯消元

2009年4月22日 阅读(421)

4    5
1.000000    4.000000    -2.000000    3.000000    6.000000
2.000000    2.000000    0.000000    4.000000    2.000000
3.000000    0.000000    -1.000000    2.000000    1.000000
1.000000    2.000000    2.000000    -3.000000    8.000000

一.基本实现

#include <cstdio>
#include <cstdlib>
#include <cstring>
using namespace std;
void environment_init();
typedef double data_t;
void cblas_factor(data_t *vec,int size,data_t factor);
void cblas_sub(data_t *minuend,data_t *subtrahend,int size);
data_t * cblas_max(data_t *vec,int size);
//N行,M列,M = N+1
int N,M;
//X的系数矩阵
data_t * matrix_A;
//y向量
data_t * B;
//x的解向量
data_t * X;
//通过A(i,j)得到,x系数矩阵中[i,j]元素的地址
data_t * A(int i,int j){
    return matrix_A+i*N+j;
}
void print_matrix(){
    printf("matrix:\n");
    for(int i = 0 ; i < N ; i++){
        for(int j = 0 ; j < N ; j++){
                printf("%lf ",*A(i,j));
        }
        printf("%lf\n",*(B+i));
    }

}
int main()
{
    FILE * f = fopen("data.txt","r");
    fscanf(f,"%d%d",&N,&M);
    matrix_A = (data_t *)malloc(sizeof(data_t)*N*N);
    B = (data_t *)malloc(sizeof(data_t)*N);
    X = (data_t *)malloc(sizeof(data_t)*N);
    int *shift_table = (int*)malloc(sizeof(int)*N);
   
    //get matrix from file
    for(int i = 0 ; i < N ; i++){
        for(int j = 0 ; j < N ; j++){
            fscanf(f,"%lf",A(i,j));   
            printf("%lf ",*A(i,j));
        }
        fscanf(f,"%lf",B+i);
        printf("%lf\n",*(B+i));
    }     
    //init shift table
    for(int i = 0 ; i < N ; i++){
        shift_table[i] = i;
    }
    //computer
    for(int i = 0 ; i < N-1 ; i++){
        //选主元
        int index = cblas_max(A(i,i),N-i)-A(i,0);
        //列交换
        if(index != i){

    //错误!!!
           shift_table[i] = index;    
           shift_table[index] = i;

           for(int row = 0 ; row < N ; row++){
               data_t temp = *A(row,i);   
               *(A(row,i)) = *A(row,index);  
               *A(row,index) = temp;
           }
        }
        //主元归一化
        // 消元 

    //错误!!! *A(row,i)!= 0会让消元提前结束
        for(int row = i+1 ; *A(row,i)!= 0 && row < N ; row++){
            data_t factor = *A(i,i)/ *A(row,i);   
            cblas_factor(A(row,i),N-i,factor);
            cblas_sub(A(row,i),A(i,i),N-i);
            *(B+row) *= factor;
            *(B+row) = *(B+row)-*(B+i);   
        }
        print_matrix();
       
    }
    //回代,i代表了Xi,col代表了列号,用来计算前面已经计算的xi值的sum
    for(int i = N-1 ; i >= 0 ; i–){
        data_t sum = 0;
        for(int col = i+1 ; col < N ; col++){
            sum += *A(i,col)*X[col];   
        }
        X[i] = (*(B+i) – sum) / *A(i,i);
    }
    //恢复,其中shift table保存了原来的列号
    for(int i = 0 ; i < N ; i++){
        printf("x%d:%lf ",shift_table[i],X[i]);
    }
   
               

 
}

//向量操作,对所有的数*factor
void cblas_factor(data_t *vec,int size,data_t factor){
    for(int i = 0 ; i < size ; i++){
        *(vec+i) = *(vec+i)*factor;
    }
}
//两个向量的减
void cblas_sub(data_t *minuend,data_t *subtrahend,int size){
    for(int i = 0 ; i < size ; i++){
        *(minuend+i) = *(minuend+i) – *(subtrahend+i);
    }

}
//寻找向量中的最大值
data_t * cblas_max(data_t *vec,int size){
    data_t * index = vec;
    data_t max = *vec;
    for(int i = 0 ; i < size ; i++){
        if(*(vec+i) > max){
            max =  *(vec+i);
            index = vec+i;  
        }
    }
    return index;

}

二.使用blas上的cell实现

 blas参考文档:http://www.netlib.org/blas/blast-forum/blas-report.pdf

基本blas实现:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <cblas.h>
#include "malloc_align.h"

void environment_init();
typedef double data_t;
int N,M;//N行,M列,M = N+1
data_t * matrix_A;//X的系数矩阵
data_t * B;//y向量
data_t * X;//x的解向量
int *shift_table;

//通过A(i,j)得到,x系数矩阵中[i,j]元素的地址
data_t * A(int i,int j){
    return matrix_A+i*N+j;
}
void print_matrix();

int main()
{
    FILE * f = fopen("data.txt","r");
    int i = 0,j = 0,index = 0,row = 0,temp = 0;
   
    fscanf(f,"%d%d",&N,&M);
    matrix_A = (data_t *)_malloc_align(sizeof(data_t)*N*N,7);
    B = (data_t *)_malloc_align(sizeof(data_t)*N,7);
    X = (data_t *)_malloc_align(sizeof(data_t)*N,7);
    shift_table = (int*)_malloc_align(sizeof(int)*N,7);
  
    //get matrix from file
    for(i = 0 ; i < N ; i++){
        for(j = 0 ; j < N ; j++){
            fscanf(f,"%lf",A(i,j));  
            printf("%lf ",*A(i,j));
        }
        fscanf(f,"%lf",B+i);
        printf("%lf\n",*(B+i));
    }    
    //init shift table
    for(i = 0 ; i < N ; i++){
        shift_table[i] = i;
    }
    //computer
    for(i = 0 ; i < N-1 ; i++){
        //选主元
    index = cblas_idamax(N-i,A(i,i),1)+i;//cblas_idamax 选择最大绝对值的blas库
    printf("index:%d\n",index);
        //列交换
        if(index != i){
           temp = shift_table[i]; 
       shift_table[i] = shift_table[index];     
           shift_table[index] = temp;
           for(row = 0 ; row < N ; row++){
               data_t temp = *A(row,i);  
               *(A(row,i)) = *A(row,index); 
               *A(row,index) = temp;
           }
        }
    for(row = 0 ; row < N ; row++)
        printf("%d ",shift_table[row]);
    printf("\n");
        //主元归一化
        // 消元
        for(row = i+1 ; row < N ; row++){
        if(*A(row,i) == 0) continue;   
        data_t factor = -1*(*A(row,i)/ *A(i,i));
        cblas_daxpy(N-i,factor,A(i,i),1,A(row,i),1);//cblas_daxpy( N, alpha,X,incX, Y, incY); Y=Y+alpha*X
        *(B+row) = *(B+row) + *(B+i)*factor;   
        }
        print_matrix();
      
    }
    //回代,i代表了Xi,col代表了列号,用来计算前面已经计算的xi值的sum
    for(i = N-1 ; i >= 0 ; i–){
        data_t sum = 0;
        for( j = i+1 ; j < N ; j++){
            sum += *A(i,j)*X[j];  
        }
        X[i] = (*(B+i) – sum) / *A(i,i);
    }
    //恢复,其中shift table保存了原来的列号
    for(i = 0 ; i < N ; i++){
        printf("x%d:%lf ",shift_table[i],X[i]);
    } 
              

 
}
void print_matrix(){
    int i,j;   
    printf("matrix:\n");
    for(i = 0 ; i < N ; i++){
        for(j = 0 ; j < N ; j++){
                printf("%lf ",*A(i,j));
        }
        printf("%lf\n",*(B+i));
    }

}

三.使用mpi的基本实现

 

四.使用mpi的cell实现

 

五.使用mpi的异构实现

 

六.使用mpi的异构适用性实现

 #include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <cblas.h>
#include "mpi.h"
#include "malloc_align.h"

void environment_init();
typedef double data_t;
int N,M;//N行,M列,M = N+1
data_t * matrix_A;//X的系数矩阵
data_t * B;//y向量
data_t * X;//x的解向量
int *shift_table;
MPI_Status status;

//通过A(i,j)得到,x系数矩阵中[i,j]元素的地址
data_t * A(int i,int j){
    return matrix_A+i*M+j;
}
void print_matrix();

int main(int argc,char **argv)
{
    FILE * f;
    int i = 0,j = 0,index = 0,row = 0;
    int group_size;
    int *row_rank_table;
    int process_rownum;
    int myrank;    
    data_t * mainrow_space;
    data_t ** row_address_table;    
        

    MPI_Init(&argc,&argv);
    MPI_Comm_size(MPI_COMM_WORLD,&group_size);
    MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
    //printf("mpi:%d %d\n",group_size,myrank);
                
    if(myrank == 0){
        f= fopen("/home/duanple/CBLAS/data.txt","r");            
        fscanf(f,"%d%d",&N,&M);
        matrix_A = (data_t *)_malloc_align(sizeof(data_t)*N*M,7);
        B = (data_t *)_malloc_align(sizeof(data_t)*N,7);
        X = (data_t *)_malloc_align(sizeof(data_t)*N,7);    
        //get matrix from file
        for(i = 0 ; i < N ; i++){
             for(j = 0 ; j < N ; j++){
                fscanf(f,"%lf",A(i,j));   
        }
        fscanf(f,"%lf",A(i,j));
    *(B+i) = *A(i,j);
        }     
    //print_matrix();
    }    
    //broadcast the process row to be in,and rows num in every process
    MPI_Bcast(&N,1,MPI_INT,0,MPI_COMM_WORLD);    
    M = N+1;
    //printf("rank:%d m:%d\n",myrank,M);    
    //malloc the memory for the row of matrix
    shift_table = (int*)_malloc_align(sizeof(int)*N,7);    
    row_rank_table = (int *)_malloc_align(sizeof(int)*N,7);      
    row_address_table = (data_t**)_malloc_align(sizeof(data_t*)*N,7);       
    //init shift table
    for(i = 0 ; i < N ; i++){
        shift_table[i] = i;
    }
        
    //divide matrix
    if(myrank == 0){    
    for(i = 0,j = 0 ; i < N ; i++,j++){
        row_rank_table[i] = j%group_size;                
    }    
    }
    MPI_Bcast(row_rank_table,N,MPI_INT,0,MPI_COMM_WORLD);

    process_rownum = 0;
    for(i = 0 ; i < N ; i++){
    //printf("rank:%d row%d\n",myrank,row_rank_table[i]);
    if(row_rank_table[i] == myrank){
        row_address_table[i] =  (data_t *)_malloc_align(sizeof(data_t)*M,7);    
        process_rownum++;
    }
    }    
    //
    //printf("rank:%d rows:%d\n",myrank,process_rownum);    
    //root send the data to the processes.
    if(myrank == 0){//send
        for(i = 0 ; i < N ; i++){
       if(row_rank_table[i] == 0){
        row_address_table[i] = A(i,0);    
       }else{    
            MPI_Send(A(i,0),N,MPI_DOUBLE,row_rank_table[i],i,MPI_COMM_WORLD);    
            MPI_Send(B+i,1,MPI_DOUBLE,row_rank_table[i],i,MPI_COMM_WORLD);    
       }
        }
    }else{//receive
        for(i = 0 ; i < N ; i++){
       if(myrank !=0 && myrank == row_rank_table[i]){
           MPI_Recv(row_address_table[i],N,MPI_DOUBLE,0,i,MPI_COMM_WORLD,&status);    
           MPI_Recv(row_address_table[i]+N,1,MPI_DOUBLE,0,i,MPI_COMM_WORLD,&status);    
       }
        }
    }
 
   //printf the revecive result.
   /*
   for(i = 0 ; i < N ; i++){    
    if(myrank != row_rank_table[i]) continue;
       printf("rank%d-row%d:\n",myrank,row_rank_table[i]);    
    for(j = 0 ; j < M ; j++){
       printf("%lf ",*(row_address_table[i]+j));
    }
    printf("\n");
    }    */
    //computer
    //消元
    mainrow_space = (data_t *)_malloc_align(sizeof(data_t)*M,7);//存放主行的临时空间
    for(i = 0 ; i < N-1 ; i++){
    if(myrank == row_rank_table[i]){

//上面的条件,可能造成同一个进程上的其他行不被处理。

            //选主元
        //printf("myrank:%d\n",myrank);for(j = 0 ; j < M ; j++){printf("%lf ",*(row_address_table[i]+j));}printf("\n");    
        index = cblas_idamax(N-i,row_address_table[i]+i,1)+i;
            //列交换
                if(index != i){
                        int temp = shift_table[i];  
               shift_table[i] = shift_table[index];      
                   shift_table[index] = temp;
            
            data_t data_temp = *(row_address_table[i]+i);
            *(row_address_table[i]+i) = *(row_address_table[i]+index);
            *(row_address_table[i]+index) = data_temp;
            }
        //拷贝主行元素到临时空间
        //printf("myrank:%d\n",myrank);for(j = 0 ; j < M ; j++){printf("%lf ",*(row_address_table[i]+j));}printf("\n");    
        memcpy(mainrow_space,row_address_table[i],sizeof(data_t)*M);
        //printf("myrank:%d\n",myrank);for(j = 0 ; j < M ; j++){printf("%lf ",*(row_address_table[i]+j));}printf("\n");    
    }    
    //其他行的列交换
        MPI_Bcast(&index,1,MPI_INT,row_rank_table[i],MPI_COMM_WORLD);
    if(myrank != row_rank_table[i]){

//上面的条件,可能造成同一个进程上的其他行不被处理。
                if(index != i){
                        int temp = shift_table[i];  
               shift_table[i] = shift_table[index];      
                   shift_table[index] = temp;
            //swap the other rows
            for(j = 0 ; j < N ; j++){
                if(myrank == row_rank_table[j]){
                data_t data_temp = *(row_address_table[j]+i);
                *(row_address_table[j]+i) = *(row_address_table[j]+index);
                *(row_address_table[j]+index) = data_temp;
                }
            }//end of for
            }
    
    }    
    
    //发送主行元素
        MPI_Bcast(mainrow_space,M,MPI_DOUBLE,row_rank_table[i],MPI_COMM_WORLD);
        //主元归一化
    if(myrank == row_rank_table[i]){

//注意,上面这个条件是错的,因为当前rank上还有其他行,这样会导致其他行不被处理。
        //if(*(row_address_table[i]+i) == 0){continue;}
        //printf("myrank:%d\n",myrank);for(j = 0 ; j < M ; j++){printf("%lf ",mainrow_space[j]);}printf("\n");    

    }else{
        for(j = 0 ; j < N ; j++){
        //处理该进程所分配的行。
        if(j != i && myrank == row_rank_table[j]){
           if(*(row_address_table[j]+i) == 0){continue;}
                // 消元
               data_t factor = -1*(*(row_address_table[j]+i)/mainrow_space[i]);
           cblas_daxpy(N,factor,mainrow_space,1,row_address_table[j],1);
           *(row_address_table[j]+N) =  *(row_address_table[j]+N) +mainrow_space[N]*factor;//elem of B
        }

        }

    }
    }

   //
 
   //printf the revecive result.
   /*
   for(i = 0 ; i < N ; i++){    
    if(myrank != row_rank_table[i]) continue;
       printf("rank%d-row%d:\n",myrank,row_rank_table[i]);    
    for(j = 0 ; j < M ; j++){
       printf("%lf ",*(row_address_table[i]+j));
    }
    printf("\n");
    }    */
    
    //回代
    data_t sum = 0;

//注意一个进程不只一个行,因此只安排一个sum也是不够的。
    for(i = N-1 ; i>= 0 ; i–){
    if(myrank == row_rank_table[i]){
        mainrow_space[i] = (*(row_address_table[i]+N)-sum)/(*(row_address_table[i]+i));//(*(B+i) – sum) / *A(i,i);    
    }
        MPI_Bcast(mainrow_space+i,1,MPI_DOUBLE,row_rank_table[i],MPI_COMM_WORLD);
    for(j = 0 ; j < i ; j++){
    //处理该进程所分配的行。
        if(myrank == row_rank_table[j]){
          sum += *(row_address_table[j]+i)*mainrow_space[i];
        }

    }
                
    
    }
    //printf res
    for(i = 0 ; myrank == 0 && i < N ; i++){
        printf("x%d:%lf ",shift_table[i],mainrow_space[i]);
    }  

    /*        
    //computer
    for(i = 0 ; i < N-1 ; i++){
        //选主元
    index = cblas_idamax(N-i,A(i,i),1)+i;//cblas_idamax 选择最大绝对值的blas库
    //printf("index:%d\n",index);
        //列交换
        if(index != i){
           temp = shift_table[i];  
       shift_table[i] = shift_table[index];      
           shift_table[index] = temp;
           for(row = 0 ; row < N ; row++){
               data_t temp = *A(row,i);   
               *(A(row,i)) = *A(row,index);  
               *A(row,index) = temp;
           }
        }
    for(row = 0 ; row < N ; row++)
        printf("%d ",shift_table[row]);
    printf("\n");
        //主元归一化
        // 消元
        for(row = i+1 ; row < N ; row++){
        if(*A(row,i) == 0) continue;    
        data_t factor = -1*(*A(row,i)/ *A(i,i));
        cblas_daxpy(N-i,factor,A(i,i),1,A(row,i),1);//cblas_daxpy( N, alpha,X,incX, Y, incY); Y=Y+alpha*X
        *(B+row) = *(B+row) + *(B+i)*factor;    
        }
        //print_matrix();
       
    }
    //回代,i代表了Xi,col代表了列号,用来计算前面已经计算的xi值的sum
    for(i = N-1 ; i >= 0 ; i–){
        data_t sum = 0;
        for( j = i+1 ; j < N ; j++){
            sum += *A(i,j)*X[j];   
        }
        X[i] = (*(B+i) – sum) / *A(i,i);
    }
    //恢复,其中shift table保存了原来的列号
    for(i = 0 ; i < N ; i++){
        printf("x%d:%lf ",shift_table[i],X[i]);
    }  
               */
    MPI_Finalize();    

 
}
void print_matrix(){
    int i,j;    
    printf("matrix:\n");
    for(i = 0 ; i < N ; i++){
        for(j = 0 ; j < N ; j++){
                printf("%lf ",*A(i,j));
        }
        printf("%lf\n",*(B+i));
    }

}

修改后的实现:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <cblas.h>
#include "mpi.h"
#include "malloc_align.h"

void environment_init();
typedef double data_t;
int N,M;//N行,M列,M = N+1
data_t * matrix_A;//X的系数矩阵
data_t * B;//y向量
data_t * X;//x的解向量
int *shift_table;
MPI_Status status;

//通过A(i,j)得到,x系数矩阵中[i,j]元素的地址
data_t * A(int i,int j){
    return matrix_A+i*M+j;
}
void print_matrix();

int main(int argc,char **argv)
{
    FILE * f;
    int i = 0,j = 0,index = 0,row = 0;
    int group_size;
    int *row_rank_table;
    int process_rownum;
    int myrank;   
    data_t * mainrow_space;
    data_t ** row_address_table;   
       

    MPI_Init(&argc,&argv);
    MPI_Comm_size(MPI_COMM_WORLD,&group_size);
    MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
    //printf("mpi:%d %d\n",group_size,myrank);
               
    if(myrank == 0){
        f= fopen("/home/duanple/CBLAS/data.txt","r");           
        fscanf(f,"%d%d",&N,&M);
        matrix_A = (data_t *)_malloc_align(sizeof(data_t)*N*M,7);
        B = (data_t *)_malloc_align(sizeof(data_t)*N,7);
        X = (data_t *)_malloc_align(sizeof(data_t)*N,7);   
        //get matrix from file
        for(i = 0 ; i < N ; i++){
             for(j = 0 ; j < N ; j++){
                fscanf(f,"%lf",A(i,j));  
        }
        fscanf(f,"%lf",A(i,j));//A(i,N) also stores B[i]
    *(B+i) = *A(i,j);
        }    
    //print_matrix();
    }   
    //broadcast the process row to be in,and rows num in every process
    MPI_Bcast(&N,1,MPI_INT,0,MPI_COMM_WORLD);     
    M = N+1;
    //printf("rank:%d m:%d\n",myrank,M);   
    //malloc the memory for the row of matrix
    shift_table = (int*)_malloc_align(sizeof(int)*N,7);   
    row_rank_table = (int *)_malloc_align(sizeof(int)*N,7);     
    row_address_table = (data_t**)_malloc_align(sizeof(data_t*)*N,7);      
    //init shift table
    for(i = 0 ; i < N ; i++){
        shift_table[i] = i;
    }
       
    //divide matrix
    if(myrank == 0){   
    for(i = 0,j = 0 ; i < N ; i++,j++){
        row_rank_table[i] = j%group_size;               
    }   
    }
    MPI_Bcast(row_rank_table,N,MPI_INT,0,MPI_COMM_WORLD);

    process_rownum = 0;
    for(i = 0 ; i < N ; i++){
    //printf("rank:%d row%d\n",myrank,row_rank_table[i]);
    if(row_rank_table[i] == myrank){
        row_address_table[i] =  (data_t *)_malloc_align(sizeof(data_t)*M,7);   
        process_rownum++;
    }
    }   
    //
    //printf("rank:%d rows:%d\n",myrank,process_rownum);   
    //root send the data to the processes.
    if(myrank == 0){//send
        for(i = 0 ; i < N ; i++){
       if(row_rank_table[i] == 0){
        row_address_table[i] = A(i,0);   
       }else{   
            MPI_Send(A(i,0),N,MPI_DOUBLE,row_rank_table[i],i,MPI_COMM_WORLD);   
            MPI_Send(B+i,1,MPI_DOUBLE,row_rank_table[i],i,MPI_COMM_WORLD);   
       }
        }
    }else{//receive
        for(i = 0 ; i < N ; i++){
       if(myrank !=0 && myrank == row_rank_table[i]){
           MPI_Recv(row_address_table[i],N,MPI_DOUBLE,0,i,MPI_COMM_WORLD,&status);   
           MPI_Recv(row_address_table[i]+N,1,MPI_DOUBLE,0,i,MPI_COMM_WORLD,&status);   
       }
        }
    }
 
   //printf the revecive result.
   /*
   for(i = 0 ; i < N ; i++){   
    if(myrank != row_rank_table[i]) continue;
       printf("rank%d-row%d:\n",myrank,row_rank_table[i]);   
    for(j = 0 ; j < M ; j++){
       printf("%lf ",*(row_address_table[i]+j));
    }
    printf("\n");
    }    */
    //computer
    //消元
    mainrow_space = (data_t *)_malloc_align(sizeof(data_t)*M,7);//存放主行的临时空间
    for(i = 0 ; i < N-1 ; i++){
    if(myrank == row_rank_table[i]){
            //选主元
        //printf("myrank:%d\n",myrank);for(j = 0 ; j < M ; j++){printf("%lf ",*(row_address_table[i]+j));}printf("\n");   
        index = cblas_idamax(N-i,row_address_table[i]+i,1)+i;
            //列交换
                if(index != i){
                        int temp = shift_table[i]; 
               shift_table[i] = shift_table[index];     
                   shift_table[index] = temp;
           
            data_t data_temp = *(row_address_table[i]+i);
            *(row_address_table[i]+i) = *(row_address_table[i]+index);
            *(row_address_table[i]+index) = data_temp;
            }
        //拷贝主行元素到临时空间
        //printf("myrank:%d\n",myrank);for(j = 0 ; j < M ; j++){printf("%lf ",*(row_address_table[i]+j));}printf("\n");   
        memcpy(mainrow_space,row_address_table[i],sizeof(data_t)*M);
        //printf("myrank:%d\n",myrank);for(j = 0 ; j < M ; j++){printf("%lf ",*(row_address_table[i]+j));}printf("\n");   
    }   
    //其他行的列交换
        MPI_Bcast(&index,1,MPI_INT,row_rank_table[i],MPI_COMM_WORLD);
    //修改列交换标志,以进程为单位,不是以行
    if(myrank != row_rank_table[i]){
                  int temp = shift_table[i]; 
           shift_table[i] = shift_table[index];     
               shift_table[index] = temp;
    }
    for(j = 0 ; j < N ; j++){
        if(j != i && myrank == row_rank_table[j]){
            data_t data_temp = *(row_address_table[j]+i);
            *(row_address_table[j]+i) = *(row_address_table[j]+index);
            *(row_address_table[j]+index) = data_temp;
        }   
    }
   
    //发送主行元素
        MPI_Bcast(mainrow_space,M,MPI_DOUBLE,row_rank_table[i],MPI_COMM_WORLD);
    //printf("myrank:%d\n",myrank);for(j = 0 ; j < M ; j++){printf("%lf ",mainrow_space[j]);}printf("\n");
        //主元归一化
    for(j = i+1 ; j < N ; j++){
    //处理该进程所分配的行。
        if(myrank == row_rank_table[j]){
            if(*(row_address_table[j]+i) == 0){continue;}
               // 消元
            data_t factor = -1*(*(row_address_table[j]+i)/mainrow_space[i]);
            cblas_daxpy(N,factor,mainrow_space,1,row_address_table[j],1);
            *(row_address_table[j]+N) =  *(row_address_table[j]+N) +mainrow_space[N]*factor;//elem of B
        }

    }

   
    }

   //
 
   //printf the  result.
   /*
   for(i = 0 ; i < N ; i++){   
    if(myrank != row_rank_table[i]) continue;
       printf("rank%d-row%d:\n",myrank,row_rank_table[i]);   
    for(j = 0 ; j < M ; j++){
       printf("%lf ",*(row_address_table[i]+j));
    }
    printf("\n");
    }   
    */
    //回代
    for(i = N-1 ; i>= 0 ; i–){
    if(myrank == row_rank_table[i]){
        mainrow_space[i] = *(row_address_table[i]+N)/(*(row_address_table[i]+i));//(*(B+i) – sum) / *A(i,i);   
    }
        MPI_Bcast(mainrow_space+i,1,MPI_DOUBLE,row_rank_table[i],MPI_COMM_WORLD);
    for(j = 0 ; j < i ; j++){
    //处理该进程所分配的行。
        if(myrank == row_rank_table[j]){
          *(row_address_table[j]+N) -= *(row_address_table[j]+i)*mainrow_space[i];
        }

    }
               
   
    }
    //printf res
   
    for(i = 0 ; myrank == 0 && i < N ; i++){
        printf("x%d:%lf ",shift_table[i],mainrow_space[i]);
    } 
    MPI_Finalize();   
 
}
void print_matrix(){
    int i,j;   
    printf("matrix:\n");
    for(i = 0 ; i < N ; i++){
        for(j = 0 ; j < N ; j++){
                printf("%lf ",*A(i,j));
        }
        printf("%lf\n",*(B+i));
    }

}

#############################################################################

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <cblas.h>
#include "mpi.h"
#include "malloc_align.h"

void environment_init();
typedef double data_t;
int N,M;//N行,M列,M = N+1
data_t * matrix_A;//X的系数矩阵
data_t * B;//y向量
data_t * X;//x的解向量
int *shift_table;
MPI_Status status;

//通过A(i,j)得到,x系数矩阵中[i,j]元素的地址
data_t * A(int i,int j){
    return matrix_A+i*M+j;
}
void print_matrix();
double * processor_test(MPI_Comm comm,void test(),long loops,double comp_size,int master_node);
void data_malloc(double * processor_descrip,int * rank_table,int matrix_size);
void linear_solver(MPI_Comm comm);

void test(){
    double a = 1.0;
    double b = 2.44;
    double c = a*b;

}
int main(int argc,char **argv)
{
       

    MPI_Init(&argc,&argv);
    processor_test(MPI_COMM_WORLD,test,1000,1.0,0);
    //linear_solver(MPI_COMM_WORLD);
    MPI_Finalize();   
 
}

//test the computer ability of processors
double * processor_test(MPI_Comm comm,void test(),long loops,double comp_size,int master_node){
    int myrank;
    int group_size,i;
    double start_time,end_time,time;
    double * processor_able;
    MPI_Comm_size(comm,&group_size);
    MPI_Comm_rank(comm,&myrank);
    if(myrank == master_node){
        processor_able = (double *)_malloc_align(sizeof(double)*group_size+2,7);   
        processor_able[0] = group_size;
        processor_able[1] = comp_size*loops;
    } else
        processor_able = NULL;  

    start_time = MPI_Wtime();
    for(i = 0; i < loops ; i++)
        test();   
    end_time = MPI_Wtime();
    time = end_time-start_time;
    printf("rank%d %lf\n",myrank,time);   
   
    MPI_Gather(&time,1,MPI_DOUBLE,processor_able+2,1,MPI_DOUBLE,master_node,comm);
   
    //print the test result.
    if(myrank == master_node){
        for(i = 0 ; i < group_size ; i++){
            printf("%d %lf ",i,processor_able[i+2]);
        }

    }

    return processor_able;   

}
void data_malloc(double * processor_descrip,int * rank_table,int matrix_size){
    int p_num =(int)processor_descrip[0];
   

}
void linear_solver(MPI_Comm comm){

    FILE * f;
    int i = 0,j = 0,index = 0,row = 0;
    int group_size;
    int *row_rank_table;
    int process_rownum;
    int myrank;   
    data_t * mainrow_space;
    data_t ** row_address_table;   

    MPI_Comm_size(comm,&group_size);
    MPI_Comm_rank(comm,&myrank);
    //printf("mpi:%d %d\n",group_size,myrank);
               
    if(myrank == 0){
        f= fopen("/home/duanple/CBLAS/data.txt","r");           
        fscanf(f,"%d%d",&N,&M);
        matrix_A = (data_t *)_malloc_align(sizeof(data_t)*N*M,7);
        B = (data_t *)_malloc_align(sizeof(data_t)*N,7);
        X = (data_t *)_malloc_align(sizeof(data_t)*N,7);   
        //get matrix from file
        for(i = 0 ; i < N ; i++){
             for(j = 0 ; j < N ; j++){
                fscanf(f,"%lf",A(i,j));  
        }
        fscanf(f,"%lf",A(i,j));//A(i,N) also stores B[i]
    *(B+i) = *A(i,j);
        }    
    //print_matrix();
    }   
    //broadcast the process row to be in,and rows num in every process
    MPI_Bcast(&N,1,MPI_INT,0,comm);     
    M = N+1;
    //printf("rank:%d m:%d\n",myrank,M);   
    //malloc the memory for the row of matrix
    shift_table = (int*)_malloc_align(sizeof(int)*N,7);   
    row_rank_table = (int *)_malloc_align(sizeof(int)*N,7);     
    row_address_table = (data_t**)_malloc_align(sizeof(data_t*)*N,7);      
    //init shift table
    for(i = 0 ; i < N ; i++){
        shift_table[i] = i;
    }
       
    //divide matrix
    if(myrank == 0){   
    for(i = 0,j = 0 ; i < N ; i++,j++){
        row_rank_table[i] = j%group_size;               
    }   
    }
    MPI_Bcast(row_rank_table,N,MPI_INT,0,comm);

    process_rownum = 0;
    for(i = 0 ; i < N ; i++){
    //printf("rank:%d row%d\n",myrank,row_rank_table[i]);
    if(row_rank_table[i] == myrank){
        row_address_table[i] =  (data_t *)_malloc_align(sizeof(data_t)*M,7);   
        process_rownum++;
    }
    }   
    //
    //printf("rank:%d rows:%d\n",myrank,process_rownum);   
    //root send the data to the processes.
    if(myrank == 0){//send
        for(i = 0 ; i < N ; i++){
       if(row_rank_table[i] == 0){
        row_address_table[i] = A(i,0);   
       }else{   
            MPI_Send(A(i,0),N,MPI_DOUBLE,row_rank_table[i],i,comm);   
            MPI_Send(B+i,1,MPI_DOUBLE,row_rank_table[i],i,comm);   
       }
        }
    }else{//receive
        for(i = 0 ; i < N ; i++){
       if(myrank !=0 && myrank == row_rank_table[i]){
           MPI_Recv(row_address_table[i],N,MPI_DOUBLE,0,i,comm,&status);   
           MPI_Recv(row_address_table[i]+N,1,MPI_DOUBLE,0,i,comm,&status);   
       }
        }
    }
 
   //printf the revecive result.
   /*
   for(i = 0 ; i < N ; i++){   
    if(myrank != row_rank_table[i]) continue;
       printf("rank%d-row%d:\n",myrank,row_rank_table[i]);   
    for(j = 0 ; j < M ; j++){
       printf("%lf ",*(row_address_table[i]+j));
    }
    printf("\n");
    }    */
    //computer
   

You Might Also Like