高性能计算

线性方程组求解-高斯消元

2009年4月22日 阅读(50)

4    5
1.000000    4.000000    -2.000000    3.000000    6.000000
2.000000    2.000000    0.000000    4.000000    2.000000
3.000000    0.000000    -1.000000    2.000000    1.000000
1.000000    2.000000    2.000000    -3.000000    8.000000

一.基本实现

#include <cstdio>
#include <cstdlib>
#include <cstring>
using namespace std;
void environment_init();
typedef double data_t;
void cblas_factor(data_t *vec,int size,data_t factor);
void cblas_sub(data_t *minuend,data_t *subtrahend,int size);
data_t * cblas_max(data_t *vec,int size);
//N行,M列,M = N+1
int N,M;
//X的系数矩阵
data_t * matrix_A;
//y向量
data_t * B;
//x的解向量
data_t * X;
//通过A(i,j)得到,x系数矩阵中[i,j]元素的地址
data_t * A(int i,int j){
    return matrix_A+i*N+j;
}
void print_matrix(){
    printf("matrix:\n");
    for(int i = 0 ; i < N ; i++){
        for(int j = 0 ; j < N ; j++){
                printf("%lf ",*A(i,j));
        }
        printf("%lf\n",*(B+i));
    }

}
int main()
{
    FILE * f = fopen("data.txt","r");
    fscanf(f,"%d%d",&N,&M);
    matrix_A = (data_t *)malloc(sizeof(data_t)*N*N);
    B = (data_t *)malloc(sizeof(data_t)*N);
    X = (data_t *)malloc(sizeof(data_t)*N);
    int *shift_table = (int*)malloc(sizeof(int)*N);
   
    //get matrix from file
    for(int i = 0 ; i < N ; i++){
        for(int j = 0 ; j < N ; j++){
            fscanf(f,"%lf",A(i,j));   
            printf("%lf ",*A(i,j));
        }
        fscanf(f,"%lf",B+i);
        printf("%lf\n",*(B+i));
    }     
    //init shift table
    for(int i = 0 ; i < N ; i++){
        shift_table[i] = i;
    }
    //computer
    for(int i = 0 ; i < N-1 ; i++){
        //选主元
        int index = cblas_max(A(i,i),N-i)-A(i,0);
        //列交换
        if(index != i){

    //错误!!!
           shift_table[i] = index;    
           shift_table[index] = i;

           for(int row = 0 ; row < N ; row++){
               data_t temp = *A(row,i);   
               *(A(row,i)) = *A(row,index);  
               *A(row,index) = temp;
           }
        }
        //主元归一化
        // 消元 

    //错误!!! *A(row,i)!= 0会让消元提前结束
        for(int row = i+1 ; *A(row,i)!= 0 && row < N ; row++){
            data_t factor = *A(i,i)/ *A(row,i);   
            cblas_factor(A(row,i),N-i,factor);
            cblas_sub(A(row,i),A(i,i),N-i);
            *(B+row) *= factor;
            *(B+row) = *(B+row)-*(B+i);   
        }
        print_matrix();
       
    }
    //回代,i代表了Xi,col代表了列号,用来计算前面已经计算的xi值的sum
    for(int i = N-1 ; i >= 0 ; i–){
        data_t sum = 0;
        for(int col = i+1 ; col < N ; col++){
            sum += *A(i,col)*X[col];   
        }
        X[i] = (*(B+i) – sum) / *A(i,i);
    }
    //恢复,其中shift table保存了原来的列号
    for(int i = 0 ; i < N ; i++){
        printf("x%d:%lf ",shift_table[i],X[i]);
    }
   
               

 
}

//向量操作,对所有的数*factor
void cblas_factor(data_t *vec,int size,data_t factor){
    for(int i = 0 ; i < size ; i++){
        *(vec+i) = *(vec+i)*factor;
    }
}
//两个向量的减
void cblas_sub(data_t *minuend,data_t *subtrahend,int size){
    for(int i = 0 ; i < size ; i++){
        *(minuend+i) = *(minuend+i) – *(subtrahend+i);
    }

}
//寻找向量中的最大值
data_t * cblas_max(data_t *vec,int size){
    data_t * index = vec;
    data_t max = *vec;
    for(int i = 0 ; i < size ; i++){
        if(*(vec+i) > max){
            max =  *(vec+i);
            index = vec+i;  
        }
    }
    return index;

}

二.使用blas上的cell实现

 blas参考文档:http://www.netlib.org/blas/blast-forum/blas-report.pdf

基本blas实现:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <cblas.h>
#include "malloc_align.h"

void environment_init();
typedef double data_t;
int N,M;//N行,M列,M = N+1
data_t * matrix_A;//X的系数矩阵
data_t * B;//y向量
data_t * X;//x的解向量
int *shift_table;

//通过A(i,j)得到,x系数矩阵中[i,j]元素的地址
data_t * A(int i,int j){
    return matrix_A+i*N+j;
}
void print_matrix();

int main()
{
    FILE * f = fopen("data.txt","r");
    int i = 0,j = 0,index = 0,row = 0,temp = 0;
   
    fscanf(f,"%d%d",&N,&M);
    matrix_A = (data_t *)_malloc_align(sizeof(data_t)*N*N,7);
    B = (data_t *)_malloc_align(sizeof(data_t)*N,7);
    X = (data_t *)_malloc_align(sizeof(data_t)*N,7);
    shift_table = (int*)_malloc_align(sizeof(int)*N,7);
  
    //get matrix from file
    for(i = 0 ; i < N ; i++){
        for(j = 0 ; j < N ; j++){
            fscanf(f,"%lf",A(i,j));  
            printf("%lf ",*A(i,j));
        }
        fscanf(f,"%lf",B+i);
        printf("%lf\n",*(B+i));
    }    
    //init shift table
    for(i = 0 ; i < N ; i++){
        shift_table[i] = i;
    }
    //computer
    for(i = 0 ; i < N-1 ; i++){
        //选主元
    index = cblas_idamax(N-i,A(i,i),1)+i;//cblas_idamax 选择最大绝对值的blas库
    printf("index:%d\n",index);
        //列交换
        if(index != i){
           temp = shift_table[i]; 
       shift_table[i] = shift_table[index];     
           shift_table[index] = temp;
           for(row = 0 ; row < N ; row++){
               data_t temp = *A(row,i);  
               *(A(row,i)) = *A(row,index); 
               *A(row,index) = temp;
           }
        }
    for(row = 0 ; row < N ; row++)
        printf("%d ",shift_table[row]);
    printf("\n");
        //主元归一化
        // 消元
        for(row = i+1 ; row < N ; row++){
        if(*A(row,i) == 0) continue;   
        data_t factor = -1*(*A(row,i)/ *A(i,i));
        cblas_daxpy(N-i,factor,A(i,i),1,A(row,i),1);//cblas_daxpy( N, alpha,X,incX, Y, incY); Y=Y+alpha*X
        *(B+row) = *(B+row) + *(B+i)*factor;   
        }
        print_matrix();
      
    }
    //回代,i代表了Xi,col代表了列号,用来计算前面已经计算的xi值的sum
    for(i = N-1 ; i >= 0 ; i–){
        data_t sum = 0;
        for( j = i+1 ; j < N ; j++){
            sum += *A(i,j)*X[j];  
        }
        X[i] = (*(B+i) – sum) / *A(i,i);
    }
    //恢复,其中shift table保存了原来的列号
    for(i = 0 ; i < N ; i++){
        printf("x%d:%lf ",shift_table[i],X[i]);
    } 
              

 
}
void print_matrix(){
    int i,j;   
    printf("matrix:\n");
    for(i = 0 ; i < N ; i++){
        for(j = 0 ; j < N ; j++){
                printf("%lf ",*A(i,j));
        }
        printf("%lf\n",*(B+i));
    }

}

三.使用mpi的基本实现

 

四.使用mpi的cell实现

 

五.使用mpi的异构实现

 

六.使用mpi的异构适用性实现

 #include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <cblas.h>
#include "mpi.h"
#include "malloc_align.h"

void environment_init();
typedef double data_t;
int N,M;//N行,M列,M = N+1
data_t * matrix_A;//X的系数矩阵
data_t * B;//y向量
data_t * X;//x的解向量
int *shift_table;
MPI_Status status;

//通过A(i,j)得到,x系数矩阵中[i,j]元素的地址
data_t * A(int i,int j){
    return matrix_A+i*M+j;
}
void print_matrix();

int main(int argc,char **argv)
{
    FILE * f;
    int i = 0,j = 0,index = 0,row = 0;
    int group_size;
    int *row_rank_table;
    int process_rownum;
    int myrank;    
    data_t * mainrow_space;
    data_t ** row_address_table;    
        

    MPI_Init(&argc,&argv);
    MPI_Comm_size(MPI_COMM_WORLD,&group_size);
    MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
    //printf("mpi:%d %d\n",group_size,myrank);
                
    if(myrank == 0){
        f= fopen("/home/duanple/CBLAS/data.txt","r");            
        fscanf(f,"%d%d",&N,&M);
        matrix_A = (data_t *)_malloc_align(sizeof(data_t)*N*M,7);
        B = (data_t *)_malloc_align(sizeof(data_t)*N,7);
        X = (data_t *)_malloc_align(sizeof(data_t)*N,7);    
        //get matrix from file
        for(i = 0 ; i < N ; i++){
             for(j = 0 ; j < N ; j++){
                fscanf(f,"%lf",A(i,j));   
        }
        fscanf(f,"%lf",A(i,j));
    *(B+i) = *A(i,j);
        }     
    //print_matrix();
    }    
    //broadcast the process row to be in,and rows num in every process
    MPI_Bcast(&N,1,MPI_INT,0,MPI_COMM_WORLD);    
    M = N+1;
    //printf("rank:%d m:%d\n",myrank,M);    
    //malloc the memory for the row of matrix
    shift_table = (int*)_malloc_align(sizeof(int)*N,7);    
    row_rank_table = (int *)_malloc_align(sizeof(int)*N,7);      
    row_address_table = (data_t**)_malloc_align(sizeof(data_t*)*N,7);       
    //init shift table
    for(i = 0 ; i < N ; i++){
        shift_table[i] = i;
    }
        
    //divide matrix
    if(myrank == 0){    
    for(i = 0,j = 0 ; i < N ; i++,j++){
        row_rank_table[i] = j%group_size;                
    }    
    }
    MPI_Bcast(row_rank_table,N,MPI_INT,0,MPI_COMM_WORLD);

    process_rownum = 0;
    for(i = 0 ; i < N ; i++){
    //printf("rank:%d row%d\n",myrank,row_rank_table[i]);
    if(row_rank_table[i] == myrank){
        row_address_table[i] =  (data_t *)_malloc_align(sizeof(data_t)*M,7);    
        process_rownum++;
    }
    }    
    //
    //printf("rank:%d rows:%d\n",myrank,process_rownum);    
    //root send the data to the processes.
    if(myrank == 0){//send
        for(i = 0 ; i < N ; i++){
       if(row_rank_table[i] == 0){
        row_address_table[i] = A(i,0);    
       }else{    
            MPI_Send(A(i,0),N,MPI_DOUBLE,row_rank_table[i],i,MPI_COMM_WORLD);    
            MPI_Send(B+i,1,MPI_DOUBLE,row_rank_table[i],i,MPI_COMM_WORLD);    
       }
        }
    }else{//receive
        for(i = 0 ; i < N ; i++){
       if(myrank !=0 && myrank == row_rank_table[i]){
           MPI_Recv(row_address_table[i],N,MPI_DOUBLE,0,i,MPI_COMM_WORLD,&status);    
           MPI_Recv(row_address_table[i]+N,1,MPI_DOUBLE,0,i,MPI_COMM_WORLD,&status);    
       }
        }
    }
 
   //printf the revecive result.
   /*
   for(i = 0 ; i < N ; i++){    
    if(myrank != row_rank_table[i]) continue;
       printf("rank%d-row%d:\n",myrank,row_rank_table[i]);    
    for(j = 0 ; j < M ; j++){
       printf("%lf ",*(row_address_table[i]+j));
    }
    printf("\n");
    }    */
    //computer
    //消元
    mainrow_space = (data_t *)_malloc_align(sizeof(data_t)*M,7);//存放主行的临时空间
    for(i = 0 ; i < N-1 ; i++){
    if(myrank == row_rank_table[i]){

//上面的条件,可能造成同一个进程上的其他行不被处理。

            //选主元
        //printf("myrank:%d\n",myrank);for(j = 0 ; j < M ; j++){printf("%lf ",*(row_address_table[i]+j));}printf("\n");    
        index = cblas_idamax(N-i,row_address_table[i]+i,1)+i;
            //列交换
                if(index != i){
                        int temp = shift_table[i];  
               shift_table[i] = shift_table[index];      
                   shift_table[index] = temp;
            
            data_t data_temp = *(row_address_table[i]+i);
            *(row_address_table[i]+i) = *(row_address_table[i]+index);
            *(row_address_table[i]+index) = data_temp;
            }
        //拷贝主行元素到临时空间
        //printf("myrank:%d\n",myrank);for(j = 0 ; j < M ; j++){printf("%lf ",*(row_address_table[i]+j));}printf("\n");    
        memcpy(mainrow_space,row_address_table[i],sizeof(data_t)*M);
        //printf("myrank:%d\n",myrank);for(j = 0 ; j < M ; j++){printf("%lf ",*(row_address_table[i]+j));}printf("\n");    
    }    
    //其他行的列交换
        MPI_Bcast(&index,1,MPI_INT,row_rank_table[i],MPI_COMM_WORLD);
    if(myrank != row_rank_table[i]){

//上面的条件,可能造成同一个进程上的其他行不被处理。
                if(index != i){
                        int temp = shift_table[i];  
               shift_table[i] = shift_table[index];      
                   shift_table[index] = temp;
            //swap the other rows
            for(j = 0 ; j < N ; j++){
                if(myrank == row_rank_table[j]){
                data_t data_temp = *(row_address_table[j]+i);
                *(row_address_table[j]+i) = *(row_address_table[j]+index);
                *(row_address_table[j]+index) = data_temp;
                }
            }//end of for
            }
    
    }    
    
    //发送主行元素
        MPI_Bcast(mainrow_space,M,MPI_DOUBLE,row_rank_table[i],MPI_COMM_WORLD);
        //主元归一化
    if(myrank == row_rank_table[i]){

//注意,上面这个条件是错的,因为当前rank上还有其他行,这样会导致其他行不被处理。
        //if(*(row_address_table[i]+i) == 0){continue;}
        //printf("myrank:%d\n",myrank);for(j = 0 ; j < M ; j++){printf("%lf ",mainrow_space[j]);}printf("\n");    

    }else{
        for(j = 0 ; j < N ; j++){
        //处理该进程所分配的行。
        if(j != i && myrank == row_rank_table[j]){
           if(*(row_address_table[j]+i) == 0){continue;}
                // 消元
               data_t factor = -1*(*(row_address_table[j]+i)/mainrow_space[i]);
           cblas_daxpy(N,factor,mainrow_space,1,row_address_table[j],1);
           *(row_address_table[j]+N) =  *(row_address_table[j]+N) +mainrow_space[N]*factor;//elem of B
        }

        }

    }
    }

   //
 
   //printf the revecive result.
   /*
   for(i = 0 ; i < N ; i++){    
    if(myrank != row_rank_table[i]) continue;
       printf("rank%d-row%d:\n",myrank,row_rank_table[i]);    
    for(j = 0 ; j < M ; j++){
       printf("%lf ",*(row_address_table[i]+j));
    }
    printf("\n");
    }    */
    
    //回代
    data_t sum = 0;

//注意一个进程不只一个行,因此只安排一个sum也是不够的。
    for(i = N-1 ; i>= 0 ; i–){
    if(myrank == row_rank_table[i]){
        mainrow_space[i] = (*(row_address_table[i]+N)-sum)/(*(row_address_table[i]+i));//(*(B+i) – sum) / *A(i,i);    
    }
        MPI_Bcast(mainrow_space+i,1,MPI_DOUBLE,row_rank_table[i],MPI_COMM_WORLD);
    for(j = 0 ; j < i ; j++){
    //处理该进程所分配的行。
        if(myrank == row_rank_table[j]){
          sum += *(row_address_table[j]+i)*mainrow_space[i];
        }

    }
                
    
    }
    //printf res
    for(i = 0 ; myrank == 0 && i < N ; i++){
        printf("x%d:%lf ",shift_table[i],mainrow_space[i]);
    }  

    /*        
    //computer
    for(i = 0 ; i < N-1 ; i++){
        //选主元
    index = cblas_idamax(N-i,A(i,i),1)+i;//cblas_idamax 选择最大绝对值的blas库
    //printf("index:%d\n",index);
        //列交换
        if(index != i){
           temp = shift_table[i];  
       shift_table[i] = shift_table[index];      
           shift_table[index] = temp;
           for(row = 0 ; row < N ; row++){
               data_t temp = *A(row,i);   
               *(A(row,i)) = *A(row,index);  
               *A(row,index) = temp;
           }
        }
    for(row = 0 ; row < N ; row++)
        printf("%d ",shift_table[row]);
    printf("\n");
        //主元归一化
        // 消元
        for(row = i+1 ; row < N ; row++){
        if(*A(row,i) == 0) continue;    
        data_t factor = -1*(*A(row,i)/ *A(i,i));
        cblas_daxpy(N-i,factor,A(i,i),1,A(row,i),1);//cblas_daxpy( N, alpha,X,incX, Y, incY); Y=Y+alpha*X
        *(B+row) = *(B+row) + *(B+i)*factor;    
        }
        //print_matrix();
       
    }
    //回代,i代表了Xi,col代表了列号,用来计算前面已经计算的xi值的sum
    for(i = N-1 ; i >= 0 ; i–){
        data_t sum = 0;
        for( j = i+1 ; j < N ; j++){
            sum += *A(i,j)*X[j];   
        }
        X[i] = (*(B+i) – sum) / *A(i,i);
    }
    //恢复,其中shift table保存了原来的列号
    for(i = 0 ; i < N ; i++){
        printf("x%d:%lf ",shift_table[i],X[i]);
    }  
               */
    MPI_Finalize();    

 
}
void print_matrix(){
    int i,j;    
    printf("matrix:\n");
    for(i = 0 ; i < N ; i++){
        for(j = 0 ; j < N ; j++){
                printf("%lf ",*A(i,j));
        }
        printf("%lf\n",*(B+i));
    }

}

修改后的实现:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <cblas.h>
#include "mpi.h"
#include "malloc_align.h"

void environment_init();
typedef double data_t;
int N,M;//N行,M列,M = N+1
data_t * matrix_A;//X的系数矩阵
data_t * B;//y向量
data_t * X;//x的解向量
int *shift_table;
MPI_Status status;

//通过A(i,j)得到,x系数矩阵中[i,j]元素的地址
data_t * A(int i,int j){
    return matrix_A+i*M+j;
}
void print_matrix();

int main(int argc,char **argv)
{
    FILE * f;
    int i = 0,j = 0,index = 0,row = 0;
    int group_size;
    int *row_rank_table;
    int process_rownum;
    int myrank;   
    data_t * mainrow_space;
    data_t ** row_address_table;   
       

    MPI_Init(&argc,&argv);
    MPI_Comm_size(MPI_COMM_WORLD,&group_size);
    MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
    //printf("mpi:%d %d\n",group_size,myrank);
               
    if(myrank == 0){
        f= fopen("/home/duanple/CBLAS/data.txt","r");           
        fscanf(f,"%d%d",&N,&M);
        matrix_A = (data_t *)_malloc_align(sizeof(data_t)*N*M,7);
        B = (data_t *)_malloc_align(sizeof(data_t)*N,7);
        X = (data_t *)_malloc_align(sizeof(data_t)*N,7);   
        //get matrix from file
        for(i = 0 ; i < N ; i++){
             for(j = 0 ; j < N ; j++){
                fscanf(f,"%lf",A(i,j));  
        }
        fscanf(f,"%lf",A(i,j));//A(i,N) also stores B[i]
    *(B+i) = *A(i,j);
        }    
    //print_matrix();
    }   
    //broadcast the process row to be in,and rows num in every process
    MPI_Bcast(&N,1,MPI_INT,0,MPI_COMM_WORLD);     
    M = N+1;
    //printf("rank:%d m:%d\n",myrank,M);   
    //malloc the memory for the row of matrix
    shift_table = (int*)_malloc_align(sizeof(int)*N,7);   
    row_rank_table = (int *)_malloc_align(sizeof(int)*N,7);     
    row_address_table = (data_t**)_malloc_align(sizeof(data_t*)*N,7);      
    //init shift table
    for(i = 0 ; i < N ; i++){
        shift_table[i] = i;
    }
       
    //divide matrix
    if(myrank == 0){   
    for(i = 0,j = 0 ; i < N ; i++,j++){
        row_rank_table[i] = j%group_size;               
    }   
    }
    MPI_Bcast(row_rank_table,N,MPI_INT,0,MPI_COMM_WORLD);

    process_rownum = 0;
    for(i = 0 ; i < N ; i++){
    //printf("rank:%d row%d\n",myrank,row_rank_table[i]);
    if(row_rank_table[i] == myrank){
        row_address_table[i] =  (data_t *)_malloc_align(sizeof(data_t)*M,7);   
        process_rownum++;
    }
    }   
    //
    //printf("rank:%d rows:%d\n",myrank,process_rownum);   
    //root send the data to the processes.
    if(myrank == 0){//send
        for(i = 0 ; i < N ; i++){
       if(row_rank_table[i] == 0){
        row_address_table[i] = A(i,0);   
       }else{   
            MPI_Send(A(i,0),N,MPI_DOUBLE,row_rank_table[i],i,MPI_COMM_WORLD);   
            MPI_Send(B+i,1,MPI_DOUBLE,row_rank_table[i],i,MPI_COMM_WORLD);   
       }
        }
    }else{//receive
        for(i = 0 ; i < N ; i++){
       if(myrank !=0 && myrank == row_rank_table[i]){
           MPI_Recv(row_address_table[i],N,MPI_DOUBLE,0,i,MPI_COMM_WORLD,&status);   
           MPI_Recv(row_address_table[i]+N,1,MPI_DOUBLE,0,i,MPI_COMM_WORLD,&status);   
       }
        }
    }
 
   //printf the revecive result.
   /*
   for(i = 0 ; i < N ; i++){   
    if(myrank != row_rank_table[i]) continue;
       printf("rank%d-row%d:\n",myrank,row_rank_table[i]);   
    for(j = 0 ; j < M ; j++){
       printf("%lf ",*(row_address_table[i]+j));
    }
    printf("\n");
    }    */
    //computer
    //消元
    mainrow_space = (data_t *)_malloc_align(sizeof(data_t)*M,7);//存放主行的临时空间
    for(i = 0 ; i < N-1 ; i++){
    if(myrank == row_rank_table[i]){
            //选主元
        //printf("myrank:%d\n",myrank);for(j = 0 ; j < M ; j++){printf("%lf ",*(row_address_table[i]+j));}printf("\n");   
        index = cblas_idamax(N-i,row_address_table[i]+i,1)+i;
            //列交换
                if(index != i){
                        int temp = shift_table[i]; 
               shift_table[i] = shift_table[index];     
                   shift_table[index] = temp;
           
            data_t data_temp = *(row_address_table[i]+i);
            *(row_address_table[i]+i) = *(row_address_table[i]+index);
            *(row_address_table[i]+index) = data_temp;
            }
        //拷贝主行元素到临时空间
        //printf("myrank:%d\n",myrank);for(j = 0 ; j < M ; j++){printf("%lf ",*(row_address_table[i]+j));}printf("\n");   
        memcpy(mainrow_space,row_address_table[i],sizeof(data_t)*M);
        //printf("myrank:%d\n",myrank);for(j = 0 ; j < M ; j++){printf("%lf ",*(row_address_table[i]+j));}printf("\n");   
    }   
    //其他行的列交换
        MPI_Bcast(&index,1,MPI_INT,row_rank_table[i],MPI_COMM_WORLD);
    //修改列交换标志,以进程为单位,不是以行
    if(myrank != row_rank_table[i]){
                  int temp = shift_table[i]; 
           shift_table[i] = shift_table[index];     
               shift_table[index] = temp;
    }
    for(j = 0 ; j < N ; j++){
        if(j != i && myrank == row_rank_table[j]){
            data_t data_temp = *(row_address_table[j]+i);
            *(row_address_table[j]+i) = *(row_address_table[j]+index);
            *(row_address_table[j]+index) = data_temp;
        }   
    }
   
    //发送主行元素
        MPI_Bcast(mainrow_space,M,MPI_DOUBLE,row_rank_table[i],MPI_COMM_WORLD);
    //printf("myrank:%d\n",myrank);for(j = 0 ; j < M ; j++){printf("%lf ",mainrow_space[j]);}printf("\n");
        //主元归一化
    for(j = i+1 ; j < N ; j++){
    //处理该进程所分配的行。
        if(myrank == row_rank_table[j]){
            if(*(row_address_table[j]+i) == 0){continue;}
               // 消元
            data_t factor = -1*(*(row_address_table[j]+i)/mainrow_space[i]);
            cblas_daxpy(N,factor,mainrow_space,1,row_address_table[j],1);
            *(row_address_table[j]+N) =  *(row_address_table[j]+N) +mainrow_space[N]*factor;//elem of B
        }

    }

   
    }

   //
 
   //printf the  result.
   /*
   for(i = 0 ; i < N ; i++){   
    if(myrank != row_rank_table[i]) continue;
       printf("rank%d-row%d:\n",myrank,row_rank_table[i]);   
    for(j = 0 ; j < M ; j++){
       printf("%lf ",*(row_address_table[i]+j));
    }
    printf("\n");
    }   
    */
    //回代
    for(i = N-1 ; i>= 0 ; i–){
    if(myrank == row_rank_table[i]){
        mainrow_space[i] = *(row_address_table[i]+N)/(*(row_address_table[i]+i));//(*(B+i) – sum) / *A(i,i);   
    }
        MPI_Bcast(mainrow_space+i,1,MPI_DOUBLE,row_rank_table[i],MPI_COMM_WORLD);
    for(j = 0 ; j < i ; j++){
    //处理该进程所分配的行。
        if(myrank == row_rank_table[j]){
          *(row_address_table[j]+N) -= *(row_address_table[j]+i)*mainrow_space[i];
        }

    }
               
   
    }
    //printf res
   
    for(i = 0 ; myrank == 0 && i < N ; i++){
        printf("x%d:%lf ",shift_table[i],mainrow_space[i]);
    } 
    MPI_Finalize();   
 
}
void print_matrix(){
    int i,j;   
    printf("matrix:\n");
    for(i = 0 ; i < N ; i++){
        for(j = 0 ; j < N ; j++){
                printf("%lf ",*A(i,j));
        }
        printf("%lf\n",*(B+i));
    }

}

#############################################################################

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <cblas.h>
#include "mpi.h"
#include "malloc_align.h"

void environment_init();
typedef double data_t;
int N,M;//N行,M列,M = N+1
data_t * matrix_A;//X的系数矩阵
data_t * B;//y向量
data_t * X;//x的解向量
int *shift_table;
MPI_Status status;

//通过A(i,j)得到,x系数矩阵中[i,j]元素的地址
data_t * A(int i,int j){
    return matrix_A+i*M+j;
}
void print_matrix();
double * processor_test(MPI_Comm comm,void test(),long loops,double comp_size,int master_node);
void data_malloc(double * processor_descrip,int * rank_table,int matrix_size);
void linear_solver(MPI_Comm comm);

void test(){
    double a = 1.0;
    double b = 2.44;
    double c = a*b;

}
int main(int argc,char **argv)
{
       

    MPI_Init(&argc,&argv);
    processor_test(MPI_COMM_WORLD,test,1000,1.0,0);
    //linear_solver(MPI_COMM_WORLD);
    MPI_Finalize();   
 
}

//test the computer ability of processors
double * processor_test(MPI_Comm comm,void test(),long loops,double comp_size,int master_node){
    int myrank;
    int group_size,i;
    double start_time,end_time,time;
    double * processor_able;
    MPI_Comm_size(comm,&group_size);
    MPI_Comm_rank(comm,&myrank);
    if(myrank == master_node){
        processor_able = (double *)_malloc_align(sizeof(double)*group_size+2,7);   
        processor_able[0] = group_size;
        processor_able[1] = comp_size*loops;
    } else
        processor_able = NULL;  

    start_time = MPI_Wtime();
    for(i = 0; i < loops ; i++)
        test();   
    end_time = MPI_Wtime();
    time = end_time-start_time;
    printf("rank%d %lf\n",myrank,time);   
   
    MPI_Gather(&time,1,MPI_DOUBLE,processor_able+2,1,MPI_DOUBLE,master_node,comm);
   
    //print the test result.
    if(myrank == master_node){
        for(i = 0 ; i < group_size ; i++){
            printf("%d %lf ",i,processor_able[i+2]);
        }

    }

    return processor_able;   

}
void data_malloc(double * processor_descrip,int * rank_table,int matrix_size){
    int p_num =(int)processor_descrip[0];
   

}
void linear_solver(MPI_Comm comm){

    FILE * f;
    int i = 0,j = 0,index = 0,row = 0;
    int group_size;
    int *row_rank_table;
    int process_rownum;
    int myrank;   
    data_t * mainrow_space;
    data_t ** row_address_table;   

    MPI_Comm_size(comm,&group_size);
    MPI_Comm_rank(comm,&myrank);
    //printf("mpi:%d %d\n",group_size,myrank);
               
    if(myrank == 0){
        f= fopen("/home/duanple/CBLAS/data.txt","r");           
        fscanf(f,"%d%d",&N,&M);
        matrix_A = (data_t *)_malloc_align(sizeof(data_t)*N*M,7);
        B = (data_t *)_malloc_align(sizeof(data_t)*N,7);
        X = (data_t *)_malloc_align(sizeof(data_t)*N,7);   
        //get matrix from file
        for(i = 0 ; i < N ; i++){
             for(j = 0 ; j < N ; j++){
                fscanf(f,"%lf",A(i,j));  
        }
        fscanf(f,"%lf",A(i,j));//A(i,N) also stores B[i]
    *(B+i) = *A(i,j);
        }    
    //print_matrix();
    }   
    //broadcast the process row to be in,and rows num in every process
    MPI_Bcast(&N,1,MPI_INT,0,comm);     
    M = N+1;
    //printf("rank:%d m:%d\n",myrank,M);   
    //malloc the memory for the row of matrix
    shift_table = (int*)_malloc_align(sizeof(int)*N,7);   
    row_rank_table = (int *)_malloc_align(sizeof(int)*N,7);     
    row_address_table = (data_t**)_malloc_align(sizeof(data_t*)*N,7);      
    //init shift table
    for(i = 0 ; i < N ; i++){
        shift_table[i] = i;
    }
       
    //divide matrix
    if(myrank == 0){   
    for(i = 0,j = 0 ; i < N ; i++,j++){
        row_rank_table[i] = j%group_size;               
    }   
    }
    MPI_Bcast(row_rank_table,N,MPI_INT,0,comm);

    process_rownum = 0;
    for(i = 0 ; i < N ; i++){
    //printf("rank:%d row%d\n",myrank,row_rank_table[i]);
    if(row_rank_table[i] == myrank){
        row_address_table[i] =  (data_t *)_malloc_align(sizeof(data_t)*M,7);   
        process_rownum++;
    }
    }   
    //
    //printf("rank:%d rows:%d\n",myrank,process_rownum);   
    //root send the data to the processes.
    if(myrank == 0){//send
        for(i = 0 ; i < N ; i++){
       if(row_rank_table[i] == 0){
        row_address_table[i] = A(i,0);   
       }else{   
            MPI_Send(A(i,0),N,MPI_DOUBLE,row_rank_table[i],i,comm);   
            MPI_Send(B+i,1,MPI_DOUBLE,row_rank_table[i],i,comm);   
       }
        }
    }else{//receive
        for(i = 0 ; i < N ; i++){
       if(myrank !=0 && myrank == row_rank_table[i]){
           MPI_Recv(row_address_table[i],N,MPI_DOUBLE,0,i,comm,&status);   
           MPI_Recv(row_address_table[i]+N,1,MPI_DOUBLE,0,i,comm,&status);   
       }
        }
    }
 
   //printf the revecive result.
   /*
   for(i = 0 ; i < N ; i++){   
    if(myrank != row_rank_table[i]) continue;
       printf("rank%d-row%d:\n",myrank,row_rank_table[i]);   
    for(j = 0 ; j < M ; j++){
       printf("%lf ",*(row_address_table[i]+j));
    }
    printf("\n");
    }    */
    //computer
   

You Might Also Like

8 Comments

  • topairmax2012trade 2019年2月11日 at 下午7:19

    topairmax2012trade

    outletretrojordans, dentontimes,

  • asics x ronnie fieg gel lyte v cove hombre 2019年3月20日 at 下午6:00

    asics x ronnie fieg gel lyte v cove hombre

    damen nike performance flex trainer 6 sportschuhe wei脽 reines platin wolf grau anthracitenew balance 623v3 trainer leather damskieadidas ozweego uomonike lebron soldier 12 sfg white midnight navy preschool boys herr

  • puma creeper velvet rihanna fenty royal purple kvinder 2019年3月26日 at 下午11:57

    puma creeper velvet rihanna fenty royal purple kvinder

    converse all star white toddler kids casual herennike hyperdunk 2016 low unlimited uomoadidas country sleek w schoenenadidas zx flux space sinner schoenen

  • nike air max griffey fury jackie robinson schuhe damen 2019年3月28日 at 上午2:17

    nike air max griffey fury jackie robinson schuhe damen

    asics gel evate 3 herrenike air jordan 1 retro olmypic 2008 herreavis nike terra kiger 2nike free 6.0 en l铆nea

  • air vapormax 2 black hot punch dusty cactus w homme 2019年4月2日 at 下午1:19

    air vapormax 2 black hot punch dusty cactus w homme

    new balance fresh foam 3000v4 turf black with orange schuhe herrennike air zoom spiridon 16 gpx roundel dameair jordan 13 og damnike cortez ultra breathe woven mesh zwart wit schoenen kopen sale

  • adidas by alexander wang aw bball soccer orange white black 2019年4月8日 at 下午1:10

    adidas by alexander wang aw bball soccer orange white black

    adidas ace 17 purecontrol fg noir rouge blancnike lebron 12 all star game m忙ndadidas superstar negras 39carhartt nike air force 1 utility store list heren

  • adidas busenitz pure boost grey shoes 2019年4月15日 at 下午2:57

    adidas busenitz pure boost grey shoes

    nike kd vii 7 easy money preschool schuhe herrennike air max 90 rood zwartzapatillas nike metcon dsx flyknit 2 xnike m2k tekno denim bv0970 001 schuhe damen

  • nike womens in season tr 8 womens training schuhe herren 2019年4月16日 at 下午1:51

    nike womens in season tr 8 womens training schuhe herren

    nike hypervenom phantom ii df fgfeminino t茫陋nis nike air max 2016 sapatos peachjadepreto 80672015 nike air max 1 femme pas chernike darwin m nner alle wei

  • Leave a Reply