高性能计算

线性方程组求解-高斯消元

2009年4月22日 阅读(61)

4    5
1.000000    4.000000    -2.000000    3.000000    6.000000
2.000000    2.000000    0.000000    4.000000    2.000000
3.000000    0.000000    -1.000000    2.000000    1.000000
1.000000    2.000000    2.000000    -3.000000    8.000000

一.基本实现

#include <cstdio>
#include <cstdlib>
#include <cstring>
using namespace std;
void environment_init();
typedef double data_t;
void cblas_factor(data_t *vec,int size,data_t factor);
void cblas_sub(data_t *minuend,data_t *subtrahend,int size);
data_t * cblas_max(data_t *vec,int size);
//N行,M列,M = N+1
int N,M;
//X的系数矩阵
data_t * matrix_A;
//y向量
data_t * B;
//x的解向量
data_t * X;
//通过A(i,j)得到,x系数矩阵中[i,j]元素的地址
data_t * A(int i,int j){
    return matrix_A+i*N+j;
}
void print_matrix(){
    printf("matrix:\n");
    for(int i = 0 ; i < N ; i++){
        for(int j = 0 ; j < N ; j++){
                printf("%lf ",*A(i,j));
        }
        printf("%lf\n",*(B+i));
    }

}
int main()
{
    FILE * f = fopen("data.txt","r");
    fscanf(f,"%d%d",&N,&M);
    matrix_A = (data_t *)malloc(sizeof(data_t)*N*N);
    B = (data_t *)malloc(sizeof(data_t)*N);
    X = (data_t *)malloc(sizeof(data_t)*N);
    int *shift_table = (int*)malloc(sizeof(int)*N);
   
    //get matrix from file
    for(int i = 0 ; i < N ; i++){
        for(int j = 0 ; j < N ; j++){
            fscanf(f,"%lf",A(i,j));   
            printf("%lf ",*A(i,j));
        }
        fscanf(f,"%lf",B+i);
        printf("%lf\n",*(B+i));
    }     
    //init shift table
    for(int i = 0 ; i < N ; i++){
        shift_table[i] = i;
    }
    //computer
    for(int i = 0 ; i < N-1 ; i++){
        //选主元
        int index = cblas_max(A(i,i),N-i)-A(i,0);
        //列交换
        if(index != i){

    //错误!!!
           shift_table[i] = index;    
           shift_table[index] = i;

           for(int row = 0 ; row < N ; row++){
               data_t temp = *A(row,i);   
               *(A(row,i)) = *A(row,index);  
               *A(row,index) = temp;
           }
        }
        //主元归一化
        // 消元 

    //错误!!! *A(row,i)!= 0会让消元提前结束
        for(int row = i+1 ; *A(row,i)!= 0 && row < N ; row++){
            data_t factor = *A(i,i)/ *A(row,i);   
            cblas_factor(A(row,i),N-i,factor);
            cblas_sub(A(row,i),A(i,i),N-i);
            *(B+row) *= factor;
            *(B+row) = *(B+row)-*(B+i);   
        }
        print_matrix();
       
    }
    //回代,i代表了Xi,col代表了列号,用来计算前面已经计算的xi值的sum
    for(int i = N-1 ; i >= 0 ; i–){
        data_t sum = 0;
        for(int col = i+1 ; col < N ; col++){
            sum += *A(i,col)*X[col];   
        }
        X[i] = (*(B+i) – sum) / *A(i,i);
    }
    //恢复,其中shift table保存了原来的列号
    for(int i = 0 ; i < N ; i++){
        printf("x%d:%lf ",shift_table[i],X[i]);
    }
   
               

 
}

//向量操作,对所有的数*factor
void cblas_factor(data_t *vec,int size,data_t factor){
    for(int i = 0 ; i < size ; i++){
        *(vec+i) = *(vec+i)*factor;
    }
}
//两个向量的减
void cblas_sub(data_t *minuend,data_t *subtrahend,int size){
    for(int i = 0 ; i < size ; i++){
        *(minuend+i) = *(minuend+i) – *(subtrahend+i);
    }

}
//寻找向量中的最大值
data_t * cblas_max(data_t *vec,int size){
    data_t * index = vec;
    data_t max = *vec;
    for(int i = 0 ; i < size ; i++){
        if(*(vec+i) > max){
            max =  *(vec+i);
            index = vec+i;  
        }
    }
    return index;

}

二.使用blas上的cell实现

 blas参考文档:http://www.netlib.org/blas/blast-forum/blas-report.pdf

基本blas实现:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <cblas.h>
#include "malloc_align.h"

void environment_init();
typedef double data_t;
int N,M;//N行,M列,M = N+1
data_t * matrix_A;//X的系数矩阵
data_t * B;//y向量
data_t * X;//x的解向量
int *shift_table;

//通过A(i,j)得到,x系数矩阵中[i,j]元素的地址
data_t * A(int i,int j){
    return matrix_A+i*N+j;
}
void print_matrix();

int main()
{
    FILE * f = fopen("data.txt","r");
    int i = 0,j = 0,index = 0,row = 0,temp = 0;
   
    fscanf(f,"%d%d",&N,&M);
    matrix_A = (data_t *)_malloc_align(sizeof(data_t)*N*N,7);
    B = (data_t *)_malloc_align(sizeof(data_t)*N,7);
    X = (data_t *)_malloc_align(sizeof(data_t)*N,7);
    shift_table = (int*)_malloc_align(sizeof(int)*N,7);
  
    //get matrix from file
    for(i = 0 ; i < N ; i++){
        for(j = 0 ; j < N ; j++){
            fscanf(f,"%lf",A(i,j));  
            printf("%lf ",*A(i,j));
        }
        fscanf(f,"%lf",B+i);
        printf("%lf\n",*(B+i));
    }    
    //init shift table
    for(i = 0 ; i < N ; i++){
        shift_table[i] = i;
    }
    //computer
    for(i = 0 ; i < N-1 ; i++){
        //选主元
    index = cblas_idamax(N-i,A(i,i),1)+i;//cblas_idamax 选择最大绝对值的blas库
    printf("index:%d\n",index);
        //列交换
        if(index != i){
           temp = shift_table[i]; 
       shift_table[i] = shift_table[index];     
           shift_table[index] = temp;
           for(row = 0 ; row < N ; row++){
               data_t temp = *A(row,i);  
               *(A(row,i)) = *A(row,index); 
               *A(row,index) = temp;
           }
        }
    for(row = 0 ; row < N ; row++)
        printf("%d ",shift_table[row]);
    printf("\n");
        //主元归一化
        // 消元
        for(row = i+1 ; row < N ; row++){
        if(*A(row,i) == 0) continue;   
        data_t factor = -1*(*A(row,i)/ *A(i,i));
        cblas_daxpy(N-i,factor,A(i,i),1,A(row,i),1);//cblas_daxpy( N, alpha,X,incX, Y, incY); Y=Y+alpha*X
        *(B+row) = *(B+row) + *(B+i)*factor;   
        }
        print_matrix();
      
    }
    //回代,i代表了Xi,col代表了列号,用来计算前面已经计算的xi值的sum
    for(i = N-1 ; i >= 0 ; i–){
        data_t sum = 0;
        for( j = i+1 ; j < N ; j++){
            sum += *A(i,j)*X[j];  
        }
        X[i] = (*(B+i) – sum) / *A(i,i);
    }
    //恢复,其中shift table保存了原来的列号
    for(i = 0 ; i < N ; i++){
        printf("x%d:%lf ",shift_table[i],X[i]);
    } 
              

 
}
void print_matrix(){
    int i,j;   
    printf("matrix:\n");
    for(i = 0 ; i < N ; i++){
        for(j = 0 ; j < N ; j++){
                printf("%lf ",*A(i,j));
        }
        printf("%lf\n",*(B+i));
    }

}

三.使用mpi的基本实现

 

四.使用mpi的cell实现

 

五.使用mpi的异构实现

 

六.使用mpi的异构适用性实现

 #include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <cblas.h>
#include "mpi.h"
#include "malloc_align.h"

void environment_init();
typedef double data_t;
int N,M;//N行,M列,M = N+1
data_t * matrix_A;//X的系数矩阵
data_t * B;//y向量
data_t * X;//x的解向量
int *shift_table;
MPI_Status status;

//通过A(i,j)得到,x系数矩阵中[i,j]元素的地址
data_t * A(int i,int j){
    return matrix_A+i*M+j;
}
void print_matrix();

int main(int argc,char **argv)
{
    FILE * f;
    int i = 0,j = 0,index = 0,row = 0;
    int group_size;
    int *row_rank_table;
    int process_rownum;
    int myrank;    
    data_t * mainrow_space;
    data_t ** row_address_table;    
        

    MPI_Init(&argc,&argv);
    MPI_Comm_size(MPI_COMM_WORLD,&group_size);
    MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
    //printf("mpi:%d %d\n",group_size,myrank);
                
    if(myrank == 0){
        f= fopen("/home/duanple/CBLAS/data.txt","r");            
        fscanf(f,"%d%d",&N,&M);
        matrix_A = (data_t *)_malloc_align(sizeof(data_t)*N*M,7);
        B = (data_t *)_malloc_align(sizeof(data_t)*N,7);
        X = (data_t *)_malloc_align(sizeof(data_t)*N,7);    
        //get matrix from file
        for(i = 0 ; i < N ; i++){
             for(j = 0 ; j < N ; j++){
                fscanf(f,"%lf",A(i,j));   
        }
        fscanf(f,"%lf",A(i,j));
    *(B+i) = *A(i,j);
        }     
    //print_matrix();
    }    
    //broadcast the process row to be in,and rows num in every process
    MPI_Bcast(&N,1,MPI_INT,0,MPI_COMM_WORLD);    
    M = N+1;
    //printf("rank:%d m:%d\n",myrank,M);    
    //malloc the memory for the row of matrix
    shift_table = (int*)_malloc_align(sizeof(int)*N,7);    
    row_rank_table = (int *)_malloc_align(sizeof(int)*N,7);      
    row_address_table = (data_t**)_malloc_align(sizeof(data_t*)*N,7);       
    //init shift table
    for(i = 0 ; i < N ; i++){
        shift_table[i] = i;
    }
        
    //divide matrix
    if(myrank == 0){    
    for(i = 0,j = 0 ; i < N ; i++,j++){
        row_rank_table[i] = j%group_size;                
    }    
    }
    MPI_Bcast(row_rank_table,N,MPI_INT,0,MPI_COMM_WORLD);

    process_rownum = 0;
    for(i = 0 ; i < N ; i++){
    //printf("rank:%d row%d\n",myrank,row_rank_table[i]);
    if(row_rank_table[i] == myrank){
        row_address_table[i] =  (data_t *)_malloc_align(sizeof(data_t)*M,7);    
        process_rownum++;
    }
    }    
    //
    //printf("rank:%d rows:%d\n",myrank,process_rownum);    
    //root send the data to the processes.
    if(myrank == 0){//send
        for(i = 0 ; i < N ; i++){
       if(row_rank_table[i] == 0){
        row_address_table[i] = A(i,0);    
       }else{    
            MPI_Send(A(i,0),N,MPI_DOUBLE,row_rank_table[i],i,MPI_COMM_WORLD);    
            MPI_Send(B+i,1,MPI_DOUBLE,row_rank_table[i],i,MPI_COMM_WORLD);    
       }
        }
    }else{//receive
        for(i = 0 ; i < N ; i++){
       if(myrank !=0 && myrank == row_rank_table[i]){
           MPI_Recv(row_address_table[i],N,MPI_DOUBLE,0,i,MPI_COMM_WORLD,&status);    
           MPI_Recv(row_address_table[i]+N,1,MPI_DOUBLE,0,i,MPI_COMM_WORLD,&status);    
       }
        }
    }
 
   //printf the revecive result.
   /*
   for(i = 0 ; i < N ; i++){    
    if(myrank != row_rank_table[i]) continue;
       printf("rank%d-row%d:\n",myrank,row_rank_table[i]);    
    for(j = 0 ; j < M ; j++){
       printf("%lf ",*(row_address_table[i]+j));
    }
    printf("\n");
    }    */
    //computer
    //消元
    mainrow_space = (data_t *)_malloc_align(sizeof(data_t)*M,7);//存放主行的临时空间
    for(i = 0 ; i < N-1 ; i++){
    if(myrank == row_rank_table[i]){

//上面的条件,可能造成同一个进程上的其他行不被处理。

            //选主元
        //printf("myrank:%d\n",myrank);for(j = 0 ; j < M ; j++){printf("%lf ",*(row_address_table[i]+j));}printf("\n");    
        index = cblas_idamax(N-i,row_address_table[i]+i,1)+i;
            //列交换
                if(index != i){
                        int temp = shift_table[i];  
               shift_table[i] = shift_table[index];      
                   shift_table[index] = temp;
            
            data_t data_temp = *(row_address_table[i]+i);
            *(row_address_table[i]+i) = *(row_address_table[i]+index);
            *(row_address_table[i]+index) = data_temp;
            }
        //拷贝主行元素到临时空间
        //printf("myrank:%d\n",myrank);for(j = 0 ; j < M ; j++){printf("%lf ",*(row_address_table[i]+j));}printf("\n");    
        memcpy(mainrow_space,row_address_table[i],sizeof(data_t)*M);
        //printf("myrank:%d\n",myrank);for(j = 0 ; j < M ; j++){printf("%lf ",*(row_address_table[i]+j));}printf("\n");    
    }    
    //其他行的列交换
        MPI_Bcast(&index,1,MPI_INT,row_rank_table[i],MPI_COMM_WORLD);
    if(myrank != row_rank_table[i]){

//上面的条件,可能造成同一个进程上的其他行不被处理。
                if(index != i){
                        int temp = shift_table[i];  
               shift_table[i] = shift_table[index];      
                   shift_table[index] = temp;
            //swap the other rows
            for(j = 0 ; j < N ; j++){
                if(myrank == row_rank_table[j]){
                data_t data_temp = *(row_address_table[j]+i);
                *(row_address_table[j]+i) = *(row_address_table[j]+index);
                *(row_address_table[j]+index) = data_temp;
                }
            }//end of for
            }
    
    }    
    
    //发送主行元素
        MPI_Bcast(mainrow_space,M,MPI_DOUBLE,row_rank_table[i],MPI_COMM_WORLD);
        //主元归一化
    if(myrank == row_rank_table[i]){

//注意,上面这个条件是错的,因为当前rank上还有其他行,这样会导致其他行不被处理。
        //if(*(row_address_table[i]+i) == 0){continue;}
        //printf("myrank:%d\n",myrank);for(j = 0 ; j < M ; j++){printf("%lf ",mainrow_space[j]);}printf("\n");    

    }else{
        for(j = 0 ; j < N ; j++){
        //处理该进程所分配的行。
        if(j != i && myrank == row_rank_table[j]){
           if(*(row_address_table[j]+i) == 0){continue;}
                // 消元
               data_t factor = -1*(*(row_address_table[j]+i)/mainrow_space[i]);
           cblas_daxpy(N,factor,mainrow_space,1,row_address_table[j],1);
           *(row_address_table[j]+N) =  *(row_address_table[j]+N) +mainrow_space[N]*factor;//elem of B
        }

        }

    }
    }

   //
 
   //printf the revecive result.
   /*
   for(i = 0 ; i < N ; i++){    
    if(myrank != row_rank_table[i]) continue;
       printf("rank%d-row%d:\n",myrank,row_rank_table[i]);    
    for(j = 0 ; j < M ; j++){
       printf("%lf ",*(row_address_table[i]+j));
    }
    printf("\n");
    }    */
    
    //回代
    data_t sum = 0;

//注意一个进程不只一个行,因此只安排一个sum也是不够的。
    for(i = N-1 ; i>= 0 ; i–){
    if(myrank == row_rank_table[i]){
        mainrow_space[i] = (*(row_address_table[i]+N)-sum)/(*(row_address_table[i]+i));//(*(B+i) – sum) / *A(i,i);    
    }
        MPI_Bcast(mainrow_space+i,1,MPI_DOUBLE,row_rank_table[i],MPI_COMM_WORLD);
    for(j = 0 ; j < i ; j++){
    //处理该进程所分配的行。
        if(myrank == row_rank_table[j]){
          sum += *(row_address_table[j]+i)*mainrow_space[i];
        }

    }
                
    
    }
    //printf res
    for(i = 0 ; myrank == 0 && i < N ; i++){
        printf("x%d:%lf ",shift_table[i],mainrow_space[i]);
    }  

    /*        
    //computer
    for(i = 0 ; i < N-1 ; i++){
        //选主元
    index = cblas_idamax(N-i,A(i,i),1)+i;//cblas_idamax 选择最大绝对值的blas库
    //printf("index:%d\n",index);
        //列交换
        if(index != i){
           temp = shift_table[i];  
       shift_table[i] = shift_table[index];      
           shift_table[index] = temp;
           for(row = 0 ; row < N ; row++){
               data_t temp = *A(row,i);   
               *(A(row,i)) = *A(row,index);  
               *A(row,index) = temp;
           }
        }
    for(row = 0 ; row < N ; row++)
        printf("%d ",shift_table[row]);
    printf("\n");
        //主元归一化
        // 消元
        for(row = i+1 ; row < N ; row++){
        if(*A(row,i) == 0) continue;    
        data_t factor = -1*(*A(row,i)/ *A(i,i));
        cblas_daxpy(N-i,factor,A(i,i),1,A(row,i),1);//cblas_daxpy( N, alpha,X,incX, Y, incY); Y=Y+alpha*X
        *(B+row) = *(B+row) + *(B+i)*factor;    
        }
        //print_matrix();
       
    }
    //回代,i代表了Xi,col代表了列号,用来计算前面已经计算的xi值的sum
    for(i = N-1 ; i >= 0 ; i–){
        data_t sum = 0;
        for( j = i+1 ; j < N ; j++){
            sum += *A(i,j)*X[j];   
        }
        X[i] = (*(B+i) – sum) / *A(i,i);
    }
    //恢复,其中shift table保存了原来的列号
    for(i = 0 ; i < N ; i++){
        printf("x%d:%lf ",shift_table[i],X[i]);
    }  
               */
    MPI_Finalize();    

 
}
void print_matrix(){
    int i,j;    
    printf("matrix:\n");
    for(i = 0 ; i < N ; i++){
        for(j = 0 ; j < N ; j++){
                printf("%lf ",*A(i,j));
        }
        printf("%lf\n",*(B+i));
    }

}

修改后的实现:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <cblas.h>
#include "mpi.h"
#include "malloc_align.h"

void environment_init();
typedef double data_t;
int N,M;//N行,M列,M = N+1
data_t * matrix_A;//X的系数矩阵
data_t * B;//y向量
data_t * X;//x的解向量
int *shift_table;
MPI_Status status;

//通过A(i,j)得到,x系数矩阵中[i,j]元素的地址
data_t * A(int i,int j){
    return matrix_A+i*M+j;
}
void print_matrix();

int main(int argc,char **argv)
{
    FILE * f;
    int i = 0,j = 0,index = 0,row = 0;
    int group_size;
    int *row_rank_table;
    int process_rownum;
    int myrank;   
    data_t * mainrow_space;
    data_t ** row_address_table;   
       

    MPI_Init(&argc,&argv);
    MPI_Comm_size(MPI_COMM_WORLD,&group_size);
    MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
    //printf("mpi:%d %d\n",group_size,myrank);
               
    if(myrank == 0){
        f= fopen("/home/duanple/CBLAS/data.txt","r");           
        fscanf(f,"%d%d",&N,&M);
        matrix_A = (data_t *)_malloc_align(sizeof(data_t)*N*M,7);
        B = (data_t *)_malloc_align(sizeof(data_t)*N,7);
        X = (data_t *)_malloc_align(sizeof(data_t)*N,7);   
        //get matrix from file
        for(i = 0 ; i < N ; i++){
             for(j = 0 ; j < N ; j++){
                fscanf(f,"%lf",A(i,j));  
        }
        fscanf(f,"%lf",A(i,j));//A(i,N) also stores B[i]
    *(B+i) = *A(i,j);
        }    
    //print_matrix();
    }   
    //broadcast the process row to be in,and rows num in every process
    MPI_Bcast(&N,1,MPI_INT,0,MPI_COMM_WORLD);     
    M = N+1;
    //printf("rank:%d m:%d\n",myrank,M);   
    //malloc the memory for the row of matrix
    shift_table = (int*)_malloc_align(sizeof(int)*N,7);   
    row_rank_table = (int *)_malloc_align(sizeof(int)*N,7);     
    row_address_table = (data_t**)_malloc_align(sizeof(data_t*)*N,7);      
    //init shift table
    for(i = 0 ; i < N ; i++){
        shift_table[i] = i;
    }
       
    //divide matrix
    if(myrank == 0){   
    for(i = 0,j = 0 ; i < N ; i++,j++){
        row_rank_table[i] = j%group_size;               
    }   
    }
    MPI_Bcast(row_rank_table,N,MPI_INT,0,MPI_COMM_WORLD);

    process_rownum = 0;
    for(i = 0 ; i < N ; i++){
    //printf("rank:%d row%d\n",myrank,row_rank_table[i]);
    if(row_rank_table[i] == myrank){
        row_address_table[i] =  (data_t *)_malloc_align(sizeof(data_t)*M,7);   
        process_rownum++;
    }
    }   
    //
    //printf("rank:%d rows:%d\n",myrank,process_rownum);   
    //root send the data to the processes.
    if(myrank == 0){//send
        for(i = 0 ; i < N ; i++){
       if(row_rank_table[i] == 0){
        row_address_table[i] = A(i,0);   
       }else{   
            MPI_Send(A(i,0),N,MPI_DOUBLE,row_rank_table[i],i,MPI_COMM_WORLD);   
            MPI_Send(B+i,1,MPI_DOUBLE,row_rank_table[i],i,MPI_COMM_WORLD);   
       }
        }
    }else{//receive
        for(i = 0 ; i < N ; i++){
       if(myrank !=0 && myrank == row_rank_table[i]){
           MPI_Recv(row_address_table[i],N,MPI_DOUBLE,0,i,MPI_COMM_WORLD,&status);   
           MPI_Recv(row_address_table[i]+N,1,MPI_DOUBLE,0,i,MPI_COMM_WORLD,&status);   
       }
        }
    }
 
   //printf the revecive result.
   /*
   for(i = 0 ; i < N ; i++){   
    if(myrank != row_rank_table[i]) continue;
       printf("rank%d-row%d:\n",myrank,row_rank_table[i]);   
    for(j = 0 ; j < M ; j++){
       printf("%lf ",*(row_address_table[i]+j));
    }
    printf("\n");
    }    */
    //computer
    //消元
    mainrow_space = (data_t *)_malloc_align(sizeof(data_t)*M,7);//存放主行的临时空间
    for(i = 0 ; i < N-1 ; i++){
    if(myrank == row_rank_table[i]){
            //选主元
        //printf("myrank:%d\n",myrank);for(j = 0 ; j < M ; j++){printf("%lf ",*(row_address_table[i]+j));}printf("\n");   
        index = cblas_idamax(N-i,row_address_table[i]+i,1)+i;
            //列交换
                if(index != i){
                        int temp = shift_table[i]; 
               shift_table[i] = shift_table[index];     
                   shift_table[index] = temp;
           
            data_t data_temp = *(row_address_table[i]+i);
            *(row_address_table[i]+i) = *(row_address_table[i]+index);
            *(row_address_table[i]+index) = data_temp;
            }
        //拷贝主行元素到临时空间
        //printf("myrank:%d\n",myrank);for(j = 0 ; j < M ; j++){printf("%lf ",*(row_address_table[i]+j));}printf("\n");   
        memcpy(mainrow_space,row_address_table[i],sizeof(data_t)*M);
        //printf("myrank:%d\n",myrank);for(j = 0 ; j < M ; j++){printf("%lf ",*(row_address_table[i]+j));}printf("\n");   
    }   
    //其他行的列交换
        MPI_Bcast(&index,1,MPI_INT,row_rank_table[i],MPI_COMM_WORLD);
    //修改列交换标志,以进程为单位,不是以行
    if(myrank != row_rank_table[i]){
                  int temp = shift_table[i]; 
           shift_table[i] = shift_table[index];     
               shift_table[index] = temp;
    }
    for(j = 0 ; j < N ; j++){
        if(j != i && myrank == row_rank_table[j]){
            data_t data_temp = *(row_address_table[j]+i);
            *(row_address_table[j]+i) = *(row_address_table[j]+index);
            *(row_address_table[j]+index) = data_temp;
        }   
    }
   
    //发送主行元素
        MPI_Bcast(mainrow_space,M,MPI_DOUBLE,row_rank_table[i],MPI_COMM_WORLD);
    //printf("myrank:%d\n",myrank);for(j = 0 ; j < M ; j++){printf("%lf ",mainrow_space[j]);}printf("\n");
        //主元归一化
    for(j = i+1 ; j < N ; j++){
    //处理该进程所分配的行。
        if(myrank == row_rank_table[j]){
            if(*(row_address_table[j]+i) == 0){continue;}
               // 消元
            data_t factor = -1*(*(row_address_table[j]+i)/mainrow_space[i]);
            cblas_daxpy(N,factor,mainrow_space,1,row_address_table[j],1);
            *(row_address_table[j]+N) =  *(row_address_table[j]+N) +mainrow_space[N]*factor;//elem of B
        }

    }

   
    }

   //
 
   //printf the  result.
   /*
   for(i = 0 ; i < N ; i++){   
    if(myrank != row_rank_table[i]) continue;
       printf("rank%d-row%d:\n",myrank,row_rank_table[i]);   
    for(j = 0 ; j < M ; j++){
       printf("%lf ",*(row_address_table[i]+j));
    }
    printf("\n");
    }   
    */
    //回代
    for(i = N-1 ; i>= 0 ; i–){
    if(myrank == row_rank_table[i]){
        mainrow_space[i] = *(row_address_table[i]+N)/(*(row_address_table[i]+i));//(*(B+i) – sum) / *A(i,i);   
    }
        MPI_Bcast(mainrow_space+i,1,MPI_DOUBLE,row_rank_table[i],MPI_COMM_WORLD);
    for(j = 0 ; j < i ; j++){
    //处理该进程所分配的行。
        if(myrank == row_rank_table[j]){
          *(row_address_table[j]+N) -= *(row_address_table[j]+i)*mainrow_space[i];
        }

    }
               
   
    }
    //printf res
   
    for(i = 0 ; myrank == 0 && i < N ; i++){
        printf("x%d:%lf ",shift_table[i],mainrow_space[i]);
    } 
    MPI_Finalize();   
 
}
void print_matrix(){
    int i,j;   
    printf("matrix:\n");
    for(i = 0 ; i < N ; i++){
        for(j = 0 ; j < N ; j++){
                printf("%lf ",*A(i,j));
        }
        printf("%lf\n",*(B+i));
    }

}

#############################################################################

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <cblas.h>
#include "mpi.h"
#include "malloc_align.h"

void environment_init();
typedef double data_t;
int N,M;//N行,M列,M = N+1
data_t * matrix_A;//X的系数矩阵
data_t * B;//y向量
data_t * X;//x的解向量
int *shift_table;
MPI_Status status;

//通过A(i,j)得到,x系数矩阵中[i,j]元素的地址
data_t * A(int i,int j){
    return matrix_A+i*M+j;
}
void print_matrix();
double * processor_test(MPI_Comm comm,void test(),long loops,double comp_size,int master_node);
void data_malloc(double * processor_descrip,int * rank_table,int matrix_size);
void linear_solver(MPI_Comm comm);

void test(){
    double a = 1.0;
    double b = 2.44;
    double c = a*b;

}
int main(int argc,char **argv)
{
       

    MPI_Init(&argc,&argv);
    processor_test(MPI_COMM_WORLD,test,1000,1.0,0);
    //linear_solver(MPI_COMM_WORLD);
    MPI_Finalize();   
 
}

//test the computer ability of processors
double * processor_test(MPI_Comm comm,void test(),long loops,double comp_size,int master_node){
    int myrank;
    int group_size,i;
    double start_time,end_time,time;
    double * processor_able;
    MPI_Comm_size(comm,&group_size);
    MPI_Comm_rank(comm,&myrank);
    if(myrank == master_node){
        processor_able = (double *)_malloc_align(sizeof(double)*group_size+2,7);   
        processor_able[0] = group_size;
        processor_able[1] = comp_size*loops;
    } else
        processor_able = NULL;  

    start_time = MPI_Wtime();
    for(i = 0; i < loops ; i++)
        test();   
    end_time = MPI_Wtime();
    time = end_time-start_time;
    printf("rank%d %lf\n",myrank,time);   
   
    MPI_Gather(&time,1,MPI_DOUBLE,processor_able+2,1,MPI_DOUBLE,master_node,comm);
   
    //print the test result.
    if(myrank == master_node){
        for(i = 0 ; i < group_size ; i++){
            printf("%d %lf ",i,processor_able[i+2]);
        }

    }

    return processor_able;   

}
void data_malloc(double * processor_descrip,int * rank_table,int matrix_size){
    int p_num =(int)processor_descrip[0];
   

}
void linear_solver(MPI_Comm comm){

    FILE * f;
    int i = 0,j = 0,index = 0,row = 0;
    int group_size;
    int *row_rank_table;
    int process_rownum;
    int myrank;   
    data_t * mainrow_space;
    data_t ** row_address_table;   

    MPI_Comm_size(comm,&group_size);
    MPI_Comm_rank(comm,&myrank);
    //printf("mpi:%d %d\n",group_size,myrank);
               
    if(myrank == 0){
        f= fopen("/home/duanple/CBLAS/data.txt","r");           
        fscanf(f,"%d%d",&N,&M);
        matrix_A = (data_t *)_malloc_align(sizeof(data_t)*N*M,7);
        B = (data_t *)_malloc_align(sizeof(data_t)*N,7);
        X = (data_t *)_malloc_align(sizeof(data_t)*N,7);   
        //get matrix from file
        for(i = 0 ; i < N ; i++){
             for(j = 0 ; j < N ; j++){
                fscanf(f,"%lf",A(i,j));  
        }
        fscanf(f,"%lf",A(i,j));//A(i,N) also stores B[i]
    *(B+i) = *A(i,j);
        }    
    //print_matrix();
    }   
    //broadcast the process row to be in,and rows num in every process
    MPI_Bcast(&N,1,MPI_INT,0,comm);     
    M = N+1;
    //printf("rank:%d m:%d\n",myrank,M);   
    //malloc the memory for the row of matrix
    shift_table = (int*)_malloc_align(sizeof(int)*N,7);   
    row_rank_table = (int *)_malloc_align(sizeof(int)*N,7);     
    row_address_table = (data_t**)_malloc_align(sizeof(data_t*)*N,7);      
    //init shift table
    for(i = 0 ; i < N ; i++){
        shift_table[i] = i;
    }
       
    //divide matrix
    if(myrank == 0){   
    for(i = 0,j = 0 ; i < N ; i++,j++){
        row_rank_table[i] = j%group_size;               
    }   
    }
    MPI_Bcast(row_rank_table,N,MPI_INT,0,comm);

    process_rownum = 0;
    for(i = 0 ; i < N ; i++){
    //printf("rank:%d row%d\n",myrank,row_rank_table[i]);
    if(row_rank_table[i] == myrank){
        row_address_table[i] =  (data_t *)_malloc_align(sizeof(data_t)*M,7);   
        process_rownum++;
    }
    }   
    //
    //printf("rank:%d rows:%d\n",myrank,process_rownum);   
    //root send the data to the processes.
    if(myrank == 0){//send
        for(i = 0 ; i < N ; i++){
       if(row_rank_table[i] == 0){
        row_address_table[i] = A(i,0);   
       }else{   
            MPI_Send(A(i,0),N,MPI_DOUBLE,row_rank_table[i],i,comm);   
            MPI_Send(B+i,1,MPI_DOUBLE,row_rank_table[i],i,comm);   
       }
        }
    }else{//receive
        for(i = 0 ; i < N ; i++){
       if(myrank !=0 && myrank == row_rank_table[i]){
           MPI_Recv(row_address_table[i],N,MPI_DOUBLE,0,i,comm,&status);   
           MPI_Recv(row_address_table[i]+N,1,MPI_DOUBLE,0,i,comm,&status);   
       }
        }
    }
 
   //printf the revecive result.
   /*
   for(i = 0 ; i < N ; i++){   
    if(myrank != row_rank_table[i]) continue;
       printf("rank%d-row%d:\n",myrank,row_rank_table[i]);   
    for(j = 0 ; j < M ; j++){
       printf("%lf ",*(row_address_table[i]+j));
    }
    printf("\n");
    }    */
    //computer
   

You Might Also Like

4 Comments

  • nike air max zero mujer 2019年5月14日 at 下午2:07

    nike air max zero mujer

    adidas stan smith pharrell tactile rose m臋skieadidas hardwood m忙ndnike air max 90 essential zapatillas para hombre negro 40 euair jordan v pinnacle heren

  • nike air jordan hydro 6 cool grey white wolf grey m臋skie 2019年5月20日 at 下午5:20

    nike air jordan hydro 6 cool grey white wolf grey m臋skie

    nike jr mercurial superfly vi academy neymar jr mg herre skonike herren 819213 018 fu脽ballschuhe schwarz uknike air force 1 mid 07 players m臋skienike vapor street flyknit red aq1763 600 02 uomo

  • zapatillas de futbol con tacos nike outlet baratas espa帽a 2018 2019年6月20日 at 下午5:25

    zapatillas de futbol con tacos nike outlet baratas espa帽a 2018

    nike roshe ld 1000 chaussures de course hommeofferte scarpe nike air max vertnike herren jordan executive turnschuhe grau 41 eunike herren air max 95 prm gymnastikschuhe schwarz

  • nike air max 90 mens black and grey 2019年6月24日 at 下午3:29

    nike air max 90 mens black and grey

    nike womens air max 90 premium bbnike air max tavas jordan adidas reebok pumacaliente nike air max 90 mujer blancas cnb05nike air max 97 ultra 17 femme chaussures 917704 008

  • Leave a Reply