我已经实现了一个可以正常工作的 MPI send/recv,但我想让代码组织得更好并提高性能,所以改用了 std::vector 和 std::map,并以块(chunk)的形式一次性发送数据,而不是逐个单独发送。
// working method (Individual send/recv)
// Excerpt: every face is transferred in its own message (count = 1), one
// MPI_Send on the compute rank matched by one MPI_Recv on rank 0.
// The closing braces of the loops and the `done` label are not part of this excerpt.
// ... here the struct fvm::F is turned into an MPI datatype STRUCT_FACE to be used ...
// main rank 0
// Receive buffers: key = compute rank, value = that rank's faces.
map<int, vector<fvm::F>> faces_buf;
// ... here faces_buf is initialized and the inside vectors are properly resized using vector.resize() ...
// the collection loop in main rank 0 (collects all faces from compute ranks 1, 2, 3, ...)
for(auto& fb : faces_buf){
int compute_rank = fb.first;
// n is the per-rank message counter; it restarts at 0 for every compute rank.
int n = 0;
for(auto& f : fb.second){
// One blocking receive per face, written straight into the pre-sized slot.
// The expected tag is 100000 + n, and n advances after every receive.
// NOTE(review): the sender tags with f.id + 100000, so this only matches if
// f.id equals the receive index n on each rank -- confirm.
MPI_Recv(&f, 1, STRUCT_FACE, compute_rank, (n++ + 100000), MPI_COMM_WORLD, MPI_STATUS_IGNORE);
// print results of collection
for(auto& fb : faces_buf){
for(auto& f : fb.second){
cerr << "f.T " << f.T << endl;
// Stop printing at the first face whose T is exactly zero.
if(f.T == 0.){
goto done;
// compute ranks 1, 2, 3, ...
vector<fvm::F> faces;
// ... here faces vector is filled with values ...
for(auto& f : faces){
// One blocking send per face to rank 0, tagged with 100000 + f.id.
MPI_Send(&f, 1, STRUCT_FACE, 0, (f.id + 100000), MPI_COMM_WORLD);
// Works as intended!
// failing method (Chunk send/recv)
// ... here the struct fvm::F is turned into an MPI datatype STRUCT_FACE to be used ...
// main rank 0
map<int, vector<fvm::F>> faces_buf;
// ... here faces_buf is initialized and the inside vectors are properly resized using vector.resize() ...
// the collection loop in main rank 0 (collects all faces from compute ranks 1, 2, 3, ...)
for(auto& fb : faces_buf){
int compute_rank = fb.first;
MPI_Recv(fb.second.data(), fb.second.size(), STRUCT_FACE, compute_rank, (compute_rank + 100000), MPI_COMM_WORLD, MPI_STATUS_IGNORE);
// print results of collection
for(auto& fb : faces_buf){
for(auto& f : fb.second){
cerr << "f.T " << f.T << endl;
if(f.T == 0.){
goto done;
// compute ranks 1, 2, 3, ...
vector<fvm::F> faces;
// ... here faces vector is filled with values ...
MPI_Isend(faces.data(), faces.size(), STRUCT_FACE, 0, (rank + 100000), MPI_COMM_WORLD, &reqf);
int flagf = 0;
MPI_Test(&reqf, &flagf, MPI_STATUS_IGNORE);
// Fails to receive the new fvm::F values; output printed by the loop on rank 0:
f.T 298.15
f.T 298.15
f.T 298.15
f.T 298.15
f.T 298.15
f.T 298.15
f.T 298.15
f.T 298.15
f.T 8.48798e-315
f.T 0