I want to scatter a matrix from the root to the other processors using MPI_Scatterv. I am creating the communicator topology with MPI_Cart_create. As an example, I have the code below in Fortran:
PROGRAM SendRecv
  USE mpi
  IMPLICIT none

  integer, PARAMETER :: m = 4, n = 4
  integer, DIMENSION(m,n) :: a, b, h
  integer :: i, j, count
  integer, allocatable, dimension(:,:) :: loc    ! local piece of global 2d array
  INTEGER :: istatus(MPI_STATUS_SIZE), ierr
  integer, dimension(2) :: sizes, subsizes, starts
  INTEGER :: ista, iend, jsta, jend, ilen, jlen
  INTEGER :: iprocs, jprocs, nprocs
  integer, allocatable, dimension(:) :: rcounts, displs
  INTEGER :: rcounts0, displs0
  integer, PARAMETER :: ROOT = 0
  integer :: dims(2), coords(2)
  logical :: periods(2)
  data periods/2*.false./
  integer :: status(MPI_STATUS_SIZE)
  integer :: comm2d, source, myrank
  integer :: newtype, resizedtype
  integer :: comsize, charsize
  integer(kind=MPI_ADDRESS_KIND) :: extent, begin

  CALL MPI_INIT(ierr)
  CALL MPI_COMM_SIZE(MPI_COMM_WORLD, nprocs, ierr)
  CALL MPI_COMM_RANK(MPI_COMM_WORLD, myrank, ierr)

  ! Get a new communicator for a decomposition of the domain.
  dims(1) = 0
  dims(2) = 0
  CALL MPI_DIMS_CREATE(nprocs, 2, dims, ierr)
  if (myrank.EQ.ROOT) then
     print *, nprocs, 'processors have been arranged into', dims(1), 'X', dims(2), 'grid'
  endif
  CALL MPI_CART_CREATE(MPI_COMM_WORLD, 2, dims, periods, .true., &
                       comm2d, ierr)

  ! Get my position in this communicator
  CALL MPI_COMM_RANK(comm2d, myrank, ierr)

  ! Get the decomposition
  CALL fnd2ddecomp(comm2d, m, n, ista, iend, jsta, jend)
  ! print *,ista,jsta,iend,jend
  ilen = iend - ista + 1
  jlen = jend - jsta + 1

  CALL MPI_Cart_get(comm2d, 2, dims, periods, coords, ierr)
  iprocs = dims(1)
  jprocs = dims(2)

  ! define the global matrix
  if (myrank == ROOT) then
     count = 0
     do j = 1, n
        do i = 1, m
           a(i,j) = count
           count = count + 1
        enddo
     enddo
     print *, 'global matrix is: '
     do 90 i = 1, m
        do 80 j = 1, n
           write(*,70) a(i,j)
70         format(2x,I5,$)
80      continue
        print *, ''
90   continue
  endif

  call MPI_Barrier(MPI_COMM_WORLD, ierr)

  ! subarray type describing this rank's block of the global array
  starts   = [0, 0]
  sizes    = [m, n]
  subsizes = [ilen, jlen]
  call MPI_Type_create_subarray(2, sizes, subsizes, starts,     &
                                MPI_ORDER_FORTRAN, MPI_INTEGER, &
                                newtype, ierr)
  call MPI_Type_size(MPI_INTEGER, charsize, ierr)
  begin  = 0
  extent = charsize
  call MPI_Type_create_resized(newtype, begin, extent, resizedtype, ierr)
  call MPI_Type_commit(resizedtype, ierr)

  ! get counts and displacements
  allocate(rcounts(nprocs), displs(nprocs))
  rcounts0 = 1
  displs0  = (ista-1) + (jsta-1)*m
  CALL MPI_Allgather(rcounts0, 1, MPI_INT, rcounts, 1, MPI_INT, MPI_COMM_WORLD, IERR)
  CALL MPI_Allgather(displs0,  1, MPI_INT, displs,  1, MPI_INT, MPI_COMM_WORLD, IERR)
  CALL MPI_Barrier(MPI_COMM_WORLD, ierr)

  ! scatter data
  allocate(loc(ilen,jlen))
  call MPI_Scatterv(a, rcounts, displs, resizedtype, &
                    loc, ilen*jlen, MPI_INTEGER,     &
                    ROOT, MPI_COMM_WORLD, ierr)

  ! print each processor's local matrix
  do source = 0, nprocs-1
     if (myrank.eq.source) then
        print *, 'myrank:', source
        do i = 1, ilen
           do j = 1, jlen
              write(*,701) loc(i,j)
701           format(2x,I5,$)
           enddo
           print *, ''
        enddo
     endif
     call MPI_Barrier(MPI_COMM_WORLD, ierr)
  enddo

  call MPI_Type_free(newtype, ierr)
  call MPI_Type_free(resizedtype, ierr)
  deallocate(rcounts, displs)
  deallocate(loc)
  CALL MPI_FINALIZE(ierr)

contains

  subroutine fnd2ddecomp(comm2d, m, n, ista, iend, jsta, jend)
    integer comm2d
    integer m, n, ista, jsta, iend, jend
    integer dims(2), coords(2), ierr
    logical periods(2)
    ! Get (i,j) position of a processor from Cartesian topology.
    CALL MPI_Cart_get(comm2d, 2, dims, periods, coords, ierr)
    ! Decomposition in first (i.e. X) direction
    CALL MPE_DECOMP1D(m, dims(1), coords(1), ista, iend)
    ! Decomposition in second (i.e. Y) direction
    CALL MPE_DECOMP1D(n, dims(2), coords(2), jsta, jend)
  end subroutine fnd2ddecomp

  SUBROUTINE MPE_DECOMP1D(n, numprocs, myid, s, e)
    integer n, numprocs, myid, s, e, nlocal, deficit
    nlocal  = n / numprocs
    s       = myid * nlocal + 1
    deficit = mod(n, numprocs)
    s       = s + min(myid, deficit)
    ! Give one more slice to the first mod(n,numprocs) processors
    if (myid .lt. deficit) then
       nlocal = nlocal + 1
    endif
    e = s + nlocal - 1
    if (e .gt. n .or. myid .eq. numprocs-1) e = n
  end subroutine MPE_DECOMP1D

END program SendRecv
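To double-check which block each rank is supposed to own, here is a minimal serial sketch (no MPI calls) that reuses the MPE_DECOMP1D logic and the displs0 formula from the program above. The program name check_decomp, the hard-coded 3x1 grid, and the row-major rank numbering are illustrative assumptions only, not part of the original code; setting iprocs = 2 and jprocs = 2 reproduces the 4-processor layout shown further below.

program check_decomp
  implicit none
  integer, parameter :: m = 4, n = 4
  integer :: iprocs, jprocs, ci, cj, rank
  integer :: ista, iend, jsta, jend, displs0

  ! Grid to inspect; MPI_DIMS_CREATE gives 3x1 for 3 ranks and 2x2 for 4 ranks.
  iprocs = 3
  jprocs = 1

  do ci = 0, iprocs-1
     do cj = 0, jprocs-1
        rank = ci*jprocs + cj                  ! assumed row-major Cartesian rank order
        call decomp1d(m, iprocs, ci, ista, iend)
        call decomp1d(n, jprocs, cj, jsta, jend)
        displs0 = (ista-1) + (jsta-1)*m        ! same formula as in the program above
        print *, 'rank', rank, ': rows', ista, '-', iend, ', cols', jsta, '-', jend, &
                 ', elements =', (iend-ista+1)*(jend-jsta+1), ', displ =', displs0
     enddo
  enddo

contains

  ! same logic as MPE_DECOMP1D in the main program
  subroutine decomp1d(n, numprocs, myid, s, e)
    integer, intent(in)  :: n, numprocs, myid
    integer, intent(out) :: s, e
    integer :: nlocal, deficit
    nlocal  = n / numprocs
    s       = myid*nlocal + 1
    deficit = mod(n, numprocs)
    s       = s + min(myid, deficit)
    if (myid < deficit) nlocal = nlocal + 1
    e = s + nlocal - 1
    if (e > n .or. myid == numprocs-1) e = n
  end subroutine decomp1d

end program check_decomp

For the 3x1 grid this prints 8 elements for rank 0 and 4 elements each for ranks 1 and 2, with displacements 0, 2 and 3 (in units of one integer, because of the resize).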
I am generating a 4x4 matrix and, using MPI_Scatterv, sending blocks of it to the other processors. The code works fine for 4, 2, and 16 processors, but throws an error for 3 processors. What modifications do I have to make so that it works for any given number of processors?
Global matrix on the root:
[  0   4   8  12
   1   5   9  13
   2   6  10  14
   3   7  11  15 ]
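(The fill loop runs down the columns, so a(i,j) = (i-1) + (j-1)*m; this is the same formula the program later uses for displs0 with (ista, jsta).)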
For 4 processors, each processor gets:
Rank 0: [0 4 1 5]
Rank 1: [8 12 9 13]
Rank 2: [2 6 3 7]
Rank 3: [10 14 11 15]
The code also works for 2 and 16 processors; in fact, it works whenever the sub-arrays are all the same size. It fails for 3 processors, where I am expecting:
Rank 0: [0 4 8 12 1 5 9 13]
Rank 1: [2 6 10 14]
Rank 2: [3 7 11 15]
But I am getting the following error:
Fatal error in PMPI_Scatterv: Message truncated, error stack:
PMPI_Scatterv(671)................: MPI_Scatterv(sbuf=0x6b58c0, scnts=0xf95d90, displs=0xfafbe0, dtype=USER<resized>, rbuf=0xfafc00, rcount=4, MPI_INTEGER, root=0, MPI_COMM_WORLD) failed
MPIR_Scatterv_impl(211)...........:
I_MPIR_Scatterv_intra(278)........: Failure during collective
I_MPIR_Scatterv_intra(272)........:
MPIR_Scatterv(147)................:
MPIDI_CH3U_Receive_data_found(131): Message from rank 0 and tag 6 truncated; 32 bytes received but buffer size is 16
Fatal error in PMPI_Scatterv: Message truncated, error stack:
PMPI_Scatterv(671)................: MPI_Scatterv(sbuf=0x6b58c0, scnts=0x240bda0, displs=0x240be60, dtype=USER<resized>, rbuf=0x240be80, rcount=4, MPI_INTEGER, root=0, MPI_COMM_WORLD) failed
MPIR_Scatterv_impl(211)...........:
I_MPIR_Scatterv_intra(278)........: Failure during collective
I_MPIR_Scatterv_intra(272)........:
MPIR_Scatterv(147)................:
MPIDI_CH3U_Receive_data_found(131): Message from rank 0 and tag 6 truncated; 32 bytes received but buffer size is 16
forrtl: error (69): process interrupted (SIGINT)
Image              PC                Routine            Line        Source
a.out              0000000000479165  Unknown               Unknown  Unknown
a.out              0000000000476D87  Unknown               Unknown  Unknown
a.out              000000000044B7C4  Unknown               Unknown  Unknown
a.out              000000000044B5D6  Unknown               Unknown  Unknown
a.out              000000000042DB76  Unknown               Unknown  Unknown
a.out              00000000004053DE  Unknown               Unknown  Unknown
libpthread.so.0    00007F2327456790  Unknown               Unknown  Unknown
libc.so.6          00007F2326EFE2F7  Unknown               Unknown  Unknown
libmpi.so.12       00007F2327B899E8  Unknown               Unknown  Unknown
libmpi.so.12       00007F2327C94E39  Unknown               Unknown  Unknown
libmpi.so.12       00007F2327C94B32  Unknown               Unknown  Unknown
libmpi.so.12       00007F2327B6E44A  Unknown               Unknown  Unknown
libmpi.so.12       00007F2327B6DD5D  Unknown               Unknown  Unknown
libmpi.so.12       00007F2327B6DBDC  Unknown               Unknown  Unknown
libmpi.so.12       00007F2327B6DB0C  Unknown               Unknown  Unknown
libmpi.so.12       00007F2327B6F932  Unknown               Unknown  Unknown
libmpifort.so.12   00007F2328294B1C  Unknown               Unknown  Unknown
a.out              000000000040488B  Unknown               Unknown  Unknown
a.out              000000000040385E  Unknown               Unknown  Unknown
libc.so.6          00007F2326E4DD5D  Unknown               Unknown  Unknown
a.out              0000000000403769  Unknown               Unknown  Unknown
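(For 3 processes, the rank 0 block is 2 x 4 = 8 integers = 32 bytes, while ranks 1 and 2 each allocate only 1 x 4 = 4 integers = 16 bytes, which matches the "32 bytes received but buffer size is 16" in the stack above.)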
Where am I going wrong, and what modifications do I have to make so that it works?