TABLE OF CONTENTS
ABINIT/indirect_parallel_Fourier [ Functions ]
NAME
indirect_parallel_Fourier
FUNCTION
The purpose of this routine is to transfer data from right to left right(:,index(i))=left(:,i) The difficulty is that right and left are distributed among processors We will suppose that the distribution is done as a density in Fourier space We first order the right hand side data according to the processor in which they are going to be located in the left hand side. This is done is a way such that a mpi_alltoall put the data on the correct processor. We also transfer their future adress. A final ordering put everything in place
COPYRIGHT
Copyright (C) 1998-2018 ABINIT group (GZ) This file is distributed under the terms of the GNU General Public License, see ~abinit/COPYING or http://www.gnu.org/copyleft/gpl.txt . For the initials of contributors, see ~abinit/doc/developers/contributors.txt.
INPUTS
index(sizeindex)= global adress for the transfer from right to left left(2,nleft)=left hand side mpi_enreg=information about MPI parallelization ngleft(18)=contain all needed information about 3D FFT for the left hand side see ~abinit/doc/variables/vargs.htm#ngfft ngright(18)=contain all needed information about 3D FFT for the right hand side see ~abinit/doc/variables/vargs.htm#ngfft nleft=second dimension of left array (for this processor) nright=second dimension of right array (for this processor) sizeindex=size of the index array (different form nright, because it is global to all proccessors)
OUTPUT
left(2,nleft)=the elements of the right hand side, at the correct palce in the correct processor
NOTES
A lot of things to improve.
PARENTS
prcref,prcref_PMA,transgrid
CHILDREN
mpi_alltoall,ptabs_fourdp
SOURCE
50 #if defined HAVE_CONFIG_H 51 #include "config.h" 52 #endif 53 54 #include "abi_common.h" 55 56 subroutine indirect_parallel_Fourier(index,left,mpi_enreg,ngleft,ngright,nleft,nright,paral_kgb,right,sizeindex) 57 58 use m_profiling_abi 59 use m_errors 60 use defs_basis 61 use defs_abitypes 62 63 use m_mpinfo, only : ptabs_fourdp 64 65 #if defined HAVE_MPI2 66 use mpi 67 #endif 68 69 !This section has been created automatically by the script Abilint (TD). 70 !Do not modify the following lines by hand. 71 #undef ABI_FUNC 72 #define ABI_FUNC 'indirect_parallel_Fourier' 73 !End of the abilint section 74 75 implicit none 76 77 #if defined HAVE_MPI1 78 include 'mpif.h' 79 #endif 80 81 !Arguments --------------------------------------------- 82 !scalars 83 integer,intent(in) :: ngleft(18),ngright(18),nleft,nright,paral_kgb,sizeindex 84 type(MPI_type),intent(in) :: mpi_enreg 85 !arrays 86 integer,intent(in) :: index(sizeindex) 87 real(dp),intent(in) :: right(2,nright) 88 real(dp),intent(inout) :: left(2,nleft) 89 90 !Local variables --------------------------------------- 91 !scalars 92 integer :: ierr,i_global,ileft,iright,iright_global 93 integer :: j,j1,j2,j3,j_global,jleft_global 94 integer :: jleft_local,me_fft,n1l,n2l,n3l,n1r,n2r,n3r,nd2l,nd2r 95 integer :: nproc_fft,proc_dest,r2,siz_slice_max 96 !arrays 97 integer,allocatable :: index_recv(:),index_send(:),siz_slice(:), ffti2r_global(:) 98 integer, ABI_CONTIGUOUS pointer :: fftn2l_distrib(:),ffti2l_local(:) 99 integer, ABI_CONTIGUOUS pointer :: fftn3l_distrib(:),ffti3l_local(:) 100 integer, ABI_CONTIGUOUS pointer :: fftn2r_distrib(:),ffti2r_local(:) 101 integer, ABI_CONTIGUOUS pointer :: fftn3r_distrib(:),ffti3r_local(:) 102 real(dp),allocatable :: right_send(:,:),right_recv(:,:) 103 104 ! ************************************************************************* 105 n1r=ngright(1);n2r=ngright(2);n3r=ngright(3) 106 n1l=ngleft(1) ;n2l=ngleft(2) ;n3l=ngleft(3) 107 nproc_fft=mpi_enreg%nproc_fft; me_fft=mpi_enreg%me_fft 108 nd2r=n2r/nproc_fft; nd2l=n2l/nproc_fft 109 110 !Get the distrib associated with the left fft_grid 111 call ptabs_fourdp(mpi_enreg,n2l,n3l,fftn2l_distrib,ffti2l_local,fftn3l_distrib,ffti3l_local) 112 113 !Get the distrib associated with the right fft_grid 114 call ptabs_fourdp(mpi_enreg,n2r,n3r,fftn2r_distrib,ffti2r_local,fftn3r_distrib,ffti3r_local) 115 116 !Precompute local --> global corespondance 117 ABI_ALLOCATE(ffti2r_global,(nd2r)) 118 ffti2r_global(:) = -1 119 do j2=1,n2r 120 if( fftn2r_distrib(j2) == me_fft ) then 121 ffti2r_global( ffti2r_local(j2) ) = j2 122 end if 123 end do 124 125 126 ABI_ALLOCATE(siz_slice,(nproc_fft)) 127 siz_slice(:)=0 128 do i_global=1,sizeindex !look for the maximal size of slice of data 129 j_global=index(i_global)!; write(std_out,*) j_global,i_global 130 if(j_global /=0) then 131 !use the fact that (j-1)=i1 + n1l*(j2l-1 + n2l*(j3l-1)) 132 proc_dest= fftn2l_distrib( modulo((j_global-1)/n1l,n2l) + 1) 133 siz_slice(proc_dest+1)=siz_slice(proc_dest+1)+1 134 !DEBUG 135 !write(std_out,*) 'in indirect proc',proc_dest,siz_slice(proc_dest+1) 136 !ENDDEBUG 137 end if 138 end do 139 siz_slice_max=maxval(siz_slice) !This value could be made smaller by looking locally 140 !and performing a allgather with a max 141 !DEBUG 142 !write(std_out,*) 'siz_slice,sizeindex,siz_slice',siz_slice(:),sizeindex,siz_slice_max 143 !write(std_out,*) 'sizeindex,nright,nleft',sizeindex,nright,nleft 144 !ENDDEBUG 145 ABI_ALLOCATE(right_send,(2,nproc_fft*siz_slice_max)) 146 ABI_ALLOCATE(index_send,(nproc_fft*siz_slice_max)) 147 siz_slice(:)=0; index_send(:)=0; right_send(:,:)=zero 148 do iright=1,nright 149 j=iright-1;j1=modulo(j,n1r);j2=modulo(j/n1r,nd2r);j3=j/(n1r*nd2r) 150 j2 = ffti2r_global(j2+1) - 1 151 iright_global=n1r*(n2r*j3+j2)+j1+1 152 jleft_global=index(iright_global) 153 if(jleft_global/=0)then 154 j=jleft_global-1;j1=modulo(j,n1l);j2=modulo(j/n1l,n2l);j3=j/(n1l*n2l); r2=ffti2l_local(j2+1)-1 155 jleft_local=n1l*(nd2l*j3+r2)+j1+1 156 proc_dest=fftn2l_distrib(j2+1) 157 siz_slice(proc_dest+1)=siz_slice(proc_dest+1)+1 158 right_send(:,proc_dest*siz_slice_max+siz_slice(proc_dest+1))=right(:,iright) 159 index_send(proc_dest*siz_slice_max+siz_slice(proc_dest+1))=jleft_local 160 !DEBUG 161 ! write(std_out,*) 'loop ir',jleft_local,jleft_global,iright_global,iright 162 !ENDDEBUG 163 end if 164 end do 165 ABI_ALLOCATE(right_recv,(2,nproc_fft*siz_slice_max)) 166 ABI_ALLOCATE(index_recv,(nproc_fft*siz_slice_max)) 167 #if defined HAVE_MPI 168 if(paral_kgb == 1) then 169 call mpi_alltoall (right_send,2*siz_slice_max, & 170 & MPI_double_precision, & 171 & right_recv,2*siz_slice_max, & 172 & MPI_double_precision,mpi_enreg%comm_fft,ierr) 173 call mpi_alltoall (index_send,siz_slice_max, & 174 & MPI_integer, & 175 & index_recv,siz_slice_max, & 176 & MPI_integer,mpi_enreg%comm_fft,ierr) 177 endif 178 #endif 179 do ileft=1,siz_slice_max*nproc_fft 180 !DEBUG 181 !write(std_out,*)index_recv(ileft) 182 !ENDEBUG 183 if(index_recv(ileft) /=0 ) left(:,index_recv(ileft))=right_recv(:,ileft) 184 end do 185 ABI_DEALLOCATE(right_recv) 186 ABI_DEALLOCATE(index_recv) 187 ABI_DEALLOCATE(right_send) 188 ABI_DEALLOCATE(index_send) 189 ABI_DEALLOCATE(siz_slice) 190 ABI_DEALLOCATE(ffti2r_global) 191 192 end subroutine indirect_parallel_Fourier