TABLE OF CONTENTS
ABINIT/prep_fourwf [ Functions ]
NAME
prep_fourwf
FUNCTION
this routine prepares the data to the call of fourwf.
COPYRIGHT
Copyright (C) 1998-2018 ABINIT group (FBottin,MT,GZ,FDahm) this file is distributed under the terms of the gnu general public license, see ~abinit/COPYING or http://www.gnu.org/copyleft/gpl.txt . for the initials of contributors, see ~abinit/doc/developers/contributors.txt .
INPUTS
blocksize= size of block for FFT cwavef(2,npw*ndat)=planewave coefficients of wavefunction (one spinorial component?). dtfil <type(datafiles_type)>=variables related to files gvnlc=matrix elements <G|Vnonlocal|C> kg_k(3,npw_k)=reduced planewave coordinates. lmnmax=if useylm=1, max number of (l,m,n) comp. over all type of psps =if useylm=0, max number of (l,n) comp. over all type of psps mgfft=maximum size of 1d ffts mpi_enreg=informations about mpi parallelization mpsang= 1+maximum angular momentum for nonlocal pseudopotentials mpssoang= 1+maximum (spin*angular momentum) for nonlocal pseudopotentials natom=number of atoms in cell. nband_k=number of bands at this k point for that spin polarization ndat=number of FFT to do in // ngfft(18)= contain all needed information about 3D FFT npw_k=number of plane waves at this k point nspinor=number of spinorial components of the wavefunctions ntypat=number of types of atoms in unit cell. n4,n5,n6 used for dimensionning of vlocal option_fourwf=option for fourwf (see fourwf.F90) prtvol=control print volume and debugging output ucvol=unit cell volume [bandfft_kpt_tab]= (optional) if present, contains tabs used to implement the "band-fft" parallelism if not present, the bandfft_kpt global variable is used [use_gpu_cuda]= (optional) 1 if Cuda (GPU) is on
OUTPUT
gwavef=(2,npw*ndat)=matrix elements <G|H|C>.
SIDE EFFECTS
PARENTS
mkrho,posdoppler,vtowfk
CHILDREN
fourwf,gpu_fourwf,prep_index_wavef_bandpp,prep_wavef_sym_do prep_wavef_sym_undo,timab,xmpi_alltoallv,xmpi_sum
SOURCE
58 #if defined HAVE_CONFIG_H 59 #include "config.h" 60 #endif 61 62 #include "abi_common.h" 63 64 subroutine prep_fourwf(rhoaug,blocksize,cwavef,wfraug,iblock,istwf_k,mgfft,& 65 & mpi_enreg,nband_k,ndat,ngfft,npw_k,n4,n5,n6,occ_k,option_fourwf,ucvol,wtk,& 66 & bandfft_kpt_tab,use_gpu_cuda) ! Optional arguments 67 68 use defs_basis 69 use defs_abitypes 70 use m_errors 71 use m_profiling_abi 72 use m_xmpi 73 74 use m_hamiltonian, only : gs_hamiltonian_type 75 use m_bandfft_kpt,only : bandfft_kpt_type,bandfft_kpt,bandfft_kpt_get_ikpt 76 77 !This section has been created automatically by the script Abilint (TD). 78 !Do not modify the following lines by hand. 79 #undef ABI_FUNC 80 #define ABI_FUNC 'prep_fourwf' 81 use interfaces_18_timing 82 use interfaces_53_ffts 83 use interfaces_66_wfs, except_this_one => prep_fourwf 84 !End of the abilint section 85 86 implicit none 87 88 !Arguments ------------------------------------ 89 !scalars 90 integer,intent(in) :: blocksize,iblock,istwf_k,mgfft,n4,n5,n6,nband_k,ndat,npw_k 91 integer,intent(in) :: option_fourwf 92 integer,intent(in),optional :: use_gpu_cuda 93 real(dp),intent(in) :: ucvol,wtk 94 type(bandfft_kpt_type),optional,target,intent(in) :: bandfft_kpt_tab 95 type(mpi_type),intent(inout) :: mpi_enreg 96 !arrays 97 integer,intent(in) :: ngfft(18) 98 real(dp),intent(in) :: occ_k(nband_k) 99 real(dp),intent(out) :: rhoaug(n4,n5,n6) 100 real(dp),intent(in) :: cwavef(2,npw_k*blocksize) 101 real(dp),target,intent(inout) :: wfraug(2,n4,n5,n6*ndat) 102 103 !Local variables------------------------------- 104 !scalars 105 integer :: bandpp,bandpp_sym,ier,iibandpp,ikpt_this_proc,ind_occ,ind_occ1,ind_occ2,ipw 106 integer :: istwf_k_,jjbandpp,me_fft,nd3,nproc_band,nproc_fft,npw_fft 107 integer :: old_me_g0=0,spaceComm=0,tim_fourwf,use_gpu_cuda_ 108 integer,pointer :: idatarecv0,ndatarecv,ndatarecv_tot,ndatasend_sym 109 logical :: flag_inv_sym,have_to_reequilibrate 110 real(dp) :: weight,weight1,weight2 111 type(bandfft_kpt_type),pointer :: bandfft_kpt_ptr 112 !arrays 113 integer,ABI_CONTIGUOUS pointer :: indices_pw_fft(:),kg_k_fft(:,:),kg_k_gather(:,:),kg_k_gather_sym(:,:) 114 integer,ABI_CONTIGUOUS pointer :: rdispls(:),rdispls_sym(:) 115 integer,ABI_CONTIGUOUS pointer :: recvcounts(:),recvcount_fft(:),recvcounts_sym(:),recvcounts_sym_tot(:) 116 integer,ABI_CONTIGUOUS pointer :: recvdisp_fft(:),sdispls(:),sdispls_sym(:) 117 integer,ABI_CONTIGUOUS pointer :: sendcounts(:),sendcount_fft(:),sendcounts_sym(:),sendcounts_sym_all(:) 118 integer,ABI_CONTIGUOUS pointer :: senddisp_fft(:),tab_proc(:) 119 integer,allocatable :: rdisplsloc(:) 120 integer,allocatable :: recvcountsloc(:),sdisplsloc(:) 121 integer,allocatable :: sendcountsloc(:) 122 integer,allocatable :: index_wavef_band(:),index_wavef_send(:) 123 integer,pointer :: gbound_(:,:) 124 real(dp) :: dummy(2,1),tsec(2) 125 real(dp),allocatable :: buff_wf(:,:),cwavef_alltoall1(:,:),cwavef_alltoall2(:,:) 126 real(dp),allocatable :: cwavef_fft(:,:), cwavef_fft_tr(:,:) 127 real(dp),allocatable :: weight_t(:),weight1_t(:),weight2_t(:) 128 real(dp),pointer :: ewavef_alltoall_sym(:,:),wfraug_ptr(:,:,:,:) 129 130 ! ************************************************************************* 131 132 ABI_CHECK((option_fourwf/=3),'Option=3 (FFT r->g) not implemented') 133 ABI_CHECK((mpi_enreg%bandpp==ndat),'BUG: bandpp/=ndat') 134 135 spaceComm=mpi_enreg%comm_band 136 nproc_band = mpi_enreg%nproc_band 137 nproc_fft = mpi_enreg%nproc_fft 138 bandpp = mpi_enreg%bandpp 139 me_fft = mpi_enreg%me_fft 140 141 use_gpu_cuda_=0;if (present(use_gpu_cuda)) use_gpu_cuda_=use_gpu_cuda 142 143 if (present(bandfft_kpt_tab)) then 144 bandfft_kpt_ptr => bandfft_kpt_tab 145 else 146 ikpt_this_proc=bandfft_kpt_get_ikpt() 147 bandfft_kpt_ptr => bandfft_kpt(ikpt_this_proc) 148 end if 149 150 istwf_k_=istwf_k 151 flag_inv_sym = (istwf_k_==2 .and. any(ngfft(7) == [401,402,312])) 152 if (option_fourwf==0) flag_inv_sym=((flag_inv_sym).and.(use_gpu_cuda_==0)) 153 154 if (flag_inv_sym) then 155 istwf_k_ = 1 156 if (modulo(bandpp,2)==0) then 157 bandpp_sym = bandpp/2 158 else 159 bandpp_sym = bandpp 160 end if 161 end if 162 163 !==================================================================================== 164 ABI_ALLOCATE(sendcountsloc,(nproc_band)) 165 ABI_ALLOCATE(sdisplsloc ,(nproc_band)) 166 ABI_ALLOCATE(recvcountsloc,(nproc_band)) 167 ABI_ALLOCATE(rdisplsloc ,(nproc_band)) 168 169 recvcounts =>bandfft_kpt_ptr%recvcounts(:) 170 sendcounts =>bandfft_kpt_ptr%sendcounts(:) 171 rdispls =>bandfft_kpt_ptr%rdispls (:) 172 sdispls =>bandfft_kpt_ptr%sdispls (:) 173 ndatarecv =>bandfft_kpt_ptr%ndatarecv 174 175 kg_k_gather =>bandfft_kpt_ptr%kg_k_gather(:,:) 176 gbound_ =>bandfft_kpt_ptr%gbound(:,:) 177 178 if (flag_inv_sym ) then 179 idatarecv0 =>bandfft_kpt_ptr%idatarecv0 180 ndatarecv_tot =>bandfft_kpt_ptr%ndatarecv_tot 181 ndatasend_sym =>bandfft_kpt_ptr%ndatasend_sym 182 kg_k_gather_sym =>bandfft_kpt_ptr%kg_k_gather_sym(:,:) 183 rdispls_sym =>bandfft_kpt_ptr%rdispls_sym(:) 184 recvcounts_sym =>bandfft_kpt_ptr%recvcounts_sym(:) 185 recvcounts_sym_tot =>bandfft_kpt_ptr%recvcounts_sym_tot(:) 186 sdispls_sym =>bandfft_kpt_ptr%sdispls_sym(:) 187 sendcounts_sym =>bandfft_kpt_ptr%sendcounts_sym(:) 188 sendcounts_sym_all =>bandfft_kpt_ptr%sendcounts_sym_all(:) 189 tab_proc =>bandfft_kpt_ptr%tab_proc(:) 190 end if 191 192 ABI_ALLOCATE(cwavef_alltoall2,(2,ndatarecv*bandpp)) 193 if ( ((.not.flag_inv_sym) .and. (bandpp>1) ) .or. flag_inv_sym )then 194 ABI_ALLOCATE(cwavef_alltoall1,(2,ndatarecv*bandpp)) 195 end if 196 197 recvcountsloc(:)=recvcounts(:)*2*bandpp 198 rdisplsloc(:)=rdispls(:)*2*bandpp 199 sendcountsloc(:)=sendcounts(:)*2 200 sdisplsloc(:)=sdispls(:)*2 201 202 call timab(547,1,tsec) 203 call xmpi_alltoallv(cwavef,sendcountsloc,sdisplsloc,cwavef_alltoall2,& 204 & recvcountsloc,rdisplsloc,spaceComm,ier) 205 call timab(547,2,tsec) 206 207 !If me_fft==0, I have the G=0 vector, but keep for the record the old value 208 if (me_fft==0) then 209 old_me_g0=mpi_enreg%me_g0 210 mpi_enreg%me_g0=1 211 end if 212 213 tim_fourwf=16 214 215 !Eventually adjust load balancing for FFT (by changing FFT distrib) 216 have_to_reequilibrate = bandfft_kpt_ptr%have_to_reequilibrate 217 if(have_to_reequilibrate) then 218 npw_fft = bandfft_kpt_ptr%npw_fft 219 sendcount_fft => bandfft_kpt_ptr%sendcount_fft(:) 220 recvcount_fft => bandfft_kpt_ptr%recvcount_fft(:) 221 senddisp_fft => bandfft_kpt_ptr%senddisp_fft(:) 222 recvdisp_fft => bandfft_kpt_ptr%recvdisp_fft(:) 223 indices_pw_fft => bandfft_kpt_ptr%indices_pw_fft(:) 224 kg_k_fft => bandfft_kpt_ptr%kg_k_fft(:,:) 225 ABI_ALLOCATE( buff_wf, (2,ndatarecv*bandpp) ) ! for sorting cgwavef 226 ABI_ALLOCATE( cwavef_fft, (2,npw_fft*bandpp) ) 227 if(bandpp>1) then 228 ABI_ALLOCATE( cwavef_fft_tr, (2,npw_fft*bandpp)) 229 end if 230 end if 231 232 if (option_fourwf==0) wfraug(:,:,:,:)=zero 233 234 !==================================================================== 235 if ((.not.(flag_inv_sym)) .and. (bandpp==1)) then 236 237 ! Compute the index of the band 238 ind_occ = (iblock-1)*blocksize + mpi_enreg%me_band + 1 239 240 if(abs(occ_k(ind_occ))>=tol8.or.option_fourwf==0) then 241 242 ! Compute the weight of the band 243 weight=occ_k(ind_occ)*wtk/ucvol 244 245 if(have_to_reequilibrate) then 246 ! filling of sorted send buffers before exchange 247 do ipw = 1 ,ndatarecv 248 buff_wf(1:2, indices_pw_fft(ipw) ) = cwavef_alltoall2(1:2,ipw) 249 end do 250 call xmpi_alltoallv(buff_wf,2*sendcount_fft,2*senddisp_fft, & 251 & cwavef_fft,2*recvcount_fft, 2*recvdisp_fft, mpi_enreg%comm_fft,ier) 252 call fourwf(1,rhoaug,cwavef_fft,dummy,wfraug,gbound_,gbound_,& 253 & istwf_k_,kg_k_fft,kg_k_fft,mgfft,mpi_enreg,1,& 254 & ngfft,npw_fft,1,n4,n5,n6,option_fourwf,mpi_enreg%paral_kgb,tim_fourwf,weight,weight,& 255 & use_gpu_cuda=use_gpu_cuda_) 256 else 257 call fourwf(1,rhoaug,cwavef_alltoall2,dummy,wfraug,gbound_,gbound_,& 258 & istwf_k_,kg_k_gather,kg_k_gather,mgfft,mpi_enreg,1,& 259 & ngfft,ndatarecv,1,n4,n5,n6,option_fourwf,mpi_enreg%paral_kgb,tim_fourwf,weight,weight,& 260 & use_gpu_cuda=use_gpu_cuda_) 261 end if 262 if (option_fourwf==0.and.nproc_fft>1) then 263 if (me_fft>0) then 264 nd3=(ngfft(3)-1)/nproc_fft+1 265 wfraug(:,:,:,me_fft*nd3+1:me_fft*nd3+nd3)=wfraug(:,:,:,1:nd3) 266 wfraug(:,:,:,1:nd3)=zero 267 end if 268 call xmpi_sum(wfraug,mpi_enreg%comm_fft,ier) 269 end if 270 end if 271 272 !==================================================================== 273 else if ((.not.(flag_inv_sym)) .and. (bandpp>1) ) then 274 275 ! ------------------------------------------------------------- 276 ! Computation of the index to class the waves functions below bandpp 277 ! ------------------------------------------------------------- 278 call prep_index_wavef_bandpp(nproc_band,bandpp,& 279 & 1,ndatarecv,& 280 & recvcounts,rdispls,& 281 & index_wavef_band) 282 283 ! ------------------------------------------------------- 284 ! Sorting of the wave functions below bandpp 285 ! ------------------------------------------------------- 286 cwavef_alltoall1(:,:) = cwavef_alltoall2(:,index_wavef_band) 287 288 if(have_to_reequilibrate) then 289 ! filling of sorted send buffers before exchange 290 do iibandpp=1,bandpp 291 do ipw = 1 ,ndatarecv 292 buff_wf(1:2, iibandpp + bandpp*(indices_pw_fft(ipw)-1)) = & 293 & cwavef_alltoall1(1:2,ipw + ndatarecv*(iibandpp-1)) 294 end do 295 end do 296 call xmpi_alltoallv(buff_wf,2*bandpp*sendcount_fft,2*bandpp*senddisp_fft, & 297 & cwavef_fft_tr,2*bandpp*recvcount_fft, 2*bandpp*recvdisp_fft, mpi_enreg%comm_fft,ier) 298 do iibandpp=1,bandpp 299 do ipw = 1 ,npw_fft 300 cwavef_fft(1:2, ipw + npw_fft*(iibandpp -1)) = cwavef_fft_tr(1:2, iibandpp + bandpp*(ipw-1)) 301 end do 302 end do 303 end if 304 305 ! ------------------- 306 ! Fourier calculation 307 ! ------------------- 308 ! Cuda version 309 if(use_gpu_cuda_==1) then 310 ABI_ALLOCATE(weight_t,(bandpp)) 311 do iibandpp=1,bandpp 312 ! Compute the index of the band 313 ind_occ = (iblock-1)*blocksize + (mpi_enreg%me_band * bandpp) + iibandpp 314 ! Compute the weight of the band 315 weight_t(iibandpp)=occ_k(ind_occ)*wtk/ucvol 316 if(abs(occ_k(ind_occ)) < tol8) weight_t(iibandpp) = zero 317 end do 318 ! Accumulate time because it is not done in gpu_fourwf 319 call timab(240+tim_fourwf,1,tsec) 320 #if defined HAVE_GPU_CUDA 321 call gpu_fourwf(1,rhoaug,& 322 & cwavef_alltoall1,& 323 & dummy,wfraug,gbound_,gbound_,& 324 & istwf_k_,kg_k_gather,kg_k_gather,mgfft,mpi_enreg,bandpp,& 325 & ngfft,ndatarecv,1,n4,n5,n6,option_fourwf,mpi_enreg%paral_kgb,& 326 & tim_fourwf,weight_t,weight_t) 327 #endif 328 call timab(240+tim_fourwf,2,tsec) 329 ABI_DEALLOCATE(weight_t) 330 331 ! Standard version 332 else 333 do iibandpp=1,bandpp 334 ! Compute the index of the band 335 ind_occ = (iblock-1)*blocksize + (mpi_enreg%me_band * bandpp) + iibandpp 336 ! Compute the weight of the band 337 weight=occ_k(ind_occ)*wtk/ucvol 338 if (option_fourwf==0) then 339 wfraug_ptr => wfraug(:,:,:,(iibandpp-1)*n6+1:iibandpp*n6) 340 else 341 wfraug_ptr => wfraug 342 end if 343 if (abs(occ_k(ind_occ)) >=tol8.or.option_fourwf==0) then 344 if(have_to_reequilibrate) then 345 call fourwf(1,rhoaug, & 346 & cwavef_fft(:,(npw_fft*(iibandpp-1))+1:(npw_fft*iibandpp)), & 347 & dummy,wfraug_ptr,gbound_,gbound_,& 348 & istwf_k_,kg_k_fft,kg_k_fft,mgfft,mpi_enreg,1,& 349 & ngfft,npw_fft,1,n4,n5,n6,option_fourwf,mpi_enreg%paral_kgb,tim_fourwf,weight,weight,& 350 & use_gpu_cuda=use_gpu_cuda_) 351 else 352 call fourwf(1,rhoaug,& 353 & cwavef_alltoall1(:,(ndatarecv*(iibandpp-1))+1:(ndatarecv*iibandpp)),& 354 & dummy,wfraug_ptr,gbound_,gbound_,& 355 & istwf_k_,kg_k_gather,kg_k_gather,mgfft,mpi_enreg,1,& 356 & ngfft,ndatarecv,1,n4,n5,n6,option_fourwf,mpi_enreg%paral_kgb,& 357 & tim_fourwf,weight,weight) 358 end if 359 if (option_fourwf==0.and.nproc_fft>1) then 360 if (me_fft>0) then 361 nd3=(ngfft(3)-1)/nproc_fft+1 362 wfraug_ptr(:,:,:,me_fft*nd3+1:me_fft*nd3+nd3)=wfraug_ptr(:,:,:,1:nd3) 363 wfraug_ptr(:,:,:,1:nd3)=zero 364 end if 365 call xmpi_sum(wfraug_ptr,mpi_enreg%comm_fft,ier) 366 end if 367 end if 368 end do 369 end if ! (use_gpu_cuda==1) 370 371 ! ----------------------------------------------------- 372 ! Sorting waves functions below the processors 373 ! ----------------------------------------------------- 374 ! cwavef_alltoall(:,index_wavef_band) = cwavef_alltoall(:,:) ! NOT NEEDED 375 ABI_DEALLOCATE(index_wavef_band) 376 377 !==================================================================== 378 else if (flag_inv_sym) then 379 380 ! ------------------------------------------------------------- 381 ! Computation of the index to class the waves functions below bandpp 382 ! ------------------------------------------------------------- 383 call prep_index_wavef_bandpp(nproc_band,bandpp,& 384 & 1,ndatarecv,& 385 & recvcounts,rdispls,& 386 & index_wavef_band) 387 388 ! ------------------------------------------------------- 389 ! Sorting the wave functions below bandpp 390 ! ------------------------------------------------------- 391 cwavef_alltoall1(:,:) = cwavef_alltoall2(:,index_wavef_band) 392 393 ! ------------------------------------------------------------ 394 ! We associate the waves functions by two 395 ! ------------------------------------------------------------ 396 call prep_wavef_sym_do(mpi_enreg,bandpp,1,& 397 & ndatarecv,& 398 & ndatarecv_tot,ndatasend_sym,tab_proc,& 399 & cwavef_alltoall1,& 400 & sendcounts_sym,sdispls_sym,& 401 & recvcounts_sym,rdispls_sym,& 402 & ewavef_alltoall_sym,& 403 & index_wavef_send) 404 405 ! ------------------------------------------------------------ 406 ! Fourier calculation 407 ! ------------------------------------------------------------ 408 ! Cuda version 409 if (use_gpu_cuda_==1) then 410 ABI_ALLOCATE(weight1_t,(bandpp_sym)) 411 ABI_ALLOCATE(weight2_t,(bandpp_sym)) 412 do iibandpp=1,bandpp_sym 413 if (bandpp/=1) then 414 ind_occ1 = (iblock-1)*blocksize + (mpi_enreg%me_band * bandpp) + (2*iibandpp-1) 415 ind_occ2 = (iblock-1)*blocksize + (mpi_enreg%me_band * bandpp) + (2*iibandpp ) 416 else 417 ind_occ1 = (iblock-1)*blocksize + (mpi_enreg%me_band * bandpp) + 1 418 ind_occ2 = ind_occ1 419 end if 420 weight1_t(iibandpp) = occ_k(ind_occ1)*wtk/ucvol 421 weight2_t(iibandpp) = occ_k(ind_occ2)*wtk/ucvol 422 end do 423 call timab(240+tim_fourwf,1,tsec) 424 #if defined HAVE_GPU_CUDA 425 call gpu_fourwf(1,rhoaug,& 426 & ewavef_alltoall_sym,& 427 & dummy,wfraug,gbound_,gbound_,& 428 & istwf_k_,kg_k_gather_sym,kg_k_gather_sym,mgfft,mpi_enreg,bandpp_sym,& 429 & ngfft,ndatarecv_tot,1,n4,n5,n6,option_fourwf,mpi_enreg%paral_kgb,& 430 & tim_fourwf,weight1_t,weight2_t) 431 #endif 432 call timab(240+tim_fourwf,2,tsec) 433 ABI_DEALLOCATE(weight1_t) 434 ABI_DEALLOCATE(weight2_t) 435 436 ! Standard version 437 else 438 if (option_fourwf==0.and.bandpp>1) then 439 ABI_ALLOCATE(wfraug_ptr,(2,n4,n5,n6)) 440 else 441 wfraug_ptr => wfraug 442 end if 443 do iibandpp=1,bandpp_sym 444 if (bandpp/=1) then 445 ind_occ1 = (iblock-1)*blocksize + (mpi_enreg%me_band * bandpp) + (2*iibandpp-1) 446 ind_occ2 = (iblock-1)*blocksize + (mpi_enreg%me_band * bandpp) + (2*iibandpp ) 447 else 448 ind_occ1 = (iblock-1)*blocksize + (mpi_enreg%me_band * bandpp) + 1 449 ind_occ2 = ind_occ1 450 end if 451 weight1 = occ_k(ind_occ1)*wtk/ucvol 452 weight2 = occ_k(ind_occ2)*wtk/ucvol 453 call fourwf(1,rhoaug,& 454 & ewavef_alltoall_sym(:,(ndatarecv_tot*(iibandpp-1))+1:(ndatarecv_tot*iibandpp)),& 455 & dummy,wfraug_ptr,gbound_,gbound_,& 456 & istwf_k_,kg_k_gather_sym,kg_k_gather_sym,mgfft,mpi_enreg,1,& 457 & ngfft,ndatarecv_tot,1,n4,n5,n6,option_fourwf,mpi_enreg%paral_kgb,& 458 & tim_fourwf,weight1,weight2) 459 if (option_fourwf==0) then 460 if (modulo(bandpp,2)==0) then 461 jjbandpp=2*iibandpp-1 462 wfraug(1,:,:,(jjbandpp-1)*n6+1:jjbandpp*n6)=wfraug_ptr(1,:,:,1:n6) 463 wfraug(1,:,:,(jjbandpp)*n6+1:(jjbandpp+1)*n6)=wfraug_ptr(2,:,:,1:n6) 464 else if (bandpp>1) then 465 wfraug(1,:,:,(iibandpp-1)*n6+1:iibandpp*n6)=wfraug_ptr(1,:,:,1:n6) 466 end if 467 if (nproc_fft>1) then 468 if (me_fft>0) then 469 nd3=(ngfft(3)-1)/nproc_fft+1 470 wfraug(1,:,:,me_fft*nd3+1:me_fft*nd3+nd3)=wfraug(1,:,:,1:nd3) 471 wfraug(1,:,:,1:nd3)=zero 472 end if 473 call xmpi_sum(wfraug,mpi_enreg%comm_fft,ier) 474 end if 475 end if 476 end do 477 if (option_fourwf==0.and.bandpp>1) then 478 ABI_DEALLOCATE(wfraug_ptr) 479 end if 480 end if ! (use_gpu_cuda==1) 481 482 ! ------------------------------------------------------------ 483 ! We dissociate each wave function in two waves functions 484 ! gwavef is classed below of bandpp 485 ! ------------------------------------------------------------ 486 call prep_wavef_sym_undo(mpi_enreg,bandpp,1,& 487 & ndatarecv,& 488 & ndatarecv_tot,ndatasend_sym,idatarecv0,& 489 & cwavef_alltoall1,& 490 & sendcounts_sym,sdispls_sym,& 491 & recvcounts_sym,rdispls_sym,& 492 & ewavef_alltoall_sym,& 493 & index_wavef_send) 494 495 ABI_DEALLOCATE(ewavef_alltoall_sym) 496 ABI_DEALLOCATE(index_wavef_send) 497 498 ! ------------------------------------------------------- 499 ! Sorting waves functions below the processors 500 ! ------------------------------------------------------- 501 ! cwavef_alltoall(:,index_wavef_band) = cwavef_alltoall(:,:) ! NOT NEEDED 502 503 ABI_DEALLOCATE(index_wavef_band) 504 505 end if 506 507 !==================================================================== 508 if (me_fft==0) mpi_enreg%me_g0=old_me_g0 509 if(have_to_reequilibrate) then 510 ABI_DEALLOCATE(buff_wf) 511 ABI_DEALLOCATE(cwavef_fft) 512 if(bandpp > 1) then 513 ABI_DEALLOCATE(cwavef_fft_tr) 514 end if 515 end if 516 ABI_DEALLOCATE(sendcountsloc) 517 ABI_DEALLOCATE(sdisplsloc) 518 ABI_DEALLOCATE(recvcountsloc) 519 ABI_DEALLOCATE(rdisplsloc) 520 ABI_DEALLOCATE(cwavef_alltoall2) 521 if ( ((.not.flag_inv_sym) .and. (bandpp>1) ) .or. flag_inv_sym ) then 522 ABI_DEALLOCATE(cwavef_alltoall1) 523 end if 524 525 end subroutine prep_fourwf