TABLE OF CONTENTS


ABINIT/prep_fourwf [ Functions ]

[ Top ] [ Functions ]

NAME

 prep_fourwf

FUNCTION

 this routine prepares the data to the call of fourwf.

COPYRIGHT

 Copyright (C) 1998-2018 ABINIT group (FBottin,MT,GZ,FDahm)
 this file is distributed under the terms of the
 gnu general public license, see ~abinit/COPYING
 or http://www.gnu.org/copyleft/gpl.txt .
 for the initials of contributors, see ~abinit/doc/developers/contributors.txt .

INPUTS

  blocksize= size of block for FFT
  cwavef(2,npw*ndat)=planewave coefficients of wavefunction (one spinorial component?).
  dtfil <type(datafiles_type)>=variables related to files
  gvnlc=matrix elements <G|Vnonlocal|C>
  kg_k(3,npw_k)=reduced planewave coordinates.
  lmnmax=if useylm=1, max number of (l,m,n) comp. over all type of psps
        =if useylm=0, max number of (l,n)   comp. over all type of psps
  mgfft=maximum size of 1d ffts
  mpi_enreg=informations about mpi parallelization
  mpsang= 1+maximum angular momentum for nonlocal pseudopotentials
  mpssoang= 1+maximum (spin*angular momentum) for nonlocal pseudopotentials
  natom=number of atoms in cell.
  nband_k=number of bands at this k point for that spin polarization
  ndat=number of FFT to do in //
  ngfft(18)= contain all needed information about 3D FFT
  npw_k=number of plane waves at this k point
  nspinor=number of spinorial components of the wavefunctions
  ntypat=number of types of atoms in unit cell.
  n4,n5,n6 used for dimensionning of vlocal
  option_fourwf=option for fourwf (see fourwf.F90)
  prtvol=control print volume and debugging output
  ucvol=unit cell volume
  [bandfft_kpt_tab]= (optional) if present, contains tabs used to implement
                     the "band-fft" parallelism
                      if not present, the bandfft_kpt global variable is used
  [use_gpu_cuda]= (optional) 1 if Cuda (GPU) is on

OUTPUT

  gwavef=(2,npw*ndat)=matrix elements <G|H|C>.

SIDE EFFECTS

PARENTS

      mkrho,posdoppler,vtowfk

CHILDREN

      fourwf,gpu_fourwf,prep_index_wavef_bandpp,prep_wavef_sym_do
      prep_wavef_sym_undo,timab,xmpi_alltoallv,xmpi_sum

SOURCE

 58 #if defined HAVE_CONFIG_H
 59 #include "config.h"
 60 #endif
 61 
 62 #include "abi_common.h"
 63 
 64 subroutine prep_fourwf(rhoaug,blocksize,cwavef,wfraug,iblock,istwf_k,mgfft,&
 65 &          mpi_enreg,nband_k,ndat,ngfft,npw_k,n4,n5,n6,occ_k,option_fourwf,ucvol,wtk,&
 66 &          bandfft_kpt_tab,use_gpu_cuda) ! Optional arguments
 67 
 68  use defs_basis
 69  use defs_abitypes
 70  use m_errors
 71  use m_profiling_abi
 72  use m_xmpi
 73 
 74  use m_hamiltonian, only : gs_hamiltonian_type
 75  use m_bandfft_kpt,only : bandfft_kpt_type,bandfft_kpt,bandfft_kpt_get_ikpt
 76 
 77 !This section has been created automatically by the script Abilint (TD).
 78 !Do not modify the following lines by hand.
 79 #undef ABI_FUNC
 80 #define ABI_FUNC 'prep_fourwf'
 81  use interfaces_18_timing
 82  use interfaces_53_ffts
 83  use interfaces_66_wfs, except_this_one => prep_fourwf
 84 !End of the abilint section
 85 
 86  implicit none
 87 
 88 !Arguments ------------------------------------
 89 !scalars
 90  integer,intent(in) :: blocksize,iblock,istwf_k,mgfft,n4,n5,n6,nband_k,ndat,npw_k
 91  integer,intent(in) :: option_fourwf
 92  integer,intent(in),optional :: use_gpu_cuda
 93  real(dp),intent(in) :: ucvol,wtk
 94  type(bandfft_kpt_type),optional,target,intent(in) :: bandfft_kpt_tab
 95  type(mpi_type),intent(inout) :: mpi_enreg
 96 !arrays
 97  integer,intent(in) :: ngfft(18)
 98  real(dp),intent(in) :: occ_k(nband_k)
 99  real(dp),intent(out) :: rhoaug(n4,n5,n6)
100  real(dp),intent(in) :: cwavef(2,npw_k*blocksize)
101  real(dp),target,intent(inout) :: wfraug(2,n4,n5,n6*ndat)
102 
103 !Local variables-------------------------------
104 !scalars
105  integer :: bandpp,bandpp_sym,ier,iibandpp,ikpt_this_proc,ind_occ,ind_occ1,ind_occ2,ipw
106  integer :: istwf_k_,jjbandpp,me_fft,nd3,nproc_band,nproc_fft,npw_fft
107  integer :: old_me_g0=0,spaceComm=0,tim_fourwf,use_gpu_cuda_
108  integer,pointer :: idatarecv0,ndatarecv,ndatarecv_tot,ndatasend_sym
109  logical :: flag_inv_sym,have_to_reequilibrate
110  real(dp) :: weight,weight1,weight2
111  type(bandfft_kpt_type),pointer :: bandfft_kpt_ptr
112 !arrays
113  integer,ABI_CONTIGUOUS pointer :: indices_pw_fft(:),kg_k_fft(:,:),kg_k_gather(:,:),kg_k_gather_sym(:,:)
114  integer,ABI_CONTIGUOUS pointer :: rdispls(:),rdispls_sym(:)
115  integer,ABI_CONTIGUOUS pointer :: recvcounts(:),recvcount_fft(:),recvcounts_sym(:),recvcounts_sym_tot(:)
116  integer,ABI_CONTIGUOUS pointer :: recvdisp_fft(:),sdispls(:),sdispls_sym(:)
117  integer,ABI_CONTIGUOUS pointer :: sendcounts(:),sendcount_fft(:),sendcounts_sym(:),sendcounts_sym_all(:)
118  integer,ABI_CONTIGUOUS pointer :: senddisp_fft(:),tab_proc(:)
119  integer,allocatable :: rdisplsloc(:)
120  integer,allocatable :: recvcountsloc(:),sdisplsloc(:)
121  integer,allocatable :: sendcountsloc(:)
122  integer,allocatable :: index_wavef_band(:),index_wavef_send(:)
123  integer,pointer :: gbound_(:,:)
124  real(dp) :: dummy(2,1),tsec(2)
125  real(dp),allocatable :: buff_wf(:,:),cwavef_alltoall1(:,:),cwavef_alltoall2(:,:)
126  real(dp),allocatable :: cwavef_fft(:,:), cwavef_fft_tr(:,:)
127  real(dp),allocatable :: weight_t(:),weight1_t(:),weight2_t(:)
128  real(dp),pointer :: ewavef_alltoall_sym(:,:),wfraug_ptr(:,:,:,:)
129 
130 ! *************************************************************************
131 
132  ABI_CHECK((option_fourwf/=3),'Option=3 (FFT r->g) not implemented')
133  ABI_CHECK((mpi_enreg%bandpp==ndat),'BUG: bandpp/=ndat')
134 
135  spaceComm=mpi_enreg%comm_band
136  nproc_band = mpi_enreg%nproc_band
137  nproc_fft  = mpi_enreg%nproc_fft
138  bandpp     = mpi_enreg%bandpp
139  me_fft     = mpi_enreg%me_fft
140 
141  use_gpu_cuda_=0;if (present(use_gpu_cuda)) use_gpu_cuda_=use_gpu_cuda
142 
143  if (present(bandfft_kpt_tab)) then
144    bandfft_kpt_ptr => bandfft_kpt_tab
145  else
146    ikpt_this_proc=bandfft_kpt_get_ikpt()
147    bandfft_kpt_ptr => bandfft_kpt(ikpt_this_proc)
148  end if
149 
150  istwf_k_=istwf_k
151  flag_inv_sym = (istwf_k_==2 .and. any(ngfft(7) == [401,402,312]))
152  if (option_fourwf==0) flag_inv_sym=((flag_inv_sym).and.(use_gpu_cuda_==0))
153 
154  if (flag_inv_sym) then
155    istwf_k_       = 1
156    if (modulo(bandpp,2)==0) then
157      bandpp_sym   = bandpp/2
158    else
159      bandpp_sym   = bandpp
160    end if
161  end if
162 
163 !====================================================================================
164  ABI_ALLOCATE(sendcountsloc,(nproc_band))
165  ABI_ALLOCATE(sdisplsloc   ,(nproc_band))
166  ABI_ALLOCATE(recvcountsloc,(nproc_band))
167  ABI_ALLOCATE(rdisplsloc   ,(nproc_band))
168 
169  recvcounts   =>bandfft_kpt_ptr%recvcounts(:)
170  sendcounts   =>bandfft_kpt_ptr%sendcounts(:)
171  rdispls      =>bandfft_kpt_ptr%rdispls   (:)
172  sdispls      =>bandfft_kpt_ptr%sdispls   (:)
173  ndatarecv    =>bandfft_kpt_ptr%ndatarecv
174 
175  kg_k_gather  =>bandfft_kpt_ptr%kg_k_gather(:,:)
176  gbound_      =>bandfft_kpt_ptr%gbound(:,:)
177 
178  if (flag_inv_sym ) then
179    idatarecv0           =>bandfft_kpt_ptr%idatarecv0
180    ndatarecv_tot        =>bandfft_kpt_ptr%ndatarecv_tot
181    ndatasend_sym        =>bandfft_kpt_ptr%ndatasend_sym
182    kg_k_gather_sym      =>bandfft_kpt_ptr%kg_k_gather_sym(:,:)
183    rdispls_sym          =>bandfft_kpt_ptr%rdispls_sym(:)
184    recvcounts_sym       =>bandfft_kpt_ptr%recvcounts_sym(:)
185    recvcounts_sym_tot   =>bandfft_kpt_ptr%recvcounts_sym_tot(:)
186    sdispls_sym          =>bandfft_kpt_ptr%sdispls_sym(:)
187    sendcounts_sym       =>bandfft_kpt_ptr%sendcounts_sym(:)
188    sendcounts_sym_all   =>bandfft_kpt_ptr%sendcounts_sym_all(:)
189    tab_proc             =>bandfft_kpt_ptr%tab_proc(:)
190  end if
191 
192  ABI_ALLOCATE(cwavef_alltoall2,(2,ndatarecv*bandpp))
193  if ( ((.not.flag_inv_sym) .and. (bandpp>1) ) .or. flag_inv_sym )then
194    ABI_ALLOCATE(cwavef_alltoall1,(2,ndatarecv*bandpp))
195  end if
196 
197  recvcountsloc(:)=recvcounts(:)*2*bandpp
198  rdisplsloc(:)=rdispls(:)*2*bandpp
199  sendcountsloc(:)=sendcounts(:)*2
200  sdisplsloc(:)=sdispls(:)*2
201 
202  call timab(547,1,tsec)
203  call xmpi_alltoallv(cwavef,sendcountsloc,sdisplsloc,cwavef_alltoall2,&
204 & recvcountsloc,rdisplsloc,spaceComm,ier)
205  call timab(547,2,tsec)
206 
207 !If me_fft==0, I have the G=0 vector, but keep for the record the old value
208  if (me_fft==0) then
209    old_me_g0=mpi_enreg%me_g0
210    mpi_enreg%me_g0=1
211  end if
212 
213  tim_fourwf=16
214 
215 !Eventually adjust load balancing for FFT (by changing FFT distrib)
216  have_to_reequilibrate = bandfft_kpt_ptr%have_to_reequilibrate
217  if(have_to_reequilibrate) then
218    npw_fft =  bandfft_kpt_ptr%npw_fft
219    sendcount_fft  => bandfft_kpt_ptr%sendcount_fft(:)
220    recvcount_fft  => bandfft_kpt_ptr%recvcount_fft(:)
221    senddisp_fft   => bandfft_kpt_ptr%senddisp_fft(:)
222    recvdisp_fft   => bandfft_kpt_ptr%recvdisp_fft(:)
223    indices_pw_fft => bandfft_kpt_ptr%indices_pw_fft(:)
224    kg_k_fft       => bandfft_kpt_ptr%kg_k_fft(:,:)
225    ABI_ALLOCATE( buff_wf, (2,ndatarecv*bandpp) ) ! for sorting cgwavef
226    ABI_ALLOCATE( cwavef_fft, (2,npw_fft*bandpp) )
227    if(bandpp>1) then
228      ABI_ALLOCATE( cwavef_fft_tr, (2,npw_fft*bandpp))
229    end if
230  end if
231 
232  if (option_fourwf==0) wfraug(:,:,:,:)=zero
233 
234 !====================================================================
235  if ((.not.(flag_inv_sym)) .and. (bandpp==1)) then
236 
237 !  Compute the index of the band
238    ind_occ = (iblock-1)*blocksize + mpi_enreg%me_band + 1
239 
240    if(abs(occ_k(ind_occ))>=tol8.or.option_fourwf==0) then
241 
242 !    Compute the weight of the band
243      weight=occ_k(ind_occ)*wtk/ucvol
244 
245      if(have_to_reequilibrate) then
246 !      filling of sorted send buffers before exchange
247        do ipw = 1 ,ndatarecv
248          buff_wf(1:2, indices_pw_fft(ipw) ) = cwavef_alltoall2(1:2,ipw)
249        end do
250        call xmpi_alltoallv(buff_wf,2*sendcount_fft,2*senddisp_fft,  &
251 &       cwavef_fft,2*recvcount_fft, 2*recvdisp_fft, mpi_enreg%comm_fft,ier)
252        call fourwf(1,rhoaug,cwavef_fft,dummy,wfraug,gbound_,gbound_,&
253 &       istwf_k_,kg_k_fft,kg_k_fft,mgfft,mpi_enreg,1,&
254 &       ngfft,npw_fft,1,n4,n5,n6,option_fourwf,mpi_enreg%paral_kgb,tim_fourwf,weight,weight,&
255 &       use_gpu_cuda=use_gpu_cuda_)
256      else
257        call fourwf(1,rhoaug,cwavef_alltoall2,dummy,wfraug,gbound_,gbound_,&
258 &       istwf_k_,kg_k_gather,kg_k_gather,mgfft,mpi_enreg,1,&
259 &       ngfft,ndatarecv,1,n4,n5,n6,option_fourwf,mpi_enreg%paral_kgb,tim_fourwf,weight,weight,&
260 &       use_gpu_cuda=use_gpu_cuda_)
261      end if
262      if (option_fourwf==0.and.nproc_fft>1) then
263        if (me_fft>0) then
264          nd3=(ngfft(3)-1)/nproc_fft+1
265          wfraug(:,:,:,me_fft*nd3+1:me_fft*nd3+nd3)=wfraug(:,:,:,1:nd3)
266          wfraug(:,:,:,1:nd3)=zero
267        end if
268        call xmpi_sum(wfraug,mpi_enreg%comm_fft,ier)
269      end if
270    end if
271 
272 !====================================================================
273  else if ((.not.(flag_inv_sym)) .and. (bandpp>1) ) then
274 
275 !  -------------------------------------------------------------
276 !  Computation of the index to class the waves functions below bandpp
277 !  -------------------------------------------------------------
278    call prep_index_wavef_bandpp(nproc_band,bandpp,&
279 &   1,ndatarecv,&
280 &   recvcounts,rdispls,&
281 &   index_wavef_band)
282 
283 !  -------------------------------------------------------
284 !  Sorting of the wave functions below bandpp
285 !  -------------------------------------------------------
286    cwavef_alltoall1(:,:) = cwavef_alltoall2(:,index_wavef_band)
287 
288    if(have_to_reequilibrate) then
289 !    filling of sorted send buffers before exchange
290      do iibandpp=1,bandpp
291        do ipw = 1 ,ndatarecv
292          buff_wf(1:2, iibandpp + bandpp*(indices_pw_fft(ipw)-1)) = &
293 &         cwavef_alltoall1(1:2,ipw + ndatarecv*(iibandpp-1))
294        end do
295      end do
296      call xmpi_alltoallv(buff_wf,2*bandpp*sendcount_fft,2*bandpp*senddisp_fft,  &
297 &     cwavef_fft_tr,2*bandpp*recvcount_fft, 2*bandpp*recvdisp_fft, mpi_enreg%comm_fft,ier)
298      do iibandpp=1,bandpp
299        do ipw = 1 ,npw_fft
300          cwavef_fft(1:2,  ipw + npw_fft*(iibandpp -1)) = cwavef_fft_tr(1:2,  iibandpp + bandpp*(ipw-1))
301        end do
302      end do
303    end if
304 
305 !  -------------------
306 !  Fourier calculation
307 !  -------------------
308 !  Cuda version
309    if(use_gpu_cuda_==1) then
310      ABI_ALLOCATE(weight_t,(bandpp))
311      do iibandpp=1,bandpp
312 !      Compute the index of the band
313        ind_occ = (iblock-1)*blocksize + (mpi_enreg%me_band * bandpp) + iibandpp
314 !      Compute the weight of the band
315        weight_t(iibandpp)=occ_k(ind_occ)*wtk/ucvol
316        if(abs(occ_k(ind_occ)) < tol8) weight_t(iibandpp) = zero
317      end do
318 !    Accumulate time because it is not done in gpu_fourwf
319      call timab(240+tim_fourwf,1,tsec)
320 #if defined HAVE_GPU_CUDA
321      call gpu_fourwf(1,rhoaug,&
322 &     cwavef_alltoall1,&
323 &     dummy,wfraug,gbound_,gbound_,&
324 &     istwf_k_,kg_k_gather,kg_k_gather,mgfft,mpi_enreg,bandpp,&
325 &     ngfft,ndatarecv,1,n4,n5,n6,option_fourwf,mpi_enreg%paral_kgb,&
326 &     tim_fourwf,weight_t,weight_t)
327 #endif
328      call timab(240+tim_fourwf,2,tsec)
329      ABI_DEALLOCATE(weight_t)
330 
331 !  Standard version
332    else
333      do iibandpp=1,bandpp
334 !      Compute the index of the band
335        ind_occ = (iblock-1)*blocksize + (mpi_enreg%me_band * bandpp) + iibandpp
336 !      Compute the weight of the band
337        weight=occ_k(ind_occ)*wtk/ucvol
338        if (option_fourwf==0) then
339          wfraug_ptr => wfraug(:,:,:,(iibandpp-1)*n6+1:iibandpp*n6)
340        else
341          wfraug_ptr => wfraug
342        end if
343        if (abs(occ_k(ind_occ)) >=tol8.or.option_fourwf==0) then
344          if(have_to_reequilibrate) then
345            call fourwf(1,rhoaug, &
346 &           cwavef_fft(:,(npw_fft*(iibandpp-1))+1:(npw_fft*iibandpp)), &
347 &           dummy,wfraug_ptr,gbound_,gbound_,&
348 &           istwf_k_,kg_k_fft,kg_k_fft,mgfft,mpi_enreg,1,&
349 &           ngfft,npw_fft,1,n4,n5,n6,option_fourwf,mpi_enreg%paral_kgb,tim_fourwf,weight,weight,&
350 &           use_gpu_cuda=use_gpu_cuda_)
351          else
352            call fourwf(1,rhoaug,&
353 &           cwavef_alltoall1(:,(ndatarecv*(iibandpp-1))+1:(ndatarecv*iibandpp)),&
354 &           dummy,wfraug_ptr,gbound_,gbound_,&
355 &           istwf_k_,kg_k_gather,kg_k_gather,mgfft,mpi_enreg,1,&
356 &           ngfft,ndatarecv,1,n4,n5,n6,option_fourwf,mpi_enreg%paral_kgb,&
357 &           tim_fourwf,weight,weight)
358          end if
359          if (option_fourwf==0.and.nproc_fft>1) then
360            if (me_fft>0) then
361              nd3=(ngfft(3)-1)/nproc_fft+1
362              wfraug_ptr(:,:,:,me_fft*nd3+1:me_fft*nd3+nd3)=wfraug_ptr(:,:,:,1:nd3)
363              wfraug_ptr(:,:,:,1:nd3)=zero
364            end if
365            call xmpi_sum(wfraug_ptr,mpi_enreg%comm_fft,ier)
366          end if
367        end if
368      end do
369    end if ! (use_gpu_cuda==1)
370 
371 !  -----------------------------------------------------
372 !  Sorting waves functions below the processors
373 !  -----------------------------------------------------
374 !  cwavef_alltoall(:,index_wavef_band) = cwavef_alltoall(:,:)   ! NOT NEEDED
375    ABI_DEALLOCATE(index_wavef_band)
376 
377 !====================================================================
378  else if (flag_inv_sym) then
379 
380 !  -------------------------------------------------------------
381 !  Computation of the index to class the waves functions below bandpp
382 !  -------------------------------------------------------------
383    call prep_index_wavef_bandpp(nproc_band,bandpp,&
384 &   1,ndatarecv,&
385 &   recvcounts,rdispls,&
386 &   index_wavef_band)
387 
388 !  -------------------------------------------------------
389 !  Sorting the wave functions below bandpp
390 !  -------------------------------------------------------
391    cwavef_alltoall1(:,:) = cwavef_alltoall2(:,index_wavef_band)
392 
393 !  ------------------------------------------------------------
394 !  We associate the waves functions by two
395 !  ------------------------------------------------------------
396    call prep_wavef_sym_do(mpi_enreg,bandpp,1,&
397 &   ndatarecv,&
398 &   ndatarecv_tot,ndatasend_sym,tab_proc,&
399 &   cwavef_alltoall1,&
400 &   sendcounts_sym,sdispls_sym,&
401 &   recvcounts_sym,rdispls_sym,&
402 &   ewavef_alltoall_sym,&
403 &   index_wavef_send)
404 
405 !  ------------------------------------------------------------
406 !  Fourier calculation
407 !  ------------------------------------------------------------
408 !  Cuda version
409    if (use_gpu_cuda_==1) then
410      ABI_ALLOCATE(weight1_t,(bandpp_sym))
411      ABI_ALLOCATE(weight2_t,(bandpp_sym))
412      do iibandpp=1,bandpp_sym
413        if (bandpp/=1) then
414          ind_occ1 = (iblock-1)*blocksize + (mpi_enreg%me_band * bandpp) + (2*iibandpp-1)
415          ind_occ2 = (iblock-1)*blocksize + (mpi_enreg%me_band * bandpp) + (2*iibandpp  )
416        else
417          ind_occ1 = (iblock-1)*blocksize + (mpi_enreg%me_band * bandpp) + 1
418          ind_occ2 = ind_occ1
419        end if
420        weight1_t(iibandpp) = occ_k(ind_occ1)*wtk/ucvol
421        weight2_t(iibandpp) = occ_k(ind_occ2)*wtk/ucvol
422      end do
423      call timab(240+tim_fourwf,1,tsec)
424 #if defined HAVE_GPU_CUDA
425      call gpu_fourwf(1,rhoaug,&
426 &     ewavef_alltoall_sym,&
427 &     dummy,wfraug,gbound_,gbound_,&
428 &     istwf_k_,kg_k_gather_sym,kg_k_gather_sym,mgfft,mpi_enreg,bandpp_sym,&
429 &     ngfft,ndatarecv_tot,1,n4,n5,n6,option_fourwf,mpi_enreg%paral_kgb,&
430 &     tim_fourwf,weight1_t,weight2_t)
431 #endif
432      call timab(240+tim_fourwf,2,tsec)
433      ABI_DEALLOCATE(weight1_t)
434      ABI_DEALLOCATE(weight2_t)
435 
436 !  Standard version
437    else
438      if (option_fourwf==0.and.bandpp>1) then
439        ABI_ALLOCATE(wfraug_ptr,(2,n4,n5,n6))
440      else
441        wfraug_ptr => wfraug
442      end if
443      do iibandpp=1,bandpp_sym
444        if (bandpp/=1) then
445          ind_occ1 = (iblock-1)*blocksize + (mpi_enreg%me_band * bandpp) + (2*iibandpp-1)
446          ind_occ2 = (iblock-1)*blocksize + (mpi_enreg%me_band * bandpp) + (2*iibandpp  )
447        else
448          ind_occ1 = (iblock-1)*blocksize + (mpi_enreg%me_band * bandpp) + 1
449          ind_occ2 = ind_occ1
450        end if
451        weight1 = occ_k(ind_occ1)*wtk/ucvol
452        weight2 = occ_k(ind_occ2)*wtk/ucvol
453        call fourwf(1,rhoaug,&
454 &       ewavef_alltoall_sym(:,(ndatarecv_tot*(iibandpp-1))+1:(ndatarecv_tot*iibandpp)),&
455 &       dummy,wfraug_ptr,gbound_,gbound_,&
456 &       istwf_k_,kg_k_gather_sym,kg_k_gather_sym,mgfft,mpi_enreg,1,&
457 &       ngfft,ndatarecv_tot,1,n4,n5,n6,option_fourwf,mpi_enreg%paral_kgb,&
458 &       tim_fourwf,weight1,weight2)
459        if (option_fourwf==0) then
460          if (modulo(bandpp,2)==0) then
461            jjbandpp=2*iibandpp-1
462            wfraug(1,:,:,(jjbandpp-1)*n6+1:jjbandpp*n6)=wfraug_ptr(1,:,:,1:n6)
463            wfraug(1,:,:,(jjbandpp)*n6+1:(jjbandpp+1)*n6)=wfraug_ptr(2,:,:,1:n6)
464          else if (bandpp>1) then
465            wfraug(1,:,:,(iibandpp-1)*n6+1:iibandpp*n6)=wfraug_ptr(1,:,:,1:n6)
466          end if
467          if (nproc_fft>1) then
468            if (me_fft>0) then
469              nd3=(ngfft(3)-1)/nproc_fft+1
470              wfraug(1,:,:,me_fft*nd3+1:me_fft*nd3+nd3)=wfraug(1,:,:,1:nd3)
471              wfraug(1,:,:,1:nd3)=zero
472            end if
473            call xmpi_sum(wfraug,mpi_enreg%comm_fft,ier)
474          end if
475        end if
476      end do
477      if (option_fourwf==0.and.bandpp>1) then
478        ABI_DEALLOCATE(wfraug_ptr)
479      end if
480    end if ! (use_gpu_cuda==1)
481 
482 !  ------------------------------------------------------------
483 !  We dissociate each wave function in two waves functions
484 !  gwavef is classed below of bandpp
485 !  ------------------------------------------------------------
486    call prep_wavef_sym_undo(mpi_enreg,bandpp,1,&
487 &   ndatarecv,&
488 &   ndatarecv_tot,ndatasend_sym,idatarecv0,&
489 &   cwavef_alltoall1,&
490 &   sendcounts_sym,sdispls_sym,&
491 &   recvcounts_sym,rdispls_sym,&
492 &   ewavef_alltoall_sym,&
493 &   index_wavef_send)
494 
495    ABI_DEALLOCATE(ewavef_alltoall_sym)
496    ABI_DEALLOCATE(index_wavef_send)
497 
498 !  -------------------------------------------------------
499 !  Sorting waves functions below the processors
500 !  -------------------------------------------------------
501 !  cwavef_alltoall(:,index_wavef_band) = cwavef_alltoall(:,:) ! NOT NEEDED
502 
503    ABI_DEALLOCATE(index_wavef_band)
504 
505  end if
506 
507 !====================================================================
508  if (me_fft==0) mpi_enreg%me_g0=old_me_g0
509  if(have_to_reequilibrate) then
510    ABI_DEALLOCATE(buff_wf)
511    ABI_DEALLOCATE(cwavef_fft)
512    if(bandpp > 1) then
513      ABI_DEALLOCATE(cwavef_fft_tr)
514    end if
515  end if
516  ABI_DEALLOCATE(sendcountsloc)
517  ABI_DEALLOCATE(sdisplsloc)
518  ABI_DEALLOCATE(recvcountsloc)
519  ABI_DEALLOCATE(rdisplsloc)
520  ABI_DEALLOCATE(cwavef_alltoall2)
521  if ( ((.not.flag_inv_sym) .and. (bandpp>1) ) .or. flag_inv_sym ) then
522    ABI_DEALLOCATE(cwavef_alltoall1)
523  end if
524 
525 end subroutine prep_fourwf