TABLE OF CONTENTS


ABINIT/getghc [ Functions ]

[ Top ] [ Functions ]

NAME

 getghc

FUNCTION

 Compute <G|H|C> for input vector |C> expressed in reciprocal space;
 Result is put in array ghc.
 <G|Vnonlocal|C> is also returned in gvnlc.
 if required, <G|S|C> is returned in gsc (S=overlap - PAW only)
 Note that left and right k points can be different, i.e. ghc=<k^prime+G|H|C_k>.

COPYRIGHT

 Copyright (C) 1998-2018 ABINIT group (DCA, XG, GMR, LSI, MT)
 This file is distributed under the terms of the
 GNU General Public License, see ~abinit/COPYING
 or http://www.gnu.org/copyleft/gpl.txt .
 For the initials of contributors, see ~abinit/doc/developers/contributors.txt .

INPUTS

 cpopt=flag defining the status of cwaveprj%cp(:)=<Proj_i|Cnk> scalars (PAW only)
       (same meaning as in nonlop.F90 routine)
       if cpopt=-1, <p_lmn|in> (and derivatives) are computed here (and not saved)
       if cpopt= 0, <p_lmn|in> are computed here and saved
       if cpopt= 1, <p_lmn|in> and first derivatives are computed here and saved
       if cpopt= 2  <p_lmn|in> are already in memory;
       if cpopt= 3  <p_lmn|in> are already in memory; first derivatives are computed here and saved
       if cpopt= 4  <p_lmn|in> and first derivatives are already in memory;
 cwavef(2,npw*my_nspinor*ndat)=planewave coefficients of wavefunction.
 gs_ham <type(gs_hamiltonian_type)>=all data for the Hamiltonian to be applied
 lambda=factor to be used when computing <G|H-lambda.S|C> - only for sij_opt=-1
        Typically lambda is the eigenvalue (or its guess)
 mpi_enreg=information about MPI parallelization
 ndat=number of FFT to do in parallel
 prtvol=control print volume and debugging output
 sij_opt= -PAW ONLY-  if  0, only matrix elements <G|H|C> have to be computed
    (S=overlap)       if  1, matrix elements <G|S|C> have to be computed in gsc in addition to ghc
                      if -1, matrix elements <G|H-lambda.S|C> have to be computed in ghc (gsc not used)
 tim_getghc=timing code of the calling subroutine(can be set to 0 if not attributed)
 type_calc= option governing which part of Hamitonian is to be applied:
            1: local part only
            2: non-local+kinetic only (added to the exixting Hamiltonian)
            3: local + kinetic only (added to the existing Hamiltonian)
 ===== Optional inputs =====
   [kg_fft_k(3,:)]=optional, (k+G) vector coordinates to be used for the FFT tranformation
                   instead of the one contained in gs_ham datastructure.
                   Typically used for real WF (in parallel) which are FFT-transformed 2 by 2.
   [kg_fft_kp(3,:)]=optional, (k^prime+G) vector coordinates to be used for the FFT tranformation
   [select_k]=optional, option governing the choice of k points to be used.
             gs_ham datastructure contains quantities needed to apply Hamiltonian
             in reciprocal space between 2 kpoints, k and k^prime (equal in most cases);
             if select_k=1, <k^prime|H|k>       is applied [default]
             if select_k=2, <k|H|k^prime>       is applied
             if select_k=3, <k|H|k>             is applied
             if select_k=4, <k^prime|H|k^prime> is applied

OUTPUT

  ghc(2,npw*my_nspinor*ndat)=matrix elements <G|H|C> (if sij_opt>=0)
                                          or <G|H-lambda.S|C> (if sij_opt=-1)
  gvnlc(2,npw*my_nspinor*ndat)=matrix elements <G|Vnonlocal|C> (if sij_opt>=0)
                                            or <G|Vnonlocal-lambda.S|C> (if sij_opt=-1)
  if (sij_opt=1)
    gsc(2,npw*my_nspinor*ndat)=matrix elements <G|S|C> (S=overlap).

SIDE EFFECTS

  cwaveprj(natom,my_nspinor*(1+cpopt)*ndat)= wave function projected on nl projectors (PAW only)

PARENTS

      cgwf,chebfi,dfpt_cgwf,gwls_hamiltonian,ks_ddiago,lobpcgwf,m_io_kss
      m_rf2,mkresi,multithreaded_getghc

CHILDREN

      fourwf

SOURCE

 79 #if defined HAVE_CONFIG_H
 80 #include "config.h"
 81 #endif
 82 
 83 #include "abi_common.h"
 84 
 85 subroutine getghc(cpopt,cwavef,cwaveprj,ghc,gsc,gs_ham,gvnlc,lambda,mpi_enreg,ndat,&
 86 &                 prtvol,sij_opt,tim_getghc,type_calc,&
 87 &                 kg_fft_k,kg_fft_kp,select_k) ! optional arguments
 88 
 89  use defs_basis
 90  use defs_abitypes
 91  use m_errors
 92  use m_profiling_abi
 93  use m_xmpi
 94 
 95  use m_pawcprj,     only : pawcprj_type,pawcprj_alloc,pawcprj_free,pawcprj_getdim
 96  use m_bandfft_kpt, only : bandfft_kpt,bandfft_kpt_get_ikpt
 97  use m_hamiltonian, only : gs_hamiltonian_type,KPRIME_H_K,K_H_KPRIME,K_H_K,KPRIME_H_KPRIME
 98  use m_fock,        only : fock_common_type,fock_get_getghc_call
 99 
100 !This section has been created automatically by the script Abilint (TD).
101 !Do not modify the following lines by hand.
102 #undef ABI_FUNC
103 #define ABI_FUNC 'getghc'
104  use interfaces_14_hidewrite
105  use interfaces_18_timing
106  use interfaces_53_ffts
107  use interfaces_66_nonlocal
108  use interfaces_66_wfs, except_this_one => getghc
109 !End of the abilint section
110 
111  implicit none
112 
113 !Arguments ------------------------------------
114 !scalars
115  integer,intent(in) :: cpopt,ndat, prtvol
116  integer,intent(in) :: sij_opt,tim_getghc,type_calc
117  integer,intent(in),optional :: select_k
118  real(dp),intent(in) :: lambda
119  type(MPI_type),intent(in) :: mpi_enreg
120  type(gs_hamiltonian_type),intent(inout),target :: gs_ham
121 !arrays
122  integer,intent(in),optional,target :: kg_fft_k(:,:),kg_fft_kp(:,:)
123  real(dp),intent(out),target :: gsc(:,:)
124  real(dp),intent(inout) :: cwavef(:,:)
125  real(dp),intent(out) :: ghc(:,:),gvnlc(:,:)
126  type(pawcprj_type),intent(inout),target :: cwaveprj(:,:)
127 
128 !Local variables-------------------------------
129 !scalars
130  integer,parameter :: level=114,re=1,im=2,tim_fourwf=1
131  integer :: choice,cplex,cpopt_here,i1,i2,i3,idat,idir,ierr
132  integer :: ig,igspinor,ii,iispinor,ikpt_this_proc,ipw,ispinor,my_nspinor
133  integer :: nnlout,npw_fft,npw_k1,npw_k2,nspinortot
134  integer :: paw_opt,select_k_,shift1,shift2,signs,tim_nonlop
135  logical :: k1_eq_k2,have_to_reequilibrate,has_fock
136  logical :: nspinor1TreatedByThisProc,nspinor2TreatedByThisProc
137  real(dp) :: ghcim,ghcre,weight
138  character(len=500) :: msg
139 !arrays
140  integer, pointer :: gbound_k1(:,:),gbound_k2(:,:),kg_k1(:,:),kg_k2(:,:)
141  integer, ABI_CONTIGUOUS pointer :: indices_pw_fft(:),kg_k_fft(:,:)
142  integer, ABI_CONTIGUOUS pointer :: recvcount_fft(:),recvdisp_fft(:)
143  integer, ABI_CONTIGUOUS pointer ::  sendcount_fft(:),senddisp_fft(:)
144  integer, allocatable:: dimcprj(:)
145  real(dp) :: enlout(ndat),lambda_ndat(ndat),tsec(2)
146  real(dp),target :: nonlop_dum(1,1)
147  real(dp),allocatable :: buff_wf(:,:),cwavef1(:,:),cwavef2(:,:),cwavef_fft(:,:),cwavef_fft_tr(:,:)
148  real(dp),allocatable :: ghc1(:,:),ghc2(:,:),ghc3(:,:),ghc4(:,:),ghcnd(:,:),ghc_mGGA(:,:)
149  real(dp),allocatable :: vlocal_tmp(:,:,:),work(:,:,:,:)
150  real(dp), pointer :: kinpw_k1(:),kinpw_k2(:),kpt_k1(:),kpt_k2(:)
151  real(dp), pointer :: gsc_ptr(:,:)
152  type(fock_common_type),pointer :: fock
153  type(pawcprj_type),pointer :: cwaveprj_fock(:,:),cwaveprj_idat(:,:),cwaveprj_nonlop(:,:)
154 
155 ! *********************************************************************
156 
157 !Keep track of total time spent in getghc:
158  call timab(200+tim_getghc,1,tsec)
159 
160 !Structured debugging if prtvol==-level
161  if(prtvol==-level)then
162    write(msg,'(80a,a,a)') ('=',ii=1,80),ch10,' getghc : enter, debugging '
163    call wrtout(std_out,msg,'PERS')
164  end if
165 
166 !Select k-dependent objects according to select_k input parameter
167  select_k_=1;if (present(select_k)) select_k_=select_k
168  if (select_k_==KPRIME_H_K) then
169 !  <k^prime|H|k>
170    npw_k1    =  gs_ham%npw_fft_k ; npw_k2    =  gs_ham%npw_fft_kp
171    kpt_k1    => gs_ham%kpt_k     ; kpt_k2    => gs_ham%kpt_kp
172    kg_k1     => gs_ham%kg_k      ; kg_k2     => gs_ham%kg_kp
173    gbound_k1 => gs_ham%gbound_k  ; gbound_k2 => gs_ham%gbound_kp
174    kinpw_k1  => gs_ham%kinpw_k   ; kinpw_k2  => gs_ham%kinpw_kp
175  else if (select_k_==K_H_KPRIME) then
176 !  <k|H|k^prime>
177    npw_k1    =  gs_ham%npw_fft_kp; npw_k2    =  gs_ham%npw_fft_k
178    kpt_k1    => gs_ham%kpt_kp    ; kpt_k2    => gs_ham%kpt_k
179    kg_k1     => gs_ham%kg_kp     ; kg_k2     => gs_ham%kg_k
180    gbound_k1 => gs_ham%gbound_kp ; gbound_k2 => gs_ham%gbound_k
181    kinpw_k1  => gs_ham%kinpw_kp  ; kinpw_k2  => gs_ham%kinpw_k
182  else if (select_k_==K_H_K) then
183 !  <k|H|k>
184    npw_k1    =  gs_ham%npw_fft_k ; npw_k2    =  gs_ham%npw_fft_k
185    kpt_k1    => gs_ham%kpt_k     ; kpt_k2    => gs_ham%kpt_k
186    kg_k1     => gs_ham%kg_k      ; kg_k2     => gs_ham%kg_k
187    gbound_k1 => gs_ham%gbound_k  ; gbound_k2 => gs_ham%gbound_k
188    kinpw_k1  => gs_ham%kinpw_k   ; kinpw_k2  => gs_ham%kinpw_k
189  else if (select_k_==KPRIME_H_KPRIME) then
190 !  <k^prime|H|k^prime>
191    npw_k1    =  gs_ham%npw_fft_kp; npw_k2    =  gs_ham%npw_fft_kp
192    kpt_k1    => gs_ham%kpt_kp    ; kpt_k2    => gs_ham%kpt_kp
193    kg_k1     => gs_ham%kg_kp     ; kg_k2     => gs_ham%kg_kp
194    gbound_k1 => gs_ham%gbound_kp ; gbound_k2 => gs_ham%gbound_kp
195    kinpw_k1  => gs_ham%kinpw_kp  ; kinpw_k2  => gs_ham%kinpw_kp
196  end if
197  k1_eq_k2=(all(abs(kpt_k1(:)-kpt_k2(:))<tol8))
198 
199 !Check sizes
200  my_nspinor=max(1,gs_ham%nspinor/mpi_enreg%nproc_spinor)
201  if (size(cwavef)<2*npw_k1*my_nspinor*ndat) then
202    msg='wrong size for cwavef!'
203    MSG_BUG(msg)
204  end if
205  if (size(ghc)<2*npw_k2*my_nspinor*ndat) then
206    msg='wrong size for ghc!'
207    MSG_BUG(msg)
208  end if
209  if (size(gvnlc)<2*npw_k2*my_nspinor*ndat) then
210    msg='wrong size for gvnlc!'
211    MSG_BUG(msg)
212  end if
213  if (sij_opt==1) then
214    if (size(gsc)<2*npw_k2*my_nspinor*ndat) then
215      msg='wrong size for gsc!'
216      MSG_BUG(msg)
217    end if
218  end if
219  if (gs_ham%usepaw==1.and.cpopt>=0) then
220    if (size(cwaveprj)<gs_ham%natom*my_nspinor*ndat) then
221      msg='wrong size for cwaveprj!'
222      MSG_BUG(msg)
223    end if
224  end if
225 
226 !Eventually overwrite plane waves data for FFT
227  if (present(kg_fft_k)) then
228    kg_k1 => kg_fft_k ; kg_k2 => kg_fft_k
229    npw_k1=size(kg_k1,2) ; npw_k2=size(kg_k2,2)
230  end if
231  if (present(kg_fft_kp)) then
232    kg_k2 => kg_fft_kp ; npw_k2=size(kg_k2,2)
233  end if
234 
235 !paral_kgb constraint
236  if (mpi_enreg%paral_kgb==1.and.(.not.k1_eq_k2)) then
237    msg='paral_kgb=1 not allowed for k/=k_^prime!'
238    MSG_BUG(msg)
239  end if
240 
241 !Do we add Fock exchange term ?
242  has_fock=(associated(gs_ham%fockcommon))
243  if (has_fock) fock => gs_ham%fockcommon
244 
245 !Parallelization over spinors management
246  nspinortot=gs_ham%nspinor
247  if (mpi_enreg%paral_spinor==0) then
248    shift1=npw_k1;shift2=npw_k2
249    nspinor1TreatedByThisProc=.true.
250    nspinor2TreatedByThisProc=(nspinortot==2)
251  else
252    shift1=0;shift2=0
253    nspinor1TreatedByThisProc=(mpi_enreg%me_spinor==0)
254    nspinor2TreatedByThisProc=(mpi_enreg%me_spinor==1)
255  end if
256 
257 !============================================================
258 ! Application of the local potential
259 !============================================================
260 
261  if ((type_calc==0).or.(type_calc==1).or.(type_calc==3)) then
262 
263 !  Need a Vlocal
264    if (.not.associated(gs_ham%vlocal)) then
265      msg='we need Vlocal!'
266      MSG_BUG(msg)
267    end if
268 
269 !  fourwf can only process with one value of istwf_k
270    if (.not.k1_eq_k2) then
271      msg='vlocal (fourwf) cannot be computed with k/=k^prime!'
272      MSG_BUG(msg)
273    end if
274 
275 !  Eventually adjust load balancing for FFT (by changing FFT distrib)
276    have_to_reequilibrate=.false.
277    if (mpi_enreg%paral_kgb==1) then
278      ikpt_this_proc=bandfft_kpt_get_ikpt()
279      have_to_reequilibrate=bandfft_kpt(ikpt_this_proc)%have_to_reequilibrate
280    end if
281    if (have_to_reequilibrate) then
282      npw_fft =  bandfft_kpt(ikpt_this_proc)%npw_fft
283      sendcount_fft  => bandfft_kpt(ikpt_this_proc)%sendcount_fft(:)
284      recvcount_fft  => bandfft_kpt(ikpt_this_proc)%recvcount_fft(:)
285      senddisp_fft   => bandfft_kpt(ikpt_this_proc)%senddisp_fft(:)
286      recvdisp_fft   => bandfft_kpt(ikpt_this_proc)%recvdisp_fft(:)
287      indices_pw_fft => bandfft_kpt(ikpt_this_proc)%indices_pw_fft(:)
288      kg_k_fft       => bandfft_kpt(ikpt_this_proc)%kg_k_fft(:,:)
289      ABI_ALLOCATE(buff_wf,(2,npw_k1*ndat) )
290      ABI_ALLOCATE(cwavef_fft,(2,npw_fft*ndat) )
291      if(ndat>1) then
292        ABI_ALLOCATE(cwavef_fft_tr, (2,npw_fft*ndat))
293      end if
294      do idat=1, ndat
295        do ipw = 1 ,npw_k1
296          buff_wf(1:2, idat + ndat*(indices_pw_fft(ipw)-1) ) = cwavef(1:2,ipw + npw_k1*(idat-1))
297        end do
298      end do
299      if(ndat > 1) then
300        call xmpi_alltoallv(buff_wf,2*ndat*sendcount_fft,2*ndat*senddisp_fft,  &
301 &       cwavef_fft_tr,2*ndat*recvcount_fft, 2*ndat*recvdisp_fft, mpi_enreg%comm_fft,ierr)
302 !      We need to transpose data
303        do idat=1,ndat
304          do ipw = 1 ,npw_fft
305            cwavef_fft(1:2,  ipw + npw_fft*(idat-1)) = cwavef_fft_tr(1:2,  idat + ndat*(ipw-1))
306          end do
307        end do
308      else
309        call xmpi_alltoallv(buff_wf,2*sendcount_fft,2*senddisp_fft,  &
310 &       cwavef_fft,2*recvcount_fft, 2*recvdisp_fft, mpi_enreg%comm_fft,ierr)
311      end if
312    end if
313 
314 !  Apply the local potential to the wavefunction
315 !  Start from wavefunction in reciprocal space cwavef
316 !  End with function ghc in reciprocal space also.
317    ABI_ALLOCATE(work,(2,gs_ham%n4,gs_ham%n5,gs_ham%n6*ndat))
318    weight=one
319 
320    if (nspinortot==2) then
321      ABI_ALLOCATE(cwavef1,(2,npw_k1*ndat))
322      ABI_ALLOCATE(cwavef2,(2,npw_k1*ndat))
323      do idat=1,ndat
324        do ipw=1,npw_k1
325          cwavef1(1:2,ipw+(idat-1)*npw_k1)=cwavef(1:2,ipw+(idat-1)*my_nspinor*npw_k1)
326          cwavef2(1:2,ipw+(idat-1)*npw_k1)=cwavef(1:2,ipw+(idat-1)*my_nspinor*npw_k1+shift1)
327        end do
328      end do
329 !    call cg_zcopy(npw_k1*ndat,cwavef(1,1),cwavef1)
330 !    call cg_zcopy(npw_k1*ndat,cwavef(1,1+shift1),cwavef2)
331    end if
332 
333 !  Treat scalar local potentials
334    if (gs_ham%nvloc==1) then
335 
336      if (nspinortot==1) then
337 
338        if (have_to_reequilibrate) then
339          call fourwf(1,gs_ham%vlocal,cwavef_fft,cwavef_fft,work,gbound_k1,gbound_k2,&
340 &         gs_ham%istwf_k,kg_k_fft,kg_k_fft,gs_ham%mgfft,mpi_enreg,ndat,gs_ham%ngfft,&
341 &         npw_fft,npw_fft,gs_ham%n4,gs_ham%n5,gs_ham%n6,2,mpi_enreg%paral_kgb,tim_fourwf,&
342 &         weight,weight,use_gpu_cuda=gs_ham%use_gpu_cuda)
343        else
344          call fourwf(1,gs_ham%vlocal,cwavef,ghc,work,gbound_k1,gbound_k2,&
345 &         gs_ham%istwf_k,kg_k1,kg_k2,gs_ham%mgfft,mpi_enreg,ndat,gs_ham%ngfft,&
346 &         npw_k1,npw_k2,gs_ham%n4,gs_ham%n5,gs_ham%n6,2,mpi_enreg%paral_kgb,tim_fourwf,&
347 &         weight,weight,use_gpu_cuda=gs_ham%use_gpu_cuda)
348        end if
349 
350      else ! nspinortot==2
351 
352        if (nspinor1TreatedByThisProc) then
353          ABI_ALLOCATE(ghc1,(2,npw_k2*ndat))
354          call fourwf(1,gs_ham%vlocal,cwavef1,ghc1,work,gbound_k1,gbound_k2,&
355 &         gs_ham%istwf_k,kg_k1,kg_k2,gs_ham%mgfft,mpi_enreg,ndat,gs_ham%ngfft,&
356 &         npw_k1,npw_k2,gs_ham%n4,gs_ham%n5,gs_ham%n6,2,mpi_enreg%paral_kgb,tim_fourwf,&
357 &         weight,weight,use_gpu_cuda=gs_ham%use_gpu_cuda)
358          do idat=1,ndat
359            do ipw =1, npw_k2
360              ghc(1:2,ipw+(idat-1)*my_nspinor*npw_k2)=ghc1(1:2,ipw+(idat-1)*npw_k2)
361            end do
362          end do
363          ABI_DEALLOCATE(ghc1)
364        end if ! spin 1 treated by this proc
365 
366        if (nspinor2TreatedByThisProc) then
367          ABI_ALLOCATE(ghc2,(2,npw_k2*ndat))
368          call fourwf(1,gs_ham%vlocal,cwavef2,ghc2,work,gbound_k1,gbound_k2,&
369 &         gs_ham%istwf_k,kg_k1,kg_k2,gs_ham%mgfft,mpi_enreg,ndat,gs_ham%ngfft,&
370 &         npw_k1,npw_k2,gs_ham%n4,gs_ham%n5,gs_ham%n6,2,mpi_enreg%paral_kgb,tim_fourwf,weight,weight,&
371 &         use_gpu_cuda=gs_ham%use_gpu_cuda)
372          do idat=1,ndat
373            do ipw=1,npw_k2
374              ghc(1:2,ipw+(idat-1)*my_nspinor*npw_k2+shift2)=ghc2(1:2,ipw+(idat-1)*npw_k2)
375            end do
376          end do
377          ABI_DEALLOCATE(ghc2)
378        end if ! spin 2 treated by this proc
379 
380      end if ! npsinortot
381 
382 !    Treat non-collinear local potentials
383    else if (gs_ham%nvloc==4) then
384 
385      ABI_ALLOCATE(ghc1,(2,npw_k2*ndat))
386      ABI_ALLOCATE(ghc2,(2,npw_k2*ndat))
387      ABI_ALLOCATE(ghc3,(2,npw_k2*ndat))
388      ABI_ALLOCATE(ghc4,(2,npw_k2*ndat))
389      ghc1(:,:)=zero; ghc2(:,:)=zero; ghc3(:,:)=zero ;  ghc4(:,:)=zero
390      ABI_ALLOCATE(vlocal_tmp,(gs_ham%n4,gs_ham%n5,gs_ham%n6))
391 !    ghc1=v11*phi1
392      vlocal_tmp(:,:,:)=gs_ham%vlocal(:,:,:,1)
393      if (nspinor1TreatedByThisProc) then
394        call fourwf(1,vlocal_tmp,cwavef1,ghc1,work,gbound_k1,gbound_k2,&
395 &       gs_ham%istwf_k,kg_k1,kg_k2,gs_ham%mgfft,mpi_enreg,ndat,gs_ham%ngfft,&
396 &       npw_k1,npw_k2,gs_ham%n4,gs_ham%n5,gs_ham%n6,2,mpi_enreg%paral_kgb,tim_fourwf,weight,weight,&
397 &       use_gpu_cuda=gs_ham%use_gpu_cuda)
398      end if
399 !    ghc2=v22*phi2
400      vlocal_tmp(:,:,:)=gs_ham%vlocal(:,:,:,2)
401      if (nspinor2TreatedByThisProc) then
402        call fourwf(1,vlocal_tmp,cwavef2,ghc2,work,gbound_k1,gbound_k2,&
403 &       gs_ham%istwf_k,kg_k1,kg_k2,gs_ham%mgfft,mpi_enreg,ndat,gs_ham%ngfft,&
404 &       npw_k1,npw_k2,gs_ham%n4,gs_ham%n5,gs_ham%n6,2,mpi_enreg%paral_kgb,tim_fourwf,weight,weight,&
405 &       use_gpu_cuda=gs_ham%use_gpu_cuda)
406      end if
407      ABI_DEALLOCATE(vlocal_tmp)
408      cplex=2
409      ABI_ALLOCATE(vlocal_tmp,(cplex*gs_ham%n4,gs_ham%n5,gs_ham%n6))
410 !    ghc3=(re(v12)-im(v12))*phi1
411      do i3=1,gs_ham%n6
412        do i2=1,gs_ham%n5
413          do i1=1,gs_ham%n4
414            vlocal_tmp(2*i1-1,i2,i3)= gs_ham%vlocal(i1,i2,i3,3)
415            vlocal_tmp(2*i1  ,i2,i3)=-gs_ham%vlocal(i1,i2,i3,4)
416          end do
417        end do
418      end do
419      if (nspinor1TreatedByThisProc) then
420        call fourwf(cplex,vlocal_tmp,cwavef1,ghc3,work,gbound_k1,gbound_k2,&
421 &       gs_ham%istwf_k,kg_k1,kg_k2,gs_ham%mgfft,mpi_enreg,ndat,gs_ham%ngfft,&
422 &       npw_k1,npw_k2,gs_ham%n4,gs_ham%n5,gs_ham%n6,2,mpi_enreg%paral_kgb,tim_fourwf,weight,weight,&
423 &       use_gpu_cuda=gs_ham%use_gpu_cuda)
424      end if
425 !    ghc4=(re(v12)+im(v12))*phi2
426      if (nspinor2TreatedByThisProc) then
427        do i3=1,gs_ham%n6
428          do i2=1,gs_ham%n5
429            do i1=1,gs_ham%n4
430              vlocal_tmp(2*i1,i2,i3)=-vlocal_tmp(2*i1,i2,i3)
431            end do
432          end do
433        end do
434        call fourwf(cplex,vlocal_tmp,cwavef2,ghc4,work,gbound_k1,gbound_k2,&
435 &       gs_ham%istwf_k,kg_k1,kg_k2,gs_ham%mgfft,mpi_enreg,ndat,gs_ham%ngfft,&
436 &       npw_k1,npw_k2,gs_ham%n4,gs_ham%n5,gs_ham%n6,2,mpi_enreg%paral_kgb,tim_fourwf,weight,weight,&
437 &       use_gpu_cuda=gs_ham%use_gpu_cuda)
438      end if
439      ABI_DEALLOCATE(vlocal_tmp)
440 !    Build ghc from pieces
441 !    (v11,v22,Re(v12)+iIm(v12);Re(v12)-iIm(v12))(psi1;psi2): matrix product
442      if (mpi_enreg%paral_spinor==0) then
443        do idat=1,ndat
444          do ipw=1,npw_k2
445            ghc(1:2,ipw+(idat-1)*my_nspinor*npw_k2)       =ghc1(1:2,ipw+(idat-1)*npw_k2)+ghc4(1:2,ipw+(idat-1)*npw_k2)
446            ghc(1:2,ipw+(idat-1)*my_nspinor*npw_k2+npw_k2)=ghc3(1:2,ipw+(idat-1)*npw_k2)+ghc2(1:2,ipw+(idat-1)*npw_k2)
447          end do
448        end do
449      else
450        call xmpi_sum(ghc4,mpi_enreg%comm_spinor,ierr)
451        call xmpi_sum(ghc3,mpi_enreg%comm_spinor,ierr)
452        if (nspinor1TreatedByThisProc) then
453          do idat=1,ndat
454            do ipw=1,npw_k2
455              ghc(1:2,ipw+(idat-1)*my_nspinor*npw_k2)=ghc1(1:2,ipw+(idat-1)*npw_k2)+ghc4(1:2,ipw+(idat-1)*npw_k2)
456            end do
457          end do
458        else if (nspinor2TreatedByThisProc) then
459          do idat=1,ndat
460            do ipw=1,npw_k2
461              ghc(1:2,ipw+(idat-1)*my_nspinor*npw_k2)=ghc3(1:2,ipw+(idat-1)*npw_k2)+ghc2(1:2,ipw+(idat-1)*npw_k2)
462            end do
463          end do
464        end if
465      end if
466      ABI_DEALLOCATE(ghc1)
467      ABI_DEALLOCATE(ghc2)
468      ABI_DEALLOCATE(ghc3)
469      ABI_DEALLOCATE(ghc4)
470    end if ! nvloc
471 
472    if (nspinortot==2)  then
473      ABI_DEALLOCATE(cwavef1)
474      ABI_DEALLOCATE(cwavef2)
475    end if
476    ABI_DEALLOCATE(work)
477 
478 !  Retrieve eventually original FFT distrib
479    if(have_to_reequilibrate) then
480      if(ndat > 1 ) then
481        do idat=1,ndat
482          do ipw = 1 ,npw_fft
483            cwavef_fft_tr(1:2,  idat + ndat*(ipw-1)) = cwavef_fft(1:2,  ipw + npw_fft*(idat-1))
484          end do
485        end do
486        call xmpi_alltoallv(cwavef_fft_tr,2*ndat*recvcount_fft, 2*ndat*recvdisp_fft, &
487 &       buff_wf,2*ndat*sendcount_fft,2*ndat*senddisp_fft, mpi_enreg%comm_fft,ierr)
488      else
489        call xmpi_alltoallv(cwavef_fft,2*recvcount_fft, 2*recvdisp_fft, &
490 &       buff_wf,2*sendcount_fft,2*senddisp_fft, mpi_enreg%comm_fft,ierr)
491      end if
492      do idat=1,ndat
493        do ipw = 1 ,npw_k2
494          ghc(1:2,ipw + npw_k2*(idat-1)) = buff_wf(1:2, idat + ndat*(indices_pw_fft(ipw)-1))
495        end do
496      end do
497      ABI_DEALLOCATE(buff_wf)
498      ABI_DEALLOCATE(cwavef_fft)
499      if(ndat > 1) then
500        ABI_DEALLOCATE(cwavef_fft_tr)
501      end if
502    end if
503 
504 !  Add metaGGA contribution
505    if (associated(gs_ham%vxctaulocal)) then
506      if (.not.k1_eq_k2) then
507        msg='metaGGA not allowed for k/=k_^prime!'
508        MSG_BUG(msg)
509      end if
510      if (size(gs_ham%vxctaulocal)/=gs_ham%n4*gs_ham%n5*gs_ham%n6*gs_ham%nvloc*4) then
511        msg='wrong sizes for vxctaulocal!'
512        MSG_BUG(msg)
513      end if
514      ABI_ALLOCATE(ghc_mGGA,(2,npw_k2*my_nspinor*ndat))
515      call getghc_mGGA(cwavef,ghc_mGGA,gbound_k1,gs_ham%gprimd,gs_ham%istwf_k,kg_k1,kpt_k1,&
516 &     gs_ham%mgfft,mpi_enreg,ndat,gs_ham%ngfft,npw_k1,gs_ham%nvloc,&
517 &     gs_ham%n4,gs_ham%n5,gs_ham%n6,my_nspinor,gs_ham%vxctaulocal,gs_ham%use_gpu_cuda)
518      ghc(1:2,1:npw_k2*my_nspinor*ndat)=ghc(1:2,1:npw_k2*my_nspinor*ndat)+ghc_mGGA(1:2,1:npw_k2*my_nspinor*ndat)
519      ABI_DEALLOCATE(ghc_mGGA)
520    end if
521 
522 !  Add nuclear dipole moment contribution if nonzero
523    if (any(abs(gs_ham%nucdipmom)>tol8)) then
524      if (.not.k1_eq_k2) then
525        msg='Nuclear Dipole Moment not allowed for k/=k_^prime!'
526        MSG_BUG(msg)
527      end if
528      ABI_ALLOCATE(ghcnd,(2,npw_k2*my_nspinor*ndat))
529      call getghcnd(cwavef,ghcnd,gs_ham,my_nspinor,ndat)
530      ghc(1:2,1:npw_k2*my_nspinor*ndat)=ghc(1:2,1:npw_k2*my_nspinor*ndat)+ghcnd(1:2,1:npw_k2*my_nspinor*ndat)
531      ABI_DEALLOCATE(ghcnd)
532    end if ! end computation of nuclear dipole moment component
533 
534  end if ! type_calc
535 
536 
537  if ((type_calc==0).or.(type_calc==2).or.(type_calc==3)) then
538 
539 !============================================================
540 ! Application of the non-local potential
541 !============================================================
542 
543    if ((type_calc==0).or.(type_calc==2)) then
544      signs=2 ; choice=1 ; nnlout=1 ; idir=0 ; tim_nonlop=1
545      cpopt_here=-1;if (gs_ham%usepaw==1) cpopt_here=cpopt
546      if (has_fock) then
547        if (gs_ham%usepaw==1) then
548          cpopt_here=max(cpopt,0)
549          if (cpopt<2) then
550            ABI_DATATYPE_ALLOCATE(cwaveprj_fock,(gs_ham%natom,my_nspinor*ndat))
551            ABI_ALLOCATE(dimcprj,(gs_ham%natom))
552            call pawcprj_getdim(dimcprj,gs_ham%natom,gs_ham%nattyp,gs_ham%ntypat,&
553 &           gs_ham%typat,fock%pawtab,'O')
554            call pawcprj_alloc(cwaveprj_fock,0,dimcprj)
555            ABI_DEALLOCATE(dimcprj)
556          else
557            cwaveprj_fock=>cwaveprj
558          end if
559          cwaveprj_nonlop=>cwaveprj_fock
560        else
561          cwaveprj_nonlop=>cwaveprj
562          cwaveprj_fock=>cwaveprj
563        end if
564      else
565        cwaveprj_nonlop=>cwaveprj
566      end if
567      paw_opt=gs_ham%usepaw ; if (sij_opt/=0) paw_opt=sij_opt+3
568      lambda_ndat = lambda
569 
570      if (gs_ham%usepaw==0) gsc_ptr => nonlop_dum
571      if (gs_ham%usepaw==1) gsc_ptr => gsc
572      call nonlop(choice,cpopt_here,cwaveprj_nonlop,enlout,gs_ham,idir,lambda_ndat,mpi_enreg,ndat,&
573 &     nnlout,paw_opt,signs,gsc_ptr,tim_nonlop,cwavef,gvnlc,select_k=select_k_)
574    end if ! end type_calc 0 or 2 for nonlop application
575 
576 
577 !============================================================
578 ! Assemble kinetic, local, nonlocal and Fock contributions
579 !============================================================
580 
581 !  Assemble modified kinetic, local and nonlocal contributions
582 !  to <G|H|C(n,k)>. Take also into account build-in debugging.
583    if(prtvol/=-level)then
584      do idat=1,ndat
585        if (k1_eq_k2) then
586 !      !!$OMP PARALLEL DO PRIVATE(igspinor) COLLAPSE(2)
587          do ispinor=1,my_nspinor
588            do ig=1,npw_k2
589              igspinor=ig+npw_k2*(ispinor-1)+npw_k2*my_nspinor*(idat-1)
590              if(kinpw_k2(ig)<huge(zero)*1.d-11)then
591                ghc(re,igspinor)= ghc(re,igspinor) + kinpw_k2(ig)*cwavef(re,igspinor) + gvnlc(re,igspinor)
592                ghc(im,igspinor)= ghc(im,igspinor) + kinpw_k2(ig)*cwavef(im,igspinor) + gvnlc(im,igspinor)
593              else
594                ghc(re,igspinor)=zero
595                ghc(im,igspinor)=zero
596                if (sij_opt==1) then
597                  gsc(re,igspinor)=zero
598                  gsc(im,igspinor)=zero
599                end if
600              end if
601            end do ! ig
602          end do ! ispinor
603 
604        else
605 !      !!$OMP PARALLEL DO PRIVATE(igspinor) COLLAPSE(2)
606          do ispinor=1,my_nspinor
607            do ig=1,npw_k2
608              igspinor=ig+npw_k2*(ispinor-1)+npw_k2*my_nspinor*(idat-1)
609              if(kinpw_k2(ig)<huge(zero)*1.d-11)then
610                ghc(re,igspinor)= ghc(re,igspinor) + gvnlc(re,igspinor)
611                ghc(im,igspinor)= ghc(im,igspinor) + gvnlc(im,igspinor)
612              else
613                ghc(re,igspinor)=zero
614                ghc(im,igspinor)=zero
615                if (sij_opt==1) then
616                  gsc(re,igspinor)=zero
617                  gsc(im,igspinor)=zero
618                end if
619              end if
620            end do ! ig
621          end do ! ispinor
622        end if
623      end do ! idat
624    else
625 !    Here, debugging section
626      call wrtout(std_out,' getghc : components of ghc ','PERS')
627      write(msg,'(a)')&
628 &     'icp ig ispinor igspinor re/im     ghc        kinpw         cwavef      glocc        gvnlc  gsc'
629      call wrtout(std_out,msg,'PERS')
630      do idat=1,ndat
631        do ispinor=1,my_nspinor
632          do ig=1,npw_k2
633            igspinor=ig+npw_k2*(ispinor-1)+npw_k2*my_nspinor*(idat-1)
634            if(kinpw_k2(ig)<huge(zero)*1.d-11)then
635              if (k1_eq_k2) then
636                ghcre=kinpw_k2(ig)*cwavef(re,igspinor)+ghc(re,igspinor)+gvnlc(re,igspinor)
637                ghcim=kinpw_k2(ig)*cwavef(im,igspinor)+ghc(im,igspinor)+gvnlc(im,igspinor)
638              else
639                ghcre=ghc(re,igspinor)+gvnlc(re,igspinor)
640                ghcim=ghc(im,igspinor)+gvnlc(im,igspinor)
641              end if
642            else
643              ghcre=zero
644              ghcim=zero
645              if (sij_opt==1) then
646                gsc(re,igspinor)=zero
647                gsc(im,igspinor)=zero
648              end if
649            end if
650            iispinor=ispinor;if (mpi_enreg%paral_spinor==1) iispinor=mpi_enreg%me_spinor+1
651            if (sij_opt == 1) then
652              write(msg,'(a,3(1x,i5),6(1x,es13.6))') '  1 ', ig, iispinor, igspinor,ghcre,&
653 &             kinpw_k2(ig),cwavef(re,igspinor),ghc(re,igspinor),gvnlc(re,igspinor), gsc(re,igspinor)
654              call wrtout(std_out,msg,'PERS')
655              write(msg,'(a,3(1x,i5),6(1x,es13.6))') '  2 ', ig, iispinor, igspinor,ghcim,&
656 &             kinpw_k2(ig),cwavef(im,igspinor),ghc(im,igspinor),gvnlc(im,igspinor), gsc(im,igspinor)
657              call wrtout(std_out,msg,'PERS')
658            else
659              write(msg,'(a,3(1x,i5),6(1x,es13.6))') '  1 ', ig, iispinor, igspinor,ghcre,&
660 &             kinpw_k2(ig),cwavef(re,igspinor),ghc(re,igspinor),gvnlc(re,igspinor)
661              call wrtout(std_out,msg,'PERS')
662              write(msg,'(a,3(1x,i5),6(1x,es13.6))') '  2 ', ig, iispinor, igspinor,ghcim,&
663 &             kinpw_k2(ig),cwavef(im,igspinor),ghc(im,igspinor),gvnlc(im,igspinor)
664              call wrtout(std_out,msg,'PERS')
665            end if
666            ghc(re,igspinor)=ghcre
667            ghc(im,igspinor)=ghcim
668          end do ! ig
669        end do ! ispinor
670      end do ! idat
671    end if
672 
673 !  Calculation of the Fock exact exchange term
674    if (has_fock) then
675      if (fock_get_getghc_call(fock)==1) then
676        if (gs_ham%usepaw==0) cwaveprj_idat => cwaveprj
677        do idat=1,ndat
678          if (fock%use_ACE==0) then
679            if (gs_ham%usepaw==1) cwaveprj_idat => cwaveprj_fock(:,(idat-1)*my_nspinor+1:idat*my_nspinor)
680            call fock_getghc(cwavef(:,1+(idat-1)*npw_k1*my_nspinor:idat*npw_k1*my_nspinor),cwaveprj_idat,&
681 &           ghc(:,1+(idat-1)*npw_k2*my_nspinor:idat*npw_k2*my_nspinor),gs_ham,mpi_enreg)
682          else
683            call fock_ACE_getghc(cwavef(:,1+(idat-1)*npw_k1*my_nspinor:idat*npw_k1*my_nspinor),&
684 &           ghc(:,1+(idat-1)*npw_k2*my_nspinor:idat*npw_k2*my_nspinor),gs_ham,mpi_enreg)
685          end if
686        end do ! idat
687      end if
688    end if
689 
690 !  Structured debugging : if prtvol=-level, stop here.
691    if(prtvol==-level)then
692      write(msg,'(a,i0,a)')' getghc : exit prtvol=-',level,', debugging mode => stop '
693      MSG_ERROR(msg)
694    end if
695 
696    if ((type_calc==0).or.(type_calc==2)) then
697      if (has_fock.and.gs_ham%usepaw==1.and.cpopt<2) then
698        call pawcprj_free(cwaveprj_fock)
699        ABI_DATATYPE_DEALLOCATE(cwaveprj_fock)
700      end if
701    end if
702 
703  end if ! type_calc
704 
705  call timab(200+tim_getghc,2,tsec)
706 
707 end subroutine getghc

ABINIT/getghc_mGGA [ Functions ]

[ Top ] [ Functions ]

NAME

 getghc_mGGA

FUNCTION

 Compute metaGGA contribution to <G|H|C> for input vector |C> expressed in reciprocal space.

COPYRIGHT

 Copyright (C) 1998-2018 ABINIT group (DCA, XG, GMR, LSI, MT)
 This file is distributed under the terms of the
 GNU General Public License, see ~abinit/COPYING
 or http://www.gnu.org/copyleft/gpl.txt .
 For the initials of contributors, see ~abinit/doc/developers/contributors.txt .

INPUTS

 cwavef(2,npw_k*my_nspinor*ndat)=planewave coefficients of wavefunction.
 gbound_k(2*mgfft+4)=sphere boundary info
 gprimd(3,3)=dimensional reciprocal space primitive translations (b^-1)
 istwf_k=input parameter that describes the storage of wfs
 kg_k(3,npw_k)=G vec coordinates wrt recip lattice transl.
 kpt(3)=current k point
 mgfft=maximum single fft dimension
 mpi_enreg=informations about MPI parallelization
 my_nspinor=number of spinorial components of the wavefunctions (on current proc)
 ndat=number of FFTs to perform in parall
 ngfft(18)=contain all needed information about 3D FFT
 npw_k=number of planewaves in basis for given k point.
 nvloc=number of spin components of vxctaulocal
 n4,n5,n6=for dimensionning of vxctaulocal
 use_gpu_cuda=1 if Cuda (GPU) is on
 vxctaulocal(n4,n5,n6,nvloc,4)= local potential corresponding to the derivative of XC energy with respect to
  kinetic energy density, in real space, on the augmented fft grid.
  This array contains also the gradient of vxctaulocal (gvxctaulocal) in vxctaulocal(:,:,:,:,2:4).

OUTPUT

  ghc_mGGA(2,npw_k*my_nspinor*ndat)=metaGGA contribution to <G|H|C>

SIDE EFFECTS

PARENTS

      getghc

CHILDREN

      fourwf

SOURCE

 761 #if defined HAVE_CONFIG_H
 762 #include "config.h"
 763 #endif
 764 
 765 #include "abi_common.h"
 766 
 767 subroutine getghc_mGGA(cwavef,ghc_mGGA,gbound_k,gprimd,istwf_k,kg_k,kpt,mgfft,mpi_enreg,&
 768 &                      ndat,ngfft,npw_k,nvloc,n4,n5,n6,my_nspinor,vxctaulocal,use_gpu_cuda)
 769 
 770  use defs_basis
 771  use defs_abitypes
 772  use m_errors
 773  use m_profiling_abi
 774 
 775 !This section has been created automatically by the script Abilint (TD).
 776 !Do not modify the following lines by hand.
 777 #undef ABI_FUNC
 778 #define ABI_FUNC 'getghc_mGGA'
 779  use interfaces_53_ffts
 780 !End of the abilint section
 781 
 782  implicit none
 783 
 784 !Arguments ------------------------------------
 785 !scalars
 786  integer,intent(in) :: istwf_k,mgfft,my_nspinor,ndat,npw_k,nvloc,n4,n5,n6,use_gpu_cuda
 787  type(MPI_type),intent(in) :: mpi_enreg
 788 !arrays
 789  integer,intent(in) :: gbound_k(2*mgfft+4),kg_k(3,npw_k),ngfft(18)
 790  real(dp),intent(in) :: gprimd(3,3),kpt(3)
 791  real(dp),intent(inout) :: cwavef(2,npw_k*my_nspinor*ndat)
 792  real(dp),intent(inout) :: ghc_mGGA(2,npw_k*my_nspinor*ndat)
 793  real(dp),intent(inout) :: vxctaulocal(n4,n5,n6,nvloc,4)
 794 
 795 !Local variables-------------------------------
 796 !scalars
 797  integer,parameter :: tim_fourwf=1
 798  integer :: idat,idir,ipw,nspinortot,shift
 799  logical :: nspinor1TreatedByThisProc,nspinor2TreatedByThisProc
 800  real(dp) :: gp2pi1,gp2pi2,gp2pi3,kpt_cart,kg_k_cart,weight=one
 801 !arrays
 802  real(dp),allocatable :: cwavef1(:,:),cwavef2(:,:)
 803  real(dp),allocatable :: gcwavef(:,:,:),gcwavef1(:,:,:),gcwavef2(:,:,:)
 804  real(dp),allocatable :: ghc1(:,:),ghc2(:,:)
 805  real(dp),allocatable :: lcwavef(:,:),lcwavef1(:,:),lcwavef2(:,:)
 806  real(dp),allocatable :: work(:,:,:,:)
 807 
 808 ! *********************************************************************
 809 
 810  ghc_mGGA(:,:)=zero
 811  if (nvloc/=1) return
 812 
 813  nspinortot=min(2,(1+mpi_enreg%paral_spinor)*my_nspinor)
 814  if (mpi_enreg%paral_spinor==0) then
 815    shift=npw_k
 816    nspinor1TreatedByThisProc=.true.
 817    nspinor2TreatedByThisProc=(nspinortot==2)
 818  else
 819    shift=0
 820    nspinor1TreatedByThisProc=(mpi_enreg%me_spinor==0)
 821    nspinor2TreatedByThisProc=(mpi_enreg%me_spinor==1)
 822  end if
 823 
 824  ABI_ALLOCATE(work,(2,n4,n5,n6*ndat))
 825 
 826  if (nspinortot==1) then
 827 
 828    ABI_ALLOCATE(ghc1,(2,npw_k*ndat))
 829 
 830 !  Do it in 3 STEPs:
 831 !  STEP1: Compute grad of cwavef and Laplacian of cwavef
 832    ABI_ALLOCATE(gcwavef,(2,npw_k*ndat,3))
 833    ABI_ALLOCATE(lcwavef,(2,npw_k*ndat))
 834 !!$OMP PARALLEL DO
 835    do idat=1,ndat
 836      do ipw=1,npw_k
 837        gcwavef(:,ipw+(idat-1)*npw_k,1:3)=zero
 838        lcwavef(:,ipw+(idat-1)*npw_k)  =zero
 839      end do
 840    end do
 841    do idir=1,3
 842      gp2pi1=gprimd(idir,1)*two_pi
 843      gp2pi2=gprimd(idir,2)*two_pi
 844      gp2pi3=gprimd(idir,3)*two_pi
 845      kpt_cart=gp2pi1*kpt(1)+gp2pi2*kpt(2)+gp2pi3*kpt(3)
 846 !    Multiplication by 2pi i (G+k)_idir for gradient
 847 !    Multiplication by -(2pi (G+k)_idir )**2 for Laplacian
 848      do idat=1,ndat
 849        do ipw=1,npw_k
 850          kg_k_cart=gp2pi1*kg_k(1,ipw)+gp2pi2*kg_k(2,ipw)+gp2pi3*kg_k(3,ipw)+kpt_cart
 851          gcwavef(1,ipw+(idat-1)*npw_k,idir)= cwavef(2,ipw+(idat-1)*npw_k)*kg_k_cart
 852          gcwavef(2,ipw+(idat-1)*npw_k,idir)=-cwavef(1,ipw+(idat-1)*npw_k)*kg_k_cart
 853          lcwavef(1,ipw+(idat-1)*npw_k)=lcwavef(1,ipw+(idat-1)*npw_k)-cwavef(1,ipw+(idat-1)*npw_k)*kg_k_cart**2
 854          lcwavef(2,ipw+(idat-1)*npw_k)=lcwavef(2,ipw+(idat-1)*npw_k)-cwavef(2,ipw+(idat-1)*npw_k)*kg_k_cart**2
 855        end do
 856      end do
 857    end do ! idir
 858 !  STEP2: Compute (vxctaulocal)*(Laplacian of cwavef) and add it to ghc
 859    call fourwf(1,vxctaulocal(:,:,:,:,1),lcwavef,ghc1,work,gbound_k,gbound_k,&
 860 &   istwf_k,kg_k,kg_k,mgfft,mpi_enreg,ndat,ngfft,npw_k,npw_k,n4,n5,n6,2,&
 861 &   mpi_enreg%paral_kgb,tim_fourwf,weight,weight,use_gpu_cuda=use_gpu_cuda)
 862 !!$OMP PARALLEL DO
 863    do idat=1,ndat
 864      do ipw=1,npw_k
 865        ghc_mGGA(:,ipw+(idat-1)*npw_k)=ghc_mGGA(:,ipw+(idat-1)*npw_k)-half*ghc1(:,ipw+(idat-1)*npw_k)
 866      end do
 867    end do
 868    ABI_DEALLOCATE(lcwavef)
 869 !  STEP3: Compute sum of (grad components of vxctaulocal)*(grad components of cwavef)
 870    do idir=1,3
 871      call fourwf(1,vxctaulocal(:,:,:,:,1+idir),gcwavef(:,:,idir),ghc1,work,gbound_k,gbound_k,&
 872      istwf_k,kg_k,kg_k,mgfft,mpi_enreg,ndat,ngfft,npw_k,npw_k,n4,n5,n6,2,&
 873 &     mpi_enreg%paral_kgb,tim_fourwf,weight,weight,use_gpu_cuda=use_gpu_cuda)
 874 !!$OMP PARALLEL DO
 875      do idat=1,ndat
 876        do ipw=1,npw_k
 877          ghc_mGGA(:,ipw+(idat-1)*npw_k)=ghc_mGGA(:,ipw+(idat-1)*npw_k)-half*ghc1(:,ipw+(idat-1)*npw_k)
 878        end do
 879      end do
 880    end do ! idir
 881    ABI_DEALLOCATE(gcwavef)
 882    ABI_DEALLOCATE(ghc1)
 883 
 884  else ! nspinortot==2
 885 
 886    ABI_ALLOCATE(cwavef1,(2,npw_k*ndat))
 887    ABI_ALLOCATE(cwavef2,(2,npw_k*ndat))
 888    do idat=1,ndat
 889      do ipw=1,npw_k
 890        cwavef1(1:2,ipw+(idat-1)*npw_k)=cwavef(1:2,ipw+(idat-1)*my_nspinor*npw_k)
 891        cwavef2(1:2,ipw+(idat-1)*npw_k)=cwavef(1:2,ipw+(idat-1)*my_nspinor*npw_k+shift)
 892      end do
 893    end do
 894 !  call cg_zcopy(npw*ndat,cwavef(1,1),cwavef1)
 895 !  call cg_zcopy(npw*ndat,cwavef(1,1+shift),cwavef2)
 896 
 897 
 898    if (nspinor1TreatedByThisProc) then
 899 
 900      ABI_ALLOCATE(ghc1,(2,npw_k*ndat))
 901 
 902 !    Do it in 3 STEPs:
 903 !    STEP1: Compute grad of cwavef and Laplacian of cwavef
 904      ABI_ALLOCATE(gcwavef1,(2,npw_k*ndat,3))
 905      ABI_ALLOCATE(lcwavef1,(2,npw_k*ndat))
 906 !!$OMP PARALLEL DO
 907      do idat=1,ndat
 908        do ipw=1,npw_k
 909          gcwavef1(:,ipw+(idat-1)*npw_k,1:3)=zero
 910          lcwavef1(:,ipw+(idat-1)*npw_k)=zero
 911        end do
 912      end do
 913      do idir=1,3
 914        gp2pi1=gprimd(idir,1)*two_pi
 915        gp2pi2=gprimd(idir,2)*two_pi
 916        gp2pi3=gprimd(idir,3)*two_pi
 917        kpt_cart=gp2pi1*kpt(1)+gp2pi2*kpt(2)+gp2pi3*kpt(3)
 918 !      Multiplication by 2pi i (G+k)_idir for gradient
 919 !      Multiplication by -(2pi (G+k)_idir )**2 for Laplacian
 920        do idat=1,ndat
 921          do ipw=1,npw_k
 922            kg_k_cart=gp2pi1*kg_k(1,ipw)+gp2pi2*kg_k(2,ipw)+gp2pi3*kg_k(3,ipw)+kpt_cart
 923            gcwavef1(1,ipw+(idat-1)*npw_k,idir)= cwavef1(2,ipw+(idat-1)*npw_k)*kg_k_cart
 924            gcwavef1(2,ipw+(idat-1)*npw_k,idir)=-cwavef1(1,ipw+(idat-1)*npw_k)*kg_k_cart
 925            lcwavef1(1,ipw+(idat-1)*npw_k)=lcwavef1(1,ipw+(idat-1)*npw_k)-cwavef1(1,ipw+(idat-1)*npw_k)*kg_k_cart**2
 926            lcwavef1(2,ipw+(idat-1)*npw_k)=lcwavef1(2,ipw+(idat-1)*npw_k)-cwavef1(2,ipw+(idat-1)*npw_k)*kg_k_cart**2
 927          end do
 928        end do
 929      end do ! idir
 930 !    STEP2: Compute (vxctaulocal)*(Laplacian of cwavef) and add it to ghc
 931      call fourwf(1,vxctaulocal(:,:,:,:,1),lcwavef1,ghc1,work,gbound_k,gbound_k,&
 932 &     istwf_k,kg_k,kg_k,mgfft,mpi_enreg,ndat,ngfft,npw_k,npw_k,n4,n5,n6,2,&
 933 &     mpi_enreg%paral_kgb,tim_fourwf,weight,weight,use_gpu_cuda=use_gpu_cuda)
 934 !!$OMP PARALLEL DO
 935      do idat=1,ndat
 936        do ipw=1,npw_k
 937          ghc_mGGA(:,ipw+(idat-1)*my_nspinor*npw_k)=ghc_mGGA(:,ipw+(idat-1)*my_nspinor*npw_k)-half*ghc1(:,ipw+(idat-1)*npw_k)
 938        end do
 939      end do
 940      ABI_DEALLOCATE(lcwavef1)
 941 !    STEP3: Compute (grad components of vxctaulocal)*(grad components of cwavef)
 942      do idir=1,3
 943        call fourwf(1,vxctaulocal(:,:,:,:,1+idir),gcwavef1(:,:,idir),ghc1,work,gbound_k,gbound_k,&
 944        istwf_k,kg_k,kg_k,mgfft,mpi_enreg,ndat,ngfft,npw_k,npw_k,n4,n5,n6,2,&
 945 &       mpi_enreg%paral_kgb,tim_fourwf,weight,weight,use_gpu_cuda=use_gpu_cuda)
 946 !!$OMP PARALLEL DO
 947        do idat=1,ndat
 948          do ipw=1,npw_k
 949            ghc_mGGA(:,ipw+(idat-1)*my_nspinor*npw_k) = ghc_mGGA(:,ipw+(idat-1)*my_nspinor*npw_k)-half*ghc1(:,ipw+(idat-1)*npw_k)
 950          end do
 951        end do
 952      end do ! idir
 953      ABI_DEALLOCATE(gcwavef1)
 954      ABI_DEALLOCATE(ghc1)
 955 
 956    end if ! spin 1 treated by this proc
 957 
 958    if (nspinor2TreatedByThisProc) then
 959 
 960      ABI_ALLOCATE(ghc2,(2,npw_k*ndat))
 961 
 962 !    Do it in 3 STEPs:
 963 !    STEP1: Compute grad of cwavef and Laplacian of cwavef
 964      ABI_ALLOCATE(gcwavef2,(2,npw_k*ndat,3))
 965      ABI_ALLOCATE(lcwavef2,(2,npw_k*ndat))
 966 !!$OMP PARALLEL DO
 967      do idat=1,ndat
 968        do ipw=1,npw_k
 969          gcwavef2(:,ipw+(idat-1)*npw_k,1:3)=zero
 970          lcwavef2(:,ipw+(idat-1)*npw_k)  =zero
 971        end do
 972      end do
 973      do idir=1,3
 974        gp2pi1=gprimd(idir,1)*two_pi
 975        gp2pi2=gprimd(idir,2)*two_pi
 976        gp2pi3=gprimd(idir,3)*two_pi
 977        kpt_cart=gp2pi1*kpt(1)+gp2pi2*kpt(2)+gp2pi3*kpt(3)
 978 !      Multiplication by 2pi i (G+k)_idir for gradient
 979 !      Multiplication by -(2pi (G+k)_idir )**2 for Laplacian
 980        do idat=1,ndat
 981          do ipw=1,npw_k
 982            kg_k_cart=gp2pi1*kg_k(1,ipw)+gp2pi2*kg_k(2,ipw)+gp2pi3*kg_k(3,ipw)+kpt_cart
 983            gcwavef2(1,ipw+(idat-1)*npw_k,idir)= cwavef2(2,ipw+(idat-1)*npw_k)*kg_k_cart
 984            gcwavef2(2,ipw+(idat-1)*npw_k,idir)=-cwavef2(1,ipw+(idat-1)*npw_k)*kg_k_cart
 985            lcwavef2(1,ipw+(idat-1)*npw_k)=lcwavef2(1,ipw+(idat-1)*npw_k)-cwavef2(1,ipw+(idat-1)*npw_k)*kg_k_cart**2
 986            lcwavef2(2,ipw+(idat-1)*npw_k)=lcwavef2(2,ipw+(idat-1)*npw_k)-cwavef2(2,ipw+(idat-1)*npw_k)*kg_k_cart**2
 987          end do
 988        end do
 989      end do ! idir
 990 !    STEP2: Compute (vxctaulocal)*(Laplacian of cwavef) and add it to ghc
 991      call fourwf(1,vxctaulocal(:,:,:,:,1),lcwavef2,ghc2,work,gbound_k,gbound_k,&
 992 &     istwf_k,kg_k,kg_k,mgfft,mpi_enreg,ndat,ngfft,npw_k,npw_k,n4,n5,n6,2,&
 993 &     mpi_enreg%paral_kgb,tim_fourwf,weight,weight,use_gpu_cuda=use_gpu_cuda)
 994 !!$OMP PARALLEL DO
 995      do idat=1,ndat
 996        do ipw=1,npw_k
 997          ghc_mGGA(:,ipw+(idat-1)*my_nspinor*npw_k)=ghc_mGGA(:,ipw+(idat-1)*my_nspinor*npw_k)-half*ghc2(:,ipw+(idat-1)*npw_k)
 998        end do
 999      end do
1000      ABI_DEALLOCATE(lcwavef2)
1001 !    STEP3: Compute sum of (grad components of vxctaulocal)*(grad components of cwavef)
1002      do idir=1,3
1003        call fourwf(1,vxctaulocal(:,:,:,:,1+idir),gcwavef2(:,:,idir),ghc2,work,gbound_k,gbound_k,&
1004        istwf_k,kg_k,kg_k,mgfft,mpi_enreg,ndat,ngfft,npw_k,npw_k,n4,n5,n6,2,&
1005 &       mpi_enreg%paral_kgb,tim_fourwf,weight,weight,use_gpu_cuda=use_gpu_cuda)
1006 !!$OMP PARALLEL DO
1007        do idat=1,ndat
1008          do ipw=1,npw_k
1009            ghc_mGGA(:,ipw+(idat-1)*my_nspinor*npw_k)=ghc_mGGA(:,ipw+(idat-1)*my_nspinor*npw_k)-half*ghc2(:,ipw+(idat-1)*npw_k)
1010          end do
1011        end do
1012      end do ! idir
1013 
1014      ABI_DEALLOCATE(gcwavef2)
1015      ABI_DEALLOCATE(ghc2)
1016 
1017    end if ! spin 2 treated by this proc
1018 
1019    ABI_DEALLOCATE(cwavef1)
1020    ABI_DEALLOCATE(cwavef2)
1021 
1022  end if ! npsinortot
1023 
1024  ABI_DEALLOCATE(work)
1025 
1026 end subroutine getghc_mGGA