TABLE OF CONTENTS


ABINIT/getghc [ Functions ]

[ Top ] [ Functions ]

NAME

 getghc

FUNCTION

 Compute <G|H|C> for input vector |C> expressed in reciprocal space;
 Result is put in array ghc.
 <G|Vnonlocal|C> is also returned in gvnlc.
 if required, <G|S|C> is returned in gsc (S=overlap - PAW only)
 Note that left and right k points can be different, i.e. ghc=<k^prime+G|H|C_k>.

INPUTS

 cpopt=flag defining the status of cwaveprj%cp(:)=<Proj_i|Cnk> scalars (PAW only)
       (same meaning as in nonlop.F90 routine)
       if cpopt=-1, <p_lmn|in> (and derivatives) are computed here (and not saved)
       if cpopt= 0, <p_lmn|in> are computed here and saved
       if cpopt= 1, <p_lmn|in> and first derivatives are computed here and saved
       if cpopt= 2  <p_lmn|in> are already in memory;
       if cpopt= 3  <p_lmn|in> are already in memory; first derivatives are computed here and saved
       if cpopt= 4  <p_lmn|in> and first derivatives are already in memory;
 cwavef(2,npw*my_nspinor*ndat)=planewave coefficients of wavefunction.
 gs_ham <type(gs_hamiltonian_type)>=all data for the Hamiltonian to be applied
 lambda=factor to be used when computing <G|H-lambda.S|C> - only for sij_opt=-1
        Typically lambda is the eigenvalue (or its guess)
 mpi_enreg=information about MPI parallelization
 ndat=number of FFT to do in parallel
 prtvol=control print volume and debugging output
 sij_opt= -PAW ONLY-  if  0, only matrix elements <G|H|C> have to be computed
    (S=overlap)       if  1, matrix elements <G|S|C> have to be computed in gsc in addition to ghc
                      if -1, matrix elements <G|H-lambda.S|C> have to be computed in ghc (gsc not used)
 tim_getghc=timing code of the calling subroutine(can be set to 0 if not attributed)
 type_calc= option governing which part of Hamitonian is to be applied:
            1: local part only
            2: non-local+kinetic only (added to the exixting Hamiltonian)
            3: local + kinetic only (added to the existing Hamiltonian)
 ===== Optional inputs =====
   [kg_fft_k(3,:)]=optional, (k+G) vector coordinates to be used for the FFT tranformation
                   instead of the one contained in gs_ham datastructure.
                   Typically used for real WF (in parallel) which are FFT-transformed 2 by 2.
   [kg_fft_kp(3,:)]=optional, (k^prime+G) vector coordinates to be used for the FFT tranformation
   [select_k]=optional, option governing the choice of k points to be used.
             gs_ham datastructure contains quantities needed to apply Hamiltonian
             in reciprocal space between 2 kpoints, k and k^prime (equal in most cases);
             if select_k=1, <k^prime|H|k>       is applied [default]
             if select_k=2, <k|H|k^prime>       is applied
             if select_k=3, <k|H|k>             is applied
             if select_k=4, <k^prime|H|k^prime> is applied

OUTPUT

  ghc(2,npw*my_nspinor*ndat)=matrix elements <G|H|C> (if sij_opt>=0)
                                          or <G|H-lambda.S|C> (if sij_opt=-1)
  gvnlc(2,npw*my_nspinor*ndat)=matrix elements <G|Vnonlocal|C> (if sij_opt>=0)
                                            or <G|Vnonlocal-lambda.S|C> (if sij_opt=-1)
  if (sij_opt=1)
    gsc(2,npw*my_nspinor*ndat)=matrix elements <G|S|C> (S=overlap).

SIDE EFFECTS

  cwaveprj(natom,my_nspinor*(1+cpopt)*ndat)= wave function projected on nl projectors (PAW only)

PARENTS

      cgwf,chebfi,dfpt_cgwf,gwls_hamiltonian,ks_ddiago,lobpcgwf,m_io_kss
      m_rf2,mkresi,multithreaded_getghc

CHILDREN

      fourwf

SOURCE

127 subroutine getghc(cpopt,cwavef,cwaveprj,ghc,gsc,gs_ham,gvnlc,lambda,mpi_enreg,ndat,&
128 &                 prtvol,sij_opt,tim_getghc,type_calc,&
129 &                 kg_fft_k,kg_fft_kp,select_k) ! optional arguments
130 
131 
132 !This section has been created automatically by the script Abilint (TD).
133 !Do not modify the following lines by hand.
134 #undef ABI_FUNC
135 #define ABI_FUNC 'getghc'
136 !End of the abilint section
137 
138  implicit none
139 
140 !Arguments ------------------------------------
141 !scalars
142  integer,intent(in) :: cpopt,ndat, prtvol
143  integer,intent(in) :: sij_opt,tim_getghc,type_calc
144  integer,intent(in),optional :: select_k
145  real(dp),intent(in) :: lambda
146  type(MPI_type),intent(in) :: mpi_enreg
147  type(gs_hamiltonian_type),intent(inout),target :: gs_ham
148 !arrays
149  integer,intent(in),optional,target :: kg_fft_k(:,:),kg_fft_kp(:,:)
150  real(dp),intent(out),target :: gsc(:,:)
151  real(dp),intent(inout) :: cwavef(:,:)
152  real(dp),intent(out) :: ghc(:,:),gvnlc(:,:)
153  type(pawcprj_type),intent(inout),target :: cwaveprj(:,:)
154 
155 !Local variables-------------------------------
156 !scalars
157  integer,parameter :: level=114,re=1,im=2,tim_fourwf=1
158  integer :: choice,cplex,cpopt_here,i1,i2,i3,idat,idir,ierr
159  integer :: ig,igspinor,ii,iispinor,ikpt_this_proc,ipw,ispinor,my_nspinor
160  integer :: nnlout,npw_fft,npw_k1,npw_k2,nspinortot
161  integer :: paw_opt,select_k_,shift1,shift2,signs,tim_nonlop
162  logical :: k1_eq_k2,have_to_reequilibrate,has_fock
163  logical :: nspinor1TreatedByThisProc,nspinor2TreatedByThisProc
164  real(dp) :: ghcim,ghcre,weight
165  character(len=500) :: msg
166 !arrays
167  integer, pointer :: gbound_k1(:,:),gbound_k2(:,:),kg_k1(:,:),kg_k2(:,:)
168  integer, ABI_CONTIGUOUS pointer :: indices_pw_fft(:),kg_k_fft(:,:)
169  integer, ABI_CONTIGUOUS pointer :: recvcount_fft(:),recvdisp_fft(:)
170  integer, ABI_CONTIGUOUS pointer ::  sendcount_fft(:),senddisp_fft(:)
171  integer, allocatable:: dimcprj(:)
172  real(dp) :: enlout(ndat),lambda_ndat(ndat),tsec(2)
173  real(dp),target :: nonlop_dum(1,1)
174  real(dp),allocatable :: buff_wf(:,:),cwavef1(:,:),cwavef2(:,:),cwavef_fft(:,:),cwavef_fft_tr(:,:)
175  real(dp),allocatable :: ghc1(:,:),ghc2(:,:),ghc3(:,:),ghc4(:,:),ghcnd(:,:),ghc_mGGA(:,:)
176  real(dp),allocatable :: vlocal_tmp(:,:,:),work(:,:,:,:)
177  real(dp), pointer :: kinpw_k1(:),kinpw_k2(:),kpt_k1(:),kpt_k2(:)
178  real(dp), pointer :: gsc_ptr(:,:)
179  type(fock_common_type),pointer :: fock
180  type(pawcprj_type),pointer :: cwaveprj_fock(:,:),cwaveprj_idat(:,:),cwaveprj_nonlop(:,:)
181 
182 ! *********************************************************************
183 
184  DBG_ENTER("COLL")
185 
186 !Keep track of total time spent in getghc:
187  call timab(200+tim_getghc,1,tsec)
188 
189 !Structured debugging if prtvol==-level
190  if(prtvol==-level)then
191    write(msg,'(80a,a,a)') ('=',ii=1,80),ch10,' getghc : enter, debugging '
192    call wrtout(std_out,msg,'PERS')
193  end if
194 
195 !Select k-dependent objects according to select_k input parameter
196  select_k_=1;if (present(select_k)) select_k_=select_k
197  if (select_k_==KPRIME_H_K) then
198 !  <k^prime|H|k>
199    npw_k1    =  gs_ham%npw_fft_k ; npw_k2    =  gs_ham%npw_fft_kp
200    kpt_k1    => gs_ham%kpt_k     ; kpt_k2    => gs_ham%kpt_kp
201    kg_k1     => gs_ham%kg_k      ; kg_k2     => gs_ham%kg_kp
202    gbound_k1 => gs_ham%gbound_k  ; gbound_k2 => gs_ham%gbound_kp
203    kinpw_k1  => gs_ham%kinpw_k   ; kinpw_k2  => gs_ham%kinpw_kp
204  else if (select_k_==K_H_KPRIME) then
205 !  <k|H|k^prime>
206    npw_k1    =  gs_ham%npw_fft_kp; npw_k2    =  gs_ham%npw_fft_k
207    kpt_k1    => gs_ham%kpt_kp    ; kpt_k2    => gs_ham%kpt_k
208    kg_k1     => gs_ham%kg_kp     ; kg_k2     => gs_ham%kg_k
209    gbound_k1 => gs_ham%gbound_kp ; gbound_k2 => gs_ham%gbound_k
210    kinpw_k1  => gs_ham%kinpw_kp  ; kinpw_k2  => gs_ham%kinpw_k
211  else if (select_k_==K_H_K) then
212 !  <k|H|k>
213    npw_k1    =  gs_ham%npw_fft_k ; npw_k2    =  gs_ham%npw_fft_k
214    kpt_k1    => gs_ham%kpt_k     ; kpt_k2    => gs_ham%kpt_k
215    kg_k1     => gs_ham%kg_k      ; kg_k2     => gs_ham%kg_k
216    gbound_k1 => gs_ham%gbound_k  ; gbound_k2 => gs_ham%gbound_k
217    kinpw_k1  => gs_ham%kinpw_k   ; kinpw_k2  => gs_ham%kinpw_k
218  else if (select_k_==KPRIME_H_KPRIME) then
219 !  <k^prime|H|k^prime>
220    npw_k1    =  gs_ham%npw_fft_kp; npw_k2    =  gs_ham%npw_fft_kp
221    kpt_k1    => gs_ham%kpt_kp    ; kpt_k2    => gs_ham%kpt_kp
222    kg_k1     => gs_ham%kg_kp     ; kg_k2     => gs_ham%kg_kp
223    gbound_k1 => gs_ham%gbound_kp ; gbound_k2 => gs_ham%gbound_kp
224    kinpw_k1  => gs_ham%kinpw_kp  ; kinpw_k2  => gs_ham%kinpw_kp
225  end if
226  k1_eq_k2=(all(abs(kpt_k1(:)-kpt_k2(:))<tol8))
227 
228 !Check sizes
229  my_nspinor=max(1,gs_ham%nspinor/mpi_enreg%nproc_spinor)
230  if (size(cwavef)<2*npw_k1*my_nspinor*ndat) then
231    msg='wrong size for cwavef!'
232    MSG_BUG(msg)
233  end if
234  if (size(ghc)<2*npw_k2*my_nspinor*ndat) then
235    msg='wrong size for ghc!'
236    MSG_BUG(msg)
237  end if
238  if (size(gvnlc)<2*npw_k2*my_nspinor*ndat) then
239    msg='wrong size for gvnlc!'
240    MSG_BUG(msg)
241  end if
242  if (sij_opt==1) then
243    if (size(gsc)<2*npw_k2*my_nspinor*ndat) then
244      msg='wrong size for gsc!'
245      MSG_BUG(msg)
246    end if
247  end if
248  if (gs_ham%usepaw==1.and.cpopt>=0) then
249    if (size(cwaveprj)<gs_ham%natom*my_nspinor*ndat) then
250      msg='wrong size for cwaveprj!'
251      MSG_BUG(msg)
252    end if
253  end if
254 
255 !Eventually overwrite plane waves data for FFT
256  if (present(kg_fft_k)) then
257    kg_k1 => kg_fft_k ; kg_k2 => kg_fft_k
258    npw_k1=size(kg_k1,2) ; npw_k2=size(kg_k2,2)
259  end if
260  if (present(kg_fft_kp)) then
261    kg_k2 => kg_fft_kp ; npw_k2=size(kg_k2,2)
262  end if
263 
264 !paral_kgb constraint
265  if (mpi_enreg%paral_kgb==1.and.(.not.k1_eq_k2)) then
266    msg='paral_kgb=1 not allowed for k/=k_^prime!'
267    MSG_BUG(msg)
268  end if
269 
270 !Do we add Fock exchange term ?
271  has_fock=(associated(gs_ham%fockcommon))
272  if (has_fock) fock => gs_ham%fockcommon
273 
274 !Parallelization over spinors management
275  nspinortot=gs_ham%nspinor
276  if (mpi_enreg%paral_spinor==0) then
277    shift1=npw_k1;shift2=npw_k2
278    nspinor1TreatedByThisProc=.true.
279    nspinor2TreatedByThisProc=(nspinortot==2)
280  else
281    shift1=0;shift2=0
282    nspinor1TreatedByThisProc=(mpi_enreg%me_spinor==0)
283    nspinor2TreatedByThisProc=(mpi_enreg%me_spinor==1)
284  end if
285 
286 !============================================================
287 ! Application of the local potential
288 !============================================================
289 
290  if ((type_calc==0).or.(type_calc==1).or.(type_calc==3)) then
291 
292 !  Need a Vlocal
293    if (.not.associated(gs_ham%vlocal)) then
294      msg='we need Vlocal!'
295      MSG_BUG(msg)
296    end if
297 
298 !  fourwf can only process with one value of istwf_k
299    if (.not.k1_eq_k2) then
300      msg='vlocal (fourwf) cannot be computed with k/=k^prime!'
301      MSG_BUG(msg)
302    end if
303 
304 !  Eventually adjust load balancing for FFT (by changing FFT distrib)
305    have_to_reequilibrate=.false.
306    if (mpi_enreg%paral_kgb==1) then
307      ikpt_this_proc=bandfft_kpt_get_ikpt()
308      have_to_reequilibrate=bandfft_kpt(ikpt_this_proc)%have_to_reequilibrate
309    end if
310    if (have_to_reequilibrate) then
311      npw_fft =  bandfft_kpt(ikpt_this_proc)%npw_fft
312      sendcount_fft  => bandfft_kpt(ikpt_this_proc)%sendcount_fft(:)
313      recvcount_fft  => bandfft_kpt(ikpt_this_proc)%recvcount_fft(:)
314      senddisp_fft   => bandfft_kpt(ikpt_this_proc)%senddisp_fft(:)
315      recvdisp_fft   => bandfft_kpt(ikpt_this_proc)%recvdisp_fft(:)
316      indices_pw_fft => bandfft_kpt(ikpt_this_proc)%indices_pw_fft(:)
317      kg_k_fft       => bandfft_kpt(ikpt_this_proc)%kg_k_fft(:,:)
318      ABI_ALLOCATE(buff_wf,(2,npw_k1*ndat) )
319      ABI_ALLOCATE(cwavef_fft,(2,npw_fft*ndat) )
320      if(ndat>1) then
321        ABI_ALLOCATE(cwavef_fft_tr, (2,npw_fft*ndat))
322      end if
323      do idat=1, ndat
324        do ipw = 1 ,npw_k1
325          buff_wf(1:2, idat + ndat*(indices_pw_fft(ipw)-1) ) = cwavef(1:2,ipw + npw_k1*(idat-1))
326        end do
327      end do
328      if(ndat > 1) then
329        call xmpi_alltoallv(buff_wf,2*ndat*sendcount_fft,2*ndat*senddisp_fft,  &
330 &       cwavef_fft_tr,2*ndat*recvcount_fft, 2*ndat*recvdisp_fft, mpi_enreg%comm_fft,ierr)
331 !      We need to transpose data
332        do idat=1,ndat
333          do ipw = 1 ,npw_fft
334            cwavef_fft(1:2,  ipw + npw_fft*(idat-1)) = cwavef_fft_tr(1:2,  idat + ndat*(ipw-1))
335          end do
336        end do
337      else
338        call xmpi_alltoallv(buff_wf,2*sendcount_fft,2*senddisp_fft,  &
339 &       cwavef_fft,2*recvcount_fft, 2*recvdisp_fft, mpi_enreg%comm_fft,ierr)
340      end if
341    end if
342 
343 !  Apply the local potential to the wavefunction
344 !  Start from wavefunction in reciprocal space cwavef
345 !  End with function ghc in reciprocal space also.
346    ABI_ALLOCATE(work,(2,gs_ham%n4,gs_ham%n5,gs_ham%n6*ndat))
347    weight=one
348 
349    if (nspinortot==2) then
350      ABI_ALLOCATE(cwavef1,(2,npw_k1*ndat))
351      ABI_ALLOCATE(cwavef2,(2,npw_k1*ndat))
352      do idat=1,ndat
353        do ipw=1,npw_k1
354          cwavef1(1:2,ipw+(idat-1)*npw_k1)=cwavef(1:2,ipw+(idat-1)*my_nspinor*npw_k1)
355          cwavef2(1:2,ipw+(idat-1)*npw_k1)=cwavef(1:2,ipw+(idat-1)*my_nspinor*npw_k1+shift1)
356        end do
357      end do
358 !    call cg_zcopy(npw_k1*ndat,cwavef(1,1),cwavef1)
359 !    call cg_zcopy(npw_k1*ndat,cwavef(1,1+shift1),cwavef2)
360    end if
361 
362 !  Treat scalar local potentials
363    if (gs_ham%nvloc==1) then
364 
365      if (nspinortot==1) then
366 
367        if (have_to_reequilibrate) then
368          call fourwf(1,gs_ham%vlocal,cwavef_fft,cwavef_fft,work,gbound_k1,gbound_k2,&
369 &         gs_ham%istwf_k,kg_k_fft,kg_k_fft,gs_ham%mgfft,mpi_enreg,ndat,gs_ham%ngfft,&
370 &         npw_fft,npw_fft,gs_ham%n4,gs_ham%n5,gs_ham%n6,2,mpi_enreg%paral_kgb,tim_fourwf,&
371 &         weight,weight,use_gpu_cuda=gs_ham%use_gpu_cuda)
372        else
373          call fourwf(1,gs_ham%vlocal,cwavef,ghc,work,gbound_k1,gbound_k2,&
374 &         gs_ham%istwf_k,kg_k1,kg_k2,gs_ham%mgfft,mpi_enreg,ndat,gs_ham%ngfft,&
375 &         npw_k1,npw_k2,gs_ham%n4,gs_ham%n5,gs_ham%n6,2,mpi_enreg%paral_kgb,tim_fourwf,&
376 &         weight,weight,use_gpu_cuda=gs_ham%use_gpu_cuda)
377        end if
378 
379      else ! nspinortot==2
380 
381        if (nspinor1TreatedByThisProc) then
382          ABI_ALLOCATE(ghc1,(2,npw_k2*ndat))
383          call fourwf(1,gs_ham%vlocal,cwavef1,ghc1,work,gbound_k1,gbound_k2,&
384 &         gs_ham%istwf_k,kg_k1,kg_k2,gs_ham%mgfft,mpi_enreg,ndat,gs_ham%ngfft,&
385 &         npw_k1,npw_k2,gs_ham%n4,gs_ham%n5,gs_ham%n6,2,mpi_enreg%paral_kgb,tim_fourwf,&
386 &         weight,weight,use_gpu_cuda=gs_ham%use_gpu_cuda)
387          do idat=1,ndat
388            do ipw =1, npw_k2
389              ghc(1:2,ipw+(idat-1)*my_nspinor*npw_k2)=ghc1(1:2,ipw+(idat-1)*npw_k2)
390            end do
391          end do
392          ABI_DEALLOCATE(ghc1)
393        end if ! spin 1 treated by this proc
394 
395        if (nspinor2TreatedByThisProc) then
396          ABI_ALLOCATE(ghc2,(2,npw_k2*ndat))
397          call fourwf(1,gs_ham%vlocal,cwavef2,ghc2,work,gbound_k1,gbound_k2,&
398 &         gs_ham%istwf_k,kg_k1,kg_k2,gs_ham%mgfft,mpi_enreg,ndat,gs_ham%ngfft,&
399 &         npw_k1,npw_k2,gs_ham%n4,gs_ham%n5,gs_ham%n6,2,mpi_enreg%paral_kgb,tim_fourwf,weight,weight,&
400 &         use_gpu_cuda=gs_ham%use_gpu_cuda)
401          do idat=1,ndat
402            do ipw=1,npw_k2
403              ghc(1:2,ipw+(idat-1)*my_nspinor*npw_k2+shift2)=ghc2(1:2,ipw+(idat-1)*npw_k2)
404            end do
405          end do
406          ABI_DEALLOCATE(ghc2)
407        end if ! spin 2 treated by this proc
408 
409      end if ! npsinortot
410 
411 !    Treat non-collinear local potentials
412    else if (gs_ham%nvloc==4) then
413 
414      ABI_ALLOCATE(ghc1,(2,npw_k2*ndat))
415      ABI_ALLOCATE(ghc2,(2,npw_k2*ndat))
416      ABI_ALLOCATE(ghc3,(2,npw_k2*ndat))
417      ABI_ALLOCATE(ghc4,(2,npw_k2*ndat))
418      ghc1(:,:)=zero; ghc2(:,:)=zero; ghc3(:,:)=zero ;  ghc4(:,:)=zero
419      ABI_ALLOCATE(vlocal_tmp,(gs_ham%n4,gs_ham%n5,gs_ham%n6))
420 !    ghc1=v11*phi1
421      vlocal_tmp(:,:,:)=gs_ham%vlocal(:,:,:,1)
422      if (nspinor1TreatedByThisProc) then
423        call fourwf(1,vlocal_tmp,cwavef1,ghc1,work,gbound_k1,gbound_k2,&
424 &       gs_ham%istwf_k,kg_k1,kg_k2,gs_ham%mgfft,mpi_enreg,ndat,gs_ham%ngfft,&
425 &       npw_k1,npw_k2,gs_ham%n4,gs_ham%n5,gs_ham%n6,2,mpi_enreg%paral_kgb,tim_fourwf,weight,weight,&
426 &       use_gpu_cuda=gs_ham%use_gpu_cuda)
427      end if
428 !    ghc2=v22*phi2
429      vlocal_tmp(:,:,:)=gs_ham%vlocal(:,:,:,2)
430      if (nspinor2TreatedByThisProc) then
431        call fourwf(1,vlocal_tmp,cwavef2,ghc2,work,gbound_k1,gbound_k2,&
432 &       gs_ham%istwf_k,kg_k1,kg_k2,gs_ham%mgfft,mpi_enreg,ndat,gs_ham%ngfft,&
433 &       npw_k1,npw_k2,gs_ham%n4,gs_ham%n5,gs_ham%n6,2,mpi_enreg%paral_kgb,tim_fourwf,weight,weight,&
434 &       use_gpu_cuda=gs_ham%use_gpu_cuda)
435      end if
436      ABI_DEALLOCATE(vlocal_tmp)
437      cplex=2
438      ABI_ALLOCATE(vlocal_tmp,(cplex*gs_ham%n4,gs_ham%n5,gs_ham%n6))
439 !    ghc3=(re(v12)-im(v12))*phi1
440      do i3=1,gs_ham%n6
441        do i2=1,gs_ham%n5
442          do i1=1,gs_ham%n4
443            vlocal_tmp(2*i1-1,i2,i3)= gs_ham%vlocal(i1,i2,i3,3)
444            vlocal_tmp(2*i1  ,i2,i3)=-gs_ham%vlocal(i1,i2,i3,4)
445          end do
446        end do
447      end do
448      if (nspinor1TreatedByThisProc) then
449        call fourwf(cplex,vlocal_tmp,cwavef1,ghc3,work,gbound_k1,gbound_k2,&
450 &       gs_ham%istwf_k,kg_k1,kg_k2,gs_ham%mgfft,mpi_enreg,ndat,gs_ham%ngfft,&
451 &       npw_k1,npw_k2,gs_ham%n4,gs_ham%n5,gs_ham%n6,2,mpi_enreg%paral_kgb,tim_fourwf,weight,weight,&
452 &       use_gpu_cuda=gs_ham%use_gpu_cuda)
453      end if
454 !    ghc4=(re(v12)+im(v12))*phi2
455      if (nspinor2TreatedByThisProc) then
456        do i3=1,gs_ham%n6
457          do i2=1,gs_ham%n5
458            do i1=1,gs_ham%n4
459              vlocal_tmp(2*i1,i2,i3)=-vlocal_tmp(2*i1,i2,i3)
460            end do
461          end do
462        end do
463        call fourwf(cplex,vlocal_tmp,cwavef2,ghc4,work,gbound_k1,gbound_k2,&
464 &       gs_ham%istwf_k,kg_k1,kg_k2,gs_ham%mgfft,mpi_enreg,ndat,gs_ham%ngfft,&
465 &       npw_k1,npw_k2,gs_ham%n4,gs_ham%n5,gs_ham%n6,2,mpi_enreg%paral_kgb,tim_fourwf,weight,weight,&
466 &       use_gpu_cuda=gs_ham%use_gpu_cuda)
467      end if
468      ABI_DEALLOCATE(vlocal_tmp)
469 !    Build ghc from pieces
470 !    (v11,v22,Re(v12)+iIm(v12);Re(v12)-iIm(v12))(psi1;psi2): matrix product
471      if (mpi_enreg%paral_spinor==0) then
472        do idat=1,ndat
473          do ipw=1,npw_k2
474            ghc(1:2,ipw+(idat-1)*my_nspinor*npw_k2)       =ghc1(1:2,ipw+(idat-1)*npw_k2)+ghc4(1:2,ipw+(idat-1)*npw_k2)
475            ghc(1:2,ipw+(idat-1)*my_nspinor*npw_k2+npw_k2)=ghc3(1:2,ipw+(idat-1)*npw_k2)+ghc2(1:2,ipw+(idat-1)*npw_k2)
476          end do
477        end do
478      else
479        call xmpi_sum(ghc4,mpi_enreg%comm_spinor,ierr)
480        call xmpi_sum(ghc3,mpi_enreg%comm_spinor,ierr)
481        if (nspinor1TreatedByThisProc) then
482          do idat=1,ndat
483            do ipw=1,npw_k2
484              ghc(1:2,ipw+(idat-1)*my_nspinor*npw_k2)=ghc1(1:2,ipw+(idat-1)*npw_k2)+ghc4(1:2,ipw+(idat-1)*npw_k2)
485            end do
486          end do
487        else if (nspinor2TreatedByThisProc) then
488          do idat=1,ndat
489            do ipw=1,npw_k2
490              ghc(1:2,ipw+(idat-1)*my_nspinor*npw_k2)=ghc3(1:2,ipw+(idat-1)*npw_k2)+ghc2(1:2,ipw+(idat-1)*npw_k2)
491            end do
492          end do
493        end if
494      end if
495      ABI_DEALLOCATE(ghc1)
496      ABI_DEALLOCATE(ghc2)
497      ABI_DEALLOCATE(ghc3)
498      ABI_DEALLOCATE(ghc4)
499    end if ! nvloc
500 
501    if (nspinortot==2)  then
502      ABI_DEALLOCATE(cwavef1)
503      ABI_DEALLOCATE(cwavef2)
504    end if
505    ABI_DEALLOCATE(work)
506 
507 !  Retrieve eventually original FFT distrib
508    if(have_to_reequilibrate) then
509      if(ndat > 1 ) then
510        do idat=1,ndat
511          do ipw = 1 ,npw_fft
512            cwavef_fft_tr(1:2,  idat + ndat*(ipw-1)) = cwavef_fft(1:2,  ipw + npw_fft*(idat-1))
513          end do
514        end do
515        call xmpi_alltoallv(cwavef_fft_tr,2*ndat*recvcount_fft, 2*ndat*recvdisp_fft, &
516 &       buff_wf,2*ndat*sendcount_fft,2*ndat*senddisp_fft, mpi_enreg%comm_fft,ierr)
517      else
518        call xmpi_alltoallv(cwavef_fft,2*recvcount_fft, 2*recvdisp_fft, &
519 &       buff_wf,2*sendcount_fft,2*senddisp_fft, mpi_enreg%comm_fft,ierr)
520      end if
521      do idat=1,ndat
522        do ipw = 1 ,npw_k2
523          ghc(1:2,ipw + npw_k2*(idat-1)) = buff_wf(1:2, idat + ndat*(indices_pw_fft(ipw)-1))
524        end do
525      end do
526      ABI_DEALLOCATE(buff_wf)
527      ABI_DEALLOCATE(cwavef_fft)
528      if(ndat > 1) then
529        ABI_DEALLOCATE(cwavef_fft_tr)
530      end if
531    end if
532 
533 !  Add metaGGA contribution
534    if (associated(gs_ham%vxctaulocal)) then
535      if (.not.k1_eq_k2) then
536        msg='metaGGA not allowed for k/=k_^prime!'
537        MSG_BUG(msg)
538      end if
539      if (size(gs_ham%vxctaulocal)/=gs_ham%n4*gs_ham%n5*gs_ham%n6*gs_ham%nvloc*4) then
540        msg='wrong sizes for vxctaulocal!'
541        MSG_BUG(msg)
542      end if
543      ABI_ALLOCATE(ghc_mGGA,(2,npw_k2*my_nspinor*ndat))
544      call getghc_mGGA(cwavef,ghc_mGGA,gbound_k1,gs_ham%gprimd,gs_ham%istwf_k,kg_k1,kpt_k1,&
545 &     gs_ham%mgfft,mpi_enreg,ndat,gs_ham%ngfft,npw_k1,gs_ham%nvloc,&
546 &     gs_ham%n4,gs_ham%n5,gs_ham%n6,my_nspinor,gs_ham%vxctaulocal,gs_ham%use_gpu_cuda)
547      ghc(1:2,1:npw_k2*my_nspinor*ndat)=ghc(1:2,1:npw_k2*my_nspinor*ndat)+ghc_mGGA(1:2,1:npw_k2*my_nspinor*ndat)
548      ABI_DEALLOCATE(ghc_mGGA)
549    end if
550 
551 !  Add nuclear dipole moment contribution if nonzero
552    if (any(abs(gs_ham%nucdipmom)>tol8)) then
553      if (.not.k1_eq_k2) then
554        msg='Nuclear Dipole Moment not allowed for k/=k_^prime!'
555        MSG_BUG(msg)
556      end if
557      ABI_ALLOCATE(ghcnd,(2,npw_k2*my_nspinor*ndat))
558      call getghcnd(cwavef,ghcnd,gs_ham,my_nspinor,ndat)
559      ghc(1:2,1:npw_k2*my_nspinor*ndat)=ghc(1:2,1:npw_k2*my_nspinor*ndat)+ghcnd(1:2,1:npw_k2*my_nspinor*ndat)
560      ABI_DEALLOCATE(ghcnd)
561    end if ! end computation of nuclear dipole moment component
562 
563  end if ! type_calc
564 
565 
566  if ((type_calc==0).or.(type_calc==2).or.(type_calc==3)) then
567 
568 !============================================================
569 ! Application of the non-local potential
570 !============================================================
571 
572    if ((type_calc==0).or.(type_calc==2)) then
573      signs=2 ; choice=1 ; nnlout=1 ; idir=0 ; tim_nonlop=1
574      cpopt_here=-1;if (gs_ham%usepaw==1) cpopt_here=cpopt
575      if (has_fock) then
576        if (gs_ham%usepaw==1) then
577          cpopt_here=max(cpopt,0)
578          if (cpopt<2) then
579            ABI_DATATYPE_ALLOCATE(cwaveprj_fock,(gs_ham%natom,my_nspinor*ndat))
580            ABI_ALLOCATE(dimcprj,(gs_ham%natom))
581            call pawcprj_getdim(dimcprj,gs_ham%natom,gs_ham%nattyp,gs_ham%ntypat,&
582 &           gs_ham%typat,fock%pawtab,'O')
583            call pawcprj_alloc(cwaveprj_fock,0,dimcprj)
584            ABI_DEALLOCATE(dimcprj)
585          else
586            cwaveprj_fock=>cwaveprj
587          end if
588          cwaveprj_nonlop=>cwaveprj_fock
589        else
590          cwaveprj_nonlop=>cwaveprj
591          cwaveprj_fock=>cwaveprj
592        end if
593      else
594        cwaveprj_nonlop=>cwaveprj
595      end if
596      paw_opt=gs_ham%usepaw ; if (sij_opt/=0) paw_opt=sij_opt+3
597      lambda_ndat = lambda
598 
599      if (gs_ham%usepaw==0) gsc_ptr => nonlop_dum
600      if (gs_ham%usepaw==1) gsc_ptr => gsc
601      call nonlop(choice,cpopt_here,cwaveprj_nonlop,enlout,gs_ham,idir,lambda_ndat,mpi_enreg,ndat,&
602 &     nnlout,paw_opt,signs,gsc_ptr,tim_nonlop,cwavef,gvnlc,select_k=select_k_)
603   end if ! end type_calc 0 or 2 for nonlop application
604   if (type_calc == 3) then ! for kinetic and local only, nonlocal should be zero
605      gvnlc(:,:) = zero
606   end if
607 
608 !============================================================
609 ! Assemble kinetic, local, nonlocal and Fock contributions
610 !============================================================
611 
612 !  Assemble modified kinetic, local and nonlocal contributions
613 !  to <G|H|C(n,k)>. Take also into account build-in debugging.
614    if(prtvol/=-level)then
615      do idat=1,ndat
616        if (k1_eq_k2) then
617 !      !!$OMP PARALLEL DO PRIVATE(igspinor) COLLAPSE(2)
618          do ispinor=1,my_nspinor
619            do ig=1,npw_k2
620              igspinor=ig+npw_k2*(ispinor-1)+npw_k2*my_nspinor*(idat-1)
621              if(kinpw_k2(ig)<huge(zero)*1.d-11)then
622                ghc(re,igspinor)= ghc(re,igspinor) + kinpw_k2(ig)*cwavef(re,igspinor) + gvnlc(re,igspinor)
623                ghc(im,igspinor)= ghc(im,igspinor) + kinpw_k2(ig)*cwavef(im,igspinor) + gvnlc(im,igspinor)
624              else
625                ghc(re,igspinor)=zero
626                ghc(im,igspinor)=zero
627                if (sij_opt==1) then
628                  gsc(re,igspinor)=zero
629                  gsc(im,igspinor)=zero
630                end if
631              end if
632            end do ! ig
633          end do ! ispinor
634 
635        else
636 !      !!$OMP PARALLEL DO PRIVATE(igspinor) COLLAPSE(2)
637          do ispinor=1,my_nspinor
638            do ig=1,npw_k2
639              igspinor=ig+npw_k2*(ispinor-1)+npw_k2*my_nspinor*(idat-1)
640              if(kinpw_k2(ig)<huge(zero)*1.d-11)then
641                ghc(re,igspinor)= ghc(re,igspinor) + gvnlc(re,igspinor)
642                ghc(im,igspinor)= ghc(im,igspinor) + gvnlc(im,igspinor)
643              else
644                ghc(re,igspinor)=zero
645                ghc(im,igspinor)=zero
646                if (sij_opt==1) then
647                  gsc(re,igspinor)=zero
648                  gsc(im,igspinor)=zero
649                end if
650              end if
651            end do ! ig
652          end do ! ispinor
653        end if
654      end do ! idat
655    else
656 !    Here, debugging section
657      call wrtout(std_out,' getghc : components of ghc ','PERS')
658      write(msg,'(a)')&
659 &     'icp ig ispinor igspinor re/im     ghc        kinpw         cwavef      glocc        gvnlc  gsc'
660      call wrtout(std_out,msg,'PERS')
661      do idat=1,ndat
662        do ispinor=1,my_nspinor
663          do ig=1,npw_k2
664            igspinor=ig+npw_k2*(ispinor-1)+npw_k2*my_nspinor*(idat-1)
665            if(kinpw_k2(ig)<huge(zero)*1.d-11)then
666              if (k1_eq_k2) then
667                ghcre=kinpw_k2(ig)*cwavef(re,igspinor)+ghc(re,igspinor)+gvnlc(re,igspinor)
668                ghcim=kinpw_k2(ig)*cwavef(im,igspinor)+ghc(im,igspinor)+gvnlc(im,igspinor)
669              else
670                ghcre=ghc(re,igspinor)+gvnlc(re,igspinor)
671                ghcim=ghc(im,igspinor)+gvnlc(im,igspinor)
672              end if
673            else
674              ghcre=zero
675              ghcim=zero
676              if (sij_opt==1) then
677                gsc(re,igspinor)=zero
678                gsc(im,igspinor)=zero
679              end if
680            end if
681            iispinor=ispinor;if (mpi_enreg%paral_spinor==1) iispinor=mpi_enreg%me_spinor+1
682            if (sij_opt == 1) then
683              write(msg,'(a,3(1x,i5),6(1x,es13.6))') '  1 ', ig, iispinor, igspinor,ghcre,&
684 &             kinpw_k2(ig),cwavef(re,igspinor),ghc(re,igspinor),gvnlc(re,igspinor), gsc(re,igspinor)
685              call wrtout(std_out,msg,'PERS')
686              write(msg,'(a,3(1x,i5),6(1x,es13.6))') '  2 ', ig, iispinor, igspinor,ghcim,&
687 &             kinpw_k2(ig),cwavef(im,igspinor),ghc(im,igspinor),gvnlc(im,igspinor), gsc(im,igspinor)
688              call wrtout(std_out,msg,'PERS')
689            else
690              write(msg,'(a,3(1x,i5),6(1x,es13.6))') '  1 ', ig, iispinor, igspinor,ghcre,&
691 &             kinpw_k2(ig),cwavef(re,igspinor),ghc(re,igspinor),gvnlc(re,igspinor)
692              call wrtout(std_out,msg,'PERS')
693              write(msg,'(a,3(1x,i5),6(1x,es13.6))') '  2 ', ig, iispinor, igspinor,ghcim,&
694 &             kinpw_k2(ig),cwavef(im,igspinor),ghc(im,igspinor),gvnlc(im,igspinor)
695              call wrtout(std_out,msg,'PERS')
696            end if
697            ghc(re,igspinor)=ghcre
698            ghc(im,igspinor)=ghcim
699          end do ! ig
700        end do ! ispinor
701      end do ! idat
702    end if
703 
704 !  Calculation of the Fock exact exchange term
705    if (has_fock) then
706      if (fock_get_getghc_call(fock)==1) then
707        if (gs_ham%usepaw==0) cwaveprj_idat => cwaveprj
708        do idat=1,ndat
709          if (fock%use_ACE==0) then
710            if (gs_ham%usepaw==1) cwaveprj_idat => cwaveprj_fock(:,(idat-1)*my_nspinor+1:idat*my_nspinor)
711            call fock_getghc(cwavef(:,1+(idat-1)*npw_k1*my_nspinor:idat*npw_k1*my_nspinor),cwaveprj_idat,&
712 &           ghc(:,1+(idat-1)*npw_k2*my_nspinor:idat*npw_k2*my_nspinor),gs_ham,mpi_enreg)
713          else
714            call fock_ACE_getghc(cwavef(:,1+(idat-1)*npw_k1*my_nspinor:idat*npw_k1*my_nspinor),&
715 &           ghc(:,1+(idat-1)*npw_k2*my_nspinor:idat*npw_k2*my_nspinor),gs_ham,mpi_enreg)
716          end if
717        end do ! idat
718      end if
719    end if
720 
721 !  Structured debugging : if prtvol=-level, stop here.
722    if(prtvol==-level)then
723      write(msg,'(a,i0,a)')' getghc : exit prtvol=-',level,', debugging mode => stop '
724      MSG_ERROR(msg)
725    end if
726 
727    if ((type_calc==0).or.(type_calc==2)) then
728      if (has_fock.and.gs_ham%usepaw==1.and.cpopt<2) then
729        call pawcprj_free(cwaveprj_fock)
730        ABI_DATATYPE_DEALLOCATE(cwaveprj_fock)
731      end if
732    end if
733 
734  end if ! type_calc
735 
736  call timab(200+tim_getghc,2,tsec)
737 
738  DBG_EXIT("COLL")
739 
740 end subroutine getghc

ABINIT/getghc_mGGA [ Functions ]

[ Top ] [ Functions ]

NAME

 getghc_mGGA

FUNCTION

 Compute metaGGA contribution to <G|H|C> for input vector |C> expressed in reciprocal space.

COPYRIGHT

 Copyright (C) 1998-2018 ABINIT group (DCA, XG, GMR, LSI, MT)
 This file is distributed under the terms of the
 GNU General Public License, see ~abinit/COPYING
 or http://www.gnu.org/copyleft/gpl.txt .
 For the initials of contributors, see ~abinit/doc/developers/contributors.txt .

INPUTS

 cwavef(2,npw_k*my_nspinor*ndat)=planewave coefficients of wavefunction.
 gbound_k(2*mgfft+4)=sphere boundary info
 gprimd(3,3)=dimensional reciprocal space primitive translations (b^-1)
 istwf_k=input parameter that describes the storage of wfs
 kg_k(3,npw_k)=G vec coordinates wrt recip lattice transl.
 kpt(3)=current k point
 mgfft=maximum single fft dimension
 mpi_enreg=informations about MPI parallelization
 my_nspinor=number of spinorial components of the wavefunctions (on current proc)
 ndat=number of FFTs to perform in parall
 ngfft(18)=contain all needed information about 3D FFT
 npw_k=number of planewaves in basis for given k point.
 nvloc=number of spin components of vxctaulocal
 n4,n5,n6=for dimensionning of vxctaulocal
 use_gpu_cuda=1 if Cuda (GPU) is on
 vxctaulocal(n4,n5,n6,nvloc,4)= local potential corresponding to the derivative of XC energy with respect to
  kinetic energy density, in real space, on the augmented fft grid.
  This array contains also the gradient of vxctaulocal (gvxctaulocal) in vxctaulocal(:,:,:,:,2:4).

OUTPUT

  ghc_mGGA(2,npw_k*my_nspinor*ndat)=metaGGA contribution to <G|H|C>

SIDE EFFECTS

PARENTS

      getghc

CHILDREN

      fourwf

SOURCE

 794 subroutine getghc_mGGA(cwavef,ghc_mGGA,gbound_k,gprimd,istwf_k,kg_k,kpt,mgfft,mpi_enreg,&
 795 &                      ndat,ngfft,npw_k,nvloc,n4,n5,n6,my_nspinor,vxctaulocal,use_gpu_cuda)
 796 
 797 
 798 !This section has been created automatically by the script Abilint (TD).
 799 !Do not modify the following lines by hand.
 800 #undef ABI_FUNC
 801 #define ABI_FUNC 'getghc_mGGA'
 802 !End of the abilint section
 803 
 804  implicit none
 805 
 806 !Arguments ------------------------------------
 807 !scalars
 808  integer,intent(in) :: istwf_k,mgfft,my_nspinor,ndat,npw_k,nvloc,n4,n5,n6,use_gpu_cuda
 809  type(MPI_type),intent(in) :: mpi_enreg
 810 !arrays
 811  integer,intent(in) :: gbound_k(2*mgfft+4),kg_k(3,npw_k),ngfft(18)
 812  real(dp),intent(in) :: gprimd(3,3),kpt(3)
 813  real(dp),intent(inout) :: cwavef(2,npw_k*my_nspinor*ndat)
 814  real(dp),intent(inout) :: ghc_mGGA(2,npw_k*my_nspinor*ndat)
 815  real(dp),intent(inout) :: vxctaulocal(n4,n5,n6,nvloc,4)
 816 
 817 !Local variables-------------------------------
 818 !scalars
 819  integer,parameter :: tim_fourwf=1
 820  integer :: idat,idir,ipw,nspinortot,shift
 821  logical :: nspinor1TreatedByThisProc,nspinor2TreatedByThisProc
 822  real(dp) :: gp2pi1,gp2pi2,gp2pi3,kpt_cart,kg_k_cart,weight=one
 823 !arrays
 824  real(dp),allocatable :: cwavef1(:,:),cwavef2(:,:)
 825  real(dp),allocatable :: gcwavef(:,:,:),gcwavef1(:,:,:),gcwavef2(:,:,:)
 826  real(dp),allocatable :: ghc1(:,:),ghc2(:,:)
 827  real(dp),allocatable :: lcwavef(:,:),lcwavef1(:,:),lcwavef2(:,:)
 828  real(dp),allocatable :: work(:,:,:,:)
 829 
 830 ! *********************************************************************
 831 
 832  ghc_mGGA(:,:)=zero
 833  if (nvloc/=1) return
 834 
 835  nspinortot=min(2,(1+mpi_enreg%paral_spinor)*my_nspinor)
 836  if (mpi_enreg%paral_spinor==0) then
 837    shift=npw_k
 838    nspinor1TreatedByThisProc=.true.
 839    nspinor2TreatedByThisProc=(nspinortot==2)
 840  else
 841    shift=0
 842    nspinor1TreatedByThisProc=(mpi_enreg%me_spinor==0)
 843    nspinor2TreatedByThisProc=(mpi_enreg%me_spinor==1)
 844  end if
 845 
 846  ABI_ALLOCATE(work,(2,n4,n5,n6*ndat))
 847 
 848  if (nspinortot==1) then
 849 
 850    ABI_ALLOCATE(ghc1,(2,npw_k*ndat))
 851 
 852 !  Do it in 3 STEPs:
 853 !  STEP1: Compute grad of cwavef and Laplacian of cwavef
 854    ABI_ALLOCATE(gcwavef,(2,npw_k*ndat,3))
 855    ABI_ALLOCATE(lcwavef,(2,npw_k*ndat))
 856 !!$OMP PARALLEL DO
 857    do idat=1,ndat
 858      do ipw=1,npw_k
 859        gcwavef(:,ipw+(idat-1)*npw_k,1:3)=zero
 860        lcwavef(:,ipw+(idat-1)*npw_k)  =zero
 861      end do
 862    end do
 863    do idir=1,3
 864      gp2pi1=gprimd(idir,1)*two_pi
 865      gp2pi2=gprimd(idir,2)*two_pi
 866      gp2pi3=gprimd(idir,3)*two_pi
 867      kpt_cart=gp2pi1*kpt(1)+gp2pi2*kpt(2)+gp2pi3*kpt(3)
 868 !    Multiplication by 2pi i (G+k)_idir for gradient
 869 !    Multiplication by -(2pi (G+k)_idir )**2 for Laplacian
 870      do idat=1,ndat
 871        do ipw=1,npw_k
 872          kg_k_cart=gp2pi1*kg_k(1,ipw)+gp2pi2*kg_k(2,ipw)+gp2pi3*kg_k(3,ipw)+kpt_cart
 873          gcwavef(1,ipw+(idat-1)*npw_k,idir)= cwavef(2,ipw+(idat-1)*npw_k)*kg_k_cart
 874          gcwavef(2,ipw+(idat-1)*npw_k,idir)=-cwavef(1,ipw+(idat-1)*npw_k)*kg_k_cart
 875          lcwavef(1,ipw+(idat-1)*npw_k)=lcwavef(1,ipw+(idat-1)*npw_k)-cwavef(1,ipw+(idat-1)*npw_k)*kg_k_cart**2
 876          lcwavef(2,ipw+(idat-1)*npw_k)=lcwavef(2,ipw+(idat-1)*npw_k)-cwavef(2,ipw+(idat-1)*npw_k)*kg_k_cart**2
 877        end do
 878      end do
 879    end do ! idir
 880 !  STEP2: Compute (vxctaulocal)*(Laplacian of cwavef) and add it to ghc
 881    call fourwf(1,vxctaulocal(:,:,:,:,1),lcwavef,ghc1,work,gbound_k,gbound_k,&
 882 &   istwf_k,kg_k,kg_k,mgfft,mpi_enreg,ndat,ngfft,npw_k,npw_k,n4,n5,n6,2,&
 883 &   mpi_enreg%paral_kgb,tim_fourwf,weight,weight,use_gpu_cuda=use_gpu_cuda)
 884 !!$OMP PARALLEL DO
 885    do idat=1,ndat
 886      do ipw=1,npw_k
 887        ghc_mGGA(:,ipw+(idat-1)*npw_k)=ghc_mGGA(:,ipw+(idat-1)*npw_k)-half*ghc1(:,ipw+(idat-1)*npw_k)
 888      end do
 889    end do
 890    ABI_DEALLOCATE(lcwavef)
 891 !  STEP3: Compute sum of (grad components of vxctaulocal)*(grad components of cwavef)
 892    do idir=1,3
 893      call fourwf(1,vxctaulocal(:,:,:,:,1+idir),gcwavef(:,:,idir),ghc1,work,gbound_k,gbound_k,&
 894      istwf_k,kg_k,kg_k,mgfft,mpi_enreg,ndat,ngfft,npw_k,npw_k,n4,n5,n6,2,&
 895 &     mpi_enreg%paral_kgb,tim_fourwf,weight,weight,use_gpu_cuda=use_gpu_cuda)
 896 !!$OMP PARALLEL DO
 897      do idat=1,ndat
 898        do ipw=1,npw_k
 899          ghc_mGGA(:,ipw+(idat-1)*npw_k)=ghc_mGGA(:,ipw+(idat-1)*npw_k)-half*ghc1(:,ipw+(idat-1)*npw_k)
 900        end do
 901      end do
 902    end do ! idir
 903    ABI_DEALLOCATE(gcwavef)
 904    ABI_DEALLOCATE(ghc1)
 905 
 906  else ! nspinortot==2
 907 
 908    ABI_ALLOCATE(cwavef1,(2,npw_k*ndat))
 909    ABI_ALLOCATE(cwavef2,(2,npw_k*ndat))
 910    do idat=1,ndat
 911      do ipw=1,npw_k
 912        cwavef1(1:2,ipw+(idat-1)*npw_k)=cwavef(1:2,ipw+(idat-1)*my_nspinor*npw_k)
 913        cwavef2(1:2,ipw+(idat-1)*npw_k)=cwavef(1:2,ipw+(idat-1)*my_nspinor*npw_k+shift)
 914      end do
 915    end do
 916 !  call cg_zcopy(npw*ndat,cwavef(1,1),cwavef1)
 917 !  call cg_zcopy(npw*ndat,cwavef(1,1+shift),cwavef2)
 918 
 919 
 920    if (nspinor1TreatedByThisProc) then
 921 
 922      ABI_ALLOCATE(ghc1,(2,npw_k*ndat))
 923 
 924 !    Do it in 3 STEPs:
 925 !    STEP1: Compute grad of cwavef and Laplacian of cwavef
 926      ABI_ALLOCATE(gcwavef1,(2,npw_k*ndat,3))
 927      ABI_ALLOCATE(lcwavef1,(2,npw_k*ndat))
 928 !!$OMP PARALLEL DO
 929      do idat=1,ndat
 930        do ipw=1,npw_k
 931          gcwavef1(:,ipw+(idat-1)*npw_k,1:3)=zero
 932          lcwavef1(:,ipw+(idat-1)*npw_k)=zero
 933        end do
 934      end do
 935      do idir=1,3
 936        gp2pi1=gprimd(idir,1)*two_pi
 937        gp2pi2=gprimd(idir,2)*two_pi
 938        gp2pi3=gprimd(idir,3)*two_pi
 939        kpt_cart=gp2pi1*kpt(1)+gp2pi2*kpt(2)+gp2pi3*kpt(3)
 940 !      Multiplication by 2pi i (G+k)_idir for gradient
 941 !      Multiplication by -(2pi (G+k)_idir )**2 for Laplacian
 942        do idat=1,ndat
 943          do ipw=1,npw_k
 944            kg_k_cart=gp2pi1*kg_k(1,ipw)+gp2pi2*kg_k(2,ipw)+gp2pi3*kg_k(3,ipw)+kpt_cart
 945            gcwavef1(1,ipw+(idat-1)*npw_k,idir)= cwavef1(2,ipw+(idat-1)*npw_k)*kg_k_cart
 946            gcwavef1(2,ipw+(idat-1)*npw_k,idir)=-cwavef1(1,ipw+(idat-1)*npw_k)*kg_k_cart
 947            lcwavef1(1,ipw+(idat-1)*npw_k)=lcwavef1(1,ipw+(idat-1)*npw_k)-cwavef1(1,ipw+(idat-1)*npw_k)*kg_k_cart**2
 948            lcwavef1(2,ipw+(idat-1)*npw_k)=lcwavef1(2,ipw+(idat-1)*npw_k)-cwavef1(2,ipw+(idat-1)*npw_k)*kg_k_cart**2
 949          end do
 950        end do
 951      end do ! idir
 952 !    STEP2: Compute (vxctaulocal)*(Laplacian of cwavef) and add it to ghc
 953      call fourwf(1,vxctaulocal(:,:,:,:,1),lcwavef1,ghc1,work,gbound_k,gbound_k,&
 954 &     istwf_k,kg_k,kg_k,mgfft,mpi_enreg,ndat,ngfft,npw_k,npw_k,n4,n5,n6,2,&
 955 &     mpi_enreg%paral_kgb,tim_fourwf,weight,weight,use_gpu_cuda=use_gpu_cuda)
 956 !!$OMP PARALLEL DO
 957      do idat=1,ndat
 958        do ipw=1,npw_k
 959          ghc_mGGA(:,ipw+(idat-1)*my_nspinor*npw_k)=ghc_mGGA(:,ipw+(idat-1)*my_nspinor*npw_k)-half*ghc1(:,ipw+(idat-1)*npw_k)
 960        end do
 961      end do
 962      ABI_DEALLOCATE(lcwavef1)
 963 !    STEP3: Compute (grad components of vxctaulocal)*(grad components of cwavef)
 964      do idir=1,3
 965        call fourwf(1,vxctaulocal(:,:,:,:,1+idir),gcwavef1(:,:,idir),ghc1,work,gbound_k,gbound_k,&
 966        istwf_k,kg_k,kg_k,mgfft,mpi_enreg,ndat,ngfft,npw_k,npw_k,n4,n5,n6,2,&
 967 &       mpi_enreg%paral_kgb,tim_fourwf,weight,weight,use_gpu_cuda=use_gpu_cuda)
 968 !!$OMP PARALLEL DO
 969        do idat=1,ndat
 970          do ipw=1,npw_k
 971            ghc_mGGA(:,ipw+(idat-1)*my_nspinor*npw_k) = ghc_mGGA(:,ipw+(idat-1)*my_nspinor*npw_k)-half*ghc1(:,ipw+(idat-1)*npw_k)
 972          end do
 973        end do
 974      end do ! idir
 975      ABI_DEALLOCATE(gcwavef1)
 976      ABI_DEALLOCATE(ghc1)
 977 
 978    end if ! spin 1 treated by this proc
 979 
 980    if (nspinor2TreatedByThisProc) then
 981 
 982      ABI_ALLOCATE(ghc2,(2,npw_k*ndat))
 983 
 984 !    Do it in 3 STEPs:
 985 !    STEP1: Compute grad of cwavef and Laplacian of cwavef
 986      ABI_ALLOCATE(gcwavef2,(2,npw_k*ndat,3))
 987      ABI_ALLOCATE(lcwavef2,(2,npw_k*ndat))
 988 !!$OMP PARALLEL DO
 989      do idat=1,ndat
 990        do ipw=1,npw_k
 991          gcwavef2(:,ipw+(idat-1)*npw_k,1:3)=zero
 992          lcwavef2(:,ipw+(idat-1)*npw_k)  =zero
 993        end do
 994      end do
 995      do idir=1,3
 996        gp2pi1=gprimd(idir,1)*two_pi
 997        gp2pi2=gprimd(idir,2)*two_pi
 998        gp2pi3=gprimd(idir,3)*two_pi
 999        kpt_cart=gp2pi1*kpt(1)+gp2pi2*kpt(2)+gp2pi3*kpt(3)
1000 !      Multiplication by 2pi i (G+k)_idir for gradient
1001 !      Multiplication by -(2pi (G+k)_idir )**2 for Laplacian
1002        do idat=1,ndat
1003          do ipw=1,npw_k
1004            kg_k_cart=gp2pi1*kg_k(1,ipw)+gp2pi2*kg_k(2,ipw)+gp2pi3*kg_k(3,ipw)+kpt_cart
1005            gcwavef2(1,ipw+(idat-1)*npw_k,idir)= cwavef2(2,ipw+(idat-1)*npw_k)*kg_k_cart
1006            gcwavef2(2,ipw+(idat-1)*npw_k,idir)=-cwavef2(1,ipw+(idat-1)*npw_k)*kg_k_cart
1007            lcwavef2(1,ipw+(idat-1)*npw_k)=lcwavef2(1,ipw+(idat-1)*npw_k)-cwavef2(1,ipw+(idat-1)*npw_k)*kg_k_cart**2
1008            lcwavef2(2,ipw+(idat-1)*npw_k)=lcwavef2(2,ipw+(idat-1)*npw_k)-cwavef2(2,ipw+(idat-1)*npw_k)*kg_k_cart**2
1009          end do
1010        end do
1011      end do ! idir
1012 !    STEP2: Compute (vxctaulocal)*(Laplacian of cwavef) and add it to ghc
1013      call fourwf(1,vxctaulocal(:,:,:,:,1),lcwavef2,ghc2,work,gbound_k,gbound_k,&
1014 &     istwf_k,kg_k,kg_k,mgfft,mpi_enreg,ndat,ngfft,npw_k,npw_k,n4,n5,n6,2,&
1015 &     mpi_enreg%paral_kgb,tim_fourwf,weight,weight,use_gpu_cuda=use_gpu_cuda)
1016 !!$OMP PARALLEL DO
1017      do idat=1,ndat
1018        do ipw=1,npw_k
1019          ghc_mGGA(:,ipw+(idat-1)*my_nspinor*npw_k)=ghc_mGGA(:,ipw+(idat-1)*my_nspinor*npw_k)-half*ghc2(:,ipw+(idat-1)*npw_k)
1020        end do
1021      end do
1022      ABI_DEALLOCATE(lcwavef2)
1023 !    STEP3: Compute sum of (grad components of vxctaulocal)*(grad components of cwavef)
1024      do idir=1,3
1025        call fourwf(1,vxctaulocal(:,:,:,:,1+idir),gcwavef2(:,:,idir),ghc2,work,gbound_k,gbound_k,&
1026        istwf_k,kg_k,kg_k,mgfft,mpi_enreg,ndat,ngfft,npw_k,npw_k,n4,n5,n6,2,&
1027 &       mpi_enreg%paral_kgb,tim_fourwf,weight,weight,use_gpu_cuda=use_gpu_cuda)
1028 !!$OMP PARALLEL DO
1029        do idat=1,ndat
1030          do ipw=1,npw_k
1031            ghc_mGGA(:,ipw+(idat-1)*my_nspinor*npw_k)=ghc_mGGA(:,ipw+(idat-1)*my_nspinor*npw_k)-half*ghc2(:,ipw+(idat-1)*npw_k)
1032          end do
1033        end do
1034      end do ! idir
1035 
1036      ABI_DEALLOCATE(gcwavef2)
1037      ABI_DEALLOCATE(ghc2)
1038 
1039    end if ! spin 2 treated by this proc
1040 
1041    ABI_DEALLOCATE(cwavef1)
1042    ABI_DEALLOCATE(cwavef2)
1043 
1044  end if ! npsinortot
1045 
1046  ABI_DEALLOCATE(work)
1047 
1048 end subroutine getghc_mGGA

ABINIT/getghcnd [ Functions ]

[ Top ] [ Functions ]

NAME

 getghcnd

FUNCTION

 Compute <G|H_ND|C> for input vector |C> expressed in reciprocal space
 Result is put in array ghcnc. H_ND is the Hamiltonian due to magnetic dipoles
 on the nuclear sites.

INPUTS

 cwavef(2,npw*nspinor*ndat)=planewave coefficients of wavefunction.
 gs_ham <type(gs_hamiltonian_type)>=all data for the Hamiltonian to be applied
 my_nspinor=number of spinorial components of the wavefunctions (on current proc)
 ndat=number of FFT to do in //

OUTPUT

 ghcnd(2,npw*my_nspinor*ndat)=matrix elements <G|H_ND|C>

NOTES

  This routine applies the Hamiltonian due to an array of magnetic dipoles located
  at the atomic nuclei to the input wavefunction. Strategy below is to take advantage of
  Hermiticity to store H_ND in triangular form and then use a BLAS call to zhpmv to apply to
  input vector in one shot.
 Application of <k^prime|H|k> or <k|H|k^prime> not implemented!

PARENTS

      getghc

CHILDREN

      zhpmv

SOURCE

1444 subroutine getghcnd(cwavef,ghcnd,gs_ham,my_nspinor,ndat)
1445 
1446 
1447 !This section has been created automatically by the script Abilint (TD).
1448 !Do not modify the following lines by hand.
1449 #undef ABI_FUNC
1450 #define ABI_FUNC 'getghcnd'
1451 !End of the abilint section
1452 
1453  implicit none
1454 
1455 !Arguments ------------------------------------
1456 !scalars
1457  integer,intent(in) :: my_nspinor,ndat
1458  type(gs_hamiltonian_type),intent(in) :: gs_ham
1459 !arrays
1460  real(dp),intent(in) :: cwavef(2,gs_ham%npw_k*my_nspinor*ndat)
1461  real(dp),intent(out) :: ghcnd(2,gs_ham%npw_k*my_nspinor*ndat)
1462 
1463 !Local variables-------------------------------
1464 !scalars
1465  integer :: cwavedim
1466  character(len=500) :: message
1467  !arrays
1468  complex(dpc),allocatable :: inwave(:),hggc(:)
1469 
1470 ! *********************************************************************
1471 
1472  if (gs_ham%matblk /= gs_ham%natom) then
1473    write(message,'(a,i4,a,i4)')' gs_ham%matblk = ',gs_ham%matblk,' but natom = ',gs_ham%natom
1474    MSG_ERROR(message)
1475  end if
1476  if (ndat /= 1) then
1477    write(message,'(a,i4,a)')' ndat = ',ndat,' but getghcnd requires ndat = 1'
1478    MSG_ERROR(message)
1479  end if
1480  if (my_nspinor /= 1) then
1481    write(message,'(a,i4,a)')' nspinor = ',my_nspinor,' but getghcnd requires nspinor = 1'
1482    MSG_ERROR(message)
1483  end if
1484  if (any(abs(gs_ham%kpt_k(:)-gs_ham%kpt_kp(:))>tol8)) then
1485    message=' not allowed for kpt(left)/=kpt(right)!'
1486    MSG_BUG(message)
1487  end if
1488 
1489  if (any(abs(gs_ham%nucdipmom_k)>tol8)) then
1490    cwavedim = gs_ham%npw_k*my_nspinor*ndat
1491    ABI_ALLOCATE(hggc,(cwavedim))
1492    ABI_ALLOCATE(inwave,(cwavedim))
1493 
1494    inwave(1:gs_ham%npw_k) = cmplx(cwavef(1,1:gs_ham%npw_k),cwavef(2,1:gs_ham%npw_k),kind=dpc)
1495 
1496     ! apply hamiltonian hgg to input wavefunction inwave, result in hggc
1497     ! ZHPMV is a level-2 BLAS routine, does Matrix x Vector multiplication for double complex
1498     ! objects, with the matrix as Hermitian in packed storage
1499    call ZHPMV('L',cwavedim,cone,gs_ham%nucdipmom_k,inwave,1,czero,hggc,1)
1500 
1501    ghcnd(1,1:gs_ham%npw_k) = real(hggc)
1502    ghcnd(2,1:gs_ham%npw_k) = aimag(hggc)
1503 
1504    ABI_DEALLOCATE(hggc)
1505    ABI_DEALLOCATE(inwave)
1506 
1507  else
1508 
1509    ghcnd(:,:) = zero
1510 
1511  end if
1512 
1513 end subroutine getghcnd

ABINIT/getgsc [ Functions ]

[ Top ] [ Functions ]

NAME

 getgsc

FUNCTION

 Compute <G|S|C> for all input vectors |Cnk> at a given k-point,
              OR for one input vector |Cnk>.
 |Cnk> are expressed in reciprocal space.
 S is the overlap operator between |Cnk> (used for PAW).

INPUTS

  cg(2,mcg)=planewave coefficients of wavefunctions
  cprj(natom,mcprj)= wave functions projected with non-local projectors: cprj=<p_i|Cnk>
  gs_ham <type(gs_hamiltonian_type)>=all data for the Hamiltonian at k+q
  ibg=shift to be applied on the location of data in the array cprj (beginning of current k-point)
  icg=shift to be applied on the location of data in the array cg (beginning of current k-point)
  igsc=shift to be applied on the location of data in the array gsc (beginning of current k-point)
  ikpt,isppol=indexes of current (spin.kpoint)
  mcg=second dimension of the cg array
  mcprj=second dimension of the cprj array
  mgsc=second dimension of the gsc array
  mpi_enreg=information about MPI parallelization
  natom=number of atoms in unit cell.
  nband= if positive: number of bands at this k point for that spin polarization
         if negative: abs(nband) is the index of the only band to be computed
  npw_k=number of planewaves in basis for given k point.
  nspinor=number of spinorial components of the wavefunctions
 [select_k]=optional, option governing the choice of k points to be used.
             gs_ham datastructure contains quantities needed to apply overlap operator
             in reciprocal space between 2 kpoints, k and k^prime (equal in most cases);
             if select_k=1, <k^prime|S|k>       is applied [default]
             if select_k=2, <k|S|k^prime>       is applied
             if select_k=3, <k|S|k>             is applied
             if select_k=4, <k^prime|S|k^prime> is applied

OUTPUT

  gsc(2,mgsc)= <g|S|Cnk> or <g|S^(1)|Cnk> (S=overlap)

PARENTS

      dfpt_vtowfk

CHILDREN

      nonlop,pawcprj_alloc,pawcprj_copy,pawcprj_free,timab,xmpi_sum

SOURCE

1097 subroutine getgsc(cg,cprj,gs_ham,gsc,ibg,icg,igsc,ikpt,isppol,&
1098 &                 mcg,mcprj,mgsc,mpi_enreg,natom,nband,npw_k,nspinor,select_k)
1099 
1100 
1101 !This section has been created automatically by the script Abilint (TD).
1102 !Do not modify the following lines by hand.
1103 #undef ABI_FUNC
1104 #define ABI_FUNC 'getgsc'
1105 !End of the abilint section
1106 
1107  implicit none
1108 
1109 !Arguments ------------------------------------
1110 !scalars
1111  integer,intent(in) :: ibg,icg,igsc,ikpt,isppol,mcg,mcprj
1112  integer,intent(in) :: mgsc,natom,nband,npw_k,nspinor
1113  integer,intent(in),optional :: select_k
1114  type(MPI_type),intent(in) :: mpi_enreg
1115  type(gs_hamiltonian_type),intent(inout),target :: gs_ham
1116 !arrays
1117  real(dp),intent(in) :: cg(2,mcg)
1118  real(dp),intent(out) :: gsc(2,mgsc)
1119  type(pawcprj_type),intent(in) :: cprj(natom,mcprj)
1120 
1121 !Local variables-------------------------------
1122 !scalars
1123  integer :: choice,cpopt,dimenl1,dimenl2,iband,iband1,iband2,ierr,index_cg,index_cprj
1124  integer :: index_gsc,me,my_nspinor,paw_opt,select_k_,signs,tim_nonlop,useylm
1125  !character(len=500) :: msg
1126 !arrays
1127  real(dp) :: enlout_dum(1),tsec(2)
1128  real(dp),allocatable :: cwavef(:,:),scwavef(:,:)
1129  type(pawcprj_type),allocatable :: cwaveprj(:,:)
1130 
1131 ! *********************************************************************
1132 
1133  DBG_ENTER("COLL")
1134 
1135 !Compatibility tests
1136  my_nspinor=max(1,nspinor/mpi_enreg%nproc_spinor)
1137  if(gs_ham%usepaw==0) then
1138    MSG_BUG('Only compatible with PAW (usepaw=1) !')
1139  end if
1140  if(nband<0.and.(mcg<npw_k*my_nspinor.or.mgsc<npw_k*my_nspinor.or.mcprj<my_nspinor)) then
1141    MSG_BUG('Invalid value for mcg, mgsc or mcprj !')
1142  end if
1143 
1144 !Keep track of total time spent in getgsc:
1145  call timab(565,1,tsec)
1146 
1147  gsc = zero
1148 
1149 !Prepare some data
1150  ABI_ALLOCATE(cwavef,(2,npw_k*my_nspinor))
1151  ABI_ALLOCATE(scwavef,(2,npw_k*my_nspinor))
1152  if (gs_ham%usecprj==1) then
1153    ABI_DATATYPE_ALLOCATE(cwaveprj,(natom,my_nspinor))
1154    call pawcprj_alloc(cwaveprj,0,gs_ham%dimcprj)
1155  else
1156    ABI_DATATYPE_ALLOCATE(cwaveprj,(0,0))
1157  end if
1158  dimenl1=gs_ham%dimekb1;dimenl2=natom;tim_nonlop=0
1159  choice=1;signs=2;cpopt=-1+3*gs_ham%usecprj;paw_opt=3;useylm=1
1160  select_k_=1;if (present(select_k)) select_k_=select_k
1161  me=mpi_enreg%me_kpt
1162 
1163 !Loop over bands
1164  index_cprj=ibg;index_cg=icg;index_gsc=igsc
1165  if (nband>0) then
1166    iband1=1;iband2=nband
1167  else if (nband<0) then
1168    iband1=abs(nband);iband2=iband1
1169    index_cprj=index_cprj+(iband1-1)*my_nspinor
1170    index_cg  =index_cg  +(iband1-1)*npw_k*my_nspinor
1171    index_gsc =index_gsc +(iband1-1)*npw_k*my_nspinor
1172  end if
1173 
1174  do iband=iband1,iband2
1175 
1176    if (mpi_enreg%proc_distrb(ikpt,iband,isppol)/=me.and.nband>0) then
1177      gsc(:,1+index_gsc:npw_k*my_nspinor+index_gsc)=zero
1178      index_cprj=index_cprj+my_nspinor
1179      index_cg=index_cg+npw_k*my_nspinor
1180      index_gsc=index_gsc+npw_k*my_nspinor
1181      cycle
1182    end if
1183 
1184 !  Retrieve WF at (n,k)
1185    cwavef(:,1:npw_k*my_nspinor)=cg(:,1+index_cg:npw_k*my_nspinor+index_cg)
1186    if (gs_ham%usecprj==1) then
1187      call pawcprj_copy(cprj(:,1+index_cprj:my_nspinor+index_cprj),cwaveprj)
1188    end if
1189 
1190 !  Compute <g|S|Cnk>
1191    call nonlop(choice,cpopt,cwaveprj,enlout_dum,gs_ham,0,(/zero/),mpi_enreg,1,1,paw_opt,&
1192 &   signs,scwavef,tim_nonlop,cwavef,cwavef,select_k=select_k_)
1193 
1194    gsc(:,1+index_gsc:npw_k*my_nspinor+index_gsc)=scwavef(:,1:npw_k*my_nspinor)
1195 
1196 !  End of loop over bands
1197    index_cprj=index_cprj+my_nspinor
1198    index_cg=index_cg+npw_k*my_nspinor
1199    index_gsc=index_gsc+npw_k*my_nspinor
1200  end do
1201 
1202 !Reduction in case of parallelization
1203  if ((xmpi_paral==1)) then
1204    call timab(48,1,tsec)
1205    call xmpi_sum(gsc,mpi_enreg%comm_band,ierr)
1206    call timab(48,2,tsec)
1207  end if
1208 
1209 !Memory deallocation
1210  ABI_DEALLOCATE(cwavef)
1211  ABI_DEALLOCATE(scwavef)
1212  if (gs_ham%usecprj==1) then
1213    call pawcprj_free(cwaveprj)
1214  end if
1215  ABI_DATATYPE_DEALLOCATE(cwaveprj)
1216 
1217  call timab(565,2,tsec)
1218 
1219  DBG_EXIT("COLL")
1220 
1221 end subroutine getgsc

ABINIT/m_getghc [ Modules ]

[ Top ] [ Modules ]

NAME

  m_getghc

FUNCTION

 Compute <G|H|C> for input vector |C> expressed in reciprocal space;

COPYRIGHT

  Copyright (C) 1998-2018 ABINIT group (DCA, XG, GMR, LSI, MT)
  This file is distributed under the terms of the
  GNU General Public License, see ~abinit/COPYING
  or http://www.gnu.org/copyleft/gpl.txt .

PARENTS

CHILDREN

SOURCE

21 #if defined HAVE_CONFIG_H
22 #include "config.h"
23 #endif
24 
25 #include "abi_common.h"
26 
27 module m_getghc
28 
29  use defs_basis
30  use defs_abitypes
31  use m_errors
32  use m_abicore
33  use m_xmpi
34 
35  use m_time,        only : timab
36  use m_pawcprj,     only : pawcprj_type, pawcprj_alloc, pawcprj_free, pawcprj_getdim, pawcprj_copy
37  use m_bandfft_kpt, only : bandfft_kpt, bandfft_kpt_get_ikpt
38  use m_hamiltonian, only : gs_hamiltonian_type, KPRIME_H_K, K_H_KPRIME, K_H_K, KPRIME_H_KPRIME
39  use m_fock,        only : fock_common_type, fock_get_getghc_call
40  use m_fock_getghc, only : fock_getghc, fock_ACE_getghc
41  use m_nonlop,      only : nonlop
42  use m_fft,         only : fourwf
43 
44  implicit none
45 
46  private

ABINIT/multithreaded_getghc [ Functions ]

[ Top ] [ Functions ]

NAME

 multithreaded_getghc

FUNCTION

COPYRIGHT

 Copyright (C) 2016-2018 ABINIT group (JB)
 This file is distributed under the terms of the
 GNU General Public License, see ~abinit/COPYING
 or http://www.gnu.org/copyleft/gpl.txt .
 For the initials of contributors, see ~abinit/doc/developers/contributors.txt .

INPUTS

 cpopt=flag defining the status of cwaveprj%cp(:)=<Proj_i|Cnk> scalars (PAW only)
       (same meaning as in nonlop.F90 routine)
       if cpopt=-1, <p_lmn|in> (and derivatives) are computed here (and not saved)
       if cpopt= 0, <p_lmn|in> are computed here and saved
       if cpopt= 1, <p_lmn|in> and first derivatives are computed here and saved
       if cpopt= 2  <p_lmn|in> are already in memory;
       if cpopt= 3  <p_lmn|in> are already in memory; first derivatives are computed here and saved
       if cpopt= 4  <p_lmn|in> and first derivatives are already in memory;
 cwavef(2,npw*my_nspinor*ndat)=planewave coefficients of wavefunction.
 gs_ham <type(gs_hamiltonian_type)>=all data for the Hamiltonian to be applied
 lambda=factor to be used when computing <G|H-lambda.S|C> - only for sij_opt=-1
        Typically lambda is the eigenvalue (or its guess)
 mpi_enreg=informations about MPI parallelization
 ndat=number of FFT to do in parallel
 prtvol=control print volume and debugging output
 sij_opt= -PAW ONLY-  if  0, only matrix elements <G|H|C> have to be computed
    (S=overlap)       if  1, matrix elements <G|S|C> have to be computed in gsc in addition to ghc
                      if -1, matrix elements <G|H-lambda.S|C> have to be computed in ghc (gsc not used)
 tim_getghc=timing code of the calling subroutine(can be set to 0 if not attributed)
 type_calc= option governing which part of Hamitonian is to be applied:
            1: local part only
            2: non-local+kinetic only (added to the exixting Hamiltonian)
            3: local + kinetic only (added to the existing Hamiltonian)
 ===== Optional inputs =====
   [kg_fft_k(3,:)]=optional, (k+G) vector coordinates to be used for the FFT tranformation
                   instead of the one contained in gs_ham datastructure.
                   Typically used for real WF (in parallel) which are FFT-transformed 2 by 2.
   [kg_fft_kp(3,:)]=optional, (k^prime+G) vector coordinates to be used for the FFT tranformation
   [select_k]=optional, option governing the choice of k points to be used.
             gs_ham datastructure contains quantities needed to apply Hamiltonian
             in reciprocal space between 2 kpoints, k and k^prime (equal in most cases);
             if select_k=1, <k^prime|H|k>       is applied [default]
             if select_k=2, <k|H|k^prime>       is applied
             if select_k=3, <k|H|k>             is applied
             if select_k=4, <k^prime|H|k^prime> is applied

OUTPUT

  ghc(2,npw*my_nspinor*ndat)=matrix elements <G|H|C> (if sij_opt>=0)
                                          or <G|H-lambda.S|C> (if sij_opt=-1)
  gvnlc(2,npw*my_nspinor*ndat)=matrix elements <G|Vnonlocal|C> (if sij_opt>=0)
                                            or <G|Vnonlocal-lambda.S|C> (if sij_opt=-1)
  if (sij_opt=1)
    gsc(2,npw*my_nspinor*ndat)=matrix elements <G|S|C> (S=overlap).

SIDE EFFECTS

  cwaveprj(natom,my_nspinor*(1+cpopt)*ndat)= wave function projected on nl projectors (PAW only)

PARENTS

      m_lobpcgwf,prep_getghc

CHILDREN

      getghc,mkl_set_num_threads,omp_set_nested

SOURCE

1295 subroutine multithreaded_getghc(cpopt,cwavef,cwaveprj,ghc,gsc,gs_ham,gvnlc,lambda,mpi_enreg,ndat,&
1296 &                 prtvol,sij_opt,tim_getghc,type_calc,&
1297 &                 kg_fft_k,kg_fft_kp,select_k) ! optional arguments
1298 
1299 #ifdef HAVE_OPENMP
1300    use omp_lib
1301 #endif
1302 
1303 !This section has been created automatically by the script Abilint (TD).
1304 !Do not modify the following lines by hand.
1305 #undef ABI_FUNC
1306 #define ABI_FUNC 'multithreaded_getghc'
1307 !End of the abilint section
1308 
1309  implicit none
1310 
1311 !Arguments ------------------------------------
1312 !scalars
1313  integer,intent(in) :: cpopt,ndat, prtvol
1314  integer,intent(in) :: sij_opt,tim_getghc,type_calc
1315  integer,intent(in),optional :: select_k
1316  real(dp),intent(in) :: lambda
1317  type(MPI_type),intent(in) :: mpi_enreg
1318  type(gs_hamiltonian_type),intent(inout),target :: gs_ham
1319 !arrays
1320  integer,intent(in),optional,target :: kg_fft_k(:,:),kg_fft_kp(:,:)
1321  real(dp),intent(out),target :: gsc(:,:)
1322  real(dp),intent(inout) :: cwavef(:,:)
1323  real(dp),intent(out) :: ghc(:,:),gvnlc(:,:)
1324  type(pawcprj_type),intent(inout),target :: cwaveprj(:,:)
1325 
1326 !Local variables-------------------------------
1327 !scalars
1328  integer :: firstelt, lastelt
1329  integer :: nthreads
1330  integer :: ithread
1331  integer :: chunk
1332  integer :: residuchunk
1333  integer :: firstband
1334  integer :: lastband
1335  integer :: spacedim
1336 #ifdef HAVE_OPENMP
1337  logical :: is_nested
1338 #endif
1339 
1340  integer :: select_k_default
1341 
1342  ! *************************************************************************
1343 
1344  select_k_default = 1; if ( present(select_k) ) select_k_default = select_k
1345 
1346  spacedim = size(cwavef,dim=2)/ndat
1347 
1348     !$omp parallel default (none) private(ithread,nthreads,chunk,firstband,lastband,residuchunk,firstelt,lastelt, is_nested), &
1349     !$omp& shared(cwavef,ghc,gsc, gvnlc,spacedim,ndat,kg_fft_k,kg_fft_kp,gs_ham,cwaveprj,mpi_enreg), &
1350     !$omp& firstprivate(cpopt,lambda,prtvol,sij_opt,tim_getghc,type_calc,select_k_default)
1351 #ifdef HAVE_OPENMP
1352  ithread = omp_get_thread_num()
1353  nthreads = omp_get_num_threads()
1354  is_nested = omp_get_nested()
1355  call omp_set_nested(.false.)
1356 #ifdef HAVE_LINALG_MKL_THREADS
1357  call mkl_set_num_threads(1)
1358 #endif
1359 #else
1360  ithread = 0
1361  nthreads = 1
1362 #endif
1363  chunk = ndat/nthreads ! Divide by 2 to construct chunk of even number of bands
1364  residuchunk = ndat - nthreads*chunk
1365  if ( ithread < nthreads-residuchunk ) then
1366    firstband = ithread*chunk+1
1367    lastband = (ithread+1)*chunk
1368  else
1369    firstband = (nthreads-residuchunk)*chunk + ( ithread -(nthreads-residuchunk) )*(chunk+1) +1
1370    lastband = firstband+chunk
1371  end if
1372 
1373  if ( lastband /= 0 ) then
1374    firstelt = (firstband-1)*spacedim+1
1375    lastelt = lastband*spacedim
1376       ! Don't know how to manage optional arguments .... :(
1377    if ( present(kg_fft_k) ) then
1378      if (present(kg_fft_kp)) then
1379        call getghc(cpopt,cwavef(:,firstelt:lastelt),cwaveprj,ghc(:,firstelt:lastelt),gsc(:,firstelt:lastelt*gs_ham%usepaw),&
1380        gs_ham,gvnlc(:,firstelt:lastelt),lambda, mpi_enreg,lastband-firstband+1,prtvol,sij_opt,tim_getghc,type_calc,&
1381        select_k=select_k_default,kg_fft_k=kg_fft_k,kg_fft_kp=kg_fft_kp)
1382      else
1383        call getghc(cpopt,cwavef(:,firstelt:lastelt),cwaveprj,ghc(:,firstelt:lastelt),gsc(:,firstelt:lastelt*gs_ham%usepaw),&
1384        gs_ham,gvnlc(:,firstelt:lastelt),lambda, mpi_enreg,lastband-firstband+1,prtvol,sij_opt,tim_getghc,type_calc,&
1385        select_k=select_k_default,kg_fft_k=kg_fft_k)
1386      end if
1387    else
1388      if (present(kg_fft_kp)) then
1389        call getghc(cpopt,cwavef(:,firstelt:lastelt),cwaveprj,ghc(:,firstelt:lastelt),gsc(:,firstelt:lastelt*gs_ham%usepaw),&
1390        gs_ham,gvnlc(:,firstelt:lastelt),lambda, mpi_enreg,lastband-firstband+1,prtvol,sij_opt,tim_getghc,type_calc,&
1391        select_k=select_k_default,kg_fft_kp=kg_fft_kp)
1392      else
1393        call getghc(cpopt,cwavef(:,firstelt:lastelt),cwaveprj,ghc(:,firstelt:lastelt),gsc(:,firstelt:lastelt*gs_ham%usepaw),&
1394        gs_ham,gvnlc(:,firstelt:lastelt),lambda, mpi_enreg,lastband-firstband+1,prtvol,sij_opt,tim_getghc,type_calc,&
1395        select_k=select_k_default)
1396      end if
1397    end if
1398  end if
1399 #ifdef HAVE_OPENMP
1400  call omp_set_nested(is_nested)
1401 #ifdef HAVE_LINALG_MKL_THREADS
1402  call mkl_set_num_threads(nthreads)
1403 #endif
1404 #endif
1405     !$omp end parallel
1406 
1407 end subroutine multithreaded_getghc