TABLE OF CONTENTS


ABINIT/m_ksdiago [ Modules ]

[ Top ] [ Modules ]

NAME

  m_ksdiago

FUNCTION

  Direct diagonalization of the KS Hamiltonian H_k(G,G')

COPYRIGHT

  Copyright (C) 2008-2024 ABINIT group (MG)
  This file is distributed under the terms of the
  GNU General Public License, see ~abinit/COPYING
  or http://www.gnu.org/copyleft/gpl.txt .

SOURCE

16 #if defined HAVE_CONFIG_H
17 #include "config.h"
18 #endif
19 
20 #include "abi_common.h"
21 
22 module m_ksdiago
23 
24  use defs_basis
25  use m_abicore
26  use m_errors
27  use m_xmpi
28  use m_xomp
29  use m_hamiltonian
30  use m_distribfft
31  use, intrinsic :: iso_c_binding
32  use libxc_functionals
33 
34  use defs_datatypes,      only : pseudopotential_type
35  use defs_abitypes,       only : MPI_type
36  use m_dtset,             only : dataset_type
37  use m_fstrings,          only : toupper, ktoa, itoa, sjoin, ftoa, ltoa
38  use m_yaml,              only : yamldoc_t, yamldoc_open
39  use m_numeric_tools,     only : blocked_loop
40  use m_time,              only : cwtime, cwtime_report, timab
41  use m_geometry,          only : metric
42  use m_hide_lapack,       only : xhegv_cplex, xheev_cplex, xheevx_cplex, xhegvx_cplex
43  use m_slk,               only : matrix_scalapack, processor_scalapack, block_dist_1d, &
44                                  compute_eigen_problem, compute_generalized_eigen_problem
45  use m_kg,                only : mkkin, mkkpg
46  use m_crystal,           only : crystal_t
47  use m_fftcore,           only : kpgsph, get_kg
48  use m_fft,               only : fftpac
49  use m_cgtools,           only : set_istwfk
50  use m_electronpositron,  only : electronpositron_type
51  use m_mpinfo,            only : destroy_mpi_enreg, initmpi_seq
52  use m_pawtab,            only : pawtab_type
53  use m_paw_ij,            only : paw_ij_type
54  use m_pawcprj,           only : pawcprj_type, pawcprj_alloc, pawcprj_free, pawcprj_reorder, &
55                                  pawcprj_set_zero, pawcprj_mpi_sum, pawcprj_copy
56  use m_cgprj,             only : getcprj
57  use m_pawfgr,            only : pawfgr_type
58  use m_initylmg,          only : initylmg
59  use m_mkffnl,            only : mkffnl
60  use m_getghc,            only : getghc, multithreaded_getghc
61  !use m_fock,              only : fock_type
62 
63  implicit none
64 
65  private

m_gwr/ugb_free [ Functions ]

[ Top ] [ m_gwr ] [ Functions ]

NAME

  ugb_free

FUNCTION

  Free dynamic memory.

SOURCE

1326 subroutine ugb_free(ugb)
1327 
1328 !Arguments ------------------------------------
1329  class(ugb_t),intent(inout) :: ugb
1330 ! *************************************************************************
1331 
1332  call ugb%mat%free()
1333  call ugb%processor%free()
1334  ABI_SFREE(ugb%kg_k)
1335  ugb%cg_k => null()
1336  ugb%comm => null()
1337 
1338  if (allocated(ugb%cprj_k)) then
1339    call pawcprj_free(ugb%cprj_k)
1340    ABI_FREE(ugb%cprj_k)
1341  end if
1342 
1343 end subroutine ugb_free

m_gwr/ugb_print [ Functions ]

[ Top ] [ m_gwr ] [ Functions ]

NAME

  ugb_print

FUNCTION

  Print info on the object.

SOURCE

1357 subroutine ugb_print(ugb, units, prtvol, header)
1358 
1359 !Arguments ------------------------------------
1360  class(ugb_t),intent(in) :: ugb
1361  integer,intent(in) :: units(:), prtvol
1362  character(len=*),optional,intent(in) :: header
1363 
1364 !Local variables-------------------------------
1365  character(len=500) :: msg
1366  type(yamldoc_t) :: ydoc
1367 
1368 ! *************************************************************************
1369 
1370  ABI_UNUSED(prtvol)
1371 
1372  msg = ' ==== Info on the ugb_t object ==== '; if (present(header)) msg = ' ==== '//trim(adjustl(header))//' ==== '
1373  call wrtout(units, msg)
1374 
1375  ydoc = yamldoc_open('ugb_t') !, width=11, real_fmt='(3f8.3)')
1376  call ydoc%add_int("istwf_k", ugb%istwf_k)
1377  call ydoc%add_int("nspinor", ugb%nspinor)
1378  call ydoc%add_int("npw_k", ugb%npw_k)
1379  call ydoc%add_int("nband_k", ugb%nband_k)
1380  call ydoc%add_int("my_bstart", ugb%my_bstart)
1381  call ydoc%add_int("my_bstop", ugb%my_bstop)
1382  call ydoc%add_int("my_nband", ugb%my_nband)
1383  call ydoc%write_units_and_free(units)
1384 
1385 end subroutine ugb_print

m_ksdiago/ddiago_ctl_type [ Types ]

[ Top ] [ m_ksdiago ] [ Types ]

NAME

  ddiago_ctl_type

FUNCTION

  Structure storing the variables controlling the direct diagonalization of the Kohn-Sham Hamiltonian.
  Mainly used for debugging (and in the KSS code!)

SOURCE

157  type, public :: ddiago_ctl_type
158 
159   integer :: spin
160    ! The spin component of the Hamiltonian (1 if nspinor==1 or nsppol==1).
161 
162   integer :: istwf_k
163    ! Option defining whether time-reversal symmetry is used at particular k-points
164    ! If 0, the code will automatically use TR symmetry if possible (depending on the k-point)
165 
166   integer :: nband_k
167    ! Number of bands to be calculated.
168 
169   integer :: npw_k
170   ! The number of planes waves for the wavefunctions taking into account time-reversal symmetry.
171 
172   integer :: npwtot
173   ! The number of planes waves in the Hamiltonian without taking into account istwf_k
174 
175   integer :: nspinor
176   ! Number of spinorial components.
177 
178   integer :: prtvol
179    ! Flag controlling the verbosity.
180 
181   integer :: use_scalapack
182   ! 0 if diagonalization is done in sequential on each node.
183   ! 1 to use scalapack
184   ! TODO Not implemented
185 
186   real(dp) :: abstol
187    ! used fro RANGE= "V", "I", and "A" when do_full_diago=.FALSE.
188    ! The absolute error tolerance for the eigenvalues. An approximate eigenvalue is accepted
189    ! as converged when it is determined to lie in an interval [a,b] of width less than or equal to
190    !
191    !         ABSTOL + EPS *   max( |a|,|b| ) ,
192    !
193    ! where EPS is the machine precision.  If ABSTOL is less than or equal to zero, then  EPS*|T|  will be used in its place,
194    ! where |T| is the 1-norm of the tridiagonal matrix obtained by reducing A to tridiagonal form.
195    !
196    ! Eigenvalues will be computed most accurately when ABSTOL is
197    ! set to twice the underflow threshold 2*DLAMCH('S'), not zero.
198    ! If this routine returns with INFO>0, indicating that some
199    ! eigenvectors did not converge, try setting ABSTOL to 2*DLAMCH('S').
200 
201   real(dp) :: ecut
202    ! The cutoff energy for the plane wave basis set.
203 
204   real(dp) :: ecutsm
205    ! Smearing energy for plane wave kinetic energy (Ha)
206 
207   real(dp) :: effmass_free
208    ! Effective mass for electrons (usually one).
209 
210   logical :: do_full_diago
211   ! Specifies whether direct or partial diagonalization will be performed.
212   ! Meaningful only if RANGE='A'.
213 
214   integer :: ilu(2)
215    ! If RANGE='I', the indices (in ascending order) of the smallest and largest eigenvalues to be returned.
216    ! il=ilu(1), iu=ilu(2) where
217    ! 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. NOT used if RANGE = 'A' or 'V'.
218 
219   integer :: nloalg(3)
220 
221   real(dp) :: kpoint(3)
222    ! The k-point in reduced coordinates at which the Hamiltonian is diagonalized.
223 
224   real(dp) :: vlu(2)
225    ! If RANGE='V', the lower and upper bounds of the interval to
226    ! be searched for eigenvalues. vl=vlu(1) and vu=vlu(2) with VL < VU.
227    ! Not referenced if RANGE = 'A' or 'I'.
228 
229   character(len=1) :: jobz
230    ! character defining whether wavefunctions are required (lapack option).
231    ! "N":  Compute eigenvalues only;
232    ! "V":  Compute eigenvalues and eigenvectors.
233 
234   character(len=1) :: range
235    ! character defining the subset of eigenstates that will be calculated (lapack option).
236    ! "A": all eigenvalues will be found.
237    ! "V": all eigenvalues in the half-open interval (VL,VU] will be found.
238    ! "I": the IL-th through IU-th eigenvalues will be found.
239 
240   !$character(len=fnlen) :: fname
241   ! The name of the file storing the eigenvectors and eigenvalues (only if jobz="V")
242 
243  end type ddiago_ctl_type
244 
245  public :: ksdiago
246  public :: init_ddiago_ctl

m_ksdiago/init_ddiago_ctl [ Functions ]

[ Top ] [ m_ksdiago ] [ Functions ]

NAME

  init_ddiago_ctl

FUNCTION

INPUTS

OUTPUT

SOURCE

711 subroutine init_ddiago_ctl(Dctl, jobz, spin, nspinor, ecut, kpoint, nloalg, gmet, &
712   nband_k, istwf_k, ecutsm, effmass_free, abstol, range, ilu, vlu, use_scalapack, prtvol)
713 
714 !Arguments ------------------------------------
715 !scalars
716  integer,intent(in) :: spin,nspinor
717  integer,optional,intent(in) :: istwf_k,prtvol,use_scalapack,nband_k
718  real(dp),intent(in) :: ecut
719  real(dp),optional,intent(in) :: ecutsm,effmass_free
720  real(dp),optional,intent(in) :: abstol
721  character(len=*),intent(in) :: jobz
722  character(len=*),optional,intent(in) :: range
723  type(ddiago_ctl_type),intent(out) :: Dctl
724 !arrays
725  integer,intent(in) :: nloalg(3)
726  integer,optional,intent(in) :: ilu(2)
727  real(dp),intent(in) :: kpoint(3)
728  real(dp),optional,intent(in) :: vlu(2)
729  real(dp),intent(in) :: gmet(3,3)
730 
731 !Local variables-------------------------------
732 !scalars
733  integer :: npw_k
734  logical :: ltest
735  character(len=500) :: msg
736  type(MPI_type) :: mpi_enreg_seq
737 !arrays
738  integer,allocatable :: kg_k(:,:)
739 
740 ! *************************************************************************
741 
742  call initmpi_seq(mpi_enreg_seq) ! Fake MPI_type.
743 
744  Dctl%spin  = spin
745  Dctl%nspinor = nspinor
746  Dctl%kpoint  = kpoint
747 
748  if (PRESENT(istwf_k)) then
749   Dctl%istwf_k = istwf_k
750  else
751   Dctl%istwf_k = set_istwfk(kpoint)
752  end if
753 
754  ABI_CHECK(Dctl%istwf_k == 1, "istwf_k/=1 not coded")
755 
756  Dctl%jobz   = toupper(jobz(1:1))
757  Dctl%range  = "A"
758  if (PRESENT(range)) Dctl%range = toupper(range)
759 
760  Dctl%ecut = ecut
761  Dctl%ecutsm = zero; if (PRESENT(ecutsm)) Dctl%ecutsm = ecutsm
762  Dctl%effmass_free = one; if (PRESENT(effmass_free)) Dctl%effmass_free = effmass_free
763  Dctl%nloalg  = nloalg
764  Dctl%prtvol = 0; if (PRESENT(prtvol)) Dctl%prtvol = prtvol
765  Dctl%abstol = -tol8; if (PRESENT(abstol)) Dctl%abstol = abstol
766 
767  ABI_MALLOC(kg_k,(3,0))
768 
769  ! Total number of G-vectors for this k-point with istwf_k=1.
770  call kpgsph(ecut,0,gmet,0,0,1,kg_k,kpoint,0,mpi_enreg_seq,0,Dctl%npwtot)
771 
772  ! G-vectors taking into account time-reversal symmetry.
773  call kpgsph(ecut,0,gmet,0,0,istwf_k,kg_k,kpoint,0,mpi_enreg_seq,0,npw_k)
774 
775  Dctl%npw_k = npw_k
776  ABI_FREE(kg_k)
777 
778  Dctl%do_full_diago = .FALSE.
779 
780  SELECT CASE (Dctl%range)
781  CASE ("A")
782 
783   ! Check on the number of stored bands.
784   Dctl%nband_k=-1
785   if (PRESENT(nband_k)) Dctl%nband_k=nband_k
786 
787   if (Dctl%nband_k==-1.or.Dctl%nband_k>=npw_k*nspinor) then
788     Dctl%nband_k=npw_k*nspinor
789     write(msg,'(4a)')ch10,&
790     'Since the number of bands to be computed was (-1) or',ch10,&
791     'too large, it has been set to the max. value npw_k*nspinor. '
792     if (Dctl%prtvol>0) call wrtout(std_out, msg)
793   else
794     Dctl%nband_k=nband_k
795   end if
796 
797   Dctl%do_full_diago = (Dctl%nband_k==npw_k*nspinor)
798 
799   if (Dctl%do_full_diago) then
800     write(msg,'(6a)')ch10,&
801      'Since the number of bands to be computed',ch10,&
802      'is equal to the number of G-vectors found for this k-point,',ch10,&
803      'the program will perform complete diagonalization.'
804   else
805     write(msg,'(6a)')ch10,&
806       'Since the number of bands to be computed',ch10,&
807       'is less than the number of G-vectors found,',ch10,&
808       'the program will perform partial diagonalization.'
809   end if
810   if (Dctl%prtvol>0) call wrtout(std_out, msg)
811 
812  CASE ("I")
813   if (.not.PRESENT(ilu)) then
814     ABI_ERROR(" ilu must be specified when range=I ")
815   end if
816   Dctl%ilu = ilu
817 
818   ltest = ( ( ilu(2)>=ilu(1) ) .and. ilu(1)>=1 .and. ilu(2)<=Dctl%npwtot )
819   write(msg,'(a,2i0)')" Illegal value for ilu: ",ilu
820   ABI_CHECK(ltest,msg)
821   Dctl%nband_k= ilu(2)-ilu(1)+1
822 
823  CASE ("V")
824   if (.not.PRESENT(vlu)) then
825     ABI_ERROR(" vlu must be specified when range=V ")
826   end if
827   Dctl%vlu = vlu
828 
829   Dctl%nband_k=-1 !??
830 
831   ltest = (vlu(2)>vlu(1))
832   write(msg,'(a,2f0.3)')" Illegal value for vlu: ",vlu
833   ABI_CHECK(ltest,msg)
834 
835  CASE DEFAULT
836    ABI_ERROR(" Unknown value for range: "//TRIM(Dctl%range))
837  END SELECT
838 
839  ! Consider the case in which we asked for the entire set of eigenvectors
840  ! but the number of bands is less that npw_k. Therefore have to prepare the call to ZHEEVX.
841  ! TODO this has to be done in a cleaner way.
842  if (Dctl%range == "A" .and. .not. dctl%do_full_diago) then
843    Dctl%range="I"
844    Dctl%ilu(1) = 1
845    Dctl%ilu(2) = npw_k*nspinor
846    Dctl%nband_k= npw_k*nspinor
847  end if
848 
849  Dctl%use_scalapack=0
850  if (PRESENT(use_scalapack)) Dctl%use_scalapack=use_scalapack
851  ABI_CHECK(Dctl%use_scalapack==0," scalapack mode not coded yet")
852 
853  call destroy_mpi_enreg(mpi_enreg_seq)
854 
855 end subroutine init_ddiago_ctl

m_ksdiago/ksdiago [ Functions ]

[ Top ] [ m_ksdiago ] [ Functions ]

NAME

 ksdiago

FUNCTION

  This routine performs the direct diagonalization of the Kohn-Sham Hamiltonian
  for a given k-point and spin. The routine drives the following operations:

    1) Re-computing <G|H|G_prim> matrix elements for all (G, G_prim).
       starting from the knowledge of the local potential on the real-space FFT mesh.

    2) Diagonalizing H in the plane-wave basis.

  It is called in outkss.F90 during the generation of the KSS file
  needed for a GW post-treatment. Since many-body calculations usually
  require a large number of eigenstates eigen-functions, a direct
  diagonalization of the Hamiltonian might reveal more stable than iterative
  techniques that might be problematic when several high energy states are required.
  The main drawback of the direct diagonalization is the bad scaling with the size
  of the basis set (npw**3) and the large memory requirements.

INPUTS

  kpoint(3)
  prtvol=Integer Flags  defining verbosity level
  ecut=cut-off energy for plane wave basis sphere (Ha)
  mgfftc=maximum size of 1D FFTs (coarse mesh).
  natom=number of atoms in cell.
  nfftf=(effective) number of FFT grid points in the dense FFT mesh (for this processor)
         (nfftf=nfft for norm-conserving potential runs)
  nspinor=number of spinorial components of the wavefunctions
  nsppol=1 for unpolarized, 2 for spin-polarized
  nspden=number of density components
  pawtab(psps%ntypat*psps%usepaw) <type(pawtab_type)>=paw tabulated starting data
  pawfgr<pawfgr_type>=fine grid parameters and related data
  paw_ij(natom) <type(paw_ij_type)>=paw arrays given on (i,j) channels
  psps <type(pseudopotential_type)>=variables related to pseudopotentials
  rprimd(3,3)=dimensional primitive translations for real space (bohr)
  vtrial(nfftf,nspden)=the trial potential
  xred(3,natom)=reduced dimensionless atomic coordinates
  comm=MPI communicator.
  [electronpositron] <electronpositron_type>=quantities for the electron-positron annihilation.
  nfftc=Number of points in the coarse FFT mesh.
  ngfftc(18)=Info about 3D FFT for the coarse mesh, see ~abinit/doc/variables/vargs.htm#ngfft
  Diago_ctl<ddiago_ctl_type>=Datatype storing variables and options controlling the direct diagonalization.

OUTPUT

  ierr=Status error.
  onband_diago

SIDE EFFECTS

  eig_ene(:)=Pointer used for allocating and storing the eigenvalues (hartree)
    input: pointer to NULL
     output: eig_ene(onband_diago)=The calculatated eigenvalues in ascending order.

  eig_vec(:,:,:)=Pointer used for allocating and holding the wave functions at this k-point and spin.
    input: pointer to NULL
    output: eig_vec(2,npw_k*nspinor,onband_diago)=The calculated eigenvectors.

  cprj_k(natom,nspinor*onband_diago) PAW only===
   input: pointer to NULL
   output: Projected eigenstates <Proj_i|Cnk> from output eigenstates.

NOTES

 * The routine can be time consuming (in particular when computing <G1|H|G2> elements for all (G1,G2)).
   So, it is recommended to call it once per run.

 * The routine RE-compute all Hamiltonian terms. So it is equivalent to an additional electronic SCF cycle.
   (This has no effect is convergence was reached.
   If not, eigenvalues/vectors may differs from the conjugate gradient ones)

 * Please, do NOT pass Dtset% to this routine. Either use a local variable properly initialized
   or add the additional variable to ddiago_ctl_type and change the creation method accordingly.
   ksdiago is designed such that it is possible to diagonalize the Hamiltonian at an arbitrary k-point
   or spin (not efficient but easy to code). Therefore ksdiago is useful non only for
   the KSS generation but also for testing more advanced iterative algorithms as well as interpolation techniques.

SOURCE

331 subroutine ksdiago(Diago_ctl, nband_k, nfftc, mgfftc, ngfftc, natom, &
332                    typat, nfftf, nspinor, nspden, nsppol, pawtab, pawfgr, paw_ij,&
333                    psps, rprimd, vtrial, xred, onband_diago, eig_ene, eig_vec, cprj_k, comm, ierr,&
334                    electronpositron) ! Optional arguments
335 
336 !Arguments ------------------------------------
337 !scalars
338  integer,intent(in) :: mgfftc,natom,comm,nband_k,nfftf,nsppol,nspden,nspinor,nfftc
339  integer,intent(out) :: ierr, onband_diago
340  type(pseudopotential_type),intent(in) :: psps
341  type(pawfgr_type),intent(in) :: pawfgr
342  type(ddiago_ctl_type),intent(in) :: Diago_ctl
343 !arrays
344  integer,intent(in) :: typat(natom), ngfftc(18)
345  real(dp),intent(in) :: rprimd(3,3)
346  real(dp),intent(inout) :: vtrial(nfftf,nspden)
347  real(dp),intent(in) :: xred(3,natom)
348  real(dp),pointer :: eig_ene(:),eig_vec(:,:,:)
349  type(pawcprj_type),pointer :: cprj_k(:,:)
350  type(pawtab_type),intent(in) :: pawtab(psps%ntypat*psps%usepaw)
351  type(paw_ij_type),intent(in) :: paw_ij(natom*psps%usepaw)
352  type(electronpositron_type),optional,pointer :: Electronpositron
353 
354 !Local variables-------------------------------
355 !scalars
356  integer,parameter :: mkmem1 = 1, tim_getghc = 4, paral_kgb0 = 0, master = 0, ndat1 = 1, ncomp1 = 1
357  integer :: cprj_choice,cpopt,dimffnl,ib,ider,idir,spin,npw_k
358  integer :: ikg,istwf_k,exchn2n3d,prtvol
359  integer :: jj,n1,n2,n3,n4,n5,n6,negv,nkpg,nproc,npw_k_test,my_rank,optder
360  integer :: type_calc,sij_opt,igsp2,cplex_ghg,iband,ibs1,ibs2
361  real(dp),parameter :: lambda0 = zero
362  real(dp) :: ucvol,ecutsm,effmass_free,size_mat,ecut
363  logical :: do_full_diago
364  character(len=50) :: jobz,range
365  character(len=80) :: frmt1,frmt2
366  character(len=10) :: stag(2)
367  character(len=500) :: msg
368  type(MPI_type) :: mpi_enreg_seq
369  type(gs_hamiltonian_type) :: gs_hamk
370 !arrays
371  integer :: nloalg(3)
372  integer,allocatable :: kg_k(:,:)
373  real(dp) :: gmet(3,3),gprimd(3,3),rmet(3,3),kptns_(3,1),kpoint(3),ylmgr_dum(1,1,1)
374  real(dp),allocatable :: ph3d(:,:,:),bras(:,:),ffnl(:,:,:,:),kinpw(:),kpg_k(:,:)
375  real(dp),allocatable :: vlocal(:,:,:,:),ylm_k(:,:),dum_ylm_gr_k(:,:,:), vxctaulocal(:,:,:,:,:)
376  real(dp),allocatable :: ghc(:,:),gvnlxc(:,:),gsc(:,:),ghg_mat(:,:,:),gsg_mat(:,:,:)
377  real(dp),pointer :: cwavef(:,:)
378  type(pawcprj_type),allocatable :: cwaveprj(:,:)
379 
380 ! *********************************************************************
381 
382  nproc = xmpi_comm_size(comm); my_rank = xmpi_comm_rank(comm)
383 
384  if (nproc > 1) then
385    ABI_WARNING("ksdiago not supported in parallel. Running in sequential.")
386  end if
387 
388  call initmpi_seq(mpi_enreg_seq) ! Fake MPI_type for sequential part.
389  call init_distribfft_seq(mpi_enreg_seq%distribfft, 'c', ngfftc(2), ngfftc(3), 'all')
390  if (pawfgr%usefinegrid /= 0) then
391    call init_distribfft_seq(mpi_enreg_seq%distribfft, 'f', pawfgr%ngfft(2), pawfgr%ngfft(3), 'all')
392  end if
393 
394  spin  = Diago_ctl%spin
395  kpoint  = Diago_ctl%kpoint
396  istwf_k = Diago_ctl%istwf_k
397  !% nband_k = Diago_ctl%nband_k
398  npw_k   = Diago_ctl%npw_k
399  nloalg  = Diago_ctl%nloalg
400  ecut    = Diago_ctl%ecut
401  ecutsm  = Diago_ctl%ecutsm
402  effmass_free = Diago_ctl%effmass_free
403  prtvol  = Diago_ctl%prtvol
404 
405  call metric(gmet, gprimd, -1, rmet, rprimd, ucvol)
406 
407  if (nsppol == 1) stag = ['          ','          ']
408  if (nsppol == 2) stag = ['SPIN UP:  ','SPIN DOWN:']
409 
410  ! The coarse FFT mesh.
411  n1 = ngfftc(1); n2 = ngfftc(2); n3 = ngfftc(3)
412  n4 = ngfftc(4); n5 = ngfftc(5); n6 = ngfftc(6)
413 
414  !====================
415  !=== Check input ====
416  !====================
417  ierr = 0
418 
419  ! istwfk must be 1 for each k-point
420  if (istwf_k/=1) then
421    write(msg,'(7a)')&
422    ' istwfk /= 1 not allowed:',ch10,&
423    ' States output not programmed for time-reversal symmetry.',ch10,&
424    ' Action: change istwfk in input file (put it to 1 for all kpt).',ch10,&
425    ' Program does not stop but _KSS file will not be created...'
426    ABI_WARNING(msg)
427    ierr = ierr + 1
428  end if
429 
430  if (ierr /= 0) RETURN ! Houston we have a problem!
431 
432  ! Initialize the Hamiltonian datatype on the coarse FFT mesh.
433  if (present(electronpositron)) then
434    call init_hamiltonian(gs_hamk, psps, pawtab, nspinor, nsppol, nspden, natom, typat, xred, nfftc, &
435     mgfftc, ngfftc, rprimd, nloalg, paw_ij=paw_ij, usecprj=0, electronpositron=electronpositron)
436  else
437    call init_hamiltonian(gs_hamk, psps, pawtab, nspinor, nsppol, nspden, natom, typat, xred, nfftc, &
438     mgfftc, ngfftc, rprimd, nloalg, paw_ij=paw_ij, usecprj=0)
439  end if
440 
441  ! Check on the number of stored bands.
442  onband_diago = nband_k
443  if (nband_k==-1 .or. nband_k >= npw_k*nspinor) then
444    onband_diago = npw_k*nspinor
445    write(msg,'(4a,i0)')ch10,&
446     ' Since the number of bands to be computed was -1 or',ch10,&
447     ' too large, it has been set to the maximum value npw_k*nspinor: ',npw_k*nspinor
448    call wrtout(std_out, msg)
449  end if
450 
451  !do_full_diago = (onband_diago==npw_k*nspinor)
452  do_full_diago = Diago_ctl%do_full_diago
453 
454  if (do_full_diago) then
455    write(msg,'(6a)')ch10,&
456    ' Since the number of bands to be computed',ch10,&
457    ' is equal to the number of G-vectors found for this kpt,',ch10,&
458    ' the program will perform complete diagonalization.'
459  else
460    write(msg,'(6a)')ch10,&
461    ' Since the number of bands to be computed',ch10,&
462    ' is less than the number of G-vectors found,',ch10,&
463    ' the program will perform partial diagonalization.'
464  end if
465  if (prtvol > 0) call wrtout(std_out, msg)
466 
467  ! Set up local potential vlocal with proper dimensioning, from vtrial.
468  ! Select spin component of interest if nspden<=2 as nvloc==1, for nspden==4, nvloc==4
469  ! option=2: vtrial(n1*n2*n3,ispden) --> vlocal(nd1,nd2,nd3) real case
470 
471  ABI_MALLOC(vlocal, (n4, n5, n6, gs_hamk%nvloc))
472  !if (with_vxctau) then
473  !  ABI_MALLOC(vxctaulocal,(n4,n5,n6,gs_hamk%nvloc,4))
474  !end if
475 
476  ! Set up local potential vlocal on the coarse FFT mesh from vtrial taking into account the spin.
477 
478  call gspot_transgrid_and_pack(spin, psps%usepaw, paral_kgb0, nfftc, ngfftc, nfftf, &
479                                nspden, gs_hamk%nvloc, ncomp1, pawfgr, mpi_enreg_seq, vtrial, vlocal)
480  call gs_hamk%load_spin(spin, vlocal=vlocal, with_nonlocal=.true.)
481 
482  ! This for meta-gga.
483  !if (with_vxctau) then
484  !  call gspot_transgrid_and_pack(spin, psps%usepaw, paral_kgb0, nfftc, ngfftc, nfftf, &
485  !                                nspden, gs_hamk%nvloc, 4, pawfgr, mpi_enreg, vxctau, vxctaulocal)
486  !  call gs_hamk%load_spin(spin, vxctaulocal=vxctaulocal)
487  !end if
488 
489  ! Calculate G-vectors, for this k-point. Count also the number of planewaves as a check.
490  exchn2n3d = 0; ikg = 0
491  ABI_MALLOC(kg_k, (3, npw_k))
492 
493  call kpgsph(ecut, exchn2n3d, gmet, ikg, 0, istwf_k, kg_k, kpoint, 0, mpi_enreg_seq, 0, npw_k_test)
494  ABI_CHECK(npw_k_test == npw_k, "npw_k_test/=npw_k")
495  call kpgsph(ecut,exchn2n3d,gmet,ikg,0,istwf_k,kg_k,kpoint,mkmem1,mpi_enreg_seq,npw_k,npw_k_test)
496 
497  !========================
498  !==== Kinetic energy ====
499  !========================
500  ABI_MALLOC(kinpw, (npw_k))
501  call mkkin(ecut, ecutsm, effmass_free, gmet, kg_k, kinpw, kpoint, npw_k, 0, 0)
502 
503  !================================
504  !==== Non-local form factors ====
505  !================================
506  ABI_MALLOC(ylm_k, (npw_k, psps%mpsang**2*psps%useylm))
507 
508  if (psps%useylm == 1) then
509    optder = 0
510    ABI_MALLOC(dum_ylm_gr_k, (npw_k, 3+6*(optder/2),psps%mpsang**2))
511    kptns_(:,1) = kpoint
512 
513    ! Here mband is not used if paral_compil_kpt=0
514    call initylmg(gprimd, kg_k, kptns_, mkmem1, mpi_enreg_seq, psps%mpsang, npw_k, [nband_k], 1, &
515      [npw_k], 1, optder, rprimd, ylm_k, dum_ylm_gr_k)
516 
517    ABI_FREE(dum_ylm_gr_k)
518  end if
519 
520  ! Compute (k+G) vectors (only if useylm=1)
521  nkpg = 3 * nloalg(3)
522  ABI_MALLOC(kpg_k, (npw_k, nkpg))
523  if (nkpg > 0) call mkkpg(kg_k, kpg_k, kpoint, nkpg, npw_k)
524 
525  ! Compute nonlocal form factors ffnl at all (k+G):
526  idir=0; ider=0; dimffnl=1+ider ! Now the derivative is not needed anymore.
527  ABI_MALLOC(ffnl, (npw_k, dimffnl, psps%lmnmax, psps%ntypat))
528 
529  call mkffnl(psps%dimekb, dimffnl, psps%ekb, ffnl, psps%ffspl, gmet, gprimd, ider, idir, psps%indlmn, &
530    kg_k, kpg_k, kpoint, psps%lmnmax, psps%lnmax, psps%mpsang, psps%mqgrid_ff, nkpg, npw_k, &
531    psps%ntypat, psps%pspso, psps%qgrid_ff, rmet, psps%usepaw, psps%useylm, ylm_k, ylmgr_dum)
532 
533  ABI_FREE(ylm_k)
534 
535  ! Load k-dependent part in the Hamiltonian datastructure
536  ABI_MALLOC(ph3d, (2, npw_k, gs_hamk%matblk))
537  call gs_hamk%load_k(kpt_k=kpoint, istwf_k=istwf_k, npw_k=npw_k, kinpw_k=kinpw, &
538                      kg_k=kg_k, kpg_k=kpg_k, ffnl_k=ffnl, ph3d_k=ph3d, compute_ph3d=.true., compute_gbound=.true.)
539 
540  ! Prepare call to getghc.
541  type_calc = 0                                ! For applying the whole Hamiltonian
542  sij_opt = 0; if (psps%usepaw==1) sij_opt = 1 ! For PAW, <k+G|S|k+G"> is also needed.
543 
544  cpopt = -1    ! If cpopt=-1, <p_lmn|in> (and derivatives) are computed here (and not saved)
545  if (psps%usepaw==1.and..FALSE.) then ! TODO Calculate <p_lmn|k+G>.
546    cpopt = 0  ! <p_lmn|in> are computed here and saved
547  end if
548 
549  ABI_MALLOC(ghc, (2, npw_k*nspinor*ndat1))
550  ABI_MALLOC(gvnlxc, (2, npw_k*nspinor*ndat1))
551  ABI_MALLOC(gsc, (2, npw_k*nspinor*ndat1*(sij_opt+1)/2))
552 
553  cplex_ghg = 2
554  size_mat = cplex_ghg*(npw_k*nspinor)**2*dp*b2Mb
555  write(msg,'(a,f0.3,a)')" Out-of-memory in ghg_mat. Memory required by the Hamiltonian matrix: ",size_mat," [Mb]."
556  ABI_STAT_MALLOC(ghg_mat, (cplex_ghg, npw_k*nspinor, npw_k*nspinor), ierr)
557  ABI_CHECK(ierr == 0, msg)
558  write(msg,'(a,f0.3,a)')" Out-of-memory in gsg_mat. Memory required by the PAW overlap operator: ",size_mat," [Mb]."
559  ABI_STAT_MALLOC(gsg_mat, (cplex_ghg, npw_k*nspinor, npw_k*nspinor*psps%usepaw), ierr)
560  ABI_CHECK(ierr == 0, msg)
561 
562  ! cwaveprj is ordered by atom type, see nonlop_ylm.
563  ABI_MALLOC(cwaveprj, (natom, nspinor*(1+cpopt)*gs_hamk%usepaw))
564  if (cpopt == 0) call pawcprj_alloc(cwaveprj, 0, gs_hamk%dimcprj)
565 
566  ! Initialize plane-wave array with zeros
567  ABI_CALLOC(bras, (2, npw_k*nspinor))
568  if (prtvol > 0) call wrtout(std_out, ' Calculating <G|H|G''> elements')
569 
570  ! Loop over the |beta,G''> component.
571  do igsp2=1,npw_k*nspinor
572    bras(1, igsp2) = one
573 
574    ! Get <:|H|beta,G''> and <:|S_{PAW}|beta,G''>
575    call getghc(cpopt, bras, cwaveprj, ghc, gsc, gs_hamk, gvnlxc, lambda0, mpi_enreg_seq, ndat1, &
576                prtvol, sij_opt, tim_getghc, type_calc)
577 
578    ! Fill the upper triangle.
579    ghg_mat(:,1:igsp2,igsp2) = ghc(:,1:igsp2)
580    if (psps%usepaw == 1) gsg_mat(:,1:igsp2,igsp2) = gsc(:,1:igsp2)
581 
582    ! Reset the |G,beta> component that has been treated.
583    bras(1, igsp2) = zero
584  end do
585 
586  ! Free workspace memory allocated so far.
587  ABI_FREE(bras)
588  ABI_FREE(kinpw)
589  ABI_FREE(vlocal)
590  ABI_FREE(ghc)
591  ABI_FREE(gvnlxc)
592  ABI_FREE(gsc)
593  ABI_SFREE(vxctaulocal)
594 
595  if (psps%usepaw == 1 .and. cpopt == 0) call pawcprj_free(Cwaveprj)
596  ABI_FREE(cwaveprj)
597 
598  !===========================================
599  !=== Diagonalization of <G|H|G''> matrix ===
600  !===========================================
601  ABI_MALLOC(eig_ene, (onband_diago))
602  ABI_MALLOC(eig_vec, (cplex_ghg, npw_k*nspinor, onband_diago))
603 
604  jobz = Diago_ctl%jobz  !jobz="Vectors"
605 
606  if (do_full_diago) then
607    ! Full diagonalization
608    write(msg,'(6a,i0)')ch10,&
609      ' Begin full diagonalization for kpt: ',trim(ktoa(kpoint)), stag(spin), ch10,&
610      ' Matrix size: ', npw_k*nspinor
611    call wrtout(std_out, msg)
612 
613    if (psps%usepaw == 0) then
614      call xheev_cplex(jobz, "Upper", cplex_ghg, npw_k*nspinor, ghg_mat, eig_ene, msg, ierr)
615    else
616      call xhegv_cplex(1, jobz, "Upper", cplex_ghg, npw_k*nspinor, ghg_mat, gsg_mat, eig_ene, msg, ierr)
617    end if
618    ABI_CHECK(ierr == 0, msg)
619    eig_vec(:,:,:)=  ghg_mat
620 
621  else
622    ! Partial diagonalization
623    range = Diago_ctl%range !range="Irange"
624 
625    write(msg,'(2a,3es16.8,3a,i0,a,i0)')ch10,&
626      ' Begin partial diagonalization for kpt= ',kpoint, stag(spin),ch10,&
627      ' - Size of mat.=',npw_k*nspinor,' - # out_nband: ',onband_diago
628    call wrtout(std_out, msg)
629 
630    if (psps%usepaw == 0) then
631      call xheevx_cplex(jobz, range, "Upper", cplex_ghg, npw_k*nspinor, ghg_mat, zero, zero,&
632        1, onband_diago, -tol8, negv, eig_ene, eig_vec, npw_k*nspinor, msg, ierr)
633    else
634      call xhegvx_cplex(1, jobz, range, "Upper", cplex_ghg, npw_k*nspinor, ghg_mat, gsg_mat, zero, zero,&
635        1, onband_diago, -tol8, negv, eig_ene, eig_vec, npw_k*nspinor, msg, ierr)
636    end if
637    ABI_CHECK(ierr == 0, msg)
638  end if
639 
640  ABI_FREE(ghg_mat)
641  ABI_FREE(gsg_mat)
642 
643  if (prtvol > 0 .and. my_rank == master) then
644    ! Write eigenvalues.
645    frmt1 = '(8x,9(1x,f7.2))'; frmt2 = '(8x,9(1x,f7.2))'
646    write(msg,'(2a,3x,a)')' Eigenvalues in eV for kpt: ', trim(ktoa(kpoint)), stag(spin)
647    call wrtout(std_out, msg)
648 
649    write(msg,frmt1)(eig_ene(ib)*Ha_eV,ib=1,MIN(9,onband_diago))
650    call wrtout(std_out, msg)
651    if (onband_diago >9 ) then
652      do jj=10,onband_diago,9
653        write(msg, frmt2) (eig_ene(ib)*Ha_eV,ib=jj,MIN(jj+8,onband_diago)); call wrtout(std_out, msg)
654      end do
655    end if
656  end if
657 
658  !========================================================
659  !==== Calculate <Proj_i|Cnk> from output eigenstates ====
660  !========================================================
661  if (psps%usepaw == 1) then
662 
663    ABI_MALLOC(cprj_k,(natom, nspinor*onband_diago))
664    call pawcprj_alloc(cprj_k, 0, gs_hamk%dimcprj)
665 
666    idir = 0; cprj_choice = 1  ! Only projected wave functions.
667 
668    do iband=1,onband_diago
669      ibs1 = nspinor * (iband - 1) + 1
670      ibs2 = ibs1; if (nspinor == 2) ibs2=ibs2+1
671      cwavef => eig_vec(1:2,1:npw_k,iband)
672 
673      call getcprj(cprj_choice, 0, cwavef, cprj_k(:,ibs1:ibs2), &
674        gs_hamk%ffnl_k, idir, gs_hamk%indlmn, gs_hamk%istwf_k, gs_hamk%kg_k, &
675        gs_hamk%kpg_k, gs_hamk%kpt_k, gs_hamk%lmnmax, gs_hamk%mgfft, mpi_enreg_seq, &
676        gs_hamk%natom, gs_hamk%nattyp, gs_hamk%ngfft, gs_hamk%nloalg, gs_hamk%npw_k, gs_hamk%nspinor, &
677        gs_hamk%ntypat, gs_hamk%phkxred, gs_hamk%ph1d, gs_hamk%ph3d_k, gs_hamk%ucvol, gs_hamk%useylm)
678    end do
679 
680    !  Reorder the cprj (order is now the same as in input file)
681    call pawcprj_reorder(cprj_k, gs_hamk%atindx1)
682  end if ! usepaw
683 
684  ! Free memory.
685  ABI_FREE(kpg_k)
686  ABI_FREE(kg_k)
687  ABI_FREE(ph3d)
688  ABI_FREE(ffnl)
689 
690  call destroy_mpi_enreg(mpi_enreg_seq)
691  call gs_hamk%free()
692 
693  call xmpi_barrier(comm)
694 
695 end subroutine ksdiago

m_ksdiago/ugb_from_diago [ Functions ]

[ Top ] [ m_ksdiago ] [ Functions ]

NAME

 ugb_from_diago

FUNCTION

  This routine performs the direct diagonalization of the Kohn-Sham Hamiltonian
  for a given k-point and spin using Scalapack/ELPA

INPUTS

  kpoint(3)
  prtvol=Integer Flags  defining verbosity level
  mgfftc=maximum size of 1D FFTs (coarse mesh).
  nfftf=(effective) number of FFT grid points in the dense FFT mesh (for this processor)
         (nfftf=nfft for norm-conserving potential runs)
  pawtab(psps%ntypat*psps%usepaw) <type(pawtab_type)>=paw tabulated starting data
  pawfgr<pawfgr_type>=fine grid parameters and related data
  paw_ij(natom) <type(paw_ij_type)>=paw arrays given on (i,j) channels
  psps <type(pseudopotential_type)>=variables related to pseudopotentials
  vtrial(nfftf,nspden)=the trial potential
  comm=MPI communicator.
  nfftc=Number of points in the coarse FFT mesh.
  ngfftc(18)=Info about 3D FFT for the coarse mesh, see ~abinit/doc/variables/vargs.htm#ngfft
  [electronpositron] <electronpositron_type>=quantities for the electron-positron annihilation.

OUTPUT

  eig_k(1:nband_k)=The calculatated eigenvalues in ascending order.

SOURCE

 887 subroutine ugb_from_diago(ugb, spin, istwf_k, kpoint, ecut, nband_k, ngfftc, nfftf, &
 888                           dtset, pawtab, pawfgr, paw_ij, cryst, psps, vtrial, eig_k, comm, &
 889                           electronpositron) ! Optional arguments
 890 
 891 !Arguments ------------------------------------
 892 !scalars
 893  class(ugb_t),target,intent(out) :: ugb
 894  integer,intent(in) :: spin, istwf_k
 895  real(dp),intent(in) :: kpoint(3), ecut
 896  type(dataset_type),intent(in) :: dtset
 897  integer,intent(in) :: comm,nfftf
 898  integer,intent(inout) :: nband_k
 899  type(crystal_t),intent(in) :: cryst
 900  type(pseudopotential_type),intent(in) :: psps
 901  !type(fock_type),intent(in) :: fock
 902  type(pawfgr_type),intent(in) :: pawfgr
 903 !arrays
 904  integer,intent(in) :: ngfftc(18)
 905  real(dp),intent(inout) :: vtrial(nfftf,dtset%nspden)
 906  !real(dp),intent(inout) :: vxctau(nfftf, dtset%nspden, 4*dtset%usekden)
 907  real(dp),allocatable,intent(out) :: eig_k(:)
 908  type(pawtab_type),intent(in) :: pawtab(psps%ntypat*psps%usepaw)
 909  type(paw_ij_type),intent(in) :: paw_ij(cryst%natom*psps%usepaw)
 910  type(electronpositron_type),optional,pointer :: electronpositron
 911 
 912 !Local variables-------------------------------
 913 !scalars
 914  integer,parameter :: mkmem1 = 1, tim_getghc = 4, paral_kgb0 = 0, master = 0, ncomp1 = 1
 915  integer :: cprj_choice,cpopt,dimffnl,ib,ider,idir,npw_k,nfftc,mgfftc, igs, ige, omp_nt
 916  integer :: jj,n1,n2,n3,n4,n5,n6,nkpg,nproc,my_rank,optder
 917  integer :: type_calc,sij_opt,igsp2_start,ig, my_ib,ibs1
 918  integer :: npwsp, col_bsize, nsppol, nspinor, nspden, loc2_size, il_g2, ierr, min_my_nband
 919  integer :: idat, ndat, batch_size, h_size !, mene_found
 920  real(dp),parameter :: lambda0 = zero
 921  real(dp) :: cpu, wall, gflops, mem_mb
 922  logical :: do_full_diago
 923  character(len=80) :: frmt1,frmt2
 924  character(len=10) :: stag(2)
 925  character(len=500) :: msg
 926  type(MPI_type) :: mpi_enreg_seq
 927  type(gs_hamiltonian_type) :: gs_hamk
 928  type(matrix_scalapack) :: ghg_mat, gsg_mat, ghg_4diag, gsg_4diag, eigvec
 929  type(processor_scalapack) :: proc_1d, proc_4diag
 930 !arrays
 931  real(dp) :: kptns_(3,1), ylmgr_dum(1,1,1), tsec(2)
 932  real(dp),allocatable :: ph3d(:,:,:), ffnl(:,:,:,:), kinpw(:), kpg_k(:,:)
 933  real(dp),allocatable :: vlocal(:,:,:,:), ylm_k(:,:), dum_ylm_gr_k(:,:,:), eig_ene(:), ghc(:,:), gvnlxc(:,:), gsc(:,:)
 934  real(dp),target,allocatable :: bras(:,:)
 935  !real(dp),contiguous,pointer :: bras2d_ptr(:,:)
 936  type(pawcprj_type),allocatable :: cwaveprj(:,:)
 937 
 938 ! *********************************************************************
 939 
 940  call timab(1919, 1, tsec)
 941  nproc = xmpi_comm_size(comm); my_rank = xmpi_comm_rank(comm)
 942 
 943  ! See sequence of calls in vtorho.
 944  ! Check that usekden is not 0 if want to use vxctau
 945  !with_vxctau = (present(vxctau).and.dtset%usekden/=0)
 946 
 947  ABI_CHECK_IEQ(dtset%usefock, 0, "direct diagonalization does not support usefock")
 948  ! Check that fock is present if want to use fock option
 949  !usefock = (dtset%usefock==1 .and. associated(fock))
 950 
 951  !====================
 952  !=== Check input ====
 953  !====================
 954  if (all(istwf_k /= [1, 2])) then
 955    ABI_ERROR(sjoin("istwfk:", itoa(istwf_k), "not allowed:"))
 956  end if
 957 
 958  if (istwf_k == 2) then
 959    !ABI_ERROR("istwfk == 2 with direct diago is still under development")
 960    ABI_WARNING("istwfk == 2 with direct diago is still under development")
 961  end if
 962 
 963  if (dtset%ixc < 0) then
 964    if (libxc_functionals_ismgga() .and. .not. libxc_functionals_is_tb09()) then
 965      ABI_ERROR("meta-gga functionals are not compatible with direct diagonalization!")
 966    end if
 967  end if
 968 
 969  ! MPI_type for sequential part.
 970  call initmpi_seq(mpi_enreg_seq)
 971  call init_distribfft_seq(mpi_enreg_seq%distribfft, 'c', ngfftc(2), ngfftc(3), 'all')
 972  if (pawfgr%usefinegrid /= 0) then
 973    call init_distribfft_seq(mpi_enreg_seq%distribfft, 'f', pawfgr%ngfft(2), pawfgr%ngfft(3), 'all')
 974  end if
 975 
 976  nspinor = dtset%nspinor; nsppol = dtset%nsppol; nspden = dtset%nspden
 977  if (nsppol == 1) stag = ['          ','          ']
 978  if (nsppol == 2) stag = ['SPIN UP:  ','SPIN DOWN:']
 979 
 980  call get_kg(kpoint, istwf_k, ecut, cryst%gmet, npw_k, ugb%kg_k)
 981  npwsp = npw_k * nspinor
 982 
 983  ! The coarse FFT mesh for the application of the Hamiltonian.
 984  n1 = ngfftc(1); n2 = ngfftc(2); n3 = ngfftc(3)
 985  n4 = ngfftc(4); n5 = ngfftc(5); n6 = ngfftc(6)
 986  nfftc = product(ngfftc(1:3)); mgfftc = maxval(ngfftc(1:3))
 987 
 988  ! Initialize the Hamiltonian on the coarse FFT mesh.
 989  if (present(electronpositron)) then
 990    call init_hamiltonian(gs_hamk, psps, pawtab, nspinor, nsppol, nspden, cryst%natom, cryst%typat, cryst%xred, nfftc, &
 991     mgfftc, ngfftc, cryst%rprimd, dtset%nloalg, paw_ij=paw_ij, usecprj=0, electronpositron=electronpositron)
 992  else
 993    call init_hamiltonian(gs_hamk, psps, pawtab, nspinor, nsppol, nspden, cryst%natom, cryst%typat, cryst%xred, nfftc, &
 994     mgfftc, ngfftc, cryst%rprimd, dtset%nloalg, paw_ij=paw_ij, usecprj=0)
 995  end if
 996 
 997  ! Check on the number of stored bands.
 998  if (nband_k == -1 .or. nband_k >= npwsp) then
 999    nband_k = npwsp
1000    write(msg,'(4a, i0)')ch10,&
1001     ' Since the number of bands to be computed was -1 or',ch10,&
1002     ' too large, it has been set to the maximum value. npw_k*nspinor: ',npwsp
1003    call wrtout(std_out, msg)
1004  end if
1005 
1006  do_full_diago = nband_k == npwsp
1007 
1008  ! Set up local potential vlocal with proper dimensioning, from vtrial.
1009  ! Select spin component of interest if nspden<=2 as nvloc==1, for nspden==4, nvloc==4
1010  ! option=2: vtrial(n1*n2*n3,ispden) --> vlocal(nd1,nd2,nd3) real case
1011 
1012  ABI_MALLOC(vlocal, (n4, n5, n6, gs_hamk%nvloc))
1013  call gspot_transgrid_and_pack(spin, psps%usepaw, paral_kgb0, nfftc, ngfftc, nfftf, &
1014                                nspden, gs_hamk%nvloc, ncomp1, pawfgr, mpi_enreg_seq, vtrial, vlocal)
1015  call gs_hamk%load_spin(spin, vlocal=vlocal, with_nonlocal=.true.)
1016 
1017  ! TODO: This for meta-gga.
1018  !if (with_vxctau) then
1019  !  call gspot_transgrid_and_pack(spin, psps%usepaw, paral_kgb0, nfftc, ngfftc, nfftf, &
1020  !                                nspden, gs_hamk%nvloc, 4, pawfgr, mpi_enreg, vxctau, vxctaulocal)
1021  !  call gs_hamk%load_spin(spin, vxctaulocal=vxctaulocal)
1022  !end if
1023 
1024  !========================
1025  !==== Kinetic energy ====
1026  !========================
1027  ABI_MALLOC(kinpw, (npw_k))
1028  call mkkin(ecut, dtset%ecutsm, dtset%effmass_free, cryst%gmet, ugb%kg_k, kinpw, kpoint, npw_k, 0, 0)
1029 
1030  !================================
1031  !==== Non-local form factors ====
1032  !================================
1033  ABI_MALLOC(ylm_k, (npw_k, psps%mpsang**2*psps%useylm))
1034 
1035  if (psps%useylm == 1) then
1036    optder = 0
1037    ABI_MALLOC(dum_ylm_gr_k, (npw_k, 3+6*(optder/2),psps%mpsang**2))
1038    kptns_(:,1) = kpoint
1039 
1040    ! NB: Here mband is not used if paral_compil_kpt = 0
1041    call initylmg(cryst%gprimd, ugb%kg_k, kptns_, mkmem1, mpi_enreg_seq, psps%mpsang, npw_k, [nband_k], 1, &
1042      [npw_k], 1, optder, cryst%rprimd, ylm_k, dum_ylm_gr_k)
1043 
1044    ABI_FREE(dum_ylm_gr_k)
1045  end if
1046 
1047  ! Compute (k+G) vectors (only if useylm=1)
1048  nkpg = 3 * dtset%nloalg(3)
1049  ABI_MALLOC(kpg_k, (npw_k, nkpg))
1050  if (nkpg > 0) call mkkpg(ugb%kg_k, kpg_k, kpoint, nkpg, npw_k)
1051 
1052  ! Compute nonlocal form factors ffnl at all (k+G):
1053  idir=0; ider=0; dimffnl=1+ider ! Now the derivative is not needed anymore.
1054  ABI_MALLOC(ffnl, (npw_k, dimffnl, psps%lmnmax, psps%ntypat))
1055 
1056  call mkffnl(psps%dimekb, dimffnl, psps%ekb, ffnl, psps%ffspl, cryst%gmet, cryst%gprimd, ider, idir, psps%indlmn, &
1057    ugb%kg_k, kpg_k, kpoint, psps%lmnmax, psps%lnmax, psps%mpsang, psps%mqgrid_ff, nkpg, npw_k, &
1058    psps%ntypat, psps%pspso, psps%qgrid_ff, cryst%rmet, psps%usepaw, psps%useylm, ylm_k, ylmgr_dum)
1059 
1060  ABI_FREE(ylm_k)
1061 
1062  ! Load k-dependent part of the Hamiltonian.
1063  ABI_MALLOC(ph3d, (2, npw_k, gs_hamk%matblk))
1064  call gs_hamk%load_k(kpt_k=kpoint, istwf_k=istwf_k, npw_k=npw_k, kinpw_k=kinpw, &
1065                      kg_k=ugb%kg_k, kpg_k=kpg_k, ffnl_k=ffnl, ph3d_k=ph3d, compute_ph3d=.true., compute_gbound=.true.)
1066 
1067  ! Prepare call to getghc.
1068  type_calc = 0                                ! For applying the whole Hamiltonian
1069  sij_opt = 0; if (psps%usepaw==1) sij_opt = 1 ! For PAW, <k+G|S|k+G"> is also needed.
1070 
1071  cpopt = -1
1072  if (psps%usepaw == 1) cpopt = 0  ! <p_lmn|in> are computed here and saved
1073 
1074  ! Init 1D PBLAS grid to block-distribute H along columns.
1075  call proc_1d%init(comm, grid_dims=[1, nproc])
1076  h_size = npwsp; if (istwf_k == 2) h_size = 2*npwsp - 1
1077 
1078  ABI_CHECK(block_dist_1d(h_size, nproc, col_bsize, msg), msg)
1079  call ghg_mat%init(h_size, h_size, proc_1d, istwf_k, size_blocs=[h_size, col_bsize])
1080  if (psps%usepaw == 1) call gsg_mat%init(h_size, h_size, proc_1d, istwf_k, size_blocs=[h_size, col_bsize])
1081 
1082  ! Estimate memory
1083  mem_mb = ghg_mat%locmem_mb()
1084  mem_mb = two * (psps%usepaw + 1) * mem_mb + mem_mb  ! last term for eigvec matrix
1085  call wrtout(std_out, sjoin(" Local memory for scalapack matrices:", ftoa(mem_mb, fmt="(f8.1)"), ' [Mb] <<< MEM'))
1086 
1087  ! Define batch size for the application of the Hamiltonian
1088  ! This is useful if OpenMP is activated thus we use multiple of omp_nt.
1089  omp_nt = xomp_get_num_threads(open_parallel=.True.)
1090  batch_size = 8 * omp_nt
1091  if (istwf_k == 2) batch_size = 1      ! FIXME
1092  !batch_size = 1
1093  call wrtout(std_out, sjoin(" Using batch_size:", itoa(batch_size)))
1094 
1095  ABI_MALLOC(bras, (2, npwsp * batch_size))
1096  ! cwaveprj is ordered by atom type, see nonlop_ylm.
1097  ABI_MALLOC(cwaveprj, (cryst%natom, nspinor*(1+cpopt)*gs_hamk%usepaw*batch_size))
1098  if (cpopt == 0) call pawcprj_alloc(cwaveprj, 0, gs_hamk%dimcprj)
1099  ABI_MALLOC(ghc, (2, npwsp * batch_size))
1100  ABI_MALLOC(gvnlxc, (2, npwsp * batch_size))
1101  ABI_MALLOC(gsc, (2, npwsp * batch_size*(sij_opt+1)/2))
1102 
1103  ! Loop over the |beta,G''> component.
1104  call cwtime(cpu, wall, gflops, "start")
1105  loc2_size = ghg_mat%sizeb_local(2)
1106 
1107  do il_g2=1, loc2_size, batch_size
1108    ! Operate on ndat g-vectors starting at the igsp2_start global index.
1109    igsp2_start = ghg_mat%loc2gcol(il_g2)
1110    ndat = blocked_loop(il_g2, loc2_size, batch_size)
1111 
1112    bras = zero
1113    if (istwf_k == 1) then
1114      do idat=0,ndat-1
1115        bras(1, igsp2_start + idat * npwsp + idat) = one
1116      end do
1117    else
1118      ! only istwf_k == 2 is coded here. NB: there's a check at the beginning of this routine.
1119      do idat=0,ndat-1
1120        if (igsp2_start + idat <= npwsp) then
1121          ! Cosine term
1122          bras(1, igsp2_start + idat*npwsp + idat) = half
1123          if (igsp2_start == 1) bras(1, igsp2_start + idat*npwsp + idat) = one
1124        else
1125          ! Sine term
1126          !ig = igsp2_start - npwsp + 1
1127          ig = igsp2_start - npwsp + 1 + 1  ! This should be OK
1128          bras(2, ig + idat*npwsp + idat) = half
1129        end if
1130      end do
1131    end if
1132 
1133    ! Get <:|H|beta,G''> and <:|S_{PAW}|beta,G''>
1134    !call c_f_pointer(c_loc(bras), bras2d_ptr, shape=[2, npwsp * batch_size)]
1135    call multithreaded_getghc(cpopt, bras, cwaveprj, ghc, gsc, gs_hamk, gvnlxc, lambda0, mpi_enreg_seq, ndat, &
1136                              dtset%prtvol, sij_opt, tim_getghc, type_calc)
1137 
1138    ! Now fill my local buffer of ghg/gsg
1139    if (istwf_k == 1) then
1140      do idat=0,ndat-1
1141        ! Complex wavefunctions.
1142        igs = 1 + idat * npwsp; ige = igs + npwsp - 1
1143        ghg_mat%buffer_cplx(:, il_g2+idat) = cmplx(ghc(1, igs:ige), ghc(2, igs:ige), kind=dp)
1144      end do
1145      if (psps%usepaw == 1) then
1146        do idat=0,ndat-1
1147          igs = 1 + idat * npwsp; ige = igs + npwsp - 1
1148          gsg_mat%buffer_cplx(:, il_g2+idat) = cmplx(gsc(1,igs:ige), gsc(2,igs:ige), kind=dp)
1149        end do
1150      end if
1151 
1152    else
1153      do idat=0,ndat-1
1154        ! Real wavefunctions.
1155        igs = 1 + idat*npwsp; ige = igs + npwsp - 1
1156        !if (igsp2_start == 1 .or. igsp2_start == npwsp + 1 .and. idat == 0) then
1157        !  ghc(:, igs:ige) = tol3 !; print *, ghc(:, igs:ige)
1158        !end if
1159        ghg_mat%buffer_real(1:npwsp,  il_g2+idat) =  ghc(1, igs:ige)     ! CC or CS
1160        ghg_mat%buffer_real(npwsp+1:, il_g2+idat) = -ghc(2, igs+1:ige)   ! SC or SS. Note igs+1
1161      end do
1162      if (psps%usepaw == 1) then
1163        NOT_IMPLEMENTED_ERROR()
1164        !gsg_mat%buffer_real(...)
1165        !gsg_mat%buffer_real(...)
1166      end if
1167    end if
1168 
1169  end do ! il_g2
1170  call cwtime_report(" build_hg1g2", cpu, wall, gflops)
1171 
1172  ! Free workspace memory allocated so far.
1173  ABI_FREE(bras)
1174  ABI_FREE(kinpw)
1175  ABI_FREE(vlocal)
1176  ABI_FREE(ghc)
1177  ABI_FREE(gvnlxc)
1178  ABI_FREE(gsc)
1179  if (psps%usepaw == 1 .and. cpopt == 0) call pawcprj_free(cwaveprj)
1180  ABI_FREE(cwaveprj)
1181 
1182  !===========================================
1183  !=== Diagonalization of <G|H|G''> matrix ===
1184  !===========================================
1185  ABI_MALLOC(eig_ene, (h_size))
1186 
1187  !print *, "ghg_trace:", ghg_mat%get_trace()
1188 
1189  ! Change size block and, if possible, use 2D rectangular grid of processors for diagonalization
1190  call proc_4diag%init(comm)
1191  call ghg_mat%change_size_blocs(ghg_4diag, processor=proc_4diag, free=.True.)
1192  if (psps%usepaw == 1) call gsg_mat%change_size_blocs(gsg_4diag, processor=proc_4diag, free=.True.)
1193 
1194  ! NB: global H shape is (h_size, h_size) even for partial diago.
1195  ! then one extracts the (hsize, nband_k) sub-matrix before returning.
1196  call ghg_4diag%copy(eigvec)
1197 
1198  if (do_full_diago) then
1199    write(msg,'(5a, (a,i0), 2a)')ch10,&
1200      ' Begin full diagonalization for kpt: ',trim(ktoa(kpoint)), stag(spin), ch10,&
1201      " H_gg' Matrix size: ",npwsp, ", Scalapack grid: ", trim(ltoa(ghg_4diag%processor%grid%dims))
1202    call wrtout(std_out, msg)
1203 
1204    call cwtime(cpu, wall, gflops, "start")
1205    if (psps%usepaw == 0) then
1206      !call ghg_4diag%pzheev("V", "U", eigvec, eig_ene)
1207      call compute_eigen_problem(ghg_4diag%processor, ghg_4diag, eigvec, eig_ene, comm, istwf_k)
1208    else
1209      call compute_generalized_eigen_problem(ghg_4diag%processor, ghg_4diag, gsg_4diag, eigvec, eig_ene, comm, istwf_k)
1210    end if
1211    call cwtime_report(" full_diago", cpu, wall, gflops)
1212 
1213  else
1214    write(msg,'(6a,i0,(a,i0), 2a)') ch10,&
1215      ' Begin partial diagonalization for kpt: ',trim(ktoa(kpoint)), stag(spin), ch10,&
1216      " H_gg' Matrix size: ",npwsp,', nband_k: ', nband_k,", Scalapack grid: ", trim(ltoa(ghg_4diag%processor%grid%dims))
1217    call wrtout(std_out, msg)
1218 
1219    call cwtime(cpu, wall, gflops, "start")
1220    if (psps%usepaw == 0) then
1221      !call ghg_4diag%pzheevx("V", "I", "U", zero, zero, 1, nband_k, -tol8, eigvec, mene_found, eig_ene)
1222      call compute_eigen_problem(ghg_4diag%processor, ghg_4diag, eigvec, eig_ene, comm, istwf_k, nev=nband_k)
1223    else
1224      !call ghg_4diag%pzhegvx(1, "V", "I", "U", gsg_4diag, zero, zero, 1, nband_k, -tol8, eigvec, mene_found, eig_ene)
1225      call compute_generalized_eigen_problem(ghg_4diag%processor, ghg_4diag, gsg_4diag, eigvec, eig_ene, comm, istwf_k, &
1226                                             nev=nband_k)
1227    end if
1228    call cwtime_report(" partial_diago", cpu, wall, gflops)
1229  end if
1230 
1231  if (dtset%prtvol > 0 .and. my_rank == master) then
1232    ! Write eigenvalues.
1233    frmt1 = '(8x,9(1x,f7.2))'; frmt2 = '(8x,9(1x,f7.2))'
1234    write(msg,'(2a,3x,a)')' Eigenvalues in eV for kpt: ', trim(ktoa(kpoint)), stag(spin)
1235    call wrtout(std_out, msg)
1236 
1237    write(msg,frmt1)(eig_ene(ib)*Ha_eV,ib=1,MIN(9,nband_k))
1238    call wrtout(std_out, msg)
1239    if (nband_k > 9 ) then
1240      do jj=10,nband_k,9
1241        write(msg, frmt2) (eig_ene(ib)*Ha_eV,ib=jj,MIN(jj+8,nband_k)); call wrtout(std_out, msg)
1242      end do
1243    end if
1244  end if
1245 
1246  call ghg_4diag%free(); call gsg_4diag%free(); call proc_1d%free()
1247 
1248  ! Now transfer eigvec to the ugb datastructure using 1d grid (block column distribution)
1249  call wrtout(std_out, " Moving to PBLAS block column distribution...")
1250  call cwtime(cpu, wall, gflops, "start")
1251 
1252  call ugb%processor%init(comm, grid_dims=[1, nproc])
1253  ABI_CHECK(block_dist_1d(nband_k, nproc, col_bsize, msg), msg)
1254  call eigvec%cut(h_size, nband_k, ugb%mat, size_blocs=[h_size, col_bsize], processor=ugb%processor, free=.True.)
1255  call proc_4diag%free()
1256 
1257  ABI_MALLOC(eig_k, (nband_k))
1258  eig_k(:) = eig_ene(1:nband_k)
1259 
1260  ugb%istwf_k = istwf_k
1261  ugb%nspinor = nspinor
1262  ugb%npw_k = npw_k
1263  ugb%npwsp = npwsp
1264  ugb%nband_k = nband_k
1265  ugb%comm => ugb%mat%processor%comm
1266 
1267  ugb%my_bstart = ugb%mat%loc2gcol(1)
1268  ugb%my_bstop = ugb%mat%loc2gcol(ugb%mat%sizeb_local(2))
1269  ugb%my_nband = ugb%my_bstop - ugb%my_bstart + 1
1270 
1271  if (ugb%my_nband > 0) then
1272    call c_f_pointer(c_loc(ugb%mat%buffer_cplx), ugb%cg_k, shape=[2, npwsp, ugb%my_nband])
1273  else
1274    ugb%my_nband = 0
1275    ugb%cg_k => null()
1276  end if
1277 
1278  call xmpi_min(ugb%my_nband, min_my_nband, comm, ierr)
1279  ugb%has_idle_procs = min_my_nband == 0
1280 
1281  if (psps%usepaw == 1 .and. ugb%my_nband > 0) then
1282    ! Calculate <Proj_i|Cnk> from output eigenstates. Note array allocated with ugb%my_nband
1283    ABI_MALLOC(ugb%cprj_k, (cryst%natom, nspinor * ugb%my_nband))
1284    call pawcprj_alloc(ugb%cprj_k, 0, gs_hamk%dimcprj)
1285    idir = 0; cprj_choice = 1  ! Only projected wave functions.
1286 
1287    do my_ib=1,ugb%my_nband
1288      ibs1 = nspinor * (my_ib - 1) + 1
1289      call getcprj(cprj_choice, 0, ugb%cg_k(:,:,my_ib), ugb%cprj_k(:,ibs1), &
1290                   gs_hamk%ffnl_k, idir, gs_hamk%indlmn, gs_hamk%istwf_k, gs_hamk%kg_k, &
1291                   gs_hamk%kpg_k, gs_hamk%kpt_k, gs_hamk%lmnmax, gs_hamk%mgfft, mpi_enreg_seq, &
1292                   gs_hamk%natom, gs_hamk%nattyp, gs_hamk%ngfft, gs_hamk%nloalg, gs_hamk%npw_k, gs_hamk%nspinor, &
1293                   gs_hamk%ntypat, gs_hamk%phkxred, gs_hamk%ph1d, gs_hamk%ph3d_k, gs_hamk%ucvol, gs_hamk%useylm)
1294    end do
1295 
1296    !  Reorder the cprj (order is now the same as in the input file)
1297    call pawcprj_reorder(ugb%cprj_k, gs_hamk%atindx1)
1298  end if ! usepaw
1299 
1300  call cwtime_report(" block column distribution", cpu, wall, gflops)
1301 
1302  ! Free memory.
1303  ABI_FREE(eig_ene)
1304  ABI_FREE(kpg_k)
1305  ABI_FREE(ph3d)
1306  ABI_FREE(ffnl)
1307  call destroy_mpi_enreg(mpi_enreg_seq)
1308  call gs_hamk%free()
1309 
1310  call timab(1919, 2, tsec)
1311 
1312 end subroutine ugb_from_diago

m_ksdiago/ugb_t [ Types ]

[ Top ] [ m_ksdiago ] [ Types ]

NAME

  ugb_t

FUNCTION

  Stores the wavefunctions for given (k-point, spin) in a PBLAS matrix distributed over bands (columns)

SOURCE

 77  type, public :: ugb_t
 78 
 79    integer :: istwf_k = -1
 80    ! Storage mode of cg_k
 81 
 82    integer :: nspinor = -1
 83    ! Number of spinors
 84 
 85    integer :: npw_k = -1
 86    ! Number of planewaves.
 87 
 88    integer :: npwsp = -1
 89    ! nnpw_k * nspinor
 90 
 91    integer :: nband_k = - 1
 92    ! Total number of bands (global)
 93 
 94    !integer :: usepaw
 95 
 96    integer :: my_bstart = -1, my_bstop = - 1, my_nband = - 1
 97    ! 1) Initial band
 98    ! 2) Last band
 99    ! 3) Number of bands treated by this proc. 0 if idle proc
100 
101    logical :: has_idle_procs
102    ! True if there are procs in comm who don't own any column.
103 
104    integer,pointer :: comm
105    ! pointer to MPI communicator in mat
106 
107    type(processor_scalapack) :: processor
108 
109    type(matrix_scalapack) :: mat
110    ! PBLAS matrix with MPI-distributed Fourier components
111    ! Local buffer: (2, npwsp * my_nband)
112    ! Global matrix: (npwsp, nband_k)
113 
114    integer, allocatable :: kg_k(:,:)
115    ! (3, npw_k)
116    ! G-vectors in reduced coordinates.
117 
118    real(dp), contiguous, pointer :: cg_k(:,:,:)
119    ! (2, npwsp * my_nband)
120    ! pointer to mat%buffer_cplx
121 
122    type(pawcprj_type),allocatable :: cprj_k(:,:)
123    ! (natom, nspinor * my_nband))
124    ! PAW projections ordered according to natom and NOT according to typat.
125    ! Note my_nband
126 
127  contains
128 
129    procedure :: from_diago  => ugb_from_diago
130     ! Build object by direct diagonalization of the KS Hamiltonian
131 
132    !procedure :: from_wfk  => ugb_from_wfk
133     ! Build object from WFK file
134 
135    procedure :: free => ugb_free
136     ! Free memory.
137 
138    procedure :: print => ugb_print
139     ! Print info on object.
140 
141    procedure :: collect_cprj => ugb_collect_cprj
142     ! Collect a subset of PAW cprj on all processors.
143 
144  end type ugb_t

m_slk/ugb_collect_cprj [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  ugb_collect_cprj

FUNCTION

  This is a collective routine that returns in `out_cprj` the PAW projections
  for `nb` bands starting at `band_start` NB: `out_cprj` is supposed to be allocated in the parent

SOURCE

1399 subroutine ugb_collect_cprj(ugb, nspinor, nb, band_start, out_cprj)
1400 
1401 !Arguments ------------------------------------
1402  class(ugb_t),intent(in) :: ugb
1403  integer,intent(in) :: nspinor, nb, band_start
1404  type(pawcprj_type),intent(inout) :: out_cprj(:,:)
1405 
1406 !Local variables-------------------------------
1407  integer :: ierr, my_ibs, out_ibs, band, cnt
1408 
1409 ! *************************************************************************
1410 
1411  ABI_CHECK_IEQ(size(ugb%cprj_k, dim=1), size(out_cprj, dim=1), "size1 should be the same")
1412  ABI_CHECK_IGEQ(size(out_cprj, dim=2), nb*nspinor, "size2 too small!")
1413 
1414  ! TODO: Numb algorithm based on xmpi_sum. Might be optimized.
1415  call pawcprj_set_zero(out_cprj)
1416 
1417  cnt = nspinor - 1
1418  do band=band_start, band_start+nb-1
1419    if (band >= ugb%my_bstart .and. band <= ugb%my_bstop) then
1420      my_ibs = 1 + (band - ugb%my_bstart) * nspinor
1421      out_ibs = 1 + (band - band_start) * nspinor
1422      call pawcprj_copy(ugb%cprj_k(:,my_ibs:my_ibs+cnt), out_cprj(:,out_ibs:out_ibs+cnt))
1423    end if
1424  end do
1425 
1426  call pawcprj_mpi_sum(out_cprj, ugb%comm, ierr)
1427 
1428 end subroutine ugb_collect_cprj