TABLE OF CONTENTS
- ABINIT/m_ksdiago
- m_gwr/ugb_free
- m_gwr/ugb_print
- m_ksdiago/ddiago_ctl_type
- m_ksdiago/init_ddiago_ctl
- m_ksdiago/ksdiago
- m_ksdiago/ugb_from_diago
- m_ksdiago/ugb_t
- m_slk/ugb_collect_cprj
ABINIT/m_ksdiago [ Modules ]
NAME
m_ksdiago
FUNCTION
Direct diagonalization of the KS Hamiltonian H_k(G,G')
COPYRIGHT
Copyright (C) 2008-2024 ABINIT group (MG) This file is distributed under the terms of the GNU General Public License, see ~abinit/COPYING or http://www.gnu.org/copyleft/gpl.txt .
SOURCE
16 #if defined HAVE_CONFIG_H 17 #include "config.h" 18 #endif 19 20 #include "abi_common.h" 21 22 module m_ksdiago 23 24 use defs_basis 25 use m_abicore 26 use m_errors 27 use m_xmpi 28 use m_xomp 29 use m_hamiltonian 30 use m_distribfft 31 use, intrinsic :: iso_c_binding 32 use libxc_functionals 33 34 use defs_datatypes, only : pseudopotential_type 35 use defs_abitypes, only : MPI_type 36 use m_dtset, only : dataset_type 37 use m_fstrings, only : toupper, ktoa, itoa, sjoin, ftoa, ltoa 38 use m_yaml, only : yamldoc_t, yamldoc_open 39 use m_numeric_tools, only : blocked_loop 40 use m_time, only : cwtime, cwtime_report, timab 41 use m_geometry, only : metric 42 use m_hide_lapack, only : xhegv_cplex, xheev_cplex, xheevx_cplex, xhegvx_cplex 43 use m_slk, only : matrix_scalapack, processor_scalapack, block_dist_1d, & 44 compute_eigen_problem, compute_generalized_eigen_problem 45 use m_kg, only : mkkin, mkkpg 46 use m_crystal, only : crystal_t 47 use m_fftcore, only : kpgsph, get_kg 48 use m_fft, only : fftpac 49 use m_cgtools, only : set_istwfk 50 use m_electronpositron, only : electronpositron_type 51 use m_mpinfo, only : destroy_mpi_enreg, initmpi_seq 52 use m_pawtab, only : pawtab_type 53 use m_paw_ij, only : paw_ij_type 54 use m_pawcprj, only : pawcprj_type, pawcprj_alloc, pawcprj_free, pawcprj_reorder, & 55 pawcprj_set_zero, pawcprj_mpi_sum, pawcprj_copy 56 use m_cgprj, only : getcprj 57 use m_pawfgr, only : pawfgr_type 58 use m_initylmg, only : initylmg 59 use m_mkffnl, only : mkffnl 60 use m_getghc, only : getghc, multithreaded_getghc 61 !use m_fock, only : fock_type 62 63 implicit none 64 65 private
m_gwr/ugb_free [ Functions ]
[ Top ] [ m_gwr ] [ Functions ]
NAME
ugb_free
FUNCTION
Free dynamic memory.
SOURCE
1326 subroutine ugb_free(ugb) 1327 1328 !Arguments ------------------------------------ 1329 class(ugb_t),intent(inout) :: ugb 1330 ! ************************************************************************* 1331 1332 call ugb%mat%free() 1333 call ugb%processor%free() 1334 ABI_SFREE(ugb%kg_k) 1335 ugb%cg_k => null() 1336 ugb%comm => null() 1337 1338 if (allocated(ugb%cprj_k)) then 1339 call pawcprj_free(ugb%cprj_k) 1340 ABI_FREE(ugb%cprj_k) 1341 end if 1342 1343 end subroutine ugb_free
m_gwr/ugb_print [ Functions ]
[ Top ] [ m_gwr ] [ Functions ]
NAME
ugb_print
FUNCTION
Print info on the object.
SOURCE
1357 subroutine ugb_print(ugb, units, prtvol, header) 1358 1359 !Arguments ------------------------------------ 1360 class(ugb_t),intent(in) :: ugb 1361 integer,intent(in) :: units(:), prtvol 1362 character(len=*),optional,intent(in) :: header 1363 1364 !Local variables------------------------------- 1365 character(len=500) :: msg 1366 type(yamldoc_t) :: ydoc 1367 1368 ! ************************************************************************* 1369 1370 ABI_UNUSED(prtvol) 1371 1372 msg = ' ==== Info on the ugb_t object ==== '; if (present(header)) msg = ' ==== '//trim(adjustl(header))//' ==== ' 1373 call wrtout(units, msg) 1374 1375 ydoc = yamldoc_open('ugb_t') !, width=11, real_fmt='(3f8.3)') 1376 call ydoc%add_int("istwf_k", ugb%istwf_k) 1377 call ydoc%add_int("nspinor", ugb%nspinor) 1378 call ydoc%add_int("npw_k", ugb%npw_k) 1379 call ydoc%add_int("nband_k", ugb%nband_k) 1380 call ydoc%add_int("my_bstart", ugb%my_bstart) 1381 call ydoc%add_int("my_bstop", ugb%my_bstop) 1382 call ydoc%add_int("my_nband", ugb%my_nband) 1383 call ydoc%write_units_and_free(units) 1384 1385 end subroutine ugb_print
m_ksdiago/ddiago_ctl_type [ Types ]
[ Top ] [ m_ksdiago ] [ Types ]
NAME
ddiago_ctl_type
FUNCTION
Structure storing the variables controlling the direct diagonalization of the Kohn-Sham Hamiltonian. Mainly used for debugging (and in the KSS code!)
SOURCE
157 type, public :: ddiago_ctl_type 158 159 integer :: spin 160 ! The spin component of the Hamiltonian (1 if nspinor==1 or nsppol==1). 161 162 integer :: istwf_k 163 ! Option defining whether time-reversal symmetry is used at particular k-points 164 ! If 0, the code will automatically use TR symmetry if possible (depending on the k-point) 165 166 integer :: nband_k 167 ! Number of bands to be calculated. 168 169 integer :: npw_k 170 ! The number of planes waves for the wavefunctions taking into account time-reversal symmetry. 171 172 integer :: npwtot 173 ! The number of planes waves in the Hamiltonian without taking into account istwf_k 174 175 integer :: nspinor 176 ! Number of spinorial components. 177 178 integer :: prtvol 179 ! Flag controlling the verbosity. 180 181 integer :: use_scalapack 182 ! 0 if diagonalization is done in sequential on each node. 183 ! 1 to use scalapack 184 ! TODO Not implemented 185 186 real(dp) :: abstol 187 ! used fro RANGE= "V", "I", and "A" when do_full_diago=.FALSE. 188 ! The absolute error tolerance for the eigenvalues. An approximate eigenvalue is accepted 189 ! as converged when it is determined to lie in an interval [a,b] of width less than or equal to 190 ! 191 ! ABSTOL + EPS * max( |a|,|b| ) , 192 ! 193 ! where EPS is the machine precision. If ABSTOL is less than or equal to zero, then EPS*|T| will be used in its place, 194 ! where |T| is the 1-norm of the tridiagonal matrix obtained by reducing A to tridiagonal form. 195 ! 196 ! Eigenvalues will be computed most accurately when ABSTOL is 197 ! set to twice the underflow threshold 2*DLAMCH('S'), not zero. 198 ! If this routine returns with INFO>0, indicating that some 199 ! eigenvectors did not converge, try setting ABSTOL to 2*DLAMCH('S'). 200 201 real(dp) :: ecut 202 ! The cutoff energy for the plane wave basis set. 203 204 real(dp) :: ecutsm 205 ! Smearing energy for plane wave kinetic energy (Ha) 206 207 real(dp) :: effmass_free 208 ! Effective mass for electrons (usually one). 209 210 logical :: do_full_diago 211 ! Specifies whether direct or partial diagonalization will be performed. 212 ! Meaningful only if RANGE='A'. 213 214 integer :: ilu(2) 215 ! If RANGE='I', the indices (in ascending order) of the smallest and largest eigenvalues to be returned. 216 ! il=ilu(1), iu=ilu(2) where 217 ! 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. NOT used if RANGE = 'A' or 'V'. 218 219 integer :: nloalg(3) 220 221 real(dp) :: kpoint(3) 222 ! The k-point in reduced coordinates at which the Hamiltonian is diagonalized. 223 224 real(dp) :: vlu(2) 225 ! If RANGE='V', the lower and upper bounds of the interval to 226 ! be searched for eigenvalues. vl=vlu(1) and vu=vlu(2) with VL < VU. 227 ! Not referenced if RANGE = 'A' or 'I'. 228 229 character(len=1) :: jobz 230 ! character defining whether wavefunctions are required (lapack option). 231 ! "N": Compute eigenvalues only; 232 ! "V": Compute eigenvalues and eigenvectors. 233 234 character(len=1) :: range 235 ! character defining the subset of eigenstates that will be calculated (lapack option). 236 ! "A": all eigenvalues will be found. 237 ! "V": all eigenvalues in the half-open interval (VL,VU] will be found. 238 ! "I": the IL-th through IU-th eigenvalues will be found. 239 240 !$character(len=fnlen) :: fname 241 ! The name of the file storing the eigenvectors and eigenvalues (only if jobz="V") 242 243 end type ddiago_ctl_type 244 245 public :: ksdiago 246 public :: init_ddiago_ctl
m_ksdiago/init_ddiago_ctl [ Functions ]
[ Top ] [ m_ksdiago ] [ Functions ]
NAME
init_ddiago_ctl
FUNCTION
INPUTS
OUTPUT
SOURCE
711 subroutine init_ddiago_ctl(Dctl, jobz, spin, nspinor, ecut, kpoint, nloalg, gmet, & 712 nband_k, istwf_k, ecutsm, effmass_free, abstol, range, ilu, vlu, use_scalapack, prtvol) 713 714 !Arguments ------------------------------------ 715 !scalars 716 integer,intent(in) :: spin,nspinor 717 integer,optional,intent(in) :: istwf_k,prtvol,use_scalapack,nband_k 718 real(dp),intent(in) :: ecut 719 real(dp),optional,intent(in) :: ecutsm,effmass_free 720 real(dp),optional,intent(in) :: abstol 721 character(len=*),intent(in) :: jobz 722 character(len=*),optional,intent(in) :: range 723 type(ddiago_ctl_type),intent(out) :: Dctl 724 !arrays 725 integer,intent(in) :: nloalg(3) 726 integer,optional,intent(in) :: ilu(2) 727 real(dp),intent(in) :: kpoint(3) 728 real(dp),optional,intent(in) :: vlu(2) 729 real(dp),intent(in) :: gmet(3,3) 730 731 !Local variables------------------------------- 732 !scalars 733 integer :: npw_k 734 logical :: ltest 735 character(len=500) :: msg 736 type(MPI_type) :: mpi_enreg_seq 737 !arrays 738 integer,allocatable :: kg_k(:,:) 739 740 ! ************************************************************************* 741 742 call initmpi_seq(mpi_enreg_seq) ! Fake MPI_type. 743 744 Dctl%spin = spin 745 Dctl%nspinor = nspinor 746 Dctl%kpoint = kpoint 747 748 if (PRESENT(istwf_k)) then 749 Dctl%istwf_k = istwf_k 750 else 751 Dctl%istwf_k = set_istwfk(kpoint) 752 end if 753 754 ABI_CHECK(Dctl%istwf_k == 1, "istwf_k/=1 not coded") 755 756 Dctl%jobz = toupper(jobz(1:1)) 757 Dctl%range = "A" 758 if (PRESENT(range)) Dctl%range = toupper(range) 759 760 Dctl%ecut = ecut 761 Dctl%ecutsm = zero; if (PRESENT(ecutsm)) Dctl%ecutsm = ecutsm 762 Dctl%effmass_free = one; if (PRESENT(effmass_free)) Dctl%effmass_free = effmass_free 763 Dctl%nloalg = nloalg 764 Dctl%prtvol = 0; if (PRESENT(prtvol)) Dctl%prtvol = prtvol 765 Dctl%abstol = -tol8; if (PRESENT(abstol)) Dctl%abstol = abstol 766 767 ABI_MALLOC(kg_k,(3,0)) 768 769 ! Total number of G-vectors for this k-point with istwf_k=1. 770 call kpgsph(ecut,0,gmet,0,0,1,kg_k,kpoint,0,mpi_enreg_seq,0,Dctl%npwtot) 771 772 ! G-vectors taking into account time-reversal symmetry. 773 call kpgsph(ecut,0,gmet,0,0,istwf_k,kg_k,kpoint,0,mpi_enreg_seq,0,npw_k) 774 775 Dctl%npw_k = npw_k 776 ABI_FREE(kg_k) 777 778 Dctl%do_full_diago = .FALSE. 779 780 SELECT CASE (Dctl%range) 781 CASE ("A") 782 783 ! Check on the number of stored bands. 784 Dctl%nband_k=-1 785 if (PRESENT(nband_k)) Dctl%nband_k=nband_k 786 787 if (Dctl%nband_k==-1.or.Dctl%nband_k>=npw_k*nspinor) then 788 Dctl%nband_k=npw_k*nspinor 789 write(msg,'(4a)')ch10,& 790 'Since the number of bands to be computed was (-1) or',ch10,& 791 'too large, it has been set to the max. value npw_k*nspinor. ' 792 if (Dctl%prtvol>0) call wrtout(std_out, msg) 793 else 794 Dctl%nband_k=nband_k 795 end if 796 797 Dctl%do_full_diago = (Dctl%nband_k==npw_k*nspinor) 798 799 if (Dctl%do_full_diago) then 800 write(msg,'(6a)')ch10,& 801 'Since the number of bands to be computed',ch10,& 802 'is equal to the number of G-vectors found for this k-point,',ch10,& 803 'the program will perform complete diagonalization.' 804 else 805 write(msg,'(6a)')ch10,& 806 'Since the number of bands to be computed',ch10,& 807 'is less than the number of G-vectors found,',ch10,& 808 'the program will perform partial diagonalization.' 809 end if 810 if (Dctl%prtvol>0) call wrtout(std_out, msg) 811 812 CASE ("I") 813 if (.not.PRESENT(ilu)) then 814 ABI_ERROR(" ilu must be specified when range=I ") 815 end if 816 Dctl%ilu = ilu 817 818 ltest = ( ( ilu(2)>=ilu(1) ) .and. ilu(1)>=1 .and. ilu(2)<=Dctl%npwtot ) 819 write(msg,'(a,2i0)')" Illegal value for ilu: ",ilu 820 ABI_CHECK(ltest,msg) 821 Dctl%nband_k= ilu(2)-ilu(1)+1 822 823 CASE ("V") 824 if (.not.PRESENT(vlu)) then 825 ABI_ERROR(" vlu must be specified when range=V ") 826 end if 827 Dctl%vlu = vlu 828 829 Dctl%nband_k=-1 !?? 830 831 ltest = (vlu(2)>vlu(1)) 832 write(msg,'(a,2f0.3)')" Illegal value for vlu: ",vlu 833 ABI_CHECK(ltest,msg) 834 835 CASE DEFAULT 836 ABI_ERROR(" Unknown value for range: "//TRIM(Dctl%range)) 837 END SELECT 838 839 ! Consider the case in which we asked for the entire set of eigenvectors 840 ! but the number of bands is less that npw_k. Therefore have to prepare the call to ZHEEVX. 841 ! TODO this has to be done in a cleaner way. 842 if (Dctl%range == "A" .and. .not. dctl%do_full_diago) then 843 Dctl%range="I" 844 Dctl%ilu(1) = 1 845 Dctl%ilu(2) = npw_k*nspinor 846 Dctl%nband_k= npw_k*nspinor 847 end if 848 849 Dctl%use_scalapack=0 850 if (PRESENT(use_scalapack)) Dctl%use_scalapack=use_scalapack 851 ABI_CHECK(Dctl%use_scalapack==0," scalapack mode not coded yet") 852 853 call destroy_mpi_enreg(mpi_enreg_seq) 854 855 end subroutine init_ddiago_ctl
m_ksdiago/ksdiago [ Functions ]
[ Top ] [ m_ksdiago ] [ Functions ]
NAME
ksdiago
FUNCTION
This routine performs the direct diagonalization of the Kohn-Sham Hamiltonian for a given k-point and spin. The routine drives the following operations: 1) Re-computing <G|H|G_prim> matrix elements for all (G, G_prim). starting from the knowledge of the local potential on the real-space FFT mesh. 2) Diagonalizing H in the plane-wave basis. It is called in outkss.F90 during the generation of the KSS file needed for a GW post-treatment. Since many-body calculations usually require a large number of eigenstates eigen-functions, a direct diagonalization of the Hamiltonian might reveal more stable than iterative techniques that might be problematic when several high energy states are required. The main drawback of the direct diagonalization is the bad scaling with the size of the basis set (npw**3) and the large memory requirements.
INPUTS
kpoint(3) prtvol=Integer Flags defining verbosity level ecut=cut-off energy for plane wave basis sphere (Ha) mgfftc=maximum size of 1D FFTs (coarse mesh). natom=number of atoms in cell. nfftf=(effective) number of FFT grid points in the dense FFT mesh (for this processor) (nfftf=nfft for norm-conserving potential runs) nspinor=number of spinorial components of the wavefunctions nsppol=1 for unpolarized, 2 for spin-polarized nspden=number of density components pawtab(psps%ntypat*psps%usepaw) <type(pawtab_type)>=paw tabulated starting data pawfgr<pawfgr_type>=fine grid parameters and related data paw_ij(natom) <type(paw_ij_type)>=paw arrays given on (i,j) channels psps <type(pseudopotential_type)>=variables related to pseudopotentials rprimd(3,3)=dimensional primitive translations for real space (bohr) vtrial(nfftf,nspden)=the trial potential xred(3,natom)=reduced dimensionless atomic coordinates comm=MPI communicator. [electronpositron] <electronpositron_type>=quantities for the electron-positron annihilation. nfftc=Number of points in the coarse FFT mesh. ngfftc(18)=Info about 3D FFT for the coarse mesh, see ~abinit/doc/variables/vargs.htm#ngfft Diago_ctl<ddiago_ctl_type>=Datatype storing variables and options controlling the direct diagonalization.
OUTPUT
ierr=Status error. onband_diago
SIDE EFFECTS
eig_ene(:)=Pointer used for allocating and storing the eigenvalues (hartree) input: pointer to NULL output: eig_ene(onband_diago)=The calculatated eigenvalues in ascending order. eig_vec(:,:,:)=Pointer used for allocating and holding the wave functions at this k-point and spin. input: pointer to NULL output: eig_vec(2,npw_k*nspinor,onband_diago)=The calculated eigenvectors. cprj_k(natom,nspinor*onband_diago) PAW only=== input: pointer to NULL output: Projected eigenstates <Proj_i|Cnk> from output eigenstates.
NOTES
* The routine can be time consuming (in particular when computing <G1|H|G2> elements for all (G1,G2)). So, it is recommended to call it once per run. * The routine RE-compute all Hamiltonian terms. So it is equivalent to an additional electronic SCF cycle. (This has no effect is convergence was reached. If not, eigenvalues/vectors may differs from the conjugate gradient ones) * Please, do NOT pass Dtset% to this routine. Either use a local variable properly initialized or add the additional variable to ddiago_ctl_type and change the creation method accordingly. ksdiago is designed such that it is possible to diagonalize the Hamiltonian at an arbitrary k-point or spin (not efficient but easy to code). Therefore ksdiago is useful non only for the KSS generation but also for testing more advanced iterative algorithms as well as interpolation techniques.
SOURCE
331 subroutine ksdiago(Diago_ctl, nband_k, nfftc, mgfftc, ngfftc, natom, & 332 typat, nfftf, nspinor, nspden, nsppol, pawtab, pawfgr, paw_ij,& 333 psps, rprimd, vtrial, xred, onband_diago, eig_ene, eig_vec, cprj_k, comm, ierr,& 334 electronpositron) ! Optional arguments 335 336 !Arguments ------------------------------------ 337 !scalars 338 integer,intent(in) :: mgfftc,natom,comm,nband_k,nfftf,nsppol,nspden,nspinor,nfftc 339 integer,intent(out) :: ierr, onband_diago 340 type(pseudopotential_type),intent(in) :: psps 341 type(pawfgr_type),intent(in) :: pawfgr 342 type(ddiago_ctl_type),intent(in) :: Diago_ctl 343 !arrays 344 integer,intent(in) :: typat(natom), ngfftc(18) 345 real(dp),intent(in) :: rprimd(3,3) 346 real(dp),intent(inout) :: vtrial(nfftf,nspden) 347 real(dp),intent(in) :: xred(3,natom) 348 real(dp),pointer :: eig_ene(:),eig_vec(:,:,:) 349 type(pawcprj_type),pointer :: cprj_k(:,:) 350 type(pawtab_type),intent(in) :: pawtab(psps%ntypat*psps%usepaw) 351 type(paw_ij_type),intent(in) :: paw_ij(natom*psps%usepaw) 352 type(electronpositron_type),optional,pointer :: Electronpositron 353 354 !Local variables------------------------------- 355 !scalars 356 integer,parameter :: mkmem1 = 1, tim_getghc = 4, paral_kgb0 = 0, master = 0, ndat1 = 1, ncomp1 = 1 357 integer :: cprj_choice,cpopt,dimffnl,ib,ider,idir,spin,npw_k 358 integer :: ikg,istwf_k,exchn2n3d,prtvol 359 integer :: jj,n1,n2,n3,n4,n5,n6,negv,nkpg,nproc,npw_k_test,my_rank,optder 360 integer :: type_calc,sij_opt,igsp2,cplex_ghg,iband,ibs1,ibs2 361 real(dp),parameter :: lambda0 = zero 362 real(dp) :: ucvol,ecutsm,effmass_free,size_mat,ecut 363 logical :: do_full_diago 364 character(len=50) :: jobz,range 365 character(len=80) :: frmt1,frmt2 366 character(len=10) :: stag(2) 367 character(len=500) :: msg 368 type(MPI_type) :: mpi_enreg_seq 369 type(gs_hamiltonian_type) :: gs_hamk 370 !arrays 371 integer :: nloalg(3) 372 integer,allocatable :: kg_k(:,:) 373 real(dp) :: gmet(3,3),gprimd(3,3),rmet(3,3),kptns_(3,1),kpoint(3),ylmgr_dum(1,1,1) 374 real(dp),allocatable :: ph3d(:,:,:),bras(:,:),ffnl(:,:,:,:),kinpw(:),kpg_k(:,:) 375 real(dp),allocatable :: vlocal(:,:,:,:),ylm_k(:,:),dum_ylm_gr_k(:,:,:), vxctaulocal(:,:,:,:,:) 376 real(dp),allocatable :: ghc(:,:),gvnlxc(:,:),gsc(:,:),ghg_mat(:,:,:),gsg_mat(:,:,:) 377 real(dp),pointer :: cwavef(:,:) 378 type(pawcprj_type),allocatable :: cwaveprj(:,:) 379 380 ! ********************************************************************* 381 382 nproc = xmpi_comm_size(comm); my_rank = xmpi_comm_rank(comm) 383 384 if (nproc > 1) then 385 ABI_WARNING("ksdiago not supported in parallel. Running in sequential.") 386 end if 387 388 call initmpi_seq(mpi_enreg_seq) ! Fake MPI_type for sequential part. 389 call init_distribfft_seq(mpi_enreg_seq%distribfft, 'c', ngfftc(2), ngfftc(3), 'all') 390 if (pawfgr%usefinegrid /= 0) then 391 call init_distribfft_seq(mpi_enreg_seq%distribfft, 'f', pawfgr%ngfft(2), pawfgr%ngfft(3), 'all') 392 end if 393 394 spin = Diago_ctl%spin 395 kpoint = Diago_ctl%kpoint 396 istwf_k = Diago_ctl%istwf_k 397 !% nband_k = Diago_ctl%nband_k 398 npw_k = Diago_ctl%npw_k 399 nloalg = Diago_ctl%nloalg 400 ecut = Diago_ctl%ecut 401 ecutsm = Diago_ctl%ecutsm 402 effmass_free = Diago_ctl%effmass_free 403 prtvol = Diago_ctl%prtvol 404 405 call metric(gmet, gprimd, -1, rmet, rprimd, ucvol) 406 407 if (nsppol == 1) stag = [' ',' '] 408 if (nsppol == 2) stag = ['SPIN UP: ','SPIN DOWN:'] 409 410 ! The coarse FFT mesh. 411 n1 = ngfftc(1); n2 = ngfftc(2); n3 = ngfftc(3) 412 n4 = ngfftc(4); n5 = ngfftc(5); n6 = ngfftc(6) 413 414 !==================== 415 !=== Check input ==== 416 !==================== 417 ierr = 0 418 419 ! istwfk must be 1 for each k-point 420 if (istwf_k/=1) then 421 write(msg,'(7a)')& 422 ' istwfk /= 1 not allowed:',ch10,& 423 ' States output not programmed for time-reversal symmetry.',ch10,& 424 ' Action: change istwfk in input file (put it to 1 for all kpt).',ch10,& 425 ' Program does not stop but _KSS file will not be created...' 426 ABI_WARNING(msg) 427 ierr = ierr + 1 428 end if 429 430 if (ierr /= 0) RETURN ! Houston we have a problem! 431 432 ! Initialize the Hamiltonian datatype on the coarse FFT mesh. 433 if (present(electronpositron)) then 434 call init_hamiltonian(gs_hamk, psps, pawtab, nspinor, nsppol, nspden, natom, typat, xred, nfftc, & 435 mgfftc, ngfftc, rprimd, nloalg, paw_ij=paw_ij, usecprj=0, electronpositron=electronpositron) 436 else 437 call init_hamiltonian(gs_hamk, psps, pawtab, nspinor, nsppol, nspden, natom, typat, xred, nfftc, & 438 mgfftc, ngfftc, rprimd, nloalg, paw_ij=paw_ij, usecprj=0) 439 end if 440 441 ! Check on the number of stored bands. 442 onband_diago = nband_k 443 if (nband_k==-1 .or. nband_k >= npw_k*nspinor) then 444 onband_diago = npw_k*nspinor 445 write(msg,'(4a,i0)')ch10,& 446 ' Since the number of bands to be computed was -1 or',ch10,& 447 ' too large, it has been set to the maximum value npw_k*nspinor: ',npw_k*nspinor 448 call wrtout(std_out, msg) 449 end if 450 451 !do_full_diago = (onband_diago==npw_k*nspinor) 452 do_full_diago = Diago_ctl%do_full_diago 453 454 if (do_full_diago) then 455 write(msg,'(6a)')ch10,& 456 ' Since the number of bands to be computed',ch10,& 457 ' is equal to the number of G-vectors found for this kpt,',ch10,& 458 ' the program will perform complete diagonalization.' 459 else 460 write(msg,'(6a)')ch10,& 461 ' Since the number of bands to be computed',ch10,& 462 ' is less than the number of G-vectors found,',ch10,& 463 ' the program will perform partial diagonalization.' 464 end if 465 if (prtvol > 0) call wrtout(std_out, msg) 466 467 ! Set up local potential vlocal with proper dimensioning, from vtrial. 468 ! Select spin component of interest if nspden<=2 as nvloc==1, for nspden==4, nvloc==4 469 ! option=2: vtrial(n1*n2*n3,ispden) --> vlocal(nd1,nd2,nd3) real case 470 471 ABI_MALLOC(vlocal, (n4, n5, n6, gs_hamk%nvloc)) 472 !if (with_vxctau) then 473 ! ABI_MALLOC(vxctaulocal,(n4,n5,n6,gs_hamk%nvloc,4)) 474 !end if 475 476 ! Set up local potential vlocal on the coarse FFT mesh from vtrial taking into account the spin. 477 478 call gspot_transgrid_and_pack(spin, psps%usepaw, paral_kgb0, nfftc, ngfftc, nfftf, & 479 nspden, gs_hamk%nvloc, ncomp1, pawfgr, mpi_enreg_seq, vtrial, vlocal) 480 call gs_hamk%load_spin(spin, vlocal=vlocal, with_nonlocal=.true.) 481 482 ! This for meta-gga. 483 !if (with_vxctau) then 484 ! call gspot_transgrid_and_pack(spin, psps%usepaw, paral_kgb0, nfftc, ngfftc, nfftf, & 485 ! nspden, gs_hamk%nvloc, 4, pawfgr, mpi_enreg, vxctau, vxctaulocal) 486 ! call gs_hamk%load_spin(spin, vxctaulocal=vxctaulocal) 487 !end if 488 489 ! Calculate G-vectors, for this k-point. Count also the number of planewaves as a check. 490 exchn2n3d = 0; ikg = 0 491 ABI_MALLOC(kg_k, (3, npw_k)) 492 493 call kpgsph(ecut, exchn2n3d, gmet, ikg, 0, istwf_k, kg_k, kpoint, 0, mpi_enreg_seq, 0, npw_k_test) 494 ABI_CHECK(npw_k_test == npw_k, "npw_k_test/=npw_k") 495 call kpgsph(ecut,exchn2n3d,gmet,ikg,0,istwf_k,kg_k,kpoint,mkmem1,mpi_enreg_seq,npw_k,npw_k_test) 496 497 !======================== 498 !==== Kinetic energy ==== 499 !======================== 500 ABI_MALLOC(kinpw, (npw_k)) 501 call mkkin(ecut, ecutsm, effmass_free, gmet, kg_k, kinpw, kpoint, npw_k, 0, 0) 502 503 !================================ 504 !==== Non-local form factors ==== 505 !================================ 506 ABI_MALLOC(ylm_k, (npw_k, psps%mpsang**2*psps%useylm)) 507 508 if (psps%useylm == 1) then 509 optder = 0 510 ABI_MALLOC(dum_ylm_gr_k, (npw_k, 3+6*(optder/2),psps%mpsang**2)) 511 kptns_(:,1) = kpoint 512 513 ! Here mband is not used if paral_compil_kpt=0 514 call initylmg(gprimd, kg_k, kptns_, mkmem1, mpi_enreg_seq, psps%mpsang, npw_k, [nband_k], 1, & 515 [npw_k], 1, optder, rprimd, ylm_k, dum_ylm_gr_k) 516 517 ABI_FREE(dum_ylm_gr_k) 518 end if 519 520 ! Compute (k+G) vectors (only if useylm=1) 521 nkpg = 3 * nloalg(3) 522 ABI_MALLOC(kpg_k, (npw_k, nkpg)) 523 if (nkpg > 0) call mkkpg(kg_k, kpg_k, kpoint, nkpg, npw_k) 524 525 ! Compute nonlocal form factors ffnl at all (k+G): 526 idir=0; ider=0; dimffnl=1+ider ! Now the derivative is not needed anymore. 527 ABI_MALLOC(ffnl, (npw_k, dimffnl, psps%lmnmax, psps%ntypat)) 528 529 call mkffnl(psps%dimekb, dimffnl, psps%ekb, ffnl, psps%ffspl, gmet, gprimd, ider, idir, psps%indlmn, & 530 kg_k, kpg_k, kpoint, psps%lmnmax, psps%lnmax, psps%mpsang, psps%mqgrid_ff, nkpg, npw_k, & 531 psps%ntypat, psps%pspso, psps%qgrid_ff, rmet, psps%usepaw, psps%useylm, ylm_k, ylmgr_dum) 532 533 ABI_FREE(ylm_k) 534 535 ! Load k-dependent part in the Hamiltonian datastructure 536 ABI_MALLOC(ph3d, (2, npw_k, gs_hamk%matblk)) 537 call gs_hamk%load_k(kpt_k=kpoint, istwf_k=istwf_k, npw_k=npw_k, kinpw_k=kinpw, & 538 kg_k=kg_k, kpg_k=kpg_k, ffnl_k=ffnl, ph3d_k=ph3d, compute_ph3d=.true., compute_gbound=.true.) 539 540 ! Prepare call to getghc. 541 type_calc = 0 ! For applying the whole Hamiltonian 542 sij_opt = 0; if (psps%usepaw==1) sij_opt = 1 ! For PAW, <k+G|S|k+G"> is also needed. 543 544 cpopt = -1 ! If cpopt=-1, <p_lmn|in> (and derivatives) are computed here (and not saved) 545 if (psps%usepaw==1.and..FALSE.) then ! TODO Calculate <p_lmn|k+G>. 546 cpopt = 0 ! <p_lmn|in> are computed here and saved 547 end if 548 549 ABI_MALLOC(ghc, (2, npw_k*nspinor*ndat1)) 550 ABI_MALLOC(gvnlxc, (2, npw_k*nspinor*ndat1)) 551 ABI_MALLOC(gsc, (2, npw_k*nspinor*ndat1*(sij_opt+1)/2)) 552 553 cplex_ghg = 2 554 size_mat = cplex_ghg*(npw_k*nspinor)**2*dp*b2Mb 555 write(msg,'(a,f0.3,a)')" Out-of-memory in ghg_mat. Memory required by the Hamiltonian matrix: ",size_mat," [Mb]." 556 ABI_STAT_MALLOC(ghg_mat, (cplex_ghg, npw_k*nspinor, npw_k*nspinor), ierr) 557 ABI_CHECK(ierr == 0, msg) 558 write(msg,'(a,f0.3,a)')" Out-of-memory in gsg_mat. Memory required by the PAW overlap operator: ",size_mat," [Mb]." 559 ABI_STAT_MALLOC(gsg_mat, (cplex_ghg, npw_k*nspinor, npw_k*nspinor*psps%usepaw), ierr) 560 ABI_CHECK(ierr == 0, msg) 561 562 ! cwaveprj is ordered by atom type, see nonlop_ylm. 563 ABI_MALLOC(cwaveprj, (natom, nspinor*(1+cpopt)*gs_hamk%usepaw)) 564 if (cpopt == 0) call pawcprj_alloc(cwaveprj, 0, gs_hamk%dimcprj) 565 566 ! Initialize plane-wave array with zeros 567 ABI_CALLOC(bras, (2, npw_k*nspinor)) 568 if (prtvol > 0) call wrtout(std_out, ' Calculating <G|H|G''> elements') 569 570 ! Loop over the |beta,G''> component. 571 do igsp2=1,npw_k*nspinor 572 bras(1, igsp2) = one 573 574 ! Get <:|H|beta,G''> and <:|S_{PAW}|beta,G''> 575 call getghc(cpopt, bras, cwaveprj, ghc, gsc, gs_hamk, gvnlxc, lambda0, mpi_enreg_seq, ndat1, & 576 prtvol, sij_opt, tim_getghc, type_calc) 577 578 ! Fill the upper triangle. 579 ghg_mat(:,1:igsp2,igsp2) = ghc(:,1:igsp2) 580 if (psps%usepaw == 1) gsg_mat(:,1:igsp2,igsp2) = gsc(:,1:igsp2) 581 582 ! Reset the |G,beta> component that has been treated. 583 bras(1, igsp2) = zero 584 end do 585 586 ! Free workspace memory allocated so far. 587 ABI_FREE(bras) 588 ABI_FREE(kinpw) 589 ABI_FREE(vlocal) 590 ABI_FREE(ghc) 591 ABI_FREE(gvnlxc) 592 ABI_FREE(gsc) 593 ABI_SFREE(vxctaulocal) 594 595 if (psps%usepaw == 1 .and. cpopt == 0) call pawcprj_free(Cwaveprj) 596 ABI_FREE(cwaveprj) 597 598 !=========================================== 599 !=== Diagonalization of <G|H|G''> matrix === 600 !=========================================== 601 ABI_MALLOC(eig_ene, (onband_diago)) 602 ABI_MALLOC(eig_vec, (cplex_ghg, npw_k*nspinor, onband_diago)) 603 604 jobz = Diago_ctl%jobz !jobz="Vectors" 605 606 if (do_full_diago) then 607 ! Full diagonalization 608 write(msg,'(6a,i0)')ch10,& 609 ' Begin full diagonalization for kpt: ',trim(ktoa(kpoint)), stag(spin), ch10,& 610 ' Matrix size: ', npw_k*nspinor 611 call wrtout(std_out, msg) 612 613 if (psps%usepaw == 0) then 614 call xheev_cplex(jobz, "Upper", cplex_ghg, npw_k*nspinor, ghg_mat, eig_ene, msg, ierr) 615 else 616 call xhegv_cplex(1, jobz, "Upper", cplex_ghg, npw_k*nspinor, ghg_mat, gsg_mat, eig_ene, msg, ierr) 617 end if 618 ABI_CHECK(ierr == 0, msg) 619 eig_vec(:,:,:)= ghg_mat 620 621 else 622 ! Partial diagonalization 623 range = Diago_ctl%range !range="Irange" 624 625 write(msg,'(2a,3es16.8,3a,i0,a,i0)')ch10,& 626 ' Begin partial diagonalization for kpt= ',kpoint, stag(spin),ch10,& 627 ' - Size of mat.=',npw_k*nspinor,' - # out_nband: ',onband_diago 628 call wrtout(std_out, msg) 629 630 if (psps%usepaw == 0) then 631 call xheevx_cplex(jobz, range, "Upper", cplex_ghg, npw_k*nspinor, ghg_mat, zero, zero,& 632 1, onband_diago, -tol8, negv, eig_ene, eig_vec, npw_k*nspinor, msg, ierr) 633 else 634 call xhegvx_cplex(1, jobz, range, "Upper", cplex_ghg, npw_k*nspinor, ghg_mat, gsg_mat, zero, zero,& 635 1, onband_diago, -tol8, negv, eig_ene, eig_vec, npw_k*nspinor, msg, ierr) 636 end if 637 ABI_CHECK(ierr == 0, msg) 638 end if 639 640 ABI_FREE(ghg_mat) 641 ABI_FREE(gsg_mat) 642 643 if (prtvol > 0 .and. my_rank == master) then 644 ! Write eigenvalues. 645 frmt1 = '(8x,9(1x,f7.2))'; frmt2 = '(8x,9(1x,f7.2))' 646 write(msg,'(2a,3x,a)')' Eigenvalues in eV for kpt: ', trim(ktoa(kpoint)), stag(spin) 647 call wrtout(std_out, msg) 648 649 write(msg,frmt1)(eig_ene(ib)*Ha_eV,ib=1,MIN(9,onband_diago)) 650 call wrtout(std_out, msg) 651 if (onband_diago >9 ) then 652 do jj=10,onband_diago,9 653 write(msg, frmt2) (eig_ene(ib)*Ha_eV,ib=jj,MIN(jj+8,onband_diago)); call wrtout(std_out, msg) 654 end do 655 end if 656 end if 657 658 !======================================================== 659 !==== Calculate <Proj_i|Cnk> from output eigenstates ==== 660 !======================================================== 661 if (psps%usepaw == 1) then 662 663 ABI_MALLOC(cprj_k,(natom, nspinor*onband_diago)) 664 call pawcprj_alloc(cprj_k, 0, gs_hamk%dimcprj) 665 666 idir = 0; cprj_choice = 1 ! Only projected wave functions. 667 668 do iband=1,onband_diago 669 ibs1 = nspinor * (iband - 1) + 1 670 ibs2 = ibs1; if (nspinor == 2) ibs2=ibs2+1 671 cwavef => eig_vec(1:2,1:npw_k,iband) 672 673 call getcprj(cprj_choice, 0, cwavef, cprj_k(:,ibs1:ibs2), & 674 gs_hamk%ffnl_k, idir, gs_hamk%indlmn, gs_hamk%istwf_k, gs_hamk%kg_k, & 675 gs_hamk%kpg_k, gs_hamk%kpt_k, gs_hamk%lmnmax, gs_hamk%mgfft, mpi_enreg_seq, & 676 gs_hamk%natom, gs_hamk%nattyp, gs_hamk%ngfft, gs_hamk%nloalg, gs_hamk%npw_k, gs_hamk%nspinor, & 677 gs_hamk%ntypat, gs_hamk%phkxred, gs_hamk%ph1d, gs_hamk%ph3d_k, gs_hamk%ucvol, gs_hamk%useylm) 678 end do 679 680 ! Reorder the cprj (order is now the same as in input file) 681 call pawcprj_reorder(cprj_k, gs_hamk%atindx1) 682 end if ! usepaw 683 684 ! Free memory. 685 ABI_FREE(kpg_k) 686 ABI_FREE(kg_k) 687 ABI_FREE(ph3d) 688 ABI_FREE(ffnl) 689 690 call destroy_mpi_enreg(mpi_enreg_seq) 691 call gs_hamk%free() 692 693 call xmpi_barrier(comm) 694 695 end subroutine ksdiago
m_ksdiago/ugb_from_diago [ Functions ]
[ Top ] [ m_ksdiago ] [ Functions ]
NAME
ugb_from_diago
FUNCTION
This routine performs the direct diagonalization of the Kohn-Sham Hamiltonian for a given k-point and spin using Scalapack/ELPA
INPUTS
kpoint(3) prtvol=Integer Flags defining verbosity level mgfftc=maximum size of 1D FFTs (coarse mesh). nfftf=(effective) number of FFT grid points in the dense FFT mesh (for this processor) (nfftf=nfft for norm-conserving potential runs) pawtab(psps%ntypat*psps%usepaw) <type(pawtab_type)>=paw tabulated starting data pawfgr<pawfgr_type>=fine grid parameters and related data paw_ij(natom) <type(paw_ij_type)>=paw arrays given on (i,j) channels psps <type(pseudopotential_type)>=variables related to pseudopotentials vtrial(nfftf,nspden)=the trial potential comm=MPI communicator. nfftc=Number of points in the coarse FFT mesh. ngfftc(18)=Info about 3D FFT for the coarse mesh, see ~abinit/doc/variables/vargs.htm#ngfft [electronpositron] <electronpositron_type>=quantities for the electron-positron annihilation.
OUTPUT
eig_k(1:nband_k)=The calculatated eigenvalues in ascending order.
SOURCE
887 subroutine ugb_from_diago(ugb, spin, istwf_k, kpoint, ecut, nband_k, ngfftc, nfftf, & 888 dtset, pawtab, pawfgr, paw_ij, cryst, psps, vtrial, eig_k, comm, & 889 electronpositron) ! Optional arguments 890 891 !Arguments ------------------------------------ 892 !scalars 893 class(ugb_t),target,intent(out) :: ugb 894 integer,intent(in) :: spin, istwf_k 895 real(dp),intent(in) :: kpoint(3), ecut 896 type(dataset_type),intent(in) :: dtset 897 integer,intent(in) :: comm,nfftf 898 integer,intent(inout) :: nband_k 899 type(crystal_t),intent(in) :: cryst 900 type(pseudopotential_type),intent(in) :: psps 901 !type(fock_type),intent(in) :: fock 902 type(pawfgr_type),intent(in) :: pawfgr 903 !arrays 904 integer,intent(in) :: ngfftc(18) 905 real(dp),intent(inout) :: vtrial(nfftf,dtset%nspden) 906 !real(dp),intent(inout) :: vxctau(nfftf, dtset%nspden, 4*dtset%usekden) 907 real(dp),allocatable,intent(out) :: eig_k(:) 908 type(pawtab_type),intent(in) :: pawtab(psps%ntypat*psps%usepaw) 909 type(paw_ij_type),intent(in) :: paw_ij(cryst%natom*psps%usepaw) 910 type(electronpositron_type),optional,pointer :: electronpositron 911 912 !Local variables------------------------------- 913 !scalars 914 integer,parameter :: mkmem1 = 1, tim_getghc = 4, paral_kgb0 = 0, master = 0, ncomp1 = 1 915 integer :: cprj_choice,cpopt,dimffnl,ib,ider,idir,npw_k,nfftc,mgfftc, igs, ige, omp_nt 916 integer :: jj,n1,n2,n3,n4,n5,n6,nkpg,nproc,my_rank,optder 917 integer :: type_calc,sij_opt,igsp2_start,ig, my_ib,ibs1 918 integer :: npwsp, col_bsize, nsppol, nspinor, nspden, loc2_size, il_g2, ierr, min_my_nband 919 integer :: idat, ndat, batch_size, h_size !, mene_found 920 real(dp),parameter :: lambda0 = zero 921 real(dp) :: cpu, wall, gflops, mem_mb 922 logical :: do_full_diago 923 character(len=80) :: frmt1,frmt2 924 character(len=10) :: stag(2) 925 character(len=500) :: msg 926 type(MPI_type) :: mpi_enreg_seq 927 type(gs_hamiltonian_type) :: gs_hamk 928 type(matrix_scalapack) :: ghg_mat, gsg_mat, ghg_4diag, gsg_4diag, eigvec 929 type(processor_scalapack) :: proc_1d, proc_4diag 930 !arrays 931 real(dp) :: kptns_(3,1), ylmgr_dum(1,1,1), tsec(2) 932 real(dp),allocatable :: ph3d(:,:,:), ffnl(:,:,:,:), kinpw(:), kpg_k(:,:) 933 real(dp),allocatable :: vlocal(:,:,:,:), ylm_k(:,:), dum_ylm_gr_k(:,:,:), eig_ene(:), ghc(:,:), gvnlxc(:,:), gsc(:,:) 934 real(dp),target,allocatable :: bras(:,:) 935 !real(dp),contiguous,pointer :: bras2d_ptr(:,:) 936 type(pawcprj_type),allocatable :: cwaveprj(:,:) 937 938 ! ********************************************************************* 939 940 call timab(1919, 1, tsec) 941 nproc = xmpi_comm_size(comm); my_rank = xmpi_comm_rank(comm) 942 943 ! See sequence of calls in vtorho. 944 ! Check that usekden is not 0 if want to use vxctau 945 !with_vxctau = (present(vxctau).and.dtset%usekden/=0) 946 947 ABI_CHECK_IEQ(dtset%usefock, 0, "direct diagonalization does not support usefock") 948 ! Check that fock is present if want to use fock option 949 !usefock = (dtset%usefock==1 .and. associated(fock)) 950 951 !==================== 952 !=== Check input ==== 953 !==================== 954 if (all(istwf_k /= [1, 2])) then 955 ABI_ERROR(sjoin("istwfk:", itoa(istwf_k), "not allowed:")) 956 end if 957 958 if (istwf_k == 2) then 959 !ABI_ERROR("istwfk == 2 with direct diago is still under development") 960 ABI_WARNING("istwfk == 2 with direct diago is still under development") 961 end if 962 963 if (dtset%ixc < 0) then 964 if (libxc_functionals_ismgga() .and. .not. libxc_functionals_is_tb09()) then 965 ABI_ERROR("meta-gga functionals are not compatible with direct diagonalization!") 966 end if 967 end if 968 969 ! MPI_type for sequential part. 970 call initmpi_seq(mpi_enreg_seq) 971 call init_distribfft_seq(mpi_enreg_seq%distribfft, 'c', ngfftc(2), ngfftc(3), 'all') 972 if (pawfgr%usefinegrid /= 0) then 973 call init_distribfft_seq(mpi_enreg_seq%distribfft, 'f', pawfgr%ngfft(2), pawfgr%ngfft(3), 'all') 974 end if 975 976 nspinor = dtset%nspinor; nsppol = dtset%nsppol; nspden = dtset%nspden 977 if (nsppol == 1) stag = [' ',' '] 978 if (nsppol == 2) stag = ['SPIN UP: ','SPIN DOWN:'] 979 980 call get_kg(kpoint, istwf_k, ecut, cryst%gmet, npw_k, ugb%kg_k) 981 npwsp = npw_k * nspinor 982 983 ! The coarse FFT mesh for the application of the Hamiltonian. 984 n1 = ngfftc(1); n2 = ngfftc(2); n3 = ngfftc(3) 985 n4 = ngfftc(4); n5 = ngfftc(5); n6 = ngfftc(6) 986 nfftc = product(ngfftc(1:3)); mgfftc = maxval(ngfftc(1:3)) 987 988 ! Initialize the Hamiltonian on the coarse FFT mesh. 989 if (present(electronpositron)) then 990 call init_hamiltonian(gs_hamk, psps, pawtab, nspinor, nsppol, nspden, cryst%natom, cryst%typat, cryst%xred, nfftc, & 991 mgfftc, ngfftc, cryst%rprimd, dtset%nloalg, paw_ij=paw_ij, usecprj=0, electronpositron=electronpositron) 992 else 993 call init_hamiltonian(gs_hamk, psps, pawtab, nspinor, nsppol, nspden, cryst%natom, cryst%typat, cryst%xred, nfftc, & 994 mgfftc, ngfftc, cryst%rprimd, dtset%nloalg, paw_ij=paw_ij, usecprj=0) 995 end if 996 997 ! Check on the number of stored bands. 998 if (nband_k == -1 .or. nband_k >= npwsp) then 999 nband_k = npwsp 1000 write(msg,'(4a, i0)')ch10,& 1001 ' Since the number of bands to be computed was -1 or',ch10,& 1002 ' too large, it has been set to the maximum value. npw_k*nspinor: ',npwsp 1003 call wrtout(std_out, msg) 1004 end if 1005 1006 do_full_diago = nband_k == npwsp 1007 1008 ! Set up local potential vlocal with proper dimensioning, from vtrial. 1009 ! Select spin component of interest if nspden<=2 as nvloc==1, for nspden==4, nvloc==4 1010 ! option=2: vtrial(n1*n2*n3,ispden) --> vlocal(nd1,nd2,nd3) real case 1011 1012 ABI_MALLOC(vlocal, (n4, n5, n6, gs_hamk%nvloc)) 1013 call gspot_transgrid_and_pack(spin, psps%usepaw, paral_kgb0, nfftc, ngfftc, nfftf, & 1014 nspden, gs_hamk%nvloc, ncomp1, pawfgr, mpi_enreg_seq, vtrial, vlocal) 1015 call gs_hamk%load_spin(spin, vlocal=vlocal, with_nonlocal=.true.) 1016 1017 ! TODO: This for meta-gga. 1018 !if (with_vxctau) then 1019 ! call gspot_transgrid_and_pack(spin, psps%usepaw, paral_kgb0, nfftc, ngfftc, nfftf, & 1020 ! nspden, gs_hamk%nvloc, 4, pawfgr, mpi_enreg, vxctau, vxctaulocal) 1021 ! call gs_hamk%load_spin(spin, vxctaulocal=vxctaulocal) 1022 !end if 1023 1024 !======================== 1025 !==== Kinetic energy ==== 1026 !======================== 1027 ABI_MALLOC(kinpw, (npw_k)) 1028 call mkkin(ecut, dtset%ecutsm, dtset%effmass_free, cryst%gmet, ugb%kg_k, kinpw, kpoint, npw_k, 0, 0) 1029 1030 !================================ 1031 !==== Non-local form factors ==== 1032 !================================ 1033 ABI_MALLOC(ylm_k, (npw_k, psps%mpsang**2*psps%useylm)) 1034 1035 if (psps%useylm == 1) then 1036 optder = 0 1037 ABI_MALLOC(dum_ylm_gr_k, (npw_k, 3+6*(optder/2),psps%mpsang**2)) 1038 kptns_(:,1) = kpoint 1039 1040 ! NB: Here mband is not used if paral_compil_kpt = 0 1041 call initylmg(cryst%gprimd, ugb%kg_k, kptns_, mkmem1, mpi_enreg_seq, psps%mpsang, npw_k, [nband_k], 1, & 1042 [npw_k], 1, optder, cryst%rprimd, ylm_k, dum_ylm_gr_k) 1043 1044 ABI_FREE(dum_ylm_gr_k) 1045 end if 1046 1047 ! Compute (k+G) vectors (only if useylm=1) 1048 nkpg = 3 * dtset%nloalg(3) 1049 ABI_MALLOC(kpg_k, (npw_k, nkpg)) 1050 if (nkpg > 0) call mkkpg(ugb%kg_k, kpg_k, kpoint, nkpg, npw_k) 1051 1052 ! Compute nonlocal form factors ffnl at all (k+G): 1053 idir=0; ider=0; dimffnl=1+ider ! Now the derivative is not needed anymore. 1054 ABI_MALLOC(ffnl, (npw_k, dimffnl, psps%lmnmax, psps%ntypat)) 1055 1056 call mkffnl(psps%dimekb, dimffnl, psps%ekb, ffnl, psps%ffspl, cryst%gmet, cryst%gprimd, ider, idir, psps%indlmn, & 1057 ugb%kg_k, kpg_k, kpoint, psps%lmnmax, psps%lnmax, psps%mpsang, psps%mqgrid_ff, nkpg, npw_k, & 1058 psps%ntypat, psps%pspso, psps%qgrid_ff, cryst%rmet, psps%usepaw, psps%useylm, ylm_k, ylmgr_dum) 1059 1060 ABI_FREE(ylm_k) 1061 1062 ! Load k-dependent part of the Hamiltonian. 1063 ABI_MALLOC(ph3d, (2, npw_k, gs_hamk%matblk)) 1064 call gs_hamk%load_k(kpt_k=kpoint, istwf_k=istwf_k, npw_k=npw_k, kinpw_k=kinpw, & 1065 kg_k=ugb%kg_k, kpg_k=kpg_k, ffnl_k=ffnl, ph3d_k=ph3d, compute_ph3d=.true., compute_gbound=.true.) 1066 1067 ! Prepare call to getghc. 1068 type_calc = 0 ! For applying the whole Hamiltonian 1069 sij_opt = 0; if (psps%usepaw==1) sij_opt = 1 ! For PAW, <k+G|S|k+G"> is also needed. 1070 1071 cpopt = -1 1072 if (psps%usepaw == 1) cpopt = 0 ! <p_lmn|in> are computed here and saved 1073 1074 ! Init 1D PBLAS grid to block-distribute H along columns. 1075 call proc_1d%init(comm, grid_dims=[1, nproc]) 1076 h_size = npwsp; if (istwf_k == 2) h_size = 2*npwsp - 1 1077 1078 ABI_CHECK(block_dist_1d(h_size, nproc, col_bsize, msg), msg) 1079 call ghg_mat%init(h_size, h_size, proc_1d, istwf_k, size_blocs=[h_size, col_bsize]) 1080 if (psps%usepaw == 1) call gsg_mat%init(h_size, h_size, proc_1d, istwf_k, size_blocs=[h_size, col_bsize]) 1081 1082 ! Estimate memory 1083 mem_mb = ghg_mat%locmem_mb() 1084 mem_mb = two * (psps%usepaw + 1) * mem_mb + mem_mb ! last term for eigvec matrix 1085 call wrtout(std_out, sjoin(" Local memory for scalapack matrices:", ftoa(mem_mb, fmt="(f8.1)"), ' [Mb] <<< MEM')) 1086 1087 ! Define batch size for the application of the Hamiltonian 1088 ! This is useful if OpenMP is activated thus we use multiple of omp_nt. 1089 omp_nt = xomp_get_num_threads(open_parallel=.True.) 1090 batch_size = 8 * omp_nt 1091 if (istwf_k == 2) batch_size = 1 ! FIXME 1092 !batch_size = 1 1093 call wrtout(std_out, sjoin(" Using batch_size:", itoa(batch_size))) 1094 1095 ABI_MALLOC(bras, (2, npwsp * batch_size)) 1096 ! cwaveprj is ordered by atom type, see nonlop_ylm. 1097 ABI_MALLOC(cwaveprj, (cryst%natom, nspinor*(1+cpopt)*gs_hamk%usepaw*batch_size)) 1098 if (cpopt == 0) call pawcprj_alloc(cwaveprj, 0, gs_hamk%dimcprj) 1099 ABI_MALLOC(ghc, (2, npwsp * batch_size)) 1100 ABI_MALLOC(gvnlxc, (2, npwsp * batch_size)) 1101 ABI_MALLOC(gsc, (2, npwsp * batch_size*(sij_opt+1)/2)) 1102 1103 ! Loop over the |beta,G''> component. 1104 call cwtime(cpu, wall, gflops, "start") 1105 loc2_size = ghg_mat%sizeb_local(2) 1106 1107 do il_g2=1, loc2_size, batch_size 1108 ! Operate on ndat g-vectors starting at the igsp2_start global index. 1109 igsp2_start = ghg_mat%loc2gcol(il_g2) 1110 ndat = blocked_loop(il_g2, loc2_size, batch_size) 1111 1112 bras = zero 1113 if (istwf_k == 1) then 1114 do idat=0,ndat-1 1115 bras(1, igsp2_start + idat * npwsp + idat) = one 1116 end do 1117 else 1118 ! only istwf_k == 2 is coded here. NB: there's a check at the beginning of this routine. 1119 do idat=0,ndat-1 1120 if (igsp2_start + idat <= npwsp) then 1121 ! Cosine term 1122 bras(1, igsp2_start + idat*npwsp + idat) = half 1123 if (igsp2_start == 1) bras(1, igsp2_start + idat*npwsp + idat) = one 1124 else 1125 ! Sine term 1126 !ig = igsp2_start - npwsp + 1 1127 ig = igsp2_start - npwsp + 1 + 1 ! This should be OK 1128 bras(2, ig + idat*npwsp + idat) = half 1129 end if 1130 end do 1131 end if 1132 1133 ! Get <:|H|beta,G''> and <:|S_{PAW}|beta,G''> 1134 !call c_f_pointer(c_loc(bras), bras2d_ptr, shape=[2, npwsp * batch_size)] 1135 call multithreaded_getghc(cpopt, bras, cwaveprj, ghc, gsc, gs_hamk, gvnlxc, lambda0, mpi_enreg_seq, ndat, & 1136 dtset%prtvol, sij_opt, tim_getghc, type_calc) 1137 1138 ! Now fill my local buffer of ghg/gsg 1139 if (istwf_k == 1) then 1140 do idat=0,ndat-1 1141 ! Complex wavefunctions. 1142 igs = 1 + idat * npwsp; ige = igs + npwsp - 1 1143 ghg_mat%buffer_cplx(:, il_g2+idat) = cmplx(ghc(1, igs:ige), ghc(2, igs:ige), kind=dp) 1144 end do 1145 if (psps%usepaw == 1) then 1146 do idat=0,ndat-1 1147 igs = 1 + idat * npwsp; ige = igs + npwsp - 1 1148 gsg_mat%buffer_cplx(:, il_g2+idat) = cmplx(gsc(1,igs:ige), gsc(2,igs:ige), kind=dp) 1149 end do 1150 end if 1151 1152 else 1153 do idat=0,ndat-1 1154 ! Real wavefunctions. 1155 igs = 1 + idat*npwsp; ige = igs + npwsp - 1 1156 !if (igsp2_start == 1 .or. igsp2_start == npwsp + 1 .and. idat == 0) then 1157 ! ghc(:, igs:ige) = tol3 !; print *, ghc(:, igs:ige) 1158 !end if 1159 ghg_mat%buffer_real(1:npwsp, il_g2+idat) = ghc(1, igs:ige) ! CC or CS 1160 ghg_mat%buffer_real(npwsp+1:, il_g2+idat) = -ghc(2, igs+1:ige) ! SC or SS. Note igs+1 1161 end do 1162 if (psps%usepaw == 1) then 1163 NOT_IMPLEMENTED_ERROR() 1164 !gsg_mat%buffer_real(...) 1165 !gsg_mat%buffer_real(...) 1166 end if 1167 end if 1168 1169 end do ! il_g2 1170 call cwtime_report(" build_hg1g2", cpu, wall, gflops) 1171 1172 ! Free workspace memory allocated so far. 1173 ABI_FREE(bras) 1174 ABI_FREE(kinpw) 1175 ABI_FREE(vlocal) 1176 ABI_FREE(ghc) 1177 ABI_FREE(gvnlxc) 1178 ABI_FREE(gsc) 1179 if (psps%usepaw == 1 .and. cpopt == 0) call pawcprj_free(cwaveprj) 1180 ABI_FREE(cwaveprj) 1181 1182 !=========================================== 1183 !=== Diagonalization of <G|H|G''> matrix === 1184 !=========================================== 1185 ABI_MALLOC(eig_ene, (h_size)) 1186 1187 !print *, "ghg_trace:", ghg_mat%get_trace() 1188 1189 ! Change size block and, if possible, use 2D rectangular grid of processors for diagonalization 1190 call proc_4diag%init(comm) 1191 call ghg_mat%change_size_blocs(ghg_4diag, processor=proc_4diag, free=.True.) 1192 if (psps%usepaw == 1) call gsg_mat%change_size_blocs(gsg_4diag, processor=proc_4diag, free=.True.) 1193 1194 ! NB: global H shape is (h_size, h_size) even for partial diago. 1195 ! then one extracts the (hsize, nband_k) sub-matrix before returning. 1196 call ghg_4diag%copy(eigvec) 1197 1198 if (do_full_diago) then 1199 write(msg,'(5a, (a,i0), 2a)')ch10,& 1200 ' Begin full diagonalization for kpt: ',trim(ktoa(kpoint)), stag(spin), ch10,& 1201 " H_gg' Matrix size: ",npwsp, ", Scalapack grid: ", trim(ltoa(ghg_4diag%processor%grid%dims)) 1202 call wrtout(std_out, msg) 1203 1204 call cwtime(cpu, wall, gflops, "start") 1205 if (psps%usepaw == 0) then 1206 !call ghg_4diag%pzheev("V", "U", eigvec, eig_ene) 1207 call compute_eigen_problem(ghg_4diag%processor, ghg_4diag, eigvec, eig_ene, comm, istwf_k) 1208 else 1209 call compute_generalized_eigen_problem(ghg_4diag%processor, ghg_4diag, gsg_4diag, eigvec, eig_ene, comm, istwf_k) 1210 end if 1211 call cwtime_report(" full_diago", cpu, wall, gflops) 1212 1213 else 1214 write(msg,'(6a,i0,(a,i0), 2a)') ch10,& 1215 ' Begin partial diagonalization for kpt: ',trim(ktoa(kpoint)), stag(spin), ch10,& 1216 " H_gg' Matrix size: ",npwsp,', nband_k: ', nband_k,", Scalapack grid: ", trim(ltoa(ghg_4diag%processor%grid%dims)) 1217 call wrtout(std_out, msg) 1218 1219 call cwtime(cpu, wall, gflops, "start") 1220 if (psps%usepaw == 0) then 1221 !call ghg_4diag%pzheevx("V", "I", "U", zero, zero, 1, nband_k, -tol8, eigvec, mene_found, eig_ene) 1222 call compute_eigen_problem(ghg_4diag%processor, ghg_4diag, eigvec, eig_ene, comm, istwf_k, nev=nband_k) 1223 else 1224 !call ghg_4diag%pzhegvx(1, "V", "I", "U", gsg_4diag, zero, zero, 1, nband_k, -tol8, eigvec, mene_found, eig_ene) 1225 call compute_generalized_eigen_problem(ghg_4diag%processor, ghg_4diag, gsg_4diag, eigvec, eig_ene, comm, istwf_k, & 1226 nev=nband_k) 1227 end if 1228 call cwtime_report(" partial_diago", cpu, wall, gflops) 1229 end if 1230 1231 if (dtset%prtvol > 0 .and. my_rank == master) then 1232 ! Write eigenvalues. 1233 frmt1 = '(8x,9(1x,f7.2))'; frmt2 = '(8x,9(1x,f7.2))' 1234 write(msg,'(2a,3x,a)')' Eigenvalues in eV for kpt: ', trim(ktoa(kpoint)), stag(spin) 1235 call wrtout(std_out, msg) 1236 1237 write(msg,frmt1)(eig_ene(ib)*Ha_eV,ib=1,MIN(9,nband_k)) 1238 call wrtout(std_out, msg) 1239 if (nband_k > 9 ) then 1240 do jj=10,nband_k,9 1241 write(msg, frmt2) (eig_ene(ib)*Ha_eV,ib=jj,MIN(jj+8,nband_k)); call wrtout(std_out, msg) 1242 end do 1243 end if 1244 end if 1245 1246 call ghg_4diag%free(); call gsg_4diag%free(); call proc_1d%free() 1247 1248 ! Now transfer eigvec to the ugb datastructure using 1d grid (block column distribution) 1249 call wrtout(std_out, " Moving to PBLAS block column distribution...") 1250 call cwtime(cpu, wall, gflops, "start") 1251 1252 call ugb%processor%init(comm, grid_dims=[1, nproc]) 1253 ABI_CHECK(block_dist_1d(nband_k, nproc, col_bsize, msg), msg) 1254 call eigvec%cut(h_size, nband_k, ugb%mat, size_blocs=[h_size, col_bsize], processor=ugb%processor, free=.True.) 1255 call proc_4diag%free() 1256 1257 ABI_MALLOC(eig_k, (nband_k)) 1258 eig_k(:) = eig_ene(1:nband_k) 1259 1260 ugb%istwf_k = istwf_k 1261 ugb%nspinor = nspinor 1262 ugb%npw_k = npw_k 1263 ugb%npwsp = npwsp 1264 ugb%nband_k = nband_k 1265 ugb%comm => ugb%mat%processor%comm 1266 1267 ugb%my_bstart = ugb%mat%loc2gcol(1) 1268 ugb%my_bstop = ugb%mat%loc2gcol(ugb%mat%sizeb_local(2)) 1269 ugb%my_nband = ugb%my_bstop - ugb%my_bstart + 1 1270 1271 if (ugb%my_nband > 0) then 1272 call c_f_pointer(c_loc(ugb%mat%buffer_cplx), ugb%cg_k, shape=[2, npwsp, ugb%my_nband]) 1273 else 1274 ugb%my_nband = 0 1275 ugb%cg_k => null() 1276 end if 1277 1278 call xmpi_min(ugb%my_nband, min_my_nband, comm, ierr) 1279 ugb%has_idle_procs = min_my_nband == 0 1280 1281 if (psps%usepaw == 1 .and. ugb%my_nband > 0) then 1282 ! Calculate <Proj_i|Cnk> from output eigenstates. Note array allocated with ugb%my_nband 1283 ABI_MALLOC(ugb%cprj_k, (cryst%natom, nspinor * ugb%my_nband)) 1284 call pawcprj_alloc(ugb%cprj_k, 0, gs_hamk%dimcprj) 1285 idir = 0; cprj_choice = 1 ! Only projected wave functions. 1286 1287 do my_ib=1,ugb%my_nband 1288 ibs1 = nspinor * (my_ib - 1) + 1 1289 call getcprj(cprj_choice, 0, ugb%cg_k(:,:,my_ib), ugb%cprj_k(:,ibs1), & 1290 gs_hamk%ffnl_k, idir, gs_hamk%indlmn, gs_hamk%istwf_k, gs_hamk%kg_k, & 1291 gs_hamk%kpg_k, gs_hamk%kpt_k, gs_hamk%lmnmax, gs_hamk%mgfft, mpi_enreg_seq, & 1292 gs_hamk%natom, gs_hamk%nattyp, gs_hamk%ngfft, gs_hamk%nloalg, gs_hamk%npw_k, gs_hamk%nspinor, & 1293 gs_hamk%ntypat, gs_hamk%phkxred, gs_hamk%ph1d, gs_hamk%ph3d_k, gs_hamk%ucvol, gs_hamk%useylm) 1294 end do 1295 1296 ! Reorder the cprj (order is now the same as in the input file) 1297 call pawcprj_reorder(ugb%cprj_k, gs_hamk%atindx1) 1298 end if ! usepaw 1299 1300 call cwtime_report(" block column distribution", cpu, wall, gflops) 1301 1302 ! Free memory. 1303 ABI_FREE(eig_ene) 1304 ABI_FREE(kpg_k) 1305 ABI_FREE(ph3d) 1306 ABI_FREE(ffnl) 1307 call destroy_mpi_enreg(mpi_enreg_seq) 1308 call gs_hamk%free() 1309 1310 call timab(1919, 2, tsec) 1311 1312 end subroutine ugb_from_diago
m_ksdiago/ugb_t [ Types ]
[ Top ] [ m_ksdiago ] [ Types ]
NAME
ugb_t
FUNCTION
Stores the wavefunctions for given (k-point, spin) in a PBLAS matrix distributed over bands (columns)
SOURCE
77 type, public :: ugb_t 78 79 integer :: istwf_k = -1 80 ! Storage mode of cg_k 81 82 integer :: nspinor = -1 83 ! Number of spinors 84 85 integer :: npw_k = -1 86 ! Number of planewaves. 87 88 integer :: npwsp = -1 89 ! nnpw_k * nspinor 90 91 integer :: nband_k = - 1 92 ! Total number of bands (global) 93 94 !integer :: usepaw 95 96 integer :: my_bstart = -1, my_bstop = - 1, my_nband = - 1 97 ! 1) Initial band 98 ! 2) Last band 99 ! 3) Number of bands treated by this proc. 0 if idle proc 100 101 logical :: has_idle_procs 102 ! True if there are procs in comm who don't own any column. 103 104 integer,pointer :: comm 105 ! pointer to MPI communicator in mat 106 107 type(processor_scalapack) :: processor 108 109 type(matrix_scalapack) :: mat 110 ! PBLAS matrix with MPI-distributed Fourier components 111 ! Local buffer: (2, npwsp * my_nband) 112 ! Global matrix: (npwsp, nband_k) 113 114 integer, allocatable :: kg_k(:,:) 115 ! (3, npw_k) 116 ! G-vectors in reduced coordinates. 117 118 real(dp), contiguous, pointer :: cg_k(:,:,:) 119 ! (2, npwsp * my_nband) 120 ! pointer to mat%buffer_cplx 121 122 type(pawcprj_type),allocatable :: cprj_k(:,:) 123 ! (natom, nspinor * my_nband)) 124 ! PAW projections ordered according to natom and NOT according to typat. 125 ! Note my_nband 126 127 contains 128 129 procedure :: from_diago => ugb_from_diago 130 ! Build object by direct diagonalization of the KS Hamiltonian 131 132 !procedure :: from_wfk => ugb_from_wfk 133 ! Build object from WFK file 134 135 procedure :: free => ugb_free 136 ! Free memory. 137 138 procedure :: print => ugb_print 139 ! Print info on object. 140 141 procedure :: collect_cprj => ugb_collect_cprj 142 ! Collect a subset of PAW cprj on all processors. 143 144 end type ugb_t
m_slk/ugb_collect_cprj [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
ugb_collect_cprj
FUNCTION
This is a collective routine that returns in `out_cprj` the PAW projections for `nb` bands starting at `band_start` NB: `out_cprj` is supposed to be allocated in the parent
SOURCE
1399 subroutine ugb_collect_cprj(ugb, nspinor, nb, band_start, out_cprj) 1400 1401 !Arguments ------------------------------------ 1402 class(ugb_t),intent(in) :: ugb 1403 integer,intent(in) :: nspinor, nb, band_start 1404 type(pawcprj_type),intent(inout) :: out_cprj(:,:) 1405 1406 !Local variables------------------------------- 1407 integer :: ierr, my_ibs, out_ibs, band, cnt 1408 1409 ! ************************************************************************* 1410 1411 ABI_CHECK_IEQ(size(ugb%cprj_k, dim=1), size(out_cprj, dim=1), "size1 should be the same") 1412 ABI_CHECK_IGEQ(size(out_cprj, dim=2), nb*nspinor, "size2 too small!") 1413 1414 ! TODO: Numb algorithm based on xmpi_sum. Might be optimized. 1415 call pawcprj_set_zero(out_cprj) 1416 1417 cnt = nspinor - 1 1418 do band=band_start, band_start+nb-1 1419 if (band >= ugb%my_bstart .and. band <= ugb%my_bstop) then 1420 my_ibs = 1 + (band - ugb%my_bstart) * nspinor 1421 out_ibs = 1 + (band - band_start) * nspinor 1422 call pawcprj_copy(ugb%cprj_k(:,my_ibs:my_ibs+cnt), out_cprj(:,out_ibs:out_ibs+cnt)) 1423 end if 1424 end do 1425 1426 call pawcprj_mpi_sum(out_cprj, ugb%comm, ierr) 1427 1428 end subroutine ugb_collect_cprj