TABLE OF CONTENTS
- ABINIT/cgq_builder
- ABINIT/e_eigen
- ABINIT/m_vtorho
- ABINIT/vtorho
- ABINIT/wvl_comm_eigen
- ABINIT/wvl_nscf_loop
- ABINIT/wvl_nscf_loop_bigdft
- ABINIT/wvl_occ
- ABINIT/wvl_occ_bigdft
ABINIT/cgq_builder [ Functions ]
NAME
cgq_builder
FUNCTION
This routine locates cgq for efield calculations, especially for parallel case
INPUTS
berryflag = logical flag determining use of electric field variables cg(2,mcg)=planewave coefficients of wavefunctions. dtset <type(dataset_type)>=all input variables for this dataset ikpt=index of current k kpt ikpt_loc=index of k point on current processor (see vtorho.F90) isspol=value of spin polarization currently treated me_distrb=current value from spaceComm_distrb (see vtorho.F90) mcg=size of wave-functions array (cg) =mpw*nspinor*mband*mkmem*nsppol mcgq=size of cgq array (see vtorho.F90) mkgq=size of pwnsfacq array (see vtorho.F90) my_nspinor=nspinor value determined by current // set up nband_k=number of bands at each k point nproc_distrb=nproc from spaceComm_distrb (see vtorho.F90) npwarr(nkpt)=number of planewaves in basis at this k point pwnsfac(2,pwind_alloc) = phase factors for non-symmorphic translations (see initberry.f) pwind_alloc = first dimension of pwind spaceComm_distrb=comm_cell from mpi_enreg
OUTPUT
cgq(2,mcgq)=planewave coefficients of wavenfunctions adjacent to cg at ikpt pwnsfacq(2,mkgq)=phase factors for non-symmorphic translations for cg's adjacent to cg(ikpt)
SIDE EFFECTS
Input/Output dtefield <type(efield_type)> = efield variables mpi_enreg=information about MPI parallelization
TODO
NOTES
SOURCE
2524 subroutine cgq_builder(berryflag,cg,cgq,dtefield,dtset,ikpt,ikpt_loc,isppol,mcg,mcgq,& 2525 & me_distrb,mkgq,mpi_enreg,my_nspinor,nband_k,nproc_distrb,& 2526 & npwarr,pwnsfac,pwnsfacq,pwind_alloc,spaceComm_distrb) 2527 2528 !Arguments ------------------------------------ 2529 integer,intent(in) :: ikpt,ikpt_loc,isppol,me_distrb,mcg,mcgq,mkgq,my_nspinor,nband_k 2530 integer,intent(in) :: nproc_distrb,pwind_alloc,spaceComm_distrb 2531 logical,intent(in) :: berryflag 2532 type(dataset_type), intent(in) :: dtset 2533 type(efield_type), intent(inout) :: dtefield 2534 type(MPI_type), intent(in) :: mpi_enreg 2535 !arrays 2536 integer,intent(in) :: npwarr(dtset%nkpt) 2537 real(dp),intent(in) :: cg(2,mcg),pwnsfac(2,pwind_alloc) 2538 real(dp),intent(out) :: cgq(2,mcgq),pwnsfacq(2,mkgq) 2539 2540 !Local variables ------------------------- 2541 !scalars 2542 integer :: count,count1,icg1,icg2,dest,his_source 2543 integer :: idir,ierr,ifor,ikg1,ikg2,ikptf,ikpt1f,ikpt1i 2544 integer :: jkpt,jkpt1i,jkptf,jkpt1f,jsppol,my_source,npw_k1,tag 2545 !arrays 2546 integer,allocatable :: flag_send(:,:), flag_receive(:) 2547 real(dp) :: tsec(2) 2548 real(dp),allocatable :: buffer(:,:) 2549 2550 ! ************************************************************************* 2551 2552 if (mcgq==0.or.mkgq==0) return 2553 2554 call timab(983,1,tsec) 2555 2556 !Test compatbility of berryflag 2557 if (berryflag) then 2558 ABI_MALLOC(flag_send,(0:nproc_distrb-1,dtefield%fnkpt)) 2559 end if 2560 ABI_MALLOC(flag_receive,(dtset%nkpt)) 2561 flag_send(:,:) = 0 2562 flag_receive(:) = 0 2563 2564 if (berryflag) ikptf = dtefield%i2fbz(ikpt) 2565 2566 do idir = 1, 3 2567 2568 ! skip idir values for which efield_dot(idir) = 0 2569 if (berryflag .and. abs(dtefield%efield_dot(idir)) < tol12 ) cycle 2570 2571 do ifor = 1, 2 2572 2573 if(berryflag) then 2574 dtefield%sflag(:,ikpt + dtset%nkpt*(isppol - 1),ifor,idir) = 0 2575 ikpt1f = dtefield%ikpt_dk(ikptf,ifor,idir) 2576 ikpt1i = dtefield%indkk_f2ibz(ikpt1f,1) 2577 end if 2578 2579 npw_k1 = npwarr(ikpt1i) 2580 count = npw_k1*my_nspinor*nband_k 2581 my_source = mpi_enreg%proc_distrb(ikpt1i,1,isppol) 2582 2583 do dest = 0, nproc_distrb-1 2584 2585 if ((dest==me_distrb).and.(ikpt_loc <= dtset%mkmem)) then 2586 ! I am dest and have something to do 2587 2588 if ( my_source == me_distrb ) then 2589 ! I am destination and source 2590 2591 if(berryflag) then 2592 ikg1 = dtefield%fkgindex(ikpt1f) 2593 ikg2 = dtefield%cgqindex(3,ifor+2*(idir-1),ikpt+(isppol-1)*dtset%nkpt) 2594 icg1 = dtefield%cgindex(ikpt1i,isppol) 2595 icg2 = dtefield%cgqindex(2,ifor+2*(idir-1),ikpt+(isppol-1)*dtset%nkpt) 2596 end if 2597 2598 pwnsfacq(:,ikg2 + 1:ikg2 + npw_k1) = pwnsfac(:,ikg1 + 1:ikg1 + npw_k1) 2599 cgq(:,icg2 + 1:icg2 + count) = cg(:,icg1 + 1:icg1 + count) 2600 2601 else ! I am the destination but not the source -> receive 2602 ! receive pwnsfacq 2603 if(berryflag) then 2604 tag = ikpt1f + (isppol - 1)*dtefield%fnkpt 2605 ikg1 = dtefield%cgqindex(3,ifor+2*(idir-1),ikpt+(isppol-1)*dtset%nkpt) 2606 end if 2607 ABI_MALLOC(buffer,(2,npw_k1)) 2608 call xmpi_recv(buffer,my_source,tag,spaceComm_distrb,ierr) 2609 pwnsfacq(:,ikg1+1:ikg1+npw_k1) = buffer(:,1:npw_k1) 2610 ABI_FREE(buffer) 2611 2612 ! receive cgq if necessary 2613 if(flag_receive(ikpt1i) == 0) then 2614 ABI_MALLOC(buffer,(2,count)) 2615 tag = ikpt1i + (isppol - 1)*dtset%nkpt 2616 call xmpi_recv(buffer,my_source,tag,spaceComm_distrb,ierr) 2617 if(berryflag) icg1 = dtefield%cgqindex(2,ifor+2*(idir-1),ikpt+(isppol-1)*dtset%nkpt) 2618 cgq(:,icg1+1:icg1+count) = buffer(:,1:count) 2619 ABI_FREE(buffer) 2620 flag_receive(ikpt1i) = 1 2621 end if ! end if flag_receive == 0 2622 end if ! end tasks if I am the destination 2623 2624 else if (ikpt_loc <= mpi_enreg%mkmem(dest)) then ! dest != me and the dest has a k-point to treat 2625 2626 ! jkpt is the kpt which is being treated by dest (in ibz) 2627 ! jsppol is his isppol 2628 jkpt = mpi_enreg%kpt_loc2ibz_sp(dest, ikpt_loc,1) 2629 jsppol = mpi_enreg%kpt_loc2ibz_sp(dest, ikpt_loc,2) 2630 2631 if(jkpt > 0 .and. jsppol > 0) then 2632 2633 if(berryflag) then 2634 jkptf = dtefield%i2fbz(jkpt) 2635 jkpt1f = dtefield%ikpt_dk(jkptf,ifor,idir) 2636 jkpt1i = dtefield%indkk_f2ibz(jkpt1f,1) 2637 end if 2638 his_source = mpi_enreg%proc_distrb(jkpt1i,1,jsppol) 2639 2640 if (his_source == me_distrb) then 2641 2642 ! send 2643 ! pwnsfacq 2644 if(berryflag) then 2645 ikg1 = dtefield%fkgindex(jkpt1f) 2646 tag = jkpt1f + (jsppol - 1)*dtefield%fnkpt 2647 end if 2648 count1 = npwarr(jkpt1i) 2649 ABI_MALLOC(buffer,(2,count1)) 2650 buffer(:,1:count1) = pwnsfac(:,ikg1+1:ikg1+count1) 2651 call xmpi_send(buffer,dest,tag,spaceComm_distrb,ierr) 2652 ABI_FREE(buffer) 2653 2654 ! send cgq if necessary 2655 if(flag_send(dest, jkpt1i)==0) then 2656 if(berryflag) icg1 = dtefield%cgindex(jkpt1i,jsppol) 2657 tag = jkpt1i + (jsppol - 1)*dtset%nkpt 2658 count1 = npwarr(jkpt1i)*nband_k*my_nspinor 2659 ABI_MALLOC(buffer,(2,count1)) 2660 buffer(:,1:count1) = cg(:,icg1+1:icg1+count1) 2661 call xmpi_send(buffer,dest,tag,spaceComm_distrb,ierr) 2662 ABI_FREE(buffer) 2663 flag_send(dest, jkpt1i)=1 2664 end if ! if send cgq 2665 2666 end if ! end check that his_source == me 2667 2668 end if ! end check on jkpt > 0 and jsppol > 0 2669 2670 end if ! end check on me = dest else if me != dest 2671 2672 end do ! end loop over dest = 0, nproc-1 2673 2674 end do !end loop over ifor 2675 2676 end do !end loop over idir 2677 2678 call timab(983,2,tsec) 2679 2680 ABI_FREE(flag_send) 2681 ABI_FREE(flag_receive) 2682 2683 end subroutine cgq_builder
ABINIT/e_eigen [ Functions ]
NAME
e_eigen
FUNCTION
Computes eigenvalues energy from eigen, occ, kpt, wtk
INPUTS
eigen(nkpt*nsppol)=eigenvalues mband= maximum number of bands nband(nkpt*nsppol)= number of bands for each k-point and spin nkpt= number of k-points nsppol= number of spin polarization occ(mband*nkpt*nsppol)=occupations wtk(nkpt)= k-point weights
OUTPUT
e_eigenvalues= eigenvalues energy
SOURCE
2291 subroutine e_eigen(eigen,e_eigenvalues,mband,nband,nkpt,nsppol,occ,wtk) 2292 2293 !Arguments ------------------------------------ 2294 integer , intent(in) :: mband,nkpt,nsppol 2295 integer , intent(in) :: nband(nkpt*nsppol) 2296 real(dp) , intent(in) :: eigen(mband*nkpt*nsppol) 2297 real(dp) , intent(in) :: occ(mband*nkpt*nsppol) 2298 real(dp) , intent(in) :: wtk(nkpt) 2299 real(dp) , intent(out) :: e_eigenvalues 2300 2301 !Local variables------------------------------- 2302 integer :: ib,iband,ii,ikpt,isppol,nband_k 2303 real(dp) :: wtk_k 2304 2305 ! ************************************************************************* 2306 2307 DBG_ENTER("COLL") 2308 ii=0;ib=0 2309 do isppol=1,nsppol 2310 do ikpt=1,nkpt 2311 ii=ii+1 2312 nband_k=nband(ii) ; wtk_k=wtk(ii) 2313 do iband=1,nband_k 2314 ib=ib+1 2315 if(abs(occ(ib)) > tol8) then 2316 e_eigenvalues = e_eigenvalues + wtk_k*occ(ib)*eigen(ib) 2317 end if 2318 end do 2319 end do 2320 end do 2321 2322 DBG_EXIT("COLL") 2323 2324 end subroutine e_eigen
ABINIT/m_vtorho [ Modules ]
NAME
m_vtorho
FUNCTION
COPYRIGHT
Copyright (C) 1998-2024 ABINIT group (DCA, XG, GMR, MF, AR, MM, MT, FJ, MB, MT, TR) This file is distributed under the terms of the GNU General Public License, see ~abinit/COPYING or http://www.gnu.org/copyleft/gpl.txt .
SOURCE
16 #if defined HAVE_CONFIG_H 17 #include "config.h" 18 #endif 19 20 #include "abi_common.h" 21 22 ! nvtx related macro definition 23 #include "nvtx_macros.h" 24 25 module m_vtorho 26 27 use iso_fortran_env, only : int32,int64,real32,real64 28 use defs_basis 29 use defs_wvltypes 30 use m_abicore 31 use m_xmpi 32 use m_ab7_mixing 33 use m_errors 34 use m_wffile 35 use m_efield 36 use m_cgtools 37 use m_gemm_nonlop 38 use m_gemm_nonlop_gpu 39 use m_gemm_nonlop_ompgpu 40 use m_hdr 41 use m_dtset 42 use m_dtfil 43 use m_extfpmd 44 use m_ompgpu_utils 45 46 use defs_datatypes, only : pseudopotential_type 47 use defs_abitypes, only : MPI_type 48 use m_fstrings, only : sjoin, itoa 49 use m_time, only : timab 50 use m_geometry, only : xred2xcart 51 use m_occ, only : newocc 52 use m_pawang, only : pawang_type 53 use m_pawtab, only : pawtab_type 54 use m_paw_ij, only : paw_ij_type 55 use m_pawfgrtab, only : pawfgrtab_type 56 use m_pawrhoij, only : pawrhoij_type, pawrhoij_alloc, pawrhoij_free, pawrhoij_io, pawrhoij_inquire_dim 57 use m_pawcprj, only : pawcprj_type, pawcprj_alloc, pawcprj_free, pawcprj_getdim 58 use m_pawfgr, only : pawfgr_type 59 use m_energies, only : energies_type 60 use m_hamiltonian, only : init_hamiltonian, gs_hamiltonian_type, gspot_transgrid_and_pack 61 use m_bandfft_kpt, only : bandfft_kpt, bandfft_kpt_type, bandfft_kpt_set_ikpt, & 62 bandfft_kpt_savetabs, bandfft_kpt_restoretabs, prep_bandfft_tabs 63 use m_electronpositron, only : electronpositron_type,electronpositron_calctype 64 use m_paw_dmft, only : paw_dmft_type,init_dmft,destroy_dmft,print_dmft,saveocc_dmft 65 use m_paw_correlations, only : setnoccmmp 66 use m_paw_occupancies, only : pawmkrhoij 67 use m_paw_mkrho, only : pawmkrho 68 use m_crystal, only : crystal_init, crystal_t 69 use m_oper, only : oper_type,init_oper,destroy_oper 70 use m_io_tools, only : flush_unit 71 use m_abi2big, only : wvl_occ_abi2big, wvl_rho_abi2big, wvl_occopt_abi2big, wvl_eigen_abi2big 72 use m_fock, only : fock_type, fock_ACE_type, fock_updateikpt, fock_calc_ene 73 use m_invovl, only : make_invovl 74 use m_tddft, only : tddft 75 use m_kg, only : mkkin, mkkpg 76 use m_suscep_stat, only : suscep_stat 77 use m_fft, only : fftpac 78 use m_spacepar, only : symrhg 79 use m_vtowfk, only : vtowfk 80 use m_mkrho, only : mkrho, prtrhomxmn 81 use m_mkffnl, only : mkffnl 82 use m_mpinfo, only : proc_distrb_cycle 83 use m_common, only : prteigrs 84 use m_dmft, only : dmft_solve 85 use m_datafordmft, only : datafordmft 86 use m_fourier_interpol, only : transgrid 87 use m_cgprj, only : ctocprj 88 use m_wvl_rho, only : wvl_mkrho 89 use m_wvl_psi, only : wvl_hpsitopsi, wvl_psitohpsi, wvl_nl_gradient 90 use m_inwffil, only : cg_from_atoms 91 use m_chebfiwf, only : chebfiwf2_blocksize 92 93 #if defined HAVE_GPU_CUDA 94 use m_manage_cuda 95 #endif 96 97 #if defined HAVE_YAKL 98 use gator_mod 99 #endif 100 101 #if defined HAVE_BIGDFT 102 use BigDFT_API, only : last_orthon, evaltoocc, write_energies, eigensystem_info 103 #endif 104 105 #if defined(HAVE_GPU) && defined(HAVE_GPU_MARKERS) 106 use m_nvtx_data 107 #endif 108 109 #ifdef HAVE_FC_ISO_C_BINDING 110 use, intrinsic :: iso_c_binding, only : c_int64_t 111 #endif 112 113 114 implicit none 115 116 private
ABINIT/vtorho [ Functions ]
NAME
vtorho
FUNCTION
This routine compute the new density from a fixed potential (vtrial) but might also simply compute eigenvectors and eigenvalues. The main part of it is a wf update over all k points.
INPUTS
itimes(2)=itime array, contain itime=itimes(1) and itimimage_gstate=itimes(2) from outer loops afford=used to dimension susmat atindx(natom)=index table for atoms (see gstate.f) atindx1(natom)=index table for atoms, inverse of atindx (see gstate.f) cpus= cpu time limit in seconds dbl_nnsclo=if 1, will double the value of dtset%nnsclo dielop= if positive, the dielectric matrix must be computed. dielstrt=number of the step at which the dielectric preconditioning begins. dmatpawu= fixed occupation matrix of correlated orbitals (DFT+U or DMFT only) dtefield <type(efield_type)> = variables related to Berry phase calculations (see initberry.f) dtfil <type(datafiles_type)>=variables related to files dtset <type(dataset_type)>=all input variables for this dataset | mband=maximum number of bands | mgfft=maximum size of 1D FFTs | mkmem =number of k points treated by this node. | mpw=maximum dimensioned size of npw | nfft=(effective) number of FFT grid points (for this processor) | nkpt=number of k points. | nspden=number of spin-density components | nsppol=1 for unpolarized, 2 for spin-polarized | nsym=number of symmetry elements in space group | typat= array of types of the natoms electronpositron <type(electronpositron_type)>=quantities for the electron-positron annihilation etotal=total energy (Ha) - only needed for tddft fock <type(fock_type)>= quantities to calculate Fock exact exchange gbound_diel(2*mgfftdiel+8,2)=G sphere boundary for the dielectric matrix gmet(3,3)=reciprocal space metric tensor in bohr**-2. gprimd(3,3)=dimensional reciprocal space primitive translations (3x3 tensor) and grads wrt atomic coordinates (3*natom) gsqcut=cutoff on (k+G)^2 (bohr^-2) hdr <type(hdr_type)>=the header of wf, den and pot files indsym(4,nsym,natom)=indirect indexing array for atom labels irrzon(nfft**(1-1/nsym),2,(nspden/nsppol)-3*(nspden/4))=irreducible zone data irrzondiel(nfftdiel**(1-1/nsym),2,(nspden/nsppol)-3*(nspden/4))=irreducible zone data for diel matrix nfft**(1-1/nsym) is 1 if nsym==1, and nfft otherwise istep=index of the number of steps in the routine scfcv istep_mix=index of the number of steps for the SCF mixing (can be <istep) kg(3,mpw*mkmem)=reduced planewave coordinates. kg_diel(3,npwdiel)=reduced planewave coordinates for the dielectric matrix. kxc(nfftf,nkxc)=exchange-correlation kernel, needed only if nkxc/=0 . lmax_diel=1+max. value of l angular momentum used for dielectric matrix mcg=size of wave-functions array (cg) =mpw*nspinor*mband*mkmem*nsppol mcprj=size of projected wave-functions array (cprj) =nspinor*mband*mkmem*nsppol mgfftdiel=maximum size of 1D FFTs, for the computation of the dielectric matrix mpi_enreg=information about MPI parallelization my_natom=number of atoms treated by current processor natom=number of atoms in cell. nattyp(ntypat)= # atoms of each type. nfftf= -PAW ONLY- number of FFT grid points for the fine grid (nfftf=nfft for norm-conserving potential runs) nfftdiel=number of fft grid points for the computation of the diel matrix ngfftdiel(18)=contain all needed information about 3D FFT, for dielectric matrix, see ~abinit/doc/variables/vargs.htm#ngfft nkxc=second dimension of the array kxc, see rhotoxc.f for a description npwarr(nkpt)=number of planewaves in basis at this k point npwdiel=size of the susmat array. ntypat=number of types of atoms in unit cell. optforces=option for the computation of forces (0: no force;1: forces) optres=0: the new value of the density is computed in place of the input value 1: only the density residual is computed ; the input density is kept paw_dmft <type(paw_dmft_type)>= paw+dmft related data paw_ij(my_natom*usepaw) <type(paw_ij_type)>=paw arrays given on (i,j) channels pawang <type(pawang)>=paw angular mesh and related data pawfgr <type(pawfgr_type)>=fine grid parameters and related data pawfgrtab(my_natom*usepaw) <type(pawfgrtab_type)>=atomic data given on fine rectangular grid pawtab(ntypat*usepaw) <type(pawtab_type)>=paw tabulated starting data phnons(2,nfft**(1-1/nsym),(nspden/nsppol)-3*(nspden/4))=nonsymmorphic translation phases nfft**(1-1/nsym) is 1 if nsym==1, and nfft otherwise phnonsdiel(2,nfft**(1-1/nsym),(nspden/nsppol)-3*(nspden/4))=nonsymmorphic translation phases, for diel matr nfft**(1-1/nsym) is 1 if nsym==1, and nfft otherwise ph1d(2,3*(2*mgfft+1)*natom)=one-dimensional structure factor information ph1ddiel(2,3*(2*mgfftdiel+1)*natom*usepaw)=one-dimensional structure factor information for the dielectric matrix psps <type(pseudopotential_type)>=variables related to pseudopotentials pwind(pwind_alloc,2,3) = array used to compute the overlap matrix smat between k-points (see initberry.f) pwind_alloc = first dimension of pwind pwnsfac(2,pwind_alloc) = phase factors for non-symmorphic translations (see initberry.f) rmet(3,3)=real space metric (bohr**2) rprimd(3,3)=dimensional primitive vectors symrec(3,3,nsym)=symmetry operations in reciprocal space ucvol=unit cell volume in bohr**3. usecprj=1 if cprj datastructure is stored in memory wffnew,unit numbers for wf disk files. with_vectornd = 1 if vectornd allocated vectornd(with_vectornd*nfftf,nspden,3)=nuclear dipole moment vector potential vtrial(nfftf,nspden)=INPUT potential Vtrial(r). [vxctau(nfft,nspden,4*usekden)]=(only for meta-GGA): derivative of XC energy density with respect to kinetic energy density (depsxcdtau). The arrays vxctau contains also the gradient of vxctau (gvxctau) in vxctau(:,:,2:4) xred(3,natom)=reduced dimensionless atomic coordinates ylm(mpw*mkmem,mpsang*mpsang*useylm)= real spherical harmonics for each G and k point ylmgr(mpw*mkmem,3,mpsang*mpsang*useylm)= gradients of real spherical harmonics ylmdiel(npwdiel,lmax_diel**2)= real spherical harmonics for each G and k point for the dielectric matrix
OUTPUT
compch_fft=-PAW only- compensation charge inside spheres computed over fine fft grid dphase(3) : dphase(idir) = accumulated change in the string-averaged Zak phase along the idir-th direction caused by the update of all the occupied Bloch states at all the k-points (only if finite electric field) eigen(mband*nkpt*nsppol)=array for holding eigenvalues (hartree) resid(mband*nkpt*nsppol)=residuals for each band over all k points. residm=maximum value from resid array (except for nbdbuf highest bands) susmat(2,npwdiel*afford,nspden,npwdiel,nspden)= the susceptibility (or density-density response) matrix in reciprocal space === if optforces>0 === grnl(3*natom)=stores grads of nonlocal energy wrt length scales ==== if optres==1 nres2=square of the norm of the residual nvresid(nfftf,nspden)=density residual ==== if psps%usepaw==1 cprj(natom,mcprj*usecprj)= wave functions projected with non-local projectors: cprj(n,k,i)=<p_i|Cnk> where p_i is a non-local projector. nhat(nfftf,nspden*psps%usepaw)=compensation charge density on rectangular grid in real space
SIDE EFFECTS
cg(2,mpw*dtset%nspinor*mband*mkmem*nsppol)=planewave coefficients of wavefunctions. At output contains updated wavefunctions coefficients; if nkpt>1, these are kept in a disk file. energies <type(energies_type)>=storage for energies computed here : | e_eigenvalues=Sum of the eigenvalues - Band energy (Hartree) | e_kinetic=kinetic energy part of total energy | e_nlpsp_vfock=nonlocal psp + potential Fock ACE part of total energy | e_fermie=fermi energy (Hartree) occ(mband*nkpt*nsppol)=occupation number for each band for each k. (input if insulator - occopt<3 - ; output if metallic) pawrhoij(my_natom*usepaw) <type(pawrhoij_type)>= paw rhoij occupancies and related data rhog(2,nfftf)=Fourier transform of total electron density rhor(nfftf,nspden)=total electron density (el/bohr**3) taug(2,nfftf*dtset%usekden)=array for Fourier transform of kinetic energy density taur(nfftf,nspden*dtset%usekden)=array for kinetic energy density tauresid(nfftf,nspden*dtset%usekden)=array for kinetic energy density residual wvl <type(wvl_data)>=wavelets structures in case of wavelets basis. ==== if (usepaw==1) ==== cprj(natom,mcprj*usecprj)= wave functions projected with non-local projectors: cprj(n,k,i)=<p_i|Cnk> where p_i is a non-local projector. rmm_diis_status= Status of the RMM-DIIS eigensolver. See m_rmm_diis
NOTES
Be careful to the meaning of nfft (size of FFT grids): - In case of norm-conserving calculations the FFT grid is the usual FFT grid. - In case of PAW calculations: Two FFT grids are used; one with nfft points (coarse grid) for the computation of wave functions ; one with nfftf points (fine grid) for the computation of total density. The total electronic density (rhor,rhog) is divided into two terms: - The density related to WFs =Sum[Psi**2] - The compensation density (nhat) - only in PAW The parallelisation needed for the electric field should be made an independent subroutine, so that this routine could be put back in the 95_drive directory.
SOURCE
295 subroutine vtorho(afford,atindx,atindx1,cg,compch_fft,cprj,cpus,dbl_nnsclo,& 296 & dielop,dielstrt,dmatpawu,dphase,dtefield,dtfil,dtset,& 297 & eigen,electronpositron,energies,etotal,gbound_diel,& 298 & gmet,gprimd,grnl,gsqcut,hdr,extfpmd,indsym,irrzon,irrzondiel,& 299 & istep,istep_mix,itimes,kg,kg_diel,kxc,lmax_diel,mcg,mcprj,mgfftdiel,mpi_enreg,& 300 & my_natom,natom,nattyp,nfftf,nfftdiel,ngfftdiel,nhat,nkxc,& 301 & npwarr,npwdiel,nres2,ntypat,nvresid,occ,optforces,& 302 & optres,paw_dmft,paw_ij,pawang,pawfgr,pawfgrtab,pawrhoij,pawtab,& 303 & phnons,phnonsdiel,ph1d,ph1ddiel,psps,fock,& 304 & pwind,pwind_alloc,pwnsfac,resid,residm,rhog,rhor,& 305 & rmet,rprimd,susmat,symrec,taug,taur,tauresid,& 306 & ucvol,usecprj,wffnew,with_vectornd,vectornd,vtrial,vxctau,wvl,xred,& 307 & ylm,ylmgr,ylmdiel, rmm_diis_status) 308 309 !Arguments ------------------------------- 310 !scalars 311 integer, intent(in) :: afford,dbl_nnsclo,dielop,dielstrt,istep,istep_mix,lmax_diel,mcg,mcprj,mgfftdiel 312 integer, intent(in) :: my_natom,natom,nfftf,nfftdiel,nkxc,npwdiel 313 integer, intent(in) :: ntypat,optforces,optres,pwind_alloc,usecprj,with_vectornd 314 real(dp), intent(in) :: cpus,etotal,gsqcut,ucvol 315 real(dp), intent(out) :: compch_fft,nres2,residm 316 type(MPI_type), intent(inout) :: mpi_enreg 317 type(datafiles_type), intent(in) :: dtfil 318 type(dataset_type), intent(inout) :: dtset 319 type(efield_type), intent(inout) :: dtefield 320 type(electronpositron_type),pointer :: electronpositron 321 type(energies_type), intent(inout) :: energies 322 type(hdr_type), intent(inout) :: hdr 323 type(extfpmd_type),pointer,intent(inout) :: extfpmd 324 type(paw_dmft_type), intent(inout) :: paw_dmft 325 type(pawang_type), intent(in) :: pawang 326 type(pawfgr_type), intent(in) :: pawfgr 327 type(pseudopotential_type), intent(in) :: psps 328 type(fock_type),pointer, intent(inout) :: fock 329 type(wffile_type), intent(inout) :: wffnew 330 type(wvl_data), intent(inout) :: wvl 331 !arrays 332 integer, intent(in) :: atindx(natom),atindx1(natom),gbound_diel(2*mgfftdiel+8,2) 333 integer, intent(in) :: indsym(4,dtset%nsym,natom) 334 integer, intent(in) :: irrzon(dtset%nfft**(1-1/dtset%nsym),2,(dtset%nspden/dtset%nsppol)-3*(dtset%nspden/4)) 335 integer, intent(in) :: irrzondiel(nfftdiel**(1-1/dtset%nsym),2,(dtset%nspden/dtset%nsppol)-3*(dtset%nspden/4)) 336 integer, intent(in) :: itimes(2) 337 integer, intent(in) :: kg(3,dtset%mpw*dtset%mkmem),kg_diel(3,npwdiel),nattyp(ntypat),ngfftdiel(18),npwarr(dtset%nkpt) 338 integer, intent(in) :: pwind(pwind_alloc,2,3),symrec(3,3,dtset%nsym) 339 integer, intent(inout) :: rmm_diis_status(2, dtset%nkpt, dtset%nsppol) 340 real(dp), intent(in) :: dmatpawu(:,:,:,:),gmet(3,3),gprimd(3,3),ph1d(2,3*(2*dtset%mgfft+1)*natom) 341 real(dp), intent(in) :: ph1ddiel(2,(3*(2*mgfftdiel+1)*natom)*psps%usepaw) 342 real(dp), intent(in) :: phnons(2,dtset%nfft**(1-1/dtset%nsym),(dtset%nspden/dtset%nsppol)-3*(dtset%nspden/4)) 343 real(dp), intent(in) :: phnonsdiel(2,nfftdiel**(1-1/dtset%nsym),(dtset%nspden/dtset%nsppol)-3*(dtset%nspden/4)) 344 real(dp), intent(in) :: pwnsfac(2,pwind_alloc),rmet(3,3),rprimd(3,3) 345 real(dp), intent(inout) :: vectornd(with_vectornd*nfftf,dtset%nspden,3),vtrial(nfftf,dtset%nspden) 346 real(dp), intent(inout) :: xred(3,natom) 347 real(dp), intent(in) :: ylm(dtset%mpw*dtset%mkmem,psps%mpsang*psps%mpsang*psps%useylm) 348 real(dp), intent(in) :: ylmgr(dtset%mpw*dtset%mkmem,3,psps%mpsang*psps%mpsang*psps%useylm) 349 real(dp), intent(in) :: ylmdiel(npwdiel,lmax_diel**2) 350 real(dp), intent(out) :: dphase(3),grnl(3*natom) 351 real(dp), intent(inout) :: eigen(dtset%mband*dtset%nkpt*dtset%nsppol) 352 real(dp), intent(out) :: nhat(nfftf,dtset%nspden*psps%usepaw) 353 real(dp), intent(inout) :: resid(dtset%mband*dtset%nkpt*dtset%nsppol) 354 real(dp), intent(out) :: nvresid(nfftf,dtset%nspden) 355 real(dp), intent(out) :: susmat(2,npwdiel*afford,dtset%nspden,npwdiel,dtset%nspden) 356 real(dp), intent(inout) :: cg(2,mcg) 357 real(dp), intent(inout) :: kxc(nfftf,nkxc),occ(dtset%mband*dtset%nkpt*dtset%nsppol) 358 real(dp), intent(inout) :: rhog(2,nfftf),rhor(nfftf,dtset%nspden) 359 real(dp), intent(inout) :: taug(2,nfftf*dtset%usekden),taur(nfftf,dtset%nspden*dtset%usekden) 360 real(dp), intent(inout) :: tauresid(nfftf,dtset%nspden*dtset%usekden) 361 real(dp), intent(inout),optional :: vxctau(nfftf,dtset%nspden,4*dtset%usekden) 362 type(pawcprj_type),pointer,intent(inout) :: cprj(:,:) 363 type(paw_ij_type),intent(inout) :: paw_ij(my_natom*psps%usepaw) 364 type(pawfgrtab_type),intent(inout) :: pawfgrtab(my_natom*psps%usepaw) 365 type(pawrhoij_type),target,intent(inout) :: pawrhoij(my_natom*psps%usepaw) 366 type(pawtab_type),intent(in) :: pawtab(ntypat*psps%usepaw) 367 368 !Local variables------------------------------- 369 !scalars 370 integer,parameter :: level=111,tim_mkrho=2 371 !integer,save :: nwarning=0 372 integer :: bdtot_index,counter,cplex,cplex_rhoij,dimffnl,enunit,iband,iband1,ibdkpt 373 integer :: ibg,icg,ider,idir,ierr,ifft,ifor,ifor1,ii,ikg,ikpt 374 integer :: ikpt_loc,ikpt1,my_ikpt,ikxc,ilm,imagn,index1,iorder_cprj,ipert,iplex 375 integer :: iscf,ispden,isppol,istwf_k,mband_cprj,mbdkpsp,mb2dkpsp 376 integer :: mcgq,mcprj_local,mcprj_tmp,me_distrb,mkgq,mpi_comm_sphgrid 377 integer :: my_nspinor,n1,n2,n3,n4,n5,n6,nband_eff,nbdbuf_eff !mwarning, 378 integer :: nband_k,nband_cprj_k,nbuf,neglect_pawhat,nfftot,nkpg,nkpt1,nnn,nnsclo_now 379 integer :: nproc_distrb,npw_k,nspden_rhoij,option,prtvol,nblk_gemm_nonlop 380 integer :: spaceComm_distrb,usecprj_local,usefock_ACE,usetimerev 381 logical :: berryflag,computesusmat,fixed_occ,has_vectornd 382 logical :: locc_test,paral_atom,remove_inv,usefock,with_vxctau 383 logical :: do_last_ortho,wvlbigdft=.false. 384 real(dp) :: dmft_dftocc 385 real(dp) :: edmft,ebandlda,ebanddmft,ebandldatot,ekindmft,ekindmft2,ekinlda 386 real(dp) :: min_occ,vxcavg_dum,strsxc(6) 387 character(len=500) :: msg 388 type(bandfft_kpt_type),pointer :: my_bandfft_kpt => null() 389 type(gs_hamiltonian_type) :: gs_hamk 390 !arrays 391 integer(int32), ABI_CONTIGUOUS pointer :: kg_k(:,:) => null() 392 real(dp) :: dielar(7),dphase_k(3),kpoint(3),qpt(3),rhodum(1),tsec(2),ylmgr_dum(0,0,0) 393 real(dp),allocatable :: EigMin(:,:),buffer1(:),buffer2(:),cgq(:,:) 394 real(dp),allocatable :: cgrkxc(:,:),doccde(:) 395 real(dp),allocatable :: dphasek(:,:),ek_k(:),ek_k_nd(:,:,:),eknk(:),eknk_nd(:,:,:,:,:),end_k(:) 396 real(dp),allocatable :: enlx_k(:),enlxnk(:),focknk(:),fockfornk(:,:,:),ffnl(:,:,:,:),grnl_k(:,:), xcart(:,:) 397 real(dp),allocatable :: grnlnk(:,:) 398 399 #if defined HAVE_GPU && defined HAVE_YAKL 400 real(c_double), ABI_CONTIGUOUS pointer :: kinpw(:) => null() 401 real(c_double), ABI_CONTIGUOUS pointer :: eig_k(:) => null() 402 #else 403 real(dp),allocatable :: kinpw(:) 404 real(dp),allocatable :: eig_k(:) 405 #endif 406 407 real(dp),allocatable :: kpg_k(:,:),occ_k(:),ph3d(:,:,:) 408 real(dp),allocatable :: pwnsfacq(:,:) 409 410 #if defined HAVE_GPU && defined HAVE_YAKL 411 real(c_double), ABI_CONTIGUOUS pointer :: resid_k(:) => null() 412 real(c_double), ABI_CONTIGUOUS pointer :: rhoaug(:,:,:,:) => null() 413 #else 414 real(dp),allocatable :: resid_k(:) 415 real(dp),allocatable :: rhoaug(:,:,:,:) 416 #endif 417 418 real(dp),allocatable :: rhowfg(:,:),rhowfr(:,:),tauwfg(:,:),tauwfr(:,:) 419 real(dp),allocatable :: vectornd_pac(:,:,:,:,:) 420 421 #if defined HAVE_GPU && defined HAVE_YAKL 422 real(real64), ABI_CONTIGUOUS pointer :: vlocal(:,:,:,:) => null() 423 #else 424 real(dp), allocatable :: vlocal(:,:,:,:) 425 #endif 426 427 real(dp),allocatable :: vxctaulocal(:,:,:,:,:),ylm_k(:,:),zshift(:) 428 type(pawcprj_type),allocatable :: cprj_tmp(:,:) 429 type(pawcprj_type),allocatable,target:: cprj_local(:,:) 430 type(oper_type) :: dft_occup 431 type(pawrhoij_type),pointer :: pawrhoij_unsym(:) 432 type(crystal_t) :: cryst_struc 433 integer :: idum1(0),idum3(0,0,0) 434 real(dp) :: rdum2(0,0),rdum4(0,0,0,0) 435 #if defined HAVE_BIGDFT 436 integer :: occopt_bigdft 437 #endif 438 439 #if defined(HAVE_GPU_CUDA) 440 type(gemm_nonlop_gpu_type) :: gemm_nonlop_gpu_obj 441 #endif 442 443 444 ! ********************************************************************* 445 446 DBG_ENTER("COLL") 447 448 !Keep track of total time spent in vtorho 449 call timab(980,1,tsec) 450 call timab(981,1,tsec) 451 452 !Structured debugging if prtvol==-level 453 prtvol=dtset%prtvol 454 455 ! Electric fields: set flag to turn on various behaviors 456 berryflag = any(dtset%berryopt == [4, 14, 6, 16, 7, 17]) 457 458 !If usewvl: wvlbigdft indicates that the BigDFT workflow will be followed 459 wvlbigdft=(dtset%usewvl==1.and.dtset%wvl_bigdft_comp==1) 460 461 !Several inits 462 n1=dtset%ngfft(1) ; n2=dtset%ngfft(2) ; n3=dtset%ngfft(3) 463 n4=dtset%ngfft(4) ; n5=dtset%ngfft(5) ; n6=dtset%ngfft(6) 464 usecprj_local=0;if (psps%usepaw==1) usecprj_local=1 465 my_nspinor=max(1,dtset%nspinor/mpi_enreg%nproc_spinor) 466 paral_atom=(my_natom/=natom) 467 compch_fft=-1.d5 468 469 !Check that usekden is not 0 if want to use vxctau 470 with_vxctau = (present(vxctau).and.dtset%usekden/=0) 471 472 !Check that fock is present if want to use fock option 473 usefock = (dtset%usefock==1 .and. associated(fock)) 474 usefock_ACE=0 475 if (usefock) usefock_ACE=fock%fock_common%use_ACE 476 477 !Init MPI 478 spaceComm_distrb=mpi_enreg%comm_cell 479 if (mpi_enreg%paral_kgb==1) spaceComm_distrb=mpi_enreg%comm_kpt 480 if (mpi_enreg%paral_hf ==1) spaceComm_distrb=mpi_enreg%comm_kpt 481 nproc_distrb=xmpi_comm_size(spaceComm_distrb) 482 me_distrb=xmpi_comm_rank(spaceComm_distrb) 483 mpi_comm_sphgrid=mpi_enreg%comm_fft 484 if(dtset%usewvl==1) mpi_comm_sphgrid=mpi_enreg%comm_wvl 485 !if (mpi_enreg%me_img/=0) nwarning=nwarning+1 486 487 !Test size of FFT grids (1 grid in norm-conserving, 2 grids in PAW) 488 if ((psps%usepaw==1.and.pawfgr%nfft/=nfftf).or.(psps%usepaw==0.and.dtset%nfft/=nfftf)) then 489 ABI_BUG('wrong values for nfft, nfftf!') 490 end if 491 492 !Test optforces (to prevent memory overflow) 493 if (optforces/=0.and.optforces/=1) then 494 ABI_BUG(sjoin('wrong value for optforces: ',itoa(optforces))) 495 end if 496 497 iscf=dtset%iscf 498 fixed_occ=(dtset%occopt<3.or.electronpositron_calctype(electronpositron)==1) 499 if(.not. wvlbigdft) then 500 energies%e_eigenvalues = zero 501 energies%e_kinetic = zero 502 energies%e_nucdip = zero 503 energies%e_nlpsp_vfock = zero 504 if (usefock) then 505 energies%e_fock=zero 506 energies%e_fockdc=zero 507 end if 508 grnl(:)=zero 509 if (berryflag) then 510 resid(:) = zero ! JWZ 13 May 2010. resid and eigen need to be fully zeroed each time before use 511 end if 512 ! MG: The previous line is not compatible with the RMM-DIIS since rmm_diis recevies the previous resid_k 513 ! to select the accuracy level. 514 ! For the time being, we set resid to zero if berryflag to avoid breaking the CG solver with E-field 515 ! but it's clear that the treatment of resid should be rationalized and that the previous values should be passed to vtowfk 516 eigen(:) = zero 517 bdtot_index=0 518 ibg=0;icg=0 519 mbdkpsp=dtset%mband*dtset%nkpt*dtset%nsppol 520 if(paw_dmft%use_dmft==1) mb2dkpsp=2*dtset%mband*dtset%mband*dtset%nkpt*dtset%nsppol 521 end if 522 523 if(dtset%usewvl==0) then 524 ABI_MALLOC(eknk,(mbdkpsp)) 525 ABI_MALLOC(enlxnk,(mbdkpsp)) 526 ABI_MALLOC(eknk_nd,(dtset%nsppol,dtset%nkpt,2,dtset%mband,dtset%mband*paw_dmft%use_dmft)) 527 ABI_MALLOC(EigMin,(2,dtset%mband)) 528 ABI_MALLOC(grnlnk,(3*natom,mbdkpsp*optforces)) 529 if (usefock) then 530 ABI_MALLOC(focknk,(mbdkpsp)) 531 focknk=zero 532 if (optforces>0)then 533 ABI_MALLOC(fockfornk,(3,natom,mbdkpsp)) 534 fockfornk=zero 535 end if 536 end if 537 eknk(:)=zero;enlxnk(:)=zero 538 if (optforces>0) grnlnk(:,:)=zero 539 if(paw_dmft%use_dmft==1) eknk_nd=zero 540 end if !usewvl==0 541 542 !Initialize rhor if needed; store old rhor 543 if(iscf>=0 .or. iscf==-3) then 544 if (optres==1) then 545 nvresid=rhor ; tauresid=taur 546 end if 547 ! NC and plane waves 548 if (psps%usepaw==0 .and. dtset%usewvl==0) then 549 rhor=zero ; taur=zero 550 ! PAW 551 else if(psps%usepaw==1) then 552 ABI_MALLOC(rhowfr,(dtset%nfft,dtset%nspden)) 553 ABI_MALLOC(rhowfg,(2,dtset%nfft)) 554 ABI_MALLOC(tauwfr,(dtset%nfft,dtset%nspden*dtset%usekden)) 555 ABI_MALLOC(tauwfg,(2,dtset%nfft*dtset%usekden)) 556 rhowfr(:,:)=zero ; tauwfr(:,:)=zero 557 end if 558 end if 559 560 ! Here we set the max number of non-self-consistent loops nnsclo_now used in vtowfk 561 if (iscf<0) then 562 ! Non self-consistent case 563 nnsclo_now=dtset%nstep 564 else 565 ! Self-consistent case 566 if (dtset%nnsclo>0) then 567 ! Use input variable if specified and > 0 568 nnsclo_now=dtset%nnsclo 569 else if (dtset%nnsclo < 0) then 570 ! imposed during abs(nnsclo) steps 571 nnsclo_now=1 572 if (istep<=abs(dtset%nnsclo)) nnsclo_now=merge(5,dtset%useria,dtset%useria==0) 573 else 574 ! Default branch for self-consistent case. 575 ! Perform 2 NSCF loops for the first two iterations. This is important especially wfs have 576 ! been initialized with random numbers. 577 nnsclo_now = 1 578 if (dtset%usewvl == 0) then 579 ! Plane waves 580 if (istep <= 2 .and. iscf /= 0) nnsclo_now = 2 581 ! MG: I don't understand why we need to perform 2 NSCF loops after the first SCF cycle 582 ! when we are relaxing the structure as the initial density and wavefunctions should be already good enough. 583 ! Here I change the default behavior to avoid the extra loop but only if RMM-DIIS is used. 584 ! XG 20210312 : I prefectly agree with you. This is historical, and should be changed, after testing and update of reference files. 585 if ((itimes(1) > 1 .or. (itimes(2)>1)) .and. dtset%rmm_diis /= 0) nnsclo_now = 1 586 else 587 ! Wavelets 588 if (iscf==0) then 589 nnsclo_now=0 590 else if (istep<=2) then 591 nnsclo_now=3 592 else if (istep<=4) then 593 nnsclo_now=2 594 end if 595 end if 596 end if 597 ! Double the value if required 598 if (dbl_nnsclo==1) nnsclo_now=nnsclo_now*2 599 end if 600 601 if(dtset%wfoptalg==2)nnsclo_now=40 ! UNDER DEVELOPMENT 602 603 if (dtset%prtvol > 0) then 604 write(msg, '(a,i0,a,3(i0,1x))' ) ' vtorho: nnsclo_now = ',nnsclo_now,& 605 ', note that nnsclo, dbl_nnsclo, istep= ',dtset%nnsclo,dbl_nnsclo,istep 606 call wrtout(std_out,msg) 607 else 608 if (nnsclo_now > 1) call wrtout(std_out, sjoin(" Max number of non-self-consistent loops:", itoa(nnsclo_now))) 609 end if 610 611 !==== Initialize most of the Hamiltonian ==== 612 !Allocate all arrays and initialize quantities that do not depend on k and spin. 613 call init_hamiltonian(gs_hamk,psps,pawtab,dtset%nspinor,dtset%nsppol,dtset%nspden,natom,& 614 & dtset%typat,xred,dtset%nfft,dtset%mgfft,dtset%ngfft,rprimd,dtset%nloalg,& 615 & paw_ij=paw_ij,ph1d=ph1d,usecprj=usecprj_local,electronpositron=electronpositron,fock=fock,& 616 & comm_atom=mpi_enreg%comm_atom,mpi_atmtab=mpi_enreg%my_atmtab,mpi_spintab=mpi_enreg%my_isppoltab,& 617 & nucdipmom=dtset%nucdipmom,gpu_option=dtset%gpu_option) 618 619 !Initializations for PAW (projected wave functions) 620 mcprj_local=0 ; mband_cprj=0 621 if (psps%usepaw==1) then 622 mband_cprj=dtset%mband 623 if (dtset%paral_kgb/=0) mband_cprj=mband_cprj/mpi_enreg%nproc_band 624 iorder_cprj=0 ; mcprj_local=mcprj 625 if (usecprj==0) then 626 mcprj_local=my_nspinor*mband_cprj*dtset%mkmem*dtset%nsppol 627 !This is a check but should always be true since scfcv allocated cprj anyway 628 if (allocated(cprj_local)) then 629 !Was allocated in scfcv so we just destroy and reconstruct it as desired 630 call pawcprj_free(cprj_local) 631 ABI_FREE(cprj_local) 632 end if 633 ABI_MALLOC(cprj_local,(dtset%natom,mcprj_local)) 634 call pawcprj_alloc(cprj_local,0,gs_hamk%dimcprj) 635 cprj => null() 636 cprj => cprj_local 637 end if 638 end if 639 640 call timab(981,2,tsec) 641 642 !=================================================================== 643 ! WAVELETS - Branching with a separate VTORHO procedure 644 !=================================================================== 645 646 if (dtset%usewvl == 1) then 647 #ifndef HAVE_BIGDFT 648 BIGDFT_NOTENABLED_ERROR() 649 #else 650 651 ! do_last_ortho in case of diagonalization scheme 652 if ( wvlbigdft) do_last_ortho=(dtset%iscf/=0) 653 if (.not.wvlbigdft) do_last_ortho=(.true.) 654 655 ABI_MALLOC(xcart,(3, dtset%natom)) 656 call xred2xcart(dtset%natom, rprimd, xcart, xred) 657 658 if(wvlbigdft) then 659 ! NSCF loop for wvlbigdt: 660 call wvl_nscf_loop_bigdft() 661 else 662 ! NSCF loop for WVL: (not wvlbigdft) 663 call wvl_nscf_loop() 664 end if 665 666 ! Eventually orthogonalize WFs now 667 if (do_last_ortho) then 668 call write_energies(ii,0,wvl%e%energs,0.d0,0.d0,"final") 669 call last_orthon(me_distrb, nproc_distrb, ii, wvl%wfs%ks, wvl%e%energs%evsum, .true.) 670 if(wvlbigdft) energies%e_xcdc = wvl%e%energs%evxc 671 ! If occupation numbers are not changed... 672 if (fixed_occ .or. (iscf<0 .and. iscf/=-3)) then 673 call wvl_comm_eigen() 674 end if 675 ! ... or update occupations: 676 if( ( .not.fixed_occ) .and. (iscf>0.or.iscf==-3)) then 677 if(wvlbigdft) then 678 call wvl_occ_bigdft() 679 else 680 ! Communicate eigenvalues: 681 call wvl_comm_eigen() 682 ! Update occ and Fermi level 683 call wvl_occ() 684 end if 685 end if 686 ! This might accelerate convergence: 687 wvl%wfs%ks%diis%energy_min=one 688 wvl%wfs%ks%diis%alpha=two 689 end if !do_last_ortho 690 691 ! Compute eigenvalues energy 692 if(.not. wvlbigdft .and. nnsclo_now>0) then 693 call e_eigen(eigen,energies%e_eigenvalues,dtset%mband,dtset%nband,dtset%nkpt,& 694 & dtset%nsppol,occ,dtset%wtk) 695 else 696 energies%e_eigenvalues = energies%e_kinetic + energies%e_localpsp & 697 & + energies%e_xcdc + two*energies%e_hartree +energies%e_nlpsp_vfock 698 end if 699 700 if (optforces == 1) then ! not compatible with iscf=0 and wvlbigdftcomp=1 + PAW 701 call wvl_nl_gradient(grnl, mpi_enreg, dtset%natom, rprimd, wvl, xcart) 702 end if 703 704 ! For iscf<0 we do not update the density 705 if (dtset%iscf>=0) then !(dtset%iscf>=0 .and. .not. wvlbigdft ) then 706 call wvl_mkrho(dtset,irrzon,mpi_enreg,phnons,rhor,wvl%wfs,wvl%den) 707 end if 708 ABI_FREE(xcart) 709 710 ! Note in WVL+NC: the rest will be skipped. 711 ! For PAW: we will compute Rho_ij at the end. 712 !if(wvlbigdft) return 713 #endif 714 else 715 716 !=================================================================== 717 ! PLANE WAVES - Standard VTORHO procedure 718 !=================================================================== 719 720 ! Electric field: allocate dphasek 721 nkpt1 = dtset%nkpt 722 if ( berryflag ) then 723 ABI_MALLOC(dphasek,(3,dtset%nkpt*dtset%nsppol)) 724 dphasek(:,:) = zero 725 nkpt1 = dtefield%mkmem_max 726 end if 727 728 if(dtset%gpu_option==ABI_GPU_KOKKOS) then 729 #if defined HAVE_GPU && defined HAVE_YAKL 730 ABI_MALLOC_MANAGED(rhoaug, (/n4,n5,n6,gs_hamk%nvloc/)) 731 ABI_MALLOC_MANAGED(vlocal, (/n4,n5,n6,gs_hamk%nvloc/)) 732 #endif 733 else 734 ABI_MALLOC(rhoaug,(n4,n5,n6,gs_hamk%nvloc)) 735 ABI_MALLOC(vlocal,(n4,n5,n6,gs_hamk%nvloc)) 736 end if 737 738 if(with_vxctau) then 739 ABI_MALLOC(vxctaulocal,(n4,n5,n6,gs_hamk%nvloc,4)) 740 end if 741 742 has_vectornd = (with_vectornd .EQ. 1) 743 if(has_vectornd) then 744 ABI_MALLOC(vectornd_pac,(n4,n5,n6,gs_hamk%nvloc,3)) 745 vectornd_pac=zero 746 end if 747 748 nbdbuf_eff = dtset%nbdbuf 749 ! In metallic case, at first iteration, occupations could be 0. So residm should be computed as usual 750 if (dtset%nbdbuf==-101.and..not.fixed_occ.and.istep==1.and.minval(occ)<tol10) then 751 write(msg,*) 'vtorho: nbdbuf is set to 0 for this step' 752 call wrtout(std_out,msg,'COLL') 753 nbdbuf_eff = 0 754 end if 755 756 ! LOOP OVER SPINS 757 do isppol=1,dtset%nsppol 758 call timab(982,1,tsec) 759 760 ikpt_loc = 0 761 ikg=0 762 763 ! Set up local potential vlocal on the coarse FFT mesh from vtrial taking into account the spin. 764 ! Also, continue to initialize the Hamiltonian. 765 766 call gspot_transgrid_and_pack(isppol, psps%usepaw, dtset%paral_kgb, dtset%nfft, dtset%ngfft, nfftf, & 767 dtset%nspden, gs_hamk%nvloc, 1, pawfgr, mpi_enreg, vtrial, vlocal) 768 call gs_hamk%load_spin(isppol, vlocal=vlocal, with_nonlocal=.true.) 769 770 if (with_vxctau) then 771 call gspot_transgrid_and_pack(isppol, psps%usepaw, dtset%paral_kgb, dtset%nfft, dtset%ngfft, nfftf, & 772 dtset%nspden, gs_hamk%nvloc, 4, pawfgr, mpi_enreg, vxctau, vxctaulocal) 773 call gs_hamk%load_spin(isppol, vxctaulocal=vxctaulocal) 774 end if 775 776 rhoaug(:,:,:,:)=zero 777 778 ! if vectornd is present, set it up for addition to gs_hamk similarly to how it's done for 779 ! vtrial. Note that it must be done for the three Cartesian directions. Also, the following 780 ! code assumes explicitly and implicitly that nvloc = 1. This should eventually be generalized. 781 if(has_vectornd) then 782 call gspot_transgrid_and_pack(isppol, psps%usepaw, dtset%paral_kgb, dtset%nfft, dtset%ngfft, nfftf, & 783 & dtset%nspden, gs_hamk%nvloc, 3, pawfgr, mpi_enreg, vectornd, vectornd_pac) 784 call gs_hamk%load_spin(isppol, vectornd=vectornd_pac) 785 end if 786 787 call timab(982,2,tsec) 788 789 ! BIG FAT k POINT LOOP 790 ! MVeithen: I had to modify the structure of this loop in order to implement MPI // of the electric field 791 ! Note that the loop here differs from the similar one in berryphase_new.F90. 792 ! here, ikpt_loc numbers the kpts treated by the current processor. 793 ! in berryphase_new.F90, ikpt_loc ALSO includes info about value of isppol. 794 795 ikpt = 0 796 do while (ikpt_loc < nkpt1) 797 798 call timab(997,1,tsec) 799 800 if ( .not.berryflag ) then 801 ikpt_loc = ikpt_loc + 1 802 ikpt = ikpt_loc 803 my_ikpt = mpi_enreg%my_kpttab(ikpt) 804 else 805 if (ikpt_loc < dtset%mkmem) ikpt = ikpt + 1 806 if ((ikpt > dtset%nkpt).and.(ikpt_loc < dtset%mkmem)) exit 807 my_ikpt=ikpt 808 end if 809 810 dphase_k(:) = zero 811 counter=100*ikpt+isppol 812 nband_k=dtset%nband(ikpt+(isppol-1)*dtset%nkpt) 813 nband_cprj_k=nband_k/mpi_enreg%nproc_band 814 istwf_k=dtset%istwfk(ikpt) 815 npw_k=npwarr(ikpt) 816 817 mcgq=1 ; mkgq=1 818 if (.not.berryflag) then 819 if(proc_distrb_cycle(mpi_enreg%proc_distrb,ikpt,1,nband_k,isppol,me_distrb)) then 820 eigen(1+bdtot_index : nband_k+bdtot_index) = zero 821 resid(1+bdtot_index : nband_k+bdtot_index) = zero 822 bdtot_index=bdtot_index+nband_k 823 cycle 824 end if 825 else 826 if ((proc_distrb_cycle(mpi_enreg%proc_distrb,ikpt,1,nband_k,isppol,me_distrb)) .and.(ikpt_loc <= dtset%mkmem)) then 827 eigen(1+bdtot_index : nband_k+bdtot_index) = zero 828 resid(1+bdtot_index : nband_k+bdtot_index) = zero 829 bdtot_index = bdtot_index + nband_k 830 cycle 831 end if 832 ikpt_loc = ikpt_loc + 1 833 mcgq = dtset%mpw*my_nspinor*nband_k*dtefield%nneigh(ikpt) 834 ikg = dtefield%kgindex(ikpt) 835 mkgq = 6*dtset%mpw 836 end if ! berryflag 837 838 call timab(997,2,tsec) 839 840 ! In case of MPI // of a finite field calculation 841 ! build the cgq array that stores the wavefunctions for the 842 ! neighbours of ikpt, and the pwnsfacq array that stores the 843 ! corresponding phase factors (in case of tnons) 844 ABI_MALLOC(cgq,(2,mcgq)) 845 ABI_MALLOC(pwnsfacq,(2,mkgq)) 846 if ( berryflag ) then 847 call cgq_builder(berryflag,cg,cgq,dtefield,dtset,ikpt,ikpt_loc,isppol,mcg,mcgq,& 848 & me_distrb,mkgq,mpi_enreg,my_nspinor,nband_k,nproc_distrb,& 849 & npwarr,pwnsfac,pwnsfacq,pwind_alloc,spaceComm_distrb) 850 if (ikpt_loc > dtset%mkmem) then 851 ABI_FREE(cgq) 852 ABI_FREE(pwnsfacq) 853 cycle 854 end if 855 end if !berryopt 856 857 call timab(984,1,tsec) 858 859 if (mpi_enreg%paral_kgb==1) my_bandfft_kpt => bandfft_kpt(my_ikpt) 860 call bandfft_kpt_set_ikpt(ikpt,mpi_enreg) 861 ! my_ikpt = ikpt 862 ! if (mpi_enreg%paral_kgb==1) then 863 ! my_ikpt = mpi_enreg%my_kpttab(ikpt) 864 ! my_bandfft_kpt => bandfft_kpt(my_ikpt) 865 ! call bandfft_kpt_set_ikpt(ikpt,mpi_enreg) 866 ! end if 867 868 ABI_MALLOC(ek_k,(nband_k)) 869 ABI_MALLOC(end_k,(nband_k)) 870 ABI_MALLOC(enlx_k,(nband_k)) 871 ABI_MALLOC(ek_k_nd,(2,nband_k,nband_k*paw_dmft%use_dmft)) 872 ABI_MALLOC(occ_k,(nband_k)) 873 ABI_MALLOC(zshift,(nband_k)) 874 ABI_MALLOC(grnl_k,(3*natom,nband_k*optforces)) 875 876 if(dtset%gpu_option==ABI_GPU_KOKKOS) then 877 #if defined HAVE_GPU && defined HAVE_YAKL 878 ABI_MALLOC_MANAGED(eig_k,(/nband_k/)) 879 ABI_MALLOC_MANAGED(resid_k,(/nband_k/)) 880 #endif 881 else 882 ABI_MALLOC(eig_k,(nband_k)) 883 ABI_MALLOC(resid_k,(nband_k)) 884 end if 885 886 eig_k(:)=zero 887 ek_k(:)=zero 888 end_k(:)=zero 889 enlx_k(:)=zero 890 if(paw_dmft%use_dmft==1) ek_k_nd(:,:,:)=zero 891 if (optforces>0) grnl_k(:,:)=zero 892 kpoint(:)=dtset%kptns(:,ikpt) 893 occ_k(:)=occ(1+bdtot_index:nband_k+bdtot_index) 894 resid_k(:) = resid(1+bdtot_index : nband_k+bdtot_index) 895 !resid_k(:)=zero 896 zshift(:)=dtset%eshift 897 898 if(dtset%gpu_option==ABI_GPU_KOKKOS) then 899 #if defined HAVE_GPU && defined HAVE_YAKL 900 ABI_MALLOC_MANAGED(kg_k, (/3,npw_k/)) 901 #endif 902 else 903 ABI_MALLOC(kg_k,(3,npw_k)) 904 end if 905 906 ABI_MALLOC(ylm_k,(npw_k,psps%mpsang*psps%mpsang*psps%useylm)) 907 kg_k(:,1:npw_k)=kg(:,1+ikg:npw_k+ikg) 908 if (psps%useylm==1) then 909 do ilm=1,psps%mpsang*psps%mpsang 910 ylm_k(1:npw_k,ilm)=ylm(1+ikg:npw_k+ikg,ilm) 911 end do 912 end if 913 914 ! Set up remaining of the Hamiltonian 915 916 ! Compute (1/2) (2 Pi)**2 (k+G)**2: 917 if(dtset%gpu_option==ABI_GPU_KOKKOS) then 918 #if defined HAVE_GPU && defined HAVE_YAKL 919 ABI_MALLOC_MANAGED(kinpw,(/npw_k/)) 920 #endif 921 else 922 ABI_MALLOC(kinpw,(npw_k)) 923 end if 924 925 call mkkin(dtset%ecut,dtset%ecutsm,dtset%effmass_free,gmet,kg_k,kinpw,kpoint,npw_k,0,0) 926 927 ! Compute (k+G) vectors (only if useylm=1) 928 nkpg=3*optforces*dtset%nloalg(3) 929 ABI_MALLOC(kpg_k,(npw_k,nkpg)) 930 if ((mpi_enreg%paral_kgb/=1.or.istep<=1).and.nkpg>0) call mkkpg(kg_k,kpg_k,kpoint,nkpg,npw_k) 931 932 ! Compute nonlocal form factors ffnl at all (k+G): 933 ider=0;idir=0;dimffnl=1 934 ABI_MALLOC(ffnl,(npw_k,dimffnl,psps%lmnmax,ntypat)) 935 if (mpi_enreg%paral_kgb/=1.or.istep<=1) then 936 call mkffnl(psps%dimekb,dimffnl,psps%ekb,ffnl,psps%ffspl,& 937 & gmet,gprimd,ider,idir,psps%indlmn,kg_k,kpg_k,kpoint,psps%lmnmax,& 938 & psps%lnmax,psps%mpsang,psps%mqgrid_ff,nkpg,& 939 & npw_k,ntypat,psps%pspso,psps%qgrid_ff,rmet,& 940 & psps%usepaw,psps%useylm,ylm_k,ylmgr) 941 end if 942 943 ! Load k-dependent part in the Hamiltonian datastructure 944 ! - Compute 3D phase factors 945 ! - Prepare various tabs in case of band-FFT parallelism 946 ! - Load k-dependent quantities in the Hamiltonian 947 ABI_MALLOC(ph3d,(2,npw_k,gs_hamk%matblk)) 948 949 if(usefock_ACE/=0) then 950 call gs_hamk%load_k(kpt_k=dtset%kptns(:,ikpt),istwf_k=istwf_k,npw_k=npw_k,& 951 & kinpw_k=kinpw,kg_k=kg_k,kpg_k=kpg_k,ffnl_k=ffnl,fockACE_k=fock%fockACE(ikpt,isppol),ph3d_k=ph3d,& 952 & compute_ph3d=(mpi_enreg%paral_kgb/=1.or.istep<=1),& 953 & compute_gbound=(mpi_enreg%paral_kgb/=1)) 954 else 955 call gs_hamk%load_k(kpt_k=dtset%kptns(:,ikpt),istwf_k=istwf_k,npw_k=npw_k,& 956 & kinpw_k=kinpw,kg_k=kg_k,kpg_k=kpg_k,ffnl_k=ffnl,ph3d_k=ph3d,& 957 & compute_ph3d=(mpi_enreg%paral_kgb/=1.or.istep<=1),& 958 & compute_gbound=(mpi_enreg%paral_kgb/=1)) 959 end if 960 961 ! Load band-FFT tabs (transposed k-dependent arrays) 962 if (mpi_enreg%paral_kgb==1) then 963 if (istep<=1) call prep_bandfft_tabs(gs_hamk,ikpt,dtset%mkmem,mpi_enreg) 964 call gs_hamk%load_k(npw_fft_k=my_bandfft_kpt%ndatarecv, & 965 gbound_k =my_bandfft_kpt%gbound, & 966 kinpw_k =my_bandfft_kpt%kinpw_gather, & 967 kg_k =my_bandfft_kpt%kg_k_gather, & 968 kpg_k =my_bandfft_kpt%kpg_k_gather, & 969 ffnl_k =my_bandfft_kpt%ffnl_gather, & 970 ph3d_k =my_bandfft_kpt%ph3d_gather) 971 end if 972 973 ! If OpenMP GPU, load "hamiltonian" on GPU device 974 if (dtset%gpu_option == ABI_GPU_OPENMP) then 975 if(dtset%paral_kgb==0) then 976 call ompgpu_load_hamilt_buffers(gs_hamk%kg_k,gs_hamk%kg_kp) 977 else if(istwf_k==1) then 978 call ompgpu_load_hamilt_buffers(gs_hamk%kg_k,gs_hamk%kg_kp,kg_k_gather=bandfft_kpt(my_ikpt)%kg_k_gather) 979 else if(istwf_k==2) then 980 call ompgpu_load_hamilt_buffers(gs_hamk%kg_k,gs_hamk%kg_kp,kg_k_gather=bandfft_kpt(my_ikpt)%kg_k_gather_sym) 981 else 982 ABI_ERROR("istwfk > 2 is not handled with OpenMP GPU offload mode !") 983 end if 984 end if 985 986 if(dtset%wfoptalg == 111 .and. istep <= 1) then 987 gemm_nonlop_nblocks = dtset%gpu_nl_splitsize 988 ! Only compute CHEBFI number of blocks if user didn't set it themselves 989 if(gemm_nonlop_nblocks==1) then 990 call chebfiwf2_blocksize(gs_hamk,mpi_enreg%bandpp,npw_k,nband_k,dtset%nspinor,mpi_enreg%paral_kgb,& 991 & dtset%gpu_option,nblk_gemm_nonlop) 992 gemm_nonlop_nblocks = nblk_gemm_nonlop 993 end if 994 gemm_nonlop_is_distributed = .false. 995 if(gemm_nonlop_nblocks > 1 .and. dtset%gpu_nl_distrib/=0) gemm_nonlop_is_distributed = .true. 996 end if 997 998 ! Build inverse of overlap matrix for chebfi 999 if(psps%usepaw == 1 .and. (dtset%wfoptalg == 1 .or. dtset%wfoptalg == 111) .and. istep <= 1) then 1000 ABI_NVTX_START_RANGE(NVTX_INVOVL) 1001 call make_invovl(gs_hamk, dimffnl, ffnl, ph3d, mpi_enreg) 1002 ABI_NVTX_END_RANGE() 1003 end if 1004 1005 ! Setup gemm_nonlop 1006 if (gemm_nonlop_use_gemm) then 1007 !set the global variable indicating to gemm_nonlop where to get its data from 1008 gemm_nonlop_ikpt_this_proc_being_treated = my_ikpt 1009 if (istep <= 1) then 1010 !Init the arrays 1011 !FIXME Settle this 1012 if ( dtset%gpu_option == ABI_GPU_DISABLED) then 1013 call make_gemm_nonlop(my_ikpt,gs_hamk%npw_fft_k,gs_hamk%lmnmax, & 1014 gs_hamk%ntypat, gs_hamk%indlmn, gs_hamk%nattyp, gs_hamk%istwf_k, & 1015 gs_hamk%ucvol, gs_hamk%ffnl_k,& 1016 gs_hamk%ph3d_k,gs_hamk%kpt_k,gs_hamk%kg_k,gs_hamk%kpg_k,& 1017 compute_grad_atom=(optforces>0)) 1018 else if ( dtset%gpu_option == ABI_GPU_LEGACY .or. dtset%gpu_option == ABI_GPU_KOKKOS) then 1019 call make_gemm_nonlop_gpu(my_ikpt,gs_hamk%npw_fft_k,gs_hamk%lmnmax, & 1020 gs_hamk%ntypat, gs_hamk%indlmn, gs_hamk%nattyp, gs_hamk%istwf_k, & 1021 gs_hamk%ucvol, gs_hamk%ffnl_k, & 1022 gs_hamk%ph3d_k,gs_hamk%kpt_k,gs_hamk%kg_k,gs_hamk%kpg_k, & 1023 compute_grad_atom=(optforces>0)) 1024 else if ( dtset%gpu_option == ABI_GPU_OPENMP) then 1025 if(mpi_enreg%paral_kgb==0) then 1026 call ompgpu_load_hamilt_buffers(gs_hamk%kg_k,gs_hamk%kg_kp) 1027 else if(gs_hamk%istwf_k==1) then 1028 call ompgpu_load_hamilt_buffers(gs_hamk%kg_k,gs_hamk%kg_kp,kg_k_gather=bandfft_kpt(my_ikpt)%kg_k_gather) 1029 else if(gs_hamk%istwf_k==2) then 1030 call ompgpu_load_hamilt_buffers(gs_hamk%kg_k,gs_hamk%kg_kp,kg_k_gather=bandfft_kpt(my_ikpt)%kg_k_gather_sym) 1031 else 1032 ABI_ERROR("istwfk > 2 is not handled with OpenMP GPU offload mode !") 1033 end if 1034 call make_gemm_nonlop_ompgpu(my_ikpt,gs_hamk%npw_fft_k,gs_hamk%lmnmax, & 1035 gs_hamk%ntypat, gs_hamk%indlmn, gs_hamk%nattyp, gs_hamk%istwf_k, & 1036 gs_hamk%ucvol, gs_hamk%ffnl_k, & 1037 gs_hamk%ph3d_k,gs_hamk%kpt_k,gs_hamk%kg_k,gs_hamk%kpg_k, & 1038 compute_grad_atom=(optforces>0)) 1039 end if 1040 end if 1041 end if 1042 1043 #if defined HAVE_GPU_CUDA 1044 if (dtset%gpu_option==ABI_GPU_LEGACY .or. dtset%gpu_option==ABI_GPU_KOKKOS) then 1045 if (mpi_enreg%paral_kgb==1) then 1046 call gpu_update_ffnl_ph3d( & 1047 & my_bandfft_kpt%ph3d_gather, INT(size(my_bandfft_kpt%ph3d_gather),c_int64_t), & 1048 & my_bandfft_kpt%ffnl_gather, INT(size(my_bandfft_kpt%ffnl_gather),c_int64_t) ) 1049 else 1050 call gpu_update_ffnl_ph3d( & 1051 & ph3d, INT(size(ph3d),c_int64_t), & 1052 & ffnl, INT(size(ffnl),c_int64_t) ) 1053 end if 1054 end if 1055 #endif 1056 1057 call timab(984,2,tsec) 1058 1059 ! Update the value of ikpt,isppol in fock_exchange and allocate the memory space to perform HF calculation. 1060 if (usefock) call fock_updateikpt(fock%fock_common,ikpt,isppol) 1061 if (psps%usepaw==1 .and. usefock) then 1062 if ((fock%fock_common%optfor).and.(usefock_ACE==0)) fock%fock_common%forces_ikpt=zero 1063 end if 1064 1065 ! Here we initialize the wavefunctions with atomic orbitals at the first GS iteration of the first 1066 ! relaxation step (if any). 1067 ! NB: Not all the cases are presently supported. 1068 !print *, "istep, itimes(1), wfinit", istep, itimes(1), dtset%wfinit 1069 ! FIXME: This check is not enough as I need to check whether cg have been read from WFK file 1070 if (istep == 1 .and. itimes(1) == 0 .and. dtset%wfinit /= 0) then 1071 call cg_from_atoms(ikpt, isppol, rprimd, xred, kg_k, cg(:,icg+1:), dtset, psps, eig_k, gs_hamk, & 1072 mpi_enreg, nband_k, npw_k, my_nspinor) 1073 end if 1074 1075 ABI_NVTX_START_RANGE(NVTX_VTOWFK) 1076 ! Compute the eigenvalues, wavefunction, residuals, 1077 ! contributions to kinetic energy, nuclear dipole energy, 1078 ! nonlocal energy, forces, 1079 ! and update of rhor to this k-point and this spin polarization. 1080 call vtowfk(cg,cgq,cprj,cpus,dphase_k,dtefield,dtfil,& 1081 & dtset,eig_k,ek_k,ek_k_nd,end_k,enlx_k,fixed_occ,grnl_k,gs_hamk,& 1082 & ibg,icg,ikpt,iscf,isppol,kg_k,kinpw,mband_cprj,mcg,mcgq,mcprj_local,mkgq,& 1083 & mpi_enreg,dtset%mpw,natom,nband_k,nbdbuf_eff,dtset%nkpt,istep,nnsclo_now,npw_k,npwarr,& 1084 & occ_k,optforces,prtvol,pwind,pwind_alloc,pwnsfac,pwnsfacq,resid_k,& 1085 & rhoaug,paw_dmft,dtset%wtk(ikpt),zshift, rmm_diis_status(:,ikpt,isppol)) 1086 ABI_NVTX_END_RANGE() 1087 1088 ! LB-01/03/2024: Very weird compiler error on eos-nvhpc23.1 if the second call of timab(985,...) is included... 1089 ! Drastic short-term solution : disable this timing for nvhpc... In fact this part is not important unless fock is activated 1090 ! Note: Should we keep nvhpc-23.1 in eos? 1091 #ifndef FC_NVHPC 1092 call timab(985,1,tsec) 1093 #endif 1094 1095 #if defined HAVE_GPU_CUDA 1096 if(dtset%gpu_option==ABI_GPU_LEGACY .or. dtset%gpu_option==ABI_GPU_KOKKOS) call gpu_finalize_ffnl_ph3d() 1097 #endif 1098 ABI_FREE(ffnl) 1099 1100 if(dtset%gpu_option==ABI_GPU_KOKKOS) then 1101 #if defined HAVE_GPU && defined HAVE_YAKL 1102 ABI_FREE_MANAGED(kinpw) 1103 ABI_FREE_MANAGED(kg_k) 1104 #endif 1105 else 1106 ABI_FREE(kinpw) 1107 ABI_FREE(kg_k) 1108 end if 1109 1110 ABI_FREE(kpg_k) 1111 ABI_FREE(ylm_k) 1112 ABI_FREE(ph3d) 1113 ABI_FREE(cgq) 1114 ABI_FREE(pwnsfacq) 1115 1116 ! electric field 1117 if (berryflag) then 1118 dphasek(:,ikpt + (isppol - 1)*dtset%nkpt) = dphase_k(:) 1119 1120 ! The overlap matrices for all first neighbours of ikpt are no more up to date 1121 do idir = 1, 3 1122 do ifor = 1, 2 1123 ikpt1 = dtefield%ikpt_dk(dtefield%i2fbz(ikpt),ifor,idir) 1124 ikpt1 = dtefield%indkk_f2ibz(ikpt1,1) 1125 ifor1 = -1*ifor + 3 ! ifor = 1 -> ifor1 = 2 & ifor = 2 -> ifor1 = 1 1126 dtefield%sflag(:,ikpt1+(isppol-1)*dtset%nkpt,ifor1,idir) = 0 1127 end do 1128 end do 1129 end if ! berryflag 1130 1131 ! Save eigenvalues (hartree), residuals (hartree**2) 1132 eigen(1+bdtot_index : nband_k+bdtot_index) = eig_k(:) 1133 eknk (1+bdtot_index : nband_k+bdtot_index) = ek_k (:) 1134 if(usefock) then 1135 focknk (1+bdtot_index : nband_k+bdtot_index) = fock%fock_common%eigen_ikpt (:) 1136 if (optforces>0) fockfornk(:,:,1+bdtot_index : nband_k+bdtot_index) = fock%fock_common%forces_ikpt(:,:,:) 1137 end if 1138 if(paw_dmft%use_dmft==1) eknk_nd(isppol,ikpt,:,:,:) = ek_k_nd(:,:,:) 1139 resid(1+bdtot_index : nband_k+bdtot_index) = resid_k(:) 1140 if (optforces>0) grnlnk(:,1+bdtot_index : nband_k+bdtot_index) = grnl_k(:,:) 1141 enlxnk(1+bdtot_index : nband_k+bdtot_index) = enlx_k(:) 1142 1143 if(iscf>0 .or. iscf==-3)then 1144 ! Accumulate sum over k points for band, nonlocal and kinetic energies, 1145 ! also accumulate gradients of Enonlocal: 1146 do iband=1,nband_k 1147 if (abs(occ_k(iband))>tol8) then 1148 energies%e_kinetic = energies%e_kinetic + dtset%wtk(ikpt)*occ_k(iband)*ek_k(iband) 1149 energies%e_nucdip = energies%e_nucdip + dtset%wtk(ikpt)*occ_k(iband)*end_k(iband) 1150 energies%e_eigenvalues = energies%e_eigenvalues + dtset%wtk(ikpt)*occ_k(iband)*eig_k(iband) 1151 energies%e_nlpsp_vfock = energies%e_nlpsp_vfock + dtset%wtk(ikpt)*occ_k(iband)*enlx_k(iband) 1152 if (optforces>0) grnl(:)=grnl(:)+dtset%wtk(ikpt)*occ_k(iband)*grnl_k(:,iband) 1153 if (usefock) then 1154 energies%e_fock=energies%e_fock + half*fock%fock_common%eigen_ikpt(iband)*occ_k(iband)*dtset%wtk(ikpt) 1155 if (usefock_ACE==0) energies%e_fock0=energies%e_fock 1156 endif 1157 end if 1158 end do 1159 1160 ! Calculate Fock contribution to the total energy if required 1161 if ((psps%usepaw==1).and.(usefock)) then 1162 if ((fock%fock_common%optfor).and.(usefock_ACE==0)) then 1163 !WARNING : this routine actually does NOT compute the Fock contrib to total energy, but modifies the force ONLY. 1164 call fock_calc_ene(dtset,fock%fock_common,energies%e_exactX,ikpt,nband_k,occ_k) 1165 end if 1166 end if 1167 end if 1168 1169 if ( dtset%gpu_option == ABI_GPU_OPENMP) then 1170 call ompgpu_free_hamilt_buffers() 1171 end if 1172 ! LB-01/03/2024: Very weird compiler error on eos-nvhpc23.1 if the second call of timab(985,...) is included... 1173 ! Drastic short-term solution : disable this timing for nvhpc... In fact this part is not important unless fock is activated 1174 #ifndef FC_NVHPC 1175 call timab(985,2,tsec) 1176 #endif 1177 1178 ABI_FREE(ek_k) 1179 ABI_FREE(ek_k_nd) 1180 ABI_FREE(end_k) 1181 ABI_FREE(grnl_k) 1182 ABI_FREE(occ_k) 1183 ABI_FREE(zshift) 1184 ABI_FREE(enlx_k) 1185 1186 if(dtset%gpu_option==ABI_GPU_KOKKOS) then 1187 #if defined HAVE_GPU && defined HAVE_YAKL 1188 ABI_FREE_MANAGED(eig_k) 1189 ABI_FREE_MANAGED(resid_k) 1190 #endif 1191 else 1192 ABI_FREE(eig_k) 1193 ABI_FREE(resid_k) 1194 end if 1195 1196 ! Keep track of total number of bands (all k points so far, even for k points not treated by me) 1197 bdtot_index=bdtot_index+nband_k 1198 1199 ! Also shift array memory if dtset%mkmem/=0 1200 if (dtset%mkmem/=0) then 1201 ibg=ibg+my_nspinor*nband_cprj_k 1202 icg=icg+npw_k*my_nspinor*nband_k 1203 ikg=ikg+npw_k 1204 end if 1205 1206 end do ! End big k point loop 1207 1208 call timab(986,1,tsec) 1209 1210 if (fixed_occ .and. mpi_enreg%paral_kgb==1) then 1211 call xmpi_sum(rhoaug,mpi_enreg%comm_bandspinorfft,ierr) !Sum the contributions over bands/FFT/spinors 1212 end if 1213 1214 ! Transfer density on augmented fft grid to normal fft grid in real space 1215 ! Also take into account the spin. 1216 if(iscf>0.or.iscf==-3)then 1217 if (psps%usepaw==0) then 1218 call fftpac(isppol,mpi_enreg,dtset%nspden,n1,n2,n3,n4,n5,n6,dtset%ngfft,rhor,rhoaug(:,:,:,1),1) 1219 if(dtset%nspden==4)then 1220 do imagn=2,4 1221 call fftpac(imagn,mpi_enreg,dtset%nspden,n1,n2,n3,n4,n5,n6,dtset%ngfft,rhor,rhoaug(:,:,:,imagn),1) 1222 end do 1223 end if 1224 else 1225 call fftpac(isppol,mpi_enreg,dtset%nspden,n1,n2,n3,n4,n5,n6,dtset%ngfft,rhowfr,rhoaug(:,:,:,1),1) 1226 if(dtset%nspden==4)then 1227 do imagn=2,4 1228 call fftpac(imagn,mpi_enreg,dtset%nspden,n1,n2,n3,n4,n5,n6,dtset%ngfft,rhowfr,rhoaug(:,:,:,imagn),1) 1229 end do 1230 end if 1231 end if 1232 end if 1233 1234 call timab(986,2,tsec) 1235 1236 end do ! End loop over spins 1237 1238 call timab(988,1,tsec) 1239 if (usefock) then 1240 if (usefock_ACE==0) then 1241 call xmpi_sum(energies%e_fock0,mpi_enreg%comm_kpt,ierr) 1242 end if 1243 if(fock%fock_common%optfor) call xmpi_sum(fock%fock_common%forces,mpi_enreg%comm_kpt,ierr) 1244 end if 1245 ! Electric field: compute string-averaged change in Zak phase 1246 ! along each direction, store it in dphase(idir) 1247 ! ji: it is not convenient to do this anymore. Remove. Set dphase(idir)=0.0_dp. 1248 ! eventually, dphase(idir) will have to go... 1249 if (berryflag) then 1250 dphase(:) = zero 1251 ! In case of MPI // of a finite field calculation, send dphasek to all cpus 1252 call xmpi_sum(dphasek,spaceComm_distrb,ierr) 1253 ABI_FREE(dphasek) 1254 end if ! berryflag 1255 1256 if(dtset%gpu_option==ABI_GPU_KOKKOS) then 1257 #if defined HAVE_GPU && defined HAVE_YAKL 1258 ABI_FREE_MANAGED(rhoaug) 1259 ABI_FREE_MANAGED(vlocal) 1260 #endif 1261 else 1262 ABI_FREE(rhoaug) 1263 ABI_FREE(vlocal) 1264 end if 1265 1266 if(with_vxctau) then 1267 ABI_FREE(vxctaulocal) 1268 end if 1269 if(has_vectornd) then 1270 ABI_FREE(vectornd_pac) 1271 end if 1272 1273 call timab(988,2,tsec) 1274 1275 ABI_MALLOC(doccde,(dtset%mband*dtset%nkpt*dtset%nsppol)) 1276 doccde(:)=zero 1277 1278 ! Treat now varying occupation numbers, in the self-consistent case 1279 if((.not.fixed_occ) .and. (iscf>0.or.iscf==-3)) then 1280 1281 ! Parallel case 1282 if (mpi_enreg%nproc_spkpt>1) then 1283 1284 call timab(989,1,tsec) 1285 1286 ! If needed, exchange the values of eigen,resid,eknk,enlxnk,grnlnk 1287 ABI_MALLOC(buffer1,((4+3*natom*optforces+dtset%usefock+3*natom*dtset%usefock*optforces)*mbdkpsp)) 1288 if(paw_dmft%use_dmft==1) then 1289 ABI_MALLOC(buffer2,(mb2dkpsp*paw_dmft%use_dmft)) 1290 end if 1291 ! Pack eigen,resid,eknk,enlxnk,grnlnk in buffer1 1292 buffer1(1 : mbdkpsp)=eigen(:) 1293 buffer1(1+ mbdkpsp:2*mbdkpsp)=resid(:) 1294 buffer1(1+2*mbdkpsp:3*mbdkpsp)=eknk(:) 1295 buffer1(1+3*mbdkpsp:4*mbdkpsp)=enlxnk(:) 1296 index1=4*mbdkpsp 1297 if (optforces>0) then 1298 buffer1(index1+1:index1+3*natom*mbdkpsp)=reshape(grnlnk,(/(3*natom)*mbdkpsp/) ) 1299 index1=index1+3*natom*mbdkpsp 1300 end if 1301 if (usefock) then 1302 buffer1(1+index1:index1+mbdkpsp)=focknk(:) 1303 if (optforces>0) then 1304 index1=index1+mbdkpsp 1305 buffer1(index1+1:index1+3*natom*mbdkpsp)=reshape(fockfornk,(/(3*natom)*mbdkpsp/) ) 1306 end if 1307 end if 1308 if(paw_dmft%use_dmft==1) then 1309 nnn=0 1310 do ikpt=1,dtset%nkpt 1311 do isppol=1,dtset%nsppol 1312 do iband=1,dtset%mband 1313 do iband1=1,dtset%mband 1314 do iplex=1,2 1315 nnn=nnn+1 1316 buffer2(nnn)=eknk_nd(isppol,ikpt,iplex,iband,iband1) 1317 end do 1318 end do 1319 end do 1320 end do 1321 end do 1322 if(nnn.ne.mb2dkpsp) then 1323 write(msg,*)' BUG in vtorho2, buffer2',nnn,mb2dkpsp 1324 ABI_BUG(msg) 1325 end if 1326 end if 1327 ! Build sum of everything 1328 call timab(48,1,tsec) 1329 call xmpi_sum(buffer1,mpi_enreg%comm_kpt,ierr) 1330 ! if(mpi_enreg%paral_kgb/=1.and.paw_dmft%use_dmft==1) then 1331 if(paw_dmft%use_dmft==1) call xmpi_sum(buffer2,mpi_enreg%comm_kpt,ierr) 1332 call timab(48,2,tsec) 1333 1334 ! Unpack eigen,resid,eknk,enlxnk,grnlnk in buffer1 1335 eigen(:) =buffer1(1 : mbdkpsp) 1336 resid(:) =buffer1(1+ mbdkpsp:2*mbdkpsp) 1337 eknk(:) =buffer1(1+2*mbdkpsp:3*mbdkpsp) 1338 enlxnk(:) =buffer1(1+3*mbdkpsp:4*mbdkpsp) 1339 index1=4*mbdkpsp 1340 if (optforces>0) then 1341 grnlnk(:,:)=reshape(buffer1(index1+1:index1+3*natom*mbdkpsp),(/3*natom,mbdkpsp/) ) 1342 end if 1343 if (usefock) then 1344 focknk(:)=buffer1(1+index1:index1+mbdkpsp) 1345 if (optforces>0) then 1346 index1=index1+mbdkpsp 1347 fockfornk(:,:,:)=reshape(buffer1(index1+1:index1+3*natom*mbdkpsp),(/3,natom,mbdkpsp/) ) 1348 end if 1349 end if 1350 if(paw_dmft%use_dmft==1) then 1351 nnn=0 1352 do ikpt=1,dtset%nkpt 1353 do isppol=1,dtset%nsppol 1354 do iband=1,dtset%mband 1355 do iband1=1,dtset%mband 1356 do iplex=1,2 1357 nnn=nnn+1 1358 eknk_nd(isppol,ikpt,iplex,iband,iband1)=buffer2(nnn) 1359 end do 1360 end do 1361 end do 1362 end do 1363 end do 1364 end if 1365 if(allocated(buffer2)) then 1366 ABI_FREE(buffer2) 1367 end if 1368 ABI_FREE(buffer1) 1369 call timab(989,2,tsec) 1370 1371 end if ! nproc_spkpt>1 1372 1373 ! Compute extfpmd u0 energy shift factor from eigenvalues and kinetic energy. 1374 if(associated(extfpmd)) then 1375 extfpmd%vtrial=vtrial 1376 call extfpmd%compute_shiftfactor(eigen,eknk,dtset%mband,mpi_enreg%me,& 1377 & dtset%nband,dtset%nkpt,dtset%nsppol,dtset%wtk) 1378 end if 1379 1380 ! Compute the new occupation numbers from eigen 1381 call timab(990,1,tsec) 1382 call newocc(doccde,eigen,energies%entropy,energies%e_fermie,energies%e_fermih,dtset%ivalence,& 1383 & dtset%spinmagntarget,dtset%mband,dtset%nband,dtset%nelect,dtset%ne_qFD,dtset%nh_qFD,& 1384 & dtset%nkpt,dtset%nspinor,dtset%nsppol,occ,dtset%occopt,prtvol,dtset%tphysel,& 1385 & dtset%tsmear,dtset%wtk,& 1386 & prtstm=dtset%prtstm,stmbias=dtset%stmbias,extfpmd=extfpmd) 1387 call timab(990,2,tsec) 1388 1389 1390 ! Compute number of free electrons of extfpmd model 1391 if(associated(extfpmd)) then 1392 extfpmd%nelect=zero 1393 call extfpmd%compute_nelect(energies%e_fermie,extfpmd%nelect,dtset%tsmear) 1394 call extfpmd%compute_e_kinetic(energies%e_fermie,dtset%tsmear) 1395 call extfpmd%compute_entropy(energies%e_fermie,dtset%tsmear) 1396 end if 1397 1398 ! !========= DMFT call begin ============================================ 1399 dmft_dftocc=0 1400 if(paw_dmft%use_dmft==1.and.psps%usepaw==1.and.dtset%nbandkss==0) then 1401 call timab(991,1,tsec) 1402 1403 if (dtset%dmftcheck>=0.and.dtset%usedmft>=1.and.(sum(pawtab(:)%upawu)>=tol8.or. & 1404 & sum(pawtab(:)%jpawu)>tol8).and.dtset%dmft_entropy==0) energies%entropy=zero 1405 1406 ! == 0 to a dmft calculation and do not use lda occupations 1407 ! == 1 to a lda calculation with the dmft loop 1408 if(dtset%dmftcheck==-1) dmft_dftocc=1 1409 1410 ! == initialise occnd 1411 paw_dmft%occnd=zero 1412 1413 bdtot_index = 1 1414 do isppol=1,dtset%nsppol 1415 do ikpt=1,dtset%nkpt 1416 do iband=1,dtset%nband(ikpt+(isppol-1)*dtset%nkpt) 1417 paw_dmft%occnd(1,iband,iband,ikpt,isppol)=occ(bdtot_index) 1418 bdtot_index = bdtot_index + 1 1419 end do 1420 end do 1421 end do 1422 1423 1424 if(dmft_dftocc==0) then 1425 if(dtset%occopt/=3) then 1426 ABI_ERROR('occopt should be equal to 3 in dmft') 1427 end if 1428 ! == initialise edmft 1429 if(paw_dmft%use_dmft>=1) edmft = zero 1430 1431 ! Compute residm to check the value 1432 ibdkpt=1 1433 residm=zero 1434 do isppol=1,dtset%nsppol 1435 do ikpt=1,dtset%nkpt 1436 nband_k=dtset%nband(ikpt+(isppol-1)*dtset%nkpt) 1437 if (nbdbuf_eff>=0) then 1438 nband_eff=max(1,nband_k-nbdbuf_eff) 1439 residm=max(residm,maxval(resid(ibdkpt:ibdkpt+nband_eff-1))) 1440 else if (nbdbuf_eff==-101) then 1441 residm=max(residm,maxval(occ(ibdkpt:ibdkpt+nband_k-1)*resid(ibdkpt:ibdkpt+nband_k-1))) 1442 else 1443 ABI_ERROR('Bad value of nbdbuf_eff') 1444 end if 1445 ibdkpt=ibdkpt+nband_k 1446 end do 1447 end do 1448 1449 ! Test residm 1450 if (paw_dmft%use_dmft>0 .and. residm>tol4 .and. dtset%dmftcheck>=0) then 1451 if(dtset%dmft_entropy>0) then 1452 write(msg,'(a,e12.3)')& 1453 ' WARNING: Wavefunctions not converged: DFT+DMFT calculation cannot be carried out safely ',residm 1454 call wrtout(std_out,msg) 1455 else 1456 write(msg,'(a,e12.3)')& 1457 ' ERROR: Wavefunctions not converged: DFT+DMFT calculation cannot be carried out safely ',residm 1458 call wrtout(std_out,msg) 1459 write(msg,'(a,i0)')' Action: increase nline and nnsclo',dtset%nstep 1460 ABI_ERROR(msg) 1461 end if 1462 1463 else if (paw_dmft%use_dmft>0 .and. residm>tol10.and. dtset%dmftcheck>=0) then 1464 write(msg,'(3a)')ch10,& 1465 ' Wavefunctions not converged: DFT+DMFT calculation might not be carried out safely ',ch10 1466 ABI_WARNING(msg) 1467 end if 1468 1469 ! == allocate paw_dmft%psichi and paw_dmft%eigen_dft 1470 call init_dmft(dmatpawu,dtset,energies%e_fermie,dtfil%fnameabo_app,& 1471 dtfil%filnam_ds(3),dtset%nspinor,paw_dmft,pawtab,psps,dtset%typat) 1472 call print_dmft(paw_dmft,dtset%pawprtvol) 1473 1474 ! == gather crystal structure date into data "cryst_struc" 1475 remove_inv=.false. 1476 if(dtset%nspden==4) remove_inv=.true. 1477 call crystal_init(dtset%amu_orig(:,1),cryst_struc,dtset%spgroup,natom,dtset%npsp,ntypat, & 1478 dtset%nsym,rprimd,dtset%typat,xred,dtset%ziontypat,dtset%znucl,1,& 1479 dtset%nspden==2.and.dtset%nsppol==1,remove_inv,hdr%title,& 1480 dtset%symrel,dtset%tnons,dtset%symafm) 1481 1482 ! == compute psichi 1483 call xmpi_barrier(spaceComm_distrb) 1484 call init_oper(paw_dmft,dft_occup) 1485 call flush_unit(std_out) 1486 call timab(620,1,tsec) 1487 call datafordmft(cryst_struc,cprj,gs_hamk%dimcprj,dtset,eigen,energies%e_fermie,& 1488 & dft_occup,dtset%mband,mband_cprj,dtset%mkmem,mpi_enreg,& 1489 & dtset%nkpt,my_nspinor,dtset%nsppol,occ,& 1490 & paw_dmft,paw_ij,pawang,pawtab,psps,usecprj_local,dtfil%unpaw) 1491 call timab(620,2,tsec) 1492 call flush_unit(std_out) 1493 1494 1495 ! == solve dmft loop 1496 call xmpi_barrier(spaceComm_distrb) 1497 1498 call dmft_solve(cryst_struc,istep,dft_occup,paw_dmft,pawang,pawtab,dtset%pawprtvol) 1499 edmft=paw_dmft%edmft 1500 energies%e_paw=energies%e_paw+edmft 1501 energies%e_pawdc=energies%e_pawdc+edmft 1502 call flush_unit(std_out) 1503 ! paw_dmft%occnd(:,:,:,:,:)=0.5_dp 1504 1505 ! For compatibility with old test, do not use for calculation 1506 if(dtset%dmft_occnd_imag==0) paw_dmft%occnd(2,:,:,:,:)=zero 1507 1508 ! call print_dmft(paw_dmft,dtset%pawprtvol) 1509 ! if(dtset%paral_kgb==1) then 1510 ! write(msg,'(5a)')ch10,& 1511 !& ' Parallelization over bands is not yet compatible with self-consistency in DMFT ',ch10,& 1512 !& ' Calculation of density does not taken into account non diagonal occupations',ch10 1513 ! call wrtout(std_out,msg) 1514 ! call wrtout(ab_out,msg) 1515 !! ABI_ERROR(msg) 1516 ! if(dtset%nstep>1) then 1517 ! write(msg,'(a,i0)')' Action: use nstep=1 instead of nstep=',dtset%nstep 1518 ! ABI_ERROR(msg) 1519 ! end if 1520 ! residm=zero 1521 ! end if 1522 ! if(dtset%nspinor==2) then 1523 ! call flush_unit(ab_out) 1524 ! write(msg,'(3a)')& 1525 ! & ' Self consistent DFT+DMFT with nspinor==2 is not possible yet ',ch10,& 1526 ! & ' Calculation are restricted to nstep =1' 1527 ! ! ABI_ERROR(msg) 1528 ! if(dtset%nstep>1) then 1529 ! write(msg,'(a,i0)')' Action: use nstep=1 instead of nstep=',dtset%nstep 1530 ! ! ABI_ERROR(msg) 1531 ! endif 1532 ! end if 1533 1534 if(me_distrb==0) then 1535 call saveocc_dmft(paw_dmft) 1536 end if 1537 call destroy_dmft(paw_dmft) 1538 1539 ! == destroy crystal_t cryst_struc 1540 call cryst_struc%free() 1541 call destroy_oper(dft_occup) 1542 end if ! dmft_dftocc 1543 call timab(991,2,tsec) 1544 1545 end if ! usedmft 1546 1547 if(dtset%nbandkss/=0) then 1548 write(msg,'(a,i3,2a,i3,4a)') & 1549 " dtset%nbandkss = ",dtset%nbandkss,ch10,& 1550 " and dtset%usedmft = ",dtset%usedmft,ch10,& 1551 " a DFT loop is carried out without DMFT.",ch10,& 1552 " Only psichi's will be written at convergence of the DFT loop." 1553 call wrtout(std_out,msg) 1554 end if 1555 ! !========= DMFT call end ============================================ 1556 1557 call timab(992,1,tsec) 1558 1559 ! Compute eeig, ek,enl and grnl from the new occ, and the shared eknk,enlxnk,grnlnk 1560 energies%e_eigenvalues = zero 1561 energies%e_kinetic = zero 1562 energies%e_nlpsp_vfock = zero 1563 if (usefock) then 1564 energies%e_fock = zero 1565 if (optforces>0) fock%fock_common%forces=zero 1566 end if 1567 if (optforces>0) grnl(:)=zero 1568 if(paw_dmft%use_dmft>=1) then 1569 ebandlda = zero 1570 ebanddmft = zero 1571 ebandldatot = zero 1572 ekindmft = zero 1573 ekindmft2 = zero 1574 ekinlda = zero 1575 end if 1576 1577 ! Compute new energy terms due to non diagonal occupations and DMFT. 1578 bdtot_index=1 1579 do isppol=1,dtset%nsppol 1580 do ikpt=1,dtset%nkpt 1581 nband_k=dtset%nband(ikpt+(isppol-1)*dtset%nkpt) 1582 do iband=1,nband_k 1583 1584 locc_test = abs(occ(bdtot_index))>tol8 1585 ! dmft 1586 if(paw_dmft%use_dmft>=1.and.dtset%nbandkss==0) then 1587 if(paw_dmft%band_in(iband)) then 1588 if( paw_dmft%use_dmft == 1 .and. dmft_dftocc == 1 ) then ! test of the code 1589 paw_dmft%occnd(1,iband,iband,ikpt,isppol)=occ(bdtot_index) 1590 end if 1591 locc_test = abs(paw_dmft%occnd(1,iband,iband,ikpt,isppol))+& 1592 & abs(paw_dmft%occnd(2,iband,iband,ikpt,isppol))>tol8 1593 end if 1594 end if 1595 1596 if (locc_test) then 1597 ! dmft 1598 if(paw_dmft%use_dmft==1.and.dtset%nbandkss==0) then 1599 ebandldatot=ebandldatot+dtset%wtk(ikpt)*occ(bdtot_index)*eigen(bdtot_index) 1600 if(paw_dmft%band_in(iband)) then 1601 ebandlda=ebandlda+dtset%wtk(ikpt)*occ(bdtot_index)*eigen(bdtot_index) 1602 ekinlda=ekinlda+dtset%wtk(ikpt)*occ(bdtot_index)*eknk(bdtot_index) 1603 occ(bdtot_index)=paw_dmft%occnd(1,iband,iband,ikpt,isppol) 1604 ebanddmft=ebanddmft+dtset%wtk(ikpt)*occ(bdtot_index)*eigen(bdtot_index) 1605 ekindmft=ekindmft+dtset%wtk(ikpt)*occ(bdtot_index)*eknk(bdtot_index) 1606 end if 1607 end if 1608 1609 energies%e_eigenvalues = energies%e_eigenvalues + & 1610 & dtset%wtk(ikpt)*occ(bdtot_index)*eigen(bdtot_index) 1611 energies%e_kinetic = energies%e_kinetic + & 1612 & dtset%wtk(ikpt)*occ(bdtot_index)*eknk(bdtot_index) 1613 energies%e_nlpsp_vfock = energies%e_nlpsp_vfock + & 1614 & dtset%wtk(ikpt)*occ(bdtot_index)*enlxnk(bdtot_index) 1615 if (usefock) then 1616 energies%e_fock=energies%e_fock + half*focknk(bdtot_index)*occ(bdtot_index)*dtset%wtk(ikpt) 1617 if (optforces>0) fock%fock_common%forces(:,:)=fock%fock_common%forces(:,:)+& 1618 & dtset%wtk(ikpt)*occ(bdtot_index)*fockfornk(:,:,bdtot_index) 1619 end if 1620 if (optforces>0) grnl(:)=grnl(:)+dtset%wtk(ikpt)*occ(bdtot_index)*grnlnk(:,bdtot_index) 1621 end if 1622 bdtot_index=bdtot_index+1 1623 if(paw_dmft%use_dmft==1.and.dtset%nbandkss==0) then 1624 do iband1=1,nband_k 1625 if(paw_dmft%band_in(iband).and.paw_dmft%band_in(iband1)) then 1626 ! write(std_out,*) "II+", isppol,ikpt,iband,iband1 1627 ekindmft2=ekindmft2 + dtset%wtk(ikpt)*paw_dmft%occnd(1,iband,iband1,ikpt,isppol)*& 1628 & eknk_nd(isppol,ikpt,1,iband,iband1) 1629 ekindmft2=ekindmft2 - dtset%wtk(ikpt)*paw_dmft%occnd(2,iband,iband1,ikpt,isppol)*& 1630 & eknk_nd(isppol,ikpt,2,iband,iband1) 1631 ! write(std_out,*) "II", occnd(1,iband,iband1,ikpt,isppol),eknk_nd(isppol,ikpt,iband,iband1) 1632 end if 1633 end do 1634 end if 1635 end do 1636 end do 1637 end do 1638 1639 if(paw_dmft%use_dmft==1) then 1640 energies%e_kinetic = energies%e_kinetic -ekindmft+ekindmft2 1641 if(abs(dtset%pawprtvol)>=2) then 1642 write(msg,'(4a,7(2x,a,2x,e14.7,a),a)') & 1643 "-----------------------------------------------",ch10,& 1644 "--- Energy for DMFT and tests (in Ha) ",ch10,& 1645 "--- Ebandldatot (Ha.) = ",ebandldatot,ch10,& 1646 "--- Ebandlda (Ha.) = ",ebandlda,ch10,& 1647 "--- Ebanddmft (Ha.) = ",ebanddmft,ch10,& 1648 "--- Ekinlda (Ha.) = ",ekinlda,ch10, & 1649 "--- Ekindmftdiag (Ha.) = ",ekindmft,ch10,& 1650 "--- Ekindmftnondiag(Ha.) = ",ekindmft2,ch10,& 1651 "--- Edmft= (Ha.) = ",edmft,ch10,& 1652 "-----------------------------------------------" 1653 call wrtout(std_out,msg) 1654 end if 1655 ! if(paw_dmft%use_dmft==1.and.mpi_enreg%paral_kgb==1) paw_dmft%use_dmft=0 1656 end if 1657 1658 ABI_NVTX_START_RANGE(NVTX_MKRHO) 1659 if (psps%usepaw==0) then 1660 call mkrho(cg,dtset,gprimd,irrzon,kg,mcg,mpi_enreg,npwarr,occ,paw_dmft,phnons,& 1661 & rhog,rhor,rprimd,tim_mkrho,ucvol,wvl%den,wvl%wfs,& 1662 & extfpmd=extfpmd) 1663 else 1664 call mkrho(cg,dtset,gprimd,irrzon,kg,mcg,mpi_enreg,npwarr,occ,paw_dmft,phnons,& 1665 & rhowfg,rhowfr,rprimd,tim_mkrho,ucvol,wvl%den,wvl%wfs,& 1666 & extfpmd=extfpmd) 1667 end if 1668 ABI_NVTX_END_RANGE() 1669 call timab(992,2,tsec) 1670 1671 ! Treat fixed occupation numbers or non-self-consistent case 1672 else 1673 1674 if (mpi_enreg%nproc_spkpt>1) then 1675 1676 call timab(989,1,tsec) 1677 1678 nbuf=2*mbdkpsp+dtset%nfft*dtset%nspden+4+3*natom*optforces 1679 ! If Hartree-Fock calculation, the exact exchange energy is k-dependent. 1680 if(dtset%usefock==1) then 1681 nbuf=nbuf+1 1682 if (optforces>0) nbuf=nbuf+3*natom 1683 end if 1684 if(iscf==-1 .or. iscf==-2)nbuf=2*mbdkpsp 1685 ABI_MALLOC(buffer1,(nbuf)) 1686 ! Pack eigen,resid,rho[wf]r,grnl,enl,ek 1687 buffer1(1:mbdkpsp)=eigen(:) 1688 buffer1(1+mbdkpsp:2*mbdkpsp)=resid(:) 1689 index1=2*mbdkpsp 1690 if(iscf/=-1 .and. iscf/=-2)then 1691 if (psps%usepaw==0) then 1692 buffer1(index1+1:index1+dtset%nfft*dtset%nspden)=reshape(rhor,(/dtset%nfft*dtset%nspden/)) 1693 else 1694 buffer1(index1+1:index1+dtset%nfft*dtset%nspden)=reshape(rhowfr,(/dtset%nfft*dtset%nspden/)) 1695 end if 1696 index1=index1+dtset%nfft*dtset%nspden 1697 buffer1(index1+1) = energies%e_kinetic 1698 buffer1(index1+2) = energies%e_eigenvalues 1699 buffer1(index1+3) = energies%e_nlpsp_vfock 1700 buffer1(index1+4) = energies%e_nucdip 1701 index1=index1+4 1702 ! If Hartree-Fock calculation, save e_fock in buffer1 1703 if (dtset%usefock==1) then 1704 buffer1(index1+1) = energies%e_fock 1705 index1=index1+1 1706 if (optforces>0)then 1707 buffer1(index1+1:index1+3*natom)=reshape(fock%fock_common%forces,(/3*natom/)) 1708 index1=index1+3*natom 1709 end if 1710 end if 1711 if (optforces>0) buffer1(index1+1:index1+3*natom)=grnl(1:3*natom) 1712 end if 1713 1714 ! Build sum of everything 1715 call timab(48,1,tsec) 1716 call xmpi_sum(buffer1,nbuf,mpi_enreg%comm_kpt ,ierr) 1717 call timab(48,2,tsec) 1718 1719 ! Unpack the final result 1720 eigen(:)=buffer1(1:mbdkpsp) 1721 resid(:)=buffer1(1+mbdkpsp:2*mbdkpsp) 1722 index1=2*mbdkpsp 1723 if(iscf/=-1 .and. iscf/=-2)then 1724 if (psps%usepaw==0) then 1725 ii=1 1726 do ispden=1,dtset%nspden 1727 do ifft=1,dtset%nfft 1728 rhor(ifft,ispden)=buffer1(index1+ii) 1729 ii=ii+1 1730 end do 1731 end do 1732 else 1733 ii=1 1734 do ispden=1,dtset%nspden 1735 do ifft=1,dtset%nfft 1736 rhowfr(ifft,ispden)=buffer1(index1+ii) 1737 ii=ii+1 1738 end do 1739 end do 1740 end if 1741 index1=index1+dtset%nfft*dtset%nspden 1742 energies%e_kinetic = buffer1(index1+1) 1743 energies%e_eigenvalues = buffer1(index1+2) 1744 energies%e_nlpsp_vfock = buffer1(index1+3) 1745 energies%e_nucdip = buffer1(index1+4) 1746 index1=index1+4 1747 ! If Hartree-Fock calculation, save e_fock in buffer1 1748 if (dtset%usefock==1) then 1749 energies%e_fock = buffer1(index1+1) 1750 index1=index1+1 1751 if (optforces>0) then 1752 fock%fock_common%forces(:,:)=reshape(buffer1(index1+1:index1+3*natom),(/3,natom/)) 1753 index1=index1+3*natom 1754 end if 1755 end if 1756 if (optforces>0) grnl(1:3*natom)=buffer1(index1+1:index1+3*natom) 1757 end if 1758 ABI_FREE(buffer1) 1759 call timab(989,2,tsec) 1760 1761 end if ! nproc_spkpt>1 1762 1763 ! Compute the highest occupied eigenenergy 1764 if(iscf/=-1 .and. iscf/=-2)then 1765 call timab(993,1,tsec) 1766 energies%e_fermie = -huge(one) 1767 if (dtset%occopt==9) then 1768 energies%e_fermih = -huge(one) 1769 end if 1770 bdtot_index=1 1771 do isppol=1,dtset%nsppol 1772 do ikpt=1,dtset%nkpt 1773 nband_k=dtset%nband(ikpt+(isppol-1)*dtset%nkpt) 1774 if (dtset%occopt/=9) then 1775 do iband=1,nband_k 1776 if(abs(occ(bdtot_index))>tol8 .and. eigen(bdtot_index)>energies%e_fermie+tol10) then 1777 energies%e_fermie=eigen(bdtot_index) 1778 end if 1779 bdtot_index=bdtot_index+1 1780 end do 1781 else 1782 ! In case occopt 9, computing the fermi level for the electrons remaining in the VB = fermi level of holes 1783 do iband=1,dtset%ivalence 1784 if(abs(occ(bdtot_index))>tol8 .and. eigen(bdtot_index)>energies%e_fermih+tol10) then 1785 energies%e_fermih=eigen(bdtot_index) 1786 end if 1787 bdtot_index=bdtot_index+1 1788 end do 1789 ! In case occopt 9, computing the fermi level for the electrons thermalized in the conduction bands 1790 do iband=dtset%ivalence+1,nband_k 1791 if(abs(occ(bdtot_index))>tol8 .and. eigen(bdtot_index)>energies%e_fermie+tol10) then 1792 energies%e_fermie=eigen(bdtot_index) 1793 end if 1794 bdtot_index=bdtot_index+1 1795 end do 1796 end if 1797 end do 1798 end do 1799 call xmpi_max(energies%e_fermie,spaceComm_distrb,ierr) 1800 if (dtset%occopt == 9) then 1801 call xmpi_max(energies%e_fermih,spaceComm_distrb,ierr) 1802 end if 1803 call timab(993,2,tsec) 1804 end if 1805 1806 ! If needed, compute rhog, and symmetrizes the density 1807 if (iscf > 0 .or. iscf==-3 ) then 1808 ! energies%e_fermie=zero ! Actually, should determine the maximum of the valence band XG20020802 1809 nfftot=dtset%ngfft(1)*dtset%ngfft(2)*dtset%ngfft(3) 1810 1811 call timab(994,1,tsec) 1812 if (psps%usepaw==0) then 1813 call symrhg(1,gprimd,irrzon,mpi_enreg,dtset%nfft,nfftot,dtset%ngfft,dtset%nspden,& 1814 & dtset%nsppol,dtset%nsym,phnons,rhog,rhor,rprimd,dtset%symafm,dtset%symrel,dtset%tnons) 1815 else 1816 call symrhg(1,gprimd,irrzon,mpi_enreg,dtset%nfft,nfftot,dtset%ngfft,dtset%nspden,& 1817 & dtset%nsppol,dtset%nsym,phnons,rhowfg,rhowfr,rprimd,dtset%symafm,dtset%symrel,dtset%tnons) 1818 end if 1819 call timab(994,2,tsec) 1820 ! We now have both rho(r) and rho(G), symmetrized, and if dtset%nsppol=2 1821 ! we also have the spin-up density, symmetrized, in rhor(:,2). 1822 end if 1823 1824 end if ! End of test on varying or fixed occupation numbers 1825 1826 call timab(994,1,tsec) 1827 1828 ! Compute the kinetic energy density 1829 if(dtset%usekden==1 .and. (iscf > 0 .or. iscf==-3 ) )then 1830 if (psps%usepaw==0) then 1831 call mkrho(cg,dtset,gprimd,irrzon,kg,mcg,mpi_enreg,npwarr,occ,paw_dmft,phnons,& 1832 & taug,taur,rprimd,tim_mkrho,ucvol,wvl%den,wvl%wfs,option=1) 1833 else 1834 call mkrho(cg,dtset,gprimd,irrzon,kg,mcg,mpi_enreg,npwarr,occ,paw_dmft,phnons,& 1835 & tauwfg,tauwfr,rprimd,tim_mkrho,ucvol,wvl%den,wvl%wfs,option=1) 1836 end if 1837 end if 1838 1839 ABI_FREE(eknk) 1840 if (usefock) then 1841 ABI_FREE(focknk) 1842 if (optforces>0)then 1843 ABI_FREE(fockfornk) 1844 end if 1845 end if 1846 ABI_FREE(eknk_nd) 1847 ABI_FREE(grnlnk) 1848 ABI_FREE(enlxnk) 1849 1850 ! In the non-self-consistent case, print eigenvalues and residuals 1851 if(iscf<=0 .and. me_distrb==0)then 1852 option=2 ; enunit=1 ; vxcavg_dum=zero 1853 call prteigrs(eigen,enunit,energies%e_fermie,energies%e_fermih,& 1854 & dtfil%fnameabo_app_eig,ab_out,iscf,dtset%kptns,dtset%kptopt,dtset%mband,& 1855 & dtset%nband,nbdbuf_eff,dtset%nkpt,nnsclo_now,dtset%nsppol,occ,dtset%occopt,option,& 1856 & dtset%prteig,prtvol,resid,dtset%tolwfr,vxcavg_dum,dtset%wtk) 1857 end if 1858 1859 ! Find largest residual over bands, k points, and spins, except for nbdbuf highest bands 1860 ibdkpt=1 1861 residm=zero 1862 do isppol=1,dtset%nsppol 1863 do ikpt=1,dtset%nkpt 1864 nband_k=dtset%nband(ikpt+(isppol-1)*dtset%nkpt) 1865 if (nbdbuf_eff>=0) then 1866 nband_eff=max(1,nband_k-nbdbuf_eff) 1867 residm=max(residm,maxval(resid(ibdkpt:ibdkpt+nband_eff-1))) 1868 else if (nbdbuf_eff==-101) then 1869 residm=max(residm,maxval(occ(ibdkpt:ibdkpt+nband_k-1)*resid(ibdkpt:ibdkpt+nband_k-1))) 1870 else 1871 ABI_ERROR('Bad value of nbdbuf_eff') 1872 end if 1873 ibdkpt=ibdkpt+nband_k 1874 end do 1875 end do 1876 1877 end if !usewvl==0 1878 1879 !=================================================================== 1880 ! End of PLANE WAVES section 1881 !=================================================================== 1882 1883 !In the self-consistent case, diagnose lack of unoccupied state (for each spin and k-point). 1884 1885 !Print a warning if the number of such messages already written does not exceed mwarning. 1886 ! MG: This is not a good idea as this is a typical mistake done by beginners and we should 1887 ! keep on spamming this warning message in the log file. 1888 !mwarning=5 1889 !if(nwarning<mwarning .and. iscf>=0)then 1890 !nwarning=nwarning+1 1891 1892 if(iscf>=0)then 1893 bdtot_index=1 1894 do isppol=1,dtset%nsppol 1895 do ikpt=1,dtset%nkpt 1896 min_occ=two 1897 nband_k=dtset%nband(ikpt+(isppol-1)*dtset%nkpt) 1898 do iband=1,nband_k 1899 if(occ(bdtot_index)<min_occ)min_occ=occ(bdtot_index) 1900 bdtot_index=bdtot_index+1 1901 end do 1902 if(min_occ>0.01_dp .and. .not. associated(extfpmd))then 1903 if(dtset%nsppol==1)then 1904 write(msg, '(a,i0,3a,f7.3,5a)' )& 1905 'For k-point number: ',ikpt,',',ch10,& 1906 'The minimal occupation factor is: ',min_occ,'.',ch10,& 1907 'An adequate monitoring of convergence requires it to be at most 0.01_dp.',ch10,& 1908 'Action: increase slightly the number of bands.' 1909 else 1910 write(msg, '(a,i0,3a,i0,a,f7.3,5a)' )& 1911 'For k-point number: ',ikpt,', and',ch10,& 1912 'for spin polarization: ',isppol, ' the minimal occupation factor is: ',min_occ,'.',ch10,& 1913 'An adequate monitoring of convergence requires it to be at most 0.01_dp.',ch10,& 1914 'Action: increase slightly the number of bands.' 1915 end if 1916 ABI_WARNING(msg) 1917 exit ! It is enough if one lack of adequate occupation is identified, so exit. 1918 end if 1919 end do 1920 end do 1921 end if 1922 1923 ABI_NVTX_START_RANGE(NVTX_VTORHO_EXTRA) 1924 if (iscf>0.or.iscf==-3 .or. (dtset%usewvl==1 .and. iscf==0)) then 1925 1926 ! PAW: Build new rhoij quantities from new occ then symetrize them 1927 ! Compute and add the compensation density to rhowfr to get the total density 1928 if (psps%usepaw==1) then 1929 call timab(555,1,tsec) 1930 if (paral_atom) then 1931 ABI_MALLOC(pawrhoij_unsym,(natom)) 1932 call pawrhoij_inquire_dim(cplex_rhoij=cplex_rhoij,nspden_rhoij=nspden_rhoij,& 1933 & nspden=dtset%nspden,spnorb=dtset%pawspnorb,cpxocc=dtset%pawcpxocc) 1934 call pawrhoij_alloc(pawrhoij_unsym,cplex_rhoij,nspden_rhoij,dtset%nspinor,& 1935 & dtset%nsppol,dtset%typat,pawtab=pawtab,use_rhoijp=0) 1936 else 1937 pawrhoij_unsym => pawrhoij 1938 end if 1939 if (usecprj_local==1) then 1940 call pawmkrhoij(atindx,atindx1,cprj,gs_hamk%dimcprj,dtset%istwfk,dtset%kptopt,dtset%mband,mband_cprj,& 1941 & mcprj_local,dtset%mkmem,mpi_enreg,natom,dtset%nband,dtset%nkpt,dtset%nspden,dtset%nspinor,& 1942 & dtset%nsppol,occ,dtset%paral_kgb,paw_dmft,pawrhoij_unsym,dtfil%unpaw,dtset%usewvl,dtset%wtk) 1943 else 1944 mcprj_tmp=my_nspinor*mband_cprj*dtset%mkmem*dtset%nsppol 1945 ABI_MALLOC(cprj_tmp,(natom,mcprj_tmp)) 1946 call pawcprj_alloc(cprj_tmp,0,gs_hamk%dimcprj) 1947 call ctocprj(atindx,cg,1,cprj_tmp,gmet,gprimd,0,0,0,dtset%istwfk,kg,dtset%kptns,& 1948 & mcg,mcprj_tmp,dtset%mgfft,dtset%mkmem,mpi_enreg,psps%mpsang,dtset%mpw,& 1949 & dtset%natom,nattyp,dtset%nband,dtset%natom,dtset%ngfft,dtset%nkpt,dtset%nloalg,& 1950 & npwarr,dtset%nspinor,dtset%nsppol,dtset%nsppol,ntypat,dtset%paral_kgb,ph1d,psps,rmet,dtset%typat,& 1951 & ucvol,dtfil%unpaw,xred,ylm,ylmgr_dum) 1952 call pawmkrhoij(atindx,atindx1,cprj_tmp,gs_hamk%dimcprj,dtset%istwfk,dtset%kptopt,& 1953 & dtset%mband,mband_cprj,mcprj_tmp,dtset%mkmem,mpi_enreg,natom,dtset%nband,dtset%nkpt,& 1954 & dtset%nspden,dtset%nspinor,dtset%nsppol,occ,dtset%paral_kgb,paw_dmft,pawrhoij_unsym,& 1955 & dtfil%unpaw,dtset%usewvl,dtset%wtk) 1956 call pawcprj_free(cprj_tmp) 1957 ABI_FREE(cprj_tmp) 1958 end if 1959 call timab(555,2,tsec) 1960 ! Build symetrized packed rhoij and compensated pseudo density 1961 cplex=1;ipert=0;idir=0;qpt(:)=zero 1962 if(dtset%usewvl==0) then 1963 call pawmkrho(1,compch_fft,cplex,gprimd,idir,indsym,ipert,mpi_enreg,& 1964 & my_natom,natom,dtset%nspden,dtset%nsym,ntypat,dtset%paral_kgb,pawang,pawfgr,pawfgrtab,& 1965 & dtset%pawprtvol,pawrhoij,pawrhoij_unsym,pawtab,qpt,rhowfg,rhowfr,rhor,rprimd,dtset%symafm,& 1966 & symrec,dtset%typat,ucvol,dtset%usewvl,xred,pawnhat=nhat,rhog=rhog) 1967 if (dtset%usekden==1) then 1968 ! DO WE NEED TAUG? 1969 call transgrid(1,mpi_enreg,dtset%nspden,+1,1,1,dtset%paral_kgb,pawfgr,tauwfg,taug,tauwfr,taur) 1970 end if 1971 else 1972 ! here do not pass rhog, we do not use it 1973 call pawmkrho(1,compch_fft,cplex,gprimd,idir,indsym,ipert,mpi_enreg,& 1974 & my_natom,natom,dtset%nspden,dtset%nsym,ntypat,dtset%paral_kgb,pawang,pawfgr,pawfgrtab,& 1975 & dtset%pawprtvol,pawrhoij,pawrhoij_unsym,pawtab,qpt,rhowfg,rhowfr,rhor,rprimd,dtset%symafm,& 1976 & symrec,dtset%typat,ucvol,dtset%usewvl,xred,pawnhat=nhat) 1977 ! In WVL: copy density to BigDFT object: 1978 call wvl_rho_abi2big(1,rhor,wvl%den) 1979 end if 1980 if (paral_atom) then 1981 call pawrhoij_free(pawrhoij_unsym) 1982 ABI_FREE(pawrhoij_unsym) 1983 end if 1984 end if ! psps%usepaw==1 1985 1986 if(paw_dmft%use_dmft==1) then 1987 ! == check noccmmp 1988 call setnoccmmp(1,0,rdum4,0,0,idum3,my_natom,natom,0,1,dtset%nsppol,0,ntypat,& 1989 & paw_ij,pawang,dtset%pawprtvol,pawrhoij,pawtab,rdum2,idum1,dtset%typat,0,dtset%usepawu,& 1990 & comm_atom=mpi_enreg%comm_atom,mpi_atmtab=mpi_enreg%my_atmtab) 1991 end if 1992 1993 ! Find and print minimum and maximum total electron density and locations 1994 ! Compute density residual (if required) and its squared norm 1995 if (iscf>=0) then 1996 if (psps%usepaw==0) then 1997 call prtrhomxmn(std_out,mpi_enreg,dtset%nfft,dtset%ngfft,dtset%nspden,1,rhor,ucvol=ucvol) 1998 else 1999 call prtrhomxmn(std_out,mpi_enreg,nfftf,pawfgr%ngfft,dtset%nspden,1,rhor,ucvol=ucvol) 2000 end if 2001 if (optres==1) then 2002 nvresid=rhor-nvresid 2003 call sqnorm_v(1,nfftf,nres2,dtset%nspden,optres,nvresid,mpi_comm_sphgrid=mpi_comm_sphgrid) 2004 if (dtset%usekden==1) then 2005 if (optres==1) tauresid=taur-tauresid 2006 endif 2007 end if 2008 end if 2009 2010 end if ! iscf>0 or iscf=-3 2011 ABI_NVTX_END_RANGE() 2012 2013 if(psps%usepaw==1.and.(iscf>=0.or.iscf==-3)) then 2014 ABI_FREE(rhowfr) 2015 ABI_FREE(rhowfg) 2016 if (dtset%usekden==1) then 2017 ABI_FREE(tauwfr) 2018 ABI_FREE(tauwfg) 2019 end if 2020 end if 2021 2022 call timab(994,2,tsec) 2023 2024 if (iscf==-1) then 2025 ! Eventually compute the excited states within tddft 2026 call timab(995,1,tsec) 2027 if (psps%usepaw==1) then 2028 ! In case of PAW calculation, have to transfer kxc from the fine to the coarse grid: 2029 ABI_MALLOC(cgrkxc,(dtset%nfft,nkxc)) 2030 do ikxc=1,nkxc 2031 call transgrid(1,mpi_enreg,1,-1,0,0,dtset%paral_kgb,pawfgr,rhodum,rhodum,cgrkxc(:,ikxc),kxc(:,ikxc)) 2032 end do 2033 call tddft(cg,dtfil,dtset,eigen,etotal,gmet,gprimd,gsqcut,& 2034 & kg,cgrkxc,dtset%mband,mgfftdiel,dtset%mkmem,mpi_enreg,dtset%mpw,dtset%nfft,& 2035 & ngfftdiel,dtset%nkpt,nkxc,npwarr,dtset%nspinor,dtset%nsppol,occ,ucvol,wffnew) 2036 ABI_FREE(cgrkxc) 2037 else 2038 call tddft(cg,dtfil,dtset,eigen,etotal,gmet,gprimd,gsqcut,& 2039 & kg,kxc,dtset%mband,mgfftdiel,dtset%mkmem,mpi_enreg,dtset%mpw,dtset%nfft,& 2040 & ngfftdiel,dtset%nkpt,nkxc,npwarr,dtset%nspinor,dtset%nsppol,occ,ucvol,wffnew) 2041 end if 2042 call timab(995,2,tsec) 2043 2044 else 2045 ! Eventually compute the susceptibility matrix and the 2046 ! dielectric matrix when istep_mix is equal to 1 or dielstrt 2047 call timab(996,1,tsec) 2048 computesusmat = dtset%testsusmat(dielop, dielstrt, istep_mix) !test if the matrix is to be computed 2049 if(computesusmat) then 2050 dielar(1)=dtset%diecut;dielar(2)=dtset%dielng 2051 dielar(3)=dtset%diemac;dielar(4)=dtset%diemix 2052 dielar(5)=dtset%diegap;dielar(6)=dtset%dielam 2053 dielar(7)=dtset%diemix;if (iscf>=10) dielar(7)=dtset%diemixmag 2054 usetimerev=1 2055 if (psps%usepaw==1.and.dtset%pawspnorb>0.and.dtset%kptopt/=1.and.dtset%kptopt/=2) usetimerev=0 2056 neglect_pawhat=1-dtset%pawsushat 2057 call suscep_stat(atindx,atindx1,cg,cprj,dielar,& 2058 & gs_hamk%dimcprj,doccde,eigen,gbound_diel,gprimd,& 2059 & irrzondiel,dtset%istwfk,kg,kg_diel,lmax_diel,& 2060 & dtset%mband,mcg,mcprj_local,mgfftdiel,dtset%mkmem,mpi_enreg,dtset%mpw,natom,dtset%nband,& 2061 & neglect_pawhat,nfftdiel,ngfftdiel,& 2062 & dtset%nkpt,npwarr,npwdiel,dtset%nspden,dtset%nspinor,dtset%nsppol,dtset%nsym,ntypat,& 2063 & occ,dtset%occopt,pawang,pawtab,phnonsdiel,ph1ddiel,rprimd,& 2064 & susmat,dtset%symafm,dtset%symrel,dtset%tnons,dtset%typat,ucvol,& 2065 & dtfil%unpaw,usecprj_local,psps%usepaw,usetimerev,dtset%wtk,ylmdiel) 2066 end if 2067 call timab(996,2,tsec) 2068 2069 end if ! end condition on iscf 2070 2071 call gs_hamk%free() 2072 2073 if (psps%usepaw==1) then 2074 if (usecprj==0) then 2075 call pawcprj_free(cprj_local) 2076 ABI_FREE(cprj_local) 2077 end if 2078 end if 2079 2080 if(dtset%usewvl==0) then 2081 ABI_FREE(EigMin) 2082 ABI_FREE(doccde) 2083 #if defined HAVE_GPU_CUDA 2084 if(dtset%gpu_option==ABI_GPU_LEGACY .or. dtset%gpu_option==ABI_GPU_KOKKOS) call gpu_finalize_ham_data() 2085 #endif 2086 end if 2087 2088 call timab(980,2,tsec) 2089 2090 DBG_EXIT("COLL") 2091 2092 contains
ABINIT/wvl_comm_eigen [ Functions ]
NAME
wvl_comm_eigen
FUNCTION
Computes occupations for the wavelet case Using BigDFT routines
INPUTS
OUTPUT
SIDE EFFECTS
NOTES
for the wvlbigdft case, see the routine 'wvl_occ_bigdft'
SOURCE
2435 subroutine wvl_comm_eigen() 2436 2437 !Arguments ------------------------------------ 2438 2439 !Local variables------------------------------- 2440 #if defined HAVE_BIGDFT 2441 integer:: ikpt,norb,shift 2442 #endif 2443 2444 ! ************************************************************************* 2445 2446 DBG_ENTER("COLL") 2447 2448 #if defined HAVE_BIGDFT 2449 if(wvlbigdft) then 2450 ! Communicates eigenvalues to all procs. 2451 ! This will print out the eigenvalues and Fermi level. 2452 call eigensystem_info(mpi_enreg%me_wvl, mpi_enreg%nproc_wvl,0.d0,& 2453 & wvl%wfs%ks%Lzd%Glr%wfd%nvctr_c+7*wvl%wfs%ks%Lzd%Glr%wfd%nvctr_f,& 2454 & wvl%wfs%ks%orbs,wvl%wfs%ks%psi) 2455 else 2456 ! Send all eigenvalues to all procs. 2457 ! I simply communicate eigenvalues: I do not print them into screen, nor calculate Fermi-level. 2458 norb=wvl%wfs%ks%orbs%norb 2459 if (mpi_enreg%nproc_wvl > 1) then 2460 shift=1 2461 do ikpt = 1, wvl%wfs%ks%orbs%nkpts 2462 call xmpi_bcast(wvl%wfs%ks%orbs%eval(shift:shift+norb-1),wvl%wfs%ks%orbs%ikptproc(ikpt),mpi_enreg%comm_wvl,ierr) 2463 shift=shift+norb 2464 end do 2465 end if 2466 end if 2467 2468 !Copy eigenvalues from BigDFT object to "eigen" 2469 call wvl_eigen_abi2big(dtset%mband,dtset%nkpt,dtset%nsppol,eigen,2,wvl%wfs) 2470 2471 #else 2472 BIGDFT_NOTENABLED_ERROR() 2473 #endif 2474 2475 DBG_EXIT("COLL") 2476 2477 end subroutine wvl_comm_eigen 2478 2479 end subroutine vtorho
ABINIT/wvl_nscf_loop [ Functions ]
NAME
wvl_nscf_loop
FUNCTION
Non-self-consistent field cycle in Wavelets See also "wvl_nscf_loop_bigdft"
INPUTS
nnsclo= number of non-self consistent field iterations
OUTPUT
SOURCE
2110 subroutine wvl_nscf_loop() 2111 2112 !Arguments ------------------------------------ 2113 ! integer, intent(in) :: istep,mcprj,nfft,nnsclo 2114 ! real(dp), intent(inout) :: residm 2115 ! type(dataset_type), intent(in) :: dtset 2116 ! type(MPI_type), intent(in) :: mpi_enreg 2117 ! type(energies_type), intent(inout) :: energies 2118 ! type(wvl_data), intent(inout) :: wvl 2119 ! !arrays 2120 ! real(dp), intent(inout) :: xcart(3, dtset%natom) 2121 ! real(dp), dimension(6), intent(out) :: strsxc 2122 ! type(pawcprj_type),dimension(dtset%natom,mcprj),intent(out)::cprj 2123 2124 !Local variables------------------------------- 2125 integer :: inonsc,ii 2126 integer,parameter :: iscf_=-1 !do not do a SCF cycle 2127 logical,parameter :: do_scf=.false. !do not do a SCF cycle 2128 logical,parameter :: wvlbigdft=.false. 2129 real(dp) :: dum,eexctx,eh,ekin,eloc,enl,esicdc,evxc,exc 2130 2131 ! ************************************************************************* 2132 2133 DBG_ENTER("COLL") 2134 2135 if(nnsclo_now>0) then 2136 do inonsc=1,nnsclo_now 2137 call wvl_psitohpsi(dtset%diemix,eexctx,exc,eh,ekin,eloc,enl,esicdc,& 2138 & istep,inonsc,iscf_,mpi_enreg%me_wvl,dtset%natom,& 2139 & nfftf,mpi_enreg%nproc_wvl,dtset%nspden,& 2140 & dum,do_scf,evxc,wvl,wvlbigdft,xcart,strsxc) 2141 call wvl_hpsitopsi(cprj,dtset,energies,inonsc,mcprj_local,mpi_enreg, & 2142 & residm,wvl,xcart) 2143 if(residm<dtset%tolwfr) exit !Exit loop if converged 2144 end do 2145 2146 else 2147 do ii=1, dtset%nline 2148 ! Direct minimization technique: no diagonalization 2149 call wvl_psitohpsi(dtset%diemix,eexctx,exc,eh,ekin,eloc,enl,esicdc,& 2150 & istep,ii,iscf_,mpi_enreg%me_wvl,dtset%natom,& 2151 & nfftf,mpi_enreg%nproc_wvl,dtset%nspden,& 2152 & dum,do_scf,evxc,wvl,wvlbigdft,xcart,strsxc) 2153 call wvl_hpsitopsi(cprj,dtset,energies,ii,mcprj_local,mpi_enreg, & 2154 & residm,wvl,xcart) 2155 if(residm<dtset%tolwfr) exit !Exit loop if converged 2156 end do 2157 end if 2158 2159 ! Update energies depending on new WF 2160 energies%e_kinetic=ekin 2161 energies%e_nlpsp_vfock=enl 2162 energies%e_exactX=eexctx 2163 energies%e_sicdc=esicdc 2164 2165 ! Eventually update energies depending on density 2166 if (dtset%iscf<10) then 2167 energies%e_localpsp=eloc 2168 energies%e_hartree=eh 2169 energies%e_xc=exc ; energies%e_xcdc=evxc 2170 else if (nnsclo_now==0) then 2171 energies%e_localpsp=eloc 2172 end if 2173 2174 ! End of nscf iterations 2175 if (do_last_ortho) then 2176 ! !Don't update energies (nscf cycle has been done); just recompute potential 2177 inonsc=nnsclo_now;if (nnsclo_now==0) inonsc=dtset%nline 2178 call wvl_psitohpsi(dtset%diemix,eexctx,exc,eh,ekin,eloc,enl,esicdc,& 2179 & istep,inonsc,iscf_,mpi_enreg%me_wvl,dtset%natom,& 2180 & nfftf,mpi_enreg%nproc_wvl,dtset%nspden,& 2181 & dum,do_scf,evxc,wvl,wvlbigdft,xcart,strsxc) 2182 end if 2183 2184 DBG_EXIT("COLL") 2185 2186 end subroutine wvl_nscf_loop
ABINIT/wvl_nscf_loop_bigdft [ Functions ]
NAME
wvl_nscf_loop_bigdft
FUNCTION
Non-self-consistent field cycle in Wavelets It follows the BigDFT scheme. See also "wvl_nscf_loop"
INPUTS
nnsclo= number of non-self consistent field iterations
OUTPUT
argout(sizeout)=description
SOURCE
2206 subroutine wvl_nscf_loop_bigdft() 2207 2208 !Arguments ------------------------------------ 2209 ! integer, intent(in) :: istep,mcprj,nfft,nnsclo 2210 ! real(dp), intent(inout) :: residm 2211 ! real(dp), intent(out) :: nres2 2212 ! type(dataset_type), intent(in) :: dtset 2213 ! type(MPI_type), intent(in) :: mpi_enreg 2214 ! type(energies_type), intent(inout) :: energies 2215 ! type(wvl_data), intent(inout) :: wvl 2216 !arrays 2217 ! real(dp), intent(inout) :: xcart(3, dtset%natom) 2218 ! real(dp), dimension(6), intent(out) :: strsxc 2219 ! type(pawcprj_type),dimension(dtset%natom,mcprj),intent(out)::cprj 2220 2221 !Local variables------------------------------- 2222 integer :: inonsc 2223 integer,parameter :: iscf_=-1 !do not do a SCF cycle 2224 logical,parameter :: do_scf=.false. !do not do a SCF cycle 2225 logical,parameter :: wvlbigdft=.true. 2226 real(dp) :: eexctx,eh,ekin,eloc,enl,esicdc,evxc,exc 2227 2228 ! ************************************************************************* 2229 2230 DBG_ENTER("COLL") 2231 2232 call wvl_hpsitopsi(cprj,dtset, energies, istep, mcprj_local,mpi_enreg, & 2233 & residm, wvl,xcart) 2234 2235 if (nnsclo_now>2) then 2236 do inonsc = 2, nnsclo_now-1 2237 call wvl_psitohpsi(dtset%diemix, energies%e_exactX, energies%e_xc, & 2238 & energies%e_hartree, energies%e_kinetic, energies%e_localpsp, & 2239 & energies%e_nlpsp_vfock, energies%e_sicdc, istep, inonsc, iscf_, & 2240 & mpi_enreg%me_wvl, dtset%natom, nfftf, mpi_enreg%nproc_wvl,& 2241 & dtset%nspden, nres2, do_scf,energies%e_xcdc, & 2242 & wvl, wvlbigdft, xcart, strsxc) 2243 call wvl_hpsitopsi(cprj,dtset, energies, inonsc, mcprj_local,mpi_enreg, & 2244 & residm, wvl,xcart) 2245 end do 2246 end if 2247 2248 ! End of nscf iterations 2249 if (do_last_ortho.and.nnsclo_now<=1) then 2250 ! !Don't update energies (nscf cycle has been done); just recompute potential 2251 call wvl_psitohpsi(dtset%diemix,eexctx,exc,eh,ekin,eloc,enl,esicdc, & 2252 & istep, 1, iscf_, mpi_enreg%me_wvl, dtset%natom, nfftf, & 2253 & mpi_enreg%nproc_wvl,dtset%nspden, nres2, do_scf,evxc, & 2254 & wvl, wvlbigdft, xcart, strsxc) 2255 else if (do_last_ortho.and.nnsclo_now>1) then 2256 ! !Update energies and potential (nscf cycles are not finished) 2257 call wvl_psitohpsi(dtset%diemix, energies%e_exactX, energies%e_xc, & 2258 & energies%e_hartree,energies%e_kinetic, energies%e_localpsp, & 2259 & energies%e_nlpsp_vfock, energies%e_sicdc, istep, nnsclo_now, iscf_, & 2260 & mpi_enreg%me_wvl, dtset%natom, nfftf, mpi_enreg%nproc_wvl,& 2261 & dtset%nspden, nres2, do_scf,energies%e_xcdc, & 2262 & wvl, wvlbigdft, xcart, strsxc) 2263 end if 2264 2265 DBG_EXIT("COLL") 2266 2267 end subroutine wvl_nscf_loop_bigdft
ABINIT/wvl_occ [ Functions ]
NAME
wvl_occ
FUNCTION
Computes occupations for the wavelet case
INPUTS
OUTPUT
NOTES
for the wvlbigdft case, see the routine 'wvl_occ_bigdft'
SOURCE
2343 subroutine wvl_occ() 2344 2345 !Local variables------------------------------- 2346 real(dp):: doccde_(dtset%mband*dtset%nkpt*dtset%nsppol) 2347 ! ************************************************************************* 2348 2349 DBG_ENTER("COLL") 2350 2351 ! Compute the new occupation numbers from eigen 2352 call newocc(doccde_,eigen,energies%entropy,energies%e_fermie,energies%e_fermih,dtset%ivalence,dtset%spinmagntarget,& 2353 & dtset%mband,dtset%nband,dtset%nelect,dtset%ne_qFD,dtset%nh_qFD,dtset%nkpt,dtset%nspinor,& 2354 & dtset%nsppol,occ,dtset%occopt,prtvol,dtset%tphysel,dtset%tsmear,dtset%wtk,& 2355 & prtstm=dtset%prtstm,stmbias=dtset%stmbias) 2356 2357 ! Copy occupations and efermi to BigDFT variables 2358 call wvl_occ_abi2big(dtset%mband,dtset%nkpt,dtset%nsppol,occ,1,wvl%wfs) 2359 2360 #if defined HAVE_BIGDFT 2361 ! Copy Fermi level to BigDFT variable: 2362 wvl%wfs%ks%orbs%efermi=energies%e_fermie 2363 #endif 2364 2365 DBG_EXIT("COLL") 2366 2367 end subroutine wvl_occ
ABINIT/wvl_occ_bigdft [ Functions ]
NAME
wvl_occ_bigdft
FUNCTION
Computes occupations for the wavelet case Using BigDFT routines
INPUTS
OUTPUT
SIDE EFFECTS
NOTES
for the wvlbigdft case, see the routine 'wvl_occ_bigdft'
SOURCE
2389 subroutine wvl_occ_bigdft() 2390 2391 ! ************************************************************************* 2392 2393 DBG_ENTER("COLL") 2394 2395 ! Transfer occopt from ABINIT to BigDFT 2396 #if defined HAVE_BIGDFT 2397 occopt_bigdft=dtset%occopt 2398 call wvl_occopt_abi2big(occopt_bigdft,occopt_bigdft,1) 2399 2400 !Calculate occupations using BigDFT routine 2401 call evaltoocc(mpi_enreg%me_wvl, mpi_enreg%nproc_wvl, .false., & 2402 & dtset%tsmear, wvl%wfs%ks%orbs, occopt_bigdft) 2403 2404 !Pass e_fermi from BigDFT object to ABINIT variable: 2405 energies%e_fermie = wvl%wfs%ks%orbs%efermi 2406 2407 !Copy occupations from BigDFT to ABINIT variables 2408 call wvl_occ_abi2big(dtset%mband,dtset%nkpt,dtset%nsppol,occ,2,wvl%wfs) 2409 #endif 2410 2411 DBG_EXIT("COLL") 2412 2413 end subroutine wvl_occ_bigdft