TABLE OF CONTENTS


ABINIT/cgq_builder [ Functions ]

[ Top ] [ Functions ]

NAME

 cgq_builder

FUNCTION

 This routine locates cgq for efield calculations, especially for parallel case

INPUTS

  berryflag = logical flag determining use of electric field variables
  cg(2,mcg)=planewave coefficients of wavefunctions.
  dtset <type(dataset_type)>=all input variables for this dataset
  ikpt=index of current k kpt
  ikpt_loc=index of k point on current processor (see vtorho.F90)
  isspol=value of spin polarization currently treated
  me_distrb=current value from spaceComm_distrb (see vtorho.F90)
  mcg=size of wave-functions array (cg) =mpw*nspinor*mband*mkmem*nsppol
  mcgq=size of cgq array (see vtorho.F90)
  mkgq=size of pwnsfacq array (see vtorho.F90)
  my_nspinor=nspinor value determined by current // set up
  nband_k=number of bands at each k point
  nproc_distrb=nproc from spaceComm_distrb (see vtorho.F90)
  npwarr(nkpt)=number of planewaves in basis at this k point
  pwnsfac(2,pwind_alloc) = phase factors for non-symmorphic translations
                           (see initberry.f)
  pwind_alloc = first dimension of pwind
  spaceComm_distrb=comm_cell from mpi_enreg

OUTPUT

  cgq(2,mcgq)=planewave coefficients of wavenfunctions adjacent to cg at ikpt
  pwnsfacq(2,mkgq)=phase factors for non-symmorphic translations for cg's adjacent to cg(ikpt)

SIDE EFFECTS

 Input/Output
   dtefield <type(efield_type)> = efield variables
   mpi_enreg=information about MPI parallelization

TODO

NOTES

SOURCE

2523 subroutine cgq_builder(berryflag,cg,cgq,dtefield,dtset,ikpt,ikpt_loc,isppol,mcg,mcgq,&
2524 &                      me_distrb,mkgq,mpi_enreg,my_nspinor,nband_k,nproc_distrb,&
2525 &                      npwarr,pwnsfac,pwnsfacq,pwind_alloc,spaceComm_distrb)
2526 
2527 !Arguments ------------------------------------
2528  integer,intent(in) :: ikpt,ikpt_loc,isppol,me_distrb,mcg,mcgq,mkgq,my_nspinor,nband_k
2529  integer,intent(in) :: nproc_distrb,pwind_alloc,spaceComm_distrb
2530  logical,intent(in) :: berryflag
2531  type(dataset_type), intent(in) :: dtset
2532  type(efield_type), intent(inout) :: dtefield
2533  type(MPI_type), intent(in) :: mpi_enreg
2534 !arrays
2535  integer,intent(in) :: npwarr(dtset%nkpt)
2536  real(dp),intent(in) :: cg(2,mcg),pwnsfac(2,pwind_alloc)
2537  real(dp),intent(out) :: cgq(2,mcgq),pwnsfacq(2,mkgq)
2538 
2539 !Local variables -------------------------
2540 !scalars
2541  integer :: count,count1,icg1,icg2,dest,his_source
2542  integer :: idir,ierr,ifor,ikg1,ikg2,ikptf,ikpt1f,ikpt1i
2543  integer :: jkpt,jkpt1i,jkptf,jkpt1f,jsppol,my_source,npw_k1,tag
2544 !arrays
2545  integer,allocatable :: flag_send(:,:), flag_receive(:)
2546  real(dp) :: tsec(2)
2547  real(dp),allocatable :: buffer(:,:)
2548 
2549 ! *************************************************************************
2550 
2551  if (mcgq==0.or.mkgq==0) return
2552 
2553  call timab(983,1,tsec)
2554 
2555 !Test compatbility of berryflag
2556  if (berryflag) then
2557    ABI_MALLOC(flag_send,(0:nproc_distrb-1,dtefield%fnkpt))
2558  end if
2559  ABI_MALLOC(flag_receive,(dtset%nkpt))
2560  flag_send(:,:) = 0
2561  flag_receive(:) = 0
2562 
2563  if (berryflag) ikptf = dtefield%i2fbz(ikpt)
2564 
2565  do idir = 1, 3
2566 
2567 !  skip idir values for which efield_dot(idir) = 0
2568    if (berryflag .and. abs(dtefield%efield_dot(idir)) < tol12 ) cycle
2569 
2570    do ifor = 1, 2
2571 
2572      if(berryflag) then
2573        dtefield%sflag(:,ikpt + dtset%nkpt*(isppol - 1),ifor,idir) = 0
2574        ikpt1f = dtefield%ikpt_dk(ikptf,ifor,idir)
2575        ikpt1i = dtefield%indkk_f2ibz(ikpt1f,1)
2576      end if
2577 
2578      npw_k1 = npwarr(ikpt1i)
2579      count = npw_k1*my_nspinor*nband_k
2580      my_source = mpi_enreg%proc_distrb(ikpt1i,1,isppol)
2581 
2582      do dest = 0, nproc_distrb-1
2583 
2584        if ((dest==me_distrb).and.(ikpt_loc <= dtset%mkmem)) then
2585 !        I am dest and have something to do
2586 
2587          if ( my_source == me_distrb ) then
2588 !          I am destination and source
2589 
2590            if(berryflag) then
2591              ikg1 = dtefield%fkgindex(ikpt1f)
2592              ikg2 = dtefield%cgqindex(3,ifor+2*(idir-1),ikpt+(isppol-1)*dtset%nkpt)
2593              icg1 = dtefield%cgindex(ikpt1i,isppol)
2594              icg2 = dtefield%cgqindex(2,ifor+2*(idir-1),ikpt+(isppol-1)*dtset%nkpt)
2595            end if
2596 
2597            pwnsfacq(:,ikg2 + 1:ikg2 + npw_k1) = pwnsfac(:,ikg1 + 1:ikg1 + npw_k1)
2598            cgq(:,icg2 + 1:icg2 + count) = cg(:,icg1 + 1:icg1 + count)
2599 
2600          else !  I am the destination but not the source -> receive
2601 !          receive pwnsfacq
2602            if(berryflag) then
2603              tag = ikpt1f + (isppol - 1)*dtefield%fnkpt
2604              ikg1 = dtefield%cgqindex(3,ifor+2*(idir-1),ikpt+(isppol-1)*dtset%nkpt)
2605            end if
2606            ABI_MALLOC(buffer,(2,npw_k1))
2607            call xmpi_recv(buffer,my_source,tag,spaceComm_distrb,ierr)
2608            pwnsfacq(:,ikg1+1:ikg1+npw_k1) = buffer(:,1:npw_k1)
2609            ABI_FREE(buffer)
2610 
2611 !          receive cgq if necessary
2612            if(flag_receive(ikpt1i) == 0) then
2613              ABI_MALLOC(buffer,(2,count))
2614              tag = ikpt1i + (isppol - 1)*dtset%nkpt
2615              call xmpi_recv(buffer,my_source,tag,spaceComm_distrb,ierr)
2616              if(berryflag) icg1 = dtefield%cgqindex(2,ifor+2*(idir-1),ikpt+(isppol-1)*dtset%nkpt)
2617              cgq(:,icg1+1:icg1+count) = buffer(:,1:count)
2618              ABI_FREE(buffer)
2619              flag_receive(ikpt1i) = 1
2620            end if ! end if flag_receive == 0
2621          end if ! end tasks if I am the destination
2622 
2623        else if (ikpt_loc <= mpi_enreg%mkmem(dest)) then  ! dest != me and the dest has a k-point to treat
2624 
2625 !        jkpt is the kpt which is being treated by dest (in ibz)
2626 !        jsppol is his isppol
2627          jkpt = mpi_enreg%kpt_loc2ibz_sp(dest, ikpt_loc,1)
2628          jsppol = mpi_enreg%kpt_loc2ibz_sp(dest, ikpt_loc,2)
2629 
2630          if(jkpt > 0 .and. jsppol > 0) then
2631 
2632            if(berryflag) then
2633              jkptf = dtefield%i2fbz(jkpt)
2634              jkpt1f = dtefield%ikpt_dk(jkptf,ifor,idir)
2635              jkpt1i = dtefield%indkk_f2ibz(jkpt1f,1)
2636            end if
2637            his_source = mpi_enreg%proc_distrb(jkpt1i,1,jsppol)
2638 
2639            if (his_source == me_distrb) then
2640 
2641 !            send
2642 !            pwnsfacq
2643              if(berryflag) then
2644                ikg1 = dtefield%fkgindex(jkpt1f)
2645                tag = jkpt1f + (jsppol - 1)*dtefield%fnkpt
2646              end if
2647              count1 = npwarr(jkpt1i)
2648              ABI_MALLOC(buffer,(2,count1))
2649              buffer(:,1:count1)  = pwnsfac(:,ikg1+1:ikg1+count1)
2650              call xmpi_send(buffer,dest,tag,spaceComm_distrb,ierr)
2651              ABI_FREE(buffer)
2652 
2653 !            send cgq if necessary
2654              if(flag_send(dest, jkpt1i)==0) then
2655                if(berryflag) icg1 = dtefield%cgindex(jkpt1i,jsppol)
2656                tag = jkpt1i + (jsppol - 1)*dtset%nkpt
2657                count1 = npwarr(jkpt1i)*nband_k*my_nspinor
2658                ABI_MALLOC(buffer,(2,count1))
2659                buffer(:,1:count1)  = cg(:,icg1+1:icg1+count1)
2660                call xmpi_send(buffer,dest,tag,spaceComm_distrb,ierr)
2661                ABI_FREE(buffer)
2662                flag_send(dest, jkpt1i)=1
2663              end if ! if send cgq
2664 
2665            end if ! end check that his_source == me
2666 
2667          end if ! end check on jkpt > 0 and jsppol > 0
2668 
2669        end if ! end check on me = dest else if me != dest
2670 
2671      end do ! end loop over dest = 0, nproc-1
2672 
2673    end do !end loop over ifor
2674 
2675  end do !end loop over idir
2676 
2677  call timab(983,2,tsec)
2678 
2679  ABI_FREE(flag_send)
2680  ABI_FREE(flag_receive)
2681 
2682 end subroutine cgq_builder

ABINIT/e_eigen [ Functions ]

[ Top ] [ Functions ]

NAME

  e_eigen

FUNCTION

  Computes eigenvalues energy from eigen, occ, kpt, wtk

INPUTS

  eigen(nkpt*nsppol)=eigenvalues
  mband= maximum number of bands
  nband(nkpt*nsppol)= number of bands for each k-point and spin
  nkpt= number of k-points
  nsppol= number of spin polarization
  occ(mband*nkpt*nsppol)=occupations
  wtk(nkpt)= k-point weights

OUTPUT

  e_eigenvalues= eigenvalues energy

SOURCE

2290 subroutine e_eigen(eigen,e_eigenvalues,mband,nband,nkpt,nsppol,occ,wtk)
2291 
2292 !Arguments ------------------------------------
2293  integer , intent(in)  :: mband,nkpt,nsppol
2294  integer , intent(in)  :: nband(nkpt*nsppol)
2295  real(dp) , intent(in)  :: eigen(mband*nkpt*nsppol)
2296  real(dp) , intent(in)  :: occ(mband*nkpt*nsppol)
2297  real(dp) , intent(in)  :: wtk(nkpt)
2298  real(dp) , intent(out) :: e_eigenvalues
2299 
2300 !Local variables-------------------------------
2301  integer :: ib,iband,ii,ikpt,isppol,nband_k
2302  real(dp) :: wtk_k
2303 
2304 ! *************************************************************************
2305 
2306    DBG_ENTER("COLL")
2307    ii=0;ib=0
2308    do isppol=1,nsppol
2309      do ikpt=1,nkpt
2310        ii=ii+1
2311        nband_k=nband(ii) ;  wtk_k=wtk(ii)
2312        do iband=1,nband_k
2313          ib=ib+1
2314          if(abs(occ(ib)) > tol8) then
2315            e_eigenvalues = e_eigenvalues + wtk_k*occ(ib)*eigen(ib)
2316          end if
2317        end do
2318      end do
2319    end do
2320 
2321    DBG_EXIT("COLL")
2322 
2323  end subroutine e_eigen

ABINIT/m_vtorho [ Modules ]

[ Top ] [ Modules ]

NAME

  m_vtorho

FUNCTION

COPYRIGHT

  Copyright (C) 1998-2024 ABINIT group (DCA, XG, GMR, MF, AR, MM, MT, FJ, MB, MT, TR)
  This file is distributed under the terms of the
  GNU General Public License, see ~abinit/COPYING
  or http://www.gnu.org/copyleft/gpl.txt .

SOURCE

 16 #if defined HAVE_CONFIG_H
 17 #include "config.h"
 18 #endif
 19 
 20 #include "abi_common.h"
 21 
 22 ! nvtx related macro definition
 23 #include "nvtx_macros.h"
 24 
 25 module m_vtorho
 26 
 27  use iso_fortran_env, only : int32,int64,real32,real64
 28  use defs_basis
 29  use defs_wvltypes
 30  use m_abicore
 31  use m_xmpi
 32  use m_ab7_mixing
 33  use m_errors
 34  use m_wffile
 35  use m_efield
 36  use m_cgtools
 37  use m_gemm_nonlop
 38  use m_gemm_nonlop_gpu
 39  use m_gemm_nonlop_ompgpu
 40  use m_hdr
 41  use m_dtset
 42  use m_dtfil
 43  use m_extfpmd
 44  use m_ompgpu_utils
 45 
 46  use defs_datatypes,       only : pseudopotential_type
 47  use defs_abitypes,        only : MPI_type
 48  use m_fstrings,           only : sjoin, itoa
 49  use m_time,               only : timab
 50  use m_geometry,           only : xred2xcart
 51  use m_occ,                only : newocc
 52  use m_pawang,             only : pawang_type
 53  use m_pawtab,             only : pawtab_type
 54  use m_paw_ij,             only : paw_ij_type
 55  use m_pawfgrtab,          only : pawfgrtab_type
 56  use m_pawrhoij,           only : pawrhoij_type, pawrhoij_alloc, pawrhoij_free, pawrhoij_io, pawrhoij_inquire_dim
 57  use m_pawcprj,            only : pawcprj_type, pawcprj_alloc, pawcprj_free, pawcprj_getdim
 58  use m_pawfgr,             only : pawfgr_type
 59  use m_energies,           only : energies_type
 60  use m_hamiltonian,        only : init_hamiltonian, gs_hamiltonian_type, gspot_transgrid_and_pack
 61  use m_bandfft_kpt,        only : bandfft_kpt, bandfft_kpt_type, bandfft_kpt_set_ikpt, &
 62                                   bandfft_kpt_savetabs, bandfft_kpt_restoretabs, prep_bandfft_tabs
 63  use m_electronpositron,   only : electronpositron_type,electronpositron_calctype
 64  use m_paw_dmft,           only : paw_dmft_type,init_dmft,destroy_dmft,print_dmft,saveocc_dmft
 65  use m_paw_correlations,   only : setnoccmmp
 66  use m_paw_occupancies,    only : pawmkrhoij
 67  use m_paw_mkrho,          only : pawmkrho
 68  use m_crystal,            only : crystal_init, crystal_t
 69  use m_oper,               only : oper_type,init_oper,destroy_oper
 70  use m_io_tools,           only : flush_unit
 71  use m_abi2big,            only : wvl_occ_abi2big, wvl_rho_abi2big, wvl_occopt_abi2big, wvl_eigen_abi2big
 72  use m_fock,               only : fock_type, fock_ACE_type, fock_updateikpt, fock_calc_ene
 73  use m_invovl,             only : make_invovl
 74  use m_tddft,              only : tddft
 75  use m_kg,                 only : mkkin, mkkpg
 76  use m_suscep_stat,        only : suscep_stat
 77  use m_fft,                only : fftpac
 78  use m_spacepar,           only : symrhg
 79  use m_vtowfk,             only : vtowfk
 80  use m_mkrho,              only : mkrho, prtrhomxmn
 81  use m_mkffnl,             only : mkffnl
 82  use m_mpinfo,             only : proc_distrb_cycle
 83  use m_common,             only : prteigrs
 84  use m_dmft,               only : dmft_solve
 85  use m_datafordmft,        only : datafordmft
 86  use m_fourier_interpol,   only : transgrid
 87  use m_cgprj,              only : ctocprj
 88  use m_wvl_rho,            only : wvl_mkrho
 89  use m_wvl_psi,            only : wvl_hpsitopsi, wvl_psitohpsi, wvl_nl_gradient
 90  use m_inwffil,            only : cg_from_atoms
 91  use m_chebfiwf,           only : chebfiwf2_blocksize
 92 
 93 #if defined HAVE_GPU_CUDA
 94  use m_manage_cuda
 95 #endif
 96 
 97 #if defined HAVE_YAKL
 98  use gator_mod
 99 #endif
100 
101 #if defined HAVE_BIGDFT
102  use BigDFT_API,           only : last_orthon, evaltoocc, write_energies, eigensystem_info
103 #endif
104 
105 #if defined(HAVE_GPU) && defined(HAVE_GPU_MARKERS)
106  use m_nvtx_data
107 #endif
108 
109 #ifdef HAVE_FC_ISO_C_BINDING
110  use, intrinsic :: iso_c_binding, only : c_int64_t
111 #endif
112 
113 
114  implicit none
115 
116  private

ABINIT/vtorho [ Functions ]

[ Top ] [ Functions ]

NAME

 vtorho

FUNCTION

 This routine compute the new density from a fixed potential (vtrial)
 but might also simply compute eigenvectors and eigenvalues.
 The main part of it is a wf update over all k points.

INPUTS

  itimes(2)=itime array, contain itime=itimes(1) and itimimage_gstate=itimes(2) from outer loops
  afford=used to dimension susmat
  atindx(natom)=index table for atoms (see gstate.f)
  atindx1(natom)=index table for atoms, inverse of atindx (see gstate.f)
  cpus= cpu time limit in seconds
  dbl_nnsclo=if 1, will double the value of dtset%nnsclo
  dielop= if positive, the dielectric matrix must be computed.
  dielstrt=number of the step at which the dielectric preconditioning begins.
  dmatpawu= fixed occupation matrix of correlated orbitals (DFT+U or DMFT only)
  dtefield <type(efield_type)> = variables related to Berry phase
      calculations (see initberry.f)
  dtfil <type(datafiles_type)>=variables related to files
  dtset <type(dataset_type)>=all input variables for this dataset
   | mband=maximum number of bands
   | mgfft=maximum size of 1D FFTs
   | mkmem =number of k points treated by this node.
   | mpw=maximum dimensioned size of npw
   | nfft=(effective) number of FFT grid points (for this processor)
   | nkpt=number of k points.
   | nspden=number of spin-density components
   | nsppol=1 for unpolarized, 2 for spin-polarized
   | nsym=number of symmetry elements in space group
   | typat= array of types of the natoms
  electronpositron <type(electronpositron_type)>=quantities for the electron-positron annihilation
  etotal=total energy (Ha) - only needed for tddft
  fock <type(fock_type)>= quantities to calculate Fock exact exchange
  gbound_diel(2*mgfftdiel+8,2)=G sphere boundary for the dielectric matrix
  gmet(3,3)=reciprocal space metric tensor in bohr**-2.
  gprimd(3,3)=dimensional reciprocal space primitive translations
   (3x3 tensor) and grads wrt atomic coordinates (3*natom)
  gsqcut=cutoff on (k+G)^2 (bohr^-2)
  hdr <type(hdr_type)>=the header of wf, den and pot files
  indsym(4,nsym,natom)=indirect indexing array for atom labels
  irrzon(nfft**(1-1/nsym),2,(nspden/nsppol)-3*(nspden/4))=irreducible zone data
  irrzondiel(nfftdiel**(1-1/nsym),2,(nspden/nsppol)-3*(nspden/4))=irreducible zone data for diel matrix
                                     nfft**(1-1/nsym) is 1 if nsym==1, and nfft otherwise
  istep=index of the number of steps in the routine scfcv
  istep_mix=index of the number of steps for the SCF mixing (can be <istep)
  kg(3,mpw*mkmem)=reduced planewave coordinates.
  kg_diel(3,npwdiel)=reduced planewave coordinates for the dielectric matrix.
  kxc(nfftf,nkxc)=exchange-correlation kernel, needed only if nkxc/=0 .
  lmax_diel=1+max. value of l angular momentum used for dielectric matrix
  mcg=size of wave-functions array (cg) =mpw*nspinor*mband*mkmem*nsppol
  mcprj=size of projected wave-functions array (cprj) =nspinor*mband*mkmem*nsppol
  mgfftdiel=maximum size of 1D FFTs, for the computation of the dielectric matrix
  mpi_enreg=information about MPI parallelization
  my_natom=number of atoms treated by current processor
  natom=number of atoms in cell.
  nattyp(ntypat)= # atoms of each type.
  nfftf= -PAW ONLY- number of FFT grid points for the fine grid
         (nfftf=nfft for norm-conserving potential runs)
  nfftdiel=number of fft grid points for the computation of the diel matrix
  ngfftdiel(18)=contain all needed information about 3D FFT, for dielectric matrix,
                see ~abinit/doc/variables/vargs.htm#ngfft
  nkxc=second dimension of the array kxc, see rhotoxc.f for a description
  npwarr(nkpt)=number of planewaves in basis at this k point
  npwdiel=size of the susmat array.
  ntypat=number of types of atoms in unit cell.
  optforces=option for the computation of forces (0: no force;1: forces)
  optres=0: the new value of the density is computed in place of the input value
         1: only the density residual is computed ; the input density is kept
  paw_dmft  <type(paw_dmft_type)>= paw+dmft related data
  paw_ij(my_natom*usepaw) <type(paw_ij_type)>=paw arrays given on (i,j) channels
  pawang <type(pawang)>=paw angular mesh and related data
  pawfgr <type(pawfgr_type)>=fine grid parameters and related data
  pawfgrtab(my_natom*usepaw) <type(pawfgrtab_type)>=atomic data given on fine rectangular grid
  pawtab(ntypat*usepaw) <type(pawtab_type)>=paw tabulated starting data
  phnons(2,nfft**(1-1/nsym),(nspden/nsppol)-3*(nspden/4))=nonsymmorphic translation phases
                                    nfft**(1-1/nsym) is 1 if nsym==1, and nfft otherwise
  phnonsdiel(2,nfft**(1-1/nsym),(nspden/nsppol)-3*(nspden/4))=nonsymmorphic translation phases,
   for diel matr
                                     nfft**(1-1/nsym) is 1 if nsym==1, and nfft otherwise
  ph1d(2,3*(2*mgfft+1)*natom)=one-dimensional structure factor information
  ph1ddiel(2,3*(2*mgfftdiel+1)*natom*usepaw)=one-dimensional structure factor information
                                             for the dielectric matrix
  psps <type(pseudopotential_type)>=variables related to pseudopotentials
  pwind(pwind_alloc,2,3) = array used to compute
           the overlap matrix smat between k-points (see initberry.f)
  pwind_alloc = first dimension of pwind
  pwnsfac(2,pwind_alloc) = phase factors for non-symmorphic translations
                           (see initberry.f)
  rmet(3,3)=real space metric (bohr**2)
  rprimd(3,3)=dimensional primitive vectors
  symrec(3,3,nsym)=symmetry operations in reciprocal space
  ucvol=unit cell volume in bohr**3.
  usecprj=1 if cprj datastructure is stored in memory
  wffnew,unit numbers for wf disk files.
  with_vectornd = 1 if vectornd allocated
  vectornd(with_vectornd*nfftf,nspden,3)=nuclear dipole moment vector potential
  vtrial(nfftf,nspden)=INPUT potential Vtrial(r).
  [vxctau(nfft,nspden,4*usekden)]=(only for meta-GGA): derivative of XC energy density
    with respect to kinetic energy density (depsxcdtau). The arrays vxctau contains also
    the gradient of vxctau (gvxctau) in vxctau(:,:,2:4)
  xred(3,natom)=reduced dimensionless atomic coordinates
  ylm(mpw*mkmem,mpsang*mpsang*useylm)= real spherical harmonics for each G and k point
  ylmgr(mpw*mkmem,3,mpsang*mpsang*useylm)= gradients of real spherical harmonics
  ylmdiel(npwdiel,lmax_diel**2)= real spherical harmonics for each G and k point
                                 for the dielectric matrix

OUTPUT

  compch_fft=-PAW only- compensation charge inside spheres computed over fine fft grid
  dphase(3) : dphase(idir) = accumulated change in the string-averaged
     Zak phase along the idir-th direction caused by the update of all
     the occupied Bloch states at all the k-points (only if finite electric field)
  eigen(mband*nkpt*nsppol)=array for holding eigenvalues (hartree)
  resid(mband*nkpt*nsppol)=residuals for each band over all k points.
  residm=maximum value from resid array (except for nbdbuf highest bands)
  susmat(2,npwdiel*afford,nspden,npwdiel,nspden)=
   the susceptibility (or density-density response) matrix in reciprocal space
  === if optforces>0 ===
    grnl(3*natom)=stores grads of nonlocal energy wrt length scales
  ==== if optres==1
    nres2=square of the norm of the residual
    nvresid(nfftf,nspden)=density residual
  ==== if psps%usepaw==1
    cprj(natom,mcprj*usecprj)= wave functions projected with non-local projectors:
                               cprj(n,k,i)=<p_i|Cnk> where p_i is a non-local projector.
    nhat(nfftf,nspden*psps%usepaw)=compensation charge density on rectangular grid in real space

SIDE EFFECTS

  cg(2,mpw*dtset%nspinor*mband*mkmem*nsppol)=planewave coefficients of wavefunctions.
   At output contains updated wavefunctions coefficients;
    if nkpt>1, these are kept in a disk file.
  energies <type(energies_type)>=storage for energies computed here :
   | e_eigenvalues=Sum of the eigenvalues - Band energy (Hartree)
   | e_kinetic=kinetic energy part of total energy
   | e_nlpsp_vfock=nonlocal psp + potential Fock ACE part of total energy
   | e_fermie=fermi energy (Hartree)
  occ(mband*nkpt*nsppol)=occupation number for each band for each k.
      (input if insulator - occopt<3 - ; output if metallic)
  pawrhoij(my_natom*usepaw) <type(pawrhoij_type)>= paw rhoij occupancies and related data
  rhog(2,nfftf)=Fourier transform of total electron density
  rhor(nfftf,nspden)=total electron density (el/bohr**3)
  taug(2,nfftf*dtset%usekden)=array for Fourier transform of kinetic energy density
  taur(nfftf,nspden*dtset%usekden)=array for kinetic energy density
  tauresid(nfftf,nspden*dtset%usekden)=array for kinetic energy density residual
  wvl <type(wvl_data)>=wavelets structures in case of wavelets basis.
  ==== if (usepaw==1) ====
    cprj(natom,mcprj*usecprj)= wave functions projected with non-local projectors:
                               cprj(n,k,i)=<p_i|Cnk> where p_i is a non-local projector.
  rmm_diis_status= Status of the RMM-DIIS eigensolver. See m_rmm_diis

NOTES

  Be careful to the meaning of nfft (size of FFT grids):
   - In case of norm-conserving calculations the FFT grid is the usual FFT grid.
   - In case of PAW calculations:
     Two FFT grids are used; one with nfft points (coarse grid) for
     the computation of wave functions ; one with nfftf points
     (fine grid) for the computation of total density.

  The total electronic density (rhor,rhog) is divided into two terms:
   - The density related to WFs =Sum[Psi**2]
   - The compensation density (nhat) - only in PAW

  The parallelisation needed for the electric field should be
  made an independent subroutine, so that this routine could be put
  back in the 95_drive directory.

SOURCE

 295 subroutine vtorho(afford,atindx,atindx1,cg,compch_fft,cprj,cpus,dbl_nnsclo,&
 296 &           dielop,dielstrt,dmatpawu,dphase,dtefield,dtfil,dtset,&
 297 &           eigen,electronpositron,energies,etotal,gbound_diel,&
 298 &           gmet,gprimd,grnl,gsqcut,hdr,extfpmd,indsym,irrzon,irrzondiel,&
 299 &           istep,istep_mix,itimes,kg,kg_diel,kxc,lmax_diel,mcg,mcprj,mgfftdiel,mpi_enreg,&
 300 &           my_natom,natom,nattyp,nfftf,nfftdiel,ngfftdiel,nhat,nkxc,&
 301 &           npwarr,npwdiel,nres2,ntypat,nvresid,occ,optforces,&
 302 &           optres,paw_dmft,paw_ij,pawang,pawfgr,pawfgrtab,pawrhoij,pawtab,&
 303 &           phnons,phnonsdiel,ph1d,ph1ddiel,psps,fock,&
 304 &           pwind,pwind_alloc,pwnsfac,resid,residm,rhog,rhor,&
 305 &           rmet,rprimd,susmat,symrec,taug,taur,tauresid,&
 306 &           ucvol,usecprj,wffnew,with_vectornd,vectornd,vtrial,vxctau,wvl,xred,&
 307 &           ylm,ylmgr,ylmdiel, rmm_diis_status)
 308 
 309 !Arguments -------------------------------
 310 !scalars
 311  integer, intent(in) :: afford,dbl_nnsclo,dielop,dielstrt,istep,istep_mix,lmax_diel,mcg,mcprj,mgfftdiel
 312  integer, intent(in) :: my_natom,natom,nfftf,nfftdiel,nkxc,npwdiel
 313  integer, intent(in) :: ntypat,optforces,optres,pwind_alloc,usecprj,with_vectornd
 314  real(dp), intent(in) :: cpus,etotal,gsqcut,ucvol
 315  real(dp), intent(out) :: compch_fft,nres2,residm
 316  type(MPI_type), intent(inout) :: mpi_enreg
 317  type(datafiles_type), intent(in) :: dtfil
 318  type(dataset_type), intent(inout) :: dtset
 319  type(efield_type), intent(inout) :: dtefield
 320  type(electronpositron_type),pointer :: electronpositron
 321  type(energies_type), intent(inout) :: energies
 322  type(hdr_type), intent(inout) :: hdr
 323  type(extfpmd_type),pointer,intent(inout) :: extfpmd
 324  type(paw_dmft_type), intent(inout)  :: paw_dmft
 325  type(pawang_type), intent(in) :: pawang
 326  type(pawfgr_type), intent(in) :: pawfgr
 327  type(pseudopotential_type), intent(in) :: psps
 328  type(fock_type),pointer, intent(inout) :: fock
 329  type(wffile_type), intent(inout) :: wffnew
 330  type(wvl_data), intent(inout) :: wvl
 331 !arrays
 332  integer, intent(in) :: atindx(natom),atindx1(natom),gbound_diel(2*mgfftdiel+8,2)
 333  integer, intent(in) :: indsym(4,dtset%nsym,natom)
 334  integer, intent(in) :: irrzon(dtset%nfft**(1-1/dtset%nsym),2,(dtset%nspden/dtset%nsppol)-3*(dtset%nspden/4))
 335  integer, intent(in) :: irrzondiel(nfftdiel**(1-1/dtset%nsym),2,(dtset%nspden/dtset%nsppol)-3*(dtset%nspden/4))
 336  integer, intent(in) :: itimes(2)
 337  integer, intent(in) :: kg(3,dtset%mpw*dtset%mkmem),kg_diel(3,npwdiel),nattyp(ntypat),ngfftdiel(18),npwarr(dtset%nkpt)
 338  integer, intent(in) :: pwind(pwind_alloc,2,3),symrec(3,3,dtset%nsym)
 339  integer, intent(inout) :: rmm_diis_status(2, dtset%nkpt, dtset%nsppol)
 340  real(dp), intent(in) :: dmatpawu(:,:,:,:),gmet(3,3),gprimd(3,3),ph1d(2,3*(2*dtset%mgfft+1)*natom)
 341  real(dp), intent(in) :: ph1ddiel(2,(3*(2*mgfftdiel+1)*natom)*psps%usepaw)
 342  real(dp), intent(in) :: phnons(2,dtset%nfft**(1-1/dtset%nsym),(dtset%nspden/dtset%nsppol)-3*(dtset%nspden/4))
 343  real(dp), intent(in) :: phnonsdiel(2,nfftdiel**(1-1/dtset%nsym),(dtset%nspden/dtset%nsppol)-3*(dtset%nspden/4))
 344  real(dp), intent(in) :: pwnsfac(2,pwind_alloc),rmet(3,3),rprimd(3,3)
 345  real(dp), intent(inout) :: vectornd(with_vectornd*nfftf,dtset%nspden,3),vtrial(nfftf,dtset%nspden)
 346  real(dp), intent(inout) :: xred(3,natom)
 347  real(dp), intent(in) :: ylm(dtset%mpw*dtset%mkmem,psps%mpsang*psps%mpsang*psps%useylm)
 348  real(dp), intent(in) :: ylmgr(dtset%mpw*dtset%mkmem,3,psps%mpsang*psps%mpsang*psps%useylm)
 349  real(dp), intent(in) :: ylmdiel(npwdiel,lmax_diel**2)
 350  real(dp), intent(out) :: dphase(3),grnl(3*natom)
 351  real(dp), intent(inout) :: eigen(dtset%mband*dtset%nkpt*dtset%nsppol)
 352  real(dp), intent(out) :: nhat(nfftf,dtset%nspden*psps%usepaw)
 353  real(dp), intent(inout) :: resid(dtset%mband*dtset%nkpt*dtset%nsppol)
 354  real(dp), intent(out) :: nvresid(nfftf,dtset%nspden)
 355  real(dp), intent(out) :: susmat(2,npwdiel*afford,dtset%nspden,npwdiel,dtset%nspden)
 356  real(dp), intent(inout) :: cg(2,mcg)
 357  real(dp), intent(inout) :: kxc(nfftf,nkxc),occ(dtset%mband*dtset%nkpt*dtset%nsppol)
 358  real(dp), intent(inout) :: rhog(2,nfftf),rhor(nfftf,dtset%nspden)
 359  real(dp), intent(inout) :: taug(2,nfftf*dtset%usekden),taur(nfftf,dtset%nspden*dtset%usekden)
 360  real(dp), intent(inout) :: tauresid(nfftf,dtset%nspden*dtset%usekden)
 361  real(dp), intent(inout),optional :: vxctau(nfftf,dtset%nspden,4*dtset%usekden)
 362  type(pawcprj_type),pointer,intent(inout) :: cprj(:,:)
 363  type(paw_ij_type),intent(inout) :: paw_ij(my_natom*psps%usepaw)
 364  type(pawfgrtab_type),intent(inout) :: pawfgrtab(my_natom*psps%usepaw)
 365  type(pawrhoij_type),target,intent(inout) :: pawrhoij(my_natom*psps%usepaw)
 366  type(pawtab_type),intent(in) :: pawtab(ntypat*psps%usepaw)
 367 
 368 !Local variables-------------------------------
 369 !scalars
 370  integer,parameter :: level=111,tim_mkrho=2
 371  !integer,save :: nwarning=0
 372  integer :: bdtot_index,counter,cplex,cplex_rhoij,dimffnl,enunit,iband,iband1,ibdkpt
 373  integer :: ibg,icg,ider,idir,ierr,ifft,ifor,ifor1,ii,ikg,ikpt
 374  integer :: ikpt_loc,ikpt1,my_ikpt,ikxc,ilm,imagn,index1,iorder_cprj,ipert,iplex
 375  integer :: iscf,ispden,isppol,istwf_k,mband_cprj,mbdkpsp,mb2dkpsp
 376  integer :: mcgq,mcprj_local,mcprj_tmp,me_distrb,mkgq,mpi_comm_sphgrid
 377  integer :: my_nspinor,n1,n2,n3,n4,n5,n6,nband_eff,nbdbuf_eff !mwarning,
 378  integer :: nband_k,nband_cprj_k,nbuf,neglect_pawhat,nfftot,nkpg,nkpt1,nnn,nnsclo_now
 379  integer :: nproc_distrb,npw_k,nspden_rhoij,option,prtvol,nblk_gemm_nonlop
 380  integer :: spaceComm_distrb,usecprj_local,usefock_ACE,usetimerev
 381  logical :: berryflag,computesusmat,fixed_occ,has_vectornd
 382  logical :: locc_test,paral_atom,remove_inv,usefock,with_vxctau
 383  logical :: do_last_ortho,wvlbigdft=.false.
 384  real(dp) :: dmft_dftocc
 385  real(dp) :: edmft,ebandlda,ebanddmft,ebandldatot,ekindmft,ekindmft2,ekinlda
 386  real(dp) :: min_occ,vxcavg_dum,strsxc(6)
 387  character(len=500) :: msg
 388  type(bandfft_kpt_type),pointer :: my_bandfft_kpt => null()
 389  type(gs_hamiltonian_type) :: gs_hamk
 390 !arrays
 391  integer(int32), ABI_CONTIGUOUS pointer :: kg_k(:,:) => null()
 392  real(dp) :: dielar(7),dphase_k(3),kpoint(3),qpt(3),rhodum(1),tsec(2),ylmgr_dum(0,0,0)
 393  real(dp),allocatable :: EigMin(:,:),buffer1(:),buffer2(:),cgq(:,:)
 394  real(dp),allocatable :: cgrkxc(:,:),doccde(:)
 395  real(dp),allocatable :: dphasek(:,:),ek_k(:),ek_k_nd(:,:,:),eknk(:),eknk_nd(:,:,:,:,:),end_k(:)
 396  real(dp),allocatable :: enlx_k(:),enlxnk(:),focknk(:),fockfornk(:,:,:),ffnl(:,:,:,:),grnl_k(:,:), xcart(:,:)
 397  real(dp),allocatable :: grnlnk(:,:)
 398 
 399 #if defined HAVE_GPU && defined HAVE_YAKL
 400  real(c_double), ABI_CONTIGUOUS pointer :: kinpw(:) => null()
 401  real(c_double), ABI_CONTIGUOUS pointer :: eig_k(:) => null()
 402 #else
 403  real(dp),allocatable :: kinpw(:)
 404  real(dp),allocatable :: eig_k(:)
 405 #endif
 406 
 407  real(dp),allocatable :: kpg_k(:,:),occ_k(:),ph3d(:,:,:)
 408  real(dp),allocatable :: pwnsfacq(:,:)
 409 
 410 #if defined HAVE_GPU && defined HAVE_YAKL
 411  real(c_double), ABI_CONTIGUOUS pointer :: resid_k(:) => null()
 412  real(c_double), ABI_CONTIGUOUS pointer :: rhoaug(:,:,:,:) => null()
 413 #else
 414  real(dp),allocatable :: resid_k(:)
 415  real(dp),allocatable :: rhoaug(:,:,:,:)
 416 #endif
 417 
 418  real(dp),allocatable :: rhowfg(:,:),rhowfr(:,:),tauwfg(:,:),tauwfr(:,:)
 419  real(dp),allocatable :: vectornd_pac(:,:,:,:,:)
 420 
 421 #if defined HAVE_GPU && defined HAVE_YAKL
 422  real(real64), ABI_CONTIGUOUS pointer :: vlocal(:,:,:,:) => null()
 423 #else
 424  real(dp), allocatable :: vlocal(:,:,:,:)
 425 #endif
 426 
 427  real(dp),allocatable :: vxctaulocal(:,:,:,:,:),ylm_k(:,:),zshift(:)
 428  type(pawcprj_type),allocatable :: cprj_tmp(:,:)
 429  type(pawcprj_type),allocatable,target:: cprj_local(:,:)
 430  type(oper_type) :: dft_occup
 431  type(pawrhoij_type),pointer :: pawrhoij_unsym(:)
 432  type(crystal_t) :: cryst_struc
 433  integer :: idum1(0),idum3(0,0,0)
 434  real(dp) :: rdum2(0,0),rdum4(0,0,0,0)
 435 #if defined HAVE_BIGDFT
 436  integer :: occopt_bigdft
 437 #endif
 438 
 439 #if defined(HAVE_GPU_CUDA)
 440  type(gemm_nonlop_gpu_type) :: gemm_nonlop_gpu_obj
 441 #endif
 442 
 443 
 444 ! *********************************************************************
 445 
 446  DBG_ENTER("COLL")
 447 
 448 !Keep track of total time spent in vtorho
 449  call timab(980,1,tsec)
 450  call timab(981,1,tsec)
 451 
 452 !Structured debugging if prtvol==-level
 453  prtvol=dtset%prtvol
 454 
 455 ! Electric fields: set flag to turn on various behaviors
 456  berryflag = any(dtset%berryopt == [4, 14, 6, 16, 7, 17])
 457 
 458 !If usewvl: wvlbigdft indicates that the BigDFT workflow will be followed
 459  wvlbigdft=(dtset%usewvl==1.and.dtset%wvl_bigdft_comp==1)
 460 
 461 !Several inits
 462  n1=dtset%ngfft(1) ; n2=dtset%ngfft(2) ; n3=dtset%ngfft(3)
 463  n4=dtset%ngfft(4) ; n5=dtset%ngfft(5) ; n6=dtset%ngfft(6)
 464  usecprj_local=0;if (psps%usepaw==1) usecprj_local=1
 465  my_nspinor=max(1,dtset%nspinor/mpi_enreg%nproc_spinor)
 466  paral_atom=(my_natom/=natom)
 467  compch_fft=-1.d5
 468 
 469 !Check that usekden is not 0 if want to use vxctau
 470  with_vxctau = (present(vxctau).and.dtset%usekden/=0)
 471 
 472 !Check that fock is present if want to use fock option
 473  usefock = (dtset%usefock==1 .and. associated(fock))
 474  usefock_ACE=0
 475  if (usefock) usefock_ACE=fock%fock_common%use_ACE
 476 
 477 !Init MPI
 478  spaceComm_distrb=mpi_enreg%comm_cell
 479  if (mpi_enreg%paral_kgb==1) spaceComm_distrb=mpi_enreg%comm_kpt
 480  if (mpi_enreg%paral_hf ==1) spaceComm_distrb=mpi_enreg%comm_kpt
 481  nproc_distrb=xmpi_comm_size(spaceComm_distrb)
 482  me_distrb=xmpi_comm_rank(spaceComm_distrb)
 483  mpi_comm_sphgrid=mpi_enreg%comm_fft
 484  if(dtset%usewvl==1) mpi_comm_sphgrid=mpi_enreg%comm_wvl
 485  !if (mpi_enreg%me_img/=0) nwarning=nwarning+1
 486 
 487 !Test size of FFT grids (1 grid in norm-conserving, 2 grids in PAW)
 488  if ((psps%usepaw==1.and.pawfgr%nfft/=nfftf).or.(psps%usepaw==0.and.dtset%nfft/=nfftf)) then
 489    ABI_BUG('wrong values for nfft, nfftf!')
 490  end if
 491 
 492 !Test optforces (to prevent memory overflow)
 493  if (optforces/=0.and.optforces/=1) then
 494    ABI_BUG(sjoin('wrong value for optforces: ',itoa(optforces)))
 495  end if
 496 
 497  iscf=dtset%iscf
 498  fixed_occ=(dtset%occopt<3.or.electronpositron_calctype(electronpositron)==1)
 499  if(.not. wvlbigdft) then
 500    energies%e_eigenvalues = zero
 501    energies%e_kinetic     = zero
 502    energies%e_nucdip      = zero
 503    energies%e_nlpsp_vfock = zero
 504    if (usefock) then
 505      energies%e_fock=zero
 506      energies%e_fockdc=zero
 507    end if
 508    grnl(:)=zero
 509    if (berryflag) then
 510      resid(:) = zero ! JWZ 13 May 2010. resid and eigen need to be fully zeroed each time before use
 511    end if
 512    ! MG: The previous line is not compatible with the RMM-DIIS since rmm_diis recevies the previous resid_k
 513    ! to select the accuracy level.
 514    ! For the time being, we set resid to zero if berryflag to avoid breaking the CG solver with E-field
 515    ! but it's clear that the treatment of resid should be rationalized and that the previous values should be passed to vtowfk
 516    eigen(:) = zero
 517    bdtot_index=0
 518    ibg=0;icg=0
 519    mbdkpsp=dtset%mband*dtset%nkpt*dtset%nsppol
 520    if(paw_dmft%use_dmft==1) mb2dkpsp=2*dtset%mband*dtset%mband*dtset%nkpt*dtset%nsppol
 521  end if
 522 
 523  if(dtset%usewvl==0) then
 524    ABI_MALLOC(eknk,(mbdkpsp))
 525    ABI_MALLOC(enlxnk,(mbdkpsp))
 526    ABI_MALLOC(eknk_nd,(dtset%nsppol,dtset%nkpt,2,dtset%mband,dtset%mband*paw_dmft%use_dmft))
 527    ABI_MALLOC(EigMin,(2,dtset%mband))
 528    ABI_MALLOC(grnlnk,(3*natom,mbdkpsp*optforces))
 529    if (usefock) then
 530      ABI_MALLOC(focknk,(mbdkpsp))
 531      focknk=zero
 532      if (optforces>0)then
 533        ABI_MALLOC(fockfornk,(3,natom,mbdkpsp))
 534        fockfornk=zero
 535      end if
 536    end if
 537    eknk(:)=zero;enlxnk(:)=zero
 538    if (optforces>0) grnlnk(:,:)=zero
 539    if(paw_dmft%use_dmft==1) eknk_nd=zero
 540  end if !usewvl==0
 541 
 542 !Initialize rhor if needed; store old rhor
 543  if(iscf>=0 .or. iscf==-3) then
 544    if (optres==1) then
 545      nvresid=rhor ; tauresid=taur
 546    end if
 547 !  NC and plane waves
 548    if (psps%usepaw==0 .and. dtset%usewvl==0) then
 549      rhor=zero ; taur=zero
 550 !  PAW
 551    else if(psps%usepaw==1) then
 552      ABI_MALLOC(rhowfr,(dtset%nfft,dtset%nspden))
 553      ABI_MALLOC(rhowfg,(2,dtset%nfft))
 554      ABI_MALLOC(tauwfr,(dtset%nfft,dtset%nspden*dtset%usekden))
 555      ABI_MALLOC(tauwfg,(2,dtset%nfft*dtset%usekden))
 556      rhowfr(:,:)=zero ; tauwfr(:,:)=zero
 557    end if
 558  end if
 559 
 560  ! Here we set the max number of non-self-consistent loops nnsclo_now used in vtowfk
 561  if (iscf<0) then
 562    ! Non self-consistent case
 563    nnsclo_now=dtset%nstep
 564  else
 565    ! Self-consistent case
 566    if (dtset%nnsclo>0) then
 567      ! Use input variable if specified and > 0
 568      nnsclo_now=dtset%nnsclo
 569    else if (dtset%nnsclo < 0) then
 570      ! imposed during abs(nnsclo) steps
 571      nnsclo_now=1
 572      if (istep<=abs(dtset%nnsclo)) nnsclo_now=merge(5,dtset%useria,dtset%useria==0)
 573    else
 574      ! Default branch for self-consistent case.
 575      ! Perform 2 NSCF loops for the first two iterations. This is important especially wfs have
 576      ! been initialized with random numbers.
 577      nnsclo_now = 1
 578      if (dtset%usewvl == 0) then
 579        ! Plane waves
 580        if (istep <= 2 .and. iscf /= 0) nnsclo_now = 2
 581        ! MG: I don't understand why we need to perform 2 NSCF loops after the first SCF cycle
 582        ! when we are relaxing the structure as the initial density and wavefunctions should be already good enough.
 583        ! Here I change the default behavior to avoid the extra loop but only if RMM-DIIS is used.
 584        ! XG 20210312 : I prefectly agree with you. This is historical, and should be changed, after testing and update of reference files.
 585        if ((itimes(1) > 1 .or. (itimes(2)>1)) .and. dtset%rmm_diis /= 0) nnsclo_now = 1
 586      else
 587        ! Wavelets
 588        if (iscf==0) then
 589          nnsclo_now=0
 590        else if (istep<=2) then
 591          nnsclo_now=3
 592        else if (istep<=4) then
 593          nnsclo_now=2
 594        end if
 595      end if
 596    end if
 597    ! Double the value if required
 598    if (dbl_nnsclo==1) nnsclo_now=nnsclo_now*2
 599  end if
 600 
 601  if(dtset%wfoptalg==2)nnsclo_now=40  ! UNDER DEVELOPMENT
 602 
 603  if (dtset%prtvol > 0) then
 604    write(msg, '(a,i0,a,3(i0,1x))' ) ' vtorho: nnsclo_now = ',nnsclo_now,&
 605      ', note that nnsclo, dbl_nnsclo, istep= ',dtset%nnsclo,dbl_nnsclo,istep
 606    call wrtout(std_out,msg)
 607  else
 608    if (nnsclo_now > 1) call wrtout(std_out, sjoin(" Max number of non-self-consistent loops:", itoa(nnsclo_now)))
 609  end if
 610 
 611 !==== Initialize most of the Hamiltonian ====
 612 !Allocate all arrays and initialize quantities that do not depend on k and spin.
 613  call init_hamiltonian(gs_hamk,psps,pawtab,dtset%nspinor,dtset%nsppol,dtset%nspden,natom,&
 614 & dtset%typat,xred,dtset%nfft,dtset%mgfft,dtset%ngfft,rprimd,dtset%nloalg,&
 615 & paw_ij=paw_ij,ph1d=ph1d,usecprj=usecprj_local,electronpositron=electronpositron,fock=fock,&
 616 & comm_atom=mpi_enreg%comm_atom,mpi_atmtab=mpi_enreg%my_atmtab,mpi_spintab=mpi_enreg%my_isppoltab,&
 617 & nucdipmom=dtset%nucdipmom,gpu_option=dtset%gpu_option)
 618 
 619 !Initializations for PAW (projected wave functions)
 620  mcprj_local=0 ; mband_cprj=0
 621  if (psps%usepaw==1) then
 622    mband_cprj=dtset%mband
 623    if (dtset%paral_kgb/=0) mband_cprj=mband_cprj/mpi_enreg%nproc_band
 624    iorder_cprj=0 ; mcprj_local=mcprj
 625    if (usecprj==0) then
 626      mcprj_local=my_nspinor*mband_cprj*dtset%mkmem*dtset%nsppol
 627      !This is a check but should always be true since scfcv allocated cprj anyway
 628      if (allocated(cprj_local)) then
 629        !Was allocated in scfcv so we just destroy and reconstruct it as desired
 630        call pawcprj_free(cprj_local)
 631        ABI_FREE(cprj_local)
 632      end if
 633      ABI_MALLOC(cprj_local,(dtset%natom,mcprj_local))
 634      call pawcprj_alloc(cprj_local,0,gs_hamk%dimcprj)
 635      cprj => null()
 636      cprj => cprj_local
 637    end if
 638  end if
 639 
 640  call timab(981,2,tsec)
 641 
 642 !===================================================================
 643 ! WAVELETS - Branching with a separate VTORHO procedure
 644 !===================================================================
 645 
 646  if (dtset%usewvl == 1) then
 647 #ifndef HAVE_BIGDFT
 648    BIGDFT_NOTENABLED_ERROR()
 649 #else
 650 
 651 !  do_last_ortho in case of diagonalization scheme
 652    if (     wvlbigdft) do_last_ortho=(dtset%iscf/=0)
 653    if (.not.wvlbigdft) do_last_ortho=(.true.)
 654 
 655    ABI_MALLOC(xcart,(3, dtset%natom))
 656    call xred2xcart(dtset%natom, rprimd, xcart, xred)
 657 
 658    if(wvlbigdft) then
 659 !    NSCF loop for wvlbigdt:
 660      call wvl_nscf_loop_bigdft()
 661    else
 662 !    NSCF loop for WVL: (not wvlbigdft)
 663      call wvl_nscf_loop()
 664    end if
 665 
 666 !  Eventually orthogonalize WFs now
 667    if (do_last_ortho) then
 668      call write_energies(ii,0,wvl%e%energs,0.d0,0.d0,"final")
 669      call last_orthon(me_distrb, nproc_distrb, ii, wvl%wfs%ks, wvl%e%energs%evsum, .true.)
 670      if(wvlbigdft) energies%e_xcdc = wvl%e%energs%evxc
 671 !    If occupation numbers are not changed...
 672      if (fixed_occ .or. (iscf<0 .and. iscf/=-3)) then
 673        call wvl_comm_eigen()
 674      end if
 675 !    ... or update occupations:
 676      if( ( .not.fixed_occ) .and. (iscf>0.or.iscf==-3)) then
 677        if(wvlbigdft) then
 678          call wvl_occ_bigdft()
 679        else
 680 !        Communicate eigenvalues:
 681          call wvl_comm_eigen()
 682 !        Update occ and Fermi level
 683          call wvl_occ()
 684        end if
 685      end if
 686 !    This might accelerate convergence:
 687      wvl%wfs%ks%diis%energy_min=one
 688      wvl%wfs%ks%diis%alpha=two
 689    end if !do_last_ortho
 690 
 691 !  Compute eigenvalues energy
 692    if(.not. wvlbigdft .and. nnsclo_now>0) then
 693      call e_eigen(eigen,energies%e_eigenvalues,dtset%mband,dtset%nband,dtset%nkpt,&
 694 &     dtset%nsppol,occ,dtset%wtk)
 695    else
 696      energies%e_eigenvalues = energies%e_kinetic + energies%e_localpsp &
 697 &     + energies%e_xcdc  + two*energies%e_hartree +energies%e_nlpsp_vfock
 698    end if
 699 
 700    if (optforces == 1) then ! not compatible with iscf=0 and wvlbigdftcomp=1 + PAW
 701      call wvl_nl_gradient(grnl, mpi_enreg, dtset%natom, rprimd, wvl, xcart)
 702    end if
 703 
 704 !  For iscf<0 we do not update the density
 705    if (dtset%iscf>=0) then !(dtset%iscf>=0 .and. .not. wvlbigdft ) then
 706      call wvl_mkrho(dtset,irrzon,mpi_enreg,phnons,rhor,wvl%wfs,wvl%den)
 707    end if
 708    ABI_FREE(xcart)
 709 
 710 !  Note in WVL+NC: the rest will be skipped.
 711 !  For PAW: we will compute Rho_ij at the end.
 712    !if(wvlbigdft) return
 713 #endif
 714  else
 715 
 716 !===================================================================
 717 ! PLANE WAVES - Standard VTORHO procedure
 718 !===================================================================
 719 
 720 !  Electric field: allocate dphasek
 721    nkpt1 = dtset%nkpt
 722    if ( berryflag ) then
 723      ABI_MALLOC(dphasek,(3,dtset%nkpt*dtset%nsppol))
 724      dphasek(:,:) = zero
 725      nkpt1 = dtefield%mkmem_max
 726    end if
 727 
 728    if(dtset%gpu_option==ABI_GPU_KOKKOS) then
 729 #if defined HAVE_GPU && defined HAVE_YAKL
 730      ABI_MALLOC_MANAGED(rhoaug, (/n4,n5,n6,gs_hamk%nvloc/))
 731      ABI_MALLOC_MANAGED(vlocal, (/n4,n5,n6,gs_hamk%nvloc/))
 732 #endif
 733    else
 734      ABI_MALLOC(rhoaug,(n4,n5,n6,gs_hamk%nvloc))
 735      ABI_MALLOC(vlocal,(n4,n5,n6,gs_hamk%nvloc))
 736    end if
 737 
 738    if(with_vxctau) then
 739      ABI_MALLOC(vxctaulocal,(n4,n5,n6,gs_hamk%nvloc,4))
 740    end if
 741 
 742    has_vectornd = (with_vectornd .EQ. 1)
 743    if(has_vectornd) then
 744       ABI_MALLOC(vectornd_pac,(n4,n5,n6,gs_hamk%nvloc,3))
 745       vectornd_pac=zero
 746    end if
 747 
 748   nbdbuf_eff = dtset%nbdbuf
 749   ! In metallic case, at first iteration, occupations could be 0. So residm should be computed as usual
 750   if (dtset%nbdbuf==-101.and..not.fixed_occ.and.istep==1.and.minval(occ)<tol10) then
 751     write(msg,*) 'vtorho: nbdbuf is set to 0 for this step'
 752     call wrtout(std_out,msg,'COLL')
 753     nbdbuf_eff = 0
 754   end if
 755 
 756 !  LOOP OVER SPINS
 757    do isppol=1,dtset%nsppol
 758      call timab(982,1,tsec)
 759 
 760      ikpt_loc = 0
 761      ikg=0
 762 
 763      ! Set up local potential vlocal on the coarse FFT mesh from vtrial taking into account the spin.
 764      ! Also, continue to initialize the Hamiltonian.
 765 
 766      call gspot_transgrid_and_pack(isppol, psps%usepaw, dtset%paral_kgb, dtset%nfft, dtset%ngfft, nfftf, &
 767                                    dtset%nspden, gs_hamk%nvloc, 1, pawfgr, mpi_enreg, vtrial, vlocal)
 768      call gs_hamk%load_spin(isppol, vlocal=vlocal, with_nonlocal=.true.)
 769 
 770      if (with_vxctau) then
 771        call gspot_transgrid_and_pack(isppol, psps%usepaw, dtset%paral_kgb, dtset%nfft, dtset%ngfft, nfftf, &
 772                                      dtset%nspden, gs_hamk%nvloc, 4, pawfgr, mpi_enreg, vxctau, vxctaulocal)
 773        call gs_hamk%load_spin(isppol, vxctaulocal=vxctaulocal)
 774      end if
 775 
 776      rhoaug(:,:,:,:)=zero
 777 
 778      ! if vectornd is present, set it up for addition to gs_hamk similarly to how it's done for
 779      ! vtrial. Note that it must be done for the three Cartesian directions. Also, the following
 780      ! code assumes explicitly and implicitly that nvloc = 1. This should eventually be generalized.
 781      if(has_vectornd) then
 782        call gspot_transgrid_and_pack(isppol, psps%usepaw, dtset%paral_kgb, dtset%nfft, dtset%ngfft, nfftf, &
 783          & dtset%nspden, gs_hamk%nvloc, 3, pawfgr, mpi_enreg, vectornd, vectornd_pac)
 784        call gs_hamk%load_spin(isppol, vectornd=vectornd_pac)
 785      end if
 786     
 787      call timab(982,2,tsec)
 788 
 789 !    BIG FAT k POINT LOOP
 790 !    MVeithen: I had to modify the structure of this loop in order to implement MPI // of the electric field
 791 !    Note that the loop here differs from the similar one in berryphase_new.F90.
 792 !    here, ikpt_loc numbers the kpts treated by the current processor.
 793 !    in berryphase_new.F90, ikpt_loc ALSO includes info about value of isppol.
 794 
 795      ikpt = 0
 796      do while (ikpt_loc < nkpt1)
 797 
 798        call timab(997,1,tsec)
 799 
 800        if ( .not.berryflag ) then
 801          ikpt_loc = ikpt_loc + 1
 802          ikpt = ikpt_loc
 803          my_ikpt = mpi_enreg%my_kpttab(ikpt)
 804        else
 805          if (ikpt_loc < dtset%mkmem) ikpt = ikpt + 1
 806          if ((ikpt > dtset%nkpt).and.(ikpt_loc < dtset%mkmem)) exit
 807          my_ikpt=ikpt
 808        end if
 809 
 810        dphase_k(:) = zero
 811        counter=100*ikpt+isppol
 812        nband_k=dtset%nband(ikpt+(isppol-1)*dtset%nkpt)
 813        nband_cprj_k=nband_k/mpi_enreg%nproc_band
 814        istwf_k=dtset%istwfk(ikpt)
 815        npw_k=npwarr(ikpt)
 816 
 817        mcgq=1 ; mkgq=1
 818        if (.not.berryflag) then
 819          if(proc_distrb_cycle(mpi_enreg%proc_distrb,ikpt,1,nband_k,isppol,me_distrb)) then
 820            eigen(1+bdtot_index : nband_k+bdtot_index) = zero
 821            resid(1+bdtot_index : nband_k+bdtot_index) = zero
 822            bdtot_index=bdtot_index+nband_k
 823            cycle
 824          end if
 825        else
 826          if ((proc_distrb_cycle(mpi_enreg%proc_distrb,ikpt,1,nband_k,isppol,me_distrb)) .and.(ikpt_loc <= dtset%mkmem)) then
 827            eigen(1+bdtot_index : nband_k+bdtot_index) = zero
 828            resid(1+bdtot_index : nband_k+bdtot_index) = zero
 829            bdtot_index = bdtot_index + nband_k
 830            cycle
 831          end if
 832          ikpt_loc = ikpt_loc + 1
 833          mcgq = dtset%mpw*my_nspinor*nband_k*dtefield%nneigh(ikpt)
 834          ikg = dtefield%kgindex(ikpt)
 835          mkgq = 6*dtset%mpw
 836        end if ! berryflag
 837 
 838        call timab(997,2,tsec)
 839 
 840 !      In case of MPI // of a finite field calculation
 841 !      build the cgq array that stores the wavefunctions for the
 842 !      neighbours of ikpt, and the pwnsfacq array that stores the
 843 !      corresponding phase factors (in case of tnons)
 844        ABI_MALLOC(cgq,(2,mcgq))
 845        ABI_MALLOC(pwnsfacq,(2,mkgq))
 846        if ( berryflag ) then
 847          call cgq_builder(berryflag,cg,cgq,dtefield,dtset,ikpt,ikpt_loc,isppol,mcg,mcgq,&
 848 &         me_distrb,mkgq,mpi_enreg,my_nspinor,nband_k,nproc_distrb,&
 849 &         npwarr,pwnsfac,pwnsfacq,pwind_alloc,spaceComm_distrb)
 850          if (ikpt_loc > dtset%mkmem) then
 851            ABI_FREE(cgq)
 852            ABI_FREE(pwnsfacq)
 853            cycle
 854          end if
 855        end if !berryopt
 856 
 857        call timab(984,1,tsec)
 858 
 859        if (mpi_enreg%paral_kgb==1) my_bandfft_kpt => bandfft_kpt(my_ikpt)
 860        call bandfft_kpt_set_ikpt(ikpt,mpi_enreg)
 861 !      my_ikpt = ikpt
 862 !       if (mpi_enreg%paral_kgb==1) then
 863 !        my_ikpt = mpi_enreg%my_kpttab(ikpt)
 864 !         my_bandfft_kpt => bandfft_kpt(my_ikpt)
 865 !         call bandfft_kpt_set_ikpt(ikpt,mpi_enreg)
 866 !       end if
 867 
 868        ABI_MALLOC(ek_k,(nband_k))
 869        ABI_MALLOC(end_k,(nband_k))
 870        ABI_MALLOC(enlx_k,(nband_k))
 871        ABI_MALLOC(ek_k_nd,(2,nband_k,nband_k*paw_dmft%use_dmft))
 872        ABI_MALLOC(occ_k,(nband_k))
 873        ABI_MALLOC(zshift,(nband_k))
 874        ABI_MALLOC(grnl_k,(3*natom,nband_k*optforces))
 875 
 876        if(dtset%gpu_option==ABI_GPU_KOKKOS) then
 877 #if defined HAVE_GPU && defined HAVE_YAKL
 878          ABI_MALLOC_MANAGED(eig_k,(/nband_k/))
 879          ABI_MALLOC_MANAGED(resid_k,(/nband_k/))
 880 #endif
 881        else
 882          ABI_MALLOC(eig_k,(nband_k))
 883          ABI_MALLOC(resid_k,(nband_k))
 884        end if
 885 
 886        eig_k(:)=zero
 887        ek_k(:)=zero
 888        end_k(:)=zero
 889        enlx_k(:)=zero
 890        if(paw_dmft%use_dmft==1) ek_k_nd(:,:,:)=zero
 891        if (optforces>0) grnl_k(:,:)=zero
 892        kpoint(:)=dtset%kptns(:,ikpt)
 893        occ_k(:)=occ(1+bdtot_index:nband_k+bdtot_index)
 894        resid_k(:) = resid(1+bdtot_index : nband_k+bdtot_index)
 895        !resid_k(:)=zero
 896        zshift(:)=dtset%eshift
 897 
 898        if(dtset%gpu_option==ABI_GPU_KOKKOS) then
 899 #if defined HAVE_GPU && defined HAVE_YAKL
 900          ABI_MALLOC_MANAGED(kg_k, (/3,npw_k/))
 901 #endif
 902        else
 903          ABI_MALLOC(kg_k,(3,npw_k))
 904        end if
 905 
 906        ABI_MALLOC(ylm_k,(npw_k,psps%mpsang*psps%mpsang*psps%useylm))
 907        kg_k(:,1:npw_k)=kg(:,1+ikg:npw_k+ikg)
 908        if (psps%useylm==1) then
 909          do ilm=1,psps%mpsang*psps%mpsang
 910            ylm_k(1:npw_k,ilm)=ylm(1+ikg:npw_k+ikg,ilm)
 911          end do
 912        end if
 913 
 914 !      Set up remaining of the Hamiltonian
 915 
 916 !      Compute (1/2) (2 Pi)**2 (k+G)**2:
 917        if(dtset%gpu_option==ABI_GPU_KOKKOS) then
 918 #if defined HAVE_GPU && defined HAVE_YAKL
 919          ABI_MALLOC_MANAGED(kinpw,(/npw_k/))
 920 #endif
 921        else
 922          ABI_MALLOC(kinpw,(npw_k))
 923        end if
 924 
 925        call mkkin(dtset%ecut,dtset%ecutsm,dtset%effmass_free,gmet,kg_k,kinpw,kpoint,npw_k,0,0)
 926 
 927 !      Compute (k+G) vectors (only if useylm=1)
 928        nkpg=3*optforces*dtset%nloalg(3)
 929        ABI_MALLOC(kpg_k,(npw_k,nkpg))
 930        if ((mpi_enreg%paral_kgb/=1.or.istep<=1).and.nkpg>0) call mkkpg(kg_k,kpg_k,kpoint,nkpg,npw_k)
 931 
 932 !      Compute nonlocal form factors ffnl at all (k+G):
 933        ider=0;idir=0;dimffnl=1
 934        ABI_MALLOC(ffnl,(npw_k,dimffnl,psps%lmnmax,ntypat))
 935        if (mpi_enreg%paral_kgb/=1.or.istep<=1) then
 936          call mkffnl(psps%dimekb,dimffnl,psps%ekb,ffnl,psps%ffspl,&
 937 &         gmet,gprimd,ider,idir,psps%indlmn,kg_k,kpg_k,kpoint,psps%lmnmax,&
 938 &         psps%lnmax,psps%mpsang,psps%mqgrid_ff,nkpg,&
 939 &         npw_k,ntypat,psps%pspso,psps%qgrid_ff,rmet,&
 940 &         psps%usepaw,psps%useylm,ylm_k,ylmgr)
 941        end if
 942 
 943 !      Load k-dependent part in the Hamiltonian datastructure
 944 !       - Compute 3D phase factors
 945 !       - Prepare various tabs in case of band-FFT parallelism
 946 !       - Load k-dependent quantities in the Hamiltonian
 947        ABI_MALLOC(ph3d,(2,npw_k,gs_hamk%matblk))
 948 
 949        if(usefock_ACE/=0) then
 950          call gs_hamk%load_k(kpt_k=dtset%kptns(:,ikpt),istwf_k=istwf_k,npw_k=npw_k,&
 951 &         kinpw_k=kinpw,kg_k=kg_k,kpg_k=kpg_k,ffnl_k=ffnl,fockACE_k=fock%fockACE(ikpt,isppol),ph3d_k=ph3d,&
 952 &         compute_ph3d=(mpi_enreg%paral_kgb/=1.or.istep<=1),&
 953 &         compute_gbound=(mpi_enreg%paral_kgb/=1))
 954        else
 955          call gs_hamk%load_k(kpt_k=dtset%kptns(:,ikpt),istwf_k=istwf_k,npw_k=npw_k,&
 956 &         kinpw_k=kinpw,kg_k=kg_k,kpg_k=kpg_k,ffnl_k=ffnl,ph3d_k=ph3d,&
 957 &         compute_ph3d=(mpi_enreg%paral_kgb/=1.or.istep<=1),&
 958 &         compute_gbound=(mpi_enreg%paral_kgb/=1))
 959        end if
 960 
 961 !      Load band-FFT tabs (transposed k-dependent arrays)
 962        if (mpi_enreg%paral_kgb==1) then
 963          if (istep<=1) call prep_bandfft_tabs(gs_hamk,ikpt,dtset%mkmem,mpi_enreg)
 964          call gs_hamk%load_k(npw_fft_k=my_bandfft_kpt%ndatarecv, &
 965            gbound_k =my_bandfft_kpt%gbound, &
 966            kinpw_k  =my_bandfft_kpt%kinpw_gather, &
 967            kg_k     =my_bandfft_kpt%kg_k_gather, &
 968            kpg_k    =my_bandfft_kpt%kpg_k_gather, &
 969            ffnl_k   =my_bandfft_kpt%ffnl_gather, &
 970            ph3d_k   =my_bandfft_kpt%ph3d_gather)
 971        end if
 972 
 973 !      If OpenMP GPU, load "hamiltonian" on GPU device
 974        if (dtset%gpu_option == ABI_GPU_OPENMP) then
 975          if(dtset%paral_kgb==0) then
 976            call ompgpu_load_hamilt_buffers(gs_hamk%kg_k,gs_hamk%kg_kp)
 977          else if(istwf_k==1) then
 978            call ompgpu_load_hamilt_buffers(gs_hamk%kg_k,gs_hamk%kg_kp,kg_k_gather=bandfft_kpt(my_ikpt)%kg_k_gather)
 979          else if(istwf_k==2) then
 980            call ompgpu_load_hamilt_buffers(gs_hamk%kg_k,gs_hamk%kg_kp,kg_k_gather=bandfft_kpt(my_ikpt)%kg_k_gather_sym)
 981          else
 982            ABI_ERROR("istwfk > 2 is not handled with OpenMP GPU offload mode !")
 983          end if
 984        end if
 985 
 986        if(dtset%wfoptalg == 111 .and. istep <= 1) then
 987          gemm_nonlop_nblocks = dtset%gpu_nl_splitsize
 988          ! Only compute CHEBFI number of blocks if user didn't set it themselves
 989          if(gemm_nonlop_nblocks==1) then
 990            call chebfiwf2_blocksize(gs_hamk,mpi_enreg%bandpp,npw_k,nband_k,dtset%nspinor,mpi_enreg%paral_kgb,&
 991            &                        dtset%gpu_option,nblk_gemm_nonlop)
 992            gemm_nonlop_nblocks = nblk_gemm_nonlop
 993          end if
 994          if(gemm_nonlop_nblocks > 1) gemm_nonlop_is_distributed = .true.
 995        end if
 996 
 997 !      Build inverse of overlap matrix for chebfi
 998        if(psps%usepaw == 1 .and. (dtset%wfoptalg == 1 .or. dtset%wfoptalg == 111) .and. istep <= 1) then
 999           ABI_NVTX_START_RANGE(NVTX_INVOVL)
1000           call make_invovl(gs_hamk, dimffnl, ffnl, ph3d, mpi_enreg)
1001           ABI_NVTX_END_RANGE()
1002        end if
1003 
1004        ! Setup gemm_nonlop
1005        if (gemm_nonlop_use_gemm) then
1006          !set the global variable indicating to gemm_nonlop where to get its data from
1007          gemm_nonlop_ikpt_this_proc_being_treated = my_ikpt
1008          if (istep <= 1) then
1009            !Init the arrays
1010            !FIXME Settle this
1011            if ( dtset%gpu_option == ABI_GPU_DISABLED) then
1012              call make_gemm_nonlop(my_ikpt,gs_hamk%npw_fft_k,gs_hamk%lmnmax, &
1013                  gs_hamk%ntypat, gs_hamk%indlmn, gs_hamk%nattyp, gs_hamk%istwf_k, &
1014                  gs_hamk%ucvol, gs_hamk%ffnl_k,&
1015                  gs_hamk%ph3d_k,gs_hamk%kpt_k,gs_hamk%kg_k,gs_hamk%kpg_k,&
1016                  compute_grad_atom=(optforces>0))
1017            else if ( dtset%gpu_option == ABI_GPU_LEGACY .or. dtset%gpu_option == ABI_GPU_KOKKOS) then
1018              call make_gemm_nonlop_gpu(my_ikpt,gs_hamk%npw_fft_k,gs_hamk%lmnmax, &
1019                  gs_hamk%ntypat, gs_hamk%indlmn, gs_hamk%nattyp, gs_hamk%istwf_k, &
1020                  gs_hamk%ucvol, gs_hamk%ffnl_k, &
1021                  gs_hamk%ph3d_k,gs_hamk%kpt_k,gs_hamk%kg_k,gs_hamk%kpg_k, &
1022                  compute_grad_atom=(optforces>0))
1023            else if ( dtset%gpu_option == ABI_GPU_OPENMP) then
1024              if(mpi_enreg%paral_kgb==0) then
1025                call ompgpu_load_hamilt_buffers(gs_hamk%kg_k,gs_hamk%kg_kp)
1026              else if(gs_hamk%istwf_k==1) then
1027                call ompgpu_load_hamilt_buffers(gs_hamk%kg_k,gs_hamk%kg_kp,kg_k_gather=bandfft_kpt(my_ikpt)%kg_k_gather)
1028              else if(gs_hamk%istwf_k==2) then
1029                call ompgpu_load_hamilt_buffers(gs_hamk%kg_k,gs_hamk%kg_kp,kg_k_gather=bandfft_kpt(my_ikpt)%kg_k_gather_sym)
1030              else
1031                ABI_ERROR("istwfk > 2 is not handled with OpenMP GPU offload mode !")
1032              end if
1033              call make_gemm_nonlop_ompgpu(my_ikpt,gs_hamk%npw_fft_k,gs_hamk%lmnmax, &
1034                  gs_hamk%ntypat, gs_hamk%indlmn, gs_hamk%nattyp, gs_hamk%istwf_k, &
1035                  gs_hamk%ucvol, gs_hamk%ffnl_k, &
1036                  gs_hamk%ph3d_k,gs_hamk%kpt_k,gs_hamk%kg_k,gs_hamk%kpg_k, &
1037                  compute_grad_atom=(optforces>0))
1038            end if
1039          end if
1040        end if
1041 
1042 #if defined HAVE_GPU_CUDA
1043        if (dtset%gpu_option==ABI_GPU_LEGACY .or. dtset%gpu_option==ABI_GPU_KOKKOS) then
1044          if (mpi_enreg%paral_kgb==1) then
1045            call gpu_update_ffnl_ph3d( &
1046              & my_bandfft_kpt%ph3d_gather, INT(size(my_bandfft_kpt%ph3d_gather),c_int64_t), &
1047              & my_bandfft_kpt%ffnl_gather, INT(size(my_bandfft_kpt%ffnl_gather),c_int64_t) )
1048          else
1049            call gpu_update_ffnl_ph3d( &
1050              & ph3d, INT(size(ph3d),c_int64_t), &
1051              & ffnl, INT(size(ffnl),c_int64_t) )
1052          end if
1053        end if
1054 #endif
1055 
1056        call timab(984,2,tsec)
1057 
1058 !      Update the value of ikpt,isppol in fock_exchange and allocate the memory space to perform HF calculation.
1059        if (usefock) call fock_updateikpt(fock%fock_common,ikpt,isppol)
1060        if (psps%usepaw==1 .and. usefock) then
1061          if ((fock%fock_common%optfor).and.(usefock_ACE==0)) fock%fock_common%forces_ikpt=zero
1062        end if
1063 
1064        ! Here we initialize the wavefunctions with atomic orbitals at the first GS iteration of the first
1065        ! relaxation step (if any).
1066        ! NB: Not all the cases are presently supported.
1067        !print *, "istep, itimes(1), wfinit", istep, itimes(1), dtset%wfinit
1068        ! FIXME: This check is not enough as I need to check whether cg have been read from WFK file
1069        if (istep == 1 .and. itimes(1) == 0 .and. dtset%wfinit /= 0) then
1070          call cg_from_atoms(ikpt, isppol, rprimd, xred, kg_k, cg(:,icg+1:), dtset, psps, eig_k, gs_hamk, &
1071                             mpi_enreg, nband_k, npw_k, my_nspinor)
1072        end if
1073 
1074        ABI_NVTX_START_RANGE(NVTX_VTOWFK)
1075 !      Compute the eigenvalues, wavefunction, residuals,
1076 !      contributions to kinetic energy, nuclear dipole energy,
1077 !      nonlocal energy, forces,
1078 !      and update of rhor to this k-point and this spin polarization.
1079        call vtowfk(cg,cgq,cprj,cpus,dphase_k,dtefield,dtfil,&
1080 &       dtset,eig_k,ek_k,ek_k_nd,end_k,enlx_k,fixed_occ,grnl_k,gs_hamk,&
1081 &       ibg,icg,ikpt,iscf,isppol,kg_k,kinpw,mband_cprj,mcg,mcgq,mcprj_local,mkgq,&
1082 &       mpi_enreg,dtset%mpw,natom,nband_k,nbdbuf_eff,dtset%nkpt,istep,nnsclo_now,npw_k,npwarr,&
1083 &       occ_k,optforces,prtvol,pwind,pwind_alloc,pwnsfac,pwnsfacq,resid_k,&
1084 &       rhoaug,paw_dmft,dtset%wtk(ikpt),zshift, rmm_diis_status(:,ikpt,isppol))
1085        ABI_NVTX_END_RANGE()
1086 
1087 ! LB-01/03/2024: Very weird compiler error on eos-nvhpc23.1 if the second call of timab(985,...) is included...
1088 ! Drastic short-term solution : disable this timing for nvhpc... In fact this part is not important unless fock is activated
1089 ! Note: Should we keep nvhpc-23.1 in eos?
1090 #ifndef FC_NVHPC
1091        call timab(985,1,tsec)
1092 #endif
1093 
1094 #if defined HAVE_GPU_CUDA
1095        if(dtset%gpu_option==ABI_GPU_LEGACY .or. dtset%gpu_option==ABI_GPU_KOKKOS) call gpu_finalize_ffnl_ph3d()
1096 #endif
1097        ABI_FREE(ffnl)
1098 
1099        if(dtset%gpu_option==ABI_GPU_KOKKOS) then
1100 #if defined HAVE_GPU && defined HAVE_YAKL
1101          ABI_FREE_MANAGED(kinpw)
1102          ABI_FREE_MANAGED(kg_k)
1103 #endif
1104        else
1105          ABI_FREE(kinpw)
1106          ABI_FREE(kg_k)
1107        end if
1108 
1109        ABI_FREE(kpg_k)
1110        ABI_FREE(ylm_k)
1111        ABI_FREE(ph3d)
1112        ABI_FREE(cgq)
1113        ABI_FREE(pwnsfacq)
1114 
1115 !      electric field
1116        if (berryflag) then
1117          dphasek(:,ikpt + (isppol - 1)*dtset%nkpt) = dphase_k(:)
1118 
1119 !        The overlap matrices for all first neighbours of ikpt are no more up to date
1120          do idir = 1, 3
1121            do ifor = 1, 2
1122              ikpt1 = dtefield%ikpt_dk(dtefield%i2fbz(ikpt),ifor,idir)
1123              ikpt1 = dtefield%indkk_f2ibz(ikpt1,1)
1124              ifor1 = -1*ifor + 3   ! ifor = 1 -> ifor1 = 2 & ifor = 2 -> ifor1 = 1
1125              dtefield%sflag(:,ikpt1+(isppol-1)*dtset%nkpt,ifor1,idir) = 0
1126            end do
1127          end do
1128        end if  ! berryflag
1129 
1130 !      Save eigenvalues (hartree), residuals (hartree**2)
1131        eigen(1+bdtot_index : nband_k+bdtot_index) = eig_k(:)
1132        eknk (1+bdtot_index : nband_k+bdtot_index) = ek_k (:)
1133        if(usefock) then
1134          focknk (1+bdtot_index : nband_k+bdtot_index) = fock%fock_common%eigen_ikpt (:)
1135          if (optforces>0) fockfornk(:,:,1+bdtot_index : nband_k+bdtot_index) = fock%fock_common%forces_ikpt(:,:,:)
1136        end if
1137        if(paw_dmft%use_dmft==1) eknk_nd(isppol,ikpt,:,:,:) = ek_k_nd(:,:,:)
1138        resid(1+bdtot_index : nband_k+bdtot_index) = resid_k(:)
1139        if (optforces>0) grnlnk(:,1+bdtot_index : nband_k+bdtot_index) = grnl_k(:,:)
1140        enlxnk(1+bdtot_index : nband_k+bdtot_index) = enlx_k(:)
1141 
1142        if(iscf>0 .or. iscf==-3)then
1143 !        Accumulate sum over k points for band, nonlocal and kinetic energies,
1144 !        also accumulate gradients of Enonlocal:
1145          do iband=1,nband_k
1146            if (abs(occ_k(iband))>tol8) then
1147              energies%e_kinetic     = energies%e_kinetic     + dtset%wtk(ikpt)*occ_k(iband)*ek_k(iband)
1148              energies%e_nucdip     = energies%e_nucdip     + dtset%wtk(ikpt)*occ_k(iband)*end_k(iband)
1149              energies%e_eigenvalues = energies%e_eigenvalues + dtset%wtk(ikpt)*occ_k(iband)*eig_k(iband)
1150              energies%e_nlpsp_vfock = energies%e_nlpsp_vfock + dtset%wtk(ikpt)*occ_k(iband)*enlx_k(iband)
1151              if (optforces>0) grnl(:)=grnl(:)+dtset%wtk(ikpt)*occ_k(iband)*grnl_k(:,iband)
1152              if (usefock) then
1153                energies%e_fock=energies%e_fock + half*fock%fock_common%eigen_ikpt(iband)*occ_k(iband)*dtset%wtk(ikpt)
1154                if (usefock_ACE==0) energies%e_fock0=energies%e_fock
1155              endif
1156            end if
1157          end do
1158 
1159 !        Calculate Fock contribution to the total energy if required
1160          if ((psps%usepaw==1).and.(usefock)) then
1161            if ((fock%fock_common%optfor).and.(usefock_ACE==0)) then
1162              !WARNING : this routine actually does NOT compute the Fock contrib to total energy, but modifies the force ONLY.
1163              call fock_calc_ene(dtset,fock%fock_common,energies%e_exactX,ikpt,nband_k,occ_k)
1164            end if
1165          end if
1166        end if
1167 
1168        if ( dtset%gpu_option == ABI_GPU_OPENMP) then
1169          call ompgpu_free_hamilt_buffers()
1170        end if
1171 ! LB-01/03/2024: Very weird compiler error on eos-nvhpc23.1 if the second call of timab(985,...) is included...
1172 ! Drastic short-term solution : disable this timing for nvhpc... In fact this part is not important unless fock is activated
1173 #ifndef FC_NVHPC
1174        call timab(985,2,tsec)
1175 #endif
1176 
1177        ABI_FREE(ek_k)
1178        ABI_FREE(ek_k_nd)
1179        ABI_FREE(end_k)
1180        ABI_FREE(grnl_k)
1181        ABI_FREE(occ_k)
1182        ABI_FREE(zshift)
1183        ABI_FREE(enlx_k)
1184 
1185        if(dtset%gpu_option==ABI_GPU_KOKKOS) then
1186 #if defined HAVE_GPU && defined HAVE_YAKL
1187          ABI_FREE_MANAGED(eig_k)
1188          ABI_FREE_MANAGED(resid_k)
1189 #endif
1190        else
1191          ABI_FREE(eig_k)
1192          ABI_FREE(resid_k)
1193        end if
1194 
1195 !      Keep track of total number of bands (all k points so far, even for k points not treated by me)
1196        bdtot_index=bdtot_index+nband_k
1197 
1198 !      Also shift array memory if dtset%mkmem/=0
1199        if (dtset%mkmem/=0) then
1200          ibg=ibg+my_nspinor*nband_cprj_k
1201          icg=icg+npw_k*my_nspinor*nband_k
1202          ikg=ikg+npw_k
1203        end if
1204 
1205      end do ! End big k point loop
1206 
1207      call timab(986,1,tsec)
1208 
1209      if (fixed_occ .and. mpi_enreg%paral_kgb==1) then
1210        call xmpi_sum(rhoaug,mpi_enreg%comm_bandspinorfft,ierr) !Sum the contributions over bands/FFT/spinors
1211      end if
1212 
1213 !    Transfer density on augmented fft grid to normal fft grid in real space
1214 !    Also take into account the spin.
1215      if(iscf>0.or.iscf==-3)then
1216        if (psps%usepaw==0) then
1217          call fftpac(isppol,mpi_enreg,dtset%nspden,n1,n2,n3,n4,n5,n6,dtset%ngfft,rhor,rhoaug(:,:,:,1),1)
1218          if(dtset%nspden==4)then
1219            do imagn=2,4
1220              call fftpac(imagn,mpi_enreg,dtset%nspden,n1,n2,n3,n4,n5,n6,dtset%ngfft,rhor,rhoaug(:,:,:,imagn),1)
1221            end do
1222          end if
1223        else
1224          call fftpac(isppol,mpi_enreg,dtset%nspden,n1,n2,n3,n4,n5,n6,dtset%ngfft,rhowfr,rhoaug(:,:,:,1),1)
1225          if(dtset%nspden==4)then
1226            do imagn=2,4
1227              call fftpac(imagn,mpi_enreg,dtset%nspden,n1,n2,n3,n4,n5,n6,dtset%ngfft,rhowfr,rhoaug(:,:,:,imagn),1)
1228            end do
1229          end if
1230        end if
1231      end if
1232 
1233      call timab(986,2,tsec)
1234 
1235    end do ! End loop over spins
1236 
1237    call timab(988,1,tsec)
1238    if (usefock) then
1239      if (usefock_ACE==0) then
1240        call xmpi_sum(energies%e_fock0,mpi_enreg%comm_kpt,ierr)
1241      end if
1242      if(fock%fock_common%optfor) call xmpi_sum(fock%fock_common%forces,mpi_enreg%comm_kpt,ierr)
1243    end if
1244 !  Electric field: compute string-averaged change in Zak phase
1245 !  along each direction, store it in dphase(idir)
1246 !  ji: it is not convenient to do this anymore. Remove. Set dphase(idir)=0.0_dp.
1247 !  eventually, dphase(idir) will have to go...
1248    if (berryflag) then
1249      dphase(:) = zero
1250 !    In case of MPI // of a finite field calculation, send dphasek to all cpus
1251      call xmpi_sum(dphasek,spaceComm_distrb,ierr)
1252      ABI_FREE(dphasek)
1253    end if ! berryflag
1254 
1255    if(dtset%gpu_option==ABI_GPU_KOKKOS) then
1256 #if defined HAVE_GPU && defined HAVE_YAKL
1257      ABI_FREE_MANAGED(rhoaug)
1258      ABI_FREE_MANAGED(vlocal)
1259 #endif
1260    else
1261      ABI_FREE(rhoaug)
1262      ABI_FREE(vlocal)
1263    end if
1264 
1265    if(with_vxctau) then
1266      ABI_FREE(vxctaulocal)
1267    end if
1268    if(has_vectornd) then
1269       ABI_FREE(vectornd_pac)
1270    end if
1271 
1272    call timab(988,2,tsec)
1273 
1274    ABI_MALLOC(doccde,(dtset%mband*dtset%nkpt*dtset%nsppol))
1275    doccde(:)=zero
1276 
1277 !  Treat now varying occupation numbers, in the self-consistent case
1278    if((.not.fixed_occ) .and. (iscf>0.or.iscf==-3)) then
1279 
1280 !    Parallel case
1281      if (mpi_enreg%nproc_spkpt>1) then
1282 
1283        call timab(989,1,tsec)
1284 
1285 !      If needed, exchange the values of eigen,resid,eknk,enlxnk,grnlnk
1286        ABI_MALLOC(buffer1,((4+3*natom*optforces+dtset%usefock+3*natom*dtset%usefock*optforces)*mbdkpsp))
1287        if(paw_dmft%use_dmft==1) then
1288          ABI_MALLOC(buffer2,(mb2dkpsp*paw_dmft%use_dmft))
1289        end if
1290 !      Pack eigen,resid,eknk,enlxnk,grnlnk in buffer1
1291        buffer1(1          :  mbdkpsp)=eigen(:)
1292        buffer1(1+  mbdkpsp:2*mbdkpsp)=resid(:)
1293        buffer1(1+2*mbdkpsp:3*mbdkpsp)=eknk(:)
1294        buffer1(1+3*mbdkpsp:4*mbdkpsp)=enlxnk(:)
1295        index1=4*mbdkpsp
1296        if (optforces>0) then
1297          buffer1(index1+1:index1+3*natom*mbdkpsp)=reshape(grnlnk,(/(3*natom)*mbdkpsp/) )
1298          index1=index1+3*natom*mbdkpsp
1299        end if
1300        if (usefock) then
1301          buffer1(1+index1:index1+mbdkpsp)=focknk(:)
1302          if (optforces>0) then
1303            index1=index1+mbdkpsp
1304            buffer1(index1+1:index1+3*natom*mbdkpsp)=reshape(fockfornk,(/(3*natom)*mbdkpsp/) )
1305          end if
1306        end if
1307        if(paw_dmft%use_dmft==1) then
1308          nnn=0
1309          do ikpt=1,dtset%nkpt
1310            do isppol=1,dtset%nsppol
1311              do iband=1,dtset%mband
1312                do iband1=1,dtset%mband
1313                  do iplex=1,2
1314                    nnn=nnn+1
1315                    buffer2(nnn)=eknk_nd(isppol,ikpt,iplex,iband,iband1)
1316                  end do
1317                end do
1318              end do
1319            end do
1320          end do
1321          if(nnn.ne.mb2dkpsp)  then
1322            write(msg,*)' BUG in vtorho2, buffer2',nnn,mb2dkpsp
1323            ABI_BUG(msg)
1324          end if
1325        end if
1326 !      Build sum of everything
1327        call timab(48,1,tsec)
1328        call xmpi_sum(buffer1,mpi_enreg%comm_kpt,ierr)
1329 !      if(mpi_enreg%paral_kgb/=1.and.paw_dmft%use_dmft==1) then
1330        if(paw_dmft%use_dmft==1) call xmpi_sum(buffer2,mpi_enreg%comm_kpt,ierr)
1331        call timab(48,2,tsec)
1332 
1333 !      Unpack eigen,resid,eknk,enlxnk,grnlnk in buffer1
1334        eigen(:) =buffer1(1          :  mbdkpsp)
1335        resid(:) =buffer1(1+  mbdkpsp:2*mbdkpsp)
1336        eknk(:)  =buffer1(1+2*mbdkpsp:3*mbdkpsp)
1337        enlxnk(:) =buffer1(1+3*mbdkpsp:4*mbdkpsp)
1338        index1=4*mbdkpsp
1339        if (optforces>0) then
1340          grnlnk(:,:)=reshape(buffer1(index1+1:index1+3*natom*mbdkpsp),(/3*natom,mbdkpsp/) )
1341        end if
1342        if (usefock) then
1343          focknk(:)=buffer1(1+index1:index1+mbdkpsp)
1344          if (optforces>0) then
1345            index1=index1+mbdkpsp
1346            fockfornk(:,:,:)=reshape(buffer1(index1+1:index1+3*natom*mbdkpsp),(/3,natom,mbdkpsp/) )
1347          end if
1348        end if
1349        if(paw_dmft%use_dmft==1) then
1350          nnn=0
1351          do ikpt=1,dtset%nkpt
1352            do isppol=1,dtset%nsppol
1353              do iband=1,dtset%mband
1354                do iband1=1,dtset%mband
1355                  do iplex=1,2
1356                    nnn=nnn+1
1357                    eknk_nd(isppol,ikpt,iplex,iband,iband1)=buffer2(nnn)
1358                  end do
1359                end do
1360              end do
1361            end do
1362          end do
1363        end if
1364        if(allocated(buffer2))  then
1365          ABI_FREE(buffer2)
1366        end if
1367        ABI_FREE(buffer1)
1368        call timab(989,2,tsec)
1369 
1370      end if ! nproc_spkpt>1
1371 
1372 !    Compute extfpmd u0 energy shift factor from eigenvalues and kinetic energy.
1373      if(associated(extfpmd)) then
1374        extfpmd%vtrial=vtrial
1375        call extfpmd%compute_shiftfactor(eigen,eknk,dtset%mband,mpi_enreg%me,&
1376 &       dtset%nband,dtset%nkpt,dtset%nsppol,dtset%wtk)
1377      end if
1378 
1379 !    Compute the new occupation numbers from eigen
1380      call timab(990,1,tsec)
1381      call newocc(doccde,eigen,energies%entropy,energies%e_fermie,energies%e_fermih,dtset%ivalence,&
1382 &     dtset%spinmagntarget,dtset%mband,dtset%nband,dtset%nelect,dtset%ne_qFD,dtset%nh_qFD,&
1383 &     dtset%nkpt,dtset%nspinor,dtset%nsppol,occ,dtset%occopt,prtvol,dtset%tphysel,&
1384 &     dtset%tsmear,dtset%wtk,&
1385 &     prtstm=dtset%prtstm,stmbias=dtset%stmbias,extfpmd=extfpmd)
1386      call timab(990,2,tsec)
1387 
1388 
1389 !    Compute number of free electrons of extfpmd model
1390      if(associated(extfpmd)) then
1391        extfpmd%nelect=zero
1392        call extfpmd%compute_nelect(energies%e_fermie,extfpmd%nelect,dtset%tsmear)
1393        call extfpmd%compute_e_kinetic(energies%e_fermie,dtset%tsmear)
1394        call extfpmd%compute_entropy(energies%e_fermie,dtset%tsmear)
1395      end if
1396 
1397 !    !=========  DMFT call begin ============================================
1398      dmft_dftocc=0
1399      if(paw_dmft%use_dmft==1.and.psps%usepaw==1.and.dtset%nbandkss==0) then
1400        call timab(991,1,tsec)
1401 
1402        if (dtset%dmftcheck>=0.and.dtset%usedmft>=1.and.(sum(pawtab(:)%upawu)>=tol8.or.  &
1403 &       sum(pawtab(:)%jpawu)>tol8).and.dtset%dmft_entropy==0) energies%entropy=zero
1404 
1405 !      ==  0 to a dmft calculation and do not use lda occupations
1406 !      ==  1 to a lda calculation with the dmft loop
1407        if(dtset%dmftcheck==-1) dmft_dftocc=1
1408 
1409 !      ==  initialise occnd
1410        paw_dmft%occnd=zero
1411 
1412        bdtot_index = 1
1413        do isppol=1,dtset%nsppol
1414          do ikpt=1,dtset%nkpt
1415            do iband=1,dtset%nband(ikpt+(isppol-1)*dtset%nkpt)
1416              paw_dmft%occnd(1,iband,iband,ikpt,isppol)=occ(bdtot_index)
1417              bdtot_index = bdtot_index + 1
1418            end do
1419          end do
1420        end do
1421 
1422 
1423        if(dmft_dftocc==0) then
1424          if(dtset%occopt/=3) then
1425            ABI_ERROR('occopt should be equal to 3 in dmft')
1426          end if
1427 !        ==  initialise edmft
1428          if(paw_dmft%use_dmft>=1) edmft = zero
1429 
1430          !  Compute residm to check the value
1431          ibdkpt=1
1432          residm=zero
1433          do isppol=1,dtset%nsppol
1434            do ikpt=1,dtset%nkpt
1435              nband_k=dtset%nband(ikpt+(isppol-1)*dtset%nkpt)
1436              if (nbdbuf_eff>=0) then
1437                nband_eff=max(1,nband_k-nbdbuf_eff)
1438                residm=max(residm,maxval(resid(ibdkpt:ibdkpt+nband_eff-1)))
1439              else if (nbdbuf_eff==-101) then
1440                residm=max(residm,maxval(occ(ibdkpt:ibdkpt+nband_k-1)*resid(ibdkpt:ibdkpt+nband_k-1)))
1441              else
1442                ABI_ERROR('Bad value of nbdbuf_eff')
1443              end if
1444              ibdkpt=ibdkpt+nband_k
1445            end do
1446          end do
1447 
1448          ! Test residm
1449          if (paw_dmft%use_dmft>0 .and. residm>tol4 .and. dtset%dmftcheck>=0) then
1450            if(dtset%dmft_entropy>0)  then
1451              write(msg,'(a,e12.3)')&
1452                ' WARNING: Wavefunctions not converged: DFT+DMFT calculation cannot be carried out safely ',residm
1453              call wrtout(std_out,msg)
1454            else
1455              write(msg,'(a,e12.3)')&
1456               ' ERROR: Wavefunctions not converged: DFT+DMFT calculation cannot be carried out safely ',residm
1457              call wrtout(std_out,msg)
1458              write(msg,'(a,i0)')'  Action: increase nline and nnsclo',dtset%nstep
1459              ABI_ERROR(msg)
1460            end if
1461 
1462          else if (paw_dmft%use_dmft>0 .and. residm>tol10.and. dtset%dmftcheck>=0) then
1463            write(msg,'(3a)')ch10,&
1464             '  Wavefunctions not converged: DFT+DMFT calculation might not be carried out safely ',ch10
1465            ABI_WARNING(msg)
1466          end if
1467 
1468 !        ==  allocate paw_dmft%psichi and paw_dmft%eigen_dft
1469          call init_dmft(dmatpawu,dtset,energies%e_fermie,dtfil%fnameabo_app,&
1470            dtfil%filnam_ds(3),dtset%nspinor,paw_dmft,pawtab,psps,dtset%typat)
1471          call print_dmft(paw_dmft,dtset%pawprtvol)
1472 
1473 !        ==  gather crystal structure date into data "cryst_struc"
1474          remove_inv=.false.
1475          if(dtset%nspden==4) remove_inv=.true.
1476          call crystal_init(dtset%amu_orig(:,1),cryst_struc,dtset%spgroup,natom,dtset%npsp,ntypat, &
1477           dtset%nsym,rprimd,dtset%typat,xred,dtset%ziontypat,dtset%znucl,1,&
1478           dtset%nspden==2.and.dtset%nsppol==1,remove_inv,hdr%title,&
1479           dtset%symrel,dtset%tnons,dtset%symafm)
1480 
1481 !        ==  compute psichi
1482          call xmpi_barrier(spaceComm_distrb)
1483          call init_oper(paw_dmft,dft_occup)
1484          call flush_unit(std_out)
1485          call timab(620,1,tsec)
1486          call datafordmft(cryst_struc,cprj,gs_hamk%dimcprj,dtset,eigen,energies%e_fermie,&
1487 &         dft_occup,dtset%mband,mband_cprj,dtset%mkmem,mpi_enreg,&
1488 &         dtset%nkpt,my_nspinor,dtset%nsppol,occ,&
1489 &         paw_dmft,paw_ij,pawang,pawtab,psps,usecprj_local,dtfil%unpaw)
1490          call timab(620,2,tsec)
1491          call flush_unit(std_out)
1492 
1493 
1494 !        ==  solve dmft loop
1495          call xmpi_barrier(spaceComm_distrb)
1496 
1497          call dmft_solve(cryst_struc,istep,dft_occup,paw_dmft,pawang,pawtab,dtset%pawprtvol)
1498          edmft=paw_dmft%edmft
1499          energies%e_paw=energies%e_paw+edmft
1500          energies%e_pawdc=energies%e_pawdc+edmft
1501          call flush_unit(std_out)
1502 !        paw_dmft%occnd(:,:,:,:,:)=0.5_dp
1503 
1504 !        For compatibility with old test, do not use for calculation
1505          if(dtset%dmft_occnd_imag==0) paw_dmft%occnd(2,:,:,:,:)=zero
1506 
1507 !        call print_dmft(paw_dmft,dtset%pawprtvol)
1508 !         if(dtset%paral_kgb==1) then
1509 !           write(msg,'(5a)')ch10,&
1510 !&           ' Parallelization over bands is not yet compatible with self-consistency in DMFT ',ch10,&
1511 !&           ' Calculation of density does not taken into account non diagonal occupations',ch10
1512 !           call wrtout(std_out,msg)
1513 !           call wrtout(ab_out,msg)
1514 !!          ABI_ERROR(msg)
1515 !           if(dtset%nstep>1) then
1516 !             write(msg,'(a,i0)')'  Action: use nstep=1 instead of nstep=',dtset%nstep
1517 !             ABI_ERROR(msg)
1518 !           end if
1519 !           residm=zero
1520 !         end if
1521 !        if(dtset%nspinor==2) then
1522 !          call flush_unit(ab_out)
1523 !          write(msg,'(3a)')&
1524 !          &         ' Self consistent DFT+DMFT with nspinor==2 is not possible yet ',ch10,&
1525 !          &         ' Calculation are restricted to nstep =1'
1526 !          !         ABI_ERROR(msg)
1527 !          if(dtset%nstep>1) then
1528 !          write(msg,'(a,i0)')' Action: use nstep=1 instead of nstep=',dtset%nstep
1529 !          !           ABI_ERROR(msg)
1530 !          endif
1531 !        end if
1532 
1533          if(me_distrb==0) then
1534            call saveocc_dmft(paw_dmft)
1535          end if
1536          call destroy_dmft(paw_dmft)
1537 
1538 !        ==  destroy crystal_t cryst_struc
1539          call cryst_struc%free()
1540          call destroy_oper(dft_occup)
1541        end if ! dmft_dftocc
1542        call timab(991,2,tsec)
1543 
1544      end if ! usedmft
1545 
1546      if(dtset%nbandkss/=0) then
1547        write(msg,'(a,i3,2a,i3,4a)') &
1548         " dtset%nbandkss = ",dtset%nbandkss,ch10,&
1549         " and dtset%usedmft = ",dtset%usedmft,ch10,&
1550         " a DFT loop is carried out without DMFT.",ch10,&
1551         " Only psichi's will be written at convergence of the DFT loop."
1552        call wrtout(std_out,msg)
1553      end if
1554 !    !=========  DMFT call end   ============================================
1555 
1556      call timab(992,1,tsec)
1557 
1558 !    Compute eeig, ek,enl and grnl from the new occ, and the shared eknk,enlxnk,grnlnk
1559      energies%e_eigenvalues = zero
1560      energies%e_kinetic     = zero
1561      energies%e_nlpsp_vfock = zero
1562      if (usefock) then
1563        energies%e_fock     = zero
1564        if (optforces>0) fock%fock_common%forces=zero
1565      end if
1566      if (optforces>0) grnl(:)=zero
1567      if(paw_dmft%use_dmft>=1) then
1568        ebandlda               = zero
1569        ebanddmft              = zero
1570        ebandldatot            = zero
1571        ekindmft               = zero
1572        ekindmft2              = zero
1573        ekinlda                = zero
1574      end if
1575 
1576 !    Compute new energy terms due to non diagonal occupations and DMFT.
1577      bdtot_index=1
1578      do isppol=1,dtset%nsppol
1579        do ikpt=1,dtset%nkpt
1580          nband_k=dtset%nband(ikpt+(isppol-1)*dtset%nkpt)
1581          do iband=1,nband_k
1582 
1583            locc_test = abs(occ(bdtot_index))>tol8
1584 !          dmft
1585            if(paw_dmft%use_dmft>=1.and.dtset%nbandkss==0) then
1586              if(paw_dmft%band_in(iband)) then
1587                if( paw_dmft%use_dmft == 1 .and. dmft_dftocc == 1 ) then ! test of the code
1588                  paw_dmft%occnd(1,iband,iband,ikpt,isppol)=occ(bdtot_index)
1589                end if
1590                locc_test = abs(paw_dmft%occnd(1,iband,iband,ikpt,isppol))+&
1591 &               abs(paw_dmft%occnd(2,iband,iband,ikpt,isppol))>tol8
1592              end if
1593            end if
1594 
1595            if (locc_test) then
1596 !            dmft
1597              if(paw_dmft%use_dmft==1.and.dtset%nbandkss==0) then
1598                ebandldatot=ebandldatot+dtset%wtk(ikpt)*occ(bdtot_index)*eigen(bdtot_index)
1599                if(paw_dmft%band_in(iband)) then
1600                  ebandlda=ebandlda+dtset%wtk(ikpt)*occ(bdtot_index)*eigen(bdtot_index)
1601                  ekinlda=ekinlda+dtset%wtk(ikpt)*occ(bdtot_index)*eknk(bdtot_index)
1602                  occ(bdtot_index)=paw_dmft%occnd(1,iband,iband,ikpt,isppol)
1603                  ebanddmft=ebanddmft+dtset%wtk(ikpt)*occ(bdtot_index)*eigen(bdtot_index)
1604                  ekindmft=ekindmft+dtset%wtk(ikpt)*occ(bdtot_index)*eknk(bdtot_index)
1605                end if
1606              end if
1607 
1608              energies%e_eigenvalues = energies%e_eigenvalues + &
1609 &             dtset%wtk(ikpt)*occ(bdtot_index)*eigen(bdtot_index)
1610              energies%e_kinetic = energies%e_kinetic + &
1611 &             dtset%wtk(ikpt)*occ(bdtot_index)*eknk(bdtot_index)
1612              energies%e_nlpsp_vfock = energies%e_nlpsp_vfock + &
1613 &             dtset%wtk(ikpt)*occ(bdtot_index)*enlxnk(bdtot_index)
1614              if (usefock) then
1615                energies%e_fock=energies%e_fock + half*focknk(bdtot_index)*occ(bdtot_index)*dtset%wtk(ikpt)
1616                if (optforces>0) fock%fock_common%forces(:,:)=fock%fock_common%forces(:,:)+&
1617 &               dtset%wtk(ikpt)*occ(bdtot_index)*fockfornk(:,:,bdtot_index)
1618              end if
1619              if (optforces>0) grnl(:)=grnl(:)+dtset%wtk(ikpt)*occ(bdtot_index)*grnlnk(:,bdtot_index)
1620            end if
1621            bdtot_index=bdtot_index+1
1622            if(paw_dmft%use_dmft==1.and.dtset%nbandkss==0) then
1623              do iband1=1,nband_k
1624                if(paw_dmft%band_in(iband).and.paw_dmft%band_in(iband1)) then
1625 !                write(std_out,*) "II+", isppol,ikpt,iband,iband1
1626                  ekindmft2=ekindmft2  +  dtset%wtk(ikpt)*paw_dmft%occnd(1,iband,iband1,ikpt,isppol)*&
1627 &                 eknk_nd(isppol,ikpt,1,iband,iband1)
1628                  ekindmft2=ekindmft2  -  dtset%wtk(ikpt)*paw_dmft%occnd(2,iband,iband1,ikpt,isppol)*&
1629 &                 eknk_nd(isppol,ikpt,2,iband,iband1)
1630 !                write(std_out,*) "II", occnd(1,iband,iband1,ikpt,isppol),eknk_nd(isppol,ikpt,iband,iband1)
1631                end if
1632              end do
1633            end if
1634          end do
1635        end do
1636      end do
1637 
1638      if(paw_dmft%use_dmft==1) then
1639        energies%e_kinetic = energies%e_kinetic -ekindmft+ekindmft2
1640        if(abs(dtset%pawprtvol)>=2) then
1641          write(msg,'(4a,7(2x,a,2x,e14.7,a),a)') &
1642            "-----------------------------------------------",ch10,&
1643            "--- Energy for DMFT and tests (in Ha)  ",ch10,&
1644            "--- Ebandldatot    (Ha.) = ",ebandldatot,ch10,&
1645            "--- Ebandlda       (Ha.) = ",ebandlda,ch10,&
1646            "--- Ebanddmft      (Ha.) = ",ebanddmft,ch10,&
1647            "--- Ekinlda        (Ha.) = ",ekinlda,ch10, &
1648            "--- Ekindmftdiag   (Ha.) = ",ekindmft,ch10,&
1649            "--- Ekindmftnondiag(Ha.) = ",ekindmft2,ch10,&
1650            "--- Edmft=         (Ha.) = ",edmft,ch10,&
1651            "-----------------------------------------------"
1652          call wrtout(std_out,msg)
1653        end if
1654 !       if(paw_dmft%use_dmft==1.and.mpi_enreg%paral_kgb==1) paw_dmft%use_dmft=0
1655      end if
1656 
1657      ABI_NVTX_START_RANGE(NVTX_MKRHO)
1658      if (psps%usepaw==0) then
1659        call mkrho(cg,dtset,gprimd,irrzon,kg,mcg,mpi_enreg,npwarr,occ,paw_dmft,phnons,&
1660 &       rhog,rhor,rprimd,tim_mkrho,ucvol,wvl%den,wvl%wfs,&
1661 &       extfpmd=extfpmd)
1662      else
1663        call mkrho(cg,dtset,gprimd,irrzon,kg,mcg,mpi_enreg,npwarr,occ,paw_dmft,phnons,&
1664 &       rhowfg,rhowfr,rprimd,tim_mkrho,ucvol,wvl%den,wvl%wfs,&
1665 &       extfpmd=extfpmd)
1666     end if
1667     ABI_NVTX_END_RANGE()
1668      call timab(992,2,tsec)
1669 
1670 !    Treat fixed occupation numbers or non-self-consistent case
1671    else
1672 
1673      if (mpi_enreg%nproc_spkpt>1) then
1674 
1675        call timab(989,1,tsec)
1676 
1677        nbuf=2*mbdkpsp+dtset%nfft*dtset%nspden+4+3*natom*optforces
1678        ! If Hartree-Fock calculation, the exact exchange energy is k-dependent.
1679        if(dtset%usefock==1) then
1680          nbuf=nbuf+1
1681          if (optforces>0) nbuf=nbuf+3*natom
1682        end if
1683        if(iscf==-1 .or. iscf==-2)nbuf=2*mbdkpsp
1684        ABI_MALLOC(buffer1,(nbuf))
1685        ! Pack eigen,resid,rho[wf]r,grnl,enl,ek
1686        buffer1(1:mbdkpsp)=eigen(:)
1687        buffer1(1+mbdkpsp:2*mbdkpsp)=resid(:)
1688        index1=2*mbdkpsp
1689        if(iscf/=-1 .and. iscf/=-2)then
1690          if (psps%usepaw==0) then
1691            buffer1(index1+1:index1+dtset%nfft*dtset%nspden)=reshape(rhor,(/dtset%nfft*dtset%nspden/))
1692          else
1693            buffer1(index1+1:index1+dtset%nfft*dtset%nspden)=reshape(rhowfr,(/dtset%nfft*dtset%nspden/))
1694          end if
1695          index1=index1+dtset%nfft*dtset%nspden
1696          buffer1(index1+1) = energies%e_kinetic
1697          buffer1(index1+2) = energies%e_eigenvalues
1698          buffer1(index1+3) = energies%e_nlpsp_vfock
1699          buffer1(index1+4) = energies%e_nucdip
1700          index1=index1+4
1701          ! If Hartree-Fock calculation, save e_fock in buffer1
1702          if (dtset%usefock==1) then
1703            buffer1(index1+1) = energies%e_fock
1704            index1=index1+1
1705            if (optforces>0)then
1706              buffer1(index1+1:index1+3*natom)=reshape(fock%fock_common%forces,(/3*natom/))
1707              index1=index1+3*natom
1708            end if
1709          end if
1710          if (optforces>0) buffer1(index1+1:index1+3*natom)=grnl(1:3*natom)
1711        end if
1712 
1713        ! Build sum of everything
1714        call timab(48,1,tsec)
1715        call xmpi_sum(buffer1,nbuf,mpi_enreg%comm_kpt ,ierr)
1716        call timab(48,2,tsec)
1717 
1718        ! Unpack the final result
1719        eigen(:)=buffer1(1:mbdkpsp)
1720        resid(:)=buffer1(1+mbdkpsp:2*mbdkpsp)
1721        index1=2*mbdkpsp
1722        if(iscf/=-1 .and. iscf/=-2)then
1723          if (psps%usepaw==0) then
1724            ii=1
1725            do ispden=1,dtset%nspden
1726              do ifft=1,dtset%nfft
1727                rhor(ifft,ispden)=buffer1(index1+ii)
1728                ii=ii+1
1729              end do
1730            end do
1731          else
1732            ii=1
1733            do ispden=1,dtset%nspden
1734              do ifft=1,dtset%nfft
1735                rhowfr(ifft,ispden)=buffer1(index1+ii)
1736                ii=ii+1
1737              end do
1738            end do
1739          end if
1740          index1=index1+dtset%nfft*dtset%nspden
1741          energies%e_kinetic = buffer1(index1+1)
1742          energies%e_eigenvalues = buffer1(index1+2)
1743          energies%e_nlpsp_vfock = buffer1(index1+3)
1744          energies%e_nucdip = buffer1(index1+4)
1745          index1=index1+4
1746          ! If Hartree-Fock calculation, save e_fock in buffer1
1747          if (dtset%usefock==1) then
1748            energies%e_fock = buffer1(index1+1)
1749            index1=index1+1
1750            if (optforces>0) then
1751              fock%fock_common%forces(:,:)=reshape(buffer1(index1+1:index1+3*natom),(/3,natom/))
1752              index1=index1+3*natom
1753            end if
1754          end if
1755          if (optforces>0) grnl(1:3*natom)=buffer1(index1+1:index1+3*natom)
1756        end if
1757        ABI_FREE(buffer1)
1758        call timab(989,2,tsec)
1759 
1760      end if ! nproc_spkpt>1
1761 
1762 !    Compute the highest occupied eigenenergy
1763      if(iscf/=-1 .and. iscf/=-2)then
1764        call timab(993,1,tsec)
1765        energies%e_fermie = -huge(one)
1766        if (dtset%occopt==9) then
1767           energies%e_fermih = -huge(one)
1768        end if
1769        bdtot_index=1
1770        do isppol=1,dtset%nsppol
1771          do ikpt=1,dtset%nkpt
1772            nband_k=dtset%nband(ikpt+(isppol-1)*dtset%nkpt)
1773            if (dtset%occopt/=9) then
1774               do iband=1,nband_k
1775                  if(abs(occ(bdtot_index))>tol8 .and. eigen(bdtot_index)>energies%e_fermie+tol10) then
1776                    energies%e_fermie=eigen(bdtot_index)
1777                  end if
1778                  bdtot_index=bdtot_index+1
1779               end do
1780            else
1781               ! In case occopt 9, computing the fermi level for the electrons remaining in the VB = fermi level of holes
1782               do iband=1,dtset%ivalence
1783                  if(abs(occ(bdtot_index))>tol8 .and. eigen(bdtot_index)>energies%e_fermih+tol10) then
1784                    energies%e_fermih=eigen(bdtot_index)
1785                  end if
1786                  bdtot_index=bdtot_index+1
1787               end do
1788               ! In case occopt 9, computing the fermi level for the electrons thermalized in the conduction bands
1789               do iband=dtset%ivalence+1,nband_k
1790                  if(abs(occ(bdtot_index))>tol8 .and. eigen(bdtot_index)>energies%e_fermie+tol10) then
1791                    energies%e_fermie=eigen(bdtot_index)
1792                  end if
1793                  bdtot_index=bdtot_index+1
1794               end do
1795            end if
1796          end do
1797        end do
1798        call xmpi_max(energies%e_fermie,spaceComm_distrb,ierr)
1799        if (dtset%occopt == 9) then
1800           call xmpi_max(energies%e_fermih,spaceComm_distrb,ierr)
1801        end if
1802        call timab(993,2,tsec)
1803      end if
1804 
1805 !    If needed, compute rhog, and symmetrizes the density
1806      if (iscf > 0 .or. iscf==-3 ) then
1807 !      energies%e_fermie=zero  ! Actually, should determine the maximum of the valence band XG20020802
1808        nfftot=dtset%ngfft(1)*dtset%ngfft(2)*dtset%ngfft(3)
1809 
1810        call timab(994,1,tsec)
1811        if (psps%usepaw==0) then
1812          call symrhg(1,gprimd,irrzon,mpi_enreg,dtset%nfft,nfftot,dtset%ngfft,dtset%nspden,&
1813 &         dtset%nsppol,dtset%nsym,phnons,rhog,rhor,rprimd,dtset%symafm,dtset%symrel,dtset%tnons)
1814        else
1815          call symrhg(1,gprimd,irrzon,mpi_enreg,dtset%nfft,nfftot,dtset%ngfft,dtset%nspden,&
1816 &         dtset%nsppol,dtset%nsym,phnons,rhowfg,rhowfr,rprimd,dtset%symafm,dtset%symrel,dtset%tnons)
1817        end if
1818        call timab(994,2,tsec)
1819 !      We now have both rho(r) and rho(G), symmetrized, and if dtset%nsppol=2
1820 !      we also have the spin-up density, symmetrized, in rhor(:,2).
1821      end if
1822 
1823    end if !  End of test on varying or fixed occupation numbers
1824 
1825    call timab(994,1,tsec)
1826 
1827 !  Compute the kinetic energy density
1828    if(dtset%usekden==1 .and. (iscf > 0 .or. iscf==-3 ) )then
1829      if (psps%usepaw==0) then
1830        call mkrho(cg,dtset,gprimd,irrzon,kg,mcg,mpi_enreg,npwarr,occ,paw_dmft,phnons,&
1831 &       taug,taur,rprimd,tim_mkrho,ucvol,wvl%den,wvl%wfs,option=1)
1832      else
1833        call mkrho(cg,dtset,gprimd,irrzon,kg,mcg,mpi_enreg,npwarr,occ,paw_dmft,phnons,&
1834 &      tauwfg,tauwfr,rprimd,tim_mkrho,ucvol,wvl%den,wvl%wfs,option=1)
1835      end if
1836    end if
1837 
1838    ABI_FREE(eknk)
1839    if (usefock) then
1840      ABI_FREE(focknk)
1841      if (optforces>0)then
1842        ABI_FREE(fockfornk)
1843      end if
1844    end if
1845    ABI_FREE(eknk_nd)
1846    ABI_FREE(grnlnk)
1847    ABI_FREE(enlxnk)
1848 
1849 !  In the non-self-consistent case, print eigenvalues and residuals
1850    if(iscf<=0 .and. me_distrb==0)then
1851      option=2 ; enunit=1 ; vxcavg_dum=zero
1852      call prteigrs(eigen,enunit,energies%e_fermie,energies%e_fermih,&
1853 &     dtfil%fnameabo_app_eig,ab_out,iscf,dtset%kptns,dtset%kptopt,dtset%mband,&
1854 &     dtset%nband,nbdbuf_eff,dtset%nkpt,nnsclo_now,dtset%nsppol,occ,dtset%occopt,option,&
1855 &     dtset%prteig,prtvol,resid,dtset%tolwfr,vxcavg_dum,dtset%wtk)
1856    end if
1857 
1858 !  Find largest residual over bands, k points, and spins, except for nbdbuf highest bands
1859    ibdkpt=1
1860    residm=zero
1861    do isppol=1,dtset%nsppol
1862      do ikpt=1,dtset%nkpt
1863        nband_k=dtset%nband(ikpt+(isppol-1)*dtset%nkpt)
1864        if (nbdbuf_eff>=0) then
1865          nband_eff=max(1,nband_k-nbdbuf_eff)
1866          residm=max(residm,maxval(resid(ibdkpt:ibdkpt+nband_eff-1)))
1867        else if (nbdbuf_eff==-101) then
1868          residm=max(residm,maxval(occ(ibdkpt:ibdkpt+nband_k-1)*resid(ibdkpt:ibdkpt+nband_k-1)))
1869        else
1870          ABI_ERROR('Bad value of nbdbuf_eff')
1871        end if
1872        ibdkpt=ibdkpt+nband_k
1873      end do
1874    end do
1875 
1876  end if !usewvl==0
1877 
1878 !===================================================================
1879 ! End of PLANE WAVES section
1880 !===================================================================
1881 
1882 !In the self-consistent case, diagnose lack of unoccupied state (for each spin and k-point).
1883 
1884 !Print a warning if the number of such messages already written does not exceed mwarning.
1885 ! MG: This is not a good idea as this is a typical mistake done by beginners and we should
1886 ! keep on spamming this warning message in the log file.
1887  !mwarning=5
1888  !if(nwarning<mwarning .and. iscf>=0)then
1889    !nwarning=nwarning+1
1890 
1891  if(iscf>=0)then
1892    bdtot_index=1
1893    do isppol=1,dtset%nsppol
1894      do ikpt=1,dtset%nkpt
1895        min_occ=two
1896        nband_k=dtset%nband(ikpt+(isppol-1)*dtset%nkpt)
1897        do iband=1,nband_k
1898          if(occ(bdtot_index)<min_occ)min_occ=occ(bdtot_index)
1899          bdtot_index=bdtot_index+1
1900        end do
1901        if(min_occ>0.01_dp .and. .not. associated(extfpmd))then
1902          if(dtset%nsppol==1)then
1903            write(msg, '(a,i0,3a,f7.3,5a)' )&
1904              'For k-point number: ',ikpt,',',ch10,&
1905              'The minimal occupation factor is: ',min_occ,'.',ch10,&
1906              'An adequate monitoring of convergence requires it to be  at most 0.01_dp.',ch10,&
1907              'Action: increase slightly the number of bands.'
1908          else
1909            write(msg, '(a,i0,3a,i0,a,f7.3,5a)' )&
1910              'For k-point number: ',ikpt,', and',ch10,&
1911              'for spin polarization: ',isppol, ' the minimal occupation factor is: ',min_occ,'.',ch10,&
1912              'An adequate monitoring of convergence requires it to be at most 0.01_dp.',ch10,&
1913              'Action: increase slightly the number of bands.'
1914          end if
1915          ABI_WARNING(msg)
1916          exit ! It is enough if one lack of adequate occupation is identified, so exit.
1917        end if
1918      end do
1919    end do
1920  end if
1921 
1922  ABI_NVTX_START_RANGE(NVTX_VTORHO_EXTRA)
1923  if (iscf>0.or.iscf==-3 .or. (dtset%usewvl==1 .and. iscf==0)) then
1924 
1925 !  PAW: Build new rhoij quantities from new occ then symetrize them
1926 !  Compute and add the compensation density to rhowfr to get the total density
1927    if (psps%usepaw==1) then
1928      call timab(555,1,tsec)
1929      if (paral_atom) then
1930        ABI_MALLOC(pawrhoij_unsym,(natom))
1931        call pawrhoij_inquire_dim(cplex_rhoij=cplex_rhoij,nspden_rhoij=nspden_rhoij,&
1932 &                nspden=dtset%nspden,spnorb=dtset%pawspnorb,cpxocc=dtset%pawcpxocc)
1933        call pawrhoij_alloc(pawrhoij_unsym,cplex_rhoij,nspden_rhoij,dtset%nspinor,&
1934 &       dtset%nsppol,dtset%typat,pawtab=pawtab,use_rhoijp=0)
1935      else
1936        pawrhoij_unsym => pawrhoij
1937      end if
1938      if (usecprj_local==1) then
1939        call pawmkrhoij(atindx,atindx1,cprj,gs_hamk%dimcprj,dtset%istwfk,dtset%kptopt,dtset%mband,mband_cprj,&
1940 &       mcprj_local,dtset%mkmem,mpi_enreg,natom,dtset%nband,dtset%nkpt,dtset%nspden,dtset%nspinor,&
1941 &       dtset%nsppol,occ,dtset%paral_kgb,paw_dmft,pawrhoij_unsym,dtfil%unpaw,dtset%usewvl,dtset%wtk)
1942      else
1943        mcprj_tmp=my_nspinor*mband_cprj*dtset%mkmem*dtset%nsppol
1944        ABI_MALLOC(cprj_tmp,(natom,mcprj_tmp))
1945        call pawcprj_alloc(cprj_tmp,0,gs_hamk%dimcprj)
1946        call ctocprj(atindx,cg,1,cprj_tmp,gmet,gprimd,0,0,0,dtset%istwfk,kg,dtset%kptns,&
1947 &       mcg,mcprj_tmp,dtset%mgfft,dtset%mkmem,mpi_enreg,psps%mpsang,dtset%mpw,&
1948 &       dtset%natom,nattyp,dtset%nband,dtset%natom,dtset%ngfft,dtset%nkpt,dtset%nloalg,&
1949 &       npwarr,dtset%nspinor,dtset%nsppol,dtset%nsppol,ntypat,dtset%paral_kgb,ph1d,psps,rmet,dtset%typat,&
1950 &       ucvol,dtfil%unpaw,xred,ylm,ylmgr_dum)
1951        call pawmkrhoij(atindx,atindx1,cprj_tmp,gs_hamk%dimcprj,dtset%istwfk,dtset%kptopt,&
1952 &       dtset%mband,mband_cprj,mcprj_tmp,dtset%mkmem,mpi_enreg,natom,dtset%nband,dtset%nkpt,&
1953 &       dtset%nspden,dtset%nspinor,dtset%nsppol,occ,dtset%paral_kgb,paw_dmft,pawrhoij_unsym,&
1954 &       dtfil%unpaw,dtset%usewvl,dtset%wtk)
1955        call pawcprj_free(cprj_tmp)
1956        ABI_FREE(cprj_tmp)
1957      end if
1958      call timab(555,2,tsec)
1959 !    Build symetrized packed rhoij and compensated pseudo density
1960      cplex=1;ipert=0;idir=0;qpt(:)=zero
1961      if(dtset%usewvl==0) then
1962        call pawmkrho(1,compch_fft,cplex,gprimd,idir,indsym,ipert,mpi_enreg,&
1963 &       my_natom,natom,dtset%nspden,dtset%nsym,ntypat,dtset%paral_kgb,pawang,pawfgr,pawfgrtab,&
1964 &       dtset%pawprtvol,pawrhoij,pawrhoij_unsym,pawtab,qpt,rhowfg,rhowfr,rhor,rprimd,dtset%symafm,&
1965 &       symrec,dtset%typat,ucvol,dtset%usewvl,xred,pawnhat=nhat,rhog=rhog)
1966        if (dtset%usekden==1) then
1967 !        DO WE NEED TAUG?
1968          call transgrid(1,mpi_enreg,dtset%nspden,+1,1,1,dtset%paral_kgb,pawfgr,tauwfg,taug,tauwfr,taur)
1969        end if
1970      else
1971 !      here do not pass rhog, we do not use it
1972        call pawmkrho(1,compch_fft,cplex,gprimd,idir,indsym,ipert,mpi_enreg,&
1973 &       my_natom,natom,dtset%nspden,dtset%nsym,ntypat,dtset%paral_kgb,pawang,pawfgr,pawfgrtab,&
1974 &       dtset%pawprtvol,pawrhoij,pawrhoij_unsym,pawtab,qpt,rhowfg,rhowfr,rhor,rprimd,dtset%symafm,&
1975 &       symrec,dtset%typat,ucvol,dtset%usewvl,xred,pawnhat=nhat)
1976 !      In WVL: copy density to BigDFT object:
1977        call wvl_rho_abi2big(1,rhor,wvl%den)
1978      end if
1979      if (paral_atom) then
1980        call pawrhoij_free(pawrhoij_unsym)
1981        ABI_FREE(pawrhoij_unsym)
1982      end if
1983    end if ! psps%usepaw==1
1984 
1985    if(paw_dmft%use_dmft==1) then
1986 !    == check noccmmp
1987      call setnoccmmp(1,0,rdum4,0,0,idum3,my_natom,natom,0,1,dtset%nsppol,0,ntypat,&
1988 &     paw_ij,pawang,dtset%pawprtvol,pawrhoij,pawtab,rdum2,idum1,dtset%typat,0,dtset%usepawu,&
1989 &     comm_atom=mpi_enreg%comm_atom,mpi_atmtab=mpi_enreg%my_atmtab)
1990    end if
1991 
1992 !  Find and print minimum and maximum total electron density and locations
1993 !  Compute density residual (if required) and its squared norm
1994    if (iscf>=0) then
1995      if (psps%usepaw==0) then
1996        call prtrhomxmn(std_out,mpi_enreg,dtset%nfft,dtset%ngfft,dtset%nspden,1,rhor,ucvol=ucvol)
1997      else
1998        call prtrhomxmn(std_out,mpi_enreg,nfftf,pawfgr%ngfft,dtset%nspden,1,rhor,ucvol=ucvol)
1999      end if
2000      if (optres==1) then
2001        nvresid=rhor-nvresid
2002        call sqnorm_v(1,nfftf,nres2,dtset%nspden,optres,nvresid,mpi_comm_sphgrid=mpi_comm_sphgrid)
2003        if (dtset%usekden==1) then
2004          if (optres==1) tauresid=taur-tauresid
2005        endif
2006      end if
2007    end if
2008 
2009  end if ! iscf>0 or iscf=-3
2010  ABI_NVTX_END_RANGE()
2011 
2012  if(psps%usepaw==1.and.(iscf>=0.or.iscf==-3))  then
2013    ABI_FREE(rhowfr)
2014    ABI_FREE(rhowfg)
2015    if (dtset%usekden==1) then
2016      ABI_FREE(tauwfr)
2017      ABI_FREE(tauwfg)
2018    end if
2019  end if
2020 
2021  call timab(994,2,tsec)
2022 
2023  if (iscf==-1) then
2024 !  Eventually compute the excited states within tddft
2025    call timab(995,1,tsec)
2026    if (psps%usepaw==1) then
2027 !    In case of PAW calculation, have to transfer kxc from the fine to the coarse grid:
2028      ABI_MALLOC(cgrkxc,(dtset%nfft,nkxc))
2029      do ikxc=1,nkxc
2030        call transgrid(1,mpi_enreg,1,-1,0,0,dtset%paral_kgb,pawfgr,rhodum,rhodum,cgrkxc(:,ikxc),kxc(:,ikxc))
2031      end do
2032      call tddft(cg,dtfil,dtset,eigen,etotal,gmet,gprimd,gsqcut,&
2033 &     kg,cgrkxc,dtset%mband,mgfftdiel,dtset%mkmem,mpi_enreg,dtset%mpw,dtset%nfft,&
2034 &     ngfftdiel,dtset%nkpt,nkxc,npwarr,dtset%nspinor,dtset%nsppol,occ,ucvol,wffnew)
2035      ABI_FREE(cgrkxc)
2036    else
2037      call tddft(cg,dtfil,dtset,eigen,etotal,gmet,gprimd,gsqcut,&
2038 &     kg,kxc,dtset%mband,mgfftdiel,dtset%mkmem,mpi_enreg,dtset%mpw,dtset%nfft,&
2039 &     ngfftdiel,dtset%nkpt,nkxc,npwarr,dtset%nspinor,dtset%nsppol,occ,ucvol,wffnew)
2040    end if
2041    call timab(995,2,tsec)
2042 
2043  else
2044 !  Eventually compute the susceptibility matrix and the
2045 !  dielectric matrix when istep_mix is equal to 1 or dielstrt
2046    call timab(996,1,tsec)
2047    computesusmat = dtset%testsusmat(dielop, dielstrt, istep_mix) !test if the matrix is to be computed
2048    if(computesusmat) then
2049      dielar(1)=dtset%diecut;dielar(2)=dtset%dielng
2050      dielar(3)=dtset%diemac;dielar(4)=dtset%diemix
2051      dielar(5)=dtset%diegap;dielar(6)=dtset%dielam
2052      dielar(7)=dtset%diemix;if (iscf>=10) dielar(7)=dtset%diemixmag
2053      usetimerev=1
2054      if (psps%usepaw==1.and.dtset%pawspnorb>0.and.dtset%kptopt/=1.and.dtset%kptopt/=2) usetimerev=0
2055      neglect_pawhat=1-dtset%pawsushat
2056      call suscep_stat(atindx,atindx1,cg,cprj,dielar,&
2057 &     gs_hamk%dimcprj,doccde,eigen,gbound_diel,gprimd,&
2058 &     irrzondiel,dtset%istwfk,kg,kg_diel,lmax_diel,&
2059 &     dtset%mband,mcg,mcprj_local,mgfftdiel,dtset%mkmem,mpi_enreg,dtset%mpw,natom,dtset%nband,&
2060 &     neglect_pawhat,nfftdiel,ngfftdiel,&
2061 &     dtset%nkpt,npwarr,npwdiel,dtset%nspden,dtset%nspinor,dtset%nsppol,dtset%nsym,ntypat,&
2062 &     occ,dtset%occopt,pawang,pawtab,phnonsdiel,ph1ddiel,rprimd,&
2063 &     susmat,dtset%symafm,dtset%symrel,dtset%tnons,dtset%typat,ucvol,&
2064 &     dtfil%unpaw,usecprj_local,psps%usepaw,usetimerev,dtset%wtk,ylmdiel)
2065    end if
2066    call timab(996,2,tsec)
2067 
2068  end if ! end condition on iscf
2069 
2070  call gs_hamk%free()
2071 
2072  if (psps%usepaw==1) then
2073    if (usecprj==0) then
2074      call pawcprj_free(cprj_local)
2075      ABI_FREE(cprj_local)
2076    end if
2077  end if
2078 
2079  if(dtset%usewvl==0) then
2080    ABI_FREE(EigMin)
2081    ABI_FREE(doccde)
2082 #if defined HAVE_GPU_CUDA
2083    if(dtset%gpu_option==ABI_GPU_LEGACY .or. dtset%gpu_option==ABI_GPU_KOKKOS) call gpu_finalize_ham_data()
2084 #endif
2085  end if
2086 
2087  call timab(980,2,tsec)
2088 
2089  DBG_EXIT("COLL")
2090 
2091  contains

ABINIT/wvl_comm_eigen [ Functions ]

[ Top ] [ Functions ]

NAME

  wvl_comm_eigen

FUNCTION

  Computes occupations for the wavelet case
  Using BigDFT routines

INPUTS

OUTPUT

SIDE EFFECTS

NOTES

 for the wvlbigdft case, see the routine 'wvl_occ_bigdft'

SOURCE

2434 subroutine wvl_comm_eigen()
2435 
2436 !Arguments ------------------------------------
2437 
2438 !Local variables-------------------------------
2439 #if defined HAVE_BIGDFT
2440  integer:: ikpt,norb,shift
2441 #endif
2442 
2443 ! *************************************************************************
2444 
2445    DBG_ENTER("COLL")
2446 
2447 #if defined HAVE_BIGDFT
2448    if(wvlbigdft) then
2449 !  Communicates eigenvalues to all procs.
2450 !  This will print out the eigenvalues and Fermi level.
2451      call eigensystem_info(mpi_enreg%me_wvl, mpi_enreg%nproc_wvl,0.d0,&
2452 &     wvl%wfs%ks%Lzd%Glr%wfd%nvctr_c+7*wvl%wfs%ks%Lzd%Glr%wfd%nvctr_f,&
2453 &     wvl%wfs%ks%orbs,wvl%wfs%ks%psi)
2454    else
2455 !  Send all eigenvalues to all procs.
2456 !  I simply communicate eigenvalues: I do not print them into screen, nor calculate Fermi-level.
2457      norb=wvl%wfs%ks%orbs%norb
2458      if (mpi_enreg%nproc_wvl > 1) then
2459        shift=1
2460        do ikpt = 1, wvl%wfs%ks%orbs%nkpts
2461          call xmpi_bcast(wvl%wfs%ks%orbs%eval(shift:shift+norb-1),wvl%wfs%ks%orbs%ikptproc(ikpt),mpi_enreg%comm_wvl,ierr)
2462          shift=shift+norb
2463        end do
2464      end if
2465    end if
2466 
2467 !Copy eigenvalues from BigDFT object to "eigen"
2468    call wvl_eigen_abi2big(dtset%mband,dtset%nkpt,dtset%nsppol,eigen,2,wvl%wfs)
2469 
2470 #else
2471    BIGDFT_NOTENABLED_ERROR()
2472 #endif
2473 
2474    DBG_EXIT("COLL")
2475 
2476  end subroutine wvl_comm_eigen
2477 
2478 end subroutine vtorho

ABINIT/wvl_nscf_loop [ Functions ]

[ Top ] [ Functions ]

NAME

  wvl_nscf_loop

FUNCTION

  Non-self-consistent field cycle in Wavelets
  See also "wvl_nscf_loop_bigdft"

INPUTS

  nnsclo= number of non-self consistent field iterations

OUTPUT

SOURCE

2109 subroutine wvl_nscf_loop()
2110 
2111 !Arguments ------------------------------------
2112 ! integer, intent(in)                    :: istep,mcprj,nfft,nnsclo
2113 ! real(dp), intent(inout)                :: residm
2114 ! type(dataset_type), intent(in)         :: dtset
2115 ! type(MPI_type), intent(in)             :: mpi_enreg
2116 ! type(energies_type), intent(inout)     :: energies
2117 ! type(wvl_data), intent(inout)          :: wvl
2118 ! !arrays
2119 ! real(dp), intent(inout)                :: xcart(3, dtset%natom)
2120 ! real(dp), dimension(6), intent(out)    :: strsxc
2121 ! type(pawcprj_type),dimension(dtset%natom,mcprj),intent(out)::cprj
2122 
2123 !Local variables-------------------------------
2124  integer :: inonsc,ii
2125  integer,parameter :: iscf_=-1       !do not do a SCF cycle
2126  logical,parameter :: do_scf=.false. !do not do a SCF cycle
2127  logical,parameter :: wvlbigdft=.false.
2128  real(dp) :: dum,eexctx,eh,ekin,eloc,enl,esicdc,evxc,exc
2129 
2130 ! *************************************************************************
2131 
2132    DBG_ENTER("COLL")
2133 
2134    if(nnsclo_now>0) then
2135      do inonsc=1,nnsclo_now
2136        call wvl_psitohpsi(dtset%diemix,eexctx,exc,eh,ekin,eloc,enl,esicdc,&
2137 &       istep,inonsc,iscf_,mpi_enreg%me_wvl,dtset%natom,&
2138 &       nfftf,mpi_enreg%nproc_wvl,dtset%nspden,&
2139 &       dum,do_scf,evxc,wvl,wvlbigdft,xcart,strsxc)
2140        call wvl_hpsitopsi(cprj,dtset,energies,inonsc,mcprj_local,mpi_enreg, &
2141 &       residm,wvl,xcart)
2142        if(residm<dtset%tolwfr) exit !Exit loop if converged
2143      end do
2144 
2145    else
2146      do ii=1, dtset%nline
2147 !      Direct minimization technique: no diagonalization
2148        call wvl_psitohpsi(dtset%diemix,eexctx,exc,eh,ekin,eloc,enl,esicdc,&
2149 &       istep,ii,iscf_,mpi_enreg%me_wvl,dtset%natom,&
2150 &       nfftf,mpi_enreg%nproc_wvl,dtset%nspden,&
2151 &       dum,do_scf,evxc,wvl,wvlbigdft,xcart,strsxc)
2152        call wvl_hpsitopsi(cprj,dtset,energies,ii,mcprj_local,mpi_enreg, &
2153 &       residm,wvl,xcart)
2154        if(residm<dtset%tolwfr) exit !Exit loop if converged
2155      end do
2156    end if
2157 
2158 !  Update energies depending on new WF
2159    energies%e_kinetic=ekin
2160    energies%e_nlpsp_vfock=enl
2161    energies%e_exactX=eexctx
2162    energies%e_sicdc=esicdc
2163 
2164 !  Eventually update energies depending on density
2165    if (dtset%iscf<10) then
2166      energies%e_localpsp=eloc
2167      energies%e_hartree=eh
2168      energies%e_xc=exc ; energies%e_xcdc=evxc
2169    else if (nnsclo_now==0) then
2170      energies%e_localpsp=eloc
2171    end if
2172 
2173 !  End of nscf iterations
2174    if (do_last_ortho) then
2175 !    !Don't update energies (nscf cycle has been done); just recompute potential
2176      inonsc=nnsclo_now;if (nnsclo_now==0) inonsc=dtset%nline
2177      call wvl_psitohpsi(dtset%diemix,eexctx,exc,eh,ekin,eloc,enl,esicdc,&
2178 &     istep,inonsc,iscf_,mpi_enreg%me_wvl,dtset%natom,&
2179 &     nfftf,mpi_enreg%nproc_wvl,dtset%nspden,&
2180 &     dum,do_scf,evxc,wvl,wvlbigdft,xcart,strsxc)
2181    end if
2182 
2183    DBG_EXIT("COLL")
2184 
2185  end subroutine wvl_nscf_loop

ABINIT/wvl_nscf_loop_bigdft [ Functions ]

[ Top ] [ Functions ]

NAME

  wvl_nscf_loop_bigdft

FUNCTION

  Non-self-consistent field cycle in Wavelets
  It follows the BigDFT scheme.
  See also "wvl_nscf_loop"

INPUTS

  nnsclo= number of non-self consistent field iterations

OUTPUT

  argout(sizeout)=description

SOURCE

2205 subroutine wvl_nscf_loop_bigdft()
2206 
2207 !Arguments ------------------------------------
2208 ! integer, intent(in)                    :: istep,mcprj,nfft,nnsclo
2209 ! real(dp), intent(inout)                :: residm
2210 ! real(dp), intent(out)                  :: nres2
2211 ! type(dataset_type), intent(in)         :: dtset
2212 ! type(MPI_type), intent(in)             :: mpi_enreg
2213 ! type(energies_type), intent(inout)     :: energies
2214 ! type(wvl_data), intent(inout)          :: wvl
2215  !arrays
2216 ! real(dp), intent(inout)                :: xcart(3, dtset%natom)
2217 ! real(dp), dimension(6), intent(out)    :: strsxc
2218 ! type(pawcprj_type),dimension(dtset%natom,mcprj),intent(out)::cprj
2219 
2220 !Local variables-------------------------------
2221  integer :: inonsc
2222  integer,parameter :: iscf_=-1       !do not do a SCF cycle
2223  logical,parameter :: do_scf=.false. !do not do a SCF cycle
2224  logical,parameter :: wvlbigdft=.true.
2225  real(dp) :: eexctx,eh,ekin,eloc,enl,esicdc,evxc,exc
2226 
2227 ! *************************************************************************
2228 
2229    DBG_ENTER("COLL")
2230 
2231    call wvl_hpsitopsi(cprj,dtset, energies, istep, mcprj_local,mpi_enreg, &
2232 &   residm, wvl,xcart)
2233 
2234    if (nnsclo_now>2) then
2235      do inonsc = 2, nnsclo_now-1
2236        call wvl_psitohpsi(dtset%diemix, energies%e_exactX, energies%e_xc, &
2237 &       energies%e_hartree, energies%e_kinetic, energies%e_localpsp, &
2238 &       energies%e_nlpsp_vfock, energies%e_sicdc, istep, inonsc, iscf_, &
2239 &       mpi_enreg%me_wvl, dtset%natom, nfftf, mpi_enreg%nproc_wvl,&
2240 &       dtset%nspden, nres2, do_scf,energies%e_xcdc, &
2241 &       wvl, wvlbigdft, xcart, strsxc)
2242        call wvl_hpsitopsi(cprj,dtset, energies, inonsc, mcprj_local,mpi_enreg, &
2243 &       residm, wvl,xcart)
2244      end do
2245    end if
2246 
2247 !  End of nscf iterations
2248    if (do_last_ortho.and.nnsclo_now<=1) then
2249 !    !Don't update energies (nscf cycle has been done); just recompute potential
2250      call wvl_psitohpsi(dtset%diemix,eexctx,exc,eh,ekin,eloc,enl,esicdc, &
2251 &     istep, 1, iscf_, mpi_enreg%me_wvl, dtset%natom, nfftf, &
2252 &     mpi_enreg%nproc_wvl,dtset%nspden, nres2, do_scf,evxc, &
2253 &     wvl, wvlbigdft, xcart, strsxc)
2254    else if (do_last_ortho.and.nnsclo_now>1) then
2255 !    !Update energies and potential (nscf cycles are not finished)
2256      call wvl_psitohpsi(dtset%diemix, energies%e_exactX, energies%e_xc, &
2257 &     energies%e_hartree,energies%e_kinetic, energies%e_localpsp, &
2258 &     energies%e_nlpsp_vfock, energies%e_sicdc, istep, nnsclo_now, iscf_, &
2259 &     mpi_enreg%me_wvl, dtset%natom, nfftf, mpi_enreg%nproc_wvl,&
2260 &     dtset%nspden, nres2, do_scf,energies%e_xcdc, &
2261 &     wvl, wvlbigdft, xcart, strsxc)
2262    end if
2263 
2264    DBG_EXIT("COLL")
2265 
2266  end subroutine wvl_nscf_loop_bigdft

ABINIT/wvl_occ [ Functions ]

[ Top ] [ Functions ]

NAME

  wvl_occ

FUNCTION

  Computes occupations for the wavelet case

INPUTS

OUTPUT

NOTES

 for the wvlbigdft case, see the routine 'wvl_occ_bigdft'

SOURCE

2342 subroutine wvl_occ()
2343 
2344 !Local variables-------------------------------
2345  real(dp):: doccde_(dtset%mband*dtset%nkpt*dtset%nsppol)
2346 ! *************************************************************************
2347 
2348    DBG_ENTER("COLL")
2349 
2350 !  Compute the new occupation numbers from eigen
2351    call newocc(doccde_,eigen,energies%entropy,energies%e_fermie,energies%e_fermih,dtset%ivalence,dtset%spinmagntarget,&
2352 &   dtset%mband,dtset%nband,dtset%nelect,dtset%ne_qFD,dtset%nh_qFD,dtset%nkpt,dtset%nspinor,&
2353 &   dtset%nsppol,occ,dtset%occopt,prtvol,dtset%tphysel,dtset%tsmear,dtset%wtk,&
2354 &   prtstm=dtset%prtstm,stmbias=dtset%stmbias)
2355 
2356 ! Copy occupations and efermi to BigDFT variables
2357    call wvl_occ_abi2big(dtset%mband,dtset%nkpt,dtset%nsppol,occ,1,wvl%wfs)
2358 
2359 #if defined HAVE_BIGDFT
2360 !  Copy Fermi level to BigDFT variable:
2361    wvl%wfs%ks%orbs%efermi=energies%e_fermie
2362 #endif
2363 
2364    DBG_EXIT("COLL")
2365 
2366  end subroutine wvl_occ

ABINIT/wvl_occ_bigdft [ Functions ]

[ Top ] [ Functions ]

NAME

  wvl_occ_bigdft

FUNCTION

  Computes occupations for the wavelet case
  Using BigDFT routines

INPUTS

OUTPUT

SIDE EFFECTS

NOTES

 for the wvlbigdft case, see the routine 'wvl_occ_bigdft'

SOURCE

2388 subroutine wvl_occ_bigdft()
2389 
2390 ! *************************************************************************
2391 
2392    DBG_ENTER("COLL")
2393 
2394 ! Transfer occopt from ABINIT to BigDFT
2395 #if defined HAVE_BIGDFT
2396    occopt_bigdft=dtset%occopt
2397    call wvl_occopt_abi2big(occopt_bigdft,occopt_bigdft,1)
2398 
2399 !Calculate occupations using BigDFT routine
2400    call evaltoocc(mpi_enreg%me_wvl, mpi_enreg%nproc_wvl, .false., &
2401 &   dtset%tsmear, wvl%wfs%ks%orbs,  occopt_bigdft)
2402 
2403 !Pass e_fermi from BigDFT object to ABINIT variable:
2404    energies%e_fermie = wvl%wfs%ks%orbs%efermi
2405 
2406 !Copy occupations from BigDFT to ABINIT variables
2407    call wvl_occ_abi2big(dtset%mband,dtset%nkpt,dtset%nsppol,occ,2,wvl%wfs)
2408 #endif
2409 
2410    DBG_EXIT("COLL")
2411 
2412  end subroutine wvl_occ_bigdft