TABLE OF CONTENTS


ABINIT/cgq_builder [ Functions ]

[ Top ] [ Functions ]

NAME

 cgq_builder

FUNCTION

 This routine locates cgq for efield calculations, especially for parallel case

INPUTS

  berryflag = logical flag determining use of electric field variables
  cg(2,mcg)=planewave coefficients of wavefunctions.
  dtset <type(dataset_type)>=all input variables for this dataset
  ikpt=index of current k kpt
  ikpt_loc=index of k point on current processor (see vtorho.F90)
  isspol=value of spin polarization currently treated
  me_distrb=current value from spaceComm_distrb (see vtorho.F90)
  mcg=size of wave-functions array (cg) =mpw*nspinor*mband*mkmem*nsppol
  mcgq=size of cgq array (see vtorho.F90)
  mkgq=size of pwnsfacq array (see vtorho.F90)
  my_nspinor=nspinor value determined by current // set up
  nband_k=number of bands at each k point
  nproc_distrb=nproc from spaceComm_distrb (see vtorho.F90)
  npwarr(nkpt)=number of planewaves in basis at this k point
  pwnsfac(2,pwind_alloc) = phase factors for non-symmorphic translations
                           (see initberry.f)
  pwind_alloc = first dimension of pwind
  spaceComm_distrb=comm_cell from mpi_enreg

OUTPUT

  cgq(2,mcgq)=planewave coefficients of wavenfunctions adjacent to cg at ikpt
  pwnsfacq(2,mkgq)=phase factors for non-symmorphic translations for cg's adjacent to cg(ikpt)

SIDE EFFECTS

 Input/Output
   dtefield <type(efield_type)> = efield variables
   mpi_enreg=information about MPI parallelization

TODO

NOTES

SOURCE

2524 subroutine cgq_builder(berryflag,cg,cgq,dtefield,dtset,ikpt,ikpt_loc,isppol,mcg,mcgq,&
2525 &                      me_distrb,mkgq,mpi_enreg,my_nspinor,nband_k,nproc_distrb,&
2526 &                      npwarr,pwnsfac,pwnsfacq,pwind_alloc,spaceComm_distrb)
2527 
2528 !Arguments ------------------------------------
2529  integer,intent(in) :: ikpt,ikpt_loc,isppol,me_distrb,mcg,mcgq,mkgq,my_nspinor,nband_k
2530  integer,intent(in) :: nproc_distrb,pwind_alloc,spaceComm_distrb
2531  logical,intent(in) :: berryflag
2532  type(dataset_type), intent(in) :: dtset
2533  type(efield_type), intent(inout) :: dtefield
2534  type(MPI_type), intent(in) :: mpi_enreg
2535 !arrays
2536  integer,intent(in) :: npwarr(dtset%nkpt)
2537  real(dp),intent(in) :: cg(2,mcg),pwnsfac(2,pwind_alloc)
2538  real(dp),intent(out) :: cgq(2,mcgq),pwnsfacq(2,mkgq)
2539 
2540 !Local variables -------------------------
2541 !scalars
2542  integer :: count,count1,icg1,icg2,dest,his_source
2543  integer :: idir,ierr,ifor,ikg1,ikg2,ikptf,ikpt1f,ikpt1i
2544  integer :: jkpt,jkpt1i,jkptf,jkpt1f,jsppol,my_source,npw_k1,tag
2545 !arrays
2546  integer,allocatable :: flag_send(:,:), flag_receive(:)
2547  real(dp) :: tsec(2)
2548  real(dp),allocatable :: buffer(:,:)
2549 
2550 ! *************************************************************************
2551 
2552  if (mcgq==0.or.mkgq==0) return
2553 
2554  call timab(983,1,tsec)
2555 
2556 !Test compatbility of berryflag
2557  if (berryflag) then
2558    ABI_MALLOC(flag_send,(0:nproc_distrb-1,dtefield%fnkpt))
2559  end if
2560  ABI_MALLOC(flag_receive,(dtset%nkpt))
2561  flag_send(:,:) = 0
2562  flag_receive(:) = 0
2563 
2564  if (berryflag) ikptf = dtefield%i2fbz(ikpt)
2565 
2566  do idir = 1, 3
2567 
2568 !  skip idir values for which efield_dot(idir) = 0
2569    if (berryflag .and. abs(dtefield%efield_dot(idir)) < tol12 ) cycle
2570 
2571    do ifor = 1, 2
2572 
2573      if(berryflag) then
2574        dtefield%sflag(:,ikpt + dtset%nkpt*(isppol - 1),ifor,idir) = 0
2575        ikpt1f = dtefield%ikpt_dk(ikptf,ifor,idir)
2576        ikpt1i = dtefield%indkk_f2ibz(ikpt1f,1)
2577      end if
2578 
2579      npw_k1 = npwarr(ikpt1i)
2580      count = npw_k1*my_nspinor*nband_k
2581      my_source = mpi_enreg%proc_distrb(ikpt1i,1,isppol)
2582 
2583      do dest = 0, nproc_distrb-1
2584 
2585        if ((dest==me_distrb).and.(ikpt_loc <= dtset%mkmem)) then
2586 !        I am dest and have something to do
2587 
2588          if ( my_source == me_distrb ) then
2589 !          I am destination and source
2590 
2591            if(berryflag) then
2592              ikg1 = dtefield%fkgindex(ikpt1f)
2593              ikg2 = dtefield%cgqindex(3,ifor+2*(idir-1),ikpt+(isppol-1)*dtset%nkpt)
2594              icg1 = dtefield%cgindex(ikpt1i,isppol)
2595              icg2 = dtefield%cgqindex(2,ifor+2*(idir-1),ikpt+(isppol-1)*dtset%nkpt)
2596            end if
2597 
2598            pwnsfacq(:,ikg2 + 1:ikg2 + npw_k1) = pwnsfac(:,ikg1 + 1:ikg1 + npw_k1)
2599            cgq(:,icg2 + 1:icg2 + count) = cg(:,icg1 + 1:icg1 + count)
2600 
2601          else !  I am the destination but not the source -> receive
2602 !          receive pwnsfacq
2603            if(berryflag) then
2604              tag = ikpt1f + (isppol - 1)*dtefield%fnkpt
2605              ikg1 = dtefield%cgqindex(3,ifor+2*(idir-1),ikpt+(isppol-1)*dtset%nkpt)
2606            end if
2607            ABI_MALLOC(buffer,(2,npw_k1))
2608            call xmpi_recv(buffer,my_source,tag,spaceComm_distrb,ierr)
2609            pwnsfacq(:,ikg1+1:ikg1+npw_k1) = buffer(:,1:npw_k1)
2610            ABI_FREE(buffer)
2611 
2612 !          receive cgq if necessary
2613            if(flag_receive(ikpt1i) == 0) then
2614              ABI_MALLOC(buffer,(2,count))
2615              tag = ikpt1i + (isppol - 1)*dtset%nkpt
2616              call xmpi_recv(buffer,my_source,tag,spaceComm_distrb,ierr)
2617              if(berryflag) icg1 = dtefield%cgqindex(2,ifor+2*(idir-1),ikpt+(isppol-1)*dtset%nkpt)
2618              cgq(:,icg1+1:icg1+count) = buffer(:,1:count)
2619              ABI_FREE(buffer)
2620              flag_receive(ikpt1i) = 1
2621            end if ! end if flag_receive == 0
2622          end if ! end tasks if I am the destination
2623 
2624        else if (ikpt_loc <= mpi_enreg%mkmem(dest)) then  ! dest != me and the dest has a k-point to treat
2625 
2626 !        jkpt is the kpt which is being treated by dest (in ibz)
2627 !        jsppol is his isppol
2628          jkpt = mpi_enreg%kpt_loc2ibz_sp(dest, ikpt_loc,1)
2629          jsppol = mpi_enreg%kpt_loc2ibz_sp(dest, ikpt_loc,2)
2630 
2631          if(jkpt > 0 .and. jsppol > 0) then
2632 
2633            if(berryflag) then
2634              jkptf = dtefield%i2fbz(jkpt)
2635              jkpt1f = dtefield%ikpt_dk(jkptf,ifor,idir)
2636              jkpt1i = dtefield%indkk_f2ibz(jkpt1f,1)
2637            end if
2638            his_source = mpi_enreg%proc_distrb(jkpt1i,1,jsppol)
2639 
2640            if (his_source == me_distrb) then
2641 
2642 !            send
2643 !            pwnsfacq
2644              if(berryflag) then
2645                ikg1 = dtefield%fkgindex(jkpt1f)
2646                tag = jkpt1f + (jsppol - 1)*dtefield%fnkpt
2647              end if
2648              count1 = npwarr(jkpt1i)
2649              ABI_MALLOC(buffer,(2,count1))
2650              buffer(:,1:count1)  = pwnsfac(:,ikg1+1:ikg1+count1)
2651              call xmpi_send(buffer,dest,tag,spaceComm_distrb,ierr)
2652              ABI_FREE(buffer)
2653 
2654 !            send cgq if necessary
2655              if(flag_send(dest, jkpt1i)==0) then
2656                if(berryflag) icg1 = dtefield%cgindex(jkpt1i,jsppol)
2657                tag = jkpt1i + (jsppol - 1)*dtset%nkpt
2658                count1 = npwarr(jkpt1i)*nband_k*my_nspinor
2659                ABI_MALLOC(buffer,(2,count1))
2660                buffer(:,1:count1)  = cg(:,icg1+1:icg1+count1)
2661                call xmpi_send(buffer,dest,tag,spaceComm_distrb,ierr)
2662                ABI_FREE(buffer)
2663                flag_send(dest, jkpt1i)=1
2664              end if ! if send cgq
2665 
2666            end if ! end check that his_source == me
2667 
2668          end if ! end check on jkpt > 0 and jsppol > 0
2669 
2670        end if ! end check on me = dest else if me != dest
2671 
2672      end do ! end loop over dest = 0, nproc-1
2673 
2674    end do !end loop over ifor
2675 
2676  end do !end loop over idir
2677 
2678  call timab(983,2,tsec)
2679 
2680  ABI_FREE(flag_send)
2681  ABI_FREE(flag_receive)
2682 
2683 end subroutine cgq_builder

ABINIT/e_eigen [ Functions ]

[ Top ] [ Functions ]

NAME

  e_eigen

FUNCTION

  Computes eigenvalues energy from eigen, occ, kpt, wtk

INPUTS

  eigen(nkpt*nsppol)=eigenvalues
  mband= maximum number of bands
  nband(nkpt*nsppol)= number of bands for each k-point and spin
  nkpt= number of k-points
  nsppol= number of spin polarization
  occ(mband*nkpt*nsppol)=occupations
  wtk(nkpt)= k-point weights

OUTPUT

  e_eigenvalues= eigenvalues energy

SOURCE

2291 subroutine e_eigen(eigen,e_eigenvalues,mband,nband,nkpt,nsppol,occ,wtk)
2292 
2293 !Arguments ------------------------------------
2294  integer , intent(in)  :: mband,nkpt,nsppol
2295  integer , intent(in)  :: nband(nkpt*nsppol)
2296  real(dp) , intent(in)  :: eigen(mband*nkpt*nsppol)
2297  real(dp) , intent(in)  :: occ(mband*nkpt*nsppol)
2298  real(dp) , intent(in)  :: wtk(nkpt)
2299  real(dp) , intent(out) :: e_eigenvalues
2300 
2301 !Local variables-------------------------------
2302  integer :: ib,iband,ii,ikpt,isppol,nband_k
2303  real(dp) :: wtk_k
2304 
2305 ! *************************************************************************
2306 
2307    DBG_ENTER("COLL")
2308    ii=0;ib=0
2309    do isppol=1,nsppol
2310      do ikpt=1,nkpt
2311        ii=ii+1
2312        nband_k=nband(ii) ;  wtk_k=wtk(ii)
2313        do iband=1,nband_k
2314          ib=ib+1
2315          if(abs(occ(ib)) > tol8) then
2316            e_eigenvalues = e_eigenvalues + wtk_k*occ(ib)*eigen(ib)
2317          end if
2318        end do
2319      end do
2320    end do
2321 
2322    DBG_EXIT("COLL")
2323 
2324  end subroutine e_eigen

ABINIT/m_vtorho [ Modules ]

[ Top ] [ Modules ]

NAME

  m_vtorho

FUNCTION

COPYRIGHT

  Copyright (C) 1998-2024 ABINIT group (DCA, XG, GMR, MF, AR, MM, MT, FJ, MB, MT, TR)
  This file is distributed under the terms of the
  GNU General Public License, see ~abinit/COPYING
  or http://www.gnu.org/copyleft/gpl.txt .

SOURCE

 16 #if defined HAVE_CONFIG_H
 17 #include "config.h"
 18 #endif
 19 
 20 #include "abi_common.h"
 21 
 22 ! nvtx related macro definition
 23 #include "nvtx_macros.h"
 24 
 25 module m_vtorho
 26 
 27  use iso_fortran_env, only : int32,int64,real32,real64
 28  use defs_basis
 29  use defs_wvltypes
 30  use m_abicore
 31  use m_xmpi
 32  use m_ab7_mixing
 33  use m_errors
 34  use m_wffile
 35  use m_efield
 36  use m_cgtools
 37  use m_gemm_nonlop
 38  use m_gemm_nonlop_gpu
 39  use m_gemm_nonlop_ompgpu
 40  use m_hdr
 41  use m_dtset
 42  use m_dtfil
 43  use m_extfpmd
 44  use m_ompgpu_utils
 45 
 46  use defs_datatypes,       only : pseudopotential_type
 47  use defs_abitypes,        only : MPI_type
 48  use m_fstrings,           only : sjoin, itoa
 49  use m_time,               only : timab
 50  use m_geometry,           only : xred2xcart
 51  use m_occ,                only : newocc
 52  use m_pawang,             only : pawang_type
 53  use m_pawtab,             only : pawtab_type
 54  use m_paw_ij,             only : paw_ij_type
 55  use m_pawfgrtab,          only : pawfgrtab_type
 56  use m_pawrhoij,           only : pawrhoij_type, pawrhoij_alloc, pawrhoij_free, pawrhoij_io, pawrhoij_inquire_dim
 57  use m_pawcprj,            only : pawcprj_type, pawcprj_alloc, pawcprj_free, pawcprj_getdim
 58  use m_pawfgr,             only : pawfgr_type
 59  use m_energies,           only : energies_type
 60  use m_hamiltonian,        only : init_hamiltonian, gs_hamiltonian_type, gspot_transgrid_and_pack
 61  use m_bandfft_kpt,        only : bandfft_kpt, bandfft_kpt_type, bandfft_kpt_set_ikpt, &
 62                                   bandfft_kpt_savetabs, bandfft_kpt_restoretabs, prep_bandfft_tabs
 63  use m_electronpositron,   only : electronpositron_type,electronpositron_calctype
 64  use m_paw_dmft,           only : paw_dmft_type,init_dmft,destroy_dmft,print_dmft,saveocc_dmft
 65  use m_paw_correlations,   only : setnoccmmp
 66  use m_paw_occupancies,    only : pawmkrhoij
 67  use m_paw_mkrho,          only : pawmkrho
 68  use m_crystal,            only : crystal_init, crystal_t
 69  use m_oper,               only : oper_type,init_oper,destroy_oper
 70  use m_io_tools,           only : flush_unit
 71  use m_abi2big,            only : wvl_occ_abi2big, wvl_rho_abi2big, wvl_occopt_abi2big, wvl_eigen_abi2big
 72  use m_fock,               only : fock_type, fock_ACE_type, fock_updateikpt, fock_calc_ene
 73  use m_invovl,             only : make_invovl
 74  use m_tddft,              only : tddft
 75  use m_kg,                 only : mkkin, mkkpg
 76  use m_suscep_stat,        only : suscep_stat
 77  use m_fft,                only : fftpac
 78  use m_spacepar,           only : symrhg
 79  use m_vtowfk,             only : vtowfk
 80  use m_mkrho,              only : mkrho, prtrhomxmn
 81  use m_mkffnl,             only : mkffnl
 82  use m_mpinfo,             only : proc_distrb_cycle
 83  use m_common,             only : prteigrs
 84  use m_dmft,               only : dmft_solve
 85  use m_datafordmft,        only : datafordmft
 86  use m_fourier_interpol,   only : transgrid
 87  use m_cgprj,              only : ctocprj
 88  use m_wvl_rho,            only : wvl_mkrho
 89  use m_wvl_psi,            only : wvl_hpsitopsi, wvl_psitohpsi, wvl_nl_gradient
 90  use m_inwffil,            only : cg_from_atoms
 91  use m_chebfiwf,           only : chebfiwf2_blocksize
 92 
 93 #if defined HAVE_GPU_CUDA
 94  use m_manage_cuda
 95 #endif
 96 
 97 #if defined HAVE_YAKL
 98  use gator_mod
 99 #endif
100 
101 #if defined HAVE_BIGDFT
102  use BigDFT_API,           only : last_orthon, evaltoocc, write_energies, eigensystem_info
103 #endif
104 
105 #if defined(HAVE_GPU) && defined(HAVE_GPU_MARKERS)
106  use m_nvtx_data
107 #endif
108 
109 #ifdef HAVE_FC_ISO_C_BINDING
110  use, intrinsic :: iso_c_binding, only : c_int64_t
111 #endif
112 
113 
114  implicit none
115 
116  private

ABINIT/vtorho [ Functions ]

[ Top ] [ Functions ]

NAME

 vtorho

FUNCTION

 This routine compute the new density from a fixed potential (vtrial)
 but might also simply compute eigenvectors and eigenvalues.
 The main part of it is a wf update over all k points.

INPUTS

  itimes(2)=itime array, contain itime=itimes(1) and itimimage_gstate=itimes(2) from outer loops
  afford=used to dimension susmat
  atindx(natom)=index table for atoms (see gstate.f)
  atindx1(natom)=index table for atoms, inverse of atindx (see gstate.f)
  cpus= cpu time limit in seconds
  dbl_nnsclo=if 1, will double the value of dtset%nnsclo
  dielop= if positive, the dielectric matrix must be computed.
  dielstrt=number of the step at which the dielectric preconditioning begins.
  dmatpawu= fixed occupation matrix of correlated orbitals (DFT+U or DMFT only)
  dtefield <type(efield_type)> = variables related to Berry phase
      calculations (see initberry.f)
  dtfil <type(datafiles_type)>=variables related to files
  dtset <type(dataset_type)>=all input variables for this dataset
   | mband=maximum number of bands
   | mgfft=maximum size of 1D FFTs
   | mkmem =number of k points treated by this node.
   | mpw=maximum dimensioned size of npw
   | nfft=(effective) number of FFT grid points (for this processor)
   | nkpt=number of k points.
   | nspden=number of spin-density components
   | nsppol=1 for unpolarized, 2 for spin-polarized
   | nsym=number of symmetry elements in space group
   | typat= array of types of the natoms
  electronpositron <type(electronpositron_type)>=quantities for the electron-positron annihilation
  etotal=total energy (Ha) - only needed for tddft
  fock <type(fock_type)>= quantities to calculate Fock exact exchange
  gbound_diel(2*mgfftdiel+8,2)=G sphere boundary for the dielectric matrix
  gmet(3,3)=reciprocal space metric tensor in bohr**-2.
  gprimd(3,3)=dimensional reciprocal space primitive translations
   (3x3 tensor) and grads wrt atomic coordinates (3*natom)
  gsqcut=cutoff on (k+G)^2 (bohr^-2)
  hdr <type(hdr_type)>=the header of wf, den and pot files
  indsym(4,nsym,natom)=indirect indexing array for atom labels
  irrzon(nfft**(1-1/nsym),2,(nspden/nsppol)-3*(nspden/4))=irreducible zone data
  irrzondiel(nfftdiel**(1-1/nsym),2,(nspden/nsppol)-3*(nspden/4))=irreducible zone data for diel matrix
                                     nfft**(1-1/nsym) is 1 if nsym==1, and nfft otherwise
  istep=index of the number of steps in the routine scfcv
  istep_mix=index of the number of steps for the SCF mixing (can be <istep)
  kg(3,mpw*mkmem)=reduced planewave coordinates.
  kg_diel(3,npwdiel)=reduced planewave coordinates for the dielectric matrix.
  kxc(nfftf,nkxc)=exchange-correlation kernel, needed only if nkxc/=0 .
  lmax_diel=1+max. value of l angular momentum used for dielectric matrix
  mcg=size of wave-functions array (cg) =mpw*nspinor*mband*mkmem*nsppol
  mcprj=size of projected wave-functions array (cprj) =nspinor*mband*mkmem*nsppol
  mgfftdiel=maximum size of 1D FFTs, for the computation of the dielectric matrix
  mpi_enreg=information about MPI parallelization
  my_natom=number of atoms treated by current processor
  natom=number of atoms in cell.
  nattyp(ntypat)= # atoms of each type.
  nfftf= -PAW ONLY- number of FFT grid points for the fine grid
         (nfftf=nfft for norm-conserving potential runs)
  nfftdiel=number of fft grid points for the computation of the diel matrix
  ngfftdiel(18)=contain all needed information about 3D FFT, for dielectric matrix,
                see ~abinit/doc/variables/vargs.htm#ngfft
  nkxc=second dimension of the array kxc, see rhotoxc.f for a description
  npwarr(nkpt)=number of planewaves in basis at this k point
  npwdiel=size of the susmat array.
  ntypat=number of types of atoms in unit cell.
  optforces=option for the computation of forces (0: no force;1: forces)
  optres=0: the new value of the density is computed in place of the input value
         1: only the density residual is computed ; the input density is kept
  paw_dmft  <type(paw_dmft_type)>= paw+dmft related data
  paw_ij(my_natom*usepaw) <type(paw_ij_type)>=paw arrays given on (i,j) channels
  pawang <type(pawang)>=paw angular mesh and related data
  pawfgr <type(pawfgr_type)>=fine grid parameters and related data
  pawfgrtab(my_natom*usepaw) <type(pawfgrtab_type)>=atomic data given on fine rectangular grid
  pawtab(ntypat*usepaw) <type(pawtab_type)>=paw tabulated starting data
  phnons(2,nfft**(1-1/nsym),(nspden/nsppol)-3*(nspden/4))=nonsymmorphic translation phases
                                    nfft**(1-1/nsym) is 1 if nsym==1, and nfft otherwise
  phnonsdiel(2,nfft**(1-1/nsym),(nspden/nsppol)-3*(nspden/4))=nonsymmorphic translation phases,
   for diel matr
                                     nfft**(1-1/nsym) is 1 if nsym==1, and nfft otherwise
  ph1d(2,3*(2*mgfft+1)*natom)=one-dimensional structure factor information
  ph1ddiel(2,3*(2*mgfftdiel+1)*natom*usepaw)=one-dimensional structure factor information
                                             for the dielectric matrix
  psps <type(pseudopotential_type)>=variables related to pseudopotentials
  pwind(pwind_alloc,2,3) = array used to compute
           the overlap matrix smat between k-points (see initberry.f)
  pwind_alloc = first dimension of pwind
  pwnsfac(2,pwind_alloc) = phase factors for non-symmorphic translations
                           (see initberry.f)
  rmet(3,3)=real space metric (bohr**2)
  rprimd(3,3)=dimensional primitive vectors
  symrec(3,3,nsym)=symmetry operations in reciprocal space
  ucvol=unit cell volume in bohr**3.
  usecprj=1 if cprj datastructure is stored in memory
  wffnew,unit numbers for wf disk files.
  with_vectornd = 1 if vectornd allocated
  vectornd(with_vectornd*nfftf,nspden,3)=nuclear dipole moment vector potential
  vtrial(nfftf,nspden)=INPUT potential Vtrial(r).
  [vxctau(nfft,nspden,4*usekden)]=(only for meta-GGA): derivative of XC energy density
    with respect to kinetic energy density (depsxcdtau). The arrays vxctau contains also
    the gradient of vxctau (gvxctau) in vxctau(:,:,2:4)
  xred(3,natom)=reduced dimensionless atomic coordinates
  ylm(mpw*mkmem,mpsang*mpsang*useylm)= real spherical harmonics for each G and k point
  ylmgr(mpw*mkmem,3,mpsang*mpsang*useylm)= gradients of real spherical harmonics
  ylmdiel(npwdiel,lmax_diel**2)= real spherical harmonics for each G and k point
                                 for the dielectric matrix

OUTPUT

  compch_fft=-PAW only- compensation charge inside spheres computed over fine fft grid
  dphase(3) : dphase(idir) = accumulated change in the string-averaged
     Zak phase along the idir-th direction caused by the update of all
     the occupied Bloch states at all the k-points (only if finite electric field)
  eigen(mband*nkpt*nsppol)=array for holding eigenvalues (hartree)
  resid(mband*nkpt*nsppol)=residuals for each band over all k points.
  residm=maximum value from resid array (except for nbdbuf highest bands)
  susmat(2,npwdiel*afford,nspden,npwdiel,nspden)=
   the susceptibility (or density-density response) matrix in reciprocal space
  === if optforces>0 ===
    grnl(3*natom)=stores grads of nonlocal energy wrt length scales
  ==== if optres==1
    nres2=square of the norm of the residual
    nvresid(nfftf,nspden)=density residual
  ==== if psps%usepaw==1
    cprj(natom,mcprj*usecprj)= wave functions projected with non-local projectors:
                               cprj(n,k,i)=<p_i|Cnk> where p_i is a non-local projector.
    nhat(nfftf,nspden*psps%usepaw)=compensation charge density on rectangular grid in real space

SIDE EFFECTS

  cg(2,mpw*dtset%nspinor*mband*mkmem*nsppol)=planewave coefficients of wavefunctions.
   At output contains updated wavefunctions coefficients;
    if nkpt>1, these are kept in a disk file.
  energies <type(energies_type)>=storage for energies computed here :
   | e_eigenvalues=Sum of the eigenvalues - Band energy (Hartree)
   | e_kinetic=kinetic energy part of total energy
   | e_nlpsp_vfock=nonlocal psp + potential Fock ACE part of total energy
   | e_fermie=fermi energy (Hartree)
  occ(mband*nkpt*nsppol)=occupation number for each band for each k.
      (input if insulator - occopt<3 - ; output if metallic)
  pawrhoij(my_natom*usepaw) <type(pawrhoij_type)>= paw rhoij occupancies and related data
  rhog(2,nfftf)=Fourier transform of total electron density
  rhor(nfftf,nspden)=total electron density (el/bohr**3)
  taug(2,nfftf*dtset%usekden)=array for Fourier transform of kinetic energy density
  taur(nfftf,nspden*dtset%usekden)=array for kinetic energy density
  tauresid(nfftf,nspden*dtset%usekden)=array for kinetic energy density residual
  wvl <type(wvl_data)>=wavelets structures in case of wavelets basis.
  ==== if (usepaw==1) ====
    cprj(natom,mcprj*usecprj)= wave functions projected with non-local projectors:
                               cprj(n,k,i)=<p_i|Cnk> where p_i is a non-local projector.
  rmm_diis_status= Status of the RMM-DIIS eigensolver. See m_rmm_diis

NOTES

  Be careful to the meaning of nfft (size of FFT grids):
   - In case of norm-conserving calculations the FFT grid is the usual FFT grid.
   - In case of PAW calculations:
     Two FFT grids are used; one with nfft points (coarse grid) for
     the computation of wave functions ; one with nfftf points
     (fine grid) for the computation of total density.

  The total electronic density (rhor,rhog) is divided into two terms:
   - The density related to WFs =Sum[Psi**2]
   - The compensation density (nhat) - only in PAW

  The parallelisation needed for the electric field should be
  made an independent subroutine, so that this routine could be put
  back in the 95_drive directory.

SOURCE

 295 subroutine vtorho(afford,atindx,atindx1,cg,compch_fft,cprj,cpus,dbl_nnsclo,&
 296 &           dielop,dielstrt,dmatpawu,dphase,dtefield,dtfil,dtset,&
 297 &           eigen,electronpositron,energies,etotal,gbound_diel,&
 298 &           gmet,gprimd,grnl,gsqcut,hdr,extfpmd,indsym,irrzon,irrzondiel,&
 299 &           istep,istep_mix,itimes,kg,kg_diel,kxc,lmax_diel,mcg,mcprj,mgfftdiel,mpi_enreg,&
 300 &           my_natom,natom,nattyp,nfftf,nfftdiel,ngfftdiel,nhat,nkxc,&
 301 &           npwarr,npwdiel,nres2,ntypat,nvresid,occ,optforces,&
 302 &           optres,paw_dmft,paw_ij,pawang,pawfgr,pawfgrtab,pawrhoij,pawtab,&
 303 &           phnons,phnonsdiel,ph1d,ph1ddiel,psps,fock,&
 304 &           pwind,pwind_alloc,pwnsfac,resid,residm,rhog,rhor,&
 305 &           rmet,rprimd,susmat,symrec,taug,taur,tauresid,&
 306 &           ucvol,usecprj,wffnew,with_vectornd,vectornd,vtrial,vxctau,wvl,xred,&
 307 &           ylm,ylmgr,ylmdiel, rmm_diis_status)
 308 
 309 !Arguments -------------------------------
 310 !scalars
 311  integer, intent(in) :: afford,dbl_nnsclo,dielop,dielstrt,istep,istep_mix,lmax_diel,mcg,mcprj,mgfftdiel
 312  integer, intent(in) :: my_natom,natom,nfftf,nfftdiel,nkxc,npwdiel
 313  integer, intent(in) :: ntypat,optforces,optres,pwind_alloc,usecprj,with_vectornd
 314  real(dp), intent(in) :: cpus,etotal,gsqcut,ucvol
 315  real(dp), intent(out) :: compch_fft,nres2,residm
 316  type(MPI_type), intent(inout) :: mpi_enreg
 317  type(datafiles_type), intent(in) :: dtfil
 318  type(dataset_type), intent(inout) :: dtset
 319  type(efield_type), intent(inout) :: dtefield
 320  type(electronpositron_type),pointer :: electronpositron
 321  type(energies_type), intent(inout) :: energies
 322  type(hdr_type), intent(inout) :: hdr
 323  type(extfpmd_type),pointer,intent(inout) :: extfpmd
 324  type(paw_dmft_type), intent(inout)  :: paw_dmft
 325  type(pawang_type), intent(in) :: pawang
 326  type(pawfgr_type), intent(in) :: pawfgr
 327  type(pseudopotential_type), intent(in) :: psps
 328  type(fock_type),pointer, intent(inout) :: fock
 329  type(wffile_type), intent(inout) :: wffnew
 330  type(wvl_data), intent(inout) :: wvl
 331 !arrays
 332  integer, intent(in) :: atindx(natom),atindx1(natom),gbound_diel(2*mgfftdiel+8,2)
 333  integer, intent(in) :: indsym(4,dtset%nsym,natom)
 334  integer, intent(in) :: irrzon(dtset%nfft**(1-1/dtset%nsym),2,(dtset%nspden/dtset%nsppol)-3*(dtset%nspden/4))
 335  integer, intent(in) :: irrzondiel(nfftdiel**(1-1/dtset%nsym),2,(dtset%nspden/dtset%nsppol)-3*(dtset%nspden/4))
 336  integer, intent(in) :: itimes(2)
 337  integer, intent(in) :: kg(3,dtset%mpw*dtset%mkmem),kg_diel(3,npwdiel),nattyp(ntypat),ngfftdiel(18),npwarr(dtset%nkpt)
 338  integer, intent(in) :: pwind(pwind_alloc,2,3),symrec(3,3,dtset%nsym)
 339  integer, intent(inout) :: rmm_diis_status(2, dtset%nkpt, dtset%nsppol)
 340  real(dp), intent(in) :: dmatpawu(:,:,:,:),gmet(3,3),gprimd(3,3),ph1d(2,3*(2*dtset%mgfft+1)*natom)
 341  real(dp), intent(in) :: ph1ddiel(2,(3*(2*mgfftdiel+1)*natom)*psps%usepaw)
 342  real(dp), intent(in) :: phnons(2,dtset%nfft**(1-1/dtset%nsym),(dtset%nspden/dtset%nsppol)-3*(dtset%nspden/4))
 343  real(dp), intent(in) :: phnonsdiel(2,nfftdiel**(1-1/dtset%nsym),(dtset%nspden/dtset%nsppol)-3*(dtset%nspden/4))
 344  real(dp), intent(in) :: pwnsfac(2,pwind_alloc),rmet(3,3),rprimd(3,3)
 345  real(dp), intent(inout) :: vectornd(with_vectornd*nfftf,dtset%nspden,3),vtrial(nfftf,dtset%nspden)
 346  real(dp), intent(inout) :: xred(3,natom)
 347  real(dp), intent(in) :: ylm(dtset%mpw*dtset%mkmem,psps%mpsang*psps%mpsang*psps%useylm)
 348  real(dp), intent(in) :: ylmgr(dtset%mpw*dtset%mkmem,3,psps%mpsang*psps%mpsang*psps%useylm)
 349  real(dp), intent(in) :: ylmdiel(npwdiel,lmax_diel**2)
 350  real(dp), intent(out) :: dphase(3),grnl(3*natom)
 351  real(dp), intent(inout) :: eigen(dtset%mband*dtset%nkpt*dtset%nsppol)
 352  real(dp), intent(out) :: nhat(nfftf,dtset%nspden*psps%usepaw)
 353  real(dp), intent(inout) :: resid(dtset%mband*dtset%nkpt*dtset%nsppol)
 354  real(dp), intent(out) :: nvresid(nfftf,dtset%nspden)
 355  real(dp), intent(out) :: susmat(2,npwdiel*afford,dtset%nspden,npwdiel,dtset%nspden)
 356  real(dp), intent(inout) :: cg(2,mcg)
 357  real(dp), intent(inout) :: kxc(nfftf,nkxc),occ(dtset%mband*dtset%nkpt*dtset%nsppol)
 358  real(dp), intent(inout) :: rhog(2,nfftf),rhor(nfftf,dtset%nspden)
 359  real(dp), intent(inout) :: taug(2,nfftf*dtset%usekden),taur(nfftf,dtset%nspden*dtset%usekden)
 360  real(dp), intent(inout) :: tauresid(nfftf,dtset%nspden*dtset%usekden)
 361  real(dp), intent(inout),optional :: vxctau(nfftf,dtset%nspden,4*dtset%usekden)
 362  type(pawcprj_type),pointer,intent(inout) :: cprj(:,:)
 363  type(paw_ij_type),intent(inout) :: paw_ij(my_natom*psps%usepaw)
 364  type(pawfgrtab_type),intent(inout) :: pawfgrtab(my_natom*psps%usepaw)
 365  type(pawrhoij_type),target,intent(inout) :: pawrhoij(my_natom*psps%usepaw)
 366  type(pawtab_type),intent(in) :: pawtab(ntypat*psps%usepaw)
 367 
 368 !Local variables-------------------------------
 369 !scalars
 370  integer,parameter :: level=111,tim_mkrho=2
 371  !integer,save :: nwarning=0
 372  integer :: bdtot_index,counter,cplex,cplex_rhoij,dimffnl,enunit,iband,iband1,ibdkpt
 373  integer :: ibg,icg,ider,idir,ierr,ifft,ifor,ifor1,ii,ikg,ikpt
 374  integer :: ikpt_loc,ikpt1,my_ikpt,ikxc,ilm,imagn,index1,iorder_cprj,ipert,iplex
 375  integer :: iscf,ispden,isppol,istwf_k,mband_cprj,mbdkpsp,mb2dkpsp
 376  integer :: mcgq,mcprj_local,mcprj_tmp,me_distrb,mkgq,mpi_comm_sphgrid
 377  integer :: my_nspinor,n1,n2,n3,n4,n5,n6,nband_eff,nbdbuf_eff !mwarning,
 378  integer :: nband_k,nband_cprj_k,nbuf,neglect_pawhat,nfftot,nkpg,nkpt1,nnn,nnsclo_now
 379  integer :: nproc_distrb,npw_k,nspden_rhoij,option,prtvol,nblk_gemm_nonlop
 380  integer :: spaceComm_distrb,usecprj_local,usefock_ACE,usetimerev
 381  logical :: berryflag,computesusmat,fixed_occ,has_vectornd
 382  logical :: locc_test,paral_atom,remove_inv,usefock,with_vxctau
 383  logical :: do_last_ortho,wvlbigdft=.false.
 384  real(dp) :: dmft_dftocc
 385  real(dp) :: edmft,ebandlda,ebanddmft,ebandldatot,ekindmft,ekindmft2,ekinlda
 386  real(dp) :: min_occ,vxcavg_dum,strsxc(6)
 387  character(len=500) :: msg
 388  type(bandfft_kpt_type),pointer :: my_bandfft_kpt => null()
 389  type(gs_hamiltonian_type) :: gs_hamk
 390 !arrays
 391  integer(int32), ABI_CONTIGUOUS pointer :: kg_k(:,:) => null()
 392  real(dp) :: dielar(7),dphase_k(3),kpoint(3),qpt(3),rhodum(1),tsec(2),ylmgr_dum(0,0,0)
 393  real(dp),allocatable :: EigMin(:,:),buffer1(:),buffer2(:),cgq(:,:)
 394  real(dp),allocatable :: cgrkxc(:,:),doccde(:)
 395  real(dp),allocatable :: dphasek(:,:),ek_k(:),ek_k_nd(:,:,:),eknk(:),eknk_nd(:,:,:,:,:),end_k(:)
 396  real(dp),allocatable :: enlx_k(:),enlxnk(:),focknk(:),fockfornk(:,:,:),ffnl(:,:,:,:),grnl_k(:,:), xcart(:,:)
 397  real(dp),allocatable :: grnlnk(:,:)
 398 
 399 #if defined HAVE_GPU && defined HAVE_YAKL
 400  real(c_double), ABI_CONTIGUOUS pointer :: kinpw(:) => null()
 401  real(c_double), ABI_CONTIGUOUS pointer :: eig_k(:) => null()
 402 #else
 403  real(dp),allocatable :: kinpw(:)
 404  real(dp),allocatable :: eig_k(:)
 405 #endif
 406 
 407  real(dp),allocatable :: kpg_k(:,:),occ_k(:),ph3d(:,:,:)
 408  real(dp),allocatable :: pwnsfacq(:,:)
 409 
 410 #if defined HAVE_GPU && defined HAVE_YAKL
 411  real(c_double), ABI_CONTIGUOUS pointer :: resid_k(:) => null()
 412  real(c_double), ABI_CONTIGUOUS pointer :: rhoaug(:,:,:,:) => null()
 413 #else
 414  real(dp),allocatable :: resid_k(:)
 415  real(dp),allocatable :: rhoaug(:,:,:,:)
 416 #endif
 417 
 418  real(dp),allocatable :: rhowfg(:,:),rhowfr(:,:),tauwfg(:,:),tauwfr(:,:)
 419  real(dp),allocatable :: vectornd_pac(:,:,:,:,:)
 420 
 421 #if defined HAVE_GPU && defined HAVE_YAKL
 422  real(real64), ABI_CONTIGUOUS pointer :: vlocal(:,:,:,:) => null()
 423 #else
 424  real(dp), allocatable :: vlocal(:,:,:,:)
 425 #endif
 426 
 427  real(dp),allocatable :: vxctaulocal(:,:,:,:,:),ylm_k(:,:),zshift(:)
 428  type(pawcprj_type),allocatable :: cprj_tmp(:,:)
 429  type(pawcprj_type),allocatable,target:: cprj_local(:,:)
 430  type(oper_type) :: dft_occup
 431  type(pawrhoij_type),pointer :: pawrhoij_unsym(:)
 432  type(crystal_t) :: cryst_struc
 433  integer :: idum1(0),idum3(0,0,0)
 434  real(dp) :: rdum2(0,0),rdum4(0,0,0,0)
 435 #if defined HAVE_BIGDFT
 436  integer :: occopt_bigdft
 437 #endif
 438 
 439 #if defined(HAVE_GPU_CUDA)
 440  type(gemm_nonlop_gpu_type) :: gemm_nonlop_gpu_obj
 441 #endif
 442 
 443 
 444 ! *********************************************************************
 445 
 446  DBG_ENTER("COLL")
 447 
 448 !Keep track of total time spent in vtorho
 449  call timab(980,1,tsec)
 450  call timab(981,1,tsec)
 451 
 452 !Structured debugging if prtvol==-level
 453  prtvol=dtset%prtvol
 454 
 455 ! Electric fields: set flag to turn on various behaviors
 456  berryflag = any(dtset%berryopt == [4, 14, 6, 16, 7, 17])
 457 
 458 !If usewvl: wvlbigdft indicates that the BigDFT workflow will be followed
 459  wvlbigdft=(dtset%usewvl==1.and.dtset%wvl_bigdft_comp==1)
 460 
 461 !Several inits
 462  n1=dtset%ngfft(1) ; n2=dtset%ngfft(2) ; n3=dtset%ngfft(3)
 463  n4=dtset%ngfft(4) ; n5=dtset%ngfft(5) ; n6=dtset%ngfft(6)
 464  usecprj_local=0;if (psps%usepaw==1) usecprj_local=1
 465  my_nspinor=max(1,dtset%nspinor/mpi_enreg%nproc_spinor)
 466  paral_atom=(my_natom/=natom)
 467  compch_fft=-1.d5
 468 
 469 !Check that usekden is not 0 if want to use vxctau
 470  with_vxctau = (present(vxctau).and.dtset%usekden/=0)
 471 
 472 !Check that fock is present if want to use fock option
 473  usefock = (dtset%usefock==1 .and. associated(fock))
 474  usefock_ACE=0
 475  if (usefock) usefock_ACE=fock%fock_common%use_ACE
 476 
 477 !Init MPI
 478  spaceComm_distrb=mpi_enreg%comm_cell
 479  if (mpi_enreg%paral_kgb==1) spaceComm_distrb=mpi_enreg%comm_kpt
 480  if (mpi_enreg%paral_hf ==1) spaceComm_distrb=mpi_enreg%comm_kpt
 481  nproc_distrb=xmpi_comm_size(spaceComm_distrb)
 482  me_distrb=xmpi_comm_rank(spaceComm_distrb)
 483  mpi_comm_sphgrid=mpi_enreg%comm_fft
 484  if(dtset%usewvl==1) mpi_comm_sphgrid=mpi_enreg%comm_wvl
 485  !if (mpi_enreg%me_img/=0) nwarning=nwarning+1
 486 
 487 !Test size of FFT grids (1 grid in norm-conserving, 2 grids in PAW)
 488  if ((psps%usepaw==1.and.pawfgr%nfft/=nfftf).or.(psps%usepaw==0.and.dtset%nfft/=nfftf)) then
 489    ABI_BUG('wrong values for nfft, nfftf!')
 490  end if
 491 
 492 !Test optforces (to prevent memory overflow)
 493  if (optforces/=0.and.optforces/=1) then
 494    ABI_BUG(sjoin('wrong value for optforces: ',itoa(optforces)))
 495  end if
 496 
 497  iscf=dtset%iscf
 498  fixed_occ=(dtset%occopt<3.or.electronpositron_calctype(electronpositron)==1)
 499  if(.not. wvlbigdft) then
 500    energies%e_eigenvalues = zero
 501    energies%e_kinetic     = zero
 502    energies%e_nucdip      = zero
 503    energies%e_nlpsp_vfock = zero
 504    if (usefock) then
 505      energies%e_fock=zero
 506      energies%e_fockdc=zero
 507    end if
 508    grnl(:)=zero
 509    if (berryflag) then
 510      resid(:) = zero ! JWZ 13 May 2010. resid and eigen need to be fully zeroed each time before use
 511    end if
 512    ! MG: The previous line is not compatible with the RMM-DIIS since rmm_diis recevies the previous resid_k
 513    ! to select the accuracy level.
 514    ! For the time being, we set resid to zero if berryflag to avoid breaking the CG solver with E-field
 515    ! but it's clear that the treatment of resid should be rationalized and that the previous values should be passed to vtowfk
 516    eigen(:) = zero
 517    bdtot_index=0
 518    ibg=0;icg=0
 519    mbdkpsp=dtset%mband*dtset%nkpt*dtset%nsppol
 520    if(paw_dmft%use_dmft==1) mb2dkpsp=2*dtset%mband*dtset%mband*dtset%nkpt*dtset%nsppol
 521  end if
 522 
 523  if(dtset%usewvl==0) then
 524    ABI_MALLOC(eknk,(mbdkpsp))
 525    ABI_MALLOC(enlxnk,(mbdkpsp))
 526    ABI_MALLOC(eknk_nd,(dtset%nsppol,dtset%nkpt,2,dtset%mband,dtset%mband*paw_dmft%use_dmft))
 527    ABI_MALLOC(EigMin,(2,dtset%mband))
 528    ABI_MALLOC(grnlnk,(3*natom,mbdkpsp*optforces))
 529    if (usefock) then
 530      ABI_MALLOC(focknk,(mbdkpsp))
 531      focknk=zero
 532      if (optforces>0)then
 533        ABI_MALLOC(fockfornk,(3,natom,mbdkpsp))
 534        fockfornk=zero
 535      end if
 536    end if
 537    eknk(:)=zero;enlxnk(:)=zero
 538    if (optforces>0) grnlnk(:,:)=zero
 539    if(paw_dmft%use_dmft==1) eknk_nd=zero
 540  end if !usewvl==0
 541 
 542 !Initialize rhor if needed; store old rhor
 543  if(iscf>=0 .or. iscf==-3) then
 544    if (optres==1) then
 545      nvresid=rhor ; tauresid=taur
 546    end if
 547 !  NC and plane waves
 548    if (psps%usepaw==0 .and. dtset%usewvl==0) then
 549      rhor=zero ; taur=zero
 550 !  PAW
 551    else if(psps%usepaw==1) then
 552      ABI_MALLOC(rhowfr,(dtset%nfft,dtset%nspden))
 553      ABI_MALLOC(rhowfg,(2,dtset%nfft))
 554      ABI_MALLOC(tauwfr,(dtset%nfft,dtset%nspden*dtset%usekden))
 555      ABI_MALLOC(tauwfg,(2,dtset%nfft*dtset%usekden))
 556      rhowfr(:,:)=zero ; tauwfr(:,:)=zero
 557    end if
 558  end if
 559 
 560  ! Here we set the max number of non-self-consistent loops nnsclo_now used in vtowfk
 561  if (iscf<0) then
 562    ! Non self-consistent case
 563    nnsclo_now=dtset%nstep
 564  else
 565    ! Self-consistent case
 566    if (dtset%nnsclo>0) then
 567      ! Use input variable if specified and > 0
 568      nnsclo_now=dtset%nnsclo
 569    else if (dtset%nnsclo < 0) then
 570      ! imposed during abs(nnsclo) steps
 571      nnsclo_now=1
 572      if (istep<=abs(dtset%nnsclo)) nnsclo_now=merge(5,dtset%useria,dtset%useria==0)
 573    else
 574      ! Default branch for self-consistent case.
 575      ! Perform 2 NSCF loops for the first two iterations. This is important especially wfs have
 576      ! been initialized with random numbers.
 577      nnsclo_now = 1
 578      if (dtset%usewvl == 0) then
 579        ! Plane waves
 580        if (istep <= 2 .and. iscf /= 0) nnsclo_now = 2
 581        ! MG: I don't understand why we need to perform 2 NSCF loops after the first SCF cycle
 582        ! when we are relaxing the structure as the initial density and wavefunctions should be already good enough.
 583        ! Here I change the default behavior to avoid the extra loop but only if RMM-DIIS is used.
 584        ! XG 20210312 : I prefectly agree with you. This is historical, and should be changed, after testing and update of reference files.
 585        if ((itimes(1) > 1 .or. (itimes(2)>1)) .and. dtset%rmm_diis /= 0) nnsclo_now = 1
 586      else
 587        ! Wavelets
 588        if (iscf==0) then
 589          nnsclo_now=0
 590        else if (istep<=2) then
 591          nnsclo_now=3
 592        else if (istep<=4) then
 593          nnsclo_now=2
 594        end if
 595      end if
 596    end if
 597    ! Double the value if required
 598    if (dbl_nnsclo==1) nnsclo_now=nnsclo_now*2
 599  end if
 600 
 601  if(dtset%wfoptalg==2)nnsclo_now=40  ! UNDER DEVELOPMENT
 602 
 603  if (dtset%prtvol > 0) then
 604    write(msg, '(a,i0,a,3(i0,1x))' ) ' vtorho: nnsclo_now = ',nnsclo_now,&
 605      ', note that nnsclo, dbl_nnsclo, istep= ',dtset%nnsclo,dbl_nnsclo,istep
 606    call wrtout(std_out,msg)
 607  else
 608    if (nnsclo_now > 1) call wrtout(std_out, sjoin(" Max number of non-self-consistent loops:", itoa(nnsclo_now)))
 609  end if
 610 
 611 !==== Initialize most of the Hamiltonian ====
 612 !Allocate all arrays and initialize quantities that do not depend on k and spin.
 613  call init_hamiltonian(gs_hamk,psps,pawtab,dtset%nspinor,dtset%nsppol,dtset%nspden,natom,&
 614 & dtset%typat,xred,dtset%nfft,dtset%mgfft,dtset%ngfft,rprimd,dtset%nloalg,&
 615 & paw_ij=paw_ij,ph1d=ph1d,usecprj=usecprj_local,electronpositron=electronpositron,fock=fock,&
 616 & comm_atom=mpi_enreg%comm_atom,mpi_atmtab=mpi_enreg%my_atmtab,mpi_spintab=mpi_enreg%my_isppoltab,&
 617 & nucdipmom=dtset%nucdipmom,gpu_option=dtset%gpu_option)
 618 
 619 !Initializations for PAW (projected wave functions)
 620  mcprj_local=0 ; mband_cprj=0
 621  if (psps%usepaw==1) then
 622    mband_cprj=dtset%mband
 623    if (dtset%paral_kgb/=0) mband_cprj=mband_cprj/mpi_enreg%nproc_band
 624    iorder_cprj=0 ; mcprj_local=mcprj
 625    if (usecprj==0) then
 626      mcprj_local=my_nspinor*mband_cprj*dtset%mkmem*dtset%nsppol
 627      !This is a check but should always be true since scfcv allocated cprj anyway
 628      if (allocated(cprj_local)) then
 629        !Was allocated in scfcv so we just destroy and reconstruct it as desired
 630        call pawcprj_free(cprj_local)
 631        ABI_FREE(cprj_local)
 632      end if
 633      ABI_MALLOC(cprj_local,(dtset%natom,mcprj_local))
 634      call pawcprj_alloc(cprj_local,0,gs_hamk%dimcprj)
 635      cprj => null()
 636      cprj => cprj_local
 637    end if
 638  end if
 639 
 640  call timab(981,2,tsec)
 641 
 642 !===================================================================
 643 ! WAVELETS - Branching with a separate VTORHO procedure
 644 !===================================================================
 645 
 646  if (dtset%usewvl == 1) then
 647 #ifndef HAVE_BIGDFT
 648    BIGDFT_NOTENABLED_ERROR()
 649 #else
 650 
 651 !  do_last_ortho in case of diagonalization scheme
 652    if (     wvlbigdft) do_last_ortho=(dtset%iscf/=0)
 653    if (.not.wvlbigdft) do_last_ortho=(.true.)
 654 
 655    ABI_MALLOC(xcart,(3, dtset%natom))
 656    call xred2xcart(dtset%natom, rprimd, xcart, xred)
 657 
 658    if(wvlbigdft) then
 659 !    NSCF loop for wvlbigdt:
 660      call wvl_nscf_loop_bigdft()
 661    else
 662 !    NSCF loop for WVL: (not wvlbigdft)
 663      call wvl_nscf_loop()
 664    end if
 665 
 666 !  Eventually orthogonalize WFs now
 667    if (do_last_ortho) then
 668      call write_energies(ii,0,wvl%e%energs,0.d0,0.d0,"final")
 669      call last_orthon(me_distrb, nproc_distrb, ii, wvl%wfs%ks, wvl%e%energs%evsum, .true.)
 670      if(wvlbigdft) energies%e_xcdc = wvl%e%energs%evxc
 671 !    If occupation numbers are not changed...
 672      if (fixed_occ .or. (iscf<0 .and. iscf/=-3)) then
 673        call wvl_comm_eigen()
 674      end if
 675 !    ... or update occupations:
 676      if( ( .not.fixed_occ) .and. (iscf>0.or.iscf==-3)) then
 677        if(wvlbigdft) then
 678          call wvl_occ_bigdft()
 679        else
 680 !        Communicate eigenvalues:
 681          call wvl_comm_eigen()
 682 !        Update occ and Fermi level
 683          call wvl_occ()
 684        end if
 685      end if
 686 !    This might accelerate convergence:
 687      wvl%wfs%ks%diis%energy_min=one
 688      wvl%wfs%ks%diis%alpha=two
 689    end if !do_last_ortho
 690 
 691 !  Compute eigenvalues energy
 692    if(.not. wvlbigdft .and. nnsclo_now>0) then
 693      call e_eigen(eigen,energies%e_eigenvalues,dtset%mband,dtset%nband,dtset%nkpt,&
 694 &     dtset%nsppol,occ,dtset%wtk)
 695    else
 696      energies%e_eigenvalues = energies%e_kinetic + energies%e_localpsp &
 697 &     + energies%e_xcdc  + two*energies%e_hartree +energies%e_nlpsp_vfock
 698    end if
 699 
 700    if (optforces == 1) then ! not compatible with iscf=0 and wvlbigdftcomp=1 + PAW
 701      call wvl_nl_gradient(grnl, mpi_enreg, dtset%natom, rprimd, wvl, xcart)
 702    end if
 703 
 704 !  For iscf<0 we do not update the density
 705    if (dtset%iscf>=0) then !(dtset%iscf>=0 .and. .not. wvlbigdft ) then
 706      call wvl_mkrho(dtset,irrzon,mpi_enreg,phnons,rhor,wvl%wfs,wvl%den)
 707    end if
 708    ABI_FREE(xcart)
 709 
 710 !  Note in WVL+NC: the rest will be skipped.
 711 !  For PAW: we will compute Rho_ij at the end.
 712    !if(wvlbigdft) return
 713 #endif
 714  else
 715 
 716 !===================================================================
 717 ! PLANE WAVES - Standard VTORHO procedure
 718 !===================================================================
 719 
 720 !  Electric field: allocate dphasek
 721    nkpt1 = dtset%nkpt
 722    if ( berryflag ) then
 723      ABI_MALLOC(dphasek,(3,dtset%nkpt*dtset%nsppol))
 724      dphasek(:,:) = zero
 725      nkpt1 = dtefield%mkmem_max
 726    end if
 727 
 728    if(dtset%gpu_option==ABI_GPU_KOKKOS) then
 729 #if defined HAVE_GPU && defined HAVE_YAKL
 730      ABI_MALLOC_MANAGED(rhoaug, (/n4,n5,n6,gs_hamk%nvloc/))
 731      ABI_MALLOC_MANAGED(vlocal, (/n4,n5,n6,gs_hamk%nvloc/))
 732 #endif
 733    else
 734      ABI_MALLOC(rhoaug,(n4,n5,n6,gs_hamk%nvloc))
 735      ABI_MALLOC(vlocal,(n4,n5,n6,gs_hamk%nvloc))
 736    end if
 737 
 738    if(with_vxctau) then
 739      ABI_MALLOC(vxctaulocal,(n4,n5,n6,gs_hamk%nvloc,4))
 740    end if
 741 
 742    has_vectornd = (with_vectornd .EQ. 1)
 743    if(has_vectornd) then
 744       ABI_MALLOC(vectornd_pac,(n4,n5,n6,gs_hamk%nvloc,3))
 745       vectornd_pac=zero
 746    end if
 747 
 748   nbdbuf_eff = dtset%nbdbuf
 749   ! In metallic case, at first iteration, occupations could be 0. So residm should be computed as usual
 750   if (dtset%nbdbuf==-101.and..not.fixed_occ.and.istep==1.and.minval(occ)<tol10) then
 751     write(msg,*) 'vtorho: nbdbuf is set to 0 for this step'
 752     call wrtout(std_out,msg,'COLL')
 753     nbdbuf_eff = 0
 754   end if
 755 
 756 !  LOOP OVER SPINS
 757    do isppol=1,dtset%nsppol
 758      call timab(982,1,tsec)
 759 
 760      ikpt_loc = 0
 761      ikg=0
 762 
 763      ! Set up local potential vlocal on the coarse FFT mesh from vtrial taking into account the spin.
 764      ! Also, continue to initialize the Hamiltonian.
 765 
 766      call gspot_transgrid_and_pack(isppol, psps%usepaw, dtset%paral_kgb, dtset%nfft, dtset%ngfft, nfftf, &
 767                                    dtset%nspden, gs_hamk%nvloc, 1, pawfgr, mpi_enreg, vtrial, vlocal)
 768      call gs_hamk%load_spin(isppol, vlocal=vlocal, with_nonlocal=.true.)
 769 
 770      if (with_vxctau) then
 771        call gspot_transgrid_and_pack(isppol, psps%usepaw, dtset%paral_kgb, dtset%nfft, dtset%ngfft, nfftf, &
 772                                      dtset%nspden, gs_hamk%nvloc, 4, pawfgr, mpi_enreg, vxctau, vxctaulocal)
 773        call gs_hamk%load_spin(isppol, vxctaulocal=vxctaulocal)
 774      end if
 775 
 776      rhoaug(:,:,:,:)=zero
 777 
 778      ! if vectornd is present, set it up for addition to gs_hamk similarly to how it's done for
 779      ! vtrial. Note that it must be done for the three Cartesian directions. Also, the following
 780      ! code assumes explicitly and implicitly that nvloc = 1. This should eventually be generalized.
 781      if(has_vectornd) then
 782        call gspot_transgrid_and_pack(isppol, psps%usepaw, dtset%paral_kgb, dtset%nfft, dtset%ngfft, nfftf, &
 783          & dtset%nspden, gs_hamk%nvloc, 3, pawfgr, mpi_enreg, vectornd, vectornd_pac)
 784        call gs_hamk%load_spin(isppol, vectornd=vectornd_pac)
 785      end if
 786     
 787      call timab(982,2,tsec)
 788 
 789 !    BIG FAT k POINT LOOP
 790 !    MVeithen: I had to modify the structure of this loop in order to implement MPI // of the electric field
 791 !    Note that the loop here differs from the similar one in berryphase_new.F90.
 792 !    here, ikpt_loc numbers the kpts treated by the current processor.
 793 !    in berryphase_new.F90, ikpt_loc ALSO includes info about value of isppol.
 794 
 795      ikpt = 0
 796      do while (ikpt_loc < nkpt1)
 797 
 798        call timab(997,1,tsec)
 799 
 800        if ( .not.berryflag ) then
 801          ikpt_loc = ikpt_loc + 1
 802          ikpt = ikpt_loc
 803          my_ikpt = mpi_enreg%my_kpttab(ikpt)
 804        else
 805          if (ikpt_loc < dtset%mkmem) ikpt = ikpt + 1
 806          if ((ikpt > dtset%nkpt).and.(ikpt_loc < dtset%mkmem)) exit
 807          my_ikpt=ikpt
 808        end if
 809 
 810        dphase_k(:) = zero
 811        counter=100*ikpt+isppol
 812        nband_k=dtset%nband(ikpt+(isppol-1)*dtset%nkpt)
 813        nband_cprj_k=nband_k/mpi_enreg%nproc_band
 814        istwf_k=dtset%istwfk(ikpt)
 815        npw_k=npwarr(ikpt)
 816 
 817        mcgq=1 ; mkgq=1
 818        if (.not.berryflag) then
 819          if(proc_distrb_cycle(mpi_enreg%proc_distrb,ikpt,1,nband_k,isppol,me_distrb)) then
 820            eigen(1+bdtot_index : nband_k+bdtot_index) = zero
 821            resid(1+bdtot_index : nband_k+bdtot_index) = zero
 822            bdtot_index=bdtot_index+nband_k
 823            cycle
 824          end if
 825        else
 826          if ((proc_distrb_cycle(mpi_enreg%proc_distrb,ikpt,1,nband_k,isppol,me_distrb)) .and.(ikpt_loc <= dtset%mkmem)) then
 827            eigen(1+bdtot_index : nband_k+bdtot_index) = zero
 828            resid(1+bdtot_index : nband_k+bdtot_index) = zero
 829            bdtot_index = bdtot_index + nband_k
 830            cycle
 831          end if
 832          ikpt_loc = ikpt_loc + 1
 833          mcgq = dtset%mpw*my_nspinor*nband_k*dtefield%nneigh(ikpt)
 834          ikg = dtefield%kgindex(ikpt)
 835          mkgq = 6*dtset%mpw
 836        end if ! berryflag
 837 
 838        call timab(997,2,tsec)
 839 
 840 !      In case of MPI // of a finite field calculation
 841 !      build the cgq array that stores the wavefunctions for the
 842 !      neighbours of ikpt, and the pwnsfacq array that stores the
 843 !      corresponding phase factors (in case of tnons)
 844        ABI_MALLOC(cgq,(2,mcgq))
 845        ABI_MALLOC(pwnsfacq,(2,mkgq))
 846        if ( berryflag ) then
 847          call cgq_builder(berryflag,cg,cgq,dtefield,dtset,ikpt,ikpt_loc,isppol,mcg,mcgq,&
 848 &         me_distrb,mkgq,mpi_enreg,my_nspinor,nband_k,nproc_distrb,&
 849 &         npwarr,pwnsfac,pwnsfacq,pwind_alloc,spaceComm_distrb)
 850          if (ikpt_loc > dtset%mkmem) then
 851            ABI_FREE(cgq)
 852            ABI_FREE(pwnsfacq)
 853            cycle
 854          end if
 855        end if !berryopt
 856 
 857        call timab(984,1,tsec)
 858 
 859        if (mpi_enreg%paral_kgb==1) my_bandfft_kpt => bandfft_kpt(my_ikpt)
 860        call bandfft_kpt_set_ikpt(ikpt,mpi_enreg)
 861 !      my_ikpt = ikpt
 862 !       if (mpi_enreg%paral_kgb==1) then
 863 !        my_ikpt = mpi_enreg%my_kpttab(ikpt)
 864 !         my_bandfft_kpt => bandfft_kpt(my_ikpt)
 865 !         call bandfft_kpt_set_ikpt(ikpt,mpi_enreg)
 866 !       end if
 867 
 868        ABI_MALLOC(ek_k,(nband_k))
 869        ABI_MALLOC(end_k,(nband_k))
 870        ABI_MALLOC(enlx_k,(nband_k))
 871        ABI_MALLOC(ek_k_nd,(2,nband_k,nband_k*paw_dmft%use_dmft))
 872        ABI_MALLOC(occ_k,(nband_k))
 873        ABI_MALLOC(zshift,(nband_k))
 874        ABI_MALLOC(grnl_k,(3*natom,nband_k*optforces))
 875 
 876        if(dtset%gpu_option==ABI_GPU_KOKKOS) then
 877 #if defined HAVE_GPU && defined HAVE_YAKL
 878          ABI_MALLOC_MANAGED(eig_k,(/nband_k/))
 879          ABI_MALLOC_MANAGED(resid_k,(/nband_k/))
 880 #endif
 881        else
 882          ABI_MALLOC(eig_k,(nband_k))
 883          ABI_MALLOC(resid_k,(nband_k))
 884        end if
 885 
 886        eig_k(:)=zero
 887        ek_k(:)=zero
 888        end_k(:)=zero
 889        enlx_k(:)=zero
 890        if(paw_dmft%use_dmft==1) ek_k_nd(:,:,:)=zero
 891        if (optforces>0) grnl_k(:,:)=zero
 892        kpoint(:)=dtset%kptns(:,ikpt)
 893        occ_k(:)=occ(1+bdtot_index:nband_k+bdtot_index)
 894        resid_k(:) = resid(1+bdtot_index : nband_k+bdtot_index)
 895        !resid_k(:)=zero
 896        zshift(:)=dtset%eshift
 897 
 898        if(dtset%gpu_option==ABI_GPU_KOKKOS) then
 899 #if defined HAVE_GPU && defined HAVE_YAKL
 900          ABI_MALLOC_MANAGED(kg_k, (/3,npw_k/))
 901 #endif
 902        else
 903          ABI_MALLOC(kg_k,(3,npw_k))
 904        end if
 905 
 906        ABI_MALLOC(ylm_k,(npw_k,psps%mpsang*psps%mpsang*psps%useylm))
 907        kg_k(:,1:npw_k)=kg(:,1+ikg:npw_k+ikg)
 908        if (psps%useylm==1) then
 909          do ilm=1,psps%mpsang*psps%mpsang
 910            ylm_k(1:npw_k,ilm)=ylm(1+ikg:npw_k+ikg,ilm)
 911          end do
 912        end if
 913 
 914 !      Set up remaining of the Hamiltonian
 915 
 916 !      Compute (1/2) (2 Pi)**2 (k+G)**2:
 917        if(dtset%gpu_option==ABI_GPU_KOKKOS) then
 918 #if defined HAVE_GPU && defined HAVE_YAKL
 919          ABI_MALLOC_MANAGED(kinpw,(/npw_k/))
 920 #endif
 921        else
 922          ABI_MALLOC(kinpw,(npw_k))
 923        end if
 924 
 925        call mkkin(dtset%ecut,dtset%ecutsm,dtset%effmass_free,gmet,kg_k,kinpw,kpoint,npw_k,0,0)
 926 
 927 !      Compute (k+G) vectors (only if useylm=1)
 928        nkpg=3*optforces*dtset%nloalg(3)
 929        ABI_MALLOC(kpg_k,(npw_k,nkpg))
 930        if ((mpi_enreg%paral_kgb/=1.or.istep<=1).and.nkpg>0) call mkkpg(kg_k,kpg_k,kpoint,nkpg,npw_k)
 931 
 932 !      Compute nonlocal form factors ffnl at all (k+G):
 933        ider=0;idir=0;dimffnl=1
 934        ABI_MALLOC(ffnl,(npw_k,dimffnl,psps%lmnmax,ntypat))
 935        if (mpi_enreg%paral_kgb/=1.or.istep<=1) then
 936          call mkffnl(psps%dimekb,dimffnl,psps%ekb,ffnl,psps%ffspl,&
 937 &         gmet,gprimd,ider,idir,psps%indlmn,kg_k,kpg_k,kpoint,psps%lmnmax,&
 938 &         psps%lnmax,psps%mpsang,psps%mqgrid_ff,nkpg,&
 939 &         npw_k,ntypat,psps%pspso,psps%qgrid_ff,rmet,&
 940 &         psps%usepaw,psps%useylm,ylm_k,ylmgr)
 941        end if
 942 
 943 !      Load k-dependent part in the Hamiltonian datastructure
 944 !       - Compute 3D phase factors
 945 !       - Prepare various tabs in case of band-FFT parallelism
 946 !       - Load k-dependent quantities in the Hamiltonian
 947        ABI_MALLOC(ph3d,(2,npw_k,gs_hamk%matblk))
 948 
 949        if(usefock_ACE/=0) then
 950          call gs_hamk%load_k(kpt_k=dtset%kptns(:,ikpt),istwf_k=istwf_k,npw_k=npw_k,&
 951 &         kinpw_k=kinpw,kg_k=kg_k,kpg_k=kpg_k,ffnl_k=ffnl,fockACE_k=fock%fockACE(ikpt,isppol),ph3d_k=ph3d,&
 952 &         compute_ph3d=(mpi_enreg%paral_kgb/=1.or.istep<=1),&
 953 &         compute_gbound=(mpi_enreg%paral_kgb/=1))
 954        else
 955          call gs_hamk%load_k(kpt_k=dtset%kptns(:,ikpt),istwf_k=istwf_k,npw_k=npw_k,&
 956 &         kinpw_k=kinpw,kg_k=kg_k,kpg_k=kpg_k,ffnl_k=ffnl,ph3d_k=ph3d,&
 957 &         compute_ph3d=(mpi_enreg%paral_kgb/=1.or.istep<=1),&
 958 &         compute_gbound=(mpi_enreg%paral_kgb/=1))
 959        end if
 960 
 961 !      Load band-FFT tabs (transposed k-dependent arrays)
 962        if (mpi_enreg%paral_kgb==1) then
 963          if (istep<=1) call prep_bandfft_tabs(gs_hamk,ikpt,dtset%mkmem,mpi_enreg)
 964          call gs_hamk%load_k(npw_fft_k=my_bandfft_kpt%ndatarecv, &
 965            gbound_k =my_bandfft_kpt%gbound, &
 966            kinpw_k  =my_bandfft_kpt%kinpw_gather, &
 967            kg_k     =my_bandfft_kpt%kg_k_gather, &
 968            kpg_k    =my_bandfft_kpt%kpg_k_gather, &
 969            ffnl_k   =my_bandfft_kpt%ffnl_gather, &
 970            ph3d_k   =my_bandfft_kpt%ph3d_gather)
 971        end if
 972 
 973 !      If OpenMP GPU, load "hamiltonian" on GPU device
 974        if (dtset%gpu_option == ABI_GPU_OPENMP) then
 975          if(dtset%paral_kgb==0) then
 976            call ompgpu_load_hamilt_buffers(gs_hamk%kg_k,gs_hamk%kg_kp)
 977          else if(istwf_k==1) then
 978            call ompgpu_load_hamilt_buffers(gs_hamk%kg_k,gs_hamk%kg_kp,kg_k_gather=bandfft_kpt(my_ikpt)%kg_k_gather)
 979          else if(istwf_k==2) then
 980            call ompgpu_load_hamilt_buffers(gs_hamk%kg_k,gs_hamk%kg_kp,kg_k_gather=bandfft_kpt(my_ikpt)%kg_k_gather_sym)
 981          else
 982            ABI_ERROR("istwfk > 2 is not handled with OpenMP GPU offload mode !")
 983          end if
 984        end if
 985 
 986        if(dtset%wfoptalg == 111 .and. istep <= 1) then
 987          gemm_nonlop_nblocks = dtset%gpu_nl_splitsize
 988          ! Only compute CHEBFI number of blocks if user didn't set it themselves
 989          if(gemm_nonlop_nblocks==1) then
 990            call chebfiwf2_blocksize(gs_hamk,mpi_enreg%bandpp,npw_k,nband_k,dtset%nspinor,mpi_enreg%paral_kgb,&
 991            &                        dtset%gpu_option,nblk_gemm_nonlop)
 992            gemm_nonlop_nblocks = nblk_gemm_nonlop
 993          end if
 994          gemm_nonlop_is_distributed = .false.
 995          if(gemm_nonlop_nblocks > 1 .and. dtset%gpu_nl_distrib/=0) gemm_nonlop_is_distributed = .true.
 996        end if
 997 
 998 !      Build inverse of overlap matrix for chebfi
 999        if(psps%usepaw == 1 .and. (dtset%wfoptalg == 1 .or. dtset%wfoptalg == 111) .and. istep <= 1) then
1000           ABI_NVTX_START_RANGE(NVTX_INVOVL)
1001           call make_invovl(gs_hamk, dimffnl, ffnl, ph3d, mpi_enreg)
1002           ABI_NVTX_END_RANGE()
1003        end if
1004 
1005        ! Setup gemm_nonlop
1006        if (gemm_nonlop_use_gemm) then
1007          !set the global variable indicating to gemm_nonlop where to get its data from
1008          gemm_nonlop_ikpt_this_proc_being_treated = my_ikpt
1009          if (istep <= 1) then
1010            !Init the arrays
1011            !FIXME Settle this
1012            if ( dtset%gpu_option == ABI_GPU_DISABLED) then
1013              call make_gemm_nonlop(my_ikpt,gs_hamk%npw_fft_k,gs_hamk%lmnmax, &
1014                  gs_hamk%ntypat, gs_hamk%indlmn, gs_hamk%nattyp, gs_hamk%istwf_k, &
1015                  gs_hamk%ucvol, gs_hamk%ffnl_k,&
1016                  gs_hamk%ph3d_k,gs_hamk%kpt_k,gs_hamk%kg_k,gs_hamk%kpg_k,&
1017                  compute_grad_atom=(optforces>0))
1018            else if ( dtset%gpu_option == ABI_GPU_LEGACY .or. dtset%gpu_option == ABI_GPU_KOKKOS) then
1019              call make_gemm_nonlop_gpu(my_ikpt,gs_hamk%npw_fft_k,gs_hamk%lmnmax, &
1020                  gs_hamk%ntypat, gs_hamk%indlmn, gs_hamk%nattyp, gs_hamk%istwf_k, &
1021                  gs_hamk%ucvol, gs_hamk%ffnl_k, &
1022                  gs_hamk%ph3d_k,gs_hamk%kpt_k,gs_hamk%kg_k,gs_hamk%kpg_k, &
1023                  compute_grad_atom=(optforces>0))
1024            else if ( dtset%gpu_option == ABI_GPU_OPENMP) then
1025              if(mpi_enreg%paral_kgb==0) then
1026                call ompgpu_load_hamilt_buffers(gs_hamk%kg_k,gs_hamk%kg_kp)
1027              else if(gs_hamk%istwf_k==1) then
1028                call ompgpu_load_hamilt_buffers(gs_hamk%kg_k,gs_hamk%kg_kp,kg_k_gather=bandfft_kpt(my_ikpt)%kg_k_gather)
1029              else if(gs_hamk%istwf_k==2) then
1030                call ompgpu_load_hamilt_buffers(gs_hamk%kg_k,gs_hamk%kg_kp,kg_k_gather=bandfft_kpt(my_ikpt)%kg_k_gather_sym)
1031              else
1032                ABI_ERROR("istwfk > 2 is not handled with OpenMP GPU offload mode !")
1033              end if
1034              call make_gemm_nonlop_ompgpu(my_ikpt,gs_hamk%npw_fft_k,gs_hamk%lmnmax, &
1035                  gs_hamk%ntypat, gs_hamk%indlmn, gs_hamk%nattyp, gs_hamk%istwf_k, &
1036                  gs_hamk%ucvol, gs_hamk%ffnl_k, &
1037                  gs_hamk%ph3d_k,gs_hamk%kpt_k,gs_hamk%kg_k,gs_hamk%kpg_k, &
1038                  compute_grad_atom=(optforces>0))
1039            end if
1040          end if
1041        end if
1042 
1043 #if defined HAVE_GPU_CUDA
1044        if (dtset%gpu_option==ABI_GPU_LEGACY .or. dtset%gpu_option==ABI_GPU_KOKKOS) then
1045          if (mpi_enreg%paral_kgb==1) then
1046            call gpu_update_ffnl_ph3d( &
1047              & my_bandfft_kpt%ph3d_gather, INT(size(my_bandfft_kpt%ph3d_gather),c_int64_t), &
1048              & my_bandfft_kpt%ffnl_gather, INT(size(my_bandfft_kpt%ffnl_gather),c_int64_t) )
1049          else
1050            call gpu_update_ffnl_ph3d( &
1051              & ph3d, INT(size(ph3d),c_int64_t), &
1052              & ffnl, INT(size(ffnl),c_int64_t) )
1053          end if
1054        end if
1055 #endif
1056 
1057        call timab(984,2,tsec)
1058 
1059 !      Update the value of ikpt,isppol in fock_exchange and allocate the memory space to perform HF calculation.
1060        if (usefock) call fock_updateikpt(fock%fock_common,ikpt,isppol)
1061        if (psps%usepaw==1 .and. usefock) then
1062          if ((fock%fock_common%optfor).and.(usefock_ACE==0)) fock%fock_common%forces_ikpt=zero
1063        end if
1064 
1065        ! Here we initialize the wavefunctions with atomic orbitals at the first GS iteration of the first
1066        ! relaxation step (if any).
1067        ! NB: Not all the cases are presently supported.
1068        !print *, "istep, itimes(1), wfinit", istep, itimes(1), dtset%wfinit
1069        ! FIXME: This check is not enough as I need to check whether cg have been read from WFK file
1070        if (istep == 1 .and. itimes(1) == 0 .and. dtset%wfinit /= 0) then
1071          call cg_from_atoms(ikpt, isppol, rprimd, xred, kg_k, cg(:,icg+1:), dtset, psps, eig_k, gs_hamk, &
1072                             mpi_enreg, nband_k, npw_k, my_nspinor)
1073        end if
1074 
1075        ABI_NVTX_START_RANGE(NVTX_VTOWFK)
1076 !      Compute the eigenvalues, wavefunction, residuals,
1077 !      contributions to kinetic energy, nuclear dipole energy,
1078 !      nonlocal energy, forces,
1079 !      and update of rhor to this k-point and this spin polarization.
1080        call vtowfk(cg,cgq,cprj,cpus,dphase_k,dtefield,dtfil,&
1081 &       dtset,eig_k,ek_k,ek_k_nd,end_k,enlx_k,fixed_occ,grnl_k,gs_hamk,&
1082 &       ibg,icg,ikpt,iscf,isppol,kg_k,kinpw,mband_cprj,mcg,mcgq,mcprj_local,mkgq,&
1083 &       mpi_enreg,dtset%mpw,natom,nband_k,nbdbuf_eff,dtset%nkpt,istep,nnsclo_now,npw_k,npwarr,&
1084 &       occ_k,optforces,prtvol,pwind,pwind_alloc,pwnsfac,pwnsfacq,resid_k,&
1085 &       rhoaug,paw_dmft,dtset%wtk(ikpt),zshift, rmm_diis_status(:,ikpt,isppol))
1086        ABI_NVTX_END_RANGE()
1087 
1088 ! LB-01/03/2024: Very weird compiler error on eos-nvhpc23.1 if the second call of timab(985,...) is included...
1089 ! Drastic short-term solution : disable this timing for nvhpc... In fact this part is not important unless fock is activated
1090 ! Note: Should we keep nvhpc-23.1 in eos?
1091 #ifndef FC_NVHPC
1092        call timab(985,1,tsec)
1093 #endif
1094 
1095 #if defined HAVE_GPU_CUDA
1096        if(dtset%gpu_option==ABI_GPU_LEGACY .or. dtset%gpu_option==ABI_GPU_KOKKOS) call gpu_finalize_ffnl_ph3d()
1097 #endif
1098        ABI_FREE(ffnl)
1099 
1100        if(dtset%gpu_option==ABI_GPU_KOKKOS) then
1101 #if defined HAVE_GPU && defined HAVE_YAKL
1102          ABI_FREE_MANAGED(kinpw)
1103          ABI_FREE_MANAGED(kg_k)
1104 #endif
1105        else
1106          ABI_FREE(kinpw)
1107          ABI_FREE(kg_k)
1108        end if
1109 
1110        ABI_FREE(kpg_k)
1111        ABI_FREE(ylm_k)
1112        ABI_FREE(ph3d)
1113        ABI_FREE(cgq)
1114        ABI_FREE(pwnsfacq)
1115 
1116 !      electric field
1117        if (berryflag) then
1118          dphasek(:,ikpt + (isppol - 1)*dtset%nkpt) = dphase_k(:)
1119 
1120 !        The overlap matrices for all first neighbours of ikpt are no more up to date
1121          do idir = 1, 3
1122            do ifor = 1, 2
1123              ikpt1 = dtefield%ikpt_dk(dtefield%i2fbz(ikpt),ifor,idir)
1124              ikpt1 = dtefield%indkk_f2ibz(ikpt1,1)
1125              ifor1 = -1*ifor + 3   ! ifor = 1 -> ifor1 = 2 & ifor = 2 -> ifor1 = 1
1126              dtefield%sflag(:,ikpt1+(isppol-1)*dtset%nkpt,ifor1,idir) = 0
1127            end do
1128          end do
1129        end if  ! berryflag
1130 
1131 !      Save eigenvalues (hartree), residuals (hartree**2)
1132        eigen(1+bdtot_index : nband_k+bdtot_index) = eig_k(:)
1133        eknk (1+bdtot_index : nband_k+bdtot_index) = ek_k (:)
1134        if(usefock) then
1135          focknk (1+bdtot_index : nband_k+bdtot_index) = fock%fock_common%eigen_ikpt (:)
1136          if (optforces>0) fockfornk(:,:,1+bdtot_index : nband_k+bdtot_index) = fock%fock_common%forces_ikpt(:,:,:)
1137        end if
1138        if(paw_dmft%use_dmft==1) eknk_nd(isppol,ikpt,:,:,:) = ek_k_nd(:,:,:)
1139        resid(1+bdtot_index : nband_k+bdtot_index) = resid_k(:)
1140        if (optforces>0) grnlnk(:,1+bdtot_index : nband_k+bdtot_index) = grnl_k(:,:)
1141        enlxnk(1+bdtot_index : nband_k+bdtot_index) = enlx_k(:)
1142 
1143        if(iscf>0 .or. iscf==-3)then
1144 !        Accumulate sum over k points for band, nonlocal and kinetic energies,
1145 !        also accumulate gradients of Enonlocal:
1146          do iband=1,nband_k
1147            if (abs(occ_k(iband))>tol8) then
1148              energies%e_kinetic     = energies%e_kinetic     + dtset%wtk(ikpt)*occ_k(iband)*ek_k(iband)
1149              energies%e_nucdip     = energies%e_nucdip     + dtset%wtk(ikpt)*occ_k(iband)*end_k(iband)
1150              energies%e_eigenvalues = energies%e_eigenvalues + dtset%wtk(ikpt)*occ_k(iband)*eig_k(iband)
1151              energies%e_nlpsp_vfock = energies%e_nlpsp_vfock + dtset%wtk(ikpt)*occ_k(iband)*enlx_k(iband)
1152              if (optforces>0) grnl(:)=grnl(:)+dtset%wtk(ikpt)*occ_k(iband)*grnl_k(:,iband)
1153              if (usefock) then
1154                energies%e_fock=energies%e_fock + half*fock%fock_common%eigen_ikpt(iband)*occ_k(iband)*dtset%wtk(ikpt)
1155                if (usefock_ACE==0) energies%e_fock0=energies%e_fock
1156              endif
1157            end if
1158          end do
1159 
1160 !        Calculate Fock contribution to the total energy if required
1161          if ((psps%usepaw==1).and.(usefock)) then
1162            if ((fock%fock_common%optfor).and.(usefock_ACE==0)) then
1163              !WARNING : this routine actually does NOT compute the Fock contrib to total energy, but modifies the force ONLY.
1164              call fock_calc_ene(dtset,fock%fock_common,energies%e_exactX,ikpt,nband_k,occ_k)
1165            end if
1166          end if
1167        end if
1168 
1169        if ( dtset%gpu_option == ABI_GPU_OPENMP) then
1170          call ompgpu_free_hamilt_buffers()
1171        end if
1172 ! LB-01/03/2024: Very weird compiler error on eos-nvhpc23.1 if the second call of timab(985,...) is included...
1173 ! Drastic short-term solution : disable this timing for nvhpc... In fact this part is not important unless fock is activated
1174 #ifndef FC_NVHPC
1175        call timab(985,2,tsec)
1176 #endif
1177 
1178        ABI_FREE(ek_k)
1179        ABI_FREE(ek_k_nd)
1180        ABI_FREE(end_k)
1181        ABI_FREE(grnl_k)
1182        ABI_FREE(occ_k)
1183        ABI_FREE(zshift)
1184        ABI_FREE(enlx_k)
1185 
1186        if(dtset%gpu_option==ABI_GPU_KOKKOS) then
1187 #if defined HAVE_GPU && defined HAVE_YAKL
1188          ABI_FREE_MANAGED(eig_k)
1189          ABI_FREE_MANAGED(resid_k)
1190 #endif
1191        else
1192          ABI_FREE(eig_k)
1193          ABI_FREE(resid_k)
1194        end if
1195 
1196 !      Keep track of total number of bands (all k points so far, even for k points not treated by me)
1197        bdtot_index=bdtot_index+nband_k
1198 
1199 !      Also shift array memory if dtset%mkmem/=0
1200        if (dtset%mkmem/=0) then
1201          ibg=ibg+my_nspinor*nband_cprj_k
1202          icg=icg+npw_k*my_nspinor*nband_k
1203          ikg=ikg+npw_k
1204        end if
1205 
1206      end do ! End big k point loop
1207 
1208      call timab(986,1,tsec)
1209 
1210      if (fixed_occ .and. mpi_enreg%paral_kgb==1) then
1211        call xmpi_sum(rhoaug,mpi_enreg%comm_bandspinorfft,ierr) !Sum the contributions over bands/FFT/spinors
1212      end if
1213 
1214 !    Transfer density on augmented fft grid to normal fft grid in real space
1215 !    Also take into account the spin.
1216      if(iscf>0.or.iscf==-3)then
1217        if (psps%usepaw==0) then
1218          call fftpac(isppol,mpi_enreg,dtset%nspden,n1,n2,n3,n4,n5,n6,dtset%ngfft,rhor,rhoaug(:,:,:,1),1)
1219          if(dtset%nspden==4)then
1220            do imagn=2,4
1221              call fftpac(imagn,mpi_enreg,dtset%nspden,n1,n2,n3,n4,n5,n6,dtset%ngfft,rhor,rhoaug(:,:,:,imagn),1)
1222            end do
1223          end if
1224        else
1225          call fftpac(isppol,mpi_enreg,dtset%nspden,n1,n2,n3,n4,n5,n6,dtset%ngfft,rhowfr,rhoaug(:,:,:,1),1)
1226          if(dtset%nspden==4)then
1227            do imagn=2,4
1228              call fftpac(imagn,mpi_enreg,dtset%nspden,n1,n2,n3,n4,n5,n6,dtset%ngfft,rhowfr,rhoaug(:,:,:,imagn),1)
1229            end do
1230          end if
1231        end if
1232      end if
1233 
1234      call timab(986,2,tsec)
1235 
1236    end do ! End loop over spins
1237 
1238    call timab(988,1,tsec)
1239    if (usefock) then
1240      if (usefock_ACE==0) then
1241        call xmpi_sum(energies%e_fock0,mpi_enreg%comm_kpt,ierr)
1242      end if
1243      if(fock%fock_common%optfor) call xmpi_sum(fock%fock_common%forces,mpi_enreg%comm_kpt,ierr)
1244    end if
1245 !  Electric field: compute string-averaged change in Zak phase
1246 !  along each direction, store it in dphase(idir)
1247 !  ji: it is not convenient to do this anymore. Remove. Set dphase(idir)=0.0_dp.
1248 !  eventually, dphase(idir) will have to go...
1249    if (berryflag) then
1250      dphase(:) = zero
1251 !    In case of MPI // of a finite field calculation, send dphasek to all cpus
1252      call xmpi_sum(dphasek,spaceComm_distrb,ierr)
1253      ABI_FREE(dphasek)
1254    end if ! berryflag
1255 
1256    if(dtset%gpu_option==ABI_GPU_KOKKOS) then
1257 #if defined HAVE_GPU && defined HAVE_YAKL
1258      ABI_FREE_MANAGED(rhoaug)
1259      ABI_FREE_MANAGED(vlocal)
1260 #endif
1261    else
1262      ABI_FREE(rhoaug)
1263      ABI_FREE(vlocal)
1264    end if
1265 
1266    if(with_vxctau) then
1267      ABI_FREE(vxctaulocal)
1268    end if
1269    if(has_vectornd) then
1270       ABI_FREE(vectornd_pac)
1271    end if
1272 
1273    call timab(988,2,tsec)
1274 
1275    ABI_MALLOC(doccde,(dtset%mband*dtset%nkpt*dtset%nsppol))
1276    doccde(:)=zero
1277 
1278 !  Treat now varying occupation numbers, in the self-consistent case
1279    if((.not.fixed_occ) .and. (iscf>0.or.iscf==-3)) then
1280 
1281 !    Parallel case
1282      if (mpi_enreg%nproc_spkpt>1) then
1283 
1284        call timab(989,1,tsec)
1285 
1286 !      If needed, exchange the values of eigen,resid,eknk,enlxnk,grnlnk
1287        ABI_MALLOC(buffer1,((4+3*natom*optforces+dtset%usefock+3*natom*dtset%usefock*optforces)*mbdkpsp))
1288        if(paw_dmft%use_dmft==1) then
1289          ABI_MALLOC(buffer2,(mb2dkpsp*paw_dmft%use_dmft))
1290        end if
1291 !      Pack eigen,resid,eknk,enlxnk,grnlnk in buffer1
1292        buffer1(1          :  mbdkpsp)=eigen(:)
1293        buffer1(1+  mbdkpsp:2*mbdkpsp)=resid(:)
1294        buffer1(1+2*mbdkpsp:3*mbdkpsp)=eknk(:)
1295        buffer1(1+3*mbdkpsp:4*mbdkpsp)=enlxnk(:)
1296        index1=4*mbdkpsp
1297        if (optforces>0) then
1298          buffer1(index1+1:index1+3*natom*mbdkpsp)=reshape(grnlnk,(/(3*natom)*mbdkpsp/) )
1299          index1=index1+3*natom*mbdkpsp
1300        end if
1301        if (usefock) then
1302          buffer1(1+index1:index1+mbdkpsp)=focknk(:)
1303          if (optforces>0) then
1304            index1=index1+mbdkpsp
1305            buffer1(index1+1:index1+3*natom*mbdkpsp)=reshape(fockfornk,(/(3*natom)*mbdkpsp/) )
1306          end if
1307        end if
1308        if(paw_dmft%use_dmft==1) then
1309          nnn=0
1310          do ikpt=1,dtset%nkpt
1311            do isppol=1,dtset%nsppol
1312              do iband=1,dtset%mband
1313                do iband1=1,dtset%mband
1314                  do iplex=1,2
1315                    nnn=nnn+1
1316                    buffer2(nnn)=eknk_nd(isppol,ikpt,iplex,iband,iband1)
1317                  end do
1318                end do
1319              end do
1320            end do
1321          end do
1322          if(nnn.ne.mb2dkpsp)  then
1323            write(msg,*)' BUG in vtorho2, buffer2',nnn,mb2dkpsp
1324            ABI_BUG(msg)
1325          end if
1326        end if
1327 !      Build sum of everything
1328        call timab(48,1,tsec)
1329        call xmpi_sum(buffer1,mpi_enreg%comm_kpt,ierr)
1330 !      if(mpi_enreg%paral_kgb/=1.and.paw_dmft%use_dmft==1) then
1331        if(paw_dmft%use_dmft==1) call xmpi_sum(buffer2,mpi_enreg%comm_kpt,ierr)
1332        call timab(48,2,tsec)
1333 
1334 !      Unpack eigen,resid,eknk,enlxnk,grnlnk in buffer1
1335        eigen(:) =buffer1(1          :  mbdkpsp)
1336        resid(:) =buffer1(1+  mbdkpsp:2*mbdkpsp)
1337        eknk(:)  =buffer1(1+2*mbdkpsp:3*mbdkpsp)
1338        enlxnk(:) =buffer1(1+3*mbdkpsp:4*mbdkpsp)
1339        index1=4*mbdkpsp
1340        if (optforces>0) then
1341          grnlnk(:,:)=reshape(buffer1(index1+1:index1+3*natom*mbdkpsp),(/3*natom,mbdkpsp/) )
1342        end if
1343        if (usefock) then
1344          focknk(:)=buffer1(1+index1:index1+mbdkpsp)
1345          if (optforces>0) then
1346            index1=index1+mbdkpsp
1347            fockfornk(:,:,:)=reshape(buffer1(index1+1:index1+3*natom*mbdkpsp),(/3,natom,mbdkpsp/) )
1348          end if
1349        end if
1350        if(paw_dmft%use_dmft==1) then
1351          nnn=0
1352          do ikpt=1,dtset%nkpt
1353            do isppol=1,dtset%nsppol
1354              do iband=1,dtset%mband
1355                do iband1=1,dtset%mband
1356                  do iplex=1,2
1357                    nnn=nnn+1
1358                    eknk_nd(isppol,ikpt,iplex,iband,iband1)=buffer2(nnn)
1359                  end do
1360                end do
1361              end do
1362            end do
1363          end do
1364        end if
1365        if(allocated(buffer2))  then
1366          ABI_FREE(buffer2)
1367        end if
1368        ABI_FREE(buffer1)
1369        call timab(989,2,tsec)
1370 
1371      end if ! nproc_spkpt>1
1372 
1373 !    Compute extfpmd u0 energy shift factor from eigenvalues and kinetic energy.
1374      if(associated(extfpmd)) then
1375        extfpmd%vtrial=vtrial
1376        call extfpmd%compute_shiftfactor(eigen,eknk,dtset%mband,mpi_enreg%me,&
1377 &       dtset%nband,dtset%nkpt,dtset%nsppol,dtset%wtk)
1378      end if
1379 
1380 !    Compute the new occupation numbers from eigen
1381      call timab(990,1,tsec)
1382      call newocc(doccde,eigen,energies%entropy,energies%e_fermie,energies%e_fermih,dtset%ivalence,&
1383 &     dtset%spinmagntarget,dtset%mband,dtset%nband,dtset%nelect,dtset%ne_qFD,dtset%nh_qFD,&
1384 &     dtset%nkpt,dtset%nspinor,dtset%nsppol,occ,dtset%occopt,prtvol,dtset%tphysel,&
1385 &     dtset%tsmear,dtset%wtk,&
1386 &     prtstm=dtset%prtstm,stmbias=dtset%stmbias,extfpmd=extfpmd)
1387      call timab(990,2,tsec)
1388 
1389 
1390 !    Compute number of free electrons of extfpmd model
1391      if(associated(extfpmd)) then
1392        extfpmd%nelect=zero
1393        call extfpmd%compute_nelect(energies%e_fermie,extfpmd%nelect,dtset%tsmear)
1394        call extfpmd%compute_e_kinetic(energies%e_fermie,dtset%tsmear)
1395        call extfpmd%compute_entropy(energies%e_fermie,dtset%tsmear)
1396      end if
1397 
1398 !    !=========  DMFT call begin ============================================
1399      dmft_dftocc=0
1400      if(paw_dmft%use_dmft==1.and.psps%usepaw==1.and.dtset%nbandkss==0) then
1401        call timab(991,1,tsec)
1402 
1403        if (dtset%dmftcheck>=0.and.dtset%usedmft>=1.and.(sum(pawtab(:)%upawu)>=tol8.or.  &
1404 &       sum(pawtab(:)%jpawu)>tol8).and.dtset%dmft_entropy==0) energies%entropy=zero
1405 
1406 !      ==  0 to a dmft calculation and do not use lda occupations
1407 !      ==  1 to a lda calculation with the dmft loop
1408        if(dtset%dmftcheck==-1) dmft_dftocc=1
1409 
1410 !      ==  initialise occnd
1411        paw_dmft%occnd=zero
1412 
1413        bdtot_index = 1
1414        do isppol=1,dtset%nsppol
1415          do ikpt=1,dtset%nkpt
1416            do iband=1,dtset%nband(ikpt+(isppol-1)*dtset%nkpt)
1417              paw_dmft%occnd(1,iband,iband,ikpt,isppol)=occ(bdtot_index)
1418              bdtot_index = bdtot_index + 1
1419            end do
1420          end do
1421        end do
1422 
1423 
1424        if(dmft_dftocc==0) then
1425          if(dtset%occopt/=3) then
1426            ABI_ERROR('occopt should be equal to 3 in dmft')
1427          end if
1428 !        ==  initialise edmft
1429          if(paw_dmft%use_dmft>=1) edmft = zero
1430 
1431          !  Compute residm to check the value
1432          ibdkpt=1
1433          residm=zero
1434          do isppol=1,dtset%nsppol
1435            do ikpt=1,dtset%nkpt
1436              nband_k=dtset%nband(ikpt+(isppol-1)*dtset%nkpt)
1437              if (nbdbuf_eff>=0) then
1438                nband_eff=max(1,nband_k-nbdbuf_eff)
1439                residm=max(residm,maxval(resid(ibdkpt:ibdkpt+nband_eff-1)))
1440              else if (nbdbuf_eff==-101) then
1441                residm=max(residm,maxval(occ(ibdkpt:ibdkpt+nband_k-1)*resid(ibdkpt:ibdkpt+nband_k-1)))
1442              else
1443                ABI_ERROR('Bad value of nbdbuf_eff')
1444              end if
1445              ibdkpt=ibdkpt+nband_k
1446            end do
1447          end do
1448 
1449          ! Test residm
1450          if (paw_dmft%use_dmft>0 .and. residm>tol4 .and. dtset%dmftcheck>=0) then
1451            if(dtset%dmft_entropy>0)  then
1452              write(msg,'(a,e12.3)')&
1453                ' WARNING: Wavefunctions not converged: DFT+DMFT calculation cannot be carried out safely ',residm
1454              call wrtout(std_out,msg)
1455            else
1456              write(msg,'(a,e12.3)')&
1457               ' ERROR: Wavefunctions not converged: DFT+DMFT calculation cannot be carried out safely ',residm
1458              call wrtout(std_out,msg)
1459              write(msg,'(a,i0)')'  Action: increase nline and nnsclo',dtset%nstep
1460              ABI_ERROR(msg)
1461            end if
1462 
1463          else if (paw_dmft%use_dmft>0 .and. residm>tol10.and. dtset%dmftcheck>=0) then
1464            write(msg,'(3a)')ch10,&
1465             '  Wavefunctions not converged: DFT+DMFT calculation might not be carried out safely ',ch10
1466            ABI_WARNING(msg)
1467          end if
1468 
1469 !        ==  allocate paw_dmft%psichi and paw_dmft%eigen_dft
1470          call init_dmft(dmatpawu,dtset,energies%e_fermie,dtfil%fnameabo_app,&
1471            dtfil%filnam_ds(3),dtset%nspinor,paw_dmft,pawtab,psps,dtset%typat)
1472          call print_dmft(paw_dmft,dtset%pawprtvol)
1473 
1474 !        ==  gather crystal structure date into data "cryst_struc"
1475          remove_inv=.false.
1476          if(dtset%nspden==4) remove_inv=.true.
1477          call crystal_init(dtset%amu_orig(:,1),cryst_struc,dtset%spgroup,natom,dtset%npsp,ntypat, &
1478           dtset%nsym,rprimd,dtset%typat,xred,dtset%ziontypat,dtset%znucl,1,&
1479           dtset%nspden==2.and.dtset%nsppol==1,remove_inv,hdr%title,&
1480           dtset%symrel,dtset%tnons,dtset%symafm)
1481 
1482 !        ==  compute psichi
1483          call xmpi_barrier(spaceComm_distrb)
1484          call init_oper(paw_dmft,dft_occup)
1485          call flush_unit(std_out)
1486          call timab(620,1,tsec)
1487          call datafordmft(cryst_struc,cprj,gs_hamk%dimcprj,dtset,eigen,energies%e_fermie,&
1488 &         dft_occup,dtset%mband,mband_cprj,dtset%mkmem,mpi_enreg,&
1489 &         dtset%nkpt,my_nspinor,dtset%nsppol,occ,&
1490 &         paw_dmft,paw_ij,pawang,pawtab,psps,usecprj_local,dtfil%unpaw)
1491          call timab(620,2,tsec)
1492          call flush_unit(std_out)
1493 
1494 
1495 !        ==  solve dmft loop
1496          call xmpi_barrier(spaceComm_distrb)
1497 
1498          call dmft_solve(cryst_struc,istep,dft_occup,paw_dmft,pawang,pawtab,dtset%pawprtvol)
1499          edmft=paw_dmft%edmft
1500          energies%e_paw=energies%e_paw+edmft
1501          energies%e_pawdc=energies%e_pawdc+edmft
1502          call flush_unit(std_out)
1503 !        paw_dmft%occnd(:,:,:,:,:)=0.5_dp
1504 
1505 !        For compatibility with old test, do not use for calculation
1506          if(dtset%dmft_occnd_imag==0) paw_dmft%occnd(2,:,:,:,:)=zero
1507 
1508 !        call print_dmft(paw_dmft,dtset%pawprtvol)
1509 !         if(dtset%paral_kgb==1) then
1510 !           write(msg,'(5a)')ch10,&
1511 !&           ' Parallelization over bands is not yet compatible with self-consistency in DMFT ',ch10,&
1512 !&           ' Calculation of density does not taken into account non diagonal occupations',ch10
1513 !           call wrtout(std_out,msg)
1514 !           call wrtout(ab_out,msg)
1515 !!          ABI_ERROR(msg)
1516 !           if(dtset%nstep>1) then
1517 !             write(msg,'(a,i0)')'  Action: use nstep=1 instead of nstep=',dtset%nstep
1518 !             ABI_ERROR(msg)
1519 !           end if
1520 !           residm=zero
1521 !         end if
1522 !        if(dtset%nspinor==2) then
1523 !          call flush_unit(ab_out)
1524 !          write(msg,'(3a)')&
1525 !          &         ' Self consistent DFT+DMFT with nspinor==2 is not possible yet ',ch10,&
1526 !          &         ' Calculation are restricted to nstep =1'
1527 !          !         ABI_ERROR(msg)
1528 !          if(dtset%nstep>1) then
1529 !          write(msg,'(a,i0)')' Action: use nstep=1 instead of nstep=',dtset%nstep
1530 !          !           ABI_ERROR(msg)
1531 !          endif
1532 !        end if
1533 
1534          if(me_distrb==0) then
1535            call saveocc_dmft(paw_dmft)
1536          end if
1537          call destroy_dmft(paw_dmft)
1538 
1539 !        ==  destroy crystal_t cryst_struc
1540          call cryst_struc%free()
1541          call destroy_oper(dft_occup)
1542        end if ! dmft_dftocc
1543        call timab(991,2,tsec)
1544 
1545      end if ! usedmft
1546 
1547      if(dtset%nbandkss/=0) then
1548        write(msg,'(a,i3,2a,i3,4a)') &
1549         " dtset%nbandkss = ",dtset%nbandkss,ch10,&
1550         " and dtset%usedmft = ",dtset%usedmft,ch10,&
1551         " a DFT loop is carried out without DMFT.",ch10,&
1552         " Only psichi's will be written at convergence of the DFT loop."
1553        call wrtout(std_out,msg)
1554      end if
1555 !    !=========  DMFT call end   ============================================
1556 
1557      call timab(992,1,tsec)
1558 
1559 !    Compute eeig, ek,enl and grnl from the new occ, and the shared eknk,enlxnk,grnlnk
1560      energies%e_eigenvalues = zero
1561      energies%e_kinetic     = zero
1562      energies%e_nlpsp_vfock = zero
1563      if (usefock) then
1564        energies%e_fock     = zero
1565        if (optforces>0) fock%fock_common%forces=zero
1566      end if
1567      if (optforces>0) grnl(:)=zero
1568      if(paw_dmft%use_dmft>=1) then
1569        ebandlda               = zero
1570        ebanddmft              = zero
1571        ebandldatot            = zero
1572        ekindmft               = zero
1573        ekindmft2              = zero
1574        ekinlda                = zero
1575      end if
1576 
1577 !    Compute new energy terms due to non diagonal occupations and DMFT.
1578      bdtot_index=1
1579      do isppol=1,dtset%nsppol
1580        do ikpt=1,dtset%nkpt
1581          nband_k=dtset%nband(ikpt+(isppol-1)*dtset%nkpt)
1582          do iband=1,nband_k
1583 
1584            locc_test = abs(occ(bdtot_index))>tol8
1585 !          dmft
1586            if(paw_dmft%use_dmft>=1.and.dtset%nbandkss==0) then
1587              if(paw_dmft%band_in(iband)) then
1588                if( paw_dmft%use_dmft == 1 .and. dmft_dftocc == 1 ) then ! test of the code
1589                  paw_dmft%occnd(1,iband,iband,ikpt,isppol)=occ(bdtot_index)
1590                end if
1591                locc_test = abs(paw_dmft%occnd(1,iband,iband,ikpt,isppol))+&
1592 &               abs(paw_dmft%occnd(2,iband,iband,ikpt,isppol))>tol8
1593              end if
1594            end if
1595 
1596            if (locc_test) then
1597 !            dmft
1598              if(paw_dmft%use_dmft==1.and.dtset%nbandkss==0) then
1599                ebandldatot=ebandldatot+dtset%wtk(ikpt)*occ(bdtot_index)*eigen(bdtot_index)
1600                if(paw_dmft%band_in(iband)) then
1601                  ebandlda=ebandlda+dtset%wtk(ikpt)*occ(bdtot_index)*eigen(bdtot_index)
1602                  ekinlda=ekinlda+dtset%wtk(ikpt)*occ(bdtot_index)*eknk(bdtot_index)
1603                  occ(bdtot_index)=paw_dmft%occnd(1,iband,iband,ikpt,isppol)
1604                  ebanddmft=ebanddmft+dtset%wtk(ikpt)*occ(bdtot_index)*eigen(bdtot_index)
1605                  ekindmft=ekindmft+dtset%wtk(ikpt)*occ(bdtot_index)*eknk(bdtot_index)
1606                end if
1607              end if
1608 
1609              energies%e_eigenvalues = energies%e_eigenvalues + &
1610 &             dtset%wtk(ikpt)*occ(bdtot_index)*eigen(bdtot_index)
1611              energies%e_kinetic = energies%e_kinetic + &
1612 &             dtset%wtk(ikpt)*occ(bdtot_index)*eknk(bdtot_index)
1613              energies%e_nlpsp_vfock = energies%e_nlpsp_vfock + &
1614 &             dtset%wtk(ikpt)*occ(bdtot_index)*enlxnk(bdtot_index)
1615              if (usefock) then
1616                energies%e_fock=energies%e_fock + half*focknk(bdtot_index)*occ(bdtot_index)*dtset%wtk(ikpt)
1617                if (optforces>0) fock%fock_common%forces(:,:)=fock%fock_common%forces(:,:)+&
1618 &               dtset%wtk(ikpt)*occ(bdtot_index)*fockfornk(:,:,bdtot_index)
1619              end if
1620              if (optforces>0) grnl(:)=grnl(:)+dtset%wtk(ikpt)*occ(bdtot_index)*grnlnk(:,bdtot_index)
1621            end if
1622            bdtot_index=bdtot_index+1
1623            if(paw_dmft%use_dmft==1.and.dtset%nbandkss==0) then
1624              do iband1=1,nband_k
1625                if(paw_dmft%band_in(iband).and.paw_dmft%band_in(iband1)) then
1626 !                write(std_out,*) "II+", isppol,ikpt,iband,iband1
1627                  ekindmft2=ekindmft2  +  dtset%wtk(ikpt)*paw_dmft%occnd(1,iband,iband1,ikpt,isppol)*&
1628 &                 eknk_nd(isppol,ikpt,1,iband,iband1)
1629                  ekindmft2=ekindmft2  -  dtset%wtk(ikpt)*paw_dmft%occnd(2,iband,iband1,ikpt,isppol)*&
1630 &                 eknk_nd(isppol,ikpt,2,iband,iband1)
1631 !                write(std_out,*) "II", occnd(1,iband,iband1,ikpt,isppol),eknk_nd(isppol,ikpt,iband,iband1)
1632                end if
1633              end do
1634            end if
1635          end do
1636        end do
1637      end do
1638 
1639      if(paw_dmft%use_dmft==1) then
1640        energies%e_kinetic = energies%e_kinetic -ekindmft+ekindmft2
1641        if(abs(dtset%pawprtvol)>=2) then
1642          write(msg,'(4a,7(2x,a,2x,e14.7,a),a)') &
1643            "-----------------------------------------------",ch10,&
1644            "--- Energy for DMFT and tests (in Ha)  ",ch10,&
1645            "--- Ebandldatot    (Ha.) = ",ebandldatot,ch10,&
1646            "--- Ebandlda       (Ha.) = ",ebandlda,ch10,&
1647            "--- Ebanddmft      (Ha.) = ",ebanddmft,ch10,&
1648            "--- Ekinlda        (Ha.) = ",ekinlda,ch10, &
1649            "--- Ekindmftdiag   (Ha.) = ",ekindmft,ch10,&
1650            "--- Ekindmftnondiag(Ha.) = ",ekindmft2,ch10,&
1651            "--- Edmft=         (Ha.) = ",edmft,ch10,&
1652            "-----------------------------------------------"
1653          call wrtout(std_out,msg)
1654        end if
1655 !       if(paw_dmft%use_dmft==1.and.mpi_enreg%paral_kgb==1) paw_dmft%use_dmft=0
1656      end if
1657 
1658      ABI_NVTX_START_RANGE(NVTX_MKRHO)
1659      if (psps%usepaw==0) then
1660        call mkrho(cg,dtset,gprimd,irrzon,kg,mcg,mpi_enreg,npwarr,occ,paw_dmft,phnons,&
1661 &       rhog,rhor,rprimd,tim_mkrho,ucvol,wvl%den,wvl%wfs,&
1662 &       extfpmd=extfpmd)
1663      else
1664        call mkrho(cg,dtset,gprimd,irrzon,kg,mcg,mpi_enreg,npwarr,occ,paw_dmft,phnons,&
1665 &       rhowfg,rhowfr,rprimd,tim_mkrho,ucvol,wvl%den,wvl%wfs,&
1666 &       extfpmd=extfpmd)
1667     end if
1668     ABI_NVTX_END_RANGE()
1669      call timab(992,2,tsec)
1670 
1671 !    Treat fixed occupation numbers or non-self-consistent case
1672    else
1673 
1674      if (mpi_enreg%nproc_spkpt>1) then
1675 
1676        call timab(989,1,tsec)
1677 
1678        nbuf=2*mbdkpsp+dtset%nfft*dtset%nspden+4+3*natom*optforces
1679        ! If Hartree-Fock calculation, the exact exchange energy is k-dependent.
1680        if(dtset%usefock==1) then
1681          nbuf=nbuf+1
1682          if (optforces>0) nbuf=nbuf+3*natom
1683        end if
1684        if(iscf==-1 .or. iscf==-2)nbuf=2*mbdkpsp
1685        ABI_MALLOC(buffer1,(nbuf))
1686        ! Pack eigen,resid,rho[wf]r,grnl,enl,ek
1687        buffer1(1:mbdkpsp)=eigen(:)
1688        buffer1(1+mbdkpsp:2*mbdkpsp)=resid(:)
1689        index1=2*mbdkpsp
1690        if(iscf/=-1 .and. iscf/=-2)then
1691          if (psps%usepaw==0) then
1692            buffer1(index1+1:index1+dtset%nfft*dtset%nspden)=reshape(rhor,(/dtset%nfft*dtset%nspden/))
1693          else
1694            buffer1(index1+1:index1+dtset%nfft*dtset%nspden)=reshape(rhowfr,(/dtset%nfft*dtset%nspden/))
1695          end if
1696          index1=index1+dtset%nfft*dtset%nspden
1697          buffer1(index1+1) = energies%e_kinetic
1698          buffer1(index1+2) = energies%e_eigenvalues
1699          buffer1(index1+3) = energies%e_nlpsp_vfock
1700          buffer1(index1+4) = energies%e_nucdip
1701          index1=index1+4
1702          ! If Hartree-Fock calculation, save e_fock in buffer1
1703          if (dtset%usefock==1) then
1704            buffer1(index1+1) = energies%e_fock
1705            index1=index1+1
1706            if (optforces>0)then
1707              buffer1(index1+1:index1+3*natom)=reshape(fock%fock_common%forces,(/3*natom/))
1708              index1=index1+3*natom
1709            end if
1710          end if
1711          if (optforces>0) buffer1(index1+1:index1+3*natom)=grnl(1:3*natom)
1712        end if
1713 
1714        ! Build sum of everything
1715        call timab(48,1,tsec)
1716        call xmpi_sum(buffer1,nbuf,mpi_enreg%comm_kpt ,ierr)
1717        call timab(48,2,tsec)
1718 
1719        ! Unpack the final result
1720        eigen(:)=buffer1(1:mbdkpsp)
1721        resid(:)=buffer1(1+mbdkpsp:2*mbdkpsp)
1722        index1=2*mbdkpsp
1723        if(iscf/=-1 .and. iscf/=-2)then
1724          if (psps%usepaw==0) then
1725            ii=1
1726            do ispden=1,dtset%nspden
1727              do ifft=1,dtset%nfft
1728                rhor(ifft,ispden)=buffer1(index1+ii)
1729                ii=ii+1
1730              end do
1731            end do
1732          else
1733            ii=1
1734            do ispden=1,dtset%nspden
1735              do ifft=1,dtset%nfft
1736                rhowfr(ifft,ispden)=buffer1(index1+ii)
1737                ii=ii+1
1738              end do
1739            end do
1740          end if
1741          index1=index1+dtset%nfft*dtset%nspden
1742          energies%e_kinetic = buffer1(index1+1)
1743          energies%e_eigenvalues = buffer1(index1+2)
1744          energies%e_nlpsp_vfock = buffer1(index1+3)
1745          energies%e_nucdip = buffer1(index1+4)
1746          index1=index1+4
1747          ! If Hartree-Fock calculation, save e_fock in buffer1
1748          if (dtset%usefock==1) then
1749            energies%e_fock = buffer1(index1+1)
1750            index1=index1+1
1751            if (optforces>0) then
1752              fock%fock_common%forces(:,:)=reshape(buffer1(index1+1:index1+3*natom),(/3,natom/))
1753              index1=index1+3*natom
1754            end if
1755          end if
1756          if (optforces>0) grnl(1:3*natom)=buffer1(index1+1:index1+3*natom)
1757        end if
1758        ABI_FREE(buffer1)
1759        call timab(989,2,tsec)
1760 
1761      end if ! nproc_spkpt>1
1762 
1763 !    Compute the highest occupied eigenenergy
1764      if(iscf/=-1 .and. iscf/=-2)then
1765        call timab(993,1,tsec)
1766        energies%e_fermie = -huge(one)
1767        if (dtset%occopt==9) then
1768           energies%e_fermih = -huge(one)
1769        end if
1770        bdtot_index=1
1771        do isppol=1,dtset%nsppol
1772          do ikpt=1,dtset%nkpt
1773            nband_k=dtset%nband(ikpt+(isppol-1)*dtset%nkpt)
1774            if (dtset%occopt/=9) then
1775               do iband=1,nband_k
1776                  if(abs(occ(bdtot_index))>tol8 .and. eigen(bdtot_index)>energies%e_fermie+tol10) then
1777                    energies%e_fermie=eigen(bdtot_index)
1778                  end if
1779                  bdtot_index=bdtot_index+1
1780               end do
1781            else
1782               ! In case occopt 9, computing the fermi level for the electrons remaining in the VB = fermi level of holes
1783               do iband=1,dtset%ivalence
1784                  if(abs(occ(bdtot_index))>tol8 .and. eigen(bdtot_index)>energies%e_fermih+tol10) then
1785                    energies%e_fermih=eigen(bdtot_index)
1786                  end if
1787                  bdtot_index=bdtot_index+1
1788               end do
1789               ! In case occopt 9, computing the fermi level for the electrons thermalized in the conduction bands
1790               do iband=dtset%ivalence+1,nband_k
1791                  if(abs(occ(bdtot_index))>tol8 .and. eigen(bdtot_index)>energies%e_fermie+tol10) then
1792                    energies%e_fermie=eigen(bdtot_index)
1793                  end if
1794                  bdtot_index=bdtot_index+1
1795               end do
1796            end if
1797          end do
1798        end do
1799        call xmpi_max(energies%e_fermie,spaceComm_distrb,ierr)
1800        if (dtset%occopt == 9) then
1801           call xmpi_max(energies%e_fermih,spaceComm_distrb,ierr)
1802        end if
1803        call timab(993,2,tsec)
1804      end if
1805 
1806 !    If needed, compute rhog, and symmetrizes the density
1807      if (iscf > 0 .or. iscf==-3 ) then
1808 !      energies%e_fermie=zero  ! Actually, should determine the maximum of the valence band XG20020802
1809        nfftot=dtset%ngfft(1)*dtset%ngfft(2)*dtset%ngfft(3)
1810 
1811        call timab(994,1,tsec)
1812        if (psps%usepaw==0) then
1813          call symrhg(1,gprimd,irrzon,mpi_enreg,dtset%nfft,nfftot,dtset%ngfft,dtset%nspden,&
1814 &         dtset%nsppol,dtset%nsym,phnons,rhog,rhor,rprimd,dtset%symafm,dtset%symrel,dtset%tnons)
1815        else
1816          call symrhg(1,gprimd,irrzon,mpi_enreg,dtset%nfft,nfftot,dtset%ngfft,dtset%nspden,&
1817 &         dtset%nsppol,dtset%nsym,phnons,rhowfg,rhowfr,rprimd,dtset%symafm,dtset%symrel,dtset%tnons)
1818        end if
1819        call timab(994,2,tsec)
1820 !      We now have both rho(r) and rho(G), symmetrized, and if dtset%nsppol=2
1821 !      we also have the spin-up density, symmetrized, in rhor(:,2).
1822      end if
1823 
1824    end if !  End of test on varying or fixed occupation numbers
1825 
1826    call timab(994,1,tsec)
1827 
1828 !  Compute the kinetic energy density
1829    if(dtset%usekden==1 .and. (iscf > 0 .or. iscf==-3 ) )then
1830      if (psps%usepaw==0) then
1831        call mkrho(cg,dtset,gprimd,irrzon,kg,mcg,mpi_enreg,npwarr,occ,paw_dmft,phnons,&
1832 &       taug,taur,rprimd,tim_mkrho,ucvol,wvl%den,wvl%wfs,option=1)
1833      else
1834        call mkrho(cg,dtset,gprimd,irrzon,kg,mcg,mpi_enreg,npwarr,occ,paw_dmft,phnons,&
1835 &      tauwfg,tauwfr,rprimd,tim_mkrho,ucvol,wvl%den,wvl%wfs,option=1)
1836      end if
1837    end if
1838 
1839    ABI_FREE(eknk)
1840    if (usefock) then
1841      ABI_FREE(focknk)
1842      if (optforces>0)then
1843        ABI_FREE(fockfornk)
1844      end if
1845    end if
1846    ABI_FREE(eknk_nd)
1847    ABI_FREE(grnlnk)
1848    ABI_FREE(enlxnk)
1849 
1850 !  In the non-self-consistent case, print eigenvalues and residuals
1851    if(iscf<=0 .and. me_distrb==0)then
1852      option=2 ; enunit=1 ; vxcavg_dum=zero
1853      call prteigrs(eigen,enunit,energies%e_fermie,energies%e_fermih,&
1854 &     dtfil%fnameabo_app_eig,ab_out,iscf,dtset%kptns,dtset%kptopt,dtset%mband,&
1855 &     dtset%nband,nbdbuf_eff,dtset%nkpt,nnsclo_now,dtset%nsppol,occ,dtset%occopt,option,&
1856 &     dtset%prteig,prtvol,resid,dtset%tolwfr,vxcavg_dum,dtset%wtk)
1857    end if
1858 
1859 !  Find largest residual over bands, k points, and spins, except for nbdbuf highest bands
1860    ibdkpt=1
1861    residm=zero
1862    do isppol=1,dtset%nsppol
1863      do ikpt=1,dtset%nkpt
1864        nband_k=dtset%nband(ikpt+(isppol-1)*dtset%nkpt)
1865        if (nbdbuf_eff>=0) then
1866          nband_eff=max(1,nband_k-nbdbuf_eff)
1867          residm=max(residm,maxval(resid(ibdkpt:ibdkpt+nband_eff-1)))
1868        else if (nbdbuf_eff==-101) then
1869          residm=max(residm,maxval(occ(ibdkpt:ibdkpt+nband_k-1)*resid(ibdkpt:ibdkpt+nband_k-1)))
1870        else
1871          ABI_ERROR('Bad value of nbdbuf_eff')
1872        end if
1873        ibdkpt=ibdkpt+nband_k
1874      end do
1875    end do
1876 
1877  end if !usewvl==0
1878 
1879 !===================================================================
1880 ! End of PLANE WAVES section
1881 !===================================================================
1882 
1883 !In the self-consistent case, diagnose lack of unoccupied state (for each spin and k-point).
1884 
1885 !Print a warning if the number of such messages already written does not exceed mwarning.
1886 ! MG: This is not a good idea as this is a typical mistake done by beginners and we should
1887 ! keep on spamming this warning message in the log file.
1888  !mwarning=5
1889  !if(nwarning<mwarning .and. iscf>=0)then
1890    !nwarning=nwarning+1
1891 
1892  if(iscf>=0)then
1893    bdtot_index=1
1894    do isppol=1,dtset%nsppol
1895      do ikpt=1,dtset%nkpt
1896        min_occ=two
1897        nband_k=dtset%nband(ikpt+(isppol-1)*dtset%nkpt)
1898        do iband=1,nband_k
1899          if(occ(bdtot_index)<min_occ)min_occ=occ(bdtot_index)
1900          bdtot_index=bdtot_index+1
1901        end do
1902        if(min_occ>0.01_dp .and. .not. associated(extfpmd))then
1903          if(dtset%nsppol==1)then
1904            write(msg, '(a,i0,3a,f7.3,5a)' )&
1905              'For k-point number: ',ikpt,',',ch10,&
1906              'The minimal occupation factor is: ',min_occ,'.',ch10,&
1907              'An adequate monitoring of convergence requires it to be  at most 0.01_dp.',ch10,&
1908              'Action: increase slightly the number of bands.'
1909          else
1910            write(msg, '(a,i0,3a,i0,a,f7.3,5a)' )&
1911              'For k-point number: ',ikpt,', and',ch10,&
1912              'for spin polarization: ',isppol, ' the minimal occupation factor is: ',min_occ,'.',ch10,&
1913              'An adequate monitoring of convergence requires it to be at most 0.01_dp.',ch10,&
1914              'Action: increase slightly the number of bands.'
1915          end if
1916          ABI_WARNING(msg)
1917          exit ! It is enough if one lack of adequate occupation is identified, so exit.
1918        end if
1919      end do
1920    end do
1921  end if
1922 
1923  ABI_NVTX_START_RANGE(NVTX_VTORHO_EXTRA)
1924  if (iscf>0.or.iscf==-3 .or. (dtset%usewvl==1 .and. iscf==0)) then
1925 
1926 !  PAW: Build new rhoij quantities from new occ then symetrize them
1927 !  Compute and add the compensation density to rhowfr to get the total density
1928    if (psps%usepaw==1) then
1929      call timab(555,1,tsec)
1930      if (paral_atom) then
1931        ABI_MALLOC(pawrhoij_unsym,(natom))
1932        call pawrhoij_inquire_dim(cplex_rhoij=cplex_rhoij,nspden_rhoij=nspden_rhoij,&
1933 &                nspden=dtset%nspden,spnorb=dtset%pawspnorb,cpxocc=dtset%pawcpxocc)
1934        call pawrhoij_alloc(pawrhoij_unsym,cplex_rhoij,nspden_rhoij,dtset%nspinor,&
1935 &       dtset%nsppol,dtset%typat,pawtab=pawtab,use_rhoijp=0)
1936      else
1937        pawrhoij_unsym => pawrhoij
1938      end if
1939      if (usecprj_local==1) then
1940        call pawmkrhoij(atindx,atindx1,cprj,gs_hamk%dimcprj,dtset%istwfk,dtset%kptopt,dtset%mband,mband_cprj,&
1941 &       mcprj_local,dtset%mkmem,mpi_enreg,natom,dtset%nband,dtset%nkpt,dtset%nspden,dtset%nspinor,&
1942 &       dtset%nsppol,occ,dtset%paral_kgb,paw_dmft,pawrhoij_unsym,dtfil%unpaw,dtset%usewvl,dtset%wtk)
1943      else
1944        mcprj_tmp=my_nspinor*mband_cprj*dtset%mkmem*dtset%nsppol
1945        ABI_MALLOC(cprj_tmp,(natom,mcprj_tmp))
1946        call pawcprj_alloc(cprj_tmp,0,gs_hamk%dimcprj)
1947        call ctocprj(atindx,cg,1,cprj_tmp,gmet,gprimd,0,0,0,dtset%istwfk,kg,dtset%kptns,&
1948 &       mcg,mcprj_tmp,dtset%mgfft,dtset%mkmem,mpi_enreg,psps%mpsang,dtset%mpw,&
1949 &       dtset%natom,nattyp,dtset%nband,dtset%natom,dtset%ngfft,dtset%nkpt,dtset%nloalg,&
1950 &       npwarr,dtset%nspinor,dtset%nsppol,dtset%nsppol,ntypat,dtset%paral_kgb,ph1d,psps,rmet,dtset%typat,&
1951 &       ucvol,dtfil%unpaw,xred,ylm,ylmgr_dum)
1952        call pawmkrhoij(atindx,atindx1,cprj_tmp,gs_hamk%dimcprj,dtset%istwfk,dtset%kptopt,&
1953 &       dtset%mband,mband_cprj,mcprj_tmp,dtset%mkmem,mpi_enreg,natom,dtset%nband,dtset%nkpt,&
1954 &       dtset%nspden,dtset%nspinor,dtset%nsppol,occ,dtset%paral_kgb,paw_dmft,pawrhoij_unsym,&
1955 &       dtfil%unpaw,dtset%usewvl,dtset%wtk)
1956        call pawcprj_free(cprj_tmp)
1957        ABI_FREE(cprj_tmp)
1958      end if
1959      call timab(555,2,tsec)
1960 !    Build symetrized packed rhoij and compensated pseudo density
1961      cplex=1;ipert=0;idir=0;qpt(:)=zero
1962      if(dtset%usewvl==0) then
1963        call pawmkrho(1,compch_fft,cplex,gprimd,idir,indsym,ipert,mpi_enreg,&
1964 &       my_natom,natom,dtset%nspden,dtset%nsym,ntypat,dtset%paral_kgb,pawang,pawfgr,pawfgrtab,&
1965 &       dtset%pawprtvol,pawrhoij,pawrhoij_unsym,pawtab,qpt,rhowfg,rhowfr,rhor,rprimd,dtset%symafm,&
1966 &       symrec,dtset%typat,ucvol,dtset%usewvl,xred,pawnhat=nhat,rhog=rhog)
1967        if (dtset%usekden==1) then
1968 !        DO WE NEED TAUG?
1969          call transgrid(1,mpi_enreg,dtset%nspden,+1,1,1,dtset%paral_kgb,pawfgr,tauwfg,taug,tauwfr,taur)
1970        end if
1971      else
1972 !      here do not pass rhog, we do not use it
1973        call pawmkrho(1,compch_fft,cplex,gprimd,idir,indsym,ipert,mpi_enreg,&
1974 &       my_natom,natom,dtset%nspden,dtset%nsym,ntypat,dtset%paral_kgb,pawang,pawfgr,pawfgrtab,&
1975 &       dtset%pawprtvol,pawrhoij,pawrhoij_unsym,pawtab,qpt,rhowfg,rhowfr,rhor,rprimd,dtset%symafm,&
1976 &       symrec,dtset%typat,ucvol,dtset%usewvl,xred,pawnhat=nhat)
1977 !      In WVL: copy density to BigDFT object:
1978        call wvl_rho_abi2big(1,rhor,wvl%den)
1979      end if
1980      if (paral_atom) then
1981        call pawrhoij_free(pawrhoij_unsym)
1982        ABI_FREE(pawrhoij_unsym)
1983      end if
1984    end if ! psps%usepaw==1
1985 
1986    if(paw_dmft%use_dmft==1) then
1987 !    == check noccmmp
1988      call setnoccmmp(1,0,rdum4,0,0,idum3,my_natom,natom,0,1,dtset%nsppol,0,ntypat,&
1989 &     paw_ij,pawang,dtset%pawprtvol,pawrhoij,pawtab,rdum2,idum1,dtset%typat,0,dtset%usepawu,&
1990 &     comm_atom=mpi_enreg%comm_atom,mpi_atmtab=mpi_enreg%my_atmtab)
1991    end if
1992 
1993 !  Find and print minimum and maximum total electron density and locations
1994 !  Compute density residual (if required) and its squared norm
1995    if (iscf>=0) then
1996      if (psps%usepaw==0) then
1997        call prtrhomxmn(std_out,mpi_enreg,dtset%nfft,dtset%ngfft,dtset%nspden,1,rhor,ucvol=ucvol)
1998      else
1999        call prtrhomxmn(std_out,mpi_enreg,nfftf,pawfgr%ngfft,dtset%nspden,1,rhor,ucvol=ucvol)
2000      end if
2001      if (optres==1) then
2002        nvresid=rhor-nvresid
2003        call sqnorm_v(1,nfftf,nres2,dtset%nspden,optres,nvresid,mpi_comm_sphgrid=mpi_comm_sphgrid)
2004        if (dtset%usekden==1) then
2005          if (optres==1) tauresid=taur-tauresid
2006        endif
2007      end if
2008    end if
2009 
2010  end if ! iscf>0 or iscf=-3
2011  ABI_NVTX_END_RANGE()
2012 
2013  if(psps%usepaw==1.and.(iscf>=0.or.iscf==-3))  then
2014    ABI_FREE(rhowfr)
2015    ABI_FREE(rhowfg)
2016    if (dtset%usekden==1) then
2017      ABI_FREE(tauwfr)
2018      ABI_FREE(tauwfg)
2019    end if
2020  end if
2021 
2022  call timab(994,2,tsec)
2023 
2024  if (iscf==-1) then
2025 !  Eventually compute the excited states within tddft
2026    call timab(995,1,tsec)
2027    if (psps%usepaw==1) then
2028 !    In case of PAW calculation, have to transfer kxc from the fine to the coarse grid:
2029      ABI_MALLOC(cgrkxc,(dtset%nfft,nkxc))
2030      do ikxc=1,nkxc
2031        call transgrid(1,mpi_enreg,1,-1,0,0,dtset%paral_kgb,pawfgr,rhodum,rhodum,cgrkxc(:,ikxc),kxc(:,ikxc))
2032      end do
2033      call tddft(cg,dtfil,dtset,eigen,etotal,gmet,gprimd,gsqcut,&
2034 &     kg,cgrkxc,dtset%mband,mgfftdiel,dtset%mkmem,mpi_enreg,dtset%mpw,dtset%nfft,&
2035 &     ngfftdiel,dtset%nkpt,nkxc,npwarr,dtset%nspinor,dtset%nsppol,occ,ucvol,wffnew)
2036      ABI_FREE(cgrkxc)
2037    else
2038      call tddft(cg,dtfil,dtset,eigen,etotal,gmet,gprimd,gsqcut,&
2039 &     kg,kxc,dtset%mband,mgfftdiel,dtset%mkmem,mpi_enreg,dtset%mpw,dtset%nfft,&
2040 &     ngfftdiel,dtset%nkpt,nkxc,npwarr,dtset%nspinor,dtset%nsppol,occ,ucvol,wffnew)
2041    end if
2042    call timab(995,2,tsec)
2043 
2044  else
2045 !  Eventually compute the susceptibility matrix and the
2046 !  dielectric matrix when istep_mix is equal to 1 or dielstrt
2047    call timab(996,1,tsec)
2048    computesusmat = dtset%testsusmat(dielop, dielstrt, istep_mix) !test if the matrix is to be computed
2049    if(computesusmat) then
2050      dielar(1)=dtset%diecut;dielar(2)=dtset%dielng
2051      dielar(3)=dtset%diemac;dielar(4)=dtset%diemix
2052      dielar(5)=dtset%diegap;dielar(6)=dtset%dielam
2053      dielar(7)=dtset%diemix;if (iscf>=10) dielar(7)=dtset%diemixmag
2054      usetimerev=1
2055      if (psps%usepaw==1.and.dtset%pawspnorb>0.and.dtset%kptopt/=1.and.dtset%kptopt/=2) usetimerev=0
2056      neglect_pawhat=1-dtset%pawsushat
2057      call suscep_stat(atindx,atindx1,cg,cprj,dielar,&
2058 &     gs_hamk%dimcprj,doccde,eigen,gbound_diel,gprimd,&
2059 &     irrzondiel,dtset%istwfk,kg,kg_diel,lmax_diel,&
2060 &     dtset%mband,mcg,mcprj_local,mgfftdiel,dtset%mkmem,mpi_enreg,dtset%mpw,natom,dtset%nband,&
2061 &     neglect_pawhat,nfftdiel,ngfftdiel,&
2062 &     dtset%nkpt,npwarr,npwdiel,dtset%nspden,dtset%nspinor,dtset%nsppol,dtset%nsym,ntypat,&
2063 &     occ,dtset%occopt,pawang,pawtab,phnonsdiel,ph1ddiel,rprimd,&
2064 &     susmat,dtset%symafm,dtset%symrel,dtset%tnons,dtset%typat,ucvol,&
2065 &     dtfil%unpaw,usecprj_local,psps%usepaw,usetimerev,dtset%wtk,ylmdiel)
2066    end if
2067    call timab(996,2,tsec)
2068 
2069  end if ! end condition on iscf
2070 
2071  call gs_hamk%free()
2072 
2073  if (psps%usepaw==1) then
2074    if (usecprj==0) then
2075      call pawcprj_free(cprj_local)
2076      ABI_FREE(cprj_local)
2077    end if
2078  end if
2079 
2080  if(dtset%usewvl==0) then
2081    ABI_FREE(EigMin)
2082    ABI_FREE(doccde)
2083 #if defined HAVE_GPU_CUDA
2084    if(dtset%gpu_option==ABI_GPU_LEGACY .or. dtset%gpu_option==ABI_GPU_KOKKOS) call gpu_finalize_ham_data()
2085 #endif
2086  end if
2087 
2088  call timab(980,2,tsec)
2089 
2090  DBG_EXIT("COLL")
2091 
2092  contains

ABINIT/wvl_comm_eigen [ Functions ]

[ Top ] [ Functions ]

NAME

  wvl_comm_eigen

FUNCTION

  Computes occupations for the wavelet case
  Using BigDFT routines

INPUTS

OUTPUT

SIDE EFFECTS

NOTES

 for the wvlbigdft case, see the routine 'wvl_occ_bigdft'

SOURCE

2435 subroutine wvl_comm_eigen()
2436 
2437 !Arguments ------------------------------------
2438 
2439 !Local variables-------------------------------
2440 #if defined HAVE_BIGDFT
2441  integer:: ikpt,norb,shift
2442 #endif
2443 
2444 ! *************************************************************************
2445 
2446    DBG_ENTER("COLL")
2447 
2448 #if defined HAVE_BIGDFT
2449    if(wvlbigdft) then
2450 !  Communicates eigenvalues to all procs.
2451 !  This will print out the eigenvalues and Fermi level.
2452      call eigensystem_info(mpi_enreg%me_wvl, mpi_enreg%nproc_wvl,0.d0,&
2453 &     wvl%wfs%ks%Lzd%Glr%wfd%nvctr_c+7*wvl%wfs%ks%Lzd%Glr%wfd%nvctr_f,&
2454 &     wvl%wfs%ks%orbs,wvl%wfs%ks%psi)
2455    else
2456 !  Send all eigenvalues to all procs.
2457 !  I simply communicate eigenvalues: I do not print them into screen, nor calculate Fermi-level.
2458      norb=wvl%wfs%ks%orbs%norb
2459      if (mpi_enreg%nproc_wvl > 1) then
2460        shift=1
2461        do ikpt = 1, wvl%wfs%ks%orbs%nkpts
2462          call xmpi_bcast(wvl%wfs%ks%orbs%eval(shift:shift+norb-1),wvl%wfs%ks%orbs%ikptproc(ikpt),mpi_enreg%comm_wvl,ierr)
2463          shift=shift+norb
2464        end do
2465      end if
2466    end if
2467 
2468 !Copy eigenvalues from BigDFT object to "eigen"
2469    call wvl_eigen_abi2big(dtset%mband,dtset%nkpt,dtset%nsppol,eigen,2,wvl%wfs)
2470 
2471 #else
2472    BIGDFT_NOTENABLED_ERROR()
2473 #endif
2474 
2475    DBG_EXIT("COLL")
2476 
2477  end subroutine wvl_comm_eigen
2478 
2479 end subroutine vtorho

ABINIT/wvl_nscf_loop [ Functions ]

[ Top ] [ Functions ]

NAME

  wvl_nscf_loop

FUNCTION

  Non-self-consistent field cycle in Wavelets
  See also "wvl_nscf_loop_bigdft"

INPUTS

  nnsclo= number of non-self consistent field iterations

OUTPUT

SOURCE

2110 subroutine wvl_nscf_loop()
2111 
2112 !Arguments ------------------------------------
2113 ! integer, intent(in)                    :: istep,mcprj,nfft,nnsclo
2114 ! real(dp), intent(inout)                :: residm
2115 ! type(dataset_type), intent(in)         :: dtset
2116 ! type(MPI_type), intent(in)             :: mpi_enreg
2117 ! type(energies_type), intent(inout)     :: energies
2118 ! type(wvl_data), intent(inout)          :: wvl
2119 ! !arrays
2120 ! real(dp), intent(inout)                :: xcart(3, dtset%natom)
2121 ! real(dp), dimension(6), intent(out)    :: strsxc
2122 ! type(pawcprj_type),dimension(dtset%natom,mcprj),intent(out)::cprj
2123 
2124 !Local variables-------------------------------
2125  integer :: inonsc,ii
2126  integer,parameter :: iscf_=-1       !do not do a SCF cycle
2127  logical,parameter :: do_scf=.false. !do not do a SCF cycle
2128  logical,parameter :: wvlbigdft=.false.
2129  real(dp) :: dum,eexctx,eh,ekin,eloc,enl,esicdc,evxc,exc
2130 
2131 ! *************************************************************************
2132 
2133    DBG_ENTER("COLL")
2134 
2135    if(nnsclo_now>0) then
2136      do inonsc=1,nnsclo_now
2137        call wvl_psitohpsi(dtset%diemix,eexctx,exc,eh,ekin,eloc,enl,esicdc,&
2138 &       istep,inonsc,iscf_,mpi_enreg%me_wvl,dtset%natom,&
2139 &       nfftf,mpi_enreg%nproc_wvl,dtset%nspden,&
2140 &       dum,do_scf,evxc,wvl,wvlbigdft,xcart,strsxc)
2141        call wvl_hpsitopsi(cprj,dtset,energies,inonsc,mcprj_local,mpi_enreg, &
2142 &       residm,wvl,xcart)
2143        if(residm<dtset%tolwfr) exit !Exit loop if converged
2144      end do
2145 
2146    else
2147      do ii=1, dtset%nline
2148 !      Direct minimization technique: no diagonalization
2149        call wvl_psitohpsi(dtset%diemix,eexctx,exc,eh,ekin,eloc,enl,esicdc,&
2150 &       istep,ii,iscf_,mpi_enreg%me_wvl,dtset%natom,&
2151 &       nfftf,mpi_enreg%nproc_wvl,dtset%nspden,&
2152 &       dum,do_scf,evxc,wvl,wvlbigdft,xcart,strsxc)
2153        call wvl_hpsitopsi(cprj,dtset,energies,ii,mcprj_local,mpi_enreg, &
2154 &       residm,wvl,xcart)
2155        if(residm<dtset%tolwfr) exit !Exit loop if converged
2156      end do
2157    end if
2158 
2159 !  Update energies depending on new WF
2160    energies%e_kinetic=ekin
2161    energies%e_nlpsp_vfock=enl
2162    energies%e_exactX=eexctx
2163    energies%e_sicdc=esicdc
2164 
2165 !  Eventually update energies depending on density
2166    if (dtset%iscf<10) then
2167      energies%e_localpsp=eloc
2168      energies%e_hartree=eh
2169      energies%e_xc=exc ; energies%e_xcdc=evxc
2170    else if (nnsclo_now==0) then
2171      energies%e_localpsp=eloc
2172    end if
2173 
2174 !  End of nscf iterations
2175    if (do_last_ortho) then
2176 !    !Don't update energies (nscf cycle has been done); just recompute potential
2177      inonsc=nnsclo_now;if (nnsclo_now==0) inonsc=dtset%nline
2178      call wvl_psitohpsi(dtset%diemix,eexctx,exc,eh,ekin,eloc,enl,esicdc,&
2179 &     istep,inonsc,iscf_,mpi_enreg%me_wvl,dtset%natom,&
2180 &     nfftf,mpi_enreg%nproc_wvl,dtset%nspden,&
2181 &     dum,do_scf,evxc,wvl,wvlbigdft,xcart,strsxc)
2182    end if
2183 
2184    DBG_EXIT("COLL")
2185 
2186  end subroutine wvl_nscf_loop

ABINIT/wvl_nscf_loop_bigdft [ Functions ]

[ Top ] [ Functions ]

NAME

  wvl_nscf_loop_bigdft

FUNCTION

  Non-self-consistent field cycle in Wavelets
  It follows the BigDFT scheme.
  See also "wvl_nscf_loop"

INPUTS

  nnsclo= number of non-self consistent field iterations

OUTPUT

  argout(sizeout)=description

SOURCE

2206 subroutine wvl_nscf_loop_bigdft()
2207 
2208 !Arguments ------------------------------------
2209 ! integer, intent(in)                    :: istep,mcprj,nfft,nnsclo
2210 ! real(dp), intent(inout)                :: residm
2211 ! real(dp), intent(out)                  :: nres2
2212 ! type(dataset_type), intent(in)         :: dtset
2213 ! type(MPI_type), intent(in)             :: mpi_enreg
2214 ! type(energies_type), intent(inout)     :: energies
2215 ! type(wvl_data), intent(inout)          :: wvl
2216  !arrays
2217 ! real(dp), intent(inout)                :: xcart(3, dtset%natom)
2218 ! real(dp), dimension(6), intent(out)    :: strsxc
2219 ! type(pawcprj_type),dimension(dtset%natom,mcprj),intent(out)::cprj
2220 
2221 !Local variables-------------------------------
2222  integer :: inonsc
2223  integer,parameter :: iscf_=-1       !do not do a SCF cycle
2224  logical,parameter :: do_scf=.false. !do not do a SCF cycle
2225  logical,parameter :: wvlbigdft=.true.
2226  real(dp) :: eexctx,eh,ekin,eloc,enl,esicdc,evxc,exc
2227 
2228 ! *************************************************************************
2229 
2230    DBG_ENTER("COLL")
2231 
2232    call wvl_hpsitopsi(cprj,dtset, energies, istep, mcprj_local,mpi_enreg, &
2233 &   residm, wvl,xcart)
2234 
2235    if (nnsclo_now>2) then
2236      do inonsc = 2, nnsclo_now-1
2237        call wvl_psitohpsi(dtset%diemix, energies%e_exactX, energies%e_xc, &
2238 &       energies%e_hartree, energies%e_kinetic, energies%e_localpsp, &
2239 &       energies%e_nlpsp_vfock, energies%e_sicdc, istep, inonsc, iscf_, &
2240 &       mpi_enreg%me_wvl, dtset%natom, nfftf, mpi_enreg%nproc_wvl,&
2241 &       dtset%nspden, nres2, do_scf,energies%e_xcdc, &
2242 &       wvl, wvlbigdft, xcart, strsxc)
2243        call wvl_hpsitopsi(cprj,dtset, energies, inonsc, mcprj_local,mpi_enreg, &
2244 &       residm, wvl,xcart)
2245      end do
2246    end if
2247 
2248 !  End of nscf iterations
2249    if (do_last_ortho.and.nnsclo_now<=1) then
2250 !    !Don't update energies (nscf cycle has been done); just recompute potential
2251      call wvl_psitohpsi(dtset%diemix,eexctx,exc,eh,ekin,eloc,enl,esicdc, &
2252 &     istep, 1, iscf_, mpi_enreg%me_wvl, dtset%natom, nfftf, &
2253 &     mpi_enreg%nproc_wvl,dtset%nspden, nres2, do_scf,evxc, &
2254 &     wvl, wvlbigdft, xcart, strsxc)
2255    else if (do_last_ortho.and.nnsclo_now>1) then
2256 !    !Update energies and potential (nscf cycles are not finished)
2257      call wvl_psitohpsi(dtset%diemix, energies%e_exactX, energies%e_xc, &
2258 &     energies%e_hartree,energies%e_kinetic, energies%e_localpsp, &
2259 &     energies%e_nlpsp_vfock, energies%e_sicdc, istep, nnsclo_now, iscf_, &
2260 &     mpi_enreg%me_wvl, dtset%natom, nfftf, mpi_enreg%nproc_wvl,&
2261 &     dtset%nspden, nres2, do_scf,energies%e_xcdc, &
2262 &     wvl, wvlbigdft, xcart, strsxc)
2263    end if
2264 
2265    DBG_EXIT("COLL")
2266 
2267  end subroutine wvl_nscf_loop_bigdft

ABINIT/wvl_occ [ Functions ]

[ Top ] [ Functions ]

NAME

  wvl_occ

FUNCTION

  Computes occupations for the wavelet case

INPUTS

OUTPUT

NOTES

 for the wvlbigdft case, see the routine 'wvl_occ_bigdft'

SOURCE

2343 subroutine wvl_occ()
2344 
2345 !Local variables-------------------------------
2346  real(dp):: doccde_(dtset%mband*dtset%nkpt*dtset%nsppol)
2347 ! *************************************************************************
2348 
2349    DBG_ENTER("COLL")
2350 
2351 !  Compute the new occupation numbers from eigen
2352    call newocc(doccde_,eigen,energies%entropy,energies%e_fermie,energies%e_fermih,dtset%ivalence,dtset%spinmagntarget,&
2353 &   dtset%mband,dtset%nband,dtset%nelect,dtset%ne_qFD,dtset%nh_qFD,dtset%nkpt,dtset%nspinor,&
2354 &   dtset%nsppol,occ,dtset%occopt,prtvol,dtset%tphysel,dtset%tsmear,dtset%wtk,&
2355 &   prtstm=dtset%prtstm,stmbias=dtset%stmbias)
2356 
2357 ! Copy occupations and efermi to BigDFT variables
2358    call wvl_occ_abi2big(dtset%mband,dtset%nkpt,dtset%nsppol,occ,1,wvl%wfs)
2359 
2360 #if defined HAVE_BIGDFT
2361 !  Copy Fermi level to BigDFT variable:
2362    wvl%wfs%ks%orbs%efermi=energies%e_fermie
2363 #endif
2364 
2365    DBG_EXIT("COLL")
2366 
2367  end subroutine wvl_occ

ABINIT/wvl_occ_bigdft [ Functions ]

[ Top ] [ Functions ]

NAME

  wvl_occ_bigdft

FUNCTION

  Computes occupations for the wavelet case
  Using BigDFT routines

INPUTS

OUTPUT

SIDE EFFECTS

NOTES

 for the wvlbigdft case, see the routine 'wvl_occ_bigdft'

SOURCE

2389 subroutine wvl_occ_bigdft()
2390 
2391 ! *************************************************************************
2392 
2393    DBG_ENTER("COLL")
2394 
2395 ! Transfer occopt from ABINIT to BigDFT
2396 #if defined HAVE_BIGDFT
2397    occopt_bigdft=dtset%occopt
2398    call wvl_occopt_abi2big(occopt_bigdft,occopt_bigdft,1)
2399 
2400 !Calculate occupations using BigDFT routine
2401    call evaltoocc(mpi_enreg%me_wvl, mpi_enreg%nproc_wvl, .false., &
2402 &   dtset%tsmear, wvl%wfs%ks%orbs,  occopt_bigdft)
2403 
2404 !Pass e_fermi from BigDFT object to ABINIT variable:
2405    energies%e_fermie = wvl%wfs%ks%orbs%efermi
2406 
2407 !Copy occupations from BigDFT to ABINIT variables
2408    call wvl_occ_abi2big(dtset%mband,dtset%nkpt,dtset%nsppol,occ,2,wvl%wfs)
2409 #endif
2410 
2411    DBG_EXIT("COLL")
2412 
2413  end subroutine wvl_occ_bigdft