TABLE OF CONTENTS


ABINIT/vtowfk [ Functions ]

[ Top ] [ Functions ]

NAME

 vtowfk

FUNCTION

 This routine compute the partial density at a given k-point,
 for a given spin-polarization, from a fixed Hamiltonian
 but might also simply compute eigenvectors and eigenvalues at this k point

COPYRIGHT

 Copyright (C) 1998-2018 ABINIT group (DCA, XG, GMR, MT)
 This file is distributed under the terms of the
 GNU General Public License, see ~abinit/COPYING
 or http://www.gnu.org/copyleft/gpl.txt .
 For the initials of contributors, see ~abinit/doc/developers/contributors.txt.

INPUTS

  cgq = array that holds the WF of the nearest neighbours of
        the current k-point (electric field, MPI //)
  cpus= cpu time limit in seconds
  dtefield <type(efield_type)> = variables related to Berry phase
      calculations (see initberry.f)
  dtfil <type(datafiles_type)>=variables related to files
  dtset <type(dataset_type)>=all input variables for this dataset
  fixed_occ=true if electronic occupations are fixed (occopt<3)
  gs_hamk <type(gs_hamiltonian_type)>=all data for the Hamiltonian at k
  icg=shift to be applied on the location of data in the array cprj
  icg=shift to be applied on the location of data in the array cg
  ikpt=number of the k-point
  iscf=(<= 0  =>non-SCF), >0 => SCF
  isppol isppol=1 for unpolarized, 2 for spin-polarized
  kg_k(3,npw_k)=reduced planewave coordinates.
  kinpw(npw_k)=(modified) kinetic energy for each plane wave (Hartree)
  mcg=second dimension of the cg array
  mcgq=second dimension of the cgq array (electric field, MPI //)
  mcprj=size of projected wave-functions array (cprj) =nspinor*mband*mkmem*nsppol
  mkgq = second dimension of pwnsfacq
  mpi_enreg=informations about MPI parallelization
  mpw=maximum dimensioned size of npw
  natom=number of atoms in cell.
  nband_k=number of bands at this k point for that spin polarization
  nkpt=number of k points.
  nnsclo_now=number of non-self-consistent loops for the current vtrial
             (often 1 for SCF calculation, =nstep for non-SCF calculations)
  npw_k=number of plane waves at this k point
  npwarr(nkpt)=number of planewaves in basis at this k point
  occ_k(nband_k)=occupation number for each band (usually 2) for each k.
  optforces=option for the computation of forces
  prtvol=control print volume and debugging output
  pwind(pwind_alloc,2,3)= array used to compute
           the overlap matrix smat between k-points (see initberry.f)
  pwind_alloc= first dimension of pwind
  pwnsfac(2,pwind_alloc)= phase factors for non-symmorphic translations
                          (see initberry.f)
  pwnsfacq(2,mkgq)= phase factors for the nearest neighbours of the
                    current k-point (electric field, MPI //)
  usebanfft=flag for band-fft parallelism
  paw_dmft  <type(paw_dmft_type)>= paw+dmft related data
  wtk=weight assigned to the k point.
  zshift(nband_k)=energy shifts for the squared shifted hamiltonian algorithm

OUTPUT

  dphase_k(3)=change in Zak phase for the current k-point
  eig_k(nband_k)=array for holding eigenvalues (hartree)
  ek_k(nband_k)=contribution from each band to kinetic energy, at this k-point
  ek_k_nd(2,nband_k,nband_k*use_dmft)=contribution to kinetic energy, including non-diagonal terms, at this k-point (usefull if use_dmft)
  resid_k(nband_k)=residuals for each band over all k points,
                   BEFORE the band rotation.
  ==== if optforces>0 ====
    grnl_k(3*natom,nband_k)=nonlocal gradients, at this k-point
  ==== if (gs_hamk%usepaw==0) ====
    enl_k(nband_k)=contribution from each band to nonlocal pseudopotential part of total energy, at this k-point
  ==== if (gs_hamk%usepaw==1) ====
    cprj(natom,mcprj*usecprj)= wave functions projected with non-local projectors:
                               cprj(n,k,i)=<p_i|Cnk> where p_i is a non-local projector.

SIDE EFFECTS

  cg(2,mcg)=updated wavefunctions
  rhoaug(n4,n5,n6,nvloc)= density in electrons/bohr**3, on the augmented fft grid.
                    (cumulative, so input as well as output). Update only
                    for occopt<3 (fixed occupation numbers)

PARENTS

      vtorho

CHILDREN

      build_h,cgwf,chebfi,dsymm,fourwf,fxphas,lobpcgwf,lobpcgwf2,meanvalue_g
      nonlop,pawcprj_alloc,pawcprj_copy,pawcprj_free,pawcprj_put,prep_fourwf
      prep_nonlop,pw_orthon,subdiago,timab,wrtout,xmpi_sum,zhemm

NOTES

  The cprj are distributed over band and spinors processors.
  One processor doesn't know all the cprj.
  Only the mod((iband-1)/mpi_enreg%bandpp,mpi_enreg%nproc_band) projectors
  are stored on each proc.

SOURCE

100 #if defined HAVE_CONFIG_H
101 #include "config.h"
102 #endif
103 
104 #include "abi_common.h"
105 
106 
107 subroutine vtowfk(cg,cgq,cprj,cpus,dphase_k,dtefield,dtfil,dtset,&
108 & eig_k,ek_k,ek_k_nd,enl_k,fixed_occ,grnl_k,gs_hamk,&
109 & ibg,icg,ikpt,iscf,isppol,kg_k,kinpw,mband_cprj,mcg,mcgq,mcprj,mkgq,mpi_enreg,&
110 & mpw,natom,nband_k,nkpt,nnsclo_now,npw_k,npwarr,occ_k,optforces,prtvol,&
111 & pwind,pwind_alloc,pwnsfac,pwnsfacq,resid_k,rhoaug,paw_dmft,wtk,zshift)
112 
113  use defs_basis
114  use defs_abitypes
115  use m_profiling_abi
116  use m_errors
117  use m_xmpi
118  use m_efield
119  use m_linalg_interfaces
120  use m_cgtools
121 
122  use m_hamiltonian, only : gs_hamiltonian_type
123  use m_paw_dmft,    only : paw_dmft_type
124  use m_pawcprj,     only : pawcprj_type, pawcprj_alloc, pawcprj_free, pawcprj_put,pawcprj_copy
125  use m_paw_dmft,    only : paw_dmft_type
126  use gwls_hamiltonian, only : build_H
127  use m_lobpcgwf,    only : lobpcgwf2
128 
129 !This section has been created automatically by the script Abilint (TD).
130 !Do not modify the following lines by hand.
131 #undef ABI_FUNC
132 #define ABI_FUNC 'vtowfk'
133  use interfaces_14_hidewrite
134  use interfaces_18_timing
135  use interfaces_53_ffts
136  use interfaces_53_spacepar
137  use interfaces_66_nonlocal
138  use interfaces_66_wfs
139  use interfaces_67_common
140  use interfaces_79_seqpar_mpi, except_this_one => vtowfk
141 !End of the abilint section
142 
143  implicit none
144 
145 !Arguments ------------------------------------
146  integer, intent(in) :: ibg,icg,ikpt,iscf,isppol,mband_cprj,mcg,mcgq,mcprj,mkgq,mpw
147  integer, intent(in) :: natom,nband_k,nkpt,nnsclo_now,npw_k,optforces
148  integer, intent(in) :: prtvol,pwind_alloc
149  logical,intent(in) :: fixed_occ
150  real(dp), intent(in) :: cpus,wtk
151  type(datafiles_type), intent(in) :: dtfil
152  type(efield_type), intent(inout) :: dtefield
153  type(dataset_type), intent(in) :: dtset
154  type(gs_hamiltonian_type), intent(inout) :: gs_hamk
155  type(MPI_type), intent(inout) :: mpi_enreg
156  type(paw_dmft_type), intent(in)  :: paw_dmft
157  integer, intent(in) :: kg_k(3,npw_k)
158  integer, intent(in) :: npwarr(nkpt),pwind(pwind_alloc,2,3)
159  real(dp), intent(in) :: cgq(2,mcgq),kinpw(npw_k),occ_k(nband_k)
160  real(dp), intent(in) :: pwnsfac(2,pwind_alloc),pwnsfacq(2,mkgq)
161  real(dp), intent(in) :: zshift(nband_k)
162  real(dp), intent(out) :: eig_k(nband_k),ek_k(nband_k),dphase_k(3),ek_k_nd(2,nband_k,nband_k*paw_dmft%use_dmft)
163  real(dp), intent(out) :: enl_k(nband_k*(1-gs_hamk%usepaw))
164  real(dp), intent(out) :: grnl_k(3*natom,nband_k*optforces)
165  real(dp), intent(out) :: resid_k(nband_k)
166  real(dp), intent(inout) :: cg(2,mcg),rhoaug(gs_hamk%n4,gs_hamk%n5,gs_hamk%n6,gs_hamk%nvloc)
167  type(pawcprj_type),intent(inout) :: cprj(natom,mcprj*gs_hamk%usecprj)
168 
169 !Local variables-------------------------------
170  logical :: newlobpcg
171  integer,parameter :: level=112,tim_fourwf=2,tim_nonlop_prep=11
172  integer,save :: nskip=0
173 !     Flag use_subovl: 1 if "subovl" array is computed (see below)
174 !     subovl should be Identity (in that case we should use use_subovl=0)
175 !     But this is true only if conjugate gradient algo. converges
176  integer :: use_subovl=0
177  integer :: bandpp_cprj,blocksize,choice,cpopt,iband,iband1
178  integer :: iblock,iblocksize,ibs,idir,ierr,igs,igsc,ii,pidx,inonsc
179  integer :: iorder_cprj,ipw,ispinor,ispinor_index,istwf_k,iwavef,jj,mgsc,my_nspinor,n1,n2,n3 !kk
180  integer :: nband_k_cprj,nblockbd,ncpgr,ndat,nkpt_max,nnlout,ortalgo
181  integer :: paw_opt,quit,signs,spaceComm,tim_nonlop,wfoptalg,wfopta10
182  logical :: nspinor1TreatedByThisProc,nspinor2TreatedByThisProc
183  real(dp) :: ar,ar_im,eshift,occblock
184  real(dp) :: res,residk,weight
185  character(len=500) :: message
186  real(dp) :: dummy(2,1),nonlop_dum(1,1),tsec(2)
187  real(dp),allocatable :: cwavef(:,:),cwavef1(:,:),cwavef_x(:,:),cwavef_y(:,:),cwavefb(:,:,:)
188  real(dp),allocatable :: eig_save(:),enlout(:),evec(:,:),evec_loc(:,:),gsc(:,:)
189  real(dp),allocatable :: mat_loc(:,:),mat1(:,:,:),matvnl(:,:,:)
190  real(dp),allocatable :: subham(:),subovl(:),subvnl(:),totvnl(:,:),wfraug(:,:,:,:)
191  type(pawcprj_type),allocatable :: cwaveprj(:,:)
192 
193 
194 
195 ! **********************************************************************
196 
197  DBG_ENTER("COLL")
198 
199  call timab(28,1,tsec) ! Keep track of total time spent in "vtowfk"
200 
201 !Structured debugging if prtvol==-level
202  if(prtvol==-level)then
203    write(message,'(80a,a,a)') ('=',ii=1,80),ch10,'vtowfk : enter'
204    call wrtout(std_out,message,'PERS')
205  end if
206 
207 
208 !=========================================================================
209 !============= INITIALIZATIONS AND ALLOCATIONS ===========================
210 !=========================================================================
211 
212  nkpt_max=50; if(xmpi_paral==1)nkpt_max=-1
213 
214  wfoptalg=mod(dtset%wfoptalg,100); wfopta10=mod(wfoptalg,10)
215  newlobpcg = (dtset%wfoptalg == 114 .and. dtset%use_gpu_cuda == 0)
216  istwf_k=gs_hamk%istwf_k
217  quit=0
218 
219 !Parallelization over spinors management
220  my_nspinor=max(1,dtset%nspinor/mpi_enreg%nproc_spinor)
221  if (mpi_enreg%paral_spinor==0) then
222    ispinor_index=1
223    nspinor1TreatedByThisProc=.true.
224    nspinor2TreatedByThisProc=(dtset%nspinor==2)
225  else
226    ispinor_index=mpi_enreg%me_spinor+1
227    nspinor1TreatedByThisProc=(mpi_enreg%me_spinor==0)
228    nspinor2TreatedByThisProc=(mpi_enreg%me_spinor==1)
229  end if
230 
231 !Parallelism over FFT and/or bands: define sizes and tabs
232  !if (mpi_enreg%paral_kgb==1) then
233  nblockbd=nband_k/(mpi_enreg%nproc_band*mpi_enreg%bandpp)
234  !else
235  !  nblockbd=nband_k/mpi_enreg%nproc_fft
236  !  if (nband_k/=nblockbd*mpi_enreg%nproc_fft) nblockbd=nblockbd+1
237  !end if
238  blocksize=nband_k/nblockbd
239 
240 !Save eshift
241  if(wfoptalg==3)then
242    eshift=zshift(1)
243    ABI_ALLOCATE(eig_save,(nband_k))
244    eig_save(:)=eshift
245  end if
246 
247  n1=gs_hamk%ngfft(1); n2=gs_hamk%ngfft(2); n3=gs_hamk%ngfft(3)
248 
249  if ( .not. newlobpcg ) then
250    igsc=0
251    mgsc=nband_k*npw_k*my_nspinor*gs_hamk%usepaw
252 
253    ABI_STAT_ALLOCATE(gsc,(2,mgsc), ierr)
254    ABI_CHECK(ierr==0, "out of memory in gsc")
255    gsc=zero
256  end if
257 
258  if(wfopta10 /= 1 .and. .not. newlobpcg ) then !chebfi already does this stuff inside
259    ABI_ALLOCATE(evec,(2*nband_k,nband_k))
260    ABI_ALLOCATE(subham,(nband_k*(nband_k+1)))
261 
262    ABI_ALLOCATE(subvnl,(0))
263    ABI_ALLOCATE(totvnl,(0,0))
264    if (gs_hamk%usepaw==0) then
265      if (wfopta10==4) then
266        ABI_DEALLOCATE(totvnl)
267        if (istwf_k==1) then
268          ABI_ALLOCATE(totvnl,(2*nband_k,nband_k))
269        else if (istwf_k==2) then
270          ABI_ALLOCATE(totvnl,(nband_k,nband_k))
271        end if
272      else
273        ABI_DEALLOCATE(subvnl)
274        ABI_ALLOCATE(subvnl,(nband_k*(nband_k+1)))
275      end if
276    end if
277 
278    if (use_subovl==1) then
279      ABI_ALLOCATE(subovl,(nband_k*(nband_k+1)))
280    else
281      ABI_ALLOCATE(subovl,(0))
282    end if
283  end if
284 
285 !Carry out UP TO dtset%nline steps, or until resid for every band is < dtset%tolwfr
286 
287  if(prtvol>2 .or. ikpt<=nkpt_max)then
288    write(message,'(a,i5,2x,a,3f9.5,2x,a)')' non-scf iterations; kpt # ',ikpt,', k= (',gs_hamk%kpt_k,'), band residuals:'
289    call wrtout(std_out,message,'PERS')
290  end if
291 
292 !Electric field: initialize dphase_k
293  dphase_k(:) = zero
294 
295 !=========================================================================
296 !==================== NON-SELF-CONSISTENT LOOP ===========================
297 !=========================================================================
298 
299 !nnsclo_now=number of non-self-consistent loops for the current vtrial
300 !(often 1 for SCF calculation, =nstep for non-SCF calculations)
301  call timab(39,1,tsec) ! "vtowfk (loop)"
302 
303  do inonsc=1,nnsclo_now
304 
305 !  This initialisation is needed for the MPI-parallelisation (gathering using sum)
306    if(wfopta10 /= 1 .and. .not. newlobpcg) then
307      subham(:)=zero
308      if (gs_hamk%usepaw==0) then
309        if (wfopta10==4) then
310          totvnl(:,:)=zero
311        else
312          subvnl(:)=zero
313        end if
314      end if
315      if (use_subovl==1)subovl(:)=zero
316    end if
317    resid_k(:)=zero
318 
319 !  Filter the WFs when modified kinetic energy is too large (see routine mkkin.f)
320 !  !$OMP PARALLEL DO COLLAPSE(2) PRIVATE(igs,iwavef)
321    do ispinor=1,my_nspinor
322      do iband=1,nband_k
323        igs=(ispinor-1)*npw_k
324        iwavef=(iband-1)*npw_k*my_nspinor+icg
325        do ipw=1+igs,npw_k+igs
326          if(kinpw(ipw-igs)>huge(zero)*1.d-11)then
327            cg(1,ipw+iwavef)=zero
328            cg(2,ipw+iwavef)=zero
329          end if
330        end do
331      end do
332    end do
333 
334    ! JLJ 17/10/2014 : If it is a GWLS calculation, construct the hamiltonian
335    ! as in a usual GS calc., but skip any minimisation procedure.
336    ! This would be equivalent to nstep=0, if the latter did work.
337    if(dtset%optdriver/=RUNL_GWLS) then
338 
339      if(wfopta10==4.or.wfopta10==1) then
340        if (istwf_k/=1.and.istwf_k/=2) then !no way to use lobpcg
341          write(message,'(3a)')&
342 &         'Only istwfk=1 or 2 are allowed with wfoptalg=4/14 !',ch10,&
343 &         'Action: put istwfk to 1 or remove k points with half integer coordinates.'
344          MSG_ERROR(message)
345        end if
346 
347 !    =========================================================================
348 !    ============ MINIMIZATION OF BANDS: LOBPCG ==============================
349 !    =========================================================================
350        if (wfopta10==4) then
351          if ( .not. newlobpcg ) then
352            call lobpcgwf(cg,dtset,gs_hamk,gsc,icg,igsc,kinpw,mcg,mgsc,mpi_enreg,&
353 &           nband_k,nblockbd,npw_k,prtvol,resid_k,subham,totvnl)
354 !          In case of FFT parallelism, exchange subspace arrays
355            spaceComm=mpi_enreg%comm_bandspinorfft
356            call xmpi_sum(subham,spaceComm,ierr)
357            if (gs_hamk%usepaw==0) then
358              if (wfopta10==4) then
359                call xmpi_sum(totvnl,spaceComm,ierr)
360              else
361                call xmpi_sum(subvnl,spaceComm,ierr)
362              end if
363            end if
364            if (use_subovl==1) call xmpi_sum(subovl,spaceComm,ierr)
365          else
366            call lobpcgwf2(cg(:,icg+1:),dtset,eig_k,enl_k,gs_hamk,kinpw,mpi_enreg,&
367 &           nband_k,npw_k,my_nspinor,prtvol,resid_k)
368          end if
369 !        In case of FFT parallelism, exchange subspace arrays
370 
371 !    =========================================================================
372 !    ============ MINIMIZATION OF BANDS: CHEBYSHEV FILTERING =================
373 !    =========================================================================
374        else if (wfopta10 == 1) then
375          call chebfi(cg(:, icg+1:),dtset,eig_k,enl_k,gs_hamk,gsc,kinpw,&
376 &         mpi_enreg,nband_k,npw_k,my_nspinor,prtvol,resid_k)
377        end if
378 
379 !      =========================================================================
380 !      ======== MINIMIZATION OF BANDS: CONJUGATE GRADIENT (Teter et al.) =======
381 !      =========================================================================
382      else
383        call cgwf(dtset%berryopt,cg,cgq,dtset%chkexit,cpus,dphase_k,dtefield,dtfil%filnam_ds(1),&
384 &       gsc,gs_hamk,icg,igsc,ikpt,inonsc,isppol,dtset%mband,mcg,mcgq,mgsc,mkgq,&
385 &       mpi_enreg,mpw,nband_k,dtset%nbdblock,nkpt,dtset%nline,npw_k,npwarr,my_nspinor,&
386 &       dtset%nsppol,dtset%ortalg,prtvol,pwind,pwind_alloc,pwnsfac,pwnsfacq,quit,resid_k,&
387 &       subham,subovl,subvnl,dtset%tolrde,dtset%tolwfr,use_subovl,wfoptalg,zshift)
388      end if
389    end if
390 
391 !  =========================================================================
392 !  ===================== FIND LARGEST RESIDUAL =============================
393 !  =========================================================================
394 
395 !  Find largest resid over bands at this k point
396 !  Note that this operation is done BEFORE rotation of bands :
397 !  it would be time-consuming to recompute the residuals after.
398    residk=maxval(resid_k(1:max(1,nband_k-dtset%nbdbuf)))
399 
400 !  Print residuals
401    if(prtvol>2 .or. ikpt<=nkpt_max)then
402      do ii=0,(nband_k-1)/8
403        write(message,'(a,8es10.2)')' res:',(resid_k(iband),iband=1+ii*8,min(nband_k,8+ii*8))
404        call wrtout(std_out,message,'PERS')
405      end do
406    end if
407 
408 !  =========================================================================
409 !  ========== DIAGONALIZATION OF HAMILTONIAN IN WFs SUBSPACE ===============
410 !  =========================================================================
411 
412    if( .not. wfopta10 == 1 .and. .not. newlobpcg ) then
413      call timab(585,1,tsec) !"vtowfk(subdiago)"
414      call subdiago(cg,eig_k,evec,gsc,icg,igsc,istwf_k,&
415 &     mcg,mgsc,nband_k,npw_k,my_nspinor,dtset%paral_kgb,&
416 &     subham,subovl,use_subovl,gs_hamk%usepaw,mpi_enreg%me_g0)
417      call timab(585,2,tsec)
418    end if
419 
420 !  Print energies
421    if(prtvol>2 .or. ikpt<=nkpt_max)then
422      do ii=0,(nband_k-1)/8
423        write(message, '(a,8es10.2)' )' ene:',(eig_k(iband),iband=1+ii*8,min(nband_k,8+ii*8))
424        call wrtout(std_out,message,'PERS')
425      end do
426    end if
427 
428 !  THIS CHANGE OF SHIFT DOES NOT WORK WELL
429 !  Update zshift in the case of wfoptalg==3
430 !  if(wfoptalg==3 .and. inonsc/=1)then
431 !  do iband=1,nband_k
432 !  if(eig_k(iband)<eshift .and. eig_save(iband)<eshift)then
433 !  zshift(iband)=max(eig_k(iband),eig_save(iband))
434 !  end if
435 !  if(eig_k(iband)>eshift .and. eig_save(iband)>eshift)then
436 !  zshift(iband)=min(eig_k(iband),eig_save(iband))
437 !  end if
438 !  end do
439 !  eig_save(:)=eig_k(:)
440 !  end if
441 
442 !  =========================================================================
443 !  =============== ORTHOGONALIZATION OF WFs (if needed) ====================
444 !  =========================================================================
445 
446 !  Re-orthonormalize the wavefunctions at this k point--
447 !  this is redundant but is performed to combat rounding error in wavefunction orthogonality
448 
449    call timab(583,1,tsec) ! "vtowfk(pw_orthon)"
450    ortalgo=mpi_enreg%paral_kgb
451    if ((wfoptalg/=14 .and. wfoptalg /= 1).or.dtset%ortalg>0) then
452      call pw_orthon(icg,igsc,istwf_k,mcg,mgsc,npw_k*my_nspinor,nband_k,ortalgo,gsc,gs_hamk%usepaw,cg,&
453 &     mpi_enreg%me_g0,mpi_enreg%comm_bandspinorfft)
454    end if
455    call timab(583,2,tsec)
456 
457 !  DEBUG seq==par comment next block
458 !  Fix phases of all bands
459    if ((xmpi_paral/=1).or.(mpi_enreg%paral_kgb/=1)) then
460      if ( .not. newlobpcg ) then
461        call fxphas(cg,gsc,icg,igsc,istwf_k,mcg,mgsc,mpi_enreg,nband_k,npw_k*my_nspinor,gs_hamk%usepaw)
462      else
463        ! GSC is local to vtowfk and is completely useless since everything
464        ! is calcultated in my lobpcg, we don't care about the phase of gsc !
465        call fxphas(cg,gsc,icg,igsc,istwf_k,mcg,mgsc,mpi_enreg,nband_k,npw_k*my_nspinor,0)
466      end if
467    end if
468 
469    if (residk<dtset%tolwfr) exit  !  Exit loop over inonsc if converged
470  end do !  End loop over inonsc (NON SELF-CONSISTENT LOOP)
471 
472  call timab(39,2,tsec)
473  call timab(30,1,tsec) ! "vtowfk  (afterloop)"
474 
475 !###################################################################
476 
477 !Compute kinetic energy and non-local energy for each band, and in the SCF
478 !case, contribution to forces, and eventually accumulate rhoaug
479 
480  ndat=1;if (mpi_enreg%paral_kgb==1) ndat=mpi_enreg%bandpp
481  if(iscf>0 .and. fixed_occ)  then
482    ABI_ALLOCATE(wfraug,(2,gs_hamk%n4,gs_hamk%n5,gs_hamk%n6*ndat))
483  end if
484 
485 !"nonlop" routine input parameters
486  nnlout=3*natom*optforces
487  signs=1;idir=0
488  if (gs_hamk%usepaw==0) then
489    choice=1+optforces
490    paw_opt=0;cpopt=-1;tim_nonlop=2
491  else
492    choice=2*optforces
493    paw_opt=2;cpopt=0;tim_nonlop=10-8*optforces
494    if (dtset%usefock==1) then
495 !     if (dtset%optforces/= 0) then
496      if (optforces/= 0) then
497        choice=2;cpopt=1; nnlout=3*natom
498      end if
499    end if
500  end if
501 
502  ABI_ALLOCATE(enlout,(nnlout*blocksize))
503 
504 !Allocation of memory space for one WF
505  ABI_ALLOCATE(cwavef,(2,npw_k*my_nspinor*blocksize))
506  if (gs_hamk%usepaw==1.and.(iscf>0.or.gs_hamk%usecprj==1)) then
507    iorder_cprj=0
508    nband_k_cprj=nband_k*(mband_cprj/dtset%mband)
509    bandpp_cprj=mpi_enreg%bandpp
510    ABI_DATATYPE_ALLOCATE(cwaveprj,(natom,my_nspinor*bandpp_cprj))
511    ncpgr=0;if (cpopt==1) ncpgr=cprj(1,1)%ncpgr
512    call pawcprj_alloc(cwaveprj,ncpgr,gs_hamk%dimcprj)
513  else
514    ABI_DATATYPE_ALLOCATE(cwaveprj,(0,0))
515  end if
516 
517 #undef DEV_NEW_CODE
518 !#define DEV_NEW_CODE
519 
520 !The code below is more efficient if paral_kgb==1 (less MPI communications)
521 !however OMP is not compatible with paral_kgb since we should define
522 !which threads performs the call to MPI_ALL_REDUCE.
523 !This problem can be easily solved by removing MPI_enreg from meanvalue_g so that
524 !the MPI call is done only once outside the OMP parallel region.
525 
526 #ifdef DEV_NEW_CODE
527 !Loop over bands or blocks of bands. Note that in sequential mode iblock=iband, nblockbd=nband_k and blocksize=1
528 !!$OMP PARALLEL DO PRIVATE(iband,ar)
529  do iblock=1,nblockbd
530 
531 !  Compute kinetic energy of each band
532    do iblocksize=1,blocksize
533      iband=(iblock-1)*blocksize+iblocksize
534 
535      call meanvalue_g(ar,kinpw,0,istwf_k,mpi_enreg,npw_k,my_nspinor,&
536 &     cg(:,1+(iband-1)*npw_k*my_nspinor+icg:iband*npw_k*my_nspinor+icg),&
537 &     cg(:,1+(iband-1)*npw_k*my_nspinor+icg:iband*npw_k*my_nspinor+icg),0)
538 
539      ek_k(iband)=ar
540 
541      if(paw_dmft%use_dmft==1) then
542        do iband1=1,nband_k
543          call meanvalue_g(ar,kinpw,0,istwf_k,mpi_enreg,npw_k,my_nspinor,&
544 &         cg(:,1+(iband -1)*npw_k*my_nspinor+icg:iband *npw_k*my_nspinor+icg),&
545 &         cg(:,1+(iband1-1)*npw_k*my_nspinor+icg:iband1*npw_k*my_nspinor+icg),paw_dmft%use_dmft,ar_im=ar_im)
546          ek_k_nd(1,iband,iband1)=ar
547          ek_k_nd(2,iband,iband1)=ar_im
548        end do
549      end if
550 !    if(use_dmft==1) then
551 !    do iband1=1,nband_k
552 !    call meanvalue_g(ar,kinpw,0,istwf_k,mpi_enreg,npw_k,my_nspinor,&
553 !    &         cg(:,1+(iband -1)*npw_k*my_nspinor+icg:iband *npw_k*my_nspinor+icg),&
554 !    &         cg(:,1+(iband1-1)*npw_k*my_nspinor+icg:iband1*npw_k*my_nspinor+icg),use_dmft)
555 !    ek_k_nd(iband,iband1)=ar
556 !    end do
557 !    end if
558 
559    end do
560  end do
561 !TODO: xmpi_sum is missing but I have to understand the logic used to deal with the different
562 !MPI options and communicators.
563 #endif
564 
565 
566 !Loop over bands or blocks of bands. Note that in sequential mode iblock=iband, nblockbd=nband_k and blocksize=1
567  do iblock=1,nblockbd
568    occblock=maxval(occ_k(1+(iblock-1)*blocksize:iblock*blocksize))
569    cwavef(:,:)=cg(:,1+(iblock-1)*npw_k*my_nspinor*blocksize+icg:iblock*npw_k*my_nspinor*blocksize+icg)
570 
571 #ifndef DEV_NEW_CODE
572 !  Compute kinetic energy of each band
573    do iblocksize=1,blocksize
574      iband=(iblock-1)*blocksize+iblocksize
575 
576      call meanvalue_g(ar,kinpw,0,istwf_k,mpi_enreg,npw_k,my_nspinor,&
577 &     cg(:,1+(iband-1)*npw_k*my_nspinor+icg:iband*npw_k*my_nspinor+icg),&
578 &     cg(:,1+(iband-1)*npw_k*my_nspinor+icg:iband*npw_k*my_nspinor+icg),0)
579 
580      ek_k(iband)=ar
581 
582      if(paw_dmft%use_dmft==1) then
583        do iband1=1,nband_k
584          call meanvalue_g(ar,kinpw,0,istwf_k,mpi_enreg,npw_k,my_nspinor,&
585 &         cg(:,1+(iband -1)*npw_k*my_nspinor+icg:iband *npw_k*my_nspinor+icg),&
586 &         cg(:,1+(iband1-1)*npw_k*my_nspinor+icg:iband1*npw_k*my_nspinor+icg),paw_dmft%use_dmft,ar_im=ar_im)
587          ek_k_nd(1,iband,iband1)=ar
588          ek_k_nd(2,iband,iband1)=ar_im
589        end do
590      end if
591    end do
592 #endif
593 
594    if(iscf>0)then ! In case of fixed occupation numbers, accumulates the partial density
595      if (fixed_occ .and. mpi_enreg%paral_kgb/=1) then
596        if (abs(occ_k(iblock))>=tol8) then
597          weight=occ_k(iblock)*wtk/gs_hamk%ucvol
598 !        Accumulate charge density in real space in array rhoaug
599 
600 !        The same section of code is also found in mkrho.F90 : should be rationalized !
601          call fourwf(1,rhoaug(:,:,:,1),cwavef,dummy,wfraug,gs_hamk%gbound_k,gs_hamk%gbound_k,&
602 &         istwf_k,gs_hamk%kg_k,gs_hamk%kg_k,gs_hamk%mgfft,mpi_enreg,1,gs_hamk%ngfft,npw_k,1,&
603 &         gs_hamk%n4,gs_hamk%n5,gs_hamk%n6,1,&
604 &         dtset%paral_kgb,tim_fourwf,weight,weight,use_gpu_cuda=dtset%use_gpu_cuda)
605 
606          if(dtset%nspinor==2)then
607            ABI_ALLOCATE(cwavef1,(2,npw_k))
608            cwavef1(:,:)=cwavef(:,1+npw_k:2*npw_k) ! EB FR spin dn part and used for m_z component (cwavef_z)
609 
610            if(dtset%nspden==1) then
611 
612              call fourwf(1,rhoaug(:,:,:,1),cwavef1,dummy,wfraug,&
613 &             gs_hamk%gbound_k,gs_hamk%gbound_k,&
614 &             istwf_k,gs_hamk%kg_k,gs_hamk%kg_k,gs_hamk%mgfft,mpi_enreg,1,gs_hamk%ngfft,npw_k,1,&
615 &             gs_hamk%n4,gs_hamk%n5,gs_hamk%n6,1,&
616 &             dtset%paral_kgb,tim_fourwf,weight,weight,use_gpu_cuda=dtset%use_gpu_cuda)
617 
618            else if(dtset%nspden==4) then
619 !            Build the four components of rho. We use only norm quantities and, so fourwf.
620 !$\sum_{n} f_n \Psi^{* \alpha}_n \Psi^{\alpha}_n =\rho^{\alpha \alpha}$
621 !$\sum_{n} f_n (\Psi^{1}+\Psi^{2})^*_n (\Psi^{1}+\Psi^{2})_n=rho+m_x$
622 !$\sum_{n} f_n (\Psi^{1}-i \Psi^{2})^*_n (\Psi^{1}-i \Psi^{2})_n=rho+m_y$
623              ABI_ALLOCATE(cwavef_x,(2,npw_k))
624              ABI_ALLOCATE(cwavef_y,(2,npw_k))
625 !$(\Psi^{1}+\Psi^{2})$
626              cwavef_x(:,:)=cwavef(:,1:npw_k)+cwavef1(:,1:npw_k)
627 !$(\Psi^{1}-i \Psi^{2})$
628              cwavef_y(1,:)=cwavef(1,1:npw_k)+cwavef1(2,1:npw_k)
629              cwavef_y(2,:)=cwavef(2,1:npw_k)-cwavef1(1,1:npw_k)
630 ! z component
631              call fourwf(1,rhoaug(:,:,:,4),cwavef1,dummy,wfraug,gs_hamk%gbound_k,gs_hamk%gbound_k,&
632 &             istwf_k,gs_hamk%kg_k,gs_hamk%kg_k,gs_hamk%mgfft,mpi_enreg,1,gs_hamk%ngfft,npw_k,1,&
633 &             gs_hamk%n4,gs_hamk%n5,gs_hamk%n6,1,dtset%paral_kgb,&
634 &             tim_fourwf,weight,weight,use_gpu_cuda=dtset%use_gpu_cuda)
635 ! x component
636              call fourwf(1,rhoaug(:,:,:,2),cwavef_x,dummy,wfraug,gs_hamk%gbound_k,gs_hamk%gbound_k,&
637 &             istwf_k,gs_hamk%kg_k,gs_hamk%kg_k,gs_hamk%mgfft,mpi_enreg,1,gs_hamk%ngfft,npw_k,1,&
638 &             gs_hamk%n4,gs_hamk%n5,gs_hamk%n6,1,dtset%paral_kgb,&
639 &             tim_fourwf,weight,weight,use_gpu_cuda=dtset%use_gpu_cuda)
640 ! y component
641              call fourwf(1,rhoaug(:,:,:,3),cwavef_y,dummy,wfraug,gs_hamk%gbound_k,gs_hamk%gbound_k,&
642 &             istwf_k,gs_hamk%kg_k,gs_hamk%kg_k,gs_hamk%mgfft,mpi_enreg,1,gs_hamk%ngfft,npw_k,1,&
643 &             gs_hamk%n4,gs_hamk%n5,gs_hamk%n6,1,dtset%paral_kgb,&
644 &             tim_fourwf,weight,weight,use_gpu_cuda=dtset%use_gpu_cuda)
645 
646              ABI_DEALLOCATE(cwavef_x)
647              ABI_DEALLOCATE(cwavef_y)
648 
649            end if ! dtset%nspden/=4
650            ABI_DEALLOCATE(cwavef1)
651          end if
652        else
653          nskip=nskip+1
654        end if
655 
656 !      In case of fixed occupation numbers,in bandFFT mode accumulates the partial density
657      else if (fixed_occ .and. mpi_enreg%paral_kgb==1) then
658 
659        if (dtset%nspinor==1) then
660          call timab(537,1,tsec) ! "prep_fourwf%vtow"
661          call prep_fourwf(rhoaug(:,:,:,1),blocksize,cwavef,wfraug,iblock,istwf_k,&
662 &         gs_hamk%mgfft,mpi_enreg,nband_k,ndat,gs_hamk%ngfft,npw_k,&
663 &         gs_hamk%n4,gs_hamk%n5,gs_hamk%n6,occ_k,&
664 &         1,gs_hamk%ucvol,wtk,use_gpu_cuda=dtset%use_gpu_cuda)
665          call timab(537,2,tsec)
666        else if (dtset%nspinor==2) then
667          ABI_ALLOCATE(cwavefb,(2,npw_k*blocksize,2))
668          ibs=(iblock-1)*npw_k*my_nspinor*blocksize+icg
669 !        --- No parallelization over spinors ---
670          if (mpi_enreg%paral_spinor==0) then
671            do iband=1,blocksize
672              cwavefb(:,(iband-1)*npw_k+1:iband*npw_k,1)=cg(:,1+(2*iband-2)*npw_k+ibs:(iband*2-1)*npw_k+ibs)
673              cwavefb(:,(iband-1)*npw_k+1:iband*npw_k,2)=cg(:,1+(2*iband-1)*npw_k+ibs:iband*2*npw_k+ibs)
674            end do
675          else
676 !          --- Parallelization over spinors ---
677 !          (split the work between 2 procs)
678            cwavefb(:,:,3-ispinor_index)=zero
679            do iband=1,blocksize
680              cwavefb(:,(iband-1)*npw_k+1:iband*npw_k,ispinor_index) = cg(:,1+(iband-1)*npw_k+ibs:iband*npw_k+ibs)
681            end do
682            call xmpi_sum(cwavefb,mpi_enreg%comm_spinor,ierr)
683          end if
684 
685          call timab(537,1,tsec) !"prep_fourwf%vtow"
686          if (nspinor1TreatedByThisProc) then
687            call prep_fourwf(rhoaug(:,:,:,1),blocksize,cwavefb(:,:,1),wfraug,iblock,&
688 &           istwf_k,gs_hamk%mgfft,mpi_enreg,nband_k,ndat,gs_hamk%ngfft,npw_k,&
689 &           gs_hamk%n4,gs_hamk%n5,gs_hamk%n6,occ_k,1,gs_hamk%ucvol,wtk,&
690 &           use_gpu_cuda=dtset%use_gpu_cuda)
691          end if
692          if(dtset%nspden==1) then
693            if (nspinor2TreatedByThisProc) then
694              call prep_fourwf(rhoaug(:,:,:,1),blocksize,cwavefb(:,:,2),wfraug,&
695 &             iblock,istwf_k,gs_hamk%mgfft,mpi_enreg,nband_k,ndat,&
696 &             gs_hamk%ngfft,npw_k,gs_hamk%n4,gs_hamk%n5,gs_hamk%n6,occ_k,1,&
697 &             gs_hamk%ucvol,wtk,use_gpu_cuda=dtset%use_gpu_cuda)
698            end if
699          else if(dtset%nspden==4) then
700            ABI_ALLOCATE(cwavef_x,(2,npw_k*blocksize))
701            ABI_ALLOCATE(cwavef_y,(2,npw_k*blocksize))
702            cwavef_x(:,:)=cwavefb(:,1:npw_k*blocksize,1)+cwavefb(:,:,2)
703            cwavef_y(1,:)=cwavefb(1,1:npw_k*blocksize,1)+cwavefb(2,:,2)
704            cwavef_y(2,:)=cwavefb(2,:,1)-cwavefb(1,:,2)
705            if (nspinor1TreatedByThisProc) then
706              call prep_fourwf(rhoaug(:,:,:,4),blocksize,cwavefb(:,:,2),wfraug,&
707 &             iblock,istwf_k,gs_hamk%mgfft,mpi_enreg,nband_k,ndat,gs_hamk%ngfft,&
708 &             npw_k,gs_hamk%n4,gs_hamk%n5,gs_hamk%n6,occ_k,1,gs_hamk%ucvol,wtk,&
709 &             use_gpu_cuda=dtset%use_gpu_cuda)
710            end if
711            if (nspinor2TreatedByThisProc) then
712              call prep_fourwf(rhoaug(:,:,:,2),blocksize,cwavef_x,wfraug,&
713 &             iblock,istwf_k,gs_hamk%mgfft,mpi_enreg,nband_k,ndat,gs_hamk%ngfft,&
714 &             npw_k,gs_hamk%n4,gs_hamk%n5,gs_hamk%n6,occ_k,1,gs_hamk%ucvol,wtk,&
715 &             use_gpu_cuda=dtset%use_gpu_cuda)
716              call prep_fourwf(rhoaug(:,:,:,3),blocksize,cwavef_y,wfraug,&
717 &             iblock,istwf_k,gs_hamk%mgfft,mpi_enreg,nband_k,ndat,gs_hamk%ngfft,&
718 &             npw_k,gs_hamk%n4,gs_hamk%n5,gs_hamk%n6,occ_k,1,gs_hamk%ucvol,wtk,&
719 &             use_gpu_cuda=dtset%use_gpu_cuda)
720            end if
721            ABI_DEALLOCATE(cwavef_x)
722            ABI_DEALLOCATE(cwavef_y)
723          end if
724          call timab(537,2,tsec)
725          ABI_DEALLOCATE(cwavefb)
726        end if
727      end if
728    end if ! End of SCF calculation
729 
730 !    Call to nonlocal operator:
731 !    - Compute nonlocal forces from most recent wfs
732 !    - PAW: compute projections of WF onto NL projectors (cprj)
733    if(iscf>0.or.gs_hamk%usecprj==1)then
734      if (gs_hamk%usepaw==1.or.optforces/=0) then
735 !      Treat all wavefunctions in case of varying occupation numbers or PAW
736 !      Only treat occupied bands in case of fixed occupation numbers and NCPP
737        if(fixed_occ.and.abs(occblock)<=tol8.and.gs_hamk%usepaw==0) then
738          if (optforces>0) grnl_k(:,(iblock-1)*blocksize+1:iblock*blocksize)=zero
739        else
740          if(gs_hamk%usepaw==1) then
741            call timab(554,1,tsec)  ! "vtowfk:rhoij"
742          end if
743          if(cpopt==1) then
744            iband=1+(iblock-1)*bandpp_cprj
745            call pawcprj_copy(cprj(:,1+(iblock-1)*my_nspinor*blocksize+ibg:iblock*my_nspinor*blocksize+ibg),cwaveprj)
746          end if
747          if (mpi_enreg%paral_kgb==1) then
748            call timab(572,1,tsec) ! 'prep_nonlop%vtowfk'
749            call prep_nonlop(choice,cpopt,cwaveprj,enlout,gs_hamk,idir, &
750 &           eig_k(1+(iblock-1)*blocksize:iblock*blocksize),blocksize,&
751 &           mpi_enreg,nnlout,paw_opt,signs,nonlop_dum,tim_nonlop_prep,cwavef,cwavef)
752            call timab(572,2,tsec)
753          else
754            call nonlop(choice,cpopt,cwaveprj,enlout,gs_hamk,idir,eig_k(1+(iblock-1)*blocksize:iblock*blocksize),&
755 &           mpi_enreg,blocksize,nnlout,&
756 &           paw_opt,signs,nonlop_dum,tim_nonlop,cwavef,cwavef)
757          end if
758          if(gs_hamk%usepaw==1) then
759            call timab(554,2,tsec)
760          end if
761 !        Acccumulate forces
762          if (optforces>0) then
763            iband=(iblock-1)*blocksize
764            do iblocksize=1,blocksize
765              ii=0
766              if (nnlout>3*natom) ii=6
767              iband=iband+1;ibs=ii+nnlout*(iblocksize-1)
768              grnl_k(1:nnlout,iband)=enlout(ibs+1:ibs+nnlout)
769            end do
770          end if
771 !        Store cprj (<Pnl|Psi>)
772          if (gs_hamk%usepaw==1.and.gs_hamk%usecprj==1) then
773            iband=1+(iblock-1)*bandpp_cprj
774            call pawcprj_put(gs_hamk%atindx,cwaveprj,cprj,natom,iband,ibg,ikpt,iorder_cprj,isppol,&
775 &           mband_cprj,dtset%mkmem,natom,bandpp_cprj,nband_k_cprj,gs_hamk%dimcprj,my_nspinor,&
776 &           dtset%nsppol,dtfil%unpaw,mpicomm=mpi_enreg%comm_kpt,proc_distrb=mpi_enreg%proc_distrb)
777          end if
778        end if
779      end if ! PAW or forces
780    end if ! iscf>0 or iscf=-3
781 
782  end do !  End of loop on blocks
783 
784  ABI_DEALLOCATE(cwavef)
785  ABI_DEALLOCATE(enlout)
786 
787  if (gs_hamk%usepaw==1.and.(iscf>0.or.gs_hamk%usecprj==1)) then
788    call pawcprj_free(cwaveprj)
789  end if
790  ABI_DATATYPE_DEALLOCATE(cwaveprj)
791 
792  if (fixed_occ.and.iscf>0) then
793    ABI_DEALLOCATE(wfraug)
794  end if
795 
796 !Write the number of one-way 3D ffts skipped until now (in case of fixed occupation numbers
797  if(iscf>0 .and. fixed_occ .and. (prtvol>2 .or. ikpt<=nkpt_max) )then
798    write(message,'(a,i0)')' vtowfk : number of one-way 3D ffts skipped in vtowfk until now =',nskip
799    call wrtout(std_out,message,'PERS')
800  end if
801 
802 !Norm-conserving only: Compute nonlocal part of total energy : rotate subvnl
803  if (gs_hamk%usepaw==0 .and. wfopta10 /= 1 .and. .not. newlobpcg ) then
804    call timab(586,1,tsec)   ! 'vtowfk(nonlocalpart)'
805    ABI_ALLOCATE(matvnl,(2,nband_k,nband_k))
806    ABI_ALLOCATE(mat1,(2,nband_k,nband_k))
807    mat1=zero
808 
809    if (wfopta10==4) then
810      enl_k(1:nband_k)=zero
811 
812      if (istwf_k==1) then
813        call zhemm('l','l',nband_k,nband_k,cone,totvnl,nband_k,evec,nband_k,czero,mat1,nband_k)
814        do iband=1,nband_k
815          res = cg_real_zdotc(nband_k,evec(:,iband),mat1(:,:,iband))
816          enl_k(iband)= res
817        end do
818      else if (istwf_k==2) then
819        ABI_ALLOCATE(evec_loc,(nband_k,nband_k))
820        ABI_ALLOCATE(mat_loc,(nband_k,nband_k))
821        do iband=1,nband_k
822          do jj=1,nband_k
823            evec_loc(iband,jj)=evec(2*iband-1,jj)
824          end do
825        end do
826        call dsymm('l','l',nband_k,nband_k,one,totvnl,nband_k,evec_loc,nband_k,zero,mat_loc,nband_k)
827        do iband=1,nband_k
828          enl_k(iband)=ddot(nband_k,evec_loc(:,iband),1,mat_loc(:,iband),1)
829        end do
830        ABI_DEALLOCATE(evec_loc)
831        ABI_DEALLOCATE(mat_loc)
832      end if
833 
834    else
835 !    MG: This version is much faster with good OMP scalability.
836 !    Construct upper triangle of matvnl from subvnl using full storage mode.
837      pidx=0
838      do jj=1,nband_k
839        do ii=1,jj
840          pidx=pidx+1
841          matvnl(1,ii,jj)=subvnl(2*pidx-1)
842          matvnl(2,ii,jj)=subvnl(2*pidx  )
843        end do
844      end do
845 
846      call zhemm('L','U',nband_k,nband_k,cone,matvnl,nband_k,evec,nband_k,czero,mat1,nband_k)
847 
848 !$OMP PARALLEL DO PRIVATE(res)
849      do iband=1,nband_k
850        res = cg_real_zdotc(nband_k,evec(:,iband),mat1(:,:,iband))
851        enl_k(iband) = res
852      end do
853    end if
854 
855    ABI_DEALLOCATE(matvnl)
856    ABI_DEALLOCATE(mat1)
857    call timab(586,2,tsec)
858  end if
859 
860 !###################################################################
861 
862  if (iscf<=0 .and. residk>dtset%tolwfr) then
863    write(message,'(a,2i5,a,es13.5)')&
864 &   'Wavefunctions not converged for nnsclo,ikpt=',nnsclo_now,ikpt,' max resid=',residk
865    MSG_WARNING(message)
866  end if
867 
868 
869 !Print out eigenvalues (hartree)
870  if (prtvol>2 .or. ikpt<=nkpt_max) then
871    write(message, '(5x,a,i5,2x,a,a,a,i4,a,i4,a)' ) &
872 &   'eigenvalues (hartree) for',nband_k,'bands',ch10,&
873 &   '              after ',inonsc,' non-SCF iterations with ',dtset%nline,' CG line minimizations'
874    call wrtout(std_out,message,'PERS')
875    do ii=0,(nband_k-1)/6
876      write(message, '(1p,6e12.4)' ) (eig_k(iband),iband=1+6*ii,min(6+6*ii,nband_k))
877      call wrtout(std_out,message,'PERS')
878    end do
879  else if(ikpt==nkpt_max+1)then
880    call wrtout(std_out,' vtowfk : prtvol=0 or 1, do not print more k-points.','PERS')
881  end if
882 
883 !Print out decomposition of eigenvalues in the non-selfconsistent case or if prtvol>=10
884  if( (iscf<0 .and. (prtvol>2 .or. ikpt<=nkpt_max)) .or. prtvol>=10)then
885    write(message, '(5x,a,i5,2x,a,a,a,i4,a,i4,a)' ) &
886 &   ' mean kinetic energy (hartree) for ',nband_k,' bands',ch10,&
887 &   '              after ',inonsc,' non-SCF iterations with ',dtset%nline,' CG line minimizations'
888    call wrtout(std_out,message,'PERS')
889 
890    do ii=0,(nband_k-1)/6
891      write(message, '(1p,6e12.4)' ) (ek_k(iband),iband=1+6*ii,min(6+6*ii,nband_k))
892      call wrtout(std_out,message,'PERS')
893    end do
894 
895    if (gs_hamk%usepaw==0) then
896      write(message, '(5x,a,i5,2x,a,a,a,i4,a,i4,a)' ) &
897 &     ' mean non-local energy (hartree) for ',nband_k,' bands',ch10,&
898 &     '              after ',inonsc,' non-SCF iterations with ',dtset%nline,' CG line minimizations'
899      call wrtout(std_out,message,'PERS')
900 
901      do ii=0,(nband_k-1)/6
902        write(message,'(1p,6e12.4)') (enl_k(iband),iband=1+6*ii,min(6+6*ii,nband_k))
903        call wrtout(std_out,message,'PERS')
904      end do
905    end if
906  end if
907 
908  !Hamiltonian constructor for gwls_sternheimer
909  if(dtset%optdriver==RUNL_GWLS) then
910    call build_H(dtset,mpi_enreg,cpopt,cg,gs_hamk,kg_k,kinpw)
911  end if
912 
913  if(wfopta10 /= 1 .and. .not. newlobpcg) then
914    ABI_DEALLOCATE(evec)
915    ABI_DEALLOCATE(subham)
916    !if (gs_hamk%usepaw==0) then
917    !if (wfopta10==4) then
918    ABI_DEALLOCATE(totvnl)
919    !else
920    ABI_DEALLOCATE(subvnl)
921    !end if
922    !end if
923    ABI_DEALLOCATE(subovl)
924  end if
925  if ( .not. newlobpcg ) then
926    ABI_DEALLOCATE(gsc)
927  end if
928 
929  if(wfoptalg==3) then
930    ABI_DEALLOCATE(eig_save)
931  end if
932 
933 !Structured debugging : if prtvol=-level, stop here.
934  if(prtvol==-level)then
935    write(message,'(a,a,a,i0,a)')' vtowfk : exit ',ch10,'  prtvol=-',level,', debugging mode => stop '
936    MSG_ERROR(message)
937  end if
938 
939  call timab(30,2,tsec)
940  call timab(28,2,tsec)
941 
942  DBG_EXIT("COLL")
943 
944 end subroutine vtowfk