TABLE OF CONTENTS


ABINIT/dfpt_cgwf [ Functions ]

[ Top ] [ Functions ]

NAME

 dfpt_cgwf

FUNCTION

 Update one single wavefunction (cwavef), non self-consistently.
 Uses a conjugate-gradient algorithm.
 Try to keep close to the formulas in PRB55, 10337 (1997), for the
 non-self-consistent case, except that we are computing here
 the second-derivative of the total energy, and not E(2). There
 is a factor of 2 between the two quantities ...
 The wavefunction that is generated is always orthogonal to cgq .
 It is orthogonal to the active Hilbert space, and will be complemented
 by contributions from the active space in the calling routine, if needed.

COPYRIGHT

 Copyright (C) 1999-2018 ABINIT group (XG,DRH,XW,FJ,MT,LB)
 This file is distributed under the terms of the
 GNU General Public License, see ~abinit/COPYING
 or http://www.gnu.org/copyleft/gpl.txt .
 For the initials of contributors, see ~abinit/doc/developers/contributors.txt .

INPUTS

  band=which particular band we are converging.
  berryopt=option for Berry phase
  cgq(2,mcgq)=wavefunction coefficients for ALL bands at k+Q
  cwave0(2,npw*nspinor)=GS wavefunction at k, in reciprocal space
  cwaveprj0(natom,nspinor*usecprj)=GS wave function at k projected with nl projectors
  eig0nk=0-order eigenvalue for the present wavefunction at k
  eig0_kq(nband)=GS eigenvalues at k+Q (hartree)
  grad_berry(2,mpw1,dtefield%mband_occ) = the gradient of the Berry phase term
  gscq(2,mgscq)=<g|S|Cnk+q> coefficients for ALL bands (PAW) at k+Q
  gs_hamkq <type(gs_hamiltonian_type)>=all data for the Hamiltonian at k+Q
  icgq=shift to be applied on the location of data in the array cgq
  igscq=shift to be applied on the location of data in the array gscq
  idir=direction of the perturbation
  ipert=type of the perturbation
  mcgq=second dimension of the cgq array
  mgscq=second dimension of gscq
  mpi_enreg=information about MPI parallelization
  mpw1=maximum number of planewave for first-order wavefunctions
  natom=number of atoms in cell.
  nband=number of bands.
  nbdbuf=number of buffer bands for the minimisation
  nline=number of line minimizations per band.
  npw=number of planewaves in basis sphere at given k.
  npw1=number of planewaves in basis sphere at k+Q
  nspinor=number of spinorial components of the wavefunctions
  opt_gvnl1=option controlling the use of gvnl1 array:
            0: used as an output
            1: used as an input:    - used only for ipert=natom+2
                 NCPP: contains the ddk 1-st order WF
                 PAW: contains frozen part of 1st-order hamiltonian
            2: used as input/ouput:    - used only for PAW and ipert=natom+2
                 At input: contains the ddk 1-st order WF (times i)
                 At output: contains frozen part of 1st-order hamiltonian
  prtvol=control print volume and debugging output
  quit= if 1, proceeds to smooth ending of the job.
  dfpt_sciss=scissor shift (Ha)
  tolrde=tolerance on the ratio of differences of energies (for the line minimisation)
  tolwfr=tolerance on largest wf residual
  usedcwavef=flag controlling the use of dcwavef array (PAW only):
             0: not used (not allocated)
             1: used as input
             2: used as output
  wfoptalg=govern the choice of algorithm for wf optimisation (0 or 10, at present)

OUTPUT

  eig1_k(2*nband**2)=matrix of first-order eigenvalues (hartree)
                     eig1(:,ii,jj)=<C0 ii|H1|C0 jj> for norm-conserving psps
                     eig1(:,ii,jj)=<C0 ii|H1-(eig0_k+eig0_k+q)/2.S(1)|C0 jj> for PAW
  ghc(2,npw1*nspinor)=<G|H0-eig0_k.I|C1 band,k> (NCPP) or <G|H0-eig0_k.S0|C1 band,k> (PAW)
  gvnlc(2,npw1*nspinor)=<G|Vnl|C1 band,k>
  gvnl1(2,npw1*nspinor)=  part of <G|K1+Vnl1|C0 band,k> not depending on VHxc1           (NCPP)
                       or part of <G|K1+Vnl1-eig0k.S1|C0 band,k> not depending on VHxc1 (PAW)
  resid=wf residual for current band
  gh1c_n= <G|H1|C0 band,k> (NCPP) or <G|H1-eig0k.S1|C0 band,k> (PAW).
          This vector is not projected on the subspace orthogonal to the WF.
  === if gs_hamkq%usepaw==1 ===
  gsc(2,npw1*nspinor*usepaw)=<G|S0|C1 band,k>

SIDE EFFECTS

  Input/Output:
  cwavef(2,npw1*nspinor)=first-order  wavefunction at k,q, in reciprocal space (updated)
  === if gs_hamkq%usepaw==1 ===
  cwaveprj(natom,nspinor)= wave functions at k projected with nl projectors
  === if usedcwavef>0 ===
  dcwavef(2,npw1*nspinor)=change of wavefunction due to change of overlap:
         dcwavef is delta_Psi(1)=-1/2.Sum_{j}[<C0_k+q_j|S(1)|C0_k_i>.|C0_k+q_j>]
         see PRB 78, 035105 (2008), Eq. (42)
         input if usedcwavef=1, output if usedcwavef=2

PARENTS

      dfpt_vtowfk

CHILDREN

      cg_precon,cg_zaxpy,cg_zcopy,dotprod_g,getdc1,getgh1c,getghc
      pawcprj_alloc,pawcprj_axpby,pawcprj_free,pawcprj_set_zero,projbd
      sqnorm_g,timab,wrtout

SOURCE

 104 #if defined HAVE_CONFIG_H
 105 #include "config.h"
 106 #endif
 107 
 108 #include "abi_common.h"
 109 
 110 subroutine dfpt_cgwf(band,berryopt,cgq,cwavef,cwave0,cwaveprj,cwaveprj0,rf2,dcwavef,&
 111 & eig0nk,eig0_kq,eig1_k,ghc,gh1c_n,grad_berry,gsc,gscq,&
 112 & gs_hamkq,gvnlc,gvnl1,icgq,idir,ipert,igscq,&
 113 & mcgq,mgscq,mpi_enreg,mpw1,natom,nband,nbdbuf,nline_in,npw,npw1,nspinor,&
 114 & opt_gvnl1,prtvol,quit,resid,rf_hamkq,dfpt_sciss,tolrde,tolwfr,&
 115 & usedcwavef,wfoptalg,nlines_done)
 116 
 117  use defs_basis
 118  use defs_abitypes
 119  use m_profiling_abi
 120  use m_errors
 121  use m_xmpi
 122  use m_cgtools
 123  use m_rf2
 124 
 125  use m_pawcprj,     only : pawcprj_type, pawcprj_alloc, pawcprj_free, pawcprj_set_zero, pawcprj_axpby
 126  use m_hamiltonian, only : gs_hamiltonian_type,rf_hamiltonian_type,KPRIME_H_KPRIME
 127 
 128 !This section has been created automatically by the script Abilint (TD).
 129 !Do not modify the following lines by hand.
 130 #undef ABI_FUNC
 131 #define ABI_FUNC 'dfpt_cgwf'
 132  use interfaces_14_hidewrite
 133  use interfaces_18_timing
 134  use interfaces_66_wfs
 135 !End of the abilint section
 136 
 137  implicit none
 138 
 139 !Arguments ------------------------------------
 140 !scalars
 141  integer,intent(in) :: band,berryopt
 142  integer,intent(in) :: icgq,idir,igscq,ipert,mcgq,mgscq,mpw1,natom,nband
 143  integer,intent(in) :: nbdbuf,nline_in,npw,npw1,nspinor,opt_gvnl1
 144  integer,intent(in) :: prtvol,quit,usedcwavef,wfoptalg
 145  integer,intent(inout) :: nlines_done
 146  real(dp),intent(in) :: eig0nk,dfpt_sciss,tolrde,tolwfr
 147  real(dp),intent(out) :: resid
 148  type(MPI_type),intent(in) :: mpi_enreg
 149  type(rf2_t), intent(in) :: rf2
 150  type(gs_hamiltonian_type),intent(inout) :: gs_hamkq
 151  type(rf_hamiltonian_type),intent(inout) :: rf_hamkq
 152 !arrays
 153  real(dp),intent(in) :: cgq(2,mcgq),eig0_kq(nband)
 154  real(dp),intent(in) :: grad_berry(2,mpw1*nspinor,nband),gscq(2,mgscq)
 155  real(dp),intent(inout) :: cwave0(2,npw*nspinor),cwavef(2,npw1*nspinor)
 156  real(dp),intent(inout) :: dcwavef(2,npw1*nspinor*((usedcwavef+1)/2))
 157  real(dp),intent(inout) :: eig1_k(2*nband**2)
 158  real(dp),intent(out) :: gh1c_n(2,npw1*nspinor)
 159  real(dp),intent(out) :: ghc(2,npw1*nspinor)
 160  real(dp),intent(out) :: gsc(2,npw1*nspinor*gs_hamkq%usepaw)
 161  real(dp),intent(inout) :: gvnl1(2,npw1*nspinor),gvnlc(2,npw1*nspinor)
 162  type(pawcprj_type),intent(inout) :: cwaveprj(natom,nspinor)
 163  type(pawcprj_type),intent(inout) :: cwaveprj0(natom,nspinor*gs_hamkq%usecprj)
 164 
 165 !Local variables-------------------------------
 166 !scalars
 167  integer,parameter :: level=15,tim_getgh1c=1,tim_getghc=2,tim_projbd=2
 168  integer,save :: nskip=0
 169  integer :: cpopt,iband,igs,iline,indx_cgq,ipw,me_g0,comm_fft
 170  integer :: ipws,ispinor,istwf_k,jband,nline,optlocal,optnl,shift_band,sij_opt
 171  integer :: test_is_ok,useoverlap,usepaw,usevnl,usetolrde
 172  real(dp) :: d2edt2,d2te,d2teold,dedt,deltae,deold,dotgg
 173  real(dp) :: dotgp,doti,dotr,eshift,eshiftkq,gamma,optekin,prod1,prod2
 174  real(dp) :: theta,tol_restart,u1h0me0u1
 175  logical :: gen_eigenpb
 176  character(len=500) :: msg
 177 !arrays
 178  real(dp) :: dummy(0,0),tsec(2)
 179  real(dp),allocatable :: conjgr(:,:),cwaveq(:,:),cwwork(:,:),direc(:,:)
 180  real(dp),allocatable :: gberry(:,:),gh1c(:,:),gh_direc(:,:),gresid(:,:),gvnl1_saved(:,:)
 181  real(dp),allocatable :: gs1c(:,:),gvnl_direc(:,:),pcon(:),sconjgr(:,:)
 182  real(dp),allocatable :: scprod(:,:),work(:,:),work1(:,:),work2(:,:)
 183  real(dp),pointer :: kinpw1(:)
 184  type(pawcprj_type),allocatable :: conjgrprj(:,:)
 185  type(pawcprj_type) :: cprj_dummy(0,0)
 186 
 187 ! *********************************************************************
 188 
 189  DBG_ENTER("COLL")
 190 
 191  call timab(122,1,tsec)
 192 
 193 !======================================================================
 194 !========= LOCAL VARIABLES DEFINITIONS AND ALLOCATIONS ================
 195 !====================================================================
 196 
 197  nline = nline_in
 198  usetolrde = 1
 199 ! LB-29/11/17:
 200 ! For ipert=natom+10 or ipert=natom+11, the Sternheimer equation is non-self-consistent, so we have
 201 ! to solve a true linear problem (A.x = b) for each kpoint and band. In this case, the conjugate
 202 ! gradient algorithm can find the solution up to numerical precision with only ONE call of dfpt_cgwf
 203 ! (per kpoint and band). This way, in order to avoid useless scfcv loops (and calls of rf2_init, which
 204 ! can be time-consuming), we want leave this routine only if tolwfr is reached so tolrde and nline are
 205 ! not used to end the procedure.
 206 ! NOTE : This is also true for ipert==natom+1, but a lot of references in the test suite have to be
 207 ! changed...
 208  if(ipert==natom+10.or.ipert==natom+11) then
 209    nline = 200 ! The default value is only 4...
 210    if (nline_in>200) nline = nline_in ! Keep the possibility to increase nline
 211    usetolrde = 0  ! see below
 212  end if
 213 
 214 !Tell us what is going on:
 215  if (prtvol>=10) then
 216    write(msg,'(a,i6,2x,a,i3,a)')' --- dfpt_cgwf is called for band',band,'for',nline,' lines'
 217    call wrtout(std_out,msg,'PERS')
 218  end if
 219 
 220  me_g0 = mpi_enreg%me_g0
 221  comm_fft = mpi_enreg%comm_fft
 222 
 223 !if PAW, one has to solve a generalized eigenproblem
 224  usepaw=gs_hamkq%usepaw
 225  gen_eigenpb=(usepaw==1)
 226  useoverlap=0;if (gen_eigenpb) useoverlap=1
 227 
 228 !Use scissor shift on 0-order eigenvalue
 229  eshift=eig0nk-dfpt_sciss
 230 
 231 !Additional initializations
 232  istwf_k=gs_hamkq%istwf_k
 233  optekin=0;if (wfoptalg>=10) optekin=1
 234  tol_restart=tol12;if (gen_eigenpb) tol_restart=tol8
 235  if (ipert == natom+10 .or. ipert == natom+11) then
 236    tol_restart = tol7
 237  end if
 238  kinpw1 => gs_hamkq%kinpw_kp
 239 
 240 !Memory allocations
 241  ABI_ALLOCATE(gh1c,(2,npw1*nspinor))
 242  ABI_ALLOCATE(pcon,(npw1))
 243  ABI_ALLOCATE(scprod,(2,nband))
 244 
 245  if (berryopt== 4.or.berryopt== 6.or.berryopt== 7.or.&
 246 & berryopt==14.or.berryopt==16.or.berryopt==17) then
 247    ABI_ALLOCATE(gberry,(2,npw1*nspinor))
 248    gberry(:,1:npw1*nspinor)=grad_berry(:,1:npw1*nspinor,band)
 249  else
 250    ABI_ALLOCATE(gberry,(0,0))
 251  end if
 252 
 253  shift_band=(band-1)*npw1*nspinor
 254 
 255 !Several checking statements
 256  if (prtvol==-level.or.prtvol==-19.or.prtvol==-20) then
 257    write(msg,'(a)') " ** cgwf3 : debugging mode, tests will be done" 
 258    ! Search CGWF3_WARNING in the log file to find errors (if any)
 259    call wrtout(std_out,msg,'PERS')
 260    ABI_ALLOCATE(work,(2,npw1*nspinor))
 261    ABI_ALLOCATE(work1,(2,npw1*nspinor))
 262 !  ===== Check <Psi_k+q^(0)|S(0)|Psi_k+q^(0)>=delta
 263    if (.not.gen_eigenpb) work1(:,:)=cgq(:,1+npw1*nspinor*(band-1)+icgq:npw1*nspinor*band+icgq)
 264    if (     gen_eigenpb) work1(:,:)=gscq(:,1+npw1*nspinor*(band-1)+igscq:npw1*nspinor*band+igscq)
 265    do iband=1,nband
 266      work(:,:)=cgq(:,1+npw1*nspinor*(iband-1)+icgq:npw1*nspinor*iband+icgq)
 267      call dotprod_g(dotr,doti,istwf_k,npw1*nspinor,2,work1,work,me_g0,mpi_enreg%comm_spinorfft)
 268      test_is_ok=1
 269      if(iband==band) then
 270        if(abs(dotr-1)>tol12) test_is_ok=0
 271      else
 272        if(abs(dotr)>tol12) test_is_ok=0
 273      end if
 274      if(abs(doti)>tol12) test_is_ok=0
 275      if(test_is_ok/=1) then 
 276        write(msg,'(a,i3,a,2es22.15)') "CGWF3_WARNING : <Psi_k+q,i^(0)|S(0)|Psi_k+q,j^(0)> for band j=",iband," is ",dotr,doti
 277        call wrtout(std_out,msg,'PERS')
 278      end if
 279    end do
 280 !  ===== Check Pc.Psi_k+q^(0)=0
 281    do iband=1,nband
 282      work(:,:)=cgq(:,1+npw1*nspinor*(iband-1)+icgq:npw1*nspinor*iband+icgq)
 283      call projbd(cgq,work,-1,icgq,igscq,istwf_k,mcgq,mgscq,nband,npw1,nspinor,&
 284 &     gscq,scprod,0,tim_projbd,useoverlap,me_g0,comm_fft)
 285      call sqnorm_g(dotr,istwf_k,npw1*nspinor,work,me_g0,comm_fft)
 286      if(sqrt(dotr)>tol12) then
 287        write(msg,'(a,i3,a,es22.15)') "CGWF3_WARNING : Norm of Pc.Psi_k+q_j^(0) for band j=",iband," is ",sqrt(dotr)
 288        call wrtout(std_out,msg,'PERS')
 289      end if
 290    end do
 291 !  ===== Check Pc.Psi_k^(0)=0
 292    work(:,:)=cwave0(:,:)
 293    call projbd(cgq,work,-1,icgq,igscq,istwf_k,mcgq,mgscq,nband,npw1,nspinor,&
 294 &   gscq,scprod,0,tim_projbd,useoverlap,me_g0,comm_fft)
 295    call sqnorm_g(dotr,istwf_k,npw1*nspinor,work,me_g0,comm_fft)
 296    if(sqrt(dotr)>tol12) then
 297      write(msg,'(a,i3,a,es22.15)') "CGWF3_WARNING : Norm of Pc.Psi_k^(0) for band ",band," is ",sqrt(dotr)
 298      call wrtout(std_out,msg,'PERS')
 299    end if
 300 !  ===== Check Pc.dcwavef=0 (for 2nd order only)
 301    if(ipert==natom+10.or.ipert==natom+11) then
 302      work(:,:)=rf2%dcwavef(:,1+shift_band:npw1*nspinor+shift_band)
 303      call projbd(cgq,work,-1,icgq,igscq,istwf_k,mcgq,mgscq,nband,npw1,nspinor,&
 304 &     gscq,scprod,0,tim_projbd,useoverlap,me_g0,comm_fft)
 305      call sqnorm_g(dotr,istwf_k,npw1*nspinor,work,me_g0,comm_fft)
 306      if(sqrt(dotr)>tol10) then
 307        write(msg,'(a,i3,a,es22.15)') "CGWF3_WARNING : Norm of Pc.dcwavef for band ",band," is ",sqrt(dotr)
 308        call wrtout(std_out,msg,'PERS')
 309      end if
 310    end if
 311 !  ===== Check Pc^*.S(0).Psi_k+q^(0)=0
 312    if (gen_eigenpb) then
 313      do iband=1,nband
 314        work(:,:)=gscq(:,1+npw1*nspinor*(iband-1)+igscq:npw1*nspinor*iband+igscq)
 315        call projbd(gscq,work,-1,igscq,icgq,istwf_k,mgscq,mcgq,nband,npw1,nspinor,&
 316 &       cgq,scprod,0,tim_projbd,useoverlap,me_g0,comm_fft)
 317        call sqnorm_g(dotr,istwf_k,npw1*nspinor,work,me_g0,comm_fft)
 318        if(sqrt(dotr)>tol12) then
 319          write(msg,'(a,i3,a,es22.15)') "CGWF3_WARNING : Norm of Pc^*.S(0).Psi_k+q_j^(0) for band j=",iband," is ",sqrt(dotr)
 320          call wrtout(std_out,msg,'PERS')
 321        end if
 322      end do
 323    end if
 324    ABI_DEALLOCATE(work)
 325    ABI_DEALLOCATE(work1)
 326  end if
 327 
 328 !======================================================================
 329 !========== INITIALISATION OF MINIMIZATION ITERATIONS =================
 330 !======================================================================
 331 
 332  if(ipert/=natom+10.and.ipert/=natom+11) then
 333 !  The following is needed for first order perturbations only
 334 !  Otherwise, the work is already done in rf2_init (called in dfpt_vtowfk.F90)
 335 
 336 !  Compute H(1) applied to GS wavefunction Psi(0)
 337    if (gen_eigenpb) then
 338      sij_opt=1
 339      ABI_ALLOCATE(gs1c,(2,npw1*nspinor))
 340    else
 341      ABI_ALLOCATE(gs1c,(0,0))
 342      sij_opt=0
 343    end if
 344    usevnl=1; optlocal=1; optnl=2
 345    if (prtvol==-level.or.prtvol==-19) then
 346      ABI_ALLOCATE(gvnl1_saved,(2,npw1*nspinor))
 347      gvnl1_saved(:,:) = gvnl1(:,:)
 348    end if
 349    call getgh1c(berryopt,cwave0,cwaveprj0,gh1c,gberry,gs1c,gs_hamkq,gvnl1,idir,ipert,eshift,&
 350 &   mpi_enreg,optlocal,optnl,opt_gvnl1,rf_hamkq,sij_opt,tim_getgh1c,usevnl)
 351 
 352    if (gen_eigenpb) then
 353      if (ipert/=natom+2) then  ! S^(1) is zero for ipert=natom+2
 354 !$OMP PARALLEL
 355 !$OMP DO
 356        do ipw=1,npw1*nspinor
 357          gh1c (1:2,ipw)=gh1c (1:2,ipw)-eshift*gs1c(1:2,ipw)
 358        end do
 359 !$OMP END DO NOWAIT
 360        if (opt_gvnl1/=1) then
 361 !$OMP DO
 362          do ipw=1,npw1*nspinor
 363            gvnl1(1:2,ipw)=gvnl1(1:2,ipw)-eshift*gs1c(1:2,ipw)
 364          end do
 365 !$OMP END DO NOWAIT
 366        end if
 367 !$OMP END PARALLEL
 368      end if
 369 
 370 !    If generalized eigenPb and dcwavef requested, compute it:
 371 !    dcwavef is delta_Psi(1)=-1/2.Sum_{j}[<C0_k+q_j|S(1)|C0_k_i>.|C0_k+q_j>]
 372 !    see PRB 78, 035105 (2008), Eq. (42)
 373      if (usedcwavef==2) then
 374        call getdc1(cgq,cprj_dummy,dcwavef,cprj_dummy,0,icgq,istwf_k,mcgq,0,&
 375 &       mpi_enreg,natom,nband,npw1,nspinor,0,gs1c)
 376      end if
 377    end if
 378 
 379  else ! 2nd order case (wrt k perturbation)
 380    ! Copy RHS_Stern(:,:) of the given band in gh1c
 381    gh1c(:,:)=rf2%RHS_Stern(:,1+shift_band:npw1*nspinor+shift_band)
 382  end if
 383 
 384 !Check that Pc^*.(H^(0)-E.S^(0)).delta_Psi^(1) is zero ! This is a consequence of P_c delta_Psi^(1) = 0
 385  if (prtvol==-level.and.usedcwavef==2) then
 386    ABI_ALLOCATE(cwwork,(2,npw1*nspinor))
 387    cwwork=dcwavef
 388 !  - Apply H^(0)-E.S^(0) to delta_Psi^(1)
 389    sij_opt=0;if (gen_eigenpb) sij_opt=-1
 390    cpopt=-1
 391    ABI_ALLOCATE(work,(2,npw1*nspinor))
 392    ABI_ALLOCATE(work1,(2,npw1*nspinor*((sij_opt+1)/2)))
 393    ABI_ALLOCATE(work2,(2,npw1*nspinor))
 394    call getghc(cpopt,cwwork,conjgrprj,work,work1,gs_hamkq,work2,eshift,mpi_enreg,&
 395 &   1,prtvol,sij_opt,tim_getghc,0,select_k=KPRIME_H_KPRIME)
 396    cwwork=work
 397    ABI_DEALLOCATE(work)
 398    ABI_DEALLOCATE(work1)
 399    ABI_DEALLOCATE(work2)
 400 !  -Apply Pc^*
 401    call projbd(gscq,cwwork,-1,igscq,icgq,istwf_k,mgscq,mcgq,nband,npw1,nspinor,&
 402 &   cgq,scprod,0,tim_projbd,useoverlap,me_g0,comm_fft)
 403    call sqnorm_g(dotr,istwf_k,npw1*nspinor,cwwork,me_g0,comm_fft)
 404    ABI_DEALLOCATE(cwwork)
 405    if(sqrt(dotr)>tol12) then
 406      write(msg,'(a,i3,a,es22.15)') 'CGWF3_WARNING : |Pc^*.(H^(0)-E.S^(0)).delta_Psi^(1)| (band ',band,')=',sqrt(dotr)
 407      call wrtout(std_out,msg,'PERS')
 408    end if
 409  end if
 410 
 411  call cg_zcopy(npw1*nspinor,gh1c,gh1c_n)
 412 
 413 !Projecting out all bands (this could be avoided)
 414 !Note the subtlety:
 415 !-For the generalized eigenPb, S|cgq> is used in place of |cgq>,
 416 !in order to apply P_c+ projector (see PRB 73, 235101 (2006), Eq. (71), (72))
 417  if(gen_eigenpb)then
 418    call projbd(gscq,gh1c,-1,igscq,icgq,istwf_k,mgscq,mcgq,nband,npw1,nspinor,&
 419 &   cgq,scprod,0,tim_projbd,useoverlap,me_g0,comm_fft)
 420  else
 421    call projbd(cgq,gh1c,-1,icgq,0,istwf_k,mcgq,mgscq,nband,npw1,nspinor,&
 422 &   dummy,scprod,0,tim_projbd,useoverlap,me_g0,comm_fft)
 423  end if
 424 
 425  if(ipert/=natom+10.and.ipert/=natom+11) then
 426  ! For ipert=natom+10 or natom+11, this is done in rf2_init
 427 
 428    !The array eig1_k contains:
 429    !<u_(iband,k+q)^(0)|H_(k+q,k)^(1)|u_(jband,k)^(0)>                           (NC psps)
 430    !or <u_(iband,k+q)^(0)|H_(k+q,k)^(1)-(eig0_k+eig0_k+q)/2.S^(1)|u_(jband,k)^(0)> (PAW)
 431    jband=(band-1)*2*nband
 432    if (gen_eigenpb) then
 433      indx_cgq=icgq
 434      do iband=1,nband
 435        eshiftkq=half*(eig0_kq(iband)-eig0nk)
 436        call dotprod_g(dotr,doti,istwf_k,npw1*nspinor,2,cgq(:,indx_cgq+1:indx_cgq+npw1*nspinor),gs1c,&
 437 &       me_g0,mpi_enreg%comm_spinorfft)
 438        eig1_k(2*iband-1+jband)=scprod(1,iband)-eshiftkq*dotr
 439        eig1_k(2*iband  +jband)=scprod(2,iband)-eshiftkq*doti
 440        indx_cgq=indx_cgq+npw1*nspinor
 441      end do
 442    else
 443      do iband=1,nband
 444        eig1_k(2*iband-1+jband)=scprod(1,iband)
 445        eig1_k(2*iband  +jband)=scprod(2,iband)
 446      end do
 447    end if
 448 
 449   !No more need of gs1c
 450    ABI_DEALLOCATE(gs1c)
 451 
 452  end if
 453 
 454 !Filter the wavefunctions for large modified kinetic energy (see routine mkkin.f)
 455  do ispinor=1,nspinor
 456    ipws=(ispinor-1)*npw1
 457 !$OMP PARALLEL DO PRIVATE(ipw) SHARED(cwavef,kinpw1,ipws,npw1)
 458    do ipw=1+ipws,npw1+ipws
 459      if(kinpw1(ipw-ipws)>huge(zero)*1.d-11)then
 460        cwavef(1:2,ipw)=zero
 461      end if
 462    end do
 463  end do
 464 
 465 !Apply the orthogonality condition: <C1 k,q|C0 k+q>=0 (NCPP) or <C1 k,q|S0|C0 k+q>=0 (PAW)
 466 !Project out all bands from cwavef, i.e. apply P_c projector on cwavef
 467 !(this is needed when there are some partially or unoccupied states)
 468  call projbd(cgq,cwavef,-1,icgq,igscq,istwf_k,mcgq,mgscq,nband,npw1,nspinor,&
 469 & gscq,scprod,0,tim_projbd,useoverlap,me_g0,comm_fft)
 470 
 471  if(ipert/=natom+10.and.ipert/=natom+11) then
 472 !    If PAW, the orthogonality condition is
 473 !    <C1 k,q|S0|C0 k+q>+1/2<C0 k|S1|C0 k+q>=0
 474    if (usepaw==1.and.usedcwavef>0) then
 475 !$OMP PARALLEL DO
 476      do ipw=1,npw1*nspinor
 477        cwavef(1:2,ipw)=cwavef(1:2,ipw)+dcwavef(1:2,ipw)
 478      end do
 479    end if
 480  else ! In 2nd order case, dcwavef/=0 even in NC, and it is already computed in rf2_init (called in dfpt_vtowfk.F90)
 481    do ipw=1,npw1*nspinor
 482      cwavef(:,ipw)=cwavef(:,ipw)+rf2%dcwavef(:,ipw+shift_band)
 483    end do
 484  end if
 485 
 486 !Treat the case of buffer bands
 487  if(band>max(1,nband-nbdbuf))then
 488    cwavef=zero
 489    ghc   =zero
 490    gvnlc =zero
 491    if (gen_eigenpb) gsc=zero
 492    if (usedcwavef==2) dcwavef=zero
 493    if (usepaw==1) then
 494      call pawcprj_set_zero(cwaveprj)
 495    end if
 496    if (usedcwavef==2) dcwavef=zero
 497 !  A small negative residual will be associated with these
 498    resid=-0.1_dp
 499 !  Number of one-way 3D ffts skipped
 500    nskip=nskip+nline
 501 
 502  else
 503 !  If not a buffer band, perform the optimisation
 504 
 505    ABI_ALLOCATE(conjgr,(2,npw1*nspinor))
 506    ABI_ALLOCATE(direc,(2,npw1*nspinor))
 507    ABI_ALLOCATE(gresid,(2,npw1*nspinor))
 508    ABI_ALLOCATE(cwaveq,(2,npw1*nspinor))
 509    if (usepaw==1) then
 510      ABI_DATATYPE_ALLOCATE(conjgrprj,(natom,nspinor))
 511      call pawcprj_alloc(conjgrprj,0,gs_hamkq%dimcprj)
 512    else
 513      ABI_DATATYPE_ALLOCATE(conjgrprj,(0,0))
 514    end if
 515 
 516    cwaveq(:,:)=cgq(:,1+npw1*nspinor*(band-1)+icgq:npw1*nspinor*band+icgq)
 517    dotgp=one
 518 
 519 !  Here apply H(0) at k+q to input orthogonalized 1st-order wfs
 520    sij_opt=0;if (gen_eigenpb) sij_opt=1
 521    cpopt=-1+usepaw
 522    call getghc(cpopt,cwavef,cwaveprj,ghc,gsc,gs_hamkq,gvnlc,eshift,mpi_enreg,1,&
 523 &   prtvol,sij_opt,tim_getghc,0,select_k=KPRIME_H_KPRIME)
 524 
 525 !  ghc also includes the eigenvalue shift
 526    if (gen_eigenpb) then
 527      call cg_zaxpy(npw1*nspinor,(/-eshift,zero/),gsc,ghc)
 528    else
 529      call cg_zaxpy(npw1*nspinor,(/-eshift,zero/),cwavef,ghc)
 530    end if
 531 
 532 !  Initialize resid, in case of nline==0
 533    resid=zero
 534 
 535 !  ======================================================================
 536 !  ====== BEGIN LOOP FOR A GIVEN BAND: MINIMIZATION ITERATIONS ==========
 537 !  ======================================================================
 538    do iline=1,nline
 539 
 540 !    ======================================================================
 541 !    ================= COMPUTE THE RESIDUAL ===============================
 542 !    ======================================================================
 543 !    Note that gresid (=steepest-descent vector, Eq.(26) of PRB 55, 10337 (1996))
 544 !    is precomputed to garantee cancellation of errors
 545 !    and allow residuals to reach values as small as 1.0d-24 or better.
 546      if (berryopt== 4.or.berryopt== 6.or.berryopt== 7.or.&
 547 &     berryopt==14.or.berryopt==16.or.berryopt==17) then
 548        if (ipert==natom+2) then
 549          if (opt_gvnl1/=1) gvnl1=zero
 550 !$OMP PARALLEL DO
 551          do ipw=1,npw1*nspinor
 552            gresid(1:2,ipw)=-ghc(1:2,ipw)-gh1c(1:2,ipw)
 553          end do
 554        else
 555 !$OMP PARALLEL DO
 556          do ipw=1,npw1*nspinor
 557            gresid(1,ipw)=-ghc(1,ipw)-gh1c(1,ipw)+gberry(2,ipw)
 558            gresid(2,ipw)=-ghc(2,ipw)-gh1c(2,ipw)-gberry(1,ipw)
 559          end do
 560        end if
 561      else
 562 !$OMP PARALLEL DO
 563        do ipw=1,npw1*nspinor
 564          gresid(1:2,ipw)=-ghc(1:2,ipw)-gh1c(1:2,ipw)
 565        end do
 566      end if
 567 
 568 !    ======================================================================
 569 !    =========== PROJECT THE STEEPEST DESCENT DIRECTION ===================
 570 !    ========= OVER THE SUBSPACE ORTHOGONAL TO OTHER BANDS ================
 571 !    ======================================================================
 572 !    Project all bands from gresid into direc:
 573 !    The following projection over the subspace orthogonal to occupied bands
 574 !    is not optional in the RF case, unlike the GS case.
 575 !    However, the order of operations could be changed, so that
 576 !    as to make it only applied at the beginning, to H(1) psi(0),
 577 !    so, THIS IS TO BE REEXAMINED
 578 !    Note the subtlety:
 579 !    -For the generalized eigenPb, S|cgq> is used in place of |cgq>,
 580 !    in order to apply P_c+ projector (see PRB 73, 235101 (2006), Eq. (71), (72)
 581      if (gen_eigenpb) then
 582        call projbd(gscq,gresid,-1,igscq,icgq,istwf_k,mgscq,mcgq,nband,npw1,nspinor,&
 583 &       cgq,scprod,0,tim_projbd,useoverlap,me_g0,comm_fft)
 584      else
 585        call projbd(cgq,gresid,-1,icgq,0,istwf_k,mcgq,mgscq,nband,npw1,nspinor,&
 586 &       dummy,scprod,0,tim_projbd,useoverlap,me_g0,comm_fft)
 587      end if
 588 
 589      call cg_zcopy(npw1*nspinor,gresid,direc)
 590 
 591 !    ======================================================================
 592 !    ============== CHECK FOR CONVERGENCE CRITERIA ========================
 593 !    ======================================================================
 594 
 595 !    Compute second-order derivative of the energy using a variational expression
 596      call dotprod_g(prod1,doti,istwf_k,npw1*nspinor,1,cwavef,gresid,me_g0,mpi_enreg%comm_spinorfft)
 597      call dotprod_g(prod2,doti,istwf_k,npw1*nspinor,1,cwavef,gh1c,me_g0,mpi_enreg%comm_spinorfft)
 598      d2te=two*(-prod1+prod2)
 599 
 600 !    DEBUG Keep this debugging feature !
 601 !     write(msg,'(a,f14.6,a,f14.6)') 'prod1 = ',prod1,' prod2 = ',prod2
 602 !     call wrtout(std_out,msg,'PERS')
 603 !    ENDDEBUG
 604 
 605 !    Compute <u_m(1)|H(0)-e_m(0)|u_m(1)>
 606 !    (<u_m(1)|H(0)-e_m(0).S|u_m(1)> if gen. eigenPb),
 607 !    that should be positive,
 608 !    except when the eigenvalue eig_mk(0) is higher than
 609 !    the lowest non-treated eig_mk+q(0). For insulators, this
 610 !    has no influence, but for metallic occupations,
 611 !    the conjugate gradient algorithm breaks down. The solution adopted here
 612 !    is very crude, and rely upon the fact that occupancies of such
 613 !    levels should be smaller and smaller with increasing nband, so that
 614 !    a convergence study will give the right result.
 615 !    The same trick is also used later.
 616      u1h0me0u1=-prod1-prod2
 617 !    Some tolerance is allowed, to account for very small numerical inaccuracies and cancellations.
 618      if(u1h0me0u1<-tol_restart)then
 619        if (prtvol==-level.or.prtvol==-19) then
 620          write(msg,'(a,es22.13e3)') '  cgwf3: u1h0me0u1 = ',u1h0me0u1
 621          call wrtout(std_out,msg,'PERS')
 622        end if
 623        cwavef =zero
 624        ghc    =zero
 625        gvnlc  =zero
 626        if (gen_eigenpb) gsc(:,:)=zero
 627        if (usepaw==1) then
 628          call pawcprj_set_zero(cwaveprj)
 629        end if
 630 !      A negative residual will be the signal of this problem ...
 631        resid=-one
 632        call wrtout(std_out,' dfpt_cgwf: problem of minimisation (likely metallic), set resid to -1','PERS')
 633 !      Number of one-way 3D ffts skipped
 634        nskip=nskip+(nline-iline+1)
 635 !      Exit from the loop on iline
 636        exit
 637      end if
 638 
 639 !    Compute residual (squared) norm
 640      call sqnorm_g(resid,istwf_k,npw1*nspinor,gresid,me_g0,comm_fft)
 641      if (prtvol==-level.or.prtvol==-19)then
 642        write(msg,'(a,a,i3,f14.6,a,a,4es12.4)') ch10,&
 643 &       ' dfpt_cgwf : iline,eshift     =',iline,eshift,ch10,&
 644 &       '         resid,prod1,prod2,d2te=',resid,prod1,prod2,d2te
 645        call wrtout(std_out,msg,'PERS')
 646      end if
 647 
 648 !    If residual sufficiently small stop line minimizations
 649      if (resid<tolwfr) then
 650        if(prtvol>=10)then
 651          write(msg,'(a,i4,a,i2,a,es12.4)')' dfpt_cgwf: band',band,' converged after ',iline,' line minimizations: resid = ',resid
 652          call wrtout(std_out,msg,'PERS')
 653        end if
 654        nskip=nskip+(nline-iline+1)  ! Number of two-way 3D ffts skipped
 655        exit                         ! Exit from the loop on iline
 656      end if
 657 
 658 !    If user require exiting the job, stop line minimisations
 659      if (quit==1) then
 660        write(msg,'(a,i0)')' dfpt_cgwf: user require exiting => skip update of band ',band
 661        call wrtout(std_out,msg,'PERS')
 662        nskip=nskip+(nline-iline+1)  ! Number of two-way 3D ffts skipped
 663        exit                         ! Exit from the loop on iline
 664      end if
 665 
 666 !    Check that d2te is decreasing on succeeding lines:
 667      if (iline/=1) then
 668        !if (d2te>d2teold+tol12) then
 669        if (d2te>d2teold+tol6) then
 670          write(msg,'(a,i0,a,e14.6,a,e14.6)')'New trial energy at line ',iline,'=',d2te,'is higher than former:',d2teold
 671          MSG_WARNING(msg)
 672        end if
 673      end if
 674      d2teold=d2te
 675 
 676 !    DEBUG Keep this debugging feature !
 677 !    call sqnorm_g(dotr,istwf_k,npw1*nspinor,direc,me_g0,comm_fft)
 678 !    write(std_out,*)' dfpt_cgwf : before precon, direc**2=',dotr
 679 !    if (gen_eigenpb) then
 680 !    call dotprod_g(dotr,doti,istwf_k,npw1*nspinor,1,cwaveq,&
 681 !    &                 gscq(:,1+npw1*nspinor*(band-1)+igscq:npw1*nspinor*band+igscq),me_g0,mpi_enreg%comm_spinorfft)
 682 !    else
 683 !    call sqnorm_g(dotr,istwf_k,npw1*nspinor,cwaveq,me_g0,comm_fft)
 684 !    end if
 685 !    write(std_out,*)' dfpt_cgwf : before precon, cwaveq**2=',dotr
 686 !    ENDDEBUG
 687 
 688 !    ======================================================================
 689 !    ======== PRECONDITION THE STEEPEST DESCENT DIRECTION =================
 690 !    ======================================================================
 691 
 692 !    If wfoptalg>=10, the precondition matrix is kept constant
 693 !    during iteration ; otherwise it is recomputed
 694      if (wfoptalg<10.or.iline==1) then
 695        call cg_precon(cwaveq,zero,istwf_k,kinpw1,npw1,nspinor,me_g0,0,pcon,direc,mpi_enreg%comm_fft)
 696      else
 697        do ispinor=1,nspinor
 698          igs=(ispinor-1)*npw1
 699 !$OMP PARALLEL DO PRIVATE(ipw) SHARED(igs,npw,direc,pcon)
 700          do ipw=1+igs,npw1+igs
 701            direc(1:2,ipw)=direc(1:2,ipw)*pcon(ipw-igs)
 702          end do
 703        end do
 704      end if
 705 
 706 !    DEBUG Keep this debugging feature !
 707 !    call sqnorm_g(dotr,istwf_k,npw1*nspinor,direc,me_g0,comm_fft)
 708 !    write(std_out,*)' dfpt_cgwf : after precon, direc**2=',dotr
 709 !    ENDDEBUG
 710 
 711 !    ======================================================================
 712 !    ======= PROJECT THE PRECOND. STEEPEST DESCENT DIRECTION ==============
 713 !    ========= OVER THE SUBSPACE ORTHOGONAL TO OTHER BANDS ================
 714 !    ======================================================================
 715 
 716 !    Projecting again out all bands:
 717 !    -For the simple eigenPb, gscq is used as dummy argument
 718      call projbd(cgq,direc,-1,icgq,igscq,istwf_k,mcgq,mgscq,nband,npw1,nspinor,&
 719 &     gscq,scprod,0,tim_projbd,useoverlap,me_g0,comm_fft)
 720 
 721 !    DEBUG Keep this debugging feature !
 722 !    call sqnorm_g(dotr,istwf_k,npw1*nspinor,direc,me_g0,comm_fft)
 723 !    write(std_out,*)' dfpt_cgwf : after projbd, direc**2=',dotr
 724 !    ENDDEBUG
 725 
 726 !    ======================================================================
 727 !    ================= COMPUTE THE CONJUGATE-GRADIENT =====================
 728 !    ======================================================================
 729 
 730 !    get dot of direction vector with residual vector
 731      call dotprod_g(dotgg,doti,istwf_k,npw1*nspinor,1,direc,gresid,me_g0,mpi_enreg%comm_spinorfft)
 732 
 733 !    At first iteration, gamma is set to zero
 734      if (iline==1) then
 735        gamma=zero
 736        dotgp=dotgg
 737        call cg_zcopy(npw1*nspinor,direc,conjgr)
 738      else
 739 !      At next iterations, h = g + gamma * h
 740        gamma=dotgg/dotgp
 741        dotgp=dotgg
 742        if (prtvol==-level.or.prtvol==-19)then
 743          write(msg,'(a,2es16.6)') 'dfpt_cgwf: dotgg,gamma = ',dotgg,gamma
 744          call wrtout(std_out,msg,'PERS')
 745        end if
 746 !$OMP PARALLEL DO
 747        do ipw=1,npw1*nspinor
 748          conjgr(1:2,ipw)=direc(1:2,ipw)+gamma*conjgr(1:2,ipw)
 749        end do
 750        if (prtvol==-level.or.prtvol==-19)then
 751          call wrtout(std_out,'dfpt_cgwf: conjugate direction has been found','PERS')
 752        end if
 753      end if
 754 
 755 !    ======================================================================
 756 !    ===== COMPUTE CONTRIBUTIONS TO 1ST AND 2ND DERIVATIVES OF ENERGY =====
 757 !    ======================================================================
 758 !    ...along the search direction
 759 
 760 !    Compute dedt, Eq.(29) of of PRB55, 10337 (1997),
 761 !    with an additional factor of 2 for the difference
 762 !    between E(2) and the 2DTE
 763      call dotprod_g(dedt,doti,istwf_k,npw1*nspinor,1,conjgr,gresid,me_g0,mpi_enreg%comm_spinorfft)
 764      dedt=-two*two*dedt
 765      if((prtvol==-level.or.prtvol==-19.or.prtvol==-20).and.dedt-tol14>0) then
 766        write(msg,'(a)') '  CGWF3_WARNING : dedt>0'
 767        call wrtout(std_out,msg,'PERS')
 768      end if
 769      ABI_ALLOCATE(gvnl_direc,(2,npw1*nspinor))
 770      ABI_ALLOCATE(gh_direc,(2,npw1*nspinor))
 771      if (gen_eigenpb)  then
 772        ABI_ALLOCATE(sconjgr,(2,npw1*nspinor))
 773      else
 774        ABI_ALLOCATE(sconjgr,(0,0))
 775      end if
 776      sij_opt=0;if (gen_eigenpb) sij_opt=1
 777      cpopt=-1+usepaw
 778      call getghc(cpopt,conjgr,conjgrprj,gh_direc,sconjgr,gs_hamkq,gvnl_direc,&
 779 &     eshift,mpi_enreg,1,prtvol,sij_opt,tim_getghc,0,select_k=KPRIME_H_KPRIME)
 780 
 781 !    ghc also includes the eigenvalue shift
 782      if (gen_eigenpb) then
 783 !$OMP PARALLEL DO
 784        do ipw=1,npw1*nspinor
 785          gh_direc(1:2,ipw)=gh_direc(1:2,ipw)-eshift*sconjgr(1:2,ipw)
 786        end do
 787      else
 788 !$OMP PARALLEL DO
 789        do ipw=1,npw1*nspinor
 790          gh_direc(1:2,ipw)=gh_direc(1:2,ipw)-eshift*conjgr(1:2,ipw)
 791        end do
 792      end if
 793 
 794 !    compute d2edt2, Eq.(30) of of PRB55, 10337 (1997),
 795 !    with an additional factor of 2 for the difference
 796 !    between E(2) and the 2DTE, and neglect of local fields (SC terms)
 797      call dotprod_g(d2edt2,doti,istwf_k,npw1*nspinor,1,conjgr,gh_direc,me_g0,mpi_enreg%comm_spinorfft)
 798      d2edt2=two*two*d2edt2
 799      if(prtvol==-level.or.prtvol==-19)then
 800        write(msg,'(a,2es14.6)') 'dfpt_cgwf: dedt,d2edt2=',dedt,d2edt2
 801        call wrtout(std_out,msg,'PERS')
 802      end if
 803 
 804 !    ======================================================================
 805 !    ======= COMPUTE MIXING FACTOR - CHECK FOR CONVERGENCE ===============
 806 !    ======================================================================
 807 
 808 !    see Eq.(31) of PRB55, 10337 (1997)
 809 !
 810      if(d2edt2<-tol_restart)then
 811 !      This may happen when the eigenvalue eig_mk(0) is higher than
 812 !      the lowest non-treated eig_mk+q(0). The solution adopted here
 813 !      is very crude, and rely upon the fact that occupancies of such
 814 !      levels should be smaller and smaller with increasing nband, so that
 815 !      a convergence study will give the right result.
 816        theta=zero
 817 
 818        cwavef=zero
 819        ghc   =zero
 820        gvnlc =zero
 821        if (gen_eigenpb) gsc=zero
 822        if (usepaw==1) then
 823          call pawcprj_set_zero(cwaveprj)
 824        end if
 825 !      A negative residual will be the signal of this problem ...
 826        resid=-two
 827        call wrtout(std_out,' dfpt_cgwf: problem of minimisation (likely metallic), set resid to -2',"PERS")
 828      else
 829 !      Here, the value of theta that gives the minimum
 830        theta=-dedt/d2edt2
 831 !      DEBUG
 832 !      write(std_out,*)' dfpt_cgwf: dedt,d2edt2=',dedt,d2edt2
 833 !      ENDDEBUG
 834      end if
 835 
 836 !    Check that result is above machine precision
 837      if (one+theta==one) then
 838        write(msg, '(a,es16.4)' ) ' dfpt_cgwf: converged with theta=',theta
 839        call wrtout(std_out,msg,'PERS')
 840        nskip=nskip+2*(nline-iline) ! Number of one-way 3D ffts skipped
 841        exit                        ! Exit from the loop on iline
 842      end if
 843 
 844 !    ======================================================================
 845 !    ================ GENERATE NEW |wf>, H|wf>, Vnl|Wf ... ================
 846 !    ======================================================================
 847 
 848      call cg_zaxpy(npw1*nspinor,(/theta,zero/),conjgr,cwavef)
 849      call cg_zaxpy(npw1*nspinor,(/theta,zero/),gh_direc,ghc)
 850      call cg_zaxpy(npw1*nspinor,(/theta,zero/),gvnl_direc,gvnlc)
 851 
 852      if (gen_eigenpb) then
 853        call cg_zaxpy(npw1*nspinor,(/theta,zero/),sconjgr,gsc)
 854      end if
 855      if (usepaw==1) then
 856        call pawcprj_axpby(theta,one,conjgrprj,cwaveprj)
 857      end if
 858 
 859      ABI_DEALLOCATE(gh_direc)
 860      ABI_DEALLOCATE(gvnl_direc)
 861      ABI_DEALLOCATE(sconjgr)
 862 
 863 !    ======================================================================
 864 !    =========== CHECK CONVERGENCE AGAINST TRIAL ENERGY ===================
 865 !    ======================================================================
 866 
 867      if(usetolrde/=0) then
 868 !      Check reduction in trial energy deltae, Eq.(28) of PRB55, 10337 (1997)
 869        deltae=half*d2edt2*theta**2+theta*dedt
 870 
 871        if (iline==1) then
 872          deold=deltae
 873 !        The extra factor of two should be removed !
 874        else if (abs(deltae)<tolrde*two*abs(deold) .and. iline/=nline) then
 875          if(prtvol>=10.or.prtvol==-level.or.prtvol==-19)then
 876            write(msg, '(a,i4,1x,a,1p,e12.4,a,e12.4,a)' ) &
 877 &           ' dfpt_cgwf: line',iline,' deltae=',deltae,' < tolrde*',deold,' =>skip lines'
 878            call wrtout(std_out,msg,'PERS')
 879          end if
 880          nskip=nskip+2*(nline-iline) ! Number of one-way 3D ffts skipped
 881          exit                        ! Exit from the loop on iline
 882        end if
 883      end if
 884 
 885 !    ======================================================================
 886 !    ================== END LOOP FOR GIVEN BAND ===========================
 887 !    ======================================================================
 888 
 889 !    Note that there are three "exit" instruction inside the loop.
 890      nlines_done = nlines_done + 1
 891    end do ! iline
 892 
 893 !  Check that final cwavef (Psi^(1)) satisfies the orthogonality condition
 894    if (prtvol==-level.or.prtvol==-19) then
 895      sij_opt=0 ; usevnl=1 ; optlocal=1 ; optnl=2 ; if (gen_eigenpb)  sij_opt=1
 896      ABI_ALLOCATE(work,(2,npw1*nspinor))
 897      ABI_ALLOCATE(work1,(2,npw1*nspinor))
 898      ABI_ALLOCATE(work2,(2,npw1*nspinor*sij_opt))
 899      do iband=1,nband
 900        if (gen_eigenpb) then
 901          work(:,:)=gscq(:,1+npw1*nspinor*(iband-1)+igscq:npw1*nspinor*iband+igscq)
 902        else
 903          work(:,:)=cgq(:,1+npw1*nspinor*(iband-1)+icgq:npw1*nspinor*iband+icgq)
 904        end if
 905        ! Compute : <Psi^(0)_i,k+q|Psi^(1)_j,k,q>
 906        call dotprod_g(prod1,prod2,istwf_k,npw1*nspinor,2,work,cwavef,me_g0,mpi_enreg%comm_spinorfft)
 907 
 908        if (ipert/=natom+10.and.ipert/=natom+11) then
 909          if (gen_eigenpb) then
 910            call getgh1c(berryopt,cwave0,cwaveprj0,work1,gberry,work2,gs_hamkq,gvnl1_saved,idir,ipert,eshift,&
 911 &           mpi_enreg,optlocal,optnl,opt_gvnl1,rf_hamkq,sij_opt,tim_getgh1c,usevnl)
 912            work(:,:)=cgq(:,1+npw1*nspinor*(iband-1)+icgq:npw1*nspinor*iband+icgq)
 913            call dotprod_g(dotr,doti,istwf_k,npw1*nspinor,2,work,work2,me_g0,mpi_enreg%comm_spinorfft)
 914          else
 915            dotr=zero; doti=zero
 916          end if
 917          dotr=prod1+half*dotr
 918          doti=prod2+half*doti
 919        else if(prtvol==-19) then ! 2nd order case
 920          dotr=prod1+half*rf2%amn(1,iband+(band-1)*nband)
 921          doti=prod2+half*rf2%amn(2,iband+(band-1)*nband)
 922        else
 923          write(msg,'(a)') 'CGWF3_WARNING : Use prtvol=-19 to test orthogonality for ipert=natom+10 or +11'
 924          call wrtout(std_out,msg,'COLL')
 925        end if
 926        dotr=sqrt(dotr**2+doti**2)
 927        if(dotr>tol10) then
 928 !         if (gen_eigenpb) then
 929 !           write(msg,'(2a,i3,a,2es22.15)') 'CGWF3_WARNING : <Psi^(1)_i,k,q|S^(0)|Psi^(0)_j,k+q>',&
 930 !&           '+ 1/2<Psi^(0)_i,k|S^(1)|Psi^(0)_j,k+q>, for j= ',iband,' is ',dotr,doti
 931 !         else
 932          write(msg,'(a,i3,a,es22.15)') 'CGWF3_WARNING : |<Psi^(0)_i,k+q|Psi^(1)_j,k,q>+amn(i,j)/2|, for j= ',iband,' is ',dotr
 933 !         end if
 934          call wrtout(std_out,msg,'PERS')
 935        end if
 936      end do
 937      ABI_DEALLOCATE(work)
 938      ABI_DEALLOCATE(work1)
 939      ABI_DEALLOCATE(work2)
 940      if (ipert/=natom+10.and.ipert/=natom+11) then
 941        ABI_DEALLOCATE(gvnl1_saved)
 942      end if
 943    end if
 944 
 945 !  Check that final cwavef Psi^(1) is Pc.Psi^(1)+delta_Psi^(1)
 946    if (prtvol==-level.or.prtvol==-19)then
 947      ABI_ALLOCATE(cwwork,(2,npw1*nspinor))
 948      cwwork=cwavef
 949 !    -Apply Pc to Psi^(1)
 950      if (gen_eigenpb) then
 951        call projbd(cgq,cwwork,-1,icgq,igscq,istwf_k,mcgq,mgscq,nband,npw1,nspinor,&
 952 &       gscq,scprod,0,tim_projbd,useoverlap,me_g0,comm_fft)
 953      else
 954        call projbd(cgq,cwwork,-1,icgq,0,istwf_k,mcgq,mgscq,nband,npw1,nspinor,&
 955 &       dummy,scprod,0,tim_projbd,useoverlap,me_g0,comm_fft)
 956      end if
 957 !    -Add delta_Psi^(1)
 958      if (usedcwavef>0) cwwork=cwwork+dcwavef
 959      if(ipert==natom+10.or.ipert==natom+11) cwwork=cwwork+rf2%dcwavef(:,1+shift_band:npw1*nspinor+shift_band)
 960 !    -Compare to Psi^(1)
 961      cwwork=cwwork-cwavef
 962      call sqnorm_g(dotr,istwf_k,npw1*nspinor,cwwork,me_g0,comm_fft)
 963      ABI_DEALLOCATE(cwwork)
 964      if(sqrt(dotr)>tol10) then
 965 !       if (gen_eigenpb) then
 966 !         write(msg,'(a,i3,a,es22.15)') &
 967 !&         'CGWF3_WARNING : |(Pc.Psi^(1)_i,k,q + delta_Psi^(1)_i,k) - Psi^(1)_i,k,q|^2 (band ',band,')=',dotr
 968 !       else
 969        write(msg,'(a,es22.15)') 'CGWF3_WARNING : |(Pc.Psi^(1)_i,k,q + delta_Psi^(1)_i,k) - Psi^(1)_i,k,q| = ',sqrt(dotr)
 970 !       end if
 971        call wrtout(std_out,msg,'PERS')
 972      end if
 973    end if
 974 
 975 !  Check that final cwavef (Psi^(1)) solves the Sternheimer equation
 976    if(prtvol==-level.or.prtvol==-19.or.prtvol==-20)then
 977      ABI_ALLOCATE(cwwork,(2,npw1*nspinor))
 978      cwwork=cwavef
 979 !    -Apply Pc to Psi^(1)
 980      if (gen_eigenpb) then
 981        call projbd(cgq,cwwork,-1,icgq,igscq,istwf_k,mcgq,mgscq,nband,npw1,nspinor,&
 982 &       gscq,scprod,0,tim_projbd,useoverlap,me_g0,comm_fft)
 983      else
 984        call projbd(cgq,cwwork,-1,icgq,0,istwf_k,mcgq,mgscq,nband,npw1,nspinor,&
 985 &       dummy,scprod,0,tim_projbd,useoverlap,me_g0,comm_fft)
 986      end if
 987 !    - Apply H^(0)-E.S^(0)
 988      sij_opt=0;if (gen_eigenpb) sij_opt=1
 989      cpopt=-1
 990      ABI_ALLOCATE(work,(2,npw1*nspinor))
 991      ABI_ALLOCATE(work1,(2,npw1*nspinor*((sij_opt+1)/2)))
 992      ABI_ALLOCATE(work2,(2,npw1*nspinor))
 993      call getghc(cpopt,cwwork,conjgrprj,work,work1,gs_hamkq,work2,eshift,&
 994 &     mpi_enreg,1,prtvol,sij_opt,tim_getghc,0,select_k=KPRIME_H_KPRIME)
 995      if (gen_eigenpb) then
 996        cwwork=work-eshift*work1
 997      else
 998        cwwork=work-eshift*cwwork
 999      end if
1000      ABI_DEALLOCATE(work)
1001      ABI_DEALLOCATE(work1)
1002      ABI_DEALLOCATE(work2)
1003 !  The following is not mandatory, as Pc has been already applied to Psi^(1)
1004 !  and Pc^* H^(0) Pc = Pc^* H^(0) = H^(0) Pc (same for S^(0)).
1005 !  However, in PAW, to apply Pc^* here seems to reduce the numerical error
1006 !    -Apply Pc^*
1007      if (gen_eigenpb) then
1008        call projbd(gscq,cwwork,-1,igscq,icgq,istwf_k,mgscq,mcgq,nband,npw1,nspinor,&
1009 &       cgq,scprod,0,tim_projbd,useoverlap,me_g0,comm_fft)
1010      else
1011        call projbd(cgq,cwwork,-1,icgq,0,istwf_k,mcgq,mgscq,nband,npw1,nspinor,&
1012 &       dummy,scprod,0,tim_projbd,useoverlap,me_g0,comm_fft)
1013      end if
1014 !    - Add Pc^*(H^(1)-E.S^(1)).Psi^(0)
1015      cwwork=cwwork+gh1c
1016      call sqnorm_g(dotr,istwf_k,npw1*nspinor,cwwork,me_g0,comm_fft)
1017      ABI_DEALLOCATE(cwwork)
1018      write(msg,'(a,i3,a,es22.15,2a,i4)') &
1019 &     '*** CGWF3 Sternheimer equation test for band ',band,'=',sqrt(dotr),ch10,&
1020 &     'It should go to zero for large nline : nlines_done = ',nlines_done
1021      call wrtout(std_out,msg,'PERS')
1022    end if
1023 
1024 !  Check that < Psi^(0) | ( H^(0)-eps^(0) S^(0) ) | Psi^(1) > is in agreement with eig^(1)
1025    if(prtvol==-level.or.prtvol==-19.or.prtvol==-20)then
1026      ABI_ALLOCATE(cwwork,(2,npw1*nspinor))
1027      cwwork=cwavef
1028 !    - Apply H^(0)-E.S^(0)
1029      sij_opt=0;if (gen_eigenpb) sij_opt=1
1030      cpopt=-1
1031      ABI_ALLOCATE(work,(2,npw1*nspinor))
1032      ABI_ALLOCATE(work1,(2,npw1*nspinor*((sij_opt+1)/2)))
1033      ABI_ALLOCATE(work2,(2,npw1*nspinor))
1034      call getghc(cpopt,cwwork,conjgrprj,work,work1,gs_hamkq,work2,eshift,&
1035 &     mpi_enreg,1,prtvol,sij_opt,tim_getghc,0,select_k=KPRIME_H_KPRIME)
1036      if (gen_eigenpb) then
1037        cwwork=work-eshift*work1
1038      else
1039        cwwork=work-eshift*cwwork
1040      end if
1041      ABI_DEALLOCATE(work1)
1042      ABI_DEALLOCATE(work2)
1043      cwwork=cwwork+gh1c_n
1044      jband=(band-1)*2*nband
1045      do iband=1,nband
1046        work(:,:)=cgq(:,1+npw1*nspinor*(iband-1)+icgq:npw1*nspinor*iband+icgq)
1047        call dotprod_g(dotr,doti,istwf_k,npw1*nspinor,2,work,cwwork,me_g0,mpi_enreg%comm_spinorfft)
1048        dotr = dotr - eig1_k(2*iband-1+jband)
1049        doti = doti - eig1_k(2*iband  +jband)
1050        dotr = sqrt(dotr**2+doti**2)
1051        if (dotr > tol8) then
1052          write(msg,'(2(a,i3),a,es22.15)') &
1053 &         'CGWF3_WARNING < Psi^(0) | ( H^(0)-eps^(0) S^(0) ) | Psi^(1) > for i=',iband,' j=',band,&
1054          ' : ',sqrt(dotr**2+doti**2)
1055          call wrtout(std_out,msg,'PERS')
1056        end if
1057      end do
1058 !     write(msg,'(a)') '< Psi^(0) | ( H^(0)-eps^(0) S^(0) ) | Psi^(1) > is done.'
1059 !     call wrtout(std_out,msg,'PERS')
1060      ABI_DEALLOCATE(work)
1061      ABI_DEALLOCATE(cwwork)
1062    end if
1063 
1064    if (allocated(gh_direc))  then
1065      ABI_DEALLOCATE(gh_direc)
1066    end if
1067    if (allocated(gvnl_direc))  then
1068      ABI_DEALLOCATE(gvnl_direc)
1069    end if
1070    ABI_DEALLOCATE(conjgr)
1071    ABI_DEALLOCATE(cwaveq)
1072    ABI_DEALLOCATE(direc)
1073    ABI_DEALLOCATE(gresid)
1074    if (usepaw==1) then
1075      call pawcprj_free(conjgrprj)
1076    end if
1077    ABI_DATATYPE_DEALLOCATE(conjgrprj)
1078 
1079  end if  ! End condition of not being a buffer band
1080 
1081 !At the end of the treatment of a set of bands, write the number of one-way 3D ffts skipped
1082  if (xmpi_paral==1 .and. band==nband .and. prtvol>=10) then
1083    write(msg,'(a,i0)')' dfpt_cgwf: number of one-way 3D ffts skipped in cgwf3 until now =',nskip
1084    call wrtout(std_out,msg,'PERS')
1085  end if
1086 
1087  ABI_DEALLOCATE(gh1c)
1088  ABI_DEALLOCATE(pcon)
1089  ABI_DEALLOCATE(scprod)
1090  ABI_DEALLOCATE(gberry)
1091 
1092  call timab(122,2,tsec)
1093 
1094  DBG_EXIT("COLL")
1095 
1096 end subroutine dfpt_cgwf