TABLE OF CONTENTS


ABINIT/m_rmm_diis [ Modules ]

[ Top ] [ Modules ]

NAME

  m_rmm_diis

FUNCTION

  This module contains routines for the RMM-DIIS eigenvalue solver.

COPYRIGHT

  Copyright (C) 2020-2024 ABINIT group (MG)
  This file is distributed under the terms of the
  GNU General Public License, see ~abinit/COPYING
  or http://www.gnu.org/copyleft/gpl.txt .

SOURCE

16 #if defined HAVE_CONFIG_H
17 #include "config.h"
18 #endif
19 
20 #include "abi_common.h"
21 
22 module m_rmm_diis
23 
24  use defs_basis
25  use m_errors
26  use m_xmpi
27  use m_abicore
28  use m_dtset
29  use m_cgtools
30  use m_hide_blas
31  use m_yaml
32  use m_linalg_interfaces
33  use m_prep_kgb
34 
35  use defs_abitypes,   only : mpi_type
36  use m_fstrings,      only : sjoin, itoa, ftoa
37  use m_time,          only : timab, cwtime, cwtime_report
38  use m_numeric_tools, only : pack_matrix, imin_loc, stats_t, stats_eval
39  use m_hide_lapack,   only : xhegv_cplex, xhesv_cplex
40  use m_pair_list,     only : pair_list
41  use m_pawcprj,       only : pawcprj_type, pawcprj_alloc, pawcprj_free
42  use m_fftcore,       only : fftcore_set_mixprec
43  use m_hamiltonian,   only : gs_hamiltonian_type
44  use m_getghc,        only : getghc
45  use m_nonlop,        only : nonlop
46  use m_cgtk,          only : cgtk_fixphase
47  use m_abi_linalg,    only : abi_zgemm_2r
48  !use m_fock,         only : fock_set_ieigen, fock_set_getghc_call
49 
50  implicit none
51 
52  private

ABINIT/rmm_diis [ Functions ]

[ Top ] [ Functions ]

NAME

 rmm_diis

FUNCTION

 This routine updates the wave functions at a given (k-point, spin), using the RMM-DIIS method.

INPUTS

  istep,ikpt,isppol=Iteration step, k-point index, spin index (mainly for printing purposes).
  dtset <type(dataset_type)>=all input variables for this dataset
  gs_hamk <type(gs_hamiltonian_type)>=all data for the hamiltonian at k
  kinpw(npw)=(modified) kinetic energy for each plane wave (hartree)
  mpi_enreg=information about MPI parallelization
  nband=number of bands at this k point for that spin polarization
  npw=number of plane waves at this k point
  my_nspinor=number of spinors treated by this MPI proc

OUTPUT

  eig(nband)=array for holding eigenvalues (hartree)
  If usepaw==1:
    gsc(2,*)=<g|s|c> matrix elements (s=overlap)
  If usepaw==0
    enlx(nband)=contribution from each band to nonlocal psp + potential Fock ACE part
                of total energy, at this k-point

SIDE EFFECTS

  cg(2,*)=updated wavefunctions
  resid(nband)=residuals for each states. In input: previous residuals for this k-point, spin.
   In output: new residuals.
  rmm_diis_status(2): Status of the eigensolver.
    The first entry gives the previous accuracy.
    The second entry gives the number of iterations already performed with this level.

SOURCE

163 subroutine rmm_diis(istep, ikpt, isppol, cg, dtset, eig, occ, enlx, gs_hamk, kinpw, gsc, &
164                     mpi_enreg, nband, npw, my_nspinor, resid, rmm_diis_status)
165 
166 !Arguments ------------------------------------
167  integer,intent(in) :: istep, ikpt, isppol, nband, npw, my_nspinor
168  type(gs_hamiltonian_type),intent(inout) :: gs_hamk
169  type(dataset_type),intent(in) :: dtset
170  type(mpi_type),intent(in) :: mpi_enreg
171  real(dp),target,intent(inout) :: cg(2,npw*my_nspinor*nband)
172  real(dp),target,intent(inout) :: gsc(2,npw*my_nspinor*nband*dtset%usepaw)
173  real(dp),intent(inout) :: enlx(nband), resid(nband)
174  real(dp),intent(in) :: occ(nband), kinpw(npw)
175  real(dp),intent(out) :: eig(nband)
176  integer,intent(inout) :: rmm_diis_status(2)
177 
178 !Local variables-------------------------------
179  integer,parameter :: type_calc0 = 0, option1 = 1, option2 = 2, tim_getghc = 0
180  integer,parameter :: choice1 = 1, signs1 = 1, signs2 = 2, tim_nonlop = 0, paw_opt0 = 0, paw_opt3 = 3
181  integer :: ierr, prtvol, bsize, nblocks, iblock, npwsp, ndat, ib_start, ib_stop, idat, paral_kgb !, ortalgo
182  integer :: cpopt, sij_opt, igs, ige, mcg, mgsc, istwf_k, optekin, usepaw, iter, max_niter, max_niter_block
183  integer :: me_g0, nb_pocc, jj, kk, accuracy_level, raise_acc, prev_mixprec, after_ortho, me_cell
184  integer :: comm_bsf, prev_accuracy_level, ncalls_with_prev_accuracy, signs, paw_opt, savemem
185  logical :: first_call, use_fft_mixprec, has_fock
186  real(dp),parameter :: rdummy = zero
187  real(dp) :: accuracy_ene,  max_res_pocc, tol_occupied, lock_tolwfr !, max_absimag
188  real(dp) :: cpu, wall, gflops, cpu_all, wall_all, gflops_all
189  character(len=500) :: msg
190  type(yamldoc_t) :: rmm_ydoc
191  type(rmm_diis_t) :: diis
192  type(stats_t) :: res_stats
193 !arrays
194  real(dp) :: tsec(2)
195  real(dp),target :: fake_gsc_bk(0,0)
196  real(dp),allocatable :: lambda_bk(:), kres_bk(:,:), dots_bk(:,:), residv_bk(:,:)
197  real(dp),allocatable :: umat(:,:,:), gwork(:,:), dots(:, :)
198  real(dp),target,allocatable :: ghc(:,:), gvnlxc(:,:)
199  real(dp), ABI_CONTIGUOUS pointer :: gsc_bk(:,:), cg_bk(:,:), ghc_bk(:,:), gvnlxc_bk(:,:)
200  type(pawcprj_type) :: cprj_dum(1,1)
201 
202 ! *************************************************************************
203 
204  ! Define useful vars.
205  usepaw = dtset%usepaw; istwf_k = gs_hamk%istwf_k; paral_kgb = mpi_enreg%paral_kgb
206  me_g0 = mpi_enreg%me_g0; comm_bsf = mpi_enreg%comm_bandspinorfft
207  npwsp = npw * my_nspinor; mcg = npwsp * nband; mgsc = npwsp * nband * usepaw
208  me_cell = mpi_enreg%me_cell; prtvol = dtset%prtvol !; prtvol = -level
209  has_fock = associated(gs_hamk%fockcommon)
210 
211  if (timeit) then
212    call cwtime(cpu_all, wall_all, gflops_all, "start")
213    call cwtime(cpu, wall, gflops, "start")
214  end if
215 
216  ! =================
217  ! Prepare DIIS loop
218  ! =================
219  ! accuracy_level is computed from the maxval of the previous residuals received in input.
220  ! The different levels are:
221  !
222  !  1: Used at the beginning of the SCF cycle. Use loosy convergence criteria in order
223  !     to reduce the number of H|psi> applications as much as possible so that
224  !     we can start to mix densities/potentials.
225  !     Allow for incosistent data in output rediduals and Vnl matrix elements.
226  !     Move to the next level after Max 15 iterations.
227  !
228  !  2: Intermediate step. Decrease convergence criteria in order to perform more wavefuction iterations.
229  !     Allow for incosistent data in output rediduals and Vnl matrix elements.
230  !     Move to the next level after Max 25 iterations.
231  !
232  !  3: Approaching convergence. Use stricter convergence criteria.
233  !     Move to the next level after Max 25 iterations.
234  !
235  !  4: Ultimate precision. Try to reach the same accuracy as the other eigenvalue solvers.
236  !     This implies: using similar convergence criteria as in the other solvers.
237 
238  ! Note:
239  !
240  ! * Accuracy_level is not allowed to increase during the SCF cycle
241  !
242  ! * Since we operate on blocks of bands, all the states in the block will receive the same treatment.
243  !   This means that one can observe different convergence behaviour depending on bsize.
244  !
245  if (all(rmm_diis_status == 0)) then
246    ! This is the first time we call rmm_diis for this (k-point, spin)
247    prev_accuracy_level = 1; ncalls_with_prev_accuracy = 0
248    first_call = .True.
249  else
250    prev_accuracy_level = rmm_diis_status(1); ncalls_with_prev_accuracy = rmm_diis_status(2)
251    first_call = .False.
252  end if
253 
254  ! Decide whether we should move to the next level.
255  raise_acc = 0
256  if (prev_accuracy_level == 1 .and. ncalls_with_prev_accuracy >= 15) raise_acc = 2
257  if (prev_accuracy_level == 2 .and. ncalls_with_prev_accuracy >= 25) raise_acc = 3
258  if (prev_accuracy_level == 3 .and. ncalls_with_prev_accuracy >= 25) raise_acc = 4
259  if (raise_acc > 0) then
260    ABI_COMMENT("Accuracy_level is automatically increased as we reached the max number of NSCF iterations.")
261  end if
262  raise_acc = max(raise_acc, prev_accuracy_level)
263 
264  ! Define tolerance for occupied states on the basis of prev_accuracy_level
265  ! and compute max of residuals for these bands.
266  tol_occupied = zero
267  if (dtset%iscf > 0) then
268    tol_occupied = tol3; if (any(prev_accuracy_level == [1])) tol_occupied = tol2
269  end if
270  nb_pocc = count(occ >= tol_occupied)
271  max_res_pocc = maxval(resid(1:nb_pocc))
272 
273  ! Define accuracy_level for this run.
274  accuracy_level = 1
275  if (max_res_pocc < tol8)  accuracy_level = 2
276  if (max_res_pocc < tol12) accuracy_level = 3
277  if (max_res_pocc < tol16) accuracy_level = 4
278  !if (max_res_pocc < tol18) accuracy_level = 4
279  accuracy_level = max(prev_accuracy_level, accuracy_level, raise_acc)
280  if (istep == 1) accuracy_level = 2  ! FIXME: Differenciate between restart or rmm_diis - 3.
281  if (first_call .and. max_res_pocc == zero) accuracy_level = 1
282  !print *, "rmm_diis_status:", rmm_diis_status
283  !print *, "rmm_prev_acc:", prev_accuracy_level, "rmm_raise_acc:", raise_acc
284  !print *, "accuracy_level:", accuracy_level, "rmm_raise_acc:", raise_acc
285 
286  ! Update rmm_diis_status. Reset number of calls if we've just moved to a new accuracy_level.
287  rmm_diis_status(1) = accuracy_level
288  if (accuracy_level /= prev_accuracy_level) rmm_diis_status(2) = 0
289  rmm_diis_status(2) = rmm_diis_status(2) + 1
290 
291  ! Will perform max_niter DIIS steps. Usually 3 as nline by default is 4.
292  ! Note that, unlike in Vasp's recipe, here we don't end with a trial step after DIIS.
293  max_niter = max(dtset%nline - 1, 1)
294  if (accuracy_level >= 4) max_niter = dtset%nline
295  if (dtset%iscf < 0) max_niter = dtset%nline + 1
296 
297  ! Define accuracy_ene for SCF.
298  accuracy_ene = zero
299  if (dtset%iscf > 0) then
300    if (dtset%toldfe /= zero) then
301      accuracy_ene = dtset%toldfe * ten**(-accuracy_level + 2) / nb_pocc
302    else
303      ! We are not using toldfe to stop the SCF cycle
304      ! so we are forced to hardcode a tolerance for the absolute diff in the KS eigenvalue.
305      accuracy_ene = tol8 * ten**(-accuracy_level + 2) / nb_pocc
306    end if
307  end if
308 
309  ! Tolerance on residuals used for band locking after subdiago.
310  if (dtset%tolwfr > zero) then
311    lock_tolwfr = tol2 * dtset%tolwfr
312  else
313    lock_tolwfr = tol14
314    if (accuracy_level >= 2) lock_tolwfr = tol16
315    if (accuracy_level >= 3) lock_tolwfr = tol18
316    if (accuracy_level >= 4) lock_tolwfr = tol20 * tol2
317  end if
318 
319  ! Use mixed precisions if requested by the user but only for low accuracy_level
320  use_fft_mixprec = dtset%mixprec == 1 .and. accuracy_level < 2
321  if (use_fft_mixprec) prev_mixprec = fftcore_set_mixprec(1)
322 
323  ! Select preconditioning.
324  optekin = 0; if (dtset%wfoptalg >= 10) optekin = 1
325  optekin = 1 ! optekin = 0
326 
327  ! Will treat states in groups of bsize bands even when paral_kgb = 0
328  bsize = 8; if (paral_kgb == 1) bsize = mpi_enreg%nproc_band * mpi_enreg%bandpp
329  nblocks = nband / bsize; if (mod(nband, bsize) /= 0) nblocks = nblocks + 1
330 
331  ! Build DIIS object.
332  diis = rmm_diis_new(accuracy_level, usepaw, istwf_k, npwsp, max_niter, bsize, prtvol)
333  diis%tol_occupied = tol_occupied
334  !call wrtout(std_out, sjoin(" Using Max", itoa(max_niter), "RMM-DIIS iterations"))
335  !call wrtout(std_out, sjoin( &
336  !  " Max_input_resid_pocc", ftoa(max_res_pocc), "accuracy_level:", itoa(accuracy_level), &
337  !  ", accuracy_ene: ", ftoa(accuracy_ene)))
338  call timab(1634, 1, tsec) ! "rmm_diis:band_opt"
339 
340  rmm_ydoc = yamldoc_open("RMM-DIIS", with_iter_state=.False.)
341  call rmm_ydoc%add_ints("ikpt, isppol, istep, accuracy_level", [ikpt, isppol, istep, accuracy_level])
342  call rmm_ydoc%open_tabular("RESIDS_POCC") !, tag, indent, newline, comment)
343  write(msg, "(1x, a12, 4(a10))")"level", "mean", "min", "max", "stdev"
344  call rmm_ydoc%add_tabular_line(msg, indent=0)
345  call rmm_ydoc%add_tabular_line(resids2str("input"), indent=0)
346 
347  ! =========================
348  ! === Subspace rotation ===
349  ! =========================
350  ! Allocate big (scalable) array with <G|H|C> for all nband so that we can recompute the residuals after the rotation.
351  ! This approach requires more memory but we avoid one extra call to H|Psi> per band.
352  ! Alternatively, one can compute ghc and the residuals by applying H|psi>
353  ! inside the loop over blocks (less memory but slower).
354  savemem = dtset%rmm_diis_savemem
355  !savemem = 1
356  !if (savemem == 0) then
357  !  ABI_MALLOC_OR_DIE(ghc, (2, npwsp*nband), ierr)
358  !  ABI_MALLOC_OR_DIE(gvnlxc, (2, npwsp*nband), ierr)
359  !end if
360 
361  call subspace_rotation(gs_hamk, dtset%prtvol, mpi_enreg, nband, npw, my_nspinor, savemem, &
362                         enlx, eig, cg, gsc, ghc, gvnlxc)
363 
364  gsc_bk => fake_gsc_bk
365  cpopt = -1; sij_opt = 0
366  if (usepaw == 1) then
367    sij_opt = 1 ! matrix elements <G|S|C> have to be computed in gsc in addition to ghc
368    cpopt = -1  ! <p_lmn|in> (and derivatives) are computed here (and not saved)
369  end if
370 
371  ABI_MALLOC(lambda_bk, (bsize))
372  ABI_MALLOC(dots_bk, (2, bsize))
373  ABI_MALLOC(residv_bk, (2, npwsp*bsize))
374  ABI_MALLOC(kres_bk, (2, npwsp*bsize))
375 
376  if (savemem == 1) then
377    ABI_MALLOC(ghc_bk, (2, npwsp*bsize))
378    ABI_MALLOC(gvnlxc_bk, (2, npwsp*bsize))
379  end if
380  !write(msg, "(a,f8.1,a)") &
381  !  " Memory required: ", 2 * natom3**2 * (my_q2 - my_q1 + 1) * dp * b2Mb, " [Mb] <<< MEM"
382  !call wrtout(std_out, msg)
383 
384 
385  ! We loop over nblocks, each block contains ndat states.
386  !
387  ! - Convergence behaviour may depend on bsize as branches are taken according to
388  !   the status of all bands in the block.
389  ! TODO: Transpose only once per block and then work with already_transposed = .True.
390  if (timeit) call cwtime(cpu, wall, gflops, "start")
391 
392  do iblock=1,nblocks
393    igs = 1 + (iblock - 1) * npwsp * bsize; ige = min(iblock * npwsp * bsize, npwsp * nband)
394    ndat = (ige - igs + 1) / npwsp
395    ib_start = 1 + (iblock - 1) * bsize; ib_stop = min(iblock * bsize, nband)
396 
397    ! Reduce number of niter iterations if block contains "empty" states.
398    ! This should happen only if npband is small wrt nband and nband >> nbocc.
399    ! TODO: Don't reduce niter if MD
400    max_niter_block = max_niter
401    if (dtset%iscf > 0) then
402      if (all(occ(ib_start:ib_stop) < diis%tol_occupied)) max_niter_block = max(1 + max_niter / 2, 2)
403    end if
404 
405    ! Compute H |phi_0> with cg block after subdiago.
406    cg_bk => cg(:,igs:ige); if (usepaw == 1) gsc_bk => gsc(1:2,igs:ige)
407 
408    ! Compute residual vectors after subspace_rotation.
409    if (savemem == 0) then
410      ghc_bk => ghc(:,igs:ige); gvnlxc_bk => gvnlxc(:,igs:ige)
411      call cg_get_residvecs(usepaw, npwsp, ndat, eig(ib_start), cg_bk, ghc_bk, gsc_bk, residv_bk)
412      call cg_norm2g(istwf_k, npwsp, ndat, residv_bk, resid(ib_start), me_g0, comm_bsf)
413    else
414      call getghc_eigresid(gs_hamk, npw, my_nspinor, ndat, cg_bk, ghc_bk, gsc_bk, mpi_enreg, prtvol, &
415                          eig(ib_start), resid(ib_start), enlx(ib_start), residv_bk, gvnlxc_bk, normalize=.False.)
416    end if
417 
418    ! Band locking.
419    if (all(resid(ib_start:ib_stop) < lock_tolwfr)) then
420      call diis%stats%increment("locked", ndat)
421      cycle ! iblock
422    end if
423 
424    ! Save <R0|R0> and <phi_0|S|phi_0>, |phi_0>, |S phi_0>. Assume input cg_bk is already S-normalized.
425    call diis%push_iter(0, ndat, eig(ib_start), resid(ib_start), enlx(ib_start), cg_bk, residv_bk, gsc_bk, "SDIAG")
426 
427    ! Line minimization with preconditioned steepest descent:
428    !
429    !    |phi_1> = |phi_0> + lambda |K R_0>
430    !
431    ! where lambda minimizes the residual (we don't try to find the stationary
432    ! point of the Rayleigh quotient as in Kresse's paper).
433    !
434    !    lambda = - Re{<R_0|(H - e_0 S)} |K R_0>} / |(H - e_0 S) |K R_0>|**2
435    !
436    ! more expensive than finding the stationary point of the Rayleigh quotient as it requires
437    ! an extra H application but it should be more stable and more consistent with the RMM approach.
438    !
439    ! Precondition |R_0>, output in kres_bk = |K R_0>
440    call cg_zcopy(npwsp * ndat, residv_bk, kres_bk)
441    call cg_precon_many(istwf_k, npw, my_nspinor, ndat, cg_bk, optekin, kinpw, kres_bk, me_g0, comm_bsf)
442 
443    ! Compute H |K R_0>
444    if (paral_kgb == 0) then
445      call getghc(cpopt, kres_bk, cprj_dum, ghc_bk, gsc_bk, gs_hamk, gvnlxc_bk, &
446                  rdummy, mpi_enreg, ndat, prtvol, sij_opt, tim_getghc, type_calc0)
447    else
448      call prep_getghc(kres_bk, gs_hamk, gvnlxc_bk, ghc_bk, gsc_bk, rdummy, ndat, &
449                       mpi_enreg, prtvol, sij_opt, cpopt, cprj_dum, already_transposed=.False.)
450    end if
451 
452    ! Compute (H - e_0 S) |K R_0>
453    call cg_get_residvecs(usepaw, npwsp, ndat, eig(ib_start), kres_bk, ghc_bk, gsc_bk, residv_bk)
454    call cg_norm2g(istwf_k, npwsp, ndat, residv_bk, lambda_bk, me_g0, comm_bsf)
455 
456    ! Compute lambda
457    dots_bk = zero
458    do idat=1,ndat
459      jj = 1 + (idat - 1) * npwsp; kk = idat * npwsp
460      call dotprod_g(dots_bk(1,idat), dots_bk(2,idat), istwf_k, npwsp, option1, &
461                     diis%chain_resv(:,:,0,idat), residv_bk(:,jj), me_g0, xmpi_comm_self)
462    end do
463    call xmpi_sum(dots_bk, comm_bsf, ierr)
464 
465    ! Build |Psi_1> = |Phi_0> + lambda |K R_0>
466    do idat=1,ndat
467      lambda_bk(idat) = -dots_bk(1,idat) / lambda_bk(idat)
468      jj = 1 + (idat - 1) * npwsp; kk = idat * npwsp
469      cg_bk(:,jj:kk) = diis%chain_phi(:,:,0,idat) + lambda_bk(idat) * kres_bk(:,jj:kk)
470    end do
471 
472    ! ===============
473    ! DIIS iterations
474    ! ===============
475    iter_loop: do iter=1,max_niter_block
476 
477      if (iter > 1) then
478        ! Solve DIIS equations and update cg_bk and residv_bk for iter > 1
479        call diis%update_block(iter, npwsp, ndat, cg_bk, residv_bk, comm_bsf)
480 
481        ! Precondition residual, output in kres_bk.
482        call cg_zcopy(npwsp * ndat, residv_bk, kres_bk)
483        call cg_precon_many(istwf_k, npw, my_nspinor, ndat, cg_bk, optekin, kinpw, kres_bk, me_g0, comm_bsf)
484 
485        ! Compute cg_bk with the same lambda(ndat) obtained at iteration #0
486        call cg_zaxpy_many_areal(npwsp, ndat, lambda_bk, kres_bk, cg_bk)
487      end if
488 
489      ! Compute H |phi_now> and evaluate new enlx for NC.
490      call getghc_eigresid(gs_hamk, npw, my_nspinor, ndat, cg_bk, ghc_bk, gsc_bk, mpi_enreg, prtvol, &
491                           eig(ib_start), resid(ib_start), enlx(ib_start), residv_bk, gvnlxc_bk, normalize=.True.)
492 
493      ! Store new wavevefunctions and residuals.
494      call diis%push_iter(iter, ndat, eig(ib_start), resid(ib_start), enlx(ib_start), cg_bk, residv_bk, gsc_bk, "DIIS")
495 
496      ! CHECK FOR CONVERGENCE
497      if (diis%exit_iter(iter, ndat, max_niter_block, occ(ib_start), accuracy_ene, dtset, comm_bsf)) exit iter_loop
498 
499      ! Compute <R_i|R_j> and <i|S|j> for j=iter
500      if (iter /= max_niter_block) call diis%eval_mats(iter, ndat, me_g0, comm_bsf)
501    end do iter_loop
502 
503    if (prtvol == -level) call diis%print_block(ib_start, ndat, istep, ikpt, isppol)
504  end do ! iblock
505 
506  call timab(1634, 2, tsec) !"rmm_diis:band_opt"
507  if (timeit) call cwtime_report(" rmm_diis:band_opt", cpu, wall, gflops)
508  call rmm_ydoc%add_tabular_line(resids2str("rmm-diis"), indent=0)
509 
510  ! ===============================
511  ! Orthogonalize states after DIIS
512  ! ===============================
513  call timab(583,1,tsec) ! "vtowfk(pw_orthon)"
514 
515  !ortalgo = 3 !; ortalgo = mpi_enreg%paral_kgb
516  !call pw_orthon(0, 0, istwf_k, mcg, mgsc, npwsp, nband, ortalgo, gsc, usepaw, cg, me_g0, comm_bsf)
517 
518  ! TODO: Merge the two routines.
519  if (usepaw == 1) then
520    !call cgtk_fixphase(cg, gsc, 0, 0, istwf_k, mcg, mgsc, mpi_enreg, nband, npwsp, usepaw)
521    !call cgpaw_normalize(npwsp, nband, cg, gsc, istwf_k, me_g0, comm_bsf)
522 
523    call cgpaw_cholesky(npwsp, nband, cg, gsc, istwf_k, me_g0, comm_bsf, umat=umat)
524 
525    !call cgtk_fixphase(cg, gsc, 0, 0, istwf_k, mcg, mgsc, mpi_enreg, nband, npwsp, usepaw)
526    !call cgpaw_normalize(npwsp, nband, cg, gsc, istwf_k, me_g0, comm_bsf)
527  else
528    call cgnc_cholesky(npwsp, nband, cg, istwf_k, me_g0, comm_bsf, use_gemm=.False., umat=umat)
529  end if
530 
531  call timab(583,2,tsec)
532  if (timeit) call cwtime_report(" pw_orthon ", cpu, wall, gflops)
533 
534  ! Recompute eigenvalues, residuals, and enlx after orthogonalization.
535  ! This step is important to improve the convergence of the NC total energy
536  ! and it guarantees that eigenvalues and residuals are consistent with the output wavefunctions.
537  ! but we try to avoid it at the beginning of the SCF cycle.
538  ! NB: In principle, one can rotate Vnl(b,b') using U^-1 from the Cholesky decomposition
539  ! but the full Vnl matrix should be computed before the ortho step.
540 
541  ! Select value of after_ortho:
542  !
543  !   0: return with inconsistent eigenvalues, residuals and enlx_bk to avoid final H |Psi>.
544  !   1: recompute enlx_bx after ortho. Return inconsistent eigens and residuals (last DIIS iteration).
545  !   2: fully consistent mode: execute final H|Psi> after ortho step to update enlx_bx, eigens, residuals
546  !
547  ! Total number of H |Psi> applications:
548  !
549  !   1 for subdiago.
550  !   1 for preconditioned steepest descent.
551  !   (nline - 1) for DIIS or nline if ultimate accuracy is reached.
552  !   1 if after_ortho > 0
553  !
554  after_ortho = 0
555  if (accuracy_level >= 2) after_ortho = 1
556  if (accuracy_level >= 4) after_ortho = 2
557  if (after_ortho >= 1 .and. savemem == 0) after_ortho = 1
558  ! It seems that PAW is more sensitive to after_ortho. Perhaps I can avoid the final H|phi> if accuracy_level == 1
559  !if (usepaw == 1) after_ortho = 1
560  !if (usepaw == 1) after_ortho = 0
561  if (usepaw == 1) after_ortho = 2 ! FIXME ??
562 
563  if (after_ortho == 0) then
564    !if (prtvol == -level)
565    call wrtout(std_out, " VERY-FAST: Won't recompute data after orthogonalization.")
566 
567  !else if (after_ortho == 1 .and. savemem == 0) then
568  else if (after_ortho == 1 .and. savemem == 0 .and. usepaw == 0) then
569    !if (prtvol == -level)
570    call wrtout(std_out, " FAST: Recomputing data by rotating matrix elements.")
571 
572    if (usepaw == 0 .or. has_fock) then
573      ! Rotate gvnlxc by solving X_new U = Y_old for X with U upper triangle.
574      ! Compute enlx with rotated cg and gvnlxc.
575      if (istwf_k == 1) then
576        call ZTRSM('R', 'U', 'N', 'N', npwsp, nband, cone, umat, nband, gvnlxc, npwsp)
577      else
578        call DTRSM('R', 'U', 'N', 'N', 2*npwsp, nband, one, umat, nband, gvnlxc, 2*npwsp)
579      end if
580      ABI_MALLOC(dots, (2, nband))
581      call cg_zdotg_zip(istwf_k, npwsp, nband, option1, cg, gvnlxc, dots, me_g0, comm_bsf)
582      enlx = dots(1,:)
583      ABI_FREE(dots)
584    end if
585 
586    if (.False.) then
587    !if (usepaw == 1) then
588      ! Compute new eigenvalues, residual vectors and norms.
589      ! Rotate ghc by solving X_new U = Y_old for X with U upper triangle.
590      if (istwf_k == 1) then
591        call ZTRSM('R', 'U', 'N', 'N', npwsp, nband, cone, umat, nband, ghc, npwsp)
592      else
593        call DTRSM('R', 'U', 'N', 'N', 2*npwsp, nband, one, umat, nband, ghc, 2*npwsp)
594      end if
595      ABI_MALLOC_OR_DIE(gwork, (2, npwsp*nband), ierr)
596      call cg_get_eigens(usepaw, istwf_k, npwsp, nband, cg, ghc, gsc, eig, me_g0, comm_bsf)
597      call cg_get_residvecs(usepaw, npwsp, nband, eig, cg, ghc, gsc, gwork)
598      call cg_norm2g(istwf_k, npwsp, nband, gwork, resid, me_g0, comm_bsf)
599      call rmm_ydoc%add_tabular_line(resids2str("ortho_rot"), indent=0)
600      ABI_FREE(gwork)
601    end if
602 
603  else
604    !if (prtvol == -level)
605    if (after_ortho == 1) call wrtout(std_out, " SLOW: Recomputing enlx gvnlx by calling nonlop.")
606    if (after_ortho == 2) call wrtout(std_out, " VERY-SLOW: Recomputing eigens and residues by calling getghc.")
607 
608    do iblock=1,nblocks
609      igs = 1 + (iblock - 1) * npwsp * bsize; ige = min(iblock * npwsp * bsize, npwsp * nband)
610      ndat = (ige - igs + 1) / npwsp
611      ib_start = 1 + (iblock - 1) * bsize; ib_stop = min(iblock * bsize, nband)
612      cg_bk => cg(:,igs:ige); if (usepaw == 1) gsc_bk => gsc(1:2,igs:ige)
613      if (savemem == 0) then
614        ghc_bk => ghc(:,igs:ige); gvnlxc_bk => gvnlxc(:,igs:ige)
615      end if
616 
617      select case (after_ortho)
618      case (1)
619        ! recompute NC enlx_bx after ortho.
620        ! eigens and residuals are inconsistent as they have been computed before pw_orthon.
621        signs = 1; paw_opt = 0
622        if (usepaw == 1) then
623          signs = 2; paw_opt = 3
624        end if
625        if (paral_kgb == 0) then
626          call nonlop(choice1, cpopt, cprj_dum, enlx(ib_start:), gs_hamk, 0, eig(ib_start), &
627                      mpi_enreg, ndat, 1, paw_opt, signs, gsc_bk, tim_nonlop, cg_bk, gvnlxc_bk)
628        else
629          call prep_nonlop(choice1, cpopt, cprj_dum, enlx(ib_start), gs_hamk, 0, eig(ib_start), &
630                           ndat, mpi_enreg, 1, paw_opt, signs, gsc_bk, tim_nonlop, &
631                           cg_bk, gvnlxc_bk, already_transposed=.False.)
632        end if
633 
634      case (2)
635        ! Consistent mode: update enlx_bx, eigens, residuals after orthogonalizalization.
636        call getghc_eigresid(gs_hamk, npw, my_nspinor, ndat, cg_bk, ghc_bk, gsc_bk, mpi_enreg, prtvol, &
637                             eig(ib_start), resid(ib_start), enlx(ib_start), residv_bk, gvnlxc_bk, &
638                             normalize=usepaw == 1)
639      case default
640        ABI_BUG(sjoin("Wrong after_ortho:", itoa(after_ortho)))
641      end select
642    end do ! iblock
643 
644    call rmm_ydoc%add_tabular_line(resids2str("after_ortho"), indent=0)
645  end if ! after_ortho > 0
646 
647  !if (usepaw == 1) then
648  !  !call cgtk_fixphase(cg, gsc, 0, 0, istwf_k, mcg, mgsc, mpi_enreg, nband, npwsp, usepaw)
649  !  call cg_set_imag0_to_zero(istwf_k, me_g0, npwsp, nband, cg, max_absimag)
650  !  call cg_set_imag0_to_zero(istwf_k, me_g0, npwsp, nband, gsc, max_absimag)
651  !  call cgpaw_normalize(npwsp, nband, cg, gsc, istwf_k, me_g0, comm_bsf)
652  !end if
653 
654  if (timeit) call cwtime_report(" after_ortho ", cpu, wall, gflops)
655 
656  ! Revert mixprec to previous status before returning.
657  if (use_fft_mixprec) prev_mixprec = fftcore_set_mixprec(prev_mixprec)
658 
659  !if (dtset%prtvol > 0) then
660  if (diis%stats%length() > 0) call rmm_ydoc%add_dict("skip_stats", diis%stats)
661  call rmm_ydoc%write_and_free(std_out)
662 
663  if (timeit) call cwtime_report(" rmm_diis total: ", cpu_all, wall_all, gflops_all)
664 
665  ! Final cleanup.
666  ABI_FREE(lambda_bk)
667  ABI_FREE(dots_bk)
668  ABI_FREE(residv_bk)
669  ABI_FREE(kres_bk)
670  ABI_FREE(umat)
671  if (savemem == 0) then
672    ABI_FREE(ghc)
673    ABI_FREE(gvnlxc)
674  else
675    ABI_FREE(ghc_bk)
676    ABI_FREE(gvnlxc_bk)
677  end if
678  call diis%free()
679 
680 contains
681 
682 function resids2str(level) result(str)
683   character(len=*),intent(in) :: level
684   character(len=500) :: str
685   res_stats = stats_eval(resid(1:nb_pocc))
686   !res_stats = stats_eval(resid(1:nband))
687   write(str, "(1x, a12, 4(es10.3))") trim(level), res_stats%mean, res_stats%min, res_stats%max, res_stats%stdev
688 end function resids2str
689 
690 end subroutine rmm_diis

ABINIT/subspace_rotation [ Functions ]

[ Top ] [ Functions ]

NAME

 subspace_rotation

FUNCTION

  This routine computes the <i|H|j> matrix elements and then performs the subspace rotation
  of the orbitals (rayleigh-ritz procedure)
  The main difference with respect to other similar routines is that this implementation does not require
  the <i|H|j> matrix elements as input so it can be used before starting the wavefunction optimation
  as required e.g. by the RMM-DIIS method.
  Moreover, the routine computes the new residuals after the subspace rotation by rotating the
  matrix elements of the Hamiltonian in the new basis (requires more memory but client code
  can avoid calling getghc after subspace_rotation.

INPUTS

  gs_hamk <type(gs_hamiltonian_type)>=all data for the hamiltonian at k
  ptrvol
  mpi_enreg=information about MPI parallelization
  nband=number of bands at this k point and spin
  npw=number of plane waves at this k point
  my_nspinor=number of spinor components treated by this MPI proc.

OUTPUT

  eig(nband): New eigvalues from subspace rotation.
  If usepaw==1:
    gsc(2,*)=<g|S|c> matrix elements (S=overlap)
  enlx(nband)=contribution from each band to nonlocal psp + potential Fock ACE part of total energy, at this k-point
  ghc

SIDE EFFECTS

  cg(2,*)=updated wavefunctions
  gsc(2,*)=update <G|S|C>

SOURCE

1268 subroutine subspace_rotation(gs_hamk, prtvol, mpi_enreg, nband, npw, my_nspinor, savemem, enlx, eig, cg, gsc, ghc, gvnlxc)
1269 
1270 !Arguments ------------------------------------
1271  integer,intent(in) :: prtvol, nband, npw, my_nspinor, savemem
1272  type(gs_hamiltonian_type),intent(inout) :: gs_hamk
1273  type(mpi_type),intent(in) :: mpi_enreg
1274  real(dp),target,intent(inout) :: cg(2,npw*my_nspinor*nband)
1275  real(dp),target,intent(inout) :: gsc(2,npw*my_nspinor*nband*gs_hamk%usepaw)
1276  !real(dp),target,intent(out) :: ghc(2,npw*my_nspinor*nband)
1277  !real(dp),target,intent(out) :: gvnlxc(2,npw*my_nspinor*nband)
1278  real(dp),target,allocatable,intent(out) :: ghc(:,:), gvnlxc(:,:)
1279  real(dp),intent(out) :: eig(nband), enlx(nband)
1280 
1281 !Local variables-------------------------------
1282  integer,parameter :: type_calc0 = 0, tim_getghc = 0, use_subovl0 = 0, option1 = 1
1283  integer :: ig, ig0, ib, ierr, bsize, nblocks, iblock, npwsp, ndat, ib_start, ib_stop, paral_kgb
1284  integer :: iband, cpopt, sij_opt, igs, ige, mcg, mgsc, istwf_k, usepaw, me_g0, cplex, comm_bsf
1285  logical :: has_fock
1286  real(dp),parameter :: rdummy = zero
1287  real(dp) :: cpu, wall, gflops
1288 !arrays
1289  real(dp),target :: fake_gsc_bk(0,0)
1290  real(dp) :: subovl(use_subovl0)
1291  real(dp),allocatable :: subham(:), h_ij(:,:,:), evec(:,:,:), evec_re(:,:), gwork(:,:)
1292  real(dp),ABI_CONTIGUOUS pointer :: ghc_bk(:,:), gvnlxc_bk(:,:), gsc_bk(:,:)
1293  real(dp) :: dots(2, nband)
1294  type(pawcprj_type) :: cprj_dum(1,1)
1295 
1296 ! *************************************************************************
1297 
1298  if (timeit) call cwtime(cpu, wall, gflops, "start")
1299 
1300  usepaw = gs_hamk%usepaw; istwf_k = gs_hamk%istwf_k
1301  paral_kgb = mpi_enreg%paral_kgb; me_g0 = mpi_enreg%me_g0
1302  comm_bsf = mpi_enreg%comm_spinorfft; if (mpi_enreg%paral_kgb == 1) comm_bsf = mpi_enreg%comm_bandspinorfft
1303  npwsp = npw * my_nspinor
1304  has_fock = associated(gs_hamk%fockcommon)
1305 
1306  ! =======================================
1307  ! Apply H to input cg to compute <i|H|j>
1308  ! =======================================
1309  gsc_bk => fake_gsc_bk
1310  cpopt = -1; sij_opt = 0
1311  if (usepaw == 1) then
1312    sij_opt = 1 ! matrix elements <G|S|C> have to be computed in gsc in addition to ghc
1313    cpopt = -1  ! <p_lmn|in> (and derivatives) are computed here (and not saved)
1314  end if
1315 
1316  ! Treat states in groups of bsize bands even when paral_kgb = 0
1317  bsize = 8; if (paral_kgb == 1) bsize = mpi_enreg%nproc_band * mpi_enreg%bandpp
1318  nblocks = nband / bsize; if (mod(nband, bsize) /= 0) nblocks = nblocks + 1
1319 
1320  cplex = 2; if (istwf_k /= 1) cplex = 1
1321  !cplex = 2; if (istwf_k == 2) cplex = 1
1322 
1323  ABI_CALLOC(h_ij, (cplex, nband, nband))
1324 
1325  ! Allocate full ghc and gvnlxc to be able to rotate residuals and Vnlx matrix elements
1326  ! after subdiago. More memory but we can save a call to H|psi>.
1327  if (savemem == 0) then
1328    ABI_MALLOC_OR_DIE(ghc, (2, npwsp*nband), ierr)
1329    ABI_MALLOC_OR_DIE(gvnlxc, (2, npwsp*nband), ierr)
1330  else if (savemem == 1) then
1331    ABI_MALLOC(ghc_bk, (2, npwsp*bsize))
1332    ABI_MALLOC(gvnlxc_bk, (2, npwsp*bsize))
1333  else
1334    ABI_ERROR(sjoin("Invalid savemem:", itoa(savemem)))
1335  end if
1336 
1337  do iblock=1,nblocks
1338    igs = 1 + (iblock - 1) * npwsp * bsize; ige = min(iblock * npwsp * bsize, npwsp * nband)
1339    ndat = (ige - igs + 1) / npwsp
1340    ib_start = 1 + (iblock - 1) * bsize; ib_stop = min(iblock * bsize, nband)
1341    if (usepaw == 1) gsc_bk => gsc(1:2,igs:ige)
1342    if (savemem == 0) then
1343      ghc_bk => ghc(:, igs:ige); gvnlxc_bk => gvnlxc(:, igs:ige)
1344    end if
1345 
1346    if (paral_kgb == 0) then
1347      call getghc(cpopt, cg(:,igs:ige), cprj_dum, ghc_bk, gsc_bk, gs_hamk, gvnlxc_bk, &
1348                  rdummy, mpi_enreg, ndat, prtvol, sij_opt, tim_getghc, type_calc0)
1349    else
1350      call prep_getghc(cg(:,igs:ige), gs_hamk, gvnlxc_bk, ghc_bk, gsc_bk, rdummy, ndat, &
1351                       mpi_enreg, prtvol, sij_opt, cpopt, cprj_dum, already_transposed=.False.)
1352    end if
1353 
1354    ! Compute <i|H|j> for i=1,nband and all j in block
1355    if (cplex == 2) then
1356      call cg_zgemm("C", "N", npwsp, nband, ndat, cg, ghc_bk, h_ij(:,:,ib_start))
1357    else
1358      call dgemm("T", "N", nband, ndat, 2*npwsp, one, cg, 2*npwsp, ghc_bk, 2*npwsp, zero, h_ij(:,:,ib_start), nband)
1359    end if
1360 
1361    if (istwf_k /= 1) then
1362      do iband=ib_start, ib_stop
1363        h_ij(:,:,iband) = two * h_ij(:,:,iband)
1364 
1365        if (istwf_k == 2 .and. me_g0 == 1) then
1366          ! Gamma k-point and I have G=0. Remove double counting term.
1367          ig = 1 + (iband - ib_start) * npwsp
1368          do ib=1,nband
1369            ig0 = 1 + npwsp * (ib - 1)
1370 #if defined FC_NVHPC
1371 if (ig<0) write(100,*) ig,ig0,h_ij(1,ib,iband),cg(1,ig0),ghc_bk(1,ig)
1372 #endif
1373            h_ij(1,ib,iband) = h_ij(1,ib,iband) - cg(1,ig0) * ghc_bk(1,ig)
1374          end do
1375        end if
1376 
1377        ! Force real matrix.
1378        if (cplex == 2) h_ij(2,:,iband) = zero
1379      end do
1380    end if
1381 
1382  end do ! iblock
1383 
1384  ! Pack <i|H|j> to prepare call to subdiago.
1385  ABI_MALLOC(subham, (nband*(nband+1)))
1386  if (cplex == 2) then
1387    do iband=1,nband
1388      h_ij(2,iband,iband) = zero ! Force diagonal elements to be real
1389    end do
1390  end if
1391 
1392  if (cplex == 2) then
1393    call pack_matrix(h_ij, subham, nband, 2)
1394  else
1395    call my_pack_matrix(nband, h_ij, subham)
1396  end if
1397 
1398  ABI_FREE(h_ij)
1399  call xmpi_sum(subham, comm_bsf, ierr)
1400  if (timeit) call cwtime_report(" subspace build Hij", cpu, wall, gflops)
1401 
1402  ! ========================
1403  ! Subspace diagonalization
1404  ! =======================
1405  ! Rotate cg, gsc and compute new eigenvalues.
1406  ABI_MALLOC(evec, (2, nband, nband))
1407  mcg = npwsp * nband; mgsc = npwsp * nband * usepaw
1408  call subdiago(cg, eig, evec, gsc, 0, 0, istwf_k, mcg, mgsc, nband, npw, my_nspinor, paral_kgb, &
1409                subham, subovl, use_subovl0, usepaw, me_g0)
1410 
1411  ABI_FREE(subham)
1412  if (timeit) call cwtime_report(" subspace subdiago", cpu, wall, gflops)
1413 
1414  if (savemem == 0) then
1415    ! Rotate ghc matrix in the new subspace:
1416    !
1417    !      new_{g,b} = old_{g,i} evec_{i,b}
1418    !
1419    ! cg and PAW gsc have been already rotated in subdiago
1420    !
1421    if (cplex == 1) then
1422      ! Eigenvectors are real.
1423      ABI_MALLOC(evec_re, (nband, nband))
1424      evec_re = evec(1,:,:)
1425    end if
1426 
1427    ABI_MALLOC_OR_DIE(gwork, (2, npwsp*nband), ierr)
1428    if (cplex == 1) then
1429      call DGEMM("N", "N", 2*npwsp, nband, nband, one, ghc, 2*npwsp, evec_re, nband, zero, gwork, 2*npwsp)
1430    else
1431      call abi_zgemm_2r("N", "N", npwsp, nband, nband, cone, ghc, npwsp, evec, nband, czero, gwork, npwsp)
1432    end if
1433    call cg_zcopy(npwsp * nband, gwork, ghc)
1434 
1435    ! Rotate <G|Vnlx|Psi_n> and evaluate new enlx for NC.
1436    if (usepaw == 0 .or. has_fock) then
1437      if (cplex == 1) then
1438        call DGEMM("N", "N", 2*npwsp, nband, nband, one, gvnlxc, 2*npwsp, evec_re, nband, zero, gwork, 2*npwsp)
1439      else
1440        call abi_zgemm_2r("N", "N", npwsp, nband, nband, cone, gvnlxc, npwsp, evec, nband, czero, gwork, npwsp)
1441      end if
1442      !call abi_xgemm('N','N', vectsize, nband, nband, cone, gvnlxc, vectsize, evec, nband, czero, gwork, vectsize, x_cplx=cplx)
1443      call cg_zcopy(npwsp * nband, gwork, gvnlxc)
1444      call cg_zdotg_zip(istwf_k, npwsp, nband, option1, cg, gvnlxc, dots, me_g0, comm_bsf)
1445      enlx = dots(1,:)
1446    end if
1447    ABI_FREE(gwork)
1448    ABI_SFREE(evec_re)
1449  end if
1450 
1451  ABI_FREE(evec)
1452  if (savemem == 1) then
1453    ABI_FREE(ghc_bk)
1454    ABI_FREE(gvnlxc_bk)
1455  end if
1456 
1457  if (timeit) call cwtime_report(" subspace final rotation", cpu, wall, gflops)
1458 
1459 end subroutine subspace_rotation

m_numeric_tools/my_pack_matrix [ Functions ]

[ Top ] [ m_numeric_tools ] [ Functions ]

NAME

 my_pack_matrix

FUNCTION

 Packs a matrix into hermitian format

INPUTS

 N: size of matrix
 cplx: 2 if matrix is complex, 1 for real matrix.
 mat_in(cplx, N*N)= matrix to be packed

OUTPUT

 mat_out(cplx*N*N+1/2)= packed matrix (upper triangle)

SOURCE

1210 subroutine my_pack_matrix(n, mat_in, mat_out)
1211 
1212 !Arguments ------------------------------------
1213  integer, intent(in) :: N
1214  real(dp), intent(in) :: mat_in(N, N)
1215  real(dp), intent(out) :: mat_out(2, N*(N+1)/2)
1216 
1217 !local variables
1218  integer :: isubh, i, j
1219 ! *************************************************************************
1220 
1221  isubh = 1
1222  do j=1,N
1223    do i=1,j
1224      mat_out(1,isubh) = mat_in(i, j)
1225      mat_out(2,isubh) = zero
1226      isubh = isubh + 1
1227    end do
1228  end do
1229 
1230 end subroutine my_pack_matrix

m_rmm_diis/getghc_eigresid [ Functions ]

[ Top ] [ m_rmm_diis ] [ Functions ]

NAME

  getghc_eigresid

FUNCTION

  Compute new eigenvalues, residuals, H |psi> and enlx from cg and gsc.

INPUTS

OUTPUT

SOURCE

901 subroutine getghc_eigresid(gs_hamk, npw, my_nspinor, ndat, cg, ghc, gsc, mpi_enreg, prtvol, &
902                            eig, resid, enlx, residvecs, gvnlxc, normalize)
903 
904 !Arguments ------------------------------------
905  type(gs_hamiltonian_type),intent(inout) :: gs_hamk
906  integer,intent(in) :: npw, my_nspinor, ndat, prtvol
907  real(dp),intent(inout) :: cg(2, npw*my_nspinor*ndat)
908  real(dp),intent(out) :: ghc(2,npw*my_nspinor*ndat), gsc(2,npw*my_nspinor*ndat*gs_hamk%usepaw)
909  type(mpi_type),intent(in) :: mpi_enreg
910  real(dp),intent(out) :: eig(ndat), resid(ndat), enlx(ndat)
911  real(dp),intent(out) :: residvecs(2, npw*my_nspinor*ndat), gvnlxc(2, npw*my_nspinor*ndat)
912  logical,optional,intent(in) :: normalize
913 
914 !Local variables-------------------------------
915  integer,parameter :: type_calc0 = 0, option1 = 1, option2 = 2, tim_getghc = 0
916  integer :: istwf_k, usepaw, cpopt, sij_opt, npwsp, me_g0, comm_bsf
917  real(dp),parameter :: rdummy = zero
918  !real(dp) :: cpu, wall, gflops
919  logical :: normalize_, has_fock
920 !arrays
921  real(dp) :: dots(2, ndat)
922  type(pawcprj_type) :: cprj_dum(1,1)
923 
924 ! *************************************************************************
925 
926  !if (timeit) call cwtime(cpu, wall, gflops, "start")
927  normalize_ = .True.; if (present(normalize)) normalize_ = normalize
928  npwsp = npw * my_nspinor
929  usepaw = gs_hamk%usepaw; istwf_k = gs_hamk%istwf_k; me_g0 = mpi_enreg%me_g0
930  comm_bsf = mpi_enreg%comm_spinorfft; if (mpi_enreg%paral_kgb == 1) comm_bsf = mpi_enreg%comm_bandspinorfft
931  has_fock = associated(gs_hamk%fockcommon)
932 
933  cpopt = -1; sij_opt = 0
934  if (usepaw == 1) then
935    sij_opt = 1 ! matrix elements <G|S|C> have to be computed in gsc in addition to ghc
936    cpopt = -1  ! <p_lmn|in> (and derivatives) are computed here (and not saved)
937  end if
938 
939  ! NC normalization.
940  if (usepaw == 0 .and. normalize_) call cgnc_normalize(npwsp, ndat, cg, istwf_k, me_g0, comm_bsf)
941 
942  ! Compute H |cg>
943  !call fock_set_ieigen(gs_hamk%fockcommon, iband)
944  if (mpi_enreg%paral_kgb == 0) then
945    call getghc(cpopt, cg, cprj_dum, ghc, gsc, gs_hamk, gvnlxc, &
946                rdummy, mpi_enreg, ndat, prtvol, sij_opt, tim_getghc, type_calc0)
947  else
948    call prep_getghc(cg, gs_hamk, gvnlxc, ghc, gsc, rdummy, ndat, &
949                     mpi_enreg, prtvol, sij_opt, cpopt, cprj_dum, already_transposed=.False.)
950  end if
951 
952  ! PAW normalization must be done here once gsc is known.
953  if (usepaw == 1 .and. normalize_) call cgpaw_normalize(npwsp, ndat, cg, gsc, istwf_k, me_g0, comm_bsf)
954 
955  ! Compute new eigenvalues, residual vectors and norms.
956  call cg_get_eigens(usepaw, istwf_k, npwsp, ndat, cg, ghc, gsc, eig, me_g0, comm_bsf)
957  call cg_get_residvecs(usepaw, npwsp, ndat, eig, cg, ghc, gsc, residvecs)
958  call cg_norm2g(istwf_k, npwsp, ndat, residvecs, resid, me_g0, comm_bsf)
959 
960  if (usepaw == 0 .or. has_fock) then
961    ! Evaluate new enlx from gvnlxc.
962    call cg_zdotg_zip(istwf_k, npwsp, ndat, option1, cg, gvnlxc, dots, me_g0, comm_bsf)
963    enlx = dots(1,:)
964  end if
965 
966  !if (timeit) call cwtime_report(" getghc_eigresid", cpu, wall, gflops)
967 
968 end subroutine getghc_eigresid

m_rmm_diis/rmm_diis_eval_mats [ Functions ]

[ Top ] [ m_rmm_diis ] [ Functions ]

NAME

  rmm_diis_eval_mats

FUNCTION

  Compute matrix elements required by the RMM-DIIS method.

INPUTS

OUTPUT

SOURCE

1149 subroutine rmm_diis_eval_mats(diis, iter, ndat, me_g0, comm)
1150 
1151 !Arguments ------------------------------------
1152  class(rmm_diis_t),intent(inout) :: diis
1153  integer,intent(in) :: iter, ndat, me_g0, comm
1154 
1155 !local variables
1156  integer :: ii, ierr, idat, nprocs, option
1157  real(dp) :: dotr, doti !, cpu, wall, gflops
1158  !integer :: requests(ndat)
1159 ! *************************************************************************
1160 
1161  !if (timeit) call cwtime(cpu, wall, gflops, "start")
1162  nprocs = xmpi_comm_size(comm)
1163  option = 2; if (diis%cplex == 1) option = 1
1164 
1165  do idat=1,ndat
1166 
1167    do ii=0,iter
1168      ! <R_i|R_j>
1169      call dotprod_g(dotr, doti, diis%istwf_k, diis%npwsp, option, &
1170                     diis%chain_resv(:,:,ii,idat), diis%chain_resv(:,:,iter,idat), me_g0, xmpi_comm_self)
1171      if (ii == iter) doti = zero
1172      if (diis%cplex == 2) then
1173        diis%resmat(:, ii, iter, idat) = [dotr, doti]
1174      else
1175        diis%resmat(1, ii, iter, idat) = dotr
1176      end if
1177    end do ! ii
1178 
1179    !if (nprocs > 1) then
1180    !  call xmpi_sum(diis%resmat(:,0:iter,iter,idat), comm, ierr)
1181    !  !call xmpi_isum_ip(diis%resmat(:,0:iter,iter,idat), comm, requests(idat), ierr)
1182    !endif
1183    !if (diis%prtvol == -level) write(std_out,*)"iter, idat, resmat:", iter, idat, diis%resmat(:,0:iter,iter,idat)
1184  end do ! idat
1185 
1186  if (nprocs > 1) call xmpi_sum(diis%resmat(:,0:iter,iter,1:ndat), comm, ierr)
1187  !if (nprocs > 1) call xmpi_waitall(requests, ierr)
1188  !if (timeit) call cwtime_report(" eval_mats", cpu, wall, gflops)
1189 
1190 end subroutine rmm_diis_eval_mats

m_rmm_diis/rmm_diis_exit_iter [ Functions ]

[ Top ] [ m_rmm_diis ] [ Functions ]

NAME

  rmm_diis_exit_iter

FUNCTION

  Return true if we can exit the DIIS iteration

INPUTS

OUTPUT

SOURCE

756 logical function rmm_diis_exit_iter(diis, iter, ndat, niter_block, occ_bk, accuracy_ene, dtset, comm) result(ans)
757 
758  class(rmm_diis_t),intent(inout) :: diis
759  integer,intent(in) :: iter, ndat, niter_block, comm
760  real(dp),intent(in) :: occ_bk(ndat)
761  real(dp),intent(in) :: accuracy_ene
762  type(dataset_type),intent(in) :: dtset
763 
764 !Local variables-------------------------------
765  integer,parameter :: master = 0
766  integer :: idat, ierr, nok, checks(ndat) !nbocc,
767  real(dp) :: resid, deltae, deold , fact
768  character(len=50) :: msg_list(ndat)
769 ! *************************************************************************
770 
771  diis%last_iter = iter !; ans = .False.; return
772  if (xmpi_comm_rank(comm) /= master) goto 10
773 
774  ! Tolerances depend on accuracy_level and occupation of the state.
775  checks = 0
776 
777  do idat=1,ndat
778    resid = diis%hist_resid(iter, idat)
779    deold = diis%hist_ene(1, idat) - diis%hist_ene(0, idat)
780    deltae = diis%hist_ene(iter, idat) - diis%hist_ene(iter-1, idat)
781 
782    ! Relative criterion on eigenvalue differerence.
783    ! Abinit default in the CG part is 0.005 that is really low (0.3 in V).
784    ! Here we increase it depending whether the state is occupied or empty
785    fact = one !; if (dtset%iscf > 0 .and. abs(occ_bk(idat)) < diis%tol_occupied) fact = three
786    if (diis%accuracy_level == 1) fact = fact * 18
787    if (diis%accuracy_level == 2) fact = fact * 12
788    if (diis%accuracy_level == 3) fact = fact * 6
789    if (abs(deltae) < fact * dtset%tolrde * abs(deold)) then
790      checks(idat) = 1; msg_list(idat) = "deltae < fact * tolrde * deold"; cycle
791    end if
792 
793    if (dtset%iscf < 0) then
794      ! This is the only condition available for NSCF run.
795      if (resid < dtset%tolwfr) then
796        checks(idat) = 1; msg_list(idat) = 'resid < tolwfr'; cycle
797      end if
798 
799    else
800      ! Conditions available for SCF run.
801      if (resid < dtset%tolwfr) then
802        checks(idat) = 1; msg_list(idat) = 'resid < tolwfr'; cycle
803      end if
804 
805      ! Absolute criterion on eigenvalue difference. Assuming error on Etot ~ band_energy.
806      fact = one; if (dtset%iscf > 0 .and. abs(occ_bk(idat)) < diis%tol_occupied) fact = ten
807      if (sqrt(abs(resid)) < fact * accuracy_ene) then
808        checks(idat) = 1; msg_list(idat) = 'resid < accuracy_ene'; cycle
809      end if
810    end if
811  end do ! idat
812 
813  ! Depending on the accuracy_level either full block or a fraction of it must pass the test in order to exit.
814  nok = count(checks /= 0)
815  if (diis%accuracy_level == 1) ans = nok >= 0.65_dp * ndat
816  if (diis%accuracy_level == 2) ans = nok >= 0.75_dp * ndat
817  if (diis%accuracy_level == 3) ans = nok >= 0.90_dp * ndat
818  if (diis%accuracy_level == 4) ans = nok == ndat
819 
820  if (ans) then
821    ! Log exit only if this is not the last iteration.
822    if (iter /= niter_block) then
823      do idat=1,ndat
824        if (checks(idat) /= 0) call diis%stats%increment(msg_list(idat), 1)
825      end do
826    end if
827  end if
828 
829  ! Broadcast final decision to all ranks.
830  10 call xmpi_bcast(ans, master, comm, ierr)
831 
832 end function rmm_diis_exit_iter

m_rmm_diis/rmm_diis_free [ Functions ]

[ Top ] [ m_rmm_diis ] [ Functions ]

NAME

  rmm_diis_free

FUNCTION

  Free dynamic memory.

INPUTS

OUTPUT

SOURCE

1025 subroutine rmm_diis_free(diis)
1026 
1027 !Arguments ------------------------------------
1028  class(rmm_diis_t),intent(inout) :: diis
1029 
1030 ! *************************************************************************
1031 
1032  ABI_SFREE(diis%hist_ene)
1033  ABI_SFREE(diis%hist_resid)
1034  ABI_SFREE(diis%hist_enlx)
1035  ABI_SFREE(diis%step_type)
1036  ABI_SFREE(diis%chain_phi)
1037  ABI_SFREE(diis%chain_sphi)
1038  ABI_SFREE(diis%chain_resv)
1039  ABI_SFREE(diis%resmat)
1040 
1041  call diis%stats%free()
1042 
1043 end subroutine rmm_diis_free

m_rmm_diis/rmm_diis_new [ Functions ]

[ Top ] [ m_rmm_diis ] [ Functions ]

NAME

  rmm_diis_new

FUNCTION

  Build new rmm_diis_t instance.

INPUTS

OUTPUT

SOURCE

 984 type(rmm_diis_t) function rmm_diis_new(accuracy_level, usepaw, istwf_k, npwsp, max_niter, bsize, prtvol) result(diis)
 985 
 986 !Arguments ------------------------------------
 987  integer,intent(in) :: accuracy_level, usepaw, istwf_k, npwsp, max_niter, bsize, prtvol
 988 
 989 ! *************************************************************************
 990 
 991  diis%accuracy_level = accuracy_level
 992  diis%usepaw = usepaw
 993  diis%istwf_k = istwf_k
 994  diis%cplex = 2; if (istwf_k == 2) diis%cplex = 1
 995  diis%npwsp = npwsp
 996  diis%max_niter = max_niter
 997  diis%bsize = bsize
 998  diis%prtvol = prtvol
 999 
1000  ABI_MALLOC(diis%hist_ene, (0:max_niter+2, bsize))
1001  ABI_MALLOC(diis%hist_resid, (0:max_niter+2, bsize))
1002  ABI_CALLOC(diis%hist_enlx, (0:max_niter+2, bsize))
1003  ABI_MALLOC(diis%step_type, (0:max_niter+2, bsize))
1004  ABI_MALLOC(diis%chain_phi, (2, npwsp, 0:max_niter, bsize))
1005  ABI_MALLOC(diis%chain_sphi, (2, npwsp*usepaw, 0:max_niter, bsize))
1006  ABI_MALLOC(diis%chain_resv, (2, npwsp, 0:max_niter, bsize))
1007  ABI_CALLOC(diis%resmat, (diis%cplex, 0:max_niter, 0:max_niter, bsize)) ! <R_i|R_j>
1008 
1009 end function rmm_diis_new

m_rmm_diis/rmm_diis_print_block [ Functions ]

[ Top ] [ m_rmm_diis ] [ Functions ]

NAME

  rmm_diis_print_block

FUNCTION

  Print energies, residuals for a block of states.

INPUTS

OUTPUT

SOURCE

848 subroutine rmm_diis_print_block(diis, ib_start, ndat, istep, ikpt, isppol)
849 
850  class(rmm_diis_t),intent(in) :: diis
851  integer,intent(in) :: ib_start, ndat, istep, ikpt, isppol
852 
853 !Local variables-------------------------------
854  integer :: iter, idat, iband
855  real(dp) :: deltae, deold, dedold, absdiff
856  character(len=500) :: msg
857 ! *************************************************************************
858 
859  call wrtout(std_out, &
860    sjoin("<BEGIN RMM-DIIS-BLOCK, istep:", itoa(istep), ", ikpt:", itoa(ikpt), ", spin: ", itoa(isppol), ">"), &
861    pre_newlines=1)
862 
863  do idat=1,ndat
864    write(msg,'(1a, 2(a5), 4(a14), 1x, a6)') &
865      "#", 'iter', "band", "eigen_eV", "eigde_meV", "de/dold", "resid", "type"; call wrtout(std_out, msg)
866 
867    iband = ib_start + idat - 1
868    deold = diis%hist_ene(1, idat) - diis%hist_ene(0, idat)
869    do iter=0,diis%last_iter
870      dedold = zero; absdiff = zero
871      if (iter > 0) then
872        deltae = diis%hist_ene(iter, idat) - diis%hist_ene(iter-1, idat)
873        dedold = deltae / deold
874        absdiff = (diis%hist_ene(iter, idat) - diis%hist_ene(iter-1, idat))
875      end if
876 
877      write(msg,"(1x, 2(i5), 4(es14.6), 1x, a6)") &
878        iter, iband, diis%hist_ene(iter, idat) * Ha_eV, absdiff * Ha_meV, dedold, &
879        diis%hist_resid(iter, idat), diis%step_type(iter, idat); call wrtout(std_out, msg)
880    end do
881  end do
882 
883  call wrtout(std_out, "<END RMM-DIIS-BLOCK>", newlines=1)
884 
885 end subroutine rmm_diis_print_block

m_rmm_diis/rmm_diis_push_iter [ Functions ]

[ Top ] [ m_rmm_diis ] [ Functions ]

NAME

  rmm_diis_push_iter

FUNCTION

  Save one iteration of the DIIS algorithm.

INPUTS

OUTPUT

SOURCE

706 subroutine rmm_diis_push_iter(diis, iter, ndat, eig_bk, resid_bk, enlx_bk, cg_bk, residv_bk, gsc_bk, tag)
707 
708  class(rmm_diis_t),intent(inout) :: diis
709  integer,intent(in) :: iter, ndat
710  real(dp),intent(in) :: eig_bk(ndat), resid_bk(ndat), enlx_bk(ndat)
711  real(dp),intent(in) :: cg_bk(2, diis%npwsp*ndat), residv_bk(2, diis%npwsp*ndat), gsc_bk(2, diis%npwsp*ndat*diis%usepaw)
712  character(len=*),intent(in) :: tag
713 
714 !Local variables-------------------------------
715  integer :: idat, ibk
716 ! *************************************************************************
717 
718  diis%last_iter = iter
719  diis%hist_ene(iter, 1:ndat) = eig_bk
720  diis%hist_resid(iter, 1:ndat) = resid_bk
721  diis%hist_enlx(iter, 1:ndat) = enlx_bk
722  diis%step_type(iter, 1:ndat) = tag
723 
724  do idat=1,ndat
725    if (iter == 0) then
726      if (diis%cplex == 2) then
727        diis%resmat(:, 0, 0, idat) = [resid_bk(idat), zero]
728      else
729        diis%resmat(:, 0, 0, idat) = resid_bk(idat)
730      end if
731    end if
732    !write(std_out, *)"res0", diis%resmat(:, 0, 0, idat)
733    diis%step_type(iter, idat) = tag
734    ibk = 1 + (idat - 1) * diis%npwsp
735    call cg_zcopy(diis%npwsp, cg_bk(:,ibk), diis%chain_phi(:,:,iter,idat))
736    call cg_zcopy(diis%npwsp, residv_bk(:,ibk), diis%chain_resv(:,:,iter,idat))
737    if (diis%usepaw == 1) call cg_zcopy(diis%npwsp, gsc_bk(:,ibk), diis%chain_sphi(:,:,iter,idat))
738  end do
739 
740 end subroutine rmm_diis_push_iter

m_rmm_diis/rmm_diis_update_block [ Functions ]

[ Top ] [ m_rmm_diis ] [ Functions ]

NAME

  rmm_diis_update_block

FUNCTION

  Compute new trial wavefunctions and residuals from the DIIS chain.

INPUTS

OUTPUT

SOURCE

1059 subroutine rmm_diis_update_block(diis, iter, npwsp, ndat, cg_bk, residv_bk, comm)
1060 
1061 !Arguments ------------------------------------
1062  class(rmm_diis_t),intent(in) :: diis
1063  integer,intent(in) :: iter, npwsp, comm, ndat
1064  real(dp),intent(inout) :: cg_bk(2, npwsp, ndat), residv_bk(2, npwsp, ndat)
1065 
1066 !local variables
1067  integer,parameter :: master = 0
1068  integer :: cplex, ierr, nprocs, my_rank, idat
1069  real(dp) :: noise !, cpu, wall, gflops
1070  real(dp),allocatable :: wmat1(:,:,:), wvec(:,:,:), alphas(:,:)
1071  character(len=500) :: msg
1072  ! *************************************************************************
1073 
1074  !if (timeit) call cwtime(cpu, wall, gflops, "start")
1075  my_rank = xmpi_comm_rank(comm); nprocs = xmpi_comm_size(comm)
1076  cplex = diis%cplex
1077 
1078  ! Solve system of linear equations.
1079  ! Only master works so that we are sure we have the same solution.
1080  ABI_CALLOC(wvec, (cplex, 0:iter, ndat))
1081 
1082  if (my_rank == master) then
1083    ABI_MALLOC(wmat1, (cplex, 0:iter, 0:iter))
1084 
1085    do idat=1,ndat
1086      !if (mod(idat, nprocs) /= my_rank) cycle ! MPI parallelism
1087      wvec(1, iter, idat) = -one
1088      wmat1 = zero
1089      wmat1(1,:,iter) = -one
1090      wmat1(1,iter,:) = -one
1091      wmat1(1,iter,iter) = zero
1092      wmat1(:,0:iter-1, 0:iter-1) = diis%resmat(:, 0:iter-1, 0:iter-1, idat)
1093 
1094      call xhesv_cplex("U", cplex, iter+1, 1, wmat1, wvec(:,:,idat), msg, ierr)
1095      ABI_CHECK(ierr == 0, msg)
1096 
1097      !if (diis%prtvol == -level) then
1098      !  write(std_out,*)"wvec:", wvec(:,:,idat)
1099      !  write(std_out,*)"sum(wvec):", sum(wvec(:, 0:iter-1, idat), dim=2)
1100      !end if
1101      if (cplex == 2) then
1102        ! coefficients should sum up to 1 but sometimes we get a small imaginary part. here we remove it
1103        noise = sum(wvec(2, 0:iter-1, idat))
1104        wvec(2, 0:iter-1, idat) = wvec(2, 0:iter-1, idat) - noise * iter
1105      end if
1106 
1107    end do
1108    ABI_FREE(wmat1)
1109  end if
1110 
1111  ! Master broadcasts data.
1112  if (nprocs > 1) call xmpi_bcast(wvec, master, comm, ierr)
1113  !if (nprocs > 1) call xmpi_sum(wvec, comm, ierr)
1114 
1115  ! Take linear combination of chain_phi and chain_resv.
1116  do idat=1,ndat
1117    if (cplex == 2) then
1118      call cg_zgemv("N", npwsp, iter, diis%chain_phi(:,:,:,idat), wvec(:,:,idat), cg_bk(:,:,idat))
1119      call cg_zgemv("N", npwsp, iter, diis%chain_resv(:,:,:,idat), wvec(:,:,idat), residv_bk(:,:,idat))
1120    else
1121      ! coefficients are real --> use DGEMV
1122      ABI_MALLOC(alphas, (1, 0:iter))
1123      alphas(1,:) = wvec(1,:,idat)
1124      call dgemv("N", 2*npwsp, iter, one, diis%chain_phi(:,:,:,idat), 2*npwsp, alphas, 1, zero, cg_bk(:,:,idat), 1)
1125      call dgemv("N", 2*npwsp, iter, one, diis%chain_resv(:,:,:,idat), 2*npwsp, alphas, 1, zero, residv_bk(:,:,idat), 1)
1126      ABI_FREE(alphas)
1127    end if
1128  end do
1129 
1130  ABI_FREE(wvec)
1131  !if (timeit) call cwtime_report(" update_block", cpu, wall, gflops)
1132 
1133 end subroutine rmm_diis_update_block