TABLE OF CONTENTS
- ABINIT/m_rmm_diis
- ABINIT/rmm_diis
- ABINIT/subspace_rotation
- m_numeric_tools/my_pack_matrix
- m_rmm_diis/getghc_eigresid
- m_rmm_diis/rmm_diis_eval_mats
- m_rmm_diis/rmm_diis_exit_iter
- m_rmm_diis/rmm_diis_free
- m_rmm_diis/rmm_diis_new
- m_rmm_diis/rmm_diis_print_block
- m_rmm_diis/rmm_diis_push_iter
- m_rmm_diis/rmm_diis_update_block
ABINIT/m_rmm_diis [ Modules ]
NAME
m_rmm_diis
FUNCTION
This module contains routines for the RMM-DIIS eigenvalue solver.
COPYRIGHT
Copyright (C) 2020-2024 ABINIT group (MG) This file is distributed under the terms of the GNU General Public License, see ~abinit/COPYING or http://www.gnu.org/copyleft/gpl.txt .
SOURCE
16 #if defined HAVE_CONFIG_H 17 #include "config.h" 18 #endif 19 20 #include "abi_common.h" 21 22 module m_rmm_diis 23 24 use defs_basis 25 use m_errors 26 use m_xmpi 27 use m_abicore 28 use m_dtset 29 use m_cgtools 30 use m_hide_blas 31 use m_yaml 32 use m_linalg_interfaces 33 use m_prep_kgb 34 35 use defs_abitypes, only : mpi_type 36 use m_fstrings, only : sjoin, itoa, ftoa 37 use m_time, only : timab, cwtime, cwtime_report 38 use m_numeric_tools, only : pack_matrix, imin_loc, stats_t, stats_eval 39 use m_hide_lapack, only : xhegv_cplex, xhesv_cplex 40 use m_pair_list, only : pair_list 41 use m_pawcprj, only : pawcprj_type, pawcprj_alloc, pawcprj_free 42 use m_fftcore, only : fftcore_set_mixprec 43 use m_hamiltonian, only : gs_hamiltonian_type 44 use m_getghc, only : getghc 45 use m_nonlop, only : nonlop 46 use m_cgtk, only : cgtk_fixphase 47 use m_abi_linalg, only : abi_zgemm_2r 48 !use m_fock, only : fock_set_ieigen, fock_set_getghc_call 49 50 implicit none 51 52 private
ABINIT/rmm_diis [ Functions ]
NAME
rmm_diis
FUNCTION
This routine updates the wave functions at a given (k-point, spin), using the RMM-DIIS method.
INPUTS
istep,ikpt,isppol=Iteration step, k-point index, spin index (mainly for printing purposes). dtset <type(dataset_type)>=all input variables for this dataset gs_hamk <type(gs_hamiltonian_type)>=all data for the hamiltonian at k kinpw(npw)=(modified) kinetic energy for each plane wave (hartree) mpi_enreg=information about MPI parallelization nband=number of bands at this k point for that spin polarization npw=number of plane waves at this k point my_nspinor=number of spinors treated by this MPI proc
OUTPUT
eig(nband)=array for holding eigenvalues (hartree) If usepaw==1: gsc(2,*)=<g|s|c> matrix elements (s=overlap) If usepaw==0 enlx(nband)=contribution from each band to nonlocal psp + potential Fock ACE part of total energy, at this k-point
SIDE EFFECTS
cg(2,*)=updated wavefunctions resid(nband)=residuals for each states. In input: previous residuals for this k-point, spin. In output: new residuals. rmm_diis_status(2): Status of the eigensolver. The first entry gives the previous accuracy. The second entry gives the number of iterations already performed with this level.
SOURCE
163 subroutine rmm_diis(istep, ikpt, isppol, cg, dtset, eig, occ, enlx, gs_hamk, kinpw, gsc, & 164 mpi_enreg, nband, npw, my_nspinor, resid, rmm_diis_status) 165 166 !Arguments ------------------------------------ 167 integer,intent(in) :: istep, ikpt, isppol, nband, npw, my_nspinor 168 type(gs_hamiltonian_type),intent(inout) :: gs_hamk 169 type(dataset_type),intent(in) :: dtset 170 type(mpi_type),intent(in) :: mpi_enreg 171 real(dp),target,intent(inout) :: cg(2,npw*my_nspinor*nband) 172 real(dp),target,intent(inout) :: gsc(2,npw*my_nspinor*nband*dtset%usepaw) 173 real(dp),intent(inout) :: enlx(nband), resid(nband) 174 real(dp),intent(in) :: occ(nband), kinpw(npw) 175 real(dp),intent(out) :: eig(nband) 176 integer,intent(inout) :: rmm_diis_status(2) 177 178 !Local variables------------------------------- 179 integer,parameter :: type_calc0 = 0, option1 = 1, option2 = 2, tim_getghc = 0 180 integer,parameter :: choice1 = 1, signs1 = 1, signs2 = 2, tim_nonlop = 0, paw_opt0 = 0, paw_opt3 = 3 181 integer :: ierr, prtvol, bsize, nblocks, iblock, npwsp, ndat, ib_start, ib_stop, idat, paral_kgb !, ortalgo 182 integer :: cpopt, sij_opt, igs, ige, mcg, mgsc, istwf_k, optekin, usepaw, iter, max_niter, max_niter_block 183 integer :: me_g0, nb_pocc, jj, kk, accuracy_level, raise_acc, prev_mixprec, after_ortho, me_cell 184 integer :: comm_bsf, prev_accuracy_level, ncalls_with_prev_accuracy, signs, paw_opt, savemem 185 logical :: first_call, use_fft_mixprec, has_fock 186 real(dp),parameter :: rdummy = zero 187 real(dp) :: accuracy_ene, max_res_pocc, tol_occupied, lock_tolwfr !, max_absimag 188 real(dp) :: cpu, wall, gflops, cpu_all, wall_all, gflops_all 189 character(len=500) :: msg 190 type(yamldoc_t) :: rmm_ydoc 191 type(rmm_diis_t) :: diis 192 type(stats_t) :: res_stats 193 !arrays 194 real(dp) :: tsec(2) 195 real(dp),target :: fake_gsc_bk(0,0) 196 real(dp),allocatable :: lambda_bk(:), kres_bk(:,:), dots_bk(:,:), residv_bk(:,:) 197 real(dp),allocatable :: umat(:,:,:), gwork(:,:), dots(:, :) 198 real(dp),target,allocatable :: ghc(:,:), gvnlxc(:,:) 199 real(dp), ABI_CONTIGUOUS pointer :: gsc_bk(:,:), cg_bk(:,:), ghc_bk(:,:), gvnlxc_bk(:,:) 200 type(pawcprj_type) :: cprj_dum(1,1) 201 202 ! ************************************************************************* 203 204 ! Define useful vars. 205 usepaw = dtset%usepaw; istwf_k = gs_hamk%istwf_k; paral_kgb = mpi_enreg%paral_kgb 206 me_g0 = mpi_enreg%me_g0; comm_bsf = mpi_enreg%comm_bandspinorfft 207 npwsp = npw * my_nspinor; mcg = npwsp * nband; mgsc = npwsp * nband * usepaw 208 me_cell = mpi_enreg%me_cell; prtvol = dtset%prtvol !; prtvol = -level 209 has_fock = associated(gs_hamk%fockcommon) 210 211 if (timeit) then 212 call cwtime(cpu_all, wall_all, gflops_all, "start") 213 call cwtime(cpu, wall, gflops, "start") 214 end if 215 216 ! ================= 217 ! Prepare DIIS loop 218 ! ================= 219 ! accuracy_level is computed from the maxval of the previous residuals received in input. 220 ! The different levels are: 221 ! 222 ! 1: Used at the beginning of the SCF cycle. Use loosy convergence criteria in order 223 ! to reduce the number of H|psi> applications as much as possible so that 224 ! we can start to mix densities/potentials. 225 ! Allow for incosistent data in output rediduals and Vnl matrix elements. 226 ! Move to the next level after Max 15 iterations. 227 ! 228 ! 2: Intermediate step. Decrease convergence criteria in order to perform more wavefuction iterations. 229 ! Allow for incosistent data in output rediduals and Vnl matrix elements. 230 ! Move to the next level after Max 25 iterations. 231 ! 232 ! 3: Approaching convergence. Use stricter convergence criteria. 233 ! Move to the next level after Max 25 iterations. 234 ! 235 ! 4: Ultimate precision. Try to reach the same accuracy as the other eigenvalue solvers. 236 ! This implies: using similar convergence criteria as in the other solvers. 237 238 ! Note: 239 ! 240 ! * Accuracy_level is not allowed to increase during the SCF cycle 241 ! 242 ! * Since we operate on blocks of bands, all the states in the block will receive the same treatment. 243 ! This means that one can observe different convergence behaviour depending on bsize. 244 ! 245 if (all(rmm_diis_status == 0)) then 246 ! This is the first time we call rmm_diis for this (k-point, spin) 247 prev_accuracy_level = 1; ncalls_with_prev_accuracy = 0 248 first_call = .True. 249 else 250 prev_accuracy_level = rmm_diis_status(1); ncalls_with_prev_accuracy = rmm_diis_status(2) 251 first_call = .False. 252 end if 253 254 ! Decide whether we should move to the next level. 255 raise_acc = 0 256 if (prev_accuracy_level == 1 .and. ncalls_with_prev_accuracy >= 15) raise_acc = 2 257 if (prev_accuracy_level == 2 .and. ncalls_with_prev_accuracy >= 25) raise_acc = 3 258 if (prev_accuracy_level == 3 .and. ncalls_with_prev_accuracy >= 25) raise_acc = 4 259 if (raise_acc > 0) then 260 ABI_COMMENT("Accuracy_level is automatically increased as we reached the max number of NSCF iterations.") 261 end if 262 raise_acc = max(raise_acc, prev_accuracy_level) 263 264 ! Define tolerance for occupied states on the basis of prev_accuracy_level 265 ! and compute max of residuals for these bands. 266 tol_occupied = zero 267 if (dtset%iscf > 0) then 268 tol_occupied = tol3; if (any(prev_accuracy_level == [1])) tol_occupied = tol2 269 end if 270 nb_pocc = count(occ >= tol_occupied) 271 max_res_pocc = maxval(resid(1:nb_pocc)) 272 273 ! Define accuracy_level for this run. 274 accuracy_level = 1 275 if (max_res_pocc < tol8) accuracy_level = 2 276 if (max_res_pocc < tol12) accuracy_level = 3 277 if (max_res_pocc < tol16) accuracy_level = 4 278 !if (max_res_pocc < tol18) accuracy_level = 4 279 accuracy_level = max(prev_accuracy_level, accuracy_level, raise_acc) 280 if (istep == 1) accuracy_level = 2 ! FIXME: Differenciate between restart or rmm_diis - 3. 281 if (first_call .and. max_res_pocc == zero) accuracy_level = 1 282 !print *, "rmm_diis_status:", rmm_diis_status 283 !print *, "rmm_prev_acc:", prev_accuracy_level, "rmm_raise_acc:", raise_acc 284 !print *, "accuracy_level:", accuracy_level, "rmm_raise_acc:", raise_acc 285 286 ! Update rmm_diis_status. Reset number of calls if we've just moved to a new accuracy_level. 287 rmm_diis_status(1) = accuracy_level 288 if (accuracy_level /= prev_accuracy_level) rmm_diis_status(2) = 0 289 rmm_diis_status(2) = rmm_diis_status(2) + 1 290 291 ! Will perform max_niter DIIS steps. Usually 3 as nline by default is 4. 292 ! Note that, unlike in Vasp's recipe, here we don't end with a trial step after DIIS. 293 max_niter = max(dtset%nline - 1, 1) 294 if (accuracy_level >= 4) max_niter = dtset%nline 295 if (dtset%iscf < 0) max_niter = dtset%nline + 1 296 297 ! Define accuracy_ene for SCF. 298 accuracy_ene = zero 299 if (dtset%iscf > 0) then 300 if (dtset%toldfe /= zero) then 301 accuracy_ene = dtset%toldfe * ten**(-accuracy_level + 2) / nb_pocc 302 else 303 ! We are not using toldfe to stop the SCF cycle 304 ! so we are forced to hardcode a tolerance for the absolute diff in the KS eigenvalue. 305 accuracy_ene = tol8 * ten**(-accuracy_level + 2) / nb_pocc 306 end if 307 end if 308 309 ! Tolerance on residuals used for band locking after subdiago. 310 if (dtset%tolwfr > zero) then 311 lock_tolwfr = tol2 * dtset%tolwfr 312 else 313 lock_tolwfr = tol14 314 if (accuracy_level >= 2) lock_tolwfr = tol16 315 if (accuracy_level >= 3) lock_tolwfr = tol18 316 if (accuracy_level >= 4) lock_tolwfr = tol20 * tol2 317 end if 318 319 ! Use mixed precisions if requested by the user but only for low accuracy_level 320 use_fft_mixprec = dtset%mixprec == 1 .and. accuracy_level < 2 321 if (use_fft_mixprec) prev_mixprec = fftcore_set_mixprec(1) 322 323 ! Select preconditioning. 324 optekin = 0; if (dtset%wfoptalg >= 10) optekin = 1 325 optekin = 1 ! optekin = 0 326 327 ! Will treat states in groups of bsize bands even when paral_kgb = 0 328 bsize = 8; if (paral_kgb == 1) bsize = mpi_enreg%nproc_band * mpi_enreg%bandpp 329 nblocks = nband / bsize; if (mod(nband, bsize) /= 0) nblocks = nblocks + 1 330 331 ! Build DIIS object. 332 diis = rmm_diis_new(accuracy_level, usepaw, istwf_k, npwsp, max_niter, bsize, prtvol) 333 diis%tol_occupied = tol_occupied 334 !call wrtout(std_out, sjoin(" Using Max", itoa(max_niter), "RMM-DIIS iterations")) 335 !call wrtout(std_out, sjoin( & 336 ! " Max_input_resid_pocc", ftoa(max_res_pocc), "accuracy_level:", itoa(accuracy_level), & 337 ! ", accuracy_ene: ", ftoa(accuracy_ene))) 338 call timab(1634, 1, tsec) ! "rmm_diis:band_opt" 339 340 rmm_ydoc = yamldoc_open("RMM-DIIS", with_iter_state=.False.) 341 call rmm_ydoc%add_ints("ikpt, isppol, istep, accuracy_level", [ikpt, isppol, istep, accuracy_level]) 342 call rmm_ydoc%open_tabular("RESIDS_POCC") !, tag, indent, newline, comment) 343 write(msg, "(1x, a12, 4(a10))")"level", "mean", "min", "max", "stdev" 344 call rmm_ydoc%add_tabular_line(msg, indent=0) 345 call rmm_ydoc%add_tabular_line(resids2str("input"), indent=0) 346 347 ! ========================= 348 ! === Subspace rotation === 349 ! ========================= 350 ! Allocate big (scalable) array with <G|H|C> for all nband so that we can recompute the residuals after the rotation. 351 ! This approach requires more memory but we avoid one extra call to H|Psi> per band. 352 ! Alternatively, one can compute ghc and the residuals by applying H|psi> 353 ! inside the loop over blocks (less memory but slower). 354 savemem = dtset%rmm_diis_savemem 355 !savemem = 1 356 !if (savemem == 0) then 357 ! ABI_MALLOC_OR_DIE(ghc, (2, npwsp*nband), ierr) 358 ! ABI_MALLOC_OR_DIE(gvnlxc, (2, npwsp*nband), ierr) 359 !end if 360 361 call subspace_rotation(gs_hamk, dtset%prtvol, mpi_enreg, nband, npw, my_nspinor, savemem, & 362 enlx, eig, cg, gsc, ghc, gvnlxc) 363 364 gsc_bk => fake_gsc_bk 365 cpopt = -1; sij_opt = 0 366 if (usepaw == 1) then 367 sij_opt = 1 ! matrix elements <G|S|C> have to be computed in gsc in addition to ghc 368 cpopt = -1 ! <p_lmn|in> (and derivatives) are computed here (and not saved) 369 end if 370 371 ABI_MALLOC(lambda_bk, (bsize)) 372 ABI_MALLOC(dots_bk, (2, bsize)) 373 ABI_MALLOC(residv_bk, (2, npwsp*bsize)) 374 ABI_MALLOC(kres_bk, (2, npwsp*bsize)) 375 376 if (savemem == 1) then 377 ABI_MALLOC(ghc_bk, (2, npwsp*bsize)) 378 ABI_MALLOC(gvnlxc_bk, (2, npwsp*bsize)) 379 end if 380 !write(msg, "(a,f8.1,a)") & 381 ! " Memory required: ", 2 * natom3**2 * (my_q2 - my_q1 + 1) * dp * b2Mb, " [Mb] <<< MEM" 382 !call wrtout(std_out, msg) 383 384 385 ! We loop over nblocks, each block contains ndat states. 386 ! 387 ! - Convergence behaviour may depend on bsize as branches are taken according to 388 ! the status of all bands in the block. 389 ! TODO: Transpose only once per block and then work with already_transposed = .True. 390 if (timeit) call cwtime(cpu, wall, gflops, "start") 391 392 do iblock=1,nblocks 393 igs = 1 + (iblock - 1) * npwsp * bsize; ige = min(iblock * npwsp * bsize, npwsp * nband) 394 ndat = (ige - igs + 1) / npwsp 395 ib_start = 1 + (iblock - 1) * bsize; ib_stop = min(iblock * bsize, nband) 396 397 ! Reduce number of niter iterations if block contains "empty" states. 398 ! This should happen only if npband is small wrt nband and nband >> nbocc. 399 ! TODO: Don't reduce niter if MD 400 max_niter_block = max_niter 401 if (dtset%iscf > 0) then 402 if (all(occ(ib_start:ib_stop) < diis%tol_occupied)) max_niter_block = max(1 + max_niter / 2, 2) 403 end if 404 405 ! Compute H |phi_0> with cg block after subdiago. 406 cg_bk => cg(:,igs:ige); if (usepaw == 1) gsc_bk => gsc(1:2,igs:ige) 407 408 ! Compute residual vectors after subspace_rotation. 409 if (savemem == 0) then 410 ghc_bk => ghc(:,igs:ige); gvnlxc_bk => gvnlxc(:,igs:ige) 411 call cg_get_residvecs(usepaw, npwsp, ndat, eig(ib_start), cg_bk, ghc_bk, gsc_bk, residv_bk) 412 call cg_norm2g(istwf_k, npwsp, ndat, residv_bk, resid(ib_start), me_g0, comm_bsf) 413 else 414 call getghc_eigresid(gs_hamk, npw, my_nspinor, ndat, cg_bk, ghc_bk, gsc_bk, mpi_enreg, prtvol, & 415 eig(ib_start), resid(ib_start), enlx(ib_start), residv_bk, gvnlxc_bk, normalize=.False.) 416 end if 417 418 ! Band locking. 419 if (all(resid(ib_start:ib_stop) < lock_tolwfr)) then 420 call diis%stats%increment("locked", ndat) 421 cycle ! iblock 422 end if 423 424 ! Save <R0|R0> and <phi_0|S|phi_0>, |phi_0>, |S phi_0>. Assume input cg_bk is already S-normalized. 425 call diis%push_iter(0, ndat, eig(ib_start), resid(ib_start), enlx(ib_start), cg_bk, residv_bk, gsc_bk, "SDIAG") 426 427 ! Line minimization with preconditioned steepest descent: 428 ! 429 ! |phi_1> = |phi_0> + lambda |K R_0> 430 ! 431 ! where lambda minimizes the residual (we don't try to find the stationary 432 ! point of the Rayleigh quotient as in Kresse's paper). 433 ! 434 ! lambda = - Re{<R_0|(H - e_0 S)} |K R_0>} / |(H - e_0 S) |K R_0>|**2 435 ! 436 ! more expensive than finding the stationary point of the Rayleigh quotient as it requires 437 ! an extra H application but it should be more stable and more consistent with the RMM approach. 438 ! 439 ! Precondition |R_0>, output in kres_bk = |K R_0> 440 call cg_zcopy(npwsp * ndat, residv_bk, kres_bk) 441 call cg_precon_many(istwf_k, npw, my_nspinor, ndat, cg_bk, optekin, kinpw, kres_bk, me_g0, comm_bsf) 442 443 ! Compute H |K R_0> 444 if (paral_kgb == 0) then 445 call getghc(cpopt, kres_bk, cprj_dum, ghc_bk, gsc_bk, gs_hamk, gvnlxc_bk, & 446 rdummy, mpi_enreg, ndat, prtvol, sij_opt, tim_getghc, type_calc0) 447 else 448 call prep_getghc(kres_bk, gs_hamk, gvnlxc_bk, ghc_bk, gsc_bk, rdummy, ndat, & 449 mpi_enreg, prtvol, sij_opt, cpopt, cprj_dum, already_transposed=.False.) 450 end if 451 452 ! Compute (H - e_0 S) |K R_0> 453 call cg_get_residvecs(usepaw, npwsp, ndat, eig(ib_start), kres_bk, ghc_bk, gsc_bk, residv_bk) 454 call cg_norm2g(istwf_k, npwsp, ndat, residv_bk, lambda_bk, me_g0, comm_bsf) 455 456 ! Compute lambda 457 dots_bk = zero 458 do idat=1,ndat 459 jj = 1 + (idat - 1) * npwsp; kk = idat * npwsp 460 call dotprod_g(dots_bk(1,idat), dots_bk(2,idat), istwf_k, npwsp, option1, & 461 diis%chain_resv(:,:,0,idat), residv_bk(:,jj), me_g0, xmpi_comm_self) 462 end do 463 call xmpi_sum(dots_bk, comm_bsf, ierr) 464 465 ! Build |Psi_1> = |Phi_0> + lambda |K R_0> 466 do idat=1,ndat 467 lambda_bk(idat) = -dots_bk(1,idat) / lambda_bk(idat) 468 jj = 1 + (idat - 1) * npwsp; kk = idat * npwsp 469 cg_bk(:,jj:kk) = diis%chain_phi(:,:,0,idat) + lambda_bk(idat) * kres_bk(:,jj:kk) 470 end do 471 472 ! =============== 473 ! DIIS iterations 474 ! =============== 475 iter_loop: do iter=1,max_niter_block 476 477 if (iter > 1) then 478 ! Solve DIIS equations and update cg_bk and residv_bk for iter > 1 479 call diis%update_block(iter, npwsp, ndat, cg_bk, residv_bk, comm_bsf) 480 481 ! Precondition residual, output in kres_bk. 482 call cg_zcopy(npwsp * ndat, residv_bk, kres_bk) 483 call cg_precon_many(istwf_k, npw, my_nspinor, ndat, cg_bk, optekin, kinpw, kres_bk, me_g0, comm_bsf) 484 485 ! Compute cg_bk with the same lambda(ndat) obtained at iteration #0 486 call cg_zaxpy_many_areal(npwsp, ndat, lambda_bk, kres_bk, cg_bk) 487 end if 488 489 ! Compute H |phi_now> and evaluate new enlx for NC. 490 call getghc_eigresid(gs_hamk, npw, my_nspinor, ndat, cg_bk, ghc_bk, gsc_bk, mpi_enreg, prtvol, & 491 eig(ib_start), resid(ib_start), enlx(ib_start), residv_bk, gvnlxc_bk, normalize=.True.) 492 493 ! Store new wavevefunctions and residuals. 494 call diis%push_iter(iter, ndat, eig(ib_start), resid(ib_start), enlx(ib_start), cg_bk, residv_bk, gsc_bk, "DIIS") 495 496 ! CHECK FOR CONVERGENCE 497 if (diis%exit_iter(iter, ndat, max_niter_block, occ(ib_start), accuracy_ene, dtset, comm_bsf)) exit iter_loop 498 499 ! Compute <R_i|R_j> and <i|S|j> for j=iter 500 if (iter /= max_niter_block) call diis%eval_mats(iter, ndat, me_g0, comm_bsf) 501 end do iter_loop 502 503 if (prtvol == -level) call diis%print_block(ib_start, ndat, istep, ikpt, isppol) 504 end do ! iblock 505 506 call timab(1634, 2, tsec) !"rmm_diis:band_opt" 507 if (timeit) call cwtime_report(" rmm_diis:band_opt", cpu, wall, gflops) 508 call rmm_ydoc%add_tabular_line(resids2str("rmm-diis"), indent=0) 509 510 ! =============================== 511 ! Orthogonalize states after DIIS 512 ! =============================== 513 call timab(583,1,tsec) ! "vtowfk(pw_orthon)" 514 515 !ortalgo = 3 !; ortalgo = mpi_enreg%paral_kgb 516 !call pw_orthon(0, 0, istwf_k, mcg, mgsc, npwsp, nband, ortalgo, gsc, usepaw, cg, me_g0, comm_bsf) 517 518 ! TODO: Merge the two routines. 519 if (usepaw == 1) then 520 !call cgtk_fixphase(cg, gsc, 0, 0, istwf_k, mcg, mgsc, mpi_enreg, nband, npwsp, usepaw) 521 !call cgpaw_normalize(npwsp, nband, cg, gsc, istwf_k, me_g0, comm_bsf) 522 523 call cgpaw_cholesky(npwsp, nband, cg, gsc, istwf_k, me_g0, comm_bsf, umat=umat) 524 525 !call cgtk_fixphase(cg, gsc, 0, 0, istwf_k, mcg, mgsc, mpi_enreg, nband, npwsp, usepaw) 526 !call cgpaw_normalize(npwsp, nband, cg, gsc, istwf_k, me_g0, comm_bsf) 527 else 528 call cgnc_cholesky(npwsp, nband, cg, istwf_k, me_g0, comm_bsf, use_gemm=.False., umat=umat) 529 end if 530 531 call timab(583,2,tsec) 532 if (timeit) call cwtime_report(" pw_orthon ", cpu, wall, gflops) 533 534 ! Recompute eigenvalues, residuals, and enlx after orthogonalization. 535 ! This step is important to improve the convergence of the NC total energy 536 ! and it guarantees that eigenvalues and residuals are consistent with the output wavefunctions. 537 ! but we try to avoid it at the beginning of the SCF cycle. 538 ! NB: In principle, one can rotate Vnl(b,b') using U^-1 from the Cholesky decomposition 539 ! but the full Vnl matrix should be computed before the ortho step. 540 541 ! Select value of after_ortho: 542 ! 543 ! 0: return with inconsistent eigenvalues, residuals and enlx_bk to avoid final H |Psi>. 544 ! 1: recompute enlx_bx after ortho. Return inconsistent eigens and residuals (last DIIS iteration). 545 ! 2: fully consistent mode: execute final H|Psi> after ortho step to update enlx_bx, eigens, residuals 546 ! 547 ! Total number of H |Psi> applications: 548 ! 549 ! 1 for subdiago. 550 ! 1 for preconditioned steepest descent. 551 ! (nline - 1) for DIIS or nline if ultimate accuracy is reached. 552 ! 1 if after_ortho > 0 553 ! 554 after_ortho = 0 555 if (accuracy_level >= 2) after_ortho = 1 556 if (accuracy_level >= 4) after_ortho = 2 557 if (after_ortho >= 1 .and. savemem == 0) after_ortho = 1 558 ! It seems that PAW is more sensitive to after_ortho. Perhaps I can avoid the final H|phi> if accuracy_level == 1 559 !if (usepaw == 1) after_ortho = 1 560 !if (usepaw == 1) after_ortho = 0 561 if (usepaw == 1) after_ortho = 2 ! FIXME ?? 562 563 if (after_ortho == 0) then 564 !if (prtvol == -level) 565 call wrtout(std_out, " VERY-FAST: Won't recompute data after orthogonalization.") 566 567 !else if (after_ortho == 1 .and. savemem == 0) then 568 else if (after_ortho == 1 .and. savemem == 0 .and. usepaw == 0) then 569 !if (prtvol == -level) 570 call wrtout(std_out, " FAST: Recomputing data by rotating matrix elements.") 571 572 if (usepaw == 0 .or. has_fock) then 573 ! Rotate gvnlxc by solving X_new U = Y_old for X with U upper triangle. 574 ! Compute enlx with rotated cg and gvnlxc. 575 if (istwf_k == 1) then 576 call ZTRSM('R', 'U', 'N', 'N', npwsp, nband, cone, umat, nband, gvnlxc, npwsp) 577 else 578 call DTRSM('R', 'U', 'N', 'N', 2*npwsp, nband, one, umat, nband, gvnlxc, 2*npwsp) 579 end if 580 ABI_MALLOC(dots, (2, nband)) 581 call cg_zdotg_zip(istwf_k, npwsp, nband, option1, cg, gvnlxc, dots, me_g0, comm_bsf) 582 enlx = dots(1,:) 583 ABI_FREE(dots) 584 end if 585 586 if (.False.) then 587 !if (usepaw == 1) then 588 ! Compute new eigenvalues, residual vectors and norms. 589 ! Rotate ghc by solving X_new U = Y_old for X with U upper triangle. 590 if (istwf_k == 1) then 591 call ZTRSM('R', 'U', 'N', 'N', npwsp, nband, cone, umat, nband, ghc, npwsp) 592 else 593 call DTRSM('R', 'U', 'N', 'N', 2*npwsp, nband, one, umat, nband, ghc, 2*npwsp) 594 end if 595 ABI_MALLOC_OR_DIE(gwork, (2, npwsp*nband), ierr) 596 call cg_get_eigens(usepaw, istwf_k, npwsp, nband, cg, ghc, gsc, eig, me_g0, comm_bsf) 597 call cg_get_residvecs(usepaw, npwsp, nband, eig, cg, ghc, gsc, gwork) 598 call cg_norm2g(istwf_k, npwsp, nband, gwork, resid, me_g0, comm_bsf) 599 call rmm_ydoc%add_tabular_line(resids2str("ortho_rot"), indent=0) 600 ABI_FREE(gwork) 601 end if 602 603 else 604 !if (prtvol == -level) 605 if (after_ortho == 1) call wrtout(std_out, " SLOW: Recomputing enlx gvnlx by calling nonlop.") 606 if (after_ortho == 2) call wrtout(std_out, " VERY-SLOW: Recomputing eigens and residues by calling getghc.") 607 608 do iblock=1,nblocks 609 igs = 1 + (iblock - 1) * npwsp * bsize; ige = min(iblock * npwsp * bsize, npwsp * nband) 610 ndat = (ige - igs + 1) / npwsp 611 ib_start = 1 + (iblock - 1) * bsize; ib_stop = min(iblock * bsize, nband) 612 cg_bk => cg(:,igs:ige); if (usepaw == 1) gsc_bk => gsc(1:2,igs:ige) 613 if (savemem == 0) then 614 ghc_bk => ghc(:,igs:ige); gvnlxc_bk => gvnlxc(:,igs:ige) 615 end if 616 617 select case (after_ortho) 618 case (1) 619 ! recompute NC enlx_bx after ortho. 620 ! eigens and residuals are inconsistent as they have been computed before pw_orthon. 621 signs = 1; paw_opt = 0 622 if (usepaw == 1) then 623 signs = 2; paw_opt = 3 624 end if 625 if (paral_kgb == 0) then 626 call nonlop(choice1, cpopt, cprj_dum, enlx(ib_start:), gs_hamk, 0, eig(ib_start), & 627 mpi_enreg, ndat, 1, paw_opt, signs, gsc_bk, tim_nonlop, cg_bk, gvnlxc_bk) 628 else 629 call prep_nonlop(choice1, cpopt, cprj_dum, enlx(ib_start), gs_hamk, 0, eig(ib_start), & 630 ndat, mpi_enreg, 1, paw_opt, signs, gsc_bk, tim_nonlop, & 631 cg_bk, gvnlxc_bk, already_transposed=.False.) 632 end if 633 634 case (2) 635 ! Consistent mode: update enlx_bx, eigens, residuals after orthogonalizalization. 636 call getghc_eigresid(gs_hamk, npw, my_nspinor, ndat, cg_bk, ghc_bk, gsc_bk, mpi_enreg, prtvol, & 637 eig(ib_start), resid(ib_start), enlx(ib_start), residv_bk, gvnlxc_bk, & 638 normalize=usepaw == 1) 639 case default 640 ABI_BUG(sjoin("Wrong after_ortho:", itoa(after_ortho))) 641 end select 642 end do ! iblock 643 644 call rmm_ydoc%add_tabular_line(resids2str("after_ortho"), indent=0) 645 end if ! after_ortho > 0 646 647 !if (usepaw == 1) then 648 ! !call cgtk_fixphase(cg, gsc, 0, 0, istwf_k, mcg, mgsc, mpi_enreg, nband, npwsp, usepaw) 649 ! call cg_set_imag0_to_zero(istwf_k, me_g0, npwsp, nband, cg, max_absimag) 650 ! call cg_set_imag0_to_zero(istwf_k, me_g0, npwsp, nband, gsc, max_absimag) 651 ! call cgpaw_normalize(npwsp, nband, cg, gsc, istwf_k, me_g0, comm_bsf) 652 !end if 653 654 if (timeit) call cwtime_report(" after_ortho ", cpu, wall, gflops) 655 656 ! Revert mixprec to previous status before returning. 657 if (use_fft_mixprec) prev_mixprec = fftcore_set_mixprec(prev_mixprec) 658 659 !if (dtset%prtvol > 0) then 660 if (diis%stats%length() > 0) call rmm_ydoc%add_dict("skip_stats", diis%stats) 661 call rmm_ydoc%write_and_free(std_out) 662 663 if (timeit) call cwtime_report(" rmm_diis total: ", cpu_all, wall_all, gflops_all) 664 665 ! Final cleanup. 666 ABI_FREE(lambda_bk) 667 ABI_FREE(dots_bk) 668 ABI_FREE(residv_bk) 669 ABI_FREE(kres_bk) 670 ABI_FREE(umat) 671 if (savemem == 0) then 672 ABI_FREE(ghc) 673 ABI_FREE(gvnlxc) 674 else 675 ABI_FREE(ghc_bk) 676 ABI_FREE(gvnlxc_bk) 677 end if 678 call diis%free() 679 680 contains 681 682 function resids2str(level) result(str) 683 character(len=*),intent(in) :: level 684 character(len=500) :: str 685 res_stats = stats_eval(resid(1:nb_pocc)) 686 !res_stats = stats_eval(resid(1:nband)) 687 write(str, "(1x, a12, 4(es10.3))") trim(level), res_stats%mean, res_stats%min, res_stats%max, res_stats%stdev 688 end function resids2str 689 690 end subroutine rmm_diis
ABINIT/subspace_rotation [ Functions ]
NAME
subspace_rotation
FUNCTION
This routine computes the <i|H|j> matrix elements and then performs the subspace rotation of the orbitals (rayleigh-ritz procedure) The main difference with respect to other similar routines is that this implementation does not require the <i|H|j> matrix elements as input so it can be used before starting the wavefunction optimation as required e.g. by the RMM-DIIS method. Moreover, the routine computes the new residuals after the subspace rotation by rotating the matrix elements of the Hamiltonian in the new basis (requires more memory but client code can avoid calling getghc after subspace_rotation.
INPUTS
gs_hamk <type(gs_hamiltonian_type)>=all data for the hamiltonian at k ptrvol mpi_enreg=information about MPI parallelization nband=number of bands at this k point and spin npw=number of plane waves at this k point my_nspinor=number of spinor components treated by this MPI proc.
OUTPUT
eig(nband): New eigvalues from subspace rotation. If usepaw==1: gsc(2,*)=<g|S|c> matrix elements (S=overlap) enlx(nband)=contribution from each band to nonlocal psp + potential Fock ACE part of total energy, at this k-point ghc
SIDE EFFECTS
cg(2,*)=updated wavefunctions gsc(2,*)=update <G|S|C>
SOURCE
1268 subroutine subspace_rotation(gs_hamk, prtvol, mpi_enreg, nband, npw, my_nspinor, savemem, enlx, eig, cg, gsc, ghc, gvnlxc) 1269 1270 !Arguments ------------------------------------ 1271 integer,intent(in) :: prtvol, nband, npw, my_nspinor, savemem 1272 type(gs_hamiltonian_type),intent(inout) :: gs_hamk 1273 type(mpi_type),intent(in) :: mpi_enreg 1274 real(dp),target,intent(inout) :: cg(2,npw*my_nspinor*nband) 1275 real(dp),target,intent(inout) :: gsc(2,npw*my_nspinor*nband*gs_hamk%usepaw) 1276 !real(dp),target,intent(out) :: ghc(2,npw*my_nspinor*nband) 1277 !real(dp),target,intent(out) :: gvnlxc(2,npw*my_nspinor*nband) 1278 real(dp),target,allocatable,intent(out) :: ghc(:,:), gvnlxc(:,:) 1279 real(dp),intent(out) :: eig(nband), enlx(nband) 1280 1281 !Local variables------------------------------- 1282 integer,parameter :: type_calc0 = 0, tim_getghc = 0, use_subovl0 = 0, option1 = 1 1283 integer :: ig, ig0, ib, ierr, bsize, nblocks, iblock, npwsp, ndat, ib_start, ib_stop, paral_kgb 1284 integer :: iband, cpopt, sij_opt, igs, ige, mcg, mgsc, istwf_k, usepaw, me_g0, cplex, comm_bsf 1285 logical :: has_fock 1286 real(dp),parameter :: rdummy = zero 1287 real(dp) :: cpu, wall, gflops 1288 !arrays 1289 real(dp),target :: fake_gsc_bk(0,0) 1290 real(dp) :: subovl(use_subovl0) 1291 real(dp),allocatable :: subham(:), h_ij(:,:,:), evec(:,:,:), evec_re(:,:), gwork(:,:) 1292 real(dp),ABI_CONTIGUOUS pointer :: ghc_bk(:,:), gvnlxc_bk(:,:), gsc_bk(:,:) 1293 real(dp) :: dots(2, nband) 1294 type(pawcprj_type) :: cprj_dum(1,1) 1295 1296 ! ************************************************************************* 1297 1298 if (timeit) call cwtime(cpu, wall, gflops, "start") 1299 1300 usepaw = gs_hamk%usepaw; istwf_k = gs_hamk%istwf_k 1301 paral_kgb = mpi_enreg%paral_kgb; me_g0 = mpi_enreg%me_g0 1302 comm_bsf = mpi_enreg%comm_spinorfft; if (mpi_enreg%paral_kgb == 1) comm_bsf = mpi_enreg%comm_bandspinorfft 1303 npwsp = npw * my_nspinor 1304 has_fock = associated(gs_hamk%fockcommon) 1305 1306 ! ======================================= 1307 ! Apply H to input cg to compute <i|H|j> 1308 ! ======================================= 1309 gsc_bk => fake_gsc_bk 1310 cpopt = -1; sij_opt = 0 1311 if (usepaw == 1) then 1312 sij_opt = 1 ! matrix elements <G|S|C> have to be computed in gsc in addition to ghc 1313 cpopt = -1 ! <p_lmn|in> (and derivatives) are computed here (and not saved) 1314 end if 1315 1316 ! Treat states in groups of bsize bands even when paral_kgb = 0 1317 bsize = 8; if (paral_kgb == 1) bsize = mpi_enreg%nproc_band * mpi_enreg%bandpp 1318 nblocks = nband / bsize; if (mod(nband, bsize) /= 0) nblocks = nblocks + 1 1319 1320 cplex = 2; if (istwf_k /= 1) cplex = 1 1321 !cplex = 2; if (istwf_k == 2) cplex = 1 1322 1323 ABI_CALLOC(h_ij, (cplex, nband, nband)) 1324 1325 ! Allocate full ghc and gvnlxc to be able to rotate residuals and Vnlx matrix elements 1326 ! after subdiago. More memory but we can save a call to H|psi>. 1327 if (savemem == 0) then 1328 ABI_MALLOC_OR_DIE(ghc, (2, npwsp*nband), ierr) 1329 ABI_MALLOC_OR_DIE(gvnlxc, (2, npwsp*nband), ierr) 1330 else if (savemem == 1) then 1331 ABI_MALLOC(ghc_bk, (2, npwsp*bsize)) 1332 ABI_MALLOC(gvnlxc_bk, (2, npwsp*bsize)) 1333 else 1334 ABI_ERROR(sjoin("Invalid savemem:", itoa(savemem))) 1335 end if 1336 1337 do iblock=1,nblocks 1338 igs = 1 + (iblock - 1) * npwsp * bsize; ige = min(iblock * npwsp * bsize, npwsp * nband) 1339 ndat = (ige - igs + 1) / npwsp 1340 ib_start = 1 + (iblock - 1) * bsize; ib_stop = min(iblock * bsize, nband) 1341 if (usepaw == 1) gsc_bk => gsc(1:2,igs:ige) 1342 if (savemem == 0) then 1343 ghc_bk => ghc(:, igs:ige); gvnlxc_bk => gvnlxc(:, igs:ige) 1344 end if 1345 1346 if (paral_kgb == 0) then 1347 call getghc(cpopt, cg(:,igs:ige), cprj_dum, ghc_bk, gsc_bk, gs_hamk, gvnlxc_bk, & 1348 rdummy, mpi_enreg, ndat, prtvol, sij_opt, tim_getghc, type_calc0) 1349 else 1350 call prep_getghc(cg(:,igs:ige), gs_hamk, gvnlxc_bk, ghc_bk, gsc_bk, rdummy, ndat, & 1351 mpi_enreg, prtvol, sij_opt, cpopt, cprj_dum, already_transposed=.False.) 1352 end if 1353 1354 ! Compute <i|H|j> for i=1,nband and all j in block 1355 if (cplex == 2) then 1356 call cg_zgemm("C", "N", npwsp, nband, ndat, cg, ghc_bk, h_ij(:,:,ib_start)) 1357 else 1358 call dgemm("T", "N", nband, ndat, 2*npwsp, one, cg, 2*npwsp, ghc_bk, 2*npwsp, zero, h_ij(:,:,ib_start), nband) 1359 end if 1360 1361 if (istwf_k /= 1) then 1362 do iband=ib_start, ib_stop 1363 h_ij(:,:,iband) = two * h_ij(:,:,iband) 1364 1365 if (istwf_k == 2 .and. me_g0 == 1) then 1366 ! Gamma k-point and I have G=0. Remove double counting term. 1367 ig = 1 + (iband - ib_start) * npwsp 1368 do ib=1,nband 1369 ig0 = 1 + npwsp * (ib - 1) 1370 #if defined FC_NVHPC 1371 if (ig<0) write(100,*) ig,ig0,h_ij(1,ib,iband),cg(1,ig0),ghc_bk(1,ig) 1372 #endif 1373 h_ij(1,ib,iband) = h_ij(1,ib,iband) - cg(1,ig0) * ghc_bk(1,ig) 1374 end do 1375 end if 1376 1377 ! Force real matrix. 1378 if (cplex == 2) h_ij(2,:,iband) = zero 1379 end do 1380 end if 1381 1382 end do ! iblock 1383 1384 ! Pack <i|H|j> to prepare call to subdiago. 1385 ABI_MALLOC(subham, (nband*(nband+1))) 1386 if (cplex == 2) then 1387 do iband=1,nband 1388 h_ij(2,iband,iband) = zero ! Force diagonal elements to be real 1389 end do 1390 end if 1391 1392 if (cplex == 2) then 1393 call pack_matrix(h_ij, subham, nband, 2) 1394 else 1395 call my_pack_matrix(nband, h_ij, subham) 1396 end if 1397 1398 ABI_FREE(h_ij) 1399 call xmpi_sum(subham, comm_bsf, ierr) 1400 if (timeit) call cwtime_report(" subspace build Hij", cpu, wall, gflops) 1401 1402 ! ======================== 1403 ! Subspace diagonalization 1404 ! ======================= 1405 ! Rotate cg, gsc and compute new eigenvalues. 1406 ABI_MALLOC(evec, (2, nband, nband)) 1407 mcg = npwsp * nband; mgsc = npwsp * nband * usepaw 1408 call subdiago(cg, eig, evec, gsc, 0, 0, istwf_k, mcg, mgsc, nband, npw, my_nspinor, paral_kgb, & 1409 subham, subovl, use_subovl0, usepaw, me_g0) 1410 1411 ABI_FREE(subham) 1412 if (timeit) call cwtime_report(" subspace subdiago", cpu, wall, gflops) 1413 1414 if (savemem == 0) then 1415 ! Rotate ghc matrix in the new subspace: 1416 ! 1417 ! new_{g,b} = old_{g,i} evec_{i,b} 1418 ! 1419 ! cg and PAW gsc have been already rotated in subdiago 1420 ! 1421 if (cplex == 1) then 1422 ! Eigenvectors are real. 1423 ABI_MALLOC(evec_re, (nband, nband)) 1424 evec_re = evec(1,:,:) 1425 end if 1426 1427 ABI_MALLOC_OR_DIE(gwork, (2, npwsp*nband), ierr) 1428 if (cplex == 1) then 1429 call DGEMM("N", "N", 2*npwsp, nband, nband, one, ghc, 2*npwsp, evec_re, nband, zero, gwork, 2*npwsp) 1430 else 1431 call abi_zgemm_2r("N", "N", npwsp, nband, nband, cone, ghc, npwsp, evec, nband, czero, gwork, npwsp) 1432 end if 1433 call cg_zcopy(npwsp * nband, gwork, ghc) 1434 1435 ! Rotate <G|Vnlx|Psi_n> and evaluate new enlx for NC. 1436 if (usepaw == 0 .or. has_fock) then 1437 if (cplex == 1) then 1438 call DGEMM("N", "N", 2*npwsp, nband, nband, one, gvnlxc, 2*npwsp, evec_re, nband, zero, gwork, 2*npwsp) 1439 else 1440 call abi_zgemm_2r("N", "N", npwsp, nband, nband, cone, gvnlxc, npwsp, evec, nband, czero, gwork, npwsp) 1441 end if 1442 !call abi_xgemm('N','N', vectsize, nband, nband, cone, gvnlxc, vectsize, evec, nband, czero, gwork, vectsize, x_cplx=cplx) 1443 call cg_zcopy(npwsp * nband, gwork, gvnlxc) 1444 call cg_zdotg_zip(istwf_k, npwsp, nband, option1, cg, gvnlxc, dots, me_g0, comm_bsf) 1445 enlx = dots(1,:) 1446 end if 1447 ABI_FREE(gwork) 1448 ABI_SFREE(evec_re) 1449 end if 1450 1451 ABI_FREE(evec) 1452 if (savemem == 1) then 1453 ABI_FREE(ghc_bk) 1454 ABI_FREE(gvnlxc_bk) 1455 end if 1456 1457 if (timeit) call cwtime_report(" subspace final rotation", cpu, wall, gflops) 1458 1459 end subroutine subspace_rotation
m_numeric_tools/my_pack_matrix [ Functions ]
[ Top ] [ m_numeric_tools ] [ Functions ]
NAME
my_pack_matrix
FUNCTION
Packs a matrix into hermitian format
INPUTS
N: size of matrix cplx: 2 if matrix is complex, 1 for real matrix. mat_in(cplx, N*N)= matrix to be packed
OUTPUT
mat_out(cplx*N*N+1/2)= packed matrix (upper triangle)
SOURCE
1210 subroutine my_pack_matrix(n, mat_in, mat_out) 1211 1212 !Arguments ------------------------------------ 1213 integer, intent(in) :: N 1214 real(dp), intent(in) :: mat_in(N, N) 1215 real(dp), intent(out) :: mat_out(2, N*(N+1)/2) 1216 1217 !local variables 1218 integer :: isubh, i, j 1219 ! ************************************************************************* 1220 1221 isubh = 1 1222 do j=1,N 1223 do i=1,j 1224 mat_out(1,isubh) = mat_in(i, j) 1225 mat_out(2,isubh) = zero 1226 isubh = isubh + 1 1227 end do 1228 end do 1229 1230 end subroutine my_pack_matrix
m_rmm_diis/getghc_eigresid [ Functions ]
[ Top ] [ m_rmm_diis ] [ Functions ]
NAME
getghc_eigresid
FUNCTION
Compute new eigenvalues, residuals, H |psi> and enlx from cg and gsc.
INPUTS
OUTPUT
SOURCE
901 subroutine getghc_eigresid(gs_hamk, npw, my_nspinor, ndat, cg, ghc, gsc, mpi_enreg, prtvol, & 902 eig, resid, enlx, residvecs, gvnlxc, normalize) 903 904 !Arguments ------------------------------------ 905 type(gs_hamiltonian_type),intent(inout) :: gs_hamk 906 integer,intent(in) :: npw, my_nspinor, ndat, prtvol 907 real(dp),intent(inout) :: cg(2, npw*my_nspinor*ndat) 908 real(dp),intent(out) :: ghc(2,npw*my_nspinor*ndat), gsc(2,npw*my_nspinor*ndat*gs_hamk%usepaw) 909 type(mpi_type),intent(in) :: mpi_enreg 910 real(dp),intent(out) :: eig(ndat), resid(ndat), enlx(ndat) 911 real(dp),intent(out) :: residvecs(2, npw*my_nspinor*ndat), gvnlxc(2, npw*my_nspinor*ndat) 912 logical,optional,intent(in) :: normalize 913 914 !Local variables------------------------------- 915 integer,parameter :: type_calc0 = 0, option1 = 1, option2 = 2, tim_getghc = 0 916 integer :: istwf_k, usepaw, cpopt, sij_opt, npwsp, me_g0, comm_bsf 917 real(dp),parameter :: rdummy = zero 918 !real(dp) :: cpu, wall, gflops 919 logical :: normalize_, has_fock 920 !arrays 921 real(dp) :: dots(2, ndat) 922 type(pawcprj_type) :: cprj_dum(1,1) 923 924 ! ************************************************************************* 925 926 !if (timeit) call cwtime(cpu, wall, gflops, "start") 927 normalize_ = .True.; if (present(normalize)) normalize_ = normalize 928 npwsp = npw * my_nspinor 929 usepaw = gs_hamk%usepaw; istwf_k = gs_hamk%istwf_k; me_g0 = mpi_enreg%me_g0 930 comm_bsf = mpi_enreg%comm_spinorfft; if (mpi_enreg%paral_kgb == 1) comm_bsf = mpi_enreg%comm_bandspinorfft 931 has_fock = associated(gs_hamk%fockcommon) 932 933 cpopt = -1; sij_opt = 0 934 if (usepaw == 1) then 935 sij_opt = 1 ! matrix elements <G|S|C> have to be computed in gsc in addition to ghc 936 cpopt = -1 ! <p_lmn|in> (and derivatives) are computed here (and not saved) 937 end if 938 939 ! NC normalization. 940 if (usepaw == 0 .and. normalize_) call cgnc_normalize(npwsp, ndat, cg, istwf_k, me_g0, comm_bsf) 941 942 ! Compute H |cg> 943 !call fock_set_ieigen(gs_hamk%fockcommon, iband) 944 if (mpi_enreg%paral_kgb == 0) then 945 call getghc(cpopt, cg, cprj_dum, ghc, gsc, gs_hamk, gvnlxc, & 946 rdummy, mpi_enreg, ndat, prtvol, sij_opt, tim_getghc, type_calc0) 947 else 948 call prep_getghc(cg, gs_hamk, gvnlxc, ghc, gsc, rdummy, ndat, & 949 mpi_enreg, prtvol, sij_opt, cpopt, cprj_dum, already_transposed=.False.) 950 end if 951 952 ! PAW normalization must be done here once gsc is known. 953 if (usepaw == 1 .and. normalize_) call cgpaw_normalize(npwsp, ndat, cg, gsc, istwf_k, me_g0, comm_bsf) 954 955 ! Compute new eigenvalues, residual vectors and norms. 956 call cg_get_eigens(usepaw, istwf_k, npwsp, ndat, cg, ghc, gsc, eig, me_g0, comm_bsf) 957 call cg_get_residvecs(usepaw, npwsp, ndat, eig, cg, ghc, gsc, residvecs) 958 call cg_norm2g(istwf_k, npwsp, ndat, residvecs, resid, me_g0, comm_bsf) 959 960 if (usepaw == 0 .or. has_fock) then 961 ! Evaluate new enlx from gvnlxc. 962 call cg_zdotg_zip(istwf_k, npwsp, ndat, option1, cg, gvnlxc, dots, me_g0, comm_bsf) 963 enlx = dots(1,:) 964 end if 965 966 !if (timeit) call cwtime_report(" getghc_eigresid", cpu, wall, gflops) 967 968 end subroutine getghc_eigresid
m_rmm_diis/rmm_diis_eval_mats [ Functions ]
[ Top ] [ m_rmm_diis ] [ Functions ]
NAME
rmm_diis_eval_mats
FUNCTION
Compute matrix elements required by the RMM-DIIS method.
INPUTS
OUTPUT
SOURCE
1149 subroutine rmm_diis_eval_mats(diis, iter, ndat, me_g0, comm) 1150 1151 !Arguments ------------------------------------ 1152 class(rmm_diis_t),intent(inout) :: diis 1153 integer,intent(in) :: iter, ndat, me_g0, comm 1154 1155 !local variables 1156 integer :: ii, ierr, idat, nprocs, option 1157 real(dp) :: dotr, doti !, cpu, wall, gflops 1158 !integer :: requests(ndat) 1159 ! ************************************************************************* 1160 1161 !if (timeit) call cwtime(cpu, wall, gflops, "start") 1162 nprocs = xmpi_comm_size(comm) 1163 option = 2; if (diis%cplex == 1) option = 1 1164 1165 do idat=1,ndat 1166 1167 do ii=0,iter 1168 ! <R_i|R_j> 1169 call dotprod_g(dotr, doti, diis%istwf_k, diis%npwsp, option, & 1170 diis%chain_resv(:,:,ii,idat), diis%chain_resv(:,:,iter,idat), me_g0, xmpi_comm_self) 1171 if (ii == iter) doti = zero 1172 if (diis%cplex == 2) then 1173 diis%resmat(:, ii, iter, idat) = [dotr, doti] 1174 else 1175 diis%resmat(1, ii, iter, idat) = dotr 1176 end if 1177 end do ! ii 1178 1179 !if (nprocs > 1) then 1180 ! call xmpi_sum(diis%resmat(:,0:iter,iter,idat), comm, ierr) 1181 ! !call xmpi_isum_ip(diis%resmat(:,0:iter,iter,idat), comm, requests(idat), ierr) 1182 !endif 1183 !if (diis%prtvol == -level) write(std_out,*)"iter, idat, resmat:", iter, idat, diis%resmat(:,0:iter,iter,idat) 1184 end do ! idat 1185 1186 if (nprocs > 1) call xmpi_sum(diis%resmat(:,0:iter,iter,1:ndat), comm, ierr) 1187 !if (nprocs > 1) call xmpi_waitall(requests, ierr) 1188 !if (timeit) call cwtime_report(" eval_mats", cpu, wall, gflops) 1189 1190 end subroutine rmm_diis_eval_mats
m_rmm_diis/rmm_diis_exit_iter [ Functions ]
[ Top ] [ m_rmm_diis ] [ Functions ]
NAME
rmm_diis_exit_iter
FUNCTION
Return true if we can exit the DIIS iteration
INPUTS
OUTPUT
SOURCE
756 logical function rmm_diis_exit_iter(diis, iter, ndat, niter_block, occ_bk, accuracy_ene, dtset, comm) result(ans) 757 758 class(rmm_diis_t),intent(inout) :: diis 759 integer,intent(in) :: iter, ndat, niter_block, comm 760 real(dp),intent(in) :: occ_bk(ndat) 761 real(dp),intent(in) :: accuracy_ene 762 type(dataset_type),intent(in) :: dtset 763 764 !Local variables------------------------------- 765 integer,parameter :: master = 0 766 integer :: idat, ierr, nok, checks(ndat) !nbocc, 767 real(dp) :: resid, deltae, deold , fact 768 character(len=50) :: msg_list(ndat) 769 ! ************************************************************************* 770 771 diis%last_iter = iter !; ans = .False.; return 772 if (xmpi_comm_rank(comm) /= master) goto 10 773 774 ! Tolerances depend on accuracy_level and occupation of the state. 775 checks = 0 776 777 do idat=1,ndat 778 resid = diis%hist_resid(iter, idat) 779 deold = diis%hist_ene(1, idat) - diis%hist_ene(0, idat) 780 deltae = diis%hist_ene(iter, idat) - diis%hist_ene(iter-1, idat) 781 782 ! Relative criterion on eigenvalue differerence. 783 ! Abinit default in the CG part is 0.005 that is really low (0.3 in V). 784 ! Here we increase it depending whether the state is occupied or empty 785 fact = one !; if (dtset%iscf > 0 .and. abs(occ_bk(idat)) < diis%tol_occupied) fact = three 786 if (diis%accuracy_level == 1) fact = fact * 18 787 if (diis%accuracy_level == 2) fact = fact * 12 788 if (diis%accuracy_level == 3) fact = fact * 6 789 if (abs(deltae) < fact * dtset%tolrde * abs(deold)) then 790 checks(idat) = 1; msg_list(idat) = "deltae < fact * tolrde * deold"; cycle 791 end if 792 793 if (dtset%iscf < 0) then 794 ! This is the only condition available for NSCF run. 795 if (resid < dtset%tolwfr) then 796 checks(idat) = 1; msg_list(idat) = 'resid < tolwfr'; cycle 797 end if 798 799 else 800 ! Conditions available for SCF run. 801 if (resid < dtset%tolwfr) then 802 checks(idat) = 1; msg_list(idat) = 'resid < tolwfr'; cycle 803 end if 804 805 ! Absolute criterion on eigenvalue difference. Assuming error on Etot ~ band_energy. 806 fact = one; if (dtset%iscf > 0 .and. abs(occ_bk(idat)) < diis%tol_occupied) fact = ten 807 if (sqrt(abs(resid)) < fact * accuracy_ene) then 808 checks(idat) = 1; msg_list(idat) = 'resid < accuracy_ene'; cycle 809 end if 810 end if 811 end do ! idat 812 813 ! Depending on the accuracy_level either full block or a fraction of it must pass the test in order to exit. 814 nok = count(checks /= 0) 815 if (diis%accuracy_level == 1) ans = nok >= 0.65_dp * ndat 816 if (diis%accuracy_level == 2) ans = nok >= 0.75_dp * ndat 817 if (diis%accuracy_level == 3) ans = nok >= 0.90_dp * ndat 818 if (diis%accuracy_level == 4) ans = nok == ndat 819 820 if (ans) then 821 ! Log exit only if this is not the last iteration. 822 if (iter /= niter_block) then 823 do idat=1,ndat 824 if (checks(idat) /= 0) call diis%stats%increment(msg_list(idat), 1) 825 end do 826 end if 827 end if 828 829 ! Broadcast final decision to all ranks. 830 10 call xmpi_bcast(ans, master, comm, ierr) 831 832 end function rmm_diis_exit_iter
m_rmm_diis/rmm_diis_free [ Functions ]
[ Top ] [ m_rmm_diis ] [ Functions ]
NAME
rmm_diis_free
FUNCTION
Free dynamic memory.
INPUTS
OUTPUT
SOURCE
1025 subroutine rmm_diis_free(diis) 1026 1027 !Arguments ------------------------------------ 1028 class(rmm_diis_t),intent(inout) :: diis 1029 1030 ! ************************************************************************* 1031 1032 ABI_SFREE(diis%hist_ene) 1033 ABI_SFREE(diis%hist_resid) 1034 ABI_SFREE(diis%hist_enlx) 1035 ABI_SFREE(diis%step_type) 1036 ABI_SFREE(diis%chain_phi) 1037 ABI_SFREE(diis%chain_sphi) 1038 ABI_SFREE(diis%chain_resv) 1039 ABI_SFREE(diis%resmat) 1040 1041 call diis%stats%free() 1042 1043 end subroutine rmm_diis_free
m_rmm_diis/rmm_diis_new [ Functions ]
[ Top ] [ m_rmm_diis ] [ Functions ]
NAME
rmm_diis_new
FUNCTION
Build new rmm_diis_t instance.
INPUTS
OUTPUT
SOURCE
984 type(rmm_diis_t) function rmm_diis_new(accuracy_level, usepaw, istwf_k, npwsp, max_niter, bsize, prtvol) result(diis) 985 986 !Arguments ------------------------------------ 987 integer,intent(in) :: accuracy_level, usepaw, istwf_k, npwsp, max_niter, bsize, prtvol 988 989 ! ************************************************************************* 990 991 diis%accuracy_level = accuracy_level 992 diis%usepaw = usepaw 993 diis%istwf_k = istwf_k 994 diis%cplex = 2; if (istwf_k == 2) diis%cplex = 1 995 diis%npwsp = npwsp 996 diis%max_niter = max_niter 997 diis%bsize = bsize 998 diis%prtvol = prtvol 999 1000 ABI_MALLOC(diis%hist_ene, (0:max_niter+2, bsize)) 1001 ABI_MALLOC(diis%hist_resid, (0:max_niter+2, bsize)) 1002 ABI_CALLOC(diis%hist_enlx, (0:max_niter+2, bsize)) 1003 ABI_MALLOC(diis%step_type, (0:max_niter+2, bsize)) 1004 ABI_MALLOC(diis%chain_phi, (2, npwsp, 0:max_niter, bsize)) 1005 ABI_MALLOC(diis%chain_sphi, (2, npwsp*usepaw, 0:max_niter, bsize)) 1006 ABI_MALLOC(diis%chain_resv, (2, npwsp, 0:max_niter, bsize)) 1007 ABI_CALLOC(diis%resmat, (diis%cplex, 0:max_niter, 0:max_niter, bsize)) ! <R_i|R_j> 1008 1009 end function rmm_diis_new
m_rmm_diis/rmm_diis_print_block [ Functions ]
[ Top ] [ m_rmm_diis ] [ Functions ]
NAME
rmm_diis_print_block
FUNCTION
Print energies, residuals for a block of states.
INPUTS
OUTPUT
SOURCE
848 subroutine rmm_diis_print_block(diis, ib_start, ndat, istep, ikpt, isppol) 849 850 class(rmm_diis_t),intent(in) :: diis 851 integer,intent(in) :: ib_start, ndat, istep, ikpt, isppol 852 853 !Local variables------------------------------- 854 integer :: iter, idat, iband 855 real(dp) :: deltae, deold, dedold, absdiff 856 character(len=500) :: msg 857 ! ************************************************************************* 858 859 call wrtout(std_out, & 860 sjoin("<BEGIN RMM-DIIS-BLOCK, istep:", itoa(istep), ", ikpt:", itoa(ikpt), ", spin: ", itoa(isppol), ">"), & 861 pre_newlines=1) 862 863 do idat=1,ndat 864 write(msg,'(1a, 2(a5), 4(a14), 1x, a6)') & 865 "#", 'iter', "band", "eigen_eV", "eigde_meV", "de/dold", "resid", "type"; call wrtout(std_out, msg) 866 867 iband = ib_start + idat - 1 868 deold = diis%hist_ene(1, idat) - diis%hist_ene(0, idat) 869 do iter=0,diis%last_iter 870 dedold = zero; absdiff = zero 871 if (iter > 0) then 872 deltae = diis%hist_ene(iter, idat) - diis%hist_ene(iter-1, idat) 873 dedold = deltae / deold 874 absdiff = (diis%hist_ene(iter, idat) - diis%hist_ene(iter-1, idat)) 875 end if 876 877 write(msg,"(1x, 2(i5), 4(es14.6), 1x, a6)") & 878 iter, iband, diis%hist_ene(iter, idat) * Ha_eV, absdiff * Ha_meV, dedold, & 879 diis%hist_resid(iter, idat), diis%step_type(iter, idat); call wrtout(std_out, msg) 880 end do 881 end do 882 883 call wrtout(std_out, "<END RMM-DIIS-BLOCK>", newlines=1) 884 885 end subroutine rmm_diis_print_block
m_rmm_diis/rmm_diis_push_iter [ Functions ]
[ Top ] [ m_rmm_diis ] [ Functions ]
NAME
rmm_diis_push_iter
FUNCTION
Save one iteration of the DIIS algorithm.
INPUTS
OUTPUT
SOURCE
706 subroutine rmm_diis_push_iter(diis, iter, ndat, eig_bk, resid_bk, enlx_bk, cg_bk, residv_bk, gsc_bk, tag) 707 708 class(rmm_diis_t),intent(inout) :: diis 709 integer,intent(in) :: iter, ndat 710 real(dp),intent(in) :: eig_bk(ndat), resid_bk(ndat), enlx_bk(ndat) 711 real(dp),intent(in) :: cg_bk(2, diis%npwsp*ndat), residv_bk(2, diis%npwsp*ndat), gsc_bk(2, diis%npwsp*ndat*diis%usepaw) 712 character(len=*),intent(in) :: tag 713 714 !Local variables------------------------------- 715 integer :: idat, ibk 716 ! ************************************************************************* 717 718 diis%last_iter = iter 719 diis%hist_ene(iter, 1:ndat) = eig_bk 720 diis%hist_resid(iter, 1:ndat) = resid_bk 721 diis%hist_enlx(iter, 1:ndat) = enlx_bk 722 diis%step_type(iter, 1:ndat) = tag 723 724 do idat=1,ndat 725 if (iter == 0) then 726 if (diis%cplex == 2) then 727 diis%resmat(:, 0, 0, idat) = [resid_bk(idat), zero] 728 else 729 diis%resmat(:, 0, 0, idat) = resid_bk(idat) 730 end if 731 end if 732 !write(std_out, *)"res0", diis%resmat(:, 0, 0, idat) 733 diis%step_type(iter, idat) = tag 734 ibk = 1 + (idat - 1) * diis%npwsp 735 call cg_zcopy(diis%npwsp, cg_bk(:,ibk), diis%chain_phi(:,:,iter,idat)) 736 call cg_zcopy(diis%npwsp, residv_bk(:,ibk), diis%chain_resv(:,:,iter,idat)) 737 if (diis%usepaw == 1) call cg_zcopy(diis%npwsp, gsc_bk(:,ibk), diis%chain_sphi(:,:,iter,idat)) 738 end do 739 740 end subroutine rmm_diis_push_iter
m_rmm_diis/rmm_diis_update_block [ Functions ]
[ Top ] [ m_rmm_diis ] [ Functions ]
NAME
rmm_diis_update_block
FUNCTION
Compute new trial wavefunctions and residuals from the DIIS chain.
INPUTS
OUTPUT
SOURCE
1059 subroutine rmm_diis_update_block(diis, iter, npwsp, ndat, cg_bk, residv_bk, comm) 1060 1061 !Arguments ------------------------------------ 1062 class(rmm_diis_t),intent(in) :: diis 1063 integer,intent(in) :: iter, npwsp, comm, ndat 1064 real(dp),intent(inout) :: cg_bk(2, npwsp, ndat), residv_bk(2, npwsp, ndat) 1065 1066 !local variables 1067 integer,parameter :: master = 0 1068 integer :: cplex, ierr, nprocs, my_rank, idat 1069 real(dp) :: noise !, cpu, wall, gflops 1070 real(dp),allocatable :: wmat1(:,:,:), wvec(:,:,:), alphas(:,:) 1071 character(len=500) :: msg 1072 ! ************************************************************************* 1073 1074 !if (timeit) call cwtime(cpu, wall, gflops, "start") 1075 my_rank = xmpi_comm_rank(comm); nprocs = xmpi_comm_size(comm) 1076 cplex = diis%cplex 1077 1078 ! Solve system of linear equations. 1079 ! Only master works so that we are sure we have the same solution. 1080 ABI_CALLOC(wvec, (cplex, 0:iter, ndat)) 1081 1082 if (my_rank == master) then 1083 ABI_MALLOC(wmat1, (cplex, 0:iter, 0:iter)) 1084 1085 do idat=1,ndat 1086 !if (mod(idat, nprocs) /= my_rank) cycle ! MPI parallelism 1087 wvec(1, iter, idat) = -one 1088 wmat1 = zero 1089 wmat1(1,:,iter) = -one 1090 wmat1(1,iter,:) = -one 1091 wmat1(1,iter,iter) = zero 1092 wmat1(:,0:iter-1, 0:iter-1) = diis%resmat(:, 0:iter-1, 0:iter-1, idat) 1093 1094 call xhesv_cplex("U", cplex, iter+1, 1, wmat1, wvec(:,:,idat), msg, ierr) 1095 ABI_CHECK(ierr == 0, msg) 1096 1097 !if (diis%prtvol == -level) then 1098 ! write(std_out,*)"wvec:", wvec(:,:,idat) 1099 ! write(std_out,*)"sum(wvec):", sum(wvec(:, 0:iter-1, idat), dim=2) 1100 !end if 1101 if (cplex == 2) then 1102 ! coefficients should sum up to 1 but sometimes we get a small imaginary part. here we remove it 1103 noise = sum(wvec(2, 0:iter-1, idat)) 1104 wvec(2, 0:iter-1, idat) = wvec(2, 0:iter-1, idat) - noise * iter 1105 end if 1106 1107 end do 1108 ABI_FREE(wmat1) 1109 end if 1110 1111 ! Master broadcasts data. 1112 if (nprocs > 1) call xmpi_bcast(wvec, master, comm, ierr) 1113 !if (nprocs > 1) call xmpi_sum(wvec, comm, ierr) 1114 1115 ! Take linear combination of chain_phi and chain_resv. 1116 do idat=1,ndat 1117 if (cplex == 2) then 1118 call cg_zgemv("N", npwsp, iter, diis%chain_phi(:,:,:,idat), wvec(:,:,idat), cg_bk(:,:,idat)) 1119 call cg_zgemv("N", npwsp, iter, diis%chain_resv(:,:,:,idat), wvec(:,:,idat), residv_bk(:,:,idat)) 1120 else 1121 ! coefficients are real --> use DGEMV 1122 ABI_MALLOC(alphas, (1, 0:iter)) 1123 alphas(1,:) = wvec(1,:,idat) 1124 call dgemv("N", 2*npwsp, iter, one, diis%chain_phi(:,:,:,idat), 2*npwsp, alphas, 1, zero, cg_bk(:,:,idat), 1) 1125 call dgemv("N", 2*npwsp, iter, one, diis%chain_resv(:,:,:,idat), 2*npwsp, alphas, 1, zero, residv_bk(:,:,idat), 1) 1126 ABI_FREE(alphas) 1127 end if 1128 end do 1129 1130 ABI_FREE(wvec) 1131 !if (timeit) call cwtime_report(" update_block", cpu, wall, gflops) 1132 1133 end subroutine rmm_diis_update_block