TABLE OF CONTENTS
- ABINIT/from_block_cyclic_to_mat
- ABINIT/from_mat_to_block_cyclic
- ABINIT/pack_matrix
- ABINIT/rayleigh_ritz_distributed
- ABINIT/rayleigh_ritz_subdiago
ABINIT/from_block_cyclic_to_mat [ Functions ]
NAME
from_block_cyclic_to_mat
FUNCTION
Fills the columns of full_mat owned by iproc with block_cyclic_mat, using a 1D block-cyclic distribution
COPYRIGHT
Copyright (C) 2014-2018 ABINIT group (AL) this file is distributed under the terms of the gnu general public license, see ~abinit/COPYING or http://www.gnu.org/copyleft/gpl.txt . for the initials of contributors, see ~abinit/doc/developers/contributors.txt .
INPUTS
block_cyclic_mat(2,vectsize*buffsize)=local part of full_mat vectsize=number of rows nband=number of columns of the full matrix buffsize=size of the local part of the matrix blocksize=size of block for the block-cyclic distribution iproc=local processor number nprocs=total number of processors
OUTPUT
full_mat(2,vectsize*nband)=full matrix
SIDE EFFECTS
PARENTS
rayleigh_ritz
CHILDREN
SOURCE
667 subroutine from_block_cyclic_to_mat(full_mat, vectsize, nband, block_cyclic_mat, buffsize, blocksize, iproc, nprocs) 668 use defs_basis 669 use defs_abitypes 670 671 !This section has been created automatically by the script Abilint (TD). 672 !Do not modify the following lines by hand. 673 #undef ABI_FUNC 674 #define ABI_FUNC 'from_block_cyclic_to_mat' 675 !End of the abilint section 676 677 implicit none 678 679 integer, intent(in) :: vectsize, nband, buffsize, blocksize, iproc, nprocs 680 real(dp), intent(inout) :: full_mat(2, vectsize*nband) 681 real(dp), intent(in) :: block_cyclic_mat(2, vectsize*buffsize) 682 683 integer :: shift_fullmat, shift_block_cyclic_mat 684 integer :: cur_bsize 685 686 ! ************************************************************************* 687 688 shift_fullmat = iproc*blocksize 689 shift_block_cyclic_mat = 0 690 691 do while(.true.) 692 cur_bsize = MIN(blocksize, nband-shift_fullmat) 693 if(cur_bsize <= 0) exit 694 full_mat(:, shift_fullmat*vectsize+1 : shift_fullmat*vectsize + cur_bsize*vectsize) =& 695 & full_mat(:, shift_fullmat*vectsize+1 : shift_fullmat*vectsize + cur_bsize*vectsize) +& 696 & block_cyclic_mat(:, shift_block_cyclic_mat*vectsize+1 : shift_block_cyclic_mat*vectsize + cur_bsize*vectsize) 697 698 shift_block_cyclic_mat = shift_block_cyclic_mat + blocksize 699 shift_fullmat = shift_fullmat + blocksize*nprocs 700 end do 701 702 end subroutine from_block_cyclic_to_mat
ABINIT/from_mat_to_block_cyclic [ Functions ]
NAME
from_mat_to_block_cyclic
FUNCTION
Fills block_cyclic_mat with the columns of full_mat owned by iproc, using a 1D block-cyclic distribution
COPYRIGHT
Copyright (C) 2014-2018 ABINIT group (AL) this file is distributed under the terms of the gnu general public license, see ~abinit/COPYING or http://www.gnu.org/copyleft/gpl.txt . for the initials of contributors, see ~abinit/doc/developers/contributors.txt .
INPUTS
full_mat(2,vectsize*nband)=full input matrix vectsize=number of rows nband=number of columns of the full matrix buffsize=size of the local part of the matrix blocksize=size of block for the block-cyclic distribution iproc=local processor number nprocs=total number of processors
OUTPUT
block_cyclic_mat(2,vectsize*buffsize)=local part of full_mat
SIDE EFFECTS
PARENTS
rayleigh_ritz
CHILDREN
SOURCE
594 subroutine from_mat_to_block_cyclic(full_mat, vectsize, nband, block_cyclic_mat, buffsize, blocksize, iproc, nprocs) 595 use defs_basis 596 use defs_abitypes 597 598 !This section has been created automatically by the script Abilint (TD). 599 !Do not modify the following lines by hand. 600 #undef ABI_FUNC 601 #define ABI_FUNC 'from_mat_to_block_cyclic' 602 !End of the abilint section 603 604 implicit none 605 606 integer, intent(in) :: vectsize, nband, buffsize, blocksize, iproc, nprocs 607 real(dp), intent(in) :: full_mat(2, vectsize*nband) 608 real(dp), intent(inout) :: block_cyclic_mat(2, vectsize*buffsize) 609 610 integer :: shift_fullmat, shift_block_cyclic_mat 611 integer :: cur_bsize 612 613 ! ************************************************************************* 614 615 shift_fullmat = iproc*blocksize 616 shift_block_cyclic_mat = 0 617 618 do while(.true.) 619 cur_bsize = MIN(blocksize, nband-shift_fullmat) 620 if(cur_bsize <= 0) exit 621 block_cyclic_mat(:, shift_block_cyclic_mat*vectsize+1 : shift_block_cyclic_mat*vectsize + cur_bsize*vectsize) = & 622 & full_mat(:, shift_fullmat*vectsize+1 : shift_fullmat*vectsize + cur_bsize*vectsize) 623 624 shift_block_cyclic_mat = shift_block_cyclic_mat + blocksize 625 shift_fullmat = shift_fullmat + blocksize*nprocs 626 end do 627 628 end subroutine from_mat_to_block_cyclic
ABINIT/pack_matrix [ Functions ]
NAME
pack_matrix
FUNCTION
Packs a matrix into hermitian format
COPYRIGHT
Copyright (C) 2014-2018 ABINIT group (AL) this file is distributed under the terms of the gnu general public license, see ~abinit/COPYING or http://www.gnu.org/copyleft/gpl.txt . for the initials of contributors, see ~abinit/doc/developers/contributors.txt .
INPUTS
N: size of matrix cplx: is the matrix complex mat_in(2, N*N)= matrix to be packed
OUTPUT
mat_out(N*N+1)= packed matrix
SIDE EFFECTS
PARENTS
rayleigh_ritz
CHILDREN
NOTES
SOURCE
740 subroutine pack_matrix(mat_in, mat_out, N, cplx) 741 use defs_basis 742 743 !This section has been created automatically by the script Abilint (TD). 744 !Do not modify the following lines by hand. 745 #undef ABI_FUNC 746 #define ABI_FUNC 'pack_matrix' 747 !End of the abilint section 748 749 implicit none 750 751 !This section has been created automatically by the script Abilint (TD). 752 !Do not modify the following lines by hand. 753 #undef ABI_FUNC 754 #define ABI_FUNC 'pack_matrix' 755 !End of the abilint section 756 757 integer, intent(in) :: N, cplx 758 real(dp), intent(in) :: mat_in(cplx, N*N) 759 real(dp), intent(out) :: mat_out(cplx*N*(N+1)/2) 760 integer :: isubh, i, j 761 762 ! ************************************************************************* 763 764 isubh = 1 765 do j=1,N 766 do i=1,j 767 mat_out(isubh) = mat_in(1, (j-1)*N+i) 768 ! bad for vectorization, but it's not performance critical, so ... 769 if(cplx == 2) then 770 mat_out(isubh+1) = mat_in(2, (j-1)*N+i) 771 end if 772 isubh=isubh+cplx 773 end do 774 end do 775 end subroutine pack_matrix
ABINIT/rayleigh_ritz_distributed [ Functions ]
NAME
rayleigh_ritz_distributed
FUNCTION
Performs a rayleigh-ritz procedure (subspace rotation), building the distributed hamiltonian/overlap matrices directly, and calling the ScaLapack routines
COPYRIGHT
Copyright (C) 2014-2018 ABINIT group (AL) this file is distributed under the terms of the gnu general public license, see ~abinit/COPYING or http://www.gnu.org/copyleft/gpl.txt . for the initials of contributors, see ~abinit/doc/developers/contributors.txt .
INPUTS
mpi_enreg=informations about MPI parallelization nband=number of bands at this k point for that spin polarization npw=number of plane waves at this k point nspinor=number of plane waves at this k point usepaw=do we use the PAW method
OUTPUT
eig(nband)=array for holding eigenvalues (hartree)
SIDE EFFECTS
cg(2,*)=updated wavefunctions ghc(2,*)=updated ghc gsc(2,*)=updated gsc gvnlc(2,*)=updated gvnlc
PARENTS
chebfi
CHILDREN
NOTES
Should profile for large test cases and see where the bottleneck is. Is it the copies? Should we do partial GEMMs? Is it the latency? Should we buffer more? Should we overlap computations and communications? (easy in theory, tedious in practice)
SOURCE
297 subroutine rayleigh_ritz_distributed(cg,ghc,gsc,gvnlc,eig,istwf_k,mpi_enreg,nband,npw,nspinor,usepaw) 298 use defs_basis 299 use defs_abitypes 300 use m_errors 301 use m_xmpi 302 use m_profiling_abi 303 use m_abi_linalg 304 use m_slk 305 306 !This section has been created automatically by the script Abilint (TD). 307 !Do not modify the following lines by hand. 308 #undef ABI_FUNC 309 #define ABI_FUNC 'rayleigh_ritz_distributed' 310 use interfaces_18_timing 311 use interfaces_66_wfs, except_this_one => rayleigh_ritz_distributed 312 !End of the abilint section 313 314 implicit none 315 316 integer,external :: NUMROC 317 318 ! Arguments 319 type(mpi_type),intent(inout) :: mpi_enreg 320 integer,intent(in) :: nband,npw,nspinor,usepaw,istwf_k 321 real(dp),intent(inout) :: cg(2,npw*nspinor*nband),gsc(2,npw*nspinor*nband),ghc(2,npw*nspinor*nband),gvnlc(2,npw*nspinor*nband) 322 real(dp),intent(out) :: eig(nband) 323 324 ! Locals 325 integer :: blocksize,nbproc,iproc,ierr,cplx,vectsize 326 integer :: buffsize_iproc(2), coords_iproc(2), grid_dims(2) 327 real(dp) :: cg_new(2,npw*nspinor*nband),gsc_or_vnlc_new(2,npw*nspinor*nband),ghc_new(2,npw*nspinor*nband) 328 real(dp), allocatable :: ham_iproc(:,:), ovl_iproc(:,:), evec_iproc(:,:), left_temp(:,:), right_temp(:,:) 329 real(dp) :: tsec(2) 330 type(matrix_scalapack) :: sca_ham, sca_ovl, sca_evec 331 character(len=500) :: message 332 character :: blas_transpose 333 334 integer, parameter :: timer_chebfi = 1600, timer_alltoall = 1601, timer_apply_inv_ovl = 1602, timer_rotation = 1603 335 integer, parameter :: timer_subdiago = 1604, timer_subham = 1605, timer_ortho = 1606, timer_getghc = 1607 336 integer, parameter :: timer_residuals = 1608, timer_update_eigen = 1609, timer_sync = 1610 337 338 ! ************************************************************************* 339 340 if(istwf_k == 1) then 341 cplx = 2 342 vectsize = npw*nspinor 343 blas_transpose = 'c' 344 else 345 cplx = 1 346 vectsize = 2*npw*nspinor 347 blas_transpose = 't' 348 end if 349 350 !write(message, *) 'RR: init' 351 !call wrtout(std_out,message,'COLL') 352 !====================================================================================================== 353 ! Init Scalapack matrices 354 !====================================================================================================== 355 call init_matrix_scalapack(sca_ham ,nband,nband,abi_processor,istwf_k,10) 356 call init_matrix_scalapack(sca_ovl ,nband,nband,abi_processor,istwf_k,10) 357 call init_matrix_scalapack(sca_evec,nband,nband,abi_processor,istwf_k,10) 358 359 ! Get info 360 blocksize = sca_ham%sizeb_blocs(1) ! Assume square blocs 361 nbproc = abi_processor%grid%nbprocs 362 grid_dims = abi_processor%grid%dims 363 364 !====================================================================================================== 365 ! Build hamiltonian and overlap matrices 366 !====================================================================================================== 367 ! TODO maybe we should avoid copies at the price of less BLAS efficiency (when blocksize is small)? must profile. 368 369 call timab(timer_subham, 1, tsec) 370 371 ! Transform cg, ghc and maybe gsc, according to istwf_k 372 if(istwf_k == 2) then 373 cg = cg * sqrt2 374 if(mpi_enreg%me_g0 == 1) cg(:, 1:npw*nspinor*nband:npw) = cg(:, 1:npw*nspinor*nband:npw) / sqrt2 375 ghc = ghc * sqrt2 376 if(mpi_enreg%me_g0 == 1) ghc(:, 1:npw*nspinor*nband:npw) = ghc(:, 1:npw*nspinor*nband:npw) / sqrt2 377 if(usepaw == 1) then 378 gsc = gsc * sqrt2 379 if(mpi_enreg%me_g0 == 1) gsc(:, 1:npw*nspinor*nband:npw) = gsc(:, 1:npw*nspinor*nband:npw) / sqrt2 380 end if 381 end if 382 383 do iproc=0,nbproc-1 384 ! Build the local matrix belonging to processor iproc 385 !write(message, *) 'RR: build', iproc 386 !call wrtout(std_out,message,'COLL') 387 388 ! Get coordinates of iproc 389 coords_iproc(1) = INT(iproc / grid_dims(2)) 390 coords_iproc(2) = MOD(iproc, grid_dims(2)) 391 392 ! Get buffersize of iproc 393 buffsize_iproc(1) = NUMROC(nband,blocksize,coords_iproc(1),0,abi_processor%grid%dims(1)) 394 buffsize_iproc(2) = NUMROC(nband,blocksize,coords_iproc(2),0,abi_processor%grid%dims(2)) 395 396 ! Allocate matrices_iproc, that will gather the contribution of this proc to the block owned by iproc 397 ABI_ALLOCATE(ham_iproc, (cplx*buffsize_iproc(1), buffsize_iproc(2))) 398 ABI_ALLOCATE(ovl_iproc, (cplx*buffsize_iproc(1), buffsize_iproc(2))) 399 400 ! Build them 401 ABI_ALLOCATE(left_temp, (2, npw*nspinor*buffsize_iproc(1))) 402 ABI_ALLOCATE(right_temp, (2, npw*nspinor*buffsize_iproc(2))) 403 404 ! ovl 405 call from_mat_to_block_cyclic(cg, npw*nspinor, nband, left_temp, & 406 & buffsize_iproc(1), blocksize, coords_iproc(1), grid_dims(1)) 407 if(usepaw == 1) then 408 call from_mat_to_block_cyclic(gsc, npw*nspinor, nband, right_temp, & 409 & buffsize_iproc(2), blocksize, coords_iproc(2), grid_dims(2)) 410 else 411 call from_mat_to_block_cyclic(cg, npw*nspinor, nband, right_temp, & 412 & buffsize_iproc(2), blocksize, coords_iproc(2), grid_dims(2)) 413 end if 414 call abi_xgemm(blas_transpose,'n',buffsize_iproc(1),buffsize_iproc(2),vectsize,cone,left_temp,vectsize,& 415 & right_temp,vectsize,czero,ovl_iproc,buffsize_iproc(1), x_cplx=cplx) 416 417 ! ham 418 call from_mat_to_block_cyclic(ghc, npw*nspinor, nband, right_temp, & 419 & buffsize_iproc(2), blocksize, coords_iproc(2), grid_dims(2)) 420 call abi_xgemm(blas_transpose,'n',buffsize_iproc(1),buffsize_iproc(2),vectsize,cone,left_temp,vectsize,& 421 & right_temp,vectsize,czero,ham_iproc,buffsize_iproc(1), x_cplx=cplx) 422 423 ! Sum to iproc, and fill sca_ matrices 424 call xmpi_sum_master(ham_iproc, iproc, abi_communicator, ierr) 425 call xmpi_sum_master(ovl_iproc, iproc, abi_communicator, ierr) 426 if(iproc == abi_processor%myproc) then 427 ! DCOPY to bypass the real/complex issue 428 if(cplx == 2) then 429 call DCOPY(cplx*buffsize_iproc(1)*buffsize_iproc(2), ham_iproc, 1, sca_ham%buffer_cplx, 1) 430 call DCOPY(cplx*buffsize_iproc(1)*buffsize_iproc(2), ovl_iproc, 1, sca_ovl%buffer_cplx, 1) 431 else 432 call DCOPY(cplx*buffsize_iproc(1)*buffsize_iproc(2), ham_iproc, 1, sca_ham%buffer_real, 1) 433 call DCOPY(cplx*buffsize_iproc(1)*buffsize_iproc(2), ovl_iproc, 1, sca_ovl%buffer_real, 1) 434 end if 435 end if 436 437 ABI_DEALLOCATE(ham_iproc) 438 ABI_DEALLOCATE(ovl_iproc) 439 ABI_DEALLOCATE(left_temp) 440 ABI_DEALLOCATE(right_temp) 441 end do 442 443 ! Final sum 444 if(cplx == 2) then 445 call xmpi_sum(sca_ham%buffer_cplx, abi_complement_communicator, ierr) 446 call xmpi_sum(sca_ovl%buffer_cplx, abi_complement_communicator, ierr) 447 else 448 call xmpi_sum(sca_ham%buffer_real, abi_complement_communicator, ierr) 449 call xmpi_sum(sca_ovl%buffer_real, abi_complement_communicator, ierr) 450 end if 451 452 ! Transform back 453 if(istwf_k == 2) then 454 cg = cg / sqrt2 455 if(mpi_enreg%me_g0 == 1) cg(:, 1:npw*nspinor*nband:npw) = cg(:, 1:npw*nspinor*nband:npw) * sqrt2 456 ghc = ghc / sqrt2 457 if(mpi_enreg%me_g0 == 1) ghc(:, 1:npw*nspinor*nband:npw) = ghc(:, 1:npw*nspinor*nband:npw) * sqrt2 458 if(usepaw == 1) then 459 gsc = gsc / sqrt2 460 if(mpi_enreg%me_g0 == 1) gsc(:, 1:npw*nspinor*nband:npw) = gsc(:, 1:npw*nspinor*nband:npw) * sqrt2 461 end if 462 end if 463 call timab(timer_subham, 2, tsec) 464 465 !====================================================================================================== 466 ! Do the diagonalization 467 !====================================================================================================== 468 !write(message, *) 'RR: diag' 469 !call wrtout(std_out,message,'COLL') 470 call timab(timer_subdiago, 1, tsec) 471 call compute_generalized_eigen_problem(abi_processor,sca_ham,sca_ovl,& 472 & sca_evec,eig,abi_communicator,istwf_k) 473 call timab(timer_subdiago, 2, tsec) 474 475 !====================================================================================================== 476 ! Perform rotation 477 !====================================================================================================== 478 call timab(timer_rotation, 1, tsec) 479 cg_new = zero 480 gsc_or_vnlc_new = zero 481 ghc_new = zero 482 do iproc=0,nbproc-1 483 ! Compute the contribution to the rotated matrices from this block 484 !write(message, *) 'RR: rot', iproc 485 !call wrtout(std_out,message,'COLL') 486 487 ! Get coordinates of iproc 488 coords_iproc(1) = INT(iproc / grid_dims(2)) 489 coords_iproc(2) = MOD(iproc, grid_dims(2)) 490 491 ! Get buffersize of iproc 492 buffsize_iproc(1) = NUMROC(nband,blocksize,coords_iproc(1),0,abi_processor%grid%dims(1)) 493 buffsize_iproc(2) = NUMROC(nband,blocksize,coords_iproc(2),0,abi_processor%grid%dims(2)) 494 495 ! Get data from iproc 496 ABI_ALLOCATE(evec_iproc, (cplx*buffsize_iproc(1), buffsize_iproc(2))) 497 if(iproc == abi_processor%myproc) then 498 if(cplx == 2) then 499 call DCOPY(cplx*buffsize_iproc(1)*buffsize_iproc(2), sca_evec%buffer_cplx, 1, evec_iproc, 1) 500 else 501 call DCOPY(cplx*buffsize_iproc(1)*buffsize_iproc(2), sca_evec%buffer_real, 1, evec_iproc, 1) 502 end if 503 end if 504 call xmpi_bcast(evec_iproc,iproc,abi_communicator,ierr) 505 506 ! Compute contribution to the rotated matrices from iproc 507 ABI_ALLOCATE(left_temp, (2,npw*nspinor*buffsize_iproc(1))) 508 ABI_ALLOCATE(right_temp, (2,npw*nspinor*buffsize_iproc(2))) 509 510 ! cg 511 call from_mat_to_block_cyclic(cg, npw*nspinor, nband, left_temp, & 512 & buffsize_iproc(1), blocksize, coords_iproc(1), grid_dims(1)) 513 call abi_xgemm('n','n',vectsize,buffsize_iproc(2),buffsize_iproc(1),cone,left_temp,vectsize,& 514 & evec_iproc, buffsize_iproc(1), czero, right_temp, vectsize, x_cplx=cplx) 515 call from_block_cyclic_to_mat(cg_new, npw*nspinor, nband, right_temp, & 516 & buffsize_iproc(2), blocksize, coords_iproc(2), grid_dims(2)) 517 518 ! ghc 519 call from_mat_to_block_cyclic(ghc, npw*nspinor, nband, left_temp, & 520 & buffsize_iproc(1), blocksize, coords_iproc(1), grid_dims(1)) 521 call abi_xgemm('n','n',vectsize,buffsize_iproc(2),buffsize_iproc(1),cone,left_temp,vectsize,& 522 & evec_iproc, buffsize_iproc(1), czero, right_temp, vectsize,x_cplx=cplx) 523 call from_block_cyclic_to_mat(ghc_new, npw*nspinor, nband, right_temp,& 524 & buffsize_iproc(2), blocksize, coords_iproc(2), grid_dims(2)) 525 526 ! gsc or vnlc 527 if(usepaw == 1) then 528 call from_mat_to_block_cyclic(gsc, npw*nspinor, nband, left_temp, & 529 & buffsize_iproc(1), blocksize, coords_iproc(1), grid_dims(1)) 530 else 531 call from_mat_to_block_cyclic(gvnlc, npw*nspinor, nband, left_temp, & 532 & buffsize_iproc(1), blocksize, coords_iproc(1), grid_dims(1)) 533 end if 534 call abi_xgemm('n','n',vectsize,buffsize_iproc(2),buffsize_iproc(1),cone,left_temp,vectsize,& 535 & evec_iproc, buffsize_iproc(1), czero, right_temp, vectsize, x_cplx=cplx) 536 call from_block_cyclic_to_mat(gsc_or_vnlc_new, npw*nspinor, nband, right_temp, & 537 & buffsize_iproc(2), blocksize, coords_iproc(2), grid_dims(2)) 538 539 ABI_DEALLOCATE(evec_iproc) 540 ABI_DEALLOCATE(left_temp) 541 ABI_DEALLOCATE(right_temp) 542 end do 543 544 ! Overwrite with _new 545 cg = cg_new 546 ghc = ghc_new 547 if(usepaw == 1) then 548 gsc = gsc_or_vnlc_new 549 else 550 gvnlc = gsc_or_vnlc_new 551 end if 552 call timab(timer_rotation, 2, tsec) 553 554 end subroutine rayleigh_ritz_distributed
ABINIT/rayleigh_ritz_subdiago [ Functions ]
NAME
rayleigh_ritz_subdiago
FUNCTION
Performs a rayleigh-ritz procedure (subspace rotation), building the hamiltonian/overlap matrices in full and calling the subdiago method
COPYRIGHT
Copyright (C) 2014-2018 ABINIT group (AL) this file is distributed under the terms of the gnu general public license, see ~abinit/COPYING or http://www.gnu.org/copyleft/gpl.txt . for the initials of contributors, see ~abinit/doc/developers/contributors.txt .
INPUTS
mpi_enreg=informations about MPI parallelization nband=number of bands at this k point for that spin polarization npw=number of plane waves at this k point nspinor=number of plane waves at this k point usepaw=do we use the PAW method
OUTPUT
eig(nband)=array for holding eigenvalues (hartree)
SIDE EFFECTS
cg(2,*)=updated wavefunctions ghc(2,*)=updated ghc gsc(2,*)=updated gsc gvnlc(2,*)=updated gvnlc
PARENTS
chebfi
CHILDREN
NOTES
TODO choose generalized eigenproblem or ortho + diago (see #if 1)
SOURCE
52 subroutine rayleigh_ritz_subdiago(cg,ghc,gsc,gvnlc,eig,istwf_k,mpi_enreg,nband,npw,nspinor,usepaw) 53 use defs_basis 54 use defs_abitypes 55 use m_cgtools 56 use m_errors 57 use m_xmpi 58 use m_profiling_abi 59 use m_abi_linalg 60 61 !This section has been created automatically by the script Abilint (TD). 62 !Do not modify the following lines by hand. 63 #undef ABI_FUNC 64 #define ABI_FUNC 'rayleigh_ritz_subdiago' 65 use interfaces_14_hidewrite 66 use interfaces_18_timing 67 use interfaces_66_wfs, except_this_one => rayleigh_ritz_subdiago 68 !End of the abilint section 69 70 implicit none 71 72 ! Arguments 73 type(mpi_type),intent(inout) :: mpi_enreg 74 integer,intent(in) :: nband,npw,nspinor,usepaw,istwf_k 75 real(dp),intent(inout) :: cg(2,npw*nspinor*nband),gsc(2,npw*nspinor*nband),ghc(2,npw*nspinor*nband),gvnlc(2,npw*nspinor*nband) 76 real(dp),intent(out) :: eig(nband) 77 78 ! Locals 79 real(dp), allocatable :: subham(:), totham(:,:) 80 real(dp), allocatable :: subovl(:), totovl(:,:) 81 real(dp), allocatable :: evec(:,:), edummy(:,:) 82 integer :: ierr, cplx, vectsize 83 real(dp) :: gtempc(2,npw*nspinor*nband), tsec(2) 84 character :: blas_transpose 85 86 integer, parameter :: timer_chebfi = 1600, timer_alltoall = 1601, timer_apply_inv_ovl = 1602, timer_rotation = 1603 87 integer, parameter :: timer_subdiago = 1604, timer_subham = 1605, timer_ortho = 1606, timer_getghc = 1607 88 integer, parameter :: timer_residuals = 1608, timer_update_eigen = 1609, timer_sync = 1610 89 90 ! ************************************************************************* 91 92 if(istwf_k == 1) then 93 cplx = 2 94 vectsize = npw*nspinor 95 blas_transpose = 'c' 96 else 97 cplx = 1 98 vectsize = 2*npw*nspinor 99 blas_transpose = 't' 100 end if 101 102 #if 1 103 call timab(timer_subham, 1, tsec) 104 105 ! Transform cg, ghc and maybe gsc, according to istwf_k 106 if(istwf_k == 2) then 107 cg = cg * sqrt2 108 if(mpi_enreg%me_g0 == 1) cg(:, 1:npw*nspinor*nband:npw) = cg(:, 1:npw*nspinor*nband:npw) / sqrt2 109 ghc = ghc * sqrt2 110 if(mpi_enreg%me_g0 == 1) ghc(:, 1:npw*nspinor*nband:npw) = ghc(:, 1:npw*nspinor*nband:npw) / sqrt2 111 if(usepaw == 1) then 112 gsc = gsc * sqrt2 113 if(mpi_enreg%me_g0 == 1) gsc(:, 1:npw*nspinor*nband:npw) = gsc(:, 1:npw*nspinor*nband:npw) / sqrt2 114 end if 115 end if 116 117 ! Build, pack and sum suham 118 ABI_ALLOCATE(subham, (cplx*nband*(nband+1)/2)) 119 ABI_ALLOCATE(totham, (cplx, nband*nband)) 120 call abi_xgemm(blas_transpose,'n',nband,nband,vectsize,cone,ghc,vectsize,& 121 & cg,vectsize,czero,totham,nband, x_cplx=cplx) 122 call pack_matrix(totham, subham, nband, cplx) 123 ABI_DEALLOCATE(totham) 124 call xmpi_sum(subham,mpi_enreg%comm_bandspinorfft,ierr) 125 126 127 ! Same for subovl 128 ABI_ALLOCATE(subovl, (cplx*nband*(nband+1)/2)) 129 ABI_ALLOCATE(totovl, (cplx, nband*nband)) 130 if(usepaw == 1) then 131 call abi_xgemm(blas_transpose,'n',nband,nband,vectsize,cone,gsc,vectsize,& 132 & cg,vectsize,czero,totovl,nband, x_cplx=cplx) 133 else 134 call abi_xgemm(blas_transpose,'n',nband,nband,vectsize,cone,cg,vectsize,& 135 & cg,vectsize,czero,totovl,nband, x_cplx=cplx) 136 end if 137 call pack_matrix(totovl, subovl, nband, cplx) 138 ABI_DEALLOCATE(totovl) 139 call xmpi_sum(subovl,mpi_enreg%comm_bandspinorfft,ierr) 140 141 142 ! Transform back 143 if(istwf_k == 2) then 144 cg = cg / sqrt2 145 if(mpi_enreg%me_g0 == 1) cg(:, 1:npw*nspinor*nband:npw) = cg(:, 1:npw*nspinor*nband:npw) * sqrt2 146 ghc = ghc / sqrt2 147 if(mpi_enreg%me_g0 == 1) ghc(:, 1:npw*nspinor*nband:npw) = ghc(:, 1:npw*nspinor*nband:npw) * sqrt2 148 if(usepaw == 1) then 149 gsc = gsc / sqrt2 150 if(mpi_enreg%me_g0 == 1) gsc(:, 1:npw*nspinor*nband:npw) = gsc(:, 1:npw*nspinor*nband:npw) * sqrt2 151 end if 152 end if 153 call timab(timer_subham, 2, tsec) 154 155 156 call timab(timer_subdiago, 1, tsec) 157 ABI_ALLOCATE(evec, (cplx*nband, nband)) 158 159 call abi_xhpgv(1,'V','U',nband,subham,subovl,eig,evec,istwf_k=istwf_k,use_slk=mpi_enreg%paral_kgb) 160 161 ABI_DEALLOCATE(subham) 162 ABI_DEALLOCATE(subovl) 163 164 ! Fix the phase (this is because of the simultaneous diagonalisation of this 165 ! matrix by different processors, allowing to get different unitary transforms, thus breaking the 166 ! coherency of parts of cg stored on different processors). 167 ! call cg_normev(evec,nband,nband) ! Unfortunately, for cg_normev to work, one needs the vectors to be normalized, so uses fxphas_seq 168 ABI_ALLOCATE(edummy, (cplx*nband, nband)) 169 call fxphas_seq(evec,edummy,0,0,1,nband*nband,nband*nband,nband,nband,0) 170 ABI_DEALLOCATE(edummy) 171 172 173 174 ! Rotate 175 call abi_xgemm('n','n',vectsize,nband, nband,cone,cg , vectsize, evec, nband, czero, gtempc, vectsize, x_cplx=cplx) 176 cg = gtempc 177 call abi_xgemm('n','n',vectsize,nband, nband,cone,ghc, vectsize, evec, nband, czero, gtempc, vectsize, x_cplx=cplx) 178 ghc = gtempc 179 if(usepaw == 1) then 180 call abi_xgemm('n','n',vectsize,nband, nband,cone,gsc, vectsize, evec, nband, czero, gtempc, vectsize, x_cplx=cplx) 181 gsc = gtempc 182 else 183 call abi_xgemm('n','n',vectsize,nband, nband,cone,gvnlc, vectsize, evec, nband, czero, gtempc, vectsize, x_cplx=cplx) 184 gvnlc = gtempc 185 end if 186 ABI_DEALLOCATE(evec) 187 call timab(timer_subdiago, 2, tsec) 188 189 #else 190 !! TODO non-functional, should be rewritten. Possibly faster (tests needed) 191 write(message, *) 'Transposed, orthogonalizing' 192 call wrtout(std_out,message,'COLL') 193 194 ! orthonormalization 195 call timab(timer_ortho, 1, tsec) 196 if (paw) then 197 call abi_xorthonormalize(cg, gsc,nband, mpi_enreg%comm_bandspinorfft, sqgram, npw*nspinor, 2) 198 else 199 call abi_xorthonormalize(cg, cg, nband, mpi_enreg%comm_bandspinorfft, sqgram, npw*nspinor, 2) 200 end if 201 call timab(timer_ortho, 2, tsec) 202 203 ! rotate ghc, gsc and gvnlc 204 call timab(timer_rotation, 1, tsec) 205 call abi_xtrsm('r','u','n','n',npw*nspinor,nband,cone,sqgram,nband, ghc,npw*nspinor,x_cplx=2) 206 if(paw) then 207 call abi_xtrsm('r','u','n','n',npw*nspinor,nband,cone,sqgram,nband, gsc,npw*nspinor,x_cplx=2) 208 else 209 call abi_xtrsm('r','u','n','n',npw*nspinor,nband,cone,sqgram,nband, gvnlc,npw*nspinor,x_cplx=2) 210 end if 211 call timab(timer_rotation, 2, tsec) 212 213 write(message, *) 'Orthogonalized, building subham' 214 call wrtout(std_out,message,'COLL') 215 216 ! build hamiltonian in subspace 217 call timab(timer_subham, 1, tsec) 218 call abi_xgemm(blas_transpose,'n',nband,nband,npw*nspinor,cone,ghc,npw*nspinor,& 219 & cg,npw*nspinor,czero,totham,nband, x_cplx=2) 220 ! pack for compatibility with subdiago 221 call pack_matrix(totham, subham, nband) 222 call xmpi_sum(subham,mpi_enreg%comm_bandspinorfft,ierr) 223 call timab(timer_subham, 2, tsec) 224 225 write(message, *) 'Subham built, diagonalizing' 226 call wrtout(std_out,message,'COLL') 227 228 ! Rayleigh-Ritz 229 call timab(timer_subdiago,1,tsec) 230 call subdiago(cg,eig,evec,gsc,0,0,gs_hamk%istwf_k,& 231 & mcg,mcg,nband,npw,nspinor,dtset%paral_kgb,& 232 & subham,dummy,0,gs_hamk%usepaw,mpi_enreg%me_g0) 233 call timab(timer_subdiago,2,tsec) 234 235 write(message, *) 'Diagonalization done' 236 call wrtout(std_out,message,'COLL') 237 238 ! Rotate ghc and gvnlc according to evecs 239 call timab(timer_rotation, 1, tsec) 240 call abi_xgemm('n','n',npw*nspinor,nband, nband,cone,ghc, npw*nspinor, evec, nband, czero, gtempc, npw*nspinor, x_cplx=2) 241 ghc = gtempc 242 if(.not. paw) then 243 call abi_xgemm('n','n',npw*nspinor,nband, nband,cone,gvnlc, npw*nspinor, evec, nband, czero, gtempc, npw*nspinor, x_cplx=2) 244 gvnlc = gtempc 245 end if 246 call timab(timer_rotation, 2, tsec) 247 248 #endif 249 end subroutine rayleigh_ritz_subdiago