TABLE OF CONTENTS
- ABINIT/m_gemm_nonlop
- m_gemm_nonlop/destroy_gemm_nonlop
- m_gemm_nonlop/free_gemm_nonlop_ikpt
- m_gemm_nonlop/gemm_nonlop
- m_gemm_nonlop/gemm_nonlop_type
- m_gemm_nonlop/init_gemm_nonlop
- m_gemm_nonlop/make_gemm_nonlop
ABINIT/m_gemm_nonlop [ Modules ]
NAME
m_gemm_nonlop
FUNCTION
This module provides functions to compute the nonlocal operator by means of the BLAS GEMM routine. By treating ndat simultaneous wavefunctions, it is able to exploit BLAS3 routines, which leads to excellent CPU efficiency and OpenMP scalability.
COPYRIGHT
Copyright (C) 2014-2022 ABINIT group (AL) This file is distributed under the terms of the GNU General Public License, see ~abinit/COPYING or http://www.gnu.org/copyleft/gpl.txt .
SOURCE
18 ! TODO list : 19 ! Don't allocate the full nkpt structures, only those that are treated by this proc: use same init as in m_bandfft_kpt 20 ! support more options (forces & stresses mostly) 21 ! Support RF/other computations (only GS right now) 22 ! handle the case where nloalg(2) < 0, ie no precomputation of ph3d 23 ! more systematic checking of the workflow (right now, only works if init/make/gemm/destroy, no multiple makes, etc) 24 ! Avoid allocating the complex matrix when istwfk > 1 25 ! Merge with chebfi's invovl 26 27 28 #if defined HAVE_CONFIG_H 29 #include "config.h" 30 #endif 31 32 #include "abi_common.h" 33 34 module m_gemm_nonlop 35 36 use defs_basis 37 use m_errors 38 use m_abicore 39 use m_xmpi 40 use m_abi_linalg 41 42 use defs_abitypes, only : MPI_type 43 use m_opernlc_ylm, only : opernlc_ylm 44 use m_pawcprj, only : pawcprj_type 45 use m_geometry, only : strconv 46 use m_kg, only : mkkpg 47 48 implicit none 49 50 private 51 52 ! Use these routines in order: first call init, then call make_gemm_nonlop for each k point, 53 ! then call gemm_nonlop to do the actual computation, and call destroy when done. See gstate and vtorho. 54 public :: init_gemm_nonlop 55 public :: destroy_gemm_nonlop 56 public :: make_gemm_nonlop 57 public :: gemm_nonlop
m_gemm_nonlop/destroy_gemm_nonlop [ Functions ]
[ Top ] [ m_gemm_nonlop ] [ Functions ]
NAME
destroy_gemm_nonlop
FUNCTION
Destruction of the gemm_nonlop_kpt array
INPUTS
nkpt= number of k-points
SOURCE
146 subroutine destroy_gemm_nonlop(nkpt) 147 148 integer,intent(in) :: nkpt 149 integer :: ikpt 150 151 ! ************************************************************************* 152 153 ! TODO add cycling if kpt parallelism 154 do ikpt = 1,nkpt 155 call free_gemm_nonlop_ikpt(ikpt) 156 end do 157 158 ABI_FREE(gemm_nonlop_kpt) 159 160 end subroutine destroy_gemm_nonlop
m_gemm_nonlop/free_gemm_nonlop_ikpt [ Functions ]
[ Top ] [ m_gemm_nonlop ] [ Functions ]
NAME
free_destroy_gemm_nonlop_ikpt
FUNCTION
Release memory for one kpt value of the gemm_nonlop_kpt array
INPUTS
ikpt= index of gemm_nonlop_kptto be released
SOURCE
176 subroutine free_gemm_nonlop_ikpt(ikpt) 177 178 integer,intent(in) :: ikpt 179 180 ! ************************************************************************* 181 182 if(gemm_nonlop_kpt(ikpt)%nprojs /= -1) then 183 if (allocated(gemm_nonlop_kpt(ikpt)%projs)) then 184 ABI_FREE(gemm_nonlop_kpt(ikpt)%projs) 185 end if 186 if (allocated(gemm_nonlop_kpt(ikpt)%projs_r)) then 187 ABI_FREE(gemm_nonlop_kpt(ikpt)%projs_r) 188 end if 189 if (allocated(gemm_nonlop_kpt(ikpt)%projs_i)) then 190 ABI_FREE(gemm_nonlop_kpt(ikpt)%projs_i) 191 end if 192 gemm_nonlop_kpt(ikpt)%nprojs = -1 193 if(gemm_nonlop_kpt(ikpt)%ngrads /= -1) then 194 if (allocated(gemm_nonlop_kpt(ikpt)%dprojs)) then 195 ABI_FREE(gemm_nonlop_kpt(ikpt)%dprojs) 196 end if 197 if (allocated(gemm_nonlop_kpt(ikpt)%dprojs_r)) then 198 ABI_FREE(gemm_nonlop_kpt(ikpt)%dprojs_r) 199 end if 200 if (allocated(gemm_nonlop_kpt(ikpt)%dprojs_i)) then 201 ABI_FREE(gemm_nonlop_kpt(ikpt)%dprojs_i) 202 end if 203 gemm_nonlop_kpt(ikpt)%ngrads = -1 204 end if 205 end if 206 207 end subroutine free_gemm_nonlop_ikpt
m_gemm_nonlop/gemm_nonlop [ Functions ]
[ Top ] [ m_gemm_nonlop ] [ Functions ]
NAME
gemm_nonlop
FUNCTION
Replacement of nonlop. same prototype as nonlop although not all options are implemented.
INPUTS
SOURCE
480 subroutine gemm_nonlop(atindx1,choice,cpopt,cprjin,dimenl1,dimenl2,dimekbq,dimffnlin,dimffnlout,& 481 & enl,enlout,ffnlin,ffnlout,gmet,gprimd,idir,indlmn,istwf_k,& 482 & kgin,kgout,kpgin,kpgout,kptin,kptout,lambda,lmnmax,matblk,mgfft,& 483 & mpi_enreg,mpsang,mpssoang,natom,nattyp,ndat,ngfft,nkpgin,nkpgout,nloalg,& 484 & nnlout,npwin,npwout,nspinor,nspinortot,ntypat,only_SO,paw_opt,phkxredin,& 485 & phkxredout,ph1d,ph3din,ph3dout,signs,sij,svectout,& 486 & tim_nonlop,ucvol,useylm,vectin,vectout,& 487 & use_gpu_cuda) 488 489 !Arguments ------------------------------------ 490 !scalars 491 integer,intent(in) :: choice,cpopt,dimenl1,dimenl2,dimekbq,dimffnlin,dimffnlout,idir 492 integer,intent(in) :: istwf_k,lmnmax,matblk,mgfft,mpsang,mpssoang,natom,ndat,nkpgin 493 integer,intent(in) :: nkpgout,nnlout,npwin,npwout,nspinor,nspinortot,ntypat,only_SO 494 integer,intent(in) :: paw_opt,signs,tim_nonlop,useylm 495 integer,optional,intent(in) :: use_gpu_cuda 496 real(dp),intent(in) :: lambda(ndat),ucvol 497 type(MPI_type),intent(in) :: mpi_enreg 498 !arrays 499 integer,intent(in) :: atindx1(natom),indlmn(6,lmnmax,ntypat),kgin(3,npwin) 500 integer,intent(in) :: kgout(3,npwout),nattyp(ntypat),ngfft(18),nloalg(3) 501 real(dp),intent(in) :: enl(dimenl1,dimenl2,nspinortot**2,dimekbq) 502 real(dp),intent(in) :: ffnlin(npwin,dimffnlin,lmnmax,ntypat) 503 real(dp),intent(in) :: ffnlout(npwout,dimffnlout,lmnmax,ntypat),gmet(3,3) 504 real(dp),intent(in) :: gprimd(3,3),kpgin(npwin,nkpgin*useylm) 505 real(dp),intent(in) :: kpgout(npwout,nkpgout*useylm),kptin(3),kptout(3) 506 real(dp),intent(in) :: phkxredin(2,natom),phkxredout(2,natom),ph1d(2,*) 507 real(dp),intent(in) :: sij(dimenl1,ntypat*((paw_opt+1)/3)) 508 real(dp),intent(inout) :: ph3din(2,npwin,matblk),ph3dout(2,npwout,matblk) 509 real(dp),intent(inout) :: vectin(2,npwin*nspinor*ndat) 510 real(dp),intent(inout) :: enlout(nnlout*ndat) 511 real(dp),intent(out) :: svectout(2,npwout*nspinor*(paw_opt/3)*ndat) 512 real(dp),intent(inout) :: vectout(2,npwout*nspinor*ndat) !vz_i 513 type(pawcprj_type),intent(inout) :: cprjin(natom,nspinor*((cpopt+5)/5)*ndat) 514 515 ! locals 516 integer :: ii, ia, idat, igrad, nprojs, ngrads, shift, iatom, nlmn, ierr, ibeg, iend 517 integer :: cplex, cplex_enl, cplex_fac, proj_shift, grad_shift 518 integer :: enlout_shift, force_shift, nnlout_test 519 integer :: iatm, ndgxdt, ndgxdtfac, nd2gxdt, nd2gxdtfac, optder, itypat, ilmn 520 integer :: cplex_dgxdt(1), cplex_d2gxdt(1) 521 real(dp) :: esum 522 real(dp) :: work(6) 523 real(dp) :: dgxdt_dum_in(1,1,1,1,1), dgxdt_dum_out(1,1,1,1,1),dgxdt_dum_out2(1,1,1,1,1) 524 real(dp) :: d2gxdt_dum_in(1,1,1,1,1), d2gxdt_dum_out(1,1,1,1,1),d2gxdt_dum_out2(1,1,1,1,1) 525 real(dp), allocatable :: enlk(:),sij_typ(:) 526 real(dp), allocatable :: projections(:,:,:), s_projections(:,:,:), vnl_projections(:,:,:) 527 real(dp), allocatable :: dprojections(:,:,:), temp_realvec(:) 528 529 ! ************************************************************************* 530 531 ! We keep the same interface as nonlop, but we don't use many of those 532 ABI_UNUSED((/ffnlin,ffnlout,gmet,kpgin,kpgout/)) 533 ABI_UNUSED((/ph1d(1,1),ph3din,ph3dout/)) 534 ABI_UNUSED((/phkxredin,phkxredout,ucvol/)) 535 ABI_UNUSED((/mgfft,mpsang,mpssoang/)) 536 ABI_UNUSED((/kptin,kptout/)) 537 ABI_UNUSED((/idir,nloalg,ngfft,kgin,kgout,ngfft,only_SO,tim_nonlop,use_gpu_cuda/)) 538 539 ! Check supported options 540 if (.not.gemm_nonlop_use_gemm) then 541 ABI_BUG('computation not prepared for gemm_nonlop use!') 542 end if 543 if ( (choice>1.and.choice/=7.and.signs==2) .or. & 544 & (choice>3.and.choice/=7.and.choice/=23.and.signs==1) .or. & 545 & (useylm/=1) ) then 546 ABI_BUG('gemm_nonlop option not supported!') 547 end if 548 if (signs==1) then 549 nnlout_test=0 550 if (choice==1) nnlout_test=1 551 if (choice==2) nnlout_test=3*natom 552 if (choice==3) nnlout_test=6 553 if (choice==23) nnlout_test=6+3*natom 554 if (nnlout<nnlout_test) then 555 ABI_BUG('wrong nnlout size!') 556 end if 557 end if 558 559 cplex=2;if (istwf_k>1) cplex=1 560 cplex_enl=1;if (paw_opt>0) cplex_enl=2*dimenl1/(lmnmax*(lmnmax+1)) ! is enl complex? 561 cplex_fac=max(cplex,dimekbq) 562 if ((nspinortot==2.or.cplex_enl==2).and.paw_opt>0.and.choice/=7) cplex_fac=2 ! is vnl_projections complex? 563 564 nprojs = gemm_nonlop_kpt(gemm_nonlop_ikpt_this_proc_being_treated)%nprojs 565 ngrads = gemm_nonlop_kpt(gemm_nonlop_ikpt_this_proc_being_treated)%ngrads 566 if(choice==1) ngrads=0 567 ABI_CHECK(ngrads>=3.or.choice/=2 ,"Bad allocation in gemm_nonlop (2)!") 568 ABI_CHECK(ngrads>=6.or.choice/=3 ,"Bad allocation in gemm_nonlop (3)!") 569 ABI_CHECK(ngrads>=9.or.choice/=23,"Bad allocation in gemm_nonlop (23)!") 570 571 ! These will store the non-local factors for vectin, svectout and vectout respectively 572 ABI_MALLOC(projections,(cplex, nprojs,nspinor*ndat)) 573 ABI_MALLOC(s_projections,(cplex, nprojs,nspinor*ndat)) 574 ABI_MALLOC(vnl_projections,(cplex_fac, nprojs,nspinor*ndat)) 575 projections = zero 576 s_projections = zero 577 vnl_projections = zero 578 if (signs==1.and.ngrads>0) then 579 ABI_MALLOC(dprojections,(cplex, ngrads*nprojs,nspinor*ndat)) 580 dprojections = zero 581 end if 582 583 if(nprojs == 0) then 584 ! TODO check if this is correct 585 if(signs == 1) then 586 enlout=zero 587 return 588 end if 589 if(signs == 2) then 590 vectout = zero 591 if(paw_opt>0) svectout = vectin 592 return 593 end if 594 end if 595 596 if(cpopt >= 2) then 597 ! retrieve from cprjin 598 do idat=1, ndat*nspinor 599 shift = 0 600 do iatom = 1, natom 601 nlmn = cprjin(iatom, idat)%nlmn 602 projections(1:cplex, shift+1:shift+nlmn, idat) = cprjin(iatom, idat)%cp(1:cplex, 1:nlmn) 603 shift = shift + nlmn 604 end do 605 end do 606 if(cpopt==4.and.allocated(dprojections)) then 607 ABI_CHECK(cprjin(1,1)%ncpgr>=ngrads,"cprjin%ncpgr not correct! (1)") 608 do idat=1, ndat*nspinor 609 shift = 0 610 do iatom = 1, natom 611 nlmn = cprjin(iatom, idat)%nlmn 612 do igrad=1,ngrads 613 dprojections(1:cplex, shift+1:shift+nlmn, idat) = & 614 & cprjin(iatom, idat)%dcp(1:cplex,igrad,1:ilmn) 615 shift = shift + nlmn 616 end do 617 end do 618 end do 619 end if 620 else 621 ! opernla 622 if(cplex == 2) then 623 call abi_zgemm_2r('C', 'N', nprojs, ndat*nspinor, npwin, cone, & 624 & gemm_nonlop_kpt(gemm_nonlop_ikpt_this_proc_being_treated)%projs, npwin,& 625 & vectin, npwin, czero, projections, nprojs) 626 if(signs==1.and.ngrads>0) then 627 call abi_zgemm_2r('C', 'N', ngrads*nprojs, ndat*nspinor, npwin, cone, & 628 gemm_nonlop_kpt(gemm_nonlop_ikpt_this_proc_being_treated)%dprojs, npwin,& 629 vectin, npwin, czero, dprojections, ngrads*nprojs) 630 end if 631 else 632 ABI_MALLOC(temp_realvec,(MAX(npwout,npwin)*nspinor*ndat)) 633 ! only compute real part of projections = P^* psi => projections_r = P_r^T psi_r + P_i^T psi_i 634 temp_realvec(1:npwin*nspinor*ndat) = vectin(1,1:npwin*nspinor*ndat) 635 if(istwf_k == 2 .and. mpi_enreg%me_g0 == 1) then 636 do idat=1, ndat*nspinor 637 temp_realvec(1+(idat-1)*npwin) = temp_realvec(1+(idat-1)*npwin)/2 638 end do 639 end if 640 call DGEMM('T', 'N', nprojs, ndat*nspinor, npwin, one, & 641 & gemm_nonlop_kpt(gemm_nonlop_ikpt_this_proc_being_treated)%projs_r, npwin, & 642 & temp_realvec, npwin, zero, projections, nprojs) 643 if(signs==1.and.ngrads>0) then 644 call DGEMM('T', 'N', ngrads*nprojs, ndat*nspinor, npwin, one, & 645 & gemm_nonlop_kpt(gemm_nonlop_ikpt_this_proc_being_treated)%dprojs_r, npwin, & 646 & temp_realvec, npwin, zero, dprojections, ngrads*nprojs) 647 end if 648 temp_realvec(1:npwin*nspinor*ndat) = vectin(2,1:npwin*nspinor*ndat) 649 if(istwf_k == 2 .and. mpi_enreg%me_g0 == 1) then 650 do idat=1, ndat*nspinor 651 temp_realvec(1+(idat-1)*npwin) = zero 652 end do 653 end if 654 call DGEMM('T', 'N', nprojs, ndat*nspinor, npwin, one, & 655 & gemm_nonlop_kpt(gemm_nonlop_ikpt_this_proc_being_treated)%projs_i, npwin, & 656 & temp_realvec, npwin, one , projections, nprojs) 657 projections = projections * 2 658 if(signs==1.and.ngrads>0) then 659 call DGEMM('T', 'N', ngrads*nprojs, ndat*nspinor, npwin, one, & 660 & gemm_nonlop_kpt(gemm_nonlop_ikpt_this_proc_being_treated)%dprojs_i, npwin, & 661 & temp_realvec, npwin, one , dprojections, ngrads*nprojs) 662 dprojections = dprojections * 2 663 end if 664 ABI_FREE(temp_realvec) 665 end if 666 call xmpi_sum(projections,mpi_enreg%comm_fft,ierr) 667 if (choice>1) then 668 call xmpi_sum(dprojections,mpi_enreg%comm_fft,ierr) 669 end if 670 671 if(cpopt >= 0) then 672 ! store in cprjin 673 do idat=1, ndat*nspinor 674 shift = 0 675 do iatom = 1, natom 676 nlmn = cprjin(iatom, idat)%nlmn 677 cprjin(iatom, idat)%cp(1:cplex, 1:nlmn) = projections(1:cplex, shift+1:shift+nlmn, idat) 678 shift = shift + nlmn 679 end do 680 end do 681 if(cpopt==3) then 682 ABI_CHECK(cprjin(1,1)%ncpgr>=ngrads,"cprjin%ncpgr not correct! (2)") 683 shift = 0 684 do iatom = 1, natom 685 nlmn = cprjin(iatom, idat)%nlmn 686 do igrad=1,ngrads 687 cprjin(iatom, idat)%dcp(1:cplex,igrad,1:nlmn) = & 688 & dprojections(1:cplex, shift+1:shift+nlmn, idat) 689 shift = shift + nlmn 690 end do 691 end do 692 end if 693 end if 694 end if 695 696 if(choice > 0) then 697 698 if(choice /= 7) then 699 ! opernlc 700 iatm = 0 701 ndgxdt = 0 702 ndgxdtfac = 0 703 nd2gxdt = 0 704 nd2gxdtfac = 0 705 optder = 0 706 707 shift = 0 708 ABI_MALLOC(sij_typ,(((paw_opt+1)/3)*lmnmax*(lmnmax+1)/2)) 709 do itypat=1, ntypat 710 nlmn=count(indlmn(3,:,itypat)>0) 711 if (paw_opt>=2) then 712 if (cplex_enl==1) then 713 do ilmn=1,nlmn*(nlmn+1)/2 714 sij_typ(ilmn)=sij(ilmn,itypat) 715 end do 716 else 717 do ilmn=1,nlmn*(nlmn+1)/2 718 sij_typ(ilmn)=sij(2*ilmn-1,itypat) 719 end do 720 end if 721 end if 722 723 ibeg = shift+1 724 iend = shift+nattyp(itypat)*nlmn 725 726 do idat = 1,ndat 727 call opernlc_ylm(atindx1,cplex,cplex_dgxdt,cplex_d2gxdt,cplex_enl,cplex_fac,dgxdt_dum_in,dgxdt_dum_out,dgxdt_dum_out2,& 728 & d2gxdt_dum_in,d2gxdt_dum_out,d2gxdt_dum_out2,dimenl1,dimenl2,dimekbq,enl,& 729 & projections(:, ibeg:iend, 1+nspinor*(idat-1):nspinor*idat),& 730 & vnl_projections(:, ibeg:iend,1+nspinor*(idat-1):nspinor*idat),& 731 & s_projections(:, ibeg:iend,1+nspinor*(idat-1):nspinor*idat),& 732 & iatm,indlmn(:,:,itypat),itypat,lambda(idat),mpi_enreg,natom,ndgxdt,ndgxdtfac,nd2gxdt,nd2gxdtfac,& 733 & nattyp(itypat),nlmn,nspinor,nspinortot,optder,paw_opt,sij_typ) 734 end do 735 736 shift = shift + nattyp(itypat)*nlmn 737 iatm = iatm+nattyp(itypat) 738 end do 739 ABI_FREE(sij_typ) 740 else 741 s_projections = projections 742 end if 743 744 ! opernlb (only choice=1) 745 if(signs==2) then 746 if(paw_opt == 3 .or. paw_opt == 4) then 747 ! Get svectout from s_projections 748 if(cplex == 2) then 749 call abi_zgemm_2r('N', 'N', npwout, ndat*nspinor, nprojs, cone, & 750 & gemm_nonlop_kpt(gemm_nonlop_ikpt_this_proc_being_treated)%projs, npwout, & 751 & s_projections, nprojs, czero, svectout, npwout) 752 else 753 ABI_MALLOC(temp_realvec,(MAX(npwout,npwin)*nspinor*ndat)) 754 call DGEMM('N', 'N', npwout, ndat*nspinor, nprojs, one, & 755 & gemm_nonlop_kpt(gemm_nonlop_ikpt_this_proc_being_treated)%projs_r, npwout, & 756 & s_projections, nprojs, zero, temp_realvec, npwout) 757 svectout(1,1:npwout*nspinor*ndat) = temp_realvec(1:npwout*nspinor*ndat) 758 call DGEMM('N', 'N', npwout, ndat*nspinor, nprojs, one, & 759 & gemm_nonlop_kpt(gemm_nonlop_ikpt_this_proc_being_treated)%projs_i, npwout,& 760 & s_projections, nprojs, zero, temp_realvec, npwout) 761 svectout(2,1:npwout*nspinor*ndat) = temp_realvec(1:npwout*nspinor*ndat) 762 ABI_FREE(temp_realvec) 763 end if 764 if(choice /= 7) svectout = svectout + vectin ! TODO understand this 765 end if 766 if(paw_opt == 0 .or. paw_opt == 1 .or. paw_opt == 4) then 767 ! Get vectout from vnl_projections 768 if(cplex_fac == 2) then 769 call abi_zgemm_2r('N', 'N', npwout, ndat*nspinor, nprojs, cone, & 770 & gemm_nonlop_kpt(gemm_nonlop_ikpt_this_proc_being_treated)%projs, npwout, & 771 & vnl_projections, nprojs, czero, vectout, npwout) 772 else 773 ABI_MALLOC(temp_realvec,(MAX(npwout,npwin)*nspinor*ndat)) 774 call DGEMM('N', 'N', npwout, ndat*nspinor, nprojs, one, & 775 & gemm_nonlop_kpt(gemm_nonlop_ikpt_this_proc_being_treated)%projs_r, npwout, & 776 & vnl_projections, nprojs, zero, temp_realvec, npwout) 777 vectout(1,1:npwout*nspinor*ndat) = temp_realvec(1:npwout*nspinor*ndat) 778 call DGEMM('N', 'N', npwout, ndat*nspinor, nprojs, one, & 779 & gemm_nonlop_kpt(gemm_nonlop_ikpt_this_proc_being_treated)%projs_i, npwout, & 780 & vnl_projections, nprojs, zero, temp_realvec, npwout) 781 vectout(2,1:npwout*nspinor*ndat) = temp_realvec(1:npwout*nspinor*ndat) 782 ABI_FREE(temp_realvec) 783 end if 784 end if 785 end if ! opernlb 786 787 ! opernld 788 if(signs==1) then 789 enlout=zero 790 if(choice==1.or.choice==3.or.choice==23) then 791 ABI_MALLOC(enlk,(ndat)) 792 enlk=zero 793 do idat=1,ndat*nspinor 794 proj_shift=0 795 do itypat=1, ntypat 796 nlmn=count(indlmn(3,:,itypat)>0) 797 do ia=1,nattyp(itypat) 798 !Following loops are a [D][Z]DOT 799 esum=zero 800 do ilmn=1,nlmn 801 do ii=1,cplex 802 esum=esum +vnl_projections(ii,proj_shift+ilmn,idat) & 803 & *projections (ii,proj_shift+ilmn,idat) 804 end do 805 end do 806 proj_shift=proj_shift+nlmn 807 enlk(idat) = enlk(idat) + esum 808 end do 809 end do 810 end do 811 if (choice==1) enlout(1:ndat)=enlk(1:ndat) 812 end if ! choice=1/3/23 813 if(choice==2.or.choice==3.or.choice==23) then 814 do idat=1,ndat*nspinor 815 proj_shift=0 ; grad_shift=0 816 enlout_shift=(idat-1)*nnlout 817 force_shift=merge(6,0,choice==23) 818 do itypat=1, ntypat 819 nlmn=count(indlmn(3,:,itypat)>0) 820 do ia=1,nattyp(itypat) 821 if (choice==3.or.choice==23) then 822 do igrad=1,6 823 !Following loops are a [D][Z]DOT 824 esum=zero 825 do ilmn=1,nlmn 826 do ii=1,cplex 827 esum=esum +vnl_projections(ii,proj_shift+ilmn,idat) & 828 & *dprojections (ii,grad_shift+ilmn,idat) 829 end do 830 end do 831 grad_shift=grad_shift+nlmn 832 enlout(enlout_shift+igrad)=enlout(enlout_shift+igrad) + two*esum 833 end do 834 end if 835 if (choice==2.or.choice==23) then 836 do igrad=1,3 837 !Following loops are a [D][Z]DOT 838 esum=zero 839 do ilmn=1,nlmn 840 do ii=1,cplex 841 esum=esum +vnl_projections(ii,proj_shift+ilmn,idat) & 842 & *dprojections (ii,grad_shift+ilmn,idat) 843 end do 844 end do 845 grad_shift=grad_shift+nlmn 846 enlout(enlout_shift+force_shift+igrad)= & 847 & enlout(enlout_shift+force_shift+igrad) + two*esum 848 end do 849 force_shift=force_shift+3 850 end if 851 proj_shift=proj_shift+nlmn 852 end do 853 end do 854 end do 855 end if ! choice=2, 3 or 23 856 857 end if !opernld 858 859 end if ! choice>0 860 861 ! Reduction in case of parallelism 862 if (signs==1.and.mpi_enreg%paral_spinor==1) then 863 if (size(enlout)>0) then 864 call xmpi_sum(enlout,mpi_enreg%comm_spinor,ierr) 865 end if 866 if (choice==3.or.choice==23) then 867 call xmpi_sum(enlk,mpi_enreg%comm_spinor,ierr) 868 end if 869 end if 870 871 ! Derivatives wrt strain 872 ! - Convert from reduced to cartesian coordinates 873 ! - Substract volume contribution 874 if ((choice==3.or.choice==23).and.signs==1.and.paw_opt<=3) then 875 do idat=1,ndat 876 enlout_shift=(idat-1)*nnlout 877 call strconv(enlout(enlout_shift+1:enlout_shift+6),gprimd,work) 878 enlout(enlout_shift+1:enlout_shift+3)=(work(1:3)-enlk(idat)) 879 enlout(enlout_shift+4:enlout_shift+6)= work(4:6) 880 end do 881 end if 882 883 ! Release memory 884 ABI_FREE(projections) 885 ABI_FREE(s_projections) 886 ABI_FREE(vnl_projections) 887 if (allocated(dprojections)) then 888 ABI_FREE(dprojections) 889 end if 890 if (allocated(enlk)) then 891 ABI_FREE(enlk) 892 end if 893 894 end subroutine gemm_nonlop 895 !*** 896 897 !---------------------------------------------------------------------- 898 899 end module m_gemm_nonlop
m_gemm_nonlop/gemm_nonlop_type [ Types ]
[ Top ] [ m_gemm_nonlop ] [ Types ]
NAME
gemm_nonlop_type
FUNCTION
Contains information needed to apply the nonlocal operator
SOURCE
70 type,public :: gemm_nonlop_type 71 72 integer :: nprojs 73 integer :: ngrads 74 75 real(dp), allocatable :: projs(:, :, :) 76 ! (2, npw, nprojs) 77 real(dp), allocatable :: projs_r(:, :, :) 78 ! (1, npw, nprojs) 79 real(dp), allocatable :: projs_i(:, :, :) 80 ! (1, npw, nprojs) 81 82 real(dp), allocatable :: dprojs(:, :, :) 83 ! (2, npw, nprojs*ngrads) 84 real(dp), allocatable :: dprojs_r(:, :, :) 85 ! (1, npw, nprojs*ngrads) 86 real(dp), allocatable :: dprojs_i(:, :, :) 87 ! (1, npw, nprojs*ngrads) 88 89 end type gemm_nonlop_type
m_gemm_nonlop/init_gemm_nonlop [ Functions ]
[ Top ] [ m_gemm_nonlop ] [ Functions ]
NAME
init_gemm_nonlop
FUNCTION
Initalization of the gemm_nonlop_kpt array
INPUTS
nkpt= number of k-points
SOURCE
116 subroutine init_gemm_nonlop(nkpt) 117 118 integer,intent(in) :: nkpt 119 integer :: ikpt 120 121 ! ************************************************************************* 122 123 ! TODO only allocate the number of kpt treated by this proc 124 ABI_MALLOC(gemm_nonlop_kpt, (nkpt)) 125 do ikpt=1,nkpt 126 gemm_nonlop_kpt(ikpt)%nprojs = -1 127 gemm_nonlop_kpt(ikpt)%ngrads = -1 128 end do 129 130 end subroutine init_gemm_nonlop
m_gemm_nonlop/make_gemm_nonlop [ Functions ]
[ Top ] [ m_gemm_nonlop ] [ Functions ]
NAME
make_gemm_nonlop
FUNCTION
Build the gemm_nonlop array
INPUTS
SOURCE
223 subroutine make_gemm_nonlop(ikpt,npw,lmnmax,ntypat,indlmn,nattyp,istwf_k,ucvol,ffnl_k, & 224 & ph3d_k,kpt_k,kg_k,kpg_k, & 225 & compute_grad_strain,compute_grad_atom) ! Optional parameters 226 227 integer, intent(in) :: ikpt 228 integer, intent(in) :: npw, lmnmax,ntypat 229 integer, intent(in) :: indlmn(:,:,:), kg_k(:,:) 230 integer, intent(in) :: nattyp(ntypat) 231 integer, intent(in) :: istwf_k 232 logical, intent(in), optional :: compute_grad_strain,compute_grad_atom 233 real(dp), intent(in) :: ucvol 234 real(dp), intent(in) :: ffnl_k(:,:,:,:) 235 real(dp), intent(in) :: ph3d_k(:,:,:) 236 real(dp), intent(in) :: kpt_k(:) 237 real(dp), intent(in), target :: kpg_k(:,:) 238 239 integer :: nprojs,ndprojs,ngrads 240 241 integer,parameter :: alpha(6)=(/1,2,3,3,3,2/),beta(6)=(/1,2,3,2,1,1/) 242 integer :: itypat, ilmn, nlmn, ia, iaph3d, igrad, shift, shift_grad 243 integer :: il, ipw, idir, idir1, idir2, nkpg_local 244 logical :: parity,my_compute_grad_strain,my_compute_grad_atom 245 real(dp),allocatable :: atom_projs(:,:,:), atom_dprojs(:,:,:,:), temp(:) 246 real(dp),pointer :: kpg(:,:) 247 248 ! ************************************************************************* 249 250 my_compute_grad_strain=.false. ; if (present(compute_grad_strain)) my_compute_grad_strain=compute_grad_strain 251 my_compute_grad_atom=.false. ; if (present(compute_grad_atom)) my_compute_grad_atom=compute_grad_atom 252 ABI_CHECK(size(ph3d_k)>0,'nloalg(2)<0 not compatible with use_gemm_nonlop=1!') 253 ! ABI_CHECK((.not.my_compute_grad_strain).or.size(kpg_k)>0,"kpg_k should be allocated to compute gradients!") 254 ! ABI_CHECK((.not.my_compute_grad_atom).or.size(kpg_k)>0,"kpg_k should be allocated to compute gradients!") 255 256 iaph3d = 1 257 258 ABI_MALLOC(atom_projs, (2, npw, lmnmax)) 259 if (my_compute_grad_strain) then 260 ndprojs = 3 261 ABI_MALLOC(atom_dprojs, (2, npw, ndprojs, lmnmax)) 262 else 263 ndprojs = 0 264 end if 265 266 ABI_MALLOC(temp, (npw)) 267 268 call free_gemm_nonlop_ikpt(ikpt) 269 270 ! build nprojs, ngrads 271 nprojs = 0 ; ngrads = 0 272 do itypat=1,ntypat 273 nprojs = nprojs + count(indlmn(3,:,itypat)>0)*nattyp(itypat) 274 end do 275 if (my_compute_grad_strain) ngrads=ngrads+6 276 if (my_compute_grad_atom) ngrads=ngrads+3 277 if (nprojs>0) gemm_nonlop_kpt(ikpt)%nprojs = nprojs 278 if (ngrads>0) gemm_nonlop_kpt(ikpt)%ngrads = ngrads 279 280 if(istwf_k <= 1) then 281 ABI_MALLOC(gemm_nonlop_kpt(ikpt)%projs, (2, npw, nprojs)) 282 gemm_nonlop_kpt(ikpt)%projs = zero 283 if(ngrads>0) then 284 ABI_MALLOC(gemm_nonlop_kpt(ikpt)%dprojs, (2, npw, nprojs*ngrads)) 285 gemm_nonlop_kpt(ikpt)%dprojs = zero 286 end if 287 else 288 ABI_MALLOC(gemm_nonlop_kpt(ikpt)%projs_r, (1, npw, nprojs)) 289 ABI_MALLOC(gemm_nonlop_kpt(ikpt)%projs_i, (1, npw, nprojs)) 290 gemm_nonlop_kpt(ikpt)%projs_r = zero 291 gemm_nonlop_kpt(ikpt)%projs_i = zero 292 if(ngrads>0) then 293 ABI_MALLOC(gemm_nonlop_kpt(ikpt)%dprojs_r, (1, npw, nprojs*ngrads)) 294 ABI_MALLOC(gemm_nonlop_kpt(ikpt)%dprojs_i, (1, npw, nprojs*ngrads)) 295 gemm_nonlop_kpt(ikpt)%dprojs_r = zero 296 gemm_nonlop_kpt(ikpt)%dprojs_i = zero 297 end if 298 end if 299 300 ! Compute (k+G) vectors if needed 301 nkpg_local=0 302 if ((my_compute_grad_strain.or.my_compute_grad_atom).and.size(kpg_k)==0) then 303 nkpg_local=3 304 ABI_MALLOC(kpg,(npw,nkpg_local)) 305 call mkkpg(kg_k,kpg,kpt_k,nkpg_local,npw) 306 else 307 kpg => kpg_k 308 end if 309 310 shift = 0 ; shift_grad = 0 311 do itypat = 1, ntypat 312 nlmn = count(indlmn(3,:,itypat)>0) 313 314 do ia = 1, nattyp(itypat) 315 316 !! build atom_projs, from opernlb 317 !! P = 4pi/sqrt(ucvol)* conj(diag(ph3d)) * ffnl * diag(parity), with parity = (-i)^l 318 319 ! start from 4pi/sqrt(ucvol)*ffnl 320 ! atom_projs(1, :, 1:nlmn) = four_pi/sqrt(ham%ucvol) * ham%ffnl_k(:, 1, 1:nlmn) 321 ! TODO vectorize (DCOPY with stride) 322 atom_projs(:,:,:) = zero 323 do ipw=1, npw 324 atom_projs(1,ipw, 1:nlmn) = four_pi/sqrt(ucvol) * ffnl_k(ipw, 1, 1:nlmn, itypat) 325 end do 326 if (ndprojs>0) atom_dprojs(:,:,:,:) = zero 327 if (my_compute_grad_strain) then 328 do ipw=1, npw 329 atom_dprojs(1,ipw, 1:3, 1:nlmn) = four_pi/sqrt(ucvol) * ffnl_k(ipw, 2:4, 1:nlmn, itypat) 330 end do 331 end if 332 333 ! multiply by (-i)^l 334 do ilmn=1,nlmn 335 il=mod(indlmn(1,ilmn, itypat),4); 336 parity=(mod(il,2)==0) 337 if (il>1) then 338 ! multiply by -1 339 atom_projs(:,:,ilmn) = -atom_projs(:,:,ilmn) 340 if (ndprojs>0) atom_dprojs(:,:,:,ilmn) = -atom_dprojs(:,:,:,ilmn) 341 end if 342 if(.not. parity) then 343 ! multiply by -i 344 temp = atom_projs(2,:,ilmn) 345 atom_projs(2,:,ilmn) = -atom_projs(1,:,ilmn) 346 atom_projs(1,:,ilmn) = temp 347 if (ndprojs>0) then 348 do idir=1,ndprojs 349 temp = atom_dprojs(2,:,idir,ilmn) 350 atom_dprojs(2,:,idir,ilmn) = -atom_dprojs(1,:,idir,ilmn) 351 atom_dprojs(1,:,idir,ilmn) = temp 352 end do 353 end if 354 end if 355 end do 356 357 ! multiply by conj(ph3d) 358 do ilmn=1,nlmn 359 temp = atom_projs(1, :, ilmn) 360 atom_projs(1, :, ilmn) = atom_projs(1, :, ilmn) * ph3d_k(1, :, iaph3d) & 361 & + atom_projs(2, :, ilmn) * ph3d_k(2, :, iaph3d) 362 atom_projs(2, :, ilmn) = atom_projs(2, :, ilmn) * ph3d_k(1, :, iaph3d) & 363 & - temp * ph3d_k(2, :, iaph3d) 364 end do 365 if (ndprojs>0) then 366 do ilmn=1,nlmn 367 do idir=1,ndprojs 368 temp = atom_dprojs(1, :, idir,ilmn) 369 atom_dprojs(1, :, idir,ilmn) = atom_dprojs(1, :, idir,ilmn) * ph3d_k(1, :, iaph3d) & 370 & + atom_dprojs(2, :, idir,ilmn) * ph3d_k(2, :, iaph3d) 371 atom_dprojs(2, :, idir,ilmn) = atom_dprojs(2, :, idir,ilmn) * ph3d_k(1, :, iaph3d) & 372 & - temp * ph3d_k(2, :, iaph3d) 373 end do 374 end do 375 end if 376 377 !! atom_projs is built, copy to projs / dprojs 378 379 if(istwf_k <= 1) then 380 gemm_nonlop_kpt(ikpt)%projs(1:2, :, shift+1:shift+nlmn) = atom_projs(:, :, 1:nlmn) 381 if(ngrads>0) then 382 igrad=0 383 if(my_compute_grad_strain) then 384 do idir=1,6 385 idir1=alpha(idir);idir2=beta(idir) 386 do ilmn=1,nlmn 387 do ipw=1,npw 388 gemm_nonlop_kpt(ikpt)%dprojs(1:2, ipw, shift_grad+nlmn*igrad+ilmn) = & 389 & -half*(atom_dprojs(1:2, ipw, idir1, ilmn)*kpg(ipw,idir2) & 390 & +atom_dprojs(1:2, ipw, idir2, ilmn)*kpg(ipw,idir1)) 391 end do 392 end do 393 igrad=igrad+1 394 end do 395 end if 396 if(my_compute_grad_atom) then 397 do idir=1,3 398 do ilmn=1,nlmn 399 do ipw=1,npw 400 gemm_nonlop_kpt(ikpt)%dprojs(1, ipw, shift_grad+nlmn*igrad+ilmn) = & 401 & +atom_projs(2, ipw, ilmn)*kpg(ipw,idir)*two_pi 402 gemm_nonlop_kpt(ikpt)%dprojs(2, ipw, shift_grad+nlmn*igrad+ilmn) = & 403 & -atom_projs(1, ipw, ilmn)*kpg(ipw,idir)*two_pi 404 end do 405 end do 406 igrad=igrad+1 407 end do 408 end if 409 end if 410 411 else ! istwf_k>1 412 gemm_nonlop_kpt(ikpt)%projs_r(1, :, shift+1:shift+nlmn) = atom_projs(1, :, 1:nlmn) 413 gemm_nonlop_kpt(ikpt)%projs_i(1, :, shift+1:shift+nlmn) = atom_projs(2, :, 1:nlmn) 414 if(ngrads>0) then 415 igrad=0 416 if(my_compute_grad_strain) then 417 do idir=1,6 418 idir1=alpha(idir);idir2=beta(idir) 419 do ilmn=1,nlmn 420 do ipw=1,npw 421 gemm_nonlop_kpt(ikpt)%dprojs_r(1, ipw, shift_grad+nlmn*igrad+ilmn) = & 422 & -half*(atom_dprojs(1, ipw, idir1, ilmn)*kpg(ipw,idir2) & 423 & +atom_dprojs(1, ipw, idir2, ilmn)*kpg(ipw,idir1)) 424 425 gemm_nonlop_kpt(ikpt)%dprojs_i(1, ipw, shift_grad+nlmn*igrad+ilmn) = & 426 & -half*(atom_dprojs(2, ipw, idir1, ilmn)*kpg(ipw,idir2) & 427 & +atom_dprojs(2, ipw, idir2, ilmn)*kpg(ipw,idir1)) 428 end do 429 end do 430 igrad=igrad+1 431 end do 432 end if 433 if(my_compute_grad_atom) then 434 do idir=1,3 435 do ilmn=1,nlmn 436 do ipw=1,npw 437 gemm_nonlop_kpt(ikpt)%dprojs_r(1, ipw, shift_grad+nlmn*igrad+ilmn) = & 438 & +atom_projs(2, ipw, ilmn)*kpg(ipw,idir)*two_pi 439 gemm_nonlop_kpt(ikpt)%dprojs_i(1, ipw, shift_grad+nlmn*igrad+ilmn) = & 440 & -atom_projs(1, ipw, ilmn)*kpg(ipw,idir)*two_pi 441 end do 442 end do 443 igrad=igrad+1 444 end do 445 end if 446 end if 447 end if ! istwf_k 448 449 shift = shift + nlmn 450 shift_grad = shift_grad + ngrads*nlmn 451 iaph3d = iaph3d + 1 452 end do 453 end do 454 455 ABI_FREE(atom_projs) 456 ABI_FREE(temp) 457 if (allocated(atom_dprojs)) then 458 ABI_FREE(atom_dprojs) 459 end if 460 if (nkpg_local>0) then 461 ABI_FREE(kpg) 462 end if 463 464 end subroutine make_gemm_nonlop