TABLE OF CONTENTS
- ABINIT/getngrec
- ABINIT/m_rec
- ABINIT/pspnl_hgh_rec
- ABINIT/pspnl_operat_rec
- m_rec/Calcnrec
- m_rec/CleanRec
- m_rec/cpu_distribution
- m_rec/find_maxmin_proc
- m_rec/H_D_distrib
- m_rec/Init_MetricRec
- m_rec/Init_nlpspRec
- m_rec/InitRec
ABINIT/getngrec [ Functions ]
NAME
getngrec
FUNCTION
This routine computes the fft box for the recursion method, accordingly to the troncation radius. It is quite similar to getng, but : - there is no xboxcut and ecut consistency - ngfft (the initial fft box) is the maximum fft box
INPUTS
ngfft(18)=non truncated fft box mgfft=maximum of ngfft(1:3) inf_rmet=define the infinitesimal metric : rprimd*(transpose(rprimd)), divided by the number of discretisation point recrcut=truncating delta=to obtain radius of truncation
OUTPUT
ngfftrec=truncated fft box nfftrec= truncated nfft tronc=True if truncation is made
SIDE EFFECTS
PARENTS
m_rec
CHILDREN
sort_int,timab
SOURCE
986 subroutine getngrec(ngfft,rmet,ngfftrec,nfftrec,recrcut,delta,tronc) 987 988 989 !This section has been created automatically by the script Abilint (TD). 990 !Do not modify the following lines by hand. 991 #undef ABI_FUNC 992 #define ABI_FUNC 'getngrec' 993 !End of the abilint section 994 995 implicit none 996 997 !Arguments ------------------------------- 998 !scalars 999 real(dp),intent(in) :: recrcut,delta 1000 integer,intent(out) :: nfftrec 1001 logical,intent(out) :: tronc 1002 !arrays 1003 integer,intent(in) :: ngfft(18) 1004 real(dp),intent(in) :: rmet(3,3) 1005 integer,intent(out) :: ngfftrec(18) 1006 1007 !Local variables------------------------------- 1008 !scalars 1009 integer :: ii,iimin,index,jj,jjmin,kk,kkmin,largest_ngfftrec,maxpow11,maxpow2 1010 integer :: maxpow3,maxpow5,maxpow7,mmsrch,plane 1011 real(dp) :: dsm,dsp,dsqmin,rtroncat 1012 !arrays 1013 integer :: get_ngfftrec(3),maxsrch(3),minsrch(3) 1014 integer,allocatable :: iperm(:),srch(:) 1015 real(dp) :: tsec(2) 1016 real(dp) :: inf_rmet(3,3) 1017 ! ************************************************************************* 1018 1019 call timab(602,1,tsec) 1020 1021 ngfftrec(:) = ngfft(:) 1022 1023 if (recrcut>tol14) then !default value dtset%recrcut = zero means no troncation 1024 rtroncat = recrcut+delta 1025 get_ngfftrec(:)=1 1026 plane = 1 1027 1028 do ii=1,3 1029 inf_rmet(ii,:) = rmet(ii,:)/(real(ngfft(1:3)*ngfft(ii),dp)) 1030 end do 1031 1032 1033 ! minimum value of ngfftrec 1034 do ii = 1,3 1035 ngfftrec(ii)=floor(2*rtroncat/sqrt(inf_rmet(ii,ii)))+1 !minimum value 1036 if(ngfftrec(ii)>=ngfft(ii))then 1037 ngfftrec(ii)=ngfft(ii) 1038 get_ngfftrec(ii)=0 1039 end if 1040 end do 1041 1042 1043 if(sum(get_ngfftrec)/=0)then 1044 largest_ngfftrec=maxval(ngfft(1:3)) 1045 maxpow2=int(log(largest_ngfftrec+0.5d0)/log(two)) 1046 maxpow3=int(log(largest_ngfftrec+0.5d0)/log(three)) 1047 maxpow5=int(log(largest_ngfftrec+0.5d0)/log(five)) 1048 maxpow7=0 1049 maxpow11=0 1050 mmsrch=(maxpow2+1)*(maxpow3+1)*(maxpow5+1)*(maxpow7+1)*(maxpow11+1) 1051 ABI_ALLOCATE(srch,(mmsrch)) 1052 ABI_ALLOCATE(iperm,(mmsrch)) 1053 ! Factors of 2 1054 srch(1)=1 1055 do ii=1,maxpow2 1056 srch(ii+1)=srch(ii)*2 1057 end do 1058 ! Factors of 3 1059 index=maxpow2+1 1060 if(maxpow3>0)then 1061 do ii=1,maxpow3 1062 srch(1+ii*index:(ii+1)*index)=3*srch(1+(ii-1)*index:ii*index) 1063 end do 1064 end if 1065 ! Factors of 5 1066 index=(maxpow3+1)*index 1067 if(maxpow5>0)then 1068 do ii=1,maxpow5 1069 srch(1+ii*index:(ii+1)*index)=5*srch(1+(ii-1)*index:ii*index) 1070 end do 1071 end if 1072 ! Factors of 7 1073 index=(maxpow5+1)*index 1074 if(maxpow7>0)then 1075 do ii=1,maxpow7 1076 srch(1+ii*index:(ii+1)*index)=7*srch(1+(ii-1)*index:ii*index) 1077 end do 1078 end if 1079 ! Factors of 11 1080 index=(maxpow7+1)*index 1081 if(maxpow11>0)then 1082 do ii=1,maxpow11 1083 srch(1+ii*index:(ii+1)*index)=11*srch(1+(ii-1)*index:ii*index) 1084 end do 1085 end if 1086 ! srch is the set of allowed ngfftrec values 1087 1088 call sort_int(mmsrch,srch,iperm) 1089 ABI_DEALLOCATE(iperm) 1090 1091 do ii=1,3 1092 if(get_ngfftrec(ii)==1)then 1093 do jj=1,mmsrch 1094 if(srch(jj)>=ngfftrec(ii))then 1095 minsrch(ii)=jj 1096 ngfftrec(ii)=srch(jj) 1097 exit 1098 end if 1099 end do 1100 do jj=minsrch(ii),mmsrch 1101 if(srch(jj)>ngfft(ii))then 1102 ! since ngfftrec(ii)<ngfft(ii) for get_ngfftrec(ii)==1, 1103 ! and srch(mmsrch)maxval(ngfft(1:3)), 1104 ! that will appens in the range minsrch(ii),mmsrch 1105 maxsrch(ii)=jj-1 1106 exit 1107 end if 1108 end do 1109 end if 1110 ! since ngfft(ii) is in srch, we have here srch(maxsrch(ii))=ngfft(ii) 1111 ! minsrch(ii), maxsrch(ii) is the range of index of srch in which we can 1112 ! search ngfftrec(ii) 1113 1114 if(ngfftrec(ii)>=ngfft(ii))then 1115 ngfftrec(ii)=ngfft(ii) 1116 get_ngfftrec(ii)=0 1117 end if 1118 end do 1119 end if 1120 1121 ! verify that the entiere truncation sphere is in the fft box ; 1122 ! but only in the dimension in which we do not consider the entiere fft box 1123 do while(sum(get_ngfftrec)/=0) !again... 1124 1125 ! determining the minimum distance between 0 and the boundary 1126 ! of the fft box 1127 ! quite similar to the subroutine "bound", but only over the plane which 1128 ! are not the whole fft box 1129 dsqmin=dsq_rec(ngfftrec(1)/2,-ngfftrec(2)/2,-ngfftrec(3)/2,inf_rmet)+0.01d0 1130 1131 if(get_ngfftrec(1)/=0)then 1132 ! look at +/- g1 planes: 1133 do jj=-ngfftrec(2)/2,ngfftrec(2)/2 1134 do kk=-ngfftrec(3)/2,ngfftrec(3)/2 1135 dsp = dsq_rec(ngfftrec(1)/2, jj, kk,inf_rmet) 1136 dsm = dsq_rec( - ngfftrec(1)/2, jj, kk,inf_rmet) 1137 if (dsp<dsqmin) then 1138 dsqmin = dsp 1139 iimin = ngfftrec(1)/2 1140 jjmin = jj 1141 kkmin = kk 1142 plane=1 1143 end if 1144 if (dsm<dsqmin) then 1145 dsqmin = dsm 1146 iimin = - ngfftrec(1)/2 1147 jjmin = jj 1148 kkmin = kk 1149 plane=1 1150 end if 1151 end do 1152 end do 1153 end if 1154 1155 if(get_ngfftrec(2)/=0)then 1156 ! +/- g2 planes: 1157 do ii=-ngfftrec(1)/2,ngfftrec(1)/2 1158 do kk=-ngfftrec(3)/2,ngfftrec(3)/2 1159 dsp = dsq_rec(ii,ngfftrec(2)/2,kk,inf_rmet) 1160 dsm = dsq_rec(ii,-ngfftrec(2)/2,kk,inf_rmet) 1161 if (dsp<dsqmin) then 1162 dsqmin = dsp 1163 iimin = ii 1164 jjmin = ngfftrec(2)/2 1165 kkmin = kk 1166 plane=2 1167 end if 1168 if (dsm<dsqmin) then 1169 dsqmin = dsm 1170 iimin = ii 1171 jjmin = - ngfftrec(2)/2 1172 kkmin = kk 1173 plane=2 1174 end if 1175 end do 1176 end do 1177 end if 1178 1179 if(get_ngfftrec(3)/=0)then 1180 ! +/- g3 planes: 1181 do ii=-ngfftrec(1)/2,ngfftrec(1)/2 1182 do jj=-ngfftrec(2)/2,ngfftrec(2)/2 1183 dsp = dsq_rec(ii,jj,ngfftrec(3)/2,inf_rmet) 1184 dsm = dsq_rec(ii,jj,-ngfftrec(3)/2,inf_rmet) 1185 if (dsp<dsqmin) then 1186 dsqmin = dsp 1187 iimin = ii 1188 jjmin = jj 1189 kkmin = ngfftrec(3)/2 1190 plane=3 1191 end if 1192 if (dsm<dsqmin) then 1193 dsqmin = dsm 1194 iimin = ii 1195 jjmin = jj 1196 kkmin = - ngfftrec(3)/2 1197 plane=3 1198 end if 1199 end do 1200 end do 1201 end if 1202 1203 if(dsqmin>=rtroncat)then 1204 get_ngfftrec=0 1205 exit 1206 end if 1207 1208 ! Fix nearest boundary 1209 do ii=minsrch(plane),maxsrch(plane) 1210 if (srch(ii)>=ngfftrec(plane)) then 1211 ! redefine ngfft(plane) to next higher choice 1212 ngfftrec(plane)=srch(ii+1) 1213 ! verify if we cover the whole box 1214 if(ngfftrec(plane)>=ngfft(plane))then 1215 ngfftrec(plane)=ngfft(plane) 1216 get_ngfftrec(plane)=0 1217 end if 1218 ! Exit the loop over ii 1219 exit 1220 end if 1221 end do 1222 1223 end do 1224 1225 if (allocated(srch)) then 1226 ABI_DEALLOCATE(srch) 1227 end if 1228 1229 ! if(mod(ngfftrec(1),16)/=0) then 1230 ! ngfftrec(1) = ngfftrec(1)+(16-mod(ngfftrec(1),16)) 1231 ! ngfftrec(2:3) = ngfftrec(1) 1232 ! endif 1233 1234 ngfftrec(4)=2*(ngfftrec(1)/2)+1 1235 ngfftrec(5)=2*(ngfftrec(2)/2)+1 1236 ngfftrec(6)=ngfftrec(3) 1237 1238 ! --algorithm 1239 ngfftrec(7)=ngfft(7) ! to be improved for a better non-parallel algorithm - here it is automatically 401 1240 ngfftrec(8)=ngfft(8) 1241 1242 end if 1243 1244 !--For now, recursion method doesn't use paralelism on FFT - which would require a great number of processors 1245 nfftrec = product(ngfftrec(1:3)) 1246 ngfftrec(9:11) = (/0,1,0/) !--(/ paral, nproc, %me \) 1247 ngfftrec(12:13) = ngfftrec(2:3) ! n2proc ! n3proc 1248 1249 tronc = all(ngfftrec(:3)/=ngfft(:3)) 1250 call timab(602,2,tsec) 1251 1252 contains 1253 1254 function dsq_rec(ii,jj,kk,inf_rmet) 1255 1256 1257 !This section has been created automatically by the script Abilint (TD). 1258 !Do not modify the following lines by hand. 1259 #undef ABI_FUNC 1260 #define ABI_FUNC 'dsq_rec' 1261 !End of the abilint section 1262 1263 real(dp) :: dsq_rec 1264 integer,intent(in) :: ii,jj,kk 1265 real(dp),intent(in) :: inf_rmet(3,3) 1266 dsq_rec=sqrt(& 1267 & inf_rmet(1,1)*dble(ii**2)& 1268 & +inf_rmet(2,2)*dble(jj**2)& 1269 & +inf_rmet(3,3)*dble(kk**2)& 1270 & +two*(inf_rmet(1,2)*dble(ii*jj)& 1271 & +inf_rmet(2,3)*dble(jj*kk)& 1272 & +inf_rmet(3,1)*dble(kk*ii))) 1273 end function dsq_rec 1274 1275 1276 end subroutine getngrec
ABINIT/m_rec [ Modules ]
NAME
m_rec
FUNCTION
This module provides some functions applied to the recursion structured datatype recursion_type. It includes also some function used to change some variables of recursion_type
COPYRIGHT
Copyright (C) 2002-2018 ABINIT group (MMancini) This file is distributed under the terms of the GNU General Public License, see ~abinit/COPYING or http://www.gnu.org/copyleft/gpl.txt . For the initials of contributors, see ~abinit/doc/developers/contributors.txt.
NOTES
* Routines tagged with "@type_name" are strongly connected to the definition of the data type. Strongly connected means that the proper functioning of the implementation relies on the assumption that the tagged procedure is consistent with the type declaration. Every time a developer changes the structure "type_name" adding new entries, he/she has to make sure that all the strongly connected routines are changed accordingly to accomodate the modification of the data type. Typical examples of strongly connected routines are creation, destruction or reset methods.
PARENTS
CHILDREN
SOURCE
35 #if defined HAVE_CONFIG_H 36 #include "config.h" 37 #endif 38 39 #include "abi_common.h" 40 41 module m_rec 42 43 use defs_basis 44 use defs_datatypes 45 use defs_abitypes 46 use defs_rectypes 47 use m_abicore 48 use m_errors 49 use m_xmpi 50 use m_sort 51 52 use m_exp_mat, only : exp_mat 53 use m_numeric_tools, only : set2unit 54 use m_special_funcs, only : gamma_function 55 use m_pawfgr, only : pawfgr_nullify, indgrid, pawfgr_destroy 56 use m_paw_sphharm, only : initylmr 57 use m_time, only : timab 58 use m_rec_tools, only : get_pt0_pt1 59 use m_per_cond, only : per_cond 60 #ifdef HAVE_GPU_CUDA 61 use m_hidecudarec, only : InitRecGPU, CleanRecGPU 62 #endif 63 64 65 implicit none 66 67 private :: & 68 find_maxmin_proc, & !--To calculate max and min pt for any cpu 69 H_D_distrib 70 71 public :: & 72 InitRec, & !--Main creation method. 73 Init_MetricRec, & !--To Initalize the inf. metric in recursion 74 Init_nlpspRec, & !--Main creation method for non-local part. 75 CleanRec, & !--deallocate all pointers. 76 Calcnrec, & !--calculates the new min_nrec 77 cpu_distribution !--Regulates the work load on cpu-gpu 78 CONTAINS !===========================================================
ABINIT/pspnl_hgh_rec [ Functions ]
NAME
pspnl_hgh_rec
FUNCTION
This routine computes the matrices S_kk'^{l,A} of the projectors (it is the exponential of the overlap matrix). It coorresponds to the matrix: $$\left[(U_l)^{-1}*Exp(-temperature D_l )*U_l* (g_l)^{-1} -Identity\right]_kk' where (U_l)^-1* D_l* U_l = h^lg_l. $g_l = <f^l_k|f^l_{k'}>$ is the overlap matrix between projectors and $h^l_{kk'}$ is the strength matrix of the projectors. It calulates also the strength eigenvalues and eigenvectors of $h$, used in the calculus of non-local energy
INPUTS
temperature=4*rtrotter/beta=4*rtrotter*tsmear: the effective temp. in recursion psps <type(pseudopotential_type)>=variables related to pseudo-potentials debug=debug variable
OUTPUT
nlrec%mat_exp_psp_nl=the matrix of the exponential of the projectors: for any psp, for any angular moment: h_ij=strength; g_ij=ovelap => exp(-h.g/temp/4p).g^(-1) nlrec%radii=Local radii of nl psp nlrec%pspinfo(:,:) for any typat: (momang,typat)=number of projectors nlrec%eival(:,:,:) for any psp, any momang, the eigenvalues of the strength matrix H: eival(:,mang,psp) nlrec%eivec(:,:,:,:)for any psp, any momang, the eigenvectors of the strength matrix H: eivec(:,:,mang,psp)
SIDE EFFECTS
PARENTS
m_rec
CHILDREN
dgetrf,dgetri,dsyev,exp_mat,gamma_function,set2unit,wrtout
SOURCE
1557 subroutine pspnl_hgh_rec(psps,temperature,nlrec,debug) 1558 1559 use m_linalg_interfaces 1560 1561 !This section has been created automatically by the script Abilint (TD). 1562 !Do not modify the following lines by hand. 1563 #undef ABI_FUNC 1564 #define ABI_FUNC 'pspnl_hgh_rec' 1565 !End of the abilint section 1566 1567 implicit none 1568 1569 !Arguments ----------------------------------- 1570 !scalars 1571 real(dp),intent(in) :: temperature 1572 logical,intent(in) :: debug 1573 type(pseudopotential_type),intent(in) :: psps 1574 type(nlpsprec_type),intent(inout) :: nlrec 1575 !arrays 1576 !Local variables------------------------------- 1577 !scalars 1578 integer,parameter :: maxsize=3 1579 integer,parameter :: lwork=(1+32)*maxsize 1580 integer :: iangol,ipseudo,info,nproj 1581 integer :: g_mat_size,ii,nproj2 1582 real(dp) :: denom_1,denom_2,tot_proj 1583 character(len=500) :: msg 1584 !arrays 1585 integer :: ipvt(1) 1586 !real(dp) :: rwork(2*maxsize) 1587 real(dp) :: h_mat_init(3,3), rework(lwork) 1588 real(dp), allocatable :: g_mat(:,:),h_mat(:,:),eig_val_h(:) 1589 real(dp), allocatable :: identity(:,:),inv_g_mat(:,:),u_mat(:,:) 1590 1591 complex(dpc),allocatable :: hg_mat(:,:) 1592 ! ************************************************************************* 1593 1594 if(debug)then 1595 write(msg,'(80a,a,a)') ('=',ii=1,80),ch10,' pspnl_hgh_rec : enter ' 1596 call wrtout(std_out,msg,'COLL') 1597 end if 1598 1599 !--For any pseudo potential: 1600 do ipseudo = 1, psps%npsp !--Loop on the pseudos 1601 write(msg,'(a,80a)')' pseudo file',('-',ii=1,10) 1602 call wrtout(std_out,msg,'COLL') 1603 write(msg,'(a)') psps%filpsp(ipseudo) 1604 call wrtout(std_out,msg,'COLL') 1605 1606 ! --For any angular moment: 1607 do iangol = 0,psps%mpsang-1 !--Loop on the angular moment 1608 1609 ! --Local radius 1610 nlrec%radii(iangol+1,ipseudo) = psps%gth_params%psppar(iangol+1,0,ipseudo) 1611 1612 ! --Strenghts of non-local projectors (matrix h) 1613 ! --Diagonal part: 1614 h_mat_init = zero 1615 h_mat_init(1,1) = psps%gth_params%psppar(iangol+1,1,ipseudo) 1616 h_mat_init(2,2) = psps%gth_params%psppar(iangol+1,2,ipseudo) 1617 h_mat_init(3,3) = psps%gth_params%psppar(iangol+1,3,ipseudo) 1618 ! --Out-diagonal part 1619 ! --Depending on angular moment the projectors 1620 ! strength is calculated differently 1621 select case(iangol) 1622 case(0) 1623 h_mat_init(1,2) = -half*sqrt(3.d0/5.d0)*h_mat_init(2,2) 1624 h_mat_init(1,3) = half*sqrt(5.d0/21.d0)*h_mat_init(3,3) 1625 h_mat_init(2,3) = -half*sqrt(100.d0/63.d0)*h_mat_init(3,3) 1626 case(1) 1627 h_mat_init(1,2) = -half*sqrt(5.d0/7.d0)*h_mat_init(2,2) 1628 h_mat_init(1,3) = sixth*sqrt(35.d0/11.d0)*h_mat_init(3,3) 1629 h_mat_init(2,3) = -14.d0/six/sqrt(11.d0) *h_mat_init(3,3) 1630 case(2) 1631 h_mat_init(1,2) = -half*sqrt(7.d0/9.d0)*h_mat_init(2,2) 1632 h_mat_init(1,3) = half*sqrt(63.d0/143.d0)*h_mat_init(3,3) 1633 h_mat_init(2,3) = -nine/sqrt(143.d0)*h_mat_init(3,3) 1634 case(3) 1635 h_mat_init(1,2) = zero; h_mat_init(1,3) = zero; h_mat_init(2,3) = zero; 1636 case default 1637 write(msg,'(a)')' error angular: moment component' 1638 call wrtout(std_out,msg,'COLL') 1639 end select 1640 1641 1642 1643 ! --Real dimensions of projectors. 1644 g_mat_size = count(abs((/ (h_mat_init(ii,ii),ii=1,3) /))>1.d-8) 1645 nlrec%pspinfo(iangol+1,ipseudo) = g_mat_size 1646 write(msg,'(a,i2,a,i2)')' ang. moment=',iangol,', N projectors=',g_mat_size 1647 call wrtout(std_out,msg,'COLL') 1648 if (g_mat_size>0) then 1649 ! --Identity matrix 1650 ABI_ALLOCATE(identity,(g_mat_size,g_mat_size)) 1651 call set2unit(identity) 1652 ! identity = zero 1653 ! identity(:,1) = one 1654 ! identity(:,:) = cshift(array=identity,shift=(/ (-ii,ii=0,g_mat_size) /), dim=2 ) 1655 1656 1657 ! ############## CALCULOUS OF THE EIGEN_SPACE OF THE PROJECTORS STRENGTHS ################## 1658 ! --Inverse of the matrix h 1659 ABI_ALLOCATE(eig_val_h,(g_mat_size)) 1660 ABI_ALLOCATE(u_mat,(g_mat_size,g_mat_size)) 1661 ! --u-mat will contain the eigenvectors of h_mat_init 1662 u_mat = h_mat_init(:g_mat_size,:g_mat_size) 1663 1664 ! write(std_out,*)'hmat_init' 1665 ! do ii=1,g_mat_size 1666 ! write(std_out,*)h_mat_init(ii,:) 1667 ! end do 1668 call DSYEV('v','u',g_mat_size,u_mat,g_mat_size,eig_val_h,rework,lwork,info) 1669 1670 ! --THE DIAGONAL MATRIX IS GIVEN BY D=U^t.H.U 1671 ! (eival=transpose(eivec).h_mat_init.eivec) 1672 write(msg,'(a,3d10.3)')' eigenvalues=',eig_val_h 1673 call wrtout(std_out,msg,'COLL') 1674 ! write(std_out,*)'autovec';write(std_out,*)u_mat 1675 1676 nlrec%eival(:g_mat_size,1+iangol,ipseudo) = eig_val_h 1677 nlrec%eivec(:g_mat_size,:g_mat_size,1+iangol,ipseudo) = u_mat 1678 ABI_DEALLOCATE(eig_val_h) 1679 ABI_DEALLOCATE(u_mat) 1680 1681 ! ##########END CALCULOUS OF THE EIGEN_SPACE OF THE PROJECTORS STRENGTHS ################## 1682 1683 ABI_ALLOCATE(g_mat,(g_mat_size,g_mat_size)) 1684 ABI_ALLOCATE(inv_g_mat,(g_mat_size,g_mat_size)) 1685 ABI_ALLOCATE(h_mat,(g_mat_size,g_mat_size)) 1686 ABI_ALLOCATE(hg_mat,(g_mat_size,g_mat_size)) 1687 1688 g_mat(:,:) = one 1689 h_mat(:,:) = zero 1690 h_mat(:,:) = h_mat_init(:g_mat_size,:g_mat_size) 1691 1692 ! ------------------------------------------------------- 1693 ! --Matrix of the overlap between projetors (matrix g) 1694 ! and the h matrix of strength 1695 do nproj = 1,g_mat_size-1 1696 do nproj2 = 1+nproj,g_mat_size 1697 tot_proj = zero 1698 ! --Analytic value of overlap 1699 ! g_ij=Gamma[-1/2+i+j+l]/Sqrt(Gamma[-1/2+i+iangol]*Gamma[-1/2+j+iangol]) 1700 call gamma_function(-half+real(nproj+nproj2+iangol,dp),tot_proj) 1701 call gamma_function(-half+real(iangol+2*nproj,dp),denom_1) 1702 call gamma_function(-half+real(iangol+2*nproj2,dp),denom_2) 1703 1704 g_mat(nproj,nproj2) = tot_proj/sqrt(denom_1*denom_2) 1705 g_mat(nproj2,nproj) = g_mat(nproj,nproj2) 1706 1707 h_mat(nproj,nproj2) = h_mat_init(nproj,nproj2) 1708 h_mat(nproj2,nproj) = h_mat_init(nproj,nproj2) 1709 end do 1710 end do 1711 1712 ! --Inverse of the overlap matrix g 1713 inv_g_mat = g_mat 1714 call DGETRF(g_mat_size,g_mat_size,inv_g_mat,g_mat_size,ipvt,info) 1715 call DGETRI(g_mat_size,inv_g_mat,g_mat_size,ipvt,rework,lwork,info) 1716 1717 1718 ! ----------------------------------------------------------- 1719 ! --Now it calculates the exponential of the matrix h.g 1720 hg_mat = matmul(h_mat,g_mat) 1721 1722 call exp_mat(hg_mat,g_mat_size,-one/temperature) 1723 1724 ! --(exp(h.g)-Identity).(g^-1) 1725 hg_mat = hg_mat-identity(:,:) 1726 1727 1728 ! --results on output 1729 nlrec%mat_exp_psp_nl(:g_mat_size,:g_mat_size,1+iangol,ipseudo) = matmul(real(hg_mat,dp),inv_g_mat) 1730 1731 ! write(std_out,*) nlrec%mat_exp_psp_nl(:g_mat_size,:g_mat_size,1+iangol,ipseudo) 1732 1733 ABI_DEALLOCATE(g_mat) 1734 ABI_DEALLOCATE(hg_mat) 1735 ABI_DEALLOCATE(h_mat) 1736 ABI_DEALLOCATE(inv_g_mat) 1737 ABI_DEALLOCATE(identity) 1738 end if 1739 1740 end do !enddo on angular moment 1741 end do !enddo on pseudos 1742 1743 1744 if(debug)then 1745 write(msg,'(80a,a,a)') ('=',ii=1,80),ch10,' pspnl_hgh_rec : exit ' 1746 call wrtout(std_out,msg,'COLL') 1747 end if 1748 1749 end subroutine pspnl_hgh_rec
ABINIT/pspnl_operat_rec [ Functions ]
NAME
pspnl_operat_rec
FUNCTION
It calculates the non-local projectors used in recursion for any psp non-local: The nl interaction in recursion is $$exp{-V_{NL}/beta}=\sum_A\sum_{lm} \sum{ij}Y_{lm}(\hat{r-R_A}')f^l_i(r-R_A)D^l_{i,j}Y_{lm}(\hat{r-R_A})f^l_j{r-R_A}$$ where $D^_{i,j}$ is a matrix previously (see pspnl_operat_rec). In this routine the projectors $Y_{lm}(\hat{r-R_A}')f^l_i(r-R_A)$ are calculated. So an array of dimensions rset%nl%projec(nfftrec,lmnmax,nlrec%npsp)
INPUTS
metrec<metricrec_type>=contains information concerning metric in recursion: grid_step, metric, infinitesimal volume ngfftrec(18)=is the ngfft grid (truncated if different from ngfft) of recursion debug=debug variable
OUTPUT
nlrec<nlpsprec_type>%projec= array containig the projectors on the real grid nlrec<nlpsprec_type>%intlen= integer linear size of the non-local grid
SIDE EFFECTS
nlrec<nlpsprec_type> data set of non-local pseudo for recursion The better Interaction length (Intlen) is also calculated and printed but recursion use intlen=ngfftrec/2
NOTES
PARENTS
m_rec
CHILDREN
gamma_function,initylmr,wrtout
SOURCE
1319 subroutine pspnl_operat_rec(nlrec,metrec,ngfftrec,debug) 1320 1321 1322 !This section has been created automatically by the script Abilint (TD). 1323 !Do not modify the following lines by hand. 1324 #undef ABI_FUNC 1325 #define ABI_FUNC 'pspnl_operat_rec' 1326 !End of the abilint section 1327 1328 implicit none 1329 1330 !Arguments ------------------------------------ 1331 !scalars 1332 logical,intent(in) :: debug 1333 type(metricrec_type),intent(in) ::metrec 1334 type(nlpsprec_type),intent(inout) :: nlrec 1335 !arrays 1336 integer,intent(in) :: ngfftrec(18) 1337 !Local variables------------------------------- 1338 integer :: ii,intlen 1339 integer :: iangol,ipsp,iproj 1340 integer :: mpsang,jj,kk,rr 1341 integer :: nfftrec 1342 integer :: ilmn,il,ilm,in,lmnmax 1343 real(dp) :: raggio,rloc,denom,step 1344 real(dp) :: delta_out,partial,err 1345 character(len=500) :: msg 1346 real(dp) :: part_sum(3) 1347 real(dp) :: ylmr_gr_dm(0,0,0) 1348 real(dp),allocatable :: ylmr(:,:),proj_arr(:,:,:) 1349 real(dp),allocatable :: radloc(:,:),nrm(:) 1350 1351 ! ************************************************************************* 1352 1353 if(debug)then 1354 write(msg,'(80a,a,a)') ('=',ii=1,80),ch10,' pspnl_operat_rec : enter ' 1355 call wrtout(std_out,msg,'PERS') 1356 end if 1357 1358 !##################################################################### 1359 !--CALCULATE THE (SEMI-)MAXIMUM INTERVAL WHERE ALL THE PROJECTORS ARE 1360 !DIFFERENT TO ZERO. 1361 !--For any pseudo potential: 1362 delta_out = zero 1363 step = metrec%tr(1)*half !--Scanning step= grid step/2 1364 1365 1366 do ipsp = 1, nlrec%npsp !--Loop on the pseudos 1367 1368 ! --For any angular moment: 1369 do iangol = 0,maxval(nlrec%indlmn(1,:,ipsp)) !--Loop on the angular moment 1370 rloc = nlrec%radii(iangol+1,ipsp) !--Local radius 1371 1372 ! --For any projector 1373 do iproj = 1,nlrec%pspinfo(iangol+1,ipsp) 1374 ! --Starting point to searching when the projector goes to zero. 1375 ! this correspond to twice the radius wher the projector has its maximum 1376 raggio = two*sqrt(real(-2+2*iproj+iangol,dp))*rloc 1377 ! --Caculate the gamma func at the denominator 1378 call gamma_function(real(iangol+2*iproj,dp)-half,denom) 1379 ! --Find the zero 1380 ! --The following while cycle should be replaced by a bisection 1381 ! --method. Bucause this is calculated only 1 time it is not very 1382 ! important. 1383 err = one 1384 ii=0 1385 ! tolloc = 1.d0*abs(minval(nlrec%mat_exp_psp_nl(:nlrec%pspinfo(iangol+1,ipsp),:nlrec%pspinfo(iangol+1,ipsp),iangol+1,ipsp))) 1386 do while(abs(err)>1.d-2) 1387 raggio = raggio + step 1388 err = project_prec(raggio,iproj,iangol,rloc)/sqrt(denom) 1389 ii = ii+1 1390 end do 1391 write(std_out,*)'local delta',raggio,ii 1392 delta_out=maxval((/ delta_out,raggio /)) 1393 end do !end loop on projectors 1394 1395 end do !enddo on angular moment 1396 end do !enddo on pseudos 1397 1398 !--CALCULATE how many grid steps correspond to delta_out 1399 intlen = int(delta_out/metrec%tr(1)) 1400 !--I want that intlen is odd 1401 if(mod(intlen,2)==0) intlen = intlen+1 1402 1403 write(msg,'(2a,i3,a)') ch10,' Interac. length of non-local psp(grid steps)=',intlen,ch10 1404 call wrtout(std_out,msg,'COLL') 1405 !##################################################################### 1406 1407 !--Initialisation 1408 nfftrec = product(ngfftrec(1:3)) 1409 lmnmax = nlrec%lmnmax 1410 intlen = ngfftrec(1)/2 1411 nlrec%intlen = intlen !--Setted in recursion variables 1412 1413 !##################################################################### 1414 !--CALCULATE E(q,q') 1415 !--Cration of the exponential*projectors*ylm matrix 1416 1417 !--Initialisation 1418 ABI_ALLOCATE(nlrec%projec,(nfftrec,lmnmax,nlrec%npsp)) 1419 nlrec%projec = zero 1420 ABI_ALLOCATE(radloc,(3,nfftrec)) 1421 radloc = zero 1422 ABI_ALLOCATE(nrm,(nfftrec)) 1423 nrm = zero 1424 1425 !--Loop on pseudo types 1426 pseudodo: do ipsp = 1, nlrec%npsp 1427 ! --Control if the psp is non-local, else continue 1428 if(all(nlrec%pspinfo(:,ipsp)==0)) cycle 1429 ! --Vector which stores localy the upper part of symmetrical 1430 ! matrix of the exponential of the non-local operator 1431 mpsang = maxval(nlrec%indlmn(1,:,ipsp))+1 1432 ABI_ALLOCATE(proj_arr,(nfftrec,maxval(nlrec%pspinfo(:,ipsp)),mpsang)) 1433 ABI_ALLOCATE(ylmr,(mpsang*mpsang,nfftrec)) 1434 proj_arr = zero 1435 ylmr = zero 1436 1437 ! !debug 1438 ! write(std_out,*)'mpsang,proj num',mpsang,maxval(nlrec%pspinfo(:,ipsp)) 1439 ! !enddebug 1440 1441 ! --Calculate the projctors 1442 do iangol = 0,mpsang-1 1443 rloc = nlrec%radii(iangol+1,ipsp) 1444 do iproj = 1,nlrec%pspinfo(iangol+1,ipsp) 1445 call gamma_function(real(iangol+2*iproj,dp)-half,denom) 1446 denom = one/sqrt(denom) 1447 do ii = 0,ngfftrec(1)-1 !--3-loop on coordinates 1448 do jj = 0,ngfftrec(2)-1 1449 do kk = 0,ngfftrec(3)-1 1450 ! --Calculate the radii 1451 part_sum(:) = real((/ ii,jj,kk /)-intlen,dp)*(metrec%tr) 1452 rr = 1+ii+(jj+kk*ngfftrec(2))*ngfftrec(3) 1453 radloc(:,rr) = part_sum 1454 nrm(rr) = sqrt(sum(part_sum(:)**two)) 1455 partial = project_prec(nrm(rr),iproj,iangol,rloc)*denom 1456 if(abs(partial)>tol12 ) proj_arr(rr,iproj,iangol+1) = partial 1457 end do 1458 end do 1459 end do !--End 3-loop on coordinates 1460 end do 1461 end do 1462 1463 1464 ! ------------------------------------------------------------- 1465 ! --Calculate the spherical harmonics (Verified: it works well) 1466 call initylmr(mpsang,1,nfftrec,nrm(:),1,radloc(:,:),ylmr(:,:),ylmr_gr_dm) 1467 ! ------------------------------------------------------------- 1468 1469 1470 do ilmn = 1,lmnmax 1471 ilm = nlrec%indlmn(4,ilmn,ipsp) 1472 il = nlrec%indlmn(1,ilmn,ipsp)+1 1473 in = nlrec%indlmn(3,ilmn,ipsp) 1474 write(msg,'(2a,i3,2i2)')ch10,'lm,l,n',ilm,il,in 1475 call wrtout(std_out,msg,'COLL') 1476 1477 nlrec%projec(:,ilmn,ipsp) = ylmr(ilm,:)*proj_arr(:,in,il) 1478 end do 1479 1480 ABI_DEALLOCATE(ylmr) 1481 ABI_DEALLOCATE(proj_arr) 1482 end do pseudodo !--end loop on pseudo types 1483 1484 1485 ABI_DEALLOCATE(radloc) 1486 ABI_DEALLOCATE(nrm) 1487 1488 if(debug)then 1489 write(msg,'(80a,a,a)') ('=',ii=1,80),ch10,' pspnl_operat_rec : exit ' 1490 call wrtout(std_out,msg,'PERS') 1491 end if 1492 1493 contains 1494 1495 function project_prec(raggio,iproj,iangol,rloc) 1496 !--Analytical expression of the projectors in hgh-pspeudopotential 1497 !--The gamma function at denominator is missing 1498 1499 !This section has been created automatically by the script Abilint (TD). 1500 !Do not modify the following lines by hand. 1501 #undef ABI_FUNC 1502 #define ABI_FUNC 'project_prec' 1503 !End of the abilint section 1504 1505 real(dp) :: project_prec 1506 integer,intent(in) :: iproj,iangol 1507 real(dp),intent(in) :: raggio,rloc 1508 1509 project_prec=sqrt2*(raggio/rloc)**real((iangol+2*(iproj-1)),dp)*& 1510 & exp(-((raggio/rloc)**two)*half)/rloc**onehalf 1511 end function project_prec 1512 1513 end subroutine pspnl_operat_rec
m_rec/Calcnrec [ Functions ]
[ Top ] [ m_rec ] [ Functions ]
NAME
Calcnrec
FUNCTION
Calculate the new min_nrec.
INPUTS
rset<recursion_type>=Data type concerning recursion b2(:,:) recursion coefficients
OUTPUT
rset%min_nrec is changed
PARENTS
vtorhorec
CHILDREN
wrtout,xmpi_max
SOURCE
908 subroutine Calcnrec(rset,b2) 909 910 911 !This section has been created automatically by the script Abilint (TD). 912 !Do not modify the following lines by hand. 913 #undef ABI_FUNC 914 #define ABI_FUNC 'Calcnrec' 915 !End of the abilint section 916 917 implicit none 918 919 !Arguments ------------------------------------ 920 ! scalars 921 type(recursion_type),intent(inout) :: rset 922 ! arrays 923 real(dp), intent(in):: b2(0:rset%min_nrec,1:rset%par%ntranche) 924 !Local ---------------------------------------- 925 ! scalars 926 integer :: kk,ii,jj,ierr,loc_nrec 927 character(len=500) :: msg 928 ! ********************************************************************* 929 ! @recursion_type 930 ! @pawfgr_type 931 kk = 1 932 loc_nrec = rset%min_nrec 933 do ii=1,rset%par%ntranche 934 !--Use to lbound because b2 passed as argument 935 ! doesn't have the same bounds as in the calling 936 ! subroutine, the +1 because b2(lbound,ii)=1. 937 jj = lbound(b2,dim=1)+1 938 do while (b2(jj,ii)>tol10 .and. jj<=rset%min_nrec-1) 939 jj = jj+1 940 kk = max(jj,kk) 941 end do 942 enddo 943 call xmpi_max(kk,rset%min_nrec,rset%mpi%comm_bandfft,ierr) 944 rset%min_nrec = rset%min_nrec+1-lbound(b2,dim=1) 945 946 write(msg,'(a,i2.2,a9,i2.2)') ' -- nrec adjustement nrec=',loc_nrec,' => nrec=',rset%min_nrec 947 call wrtout(std_out,msg,'COLL') 948 write(msg,'(51a)')' ',('-',ii=1,50) 949 call wrtout(std_out,msg,'COLL') 950 951 end subroutine Calcnrec
m_rec/CleanRec [ Functions ]
[ Top ] [ m_rec ] [ Functions ]
NAME
CleanRec
FUNCTION
Deallocate the pointers of rset<recursion_type>=Data type concerning recursion.
INPUTS
rset<recursion_type>=Data type concerning recursion
SIDE EFFECTS
All pointers are deallocated.
PARENTS
gstate
CHILDREN
wrtout,xmpi_max
SOURCE
821 subroutine CleanRec(rset) 822 823 824 !This section has been created automatically by the script Abilint (TD). 825 !Do not modify the following lines by hand. 826 #undef ABI_FUNC 827 #define ABI_FUNC 'CleanRec' 828 !End of the abilint section 829 830 implicit none 831 832 !Arguments ------------------------------------ 833 ! scalars 834 type(recursion_type),intent(inout) :: rset 835 ! arrays 836 ! ********************************************************************* 837 838 ! @recursion_type 839 840 if (allocated(rset%ZT_p)) then 841 ABI_DEALLOCATE(rset%ZT_p) 842 end if 843 if (allocated(rset%par%displs)) then 844 ABI_DEALLOCATE(rset%par%displs) 845 end if 846 if (allocated(rset%par%vcount)) then 847 ABI_DEALLOCATE(rset%par%vcount) 848 end if 849 if (allocated(rset%nl%mat_exp_psp_nl)) then 850 ABI_DEALLOCATE(rset%nl%mat_exp_psp_nl) 851 end if 852 if (allocated(rset%nl%eival)) then 853 ABI_DEALLOCATE(rset%nl%eival) 854 end if 855 if (allocated(rset%nl%eivec)) then 856 ABI_DEALLOCATE(rset%nl%eivec) 857 end if 858 if (allocated(rset%nl%pspinfo)) then 859 ABI_DEALLOCATE(rset%nl%pspinfo) 860 end if 861 if (allocated(rset%nl%radii)) then 862 ABI_DEALLOCATE(rset%nl%radii) 863 end if 864 if (allocated(rset%nl%indlmn)) then 865 ABI_DEALLOCATE(rset%nl%indlmn) 866 end if 867 if (allocated(rset%nl%projec)) then 868 ABI_DEALLOCATE(rset%nl%projec) 869 end if 870 if (allocated(rset%inf%gcart)) then 871 ABI_DEALLOCATE(rset%inf%gcart) 872 end if 873 874 call pawfgr_destroy(rset%pawfgr) 875 876 ! No is needed deallocate rset%mpi: it is a copy of mpi_enreg which 877 ! pointers are deallocated in gstate 878 879 #ifdef HAVE_GPU_CUDA 880 call CleanRecGPU(rset%GPU,rset%load) 881 #endif 882 883 end subroutine CleanRec
m_rec/cpu_distribution [ Functions ]
[ Top ] [ m_rec ] [ Functions ]
NAME
cpu_distribution
FUNCTION
Calculate the number of point,GPU,for any proc
INPUTS
ngfft(3)=nuber of point of the grid gratio=recgratio ratio between the fine and coarse grid beta_coeff=estimated time ratio between CPU_time and GPU_time calc_type=if 0 takes the possible max for nptrec (to test the completly full graphic card). 1 after test to calculate the min possible value for nptrec
OUTPUT
PARENTS
first_rec,m_rec
CHILDREN
wrtout,xmpi_max
SOURCE
314 subroutine cpu_distribution(gratio,rset,ngfft,beta_coeff,calc_type) 315 316 317 !This section has been created automatically by the script Abilint (TD). 318 !Do not modify the following lines by hand. 319 #undef ABI_FUNC 320 #define ABI_FUNC 'cpu_distribution' 321 !End of the abilint section 322 323 implicit none 324 325 !Arguments ------------------------------------ 326 integer,intent(in) :: gratio,calc_type 327 real(dp),intent(in) :: beta_coeff 328 integer,intent(in) :: ngfft(3) 329 type(recursion_type),intent(inout),target :: rset 330 !Local --------------------------- 331 integer :: ii,nfft,ierr 332 integer,pointer :: proc_pt_dev(:,:) 333 type(recparall_type),pointer :: recpar 334 character(500) :: msg 335 ! ********************************************************************* 336 337 ! write(std_out,*)'start cpu_distribution' 338 339 nullify(proc_pt_dev) 340 ABI_ALLOCATE(proc_pt_dev,(2,0:rset%mpi%nproc-1)) 341 342 nfft = product(ngfft) 343 call H_D_distrib(rset,nfft,gratio,proc_pt_dev,beta_coeff) 344 345 nullify(recpar) 346 if(rset%load == 0)then 347 ABI_ALLOCATE(rset%par%displs,(0:rset%mpi%nproc-1)) 348 ABI_ALLOCATE(rset%par%vcount,(0:rset%mpi%nproc-1)) 349 recpar => rset%par 350 #if defined HAVE_GPU_CUDA 351 else 352 if(rset%tp==4)then 353 ABI_ALLOCATE(rset%GPU%par%displs,(0:rset%mpi%nproc-1)) 354 ABI_ALLOCATE(rset%GPU%par%vcount,(0:rset%mpi%nproc-1)) 355 endif 356 recpar => rset%GPU%par 357 #endif 358 endif 359 360 recpar%ntranche = nfft/(rset%mpi%nproc)!equipartitioned point 361 362 call find_maxmin_proc(recpar,rset%mpi%nproc,& 363 & rset%mpi%me,gratio,ngfft,proc_pt_dev) 364 365 recpar%vcount = 0 366 if(rset%load==0)then 367 recpar%vcount(rset%mpi%me) = recpar%ntranche 368 else 369 recpar%vcount(rset%mpi%me) = recpar%npt 370 endif 371 372 call xmpi_sum(recpar%vcount,rset%mpi%comm_bandfft,ierr) 373 374 recpar%displs = 0 375 if(rset%mpi%nproc>1) recpar%displs(1:) = (/(sum(recpar%vcount(:ii)),ii=0,rset%mpi%nproc-2)/) 376 377 !--INITALIZATION OF CUDA FOR RECURSION 378 #if defined HAVE_GPU_CUDA 379 if(rset%load == 0) rset%GPU%par = rset%par 380 call InitRecGPU(rset,nfft,gratio,rset%GPU%map(rset%mpi%me),calc_type) 381 #else 382 ierr = calc_type !only of abirule when there is not HAVE_GPU_CUDA 383 #endif 384 385 386 ! if(rset%debug ) then 387 write(msg,'(a,i7,2(2a,3i7),8(2a,i7),2(2a,3i7),(2a,e14.6))')& 388 & ' me ', rset%mpi%me,ch10,& 389 & ' ngfft ', ngfft(1:3),ch10,& 390 & ' ngfftrec ', rset%ngfftrec(1:3),ch10,& 391 & ' load ', rset%load,ch10,& 392 & ' ntranche ', recpar%ntranche,ch10,& 393 & ' min_pt ', recpar%min_pt,ch10,& 394 & ' max_pt ', recpar%max_pt,ch10,& 395 & ' rset%mpi%nproc ', rset%mpi%nproc,ch10,& 396 & ' rset%mpi%nproc_fft ', rset%mpi%nproc_fft,ch10,& 397 & ' dtset%ngfft(10) ', rset%ngfftrec(10),ch10,& 398 & ' recpar%npt ', recpar%npt,ch10,& 399 & ' recpar%pt0 ', recpar%pt0%x,recpar%pt0%y,recpar%pt0%z,ch10,& 400 & ' recpar%pt1 ', recpar%pt1%x,recpar%pt1%y,recpar%pt1%z,ch10,& 401 & ' grid step ', rset%inf%tr(1) 402 call wrtout(std_out,msg,'PERS') 403 #if defined HAVE_GPU_CUDA 404 write(msg,'(a,i7,2(2a,i7),a)')& 405 & ' rset%ngp ', rset%ngpu,ch10,& 406 & ' gpudevice ', rset%gpudevice,ch10,& 407 & ' nptrec ', rset%GPU%nptrec,ch10 408 call wrtout(std_out,msg,'PERS') 409 #endif 410 ! write(std_out,*)'display',recpar%displs 411 ! write(std_out,*)'vcount',recpar%vcount 412 ! end if 413 414 415 nullify(recpar) 416 if(associated(proc_pt_dev)) then 417 ABI_DEALLOCATE(proc_pt_dev) 418 end if 419 420 ! write(std_out,*)'exit from cpu_distribution' 421 end subroutine cpu_distribution
m_rec/find_maxmin_proc [ Functions ]
[ Top ] [ m_rec ] [ Functions ]
NAME
find_maxmin_proc
FUNCTION
To calculate max and min pt for any cpu, it is useful for recgratio!=1
INPUTS
nproc = number of procs me = identity of the proc ngfft(3) = fine grid (corresponds to dtset%ngfft(1:3)) proc_pt_dev(2,0:nproc-1) which device and how many points recpar%npt = number of points computed by the proc me (see side effects)
OUTPUT
recpar%pt0<type(vec_int)>=Intial point for this proc in x,y,z recpar%pt1<type(vec_int)>=Final point for this proc in x,y,z recpar%min_pt=Intial point for this proc recpar%max_pt=Final point for this proc
SIDE EFFECTS
recpar%ntranche=number of pts computed by the proc me on the fine grid. So when recgratio!=1, ntranche will not correspond to the npt!
PARENTS
m_rec
CHILDREN
wrtout,xmpi_max
SOURCE
241 subroutine find_maxmin_proc(recpar,nproc,me,gratio,ngfft,proc_pt_dev) 242 243 244 !This section has been created automatically by the script Abilint (TD). 245 !Do not modify the following lines by hand. 246 #undef ABI_FUNC 247 #define ABI_FUNC 'find_maxmin_proc' 248 !End of the abilint section 249 250 implicit none 251 252 !Arguments ------------------------------------ 253 integer,intent(in) :: nproc,me,gratio 254 integer,intent(in) :: ngfft(3) 255 type(recparall_type),intent(inout) :: recpar 256 integer,pointer :: proc_pt_dev(:,:) 257 !Local --------------------------- 258 integer :: pointoncpu 259 integer :: nfft,ntot,ii 260 integer :: inf,sup 261 integer :: proc_limit(0:nproc-1) 262 ! ********************************************************************* 263 ! write(std_out,*)'start find_maxmin_proc' 264 recpar%npt = 0 265 nfft = product(ngfft) 266 ntot = nfft/(gratio*gratio*gratio) 267 pointoncpu = ntot/nproc 268 269 proc_limit = (/(sum(proc_pt_dev(2,:ii)),ii=0,nproc-1)/) 270 271 if(gratio==1)then 272 recpar%ntranche = proc_limit(me) 273 if(me/=0) recpar%ntranche = recpar%ntranche-proc_limit(me-1) 274 endif 275 276 inf=0 277 if(me/=0) inf = proc_limit(me-1) 278 sup = proc_limit(me) 279 280 281 call get_pt0_pt1(ngfft,gratio,inf,sup,recpar) 282 283 recpar%npt = sup-inf 284 285 !write(std_out,*)'exit find_maxmin_proc' 286 end subroutine find_maxmin_proc
m_rec/H_D_distrib [ Functions ]
[ Top ] [ m_rec ] [ Functions ]
NAME
H_D_distrib
FUNCTION
Calculate the number of point,GPU,for any proc
INPUTS
rset<recursion_type>= recursion variables cpu (-1 if there are not gpu) nfft=nuber of point of the fine grid ngfftrec=nuber of point of one edge of the coarse grid gratio=recgratio ratio between the fine and coarse grid beta_coeff=estimated time ratio between CPU_time and GPU_time
OUTPUT
proc_pt_dev(2,0:nproc-1) which device and how many points that proc has to compute: proc_pt_dev(1,iproc) which device associated to proc i (-1 if none), proc_pt_dev(2,iproc) how many points
PARENTS
m_rec
CHILDREN
wrtout,xmpi_max
SOURCE
110 subroutine H_D_distrib(rset,nfft,gratio,proc_pt_dev,beta_coeff) 111 112 113 !This section has been created automatically by the script Abilint (TD). 114 !Do not modify the following lines by hand. 115 #undef ABI_FUNC 116 #define ABI_FUNC 'H_D_distrib' 117 !End of the abilint section 118 119 implicit none 120 121 !Arguments ------------------------------------ 122 integer, intent(in) :: nfft,gratio 123 real(dp),intent(in) :: beta_coeff 124 integer,pointer :: proc_pt_dev(:,:) 125 type(recursion_type),intent(inout) :: rset 126 !Local --------------------------- 127 integer :: me,icpu,resto,ntot,ngpu 128 integer :: n_per_cpu,n_per_gpu 129 character(500) :: msg 130 #ifdef HAVE_GPU_CUDA 131 integer,pointer :: ndev(:) 132 #else 133 integer :: ndev(0:rset%mpi%nproc-1) 134 #endif 135 ! ********************************************************************* 136 137 138 #ifdef HAVE_GPU_CUDA 139 ndev => rset%GPU%map 140 #else 141 ndev = -1 142 #endif 143 144 me = rset%mpi%me 145 ntot = nfft/(gratio*gratio*gratio) 146 ngpu = rset%ngpu 147 148 !--If sequential code all points are computed by the proc 0 149 if(rset%mpi%nproc ==1) then 150 proc_pt_dev(1,0) = ndev(0) 151 proc_pt_dev(2,0) = ntot 152 return 153 end if 154 155 !--Number of points for any cpu 156 n_per_cpu = int(int(ntot/(rset%mpi%nproc+ngpu*(beta_coeff-1.d0)))) 157 n_per_gpu = int(n_per_cpu*beta_coeff) 158 !write(std_out,*)'n_per_cpu',n_per_cpu 159 !write(std_out,*)'rset%GPU%map',rset%GPU%map 160 do icpu=0,rset%mpi%nproc-1 161 proc_pt_dev(1,icpu) = ndev(icpu) 162 proc_pt_dev(2,icpu) = n_per_cpu 163 if(ndev(icpu)>-1) proc_pt_dev(2,icpu) = n_per_gpu 164 end do 165 166 !--Distribute the rest 167 resto = ntot-sum(proc_pt_dev(2,:)) 168 icpu = 0 169 !write(std_out,*)'rest',resto,ngpu 170 if(resto>0) then 171 if(ngpu/=0) then 172 !--distribute rest only on GPU 173 do while(resto/=0) 174 if(proc_pt_dev(1,icpu)>-1) then 175 proc_pt_dev(2,icpu) = proc_pt_dev(2,icpu)+1 176 resto = resto-1 177 endif 178 icpu = mod(icpu+1,rset%mpi%nproc) 179 enddo 180 else 181 !--distribute rest on all CPU 182 do while(resto/=0) 183 proc_pt_dev(2,icpu) = proc_pt_dev(2,icpu)+1 184 resto = resto-1 185 icpu = mod(icpu+1,rset%mpi%nproc) 186 enddo 187 return 188 endif 189 endif 190 191 !--Printing GPU and load distribution on procs 192 write(msg,'(3a)')& 193 & ' -Load on procs------------',ch10,& 194 & ' me device points' 195 call wrtout(std_out,msg,'COLL') 196 do icpu=0,rset%mpi%nproc-1 197 write(msg,'(i5,i8,i14)') icpu,proc_pt_dev(:,icpu); 198 call wrtout(std_out,msg,'COLL') 199 end do 200 201 end subroutine H_D_distrib
m_rec/Init_MetricRec [ Functions ]
[ Top ] [ m_rec ] [ Functions ]
NAME
Init_MetricRec
FUNCTION
Initialise the rset<recursion_type>=Data type concerning recursion. In particular, the information on the infinitesimal metric. Also other variable are initialized
INPUTS
rmet: metrics ucvol=unit cell volume in bohr**3. ngfft(1:3)=fine grid used in recursion rprimd=Real space PRIMitive translations, Dimensional xred=vectors (X) of atom positions in reduced coordinates natom=number of atoms debug=debug variable
OUTPUT
metrec <type(metricrec_type)>= infinitesimal metrics used in recursion
PARENTS
scfcv
CHILDREN
wrtout,xmpi_max
SOURCE
632 subroutine Init_MetricRec(metrec,nlpsp,rmet,ucvol,rprimd,xred,ngfft,natom,debug) 633 634 635 !This section has been created automatically by the script Abilint (TD). 636 !Do not modify the following lines by hand. 637 #undef ABI_FUNC 638 #define ABI_FUNC 'Init_MetricRec' 639 !End of the abilint section 640 641 implicit none 642 !Arguments ------------------------------------ 643 !scalars 644 integer, intent(in) ::natom 645 real(dp), intent(in) :: ucvol 646 logical,intent(in) ::nlpsp,debug 647 type(metricrec_type),intent(inout) :: metrec 648 !arrays 649 integer,intent(in) :: ngfft(3) 650 real(dp),intent(in) :: rmet(3,3),rprimd(3,3),xred(3,natom) 651 652 !Local --------------------------- 653 integer :: ii 654 real(dp) :: xcart(3,natom) 655 character(500) :: msg 656 ! ********************************************************************* 657 658 !--Intialisation of variables concerning the infinitesimal metric 659 do ii=1,3 660 metrec%rmet(ii,:) = rmet(ii,:)/(real(ngfft(1:3)*ngfft(ii),dp)) 661 end do 662 metrec%ucvol = ucvol/real(product(ngfft(1:3)),dp) 663 metrec%tr(:) = sqrt((/(metrec%rmet(ii,ii),ii=1,3)/)) !grid step 664 665 !--Initialisation of others variables 666 !--In non-loc-psp case: calculate the position of ions and conversion factor 667 if(nlpsp) then 668 do ii = 1,natom 669 xcart(:,ii) = matmul(rprimd(:,:),xred(:,ii)) 670 end do 671 metrec%gcart(:,:) = per_cond(natom,xcart,ngfft(1:3),metrec%tr(:)) 672 if(debug) then 673 do ii=1,natom 674 write (msg,'(a,3f8.2)')'xcart=',xcart(:,ii) 675 call wrtout(std_out,msg,'COLL') 676 write (msg,'(a,3i4)')'gcart=',metrec%gcart(:,ii) 677 call wrtout(std_out,msg,'COLL') 678 end do 679 end if 680 end if 681 682 end subroutine Init_MetricRec
m_rec/Init_nlpspRec [ Functions ]
[ Top ] [ m_rec ] [ Functions ]
NAME
Init_nlpspRec
FUNCTION
Initialise the rset<recursion_type>=Data type concerning recursion. In particular, the non-local part of pseudo-potential.
INPUTS
tempe=temperature psps <type(pseudopotential_type)>=variables related to pseudo-potentials metrec <type(metricrec_type)>=infinitesimal metrics used in recursion ngfftrec(18)=Number of Grid points for Fast Fourier Transform for Recursion (truncated box, if different from ngfft) debug=debug variable
SIDE EFFECTS
nlrec <type(nlpsprec_type)>=pseudo-potentials informations for recursion
PARENTS
first_rec
CHILDREN
wrtout,xmpi_max
SOURCE
712 subroutine Init_nlpspRec(tempe,psps,nlrec,metrec,ngfftrec,debug) 713 714 715 !This section has been created automatically by the script Abilint (TD). 716 !Do not modify the following lines by hand. 717 #undef ABI_FUNC 718 #define ABI_FUNC 'Init_nlpspRec' 719 !End of the abilint section 720 721 implicit none 722 !Arguments ------------------------------------ 723 ! scalars 724 logical,intent(in) :: debug 725 real(dp), intent(in) :: tempe 726 type(pseudopotential_type),intent(in) ::psps 727 type(metricrec_type),intent(inout) :: metrec 728 type(nlpsprec_type),intent(inout) :: nlrec 729 ! arrays 730 integer,intent(in) :: ngfftrec(18) 731 !Local --------------------------- 732 integer :: ii,jj 733 character(500) :: msg 734 ! ********************************************************************* 735 !!--Routine for the calcul of the non-local pseudo 736 if(any(psps%pspcod/=3) .and. nlrec%nlpsp ) then 737 msg = "The non-local part of psp is used in Recursion only for hgh-psp" 738 MSG_WARNING(msg) 739 nlrec%nlpsp = .False. 740 if (allocated(metrec%gcart)) then 741 ABI_DEALLOCATE(metrec%gcart) 742 end if 743 end if 744 745 if(any(psps%pspcod==3) .and. nlrec%nlpsp) then 746 747 nlrec%nlpsp = .True. 748 nlrec%npsp = psps%npsp 749 nlrec%lmnmax = count(psps%indlmn(3,:,psps%npsp)/=0) 750 ABI_ALLOCATE(nlrec%mat_exp_psp_nl,(3,3,psps%mpsang,psps%npsp)) 751 ABI_ALLOCATE(nlrec%eival,(3,psps%mpsang,psps%npsp)) 752 ABI_ALLOCATE(nlrec%eivec,(3,3,psps%mpsang,psps%npsp)) 753 ABI_ALLOCATE(nlrec%pspinfo,(psps%mpsang,psps%npsp)) 754 ABI_ALLOCATE(nlrec%radii,(psps%mpsang,psps%npsp)) 755 ABI_ALLOCATE(nlrec%indlmn,(6,nlrec%lmnmax,psps%npsp)) 756 nlrec%indlmn(:,:,:) = psps%indlmn(:,:nlrec%lmnmax,:) 757 nlrec%mat_exp_psp_nl(:,:,:,:) = zero 758 nlrec%eivec(:,:,:,:) = zero 759 nlrec%eival(:,:,:) = zero 760 nlrec%radii(:,:) = zero 761 nlrec%pspinfo(:,:) = 0 762 763 !--Get the exponential of the strength times the projectors overlap 764 ! of the non-local part of psp(hgh): 765 ! h_ij=strength; g_ij=ovelap => (exp(-h.g/temp/4p)-Identity).g^(-1) 766 ! And the diagonalisation of the projectors and associated eigenvectors 767 call pspnl_hgh_rec(psps,tempe,nlrec,debug) 768 769 if(debug)then 770 do jj=1,psps%npsp 771 write(msg,'(a)')' Exponential matrices:' 772 call wrtout(std_out,msg,'COLL') 773 do ii=1,psps%mpsang 774 write(msg,'(a,i2,a,3f15.10,a,3f15.10,a,3f15.10)')& 775 & 'angular moment',ii-1,ch10,& 776 & nlrec%mat_exp_psp_nl(1,:,ii,jj),ch10,& 777 & nlrec%mat_exp_psp_nl(2,:,ii,jj),ch10,& 778 & nlrec%mat_exp_psp_nl(3,:,ii,jj) 779 call wrtout(std_out,msg,'COLL') 780 end do 781 end do 782 end if 783 784 !--Now it calculates the matrix of the exp(V_NL) 785 call pspnl_operat_rec(nlrec,metrec,ngfftrec,debug) 786 787 else !--Only local pseudo potentials 788 nlrec%nlpsp = .False. 789 nlrec%npsp = psps%npsp 790 ABI_ALLOCATE(nlrec%mat_exp_psp_nl,(0,0,0,0)) 791 ABI_ALLOCATE(nlrec%pspinfo,(0,0)) 792 ABI_ALLOCATE(nlrec%radii,(0,0)) 793 ABI_ALLOCATE(nlrec%indlmn,(0,0,0)) 794 ABI_ALLOCATE(nlrec%projec,(0,0,0)) 795 endif 796 797 end subroutine Init_nlpspRec
m_rec/InitRec [ Functions ]
[ Top ] [ m_rec ] [ Functions ]
NAME
InitRec
FUNCTION
Initialise the rset<recursion_type>=Data type concerning recursion.
INPUTS
dtset <type(dataset_type)>=all input variables in this dataset mpi_ab <type(mpi_type)=MPI-parallelisation information mproj=0 if psp is only local
SIDE EFFECTS
All pointers set to null().
PARENTS
gstate
CHILDREN
wrtout,xmpi_max
SOURCE
449 subroutine InitRec(dtset,mpi_ab,rset,rmet,mproj) 450 451 #ifdef HAVE_GPU_CUDA 452 use m_gpu_detect,only :get_topo,find_set_gpu 453 use m_hidecudarec,only :InitRecGPU_0 454 #include "cuda_common.h" 455 #endif 456 457 !This section has been created automatically by the script Abilint (TD). 458 !Do not modify the following lines by hand. 459 #undef ABI_FUNC 460 #define ABI_FUNC 'InitRec' 461 !End of the abilint section 462 463 implicit none 464 465 !Arguments ------------------------------------ 466 ! scalars 467 integer,intent(in) :: mproj 468 type(dataset_type),intent(in) :: dtset 469 type(MPI_type),intent(in),target :: mpi_ab 470 type(recursion_type),intent(inout) :: rset 471 real(dp),intent(in) :: rmet(3,3) 472 ! arrays 473 !Local --------------------------- 474 integer :: ii 475 real(dp) :: beta,rtrotter 476 #if defined HAVE_GPU_CUDA 477 character(500) :: msg 478 #endif 479 ! ********************************************************************* 480 ! @recursion_type 481 ! @pawfgr_type 482 483 !--Initialisation 484 beta = one/dtset%tsmear !--Inverse of temperature 485 !--Rewriting the trotter parameter 486 rtrotter = max(half,real(dtset%recptrott,dp)) 487 488 rset%debug= (dtset%prtvol==-7) 489 rset%quitrec = 0 490 rset%min_nrec = dtset%recnrec 491 rset%efermi = dtset%recefermi !initial guess for fermie 492 493 rset%nfftrec = 0 494 rset%ngfftrec = 0 495 496 rset%tronc = .False. 497 498 rset%mpi => mpi_ab 499 500 !--Are all pseudo-potentials local? 501 rset%nl%nlpsp = (mproj/=0) 502 !--Some initialisation concerning the metrics 503 ! If non-local psps then it allocates the atoms positions 504 ! on the grid 505 if(rset%nl%nlpsp) then 506 ABI_ALLOCATE(rset%inf%gcart,(3,dtset%natom)) 507 else 508 ABI_ALLOCATE(rset%inf%gcart,(0,0)) 509 end if 510 rset%inf%gcart = 0 511 512 !---------------------------------------------------------- 513 !--TRONCATION OF THE BOX 514 !! determines new dimensions the method is similar to the one used 515 !! in getng (except that ecut and xboxcutmin give no constraint, 516 !! and symmetries are not handled) 517 518 call getngrec(dtset%ngfft,rmet,rset%ngfftrec,rset%nfftrec,dtset%recrcut,0.25d0*sqrt(beta/rtrotter),rset%tronc) 519 ! 1/4*sqrt(beta/trotter) for guess - should be modified 520 521 !------------------------------------------------------------ 522 !--DETERMINING WHICH POINT WILL COMPUTE THAT PROC 523 !---------------------------------------------------------- 524 !--Paralelism using the band communicator (not used in the recursion) 525 !--Distribution on procs with cuda 526 527 528 rset%ngpu = 0 !--Initial guess no GPU at all 529 rset%gpudevice = -1 !--Inital guess no GPU associated 530 rset%load = 0 !--Inital homogeneous work load 531 rset%tp = 0 !--Initial guess 1 cpu, 0 gpu 532 533 534 #ifdef HAVE_GPU_CUDA 535 !--Initialise GPU variables for recursion 536 call InitRecGPU_0(rset%GPU,mpi_ab) 537 538 !--Get the distribution of GPUs on CPUs 539 call find_set_gpu(mpi_ab%nproc,mpi_ab%comm_bandfft,rset%GPU%map,rset%ngpu) 540 541 !--Get the topology of the machine 542 call get_topo(rset%mpi%nproc,rset%ngpu,rset%tp) 543 if(rset%tp>4)then 544 msg = 'm_rec: number of gpu>number of cpu is not implemented' 545 MSG_ERROR(msg) 546 endif 547 ! rset%tp = 0;if(rset%mpi%nproc>1)rset%tp = 1 548 ! rset%ngpu = 0; rset%GPU%map=-1 549 !--For the moment cuda doesnt take into account non-local psp 550 if(rset%nl%nlpsp) then 551 rset%tp = 0;if(rset%mpi%nproc>1)rset%tp = 1 552 rset%GPU%map = -1 553 endif 554 #else 555 if(rset%mpi%nproc>1)rset%tp = 1 556 #endif 557 558 !--Basic initialization for recursion metric (only needed for printing) 559 do ii=1,3 560 rset%inf%rmet(ii,:) = rmet(ii,:)/(real(dtset%ngfft(1:3)*dtset%ngfft(ii),dp)) 561 end do 562 rset%inf%tr(:) = sqrt((/(rset%inf%rmet(ii,ii),ii=1,3)/)) !grid step 563 564 !--Compute the work loqd distribution on devices (gpu,cpu) 565 call cpu_distribution(dtset%recgratio,rset,dtset%ngfft(:3),1.d0,0) 566 567 !------------------------------------------------------------ 568 !--DEFINITION VARIABLE COARSE-FINE GRID TO USE TRANSGRID--INGRID FUNCTIONS 569 call pawfgr_nullify(rset%pawfgr) 570 !if coarse grid is used 571 if (dtset%recgratio>1) then 572 !fine grid-- 573 rset%pawfgr%mgfft = 0 574 rset%pawfgr%nfft = product(dtset%ngfft(1:3)) 575 rset%pawfgr%ngfft(:) = dtset%ngfft(:) 576 rset%pawfgr%ngfft(9:11)=(/0,1,0/) 577 rset%pawfgr%ngfft(12:13)= dtset%ngfft(2:3) 578 !coarse grid-- 579 rset%pawfgr%mgfftc = 0 580 rset%pawfgr%ngfftc(:) = rset%pawfgr%ngfft(:) 581 rset%pawfgr%ngfftc(:3) = floor(real(dtset%ngfft(:3)+1,dp)/real(dtset%recgratio,dp)) 582 rset%pawfgr%nfftc = product(rset%pawfgr%ngfftc(1:3)) 583 584 rset%pawfgr%usefinegrid = 1 585 ABI_ALLOCATE(rset%pawfgr%fintocoa,(rset%pawfgr%nfft)) 586 ABI_ALLOCATE(rset%pawfgr%coatofin,(rset%pawfgr%nfftc)) 587 call indgrid(rset%pawfgr%coatofin,rset%pawfgr%fintocoa,& 588 rset%pawfgr%nfftc,rset%pawfgr%nfft,& 589 rset%pawfgr%ngfftc,rset%pawfgr%ngfft) 590 591 else 592 rset%pawfgr%mgfft = 0 593 rset%pawfgr%ngfft = 0 594 rset%pawfgr%mgfftc = 0 595 596 rset%pawfgr%usefinegrid = 0 597 end if 598 599 600 end subroutine InitRec