TABLE OF CONTENTS
- ABINIT/density_rec
- ABINIT/entropyrec
- ABINIT/fermisolverec
- ABINIT/first_rec
- ABINIT/gran_potrec
- ABINIT/green_kernel
- ABINIT/m_vtorhorec
- ABINIT/nlenergyrec
- ABINIT/recursion
- ABINIT/recursion_nl
- ABINIT/vn_nl_rec
- ABINIT/vtorhorec
ABINIT/density_rec [ Functions ]
NAME
density_rec
FUNCTION
This routine computes the density using the coefficients corresponding to continued fraction at a point from a fixed potential.
INPUTS
coordx, coordy, coordz=coordonnees of the computed point an, bn2 : coefficient given by density_rec. Input if get_rec_coef=0, output else nrec=order of density_rec fermie=fermi energy (Hartree) tsmear=temperature (Hartree) rtrotter=real trotter parameter tol=tolerance criteria for stopping density_rec inf_ucvol=infinitesimal unit cell volume dim_trott = max(0,2*trotter-1)
OUTPUT
rho_out=result of the continued fraction multiplied by a multiplicator
SIDE EFFECTS
NOTES
at this time : - exppot should be replaced by ? - coord should be replaced by ? - need a rectangular box (rmet diagonal matrix)
SOURCE
1543 subroutine density_rec(an,bn2,rho_out,nrec, & 1544 & fermie,tsmear,rtrotter, & 1545 & dim_trott,tol,inf_ucvol) 1546 1547 !Arguments ------------------------------- 1548 !scalars 1549 integer,intent(in) :: nrec 1550 integer,intent(in) :: dim_trott 1551 real(dp),intent(in) :: fermie,tol,tsmear,inf_ucvol,rtrotter 1552 real(dp), intent(out) :: rho_out 1553 !arrays 1554 real(dp),intent(in) :: an(0:nrec),bn2(0:nrec) 1555 !Local variables------------------------------- 1556 !not used, debugging purpose only 1557 !for debugging purpose, detailled printing only once for density and ekin 1558 !scalars 1559 integer, parameter :: minrec = 3 1560 integer :: irec 1561 real(dp) :: beta,mult,prod_b2,error,errold 1562 real(dp) :: pi_on_rtrotter,twortrotter,exp1,exp2 1563 complex(dpc) :: cinv2rtrotter,coeef_mu,facrec0 1564 ! character(len=500) :: msg 1565 !arrays 1566 real(dp) :: tsec(2) 1567 complex(dpc) :: acc_rho(0:nrec) 1568 complex(dpc) :: D(0:dim_trott),Dold(0:dim_trott) 1569 complex(dpc) :: N(0:dim_trott),Nold(0:dim_trott) 1570 !************************************************************************** 1571 1572 call timab(605,1,tsec) 1573 1574 !############################################################## 1575 !--Initialisation of metrics 1576 mult = two/inf_ucvol !non-spined system 1577 beta = one/tsmear 1578 1579 !--Variables for optimisation 1580 pi_on_rtrotter = pi/rtrotter 1581 twortrotter = two*rtrotter 1582 exp1 = exp((beta*fermie)/(rtrotter)) 1583 exp2 = exp(beta*fermie/(twortrotter)) 1584 cinv2rtrotter = cmplx(one/twortrotter,zero,dp) 1585 coeef_mu = cmplx(one/exp2,zero,dp) 1586 1587 N = czero; D = cone 1588 facrec0 = cone 1589 Nold = czero; Dold = czero 1590 !--Initialisation of accumulated density 1591 acc_rho = czero 1592 !--Initialisation of estimated error 1593 prod_b2 = twortrotter/exp1 1594 errold = zero 1595 1596 1597 !############################################################## 1598 !--Main loop 1599 maindo : do irec = 0, nrec 1600 1601 ! ###################################################### 1602 ! --Density computation 1603 ! !--using the property that: sum_i(bi*c)^2|(z-ai*c)=1/c*sum_i(bi)^2|(z/c-ai) 1604 ! !and for c =exp(-beta*fermie/(two*rtrotter) 1605 1606 call trottersum(dim_trott,error,prod_b2,pi_on_rtrotter,& 1607 & facrec0,coeef_mu,exp1,& 1608 & an(irec),bn2(irec),& 1609 & N,D,Nold,Dold) 1610 1611 if(irec/=nrec .and. irec>=minrec)then 1612 if((bn2(irec+1)<tol14).or.(mult*error<tol.and.errold<tol)) exit maindo 1613 end if 1614 errold = mult*error 1615 end do maindo 1616 !--Accumulated density 1617 rho_out = mult*real(cone-sum(N/D)*cinv2rtrotter,dp) 1618 1619 call timab(605,2,tsec) 1620 1621 end subroutine density_rec
ABINIT/entropyrec [ Functions ]
NAME
entropyrec
FUNCTION
This routine computes the local part of the entropy at a point using a path integral, in the recursion method. an, bn2 : coefficient given by the recursion. nrec=order of recursion trotter=trotter parameter multce=a multiplicator for computing entropy ; 2 for non-spin-polarized system debug_rec=debug variable n_pt_integ=number of points of integration for the path integral xmax =max point of integration on the real axis
OUTPUT
ent_out=entropy at the point ent_out1,ent_out2,ent_out3,ent_out4=debug entropy at the point
NOTES
at this time : - multce should be not used - the routine should be integraly rewrited and use the routine recursion. - only modified for p /= 0
SOURCE
947 subroutine entropyrec(an,bn2,nrec,trotter,ent_out,multce,debug_rec, & 948 & n_pt_integ,xmax,& 949 & ent_out1,ent_out2,ent_out3,ent_out4) 950 951 !Arguments ------------------------------- 952 !scalars 953 integer,intent(in) :: n_pt_integ,nrec,trotter 954 logical,intent(in) :: debug_rec 955 real(dp), intent(in) :: multce,xmax 956 real(dp),intent(out) :: ent_out,ent_out1,ent_out2,ent_out3,ent_out4 957 !arrays 958 real(dp),intent(in) :: an(0:nrec),bn2(0:nrec) 959 960 !Local variables------------------------------- 961 !scalars 962 integer, parameter :: level = 7 963 integer, save :: first_en = 1 964 integer :: ii,kk,n_pt_integ_path2,n_pt_integ_path3 965 real(dp) :: arg,epsilo,step,twotrotter,xmin,dr_step 966 complex(dpc) :: D,Dnew,Dold,N,Nnew,Nold,dz_path,ent_acc,ent_acc1,ent_acc2 967 complex(dpc) :: ent_acc3,ent_acc4 968 complex(dpc) :: funczero,z_path,zj 969 complex(dpc) ::delta_calc 970 character(len=500) :: msg 971 !arrays 972 real(dp) :: tsec(2) 973 real(dp) :: iif,factor 974 975 976 ! ************************************************************************* 977 978 979 call timab(610,1,tsec) 980 981 !structured debugging if debug_rec=T : print detailled result the first time we enter entropyrec 982 983 if(debug_rec .and. first_en==1)then 984 write(msg,'(a)')' ' 985 call wrtout(std_out,msg,'PERS') 986 write(msg,'(a)')' entropyrec : enter ' 987 call wrtout(std_out,msg,'PERS') 988 write(msg,'(a,i6)')'n_pt_integ ' , n_pt_integ 989 call wrtout(std_out,msg,'COLL') 990 end if 991 992 ent_out = zero 993 ent_out1 = zero 994 ent_out2 = zero 995 ent_out3 = zero 996 ent_out4 = zero 997 ent_acc = czero 998 ent_acc1 = czero 999 ent_acc2 = czero 1000 ent_acc3 = czero 1001 ent_acc4 = czero 1002 1003 !path parameters 1004 twotrotter = max(two*real(trotter,dp),one) 1005 if(trotter==0)then 1006 factor = tol5 1007 arg =pi*three_quarters 1008 zj = cmplx(-one,one-sin(arg),dp) 1009 else 1010 factor = xmax/ten 1011 arg = pi/twotrotter 1012 zj = cmplx( cos(arg) , sin(arg),dp ) 1013 end if 1014 1015 epsilo = factor*sin( arg ) 1016 xmin = factor*cos( arg ) 1017 step = (xmax-xmin)/real(n_pt_integ,dp) 1018 1019 !#################################################################### 1020 ![xmax + i*epsilo,xmin + i*epsilo] 1021 dr_step = one/real(n_pt_integ,dp) 1022 path1: do ii = 0,n_pt_integ 1023 z_path = cmplx(xmin+real(ii,dp)*(xmax-xmin)*dr_step,epsilo,dp) 1024 dz_path = -cmplx((xmax-xmin)*dr_step,zero,dp) 1025 1026 Nold = czero 1027 Dold = cone 1028 N = cone 1029 D = z_path - cmplx(an(0),zero,dp) 1030 1031 do kk=1,nrec 1032 Nnew = (z_path - cmplx(an(kk),zero,dp))*N - cmplx(bn2(kk),zero,dp)*Nold 1033 Dnew = (z_path - cmplx(an(kk),zero,dp))*D - cmplx(bn2(kk),zero,dp)*Dold 1034 1035 Nold = N 1036 Dold = D 1037 N = Nnew 1038 D = Dnew 1039 1040 if(kk/=nrec)then 1041 if((bn2(kk+1)<tol14))exit 1042 end if 1043 end do 1044 1045 ! <r|1/(z-e**(-beta/(2p)*(H-mu)))|r> dz 1046 delta_calc = func1_rec(z_path**twotrotter)*(N/D)*dz_path 1047 if(ii==0.or.ii==n_pt_integ)then 1048 ent_acc = ent_acc + half*delta_calc 1049 ent_acc1 = ent_acc1 + half*delta_calc 1050 else 1051 ent_acc = ent_acc + delta_calc 1052 ent_acc1 = ent_acc1 + delta_calc 1053 end if 1054 end do path1 1055 1056 1057 !#################################################################### 1058 ![1/2zj,0] 1059 if(epsilo/step>100.d0)then 1060 n_pt_integ_path2 = int((factor*abs(zj))/step)+1 1061 else 1062 n_pt_integ_path2 = 100 1063 end if 1064 1065 if(trotter/=0)then 1066 n_pt_integ_path3 = 0 1067 dr_step = one/real(n_pt_integ_path2,dp) 1068 dz_path = -cmplx(xmin,epsilo,dp)*dr_step 1069 path5: do ii = 0,n_pt_integ_path2 1070 z_path = cmplx(real(ii,dp)*xmin,real(ii,dp)*epsilo,dp)*dr_step 1071 if(abs(z_path)>tol14)then 1072 Nold = czero 1073 Dold = cone 1074 N = cone 1075 D = z_path - cmplx(an(0),zero,dp) 1076 do kk=1,nrec 1077 Nnew = (z_path - cmplx(an(kk),zero,dp))*N - cmplx(bn2(kk),zero,dp)*Nold 1078 Dnew = (z_path - cmplx(an(kk),zero,dp))*D - cmplx(bn2(kk),zero,dp)*Dold 1079 Nold = N 1080 Dold = D 1081 N = Nnew 1082 D = Dnew 1083 if(kk/=nrec)then 1084 if((bn2(kk+1)<tol14))exit 1085 end if 1086 end do 1087 1088 ! <r|1/(z-e**(-beta/(2p)*(H-mu)))|r> dz 1089 if(abs(z_path)**twotrotter>tiny(one)) then 1090 funczero = func1_rec(z_path**twotrotter) 1091 else 1092 funczero = czero 1093 end if 1094 delta_calc = funczero*N/D*dz_path 1095 if(ii==0.or.ii==n_pt_integ_path2)then 1096 ent_acc = ent_acc + half*delta_calc 1097 if(debug_rec) ent_acc3 = ent_acc3 + half*delta_calc 1098 else 1099 ent_acc = ent_acc + funczero*delta_calc 1100 if(debug_rec) ent_acc3 = ent_acc3 + funczero*delta_calc 1101 end if 1102 end if 1103 end do path5 1104 1105 else ! trotter==0 1106 1107 n_pt_integ_path3 = max(100,int((epsilo*half*pi)/real(step,dp))+1) 1108 dr_step = one/real(n_pt_integ_path3,dp) 1109 path6: do ii = 0,n_pt_integ_path3 1110 iif=half*pi*real(ii,dp)*dr_step 1111 z_path = epsilo*cmplx(-cos(iif),1-sin(iif),dp) 1112 dz_path = epsilo*cmplx(sin(iif),-cos(iif),dp)*half*pi*dr_step 1113 if(abs(z_path)**twotrotter>tol14)then 1114 Nold = czero 1115 Dold = cone 1116 N = cone 1117 D = z_path - cmplx(an(0),zero,dp) 1118 do kk=1,nrec 1119 Nnew = (z_path - cmplx(an(kk),zero,dp))*N - cmplx(bn2(kk),zero,dp)*Nold 1120 Dnew = (z_path - cmplx(an(kk),zero,dp))*D - cmplx(bn2(kk),zero,dp)*Dold 1121 Nold = N 1122 Dold = D 1123 N = Nnew 1124 D = Dnew 1125 if(kk/=nrec .and. bn2(kk+1)<tol14) exit !-EXIT 1126 end do 1127 1128 ! <r|1/(z-e**(-beta/(2p)*(H-mu)))|r> dz 1129 delta_calc = func1_rec(z_path**twotrotter) * N/D * dz_path 1130 if(ii==0.or.ii==n_pt_integ_path3)then 1131 ent_acc = ent_acc + half*delta_calc 1132 if(debug_rec) ent_acc3 = ent_acc3 + half*delta_calc 1133 else 1134 ent_acc = ent_acc + delta_calc !<r|1/(z-e**(-beta/(2p)*(H-mu)))|r> dz 1135 if(debug_rec) ent_acc3 = ent_acc3 + delta_calc !<r|1/(z-e**(-beta/(2p)*(H-mu)))|r> dz 1136 end if 1137 end if 1138 end do path6 1139 1140 end if 1141 1142 if(first_en==1 .and. debug_rec) then 1143 write(msg,'(a,i5,2a,i5,2a,i5,2a,es11.4,2a,es11.4,2a,es11.4)')& 1144 & 'n_pt_path =',n_pt_integ,ch10,& 1145 & 'n_pt_path2 =',n_pt_integ_path2,ch10,& 1146 & 'n_pt_path3 =',n_pt_integ_path3,ch10,& 1147 & 'xmin =',xmin,ch10,& 1148 & 'xmax =',xmax,ch10,& 1149 & 'epsilon =',epsilo 1150 call wrtout(std_out,msg,'COLL') 1151 first_en = 0 1152 end if 1153 1154 !#################################################################### 1155 ![xmax,xmax+i*epsilo] 1156 dr_step = one/real(n_pt_integ_path2,dp) 1157 dz_path = cmplx(zero,epsilo*dr_step,dp) 1158 path4: do ii = 0,n_pt_integ_path2 1159 z_path = cmplx(xmax,real(ii,dp)*epsilo*dr_step,dp) 1160 1161 Nold = czero 1162 Dold = cone 1163 N = cone 1164 D = z_path - cmplx(an(0),zero,dp) 1165 1166 do kk=1,nrec 1167 Nnew = (z_path - cmplx(an(kk),zero,dp))*N - cmplx(bn2(kk),zero,dp)*Nold 1168 Dnew = (z_path - cmplx(an(kk),zero,dp))*D - cmplx(bn2(kk),zero,dp)*Dold 1169 1170 Nold = N 1171 Dold = D 1172 N = Nnew 1173 D = Dnew 1174 1175 if(kk/=nrec)then 1176 if((bn2(kk+1)<tol14))exit 1177 end if 1178 end do 1179 1180 ! <r|1/(z-e**(-beta/(2p)*(H-mu)))|r> dz 1181 delta_calc = func1_rec(z_path**twotrotter)*N/D*dz_path 1182 if(ii==0.or.ii==n_pt_integ_path2)then 1183 1184 ent_acc = ent_acc + half*delta_calc 1185 if(debug_rec) ent_acc2 = ent_acc2 + half*delta_calc 1186 else 1187 ent_acc = ent_acc + delta_calc 1188 if(debug_rec) ent_acc2 = ent_acc2 + delta_calc 1189 end if 1190 end do path4 1191 1192 1193 ent_out = multce*real(ent_acc*cmplx(zero,-piinv,dp),dp) 1194 if(debug_rec) then 1195 ent_out1 = multce*real(ent_acc1*cmplx(zero,-piinv,dp),dp) 1196 ent_out2 = multce*real(ent_acc2*cmplx(zero,-piinv,dp),dp) 1197 ent_out3 = multce*real(ent_acc3*cmplx(zero,-piinv,dp),dp) 1198 ent_out4 = multce*real(ent_acc4*cmplx(zero,-piinv,dp),dp) 1199 end if 1200 1201 call timab(610,2,tsec) 1202 1203 contains 1204 1205 !function to integrate over the path 1206 !func1_rec(z_path,twotrotter) = ( z_path**twotrotter/(1+z_path**twotrotter)*log(1+1/z_path**twotrotter)+& !- f*ln(f) 1207 !&1/(1+z_path**twotrotter)*log(1+z_path**twotrotter)) !- (1-f)*ln(1-f) 1208 1209 !func1_rec(z_path_pow) = z_path_pow/(cone+z_path_pow)*log(cone+cone/z_path_pow)+& !- f*ln(f) 1210 !&cone/(cone+z_path_pow)*log(cone+z_path_pow) !- (1-f)*ln(1-f) 1211 1212 !other expression of func for a path like ro(t)*exp(2*i*pi/(2*p)*(j+1/2)) 1213 1214 function func1_rec(z) 1215 1216 complex(dpc) :: func1_rec 1217 complex(dpc),intent(in) :: z 1218 1219 func1_rec = z/(cone+z)*log(cone+cone/z)+ cone/(cone+z)*log(cone+z) 1220 1221 end function func1_rec 1222 1223 end subroutine entropyrec
ABINIT/fermisolverec [ Functions ]
NAME
fermisolverec
FUNCTION
This routine computes the fermi energy in order to have a given number of valence electrons in the recursion method, using a Ridder s Method
INPUTS
debug_rec=debugging variable nb_rec=order of recursion nb_point=number of discretization point in one dimension (=n1=n2=n3) temperature=temperature (Hartree) trotter=trotter parameter nelect=number of valence electrons (dtset%nelect) acc=accuracy for the fermi energy max_it=maximum number of iteration for the Ridder's Method long_tranche=number of point computed by thi proc mpi_enreg=information about MPI parallelization inf_ucvol=infinitesimal unit cell volume gputopo=true if topology gpu-cpu= 2 or 3
OUTPUT
SIDE EFFECTS
fermie=fermi energy rho=density, recomputed for the new fermi energy a, b2 : coefficient given by recursion recomputed for the new fermi energy
NOTES
at this time :
SOURCE
1260 subroutine fermisolverec(fermie,rho,a,b2,debug_rec,nb_rec, & 1261 & temperature,trotter,nelect, & 1262 & acc, max_it, & 1263 & long_tranche,mpi_enreg,& 1264 & inf_ucvol,gputopo) 1265 1266 !Arguments ------------------------------- 1267 !scalars 1268 integer,intent(in) :: long_tranche,max_it,nb_rec,trotter 1269 logical,intent(in) :: debug_rec,gputopo 1270 real(dp),intent(in) :: acc,inf_ucvol,nelect,temperature 1271 real(dp), intent(inout) :: fermie 1272 type(MPI_type),intent(in) :: mpi_enreg 1273 !arrays 1274 real(dp), intent(inout) :: a(0:nb_rec,long_tranche), b2(0:nb_rec,long_tranche) 1275 real(dp), intent(inout) :: rho(long_tranche) 1276 1277 !Local variables------------------------------- 1278 !scalars 1279 integer :: ierr,ii,ipointlocal,nn,dim_trott 1280 real(dp) :: beta,fermieh,fermiel,fermiem,fermienew,nelecth,nelectl,nelectm 1281 real(dp) :: nelectnew,res_nelecth,res_nelectl,res_nelectm,res_nelectnew 1282 real(dp) :: rtrotter,ss,fermitol 1283 character(len=500) :: msg 1284 !arrays 1285 real(dp) :: tsec(2) 1286 real(dp) :: rhotry(long_tranche) 1287 !no_abirules 1288 #ifdef HAVE_GPU_CUDA 1289 integer :: swt_tm,npitch 1290 real(cudap) :: rhocu(long_tranche) 1291 real(dp) :: tsec2(2) 1292 #endif 1293 1294 ! ************************************************************************* 1295 1296 #ifdef HAVE_GPU_CUDA 1297 swt_tm = 0 1298 #endif 1299 1300 call timab(609,1,tsec) 1301 1302 beta = one/temperature 1303 rtrotter = max(half,real(trotter,dp)) 1304 dim_trott = max(0,2*trotter-1) 1305 1306 write(msg,'(a)')' -- fermisolverec ---------------------------------' 1307 call wrtout(std_out,msg,'COLL') 1308 if(debug_rec) then 1309 write (msg,'(a,d10.3)')' nelect= ',nelect 1310 call wrtout(std_out,msg,'COLL') 1311 end if 1312 !initialisation of fermiel 1313 fermiel = fermie 1314 call timab(609,2,tsec) 1315 1316 !initialisation fermitol 1317 fermitol = acc 1318 #ifdef HAVE_GPU_CUDA_SP 1319 if(gputopo) fermitol = 1.d-3 1320 #endif 1321 1322 if(gputopo) then 1323 #ifdef HAVE_GPU_CUDA 1324 swt_tm = 1 1325 ! allocate array an and bn2 on gpu for computation of trotter formula 1326 call alloc_dens_cuda(long_tranche,nb_rec,dim_trott,npitch,& 1327 & real(a,cudap),real(b2,cudap)) 1328 1329 call timab(617,1,tsec) 1330 call density_cuda(npitch,long_tranche,nb_rec,dim_trott,& 1331 & real(fermiel,cudap),real(temperature,cudap),& 1332 & real(rtrotter,cudap),real(inf_ucvol,cudap),& 1333 & real(tol14,cudap),& 1334 & rhocu) 1335 rhotry = real(rhocu,dp) 1336 call timab(617,2,tsec) 1337 #endif 1338 else 1339 do ipointlocal = 1,long_tranche 1340 call density_rec(a(:,ipointlocal),& 1341 & b2(:,ipointlocal),& 1342 & rhotry(ipointlocal),& 1343 & nb_rec,fermiel,temperature,rtrotter,dim_trott, & 1344 & tol14,inf_ucvol) 1345 end do 1346 end if 1347 1348 call timab(609,1,tsec) 1349 nelectl = sum(rhotry) 1350 call xmpi_sum( nelectl,mpi_enreg%comm_bandfft,ierr) 1351 res_nelectl = inf_ucvol*nelectl - nelect 1352 1353 if (res_nelectl /= zero) then 1354 ! initialisation of fermih 1355 ! excess of electrons -> smaller fermi 1356 res_nelecth = zero 1357 ii = 1 1358 fermieh = fermie - ten*sign(one,res_nelectl)*temperature 1359 do while(ii<6 .and. res_nelecth*res_nelectl>=0) 1360 fermieh = fermieh - ten*sign(one,res_nelectl)*temperature 1361 call timab(609,2,tsec) 1362 1363 if(gputopo) then 1364 #ifdef HAVE_GPU_CUDA 1365 call timab(617,1,tsec) 1366 call density_cuda(npitch,long_tranche,nb_rec,dim_trott,& 1367 & real(fermieh,cudap),real(temperature,cudap),& 1368 & real(rtrotter,cudap),real(inf_ucvol,cudap),& 1369 & real(tol14,cudap),& 1370 & rhocu) 1371 rhotry = real(rhocu,dp) 1372 call timab(617,2,tsec) 1373 #endif 1374 else 1375 do ipointlocal = 1,long_tranche 1376 call density_rec(a(:,ipointlocal), & 1377 & b2(:,ipointlocal), & 1378 & rhotry(ipointlocal), & 1379 & nb_rec,fermieh,temperature,rtrotter,dim_trott, & 1380 & tol14,inf_ucvol) 1381 end do 1382 end if 1383 call timab(609,1,tsec) 1384 nelecth = sum(rhotry) 1385 call xmpi_sum( nelecth,mpi_enreg%comm_bandfft ,ierr); 1386 res_nelecth = inf_ucvol*nelecth - nelect 1387 1388 if(debug_rec) then 1389 write (msg,'(a,es11.4e2,a,es11.4e2)') ' Fermi energy interval',fermieh,' ',fermiel 1390 call wrtout(std_out,msg,'COLL') 1391 end if 1392 ii = ii +1 1393 end do 1394 1395 if (res_nelecth*res_nelectl>0) then 1396 write (msg,'(4a)')' fermisolverec : ERROR- ',ch10,& 1397 & ' initial guess for fermi energy doesnt permit to find solutions in solver',ch10 1398 ABI_ERROR(msg) 1399 end if 1400 1401 ! MAIN LOOP ------------------------------------------------------ 1402 main : do nn=1,max_it 1403 ! fermiem computation 1404 fermiem = 0.5d0*(fermiel+fermieh) 1405 1406 ! nelectm = zero 1407 call timab(609,2,tsec) 1408 1409 if(gputopo) then 1410 #ifdef HAVE_GPU_CUDA 1411 call timab(617,1,tsec) 1412 call density_cuda(npitch,long_tranche,nb_rec,dim_trott,& 1413 & real(fermiem,cudap),real(temperature,cudap),& 1414 & real(rtrotter,cudap),real(inf_ucvol,cudap),& 1415 & real(tol14,cudap),& 1416 & rhocu) 1417 rhotry = real(rhocu,dp) 1418 call timab(617,2,tsec) 1419 #endif 1420 else 1421 do ipointlocal = 1,long_tranche 1422 call density_rec(a(:,ipointlocal), & 1423 & b2(:,ipointlocal), & 1424 & rhotry(ipointlocal), & 1425 & nb_rec,fermiem,temperature,rtrotter,dim_trott, & 1426 & tol14,inf_ucvol) 1427 end do 1428 end if 1429 1430 call timab(609,1,tsec) 1431 nelectm = sum(rhotry) 1432 call xmpi_sum( nelectm,mpi_enreg%comm_bandfft,ierr) 1433 res_nelectm = inf_ucvol*nelectm - nelect 1434 1435 ! new guess 1436 ss = sqrt(res_nelectm**two-res_nelectl*res_nelecth) 1437 fermienew = fermiem + (fermiem-fermiel)*sign(one, res_nelectl-res_nelecth)*res_nelectm/ss 1438 1439 call timab(609,2,tsec) 1440 if(gputopo) then 1441 #ifdef HAVE_GPU_CUDA 1442 call timab(617,1,tsec) 1443 call density_cuda(npitch,long_tranche,nb_rec,dim_trott,& 1444 & real(fermienew,cudap),real(temperature,cudap),& 1445 & real(rtrotter,cudap),real(inf_ucvol,cudap),& 1446 & real(tol14,cudap),& 1447 & rhocu) 1448 rhotry = real(rhocu,dp) 1449 call timab(617,2,tsec) 1450 #endif 1451 else 1452 do ipointlocal = 1,long_tranche 1453 call density_rec(a(:,ipointlocal), & 1454 & b2(:,ipointlocal), & 1455 & rhotry(ipointlocal), & 1456 & nb_rec,fermienew,temperature,rtrotter,dim_trott, & 1457 & tol14,inf_ucvol) 1458 end do 1459 end if 1460 1461 call timab(609,1,tsec) 1462 nelectnew = sum(rhotry) 1463 call xmpi_sum( nelectnew,mpi_enreg%comm_bandfft ,ierr); 1464 res_nelectnew = inf_ucvol*nelectnew - nelect 1465 1466 ! fermiel et fermieh for new iteration 1467 if (sign(res_nelectm,res_nelectnew) /= res_nelectm) then 1468 fermiel = fermiem 1469 res_nelectl = res_nelectm 1470 fermieh = fermienew 1471 res_nelecth = res_nelectnew 1472 else if (sign(res_nelectl,res_nelectnew) /= res_nelectl) then 1473 fermieh = fermienew 1474 res_nelecth = res_nelectnew 1475 else if (sign(res_nelecth,res_nelectnew) /= res_nelecth) then 1476 fermiel = fermienew 1477 res_nelectl = res_nelectnew 1478 end if 1479 1480 ! are we within the tolerance ? 1481 if ((abs(res_nelectnew) < fermitol).or.(nn == max_it)) then 1482 fermie = fermienew 1483 rho = rhotry 1484 if(debug_rec) then 1485 write (msg,'(a,es11.4e2,a,i4)')' err, num_iter ', res_nelectnew, ' ',nn 1486 call wrtout(std_out,msg,'COLL') 1487 write(msg,'(a,50a)')' ',('-',ii=1,50) 1488 call wrtout(std_out,msg,'COLL') 1489 end if 1490 exit main 1491 end if 1492 1493 end do main 1494 1495 end if 1496 1497 #ifdef HAVE_GPU_CUDA 1498 !deallocate array on GPU 1499 if(gputopo) then 1500 call dealloc_dens_cuda() 1501 end if 1502 call timab(613+swt_tm,1,tsec2) !!--start time-counter: sync gpu-cpu 1503 call xmpi_barrier(mpi_enreg%comm_bandfft) 1504 call timab(613+swt_tm,2,tsec2) !!--stop time-counter: sync gpu-cpu 1505 #endif 1506 1507 call timab(609,2,tsec) 1508 end subroutine fermisolverec
ABINIT/first_rec [ Functions ]
NAME
first_rec
FUNCTION
When recursion method is used, in the first step this routine compute some quantities which are used in the rest of the calculation.
COPYRIGHT
Copyright (C) 2009-2024 ABINIT group (MMancini) This file is distributed under the terms of the GNU General Public License, see ~abinit/COPYING or http://www.gnu.org/copyleft/gpl.txt .
INPUTS
dtset <type(dataset_type)>=all input variables for this dataset: | recgratio =fine/coarse grid ratio | recptrott =trotter parameter | tsmear =temperature | recrcut =tut radius in recursion (range of iteration) | ngfft(18) =FFT grid used as real (fine) grid in recursion psps <type(pseudopotential_type)>=variables related to pseudo-potentials
OUTPUT
SIDE EFFECTS
rset <type(recursion_type)>=variables related to recursion method | debug<logical> = T if debugging is used | inf <type(metricrec_type)>=information concerning the infinitesimal metrics | ngfftrec(18) =truncated (or not, if not ngfftrec=ngfft)FFT grid used as real grid in recursion. | nfftrec =product(ngfftrec(1:3)) | tronc<logical> = T if truncation is effectively used | ZT_p = fourier transform of the green_kernel calculated on the fine grid
NOTES
SOURCE
2142 subroutine first_rec(dtset,psps,rset) 2143 2144 !Arguments ------------------------------------ 2145 ! scalars 2146 type(dataset_type),intent(in) :: dtset 2147 type(pseudopotential_type),intent(in) :: psps 2148 type(recursion_type),intent(inout) :: rset 2149 !Local variables------------------------------- 2150 !scalars 2151 integer :: nfftrec,trotter,ii,dim_trott 2152 real(dp) :: tsmear,beta,rtrotter 2153 character(len=500) :: msg 2154 !arrays 2155 integer :: ngfftrec(18) 2156 real(dp) :: tsec(2) 2157 #ifdef HAVE_GPU_CUDA 2158 integer :: max_rec,ierr,testpts,swt_tm 2159 real(dp) :: rho,tm_ratio 2160 real(dp) :: time_cu,time_f 2161 type(recursion_type) :: rset_test 2162 type(recparall_type) :: parold 2163 integer :: trasl(3) 2164 real(dp) :: tsec2(2),tsec3(2) 2165 real(dp) :: aloc(0,1),b2loc(0,1) 2166 real(dp) :: dm_projec(0,0,0,1,1) 2167 real(dp) :: exppot(0:dtset%nfft-1) 2168 real(dp),allocatable :: exppotloc(:) 2169 real(cudap),allocatable :: aloc_cu(:),b2loc_cu(:) 2170 #endif 2171 2172 ! ************************************************************************* 2173 2174 call timab(601,1,tsec) !!--Start time-counter: initialisation 2175 2176 ABI_WARNING("RECURSION") 2177 if(dtset%recgratio>1) then 2178 write(msg,'(a)')'COARSE GRID IS USED' 2179 call wrtout(std_out,msg,'COLL') 2180 end if 2181 2182 !--Initialisation 2183 trotter = dtset%recptrott !--Trotter parameter 2184 tsmear = dtset%tsmear !--Temperature 2185 beta = one/tsmear !--Inverse of temperature 2186 2187 !--Rewriting the trotter parameter 2188 dim_trott = max(0,2*trotter-1) 2189 rtrotter = max(half,real(trotter,dp)) 2190 2191 write (msg,'(2a)')ch10,'==== FIRST CYCLE RECURSION =========================' 2192 call wrtout(std_out,msg,'COLL') 2193 2194 2195 ngfftrec = rset%ngfftrec 2196 nfftrec = rset%nfftrec 2197 !------------------------------------------------ 2198 !--TRONCATION OF THE BOX: determines new dimensions 2199 !--Now in InitRec 2200 !-------------------------------------------------------- 2201 !--DEFINITION PAW VARIABLES COARSE-FINE GRID TO USE TRANSGRID--INGRID FUNCTIONS 2202 !--Now these variables are defined into gstate by InitRec 2203 2204 !-------------------------------------------------------- 2205 !--COMPUTATION OF THE FOURIER TRANSFORM OF THE GREEN KERNEL (only once) 2206 write (msg,'(a)')' - green kernel calculation -----------------------' 2207 call wrtout(std_out,msg,'COLL') 2208 ABI_MALLOC(rset%ZT_p,(1:2,0: nfftrec-1)) 2209 call timab(601,2,tsec) 2210 call green_kernel(rset%ZT_p,rset%inf%rmet,rset%inf%ucvol,rtrotter/beta,rset%mpi,ngfftrec,nfftrec) 2211 call timab(601,1,tsec) 2212 write(msg,'(a,50a)')' ',('-',ii=1,50) 2213 call wrtout(std_out,msg,'COLL') 2214 !!--end computation of the fourier transform of the Green kernel 2215 2216 !!----------------------------------- 2217 !!--ROUTINE FOR THE CALCULATION OF THE NON-LOCAL PSEUDO 2218 !--Now these variables here by Init_nlpspRec 2219 call Init_nlpspRec(four*tsmear*rtrotter,psps,rset%nl,rset%inf,rset%ngfftrec,rset%debug) 2220 2221 !!----------------------------------- 2222 !--Load distribution on procs when GPU are present 2223 #if defined HAVE_GPU_CUDA 2224 2225 !--Test timing only if exists GPU and they are not equal to the cpus 2226 if(rset%tp == 4) then 2227 parold = rset%par 2228 ii = 0 2229 time_f = zero 2230 time_cu = zero 2231 call random_number(exppot) ! exppot = one 2232 2233 if(rset%gpudevice == -1) then 2234 ! --Test CPUS 2235 swt_tm = 0 2236 testpts = min(rset%par%npt, 20) 2237 call timein(tsec2(1),tsec2(2)) 2238 if(rset%tronc) then 2239 ABI_MALLOC(exppotloc,(0:nfftrec-1)) 2240 do while(ii< testpts) 2241 trasl = -(/1,2,3/)+ngfftrec(:3)/2 2242 call reshape_pot(trasl,dtset%nfft,nfftrec,dtset%ngfft(:3),ngfftrec(:3),& 2243 & exppot,exppotloc) 2244 call recursion(exppotloc,0,0,0, & 2245 & aloc, & 2246 & b2loc, & 2247 & rho,& 2248 & 0, rset%efermi,tsmear,rtrotter,dim_trott, & 2249 & rset%ZT_p, & 2250 & dtset%rectolden,dtset%typat, & 2251 & rset%nl,& 2252 & rset%mpi,nfftrec,ngfftrec,rset%inf,& 2253 & 6,dtset%natom,dm_projec,0) 2254 ii=ii+1 2255 end do 2256 ABI_FREE(exppotloc) 2257 else 2258 do while(ii< testpts) 2259 call recursion(exppot,0,0,0, & 2260 & aloc, & 2261 & b2loc, & 2262 & rho,& 2263 & 0, rset%efermi,tsmear,rtrotter,dim_trott, & 2264 & rset%ZT_p, & 2265 & dtset%rectolden,dtset%typat, & 2266 & rset%nl,& 2267 & rset%mpi,nfftrec,ngfftrec,rset%inf,& 2268 & 6,dtset%natom,dm_projec,0) 2269 ii=ii+1 2270 end do 2271 end if 2272 call timein(tsec3(1),tsec3(2)) 2273 time_f = (tsec3(1)-tsec2(1))/real(testpts,dp) 2274 time_f = time_f*time_f 2275 else 2276 ! --Test GPUS 2277 swt_tm = 1 2278 rset_test = rset 2279 rset_test%GPU%par%npt = max(rset%GPU%nptrec,100) 2280 rset_test%min_nrec = 0 2281 call get_pt0_pt1(dtset%ngfft(:3),dtset%recgratio,0,& 2282 & rset_test%GPU%par%npt,rset_test%GPU%par) 2283 2284 2285 ABI_MALLOC(aloc_cu,(rset_test%GPU%par%npt)) 2286 ABI_MALLOC(b2loc_cu,(rset_test%GPU%par%npt)) 2287 call timein(tsec2(1),tsec2(2)) 2288 call cudarec(rset_test, exppot,aloc_cu,b2loc_cu,& 2289 & beta,trotter,dtset%rectolden,dtset%recgratio,dtset%ngfft,max_rec) 2290 call timein(tsec3(1),tsec3(2)) 2291 ABI_FREE(aloc_cu) 2292 ABI_FREE(b2loc_cu) 2293 2294 time_cu = (tsec3(1)-tsec2(1))/real(rset_test%GPU%par%npt,dp) 2295 time_cu = time_cu*time_cu 2296 end if 2297 2298 2299 ! --Get Total Times 2300 call xmpi_sum(time_f,rset%mpi%comm_bandfft,ierr) 2301 call xmpi_sum(time_cu,rset%mpi%comm_bandfft,ierr) 2302 2303 ! --Average Total Times 2304 time_f = sqrt(time_f/real(rset%mpi%nproc-rset%ngpu,dp)) 2305 time_cu = sqrt(time_cu/real(rset%ngpu,dp)) 2306 tm_ratio = time_f/time_cu 2307 2308 2309 write(msg,'(3(a25,f10.5,a))')& 2310 & ' Time for cpu recursion ',time_f,ch10,& 2311 & ' Time for gpu recursion ',time_cu,ch10,& 2312 & ' Time ratio ',tm_ratio,ch10 2313 call wrtout(std_out,msg,'COLL') 2314 2315 2316 ! tm_ratio =1.20d2! 0.d0! 1.21d0 2317 rset%par = parold 2318 ! --Compute the work-load distribution on devices (gpu,cpu) 2319 if(tm_ratio>1.5d0 .and. time_cu>zero)then 2320 rset%load = 1 2321 call cpu_distribution(dtset%recgratio,rset,dtset%ngfft(:3),tm_ratio,1) 2322 else 2323 rset%gpudevice = -1 2324 end if 2325 end if 2326 2327 #endif 2328 2329 2330 !------------------------------------------------------------ 2331 !--DETERMINING WHICH POINT WILL COMPUTE THAT PROC 2332 !--Now these variables are defined into gstate by Init_rec 2333 2334 write (msg,'(2a)')ch10,'==== END FIRST CYCLE RECURSION =====================' 2335 call wrtout(std_out,msg,'COLL') 2336 call timab(601,2,tsec) !!--stop time-counter: initialisation 2337 2338 end subroutine first_rec
ABINIT/gran_potrec [ Functions ]
NAME
gran_potrec
FUNCTION
This routine computes the local part of the grand-potential at a point using a path integral, in the recursion method.
INPUTS
an, bn2 : coefficient given by the recursion. nrec=order of recursion trotter=trotter parameter mult=a multiplicator for computing grand-potential (2 for non-spin-polarized system) debug_rec=debugging variable n_pt_integ=points for computation of path integral xmax= maximum point on the x-axis for integration
OUTPUT
ene_out=grand-potential at the point if debug_rec=T then ene_out1,ene_out2,ene_out3,ene_out4 are the different path branch contriubutions to the grand-potential. In reality it is not the gren potential but the grand-potential (omega=-PV) divided by -T
NOTES
in reality it is not the gren potential but the grand-potential (omega=-PV) divided by -T at this time : - mult should be not used - the routine should be integraly rewrited and use the routine recursion. - only modified for p /= 0
SOURCE
1657 subroutine gran_potrec(an,bn2,nrec,trotter,ene_out, mult, & 1658 & debug_rec,n_pt_integ,xmax,& 1659 & ene_out1,ene_out2,ene_out3,ene_out4) 1660 1661 !Arguments ------------------------------- 1662 !scalars 1663 integer,intent(in) :: n_pt_integ,nrec,trotter 1664 logical,intent(in) :: debug_rec 1665 real(dp), intent(in) :: mult,xmax 1666 real(dp),intent(inout) :: ene_out,ene_out1,ene_out2,ene_out3,ene_out4 !vz_i 1667 !arrays 1668 real(dp), intent(in) :: an(0:nrec),bn2(0:nrec) 1669 1670 !Local variables------------------------------- 1671 !scalars 1672 integer, parameter :: level = 7 1673 integer, save :: first = 1 1674 integer :: ii,kk,n_pt_integ_path2 1675 real(dp) :: epsilon,step,twotrotter,xmin,dr_step 1676 complex(dpc) :: D,Dnew,Dold,N,Nnew,Nold,dz_path,ene_acc,ene_acc1,ene_acc2 1677 complex(dpc) :: ene_acc3,ene_acc4 1678 complex(dpc) :: z_path,delta_calc 1679 character(len=500) :: message 1680 !arrays 1681 real(dp) :: tsec(2) 1682 1683 ! ************************************************************************* 1684 1685 1686 call timab(611,1,tsec) 1687 1688 !structured debugging if debug_rec=T : print detailled result the first time we enter gran_potrec 1689 if(debug_rec .and. first==1)then 1690 write(message,'(a)')' ' 1691 call wrtout(std_out,message,'PERS') 1692 write(message,'(a)')' gran_potrec : enter ' 1693 call wrtout(std_out,message,'PERS') 1694 write(message,'(a,i8)')'n_pt_integ ' , n_pt_integ 1695 call wrtout(std_out,message,'COLL') 1696 first=0 1697 end if 1698 1699 ene_out = zero 1700 ene_acc = czero 1701 ene_acc1 = czero 1702 ene_acc2 = czero 1703 ene_acc3 = czero 1704 ene_acc4 = czero 1705 1706 1707 !path parameters 1708 !n_pt_integ = 2500 1709 xmin = -half 1710 step = (xmax-xmin)/real(n_pt_integ,dp) 1711 if(trotter==0)then 1712 twotrotter = one 1713 epsilon = .5d-1 1714 else 1715 twotrotter = two*real(trotter,dp) 1716 epsilon = half*sin( pi/twotrotter) 1717 end if 1718 1719 !xmin = -abs(xmin)**(1.d0/twotrotter) 1720 1721 !#################################################################### 1722 ![xmax + i*epsilon,xmin + i*epsilon] 1723 dr_step = one/real(n_pt_integ,dp) 1724 dz_path = -cmplx((xmax-xmin)*dr_step,zero,dp) 1725 path1: do ii = 0,n_pt_integ 1726 ! z_path = cmplx(xmin + real(ii,dp)*(xmax-xmin)*dr_step,epsilon,dp) 1727 z_path = cmplx(xmin,epsilon,dp) - real(ii,dp)*dz_path 1728 Nold = czero 1729 Dold = cone 1730 N = cone 1731 D = z_path - cmplx(an(0),zero,dp) 1732 1733 do kk=1,nrec 1734 Nnew = (z_path - cmplx(an(kk),zero,dp))*N - cmplx(bn2(kk),zero,dp)*Nold 1735 Dnew = (z_path - cmplx(an(kk),zero,dp))*D - cmplx(bn2(kk),zero,dp)*Dold 1736 1737 Nold = N 1738 Dold = D 1739 N = Nnew 1740 D = Dnew 1741 1742 if(kk/=nrec)then 1743 if((bn2(kk+1)<tol14))exit 1744 end if 1745 1746 end do 1747 1748 ! <r|1/(z-e**(-beta/(2p)*(H-mu)))|r> dz 1749 delta_calc = func_rec(z_path,twotrotter)* N/D *dz_path 1750 if(ii==0.or.ii==n_pt_integ)then 1751 ene_acc = ene_acc + half*delta_calc 1752 if(debug_rec) ene_acc1 = ene_acc1 + half*delta_calc 1753 else 1754 ene_acc = ene_acc + delta_calc 1755 if(debug_rec) ene_acc1 = ene_acc1 + delta_calc 1756 end if 1757 end do path1 1758 1759 !#################################################################### 1760 ![xmin + i*epsilon,xmin] 1761 if(epsilon/step>4.d0)then 1762 n_pt_integ_path2 = int(epsilon/step)+1 1763 else 1764 n_pt_integ_path2 = 5 1765 end if 1766 n_pt_integ_path2 = n_pt_integ 1767 dr_step = one/real(n_pt_integ_path2,dp) 1768 dz_path = -cmplx(zero,epsilon*dr_step,dp) 1769 path2: do ii = 0,n_pt_integ_path2 1770 ! z_path = cmplx(xmin,real(ii,dp)*epsilon*dr_step,dp) 1771 z_path = cmplx(xmin,zero,dp)-dz_path*real(ii,dp) 1772 Nold = czero 1773 Dold = cone 1774 N = cone 1775 D = z_path - cmplx(an(0),zero,dp) 1776 1777 do kk=1,nrec 1778 Nnew = (z_path - cmplx(an(kk),zero,dp))*N - cmplx(bn2(kk),zero,dp)*Nold 1779 Dnew = (z_path - cmplx(an(kk),zero,dp))*D - cmplx(bn2(kk),zero,dp)*Dold 1780 1781 Nold = N 1782 Dold = D 1783 N = Nnew 1784 D = Dnew 1785 1786 if(kk/=nrec)then 1787 if((bn2(kk+1)<tol14))exit 1788 end if 1789 1790 end do 1791 1792 ! <r|1/(z-e**(-beta/(2p)*(H-mu)))|r> dz 1793 delta_calc = func_rec(z_path,twotrotter)* N/D *dz_path 1794 if(ii==0.or.ii==n_pt_integ_path2)then 1795 ene_acc = ene_acc + half*delta_calc 1796 if(debug_rec) ene_acc3 = ene_acc3 + half*delta_calc 1797 else 1798 ene_acc = ene_acc + delta_calc 1799 if(debug_rec) ene_acc3 = ene_acc3 + delta_calc 1800 end if 1801 end do path2 1802 1803 1804 1805 !#################################################################### 1806 ![xmin,0] 1807 if(xmin/=czero)then 1808 dr_step = one/real(n_pt_integ,dp) 1809 dz_path = cmplx(xmin*dr_step,zero,dp) 1810 path3: do ii = 1,n_pt_integ !the integrand is 0 at 0 1811 ! z_path = cmplx(real(ii,dp)*xmin*dr_step,zero,dp) 1812 z_path = real(ii,dp)*dz_path 1813 1814 Nold = czero 1815 Dold = cone 1816 N = cone 1817 D = z_path - cmplx(an(0),zero,dp) 1818 1819 do kk=1,nrec 1820 Nnew = (z_path - cmplx(an(kk),zero,dp))*N - cmplx(bn2(kk),zero,dp)*Nold 1821 Dnew = (z_path - cmplx(an(kk),zero,dp))*D - cmplx(bn2(kk),zero,dp)*Dold 1822 1823 Nold = N 1824 Dold = D 1825 N = Nnew 1826 D = Dnew 1827 1828 if(kk/=nrec)then 1829 if((bn2(kk+1)<tol14))exit 1830 end if 1831 end do 1832 1833 ! <r|1/(z-e**(-beta/(2p)*(H-mu)))|r> dz 1834 delta_calc = func_rec(z_path,twotrotter) * N/D *dz_path 1835 if(ii==n_pt_integ)then 1836 ene_acc = ene_acc +half*delta_calc 1837 if(debug_rec) ene_acc4 = ene_acc4 + half*delta_calc 1838 else 1839 ene_acc = ene_acc + delta_calc 1840 if(debug_rec) ene_acc4 = ene_acc4 +delta_calc 1841 end if 1842 end do path3 1843 end if 1844 1845 !#################################################################### 1846 ![xmax,xmax+i*epsilon] 1847 dr_step = one/real(n_pt_integ_path2,dp) 1848 dz_path = cmplx(zero,epsilon*dr_step,dp) 1849 path4: do ii = 0,n_pt_integ_path2 1850 ! z_path = cmplx(xmax,real(ii,dp)*epsilon*dr_step,dp) 1851 z_path = cmplx(xmax,0,dp)+real(ii,dp)*dz_path 1852 1853 Nold = czero 1854 Dold = cone 1855 N = cone 1856 D = z_path - cmplx(an(0),zero,dp) 1857 1858 do kk=1,nrec 1859 Nnew = (z_path - cmplx(an(kk),zero,dp))*N - cmplx(bn2(kk),zero,dp)*Nold 1860 Dnew = (z_path - cmplx(an(kk),zero,dp))*D - cmplx(bn2(kk),zero,dp)*Dold 1861 1862 Nold = N 1863 Dold = D 1864 N = Nnew 1865 D = Dnew 1866 1867 if(kk/=nrec)then 1868 if((bn2(kk+1)<tol14))exit 1869 end if 1870 1871 end do 1872 1873 ! <r|1/(z-e**(-beta/(2p)*(H-mu)))|r> dz 1874 delta_calc = func_rec(z_path,twotrotter) * N/D *dz_path 1875 if(ii==0.or.ii==n_pt_integ_path2)then 1876 ene_acc = ene_acc + half*delta_calc 1877 if(debug_rec) ene_acc2 = ene_acc2 + half*delta_calc 1878 else 1879 ene_acc = ene_acc + delta_calc 1880 if(debug_rec) ene_acc2 = ene_acc2 + delta_calc 1881 end if 1882 end do path4 1883 1884 ene_out = mult*real(ene_acc*cmplx(zero,-piinv,dp),dp) 1885 if(debug_rec) then 1886 ene_out1 = mult*real(ene_acc1*cmplx(zero,-piinv,dp),dp) 1887 ene_out2 = mult*real(ene_acc2*cmplx(zero,-piinv,dp),dp) 1888 ene_out3 = mult*real(ene_acc3*cmplx(zero,-piinv,dp),dp) 1889 ene_out4 = mult*real(ene_acc4*cmplx(zero,-piinv,dp),dp) 1890 end if 1891 1892 call timab(611,2,tsec) 1893 1894 contains 1895 1896 !func_rec(z_path,twotrotter) = log(cone+z_path**twotrotter) 1897 1898 function func_rec(z,x) 1899 1900 complex(dpc) :: func_rec 1901 complex(dpc),intent(in) :: z 1902 real(dp),intent(in) :: x 1903 1904 func_rec = log(cone+z**x) 1905 1906 end function func_rec 1907 1908 end subroutine gran_potrec
ABINIT/green_kernel [ Functions ]
NAME
green_kernel
FUNCTION
this routine compute the fourrier transform of the Green kernel for the recursion method
INPUTS
inf_rmet=define the infinitesimal metric : rprimd*(transpose(rprimd)) divided by the number of discretisation point inf_ucvol=volume of infinitesimal cell mult=variance of the Gaussian (=rtrotter/beta) mpi_enreg=information about MPI parallelization ngfft=contain all needed information about 3D FFT, see ~abinit/doc/variables/vargs.htm#ngfft nfft=total number of fft grid points debug_rec=debugging variable
OUTPUT
ZT_p=fourier transforme of the Green kernel
NOTES
at this time : - need a rectangular box
SOURCE
2370 subroutine green_kernel(ZT_p,inf_rmet,inf_ucvol,mult,mpi_enreg,ngfft,nfft) 2371 2372 !Arguments ------------------------------- 2373 !scalars 2374 integer,intent(in) :: nfft 2375 real(dp),intent(in) :: inf_ucvol,mult 2376 type(MPI_type),intent(in) :: mpi_enreg 2377 !arrays 2378 integer,intent(in) :: ngfft(18) 2379 real(dp),intent(in) :: inf_rmet(3,3) 2380 real(dp),intent(out) :: ZT_p(1:2,0:nfft-1) 2381 2382 !Local variables------------------------------- 2383 !scalars 2384 integer,parameter :: n_green_max=5 2385 integer :: ii,isign,jj,kk,n_green,xx,yy,zz 2386 real(dp) :: acc, norme 2387 character(len=500) :: msg 2388 !arrays 2389 real(dp) :: tsec(2) 2390 real(dp),allocatable :: T_p(:) 2391 2392 ! ************************************************************************* 2393 2394 call timab(603,1,tsec) 2395 2396 norme = (mult/pi)**(onehalf) 2397 2398 ABI_MALLOC(T_p,(0:nfft-1)) 2399 2400 !n_green should be better chosen for non rectangular cell 2401 do xx=1, n_green_max 2402 n_green = xx 2403 if(exp(-mult*dsq_green(xx*ngfft(1),0,0,inf_rmet))<tol14 & 2404 & .and. exp(-mult*dsq_green(0,xx*ngfft(2),0,inf_rmet))<tol14 & 2405 & .and. exp(-mult*dsq_green(0,0,xx*ngfft(3),inf_rmet))<tol14 ) exit 2406 end do 2407 2408 acc = zero 2409 T_p = zero 2410 do kk = 0,ngfft(3)-1 2411 do jj = 0,ngfft(2)-1 2412 do ii = 0,ngfft(1)-1 2413 2414 do xx=-n_green,n_green-1 2415 do yy=-n_green,n_green-1 2416 do zz=-n_green,n_green-1 2417 2418 T_p(ii+ngfft(1)*jj+ngfft(1)*ngfft(2)*kk) = T_p(ii+ngfft(1)*jj+ngfft(1)*ngfft(2)*kk)+ & 2419 & exp(-mult*dsq_green(ii+xx*ngfft(1),jj+yy*ngfft(2),kk+zz*ngfft(3),inf_rmet)) 2420 2421 end do 2422 end do 2423 end do 2424 2425 T_p(ii+ngfft(1)*jj+ngfft(1)*ngfft(2)*kk) = norme*T_p(ii+ngfft(1)*jj+ngfft(1)*ngfft(2)*kk) 2426 acc = acc + inf_ucvol* T_p(ii+ngfft(1)*jj+ngfft(1)*ngfft(2)*kk) 2427 2428 end do 2429 end do 2430 end do 2431 2432 T_p(:)= (one/acc)*T_p(:) 2433 2434 !if(debug_rec)then 2435 write(msg,'(a,d12.3,2(2a,i8),2(2a,3d12.3),2a,d16.6)')& 2436 & ' on the boundary ', exp(-mult*dsq_green(ngfft(1),0,0,inf_rmet)),ch10, & 2437 & ' no zero ', count(T_p>tol14),ch10, & 2438 & ' n_green ', n_green,ch10, & 2439 & ' erreur_n_green ', exp(-mult*dsq_green(n_green*ngfft(1),0,0,inf_rmet)), & 2440 & exp(-mult*dsq_green(0,n_green*ngfft(2),0,inf_rmet)), & 2441 & exp(-mult*dsq_green(0,0,n_green*ngfft(3),inf_rmet)),ch10,& 2442 & ' erreur_troncat ', T_p(ngfft(1)/2), & 2443 & T_p(ngfft(1)*(ngfft(2)/2)), & 2444 & T_P(ngfft(1)*ngfft(2)*(ngfft(3)/2)),ch10, & 2445 & ' erreurT_p ',abs(acc-1.d0) 2446 call wrtout(std_out,msg,'COLL') 2447 !endif 2448 2449 2450 isign = -1 2451 call fourdp(1,ZT_p,T_p,isign,mpi_enreg,nfft,1,ngfft,0) 2452 2453 ABI_FREE(T_p) 2454 2455 ZT_p(:,:) = real(nfft,dp)*ZT_p 2456 2457 2458 call timab(603,2,tsec) 2459 2460 contains 2461 2462 function dsq_green(ii,jj,kk,inf_rmet) 2463 2464 real(dp) :: dsq_green 2465 integer,intent(in) :: ii,jj,kk 2466 real(dp),intent(in) :: inf_rmet(3,3) 2467 dsq_green= inf_rmet(1,1)*dble(ii**2)& 2468 & +inf_rmet(2,2)*dble(jj**2)& 2469 & +inf_rmet(3,3)*dble(kk**2)& 2470 & +two*(inf_rmet(1,2)*dble(ii*jj)& 2471 & +inf_rmet(2,3)*dble(jj*kk)& 2472 & +inf_rmet(3,1)*dble(kk*ii)) 2473 end function dsq_green 2474 2475 end subroutine green_kernel
ABINIT/m_vtorhorec [ Modules ]
NAME
m_vtorhorec
FUNCTION
COPYRIGHT
Copyright (C) 2008-2024 ABINIT group (SLeroux, MMancini). This file is distributed under the terms of the GNU General Public License, see ~abinit/COPYING or http://www.gnu.org/copyleft/gpl.txt .
SOURCE
15 #if defined HAVE_CONFIG_H 16 #include "config.h" 17 #endif 18 19 #include "abi_common.h" 20 21 module m_vtorhorec 22 23 use defs_basis 24 use defs_rectypes 25 use m_distribfft 26 use m_xmpi 27 use m_pretty_rec 28 use m_errors 29 use m_abicore 30 use m_per_cond 31 use m_dtset 32 33 use defs_datatypes, only : pseudopotential_type 34 use defs_abitypes, only : MPI_type 35 use m_time, only : timein, timab 36 use m_rec, only : Calcnrec, init_nlpsprec, cpu_distribution 37 use m_rec_tools, only : reshape_pot, trottersum, get_pt0_pt1 38 use m_spacepar, only : symrhg 39 use m_fourier_interpol, only : transgrid 40 use m_fft, only : fourdp 41 42 #ifdef HAVE_GPU_CUDA 43 use m_gpu_toolbox 44 use m_hidecudarec 45 use m_xredistribute 46 #endif 47 48 implicit none 49 50 private
ABINIT/nlenergyrec [ Functions ]
NAME
nlenergyrec
FUNCTION
During recursion, it computes the non-local energy
INPUTS
rset<recursion_type>=contains all recursion parameters exppot=exponential of -1/tsmear*vtrial (computed only once in vtorhorec) tsmear=temperature (Hartree) trotter=trotter parameter tol=tolerance criteria for stopping recursion_nl ngfft=information about FFT(dtset%ngfft a priori different from ngfftrec) mpi_enreg=information about MPI paralelisation rset<recursion_type> contains all parameter of recursion typat(natom)=type of pseudo potential associated to any atom natom=number of atoms
OUTPUT
enlx=non-local energy
SIDE EFFECTS
NOTES
SOURCE
1939 subroutine nlenergyrec(rset,enlx,exppot,ngfft,natom,typat,tsmear,trotter,tol) 1940 1941 !Arguments ------------------------------------ 1942 !Scalar 1943 integer , intent(in) :: natom,trotter 1944 real(dp), intent(in) :: tsmear,tol 1945 type(recursion_type),intent(in) :: rset 1946 real(dp), intent(out) :: enlx 1947 !Arrays 1948 integer , intent(in) :: typat(natom),ngfft(18) 1949 real(dp), intent(in) :: exppot(0:ngfft(1)*ngfft(2)*ngfft(3)-1) 1950 !Local variables------------------------------- 1951 integer :: iatom,jatom 1952 integer :: ii,ipsp,dim_trott 1953 integer :: ierr,me_count 1954 integer :: ilmn,jlmn,ilm,jlm,in,jn,il 1955 character(len=500) :: msg 1956 logical :: tronc 1957 real(dp) :: rho_nl,normali,mult 1958 type(mpi_type):: mpi_loc 1959 !Arrays 1960 integer :: gcart_loc(3,natom) 1961 integer :: ngfftrec(3),trasl(3) 1962 real(dp) :: tsec(2) 1963 real(dp) :: un0(0:rset%nfftrec) 1964 real(dp),pointer :: projec(:,:,:,:,:) 1965 real(dp),allocatable :: exppotloc(:) 1966 real(dp) :: proj_arr(0:rset%ngfftrec(1)-1,0:rset%ngfftrec(2)-1,0:rset%ngfftrec(3)-1) 1967 1968 ! ************************************************************************* 1969 1970 1971 call timab(612,1,tsec) !!--start time-counter: nlenergyrec 1972 1973 if(rset%debug)then 1974 write(msg,'(80a,a,a)') ('=',ii=1,80),ch10,' nlenergyrec : enter' 1975 call wrtout(std_out,msg,'PERS') 1976 end if 1977 1978 write(msg,'(a)')' -- nlenergyrec -----------------------------------' 1979 call wrtout(std_out,msg,'COLL') 1980 1981 !--Initialisation variables 1982 enlx = zero 1983 mult = two !--is twice for non-spinned systems 1984 ngfftrec = rset%ngfftrec(:3) 1985 gcart_loc = rset%inf%gcart 1986 mpi_loc = rset%mpi 1987 me_count = 0 1988 dim_trott = max(0,2*trotter-1) 1989 nullify(projec) 1990 ABI_MALLOC(projec,(0:rset%ngfftrec(1)-1,0:rset%ngfftrec(2)-1,0:rset%ngfftrec(3)-1,rset%nl%lmnmax,natom)) 1991 projec = zero 1992 1993 tronc = rset%tronc !--True if troncation is used 1994 if(tronc) then 1995 ABI_MALLOC(exppotloc,(0:rset%nfftrec-1)) 1996 end if 1997 1998 1999 !--LOOP ON ATOMS to create projectors-vector 2000 atomloop1: do iatom = 1, natom 2001 ipsp = typat(iatom) 2002 ! --Aquisition,reshape,translation,rotation of the projectors vector 2003 do ilmn = 1,rset%nl%lmnmax 2004 in = rset%nl%indlmn(3,ilmn,ipsp) 2005 ! --Projectors vector in 3-composant vector 2006 projec(:,:,:,ilmn,iatom) = reshape(rset%nl%projec(:,ilmn,ipsp),shape=shape(projec(:,:,:,1,1))) 2007 ! --Moving the projectors vector on the center of the grid 2008 do ii=1,3 2009 projec(:,:,:,ilmn,iatom) = cshift(projec(:,:,:,ilmn,iatom),shift=ngfftrec(ii)/2-gcart_loc(ii,iatom),dim=ii) 2010 end do 2011 end do 2012 2013 end do atomloop1 2014 2015 2016 !################################################################## 2017 !--LOOP ON ATOMS (MAIN LOOP) 2018 atomloop: do iatom = 1, natom 2019 ipsp = typat(iatom) 2020 2021 ! --If troncation is present, the considered atom has to be in the 2022 ! center of the grid so atoms, potential and projectors have to be translated 2023 if(tronc) then 2024 trasl = -rset%inf%gcart(:,iatom)+ngfftrec/2 2025 ! --Translation of atoms 2026 do jatom=1,natom 2027 gcart_loc(:,jatom) = rset%inf%gcart(:,jatom)+trasl 2028 gcart_loc(:,jatom) = modulo(gcart_loc(:,jatom),ngfft(:3)) 2029 ! --Translation of non-local projectors 2030 do ilmn = 1,rset%nl%lmnmax 2031 projec(:,:,:,ilmn,jatom) = reshape(rset%nl%projec(:,ilmn,typat(jatom)),shape=shape(projec(:,:,:,1,1))) 2032 do ii=1,3 2033 projec(:,:,:,ilmn,jatom) = eoshift(projec(:,:,:,ilmn,jatom),shift=ngfftrec(ii)/2-gcart_loc(ii,jatom),dim=ii) 2034 end do 2035 end do 2036 end do 2037 2038 ! --Translation of the potential 2039 call reshape_pot(trasl,ngfft(1)*ngfft(2)*ngfft(3),rset%nfftrec,ngfft(:3),ngfftrec,exppot,exppotloc) 2040 end if 2041 2042 ! --Loop on projectors 2043 projloop: do ilmn = 1,rset%nl%lmnmax 2044 me_count = iatom+ilmn*natom-2 !--counter of the number of iteration 2045 ! --Only the proc me compute 2046 if(mpi_loc%me==mod(me_count,mpi_loc%nproc)) then 2047 ilm = rset%nl%indlmn(4,ilmn,ipsp) 2048 proj_arr = zero 2049 do jlmn = 1,rset%nl%lmnmax 2050 jlm = rset%nl%indlmn(4,jlmn,ipsp) 2051 if(ilm==jlm) then 2052 in = rset%nl%indlmn(3,ilmn,ipsp) 2053 jn = rset%nl%indlmn(3,jlmn,ipsp) 2054 il = rset%nl%indlmn(1,ilmn,ipsp)+1 2055 proj_arr(:,:,:) = proj_arr(:,:,:) + rset%nl%eivec(jn,in,il,ipsp)*projec(:,:,:,jlmn,iatom) 2056 ! write(std_out,*)'l,m,lm,n,n',il-1,rset%nl%indlmn(2,ilmn,ipsp),ilm,in,jn 2057 ! write(std_out,*)'eigevectors',rset%nl%eivec(jn,in,il,ipsp) 2058 2059 end if 2060 end do 2061 2062 un0 = pack(proj_arr(:,:,:),mask=.true.) 2063 normali = sum(un0*un0)*rset%inf%ucvol 2064 un0 = (one/sqrt(normali))*un0 2065 2066 if(tronc)then 2067 call recursion_nl(exppotloc,un0,rho_nl,rset,rset%ngfftrec,& 2068 & tsmear,trotter,dim_trott,tol,typat,& 2069 & natom,projec) 2070 else 2071 call recursion_nl(exppot,un0,rho_nl,rset,rset%ngfftrec,& 2072 & tsmear,trotter,dim_trott,tol,typat,& 2073 & natom,projec) 2074 end if 2075 2076 enlx = enlx+mult*rho_nl*rset%nl%eival(in,il,ipsp)*normali 2077 end if 2078 2079 end do projloop 2080 end do atomloop 2081 2082 !--Sum the contribution to the non-local energy computed by any procs 2083 call xmpi_sum(enlx,mpi_loc%comm_bandfft,ierr) 2084 2085 if(associated(projec)) then 2086 ABI_FREE(projec) 2087 end if 2088 if(tronc) then 2089 ABI_FREE(exppotloc) 2090 end if 2091 2092 if(rset%debug)then 2093 write(msg,'(80a,a,a)') ('=',ii=1,80),ch10,' nlenergyrec : exit' 2094 call wrtout(std_out,msg,'PERS') 2095 end if 2096 2097 call timab(612,2,tsec) !--stop time-counter: nlenergyrec 2098 2099 end subroutine nlenergyrec
ABINIT/recursion [ Functions ]
NAME
recursion
FUNCTION
This routine computes the recursion coefficients and the corresponding continued fraction to get the density at a point from a fixed potential.
INPUTS
exppot=exponential of -1/tsmear*vtrial (computed only once in vtorhorec) coordx, coordy, coordz=coordonnees of the computed point nrec=order of recursion fermie=fermi energy (Hartree) tsmear=temperature (Hartree) dim_trott=dimension of the partial fraction decomposition rtrotter=trotter parameter (real) ZT_p=fourier transform of the Green krenel (computed only once in vtorhorec) typat(:)=type of psp associated to any atom tol=tolerance criteria for stopping recursion debug=debugging variable mpi_enreg=information about MPI paralelisation nfft=number of points in FFT grid ngfft=information about FFT metrec<type(metricrec_type)>=information concerning the infinitesimal metrics inf_ucvol=infinitesimal unit cell volume tim_fourdp=time counter for fourdp natom=number of atoms projec(ngfftrec(1),ngfftrec(2),ngfftrec(3),lmnmax,natom) is the vector, on the ngfftrec grid containing the non-lacal projector $Y_{lm}(r-R_A)f_{lk}(r-R_A) tim= 0 if the time spent in the routine is not taken into account,1 otherwise. For example when measuring time for loading balancing, we don't want to add the time spent in this to the total time calculation
OUTPUT
rho_out=result of the continued fraction multiplied by a multiplicator an, bn2 : coefficient given by recursion.
SIDE EFFECTS
NOTES
at this time : - exppot should be replaced by ? - coord should be replaced by ? - need a rectangular box (rmet diagonal matrix)
SOURCE
2526 subroutine recursion(exppot,coordx,coordy,coordz,an,bn2,rho_out, & 2527 & nrec,fermie,tsmear,rtrotter,dim_trott, & 2528 & ZT_p, tol,typat, & 2529 & nlrec,mpi_enreg,& 2530 & nfft,ngfft,metrec,& 2531 & tim_fourdp,natom,projec,tim) 2532 2533 2534 use m_linalg_interfaces 2535 2536 !Arguments ------------------------------- 2537 !scalars 2538 integer,intent(in) :: coordx,coordy,coordz,nfft,nrec,tim 2539 integer,intent(in) :: tim_fourdp,natom,dim_trott 2540 real(dp),intent(in) :: fermie,tol,tsmear,rtrotter 2541 real(dp), intent(out) :: rho_out 2542 type(MPI_type),intent(in) :: mpi_enreg 2543 type(nlpsprec_type),intent(in) :: nlrec 2544 type(metricrec_type),intent(in) :: metrec 2545 !arrays 2546 integer, intent(in) :: ngfft(18) 2547 integer, intent(in) :: typat(natom) 2548 real(dp), intent(in) :: ZT_p(1:2, 0:nfft-1) 2549 real(dp), intent(in) :: exppot(0:nfft-1) 2550 real(dp), intent(in) :: projec(0:,0:,0:,1:,1:) 2551 real(dp), intent(out) :: an(0:nrec),bn2(0:nrec) 2552 !Local variables------------------------------- 2553 !not used, debugging purpose only 2554 !for debugging purpose, detailled printing only once for density and ekin 2555 !scalars 2556 integer, parameter :: level = 7, minrec = 3 2557 integer :: irec,isign,timab_id,ii 2558 real(dp) :: switchimu,switchu 2559 real(dp) :: bb,beta,mult,prod_b2,error,errold 2560 real(dp) :: inf_ucvol,pi_on_rtrotter,twortrotter,exp1,exp2 2561 complex(dpc) :: cinv2rtrotter,coeef_mu,facrec0 2562 ! character(len=500) :: msg 2563 !arrays 2564 real(dp) :: tsec(2) 2565 real(dp) :: inf_tr(3) 2566 real(dp) :: Zvtempo(1:2, 0:nfft-1) 2567 real(dp) :: unold(0:nfft-1),vn(0:nfft-1),un(0:nfft-1) 2568 complex(dpc) :: acc_rho(0:nrec) 2569 complex(dpc) :: D(0:dim_trott),Dold(0:dim_trott) 2570 complex(dpc) :: N(0:dim_trott),Nold(0:dim_trott) 2571 2572 ! ************************************************************************* 2573 2574 !--If count time or not 2575 timab_id = 616; if(tim/=0) timab_id = 606; 2576 2577 call timab(timab_id,1,tsec) 2578 2579 !############################################################## 2580 !--Initialisation of metrics 2581 inf_ucvol = metrec%ucvol 2582 inf_tr = metrec%tr 2583 mult = two/inf_ucvol !non-spined system 2584 2585 beta = one/tsmear 2586 !--Variables for optimisation 2587 pi_on_rtrotter = pi/rtrotter 2588 twortrotter = two*rtrotter 2589 exp1 = exp((beta*fermie)/(rtrotter)) 2590 exp2 = exp(beta*fermie/(twortrotter)) 2591 cinv2rtrotter = cmplx(one/twortrotter,zero,dp) 2592 coeef_mu = cmplx(one/exp2,zero,dp) 2593 2594 !--Initialisation of an,bn,un.... 2595 N = czero; D = cone 2596 facrec0 = cone 2597 Nold = czero; Dold = czero 2598 2599 an = zero; bn2 = zero; bn2(0) = one 2600 bb = zero; vn = zero; unold = zero 2601 !--u0 is a Dirac function 2602 un = zero 2603 un(coordx+ngfft(1)*(coordy+ngfft(2)*coordz)) = one/sqrt(inf_ucvol) 2604 2605 !--Initialisation of accumulated density 2606 acc_rho = czero 2607 !--Initialisation of estimated error 2608 prod_b2 = twortrotter/exp1 2609 errold = zero 2610 2611 !############################################################## 2612 !--Main loop 2613 maindo : do irec = 0, nrec 2614 2615 ! --Get an and bn2 coef by the lanczos method 2616 2617 ! --Computation of exp(-beta*V/8*p)*un or exp(-beta*V/4*p)*un 2618 ! depending on if nl part has to be calculated or not. 2619 vn = exppot * un 2620 2621 ! --First Non-local psp contribution: (Id+sum_atom int dr1(E(r,r1))vn(r1)) 2622 ! --Computation of exp(-beta*V_NL/4*p)*vn 2623 if(nlrec%nlpsp) then 2624 call timab(timab_id,2,tsec) 2625 call vn_nl_rec(vn,natom,typat,ngfft(:3),inf_ucvol,nlrec,projec) 2626 call timab(timab_id,1,tsec) 2627 2628 ! --Computation of exp(-beta*V/8*p)*vn in nonlocal case 2629 vn = exppot * vn 2630 end if !--End if on nlrec%nlpsp 2631 2632 ! --Convolution with the Green kernel 2633 ! --FFT of vn 2634 isign = -1 2635 call fourdp(1,Zvtempo,vn,isign,mpi_enreg,nfft,1,ngfft,tim_fourdp) 2636 2637 ! --F(T)F(vn) 2638 do ii = 0,nfft-1 2639 switchu = Zvtempo(1,ii) 2640 switchimu = Zvtempo(2,ii) 2641 Zvtempo(1,ii) = switchu*ZT_p(1,ii) - switchimu*ZT_p(2,ii) 2642 Zvtempo(2,ii) = switchu*ZT_p(2,ii) + switchimu*ZT_p(1,ii) 2643 end do 2644 2645 ! --F^-1(F(T)F(vn)) 2646 isign = 1 2647 call fourdp(1,Zvtempo,vn,isign,mpi_enreg,nfft,1,ngfft,tim_fourdp) 2648 2649 ! --Computation of exp(-beta*V/8*p)*un or exp(-beta*V/4*p)*un 2650 ! depending on if nl part has to be calculated or not. 2651 2652 vn = inf_ucvol * exppot * vn 2653 2654 ! --Second Non-local psp contribution: (Id+sum_atom E(r,r1))vn 2655 if(nlrec%nlpsp) then 2656 call timab(timab_id,2,tsec) 2657 call vn_nl_rec(vn,natom,typat,ngfft(:3),inf_ucvol,nlrec,projec) 2658 call timab(timab_id,1,tsec) 2659 2660 ! --Computation of exp(-beta*V/8*p)*vn in nonlocal case 2661 vn = exppot * vn 2662 end if !--End if on nlrec%nlpsp 2663 2664 ! --Multiplication of a and b2 coef by exp(beta*fermie/(two*rtrotter)) must be done in the continued fraction computation 2665 ! --Computation of a and b2 2666 an(irec) = inf_ucvol*ddot(nfft,vn,1,un,1) 2667 2668 ! --an must be positive real 2669 ! --We must compute bn2 and prepare for the next iteration 2670 if(irec<nrec)then 2671 do ii = 0,nfft-1 2672 switchu = un(ii) 2673 un(ii) = vn(ii)-an(irec)*un(ii)-bb*unold(ii) 2674 unold(ii) = switchu 2675 bn2(irec+1) = bn2(irec+1)+inf_ucvol*un(ii)*un(ii) 2676 end do 2677 bb = sqrt(bn2(irec+1)) 2678 un = (one/bb)*un 2679 end if 2680 2681 ! ###################################################### 2682 ! --Density computation 2683 ! density computation is done inside the main looping, juste after the calculus of a and b2, in order to make 2684 ! it possible to stop the recursion at the needed accuracy, without doing more recursion loop than needed - 2685 ! further developpement 2686 2687 ! !--using the property that: sum_i(bi*c)^2|(z-ai*c)=1/c*sum_i(bi)^2|(z/c-ai) 2688 ! !and for c =exp(-beta*fermie/(two*rtrotter) 2689 2690 2691 call trottersum(dim_trott,error,prod_b2,pi_on_rtrotter,& 2692 & facrec0,coeef_mu,exp1,& 2693 & an(irec),bn2(irec),& 2694 & N,D,Nold,Dold) 2695 2696 2697 if(irec/=nrec .and. irec>=minrec)then 2698 if((bn2(irec+1)<tol14).or.(mult*error<tol.and.errold<tol)) exit 2699 end if 2700 errold = mult*error 2701 end do maindo 2702 !--Accumulated density 2703 rho_out = mult*real(cone-sum(N/D)*cinv2rtrotter,dp) 2704 2705 2706 call timab(timab_id,2,tsec) 2707 2708 end subroutine recursion
ABINIT/recursion_nl [ Functions ]
NAME
recursion_nl
FUNCTION
Given a $|un>$ vector on the real-space grid this routine calculates the density in $|un>$ by recursion method.
INPUTS
exppot=exponential of -1/tsmear*vtrial (computed only once in vtorhorec) trotter=trotter parameter dim_trott=dimension of the partial fraction decomposition tsmear=temperature (Hartree) tol=tolerance criteria for stopping recursion_nl ngfft=information about FFT(dtset%ngfft a priori different from ngfftrec) rset<recursion_type> contains all parameter of recursion typat(natom)=type of pseudo potential associated to any atom natom=number of atoms projec(ngfftrec(1),ngfftrec(2),ngfftrec(3),lmnmax,natom) is the vector, on the ngfftrec grid containing the non-lacal projector $Y_{lm}(r-R_A)f_{lk}(r-R_A)
OUTPUT
rho_out=result of the continued fraction multiplied by a multiplicator
SIDE EFFECTS
un(:,:,:)=initial vector on the grid. it is changed in output
NOTES
at this time : - need a rectangular box (rmet diagonal matrix)
SOURCE
2745 subroutine recursion_nl(exppot,un,rho_out,rset,ngfft, & 2746 & tsmear,trotter,dim_trott,tol,typat,& 2747 & natom,projec) 2748 2749 2750 use m_linalg_interfaces 2751 2752 !Arguments ------------------------------- 2753 !scalars 2754 integer,intent(in) :: trotter,natom,dim_trott 2755 real(dp),intent(in) :: tol,tsmear 2756 type(recursion_type),intent(in) :: rset 2757 real(dp), intent(out) :: rho_out 2758 !arrays 2759 integer,intent(in) :: typat(natom),ngfft(18) 2760 real(dp),intent(in) :: exppot(0:ngfft(1)*ngfft(2)*ngfft(3)-1) 2761 real(dp),intent(inout) :: un(0:rset%nfftrec-1) 2762 real(dp),pointer :: projec(:,:,:,:,:) 2763 !Local variables------------------------------- 2764 !scalars 2765 integer, parameter :: minrec = 3 2766 integer :: irec,isign,ii 2767 real(dp) :: bb,beta,mult,prod_b2,rtrotter 2768 real(dp) :: inf_ucvol,pi_on_rtrotter,twortrotter,exp1 2769 real(dp) :: exp2,error,errold 2770 real(dp) :: switchu,switchimu 2771 complex(dpc) :: facrec0,cinv2rtrotter,coeef_mu 2772 character(len=500) :: msg 2773 type(mpi_type),pointer:: mpi_loc 2774 !arrays 2775 real(dp):: tsec(2) 2776 real(dp):: inf_tr(3) 2777 real(dp):: an(0:rset%min_nrec),bn2(0:rset%min_nrec) 2778 real(dp):: vn(0:rset%nfftrec-1) 2779 real(dp):: unold(0:rset%nfftrec-1) 2780 real(dp):: Zvtempo(1:2,0:rset%nfftrec-1) 2781 complex(dpc) :: acc_rho(0:rset%min_nrec) 2782 complex(dpc) :: D(0:dim_trott),Dold(0:dim_trott) 2783 complex(dpc) :: N(0:dim_trott),Nold(0:dim_trott) 2784 2785 ! ************************************************************************* 2786 2787 call timab(608,1,tsec) !--start time-counter: recursion_nl 2788 if(rset%debug)then 2789 msg=' ' 2790 call wrtout(std_out,msg,'COLL') 2791 end if 2792 2793 !############################################################## 2794 beta = one/tsmear 2795 2796 !--Rewriting the trotter parameter 2797 rtrotter = max(half,real(trotter,dp)) 2798 2799 !--Initialisation of mpi 2800 mpi_loc => rset%mpi 2801 2802 !--Initialisation of metrics 2803 inf_ucvol = rset%inf%ucvol 2804 inf_tr = rset%inf%tr 2805 mult = one !--In the case of the calculus of the NL-energy 2806 2807 !--Initialisation of an,bn,un.... 2808 N = czero; D = cone 2809 facrec0 = cone 2810 Nold = czero; Dold = czero 2811 2812 an = zero; bn2 = zero; bn2(0) = one 2813 bb = zero; vn = zero; unold = zero 2814 2815 !--Variables for optimisation 2816 pi_on_rtrotter = pi/rtrotter 2817 twortrotter = two*rtrotter 2818 exp1 = exp((beta*rset%efermi)/(rtrotter)) 2819 exp2 = exp(beta*rset%efermi/(twortrotter)) 2820 cinv2rtrotter = cmplx(one/twortrotter,zero,dp) 2821 coeef_mu = cmplx(one/exp2,zero,dp) 2822 2823 !--Initialisation of accumulated density 2824 acc_rho = czero 2825 !--Initialisation of estimated error 2826 prod_b2 = twortrotter/exp1 2827 errold = zero 2828 2829 !############################################################## 2830 !--Main loop 2831 maindo : do irec = 0, rset%min_nrec 2832 ! --Get an and bn2 coef by the lanczos method 2833 2834 ! --Computation of exp(-beta*V/8*p)*un 2835 vn = exppot * un 2836 2837 ! --First Non-local psp contribution: (Id+sum_atom E(r,r1))vn 2838 call timab(608,2,tsec) 2839 call vn_nl_rec(vn,natom,typat,rset%ngfftrec(:3),inf_ucvol,rset%nl,projec) 2840 call timab(608,1,tsec) 2841 2842 ! --Computation of exp(-beta*V/8*p)*un 2843 vn = exppot * vn 2844 2845 ! --Convolution with the Green kernel 2846 ! --FFT of vn 2847 isign = -1 2848 call fourdp(1,Zvtempo,vn,isign,mpi_loc,rset%nfftrec,1,rset%ngfftrec,6) 2849 2850 ! --F(T)F(vn) 2851 do ii = 0,rset%nfftrec-1 2852 switchu = Zvtempo(1,ii) 2853 switchimu = Zvtempo(2,ii) 2854 Zvtempo(1,ii) = switchu*rset%ZT_p(1,ii) - switchimu*rset%ZT_p(2,ii) 2855 Zvtempo(2,ii) = switchu*rset%ZT_p(2,ii) + switchimu*rset%ZT_p(1,ii) 2856 end do 2857 2858 ! --F^-1(F(T)F(vn)) 2859 isign = 1 2860 call fourdp(1,Zvtempo,vn,isign,mpi_loc,rset%nfftrec,1,rset%ngfftrec,6) 2861 2862 ! --Computation of exp(-beta*V/2*p)*vn 2863 vn = inf_ucvol * exppot * vn 2864 2865 ! --Second Non-local psp contribution: (Id+sum_atom E(r,r1))vn 2866 call timab(608,2,tsec) 2867 call vn_nl_rec(vn,natom,typat,rset%ngfftrec(:3),inf_ucvol,rset%nl,projec) 2868 call timab(608,1,tsec) 2869 2870 ! --Computation of exp(-beta*V/8*p)*vn 2871 vn = exppot * vn 2872 2873 2874 ! --Multiplication of a and b2 coef by exp(beta*fermie/(2.d0*rtrotter)) must be done in the continued fraction computation 2875 ! --Computation of a and b2 2876 an(irec) = inf_ucvol*ddot(rset%nfftrec,vn,1,un,1) !--an must be positive real 2877 2878 ! --We must compute bn2 and prepare for the next iteration 2879 if(irec<rset%min_nrec)then 2880 do ii = 0,rset%nfftrec-1 2881 switchu = un(ii) 2882 un(ii) = vn(ii)-an(irec)*un(ii)-bb*unold(ii) 2883 unold(ii) = switchu 2884 bn2(irec+1) = bn2(irec+1)+inf_ucvol*un(ii)*un(ii) 2885 end do 2886 bb = sqrt(bn2(irec+1)) 2887 un = (one/bb)*un 2888 end if 2889 2890 ! ###################################################### 2891 ! --Density computation 2892 ! in order to make it possible to stop the recursion_nl at the 2893 ! needed accuracy, without doing more recursion_nl loop than needed further developpement 2894 2895 call trottersum(dim_trott,error,& 2896 & prod_b2,pi_on_rtrotter,& 2897 & facrec0,coeef_mu,exp1,& 2898 & an(irec),bn2(irec),& 2899 & N,D,Nold,Dold) 2900 2901 2902 if(irec/=rset%min_nrec .and. irec>=minrec)then 2903 if((bn2(irec+1)<tol14).or.(mult*error<tol.and.errold<tol)) exit 2904 end if 2905 errold = mult*error 2906 end do maindo 2907 !--Accumulated density 2908 rho_out = mult*real(cone-sum(N/D)*cinv2rtrotter,dp) 2909 2910 call timab(608,2,tsec) !--stop time-counter: recursion_nl 2911 2912 end subroutine recursion_nl
ABINIT/vn_nl_rec [ Functions ]
NAME
vn_nl_rec
FUNCTION
this routine computes the contribution to the vector vn, during recursion, due to the non-local psp.
INPUTS
vn(:,:,:)=the vector on the real-space grid. inf_ucvol=volume of infinitesimal cell natom=number of atoms typat(natom)=the type of psps associated to the atoms ngfftrec(3)=first 3 components of ngfftrec (truncated box, if different from ngfft) for the real-space grid nlrec<type(nlpsprec_type)> in recursion_type containing information concerning psp projec(ngfftrec(1),ngfftrec(2),ngfftrec(3),lmnmax,natom) is the vector, on the ngfftrec grid containing the non-lacal projector $Y_{lm}(r-R_A)f_{lk}(r-R_A)
OUTPUT
vn_nl(:,:,:)=the non_local contribution to vn
NOTES
SOURCE
2941 subroutine vn_nl_rec(vn,natom,typat,ngfftrec,inf_ucvol,nlrec,projec) 2942 2943 2944 use m_linalg_interfaces 2945 2946 !Arguments ------------------------------- 2947 !scalars 2948 integer,intent(in) :: natom 2949 real(dp),intent(in) :: inf_ucvol 2950 type(nlpsprec_type),intent(in) :: nlrec 2951 !arrays 2952 integer,intent(in) :: ngfftrec(3),typat(natom) 2953 real(dp),intent(in) :: projec(0:,0:,0:,1:,1:) 2954 real(dp),intent(inout):: vn(0:ngfftrec(1)*ngfftrec(2)*ngfftrec(3)-1) 2955 !Local variables------------------------------- 2956 !scalars 2957 integer :: iatom,nfftrec 2958 integer :: jlmn,il,in,jn 2959 integer :: ipsp,ilmn 2960 integer :: npsp,lmnmax 2961 real(dp):: vn_nl_loc 2962 !arrays 2963 real(dp):: vn_nl(0:ngfftrec(1)-1,0:ngfftrec(2)-1,0:ngfftrec(3)-1) 2964 real(dp):: vtempo(0:ngfftrec(1)-1,0:ngfftrec(2)-1,0:ngfftrec(3)-1) 2965 real(dp):: tsec(2) 2966 ! ************************************************************************* 2967 2968 call timab(615,1,tsec) 2969 !--Initialisation 2970 2971 vn_nl = zero 2972 npsp = nlrec%npsp 2973 lmnmax = nlrec%lmnmax 2974 nfftrec = product(ngfftrec) 2975 vtempo(:,:,:) = reshape(source=vn,shape=ngfftrec(:3)) 2976 2977 !--Sum_iatom \int dr1 E(r-r_a,r1-r_a)vn(r1) *infucvol 2978 do iatom=1,natom !--Loop on atoms 2979 ipsp = typat(natom) 2980 2981 ! --If psp(typat(iatom)) is local then cycle 2982 if(all(nlrec%pspinfo(:,ipsp)==0)) cycle 2983 2984 2985 ! write(std_out,*)'lmnmax',nlrec%lmnmax,lmnmax 2986 2987 do ilmn = 1, lmnmax 2988 do jlmn = 1,lmnmax 2989 if(nlrec%indlmn(4,ilmn,ipsp)==nlrec%indlmn(4,jlmn,ipsp)) then 2990 il = 1+nlrec%indlmn(1,jlmn,ipsp) 2991 in = nlrec%indlmn(3,ilmn,ipsp) 2992 jn = nlrec%indlmn(3,jlmn,ipsp) 2993 vn_nl_loc = ddot(nfftrec,projec(:,:,:,jlmn,iatom),1,vtempo,1) 2994 vn_nl = vn_nl+projec(:,:,:,ilmn,iatom)*vn_nl_loc*nlrec%mat_exp_psp_nl(in,jn,il,ipsp) 2995 end if 2996 end do 2997 end do 2998 end do !--End loop on atoms 2999 vtempo = vtempo + vn_nl*inf_ucvol 3000 3001 vn = reshape(source=vtempo,shape=(/nfftrec/)) 3002 3003 call timab(615,2,tsec) 3004 3005 end subroutine vn_nl_rec
ABINIT/vtorhorec [ Functions ]
NAME
vtorhorec
FUNCTION
This routine computes the new density from a fixed potential (vtrial) using a recursion method
INPUTS
deltastep= if 0 the iteration step is equal to dtset%nstep initialized= if 0 the initialization of the gstate run is not yet finished operator (ground-state symmetries) dtset <type(dataset_type)>=all input variables for this dataset irrzon(nfft**(1-1/nsym),2,(nspden/nsppol)-3*(nspden/4))=irreducible zone data nfftf=(effective) number of FFT grid points (for this processor) nspden=number of spin-density components nsppol=1 for unpolarized, 2 for spin-polarized nsym=number of symmetry elements in space group phnons(2,nfft**(1-1/nsym),(nspden/nsppol)-3*(nspden/4))=nonsymmorphic translation phases vtrial(nfft,nspden)=INPUT Vtrial(r). rset <type(recursion_type)> all variables for recursion rprimd(3,3)=dimensional primitive translations in real space (bohr) gprimd(3,3)=dimensional primitive translations in reciprocal space
OUTPUT
ek=kinetic energy part of total energy. enlx=nonlocal psp + potential Fock ACE part of total energy. entropy=entropy due to the occupation number smearing (if metal) e_eigenvalues=Sum of the eigenvalues - Band energy (Hartree) fermie=fermi energy (Hartree) grnl(3*natom)=stores grads of nonlocal energy wrt length scales (3x3 tensor) and grads wrt atomic coordinates (3*natom)
SIDE EFFECTS
rhog(2,nfft)=array for Fourier transform of electron density rhor(nfft,nspden)=array for electron density in electrons/bohr**3. rset%efermi= fermi energy
NOTES
at this time : - grnl in not implemented - symetrie usage not implemented (irrzon not used and nsym should be 1) - spin-polarized not implemented (nsppol must be 1, nspden ?) - phnons used only in symrhg - need a rectangular box (ngfft(1)=ngfft(2)=ngfft(3))
SOURCE
108 subroutine vtorhorec(dtset,& 109 & ek,enlx,entropy,e_eigenvalues,fermie,& 110 & grnl,initialized,irrzon,nfftf,phnons,& 111 & rhog, rhor, vtrial,rset,deltastep,rprimd,gprimd) 112 113 !Arguments ------------------------------- 114 !scalars 115 integer,intent(in) :: initialized 116 integer,intent(in) :: nfftf,deltastep 117 real(dp),intent(out) :: e_eigenvalues,ek,enlx,entropy,fermie 118 type(dataset_type),intent(in) :: dtset 119 type(recursion_type),intent(inout) :: rset 120 !arrays 121 integer, intent(in) :: irrzon(:,:,:) 122 real(dp),intent(in) :: rprimd(3,3),gprimd(3,3) 123 real(dp),intent(in) :: phnons(:,:,:) 124 real(dp),intent(in) :: vtrial(:,:) 125 real(dp),intent(inout) :: rhog(:,:) 126 real(dp),intent(out) :: grnl(:) !vz_i 127 real(dp),intent(inout) :: rhor(:,:) !vz_i 128 129 !Local variables------------------------------- 130 !scalars 131 integer :: nfftrec, dim_entro,ii1,jj1,kk1 132 integer :: ierr,ii,ilmn,ipsp,dim_trott 133 integer :: ipoint,ipointlocal,jj,kk,irec 134 integer :: n1,n2,n3,n4,n_pt_integ_entropy 135 integer :: nrec,iatom,min_pt,max_pt,swt_tm 136 integer :: get_K_S_G 137 integer :: tim_fourdp,trotter 138 real(dp),parameter :: perc_vmin=one 139 real(dp) :: beta,drho,drhomax 140 real(dp) :: entropy1,entropy2,entropy3,entropy4 141 real(dp) :: entropylocal,entropylocal1,entropylocal2 142 real(dp) :: entropylocal3,entropylocal4,gran_pot,gran_pot1,gran_pot2 143 real(dp) :: gran_pot3,gran_pot4,gran_pot_local,gran_pot_local1 144 real(dp) :: gran_pot_local2,gran_pot_local3,gran_pot_local4 145 real(dp) :: inf_ucvol,intrhov,factor 146 real(dp) :: nelect,potmin,rtrotter,toldrho,tolrec,tsmear 147 real(dp) :: xmax,nlpotmin,ratio1,ratio2,ratio4,ratio8 148 type(recparall_type) :: recpar 149 character(len=500) :: msg 150 !arrays 151 integer :: ngfftrec(18),trasl(3) 152 integer,pointer :: gcart_loc(:,:) 153 integer,allocatable :: bufsize(:), bufdispl(:) 154 real(dp) :: tsec(2),tsec2(2) 155 real(dp) :: exppot(0:dtset%ngfft(1)*dtset%ngfft(2)*dtset%ngfft(3)-1) 156 real(dp),target :: rholocal(1:rset%par%ntranche) 157 real(dp),target :: alocal(0:rset%min_nrec,1:rset%par%ntranche) 158 real(dp),target :: b2local(0:rset%min_nrec,1:rset%par%ntranche) 159 real(dp),pointer :: rho_wrk(:) 160 real(dp),pointer :: a_wrk(:,:),b2_wrk(:,:) 161 real(dp),allocatable :: rholocal_f(:), rholoc_2(:) 162 real(dp),allocatable :: rhogf(:,:),rhogc(:,:),aloc_copy(:,:),b2loc_copy(:,:) 163 real(dp),allocatable :: gran_pot_v_f(:,:),gran_pot_v_c(:,:),gran_pot_v_2(:,:) 164 real(dp),allocatable :: entropy_v_f(:,:),entropy_v_c(:,:),entropy_v_2(:,:) 165 real(dp),allocatable :: ablocal_1(:,:,:),ablocal_2(:,:,:),ablocal_f(:,:,:) 166 real(dp),allocatable :: exppotloc(:) 167 real(dp),allocatable :: projec(:,:,:,:,:) 168 #if defined HAVE_GPU_CUDA 169 integer :: max_rec 170 integer,allocatable :: vcount_0(:), displs_0(:) 171 integer,allocatable :: vcount_1(:), displs_1(:) 172 real(dp),allocatable,target :: rho_hyb(:) 173 real(dp),allocatable,target :: a_hyb(:,:),b2_hyb(:,:) 174 real(cudap),allocatable :: an_dev(:,:) 175 real(cudap),allocatable :: bn2_dev(:,:) 176 #endif 177 178 ! ********************************************************************* 179 if(rset%debug)then 180 write(msg,'(80a,a,a)') ('=',ii=1,80),ch10,' vtorhorec : enter ' 181 call wrtout(std_out,msg,'PERS') 182 end if 183 184 call timab(21,1,tsec) 185 call timab(600,1,tsec2) 186 !################################################################################################## 187 !!--Initalization in the FIRST time in VTORHOREC is made in SCFCV by FIRST_REC routine 188 !!--Parameters for the recursion method AND Initialisation 189 190 trotter = dtset%recptrott !--trotter parameter 191 nelect = dtset%nelect !--number of electrons 192 193 toldrho = dtset%rectolden !--tollerance for density 194 tolrec = toldrho*1.d-2 !--tollerance for local density 195 196 tsmear = dtset%tsmear !--temperature 197 beta = one/tsmear !--inverse of temperature 198 199 factor = real(dtset%recgratio**3*100*rset%mpi%nproc,dp)/real(nfftf,dp) 200 201 !--Assignation of the rset variable: 202 nrec = rset%min_nrec 203 nfftrec = rset%nfftrec 204 ngfftrec = rset%ngfftrec 205 inf_ucvol = rset%inf%ucvol 206 207 min_pt = rset%par%displs(rset%mpi%me)+1 208 max_pt = min_pt+rset%par%vcount(rset%mpi%me)-1 209 210 !--In the last self-constistent loop or if density is converged the 211 !thermodynamics quantities are calculated 212 get_K_S_G = 0; if(deltastep==0 .or. rset%quitrec/=0) get_K_S_G = 1; 213 214 !--Rewriting the trotter parameter 215 rtrotter = max(half,real(trotter,dp)) 216 dim_trott = max(0,2*trotter-1) 217 218 !--Variables Optimisation 219 ratio1 = beta/rtrotter 220 ratio2 = ratio1/two 221 ratio4 = ratio1/four 222 ratio8 = ratio1/eight 223 224 !--Integration points for entropy 225 n_pt_integ_entropy = max(25,dtset%recnpath) 226 227 !-- energies non-local: at day not implemented 228 enlx = zero 229 grnl = zero 230 !jmb 231 ek = zero 232 233 !--only a copy of ngfft(1:3) and nfft (to purge!!) 234 n1 = dtset%ngfft(1) ; n2 = dtset%ngfft(2) ; n3 = dtset%ngfft(3) 235 n4 = n3 236 237 !--time switch to measure gpu-cpu syncrhonisation 238 swt_tm = 0 ; !no gpu initally 239 240 exppot = zero 241 nullify(gcart_loc) 242 243 if(dtset%rectesteg==1)then 244 ! --Free electron gas case 245 exppot = one 246 else 247 if(.not.(rset%nl%nlpsp)) then 248 ! --Local case 249 ! --COMPUTATION OF exp( -beta*pot/(4*rtrotter)) 250 ABI_MALLOC(gcart_loc,(0,0)) 251 gcart_loc = 0 252 exppot = exp( -(ratio4*vtrial(:,1))) 253 else 254 ! --Non-Local case 255 ! --COMPUTATION OF exp(-beta*pot/(8*rtrotter)) 256 exppot = exp( -(ratio8*vtrial(:,1))) 257 258 ABI_MALLOC(gcart_loc,(3,dtset%natom)) 259 gcart_loc = rset%inf%gcart 260 ABI_MALLOC(projec,(0:ngfftrec(1)-1,0:ngfftrec(2)-1,0:ngfftrec(3)-1,rset%nl%lmnmax,dtset%natom)) 261 projec = zero 262 263 if(.not.(rset%tronc)) then 264 do iatom =1, dtset%natom 265 ipsp = dtset%typat(iatom) 266 do ilmn = 1,rset%nl%lmnmax 267 projec(:,:,:,ilmn,iatom) = reshape(rset%nl%projec(:,ilmn,ipsp),shape=shape(projec(:,:,:,1,1))) 268 do ii=1,3 269 projec(:,:,:,ilmn,iatom) = cshift(projec(:,:,:,ilmn,iatom),shift=ngfftrec(ii)/2-gcart_loc(ii,iatom),dim=ii) 270 end do 271 end do 272 end do 273 end if 274 end if 275 end if 276 277 !################################################################################### 278 !MAIN LOOP 279 280 rholocal = zero; alocal = zero; b2local = zero 281 ipointlocal = 1 282 283 !--Allocation: if hybrid calculation is done then I have to use 284 !balanced work on devices. 285 nullify(rho_wrk,a_wrk,b2_wrk) 286 287 if(rset%load == 1)then 288 #ifdef HAVE_GPU_CUDA 289 ABI_MALLOC(rho_hyb,(1:rset%GPU%par%npt)) 290 ABI_MALLOC(a_hyb,(0:nrec,1:rset%GPU%par%npt)) 291 ABI_MALLOC(b2_hyb,(0:nrec,1:rset%GPU%par%npt)) 292 rho_hyb = zero; a_hyb = zero; b2_hyb = zero 293 rho_wrk => rho_hyb 294 a_wrk => a_hyb 295 b2_wrk => b2_hyb 296 recpar = rset%GPU%par 297 #endif 298 else 299 rho_wrk => rholocal 300 a_wrk => alocal 301 b2_wrk => b2local 302 recpar = rset%par 303 end if 304 305 !#if defined HAVE_GPU_CUDA 306 !if(rset%debug)then 307 !write (std_out,*) 'rset%recGPU%nptrec ',rset%recGPU%nptrec 308 !write (std_out,*) 'rset%gpudevice ',rset%gpudevice 309 !write (std_out,*) 'rset%ngfftrec ',rset%ngfftrec(1:3) 310 !write (std_out,*) 'rset%min_nrec ',rset%min_nrec 311 !write (std_out,*) 'rset%par%ntranche ',rset%par%ntranche 312 !write (std_out,*) 'rset%par%min_pt ',rset%par%min_pt,min_pt 313 !write (std_out,*) 'rset%par%max_pt ',rset%par%max_pt,max_pt 314 !write (std_out,*) 'pt0 ',rset%par%pt0%x,rset%par%pt0%y,rset%par%pt0%z 315 !write (std_out,*) 'pt1 ',rset%par%pt1%x,rset%par%pt1%y,rset%par%pt1%z 316 !end if 317 !#endif 318 319 if(rset%gpudevice>=0) then 320 #if defined HAVE_GPU_CUDA 321 swt_tm = 1; 322 call timab(607,1,tsec2) 323 ABI_MALLOC(an_dev,(0:recpar%npt-1,0:nrec)) 324 ABI_MALLOC(bn2_dev,(0:recpar%npt-1,0:nrec)) 325 an_dev = zero 326 bn2_dev = zero; bn2_dev(:,0) = one 327 328 call cudarec( rset, exppot,an_dev,bn2_dev,& 329 & beta,trotter,tolrec,dtset%recgratio,dtset%ngfft(:3),max_rec) 330 331 max_rec = min(max_rec,nrec) 332 a_wrk(0:max_rec,1:recpar%npt) = transpose(an_dev(0:,0:max_rec)) 333 b2_wrk(0:max_rec,1:recpar%npt) = transpose(bn2_dev(0:,0:max_rec)) 334 335 ABI_FREE(an_dev) 336 ABI_FREE(bn2_dev) 337 call timab(607,2,tsec2) 338 339 ipointlocal = recpar%npt+1 340 341 ! !DEBUG CUDA 342 ! if(rset%debug)then 343 ! if( rset%mpi%me==0)then 344 ! do ipoint = 1,rset%par%npt,1 345 ! kk=ipoint/(product(dtset%ngfft(:2))) 346 ! jj=ipoint/dtset%ngfft(1)-kk*dtset%ngfft(2) 347 ! ii=ipoint-jj*dtset%ngfft(1)-kk*dtset%ngfft(2)*dtset%ngfft(1) 348 ! write(msg,'(a,4i8,2(a,a9,5f12.6))')& 349 ! & 'pt',ipoint,ii,jj,kk,& 350 ! & ch10,'an-gpu ',real(alocal(:4,ipoint)),& 351 ! & ch10,'b2n-gpu ',real(b2local(:4,ipoint)) 352 ! call wrtout(std_out,msg,'COLL') 353 ! end do 354 ! endif 355 ! endif 356 ! !ENDDEBUG CUDA 357 #endif 358 359 else 360 if (.not.(rset%tronc)) then 361 graou1 : do kk = recpar%pt0%z,recpar%pt1%z,dtset%recgratio 362 do jj = 0,n2-1,dtset%recgratio 363 do ii = 0,n1-1,dtset%recgratio 364 ipoint = ii+(jj+kk*n2)*n1 365 ! --Local position of atoms 366 if (ipoint<recpar%min_pt) cycle 367 ! --Computation done by that proc 368 tim_fourdp=6 369 call recursion(exppot,ii,jj,kk, & 370 & a_wrk(:,ipointlocal), & 371 & b2_wrk(:,ipointlocal), & 372 & rho_wrk(ipointlocal),& 373 & nrec, rset%efermi,tsmear,rtrotter,dim_trott, & 374 & rset%ZT_p, & 375 & tolrec,dtset%typat,rset%nl,& 376 & rset%mpi,nfftrec,ngfftrec,rset%inf,& 377 & tim_fourdp,dtset%natom,projec,1) 378 ipointlocal = ipointlocal + 1 379 ! write(std_out,*)'ipointlocal',ipoint,ipointlocal,ii,jj,kk 380 call prtwork(dtset%recgratio**3*ipointlocal*100*rset%mpi%nproc/nfftrec) 381 if(ipoint==recpar%max_pt) exit graou1 382 end do 383 end do 384 end do graou1 385 else !--We use a troncation 386 ABI_MALLOC(exppotloc,(0:nfftrec-1)) 387 graou2 : do kk = recpar%pt0%z,recpar%pt1%z,dtset%recgratio 388 do jj = 0,n2-1,dtset%recgratio 389 do ii = 0,n1-1,dtset%recgratio 390 ipoint = ii+(jj+kk*n2)*n1 391 if (ipoint<recpar%min_pt) cycle 392 ! computation done by that proc 393 exppotloc = zero 394 ! --Traslation to move position on the ngfftrec grid center 395 trasl = -(/ii,jj,kk/)+ngfftrec(:3)/2 396 if(rset%nl%nlpsp) then 397 do iatom=1,dtset%natom 398 ! --local position of atoms 399 gcart_loc(:,iatom) = rset%inf%gcart(:,iatom)+trasl 400 gcart_loc(:,iatom) = modulo(gcart_loc(:,iatom),(/n1,n2,n3/)) 401 ! --Traslation of non-local projectors 402 do ilmn = 1,rset%nl%lmnmax 403 projec(:,:,:,ilmn,iatom) = reshape(rset%nl%projec(:,ilmn,dtset%typat(iatom)),shape=shape(projec(:,:,:,1,1))) 404 do ii1=1,3 405 projec(:,:,:,ilmn,iatom) = eoshift(projec(:,:,:,ilmn,iatom),shift=ngfftrec(ii1)/2-gcart_loc(ii1,iatom),dim=ii1) 406 end do 407 end do 408 end do 409 end if 410 411 call reshape_pot(trasl,nfftf,nfftrec,& 412 & dtset%ngfft(:3),ngfftrec(:3),& 413 & exppot,exppotloc) 414 415 tim_fourdp=6 416 call recursion(exppotloc,ngfftrec(1)/2,ngfftrec(2)/2,ngfftrec(3)/2, & 417 & a_wrk(:,ipointlocal), & 418 & b2_wrk(:,ipointlocal), & 419 & rho_wrk(ipointlocal),& 420 & nrec, rset%efermi,tsmear,rtrotter,dim_trott, & 421 & rset%ZT_p, & 422 & tolrec,dtset%typat,rset%nl,& 423 & rset%mpi,nfftrec,ngfftrec,rset%inf,& 424 & tim_fourdp,dtset%natom,projec,1) 425 ipointlocal = ipointlocal + 1 426 call prtwork(factor*real(ipointlocal,dp)) 427 if(ipoint==recpar%max_pt) exit graou2 428 end do 429 end do 430 end do graou2 431 ABI_FREE(exppotloc) 432 end if 433 434 end if 435 write(msg,'( a12,i12)')'ipointlocal',ipointlocal 436 call wrtout(std_out,msg,'PERS') 437 call timab(613+swt_tm,1,tsec2) !!--start time-counter: sync gpu-cpu 438 call xmpi_barrier(rset%mpi%comm_bandfft) 439 call timab(613+swt_tm,2,tsec2) !!--stop time-counter: sync gpu-cpu 440 441 !!############################################################# 442 !!--ASSIGNATION PARAMETERS TO MENAGE PARALLELISME OF TRANSGRID 443 if((rset%load==1 .or. dtset%recgratio>1))then 444 ! --Bufsize contains the values of the number of points calculated 445 ! by any proc on the coarse grid; bufsize_f on the fine grid 446 call timab(604,1,tsec2) !--start time-counter: transgrid 447 ABI_MALLOC(bufsize,(0:rset%mpi%nproc-1)) 448 ABI_MALLOC(bufdispl,(0:rset%mpi%nproc-1)) 449 bufsize = 0; 450 bufsize(rset%mpi%me) = rset%par%npt 451 call xmpi_sum(bufsize,rset%mpi%comm_bandfft,ierr) 452 453 bufdispl(0) = 0; 454 if(rset%mpi%nproc>1) bufdispl(1:) = (/(sum(bufsize(:ii)),ii=0,rset%mpi%nproc-1)/) 455 call timab(604,2,tsec2) !--stop time-counter: transgrid 456 end if 457 !!#################################################################### 458 !!--REDISTRIBUTION OF LOAD ON PROCS AFTER RECURSION IF CUDA IS USED 459 #ifdef HAVE_GPU_CUDA 460 if(rset%load==1)then 461 call timab(604,1,tsec2) !--start time-counter: transgrid 462 call xredistribute(rho_hyb,rset%GPU%par%vcount,rset%GPU%par%displs,& 463 & rholocal,bufsize,bufdispl,rset%mpi%me,& 464 & rset%mpi%nproc,& 465 & rset%mpi%comm_bandfft,ierr) 466 467 468 ABI_MALLOC(vcount_0,(0:rset%mpi%nproc-1)) 469 ABI_MALLOC(displs_0,(0:rset%mpi%nproc-1)) 470 ABI_MALLOC(vcount_1,(0:rset%mpi%nproc-1)) 471 ABI_MALLOC(displs_1,(0:rset%mpi%nproc-1)) 472 473 vcount_0 = 0 474 vcount_0(rset%mpi%me) = rset%par%npt*(nrec+1) 475 call xmpi_sum(vcount_0,rset%mpi%comm_bandfft,ierr) 476 displs_0 = 0 477 if(rset%mpi%nproc>1) displs_0(1:) = (/(sum(vcount_0(:ii)),ii=0,rset%mpi%nproc-1)/) 478 479 480 vcount_1 = 0 481 vcount_1(rset%mpi%me) = rset%GPU%par%npt*(nrec+1) 482 call xmpi_sum(vcount_1,rset%mpi%comm_bandfft,ierr) 483 displs_1 = 0 484 if(rset%mpi%nproc>1) displs_1(1:) = (/(sum(vcount_1(:ii)),ii=0,rset%mpi%nproc-1)/) 485 486 487 call xredistribute(a_hyb,vcount_1,displs_1,& 488 & alocal,vcount_0,displs_0,& 489 & rset%mpi%me,rset%mpi%nproc,& 490 & rset%mpi%comm_bandfft,ierr) 491 492 call xredistribute(b2_hyb,vcount_1,displs_1,& 493 & b2local,vcount_0,displs_0,& 494 & rset%mpi%me,rset%mpi%nproc,& 495 & rset%mpi%comm_bandfft,ierr) 496 497 nullify(rho_wrk,a_wrk,b2_wrk) 498 ABI_FREE(rho_hyb) 499 ABI_FREE(a_hyb) 500 ABI_FREE(b2_hyb) 501 ABI_FREE(vcount_0) 502 ABI_FREE(displs_0) 503 ABI_FREE(vcount_1) 504 ABI_FREE(displs_1) 505 call timab(604,2,tsec2) !--start time-counter: transgrid 506 end if 507 #endif 508 509 !############################################################# 510 !--TRANSGRID FOR THE DENSITY RHO AND THE COEFFICIENTS AN AND B2N 511 if (dtset%recgratio>1) then 512 ! --variables allocation and initialisation------- 513 write (msg,'(a)')' - TRANSGRID USING -----' 514 call wrtout(std_out,msg,'COLL') 515 call timab(604,1,tsec2) !--start time-counter: transgrid 516 517 ABI_MALLOC(rholocal_f,(rset%pawfgr%nfft)) 518 ABI_MALLOC(rhogf,(2,rset%pawfgr%nfft)) 519 ABI_MALLOC(rhogc,(2,rset%pawfgr%nfftc)) 520 ABI_MALLOC(rholoc_2,(1:rset%pawfgr%nfftc)) 521 ABI_MALLOC(ablocal_2,(1:rset%pawfgr%nfftc,0:nrec,2)) 522 ABI_MALLOC(ablocal_f,(1:rset%pawfgr%nfft,0:nrec,2)) 523 ABI_MALLOC(ablocal_1,(1:rset%par%npt,0:nrec,2)) 524 525 call destroy_distribfft(rset%mpi%distribfft) 526 call init_distribfft(rset%mpi%distribfft,'c',rset%mpi%nproc_fft,rset%pawfgr%ngfftc(2) ,rset%pawfgr%ngfftc(3)) 527 call init_distribfft(rset%mpi%distribfft,'f',rset%mpi%nproc_fft,rset%pawfgr%ngfft(2) ,rset%pawfgr%ngfft(3)) 528 529 rholocal_f = zero; ablocal_f = zero; ablocal_2 = zero 530 531 ablocal_1(:,:,1) = transpose(alocal(:,1:rset%par%npt)) 532 ablocal_1(:,:,2) = transpose(b2local(:,1:rset%par%npt)) 533 534 if(get_K_S_G==1 .and. dtset%recgratio>1 ) then 535 ABI_MALLOC(aloc_copy,(0:nrec,1:rset%par%npt)) 536 ABI_MALLOC(b2loc_copy,(0:nrec,1:rset%par%npt)) 537 aloc_copy = alocal(:,1:rset%par%npt) 538 b2loc_copy = b2local(:,1:rset%par%npt) 539 end if 540 541 if(rset%mpi%nproc ==1) then 542 ! --SEQUENTIAL CASE-- 543 rholoc_2 = rholocal(1:rset%par%npt) 544 ablocal_2 = ablocal_1 545 ! --Transigrid: coarse->fine 546 rhogf = zero; rhogc = zero 547 call transgrid(1,rset%mpi,dtset%nspden,1,0,0,1,rset%pawfgr,rhogc,rhogf,rholoc_2,rholocal_f) 548 do jj1 = 1,2 549 do ipoint = 0,nrec 550 rhogf = zero; rhogc = zero 551 call transgrid(1,rset%mpi,dtset%nspden,1,0,0,1,rset%pawfgr,rhogc,rhogf,& 552 & ablocal_2(:,ipoint,jj1),ablocal_f(:,ipoint,jj1)) 553 end do 554 end do 555 ! --Assignation of the interpolated results on the fine grid-- 556 rholocal = rholocal_f 557 alocal = transpose(ablocal_f(:,:,1)) 558 b2local = transpose(ablocal_f(:,:,2)) 559 560 else 561 ! --PARALLEL CASE-- 562 rholoc_2 = zero 563 ! --Send on all procs rho,an,bn-- 564 call xmpi_allgatherv(rholocal(1:rset%par%npt),bufsize(rset%mpi%me),rholoc_2,& 565 & bufsize,bufdispl,rset%mpi%comm_bandfft,ierr) 566 do irec = 0,nrec 567 call xmpi_allgatherv(ablocal_1(1:rset%par%npt,irec,1),bufsize(rset%mpi%me),& 568 & ablocal_2(:,irec,1),bufsize,bufdispl,rset%mpi%comm_bandfft,ierr) 569 call xmpi_allgatherv(ablocal_1(1:rset%par%npt,irec,2),bufsize(rset%mpi%me),& 570 & ablocal_2(:,irec,2),bufsize,bufdispl,rset%mpi%comm_bandfft,ierr) 571 end do 572 573 574 ! --Transigrid: coarse->fine on differents procs (with respect 575 ! the number of recursion) 576 rhogf = zero; rhogc = zero 577 call transgrid(1,rset%mpi,dtset%nspden,1,0,0,1,rset%pawfgr,rhogc,rhogf,rholoc_2,rholocal_f) 578 579 do irec = 0,2*(nrec+1)-1 580 ii1 = modulo(irec,rset%mpi%nproc) 581 jj1 = 1+modulo(irec,2) 582 kk1 = floor(irec/2.) 583 if(maxval(abs(ablocal_2(:,kk1,jj1))) > tol10 .and. rset%mpi%me == ii1) then 584 rhogf = zero; rhogc = zero 585 call transgrid(1,rset%mpi,dtset%nspden,1,0,0,1,rset%pawfgr,rhogc,& 586 & rhogf,ablocal_2(:,kk1,jj1),ablocal_f(:,kk1,jj1)) 587 end if 588 end do 589 590 ! --Recuperation of all interpolated results 591 ! from procs to allprocs 592 call xmpi_sum(ablocal_f,rset%mpi%comm_bandfft,ierr) 593 594 ! --Assignation the interpolated results on the fine grid 595 ! any procs to obtain the same point as in the standard recursion 596 do ii1 = 0, rset%mpi%nproc-1 597 jj1 = rset%par%displs(ii1)+1 598 if(ii1 == rset%mpi%me) then 599 alocal = transpose(ablocal_f(jj1:jj1+rset%par%vcount(ii1)-1,:,1)) 600 b2local = transpose(ablocal_f(jj1:jj1+rset%par%vcount(ii1)-1,:,2)) 601 rholocal = rholocal_f(jj1:jj1+rset%par%vcount(ii1)-1) 602 end if 603 end do 604 end if 605 ABI_FREE(ablocal_f) 606 ABI_FREE(ablocal_2) 607 ABI_FREE(ablocal_1) 608 ABI_FREE(rhogf) 609 ABI_FREE(rholocal_f) 610 ABI_FREE(rhogc) 611 ABI_FREE(rholoc_2) 612 613 call timab(604,2,tsec2) !--stop time-counter: transgrid 614 else 615 write(msg,'(a)')' - TRANSGRID NOT USED --' 616 call wrtout(std_out,msg,'COLL') 617 end if 618 !!--End transgrid 619 !!############################################################### 620 !################################### 621 !--Fermi energy computation 622 !--find the good mu by imposing the electrons number 623 call fermisolverec(rset%efermi,rholocal,alocal,b2local,rset%debug,& 624 & rset%min_nrec,tsmear,trotter,nelect,tol10,100, & 625 & rset%par%ntranche,rset%mpi,inf_ucvol,& !.False. .and.& 626 & (rset%tp==2 .or. rset%tp==3) .and. trotter>1) 627 628 !################################################################# 629 !######### ENTROPY AND GRAN POTENTIAL COMPUTATION ################## 630 entropy = zero 631 gran_pot = zero 632 noentropie : if(get_K_S_G==1)then 633 entropy1 = zero; entropy2 = zero ;entropy3 = zero; entropy4 = zero 634 gran_pot1 = zero ; gran_pot2 = zero; gran_pot3 = zero; gran_pot4 = zero 635 636 ! --Seek for the min of the path integral 637 potmin = zero; nlpotmin = zero 638 if(dtset%rectesteg/=1) potmin = minval(vtrial(:,1)) 639 if(rset%nl%nlpsp) nlpotmin = minval(rset%nl%eival(:,:,:)) 640 xmax = exp(-ratio2*(potmin+nlpotmin-rset%efermi)) 641 642 dim_entro = 0; if(rset%debug) dim_entro = 4; 643 644 if(dtset%recgratio>1) then 645 ! --Recgratio>1 646 ABI_MALLOC(rhogf,(2,rset%pawfgr%nfft)) 647 ABI_MALLOC(rhogc,(2,rset%pawfgr%nfftc)) 648 ABI_MALLOC(entropy_v_f,(rset%pawfgr%nfft,0:4)) 649 ABI_MALLOC(entropy_v_c,(rset%pawfgr%nfftc,0:4)) 650 ABI_MALLOC(entropy_v_2,(1:rset%par%npt,0:4)) 651 ABI_MALLOC(gran_pot_v_f,(rset%pawfgr%nfft,0:4)) 652 ABI_MALLOC(gran_pot_v_c,(rset%pawfgr%nfftc,0:4)) 653 ABI_MALLOC(gran_pot_v_2,(1:rset%par%npt,0:4)) 654 655 entropy_v_c = zero; entropy_v_f = zero; entropy_v_2 = zero 656 gran_pot_v_c = zero; gran_pot_v_f = zero; gran_pot_v_2 = zero 657 658 659 do ipoint = 1,rset%par%npt 660 call entropyrec(exp(rset%efermi*ratio2)*aloc_copy(:,ipoint), & 661 & exp(rset%efermi*ratio1)*b2loc_copy(:,ipoint), & 662 & nrec,trotter,entropy_v_2(ipoint,0),two,& 663 & rset%debug,n_pt_integ_entropy,perc_vmin*xmax,& 664 & entropy_v_2(ipoint,1),& 665 & entropy_v_2(ipoint,2),& 666 & entropy_v_2(ipoint,3),& 667 & entropy_v_2(ipoint,4)) 668 669 call gran_potrec(exp(rset%efermi*ratio2)*aloc_copy(:,ipoint), & 670 & exp(rset%efermi*ratio1)*b2loc_copy(:,ipoint), & 671 & nrec,trotter,gran_pot_v_2(ipoint,0),two,& 672 & rset%debug,n_pt_integ_entropy,perc_vmin*xmax,& 673 & gran_pot_v_2(ipoint,1),& 674 & gran_pot_v_2(ipoint,2),& 675 & gran_pot_v_2(ipoint,3),& 676 & gran_pot_v_2(ipoint,4)) 677 end do 678 ABI_FREE(aloc_copy) 679 ABI_FREE(b2loc_copy) 680 681 call timab(613+swt_tm,1,tsec2) !!--start time-counter: sync gpu-cpu 682 call xmpi_barrier(rset%mpi%comm_bandfft) 683 call timab(613+swt_tm,2,tsec2) !!--stop time-counter: sync gpu-cpu 684 685 call timab(604,1,tsec2) !--start time-counter: transgrid 686 if(rset%mpi%nproc==1) then 687 entropy_v_c = entropy_v_2 688 gran_pot_v_c = gran_pot_v_2 689 end if 690 do ii1 = 0,dim_entro 691 call xmpi_allgatherv(entropy_v_2(:,ii1),bufsize(rset%mpi%me),& 692 & entropy_v_c(:,ii1),bufsize,bufdispl,& 693 & rset%mpi%comm_bandfft,ierr) 694 call xmpi_allgatherv(gran_pot_v_2(:,ii1),bufsize(rset%mpi%me),& 695 & gran_pot_v_c(:,ii1),bufsize,bufdispl,& 696 & rset%mpi%comm_bandfft,ierr) 697 698 if(maxval(abs(entropy_v_c(:,ii1))) > tol10) then 699 rhogf = zero; rhogc = zero 700 call transgrid(1,rset%mpi,dtset%nspden,1,0,0,1,& 701 & rset%pawfgr,rhogc,rhogf,entropy_v_c(:,ii1),entropy_v_f(:,ii1)) 702 end if 703 if(maxval(abs(gran_pot_v_c(:,ii1))) >tol10) then 704 rhogf = zero; rhogc = zero 705 call transgrid(1,rset%mpi,dtset%nspden,1,0,0,1,& 706 & rset%pawfgr,rhogc,rhogf,gran_pot_v_c(:,ii1),gran_pot_v_f(:,ii1)) 707 end if 708 end do 709 call timab(604,2,tsec2) !--stop time-counter: transgrid 710 711 entropy = sum(entropy_v_f(:,0)) 712 gran_pot = sum(gran_pot_v_f(:,0)) 713 714 if(rset%debug)then 715 entropy1 = sum(entropy_v_f(:,1)) 716 entropy2 = sum(entropy_v_f(:,2)) 717 entropy3 = sum(entropy_v_f(:,3)) 718 entropy4 = sum(entropy_v_f(:,4)) 719 gran_pot1 = sum(gran_pot_v_f(:,1)) 720 gran_pot2 = sum(gran_pot_v_f(:,2)) 721 gran_pot3 = sum(gran_pot_v_f(:,3)) 722 gran_pot4 = sum(gran_pot_v_f(:,4)) 723 end if 724 725 ABI_FREE(entropy_v_f) 726 ABI_FREE(entropy_v_c) 727 ABI_FREE(entropy_v_2) 728 ABI_FREE(rhogf) 729 ABI_FREE(rhogc) 730 ABI_FREE(gran_pot_v_f) 731 ABI_FREE(gran_pot_v_c) 732 ABI_FREE(gran_pot_v_2) 733 734 735 else 736 ! --Recgratio=1 737 do ipoint = 1,rset%par%ntranche 738 call entropyrec(exp(rset%efermi*ratio2)*alocal(:,ipoint), & 739 & exp(rset%efermi*ratio1)*b2local(:,ipoint), & 740 & nrec,trotter,entropylocal,two,& 741 & rset%debug,n_pt_integ_entropy,perc_vmin*xmax,& 742 & entropylocal1,entropylocal2,& 743 & entropylocal3,entropylocal4) 744 call gran_potrec(exp(rset%efermi*ratio2)*alocal(:,ipoint), & 745 & exp(rset%efermi*ratio1)*b2local(:,ipoint), & 746 & nrec,trotter,gran_pot_local,two,& !/ucvol,& 747 & rset%debug,n_pt_integ_entropy,perc_vmin*xmax,& 748 & gran_pot_local1,gran_pot_local2,& 749 & gran_pot_local3,gran_pot_local4) 750 751 entropy = entropy + entropylocal 752 gran_pot = gran_pot + gran_pot_local 753 if(rset%debug)then 754 entropy1 = entropy1 + entropylocal1 755 entropy2 = entropy2 + entropylocal2 756 entropy3 = entropy3 + entropylocal3 757 entropy4 = entropy4 + entropylocal4 758 gran_pot1 = gran_pot1 + gran_pot_local1 759 gran_pot2 = gran_pot2 + gran_pot_local2 760 gran_pot3 = gran_pot3 + gran_pot_local3 761 gran_pot4 = gran_pot4 + gran_pot_local4 762 end if 763 764 end do 765 766 call xmpi_sum(entropy,rset%mpi%comm_bandfft ,ierr) 767 call xmpi_sum(gran_pot,rset%mpi%comm_bandfft ,ierr) 768 if(rset%debug)then 769 call xmpi_sum(entropy1,rset%mpi%comm_bandfft ,ierr) 770 call xmpi_sum(entropy2,rset%mpi%comm_bandfft ,ierr) 771 call xmpi_sum(entropy3,rset%mpi%comm_bandfft ,ierr) 772 call xmpi_sum(entropy4,rset%mpi%comm_bandfft ,ierr) 773 call xmpi_sum(gran_pot1,rset%mpi%comm_bandfft ,ierr) 774 call xmpi_sum(gran_pot2,rset%mpi%comm_bandfft ,ierr) 775 call xmpi_sum(gran_pot3,rset%mpi%comm_bandfft ,ierr) 776 call xmpi_sum(gran_pot4,rset%mpi%comm_bandfft ,ierr) 777 end if 778 end if 779 780 if(rset%debug)then 781 write(msg,'(2(2a,4(2a,es11.4,a)))')& 782 & ' --------------------------' ,ch10, & 783 & ' entropy, horiz path=',' ',entropy1,ch10, & 784 & ' entropy, xmax path=',' ',entropy2,ch10, & 785 & ' entropy, xmin path=',' ',entropy3,ch10, & 786 & ' entropy, zero path=',' ',entropy4,ch10, & 787 & ' --------------------------' ,ch10, & 788 & ' -omega/T, horiz path=',' ',gran_pot1,ch10, & 789 & ' -omega/T, xmax path=',' ',gran_pot2,ch10, & 790 & ' -omega/T, xmin path=',' ',gran_pot3,ch10, & 791 & ' -omega/T, zero path=',' ',gran_pot4,ch10 792 call wrtout(std_out,msg,'COLL') 793 end if 794 795 e_eigenvalues=tsmear*(entropy-gran_pot) + rset%efermi*nelect 796 ! --In reality gran_pot is not the gran potential but the 797 ! potential omega=-PV (Landau-potential or grand-potential) 798 ! divided by -T so the internal energy 799 ! U:=e_eigenvalues= TS+omega+muN = ST-T*sum(ln(1-n))+muN = 800 ! T(S-gran_pot)+muN 801 802 803 if(rset%nl%nlpsp) then 804 call nlenergyrec(rset,enlx,exppot,dtset%ngfft,dtset%natom,& 805 & dtset%typat,tsmear,trotter,tolrec) 806 end if 807 end if noentropie 808 !##### END ENTROPY AND GRAN POTENTIAL COMPUTATION ################## 809 !################################################################# 810 811 !if(associated(projec)) 812 if(rset%nl%nlpsp) then 813 ABI_FREE(projec) 814 end if 815 if(associated(gcart_loc)) then 816 ABI_FREE(gcart_loc) 817 end if 818 if((dtset%recgratio/=1 .or. rset%load==1)) then 819 ABI_FREE(bufdispl) 820 ABI_FREE(bufsize) 821 end if 822 !------------------------------------------------------------------ 823 !--Check if the convergence is reached for rho 824 drho = maxval(abs(rhor(min_pt:max_pt,1)-rholocal(:))) 825 drhomax = drho 826 call xmpi_max(drho,drhomax,rset%mpi%comm_bandfft,ierr) 827 828 !write(std_out,*)'drhomax,toldrho',drhomax,toldrho 829 if(drhomax<toldrho)then 830 rset%quitrec = rset%quitrec+1 831 else 832 rset%quitrec = 0 833 end if 834 835 !------------------------------------------------------------------- 836 !--Density on all procs 837 rhor(min_pt:max_pt,1) = rholocal(:) 838 if(rset%mpi%nproc /= 1)then 839 call xmpi_allgatherv(rholocal,rset%par%ntranche,rhor(:,1),& 840 & rset%par%vcount,rset%par%displs,& 841 & rset%mpi%comm_band,ierr) 842 end if 843 844 !-------------------------------------------------------------------- 845 !--2nd EKIN CALCULATION: this method is used 846 noekin2 : if(get_K_S_G==1)then 847 intrhov = (inf_ucvol)*sum(rholocal*vtrial(min_pt:max_pt,1)) 848 call xmpi_sum(intrhov,rset%mpi%comm_bandfft ,ierr) 849 850 ek = e_eigenvalues-intrhov-enlx 851 852 853 if(rset%debug) then 854 write (msg,'(2a,3f15.10,2a,3f15.10,2a,f15.10,a)') ch10,& 855 & ' ek,int(rho*V),ek+int(rho*V) ', ek, intrhov, ek+ intrhov,ch10, & 856 & ' kT*S, kT*sum(ln(...)), diff ', tsmear*entropy, tsmear*gran_pot, tsmear*(entropy-gran_pot),ch10, & 857 & ' kT(S-sum(ln(...)))+mu*nelect', tsmear*(entropy-gran_pot)+rset%efermi*nelect,ch10 858 call wrtout(std_out,msg,'COLL') 859 end if 860 861 862 end if noekin2 863 !-------------------------------------------------------------------- 864 fermie = rset%efermi 865 866 !-------------------------------------------------------- 867 !!--At the first step to find the max number of recursion 868 !! needed to convergence, then redefine nrec. 869 if(initialized==0 .and. dtset%ntime>0) then 870 call Calcnrec(rset,b2local) 871 end if 872 873 !-------------------------------------------------------- 874 call destroy_distribfft(rset%mpi%distribfft) 875 call init_distribfft(rset%mpi%distribfft,'c',rset%mpi%nproc_fft,rset%ngfftrec(2),rset%ngfftrec(3)) 876 call init_distribfft(rset%mpi%distribfft,'f',rset%mpi%nproc_fft,dtset%ngfft(2),dtset%ngfft(3)) 877 878 !--Printing results 879 write(msg,'(3a,f15.10)')& 880 & ' -- Results: --------------------------------------',ch10,& 881 & ' mu =',rset%efermi 882 call wrtout(std_out,msg,'COLL') 883 if(get_K_S_G==1)then 884 write(msg,'(a,f15.10,6(2a,f15.10))')& 885 & ' potmin =',potmin,ch10,& 886 & ' <V_eff> =',intrhov,ch10,& 887 & ' entropy =',entropy,ch10,& 888 & ' -omega/T =',gran_pot,ch10,& 889 & ' eigenvalues =',e_eigenvalues,ch10,& 890 & ' kinetic =',ek,ch10,& 891 & ' non-loc ene =',enlx 892 call wrtout(std_out,msg,'COLL') 893 end if 894 write(msg,'(a,50a)')' ',('-',ii=1,50) 895 call wrtout(std_out,msg,'COLL') 896 !write(std_out,*)'is the pressure ',gran_pot*tsmear/(rset%inf%ucvol*real(nfftrec,dp)) 897 898 !--Structured debugging : if rset%debug=T, stop here. 899 if(.false.)then !(rset%debug) 900 call wrtout(std_out,' rhor ','PERS') 901 write(std_out,*)rhor(:,1) 902 call wrtout(std_out,' ','COLL') 903 write(msg,'(a,2d10.3)')' temps recursion ',tsec 904 call wrtout(std_out,msg,'COLL') 905 write(msg,'(a,l1,a)') ' vtorhorec : rset%debug=-',rset%debug,', debugging mode => stop ' 906 ABI_ERROR(msg) 907 end if 908 909 call timab(600,2,tsec2) 910 call timab(21,2,tsec) 911 912 call symrhg(1,gprimd,irrzon,rset%mpi,nfftf,& 913 & dtset%ngfft(1)*dtset%ngfft(2)*dtset%ngfft(3),dtset%ngfft,dtset%nspden,& 914 & dtset%nsppol,dtset%nsym,phnons,rhog,rhor,rprimd,dtset%symafm,dtset%symrel,dtset%tnons) 915 916 end subroutine vtorhorec