TABLE OF CONTENTS


ABINIT/density_rec [ Functions ]

[ Top ] [ Functions ]

NAME

 density_rec

FUNCTION

 This routine computes the density using  the coefficients corresponding to
 continued fraction at a point from a fixed potential.

INPUTS

  coordx, coordy, coordz=coordonnees of the computed point
  an, bn2 : coefficient given by density_rec. Input if get_rec_coef=0, output else
  nrec=order of density_rec
  fermie=fermi energy (Hartree)
  tsmear=temperature (Hartree)
  rtrotter=real trotter parameter
  tol=tolerance criteria for stopping density_rec
  inf_ucvol=infinitesimal unit cell volume
  dim_trott = max(0,2*trotter-1)

OUTPUT

  rho_out=result of the continued fraction multiplied by a multiplicator

SIDE EFFECTS

PARENTS

      fermisolverec

CHILDREN

      timab,trottersum

NOTES

  at this time :
       - exppot should be replaced by ?
       - coord should be replaced by ?
       - need a rectangular box (rmet diagonal matrix)

SOURCE

1611 subroutine density_rec(an,bn2,rho_out,nrec, &
1612 &                     fermie,tsmear,rtrotter, &
1613 &                     dim_trott,tol,inf_ucvol)
1614 
1615 
1616 !This section has been created automatically by the script Abilint (TD).
1617 !Do not modify the following lines by hand.
1618 #undef ABI_FUNC
1619 #define ABI_FUNC 'density_rec'
1620 !End of the abilint section
1621 
1622  implicit none
1623 
1624 !Arguments -------------------------------
1625 !scalars
1626  integer,intent(in) :: nrec
1627  integer,intent(in) :: dim_trott
1628  real(dp),intent(in) :: fermie,tol,tsmear,inf_ucvol,rtrotter
1629  real(dp), intent(out) :: rho_out
1630 !arrays
1631  real(dp),intent(in) :: an(0:nrec),bn2(0:nrec)
1632 !Local variables-------------------------------
1633 !not used, debugging purpose only
1634 !for debugging purpose, detailled printing only once for density and ekin
1635 !scalars
1636  integer, parameter :: minrec = 3
1637  integer  :: irec
1638  real(dp) :: beta,mult,prod_b2,error,errold
1639  real(dp) :: pi_on_rtrotter,twortrotter,exp1,exp2
1640  complex(dpc) :: cinv2rtrotter,coeef_mu,facrec0
1641 ! character(len=500) :: msg
1642 !arrays
1643  real(dp) :: tsec(2)
1644  complex(dpc) :: acc_rho(0:nrec)
1645  complex(dpc) :: D(0:dim_trott),Dold(0:dim_trott)
1646  complex(dpc) :: N(0:dim_trott),Nold(0:dim_trott)
1647 !**************************************************************************
1648 
1649  call timab(605,1,tsec)
1650 
1651 !##############################################################
1652 !--Initialisation of metrics
1653  mult = two/inf_ucvol   !non-spined system
1654  beta = one/tsmear
1655 
1656 !--Variables for optimisation
1657  pi_on_rtrotter = pi/rtrotter
1658  twortrotter = two*rtrotter
1659  exp1 = exp((beta*fermie)/(rtrotter))
1660  exp2 = exp(beta*fermie/(twortrotter))
1661  cinv2rtrotter = cmplx(one/twortrotter,zero,dp)
1662  coeef_mu = cmplx(one/exp2,zero,dp)
1663 
1664  N = czero;  D = cone
1665  facrec0 = cone
1666  Nold = czero; Dold = czero
1667 !--Initialisation of accumulated density
1668  acc_rho = czero
1669 !--Initialisation of estimated error
1670  prod_b2 = twortrotter/exp1
1671  errold = zero
1672 
1673 
1674 !##############################################################
1675 !--Main loop
1676  maindo : do irec = 0, nrec
1677 
1678 !  ######################################################
1679 !  --Density computation
1680 !  !--using the property that: sum_i(bi*c)^2|(z-ai*c)=1/c*sum_i(bi)^2|(z/c-ai)
1681 !  !and for c =exp(-beta*fermie/(two*rtrotter)
1682 
1683    call trottersum(dim_trott,error,prod_b2,pi_on_rtrotter,&
1684 &   facrec0,coeef_mu,exp1,&
1685 &   an(irec),bn2(irec),&
1686 &   N,D,Nold,Dold)
1687 
1688    if(irec/=nrec .and. irec>=minrec)then
1689      if((bn2(irec+1)<tol14).or.(mult*error<tol.and.errold<tol)) exit maindo
1690    end if
1691    errold = mult*error
1692  end do maindo
1693 !--Accumulated density
1694  rho_out = mult*real(cone-sum(N/D)*cinv2rtrotter,dp)
1695 
1696  call timab(605,2,tsec)
1697 
1698  end subroutine density_rec

ABINIT/entropyrec [ Functions ]

[ Top ] [ Functions ]

NAME

 entropyrec

FUNCTION

 This routine computes the local part of the entropy at a point using a path integral,
 in the recursion method.

  an, bn2 : coefficient given by the recursion.
  nrec=order of recursion
  trotter=trotter parameter
  multce=a multiplicator for computing entropy ; 2 for non-spin-polarized system
  debug_rec=debug variable
  n_pt_integ=number of points of integration for the path integral
  xmax =max point of integration on the real axis

OUTPUT

  ent_out=entropy at the point
  ent_out1,ent_out2,ent_out3,ent_out4=debug entropy at the point

PARENTS

      vtorhorec

CHILDREN

      timab,wrtout

NOTES

  at this time :
       - multce should be not used
       - the routine should be integraly rewrited and use the routine recursion.
       - only modified for p /= 0

SOURCE

 975 subroutine entropyrec(an,bn2,nrec,trotter,ent_out,multce,debug_rec, &
 976 &                     n_pt_integ,xmax,&
 977 &                     ent_out1,ent_out2,ent_out3,ent_out4)
 978 
 979 
 980 !This section has been created automatically by the script Abilint (TD).
 981 !Do not modify the following lines by hand.
 982 #undef ABI_FUNC
 983 #define ABI_FUNC 'entropyrec'
 984 !End of the abilint section
 985 
 986  implicit none
 987 
 988 !Arguments -------------------------------
 989 !scalars
 990  integer,intent(in) :: n_pt_integ,nrec,trotter
 991  logical,intent(in) :: debug_rec
 992  real(dp), intent(in) :: multce,xmax
 993  real(dp),intent(out) :: ent_out,ent_out1,ent_out2,ent_out3,ent_out4
 994 !arrays
 995  real(dp),intent(in) :: an(0:nrec),bn2(0:nrec)
 996 
 997 !Local variables-------------------------------
 998 !scalars
 999  integer, parameter :: level = 7
1000  integer, save :: first_en = 1
1001  integer :: ii,kk,n_pt_integ_path2,n_pt_integ_path3
1002  real(dp) :: arg,epsilo,step,twotrotter,xmin,dr_step
1003  complex(dpc) :: D,Dnew,Dold,N,Nnew,Nold,dz_path,ent_acc,ent_acc1,ent_acc2
1004  complex(dpc) :: ent_acc3,ent_acc4
1005  complex(dpc) :: funczero,z_path,zj
1006  complex(dpc) ::delta_calc
1007  character(len=500) :: msg
1008 !arrays
1009  real(dp) :: tsec(2)
1010  real(dp) :: iif,factor
1011 
1012 
1013 ! *************************************************************************
1014 
1015 
1016  call timab(610,1,tsec)
1017 
1018 !structured debugging if debug_rec=T : print detailled result the first time we enter entropyrec
1019 
1020  if(debug_rec .and. first_en==1)then
1021    write(msg,'(a)')' '
1022    call wrtout(std_out,msg,'PERS')
1023    write(msg,'(a)')' entropyrec : enter '
1024    call wrtout(std_out,msg,'PERS')
1025    write(msg,'(a,i6)')'n_pt_integ ' , n_pt_integ
1026    call wrtout(std_out,msg,'COLL')
1027  end if
1028 
1029  ent_out = zero
1030  ent_out1 = zero
1031  ent_out2 = zero
1032  ent_out3 = zero
1033  ent_out4 = zero
1034  ent_acc = czero
1035  ent_acc1 = czero
1036  ent_acc2 = czero
1037  ent_acc3 = czero
1038  ent_acc4 = czero
1039 
1040 !path parameters
1041  twotrotter = max(two*real(trotter,dp),one)
1042  if(trotter==0)then
1043    factor = tol5
1044    arg =pi*three_quarters
1045    zj = cmplx(-one,one-sin(arg),dp)
1046  else
1047    factor = xmax/ten
1048    arg = pi/twotrotter
1049    zj = cmplx( cos(arg) , sin(arg),dp )
1050  end if
1051 
1052  epsilo = factor*sin( arg )
1053  xmin = factor*cos( arg )
1054  step = (xmax-xmin)/real(n_pt_integ,dp)
1055 
1056 !####################################################################
1057 ![xmax + i*epsilo,xmin + i*epsilo]
1058  dr_step = one/real(n_pt_integ,dp)
1059  path1:  do ii = 0,n_pt_integ
1060    z_path = cmplx(xmin+real(ii,dp)*(xmax-xmin)*dr_step,epsilo,dp)
1061    dz_path = -cmplx((xmax-xmin)*dr_step,zero,dp)
1062 
1063    Nold = czero
1064    Dold = cone
1065    N = cone
1066    D = z_path - cmplx(an(0),zero,dp)
1067 
1068    do kk=1,nrec
1069      Nnew = (z_path - cmplx(an(kk),zero,dp))*N - cmplx(bn2(kk),zero,dp)*Nold
1070      Dnew = (z_path - cmplx(an(kk),zero,dp))*D - cmplx(bn2(kk),zero,dp)*Dold
1071 
1072      Nold = N
1073      Dold = D
1074      N = Nnew
1075      D = Dnew
1076 
1077      if(kk/=nrec)then
1078        if((bn2(kk+1)<tol14))exit
1079      end if
1080    end do
1081 
1082 !  <r|1/(z-e**(-beta/(2p)*(H-mu)))|r> dz
1083    delta_calc = func1_rec(z_path**twotrotter)*(N/D)*dz_path
1084    if(ii==0.or.ii==n_pt_integ)then
1085      ent_acc  = ent_acc  + half*delta_calc
1086      ent_acc1 = ent_acc1 + half*delta_calc
1087    else
1088      ent_acc  = ent_acc  + delta_calc
1089      ent_acc1 = ent_acc1 + delta_calc
1090    end if
1091  end do path1
1092 
1093 
1094 !####################################################################
1095 ![1/2zj,0]
1096  if(epsilo/step>100.d0)then
1097    n_pt_integ_path2 = int((factor*abs(zj))/step)+1
1098  else
1099    n_pt_integ_path2 = 100
1100  end if
1101 
1102  if(trotter/=0)then
1103    n_pt_integ_path3 = 0
1104    dr_step = one/real(n_pt_integ_path2,dp)
1105    dz_path = -cmplx(xmin,epsilo,dp)*dr_step
1106    path5:  do ii = 0,n_pt_integ_path2
1107      z_path = cmplx(real(ii,dp)*xmin,real(ii,dp)*epsilo,dp)*dr_step
1108      if(abs(z_path)>tol14)then
1109        Nold = czero
1110        Dold = cone
1111        N = cone
1112        D = z_path - cmplx(an(0),zero,dp)
1113        do kk=1,nrec
1114          Nnew = (z_path - cmplx(an(kk),zero,dp))*N - cmplx(bn2(kk),zero,dp)*Nold
1115          Dnew = (z_path - cmplx(an(kk),zero,dp))*D - cmplx(bn2(kk),zero,dp)*Dold
1116          Nold = N
1117          Dold = D
1118          N = Nnew
1119          D = Dnew
1120          if(kk/=nrec)then
1121            if((bn2(kk+1)<tol14))exit
1122          end if
1123        end do
1124 
1125 !      <r|1/(z-e**(-beta/(2p)*(H-mu)))|r> dz
1126        if(abs(z_path)**twotrotter>tiny(one)) then
1127          funczero = func1_rec(z_path**twotrotter)
1128        else
1129          funczero = czero
1130        end if
1131        delta_calc = funczero*N/D*dz_path
1132        if(ii==0.or.ii==n_pt_integ_path2)then
1133          ent_acc  = ent_acc  + half*delta_calc
1134          if(debug_rec) ent_acc3 = ent_acc3 + half*delta_calc
1135        else
1136          ent_acc  = ent_acc  + funczero*delta_calc
1137          if(debug_rec) ent_acc3 = ent_acc3 + funczero*delta_calc
1138        end if
1139      end if
1140    end do path5
1141 
1142  else  ! trotter==0
1143 
1144    n_pt_integ_path3 = max(100,int((epsilo*half*pi)/real(step,dp))+1)
1145    dr_step = one/real(n_pt_integ_path3,dp)
1146    path6:  do ii = 0,n_pt_integ_path3
1147      iif=half*pi*real(ii,dp)*dr_step
1148      z_path = epsilo*cmplx(-cos(iif),1-sin(iif),dp)
1149      dz_path = epsilo*cmplx(sin(iif),-cos(iif),dp)*half*pi*dr_step
1150      if(abs(z_path)**twotrotter>tol14)then
1151        Nold = czero
1152        Dold = cone
1153        N = cone
1154        D = z_path - cmplx(an(0),zero,dp)
1155        do kk=1,nrec
1156          Nnew = (z_path - cmplx(an(kk),zero,dp))*N - cmplx(bn2(kk),zero,dp)*Nold
1157          Dnew = (z_path - cmplx(an(kk),zero,dp))*D - cmplx(bn2(kk),zero,dp)*Dold
1158          Nold = N
1159          Dold = D
1160          N = Nnew
1161          D = Dnew
1162          if(kk/=nrec .and. bn2(kk+1)<tol14) exit !-EXIT
1163        end do
1164 
1165 !      <r|1/(z-e**(-beta/(2p)*(H-mu)))|r> dz
1166        delta_calc = func1_rec(z_path**twotrotter) * N/D * dz_path
1167        if(ii==0.or.ii==n_pt_integ_path3)then
1168          ent_acc  = ent_acc + half*delta_calc
1169          if(debug_rec) ent_acc3 = ent_acc3 + half*delta_calc
1170        else
1171          ent_acc  = ent_acc + delta_calc    !<r|1/(z-e**(-beta/(2p)*(H-mu)))|r> dz
1172          if(debug_rec) ent_acc3 = ent_acc3 + delta_calc  !<r|1/(z-e**(-beta/(2p)*(H-mu)))|r> dz
1173        end if
1174      end if
1175    end do path6
1176 
1177  end if
1178 
1179  if(first_en==1 .and. debug_rec) then
1180    write(msg,'(a,i5,2a,i5,2a,i5,2a,es11.4,2a,es11.4,2a,es11.4)')&
1181 &   'n_pt_path  =',n_pt_integ,ch10,&
1182 &   'n_pt_path2 =',n_pt_integ_path2,ch10,&
1183 &   'n_pt_path3 =',n_pt_integ_path3,ch10,&
1184 &   'xmin       =',xmin,ch10,&
1185 &   'xmax       =',xmax,ch10,&
1186 &   'epsilon    =',epsilo
1187    call wrtout(std_out,msg,'COLL')
1188    first_en = 0
1189  end if
1190 
1191 !####################################################################
1192 ![xmax,xmax+i*epsilo]
1193  dr_step = one/real(n_pt_integ_path2,dp)
1194  dz_path = cmplx(zero,epsilo*dr_step,dp)
1195  path4:  do ii = 0,n_pt_integ_path2
1196    z_path = cmplx(xmax,real(ii,dp)*epsilo*dr_step,dp)
1197 
1198    Nold = czero
1199    Dold = cone
1200    N = cone
1201    D = z_path - cmplx(an(0),zero,dp)
1202 
1203    do kk=1,nrec
1204      Nnew = (z_path - cmplx(an(kk),zero,dp))*N - cmplx(bn2(kk),zero,dp)*Nold
1205      Dnew = (z_path - cmplx(an(kk),zero,dp))*D - cmplx(bn2(kk),zero,dp)*Dold
1206 
1207      Nold = N
1208      Dold = D
1209      N = Nnew
1210      D = Dnew
1211 
1212      if(kk/=nrec)then
1213        if((bn2(kk+1)<tol14))exit
1214      end if
1215    end do
1216 
1217 !  <r|1/(z-e**(-beta/(2p)*(H-mu)))|r> dz
1218    delta_calc = func1_rec(z_path**twotrotter)*N/D*dz_path
1219    if(ii==0.or.ii==n_pt_integ_path2)then
1220 
1221      ent_acc =  ent_acc  + half*delta_calc
1222      if(debug_rec) ent_acc2 = ent_acc2 + half*delta_calc
1223    else
1224      ent_acc  = ent_acc  + delta_calc
1225      if(debug_rec) ent_acc2 = ent_acc2 + delta_calc
1226    end if
1227  end do path4
1228 
1229 
1230  ent_out  = multce*real(ent_acc*cmplx(zero,-piinv,dp),dp)
1231  if(debug_rec) then
1232    ent_out1 = multce*real(ent_acc1*cmplx(zero,-piinv,dp),dp)
1233    ent_out2 = multce*real(ent_acc2*cmplx(zero,-piinv,dp),dp)
1234    ent_out3 = multce*real(ent_acc3*cmplx(zero,-piinv,dp),dp)
1235    ent_out4 = multce*real(ent_acc4*cmplx(zero,-piinv,dp),dp)
1236  end if
1237 
1238  call timab(610,2,tsec)
1239 
1240  contains
1241 
1242 !function to integrate over the path
1243 !func1_rec(z_path,twotrotter) =  ( z_path**twotrotter/(1+z_path**twotrotter)*log(1+1/z_path**twotrotter)+&    !- f*ln(f)
1244 !&1/(1+z_path**twotrotter)*log(1+z_path**twotrotter))       !- (1-f)*ln(1-f)
1245 
1246 !func1_rec(z_path_pow) =   z_path_pow/(cone+z_path_pow)*log(cone+cone/z_path_pow)+&    !- f*ln(f)
1247 !&cone/(cone+z_path_pow)*log(cone+z_path_pow)       !- (1-f)*ln(1-f)
1248 
1249 !other expression of func for a path like ro(t)*exp(2*i*pi/(2*p)*(j+1/2))
1250 
1251    function func1_rec(z)
1252 
1253 
1254 !This section has been created automatically by the script Abilint (TD).
1255 !Do not modify the following lines by hand.
1256 #undef ABI_FUNC
1257 #define ABI_FUNC 'func1_rec'
1258 !End of the abilint section
1259 
1260    implicit none
1261 
1262    complex(dpc) :: func1_rec
1263    complex(dpc),intent(in) :: z
1264 
1265    func1_rec =   z/(cone+z)*log(cone+cone/z)+ cone/(cone+z)*log(cone+z)
1266 
1267  end function func1_rec
1268 
1269 end subroutine entropyrec

ABINIT/fermisolverec [ Functions ]

[ Top ] [ Functions ]

NAME

 fermisolverec

FUNCTION

 This routine computes the fermi energy in order to have a given number of
 valence electrons in the recursion method, using a Ridder s Method

INPUTS

  debug_rec=debugging variable
  nb_rec=order of recursion
  nb_point=number of discretization point in one dimension (=n1=n2=n3)
  temperature=temperature (Hartree)
  trotter=trotter parameter
  nelect=number of valence electrons (dtset%nelect)
  acc=accuracy for the fermi energy
  max_it=maximum number of iteration for the Ridder's Method
  long_tranche=number of point computed by thi proc
  mpi_enreg=information about MPI parallelization
  inf_ucvol=infinitesimal unit cell volume
  gputopo=true if topology gpu-cpu= 2 or 3

OUTPUT

SIDE EFFECTS

  fermie=fermi energy
  rho=density, recomputed for the new fermi energy
  a, b2 : coefficient given by recursion recomputed for the new fermi energy

PARENTS

      vtorhorec

CHILDREN

      alloc_dens_cuda,dealloc_dens_cuda,density_cuda,density_rec,timab,wrtout
      xmpi_barrier,xmpi_sum

NOTES

  at this time :

SOURCE

1313 subroutine fermisolverec(fermie,rho,a,b2,debug_rec,nb_rec, &
1314   &                      temperature,trotter,nelect, &
1315   &                      acc, max_it, &
1316   &                      long_tranche,mpi_enreg,&
1317   &                      inf_ucvol,gputopo)
1318 
1319 
1320 !This section has been created automatically by the script Abilint (TD).
1321 !Do not modify the following lines by hand.
1322 #undef ABI_FUNC
1323 #define ABI_FUNC 'fermisolverec'
1324 !End of the abilint section
1325 
1326  implicit none
1327 
1328 !Arguments -------------------------------
1329  !scalars
1330  integer,intent(in) :: long_tranche,max_it,nb_rec,trotter
1331  logical,intent(in) :: debug_rec,gputopo
1332  real(dp),intent(in) :: acc,inf_ucvol,nelect,temperature
1333  real(dp), intent(inout) :: fermie
1334  type(MPI_type),intent(in) :: mpi_enreg
1335  !arrays
1336  real(dp), intent(inout) :: a(0:nb_rec,long_tranche), b2(0:nb_rec,long_tranche)
1337  real(dp), intent(inout) :: rho(long_tranche)
1338 
1339 !Local variables-------------------------------
1340  !scalars
1341  integer  ::  ierr,ii,ipointlocal,nn,dim_trott
1342  real(dp) :: beta,fermieh,fermiel,fermiem,fermienew,nelecth,nelectl,nelectm
1343  real(dp) :: nelectnew,res_nelecth,res_nelectl,res_nelectm,res_nelectnew
1344  real(dp) :: rtrotter,ss,fermitol
1345  character(len=500) :: msg
1346  !arrays
1347  real(dp) :: tsec(2)
1348  real(dp) :: rhotry(long_tranche)
1349  !no_abirules
1350 #ifdef HAVE_GPU_CUDA
1351  integer :: swt_tm,npitch
1352  real(cudap) :: rhocu(long_tranche)
1353  real(dp) :: tsec2(2)
1354 #endif
1355 
1356 ! *************************************************************************
1357 
1358 #ifdef HAVE_GPU_CUDA
1359  swt_tm = 0
1360 #endif
1361 
1362  call timab(609,1,tsec)
1363 
1364  beta = one/temperature
1365  rtrotter  = max(half,real(trotter,dp))
1366  dim_trott = max(0,2*trotter-1)
1367 
1368  write(msg,'(a)')' -- fermisolverec ---------------------------------'
1369  call wrtout(std_out,msg,'COLL')
1370  if(debug_rec) then
1371    write (msg,'(a,d10.3)')' nelect= ',nelect
1372    call wrtout(std_out,msg,'COLL')
1373  end if
1374 !initialisation of fermiel
1375  fermiel = fermie
1376  call timab(609,2,tsec)
1377 
1378 !initialisation fermitol
1379  fermitol = acc
1380 #ifdef HAVE_GPU_CUDA_SP
1381  if(gputopo)  fermitol = 1.d-3
1382 #endif
1383 
1384  if(gputopo) then
1385 #ifdef HAVE_GPU_CUDA
1386    swt_tm = 1
1387 !  allocate array an and bn2 on gpu for computation of trotter formula
1388    call alloc_dens_cuda(long_tranche,nb_rec,dim_trott,npitch,&
1389 &   real(a,cudap),real(b2,cudap))
1390 
1391    call timab(617,1,tsec)
1392    call density_cuda(npitch,long_tranche,nb_rec,dim_trott,&
1393 &   real(fermiel,cudap),real(temperature,cudap),&
1394 &   real(rtrotter,cudap),real(inf_ucvol,cudap),&
1395 &   real(tol14,cudap),&
1396 &   rhocu)
1397    rhotry = real(rhocu,dp)
1398    call timab(617,2,tsec)
1399 #endif
1400  else
1401    do ipointlocal = 1,long_tranche
1402      call density_rec(a(:,ipointlocal),&
1403 &     b2(:,ipointlocal),&
1404 &     rhotry(ipointlocal),&
1405 &     nb_rec,fermiel,temperature,rtrotter,dim_trott, &
1406 &     tol14,inf_ucvol)
1407    end do
1408  end if
1409 
1410  call timab(609,1,tsec)
1411  nelectl = sum(rhotry)
1412  call xmpi_sum( nelectl,mpi_enreg%comm_bandfft,ierr)
1413  res_nelectl = inf_ucvol*nelectl - nelect
1414 
1415  if (res_nelectl /= zero) then
1416 !  initialisation of fermih
1417 !  excess of electrons -> smaller fermi
1418    res_nelecth = zero
1419    ii = 1
1420    fermieh = fermie - ten*sign(one,res_nelectl)*temperature
1421    do while(ii<6 .and. res_nelecth*res_nelectl>=0)
1422      fermieh = fermieh - ten*sign(one,res_nelectl)*temperature
1423      call timab(609,2,tsec)
1424 
1425      if(gputopo) then
1426 #ifdef HAVE_GPU_CUDA
1427        call timab(617,1,tsec)
1428        call density_cuda(npitch,long_tranche,nb_rec,dim_trott,&
1429 &       real(fermieh,cudap),real(temperature,cudap),&
1430 &       real(rtrotter,cudap),real(inf_ucvol,cudap),&
1431 &       real(tol14,cudap),&
1432 &       rhocu)
1433        rhotry = real(rhocu,dp)
1434        call timab(617,2,tsec)
1435 #endif
1436      else
1437        do ipointlocal = 1,long_tranche
1438          call density_rec(a(:,ipointlocal),  &
1439 &         b2(:,ipointlocal), &
1440 &         rhotry(ipointlocal), &
1441 &         nb_rec,fermieh,temperature,rtrotter,dim_trott, &
1442 &         tol14,inf_ucvol)
1443        end do
1444      end if
1445      call timab(609,1,tsec)
1446      nelecth = sum(rhotry)
1447      call xmpi_sum( nelecth,mpi_enreg%comm_bandfft ,ierr);
1448      res_nelecth = inf_ucvol*nelecth - nelect
1449 
1450      if(debug_rec) then
1451        write (msg,'(a,es11.4e2,a,es11.4e2)') ' Fermi energy interval',fermieh,' ',fermiel
1452        call wrtout(std_out,msg,'COLL')
1453      end if
1454      ii = ii +1
1455    end do
1456 
1457    if (res_nelecth*res_nelectl>0) then
1458      write (msg,'(4a)')' fermisolverec : ERROR- ',ch10,&
1459 &     ' initial guess for fermi energy doesnt permit to  find solutions in solver',ch10
1460      MSG_ERROR(msg)
1461    end if
1462 
1463 !  MAIN LOOP   ------------------------------------------------------
1464    main : do nn=1,max_it
1465 !    fermiem computation
1466      fermiem = 0.5d0*(fermiel+fermieh)
1467 
1468 !    nelectm = zero
1469      call timab(609,2,tsec)
1470 
1471      if(gputopo) then
1472 #ifdef HAVE_GPU_CUDA
1473        call timab(617,1,tsec)
1474        call density_cuda(npitch,long_tranche,nb_rec,dim_trott,&
1475 &       real(fermiem,cudap),real(temperature,cudap),&
1476 &       real(rtrotter,cudap),real(inf_ucvol,cudap),&
1477 &       real(tol14,cudap),&
1478 &       rhocu)
1479        rhotry = real(rhocu,dp)
1480        call timab(617,2,tsec)
1481 #endif
1482      else
1483        do ipointlocal = 1,long_tranche
1484          call density_rec(a(:,ipointlocal),  &
1485 &         b2(:,ipointlocal), &
1486 &         rhotry(ipointlocal), &
1487 &         nb_rec,fermiem,temperature,rtrotter,dim_trott, &
1488 &         tol14,inf_ucvol)
1489        end do
1490      end if
1491 
1492      call timab(609,1,tsec)
1493      nelectm = sum(rhotry)
1494      call xmpi_sum( nelectm,mpi_enreg%comm_bandfft,ierr)
1495      res_nelectm = inf_ucvol*nelectm - nelect
1496 
1497 !    new guess
1498      ss = sqrt(res_nelectm**two-res_nelectl*res_nelecth)
1499      fermienew = fermiem + (fermiem-fermiel)*sign(one, res_nelectl-res_nelecth)*res_nelectm/ss
1500 
1501      call timab(609,2,tsec)
1502      if(gputopo) then
1503 #ifdef HAVE_GPU_CUDA
1504        call timab(617,1,tsec)
1505        call density_cuda(npitch,long_tranche,nb_rec,dim_trott,&
1506 &       real(fermienew,cudap),real(temperature,cudap),&
1507 &       real(rtrotter,cudap),real(inf_ucvol,cudap),&
1508 &       real(tol14,cudap),&
1509 &       rhocu)
1510        rhotry = real(rhocu,dp)
1511        call timab(617,2,tsec)
1512 #endif
1513      else
1514        do ipointlocal = 1,long_tranche
1515          call density_rec(a(:,ipointlocal),  &
1516 &         b2(:,ipointlocal), &
1517 &         rhotry(ipointlocal), &
1518 &         nb_rec,fermienew,temperature,rtrotter,dim_trott, &
1519 &         tol14,inf_ucvol)
1520        end do
1521      end if
1522 
1523      call timab(609,1,tsec)
1524      nelectnew = sum(rhotry)
1525      call xmpi_sum( nelectnew,mpi_enreg%comm_bandfft ,ierr);
1526      res_nelectnew = inf_ucvol*nelectnew - nelect
1527 
1528 !    fermiel et fermieh for new iteration
1529      if (sign(res_nelectm,res_nelectnew) /= res_nelectm) then
1530        fermiel = fermiem
1531        res_nelectl = res_nelectm
1532        fermieh = fermienew
1533        res_nelecth = res_nelectnew
1534      else if (sign(res_nelectl,res_nelectnew) /= res_nelectl) then
1535        fermieh = fermienew
1536        res_nelecth = res_nelectnew
1537      else if (sign(res_nelecth,res_nelectnew) /= res_nelecth) then
1538        fermiel = fermienew
1539        res_nelectl = res_nelectnew
1540      end if
1541 
1542 !    are we within the tolerance ?
1543      if ((abs(res_nelectnew) < fermitol).or.(nn == max_it)) then
1544        fermie = fermienew
1545        rho = rhotry
1546        if(debug_rec) then
1547          write (msg,'(a,es11.4e2,a,i4)')' err, num_iter ', res_nelectnew, ' ',nn
1548          call wrtout(std_out,msg,'COLL')
1549          write(msg,'(a,50a)')' ',('-',ii=1,50)
1550          call wrtout(std_out,msg,'COLL')
1551        end if
1552        exit main
1553      end if
1554 
1555    end do main
1556 
1557  end if
1558 
1559 #ifdef HAVE_GPU_CUDA
1560 !deallocate array on GPU
1561  if(gputopo) then
1562    call dealloc_dens_cuda()
1563  end if
1564  call timab(613+swt_tm,1,tsec2)  !!--start time-counter: sync gpu-cpu
1565  call xmpi_barrier(mpi_enreg%comm_bandfft)
1566  call timab(613+swt_tm,2,tsec2)  !!--stop time-counter: sync gpu-cpu
1567 #endif
1568 
1569  call timab(609,2,tsec)
1570 end subroutine fermisolverec

ABINIT/first_rec [ Functions ]

[ Top ] [ Functions ]

NAME

 first_rec

FUNCTION

 When recursion method is used, in the first step this routine
 compute some quantities which are used in the rest of the calculation.

COPYRIGHT

  Copyright (C) 2009-2018 ABINIT group (MMancini)
  This file is distributed under the terms of the
  GNU General Public License, see ~abinit/COPYING
  or http://www.gnu.org/copyleft/gpl.txt .

INPUTS

  dtset <type(dataset_type)>=all input variables for this dataset:
   | recgratio =fine/coarse grid ratio
   | recptrott =trotter parameter
   | tsmear    =temperature
   | recrcut   =tut radius in recursion (range of iteration)
   | ngfft(18) =FFT grid used as real (fine) grid in recursion
  psps <type(pseudopotential_type)>=variables related to pseudo-potentials

OUTPUT

SIDE EFFECTS

  rset <type(recursion_type)>=variables related to recursion method
   | debug<logical> = T if debugging is used
   | inf <type(metricrec_type)>=information concerning the infinitesimal metrics
   | ngfftrec(18) =truncated (or not, if not ngfftrec=ngfft)FFT grid used as real grid in recursion.
   | nfftrec =product(ngfftrec(1:3))
   | tronc<logical> = T if truncation is effectively used
   | ZT_p = fourier transform of the green_kernel calculated on the fine grid

NOTES

PARENTS

      scfcv

CHILDREN

      cpu_distribution,cudarec,get_pt0_pt1,green_kernel,init_nlpsprec
      random_number,recursion,reshape_pot,timab,timein,wrtout,xmpi_sum

SOURCE

2266 subroutine first_rec(dtset,psps,rset)
2267 
2268 
2269 !This section has been created automatically by the script Abilint (TD).
2270 !Do not modify the following lines by hand.
2271 #undef ABI_FUNC
2272 #define ABI_FUNC 'first_rec'
2273 !End of the abilint section
2274 
2275  implicit none
2276 
2277 !Arguments ------------------------------------
2278 ! scalars
2279  type(dataset_type),intent(in) :: dtset
2280  type(pseudopotential_type),intent(in) :: psps
2281  type(recursion_type),intent(inout) :: rset
2282 !Local variables-------------------------------
2283 !scalars
2284  integer  :: nfftrec,trotter,ii,dim_trott
2285  real(dp) :: tsmear,beta,rtrotter
2286  character(len=500) :: msg
2287 !arrays
2288  integer  :: ngfftrec(18)
2289  real(dp) :: tsec(2)
2290 #ifdef HAVE_GPU_CUDA
2291  integer  :: max_rec,ierr,testpts,swt_tm
2292  real(dp) :: rho,tm_ratio
2293  real(dp) :: time_cu,time_f
2294  type(recursion_type) :: rset_test
2295  type(recparall_type) :: parold
2296  integer :: trasl(3)
2297  real(dp) :: tsec2(2),tsec3(2)
2298  real(dp) :: aloc(0,1),b2loc(0,1)
2299  real(dp) :: dm_projec(0,0,0,1,1)
2300  real(dp) :: exppot(0:dtset%nfft-1)
2301  real(dp),allocatable :: exppotloc(:)
2302  real(cudap),allocatable :: aloc_cu(:),b2loc_cu(:)
2303 #endif
2304 
2305 ! *************************************************************************
2306 
2307  call timab(601,1,tsec)  !!--Start time-counter: initialisation
2308 
2309  MSG_WARNING("RECURSION")
2310  if(dtset%recgratio>1) then
2311    write(msg,'(a)')'COARSE GRID IS USED'
2312    call wrtout(std_out,msg,'COLL')
2313  end if
2314 
2315 !--Initialisation
2316  trotter = dtset%recptrott  !--Trotter parameter
2317  tsmear  = dtset%tsmear     !--Temperature
2318  beta    = one/tsmear       !--Inverse of temperature
2319 
2320 !--Rewriting the trotter parameter
2321  dim_trott = max(0,2*trotter-1)
2322  rtrotter  = max(half,real(trotter,dp))
2323 
2324  write (msg,'(2a)')ch10,'==== FIRST CYCLE RECURSION ========================='
2325  call wrtout(std_out,msg,'COLL')
2326 
2327 
2328  ngfftrec = rset%ngfftrec
2329  nfftrec = rset%nfftrec
2330 !------------------------------------------------
2331 !--TRONCATION OF THE BOX: determines new dimensions
2332 !--Now in InitRec
2333 !--------------------------------------------------------
2334 !--DEFINITION PAW VARIABLES COARSE-FINE GRID  TO USE TRANSGRID--INGRID FUNCTIONS
2335 !--Now these variables are defined into gstate by InitRec
2336 
2337 !--------------------------------------------------------
2338 !--COMPUTATION OF THE FOURIER TRANSFORM OF THE GREEN KERNEL (only once)
2339  write (msg,'(a)')' - green kernel calculation -----------------------'
2340  call wrtout(std_out,msg,'COLL')
2341  ABI_ALLOCATE(rset%ZT_p,(1:2,0: nfftrec-1))
2342  call timab(601,2,tsec)
2343  call green_kernel(rset%ZT_p,rset%inf%rmet,rset%inf%ucvol,rtrotter/beta,rset%mpi,ngfftrec,nfftrec)
2344  call timab(601,1,tsec)
2345  write(msg,'(a,50a)')' ',('-',ii=1,50)
2346  call wrtout(std_out,msg,'COLL')
2347 !!--end computation of the fourier transform of the Green kernel
2348 
2349 !!-----------------------------------
2350 !!--ROUTINE FOR THE CALCULATION OF THE NON-LOCAL PSEUDO
2351 !--Now these variables here by  Init_nlpspRec
2352  call Init_nlpspRec(four*tsmear*rtrotter,psps,rset%nl,rset%inf,rset%ngfftrec,rset%debug)
2353 
2354 !!-----------------------------------
2355 !--Load distribution on procs when GPU are present
2356 #if defined HAVE_GPU_CUDA
2357 
2358 !--Test timing only if exists GPU and they are not equal to the cpus
2359  if(rset%tp == 4) then
2360    parold = rset%par
2361    ii = 0
2362    time_f = zero
2363    time_cu = zero
2364    call random_number(exppot)  !   exppot = one
2365 
2366    if(rset%gpudevice == -1) then
2367 !    --Test CPUS
2368      swt_tm = 0
2369      testpts = min(rset%par%npt, 20)
2370      call timein(tsec2(1),tsec2(2))
2371      if(rset%tronc) then
2372        ABI_ALLOCATE(exppotloc,(0:nfftrec-1))
2373        do while(ii< testpts)
2374          trasl = -(/1,2,3/)+ngfftrec(:3)/2
2375          call reshape_pot(trasl,dtset%nfft,nfftrec,dtset%ngfft(:3),ngfftrec(:3),&
2376 &         exppot,exppotloc)
2377          call recursion(exppotloc,0,0,0, &
2378 &         aloc, &
2379 &         b2loc, &
2380 &         rho,&
2381 &         0, rset%efermi,tsmear,rtrotter,dim_trott, &
2382 &         rset%ZT_p, &
2383 &         dtset%rectolden,dtset%typat, &
2384 &         rset%nl,&
2385 &         rset%mpi,nfftrec,ngfftrec,rset%inf,&
2386 &         6,dtset%natom,dm_projec,0)
2387          ii=ii+1
2388        end do
2389        ABI_DEALLOCATE(exppotloc)
2390      else
2391        do while(ii< testpts)
2392          call recursion(exppot,0,0,0, &
2393 &         aloc, &
2394 &         b2loc, &
2395 &         rho,&
2396 &         0, rset%efermi,tsmear,rtrotter,dim_trott, &
2397 &         rset%ZT_p, &
2398 &         dtset%rectolden,dtset%typat, &
2399 &         rset%nl,&
2400 &         rset%mpi,nfftrec,ngfftrec,rset%inf,&
2401 &         6,dtset%natom,dm_projec,0)
2402          ii=ii+1
2403        end do
2404      end if
2405      call timein(tsec3(1),tsec3(2))
2406      time_f = (tsec3(1)-tsec2(1))/real(testpts,dp)
2407      time_f = time_f*time_f
2408    else
2409 !    --Test GPUS
2410      swt_tm = 1
2411      rset_test = rset
2412      rset_test%GPU%par%npt = max(rset%GPU%nptrec,100)
2413      rset_test%min_nrec = 0
2414      call get_pt0_pt1(dtset%ngfft(:3),dtset%recgratio,0,&
2415 &     rset_test%GPU%par%npt,rset_test%GPU%par)
2416 
2417 
2418      ABI_ALLOCATE(aloc_cu,(rset_test%GPU%par%npt))
2419      ABI_ALLOCATE(b2loc_cu,(rset_test%GPU%par%npt))
2420      call timein(tsec2(1),tsec2(2))
2421      call cudarec(rset_test, exppot,aloc_cu,b2loc_cu,&
2422 &     beta,trotter,dtset%rectolden,dtset%recgratio,dtset%ngfft,max_rec)
2423      call timein(tsec3(1),tsec3(2))
2424      ABI_DEALLOCATE(aloc_cu)
2425      ABI_DEALLOCATE(b2loc_cu)
2426 
2427      time_cu = (tsec3(1)-tsec2(1))/real(rset_test%GPU%par%npt,dp)
2428      time_cu = time_cu*time_cu
2429    end if
2430 
2431 
2432 !  --Get Total Times
2433    call xmpi_sum(time_f,rset%mpi%comm_bandfft,ierr)
2434    call xmpi_sum(time_cu,rset%mpi%comm_bandfft,ierr)
2435 
2436 !  --Average Total Times
2437    time_f   = sqrt(time_f/real(rset%mpi%nproc-rset%ngpu,dp))
2438    time_cu  = sqrt(time_cu/real(rset%ngpu,dp))
2439    tm_ratio = time_f/time_cu
2440 
2441 
2442    write(msg,'(3(a25,f10.5,a))')&
2443 &   ' Time for cpu recursion ',time_f,ch10,&
2444 &   ' Time for gpu recursion ',time_cu,ch10,&
2445 &   ' Time ratio             ',tm_ratio,ch10
2446    call wrtout(std_out,msg,'COLL')
2447 
2448 
2449 !  tm_ratio =1.20d2! 0.d0! 1.21d0
2450    rset%par = parold
2451 !  --Compute the work-load distribution on devices (gpu,cpu)
2452    if(tm_ratio>1.5d0 .and. time_cu>zero)then
2453      rset%load = 1
2454      call cpu_distribution(dtset%recgratio,rset,dtset%ngfft(:3),tm_ratio,1)
2455    else
2456      rset%gpudevice = -1
2457    end if
2458  end if
2459 
2460 #endif
2461 
2462 
2463 !------------------------------------------------------------
2464 !--DETERMINING WHICH POINT WILL COMPUTE THAT PROC
2465 !--Now these variables are defined into gstate by Init_rec
2466 
2467  write (msg,'(2a)')ch10,'==== END FIRST CYCLE RECURSION ====================='
2468  call wrtout(std_out,msg,'COLL')
2469  call timab(601,2,tsec) !!--stop time-counter: initialisation
2470 
2471 end subroutine first_rec

ABINIT/gran_potrec [ Functions ]

[ Top ] [ Functions ]

NAME

 gran_potrec

FUNCTION

 This routine computes the local part of the grand-potential at a point using a path integral,
 in the recursion method.

INPUTS

  an, bn2 : coefficient given by the recursion.
  nrec=order of recursion
  trotter=trotter parameter
  mult=a multiplicator for computing grand-potential (2 for non-spin-polarized system)
  debug_rec=debugging variable
  n_pt_integ=points for computation of path integral
  xmax= maximum point on the x-axis for integration

OUTPUT

  ene_out=grand-potential at the point
  if debug_rec=T then ene_out1,ene_out2,ene_out3,ene_out4 are
  the different path branch contriubutions to the grand-potential.
  In reality it is not the gren potential but the
  grand-potential (omega=-PV) divided by -T

PARENTS

      vtorhorec

CHILDREN

      timab,wrtout

NOTES

  in reality it is not the gren potential but the grand-potential (omega=-PV) divided by -T
  at this time :
       - mult should be not used
       - the routine should be integraly rewrited and use the routine recursion.
       - only modified for p /= 0

SOURCE

1740 subroutine gran_potrec(an,bn2,nrec,trotter,ene_out, mult, &
1741 &                     debug_rec,n_pt_integ,xmax,&
1742 &                     ene_out1,ene_out2,ene_out3,ene_out4)
1743 
1744 
1745 !This section has been created automatically by the script Abilint (TD).
1746 !Do not modify the following lines by hand.
1747 #undef ABI_FUNC
1748 #define ABI_FUNC 'gran_potrec'
1749 !End of the abilint section
1750 
1751  implicit none
1752 
1753 !Arguments -------------------------------
1754 !scalars
1755  integer,intent(in) :: n_pt_integ,nrec,trotter
1756  logical,intent(in) :: debug_rec
1757  real(dp), intent(in) :: mult,xmax
1758  real(dp),intent(inout) :: ene_out,ene_out1,ene_out2,ene_out3,ene_out4 !vz_i
1759 !arrays
1760  real(dp), intent(in) :: an(0:nrec),bn2(0:nrec)
1761 
1762 !Local variables-------------------------------
1763 !scalars
1764  integer, parameter :: level = 7
1765  integer, save :: first = 1
1766  integer :: ii,kk,n_pt_integ_path2
1767  real(dp) :: epsilon,step,twotrotter,xmin,dr_step
1768  complex(dpc) :: D,Dnew,Dold,N,Nnew,Nold,dz_path,ene_acc,ene_acc1,ene_acc2
1769  complex(dpc) :: ene_acc3,ene_acc4
1770  complex(dpc) :: z_path,delta_calc
1771  character(len=500) :: message
1772 !arrays
1773  real(dp) :: tsec(2)
1774 
1775 ! *************************************************************************
1776 
1777 
1778  call timab(611,1,tsec)
1779 
1780 !structured debugging if debug_rec=T : print detailled result the first time we enter gran_potrec
1781  if(debug_rec .and. first==1)then
1782    write(message,'(a)')' '
1783    call wrtout(std_out,message,'PERS')
1784    write(message,'(a)')' gran_potrec : enter '
1785    call wrtout(std_out,message,'PERS')
1786    write(message,'(a,i8)')'n_pt_integ ' , n_pt_integ
1787    call wrtout(std_out,message,'COLL')
1788    first=0
1789  end if
1790 
1791  ene_out = zero
1792  ene_acc = czero
1793  ene_acc1 = czero
1794  ene_acc2 = czero
1795  ene_acc3 = czero
1796  ene_acc4 = czero
1797 
1798 
1799 !path parameters
1800 !n_pt_integ = 2500
1801  xmin = -half
1802  step = (xmax-xmin)/real(n_pt_integ,dp)
1803  if(trotter==0)then
1804    twotrotter = one
1805    epsilon = .5d-1
1806  else
1807    twotrotter = two*real(trotter,dp)
1808    epsilon = half*sin( pi/twotrotter)
1809  end if
1810 
1811 !xmin = -abs(xmin)**(1.d0/twotrotter)
1812 
1813 !####################################################################
1814 ![xmax + i*epsilon,xmin + i*epsilon]
1815  dr_step = one/real(n_pt_integ,dp)
1816  dz_path = -cmplx((xmax-xmin)*dr_step,zero,dp)
1817  path1:  do ii = 0,n_pt_integ
1818 !  z_path = cmplx(xmin + real(ii,dp)*(xmax-xmin)*dr_step,epsilon,dp)
1819    z_path = cmplx(xmin,epsilon,dp) - real(ii,dp)*dz_path
1820    Nold = czero
1821    Dold = cone
1822    N = cone
1823    D = z_path - cmplx(an(0),zero,dp)
1824 
1825    do kk=1,nrec
1826      Nnew = (z_path - cmplx(an(kk),zero,dp))*N - cmplx(bn2(kk),zero,dp)*Nold
1827      Dnew = (z_path - cmplx(an(kk),zero,dp))*D - cmplx(bn2(kk),zero,dp)*Dold
1828 
1829      Nold = N
1830      Dold = D
1831      N = Nnew
1832      D = Dnew
1833 
1834      if(kk/=nrec)then
1835        if((bn2(kk+1)<tol14))exit
1836      end if
1837 
1838    end do
1839 
1840 !  <r|1/(z-e**(-beta/(2p)*(H-mu)))|r> dz
1841    delta_calc = func_rec(z_path,twotrotter)* N/D *dz_path
1842    if(ii==0.or.ii==n_pt_integ)then
1843      ene_acc = ene_acc + half*delta_calc
1844      if(debug_rec)  ene_acc1 = ene_acc1 + half*delta_calc
1845    else
1846      ene_acc = ene_acc + delta_calc
1847      if(debug_rec)  ene_acc1 = ene_acc1 + delta_calc
1848    end if
1849  end do path1
1850 
1851 !####################################################################
1852 ![xmin + i*epsilon,xmin]
1853  if(epsilon/step>4.d0)then
1854    n_pt_integ_path2 = int(epsilon/step)+1
1855  else
1856    n_pt_integ_path2 = 5
1857  end if
1858  n_pt_integ_path2 = n_pt_integ
1859  dr_step = one/real(n_pt_integ_path2,dp)
1860  dz_path = -cmplx(zero,epsilon*dr_step,dp)
1861  path2:  do ii = 0,n_pt_integ_path2
1862 !  z_path = cmplx(xmin,real(ii,dp)*epsilon*dr_step,dp)
1863    z_path = cmplx(xmin,zero,dp)-dz_path*real(ii,dp)
1864    Nold = czero
1865    Dold = cone
1866    N = cone
1867    D = z_path - cmplx(an(0),zero,dp)
1868 
1869    do kk=1,nrec
1870      Nnew = (z_path - cmplx(an(kk),zero,dp))*N - cmplx(bn2(kk),zero,dp)*Nold
1871      Dnew = (z_path - cmplx(an(kk),zero,dp))*D - cmplx(bn2(kk),zero,dp)*Dold
1872 
1873      Nold = N
1874      Dold = D
1875      N = Nnew
1876      D = Dnew
1877 
1878      if(kk/=nrec)then
1879        if((bn2(kk+1)<tol14))exit
1880      end if
1881 
1882    end do
1883 
1884 !  <r|1/(z-e**(-beta/(2p)*(H-mu)))|r> dz
1885    delta_calc = func_rec(z_path,twotrotter)* N/D *dz_path
1886    if(ii==0.or.ii==n_pt_integ_path2)then
1887      ene_acc = ene_acc + half*delta_calc
1888      if(debug_rec) ene_acc3 = ene_acc3 + half*delta_calc
1889    else
1890      ene_acc = ene_acc + delta_calc
1891      if(debug_rec) ene_acc3 = ene_acc3 + delta_calc
1892    end if
1893  end do path2
1894 
1895 
1896 
1897 !####################################################################
1898 ![xmin,0]
1899  if(xmin/=czero)then
1900    dr_step = one/real(n_pt_integ,dp)
1901    dz_path = cmplx(xmin*dr_step,zero,dp)
1902    path3:  do ii = 1,n_pt_integ !the integrand is 0 at 0
1903 !    z_path = cmplx(real(ii,dp)*xmin*dr_step,zero,dp)
1904      z_path = real(ii,dp)*dz_path
1905 
1906      Nold = czero
1907      Dold = cone
1908      N = cone
1909      D = z_path - cmplx(an(0),zero,dp)
1910 
1911      do kk=1,nrec
1912        Nnew = (z_path - cmplx(an(kk),zero,dp))*N - cmplx(bn2(kk),zero,dp)*Nold
1913        Dnew = (z_path - cmplx(an(kk),zero,dp))*D - cmplx(bn2(kk),zero,dp)*Dold
1914 
1915        Nold = N
1916        Dold = D
1917        N = Nnew
1918        D = Dnew
1919 
1920        if(kk/=nrec)then
1921          if((bn2(kk+1)<tol14))exit
1922        end if
1923      end do
1924 
1925 !    <r|1/(z-e**(-beta/(2p)*(H-mu)))|r> dz
1926      delta_calc = func_rec(z_path,twotrotter) * N/D *dz_path
1927      if(ii==n_pt_integ)then
1928        ene_acc = ene_acc +half*delta_calc
1929        if(debug_rec) ene_acc4 = ene_acc4 + half*delta_calc
1930      else
1931        ene_acc = ene_acc + delta_calc
1932        if(debug_rec) ene_acc4 = ene_acc4 +delta_calc
1933      end if
1934    end do path3
1935  end if
1936 
1937 !####################################################################
1938 ![xmax,xmax+i*epsilon]
1939  dr_step = one/real(n_pt_integ_path2,dp)
1940  dz_path = cmplx(zero,epsilon*dr_step,dp)
1941  path4:  do ii = 0,n_pt_integ_path2
1942 !  z_path = cmplx(xmax,real(ii,dp)*epsilon*dr_step,dp)
1943    z_path = cmplx(xmax,0,dp)+real(ii,dp)*dz_path
1944 
1945    Nold = czero
1946    Dold = cone
1947    N = cone
1948    D = z_path - cmplx(an(0),zero,dp)
1949 
1950    do kk=1,nrec
1951      Nnew = (z_path - cmplx(an(kk),zero,dp))*N - cmplx(bn2(kk),zero,dp)*Nold
1952      Dnew = (z_path - cmplx(an(kk),zero,dp))*D - cmplx(bn2(kk),zero,dp)*Dold
1953 
1954      Nold = N
1955      Dold = D
1956      N = Nnew
1957      D = Dnew
1958 
1959      if(kk/=nrec)then
1960        if((bn2(kk+1)<tol14))exit
1961      end if
1962 
1963    end do
1964 
1965 !  <r|1/(z-e**(-beta/(2p)*(H-mu)))|r> dz
1966    delta_calc = func_rec(z_path,twotrotter) * N/D *dz_path
1967    if(ii==0.or.ii==n_pt_integ_path2)then
1968      ene_acc = ene_acc + half*delta_calc
1969      if(debug_rec) ene_acc2 = ene_acc2 + half*delta_calc
1970    else
1971      ene_acc = ene_acc + delta_calc
1972      if(debug_rec) ene_acc2 = ene_acc2 + delta_calc
1973    end if
1974  end do path4
1975 
1976  ene_out = mult*real(ene_acc*cmplx(zero,-piinv,dp),dp)
1977  if(debug_rec) then
1978    ene_out1 = mult*real(ene_acc1*cmplx(zero,-piinv,dp),dp)
1979    ene_out2 = mult*real(ene_acc2*cmplx(zero,-piinv,dp),dp)
1980    ene_out3 = mult*real(ene_acc3*cmplx(zero,-piinv,dp),dp)
1981    ene_out4 = mult*real(ene_acc4*cmplx(zero,-piinv,dp),dp)
1982  end if
1983 
1984  call timab(611,2,tsec)
1985 
1986  contains
1987 
1988 !func_rec(z_path,twotrotter) = log(cone+z_path**twotrotter)
1989 
1990    function func_rec(z,x)
1991 
1992 
1993 !This section has been created automatically by the script Abilint (TD).
1994 !Do not modify the following lines by hand.
1995 #undef ABI_FUNC
1996 #define ABI_FUNC 'func_rec'
1997 !End of the abilint section
1998 
1999    implicit none
2000 
2001    complex(dpc) :: func_rec
2002    complex(dpc),intent(in) :: z
2003    real(dp),intent(in) :: x
2004 
2005    func_rec = log(cone+z**x)
2006 
2007  end function func_rec
2008 
2009 end subroutine gran_potrec

ABINIT/green_kernel [ Functions ]

[ Top ] [ Functions ]

NAME

 green_kernel

FUNCTION

 this routine compute the fourrier transform of the Green kernel for the
 recursion method

INPUTS

  inf_rmet=define the  infinitesimal metric : rprimd*(transpose(rprimd)) divided
    by the number of discretisation point
  inf_ucvol=volume of infinitesimal cell
  mult=variance of the Gaussian (=rtrotter/beta)
  mpi_enreg=information about MPI parallelization
  ngfft=contain all needed information about 3D FFT, see ~abinit/doc/variables/vargs.htm#ngfft
  nfft=total number of fft grid points
  debug_rec=debugging variable

OUTPUT

  ZT_p=fourier transforme of the Green kernel

PARENTS

      first_rec

CHILDREN

      fourdp,timab,wrtout

NOTES

  at this time :
       - need a rectangular box

SOURCE

2509 subroutine green_kernel(ZT_p,inf_rmet,inf_ucvol,mult,mpi_enreg,ngfft,nfft)
2510 
2511 
2512 !This section has been created automatically by the script Abilint (TD).
2513 !Do not modify the following lines by hand.
2514 #undef ABI_FUNC
2515 #define ABI_FUNC 'green_kernel'
2516 !End of the abilint section
2517 
2518  implicit none
2519 
2520 !Arguments -------------------------------
2521 !scalars
2522  integer,intent(in) :: nfft
2523  real(dp),intent(in) :: inf_ucvol,mult
2524  type(MPI_type),intent(in) :: mpi_enreg
2525 !arrays
2526  integer,intent(in) :: ngfft(18)
2527  real(dp),intent(in) :: inf_rmet(3,3)
2528  real(dp),intent(out) :: ZT_p(1:2,0:nfft-1)
2529 
2530 !Local variables-------------------------------
2531 !scalars
2532  integer,parameter :: n_green_max=5
2533  integer :: ii,isign,jj,kk,n_green,xx,yy,zz
2534  real(dp) :: acc, norme
2535  character(len=500) :: msg
2536 !arrays
2537  real(dp) :: tsec(2)
2538  real(dp),allocatable :: T_p(:)
2539 
2540 ! *************************************************************************
2541 
2542  call timab(603,1,tsec)
2543 
2544  norme = (mult/pi)**(onehalf)
2545 
2546  ABI_ALLOCATE(T_p,(0:nfft-1))
2547 
2548 !n_green should be better chosen for non rectangular cell
2549  do xx=1, n_green_max
2550    n_green = xx
2551    if(exp(-mult*dsq_green(xx*ngfft(1),0,0,inf_rmet))<tol14 &
2552 &   .and. exp(-mult*dsq_green(0,xx*ngfft(2),0,inf_rmet))<tol14 &
2553 &   .and. exp(-mult*dsq_green(0,0,xx*ngfft(3),inf_rmet))<tol14 ) exit
2554  end do
2555 
2556  acc = zero
2557  T_p = zero
2558  do kk = 0,ngfft(3)-1
2559    do jj = 0,ngfft(2)-1
2560      do ii = 0,ngfft(1)-1
2561 
2562        do xx=-n_green,n_green-1
2563          do yy=-n_green,n_green-1
2564            do zz=-n_green,n_green-1
2565 
2566              T_p(ii+ngfft(1)*jj+ngfft(1)*ngfft(2)*kk) = T_p(ii+ngfft(1)*jj+ngfft(1)*ngfft(2)*kk)+ &
2567 &             exp(-mult*dsq_green(ii+xx*ngfft(1),jj+yy*ngfft(2),kk+zz*ngfft(3),inf_rmet))
2568 
2569            end do
2570          end do
2571        end do
2572 
2573        T_p(ii+ngfft(1)*jj+ngfft(1)*ngfft(2)*kk) = norme*T_p(ii+ngfft(1)*jj+ngfft(1)*ngfft(2)*kk)
2574        acc = acc + inf_ucvol* T_p(ii+ngfft(1)*jj+ngfft(1)*ngfft(2)*kk)
2575 
2576      end do
2577    end do
2578  end do
2579 
2580  T_p(:)= (one/acc)*T_p(:)
2581 
2582 !if(debug_rec)then
2583  write(msg,'(a,d12.3,2(2a,i8),2(2a,3d12.3),2a,d16.6)')&
2584 & ' on the boundary    ', exp(-mult*dsq_green(ngfft(1),0,0,inf_rmet)),ch10, &
2585 & ' no zero            ', count(T_p>tol14),ch10, &
2586 & ' n_green            ', n_green,ch10, &
2587 & ' erreur_n_green     ', exp(-mult*dsq_green(n_green*ngfft(1),0,0,inf_rmet)), &
2588 & exp(-mult*dsq_green(0,n_green*ngfft(2),0,inf_rmet)), &
2589 & exp(-mult*dsq_green(0,0,n_green*ngfft(3),inf_rmet)),ch10,&
2590 & ' erreur_troncat     ', T_p(ngfft(1)/2),  &
2591 & T_p(ngfft(1)*(ngfft(2)/2)), &
2592 & T_P(ngfft(1)*ngfft(2)*(ngfft(3)/2)),ch10, &
2593 & ' erreurT_p          ',abs(acc-1.d0)
2594  call wrtout(std_out,msg,'COLL')
2595 !endif
2596 
2597 
2598  isign = -1
2599  call fourdp(1,ZT_p,T_p,isign,mpi_enreg,nfft,ngfft,1,0)
2600 
2601  ABI_DEALLOCATE(T_p)
2602 
2603  ZT_p(:,:) = real(nfft,dp)*ZT_p
2604 
2605 
2606  call timab(603,2,tsec)
2607 
2608  contains
2609 
2610    function dsq_green(ii,jj,kk,inf_rmet)
2611 
2612 
2613 !This section has been created automatically by the script Abilint (TD).
2614 !Do not modify the following lines by hand.
2615 #undef ABI_FUNC
2616 #define ABI_FUNC 'dsq_green'
2617 !End of the abilint section
2618 
2619    real(dp) :: dsq_green
2620    integer,intent(in) :: ii,jj,kk
2621    real(dp),intent(in) :: inf_rmet(3,3)
2622    dsq_green= inf_rmet(1,1)*dble(ii**2)&
2623 &   +inf_rmet(2,2)*dble(jj**2)&
2624 &   +inf_rmet(3,3)*dble(kk**2)&
2625 &   +two*(inf_rmet(1,2)*dble(ii*jj)&
2626 &   +inf_rmet(2,3)*dble(jj*kk)&
2627 &   +inf_rmet(3,1)*dble(kk*ii))
2628  end function dsq_green
2629 
2630 end subroutine green_kernel

ABINIT/m_vtorhorec [ Modules ]

[ Top ] [ Modules ]

NAME

  m_vtorhorec

FUNCTION

COPYRIGHT

  Copyright (C) 2008-2018 ABINIT group (SLeroux, MMancini).
  This file is distributed under the terms of the
  GNU General Public License, see ~abinit/COPYING
  or http://www.gnu.org/copyleft/gpl.txt .

PARENTS

CHILDREN

SOURCE

20 #if defined HAVE_CONFIG_H
21 #include "config.h"
22 #endif
23 
24 #include "abi_common.h"
25 
26 module m_vtorhorec
27 
28  use defs_basis
29  use defs_abitypes
30  use defs_datatypes
31  use defs_rectypes
32  use m_xmpi
33  use m_pretty_rec
34  use m_errors
35  use m_abicore
36  use m_per_cond
37 
38  use m_time,             only : timein, timab
39  use m_rec,              only : Calcnrec, init_nlpsprec, cpu_distribution
40  use m_rec_tools,        only : reshape_pot, trottersum, get_pt0_pt1
41  use m_spacepar,         only : symrhg
42  use m_fourier_interpol, only : transgrid
43  use m_fft,              only : fourdp
44 
45 #ifdef HAVE_GPU_CUDA
46  use m_initcuda,       only : cudap
47  use m_hidecudarec
48  use m_xredistribute
49 #endif
50 
51  implicit none
52 
53  private

ABINIT/nlenergyrec [ Functions ]

[ Top ] [ Functions ]

NAME

 nlenergyrec

FUNCTION

 During recursion, it computes the non-local energy

INPUTS

  rset<recursion_type>=contains all recursion parameters
  exppot=exponential of -1/tsmear*vtrial (computed only once in vtorhorec)
  tsmear=temperature (Hartree)
  trotter=trotter parameter
  tol=tolerance criteria for stopping recursion_nl
  ngfft=information about FFT(dtset%ngfft a priori different from ngfftrec)
  mpi_enreg=information about MPI paralelisation
  rset<recursion_type> contains all parameter of recursion
  typat(natom)=type of pseudo potential associated to any atom
  natom=number of atoms

OUTPUT

  enl=non-local energy

SIDE EFFECTS

NOTES

PARENTS

      vtorhorec

CHILDREN

      recursion_nl,reshape_pot,timab,wrtout,xmpi_sum

SOURCE

2046 subroutine nlenergyrec(rset,enl,exppot,ngfft,natom,typat,tsmear,trotter,tol)
2047 
2048 
2049 !This section has been created automatically by the script Abilint (TD).
2050 !Do not modify the following lines by hand.
2051 #undef ABI_FUNC
2052 #define ABI_FUNC 'nlenergyrec'
2053 !End of the abilint section
2054 
2055  implicit none
2056 
2057 !Arguments ------------------------------------
2058 !Scalar
2059  integer , intent(in)  :: natom,trotter
2060  real(dp), intent(in)  :: tsmear,tol
2061  type(recursion_type),intent(in) :: rset
2062  real(dp), intent(out) :: enl
2063 !Arrays
2064  integer , intent(in)  :: typat(natom),ngfft(18)
2065  real(dp), intent(in)  :: exppot(0:ngfft(1)*ngfft(2)*ngfft(3)-1)
2066 !Local variables-------------------------------
2067  integer :: iatom,jatom
2068  integer :: ii,ipsp,dim_trott
2069  integer :: ierr,me_count
2070  integer :: ilmn,jlmn,ilm,jlm,in,jn,il
2071  character(len=500) :: msg
2072  logical  :: tronc
2073  real(dp) :: rho_nl,normali,mult
2074  type(mpi_type):: mpi_loc
2075 !Arrays
2076  integer  :: gcart_loc(3,natom)
2077  integer  :: ngfftrec(3),trasl(3)
2078  real(dp) :: tsec(2)
2079  real(dp) :: un0(0:rset%nfftrec)
2080  real(dp),pointer :: projec(:,:,:,:,:)
2081  real(dp),allocatable ::  exppotloc(:)
2082  real(dp) :: proj_arr(0:rset%ngfftrec(1)-1,0:rset%ngfftrec(2)-1,0:rset%ngfftrec(3)-1)
2083 
2084 ! *************************************************************************
2085 
2086 
2087  call timab(612,1,tsec) !!--start time-counter: nlenergyrec
2088 
2089  if(rset%debug)then
2090    write(msg,'(80a,a,a)') ('=',ii=1,80),ch10,' nlenergyrec : enter'
2091    call wrtout(std_out,msg,'PERS')
2092  end if
2093 
2094  write(msg,'(a)')' -- nlenergyrec -----------------------------------'
2095  call wrtout(std_out,msg,'COLL')
2096 
2097 !--Initialisation variables
2098  enl = zero
2099  mult = two !--is twice for non-spinned systems
2100  ngfftrec = rset%ngfftrec(:3)
2101  gcart_loc = rset%inf%gcart
2102  mpi_loc = rset%mpi
2103  me_count = 0
2104  dim_trott = max(0,2*trotter-1)
2105  nullify(projec)
2106  ABI_ALLOCATE(projec,(0:rset%ngfftrec(1)-1,0:rset%ngfftrec(2)-1,0:rset%ngfftrec(3)-1,rset%nl%lmnmax,natom))
2107  projec = zero
2108 
2109  tronc = rset%tronc  !--True if troncation is used
2110  if(tronc)   then
2111    ABI_ALLOCATE(exppotloc,(0:rset%nfftrec-1))
2112  end if
2113 
2114 
2115 !--LOOP ON ATOMS to create projectors-vector
2116  atomloop1: do iatom = 1, natom
2117    ipsp = typat(iatom)
2118 !  --Aquisition,reshape,translation,rotation of the projectors vector
2119    do ilmn = 1,rset%nl%lmnmax
2120      in = rset%nl%indlmn(3,ilmn,ipsp)
2121 !    --Projectors vector in 3-composant vector
2122      projec(:,:,:,ilmn,iatom) = reshape(rset%nl%projec(:,ilmn,ipsp),shape=shape(projec(:,:,:,1,1)))
2123 !    --Moving the projectors vector on the center of the grid
2124      do ii=1,3
2125        projec(:,:,:,ilmn,iatom) = cshift(projec(:,:,:,ilmn,iatom),shift=ngfftrec(ii)/2-gcart_loc(ii,iatom),dim=ii)
2126      end do
2127    end do
2128 
2129  end do atomloop1
2130 
2131 
2132 !##################################################################
2133 !--LOOP ON ATOMS (MAIN LOOP)
2134  atomloop: do iatom = 1, natom
2135    ipsp = typat(iatom)
2136 
2137 !  --If troncation is present, the considered atom has to be in the
2138 !  center of the grid so atoms, potential and projectors have to be translated
2139    if(tronc) then
2140      trasl = -rset%inf%gcart(:,iatom)+ngfftrec/2
2141 !    --Translation of atoms
2142      do jatom=1,natom
2143        gcart_loc(:,jatom) = rset%inf%gcart(:,jatom)+trasl
2144        gcart_loc(:,jatom) = modulo(gcart_loc(:,jatom),ngfft(:3))
2145 !      --Translation of non-local projectors
2146        do ilmn = 1,rset%nl%lmnmax
2147          projec(:,:,:,ilmn,jatom) = reshape(rset%nl%projec(:,ilmn,typat(jatom)),shape=shape(projec(:,:,:,1,1)))
2148          do ii=1,3
2149            projec(:,:,:,ilmn,jatom) = eoshift(projec(:,:,:,ilmn,jatom),shift=ngfftrec(ii)/2-gcart_loc(ii,jatom),dim=ii)
2150          end do
2151        end do
2152      end do
2153 
2154 !    --Translation of the potential
2155      call reshape_pot(trasl,ngfft(1)*ngfft(2)*ngfft(3),rset%nfftrec,ngfft(:3),ngfftrec,exppot,exppotloc)
2156    end if
2157 
2158 !  --Loop on projectors
2159    projloop: do ilmn = 1,rset%nl%lmnmax
2160      me_count = iatom+ilmn*natom-2 !--counter of the number of iteration
2161 !    --Only the proc me compute
2162      if(mpi_loc%me==mod(me_count,mpi_loc%nproc)) then
2163        ilm = rset%nl%indlmn(4,ilmn,ipsp)
2164        proj_arr = zero
2165        do jlmn = 1,rset%nl%lmnmax
2166          jlm = rset%nl%indlmn(4,jlmn,ipsp)
2167          if(ilm==jlm) then
2168            in = rset%nl%indlmn(3,ilmn,ipsp)
2169            jn = rset%nl%indlmn(3,jlmn,ipsp)
2170            il = rset%nl%indlmn(1,ilmn,ipsp)+1
2171            proj_arr(:,:,:) = proj_arr(:,:,:) + rset%nl%eivec(jn,in,il,ipsp)*projec(:,:,:,jlmn,iatom)
2172 !          write(std_out,*)'l,m,lm,n,n',il-1,rset%nl%indlmn(2,ilmn,ipsp),ilm,in,jn
2173 !          write(std_out,*)'eigevectors',rset%nl%eivec(jn,in,il,ipsp)
2174 
2175          end if
2176        end do
2177 
2178        un0 = pack(proj_arr(:,:,:),mask=.true.)
2179        normali = sum(un0*un0)*rset%inf%ucvol
2180        un0 = (one/sqrt(normali))*un0
2181 
2182        if(tronc)then
2183          call recursion_nl(exppotloc,un0,rho_nl,rset,rset%ngfftrec,&
2184 &         tsmear,trotter,dim_trott,tol,typat,&
2185 &         natom,projec)
2186        else
2187          call recursion_nl(exppot,un0,rho_nl,rset,rset%ngfftrec,&
2188 &         tsmear,trotter,dim_trott,tol,typat,&
2189 &         natom,projec)
2190        end if
2191 
2192        enl = enl+mult*rho_nl*rset%nl%eival(in,il,ipsp)*normali
2193      end if
2194 
2195    end do projloop
2196  end do atomloop
2197 
2198 !--Sum the contribution to the non-local energy computed by any procs
2199  call xmpi_sum(enl,mpi_loc%comm_bandfft,ierr)
2200 
2201  if(associated(projec))  then
2202    ABI_DEALLOCATE(projec)
2203  end if
2204  if(tronc)  then
2205    ABI_DEALLOCATE(exppotloc)
2206  end if
2207 
2208  if(rset%debug)then
2209    write(msg,'(80a,a,a)') ('=',ii=1,80),ch10,' nlenergyrec : exit'
2210    call wrtout(std_out,msg,'PERS')
2211  end if
2212 
2213  call timab(612,2,tsec)  !--stop  time-counter: nlenergyrec
2214 
2215 end subroutine nlenergyrec

ABINIT/recursion [ Functions ]

[ Top ] [ Functions ]

NAME

 recursion

FUNCTION

 This routine computes the recursion coefficients and the corresponding
 continued fraction to get the density at a point from a fixed potential.

INPUTS

  exppot=exponential of -1/tsmear*vtrial (computed only once in vtorhorec)
  coordx, coordy, coordz=coordonnees of the computed point
  nrec=order of recursion
  fermie=fermi energy (Hartree)
  tsmear=temperature (Hartree)
  dim_trott=dimension of the partial fraction decomposition
  rtrotter=trotter parameter (real)
  ZT_p=fourier transform of the Green krenel (computed only once in vtorhorec)
  typat(:)=type of psp associated to any atom
  tol=tolerance criteria for stopping recursion
  debug=debugging variable
  mpi_enreg=information about MPI paralelisation
  nfft=number of points in FFT grid
  ngfft=information about FFT
  metrec<type(metricrec_type)>=information concerning the infinitesimal metrics
  inf_ucvol=infinitesimal unit cell volume
  tim_fourdp=time counter for fourdp
  natom=number of atoms
  projec(ngfftrec(1),ngfftrec(2),ngfftrec(3),lmnmax,natom) is the  vector, on the ngfftrec grid containing
  the non-lacal projector $Y_{lm}(r-R_A)f_{lk}(r-R_A)
  tim= 0 if the time spent in the routine is not taken into account,1 otherwise. For example
  when measuring time for loading  balancing, we don't want to add the time spent in this to the
  total time calculation

OUTPUT

  rho_out=result of the continued fraction multiplied by a multiplicator
  an, bn2 : coefficient given by recursion.

SIDE EFFECTS

PARENTS

      first_rec,vtorhorec

CHILDREN

      fourdp,timab,trottersum,vn_nl_rec

NOTES

  at this time :
       - exppot should be replaced by ?
       - coord should be replaced by ?
       - need a rectangular box (rmet diagonal matrix)

SOURCE

2687 subroutine recursion(exppot,coordx,coordy,coordz,an,bn2,rho_out, &
2688 &                    nrec,fermie,tsmear,rtrotter,dim_trott, &
2689 &                    ZT_p, tol,typat, &
2690 &                    nlrec,mpi_enreg,&
2691 &                    nfft,ngfft,metrec,&
2692 &                    tim_fourdp,natom,projec,tim)
2693 
2694 
2695  use m_linalg_interfaces
2696 
2697 !This section has been created automatically by the script Abilint (TD).
2698 !Do not modify the following lines by hand.
2699 #undef ABI_FUNC
2700 #define ABI_FUNC 'recursion'
2701 !End of the abilint section
2702 
2703  implicit none
2704 
2705 !Arguments -------------------------------
2706 !scalars
2707  integer,intent(in) :: coordx,coordy,coordz,nfft,nrec,tim
2708  integer,intent(in) :: tim_fourdp,natom,dim_trott
2709  real(dp),intent(in) :: fermie,tol,tsmear,rtrotter
2710  real(dp), intent(out) :: rho_out
2711  type(MPI_type),intent(in) :: mpi_enreg
2712  type(nlpsprec_type),intent(in) :: nlrec
2713  type(metricrec_type),intent(in) :: metrec
2714 !arrays
2715  integer, intent(in) :: ngfft(18)
2716  integer, intent(in) :: typat(natom)
2717  real(dp), intent(in) :: ZT_p(1:2, 0:nfft-1)
2718  real(dp), intent(in) :: exppot(0:nfft-1)
2719  real(dp), intent(in) :: projec(0:,0:,0:,1:,1:)
2720  real(dp), intent(out) :: an(0:nrec),bn2(0:nrec)
2721 !Local variables-------------------------------
2722 !not used, debugging purpose only
2723 !for debugging purpose, detailled printing only once for density and ekin
2724 !scalars
2725  integer, parameter :: level = 7, minrec = 3
2726  integer  :: irec,isign,timab_id,ii
2727  real(dp) :: switchimu,switchu
2728  real(dp) :: bb,beta,mult,prod_b2,error,errold
2729  real(dp) :: inf_ucvol,pi_on_rtrotter,twortrotter,exp1,exp2
2730  complex(dpc) :: cinv2rtrotter,coeef_mu,facrec0
2731 ! character(len=500) :: msg
2732 !arrays
2733  real(dp) :: tsec(2)
2734  real(dp) :: inf_tr(3)
2735  real(dp) :: Zvtempo(1:2, 0:nfft-1)
2736  real(dp) :: unold(0:nfft-1),vn(0:nfft-1),un(0:nfft-1)
2737  complex(dpc) :: acc_rho(0:nrec)
2738  complex(dpc) :: D(0:dim_trott),Dold(0:dim_trott)
2739  complex(dpc) :: N(0:dim_trott),Nold(0:dim_trott)
2740 
2741 ! *************************************************************************
2742 
2743 !--If count time or not
2744  timab_id = 616; if(tim/=0) timab_id = 606;
2745 
2746  call timab(timab_id,1,tsec)
2747 
2748 !##############################################################
2749 !--Initialisation of metrics
2750  inf_ucvol = metrec%ucvol
2751  inf_tr = metrec%tr
2752  mult = two/inf_ucvol    !non-spined system
2753 
2754  beta = one/tsmear
2755 !--Variables for optimisation
2756  pi_on_rtrotter = pi/rtrotter
2757  twortrotter = two*rtrotter
2758  exp1 = exp((beta*fermie)/(rtrotter))
2759  exp2 = exp(beta*fermie/(twortrotter))
2760  cinv2rtrotter = cmplx(one/twortrotter,zero,dp)
2761  coeef_mu = cmplx(one/exp2,zero,dp)
2762 
2763 !--Initialisation of  an,bn,un....
2764  N = czero;  D = cone
2765  facrec0 = cone
2766  Nold = czero; Dold = czero
2767 
2768  an = zero; bn2 = zero;  bn2(0) = one
2769  bb = zero; vn  = zero;  unold  = zero
2770 !--u0 is a Dirac function
2771  un = zero
2772  un(coordx+ngfft(1)*(coordy+ngfft(2)*coordz)) = one/sqrt(inf_ucvol)
2773 
2774 !--Initialisation of accumulated density
2775  acc_rho = czero
2776 !--Initialisation of estimated error
2777  prod_b2 = twortrotter/exp1
2778  errold = zero
2779 
2780 !##############################################################
2781 !--Main loop
2782  maindo : do irec = 0, nrec
2783 
2784 !  --Get an and bn2 coef by the lanczos method
2785 
2786 !  --Computation of exp(-beta*V/8*p)*un or exp(-beta*V/4*p)*un
2787 !  depending on if nl part has to be calculated or not.
2788    vn = exppot * un
2789 
2790 !  --First Non-local psp contribution: (Id+sum_atom int dr1(E(r,r1))vn(r1))
2791 !  --Computation of exp(-beta*V_NL/4*p)*vn
2792    if(nlrec%nlpsp) then
2793      call timab(timab_id,2,tsec)
2794      call vn_nl_rec(vn,natom,typat,ngfft(:3),inf_ucvol,nlrec,projec)
2795      call timab(timab_id,1,tsec)
2796 
2797 !    --Computation of exp(-beta*V/8*p)*vn in nonlocal case
2798      vn = exppot * vn
2799    end if !--End if on nlrec%nlpsp
2800 
2801 !  --Convolution with the Green kernel
2802 !  --FFT of vn
2803    isign = -1
2804    call fourdp(1,Zvtempo,vn,isign,mpi_enreg,nfft,ngfft,1,tim_fourdp)
2805 
2806 !  --F(T)F(vn)
2807    do ii = 0,nfft-1
2808      switchu   = Zvtempo(1,ii)
2809      switchimu = Zvtempo(2,ii)
2810      Zvtempo(1,ii) = switchu*ZT_p(1,ii) - switchimu*ZT_p(2,ii)
2811      Zvtempo(2,ii) = switchu*ZT_p(2,ii) + switchimu*ZT_p(1,ii)
2812    end do
2813 
2814 !  --F^-1(F(T)F(vn))
2815    isign = 1
2816    call fourdp(1,Zvtempo,vn,isign,mpi_enreg,nfft,ngfft,1,tim_fourdp)
2817 
2818 !  --Computation of exp(-beta*V/8*p)*un or exp(-beta*V/4*p)*un
2819 !  depending on if nl part has to be calculated or not.
2820 
2821    vn = inf_ucvol * exppot * vn
2822 
2823 !  --Second Non-local psp contribution: (Id+sum_atom E(r,r1))vn
2824    if(nlrec%nlpsp) then
2825      call timab(timab_id,2,tsec)
2826      call vn_nl_rec(vn,natom,typat,ngfft(:3),inf_ucvol,nlrec,projec)
2827      call timab(timab_id,1,tsec)
2828 
2829 !    --Computation of exp(-beta*V/8*p)*vn in nonlocal case
2830      vn = exppot * vn
2831    end if !--End if on nlrec%nlpsp
2832 
2833 !  --Multiplication of a and b2 coef by exp(beta*fermie/(two*rtrotter)) must be done in the continued fraction computation
2834 !  --Computation of a and b2
2835    an(irec) = inf_ucvol*ddot(nfft,vn,1,un,1)
2836 
2837 !  --an must be positive real
2838 !  --We must compute bn2 and prepare for the next iteration
2839    if(irec<nrec)then
2840      do ii = 0,nfft-1
2841        switchu = un(ii)
2842        un(ii) = vn(ii)-an(irec)*un(ii)-bb*unold(ii)
2843        unold(ii) = switchu
2844        bn2(irec+1) = bn2(irec+1)+inf_ucvol*un(ii)*un(ii)
2845      end do
2846      bb = sqrt(bn2(irec+1))
2847      un = (one/bb)*un
2848    end if
2849 
2850 !  ######################################################
2851 !  --Density computation
2852 !  density computation is done inside the main looping, juste after the calculus of a and b2, in order to make
2853 !  it possible to stop the recursion at the needed accuracy, without doing more recursion loop than needed -
2854 !  further developpement
2855 
2856 !  !--using the property that: sum_i(bi*c)^2|(z-ai*c)=1/c*sum_i(bi)^2|(z/c-ai)
2857 !  !and for c =exp(-beta*fermie/(two*rtrotter)
2858 
2859 
2860    call trottersum(dim_trott,error,prod_b2,pi_on_rtrotter,&
2861 &   facrec0,coeef_mu,exp1,&
2862 &   an(irec),bn2(irec),&
2863 &   N,D,Nold,Dold)
2864 
2865 
2866    if(irec/=nrec .and. irec>=minrec)then
2867      if((bn2(irec+1)<tol14).or.(mult*error<tol.and.errold<tol)) exit
2868    end if
2869    errold = mult*error
2870  end do maindo
2871 !--Accumulated density
2872  rho_out = mult*real(cone-sum(N/D)*cinv2rtrotter,dp)
2873 
2874 
2875  call timab(timab_id,2,tsec)
2876 
2877  end subroutine recursion

ABINIT/recursion_nl [ Functions ]

[ Top ] [ Functions ]

NAME

 recursion_nl

FUNCTION

 Given a $|un>$ vector on the real-space grid this routine calculates
 the density in  $|un>$ by recursion method.

INPUTS

  exppot=exponential of -1/tsmear*vtrial (computed only once in vtorhorec)
  trotter=trotter parameter
  dim_trott=dimension of the partial fraction decomposition
  tsmear=temperature (Hartree)
  tol=tolerance criteria for stopping recursion_nl
  ngfft=information about FFT(dtset%ngfft a priori different from ngfftrec)
  rset<recursion_type> contains all parameter of recursion
  typat(natom)=type of pseudo potential associated to any atom
  natom=number of atoms
  projec(ngfftrec(1),ngfftrec(2),ngfftrec(3),lmnmax,natom) is the  vector, on the ngfftrec grid containing
  the non-lacal projector $Y_{lm}(r-R_A)f_{lk}(r-R_A)

OUTPUT

  rho_out=result of the continued fraction multiplied by a multiplicator

SIDE EFFECTS

  un(:,:,:)=initial vector on the grid. it is changed in output

PARENTS

      nlenergyrec

CHILDREN

      fourdp,timab,trottersum,vn_nl_rec,wrtout

NOTES

  at this time :
       - need a rectangular box (rmet diagonal matrix)

SOURCE

2920 subroutine recursion_nl(exppot,un,rho_out,rset,ngfft, &
2921   &                     tsmear,trotter,dim_trott,tol,typat,&
2922   &                     natom,projec)
2923 
2924 
2925  use m_linalg_interfaces
2926 
2927 !This section has been created automatically by the script Abilint (TD).
2928 !Do not modify the following lines by hand.
2929 #undef ABI_FUNC
2930 #define ABI_FUNC 'recursion_nl'
2931 !End of the abilint section
2932 
2933  implicit none
2934 
2935 !Arguments -------------------------------
2936 !scalars
2937  integer,intent(in) :: trotter,natom,dim_trott
2938  real(dp),intent(in) :: tol,tsmear
2939  type(recursion_type),intent(in) :: rset
2940  real(dp), intent(out) :: rho_out
2941 !arrays
2942  integer,intent(in) ::  typat(natom),ngfft(18)
2943  real(dp),intent(in) :: exppot(0:ngfft(1)*ngfft(2)*ngfft(3)-1)
2944  real(dp),intent(inout) :: un(0:rset%nfftrec-1)
2945  real(dp),pointer :: projec(:,:,:,:,:)
2946 !Local variables-------------------------------
2947 !scalars
2948  integer, parameter ::  minrec = 3
2949  integer  :: irec,isign,ii
2950  real(dp) :: bb,beta,mult,prod_b2,rtrotter
2951  real(dp) :: inf_ucvol,pi_on_rtrotter,twortrotter,exp1
2952  real(dp) :: exp2,error,errold
2953  real(dp) :: switchu,switchimu
2954  complex(dpc) :: facrec0,cinv2rtrotter,coeef_mu
2955  character(len=500) :: msg
2956  type(mpi_type),pointer:: mpi_loc
2957 !arrays
2958  real(dp):: tsec(2)
2959  real(dp):: inf_tr(3)
2960  real(dp):: an(0:rset%min_nrec),bn2(0:rset%min_nrec)
2961  real(dp):: vn(0:rset%nfftrec-1)
2962  real(dp):: unold(0:rset%nfftrec-1)
2963  real(dp):: Zvtempo(1:2,0:rset%nfftrec-1)
2964  complex(dpc) :: acc_rho(0:rset%min_nrec)
2965  complex(dpc) :: D(0:dim_trott),Dold(0:dim_trott)
2966  complex(dpc) :: N(0:dim_trott),Nold(0:dim_trott)
2967 
2968 ! *************************************************************************
2969 
2970  call timab(608,1,tsec) !--start time-counter: recursion_nl
2971  if(rset%debug)then
2972    msg=' '
2973    call wrtout(std_out,msg,'COLL')
2974  end if
2975 
2976 !##############################################################
2977  beta = one/tsmear
2978 
2979 !--Rewriting the trotter parameter
2980  rtrotter  = max(half,real(trotter,dp))
2981 
2982 !--Initialisation of mpi
2983  mpi_loc => rset%mpi
2984 
2985 !--Initialisation of metrics
2986  inf_ucvol = rset%inf%ucvol
2987  inf_tr = rset%inf%tr
2988  mult = one   !--In the case of the calculus of the NL-energy
2989 
2990 !--Initialisation of  an,bn,un....
2991  N = czero;  D = cone
2992  facrec0 = cone
2993  Nold = czero; Dold = czero
2994 
2995  an = zero; bn2 = zero;  bn2(0) = one
2996  bb = zero; vn  = zero;  unold  = zero
2997 
2998 !--Variables for optimisation
2999  pi_on_rtrotter = pi/rtrotter
3000  twortrotter = two*rtrotter
3001  exp1 = exp((beta*rset%efermi)/(rtrotter))
3002  exp2 = exp(beta*rset%efermi/(twortrotter))
3003  cinv2rtrotter = cmplx(one/twortrotter,zero,dp)
3004  coeef_mu = cmplx(one/exp2,zero,dp)
3005 
3006 !--Initialisation of accumulated density
3007  acc_rho = czero
3008 !--Initialisation of estimated error
3009  prod_b2 = twortrotter/exp1
3010  errold = zero
3011 
3012 !##############################################################
3013 !--Main loop
3014  maindo : do irec = 0, rset%min_nrec
3015 !  --Get an and bn2 coef by the lanczos method
3016 
3017 !  --Computation of exp(-beta*V/8*p)*un
3018    vn = exppot * un
3019 
3020 !  --First Non-local psp contribution: (Id+sum_atom E(r,r1))vn
3021    call timab(608,2,tsec)
3022    call vn_nl_rec(vn,natom,typat,rset%ngfftrec(:3),inf_ucvol,rset%nl,projec)
3023    call timab(608,1,tsec)
3024 
3025 !  --Computation of exp(-beta*V/8*p)*un
3026    vn = exppot * vn
3027 
3028 !  --Convolution with the Green kernel
3029 !  --FFT of vn
3030    isign = -1
3031    call fourdp(1,Zvtempo,vn,isign,mpi_loc,rset%nfftrec,rset%ngfftrec,1,6)
3032 
3033 !  --F(T)F(vn)
3034    do ii = 0,rset%nfftrec-1
3035      switchu   = Zvtempo(1,ii)
3036      switchimu = Zvtempo(2,ii)
3037      Zvtempo(1,ii) = switchu*rset%ZT_p(1,ii) - switchimu*rset%ZT_p(2,ii)
3038      Zvtempo(2,ii) = switchu*rset%ZT_p(2,ii) + switchimu*rset%ZT_p(1,ii)
3039    end do
3040 
3041 !  --F^-1(F(T)F(vn))
3042    isign = 1
3043    call fourdp(1,Zvtempo,vn,isign,mpi_loc,rset%nfftrec,rset%ngfftrec,1,6)
3044 
3045 !  --Computation of exp(-beta*V/2*p)*vn
3046    vn = inf_ucvol * exppot * vn
3047 
3048 !  --Second Non-local psp contribution: (Id+sum_atom E(r,r1))vn
3049    call timab(608,2,tsec)
3050    call vn_nl_rec(vn,natom,typat,rset%ngfftrec(:3),inf_ucvol,rset%nl,projec)
3051    call timab(608,1,tsec)
3052 
3053 !  --Computation of exp(-beta*V/8*p)*vn
3054    vn = exppot * vn
3055 
3056 
3057 !  --Multiplication of a and b2 coef by exp(beta*fermie/(2.d0*rtrotter)) must be done in the continued fraction computation
3058 !  --Computation of a and b2
3059    an(irec) = inf_ucvol*ddot(rset%nfftrec,vn,1,un,1)      !--an must be positive real
3060 
3061 !  --We must compute bn2 and prepare for the next iteration
3062    if(irec<rset%min_nrec)then
3063      do ii = 0,rset%nfftrec-1
3064        switchu = un(ii)
3065        un(ii) = vn(ii)-an(irec)*un(ii)-bb*unold(ii)
3066        unold(ii) = switchu
3067        bn2(irec+1) = bn2(irec+1)+inf_ucvol*un(ii)*un(ii)
3068      end do
3069      bb = sqrt(bn2(irec+1))
3070      un = (one/bb)*un
3071    end if
3072 
3073 !  ######################################################
3074 !  --Density computation
3075 !  in order to make it possible to stop the recursion_nl at the
3076 !  needed accuracy, without doing more recursion_nl loop than needed further developpement
3077 
3078    call trottersum(dim_trott,error,&
3079 &   prod_b2,pi_on_rtrotter,&
3080 &   facrec0,coeef_mu,exp1,&
3081 &   an(irec),bn2(irec),&
3082 &   N,D,Nold,Dold)
3083 
3084 
3085    if(irec/=rset%min_nrec .and. irec>=minrec)then
3086      if((bn2(irec+1)<tol14).or.(mult*error<tol.and.errold<tol)) exit
3087    end if
3088    errold = mult*error
3089  end do maindo
3090 !--Accumulated density
3091  rho_out = mult*real(cone-sum(N/D)*cinv2rtrotter,dp)
3092 
3093  call timab(608,2,tsec) !--stop time-counter: recursion_nl
3094 
3095 end subroutine recursion_nl

ABINIT/vn_nl_rec [ Functions ]

[ Top ] [ Functions ]

NAME

 vn_nl_rec

FUNCTION

 this routine computes the contribution to the vector vn, during
 recursion, due to the non-local psp.

INPUTS

  vn(:,:,:)=the vector on the real-space grid.
  inf_ucvol=volume of infinitesimal cell
  natom=number of atoms
  typat(natom)=the type of psps associated to the atoms
  ngfftrec(3)=first 3 components of ngfftrec (truncated box, if different from ngfft) for the real-space grid
  nlrec<type(nlpsprec_type)> in recursion_type containing information concerning psp
  projec(ngfftrec(1),ngfftrec(2),ngfftrec(3),lmnmax,natom) is the  vector, on the ngfftrec grid containing
  the non-lacal projector $Y_{lm}(r-R_A)f_{lk}(r-R_A)

OUTPUT

 vn_nl(:,:,:)=the non_local contribution to vn

PARENTS

      recursion,recursion_nl

CHILDREN

      timab

NOTES

SOURCE

3130 subroutine vn_nl_rec(vn,natom,typat,ngfftrec,inf_ucvol,nlrec,projec)
3131 
3132 
3133  use m_linalg_interfaces
3134 
3135 !This section has been created automatically by the script Abilint (TD).
3136 !Do not modify the following lines by hand.
3137 #undef ABI_FUNC
3138 #define ABI_FUNC 'vn_nl_rec'
3139 !End of the abilint section
3140 
3141  implicit none
3142 
3143 !Arguments -------------------------------
3144 !scalars
3145  integer,intent(in) :: natom
3146  real(dp),intent(in) :: inf_ucvol
3147  type(nlpsprec_type),intent(in) :: nlrec
3148 !arrays
3149  integer,intent(in) :: ngfftrec(3),typat(natom)
3150  real(dp),intent(in) :: projec(0:,0:,0:,1:,1:)
3151  real(dp),intent(inout):: vn(0:ngfftrec(1)*ngfftrec(2)*ngfftrec(3)-1)
3152 !Local variables-------------------------------
3153 !scalars
3154  integer :: iatom,nfftrec
3155  integer :: jlmn,il,in,jn
3156  integer :: ipsp,ilmn
3157  integer :: npsp,lmnmax
3158  real(dp):: vn_nl_loc
3159 !arrays
3160  real(dp):: vn_nl(0:ngfftrec(1)-1,0:ngfftrec(2)-1,0:ngfftrec(3)-1)
3161  real(dp):: vtempo(0:ngfftrec(1)-1,0:ngfftrec(2)-1,0:ngfftrec(3)-1)
3162  real(dp):: tsec(2)
3163 ! *************************************************************************
3164 
3165  call timab(615,1,tsec)
3166 !--Initialisation
3167 
3168  vn_nl = zero
3169  npsp = nlrec%npsp
3170  lmnmax = nlrec%lmnmax
3171  nfftrec = product(ngfftrec)
3172  vtempo(:,:,:) = reshape(source=vn,shape=ngfftrec(:3))
3173 
3174 !--Sum_iatom \int dr1 E(r-r_a,r1-r_a)vn(r1) *infucvol
3175  do iatom=1,natom !--Loop on atoms
3176    ipsp = typat(natom)
3177 
3178 !  --If psp(typat(iatom)) is local then cycle
3179    if(all(nlrec%pspinfo(:,ipsp)==0))  cycle
3180 
3181 
3182 !  write(std_out,*)'lmnmax',nlrec%lmnmax,lmnmax
3183 
3184    do ilmn = 1, lmnmax
3185      do jlmn = 1,lmnmax
3186        if(nlrec%indlmn(4,ilmn,ipsp)==nlrec%indlmn(4,jlmn,ipsp)) then
3187          il = 1+nlrec%indlmn(1,jlmn,ipsp)
3188          in = nlrec%indlmn(3,ilmn,ipsp)
3189          jn = nlrec%indlmn(3,jlmn,ipsp)
3190          vn_nl_loc = ddot(nfftrec,projec(:,:,:,jlmn,iatom),1,vtempo,1)
3191          vn_nl = vn_nl+projec(:,:,:,ilmn,iatom)*vn_nl_loc*nlrec%mat_exp_psp_nl(in,jn,il,ipsp)
3192        end if
3193      end do
3194    end do
3195  end do !--End loop on atoms
3196  vtempo = vtempo + vn_nl*inf_ucvol
3197 
3198  vn = reshape(source=vtempo,shape=(/nfftrec/))
3199 
3200  call timab(615,2,tsec)
3201 
3202 end subroutine vn_nl_rec

ABINIT/vtorhorec [ Functions ]

[ Top ] [ Functions ]

NAME

 vtorhorec

FUNCTION

 This routine computes the new density from a fixed potential (vtrial)
 using a recursion method

INPUTS

  deltastep= if 0 the iteration step is equal to dtset%nstep
  initialized= if 0 the initialization of the gstate run is not yet finished
  operator (ground-state symmetries)
  dtset <type(dataset_type)>=all input variables for this dataset
  irrzon(nfft**(1-1/nsym),2,(nspden/nsppol)-3*(nspden/4))=irreducible zone data
  nfftf=(effective) number of FFT grid points (for this processor)
  nspden=number of spin-density components
  nsppol=1 for unpolarized, 2 for spin-polarized
  nsym=number of symmetry elements in space group
  phnons(2,nfft**(1-1/nsym),(nspden/nsppol)-3*(nspden/4))=nonsymmorphic translation phases
  vtrial(nfft,nspden)=INPUT Vtrial(r).
  rset <type(recursion_type)> all variables for recursion
  rprimd(3,3)=dimensional primitive translations in real space (bohr)
  gprimd(3,3)=dimensional primitive translations in reciprocal space

OUTPUT

  ek=kinetic energy part of total energy.
  enl=nonlocal pseudopotential part of total energy.
  entropy=entropy due to the occupation number smearing (if metal)
  e_eigenvalues=Sum of the eigenvalues - Band energy (Hartree)
  fermie=fermi energy (Hartree)
  grnl(3*natom)=stores grads of nonlocal energy wrt length scales
   (3x3 tensor) and grads wrt atomic coordinates (3*natom)

SIDE EFFECTS

  rhog(2,nfft)=array for Fourier transform of electron density
  rhor(nfft,nspden)=array for electron density in electrons/bohr**3.
  rset%efermi= fermi energy

PARENTS

      scfcv

CHILDREN

      calcnrec,cudarec,destroy_distribfft,entropyrec,fermisolverec
      gran_potrec,init_distribfft,nlenergyrec,prtwork,recursion,reshape_pot
      symrhg,timab,transgrid,wrtout,xmpi_allgatherv,xmpi_barrier,xmpi_max
      xmpi_sum,xredistribute

NOTES

  at this time :
       - grnl in not implemented
       - symetrie usage not implemented (irrzon not used and nsym should be 1)
       - spin-polarized not implemented (nsppol must be 1, nspden ?)
       - phnons used only in symrhg
       - need a rectangular box (ngfft(1)=ngfft(2)=ngfft(3))

SOURCE

120 subroutine vtorhorec(dtset,&
121 &  ek,enl,entropy,e_eigenvalues,fermie,&
122 &  grnl,initialized,irrzon,nfftf,phnons,&
123 &  rhog, rhor, vtrial,rset,deltastep,rprimd,gprimd)
124 
125 
126 !This section has been created automatically by the script Abilint (TD).
127 !Do not modify the following lines by hand.
128 #undef ABI_FUNC
129 #define ABI_FUNC 'vtorhorec'
130 !End of the abilint section
131 
132  implicit none
133 
134 !Arguments -------------------------------
135 !scalars
136  integer,intent(in) :: initialized
137  integer,intent(in) :: nfftf,deltastep
138  real(dp),intent(out) :: e_eigenvalues,ek,enl,entropy,fermie
139  type(dataset_type),intent(in) :: dtset
140  type(recursion_type),intent(inout) :: rset
141 !arrays
142  integer, intent(in) :: irrzon(:,:,:)
143  real(dp),intent(in) :: rprimd(3,3),gprimd(3,3)
144  real(dp),intent(in) :: phnons(:,:,:)
145  real(dp),intent(in) :: vtrial(:,:)
146  real(dp),intent(inout) :: rhog(:,:)
147  real(dp),intent(out) :: grnl(:)  !vz_i
148  real(dp),intent(inout) :: rhor(:,:) !vz_i
149 
150 !Local variables-------------------------------
151 !scalars
152  integer :: nfftrec, dim_entro,ii1,jj1,kk1
153  integer :: ierr,ii,ilmn,ipsp,dim_trott
154  integer :: ipoint,ipointlocal,jj,kk,irec
155  integer :: n1,n2,n3,n4,n_pt_integ_entropy
156  integer :: nrec,iatom,min_pt,max_pt,swt_tm
157  integer ::  get_K_S_G
158  integer :: tim_fourdp,trotter
159  real(dp),parameter :: perc_vmin=one
160  real(dp) :: beta,drho,drhomax
161  real(dp) :: entropy1,entropy2,entropy3,entropy4
162  real(dp) :: entropylocal,entropylocal1,entropylocal2
163  real(dp) :: entropylocal3,entropylocal4,gran_pot,gran_pot1,gran_pot2
164  real(dp) :: gran_pot3,gran_pot4,gran_pot_local,gran_pot_local1
165  real(dp) :: gran_pot_local2,gran_pot_local3,gran_pot_local4
166  real(dp) :: inf_ucvol,intrhov,factor
167  real(dp) :: nelect,potmin,rtrotter,toldrho,tolrec,tsmear
168  real(dp) :: xmax,nlpotmin,ratio1,ratio2,ratio4,ratio8
169  type(recparall_type) :: recpar
170  character(len=500) :: msg
171  !arrays
172  integer :: ngfftrec(18),trasl(3)
173  integer,pointer :: gcart_loc(:,:)
174  integer,allocatable :: bufsize(:), bufdispl(:)
175  real(dp) :: tsec(2),tsec2(2)
176  real(dp) :: exppot(0:dtset%ngfft(1)*dtset%ngfft(2)*dtset%ngfft(3)-1)
177  real(dp),target :: rholocal(1:rset%par%ntranche)
178  real(dp),target :: alocal(0:rset%min_nrec,1:rset%par%ntranche)
179  real(dp),target :: b2local(0:rset%min_nrec,1:rset%par%ntranche)
180  real(dp),pointer :: rho_wrk(:)
181  real(dp),pointer :: a_wrk(:,:),b2_wrk(:,:)
182  real(dp),allocatable :: rholocal_f(:), rholoc_2(:)
183  real(dp),allocatable :: rhogf(:,:),rhogc(:,:),aloc_copy(:,:),b2loc_copy(:,:)
184  real(dp),allocatable :: gran_pot_v_f(:,:),gran_pot_v_c(:,:),gran_pot_v_2(:,:)
185  real(dp),allocatable :: entropy_v_f(:,:),entropy_v_c(:,:),entropy_v_2(:,:)
186  real(dp),allocatable :: ablocal_1(:,:,:),ablocal_2(:,:,:),ablocal_f(:,:,:)
187  real(dp),allocatable :: exppotloc(:)
188  real(dp),allocatable :: projec(:,:,:,:,:)
189 #if defined HAVE_GPU_CUDA
190  integer :: max_rec
191  integer,allocatable :: vcount_0(:), displs_0(:)
192  integer,allocatable :: vcount_1(:), displs_1(:)
193  real(dp),allocatable,target :: rho_hyb(:)
194  real(dp),allocatable,target :: a_hyb(:,:),b2_hyb(:,:)
195  real(cudap),allocatable :: an_dev(:,:)
196  real(cudap),allocatable :: bn2_dev(:,:)
197 #endif
198 
199 ! *********************************************************************
200  if(rset%debug)then
201    write(msg,'(80a,a,a)') ('=',ii=1,80),ch10,' vtorhorec : enter '
202    call wrtout(std_out,msg,'PERS')
203  end if
204 
205  call timab(21,1,tsec)
206  call timab(600,1,tsec2)
207 !##################################################################################################
208 !!--Initalization in the FIRST time in VTORHOREC is made in SCFCV by FIRST_REC routine
209 !!--Parameters for the recursion method AND  Initialisation
210 
211  trotter = dtset%recptrott  !--trotter parameter
212  nelect  = dtset%nelect     !--number of electrons
213 
214  toldrho = dtset%rectolden  !--tollerance for density
215  tolrec  = toldrho*1.d-2    !--tollerance for local density
216 
217  tsmear  = dtset%tsmear     !--temperature
218  beta    = one/tsmear       !--inverse of temperature
219 
220  factor = real(dtset%recgratio**3*100*rset%mpi%nproc,dp)/real(nfftf,dp)
221 
222 !--Assignation of the rset variable:
223  nrec       = rset%min_nrec
224  nfftrec    = rset%nfftrec
225  ngfftrec   = rset%ngfftrec
226  inf_ucvol  = rset%inf%ucvol
227 
228  min_pt = rset%par%displs(rset%mpi%me)+1
229  max_pt = min_pt+rset%par%vcount(rset%mpi%me)-1
230 
231 !--In the last self-constistent loop or if density is converged the
232 !thermodynamics quantities are calculated
233  get_K_S_G = 0; if(deltastep==0 .or. rset%quitrec/=0) get_K_S_G = 1;
234 
235 !--Rewriting the trotter parameter
236  rtrotter  = max(half,real(trotter,dp))
237  dim_trott = max(0,2*trotter-1)
238 
239 !--Variables Optimisation
240  ratio1 = beta/rtrotter
241  ratio2 = ratio1/two
242  ratio4 = ratio1/four
243  ratio8 = ratio1/eight
244 
245 !--Integration points for entropy
246  n_pt_integ_entropy = max(25,dtset%recnpath)
247 
248 !-- energies non-local: at day not implemented
249  enl = zero
250  grnl = zero
251 !jmb
252  ek = zero
253 
254 !--only a copy of ngfft(1:3) and nfft  (to purge!!)
255  n1 = dtset%ngfft(1) ; n2 = dtset%ngfft(2) ; n3 = dtset%ngfft(3)
256  n4 = n3
257 
258 !--time switch to measure gpu-cpu syncrhonisation
259  swt_tm = 0 ; !no gpu initally
260 
261  exppot = zero
262  nullify(gcart_loc)
263 
264  if(dtset%rectesteg==1)then
265 !  --Free electron gas case
266    exppot = one
267  else
268    if(.not.(rset%nl%nlpsp)) then
269 !    --Local case
270 !    --COMPUTATION OF exp( -beta*pot/(4*rtrotter))
271      ABI_ALLOCATE(gcart_loc,(0,0))
272      gcart_loc = 0
273      exppot = exp( -(ratio4*vtrial(:,1)))
274    else
275 !    --Non-Local case
276 !    --COMPUTATION OF exp(-beta*pot/(8*rtrotter))
277      exppot = exp( -(ratio8*vtrial(:,1)))
278 
279      ABI_ALLOCATE(gcart_loc,(3,dtset%natom))
280      gcart_loc = rset%inf%gcart
281      ABI_ALLOCATE(projec,(0:ngfftrec(1)-1,0:ngfftrec(2)-1,0:ngfftrec(3)-1,rset%nl%lmnmax,dtset%natom))
282      projec = zero
283 
284      if(.not.(rset%tronc)) then
285        do iatom =1, dtset%natom
286          ipsp = dtset%typat(iatom)
287          do ilmn = 1,rset%nl%lmnmax
288            projec(:,:,:,ilmn,iatom) = reshape(rset%nl%projec(:,ilmn,ipsp),shape=shape(projec(:,:,:,1,1)))
289            do ii=1,3
290              projec(:,:,:,ilmn,iatom) = cshift(projec(:,:,:,ilmn,iatom),shift=ngfftrec(ii)/2-gcart_loc(ii,iatom),dim=ii)
291            end do
292          end do
293        end do
294      end if
295    end if
296  end if
297 
298 !###################################################################################
299 !MAIN LOOP
300 
301  rholocal = zero; alocal = zero; b2local = zero
302  ipointlocal = 1
303 
304 !--Allocation: if hybrid calculation is done then I have to use
305 !balanced work on devices.
306  nullify(rho_wrk,a_wrk,b2_wrk)
307 
308  if(rset%load == 1)then
309 #ifdef HAVE_GPU_CUDA
310    ABI_ALLOCATE(rho_hyb,(1:rset%GPU%par%npt))
311    ABI_ALLOCATE(a_hyb,(0:nrec,1:rset%GPU%par%npt))
312    ABI_ALLOCATE(b2_hyb,(0:nrec,1:rset%GPU%par%npt))
313    rho_hyb = zero; a_hyb = zero; b2_hyb = zero
314    rho_wrk => rho_hyb
315    a_wrk   => a_hyb
316    b2_wrk  => b2_hyb
317    recpar  = rset%GPU%par
318 #endif
319  else
320    rho_wrk => rholocal
321    a_wrk   => alocal
322    b2_wrk  => b2local
323    recpar  = rset%par
324  end if
325 
326 !#if defined HAVE_GPU_CUDA
327 !if(rset%debug)then
328 !write (std_out,*) 'rset%recGPU%nptrec ',rset%recGPU%nptrec
329 !write (std_out,*) 'rset%gpudevice ',rset%gpudevice
330 !write (std_out,*) 'rset%ngfftrec ',rset%ngfftrec(1:3)
331 !write (std_out,*) 'rset%min_nrec ',rset%min_nrec
332 !write (std_out,*) 'rset%par%ntranche ',rset%par%ntranche
333 !write (std_out,*) 'rset%par%min_pt ',rset%par%min_pt,min_pt
334 !write (std_out,*) 'rset%par%max_pt ',rset%par%max_pt,max_pt
335 !write (std_out,*) 'pt0 ',rset%par%pt0%x,rset%par%pt0%y,rset%par%pt0%z
336 !write (std_out,*) 'pt1 ',rset%par%pt1%x,rset%par%pt1%y,rset%par%pt1%z
337 !end if
338 !#endif
339 
340  if(rset%gpudevice>=0) then
341 #if defined HAVE_GPU_CUDA
342    swt_tm = 1;
343    call timab(607,1,tsec2)
344    ABI_ALLOCATE(an_dev,(0:recpar%npt-1,0:nrec))
345    ABI_ALLOCATE(bn2_dev,(0:recpar%npt-1,0:nrec))
346    an_dev = zero
347    bn2_dev = zero; bn2_dev(:,0) = one
348 
349    call cudarec( rset, exppot,an_dev,bn2_dev,&
350 &   beta,trotter,tolrec,dtset%recgratio,dtset%ngfft(:3),max_rec)
351 
352    max_rec = min(max_rec,nrec)
353    a_wrk(0:max_rec,1:recpar%npt)  = transpose(an_dev(0:,0:max_rec))
354    b2_wrk(0:max_rec,1:recpar%npt) = transpose(bn2_dev(0:,0:max_rec))
355 
356    ABI_DEALLOCATE(an_dev)
357    ABI_DEALLOCATE(bn2_dev)
358    call timab(607,2,tsec2)
359 
360    ipointlocal = recpar%npt+1
361 
362 !  !DEBUG CUDA
363 !  if(rset%debug)then
364 !  if( rset%mpi%me==0)then
365 !  do ipoint = 1,rset%par%npt,1
366 !  kk=ipoint/(product(dtset%ngfft(:2)))
367 !  jj=ipoint/dtset%ngfft(1)-kk*dtset%ngfft(2)
368 !  ii=ipoint-jj*dtset%ngfft(1)-kk*dtset%ngfft(2)*dtset%ngfft(1)
369 !  write(msg,'(a,4i8,2(a,a9,5f12.6))')&
370 !  & 'pt',ipoint,ii,jj,kk,&
371 !  & ch10,'an-gpu   ',real(alocal(:4,ipoint)),&
372 !  & ch10,'b2n-gpu  ',real(b2local(:4,ipoint))
373 !  call wrtout(std_out,msg,'COLL')
374 !  end do
375 !  endif
376 !  endif
377 !  !ENDDEBUG CUDA
378 #endif
379 
380  else
381    if (.not.(rset%tronc)) then
382      graou1 : do kk = recpar%pt0%z,recpar%pt1%z,dtset%recgratio
383        do jj = 0,n2-1,dtset%recgratio
384          do ii = 0,n1-1,dtset%recgratio
385            ipoint = ii+(jj+kk*n2)*n1
386 !          --Local position of atoms
387            if (ipoint<recpar%min_pt) cycle
388 !          --Computation done by that proc
389            tim_fourdp=6
390            call recursion(exppot,ii,jj,kk, &
391 &           a_wrk(:,ipointlocal), &
392 &           b2_wrk(:,ipointlocal), &
393 &           rho_wrk(ipointlocal),&
394 &           nrec, rset%efermi,tsmear,rtrotter,dim_trott, &
395 &           rset%ZT_p, &
396 &           tolrec,dtset%typat,rset%nl,&
397 &           rset%mpi,nfftrec,ngfftrec,rset%inf,&
398 &           tim_fourdp,dtset%natom,projec,1)
399            ipointlocal = ipointlocal + 1
400 !          write(std_out,*)'ipointlocal',ipoint,ipointlocal,ii,jj,kk
401            call prtwork(dtset%recgratio**3*ipointlocal*100*rset%mpi%nproc/nfftrec)
402            if(ipoint==recpar%max_pt) exit graou1
403          end do
404        end do
405      end do graou1
406    else !--We use a troncation
407      ABI_ALLOCATE(exppotloc,(0:nfftrec-1))
408      graou2 : do kk = recpar%pt0%z,recpar%pt1%z,dtset%recgratio
409        do jj = 0,n2-1,dtset%recgratio
410          do ii = 0,n1-1,dtset%recgratio
411            ipoint = ii+(jj+kk*n2)*n1
412            if (ipoint<recpar%min_pt) cycle
413 !          computation done by that proc
414            exppotloc = zero
415 !          --Traslation to move position on the ngfftrec grid center
416            trasl = -(/ii,jj,kk/)+ngfftrec(:3)/2
417            if(rset%nl%nlpsp) then
418              do iatom=1,dtset%natom
419 !              --local position of atoms
420                gcart_loc(:,iatom) = rset%inf%gcart(:,iatom)+trasl
421                gcart_loc(:,iatom) = modulo(gcart_loc(:,iatom),(/n1,n2,n3/))
422 !              --Traslation of non-local projectors
423                do ilmn = 1,rset%nl%lmnmax
424                  projec(:,:,:,ilmn,iatom) = reshape(rset%nl%projec(:,ilmn,dtset%typat(iatom)),shape=shape(projec(:,:,:,1,1)))
425                  do ii1=1,3
426                    projec(:,:,:,ilmn,iatom) = eoshift(projec(:,:,:,ilmn,iatom),shift=ngfftrec(ii1)/2-gcart_loc(ii1,iatom),dim=ii1)
427                  end do
428                end do
429              end do
430            end if
431 
432            call reshape_pot(trasl,nfftf,nfftrec,&
433 &           dtset%ngfft(:3),ngfftrec(:3),&
434 &           exppot,exppotloc)
435 
436            tim_fourdp=6
437            call recursion(exppotloc,ngfftrec(1)/2,ngfftrec(2)/2,ngfftrec(3)/2, &
438 &           a_wrk(:,ipointlocal), &
439 &           b2_wrk(:,ipointlocal), &
440 &           rho_wrk(ipointlocal),&
441 &           nrec, rset%efermi,tsmear,rtrotter,dim_trott, &
442 &           rset%ZT_p, &
443 &           tolrec,dtset%typat,rset%nl,&
444 &           rset%mpi,nfftrec,ngfftrec,rset%inf,&
445 &           tim_fourdp,dtset%natom,projec,1)
446            ipointlocal = ipointlocal + 1
447            call prtwork(factor*real(ipointlocal,dp))
448            if(ipoint==recpar%max_pt) exit graou2
449          end do
450        end do
451      end do graou2
452      ABI_DEALLOCATE(exppotloc)
453    end if
454 
455  end if
456  write(msg,'( a12,i12)')'ipointlocal',ipointlocal
457  call wrtout(std_out,msg,'PERS')
458  call timab(613+swt_tm,1,tsec2)  !!--start time-counter: sync gpu-cpu
459  call xmpi_barrier(rset%mpi%comm_bandfft)
460  call timab(613+swt_tm,2,tsec2)  !!--stop time-counter: sync gpu-cpu
461 
462 !!#############################################################
463 !!--ASSIGNATION PARAMETERS TO MENAGE PARALLELISME OF TRANSGRID
464  if((rset%load==1 .or. dtset%recgratio>1))then
465 !  --Bufsize contains the values of the number of points calculated
466 !  by any proc on the coarse grid; bufsize_f on the fine grid
467    call timab(604,1,tsec2) !--start time-counter: transgrid
468    ABI_ALLOCATE(bufsize,(0:rset%mpi%nproc-1))
469    ABI_ALLOCATE(bufdispl,(0:rset%mpi%nproc-1))
470    bufsize = 0;
471    bufsize(rset%mpi%me) = rset%par%npt
472    call xmpi_sum(bufsize,rset%mpi%comm_bandfft,ierr)
473 
474    bufdispl(0) = 0;
475    if(rset%mpi%nproc>1) bufdispl(1:) = (/(sum(bufsize(:ii)),ii=0,rset%mpi%nproc-1)/)
476    call timab(604,2,tsec2) !--stop time-counter: transgrid
477  end if
478 !!####################################################################
479 !!--REDISTRIBUTION OF LOAD ON PROCS AFTER RECURSION IF CUDA IS USED
480 #ifdef HAVE_GPU_CUDA
481  if(rset%load==1)then
482    call timab(604,1,tsec2) !--start time-counter: transgrid
483    call xredistribute(rho_hyb,rset%GPU%par%vcount,rset%GPU%par%displs,&
484 &   rholocal,bufsize,bufdispl,rset%mpi%me,&
485 &   rset%mpi%nproc,&
486 &   rset%mpi%comm_bandfft,ierr)
487 
488 
489    ABI_ALLOCATE(vcount_0,(0:rset%mpi%nproc-1))
490    ABI_ALLOCATE(displs_0,(0:rset%mpi%nproc-1))
491    ABI_ALLOCATE(vcount_1,(0:rset%mpi%nproc-1))
492    ABI_ALLOCATE(displs_1,(0:rset%mpi%nproc-1))
493 
494    vcount_0 = 0
495    vcount_0(rset%mpi%me) = rset%par%npt*(nrec+1)
496    call xmpi_sum(vcount_0,rset%mpi%comm_bandfft,ierr)
497    displs_0 = 0
498    if(rset%mpi%nproc>1) displs_0(1:) = (/(sum(vcount_0(:ii)),ii=0,rset%mpi%nproc-1)/)
499 
500 
501    vcount_1 = 0
502    vcount_1(rset%mpi%me) = rset%GPU%par%npt*(nrec+1)
503    call xmpi_sum(vcount_1,rset%mpi%comm_bandfft,ierr)
504    displs_1 = 0
505    if(rset%mpi%nproc>1) displs_1(1:) = (/(sum(vcount_1(:ii)),ii=0,rset%mpi%nproc-1)/)
506 
507 
508    call xredistribute(a_hyb,vcount_1,displs_1,&
509 &   alocal,vcount_0,displs_0,&
510 &   rset%mpi%me,rset%mpi%nproc,&
511 &   rset%mpi%comm_bandfft,ierr)
512 
513    call xredistribute(b2_hyb,vcount_1,displs_1,&
514 &   b2local,vcount_0,displs_0,&
515 &   rset%mpi%me,rset%mpi%nproc,&
516 &   rset%mpi%comm_bandfft,ierr)
517 
518    nullify(rho_wrk,a_wrk,b2_wrk)
519    ABI_DEALLOCATE(rho_hyb)
520    ABI_DEALLOCATE(a_hyb)
521    ABI_DEALLOCATE(b2_hyb)
522    ABI_DEALLOCATE(vcount_0)
523    ABI_DEALLOCATE(displs_0)
524    ABI_DEALLOCATE(vcount_1)
525    ABI_DEALLOCATE(displs_1)
526    call timab(604,2,tsec2) !--start time-counter: transgrid
527  end if
528 #endif
529 
530 !#############################################################
531 !--TRANSGRID FOR THE DENSITY RHO AND THE COEFFICIENTS AN AND B2N
532  if (dtset%recgratio>1) then
533 !  --variables allocation and initialisation-------
534    write (msg,'(a)')' - TRANSGRID USING -----'
535    call wrtout(std_out,msg,'COLL')
536    call timab(604,1,tsec2) !--start time-counter: transgrid
537 
538    ABI_ALLOCATE(rholocal_f,(rset%pawfgr%nfft))
539    ABI_ALLOCATE(rhogf,(2,rset%pawfgr%nfft))
540    ABI_ALLOCATE(rhogc,(2,rset%pawfgr%nfftc))
541    ABI_ALLOCATE(rholoc_2,(1:rset%pawfgr%nfftc))
542    ABI_ALLOCATE(ablocal_2,(1:rset%pawfgr%nfftc,0:nrec,2))
543    ABI_ALLOCATE(ablocal_f,(1:rset%pawfgr%nfft,0:nrec,2))
544    ABI_ALLOCATE(ablocal_1,(1:rset%par%npt,0:nrec,2))
545 
546    call destroy_distribfft(rset%mpi%distribfft)
547    call init_distribfft(rset%mpi%distribfft,'c',rset%mpi%nproc_fft,rset%pawfgr%ngfftc(2) ,rset%pawfgr%ngfftc(3))
548    call init_distribfft(rset%mpi%distribfft,'f',rset%mpi%nproc_fft,rset%pawfgr%ngfft(2) ,rset%pawfgr%ngfft(3))
549 
550    rholocal_f = zero; ablocal_f = zero; ablocal_2 = zero
551 
552    ablocal_1(:,:,1) = transpose(alocal(:,1:rset%par%npt))
553    ablocal_1(:,:,2) = transpose(b2local(:,1:rset%par%npt))
554 
555    if(get_K_S_G==1 .and. dtset%recgratio>1 ) then
556      ABI_ALLOCATE(aloc_copy,(0:nrec,1:rset%par%npt))
557      ABI_ALLOCATE(b2loc_copy,(0:nrec,1:rset%par%npt))
558      aloc_copy = alocal(:,1:rset%par%npt)
559      b2loc_copy = b2local(:,1:rset%par%npt)
560    end if
561 
562    if(rset%mpi%nproc ==1) then
563 !    --SEQUENTIAL CASE--
564      rholoc_2 = rholocal(1:rset%par%npt)
565      ablocal_2 = ablocal_1
566 !    --Transigrid: coarse->fine
567      rhogf = zero; rhogc = zero
568      call transgrid(1,rset%mpi,dtset%nspden,1,0,0,1,rset%pawfgr,rhogc,rhogf,rholoc_2,rholocal_f)
569      do jj1 = 1,2
570        do ipoint = 0,nrec
571          rhogf = zero; rhogc = zero
572          call transgrid(1,rset%mpi,dtset%nspden,1,0,0,1,rset%pawfgr,rhogc,rhogf,&
573 &         ablocal_2(:,ipoint,jj1),ablocal_f(:,ipoint,jj1))
574        end do
575      end do
576 !    --Assignation of the interpolated results on the fine grid--
577      rholocal = rholocal_f
578      alocal = transpose(ablocal_f(:,:,1))
579      b2local = transpose(ablocal_f(:,:,2))
580 
581    else
582 !    --PARALLEL CASE--
583      rholoc_2 = zero
584 !    --Send on all procs rho,an,bn--
585      call xmpi_allgatherv(rholocal(1:rset%par%npt),bufsize(rset%mpi%me),rholoc_2,&
586 &     bufsize,bufdispl,rset%mpi%comm_bandfft,ierr)
587      do irec = 0,nrec
588        call xmpi_allgatherv(ablocal_1(1:rset%par%npt,irec,1),bufsize(rset%mpi%me),&
589 &       ablocal_2(:,irec,1),bufsize,bufdispl,rset%mpi%comm_bandfft,ierr)
590        call xmpi_allgatherv(ablocal_1(1:rset%par%npt,irec,2),bufsize(rset%mpi%me),&
591 &       ablocal_2(:,irec,2),bufsize,bufdispl,rset%mpi%comm_bandfft,ierr)
592      end do
593 
594 
595 !    --Transigrid: coarse->fine on differents procs (with respect
596 !    the number of recursion)
597      rhogf = zero;  rhogc = zero
598      call transgrid(1,rset%mpi,dtset%nspden,1,0,0,1,rset%pawfgr,rhogc,rhogf,rholoc_2,rholocal_f)
599 
600      do irec = 0,2*(nrec+1)-1
601        ii1 = modulo(irec,rset%mpi%nproc)
602        jj1 = 1+modulo(irec,2)
603        kk1 = floor(irec/2.)
604        if(maxval(abs(ablocal_2(:,kk1,jj1))) > tol10 .and. rset%mpi%me == ii1) then
605          rhogf = zero; rhogc = zero
606          call transgrid(1,rset%mpi,dtset%nspden,1,0,0,1,rset%pawfgr,rhogc,&
607 &         rhogf,ablocal_2(:,kk1,jj1),ablocal_f(:,kk1,jj1))
608        end if
609      end do
610 
611 !    --Recuperation of all interpolated results
612 !    from procs to allprocs
613      call xmpi_sum(ablocal_f,rset%mpi%comm_bandfft,ierr)
614 
615 !    --Assignation the interpolated results on the fine grid
616 !    any procs to obtain the same point as in the standard recursion
617      do ii1 = 0, rset%mpi%nproc-1
618        jj1 = rset%par%displs(ii1)+1
619        if(ii1 == rset%mpi%me) then
620          alocal = transpose(ablocal_f(jj1:jj1+rset%par%vcount(ii1)-1,:,1))
621          b2local = transpose(ablocal_f(jj1:jj1+rset%par%vcount(ii1)-1,:,2))
622          rholocal = rholocal_f(jj1:jj1+rset%par%vcount(ii1)-1)
623        end if
624      end do
625    end if
626    ABI_DEALLOCATE(ablocal_f)
627    ABI_DEALLOCATE(ablocal_2)
628    ABI_DEALLOCATE(ablocal_1)
629    ABI_DEALLOCATE(rhogf)
630    ABI_DEALLOCATE(rholocal_f)
631    ABI_DEALLOCATE(rhogc)
632    ABI_DEALLOCATE(rholoc_2)
633 
634    call timab(604,2,tsec2) !--stop time-counter: transgrid
635  else
636    write(msg,'(a)')' - TRANSGRID NOT USED --'
637    call wrtout(std_out,msg,'COLL')
638  end if
639 !!--End transgrid
640 !!###############################################################
641 !###################################
642 !--Fermi energy computation
643 !--find the good mu by imposing the electrons number
644  call fermisolverec(rset%efermi,rholocal,alocal,b2local,rset%debug,&
645 & rset%min_nrec,tsmear,trotter,nelect,tol10,100, &
646 & rset%par%ntranche,rset%mpi,inf_ucvol,& !.False. .and.&
647 & (rset%tp==2 .or. rset%tp==3) .and. trotter>1)
648 
649 !#################################################################
650 !######### ENTROPY AND GRAN POTENTIAL COMPUTATION  ##################
651  entropy = zero
652  gran_pot = zero
653  noentropie : if(get_K_S_G==1)then
654    entropy1 = zero; entropy2 = zero ;entropy3 = zero; entropy4 = zero
655    gran_pot1 = zero ; gran_pot2 = zero; gran_pot3 = zero; gran_pot4 = zero
656 
657 !  --Seek for the min of the path integral
658    potmin = zero;  nlpotmin = zero
659    if(dtset%rectesteg/=1) potmin = minval(vtrial(:,1))
660    if(rset%nl%nlpsp)  nlpotmin = minval(rset%nl%eival(:,:,:))
661    xmax = exp(-ratio2*(potmin+nlpotmin-rset%efermi))
662 
663    dim_entro = 0;  if(rset%debug) dim_entro = 4;
664 
665    if(dtset%recgratio>1) then
666 !    --Recgratio>1
667      ABI_ALLOCATE(rhogf,(2,rset%pawfgr%nfft))
668      ABI_ALLOCATE(rhogc,(2,rset%pawfgr%nfftc))
669      ABI_ALLOCATE(entropy_v_f,(rset%pawfgr%nfft,0:4))
670      ABI_ALLOCATE(entropy_v_c,(rset%pawfgr%nfftc,0:4))
671      ABI_ALLOCATE(entropy_v_2,(1:rset%par%npt,0:4))
672      ABI_ALLOCATE(gran_pot_v_f,(rset%pawfgr%nfft,0:4))
673      ABI_ALLOCATE(gran_pot_v_c,(rset%pawfgr%nfftc,0:4))
674      ABI_ALLOCATE(gran_pot_v_2,(1:rset%par%npt,0:4))
675 
676      entropy_v_c = zero; entropy_v_f = zero; entropy_v_2 = zero
677      gran_pot_v_c = zero; gran_pot_v_f = zero; gran_pot_v_2 = zero
678 
679 
680      do ipoint = 1,rset%par%npt
681        call entropyrec(exp(rset%efermi*ratio2)*aloc_copy(:,ipoint), &
682 &       exp(rset%efermi*ratio1)*b2loc_copy(:,ipoint), &
683 &       nrec,trotter,entropy_v_2(ipoint,0),two,&
684 &       rset%debug,n_pt_integ_entropy,perc_vmin*xmax,&
685 &       entropy_v_2(ipoint,1),&
686 &       entropy_v_2(ipoint,2),&
687 &       entropy_v_2(ipoint,3),&
688 &       entropy_v_2(ipoint,4))
689 
690        call gran_potrec(exp(rset%efermi*ratio2)*aloc_copy(:,ipoint), &
691 &       exp(rset%efermi*ratio1)*b2loc_copy(:,ipoint), &
692 &       nrec,trotter,gran_pot_v_2(ipoint,0),two,&
693 &       rset%debug,n_pt_integ_entropy,perc_vmin*xmax,&
694 &       gran_pot_v_2(ipoint,1),&
695 &       gran_pot_v_2(ipoint,2),&
696 &       gran_pot_v_2(ipoint,3),&
697 &       gran_pot_v_2(ipoint,4))
698      end do
699      ABI_DEALLOCATE(aloc_copy)
700      ABI_DEALLOCATE(b2loc_copy)
701 
702      call timab(613+swt_tm,1,tsec2)  !!--start time-counter: sync gpu-cpu
703      call xmpi_barrier(rset%mpi%comm_bandfft)
704      call timab(613+swt_tm,2,tsec2)  !!--stop time-counter: sync gpu-cpu
705 
706      call timab(604,1,tsec2) !--start time-counter: transgrid
707      if(rset%mpi%nproc==1) then
708        entropy_v_c = entropy_v_2
709        gran_pot_v_c = gran_pot_v_2
710      end if
711      do ii1 = 0,dim_entro
712        call xmpi_allgatherv(entropy_v_2(:,ii1),bufsize(rset%mpi%me),&
713 &       entropy_v_c(:,ii1),bufsize,bufdispl,&
714 &       rset%mpi%comm_bandfft,ierr)
715        call xmpi_allgatherv(gran_pot_v_2(:,ii1),bufsize(rset%mpi%me),&
716 &       gran_pot_v_c(:,ii1),bufsize,bufdispl,&
717 &       rset%mpi%comm_bandfft,ierr)
718 
719        if(maxval(abs(entropy_v_c(:,ii1))) > tol10) then
720          rhogf = zero; rhogc = zero
721          call transgrid(1,rset%mpi,dtset%nspden,1,0,0,1,&
722 &         rset%pawfgr,rhogc,rhogf,entropy_v_c(:,ii1),entropy_v_f(:,ii1))
723        end if
724        if(maxval(abs(gran_pot_v_c(:,ii1))) >tol10) then
725          rhogf = zero; rhogc = zero
726          call transgrid(1,rset%mpi,dtset%nspden,1,0,0,1,&
727 &         rset%pawfgr,rhogc,rhogf,gran_pot_v_c(:,ii1),gran_pot_v_f(:,ii1))
728        end if
729      end do
730      call timab(604,2,tsec2) !--stop time-counter: transgrid
731 
732      entropy  = sum(entropy_v_f(:,0))
733      gran_pot  = sum(gran_pot_v_f(:,0))
734 
735      if(rset%debug)then
736        entropy1 = sum(entropy_v_f(:,1))
737        entropy2 = sum(entropy_v_f(:,2))
738        entropy3 = sum(entropy_v_f(:,3))
739        entropy4 = sum(entropy_v_f(:,4))
740        gran_pot1 = sum(gran_pot_v_f(:,1))
741        gran_pot2 = sum(gran_pot_v_f(:,2))
742        gran_pot3 = sum(gran_pot_v_f(:,3))
743        gran_pot4 = sum(gran_pot_v_f(:,4))
744      end if
745 
746      ABI_DEALLOCATE(entropy_v_f)
747      ABI_DEALLOCATE(entropy_v_c)
748      ABI_DEALLOCATE(entropy_v_2)
749      ABI_DEALLOCATE(rhogf)
750      ABI_DEALLOCATE(rhogc)
751      ABI_DEALLOCATE(gran_pot_v_f)
752      ABI_DEALLOCATE(gran_pot_v_c)
753      ABI_DEALLOCATE(gran_pot_v_2)
754 
755 
756    else
757 !    --Recgratio=1
758      do ipoint = 1,rset%par%ntranche
759        call entropyrec(exp(rset%efermi*ratio2)*alocal(:,ipoint), &
760 &       exp(rset%efermi*ratio1)*b2local(:,ipoint), &
761 &       nrec,trotter,entropylocal,two,&
762 &       rset%debug,n_pt_integ_entropy,perc_vmin*xmax,&
763 &       entropylocal1,entropylocal2,&
764 &       entropylocal3,entropylocal4)
765        call gran_potrec(exp(rset%efermi*ratio2)*alocal(:,ipoint), &
766 &       exp(rset%efermi*ratio1)*b2local(:,ipoint), &
767 &       nrec,trotter,gran_pot_local,two,& !/ucvol,&
768 &       rset%debug,n_pt_integ_entropy,perc_vmin*xmax,&
769 &       gran_pot_local1,gran_pot_local2,&
770 &       gran_pot_local3,gran_pot_local4)
771 
772        entropy = entropy + entropylocal
773        gran_pot = gran_pot + gran_pot_local
774        if(rset%debug)then
775          entropy1 = entropy1 + entropylocal1
776          entropy2 = entropy2 + entropylocal2
777          entropy3 = entropy3 + entropylocal3
778          entropy4 = entropy4 + entropylocal4
779          gran_pot1 = gran_pot1 + gran_pot_local1
780          gran_pot2 = gran_pot2 + gran_pot_local2
781          gran_pot3 = gran_pot3 + gran_pot_local3
782          gran_pot4 = gran_pot4 + gran_pot_local4
783        end if
784 
785      end do
786 
787      call xmpi_sum(entropy,rset%mpi%comm_bandfft ,ierr)
788      call xmpi_sum(gran_pot,rset%mpi%comm_bandfft ,ierr)
789      if(rset%debug)then
790        call xmpi_sum(entropy1,rset%mpi%comm_bandfft ,ierr)
791        call xmpi_sum(entropy2,rset%mpi%comm_bandfft ,ierr)
792        call xmpi_sum(entropy3,rset%mpi%comm_bandfft ,ierr)
793        call xmpi_sum(entropy4,rset%mpi%comm_bandfft ,ierr)
794        call xmpi_sum(gran_pot1,rset%mpi%comm_bandfft ,ierr)
795        call xmpi_sum(gran_pot2,rset%mpi%comm_bandfft ,ierr)
796        call xmpi_sum(gran_pot3,rset%mpi%comm_bandfft ,ierr)
797        call xmpi_sum(gran_pot4,rset%mpi%comm_bandfft ,ierr)
798      end if
799    end if
800 
801    if(rset%debug)then
802      write(msg,'(2(2a,4(2a,es11.4,a)))')&
803 &     ' --------------------------'        ,ch10, &
804 &     '  entropy, horiz path=',' ',entropy1,ch10, &
805 &     '  entropy, xmax  path=',' ',entropy2,ch10, &
806 &     '  entropy, xmin  path=',' ',entropy3,ch10, &
807 &     '  entropy, zero  path=',' ',entropy4,ch10, &
808 &     ' --------------------------'        ,ch10, &
809 &     ' -omega/T, horiz path=',' ',gran_pot1,ch10, &
810 &     ' -omega/T, xmax  path=',' ',gran_pot2,ch10, &
811 &     ' -omega/T, xmin  path=',' ',gran_pot3,ch10, &
812 &     ' -omega/T, zero  path=',' ',gran_pot4,ch10
813      call wrtout(std_out,msg,'COLL')
814    end if
815 
816    e_eigenvalues=tsmear*(entropy-gran_pot) + rset%efermi*nelect
817 !  --In reality gran_pot is not the gran potential but the
818 !  potential omega=-PV (Landau-potential or grand-potential)
819 !  divided by -T so the internal energy
820 !  U:=e_eigenvalues= TS+omega+muN = ST-T*sum(ln(1-n))+muN =
821 !  T(S-gran_pot)+muN
822 
823 
824    if(rset%nl%nlpsp) then
825      call nlenergyrec(rset,enl,exppot,dtset%ngfft,dtset%natom,&
826 &     dtset%typat,tsmear,trotter,tolrec)
827    end if
828  end if noentropie
829 !##### END ENTROPY AND GRAN POTENTIAL COMPUTATION  ##################
830 !#################################################################
831 
832 !if(associated(projec))
833  if(rset%nl%nlpsp)  then
834    ABI_DEALLOCATE(projec)
835  end if
836  if(associated(gcart_loc))  then
837    ABI_DEALLOCATE(gcart_loc)
838  end if
839  if((dtset%recgratio/=1 .or. rset%load==1))  then
840    ABI_DEALLOCATE(bufdispl)
841    ABI_DEALLOCATE(bufsize)
842  end if
843 !------------------------------------------------------------------
844 !--Check if the convergence is reached for rho
845  drho = maxval(abs(rhor(min_pt:max_pt,1)-rholocal(:)))
846  drhomax = drho
847  call xmpi_max(drho,drhomax,rset%mpi%comm_bandfft,ierr)
848 
849 !write(std_out,*)'drhomax,toldrho',drhomax,toldrho
850  if(drhomax<toldrho)then
851    rset%quitrec = rset%quitrec+1
852  else
853    rset%quitrec = 0
854  end if
855 
856 !-------------------------------------------------------------------
857 !--Density on all procs
858  rhor(min_pt:max_pt,1) = rholocal(:)
859  if(rset%mpi%nproc /= 1)then
860    call xmpi_allgatherv(rholocal,rset%par%ntranche,rhor(:,1),&
861 &   rset%par%vcount,rset%par%displs,&
862 &   rset%mpi%comm_band,ierr)
863  end if
864 
865 !--------------------------------------------------------------------
866 !--2nd EKIN CALCULATION: this method is used
867  noekin2 : if(get_K_S_G==1)then
868    intrhov = (inf_ucvol)*sum(rholocal*vtrial(min_pt:max_pt,1))
869    call xmpi_sum(intrhov,rset%mpi%comm_bandfft ,ierr)
870 
871    ek = e_eigenvalues-intrhov-enl
872 
873 
874    if(rset%debug) then
875      write (msg,'(2a,3f15.10,2a,3f15.10,2a,f15.10,a)') ch10,&
876 &     ' ek,int(rho*V),ek+int(rho*V) ', ek, intrhov,  ek+ intrhov,ch10, &
877 &     ' kT*S, kT*sum(ln(...)), diff ', tsmear*entropy, tsmear*gran_pot, tsmear*(entropy-gran_pot),ch10, &
878 &     ' kT(S-sum(ln(...)))+mu*nelect', tsmear*(entropy-gran_pot)+rset%efermi*nelect,ch10
879      call wrtout(std_out,msg,'COLL')
880    end if
881 
882 
883  end if noekin2
884 !--------------------------------------------------------------------
885  fermie = rset%efermi
886 
887 !--------------------------------------------------------
888 !!--At the first step to find the max number of recursion
889 !!  needed to convergence, then redefine nrec.
890  if(initialized==0 .and. dtset%ntime>0) then
891    call  Calcnrec(rset,b2local)
892  end if
893 
894 !--------------------------------------------------------
895  call destroy_distribfft(rset%mpi%distribfft)
896  call init_distribfft(rset%mpi%distribfft,'c',rset%mpi%nproc_fft,rset%ngfftrec(2),rset%ngfftrec(3))
897  call init_distribfft(rset%mpi%distribfft,'f',rset%mpi%nproc_fft,dtset%ngfft(2),dtset%ngfft(3))
898 
899 !--Printing results
900  write(msg,'(3a,f15.10)')&
901 & ' -- Results: --------------------------------------',ch10,&
902 & ' mu          =',rset%efermi
903  call wrtout(std_out,msg,'COLL')
904  if(get_K_S_G==1)then
905    write(msg,'(a,f15.10,6(2a,f15.10))')&
906 &   ' potmin      =',potmin,ch10,&
907 &   ' <V_eff>     =',intrhov,ch10,&
908 &   ' entropy     =',entropy,ch10,&
909 &   ' -omega/T    =',gran_pot,ch10,&
910 &   ' eigenvalues =',e_eigenvalues,ch10,&
911 &   ' kinetic     =',ek,ch10,&
912 &   ' non-loc ene =',enl
913    call wrtout(std_out,msg,'COLL')
914  end if
915  write(msg,'(a,50a)')' ',('-',ii=1,50)
916  call wrtout(std_out,msg,'COLL')
917 !write(std_out,*)'is the pressure ',gran_pot*tsmear/(rset%inf%ucvol*real(nfftrec,dp))
918 
919 !--Structured debugging : if rset%debug=T, stop here.
920  if(.false.)then !(rset%debug)
921    call wrtout(std_out,'  rhor ','PERS')
922    write(std_out,*)rhor(:,1)
923    call wrtout(std_out,' ','COLL')
924    write(msg,'(a,2d10.3)')'  temps recursion    ',tsec
925    call wrtout(std_out,msg,'COLL')
926    write(msg,'(a,l1,a)') ' vtorhorec : rset%debug=-',rset%debug,', debugging mode => stop '
927    MSG_ERROR(msg)
928  end if
929 
930  call timab(600,2,tsec2)
931  call timab(21,2,tsec)
932 
933  call symrhg(1,gprimd,irrzon,rset%mpi,nfftf,&
934 & dtset%ngfft(1)*dtset%ngfft(2)*dtset%ngfft(3),dtset%ngfft,dtset%nspden,&
935 & dtset%nsppol,dtset%nsym,dtset%paral_kgb,&
936 & phnons,rhog,rhor,rprimd,dtset%symafm,dtset%symrel)
937 
938 end subroutine vtorhorec