TABLE OF CONTENTS


ABINIT/getngrec [ Functions ]

[ Top ] [ Functions ]

NAME

 getngrec

FUNCTION

 This routine computes the fft box for the recursion method, accordingly to the troncation radius.
 It is quite similar to getng, but :
     - there is no xboxcut and ecut consistency
     - ngfft (the initial fft box) is the maximum fft box

INPUTS

  ngfft(18)=non truncated fft box
  mgfft=maximum of ngfft(1:3)
  inf_rmet=define the infinitesimal metric : rprimd*(transpose(rprimd)), divided by the number of discretisation point
  recrcut=truncating
  delta=to obtain radius of truncation

OUTPUT

  ngfftrec=truncated fft box
  nfftrec= truncated nfft
  tronc=True if truncation is made

SIDE EFFECTS

PARENTS

      m_rec

CHILDREN

      sort_int,timab

SOURCE

 986 subroutine getngrec(ngfft,rmet,ngfftrec,nfftrec,recrcut,delta,tronc)
 987 
 988 
 989 !This section has been created automatically by the script Abilint (TD).
 990 !Do not modify the following lines by hand.
 991 #undef ABI_FUNC
 992 #define ABI_FUNC 'getngrec'
 993 !End of the abilint section
 994 
 995 implicit none
 996 
 997 !Arguments -------------------------------
 998 !scalars
 999 real(dp),intent(in) :: recrcut,delta
1000 integer,intent(out) :: nfftrec
1001 logical,intent(out) :: tronc
1002 !arrays
1003 integer,intent(in)  :: ngfft(18)
1004 real(dp),intent(in) :: rmet(3,3)
1005 integer,intent(out) :: ngfftrec(18)
1006 
1007 !Local variables-------------------------------
1008 !scalars
1009 integer :: ii,iimin,index,jj,jjmin,kk,kkmin,largest_ngfftrec,maxpow11,maxpow2
1010 integer :: maxpow3,maxpow5,maxpow7,mmsrch,plane
1011 real(dp) :: dsm,dsp,dsqmin,rtroncat
1012 !arrays
1013 integer :: get_ngfftrec(3),maxsrch(3),minsrch(3)
1014 integer,allocatable :: iperm(:),srch(:)
1015 real(dp) :: tsec(2)
1016 real(dp) :: inf_rmet(3,3)
1017 ! *************************************************************************
1018 
1019  call timab(602,1,tsec)
1020 
1021  ngfftrec(:) = ngfft(:)
1022 
1023  if (recrcut>tol14) then  !default value dtset%recrcut = zero means no troncation
1024    rtroncat = recrcut+delta
1025    get_ngfftrec(:)=1
1026    plane = 1
1027 
1028    do ii=1,3
1029      inf_rmet(ii,:) = rmet(ii,:)/(real(ngfft(1:3)*ngfft(ii),dp))
1030    end do
1031 
1032 
1033 !  minimum value of ngfftrec
1034    do ii = 1,3
1035      ngfftrec(ii)=floor(2*rtroncat/sqrt(inf_rmet(ii,ii)))+1  !minimum value
1036      if(ngfftrec(ii)>=ngfft(ii))then
1037        ngfftrec(ii)=ngfft(ii)
1038        get_ngfftrec(ii)=0
1039      end if
1040    end do
1041 
1042 
1043    if(sum(get_ngfftrec)/=0)then
1044      largest_ngfftrec=maxval(ngfft(1:3))
1045      maxpow2=int(log(largest_ngfftrec+0.5d0)/log(two))
1046      maxpow3=int(log(largest_ngfftrec+0.5d0)/log(three))
1047      maxpow5=int(log(largest_ngfftrec+0.5d0)/log(five))
1048      maxpow7=0
1049      maxpow11=0
1050      mmsrch=(maxpow2+1)*(maxpow3+1)*(maxpow5+1)*(maxpow7+1)*(maxpow11+1)
1051      ABI_ALLOCATE(srch,(mmsrch))
1052      ABI_ALLOCATE(iperm,(mmsrch))
1053 !    Factors of 2
1054      srch(1)=1
1055      do ii=1,maxpow2
1056        srch(ii+1)=srch(ii)*2
1057      end do
1058 !    Factors of 3
1059      index=maxpow2+1
1060      if(maxpow3>0)then
1061        do ii=1,maxpow3
1062          srch(1+ii*index:(ii+1)*index)=3*srch(1+(ii-1)*index:ii*index)
1063        end do
1064      end if
1065 !    Factors of 5
1066      index=(maxpow3+1)*index
1067      if(maxpow5>0)then
1068        do ii=1,maxpow5
1069          srch(1+ii*index:(ii+1)*index)=5*srch(1+(ii-1)*index:ii*index)
1070        end do
1071      end if
1072 !    Factors of 7
1073      index=(maxpow5+1)*index
1074      if(maxpow7>0)then
1075        do ii=1,maxpow7
1076          srch(1+ii*index:(ii+1)*index)=7*srch(1+(ii-1)*index:ii*index)
1077        end do
1078      end if
1079 !    Factors of 11
1080      index=(maxpow7+1)*index
1081      if(maxpow11>0)then
1082        do ii=1,maxpow11
1083          srch(1+ii*index:(ii+1)*index)=11*srch(1+(ii-1)*index:ii*index)
1084        end do
1085      end if
1086 !    srch is the set of allowed ngfftrec values
1087 
1088      call sort_int(mmsrch,srch,iperm)
1089      ABI_DEALLOCATE(iperm)
1090 
1091      do ii=1,3
1092        if(get_ngfftrec(ii)==1)then
1093          do jj=1,mmsrch
1094            if(srch(jj)>=ngfftrec(ii))then
1095              minsrch(ii)=jj
1096              ngfftrec(ii)=srch(jj)
1097              exit
1098            end if
1099          end do
1100          do jj=minsrch(ii),mmsrch
1101            if(srch(jj)>ngfft(ii))then
1102 !            since ngfftrec(ii)<ngfft(ii) for get_ngfftrec(ii)==1,
1103 !            and srch(mmsrch)maxval(ngfft(1:3)),
1104 !            that will appens in the range minsrch(ii),mmsrch
1105              maxsrch(ii)=jj-1
1106              exit
1107            end if
1108          end do
1109        end if
1110 !      since ngfft(ii) is in srch, we have here srch(maxsrch(ii))=ngfft(ii)
1111 !      minsrch(ii), maxsrch(ii) is the range of index of srch in which we can
1112 !      search ngfftrec(ii)
1113 
1114        if(ngfftrec(ii)>=ngfft(ii))then
1115          ngfftrec(ii)=ngfft(ii)
1116          get_ngfftrec(ii)=0
1117        end if
1118      end do
1119    end if
1120 
1121 !  verify that the entiere truncation sphere is in the fft box ;
1122 !  but only in the dimension in which we do not consider the entiere fft box
1123    do while(sum(get_ngfftrec)/=0)  !again...
1124 
1125 !    determining the minimum distance between 0 and the boundary
1126 !    of the fft box
1127 !    quite similar to the subroutine "bound", but only over the plane which
1128 !    are not the whole fft box
1129      dsqmin=dsq_rec(ngfftrec(1)/2,-ngfftrec(2)/2,-ngfftrec(3)/2,inf_rmet)+0.01d0
1130 
1131      if(get_ngfftrec(1)/=0)then
1132 !      look at +/- g1 planes:
1133        do jj=-ngfftrec(2)/2,ngfftrec(2)/2
1134          do kk=-ngfftrec(3)/2,ngfftrec(3)/2
1135            dsp = dsq_rec(ngfftrec(1)/2, jj, kk,inf_rmet)
1136            dsm = dsq_rec( - ngfftrec(1)/2, jj, kk,inf_rmet)
1137            if (dsp<dsqmin) then
1138              dsqmin = dsp
1139              iimin = ngfftrec(1)/2
1140              jjmin = jj
1141              kkmin = kk
1142              plane=1
1143            end if
1144            if (dsm<dsqmin) then
1145              dsqmin = dsm
1146              iimin =  - ngfftrec(1)/2
1147              jjmin = jj
1148              kkmin = kk
1149              plane=1
1150            end if
1151          end do
1152        end do
1153      end if
1154 
1155      if(get_ngfftrec(2)/=0)then
1156 !      +/- g2 planes:
1157        do ii=-ngfftrec(1)/2,ngfftrec(1)/2
1158          do kk=-ngfftrec(3)/2,ngfftrec(3)/2
1159            dsp = dsq_rec(ii,ngfftrec(2)/2,kk,inf_rmet)
1160            dsm = dsq_rec(ii,-ngfftrec(2)/2,kk,inf_rmet)
1161            if (dsp<dsqmin) then
1162              dsqmin = dsp
1163              iimin = ii
1164              jjmin = ngfftrec(2)/2
1165              kkmin = kk
1166              plane=2
1167            end if
1168            if (dsm<dsqmin) then
1169              dsqmin = dsm
1170              iimin = ii
1171              jjmin =  - ngfftrec(2)/2
1172              kkmin = kk
1173              plane=2
1174            end if
1175          end do
1176        end do
1177      end if
1178 
1179      if(get_ngfftrec(3)/=0)then
1180 !      +/- g3 planes:
1181        do ii=-ngfftrec(1)/2,ngfftrec(1)/2
1182          do jj=-ngfftrec(2)/2,ngfftrec(2)/2
1183            dsp = dsq_rec(ii,jj,ngfftrec(3)/2,inf_rmet)
1184            dsm = dsq_rec(ii,jj,-ngfftrec(3)/2,inf_rmet)
1185            if (dsp<dsqmin) then
1186              dsqmin = dsp
1187              iimin = ii
1188              jjmin = jj
1189              kkmin = ngfftrec(3)/2
1190              plane=3
1191            end if
1192            if (dsm<dsqmin) then
1193              dsqmin = dsm
1194              iimin = ii
1195              jjmin = jj
1196              kkmin =  - ngfftrec(3)/2
1197              plane=3
1198            end if
1199          end do
1200        end do
1201      end if
1202 
1203      if(dsqmin>=rtroncat)then
1204        get_ngfftrec=0
1205        exit
1206      end if
1207 
1208 !    Fix nearest boundary
1209      do ii=minsrch(plane),maxsrch(plane)
1210        if (srch(ii)>=ngfftrec(plane)) then
1211 !        redefine ngfft(plane) to next higher choice
1212          ngfftrec(plane)=srch(ii+1)
1213 !        verify if we cover the whole box
1214          if(ngfftrec(plane)>=ngfft(plane))then
1215            ngfftrec(plane)=ngfft(plane)
1216            get_ngfftrec(plane)=0
1217          end if
1218 !        Exit the loop over ii
1219          exit
1220        end if
1221      end do
1222 
1223    end do
1224 
1225    if (allocated(srch)) then
1226      ABI_DEALLOCATE(srch)
1227    end if
1228 
1229 !  if(mod(ngfftrec(1),16)/=0) then
1230 !  ngfftrec(1) = ngfftrec(1)+(16-mod(ngfftrec(1),16))
1231 !  ngfftrec(2:3) = ngfftrec(1)
1232 !  endif
1233 
1234    ngfftrec(4)=2*(ngfftrec(1)/2)+1
1235    ngfftrec(5)=2*(ngfftrec(2)/2)+1
1236    ngfftrec(6)=ngfftrec(3)
1237 
1238 !  --algorithm
1239    ngfftrec(7)=ngfft(7)   ! to be improved for a better non-parallel algorithm - here it is automatically 401
1240    ngfftrec(8)=ngfft(8)
1241 
1242  end if
1243 
1244 !--For now, recursion method doesn't use paralelism on FFT - which would require a great number of processors
1245  nfftrec = product(ngfftrec(1:3))
1246  ngfftrec(9:11) = (/0,1,0/)   !--(/ paral, nproc, %me \)
1247  ngfftrec(12:13) = ngfftrec(2:3)   ! n2proc ! n3proc
1248 
1249  tronc  = all(ngfftrec(:3)/=ngfft(:3))
1250  call timab(602,2,tsec)
1251 
1252  contains
1253 
1254    function dsq_rec(ii,jj,kk,inf_rmet)
1255 
1256 
1257 !This section has been created automatically by the script Abilint (TD).
1258 !Do not modify the following lines by hand.
1259 #undef ABI_FUNC
1260 #define ABI_FUNC 'dsq_rec'
1261 !End of the abilint section
1262 
1263    real(dp) :: dsq_rec
1264    integer,intent(in) :: ii,jj,kk
1265    real(dp),intent(in) :: inf_rmet(3,3)
1266    dsq_rec=sqrt(&
1267 &   inf_rmet(1,1)*dble(ii**2)&
1268 &   +inf_rmet(2,2)*dble(jj**2)&
1269 &   +inf_rmet(3,3)*dble(kk**2)&
1270 &   +two*(inf_rmet(1,2)*dble(ii*jj)&
1271 &   +inf_rmet(2,3)*dble(jj*kk)&
1272 &   +inf_rmet(3,1)*dble(kk*ii)))
1273  end function dsq_rec
1274 
1275 
1276 end subroutine getngrec

ABINIT/m_rec [ Modules ]

[ Top ] [ Modules ]

NAME

  m_rec

FUNCTION

  This module  provides some functions applied to the
  recursion structured datatype recursion_type.
  It includes also some function used to change some variables
  of recursion_type

COPYRIGHT

 Copyright (C) 2002-2018 ABINIT group (MMancini)
 This file is distributed under the terms of the
 GNU General Public License, see ~abinit/COPYING
 or http://www.gnu.org/copyleft/gpl.txt .
 For the initials of contributors, see ~abinit/doc/developers/contributors.txt.

NOTES

 * Routines tagged with "@type_name" are strongly connected to the definition of the data type.
   Strongly connected means that the proper functioning of the implementation relies on the
   assumption that the tagged procedure is consistent with the type declaration.
   Every time a developer changes the structure "type_name" adding new entries, he/she has to make sure
   that all the strongly connected routines are changed accordingly to accomodate the modification of the data type.
   Typical examples of strongly connected routines are creation, destruction or reset methods.

PARENTS

CHILDREN

SOURCE

35 #if defined HAVE_CONFIG_H
36 #include "config.h"
37 #endif
38 
39 #include "abi_common.h"
40 
41 module m_rec
42 
43  use defs_basis
44  use defs_datatypes
45  use defs_abitypes
46  use defs_rectypes
47  use m_abicore
48  use m_errors
49  use m_xmpi
50  use m_sort
51 
52  use m_exp_mat,         only : exp_mat
53  use m_numeric_tools,   only : set2unit
54  use m_special_funcs,   only : gamma_function
55  use m_pawfgr,          only : pawfgr_nullify, indgrid, pawfgr_destroy
56  use m_paw_sphharm,     only : initylmr
57  use m_time,            only : timab
58  use m_rec_tools,       only : get_pt0_pt1
59  use m_per_cond,        only : per_cond
60 #ifdef HAVE_GPU_CUDA
61  use m_hidecudarec,     only : InitRecGPU, CleanRecGPU
62 #endif
63 
64 
65  implicit none
66 
67  private ::           &
68    find_maxmin_proc,  &  !--To calculate max and min pt for any cpu
69    H_D_distrib
70 
71  public ::            &
72    InitRec,           &  !--Main creation method.
73    Init_MetricRec,    &  !--To Initalize the inf. metric in recursion
74    Init_nlpspRec,     &  !--Main creation method for non-local part.
75    CleanRec,          &  !--deallocate all pointers.
76    Calcnrec,          &  !--calculates the new min_nrec
77    cpu_distribution      !--Regulates the work load on cpu-gpu
78 CONTAINS  !===========================================================

ABINIT/pspnl_hgh_rec [ Functions ]

[ Top ] [ Functions ]

NAME

 pspnl_hgh_rec

FUNCTION

 This routine computes the matrices S_kk'^{l,A} of the projectors
 (it is the exponential of the overlap matrix). It coorresponds to the matrix:
   $$\left[(U_l)^{-1}*Exp(-temperature D_l )*U_l* (g_l)^{-1} -Identity\right]_kk'
   where (U_l)^-1* D_l* U_l = h^lg_l.
 $g_l = <f^l_k|f^l_{k'}>$ is the overlap matrix between projectors
 and $h^l_{kk'}$ is the strength matrix of the projectors.
 It calulates also the strength eigenvalues and eigenvectors of $h$,
 used in the calculus of non-local energy

INPUTS

  temperature=4*rtrotter/beta=4*rtrotter*tsmear: the effective temp. in  recursion
  psps <type(pseudopotential_type)>=variables related to pseudo-potentials
  debug=debug variable

OUTPUT

  nlrec%mat_exp_psp_nl=the matrix of the exponential of the projectors:
   for any psp, for any angular moment:
   h_ij=strength; g_ij=ovelap => exp(-h.g/temp/4p).g^(-1)
  nlrec%radii=Local radii of nl psp
  nlrec%pspinfo(:,:) for any typat:  (momang,typat)=number of projectors
  nlrec%eival(:,:,:) for any psp, any momang, the eigenvalues of the
    strength matrix H: eival(:,mang,psp)
  nlrec%eivec(:,:,:,:)for any psp, any momang, the eigenvectors of the
    strength matrix H: eivec(:,:,mang,psp)

SIDE EFFECTS

PARENTS

      m_rec

CHILDREN

      dgetrf,dgetri,dsyev,exp_mat,gamma_function,set2unit,wrtout

SOURCE

1557 subroutine pspnl_hgh_rec(psps,temperature,nlrec,debug)
1558 
1559  use m_linalg_interfaces
1560 
1561 !This section has been created automatically by the script Abilint (TD).
1562 !Do not modify the following lines by hand.
1563 #undef ABI_FUNC
1564 #define ABI_FUNC 'pspnl_hgh_rec'
1565 !End of the abilint section
1566 
1567  implicit none
1568 
1569 !Arguments -----------------------------------
1570 !scalars
1571  real(dp),intent(in) :: temperature
1572  logical,intent(in) :: debug
1573  type(pseudopotential_type),intent(in) :: psps
1574  type(nlpsprec_type),intent(inout) :: nlrec
1575 !arrays
1576 !Local variables-------------------------------
1577 !scalars
1578  integer,parameter :: maxsize=3
1579  integer,parameter :: lwork=(1+32)*maxsize
1580  integer :: iangol,ipseudo,info,nproj
1581  integer :: g_mat_size,ii,nproj2
1582  real(dp) :: denom_1,denom_2,tot_proj
1583  character(len=500) :: msg
1584 !arrays
1585  integer :: ipvt(1)
1586  !real(dp) :: rwork(2*maxsize)
1587  real(dp) :: h_mat_init(3,3), rework(lwork)
1588  real(dp), allocatable :: g_mat(:,:),h_mat(:,:),eig_val_h(:)
1589  real(dp), allocatable :: identity(:,:),inv_g_mat(:,:),u_mat(:,:)
1590 
1591  complex(dpc),allocatable :: hg_mat(:,:)
1592 ! *************************************************************************
1593 
1594  if(debug)then
1595    write(msg,'(80a,a,a)') ('=',ii=1,80),ch10,' pspnl_hgh_rec : enter '
1596    call wrtout(std_out,msg,'COLL')
1597  end if
1598 
1599 !--For any pseudo potential:
1600  do ipseudo = 1, psps%npsp !--Loop on the pseudos
1601    write(msg,'(a,80a)')' pseudo file',('-',ii=1,10)
1602    call wrtout(std_out,msg,'COLL')
1603    write(msg,'(a)') psps%filpsp(ipseudo)
1604    call wrtout(std_out,msg,'COLL')
1605 
1606 !  --For any angular moment:
1607    do iangol = 0,psps%mpsang-1 !--Loop on the angular moment
1608 
1609 !    --Local radius
1610      nlrec%radii(iangol+1,ipseudo) = psps%gth_params%psppar(iangol+1,0,ipseudo)
1611 
1612 !    --Strenghts of non-local projectors  (matrix h)
1613 !    --Diagonal part:
1614      h_mat_init = zero
1615      h_mat_init(1,1) = psps%gth_params%psppar(iangol+1,1,ipseudo)
1616      h_mat_init(2,2) = psps%gth_params%psppar(iangol+1,2,ipseudo)
1617      h_mat_init(3,3) = psps%gth_params%psppar(iangol+1,3,ipseudo)
1618 !    --Out-diagonal part
1619 !    --Depending on angular moment the projectors
1620 !    strength is calculated differently
1621      select case(iangol)
1622      case(0)
1623        h_mat_init(1,2) = -half*sqrt(3.d0/5.d0)*h_mat_init(2,2)
1624        h_mat_init(1,3) =  half*sqrt(5.d0/21.d0)*h_mat_init(3,3)
1625        h_mat_init(2,3) = -half*sqrt(100.d0/63.d0)*h_mat_init(3,3)
1626      case(1)
1627        h_mat_init(1,2) = -half*sqrt(5.d0/7.d0)*h_mat_init(2,2)
1628        h_mat_init(1,3) =  sixth*sqrt(35.d0/11.d0)*h_mat_init(3,3)
1629        h_mat_init(2,3) = -14.d0/six/sqrt(11.d0) *h_mat_init(3,3)
1630      case(2)
1631        h_mat_init(1,2) = -half*sqrt(7.d0/9.d0)*h_mat_init(2,2)
1632        h_mat_init(1,3) =  half*sqrt(63.d0/143.d0)*h_mat_init(3,3)
1633        h_mat_init(2,3) = -nine/sqrt(143.d0)*h_mat_init(3,3)
1634      case(3)
1635        h_mat_init(1,2) = zero;  h_mat_init(1,3) = zero;  h_mat_init(2,3) = zero;
1636      case default
1637        write(msg,'(a)')' error angular: moment component'
1638        call wrtout(std_out,msg,'COLL')
1639      end select
1640 
1641 
1642 
1643 !    --Real dimensions of projectors.
1644      g_mat_size = count(abs((/ (h_mat_init(ii,ii),ii=1,3) /))>1.d-8)
1645      nlrec%pspinfo(iangol+1,ipseudo) = g_mat_size
1646      write(msg,'(a,i2,a,i2)')' ang. moment=',iangol,', N projectors=',g_mat_size
1647      call wrtout(std_out,msg,'COLL')
1648      if (g_mat_size>0) then
1649 !      --Identity matrix
1650        ABI_ALLOCATE(identity,(g_mat_size,g_mat_size))
1651        call set2unit(identity)
1652 !      identity = zero
1653 !      identity(:,1) = one
1654 !      identity(:,:) = cshift(array=identity,shift=(/ (-ii,ii=0,g_mat_size) /), dim=2 )
1655 
1656 
1657 !      ############## CALCULOUS OF THE EIGEN_SPACE OF THE PROJECTORS STRENGTHS ##################
1658 !      --Inverse of the matrix h
1659        ABI_ALLOCATE(eig_val_h,(g_mat_size))
1660        ABI_ALLOCATE(u_mat,(g_mat_size,g_mat_size))
1661 !      --u-mat will contain the eigenvectors of h_mat_init
1662        u_mat = h_mat_init(:g_mat_size,:g_mat_size)
1663 
1664 !      write(std_out,*)'hmat_init'
1665 !      do ii=1,g_mat_size
1666 !      write(std_out,*)h_mat_init(ii,:)
1667 !      end do
1668        call DSYEV('v','u',g_mat_size,u_mat,g_mat_size,eig_val_h,rework,lwork,info)
1669 
1670 !      --THE DIAGONAL MATRIX IS GIVEN BY D=U^t.H.U
1671 !      (eival=transpose(eivec).h_mat_init.eivec)
1672        write(msg,'(a,3d10.3)')'  eigenvalues=',eig_val_h
1673        call wrtout(std_out,msg,'COLL')
1674 !      write(std_out,*)'autovec';write(std_out,*)u_mat
1675 
1676        nlrec%eival(:g_mat_size,1+iangol,ipseudo) = eig_val_h
1677        nlrec%eivec(:g_mat_size,:g_mat_size,1+iangol,ipseudo) = u_mat
1678        ABI_DEALLOCATE(eig_val_h)
1679        ABI_DEALLOCATE(u_mat)
1680 
1681 !      ##########END  CALCULOUS OF THE EIGEN_SPACE OF THE PROJECTORS STRENGTHS ##################
1682 
1683        ABI_ALLOCATE(g_mat,(g_mat_size,g_mat_size))
1684        ABI_ALLOCATE(inv_g_mat,(g_mat_size,g_mat_size))
1685        ABI_ALLOCATE(h_mat,(g_mat_size,g_mat_size))
1686        ABI_ALLOCATE(hg_mat,(g_mat_size,g_mat_size))
1687 
1688        g_mat(:,:) = one
1689        h_mat(:,:) = zero
1690        h_mat(:,:) = h_mat_init(:g_mat_size,:g_mat_size)
1691 
1692 !      -------------------------------------------------------
1693 !      --Matrix  of the overlap between projetors (matrix g)
1694 !      and the h matrix of strength
1695        do nproj = 1,g_mat_size-1
1696          do nproj2 = 1+nproj,g_mat_size
1697            tot_proj = zero
1698 !          --Analytic value of overlap
1699 !          g_ij=Gamma[-1/2+i+j+l]/Sqrt(Gamma[-1/2+i+iangol]*Gamma[-1/2+j+iangol])
1700            call gamma_function(-half+real(nproj+nproj2+iangol,dp),tot_proj)
1701            call gamma_function(-half+real(iangol+2*nproj,dp),denom_1)
1702            call gamma_function(-half+real(iangol+2*nproj2,dp),denom_2)
1703 
1704            g_mat(nproj,nproj2) = tot_proj/sqrt(denom_1*denom_2)
1705            g_mat(nproj2,nproj) = g_mat(nproj,nproj2)
1706 
1707            h_mat(nproj,nproj2) = h_mat_init(nproj,nproj2)
1708            h_mat(nproj2,nproj) = h_mat_init(nproj,nproj2)
1709          end do
1710        end do
1711 
1712 !      --Inverse of the overlap matrix g
1713        inv_g_mat = g_mat
1714        call DGETRF(g_mat_size,g_mat_size,inv_g_mat,g_mat_size,ipvt,info)
1715        call DGETRI(g_mat_size,inv_g_mat,g_mat_size,ipvt,rework,lwork,info)
1716 
1717 
1718 !      -----------------------------------------------------------
1719 !      --Now it calculates the exponential of the matrix h.g
1720        hg_mat = matmul(h_mat,g_mat)
1721 
1722        call exp_mat(hg_mat,g_mat_size,-one/temperature)
1723 
1724 !      --(exp(h.g)-Identity).(g^-1)
1725        hg_mat = hg_mat-identity(:,:)
1726 
1727 
1728 !      --results on output
1729        nlrec%mat_exp_psp_nl(:g_mat_size,:g_mat_size,1+iangol,ipseudo) = matmul(real(hg_mat,dp),inv_g_mat)
1730 
1731 !      write(std_out,*) nlrec%mat_exp_psp_nl(:g_mat_size,:g_mat_size,1+iangol,ipseudo)
1732 
1733        ABI_DEALLOCATE(g_mat)
1734        ABI_DEALLOCATE(hg_mat)
1735        ABI_DEALLOCATE(h_mat)
1736        ABI_DEALLOCATE(inv_g_mat)
1737        ABI_DEALLOCATE(identity)
1738      end if
1739 
1740    end do !enddo on angular moment
1741  end do !enddo on pseudos
1742 
1743 
1744  if(debug)then
1745    write(msg,'(80a,a,a)') ('=',ii=1,80),ch10,' pspnl_hgh_rec : exit '
1746    call wrtout(std_out,msg,'COLL')
1747  end if
1748 
1749 end subroutine pspnl_hgh_rec

ABINIT/pspnl_operat_rec [ Functions ]

[ Top ] [ Functions ]

NAME

 pspnl_operat_rec

FUNCTION

 It calculates the non-local projectors used in recursion for any psp non-local:
 The nl interaction in recursion is $$exp{-V_{NL}/beta}=\sum_A\sum_{lm}
 \sum{ij}Y_{lm}(\hat{r-R_A}')f^l_i(r-R_A)D^l_{i,j}Y_{lm}(\hat{r-R_A})f^l_j{r-R_A}$$
 where $D^_{i,j}$ is a matrix  previously (see pspnl_operat_rec).
 In this routine  the projectors $Y_{lm}(\hat{r-R_A}')f^l_i(r-R_A)$
 are calculated. So an array of dimensions
 rset%nl%projec(nfftrec,lmnmax,nlrec%npsp)

INPUTS

 metrec<metricrec_type>=contains information concerning metric in
         recursion: grid_step, metric, infinitesimal volume
 ngfftrec(18)=is the ngfft grid (truncated if different from ngfft) of recursion
 debug=debug variable

OUTPUT

 nlrec<nlpsprec_type>%projec= array containig the projectors on the real grid
 nlrec<nlpsprec_type>%intlen= integer linear size of the non-local grid

SIDE EFFECTS

 nlrec<nlpsprec_type> data set of non-local pseudo for recursion
 The better Interaction length (Intlen) is also calculated and printed but
 recursion use intlen=ngfftrec/2

NOTES

PARENTS

      m_rec

CHILDREN

      gamma_function,initylmr,wrtout

SOURCE

1319 subroutine pspnl_operat_rec(nlrec,metrec,ngfftrec,debug)
1320 
1321 
1322 !This section has been created automatically by the script Abilint (TD).
1323 !Do not modify the following lines by hand.
1324 #undef ABI_FUNC
1325 #define ABI_FUNC 'pspnl_operat_rec'
1326 !End of the abilint section
1327 
1328  implicit none
1329 
1330 !Arguments ------------------------------------
1331 !scalars
1332  logical,intent(in) :: debug
1333  type(metricrec_type),intent(in) ::metrec
1334  type(nlpsprec_type),intent(inout) :: nlrec
1335 !arrays
1336  integer,intent(in) :: ngfftrec(18)
1337 !Local variables-------------------------------
1338  integer :: ii,intlen
1339  integer :: iangol,ipsp,iproj
1340  integer :: mpsang,jj,kk,rr
1341  integer :: nfftrec
1342  integer :: ilmn,il,ilm,in,lmnmax
1343  real(dp) :: raggio,rloc,denom,step
1344  real(dp) :: delta_out,partial,err
1345  character(len=500) :: msg
1346  real(dp) :: part_sum(3)
1347  real(dp) :: ylmr_gr_dm(0,0,0)
1348  real(dp),allocatable :: ylmr(:,:),proj_arr(:,:,:)
1349  real(dp),allocatable :: radloc(:,:),nrm(:)
1350 
1351 ! *************************************************************************
1352 
1353  if(debug)then
1354    write(msg,'(80a,a,a)') ('=',ii=1,80),ch10,' pspnl_operat_rec : enter '
1355    call wrtout(std_out,msg,'PERS')
1356  end if
1357 
1358 !#####################################################################
1359 !--CALCULATE THE (SEMI-)MAXIMUM INTERVAL WHERE ALL THE PROJECTORS ARE
1360 !DIFFERENT TO ZERO.
1361 !--For any pseudo potential:
1362  delta_out = zero
1363  step = metrec%tr(1)*half !--Scanning step= grid step/2
1364 
1365 
1366  do ipsp = 1, nlrec%npsp !--Loop on the pseudos
1367 
1368 !  --For any angular moment:
1369    do iangol = 0,maxval(nlrec%indlmn(1,:,ipsp)) !--Loop on the angular moment
1370      rloc = nlrec%radii(iangol+1,ipsp) !--Local radius
1371 
1372 !    --For any projector
1373      do iproj = 1,nlrec%pspinfo(iangol+1,ipsp)
1374 !      --Starting point to searching when the projector goes to zero.
1375 !      this correspond to twice the radius wher the projector has its maximum
1376        raggio = two*sqrt(real(-2+2*iproj+iangol,dp))*rloc
1377 !      --Caculate the gamma func at the denominator
1378        call gamma_function(real(iangol+2*iproj,dp)-half,denom)
1379 !      --Find the zero
1380 !      --The following while cycle should be replaced by a bisection
1381 !      --method. Bucause this is calculated only 1 time it is not very
1382 !      important.
1383        err = one
1384        ii=0
1385 !      tolloc = 1.d0*abs(minval(nlrec%mat_exp_psp_nl(:nlrec%pspinfo(iangol+1,ipsp),:nlrec%pspinfo(iangol+1,ipsp),iangol+1,ipsp)))
1386        do while(abs(err)>1.d-2)
1387          raggio = raggio + step
1388          err = project_prec(raggio,iproj,iangol,rloc)/sqrt(denom)
1389          ii = ii+1
1390        end do
1391        write(std_out,*)'local delta',raggio,ii
1392        delta_out=maxval((/ delta_out,raggio /))
1393      end do !end loop on projectors
1394 
1395    end do !enddo on angular moment
1396  end do !enddo on pseudos
1397 
1398 !--CALCULATE how many grid steps correspond to delta_out
1399  intlen = int(delta_out/metrec%tr(1))
1400 !--I want that intlen is odd
1401  if(mod(intlen,2)==0) intlen = intlen+1
1402 
1403  write(msg,'(2a,i3,a)') ch10,' Interac. length of non-local psp(grid steps)=',intlen,ch10
1404  call wrtout(std_out,msg,'COLL')
1405 !#####################################################################
1406 
1407 !--Initialisation
1408  nfftrec = product(ngfftrec(1:3))
1409  lmnmax = nlrec%lmnmax
1410  intlen = ngfftrec(1)/2
1411  nlrec%intlen = intlen !--Setted in recursion variables
1412 
1413 !#####################################################################
1414 !--CALCULATE E(q,q')
1415 !--Cration of the exponential*projectors*ylm matrix
1416 
1417 !--Initialisation
1418  ABI_ALLOCATE(nlrec%projec,(nfftrec,lmnmax,nlrec%npsp))
1419  nlrec%projec = zero
1420  ABI_ALLOCATE(radloc,(3,nfftrec))
1421  radloc = zero
1422  ABI_ALLOCATE(nrm,(nfftrec))
1423  nrm = zero
1424 
1425 !--Loop on pseudo types
1426  pseudodo: do ipsp = 1, nlrec%npsp
1427 !  --Control if the psp is non-local, else continue
1428    if(all(nlrec%pspinfo(:,ipsp)==0)) cycle
1429 !  --Vector which stores localy the upper part of symmetrical
1430 !  matrix of the exponential of the non-local operator
1431    mpsang = maxval(nlrec%indlmn(1,:,ipsp))+1
1432    ABI_ALLOCATE(proj_arr,(nfftrec,maxval(nlrec%pspinfo(:,ipsp)),mpsang))
1433    ABI_ALLOCATE(ylmr,(mpsang*mpsang,nfftrec))
1434    proj_arr = zero
1435    ylmr = zero
1436 
1437 !  !debug
1438 !  write(std_out,*)'mpsang,proj num',mpsang,maxval(nlrec%pspinfo(:,ipsp))
1439 !  !enddebug
1440 
1441 !  --Calculate the projctors
1442    do iangol = 0,mpsang-1
1443      rloc = nlrec%radii(iangol+1,ipsp)
1444      do iproj = 1,nlrec%pspinfo(iangol+1,ipsp)
1445        call gamma_function(real(iangol+2*iproj,dp)-half,denom)
1446        denom = one/sqrt(denom)
1447        do ii = 0,ngfftrec(1)-1 !--3-loop on coordinates
1448          do jj = 0,ngfftrec(2)-1
1449            do kk = 0,ngfftrec(3)-1
1450 !            --Calculate the radii
1451              part_sum(:) = real((/ ii,jj,kk /)-intlen,dp)*(metrec%tr)
1452              rr = 1+ii+(jj+kk*ngfftrec(2))*ngfftrec(3)
1453              radloc(:,rr) = part_sum
1454              nrm(rr) = sqrt(sum(part_sum(:)**two))
1455              partial = project_prec(nrm(rr),iproj,iangol,rloc)*denom
1456              if(abs(partial)>tol12 ) proj_arr(rr,iproj,iangol+1) = partial
1457            end do
1458          end do
1459        end do  !--End 3-loop on coordinates
1460      end do
1461    end do
1462 
1463 
1464 !  -------------------------------------------------------------
1465 !  --Calculate the spherical harmonics (Verified: it works well)
1466    call initylmr(mpsang,1,nfftrec,nrm(:),1,radloc(:,:),ylmr(:,:),ylmr_gr_dm)
1467 !  -------------------------------------------------------------
1468 
1469 
1470    do ilmn = 1,lmnmax
1471      ilm = nlrec%indlmn(4,ilmn,ipsp)
1472      il = nlrec%indlmn(1,ilmn,ipsp)+1
1473      in = nlrec%indlmn(3,ilmn,ipsp)
1474      write(msg,'(2a,i3,2i2)')ch10,'lm,l,n',ilm,il,in
1475      call wrtout(std_out,msg,'COLL')
1476 
1477      nlrec%projec(:,ilmn,ipsp) = ylmr(ilm,:)*proj_arr(:,in,il)
1478    end do
1479 
1480    ABI_DEALLOCATE(ylmr)
1481    ABI_DEALLOCATE(proj_arr)
1482  end do pseudodo !--end loop on pseudo types
1483 
1484 
1485  ABI_DEALLOCATE(radloc)
1486  ABI_DEALLOCATE(nrm)
1487 
1488  if(debug)then
1489    write(msg,'(80a,a,a)') ('=',ii=1,80),ch10,' pspnl_operat_rec : exit '
1490    call wrtout(std_out,msg,'PERS')
1491  end if
1492 
1493  contains
1494 
1495    function project_prec(raggio,iproj,iangol,rloc)
1496 !--Analytical expression of the projectors in hgh-pspeudopotential
1497 !--The gamma function at denominator is missing
1498 
1499 !This section has been created automatically by the script Abilint (TD).
1500 !Do not modify the following lines by hand.
1501 #undef ABI_FUNC
1502 #define ABI_FUNC 'project_prec'
1503 !End of the abilint section
1504 
1505    real(dp) :: project_prec
1506    integer,intent(in) :: iproj,iangol
1507    real(dp),intent(in) :: raggio,rloc
1508 
1509    project_prec=sqrt2*(raggio/rloc)**real((iangol+2*(iproj-1)),dp)*&
1510 &   exp(-((raggio/rloc)**two)*half)/rloc**onehalf
1511  end function project_prec
1512 
1513 end subroutine pspnl_operat_rec

m_rec/Calcnrec [ Functions ]

[ Top ] [ m_rec ] [ Functions ]

NAME

 Calcnrec

FUNCTION

 Calculate the new min_nrec.

INPUTS

 rset<recursion_type>=Data type concerning recursion
 b2(:,:) recursion coefficients

OUTPUT

 rset%min_nrec is changed

PARENTS

      vtorhorec

CHILDREN

      wrtout,xmpi_max

SOURCE

908 subroutine Calcnrec(rset,b2)
909 
910 
911 !This section has been created automatically by the script Abilint (TD).
912 !Do not modify the following lines by hand.
913 #undef ABI_FUNC
914 #define ABI_FUNC 'Calcnrec'
915 !End of the abilint section
916 
917  implicit none
918 
919  !Arguments ------------------------------------
920  ! scalars
921  type(recursion_type),intent(inout) :: rset
922  ! arrays
923  real(dp), intent(in):: b2(0:rset%min_nrec,1:rset%par%ntranche)
924  !Local ----------------------------------------
925  ! scalars
926  integer :: kk,ii,jj,ierr,loc_nrec
927  character(len=500) :: msg
928  ! *********************************************************************
929  ! @recursion_type
930  ! @pawfgr_type
931  kk = 1
932  loc_nrec = rset%min_nrec
933  do ii=1,rset%par%ntranche
934   !--Use to lbound because b2 passed as argument
935   !  doesn't have the same bounds as in the calling
936   !  subroutine, the +1 because b2(lbound,ii)=1.
937   jj = lbound(b2,dim=1)+1
938   do while (b2(jj,ii)>tol10 .and.  jj<=rset%min_nrec-1)
939    jj = jj+1
940    kk = max(jj,kk)
941   end do
942  enddo
943  call xmpi_max(kk,rset%min_nrec,rset%mpi%comm_bandfft,ierr)
944  rset%min_nrec = rset%min_nrec+1-lbound(b2,dim=1)
945 
946  write(msg,'(a,i2.2,a9,i2.2)') ' -- nrec adjustement   nrec=',loc_nrec,' => nrec=',rset%min_nrec
947  call wrtout(std_out,msg,'COLL')
948  write(msg,'(51a)')' ',('-',ii=1,50)
949  call wrtout(std_out,msg,'COLL')
950 
951 end subroutine Calcnrec

m_rec/CleanRec [ Functions ]

[ Top ] [ m_rec ] [ Functions ]

NAME

 CleanRec

FUNCTION

 Deallocate the pointers of rset<recursion_type>=Data type concerning recursion.

INPUTS

 rset<recursion_type>=Data type concerning recursion

SIDE EFFECTS

 All pointers are deallocated.

PARENTS

      gstate

CHILDREN

      wrtout,xmpi_max

SOURCE

821 subroutine CleanRec(rset)
822 
823 
824 !This section has been created automatically by the script Abilint (TD).
825 !Do not modify the following lines by hand.
826 #undef ABI_FUNC
827 #define ABI_FUNC 'CleanRec'
828 !End of the abilint section
829 
830  implicit none
831 
832 !Arguments ------------------------------------
833 ! scalars
834  type(recursion_type),intent(inout) :: rset
835 ! arrays
836 ! *********************************************************************
837 
838  ! @recursion_type
839 
840  if (allocated(rset%ZT_p))  then
841    ABI_DEALLOCATE(rset%ZT_p)
842  end if
843  if (allocated(rset%par%displs))  then
844    ABI_DEALLOCATE(rset%par%displs)
845  end if
846  if (allocated(rset%par%vcount))  then
847    ABI_DEALLOCATE(rset%par%vcount)
848  end if
849  if (allocated(rset%nl%mat_exp_psp_nl))  then
850    ABI_DEALLOCATE(rset%nl%mat_exp_psp_nl)
851  end if
852  if (allocated(rset%nl%eival))  then
853    ABI_DEALLOCATE(rset%nl%eival)
854  end if
855  if (allocated(rset%nl%eivec))  then
856    ABI_DEALLOCATE(rset%nl%eivec)
857  end if
858  if (allocated(rset%nl%pspinfo))  then
859    ABI_DEALLOCATE(rset%nl%pspinfo)
860  end if
861  if (allocated(rset%nl%radii))  then
862    ABI_DEALLOCATE(rset%nl%radii)
863  end if
864  if (allocated(rset%nl%indlmn))  then
865    ABI_DEALLOCATE(rset%nl%indlmn)
866  end if
867  if (allocated(rset%nl%projec))  then
868    ABI_DEALLOCATE(rset%nl%projec)
869  end if
870  if (allocated(rset%inf%gcart))  then
871    ABI_DEALLOCATE(rset%inf%gcart)
872  end if
873 
874  call pawfgr_destroy(rset%pawfgr)
875 
876  ! No is needed deallocate rset%mpi: it is a copy of mpi_enreg which
877  ! pointers are deallocated in gstate
878 
879 #ifdef HAVE_GPU_CUDA
880   call CleanRecGPU(rset%GPU,rset%load)
881 #endif
882 
883 end subroutine CleanRec

m_rec/cpu_distribution [ Functions ]

[ Top ] [ m_rec ] [ Functions ]

NAME

 cpu_distribution

FUNCTION

 Calculate the number of point,GPU,for any proc

INPUTS

  ngfft(3)=nuber of point of the grid
  gratio=recgratio ratio between the fine and coarse grid
  beta_coeff=estimated time ratio between CPU_time and GPU_time
  calc_type=if 0 takes the possible max for nptrec (to test the
  completly full graphic card). 1 after test to calculate the min
  possible value for nptrec

OUTPUT

PARENTS

      first_rec,m_rec

CHILDREN

      wrtout,xmpi_max

SOURCE

314  subroutine cpu_distribution(gratio,rset,ngfft,beta_coeff,calc_type)
315 
316 
317 !This section has been created automatically by the script Abilint (TD).
318 !Do not modify the following lines by hand.
319 #undef ABI_FUNC
320 #define ABI_FUNC 'cpu_distribution'
321 !End of the abilint section
322 
323  implicit none
324 
325 !Arguments ------------------------------------
326  integer,intent(in)  :: gratio,calc_type
327  real(dp),intent(in) :: beta_coeff
328  integer,intent(in)  :: ngfft(3)
329  type(recursion_type),intent(inout),target :: rset
330 !Local ---------------------------
331  integer :: ii,nfft,ierr
332  integer,pointer :: proc_pt_dev(:,:)
333  type(recparall_type),pointer :: recpar
334  character(500) :: msg
335 ! *********************************************************************
336 
337  ! write(std_out,*)'start cpu_distribution'
338 
339  nullify(proc_pt_dev)
340  ABI_ALLOCATE(proc_pt_dev,(2,0:rset%mpi%nproc-1))
341 
342  nfft = product(ngfft)
343  call H_D_distrib(rset,nfft,gratio,proc_pt_dev,beta_coeff)
344 
345  nullify(recpar)
346  if(rset%load == 0)then
347    ABI_ALLOCATE(rset%par%displs,(0:rset%mpi%nproc-1))
348    ABI_ALLOCATE(rset%par%vcount,(0:rset%mpi%nproc-1))
349    recpar => rset%par
350 #if defined HAVE_GPU_CUDA
351  else
352    if(rset%tp==4)then
353      ABI_ALLOCATE(rset%GPU%par%displs,(0:rset%mpi%nproc-1))
354      ABI_ALLOCATE(rset%GPU%par%vcount,(0:rset%mpi%nproc-1))
355    endif
356    recpar => rset%GPU%par
357 #endif
358  endif
359 
360  recpar%ntranche = nfft/(rset%mpi%nproc)!equipartitioned point
361 
362  call find_maxmin_proc(recpar,rset%mpi%nproc,&
363 &                      rset%mpi%me,gratio,ngfft,proc_pt_dev)
364 
365  recpar%vcount = 0
366  if(rset%load==0)then
367    recpar%vcount(rset%mpi%me) = recpar%ntranche
368  else
369    recpar%vcount(rset%mpi%me) = recpar%npt
370  endif
371 
372  call xmpi_sum(recpar%vcount,rset%mpi%comm_bandfft,ierr)
373 
374  recpar%displs = 0
375  if(rset%mpi%nproc>1) recpar%displs(1:) = (/(sum(recpar%vcount(:ii)),ii=0,rset%mpi%nproc-2)/)
376 
377  !--INITALIZATION OF CUDA FOR RECURSION
378 #if defined HAVE_GPU_CUDA
379  if(rset%load == 0)   rset%GPU%par = rset%par
380  call InitRecGPU(rset,nfft,gratio,rset%GPU%map(rset%mpi%me),calc_type)
381 #else
382   ierr = calc_type !only of abirule when there is not HAVE_GPU_CUDA
383 #endif
384 
385 
386 ! if(rset%debug ) then
387  write(msg,'(a,i7,2(2a,3i7),8(2a,i7),2(2a,3i7),(2a,e14.6))')&
388    & ' me                 ',  rset%mpi%me,ch10,&
389    & ' ngfft              ',  ngfft(1:3),ch10,&
390    & ' ngfftrec           ',  rset%ngfftrec(1:3),ch10,&
391    & ' load               ',  rset%load,ch10,&
392    & ' ntranche           ',  recpar%ntranche,ch10,&
393    & ' min_pt             ',  recpar%min_pt,ch10,&
394    & ' max_pt             ',  recpar%max_pt,ch10,&
395    & ' rset%mpi%nproc     ',  rset%mpi%nproc,ch10,&
396    & ' rset%mpi%nproc_fft ',  rset%mpi%nproc_fft,ch10,&
397    & ' dtset%ngfft(10)    ',  rset%ngfftrec(10),ch10,&
398    & ' recpar%npt         ',  recpar%npt,ch10,&
399    & ' recpar%pt0         ',  recpar%pt0%x,recpar%pt0%y,recpar%pt0%z,ch10,&
400    & ' recpar%pt1         ',  recpar%pt1%x,recpar%pt1%y,recpar%pt1%z,ch10,&
401    & ' grid step          ',  rset%inf%tr(1)
402  call wrtout(std_out,msg,'PERS')
403 #if defined HAVE_GPU_CUDA
404  write(msg,'(a,i7,2(2a,i7),a)')&
405    & ' rset%ngp           ',  rset%ngpu,ch10,&
406    & ' gpudevice          ',  rset%gpudevice,ch10,&
407    & ' nptrec             ',  rset%GPU%nptrec,ch10
408  call wrtout(std_out,msg,'PERS')
409 #endif
410 !  write(std_out,*)'display',recpar%displs
411 !  write(std_out,*)'vcount',recpar%vcount
412 ! end if
413 
414 
415  nullify(recpar)
416  if(associated(proc_pt_dev))  then
417    ABI_DEALLOCATE(proc_pt_dev)
418  end if
419 
420 ! write(std_out,*)'exit from cpu_distribution'
421 end subroutine cpu_distribution

m_rec/find_maxmin_proc [ Functions ]

[ Top ] [ m_rec ] [ Functions ]

NAME

 find_maxmin_proc

FUNCTION

 To calculate max and min pt for any cpu, it is useful for
 recgratio!=1

INPUTS

 nproc = number of procs
 me = identity of the proc
 ngfft(3) = fine grid (corresponds to dtset%ngfft(1:3))
 proc_pt_dev(2,0:nproc-1) which device and how many points
 recpar%npt = number of points computed by the proc me (see side effects)

OUTPUT

 recpar%pt0<type(vec_int)>=Intial point for this proc in x,y,z
 recpar%pt1<type(vec_int)>=Final point for this proc in x,y,z
 recpar%min_pt=Intial point for this proc
 recpar%max_pt=Final point for this proc

SIDE EFFECTS

 recpar%ntranche=number of pts computed by the proc me on the fine grid.


 So when  recgratio!=1, ntranche will  not correspond to the npt!

PARENTS

      m_rec

CHILDREN

      wrtout,xmpi_max

SOURCE

241 subroutine find_maxmin_proc(recpar,nproc,me,gratio,ngfft,proc_pt_dev)
242 
243 
244 !This section has been created automatically by the script Abilint (TD).
245 !Do not modify the following lines by hand.
246 #undef ABI_FUNC
247 #define ABI_FUNC 'find_maxmin_proc'
248 !End of the abilint section
249 
250  implicit none
251 
252 !Arguments ------------------------------------
253  integer,intent(in)   :: nproc,me,gratio
254  integer,intent(in)   :: ngfft(3)
255  type(recparall_type),intent(inout) :: recpar
256  integer,pointer :: proc_pt_dev(:,:)
257 !Local ---------------------------
258  integer :: pointoncpu
259  integer :: nfft,ntot,ii
260  integer :: inf,sup
261  integer :: proc_limit(0:nproc-1)
262 ! *********************************************************************
263  !  write(std_out,*)'start find_maxmin_proc'
264  recpar%npt = 0
265  nfft = product(ngfft)
266  ntot = nfft/(gratio*gratio*gratio)
267  pointoncpu = ntot/nproc
268 
269  proc_limit = (/(sum(proc_pt_dev(2,:ii)),ii=0,nproc-1)/)
270 
271  if(gratio==1)then
272    recpar%ntranche = proc_limit(me)
273    if(me/=0) recpar%ntranche = recpar%ntranche-proc_limit(me-1)
274  endif
275 
276  inf=0
277  if(me/=0) inf = proc_limit(me-1)
278  sup = proc_limit(me)
279 
280 
281  call get_pt0_pt1(ngfft,gratio,inf,sup,recpar)
282 
283  recpar%npt = sup-inf
284 
285  !write(std_out,*)'exit find_maxmin_proc'
286 end subroutine find_maxmin_proc

m_rec/H_D_distrib [ Functions ]

[ Top ] [ m_rec ] [ Functions ]

NAME

 H_D_distrib

FUNCTION

 Calculate the number of point,GPU,for any proc

INPUTS

  rset<recursion_type>= recursion variables
  cpu (-1 if there are not gpu)
  nfft=nuber of point of the fine grid
  ngfftrec=nuber of point of one edge of the coarse grid
  gratio=recgratio ratio between the fine and coarse grid
  beta_coeff=estimated time ratio between CPU_time and GPU_time

OUTPUT

  proc_pt_dev(2,0:nproc-1) which device and how many points
  that proc has to compute: proc_pt_dev(1,iproc) which device
  associated to proc i (-1 if none), proc_pt_dev(2,iproc) how
  many points

PARENTS

      m_rec

CHILDREN

      wrtout,xmpi_max

SOURCE

110 subroutine H_D_distrib(rset,nfft,gratio,proc_pt_dev,beta_coeff)
111 
112 
113 !This section has been created automatically by the script Abilint (TD).
114 !Do not modify the following lines by hand.
115 #undef ABI_FUNC
116 #define ABI_FUNC 'H_D_distrib'
117 !End of the abilint section
118 
119  implicit none
120 
121 !Arguments ------------------------------------
122  integer, intent(in) :: nfft,gratio
123  real(dp),intent(in) :: beta_coeff
124  integer,pointer :: proc_pt_dev(:,:)
125  type(recursion_type),intent(inout) :: rset
126 !Local ---------------------------
127  integer :: me,icpu,resto,ntot,ngpu
128  integer :: n_per_cpu,n_per_gpu
129  character(500) :: msg
130 #ifdef HAVE_GPU_CUDA
131  integer,pointer :: ndev(:)
132 #else
133  integer :: ndev(0:rset%mpi%nproc-1)
134 #endif
135 ! *********************************************************************
136 
137 
138 #ifdef HAVE_GPU_CUDA
139  ndev => rset%GPU%map
140 #else
141  ndev = -1
142 #endif
143 
144  me = rset%mpi%me
145  ntot = nfft/(gratio*gratio*gratio)
146  ngpu = rset%ngpu
147 
148  !--If sequential code all points are computed by the proc 0
149  if(rset%mpi%nproc ==1) then
150    proc_pt_dev(1,0) = ndev(0)
151    proc_pt_dev(2,0) = ntot
152    return
153  end if
154 
155  !--Number of points for any cpu
156  n_per_cpu = int(int(ntot/(rset%mpi%nproc+ngpu*(beta_coeff-1.d0))))
157  n_per_gpu = int(n_per_cpu*beta_coeff)
158  !write(std_out,*)'n_per_cpu',n_per_cpu
159  !write(std_out,*)'rset%GPU%map',rset%GPU%map
160  do icpu=0,rset%mpi%nproc-1
161    proc_pt_dev(1,icpu) = ndev(icpu)
162    proc_pt_dev(2,icpu) = n_per_cpu
163    if(ndev(icpu)>-1) proc_pt_dev(2,icpu) = n_per_gpu
164  end do
165 
166  !--Distribute the rest
167  resto = ntot-sum(proc_pt_dev(2,:))
168  icpu = 0
169  !write(std_out,*)'rest',resto,ngpu
170  if(resto>0) then
171    if(ngpu/=0) then
172      !--distribute rest only on GPU
173      do while(resto/=0)
174        if(proc_pt_dev(1,icpu)>-1) then
175          proc_pt_dev(2,icpu) = proc_pt_dev(2,icpu)+1
176          resto = resto-1
177        endif
178        icpu = mod(icpu+1,rset%mpi%nproc)
179      enddo
180    else
181      !--distribute rest on all CPU
182      do while(resto/=0)
183        proc_pt_dev(2,icpu) = proc_pt_dev(2,icpu)+1
184        resto = resto-1
185        icpu = mod(icpu+1,rset%mpi%nproc)
186      enddo
187      return
188    endif
189  endif
190 
191  !--Printing GPU and load distribution on procs
192  write(msg,'(3a)')&
193       & ' -Load on procs------------',ch10,&
194       & '   me  device        points'
195  call wrtout(std_out,msg,'COLL')
196  do icpu=0,rset%mpi%nproc-1
197    write(msg,'(i5,i8,i14)') icpu,proc_pt_dev(:,icpu);
198    call wrtout(std_out,msg,'COLL')
199  end do
200 
201 end subroutine H_D_distrib

m_rec/Init_MetricRec [ Functions ]

[ Top ] [ m_rec ] [ Functions ]

NAME

 Init_MetricRec

FUNCTION

 Initialise the rset<recursion_type>=Data type concerning recursion.
 In particular, the information on the infinitesimal metric.
 Also other variable are initialized

INPUTS

 rmet: metrics
 ucvol=unit cell volume in bohr**3.
 ngfft(1:3)=fine grid used in recursion
 rprimd=Real space PRIMitive translations, Dimensional
 xred=vectors (X) of atom positions in reduced coordinates
 natom=number of atoms
 debug=debug variable

OUTPUT

 metrec <type(metricrec_type)>= infinitesimal metrics used in recursion

PARENTS

      scfcv

CHILDREN

      wrtout,xmpi_max

SOURCE

632 subroutine Init_MetricRec(metrec,nlpsp,rmet,ucvol,rprimd,xred,ngfft,natom,debug)
633 
634 
635 !This section has been created automatically by the script Abilint (TD).
636 !Do not modify the following lines by hand.
637 #undef ABI_FUNC
638 #define ABI_FUNC 'Init_MetricRec'
639 !End of the abilint section
640 
641  implicit none
642 !Arguments ------------------------------------
643 !scalars
644  integer, intent(in) ::natom
645  real(dp), intent(in) :: ucvol
646  logical,intent(in) ::nlpsp,debug
647  type(metricrec_type),intent(inout) :: metrec
648 !arrays
649  integer,intent(in) :: ngfft(3)
650  real(dp),intent(in) :: rmet(3,3),rprimd(3,3),xred(3,natom)
651 
652 !Local ---------------------------
653  integer :: ii
654  real(dp) :: xcart(3,natom)
655  character(500) :: msg
656 ! *********************************************************************
657 
658  !--Intialisation of variables concerning the infinitesimal metric
659  do ii=1,3
660    metrec%rmet(ii,:) = rmet(ii,:)/(real(ngfft(1:3)*ngfft(ii),dp))
661  end do
662  metrec%ucvol = ucvol/real(product(ngfft(1:3)),dp)
663  metrec%tr(:) = sqrt((/(metrec%rmet(ii,ii),ii=1,3)/)) !grid step
664 
665  !--Initialisation of others variables
666  !--In non-loc-psp case: calculate the position of ions and conversion factor
667  if(nlpsp) then
668    do ii = 1,natom
669      xcart(:,ii) = matmul(rprimd(:,:),xred(:,ii))
670    end do
671    metrec%gcart(:,:) = per_cond(natom,xcart,ngfft(1:3),metrec%tr(:))
672    if(debug) then
673      do ii=1,natom
674        write (msg,'(a,3f8.2)')'xcart=',xcart(:,ii)
675        call wrtout(std_out,msg,'COLL')
676        write (msg,'(a,3i4)')'gcart=',metrec%gcart(:,ii)
677        call wrtout(std_out,msg,'COLL')
678      end do
679    end if
680  end if
681 
682 end subroutine Init_MetricRec

m_rec/Init_nlpspRec [ Functions ]

[ Top ] [ m_rec ] [ Functions ]

NAME

 Init_nlpspRec

FUNCTION

 Initialise the rset<recursion_type>=Data type concerning recursion.
 In particular, the non-local part of pseudo-potential.

INPUTS

 tempe=temperature
 psps <type(pseudopotential_type)>=variables related to pseudo-potentials
 metrec <type(metricrec_type)>=infinitesimal metrics used in recursion
 ngfftrec(18)=Number of Grid points for Fast Fourier Transform for
   Recursion (truncated box, if different from ngfft)
 debug=debug variable

SIDE EFFECTS

 nlrec <type(nlpsprec_type)>=pseudo-potentials informations for recursion

PARENTS

      first_rec

CHILDREN

      wrtout,xmpi_max

SOURCE

712 subroutine Init_nlpspRec(tempe,psps,nlrec,metrec,ngfftrec,debug)
713 
714 
715 !This section has been created automatically by the script Abilint (TD).
716 !Do not modify the following lines by hand.
717 #undef ABI_FUNC
718 #define ABI_FUNC 'Init_nlpspRec'
719 !End of the abilint section
720 
721  implicit none
722 !Arguments ------------------------------------
723 ! scalars
724  logical,intent(in) :: debug
725  real(dp), intent(in) :: tempe
726  type(pseudopotential_type),intent(in) ::psps
727  type(metricrec_type),intent(inout) :: metrec
728  type(nlpsprec_type),intent(inout) :: nlrec
729 ! arrays
730  integer,intent(in) :: ngfftrec(18)
731 !Local ---------------------------
732  integer :: ii,jj
733  character(500) :: msg
734 ! *********************************************************************
735  !!--Routine for the calcul of the non-local pseudo
736  if(any(psps%pspcod/=3) .and. nlrec%nlpsp ) then
737    msg = "The non-local part of psp is used in Recursion only for hgh-psp"
738    MSG_WARNING(msg)
739    nlrec%nlpsp = .False.
740    if (allocated(metrec%gcart))  then
741      ABI_DEALLOCATE(metrec%gcart)
742    end if
743  end if
744 
745  if(any(psps%pspcod==3) .and.  nlrec%nlpsp) then
746 
747   nlrec%nlpsp = .True.
748   nlrec%npsp  = psps%npsp
749   nlrec%lmnmax = count(psps%indlmn(3,:,psps%npsp)/=0)
750   ABI_ALLOCATE(nlrec%mat_exp_psp_nl,(3,3,psps%mpsang,psps%npsp))
751   ABI_ALLOCATE(nlrec%eival,(3,psps%mpsang,psps%npsp))
752   ABI_ALLOCATE(nlrec%eivec,(3,3,psps%mpsang,psps%npsp))
753   ABI_ALLOCATE(nlrec%pspinfo,(psps%mpsang,psps%npsp))
754   ABI_ALLOCATE(nlrec%radii,(psps%mpsang,psps%npsp))
755   ABI_ALLOCATE(nlrec%indlmn,(6,nlrec%lmnmax,psps%npsp))
756   nlrec%indlmn(:,:,:) = psps%indlmn(:,:nlrec%lmnmax,:)
757   nlrec%mat_exp_psp_nl(:,:,:,:) = zero
758   nlrec%eivec(:,:,:,:) = zero
759   nlrec%eival(:,:,:) = zero
760   nlrec%radii(:,:) = zero
761   nlrec%pspinfo(:,:) = 0
762 
763   !--Get the exponential of the strength times the projectors overlap
764   !  of the non-local part of psp(hgh):
765   !  h_ij=strength; g_ij=ovelap => (exp(-h.g/temp/4p)-Identity).g^(-1)
766   !  And the diagonalisation of the projectors and associated eigenvectors
767   call pspnl_hgh_rec(psps,tempe,nlrec,debug)
768 
769   if(debug)then
770    do jj=1,psps%npsp
771     write(msg,'(a)')' Exponential matrices:'
772     call wrtout(std_out,msg,'COLL')
773     do ii=1,psps%mpsang
774      write(msg,'(a,i2,a,3f15.10,a,3f15.10,a,3f15.10)')&
775        &   'angular moment',ii-1,ch10,&
776        &                    nlrec%mat_exp_psp_nl(1,:,ii,jj),ch10,&
777        &                    nlrec%mat_exp_psp_nl(2,:,ii,jj),ch10,&
778        &                    nlrec%mat_exp_psp_nl(3,:,ii,jj)
779      call wrtout(std_out,msg,'COLL')
780     end do
781    end do
782   end if
783 
784   !--Now it calculates the matrix of the exp(V_NL)
785   call pspnl_operat_rec(nlrec,metrec,ngfftrec,debug)
786 
787  else !--Only local pseudo potentials
788   nlrec%nlpsp = .False.
789   nlrec%npsp  = psps%npsp
790   ABI_ALLOCATE(nlrec%mat_exp_psp_nl,(0,0,0,0))
791   ABI_ALLOCATE(nlrec%pspinfo,(0,0))
792   ABI_ALLOCATE(nlrec%radii,(0,0))
793   ABI_ALLOCATE(nlrec%indlmn,(0,0,0))
794   ABI_ALLOCATE(nlrec%projec,(0,0,0))
795  endif
796 
797 end subroutine Init_nlpspRec

m_rec/InitRec [ Functions ]

[ Top ] [ m_rec ] [ Functions ]

NAME

 InitRec

FUNCTION

 Initialise the rset<recursion_type>=Data type concerning recursion.

INPUTS

 dtset <type(dataset_type)>=all input variables in this dataset
 mpi_ab <type(mpi_type)=MPI-parallelisation information
 mproj=0 if psp is only local

SIDE EFFECTS

 All pointers set to null().

PARENTS

      gstate

CHILDREN

      wrtout,xmpi_max

SOURCE

449 subroutine InitRec(dtset,mpi_ab,rset,rmet,mproj)
450 
451 #ifdef HAVE_GPU_CUDA
452  use m_gpu_detect,only    :get_topo,find_set_gpu
453  use m_hidecudarec,only   :InitRecGPU_0
454 #include "cuda_common.h"
455 #endif
456 
457 !This section has been created automatically by the script Abilint (TD).
458 !Do not modify the following lines by hand.
459 #undef ABI_FUNC
460 #define ABI_FUNC 'InitRec'
461 !End of the abilint section
462 
463  implicit none
464 
465 !Arguments ------------------------------------
466 ! scalars
467  integer,intent(in) :: mproj
468  type(dataset_type),intent(in) :: dtset
469  type(MPI_type),intent(in),target :: mpi_ab
470  type(recursion_type),intent(inout) :: rset
471  real(dp),intent(in) :: rmet(3,3)
472 ! arrays
473 !Local ---------------------------
474  integer :: ii
475  real(dp) :: beta,rtrotter
476 #if defined HAVE_GPU_CUDA
477  character(500) :: msg
478 #endif
479 ! *********************************************************************
480  ! @recursion_type
481  ! @pawfgr_type
482 
483  !--Initialisation
484  beta = one/dtset%tsmear  !--Inverse of temperature
485  !--Rewriting the trotter parameter
486  rtrotter  = max(half,real(dtset%recptrott,dp))
487 
488  rset%debug= (dtset%prtvol==-7)
489  rset%quitrec   = 0
490  rset%min_nrec  = dtset%recnrec
491  rset%efermi    = dtset%recefermi !initial guess for fermie
492 
493  rset%nfftrec   = 0
494  rset%ngfftrec  = 0
495 
496  rset%tronc = .False.
497 
498  rset%mpi => mpi_ab
499 
500  !--Are all pseudo-potentials local?
501  rset%nl%nlpsp = (mproj/=0)
502  !--Some initialisation concerning the metrics
503  !  If non-local psps then it allocates the atoms positions
504  !   on the grid
505  if(rset%nl%nlpsp)  then
506   ABI_ALLOCATE(rset%inf%gcart,(3,dtset%natom))
507  else
508   ABI_ALLOCATE(rset%inf%gcart,(0,0))
509  end if
510  rset%inf%gcart = 0
511 
512  !----------------------------------------------------------
513  !--TRONCATION OF THE BOX
514  !! determines new dimensions the method is similar to the one used
515  !! in getng  (except that ecut and xboxcutmin give no constraint,
516  !!  and symmetries are not handled)
517 
518  call getngrec(dtset%ngfft,rmet,rset%ngfftrec,rset%nfftrec,dtset%recrcut,0.25d0*sqrt(beta/rtrotter),rset%tronc)
519  !  1/4*sqrt(beta/trotter) for guess - should be modified
520 
521  !------------------------------------------------------------
522  !--DETERMINING WHICH POINT WILL COMPUTE THAT PROC
523  !----------------------------------------------------------
524  !--Paralelism using the band communicator (not used in the recursion)
525  !--Distribution on procs with cuda
526 
527 
528  rset%ngpu = 0       !--Initial guess no GPU at all
529  rset%gpudevice = -1 !--Inital guess no GPU associated
530  rset%load = 0       !--Inital homogeneous work load
531  rset%tp = 0         !--Initial guess 1 cpu, 0 gpu
532 
533 
534 #ifdef HAVE_GPU_CUDA
535  !--Initialise GPU variables for recursion
536  call InitRecGPU_0(rset%GPU,mpi_ab)
537 
538  !--Get the distribution of GPUs on CPUs
539  call find_set_gpu(mpi_ab%nproc,mpi_ab%comm_bandfft,rset%GPU%map,rset%ngpu)
540 
541  !--Get the topology of the machine
542  call get_topo(rset%mpi%nproc,rset%ngpu,rset%tp)
543  if(rset%tp>4)then
544    msg = 'm_rec: number of gpu>number of cpu is not implemented'
545    MSG_ERROR(msg)
546  endif
547 !  rset%tp = 0;if(rset%mpi%nproc>1)rset%tp = 1
548 !  rset%ngpu = 0; rset%GPU%map=-1
549  !--For the moment cuda doesnt take into account non-local psp
550  if(rset%nl%nlpsp) then
551    rset%tp = 0;if(rset%mpi%nproc>1)rset%tp = 1
552    rset%GPU%map = -1
553  endif
554 #else
555  if(rset%mpi%nproc>1)rset%tp = 1
556 #endif
557 
558  !--Basic initialization for recursion metric (only needed for printing)
559  do ii=1,3
560    rset%inf%rmet(ii,:) = rmet(ii,:)/(real(dtset%ngfft(1:3)*dtset%ngfft(ii),dp))
561  end do
562  rset%inf%tr(:) = sqrt((/(rset%inf%rmet(ii,ii),ii=1,3)/)) !grid step
563 
564  !--Compute the work loqd distribution on devices (gpu,cpu)
565  call cpu_distribution(dtset%recgratio,rset,dtset%ngfft(:3),1.d0,0)
566 
567  !------------------------------------------------------------
568  !--DEFINITION VARIABLE COARSE-FINE GRID  TO USE TRANSGRID--INGRID FUNCTIONS
569  call pawfgr_nullify(rset%pawfgr)
570  !if coarse grid is used
571  if (dtset%recgratio>1) then
572    !fine grid--
573    rset%pawfgr%mgfft = 0
574    rset%pawfgr%nfft = product(dtset%ngfft(1:3))
575    rset%pawfgr%ngfft(:) = dtset%ngfft(:)
576    rset%pawfgr%ngfft(9:11)=(/0,1,0/)
577    rset%pawfgr%ngfft(12:13)= dtset%ngfft(2:3)
578    !coarse grid--
579    rset%pawfgr%mgfftc = 0
580    rset%pawfgr%ngfftc(:) = rset%pawfgr%ngfft(:)
581    rset%pawfgr%ngfftc(:3) = floor(real(dtset%ngfft(:3)+1,dp)/real(dtset%recgratio,dp))
582    rset%pawfgr%nfftc = product(rset%pawfgr%ngfftc(1:3))
583 
584    rset%pawfgr%usefinegrid = 1
585    ABI_ALLOCATE(rset%pawfgr%fintocoa,(rset%pawfgr%nfft))
586    ABI_ALLOCATE(rset%pawfgr%coatofin,(rset%pawfgr%nfftc))
587    call indgrid(rset%pawfgr%coatofin,rset%pawfgr%fintocoa,&
588      rset%pawfgr%nfftc,rset%pawfgr%nfft,&
589      rset%pawfgr%ngfftc,rset%pawfgr%ngfft)
590 
591  else
592    rset%pawfgr%mgfft = 0
593    rset%pawfgr%ngfft = 0
594    rset%pawfgr%mgfftc = 0
595 
596    rset%pawfgr%usefinegrid = 0
597  end if
598 
599 
600 end subroutine InitRec