TABLE OF CONTENTS


ABINIT/first_rec [ Functions ]

[ Top ] [ Functions ]

NAME

 first_rec

FUNCTION

 When recursion method is used, in the first step this routine
 compute some quantities which are used in the rest of the calculation.

COPYRIGHT

  Copyright (C) 2009-2018 ABINIT group (MMancini)
  This file is distributed under the terms of the
  GNU General Public License, see ~abinit/COPYING
  or http://www.gnu.org/copyleft/gpl.txt .

INPUTS

  dtset <type(dataset_type)>=all input variables for this dataset:
   | recgratio =fine/coarse grid ratio
   | recptrott =trotter parameter
   | tsmear    =temperature
   | recrcut   =tut radius in recursion (range of iteration)
   | ngfft(18) =FFT grid used as real (fine) grid in recursion
  psps <type(pseudopotential_type)>=variables related to pseudo-potentials

OUTPUT

SIDE EFFECTS

  rset <type(recursion_type)>=variables related to recursion method
   | debug<logical> = T if debugging is used
   | inf <type(metricrec_type)>=information concerning the infinitesimal metrics
   | ngfftrec(18) =truncated (or not, if not ngfftrec=ngfft)FFT grid used as real grid in recursion.
   | nfftrec =product(ngfftrec(1:3))
   | tronc<logical> = T if truncation is effectively used
   | ZT_p = fourier transform of the green_kernel calculated on the fine grid

NOTES

PARENTS

      scfcv

CHILDREN

      cpu_distribution,cudarec,get_pt0_pt1,green_kernel,init_nlpsprec
      random_number,recursion,reshape_pot,timab,timein,wrtout,xmpi_sum

SOURCE

 48 #if defined HAVE_CONFIG_H
 49 #include "config.h"
 50 #endif
 51 
 52 #if defined HAVE_GPU_CUDA
 53 #include "cuda_common.h"
 54 #endif
 55 
 56 #include "abi_common.h"
 57 
 58 subroutine first_rec(dtset,psps,rset)
 59 
 60  use defs_basis
 61  use defs_abitypes
 62  use defs_datatypes
 63  use defs_rectypes
 64  use m_profiling_abi
 65  use m_errors
 66 
 67  use m_rec,only         : init_nlpsprec,cpu_distribution
 68  use m_rec_tools,only   : get_pt0_pt1,reshape_pot
 69 #if defined HAVE_GPU_CUDA
 70  use m_initcuda,only    : cudap
 71  use m_hidecudarec,only : cudarec,CleanRecGPU
 72  use m_xmpi,only        : xmpi_sum
 73 #endif
 74 
 75 !This section has been created automatically by the script Abilint (TD).
 76 !Do not modify the following lines by hand.
 77 #undef ABI_FUNC
 78 #define ABI_FUNC 'first_rec'
 79  use interfaces_14_hidewrite
 80  use interfaces_18_timing
 81  use interfaces_68_recursion, except_this_one => first_rec
 82 !End of the abilint section
 83 
 84  implicit none
 85 
 86 !Arguments ------------------------------------
 87 ! scalars
 88  type(dataset_type),intent(in) :: dtset
 89  type(pseudopotential_type),intent(in) :: psps
 90  type(recursion_type),intent(inout) :: rset
 91 !Local variables-------------------------------
 92 !scalars
 93  integer  :: nfftrec,trotter,ii,dim_trott
 94  real(dp) :: tsmear,beta,rtrotter
 95  character(len=500) :: msg
 96 !arrays
 97  integer  :: ngfftrec(18)
 98  real(dp) :: tsec(2)
 99 #ifdef HAVE_GPU_CUDA
100  integer  :: max_rec,ierr,testpts,swt_tm
101  real(dp) :: rho,tm_ratio
102  real(dp) :: time_cu,time_f
103  type(recursion_type) :: rset_test
104  type(recparall_type) :: parold
105  integer :: trasl(3)
106  real(dp) :: tsec2(2),tsec3(2)
107  real(dp) :: aloc(0,1),b2loc(0,1)
108  real(dp) :: dm_projec(0,0,0,1,1)
109  real(dp) :: exppot(0:dtset%nfft-1)
110  real(dp),allocatable :: exppotloc(:)
111  real(cudap),allocatable :: aloc_cu(:),b2loc_cu(:)
112 #endif
113 
114 ! *************************************************************************
115 
116  call timab(601,1,tsec)  !!--Start time-counter: initialisation
117 
118  MSG_WARNING("RECURSION")
119  if(dtset%recgratio>1) then
120    write(msg,'(a)')'COARSE GRID IS USED'
121    call wrtout(std_out,msg,'COLL')
122  end if
123 
124 !--Initialisation
125  trotter = dtset%recptrott  !--Trotter parameter
126  tsmear  = dtset%tsmear     !--Temperature
127  beta    = one/tsmear       !--Inverse of temperature
128 
129 !--Rewriting the trotter parameter
130  dim_trott = max(0,2*trotter-1)
131  rtrotter  = max(half,real(trotter,dp))
132 
133  write (msg,'(2a)')ch10,'==== FIRST CYCLE RECURSION ========================='
134  call wrtout(std_out,msg,'COLL')
135 
136 
137  ngfftrec = rset%ngfftrec
138  nfftrec = rset%nfftrec
139 !------------------------------------------------
140 !--TRONCATION OF THE BOX: determines new dimensions
141 !--Now in InitRec
142 !--------------------------------------------------------
143 !--DEFINITION PAW VARIABLES COARSE-FINE GRID  TO USE TRANSGRID--INGRID FUNCTIONS
144 !--Now these variables are defined into gstate by InitRec
145 
146 !--------------------------------------------------------
147 !--COMPUTATION OF THE FOURIER TRANSFORM OF THE GREEN KERNEL (only once)
148  write (msg,'(a)')' - green kernel calculation -----------------------'
149  call wrtout(std_out,msg,'COLL')
150  ABI_ALLOCATE(rset%ZT_p,(1:2,0: nfftrec-1))
151  call timab(601,2,tsec)
152  call green_kernel(rset%ZT_p,rset%inf%rmet,rset%inf%ucvol,rtrotter/beta,rset%mpi,ngfftrec,nfftrec)
153  call timab(601,1,tsec)
154  write(msg,'(a,50a)')' ',('-',ii=1,50)
155  call wrtout(std_out,msg,'COLL')
156 !!--end computation of the fourier transform of the Green kernel
157 
158 !!-----------------------------------
159 !!--ROUTINE FOR THE CALCULATION OF THE NON-LOCAL PSEUDO
160 !--Now these variables here by  Init_nlpspRec
161  call Init_nlpspRec(four*tsmear*rtrotter,psps,rset%nl,rset%inf,rset%ngfftrec,rset%debug)
162 
163 !!-----------------------------------
164 !--Load distribution on procs when GPU are present
165 #if defined HAVE_GPU_CUDA
166 
167 !--Test timing only if exists GPU and they are not equal to the cpus
168  if(rset%tp == 4) then
169    parold = rset%par
170    ii = 0
171    time_f = zero
172    time_cu = zero
173    call random_number(exppot)  !   exppot = one
174 
175    if(rset%gpudevice == -1) then
176 !    --Test CPUS
177      swt_tm = 0
178      testpts = min(rset%par%npt, 20)
179      call timein(tsec2(1),tsec2(2))
180      if(rset%tronc) then
181        ABI_ALLOCATE(exppotloc,(0:nfftrec-1))
182        do while(ii< testpts)
183          trasl = -(/1,2,3/)+ngfftrec(:3)/2
184          call reshape_pot(trasl,dtset%nfft,nfftrec,dtset%ngfft(:3),ngfftrec(:3),&
185 &         exppot,exppotloc)
186          call recursion(exppotloc,0,0,0, &
187 &         aloc, &
188 &         b2loc, &
189 &         rho,&
190 &         0, rset%efermi,tsmear,rtrotter,dim_trott, &
191 &         rset%ZT_p, &
192 &         dtset%rectolden,dtset%typat, &
193 &         rset%nl,&
194 &         rset%mpi,nfftrec,ngfftrec,rset%inf,&
195 &         6,dtset%natom,dm_projec,0)
196          ii=ii+1
197        end do
198        ABI_DEALLOCATE(exppotloc)
199      else
200        do while(ii< testpts)
201          call recursion(exppot,0,0,0, &
202 &         aloc, &
203 &         b2loc, &
204 &         rho,&
205 &         0, rset%efermi,tsmear,rtrotter,dim_trott, &
206 &         rset%ZT_p, &
207 &         dtset%rectolden,dtset%typat, &
208 &         rset%nl,&
209 &         rset%mpi,nfftrec,ngfftrec,rset%inf,&
210 &         6,dtset%natom,dm_projec,0)
211          ii=ii+1
212        end do
213      end if
214      call timein(tsec3(1),tsec3(2))
215      time_f = (tsec3(1)-tsec2(1))/real(testpts,dp)
216      time_f = time_f*time_f
217    else
218 !    --Test GPUS
219      swt_tm = 1
220      rset_test = rset
221      rset_test%GPU%par%npt = max(rset%GPU%nptrec,100)
222      rset_test%min_nrec = 0
223      call get_pt0_pt1(dtset%ngfft(:3),dtset%recgratio,0,&
224 &     rset_test%GPU%par%npt,rset_test%GPU%par)
225 
226 
227      ABI_ALLOCATE(aloc_cu,(rset_test%GPU%par%npt))
228      ABI_ALLOCATE(b2loc_cu,(rset_test%GPU%par%npt))
229      call timein(tsec2(1),tsec2(2))
230      call cudarec(rset_test, exppot,aloc_cu,b2loc_cu,&
231 &     beta,trotter,dtset%rectolden,dtset%recgratio,dtset%ngfft,max_rec)
232      call timein(tsec3(1),tsec3(2))
233      ABI_DEALLOCATE(aloc_cu)
234      ABI_DEALLOCATE(b2loc_cu)
235 
236      time_cu = (tsec3(1)-tsec2(1))/real(rset_test%GPU%par%npt,dp)
237      time_cu = time_cu*time_cu
238    end if
239 
240 
241 !  --Get Total Times
242    call xmpi_sum(time_f,rset%mpi%comm_bandfft,ierr)
243    call xmpi_sum(time_cu,rset%mpi%comm_bandfft,ierr)
244 
245 !  --Average Total Times
246    time_f   = sqrt(time_f/real(rset%mpi%nproc-rset%ngpu,dp))
247    time_cu  = sqrt(time_cu/real(rset%ngpu,dp))
248    tm_ratio = time_f/time_cu
249 
250 
251    write(msg,'(3(a25,f10.5,a))')&
252 &   ' Time for cpu recursion ',time_f,ch10,&
253 &   ' Time for gpu recursion ',time_cu,ch10,&
254 &   ' Time ratio             ',tm_ratio,ch10
255    call wrtout(std_out,msg,'COLL')
256 
257 
258 !  tm_ratio =1.20d2! 0.d0! 1.21d0
259    rset%par = parold
260 !  --Compute the work-load distribution on devices (gpu,cpu)
261    if(tm_ratio>1.5d0 .and. time_cu>zero)then
262      rset%load = 1
263      call cpu_distribution(dtset%recgratio,rset,dtset%ngfft(:3),tm_ratio,1)
264    else
265      rset%gpudevice = -1
266    end if
267  end if
268 
269 #endif
270 
271 
272 !------------------------------------------------------------
273 !--DETERMINING WHICH POINT WILL COMPUTE THAT PROC
274 !--Now these variables are defined into gstate by Init_rec
275 
276  write (msg,'(2a)')ch10,'==== END FIRST CYCLE RECURSION ====================='
277  call wrtout(std_out,msg,'COLL')
278  call timab(601,2,tsec) !!--stop time-counter: initialisation
279 
280 end subroutine first_rec