TABLE OF CONTENTS


ABINIT/m_hidecudarec [ Modules ]

[ Top ] [ Modules ]

NAME

 m_hidecudarec

FUNCTION

  Call the C-cu program to make recursion on GPU

COPYRIGHT

  Copyright (C) 2009-2018 ABINIT group (MMancini)
  This file is distributed under the terms of the
  GNU General Public License, see ~abinit/COPYING
  or http://www.gnu.org/copyleft/gpl.txt .

NOTES

PARENTS

CHILDREN

SOURCE

23 #if defined HAVE_CONFIG_H
24 #include "config.h"
25 #endif
26 #if defined HAVE_GPU_CUDA
27 #include "cuda_common.h"
28 #endif
29 
30 #include "abi_common.h"
31 
32 
33 module m_hidecudarec
34 
35  use defs_basis
36  use defs_rectypes
37  use m_abicore
38 #if defined HAVE_GPU_CUDA
39  use m_initcuda
40 #endif
41 
42  use m_fft,        only : fourdp
43 
44  implicit none
45 
46  private
47 
48 #if defined HAVE_GPU_CUDA
49  private ::  prt_mem_usage          ! Print memory usage
50 #endif
51 
52 #if defined HAVE_GPU_CUDA
53  public ::  InitRecGPU_0        ! Initialize recGPU_type
54  public ::  InitRecGPU          ! InitRecGPU
55  public ::  cudarec             ! Make the recursion on GPU
56 #endif
57  public :: CleanRecGPU            ! deallocate all pointers.
58 
59 
60 CONTAINS !===========================================================

m_hidecudarec/CleanRecGPU [ Functions ]

[ Top ] [ m_hidecudarec ] [ Functions ]

NAME

 CleanRecGPU

FUNCTION

  Clean the recGPU_type: reset nptrec to zero, deallocate the arrays it
  holds (map and, if load==1, par%displs and par%vcount) and call unset_dev

INPUTS

  load=marks allocation of some arrays (if load==1, recgpu%par%displs and
   recgpu%par%vcount are deallocated too)
  recgpu<type(recGPU_type)>=contains information of recursion with GPU

OUTPUT

 recgpu%nptrec=number of points for recursion on GPU (reset to 0)

PARENTS

      m_rec

CHILDREN

      unset_dev

SOURCE

subroutine CleanRecGPU(recgpu,load)


!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
#undef ABI_FUNC
#define ABI_FUNC 'CleanRecGPU'
!End of the abilint section

 implicit none

!Arguments ------------------------------------
! load   : when equal to 1, the arrays displs and vcount of recgpu%par
!          are released as well
! recgpu : structure whose arrays are released and whose nptrec is reset
 integer,intent(in) :: load
 type(recGPU_type),intent(inout) :: recgpu
! *************************************************************************

 !--Release the map pointer, only if it is currently associated
 if (associated(recgpu%map)) then
   ABI_DEALLOCATE(recgpu%map)
 end if

 !--displs and vcount are touched only when the caller passes load==1
 if (load == 1) then
   if (allocated(recgpu%par%vcount)) then
     ABI_DEALLOCATE(recgpu%par%vcount)
   end if
   if (allocated(recgpu%par%displs)) then
     ABI_DEALLOCATE(recgpu%par%displs)
   end if
 end if

 !--No more points assigned to the GPU
 recgpu%nptrec = 0

 !--Always executed, whatever the value of load
 call unset_dev()

end subroutine CleanRecGPU

m_hidecudarec/cudarec [ Functions ]

[ Top ] [ m_hidecudarec ] [ Functions ]

NAME

 cudarec

FUNCTION

 Make recursion on a GPU device

INPUTS

OUTPUT

PARENTS

      first_rec,vtorhorec

CHILDREN

      unset_dev

SOURCE

309 #if defined HAVE_GPU_CUDA
310 
subroutine cudarec(rset,exppot,an,bn2,beta,trotter,tolrec,gratio,ngfft,max_rec)


!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
#undef ABI_FUNC
#define ABI_FUNC 'cudarec'
!End of the abilint section

 implicit none

!Arguments ------------------------------------
! Scalars beta, efermi, tolrec and ucvol as well as the arrays built from
! rset%ZT_p and exppot are converted to kind cudap before being handed to
! the CUDA routines; an and bn2 are passed as they are.
 integer,intent(in)     :: trotter,gratio
 real(dp),intent(in)    :: beta,tolrec
 integer,intent(inout)  :: max_rec
 type(recursion_type),intent(inout) :: rset
 integer,intent(in)         :: ngfft(1:3)
 real(dp), intent(in)       :: exppot(0:product(ngfft)-1)
 real(cudap), intent(inout) :: an(0:rset%GPU%par%npt-1,0:rset%min_nrec)
 real(cudap), intent(inout) :: bn2(0:rset%GPU%par%npt-1,0:rset%min_nrec)
!Local variables-------------------------------
 ! Work array of nfftrec elements filled by fourdp from rset%ZT_p
 real(dp) :: tp_work(0:rset%nfftrec-1)
! *************************************************************************

 !--Build the work array from rset%ZT_p, then scale it by 1/nfftrec
 call fourdp(1,rset%ZT_p,tp_work,1,rset%mpi,rset%nfftrec,rset%ngfftrec,1,0)
 tp_work = (one/rset%nfftrec)*tp_work

 if (rset%tronc) then

   !--Truncated case: same argument list as below plus ngfft
   call cuda_rec_cal_cut(trotter, gratio, &
     &                   rset%GPU%par%npt, rset%min_nrec, &
     &                   rset%GPU%nptrec, max_rec, &
     &                   real(beta,cudap), &
     &                   real(rset%efermi,cudap), &
     &                   real(tolrec,cudap), &
     &                   real(rset%inf%ucvol,cudap), &
     &                   rset%GPU%par%pt0, rset%GPU%par%pt1, &
     &                   ngfft, &
     &                   rset%ngfftrec(1:3), &
     &                   real(tp_work,cudap), &
     &                   real(exppot,cudap), &
     &                   an, bn2)

 else

   !--Non-truncated case
   call cuda_rec_cal(trotter, gratio, &
     &               rset%GPU%par%npt, rset%min_nrec, &
     &               rset%GPU%nptrec, max_rec, &
     &               real(beta,cudap), &
     &               real(rset%efermi,cudap), &
     &               real(tolrec,cudap), &
     &               real(rset%inf%ucvol,cudap), &
     &               rset%GPU%par%pt0, rset%GPU%par%pt1, &
     &               rset%ngfftrec(1:3), &
     &               real(tp_work,cudap), &
     &               real(exppot,cudap), &
     &               an, bn2)

 end if

end subroutine cudarec

m_hidecudarec/InitRecGPU [ Functions ]

[ Top ] [ m_hidecudarec ] [ Functions ]

NAME

 InitRecGPU

FUNCTION

  If there are devices available then the recGPU_type is initialized

INPUTS

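  rset<recursion_type>= contains information of recursion
  nfft=number of grid points; nfft/gratio**3 is used as an upper bound for nptrec
  gratio=grid ratio used to reduce nfft (see nfft)
  gpudevice=index of the GPU device, passed to InitGPU and set_dev; nptrec is
  computed only if gpudevice>-1
  (gpuinfo<devGPU_type>, which contains information of GPU, is a local
  variable filled by InitGPU and not an argument)
  calc_type=if 0 takes the possible max for nptrec (to test the
  completely full graphic card). 1 after test to calculate the min
  possible value for nptrec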

OUTPUT

  rset%GPU<recGPU_type>=contains information of recursion with GPU
  (rset%GPU%nptrec and rset%gpudevice are set here)

PARENTS

      m_rec

CHILDREN

      unset_dev

SOURCE

203 #if defined HAVE_GPU_CUDA
204 
subroutine InitRecGPU(rset,nfft,gratio,gpudevice,calc_type)


!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
#undef ABI_FUNC
#define ABI_FUNC 'InitRecGPU'
!End of the abilint section

 implicit none

!Arguments ------------------------------------
! nfft, gratio : nfft/gratio**3 bounds the number of points from above
! gpudevice    : device index; nptrec is computed only if gpudevice>-1
! calc_type    : if 1, the number of points is also bounded by rset%GPU%par%npt
! rset         : on exit rset%gpudevice and (if gpudevice>-1) rset%GPU%nptrec are set
 integer,intent(in) :: calc_type
 integer,intent(in) :: gratio
 integer,intent(in) :: nfft,gpudevice
 type(recursion_type),intent(inout) :: rset
!Local variables-------------------------------
 integer :: nfftc,pos_size,resto
 character(len=500) :: msg
 type(devGPU_type) :: gpuinfo
! *************************************************************************

 rset%gpudevice = gpudevice
 nfftc = nfft/(gratio**3)
 pos_size = 1

 call InitGPU(gpuinfo,gpudevice)
 !-- look if it is possible to set devices CUDA compatible
 call set_dev(gpudevice)

 if (gpudevice > -1) then

   !--Estimate from the device memory how many points fit on the GPU.
   !  The formulas are the ones for CUDA 3.0 with batched FFT: half of
   !  maxmemdev(0) is taken, expressed in elements of kind cudap.
   if (rset%tronc) then
     pos_size = (.50d0*real(gpuinfo%maxmemdev(0))/real(cudap) &
       &         -real(nfft+2*rset%nfftrec))/real((6*rset%nfftrec+15+2))
   else
     pos_size = (.5d0*real(gpuinfo%maxmemdev(0))/real(cudap) &
       &         -real(3*rset%nfftrec))/real((5*rset%nfftrec)+15+2)
   end if

   !--Never more points than nfftc
   pos_size = min(pos_size,nfftc)

   !--In production (calc_type==1) never more than the points of this proc
   if (calc_type == 1) then
     pos_size = min(pos_size,rset%GPU%par%npt)
   end if

   !--Less than one point: only a message is written, execution goes on
   if (pos_size < 1) then
     msg = ' ERROR NO SUFFICENT MEMORY ON DEVICE'
     call wrtout(std_out,msg,'PERS')
   end if

   !--For GPU calculation it is better to have a number of points
   !  proportional to the half-warp size (16): above 16, a value that is
   !  not a multiple of 16 is rounded down, then 16 is added back when the
   !  rounded value is still below nfftc.
   resto = mod(pos_size,16)
   if (pos_size > 16 .and. resto /= 0) then
     pos_size = pos_size-resto
     if (pos_size < nfftc) pos_size = pos_size+16
   end if

   rset%GPU%nptrec = pos_size

   !--Only the proc with rank 0 prints the memory report
   if (rset%mpi%me == 0) then
     call prt_mem_usage(pos_size,rset%nfftrec)
   end if

 end if

 call CleanGPU(gpuinfo)

end subroutine InitRecGPU

m_hidecudarec/InitRecGPU_0 [ Functions ]

[ Top ] [ m_hidecudarec ] [ Functions ]

NAME

 InitRecGPU_0

FUNCTION

  recGPU_type is initialized

INPUTS

 mpi_ab=MPI information

OUTPUT

 recgpu=initialisation of GPU variables for recursion

PARENTS

      m_rec

CHILDREN

      unset_dev

SOURCE

151 #if defined HAVE_GPU_CUDA
152 
subroutine InitRecGPU_0(recgpu,mpi_ab)


!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
#undef ABI_FUNC
#define ABI_FUNC 'InitRecGPU_0'
!End of the abilint section

 implicit none

!Arguments ------------------------------------
! mpi_ab : only mpi_ab%nproc is read, to size recgpu%map
! recgpu : structure to initialise
 type(MPI_type),intent(in) :: mpi_ab
 type(recGPU_type),intent(inout) :: recgpu
!Local variables-------------------------------
! *************************************************************************

 !--Give the pointer a defined status, then allocate one entry per
 !  process (indices 0..nproc-1)
 nullify(recgpu%map)
 ABI_ALLOCATE(recgpu%map,(0:mpi_ab%nproc-1))

 !--Initial guess no gpu
 recgpu%map(:) = -1

 !--No point assigned to the GPU yet
 recgpu%nptrec = 0

end subroutine InitRecGPU_0

m_hidecudarec/prt_mem_usage [ Functions ]

[ Top ] [ m_hidecudarec ] [ Functions ]

NAME

 prt_mem_usage

FUNCTION

 Print information about allocation on GPU device during recursion

INPUTS

 nptrec=number of vectors allocated on device
 nfft=size of the grid (and so of a vector)

PARENTS

      m_hidecudarec

CHILDREN

      unset_dev

SOURCE

 81 #if defined HAVE_GPU_CUDA
 subroutine prt_mem_usage(nptrec,nfft)


!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
#undef ABI_FUNC
#define ABI_FUNC 'prt_mem_usage'
!End of the abilint section

   implicit none
!Arguments ------------------------------------
! nptrec : number of vectors allocated on device
! nfft   : size of the grid (and so of a vector)
   integer,intent(in) :: nptrec,nfft
!Local ---------------------------
   real(dp),parameter :: bytes_per_mb = 1048576.d0
   integer(kind=i4b) :: largeur,clargeur
   real(dp) :: totmem,rpart,matmem
   character(500) :: msg
! *********************************************************************

   !--Size in bytes of one real vector and of one complex vector
   !  (cudap is used as the number of bytes per element)
   largeur  = cudap*nfft
   clargeur = cudap*nfft*2

   !for CUDA version <3.0 :
   !   rpart = 3.d0*real(largeur,dp)/1048576.d0*real(nptrec,dp)
   !   totmem = rpart+real(2*clargeur+largeur+(2*cudap+i2b)*nptrec,dp)/1048576.d0
   !for CUDA version 3.0 :
   !--All products with nptrec are done in double precision: the byte
   !  counts easily exceed the range of a default integer, so converting
   !  after an integer multiplication would print an overflowed value.
   rpart  = 6.d0*real(largeur,dp)/bytes_per_mb*real(nptrec,dp)
   totmem = rpart+(real(clargeur,dp)+real(largeur,dp) &
     &      +real(2*cudap+i2b,dp)*real(nptrec,dp))/bytes_per_mb
   matmem = real(largeur,dp)*real(nptrec,dp)/bytes_per_mb

   !--Top rule: one blank followed by 80 underscores
   msg = ' '//repeat('_',80)
   call wrtout(std_out,msg,'COLL')
   write(msg,'(a18,a44,a18,a)')'_________________',&
&  '  Allocated Memory on Device for Recursion ','___________________' ,ch10
   call wrtout(std_out,msg,'COLL')

   write (msg,'(2(a32,i10,a),2(a32,i10,a6,a),2(a32,f10.2,a7,a))')&
     & '   Number of Points            ',nfft  ,ch10, &
     & '   Number of Vectors           ',nptrec,ch10, &
     & '   Size Real Vectors           ',largeur ,'bytes',ch10, &
     & '   Size Complex Vectors        ',clargeur,'bytes',ch10, &
     & '   Size Matrix of Vectors      ',matmem,'Mbytes',ch10, &
     & '   Allocated Memory on GPU     ',totmem,'Mbytes',ch10
   call wrtout(std_out,msg,'COLL')

   !--Bottom rule, same as the top one
   msg = ' '//repeat('_',80)
   call wrtout(std_out,msg,'COLL')
 end subroutine prt_mem_usage
127 
128 #endif