TABLE OF CONTENTS


ABINIT/m_initcuda [ Modules ]

[ Top ] [ Modules ]

NAME

 m_initcuda

FUNCTION

  Module containing all variables concerning GPU device
  and the functions needed to extract them

COPYRIGHT

  Copyright (C) 2009-2022 ABINIT group (MMancini, MT, FDahm)
  This file is distributed under the terms of the
  GNU General Public License, see ~abinit/COPYING
  or http://www.gnu.org/copyleft/gpl.txt .

NOTES

  Is an experimental development

SOURCE

20 #if defined HAVE_CONFIG_H
21 #include "config.h"
22 #endif
23 
24 #if defined HAVE_GPU_CUDA
25 #include "cuda_common.h"
26 #endif
27 
28 #include "abi_common.h"
29 
30 module m_initcuda
31 
32  use defs_basis
33  use m_abicore
34  use m_xmpi, only: xmpi_world,xmpi_comm_rank,xmpi_comm_size,xmpi_abort
35 
36  implicit none
37 
38 #if defined HAVE_GPU_CUDA
39  integer,parameter,public :: cudap=kind(CUDA_KIND)
40 #endif
41 
42 !Structures

m_initcuda/CleanGPU [ Functions ]

[ Top ] [ m_initcuda ] [ Functions ]

NAME

 CleanGPU

FUNCTION

 Print information about GPU device

SOURCE

248  subroutine CleanGPU(gpuinfo)
249 
250  implicit none
251 
252 !Arguments ------------------------------------
253 !scalars
254  type(devGPU_type),intent(inout) :: gpuinfo
255 ! *********************************************************************
256 #if defined HAVE_GPU_CUDA
257  if (allocated(gpuinfo%maxmemdev))  then
258    ABI_FREE(gpuinfo%maxmemdev)
259  end if
260 #endif
261 
262  end subroutine CleanGPU

m_initcuda/devGPU_type [ Types ]

[ Top ] [ m_initcuda ] [ Types ]

NAME

 devGPU_type

FUNCTION

 This structured datatype used to contains GPU properties

SOURCE

54  type,public :: devGPU_type
55   integer :: ndevice  !--number of available devices
56   real(dp),allocatable :: maxmemdev(:)  !--max global memory on any device
57  end type devGPU_type

m_initcuda/get_fastest_devices [ Functions ]

[ Top ] [ m_initcuda ] [ Functions ]

NAME

 get_fastest_devices

FUNCTION

 In case of multiple devices, sort them by performances
 and output the resulting list of devices.

SOURCE

399  subroutine get_fastest_devices(devices,nb_devices)
400 
401  implicit none
402 
403 !Arguments ------------------------------------
404 !scalars
405  integer,intent(in) :: nb_devices
406  integer,intent(out) :: devices(:)
407 !Local variables ------------------------------
408 !scalars
409  integer :: ii,nproc
410  character(len=500) :: msg
411 #if defined HAVE_GPU_CUDA
412  integer :: constmem,gflops,jj,lenname,nprocs,ncores,regist,sharemem
413  real(sp) :: clockRate,globalmem
414  character(len=20) :: name
415 #endif
416 !arrays
417 #if defined HAVE_GPU_CUDA
418  integer :: vers(0:1)
419  integer,allocatable :: isort(:)
420  real(dp),allocatable :: flops(:),mem(:)
421 #endif
422 
423 ! *********************************************************************
424 
425  nproc=xmpi_comm_size(xmpi_world)
426  if (size(devices)/=nproc) stop 'wrong size for devices array!'
427 
428 !Default
429  do ii=0,nproc-1
430    devices(ii+1) = MOD(ii,nb_devices)
431  end do
432  if (nb_devices==1) return
433 
434  write(msg,'(a,i2,a)') ch10,nb_devices,' GPU device(s) have been detected on the current node:'
435  call wrtout(std_out,msg,'PERS')
436 
437 #if defined HAVE_GPU_CUDA
438 !Check device(s) properties
439  ABI_MALLOC(flops,(nb_devices))
440  ABI_MALLOC(mem,  (nb_devices))
441  do ii=0,nb_devices-1
442    call set_dev(ii)
443    call get_dev_info(ii,name,lenname,vers,globalmem,clockRate,gflops,constmem,&
444 &                    sharemem,regist,nprocs,ncores)
445    flops(ii+1)=dble(gflops) ; mem(ii+1)=dble(globalmem)
446    call unset_dev()
447    write(msg,'(a,i2,3a,i1,a,i1,a,i6,a,f7.1,a,i7,a,i4,a,i4,a)') &
448 &   '  Device ',ii,': ',trim(name(1:lenname)),', v',vers(0),'.',vers(1),', Mem=',nint(globalmem),&
449 &   ' Mbytes, Clock=',clockrate,' GHz, ',gflops,' GFLOPS, ',nprocs,' processors, ',ncores,' cores'
450    call wrtout(std_out,msg,'PERS')
451  end do
452 
453 !Sort devices (first by flops, then by memory)
454  ABI_MALLOC(isort,(nb_devices))
455  isort(:)=(/(ii,ii=1,nb_devices)/)
456  call my_sort(flops,mem,isort)
457 
458 !Distribute cards among procs
459  do ii=0,nproc-1
460    jj=MOD(ii,nb_devices)
461    devices(ii+1) = isort(jj+1)-1
462  end do
463 
464  ABI_FREE(isort)
465  ABI_FREE(flops)
466  ABI_FREE(mem)
467 #endif
468 
469 contains

m_initcuda/Get_Mem_Dev [ Functions ]

[ Top ] [ m_initcuda ] [ Functions ]

NAME

 Get_Mem_Dev

FUNCTION

 Get the max memory availeble on device

INPUTS

 device  device number

OUTPUT

 max_mem_dev

SOURCE

222 subroutine Get_Mem_Dev(device,max_mem_dev)
223 
224  implicit none
225 
226 !Arguments ------------------------------------
227 !scalars
228  integer,intent(in) :: device
229  real(sp),intent(out) :: max_mem_dev
230 !Local variables ------------------------------
231 ! *********************************************************************
232 #if defined HAVE_GPU_CUDA
233  call get_GPU_max_mem(device,max_mem_dev)
234 #endif
235 end subroutine Get_Mem_Dev

m_initcuda/InitGPU [ Functions ]

[ Top ] [ m_initcuda ] [ Functions ]

NAME

 InitGPU

FUNCTION

 Print information about GPU device

SOURCE

154  subroutine InitGPU(gpuinfo,device)
155 
156  implicit none
157 
158 !Arguments ------------------------------------
159 !scalars
160  integer,intent(in)              :: device
161  type(devGPU_type),intent(inout) :: gpuinfo
162 !Local variables ------------------------------
163 !scalars
164  real(sp) :: locmax
165 ! *********************************************************************
166  gpuinfo%ndevice = 0
167 #if defined HAVE_GPU_CUDA
168 !--Initialization
169  if(device>-1)then
170    !--Get the number of device for this proc
171    gpuinfo%ndevice = 1
172    ABI_MALLOC(gpuinfo%maxmemdev,(0:1))
173    call get_GPU_max_mem(device,locmax)
174    gpuinfo%maxmemdev(0:1) = locmax
175    call  prt_device_info(device)
176  endif
177 #endif
178  end subroutine InitGPU

m_initcuda/my_sort [ Functions ]

[ Top ] [ m_initcuda ] [ Functions ]

NAME

 my_sort

FUNCTION

  Small sorting routine: change iperm array
  according to list1 values then list2 values

SOURCE

482  subroutine my_sort(list1,list2,iperm)
483 
484  implicit none
485 
486 !Arguments ------------------------------------
487 !scalars
488  integer,intent(inout) :: iperm(:)
489  real(dp),intent(in) :: list1(:),list2(:)
490 !Local variables ------------------------------
491 !scalars
492  integer :: ll,mm,nn,pp
493  real(dp) :: xx
494 !arrays
495  real(dp),allocatable :: llist(:)
496 
497 ! *********************************************************************
498 
499  nn=size(iperm)
500  ABI_MALLOC(llist,(nn))
501  llist(:)=list1(:)
502  do ll=1,nn-1
503    do mm=ll+1,nn
504      if (llist(mm)>llist(ll)) then
505        xx=llist(ll);llist(ll)=llist(mm);llist(mm)=xx
506        pp=iperm(ll);iperm(ll)=iperm(mm);iperm(mm)=pp
507      end if
508    end do
509  end do
510  do ll=1,nn-1
511    do mm=ll+1,nn
512      if (abs(llist(mm)-llist(ll))<tol8) then
513        if (list2(iperm(mm))>list2(iperm(ll))) then
514          xx=llist(ll);llist(ll)=llist(mm);llist(mm)=xx
515          pp=iperm(ll);iperm(ll)=iperm(mm);iperm(mm)=pp
516        end if
517      end if
518    end do
519  end do
520  ABI_FREE(llist)
521 
522  end subroutine my_sort

m_initcuda/prt_device_info [ Functions ]

[ Top ] [ m_initcuda ] [ Functions ]

NAME

 prt_device_info

FUNCTION

 Print information about GPU device

SOURCE

 88  subroutine prt_device_info(device)
 89 
 90   implicit none
 91 !Arguments ------------------------------------
 92 !scalars
 93  integer,intent(in) :: device
 94 !Local variables ------------------------------
 95 !scalars
 96  integer :: gflops,constmem,sharemem
 97  integer :: ii,regist,lenname,ncores,nprocs
 98  real(sp) :: globalmem,clockRate
 99  character(20)  :: name
100  character(20)  :: formatdev
101  character(60)  :: gflops_stg
102  character(500) :: msg
103 !arrays
104  integer :: vers(0:1)
105 ! *********************************************************************
106 #if defined HAVE_GPU_CUDA
107  write(msg,'(a,80a)')' ',('_',ii=1,80)
108  call wrtout(std_out,msg,'PERS')
109  write(msg,'(a25,a25,a31,a)')  '________________________',&
110 &  ' Graphic Card Properties ','_______________________________' ,ch10
111  call wrtout(std_out,msg,'PERS')
112 
113  call get_dev_info(device,name,lenname,vers,globalmem,clockRate,gflops,constmem,sharemem,regist,nprocs,ncores)
114  if (gflops<0) then
115    gflops_stg="undefined (add new def. in version_2_cores function)"
116  else
117    write(gflops_stg,'(i7,a)') gflops,' GFP'
118  end if
119 
120  write(formatdev,'(a12,i4,a)') '(a23,i4,a3,a',lenname,')'
121  write (msg,formatdev)&
122        & '  Device             ',device,' : ',name(1:lenname)
123  call wrtout(std_out,msg,'PERS')
124  write (msg,'(a,2(i1,a),a,i6,a,a,a,f7.1,a,a,a,i4,a,i4,4a,2(a,i7,2a),a,i7,a)')&
125        & ' Revision number:                   ',vers(0),'.',vers(1),ch10, &
126        & ' Total amount of global memory: ',nint(globalmem),' Mbytes',ch10, &
127        & ' Clock rate:                    ',clockRate,' GHz',ch10, &
128        & ' Number of processors/cores:    ',nprocs,'/',ncores,ch10, &
129        & ' Max GFLOPS:                    ',trim(gflops_stg),ch10, &
130        & ' Total  constant memory:        ',constmem,' bytes',ch10, &
131        & ' Shared memory per block:       ',sharemem,' bytes',ch10, &
132        & ' Number of registers per block: ',regist,ch10
133  call wrtout(std_out,msg,'PERS')
134  if(device == -1)then
135    write(msg,'(a)')' no cuda-GPU devices found'
136    call wrtout(std_out,msg,'PERS')
137  end if
138  write(msg,'(a,80a)')' ',('_',ii=1,80)
139  call wrtout(std_out,msg,'PERS')
140 #endif
141  end subroutine prt_device_info

m_initcuda/setdevice_cuda [ Functions ]

[ Top ] [ m_initcuda ] [ Functions ]

NAME

 setdevice_cuda

FUNCTION

 Detect and activate a GPU device from current CPU core

INPUTS

  gpu_devices(5)= list of GPU devices to choose on one node (in case of multiple devices);
                  if set to 5*-1, will choose the devices by order of performances.

SIDE EFFECTS

  use_gpu_cuda= 1 if CUDA is on; will be set to 0 if no GPU device is free.

SOURCE

282  subroutine setdevice_cuda(gpu_devices_node,use_gpu_cuda)
283 
284 #ifdef FC_NAG
285  use f90_unix_proc
286 #endif
287  implicit none
288 
289 !Arguments ------------------------------------
290 !scalars
291  integer,intent(inout) :: use_gpu_cuda
292 !arrays
293  integer, intent(in) :: gpu_devices_node(5)
294 !Local variables ------------------------------
295 !scalars
296  integer :: device,ii,jj,me,nb_devices,nproc
297  logical :: testopen
298  character(len=500) :: msg
299  type(devGPU_type) :: gpuinfo
300 !arrays
301  integer,allocatable :: fastest_devices(:)
302 ! *********************************************************************
303 
304  if (use_gpu_cuda==0) return
305 
306  nproc=xmpi_comm_size(xmpi_world)
307  me=xmpi_comm_rank(xmpi_world)
308 
309 #if defined HAVE_GPU_CUDA
310  device=-1
311  call c_get_ndevice(nb_devices)
312  nb_devices=min(nb_devices,5)
313  if(nb_devices>0) then
314    if(nb_devices==1) then
315      device=0
316    else if(all(gpu_devices_node(1:nb_devices)==-1)) then
317      ABI_MALLOC(fastest_devices,(0:nproc-1))
318      call get_fastest_devices(fastest_devices,nb_devices)
319      device=fastest_devices(me)
320      ABI_FREE(fastest_devices)
321    else
322      jj=nb_devices
323      do ii=jj,2,-1
324        if(gpu_devices_node(ii)==-1) nb_devices=ii-1
325      end do
326      device=gpu_devices_node(1+mod(me,nb_devices))
327    end if
328    call set_dev(device)
329    call check_context(nb_devices,msg)
330    if(nb_devices==1) then !allocation succeed
331      write(msg, '(4a,i1,2a)' ) ch10,&
332 &     ' setdevice_cuda : COMMENT -',ch10,&
333 &     '  GPU ',device,' has been properly initialized, continuing...',ch10
334      call wrtout(std_out,msg,'PERS')
335    else !gpu allocation failed we print error message returned and exit
336      device=-1
337      call wrtout(std_out,msg,'COLL')
338      call xmpi_abort()
339      inquire(std_out,OPENED=testopen)
340      if (testopen) close(std_out)
341 #if defined FC_NAG
342      call exit(-1)
343 #elif defined HAVE_FC_EXIT
344      call exit(1)
345 #else
346       stop 1
347 #endif
348    end if
349    call InitGPU(gpuinfo,device)
350    call CleanGPU(gpuinfo)
351  else
352    use_gpu_cuda=0
353  end if
354 #endif
355  end subroutine setdevice_cuda

m_initcuda/unsetdevice_cuda [ Functions ]

[ Top ] [ m_initcuda ] [ Functions ]

NAME

 unsetdevice_cuda

FUNCTION

 Deactivate a GPU device from current CPU core

SOURCE

368  subroutine unsetdevice_cuda(use_gpu_cuda)
369 
370  implicit none
371 
372 !Arguments ------------------------------------
373 !scalars
374  integer,intent(in) :: use_gpu_cuda
375 !Local variables ------------------------------
376 !scalars
377  character(len=500) :: msg
378 ! *********************************************************************
379 
380  if (use_gpu_cuda==0) return
381 
382 #if defined HAVE_GPU_CUDA
383  call unset_dev()
384 #endif
385  end subroutine unsetdevice_cuda