TABLE OF CONTENTS
- ABINIT/m_initcuda
- m_initcuda/CleanGPU
- m_initcuda/devGPU_type
- m_initcuda/get_fastest_devices
- m_initcuda/Get_Mem_Dev
- m_initcuda/InitGPU
- m_initcuda/my_sort
- m_initcuda/prt_device_info
- m_initcuda/setdevice_cuda
- m_initcuda/unsetdevice_cuda
ABINIT/m_initcuda [ Modules ]
NAME
m_initcuda
FUNCTION
Module containing all variables concerning GPU device and the functions needed to extract them
COPYRIGHT
Copyright (C) 2009-2022 ABINIT group (MMancini, MT, FDahm) This file is distributed under the terms of the GNU General Public License, see ~abinit/COPYING or http://www.gnu.org/copyleft/gpl.txt .
NOTES
Is an experimental development
SOURCE
20 #if defined HAVE_CONFIG_H 21 #include "config.h" 22 #endif 23 24 #if defined HAVE_GPU_CUDA 25 #include "cuda_common.h" 26 #endif 27 28 #include "abi_common.h" 29 30 module m_initcuda 31 32 use defs_basis 33 use m_abicore 34 use m_xmpi, only: xmpi_world,xmpi_comm_rank,xmpi_comm_size,xmpi_abort 35 36 implicit none 37 38 #if defined HAVE_GPU_CUDA 39 integer,parameter,public :: cudap=kind(CUDA_KIND) 40 #endif 41 42 !Structures
m_initcuda/CleanGPU [ Functions ]
[ Top ] [ m_initcuda ] [ Functions ]
NAME
CleanGPU
FUNCTION
Print information about GPU device
SOURCE
248 subroutine CleanGPU(gpuinfo) 249 250 implicit none 251 252 !Arguments ------------------------------------ 253 !scalars 254 type(devGPU_type),intent(inout) :: gpuinfo 255 ! ********************************************************************* 256 #if defined HAVE_GPU_CUDA 257 if (allocated(gpuinfo%maxmemdev)) then 258 ABI_FREE(gpuinfo%maxmemdev) 259 end if 260 #endif 261 262 end subroutine CleanGPU
m_initcuda/devGPU_type [ Types ]
[ Top ] [ m_initcuda ] [ Types ]
NAME
devGPU_type
FUNCTION
This structured datatype used to contains GPU properties
SOURCE
54 type,public :: devGPU_type 55 integer :: ndevice !--number of available devices 56 real(dp),allocatable :: maxmemdev(:) !--max global memory on any device 57 end type devGPU_type
m_initcuda/get_fastest_devices [ Functions ]
[ Top ] [ m_initcuda ] [ Functions ]
NAME
get_fastest_devices
FUNCTION
In case of multiple devices, sort them by performances and output the resulting list of devices.
SOURCE
399 subroutine get_fastest_devices(devices,nb_devices) 400 401 implicit none 402 403 !Arguments ------------------------------------ 404 !scalars 405 integer,intent(in) :: nb_devices 406 integer,intent(out) :: devices(:) 407 !Local variables ------------------------------ 408 !scalars 409 integer :: ii,nproc 410 character(len=500) :: msg 411 #if defined HAVE_GPU_CUDA 412 integer :: constmem,gflops,jj,lenname,nprocs,ncores,regist,sharemem 413 real(sp) :: clockRate,globalmem 414 character(len=20) :: name 415 #endif 416 !arrays 417 #if defined HAVE_GPU_CUDA 418 integer :: vers(0:1) 419 integer,allocatable :: isort(:) 420 real(dp),allocatable :: flops(:),mem(:) 421 #endif 422 423 ! ********************************************************************* 424 425 nproc=xmpi_comm_size(xmpi_world) 426 if (size(devices)/=nproc) stop 'wrong size for devices array!' 427 428 !Default 429 do ii=0,nproc-1 430 devices(ii+1) = MOD(ii,nb_devices) 431 end do 432 if (nb_devices==1) return 433 434 write(msg,'(a,i2,a)') ch10,nb_devices,' GPU device(s) have been detected on the current node:' 435 call wrtout(std_out,msg,'PERS') 436 437 #if defined HAVE_GPU_CUDA 438 !Check device(s) properties 439 ABI_MALLOC(flops,(nb_devices)) 440 ABI_MALLOC(mem, (nb_devices)) 441 do ii=0,nb_devices-1 442 call set_dev(ii) 443 call get_dev_info(ii,name,lenname,vers,globalmem,clockRate,gflops,constmem,& 444 & sharemem,regist,nprocs,ncores) 445 flops(ii+1)=dble(gflops) ; mem(ii+1)=dble(globalmem) 446 call unset_dev() 447 write(msg,'(a,i2,3a,i1,a,i1,a,i6,a,f7.1,a,i7,a,i4,a,i4,a)') & 448 & ' Device ',ii,': ',trim(name(1:lenname)),', v',vers(0),'.',vers(1),', Mem=',nint(globalmem),& 449 & ' Mbytes, Clock=',clockrate,' GHz, ',gflops,' GFLOPS, ',nprocs,' processors, ',ncores,' cores' 450 call wrtout(std_out,msg,'PERS') 451 end do 452 453 !Sort devices (first by flops, then by memory) 454 ABI_MALLOC(isort,(nb_devices)) 455 isort(:)=(/(ii,ii=1,nb_devices)/) 456 call my_sort(flops,mem,isort) 457 458 !Distribute cards among procs 459 do ii=0,nproc-1 460 jj=MOD(ii,nb_devices) 461 devices(ii+1) = isort(jj+1)-1 462 end do 463 464 ABI_FREE(isort) 465 ABI_FREE(flops) 466 ABI_FREE(mem) 467 #endif 468 469 contains
m_initcuda/Get_Mem_Dev [ Functions ]
[ Top ] [ m_initcuda ] [ Functions ]
NAME
Get_Mem_Dev
FUNCTION
Get the max memory availeble on device
INPUTS
device device number
OUTPUT
max_mem_dev
SOURCE
222 subroutine Get_Mem_Dev(device,max_mem_dev) 223 224 implicit none 225 226 !Arguments ------------------------------------ 227 !scalars 228 integer,intent(in) :: device 229 real(sp),intent(out) :: max_mem_dev 230 !Local variables ------------------------------ 231 ! ********************************************************************* 232 #if defined HAVE_GPU_CUDA 233 call get_GPU_max_mem(device,max_mem_dev) 234 #endif 235 end subroutine Get_Mem_Dev
m_initcuda/InitGPU [ Functions ]
[ Top ] [ m_initcuda ] [ Functions ]
NAME
InitGPU
FUNCTION
Print information about GPU device
SOURCE
154 subroutine InitGPU(gpuinfo,device) 155 156 implicit none 157 158 !Arguments ------------------------------------ 159 !scalars 160 integer,intent(in) :: device 161 type(devGPU_type),intent(inout) :: gpuinfo 162 !Local variables ------------------------------ 163 !scalars 164 real(sp) :: locmax 165 ! ********************************************************************* 166 gpuinfo%ndevice = 0 167 #if defined HAVE_GPU_CUDA 168 !--Initialization 169 if(device>-1)then 170 !--Get the number of device for this proc 171 gpuinfo%ndevice = 1 172 ABI_MALLOC(gpuinfo%maxmemdev,(0:1)) 173 call get_GPU_max_mem(device,locmax) 174 gpuinfo%maxmemdev(0:1) = locmax 175 call prt_device_info(device) 176 endif 177 #endif 178 end subroutine InitGPU
m_initcuda/my_sort [ Functions ]
[ Top ] [ m_initcuda ] [ Functions ]
NAME
my_sort
FUNCTION
Small sorting routine: change iperm array according to list1 values then list2 values
SOURCE
482 subroutine my_sort(list1,list2,iperm) 483 484 implicit none 485 486 !Arguments ------------------------------------ 487 !scalars 488 integer,intent(inout) :: iperm(:) 489 real(dp),intent(in) :: list1(:),list2(:) 490 !Local variables ------------------------------ 491 !scalars 492 integer :: ll,mm,nn,pp 493 real(dp) :: xx 494 !arrays 495 real(dp),allocatable :: llist(:) 496 497 ! ********************************************************************* 498 499 nn=size(iperm) 500 ABI_MALLOC(llist,(nn)) 501 llist(:)=list1(:) 502 do ll=1,nn-1 503 do mm=ll+1,nn 504 if (llist(mm)>llist(ll)) then 505 xx=llist(ll);llist(ll)=llist(mm);llist(mm)=xx 506 pp=iperm(ll);iperm(ll)=iperm(mm);iperm(mm)=pp 507 end if 508 end do 509 end do 510 do ll=1,nn-1 511 do mm=ll+1,nn 512 if (abs(llist(mm)-llist(ll))<tol8) then 513 if (list2(iperm(mm))>list2(iperm(ll))) then 514 xx=llist(ll);llist(ll)=llist(mm);llist(mm)=xx 515 pp=iperm(ll);iperm(ll)=iperm(mm);iperm(mm)=pp 516 end if 517 end if 518 end do 519 end do 520 ABI_FREE(llist) 521 522 end subroutine my_sort
m_initcuda/prt_device_info [ Functions ]
[ Top ] [ m_initcuda ] [ Functions ]
NAME
prt_device_info
FUNCTION
Print information about GPU device
SOURCE
88 subroutine prt_device_info(device) 89 90 implicit none 91 !Arguments ------------------------------------ 92 !scalars 93 integer,intent(in) :: device 94 !Local variables ------------------------------ 95 !scalars 96 integer :: gflops,constmem,sharemem 97 integer :: ii,regist,lenname,ncores,nprocs 98 real(sp) :: globalmem,clockRate 99 character(20) :: name 100 character(20) :: formatdev 101 character(60) :: gflops_stg 102 character(500) :: msg 103 !arrays 104 integer :: vers(0:1) 105 ! ********************************************************************* 106 #if defined HAVE_GPU_CUDA 107 write(msg,'(a,80a)')' ',('_',ii=1,80) 108 call wrtout(std_out,msg,'PERS') 109 write(msg,'(a25,a25,a31,a)') '________________________',& 110 & ' Graphic Card Properties ','_______________________________' ,ch10 111 call wrtout(std_out,msg,'PERS') 112 113 call get_dev_info(device,name,lenname,vers,globalmem,clockRate,gflops,constmem,sharemem,regist,nprocs,ncores) 114 if (gflops<0) then 115 gflops_stg="undefined (add new def. in version_2_cores function)" 116 else 117 write(gflops_stg,'(i7,a)') gflops,' GFP' 118 end if 119 120 write(formatdev,'(a12,i4,a)') '(a23,i4,a3,a',lenname,')' 121 write (msg,formatdev)& 122 & ' Device ',device,' : ',name(1:lenname) 123 call wrtout(std_out,msg,'PERS') 124 write (msg,'(a,2(i1,a),a,i6,a,a,a,f7.1,a,a,a,i4,a,i4,4a,2(a,i7,2a),a,i7,a)')& 125 & ' Revision number: ',vers(0),'.',vers(1),ch10, & 126 & ' Total amount of global memory: ',nint(globalmem),' Mbytes',ch10, & 127 & ' Clock rate: ',clockRate,' GHz',ch10, & 128 & ' Number of processors/cores: ',nprocs,'/',ncores,ch10, & 129 & ' Max GFLOPS: ',trim(gflops_stg),ch10, & 130 & ' Total constant memory: ',constmem,' bytes',ch10, & 131 & ' Shared memory per block: ',sharemem,' bytes',ch10, & 132 & ' Number of registers per block: ',regist,ch10 133 call wrtout(std_out,msg,'PERS') 134 if(device == -1)then 135 write(msg,'(a)')' no cuda-GPU devices found' 136 call wrtout(std_out,msg,'PERS') 137 end if 138 write(msg,'(a,80a)')' ',('_',ii=1,80) 139 call wrtout(std_out,msg,'PERS') 140 #endif 141 end subroutine prt_device_info
m_initcuda/setdevice_cuda [ Functions ]
[ Top ] [ m_initcuda ] [ Functions ]
NAME
setdevice_cuda
FUNCTION
Detect and activate a GPU device from current CPU core
INPUTS
gpu_devices(5)= list of GPU devices to choose on one node (in case of multiple devices); if set to 5*-1, will choose the devices by order of performances.
SIDE EFFECTS
use_gpu_cuda= 1 if CUDA is on; will be set to 0 if no GPU device is free.
SOURCE
282 subroutine setdevice_cuda(gpu_devices_node,use_gpu_cuda) 283 284 #ifdef FC_NAG 285 use f90_unix_proc 286 #endif 287 implicit none 288 289 !Arguments ------------------------------------ 290 !scalars 291 integer,intent(inout) :: use_gpu_cuda 292 !arrays 293 integer, intent(in) :: gpu_devices_node(5) 294 !Local variables ------------------------------ 295 !scalars 296 integer :: device,ii,jj,me,nb_devices,nproc 297 logical :: testopen 298 character(len=500) :: msg 299 type(devGPU_type) :: gpuinfo 300 !arrays 301 integer,allocatable :: fastest_devices(:) 302 ! ********************************************************************* 303 304 if (use_gpu_cuda==0) return 305 306 nproc=xmpi_comm_size(xmpi_world) 307 me=xmpi_comm_rank(xmpi_world) 308 309 #if defined HAVE_GPU_CUDA 310 device=-1 311 call c_get_ndevice(nb_devices) 312 nb_devices=min(nb_devices,5) 313 if(nb_devices>0) then 314 if(nb_devices==1) then 315 device=0 316 else if(all(gpu_devices_node(1:nb_devices)==-1)) then 317 ABI_MALLOC(fastest_devices,(0:nproc-1)) 318 call get_fastest_devices(fastest_devices,nb_devices) 319 device=fastest_devices(me) 320 ABI_FREE(fastest_devices) 321 else 322 jj=nb_devices 323 do ii=jj,2,-1 324 if(gpu_devices_node(ii)==-1) nb_devices=ii-1 325 end do 326 device=gpu_devices_node(1+mod(me,nb_devices)) 327 end if 328 call set_dev(device) 329 call check_context(nb_devices,msg) 330 if(nb_devices==1) then !allocation succeed 331 write(msg, '(4a,i1,2a)' ) ch10,& 332 & ' setdevice_cuda : COMMENT -',ch10,& 333 & ' GPU ',device,' has been properly initialized, continuing...',ch10 334 call wrtout(std_out,msg,'PERS') 335 else !gpu allocation failed we print error message returned and exit 336 device=-1 337 call wrtout(std_out,msg,'COLL') 338 call xmpi_abort() 339 inquire(std_out,OPENED=testopen) 340 if (testopen) close(std_out) 341 #if defined FC_NAG 342 call exit(-1) 343 #elif defined HAVE_FC_EXIT 344 call exit(1) 345 #else 346 stop 1 347 #endif 348 end if 349 call InitGPU(gpuinfo,device) 350 call CleanGPU(gpuinfo) 351 else 352 use_gpu_cuda=0 353 end if 354 #endif 355 end subroutine setdevice_cuda
m_initcuda/unsetdevice_cuda [ Functions ]
[ Top ] [ m_initcuda ] [ Functions ]
NAME
unsetdevice_cuda
FUNCTION
Deactivate a GPU device from current CPU core
SOURCE
368 subroutine unsetdevice_cuda(use_gpu_cuda) 369 370 implicit none 371 372 !Arguments ------------------------------------ 373 !scalars 374 integer,intent(in) :: use_gpu_cuda 375 !Local variables ------------------------------ 376 !scalars 377 character(len=500) :: msg 378 ! ********************************************************************* 379 380 if (use_gpu_cuda==0) return 381 382 #if defined HAVE_GPU_CUDA 383 call unset_dev() 384 #endif 385 end subroutine unsetdevice_cuda