TABLE OF CONTENTS


ABINIT/m_abi_gpu_linalg [ Modules ]

[ Top ] [ Modules ]

NAME

  m_abi_gpu_linalg

FUNCTION

  Interfaces of GPU subroutines wrapper

COPYRIGHT

  Copyright (C) 2011-2018 ABINIT group (FDahm ))
  This file is distributed under the terms of the
  GNU General Public License, see ~ABINIT/Infos/copyright
  or http://www.gnu.org/copyleft/gpl.txt .

SOURCE


m_abi_gpu_linalg/alloc_on_gpu [ Functions ]

[ Top ] [ m_abi_gpu_linalg ] [ Functions ]

NAME

  alloc_on_gpu

FUNCTION

  Allocate size byte in gpu memory and returns in gpu_ptr this location

INPUTS

  size= size in byte to allocate

OUTPUT

  gpu_ptr= C_PTR on gpu memory location that has been allocated

SIDE EFFECTS

   WARNING! : this routine is a dummy one when HAVE_GPU_CUDA is not enabled
   the correct one is in 15_gpu_toolbox/dev_spec.cu

PARENTS

      lobpcgwf

SOURCE

42 #ifndef HAVE_GPU_CUDA
43 
44 subroutine alloc_on_gpu(gpu_ptr,size)
45 
46 
47 !This section has been created automatically by the script Abilint (TD).
48 !Do not modify the following lines by hand.
49 #undef ABI_FUNC
50 #define ABI_FUNC 'alloc_on_gpu'
51 !End of the abilint section
52 
53  implicit none
54 
55 !Arguments ------------------------------------
56  integer :: size ! size in bytes to allocate
57  type(c_ptr) :: gpu_ptr
58 
59 !Local variables ------------------------------
60  type(c_ptr) :: cptr
61 
62 ! *********************************************************************
63 
64  if (.false.) then
65     cptr=gpu_ptr;write(std_out,*) size
66  end if
67 
68 end subroutine alloc_on_gpu

m_abi_gpu_linalg/copy_from_gpu [ Functions ]

[ Top ] [ m_abi_gpu_linalg ] [ Functions ]

NAME

  copy_from_gpu

FUNCTION

  copy size byte from gpu memory pointed by gpu_ptr to dtab

INPUTS

  size= size in byte to allocate
  gpu_ptr= C_PTR on gpu memory location that has been allocated

OUTPUT

  dtab = fortran tab which will contains data

SIDE EFFECTS

   WARNING! : this routine is a dummy one when HAVE_GPU_CUDA is not enabled
   the correct one is in 15_gpu_toolbox/dev_spec.cu

PARENTS

      lobpcgwf,m_abi_gpu_linalg

SOURCE

 94 subroutine copy_from_gpu(dtab,gpu_ptr,size)
 95 
 96 !This section has been created automatically by the script Abilint (TD).
 97 !Do not modify the following lines by hand.
 98 #undef ABI_FUNC
 99 #define ABI_FUNC 'copy_from_gpu'
100 !End of the abilint section
101 
102  implicit none
103 
104 !Arguments ------------------------------------
105  integer :: size ! taille en octet a transferer
106  real(dp),dimension(*),optional :: dtab
107  type(c_ptr) :: gpu_ptr
108 
109 !Local variables ------------------------------
110  type(c_ptr) :: cptr
111 
112 ! *********************************************************************
113 
114  if (.false.) then
115    cptr=gpu_ptr;write(std_out,*) size,dtab(1)
116  end if
117 
118 end subroutine copy_from_gpu

m_abi_gpu_linalg/copy_on_gpu [ Functions ]

[ Top ] [ m_abi_gpu_linalg ] [ Functions ]

NAME

  copy_on_gpu

FUNCTION

  copy size byte from  dtab to gpu memory pointed by gpu_ptr

INPUTS

  size= size in byte to allocate
  dtab = fortran tab to copy

OUTPUT

  gpu_ptr= C_PTR on gpu memory location

SIDE EFFECTS

   WARNING! : this routine is a dummy one when HAVE_GPU_CUDA is not enabled
   the correct one is in 15_gpu_toolbox/dev_spec.cu

PARENTS

      lobpcgwf,m_abi_gpu_linalg

SOURCE

144 subroutine copy_on_gpu(dtab,gpu_ptr,size)
145 
146 !This section has been created automatically by the script Abilint (TD).
147 !Do not modify the following lines by hand.
148 #undef ABI_FUNC
149 #define ABI_FUNC 'copy_on_gpu'
150 !End of the abilint section
151 
152  implicit none
153 
154 !Arguments ------------------------------------
155  integer :: size ! size in byte (to be transfered)
156  real(dp), dimension(*),optional :: dtab
157  type(c_ptr) :: gpu_ptr
158 !Local variables ------------------------------
159  type(c_ptr) :: cptr
160 
161 ! *********************************************************************
162 
163  if (.false.) then
164    cptr=gpu_ptr;write(std_out,*) size,dtab(1)
165  end if
166 
167 end subroutine copy_on_gpu

m_abi_gpu_linalg/dealloc_on_gpu [ Functions ]

[ Top ] [ m_abi_gpu_linalg ] [ Functions ]

NAME

  dealloc_on_gpu

FUNCTION

  free memory location pointed by gpu_ptr

INPUTS

OUTPUT

  gpu_ptr= C_PTR on gpu memory location that has been allocated

SIDE EFFECTS

   WARNING! : this routine is a dummy one when HAVE_GPU_CUDA is not enabled
   the correct one is in 15_gpu_toolbox/dev_spec.cu

PARENTS

      lobpcgwf

SOURCE

191 subroutine dealloc_on_gpu(gpu_ptr)
192 
193 
194 !This section has been created automatically by the script Abilint (TD).
195 !Do not modify the following lines by hand.
196 #undef ABI_FUNC
197 #define ABI_FUNC 'dealloc_on_gpu'
198 !End of the abilint section
199 
200  implicit none
201 
202 !Arguments ------------------------------------
203  type(c_ptr) :: gpu_ptr
204 
205 !Local variables ------------------------------
206  type(c_ptr) :: cptr
207 
208 ! *********************************************************************
209 
210  if (.false.) cptr=gpu_ptr
211 
212 end subroutine dealloc_on_gpu

m_abi_gpu_linalg/gpu_linalg_init [ Functions ]

[ Top ] [ m_abi_gpu_linalg ] [ Functions ]

NAME

  gpu_linalg_init

FUNCTION

  initialisation of linalg environnement on GPU

INPUTS

OUTPUT

SIDE EFFECTS

   WARNING! : this routine is a dummy one when HAVE_GPU_CUDA is not enabled
   the correct one is in 15_gpu_toolbox/gpu_linalg.cu

PARENTS

      lobpcgwf

SOURCE

235 subroutine gpu_linalg_init()
236 
237 
238 !This section has been created automatically by the script Abilint (TD).
239 !Do not modify the following lines by hand.
240 #undef ABI_FUNC
241 #define ABI_FUNC 'gpu_linalg_init'
242 !End of the abilint section
243 
244  implicit none
245 
246 end subroutine gpu_linalg_init

m_abi_gpu_linalg/gpu_linalg_shutdown [ Functions ]

[ Top ] [ m_abi_gpu_linalg ] [ Functions ]

NAME

  gpu_linalg_shutdown

FUNCTION

  close linalg environnement on GPU

INPUTS

OUTPUT

SIDE EFFECTS

   WARNING! : this routine is a dummy one when HAVE_GPU_CUDA is not enabled
   the correct one is in 15_gpu_toolbox/gpu_linalg.cu

PARENTS

      lobpcgwf

CHILDREN

SOURCE

270 subroutine gpu_linalg_shutdown()
271 
272 
273 !This section has been created automatically by the script Abilint (TD).
274 !Do not modify the following lines by hand.
275 #undef ABI_FUNC
276 #define ABI_FUNC 'gpu_linalg_shutdown'
277 !End of the abilint section
278 
279  implicit none
280 
281 end subroutine gpu_linalg_shutdown

m_abi_gpu_linalg/gpu_xgemm [ Functions ]

[ Top ] [ m_abi_gpu_linalg ] [ Functions ]

NAME

  gpu_xgemm

FUNCTION

  Compute a scalar-matrix-matrix product and return a scalar-matrix product on GPU
  c = alpha * op(a) * op(b) + beta * c

INPUTS

  cplx  = 1 if real 2 if complex
  transa= from of op(a) to be used in the matrix multiplication
  transb= from of op(b) to be used in the matrix multiplication
  m     = number of rows of the matrix op(a) and of the matrix c
  n     = number of rows of the matrix op(b) and the number of columns of the matrix c
  k     = number of columns of the matrix op(a) and the number of rows of the matrix op(b)
  alpha = alpha scalar coefficient for matrix op(a)
  a_gpu = pointer to gpu memory location of  matrix a
  lda   = first dimension of a
  b_gpu = pointer to gpu memory location of  matrix b
  ldb   = first dimension of b
  beta  = beta scalar coefficient for matrix c
  c_gpu = pointer to gpu memory location of  matrix c
  ldc   = first dimension of c

OUTPUT

  c     = c matrix

SIDE EFFECTS

   WARNING! : this routine is a dummy one when HAVE_GPU_CUDA is not enabled
   the correct one is in 15_gpu_toolbox/gpu_linalg.cu

PARENTS

      lobpcgwf,m_abi_gpu_linalg

CHILDREN

SOURCE

323 subroutine gpu_xgemm(cplx,transa,transb,m,n,k,alpha,a_gpu,lda,b_gpu,ldb,beta,c_gpu,ldc)
324 
325 !This section has been created automatically by the script Abilint (TD).
326 !Do not modify the following lines by hand.
327 #undef ABI_FUNC
328 #define ABI_FUNC 'gpu_xgemm'
329 !End of the abilint section
330 
331  implicit none
332 
333 !Arguments ------------------------------------
334  integer,intent(in) :: cplx,lda,ldb,ldc,m,n,k
335  complex(dpc),intent(in) :: alpha,beta
336  character(len=1),intent(in) :: transa,transb
337  type(c_ptr),intent(in) :: a_gpu,b_gpu
338  type(c_ptr),intent(inout) :: c_gpu
339 !Local variables ------------------------------
340  type(c_ptr) :: cptr
341 
342 ! *********************************************************************
343 
344  if (.false.) then
345    cptr=a_gpu;cptr=b_gpu;cptr=c_gpu
346    write(std_out,*) transa,transb,cplx,lda,ldb,ldc,m,n,k,alpha,beta
347  end if
348 
349 end subroutine gpu_xgemm

m_abi_gpu_linalg/gpu_xtrsm [ Functions ]

[ Top ] [ m_abi_gpu_linalg ] [ Functions ]

NAME

  gpu_xtrsm

FUNCTION

 Solves a matrix equation (one matrix operand is triangular) on GPU.
 The xtrsm routines solve one of the following matrix equations
 op(a)*x = alpha*b
 or
 x*op(a) = alpha*b,

INPUTS

 cplx= 1 if real 2 if complex
 side= Specifies whether op(a) appears on the left or right of x for
      the operation to be performed as follows:
      L or l op(a)*x = alpha*b
      R or r x*op(a) = alpha*b
 uplo= Specifies whether the matrix a is an upper or lower triangular
      matrix as follows:
      U or u Matrix a is an upper triangular matrix.
      L or l Matrix a is a lower triangular matrix
 transa= Specifies the form of op(a) to be used in the matrix
      multiplication as follows:
      N or n op(a) = a
      T or t op(a) = a'
      C or c op(a) = conjg(a')
 diag= Specifies whether or not a is unit triangular as follows:
      U or u Matrix a is assumed to be unit triangular.
      N or n Matrix a is not assumed to be unit triangular.
 m= Specifies the number of rows of b. The value of m must be at least zero
 n= Specifies the number of columns of b. The value of n must be at least zero
 alpha= Specifies the scalar alpha. When alpha is zero, then a is not referenced and b
      need not be set before entry.
  a_gpu = pointer to gpu memory location of  array a, DIMENSION (lda, k), where k is m when side = 'L' or 'l' and is n
      when side = 'R' or 'r'.
 lda= Specifies the first dimension of a as declared in the calling
     (sub)program. When side = 'L' or 'l', then lda must be at least max(1,
      m), when side = 'R' or 'r', then lda must be at least max(1, n).
  b_gpu = pointer to gpu memory location of  b Array, DIMENSION (ldb,n). Before entry, the leading m-by-n part of the array
     b must contain the right-hand side matrix b.
 ldb= Specifies the first dimension of b as declared in the calling
     (sub)program. The value of ldb must be at least max(1, m).

OUTPUT

  b_gpu

SIDE EFFECTS

   WARNING! : this routine is a dummy one when HAVE_GPU_CUDA is not enabled
   the correct one is in 15_gpu_toolbox/gpu_linalg.cu

PARENTS

      lobpcgwf,m_abi_gpu_linalg

CHILDREN

SOURCE

408 subroutine gpu_xtrsm(cplx,side,uplo,transa,diag,m,n,alpha,a_gpu,lda,b_gpu,ldb)
409 
410 !This section has been created automatically by the script Abilint (TD).
411 !Do not modify the following lines by hand.
412 #undef ABI_FUNC
413 #define ABI_FUNC 'gpu_xtrsm'
414 !End of the abilint section
415 
416  implicit none
417 
418 ! !Arguments ------------------------------------
419  integer, intent(in) :: cplx,lda,ldb,m,n
420  complex(dpc), intent(in) :: alpha
421  character(len=1), intent(in) :: side,uplo,transa,diag
422  type(c_ptr),intent(in) :: a_gpu
423  type(c_ptr),intent(inout) :: b_gpu
424 !Local variables ------------------------------
425  type(c_ptr) :: cptr
426 
427 ! *********************************************************************
428 
429  if (.false.) then
430    cptr=a_gpu;cptr=b_gpu
431    write(std_out,*) side,uplo,transa,diag,cplx,lda,ldb,m,n,alpha
432  end if
433 
434 end subroutine gpu_xtrsm

m_abi_linalg/gpu_xorthonormalize [ Functions ]

[ Top ] [ m_abi_linalg ] [ Functions ]

NAME

  gpu_xorthonormalize

FUNCTION

  This routine computes the overlap of two complex wavefunctions (for a given number of bands)
  and orthonormalizes it using gpu:
      - Computes the products of two rectangular matrices
         containing the wavefunctions psi and S.psi (where S is the
         overlap (with the PAW terms if necessary)).
      - Does a Cholesky decomposition of this overlap
      - rotates the initial matrix blockvectorx by the triangular matrix to
         have an orthonormal set of wavefunctions

INPUTS

  blockvectorbx = matrix of dimension (blocksize,vectsize) as a GPU ptr
                  (e.g. block of overlap*wavefunction)
  blocksize     = dimension of matrices (e.g number of bands)
  spaceComm     = communicator used for  MPI parallelization
  vectsize      = dimension of matrices (e.g number of G vector)

OUTPUT

  sqgram        = Choleski decomposition of transpose(blockvector)*blockvectorx as a GPU ptr

SIDE EFFECTS

  blockvectorx  = on input, matrix of dimension (vectsize,blocksize) as a GPU ptr
                  (e.g block of wavefunction)
  blockvectorx  = on output, orthonormalized wavefunction. as a GPU ptr

PARENTS

      lobpcgwf

CHILDREN

SOURCE

476 subroutine gpu_xorthonormalize(blockvectorx_gpu,blockvectorbx_gpu,blocksize,spaceComm,&
477 &                              sqgram_gpu,vectsize,&
478 &                              x_cplx,timopt,tim_xortho) ! optional arguments
479 
480 !This section has been created automatically by the script Abilint (TD).
481 !Do not modify the following lines by hand.
482 #undef ABI_FUNC
483 #define ABI_FUNC 'gpu_xorthonormalize'
484 !End of the abilint section
485 
486  implicit none
487 
488 !Arguments ------------------------------------
489 !scalars
490  integer,intent(in) :: blocksize,spaceComm,vectsize,x_cplx
491  integer, intent(in), optional :: timopt,tim_xortho
492 !arrays
493  type(c_ptr),intent(inout) :: blockvectorbx_gpu, blockvectorx_gpu,sqgram_gpu
494 !Local variables-------------------------------
495 #if defined HAVE_GPU_CUDA
496  integer :: ierr,info
497  real(dp),dimension(:,:),allocatable :: d_sqgram
498  complex(dpc),dimension(:,:),allocatable :: z_sqgram
499  character :: tr
500  real(dp) :: tsec(2)
501 #else
502  type(c_ptr) :: cptr_a
503 #endif
504  character(len=500) :: message
505 
506 ! *********************************************************************
507 #if defined HAVE_GPU_CUDA
508  if (present(tim_xortho).and.present(timopt)) then
509    if(abs(timopt)==3) then
510      call timab(tim_xortho,1,tsec)
511    end if
512  end if
513 
514  if ( x_cplx == 1 ) then
515    tr='t'
516    ABI_ALLOCATE(d_sqgram,(blocksize,blocksize))
517  else
518    tr='c'
519    ABI_ALLOCATE(z_sqgram,(blocksize,blocksize))
520  end if
521 
522  call gpu_xgemm(x_cplx,tr,'n',blocksize,blocksize,vectsize, &
523 & cone,blockvectorx_gpu,vectsize,blockvectorbx_gpu,vectsize,czero,sqgram_gpu,blocksize)
524 
525  if ( x_cplx == 1 ) then
526    call copy_from_gpu(d_sqgram,sqgram_gpu,x_cplx*dp*blocksize*blocksize)
527    call xmpi_sum(d_sqgram,spaceComm,ierr)
528    call abi_xpotrf('u',blocksize,d_sqgram,blocksize,info)
529    call copy_on_gpu(d_sqgram,sqgram_gpu,x_cplx*dp*blocksize*blocksize)
530  else
531    call copy_from_gpu(z_sqgram,sqgram_gpu,x_cplx*dp*blocksize*blocksize)
532    call xmpi_sum(z_sqgram,spaceComm,ierr)
533    call abi_xpotrf('u',blocksize,z_sqgram,blocksize,info)
534    call copy_on_gpu(z_sqgram,sqgram_gpu,x_cplx*dp*blocksize*blocksize)
535  end if
536 
537  if (info /= 0 ) then
538    write(message,'(a,i3)') '  xpotrf, info=',info
539    MSG_WARNING(message)
540  end if
541 
542  call gpu_xtrsm(x_cplx,'r','u','n','n',vectsize,blocksize,cone,sqgram_gpu,blocksize,&
543 &               blockvectorx_gpu,vectsize)
544 
545  if(x_cplx==1) then
546    ABI_DEALLOCATE(d_sqgram)
547  else
548    ABI_DEALLOCATE(z_sqgram)
549  end if
550  if (present(tim_xortho).and.present(timopt)) then
551    if(abs(timopt)==3) then
552      call timab(tim_xortho,2,tsec)
553    end if
554  end if
555  return
556 
557 #else
558  message='  This routine is not allowed when Cuda is disabled !'
559  MSG_BUG(message)
560  if (.false.) then
561    write(std_out,*) blocksize,vectsize,spaceComm,x_cplx
562    if (present(timopt))  write(std_out,*) timopt
563    if (present(tim_xortho))  write(std_out,*) tim_xortho
564    cptr_a=blockvectorbx_gpu;cptr_a=blockvectorx_gpu;cptr_a=sqgram_gpu
565  end if
566 #endif
567 
568 end subroutine gpu_xorthonormalize