TABLE OF CONTENTS


ABINIT/compute_kgb_indicator [ Functions ]

[ Top ] [ Functions ]

NAME

 compute_kgb_indicator

FUNCTION

 Only for "KGB" parallelism (LOBPCG algorithm for Ground-state):
  Give an indicator of performance for a given distribution of processors
  (npband, npfft and bandpp).
  Determine best choice of parameters for Scalapack and/or Magma Linear Algebra routines.

COPYRIGHT

 Copyright (C) 1999-2018 ABINIT group (FD)
 This file is distributed under the terms of the
 GNU General Public License, see ~abinit/COPYING
 or http://www.gnu.org/copyleft/gpl.txt .
 For the initials of contributors, see ~abinit/doc/developers/contributors.txt.

INPUTS

  bandpp=internal lobpcg optimization variable
  glb_comm=communicator for global MPI communications
  mband=maximum number of bands.
  mband=maximum number of plane waves
  npband=number of processor 'band'
  npfft = number of processor 'fft'
  uselinalggpu=indicate if we also test the gpu linear algebra

OUTPUT

  acc_kgb = indicator of performance
  npslk = number of process to used in communicators

SIDE EFFECTS

 This routine can be used to find an indicator in order to refine automatic process distribution.
   This indicator is returned in acc_kgb
 This routine can be used to find the optimal values of np_slk parameter (ScaLapack)
   and wheter or not we should use Magma for Linear Algebra in lobpcgwf

PARENTS

      finddistrproc

CHILDREN

      abi_linalg_finalize,abi_linalg_init,abi_xhegv,abi_xorthonormalize
      wrtout,xmpi_bcast,xmpi_comm_free

SOURCE

 47 #if defined HAVE_CONFIG_H
 48 #include "config.h"
 49 #endif
 50 
 51 #include "abi_common.h"
 52 
 53 subroutine compute_kgb_indicator(acc_kgb,bandpp,glb_comm,mband,mpw,npband,npfft,npslk,&
 54 &                                uselinalggpu)
 55 
 56  use defs_basis
 57  use m_abi_linalg
 58  use m_profiling_abi
 59  use m_errors
 60  use m_xmpi
 61 
 62  use m_time,  only : abi_wtime
 63 
 64 !This section has been created automatically by the script Abilint (TD).
 65 !Do not modify the following lines by hand.
 66 #undef ABI_FUNC
 67 #define ABI_FUNC 'compute_kgb_indicator'
 68  use interfaces_14_hidewrite
 69 !End of the abilint section
 70 
 71  implicit none
 72 
 73 !Arguments ------------------------------------
 74 !scalars
 75  integer,intent(in) :: bandpp,glb_comm,mband,mpw,npband,npfft
 76  integer,intent(inout) :: npslk,uselinalggpu
 77  real(dp),intent(inout) :: acc_kgb
 78 !arrays
 79 
 80 !Local variables-------------------------------
 81 !scalars
 82  integer,parameter :: max_number_of_npslk=10,max_number_of_iter=10
 83  integer :: blocksize,bigorder,ierr,ii,islk,islk1,iter,jj,keep_gpu
 84  integer :: kgb_comm,my_rank,np_slk,np_slk_max,np_slk_best,nranks
 85  integer :: use_lapack_gpu,use_slk,vectsize
 86  real(dp) :: min_eigen,min_ortho,time_xeigen,time_xortho
 87  character(len=500) :: message
 88 !arrays
 89  integer,allocatable :: ranks(:),val_npslk(:)
 90  real(dp),allocatable :: eigen(:),grama(:,:),gramb(:,:)
 91  complex(dpc),allocatable :: blockvectorbx(:,:),blockvectorx(:,:),sqgram(:,:)
 92 
 93 !******************************************************************
 94 
 95  DBG_ENTER("COLL")
 96 
 97 #ifdef DEBUG_MODE
 98  write(message,'(a,3i3)') 'compute_kgb_indicator : (bpp,npb,npf) = ', bandpp, npband, npfft
 99  call wrtout(std_out,message,'PERS')
100 #endif
101 
102 !Create local communicator for test
103  if (xmpi_paral==1) then
104    nranks=npfft*npband
105    ABI_ALLOCATE(ranks,(nranks))
106    ranks=(/((my_rank-1),my_rank=1,nranks)/)
107    kgb_comm=xmpi_subcomm(glb_comm,nranks,ranks,my_rank_in_group=my_rank)
108    ABI_DEALLOCATE(ranks)
109  else
110    kgb_comm=xmpi_comm_self
111    my_rank=0
112  end if
113 
114 !Only for process in the new subgroup
115  if (my_rank/=xmpi_undefined) then
116 
117 !  We enforce vectsize >=blocksize  (This is not true in lobpcg but
118 !  these are rare cases and this simplify the matrix constructions below...)
119    blocksize=npband*bandpp
120    vectsize=max(1+mpw/(npband*npfft),blocksize)
121    bigorder=3*blocksize
122 
123    ABI_ALLOCATE(blockvectorx,(vectsize,blocksize))
124    ABI_ALLOCATE(blockvectorbx,(vectsize,blocksize))
125    ABI_ALLOCATE(sqgram,(blocksize,blocksize))
126    ABI_ALLOCATE(grama,(2*bigorder,bigorder))
127    ABI_ALLOCATE(gramb,(2*bigorder,bigorder))
128    ABI_ALLOCATE(eigen,(bigorder))
129    ABI_ALLOCATE(val_npslk,(max_number_of_npslk)) ! not too much values tested
130 
131    min_eigen=greatest_real
132    min_ortho=greatest_real
133    np_slk_best=-1 ; np_slk_max=0
134 #ifdef HAVE_LINALG_SCALAPACK
135    np_slk_max=min(max_number_of_npslk,npband*npfft)
136 #endif
137 
138 !  Preselect a range of available np_slk values
139    val_npslk(1:)=0 ; val_npslk(2)=1
140    do islk=3,np_slk_max
141      np_slk=val_npslk(islk-1)*2
142      do while ((modulo(npband*npfft,np_slk)>0).and.(np_slk<(npband*npfft)))
143        np_slk=np_slk+1
144      end do
145      if(np_slk>(npband*npfft).or.np_slk>mband) exit
146      val_npslk(islk)=np_slk
147    end do
148    np_slk_max=islk-1
149 
150 !  Loop over np_slk values
151    islk1=1
152 #ifdef HAVE_LINALG_MAGMA
153    islk1=1-uselinalggpu
154 #endif
155    do islk=islk1,np_slk_max
156 
157      time_xortho=zero ; time_xeigen=zero
158 
159      use_slk=0
160      if (islk==0) then
161 !      This is the test for the GPU
162        use_lapack_gpu=1 ; np_slk=0
163      else
164        use_lapack_gpu=0 ; np_slk=val_npslk(islk)
165        if (np_slk>0) use_slk=1
166      end if
167 
168 !    Initialize linalg parameters for this np_slk value
169 !    For the first np_slk value, everything is initialized
170 !    For the following np_slk values, only Scalapack parameters are updated
171      call abi_linalg_init(kgb_comm,np_slk,bigorder,my_rank,only_scalapack=(islk>islk1))
172 
173 !    We could do mband/blocksize iter as in lobpcg but it's too long
174      do iter=1,max_number_of_iter
175 
176 !      Build matrixes
177        do ii=1,vectsize
178          do jj=1,blocksize
179            if (ii>jj) then
180              blockvectorx(ii,jj) =czero
181              blockvectorbx(ii,jj)=czero
182            else
183              blockvectorx(ii,jj) =cone
184              blockvectorbx(ii,jj)=cone
185            end if
186          end do
187        end do
188        grama=zero;gramb=zero
189        do jj=1,bigorder
190          do ii=jj,bigorder
191            if (ii==jj) then
192              grama(2*ii-1,jj)=one
193              gramb(2*ii-1,jj)=one
194            else
195              grama(2*ii-1:2*ii,jj)=one
196              grama(2*jj-1,ii)= one
197              grama(2*jj  ,ii)=-one
198            end if
199          end do
200        end do
201 
202 !      Call to abi_xorthonormalize
203        time_xortho=time_xortho-abi_wtime()
204        call abi_xorthonormalize(blockvectorx,blockvectorbx,blocksize,kgb_comm,sqgram,vectsize)
205        time_xortho = time_xortho + abi_wtime()
206 
207 !      Call to abi_xhegv
208        time_xeigen=time_xeigen-abi_wtime()
209        call abi_xhegv(1,'v','u',bigorder,grama,gramb,eigen,&
210 &       x_cplx=2,use_slk=use_slk,use_gpu=use_lapack_gpu)
211        time_xeigen=time_xeigen+abi_wtime()
212 
213      end do ! iter
214 
215 !    Finalize linalg parameters for this np_slk value
216 !    For the last np_slk value, everything is finalized
217 !    For the previous np_slk values, only Scalapack parameters are updated
218      call abi_linalg_finalize(only_scalapack=(islk<np_slk_max))
219 
220      time_xortho= time_xortho*mband/blocksize
221      time_xeigen= time_xeigen*mband/blocksize
222      if (time_xortho<min_ortho) min_ortho=time_xortho
223      if (time_xeigen<min_eigen) then
224        min_eigen=time_xeigen
225        np_slk_best=np_slk
226        keep_gpu=use_lapack_gpu
227      end if
228 
229    end do ! np_slk
230 
231 #ifdef DEBUG_MODE
232    write(message,'(2(a,es15.3),a,i3)') ' In the best case, xortho took ',min_ortho,&
233 &   ' and xeigen took ',min_eigen,' for np_slk=',np_slk_best
234    call wrtout(std_out,message,'PERS')
235 #endif
236 
237 !  Final values to be sent to others process
238    acc_kgb=min_ortho+four*min_eigen
239    npslk=max(np_slk_best,1)
240    uselinalggpu=keep_gpu
241 
242    ABI_DEALLOCATE(blockvectorx)
243    ABI_DEALLOCATE(blockvectorbx)
244    ABI_DEALLOCATE(sqgram)
245    ABI_DEALLOCATE(grama)
246    ABI_DEALLOCATE(gramb)
247    ABI_DEALLOCATE(eigen)
248    ABI_DEALLOCATE(val_npslk)
249 
250  end if ! my_rank in group
251 
252 !Free local MPI communicator
253  call xmpi_comm_free(kgb_comm)
254 
255 !Broadcast of results to be sure every process has them
256  call xmpi_bcast(acc_kgb,0,glb_comm,ierr)
257  call xmpi_bcast(npslk,0,glb_comm,ierr)
258  call xmpi_bcast(uselinalggpu,0,glb_comm,ierr)
259 
260 #ifndef DEBUG_MODE
261  ABI_UNUSED(message)
262 #endif
263 
264  DBG_EXIT("COLL")
265 
266 end subroutine compute_kgb_indicator