TABLE OF CONTENTS


ABINIT/m_lobpcgwf [ Functions ]

[ Top ] [ Functions ]

NAME

 m_lobpcgwf

FUNCTION

 This routine updates the whole wave functions at a given k-point,
 using the lobpcg method
 for a given spin-polarization, from a fixed hamiltonian
 but might also simply compute eigenvectors and eigenvalues at this k point.
 it will also update the matrix elements of the hamiltonian.

COPYRIGHT

 Copyright (C) 1998-2024 ABINIT group (JB)
 this file is distributed under the terms of the
 gnu general public license, see ~abinit/COPYING
 or http://www.gnu.org/copyleft/gpl.txt .
 for the initials of contributors, see ~abinit/doc/developers/contributors.txt .

SOURCE

 21 #if defined HAVE_CONFIG_H
 22 #include "config.h"
 23 #endif
 24 
 25 #include "abi_common.h"
 26 
 27 ! nvtx related macro definition
 28 #include "nvtx_macros.h"
 29 
 30 module m_lobpcgwf
 31 
 32  use defs_basis
 33  use m_abicore
 34  use m_lobpcg
 35  use m_xmpi
 36  use m_errors
 37  use m_time
 38  use m_xomp
 39  use m_fstrings
 40  use m_xg
 41  use m_xgTransposer
 42  use m_lobpcg2
 43  use m_dtset
 44 
 45  use defs_abitypes, only : mpi_type
 46  use m_hamiltonian, only : gs_hamiltonian_type
 47  use m_pawcprj,     only : pawcprj_type
 48  use m_nonlop,      only : nonlop
 49  use m_prep_kgb,    only : prep_getghc, prep_nonlop
 50  use m_getghc,      only : multithreaded_getghc
 51  use m_cgtools,     only : dotprod_g
 52 
 53 #if defined(HAVE_GPU) && defined(HAVE_GPU_MARKERS)
 54  use m_nvtx_data
 55 #endif
 56 
 57  use, intrinsic :: iso_c_binding
 58 
 59  private
 60 
 61  integer, parameter :: l_tim_getghc=5
 62  double precision, parameter :: inv_sqrt2 = 1/sqrt2
 63 
 64  ! For use in getghc_gsc1
 65  integer,save  :: l_cpopt
 66  integer,save  :: l_icplx
 67  integer,save  :: l_istwf
 68  integer,save  :: l_npw
 69  integer,save  :: l_nspinor
 70  logical,save  :: l_paw
 71  integer, save :: l_paral_kgb
 72  integer,save  :: l_prtvol
 73  integer,save  :: l_sij_opt
 74  real(dp), allocatable,save ::  l_pcon(:)
 75  type(mpi_type),pointer,save :: l_mpi_enreg
 76  type(gs_hamiltonian_type),pointer,save :: l_gs_hamk
 77 
 78  public :: lobpcgwf2
 79 
 80  contains
 81 
 82 subroutine lobpcgwf2(cg,dtset,eig,occ,enl_out,gs_hamk,isppol,ikpt,inonsc,istep,kinpw,mpi_enreg,&
 83 &                   nband,npw,nspinor,prtvol,resid,nbdbuf)
 84 
 85  implicit none
 86 
 87 !Arguments ------------------------------------
 88  integer,intent(in) :: nband,npw,prtvol,nspinor
 89  integer,intent(in) :: isppol,ikpt,inonsc,istep,nbdbuf
 90  type(gs_hamiltonian_type),target,intent(inout) :: gs_hamk
 91  type(dataset_type)              ,intent(in   ) :: dtset
 92  type(mpi_type)           ,target,intent(in)    :: mpi_enreg
 93  real(dp)                 ,target,intent(inout) :: cg(2,nspinor*nband*npw)!,gsc(2,nspinor*nband*npw)
 94  real(dp)                        ,intent(in   ) :: kinpw(npw)
 95  real(dp)                 ,target,intent(  out) :: resid(nband)
 96  real(dp)                        ,intent(  out) :: enl_out(nband)
 97  real(dp)                 ,target,intent(  out) :: eig(nband)
 98  real(dp)                 ,target,intent(in   ) :: occ(nband)
 99 
100 !Local variables-------------------------------
101 
102  type(xgBlock_t) :: xgx0
103  type(xgBlock_t) :: xgeigen
104  type(xgBlock_t) :: xgresidu
105  type(xgBlock_t) :: xgocc
106  type(lobpcg_t) :: lobpcg
107 
108  integer :: space, blockdim,  nline
109  integer :: ipw
110 
111 ! integer :: nthreads
112 !
113 ! double precision :: lobpcgMem(2)
114 ! double precision :: localmem
115 
116  integer, parameter :: tim_lobpcgwf2 = 1640
117  double precision :: cputime, walltime
118  double precision :: tsec(2)
119 
120  type(c_ptr) :: cptr
121  real(dp), pointer :: eig_ptr(:,:) => NULL()
122  real(dp), pointer :: resid_ptr(:,:) => NULL()
123  real(dp), pointer :: occ_ptr(:,:) => NULL()
124 
125  ! Important things for NC
126  integer,parameter :: choice=1, paw_opt=0, signs=1
127  type(pawcprj_type) :: cprj_dum(1,1)
128  integer :: iband, shift
129  real(dp) :: gsc_dummy(0,0)
130  real(dp), allocatable :: l_gvnlxc(:,:)
131 
132 ! *********************************************************************
133 
134 
135 !###########################################################################
136 !################ INITIALISATION  ##########################################
137 !###########################################################################
138 
139  call timab(tim_lobpcgwf2,1,tsec)
140  cputime = abi_cpu_time()
141  walltime = abi_wtime()
142 
143  ! Set module variables
144  l_paw = (gs_hamk%usepaw==1)
145  l_cpopt=-1;l_sij_opt=0;if (l_paw) l_sij_opt=1
146  l_istwf=gs_hamk%istwf_k
147  l_npw = npw
148  l_nspinor = nspinor
149  l_prtvol = prtvol
150  l_mpi_enreg => mpi_enreg
151  l_gs_hamk => gs_hamk
152  l_paral_kgb = dtset%paral_kgb
153 
154 !Variables
155  nline=dtset%nline
156  blockdim=nband/dtset%nblock_lobpcg !=l_mpi_enreg%nproc_band*l_mpi_enreg%bandpp
157 
158 !Depends on istwfk
159  if ( l_istwf == 2 ) then ! Real only
160    ! SPACE_CR mean that we have complex numbers but no re*im terms only re*re
161    ! and im*im so that a vector of complex is consider as a long vector of real
162    ! therefore the number of data is (2*npw*nspinor)*nband
163    ! This space is completely equivalent to SPACE_R but will correctly set and
164    ! get the array data into the xgBlock
165    space = SPACE_CR
166    l_icplx = 2
167 
168  else ! complex
169    space = SPACE_C
170    l_icplx = 1
171  end if
172 
173  ! Memory info
174  !if ( prtvol >= 3 ) then
175  !  lobpcgMem = lobpcg_memInfo(nband,l_icplx*l_npw*l_nspinor,blockdim,space)
176  !  localMem = (l_npw+2*l_npw*l_nspinor*blockdim+2*nband)*kind(1.d0)
177  !  write(std_out,'(1x,A,F10.6,1x,A)') "Each MPI process calling lobpcg should need around ", &
178  !  (localMem+sum(lobpcgMem))/1e9, &
179  !  "GB of peak memory as follows :"
180  !  write(std_out,'(4x,A,F10.6,1x,A)') "Permanent memory in lobpcgwf : ", &
181  !  (localMem)/1e9, "GB"
182  !  write(std_out,'(4x,A,F10.6,1x,A)') "Permanent memory in m_lobpcg : ", &
183  !  (lobpcgMem(1))/1e9, "GB"
184  !  write(std_out,'(4x,A,F10.6,1x,A)') "Temporary memory in m_lobpcg : ", &
185  !  (lobpcgMem(2))/1e9, "GB"
186  !end if
187 
188  !For preconditionning
189  ABI_MALLOC(l_pcon,(1:l_icplx*npw))
190  !$omp parallel do schedule(static), shared(l_pcon,kinpw)
191  do ipw=1-1,l_icplx*npw-1
192    if(kinpw(ipw/l_icplx+1)>huge(zero)*1.d-11) then
193      l_pcon(ipw+1)=0.d0
194    else
195      l_pcon(ipw+1) = (27+kinpw(ipw/l_icplx+1)*(18+kinpw(ipw/l_icplx+1)*(12+8*kinpw(ipw/l_icplx+1)))) &
196 &    / (27+kinpw(ipw/l_icplx+1)*(18+kinpw(ipw/l_icplx+1)*(12+8*kinpw(ipw/l_icplx+1))) + 16*kinpw(ipw/l_icplx+1)**4)
197    end if
198  end do
199 
200 #ifdef HAVE_OPENMP_OFFLOAD
201  if(gs_hamk%gpu_option==ABI_GPU_OPENMP) then
202    !$OMP TARGET ENTER DATA MAP(to:cg,eig,resid)
203  end if
204 #endif
205 
206  ! Local variables for lobpcg
207  !call xg_init(xgx0,space,icplx*npw*nspinor,nband)
208  call xgBlock_map(xgx0,cg,space,l_icplx*l_npw*l_nspinor,nband,l_mpi_enreg%comm_bandspinorfft,gpu_option=dtset%gpu_option)
209  if ( l_istwf == 2 ) then ! Real only
210    ! Scale cg
211    call xgBlock_scale(xgx0,sqrt2,1)
212    ! This is possible since the memory in cg and xgx0 is the same
213    ! Don't know yet how to deal with this with xgBlock
214    if(l_mpi_enreg%me_g0 == 1) then
215      if(l_gs_hamk%gpu_option==ABI_GPU_OPENMP) then
216 #ifdef HAVE_OPENMP_OFFLOAD
217        !$OMP TARGET MAP(to:cg)
218        cg(:, 1:npw*nspinor*nband:npw) = cg(:, 1:npw*nspinor*nband:npw) * inv_sqrt2
219        !$OMP END TARGET
220 #endif
221      else
222        cg(:, 1:npw*nspinor*nband:npw) = cg(:, 1:npw*nspinor*nband:npw) * inv_sqrt2
223      end if
224    end if
225  end if
226 
227  !call xg_init(xgeigen,SPACE_R,nband,1,l_mpi_enreg%comm_bandspinorfft)
228  ! Trick the with C to change rank of arrays (:) to (:,:)
229  cptr = c_loc(eig)
230  call c_f_pointer(cptr,eig_ptr,(/ nband,1 /))
231  call xgBlock_map(xgeigen,eig_ptr,SPACE_R,nband,1,gpu_option=dtset%gpu_option)
232 
233  !call xg_init(xgresidu,SPACE_R,nband,1,l_mpi_enreg%comm_bandspinorfft)
234  ! Trick the with C to change rank of arrays (:) to (:,:)
235  cptr = c_loc(resid)
236  call c_f_pointer(cptr,resid_ptr,(/ nband,1 /))
237  call xgBlock_map(xgresidu,resid_ptr,SPACE_R,nband,1,gpu_option=dtset%gpu_option)
238 
239  !call xg_init(xgeigen,SPACE_R,nband,1,l_mpi_enreg%comm_bandspinorfft)
240  ! Trick the with C to change rank of arrays (:) to (:,:)
241  cptr = c_loc(occ)
242  call c_f_pointer(cptr,occ_ptr,(/ nband,1 /))
243  call xgBlock_map(xgocc,occ_ptr,SPACE_R,nband,1,gpu_option=dtset%gpu_option)
244 
245  !ABI_MALLOC(l_gvnlxc,(2,l_npw*l_nspinor*blockdim))
246 
247  call lobpcg_init(lobpcg,nband,l_icplx*l_npw*l_nspinor,blockdim,dtset%tolwfr,nline,&
248    space,l_mpi_enreg%comm_bandspinorfft,l_paral_kgb,l_mpi_enreg%comm_spinorfft,l_mpi_enreg%comm_band,&
249    gs_hamk%gpu_option)
250 
251 !###########################################################################
252 !################    RUUUUUUUN    ##########################################
253 !###########################################################################
254 
255  ! Run lobpcg
256  call lobpcg_run(lobpcg,xgx0,getghc_gsc1,precond,xgeigen,xgocc,xgresidu,prtvol,nspinor,isppol,ikpt,inonsc,istep,nbdbuf)
257 
258  ! Free preconditionning since not needed anymore
259  ABI_FREE(l_pcon)
260 
261  ! Scale back
262  if(l_istwf == 2) then
263    call xgBlock_scale(xgx0,inv_sqrt2,1)
264    if(l_mpi_enreg%me_g0 == 1) then
265      if(l_gs_hamk%gpu_option==ABI_GPU_OPENMP) then
266 #ifdef HAVE_OPENMP_OFFLOAD
267        !$OMP TARGET MAP(to:cg)
268        cg(:, 1:npw*nspinor*nband:npw) = cg(:, 1:npw*nspinor*nband:npw) * sqrt2
269        !$OMP END TARGET
270 #endif
271      else
272        cg(:, 1:npw*nspinor*nband:npw) = cg(:, 1:npw*nspinor*nband:npw) * sqrt2
273      end if
274    end if
275  end if
276 
277  ! Compute enlout (nonlocal energy for each band if necessary) This is the best
278  ! quick and dirty trick to compute this part in NC. gvnlxc cannot be part of
279  ! lobpcg algorithm
280  if ( .not. l_paw ) then
281    !Check l_gvnlxc size
282    !if ( size(l_gvnlxc) < 2*nband*l_npw*l_nspinor ) then
283    !if ( size(l_gvnlxc) /= 0 ) then
284    !  ABI_FREE(l_gvnlxc)
285      !ABI_MALLOC(l_gvnlxc,(2,nband*l_npw*l_nspinor))
286 #ifdef FC_CRAY
287    ABI_MALLOC(l_gvnlxc,(1,1))
288 #else
289    ABI_MALLOC(l_gvnlxc,(0,0))
290 #endif
291 
292    !Call nonlop
293    if (l_paral_kgb==0) then
294 
295      call nonlop(choice,l_cpopt,cprj_dum,enl_out,l_gs_hamk,0,eig,mpi_enreg,nband,1,paw_opt,&
296 &                signs,gsc_dummy,l_tim_getghc,cg,l_gvnlxc)
297 
298    else
299      do iband=1,nband/blockdim
300        shift = (iband-1)*blockdim*l_npw*l_nspinor
301        call prep_nonlop(choice,l_cpopt,cprj_dum, &
302 &        enl_out((iband-1)*blockdim+1:iband*blockdim),l_gs_hamk,0,&
303 &        eig((iband-1)*blockdim+1:iband*blockdim),blockdim,mpi_enreg,1,paw_opt,signs,&
304 &        gsc_dummy,l_tim_getghc, &
305 &        cg(:,shift+1:shift+blockdim*l_npw*l_nspinor),&
306 !&        l_gvnlxc(:,shift+1:shift+blockdim*l_npw*l_nspinor),&
307 &        l_gvnlxc(:,:),&
308 &        already_transposed=.false.)
309      end do
310    end if
311    !Compute enlout
312 !   do iband=1,nband
313 !     shift = npw*nspinor*(iband-1)
314 !       call dotprod_g(enl_out(iband),dprod_i,l_gs_hamk%istwf_k,npw*nspinor,1,cg(:, shift+1:shift+npw*nspinor),&
315 !  &     l_gvnlxc(:, shift+1:shift+npw*nspinor),mpi_enreg%me_g0,mpi_enreg%comm_bandspinorfft)
316 !   end do
317    ABI_FREE(l_gvnlxc)
318  end if
319 
320  ! Free lobpcg
321  call lobpcg_free(lobpcg)
322 
323 #ifdef HAVE_OPENMP_OFFLOAD
324  if(gs_hamk%gpu_option==ABI_GPU_OPENMP) then
325    !$OMP TARGET EXIT DATA MAP(from:cg,eig,resid)
326  end if
327 #endif
328 !###########################################################################
329 !################    SORRY IT'S ALREADY FINISHED : )  ######################
330 !###########################################################################
331 
332 
333  call timab(tim_lobpcgwf2,2,tsec)
334 ! cputime = abi_cpu_time() - cputime
335 ! walltime = abi_wtime() - walltime
336 ! nthreads = xomp_get_num_threads(open_parallel = .true.)
337 ! if ( cputime/walltime/dble(nthreads) < 0.75 .and. (int(cputime/walltime)+1) /= nthreads) then
338 !   if ( prtvol >= 3 ) then
339 !     write(std_out,'(a)',advance='no') sjoin(" Lobpcg took", sec2str(cputime), "of cpu time")
340 !     write(std_out,*) sjoin("for a wall time of", sec2str(walltime))
341 !     write(std_out,'(a,f6.2)') " -> Ratio of ", cputime/walltime
342 !   end if
343 !   ABI_COMMENT(sjoin("You should set the number of threads to something close to",itoa(int(cputime/walltime)+1)))
344 ! end if
345 
346  DBG_EXIT("COLL")
347 
348 end subroutine lobpcgwf2
349 
350 subroutine getghc_gsc1(X,AX,BX,transposer)
351 
352  implicit none
353 
354 !Arguments ------------------------------------
355  type(xgBlock_t), intent(inout) :: X
356  type(xgBlock_t), intent(inout) :: AX
357  type(xgBlock_t), intent(inout) :: BX
358  type(xgTransposer_t), intent(inout) :: transposer
359  integer         :: blockdim
360  integer         :: spacedim
361  type(pawcprj_type) :: cprj_dum(l_gs_hamk%natom,1)
362 
363 !Local variables-------------------------------
364 !scalars
365  integer :: cpuRow
366  real(dp) :: eval
367 !arrays
368  real(dp), pointer :: cg(:,:)
369  real(dp), pointer :: ghc(:,:)
370  real(dp), pointer :: gsc(:,:)
371  real(dp)          :: l_gvnlxc(1,1)
372 
373 ! *********************************************************************
374 
375  call xgBlock_getSize(X,spacedim,blockdim)
376  call xgBlock_check(X,AX)
377  call xgBlock_check(X,BX)
378 
379  spacedim = spacedim/l_icplx
380 
381  call xgBlock_reverseMap(X,cg,l_icplx,spacedim*blockdim)
382  call xgBlock_reverseMap(AX,ghc,l_icplx,spacedim*blockdim)
383  call xgBlock_reverseMap(BX,gsc,l_icplx,spacedim*blockdim)
384 
385  !Scale back cg
386  if (l_paral_kgb == 1) cpuRow = xgTransposer_getRank(transposer, 2)
387  if(l_istwf == 2) then
388    call xgBlock_scale(X,inv_sqrt2,1)
389    if(l_gs_hamk%gpu_option==ABI_GPU_OPENMP) then
390 #ifdef HAVE_OPENMP_OFFLOAD
391      if (l_paral_kgb == 0) then
392        if(l_mpi_enreg%me_g0 == 1) then
393          !$OMP TARGET MAP(cg)
394          cg(:, 1:spacedim*blockdim:l_npw) = cg(:, 1:spacedim*blockdim:l_npw) * sqrt2
395          !$OMP END TARGET
396        end if
397      else
398        if (cpuRow == 0) then
399          !$OMP TARGET MAP(cg)
400          cg(:, 1:spacedim*blockdim:spacedim) = cg(:, 1:spacedim*blockdim:spacedim) * sqrt2
401          !$OMP END TARGET
402        end if
403      end if
404 #endif
405    else
406      if (l_paral_kgb == 0) then
407        if(l_mpi_enreg%me_g0 == 1) cg(:, 1:spacedim*blockdim:l_npw) = cg(:, 1:spacedim*blockdim:l_npw) * sqrt2
408      else
409        if (cpuRow == 0) cg(:, 1:spacedim*blockdim:spacedim) = cg(:, 1:spacedim*blockdim:spacedim) * sqrt2
410      end if
411    end if
412  end if
413 
414  !if ( size(l_gvnlxc) < 2*blockdim*spacedim ) then
415  !  ABI_FREE(l_gvnlxc)
416  !  ABI_MALLOC(l_gvnlxc,(2,blockdim*spacedim))
417  !end if
418 
419  call multithreaded_getghc(l_cpopt,cg,cprj_dum,ghc,gsc,&
420    l_gs_hamk,l_gvnlxc,eval,l_mpi_enreg,blockdim,l_prtvol,l_sij_opt,l_tim_getghc,0)
421 
422 
423 #if defined(HAVE_GPU_CUDA) && defined(HAVE_YAKL)
424  call gpu_device_synchronize()
425 #endif
426 
427  !Scale cg, ghc, gsc
428  if ( l_istwf == 2 ) then
429    call xgBlock_scale(X ,sqrt2,1)
430    call xgBlock_scale(AX,sqrt2,1)
431 
432    if(l_gs_hamk%gpu_option==ABI_GPU_OPENMP) then
433 #ifdef HAVE_OPENMP_OFFLOAD
434      if (l_paral_kgb == 0) then
435        if(l_mpi_enreg%me_g0 == 1) then
436          !$OMP TARGET MAP(cg)
437          cg(:, 1:spacedim*blockdim:l_npw) = cg(:, 1:spacedim*blockdim:l_npw) * inv_sqrt2
438          !$OMP END TARGET
439          !$OMP TARGET MAP(ghc)
440          ghc(:, 1:spacedim*blockdim:l_npw) = ghc(:, 1:spacedim*blockdim:l_npw) * inv_sqrt2
441          !$OMP END TARGET
442        endif
443      else
444        if (cpuRow == 0) then
445          !$OMP TARGET MAP(cg)
446          cg(:, 1:spacedim*blockdim:spacedim) = cg(:, 1:spacedim*blockdim:spacedim) * inv_sqrt2
447          !$OMP END TARGET
448          !$OMP TARGET MAP(ghc)
449          ghc(:, 1:spacedim*blockdim:spacedim) = ghc(:, 1:spacedim*blockdim:spacedim) * inv_sqrt2
450          !$OMP END TARGET
451        end if
452      end if
453 #endif
454    else
455      if (l_paral_kgb == 0) then
456        if(l_mpi_enreg%me_g0 == 1) then
457          cg(:, 1:spacedim*blockdim:l_npw) = cg(:, 1:spacedim*blockdim:l_npw) * inv_sqrt2
458          ghc(:, 1:spacedim*blockdim:l_npw) = ghc(:, 1:spacedim*blockdim:l_npw) * inv_sqrt2
459        endif
460      else
461        if (cpuRow == 0) then
462          cg(:, 1:spacedim*blockdim:spacedim) = cg(:, 1:spacedim*blockdim:spacedim) * inv_sqrt2
463          ghc(:, 1:spacedim*blockdim:spacedim) = ghc(:, 1:spacedim*blockdim:spacedim) * inv_sqrt2
464        end if
465      end if
466    end if
467    if(l_paw) then
468      call xgBlock_scale(BX,sqrt2,1)
469      if(l_gs_hamk%gpu_option==ABI_GPU_OPENMP) then
470 #ifdef HAVE_OPENMP_OFFLOAD
471        if (l_paral_kgb == 0) then
472          if(l_mpi_enreg%me_g0 == 1) then
473            !$OMP TARGET MAP(gsc)
474            gsc(:, 1:spacedim*blockdim:l_npw) = gsc(:, 1:spacedim*blockdim:l_npw) * inv_sqrt2
475            !$OMP END TARGET
476          end if
477        else
478          if (cpuRow == 0) then
479            !$OMP TARGET MAP(gsc)
480            gsc(:, 1:spacedim*blockdim:spacedim) = gsc(:, 1:spacedim*blockdim:spacedim) * inv_sqrt2
481            !$OMP END TARGET
482          end if
483        end if
484 #endif
485      else
486        if (l_paral_kgb == 0) then
487          if(l_mpi_enreg%me_g0 == 1) gsc(:, 1:spacedim*blockdim:l_npw) = gsc(:, 1:spacedim*blockdim:l_npw) * inv_sqrt2
488        else
489          if (cpuRow == 0) gsc(:, 1:spacedim*blockdim:spacedim) = gsc(:, 1:spacedim*blockdim:spacedim) * inv_sqrt2
490        end if
491      end if
492    end if ! l_paw
493  end if ! l_istwf==2
494 
495  if ( .not. l_paw ) call xgBlock_copy(X,BX)
496 
497 end subroutine getghc_gsc1
498 
499  subroutine precond(W)
500    use m_xg, only : xg_t, xgBlock_colwiseMul
501    type(xgBlock_t), intent(inout) :: W
502    integer :: ispinor
503    !integer :: cplx
504 
505    ! precondition resid_vec
506    do ispinor = 1,l_nspinor
507      !do cplx = 1, l_icplx
508      call xgBlock_colwiseMul(W,l_pcon,l_icplx*l_npw*(ispinor-1))
509       !end do
510    end do
511 
512  end subroutine precond
513 
514  end module m_lobpcgwf