ABINIT/m_chebfiwf [ Functions ]

[ Top ] [ Functions ]

NAME

 m_chebfiwf

FUNCTION

 This module contains a routine that updates the whole set of wave functions at a given
 k-point, for a given spin polarization and a fixed Hamiltonian, using the Chebyshev
 filtering method (2021 implementation based on the xG abstraction layer). It may also
 simply compute the eigenvectors and eigenvalues at this k-point, and it updates the
 matrix elements of the Hamiltonian as well.

COPYRIGHT

 Copyright (C) 2018-2024 ABINIT group (BS)
 This file is distributed under the terms of the
 GNU General Public License, see ~abinit/COPYING
 or http://www.gnu.org/copyleft/gpl.txt .
 For the initials of contributors, see ~abinit/doc/developers/contributors.txt .

SOURCE

#if defined HAVE_CONFIG_H
#include "config.h"
#endif

#include "abi_common.h"

! nvtx related macro definition
#include "nvtx_macros.h"

module m_chebfiwf

 use defs_abitypes
 use defs_basis
 use m_abicore
 use m_errors
 use m_fstrings
 use m_time

 use m_chebfi
 use m_chebfi2
 use m_invovl

 use m_cgtools,     only : dotprod_g
 use m_dtset,       only : dataset_type

 use m_hamiltonian, only : gs_hamiltonian_type
 use m_nonlop,      only : nonlop
 use m_prep_kgb,    only : prep_getghc, prep_nonlop
 use m_pawcprj,     only : pawcprj_type, pawcprj_alloc, pawcprj_free
 use m_getghc,      only : multithreaded_getghc
 use m_gemm_nonlop, only : gemm_nonlop_use_gemm

 use m_xg
 use m_xgTransposer
 !FIXME Keep these in their own modules, or move them together?
 use m_invovl,             only : invovl_ompgpu_static_mem,invovl_ompgpu_work_mem
 use m_gemm_nonlop_ompgpu, only : gemm_nonlop_ompgpu_static_mem
 use m_getghc_ompgpu,      only : getghc_ompgpu_work_mem

#if defined(HAVE_GPU)
 use m_gpu_toolbox
#endif

#if defined(HAVE_GPU) && defined(HAVE_GPU_MARKERS)
 use m_nvtx_data
#endif

#if defined(HAVE_YAKL)
 use gator_mod
#endif

 use, intrinsic :: iso_c_binding, only: c_associated,c_loc,c_ptr,c_f_pointer,c_double,c_size_t

 use m_xmpi
 use m_xomp
#ifdef HAVE_OPENMP
 use omp_lib
#endif

 implicit none

 private

 integer, parameter :: l_tim_getghc=7
 real(dp), parameter :: inv_sqrt2 = 1/sqrt2

! For use in the chebfi_run callbacks (getghc_gsc1, getBm1X, precond1)
 integer, save :: l_cpopt
 integer, save :: l_icplx
 integer, save :: l_istwf
 integer, save :: l_npw
 integer, save :: l_nband_filter
 integer, save :: l_nspinor
 logical, save :: l_paw
 integer, save :: l_prtvol
 integer, save :: l_sij_opt
 integer, save :: l_paral_kgb
 integer, save :: l_useria
 integer, save :: l_block_sliced

#if defined HAVE_GPU && defined HAVE_YAKL
 real(kind=c_double), ABI_CONTIGUOUS pointer, save :: l_pcon(:)
#else
 real(dp),            allocatable,            save :: l_pcon(:)
#endif

 type(mpi_type),pointer,save :: l_mpi_enreg
 type(gs_hamiltonian_type),pointer,save :: l_gs_hamk

 integer, parameter :: DEBUG_ROWS = 5
 integer, parameter :: DEBUG_COLUMNS = 5

 public :: chebfiwf2_blocksize
 public :: chebfiwf2

 CONTAINS  !========================================================================================

m_chebfiwf/chebfiwf2 [ Functions ]

[ Top ] [ m_chebfiwf ] [ Functions ]

NAME

 chebfiwf2

FUNCTION

 This routine updates the whole set of wave functions at a given k-point,
 using the Chebyshev filtering (chebfi) method (2021 version based on the xG abstraction layer)

INPUTS

  dtset= input variables for this dataset
  kinpw(npw)= kinetic energy for each plane wave (Hartree)
  mpi_enreg= MPI-parallelisation information
  nband= number of bands at this k point
  npw= number of plane waves at this k point
  nspinor= number of spinorial components of the wavefunctions
  prtvol= control print volume and debugging

OUTPUT

  eig(nband)= eigenvalues (Hartree) for all bands
  enl_out(nband)= contribution of each band to the nl part of energy
  resid(nband)= residuals for each band

SIDE EFFECTS

  cg(2,npw*nspinor*nband)= planewave coefficients of wavefunctions
  gs_hamk <type(gs_hamiltonian_type)>=all data for the hamiltonian at k

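NOTES

  A minimal calling sketch (hedged: this simply mirrors the argument lists
  above; allocation sizes follow the INPUTS/OUTPUT descriptions, and all setup
  of cg, dtset, gs_hamk, kinpw and mpi_enreg is assumed done by the caller):

   real(dp),allocatable :: eig(:),resid(:),enl_out(:)
   ABI_MALLOC(eig,(nband))
   ABI_MALLOC(resid,(nband))
   ABI_MALLOC(enl_out,(nband))
   call chebfiwf2(cg,dtset,eig,enl_out,gs_hamk,kinpw,mpi_enreg,&
&                 nband,npw,nspinor,prtvol,resid)
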
SOURCE

subroutine chebfiwf2(cg,dtset,eig,enl_out,gs_hamk,kinpw,mpi_enreg,&
&                    nband,npw,nspinor,prtvol,resid)

 implicit none

 ! Arguments ------------------------------------
 integer,intent(in) :: nband,npw,prtvol,nspinor
 type(mpi_type),target,intent(in) :: mpi_enreg
 real(dp),target,intent(inout) :: cg(2,npw*nspinor*nband)
 real(dp),intent(in) :: kinpw(npw)
 real(dp),target,intent(out) :: resid(nband)
 real(dp),intent(out) :: enl_out(nband)
 real(dp),target,intent(out) :: eig(nband)
 type(dataset_type),intent(in) :: dtset
 type(gs_hamiltonian_type),target,intent(inout) :: gs_hamk

 ! Local variables-------------------------------
 ! scalars
 integer, parameter :: tim_chebfiwf2 = 1750
 integer :: ipw,space,blockdim,nline,total_spacedim,ierr
 real(dp) :: localmem
 type(c_ptr) :: cptr
 type(chebfi_t) :: chebfi
 type(xgBlock_t) :: xgx0,xgeigen,xgresidu
 ! arrays
 real(dp) :: tsec(2),chebfiMem(2)
 real(dp),pointer :: eig_ptr(:,:) => NULL()
 real(dp),pointer :: resid_ptr(:,:) => NULL()
 real(dp), allocatable :: l_gvnlxc(:,:)

 ! Stupid things for NC
 integer,parameter :: choice=1, paw_opt=0, signs=1
 real(dp) :: gsc_dummy(1,1)
 type(pawcprj_type) :: cprj_dum(gs_hamk%natom,1)

#if defined(HAVE_GPU_CUDA) && defined(HAVE_YAKL)
 integer(kind=c_size_t) :: l_pcon_size_bytes
#endif

! *********************************************************************

!################ INITIALIZATION  #####################################
!######################################################################

  call timab(tim_chebfiwf2,1,tsec)

!Set module variables
 l_paw = (gs_hamk%usepaw==1)
 l_cpopt=-1;l_sij_opt=0;if (l_paw) l_sij_opt=1
 l_istwf=gs_hamk%istwf_k
 l_npw = npw
 l_nspinor = nspinor
 l_prtvol = prtvol
 l_mpi_enreg => mpi_enreg
 l_gs_hamk => gs_hamk
 l_nband_filter = nband
 l_paral_kgb = dtset%paral_kgb
 l_block_sliced = dtset%invovl_blksliced

!Variables
 nline=dtset%nline
 blockdim=l_mpi_enreg%nproc_band*l_mpi_enreg%bandpp
 !for debug
 l_useria=dtset%useria

!Depends on istwfk
 if ( l_istwf == 2 ) then ! Real only
   ! SPACE_CR means that we have complex numbers but only re*re and im*im
   ! products, no re*im cross terms, so that a vector of complex numbers is
   ! treated as a long vector of reals; the number of data is therefore
   ! (2*npw*nspinor)*nband. This space is completely equivalent to SPACE_R,
   ! but will correctly set and get the array data into the xgBlock.
   space = SPACE_CR
   l_icplx = 2
 else ! complex
   space = SPACE_C
   l_icplx = 1
 end if
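
 ! Illustration: with istwf_k==2 a column of xgx0 holding npw*nspinor complex
 ! coefficients is addressed as 2*npw*nspinor contiguous reals, so a scalar
 ! product computed in SPACE_CR is sum_i ( re(i)*re'(i) + im(i)*im'(i) ),
 ! i.e. a plain real dot product of length 2*npw*nspinor (see also the sqrt2
 ! rescaling below, which makes this equal to the true scalar product).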

!Memory info
 if ( prtvol >= 3 ) then
   if (l_mpi_enreg%paral_kgb == 1) then
     total_spacedim = l_icplx*l_npw*l_nspinor
     call xmpi_sum(total_spacedim,l_mpi_enreg%comm_bandspinorfft,ierr)
   else
     total_spacedim = 0
   end if
   chebfiMem = chebfi_memInfo(nband,l_icplx*l_npw*l_nspinor,space,l_mpi_enreg%paral_kgb, &
&                             total_spacedim,l_mpi_enreg%bandpp) !blockdim
   localMem = (l_npw+2*l_npw*l_nspinor+2*nband)*kind(1.d0) !blockdim
   write(std_out,'(1x,A,F10.6,1x,A)') "Each MPI process calling chebfi should need around ", &
   (localMem+sum(chebfiMem))/1e9,"GB of peak memory as follows:"
   write(std_out,'(4x,A,F10.6,1x,A)') "Permanent memory in chebfiwf : ",(localMem)/1e9,"GB"
   write(std_out,'(4x,A,F10.6,1x,A)') "Permanent memory in m_chebfi : ",(chebfiMem(1))/1e9,"GB"
   write(std_out,'(4x,A,F10.6,1x,A)') "Temporary memory in m_chebfi : ",(chebfiMem(2))/1e9,"GB"
 end if

 !For preconditioning
 if(dtset%gpu_option==ABI_GPU_KOKKOS) then
#if defined HAVE_GPU && defined HAVE_YAKL
   ABI_MALLOC_MANAGED(l_pcon, (/l_icplx*npw/))
#endif
 else
   ABI_MALLOC(l_pcon,(1:l_icplx*npw))
 end if

!$omp parallel do schedule(static), shared(l_pcon,kinpw)
 do ipw=0,l_icplx*npw-1
   if(kinpw(ipw/l_icplx+1)>huge(zero)*1.d-11) then
     l_pcon(ipw+1)=0.d0
   else
     l_pcon(ipw+1) = (27+kinpw(ipw/l_icplx+1)*(18+kinpw(ipw/l_icplx+1)*(12+8*kinpw(ipw/l_icplx+1)))) &
&    / (27+kinpw(ipw/l_icplx+1)*(18+kinpw(ipw/l_icplx+1)*(12+8*kinpw(ipw/l_icplx+1))) + 16*kinpw(ipw/l_icplx+1)**4)
   end if
 end do
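
 ! The polynomial above is the Teter-Payne-Allan kinetic-energy preconditioner
 ! (PRB 40, 12255 (1989)): with x the kinetic energy of the plane wave,
 !   pcon(x) = P(x) / ( P(x) + 16*x**4 ),  P(x) = 27 + 18*x + 12*x**2 + 8*x**3
 ! so pcon -> 1 for low-kinetic-energy components and decays as 1/x**4 at high
 ! kinetic energy, damping the dominant kinetic contribution to the residual.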

#if defined(HAVE_GPU_CUDA) && defined(HAVE_YAKL)
 if(l_gs_hamk%gpu_option==ABI_GPU_KOKKOS) then
   ! upload l_pcon to device / gpu
   l_pcon_size_bytes = l_icplx * npw * dp
   call gpu_data_prefetch_async(C_LOC(l_pcon), l_pcon_size_bytes)
 end if
#endif

 call xgBlock_map(xgx0,cg,space,l_icplx*l_npw*l_nspinor,nband,l_mpi_enreg%comm_bandspinorfft,gpu_option=dtset%gpu_option)

 ABI_NVTX_START_RANGE(NVTX_CHEBFI2_SQRT2)
 if ( l_istwf == 2 ) then ! Real only
   ! Scale cg
   call xgBlock_scale(xgx0,sqrt2,1)  ! ALL MPI processes do this

   ! This is possible since cg and xgx0 share the same memory;
   ! it is not yet clear how to handle this directly through xgBlock.
   ! MPI handles this automatically (only proc 0 is me_g0).
   if(l_mpi_enreg%me_g0 == 1) cg(:, 1:npw*nspinor*nband:npw) = cg(:, 1:npw*nspinor*nband:npw) * inv_sqrt2
 end if
 ABI_NVTX_END_RANGE()
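
 ! Why sqrt2: with istwf_k==2 only half of the G sphere is stored, since
 ! c(-G) = conjg(c(G)) for real wavefunctions. The complex scalar product then
 ! reads <x|y> = 2*Re(sum_G x(G)*conjg(y(G))) - x(0)*y(0). Scaling every stored
 ! coefficient by sqrt(2), except the G=0 one (held by the me_g0 process),
 ! turns this into a plain real dot product over 2*npw*nspinor reals, which is
 ! what the SPACE_CR layout expects; the inverse scaling restores cg on exit.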

#ifdef HAVE_OPENMP_OFFLOAD
 !$OMP TARGET ENTER DATA MAP(to:cg,eig,resid) IF(gs_hamk%gpu_option==ABI_GPU_OPENMP)
#endif

!The trick with C pointers is to change the rank of the array from (:) to (:,:)
 cptr = c_loc(eig)
 call c_f_pointer(cptr,eig_ptr,(/ nband,1 /))
 call xgBlock_map(xgeigen,eig_ptr,SPACE_R,nband,1,l_mpi_enreg%comm_bandspinorfft,gpu_option=dtset%gpu_option)
!Same trick with a C pointer to change the rank of the array from (:) to (:,:)
 cptr = c_loc(resid)
 call c_f_pointer(cptr,resid_ptr,(/ nband,1 /))
 call xgBlock_map(xgresidu,resid_ptr,SPACE_R,nband,1,l_mpi_enreg%comm_bandspinorfft,gpu_option=dtset%gpu_option)

! ABI_MALLOC(l_gvnlxc,(2,l_npw*l_nspinor*l_nband_filter))
 call timab(tim_chebfiwf2,2,tsec)

 ABI_NVTX_START_RANGE(NVTX_CHEBFI2_INIT)
 call chebfi_init(chebfi,nband,l_icplx*l_npw*l_nspinor,dtset%tolwfr_diago,dtset%ecut, &
&                 dtset%paral_kgb,l_mpi_enreg%bandpp, &
&                 nline, space,1,l_gs_hamk%istwf_k, &
&                 l_mpi_enreg%comm_bandspinorfft,l_mpi_enreg%me_g0,l_paw,&
&                 l_mpi_enreg%comm_spinorfft,l_mpi_enreg%comm_band,&
&                 l_gs_hamk%gpu_option,gpu_kokkos_nthrd=dtset%gpu_kokkos_nthrd)
 ABI_NVTX_END_RANGE()

!################    RUUUUUUUN    #####################################
!######################################################################

 call chebfi_run(chebfi,xgx0,getghc_gsc1,getBm1X,precond1,xgeigen,xgresidu,nspinor)

!Free preconditioning data since it is not needed anymore
 if(dtset%gpu_option==ABI_GPU_KOKKOS) then
#if defined HAVE_GPU && defined HAVE_YAKL
   ABI_FREE_MANAGED(l_pcon)
#endif
 else
   ABI_FREE(l_pcon)
 end if

!Compute enl_out (nonlocal energy for each band) if necessary. This is the
!  best quick-and-dirty trick to compute this part in the NC case; gvnlxc
!  cannot be part of the chebfi algorithm.
 if ( .not. l_paw ) then
   !Check l_gvnlxc size
   !if ( size(l_gvnlxc) < 2*nband*l_npw*l_nspinor ) then
   !if ( size(l_gvnlxc) /= 0 ) then
   !  ABI_FREE(l_gvnlxc)
#ifdef FC_CRAY
   ABI_MALLOC(l_gvnlxc,(1,1))
#else
   ABI_MALLOC(l_gvnlxc,(0,0))
#endif
   !end if

   ABI_NVTX_START_RANGE(NVTX_CHEBFI2_NONLOP)
   !Call nonlop
   call nonlop(choice,l_cpopt,cprj_dum,enl_out,l_gs_hamk,0,eig,mpi_enreg,nband,1,paw_opt,&
        &            signs,gsc_dummy,l_tim_getghc,cg,l_gvnlxc)
   ABI_NVTX_END_RANGE()
   ABI_FREE(l_gvnlxc)
 end if

!Free chebfi
 call chebfi_free(chebfi)

#ifdef HAVE_OPENMP_OFFLOAD
 !$OMP TARGET UPDATE FROM(cg,eig,resid) IF(gs_hamk%gpu_option==ABI_GPU_OPENMP)
 !$OMP TARGET EXIT DATA MAP(delete:cg,eig,resid) IF(gs_hamk%gpu_option==ABI_GPU_OPENMP)
#endif
!################    SORRY IT'S ALREADY FINISHED : )  #################
!######################################################################

 call timab(tim_chebfiwf2,2,tsec)

 DBG_EXIT("COLL")

end subroutine chebfiwf2

m_chebfiwf/getBm1X [ Functions ]

[ Top ] [ m_chebfiwf ] [ Functions ]

NAME

 getBm1X

FUNCTION

 This routine computes S^-1|C> for a given wave function C.
  It acts as a driver for apply_invovl.

SIDE EFFECTS

  X  <type(xgBlock_t)>= memory block containing |C>
  Bm1X <type(xgBlock_t)>= memory block containing S^-1|C>
  transposer <type(xgTransposer_t)>= data used for array transpositions

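NOTES

  For norm-conserving pseudopotentials the overlap operator S is the identity,
  so Bm1X is a plain copy of X; for PAW the inverse overlap is applied through
  apply_invovl. Stripped of the istwf_k==2 rescaling and of the GPU branches,
  the essential flow of the routine below is:

   if (l_paw) then
     call apply_invovl(l_gs_hamk, ghc_filter, gsm1hc_filter, cwaveprj_next, &
         spacedim/l_nspinor, blockdim, l_mpi_enreg, l_nspinor, l_block_sliced)
   else
     gsm1hc_filter(:,:) = ghc_filter(:,:)
   end if
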
SOURCE

subroutine getBm1X(X,Bm1X,transposer)

 implicit none

!Arguments ------------------------------------
 type(xgBlock_t), intent(inout) :: X
 type(xgBlock_t), intent(inout) :: Bm1X
 type(xgTransposer_t), optional, intent(inout) :: transposer

!Local variables-------------------------------
!scalars
 integer :: blockdim
 integer :: spacedim
 integer :: cpuRow
!arrays
 real(dp), pointer :: ghc_filter(:,:)
 real(dp), pointer :: gsm1hc_filter(:,:)
 type(pawcprj_type), allocatable :: cwaveprj_next(:,:) !dummy

! *********************************************************************

 call xgBlock_getSize(X,spacedim,blockdim)

 spacedim = spacedim/l_icplx

 call xgBlock_reverseMap(X,ghc_filter,l_icplx,spacedim*blockdim)

 call xgBlock_reverseMap(Bm1X,gsm1hc_filter,l_icplx,spacedim*blockdim)

 if (l_paral_kgb == 1) cpuRow = xgTransposer_getRank(transposer, 2)

 !Scale back cg
 if(l_istwf == 2) then
   call xgBlock_scale(X,inv_sqrt2,1)
   if(l_gs_hamk%gpu_option==ABI_GPU_OPENMP) then
#ifdef HAVE_OPENMP_OFFLOAD
     if (l_paral_kgb == 0) then
       if(l_mpi_enreg%me_g0 == 1) then
         !$OMP TARGET MAP(to:ghc_filter)
         ghc_filter(:, 1:spacedim*blockdim:l_npw) = ghc_filter(:, 1:spacedim*blockdim:l_npw) * sqrt2
         !$OMP END TARGET
       end if
     else
       if (cpuRow == 0) then
         !$OMP TARGET MAP(to:ghc_filter)
         ghc_filter(:, 1:spacedim*blockdim:spacedim) = ghc_filter(:, 1:spacedim*blockdim:spacedim) * sqrt2
         !$OMP END TARGET
       end if
     end if
#endif
   else
     if (l_paral_kgb == 0) then
       if(l_mpi_enreg%me_g0 == 1) ghc_filter(:, 1:spacedim*blockdim:l_npw) = ghc_filter(:, 1:spacedim*blockdim:l_npw) * sqrt2
     else
       if (cpuRow == 0) then
         ghc_filter(:, 1:spacedim*blockdim:spacedim) = ghc_filter(:, 1:spacedim*blockdim:spacedim) * sqrt2
       end if
     end if
   end if

   if(l_paw) then
     call xgBlock_scale(Bm1X,inv_sqrt2,1)
     if(l_gs_hamk%gpu_option==ABI_GPU_OPENMP) then
#ifdef HAVE_OPENMP_OFFLOAD
       if (l_paral_kgb == 0) then
         if(l_mpi_enreg%me_g0 == 1) then
           !$OMP TARGET MAP(to:gsm1hc_filter)
           gsm1hc_filter(:, 1:spacedim*blockdim:l_npw) = gsm1hc_filter(:, 1:spacedim*blockdim:l_npw) * sqrt2
           !$OMP END TARGET
         end if
       else
         if (cpuRow == 0) then
           !$OMP TARGET MAP(to:gsm1hc_filter)
           gsm1hc_filter(:, 1:spacedim*blockdim:spacedim) = gsm1hc_filter(:, 1:spacedim*blockdim:spacedim) * sqrt2
           !$OMP END TARGET
         end if
       end if
#endif
     else
       if (l_paral_kgb == 0) then
         if(l_mpi_enreg%me_g0 == 1) &
&          gsm1hc_filter(:, 1:spacedim*blockdim:l_npw) = gsm1hc_filter(:, 1:spacedim*blockdim:l_npw) * sqrt2
       else
         if (cpuRow == 0) then
           gsm1hc_filter(:, 1:spacedim*blockdim:spacedim) = gsm1hc_filter(:, 1:spacedim*blockdim:spacedim) * sqrt2
         end if
       end if
     end if
   end if
 end if

 if(l_paw) then
   !cwaveprj_next is dummy
   if(gemm_nonlop_use_gemm) then
     ABI_MALLOC(cwaveprj_next, (1,1))
   else
     ABI_MALLOC(cwaveprj_next, (l_gs_hamk%natom,l_nspinor*blockdim))
     call pawcprj_alloc(cwaveprj_next,0,l_gs_hamk%dimcprj)
   end if

   ABI_NVTX_START_RANGE(NVTX_INVOVL)
   call apply_invovl(l_gs_hamk, ghc_filter(:,:), gsm1hc_filter(:,:), cwaveprj_next(:,:), &
       spacedim/l_nspinor, blockdim, l_mpi_enreg, l_nspinor, l_block_sliced)
   ABI_NVTX_END_RANGE()
 else
   gsm1hc_filter(:,:) = ghc_filter(:,:)
 end if

 ABI_NVTX_START_RANGE(NVTX_INVOVL_POST1)
 !Scale cg, ghc, gsc
 if ( l_istwf == 2 ) then
   call xgBlock_scale(X,sqrt2,1)
   if(l_gs_hamk%gpu_option==ABI_GPU_OPENMP) then
#ifdef HAVE_OPENMP_OFFLOAD
     if (l_paral_kgb == 0) then
       if(l_mpi_enreg%me_g0 == 1) then
         !$OMP TARGET MAP(to:ghc_filter)
         ghc_filter(:, 1:spacedim*blockdim:l_npw) = ghc_filter(:, 1:spacedim*blockdim:l_npw) * inv_sqrt2
         !$OMP END TARGET
       end if
     else
       if (cpuRow == 0) then
         !$OMP TARGET MAP(to:ghc_filter)
         ghc_filter(:, 1:spacedim*blockdim:spacedim) = ghc_filter(:, 1:spacedim*blockdim:spacedim) * inv_sqrt2
         !$OMP END TARGET
       end if
     end if
#endif
   else
     if (l_paral_kgb == 0) then
       if(l_mpi_enreg%me_g0 == 1) then
         ghc_filter(:, 1:spacedim*blockdim:l_npw) = ghc_filter(:, 1:spacedim*blockdim:l_npw) * inv_sqrt2
       end if
     else
       if (cpuRow == 0) then
         ghc_filter(:, 1:spacedim*blockdim:spacedim) = ghc_filter(:, 1:spacedim*blockdim:spacedim) * inv_sqrt2
       end if
     end if
   end if

   if(l_paw) then
     call xgBlock_scale(Bm1X,sqrt2,1)
     if(l_gs_hamk%gpu_option==ABI_GPU_OPENMP) then
#ifdef HAVE_OPENMP_OFFLOAD
       if (l_paral_kgb == 0) then
         if(l_mpi_enreg%me_g0 == 1) then
           !$OMP TARGET MAP(to:gsm1hc_filter)
           gsm1hc_filter(:, 1:spacedim*blockdim:l_npw) = gsm1hc_filter(:, 1:spacedim*blockdim:l_npw) * inv_sqrt2
           !$OMP END TARGET
         end if
       else
         if (cpuRow == 0) then
           !$OMP TARGET MAP(to:gsm1hc_filter)
           gsm1hc_filter(:, 1:spacedim*blockdim:spacedim) = gsm1hc_filter(:, 1:spacedim*blockdim:spacedim) * inv_sqrt2
           !$OMP END TARGET
         end if
       end if
#endif
     else
       if (l_paral_kgb == 0) then
         if(l_mpi_enreg%me_g0 == 1) &
&          gsm1hc_filter(:, 1:spacedim*blockdim:l_npw) = gsm1hc_filter(:, 1:spacedim*blockdim:l_npw) * inv_sqrt2
       else
         if (cpuRow == 0) then
           gsm1hc_filter(:, 1:spacedim*blockdim:spacedim) = gsm1hc_filter(:, 1:spacedim*blockdim:spacedim) * inv_sqrt2
         end if
       end if
     end if
   end if
 end if

 if (l_paw) then
   call pawcprj_free(cwaveprj_next)
   ABI_FREE(cwaveprj_next)
 end if

 ABI_NVTX_END_RANGE()

end subroutine getBm1X

m_chebfiwf/getghc_gsc1 [ Functions ]

[ Top ] [ m_chebfiwf ] [ Functions ]

NAME

 getghc_gsc1

FUNCTION

 This routine computes H|C> and possibly S|C> for a given wave function C.
  It acts as a driver for getghc, taking into account parallelism, multithreading, etc.

SIDE EFFECTS

  X  <type(xgBlock_t)>= memory block containing |C>
  AX <type(xgBlock_t)>= memory block containing H|C>
  BX <type(xgBlock_t)>= memory block containing S|C>
  transposer <type(xgTransposer_t)>= data used for array transpositions

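NOTES

  This routine is passed as a callback to chebfi_run, together with getBm1X
  and precond1 (see chebfiwf2 above). Under the module-level settings saved by
  chebfiwf2 (l_cpopt, l_sij_opt, ...), the contract it fulfils is, in sketch:

   AX = H|X>                  ! always, via multithreaded_getghc
   BX = S|X>                  ! PAW case (l_sij_opt==1)
   BX = X                     ! NC case: S is the identity, so BX is a copy
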
SOURCE

subroutine getghc_gsc1(X,AX,BX,transposer)

 implicit none

!Arguments ------------------------------------
 type(xgBlock_t), intent(inout) :: X
 type(xgBlock_t), intent(inout) :: AX
 type(xgBlock_t), intent(inout) :: BX
 type(xgTransposer_t), optional, intent(inout) :: transposer

!Local variables-------------------------------
!scalars
 integer :: blockdim
 integer :: spacedim
 integer :: cpuRow
 real(dp) :: eval
!arrays
 real(dp), pointer :: cg(:,:)
 real(dp), pointer :: ghc(:,:)
 real(dp), pointer :: gsc(:,:)
 real(dp)          :: l_gvnlxc(1,1)
 type(pawcprj_type) :: cprj_dum(l_gs_hamk%natom,1)

! *********************************************************************

 call xgBlock_getSize(X,spacedim,blockdim)

 spacedim = spacedim/l_icplx

 call xgBlock_reverseMap(X,cg,l_icplx,spacedim*blockdim)
 call xgBlock_reverseMap(AX,ghc,l_icplx,spacedim*blockdim)
 call xgBlock_reverseMap(BX,gsc,l_icplx,spacedim*blockdim)

 !Scale back cg
 if (l_paral_kgb == 1) cpuRow = xgTransposer_getRank(transposer, 2)
 if(l_istwf == 2) then
   call xgBlock_scale(X,inv_sqrt2,1)
   if(l_gs_hamk%gpu_option==ABI_GPU_OPENMP) then
#ifdef HAVE_OPENMP_OFFLOAD
     if (l_paral_kgb == 0) then
       if(l_mpi_enreg%me_g0 == 1) then
         !$OMP TARGET MAP(cg)
         cg(:, 1:spacedim*blockdim:l_npw) = cg(:, 1:spacedim*blockdim:l_npw) * sqrt2
         !$OMP END TARGET
       end if
     else
       if (cpuRow == 0) then
         !$OMP TARGET MAP(cg)
         cg(:, 1:spacedim*blockdim:spacedim) = cg(:, 1:spacedim*blockdim:spacedim) * sqrt2
         !$OMP END TARGET
       end if
     end if
#endif
   else
     if (l_paral_kgb == 0) then
       if(l_mpi_enreg%me_g0 == 1) cg(:, 1:spacedim*blockdim:l_npw) = cg(:, 1:spacedim*blockdim:l_npw) * sqrt2
     else
       if (cpuRow == 0) cg(:, 1:spacedim*blockdim:spacedim) = cg(:, 1:spacedim*blockdim:spacedim) * sqrt2
     end if
   end if
 end if

 !if ( size(l_gvnlxc) < 2*blockdim*spacedim ) then
 !  ABI_FREE(l_gvnlxc)
 !  ABI_MALLOC(l_gvnlxc,(2,blockdim*spacedim))
 !end if

 call multithreaded_getghc(l_cpopt,cg,cprj_dum,ghc,gsc,&
   l_gs_hamk,l_gvnlxc,eval,l_mpi_enreg,blockdim,l_prtvol,l_sij_opt,l_tim_getghc,0)

#if defined(HAVE_GPU_CUDA) && defined(HAVE_YAKL)
 call gpu_device_synchronize()
#endif

 !Scale cg, ghc, gsc
 if ( l_istwf == 2 ) then
   call xgBlock_scale(X ,sqrt2,1)
   call xgBlock_scale(AX,sqrt2,1)

   if(l_gs_hamk%gpu_option==ABI_GPU_OPENMP) then
#ifdef HAVE_OPENMP_OFFLOAD
     if (l_paral_kgb == 0) then
       if(l_mpi_enreg%me_g0 == 1) then
         !$OMP TARGET MAP(cg)
         cg(:, 1:spacedim*blockdim:l_npw) = cg(:, 1:spacedim*blockdim:l_npw) * inv_sqrt2
         !$OMP END TARGET
         !$OMP TARGET MAP(ghc)
         ghc(:, 1:spacedim*blockdim:l_npw) = ghc(:, 1:spacedim*blockdim:l_npw) * inv_sqrt2
         !$OMP END TARGET
       end if
     else
       if (cpuRow == 0) then
         !$OMP TARGET MAP(cg)
         cg(:, 1:spacedim*blockdim:spacedim) = cg(:, 1:spacedim*blockdim:spacedim) * inv_sqrt2
         !$OMP END TARGET
         !$OMP TARGET MAP(ghc)
         ghc(:, 1:spacedim*blockdim:spacedim) = ghc(:, 1:spacedim*blockdim:spacedim) * inv_sqrt2
         !$OMP END TARGET
       end if
     end if
#endif
   else
     if (l_paral_kgb == 0) then
       if(l_mpi_enreg%me_g0 == 1) then
         cg(:, 1:spacedim*blockdim:l_npw) = cg(:, 1:spacedim*blockdim:l_npw) * inv_sqrt2
         ghc(:, 1:spacedim*blockdim:l_npw) = ghc(:, 1:spacedim*blockdim:l_npw) * inv_sqrt2
       end if
     else
       if (cpuRow == 0) then
         cg(:, 1:spacedim*blockdim:spacedim) = cg(:, 1:spacedim*blockdim:spacedim) * inv_sqrt2
         ghc(:, 1:spacedim*blockdim:spacedim) = ghc(:, 1:spacedim*blockdim:spacedim) * inv_sqrt2
       end if
     end if
   end if
   if(l_paw) then
     call xgBlock_scale(BX,sqrt2,1)
     if(l_gs_hamk%gpu_option==ABI_GPU_OPENMP) then
#ifdef HAVE_OPENMP_OFFLOAD
       if (l_paral_kgb == 0) then
         if(l_mpi_enreg%me_g0 == 1) then
           !$OMP TARGET MAP(gsc)
           gsc(:, 1:spacedim*blockdim:l_npw) = gsc(:, 1:spacedim*blockdim:l_npw) * inv_sqrt2
           !$OMP END TARGET
         end if
       else
         if (cpuRow == 0) then
           !$OMP TARGET MAP(gsc)
           gsc(:, 1:spacedim*blockdim:spacedim) = gsc(:, 1:spacedim*blockdim:spacedim) * inv_sqrt2
           !$OMP END TARGET
         end if
       end if
#endif
     else
       if (l_paral_kgb == 0) then
         if(l_mpi_enreg%me_g0 == 1) gsc(:, 1:spacedim*blockdim:l_npw) = gsc(:, 1:spacedim*blockdim:l_npw) * inv_sqrt2
       else
         if (cpuRow == 0) gsc(:, 1:spacedim*blockdim:spacedim) = gsc(:, 1:spacedim*blockdim:spacedim) * inv_sqrt2
       end if
     end if
   end if ! l_paw
 end if ! l_istwf==2

 if ( .not. l_paw ) call xgBlock_copy(X,BX)

end subroutine getghc_gsc1

m_chebfiwf/precond1 [ Functions ]

[ Top ] [ m_chebfiwf ] [ Functions ]

NAME

 precond1

FUNCTION

 This routine applies the kinetic-energy preconditioner to a block of vectors

SIDE EFFECTS

  W <type(xgBlock_t)>= memory block of residual vectors, preconditioned in place

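NOTES

  A plain-Fortran sketch of what the xgBlock_colwiseMul calls below amount to,
  assuming (hypothetically) that w is the raw (l_icplx*l_npw*l_nspinor,nband)
  array behind W:

   do iband = 1, nband
     do ispinor = 1, l_nspinor
       shift = l_icplx*l_npw*(ispinor-1)
       w(shift+1:shift+l_icplx*l_npw,iband) = &
&        w(shift+1:shift+l_icplx*l_npw,iband) * l_pcon(1:l_icplx*l_npw)
     end do
   end do
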
SOURCE

subroutine precond1(W)

 implicit none

 ! Arguments ------------------------------------
 type(xgBlock_t), intent(inout) :: W

 ! Local variables-------------------------------
 ! scalars
 integer :: ispinor

 ! *********************************************************************

 ! Precondition resid_vec
 do ispinor = 1,l_nspinor
   call xgBlock_colwiseMul(W, l_pcon, l_icplx*l_npw*(ispinor-1))
 end do

end subroutine precond1