TABLE OF CONTENTS


ABINIT/m_opernlc_ylm_ompgpu [ Modules ]

[ Top ] [ Modules ]

NAME

  m_opernlc_ylm_ompgpu

FUNCTION

COPYRIGHT

  Copyright (C) 2008-2024 ABINIT group (MT)
  This file is distributed under the terms of the
  GNU General Public License, see ~abinit/COPYING
  or http://www.gnu.org/copyleft/gpl.txt .

PARENTS

CHILDREN

SOURCE

19 #if defined HAVE_CONFIG_H
20 #include "config.h"
21 #endif
22 
23 #include "abi_common.h"
24 
25 module m_opernlc_ylm_ompgpu
26 
27  use defs_basis
28  use m_errors
29  use m_abicore
30  use m_xmpi
31 
32  use defs_abitypes, only : MPI_type
33 
34  implicit none
35 
36  private

ABINIT/opernlc_ylm_ompgpu [ Functions ]

[ Top ] [ Functions ]

NAME

 opernlc_ylm_ompgpu

FUNCTION

 * Operate with the non-local part of the hamiltonian,
   in order to reduce projected scalars
 * Operate with the non-local projectors and the overlap matrix,
   in order to reduce projected scalars

INPUTS

  atindx1(natom)=index table for atoms (gives the absolute index of
                 an atom from its rank in a block of atoms)
  cplex=1 if <p_lmn|c> scalars are real (equivalent to istwfk>1)
        2 if <p_lmn|c> scalars are complex
  cplex_dgxdt(ndgxdt) = used only when cplex = 1
             cplex_dgxdt(i) = 1 if dgxdt(1,i,:,:)   is real, 2 if it is pure imaginary
  cplex_enl=1 if enl factors are real, 2 if they are complex
  cplex_fac=1 if gxfac scalars are real, 2 if gxfac scalars are complex
  dgxdt(cplex,ndgxdt,nlmn,nincat)=grads of projected scalars (only if optder>0)
  dimenl1,dimenl2=dimensions of enl (see enl)
  dimekbq=1 if enl factors do not contain a exp(-iqR) phase, 2 is they do
  enl(cplex_enl*dimenl1,dimenl2,nspinortot**2,dimekbq)=
  ->Norm conserving : ==== when paw_opt=0 ====
                      (Real) Kleinman-Bylander energies (hartree)
                      dimenl1=lmnmax  -  dimenl2=ntypat
                      dimekbq is 2 if Enl contains a exp(-iqR) phase, 1 otherwise
  ->PAW :             ==== when paw_opt=1, 2 or 4 ====
                      (Real or complex, hermitian) Dij coefs to connect projectors
                      dimenl1=cplex_enl*lmnmax*(lmnmax+1)/2  -  dimenl2=natom
                      These are complex numbers if cplex_enl=2
                        enl(:,:,1) contains Dij^up-up
                        enl(:,:,2) contains Dij^dn-dn
                        enl(:,:,3) contains Dij^up-dn (only if nspinor=2)
                        enl(:,:,4) contains Dij^dn-up (only if nspinor=2)
                      dimekbq is 2 if Dij contains a exp(-iqR) phase, 1 otherwise
  gx(cplex,nlmn,nincat*abs(enl_opt))= projected scalars
  iatm=absolute rank of first atom of the current block of atoms
  indlmn(6,nlmn)= array giving l,m,n,lm,ln,s for i=lmn
  itypat=type of atoms
  lambda=factor to be used when computing (Vln-lambda.S) - only for paw_opt=2
  mpi_enreg=information about MPI parallelization
  natom=number of atoms in cell
  ndgxdt=second dimension of dgxdt
  ndgxdtfac=second dimension of dgxdtfac
  nincat=number of atoms in the subset here treated
  nlmn=number of (l,m,n) numbers for current type of atom
  nspinor= number of spinorial components of the wavefunctions (on current proc)
  nspinortot=total number of spinorial components of the wavefunctions
  optder=0=only gxfac is computed, 1=both gxfac and dgxdtfac are computed
         2=gxfac, dgxdtfac and d2gxdtfac are computed
  paw_opt= define the nonlocal operator concerned with:
           paw_opt=0 : Norm-conserving Vnl (use of Kleinman-Bylander ener.)
           paw_opt=1 : PAW nonlocal part of H (use of Dij coeffs)
           paw_opt=2 : PAW: (Vnl-lambda.Sij) (Sij=overlap matrix)
           paw_opt=3 : PAW overlap matrix (Sij)
           paw_opt=4 : both PAW nonlocal part of H (Dij) and overlap matrix (Sij)
  sij(nlm*(nlmn+1)/2)=overlap matrix components (only if paw_opt=2, 3 or 4)

OUTPUT

  if (paw_opt=0, 1, 2 or 4)
    gxfac(cplex_fac,nlmn,nincat,nspinor)= reduced projected scalars related to Vnl (NL operator)
  if (paw_opt=3 or 4)
    gxfac_sij(cplex,nlmn,nincat,nspinor)= reduced projected scalars related to Sij (overlap)
  if (optder==1.and.paw_opt=0, 1, 2 or 4)
    dgxdtfac(cplex_fac,ndgxdtfac,nlmn,nincat,nspinor)= gradients of gxfac related to Vnl (NL operator)
  if (optder==1.and.paw_opt=3 or 4)
    dgxdtfac_sij(cplex,ndgxdtfac,nlmn,nincat,nspinor)= gradients of gxfac related to Sij (overlap)

NOTES

 This routine operates for one type of atom, and within this given type of atom,
 for a subset of at most nincat atoms.

 About the non-local factors symmetry:
   - The lower triangular part of the Dij matrix can be deduced from the upper one
     with the following relation: D^s2s1_ji = (D^s1s2_ij)^*
     where s1,s2 are spinor components
   - The Dij factors can contain a exp(-iqR) phase
     This phase does not have to be included in the symmetry rule
     For that reason, we first apply the real part (cos(qR).D^s1s2_ij)
     then, we apply the imaginary part (-sin(qR).D^s1s2_ij)

PARENTS

      m_gpu_nonlop,m_nonlop_ylm

CHILDREN

      xmpi_sum

SOURCE

135 subroutine opernlc_ylm_ompgpu(atindx1,cplex,cplex_dgxdt,cplex_d2gxdt,cplex_enl,cplex_fac,&
136 &          dgxdt,dgxdtfac,dgxdtfac_sij,d2gxdt,d2gxdtfac,d2gxdtfac_sij,dimenl1,dimenl2,dimekbq,enl,&
137 &          gx,gxfac,gxfac_sij,iatm,indlmn,itypat,lambda,mpi_enreg,natom,ndgxdt,ndgxdtfac,&
138 &          nd2gxdt,nd2gxdtfac,nincat,nlmn,nspinor,nspinortot,optder,paw_opt,sij,ndat,ibeg,iend,nprojs,ntypat)
139 
140 !Arguments ------------------------------------
141 !scalars
142  integer,intent(in) :: cplex,cplex_enl,cplex_fac,dimenl1,dimenl2,dimekbq,iatm,itypat
143  integer,intent(in) :: natom,ndgxdt,ndgxdtfac,nd2gxdt,nd2gxdtfac,nincat,nspinor,nspinortot,optder,paw_opt
144  integer,intent(inout) :: nlmn
145  integer,intent(in) :: ndat,ibeg,iend,nprojs,ntypat
146  real(dp) :: lambda(ndat)
147  type(MPI_type) , intent(in) :: mpi_enreg
148 !arrays
149  integer,intent(in) :: atindx1(natom),indlmn(6,nlmn,ntypat),cplex_dgxdt(ndgxdt),cplex_d2gxdt(nd2gxdt)
150  real(dp),intent(in) :: dgxdt(cplex,ndgxdt,nlmn,nincat,nspinor)
151  real(dp),intent(in) :: d2gxdt(cplex,nd2gxdt,nlmn,nincat,nspinor)
152  real(dp),intent(in),target :: enl(dimenl1,dimenl2,nspinortot**2,dimekbq)
153  real(dp),intent(inout) :: gx(cplex,nprojs,nspinor*ndat)
154  real(dp),intent(in) :: sij(:,:)
155  real(dp),intent(out),target :: dgxdtfac(cplex_fac,ndgxdtfac,nlmn,nincat,nspinor)
156  real(dp),intent(out) :: dgxdtfac_sij(cplex,ndgxdtfac,nlmn,nincat,nspinor*(paw_opt/3))
157  real(dp),intent(out),target :: d2gxdtfac(cplex_fac,nd2gxdtfac,nlmn,nincat,nspinor)
158  real(dp),intent(out) :: d2gxdtfac_sij(cplex,nd2gxdtfac,nlmn,nincat,nspinor*(paw_opt/3))
159  real(dp),intent(out),target :: gxfac(cplex_fac,nprojs,nspinor*ndat)
160  real(dp),intent(out) :: gxfac_sij(cplex,nprojs,nspinor*ndat)
161 
162 !Tested usecases :
163 ! - Nvidia GPUs : FC_NVHPC + CUDA
164 ! - AMD GPUs    : FC_LLVM + HIP
165 ! An eventual Intel implementation would use the OneAPI LLVM compiler.
166 ! Homemade CUDA/HIP interfaces would allow the use of GCC.
167 ! But it is likely that OpenMP performance won't be optimal outside GPU vendors compilers.
168 #ifdef HAVE_OPENMP_OFFLOAD
169 
170 !Local variables-------------------------------
171 !Arrays
172 !scalars
173  integer :: cplex_,ia,ierr,ijlmn,ijspin,ilm,ilmn,i0lmn,iln,index_enl,iphase,ispinor,ispinor_index,idat
174  integer :: j0lmn,jilmn,jispin,jjlmn,jlm,jlmn,jspinor,jspinor_index,mu,shift
175  real(dp) :: sijr
176 !arrays
177  real(dp) :: enl_(2),gxfi(2),gxi(cplex),gxj(cplex)
178  real(dp),allocatable :: d2gxdtfac_offdiag(:,:,:,:,:),dgxdtfac_offdiag(:,:,:,:,:)
179  real(dp),allocatable :: gxfac_offdiag(:,:,:,:),gxfj(:,:)
180  real(dp),pointer :: d2gxdtfac_(:,:,:,:,:),dgxdtfac_(:,:,:,:,:),gxfac_(:,:,:)
181  real(dp),pointer :: enl_ptr(:,:,:)
182 
183 ! *************************************************************************
184 
185  if (dimekbq==2) then
186    ABI_ERROR('dimekbq=2 not yet allowed with GPU !')
187  end if
188 
189  DBG_ENTER("COLL")
190 
191 !Parallelization over spinors treatment
192  shift=0;if (mpi_enreg%paral_spinor==1) shift=mpi_enreg%me_spinor
193  iphase=1
194  gxfac_ => gxfac
195  dgxdtfac_ => dgxdtfac
196 
197 
198 
199 !Accumulate gxfac related to non-local operator (Norm-conserving)
200 !-------------------------------------------------------------------
201   if (paw_opt==0) then
202    !Enl is E(Kleinman-Bylander)
203    ABI_CHECK(cplex_enl/=2,"BUG: invalid cplex_enl=2!")
204    ABI_CHECK(cplex_fac==cplex,"BUG: invalid cplex_fac/=cplex!")
205    !$OMP TARGET TEAMS DISTRIBUTE &
206    !$OMP& PARALLEL DO COLLAPSE(3) &
207    !$OMP& MAP(to:gxfac_,gx,enl_ptr) &
208    !$OMP& IS_DEVICE_PTR(indlmn) &
209    !$OMP& PRIVATE(ispinor,ispinor_index,ia,ilmn,iln,enl_)
210    do ispinor=1,nspinor
211      do ia=1,nincat
212        do ilmn=1,nlmn
213          ispinor_index=ispinor+shift
214          iln=indlmn(5,ilmn,itypat)
215          enl_(1)=enl_ptr(iln,itypat,ispinor_index)
216          !TODO gxfac_(1:cplex,ilmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor)=&
217          !& enl_(1)*gx(1:cplex,ilmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor)
218        end do
219      end do
220    end do
221    !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO
222   end if
223 
224 !Accumulate gxfac related to nonlocal operator (PAW)
225 !-------------------------------------------------------------------
226   if (paw_opt==1.or.paw_opt==2.or.paw_opt==4) then
227    !Enl is psp strength Dij or (Dij-lambda.Sij)
228 
229 !  === Diagonal term(s) (up-up, down-down)
230 
231 !  1-Enl is real
232    if (cplex_enl==1) then
233      if (paw_opt==2) then
234        !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(4) &
235        !$OMP& MAP(to:gxfac,enl,atindx1,gx,sij,lambda) &
236        !$OMP& PRIVATE(idat,ispinor,ispinor_index,ia,index_enl,jlmn,j0lmn,jjlmn,ilmn,i0lmn,ijlmn)
237        do idat=1,ndat
238        do ispinor=1,nspinor
239          do ia=1,nincat
240            do jlmn=1,nlmn
241              ispinor_index=ispinor+shift
242              index_enl=atindx1(iatm+ia)
243              j0lmn=jlmn*(jlmn-1)/2
244              jjlmn=j0lmn+jlmn
245              gxfac(1:cplex,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor)=&
246 &               gxfac(1:cplex,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor) + &
247 &               (enl(jjlmn,index_enl,ispinor_index,iphase)-lambda(idat) * sij(jjlmn,itypat)) * &
248 &               gx(1:cplex,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor)
249              do ilmn=1,jlmn-1
250                ijlmn=j0lmn+ilmn
251                gxfac(1:cplex,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor)=&
252 &                 gxfac(1:cplex,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor) + &
253 &                 (enl(ijlmn,index_enl,ispinor_index,iphase)-lambda(idat) * sij(ijlmn,itypat)) * &
254 &                 gx(1:cplex,ilmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor)
255              end do
256              if(jlmn<nlmn) then
257                do ilmn=jlmn+1,nlmn
258                  i0lmn=(ilmn*(ilmn-1)/2)
259                  ijlmn=i0lmn+jlmn
260                  gxfac(1:cplex,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor)=&
261 &                   gxfac(1:cplex,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor) + &
262 &                   (enl(ijlmn,index_enl,ispinor_index,iphase)-lambda(idat) * sij(ijlmn,itypat)) *&
263 &                   gx(1:cplex,ilmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor)
264                end do
265              end if
266            end do
267          end do
268        end do
269        end do
270 
271      else
272        !$OMP TARGET TEAMS DISTRIBUTE &
273        !$OMP& PARALLEL DO COLLAPSE(4) &
274        !$OMP& MAP(to:enl,atindx1,gx,gxfac) &
275        !$OMP& PRIVATE(idat,ispinor,ispinor_index,ia,index_enl,jlmn,j0lmn,jjlmn,ilmn,i0lmn,ijlmn)
276        do idat=1,ndat
277        do ispinor=1,nspinor
278          do ia=1,nincat
279            do jlmn=1,nlmn
280              ispinor_index=ispinor+shift
281              index_enl=atindx1(iatm+ia)
282              j0lmn=jlmn*(jlmn-1)/2
283              jjlmn=j0lmn+jlmn
284              gxfac(1:cplex,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor)=&
285 &               gxfac(1:cplex,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor) + &
286 &               enl(jjlmn,index_enl,ispinor_index,iphase) * &
287 &               gx(1:cplex,ibeg+jlmn+(ia-1)*nlmn,ispinor+(idat-1)*nspinor)
288              do ilmn=1,jlmn-1
289                ijlmn=j0lmn+ilmn
290                gxfac(1:cplex,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor)=&
291 &                 gxfac(1:cplex,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor) + &
292 &                 enl(ijlmn,index_enl,ispinor_index,iphase)*gx(1:cplex,ibeg+ilmn+(ia-1)*nlmn,ispinor+(idat-1)*nspinor)
293              end do
294              if(jlmn<nlmn) then
295                do ilmn=jlmn+1,nlmn
296                  i0lmn=(ilmn*(ilmn-1)/2)
297                  ijlmn=i0lmn+jlmn
298                  gxfac(1:cplex,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor)=&
299 &                   gxfac(1:cplex,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor) + &
300 &                   enl(ijlmn,index_enl,ispinor_index,iphase)*gx(1:cplex,ibeg+ilmn+(ia-1)*nlmn,ispinor+(idat-1)*nspinor)
301                end do
302              end if
303            end do
304          end do
305        end do
306        end do
307      endif
308 
309 
310 !    2-Enl is complex  ===== D^ss'_ij=D^s's_ji^*
311    else
312      ABI_CHECK(cplex_fac==cplex_enl,"BUG: invalid cplex_fac/=cplex_enl!")
313 
314      if (nspinortot==1) then ! -------------> NO SPINORS
315        if(paw_opt==2) then
316          !$OMP TARGET TEAMS DISTRIBUTE &
317          !$OMP& PARALLEL DO COLLAPSE(3) &
318          !$OMP& MAP(to:gxfac_,gx,gxi,atindx1,gxj,enl_ptr) &
319          !$OMP& IS_DEVICE_PTR(sij) &
320          !$OMP& PRIVATE(idat,ia,index_enl,jlmn,j0lmn,jjlmn,enl_,gxj,ilmn,i0lmn,ijlmn,gxi)
321          do idat=1,ndat
322          do ia=1,nincat
323            do jlmn=1,nlmn
324              index_enl=atindx1(iatm+ia)
325              j0lmn=jlmn*(jlmn-1)/2
326              jjlmn=j0lmn+jlmn
327              enl_(1)=enl_ptr(2*jjlmn-1,index_enl,1)-lambda(idat)*sij(jjlmn,itypat)
328              gxj(1:cplex)=gx(1:cplex,jlmn+(ia-1)*nlmn+ibeg,1)
329              gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) = &
330 &               gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1))+enl_(1)*gxj(1)
331              if (cplex==2) then
332                gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) = &
333 &                 gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1))+enl_(1)*gxj(2)
334              end if
335              do ilmn=1,jlmn-1
336                ijlmn=j0lmn+ilmn
337                enl_(1:2)=enl_ptr(2*ijlmn-1:2*ijlmn,index_enl,1)
338                enl_(1)=enl_(1)-lambda(idat)*sij(ijlmn,itypat)
339                gxi(1:cplex)=gx(1:cplex,ilmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1))
340                gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) = &
341 &                 gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1))+enl_(1)*gxi(1)
342                gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) = &
343 &                 gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1))-enl_(2)*gxi(1)
344                gxfac_(1,ilmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) = &
345 &                 gxfac_(1,ilmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1))+enl_(1)*gxj(1)
346                gxfac_(2,ilmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) = &
347 &                 gxfac_(2,ilmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1))+enl_(2)*gxj(1)
348                if (cplex==2) then
349                  gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) = &
350 &                   gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1))+enl_(2)*gxi(2)
351                  gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) = &
352 &                   gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1))+enl_(1)*gxi(2)
353                  gxfac_(1,ilmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) = &
354 &                   gxfac_(1,ilmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1))-enl_(2)*gxj(2)
355                  gxfac_(2,ilmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) = &
356 &                   gxfac_(2,ilmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1))+enl_(1)*gxj(2)
357                end if
358              end do
359              if(jlmn<nlmn) then
360                do ilmn=jlmn+1,nlmn
361                  i0lmn=ilmn*(ilmn-1)/2
362                  ijlmn=i0lmn+jlmn
363                  enl_(1:2)=enl_ptr(2*ijlmn-1:2*ijlmn,index_enl,1)
364                  enl_(1)=enl_(1)-lambda(idat)*sij(ijlmn,itypat)
365                  gxi(1:cplex)=gx(1:cplex,ilmn+(ia-1)*nlmn+ibeg,1)
366                  gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) = &
367 &                   gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1))+enl_(1)*gxi(1)
368                  gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1))= &
369 &                   gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1))+enl_(2)*gxi(1)
370                  if (cplex==2) then
371                    gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) = &
372 &                     gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1))-enl_(2)*gxi(2)
373                    gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) = &
374 &                     gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1))+enl_(1)*gxi(2)
375                  end if
376                end do
377              end if
378            end do
379          end do
380          end do
381        else
382          !$OMP TARGET TEAMS DISTRIBUTE &
383          !$OMP& PARALLEL DO COLLAPSE(3) &
384          !$OMP& MAP(to:gxfac_,gx,gxi,atindx1,gxj,enl_ptr) &
385          !$OMP& IS_DEVICE_PTR(sij) &
386          !$OMP& PRIVATE(idat,ia,index_enl,jlmn,j0lmn,jjlmn,enl_,gxj,ilmn,i0lmn,ijlmn,gxi)
387          do idat=1,ndat
388          do ia=1,nincat
389            do jlmn=1,nlmn
390            index_enl=atindx1(iatm+ia)
391              j0lmn=jlmn*(jlmn-1)/2
392              jjlmn=j0lmn+jlmn
393              enl_(1)=enl_ptr(2*jjlmn-1,index_enl,1)
394              gxj(1:cplex)=gx(1:cplex,jlmn+(ia-1)*nlmn+ibeg,1)
395              gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) = &
396 &               gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1))+enl_(1)*gxj(1)
397              if (cplex==2) then
398                gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) = gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) + &
399 &                 enl_(1)*gxj(2)
400              end if
401              do ilmn=1,jlmn-1
402                ijlmn=j0lmn+ilmn
403                enl_(1:2)=enl_ptr(2*ijlmn-1:2*ijlmn,index_enl,1)
404                gxi(1:cplex)=gx(1:cplex,ilmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1))
405                gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) = &
406 &                 gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1))+enl_(1)*gxi(1)
407                gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) = &
408 &                 gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1))-enl_(2)*gxi(1)
409                gxfac_(1,ilmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) = &
410 &                 gxfac_(1,ilmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1))+enl_(1)*gxj(1)
411                gxfac_(2,ilmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) = &
412 &                 gxfac_(2,ilmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1))+enl_(2)*gxj(1)
413                if (cplex==2) then
414                  gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) = &
415 &                   gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1))+enl_(2)*gxi(2)
416                  gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) = &
417 &                   gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1))+enl_(1)*gxi(2)
418                  gxfac_(1,ilmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) = &
419 &                   gxfac_(1,ilmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1))-enl_(2)*gxj(2)
420                  gxfac_(2,ilmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) = &
421 &                   gxfac_(2,ilmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1))+enl_(1)*gxj(2)
422                end if
423              end do
424              if(jlmn<nlmn) then
425                do ilmn=jlmn+1,nlmn
426                  i0lmn=ilmn*(ilmn-1)/2
427                  ijlmn=i0lmn+jlmn
428                  enl_(1:2)=enl_ptr(2*ijlmn-1:2*ijlmn,index_enl,1)
429    !TODO               gxi(1:cplex)=gx(1:cplex,ilmn+(ia-1)*nlmn+ibeg,1)
430                  gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) = &
431 &                   gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1))+enl_(1)*gxi(1)
432                  gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) = &
433 &                   gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1))+enl_(2)*gxi(1)
434                  if (cplex==2) then
435                    gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) = gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) - &
436 &                     enl_(2)*gxi(2)
437                    gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) = gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) + &
438 &                     enl_(1)*gxi(2)
439                  end if
440                end do
441              end if
442            end do
443          end do
444          end do
445        end if
446 
447      else ! -------------> SPINORIAL CASE
448 
449 !  === Diagonal term(s) (up-up, down-down)
450 
451        !$OMP TARGET TEAMS DISTRIBUTE &
452        !$OMP& PARALLEL DO COLLAPSE(4) &
453        !$OMP& MAP(to:gxfac_,gx,gxi,atindx1,gxj,enl_ptr) &
454        !$OMP& IS_DEVICE_PTR(sij) &
455        !$OMP& PRIVATE(idat,ispinor,ispinor_index,ia,index_enl), &
456        !$OMP& PRIVATE(jlmn,j0lmn,jjlmn,enl_,gxj,ilmn,ijlmn,i0lmn,gxi)
457        do idat=1,ndat
458        do ispinor=1,nspinor
459          do ia=1,nincat
460            do jlmn=1,nlmn
461              ispinor_index=ispinor+shift
462              index_enl=atindx1(iatm+ia)
463              j0lmn=jlmn*(jlmn-1)/2
464              jjlmn=j0lmn+jlmn
465              enl_(1)=enl_ptr(2*jjlmn-1,index_enl,ispinor_index)
466              if (paw_opt==2) enl_(1)=enl_(1)-lambda(idat)*sij(jjlmn,itypat)
467 !TODO              gxj(1:cplex)=gx(1:cplex,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor)
468              gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor) = &
469 &                 gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor)+enl_(1)*gxj(1)
470              if (cplex==2) then
471                gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor) = &
472 &                 gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor)+enl_(1)*gxj(2)
473              end if
474              do ilmn=1,jlmn-1
475                ijlmn=j0lmn+ilmn
476                enl_(1:2)=enl_ptr(2*ijlmn-1:2*ijlmn,index_enl,ispinor_index)
477                if (paw_opt==2) enl_(1)=enl_(1)-lambda(idat)*sij(ijlmn,itypat)
478  !TODO               gxi(1:cplex)=gx(1:cplex,ilmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor)
479                gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor) = &
480 &                 gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor)+enl_(1)*gxi(1)
481                gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor) = &
482 &                 gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor)-enl_(2)*gxi(1)
483                if (cplex==2) then
484                  gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor) = &
485 &                   gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor)+enl_(2)*gxi(2)
486                  gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor) = &
487 &                   gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor)+enl_(1)*gxi(2)
488                end if
489              end do
490              if(jlmn<nlmn) then
491                do ilmn=jlmn+1,nlmn
492                  i0lmn=ilmn*(ilmn-1)/2
493                  ijlmn=i0lmn+jlmn
494                  enl_(1:2)=enl_ptr(2*ijlmn-1:2*ijlmn,index_enl,ispinor_index)
495                  if (paw_opt==2) enl_(1)=enl_(1)-lambda(idat)*sij(ijlmn,itypat)
496 !TODO                  gxi(1:cplex)=gx(1:cplex,ilmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor)
497                  gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor) = &
498 &                   gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor)+enl_(1)*gxi(1)
499                  gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor) = &
500 &                   gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor)+enl_(2)*gxi(1)
501                  if (cplex==2) then
502                    gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor) = &
503 &                     gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor)-enl_(2)*gxi(2)
504                    gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor) = &
505 &                     gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor)+enl_(1)*gxi(2)
506                  end if
507                end do
508              end if
509            end do
510          end do
511        end do
512        end do
513      end if !nspinortot
514    end if !complex_enl
515 
516 !  === Off-diagonal term(s) (up-down, down-up)
517 
518 !  --- No parallelization over spinors ---
519    if (nspinortot==2.and.nspinor==nspinortot) then
520      ABI_CHECK(cplex_enl==2,"BUG: invalid cplex_enl/=2!")
521      ABI_CHECK(cplex_fac==cplex,"BUG: invalid cplex_fac/=cplex)!")
522      !$OMP TARGET TEAMS DISTRIBUTE &
523      !$OMP& PARALLEL DO COLLAPSE(4) &
524      !$OMP& MAP(to:gxfac_,gx,gxi,atindx1,gxj,enl_ptr) &
525      !$OMP& PRIVATE(idat,ispinor,jspinor,ia,index_enl), &
526      !$OMP& PRIVATE(jlmn,j0lmn,jjlmn,enl_,gxi,gxj,ilmn,i0lmn,ijlmn)
527      do idat=1,ndat
528      do ispinor=1,nspinortot
529        do ia=1,nincat
530          do jlmn=1,nlmn
531        jspinor=3-ispinor
532          index_enl=atindx1(iatm+ia)
533            j0lmn=jlmn*(jlmn-1)/2
534            jjlmn=j0lmn+jlmn
535            enl_(1:2)=enl_ptr(2*jjlmn-1:2*jjlmn,index_enl,2+ispinor )
536 !TODO            gxi(1:cplex)=gx(1:cplex,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor)
537            gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,jspinor)=gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,jspinor)+enl_(1)*gxi(1)
538            gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,jspinor)=gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,jspinor)-enl_(2)*gxi(1)
539            if (cplex==2) then
540              gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,jspinor)=gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,jspinor)+enl_(2)*gxi(2)
541              gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,jspinor)=gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,jspinor)+enl_(1)*gxi(2)
542            end if
543            do ilmn=1,jlmn-1
544              ijlmn=j0lmn+ilmn
545              enl_(1:2)=enl_ptr(2*ijlmn-1:2*ijlmn,index_enl,2+ispinor+(idat-1)*nspinor)
546 !TODO              gxi(1:cplex)=gx(1:cplex,ilmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor)
547              gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,jspinor)=gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,jspinor)+enl_(1)*gxi(1)
548              gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,jspinor)=gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,jspinor)-enl_(2)*gxi(1)
549              if (cplex==2) then
550                gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,jspinor)=gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,jspinor)+enl_(2)*gxi(2)
551                gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,jspinor)=gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,jspinor)+enl_(1)*gxi(2)
552              end if
553            end do
554            if(jlmn<nlmn) then
555              do ilmn=jlmn+1,nlmn
556                i0lmn=ilmn*(ilmn-1)/2
557                ijlmn=i0lmn+jlmn
558                enl_(1:2)=enl_ptr(2*ijlmn-1:2*ijlmn,index_enl,2+ispinor+(idat-1)*nspinor)
559 !TODO                gxi(1:cplex)=gx(1:cplex,ilmn+(ia-1)*nlmn+ibeg,jspinor)
560                gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor) = &
561 &                   gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor)+enl_(1)*gxi(1)
562                gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor) = &
563 &                   gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor)+enl_(2)*gxi(1)
564                if (cplex==2) then
565                  gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor) = &
566 &                     gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor)-enl_(2)*gxi(2)
567                  gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor) = &
568 &                     gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor)+enl_(1)*gxi(2)
569                end if
570              end do
571            end if
572          end do
573        end do
574      end do
575      end do
576 
577 !    --- Parallelization over spinors ---
578    else if (nspinortot==2.and.nspinor/=nspinortot) then
579      ABI_CHECK(cplex_enl==2,"BUG: invalid cplex_enl/=2!")
580      ABI_CHECK(cplex_fac==2,"BUG: invalid cplex_fac/=2!")
581      ABI_MALLOC(gxfac_offdiag,(cplex_fac,nlmn,nincat,nspinortot))
582 !$OMP WORKSHARE
583      gxfac_offdiag(:,:,:,:)=zero
584 !$OMP END WORKSHARE
585      ispinor_index=mpi_enreg%me_spinor+1
586      jspinor_index=3-ispinor_index
587      if (ispinor_index==1) then
588        ijspin=3;jispin=4
589      else
590        ijspin=4;jispin=3
591      end if
592      !$OMP TARGET TEAMS DISTRIBUTE &
593      !$OMP& PARALLEL DO COLLAPSE(4) &
594      !$OMP& MAP(to:gx,gxi,atindx1,enl_ptr) &
595      !$OMP PRIVATE(idat,ia,index_enl,jlmn,j0lmn,ilmn,i0lmn,i0lmn,ijlmn,enl_,jilmn,gxi)
596      do idat=1,ndat
597      do ia=1,nincat
598        do jlmn=1,nlmn
599          do ilmn=1,nlmn
600        index_enl=atindx1(iatm+ia)
601          j0lmn=jlmn*(jlmn-1)/2
602            i0lmn=ilmn*(ilmn-1)/2
603            if (ilmn<=jlmn) then
604              ijlmn=j0lmn+ilmn
605              enl_(1)= enl_ptr(2*ijlmn-1,index_enl,ijspin)
606              enl_(2)=-enl_ptr(2*ijlmn  ,index_enl,ijspin)
607            else
608              jilmn=i0lmn+jlmn
609              enl_(1)= enl_ptr(2*jilmn-1,index_enl,jispin)
610              enl_(2)= enl_ptr(2*jilmn  ,index_enl,jispin)
611            end if
612 !TODO            gxi(1:cplex)=gx(1:cplex,ilmn,ia,1+nspinor*(idat-1))
613            gxfac_offdiag(1,jlmn,ia,jspinor_index)= &
614 &            gxfac_offdiag(1,jlmn,ia,jspinor_index)+enl_(1)*gxi(1)
615            gxfac_offdiag(2,jlmn,ia,jspinor_index)= &
616 &            gxfac_offdiag(2,jlmn,ia,jspinor_index)+enl_(2)*gxi(1)
617            if (cplex==2) then
618              gxfac_offdiag(1,jlmn,ia,jspinor_index)= &
619 &              gxfac_offdiag(1,jlmn,ia,jspinor_index)-enl_(2)*gxi(2)
620              gxfac_offdiag(2,jlmn,ia,jspinor_index)= &
621 &              gxfac_offdiag(2,jlmn,ia,jspinor_index)+enl_(1)*gxi(2)
622            end if
623          end do !ilmn
624        end do !jlmn
625      end do !iat
626      end do !idat
627      call xmpi_sum(gxfac_offdiag,mpi_enreg%comm_spinor,ierr)
628      !$OMP TARGET UPDATE FROM(gxfac_)
629      !gxfac_(:,:,:,1)=gxfac_(:,:,:,1)+gxfac_offdiag(:,:,:,ispinor_index)
630      ABI_FREE(gxfac_offdiag)
631    end if
632 
633   end if !paw_opt
634 
635 
636 !Accumulate gxfac related to overlap (Sij) (PAW)
637 !------------------------------------------- ------------------------
638  if (paw_opt==3.or.paw_opt==4) then ! Use Sij, overlap contribution
639    !$OMP TARGET TEAMS DISTRIBUTE &
640    !$OMP& PARALLEL DO COLLAPSE(4) &
641    !$OMP& MAP(to:sij,gx,gxfac_sij) &
642    !$OMP PRIVATE(idat, ispinor,ia,jlmn,j0lmn,jjlmn,jlm,ilmn,ilm,i0lmn,ijlmn)
643    do idat=1,ndat
644    do ispinor=1,nspinor
645      do ia=1,nincat
646        do jlmn=1,nlmn
647          j0lmn=jlmn*(jlmn-1)/2
648          jjlmn=j0lmn+jlmn
649          gxfac_sij(1:cplex,ibeg+jlmn+(ia-1)*nlmn,ispinor+(idat-1)*nspinor)= &
650            gxfac_sij(1:cplex,ibeg+jlmn+(ia-1)*nlmn,ispinor+(idat-1)*nspinor) &
651            + sij(jjlmn,itypat) * gx(1:cplex,ibeg+jlmn+(ia-1)*nlmn,ispinor+(idat-1)*nspinor)
652          do ilmn=1,jlmn-1
653            ijlmn=j0lmn+ilmn
654            gxfac_sij(1:cplex,ibeg+jlmn+(ia-1)*nlmn,ispinor+(idat-1)*nspinor)= &
655              gxfac_sij(1:cplex,ibeg+jlmn+(ia-1)*nlmn,ispinor+(idat-1)*nspinor) &
656              + sij(ijlmn,itypat) * gx(1:cplex,ibeg+ilmn+(ia-1)*nlmn,ispinor+(idat-1)*nspinor)
657          end do
658          if(jlmn<nlmn) then
659            do ilmn=jlmn+1,nlmn
660              i0lmn=ilmn*(ilmn-1)/2
661              ijlmn=i0lmn+jlmn
662              gxfac_sij(1:cplex,ibeg+jlmn+(ia-1)*nlmn,ispinor+(idat-1)*nspinor)=&
663                gxfac_sij(1:cplex,ibeg+jlmn+(ia-1)*nlmn,ispinor+(idat-1)*nspinor) &
664                + sij(ijlmn,itypat) * gx(1:cplex,ibeg+ilmn+(ia-1)*nlmn,ispinor+(idat-1)*nspinor)
665            end do
666          end if
667        end do
668      end do
669    end do
670    end do
671  end if
672 
673 #else
674 
675  ABI_UNUSED((/cplex,cplex_enl,cplex_fac,dimenl1,dimenl2,dimekbq,iatm,itypat/))
676  ABI_UNUSED((/natom,ndgxdt,ndgxdtfac,nd2gxdt,nd2gxdtfac,nincat,nspinor,nspinortot,optder,paw_opt/))
677  ABI_UNUSED((/nlmn,ndat,ibeg,iend,nprojs,ntypat/))
678  ABI_UNUSED((/atindx1,indlmn,cplex_dgxdt,cplex_d2gxdt/))
679  ABI_UNUSED((/dgxdtfac,dgxdtfac_sij,d2gxdtfac,d2gxdtfac_sij,gxfac,gxfac_sij,dgxdt,d2gxdt,lambda,enl,gx,sij/))
680  ABI_UNUSED_A(mpi_enreg)
681  ABI_BUG("Unhandled configuration for OpenMP GPU immplementation")
682 
683 #endif
684 
685 end subroutine opernlc_ylm_ompgpu