./66_nonlocal/m_nonlop

ABINIT/m_nonlop_ylm
ABINIT/nonlop_ylm
ABINIT/nonlop_ylm_init_counters
ABINIT/nonlop_ylm_output_counters
ABINIT/nonlop_ylm_stop_counters

ABINIT/m_nonlop_ylm [ Modules ]

NAME

  m_nonlop_ylm

FUNCTION

  Copyright (C) 1998-2024 ABINIT group (MT)
  This file is distributed under the terms of the
  GNU General Public License, see ~abinit/COPYING
  or http://www.gnu.org/copyleft/gpl.txt .

SOURCE

15 #if defined HAVE_CONFIG_H
16 #include "config.h"
17 #endif
18 
19 #include "abi_common.h"
20 
21 module m_nonlop_ylm
22 
23  use defs_basis
24  use m_xmpi
25  use m_abicore
26  use m_errors
27 
28  use defs_abitypes,      only : MPI_type
29  use m_geometry,         only : strconv
30  use m_kg,               only : ph1d3d, mkkpg
31  use m_pawcprj,          only : pawcprj_type
32  use m_opernla_ylm,      only : opernla_ylm,opernla_counter
33  use m_opernla_ylm_mv,   only : opernla_ylm_mv,opernla_mv_counter,opernla_mv_dgemv_counter
34  use m_opernlb_ylm,      only : opernlb_ylm,opernlb_counter
35  use m_opernlb_ylm_mv,   only : opernlb_ylm_mv,opernlb_mv_counter,opernlb_mv_dgemv_counter
36  use m_opernlc_ylm,      only : opernlc_ylm
37  use m_opernld_ylm,      only : opernld_ylm
38  use m_kg,               only : mkkpgcart
39 ! use m_time,             only : timab
40 
41  implicit none
42 
43  private

ABINIT/nonlop_ylm [ Functions ]

[ Top ] [ Functions ]

NAME

 nonlop_ylm

FUNCTION

 * Compute application of a nonlocal operator Vnl in order to get:
    - contracted elements (energy, forces, stresses, ...), if signs=1
    - a function in reciprocal space (|out> = Vnl|in>), if signs=2
   Operator Vnl, as the following general form:
    $Vnl=sum_{R,lmn,l''m''n''} {|P_{Rlmn}> Enl^{R}_{lmn,l''m''n''} <P_{Rl''m''n''}|}$
   Operator Vnl is -- in the typical case -- the nonlocal potential.
   - With norm-conserving pseudopots, $Enl^{R}_{lmn,l''m''n''}$ is the
     Kleinmann-Bylander energy $Ekb^{R}_{ln}$.
   - In a PAW calculation, $Enl^{R}_{lmn,l''m''n''}$ are the nonlocal
     coefficients to connect projectors $D_{ij}$.
   - The |P_{Rlmn}> are the projector functions.
 * Optionnaly, in case of PAW calculation, compute:
   - Application of the overlap matrix in reciprocal space
     (<in|S|in> or (I+S)|in>).
   - Application of (Vnl-lambda.S) in reciprocal space
     (<in|Vnl-lambda.S|in> and derivatives or (Vnl-lambda.S)|in>).
 * This routine uses spherical harmonics Ylm to express Vnl.

INPUTS

  atindx1(natom)=index table for atoms, inverse of atindx
  choice: chooses possible output:
    choice=0 => do nothing (only compute WF projected with NL projectors)
          =1 => non-local energy contribution
          =2 => 1st derivative(s) with respect to atomic position(s)
          =3 => 1st derivative(s) with respect to strain(s)
          =22=> mixed 2nd derivative(s) with respect to atomic pos. and q vector (at q=0)
          =25=> mixed 3rd derivative(s) with respect to atomic pos. and two q vectors (at q=0)
          =23=> 1st derivative(s) with respect to atomic pos. and
                1st derivative(s) with respect to atomic pos. and strains
          =4 => 2nd derivative(s) with respect to 2 atomic pos.
          =24=> 1st derivative(s) with respect to atm. pos. and
          =33=> mixed 2nd derivative(s) with respect to strain and q vector (at q=0)
                2nd derivative(s) with respect to 2 atomic pos.
          =5 => 1st derivative(s) with respect to k wavevector, typically
                sum_ij [ |p_i> D_ij <dp_j/dk| + |dp_i/dk> D_ij < p_j| ]
          =6 => 2nd derivative(s) with respect to 2 strains and
                mixed 2nd derivative(s) with respect to strains & atomic pos.
          =51 =>right 1st derivative(s) with respect to k wavevector, typically
                sum_ij [ |p_i> D_ij <dp_j/dk| ]
          =52 =>left 1st derivative(s) with respect to k wavevector, typically
                sum_ij [ |dp_i/dk> D_ij < p_j| ]
          =53 =>twist 1st derivative(s) with respect to k, typically
                sum_ij [ |dp_i/dk_(idir+1)> D_ij <dp_j//dk_(idir+2)|
          =54=> mixed 2nd derivative(s) with respect to atomic pos. and left k wavevector
          =55=> mixed 2nd derivative(s) with respect to strain and right k wavevector
          =7 => apply operator $\sum_i [ |p_i> <p_i| ],
                same as overlap operator with s_ij=identity (paw_opt==3 only)
          =8 => 2nd derivatives with respect to 2 k wavevectors
          =81=> partial 2nd derivatives with respect to 2 k wavevectors,
                full derivative with respect to k1, right derivative with respect to k2,
                (derivative with respect to k of choice 51), typically
                sum_ij [ |dp_i/dk1> D_ij <dp_j/dk2| + |p_i> D_ij < d2p_j/dk1dk2| ]
    Only choices 1,2,3,23,4,5,6 are compatible with useylm=0.
    Only choices 1,2,22,25,3,5,33,51,52,53,7,8,81 are compatible with signs=2
  cpopt=flag defining the status of cprjin%cp(:)=<Proj_i|Cnk> scalars (see below, side effects)
  dimenl1,dimenl2=dimensions of enl (see enl)
  dimekbq=1 if enl factors do not contain a exp(-iqR) phase, 2 is they do
  dimffnlin=second dimension of ffnlin (1+number of derivatives)
  dimffnlout=second dimension of ffnlout (1+number of derivatives)
  enl(cplex_enl*dimenl1,dimenl2,nspinortot**2,dimekbq)=
  ->Norm conserving : ==== when paw_opt=0 ====
                      (Real) Kleinman-Bylander energies (hartree)
                      dimenl1=lmnmax  -  dimenl2=ntypat
                      dimekbq is 2 if Enl contains a exp(-iqR) phase, 1 otherwise
  ->PAW :             ==== when paw_opt=1, 2 or 4 ====
                      (Real or complex, hermitian) Dij coefs to connect projectors
                      dimenl1=cplex_enl*lmnmax*(lmnmax+1)/2  -  dimenl2=natom
                      These are complex numbers if cplex_enl=2
                        enl(:,:,1) contains Dij^up-up
                        enl(:,:,2) contains Dij^dn-dn
                        enl(:,:,3) contains Dij^up-dn (only if nspinor=2)
                        enl(:,:,4) contains Dij^dn-up (only if nspinor=2)
                      dimekbq is 2 if Dij contains a exp(-iqR) phase, 1 otherwise
  ffnlin(npwin,dimffnlin,lmnmax,ntypat)=nonlocal form factors to be used
          for the application of the nonlocal operator to the |in> vector
  ffnlout(npwout,dimffnlout,lmnmax,ntypat)=nonlocal form factors to be used
          for the application of the nonlocal operator to the |out> vector
  -----------------------------------------------------------
  gprimd(3,3)=dimensional reciprocal space primitive translations
  idir=direction of the - atom to be moved in the case (choice=2,signs=2) or (choice=22,signs=2)
                        - k point direction in the case (choice=5,signs=2S)
                          for choice 53, twisted derivative involves idir+1 and idir+2 (mod 3)
                        - strain component (1:6) in the case (choice=3,signs=2) or (choice=6,signs=1)
                        - strain component (1:9) in the case (choice=33,signs=2)
                        - (1:9) components to specify the atom to be moved and the second q-gradient
                          direction in the case (choice=25,signs=2)
  indlmn(6,i,ntypat)= array giving l,m,n,lm,ln,s for i=lmn
  istwf_k=option parameter that describes the storage of wfs
  kgin(3,npwin)=integer coords of planewaves in basis sphere, for the |in> vector
  kgout(3,npwout)=integer coords of planewaves in basis sphere, for the |out> vector
  kpgin(npw,npkgin)= (k+G) components and related data, for the |in> vector
  kpgout(npw,nkpgout)=(k+G) components and related data, for the |out> vector
  kptin(3)=k point in terms of recip. translations, for the |in> vector
  kptout(3)=k point in terms of recip. translations, for the |out> vector
  lambda=factor to be used when computing (Vln-lambda.S) - only for paw_opt=2
         Typically lambda is the eigenvalue (or its guess)
  lmnmax=max. number of (l,m,n) components over all types of atoms
  matblk=dimension of the arrays ph3din and ph3dout
  mgfft=maximum size of 1D FFTs
  mpi_enreg=information about MPI parallelization
  natom=number of atoms in cell
  nattyp(ntypat)=number of atoms of each type
  ngfft(18)=contain all needed information about 3D FFT, see ~abinit/doc/variables/vargs.htm#ngfft
  nkpgin,nkpgout=second sizes of arrays kpgin/kpgout
  nloalg(3)=governs the choice of the algorithm for nonlocal operator
  nnlout=dimension of enlout (when signs=1 and choice>0):
         ==== if paw_opt=0, 1 or 2 ====
         choice   nnlout     |  choice   nnlout
              1   1          |      51   6 (complex)
              2   3*natom    |      52   6 (complex)
              3   6          |      53   6 (complex)
              4   6*natom    |      54   9*natom
             23   6+3*natom  |      55   36 (complex)
             24   9*natom    |       6   36+18*natom
              5   3          |       8   6
                             |      81   18 (complex)
         ==== if paw_opt=3 ====
         choice   nnlout
              1   1
              2   3*natom
              5   3
             51   3
             52   3
             54   9*natom
             55   36
              7   1
              8   6
             81   9
         ==== if paw_opt=4 ====
         not available
  npwin=number of planewaves for given k point, for the |in> vector
  npwout=number of planewaves for given k point, for the |out> vector
  nspinor=number of spinorial components of the wavefunctions (on current proc)
  nspinortot=total number of spinorial components of the wavefunctions
  ntypat=number of types of atoms in cell
  paw_opt= define the nonlocal operator concerned with:
           paw_opt=0 : Norm-conserving Vnl (use of Kleinman-Bylander ener.)
           paw_opt=1 : PAW nonlocal part of H (use of Dij coeffs)
           paw_opt=2 : PAW: (Vnl-lambda.Sij) (Sij=overlap matrix)
           paw_opt=3 : PAW overlap matrix (Sij)
           paw_opt=4 : both PAW nonlocal part of H (Dij) and overlap matrix (Sij)
  phkxredin(2,natom)=phase factors exp(2 pi kptin.xred)
  phkxredout(2,natom)=phase factors exp(2 pi kptout.xred)
  ph1d(2,3*(2*mgfft+1)*natom)=1D structure factors phase information
  ph3din(2,npwin,matblk)=3D structure factors, for each atom and plane wave (in)
  ph3dout(2,npwout,matblk)=3-dim structure factors, for each atom and plane wave (out)
  [qdir]= optional,direction of the q-gradient (only for choice=22 choice=25 and choice=33)
  signs= if 1, get contracted elements (energy, forces, stress, ...)
         if 2, applies the non-local operator to a function in reciprocal space
  sij(dimenl1,ntypat*(paw_opt/3))=overlap matrix components (only if paw_opt=2, 3 or 4)
  ucvol=unit cell volume (bohr^3)
  vectin(2,npwin*nspinor)=input cmplx wavefunction coefficients <G|in>
  [cprjin_left(natom,nspinor)]=The projected input wave function <p_nlm|in_left>
    for the left wavefunction. Data are assumed to be in memory, they are NOT recalculated here.

OUTPUT

 ==== if (signs==1) ====
 --If (paw_opt==0, 1 or 2)
    enlout(nnlout)= contribution to the non-local part of the following properties:
      if choice=1 : enlout(1)             -> the energy
      if choice=2 : enlout(3*natom)       -> 1st deriv. of energy wrt atm. pos (forces)
      if choice=3 : enlout(6)             -> 1st deriv. of energy wrt strain (stresses)
      if choice=4 : enlout(6*natom)       -> 2nd deriv. of energy wrt 2 atm. pos (dyn. mat.)
      if choice=23: enlout(6+3*natom)     -> 1st deriv. of energy wrt atm. pos (forces) and
                                             1st deriv. of energy wrt strain (stresses)
      if choice=24: enlout(9*natom)       -> 1st deriv. of energy wrt atm. pos (forces) and
                                             2nd deriv. of energy wrt 2 atm. pos (dyn. mat.)
      if choice=5 : enlout(3)             -> 1st deriv. of energy wrt k
      if choice=51: enlout(3)             -> 1st deriv. (right) of energy wrt k
      if choice=52: enlout(3)             -> 1st deriv. (left) of energy wrt k
      if choice=53: enlout(3)             -> 1st deriv. (twist) of energy wrt k
      if choice=54: enlout(18*natom)      -> 2nd deriv. of energy wrt atm. pos and right k (Born eff. charge)
      if choice=55: enlout(36)            -> 2nd deriv. of energy wrt strain and right k (piezoelastic tensor)
      if choice=6 : enlout(36+18*natom)   -> 2nd deriv. of energy wrt 2 strains (elast. tensor) and
                                             2nd deriv. of energy wrt to atm. pos and strain (internal strain)
      if choice=8 : enlout(6)             -> 2nd deriv. of energy wrt 2 k
      if choice=81: enlout(9)             -> 2nd deriv.of E: full derivative w.r.t. k1, right derivative w.r.t k2
 --If (paw_opt==3)
      if choice=1 : enlout(1)             -> contribution to <c|S|c> (note: not including <c|c>)
      if choice=2 : enlout(3*natom)       -> contribution to <c|dS/d_atm.pos|c>
      if choice=51: enlout(3)             -> contribution to <c|d(right)S/d_k|c>
      if choice=52: enlout(3)             -> contribution to <c|d(left)S/d_k|c>
      if choice=54: enlout(18*natom)      -> 2nd deriv. of energy wrt atm. pos and right k (Born eff. charge)
      if choice=55: enlout(36)            -> 2nd deriv. of energy wrt strain and right k (piezoelastic tensor)
      if choice=7 : enlout(1)             -> contribution to <c|sum_i[p_i><p_i]|c>
      if choice=8 : enlout(6)             -> contribution to <c|d2S/d_k1d_k2|c>
      if choice=81: enlout(9)             -> contribution to <c|dS/d_k1[d(right)d_k2]|c>
 --If (paw_opt==4)
      not available
 ==== if (signs==2) ====
 --if (paw_opt=0)
    vectout(2,npwout*my_nspinor*ndat)=result of the aplication of the concerned operator
                or one of its derivatives to the input vect.
      if (choice=22) <G|d2V_nonlocal/d(atm. pos)dq|vect_in> (at q=0)
      if (choice=25) <G|d3V_nonlocal/d(atm. pos)dqdq|vect_in> (at q=0)
      if (choice=33) <G|d2V_nonlocal/d(strain)dq|vect_in> (at q=0)
 --if (paw_opt=0, 1 or 4)
    vectout(2,npwout*my_nspinor*ndat)=result of the aplication of the concerned operator
                or one of its derivatives to the input vect.:
      if (choice=1)  <G|V_nonlocal|vect_in>
      if (choice=2)  <G|dV_nonlocal/d(atm. pos)|vect_in>
      if (choice=3)  <G|dV_nonlocal/d(strain)|vect_in>
      if (choice=5)  <G|dV_nonlocal/d(k)|vect_in>
      if (choice=51) <G|d(right)V_nonlocal/d(k)|vect_in>
      if (choice=52) <G|d(left)V_nonlocal/d(k)|vect_in>
      if (choice=53) <G|d(twist)V_nonlocal/d(k)|vect_in>
      if (choice=8)  <G|d2V_nonlocal/d(k)d(k)|vect_in>
      if (choice=81) <G|d[d(right)V_nonlocal/d(k)]/d(k)|vect_in>
 --if (paw_opt=2)
    vectout(2,npwout*my_nspinor*ndat)=final vector in reciprocal space:
      if (choice=1)  <G|V_nonlocal-lamdba.(I+S)|vect_in>
      if (choice=2)  <G|d[V_nonlocal-lamdba.(I+S)]/d(atm. pos)|vect_in>
      if (choice=3)  <G|d[V_nonlocal-lamdba.(I+S)]/d(strain)|vect_in>
      if (choice=5)  <G|d[V_nonlocal-lamdba.(I+S)]/d(k)|vect_in>
      if (choice=51) <G|d(right)[V_nonlocal-lamdba.(I+S)]/d(k)|vect_in>
      if (choice=52) <G|d(left)[V_nonlocal-lamdba.(I+S)]/d(k)|vect_in>
      if (choice=53) <G|d(twist)[V_nonlocal-lamdba.(I+S)]/d(k)|vect_in>
      if (choice=8)  <G|d2[V_nonlocal-lamdba.(I+S)]/d(k)d(k)|vect_in>
      if (choice=81) <G|d[d(right[V_nonlocal-lamdba.(I+S)]/d(k)]/d(k)|vect_in>
 --if (paw_opt=3 or 4)
    svectout(2,npwout*my_nspinor*ndat)=result of the aplication of Sij (overlap matrix)
                  or one of its derivatives to the input vect.:
      if (choice=1)  <G|I+S|vect_in>
      if (choice=2)  <G|dS/d(atm. pos)|vect_in>
      if (choice=3)  <G|dS/d(strain)|vect_in>
      if (choice=5)  <G|dS/d(k)|vect_in>
      if (choice=51) <G|d(right)S/d(k)|vect_in>
      if (choice=52) <G|d(left)S/d(k)|vect_in>
      if (choice=53) <G|d(twist)S/d(k)|vect_in>
      if (choice=3)  <G|d[V_nonlocal-lamdba.(I+S)]/d(strain)|vect_in>
      if (choice=7)  <G|sum_i[p_i><p_i]|vect_in>
      if (choice=8)  <G|d2S/d(k)d(k)|vect_in>
      if (choice=81) <G|d[d(right)S/d(k)]/d(k)|vect_in>

SIDE EFFECTS

  cprjin(natom,nspinor) <type(pawcprj_type)>=projected input wave function |in> on non-local projectors
                                            =<p_lmn|in> and derivatives
                    Treatment depends on cpopt parameter:
                     if cpopt=-1, <p_lmn|in> (and derivatives)
                                  are computed here (and not saved)
                     if cpopt= 0, <p_lmn|in> are computed here and saved
                                  derivatives are eventually computed but not saved
                     if cpopt= 1, <p_lmn|in> and first derivatives are computed here and saved
                                  other derivatives are eventually computed but not saved
                     if cpopt= 2  <p_lmn|in> are already in memory;
                                  first (and 2nd) derivatives are computed here and not saved
                     if cpopt= 3  <p_lmn|in> are already in memory;
                                  first derivatives are computed here and saved
                                  other derivatives are eventually computed but not saved
                     if cpopt= 4  <p_lmn|in> and first derivatives are already in memory;
                                  other derivatives are not computed (except when choice=8 or 81)
                                  This option is not compatible with choice=4,24 or 6
                     Warning: for cpopt= 1 or 3, derivatives wrt strains do not contain
                              the contribution due to the volume change;
                              i.e. <dp_lmn/dEps|in> are incomplete.

NOTES

 This application of the nonlocal operator is programmed using a direct
 implementation of spherical harmonics (Ylm). Abinit used historically
 Legendre polynomials for the application of nonlocal operator; but the
 implementation of PAW algorithm enforced the use of Ylm.

 In the case signs=1, the array vectout is not used, nor modified
 so that the same array as vectin can be used as a dummy argument;
 the same is true for the pairs npwin-npwout, ffnlin-ffnlout,
 kgin-kgout, ph3din-ph3dout, phkredin-phkxredout).

 Notes about choice==33:
  **Since the 2nd derivative w.r.t q-vector is calculated along cartesian
    directions, the 1/twopi**2 factor (that in the rest of the code is applied
    in the reduced to cartesian derivative conversion process) is here
    explicictly included in the formulas.

  **Notice that idir=1-9, in contrast to the strain perturbation (idir=1-6),
    because this term is not symmetric w.r.t permutations of the two strain
    indices.(Also applies for choice=25)

  **A -i factor has been factorized out in all the contributions of the second
    q-gradient of the metric Hamiltonian and in the first and second q-gradients
    of the atomic displacement Hamiltonian. This is lately included in the
    matrix element calculation.

TODO

 * Complete implementation of spin-orbit

SOURCE

 347  subroutine nonlop_ylm(atindx1,choice,cpopt,cprjin,dimenl1,dimenl2,dimekbq,dimffnlin,dimffnlout,&
 348 &                      enl,enlout,ffnlin,ffnlout,gprimd,idir,indlmn,istwf_k,&
 349 &                      kgin,kgout,kpgin,kpgout,kptin,kptout,lambda,lmnmax,matblk,mgfft,&
 350 &                      mpi_enreg,natom,nattyp,ngfft,nkpgin,nkpgout,nloalg,nnlout,&
 351 &                      npwin,npwout,nspinor,nspinortot,ntypat,paw_opt,phkxredin,phkxredout,ph1d,&
 352 &                      ph3din,ph3dout,signs,sij,svectout,ucvol,vectin,vectout,cprjin_left,&
 353 &                      enlout_im,ndat_left,qdir)
 354 
 355 !Arguments ------------------------------------
 356 !scalars
 357  integer,intent(in) :: choice,cpopt,dimenl1,dimenl2,dimekbq,dimffnlin,dimffnlout,idir
 358  integer,intent(in) :: istwf_k,lmnmax,matblk,mgfft,natom,nkpgin,nkpgout,nnlout
 359  integer,intent(in) :: npwin,npwout,nspinor,nspinortot,ntypat,paw_opt,signs
 360  integer,intent(in),optional :: qdir,ndat_left
 361  real(dp),intent(in) :: lambda,ucvol
 362  type(MPI_type),intent(in) :: mpi_enreg
 363 !arrays
 364  integer,intent(in) :: atindx1(natom),kgin(3,npwin)
 365  integer,intent(in),target :: indlmn(6,lmnmax,ntypat)
 366  integer,intent(in) :: kgout(3,npwout),nattyp(ntypat),ngfft(18),nloalg(3)
 367  real(dp),intent(in) :: enl(dimenl1,dimenl2,nspinortot**2,dimekbq)
 368  real(dp),intent(in),target :: ffnlin(npwin,dimffnlin,lmnmax,ntypat)
 369  real(dp),intent(in),target :: ffnlout(npwout,dimffnlout,lmnmax,ntypat)
 370  real(dp),intent(in) :: gprimd(3,3)
 371  real(dp),intent(in),target :: kpgin(npwin,nkpgin),kpgout(npwout,nkpgout)
 372  real(dp),intent(in) :: kptin(3),kptout(3),ph1d(2,3*(2*mgfft+1)*natom)
 373  real(dp),intent(in) :: phkxredin(2,natom),phkxredout(2,natom)
 374  real(dp),intent(in) :: sij(dimenl1,ntypat*((paw_opt+1)/3))
 375  real(dp),intent(inout) :: ph3din(2,npwin,matblk),ph3dout(2,npwout,matblk)
 376  real(dp),intent(inout) :: vectin(:,:)
 377  real(dp),intent(out) :: enlout(:)
 378  real(dp),intent(out),optional :: enlout_im(:)
 379  real(dp),intent(out) :: svectout(:,:)
 380  real(dp),intent(inout) :: vectout (:,:)
 381  type(pawcprj_type),intent(inout) :: cprjin(:,:)
 382  type(pawcprj_type),optional,intent(in) :: cprjin_left(:,:)
 383 
 384 !Local variables-------------------------------
 385 !scalars
 386  integer :: choice_a,choice_b,cplex,cplex_enl,cplex_fac,ia,ia1,ia2,ia3,ia4,ia5
 387  integer :: iatm,ic,idir1,idir2,ii,ierr,ilmn,ishift,ispinor,itypat,jc,mincat,mu,mua,mub,mu0
 388  integer :: n1,n2,n3,nd2gxdt,ndat_left_,ndgxdt,ndgxdt_stored,nd2gxdtfac,ndgxdtfac
 389  integer :: nincat,nkpgin_,nkpgout_,nlmn,nu,nua1,nua2,nub1,nub2,optder
 390  real(dp) :: enlk!, tsec(2)
 391  logical :: check,testnl,no_opernla_mv,no_opernlb_mv
 392  character(len=500) :: message
 393 !arrays
 394  integer,parameter :: alpha(6)=(/1,2,3,3,3,2/),beta(6)=(/1,2,3,2,1,1/)
 395  integer,parameter :: gamma(3,3)=reshape((/1,6,5,6,2,4,5,4,3/),(/3,3/))
 396  integer,allocatable :: cplex_dgxdt(:),cplex_d2gxdt(:)
 397  integer,ABI_CONTIGUOUS pointer :: indlmn_typ(:,:)
 398  real(dp),allocatable :: d2gxdt(:,:,:,:,:),d2gxdtfac(:,:,:,:,:),d2gxdtfac_sij(:,:,:,:,:)
 399  real(dp),allocatable :: dgxdt(:,:,:,:,:),dgxdtfac(:,:,:,:,:),dgxdtfac_sij(:,:,:,:,:)
 400  real(dp),allocatable :: ddkk(:),fnlk(:),gmet(:,:)
 401  real(dp),allocatable :: gx(:,:,:,:),gxfac(:,:,:,:),gxfac_sij(:,:,:,:),gx_left(:,:,:,:)
 402  real(dp),allocatable :: sij_typ(:),strnlk(:)
 403  real(dp),allocatable :: work1(:),work2(:),work3(:,:),work4(:,:),work5(:,:,:),work6(:,:,:),work7(:,:,:)
 404  real(dp),ABI_CONTIGUOUS pointer :: ffnlin_typ(:,:,:),ffnlout_typ(:,:,:),kpgin_(:,:),kpgout_(:,:)
 405 
 406 ! **********************************************************************
 407 
 408  DBG_ENTER("COLL")
 409 
 410 ! call timab(1100,1,tsec)
 411 
 412 !Check consistency of arguments
 413 !==============================================================
 414 
 415 !signs=1, almost all choices
 416  if (signs==1) then
 417    if(paw_opt<3) then
 418      check=(choice==0 .or.choice==1 .or.choice==2 .or.choice==3 .or.choice==4 .or.&
 419 &     choice==23.or.choice==24.or.choice==5 .or.choice==51.or.choice==52.or.&
 420 &     choice==53.or.choice==54.or.choice==55.or.&
 421 &     choice==6 .or.choice==8 .or.choice==81)
 422    else if (paw_opt==3) then
 423      check=(choice== 0.or.choice== 1.or.choice== 2.or.choice==3.or.choice==5.or.&
 424 &     choice==23.or.choice==51.or.choice==52.or.choice==53.or.choice==54.or.choice==55.or.&
 425 &     choice== 8.or.choice==81)
 426    else
 427      check = .false.
 428    end if
 429    ABI_CHECK(check,'BUG: choice not compatible (for signs=1)')
 430  end if
 431 
 432 !signs=2, less choices
 433  if (signs==2) then
 434    check=(choice==0.or.choice==1.or.choice==2.or.choice==22.or.choice==25.or.choice==3 .or.&
 435 &   choice==5.or.choice==33.or.choice==51.or.choice==52.or.choice==53.or.choice==54.or.&
 436 &   choice==7.or.choice==8.or.choice==81)
 437    ABI_CHECK(check,'BUG: choice not compatible (for signs=2)')
 438  end if
 439 !1<=idir<=6 is required when choice=3 and signs=2
 440  if (choice==3.and.signs==2) then
 441    check=(idir>=1.and.idir<=6)
 442    ABI_CHECK(check,'BUG: choice=3 and signs=2 requires 1<=idir<=6')
 443 !1<=idir<=9 is required when choice= 25 or 33 and signs=2
 444  else if ((choice==25.or.choice==33).and.signs==2) then
 445    check=(idir>=1.and.idir<=9)
 446    ABI_CHECK(check,'BUG: choice= 25 or 33 and signs=2 requires 1<=idir<=9')
 447 !1<=idir<=9 is required when choice==8/81 and signs=2
 448  else if ((choice==8.or.choice==81.or.choice==54).and.signs==2) then
 449    check=(idir>=1.and.idir<=9)
 450    ABI_CHECK(check,'BUG: choice=8/81 and signs=2 requires 1<=idir<=9')
 451  else
 452 !  signs=2 requires 1<=idir<=3 when choice>1
 453    check=(signs/=2.or.choice<=1.or.choice==7.or.(idir>=1.and.idir<=3))
 454    ABI_CHECK(check,'BUG: signs=2 requires 1<=idir<=3')
 455  end if
 456 !1<=qdir<=3 is required when choice==22 or choice==25 or choice==33 and signs=2
 457  if ((choice==22.or.choice==25.or.choice==33).and.signs==2) then
 458    check=(qdir>=1.and.qdir<=3)
 459    ABI_CHECK(check,'BUG: choice=22,25 or 33 and signs=2 requires 1<=qdir<=3')
 460  end if
 461 
 462 !check allowed values for cpopt
 463  check=(cpopt>=-1.and.cpopt<=4)
 464  ABI_CHECK(check,'bad value for cpopt')
 465  check=(cpopt/=4.or.(choice/=4.and.choice/=24.and.choice/=6))
 466  ABI_CHECK(check,'BUG: cpopt=4 not allowed for 2nd derivatives')
 467  check=(cpopt/=2.or.(choice/=8.and.choice/=81))
 468  ABI_CHECK(check,'BUG: cpopt=2 not allowed for choice=8,81, use cpopt=4 instead')
 469 !check conditions for optional arguments
 470  check=((.not.present(cprjin_left)).or.(signs==1.and.choice==1))
 471  ABI_CHECK(check,'BUG: when cprjin_left is present, must have choice=1,signs=1')
 472 !protect special case choice==7
 473  check=(choice/=7.or.paw_opt==3)
 474  ABI_CHECK(check,'BUG: when choice=7, paw_opt must be 3')
 475 !spin-orbit not yet allowed
 476  check=(maxval(indlmn(6,:,:))<=1)
 477  ABI_CHECK(check,'BUG: spin-orbit with Yml for nonlop not yet allowed')
 478 
 479 !Test: size of blocks of atoms
 480  mincat=min(NLO_MINCAT,maxval(nattyp))
 481  if (nloalg(2)<=0.and.mincat>matblk) then
 482    write(message, '(a,a,a,i4,a,i4,a)' ) &
 483 &   'With nloc_mem<=0, mincat must be less than matblk.',ch10,&
 484 &   'Their value is ',mincat,' and ',matblk,'.'
 485    ABI_BUG(message)
 486  end if
 487  ndat_left_=1
 488  if (present(ndat_left)) then
 489    ndat_left_=ndat_left
 490  end if
 491  if (nloalg(1)<2.or.nloalg(1)>10) then
 492    ABI_ERROR('nloalg(1) should be between 2 and 10.')
 493  end if
 494  ! Determine which implementation to use : matrix-vector (mv), matrix-vector with dgmev (mv-dgemv), or native
 495  !nloalg(1)|  opernla |  opernlb
 496  !------------------------------
 497  !    2    | mv-dgemv | mv-dgemv
 498  !    3    |    mv    |    mv
 499  !  4(def) |  native  |  native
 500  !    5    | mv-dgemv |    mv
 501  !    6    |    mv    | mv-dgemv
 502  !    7    | mv-dgemv |  native
 503  !    8    |  native  |    mv
 504  !    9    |    mv    |  native
 505  !   10    |  native  | mv-dgemv
 506  no_opernla_mv = nloalg(1)==4.or.nloalg(1)==8.or.nloalg(1)==10 ! have to be consistent with getcprj
 507  no_opernlb_mv = nloalg(1)==4.or.nloalg(1)==7.or.nloalg(1)==9
 508 
 509 !Define dimensions of projected scalars
 510 !==============================================================
 511 
 512 !Define some useful variables
 513  n1=ngfft(1);n2=ngfft(2);n3=ngfft(3)
 514  choice_a=merge(choice,1,choice/=7);choice_b=choice_a
 515  if (cpopt>=2) choice_a=-choice_a
 516  cplex=2;if (istwf_k>1) cplex=1 !Take into account TR-symmetry
 517  cplex_enl=1;if (paw_opt>0) cplex_enl=2*dimenl1/(lmnmax*(lmnmax+1))
 518  cplex_fac=max(cplex,dimekbq)
 519  if ((nspinortot==2.or.cplex_enl==2).and.paw_opt>0.and.choice/=7) cplex_fac=2
 520 
 521 !Define dimensions of projected scalars
 522  ndgxdt=0;ndgxdtfac=0;nd2gxdt=0;nd2gxdtfac=0
 523  if (choice==2) then
 524    if (signs==1) ndgxdt=3
 525    if (signs==2) ndgxdt=1
 526    if (signs==2) ndgxdtfac=1
 527  end if
 528  if (choice==22) then
 529    if (signs==2) ndgxdt=1
 530    if (signs==2) ndgxdtfac=1
 531  end if
 532  if (choice==25) then
 533    if (signs==2) ndgxdt=1
 534    if (signs==2) ndgxdtfac=1
 535  end if
 536  if (choice==3) then
 537    if (signs==1) ndgxdt=6
 538    if (signs==2) ndgxdt=1
 539    if (signs==2) ndgxdtfac=1
 540  end if
 541  if (choice==23) then
 542    if (signs==1) ndgxdt=9
 543  end if
 544  if (choice==4) then
 545    if(signs==1) ndgxdt=3
 546    if(signs==1) ndgxdtfac=3
 547    if(signs==1) nd2gxdt=6
 548  end if
 549  if (choice==24) then
 550    if(signs==1) ndgxdt=3
 551    if(signs==1) ndgxdtfac=3
 552    if(signs==1) nd2gxdt=6
 553  end if
 554  if (choice==5) then
 555    if(signs==1) ndgxdt=3
 556    if(signs==2) ndgxdt=1
 557    if(signs==2) ndgxdtfac=1
 558  end if
 559  if (choice==33) then
 560    if(signs==2) ndgxdt=2
 561    if(signs==2) ndgxdtfac=2
 562    if(signs==2) nd2gxdt=3
 563    if(signs==2) nd2gxdtfac=3
 564  end if
 565  if (choice==51) then
 566    if(signs==1) ndgxdt=3
 567    if(signs==2) ndgxdt=1
 568    if(signs==2) ndgxdtfac=1
 569  end if
 570  if (choice==52) then
 571    if(signs==1) ndgxdt=3
 572    if(signs==2) ndgxdt=1
 573    if(signs==2) ndgxdtfac=1
 574  end if
 575  if (choice==53) then
 576    if(signs==1) ndgxdt=3
 577    if(signs==1) ndgxdtfac=3
 578    if(signs==2) ndgxdt=2
 579    if(signs==2) ndgxdtfac=2
 580  end if
 581  if (choice==54) then
 582    if(signs==1) ndgxdt=6
 583    if(signs==1) ndgxdtfac=6
 584    if(signs==1) nd2gxdt=9
 585    if(signs==2) ndgxdt=1
 586    if(signs==2) nd2gxdt=1
 587    if(signs==2) ndgxdtfac=1
 588    if(signs==2) nd2gxdtfac=1
 589  end if
 590  if (choice==55) then
 591    if(signs==1) ndgxdt=9
 592    if(signs==1) ndgxdtfac=9
 593    if(signs==1) nd2gxdt=18
 594  end if
 595  if (choice==6) then
 596    if(signs==1) ndgxdt=9
 597    if(signs==1) ndgxdtfac=9
 598    if(signs==1) nd2gxdt=54
 599  end if
 600  if (choice==8) then
 601    if(signs==1) ndgxdt=3
 602    if(signs==1) ndgxdtfac=3
 603    if(signs==1) nd2gxdt=6
 604    if(signs==2) ndgxdt=2
 605    if(signs==2) ndgxdtfac=2
 606    if(signs==2) nd2gxdt=1
 607    if(signs==2) nd2gxdtfac=1
 608  end if
 609  if (choice==81) then
 610    if(signs==1) ndgxdt=3
 611    if(signs==1) ndgxdtfac=3
 612    if(signs==1) nd2gxdt=6
 613    if(signs==2) ndgxdt=1
 614    if(signs==2) ndgxdtfac=1
 615    if(signs==2) nd2gxdt=1
 616    if(signs==2) nd2gxdtfac=1
 617  end if
 618  ABI_CHECK(ndgxdtfac<=ndgxdt,"BUG: ndgxdtfac>ndgxdt!")
 619  optder=0;if (ndgxdtfac>0) optder=1
 620  if (nd2gxdtfac>0) optder=2
 621 
 622 !Consistency tests
 623  if (cpopt==4) then
 624    if (ndgxdt>0.and.cprjin(1,1)%ncpgr<=0) then
 625      message='cprjin%ncpgr=0 not allowed with cpopt=4 and these (choice,signs) !'
 626      ABI_BUG(message)
 627    end if
 628  end if
 629  if (cpopt==1.or.cpopt==3) then
 630    if (cprjin(1,1)%ncpgr<ndgxdt) then
 631      message='should have cprjin%ncpgr>=ndgxdt with cpopt=1 or 3 !'
 632      ABI_BUG(message)
 633    end if
 634  end if
 635 
 636 
 637 !Additional steps before calculation
 638 !==============================================================
 639 
 640 !Initialize output arrays
 641  if (signs==1) then
 642    ABI_MALLOC(fnlk,(3*natom))
 643    ABI_MALLOC(ddkk,(6))
 644    ABI_MALLOC(strnlk,(6))
 645    enlk=zero;fnlk=zero;ddkk=zero;strnlk=zero
 646    enlout(:)=zero
 647    if (present(enlout_im)) then
 648      enlout_im(:)=zero
 649    end if
 650  end if
 651  if (signs==2) then
 652    if (paw_opt==0.or.paw_opt==1.or.paw_opt==4) vectout(:,:)=zero
 653    if (paw_opt==2.and.choice==1) vectout(:,:)=-lambda*vectin(:,:)
 654    if (paw_opt==2.and.choice> 1) vectout(:,:)=zero
 655    if (paw_opt==3.or.paw_opt==4) then
 656      if (choice==1) svectout(:,:)=vectin(:,:)
 657      if (choice> 1) svectout(:,:)=zero
 658    end if
 659  end if
 660 
 661 !Eventually re-compute (k+G) vectors (and related data)
 662  nkpgin_=0
 663  if (choice==2.or.choice==22.or.choice==25.or.choice==33.or.choice==54) nkpgin_=3
 664  if (signs==1) then
 665    if (choice==4.or.choice==24) nkpgin_=9
 666    if (choice==3.or.choice==23.or.choice==6) nkpgin_=3
 667    if (choice==55) nkpgin_=3
 668  end if
 669  if (nkpgin<nkpgin_) then
 670    ABI_MALLOC(kpgin_,(npwin,nkpgin_))
 671 
 672    !For the metric derivatives we need kpg in Cartesian coordinates
 673    if (choice==33) then
 674      call mkkpgcart(gprimd,kgin,kpgin_,kptin,nkpgin_,npwin)
 675    else
 676      call mkkpg(kgin,kpgin_,kptin,nkpgin_,npwin)
 677    end if
 678 
 679  else
 680    nkpgin_ = nkpgin
 681    kpgin_  => kpgin
 682  end if
 683 
 684  nkpgout_=0
 685  if ((choice==2.or.choice==22.or.choice==25.or.choice==3.or.choice==33.or.choice==54).and.signs==2) nkpgout_=3
 686  if (nkpgout<nkpgout_) then
 687    ABI_MALLOC(kpgout_,(npwout,nkpgout_))
 688 
 689    !For the metric derivatives we need kpg in Cartesian coordinates
 690    if (choice==33) then
 691      call mkkpgcart(gprimd,kgout,kpgout_,kptout,nkpgout_,npwout)
 692    else
 693      call mkkpg(kgout,kpgout_,kptout,nkpgout_,npwout)
 694    end if
 695 
 696  else
 697    nkpgout_ = nkpgout
 698    kpgout_ => kpgout
 699  end if
 700 
 701 !Big loop on atom types
 702 !==============================================================
 703 
 704  ia1=1;iatm=0
 705  do itypat=1,ntypat
 706 
 707 !  Get atom loop indices for different types:
 708    ia2=ia1+nattyp(itypat)-1;ia5=1
 709 
 710 !  Select quantities specific to the current type of atom
 711    nlmn=count(indlmn(3,:,itypat)>0)
 712 
 713 !  Test on local part
 714    testnl=(paw_opt/=0)
 715    if (paw_opt==0) testnl=any(abs(enl(:,:,:,:))>tol10)
 716 
 717 !  Some non-local part is to be applied for that type of atom
 718    if (testnl) then
 719 
 720 !    Store some quantities depending only of the atom type
 721      ffnlin_typ => ffnlin(:,:,:,itypat)
 722      indlmn_typ => indlmn(:,:,itypat)
 723      if (signs==2) then
 724        ffnlout_typ => ffnlout(:,:,:,itypat)
 725      end if
 726      if (paw_opt>=2) then
 727        ABI_MALLOC(sij_typ,(nlmn*(nlmn+1)/2))
 728        if (cplex_enl==1) then
 729          do ilmn=1,nlmn*(nlmn+1)/2
 730            sij_typ(ilmn)=sij(ilmn,itypat)
 731          end do
 732        else
 733          do ilmn=1,nlmn*(nlmn+1)/2
 734            sij_typ(ilmn)=sij(2*ilmn-1,itypat)
 735          end do
 736        end if
 737      else
 738        ABI_MALLOC(sij_typ,(0))
 739      end if
 740 
 741 !    Loop over atoms of the same type
 742 !    ==============================================================
 743 
 744 !    Cut the sum on different atoms in blocks, to allow memory saving.
 745 !    Inner summations on atoms will be done from ia3 to ia4.
 746 !    Note: the maximum range from ia3 to ia4 is mincat (max. increment of atoms).
 747 
 748      do ia3=ia1,ia2,mincat
 749        ia4=min(ia2,ia3+mincat-1)
 750 !      Give the increment of number of atoms in this subset.
 751        nincat=ia4-ia3+1
 752 
 753 !      Prepare the phase factors if they were not already computed
 754        if (nloalg(2)<=0) then
 755          call ph1d3d(ia3,ia4,kgin,matblk,natom,npwin,n1,n2,n3,phkxredin,ph1d,ph3din)
 756        end if
 757 
 758 !      Allocate memory for projected scalars
 759        ABI_MALLOC(gx,(cplex,nlmn,nincat,nspinor))
 760        ABI_MALLOC(gxfac,(cplex_fac,nlmn,nincat,nspinor))
 761        ABI_MALLOC(dgxdt,(cplex,ndgxdt,nlmn,nincat,nspinor))
 762        ABI_MALLOC(d2gxdt,(cplex,nd2gxdt,nlmn,nincat,nspinor))
 763        ABI_MALLOC(d2gxdtfac,(cplex_fac,nd2gxdtfac,nlmn,nincat,nspinor))
 764        ABI_MALLOC(dgxdtfac,(cplex_fac,ndgxdtfac,nlmn,nincat,nspinor))
 765        gx(:,:,:,:)=zero;gxfac(:,:,:,:)=zero
 766        if (ndgxdt>0) dgxdt(:,:,:,:,:)=zero
 767        if (ndgxdtfac>0) dgxdtfac(:,:,:,:,:)=zero
 768        if (nd2gxdt>0) d2gxdt(:,:,:,:,:)=zero
 769        if (nd2gxdtfac>0) d2gxdtfac(:,:,:,:,:)=zero
 770        if (paw_opt>=3) then
 771          ABI_MALLOC(gxfac_sij,(cplex,nlmn,nincat,nspinor))
 772          ABI_MALLOC(dgxdtfac_sij,(cplex,ndgxdtfac,nlmn,nincat,nspinor))
 773          ABI_MALLOC(d2gxdtfac_sij,(cplex,nd2gxdtfac,nlmn,nincat,nspinor))
 774          gxfac_sij(:,:,:,:)=zero
 775          if (ndgxdtfac>0) dgxdtfac_sij(:,:,:,:,:)=zero
 776          if (nd2gxdtfac>0) d2gxdtfac_sij(:,:,:,:,:) = zero
 777        else
 778          ABI_MALLOC(gxfac_sij,(0,0,0,0))
 779          ABI_MALLOC(dgxdtfac_sij,(0,0,0,0,0))
 780          ABI_MALLOC(d2gxdtfac_sij,(0,0,0,0,0))
 781        end if
 782 
 783 !      When istwf_k > 1, gx derivatives can be real or pure imaginary
 784 !      cplex_dgxdt(i)  = 1 if dgxdt(1,i,:,:)  is real, 2 if it is pure imaginary
 785 !      cplex_d2gxdt(i) = 1 if d2gxdt(1,i,:,:) is real, 2 if it is pure imaginary
 786        ABI_MALLOC(cplex_dgxdt,(ndgxdt))
 787        ABI_MALLOC(cplex_d2gxdt,(nd2gxdt))
 788        cplex_dgxdt(:) = 1 ; cplex_d2gxdt(:) = 1
 789        if(ndgxdt > 0) then
 790          if (choice==5.or.choice==51.or.choice==52.or.choice==53.or. &
 791 &         choice==8.or.choice==81) cplex_dgxdt(:) = 2
 792          if (choice==54.and.signs==1) cplex_dgxdt(4:6) = 2
 793          if (choice==54.and.signs==2) cplex_dgxdt(:)   = 2
 794          if (choice==55.and.signs==1) cplex_dgxdt(7:9) = 2
 795        end if
 796        if(nd2gxdt > 0) then
 797          if (choice==54) cplex_d2gxdt(:) = 2
 798          if (choice==55.and.signs==1) cplex_d2gxdt(1:18)= 2
 799        end if
 800 
 801 !      Compute projection of current wave function |c> on each
 802 !      non-local projector: <p_lmn|c>
 803 !      ==============================================================
 804 
 805 !      Retrieve eventually <p_lmn|c> coeffs (and derivatives)
 806        if (cpopt>=2) then
 807          do ispinor=1,nspinor
 808            do ia=1,nincat
 809              gx(1:cplex,1:nlmn,ia,ispinor)=cprjin(iatm+ia,ispinor)%cp(1:cplex,1:nlmn)
 810            end do
 811          end do
 812        end if
 813        if (cpopt==4.and.ndgxdt>0) then
 814          ndgxdt_stored = cprjin(1,1)%ncpgr
 815          ishift=0
 816          if (((choice==2).or.(choice==3)).and.(ndgxdt_stored>ndgxdt).and.(signs==2)) ishift=idir-ndgxdt
 817          if ((choice==2).and.(ndgxdt_stored==9).and.(signs==2)) ishift=ishift+6
 818          if (choice==2.and.(ndgxdt_stored>ndgxdt).and.(signs==1)) ishift=ndgxdt_stored-ndgxdt
 819          if(cplex == 2) then
 820            do ispinor=1,nspinor
 821              do ia=1,nincat
 822                if (ndgxdt_stored==ndgxdt.or.(ndgxdt_stored>ndgxdt.and.((choice==2).or.(choice==3)))) then
 823                  dgxdt(1:2,1:ndgxdt,1:nlmn,ia,ispinor)=cprjin(iatm+ia,ispinor)%dcp(1:2,1+ishift:ndgxdt+ishift,1:nlmn)
 824                else if (signs==2.and.ndgxdt_stored==3) then
 825                  if (choice==5.or.choice==51.or.choice==52) then ! ndgxdt=1
 826                    dgxdt(1:2,1,1:nlmn,ia,ispinor)=cprjin(iatm+ia,ispinor)%dcp(1:2,idir,1:nlmn)
 827                  else if (choice==53) then ! ndgxdt=2
 828                    idir1 = modulo(idir,3)+1; idir2 = modulo(idir+1,3)+1
 829                    dgxdt(1:2,1,1:nlmn,ia,ispinor)=cprjin(iatm+ia,ispinor)%dcp(1:2,idir1,1:nlmn)
 830                    dgxdt(1:2,2,1:nlmn,ia,ispinor)=cprjin(iatm+ia,ispinor)%dcp(1:2,idir2,1:nlmn)
 831                  else if (choice==8) then ! ndgxdt=2
 832                    idir1=(idir-1)/3+1; idir2=mod((idir-1),3)+1
 833                    dgxdt(1:2,1,1:nlmn,ia,ispinor)=cprjin(iatm+ia,ispinor)%dcp(1:2,idir1,1:nlmn)
 834                    dgxdt(1:2,2,1:nlmn,ia,ispinor)=cprjin(iatm+ia,ispinor)%dcp(1:2,idir2,1:nlmn)
 835                  else if (choice==81) then ! ndgxdt=1
 836                    idir1=(idir-1)/3+1; idir2=mod((idir-1),3)+1
 837                    dgxdt(1:2,1,1:nlmn,ia,ispinor)=cprjin(iatm+ia,ispinor)%dcp(1:2,idir2,1:nlmn)
 838                  end if
 839                end if
 840              end do
 841            end do
 842          else ! cplex != 2
 843            do ispinor=1,nspinor
 844              do ia=1,nincat
 845                do ilmn=1,nlmn
 846                  if (ndgxdt_stored==ndgxdt.or.(ndgxdt_stored>ndgxdt.and.((choice==2).or.(choice==3)))) then
 847                    do ii=1,ndgxdt
 848                      ic = cplex_dgxdt(ii)
 849                      dgxdt(1,ii,ilmn,ia,ispinor)=cprjin(iatm+ia,ispinor)%dcp(ic,ii+ishift,ilmn)
 850                    end do
 851                  else if (signs==2.and.ndgxdt_stored==3) then
 852                    if (choice==5.or.choice==51.or.choice==52) then ! ndgxdt=1
 853                      dgxdt(1,1,ilmn,ia,ispinor)=cprjin(iatm+ia,ispinor)%dcp(cplex_dgxdt(1),idir,ilmn)
 854                    else if (choice==53) then ! ndgxdt=2
 855                      idir1 = modulo(idir,3)+1; idir2 = modulo(idir+1,3)+1
 856                      dgxdt(1,1,1:nlmn,ia,ispinor)=cprjin(iatm+ia,ispinor)%dcp(cplex_dgxdt(1),idir1,1:nlmn)
 857                      dgxdt(1,2,1:nlmn,ia,ispinor)=cprjin(iatm+ia,ispinor)%dcp(cplex_dgxdt(2),idir2,1:nlmn)
 858                    else if (choice==8) then ! ndgxdt=2
 859                      idir1=(idir-1)/3+1; idir2=mod((idir-1),3)+1
 860                      dgxdt(1,1,ilmn,ia,ispinor)=cprjin(iatm+ia,ispinor)%dcp(cplex_dgxdt(1),idir1,ilmn)
 861                      dgxdt(1,2,ilmn,ia,ispinor)=cprjin(iatm+ia,ispinor)%dcp(cplex_dgxdt(2),idir2,ilmn)
 862                    else if (choice==81) then ! ndgxdt=1
 863                      idir1=(idir-1)/3+1; idir2=mod((idir-1),3)+1
 864                      dgxdt(1,1,ilmn,ia,ispinor)=cprjin(iatm+ia,ispinor)%dcp(cplex_dgxdt(1),idir2,ilmn)
 865                    end if
 866                  end if
 867                end do
 868              end do
 869            end do
 870          end if ! cplex == 2
 871        end if ! cpopt==4 and ndgxdt>0
 872 
 873        ! Computation or <p_lmn|c> (and derivatives) for this block of atoms if :
 874        !    <p_lmn|c> are not in memory : cpopt<=1
 875        ! OR <p_lmn|c> are in memory, but we need derivatives : cpopt<=3 and abs(choice_a)>1
 876        ! OR <p_lmn|c> and first derivatives are in memory, but we need second derivatives : choice=8 or 81
 877        if (cpopt<=1.or.(cpopt<=3.and.abs(choice_a)>1).or.choice==8.or.choice==81) then
 878 !       if ((cpopt<4.and.choice_a/=-1).or.choice==8.or.choice==81) then
 879          if (abs(choice_a)>1.or.no_opernla_mv) then
 880 !           call timab(1101,1,tsec)
 881            call opernla_ylm(choice_a,cplex,cplex_dgxdt,cplex_d2gxdt,dimffnlin,d2gxdt,dgxdt,ffnlin_typ,gx,&
 882 &           ia3,idir,indlmn_typ,istwf_k,kpgin_,matblk,mpi_enreg,nd2gxdt,ndgxdt,nincat,nkpgin_,nlmn,&
 883 &           nloalg,npwin,nspinor,ph3din,signs,ucvol,vectin,qdir=qdir)
 884 !           call timab(1101,2,tsec)
 885          else
 886 !           call timab(1102,1,tsec)
 887            call opernla_ylm_mv(choice_a,cplex,dimffnlin,ffnlin_typ,gx,&
 888 &           ia3,indlmn_typ,istwf_k,matblk,mpi_enreg,nincat,nlmn,&
 889 &           nloalg,npwin,nspinor,ph3din,ucvol,vectin)
 890 !           call timab(1102,2,tsec)
 891          end if
 892        end if
 893 
 894 !      Transfer result to output variable cprj (if requested)
 895 !      cprj(:)%cp receive the <p_i|Psi> factors (p_i: non-local projector)
 896 !      Be careful: cprj(:)%dcp does not exactly contain the derivative of cprj(:)%cp.
 897 !                  - Volume contributions (in case of strain derivative) are not included,
 898 !                  - Global coordinate transformation are not applied,
 899 !                  cprj(:)%dcp is meant to be a restart argument of the present nonlop routine.
 900        if (cpopt==0.or.cpopt==1) then
 901          do ispinor=1,nspinor
 902            do ia=1,nincat
 903              cprjin(iatm+ia,ispinor)%nlmn=nlmn
 904              cprjin(iatm+ia,ispinor)%cp(1:cplex,1:nlmn)=gx(1:cplex,1:nlmn,ia,ispinor)
 905              if (cplex==1) cprjin(iatm+ia,ispinor)%cp(2,1:nlmn)=zero
 906            end do
 907          end do
 908        end if
 909        if ((cpopt==1.or.cpopt==3).and.ndgxdt>0) then
 910          ishift=0
 911          if ((choice==2).and.(cprjin(1,1)%ncpgr>ndgxdt)) ishift=cprjin(1,1)%ncpgr-ndgxdt
 912          if(cplex==2)then
 913            do ispinor=1,nspinor
 914              do ia=1,nincat
 915                cprjin(iatm+ia,ispinor)%dcp(1:2,1+ishift:ndgxdt+ishift,1:nlmn)=dgxdt(1:2,1:ndgxdt,1:nlmn,ia,ispinor)
 916 !               cprjin(iatm+ia,ispinor)%dcp(1:2,1:ndgxdt,1:nlmn)=dgxdt(1:2,1:ndgxdt,1:nlmn,ia,ispinor)
 917              end do
 918            end do
 919          else
 920            do ispinor=1,nspinor
 921              do ia=1,nincat
 922                do ilmn=1,nlmn
 923                  do ii=1,ndgxdt
 924                    ic = cplex_dgxdt(ii) ; jc = 3 - ic
 925                    cprjin(iatm+ia,ispinor)%dcp(ic,ii+ishift,ilmn)=dgxdt(1,ii,ilmn,ia,ispinor)
 926                    cprjin(iatm+ia,ispinor)%dcp(jc,ii+ishift,ilmn)=zero
 927 !                   cprjin(iatm+ia,ispinor)%dcp(ic,ii,ilmn)=dgxdt(1,ii,ilmn,ia,ispinor)
 928 !                   cprjin(iatm+ia,ispinor)%dcp(jc,ii,ilmn)=zero
 929                  end do
 930                end do
 931              end do
 932            end do
 933          end if
 934        end if
 935 
 936 !      If choice==0, that's all for these atoms !
 937        if (choice>0) then
 938          if(choice/=7) then
 939 !           call timab(1105,1,tsec)
 940 !          Contraction from <p_i|c> to Sum_j[Dij.<p_j|c>] (and derivatives)
 941            call opernlc_ylm(atindx1,cplex,cplex_dgxdt,cplex_d2gxdt,cplex_enl,cplex_fac,dgxdt,dgxdtfac,dgxdtfac_sij,&
 942 &           d2gxdt,d2gxdtfac,d2gxdtfac_sij,dimenl1,dimenl2,dimekbq,enl,gx,gxfac,gxfac_sij,&
 943 &           iatm,indlmn_typ,itypat,lambda,mpi_enreg,natom,ndgxdt,ndgxdtfac,nd2gxdt,nd2gxdtfac,&
 944 &           nincat,nlmn,nspinor,nspinortot,optder,paw_opt,sij_typ)
 945 !           call timab(1105,2,tsec)
 946          else
 947             gxfac_sij=gx
 948          end if
 949 
 950 !        Operate with the non-local potential on the projected scalars,
 951 !        in order to get contributions to energy/forces/stress/dyn.mat
 952 !        ==============================================================
 953          if (signs==1) then
 954            if (.not.present(cprjin_left)) then
 955 !             call timab(1106,1,tsec)
 956              call opernld_ylm(choice_b,cplex,cplex_fac,ddkk,dgxdt,dgxdtfac,dgxdtfac_sij,d2gxdt,&
 957 &             enlk,enlout,fnlk,gx,gxfac,gxfac_sij,ia3,natom,ndat_left_,nd2gxdt,ndgxdt,ndgxdtfac,&
 958 &             nincat,nlmn,nnlout,nspinor,paw_opt,strnlk)
 959 !             call timab(1106,2,tsec)
 960            else
 961              ABI_MALLOC(gx_left,(cplex,nlmn,nincat,nspinor*ndat_left_))
 962 !            Retrieve <p_lmn|c> coeffs
 963              do ispinor=1,nspinor*ndat_left_
 964                do ia=1,nincat
 965                  gx_left(1:cplex,1:nlmn,ia,ispinor)=cprjin_left(iatm+ia,ispinor)%cp(1:cplex,1:nlmn)
 966                end do
 967              end do
 968 !            TODO
 969 !            if (cpopt==4.and.ndgxdt>0) then
 970 !            do ispinor=1,nspinor
 971 !            do ia=1,nincat
 972 !            dgxdt_left(1:cplex,1:ndgxdt,1:nlmn,ia,ispinor)=cprjin_left(iatm+ia,ispinor)%dcp(1:cplex,1:ndgxdt,1:nlmn)
 973 !            end do
 974 !            end do
 975 !            end if
 976              if (present(enlout_im)) then
 977 !               call timab(1108,1,tsec)
 978                call opernld_ylm(choice_b,cplex,cplex_fac,ddkk,dgxdt,dgxdtfac,dgxdtfac_sij,d2gxdt,&
 979 &               enlk,enlout,fnlk,gx_left,gxfac,gxfac_sij,ia3,natom,ndat_left_,nd2gxdt,ndgxdt,ndgxdtfac,&
 980 &               nincat,nlmn,nnlout,nspinor,paw_opt,strnlk,enlout_im=enlout_im)
 981 !               call timab(1108,2,tsec)
 982              else
 983 !               call timab(1107,1,tsec)
 984                call opernld_ylm(choice_b,cplex,cplex_fac,ddkk,dgxdt,dgxdtfac,dgxdtfac_sij,d2gxdt,&
 985 &               enlk,enlout,fnlk,gx_left,gxfac,gxfac_sij,ia3,natom,ndat_left_,nd2gxdt,ndgxdt,ndgxdtfac,&
 986 &               nincat,nlmn,nnlout,nspinor,paw_opt,strnlk)
 987 !               call timab(1107,2,tsec)
 988              end if
 989              ABI_FREE(gx_left)
 990            end if
 991          end if ! signs == 1
 992 
 993 !        Operate with the non-local potential on the projected scalars,
 994 !        in order to get matrix element
 995 !        ==============================================================
 996          if (signs==2) then
 997 !          Prepare the phase factors if they were not already computed
 998            if(nloalg(2)<=0) then
 999              call ph1d3d(ia3,ia4,kgout,matblk,natom,npwout,n1,n2,n3,phkxredout,ph1d,ph3dout)
1000            end if
1001            if (abs(choice_b)>1.or.no_opernlb_mv) then
1002 !             call timab(1103,1,tsec)
1003              call opernlb_ylm(choice_b,cplex,cplex_dgxdt,cplex_d2gxdt,cplex_fac,&
1004 &             d2gxdtfac,d2gxdtfac_sij,dgxdtfac,dgxdtfac_sij,dimffnlout,ffnlout_typ,gxfac,gxfac_sij,ia3,&
1005 &             idir,indlmn_typ,kpgout_,matblk,ndgxdtfac,nd2gxdtfac,nincat,nkpgout_,nlmn,&
1006 &             nloalg,npwout,nspinor,paw_opt,ph3dout,svectout,ucvol,vectout,qdir=qdir)
1007 !             call timab(1103,2,tsec)
1008            else
1009 !             call timab(1104,1,tsec)
1010              call opernlb_ylm_mv(choice_b,cplex,cplex_fac,&
1011 &             dimffnlout,ffnlout_typ,gxfac,gxfac_sij,ia3,indlmn_typ,matblk,nincat,nlmn,&
1012 &             nloalg,npwout,nspinor,paw_opt,ph3dout,svectout,ucvol,vectout)
1013 !             call timab(1104,2,tsec)
1014            end if
1015          end if ! signs == 2
1016 
1017        end if ! choice>=0
1018 
1019 !      Deallocate temporary projected scalars
1020        ABI_FREE(gx)
1021        ABI_FREE(gxfac)
1022        ABI_FREE(dgxdt)
1023        ABI_FREE(dgxdtfac)
1024        ABI_FREE(d2gxdt)
1025        ABI_FREE(d2gxdtfac)
1026        ABI_FREE(dgxdtfac_sij)
1027        ABI_FREE(d2gxdtfac_sij)
1028        ABI_FREE(gxfac_sij)
1029        ABI_FREE(cplex_dgxdt)
1030        ABI_FREE(cplex_d2gxdt)
1031 
1032 !      End sum on atom subset loop
1033        iatm=iatm+nincat;ia5=ia5+nincat
1034      end do
1035      !if (paw_opt>=2)  then
1036      ABI_FREE(sij_typ)
1037      !end if
1038 
1039 !    End condition of existence of a non-local part
1040    else
1041      if (cpopt==0.or.cpopt==1) then
1042        do ispinor=1,nspinor
1043          do ia=1,nattyp(itypat)
1044            cprjin(iatm+ia,ispinor)%cp(:,1:nlmn)=zero
1045          end do
1046        end do
1047      end if
1048      if ((cpopt==1.or.cpopt==3).and.ndgxdt>0) then
1049        ishift=0
1050        if ((choice==2).and.(cprjin(1,1)%ncpgr>ndgxdt)) ishift=cprjin(1,1)%ncpgr-ndgxdt
1051        do ispinor=1,nspinor
1052          do ia=1,nattyp(itypat)
1053            cprjin(iatm+ia,ispinor)%dcp(:,1+ishift:ndgxdt+ishift,1:nlmn)=zero
1054 !           cprjin(iatm+ia,ispinor)%dcp(:,1:ndgxdt,1:nlmn)=zero
1055          end do
1056        end do
1057      end if
1058      iatm=iatm+nattyp(itypat)
1059    end if
1060 
1061 !  End atom type loop
1062    ia1=ia2+1
1063  end do
1064 
1065 
1066 !Reduction in case of parallelism
1067 !==============================================================
1068 
1069  if (signs==1.and.mpi_enreg%paral_spinor==1) then
1070    if (nnlout/=0) then
1071      call xmpi_sum(enlout,mpi_enreg%comm_spinor,ierr)
1072    end if
1073    if (choice==3.or.choice==6.or.choice==23) then
1074      call xmpi_sum(enlk,mpi_enreg%comm_spinor,ierr)
1075    end if
1076    if (choice==6) then
1077      call xmpi_sum(fnlk,mpi_enreg%comm_spinor,ierr)
1078      call xmpi_sum(strnlk,mpi_enreg%comm_spinor,ierr)
1079    end if
1080    if (choice==55) then
1081      call xmpi_sum(ddkk,mpi_enreg%comm_spinor,ierr)
1082    end if
1083  end if
1084 
1085 !Coordinate transformations
1086 !==============================================================
1087 
1088 !Need sometimes gmet
1089  if ((signs==1.and.paw_opt<=3).and. &
1090 & (choice==5 .or.choice==51.or.choice==52.or.choice==53.or.&
1091 & choice==54.or.choice==55)) then
1092    ABI_MALLOC(gmet,(3,3))
1093    gmet = MATMUL(TRANSPOSE(gprimd),gprimd)
1094  end if
1095 
1096 !1st derivative wrt to strain (stress tensor):
1097 ! - convert from reduced to cartesian coordinates
1098 ! - substract volume contribution
1099  if ((choice==3.or.choice==23).and.signs==1.and.paw_opt<=3) then
1100    mu0=0 ! Shift to be applied in enlout array
1101    ABI_MALLOC(work1,(6))
1102    work1(1:6)=enlout(mu0+1:mu0+6)
1103    call strconv(work1,gprimd,work1)
1104    enlout(mu0+1:mu0+3)=(work1(1:3)-enlk)
1105    enlout(mu0+4:mu0+6)= work1(4:6)
1106    ABI_FREE(work1)
1107  end if
1108 
1109 !1st derivative wrt to k wave vector (ddk):
1110 ! - convert from cartesian to reduced coordinates
1111  if ((choice==5.or.choice==53).and.signs==1.and.paw_opt<=3) then
1112    mu0=0 ! Shift to be applied in enlout array
1113    ABI_MALLOC(work1,(3))
1114    work1(:)=enlout(mu0+1:mu0+3)
1115    enlout(mu0+1:mu0+3)=gmet(:,1)*work1(1)+gmet(:,2)*work1(2)+gmet(:,3)*work1(3)
1116    ABI_FREE(work1)
1117  end if
1118  if ((choice==51.or.choice==52).and.signs==1.and.paw_opt<=3) then
1119    mu0=0 ! Shift to be applied in enlout array
1120    ABI_MALLOC(work1,(3))
1121    do mu=1,2 ! Loop for Re,Im
1122      work1(1:3)=(/enlout(mu0+1),enlout(mu0+3),enlout(mu0+5)/)
1123      enlout(mu0+1)=gmet(1,1)*work1(1)+gmet(1,2)*work1(2)+gmet(1,3)*work1(3)
1124      enlout(mu0+3)=gmet(2,1)*work1(1)+gmet(2,2)*work1(2)+gmet(2,3)*work1(3)
1125      enlout(mu0+5)=gmet(3,1)*work1(1)+gmet(3,2)*work1(2)+gmet(3,3)*work1(3)
1126      mu0=mu0+1
1127    end do
1128    ABI_FREE(work1)
1129  end if
1130 
1131 !2nd derivative wrt to k wave vector and atomic position (effective charges):
1132 ! - convert from cartesian to reduced coordinates
1133  if (choice==54.and.signs==1.and.paw_opt<=3) then
1134    mu0=0 ! Shift to be applied in enlout array
1135    ABI_MALLOC(work1,(3))
1136    ABI_MALLOC(work2,(3))
1137    do mu=1,3*natom
1138 !    First, real part
1139      work1(1)=enlout(mu0+1);work1(2)=enlout(mu0+3);work1(3)=enlout(mu0+5)
1140      work2(:)=gmet(:,1)*work1(1)+gmet(:,2)*work1(2)+gmet(:,3)*work1(3)
1141      enlout(mu0+1)=work2(1);enlout(mu0+3)=work2(2);enlout(mu0+5)=work2(3)
1142 !    Then imaginary part
1143      work1(1)=enlout(mu0+2);work1(2)=enlout(mu0+4);work1(3)=enlout(mu0+6)
1144      work2(:)=gmet(:,1)*work1(1)+gmet(:,2)*work1(2)+gmet(:,3)*work1(3)
1145      enlout(mu0+2)=work2(1);enlout(mu0+4)=work2(2);enlout(mu0+6)=work2(3)
1146      mu0=mu0+6
1147    end do
1148    ABI_FREE(work1)
1149    ABI_FREE(work2)
1150  end if
1151 
1152 !2nd derivative wrt to k wave vector and strain (piezoelectric tensor):
1153 ! - convert from cartesian to reduced coordinates (k point)
1154 ! - convert from reduced to cartesian coordinates (strain)
1155 ! - substract volume contribution
1156 ! - symetrize strain components
1157  if (choice==55.and.signs==1.and.paw_opt<=3) then
1158    ABI_MALLOC(work3,(2,3))
1159    ABI_MALLOC(work4,(2,3))
1160    ABI_MALLOC(work5,(2,3,6))
1161    ABI_MALLOC(work7,(2,3,6))
1162    ABI_MALLOC(work6,(2,3,3))
1163    do ic=1,3 ! gamma
1164      work5=zero
1165      do jc=1,3 ! nu
1166        do ii=1,3 ! lambda
1167          mu=(gamma(jc,ii)-1)*3+1
1168          work5(1,jc,ii)=gmet(ic,1)*enlout(2*mu-1)+gmet(ic,2)*enlout(2*mu+1) &
1169 &         +gmet(ic,3)*enlout(2*mu+3)
1170          work5(2,jc,ii)=gmet(ic,1)*enlout(2*mu  )+gmet(ic,2)*enlout(2*mu+2) &
1171 &         +gmet(ic,3)*enlout(2*mu+4)
1172        end do
1173      end do
1174      work6=zero
1175      do jc=1,3 ! nu
1176        do ii=1,3 ! beta
1177          work6(1:cplex,ii,jc)=gprimd(ii,1)*work5(1:cplex,jc,1)+gprimd(ii,2)*work5(1:cplex,jc,2) &
1178 &         +gprimd(ii,3)*work5(1:cplex,jc,3)
1179        end do
1180      end do
1181      do jc=1,3 ! alpha
1182        do ii=1,3 ! beta
1183          mu=gamma(jc,ii)
1184          work7(1:cplex,ic,mu)=gprimd(jc,1)*work6(1:cplex,ii,1)+gprimd(jc,2)*work6(1:cplex,ii,2) &
1185 &         +gprimd(jc,3)*work6(1:cplex,ii,3)
1186        end do
1187      end do
1188    end do ! gamma
1189 
1190    do ii=1,3 ! alpha
1191      work3(1,ii)=gprimd(ii,1)*ddkk(2*1-1)+gprimd(ii,2)*ddkk(2*2-1) &
1192 &     +gprimd(ii,3)*ddkk(2*3-1)
1193      work3(2,ii)=gprimd(ii,1)*ddkk(2*1  )+gprimd(ii,2)*ddkk(2*2  ) &
1194 &     +gprimd(ii,3)*ddkk(2*3  )
1195    end do
1196    do ii=1,3 ! gamma
1197      work4(1,ii)=gmet(ii,1)*ddkk(2*1-1)+gmet(ii,2)*ddkk(2*2-1) &
1198 &     +gmet(ii,3)*ddkk(2*3-1)
1199      work4(2,ii)=gmet(ii,1)*ddkk(2*1  )+gmet(ii,2)*ddkk(2*2  ) &
1200 &     +gmet(ii,3)*ddkk(2*3  )
1201    end do
1202 
1203    do mu=1,6
1204      ii=alpha(mu) ! alpha
1205      ic=beta(mu) ! beta
1206      do jc=1,3 ! gamma
1207        work7(1:cplex,jc,mu)=work7(1:cplex,jc,mu)-half &
1208 &       *(gprimd(ic,jc)*work3(1:cplex,ii)+gprimd(ii,jc)*work3(1:cplex,ic))
1209        if (ii==ic) work7(1:cplex,jc,mu)=work7(1:cplex,jc,mu)-work4(1:cplex,jc)
1210      end do
1211    end do
1212    do mu=1,6 ! alpha,beta
1213      do nu=1,3 ! gamma
1214        mu0=3*(mu-1)+nu
1215        enlout(2*mu0-1)=work7(1,nu,mu)
1216        enlout(2*mu0  )=work7(2,nu,mu)
1217      end do
1218    end do
1219    ABI_FREE(gmet)
1220    ABI_FREE(work3)
1221    ABI_FREE(work4)
1222    ABI_FREE(work5)
1223    ABI_FREE(work6)
1224    ABI_FREE(work7)
1225  end if
1226 
1227 !2nd derivative wrt to 2 k wave vectors (effective mass):
1228 ! - convert from cartesian to reduced coordinates
1229  if ((choice==8.or.choice==81).and.signs==1.and.paw_opt<=3) then
1230    mu0=0 ! Shift to be applied in enlout array
1231    ABI_MALLOC(work3,(3,3))
1232    ABI_MALLOC(work4,(3,3))
1233    mua=1;if (choice==81) mua=2
1234    do ii=1,mua ! Loop Re,Im
1235      if (choice==8) then ! enlout is real in Voigt notation
1236        work3(1,1)=enlout(mu0+1) ; work3(1,2)=enlout(mu0+6) ; work3(1,3)=enlout(mu0+5)
1237        work3(2,1)=enlout(mu0+6) ; work3(2,2)=enlout(mu0+2) ; work3(2,3)=enlout(mu0+4)
1238        work3(3,1)=enlout(mu0+5) ; work3(3,2)=enlout(mu0+4) ; work3(3,3)=enlout(mu0+3)
1239      else                ! enlout is complex in matrix notation
1240        work3(1,1)=enlout(mu0+1 ) ; work3(1,2)=enlout(mu0+3 ) ; work3(1,3)=enlout(mu0+5 )
1241        work3(2,1)=enlout(mu0+7 ) ; work3(2,2)=enlout(mu0+9 ) ; work3(2,3)=enlout(mu0+11)
1242        work3(3,1)=enlout(mu0+13) ; work3(3,2)=enlout(mu0+15) ; work3(3,3)=enlout(mu0+17)
1243      end if
1244      do mu=1,3
1245        work4(:,mu)=gprimd(:,1)*work3(mu,1)+gprimd(:,2)*work3(mu,2)+gprimd(:,3)*work3(mu,3)
1246      end do
1247      do mu=1,3
1248        work3(:,mu)=gprimd(:,1)*work4(mu,1)+gprimd(:,2)*work4(mu,2)+gprimd(:,3)*work4(mu,3)
1249      end do
1250      do mu=1,3
1251        work4(:,mu)=gprimd(1,:)*work3(mu,1)+gprimd(2,:)*work3(mu,2)+gprimd(3,:)*work3(mu,3)
1252      end do
1253      do mu=1,3
1254        work3(:,mu)=gprimd(1,:)*work4(mu,1)+gprimd(2,:)*work4(mu,2)+gprimd(3,:)*work4(mu,3)
1255      end do
1256      if (choice==8) then ! enlout is real in Voigt notation
1257        enlout(mu0+1) = work3(1,1) ; enlout(mu0+2) = work3(2,2) ; enlout(mu0+3) = work3(3,3)
1258        enlout(mu0+4) = work3(3,2) ; enlout(mu0+5) = work3(1,3) ; enlout(mu0+6) = work3(2,1)
1259      else                ! enlout is complex in matrix notation
1260        enlout(mu0+1 )=work3(1,1) ; enlout(mu0+3 )=work3(1,2) ; enlout(mu0+5 )=work3(1,3)
1261        enlout(mu0+7 )=work3(2,1) ; enlout(mu0+9 )=work3(2,2) ; enlout(mu0+11)=work3(2,3)
1262        enlout(mu0+13)=work3(3,1) ; enlout(mu0+15)=work3(3,2) ; enlout(mu0+17)=work3(3,3)
1263      end if
1264      mu0=mu0+1
1265    end do
1266    ABI_FREE(work3)
1267    ABI_FREE(work4)
1268  end if
1269 
1270 !2nd derivative wrt to 2 strains (elastic tensor):
1271 ! - convert from reduced to cartesian coordinates
1272 ! - substract volume contribution
1273  if (choice==6.and.signs==1.and.paw_opt<=3) then
1274    mu0=0 ! Shift to be applied in enlout array
1275    ABI_MALLOC(work1,(6))
1276    ABI_MALLOC(work2,(6))
1277    ABI_MALLOC(work3,(6+3*natom,6))
1278    work3(:,:)=reshape(enlout(mu0+1:mu0+6*(6+3*natom)),(/6+3*natom,6/))
1279    do mu=1,6
1280      call strconv(work3(1:6,mu),gprimd,work3(1:6,mu))
1281    end do
1282    do mu=1,6+3*natom
1283      work1(1:6)=work3(mu,1:6)
1284      call strconv(work1,gprimd,work2)
1285      work3(mu,1:6)=work2(1:6)
1286    end do
1287    enlout(mu0+1:mu0+6*(6+3*natom))=reshape(work3(:,:),(/6*(6+3*natom)/))
1288    ABI_FREE(work1)
1289    ABI_FREE(work2)
1290    ABI_FREE(work3)
1291    call strconv(strnlk,gprimd,strnlk)
1292    do mub=1,6
1293      nub1=alpha(mub);nub2=beta(mub)
1294      do mua=1,6
1295        mu=mu0+mua+(3*natom+6)*(mub-1)
1296        nua1=alpha(mua);nua2=beta(mua)
1297        if (mua<=3.and.mub<=3) enlout(mu)=enlout(mu)+enlk
1298        if (mua<=3) enlout(mu)=enlout(mu)-strnlk(mub)
1299        if (mub<=3) enlout(mu)=enlout(mu)-strnlk(mua)
1300        if (nub1==nua2) enlout(mu)=enlout(mu)-0.25d0*strnlk(gamma(nua1,nub2))
1301        if (nub2==nua2) enlout(mu)=enlout(mu)-0.25d0*strnlk(gamma(nua1,nub1))
1302        if (nub1==nua1) enlout(mu)=enlout(mu)-0.25d0*strnlk(gamma(nua2,nub2))
1303        if (nub2==nua1) enlout(mu)=enlout(mu)-0.25d0*strnlk(gamma(nua2,nub1))
1304      end do
1305      if (mub<=3) then
1306        do nua1=1,natom
1307          nua2=3*(nua1-1);mu=mu0+nua2+6+(3*natom+6)*(mub-1)
1308          enlout(mu+1:mu+3)=enlout(mu+1:mu+3)-fnlk(nua2+1:nua2+3)
1309        end do
1310      end if
1311    end do
1312  end if
1313 
1314  if (allocated(gmet)) then
1315    ABI_FREE(gmet)
1316  end if
1317 
1318 !Final deallocations
1319 !==============================================================
1320 
1321  if (signs==1)  then
1322    ABI_FREE(fnlk)
1323    ABI_FREE(ddkk)
1324    ABI_FREE(strnlk)
1325  end if
1326 
1327  if (nkpgin<nkpgin_) then
1328    ABI_FREE(kpgin_)
1329  end if
1330  if (nkpgout<nkpgout_) then
1331    ABI_FREE(kpgout_)
1332  end if
1333 
1334 ! call timab(1100,2,tsec)
1335 
1336  DBG_EXIT("COLL")
1337 
1338 end subroutine nonlop_ylm

ABINIT/nonlop_ylm_init_counters [ Functions ]

[ Top ] [ Functions ]

NAME

 nonlop_ylm_init_counters

FUNCTION

SOURCE

1349 subroutine nonlop_ylm_init_counters()
1350 
1351    opernla_counter = 0
1352    opernlb_counter = 0
1353    opernla_mv_counter = 0
1354    opernlb_mv_counter = 0
1355    opernla_mv_dgemv_counter = 0
1356    opernlb_mv_dgemv_counter = 0
1357 
1358 end subroutine nonlop_ylm_init_counters

ABINIT/nonlop_ylm_output_counters [ Functions ]

[ Top ] [ Functions ]

NAME

 nonlop_ylm_output_counters

FUNCTION

SOURCE

1389 subroutine nonlop_ylm_output_counters(natom,nbandtot,ntypat,typat,mpi_enreg)
1390 
1391 !Arguments ------------------------------------
1392 !scalars
1393  integer,intent(in) :: natom,nbandtot,ntypat
1394  integer,intent(in) :: typat(:)
1395  type(MPI_type),intent(in) :: mpi_enreg
1396 !arrays
1397 
1398 !Local variables-------------------------------
1399 !scalars
1400  character(len=500) :: msg
1401  integer :: cnt,ia1,ia2,ia3,ia4,ia5,iatm,ierr,itypat,mincat,nincat,opernl_calls
1402 !arrays
1403  integer :: nattyp(ntypat)
1404 
1405  do itypat=1,ntypat
1406    nattyp(itypat)=0
1407    do iatm=1,natom
1408      if(typat(iatm)==itypat)then
1409 !       atindx(iatom)=indx
1410 !       atindx1(indx)=iatom
1411 !       indx=indx+1
1412        nattyp(itypat)=nattyp(itypat)+1
1413      end if
1414    end do
1415  end do
1416  call wrtout([std_out,ab_out],'','COLL')
1417  write(msg,'(a)')                ' --- NONLOP YLM COUNTERS -----------------------------------------------------'
1418  call wrtout([std_out,ab_out],msg,'COLL')
1419  mincat=min(NLO_MINCAT,maxval(nattyp))
1420  ia1=1;iatm=0;opernl_calls=0
1421  do itypat=1,ntypat
1422 !  Get atom loop indices for different types:
1423    ia2=ia1+nattyp(itypat)-1;ia5=1
1424    do ia3=ia1,ia2,mincat
1425      ia4=min(ia2,ia3+mincat-1)
1426 !    Give the increment of number of atoms in this subset.
1427      nincat=ia4-ia3+1
1428      opernl_calls=opernl_calls+1
1429 !    End sum on atom subset loop
1430      iatm=iatm+nincat;ia5=ia5+nincat
1431    end do
1432 !  End atom type loop
1433    ia1=ia2+1
1434  end do
1435  if (iatm/=natom) then
1436    ABI_ERROR('iatm should be equal to natom!')
1437  end if
1438  write(msg,'(a,i6)')             ' Number of Calls in nonlop_ylm : NC = ',opernl_calls
1439  call wrtout([std_out,ab_out],msg,'COLL')
1440  write(msg,'(a,i6)')             ' total Number of Bands         : NB = ',nbandtot
1441  call wrtout([std_out,ab_out],msg,'COLL')
1442  write(msg,'(a)')                '                      | total count (TC) |            TC/NC |         TC/NC/NB'
1443  call wrtout([std_out,ab_out],msg,'COLL')
1444  write(msg,'(a)')                ' -----------------------------------------------------------------------------'
1445  call wrtout([std_out,ab_out],msg,'COLL')
1446  call xmpi_sum(opernla_counter,mpi_enreg%comm_kpt,ierr)
1447  call xmpi_sum(opernlb_counter,mpi_enreg%comm_kpt,ierr)
1448  call xmpi_sum(opernla_mv_counter,mpi_enreg%comm_kpt,ierr)
1449  call xmpi_sum(opernlb_mv_counter,mpi_enreg%comm_kpt,ierr)
1450  call xmpi_sum(opernla_mv_dgemv_counter,mpi_enreg%comm_kpt,ierr)
1451  call xmpi_sum(opernlb_mv_dgemv_counter,mpi_enreg%comm_kpt,ierr)
1452  cnt=opernla_counter
1453  if (cnt>0) then
1454    write(msg,'(2(a,i16),a,f16.1)') ' opernla_ylm          | ',&
1455      & cnt,' | ',cnt/opernl_calls,' | ',dble(cnt)/opernl_calls/nbandtot
1456    call wrtout([std_out,ab_out],msg,'COLL')
1457  end if
1458  cnt=opernla_mv_counter
1459  if (cnt>0) then
1460    write(msg,'(2(a,i16),a,f16.1)') ' opernla_ylm_mv       | ',&
1461      & cnt,' | ',cnt/opernl_calls,' | ',dble(cnt)/opernl_calls/nbandtot
1462    call wrtout([std_out,ab_out],msg,'COLL')
1463  end if
1464  cnt=opernla_mv_dgemv_counter
1465  if (cnt>0) then
1466    write(msg,'(2(a,i16),a,f16.1)') ' opernla_ylm_mv(dgemv)| ',&
1467      & cnt,' | ',cnt/opernl_calls,' | ',dble(cnt)/opernl_calls/nbandtot
1468    call wrtout([std_out,ab_out],msg,'COLL')
1469  end if
1470  cnt=opernlb_counter
1471  if (cnt>0) then
1472    write(msg,'(2(a,i16),a,f16.1)') ' opernlb_ylm          | ',&
1473      & cnt,' | ',cnt/opernl_calls,' | ',dble(cnt)/opernl_calls/nbandtot
1474    call wrtout([std_out,ab_out],msg,'COLL')
1475  end if
1476  cnt=opernlb_mv_counter
1477  if (cnt>0) then
1478    write(msg,'(2(a,i16),a,f16.1)') ' opernlb_ylm_mv       | ',&
1479      & cnt,' | ',cnt/opernl_calls,' | ',dble(cnt)/opernl_calls/nbandtot
1480    call wrtout([std_out,ab_out],msg,'COLL')
1481  end if
1482  cnt=opernlb_mv_dgemv_counter
1483  if (cnt>0) then
1484    write(msg,'(2(a,i16),a,f16.1)') ' opernlb_ylm_mv(dgemv)| ',&
1485      & cnt,' | ',cnt/opernl_calls,' | ',dble(cnt)/opernl_calls/nbandtot
1486    call wrtout([std_out,ab_out],msg,'COLL')
1487  end if
1488  write(msg,'(a)')                ' -----------------------------------------------------------------------------'
1489  call wrtout([std_out,ab_out],msg,'COLL')
1490 
1491 end subroutine nonlop_ylm_output_counters

ABINIT/nonlop_ylm_stop_counters [ Functions ]

[ Top ] [ Functions ]

NAME

 nonlop_ylm_stop_counters

FUNCTION

SOURCE

1369 subroutine nonlop_ylm_stop_counters()
1370 
1371    opernla_counter = -1
1372    opernlb_counter = -1
1373    opernla_mv_counter = -1
1374    opernlb_mv_counter = -1
1375    opernla_mv_dgemv_counter = -1
1376    opernlb_mv_dgemv_counter = -1
1377 
1378 end subroutine nonlop_ylm_stop_counters

TABLE OF CONTENTS

ABINIT/m_nonlop_ylm [ Modules ]

ABINIT/nonlop_ylm [ Functions ]

ABINIT/nonlop_ylm_init_counters [ Functions ]

ABINIT/nonlop_ylm_output_counters [ Functions ]

ABINIT/nonlop_ylm_stop_counters [ Functions ]