TABLE OF CONTENTS


ABINIT/m_paw_mkaewf [ Modules ]

[ Top ] [ Modules ]

NAME

  m_paw_mkaewf

FUNCTION

 Construct complete AE wave functions on the fine FFT grid adding onsite PAW corrections.

COPYRIGHT

  Copyright (C) 2008-2022 ABINIT group (MG)
  This file is distributed under the terms of the
  GNU General Public License, see ~abinit/COPYING
  or http://www.gnu.org/copyleft/gpl.txt .

SOURCE

16 #if defined HAVE_CONFIG_H
17 #include "config.h"
18 #endif
19 
20 #include "abi_common.h"
21 
22 module m_paw_mkaewf
23 
24  use defs_basis
25  use defs_wvltypes
26  use m_abicore
27  use m_xmpi
28  use m_hide_blas
29  use m_splines
30  use m_errors
31  use m_nctk
32  use m_hdr
33  use m_dtset
34  use m_dtfil
35 #ifdef HAVE_NETCDF
36  use netcdf
37 #endif
38 
39  use defs_datatypes,   only : ebands_t
40  use defs_abitypes,    only : MPI_type
41  use m_io_tools,       only : flush_unit
42  use m_numeric_tools,  only : wrap2_zero_one
43  use m_fftcore,        only : sphereboundary
44  use m_geometry,       only : xcart2xred
45  use m_crystal,        only : crystal_t
46  use m_ebands,         only : ebands_ncwrite
47  use m_pawrad,         only : pawrad_type
48  use m_pawtab,         only : pawtab_type, pawtab_get_lsize
49  use m_pawfgrtab,      only : pawfgrtab_type, pawfgrtab_init, pawfgrtab_free, pawfgrtab_print
50  use m_pawcprj,        only : pawcprj_type, pawcprj_alloc, pawcprj_get, pawcprj_free
51  use m_paw_pwaves_lmn, only : paw_pwaves_lmn_t, paw_pwaves_lmn_init, paw_pwaves_lmn_free
52  use m_paral_atom,     only : get_my_atmtab, free_my_atmtab
53  use m_paw_nhat,       only : nhatgrid
54  use m_mpinfo,         only : proc_distrb_cycle
55  use m_fft,            only : fourwf
56 
57  implicit none
58 
59  private
60 
61  public :: pawmkaewf
62 
63 CONTAINS  !========================================================================================

m_paw_mkaewf/pawmkaewf [ Functions ]

[ Top ] [ m_paw_mkaewf ] [ Functions ]

NAME

 pawmkaewf

FUNCTION

 Construct complete AE wave functions on the fine FFT grid adding onsite PAW corrections.

INPUTS

 crystal<crystal_t>=Crystalline structure
 ebands<ebands_t>=Electronic energies
 dimcprj(natom)=array of dimensions of array cprj (not ordered)
 mband=maximum number of bands
 mcg=size of wave-functions array (cg) =mpw*nspinor*mband*mkmem*nsppol
 mcprj=size of projected wave-functions array (cprj) =nspinor*mband*mkmem*nsppol
 mkmem=number of k points treated by this node.
 mpi_atmtab(:)=--optional-- indexes of the atoms treated by current proc
 [comm_atom]= MPI communicator over atoms
 mpw=maximum dimensioned size of npw.
 my_natom=number of atoms treated by current processor
 natom=number of atoms in cell
 ntypat=number of types of atoms in the cell
 nkpt=Total number of k-points
 nsppol=1 for unpolarized, 2 for spin-polarized
 unks=unit number for G vectors.
 nband(nkpt*nsppol)=Number of bands for each k-point and spin.
 istwfk(nkpt)=Storage mode at each k-point.
 Pawfgrtab(natom) <type(pawfgrtab_type)> : data about the fine grid around each atom
 Pawrad(ntypat) <type(pawrad_type)> : radial mesh data for each type of atom
 Pawtab(ntypat) <type(pawtab_type)> : PAW functions around each type of atom
 Dtfil <type(datafiles_type)>=variables related to files
 cg(2,mcg)=planewave coefficients of wavefunctions.
 Cprj(natom,nspinor*mband*mkmem*nsppol)=<p_lmn|Cnk> coefficients for each WF |Cnk>
   and each |p_lmn> non-local projector
 npwarr(nkpt)=Number of plane waves at each k-point
 ngfftf(18)=contain all needed information about 3D FFT, see ~abinit/doc/variables/vargs.htm#ngfft
  Note that ngfftf refers to the fine mesh.
 kg(3,mpw*mkmem)=reduced planewave coordinates
 Hdr<hdr_type>=the header of wf, den and pot files
 kpt(3,nkpt)=reduced coordinates of k points.

OUTPUT

  ierr=Status error
  Main output is written on file (ETSF_IO file format).

NOTES

 In PAW calculations, the pseudized wavefunction us represented
 on a relatively small plane wave basis set and is not normalized
 as it does not include the on-site PAW contributions which is described
 in terms of real spherical harmonics and radial functions.
 For post-processing and proper visualization, it is necessary
 to use the full electronic wave function, which is what this subroutine constructs.
 Specifically, it computes the pseudo part by doing an FFT from G- to r-space
 using the dense mesh defined by pawecutdg. The on-site PAW terms are also
 computed in real space inside each sphere and added to the pseudo part.
 Notice that this formula is expressed on the fine grid, and requires
 interpolating the PAW radial functions onto this grid, as well as calling
 initylmr in order to get the angular functions on the grid points.

SOURCE

126 subroutine pawmkaewf(Dtset,crystal,ebands,my_natom,mpw,mband,mcg,mcprj,nkpt,mkmem,nsppol,nband,&
127 & istwfk,npwarr,kpt,ngfftf,kg,dimcprj,Pawfgrtab,Pawrad,Pawtab,&
128 & Hdr,Dtfil,cg,Cprj,MPI_enreg,ierr,pseudo_norms,set_k,set_band , &
129 & mpi_atmtab,comm_atom) ! Optional arguments
130 
131 !Arguments ------------------------------------
132 !scalars
133  integer,intent(in) :: my_natom,mband,mcg,mcprj,mkmem,mpw,nsppol,nkpt
134  integer,intent(in),optional :: comm_atom,set_k,set_band
135  integer,intent(out) :: ierr
136  type(Datafiles_type),intent(in) :: Dtfil
137  type(MPI_type),intent(in) :: MPI_enreg
138  type(hdr_type),intent(inout) :: Hdr
139  type(dataset_type),intent(in) :: Dtset
140  type(crystal_t),intent(in) :: crystal
141  type(ebands_t),intent(in) :: ebands
142 !arrays
143  integer,intent(in) :: nband(nkpt*nsppol),istwfk(nkpt),npwarr(nkpt),dimcprj(crystal%natom)
144  integer,intent(in) :: ngfftf(18),kg(3,mpw*mkmem)
145  integer,optional,target,intent(in) :: mpi_atmtab(:)
146  real(dp),intent(in) :: cg(2,mcg)
147  real(dp),intent(in) :: kpt(3,nkpt)
148  real(dp),optional,intent(out) :: pseudo_norms(nsppol,nkpt,mband)
149  type(pawfgrtab_type),intent(in) :: Pawfgrtab(my_natom)
150  type(pawrad_type),intent(in) :: Pawrad(crystal%ntypat)
151  type(pawtab_type),intent(in) :: Pawtab(crystal%ntypat)
152  type(pawcprj_type),intent(in) :: Cprj(crystal%natom,mcprj)
153 
154 !Local variables-------------------------------
155 !scalars
156  integer,parameter :: tim_fourwf0=0,tim_rwwf0=0,master=0
157  integer :: bdtot_index,iband,icg,mgfftf,paral_kgb
158  integer :: iatom,iatom_tot,ifgd,ifftsph,ifft,itypat,ispinor,ipw,ndat,ii,i1,i2,i3
159  integer :: jl,jm,jlmn,natom
160  integer :: max_nfgd,nfgd,ln_size,lmn_size,my_comm_atom,option
161  integer :: iorder_cprj,comm_cell,me_kpt,ibsp,ibg,isppol,ikpt,nband_k,cplex
162  integer :: n1,n2,n3,n4,n5,n6,ikg,npwout,istwf_k,npw_k
163  integer :: nfftot,nprocs
164  integer :: optcut,optgr0,optgr1,optgr2,optrad,start_band,start_kpt,stop_kpt,stop_band
165  logical :: my_atmtab_allocated,paral_atom
166  real(dp),parameter :: weight1=one
167  real(dp) :: phj,tphj,re_p,im_p,norm,norm_rerr,max_rerr,imur,reur,arg
168  character(len=500) :: msg
169  character(len=nctk_slen) :: shape_str
170 !arrays
171  integer,allocatable :: l_size_atm(:)
172  integer, pointer :: my_atmtab(:)
173  integer,allocatable :: gbound(:,:),kg_k(:,:)
174  real(dp) :: red(3),shift(3),rfft(3),kpoint(3),cp_fact(2)
175  real(dp),allocatable :: r0shift(:,:,:),phk_atm(:,:,:)
176  real(dp),allocatable :: buf_tmp(:,:,:),fofgin(:,:),fofgin_down(:,:),fofgout(:,:)
177  real(dp),allocatable :: denpot(:,:,:),fofr(:,:,:,:),fofr_down(:,:,:,:),phkr(:,:)
178  real(dp),allocatable :: ur_ae(:,:), ur_pw(:,:),ur_ae_onsite(:,:),ur_ps_onsite(:,:)
179  real(dp),allocatable :: ur_mask(:),dummy_1d(:),rsph_red(:,:),rsph_cart(:,:)
180  type(pawcprj_type),allocatable :: Cprj_k(:,:)
181  type(pawfgrtab_type) :: local_pawfgrtab(my_natom)
182  type(paw_pwaves_lmn_t),allocatable :: Paw_onsite(:)
183 #ifdef HAVE_NETCDF
184  integer :: fform,ncerr,ncid,ae_ncid,pw_ncid,aeons_ncid,psons_ncid
185  character(len=fnlen) :: fname
186 #endif
187 
188 ! ************************************************************************
189 
190  DBG_ENTER("COLL")
191 
192 !Init parallelism
193  comm_cell = MPI_enreg%comm_cell; nprocs = xmpi_comm_size(comm_cell)
194  me_kpt = MPI_enreg%me_kpt; paral_kgb=mpi_enreg%paral_kgb
195 
196 !Compatibility tests
197  ABI_CHECK(mkmem/=0, "mkmem==0 not supported anymore!")
198  ABI_CHECK(MPI_enreg%paral_kgb == 0, "paral_kgb/=0 not coded")
199  ABI_CHECK(SIZE(dimcprj)>0, "dimcprj should be allocated")
200  ABI_CHECK(mpi_enreg%paral_spinor==0, "parallelisation over spinors not implemented")
201  ABI_CHECK(nprocs==1, "k spin parallelism not yet active")
202  ABI_CHECK(dtset%nspinor==1, "nspinor == 2 is buggy")
203 
204  natom = crystal%natom
205 
206 !Set up parallelism over atoms
207  paral_atom=(present(comm_atom).and.(my_natom/=natom))
208  nullify(my_atmtab);if (present(mpi_atmtab)) my_atmtab => mpi_atmtab
209  my_comm_atom=xmpi_comm_self;if (present(comm_atom)) my_comm_atom=comm_atom
210  call get_my_atmtab(my_comm_atom,my_atmtab,my_atmtab_allocated,paral_atom,natom,my_natom_ref=my_natom)
211 
212 !If collection of pseudo norms is enabled, make sure the array is initialised
213  if (present(pseudo_norms)) pseudo_norms = zero
214 
215 !use a local copy of pawfgrtab to make sure we use the correction in the paw spheres
216 !the usual pawfgrtab uses r_shape which may not be the same as r_paw
217  if (my_natom>0) then
218    if (paral_atom) then
219      call pawtab_get_lsize(pawtab,l_size_atm,my_natom,Dtset%typat,mpi_atmtab=my_atmtab)
220      call pawfgrtab_init(local_pawfgrtab,Pawfgrtab(1)%cplex,l_size_atm,Dtset%nspden,Dtset%typat,&
221 &     mpi_atmtab=my_atmtab,comm_atom=my_comm_atom)
222    else
223      call pawtab_get_lsize(pawtab,l_size_atm,my_natom,Dtset%typat)
224      call pawfgrtab_init(local_pawfgrtab,Pawfgrtab(1)%cplex,l_size_atm,Dtset%nspden,Dtset%typat)
225    end if
226    ABI_FREE(l_size_atm)
227  end if
228  optcut = 1 ! use rpaw to construct local_pawfgrtab
229  optgr0 = 0; optgr1 = 0; optgr2 = 0 ! dont need gY terms locally
230  optrad = 1 ! do store r-R
231 
232  if (paral_atom) then
233    call nhatgrid(crystal%atindx1,crystal%gmet,my_natom,natom,crystal%nattyp,ngfftf,crystal%ntypat,&
234 &   optcut,optgr0,optgr1,optgr2,optrad,local_pawfgrtab,pawtab,crystal%rprimd,Dtset%typat,crystal%ucvol,Hdr%xred,&
235 &   comm_atom=my_comm_atom,mpi_atmtab=my_atmtab)
236  else
237    call nhatgrid(crystal%atindx1,crystal%gmet,my_natom,natom,crystal%nattyp,ngfftf,crystal%ntypat,&
238 &   optcut,optgr0,optgr1,optgr2,optrad,local_pawfgrtab,pawtab,crystal%rprimd,Dtset%typat,crystal%ucvol,Hdr%xred)
239  end if
240 !now local_pawfgrtab is ready to use
241 
242  max_nfgd=MAXVAL(local_pawfgrtab(:)%nfgd) ! MAX no. of points in the fine grid for this PAW sphere
243  ABI_MALLOC(r0shift,(3,max_nfgd,my_natom))
244  ABI_MALLOC(phk_atm,(2,max_nfgd,my_natom))
245 
246  do iatom=1,my_natom
247    iatom_tot=iatom;if (paral_atom) iatom_tot=my_atmtab(iatom)
248 
249    nfgd=local_pawfgrtab(iatom)%nfgd ! no. of points in the fine grid for this PAW sphere
250    ABI_MALLOC(rsph_red,(3,nfgd))
251    ABI_MALLOC(rsph_cart,(3,nfgd))
252    do ifgd=1,nfgd
253      rsph_cart(:,ifgd) = local_pawfgrtab(iatom)%rfgd(:,ifgd) + crystal%xcart(:,iatom_tot)
254    end do
255    call xcart2xred(nfgd,crystal%rprimd,rsph_cart,rsph_red) ! we work in reduced coordinates.
256    do ifgd=1,nfgd
257      call wrap2_zero_one(rsph_red(1,ifgd),red(1),shift(1)) ! num = red + shift
258      call wrap2_zero_one(rsph_red(2,ifgd),red(2),shift(2))
259      call wrap2_zero_one(rsph_red(3,ifgd),red(3),shift(3))
260      r0shift(:,ifgd,iatom) = shift
261      !if (ANY( ABS(shift) > tol12)) then
262      !  ABI_WARNING("rmR_red is outside the first unit cell.")
263      !  write(std_out,*)rsph_red(:,ifgd),shift
264      !end if
265    end do
266    ABI_FREE(rsph_red)
267    ABI_FREE(rsph_cart)
268  end do
269 
270  if (.not.paral_atom .and. my_natom>0) then
271    call pawfgrtab_print(local_pawfgrtab,natom=natom,unit=std_out,&
272 &   prtvol=Dtset%prtvol,mode_paral="COLL")
273  end if
274 
275  ierr=0
276 #ifndef HAVE_NETCDF
277  ierr = -1
278  write(msg,'(3a)')&
279 & "netcdf support must be enabled in order to output AE PAW wavefunction. ",ch10,&
280 & "No output will be produced, use --enable-netcdf at configure-time. "
281  ABI_WARNING(msg)
282  return
283 !These statements are necessary to avoid the compiler complain about unused variables:
284  ii=Dtset%usepaw;ii=Dtfil%unpaw;ii=Hdr%usepaw
285 #endif
286 
287 !FIXME check ordering in cprj and Eventually in external file
288 !why is iorder_cprj not stored in the file for crosschecking purpose?
289 !Here Im assuming cprj are not ordered!
290  iorder_cprj=0
291 
292 !n4,n5,n6 are FFT dimensions, modified to avoid cache trashing
293  n1=ngfftf(1); n2=ngfftf(2); n3=ngfftf(3)
294  n4=ngfftf(4); n5=ngfftf(5); n6=ngfftf(6)
295  nfftot=PRODUCT(ngfftf(1:3))
296  mgfftf=MAXVAL(ngfftf(1:3))
297 
298  ABI_MALLOC(phkr,(2,nfftot))
299  ABI_MALLOC(gbound,(2*mgfftf+8,2))
300 
301 #ifdef HAVE_NETCDF
302 !=== Initialize ETSF_IO files ===
303 ! FIXME: nspinor == 2 is buggy
304 
305  fname = trim(dtfil%filnam_ds(4))//'_PAWAVES.nc'
306  write(msg,'(2a)')' Opening file for AE PAW wave functions: ',trim(fname)
307  call wrtout([std_out, ab_out], msg, 'PERS')
308 
309  if (xmpi_comm_rank(comm_cell) == master) then
310    NCF_CHECK(nctk_open_create(ncid, fname, xmpi_comm_self))
311 
312    fform = 602
313    NCF_CHECK(hdr%ncwrite(ncid, fform, nc_define=.True.))
314 
315    ! Define wavefunctions in real space on the dense FFT mesh
316    ! Fortran layout:
317    !real_space_wavefunctions: double 8d array with shape:
318    !  [real_or_complex_wavefunctions]
319    !  [number_of_grid_points_vector1][number_of_grid_points_vector2][number_of_grid_points_vector3]
320    !  [number_of_spinor_components]
321    !  [max_number_of_states][number_of_kpoints][number_of_spins]
322 
323    ncerr = nctk_def_dims(ncid, [ &
324      nctkdim_t("real_or_complex_wavefunctions", 2),  &
325      nctkdim_t("number_of_grid_points_vector1", n1), &
326      nctkdim_t("number_of_grid_points_vector2", n2), &
327      nctkdim_t("number_of_grid_points_vector3", n3)  &
328    ], defmode=.True.)
329    NCF_CHECK(ncerr)
330 
331    shape_str = "real_or_complex_wavefunctions, &
332 &   number_of_grid_points_vector1, number_of_grid_points_vector2, number_of_grid_points_vector3, &
333 &   number_of_spinor_components, &
334 &   max_number_of_states, number_of_kpoints, number_of_spins"
335 
336    ! Define wavefunctions in real space.
337    ncerr = nctk_def_arrays(ncid, [&
338      nctkarr_t('ur_ae', "dp", shape_str),&
339      nctkarr_t('ur_pw', "dp", shape_str),&
340      nctkarr_t('ur_ae_onsite', "dp", shape_str),&
341      nctkarr_t('ur_ps_onsite', "dp", shape_str) &
342    ], defmode=.True.)
343    NCF_CHECK(ncerr)
344 
345    ! Complete the geometry information.
346    NCF_CHECK(crystal%ncwrite(ncid))
347    NCF_CHECK(ebands_ncwrite(ebands, ncid))
348 
349    NCF_CHECK(nf90_close(ncid))
350  end if
351 
352  call xmpi_barrier(comm_cell)
353 
354  ! Reopen the file in parallel inside comm_cell
355  ! Note that we use individual IO thus there's no need to handle idle processes
356  ! if paral_kgb == 0 and nprocs > nkpt * nsppol
357  NCF_CHECK(nctk_open_modify(ncid, fname, comm_cell))
358  ae_ncid = nctk_idname(ncid, "ur_ae")
359  pw_ncid = nctk_idname(ncid, "ur_pw")
360  aeons_ncid = nctk_idname(ncid, "ur_ae_onsite")
361  psons_ncid = nctk_idname(ncid, "ur_ps_onsite")
362 
363  NCF_CHECK(nctk_set_datamode(ncid))
364 #endif
365 
366 !Init structure storing phi_{nlm} and tphi_(nlm} on the dense FFT points located in the PAW spheres.
367  ABI_MALLOC(Paw_onsite,(natom))
368  if (paral_atom) then
369    call paw_pwaves_lmn_init(Paw_onsite,my_natom,natom,crystal%ntypat,crystal%rprimd,crystal%xcart,&
370    Pawtab,Pawrad,local_pawfgrtab, comm_atom=my_comm_atom,mpi_atmtab=my_atmtab)
371  else
372    call paw_pwaves_lmn_init(Paw_onsite,my_natom,natom,crystal%ntypat,crystal%rprimd,crystal%xcart,&
373    Pawtab,Pawrad,local_pawfgrtab)
374  end if
375 
376  bdtot_index=0; icg=0; ibg=0; norm_rerr=smallest_real
377 
378  ! === Loop over spin ===
379  do isppol=1,nsppol
380    ikg=0; start_kpt=1; stop_kpt=nkpt
381 
382    ! Check if k-point was specified (only serial)
383    if (present(set_k) .and. nprocs==1) then
384      if (set_k/=0) then
385        start_kpt = set_k
386        stop_kpt = set_k
387        !ABI_ERROR("set_k")
388      end if
389    end if
390 
391    ! === Loop over k points ===
392    do ikpt=start_kpt,stop_kpt
393      kpoint  = kpt(:,ikpt)
394      nband_k = nband(ikpt+(isppol-1)*nkpt)
395      npw_k   = npwarr(ikpt)
396      istwf_k = istwfk(ikpt)
397 
398      if (proc_distrb_cycle(mpi_enreg%proc_distrb,ikpt,1,nband_k,isppol,me_kpt)) then
399        bdtot_index=bdtot_index+nband_k
400        !ABI_ERROR("cycle in seq!")
401        cycle
402      end if
403 
404      do i3=0,n3-1
405        rfft(3)=DBLE(i3)/n3
406        do i2=0,n2-1
407          rfft(2)=DBLE(i2)/n2
408          do i1=0,n1-1
409            rfft(1)=DBLE(i1)/n1
410            ifft = 1 +i1 +i2*n1 +i3*n1*n2
411            phkr(1,ifft) = COS(two_pi*dot_product(kpoint,rfft))
412            phkr(2,ifft) = SIN(two_pi*dot_product(kpoint,rfft))
413          end do
414        end do
415      end do
416      ! phkr(1,:)=one; phkr(2,:)=zero
417 
418 !    Calculate the phase for the onsite PAW contributions.
419      do iatom=1,my_natom
420        nfgd=local_pawfgrtab(iatom)%nfgd ! no. of points in the fine grid for this PAW sphere
421        do ifgd=1,nfgd
422          arg = -two_pi* dot_product(r0shift(:,ifgd,iatom),kpoint)
423          phk_atm(1,ifgd,iatom) = COS(arg)
424          phk_atm(2,ifgd,iatom) = SIN(arg)
425        end do
426      end do
427 
428      ABI_MALLOC(Cprj_k,(natom,dtset%nspinor*nband_k))
429      call pawcprj_alloc(Cprj_k,0,dimcprj)
430 
431 !    Extract cprj for this k-point.
432      ibsp=0
433      do iband=1,nband_k
434        do ispinor=1,dtset%nspinor
435          ibsp=ibsp+1
436          do iatom=1,natom
437            Cprj_k(iatom,ibsp)%cp(:,:)=Cprj(iatom,ibsp+ibg)%cp(:,:)
438          end do
439        end do
440      end do
441 
442      ABI_MALLOC(kg_k,(3,npw_k))
443 
444      ! Extract G-vectors.
445      kg_k(:,1:npw_k)=kg(:,1+ikg:npw_k+ikg)
446      call sphereboundary(gbound,istwf_k,kg_k,mgfftf,npw_k)
447 
448      ! If a single band is requested, neuter the loop (only serial)
449      start_band = 1; stop_band = nband_k
450      if (present(set_band).AND.nprocs==1) then
451        if (set_band/=0) then
452          start_band = set_band
453          stop_band = set_band
454          !ABI_ERROR("set_band")
455        end if
456      end if
457 
458      ! Loop over bands.
459      do iband=start_band,stop_band
460 
461        ! Fourier transform on the real fft box of the smooth part.
462        ndat=Dtset%nspinor
463        ABI_MALLOC(fofgin,(2,npw_k*ndat))
464        ABI_MALLOC(fofr,(2,n4,n5,n6*ndat))
465 
466        do ipw=1,npw_k*dtset%nspinor
467          fofgin(:,ipw)=cg(:,ipw+(iband-1)*npw_k*dtset%nspinor+icg)
468        end do
469 
470        ! Complex can be set to 0 with this option(0) of fourwf
471        option=0; cplex=0; npwout=1
472        ABI_MALLOC(denpot,(cplex*n4,n5,n6))
473        ABI_MALLOC(fofgout,(2,npwout*ndat))
474 
475        call fourwf(cplex,denpot,fofgin(:,1:npw_k),fofgout,fofr(:,:,:,1:n6),gbound,gbound,istwf_k,kg_k,kg_k,&
476          mgfftf,MPI_enreg,1,ngfftf,npw_k,npwout,n4,n5,n6,option,tim_fourwf0,weight1,weight1,&
477          use_gpu_cuda=Dtset%use_gpu_cuda)
478 
479 !      Here I do not know if fourwf works in the case of spinors,
480 !      It seems that not all fftalg option support ndata! should check!
481 !      Do not forget to declare real(dp)::fofgin_down(:,:) to use the following statements
482        if (Dtset%nspinor==2) then
483          ABI_MALLOC(fofgin_down,(2,npw_k))
484          ABI_MALLOC(fofr_down,(2,n4,n5,n6))
485          fofgin_down(:,:)=fofgin(:,1+npw_k:2*npw_k)
486 !        Complex can be set to 0 with this option(0) of fourwf
487 !        cplex=1; option=1; npwout=1; ndat=1
488 !        NOTE: fofr_down can NOT be replaced by fofr(:,:,:,n6+1:2*n6), or else
489 !        the data in fofr(:,:,:,1:n6) will be the same with fofr(:,:,:,n6+1:2*n6)
490          call fourwf(cplex,denpot,fofgin_down,fofgout,fofr_down,gbound,gbound,istwf_k,kg_k,kg_k,&
491            mgfftf,MPI_enreg,1,ngfftf,npw_k,npwout,n4,n5,n6,option,tim_fourwf0,weight1,weight1)
492          ABI_FREE(fofgin_down)
493        end if
494 
495        ABI_MALLOC(ur_ae,(2,n1*n2*n3*ndat))
496        ABI_MALLOC(ur_ae_onsite,(2,n1*n2*n3))
497        ABI_MALLOC(ur_ps_onsite,(2,n1*n2*n3))
498        ABI_MALLOC(ur_pw,(2,n1*n2*n3*ndat))
499        ABI_MALLOC(ur_mask,(n1*n2*n3))
500 
501        ur_ae=zero;ur_ae_onsite=zero;ur_ps_onsite=zero;ur_pw=zero;ur_mask=zero
502 
503        ! * Add phase e^{ikr} since it is contained in cprj.
504        do i3=1,n3
505          do i2=1,n2
506            do i1=1,n1
507              ii = i1 + n1*(i2-1)+ n1*n2*(i3-1)
508              ur_pw(:,ii)=fofr(:,i1,i2,i3) ! Save pw part separately without the phase.
509              ur_ae(1,ii)= fofr(1,i1,i2,i3) * phkr(1,ii) - fofr(2,i1,i2,i3) * phkr(2,ii)
510              ur_ae(2,ii)= fofr(1,i1,i2,i3) * phkr(2,ii) + fofr(2,i1,i2,i3) * phkr(1,ii)
511              if(Dtset%nspinor==2) then
512                ur_pw(:,ii+n1*n2*n3)=fofr_down(:,i1,i2,i3) ! Save pw part separately without the phase.
513                ur_ae(1,ii+n1*n2*n3)= fofr_down(1,i1,i2,i3) * phkr(1,ii) - fofr_down(2,i1,i2,i3) * phkr(2,ii)
514                ur_ae(2,ii+n1*n2*n3)= fofr_down(1,i1,i2,i3) * phkr(2,ii) + fofr_down(2,i1,i2,i3) * phkr(1,ii)
515              end if
516            end do
517          end do
518        end do
519        ABI_FREE(fofr)
520 
521        if(Dtset%nspinor==2) then
522          ABI_FREE(fofr_down)
523        end if
524 
525        ! === Add onsite term on the augmented FFT mesh ===
526        do iatom=1,my_natom
527          itypat  =local_pawfgrtab(iatom)%itypat
528          lmn_size=Pawtab(itypat)%lmn_size
529          ln_size =Pawtab(itypat)%basis_size   ! no. of nl elements in PAW basis
530          nfgd    =local_pawfgrtab(iatom)%nfgd ! no. of points in the fine grid for this PAW sphere
531 
532          ibsp=(iband-1)*dtset%nspinor
533          do ispinor=1,dtset%nspinor
534            ibsp=ibsp+1
535            do jlmn=1,lmn_size
536              jl=Pawtab(itypat)%indlmn(1,jlmn)
537              jm=Pawtab(itypat)%indlmn(2,jlmn)
538              cp_fact(1) = Cprj_k(iatom,ibsp)%cp(1,jlmn) *sqrt(crystal%ucvol) ! Magic factor
539              cp_fact(2) = Cprj_k(iatom,ibsp)%cp(2,jlmn) *sqrt(crystal%ucvol)
540 
541              do ifgd=1,nfgd ! loop over fine grid points in current PAW sphere.
542                ifftsph = local_pawfgrtab(iatom)%ifftsph(ifgd) ! index of the point on the grid
543                phj  = Paw_onsite(iatom)% phi(ifgd,jlmn)
544                tphj = Paw_onsite(iatom)%tphi(ifgd,jlmn)
545                ! old code
546                !re_p = cp_fact(1); im_p = cp_fact(2)
547                ! apply the phase
548                re_p = cp_fact(1) * phk_atm(1,ifgd,iatom) - cp_fact(2) * phk_atm(2,ifgd,iatom)
549                im_p = cp_fact(1) * phk_atm(2,ifgd,iatom) + cp_fact(2) * phk_atm(1,ifgd,iatom)
550 
551                ur_ae(1,ifftsph+(ispinor-1)*nfftot) = ur_ae(1,ifftsph+(ispinor-1)*nfftot) + re_p * (phj-tphj)
552                ur_ae(2,ifftsph+(ispinor-1)*nfftot) = ur_ae(2,ifftsph+(ispinor-1)*nfftot) + im_p * (phj-tphj)
553                ur_ae_onsite(1,ifftsph) = ur_ae_onsite(1,ifftsph) + re_p * phj
554                ur_ae_onsite(2,ifftsph) = ur_ae_onsite(2,ifftsph) + im_p * phj
555                ur_ps_onsite(1,ifftsph) = ur_ps_onsite(1,ifftsph) + re_p * tphj
556                ur_ps_onsite(2,ifftsph) = ur_ps_onsite(2,ifftsph) + im_p * tphj
557                ur_mask(ifftsph) = one
558              end do
559 
560            end do !jlmn
561          end do !ispinor
562        end do !iatom
563 
564        if (paral_atom) then
565          ABI_MALLOC(buf_tmp,(2,n1*n2*n3,3))
566          buf_tmp(:,:,1) = ur_ae
567          buf_tmp(:,:,2) = ur_ae_onsite
568          buf_tmp(:,:,3) = ur_ps_onsite
569          call xmpi_sum(buf_tmp,my_comm_atom,ierr)
570          ur_ae = buf_tmp(:,:,1)
571          ur_ae_onsite= buf_tmp(:,:,2)
572          ur_ps_onsite= buf_tmp(:,:,3)
573          ABI_FREE(buf_tmp)
574        end if
575 
576 !      * Remove the phase e^{ikr}, we store u(r).
577        do i3=1,n3
578          do i2=1,n2
579            do i1=1,n1
580              ii = i1 + n1*(i2-1)+ n1*n2*(i3-1)
581              reur=ur_ae(1,ii)
582              imur=ur_ae(2,ii)
583              ur_ae(1,ii)=  reur * phkr(1,ii) + imur * phkr(2,ii)
584              ur_ae(2,ii)= -reur * phkr(2,ii) + imur * phkr(1,ii)
585              if(Dtset%nspinor==2) then
586                reur=ur_ae(1,ii+nfftot)    ! Important!
587                imur=ur_ae(2,ii+nfftot)
588                ur_ae(1,ii+nfftot)=  reur * phkr(1,ii) + imur * phkr(2,ii)
589                ur_ae(2,ii+nfftot)= -reur * phkr(2,ii) + imur * phkr(1,ii)
590              end if
591              reur=ur_ae_onsite(1,ii)
592              imur=ur_ae_onsite(2,ii)
593              ur_ae_onsite(1,ii)=  reur * phkr(1,ii) + imur * phkr(2,ii)
594              ur_ae_onsite(2,ii)= -reur * phkr(2,ii) + imur * phkr(1,ii)
595              reur=ur_ps_onsite(1,ii)
596              imur=ur_ps_onsite(2,ii)
597              ur_ps_onsite(1,ii)=  reur * phkr(1,ii) + imur * phkr(2,ii)
598              ur_ps_onsite(2,ii)= -reur * phkr(2,ii) + imur * phkr(1,ii)
599            end do
600          end do
601        end do
602 
603        norm=zero
604        do ii=1,npw_k*Dtset%nspinor
605          norm=norm+fofgin(1,ii)**2+fofgin(2,ii)**2
606        end do
607        write(std_out,'(a,2i5,f22.16)',advance='no') 'ikpt,iband, norm (G,PSWF)=',ikpt,iband,norm
608        norm=zero
609        do ifft=1,nfftot*Dtset%nspinor
610          norm = norm + ur_ae(1,ifft)**2+ur_ae(2,ifft)**2
611        end do
612        norm=norm/nfftot
613        norm_rerr = MAX((ABS(norm-one))*100,norm_rerr)
614        write(std_out,*)"norm (R,AEWF)= ",norm
615        call flush_unit(std_out)
616 
617 !      MS: Various testing and debugging options
618        if (.TRUE..and.nprocs==1) then
619          if (present(pseudo_norms)) then
620 !          Check the supposedly zero overlap |\tilde{Psi_n}-\tilde{Psi_n^1}|^2
621            ABI_MALLOC(dummy_1d,(n1*n2*n3))
622            dummy_1d = zero
623            norm = zero
624            do ifft = 1, nfftot
625              dummy_1d(ifft) = ((ur_pw(1,ifft)-ur_ps_onsite(1,ifft))**2 &
626               +  (ur_pw(2,ifft)-ur_ps_onsite(2,ifft))**2) * ur_mask(ifft)
627              norm = norm + dummy_1d(ifft)
628            end do
629            norm = norm / nfftot
630            pseudo_norms(isppol,ikpt,iband) = norm
631            ABI_FREE(dummy_1d)
632          end if
633 
634        else
635          write(msg,'(5a)')&
636           "The option to print PAW all-electron wavefunctions is on, but execution ",ch10,&
637           "is in parallel on two or more processors. XcrysDen files with individual con-",ch10,&
638           "tributions will not be written. In order to enable this you must run in serial."
639          ABI_WARNING(msg)
640        end if ! Check if serial run
641 
642 #ifdef HAVE_NETCDF
643        ncerr = nf90_put_var(ncid, ae_ncid, ur_ae, &
644           start=[1,1,1,1,1,iband,ikpt,isppol], count=[2,n1,n2,n3,1,1,1,1])
645        NCF_CHECK(ncerr)
646 
647        ncerr = nf90_put_var(ncid, pw_ncid, ur_pw, &
648          start=[1,1,1,1,1,iband,ikpt,isppol], count=[2,n1,n2,n3,1,1,1,1])
649        NCF_CHECK(ncerr)
650 
651        ncerr = nf90_put_var(ncid, aeons_ncid, ur_ae_onsite, &
652          start=[1,1,1,1,1,iband,ikpt,isppol], count=[2,n1,n2,n3,1,1,1,1])
653        NCF_CHECK(ncerr)
654 
655        ncerr = nf90_put_var(ncid, psons_ncid, ur_ps_onsite, &
656          start=[1,1,1,1,1,iband,ikpt,isppol], count=[2,n1,n2,n3,1,1,1,1])
657        NCF_CHECK(ncerr)
658 #endif
659 
660        ABI_FREE(ur_ae)
661        ABI_FREE(ur_ae_onsite)
662        ABI_FREE(ur_ps_onsite)
663        ABI_FREE(ur_pw)
664        ABI_FREE(ur_mask)
665        ABI_FREE(fofgin)
666        ABI_FREE(fofgout)
667        ABI_FREE(denpot)
668      end do !nband_k
669 
670      bdtot_index=bdtot_index+nband_k
671 
672      if (mkmem/=0) then
673        ibg=ibg+dtset%nspinor*nband_k
674        icg=icg+npw_k*dtset%nspinor*nband_k
675        ikg=ikg+npw_k
676      end if
677 
678      ABI_FREE(kg_k)
679 
680      call pawcprj_free(Cprj_k)
681      ABI_FREE(Cprj_k)
682 
683    end do !ikpt
684  end do !nsppol
685 
686  ABI_FREE(phkr)
687  ABI_FREE(gbound)
688 
689  ! Free augmentation waves.
690  call paw_pwaves_lmn_free(Paw_onsite)
691  ABI_FREE(Paw_onsite)
692 
693  ! Maximum relative error over CPUs.
694  call xmpi_max(norm_rerr,max_rerr,comm_cell,ierr)
695  write(std_out,*)"max_rerr=",max_rerr
696 
697  if (max_rerr > ten) then
698    write(msg,'(7a)')&
699     "Inaccuracy on the normalization of the wave funtions exceeds 10%. ",ch10,&
700     "Likely due to the use of a too coarse FFT mesh or unconverged wavefunctions. ",ch10,&
701     "Numerical values inside the augmentation regions might be inaccurate. ",ch10,&
702     "Action: increase pawecutdg in your input file. "
703    ABI_COMMENT(msg)
704  end if
705 
706  ABI_FREE(r0shift)
707  ABI_FREE(phk_atm)
708  call pawfgrtab_free(local_pawfgrtab)
709 
710  ! Destroy atom table used for parallelism
711  call free_my_atmtab(my_atmtab,my_atmtab_allocated)
712 
713  DBG_EXIT("COLL")
714 
715 end subroutine pawmkaewf