TABLE OF CONTENTS


ABINIT/ccfft [ Functions ]

[ Top ] [ Functions ]

NAME

 ccfft

FUNCTION

 Carry out complex-to-complex Fourier transforms between real
 and reciprocal (G) space. Library of such routines.
 Include machine-dependent F90 routines used with fftalg=200.

COPYRIGHT

 Copyright (C) 2000-2018 ABINIT group (PT, XG, FF)
 This file is distributed under the terms of the
 GNU General Public License, see ~abinit/COPYING
 or http://www.gnu.org/copyleft/gpl.txt .

INPUTS

  fftalga=govern the choice of the fft routine to be used
    if 1: SGoedecker routine
    if 2: Machine dependent routine, depending on the precompilation options
    if 3: FFTW library routine
    if 4: new SGoedecker routine, version 2002
          Warning : the second and third dimensions of the Fourier space
          array are switched, compared to the usual case
  fftcache=size of the cache (kB)
  isign= Integer specifying which sign to be used for the transformation.
         must be either +1 or -1.
  ngfft(18)=contain all needed information about 3D FFT, see ~abinit/doc/variables/vargs.htm#ngfft
  n1,n2,n3=Actual integer dimensions (see ngfft) for the 3D sequence.
           Physical dimension of the transform.
  n4,n5,n6=Leading dimensions. Generally, n6 is not different to n3.
  ndat=number of FFT to do in //
  option= 1 if call from fourwf, 2 if call from other routine
  work1(2,n4*n5*n6)=Array to be transformed.

OUTPUT

  inplace = 0 if result is in work2 ; =1 if result is in work1 (machine dependent)
  normalized=0 if the backward (isign=-1) FFT is not normalized, so has to be normalized outside of ccfft
            =1 otherwise
  work2(2,n4*n5*n6)=transformed array in case inplace=0.

SIDE EFFECTS

  work1(2,n4*n5*n6)=at input, array to be transformed
                    at output, transformed array (in case inplace=1)

NOTES

 precompilation definitions :
   -D(machine_list) :  (case fftalga=200)
      choice of machine-dependent FFT library, if permitted
   -DHAVE_FFT_FFTW2   : (case fftalga=300) activate the FFTW lib
   -Dnolib  : (case fftalga=200) call SGoedecker routine,
      instead of machine-dependent one

 More about fftalga=200
 Library routines for the following platforms have been implemented :
  Compaq/DEC
  HP          (in place FFT)
  SGI         (in place FFT)
  NEC         (in place FFT)
 For all the other platforms, or if the CPP directive nolib is
 activated, one uses the fft routine from S. Goedecker.

PARENTS

      fourdp,fourwf

CHILDREN

      sg2002_back,sg2002_forw,sg_fft_cc

SOURCE

3539 subroutine ccfft(ngfft,isign,n1,n2,n3,n4,n5,n6,ndat,option,work1,work2,comm_fft)
3540 
3541 
3542 !This section has been created automatically by the script Abilint (TD).
3543 !Do not modify the following lines by hand.
3544 #undef ABI_FUNC
3545 #define ABI_FUNC 'ccfft'
3546 !End of the abilint section
3547 
3548  implicit none
3549 
3550 !Arguments ------------------------------------
3551 !scalars
3552  integer,intent(in) :: isign,n1,n2,n3,n4,n5,n6,ndat,option,comm_fft
3553 !arrays
3554  integer,intent(in) :: ngfft(18)
3555  real(dp),intent(inout) :: work1(2,n4*n5*n6*ndat)
3556  real(dp),intent(inout) :: work2(2,n4*n5*n6*ndat) !vz_i
3557 
3558 !Local variables ------------------------------
3559 !scalars
3560  integer,parameter :: cplex2=2
3561  integer :: fftalg,fftalga,fftalgb,fftalgc,fftcache
3562  integer :: nd2proc,nd3proc,nproc_fft
3563  character(len=500) :: message
3564 
3565 !*************************************************************************
3566 
3567  nproc_fft=ngfft(10)
3568 
3569  fftcache=ngfft(8)
3570  fftalg  =ngfft(7)
3571  fftalga =fftalg/100; fftalgb=mod(fftalg,100)/10; fftalgc=mod(fftalg,10)
3572 
3573  if(fftalga==2)then
3574    MSG_ERROR("Machine dependent FFTs are not supported anymore")
3575 
3576  else if(fftalga==3)then
3577    MSG_ERROR("Old interface with FFTW2 is not supported anymore")
3578 
3579  else if(fftalga<1 .or. fftalga>4)then
3580    write(message, '(a,a,a,i5,a,a)' )&
3581 &   'The allowed values of fftalg(A) are 1, 2, 3, and 4 .',ch10,&
3582 &   'The actual value of fftalg(A) is',fftalga,ch10,&
3583 &   'Action : check the value of fftalg in your input file.'
3584    MSG_ERROR(message)
3585  end if
3586 
3587  ! This routine will be removed ASAP.
3588  ! Do not add new FFT libraries without previous discussion with Matteo Giantomassi
3589  ! inplace==1 or normalize==2 are not supported anymore in the caller (fourwf, fourdp)
3590  !inplace=0; normalized=0
3591 
3592  if (fftalga/=4) then
3593    ! Call Stefan Goedecker FFT
3594    call sg_fft_cc(fftcache,n1,n2,n3,n4,n5,n6,ndat,isign,work1,work2)
3595 
3596  else if (fftalga==4) then
3597    ! Call new version of Stefan Goedecker FFT
3598    nd2proc=((n2-1)/nproc_fft) +1
3599    nd3proc=((n6-1)/nproc_fft) +1
3600 
3601    if (isign==1) then
3602      ! Fourier to real space (backward)
3603      call sg2002_back(cplex2,ndat,n1,n2,n3,n4,n5,n6,n4,nd2proc,nd3proc,option,work1,work2,comm_fft)
3604    else
3605      ! isign=-1, real space to Fourier (forward)
3606      call sg2002_forw(cplex2,ndat,n1,n2,n3,n4,n5,n6,n4,nd2proc,nd3proc,option,work1,work2,comm_fft)
3607    end if
3608  end if
3609 
3610 end subroutine ccfft

ABINIT/fourdp [ Functions ]

[ Top ] [ Functions ]

NAME

 fourdp

FUNCTION

 Conduct Fourier transform of REAL or COMPLEX function f(r)=fofr defined on
 fft grid in real space, to create complex f(G)=fofg defined on full fft grid
 in reciprocal space, in full storage mode, or the reverse operation.
 For the reverse operation, the final data is divided by nfftot.
 REAL case when cplex=1, COMPLEX case when cplex=2
 Usually used for density and potentials.

 There are two different possibilities :
  fftalgb=0 means using the complex-to-complex FFT routine,
   irrespective of the value of cplex
  fftalgb=1 means using a real-to-complex FFT or a complex-to-complex FFT,
   depending on the value of cplex.
  The only real-to-complex FFT available is from SGoedecker library.

COPYRIGHT

 Copyright (C) 1998-2018 ABINIT group (DCA, XG)
 This file is distributed under the terms of the
 GNU General Public License, see ~abinit/COPYING
 or http://www.gnu.org/copyleft/gpl.txt .
 For the initials of contributors, see ~abinit/doc/developers/contributors.txt .

INPUTS

 cplex=1 if fofr is real, 2 if fofr is complex
 isign=sign of Fourier transform exponent: current convention uses
  +1 for transforming from G to r
  -1 for transforming from r to G.
 mpi_enreg=information about MPI parallelization
 nfft=(effective) number of FFT grid points (for this processor)
 ngfft(18)=contain all needed information about 3D FFT, see ~abinit/doc/variables/vargs.htm#ngfft
 paral_kgb=Flag related to the kpoint-band-fft parallelism
 tim_fourdp=timing code of the calling routine (can be set to 0 if not attributed)

TODO

  Remove paral_kgb

SIDE EFFECTS

 Input/Output
 fofg(2,nfft)=f(G), complex.
 fofr(cplex*nfft)=input function f(r) (real or complex)

PARENTS

      atm2fft,bethe_salpeter,calc_smeared_density,dfpt_atm2fft,dfpt_dyfro
      dfpt_eltfrxc,dfpt_looppert,dfpt_newvtr,dfpt_scfcv,dfpt_vlocal
      dfptnl_loop,dieltcel,energy,fock_getghc,forces,fourdp_6d,fresidrsp
      green_kernel,gstate,hartre,hartrestr,initro,jellium,laplacian,m_dvdb
      m_electronpositron,m_epjdos,m_fft_prof,m_hidecudarec,m_kxc,m_ppmodel
      m_screening,make_efg_el,mklocl_realspace,mklocl_recipspace,moddiel
      moddiel_csrb,mrgscr,newrho,newvtr,nonlinear,nres2vres,odamix,pawmknhat
      pawmknhat_psipsi,pawmkrho,posdoppler,prcref,prcref_PMA,recursion
      recursion_nl,redgr,respfn,rotate_rho,scfcv,screening,setup_positron
      sigma,stress,symrhg,tddft,transgrid,vlocalstr,xcden,xcpot

CHILDREN

      ccfft,dfti_seqfourdp,fftw3_mpifourdp,fftw3_seqfourdp,fourdp_mpi
      ptabs_fourdp,sg2002_back,sg2002_forw,sg2002_mpifourdp,sg_fft_rc,timab

SOURCE

3145 subroutine fourdp(cplex,fofg,fofr,isign,mpi_enreg,nfft,ngfft,paral_kgb,tim_fourdp)
3146 
3147 
3148 !This section has been created automatically by the script Abilint (TD).
3149 !Do not modify the following lines by hand.
3150 #undef ABI_FUNC
3151 #define ABI_FUNC 'fourdp'
3152 !End of the abilint section
3153 
3154  implicit none
3155 
3156 !Arguments ------------------------------------
3157 !scalars
3158  integer,intent(in) :: cplex,isign,nfft,paral_kgb,tim_fourdp
3159  type(MPI_type),intent(in) :: mpi_enreg
3160 !arrays
3161  integer,intent(in) :: ngfft(18)
3162  real(dp),intent(inout) :: fofg(2,nfft),fofr(cplex*nfft)
3163 
3164 !Local variables-------------------------------
3165 !scalars
3166  integer,parameter :: ndat1=1
3167  integer :: fftalg,fftalga,fftalgb,fftcache,i1,i2,i3,base
3168  integer :: n1,n1half1,n1halfm,n2,n2half1,n3,n4
3169  integer :: n4half1,n5,n5half1,n6 !nd2proc,nd3proc,i3_local,i2_local,
3170  integer :: comm_fft,nproc_fft,me_fft
3171  real(dp) :: xnorm
3172  character(len=500) :: message
3173 !arrays
3174  integer, ABI_CONTIGUOUS pointer :: fftn2_distrib(:),ffti2_local(:)
3175  integer, ABI_CONTIGUOUS pointer :: fftn3_distrib(:),ffti3_local(:)
3176  real(dp) :: tsec(2)
3177  real(dp),allocatable :: work1(:,:,:,:),work2(:,:,:,:)
3178  real(dp),allocatable :: workf(:,:,:,:),workr(:,:,:,:)
3179 
3180 ! *************************************************************************
3181 
3182  ABI_UNUSED(paral_kgb)
3183 
3184  ! Keep track of timing
3185  call timab(260+tim_fourdp,1,tsec)
3186 
3187  n1=ngfft(1); n2=ngfft(2); n3=ngfft(3)
3188  n4=ngfft(4); n5=ngfft(5); n6=ngfft(6)
3189  me_fft=ngfft(11); nproc_fft=ngfft(10)
3190  comm_fft = mpi_enreg%comm_fft
3191  !write(std_out,*)"fourdp, nx,ny,nz,nfft =",n1,n2,n3,nfft
3192 
3193  fftcache=ngfft(8)
3194  fftalg  =ngfft(7); fftalga =fftalg/100; fftalgb =mod(fftalg,100)/10
3195 
3196  xnorm=one/dble(n1*n2*n3)
3197  !write(std_out,*)' fourdp :me_fft',me_fft,'nproc_fft',nproc_fft,'nfft',nfft
3198 
3199  if (fftalgb/=0 .and. fftalgb/=1) then
3200    write(message, '(a,i4,a,a,a,a,a)' )&
3201 &   'The input algorithm number fftalg=',fftalg,' is not allowed.',ch10,&
3202 &   'The second digit (fftalg(B)) must be 0 or 1.',ch10,&
3203 &   'Action : change fftalg in your input file.'
3204    MSG_BUG(message)
3205  end if
3206 
3207  if (fftalgb==1 .and. ALL(fftalga/=(/1,3,4,5/)) )then
3208    write(message,'(a,i4,5a)')&
3209 &   'The input algorithm number fftalg=',fftalg,' is not allowed.',ch10,&
3210 &   'When fftalg(B) is 1, the allowed values for fftalg(A) are 1 and 4.',ch10,&
3211 &   'Action: change fftalg in your input file.'
3212    MSG_BUG(message)
3213  end if
3214 
3215  if (n4<n1.or.n5<n2.or.n6<n3) then
3216    write(message,'(a,3i8,a,3i8)')'  Each of n4,n5,n6=',n4,n5,n6,'must be >= n1, n2, n3 =',n1,n2,n3
3217    MSG_BUG(message)
3218  end if
3219 
3220  ! Get the distrib associated with this fft_grid => for i2 and i3 planes
3221  call ptabs_fourdp(mpi_enreg,n2,n3,fftn2_distrib,ffti2_local,fftn3_distrib,ffti3_local)
3222 
3223  ! Branch immediately depending on nproc_fft
3224  if (nproc_fft > 1) then
3225    call fourdp_mpi(cplex,nfft,ngfft,ndat1,isign,fftn2_distrib,ffti2_local,fftn3_distrib,ffti3_local,fofg,fofr,comm_fft)
3226    goto 100
3227  end if
3228 
3229  if (fftalga == FFT_FFTW3) then
3230    ! Call sequential or MPI FFTW3 version.
3231    if (nproc_fft == 1) then
3232      !call wrtout(std_out,"FFTW3 SEQFOURDP","COLL")
3233      call fftw3_seqfourdp(cplex,n1,n2,n3,n1,n2,n3,ndat1,isign,fofg,fofr)
3234    else
3235      !call wrtout(std_out,"FFTW3 MPIFOURDP","COLL")
3236      call fftw3_mpifourdp(cplex,nfft,ngfft,ndat1,isign,&
3237 &     fftn2_distrib,ffti2_local,fftn3_distrib,ffti3_local,fofg,fofr,comm_fft)
3238    end if
3239    ! Accumulate timing and return
3240    call timab(260+tim_fourdp,2,tsec); return
3241  end if
3242 
3243  if (fftalga==FFT_DFTI) then
3244    ! Call sequential or MPI MKL.
3245    if (nproc_fft == 1) then
3246      call dfti_seqfourdp(cplex,n1,n2,n3,n1,n2,n3,ndat1,isign,fofg,fofr)
3247    else
3248      MSG_ERROR("MPI fourdp with MKL cluster DFT not implemented")
3249    end if
3250    ! Accumulate timing and return
3251    call timab(260+tim_fourdp,2,tsec); return
3252  end if
3253 
3254  ! Here, deal  with the new SG FFT, complex-to-complex case
3255  if (fftalga==FFT_SG2002 .and. (fftalgb==0 .or. cplex==2)) then
3256    call sg2002_mpifourdp(cplex,nfft,ngfft,ndat1,isign,fftn2_distrib,ffti2_local,fftn3_distrib,ffti3_local,fofg,fofr,comm_fft)
3257    !call sg2002_seqfourdp(cplex,nfft,ngfft,ndat1,isign,fftn2_fofg,fofr)
3258  end if
3259 
3260  ! Here, deal with the new SG FFT, with real-to-complex
3261  if (fftalga==FFT_SG2002 .and. fftalgb==1 .and. cplex==1) then
3262    ABI_CHECK(nproc_fft == 1,"fftalg 41x does not support nproc_fft > 1")
3263 
3264    n1half1=n1/2+1; n1halfm=(n1+1)/2
3265    n2half1=n2/2+1
3266    ! n4half1 or n5half1 are the odd integers >= n1half1 or n2half1
3267    n4half1=(n1half1/2)*2+1
3268    n5half1=(n2half1/2)*2+1
3269    ABI_ALLOCATE(workr,(2,n4half1,n5,n6))
3270    ABI_ALLOCATE(workf,(2,n4,n6,n5half1))
3271 
3272    if (isign==1) then
3273      do i3=1,n3
3274        do i2=1,n2half1
3275          base=n1*(i2-1+n2*(i3-1))
3276          do i1=1,n1
3277            workf(1,i1,i3,i2)=fofg(1,i1+base)
3278            workf(2,i1,i3,i2)=fofg(2,i1+base)
3279          end do
3280        end do
3281      end do
3282 
3283      !nd2proc=((n2-1)/nproc_fft) +1
3284      !nd3proc=((n6-1)/nproc_fft) +1
3285 
3286      ! change the call? n5half1 et n6 ?
3287      call sg2002_back(cplex,ndat1,n1,n2,n3,n4,n5,n6,n4half1,n5half1,n6,2,workf,workr,comm_fft)
3288 
3289      do i3=1,n3
3290        do i2=1,n2
3291          base=n1*(i2-1+n2*(i3-1))
3292          do i1=1,n1half1-1
3293            ! copy data
3294            fofr(2*i1-1+base)=workr(1,i1,i2,i3)
3295            fofr(2*i1  +base)=workr(2,i1,i2,i3)
3296          end do
3297          ! If n1 odd, must add last data
3298          if((2*n1half1-2)/=n1)then
3299            fofr(n1+base)=workr(1,n1half1,i2,i3)
3300          end if
3301        end do
3302      end do
3303 
3304    else if (isign==-1) then
3305      do i3=1,n3
3306        do i2=1,n2
3307          base=n1*(i2-1+n2*(i3-1))
3308          do i1=1,n1half1-1
3309            workr(1,i1,i2,i3)=fofr(2*i1-1+base)
3310            workr(2,i1,i2,i3)=fofr(2*i1  +base)
3311          end do
3312          ! If n1 odd, must add last data
3313          if((2*n1half1-2)/=n1)then
3314            workr(1,n1half1,i2,i3)=fofr(n1+base)
3315            workr(2,n1half1,i2,i3)=zero
3316          end if
3317        end do
3318      end do
3319 
3320      call sg2002_forw(cplex,ndat1,n1,n2,n3,n4,n5,n6,n4half1,n5half1,n6,2,workr,workf,comm_fft)
3321 
3322      ! Transfer fft output to the original fft box
3323      do i3=1,n3
3324        do i2=1,n2half1
3325          base=n1*(i2-1+n2*(i3-1))
3326          do i1=1,n1
3327            fofg(1,i1+base)=workf(1,i1,i3,i2)*xnorm
3328            fofg(2,i1+base)=workf(2,i1,i3,i2)*xnorm
3329          end do
3330        end do
3331 
3332        ! Complete missing values with complex conjugate
3333        ! Inverse of ix is located at nx+2-ix , except for ix=1, for which it is 1.
3334        if(n2half1>2)then
3335          do i2=2,n2+1-n2half1
3336            base=n1*((n2+2-i2)-1)
3337            if(i3/=1)base=base+n1*n2*((n3+2-i3)-1)
3338            fofg(1,1+base)= workf(1,1,i3,i2)*xnorm
3339            fofg(2,1+base)=-workf(2,1,i3,i2)*xnorm
3340            do i1=2,n1
3341              fofg(1,n1+2-i1+base)= workf(1,i1,i3,i2)*xnorm
3342              fofg(2,n1+2-i1+base)=-workf(2,i1,i3,i2)*xnorm
3343            end do
3344          end do
3345        end if
3346      end do
3347 
3348    end if ! isign
3349    ABI_DEALLOCATE(workr)
3350    ABI_DEALLOCATE(workf)
3351  end if
3352 
3353  ! Here, one calls the complex-to-complex FFT subroutine
3354  if( (fftalgb==0 .or. cplex==2) .and. fftalga/=4 )then
3355 
3356    ABI_ALLOCATE(work1,(2,n4,n5,n6))
3357    ABI_ALLOCATE(work2,(2,n4,n5,n6))
3358 
3359    if (isign==1) then
3360 
3361      ! Transfer fofg to the expanded fft box
3362 !$OMP PARALLEL DO PRIVATE(base)
3363      do i3=1,n3
3364        do i2=1,n2
3365          base=n1*(i2-1+n2*(i3-1))
3366          do i1=1,n1
3367            work1(1,i1,i2,i3)=fofg(1,i1+base)
3368            work1(2,i1,i2,i3)=fofg(2,i1+base)
3369          end do
3370        end do
3371      end do
3372 
3373      ! Call Stefan Goedecker C2C FFT
3374      !call sg_fft_cc(fftcache,n1,n2,n3,n4,n5,n6,ndat1,isign,work1,work2)
3375      call ccfft(ngfft,isign,n1,n2,n3,n4,n5,n6,ndat1,2,work1,work2,comm_fft)
3376 
3377      ! Take data from expanded box and put it in the original box.
3378      if (cplex==1) then
3379        ! REAL case
3380 !$OMP PARALLEL DO PRIVATE(base)
3381        do i3=1,n3
3382          do i2=1,n2
3383            base=n1*(i2-1+n2*(i3-1))
3384            do i1=1,n1
3385              fofr(i1+base)=work2(1,i1,i2,i3)
3386            end do
3387          end do
3388        end do
3389 
3390      else
3391        ! COMPLEX case
3392 !$OMP PARALLEL DO PRIVATE(base)
3393        do i3=1,n3
3394          do i2=1,n2
3395            base=2*n1*(i2-1+n2*(i3-1))
3396            do i1=1,n1
3397              fofr(2*i1-1+base)=work2(1,i1,i2,i3)
3398              fofr(2*i1  +base)=work2(2,i1,i2,i3)
3399            end do
3400          end do
3401        end do
3402      end if
3403 
3404    else if (isign==-1) then
3405 
3406      ! Insert fofr into the augmented fft box
3407      if (cplex==1) then
3408        ! REAL case
3409 !$OMP PARALLEL DO PRIVATE(base)
3410        do i3=1,n3
3411          do i2=1,n2
3412            base=n1*(i2-1+n2*(i3-1))
3413            do i1=1,n1
3414              ! copy data
3415              work1(1,i1,i2,i3)=fofr(i1+base)
3416              work1(2,i1,i2,i3)=zero
3417            end do
3418          end do
3419        end do
3420      else
3421        ! COMPLEX case
3422 !$OMP PARALLEL DO PRIVATE(base)
3423        do i3=1,n3
3424          do i2=1,n2
3425            base=2*n1*(i2-1+n2*(i3-1))
3426            do i1=1,n1
3427              ! copy data
3428              work1(1,i1,i2,i3)=fofr(2*i1-1+base)
3429              work1(2,i1,i2,i3)=fofr(2*i1  +base)
3430            end do
3431          end do
3432        end do
3433      end if ! cplex
3434 
3435      ! Call Stefan Goedecker C2C FFT
3436      !call sg_fft_cc(fftcache,n1,n2,n3,n4,n5,n6,ndat1,isign,work1,work2)
3437      call ccfft(ngfft,isign,n1,n2,n3,n4,n5,n6,ndat1,2,work1,work2,comm_fft)
3438 
3439      ! Transfer fft output to the original fft box
3440 !$OMP PARALLEL DO PRIVATE(base)
3441      do i3=1,n3
3442        do i2=1,n2
3443          base=n1*(i2-1+n2*(i3-1))
3444          do i1=1,n1
3445            fofg(1,i1+base)=work2(1,i1,i2,i3)*xnorm
3446            fofg(2,i1+base)=work2(2,i1,i2,i3)*xnorm
3447          end do
3448        end do
3449      end do
3450 
3451    end if ! isign
3452 
3453    ABI_DEALLOCATE(work1)
3454    ABI_DEALLOCATE(work2)
3455  end if ! End simple algorithm
3456 
3457  ! Here sophisticated algorithm based on S. Goedecker routines, only for the REAL case.
3458  ! Take advantage of the fact that fofr is real, and that fofg has corresponding symmetry properties.
3459  if( (fftalgb==1 .and. cplex==1) .and. fftalga/=4 )then
3460    ABI_CHECK(nproc_fft==1,"nproc > 1 not supported")
3461    ABI_CHECK(ndat1==1,"ndat > 1 not supported")
3462    call sg_fft_rc(cplex,fofg,fofr,isign,nfft,ngfft)
3463  end if
3464 
3465  100 call timab(260+tim_fourdp,2,tsec)
3466 
3467 end subroutine fourdp

ABINIT/fourwf [ Functions ]

[ Top ] [ Functions ]

NAME

 fourwf

FUNCTION

 Carry out composite Fourier transforms between real and reciprocal (G) space.
 Wavefunctions, contained in a sphere in reciprocal space,
 can be FFT to real space. They can also be FFT from real space
 to a sphere. Also, the density maybe accumulated, and a local
 potential can be applied.

 The different options are :
 - option=0 --> reciprocal to real space and output the result.
 - option=1 --> reciprocal to real space and accumulate the density.
 - option=2 --> reciprocal to real space, apply the local potential to the wavefunction
                in real space and produce the result in reciprocal space.
 - option=3 --> real space to reciprocal space.
                NOTE that in this case, fftalg=1x1 MUST be used. This may be changed in the future.

 The different sections of this routine corresponds to different
 algorithms, used independently of each others :
 - fftalg=xx0 : use simple complex-to-complex routines, without zero padding
     (rather simple, so can be used to understand how fourwf.f works);
 - fftalg=1x1 : use S Goedecker routines, with zero padding
     (7/12 savings in execution time);
 - fftalg=1x2 : call even more sophisticated coding also based on S Goedecker routines

 This routine contains many parts that differ only
 by small details, in order to treat each case with the better speed.
 Also for better speed, it uses no F90 construct, except the allocate command
 and for zeroing arrays.

COPYRIGHT

 Copyright (C) 1998-2018 ABINIT group (DCA, XG, GMR, FF)
 This file is distributed under the terms of the
 GNU General Public License, see ~abinit/COPYING
 or http://www.gnu.org/copyleft/gpl.txt .
 For the initials of contributors, see ~abinit/doc/developers/contributors.txt .

INPUTS

 cplex= if 1 , denpot is real, if 2 , denpot is complex
    (cplex=2 only allowed for option=2, and istwf_k=1)
    not relevant if option=0 or option=3, so cplex=0 can be used to minimize memory
 fofgin(2,npwin)=holds input wavefunction in G vector basis sphere.
                 (intent(in) but the routine sphere can modify it for another iflag)
 gboundin(2*mgfft+8,2)=sphere boundary info for reciprocal to real space
 gboundout(2*mgfft+8,2)=sphere boundary info for real to reciprocal space
 istwf_k=option parameter that describes the storage of wfs
 kg_kin(3,npwin)=reduced planewave coordinates, input
 kg_kout(3,npwout)=reduced planewave coordinates, output
 mgfft=maximum size of 1D FFTs
 mpi_enreg=information about MPI parallelization
 ndat=number of FFT to do in //
 ngfft(18)=contain all needed information about 3D FFT, see ~abinit/doc/variables/vargs.htm#ngfft
 npwin=number of elements in fofgin array (for option 0, 1 and 2)
 npwout=number of elements in fofgout array (for option 2 and 3)
 n4,n5,n6=ngfft(4),ngfft(5),ngfft(6), dimensions of fofr.
 option= if 0: do direct FFT
         if 1: do direct FFT, then sum the density
         if 2: do direct FFT, multiply by the potential, then do reverse FFT
         if 3: do reverse FFT only
 paral_kgb=Flag related to the kpoint-band-fft parallelism
 tim_fourwf=timing code of the calling routine (can be set to 0 if not attributed)
 weight_r=weight to be used for the accumulation of the density in real space
         (needed only when option=1)
 weight_i=weight to be used for the accumulation of the density in real space
         (needed only when option=1 and (fftalg=4 and fftalgc/=0))
 fofginb(2,npwin)=holds second input wavefunction in G vector basis sphere.
                 (intent(in) but the routine sphere can modify it for another iflag)
                 (for non diagonal occupation)
 use_ndo = use non diagonal occupations.

OUTPUT

  (see side effects)

SIDE EFFECTS

 Input/Output
 for option==0, fofgin(2,npwin*ndat)=holds input wavefunction in G sphere;
                fofr(2,n4,n5,n6*ndat) contains the output Fourier Transform of fofgin;
                no use of denpot, fofgout and npwout.
 for option==1, fofgin(2,npwin*ndat)=holds input wavefunction in G sphere;
                denpot(cplex*n4,n5,n6) contains the input density at input,
                and the updated density at output (accumulated);
                no use of fofgout and npwout.
 for option==2, fofgin(2,npwin*ndat)=holds input wavefunction in G sphere;
                denpot(cplex*n4,n5,n6) contains the input local potential;
                fofgout(2,npwout*ndat) contains the output function;
 for option==3, fofr(2,n4,n5,n6*ndat) contains the input real space wavefunction;
                fofgout(2,npwout*ndat) contains its output Fourier transform;
                no use of fofgin and npwin.

TODO

  Remove paral_kgb, we are already passing mpi_enreg

NOTES

   DO NOT CHANGE THE API OF THIS FUNCTION.
   If you need a specialized routine for the FFT of the wavefunctions, create
   a wrapper that uses fourwf to accomplish your task. This routine, indeed,
   has already too many parameters and each change in the API requires a careful
   modification of the different wrappers used for specialized FFTs such as FFTW3 and MKL-DFTI

PARENTS

      dfpt_accrho,dfpt_mkrho,dfptnl_resp,fock_getghc,getgh1c,getghc
      gwls_hamiltonian,m_cut3d,m_epjdos,m_fft_prof,m_fock,mkrho,mlwfovlp
      pawmkaewf,pawsushat,posdoppler,prep_fourwf,spin_current,susk,suskmm
      tddft,vtowfk

CHILDREN

      ccfft,cg_addtorho,cg_box2gsph,dcopy,dfti_seqfourwf,fftw3_seqfourwf
      fourwf_mpi,gpu_fourwf,ptabs_fourwf,sg_fftpad,sg_fftrisc,sg_fftrisc_2
      sphere,sphere_fft,timab,xmpi_sum

SOURCE

2471 subroutine fourwf(cplex,denpot,fofgin,fofgout,fofr,gboundin,gboundout,istwf_k,&
2472 &  kg_kin,kg_kout,mgfft,mpi_enreg,ndat,ngfft,npwin,npwout,n4,n5,n6,option,&
2473 &  paral_kgb,tim_fourwf,weight_r,weight_i, &
2474 &  use_gpu_cuda,use_ndo,fofginb) ! Optional arguments
2475 
2476 
2477 !This section has been created automatically by the script Abilint (TD).
2478 !Do not modify the following lines by hand.
2479 #undef ABI_FUNC
2480 #define ABI_FUNC 'fourwf'
2481 !End of the abilint section
2482 
2483  implicit none
2484 
2485 !Arguments ------------------------------------
2486 !scalars
2487  integer,intent(in) :: cplex,istwf_k,mgfft,n4,n5,n6,ndat,npwin,npwout,option,paral_kgb
2488  integer,intent(in) :: tim_fourwf
2489  integer,intent(in),optional :: use_gpu_cuda,use_ndo
2490  real(dp),intent(in) :: weight_r,weight_i
2491  type(MPI_type),intent(in) :: mpi_enreg
2492 !arrays
2493  integer,intent(in) :: gboundin(2*mgfft+8,2),gboundout(2*mgfft+8,2)
2494  integer,intent(in) :: kg_kin(3,npwin),kg_kout(3,npwout),ngfft(18)
2495  real(dp),intent(inout) :: denpot(cplex*n4,n5,n6),fofgin(2,npwin*ndat)
2496  real(dp),intent(inout),optional :: fofginb(:,:) ! (2,npwin*ndat)
2497  real(dp),intent(inout) :: fofr(2,n4,n5,n6*ndat)
2498  real(dp),intent(out) :: fofgout(2,npwout*ndat)
2499 
2500 !Local variables-------------------------------
2501 !scalars
2502  integer :: fftalg,fftalga,fftalgc,fftcache,i1,i2,i2_local,i3,i3_local,i3_glob,idat,ier
2503  integer :: iflag,ig,comm_fft,me_g0,me_fft,n1,n2,n3,nd2proc,nd3proc
2504  integer :: nfftot,nproc_fft,option_ccfft
2505  real(dp) :: fim,fre,xnorm
2506  character(len=500) :: message
2507  logical ::  luse_gpu_cuda,luse_ndo
2508 !arrays
2509  integer,parameter :: shiftg0(3)=0
2510  integer,parameter :: symmE(3,3)=reshape([1,0,0,0,1,0,0,0,1],[3,3])
2511  integer, ABI_CONTIGUOUS pointer :: fftn2_distrib(:),ffti2_local(:)
2512  integer, ABI_CONTIGUOUS pointer :: fftn3_distrib(:),ffti3_local(:)
2513  real(dp) :: tsec(2)
2514  real(dp),allocatable :: work1(:,:,:,:),work2(:,:,:,:),work3(:,:,:,:)
2515  real(dp),allocatable :: work4(:,:,:,:),work_sum(:,:,:,:)
2516 
2517 ! *************************************************************************
2518 
2519  ! Accumulate timing
2520  call timab(840+tim_fourwf,1,tsec)
2521 
2522  n1=ngfft(1); n2=ngfft(2); n3=ngfft(3); nfftot=n1*n2*n3
2523  fftcache=ngfft(8)
2524  fftalg=ngfft(7); fftalga=fftalg/100; fftalgc=mod(fftalg,10)
2525  me_fft=ngfft(11)
2526  nproc_fft=ngfft(10)
2527 
2528  comm_fft = mpi_enreg%comm_fft; me_g0 = mpi_enreg%me_g0
2529 
2530  !if (ndat/=1) then
2531  !  write(std_out,*)fftalg
2532  !  MSG_ERROR("Really? I thought nobody uses ndat > 1")
2533  !end if
2534 
2535  !if (weight_r /= weight_i) then
2536  !  write(std_out,*)fftalg
2537  !  MSG_ERROR("Really? I thought nobody uses weight_r != weight_i")
2538  !end if
2539 
2540  !if (option == 0 .and. fftalgc == 0) then
2541  !  MSG_ERROR("Option 0 is buggy when fftalgc ==0 is used!")
2542  !end if
2543 
2544 !Cuda version of fourwf
2545  luse_gpu_cuda=PRESENT(use_gpu_cuda)
2546  if (luse_gpu_cuda) luse_gpu_cuda=(luse_gpu_cuda.and.(use_gpu_cuda==1))
2547 
2548  if(luse_gpu_cuda) then
2549 #if defined HAVE_GPU_CUDA
2550    call gpu_fourwf(cplex,denpot,fofgin,fofgout,fofr,gboundin,gboundout,istwf_k,&
2551 &   kg_kin,kg_kout,mgfft,mpi_enreg,ndat,ngfft,npwin,npwout,n4,n5,n6,option,&
2552 &   paral_kgb,tim_fourwf,weight_r,weight_i) !,&
2553 !  &  use_ndo,fofginb)
2554 #endif
2555    call timab(840+tim_fourwf,2,tsec); return
2556  end if
2557 
2558  if ((fftalgc<0 .or. fftalgc>2)) then
2559    write(message, '(a,i4,a,a,a,a,a)' )&
2560 &   'The input algorithm number fftalg=',fftalg,' is not allowed.',ch10,&
2561 &   'The third digit, fftalg(C), must be 0, 1, or 2',ch10,&
2562 &   'Action: change fftalg in your input file.'
2563    MSG_ERROR(message)
2564  end if
2565 
2566  if (fftalgc/=0 .and. ALL(fftalga/=(/1,3,4,5/)) ) then
2567    write(message, '(a,i4,5a)' )&
2568 &   'The input algorithm number fftalg=',fftalg,' is not allowed.',ch10,&
2569 &   'The first digit must be 1,3,4 when the last digit is not 0.',ch10,&
2570 &   'Action: change fftalg in your input file.'
2571    MSG_ERROR(message)
2572  end if
2573 
2574  if (option<0 .or. option>3)then
2575    write(message, '(a,i4,a,a,a)' )&
2576 &   'The option number',option,' is not allowed.',ch10,&
2577 &   'Only option=0, 1, 2 or 3 are allowed presently.'
2578    MSG_ERROR(message)
2579  end if
2580 
2581  if (option==1 .and. cplex/=1) then
2582    write(message, '(a,a,a,i4,a)' )&
2583 &   'With the option number 1, cplex must be 1,',ch10,&
2584 &   'but it is cplex=',cplex,'.'
2585    MSG_ERROR(message)
2586  end if
2587 
2588  if (option==2 .and. (cplex/=1 .and. cplex/=2)) then
2589    write(message, '(a,a,a,i4,a)' )&
2590 &   'With the option number 2, cplex must be 1 or 2,',ch10,&
2591 &   'but it is cplex=',cplex,'.'
2592    MSG_ERROR(message)
2593  end if
2594 
2595  ! DMFT uses its own FFT algorithm (that should be wrapped in a different routine!)
2596  luse_ndo=.false.
2597  if (present(use_ndo).and.present(fofginb)) then
2598    if(use_ndo==1) then
2599      luse_ndo=.true.
2600      if((size(fofginb,2)==0)) then
2601        write(message, '(a,a,a,i4,i5)' )&
2602 &       'fofginb has a dimension equal to zero and use_ndo==1',ch10,&
2603 &       'Action: check dimension of fofginb',size(fofginb,2),use_ndo
2604        MSG_ERROR(message)
2605      end if
2606    end if
2607  end if
2608 
2609  if (luse_ndo) then
2610    if (.not.(fftalgc==2 .and. option/=3)) then
2611      MSG_ERROR("luse_ndo but not .not.(fftalgc==2 .and. option/=3)")
2612    end if
2613    ABI_CHECK(nproc_fft==1, "DMFT with nproc_fft != 1")
2614    ABI_CHECK(ndat == 1, "use_ndo and ndat != 1 not coded")
2615 
2616    call sg_fftrisc_2(cplex,denpot,fofgin,fofgout,fofr,gboundin,gboundout,&
2617 &   istwf_k,kg_kin,kg_kout,&
2618 &   mgfft,ngfft,npwin,npwout,n4,n5,n6,option,weight_r,weight_2=weight_i,luse_ndo=luse_ndo,fofgin_p=fofginb)
2619    goto 100
2620  end if
2621 
2622  ! Get the distrib associated with this fft_grid => for i2 and i3 planes
2623  call ptabs_fourwf(mpi_enreg,n2,n3,fftn2_distrib,ffti2_local,fftn3_distrib,ffti3_local)
2624 
2625  ! Branch immediately depending on nproc_fft
2626  if (nproc_fft > 1 .and. fftalg /= 412) then
2627    call fourwf_mpi(cplex,denpot,fofgin,fofgout,fofr,gboundin,gboundout,&
2628 &   istwf_k,kg_kin,kg_kout,me_g0,mgfft,ngfft,mpi_enreg%distribfft,n1,n2,n3,npwin,npwout,&
2629 &   n4,n5,n6,ndat,option,weight_r,weight_i,comm_fft)
2630    goto 100
2631  end if
2632 
2633  select case (fftalga)
2634 
2635  case (FFT_FFTW3)
2636    if (luse_ndo) MSG_ERROR("luse_ndo not supported by FFTW3")
2637    if (nproc_fft == 1) then
2638 !      call wrtout(std_out,"FFTW3_SEQFOURWF","COLL")
2639      call fftw3_seqfourwf(cplex,denpot,fofgin,fofgout,fofr,gboundin,gboundout,istwf_k,&
2640 &     kg_kin,kg_kout,mgfft,ndat,ngfft,npwin,npwout,n4,n5,n6,option,weight_r,weight_i)
2641    else
2642      MSG_ERROR("Not coded")
2643    end if
2644 
2645  case (FFT_DFTI)
2646    if (luse_ndo) MSG_ERROR("luse_ndo not supported by DFTI")
2647    if (nproc_fft == 1) then
2648 !     call wrtout(std_out,"DFTI_SEQFOURWF","COLL")
2649      call dfti_seqfourwf(cplex,denpot,fofgin,fofgout,fofr,gboundin,gboundout,istwf_k,&
2650 &     kg_kin,kg_kout,mgfft,ndat,ngfft,npwin,npwout,n4,n5,n6,option,weight_r,weight_i)
2651    else
2652      MSG_ERROR("Not coded")
2653    end if
2654 
2655  case default
2656    ! TODO: Clean this code!
2657 
2658    ! Here, use routines that make forwards FFT separately of backwards FFT,
2659    ! in particular, usual 3DFFT library routines, called in ccfft.
2660    if (fftalgc==0 .or. (fftalgc==1 .and. fftalga/=4) .or. &
2661 &   (fftalgc==2 .and. fftalga/=4 .and. option==3) )then
2662 
2663      ABI_ALLOCATE(work1,(2,n4,n5,n6*ndat))
2664 
2665      if (option/=3)then
2666        ! Insert fofgin into the fft box (array fofr)
2667 
2668        if (fftalga/=4)then
2669          iflag=1
2670          call sphere(fofgin,ndat,npwin,fofr,n1,n2,n3,n4,n5,n6,kg_kin,istwf_k,iflag,me_g0,shiftg0,symmE,one)
2671 
2672        else if (fftalga==4 .and. fftalgc==0) then
2673          ! Note the switch of n5 and n6, as they are only
2674          ! needed to dimension work2 inside "sphere"
2675          ABI_ALLOCATE(work2,(2,n4,n6,n5*ndat))
2676 
2677          iflag=2
2678          nd2proc=((n2-1)/nproc_fft) +1
2679          nd3proc=((n6-1)/nproc_fft) +1
2680          ABI_ALLOCATE(work3,(2,n4,n6,nd2proc*ndat))
2681          ABI_ALLOCATE(work4,(2,n4,n5,nd3proc*ndat))
2682 
2683          if (istwf_k == 1 .and. paral_kgb==1) then
2684            ! sphere dont need a big array
2685            work3=zero
2686            call sphere_fft(fofgin,ndat,npwin,work3,n1,n2,n3,n4,n6,kg_kin,&
2687 &           mpi_enreg%distribfft%tab_fftwf2_local,nd2proc)
2688          else
2689            ! sphere needs a big array and communications
2690            if (nproc_fft == 1 .and. ndat == 1 .and. istwf_k == 1) then
2691              ! dimensions of tab work3 and work2 are identical no need to use work2
2692              work3=zero
2693              call sphere(fofgin,ndat,npwin,work3,n1,n2,n3,n4,n6,nd2proc,&
2694 &             kg_kin,istwf_k,iflag,me_g0,shiftg0,symmE,one)
2695            else
2696              work2=zero
2697              call sphere(fofgin,ndat,npwin,work2,n1,n2,n3,n4,n6,n5,&
2698 &             kg_kin,istwf_k,iflag,me_g0,shiftg0,symmE,one)
2699 
2700              if (paral_kgb==1 .and. istwf_k > 1) then
2701                ! Collect G-vectors on each node
2702                work3=zero
2703                ABI_ALLOCATE(work_sum,(2,n4,n6,n5*ndat))
2704                call timab(48,1,tsec)
2705                call xmpi_sum(work2,work_sum,2*n4*n6*n5*ndat,comm_fft,ier)
2706                call timab(48,2,tsec)
2707 
2708                ! Extract my list of G-vectors needed for MPI-FFT.
2709                do idat=1,ndat
2710                  do i2=1,n2
2711                    if( fftn2_distrib(i2) == me_fft) then
2712                      i2_local = ffti2_local(i2) + nd2proc*(idat-1)
2713                      do i3=1,n3
2714                        do i1=1,n1
2715                          work3(1,i1,i3,i2_local)=work_sum(1,i1,i3,i2+n5*(idat-1))
2716                          work3(2,i1,i3,i2_local)=work_sum(2,i1,i3,i2+n5*(idat-1))
2717                        end do
2718                      end do
2719                    end if
2720                  end do
2721                end do
2722                ABI_DEALLOCATE(work_sum)
2723              end if
2724 
2725              if (paral_kgb/=1) then
2726                do idat=1,ndat
2727                  do i2=1,n2
2728                    do i3=1,n3
2729                      do i1=1,n1
2730                        work3(1,i1,i3,i2+nd2proc*(idat-1))=work2(1,i1,i3,i2+n5*(idat-1))
2731                        work3(2,i1,i3,i2+nd2proc*(idat-1))=work2(2,i1,i3,i2+n5*(idat-1))
2732                      end do
2733                    end do
2734                  end do
2735                end do
2736              end if
2737            end if
2738          end if
2739          if (paral_kgb==1) then
2740            option_ccfft=1
2741          else
2742            option_ccfft=2
2743          end if
2744        end if
2745 
2746        ! Fourier transform fofr (reciprocal to real space)
2747        ! The output might be in work1 or fofr, depending on inplace
2748        if (fftalgc==0) then
2749          if (fftalga/=4) then
2750            ! Call usual 3DFFT library routines
2751            call ccfft(ngfft,+1,n1,n2,n3,n4,n5,n6,ndat,2,fofr,work1,comm_fft)
2752          else
2753            ! SG simplest complex-to-complex routine
2754            call ccfft(ngfft,+1,n1,n2,n3,n4,n5,n6,ndat,option_ccfft,work3,work4,comm_fft)
2755            ABI_DEALLOCATE(work2)
2756            ABI_DEALLOCATE(work3)
2757          end if
2758        else
2759          ! Call SG routine, with zero padding
2760          call sg_fftpad(fftcache,mgfft,n1,n2,n3,n4,n5,n6,ndat,gboundin,+1,fofr,work1)
2761        end if
2762      end if ! option/=3
2763 
2764      ! Note that if option==0 everything is alright already, the output is available in fofr.
2765      ! MG: TODO: Rewrite this mess in human-readable form!
2766      if (option==0) then
2767        if (fftalgc==0) then
2768          if (fftalga/=4) then
2769            call DCOPY(2*n4*n5*n6*ndat,work1,1,fofr,1)
2770          else
2771            call DCOPY(2*n4*n5*n6*ndat,work4,1,fofr,1)
2772          end if
2773        else
2774          ! Results are copied to fofr.
2775          call DCOPY(2*n4*n5*n6*ndat,work1,1,fofr,1)
2776        end if
2777      end if
2778 
2779      if (option==1) then
2780        ! Accumulate density
2781        if ((fftalgc==0) .and. (fftalga==4)) then
2782          do idat=1,ndat
2783            do i3=1,n3
2784              if( me_fft == fftn3_distrib(i3) ) then
2785                i3_local = ffti3_local(i3) + nd3proc*(idat-1)
2786                do i2=1,n2
2787                  do i1=1,n1
2788                    denpot(i1,i2,i3)=denpot(i1,i2,i3)+&
2789 &                   weight_r*work4(1,i1,i2,i3_local)**2+&
2790 &                   weight_i*work4(2,i1,i2,i3_local)**2
2791                  end do
2792                end do
2793              end if
2794            end do
2795          end do ! idat
2796        else
2797          call cg_addtorho(n1,n2,n3,n4,n5,n6,ndat,weight_r,weight_i,work1,denpot)
2798        end if
2799      end if ! option==1
2800 
2801      if (option==2) then
2802        ! Apply local potential
2803        if (cplex==1) then
2804 
2805          if ((fftalgc==0) .and. (fftalga==4)) then
2806 !$OMP PARALLEL DO PRIVATE(i3_local,i3_glob)
2807            do idat=1,ndat
2808              do i3=1,n3
2809                if( me_fft == fftn3_distrib(i3) ) then
2810                  i3_local = ffti3_local(i3) + nd3proc*(idat-1)
2811                  i3_glob = i3+n3*(idat-1)
2812                  do i2=1,n2
2813                    do i1=1,n1
2814                      fofr(1,i1,i2,i3_glob)= denpot(i1,i2,i3)*work4(1,i1,i2,i3_local)
2815                      fofr(2,i1,i2,i3_glob)= denpot(i1,i2,i3)*work4(2,i1,i2,i3_local)
2816                    end do
2817                  end do
2818                end if
2819              end do
2820            end do
2821          end if
2822          if ((fftalgc/=0) .or. (fftalga/=4)) then
2823 !$OMP PARALLEL DO PRIVATE(i3_glob)
2824            do idat=1,ndat
2825              do i3=1,n3
2826                if( me_fft == fftn3_distrib(i3) ) then
2827                  i3_glob = i3+n3*(idat-1)
2828                  do i2=1,n2
2829                    do i1=1,n1
2830                      fofr(1,i1,i2,i3_glob)=denpot(i1,i2,i3)*work1(1,i1,i2,i3+n3*(idat-1))
2831                      fofr(2,i1,i2,i3_glob)=denpot(i1,i2,i3)*work1(2,i1,i2,i3+n3*(idat-1))
2832                    end do
2833                  end do
2834                end if
2835              end do
2836            end do
2837          end if
2838 
2839        else if (cplex==2) then
2840          if ((fftalgc==0) .and. (fftalga==4)) then
2841 !$OMP PARALLEL DO PRIVATE(fre,fim,i3_local,i3_glob)
2842            do idat=1,ndat
2843              do i3=1,n3
2844                if( me_fft == fftn3_distrib(i3) ) then
2845                  i3_local = ffti3_local(i3) + nd3proc*(idat-1)
2846                  i3_glob = i3+n3*(idat-1)
2847                  do i2=1,n2
2848                    do i1=1,n1
2849                      fre=work4(1,i1,i2,i3_local)
2850                      fim=work4(2,i1,i2,i3_local)
2851                      fofr(1,i1,i2,i3_glob)=denpot(2*i1-1,i2,i3)*fre -denpot(2*i1,i2,i3)*fim
2852                      fofr(2,i1,i2,i3_glob)=denpot(2*i1-1,i2,i3)*fim +denpot(2*i1,i2,i3)*fre
2853                    end do
2854                  end do
2855                end if
2856              end do
2857            end do
2858          end if
2859 
2860          if ((fftalgc/=0) .or. (fftalga/=4)) then
2861 !$OMP PARALLEL DO PRIVATE(fre,fim,i3_glob)
2862            do idat=1,ndat
2863              do i3=1,n3
2864                if( me_fft == fftn3_distrib(i3) ) then
2865                  i3_glob = i3+n3*(idat-1)
2866                  do i2=1,n2
2867                    do i1=1,n1
2868                      fre=work1(1,i1,i2,i3+n3*(idat-1))
2869                      fim=work1(2,i1,i2,i3+n3*(idat-1))
2870                      fofr(1,i1,i2,i3_glob)=denpot(2*i1-1,i2,i3)*fre -denpot(2*i1,i2,i3)*fim
2871                      fofr(2,i1,i2,i3_glob)=denpot(2*i1-1,i2,i3)*fim +denpot(2*i1,i2,i3)*fre
2872                    end do
2873                  end do
2874                end if
2875              end do
2876            end do
2877          end if
2878        end if ! cplex=2
2879 
2880      end if ! option=2
2881 
2882      ! The data for option==2 or option==3 is now in fofr.
2883      if (option==2 .or. option==3) then
2884 
2885        if (fftalgc==0) then
2886          ! Call usual 3DFFT library routines or SG simplest complex-to-complex routine
2887          if (fftalga==FFT_SG2002) then
2888            ABI_DEALLOCATE(work1)
2889            ABI_ALLOCATE(work1,(2,n4,n6,n5*ndat))
2890          end if
2891 
2892          if (option==3 .or. fftalga/=4) then
2893            call ccfft(ngfft,-1,n1,n2,n3,n4,n5,n6,ndat,2,fofr,work1,comm_fft)
2894          else
2895            ! creation of small arrays
2896            ! nd3proc=((n5-1)/nproc_fft) +1
2897            nd3proc=((n6-1)/nproc_fft) +1
2898            nd2proc=((n2-1)/nproc_fft) +1
2899            ABI_ALLOCATE(work3,(2,n4,n5,nd3proc*ndat))
2900            ABI_ALLOCATE(work2,(2,n4,n6,nd2proc*ndat))
2901 
2902            if (paral_kgb==1) then
2903 
2904              if (cplex==1) then
2905                do idat=1,ndat
2906                  do i3=1,n3
2907                    if( me_fft == fftn3_distrib(i3) ) then
2908                      i3_local = ffti3_local(i3) + nd3proc*(idat-1)
2909                      do i2=1,n2
2910                        do i1=1,n1
2911                          work3(1,i1,i2,i3_local)=denpot(i1,i2,i3)*work4(1,i1,i2,i3_local)
2912                          work3(2,i1,i2,i3_local)=denpot(i1,i2,i3)*work4(2,i1,i2,i3_local)
2913                        end do
2914                      end do
2915                    end if
2916                  end do
2917                end do
2918              else
2919                do idat=1,ndat
2920                  do i3=1,n3
2921                    if( me_fft == fftn3_distrib(i3) ) then
2922                      i3_local = ffti3_local(i3) + nd3proc*(idat-1)
2923                      do i2=1,n2
2924                        do i1=1,n1
2925                          fre=work4(1,i1,i2,i3_local)
2926                          fim=work4(2,i1,i2,i3_local)
2927                          work3(1,i1,i2,i3_local) = denpot(2*i1-1,i2,i3)*fre-denpot(2*i1,i2,i3)*fim
2928                          work3(2,i1,i2,i3_local) = denpot(2*i1-1,i2,i3)*fim+denpot(2*i1,i2,i3)*fre
2929                        end do
2930                      end do
2931                    end if
2932                  end do
2933                end do
2934              end if
2935              option_ccfft=1
2936 
2937            else
2938              if (nproc_fft /=1 .or. ndat /= 1 ) then
2939                do idat=1,ndat
2940                  do i3=1,n3
2941                    do i2=1,n2
2942                      do i1=1,n1
2943                        work3(1,i1,i2,i3+nd3proc*(idat-1))=fofr(1,i1,i2,i3+n3*(idat-1))
2944                        work3(2,i1,i2,i3+nd3proc*(idat-1))=fofr(2,i1,i2,i3+n3*(idat-1))
2945                      end do
2946                    end do
2947                  end do
2948                end do
2949                option_ccfft=2
2950              end if
2951            end if
2952 
2953            if (paral_kgb==1) then
2954              call ccfft(ngfft,-1,n1,n2,n3,n4,n5,n6,ndat,option_ccfft,work3,work2,comm_fft)
2955            else
2956              if (nproc_fft /=1 .or. ndat /= 1 ) then
2957                call ccfft(ngfft,-1,n1,n2,n3,n4,n5,n6,ndat,option_ccfft,work3,work2,comm_fft)
2958              else
2959                call ccfft(ngfft,-1,n1,n2,n3,n4,n5,n6,ndat,option_ccfft,fofr,work1,comm_fft)
2960              end if
2961            end if
2962 
2963            ! load of work1
2964            if ((paral_kgb==1) .and.  ( istwf_k > 1 )) work1(:,:,:,:)=zero
2965 
2966            if (paral_kgb==1) then
2967              if ( istwf_k > 1 ) then
2968                do idat=1,ndat
2969                  do i2=1,n2
2970                    if( me_fft == fftn2_distrib(i2) ) then
2971                      i2_local = ffti2_local(i2) + nd2proc*(idat-1)
2972                      do i3=1,n3
2973                        do i1=1,n1
2974                          work1(1,i1,i3,i2+n5*(idat-1))= work2(1,i1,i3,i2_local)
2975                          work1(2,i1,i3,i2+n5*(idat-1))= work2(2,i1,i3,i2_local)
2976                        end do
2977                      end do
2978                    end if
2979                  end do
2980                end do
2981              end if
2982 
2983            else
2984              if (nproc_fft /=1 .or. ndat /= 1 ) then
2985                do idat=1,ndat
2986                  do i2=1,n2
2987                    do i3=1,n3
2988                      do i1=1,n1
2989                        work1(1,i1,i3,i2+n5*(idat-1))=work2(1,i1,i3,i2+nd2proc*(idat-1))
2990                        work1(2,i1,i3,i2+n5*(idat-1))=work2(2,i1,i3,i2+nd2proc*(idat-1))
2991                      end do
2992                    end do
2993                  end do
2994                end do
2995              end if
2996            end if
2997            ABI_DEALLOCATE(work3)
2998            if ((paral_kgb==1) .and.  ( istwf_k > 1 )) then
2999              call timab(48,1,tsec)
3000              call xmpi_sum(work1,comm_fft,ier)
3001              call timab(48,2,tsec)
3002            end if
3003          end if
3004 
3005        else
3006          ! Call SG routine, with zero padding
3007          call sg_fftpad(fftcache,mgfft,n1,n2,n3,n4,n5,n6,ndat,gboundout,-1,fofr,work1)
3008        end if
3009 
3010        xnorm = one/dble(nfftot)
3011 
3012        if (fftalga/=4) then
3013          call cg_box2gsph(n1,n2,n3,n4,n5,n6,ndat,npwout,kg_kout,work1,fofgout, rscal=xnorm)
3014        else
3015          ! if fftalga==4
3016          if ((paral_kgb==1) .and. ( istwf_k == 1 )) then
3017 !$OMP PARALLEL DO PRIVATE(i1,i2,i3,i2_local)
3018            do idat=1,ndat
3019              do ig=1,npwout
3020                i1=kg_kout(1,ig); if(i1<0)i1=i1+n1 ; i1=i1+1
3021                i2=kg_kout(2,ig); if(i2<0)i2=i2+n2 ; i2=i2+1
3022                i3=kg_kout(3,ig); if(i3<0)i3=i3+n3 ; i3=i3+1
3023                i2_local = ffti2_local(i2) + nd2proc*(idat-1)
3024                fofgout(1,ig+npwout*(idat-1))= work2(1,i1,i3,i2_local)*xnorm
3025                fofgout(2,ig+npwout*(idat-1))= work2(2,i1,i3,i2_local)*xnorm
3026              end do
3027            end do
3028            ABI_DEALLOCATE(work2)
3029          else
3030 !$OMP PARALLEL DO PRIVATE(i1,i2,i3)
3031            do idat=1,ndat
3032              do ig=1,npwout
3033                i1=kg_kout(1,ig); if(i1<0)i1=i1+n1; i1=i1+1
3034                i2=kg_kout(2,ig); if(i2<0)i2=i2+n2; i2=i2+1
3035                i3=kg_kout(3,ig); if(i3<0)i3=i3+n3; i3=i3+1
3036                fofgout(1,ig+npwout*(idat-1))=work1(1,i1,i3,i2+n5*(idat-1))*xnorm
3037                fofgout(2,ig+npwout*(idat-1))=work1(2,i1,i3,i2+n5*(idat-1))*xnorm
3038              end do
3039            end do
3040          end if
3041        end if ! fftalga
3042      end if ! if option==2 or 3
3043 
3044      if (allocated(work1))  then
3045        ABI_DEALLOCATE(work1)
3046      end if
3047    end if
3048 
3049    ! Here, call more specialized 3-dimensional fft
3050    ! (zero padding as well as maximize cache reuse) based on S Goedecker routines.
3051    ! Specially tuned for cache architectures.
3052    if (fftalga==FFT_SG .and. fftalgc==2 .and. option/=3) then
3053      call sg_fftrisc(cplex,denpot,fofgin,fofgout,fofr,gboundin,gboundout,&
3054 &     istwf_k,kg_kin,kg_kout,mgfft,ndat,ngfft,npwin,npwout,n4,n5,n6,option,weight_r,weight_i)
3055    end if
3056 
3057    ! Here, call new FFT from S Goedecker, also sophisticated specialized 3-dimensional fft
3058    ! (zero padding as well as maximize cache reuse)
3059    if (fftalga==FFT_SG2002 .and. fftalgc/=0) then
3060      ! The args are not the same as fourwf, but might be
3061      call fourwf_mpi(cplex,denpot,fofgin,fofgout,fofr,gboundin,gboundout,&
3062 &     istwf_k,kg_kin,kg_kout,me_g0,mgfft,ngfft,mpi_enreg%distribfft,n1,n2,n3,npwin,npwout,&
3063 &     n4,n5,n6,ndat,option,weight_r,weight_i,comm_fft)
3064    end if
3065 
3066    if (allocated(work4))  then
3067      ABI_DEALLOCATE(work4)
3068    end if
3069    if (allocated(work2))  then
3070      ABI_DEALLOCATE(work2)
3071    end if
3072 
3073  end select
3074 
3075 ! Accumulate timing
3076  100 continue
3077  call timab(840+tim_fourwf,2,tsec)
3078 
3079 end subroutine fourwf

ABINIT/m_fft [ Modules ]

[ Top ] [ Modules ]

NAME

  m_fft

FUNCTION

  This module provides driver routines for sequential FFTs (OpenMP threads are supported).
  It also defines generic interfaces for single or double precision arrays.

COPYRIGHT

 Copyright (C) 2009-2018 ABINIT group (MG, MM, GZ, MT, MF, XG)
 This file is distributed under the terms of the
 GNU General Public License, see ~abinit/COPYING
 or http://www.gnu.org/copyleft/gpl.txt .

PARENTS

CHILDREN

SOURCE

 22 #if defined HAVE_CONFIG_H
 23 #include "config.h"
 24 #endif
 25 
 26 #include "abi_common.h"
 27 
 28 MODULE m_fft
 29 
 30  use defs_basis
 31  use m_abicore
 32  use m_errors
 33  use m_xomp
 34  use m_xmpi
 35  use m_cplxtools
 36  use m_cgtools
 37  use m_sgfft
 38  use m_sg2002
 39  use m_fftw3
 40  use m_dfti
 41  use iso_c_binding
 42 #if defined HAVE_MPI2
 43  use mpi
 44 #endif
 45 
 46  use defs_abitypes,   only : MPI_type
 47  use defs_fftdata,    only : mg
 48  use m_time,          only : cwtime, timab
 49  use m_numeric_tools, only : r2c
 50  use m_fstrings,      only : sjoin, itoa
 51  use m_geometry,      only : metric
 52  use m_hide_blas,     only : xscal
 53  use m_fftcore,       only : get_cache_kb, kpgsph, get_kg, sphere_fft, sphere_fft1, sphere, change_istwfk,&
 54 &                            fftalg_info, fftalg_has_mpi, print_ngfft, getng, sphereboundary
 55  use m_mpinfo,        only : destroy_mpi_enreg, ptabs_fourdp, ptabs_fourwf, initmpi_seq
 56  use m_distribfft,    only : distribfft_type, init_distribfft, destroy_distribfft
 57 
 58  implicit none
 59 
 60  private
 61 
 62 #if defined HAVE_MPI1
 63  include 'mpif.h'
 64 #endif
 65 
 66  public :: fftbox_execute       ! Driver for FFTs on the full box (complex-to-complex version, operated on complex arrays)
 67  public :: fft_ug               ! Driver for zero-padded FFTs u(g) --> u(r)
 68  public :: fft_ur               ! Driver for zero-padded FFTs u(r) --> u(g)
 69  public :: fftpad               ! Driver for (low-level) zero-padded FFTs, note that fft_ug is the preferred interface.
 70  public :: fft_poisson          ! Solve the poisson equation in G-space starting from n(r).
 71  public :: fourdp_6d            ! Calculate a 6-dimensional Fast Fourier Transform
 72  public :: fftpac               ! Copy to change the stride of a three-dimension array for more efficient FFT.
 73  public :: indirect_parallel_Fourier
 74 
 75  public :: fft_use_lib_threads
 76  public :: fft_allow_ialltoall  ! Allow the use of non-blocking IALLOTOALL in MPI-FFTs algorithms
 77  public :: zerosym              ! Symmetrize an array on the FFT grid by vanishing some term on the boundaries.
 78 
 79 ! Main entry points.
 80  public :: fourdp
 81  public :: fourwf
 82 
 83 ! Driver routines for MPI version.
 84  public :: fourdp_mpi           ! MPI FFT of densities/potentials on the full box.
 85  public :: fourwf_mpi           ! specialized MPI-FFT for wavefunctions.
 86  !public :: fftmpi_u
 87 
 88  interface fftbox_execute
 89    module procedure fftbox_execute_ip_spc
 90    module procedure fftbox_execute_ip_dpc
 91    module procedure fftbox_execute_op_spc
 92    module procedure fftbox_execute_op_dpc
 93  end interface fftbox_execute
 94 
 95  interface fft_ug
 96    !module procedure fft_ug_dp   TODO
 97    module procedure fft_ug_spc
 98    module procedure fft_ug_dpc
 99  end interface fft_ug
100  !public :: fft_ug_spc, fft_ug_dpc
101 
102  interface fft_ur
103    !module procedure fft_ur_dp      TODO
104    module procedure fft_ur_spc
105    module procedure fft_ur_dpc
106  end interface fft_ur
107  !public :: fft_ur_spc, fft_ur_dpc
108 
109  interface fftpad
110    !module procedure fftpad_dp
111    module procedure fftpad_spc
112    module procedure fftpad_dpc
113  end interface fftpad

m_fft/fft_allow_ialltoall [ Functions ]

[ Top ] [ m_fft ] [ Functions ]

NAME

  fft_allow_ialltoall

FUNCTION

  Allow the use of non-blocking IALLOTOALL in MPI-FFTs algorithms
  Mainly used for profiling purposes.

PARENTS

      m_argparse

CHILDREN

      fourwf_mpi

SOURCE

182 subroutine fft_allow_ialltoall(bool)
183 
184 
185 !This section has been created automatically by the script Abilint (TD).
186 !Do not modify the following lines by hand.
187 #undef ABI_FUNC
188 #define ABI_FUNC 'fft_allow_ialltoall'
189 !End of the abilint section
190 
191  implicit none
192 
193 !Arguments ------------------------------------
194  logical,intent(in) :: bool
195 
196 ! *************************************************************************
197 
198  ALLOW_IALLTOALL = bool
199 #ifndef HAVE_MPI_IALLTOALL
200  ALLOW_IALLTOALL = .False.
201 #endif
202 
203 end subroutine fft_allow_ialltoall

m_fft/fft_poisson [ Functions ]

[ Top ] [ m_fft ] [ Functions ]

NAME

 fft_poisson

FUNCTION

  Driver routine to solve the Poisson equation in G-space given the density, n(r),
  in real space of the FFT box.

INPUTS

 ngfft(18)=Info on the 3D FFT.
 cplex=1 if fofr is real, 2 if fofr is complex
 nx,ny,nz=Number of FFT points along the three directions.
 ldx,ldy,ldz=Leading dimension of the array nr and vg.
 ndat = Number of densities
 vg(nx*ny*nz)=Potential in reciprocal space.

SIDE EFFECTS

 nr(cplex*ldx*ldy*ldz*ndat)
    input: n(r) (real or complex)
    output: the hartree potential in real space

NOTES

   vg is given on the FFT mesh instead of the augmented mesh [ldx,ldy,ldz]
   in order to simplify the interface with the other routines operating of vg

PARENTS

CHILDREN

      fourwf_mpi

SOURCE

1184 subroutine fft_poisson(ngfft,cplex,nx,ny,nz,ldx,ldy,ldz,ndat,vg,nr)
1185 
1186 
1187 !This section has been created automatically by the script Abilint (TD).
1188 !Do not modify the following lines by hand.
1189 #undef ABI_FUNC
1190 #define ABI_FUNC 'fft_poisson'
1191 !End of the abilint section
1192 
1193  implicit none
1194 
1195 !Arguments ------------------------------------
1196 !scalars
1197  integer,intent(in) :: cplex,nx,ny,nz,ldx,ldy,ldz,ndat
1198  integer,intent(in) :: ngfft(18)
1199 !arrays
1200  real(dp),intent(inout) :: nr(cplex*ldx*ldy*ldz*ndat)
1201  real(dp),intent(in) :: vg(nx*ny*nz)
1202 
1203 !Local variables-------------------------------
1204 !scalars
1205  integer :: fftalga,fftcache
1206 
1207 ! *************************************************************************
1208 
1209  fftalga = ngfft(7)/100; fftcache = ngfft(8)
1210 
1211  select case (fftalga)
1212  case (FFT_SG, FFT_SG2002)
1213    ! Note: according to my tests fftalg 1xx is always faster than 4xx in sequential
1214    ! hence it does not make sense to provide a fallback for fftalg 4xx.
1215    ! Use external FFT libraries if you want to run at top-level speed.
1216    call sg_poisson(fftcache,cplex,nx,ny,nz,ldx,ldy,ldz,ndat,vg,nr)
1217 
1218  case (FFT_FFTW3)
1219    call fftw3_poisson(cplex,nx,ny,nz,ldx,ldy,ldz,ndat,vg,nr)
1220 
1221  !case (FFT_DFTI)
1222  !  call dfti_poisson(cplex,nx,ny,nz,ldx,ldy,ldz,ndat,vg,nr)
1223 
1224  case default
1225    MSG_BUG(sjoin("Wrong value for fftalga: ",itoa(fftalga)))
1226  end select
1227 
1228 end subroutine fft_poisson

m_fft/fft_ug_dp [ Functions ]

[ Top ] [ m_fft ] [ Functions ]

NAME

 fft_ug_dp

FUNCTION

 Driver routine for G-->R transform of wavefunctions with zero-padded FFT.
 TARGET: double precision real arrays

INPUTS

 npw_k=number of plane waves for this k-point.
 nfft=Number of FFT points.
 nspinor=number of spinorial components
 ndat=Numer of wavefunctions to transform.
 mgfft=Max number of FFT divisions
 ngfft(18)=information about 3D FFT, see ~abinit/doc/variables/vargs.htm#ngfft
 istwfk=Option describing the storage of the wavefunction. (at present must be 1)
 kg_k(3,npw_k)=G-vectors in reduced coordinates
 gbound_k_k(2*mgfft+8,2)=Table for padded-FFT. See sphereboundary.
 ug(npw_k*nspinor*ndat)=wavefunctions in reciprocal space

OUTPUT

  ur(nfft*nspinor*ndat)=wavefunctions in real space.

PARENTS

      m_fft

CHILDREN

      fourwf_mpi

SOURCE

592 subroutine fft_ug_dp(npw_k,nfft,nspinor,ndat,mgfft,ngfft,istwf_k,kg_k,gbound_k,ug,ur)
593 
594 
595 !This section has been created automatically by the script Abilint (TD).
596 !Do not modify the following lines by hand.
597 #undef ABI_FUNC
598 #define ABI_FUNC 'fft_ug_dp'
599 !End of the abilint section
600 
601  implicit none
602 
603 !Arguments ------------------------------------
604 !scalars
605  integer,intent(in) :: npw_k,nfft,nspinor,istwf_k,mgfft,ndat
606 !arrays
607  integer,intent(in) :: ngfft(18),gbound_k(2*mgfft+8,2),kg_k(3,npw_k)
608  real(dp),intent(in) :: ug(2*npw_k*nspinor*ndat)
609  real(dp),intent(out) :: ur(2*nfft*nspinor*ndat)
610 
611 ! *************************************************************************
612 
613  MSG_ERROR("This is a Stub")
614 !#include "fftug_driver.finc"
615 
616 end subroutine fft_ug_dp

m_fft/fft_ug_dpc [ Functions ]

[ Top ] [ m_fft ] [ Functions ]

NAME

 fft_ug_dpc

FUNCTION

 Driver routine for G-->R transform of wavefunctions with zero-padded FFT.
 TARGET: double precision arrays

INPUTS

 npw_k=number of plane waves for this k-point.
 nfft=Number of FFT points.
 nspinor=number of spinorial components
 ndat=Numer of wavefunctions to transform.
 mgfft=Max number of FFT divisions
 ngfft(18)=information about 3D FFT, see ~abinit/doc/variables/vargs.htm#ngfft
 istwfk=Option describing the storage of the wavefunction. (at present must be 1)
 kg_k(3,npw_k)=G-vectors in reduced coordinates
 gbound_k_k(2*mgfft+8,2)=Table for padded-FFT. See sphereboundary.
 ug(npw_k*nspinor*ndat)=wavefunctions in reciprocal space

OUTPUT

  ur(nfft*nspinor*ndat)=wavefunctions in real space.

PARENTS

CHILDREN

      fourwf_mpi

SOURCE

711 subroutine fft_ug_dpc(npw_k,nfft,nspinor,ndat,mgfft,ngfft,istwf_k,kg_k,gbound_k,ug,ur)
712 
713 
714 !This section has been created automatically by the script Abilint (TD).
715 !Do not modify the following lines by hand.
716 #undef ABI_FUNC
717 #define ABI_FUNC 'fft_ug_dpc'
718 !End of the abilint section
719 
720  implicit none
721 
722 !Arguments ------------------------------------
723 !scalars
724  integer,intent(in) :: npw_k,nfft,nspinor,istwf_k,mgfft,ndat
725 !arrays
726  integer,intent(in) :: ngfft(18),gbound_k(2*mgfft+8,2),kg_k(3,npw_k)
727  complex(dpc),intent(in) :: ug(npw_k*nspinor*ndat)
728  complex(dpc),intent(out) :: ur(nfft*nspinor*ndat)
729 
730 ! *************************************************************************
731 
732 #include "fftug_driver.finc"
733 
734 end subroutine fft_ug_dpc

m_fft/fft_ug_spc [ Functions ]

[ Top ] [ m_fft ] [ Functions ]

NAME

 fft_ug_spc

FUNCTION

 Driver routine for G-->R transform of wavefunctions with zero-padded FFT.
 TARGET: single precision arrays

INPUTS

 npw_k=number of plane waves for this k-point.
 nfft=Number of FFT points.
 nspinor=number of spinorial components
 ndat=Numer of wavefunctions to transform.
 mgfft=Max number of FFT divisions
 ngfft(18)=information about 3D FFT, see ~abinit/doc/variables/vargs.htm#ngfft
 istwfk=Option describing the storage of the wavefunction. (at present must be 1)
 kg_k(3,npw_k)=G-vectors in reduced coordinates
 gbound_k_k(2*mgfft+8,2)=Table for padded-FFT. See sphereboundary.
 ug(npw_k*nspinor*ndat)=wavefunctions in reciprocal space

OUTPUT

  ur(nfft*nspinor*ndat)=wavefunctions in real space.

PARENTS

CHILDREN

      fourwf_mpi

SOURCE

653 subroutine fft_ug_spc(npw_k,nfft,nspinor,ndat,mgfft,ngfft,istwf_k,kg_k,gbound_k,ug,ur)
654 
655 
656 !This section has been created automatically by the script Abilint (TD).
657 !Do not modify the following lines by hand.
658 #undef ABI_FUNC
659 #define ABI_FUNC 'fft_ug_spc'
660 !End of the abilint section
661 
662  implicit none
663 
664 !Arguments ------------------------------------
665 !scalars
666  integer,intent(in) :: npw_k,nfft,nspinor,istwf_k,mgfft,ndat
667 !arrays
668  integer,intent(in) :: ngfft(18),gbound_k(2*mgfft+8,2),kg_k(3,npw_k)
669  complex(spc),intent(in) :: ug(npw_k*nspinor*ndat)
670  complex(spc),intent(out) :: ur(nfft*nspinor*ndat)
671 
672 ! *************************************************************************
673 
674 #include "fftug_driver.finc"
675 
676 end subroutine fft_ug_spc

m_fft/fft_ur_dp [ Functions ]

[ Top ] [ m_fft ] [ Functions ]

NAME

 fft_ur_dp

FUNCTION

 Compute ndat zero-padded FFTs from R- to G-space .
 Mainly used for the transform of wavefunctions.
 TARGET: dp real arrays

INPUTS

 npw_k=number of plane waves for this k-point.
 nfft=Number of FFT points.
 nspinor=number of spinorial components
 ndat=Number of wavefunctions to transform.
 mgfft=Max number of FFT divisions
 ngfft(18)=contain all needed information about 3D FFT, see ~abinit/doc/variables/vargs.htm#ngfft
 istwfk=Option describing the storage of the wavefunction. (at present must be 1)
 kg_k(3,npw_k)=G-vectors in reduced coordinates
 gbound_k(2*mgfft+8,2)=Table for padded-FFT. See sphereboundary.

SIDE EFFECTS

  ur(nfft*nspinor*ndat)=In input: wavefunctions in real space
                        Destroyed in output. Do not use ur anymore!

OUTPUT

  ug(npw_k*nspinor*ndat)=wavefunctions in reciprocal space given on the G-sphere.

PARENTS

      m_fft

CHILDREN

      fourwf_mpi

SOURCE

775 subroutine fft_ur_dp(npw_k,nfft,nspinor,ndat,mgfft,ngfft,istwf_k,kg_k,gbound_k,ur,ug)
776 
777 
778 !This section has been created automatically by the script Abilint (TD).
779 !Do not modify the following lines by hand.
780 #undef ABI_FUNC
781 #define ABI_FUNC 'fft_ur_dp'
782 !End of the abilint section
783 
784  implicit none
785 
786 !Arguments ------------------------------------
787 !scalars
788  integer,intent(in) :: npw_k,nfft,nspinor,ndat,istwf_k,mgfft
789 !arrays
790  integer,intent(in) :: ngfft(18),gbound_k(2*mgfft+8,2)
791  integer,intent(in) :: kg_k(3,npw_k)
792  real(dp),intent(inout) :: ur(2*nfft*nspinor*ndat)
793  real(dp),intent(out) :: ug(2*npw_k*nspinor*ndat)
794 
795 ! *************************************************************************
796 
797  MSG_ERROR("This is a Stub")
798 
799 !#include "fftur_driver.finc"
800 
801 end subroutine fft_ur_dp

m_fft/fft_ur_dpc [ Functions ]

[ Top ] [ m_fft ] [ Functions ]

NAME

 fft_ur_dpc

FUNCTION

 Compute ndat zero-padded FFTs from R- to G-space .
 Mainly used for the transform of wavefunctions.
 TARGET: dpc complex arrays

INPUTS

 npw_k=number of plane waves for this k-point.
 nfft=Number of FFT points.
 nspinor=number of spinorial components
 ndat=Number of wavefunctions to transform.
 mgfft=Max number of FFT divisions
 ngfft(18)=contain all needed information about 3D FFT, see ~abinit/doc/variables/vargs.htm#ngfft
 istwfk=Option describing the storage of the wavefunction. (at present must be 1)
 kg_k(3,npw_k)=G-vectors in reduced coordinates
 gbound_k(2*mgfft+8,2)=Table for padded-FFT. See sphereboundary.

SIDE EFFECTS

  ur(nfft*nspinor*ndat)=In input: wavefunctions in real space
                        Destroyed in output. Do not use ur anymore!

OUTPUT

  ug(npw_k*nspinor*ndat)=wavefunctions in reciprocal space given on the G-sphere.

PARENTS

CHILDREN

      fourwf_mpi

SOURCE

904 subroutine fft_ur_dpc(npw_k,nfft,nspinor,ndat,mgfft,ngfft,istwf_k,kg_k,gbound_k,ur,ug)
905 
906 
907 !This section has been created automatically by the script Abilint (TD).
908 !Do not modify the following lines by hand.
909 #undef ABI_FUNC
910 #define ABI_FUNC 'fft_ur_dpc'
911 !End of the abilint section
912 
913  implicit none
914 
915 !Arguments ------------------------------------
916 !scalars
917  integer,intent(in) :: npw_k,nfft,nspinor,ndat,istwf_k,mgfft
918 !arrays
919  integer,intent(in) :: ngfft(18),gbound_k(2*mgfft+8,2)
920  integer,intent(in) :: kg_k(3,npw_k)
921  complex(dpc),intent(inout) :: ur(nfft*nspinor*ndat)
922  complex(dpc),intent(out) :: ug(npw_k*nspinor*ndat)
923 
924 ! *************************************************************************
925 
926 #include "fftur_driver.finc"
927 
928 end subroutine fft_ur_dpc

m_fft/fft_ur_spc [ Functions ]

[ Top ] [ m_fft ] [ Functions ]

NAME

 fft_ur_spc

FUNCTION

 Compute ndat zero-padded FFTs from R- to G-space .
 Mainly used for the transform of wavefunctions.
 TARGET: spc complex arrays

INPUTS

 npw_k=number of plane waves for this k-point.
 nfft=Number of FFT points.
 nspinor=number of spinorial components
 ndat=Number of wavefunctions to transform.
 mgfft=Max number of FFT divisions
 ngfft(18)=contain all needed information about 3D FFT, see ~abinit/doc/variables/vargs.htm#ngfft
 istwfk=Option describing the storage of the wavefunction. (at present must be 1)
 kg_k(3,npw_k)=G-vectors in reduced coordinates
 gbound_k(2*mgfft+8,2)=Table for padded-FFT. See sphereboundary.

SIDE EFFECTS

  ur(nfft*nspinor*ndat)=In input: wavefunctions in real space
                        Destroyed in output. Do not use ur anymore!

OUTPUT

  ug(npw_k*nspinor*ndat)=wavefunctions in reciprocal space given on the G-sphere.

PARENTS

CHILDREN

      fourwf_mpi

SOURCE

841 subroutine fft_ur_spc(npw_k,nfft,nspinor,ndat,mgfft,ngfft,istwf_k,kg_k,gbound_k,ur,ug)
842 
843 
844 !This section has been created automatically by the script Abilint (TD).
845 !Do not modify the following lines by hand.
846 #undef ABI_FUNC
847 #define ABI_FUNC 'fft_ur_spc'
848 !End of the abilint section
849 
850  implicit none
851 
852 !Arguments ------------------------------------
853 !scalars
854  integer,intent(in) :: npw_k,nfft,nspinor,ndat,istwf_k,mgfft
855 !arrays
856  integer,intent(in) :: ngfft(18),gbound_k(2*mgfft+8,2)
857  integer,intent(in) :: kg_k(3,npw_k)
858  complex(spc),intent(inout) :: ur(nfft*nspinor*ndat)
859  complex(spc),intent(out) :: ug(npw_k*nspinor*ndat)
860 
861 ! *************************************************************************
862 
863 #include "fftur_driver.finc"
864 
865 end subroutine fft_ur_spc

m_fft/fft_use_lib_threads [ Functions ]

[ Top ] [ m_fft ] [ Functions ]

NAME

 fft_use_lib_threads

FUNCTION

INPUTS

OUTPUT

PARENTS

      fftprof

CHILDREN

      fourwf_mpi

SOURCE

1251 subroutine fft_use_lib_threads(logvar)
1252 
1253 
1254 !This section has been created automatically by the script Abilint (TD).
1255 !Do not modify the following lines by hand.
1256 #undef ABI_FUNC
1257 #define ABI_FUNC 'fft_use_lib_threads'
1258 !End of the abilint section
1259 
1260  implicit none
1261 
1262 !Arguments ------------------------------------
1263 !scalars
1264  logical,intent(in) :: logvar
1265 
1266 ! *************************************************************************
1267 
1268  call dfti_use_lib_threads(logvar)
1269  call fftw3_use_lib_threads(logvar)
1270 
1271 end subroutine fft_use_lib_threads

m_fft/fftbox_execute_ip_dpc [ Functions ]

[ Top ] [ m_fft ] [ Functions ]

NAME

  fftbox_execute_ip_dpc

FUNCTION

  In-place FFT transform of complex arrays
  Call (FFTW3|DFTI) routines if available, otherwise fallback to SG routines
  TARGET: dpc arrays

INPUTS

  plan<fftbox_plan3_t>=Structure with the parameters defining the transform.

SIDE EFFECTS

  ff(plan%ldxyz*plan%ndat) =
    In input: the data to transform.
    Changed in output, filled with the FFT results.

PARENTS

CHILDREN

      fourwf_mpi

SOURCE

434 subroutine fftbox_execute_ip_dpc(plan,ff)
435 
436 
437 !This section has been created automatically by the script Abilint (TD).
438 !Do not modify the following lines by hand.
439 #undef ABI_FUNC
440 #define ABI_FUNC 'fftbox_execute_ip_dpc'
441 !End of the abilint section
442 
443  implicit none
444 
445 !Arguments ------------------------------------
446 !scalars
447  type(fftbox_plan3_t),intent(in) :: plan
448 !arrays
449  complex(dpc),intent(inout) :: ff(plan%ldxyz*plan%ndat)
450 
451 ! *************************************************************************
452 
453 #include "fftbox_ip_driver.finc"
454 
455 end subroutine fftbox_execute_ip_dpc

m_fft/fftbox_execute_ip_spc [ Functions ]

[ Top ] [ m_fft ] [ Functions ]

NAME

  fftbox_execute_ip_spc

FUNCTION

  In-place FFT transform of complex array.
  Call (FFTW3|DFTI) routines if available, otherwise we fallback to SG routines
  TARGET: spc arrays

INPUTS

  plan<fftbox_plan3_t>=Structure with the parameters defining the transform.

SIDE EFFECTS

  ff(plan%ldxyz*plan%ndat) =
    In input: the data to transform.
    Changed in output, filled with the FFT results.

PARENTS

CHILDREN

      fourwf_mpi

SOURCE

384 subroutine fftbox_execute_ip_spc(plan,ff)
385 
386 
387 !This section has been created automatically by the script Abilint (TD).
388 !Do not modify the following lines by hand.
389 #undef ABI_FUNC
390 #define ABI_FUNC 'fftbox_execute_ip_spc'
391 !End of the abilint section
392 
393  implicit none
394 
395 !Arguments ------------------------------------
396 !scalars
397  type(fftbox_plan3_t),intent(in) :: plan
398 !arrays
399  complex(spc),intent(inout) :: ff(plan%ldxyz*plan%ndat)
400 
401 ! *************************************************************************
402 
403 #include "fftbox_ip_driver.finc"
404 
405 end subroutine fftbox_execute_ip_spc

m_fft/fftbox_execute_op_dpc [ Functions ]

[ Top ] [ m_fft ] [ Functions ]

NAME

  fftbox_execute_op_dpc

FUNCTION

  Out-of-place FFT transform of complex arrays.
  Call (FFTW3|DFTI) routines if available, otherwise fallback to SG routines
  TARGET: dpc arrays

INPUTS

 plan<fftbox_plan3_t>=Structure with the parameters defining the transform.
 ff(plan%ldxyz*plan%ndat)=The input array to be transformed.

OUTPUT

  gg(plan%ldxyz*plan%ndat)= The FFT results.

PARENTS

CHILDREN

      fourwf_mpi

SOURCE

533 subroutine fftbox_execute_op_dpc(plan,ff,gg)
534 
535 
536 !This section has been created automatically by the script Abilint (TD).
537 !Do not modify the following lines by hand.
538 #undef ABI_FUNC
539 #define ABI_FUNC 'fftbox_execute_op_dpc'
540 !End of the abilint section
541 
542  implicit none
543 
544 !Arguments ------------------------------------
545 !scalars
546  type(fftbox_plan3_t),intent(in) :: plan
547 !arrays
548  complex(dpc),intent(in) :: ff(plan%ldxyz*plan%ndat)
549  complex(dpc),intent(inout) :: gg(plan%ldxyz*plan%ndat)
550 
551 ! *************************************************************************
552 
553 #include "fftbox_op_driver.finc"
554 
555 end subroutine fftbox_execute_op_dpc

m_fft/fftbox_execute_op_spc [ Functions ]

[ Top ] [ m_fft ] [ Functions ]

NAME

  fftbox_execute_op_spc

FUNCTION

  Out-of-place FFT transform of complex arrays.
  Call (FFTW3|DFTI) routines if available, otherwise fallback to SG routines
  TARGET: spc arrays

INPUTS

 plan<fftbox_plan3_t>=Structure with the parameters defining the transform.
 ff(plan%ldxyz*plan%ndat)=The input array to be transformed.

OUTPUT

  gg(plan%ldxyz*plan%ndat)= The FFT results.

PARENTS

CHILDREN

      fourwf_mpi

SOURCE

483 subroutine fftbox_execute_op_spc(plan,ff,gg)
484 
485 
486 !This section has been created automatically by the script Abilint (TD).
487 !Do not modify the following lines by hand.
488 #undef ABI_FUNC
489 #define ABI_FUNC 'fftbox_execute_op_spc'
490 !End of the abilint section
491 
492  implicit none
493 
494 !Arguments ------------------------------------
495 !scalars
496  type(fftbox_plan3_t),intent(in) :: plan
497 !arrays
498  complex(spc),intent(in) :: ff(plan%ldxyz*plan%ndat)
499  complex(spc),intent(inout) :: gg(plan%ldxyz*plan%ndat)
500 
501 ! *************************************************************************
502 
503 #include "fftbox_op_driver.finc"
504 
505 end subroutine fftbox_execute_op_spc

m_fft/fftbox_mpi_utests [ Functions ]

[ Top ] [ m_fft ] [ Functions ]

NAME

  fftbox_mpi_utests

FUNCTION

 Driver routine for unit tests of the MPI FFT routines.

INPUTS

 fftalg =fftalg input variable.
 cplex=1 for r2c, 2 for c2c transforms.
 ndat = Number of transform to execute
 nthreads = Number of OpenMP threads.
 comm_fft=MPI communicator for the FFT
 [unit]=Output Unit number (DEFAULT std_out)

OUTPUT

  nfailed=number of failures.

PARENTS

      fftprof

SOURCE

1805 function fftbox_mpi_utests(fftalg,cplex,ndat,nthreads,comm_fft,unit) result(nfailed)
1806 
1807 
1808 !This section has been created automatically by the script Abilint (TD).
1809 !Do not modify the following lines by hand.
1810 #undef ABI_FUNC
1811 #define ABI_FUNC 'fftbox_mpi_utests'
1812 !End of the abilint section
1813 
1814  implicit none
1815 
1816 !Arguments -----------------------------------
1817 !scalars
1818  integer,intent(in) :: fftalg,cplex,ndat,nthreads,comm_fft
1819  integer,optional,intent(in) :: unit
1820  integer :: nfailed
1821 
1822 !Local variables-------------------------------
1823 !scalars
1824  integer,parameter :: NSETS=6
1825  integer :: ierr,old_nthreads,ount,iset,mpierr,nfft,me_fft
1826  integer :: nproc_fft,fftalga,fftalgc,n1,n2,n3,n4,n5,n6
1827  real(dp),parameter :: ATOL_DP=tol12
1828  real(dp) :: max_abserr
1829  real(dp) ::  ctime,wtime,gflops
1830  character(len=500) :: msg,info,library,cplex_mode,padding_mode
1831  type(distribfft_type),target :: fftabs
1832 !arrays
1833  integer :: pars(6,NSETS),ngfft(18)
1834  integer, ABI_CONTIGUOUS pointer :: fftn2_distrib(:),ffti2_local(:)
1835  integer, ABI_CONTIGUOUS pointer :: fftn3_distrib(:),ffti3_local(:)
1836  real(dp),allocatable :: fofg(:,:),fofr(:),fofr_copy(:)
1837 
1838 ! *************************************************************************
1839 
1840  ount = std_out; if (PRESENT(unit)) ount = unit
1841  nfailed = 0
1842 
1843  if (nthreads>0) then
1844    old_nthreads = xomp_get_max_threads()
1845    call xomp_set_num_threads(nthreads)
1846  end if
1847 
1848  ! These values must be compatible with all the FFT routines.
1849  ! SG library is the most restrictive (only powers of 2,3,5).
1850  pars = RESHAPE( [    &
1851 &  12,18,15,12,18,15, &
1852 &  12,18,15,13,19,16, &
1853 &  12,18,15,13,19,15, &
1854 &  12,18,15,12,18,16, &
1855 &  12,18,15,13,18,15, &
1856 &  12,18,15,15,21,18  &
1857 & ], [6,NSETS] )
1858 
1859  fftalga=fftalg/100; fftalgc=mod(fftalg,10)
1860 
1861  nproc_fft = xmpi_comm_size(comm_fft); me_fft = xmpi_comm_rank(comm_fft)
1862 
1863  call fftalg_info(fftalg,library,cplex_mode,padding_mode)
1864 
1865  !do iset=1,SIZE(pars,DIM=2)
1866  do iset=1,1
1867    n1=pars(1,iset); n2=pars(2,iset); n3=pars(3,iset)
1868    ! For the time being, ngfft(2) and ngfft(3) must be multiple of nproc_fft
1869    n2 = n2 * nproc_fft; n3 = n3 * nproc_fft
1870    !n4=pars(4,iset); n5=pars(5,iset); n6=pars(6,iset)
1871    n4=n1; n5=n2; n6=n3
1872 
1873    !n4=n1+1; n5=n2; n6=n3
1874    !n4=n1; n5=n2+1; n6=n3
1875    !n4=n1; n5=n2; n6=n3+1
1876    !write(std_out,*)pars(1:6,iset)
1877    !cplex = 1
1878 
1879    !call getng(boxcutmin,ecut,gmet,kpt,me_fft,mgfft,nfft,ngfft,nproc_fft,nsym,paral_fft,symrel,&
1880    !&ngfftc,use_gpu_cuda,unit) ! optional
1881 
1882    ! Init ngfft
1883    ! TODO Propagate more info via ngfft, define helper functions, write routine to get fftcache
1884    ngfft = 0
1885    ngfft(1:7) = [n1,n2,n3,n4,n5,n6,fftalg]
1886    ngfft(8)= get_cache_kb()        ! cache
1887    ngfft(9)=1                      ! paral_fft_
1888    ngfft(10)=nproc_fft             ! nproc_fft
1889    ngfft(11)=xmpi_comm_rank(comm_fft)  ! me_fft
1890    ngfft(12)=ngfft(2)/nproc_fft    ! n2proc
1891    ngfft(13)=ngfft(3)/nproc_fft    ! n3proc
1892 
1893    !call print_ngfft(ngfft,"ngfft for MPI-fourdp",unit=std_out,mode_paral="COLL",prtvol=0)
1894 
1895    ! Allocate arrays, fill fofr with random numbers and keep a copy.
1896    nfft = (n1 * n2 * n3) / nproc_fft
1897    ABI_MALLOC(fofg, (2,nfft*ndat))
1898    ABI_MALLOC(fofr, (cplex*nfft*ndat))
1899    ABI_MALLOC(fofr_copy, (cplex*nfft*ndat))
1900 
1901    call RANDOM_NUMBER(fofr)
1902    fofr_copy = fofr
1903 
1904    call init_distribfft(fftabs,"c",nproc_fft,n2,n3)
1905    fftn2_distrib => fftabs%tab_fftdp2_distrib
1906    ffti2_local => fftabs%tab_fftdp2_local
1907    fftn3_distrib => fftabs%tab_fftdp3_distrib
1908    ffti3_local => fftabs%tab_fftdp3_local
1909 
1910    call cwtime(ctime,wtime,gflops,"start")
1911 
1912    select case (fftalga)
1913 
1914    case (FFT_SG2002)
1915      call sg2002_mpifourdp(cplex,nfft,ngfft,ndat,-1,fftn2_distrib,ffti2_local,fftn3_distrib,ffti3_local,fofg,fofr,comm_fft)
1916      call sg2002_mpifourdp(cplex,nfft,ngfft,ndat,+1,fftn2_distrib,ffti2_local,fftn3_distrib,ffti3_local,fofg,fofr,comm_fft)
1917 
1918    case (FFT_FFTW3)
1919      call fftw3_mpifourdp(cplex,nfft,ngfft,ndat,-1,fftn2_distrib,ffti2_local,fftn3_distrib,ffti3_local,fofg,fofr,comm_fft)
1920      call fftw3_mpifourdp(cplex,nfft,ngfft,ndat,+1,fftn2_distrib,ffti2_local,fftn3_distrib,ffti3_local,fofg,fofr,comm_fft)
1921 
1922    !case (FFT_DFTI)
1923    !  call dfti_mpifourdp(cplex,nfft,ngfft,ndat,-1,fftn2_distrib,ffti2_local,fftn3_distrib,ffti3_local,fofg,fofr,comm_fft)
1924    !  call dfti_mpifourdp(cplex,nfft,ngfft,ndat,+1,fftn2_distrib,ffti2_local,fftn3_distrib,ffti3_local,fofg,fofr,comm_fft)
1925 
1926    case default
1927      write(msg,'(a,i0,a)')"fftalg: ",fftalg," does not support MPI-FFT"
1928      MSG_BUG(msg)
1929    end select
1930 
1931    call cwtime(ctime,wtime,gflops,"stop")
1932    if (me_fft == 0) then
1933      !write(std_out,'(a,i0,2(a,f10.4))')"fftalg: ",fftalg,", cpu_time: ",ctime,", wall_time: ",wtime
1934    end if
1935 
1936    ! Check if F^{-1} F = I within ATOL_DP
1937    ierr = COUNT(ABS(fofr - fofr_copy) > ATOL_DP)
1938    call xmpi_sum(ierr,comm_fft,mpierr)
1939    nfailed = nfailed + ierr
1940 
1941    if (cplex == 1) info = sjoin(library,"r2c --> c2r :")
1942    if (cplex == 2) info = sjoin(library,"c2c :")
1943 
1944    if (ierr /= 0) then
1945      ! Compute the maximum of the absolute error.
1946      max_abserr = MAXVAL(ABS(fofr - fofr_copy))
1947      call xmpi_max(max_abserr,comm_fft,mpierr)
1948      write(msg,"(a,es9.2,a)")" FAILED (max_abserr = ",max_abserr,")"
1949    else
1950      write(msg,"(a)")" OK"
1951    end if
1952    call wrtout(ount,sjoin(info,msg),"COLL")
1953 
1954    call destroy_distribfft(fftabs)
1955 
1956    ABI_FREE(fofg)
1957    ABI_FREE(fofr_copy)
1958    ABI_FREE(fofr)
1959  end do
1960 
1961  if (nthreads > 0) then
1962    call xomp_set_num_threads(old_nthreads)
1963  end if
1964 
1965 end function fftbox_mpi_utests

m_fft/fftbox_plan3 [ Functions ]

[ Top ] [ m_fft ] [ Functions ]

NAME

  fftbox_plan3

FUNCTION

  Basic interface to construct fftbox_plan3_t

INPUTS

PARENTS

      debug_tools

CHILDREN

      fourwf_mpi

SOURCE

225 subroutine fftbox_plan3(plan,dims,fftalg,isign)
226 
227 
228 !This section has been created automatically by the script Abilint (TD).
229 !Do not modify the following lines by hand.
230 #undef ABI_FUNC
231 #define ABI_FUNC 'fftbox_plan3'
232 !End of the abilint section
233 
234  implicit none
235 
236 !Arguments ------------------------------------
237 !scalars
238  integer,intent(in) :: fftalg,isign
239  type(fftbox_plan3_t),intent(out) :: plan
240 !arrays
241  integer,intent(in) :: dims(3)
242 
243 !Local variables-------------------------------
244 !scalars
245  integer,parameter :: ndat1=1,fftcache0=0
246 
247 ! *************************************************************************
248 
249  call fftbox_plan3_init(plan,ndat1,dims,dims,fftalg,fftcache0,isign)
250 
251 end subroutine fftbox_plan3

m_fft/fftbox_plan3_init [ Functions ]

[ Top ] [ m_fft ] [ Functions ]

NAME

  fftbox_plan3_init

FUNCTION

  Initialize the plan with the options passed to the fttbox_ routines.
  Low-level constructor.

INPUTS

  See fftbox_plan3_t for the meaning of the different arguments.

PARENTS

      m_fft

CHILDREN

      fourwf_mpi

SOURCE

324 subroutine fftbox_plan3_init(plan,ndat,dims,embed,fftalg,fftcache,isign)
325 
326 
327 !This section has been created automatically by the script Abilint (TD).
328 !Do not modify the following lines by hand.
329 #undef ABI_FUNC
330 #define ABI_FUNC 'fftbox_plan3_init'
331 !End of the abilint section
332 
333  implicit none
334 
335 !Arguments ------------------------------------
336 !scalars
337  integer,intent(in) :: ndat,fftalg,fftcache,isign !,nthreads
338  type(fftbox_plan3_t),intent(out) :: plan
339 !arrays
340  integer,intent(in) :: dims(3),embed(3)
341 
342 ! *************************************************************************
343 
344  plan%ndat     = ndat
345  plan%dims     = dims                       !ngfft(1:3)
346  plan%embed    = embed                      !ngfft(4:6)
347  plan%fftalg   = fftalg                     !ngfft(7)
348  if (fftcache > 0) plan%fftcache = fftcache !ngfft(8)
349  plan%isign    = isign
350  !plan%nthreads = nthreads
351 
352  plan%nfft     = PRODUCT(plan%dims)
353  plan%ldxyz    = PRODUCT(plan%embed)
354 
355 end subroutine fftbox_plan3_init

m_fft/fftbox_plan3_many [ Functions ]

[ Top ] [ m_fft ] [ Functions ]

NAME

  fftbox_plan3_many

FUNCTION

  Advanced interface to construct fftbox_plan3_t

INPUTS

  See fftbox_plan3_t

PARENTS

      m_fft,m_fft_prof,m_oscillators,m_pawpwij,m_shirley

CHILDREN

      fourwf_mpi

SOURCE

274 subroutine fftbox_plan3_many(plan,ndat,dims,embed,fftalg,isign)
275 
276 
277 !This section has been created automatically by the script Abilint (TD).
278 !Do not modify the following lines by hand.
279 #undef ABI_FUNC
280 #define ABI_FUNC 'fftbox_plan3_many'
281 !End of the abilint section
282 
283  implicit none
284 
285 !Arguments ------------------------------------
286 !scalars
287  integer,intent(in) :: fftalg,isign,ndat
288  type(fftbox_plan3_t),intent(out) :: plan
289 !arrays
290  integer,intent(in) :: dims(3),embed(3)
291 
292 !Local variables-------------------------------
293 !scalars
294  integer,parameter :: fftcache0=0
295 
296 ! *************************************************************************
297 
298  call fftbox_plan3_init(plan,ndat,dims,embed,fftalg,fftcache0,isign)
299 
300 end subroutine fftbox_plan3_many

m_fft/fftbox_plan3_t [ Types ]

[ Top ] [ m_fft ] [ Types ]

NAME

 fftbox_plan3_t

FUNCTION

  Stores the options passed to the fftbox_ routines.

SOURCE

127  type,public :: fftbox_plan3_t
128    private
129    integer :: fftalg=112      ! Flag defining the library to call.
130    integer :: fftcache=16     ! Size of the cache (kB). Only used in SG routines.
131    integer :: isign=0         ! Sign of the exponential in the FFT
132    integer :: nfft            ! Total number of points in the FFT box.
133    integer :: ldxyz=-1        ! Physical dimension of the array to transform
134    integer :: ndat=-1         ! Number of FFTs associated to the plan.
135    !integer :: nthreads=-1    ! The number of threads associated to the plan.
136    integer :: dims(3)=-1      ! The number of FFT divisions.
137    integer :: embed(3)=-1     ! Leading dimensions of the input,output arrays.
138  end type fftbox_plan3_t
139 
140  public :: fftbox_plan3       ! Basic interface to create the plan.
141  public :: fftbox_plan3_many  ! Advanced interface
142  public :: fftbox_plan3_init  ! Low-level constructor

m_fft/fftbox_utests [ Functions ]

[ Top ] [ m_fft ] [ Functions ]

NAME

  fftbox_utests

FUNCTION

 Driver routine for base unitary tests of the FFT routines (sequential version).

INPUTS

 fftalg =fftalg input variable.
 ndat = Number of transform to execute
 nthreads = Number of OpenMP threads.
 [unit]=Output Unit number (DEFAULT std_out)

OUTPUT

  nfailed=number of failures.

PARENTS

      fftprof

SOURCE

1297 function fftbox_utests(fftalg,ndat,nthreads,unit) result(nfailed)
1298 
1299 
1300 !This section has been created automatically by the script Abilint (TD).
1301 !Do not modify the following lines by hand.
1302 #undef ABI_FUNC
1303 #define ABI_FUNC 'fftbox_utests'
1304 !End of the abilint section
1305 
1306  implicit none
1307 
1308 !Arguments -----------------------------------
1309 !scalars
1310  integer,intent(in) :: fftalg,ndat,nthreads
1311  integer,optional,intent(in) :: unit
1312  integer :: nfailed
1313 !arrays
1314 
1315 !Local variables-------------------------------
1316 !scalars
1317  integer,parameter :: NSETS=6
1318  integer :: ifft,ierr,ldxyz,old_nthreads,ount,cplex
1319  integer :: iset,nx,ny,nz,ldx,ldy,ldz,fftalga,fftalgc
1320  !integer :: ix,iy,iz,padat,dat
1321  real(dp),parameter :: ATOL_SP=tol6,ATOL_DP=tol12 ! Tolerances on the absolute error
1322  real(dp) :: max_abserr
1323  character(len=500) :: msg,info,library,cplex_mode,padding_mode
1324  type(fftbox_plan3_t) :: bw_plan,fw_plan
1325 !arrays
1326  integer :: pars(6,NSETS)
1327  real(dp) :: crand(2)
1328  real(dp),allocatable :: fofg(:),fofr_ref(:),fofr(:)
1329  complex(dpc),allocatable :: ff(:),ff_ref(:),gg(:)
1330  complex(spc),allocatable :: ffsp(:),ff_refsp(:),ggsp(:)
1331 
1332 ! *************************************************************************
1333 
1334  nfailed = 0
1335 
1336  ount = std_out; if (PRESENT(unit)) ount = unit
1337 
1338  if (nthreads>0) then
1339    old_nthreads = xomp_get_max_threads()
1340    call xomp_set_num_threads(nthreads)
1341  end if
1342 
1343  ! These values must be compatible with all the FFT routines.
1344  ! SG library is the most restrictive (only powers of 2,3,5).
1345  pars = RESHAPE( (/   &
1346 &  12,18,15,12,18,15, &
1347 &  12,18,15,13,19,16, &
1348 &  12,18,15,13,19,15, &
1349 &  12,18,15,12,18,16, &
1350 &  12,18,15,13,18,15, &
1351 &  12,18,15,15,21,18  &
1352 & /), (/6,NSETS/) )
1353 
1354  fftalga=fftalg/100; fftalgc=mod(fftalg,10)
1355 
1356  call fftalg_info(fftalg,library,cplex_mode,padding_mode)
1357 
1358  do iset=1,SIZE(pars,DIM=2)
1359    nx =pars(1,iset);  ny=pars(2,iset);  nz=pars(3,iset)
1360    ldx=pars(4,iset); ldy=pars(5,iset); ldz=pars(6,iset)
1361    !write(std_out,*)pars(1:6,iset)
1362 
1363    ! Create the FFT plans.
1364    call fftbox_plan3_many(bw_plan,ndat,pars(1,iset),pars(4,iset),fftalg,+1)
1365    call fftbox_plan3_many(fw_plan,ndat,pars(1,iset),pars(4,iset),fftalg,-1)
1366 
1367    ldxyz = ldx*ldy*ldz
1368    !
1369    ! ================================================================
1370    ! === TEST the single precision version of dfti_c2c_* routines ===
1371    ! ================================================================
1372    ABI_MALLOC(ff_refsp, (ldxyz*ndat))
1373    ABI_MALLOC(ffsp,     (ldxyz*ndat))
1374    ABI_MALLOC(ggsp,     (ldxyz*ndat))
1375 
1376    do ifft=1,ldxyz*ndat
1377      call RANDOM_NUMBER(crand)
1378      ff_refsp(ifft) = DCMPLX(crand(1), crand(2))
1379    end do
1380 
1381    ! Set the augmentation region to zero, because FFTW3 wrappers
1382    ! use zscal to scale the results.
1383    call cplx_setaug_zero_spc(nx,ny,nz,ldx,ldy,ldz,ndat,ff_refsp)
1384    ffsp = ff_refsp
1385 
1386    ! in-place version.
1387    call fftbox_execute(bw_plan,ffsp)
1388    call fftbox_execute(fw_plan,ffsp)
1389 
1390    ierr = COUNT(ABS(ffsp - ff_refsp) > ATOL_SP)
1391    nfailed = nfailed + ierr
1392 
1393    info = sjoin(library,"c2c_ip_spc :")
1394    if (ierr /= 0) then
1395      max_abserr = MAXVAL(ABS(ffsp - ff_refsp))
1396      write(msg,"(a,es9.2,a)")" FAILED (max_abserr = ",max_abserr,")"
1397    else
1398      write(msg,"(a)")" OK"
1399    end if
1400    call wrtout(ount,sjoin(info,msg),"COLL")
1401 
1402    ffsp = ff_refsp
1403    call fftbox_execute(bw_plan,ffsp,ggsp)
1404    call fftbox_execute(fw_plan,ggsp,ffsp)
1405 
1406    ierr = COUNT(ABS(ffsp - ff_refsp) > ATOL_SP)
1407    nfailed = nfailed + ierr
1408 
1409    info = sjoin(library,"c2c_op_spc :")
1410    if (ierr /= 0) then
1411      max_abserr = MAXVAL(ABS(ffsp - ff_refsp))
1412      write(msg,"(a,es9.2,a)")" FAILED (max_abserr = ",max_abserr,")"
1413    else
1414      write(msg,"(a)")" OK"
1415    end if
1416    call wrtout(ount,sjoin(info,msg),"COLL")
1417 
1418    ABI_FREE(ff_refsp)
1419    ABI_FREE(ffsp)
1420    ABI_FREE(ggsp)
1421 
1422    ! ================================================================
1423    ! === TEST the double precision version of dfti_c2c_* routines ===
1424    ! ================================================================
1425    ABI_MALLOC(ff_ref, (ldxyz*ndat))
1426    ABI_MALLOC(ff,     (ldxyz*ndat))
1427    ABI_MALLOC(gg,     (ldxyz*ndat))
1428 
1429    do ifft=1,ldxyz*ndat
1430      call RANDOM_NUMBER(crand)
1431      ff_ref(ifft) = DCMPLX(crand(1), crand(2))
1432    end do
1433 
1434    ! Set the augmentation region to zero, because FFTW3 wrappers
1435    ! use zscal to scale the results.
1436    call cplx_setaug_zero_dpc(nx,ny,nz,ldx,ldy,ldz,ndat,ff_ref)
1437    ff = ff_ref
1438 
1439    call fftbox_execute(bw_plan,ff)
1440    call fftbox_execute(fw_plan,ff)
1441 
1442    ierr = COUNT(ABS(ff - ff_ref) > ATOL_DP)
1443    nfailed = nfailed + ierr
1444 
1445    info = sjoin(library,"c2c_ip_dpc :")
1446    if (ierr /= 0) then
1447      max_abserr = MAXVAL(ABS(ff - ff_ref))
1448      write(msg,"(a,es9.2,a)")" FAILED (max_abserr = ",max_abserr,")"
1449    else
1450      write(msg,"(a)")" OK"
1451    end if
1452    call wrtout(ount,sjoin(info,msg),"COLL")
1453 
1454    ff = ff_ref
1455    call fftbox_execute(bw_plan,ff,gg)
1456    call fftbox_execute(fw_plan,gg,ff)
1457 
1458    ierr = COUNT(ABS(ff - ff_ref) > ATOL_DP)
1459    nfailed = nfailed + ierr
1460 
1461    info = sjoin(library,"c2c_op_dpc :")
1462    if (ierr /= 0) then
1463      max_abserr = MAXVAL(ABS(ff - ff_ref))
1464      write(msg,"(a,es9.2,a)")" FAILED (max_abserr = ",max_abserr,")"
1465    else
1466      write(msg,"(a)")" OK"
1467    end if
1468    call wrtout(ount,sjoin(info,msg),"COLL")
1469 
1470    ABI_FREE(ff_ref)
1471    ABI_FREE(ff)
1472    ABI_FREE(gg)
1473 
1474    do cplex=1,2
1475      !
1476      !if (fftalga == FFT_FFTW3 .and. ndat > 1 .and. cplex==1) then
1477      !  call wrtout(ount,"Warning: fourdp with FFTW3-wrappers, cplex=2 and ndat>1, might crash if MKL is used","COLL")
1478      !  !CYCLE
1479      !end if
1480 
1481      ABI_MALLOC(fofg,     (2*ldxyz*ndat))
1482      ABI_MALLOC(fofr_ref, (cplex*ldxyz*ndat))
1483      ABI_MALLOC(fofr,     (cplex*ldxyz*ndat))
1484 
1485      call RANDOM_NUMBER(fofr_ref)
1486      !call cg_setaug_zero(cplex,nx,ny,nz,ldx,ldy,ldz,ndat,fofr_ref)
1487      fofr = fofr_ref
1488 
1489      SELECT CASE (fftalga)
1490      CASE (FFT_FFTW3)
1491        call fftw3_seqfourdp(cplex,nx,ny,nz,ldx,ldy,ldz,ndat,-1,fofg,fofr)
1492        call fftw3_seqfourdp(cplex,nx,ny,nz,ldx,ldy,ldz,ndat,+1,fofg,fofr)
1493 
1494      CASE (FFT_DFTI)
1495        call dfti_seqfourdp(cplex,nx,ny,nz,ldx,ldy,ldz,ndat,-1,fofg,fofr)
1496        call dfti_seqfourdp(cplex,nx,ny,nz,ldx,ldy,ldz,ndat,+1,fofg,fofr)
1497 
1498      CASE DEFAULT
1499        ! TODO
1500        continue
1501      END SELECT
1502 
1503      !call cg_setaug_zero(cplex,nx,ny,nz,ldx,ldy,ldz,ndat,fofr)
1504 
1505      ierr = COUNT(ABS(fofr - fofr_ref) > ATOL_DP)
1506      nfailed = nfailed + ierr
1507 
1508      write(info,"(a,i1,a)")sjoin(library,"fourdp (cplex "),cplex,") :"
1509      if (ierr /= 0) then
1510        max_abserr = MAXVAL(ABS(fofr - fofr_ref))
1511        write(msg,"(a,es9.2,a)")" FAILED (max_abserr = ",max_abserr,")"
1512      else
1513        write(msg,"(a)")" OK"
1514      end if
1515      call wrtout(ount,sjoin(info,msg),"COLL")
1516 
1517      ABI_FREE(fofg)
1518      ABI_FREE(fofr_ref)
1519      ABI_FREE(fofr)
1520    end do
1521    !
1522  end do
1523 
1524  if (nthreads > 0) then
1525    call xomp_set_num_threads(old_nthreads)
1526  end if
1527 
1528 end function fftbox_utests

m_fft/fftmpi_u [ Functions ]

[ Top ] [ m_fft ] [ Functions ]

NAME

 fftmpi_u

FUNCTION

  **** DO NOT USE IT ****
  Wrapper function used to perform the MPI FFT of ndat wavefunctions.
  Mainly used for unit tests and prototyping. Better integration
  will be provided afterwards.

INPUTS

  See fourwf_mpi

OUTPUT

  See fourwf_mpi

PARENTS

      m_fft

CHILDREN

      fourwf_mpi

SOURCE

4265 ! The final interface should be:
4266 !subroutine fftmpi_u(npw_k,n4,n5,n6,nspinor,ndat,mgfft,ngfft,istwf_k,kg_k,gbound_k,fftabs,isign,fofg,fofr)
4267 
4268 subroutine fftmpi_u(npw_k,n4,n5,n6,ndat,mgfft,ngfft,&
4269 &  istwf_k,gbound_k,kg_k,me_g0,distribfft,isign,fofg,fofr,comm_fft,cplexwf)
4270 
4271 
4272 !This section has been created automatically by the script Abilint (TD).
4273 !Do not modify the following lines by hand.
4274 #undef ABI_FUNC
4275 #define ABI_FUNC 'fftmpi_u'
4276 !End of the abilint section
4277 
4278  implicit none
4279 
4280 !Arguments ------------------------------------
4281 !scalars
4282  integer,intent(in) :: istwf_k,mgfft,n4,n5,n6,ndat,npw_k
4283  integer,intent(in) :: isign,comm_fft,me_g0,cplexwf
4284  type(distribfft_type),intent(in) :: distribfft
4285 !arrays
4286  integer,intent(in) :: gbound_k(2*mgfft+8,2),ngfft(18)
4287  integer,intent(in) :: kg_k(3,npw_k)
4288  real(dp),intent(inout) :: fofg(2,npw_k*ndat),fofr(2,n4,n5,n6*ndat)
4289 
4290 !Local variables-------------------------------
4291 !scalars
4292  integer,parameter :: npw0=0,cplex0=0
4293  integer :: n1,n2,n3
4294  real(dp),parameter :: weight_r=one,weight_i=one
4295 !arrays
4296  integer :: dummy_kg(0,0)
4297  real(dp) :: dummy_denpot(0,0,0),dummy_fofg(0,0)
4298 
4299 ! *************************************************************************
4300 
4301  n1 = ngfft(1); n2 = ngfft(2); n3 = ngfft(3)
4302 
4303  if (isign == 1) then
4304    ! option 0 G --> R
4305    call fourwf_mpi(cplex0,dummy_denpot,fofg,dummy_fofg,fofr,&
4306 &    gbound_k,gbound_k,istwf_k,kg_k,dummy_kg,me_g0,mgfft,ngfft,distribfft,n1,n2,n3,&
4307 &    npw_k,npw0,n4,n5,n6,ndat,0,weight_r,weight_i,comm_fft,cplexwf=cplexwf)
4308  else
4309    ! option 3 R --> G
4310    call fourwf_mpi(cplex0,dummy_denpot,dummy_fofg,fofg,fofr,&
4311 &    gbound_k,gbound_k,istwf_k,dummy_kg,kg_k,me_g0,mgfft,ngfft,distribfft,n1,n2,n3,&
4312 &    npw0,npw_k,n4,n5,n6,ndat,3,weight_r,weight_i,comm_fft,cplexwf=cplexwf)
4313  end if
4314 
4315 end subroutine fftmpi_u

m_fft/fftpac [ Functions ]

[ Top ] [ m_fft ] [ Functions ]

NAME

 fftpac

FUNCTION

 Allow for data copying to modify the stride (dimensioning) of a three-
 dimensional array, for more efficient three dimensional fft.
 NOTE that the arrays are in REAL space.

 Note that arrays aa and bb may be the same array (start at the same address).
 The array aa also incorporate a spin variable.
 MG FIXME: THIS IS **VERY BAD** AS FORTRAN DOES NOT ALLOW FOR ALIASING

INPUTS

  ispden=actual spin-density of interest
  nspden=number of spin-density components
  n1,n2,n3=actual data dimensions, dimensions of complex array a
  nd1,nd2,nd3=array dimensions of (larger) array b
  ngfft(18)=contain all needed information about 3D FFT, see ~abinit/doc/variables/vargs.htm#ngfft
  option= see description of side effects

OUTPUT

  (see side effects)

SIDE EFFECTS

  aa & bb arrays are treated as input or output depending on option:
  option=1  aa(n1*n2*n3,ispden) <-- bb(nd1,nd2,nd3) real case
  option=2  aa(n1*n2*n3,ispden) --> bb(nd1,nd2,nd3) real case
  option=10 aa(n1*n2*n3,ispden) <-- bb(nd1,nd2,nd3) complex case like option 1 real part
  option=11 aa(n1*n2*n3,ispden) <-- bb(nd1,nd2,nd3) complex case like option 1 imag part

PARENTS

      dfpt_mkrho,dfpt_nstpaw,dfpt_rhofermi,dfpt_vtorho,dfptnl_resp,energy
      fock_getghc,getgh1c,gwls_hamiltonian,ks_ddiago,m_epjdos,m_io_kss,mkrho
      suscep_stat,vtorho

CHILDREN

      ptabs_fourdp

SOURCE

4676 subroutine fftpac(ispden,mpi_enreg,nspden,n1,n2,n3,nd1,nd2,nd3,ngfft,aa,bb,option)
4677 
4678 
4679 !This section has been created automatically by the script Abilint (TD).
4680 !Do not modify the following lines by hand.
4681 #undef ABI_FUNC
4682 #define ABI_FUNC 'fftpac'
4683 !End of the abilint section
4684 
4685  implicit none
4686 
4687 !Arguments ------------------------------------
4688 !scalars
4689  integer,intent(in) :: ispden,n1,n2,n3,nd1,nd2,nd3,nspden,option
4690  type(mpi_type),intent(in) :: mpi_enreg
4691 !arrays
4692  integer,intent(in) :: ngfft(18)
4693  real(dp),intent(inout) :: aa(n1*n2*n3/ngfft(10),nspden),bb(nd1,nd2,nd3)
4694 
4695 !Local variables-------------------------------
4696 !scalars
4697  integer :: i1,i2,i3,index,me_fft,nproc_fft
4698  character(len=500) :: message
4699  !arrays
4700  integer, ABI_CONTIGUOUS pointer :: fftn2_distrib(:),ffti2_local(:)
4701  integer, ABI_CONTIGUOUS pointer :: fftn3_distrib(:),ffti3_local(:)
4702 
4703 ! *************************************************************************
4704 
4705  me_fft=ngfft(11); nproc_fft=ngfft(10)
4706 
4707  if (option==1.or.option==2) then
4708    if (nd1<n1.or.nd2<n2.or.nd3<n3) then
4709      write(message,'(a,3i0,2a,3i0,a)')&
4710 &     'Each of nd1,nd2,nd3=',nd1,nd2,nd3,ch10,&
4711 &     'must be >=      n1, n2, n3 =',n1,n2,n3,'.'
4712      MSG_BUG(message)
4713    end if
4714  else
4715    if (2*nd1<n1.or.nd2<n2.or.nd3<n3) then
4716      write(message,'(a,3i0,2a,3i0,a)')&
4717 &     'Each of 2*nd1,nd2,nd3=',2*nd1,nd2,nd3,ch10,&
4718 &     'must be >= (n1, n2, n3) =',n1,n2,n3,'.'
4719      MSG_BUG(message)
4720    end if
4721  end if
4722 
4723  ! Get the distrib associated with this fft_grid
4724  call ptabs_fourdp(mpi_enreg,n2,n3,fftn2_distrib,ffti2_local,fftn3_distrib,ffti3_local)
4725 
4726  if (option==1) then
4727    do i3=1,n3
4728      if (me_fft==fftn3_distrib(i3)) then
4729        do i2=1,n2
4730          do i1=1,n1
4731            aa(i1+n1*(i2-1+n2*(ffti3_local(i3)-1)),ispden)=bb(i1,i2,i3)
4732          end do
4733        end do
4734      end if
4735    end do
4736 
4737  else if (option==2) then
4738    !  Here we avoid corrupting the data in a while writing to b in the
4739    !  case in which a and b are same array.
4740    !  Also: replace "trash" data with 0 s to avoid floating point
4741    !  exceptions when this data is actually manipulated in fft.
4742    do i3=nd3,n3+1,-1
4743      do i2=nd2,1,-1
4744        do i1=nd1,1,-1
4745          bb(i1,i2,i3)=0.d0
4746        end do
4747      end do
4748    end do
4749    do i3=n3,1,-1
4750      if (me_fft==fftn3_distrib(i3)) then
4751        do i2=nd2,n2+1,-1
4752          do i1=nd1,1,-1
4753            bb(i1,i2,i3)=0.d0
4754          end do
4755        end do
4756        do i2=n2,1,-1
4757          do i1=nd1,n1+1,-1
4758            bb(i1,i2,i3)=0.d0
4759          end do
4760          do i1=n1,1,-1
4761            bb(i1,i2,i3)=aa(i1+n1*(i2-1+n2*(ffti3_local(i3) - 1)),ispden)
4762          end do
4763        end do
4764      end if
4765    end do
4766 !  MF
4767  else if (option==10 .or. option==11) then
4768    index=1
4769    if(option==11) index=2
4770    do i3=1,n3
4771      do i2=1,n2
4772        do i1=1,n1/2
4773          aa(index,ispden)=bb(i1,i2,i3)
4774          index=index+2
4775        end do
4776      end do
4777    end do
4778 !  MF
4779  else
4780    write(message,'(a,i0,a)')' Bad option =',option,'.'
4781    MSG_BUG(message)
4782  end if
4783 
4784 end subroutine fftpac

m_fft/fftpad_dpc [ Functions ]

[ Top ] [ m_fft ] [ Functions ]

NAME

  fftpad_dpc

FUNCTION

  Driver routine used to transform COMPLEX arrays using 3D zero-padded FFTs.
  TARGET: DPC arrays

INPUTS

  ngfft(18)=Info on the 3D FFT.
  nx,ny,nz=Logical dimensions of the FFT mesh.
  ldx,ldy,ldz=Physical dimension of the f array (to avoid cache conflicts).
  ndat=Number of FFTs
  mgfft=MAX(nx,ny,nz), only used to dimension gbound
  isign=The sign of the transform.
  gbound(2*mgfft+8,2)= The boundaries of the basis sphere of G vectors at a given k-point. See sphereboundary for more info.

SIDE EFFECTS

  ff(ldx*ldy*ldz*ndat)=
    input: The array with the data to be transformed.
    output: The results of the FFT.

PARENTS

CHILDREN

      fourwf_mpi

SOURCE

1067 subroutine fftpad_dpc(ff,ngfft,nx,ny,nz,ldx,ldy,ldz,ndat,mgfft,isign,gbound)
1068 
1069 
1070 !This section has been created automatically by the script Abilint (TD).
1071 !Do not modify the following lines by hand.
1072 #undef ABI_FUNC
1073 #define ABI_FUNC 'fftpad_dpc'
1074 !End of the abilint section
1075 
1076  implicit none
1077 
1078 !Arguments ------------------------------------
1079 !scalars
1080  integer,intent(in) :: nx,ny,nz,ldx,ldy,ldz,ndat,mgfft,isign
1081 !arrays
1082  integer,intent(in) :: ngfft(18),gbound(2*mgfft+8,2)
1083  complex(dpc),target,intent(inout) :: ff(ldx*ldy*ldz*ndat)
1084 
1085 !Local variables-------------------------------
1086 !scalars
1087  integer :: fftalg,fftalga,fftalgc,ncount
1088  integer :: ivz !vz_d
1089  character(len=500) :: msg
1090 !arrays
1091  real(dp),allocatable :: fofr(:,:,:,:,:)
1092  real(dp),allocatable :: fofrvz(:,:) !vz_d
1093  real(dp),ABI_CONTIGUOUS pointer :: fpt_ftarr(:,:,:,:,:)
1094 
1095 ! *************************************************************************
1096 
1097  fftalg=ngfft(7); fftalga=fftalg/100; fftalgc=MOD(fftalg,10)
1098 
1099  select case (fftalga)
1100 
1101  case (FFT_FFTW3)
1102    call fftw3_fftpad(ff,nx,ny,nz,ldx,ldy,ldz,ndat,mgfft,isign,gbound)
1103 
1104  case (FFT_DFTI)
1105    call dfti_fftpad(ff,nx,ny,nz,ldx,ldy,ldz,ndat,mgfft,isign,gbound)
1106 
1107  case (FFT_SG)
1108    ! Goedecker"s routines.
1109    ! TODO: sg_fftpad is not the fastest routine, here I should call sg_fftrisc but I need
1110    ! kg_kin that are not available in rho_tw_g, actually one should pass G-G0 due to
1111    ! the shift introduced by the umklapp.
1112    ncount = ldx*ldy*ldz*ndat
1113 
1114    ABI_MALLOC(fofr, (2,ldx,ldy,ldz,ndat))
1115 !  call ZCOPY(ncount,ff,1,fofr,1) !vz_d
1116 !  call DCOPY(2*ncount,ff,1,fofr,1)  ! MG
1117    ! alternatif of ZCOPY from vz
1118    ABI_ALLOCATE(fofrvz,(2,ncount))     !vz_d
1119    do ivz=1,ncount                !vz_d
1120       fofrvz(1,ivz)= real(ff(ivz))  !vz_d
1121       fofrvz(2,ivz)=aimag(ff(ivz))  !vz_d
1122    end do                         !vz_d
1123    call DCOPY(2*ncount,fofrvz,1,fofr,1) !vz_d
1124    ABI_DEALLOCATE(fofrvz)             !vz_d
1125 
1126    call C_F_pointer(C_loc(ff),fpt_ftarr, shape=(/2,ldx,ldy,ldz,ndat/))
1127 
1128    !  MT nov. 2012: with xlf, need to put explicit boundaries for the 4th dimension
1129    !  this looks like a compiler bug...
1130    !if (size(fpt_ftarr)==2*size(ff)) then
1131    call sg_fftpad(ngfft(8),mgfft,nx,ny,nz,ldx,ldy,ldz,ndat,gbound,isign,fofr,fpt_ftarr)
1132    !else
1133    !  call sg_fftpad(ngfft(8),mgfft,nx,ny,nz,ldx,ldy,ldz,ndat,gbound,isign,fofr,fpt_ftarr(:,:,:,1:ldz,dat))
1134    !end if
1135 
1136    ABI_FREE(fofr)
1137 
1138    if (isign==-1) then  ! Here there might be numerical exceptions due to the holes.
1139      call xscal(ncount,one/(nx*ny*nz),ff,1)
1140    end if
1141 
1142  case default
1143    write(msg,'(a,i0,a)')"fftalga = ", fftalga," not coded "
1144    MSG_ERROR(msg)
1145  end select
1146 
1147 end subroutine fftpad_dpc

m_fft/fftpad_spc [ Functions ]

[ Top ] [ m_fft ] [ Functions ]

NAME

  fftpad_spc

FUNCTION

  Driver routine used to transform COMPLEX arrays using 3D zero-padded FFTs.
  TARGET: SPC arrays

INPUTS

  ngfft(18)=Info on the 3D FFT.
  nx,ny,nz=Logical dimensions of the FFT mesh.
  ldx,ldy,ldz=Physical dimension of the f array (to avoid cache conflicts).
  ndat=Number of FFTs
  mgfft=MAX(nx,ny,nz), only used to dimension gbound
  isign=The sign of the transform.
  gbound(2*mgfft+8,2)= The boundaries of the basis sphere of G vectors at a given k-point. See sphereboundary for more info.

SIDE EFFECTS

  ff(ldx*ldy*ldz*ndat)=
    input: The array with the data to be transformed.
    output: The results of the FFT.

PARENTS

CHILDREN

      fourwf_mpi

SOURCE

 962 subroutine fftpad_spc(ff,ngfft,nx,ny,nz,ldx,ldy,ldz,ndat,mgfft,isign,gbound)
 963 
 964 
 965 !This section has been created automatically by the script Abilint (TD).
 966 !Do not modify the following lines by hand.
 967 #undef ABI_FUNC
 968 #define ABI_FUNC 'fftpad_spc'
 969 !End of the abilint section
 970 
 971  implicit none
 972 
 973 !Arguments ------------------------------------
 974 !scalars
 975  integer,intent(in) :: nx,ny,nz,ldx,ldy,ldz,ndat,mgfft,isign
 976 !arrays
 977  integer,intent(in) :: ngfft(18),gbound(2*mgfft+8,2)
 978  complex(spc),target,intent(inout) :: ff(ldx*ldy*ldz*ndat)
 979 
 980 !Local variables-------------------------------
 981 !scalars
 982  integer :: fftalg,fftalga,fftalgc,ncount,p
 983  character(len=500) :: msg
 984 !arrays
 985  real(dp),allocatable :: fofr(:,:),ftarr(:,:)
 986 
 987 ! *************************************************************************
 988 
 989  fftalg=ngfft(7); fftalga=fftalg/100; fftalgc=MOD(fftalg,10)
 990 
 991  select case (fftalga)
 992 
 993  case (FFT_FFTW3)
 994    call fftw3_fftpad(ff,nx,ny,nz,ldx,ldy,ldz,ndat,mgfft,isign,gbound)
 995 
 996  case (FFT_DFTI)
 997    call dfti_fftpad(ff,nx,ny,nz,ldx,ldy,ldz,ndat,mgfft,isign,gbound)
 998 
 999  case (FFT_SG)
1000    ! Goedecker"s routines.
1001    ! TODO: sg_fftpad is not the fastest routine, here I should call sg_fftrisc but I need
1002    ! kg_kin that are not available in rho_tw_g, actually one should pass G-G0 due to
1003    ! the shift introduced by the umklapp.
1004    ncount = ldx*ldy*ldz*ndat
1005 
1006    ABI_MALLOC(fofr,(2,ldx*ldy*ldz*ndat))
1007    ABI_MALLOC(ftarr,(2,ldx*ldy*ldz*ndat))
1008 
1009    do p=1,ldx*ldy*ldz*ndat
1010      fofr(1,p) = REAL(ff(p))
1011      fofr(2,p) = AIMAG(ff(p))
1012    end do
1013 
1014    call sg_fftpad(ngfft(8),mgfft,nx,ny,nz,ldx,ldy,ldz,ndat,gbound,isign,fofr,ftarr)
1015    !
1016    ! Copy the results.
1017    do p=1,ldx*ldy*ldz*ndat
1018      ff(p) = CMPLX(ftarr(1,p), ftarr(2,p))
1019    end do
1020 
1021    if (isign==-1) then  ! Here there might be numerical exceptions due to the holes.
1022      ff = ff/(nx*ny*nz)
1023    end if
1024 
1025    ABI_FREE(ftarr)
1026    ABI_FREE(fofr)
1027 
1028  case default
1029    write(msg,'(a,i0,a)')"fftalga = ", fftalga," not coded "
1030    MSG_ERROR(msg)
1031  end select
1032 
1033 end subroutine fftpad_spc

m_fft/fftu_mpi_utests [ Functions ]

[ Top ] [ m_fft ] [ Functions ]

NAME

 fftu_mpi_utests

FUNCTION

 Unit tests for the FFTs of wavefunctions (MPI vesion).

INPUTS

OUTPUT

  nfailed=number of failed tests.

PARENTS

      fftprof

SOURCE

1987 function fftu_mpi_utests(fftalg,ecut,rprimd,ndat,nthreads,comm_fft,paral_kgb,unit) result(nfailed)
1988 
1989 
1990 !This section has been created automatically by the script Abilint (TD).
1991 !Do not modify the following lines by hand.
1992 #undef ABI_FUNC
1993 #define ABI_FUNC 'fftu_mpi_utests'
1994 !End of the abilint section
1995 
1996  implicit none
1997 
1998 !Arguments ------------------------------------
1999 !scalars
2000  integer,intent(in) :: fftalg,ndat,nthreads,comm_fft,paral_kgb
2001  integer :: nfailed
2002  integer,optional,intent(in) :: unit
2003  real(dp),intent(in) :: ecut
2004 !arrays
2005  real(dp),intent(in) :: rprimd(3,3)
2006 
2007 !Local variables-------------------------------
2008 !scalars
2009  integer,parameter :: nsym1=1,npw0=0,cplex_one=1,istwfk_one=1
2010  integer :: n1,n2,n3,idat,n4,n5,n6,ierr,npw_k,full_npw_k,istwf_npw_k,cplexwf
2011  integer :: mgfft,istwf_k,ikpt,old_nthreads,ount,isign,fftalga,fftalgc
2012  integer :: ig,i1,i2,i3,i3_glob,i3dat,nd3proc,i3_local,g0sender
2013  integer :: step,me_g0,me_fft,nproc_fft,mpierr,nfft,cplex
2014  real(dp),parameter :: boxcutmin2=two,ATOL_DP=tol12,RTOL_DP=tol3 ! Tolerances on the absolute and relative error
2015  real(dp),parameter :: weight_r=one,weight_i=one
2016  real(dp) :: max_abserr,max_relerr,ucvol,relerr,den,refden
2017  real(dp) ::  ctime,wtime,gflops
2018  character(len=500) :: msg,info,library,cplex_mode,padding_mode
2019  type(distribfft_type) :: fftabs
2020 !arrays
2021  integer :: symrel(3,3,nsym1),ngfft(18)
2022  integer,allocatable :: full_kg_k(:,:),istw_kg_k(:,:)
2023  integer,allocatable :: gbound_k(:,:),kg_k(:,:)
2024  real(dp) :: dummy_fofg(0,0) !dummy_denpot(0,0,0)
2025  real(dp) :: kpoint(3),kpoints(3,2)
2026  real(dp) :: gmet(3,3),gprimd(3,3),rmet(3,3)
2027  real(dp),allocatable :: fofg(:,:),ref_fofg(:,:),fofg_out(:,:),fofr(:,:,:,:)
2028  real(dp),allocatable :: density(:,:,:),pot(:,:,:),invpot(:,:,:)
2029  real(dp),allocatable :: full_fofg(:,:),istwf_fofg(:,:)
2030 
2031 ! *************************************************************************
2032 
2033  nfailed = 0
2034 
2035  ount = std_out; if (PRESENT(unit)) ount = unit
2036 
2037  if (nthreads > 0) then
2038    old_nthreads = xomp_get_max_threads()
2039    call xomp_set_num_threads(nthreads)
2040  end if
2041 
2042  if (.not. fftalg_has_mpi(fftalg)) then
2043    write(msg,'(a,i0,a)')"fftalg: ",fftalg," does not support MPI"
2044    MSG_ERROR(msg)
2045  end if
2046 
2047  nproc_fft = xmpi_comm_size(comm_fft); me_fft = xmpi_comm_rank(comm_fft)
2048 
2049  symrel = reshape([1,0,0,0,1,0,0,0,1],[3,3,nsym1])
2050  call metric(gmet,gprimd,-1,rmet,rprimd,ucvol)
2051 
2052  kpoints = RESHAPE( [ &
2053 &  0.1, 0.2, 0.3, &
2054 &  0.0, 0.0, 0.0 ], [3,2] )
2055 
2056  call fftalg_info(fftalg,library,cplex_mode,padding_mode)
2057 
2058  fftalga=fftalg/100; fftalgc=mod(fftalg,10)
2059 
2060  do ikpt=1,size(kpoints, dim=2)
2061    kpoint = kpoints(:,ikpt)
2062 
2063    ! Get full G-sphere (no MPI distribution, no time-reversal)
2064    call get_kg(kpoint,istwfk_one,ecut,gmet,full_npw_k,full_kg_k)
2065 
2066    ! Get full G-sphere (no MPI distribution, use time-reversal if possible)
2067    istwf_k = set_istwfk(kpoint)
2068    call get_kg(kpoint,istwf_k,ecut,gmet,istwf_npw_k,istw_kg_k)
2069 
2070    ! Get FFT box dims.
2071    ngfft = -1
2072    ngfft(7) = fftalg
2073    ngfft(8) = get_cache_kb()
2074 
2075    call getng(boxcutmin2,ecut,gmet,kpoint,me_fft,mgfft,nfft,ngfft,nproc_fft,nsym1,&
2076 &             paral_kgb,symrel,unit=dev_null)
2077 
2078    n1 = ngfft(1); n2 = ngfft(2); n3 = ngfft(3)
2079    ! Do not use augmentation.
2080    !ngfft(4:6) = ngfft(1:3)
2081    n4 = ngfft(4); n5 = ngfft(5); n6 = ngfft(6)
2082 
2083    call print_ngfft(ngfft,"ngfft for MPI-fourwf",unit=std_out,mode_paral="COLL",prtvol=0)
2084 
2085    ! Compute FFT distribution tables.
2086    call init_distribfft(fftabs,"c",nproc_fft,n2,n3)
2087 
2088    ! Set to 1 if this node owns G = 0.
2089    me_g0 = 0; if (fftabs%tab_fftwf2_distrib(1) == me_fft) me_g0 = 1
2090    g0sender = fftabs%tab_fftwf2_distrib(1)
2091 
2092    ! Allocate u(g) on the full sphere, initialize with random numbers.
2093    ! and broadcast full data to the other nodes.
2094    ABI_MALLOC(full_fofg, (2,full_npw_k*ndat))
2095 
2096    if (me_fft == g0sender) then
2097      if (istwf_k == 1) then
2098        call RANDOM_NUMBER(full_fofg)
2099        !full_fofg = one
2100 
2101      else if (istwf_k == 2) then
2102        ! Special treatment for real wavefunctions.
2103        ! Fill the irreducible G-vectors first so that we have the u(g) of a real u(r)
2104        ! Then get u(g) on the full G-sphere.
2105        ! TODO: Sequential version OK, SIGSEV if mpi_ncpus > 1!
2106        ABI_MALLOC(istwf_fofg, (2,istwf_npw_k*ndat))
2107        call RANDOM_NUMBER(istwf_fofg)
2108        ! Enforce real u in G-space.
2109        do idat=1,ndat
2110          istwf_fofg(2,1+(idat-1)*istwf_npw_k) = zero
2111        end do
2112 
2113        ! from istwfk 2 to 1.
2114        call change_istwfk(istwf_npw_k,istw_kg_k,istwf_k,full_npw_k,full_kg_k,istwfk_one,n1,n2,n3,ndat,istwf_fofg,full_fofg)
2115        ABI_FREE(istwf_fofg)
2116 
2117      else
2118        MSG_ERROR("istwf_k /= [1,2] not available in MPI-FFT mode")
2119      end if
2120    end if
2121 
2122    call xmpi_bcast(full_fofg,g0sender,comm_fft,ierr)
2123 
2124    ! Compute sphere boundaries for zero-padded FFTs
2125    ABI_MALLOC(gbound_k,(2*mgfft+8,2))
2126    call sphereboundary(gbound_k,istwfk_one,full_kg_k,mgfft,full_npw_k)
2127 
2128    ! Extract my G-vectors from full_kg_k and store them in kg_k.
2129    !write(std_out,*)"fftwf2_distrib",fftabs%tab_fftwf2_distrib
2130    do step=1,2
2131      if (step == 2) then
2132        ! Allocate my u(g) and my Gs.
2133        ! Set fofg to zero to bypass a bug with XLF!
2134        ABI_MALLOC(kg_k, (3, npw_k))
2135        ABI_CALLOC(fofg, (2,npw_k*ndat))
2136      end if
2137 
2138      npw_k = 0
2139      do ig=1,full_npw_k
2140        i1=full_kg_k(1,ig); if(i1<0) i1=i1+n1; i1=i1+1
2141        i2=full_kg_k(2,ig); if(i2<0) i2=i2+n2; i2=i2+1
2142        i3=full_kg_k(3,ig); if(i3<0) i3=i3+n3; i3=i3+1
2143        if (fftabs%tab_fftwf2_distrib(i2) == me_fft) then
2144          npw_k = npw_k + 1
2145          if (step == 2) then
2146            kg_k(:,npw_k) = full_kg_k(:,ig)
2147            fofg(:,npw_k) = full_fofg(:,ig)
2148          end if
2149        end if
2150      end do
2151    end do ! step
2152 
2153    ABI_FREE(istw_kg_k)
2154 
2155    !write(std_out,*)"dist",trim(itoa(me_fft)),fftabs%tab_fftdp3_distrib(:) !== me_fft)
2156    !write(std_out,*)"local",trim(itoa(me_fft)),fftabs%tab_fftdp3_local(:)
2157 
2158    ! Allocate my local portion of u(r), and keep a copy my u(g).
2159    ABI_MALLOC(fofr, (2,n4,n5,n6*ndat))
2160    ABI_MALLOC(ref_fofg, (2,npw_k*ndat))
2161    ref_fofg = fofg
2162 
2163    call cwtime(ctime,wtime,gflops,"start")
2164 
2165    ! ----------------------------------------------------------------
2166    ! Test the the backward and the forward transform of wavefunctions
2167    ! ----------------------------------------------------------------
2168    ! c2c or [c2r, r2c]
2169    cplexwf = 2; if (istwf_k==2) cplexwf = 1
2170    ! FIXME:
2171    ! There's a bug somewhere in the MPI routines if cplexwf == 1 is used and ndat > 1
2172    ! Not a serious problem at present since cplexwf==1 is never used in abinit.
2173    if (ndat>1) cplexwf = 2
2174    if (nproc_fft > 1) cplexwf = 2
2175    cplexwf = 2
2176    !cplexwf = 2; if (istwf_k==2) cplexwf = 1
2177 
2178    do isign = 1,-1,-2
2179      call fftmpi_u(npw_k,n4,n5,n6,ndat,mgfft,ngfft,&
2180 &      istwfk_one,gbound_k,kg_k,me_g0,fftabs,isign,fofg,fofr,comm_fft,cplexwf=cplexwf)
2181    end do
2182 
2183    ! The final interface should be:
2184    !subroutine fftmpi_u(npw_k,n4,n5,n6,nspinor,ndat,mgfft,ngfft,istwf_k,kg_k,gbound_k,fftabs,isign,fofg,fofr)
2185 
2186    ! Parallel version (must support augmentation in forf(:,:,:,:), hence we have a different API wrt the seq case!
2187    !call fftmpi_ug(npw_k,n4,n5,n6,nspinor,ndat,mgfft,ngfft,istwf_k,kg_k,gbound_k,fftabs,fofg,fofr)
2188    !call fftmpi_ur(npw_k,n4,n5,n6,nspinor,ndat,mgfft,ngfft,istwf_k,kg_k,gbound_k,fftabs,fofr,fofgout)
2189 
2190    ! Seq version.
2191    !call fft_ug(npw_k,nxyz,nspinor,ndat,mgfft,ngfft,istwf_k,kg_k,gbound_k,ugsp,ursp)
2192    !call fft_ur(npw_k,nxyz,nspinor,ndat,mgfft,ngfft,istwf_k,kg_k,gbound_k,ursp,ugsp)
2193 
2194    call cwtime(ctime,wtime,gflops,"stop")
2195    if (me_fft == 0) then
2196      !write(std_out,'(a,i0,3(a,f10.4))')"fftalg: ",fftalg,", ecut: ",ecut,", cpu_time: ",ctime,", wall_time: ",wtime
2197    end if
2198 
2199    ! Check if F^{-1} F = I within ATOL_DP
2200    ierr = COUNT(ABS(fofg - ref_fofg) > ATOL_DP)
2201    call xmpi_sum(ierr,comm_fft,mpierr)
2202    nfailed = nfailed + ierr
2203 
2204    write(info,"(a,i1,a)")sjoin(library,"fftu_mpi, istwfk "),istwf_k," :"
2205 
2206    if (ierr /= 0) then
2207      max_abserr = MAXVAL(ABS(fofg - ref_fofg))
2208      call xmpi_max(max_abserr,comm_fft,mpierr)
2209      write(msg,"(a,es9.2,a)")" FAILED (max_abserr = ",max_abserr,")"
2210    else
2211      write(msg,"(a)")" OK"
2212    end if
2213    call wrtout(ount,sjoin(info,msg),"COLL")
2214 
2215    ! -------------------------------------------
2216    ! Test the accumulation of density (option 1)
2217    ! -------------------------------------------
2218    ABI_CALLOC(density, (cplex_one*n4,n5,n6))
2219    !fofg = one
2220 
2221    ! Accumulate density. Does not work if cplexwf==1
2222    call fourwf_mpi(cplex_one,density,fofg,dummy_fofg,fofr,&
2223 &    gbound_k,gbound_k,istwfk_one,kg_k,kg_k,me_g0,mgfft,ngfft,fftabs,n1,n2,n3,&
2224 &    npw_k,npw_k,n4,n5,n6,ndat,1,weight_r,weight_i,comm_fft,cplexwf=cplexwf)
2225 
2226 !   Recompute u(r)
2227 !   call fourwf_mpi(cplex_one,density,fofg,dummy_fofg,fofr,&
2228 !&    gbound_k,gbound_k,istwf_one,kg_k,kg_k,me_g0,mgfft,ngfft,fftabs,n1,n2,n3,&
2229 !&    npw_k,npw_k,n4,n5,n6,ndat,0,weight_r,weight_i,comm_fft,cplexwf=cplexwf)
2230 !   call xmpi_sum(density,comm_fft,ierr)
2231 !   if (me_fft == 0) write(std_out,*)"sum density", sum(density)
2232 
2233    !do i3=1,n6
2234    !  write(110+me_fft,*)i3,density(:,:,i3)
2235    !end do
2236 
2237    max_relerr = zero
2238 
2239    ! FIXME: This is wrong if ndat > 1
2240    ! Must unify the treatment of fofr and rhor on the augmented mesh (n4,n5,n6)
2241    ! back_wf and for_wf return a MPI distributed arrays but fofr is allocated with
2242    ! the global dimensions.
2243    nd3proc=(n3-1)/nproc_fft+1
2244    do i3=1,n3
2245    !do i3=1,nd3proc
2246      if( me_fft == fftabs%tab_fftdp3_distrib(i3) ) then
2247        i3_local = fftabs%tab_fftdp3_local(i3) !+ nd3proc*(idat-1)
2248        !i3_local = i3
2249        i3_glob = i3
2250        !i3_glob = i3_local
2251        !i3_glob = i3 + nd3proc * me_fft
2252        !i3dat = i3  + n6 * (idat-1)
2253        do i2=1,n2
2254          do i1=1,n1
2255             den = density(i1,i2,i3_glob)
2256             refden = zero
2257             do idat=1,ndat
2258               i3dat = i3_local + n6 * (idat-1)
2259               refden = refden + weight_r*fofr(1,i1,i2,i3dat)**2+ weight_i*fofr(2,i1,i2,i3dat)**2
2260             end do
2261             relerr = abs(refden - den)
2262             if (abs(refden) < tol12) refden = tol12
2263             relerr = relerr / abs(refden)
2264             max_relerr = max(max_relerr, relerr)
2265             !if (relerr > RTOL_DP) write(std_out,*)trim(itoa(me_fft)),i1,i2,i3,idat,den,refden
2266          end do
2267        end do
2268      end if
2269    end do
2270 
2271    call xmpi_max(max_relerr,comm_fft,mpierr)
2272 
2273    write(info,"(a,i1,a)")sjoin(library,"accrho_mpi, istwfk "),istwf_k," :"
2274    if (max_relerr > RTOL_DP) then
2275      write(msg,"(a,es9.2,a)")" FAILED (max_relerr = ",max_relerr,")"
2276    else
2277      write(msg,"(a)")" OK"
2278    end if
2279    call wrtout(ount,sjoin(info,msg),"COLL")
2280 
2281    ABI_FREE(density)
2282 
2283    ! -------------------------------------------
2284    ! Test the application of the local potential
2285    ! -------------------------------------------
2286    cplex = 1
2287    ABI_MALLOC(pot, (cplex*n4,n5,n6))
2288    ABI_MALLOC(invpot, (cplex*n4,n5,n6))
2289 
2290    if (me_fft == g0sender) then
2291      !pot = fofr(1,:,:,:)
2292      !call RANDOM_NUMBER(pot)
2293      !where (abs(pot) < tol4) pot = tol4
2294      ! Simplest potential ever (G=0 to reduce errors due to aliasing)
2295      pot = four
2296      invpot = one/pot
2297    end if
2298 
2299    call xmpi_bcast(pot,g0sender,comm_fft,ierr)
2300    call xmpi_bcast(invpot,g0sender,comm_fft,ierr)
2301 
2302    ABI_MALLOC(fofg_out, (2,npw_k*ndat))
2303 
2304    ! Compute fofg_out = <G|pot(r)|fofg>
2305    call fourwf_mpi(cplex,pot,fofg,fofg_out,fofr,&
2306 &    gbound_k,gbound_k,istwfk_one,kg_k,kg_k,me_g0,mgfft,ngfft,fftabs,n1,n2,n3,&
2307 &    npw_k,npw_k,n4,n5,n6,ndat,2,weight_r,weight_i,comm_fft,cplexwf=cplexwf)
2308 
2309    ! Compute fofg = <G|1/pot(r)|fofg_out>
2310    call fourwf_mpi(cplex,invpot,fofg_out,fofg,fofr,&
2311 &    gbound_k,gbound_k,istwfk_one,kg_k,kg_k,me_g0,mgfft,ngfft,fftabs,n1,n2,n3,&
2312 &    npw_k,npw_k,n4,n5,n6,ndat,2,weight_r,weight_i,comm_fft,cplexwf=cplexwf)
2313 
2314    ! Check if we got the initial u(g) within ATOL_DP
2315    ierr = COUNT(ABS(fofg - ref_fofg) > ATOL_DP)
2316    call xmpi_sum(ierr,comm_fft,mpierr)
2317    nfailed = nfailed + ierr
2318 
2319    write(info,"(a,i1,a)")sjoin(library,"<G|vloc|u>, istwfk "),istwf_k," :"
2320 
2321    if (ierr /= 0) then
2322      max_abserr = MAXVAL(ABS(fofg - ref_fofg))
2323      call xmpi_max(max_abserr,comm_fft,mpierr)
2324      write(msg,"(a,es9.2,a)")" FAILED (max_abserr = ",max_abserr,")"
2325      !if (me_fft == 0) write(std_out,*)(fofg(:,ig),ref_fofg(:,ig), ig=1,npw_k*ndat)
2326    else
2327      write(msg,"(a)")" OK"
2328    end if
2329    call wrtout(ount,sjoin(info,msg),"COLL")
2330 
2331    ABI_FREE(fofg_out)
2332 
2333    ABI_FREE(pot)
2334    ABI_FREE(invpot)
2335 
2336    ABI_FREE(kg_k)
2337    ABI_FREE(gbound_k)
2338 
2339    ABI_FREE(fofg)
2340    ABI_FREE(ref_fofg)
2341    ABI_FREE(fofr)
2342 
2343    ABI_FREE(full_kg_k)
2344    ABI_FREE(full_fofg)
2345 
2346    call destroy_distribfft(fftabs)
2347  end do
2348 
2349  if (nthreads > 0) then
2350    call xomp_set_num_threads(old_nthreads)
2351  end if
2352 
2353 end function fftu_mpi_utests

m_fft/fftu_utests [ Functions ]

[ Top ] [ m_fft ] [ Functions ]

NAME

 fftu_utests

FUNCTION

 Unit tests for the FFTs of wavefunctions (sequential version).

INPUTS

OUTPUT

  nfailed=number of failed tests.

PARENTS

      fftprof

SOURCE

1550 function fftu_utests(ecut,ngfft,rprimd,ndat,nthreads,unit) result(nfailed)
1551 
1552 
1553 !This section has been created automatically by the script Abilint (TD).
1554 !Do not modify the following lines by hand.
1555 #undef ABI_FUNC
1556 #define ABI_FUNC 'fftu_utests'
1557 !End of the abilint section
1558 
1559  implicit none
1560 
1561 !Arguments ------------------------------------
1562 !scalars
1563  integer,intent(in) :: ndat,nthreads
1564  integer :: nfailed
1565  integer,optional,intent(in) :: unit
1566  real(dp),intent(in) :: ecut
1567 !arrays
1568  integer,intent(in) :: ngfft(18)
1569  real(dp),intent(in) :: rprimd(3,3)
1570 
1571 !Local variables-------------------------------
1572 !scalars
1573  integer,parameter :: nspinor1=1,mkmem1=1,exchn2n3d0=0,ikg0=0
1574  integer :: nx,ny,nz,nxyz,ldx,ldy,ldz,ierr,npw_k,mgfft,istwf_k,ikpt,ldxyz,ipw,old_nthreads,ount
1575  integer :: fftalg,npw_k_test
1576  real(dp),parameter :: ATOL_SP=tol6, ATOL_DP=tol12 ! Tolerances on the absolute error
1577  real(dp) :: max_abserr,ucvol
1578  character(len=500) :: msg,info,library,cplex_mode,padding_mode
1579 !arrays
1580  integer :: kg_dum(3,0)
1581  integer,allocatable :: gbound_k(:,:),kg_k(:,:)
1582  real(dp) :: kpoint(3),crand(2),kpoints(3,9)
1583  real(dp) :: gmet(3,3),gprimd(3,3),rmet(3,3)
1584  real(dp),allocatable :: cg(:,:),cg_ref(:,:),cr(:,:)
1585  complex(spc),allocatable :: ugsp(:),ug_refsp(:),ursp(:)
1586  complex(dpc),allocatable :: ug(:),ug_ref(:),ur(:)
1587  type(MPI_type) :: MPI_enreg_seq
1588 
1589 ! *************************************************************************
1590 
1591  ount = std_out; if (PRESENT(unit)) ount = unit
1592 
1593  nfailed = 0
1594  fftalg = ngfft(7)
1595 
1596  if (nthreads > 0) then
1597    old_nthreads = xomp_get_max_threads()
1598    call xomp_set_num_threads(nthreads)
1599  end if
1600 
1601  call metric(gmet,gprimd,-1,rmet,rprimd,ucvol)
1602 
1603  nx  = ngfft(1);  ny = ngfft(2);  nz = ngfft(3)
1604  ldx = ngfft(4); ldy = ngfft(5); ldz = ngfft(6)
1605  mgfft = MAXVAL(ngfft(1:3))
1606 
1607   nxyz =  nx* ny* nz
1608  ldxyz = ldx*ldy*ldz
1609 
1610  ABI_CALLOC(cg_ref, (2, ldxyz*ndat))
1611  ABI_CALLOC(cg,     (2, ldxyz*ndat))
1612  ABI_CALLOC(cr,     (2, ldxyz*ndat))
1613 
1614  ABI_CALLOC(ug_ref, (ldxyz*ndat))
1615  ABI_CALLOC(ug,     (ldxyz*ndat))
1616  ABI_CALLOC(ur,     (ldxyz*ndat))
1617 
1618  ABI_CALLOC(ug_refsp, (ldxyz*ndat))
1619  ABI_CALLOC(ugsp,     (ldxyz*ndat))
1620  ABI_CALLOC(ursp,     (ldxyz*ndat))
1621 
1622  kpoints = RESHAPE( (/ &
1623 &  0.1, 0.2, 0.3, &
1624 &  0.0, 0.0, 0.0, &
1625 &  0.5, 0.0, 0.0, &
1626 &  0.0, 0.0, 0.5, &
1627 &  0.5, 0.0, 0.5, &
1628 &  0.0, 0.5, 0.0, &
1629 &  0.5, 0.5, 0.0, &
1630 &  0.0, 0.5, 0.5, &
1631 &  0.5, 0.5, 0.5 /), (/3,9/) )
1632 
1633  call fftalg_info(fftalg,library,cplex_mode,padding_mode)
1634 
1635  call initmpi_seq(MPI_enreg_seq)
1636 
1637  do ikpt=1,SIZE(kpoints,DIM=2)
1638    kpoint = kpoints(:,ikpt)
1639 
1640    istwf_k = set_istwfk(kpoint)
1641 
1642    ! * Calculate the number of G-vectors for this k-point.
1643    call kpgsph(ecut,exchn2n3d0,gmet,ikg0,0,istwf_k,kg_dum,kpoint,0,MPI_enreg_seq,0,npw_k)
1644    !
1645    ! * Allocate and calculate the set of G-vectors.
1646    ABI_MALLOC(kg_k,(3,npw_k))
1647    call kpgsph(ecut,exchn2n3d0,gmet,ikg0,0,istwf_k,kg_k,kpoint,mkmem1,MPI_enreg_seq,npw_k,npw_k_test)
1648 
1649    ABI_MALLOC(gbound_k,(2*mgfft+8,2))
1650    call sphereboundary(gbound_k,istwf_k,kg_k,mgfft,npw_k)
1651 
1652    !if (istwf_k==2) then
1653    !  do ipw=1,npw_k
1654    !    write(std_out,*)ipw, kg_k(:,ipw)
1655    !  end do
1656    !  stop
1657    !end if
1658 
1659 #if 0
1660    !TODO
1661    ! ================================================
1662    ! === Test the double precision 2*real version ===
1663    ! ================================================
1664    do ipw=1,npw_k*ndat
1665      call RANDOM_NUMBER(crand)
1666      cg_ref(:,ipw) = crand(:)
1667    end do
1668 
1669    if (istwf_k == 2) then
1670      do ipw=1,npw_k*ndat,npw_k
1671        cg_ref(2,ipw) = zero
1672      end do
1673    end if
1674 
1675    cg = cg_ref
1676 
1677    call fft_ug_dp(npw_k,nxyz,nspinor1,ndat,mgfft,ngfft,istwf_k,kg_k,gbound_k,cg,cr)
1678    call fft_ur_dp(npw_k,nxyz,nspinor1,ndat,mgfft,ngfft,istwf_k,kg_k,gbound_k,cr,cg)
1679 
1680    ierr = COUNT(ABS(cg - cg_ref) > ATOL_DP)
1681    nfailed = nfailed + ierr
1682 
1683    write(info,"(a,i1,a)")sjoin(library,"fftu_dp, istwfk "),istwf_k," :"
1684    if (ierr /= 0) then
1685      max_abserr = MAXVAL(ABS(cg - cg_ref))
1686      write(msg,"(a,es9.2,a)")" FAILED (max_abserr = ",max_abserr,")"
1687    else
1688      write(msg,"(a)")" OK"
1689    end if
1690    call wrtout(ount,sjoin(info,msg),"COLL")
1691 #endif
1692 
1693    ! =================================================
1694    ! === Test the single precision complex version ===
1695    ! =================================================
1696    do ipw=1,npw_k*ndat
1697      call RANDOM_NUMBER(crand)
1698      ug_refsp(ipw) = CMPLX(crand(1), crand(2))
1699    end do
1700 
1701    if (istwf_k == 2) then
1702      do ipw=1,npw_k*ndat,npw_k
1703        ug_refsp(ipw) = REAL(ug_refsp(ipw))
1704      end do
1705    end if
1706 
1707    ugsp = ug_refsp
1708 
1709    call fft_ug(npw_k,nxyz,nspinor1,ndat,mgfft,ngfft,istwf_k,kg_k,gbound_k,ugsp,ursp)
1710    call fft_ur(npw_k,nxyz,nspinor1,ndat,mgfft,ngfft,istwf_k,kg_k,gbound_k,ursp,ugsp)
1711 
1712    ierr = COUNT(ABS(ugsp - ug_refsp) > ATOL_SP)
1713    nfailed = nfailed + ierr
1714 
1715    write(info,"(a,i1,a)")sjoin(library,"fftu_spc, istwfk "),istwf_k," :"
1716    if (ierr /= 0) then
1717      max_abserr = MAXVAL(ABS(ugsp - ug_refsp))
1718      write(msg,"(a,es9.2,a)")" FAILED (max_abserr = ",max_abserr,")"
1719    else
1720      write(msg,"(a)")" OK"
1721    end if
1722    call wrtout(ount,sjoin(info,msg),"COLL")
1723 
1724    ! =================================================
1725    ! === Test the double precision complex version ===
1726    ! =================================================
1727    do ipw=1,npw_k*ndat
1728      call RANDOM_NUMBER(crand)
1729      ug_ref(ipw) = DCMPLX(crand(1), crand(2))
1730    end do
1731 
1732    if (istwf_k == 2) then
1733      do ipw=1,npw_k*ndat,npw_k
1734        ug_ref(ipw) = REAL(ug_ref(ipw))
1735      end do
1736    end if
1737 
1738    ug = ug_ref
1739 
1740    call fft_ug(npw_k,nxyz,nspinor1,ndat,mgfft,ngfft,istwf_k,kg_k,gbound_k,ug,ur)
1741    call fft_ur(npw_k,nxyz,nspinor1,ndat,mgfft,ngfft,istwf_k,kg_k,gbound_k,ur,ug)
1742 
1743    ierr = COUNT(ABS(ug - ug_ref) > ATOL_DP)
1744    nfailed = nfailed + ierr
1745 
1746    write(info,"(a,i1,a)")sjoin(library,"fftu_dpc, istwfk "),istwf_k," :"
1747    if (ierr /= 0) then
1748      max_abserr = MAXVAL(ABS(ug - ug_ref))
1749      write(msg,"(a,es9.2,a)")" FAILED (max_abserr = ",max_abserr,")"
1750    else
1751      write(msg,"(a)")" OK"
1752    end if
1753    call wrtout(ount,sjoin(info,msg),"COLL")
1754 
1755    ABI_FREE(kg_k)
1756    ABI_FREE(gbound_k)
1757  end do
1758 
1759  ABI_FREE(cg_ref)
1760  ABI_FREE(cg)
1761  ABI_FREE(cr)
1762 
1763  ABI_FREE(ug_ref)
1764  ABI_FREE(ug)
1765  ABI_FREE(ur)
1766 
1767  ABI_FREE(ug_refsp)
1768  ABI_FREE(ugsp)
1769  ABI_FREE(ursp)
1770 
1771  call destroy_mpi_enreg(MPI_enreg_seq)
1772 
1773  if (nthreads > 0) then
1774    call xomp_set_num_threads(old_nthreads)
1775  end if
1776 
1777 end function fftu_utests

m_fft/fourdp_6d [ Functions ]

[ Top ] [ m_fft ] [ Functions ]

NAME

 fourdp_6d

FUNCTION

     Calculate a 6-dimensional Fast Fourier Transform

     isign=-1 : A(G1,G2) = Sum(r1,r2) A(r1,r2) exp(-iG1.r1) exp(+iG2.r2)
                                                  ^            ^
     isign=+1 : A(r1,r2) = Sum(G1,G2) A(G1,G2) exp(+iG1.r1) exp(-iG2.r2)
                                                  ^            ^
     isign=-1 and isign=1 form a transform/inverse-transform pair: calling
     one then the other will take you back to the original function,
     multiplied by a factor of (nl*nm*nn)**2.
     ------------------------------------------------------------------

     input:
      a: A(r1,r2) [overwritten]
     output:
      a: A(G1,G2) in the format IGFFT
     ------------------------------------------------------------------

INPUTS

OUTPUT

SIDE EFFECTS

NOTES

PARENTS

      m_kxc

CHILDREN

      fourdp

SOURCE

4536 subroutine fourdp_6d(cplex,matrix,isign,MPI_enreg,nfft,ngfft,paral_kgb,tim_fourdp)
4537 
4538 
4539 !This section has been created automatically by the script Abilint (TD).
4540 !Do not modify the following lines by hand.
4541 #undef ABI_FUNC
4542 #define ABI_FUNC 'fourdp_6d'
4543 !End of the abilint section
4544 
4545  implicit none
4546 
4547 !Arguments ------------------------------------
4548 !scalars
4549  integer,intent(in) :: cplex,isign,nfft,paral_kgb,tim_fourdp
4550  type(MPI_type),intent(in) :: MPI_enreg
4551 !arrays
4552  integer,intent(in) :: ngfft(18)
4553  complex(gwpc),intent(inout) :: matrix(nfft,nfft)
4554 
4555 !Local variables-------------------------------
4556 !scalars
4557  !integer,parameter :: cplex=2
4558  integer :: i1,i2,i3,ifft
4559  integer :: n1,n2,n3
4560 !arrays
4561  real(dp),allocatable :: fofg(:,:),fofr(:)
4562 
4563 ! *************************************************************************
4564 
4565 !TODO check normalization factor, it is better if we use the GW conventions.
4566  n1 = ngfft(1)
4567  n2 = ngfft(2)
4568  n3 = ngfft(3)
4569 
4570  ABI_MALLOC(fofg,(2,nfft))
4571  ABI_MALLOC(fofr,(cplex*nfft))
4572 
4573  do i3=0,n3-1
4574    do i2=0,n2-1
4575      do i1=0,n1-1
4576 
4577        ifft=1+i1+i2*n1+i3*n1*n2
4578        if (isign==1) then
4579          ! G1 -> r1 transform for each G2 to form A(r1,G2)
4580          fofg(1,:)=REAL (matrix(:,ifft))
4581          fofg(2,:)=AIMAG(matrix(:,ifft))
4582        else if (isign==-1) then
4583          ! r1 -> G1 transform for each r2 to form A(G1,r2)
4584          fofr(1:nfft)       =REAL (matrix(:,ifft))
4585          fofr(nfft+1:2*nfft)=AIMAG(matrix(:,ifft))
4586        else
4587          MSG_ERROR("Wrong isign")
4588        end if
4589 
4590        call fourdp(cplex,fofg,fofr,isign,MPI_enreg,nfft,ngfft,paral_kgb,tim_fourdp)
4591 
4592        if (isign==1) then ! Save A(r1,G2)
4593          matrix(:,ifft)=CMPLX(fofr(1:nfft),fofr(nfft+1:2*nfft))
4594        else if (isign==-1) then ! Save A(G1,r2)
4595          matrix(:,ifft)=CMPLX(fofg(1,:),fofg(2,:))
4596        end if
4597 
4598      end do
4599    end do
4600  end do
4601 
4602  do i3=0,n3-1
4603    do i2=0,n2-1
4604      do i1=0,n1-1
4605 
4606        ifft=1+i1+i2*n1+i3*n1*n2
4607        if (isign==1) then
4608          ! Do the G2 -> r2 transform of A(r1,G2) to get A(r1,r2)
4609          fofr(1:nfft       )=REAL (matrix(ifft,:))
4610          fofr(nfft+1:2*nfft)=AIMAG(matrix(ifft,:))
4611        else if (isign==-1) then
4612          ! Do the r2 -> G2 transform of A(G1,r2) to get A(G1,G2)
4613          fofg(1,:)=REAL (matrix(ifft,:))
4614          fofg(2,:)=AIMAG(matrix(ifft,:))
4615        end if
4616 
4617        call fourdp(2,fofg,fofr,-isign,MPI_enreg,nfft,ngfft,paral_kgb,tim_fourdp)
4618 
4619        if (isign==1) then
4620          matrix(ifft,:)=CMPLX(fofg(1,:),fofg(2,:))
4621        else if (isign==-1) then
4622          matrix(ifft,:)=CMPLX(fofr(1:nfft),fofr(nfft+1:2*nfft))
4623        end if
4624 
4625      end do
4626    end do
4627  end do
4628 
4629  ABI_FREE(fofg)
4630  ABI_FREE(fofr)
4631 
4632 end subroutine fourdp_6d

m_fft/fourdp_mpi [ Functions ]

[ Top ] [ m_fft ] [ Functions ]

NAME

 fourdp_mpi

FUNCTION

 Conduct Fourier transform of REAL or COMPLEX function f(r)=fofr defined on
 fft grid in real space, to create complex f(G)=fofg defined on full fft grid
 in reciprocal space, in full storage mode, or the reverse operation.
 For the reverse operation, the final data is divided by nfftot.
 REAL case when cplex=1, COMPLEX case when cplex=2
 Usually used for density and potentials.

INPUTS

 cplex=1 if fofr is real, 2 if fofr is complex
 nfft=(effective) number of FFT grid points (for this processor)
 ngfft(18)=contain all needed information about 3D FFT, see ~abinit/doc/variables/vargs.htm#ngfft
 ndat=Numbre of FFT transforms
 isign=sign of Fourier transform exponent: current convention uses
    +1 for transforming from G to r
    -1 for transforming from r to G.
 fftn2_distrib(2),ffti2_local(2)
 fftn3_distrib(3),ffti3_local(3)
 comm_fft=MPI communicator

SIDE EFFECTS

 Input/Output
 fofg(2,nfft)=f(G), complex.
 fofr(cplex*nfft)=input function f(r) (real or complex)

TODO

  Write simplified API for sequential version.

PARENTS

      fourdp

CHILDREN

      fourwf_mpi

SOURCE

3655 subroutine fourdp_mpi(cplex,nfft,ngfft,ndat,isign,&
3656 &  fftn2_distrib,ffti2_local,fftn3_distrib,ffti3_local,fofg,fofr,comm_fft)
3657 
3658 
3659 !This section has been created automatically by the script Abilint (TD).
3660 !Do not modify the following lines by hand.
3661 #undef ABI_FUNC
3662 #define ABI_FUNC 'fourdp_mpi'
3663 !End of the abilint section
3664 
3665  implicit none
3666 
3667 !Arguments ------------------------------------
3668 !scalars
3669  integer,intent(in) :: cplex,isign,nfft,ndat,comm_fft
3670 !arrays
3671  integer,intent(in) :: ngfft(18)
3672  integer,intent(in) :: fftn2_distrib(ngfft(2)),ffti2_local(ngfft(2))
3673  integer,intent(in) :: fftn3_distrib(ngfft(3)),ffti3_local(ngfft(3))
3674  real(dp),intent(inout) :: fofg(2,nfft*ndat),fofr(cplex*nfft*ndat)
3675 
3676 !Local variables-------------------------------
3677 !scalars
3678  integer :: fftalg,fftalga,fftalgc
3679  character(len=500) :: msg
3680 
3681 ! *************************************************************************
3682 
3683  fftalg=ngfft(7); fftalga=fftalg/100 ; fftalgc=mod(fftalg,10)
3684 
3685  select case (fftalga)
3686  case (FFT_SG2002)
3687    call sg2002_mpifourdp(cplex,nfft,ngfft,ndat,isign,&
3688 &    fftn2_distrib,ffti2_local,fftn3_distrib,ffti3_local,fofg,fofr,comm_fft)
3689 
3690  case (FFT_FFTW3)
3691    call fftw3_mpifourdp(cplex,nfft,ngfft,ndat,isign,&
3692 &    fftn2_distrib,ffti2_local,fftn3_distrib,ffti3_local,fofg,fofr,comm_fft)
3693 
3694  !case (FFT_DFTI)
3695  !   call dfti_mpifourdp(cplex,nfft,ngfft,ndat,isign,&
3696  !&    fftn2_distrib,ffti2_local,fftn3_distrib,ffti3_local,fofg,fofr,comm_fft)
3697 
3698  case default
3699    write(msg,"(a,i0)")"Wrong fftalg: ",fftalg
3700    MSG_BUG(msg)
3701  end select
3702 
3703 end subroutine fourdp_mpi

m_fft/fourwf_mpi [ Functions ]

[ Top ] [ m_fft ] [ Functions ]

NAME

 fourwf_mpi

FUNCTION

 Carry out composite Fourier transforms between real and reciprocal (G) space.
 Wavefunctions, contained in a sphere in reciprocal space,
 can be FFT to real space. They can also be FFT from real space
 to a sphere. Also, the density maybe accumulated, and a local
 potential can be applied.

 The different options are :
 - reciprocal to real space and output the result (option=0),
 - reciprocal to real space and accumulate the density (option=1)
 - reciprocal to real space, apply the local potential to the wavefunction
    in real space and produce the result in reciprocal space (option=2)
 - real space to reciprocal space (option=3).

 Schedule of operations
 - fftalgc=1 : use separate forward and backward transforms
     (7/12 savings in execution time);
 - fftalgc=2 : in case of option=1 and option=2, use routines for composite operation
     even faster than 1x1

 Also for better speed, it uses no F90 construct, except the allocate command and for zeroing arrays.

INPUTS

 cplex= if 1 , denpot is real, if 2 , denpot is complex
    (cplex=2 only allowed for option=2, and istwf_k=1)
    not relevant if option=0 or option=3, so cplex=0 can be used to minimize memory
 fftalgc=1 or 2 => simple or composite FFT applications
 fofgin(2,npwin)=holds input wavefunction in G vector basis sphere.
                 (intent(in) but the routine sphere can modify it for another iflag)
 gboundin(2*mgfft+8,2)=sphere boundary info for reciprocal to real space
 gboundout(2*mgfft+8,2)=sphere boundary info for real to reciprocal space
 istwf_k=option parameter that describes the storage of wfs
 kg_kin(3,npwin)=reduced planewave coordinates, input
 kg_kout(3,npwout)=reduced planewave coordinates, output
 me_g0=1 if this MPI node treats the Gamma, 0 otherwise
 mgfft=maximum size of 1D FFTs
 ngfft(18)=contain all needed information about 3D FFT, see ~abinit/doc/variables/vargs.htm#ngfft
 distribfft<distribfft_type>=Tables needed for the FFT parallelism
 n1,n2,n3=1D FFT sizes
 npwin=number of elements in fofgin array (for option 0, 1 and 2)
 npwout=number of elements in fofgout array (for option 2 and 3)
 n4,n5,n6=dimensions of fofr.
 ndat=Number of FFTs
 option= if 0: do direct FFT
         if 1: do direct FFT, then sum the density
         if 2: do direct FFT, multiply by the potential, then do reverse FFT
         if 3: do reverse FFT only
 weight_r=weight to be used for the accumulation of the density in real space
         (needed only when option=1)
 weight_i=weight to be used for the accumulation of the density in real space
         (needed only when option=1)
 comm_fft=MPI communicator.
 [cplexwf]= 1 c2r or r2c can be used. Default: 2 i.e. use complex-to-complex FFTs.

OUTPUT

  (see side effects)

SIDE EFFECTS

 Input/Output
 for option==0, fofgin(2,npwin)=holds input wavefunction in G sphere;
                fofr(2,n4,n5,n6) contains the output Fourier Transform of fofgin;
                no use of denpot, fofgout and npwout.
 for option==1, fofgin(2,npwin)=holds input wavefunction in G sphere;
                denpot(cplex*n4,n5,n6) contains the input density at input,
                and the updated density at output (accumulated);
                no use of fofgout and npwout.
 for option==2, fofgin(2,npwin)=holds input wavefunction in G sphere;
                denpot(cplex*n4,n5,n6) contains the input local potential;
                fofgout(2,npwout) contains the output function;
 for option==3, fofr(2,n4,n5,n6) contains the input real space wavefunction;
                fofgout(2,npwout) contains its output Fourier transform;
                no use of fofgin and npwin.

PARENTS

      fourwf,m_fft

CHILDREN

      fourwf_mpi

SOURCE

3795 subroutine fourwf_mpi(cplex,denpot,fofgin,fofgout,fofr,&
3796 &  gboundin,gboundout,istwf_k,kg_kin,kg_kout,me_g0,mgfft,ngfft,distribfft,n1,n2,n3,&
3797 &  npwin,npwout,n4,n5,n6,ndat,option,weight_r,weight_i,comm_fft,cplexwf)
3798 
3799 
3800 !This section has been created automatically by the script Abilint (TD).
3801 !Do not modify the following lines by hand.
3802 #undef ABI_FUNC
3803 #define ABI_FUNC 'fourwf_mpi'
3804 !End of the abilint section
3805 
3806  implicit none
3807 
3808 !Arguments ------------------------------------
3809 !scalars
3810  integer,intent(in) :: cplex,istwf_k,mgfft,n1,n2,n3,n4,n5,n6,npwin,ndat
3811  integer,intent(in) :: npwout,option,comm_fft,me_g0
3812  integer,intent(in),optional :: cplexwf
3813  real(dp),intent(in) :: weight_r,weight_i
3814  type(distribfft_type),intent(in) :: distribfft
3815 !arrays
3816  integer,intent(in) :: gboundin(2*mgfft+8,2),gboundout(2*mgfft+8,2),ngfft(18)
3817  integer,intent(in) :: kg_kin(3,npwin),kg_kout(3,npwout)
3818  real(dp),intent(inout) :: denpot(cplex*n4,n5,n6),fofgin(2,npwin*ndat)
3819  real(dp),intent(inout) :: fofr(2,n4,n5,n6*ndat)
3820  real(dp),intent(out) :: fofgout(2,npwout*ndat)
3821 
3822 !Local variables-------------------------------
3823 !scalars
3824  integer :: fftalg,fftalga,fftalgc,idat
3825  integer :: cplexwf_,i1,i2,i3,iflag,ig,igdat,me_fft,m1i,m1o,m2i,m2o,m3i
3826  integer :: m3o,max1i,max1o,max2i,max2i_plus,max2o,max2o_plus
3827  integer :: max3i,max3o,md1,md1i,md1o,md2,md2i,md2o,md2proc
3828  integer :: md3,md3i,md3o,min1i,min1o,min2i,min2i_moins,min2o,min2o_moins,min3i,min3o
3829  integer :: nd3proc,nproc_fft,n6eff,i3_glob,i2_loc,i2dat_loc,i3dat
3830  integer,save :: nwrites_ialltoall=0
3831  logical :: use_ialltoall
3832  real(dp) :: fim,fre,xnorm
3833  character(len=500) :: msg
3834 !arrays
3835  integer,parameter :: shiftg0(3)=0
3836  integer,parameter :: symmE(3,3)=reshape([1,0,0,0,1,0,0,0,1],[3,3])
3837 ! real(dp) :: tsec(2)
3838  real(dp) :: weight_array_r(ndat), weight_array_i(ndat)
3839  real(dp),allocatable :: workf(:,:,:,:)
3840 
3841 ! *************************************************************************
3842 
3843  !call timab(540,1,tsec)
3844 
3845  fftalg=ngfft(7); fftalga=fftalg/100 ; fftalgc=mod(fftalg,10)
3846 
3847  me_fft=ngfft(11); nproc_fft=ngfft(10)
3848  !write(std_out,*)"nproc_fft",nproc_fft
3849  ! Risky since I don't know if this entry is initialized
3850  ! Continue to pass comm_fft explicitly.
3851  !comm_fft = ngfft(14)
3852 
3853  if (fftalgc<1 .or. fftalgc>2) then
3854    write(msg,'(a,i0,3a)')&
3855 &   'The input algorithm number fftalgc=',fftalgc,' is not allowed with MPI-FFT. Must be 1 or 2',ch10,&
3856 &   'Action: change fftalgc in your input file.'
3857    MSG_ERROR(msg)
3858  end if
3859 
3860  if (option<0 .or. option>3) then
3861    write(msg,'(a,i0,3a)')&
3862 &   'The option number',option,' is not allowed.',ch10,&
3863 &   'Only option=0, 1, 2 or 3 are allowed presently.'
3864    MSG_ERROR(msg)
3865  end if
3866 
3867  if (option==1 .and. cplex/=1) then
3868    write(msg,'(a,i0,a)')&
3869 &   'With the option number 1, cplex must be 1 but it is cplex=',cplex,'.'
3870    MSG_ERROR(msg)
3871  end if
3872 
3873  if ( ALL(cplex/=(/1,2/)) .and. ANY(option==(/1,2/)) ) then
3874    write(msg,'(a,i0,a)')' When option is (1,2) cplex must be 1 or 2, but it is cplex=',cplex,'.'
3875    MSG_ERROR(msg)
3876  end if
3877 
3878  !write(std_out,*)"in fourwf_mpi with fftalg: ",fftalg,fftalgc
3879 
3880 ! We use the non-blocking version if IALLTOALL is available and ndat > 1.
3881  use_ialltoall = .False.
3882 #ifdef HAVE_MPI_IALLTOALL
3883  use_ialltoall = (ndat > 1)
3884 #endif
3885  use_ialltoall = (use_ialltoall .and. ALLOW_IALLTOALL)
3886  if (use_ialltoall .and. nwrites_ialltoall==0) then
3887    nwrites_ialltoall = 1
3888    call wrtout(std_out,"- Will use non-blocking ialltoall for MPI-FFT","COLL")
3889  end if
3890 
3891  md1i=0; md2i=0; md3i=0; m2i=0
3892  md1o=0; md2o=0; md3o=0; m2o=0
3893 
3894  if (option/=3) then
3895    ! Compute the dimensions of the small-box enclosing the input G-sphere
3896    max1i=gboundin(2,1); min1i=gboundin(1,1)
3897    max2i=gboundin(4,1); min2i=gboundin(3,1)
3898 
3899    if(istwf_k==2 .or. istwf_k==4 .or. istwf_k==6 .or. istwf_k==8)then
3900      max1i=max(max1i,-min1i)
3901      min1i=-max1i
3902    else if (istwf_k==3 .or. istwf_k==5 .or. istwf_k==7 .or. istwf_k==9) then
3903      max1i=max(max1i,-min1i-1)
3904      min1i=-max1i-1
3905    end if
3906    if (istwf_k>=2 .and. istwf_k<=5) then
3907      max2i=max(max2i,-min2i)
3908      min2i=-max2i
3909    else if (istwf_k>=6 .and. istwf_k<=9) then
3910      max2i=max(max2i,-min2i-1)
3911      min2i=-max2i-1
3912    end if
3913 
3914    max3i=gboundin(4,2); min3i=gboundin(3,2)
3915 
3916    ! Compute arrays size and leading dimensions to avoid cache trashing
3917    m1i=max1i-min1i+1; md1i=2*(m1i/2)+1
3918    m2i=max2i-min2i+1; md2i=2*(m2i/2)+1
3919 
3920    !if (.False.) then
3921    if (nproc_fft/=1) then
3922      ! Increase max2i in order to have m2i divisible by nproc_fft
3923      min2i_moins=(((m2i-1)/nproc_fft+1)*nproc_fft-m2i)/2
3924      max2i_plus=((m2i-1)/nproc_fft+1)*nproc_fft-m2i-min2i_moins
3925      ! max2i=max2i+((m2i-1)/nproc_fft+1)*nproc_fft-m2i
3926      max2i=max2i+max2i_plus
3927      min2i=min2i-min2i_moins
3928      ! careful, to be checked and make sure the max and min are smaller than size of box
3929      m2i=max2i-min2i+1; md2i=2*(m2i/2)+1
3930    end if
3931    ABI_CHECK(m2i <= n2, "m2i > n2")
3932 
3933    m3i=max3i-min3i+1; md3i=2*(m3i/2)+1
3934  end if
3935 
3936  if (option==2 .or. option==3) then
3937    ! Compute the dimensions of the small-box enclosing the output G-sphere
3938    max1o=gboundout(2,1); min1o=gboundout(1,1)
3939    max2o=gboundout(4,1); min2o=gboundout(3,1)
3940 
3941    if (istwf_k==2 .or. istwf_k==4 .or. istwf_k==6 .or. istwf_k==8) then
3942      max1o=max(max1o,-min1o)
3943      min1o=-max1o
3944    else if (istwf_k==3 .or. istwf_k==5 .or. istwf_k==7 .or. istwf_k==9) then
3945      max1o=max(max1o,-min1o-1)
3946      min1o=-max1o-1
3947    end if
3948    if (istwf_k>=2 .and. istwf_k<=5) then
3949      max2o=max(max2o,-min2o)
3950      min2o=-max2o
3951    else if (istwf_k>=6 .and. istwf_k<=9) then
3952      max2o=max(max2o,-min2o-1)
3953      min2o=-max2o-1
3954    end if
3955 
3956    max3o=gboundout(4,2); min3o=gboundout(3,2)
3957 
3958    ! Compute arrays size and leading dimensions to avoid cache trashing
3959    m1o=max1o-min1o+1; md1o=2*(m1o/2)+1
3960    m2o=max2o-min2o+1; md2o=2*(m2o/2)+1
3961 
3962    if (nproc_fft/=1) then
3963      ! Increase max2o in order to have m2o divisible by nproc_fft
3964      min2o_moins=(((m2o-1)/nproc_fft+1)*nproc_fft-m2o)/2
3965      max2o_plus=((m2o-1)/nproc_fft+1)*nproc_fft-m2o-min2o_moins
3966      ! max2o=max2o+((m2o-1)/nproc_fft+1)*nproc_fft-m2o
3967      max2o=max2o+max2o_plus
3968      min2o=min2o-min2o_moins
3969      ! careful, to be checked and make sure the max and min are smaller than size of box
3970      m2o=max2o-min2o+1; md2o=2*(m2o/2)+1
3971    end if
3972    ABI_CHECK(m2o <= n2, "m2o > n2")
3973 
3974    m3o=max3o-min3o+1; md3o=2*(m3o/2)+1
3975  end if
3976 
3977  md1=max(md1i,md1o)
3978  md2=max(md2i,md2o)
3979  md3=max(md3i,md3o)
3980 
3981  md2proc=(max(m2i,m2o)-1)/nproc_fft+1
3982  n6eff=(n6-1)/nproc_fft+1
3983  !write(std_out,*)'fourwf_mpi : max1i,max2i,max3i=',max1i,max2i,max3i
3984  !write(std_out,*)'fourwf_mpi : min1i,min2i,min3i=',min1i,min2i,min3i
3985  !write(std_out,*)'fourwf_mpi : m1i,m2i,m3i=',m1i,m2i,m3i
3986 
3987  ! Allocate work array in G-space (note exchange 3 <--> 2)
3988  ABI_ALLOCATE(workf,(2,md1,md3,md2proc*ndat))
3989 
3990  if (option/=3) then
3991    ! Insert fofgin into the **small** box (array workf) :
3992    ! Note the switch of md3 and md2, as they are only needed to dimension workf inside "sphere"
3993 
3994    if (nproc_fft > 1) then
3995      if (istwf_k/=1 )then
3996        write(msg,'(a,i0,a)')'The value of istwf_k: ',istwf_k,' is not allowed. Only istwf_k=1 is allowed in MPI-FFT'
3997        !MSG_WARNING(msg)
3998        MSG_ERROR(msg)
3999      end if
4000      call sphere_fft1(fofgin,ndat,npwin,workf,m1i,m2i,m3i,md1,md3,md2proc,kg_kin,distribfft%tab_fftwf2_local)
4001    else
4002      iflag=2
4003      call sphere(fofgin,ndat,npwin,workf,m1i,m2i,m3i,md1,md3,md2proc,kg_kin,istwf_k,iflag,me_g0,shiftg0,symmE,one)
4004    end if
4005  end if
4006 
4007  ! Can we use c2r or r2c?
4008  cplexwf_=2; if (istwf_k==2) cplexwf_=1
4009  if (present(cplexwf)) cplexwf_ = cplexwf
4010 
4011  if (option==0 .or. ((option==1.or.option==2) .and. fftalgc==1) .or. option==3) then
4012    ! Treat non-composite operations
4013 
4014    if (option/=3) then
4015      ! Fourier transform workf(2,md1,md3,md2proc*ndat) to fofr (reciprocal to real space).
4016      ! FIXME: This is buggy if cplexwx==1
4017 
4018      select case (fftalga)
4019 
4020      case (FFT_SG2002)
4021 
4022 !        do idat=1,ndat
4023 !        call sg2002_mpiback_wf(cplexwf_,1,n1,n2,n3,n4,n5,(n6-1)/nproc_fft+1,&
4024 ! &        max1i,max2i,max3i,m1i,m2i,m3i,md1,md2proc,md3, &
4025 ! &         workf(:,:,:,(idat-1)*md2proc+1:idat*md2proc), &
4026 ! &        fofr(:,:,:,(idat-1)*n6eff+1:idat*n6eff),comm_fft)
4027 !        enddo
4028        call sg2002_mpiback_wf(cplexwf_,ndat,n1,n2,n3,n4,n5,(n6-1)/nproc_fft+1,&
4029 &        max1i,max2i,max3i,m1i,m2i,m3i,md1,md2proc,md3,workf,fofr,comm_fft)
4030 
4031      case (FFT_FFTW3)
4032 
4033        if (use_ialltoall) then
4034          call fftw3_mpiback_manywf(cplexwf_,ndat,n1,n2,n3,n4,n5,(n6-1)/nproc_fft+1,&
4035 &          max1i,max2i,max3i,m1i,m2i,m3i,md1,md2proc,md3,workf,fofr,comm_fft)
4036        else
4037          call fftw3_mpiback_wf(cplexwf_,ndat,n1,n2,n3,n4,n5,(n6-1)/nproc_fft+1,&
4038 &          max1i,max2i,max3i,m1i,m2i,m3i,md1,md2proc,md3,workf,fofr,comm_fft)
4039        end if
4040 
4041      !case (FFT_DFTI)
4042      !  call dfti_mpiback_wf(cplexwf_,ndat,n1,n2,n3,n4,n5,(n6-1)/nproc_fft+1,&
4043      !   &        max1i,max2i,max3i,m1i,m2i,m3i,md1,md2proc,md3,workf,fofr,comm_fft)
4044      case default
4045        MSG_ERROR("fftalga does not provide MPI back_wf")
4046      end select
4047 
4048    end if ! option
4049 
4050    nd3proc=(n3-1)/nproc_fft+1
4051 
4052    if (option==1) then
4053      ! Accumulate density
4054      do idat=1,ndat
4055        do i3=1,nd3proc
4056          i3_glob = i3 + nd3proc * me_fft
4057          i3dat = i3  + n6eff * (idat-1)
4058          do i2=1,n2
4059            do i1=1,n1
4060              denpot(i1,i2,i3_glob) = denpot(i1,i2,i3_glob) &
4061 &              + (weight_r*fofr(1,i1,i2,i3dat)**2+ weight_i*fofr(2,i1,i2,i3dat)**2)
4062            end do
4063          end do
4064        end do
4065      end do
4066    end if ! option==1
4067 
4068    if (option==2) then
4069 
4070      !  Apply local potential
4071      if (cplex==1) then
4072        do idat=1,ndat
4073          do i3=1,nd3proc
4074            i3_glob = i3 + nd3proc * me_fft
4075            i3dat = i3  + n6eff * (idat-1)
4076            do i2=1,n2
4077              do i1=1,n1
4078                fofr(1,i1,i2,i3dat)=denpot(i1,i2,i3_glob)*fofr(1,i1,i2,i3dat)
4079                fofr(2,i1,i2,i3dat)=denpot(i1,i2,i3_glob)*fofr(2,i1,i2,i3dat)
4080              end do
4081            end do
4082          end do
4083        end do
4084 
4085      else if (cplex==2) then
4086        do idat=1,ndat
4087          do i3=1,(n3-1)/nproc_fft+1
4088            i3_glob = i3 + nd3proc * me_fft
4089            i3dat = i3  + n6eff * (idat-1)
4090            do i2=1,n2
4091              do i1=1,n1
4092                fre=fofr(1,i1,i2,i3dat)
4093                fim=fofr(2,i1,i2,i3dat)
4094                fofr(1,i1,i2,i3dat)=denpot(2*i1-1,i2,i3_glob)*fre -denpot(2*i1,i2,i3_glob)*fim
4095                fofr(2,i1,i2,i3dat)=denpot(2*i1-1,i2,i3_glob)*fim +denpot(2*i1,i2,i3_glob)*fre
4096              end do
4097            end do
4098          end do
4099        end do
4100      end if ! cplex
4101 
4102    end if ! option==2
4103 
4104    if (option==2 .or. option==3) then
4105      ! Fourier transform fofr to workf (real to reciprocal space)
4106      ! output in workf(2,md1,md3,md2proc*ndat)
4107 
4108      select case (fftalga)
4109      case (FFT_SG2002)
4110 
4111        call sg2002_mpiforw_wf(cplexwf_,ndat,n1,n2,n3,n4,n5,(n6-1)/nproc_fft+1,&
4112 &        max1o,max2o,max3o,m1o,m2o,m3o,md1,md2proc,md3,fofr,workf,comm_fft)
4113 
4114      case (FFT_FFTW3)
4115 
4116        if (use_ialltoall) then
4117          call fftw3_mpiforw_manywf(cplexwf_,ndat,n1,n2,n3,n4,n5,(n6-1)/nproc_fft+1,&
4118 &          max1o,max2o,max3o,m1o,m2o,m3o,md1,md2proc,md3,fofr,workf,comm_fft)
4119        else
4120          call fftw3_mpiforw_wf(cplexwf_,ndat,n1,n2,n3,n4,n5,(n6-1)/nproc_fft+1,&
4121 &          max1o,max2o,max3o,m1o,m2o,m3o,md1,md2proc,md3,fofr,workf,comm_fft)
4122        end if
4123 
4124      !case (FFT_DFTI)
4125      !  call dfti_mpiforw_wf(cplexwf_,ndat,n1,n2,n3,n4,n5,(n6-1)/nproc_fft+1,&
4126      !  &max1o,max2o,max3o,m1o,m2o,m3o,md1,md2proc,md3,fofr,workf,comm_fft)
4127 
4128      case default
4129        MSG_ERROR("fftalga does not provide MPI back_wf")
4130      end select
4131 
4132    end if
4133 
4134  else if (fftalgc==2 .and. (option==1 .or. option==2)) then
4135    !  Treat composite operations
4136 
4137    select case (option)
4138    case (1)
4139      !ABI_CHECK(weight_r == weight_i,"weight_r != weight_i")
4140      weight_array_r(:)=weight_r
4141      weight_array_i(:)=weight_i
4142 
4143      select case (fftalga)
4144      case (FFT_SG2002)
4145        ! Note that here we don' fill fofr. Don't know if someone in
4146        ! abinit uses option 1 to get both fofr as well as denpot
4147        call sg2002_accrho(cplexwf_,ndat,n1,n2,n3,n4,n5,n6,(n6-1)/nproc_fft+1,&
4148 &        max1i,max2i,max3i,m1i,m2i,m3i,md1,md2proc,md3,comm_fft,nproc_fft,me_fft,&
4149 &        workf,denpot,weight_array_r,weight_array_i)
4150 
4151      case (FFT_FFTW3)
4152        call fftw3_accrho(cplexwf_,ndat,n1,n2,n3,n4,n5,n6,(n6-1)/nproc_fft+1,&
4153 &        max1i,max2i,max3i,m1i,m2i,m3i,md1,md2proc,md3,comm_fft,nproc_fft,me_fft,&
4154 &        workf,denpot,weight_array_r, weight_array_i)
4155 
4156      case default
4157        MSG_ERROR("fftalga does not provide accrho")
4158      end select
4159 
4160    case (2)
4161      !write(std_out,*)fftalg,option,cplex
4162      !ABI_CHECK(cplex==1,"cplex!=2 with fftalg 412 is buggy")
4163 
4164      select case (fftalga)
4165 
4166      case (FFT_SG2002)
4167 
4168        if (use_ialltoall) then
4169          call sg2002_applypot_many(cplexwf_,cplex,ndat,n1,n2,n3,n4,n5,n6,(n6-1)/nproc_fft+1,&
4170 &          max1i,max2i,max3i,m1i,m2i,m3i,md1,md2proc,md3,&
4171 &          max1o,max2o,max3o,m1o,m2o,m3o,comm_fft,nproc_fft,me_fft,denpot,workf)
4172 
4173        else
4174          call sg2002_applypot(cplexwf_,cplex,ndat,n1,n2,n3,n4,n5,n6,(n6-1)/nproc_fft+1,&
4175 &          max1i,max2i,max3i,m1i,m2i,m3i,md1,md2proc,md3,&
4176 &          max1o,max2o,max3o,m1o,m2o,m3o,comm_fft,nproc_fft,me_fft,denpot,workf)
4177        endif
4178 
4179      case (FFT_FFTW3)
4180 
4181        if (use_ialltoall) then
4182 
4183          call fftw3_applypot_many(cplexwf_,cplex,ndat,n1,n2,n3,n4,n5,n6,(n6-1)/nproc_fft+1,&
4184 &          max1i,max2i,max3i,m1i,m2i,m3i,md1,md2proc,md3,&
4185 &          max1o,max2o,max3o,m1o,m2o,m3o,comm_fft,nproc_fft,me_fft,denpot,workf)
4186 
4187        else
4188          call fftw3_applypot(cplexwf_,cplex,ndat,n1,n2,n3,n4,n5,n6,(n6-1)/nproc_fft+1,&
4189 &          max1i,max2i,max3i,m1i,m2i,m3i,md1,md2proc,md3,&
4190 &          max1o,max2o,max3o,m1o,m2o,m3o,comm_fft,nproc_fft,me_fft,denpot,workf)
4191        end if
4192 
4193      case default
4194        MSG_ERROR("fftalga does not provide applypot")
4195      end select
4196 
4197    case default
4198      write(msg,"(a,i0,a)")"Option ",option," is not supported when fftalgc == 2"
4199      MSG_ERROR(msg)
4200    end select
4201 
4202  end if ! End of composite operations
4203 
4204  if (option==2 .or. option==3) then
4205    ! From FFT box to kg_kout.
4206    xnorm=one/dble(n1*n2*n3)
4207 
4208    if (nproc_fft > 1) then
4209      do idat=1,ndat
4210        do ig=1,npwout
4211          i1=kg_kout(1,ig); if(i1<0) i1=i1+m1o; i1=i1+1
4212          i2=kg_kout(2,ig); if(i2<0) i2=i2+m2o; i2=i2+1
4213          i3=kg_kout(3,ig); if(i3<0) i3=i3+m3o; i3=i3+1
4214 
4215          igdat = ig + (idat-1) * npwout
4216          i2_loc = (i2-1)/nproc_fft +1
4217          i2dat_loc = i2_loc + (idat-1) * md2proc
4218 
4219          fofgout(1,igdat)=workf(1,i1,i3,i2dat_loc)*xnorm
4220          fofgout(2,igdat)=workf(2,i1,i3,i2dat_loc)*xnorm
4221        end do
4222      end do
4223    else
4224      ! Warning: This call is buggy if istwfk > 2
4225      iflag=-2
4226      call sphere(fofgout,ndat,npwout,workf,m1o,m2o,m3o,md1,md3,md2proc,kg_kout,istwf_k,iflag,&
4227 &      me_g0,shiftg0,symmE,xnorm)
4228    end if
4229  end if ! if option==2 or 3
4230 
4231  ABI_DEALLOCATE(workf)
4232 
4233 !call timab(540,2,tsec)
4234 
4235 end subroutine fourwf_mpi

m_fft/indirect_parallel_Fourier [ Functions ]

[ Top ] [ m_fft ] [ Functions ]

NAME

 indirect_parallel_Fourier

FUNCTION

 The purpose of this routine is to transfer data from right to left right(:,index(i))=left(:,i)
 The difficulty is that right and left are distributed among processors
 We will suppose that the distribution is done as a density in Fourier space
 We first order the right hand side data according to the processor
 in which they are going to be located in the left hand side.
 This is done is a way such that  a mpi_alltoall put the data on the correct processor.
 We also transfer their future adress. A final ordering put everything in place

INPUTS

  index(sizeindex)= global adress for the transfer from right to left
  left(2,nleft)=left hand side
  mpi_enreg=information about MPI parallelization
  ngleft(18)=contain all needed information about 3D FFT for the left hand side
  see ~abinit/doc/variables/vargs.htm#ngfft
  ngright(18)=contain all needed information about 3D FFT for the right hand side
  see ~abinit/doc/variables/vargs.htm#ngfft
  nleft=second dimension of left array (for this processor)
  nright=second dimension of right array (for this processor)
  sizeindex=size of the index array (different form nright, because it is global to all proccessors)

OUTPUT

  left(2,nleft)=the elements of the right hand side, at the correct palce in the correct processor

NOTES

  A lot of things to improve.

PARENTS

      prcref,prcref_PMA,transgrid

CHILDREN

      mpi_alltoall,ptabs_fourdp

SOURCE

4826 subroutine indirect_parallel_Fourier(index,left,mpi_enreg,ngleft,ngright,nleft,nright,paral_kgb,right,sizeindex)
4827 
4828 
4829 !This section has been created automatically by the script Abilint (TD).
4830 !Do not modify the following lines by hand.
4831 #undef ABI_FUNC
4832 #define ABI_FUNC 'indirect_parallel_Fourier'
4833 !End of the abilint section
4834 
4835  implicit none
4836 
4837 !Arguments ---------------------------------------------
4838 !scalars
4839  integer,intent(in) :: ngleft(18),ngright(18),nleft,nright,paral_kgb,sizeindex
4840  type(MPI_type),intent(in) :: mpi_enreg
4841 !arrays
4842  integer,intent(in) :: index(sizeindex)
4843  real(dp),intent(in) :: right(2,nright)
4844  real(dp),intent(inout) :: left(2,nleft)
4845 
4846 !Local variables ---------------------------------------
4847 !scalars
4848  integer :: ierr,i_global,ileft,iright,iright_global
4849  integer :: j,j1,j2,j3,j_global,jleft_global
4850  integer :: jleft_local,me_fft,n1l,n2l,n3l,n1r,n2r,n3r,nd2l,nd2r
4851  integer :: nproc_fft,proc_dest,r2,siz_slice_max
4852 !arrays
4853  integer,allocatable :: index_recv(:),index_send(:),siz_slice(:), ffti2r_global(:)
4854  integer, ABI_CONTIGUOUS pointer :: fftn2l_distrib(:),ffti2l_local(:)
4855  integer, ABI_CONTIGUOUS pointer :: fftn3l_distrib(:),ffti3l_local(:)
4856  integer, ABI_CONTIGUOUS pointer :: fftn2r_distrib(:),ffti2r_local(:)
4857  integer, ABI_CONTIGUOUS pointer :: fftn3r_distrib(:),ffti3r_local(:)
4858  real(dp),allocatable :: right_send(:,:),right_recv(:,:)
4859 
4860 ! *************************************************************************
4861  n1r=ngright(1);n2r=ngright(2);n3r=ngright(3)
4862  n1l=ngleft(1) ;n2l=ngleft(2) ;n3l=ngleft(3)
4863  nproc_fft=mpi_enreg%nproc_fft; me_fft=mpi_enreg%me_fft
4864  nd2r=n2r/nproc_fft; nd2l=n2l/nproc_fft
4865 
4866  !Get the distrib associated with the left fft_grid
4867  call ptabs_fourdp(mpi_enreg,n2l,n3l,fftn2l_distrib,ffti2l_local,fftn3l_distrib,ffti3l_local)
4868 
4869  !Get the distrib associated with the right fft_grid
4870  call ptabs_fourdp(mpi_enreg,n2r,n3r,fftn2r_distrib,ffti2r_local,fftn3r_distrib,ffti3r_local)
4871 
4872  !Precompute local --> global corespondance
4873  ABI_ALLOCATE(ffti2r_global,(nd2r))
4874  ffti2r_global(:) = -1
4875  do j2=1,n2r
4876     if( fftn2r_distrib(j2) == me_fft ) then
4877        ffti2r_global( ffti2r_local(j2) ) = j2
4878     end if
4879  end do
4880 
4881 
4882  ABI_ALLOCATE(siz_slice,(nproc_fft))
4883  siz_slice(:)=0
4884  do i_global=1,sizeindex !look for the maximal size of slice of data
4885   j_global=index(i_global)!; write(std_out,*) j_global,i_global
4886   if(j_global /=0) then
4887     !use the fact that (j-1)=i1 + n1l*(j2l-1 + n2l*(j3l-1))
4888    proc_dest= fftn2l_distrib( modulo((j_global-1)/n1l,n2l) + 1)
4889    siz_slice(proc_dest+1)=siz_slice(proc_dest+1)+1
4890 !write(std_out,*) 'in indirect proc',proc_dest,siz_slice(proc_dest+1)
4891   end if
4892  end do
4893  siz_slice_max=maxval(siz_slice) !This value could be made smaller by looking locally
4894 !and performing a allgather with a max
4895 !write(std_out,*) 'siz_slice,sizeindex,siz_slice',siz_slice(:),sizeindex,siz_slice_max
4896 !write(std_out,*) 'sizeindex,nright,nleft',sizeindex,nright,nleft
4897  ABI_ALLOCATE(right_send,(2,nproc_fft*siz_slice_max))
4898  ABI_ALLOCATE(index_send,(nproc_fft*siz_slice_max))
4899  siz_slice(:)=0; index_send(:)=0; right_send(:,:)=zero
4900  do iright=1,nright
4901   j=iright-1;j1=modulo(j,n1r);j2=modulo(j/n1r,nd2r);j3=j/(n1r*nd2r)
4902   j2 = ffti2r_global(j2+1) - 1
4903   iright_global=n1r*(n2r*j3+j2)+j1+1
4904   jleft_global=index(iright_global)
4905   if(jleft_global/=0)then
4906      j=jleft_global-1;j1=modulo(j,n1l);j2=modulo(j/n1l,n2l);j3=j/(n1l*n2l); r2=ffti2l_local(j2+1)-1
4907    jleft_local=n1l*(nd2l*j3+r2)+j1+1
4908    proc_dest=fftn2l_distrib(j2+1)
4909    siz_slice(proc_dest+1)=siz_slice(proc_dest+1)+1
4910    right_send(:,proc_dest*siz_slice_max+siz_slice(proc_dest+1))=right(:,iright)
4911    index_send(proc_dest*siz_slice_max+siz_slice(proc_dest+1))=jleft_local
4912 !write(std_out,*) 'loop ir',jleft_local,jleft_global,iright_global,iright
4913   end if
4914  end do
4915  ABI_ALLOCATE(right_recv,(2,nproc_fft*siz_slice_max))
4916  ABI_ALLOCATE(index_recv,(nproc_fft*siz_slice_max))
4917 #if defined HAVE_MPI
4918   if(paral_kgb == 1) then
4919     call mpi_alltoall (right_send,2*siz_slice_max, &
4920 &                          MPI_double_precision, &
4921 &                          right_recv,2*siz_slice_max, &
4922 &                          MPI_double_precision,mpi_enreg%comm_fft,ierr)
4923     call mpi_alltoall (index_send,siz_slice_max, &
4924 &                          MPI_integer, &
4925 &                          index_recv,siz_slice_max, &
4926 &                          MPI_integer,mpi_enreg%comm_fft,ierr)
4927   endif
4928 #endif
4929  do ileft=1,siz_slice_max*nproc_fft
4930 !write(std_out,*)index_recv(ileft)
4931  if(index_recv(ileft) /=0 ) left(:,index_recv(ileft))=right_recv(:,ileft)
4932  end do
4933  ABI_DEALLOCATE(right_recv)
4934  ABI_DEALLOCATE(index_recv)
4935  ABI_DEALLOCATE(right_send)
4936  ABI_DEALLOCATE(index_send)
4937  ABI_DEALLOCATE(siz_slice)
4938  ABI_DEALLOCATE(ffti2r_global)
4939 
4940 end subroutine indirect_parallel_Fourier

m_fft/zerosym [ Functions ]

[ Top ] [ m_fft ] [ Functions ]

NAME

 zerosym

FUNCTION

 Symmetrize an array on the FFT grid by vanishing some term on the boundaries.

INPUTS

  cplex= if 1, input array is REAL, if 2, input array is COMPLEX
  mpi_enreg=information about MPI parallelization
  n1,n2,n3=FFT dimensions nfft=n1*n2*n3
  ig1,ig2,ig3=optional arguments= indexes of unbalanced g-vectors to cancel
              if not present, ig1=1+n1/2, ig2=1+n2/2, ig3=1+n3/2 for even n1,n2,n3
              if igj=-1, nothing is done in direction j

OUTPUT

  (see side effects)

SIDE EFFECTS

  array(cplex,n1*n2*n3)=complex array to be symetrized

PARENTS

      atm2fft,dfpt_atm2fft,dfpt_dyfro,forces,hartre,initro,m_fock,pawmknhat
      pawmknhat_psipsi,prcref,prcref_PMA,stress,transgrid

CHILDREN

SOURCE

4347 subroutine zerosym(array,cplex,n1,n2,n3,&
4348 &                  ig1,ig2,ig3,comm_fft,distribfft) ! Optional arguments
4349 
4350 
4351 !This section has been created automatically by the script Abilint (TD).
4352 !Do not modify the following lines by hand.
4353 #undef ABI_FUNC
4354 #define ABI_FUNC 'zerosym'
4355 !End of the abilint section
4356 
4357  implicit none
4358 
4359 !Arguments ------------------------------------
4360 !scalars
4361  integer,intent(in) :: cplex,n1,n2,n3
4362  integer,optional,intent(in) :: ig1,ig2,ig3,comm_fft
4363  type(distribfft_type),intent(in),target,optional :: distribfft
4364 !arrays
4365  real(dp),intent(inout) :: array(cplex,n1*n2*n3)
4366 
4367 !Local variables-------------------------------
4368 !scalars
4369  integer :: i1,i2,i3,ifft,ifft_proc,index,j,j1,j2,j3,me_fft,nd2
4370  integer :: nproc_fft,n1sel,nn12,n2sel,n3sel,r2
4371  !arrays
4372  integer, ABI_CONTIGUOUS pointer :: fftn2_distrib(:),ffti2_local(:)
4373 
4374 ! **********************************************************************
4375 
4376  DBG_ENTER("COLL")
4377 
4378  me_fft=0;nproc_fft=1
4379  if (present(comm_fft)) then
4380    me_fft=xmpi_comm_rank(comm_fft)
4381    nproc_fft=xmpi_comm_size(comm_fft)
4382  end if
4383  nd2=(n2-1)/nproc_fft+1
4384  nn12=n1*n2
4385 
4386 !Get the distrib associated with this fft_grid
4387  if (present(distribfft)) then
4388    if (n2== distribfft%n2_coarse) then
4389      fftn2_distrib => distribfft%tab_fftdp2_distrib
4390      ffti2_local => distribfft%tab_fftdp2_local
4391    else if(n2 == distribfft%n2_fine) then
4392      fftn2_distrib => distribfft%tab_fftdp2dg_distrib
4393      ffti2_local => distribfft%tab_fftdp2dg_local
4394    else
4395      MSG_BUG("Unable to find an allocated distrib for this fft grid")
4396    end if
4397  else
4398    ABI_ALLOCATE(fftn2_distrib,(n2))
4399    ABI_ALLOCATE(ffti2_local,(n2))
4400    fftn2_distrib=0;ffti2_local=(/(i2,i2=1,n2)/)
4401  end if
4402 
4403  if (present(ig1)) then
4404    n1sel=ig1
4405  else if (mod(n1,2)==0) then
4406    n1sel=1+n1/2
4407  else
4408    n1sel=-1
4409  end if
4410  if (present(ig2)) then
4411    n2sel=ig2
4412  else if (mod(n2,2)==0) then
4413    n2sel=1+n2/2
4414  else
4415    n2sel=-1
4416  end if
4417  if (present(ig3)) then
4418    n3sel=ig3
4419  else if (mod(n3,2)==0) then
4420    n3sel=1+n3/2
4421  else
4422    n3sel=-1
4423  end if
4424 
4425  if (n1sel>0) then
4426    index=n1sel-nn12-n1
4427    do i3=1,n3
4428      index=index+nn12;ifft=index
4429      do i2=1,n2
4430        ifft=ifft+n1
4431        if (nproc_fft>1) then
4432 !        MPIWF: consider ifft only if it is treated by the current proc and compute its adress
4433          j=ifft-1;j1=modulo(j,n1);j2=modulo(j/n1,n2);j3=j/(n1*n2) !;r2=modulo(j2,nd2)
4434          if(fftn2_distrib(j2+1)==me_fft) then ! MPIWF this ifft is to be treated by me_fft
4435            r2= ffti2_local(j2+1) - 1
4436            ifft_proc=n1*(nd2*j3+r2)+j1+1 !this is ifft in the current proc
4437            array(:,ifft_proc)=zero
4438          end if
4439        else
4440          array(:,ifft)=zero
4441        end if
4442      end do
4443    end do
4444  end if
4445 
4446  if (n2sel>0) then
4447    index=n1*n2sel-nn12-n1
4448    do i3=1,n3
4449      index=index+nn12;ifft=index
4450      do i1=1,n1
4451        ifft=ifft+1
4452        if (nproc_fft>1) then
4453 !        MPIWF: consider ifft only if it is treated by the current proc and compute its adress
4454          j=ifft-1;j1=modulo(j,n1);j2=modulo(j/n1,n2);j3=j/(n1*n2);
4455          if(fftn2_distrib(j2+1)==me_fft) then ! MPIWF this ifft is to be treated by me_fft
4456            r2= ffti2_local(j2+1) - 1
4457            ifft_proc=n1*(nd2*j3+r2)+j1+1 !this is ifft in the current proc
4458            array(:,ifft_proc)=zero
4459          end if
4460        else
4461          array(:,ifft)=zero
4462        end if
4463      end do
4464    end do
4465  end if
4466 
4467  if (n3sel>0) then
4468    index=nn12*n3sel-nn12-n1
4469    do i2=1,n2
4470      index=index+n1;ifft=index
4471      do i1=1,n1
4472        ifft=ifft+1
4473        if (nproc_fft>1) then
4474 !        MPIWF: consider ifft only if it is treated by the current proc and compute its adress
4475          j=ifft-1;j1=modulo(j,n1);j2=modulo(j/n1,n2);j3=j/(n1*n2)
4476          if(fftn2_distrib(j2+1)==me_fft) then ! MPIWF this ifft is to be treated by me_fft
4477            r2= ffti2_local(j2+1) - 1
4478            ifft_proc=n1*(nd2*j3+r2)+j1+1 !this is ifft in the current proc
4479            array(:,ifft_proc)=zero
4480          end if
4481        else
4482          array(:,ifft)=zero
4483        end if
4484      end do
4485    end do
4486  end if
4487 
4488  if (.not.present(distribfft)) then
4489    ABI_DEALLOCATE(fftn2_distrib)
4490    ABI_DEALLOCATE(ffti2_local)
4491  end if
4492 
4493  DBG_EXIT("COLL")
4494 
4495 end subroutine zerosym