TABLE OF CONTENTS


ABINIT/fourdp [ Functions ]

[ Top ] [ Functions ]

NAME

 fourdp

FUNCTION

 Conduct Fourier transform of REAL or COMPLEX function f(r)=fofr defined on
 fft grid in real space, to create complex f(G)=fofg defined on full fft grid 
 in reciprocal space, in full storage mode, or the reverse operation.
 For the reverse operation, the final data is divided by nfftot.
 REAL case when cplex=1, COMPLEX case when cplex=2
 Usually used for density and potentials.

 There are two different possibilities :
  fftalgb=0 means using the complex-to-complex FFT routine,
   irrespective of the value of cplex
  fftalgb=1 means using a real-to-complex FFT or a complex-to-complex FFT,
   depending on the value of cplex.
  The only real-to-complex FFT available is from SGoedecker library.

COPYRIGHT

 Copyright (C) 1998-2018 ABINIT group (DCA, XG)
 This file is distributed under the terms of the
 GNU General Public License, see ~abinit/COPYING
 or http://www.gnu.org/copyleft/gpl.txt .
 For the initials of contributors, see ~abinit/doc/developers/contributors.txt .

INPUTS

 cplex=1 if fofr is real, 2 if fofr is complex
 isign=sign of Fourier transform exponent: current convention uses
  +1 for transforming from G to r 
  -1 for transforming from r to G.
 mpi_enreg=information about MPI parallelization
 nfft=(effective) number of FFT grid points (for this processor)
 ngfft(18)=contain all needed information about 3D FFT, see ~abinit/doc/variables/vargs.htm#ngfft
 paral_kgb=Flag related to the kpoint-band-fft parallelism
 tim_fourdp=timing code of the calling routine (can be set to 0 if not attributed)

TODO

  Remove paral_kgb

SIDE EFFECTS

 Input/Output
 fofg(2,nfft)=f(G), complex.
 fofr(cplex*nfft)=input function f(r) (real or complex)

PARENTS

      atm2fft,bethe_salpeter,calc_smeared_density,dfpt_atm2fft,dfpt_dyfro
      dfpt_eltfrxc,dfpt_looppert,dfpt_newvtr,dfpt_scfcv,dfpt_vlocal
      dfptnl_loop,dieltcel,energy,fock_getghc,forces,fourdp_6d,fresidrsp
      green_kernel,gstate,hartre,hartrestr,initro,jellium,laplacian,m_dvdb
      m_electronpositron,m_epjdos,m_fft_prof,m_hidecudarec,m_kxc,m_ppmodel
      m_screening,make_efg_el,mklocl_realspace,mklocl_recipspace,moddiel
      moddiel_csrb,mrgscr,newrho,newvtr,nonlinear,nres2vres,odamix,pawmknhat
      pawmknhat_psipsi,pawmkrho,posdoppler,prcref,prcref_PMA,recursion
      recursion_nl,redgr,respfn,rotate_rho,scfcv,screening,setup_positron
      sigma,stress,symrhg,tddft,transgrid,vlocalstr,xcden,xcpot

CHILDREN

      ccfft,dfti_seqfourdp,fftw3_mpifourdp,fftw3_seqfourdp,fourdp_mpi
      ptabs_fourdp,sg2002_back,sg2002_forw,sg2002_mpifourdp,sg_fft_rc,timab

SOURCE

 65 #if defined HAVE_CONFIG_H
 66 #include "config.h"
 67 #endif
 68 
 69 #include "abi_common.h"
 70 
 71 subroutine fourdp(cplex,fofg,fofr,isign,mpi_enreg,nfft,ngfft,paral_kgb,tim_fourdp)
 72 
 73  use defs_basis
 74  use defs_abitypes
 75  use m_profiling_abi
 76  use m_errors
 77  use m_fftcore
 78  
 79  use m_mpinfo,      only : ptabs_fourdp
 80  use m_sgfft,       only : sg_fft_rc
 81  use m_sg2002,      only : sg2002_mpifourdp, sg2002_back, sg2002_forw
 82  use m_fftw3,       only : fftw3_seqfourdp, fftw3_mpifourdp
 83  use m_dfti,        only : dfti_seqfourdp
 84  use m_fft,         only : fourdp_mpi
 85 
 86 !This section has been created automatically by the script Abilint (TD).
 87 !Do not modify the following lines by hand.
 88 #undef ABI_FUNC
 89 #define ABI_FUNC 'fourdp'
 90  use interfaces_18_timing
 91  use interfaces_53_ffts, except_this_one => fourdp
 92 !End of the abilint section
 93 
 94  implicit none
 95 
 96 !Arguments ------------------------------------
 97 !scalars
 98  integer,intent(in) :: cplex,isign,nfft,paral_kgb,tim_fourdp
 99  type(MPI_type),intent(in) :: mpi_enreg
100 !arrays
101  integer,intent(in) :: ngfft(18)
102  real(dp),intent(inout) :: fofg(2,nfft),fofr(cplex*nfft)
103 
104 !Local variables-------------------------------
105 !scalars
106  integer,parameter :: ndat1=1
107  integer :: fftalg,fftalga,fftalgb,fftcache,i1,i2,i3,base
108  integer :: n1,n1half1,n1halfm,n2,n2half1,n3,n4
109  integer :: n4half1,n5,n5half1,n6 !nd2proc,nd3proc,i3_local,i2_local,
110  integer :: comm_fft,nproc_fft,me_fft
111  real(dp) :: xnorm
112  character(len=500) :: message
113 !arrays
114  integer, ABI_CONTIGUOUS pointer :: fftn2_distrib(:),ffti2_local(:)
115  integer, ABI_CONTIGUOUS pointer :: fftn3_distrib(:),ffti3_local(:)
116  real(dp) :: tsec(2)
117  real(dp),allocatable :: work1(:,:,:,:),work2(:,:,:,:)
118  real(dp),allocatable :: workf(:,:,:,:),workr(:,:,:,:)
119 
120 ! *************************************************************************
121 
122  ABI_UNUSED(paral_kgb)
123  
124  ! Keep track of timing
125  call timab(260+tim_fourdp,1,tsec)
126 
127  n1=ngfft(1); n2=ngfft(2); n3=ngfft(3)
128  n4=ngfft(4); n5=ngfft(5); n6=ngfft(6)
129  me_fft=ngfft(11); nproc_fft=ngfft(10)
130  comm_fft = mpi_enreg%comm_fft
131  !write(std_out,*)"fourdp, nx,ny,nz,nfft =",n1,n2,n3,nfft
132 
133  fftcache=ngfft(8)
134  fftalg  =ngfft(7); fftalga =fftalg/100; fftalgb =mod(fftalg,100)/10
135 
136  xnorm=one/dble(n1*n2*n3)
137  !write(std_out,*)' fourdp :me_fft',me_fft,'nproc_fft',nproc_fft,'nfft',nfft
138 
139  if (fftalgb/=0 .and. fftalgb/=1) then
140    write(message, '(a,i4,a,a,a,a,a)' )&
141 &   'The input algorithm number fftalg=',fftalg,' is not allowed.',ch10,&
142 &   'The second digit (fftalg(B)) must be 0 or 1.',ch10,&
143 &   'Action : change fftalg in your input file.'
144    MSG_BUG(message)
145  end if
146 
147  if (fftalgb==1 .and. ALL(fftalga/=(/1,3,4,5/)) )then
148    write(message,'(a,i4,5a)')&
149 &   'The input algorithm number fftalg=',fftalg,' is not allowed.',ch10,&
150 &   'When fftalg(B) is 1, the allowed values for fftalg(A) are 1 and 4.',ch10,&
151 &   'Action: change fftalg in your input file.'
152    MSG_BUG(message)
153  end if
154 
155  if (n4<n1.or.n5<n2.or.n6<n3) then
156    write(message,'(a,3i8,a,3i8)')'  Each of n4,n5,n6=',n4,n5,n6,'must be >= n1, n2, n3 =',n1,n2,n3
157    MSG_BUG(message)
158  end if
159 
160  ! Get the distrib associated with this fft_grid => for i2 and i3 planes
161  call ptabs_fourdp(mpi_enreg,n2,n3,fftn2_distrib,ffti2_local,fftn3_distrib,ffti3_local)
162 
163  ! Branch immediately depending on nproc_fft 
164  if (nproc_fft > 1) then
165    call fourdp_mpi(cplex,nfft,ngfft,ndat1,isign,fftn2_distrib,ffti2_local,fftn3_distrib,ffti3_local,fofg,fofr,comm_fft)
166    goto 100
167  end if
168 
169  if (fftalga == FFT_FFTW3) then
170    ! Call sequential or MPI FFTW3 version.
171    if (nproc_fft == 1) then
172      !call wrtout(std_out,"FFTW3 SEQFOURDP","COLL")
173      call fftw3_seqfourdp(cplex,n1,n2,n3,n1,n2,n3,ndat1,isign,fofg,fofr)
174    else
175      !call wrtout(std_out,"FFTW3 MPIFOURDP","COLL")
176      call fftw3_mpifourdp(cplex,nfft,ngfft,ndat1,isign,&
177 &     fftn2_distrib,ffti2_local,fftn3_distrib,ffti3_local,fofg,fofr,comm_fft)
178    end if
179    ! Accumulate timing and return
180    call timab(260+tim_fourdp,2,tsec); return
181  end if
182 
183  if (fftalga==FFT_DFTI) then 
184    ! Call sequential or MPI MKL.
185    if (nproc_fft == 1) then
186      call dfti_seqfourdp(cplex,n1,n2,n3,n1,n2,n3,ndat1,isign,fofg,fofr)
187    else
188      MSG_ERROR("MPI fourdp with MKL cluster DFT not implemented")
189    end if
190    ! Accumulate timing and return
191    call timab(260+tim_fourdp,2,tsec); return
192  end if
193 
194  ! Here, deal  with the new SG FFT, complex-to-complex case
195  if (fftalga==FFT_SG2002 .and. (fftalgb==0 .or. cplex==2)) then
196    call sg2002_mpifourdp(cplex,nfft,ngfft,ndat1,isign,fftn2_distrib,ffti2_local,fftn3_distrib,ffti3_local,fofg,fofr,comm_fft)
197    !call sg2002_seqfourdp(cplex,nfft,ngfft,ndat1,isign,fftn2_fofg,fofr)
198  end if
199 
200  ! Here, deal with the new SG FFT, with real-to-complex
201  if (fftalga==FFT_SG2002 .and. fftalgb==1 .and. cplex==1) then
202    ABI_CHECK(nproc_fft == 1,"fftalg 41x does not support nproc_fft > 1")
203 
204    n1half1=n1/2+1; n1halfm=(n1+1)/2
205    n2half1=n2/2+1
206    ! n4half1 or n5half1 are the odd integers >= n1half1 or n2half1
207    n4half1=(n1half1/2)*2+1
208    n5half1=(n2half1/2)*2+1
209    ABI_ALLOCATE(workr,(2,n4half1,n5,n6))
210    ABI_ALLOCATE(workf,(2,n4,n6,n5half1))
211 
212    if (isign==1) then
213      do i3=1,n3
214        do i2=1,n2half1
215          base=n1*(i2-1+n2*(i3-1))
216          do i1=1,n1
217            workf(1,i1,i3,i2)=fofg(1,i1+base)
218            workf(2,i1,i3,i2)=fofg(2,i1+base)
219          end do
220        end do
221      end do
222 
223      !nd2proc=((n2-1)/nproc_fft) +1
224      !nd3proc=((n6-1)/nproc_fft) +1
225 
226      ! change the call? n5half1 et n6 ?
227      call sg2002_back(cplex,ndat1,n1,n2,n3,n4,n5,n6,n4half1,n5half1,n6,2,workf,workr,comm_fft)
228 
229      do i3=1,n3
230        do i2=1,n2
231          base=n1*(i2-1+n2*(i3-1))
232          do i1=1,n1half1-1
233            ! copy data
234            fofr(2*i1-1+base)=workr(1,i1,i2,i3)
235            fofr(2*i1  +base)=workr(2,i1,i2,i3)
236          end do
237          ! If n1 odd, must add last data
238          if((2*n1half1-2)/=n1)then
239            fofr(n1+base)=workr(1,n1half1,i2,i3)
240          end if
241        end do
242      end do
243 
244    else if (isign==-1) then
245      do i3=1,n3
246        do i2=1,n2
247          base=n1*(i2-1+n2*(i3-1))
248          do i1=1,n1half1-1
249            workr(1,i1,i2,i3)=fofr(2*i1-1+base)
250            workr(2,i1,i2,i3)=fofr(2*i1  +base)
251          end do
252          ! If n1 odd, must add last data
253          if((2*n1half1-2)/=n1)then
254            workr(1,n1half1,i2,i3)=fofr(n1+base)
255            workr(2,n1half1,i2,i3)=zero
256          end if
257        end do
258      end do
259 
260      call sg2002_forw(cplex,ndat1,n1,n2,n3,n4,n5,n6,n4half1,n5half1,n6,2,workr,workf,comm_fft)
261 
262      ! Transfer fft output to the original fft box
263      do i3=1,n3
264        do i2=1,n2half1
265          base=n1*(i2-1+n2*(i3-1))
266          do i1=1,n1
267            fofg(1,i1+base)=workf(1,i1,i3,i2)*xnorm
268            fofg(2,i1+base)=workf(2,i1,i3,i2)*xnorm
269          end do
270        end do
271 
272        ! Complete missing values with complex conjugate
273        ! Inverse of ix is located at nx+2-ix , except for ix=1, for which it is 1.
274        if(n2half1>2)then
275          do i2=2,n2+1-n2half1
276            base=n1*((n2+2-i2)-1)
277            if(i3/=1)base=base+n1*n2*((n3+2-i3)-1)
278            fofg(1,1+base)= workf(1,1,i3,i2)*xnorm
279            fofg(2,1+base)=-workf(2,1,i3,i2)*xnorm
280            do i1=2,n1
281              fofg(1,n1+2-i1+base)= workf(1,i1,i3,i2)*xnorm
282              fofg(2,n1+2-i1+base)=-workf(2,i1,i3,i2)*xnorm
283            end do
284          end do
285        end if
286      end do
287 
288    end if ! isign
289    ABI_DEALLOCATE(workr)
290    ABI_DEALLOCATE(workf)
291  end if
292 
293  ! Here, one calls the complex-to-complex FFT subroutine
294  if( (fftalgb==0 .or. cplex==2) .and. fftalga/=4 )then
295 
296    ABI_ALLOCATE(work1,(2,n4,n5,n6))
297    ABI_ALLOCATE(work2,(2,n4,n5,n6))
298 
299    if (isign==1) then
300 
301      ! Transfer fofg to the expanded fft box
302 !$OMP PARALLEL DO PRIVATE(base)
303      do i3=1,n3
304        do i2=1,n2
305          base=n1*(i2-1+n2*(i3-1))
306          do i1=1,n1
307            work1(1,i1,i2,i3)=fofg(1,i1+base)
308            work1(2,i1,i2,i3)=fofg(2,i1+base)
309          end do
310        end do
311      end do
312 
313      ! Call Stefan Goedecker C2C FFT
314      !call sg_fft_cc(fftcache,n1,n2,n3,n4,n5,n6,ndat1,isign,work1,work2)
315      call ccfft(ngfft,isign,n1,n2,n3,n4,n5,n6,ndat1,2,work1,work2,comm_fft)
316 
317      ! Take data from expanded box and put it in the original box.
318      if (cplex==1) then
319        ! REAL case
320 !$OMP PARALLEL DO PRIVATE(base)
321        do i3=1,n3
322          do i2=1,n2
323            base=n1*(i2-1+n2*(i3-1))
324            do i1=1,n1
325              fofr(i1+base)=work2(1,i1,i2,i3)
326            end do
327          end do
328        end do
329 
330      else
331        ! COMPLEX case
332 !$OMP PARALLEL DO PRIVATE(base)
333        do i3=1,n3
334          do i2=1,n2
335            base=2*n1*(i2-1+n2*(i3-1))
336            do i1=1,n1
337              fofr(2*i1-1+base)=work2(1,i1,i2,i3)
338              fofr(2*i1  +base)=work2(2,i1,i2,i3)
339            end do
340          end do
341        end do
342      end if
343 
344    else if (isign==-1) then
345 
346      ! Insert fofr into the augmented fft box
347      if (cplex==1) then 
348        ! REAL case
349 !$OMP PARALLEL DO PRIVATE(base) 
350        do i3=1,n3
351          do i2=1,n2
352            base=n1*(i2-1+n2*(i3-1))
353            do i1=1,n1
354              ! copy data
355              work1(1,i1,i2,i3)=fofr(i1+base)
356              work1(2,i1,i2,i3)=zero
357            end do
358          end do
359        end do
360      else
361        ! COMPLEX case
362 !$OMP PARALLEL DO PRIVATE(base)
363        do i3=1,n3
364          do i2=1,n2
365            base=2*n1*(i2-1+n2*(i3-1))
366            do i1=1,n1
367              ! copy data
368              work1(1,i1,i2,i3)=fofr(2*i1-1+base)
369              work1(2,i1,i2,i3)=fofr(2*i1  +base)
370            end do
371          end do
372        end do
373      end if ! cplex
374 
375      ! Call Stefan Goedecker C2C FFT
376      !call sg_fft_cc(fftcache,n1,n2,n3,n4,n5,n6,ndat1,isign,work1,work2)
377      call ccfft(ngfft,isign,n1,n2,n3,n4,n5,n6,ndat1,2,work1,work2,comm_fft)
378 
379      ! Transfer fft output to the original fft box
380 !$OMP PARALLEL DO PRIVATE(base)
381      do i3=1,n3
382        do i2=1,n2
383          base=n1*(i2-1+n2*(i3-1))
384          do i1=1,n1
385            fofg(1,i1+base)=work2(1,i1,i2,i3)*xnorm
386            fofg(2,i1+base)=work2(2,i1,i2,i3)*xnorm
387          end do
388        end do
389      end do
390 
391    end if ! isign
392 
393    ABI_DEALLOCATE(work1)
394    ABI_DEALLOCATE(work2)
395  end if ! End simple algorithm
396 
397  ! Here sophisticated algorithm based on S. Goedecker routines, only for the REAL case.
398  ! Take advantage of the fact that fofr is real, and that fofg has corresponding symmetry properties.
399  if( (fftalgb==1 .and. cplex==1) .and. fftalga/=4 )then
400    ABI_CHECK(nproc_fft==1,"nproc > 1 not supported")
401    ABI_CHECK(ndat1==1,"ndat > 1 not supported")
402    call sg_fft_rc(cplex,fofg,fofr,isign,nfft,ngfft)
403  end if
404 
405  100 call timab(260+tim_fourdp,2,tsec)
406 
407 end subroutine fourdp