TABLE OF CONTENTS
ABINIT/fourdp [ Functions ]
NAME
fourdp
FUNCTION
Conduct Fourier transform of REAL or COMPLEX function f(r)=fofr defined on fft grid in real space, to create complex f(G)=fofg defined on full fft grid in reciprocal space, in full storage mode, or the reverse operation. For the reverse operation, the final data is divided by nfftot. REAL case when cplex=1, COMPLEX case when cplex=2 Usually used for density and potentials. There are two different possibilities : fftalgb=0 means using the complex-to-complex FFT routine, irrespective of the value of cplex fftalgb=1 means using a real-to-complex FFT or a complex-to-complex FFT, depending on the value of cplex. The only real-to-complex FFT available is from SGoedecker library.
COPYRIGHT
Copyright (C) 1998-2018 ABINIT group (DCA, XG) This file is distributed under the terms of the GNU General Public License, see ~abinit/COPYING or http://www.gnu.org/copyleft/gpl.txt . For the initials of contributors, see ~abinit/doc/developers/contributors.txt .
INPUTS
cplex=1 if fofr is real, 2 if fofr is complex isign=sign of Fourier transform exponent: current convention uses +1 for transforming from G to r -1 for transforming from r to G. mpi_enreg=information about MPI parallelization nfft=(effective) number of FFT grid points (for this processor) ngfft(18)=contain all needed information about 3D FFT, see ~abinit/doc/variables/vargs.htm#ngfft paral_kgb=Flag related to the kpoint-band-fft parallelism tim_fourdp=timing code of the calling routine (can be set to 0 if not attributed)
TODO
Remove paral_kgb
SIDE EFFECTS
Input/Output fofg(2,nfft)=f(G), complex. fofr(cplex*nfft)=input function f(r) (real or complex)
PARENTS
atm2fft,bethe_salpeter,calc_smeared_density,dfpt_atm2fft,dfpt_dyfro dfpt_eltfrxc,dfpt_looppert,dfpt_newvtr,dfpt_scfcv,dfpt_vlocal dfptnl_loop,dieltcel,energy,fock_getghc,forces,fourdp_6d,fresidrsp green_kernel,gstate,hartre,hartrestr,initro,jellium,laplacian,m_dvdb m_electronpositron,m_epjdos,m_fft_prof,m_hidecudarec,m_kxc,m_ppmodel m_screening,make_efg_el,mklocl_realspace,mklocl_recipspace,moddiel moddiel_csrb,mrgscr,newrho,newvtr,nonlinear,nres2vres,odamix,pawmknhat pawmknhat_psipsi,pawmkrho,posdoppler,prcref,prcref_PMA,recursion recursion_nl,redgr,respfn,rotate_rho,scfcv,screening,setup_positron sigma,stress,symrhg,tddft,transgrid,vlocalstr,xcden,xcpot
CHILDREN
ccfft,dfti_seqfourdp,fftw3_mpifourdp,fftw3_seqfourdp,fourdp_mpi ptabs_fourdp,sg2002_back,sg2002_forw,sg2002_mpifourdp,sg_fft_rc,timab
SOURCE
65 #if defined HAVE_CONFIG_H 66 #include "config.h" 67 #endif 68 69 #include "abi_common.h" 70 71 subroutine fourdp(cplex,fofg,fofr,isign,mpi_enreg,nfft,ngfft,paral_kgb,tim_fourdp) 72 73 use defs_basis 74 use defs_abitypes 75 use m_profiling_abi 76 use m_errors 77 use m_fftcore 78 79 use m_mpinfo, only : ptabs_fourdp 80 use m_sgfft, only : sg_fft_rc 81 use m_sg2002, only : sg2002_mpifourdp, sg2002_back, sg2002_forw 82 use m_fftw3, only : fftw3_seqfourdp, fftw3_mpifourdp 83 use m_dfti, only : dfti_seqfourdp 84 use m_fft, only : fourdp_mpi 85 86 !This section has been created automatically by the script Abilint (TD). 87 !Do not modify the following lines by hand. 88 #undef ABI_FUNC 89 #define ABI_FUNC 'fourdp' 90 use interfaces_18_timing 91 use interfaces_53_ffts, except_this_one => fourdp 92 !End of the abilint section 93 94 implicit none 95 96 !Arguments ------------------------------------ 97 !scalars 98 integer,intent(in) :: cplex,isign,nfft,paral_kgb,tim_fourdp 99 type(MPI_type),intent(in) :: mpi_enreg 100 !arrays 101 integer,intent(in) :: ngfft(18) 102 real(dp),intent(inout) :: fofg(2,nfft),fofr(cplex*nfft) 103 104 !Local variables------------------------------- 105 !scalars 106 integer,parameter :: ndat1=1 107 integer :: fftalg,fftalga,fftalgb,fftcache,i1,i2,i3,base 108 integer :: n1,n1half1,n1halfm,n2,n2half1,n3,n4 109 integer :: n4half1,n5,n5half1,n6 !nd2proc,nd3proc,i3_local,i2_local, 110 integer :: comm_fft,nproc_fft,me_fft 111 real(dp) :: xnorm 112 character(len=500) :: message 113 !arrays 114 integer, ABI_CONTIGUOUS pointer :: fftn2_distrib(:),ffti2_local(:) 115 integer, ABI_CONTIGUOUS pointer :: fftn3_distrib(:),ffti3_local(:) 116 real(dp) :: tsec(2) 117 real(dp),allocatable :: work1(:,:,:,:),work2(:,:,:,:) 118 real(dp),allocatable :: workf(:,:,:,:),workr(:,:,:,:) 119 120 ! ************************************************************************* 121 122 ABI_UNUSED(paral_kgb) 123 124 ! Keep track of timing 125 call timab(260+tim_fourdp,1,tsec) 126 127 n1=ngfft(1); n2=ngfft(2); n3=ngfft(3) 128 n4=ngfft(4); n5=ngfft(5); n6=ngfft(6) 129 me_fft=ngfft(11); nproc_fft=ngfft(10) 130 comm_fft = mpi_enreg%comm_fft 131 !write(std_out,*)"fourdp, nx,ny,nz,nfft =",n1,n2,n3,nfft 132 133 fftcache=ngfft(8) 134 fftalg =ngfft(7); fftalga =fftalg/100; fftalgb =mod(fftalg,100)/10 135 136 xnorm=one/dble(n1*n2*n3) 137 !write(std_out,*)' fourdp :me_fft',me_fft,'nproc_fft',nproc_fft,'nfft',nfft 138 139 if (fftalgb/=0 .and. fftalgb/=1) then 140 write(message, '(a,i4,a,a,a,a,a)' )& 141 & 'The input algorithm number fftalg=',fftalg,' is not allowed.',ch10,& 142 & 'The second digit (fftalg(B)) must be 0 or 1.',ch10,& 143 & 'Action : change fftalg in your input file.' 144 MSG_BUG(message) 145 end if 146 147 if (fftalgb==1 .and. ALL(fftalga/=(/1,3,4,5/)) )then 148 write(message,'(a,i4,5a)')& 149 & 'The input algorithm number fftalg=',fftalg,' is not allowed.',ch10,& 150 & 'When fftalg(B) is 1, the allowed values for fftalg(A) are 1 and 4.',ch10,& 151 & 'Action: change fftalg in your input file.' 152 MSG_BUG(message) 153 end if 154 155 if (n4<n1.or.n5<n2.or.n6<n3) then 156 write(message,'(a,3i8,a,3i8)')' Each of n4,n5,n6=',n4,n5,n6,'must be >= n1, n2, n3 =',n1,n2,n3 157 MSG_BUG(message) 158 end if 159 160 ! Get the distrib associated with this fft_grid => for i2 and i3 planes 161 call ptabs_fourdp(mpi_enreg,n2,n3,fftn2_distrib,ffti2_local,fftn3_distrib,ffti3_local) 162 163 ! Branch immediately depending on nproc_fft 164 if (nproc_fft > 1) then 165 call fourdp_mpi(cplex,nfft,ngfft,ndat1,isign,fftn2_distrib,ffti2_local,fftn3_distrib,ffti3_local,fofg,fofr,comm_fft) 166 goto 100 167 end if 168 169 if (fftalga == FFT_FFTW3) then 170 ! Call sequential or MPI FFTW3 version. 171 if (nproc_fft == 1) then 172 !call wrtout(std_out,"FFTW3 SEQFOURDP","COLL") 173 call fftw3_seqfourdp(cplex,n1,n2,n3,n1,n2,n3,ndat1,isign,fofg,fofr) 174 else 175 !call wrtout(std_out,"FFTW3 MPIFOURDP","COLL") 176 call fftw3_mpifourdp(cplex,nfft,ngfft,ndat1,isign,& 177 & fftn2_distrib,ffti2_local,fftn3_distrib,ffti3_local,fofg,fofr,comm_fft) 178 end if 179 ! Accumulate timing and return 180 call timab(260+tim_fourdp,2,tsec); return 181 end if 182 183 if (fftalga==FFT_DFTI) then 184 ! Call sequential or MPI MKL. 185 if (nproc_fft == 1) then 186 call dfti_seqfourdp(cplex,n1,n2,n3,n1,n2,n3,ndat1,isign,fofg,fofr) 187 else 188 MSG_ERROR("MPI fourdp with MKL cluster DFT not implemented") 189 end if 190 ! Accumulate timing and return 191 call timab(260+tim_fourdp,2,tsec); return 192 end if 193 194 ! Here, deal with the new SG FFT, complex-to-complex case 195 if (fftalga==FFT_SG2002 .and. (fftalgb==0 .or. cplex==2)) then 196 call sg2002_mpifourdp(cplex,nfft,ngfft,ndat1,isign,fftn2_distrib,ffti2_local,fftn3_distrib,ffti3_local,fofg,fofr,comm_fft) 197 !call sg2002_seqfourdp(cplex,nfft,ngfft,ndat1,isign,fftn2_fofg,fofr) 198 end if 199 200 ! Here, deal with the new SG FFT, with real-to-complex 201 if (fftalga==FFT_SG2002 .and. fftalgb==1 .and. cplex==1) then 202 ABI_CHECK(nproc_fft == 1,"fftalg 41x does not support nproc_fft > 1") 203 204 n1half1=n1/2+1; n1halfm=(n1+1)/2 205 n2half1=n2/2+1 206 ! n4half1 or n5half1 are the odd integers >= n1half1 or n2half1 207 n4half1=(n1half1/2)*2+1 208 n5half1=(n2half1/2)*2+1 209 ABI_ALLOCATE(workr,(2,n4half1,n5,n6)) 210 ABI_ALLOCATE(workf,(2,n4,n6,n5half1)) 211 212 if (isign==1) then 213 do i3=1,n3 214 do i2=1,n2half1 215 base=n1*(i2-1+n2*(i3-1)) 216 do i1=1,n1 217 workf(1,i1,i3,i2)=fofg(1,i1+base) 218 workf(2,i1,i3,i2)=fofg(2,i1+base) 219 end do 220 end do 221 end do 222 223 !nd2proc=((n2-1)/nproc_fft) +1 224 !nd3proc=((n6-1)/nproc_fft) +1 225 226 ! change the call? n5half1 et n6 ? 227 call sg2002_back(cplex,ndat1,n1,n2,n3,n4,n5,n6,n4half1,n5half1,n6,2,workf,workr,comm_fft) 228 229 do i3=1,n3 230 do i2=1,n2 231 base=n1*(i2-1+n2*(i3-1)) 232 do i1=1,n1half1-1 233 ! copy data 234 fofr(2*i1-1+base)=workr(1,i1,i2,i3) 235 fofr(2*i1 +base)=workr(2,i1,i2,i3) 236 end do 237 ! If n1 odd, must add last data 238 if((2*n1half1-2)/=n1)then 239 fofr(n1+base)=workr(1,n1half1,i2,i3) 240 end if 241 end do 242 end do 243 244 else if (isign==-1) then 245 do i3=1,n3 246 do i2=1,n2 247 base=n1*(i2-1+n2*(i3-1)) 248 do i1=1,n1half1-1 249 workr(1,i1,i2,i3)=fofr(2*i1-1+base) 250 workr(2,i1,i2,i3)=fofr(2*i1 +base) 251 end do 252 ! If n1 odd, must add last data 253 if((2*n1half1-2)/=n1)then 254 workr(1,n1half1,i2,i3)=fofr(n1+base) 255 workr(2,n1half1,i2,i3)=zero 256 end if 257 end do 258 end do 259 260 call sg2002_forw(cplex,ndat1,n1,n2,n3,n4,n5,n6,n4half1,n5half1,n6,2,workr,workf,comm_fft) 261 262 ! Transfer fft output to the original fft box 263 do i3=1,n3 264 do i2=1,n2half1 265 base=n1*(i2-1+n2*(i3-1)) 266 do i1=1,n1 267 fofg(1,i1+base)=workf(1,i1,i3,i2)*xnorm 268 fofg(2,i1+base)=workf(2,i1,i3,i2)*xnorm 269 end do 270 end do 271 272 ! Complete missing values with complex conjugate 273 ! Inverse of ix is located at nx+2-ix , except for ix=1, for which it is 1. 274 if(n2half1>2)then 275 do i2=2,n2+1-n2half1 276 base=n1*((n2+2-i2)-1) 277 if(i3/=1)base=base+n1*n2*((n3+2-i3)-1) 278 fofg(1,1+base)= workf(1,1,i3,i2)*xnorm 279 fofg(2,1+base)=-workf(2,1,i3,i2)*xnorm 280 do i1=2,n1 281 fofg(1,n1+2-i1+base)= workf(1,i1,i3,i2)*xnorm 282 fofg(2,n1+2-i1+base)=-workf(2,i1,i3,i2)*xnorm 283 end do 284 end do 285 end if 286 end do 287 288 end if ! isign 289 ABI_DEALLOCATE(workr) 290 ABI_DEALLOCATE(workf) 291 end if 292 293 ! Here, one calls the complex-to-complex FFT subroutine 294 if( (fftalgb==0 .or. cplex==2) .and. fftalga/=4 )then 295 296 ABI_ALLOCATE(work1,(2,n4,n5,n6)) 297 ABI_ALLOCATE(work2,(2,n4,n5,n6)) 298 299 if (isign==1) then 300 301 ! Transfer fofg to the expanded fft box 302 !$OMP PARALLEL DO PRIVATE(base) 303 do i3=1,n3 304 do i2=1,n2 305 base=n1*(i2-1+n2*(i3-1)) 306 do i1=1,n1 307 work1(1,i1,i2,i3)=fofg(1,i1+base) 308 work1(2,i1,i2,i3)=fofg(2,i1+base) 309 end do 310 end do 311 end do 312 313 ! Call Stefan Goedecker C2C FFT 314 !call sg_fft_cc(fftcache,n1,n2,n3,n4,n5,n6,ndat1,isign,work1,work2) 315 call ccfft(ngfft,isign,n1,n2,n3,n4,n5,n6,ndat1,2,work1,work2,comm_fft) 316 317 ! Take data from expanded box and put it in the original box. 318 if (cplex==1) then 319 ! REAL case 320 !$OMP PARALLEL DO PRIVATE(base) 321 do i3=1,n3 322 do i2=1,n2 323 base=n1*(i2-1+n2*(i3-1)) 324 do i1=1,n1 325 fofr(i1+base)=work2(1,i1,i2,i3) 326 end do 327 end do 328 end do 329 330 else 331 ! COMPLEX case 332 !$OMP PARALLEL DO PRIVATE(base) 333 do i3=1,n3 334 do i2=1,n2 335 base=2*n1*(i2-1+n2*(i3-1)) 336 do i1=1,n1 337 fofr(2*i1-1+base)=work2(1,i1,i2,i3) 338 fofr(2*i1 +base)=work2(2,i1,i2,i3) 339 end do 340 end do 341 end do 342 end if 343 344 else if (isign==-1) then 345 346 ! Insert fofr into the augmented fft box 347 if (cplex==1) then 348 ! REAL case 349 !$OMP PARALLEL DO PRIVATE(base) 350 do i3=1,n3 351 do i2=1,n2 352 base=n1*(i2-1+n2*(i3-1)) 353 do i1=1,n1 354 ! copy data 355 work1(1,i1,i2,i3)=fofr(i1+base) 356 work1(2,i1,i2,i3)=zero 357 end do 358 end do 359 end do 360 else 361 ! COMPLEX case 362 !$OMP PARALLEL DO PRIVATE(base) 363 do i3=1,n3 364 do i2=1,n2 365 base=2*n1*(i2-1+n2*(i3-1)) 366 do i1=1,n1 367 ! copy data 368 work1(1,i1,i2,i3)=fofr(2*i1-1+base) 369 work1(2,i1,i2,i3)=fofr(2*i1 +base) 370 end do 371 end do 372 end do 373 end if ! cplex 374 375 ! Call Stefan Goedecker C2C FFT 376 !call sg_fft_cc(fftcache,n1,n2,n3,n4,n5,n6,ndat1,isign,work1,work2) 377 call ccfft(ngfft,isign,n1,n2,n3,n4,n5,n6,ndat1,2,work1,work2,comm_fft) 378 379 ! Transfer fft output to the original fft box 380 !$OMP PARALLEL DO PRIVATE(base) 381 do i3=1,n3 382 do i2=1,n2 383 base=n1*(i2-1+n2*(i3-1)) 384 do i1=1,n1 385 fofg(1,i1+base)=work2(1,i1,i2,i3)*xnorm 386 fofg(2,i1+base)=work2(2,i1,i2,i3)*xnorm 387 end do 388 end do 389 end do 390 391 end if ! isign 392 393 ABI_DEALLOCATE(work1) 394 ABI_DEALLOCATE(work2) 395 end if ! End simple algorithm 396 397 ! Here sophisticated algorithm based on S. Goedecker routines, only for the REAL case. 398 ! Take advantage of the fact that fofr is real, and that fofg has corresponding symmetry properties. 399 if( (fftalgb==1 .and. cplex==1) .and. fftalga/=4 )then 400 ABI_CHECK(nproc_fft==1,"nproc > 1 not supported") 401 ABI_CHECK(ndat1==1,"ndat > 1 not supported") 402 call sg_fft_rc(cplex,fofg,fofr,isign,nfft,ngfft) 403 end if 404 405 100 call timab(260+tim_fourdp,2,tsec) 406 407 end subroutine fourdp