TABLE OF CONTENTS
- ABINIT/m_dfti
- m_dfti/dfti_alloc_complex_dpc
- m_dfti/dfti_alloc_complex_spc
- m_dfti/dfti_alloc_real_dp
- m_dfti/dfti_c2c_ip_dpc
- m_dfti/dfti_c2c_ip_spc
- m_dfti/dfti_c2c_op_dpc
- m_dfti/dfti_c2c_op_spc
- m_dfti/dfti_c2r_op_dp
- m_dfti/dfti_c2r_op_dpc
- m_dfti/dfti_check_status
- m_dfti/dfti_fftpad_dp
- m_dfti/dfti_fftpad_dpc
- m_dfti/dfti_fftpad_spc
- m_dfti/dfti_fftrisc_dp
- m_dfti/dfti_fftrisc_mixprec
- m_dfti/dfti_fftrisc_sp
- m_dfti/dfti_fftug_dp
- m_dfti/dfti_fftug_dpc
- m_dfti/dfti_fftug_spc
- m_dfti/dfti_fftur_dp
- m_dfti/dfti_fftur_dpc
- m_dfti/dfti_fftur_spc
- m_dfti/dfti_many_dft_ip
- m_dfti/dfti_many_dft_op
- m_dfti/dfti_r2c_op_dp
- m_dfti/dfti_r2c_op_dpc
- m_dfti/dfti_seqfourdp
- m_dfti/dfti_seqfourwf
- m_dfti/dfti_spawn_threads_here
- m_dfti/dfti_use_lib_threads
ABINIT/m_dfti [ Modules ]
NAME
m_dfti
FUNCTION
This module provides wrappers for the MKL DFTI routines: in-place and out-of-place version.
COPYRIGHT
Copyright (C) 2009-2022 ABINIT group (MG) This file is distributed under the terms of the GNU General Public License, see ~abinit/COPYING or http://www.gnu.org/copyleft/gpl.txt .
NOTES
1) MPI parallelism is not supported 2) For better performance the FFT divisions should contain small factors (/2, 3, 5, 7, 11, 13/) see http://software.intel.com/sites/products/documentation/doclib/mkl_sa/11/mkl_userguide_lnx/index.htm
SOURCE
21 #if defined HAVE_CONFIG_H 22 #include "config.h" 23 #endif 24 25 #include "abi_common.h" 26 27 #ifdef HAVE_DFTI 28 29 ! Include and generate MKL_DFTI module 30 #include "mkl_dfti.f90" 31 32 ! Macros for template files. 33 #define FFTLIB "DFTI" 34 #define FFT_PREF(name) CONCAT(dfti_,name) 35 #define SPAWN_THREADS_HERE(ndat, nthreads) dfti_spawn_threads_here(ndat, nthreads) 36 37 #define FFT_DOUBLE 1 38 #define FFT_SINGLE 2 39 #define FFT_MIXPREC 3 40 41 #endif 42 43 MODULE m_dfti 44 45 use defs_basis 46 use m_abicore 47 use m_errors 48 use m_xomp 49 use m_cgtools 50 use m_cplxtools 51 use m_fftcore 52 use, intrinsic :: iso_c_binding 53 #ifdef HAVE_DFTI 54 use MKL_DFTI 55 #endif 56 57 use m_fstrings, only : basename, strcat, int2char10, itoa, sjoin 58 use m_hide_blas, only : xcopy 59 use m_fft_mesh, only : zpad_t, zpad_init, zpad_free 60 61 implicit none 62 63 private 64 65 ! Entry points for client code 66 public :: dfti_seqfourdp ! 3D FFT of lengths nx, ny, nz. Mainly used for densities or potentials. 67 public :: dfti_seqfourwf ! FFT transform of wavefunctions (high-level interface). 68 public :: dfti_fftrisc 69 public :: dfti_fftrisc_mixprec ! Mixed precision version of fftrisc: input/output in dp, computation done in sp. 70 public :: dfti_fftug ! G-->R, 3D zero-padded FFT of lengths nx, ny, nz. Mainly used for wavefunctions 71 public :: dfti_fftur ! R-->G, 3D zero-padded FFT of lengths nx, ny, nz. Mainly used for wavefunctions 72 73 ! Low-level routines. 74 public :: dfti_r2c_op ! Real to complex transform (out-of-place version). 75 public :: dfti_c2r_op ! Complex to real transform (out-of-place version). 76 public :: dfti_c2c_op ! complex to complex transform (out-of-place version). 77 public :: dfti_c2c_ip ! complex to complex transform (in-place version). 78 public :: dfti_many_dft_op ! Driver routine for many out-of-place 3D complex-to-complex FFTs. 79 public :: dfti_many_dft_ip ! Driver routine for many in-place 3D complex-to-complex FFTs. 80 public :: dfti_fftpad ! Driver routines for zero-padded FFT of wavefunctions. 81 82 !FIXME I don't know why gcc does not recognize this one 83 ! Perhaps I have to provide an interfaces for fofr(:,:,:,:) 84 public :: dfti_fftpad_dp ! Driver routines for zero-padded FFT of wavefunctions. 85 public :: dfti_fftug_dp ! Driver routines for zero-padded FFT of wavefunctions. 86 public :: dfti_use_lib_threads
m_dfti/dfti_alloc_complex_dpc [ Functions ]
[ Top ] [ m_dfti ] [ Functions ]
NAME
dfti_alloc_complex_dpc
FUNCTION
INPUTS
OUTPUT
SOURCE
2289 #ifdef HAVE_DFTI 2290 2291 subroutine dfti_alloc_complex_dpc(size, cptr, fptr) 2292 2293 !Arguments ------------------------------------ 2294 !scalars 2295 integer,intent(in) :: size 2296 complex(dpc),ABI_CONTIGUOUS pointer :: fptr(:) 2297 type(C_PTR),intent(out) :: cptr 2298 !arrays 2299 2300 ! ************************************************************************* 2301 2302 cptr = mkl_malloc( INT(2*size*C_DOUBLE, KIND=C_SIZE_T), DFTI_DEFAULT_ALIGNMENT_DP) 2303 if (.not. C_ASSOCIATED(cptr)) then 2304 ABI_ERROR("mkl_malloc returned NULL!") 2305 end if 2306 2307 call c_f_pointer(cptr, fptr, [size]) 2308 2309 end subroutine dfti_alloc_complex_dpc
m_dfti/dfti_alloc_complex_spc [ Functions ]
[ Top ] [ m_dfti ] [ Functions ]
NAME
dfti_alloc_complex_spc
FUNCTION
INPUTS
OUTPUT
SOURCE
2251 #ifdef HAVE_DFTI 2252 2253 subroutine dfti_alloc_complex_spc(size, cptr, fptr) 2254 2255 !Arguments ------------------------------------ 2256 !scalars 2257 integer,intent(in) :: size 2258 complex(spc),ABI_CONTIGUOUS pointer :: fptr(:) 2259 type(C_PTR),intent(out) :: cptr 2260 2261 ! ************************************************************************* 2262 2263 cptr = mkl_malloc( INT(2*size*C_FLOAT, KIND=C_SIZE_T), DFTI_DEFAULT_ALIGNMENT_SP) 2264 if (.not. C_ASSOCIATED(cptr)) then 2265 ABI_ERROR("mkl_malloc returned NULL!") 2266 end if 2267 2268 call c_f_pointer(cptr, fptr, [size]) 2269 2270 end subroutine dfti_alloc_complex_spc
m_dfti/dfti_alloc_real_dp [ Functions ]
[ Top ] [ m_dfti ] [ Functions ]
NAME
dfti_alloc_real_dp
FUNCTION
INPUTS
OUTPUT
SOURCE
2212 #ifdef HAVE_DFTI 2213 2214 subroutine dfti_alloc_real_dp(size, cptr, fptr) 2215 2216 !Arguments ------------------------------------ 2217 !scalars 2218 integer,intent(in) :: size 2219 real(dp),ABI_CONTIGUOUS pointer :: fptr(:) 2220 type(C_PTR),intent(out) :: cptr 2221 !arrays 2222 2223 ! ************************************************************************* 2224 2225 cptr = mkl_malloc( INT(size*C_DOUBLE, KIND=C_SIZE_T), DFTI_DEFAULT_ALIGNMENT_DP) 2226 if (.not. C_ASSOCIATED(cptr)) then 2227 ABI_ERROR("mkl_malloc returned NULL!") 2228 end if 2229 2230 call c_f_pointer(cptr, fptr, [size]) 2231 2232 end subroutine dfti_alloc_real_dp
m_dfti/dfti_c2c_ip_dpc [ Functions ]
[ Top ] [ m_dfti ] [ Functions ]
NAME
dfti_c2c_ip_dpc
FUNCTION
Driver routine for in-place 3D complex-complex FFT. TARGET: DPC arrays
INPUTS
nx,ny,nz=Number of points along the three directions. ldx,ldy,ldz=Physical dimensions of the array. ndat=Number of FFTs to be done. iscale=0 if G --> R FFT should not be scaled. isign= +1 : ff(G) => ff(R); -1 : ff(R) => ff(G)
SIDE EFFECTS
ff(ldx*ldy*ldz*ndat)= In input: the complex array to be transformed. In output: the Fourier transformed in the space specified by isign.
SOURCE
1346 subroutine dfti_c2c_ip_dpc(nx, ny, nz, ldx, ldy, ldz, ndat, iscale, isign, ff) 1347 1348 !Arguments ------------------------------------ 1349 !scalars 1350 integer,intent(in) :: nx,ny,nz,ldx,ldy,ldz,ndat,iscale,isign 1351 !arrays 1352 complex(dpc),intent(inout) :: ff(ldx*ldy*ldz*ndat) 1353 1354 ! ************************************************************************* 1355 1356 ! Include Fortran template 1357 #undef DEV_DFTI_PRECISION 1358 #define DEV_DFTI_PRECISION DFTI_DOUBLE 1359 1360 #include "dfti_c2c_ip.finc" 1361 1362 end subroutine dfti_c2c_ip_dpc
m_dfti/dfti_c2c_ip_spc [ Functions ]
[ Top ] [ m_dfti ] [ Functions ]
NAME
dfti_c2c_ip_spc
FUNCTION
Driver routine for in-place 3D complex-complex FFT. TARGET: SPC arrays
INPUTS
nx,ny,nz=Number of points along the three directions. ldx,ldy,ldz=Physical dimensions of the array. ndat=Number of FFTs to be done. isign= +1 : ff(G) => ff(R); -1 : ff(R) => ff(G)
SIDE EFFECTS
ff(ldx*ldy*ldz*ndat)= In input: the complex array to be transformed. In output: the Fourier transform in the space specified by isign.
SOURCE
1304 subroutine dfti_c2c_ip_spc(nx, ny, nz, ldx, ldy, ldz, ndat, iscale, isign, ff) 1305 1306 !Arguments ------------------------------------ 1307 !scalars 1308 integer,intent(in) :: nx,ny,nz,ldx,ldy,ldz,ndat,iscale,isign 1309 !arrays 1310 complex(spc),intent(inout) :: ff(ldx*ldy*ldz*ndat) 1311 1312 ! ************************************************************************* 1313 1314 ! Include Fortran template 1315 #undef DEV_DFTI_PRECISION 1316 #define DEV_DFTI_PRECISION DFTI_SINGLE 1317 1318 #include "dfti_c2c_ip.finc" 1319 1320 end subroutine dfti_c2c_ip_spc
m_dfti/dfti_c2c_op_dpc [ Functions ]
[ Top ] [ m_dfti ] [ Functions ]
NAME
dfti_c2c_op_dpc
FUNCTION
Driver routine for out-of-place 3D complex-complex FFT of lengths nx, ny, nz. TARGET: DPC arrays
INPUTS
nx,ny,nz=Number of points along the three directions. ldx,ldy,ldz=Physical dimensions of the array. ndat=Number of FFTs to be done. iscale=0 if G --> R FFT should not be scaled. isign= +1 : ff(G) => gg(R); -1 : ff(R) => gg(G) ff(ldx*ldy*ldz*ndat)=The array to be transformed.
OUTPUT
gg(ldx*ldy*ldz*ndat)=The FFT of ff.
SOURCE
1431 subroutine dfti_c2c_op_dpc(nx, ny, nz, ldx, ldy, ldz, ndat, iscale, isign, ff, gg) 1432 1433 !Arguments ------------------------------------ 1434 !scalars 1435 integer,intent(in) :: nx,ny,nz,ldx,ldy,ldz,isign,ndat,iscale 1436 !arrays 1437 complex(dpc),intent(in) :: ff(ldx*ldy*ldz*ndat) 1438 complex(dpc),intent(out) :: gg(ldx*ldy*ldz*ndat) 1439 1440 ! ************************************************************************* 1441 1442 ! Include Fortran template 1443 #undef DEV_DFTI_PRECISION 1444 #define DEV_DFTI_PRECISION DFTI_DOUBLE 1445 1446 #include "dfti_c2c_op.finc" 1447 1448 end subroutine dfti_c2c_op_dpc
m_dfti/dfti_c2c_op_spc [ Functions ]
[ Top ] [ m_dfti ] [ Functions ]
NAME
dfti_c2c_op_spc
FUNCTION
Driver routine for out-of-place 3D complex-complex FFT of lengths nx, ny, nz. TARGET: spc arrays
INPUTS
nx,ny,nz=Number of points along the three directions. ldx,ldy,ldz=Physical dimensions of the array. ndat=Number of FFTs to be done. iscale=0 if G --> R FFT should not be scaled. isign= +1 : ff(G) => gg(R); -1 : ff(R) => gg(G) ff(ldx*ldy*ldz*ndat)=The array to be transformed.
OUTPUT
gg(ldx*ldy*ldz*ndat)=The FFT of ff.
SOURCE
1388 subroutine dfti_c2c_op_spc(nx, ny, nz, ldx, ldy, ldz, ndat, iscale, isign, ff, gg) 1389 1390 !Arguments ------------------------------------ 1391 !scalars 1392 integer,intent(in) :: nx,ny,nz,ldx,ldy,ldz,isign,ndat, iscale 1393 !arrays 1394 complex(spc),intent(in) :: ff(ldx*ldy*ldz*ndat) 1395 complex(spc),intent(out) :: gg(ldx*ldy*ldz*ndat) 1396 1397 ! ************************************************************************* 1398 1399 ! Include Fortran template 1400 #undef DEV_DFTI_PRECISION 1401 #define DEV_DFTI_PRECISION DFTI_SINGLE 1402 1403 #include "dfti_c2c_op.finc" 1404 1405 end subroutine dfti_c2c_op_spc
m_dfti/dfti_c2r_op_dp [ Functions ]
[ Top ] [ m_dfti ] [ Functions ]
NAME
dfti_c2r_op_dp
FUNCTION
Driver routine for out-of-place 3D complex-to-real FFT of lengths nx, ny, nz.
INPUTS
nx,ny,nz=Number of point along the three directions. ldx,ldy,ldz=Physical dimension of the f array (to avoid cache conflicts). ndat=Number of FFTs to be done. ff(2,ldx*ldy*ldz*ndat)=The complex array to be transformed.
OUTPUT
gg(ldx*ldy*ldz*ndat)=The backwards real FFT of ff.
SOURCE
2044 subroutine dfti_c2r_op_dp(nx, ny, nz, ldx, ldy, ldz, ndat, ff, gg) 2045 2046 !Arguments ------------------------------------ 2047 !scalars 2048 integer,intent(in) :: nx,ny,nz,ldx,ldy,ldz,ndat 2049 !arrays 2050 real(dp),target,intent(in) :: ff(2*ldx*ldy*ldz*ndat) 2051 real(dp),intent(inout) :: gg(ldx*ldy*ldz*ndat) !vz_i 2052 2053 #ifdef HAVE_DFTI 2054 !Local variables------------------------------- 2055 !scalars 2056 type(C_ptr) :: ff_cptr 2057 !arrays 2058 complex(dpc),ABI_CONTIGUOUS pointer :: ff_fptr(:) 2059 2060 ! ************************************************************************* 2061 2062 ff_cptr = C_loc(ff) 2063 call C_F_pointer(ff_cptr,ff_fptr, shape=(/ldx*ldy*ldz*ndat/)) 2064 2065 call dfti_c2r_op_dpc(nx,ny,nz,ldx,ldy,ldz,ndat,ff_fptr,gg) 2066 2067 #else 2068 ABI_ERROR("FFT_DFTI support not activated") 2069 ABI_UNUSED((/nx,ny,nz,ldx,ldy,ldz/)) 2070 ABI_UNUSED((/ff(1),gg(1)/)) 2071 #endif 2072 2073 end subroutine dfti_c2r_op_dp
m_dfti/dfti_c2r_op_dpc [ Functions ]
[ Top ] [ m_dfti ] [ Functions ]
NAME
dfti_c2r_op_dpc
FUNCTION
Driver routine for out-of-place 3D complex-to-real FFT of lengths nx, ny, nz.
INPUTS
nx,ny,nz=Number of point along the three directions. ldx,ldy,ldz=Physical dimension of the f array (to avoid cache conflicts). ndat=Number of FFTs to be done. ff(2,ldx*ldy*ldz*ndat)=The complex array to be transformed.
OUTPUT
gg(2,ldx*ldy*ldz*ndat)=The backwards real FFT of ff.
SOURCE
1947 subroutine dfti_c2r_op_dpc(nx, ny, nz, ldx, ldy, ldz, ndat, ff, gg) 1948 1949 !Arguments ------------------------------------ 1950 !scalars 1951 integer,intent(in) :: nx,ny,nz,ldx,ldy,ldz,ndat 1952 !arrays 1953 complex(dpc),intent(in) :: ff(ldx*ldy*ldz*ndat) 1954 real(dp),intent(out) :: gg(ldx*ldy*ldz*ndat) 1955 1956 #ifdef HAVE_DFTI 1957 !Local variables------------------------------- 1958 !scalars 1959 integer :: status,nhp,padx,i2,i3,igp,igf,idat,padatf,padatp,ii 1960 type(DFTI_DESCRIPTOR),pointer :: Desc 1961 type(C_PTR) :: cptr 1962 !arrays 1963 complex(dpc),ABI_CONTIGUOUS pointer :: ff_hp(:) 1964 1965 ! ************************************************************************* 1966 1967 !stride = 1 1968 !idist = nhp 1969 !odist = nx*ny*nz 1970 !n = (/nx,ny,nz/) 1971 !inembed = (/(nx/2+1),ny,nz/) 1972 !onembed = (/nx,ny,nz/) ! check this 1973 !my_plan = retrieve_plan3(n,ndat,inembed,stride,idist,onembed,stride,odist,FFTW_BACKWARD,my_flags,Saved_plans) 1974 1975 ! Fill the Hermitian part: Hermitian redundancy: out[i] is the conjugate of out[n-i] 1976 padx = (nx/2+1) 1977 nhp = padx*ny*nz 1978 1979 call dfti_alloc_complex(nhp*ndat,cptr,ff_hp) 1980 1981 !!$OMP PARALLEL DO PRIVATE(padatf,padatp,igf,igp) 1982 do idat=1,ndat 1983 padatf=(idat-1)*ldx*ldy*ldz 1984 padatp=(idat-1)*padx*ny*nz 1985 do i3=1,nz 1986 do i2=1,ny 1987 igf = (i3-1)*ldx*ldy + (i2-1)*ldx + padatf 1988 igp = (i3-1)*padx*ny + (i2-1)*padx + padatp 1989 ff_hp(igp+1:igp+padx) = ff(igf+1:igf+padx) 1990 end do 1991 end do 1992 end do 1993 1994 status = DftiCreateDescriptor(Desc, DFTI_DOUBLE, DFTI_REAL, 3, (/nx,ny,nz/) ) 1995 DFTI_CHECK(status) 1996 1997 status = DftiSetValue(Desc, DFTI_CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_COMPLEX) 1998 status = DftiSetValue(Desc, DFTI_PLACEMENT, DFTI_NOT_INPLACE ) 1999 status = DftiSetValue(Desc, DFTI_INPUT_STRIDES, (/0, 1, padx, padx*ny/)) 2000 status = DftiSetValue(Desc, DFTI_INPUT_DISTANCE, nhp) 2001 status = DftiSetValue(Desc, DFTI_OUTPUT_STRIDES, (/0, 1, ldx, ldx*ldy/)) 2002 status = DftiSetValue(Desc, DFTI_OUTPUT_DISTANCE, ldx*ldy*ldz) 2003 status = DftiSetValue(Desc, DFTI_NUMBER_OF_TRANSFORMS, ndat) 2004 DFTI_CHECK(status) 2005 2006 DFTI_CHECK( DftiCommitDescriptor(Desc) ) 2007 2008 DFTI_CHECK( DftiComputeBackward(Desc, ff_hp, gg) ) 2009 2010 DFTI_CHECK( DftiFreeDescriptor(Desc) ) 2011 2012 call dfti_free(cptr) 2013 2014 #else 2015 ABI_ERROR("FFT_DFTI support not activated") 2016 ABI_UNUSED((/nx,ny,nz,ldx,ldy,ldz/)) 2017 ABI_UNUSED(ff(1)) 2018 ABI_UNUSED(gg(1)) 2019 #endif 2020 2021 end subroutine dfti_c2r_op_dpc
m_dfti/dfti_check_status [ Functions ]
[ Top ] [ m_dfti ] [ Functions ]
NAME
dfti_check_status
FUNCTION
Error handler for DFTI wrappers. Print error message and abort.
INPUTS
status = status error reported by dfti. [file] = file name [line] = line number
SOURCE
2092 #ifdef HAVE_DFTI 2093 2094 subroutine dfti_check_status(status, file, line) 2095 2096 !Arguments ------------------------------------ 2097 !scalars 2098 integer,intent(in) :: status 2099 integer,optional,intent(in) :: line 2100 character(len=*),optional,intent(in) :: file 2101 2102 !Local variables------------------------------- 2103 integer :: f90line 2104 character(len=10) :: lnum 2105 character(len=500) :: f90name 2106 character(len=500) :: my_msg 2107 character(len=DFTI_MAX_MESSAGE_LENGTH+500) :: err_msg 2108 2109 ! ************************************************************************* 2110 2111 if (PRESENT(line)) then 2112 f90line=line 2113 else 2114 f90line=0 2115 end if 2116 call int2char10(f90line,lnum) 2117 2118 if (PRESENT(file)) then 2119 f90name = basename(file) 2120 else 2121 f90name='Subroutine Unknown' 2122 end if 2123 2124 my_msg=strcat(f90name,":",lnum,":") 2125 2126 if (status /= 0) then 2127 if (.not. DftiErrorClass(status, DFTI_NO_ERROR)) then 2128 err_msg = strcat(my_msg," Error: ",DftiErrorMessage(status)) 2129 ABI_ERROR(err_msg) 2130 end if 2131 end if 2132 2133 end subroutine dfti_check_status
m_dfti/dfti_fftpad_dp [ Functions ]
[ Top ] [ m_dfti ] [ Functions ]
NAME
dfti_fftpad_dp
FUNCTION
This routine transforms wavefunctions using 3D zero-padded FFTs with DFTI. The 3D ffts are computed only on lines and planes which have non zero elements (see zpad_init) FFT transform is in-place.
INPUTS
nx,ny,nz=Logical dimensions of the FFT mesh. ldx,ldy,ldz=Physical dimension of the f array (to avoid cache conflicts). ndat=Numer of FFTs mgfft=MAX(nx,ny,nz), only used to dimension gbound isign=The sign of the transform. gbound(2*mgfft+8,2)= The boundaries of the basis sphere of G vectors at a given k-point. See sphereboundary for more info.
SIDE EFFECTS
ff(2*ldx*ldy*ldz*ndat)= input: The array with the data to be transformed. output: The results of the FFT.
SOURCE
1599 subroutine dfti_fftpad_dp(ff, nx, ny, nz, ldx, ldy, ldz, ndat, mgfft, isign, gbound, iscale) 1600 1601 !Arguments ------------------------------------ 1602 !scalars 1603 integer,intent(in) :: nx,ny,nz,ldx,ldy,ldz,ndat,mgfft,isign 1604 !arrays 1605 integer,intent(in) :: gbound(2*mgfft+8,2) 1606 real(dp),target,intent(inout) :: ff(2*ldx*ldy*ldz*ndat) 1607 integer,optional,intent(in) :: iscale 1608 1609 !Local variables------------------------------- 1610 #ifdef HAVE_DFTI 1611 !scalars 1612 type(C_ptr) :: cptr 1613 integer :: iscale__ 1614 !arrays 1615 complex(dpc),ABI_CONTIGUOUS pointer :: fptr(:) 1616 1617 ! ************************************************************************* 1618 1619 iscale__ = merge(1, 0, isign == -1); if (present(iscale)) iscale__ = iscale 1620 1621 ! Associate complex fptr with real ff via the C pointer 1622 cptr = C_loc(ff) 1623 call C_F_pointer(cptr, fptr, SHAPE=[ldx*ldy*ldz*ndat]) 1624 1625 ! Call complex version --> a lot of boilerplate code avoided 1626 call dfti_fftpad_dpc(fptr, nx, ny, nz, ldx, ldy, ldz, ndat, mgfft, isign, gbound) 1627 1628 #else 1629 ABI_ERROR("FFT_DFTI support not activated") 1630 ABI_UNUSED((/nx,ny,nz,ldx,ldy,ldz,ndat,mgfft,isign,iscale/)) 1631 ABI_UNUSED(gbound(1,1)) 1632 ABI_UNUSED(ff(1)) 1633 #endif 1634 1635 end subroutine dfti_fftpad_dp
m_dfti/dfti_fftpad_dpc [ Functions ]
[ Top ] [ m_dfti ] [ Functions ]
NAME
dfti_fftpad_dpc
FUNCTION
This routine transforms wavefunctions using 3D zero-padded FFTs with DFTI. The 3D ffts are computed only on lines and planes which have non zero elements (see zpad_init) FFT transform is in-place. Target: complex double-precision arrays.
INPUTS
nx,ny,nz=Logical dimensions of the FFT mesh. ldx,ldy,ldz=Physical dimension of the f array (to avoid cache conflicts). ndat=Number of FFTs. mgfft=MAX(nx,ny,nz), only used to dimension gbound isign=The sign of the transform. gbound(2*mgfft+8,2)= The boundaries of the basis sphere of G vectors at a given k-point. See sphereboundary for more info.
SIDE EFFECTS
ff(ldx*ldy*ldz*ndat)= input: The array with the data to be transformed. output: The results of the FFT.
SOURCE
1665 subroutine dfti_fftpad_dpc(ff, nx, ny, nz, ldx, ldy, ldz, ndat, mgfft, isign, gbound, iscale) 1666 1667 !Arguments ------------------------------------ 1668 !scalars 1669 integer,intent(in) :: nx,ny,nz,ldx,ldy,ldz,ndat,mgfft,isign 1670 integer,optional,intent(in) :: iscale 1671 !arrays 1672 integer,intent(in) :: gbound(2*mgfft+8,2) 1673 complex(dpc),intent(inout) :: ff(ldx*ldy*ldz*ndat) 1674 1675 !Local variables------------------------------- 1676 #ifdef HAVE_DFTI 1677 1678 ! ************************************************************************* 1679 1680 ! Include Fortran template 1681 #undef DEV_DFTI_PRECISION 1682 #define DEV_DFTI_PRECISION DFTI_DOUBLE 1683 1684 #include "dfti_fftpad.finc" 1685 1686 #else 1687 ABI_ERROR("FFT_DFTI support not activated") 1688 ABI_UNUSED((/nx,ny,nz,ldx,ldy,ldz,ndat,mgfft,isign,iscale/)) 1689 ABI_UNUSED(gbound(1,1)) 1690 ABI_UNUSED(ff(1)) 1691 #endif 1692 1693 end subroutine dfti_fftpad_dpc
m_dfti/dfti_fftpad_spc [ Functions ]
[ Top ] [ m_dfti ] [ Functions ]
NAME
dfti_fftpad_spc
FUNCTION
This routine transforms wavefunctions using 3D zero-padded FFTs with DFTI. The 3D ffts are computed only on lines and planes which have non zero elements (see zpad_init) FFT transform is in-place. Target: complex SPC arrays.
INPUTS
nx,ny,nz=Logical dimensions of the FFT mesh. ldx,ldy,ldz=Physical dimension of the f array (to avoid cache conflicts). ndat=Number of FFTs. mgfft=MAX(nx,ny,nz), only used to dimension gbound isign=The sign of the transform. gbound(2*mgfft+8,2)= The boundaries of the basis sphere of G vectors at a given k-point. See sphereboundary for more info.
SIDE EFFECTS
ff(ldx*ldy*ldz*ndat)= input: The array with the data to be transformed. output: The results of the FFT.
SOURCE
1723 subroutine dfti_fftpad_spc(ff, nx, ny, nz, ldx, ldy, ldz, ndat, mgfft, isign, gbound, iscale) 1724 1725 !Arguments ------------------------------------ 1726 !scalars 1727 integer,intent(in) :: nx,ny,nz,ldx,ldy,ldz,ndat,mgfft,isign 1728 integer,optional,intent(in) :: iscale 1729 !arrays 1730 integer,intent(in) :: gbound(2*mgfft+8,2) 1731 complex(spc),intent(inout) :: ff(ldx*ldy*ldz*ndat) 1732 1733 #ifdef HAVE_DFTI 1734 1735 ! Include Fortran template 1736 #undef DEV_DFTI_PRECISION 1737 #define DEV_DFTI_PRECISION DFTI_SINGLE 1738 1739 #include "dfti_fftpad.finc" 1740 1741 #else 1742 ABI_ERROR("FFT_DFTI support not activated") 1743 ABI_UNUSED((/nx,ny,nz,ldx,ldy,ldz,ndat,mgfft,isign,iscale/)) 1744 ABI_UNUSED(gbound(1,1)) 1745 ABI_UNUSED(ff(1)) 1746 #endif 1747 1748 end subroutine dfti_fftpad_spc
m_dfti/dfti_fftrisc_dp [ Functions ]
[ Top ] [ m_dfti ] [ Functions ]
NAME
dfti_fftrisc_dp
FUNCTION
Carry out Fourier transforms between real and reciprocal (G) space, for wavefunctions, contained in a sphere in reciprocal space, in both directions. Also accomplish some post-processing.
NOTES
Specifically uses rather sophisticated algorithms, based on S Goedecker routines, specialized for superscalar RISC architecture. Zero padding : saves 7/12 execution time Bi-dimensional data locality in most of the routine : cache reuse For k-point (0 0 0) : takes advantage of symmetry of data. Note however that no blocking is used, in both 1D z-transform or subsequent 2D transform. This should be improved.
INPUTS
cplex= if 1 , denpot is real, if 2 , denpot is complex (cplex=2 only allowed for option=2 when istwf_k=1) one can also use cplex=0 if option=0 or option=3 fofgin(2,npwin)=holds input wavefunction in G vector basis sphere. gboundin(2*mgfft+8,2)=sphere boundary info for reciprocal to real space gboundout(2*mgfft+8,2)=sphere boundary info for real to reciprocal space istwf_k=option parameter that describes the storage of wfs kg_kin(3,npwin)=reduced planewave coordinates, input kg_kout(3,npwout)=reduced planewave coordinates, output mgfft=maximum size of 1D FFTs ngfft(18)=contain all needed information about 3D FFT, see ~abinit/doc/variables/vargs.htm#ngfft npwin=number of elements in fofgin array (for option 0, 1 and 2) npwout=number of elements in fofgout array (for option 2 and 3) ldx,ldy,ldz=ngfft(4),ngfft(5),ngfft(6), dimensions of fofr. option= if 0: do direct FFT if 1: do direct FFT, then sum the density if 2: do direct FFT, multiply by the potential, then do reverse FFT if 3: do reverse FFT only weight=weight to be used for the accumulation of the density in real space (needed only when option=1)
OUTPUT
(see side effects)
OPTIONS
The different options are: - reciprocal to real space and output the result (when option=0), - reciprocal to real space and accumulate the density (when option=1) or - reciprocal to real space, apply the local potential to the wavefunction in real space and produce the result in reciprocal space (when option=2) - real space to reciprocal space (when option=3). option=0 IS NOT ALLOWED when istwf_k>2 option=3 IS NOT ALLOWED when istwf_k>=2
SIDE EFFECTS
for option==0, fofgin(2,npwin)=holds input wavefunction in G sphere; fofr(2,ldx,ldy,ldz) contains the Fourier Transform of fofgin; no use of denpot, fofgout and npwout. for option==1, fofgin(2,npwin)=holds input wavefunction in G sphere; denpot(cplex*ldx,ldy,ldz) contains the input density at input, and the updated density at output; no use of fofgout and npwout. for option==2, fofgin(2,npwin)=holds input wavefunction in G sphere; denpot(cplex*ldx,ldy,ldz) contains the input local potential; fofgout(2,npwout) contains the output function; for option==3, fofr(2,ldx,ldy,ldz) contains the real space wavefunction; fofgout(2,npwout) contains its Fourier transform; no use of fofgin and npwin.
SOURCE
700 subroutine dfti_fftrisc_dp(cplex,denpot,fofgin,fofgout,fofr,gboundin,gboundout,istwf_k,kg_kin,kg_kout,& 701 mgfft,ngfft,npwin,npwout,ldx,ldy,ldz,option, & 702 weight_r, weight_i, abi_convention, iscale) 703 704 !Arguments ------------------------------------ 705 !scalars 706 integer,intent(in) :: cplex,istwf_k,mgfft,ldx,ldy,ldz,npwin,npwout,option 707 real(dp),intent(in) :: weight_i,weight_r 708 !arrays 709 integer,intent(in) :: gboundin(2*mgfft+8,2),gboundout(2*mgfft+8,2) 710 integer,intent(in) :: kg_kin(3,npwin),kg_kout(3,npwout),ngfft(18) 711 real(dp),intent(in) :: fofgin(2,npwin) 712 real(dp),intent(inout) :: denpot(cplex*ldx,ldy,ldz) 713 real(dp),intent(inout) :: fofr(2,ldx*ldy*ldz) 714 real(dp),intent(inout) :: fofgout(2,npwout) !vz_i 715 logical,optional,intent(in) :: abi_convention 716 integer,optional,intent(in) :: iscale 717 718 ! ************************************************************************* 719 720 #ifdef HAVE_DFTI 721 722 #undef FFT_PRECISION 723 #undef MYKIND 724 #undef MYCZERO 725 #undef MYCMPLX 726 #undef MYCONJG 727 728 #define FFT_PRECISION DFTI_DOUBLE 729 #define MYKIND DPC 730 #define MYCZERO (0._dp,0._dp) 731 #define MYCMPLX DCMPLX 732 #define MYCONJG DCONJG 733 734 #include "dfti_fftrisc.finc" 735 736 #else 737 ABI_ERROR("DFTI support not activated") 738 ABI_UNUSED((/cplex,gboundin(1,1),gboundout(1,1),istwf_k,kg_kin(1,1),kg_kout(1,1),iscale/)) 739 ABI_UNUSED((/mgfft,ngfft(1),npwin,npwout,ldx,ldy,ldz,option/)) 740 ABI_UNUSED((/denpot(1,1,1),fofgin(1,1),fofgout(1,1),fofr(1,1),weight_r,weight_i/)) 741 ABI_UNUSED(abi_convention) 742 #endif 743 744 end subroutine dfti_fftrisc_dp
m_dfti/dfti_fftrisc_mixprec [ Functions ]
[ Top ] [ m_dfti ] [ Functions ]
NAME
dfti_fftrisc_mixprec
FUNCTION
Carry out Fourier transforms between real and reciprocal (G) space, for wavefunctions, contained in a sphere in reciprocal space, in both directions. Also accomplish some post-processing. This is the Mixed Precision version (dp in input, FFT done with sp data, output is dp) See dfti_fftrisc_dp for API doc.
SOURCE
762 subroutine dfti_fftrisc_mixprec(cplex,denpot,fofgin,fofgout,fofr,gboundin,gboundout,istwf_k,kg_kin,kg_kout,& 763 mgfft,ngfft,npwin,npwout,ldx,ldy,ldz,option,& 764 weight_r,weight_i, abi_convention, iscale) ! optional 765 766 !Arguments ------------------------------------ 767 !scalars 768 integer,intent(in) :: cplex,istwf_k,mgfft,ldx,ldy,ldz,npwin,npwout,option 769 real(dp),intent(in) :: weight_i,weight_r 770 !arrays 771 integer,intent(in) :: gboundin(2*mgfft+8,2),gboundout(2*mgfft+8,2) 772 integer,intent(in) :: kg_kin(3,npwin),kg_kout(3,npwout),ngfft(18) 773 real(dp),intent(in) :: fofgin(2,npwin) 774 real(dp),intent(inout) :: denpot(cplex*ldx,ldy,ldz) 775 real(dp),intent(inout) :: fofr(2,ldx*ldy*ldz) 776 real(dp),intent(inout) :: fofgout(2,npwout) !vz_i 777 logical,optional,intent(in) :: abi_convention 778 integer,optional,intent(in) :: iscale 779 780 ! ************************************************************************* 781 782 #ifdef HAVE_DFTI 783 784 #undef FFT_PRECISION 785 #undef MYKIND 786 #undef MYCZERO 787 #undef MYCMPLX 788 #undef MYCONJG 789 790 #define FFT_PRECISION DFTI_SINGLE 791 #define MYKIND SPC 792 #define MYCZERO (0._sp,0._sp) 793 #define MYCMPLX CMPLX 794 #define MYCONJG CONJG 795 796 #define HAVE_DFTI_MIXED_PRECISION 1 797 798 #include "dfti_fftrisc.finc" 799 800 #undef HAVE_DFTI_MIXED_PRECISION 801 802 #else 803 ABI_ERROR("DFTI support not activated") 804 ABI_UNUSED((/cplex,gboundin(1,1),gboundout(1,1),istwf_k,kg_kin(1,1),kg_kout(1,1),iscale/)) 805 ABI_UNUSED((/mgfft,ngfft(1),npwin,npwout,ldx,ldy,ldz,option/)) 806 ABI_UNUSED((/denpot(1,1,1),fofgin(1,1),fofgout(1,1),fofr(1,1),weight_r,weight_i/)) 807 ABI_UNUSED(abi_convention) 808 #endif 809 810 end subroutine dfti_fftrisc_mixprec
m_dfti/dfti_fftrisc_sp [ Functions ]
[ Top ] [ m_dfti ] [ Functions ]
NAME
dfti_fftrisc_sp
FUNCTION
Carry out Fourier transforms between real and reciprocal (G) space, for wavefunctions, contained in a sphere in reciprocal space, in both directions. Also accomplish some post-processing. See dfti_fftrisc_dp for API doc.
SOURCE
580 subroutine dfti_fftrisc_sp(cplex,denpot,fofgin,fofgout,fofr,gboundin,gboundout,istwf_k,kg_kin,kg_kout,& 581 mgfft,ngfft,npwin,npwout,ldx,ldy,ldz,option, & 582 weight_r,weight_i, abi_convention, iscale) 583 584 !Arguments ------------------------------------ 585 !scalars 586 integer,intent(in) :: cplex,istwf_k,mgfft,ldx,ldy,ldz,npwin,npwout,option 587 real(dp),intent(in) :: weight_i,weight_r 588 !arrays 589 integer,intent(in) :: gboundin(2*mgfft+8,2),gboundout(2*mgfft+8,2) 590 integer,intent(in) :: kg_kin(3,npwin),kg_kout(3,npwout),ngfft(18) 591 real(sp),intent(in) :: fofgin(2,npwin) 592 real(dp),intent(inout) :: denpot(cplex*ldx,ldy,ldz) 593 real(sp),intent(inout) :: fofr(2,ldx*ldy*ldz) 594 real(sp),intent(inout) :: fofgout(2,npwout) !vz_i 595 logical,optional,intent(in) :: abi_convention 596 integer,optional,intent(in) :: iscale 597 598 ! ************************************************************************* 599 600 #ifdef HAVE_DFTI 601 602 #undef FFT_PRECISION 603 #undef MYKIND 604 #undef MYCZERO 605 #undef MYCMPLX 606 #undef MYCONJG 607 608 #define FFT_PRECISION DFTI_SINGLE 609 #define MYKIND SPC 610 #define MYCZERO (0._sp,0._sp) 611 #define MYCMPLX CMPLX 612 #define MYCONJG CONJG 613 614 #include "dfti_fftrisc.finc" 615 616 #else 617 ABI_ERROR("DFTI support not activated") 618 ABI_UNUSED((/cplex,gboundin(1,1),gboundout(1,1),istwf_k,kg_kin(1,1),kg_kout(1,1),iscale/)) 619 ABI_UNUSED((/mgfft,ngfft(1),npwin,npwout,ldx,ldy,ldz,option/)) 620 ABI_UNUSED((/denpot(1,1,1),weight_r,weight_i/)) 621 ABI_UNUSED((/fofgin(1,1),fofgout(1,1),fofr(1,1)/)) 622 ABI_UNUSED(abi_convention) 623 #endif 624 625 end subroutine dfti_fftrisc_sp
m_dfti/dfti_fftug_dp [ Functions ]
[ Top ] [ m_dfti ] [ Functions ]
NAME
dfti_fftug_dp
FUNCTION
Compute ndat zero-padded FFTs from G to R space. Mainly used for the transform of wavefunctions. TARGET: dp arrays with real and imaginary part
INPUTS
fftalg=FFT algorith (see input variable) fftcache=size of the cache (kB) npw_k=number of plane waves for this k-point. nx,ny,nz=Number of point along the three directions. ldx,ldy,ldz=Leading dimensions of the array. ndat=Number of transforms istwf_k=Option describing the storage of the wavefunction. mgfft=Max number of FFT divisions (used to dimension gbound) kg_k(3,npw_k)=G-vectors in reduced coordinates gbound(2*mgfft+8,2)=Table for padded-FFT. See sphereboundary. ug(npw_k*ndat)=wavefunctions in reciprocal space.
OUTPUT
ur(ldx*ldy*ldz*ndat)=wavefunctions in real space.
SOURCE
842 subroutine dfti_fftug_dp(fftalg, fftcache, npw_k, nx, ny, nz, ldx, ldy, ldz, ndat, & 843 istwf_k, mgfft, kg_k, gbound, ug, ur, & 844 isign, iscale) ! optional 845 846 !Arguments ------------------------------------ 847 !scalars 848 integer,intent(in) :: fftalg,fftcache 849 integer,intent(in) :: npw_k,nx,ny,nz,ldx,ldy,ldz,ndat,istwf_k,mgfft 850 !arrays 851 integer,intent(in) :: gbound(2*mgfft+8,2),kg_k(3,npw_k) 852 real(dp),target,intent(in) :: ug(2*npw_k*ndat) 853 real(dp),target,intent(inout) :: ur(2*ldx*ldy*ldz*ndat) 854 integer,optional,intent(in) :: isign, iscale 855 856 #ifdef HAVE_DFTI 857 !Local variables------------------------------- 858 integer,parameter :: dist=2 859 integer :: iscale__, isign__ 860 real(dp) :: fofgout(2,0) 861 real(dp),ABI_CONTIGUOUS pointer :: real_ug(:,:),real_ur(:,:) 862 863 ! ************************************************************************* 864 865 iscale__ = 0; if (present(iscale)) iscale__ = iscale 866 isign__ = +1; if (present(isign)) isign__ = isign 867 868 #undef TK_PREF 869 #define TK_PREF(name) CONCAT(cg_,name) 870 871 #undef FFT_PRECISION 872 #define FFT_PRECISION FFT_DOUBLE 873 874 #include "fftug.finc" 875 876 #undef FFT_PRECISION 877 878 #else 879 ! Silence compiler warning 880 ABI_ERROR("FFT_DFTI support not activated") 881 ABI_UNUSED((/fftalg,fftcache,npw_k,nx,ny,nz,ldx,ldy,ldz,ndat,istwf_k,mgfft,kg_k(1,1),gbound(1,1), isign, iscale/)) 882 ABI_UNUSED((/ug(1),ur(1)/)) 883 #endif 884 885 end subroutine dfti_fftug_dp
m_dfti/dfti_fftug_dpc [ Functions ]
[ Top ] [ m_dfti ] [ Functions ]
NAME
dfti_fftug_dpc
FUNCTION
Compute ndat zero-padded FFTs from G ro R. Mainly used for the transform of wavefunctions. TARGET: DPC arrays
INPUTS
fftalg=FFT algorith (see input variable) fftcache=size of the cache (kB) npw_k=number of plane waves for this k-point. nx,ny,nz=Number of point along the three directions. ldx,ldy,ldz=Leading dimensions of the array. ndat=Number of transforms istwf_k=Option describing the storage of the wavefunction. mgfft=Max number of FFT divisions (used to dimension gbound) kg_k(3,npw_k)=G-vectors in reduced coordinates gbound(2*mgfft+8,2)=Table for padded-FFT. See sphereboundary. ug(npw_k*ndat)=wavefunctions in reciprocal space
OUTPUT
ur(ldx*ldy*ldz*ndat)=wavefunctions in real space.
SOURCE
993 subroutine dfti_fftug_dpc(fftalg, fftcache, npw_k, nx, ny, nz, ldx, ldy, ldz, ndat, & 994 istwf_k, mgfft, kg_k, gbound, ug, ur, & 995 isign, iscale) ! optional 996 997 !Arguments ------------------------------------ 998 !scalars 999 integer,intent(in) :: fftalg,fftcache 1000 integer,intent(in) :: npw_k,nx,ny,nz,ldx,ldy,ldz,ndat,istwf_k,mgfft 1001 !arrays 1002 integer,intent(in) :: gbound(2*mgfft+8,2),kg_k(3,npw_k) 1003 complex(dpc),target,intent(in) :: ug(npw_k*ndat) 1004 complex(dpc),target,intent(inout) :: ur(ldx*ldy*ldz*ndat) !vz_i 1005 integer,optional,intent(in) :: isign, iscale 1006 1007 #ifdef HAVE_DFTI 1008 !Local variables------------------------------- 1009 !scalars 1010 integer,parameter :: dist=1 1011 integer :: iscale__, isign__ 1012 !arrays 1013 real(dp) :: fofgout(2,0) 1014 real(dp),ABI_CONTIGUOUS pointer :: real_ug(:,:),real_ur(:,:) 1015 1016 ! ************************************************************************* 1017 1018 iscale__ = 0; if (present(iscale)) iscale__ = iscale 1019 isign__ = +1; if (present(isign)) isign__ = isign 1020 1021 #undef TK_PREF 1022 #define TK_PREF(name) CONCAT(cplx_,name) 1023 1024 #undef FFT_PRECISION 1025 #define FFT_PRECISION FFT_DOUBLE 1026 1027 #include "fftug.finc" 1028 1029 #undef FFT_PRECISION 1030 1031 #else 1032 ! Silence compiler warning 1033 ABI_ERROR("FFT_DFTI support not activated") 1034 ABI_UNUSED((/fftalg,fftcache,npw_k,nx,ny,nz,ldx,ldy,ldz,ndat,istwf_k,mgfft,kg_k(1,1),gbound(1,1),isign,iscale/)) 1035 ABI_UNUSED((/ug(1),ur(1)/)) 1036 #endif 1037 1038 end subroutine dfti_fftug_dpc
m_dfti/dfti_fftug_spc [ Functions ]
[ Top ] [ m_dfti ] [ Functions ]
NAME
dfti_fftug_spc
FUNCTION
Compute ndat zero-padded FFTs from G- to R-space . Mainly used for the transform of wavefunctions. TARGET: spc arrays
INPUTS
fftalg=FFT algorith (see input variable) fftcache=size of the cache (kB) npw_k=number of plane waves for this k-point. nx,ny,nz=Number of point along the three directions. ldx,ldy,ldz=Leading dimensions of the array. ndat=Number of transforms istwf_k=Option describing the storage of the wavefunction. mgfft=Max number of FFT divisions (used to dimension gbound) kg_k(3,npw_k)=G-vectors in reduced coordinates gbound(2*mgfft+8,2)=Table for padded-FFT. See sphereboundary. ug(npw_k*ndat)=wavefunctions in reciprocal space.
OUTPUT
ur(ldx*ldy*ldz*ndat)=wavefunctions in real space.
SOURCE
917 subroutine dfti_fftug_spc(fftalg, fftcache, npw_k, nx, ny, nz, ldx, ldy, ldz, ndat, & 918 istwf_k, mgfft, kg_k, gbound, ug, ur, & 919 isign, iscale) ! optional 920 921 !Arguments ------------------------------------ 922 !scalars 923 integer,intent(in) :: fftalg,fftcache 924 integer,intent(in) :: npw_k,nx,ny,nz,ldx,ldy,ldz,ndat,istwf_k,mgfft 925 integer,optional,intent(in) :: isign, iscale 926 !arrays 927 integer,intent(in) :: gbound(2*mgfft+8,2),kg_k(3,npw_k) 928 complex(spc),target,intent(in) :: ug(npw_k*ndat) 929 complex(spc),target,intent(inout) :: ur(ldx*ldy*ldz*ndat) !vz_i 930 931 #ifdef HAVE_DFTI 932 !Local variables------------------------------- 933 !scalars 934 integer,parameter :: dist=1 935 integer :: iscale__, isign__ 936 real(sp) :: fofgout(2,0) 937 real(sp),ABI_CONTIGUOUS pointer :: real_ug(:,:),real_ur(:,:) 938 939 ! ************************************************************************* 940 941 iscale__ = 0; if (present(iscale)) iscale__ = iscale 942 isign__ = +1; if (present(isign)) isign__ = isign 943 944 #undef TK_PREF 945 #define TK_PREF(name) CONCAT(cplx_,name) 946 947 #undef FFT_PRECISION 948 #define FFT_PRECISION FFT_SINGLE 949 950 #include "fftug.finc" 951 952 #undef FFT_PRECISION 953 954 #else 955 ! Silence compiler warning 956 ABI_ERROR("FFT_DFTI support not activated") 957 ABI_UNUSED((/fftalg,fftcache,npw_k,nx,ny,nz,ldx,ldy,ldz,ndat,istwf_k,mgfft,kg_k(1,1),gbound(1,1), iscale, isign/)) 958 ABI_UNUSED((/ug(1),ur(1)/)) 959 #endif 960 961 end subroutine dfti_fftug_spc
m_dfti/dfti_fftur_dp [ Functions ]
[ Top ] [ m_dfti ] [ Functions ]
NAME
dfti_fftur_dp
FUNCTION
Compute ndat zero-padded FFTs from R- to G-space . Mainly used for the transform of wavefunctions. TARGET: dp arrays with real and imaginary part.
INPUTS
fftalg=FFT algorith (see input variable) fftcache=size of the cache (kB) npw_k=number of plane waves for this k-point. nx,ny,nz=Number of point along the three directions. ldx,ldy,ldz=Leading dimensions of the array. ndat=Number of transforms istwf_k=Option describing the storage of the wavefunction. mgfft=Max number of FFT divisions (used to dimension gbound) kg_k(3,npw_k)=G-vectors in reduced coordinates gbound(2*mgfft+8,2)=Table for padded-FFT. See sphereboundary. SIDE EFFECT ur(2,ldx*ldy*ldz*ndat)= In input: wavefunctions in real space. Destroyed in output. Do not use ur anymore!
OUTPUT
ug(2,npw_k*ndat)=wavefunctions in reciprocal space.
SOURCE
1072 subroutine dfti_fftur_dp(fftalg, fftcache, npw_k, nx, ny, nz, ldx, ldy, ldz, ndat, & 1073 istwf_k, mgfft, kg_k, gbound, ur, ug, & 1074 isign, iscale) ! optional 1075 1076 !Arguments ------------------------------------ 1077 !scalars 1078 integer,intent(in) :: fftalg,fftcache 1079 integer,intent(in) :: npw_k,nx,ny,nz,ldx,ldy,ldz,ndat,istwf_k,mgfft 1080 integer,optional,intent(in) :: isign, iscale 1081 !arrays 1082 integer,intent(in) :: gbound(2*mgfft+8,2),kg_k(3,npw_k) 1083 real(dp),target,intent(inout) :: ur(2*ldx*ldy*ldz*ndat) 1084 real(dp),target,intent(inout) :: ug(2*npw_k*ndat) !vz_i 1085 1086 #ifdef HAVE_DFTI 1087 !Local variables------------------------------- 1088 !scalars 1089 integer,parameter :: dist=2 1090 integer :: iscale__, isign__ 1091 !arrays 1092 real(dp) :: dum_ugin(2,0) 1093 real(dp),ABI_CONTIGUOUS pointer :: real_ug(:,:),real_ur(:,:) 1094 1095 ! ************************************************************************* 1096 1097 iscale__ = 1; if (present(iscale)) iscale__ = iscale 1098 isign__ = -1; if (present(isign)) isign__ = isign 1099 1100 #undef TK_PREF 1101 #define TK_PREF(name) CONCAT(cg_,name) 1102 1103 #undef FFT_PRECISION 1104 #define FFT_PRECISION FFT_DOUBLE 1105 1106 #include "fftur.finc" 1107 1108 #undef FFT_PRECISION 1109 1110 #else 1111 ! Silence compiler warning 1112 ABI_ERROR("FFT_DFTI support not activated") 1113 ABI_UNUSED((/fftalg,fftcache/)) 1114 ABI_UNUSED((/npw_k,nx,ny,nz,ldx,ldy,ldz,ndat,istwf_k,mgfft,kg_k(1,1),gbound(1,1),iscale,isign/)) 1115 ABI_UNUSED((/ug(1),ur(1)/)) 1116 #endif 1117 1118 end subroutine dfti_fftur_dp
m_dfti/dfti_fftur_dpc [ Functions ]
[ Top ] [ m_dfti ] [ Functions ]
NAME
dfti_fftur_dpc
FUNCTION
Compute ndat zero-padded FFTs from R ro G. Mainly used for the transform of wavefunctions. TARGET: DPC arrays
INPUTS
fftalg=FFT algorith (see input variable) fftcache=size of the cache (kB) npw_k=number of plane waves for this k-point. nx,ny,nz=Number of point along the three directions. ldx,ldy,ldz=Leading dimensions of the array. ndat=Number of transforms istwf_k=Option describing the storage of the wavefunction. mgfft=Max number of FFT divisions (used to dimension gbound) kg_k(3,npw_k)=G-vectors in reduced coordinates gbound(2*mgfft+8,2)=Table for padded-FFT. See sphereboundary. SIDE EFFECT ur(ldx*ldy*ldz*ndat)= In input: wavefunctions in real space. Destroyed in output. Do not use ur anymore!
OUTPUT
ug(npw_k*ndat)=wavefunctions in reciprocal space
SOURCE
1233 subroutine dfti_fftur_dpc(fftalg, fftcache, npw_k, nx, ny, nz, ldx, ldy, ldz, ndat, & 1234 istwf_k, mgfft, kg_k, gbound, ur, ug, & 1235 isign, iscale) ! optional 1236 1237 !Arguments ------------------------------------ 1238 !scalars 1239 integer,intent(in) :: fftalg,fftcache 1240 integer,intent(in) :: npw_k,nx,ny,nz,ldx,ldy,ldz,ndat,istwf_k,mgfft 1241 integer,optional,intent(in) :: isign, iscale 1242 !arrays 1243 integer,intent(in) :: gbound(2*mgfft+8,2),kg_k(3,npw_k) 1244 complex(dpc),target,intent(inout) :: ur(ldx*ldy*ldz*ndat) 1245 complex(dpc),target,intent(inout) :: ug(npw_k*ndat) !vz_i 1246 1247 #ifdef HAVE_DFTI 1248 !Local variables------------------------------- 1249 !scalars 1250 integer,parameter :: dist=1 1251 integer :: iscale__, isign__ 1252 !arrays 1253 real(dp) :: dum_ugin(2,0) 1254 real(dp),ABI_CONTIGUOUS pointer :: real_ug(:,:),real_ur(:,:) 1255 1256 ! ************************************************************************* 1257 1258 iscale__ = 1; if (present(iscale)) iscale__ = iscale 1259 isign__ = -1; if (present(isign)) isign__ = isign 1260 1261 #undef TK_PREF 1262 #define TK_PREF(name) CONCAT(cplx_,name) 1263 1264 #undef FFT_PRECISION 1265 #define FFT_PRECISION FFT_DOUBLE 1266 1267 #include "fftur.finc" 1268 1269 #undef FFT_PRECISION 1270 1271 #else 1272 ! Silence compiler warning 1273 ABI_ERROR("FFT_DFTI support not activated") 1274 ABI_UNUSED((/fftalg,fftcache/)) 1275 ABI_UNUSED((/npw_k,nx,ny,nz,ldx,ldy,ldz,ndat,istwf_k,mgfft,kg_k(1,1),gbound(1,1),iscale,isign/)) 1276 ABI_UNUSED((/ug(1),ur(1)/)) 1277 #endif 1278 1279 end subroutine dfti_fftur_dpc
m_dfti/dfti_fftur_spc [ Functions ]
[ Top ] [ m_dfti ] [ Functions ]
NAME
dfti_fftur_spc
FUNCTION
Compute ndat zero-padded FFTs from R- to G-space . Mainly used for the transform of wavefunctions. TARGET: spc arrays
INPUTS
fftalg=FFT algorith (see input variable) fftcache=size of the cache (kB) npw_k=number of plane waves for this k-point. nx,ny,nz=Number of point along the three directions. ldx,ldy,ldz=Leading dimensions of the array. ndat=Number of transforms istwf_k=Option describing the storage of the wavefunction. mgfft=Max number of FFT divisions (used to dimension gbound) kg_k(3,npw_k)=G-vectors in reduced coordinates gbound(2*mgfft+8,2)=Table for padded-FFT. See sphereboundary. SIDE EFFECT ur(ldx*ldy*ldz*ndat)= In input: wavefunctions in real space. Destroyed in output. Do not use ur anymore!
OUTPUT
ug(npw_k*ndat)=wavefunctions in reciprocal space.
SOURCE
1153 subroutine dfti_fftur_spc(fftalg, fftcache, npw_k, nx, ny, nz, ldx, ldy, ldz, ndat, & 1154 istwf_k, mgfft, kg_k, gbound, ur, ug, & 1155 isign, iscale) ! optional 1156 1157 !Arguments ------------------------------------ 1158 !scalars 1159 integer,intent(in) :: fftalg,fftcache 1160 integer,intent(in) :: npw_k,nx,ny,nz,ldx,ldy,ldz,ndat,istwf_k,mgfft 1161 !arrays 1162 integer,intent(in) :: gbound(2*mgfft+8,2),kg_k(3,npw_k) 1163 complex(spc),target,intent(inout) :: ur(ldx*ldy*ldz*ndat) 1164 complex(spc),target,intent(inout) :: ug(npw_k*ndat) !vz_i 1165 integer,optional,intent(in) :: isign, iscale 1166 1167 #ifdef HAVE_DFTI 1168 !Local variables------------------------------- 1169 !scalars 1170 integer,parameter :: dist=1 1171 integer :: iscale__, isign__ 1172 !arrays 1173 real(sp) :: dum_ugin(2,0) 1174 real(sp),ABI_CONTIGUOUS pointer :: real_ug(:,:),real_ur(:,:) 1175 1176 ! ************************************************************************* 1177 1178 iscale__ = 1; if (present(iscale)) iscale__ = iscale 1179 isign__ = -1; if (present(isign)) isign__ = isign 1180 1181 #undef TK_PREF 1182 #define TK_PREF(name) CONCAT(cplx_,name) 1183 1184 #undef FFT_PRECISION 1185 #define FFT_PRECISION FFT_SINGLE 1186 1187 #include "fftur.finc" 1188 1189 #undef FFT_PRECISION 1190 1191 #else 1192 ! Silence compiler warning 1193 ABI_ERROR("FFT_DFTI support not activated") 1194 ABI_UNUSED((/fftalg,fftcache/)) 1195 ABI_UNUSED((/npw_k,nx,ny,nz,ldx,ldy,ldz,ndat,istwf_k,mgfft,kg_k(1,1),gbound(1,1),isign,iscale/)) 1196 ABI_UNUSED((/ug(1),ur(1)/)) 1197 #endif 1198 1199 end subroutine dfti_fftur_spc
m_dfti/dfti_many_dft_ip [ Functions ]
[ Top ] [ m_dfti ] [ Functions ]
NAME
dfti_many_dft_ip
FUNCTION
Driver routine for many in-place 3D complex-to-complex FFTs of lengths nx, ny, nz.
INPUTS
nx,ny,nz=Number of points along the three directions. ldx,ldy,ldz=Physical dimension of the finout array (to avoid cache conflicts). ndat=Number of FFTs to be done. isign=sign of Fourier transform exponent: current convention uses +1 for transforming from G to r, -1 for transforming from r to G.
OUTPUT
finout(2,ldx*ldy*ldz*ndat)= In input: The complex array to be transformed. In output: The FFT results.
SOURCE
1538 subroutine dfti_many_dft_ip(nx,ny,nz,ldx,ldy,ldz,ndat,isign,finout) 1539 1540 !Arguments ------------------------------------ 1541 !scalars 1542 integer,intent(in) :: nx,ny,nz,ldx,ldy,ldz,ndat,isign 1543 !arrays 1544 real(dp),target,intent(inout) :: finout(2*ldx*ldy*ldz*ndat) 1545 1546 #ifdef HAVE_DFTI 1547 !Local variables------------------------------- 1548 !scalars 1549 integer,parameter :: iscale1 = 1 1550 type(C_ptr) :: finout_cptr 1551 !arrays 1552 complex(dpc),ABI_CONTIGUOUS pointer :: finout_fptr(:) 1553 1554 ! ************************************************************************* 1555 1556 ! Associate complex finout_fptr with real ffinout via the C pointer 1557 finout_cptr = C_loc(finout) 1558 call C_F_pointer(finout_cptr,finout_fptr, shape=(/ldx*ldy*ldz*ndat/)) 1559 1560 ! Call complex version --> a lot of boilerplate code avoided 1561 call dfti_c2c_ip(nx, ny, nz, ldx, ldy, ldz, ndat, iscale1, isign, finout_fptr) 1562 1563 #else 1564 ABI_ERROR("FFT_DFTI support not activated") 1565 ABI_UNUSED((/nx,ny,nz,ldx,ldy,ldz,ndat,isign/)) 1566 ABI_UNUSED(finout(1)) 1567 #endif 1568 1569 end subroutine dfti_many_dft_ip
m_dfti/dfti_many_dft_op [ Functions ]
[ Top ] [ m_dfti ] [ Functions ]
NAME
dfti_many_dft_op
FUNCTION
Driver routine for many out-of-place 3D complex-to-complex FFTs of lengths nx, ny, nz.
INPUTS
nx,ny,nz=Number of points along the three directions. ldx,ldy,ldz=Physical dimension of the fin and fout arrays (to avoid cache conflicts). ndat=Number of FFTs to be done. fin(2*ldx*ldy*ldz*ndat)=The complex array to be transformed. isign=sign of Fourier transform exponent: current convention uses +1 for transforming from G to r, -1 for transforming from r to G.
OUTPUT
fout(2,ldx*ldy*ldz*ndat)=The Fourier transform of fin.
SOURCE
1474 subroutine dfti_many_dft_op(nx,ny,nz,ldx,ldy,ldz,ndat,isign,fin,fout) 1475 1476 !Arguments ------------------------------------ 1477 !scalars 1478 integer,intent(in) :: nx,ny,nz,ldx,ldy,ldz,ndat,isign 1479 !arrays 1480 real(dp),target,intent(in) :: fin(2*ldx*ldy*ldz*ndat) 1481 real(dp),target,intent(out) :: fout(2*ldx*ldy*ldz*ndat) 1482 1483 #ifdef HAVE_DFTI 1484 !Local variables------------------------------- 1485 !scalars 1486 integer,parameter :: iscale1 = 1 1487 type(C_ptr) :: fin_cptr, fout_cptr 1488 1489 !arrays 1490 complex(dpc),ABI_CONTIGUOUS pointer :: fin_fptr(:),fout_fptr(:) 1491 1492 ! ************************************************************************* 1493 1494 ! Associate complex pointers with real inputs via the C pointers 1495 fin_cptr = C_loc(fin) 1496 call C_F_pointer(fin_cptr,fin_fptr, shape=[ldx*ldy*ldz*ndat]) 1497 1498 fout_cptr = C_loc(fout) 1499 call C_F_pointer(fout_cptr,fout_fptr, shape=[ldx*ldy*ldz*ndat]) 1500 1501 ! Call complex version --> a lot of boilerplate code avoided 1502 call dfti_c2c_op(nx, ny, nz, ldx, ldy, ldz, ndat, iscale1, isign, fin_fptr, fout_fptr) 1503 1504 #else 1505 ABI_ERROR("FFT_DFTI support not activated") 1506 ABI_UNUSED((/nx,ny,nz,ldx,ldy,ldz,ndat,isign/)) 1507 ABI_UNUSED(fin(1)) 1508 ABI_UNUSED(fout(1)) 1509 #endif 1510 1511 end subroutine dfti_many_dft_op
m_dfti/dfti_r2c_op_dp [ Functions ]
[ Top ] [ m_dfti ] [ Functions ]
NAME
dfti_r2c_op_dp
FUNCTION
Driver routine for out-of-place 3D real-to-complex FFT of lengths nx, ny, nz.
INPUTS
nx,ny,nz=Number of points along the three directions. ldx,ldy,ldz=Physical dimensions of the f array (to avoid cache conflicts). ndat=Number of FFTs to be done. ff(ldx*ldy*ldz*ndat)=The real array to be transformed.
OUTPUT
gg(2*ldx*ldy*ldz*ndat)=The forward FFT of ff (real valued)
SOURCE
1894 subroutine dfti_r2c_op_dp(nx, ny, nz, ldx, ldy, ldz, ndat, ff, gg) 1895 1896 !Arguments ------------------------------------ 1897 !scalars 1898 integer,intent(in) :: nx,ny,nz,ldx,ldy,ldz,ndat 1899 !arrays 1900 real(dp),intent(in) :: ff(ldx*ldy*ldz*ndat) 1901 real(dp),target,intent(out) :: gg(2*ldx*ldy*ldz*ndat) 1902 1903 #ifdef HAVE_DFTI 1904 !Local variables------------------------------- 1905 !scalars 1906 type(C_ptr) :: gg_cptr 1907 !arrays 1908 complex(dpc),ABI_CONTIGUOUS pointer :: gg_fptr(:) 1909 1910 ! ************************************************************************* 1911 1912 gg_cptr = C_loc(gg) 1913 call C_F_pointer(gg_cptr,gg_fptr, shape=(/ldx*ldy*ldz*ndat/)) 1914 1915 call dfti_r2c_op_dpc(nx,ny,nz,ldx,ldy,ldz,ndat,ff,gg_fptr) 1916 1917 #else 1918 ABI_ERROR("FFT_DFTI support not activated") 1919 ABI_UNUSED((/nx,ny,nz,ldx,ldy,ldz/)) 1920 ABI_UNUSED(ff) 1921 ABI_UNUSED(gg(1)) 1922 #endif 1923 1924 end subroutine dfti_r2c_op_dp
m_dfti/dfti_r2c_op_dpc [ Functions ]
[ Top ] [ m_dfti ] [ Functions ]
NAME
dfti_r2c_op_dpc
FUNCTION
Driver routine for out-of-place 3D real-to-complex FFT of lengths nx, ny, nz.
INPUTS
nx,ny,nz=Number of points along the three directions. ldx,ldy,ldz=Physical dimensions of the f array (to avoid cache conflicts). ff(ldx*ldy*ldz*ndat)=The real array to be transformed. ndat=Number of FFTs to be done.
OUTPUT
gg(ldx*ldy*ldz*ndat)=The forward FFT of ff (complex valued)
SOURCE
1771 subroutine dfti_r2c_op_dpc(nx, ny, nz, ldx, ldy, ldz, ndat, ff, gg) 1772 1773 !Arguments ------------------------------------ 1774 !scalars 1775 integer,intent(in) :: nx,ny,nz,ldx,ldy,ldz,ndat 1776 !arrays 1777 real(dp),intent(in) :: ff(ldx*ldy*ldz*ndat) 1778 complex(dpc),intent(out) :: gg(ldx*ldy*ldz*ndat) 1779 1780 #ifdef HAVE_DFTI 1781 !Local variables------------------------------- 1782 !scalars 1783 integer :: status,nhp,padx,i1,i2,i3,igp,igf,imgf,ii 1784 integer :: i1inv,i2inv,i3inv,idat,padatf 1785 type(DFTI_DESCRIPTOR),pointer :: Desc 1786 type(C_PTR) :: cptr 1787 !arrays 1788 integer,allocatable :: i1inver(:),i2inver(:),i3inver(:) 1789 complex(dpc),ABI_CONTIGUOUS pointer :: gg_hp(:) 1790 1791 ! ************************************************************************* 1792 1793 padx = (nx/2+1) 1794 nhp = (nx/2+1)*ny*nz 1795 1796 call dfti_alloc_complex(nhp*ndat,cptr,gg_hp) 1797 1798 status = DftiCreateDescriptor(Desc, DFTI_DOUBLE, DFTI_REAL, 3, (/nx,ny,nz/) ) 1799 DFTI_CHECK(status) 1800 1801 status = DftiSetValue(Desc, DFTI_CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_COMPLEX) 1802 status = DftiSetValue(Desc, DFTI_PLACEMENT, DFTI_NOT_INPLACE ) 1803 status = DftiSetValue(Desc, DFTI_INPUT_STRIDES, (/0, 1, ldx, ldx*ldy/)) 1804 status = DftiSetValue(Desc, DFTI_INPUT_DISTANCE, ldx*ldy*ldz) 1805 status = DftiSetValue(Desc, DFTI_OUTPUT_STRIDES, (/0, 1, padx, padx*ny/)) 1806 status = DftiSetValue(Desc, DFTI_OUTPUT_DISTANCE, nhp) 1807 status = DftiSetValue(Desc, DFTI_NUMBER_OF_TRANSFORMS, ndat) 1808 status = DftiSetValue(Desc, DFTI_FORWARD_SCALE, one / DBLE(nx*ny*nz) ) 1809 DFTI_CHECK(status) 1810 1811 DFTI_CHECK( DftiCommitDescriptor(Desc) ) 1812 1813 DFTI_CHECK( DftiComputeForward(Desc, ff, gg_hp) ) 1814 1815 DFTI_CHECK( DftiFreeDescriptor(Desc) ) 1816 1817 ! Reconstruct full FFT: Hermitian redundancy: out[i] is the conjugate of out[n-i] 1818 ABI_MALLOC(i1inver,(padx)) 1819 ABI_MALLOC(i2inver,(ny)) 1820 ABI_MALLOC(i3inver,(nz)) 1821 1822 i1inver(1)=1 1823 do i1=2,padx 1824 i1inver(i1)=nx+2-i1 1825 end do 1826 1827 i2inver(1)=1 1828 do i2=2,ny 1829 i2inver(i2)=ny+2-i2 1830 end do 1831 1832 i3inver(1)=1 1833 do i3=2,nz 1834 i3inver(i3)=nz+2-i3 1835 end do 1836 1837 igp=0 1838 do idat=1,ndat 1839 padatf=(idat-1)*ldx*ldy*ldz 1840 do i3=1,nz 1841 i3inv = i3inver(i3) 1842 do i2=1,ny 1843 i2inv = i2inver(i2) 1844 do i1=1,padx 1845 igp=igp+1 1846 igf = i1 + (i3-1)*ldx*ldy + (i2-1)*ldx + padatf 1847 gg(igf) = gg_hp(igp) 1848 i1inv = i1inver(i1) 1849 if (i1inv/=i1) then 1850 imgf = i1inv + (i3inv-1)*ldx*ldy + (i2inv-1)*ldx + padatf 1851 gg(imgf) = DCONJG(gg_hp(igp)) 1852 end if 1853 end do 1854 end do 1855 end do 1856 end do 1857 1858 ABI_FREE(i1inver) 1859 ABI_FREE(i2inver) 1860 ABI_FREE(i3inver) 1861 1862 call dfti_free(cptr) 1863 1864 #else 1865 ABI_ERROR("FFT_DFTI support not activated") 1866 ABI_UNUSED((/nx,ny,nz,ldx,ldy,ldz/)) 1867 ABI_UNUSED(ff) 1868 ABI_UNUSED(gg(1)) 1869 #endif 1870 1871 end subroutine dfti_r2c_op_dpc
m_dfti/dfti_seqfourdp [ Functions ]
[ Top ] [ m_dfti ] [ Functions ]
NAME
dfti_seqfourdp
FUNCTION
Driver routine for 3D FFT of lengths nx, ny, nz. Mainly used for densities or potentials. FFT Transform is out-of-place
INPUTS
cplex=1 if fofr is real, 2 if fofr is complex nx,ny,nz=Number of point along the three directions. ldx,ldy,ldz=Leading dimensions of the array. ndat = Number of FFTS isign= +1 : fofg(G) => fofr(R); -1 : fofr(R) => fofg(G) fofg(2,ldx*ldy*ldz*ndat)=The array to be transformed.
OUTPUT
fofr(cplex,ldx*ldy*ldz*ndat)=The FFT of fofg
SOURCE
204 subroutine dfti_seqfourdp(cplex,nx,ny,nz,ldx,ldy,ldz,ndat,isign,fofg,fofr) 205 206 !Arguments ------------------------------------ 207 !scalars 208 integer,intent(in) :: cplex,nx,ny,nz,ldx,ldy,ldz,ndat,isign 209 !arrays 210 real(dp),intent(inout) :: fofg(2*ldx*ldy*ldz*ndat) 211 real(dp),intent(inout) :: fofr(cplex*ldx*ldy*ldz*ndat) 212 213 !Local variables------------------------------- 214 !scalars 215 integer,parameter :: iscale1 = 1 216 integer :: ii,jj 217 complex(spc), allocatable :: work_sp(:) 218 219 ! ************************************************************************* 220 221 select case (cplex) 222 case (2) 223 ! Complex to Complex. 224 if (fftcore_mixprec == 1) then 225 ! Mixed precision: copyin + in-place + copyout 226 ABI_MALLOC(work_sp, (ldx*ldy*ldz*ndat)) 227 if (isign == +1) then 228 work_sp(:) = cmplx(fofg(1::2), fofg(2::2), kind=spc) 229 else if (isign == -1) then 230 work_sp(:) = cmplx(fofr(1::2), fofr(2::2), kind=spc) 231 else 232 ABI_BUG("Wrong isign") 233 end if 234 235 call dfti_c2c_ip_spc(nx, ny, nz, ldx, ldy, ldz, ndat, iscale1, isign, work_sp) 236 237 if (isign == +1) then 238 jj = 1 239 do ii=1,ldx*ldy*ldz*ndat 240 fofr(jj) = real(work_sp(ii), kind=dp) 241 fofr(jj+1) = aimag(work_sp(ii)) 242 jj = jj + 2 243 end do 244 else if (isign == -1) then 245 jj = 1 246 do ii=1,ldx*ldy*ldz*ndat 247 fofg(jj) = real(work_sp(ii), kind=dp) 248 fofg(jj+1) = aimag(work_sp(ii)) 249 jj = jj + 2 250 end do 251 end if 252 ABI_FREE(work_sp) 253 254 else 255 ! double precision version. 256 select case (isign) 257 case (+1) 258 call dfti_many_dft_op(nx,ny,nz,ldx,ldy,ldz,ndat,isign,fofg,fofr) 259 case (-1) ! -1 260 call dfti_many_dft_op(nx,ny,nz,ldx,ldy,ldz,ndat,isign,fofr,fofg) 261 case default 262 ABI_BUG("Wrong isign") 263 end select 264 end if 265 266 case (1) ! Real case. 267 268 select case (isign) 269 case (+1) ! G --> R 270 call dfti_c2r_op(nx,ny,nz,ldx,ldy,ldz,ndat,fofg,fofr) 271 case (-1) ! R --> G 272 call dfti_r2c_op(nx,ny,nz,ldx,ldy,ldz,ndat,fofr,fofg) 273 case default 274 ABI_BUG("Wrong isign") 275 end select 276 277 case default 278 ABI_BUG("Wrong value for cplex") 279 end select 280 281 end subroutine dfti_seqfourdp
m_dfti/dfti_seqfourwf [ Functions ]
[ Top ] [ m_dfti ] [ Functions ]
NAME
dfti_seqfourwf
FUNCTION
Carry out composite Fourier transforms between real and reciprocal (G) space. Wavefunctions, contained in a sphere in reciprocal space, can be FFT to real space. They can also be FFT from real space to a sphere. Also, the density maybe accumulated, and a local potential can be applied. The different options are : - option=0 --> reciprocal to real space and output the result. - option=1 --> reciprocal to real space and accumulate the density. - option=2 --> reciprocal to real space, apply the local potential to the wavefunction in real space and produce the result in reciprocal space. - option=3 --> real space to reciprocal space. NOTE that in this case, fftalg=1x1 MUST be used. This may be changed in the future.
INPUTS
cplex= if 1 , denpot is real, if 2 , denpot is complex (cplex=2 only allowed for option=2, and istwf_k=1) not relevant if option=0 or option=3, so cplex=0 can be used to minimize memory fofgin(2,npwin)=holds input wavefunction in G vector basis sphere. (intent(in) but the routine sphere can modify it for another iflag) gboundin(2*mgfft+8,2)=sphere boundary info for reciprocal to real space gboundout(2*mgfft+8,2)=sphere boundary info for real to reciprocal space istwf_k=option parameter that describes the storage of wfs kg_kin(3,npwin)=reduced planewave coordinates, input kg_kout(3,npwout)=reduced planewave coordinates, output mgfft=maximum size of 1D FFTs ndat=number of FFT to do in // ngfft(18)=contain all needed information about 3D FFT, see ~abinit/doc/variables/vargs.htm#ngfft npwin=number of elements in fofgin array (for option 0, 1 and 2) npwout=number of elements in fofgout array (for option 2 and 3) ldx,ldy,ldz=ngfft(4),ngfft(5),ngfft(6), dimensions of fofr. option= if 0: do direct FFT if 1: do direct FFT, then sum the density if 2: do direct FFT, multiply by the potential, then do reverse FFT if 3: do reverse FFT only weight_r=weight to be used for the accumulation of the density in real space (needed only when option=1)
OUTPUT
(see side effects)
SIDE EFFECTS
Input/Output for option==0, fofgin(2,npwin*ndat)=holds input wavefunction in G sphere; fofr(2,ldx,ldy,ldz) contains the output Fourier Transform of fofgin; no use of denpot, fofgout and npwout. for option==1, fofgin(2,npwin*ndat)=holds input wavefunction in G sphere; denpot(cplex*ldx,ldy,ldz) contains the input density at input, and the updated density at output (accumulated); no use of fofgout and npwout. for option==2, fofgin(2,npwin*ndat)=holds input wavefunction in G sphere; denpot(cplex*ldx,ldy,ldz) contains the input local potential; fofgout(2,npwout*ndat) contains the output function; for option==3, fofr(2,ldx,ldy,ldz*ndat) contains the input real space wavefunction; fofgout(2,npwout*ndat) contains its output Fourier transform; no use of fofgin and npwin.
SOURCE
349 subroutine dfti_seqfourwf(cplex,denpot,fofgin,fofgout,fofr,gboundin,gboundout,istwf_k, & 350 kg_kin,kg_kout,mgfft,ndat,ngfft,npwin,npwout,ldx,ldy,ldz,option,weight_r,weight_i) 351 352 !Arguments ------------------------------------ 353 !scalars 354 integer,intent(in) :: cplex,istwf_k,ldx,ldy,ldz,ndat,npwin,npwout,option,mgfft 355 real(dp),intent(in) :: weight_i,weight_r 356 !arrays 357 integer,intent(in) :: gboundin(2*mgfft+8,2),gboundout(2*mgfft+8,2) 358 integer,intent(in) :: kg_kin(3,npwin),kg_kout(3,npwout),ngfft(18) 359 real(dp),intent(inout) :: denpot(cplex*ldx,ldy,ldz),fofgin(2,npwin*ndat) 360 real(dp),intent(inout) :: fofr(2,ldx*ldy*ldz*ndat) 361 real(dp),intent(out) :: fofgout(2,npwout*ndat) 362 363 !Local variables------------------------------- 364 !scalars 365 integer,parameter :: ndat1=1 366 integer :: nx,ny,nz,fftalg,fftalga,fftalgc,fftcache,dat,ptg,ptr,ptgin,ptgout,nthreads 367 logical :: use_fftrisc 368 character(len=500) :: msg 369 370 ! ************************************************************************* 371 372 if (all(option /= [0, 1, 2, 3])) then 373 write(msg,'(a,i0,a)')' Option:',option,' is not allowed. Only option=0, 1, 2 or 3 are allowed presently.' 374 ABI_ERROR(msg) 375 end if 376 377 if (option == 1 .and. cplex /= 1) then 378 ABI_ERROR(sjoin("With option number 1, cplex must be 1 but it is cplex:", itoa(cplex))) 379 end if 380 381 if (option==2 .and. (cplex/=1 .and. cplex/=2)) then 382 ABI_ERROR(sjoin("With the option number 2, cplex must be 1 or 2, but it is cplex:", itoa(cplex))) 383 end if 384 385 nx=ngfft(1); ny=ngfft(2); nz=ngfft(3) 386 fftalg=ngfft(7); fftalga=fftalg/100; fftalgc=MOD(fftalg,10) 387 fftcache=ngfft(8) 388 389 use_fftrisc = (fftalgc==2) 390 if (istwf_k==2.and.option==3) use_fftrisc = .FALSE. 391 if (istwf_k>2.and.ANY(option==(/0,3/))) use_fftrisc = .FALSE. 392 393 nthreads = xomp_get_num_threads(open_parallel=.TRUE.) 394 395 if (use_fftrisc) then 396 !call wrtout(std_out, " calls dfti_fftrisc") 397 if (ndat == 1) then 398 if (fftcore_mixprec == 0) then 399 call dfti_fftrisc_dp(cplex,denpot,fofgin,fofgout,fofr,gboundin,gboundout,istwf_k,kg_kin,kg_kout,& 400 mgfft,ngfft,npwin,npwout,ldx,ldy,ldz,option,weight_r,weight_i) 401 else 402 call dfti_fftrisc_mixprec(cplex,denpot,fofgin,fofgout,fofr,gboundin,gboundout,istwf_k,kg_kin,kg_kout,& 403 mgfft,ngfft,npwin,npwout,ldx,ldy,ldz,option,weight_r,weight_i) 404 end if 405 else 406 ! All this boilerplate code is needed because the caller might pass zero-sized arrays 407 ! for the arguments that are not referenced and we don't want to have problems at run-time. 408 ! Moreover option 1 requires a special treatment when threads are started at this level. 409 410 SELECT CASE (option) 411 CASE (0) 412 ! 413 ! fofgin -> fofr, no use of denpot, fofgout and npwout. 414 if (.not.dfti_spawn_threads_here(ndat,nthreads)) then 415 do dat=1,ndat 416 ptg = 1 + (dat-1)*npwin 417 ptr = 1 + (dat-1)*ldx*ldy*ldz 418 call dfti_fftrisc_dp(cplex,denpot,fofgin(1,ptg),fofgout,fofr(1,ptr),gboundin,gboundout,istwf_k,kg_kin,kg_kout,& 419 & mgfft,ngfft,npwin,npwout,ldx,ldy,ldz,option,weight_r,weight_i) 420 end do 421 else 422 !$OMP PARALLEL DO PRIVATE(ptg,ptr) 423 do dat=1,ndat 424 ptg = 1 + (dat-1)*npwin 425 ptr = 1 + (dat-1)*ldx*ldy*ldz 426 call dfti_fftrisc_dp(cplex,denpot,fofgin(1,ptg),fofgout,fofr(1,ptr),gboundin,gboundout,istwf_k,kg_kin,kg_kout,& 427 & mgfft,ngfft,npwin,npwout,ldx,ldy,ldz,option,weight_r,weight_i) 428 end do 429 end if 430 431 CASE (1) 432 !fofgin -> local ur and accumulate density in denpot 433 ! TODO this is delicate part to do in parallel, as one should OMP reduce denpot. 434 do dat=1,ndat 435 ptg = 1 + (dat-1)*npwin 436 ptr = 1 + (dat-1)*ldx*ldy*ldz 437 call dfti_fftrisc_dp(cplex,denpot,fofgin(1,ptg),fofgout,fofr,gboundin,gboundout,istwf_k,kg_kin,kg_kout,& 438 & mgfft,ngfft,npwin,npwout,ldx,ldy,ldz,option,weight_r,weight_i) 439 end do 440 441 CASE (2) 442 ! <G|vloc(r)|fofgin(r)> in fofgout 443 if (.not.dfti_spawn_threads_here(ndat,nthreads)) then 444 do dat=1,ndat 445 ptgin = 1 + (dat-1)*npwin 446 ptgout = 1 + (dat-1)*npwout 447 if (fftcore_mixprec == 0) then 448 call dfti_fftrisc_dp(cplex,denpot,fofgin(1,ptgin),fofgout(1,ptgout),fofr,gboundin,gboundout,istwf_k,& 449 kg_kin,kg_kout,mgfft,ngfft,npwin,npwout,ldx,ldy,ldz,option,weight_r,weight_i) 450 else 451 call dfti_fftrisc_mixprec(cplex,denpot,fofgin(1,ptgin),fofgout(1,ptgout),fofr,gboundin,gboundout,istwf_k,& 452 kg_kin,kg_kout,mgfft,ngfft,npwin,npwout,ldx,ldy,ldz,option,weight_r,weight_i) 453 end if 454 end do 455 else 456 !$OMP PARALLEL DO PRIVATE(ptgin,ptgout) 457 do dat=1,ndat 458 ptgin = 1 + (dat-1)*npwin 459 ptgout = 1 + (dat-1)*npwout 460 call dfti_fftrisc_dp(cplex,denpot,fofgin(1,ptgin),fofgout(1,ptgout),fofr,gboundin,gboundout,istwf_k,kg_kin,kg_kout,& 461 & mgfft,ngfft,npwin,npwout,ldx,ldy,ldz,option,weight_r,weight_i) 462 end do 463 end if 464 465 CASE (3) 466 !fofr -> fofgout 467 if (.not.dfti_spawn_threads_here(ndat,nthreads)) then 468 do dat=1,ndat 469 ptr = 1 + (dat-1)*ldx*ldy*ldz 470 ptgout = 1 + (dat-1)*npwout 471 call dfti_fftrisc_dp(cplex,denpot,fofgin,fofgout(1,ptgout),fofr(1,ptr),gboundin,gboundout,istwf_k,kg_kin,kg_kout,& 472 & mgfft,ngfft,npwin,npwout,ldx,ldy,ldz,option,weight_r,weight_i) 473 end do 474 else 475 !$OMP PARALLEL DO PRIVATE(ptr,ptgout) 476 do dat=1,ndat 477 ptr = 1 + (dat-1)*ldx*ldy*ldz 478 ptgout = 1 + (dat-1)*npwout 479 call dfti_fftrisc_dp(cplex,denpot,fofgin,fofgout(1,ptgout),fofr(1,ptr),gboundin,gboundout,istwf_k,kg_kin,kg_kout,& 480 & mgfft,ngfft,npwin,npwout,ldx,ldy,ldz,option,weight_r,weight_i) 481 end do 482 end if 483 484 CASE DEFAULT 485 write(msg,'(a,i0,a)')'Option',option,' is not allowed. Only option=0, 1, 2 or 3 are allowed presently.' 486 ABI_ERROR(msg) 487 END SELECT 488 489 end if 490 491 else 492 SELECT CASE (option) 493 CASE (0) 494 ! 495 ! FFT u(g) --> u(r) 496 if (.not.dfti_spawn_threads_here(ndat,nthreads)) then 497 call dfti_fftug_dp(fftalg,fftcache,npwin,nx,ny,nz,ldx,ldy,ldz,ndat,istwf_k,mgfft,kg_kin,gboundin,fofgin,fofr) 498 else 499 !$OMP PARALLEL DO PRIVATE(ptg, ptr) 500 do dat=1,ndat 501 ptg = 1 + (dat-1)*npwin 502 ptr = 1 + (dat-1)*ldx*ldy*ldz 503 call dfti_fftug_dp(fftalg,fftcache,npwin,nx,ny,nz,ldx,ldy,ldz,ndat1,& 504 & istwf_k,mgfft,kg_kin,gboundin,fofgin(1,ptg),fofr(1,ptr)) 505 end do 506 end if 507 508 CASE (1) 509 ! TODO this is delicate part to do in parallel, as one should OMP reduce denpot. 510 call dfti_fftug_dp(fftalg,fftcache,npwin,nx,ny,nz,ldx,ldy,ldz,ndat,istwf_k,mgfft,kg_kin,gboundin,fofgin,fofr) 511 call cg_addtorho(nx,ny,nz,ldx,ldy,ldz,ndat,weight_r,weight_i,fofr,denpot) 512 513 CASE (2) 514 515 if (.not.dfti_spawn_threads_here(ndat,nthreads)) then 516 call dfti_fftug_dp(fftalg,fftcache,npwin,nx,ny,nz,ldx,ldy,ldz,ndat,istwf_k,mgfft,kg_kin,gboundin,fofgin,fofr) 517 call cg_vlocpsi(nx,ny,nz,ldx,ldy,ldz,ndat,cplex,denpot,fofr) 518 519 ! The data for option==2 is now in fofr. 520 call dfti_fftpad_dp(fofr,nx,ny,nz,ldx,ldy,ldz,ndat,mgfft,-1,gboundout) 521 522 call cg_box2gsph(nx,ny,nz,ldx,ldy,ldz,ndat,npwout,kg_kout,fofr,fofgout) 523 else 524 525 !$OMP PARALLEL DO PRIVATE(ptg, ptr) 526 do dat=1,ndat 527 ptg = 1 + (dat-1)*npwin 528 ptr = 1 + (dat-1)*ldx*ldy*ldz 529 call dfti_fftug_dp(fftalg,fftcache,npwin,nx,ny,nz,ldx,ldy,ldz,ndat1,& 530 & istwf_k,mgfft,kg_kin,gboundin,fofgin(1,ptg),fofr(1,ptr)) 531 532 call cg_vlocpsi(nx,ny,nz,ldx,ldy,ldz,ndat1,cplex,denpot,fofr(1,ptr)) 533 534 ! The data for option==2 is now in fofr. 535 call dfti_fftpad_dp(fofr(1,ptr),nx,ny,nz,ldx,ldy,ldz,ndat1,mgfft,-1,gboundout) 536 537 ptg = 1 + (dat-1)*npwout 538 call cg_box2gsph(nx,ny,nz,ldx,ldy,ldz,ndat1,npwout,kg_kout,fofr(1,ptr),fofgout(1,ptg)) 539 end do 540 end if 541 542 CASE (3) 543 ! The data for option==3 is already in fofr. 544 if (.not.dfti_spawn_threads_here(ndat,nthreads)) then 545 call dfti_fftpad_dp(fofr,nx,ny,nz,ldx,ldy,ldz,ndat,mgfft,-1,gboundout) 546 call cg_box2gsph(nx,ny,nz,ldx,ldy,ldz,ndat,npwout,kg_kout,fofr,fofgout) 547 else 548 !$OMP PARALLEL DO PRIVATE(ptg, ptr) 549 do dat=1,ndat 550 ptg = 1 + (dat-1)*npwout 551 ptr = 1 + (dat-1)*ldx*ldy*ldz 552 call dfti_fftpad_dp(fofr(1,ptr),nx,ny,nz,ldx,ldy,ldz,ndat1,mgfft,-1,gboundout) 553 call cg_box2gsph(nx,ny,nz,ldx,ldy,ldz,ndat1,npwout,kg_kout,fofr(1,ptr),fofgout(1,ptg)) 554 end do 555 end if 556 557 CASE DEFAULT 558 write(msg,'(a,i0,a)')'Option',option,' is not allowed. Only option=0, 1, 2 or 3 are allowed presently.' 559 ABI_ERROR(msg) 560 END SELECT 561 end if 562 563 end subroutine dfti_seqfourwf
m_dfti/dfti_spawn_threads_here [ Functions ]
[ Top ] [ m_dfti ] [ Functions ]
NAME
dfti_spawn_threads_here
FUNCTION
Helper function that returns true if FFT calls should be OMP parallelized in the client code.
INPUTS
ndat=Number of FFT transforms to do nthreads = Number of threads available
SOURCE
2154 function dfti_spawn_threads_here(ndat, nthreads) result(ans) 2155 2156 !Arguments ------------------------------------ 2157 !scalars 2158 integer,intent(in) :: ndat,nthreads 2159 logical :: ans 2160 2161 ! ************************************************************************* 2162 2163 ans = .FALSE. 2164 #ifdef HAVE_OPENMP 2165 ans = (nthreads > 1 .and. MOD(ndat,nthreads) == 0 .and. .not. USE_LIB_THREADS) 2166 #else 2167 ABI_UNUSED((/ndat,nthreads/)) 2168 #endif 2169 2170 end function dfti_spawn_threads_here
m_dfti/dfti_use_lib_threads [ Functions ]
[ Top ] [ m_dfti ] [ Functions ]
NAME
dfti_use_lib_threads
FUNCTION
INPUTS
SOURCE
2185 subroutine dfti_use_lib_threads(logvar) 2186 2187 !Arguments ------------------------------------ 2188 !scalars 2189 logical,intent(in) :: logvar 2190 2191 ! ************************************************************************* 2192 2193 USE_LIB_THREADS = logvar 2194 2195 end subroutine dfti_use_lib_threads