TABLE OF CONTENTS


ABINIT/m_dfti [ Modules ]

[ Top ] [ Modules ]

NAME

 m_dfti

FUNCTION

  This module provides wrappers for the MKL DFTI routines: in-place and out-of-place version.

COPYRIGHT

 Copyright (C) 2009-2022 ABINIT group (MG)
 This file is distributed under the terms of the
 GNU General Public License, see ~abinit/COPYING
 or http://www.gnu.org/copyleft/gpl.txt .

NOTES

  1) MPI parallelism is not supported
  2) For better performance the FFT divisions should contain small factors  (/2, 3, 5, 7, 11, 13/)
     see http://software.intel.com/sites/products/documentation/doclib/mkl_sa/11/mkl_userguide_lnx/index.htm

SOURCE

21 #if defined HAVE_CONFIG_H
22 #include "config.h"
23 #endif
24 
25 #include "abi_common.h"
26 
27 #ifdef HAVE_DFTI
28 
29 ! Include and generate MKL_DFTI module
30 #include "mkl_dfti.f90"
31 
32 ! Macros for template files.
33 #define FFTLIB "DFTI"
34 #define FFT_PREF(name) CONCAT(dfti_,name)
35 #define SPAWN_THREADS_HERE(ndat, nthreads) dfti_spawn_threads_here(ndat, nthreads)
36 
37 #define FFT_DOUBLE 1
38 #define FFT_SINGLE 2
39 #define FFT_MIXPREC 3
40 
41 #endif
42 
43 MODULE m_dfti
44 
45  use defs_basis
46  use m_abicore
47  use m_errors
48  use m_xomp
49  use m_cgtools
50  use m_cplxtools
51  use m_fftcore
52  use, intrinsic :: iso_c_binding
53 #ifdef HAVE_DFTI
54  use MKL_DFTI
55 #endif
56 
57  use m_fstrings,  only : basename, strcat, int2char10, itoa, sjoin
58  use m_hide_blas, only : xcopy
59  use m_fft_mesh,  only : zpad_t, zpad_init, zpad_free
60 
61  implicit none
62 
63  private
64 
65 ! Entry points for client code
66  public :: dfti_seqfourdp      ! 3D FFT of lengths nx, ny, nz. Mainly used for densities or potentials.
67  public :: dfti_seqfourwf      ! FFT transform of wavefunctions (high-level interface).
68  public :: dfti_fftrisc
69  public :: dfti_fftrisc_mixprec ! Mixed precision version of fftrisc: input/output in dp, computation done in sp.
70  public :: dfti_fftug          ! G-->R, 3D zero-padded FFT of lengths nx, ny, nz. Mainly used for wavefunctions
71  public :: dfti_fftur          ! R-->G, 3D zero-padded FFT of lengths nx, ny, nz. Mainly used for wavefunctions
72 
73 ! Low-level routines.
74  public :: dfti_r2c_op         ! Real to complex transform (out-of-place version).
75  public :: dfti_c2r_op         ! Complex to real transform (out-of-place version).
76  public :: dfti_c2c_op         ! complex to complex transform (out-of-place version).
77  public :: dfti_c2c_ip         ! complex to complex transform (in-place version).
78  public :: dfti_many_dft_op    ! Driver routine for many out-of-place 3D complex-to-complex FFTs.
79  public :: dfti_many_dft_ip    ! Driver routine for many in-place 3D complex-to-complex FFTs.
80  public :: dfti_fftpad         ! Driver routines for zero-padded FFT of wavefunctions.
81 
82  !FIXME I don't know why gcc does not recognize this one
83  ! Perhaps I have to provide an interfaces for fofr(:,:,:,:)
84  public :: dfti_fftpad_dp      ! Driver routines for zero-padded FFT of wavefunctions.
85  public :: dfti_fftug_dp       ! Driver routines for zero-padded FFT of wavefunctions.
86  public :: dfti_use_lib_threads

m_dfti/dfti_alloc_complex_dpc [ Functions ]

[ Top ] [ m_dfti ] [ Functions ]

NAME

 dfti_alloc_complex_dpc

FUNCTION

INPUTS

OUTPUT

SOURCE

2289 #ifdef HAVE_DFTI
2290 
2291 subroutine dfti_alloc_complex_dpc(size, cptr, fptr)
2292 
2293 !Arguments ------------------------------------
2294 !scalars
2295  integer,intent(in) :: size
2296  complex(dpc),ABI_CONTIGUOUS pointer :: fptr(:)
2297  type(C_PTR),intent(out) :: cptr
2298 !arrays
2299 
2300 ! *************************************************************************
2301 
2302  cptr = mkl_malloc( INT(2*size*C_DOUBLE, KIND=C_SIZE_T), DFTI_DEFAULT_ALIGNMENT_DP)
2303  if (.not. C_ASSOCIATED(cptr)) then
2304    ABI_ERROR("mkl_malloc returned NULL!")
2305  end if
2306 
2307  call c_f_pointer(cptr, fptr, [size])
2308 
2309 end subroutine dfti_alloc_complex_dpc

m_dfti/dfti_alloc_complex_spc [ Functions ]

[ Top ] [ m_dfti ] [ Functions ]

NAME

 dfti_alloc_complex_spc

FUNCTION

INPUTS

OUTPUT

SOURCE

2251 #ifdef HAVE_DFTI
2252 
2253 subroutine dfti_alloc_complex_spc(size, cptr, fptr)
2254 
2255 !Arguments ------------------------------------
2256 !scalars
2257  integer,intent(in) :: size
2258  complex(spc),ABI_CONTIGUOUS pointer :: fptr(:)
2259  type(C_PTR),intent(out) :: cptr
2260 
2261 ! *************************************************************************
2262 
2263  cptr = mkl_malloc( INT(2*size*C_FLOAT, KIND=C_SIZE_T), DFTI_DEFAULT_ALIGNMENT_SP)
2264  if (.not. C_ASSOCIATED(cptr)) then
2265    ABI_ERROR("mkl_malloc returned NULL!")
2266  end if
2267 
2268  call c_f_pointer(cptr, fptr, [size])
2269 
2270 end subroutine dfti_alloc_complex_spc

m_dfti/dfti_alloc_real_dp [ Functions ]

[ Top ] [ m_dfti ] [ Functions ]

NAME

 dfti_alloc_real_dp

FUNCTION

INPUTS

OUTPUT

SOURCE

2212 #ifdef HAVE_DFTI
2213 
2214 subroutine dfti_alloc_real_dp(size, cptr, fptr)
2215 
2216 !Arguments ------------------------------------
2217 !scalars
2218  integer,intent(in) :: size
2219  real(dp),ABI_CONTIGUOUS pointer :: fptr(:)
2220  type(C_PTR),intent(out) :: cptr
2221 !arrays
2222 
2223 ! *************************************************************************
2224 
2225  cptr = mkl_malloc( INT(size*C_DOUBLE, KIND=C_SIZE_T), DFTI_DEFAULT_ALIGNMENT_DP)
2226  if (.not. C_ASSOCIATED(cptr)) then
2227    ABI_ERROR("mkl_malloc returned NULL!")
2228  end if
2229 
2230  call c_f_pointer(cptr, fptr, [size])
2231 
2232 end subroutine dfti_alloc_real_dp

m_dfti/dfti_c2c_ip_dpc [ Functions ]

[ Top ] [ m_dfti ] [ Functions ]

NAME

  dfti_c2c_ip_dpc

FUNCTION

 Driver routine for in-place 3D complex-complex FFT. TARGET: DPC arrays

INPUTS

 nx,ny,nz=Number of points along the three directions.
 ldx,ldy,ldz=Physical dimensions of the array.
 ndat=Number of FFTs to be done.
 iscale=0 if G --> R FFT should not be scaled.
 isign= +1 : ff(G) => ff(R); -1 : ff(R) => ff(G)

SIDE EFFECTS

  ff(ldx*ldy*ldz*ndat)=
    In input: the complex array to be transformed.
    In output: the Fourier transformed in the space specified by isign.

SOURCE

1346 subroutine dfti_c2c_ip_dpc(nx, ny, nz, ldx, ldy, ldz, ndat, iscale, isign, ff)
1347 
1348 !Arguments ------------------------------------
1349 !scalars
1350  integer,intent(in) :: nx,ny,nz,ldx,ldy,ldz,ndat,iscale,isign
1351 !arrays
1352  complex(dpc),intent(inout) :: ff(ldx*ldy*ldz*ndat)
1353 
1354 ! *************************************************************************
1355 
1356 ! Include Fortran template
1357 #undef DEV_DFTI_PRECISION
1358 #define DEV_DFTI_PRECISION DFTI_DOUBLE
1359 
1360 #include "dfti_c2c_ip.finc"
1361 
1362 end subroutine dfti_c2c_ip_dpc

m_dfti/dfti_c2c_ip_spc [ Functions ]

[ Top ] [ m_dfti ] [ Functions ]

NAME

  dfti_c2c_ip_spc

FUNCTION

 Driver routine for in-place 3D complex-complex FFT. TARGET: SPC arrays

INPUTS

 nx,ny,nz=Number of points along the three directions.
 ldx,ldy,ldz=Physical dimensions of the array.
 ndat=Number of FFTs to be done.
 isign= +1 : ff(G) => ff(R); -1 : ff(R) => ff(G)

SIDE EFFECTS

  ff(ldx*ldy*ldz*ndat)=
    In input: the complex array to be transformed.
    In output: the Fourier transform in the space specified by isign.

SOURCE

1304 subroutine dfti_c2c_ip_spc(nx, ny, nz, ldx, ldy, ldz, ndat, iscale, isign, ff)
1305 
1306 !Arguments ------------------------------------
1307 !scalars
1308  integer,intent(in) :: nx,ny,nz,ldx,ldy,ldz,ndat,iscale,isign
1309 !arrays
1310  complex(spc),intent(inout) :: ff(ldx*ldy*ldz*ndat)
1311 
1312 ! *************************************************************************
1313 
1314 ! Include Fortran template
1315 #undef DEV_DFTI_PRECISION
1316 #define DEV_DFTI_PRECISION DFTI_SINGLE
1317 
1318 #include "dfti_c2c_ip.finc"
1319 
1320 end subroutine dfti_c2c_ip_spc

m_dfti/dfti_c2c_op_dpc [ Functions ]

[ Top ] [ m_dfti ] [ Functions ]

NAME

  dfti_c2c_op_dpc

FUNCTION

 Driver routine for out-of-place 3D complex-complex FFT of lengths nx, ny, nz.
 TARGET: DPC arrays

INPUTS

 nx,ny,nz=Number of points along the three directions.
 ldx,ldy,ldz=Physical dimensions of the array.
 ndat=Number of FFTs to be done.
 iscale=0 if G --> R FFT should not be scaled.
 isign= +1 : ff(G) => gg(R); -1 : ff(R) => gg(G)
 ff(ldx*ldy*ldz*ndat)=The array to be transformed.

OUTPUT

   gg(ldx*ldy*ldz*ndat)=The FFT of ff.

SOURCE

1431 subroutine dfti_c2c_op_dpc(nx, ny, nz, ldx, ldy, ldz, ndat, iscale, isign, ff, gg)
1432 
1433 !Arguments ------------------------------------
1434 !scalars
1435  integer,intent(in) :: nx,ny,nz,ldx,ldy,ldz,isign,ndat,iscale
1436 !arrays
1437  complex(dpc),intent(in) :: ff(ldx*ldy*ldz*ndat)
1438  complex(dpc),intent(out) :: gg(ldx*ldy*ldz*ndat)
1439 
1440 ! *************************************************************************
1441 
1442 ! Include Fortran template
1443 #undef DEV_DFTI_PRECISION
1444 #define DEV_DFTI_PRECISION DFTI_DOUBLE
1445 
1446 #include "dfti_c2c_op.finc"
1447 
1448 end subroutine dfti_c2c_op_dpc

m_dfti/dfti_c2c_op_spc [ Functions ]

[ Top ] [ m_dfti ] [ Functions ]

NAME

  dfti_c2c_op_spc

FUNCTION

 Driver routine for out-of-place 3D complex-complex FFT of lengths nx, ny, nz.
 TARGET: spc arrays

INPUTS

 nx,ny,nz=Number of points along the three directions.
 ldx,ldy,ldz=Physical dimensions of the array.
 ndat=Number of FFTs to be done.
 iscale=0 if G --> R FFT should not be scaled.
 isign= +1 : ff(G) => gg(R); -1 : ff(R) => gg(G)
 ff(ldx*ldy*ldz*ndat)=The array to be transformed.

OUTPUT

   gg(ldx*ldy*ldz*ndat)=The FFT of ff.

SOURCE

1388 subroutine dfti_c2c_op_spc(nx, ny, nz, ldx, ldy, ldz, ndat, iscale, isign, ff, gg)
1389 
1390 !Arguments ------------------------------------
1391 !scalars
1392  integer,intent(in) :: nx,ny,nz,ldx,ldy,ldz,isign,ndat, iscale
1393 !arrays
1394  complex(spc),intent(in) :: ff(ldx*ldy*ldz*ndat)
1395  complex(spc),intent(out) :: gg(ldx*ldy*ldz*ndat)
1396 
1397 ! *************************************************************************
1398 
1399 ! Include Fortran template
1400 #undef DEV_DFTI_PRECISION
1401 #define DEV_DFTI_PRECISION DFTI_SINGLE
1402 
1403 #include "dfti_c2c_op.finc"
1404 
1405 end subroutine dfti_c2c_op_spc

m_dfti/dfti_c2r_op_dp [ Functions ]

[ Top ] [ m_dfti ] [ Functions ]

NAME

  dfti_c2r_op_dp

FUNCTION

 Driver routine for out-of-place 3D complex-to-real FFT of lengths nx, ny, nz.

INPUTS

 nx,ny,nz=Number of point along the three directions.
 ldx,ldy,ldz=Physical dimension of the f array (to avoid cache conflicts).
 ndat=Number of FFTs to be done.
 ff(2,ldx*ldy*ldz*ndat)=The complex array to be transformed.

OUTPUT

 gg(ldx*ldy*ldz*ndat)=The backwards real FFT of ff.

SOURCE

2044 subroutine dfti_c2r_op_dp(nx, ny, nz, ldx, ldy, ldz, ndat, ff, gg)
2045 
2046 !Arguments ------------------------------------
2047 !scalars
2048  integer,intent(in) :: nx,ny,nz,ldx,ldy,ldz,ndat
2049 !arrays
2050  real(dp),target,intent(in) :: ff(2*ldx*ldy*ldz*ndat)
2051  real(dp),intent(inout) :: gg(ldx*ldy*ldz*ndat)    !vz_i
2052 
2053 #ifdef HAVE_DFTI
2054 !Local variables-------------------------------
2055 !scalars
2056  type(C_ptr) :: ff_cptr
2057 !arrays
2058  complex(dpc),ABI_CONTIGUOUS pointer :: ff_fptr(:)
2059 
2060 ! *************************************************************************
2061 
2062  ff_cptr = C_loc(ff)
2063  call C_F_pointer(ff_cptr,ff_fptr, shape=(/ldx*ldy*ldz*ndat/))
2064 
2065  call dfti_c2r_op_dpc(nx,ny,nz,ldx,ldy,ldz,ndat,ff_fptr,gg)
2066 
2067 #else
2068  ABI_ERROR("FFT_DFTI support not activated")
2069  ABI_UNUSED((/nx,ny,nz,ldx,ldy,ldz/))
2070  ABI_UNUSED((/ff(1),gg(1)/))
2071 #endif
2072 
2073 end subroutine dfti_c2r_op_dp

m_dfti/dfti_c2r_op_dpc [ Functions ]

[ Top ] [ m_dfti ] [ Functions ]

NAME

  dfti_c2r_op_dpc

FUNCTION

 Driver routine for out-of-place 3D complex-to-real FFT of lengths nx, ny, nz.

INPUTS

 nx,ny,nz=Number of point along the three directions.
 ldx,ldy,ldz=Physical dimension of the f array (to avoid cache conflicts).
 ndat=Number of FFTs to be done.
 ff(2,ldx*ldy*ldz*ndat)=The complex array to be transformed.

OUTPUT

 gg(2,ldx*ldy*ldz*ndat)=The backwards real FFT of ff.

SOURCE

1947 subroutine dfti_c2r_op_dpc(nx, ny, nz, ldx, ldy, ldz, ndat, ff, gg)
1948 
1949 !Arguments ------------------------------------
1950 !scalars
1951  integer,intent(in) :: nx,ny,nz,ldx,ldy,ldz,ndat
1952 !arrays
1953  complex(dpc),intent(in) :: ff(ldx*ldy*ldz*ndat)
1954  real(dp),intent(out) :: gg(ldx*ldy*ldz*ndat)
1955 
1956 #ifdef HAVE_DFTI
1957 !Local variables-------------------------------
1958 !scalars
1959  integer :: status,nhp,padx,i2,i3,igp,igf,idat,padatf,padatp,ii
1960  type(DFTI_DESCRIPTOR),pointer :: Desc
1961  type(C_PTR) :: cptr
1962 !arrays
1963  complex(dpc),ABI_CONTIGUOUS pointer :: ff_hp(:)
1964 
1965 ! *************************************************************************
1966 
1967  !stride  = 1
1968  !idist   = nhp
1969  !odist   = nx*ny*nz
1970  !n       = (/nx,ny,nz/)
1971  !inembed = (/(nx/2+1),ny,nz/)
1972  !onembed = (/nx,ny,nz/) ! check this
1973  !my_plan = retrieve_plan3(n,ndat,inembed,stride,idist,onembed,stride,odist,FFTW_BACKWARD,my_flags,Saved_plans)
1974 
1975  ! Fill the Hermitian part: Hermitian redundancy: out[i] is the conjugate of out[n-i]
1976  padx    = (nx/2+1)
1977  nhp     = padx*ny*nz
1978 
1979  call dfti_alloc_complex(nhp*ndat,cptr,ff_hp)
1980 
1981 !!$OMP PARALLEL DO PRIVATE(padatf,padatp,igf,igp)
1982  do idat=1,ndat
1983    padatf=(idat-1)*ldx*ldy*ldz
1984    padatp=(idat-1)*padx*ny*nz
1985    do i3=1,nz
1986      do i2=1,ny
1987        igf = (i3-1)*ldx*ldy + (i2-1)*ldx   + padatf
1988        igp = (i3-1)*padx*ny + (i2-1)*padx  + padatp
1989        ff_hp(igp+1:igp+padx) = ff(igf+1:igf+padx)
1990      end do
1991    end do
1992  end do
1993 
1994  status = DftiCreateDescriptor(Desc, DFTI_DOUBLE, DFTI_REAL, 3, (/nx,ny,nz/) )
1995  DFTI_CHECK(status)
1996 
1997  status = DftiSetValue(Desc, DFTI_CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_COMPLEX)
1998  status = DftiSetValue(Desc, DFTI_PLACEMENT, DFTI_NOT_INPLACE )
1999  status = DftiSetValue(Desc, DFTI_INPUT_STRIDES, (/0, 1, padx, padx*ny/))
2000  status = DftiSetValue(Desc, DFTI_INPUT_DISTANCE, nhp)
2001  status = DftiSetValue(Desc, DFTI_OUTPUT_STRIDES, (/0, 1, ldx, ldx*ldy/))
2002  status = DftiSetValue(Desc, DFTI_OUTPUT_DISTANCE, ldx*ldy*ldz)
2003  status = DftiSetValue(Desc, DFTI_NUMBER_OF_TRANSFORMS, ndat)
2004  DFTI_CHECK(status)
2005 
2006  DFTI_CHECK( DftiCommitDescriptor(Desc) )
2007 
2008  DFTI_CHECK( DftiComputeBackward(Desc, ff_hp, gg) )
2009 
2010  DFTI_CHECK( DftiFreeDescriptor(Desc) )
2011 
2012  call dfti_free(cptr)
2013 
2014 #else
2015  ABI_ERROR("FFT_DFTI support not activated")
2016  ABI_UNUSED((/nx,ny,nz,ldx,ldy,ldz/))
2017  ABI_UNUSED(ff(1))
2018  ABI_UNUSED(gg(1))
2019 #endif
2020 
2021 end subroutine dfti_c2r_op_dpc

m_dfti/dfti_check_status [ Functions ]

[ Top ] [ m_dfti ] [ Functions ]

NAME

  dfti_check_status

FUNCTION

  Error handler for DFTI wrappers. Print error message and abort.

INPUTS

  status = status error reported by dfti.
  [file] = file name
  [line] = line number

SOURCE

2092 #ifdef HAVE_DFTI
2093 
2094 subroutine dfti_check_status(status, file, line)
2095 
2096 !Arguments ------------------------------------
2097 !scalars
2098  integer,intent(in) :: status
2099  integer,optional,intent(in) :: line
2100  character(len=*),optional,intent(in) :: file
2101 
2102 !Local variables-------------------------------
2103  integer :: f90line
2104  character(len=10) :: lnum
2105  character(len=500) :: f90name
2106  character(len=500) :: my_msg
2107  character(len=DFTI_MAX_MESSAGE_LENGTH+500) :: err_msg
2108 
2109 ! *************************************************************************
2110 
2111  if (PRESENT(line)) then
2112    f90line=line
2113  else
2114    f90line=0
2115  end if
2116  call int2char10(f90line,lnum)
2117 
2118  if (PRESENT(file)) then
2119    f90name = basename(file)
2120  else
2121    f90name='Subroutine Unknown'
2122  end if
2123 
2124  my_msg=strcat(f90name,":",lnum,":")
2125 
2126  if (status /= 0) then
2127    if (.not. DftiErrorClass(status, DFTI_NO_ERROR)) then
2128      err_msg = strcat(my_msg," Error: ",DftiErrorMessage(status))
2129      ABI_ERROR(err_msg)
2130    end if
2131  end if
2132 
2133 end subroutine dfti_check_status

m_dfti/dfti_fftpad_dp [ Functions ]

[ Top ] [ m_dfti ] [ Functions ]

NAME

  dfti_fftpad_dp

FUNCTION

  This routine transforms wavefunctions using 3D zero-padded FFTs with DFTI.
  The 3D ffts are computed only on lines and planes which have non zero elements (see zpad_init)
  FFT transform is in-place.

INPUTS

   nx,ny,nz=Logical dimensions of the FFT mesh.
   ldx,ldy,ldz=Physical dimension of the f array (to avoid cache conflicts).
   ndat=Numer of FFTs
   mgfft=MAX(nx,ny,nz), only used to dimension gbound
   isign=The sign of the transform.
   gbound(2*mgfft+8,2)= The boundaries of the basis sphere of G vectors at a given k-point.
     See sphereboundary for more info.

SIDE EFFECTS

   ff(2*ldx*ldy*ldz*ndat)=
     input: The array with the data to be transformed.
     output: The results of the FFT.

SOURCE

1599 subroutine dfti_fftpad_dp(ff, nx, ny, nz, ldx, ldy, ldz, ndat, mgfft, isign, gbound, iscale)
1600 
1601 !Arguments ------------------------------------
1602 !scalars
1603  integer,intent(in) :: nx,ny,nz,ldx,ldy,ldz,ndat,mgfft,isign
1604 !arrays
1605  integer,intent(in) :: gbound(2*mgfft+8,2)
1606  real(dp),target,intent(inout) :: ff(2*ldx*ldy*ldz*ndat)
1607  integer,optional,intent(in) :: iscale
1608 
1609 !Local variables-------------------------------
1610 #ifdef HAVE_DFTI
1611 !scalars
1612  type(C_ptr) :: cptr
1613  integer :: iscale__
1614 !arrays
1615  complex(dpc),ABI_CONTIGUOUS pointer :: fptr(:)
1616 
1617 ! *************************************************************************
1618 
1619  iscale__ = merge(1, 0, isign == -1); if (present(iscale)) iscale__ = iscale
1620 
1621  ! Associate complex fptr with real ff via the C pointer
1622  cptr = C_loc(ff)
1623  call C_F_pointer(cptr, fptr, SHAPE=[ldx*ldy*ldz*ndat])
1624 
1625  ! Call complex version --> a lot of boilerplate code avoided
1626  call dfti_fftpad_dpc(fptr, nx, ny, nz, ldx, ldy, ldz, ndat, mgfft, isign, gbound)
1627 
1628 #else
1629  ABI_ERROR("FFT_DFTI support not activated")
1630  ABI_UNUSED((/nx,ny,nz,ldx,ldy,ldz,ndat,mgfft,isign,iscale/))
1631  ABI_UNUSED(gbound(1,1))
1632  ABI_UNUSED(ff(1))
1633 #endif
1634 
1635 end subroutine dfti_fftpad_dp

m_dfti/dfti_fftpad_dpc [ Functions ]

[ Top ] [ m_dfti ] [ Functions ]

NAME

  dfti_fftpad_dpc

FUNCTION

  This routine transforms wavefunctions using 3D zero-padded FFTs with DFTI.
  The 3D ffts are computed only on lines and planes which have non zero elements (see zpad_init)
  FFT transform is in-place. Target: complex double-precision arrays.

INPUTS

   nx,ny,nz=Logical dimensions of the FFT mesh.
   ldx,ldy,ldz=Physical dimension of the f array (to avoid cache conflicts).
   ndat=Number of FFTs.
   mgfft=MAX(nx,ny,nz), only used to dimension gbound
   isign=The sign of the transform.
   gbound(2*mgfft+8,2)= The boundaries of the basis sphere of G vectors at a given k-point.
     See sphereboundary for more info.

SIDE EFFECTS

   ff(ldx*ldy*ldz*ndat)=
     input: The array with the data to be transformed.
     output: The results of the FFT.

SOURCE

1665 subroutine dfti_fftpad_dpc(ff, nx, ny, nz, ldx, ldy, ldz, ndat, mgfft, isign, gbound, iscale)
1666 
1667 !Arguments ------------------------------------
1668 !scalars
1669  integer,intent(in) :: nx,ny,nz,ldx,ldy,ldz,ndat,mgfft,isign
1670  integer,optional,intent(in) :: iscale
1671 !arrays
1672  integer,intent(in) :: gbound(2*mgfft+8,2)
1673  complex(dpc),intent(inout) :: ff(ldx*ldy*ldz*ndat)
1674 
1675 !Local variables-------------------------------
1676 #ifdef HAVE_DFTI
1677 
1678 ! *************************************************************************
1679 
1680 ! Include Fortran template
1681 #undef DEV_DFTI_PRECISION
1682 #define DEV_DFTI_PRECISION DFTI_DOUBLE
1683 
1684 #include "dfti_fftpad.finc"
1685 
1686 #else
1687  ABI_ERROR("FFT_DFTI support not activated")
1688  ABI_UNUSED((/nx,ny,nz,ldx,ldy,ldz,ndat,mgfft,isign,iscale/))
1689  ABI_UNUSED(gbound(1,1))
1690  ABI_UNUSED(ff(1))
1691 #endif
1692 
1693 end subroutine dfti_fftpad_dpc

m_dfti/dfti_fftpad_spc [ Functions ]

[ Top ] [ m_dfti ] [ Functions ]

NAME

  dfti_fftpad_spc

FUNCTION

  This routine transforms wavefunctions using 3D zero-padded FFTs with DFTI.
  The 3D ffts are computed only on lines and planes which have non zero elements (see zpad_init)
  FFT transform is in-place. Target: complex SPC arrays.

INPUTS

   nx,ny,nz=Logical dimensions of the FFT mesh.
   ldx,ldy,ldz=Physical dimension of the f array (to avoid cache conflicts).
   ndat=Number of FFTs.
   mgfft=MAX(nx,ny,nz), only used to dimension gbound
   isign=The sign of the transform.
   gbound(2*mgfft+8,2)= The boundaries of the basis sphere of G vectors at a given k-point.
     See sphereboundary for more info.

SIDE EFFECTS

   ff(ldx*ldy*ldz*ndat)=
     input: The array with the data to be transformed.
     output: The results of the FFT.

SOURCE

1723 subroutine dfti_fftpad_spc(ff, nx, ny, nz, ldx, ldy, ldz, ndat, mgfft, isign, gbound, iscale)
1724 
1725 !Arguments ------------------------------------
1726 !scalars
1727  integer,intent(in) :: nx,ny,nz,ldx,ldy,ldz,ndat,mgfft,isign
1728  integer,optional,intent(in) :: iscale
1729 !arrays
1730  integer,intent(in) :: gbound(2*mgfft+8,2)
1731  complex(spc),intent(inout) :: ff(ldx*ldy*ldz*ndat)
1732 
1733 #ifdef HAVE_DFTI
1734 
1735 ! Include Fortran template
1736 #undef DEV_DFTI_PRECISION
1737 #define DEV_DFTI_PRECISION DFTI_SINGLE
1738 
1739 #include "dfti_fftpad.finc"
1740 
1741 #else
1742  ABI_ERROR("FFT_DFTI support not activated")
1743  ABI_UNUSED((/nx,ny,nz,ldx,ldy,ldz,ndat,mgfft,isign,iscale/))
1744  ABI_UNUSED(gbound(1,1))
1745  ABI_UNUSED(ff(1))
1746 #endif
1747 
1748 end subroutine dfti_fftpad_spc

m_dfti/dfti_fftrisc_dp [ Functions ]

[ Top ] [ m_dfti ] [ Functions ]

NAME

 dfti_fftrisc_dp

FUNCTION

 Carry out Fourier transforms between real and reciprocal (G) space,
 for wavefunctions, contained in a sphere in reciprocal space,
 in both directions. Also accomplish some post-processing.

NOTES

 Specifically uses rather sophisticated algorithms, based on S Goedecker
 routines, specialized for superscalar RISC architecture.
 Zero padding : saves 7/12 execution time
 Bi-dimensional data locality in most of the routine : cache reuse
 For k-point (0 0 0) : takes advantage of symmetry of data.
 Note however that no blocking is used, in both 1D z-transform
 or subsequent 2D transform. This should be improved.

INPUTS

  cplex= if 1 , denpot is real, if 2 , denpot is complex
     (cplex=2 only allowed for option=2 when istwf_k=1)
     one can also use cplex=0 if option=0 or option=3
  fofgin(2,npwin)=holds input wavefunction in G vector basis sphere.
  gboundin(2*mgfft+8,2)=sphere boundary info for reciprocal to real space
  gboundout(2*mgfft+8,2)=sphere boundary info for real to reciprocal space
  istwf_k=option parameter that describes the storage of wfs
  kg_kin(3,npwin)=reduced planewave coordinates, input
  kg_kout(3,npwout)=reduced planewave coordinates, output
  mgfft=maximum size of 1D FFTs
  ngfft(18)=contain all needed information about 3D FFT, see ~abinit/doc/variables/vargs.htm#ngfft
  npwin=number of elements in fofgin array (for option 0, 1 and 2)
  npwout=number of elements in fofgout array (for option 2 and 3)
  ldx,ldy,ldz=ngfft(4),ngfft(5),ngfft(6), dimensions of fofr.
  option= if 0: do direct FFT
          if 1: do direct FFT, then sum the density
          if 2: do direct FFT, multiply by the potential, then do reverse FFT
          if 3: do reverse FFT only
  weight=weight to be used for the accumulation of the density in real space
          (needed only when option=1)

OUTPUT

  (see side effects)

OPTIONS

  The different options are:
  - reciprocal to real space and output the result (when option=0),
  - reciprocal to real space and accumulate the density (when option=1) or
  - reciprocal to real space, apply the local potential to the wavefunction
    in real space and produce the result in reciprocal space (when option=2)
  - real space to reciprocal space (when option=3).
  option=0 IS NOT ALLOWED when istwf_k>2
  option=3 IS NOT ALLOWED when istwf_k>=2

SIDE EFFECTS

  for option==0, fofgin(2,npwin)=holds input wavefunction in G sphere;
                 fofr(2,ldx,ldy,ldz) contains the Fourier Transform of fofgin;
                 no use of denpot, fofgout and npwout.
  for option==1, fofgin(2,npwin)=holds input wavefunction in G sphere;
                 denpot(cplex*ldx,ldy,ldz) contains the input density at input,
                 and the updated density at output;
                 no use of fofgout and npwout.
  for option==2, fofgin(2,npwin)=holds input wavefunction in G sphere;
                 denpot(cplex*ldx,ldy,ldz) contains the input local potential;
                 fofgout(2,npwout) contains the output function;
  for option==3, fofr(2,ldx,ldy,ldz) contains the real space wavefunction;
                 fofgout(2,npwout) contains its Fourier transform;
                 no use of fofgin and npwin.

SOURCE

700 subroutine dfti_fftrisc_dp(cplex,denpot,fofgin,fofgout,fofr,gboundin,gboundout,istwf_k,kg_kin,kg_kout,&
701                            mgfft,ngfft,npwin,npwout,ldx,ldy,ldz,option, &
702                            weight_r, weight_i, abi_convention, iscale)
703 
704 !Arguments ------------------------------------
705 !scalars
706  integer,intent(in) :: cplex,istwf_k,mgfft,ldx,ldy,ldz,npwin,npwout,option
707  real(dp),intent(in) :: weight_i,weight_r
708 !arrays
709  integer,intent(in) :: gboundin(2*mgfft+8,2),gboundout(2*mgfft+8,2)
710  integer,intent(in) :: kg_kin(3,npwin),kg_kout(3,npwout),ngfft(18)
711  real(dp),intent(in) :: fofgin(2,npwin)
712  real(dp),intent(inout) :: denpot(cplex*ldx,ldy,ldz)
713  real(dp),intent(inout) :: fofr(2,ldx*ldy*ldz)
714  real(dp),intent(inout) :: fofgout(2,npwout)    !vz_i
715  logical,optional,intent(in) :: abi_convention
716  integer,optional,intent(in) :: iscale
717 
718 ! *************************************************************************
719 
720 #ifdef HAVE_DFTI
721 
722 #undef  FFT_PRECISION
723 #undef  MYKIND
724 #undef  MYCZERO
725 #undef  MYCMPLX
726 #undef  MYCONJG
727 
728 #define FFT_PRECISION DFTI_DOUBLE
729 #define MYKIND DPC
730 #define MYCZERO (0._dp,0._dp)
731 #define MYCMPLX  DCMPLX
732 #define MYCONJG  DCONJG
733 
734 #include "dfti_fftrisc.finc"
735 
736 #else
737  ABI_ERROR("DFTI support not activated")
738  ABI_UNUSED((/cplex,gboundin(1,1),gboundout(1,1),istwf_k,kg_kin(1,1),kg_kout(1,1),iscale/))
739  ABI_UNUSED((/mgfft,ngfft(1),npwin,npwout,ldx,ldy,ldz,option/))
740  ABI_UNUSED((/denpot(1,1,1),fofgin(1,1),fofgout(1,1),fofr(1,1),weight_r,weight_i/))
741  ABI_UNUSED(abi_convention)
742 #endif
743 
744 end subroutine dfti_fftrisc_dp

m_dfti/dfti_fftrisc_mixprec [ Functions ]

[ Top ] [ m_dfti ] [ Functions ]

NAME

 dfti_fftrisc_mixprec

FUNCTION

 Carry out Fourier transforms between real and reciprocal (G) space,
 for wavefunctions, contained in a sphere in reciprocal space,
 in both directions. Also accomplish some post-processing.
 This is the Mixed Precision version (dp in input, FFT done with sp data, output is dp)
 See dfti_fftrisc_dp for API doc.

SOURCE

762 subroutine dfti_fftrisc_mixprec(cplex,denpot,fofgin,fofgout,fofr,gboundin,gboundout,istwf_k,kg_kin,kg_kout,&
763                                  mgfft,ngfft,npwin,npwout,ldx,ldy,ldz,option,&
764                                  weight_r,weight_i, abi_convention, iscale) ! optional
765 
766 !Arguments ------------------------------------
767 !scalars
768  integer,intent(in) :: cplex,istwf_k,mgfft,ldx,ldy,ldz,npwin,npwout,option
769  real(dp),intent(in) :: weight_i,weight_r
770 !arrays
771  integer,intent(in) :: gboundin(2*mgfft+8,2),gboundout(2*mgfft+8,2)
772  integer,intent(in) :: kg_kin(3,npwin),kg_kout(3,npwout),ngfft(18)
773  real(dp),intent(in) :: fofgin(2,npwin)
774  real(dp),intent(inout) :: denpot(cplex*ldx,ldy,ldz)
775  real(dp),intent(inout) :: fofr(2,ldx*ldy*ldz)
776  real(dp),intent(inout) :: fofgout(2,npwout)    !vz_i
777  logical,optional,intent(in) :: abi_convention
778  integer,optional,intent(in) :: iscale
779 
780 ! *************************************************************************
781 
782 #ifdef HAVE_DFTI
783 
784 #undef  FFT_PRECISION
785 #undef  MYKIND
786 #undef  MYCZERO
787 #undef  MYCMPLX
788 #undef  MYCONJG
789 
790 #define FFT_PRECISION DFTI_SINGLE
791 #define MYKIND SPC
792 #define MYCZERO (0._sp,0._sp)
793 #define MYCMPLX  CMPLX
794 #define MYCONJG  CONJG
795 
796 #define HAVE_DFTI_MIXED_PRECISION 1
797 
798 #include "dfti_fftrisc.finc"
799 
800 #undef HAVE_DFTI_MIXED_PRECISION
801 
802 #else
803  ABI_ERROR("DFTI support not activated")
804  ABI_UNUSED((/cplex,gboundin(1,1),gboundout(1,1),istwf_k,kg_kin(1,1),kg_kout(1,1),iscale/))
805  ABI_UNUSED((/mgfft,ngfft(1),npwin,npwout,ldx,ldy,ldz,option/))
806  ABI_UNUSED((/denpot(1,1,1),fofgin(1,1),fofgout(1,1),fofr(1,1),weight_r,weight_i/))
807  ABI_UNUSED(abi_convention)
808 #endif
809 
810 end subroutine dfti_fftrisc_mixprec

m_dfti/dfti_fftrisc_sp [ Functions ]

[ Top ] [ m_dfti ] [ Functions ]

NAME

 dfti_fftrisc_sp

FUNCTION

 Carry out Fourier transforms between real and reciprocal (G) space,
 for wavefunctions, contained in a sphere in reciprocal space,
 in both directions. Also accomplish some post-processing.
 See dfti_fftrisc_dp for API doc.

SOURCE

580 subroutine dfti_fftrisc_sp(cplex,denpot,fofgin,fofgout,fofr,gboundin,gboundout,istwf_k,kg_kin,kg_kout,&
581                            mgfft,ngfft,npwin,npwout,ldx,ldy,ldz,option, &
582                            weight_r,weight_i, abi_convention, iscale)
583 
584 !Arguments ------------------------------------
585 !scalars
586  integer,intent(in) :: cplex,istwf_k,mgfft,ldx,ldy,ldz,npwin,npwout,option
587  real(dp),intent(in) :: weight_i,weight_r
588 !arrays
589  integer,intent(in) :: gboundin(2*mgfft+8,2),gboundout(2*mgfft+8,2)
590  integer,intent(in) :: kg_kin(3,npwin),kg_kout(3,npwout),ngfft(18)
591  real(sp),intent(in) :: fofgin(2,npwin)
592  real(dp),intent(inout) :: denpot(cplex*ldx,ldy,ldz)
593  real(sp),intent(inout) :: fofr(2,ldx*ldy*ldz)
594  real(sp),intent(inout) :: fofgout(2,npwout)    !vz_i
595  logical,optional,intent(in) :: abi_convention
596  integer,optional,intent(in) :: iscale
597 
598 ! *************************************************************************
599 
600 #ifdef HAVE_DFTI
601 
602 #undef FFT_PRECISION
603 #undef MYKIND
604 #undef MYCZERO
605 #undef MYCMPLX
606 #undef MYCONJG
607 
608 #define FFT_PRECISION DFTI_SINGLE
609 #define MYKIND SPC
610 #define MYCZERO (0._sp,0._sp)
611 #define MYCMPLX  CMPLX
612 #define MYCONJG  CONJG
613 
614 #include "dfti_fftrisc.finc"
615 
616 #else
617  ABI_ERROR("DFTI support not activated")
618  ABI_UNUSED((/cplex,gboundin(1,1),gboundout(1,1),istwf_k,kg_kin(1,1),kg_kout(1,1),iscale/))
619  ABI_UNUSED((/mgfft,ngfft(1),npwin,npwout,ldx,ldy,ldz,option/))
620  ABI_UNUSED((/denpot(1,1,1),weight_r,weight_i/))
621  ABI_UNUSED((/fofgin(1,1),fofgout(1,1),fofr(1,1)/))
622  ABI_UNUSED(abi_convention)
623 #endif
624 
625 end subroutine dfti_fftrisc_sp

m_dfti/dfti_fftug_dp [ Functions ]

[ Top ] [ m_dfti ] [ Functions ]

NAME

 dfti_fftug_dp

FUNCTION

 Compute ndat zero-padded FFTs from G to R space.
 Mainly used for the transform of wavefunctions.
 TARGET: dp arrays with real and imaginary part

INPUTS

 fftalg=FFT algorith (see input variable)
 fftcache=size of the cache (kB)
 npw_k=number of plane waves for this k-point.
 nx,ny,nz=Number of point along the three directions.
 ldx,ldy,ldz=Leading dimensions of the array.
 ndat=Number of transforms
 istwf_k=Option describing the storage of the wavefunction.
 mgfft=Max number of FFT divisions (used to dimension gbound)
 kg_k(3,npw_k)=G-vectors in reduced coordinates
 gbound(2*mgfft+8,2)=Table for padded-FFT. See sphereboundary.
  ug(npw_k*ndat)=wavefunctions in reciprocal space.

OUTPUT

  ur(ldx*ldy*ldz*ndat)=wavefunctions in real space.

SOURCE

842 subroutine dfti_fftug_dp(fftalg, fftcache, npw_k, nx, ny, nz, ldx, ldy, ldz, ndat, &
843                          istwf_k, mgfft, kg_k, gbound, ug, ur, &
844                          isign, iscale)  ! optional
845 
846 !Arguments ------------------------------------
847 !scalars
848  integer,intent(in) :: fftalg,fftcache
849  integer,intent(in) :: npw_k,nx,ny,nz,ldx,ldy,ldz,ndat,istwf_k,mgfft
850 !arrays
851  integer,intent(in) :: gbound(2*mgfft+8,2),kg_k(3,npw_k)
852  real(dp),target,intent(in) :: ug(2*npw_k*ndat)
853  real(dp),target,intent(inout) :: ur(2*ldx*ldy*ldz*ndat)
854  integer,optional,intent(in) :: isign, iscale
855 
856 #ifdef HAVE_DFTI
857 !Local variables-------------------------------
858  integer,parameter :: dist=2
859  integer :: iscale__, isign__
860  real(dp) :: fofgout(2,0)
861  real(dp),ABI_CONTIGUOUS pointer :: real_ug(:,:),real_ur(:,:)
862 
863 ! *************************************************************************
864 
865  iscale__ = 0; if (present(iscale)) iscale__ = iscale
866  isign__ = +1; if (present(isign)) isign__ = isign
867 
868 #undef TK_PREF
869 #define TK_PREF(name) CONCAT(cg_,name)
870 
871 #undef  FFT_PRECISION
872 #define FFT_PRECISION FFT_DOUBLE
873 
874 #include "fftug.finc"
875 
876 #undef  FFT_PRECISION
877 
878 #else
879  ! Silence compiler warning
880  ABI_ERROR("FFT_DFTI support not activated")
881  ABI_UNUSED((/fftalg,fftcache,npw_k,nx,ny,nz,ldx,ldy,ldz,ndat,istwf_k,mgfft,kg_k(1,1),gbound(1,1), isign, iscale/))
882  ABI_UNUSED((/ug(1),ur(1)/))
883 #endif
884 
885 end subroutine dfti_fftug_dp

m_dfti/dfti_fftug_dpc [ Functions ]

[ Top ] [ m_dfti ] [ Functions ]

NAME

 dfti_fftug_dpc

FUNCTION

 Compute ndat zero-padded FFTs from G ro R.
 Mainly used for the transform of wavefunctions.
 TARGET: DPC arrays

INPUTS

 fftalg=FFT algorith (see input variable)
 fftcache=size of the cache (kB)
 npw_k=number of plane waves for this k-point.
 nx,ny,nz=Number of point along the three directions.
 ldx,ldy,ldz=Leading dimensions of the array.
 ndat=Number of transforms
 istwf_k=Option describing the storage of the wavefunction.
 mgfft=Max number of FFT divisions (used to dimension gbound)
 kg_k(3,npw_k)=G-vectors in reduced coordinates
 gbound(2*mgfft+8,2)=Table for padded-FFT. See sphereboundary.
  ug(npw_k*ndat)=wavefunctions in reciprocal space

OUTPUT

  ur(ldx*ldy*ldz*ndat)=wavefunctions in real space.

SOURCE

 993 subroutine dfti_fftug_dpc(fftalg, fftcache, npw_k, nx, ny, nz, ldx, ldy, ldz, ndat, &
 994                           istwf_k, mgfft, kg_k, gbound, ug, ur, &
 995                           isign, iscale)  ! optional
 996 
 997 !Arguments ------------------------------------
 998 !scalars
 999  integer,intent(in) :: fftalg,fftcache
1000  integer,intent(in) :: npw_k,nx,ny,nz,ldx,ldy,ldz,ndat,istwf_k,mgfft
1001 !arrays
1002  integer,intent(in) :: gbound(2*mgfft+8,2),kg_k(3,npw_k)
1003  complex(dpc),target,intent(in) :: ug(npw_k*ndat)
1004  complex(dpc),target,intent(inout) :: ur(ldx*ldy*ldz*ndat)    !vz_i
1005  integer,optional,intent(in) :: isign, iscale
1006 
1007 #ifdef HAVE_DFTI
1008 !Local variables-------------------------------
1009 !scalars
1010  integer,parameter :: dist=1
1011  integer :: iscale__, isign__
1012 !arrays
1013  real(dp) :: fofgout(2,0)
1014  real(dp),ABI_CONTIGUOUS pointer :: real_ug(:,:),real_ur(:,:)
1015 
1016 ! *************************************************************************
1017 
1018  iscale__ = 0; if (present(iscale)) iscale__ = iscale
1019  isign__ = +1; if (present(isign)) isign__ = isign
1020 
1021 #undef TK_PREF
1022 #define TK_PREF(name) CONCAT(cplx_,name)
1023 
1024 #undef  FFT_PRECISION
1025 #define FFT_PRECISION FFT_DOUBLE
1026 
1027 #include "fftug.finc"
1028 
1029 #undef  FFT_PRECISION
1030 
1031 #else
1032  ! Silence compiler warning
1033  ABI_ERROR("FFT_DFTI support not activated")
1034  ABI_UNUSED((/fftalg,fftcache,npw_k,nx,ny,nz,ldx,ldy,ldz,ndat,istwf_k,mgfft,kg_k(1,1),gbound(1,1),isign,iscale/))
1035  ABI_UNUSED((/ug(1),ur(1)/))
1036 #endif
1037 
1038 end subroutine dfti_fftug_dpc

m_dfti/dfti_fftug_spc [ Functions ]

[ Top ] [ m_dfti ] [ Functions ]

NAME

 dfti_fftug_spc

FUNCTION

 Compute ndat zero-padded FFTs from G- to R-space .
 Mainly used for the transform of wavefunctions.
 TARGET: spc arrays

INPUTS

 fftalg=FFT algorith (see input variable)
 fftcache=size of the cache (kB)
 npw_k=number of plane waves for this k-point.
 nx,ny,nz=Number of point along the three directions.
 ldx,ldy,ldz=Leading dimensions of the array.
 ndat=Number of transforms
 istwf_k=Option describing the storage of the wavefunction.
 mgfft=Max number of FFT divisions (used to dimension gbound)
 kg_k(3,npw_k)=G-vectors in reduced coordinates
 gbound(2*mgfft+8,2)=Table for padded-FFT. See sphereboundary.
  ug(npw_k*ndat)=wavefunctions in reciprocal space.

OUTPUT

  ur(ldx*ldy*ldz*ndat)=wavefunctions in real space.

SOURCE

917 subroutine dfti_fftug_spc(fftalg, fftcache, npw_k, nx, ny, nz, ldx, ldy, ldz, ndat, &
918                           istwf_k, mgfft, kg_k, gbound, ug, ur, &
919                           isign, iscale) ! optional
920 
921 !Arguments ------------------------------------
922 !scalars
923  integer,intent(in) :: fftalg,fftcache
924  integer,intent(in) :: npw_k,nx,ny,nz,ldx,ldy,ldz,ndat,istwf_k,mgfft
925  integer,optional,intent(in) :: isign, iscale
926 !arrays
927  integer,intent(in) :: gbound(2*mgfft+8,2),kg_k(3,npw_k)
928  complex(spc),target,intent(in) :: ug(npw_k*ndat)
929  complex(spc),target,intent(inout) :: ur(ldx*ldy*ldz*ndat)    !vz_i
930 
931 #ifdef HAVE_DFTI
932 !Local variables-------------------------------
933 !scalars
934  integer,parameter :: dist=1
935  integer :: iscale__, isign__
936  real(sp) :: fofgout(2,0)
937  real(sp),ABI_CONTIGUOUS pointer :: real_ug(:,:),real_ur(:,:)
938 
939 ! *************************************************************************
940 
941  iscale__ = 0; if (present(iscale)) iscale__ = iscale
942  isign__ = +1; if (present(isign)) isign__ = isign
943 
944 #undef TK_PREF
945 #define TK_PREF(name) CONCAT(cplx_,name)
946 
947 #undef  FFT_PRECISION
948 #define FFT_PRECISION FFT_SINGLE
949 
950 #include "fftug.finc"
951 
952 #undef  FFT_PRECISION
953 
954 #else
955  ! Silence compiler warning
956  ABI_ERROR("FFT_DFTI support not activated")
957  ABI_UNUSED((/fftalg,fftcache,npw_k,nx,ny,nz,ldx,ldy,ldz,ndat,istwf_k,mgfft,kg_k(1,1),gbound(1,1), iscale, isign/))
958  ABI_UNUSED((/ug(1),ur(1)/))
959 #endif
960 
961 end subroutine dfti_fftug_spc

m_dfti/dfti_fftur_dp [ Functions ]

[ Top ] [ m_dfti ] [ Functions ]

NAME

 dfti_fftur_dp

FUNCTION

 Compute ndat zero-padded FFTs from R- to G-space .
 Mainly used for the transform of wavefunctions.
 TARGET: dp arrays with real and imaginary part.

INPUTS

 fftalg=FFT algorith (see input variable)
 fftcache=size of the cache (kB)
 npw_k=number of plane waves for this k-point.
 nx,ny,nz=Number of point along the three directions.
 ldx,ldy,ldz=Leading dimensions of the array.
 ndat=Number of transforms
 istwf_k=Option describing the storage of the wavefunction.
 mgfft=Max number of FFT divisions (used to dimension gbound)
 kg_k(3,npw_k)=G-vectors in reduced coordinates
 gbound(2*mgfft+8,2)=Table for padded-FFT. See sphereboundary.

 SIDE EFFECT
 ur(2,ldx*ldy*ldz*ndat)= In input: wavefunctions in real space.
                         Destroyed in output. Do not use ur anymore!

OUTPUT

 ug(2,npw_k*ndat)=wavefunctions in reciprocal space.

SOURCE

1072 subroutine dfti_fftur_dp(fftalg, fftcache, npw_k, nx, ny, nz, ldx, ldy, ldz, ndat, &
1073                          istwf_k, mgfft, kg_k, gbound, ur, ug, &
1074                          isign, iscale) ! optional
1075 
1076 !Arguments ------------------------------------
1077 !scalars
1078  integer,intent(in) :: fftalg,fftcache
1079  integer,intent(in) :: npw_k,nx,ny,nz,ldx,ldy,ldz,ndat,istwf_k,mgfft
1080  integer,optional,intent(in) :: isign, iscale
1081 !arrays
1082  integer,intent(in) :: gbound(2*mgfft+8,2),kg_k(3,npw_k)
1083  real(dp),target,intent(inout) :: ur(2*ldx*ldy*ldz*ndat)
1084  real(dp),target,intent(inout) :: ug(2*npw_k*ndat)    !vz_i
1085 
1086 #ifdef HAVE_DFTI
1087 !Local variables-------------------------------
1088 !scalars
1089  integer,parameter :: dist=2
1090  integer :: iscale__, isign__
1091 !arrays
1092  real(dp) :: dum_ugin(2,0)
1093  real(dp),ABI_CONTIGUOUS pointer :: real_ug(:,:),real_ur(:,:)
1094 
1095 ! *************************************************************************
1096 
1097  iscale__ = 1; if (present(iscale)) iscale__ = iscale
1098  isign__ = -1; if (present(isign)) isign__ = isign
1099 
1100 #undef TK_PREF
1101 #define TK_PREF(name) CONCAT(cg_,name)
1102 
1103 #undef  FFT_PRECISION
1104 #define FFT_PRECISION FFT_DOUBLE
1105 
1106 #include "fftur.finc"
1107 
1108 #undef  FFT_PRECISION
1109 
1110 #else
1111  ! Silence compiler warning
1112  ABI_ERROR("FFT_DFTI support not activated")
1113  ABI_UNUSED((/fftalg,fftcache/))
1114  ABI_UNUSED((/npw_k,nx,ny,nz,ldx,ldy,ldz,ndat,istwf_k,mgfft,kg_k(1,1),gbound(1,1),iscale,isign/))
1115  ABI_UNUSED((/ug(1),ur(1)/))
1116 #endif
1117 
1118 end subroutine dfti_fftur_dp

m_dfti/dfti_fftur_dpc [ Functions ]

[ Top ] [ m_dfti ] [ Functions ]

NAME

 dfti_fftur_dpc

FUNCTION

 Compute ndat zero-padded FFTs from R ro G.
 Mainly used for the transform of wavefunctions.
 TARGET: DPC arrays

INPUTS

 fftalg=FFT algorith (see input variable)
 fftcache=size of the cache (kB)
 npw_k=number of plane waves for this k-point.
 nx,ny,nz=Number of point along the three directions.
 ldx,ldy,ldz=Leading dimensions of the array.
 ndat=Number of transforms
 istwf_k=Option describing the storage of the wavefunction.
 mgfft=Max number of FFT divisions (used to dimension gbound)
 kg_k(3,npw_k)=G-vectors in reduced coordinates
 gbound(2*mgfft+8,2)=Table for padded-FFT. See sphereboundary.

 SIDE EFFECT
 ur(ldx*ldy*ldz*ndat)= In input: wavefunctions in real space.
                       Destroyed in output. Do not use ur anymore!

OUTPUT

 ug(npw_k*ndat)=wavefunctions in reciprocal space

SOURCE

1233 subroutine dfti_fftur_dpc(fftalg, fftcache, npw_k, nx, ny, nz, ldx, ldy, ldz, ndat, &
1234                           istwf_k, mgfft, kg_k, gbound, ur, ug, &
1235                           isign, iscale) ! optional
1236 
1237 !Arguments ------------------------------------
1238 !scalars
1239  integer,intent(in) :: fftalg,fftcache
1240  integer,intent(in) :: npw_k,nx,ny,nz,ldx,ldy,ldz,ndat,istwf_k,mgfft
1241  integer,optional,intent(in) :: isign, iscale
1242 !arrays
1243  integer,intent(in) :: gbound(2*mgfft+8,2),kg_k(3,npw_k)
1244  complex(dpc),target,intent(inout) :: ur(ldx*ldy*ldz*ndat)
1245  complex(dpc),target,intent(inout) :: ug(npw_k*ndat)    !vz_i
1246 
1247 #ifdef HAVE_DFTI
1248 !Local variables-------------------------------
1249 !scalars
1250  integer,parameter :: dist=1
1251  integer :: iscale__, isign__
1252 !arrays
1253  real(dp) :: dum_ugin(2,0)
1254  real(dp),ABI_CONTIGUOUS pointer :: real_ug(:,:),real_ur(:,:)
1255 
1256 ! *************************************************************************
1257 
1258  iscale__ = 1; if (present(iscale)) iscale__ = iscale
1259  isign__ = -1; if (present(isign)) isign__ = isign
1260 
1261 #undef TK_PREF
1262 #define TK_PREF(name) CONCAT(cplx_,name)
1263 
1264 #undef  FFT_PRECISION
1265 #define FFT_PRECISION FFT_DOUBLE
1266 
1267 #include "fftur.finc"
1268 
1269 #undef  FFT_PRECISION
1270 
1271 #else
1272  ! Silence compiler warning
1273  ABI_ERROR("FFT_DFTI support not activated")
1274  ABI_UNUSED((/fftalg,fftcache/))
1275  ABI_UNUSED((/npw_k,nx,ny,nz,ldx,ldy,ldz,ndat,istwf_k,mgfft,kg_k(1,1),gbound(1,1),iscale,isign/))
1276  ABI_UNUSED((/ug(1),ur(1)/))
1277 #endif
1278 
1279 end subroutine dfti_fftur_dpc

m_dfti/dfti_fftur_spc [ Functions ]

[ Top ] [ m_dfti ] [ Functions ]

NAME

 dfti_fftur_spc

FUNCTION

 Compute ndat zero-padded FFTs from R- to G-space .
 Mainly used for the transform of wavefunctions.
 TARGET: spc arrays

INPUTS

 fftalg=FFT algorith (see input variable)
 fftcache=size of the cache (kB)
 npw_k=number of plane waves for this k-point.
 nx,ny,nz=Number of point along the three directions.
 ldx,ldy,ldz=Leading dimensions of the array.
 ndat=Number of transforms
 istwf_k=Option describing the storage of the wavefunction.
 mgfft=Max number of FFT divisions (used to dimension gbound)
 kg_k(3,npw_k)=G-vectors in reduced coordinates
 gbound(2*mgfft+8,2)=Table for padded-FFT. See sphereboundary.

 SIDE EFFECT
 ur(ldx*ldy*ldz*ndat)= In input: wavefunctions in real space.
                       Destroyed in output. Do not use ur anymore!

OUTPUT

 ug(npw_k*ndat)=wavefunctions in reciprocal space.

SOURCE

1153 subroutine dfti_fftur_spc(fftalg, fftcache, npw_k, nx, ny, nz, ldx, ldy, ldz, ndat, &
1154                           istwf_k, mgfft, kg_k, gbound, ur, ug, &
1155                           isign, iscale) ! optional
1156 
1157 !Arguments ------------------------------------
1158 !scalars
1159  integer,intent(in) :: fftalg,fftcache
1160  integer,intent(in) :: npw_k,nx,ny,nz,ldx,ldy,ldz,ndat,istwf_k,mgfft
1161 !arrays
1162  integer,intent(in) :: gbound(2*mgfft+8,2),kg_k(3,npw_k)
1163  complex(spc),target,intent(inout) :: ur(ldx*ldy*ldz*ndat)
1164  complex(spc),target,intent(inout) :: ug(npw_k*ndat)    !vz_i
1165  integer,optional,intent(in) :: isign, iscale
1166 
1167 #ifdef HAVE_DFTI
1168 !Local variables-------------------------------
1169 !scalars
1170  integer,parameter :: dist=1
1171  integer :: iscale__, isign__
1172 !arrays
1173  real(sp) :: dum_ugin(2,0)
1174  real(sp),ABI_CONTIGUOUS pointer :: real_ug(:,:),real_ur(:,:)
1175 
1176 ! *************************************************************************
1177 
1178  iscale__ = 1; if (present(iscale)) iscale__ = iscale
1179  isign__ = -1; if (present(isign)) isign__ = isign
1180 
1181 #undef TK_PREF
1182 #define TK_PREF(name) CONCAT(cplx_,name)
1183 
1184 #undef  FFT_PRECISION
1185 #define FFT_PRECISION FFT_SINGLE
1186 
1187 #include "fftur.finc"
1188 
1189 #undef  FFT_PRECISION
1190 
1191 #else
1192  ! Silence compiler warning
1193  ABI_ERROR("FFT_DFTI support not activated")
1194  ABI_UNUSED((/fftalg,fftcache/))
1195  ABI_UNUSED((/npw_k,nx,ny,nz,ldx,ldy,ldz,ndat,istwf_k,mgfft,kg_k(1,1),gbound(1,1),isign,iscale/))
1196  ABI_UNUSED((/ug(1),ur(1)/))
1197 #endif
1198 
1199 end subroutine dfti_fftur_spc

m_dfti/dfti_many_dft_ip [ Functions ]

[ Top ] [ m_dfti ] [ Functions ]

NAME

  dfti_many_dft_ip

FUNCTION

 Driver routine for many in-place 3D complex-to-complex FFTs of lengths nx, ny, nz.

INPUTS

 nx,ny,nz=Number of points along the three directions.
 ldx,ldy,ldz=Physical dimension of the finout array (to avoid cache conflicts).
 ndat=Number of FFTs to be done.
 isign=sign of Fourier transform exponent: current convention uses
   +1 for transforming from G to r,
   -1 for transforming from r to G.

OUTPUT

 finout(2,ldx*ldy*ldz*ndat)=
   In input: The complex array to be transformed.
   In output: The FFT results.

SOURCE

1538 subroutine dfti_many_dft_ip(nx,ny,nz,ldx,ldy,ldz,ndat,isign,finout)
1539 
1540 !Arguments ------------------------------------
1541 !scalars
1542  integer,intent(in) :: nx,ny,nz,ldx,ldy,ldz,ndat,isign
1543 !arrays
1544  real(dp),target,intent(inout) :: finout(2*ldx*ldy*ldz*ndat)
1545 
1546 #ifdef HAVE_DFTI
1547 !Local variables-------------------------------
1548 !scalars
1549  integer,parameter :: iscale1 = 1
1550  type(C_ptr) :: finout_cptr
1551 !arrays
1552  complex(dpc),ABI_CONTIGUOUS pointer :: finout_fptr(:)
1553 
1554 ! *************************************************************************
1555 
1556  ! Associate complex finout_fptr with real ffinout via the C pointer
1557  finout_cptr = C_loc(finout)
1558  call C_F_pointer(finout_cptr,finout_fptr, shape=(/ldx*ldy*ldz*ndat/))
1559 
1560  ! Call complex version --> a lot of boilerplate code avoided
1561  call dfti_c2c_ip(nx, ny, nz, ldx, ldy, ldz, ndat, iscale1, isign, finout_fptr)
1562 
1563 #else
1564  ABI_ERROR("FFT_DFTI support not activated")
1565  ABI_UNUSED((/nx,ny,nz,ldx,ldy,ldz,ndat,isign/))
1566  ABI_UNUSED(finout(1))
1567 #endif
1568 
1569 end subroutine dfti_many_dft_ip

m_dfti/dfti_many_dft_op [ Functions ]

[ Top ] [ m_dfti ] [ Functions ]

NAME

  dfti_many_dft_op

FUNCTION

 Driver routine for many out-of-place 3D complex-to-complex FFTs of lengths nx, ny, nz.

INPUTS

 nx,ny,nz=Number of points along the three directions.
 ldx,ldy,ldz=Physical dimension of the fin and fout arrays (to avoid cache conflicts).
 ndat=Number of FFTs to be done.
 fin(2*ldx*ldy*ldz*ndat)=The complex array to be transformed.
 isign=sign of Fourier transform exponent: current convention uses
   +1 for transforming from G to r,
   -1 for transforming from r to G.

OUTPUT

 fout(2,ldx*ldy*ldz*ndat)=The Fourier transform of fin.

SOURCE

1474 subroutine dfti_many_dft_op(nx,ny,nz,ldx,ldy,ldz,ndat,isign,fin,fout)
1475 
1476 !Arguments ------------------------------------
1477 !scalars
1478  integer,intent(in) :: nx,ny,nz,ldx,ldy,ldz,ndat,isign
1479 !arrays
1480  real(dp),target,intent(in) :: fin(2*ldx*ldy*ldz*ndat)
1481  real(dp),target,intent(out) :: fout(2*ldx*ldy*ldz*ndat)
1482 
1483 #ifdef HAVE_DFTI
1484 !Local variables-------------------------------
1485 !scalars
1486  integer,parameter :: iscale1 = 1
1487  type(C_ptr) :: fin_cptr, fout_cptr
1488 
1489 !arrays
1490  complex(dpc),ABI_CONTIGUOUS pointer :: fin_fptr(:),fout_fptr(:)
1491 
1492 ! *************************************************************************
1493 
1494  ! Associate complex pointers with real inputs via the C pointers
1495  fin_cptr = C_loc(fin)
1496  call C_F_pointer(fin_cptr,fin_fptr, shape=[ldx*ldy*ldz*ndat])
1497 
1498  fout_cptr = C_loc(fout)
1499  call C_F_pointer(fout_cptr,fout_fptr, shape=[ldx*ldy*ldz*ndat])
1500 
1501  ! Call complex version --> a lot of boilerplate code avoided
1502  call dfti_c2c_op(nx, ny, nz, ldx, ldy, ldz, ndat, iscale1, isign, fin_fptr, fout_fptr)
1503 
1504 #else
1505  ABI_ERROR("FFT_DFTI support not activated")
1506  ABI_UNUSED((/nx,ny,nz,ldx,ldy,ldz,ndat,isign/))
1507  ABI_UNUSED(fin(1))
1508  ABI_UNUSED(fout(1))
1509 #endif
1510 
1511 end subroutine dfti_many_dft_op

m_dfti/dfti_r2c_op_dp [ Functions ]

[ Top ] [ m_dfti ] [ Functions ]

NAME

  dfti_r2c_op_dp

FUNCTION

 Driver routine for out-of-place 3D real-to-complex FFT of lengths nx, ny, nz.

INPUTS

 nx,ny,nz=Number of points along the three directions.
 ldx,ldy,ldz=Physical dimensions of the f array (to avoid cache conflicts).
 ndat=Number of FFTs to be done.
 ff(ldx*ldy*ldz*ndat)=The real array to be transformed.

OUTPUT

 gg(2*ldx*ldy*ldz*ndat)=The forward FFT of ff (real valued)

SOURCE

1894 subroutine dfti_r2c_op_dp(nx, ny, nz, ldx, ldy, ldz, ndat, ff, gg)
1895 
1896 !Arguments ------------------------------------
1897 !scalars
1898  integer,intent(in) :: nx,ny,nz,ldx,ldy,ldz,ndat
1899 !arrays
1900  real(dp),intent(in) :: ff(ldx*ldy*ldz*ndat)
1901  real(dp),target,intent(out) :: gg(2*ldx*ldy*ldz*ndat)
1902 
1903 #ifdef HAVE_DFTI
1904 !Local variables-------------------------------
1905 !scalars
1906  type(C_ptr) :: gg_cptr
1907 !arrays
1908  complex(dpc),ABI_CONTIGUOUS pointer :: gg_fptr(:)
1909 
1910 ! *************************************************************************
1911 
1912  gg_cptr = C_loc(gg)
1913  call C_F_pointer(gg_cptr,gg_fptr, shape=(/ldx*ldy*ldz*ndat/))
1914 
1915  call dfti_r2c_op_dpc(nx,ny,nz,ldx,ldy,ldz,ndat,ff,gg_fptr)
1916 
1917 #else
1918  ABI_ERROR("FFT_DFTI support not activated")
1919  ABI_UNUSED((/nx,ny,nz,ldx,ldy,ldz/))
1920  ABI_UNUSED(ff)
1921  ABI_UNUSED(gg(1))
1922 #endif
1923 
1924 end subroutine dfti_r2c_op_dp

m_dfti/dfti_r2c_op_dpc [ Functions ]

[ Top ] [ m_dfti ] [ Functions ]

NAME

  dfti_r2c_op_dpc

FUNCTION

 Driver routine for out-of-place 3D real-to-complex FFT of lengths nx, ny, nz.

INPUTS

 nx,ny,nz=Number of points along the three directions.
 ldx,ldy,ldz=Physical dimensions of the f array (to avoid cache conflicts).
 ff(ldx*ldy*ldz*ndat)=The real array to be transformed.
 ndat=Number of FFTs to be done.

OUTPUT

 gg(ldx*ldy*ldz*ndat)=The forward FFT of ff (complex valued)

SOURCE

1771 subroutine dfti_r2c_op_dpc(nx, ny, nz, ldx, ldy, ldz, ndat, ff, gg)
1772 
1773 !Arguments ------------------------------------
1774 !scalars
1775  integer,intent(in) :: nx,ny,nz,ldx,ldy,ldz,ndat
1776 !arrays
1777  real(dp),intent(in) :: ff(ldx*ldy*ldz*ndat)
1778  complex(dpc),intent(out) :: gg(ldx*ldy*ldz*ndat)
1779 
1780 #ifdef HAVE_DFTI
1781 !Local variables-------------------------------
1782 !scalars
1783  integer :: status,nhp,padx,i1,i2,i3,igp,igf,imgf,ii
1784  integer :: i1inv,i2inv,i3inv,idat,padatf
1785  type(DFTI_DESCRIPTOR),pointer :: Desc
1786  type(C_PTR) :: cptr
1787 !arrays
1788  integer,allocatable :: i1inver(:),i2inver(:),i3inver(:)
1789  complex(dpc),ABI_CONTIGUOUS pointer :: gg_hp(:)
1790 
1791 ! *************************************************************************
1792 
1793  padx = (nx/2+1)
1794  nhp = (nx/2+1)*ny*nz
1795 
1796  call dfti_alloc_complex(nhp*ndat,cptr,gg_hp)
1797 
1798  status = DftiCreateDescriptor(Desc, DFTI_DOUBLE, DFTI_REAL, 3, (/nx,ny,nz/) )
1799  DFTI_CHECK(status)
1800 
1801  status = DftiSetValue(Desc, DFTI_CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_COMPLEX)
1802  status = DftiSetValue(Desc, DFTI_PLACEMENT, DFTI_NOT_INPLACE )
1803  status = DftiSetValue(Desc, DFTI_INPUT_STRIDES,  (/0, 1, ldx,  ldx*ldy/))
1804  status = DftiSetValue(Desc, DFTI_INPUT_DISTANCE, ldx*ldy*ldz)
1805  status = DftiSetValue(Desc, DFTI_OUTPUT_STRIDES, (/0, 1, padx, padx*ny/))
1806  status = DftiSetValue(Desc, DFTI_OUTPUT_DISTANCE, nhp)
1807  status = DftiSetValue(Desc, DFTI_NUMBER_OF_TRANSFORMS, ndat)
1808  status = DftiSetValue(Desc, DFTI_FORWARD_SCALE, one / DBLE(nx*ny*nz) )
1809  DFTI_CHECK(status)
1810 
1811  DFTI_CHECK( DftiCommitDescriptor(Desc) )
1812 
1813  DFTI_CHECK( DftiComputeForward(Desc, ff, gg_hp) )
1814 
1815  DFTI_CHECK( DftiFreeDescriptor(Desc) )
1816 
1817  ! Reconstruct full FFT: Hermitian redundancy: out[i] is the conjugate of out[n-i]
1818  ABI_MALLOC(i1inver,(padx))
1819  ABI_MALLOC(i2inver,(ny))
1820  ABI_MALLOC(i3inver,(nz))
1821 
1822  i1inver(1)=1
1823  do i1=2,padx
1824    i1inver(i1)=nx+2-i1
1825  end do
1826 
1827  i2inver(1)=1
1828  do i2=2,ny
1829    i2inver(i2)=ny+2-i2
1830  end do
1831 
1832  i3inver(1)=1
1833  do i3=2,nz
1834    i3inver(i3)=nz+2-i3
1835  end do
1836 
1837  igp=0
1838  do idat=1,ndat
1839    padatf=(idat-1)*ldx*ldy*ldz
1840    do i3=1,nz
1841      i3inv = i3inver(i3)
1842      do i2=1,ny
1843        i2inv = i2inver(i2)
1844        do i1=1,padx
1845          igp=igp+1
1846          igf = i1 + (i3-1)*ldx*ldy + (i2-1)*ldx + padatf
1847          gg(igf) =  gg_hp(igp)
1848          i1inv = i1inver(i1)
1849          if (i1inv/=i1) then
1850            imgf = i1inv + (i3inv-1)*ldx*ldy + (i2inv-1)*ldx + padatf
1851            gg(imgf) = DCONJG(gg_hp(igp))
1852          end if
1853        end do
1854      end do
1855    end do
1856  end do
1857 
1858  ABI_FREE(i1inver)
1859  ABI_FREE(i2inver)
1860  ABI_FREE(i3inver)
1861 
1862  call dfti_free(cptr)
1863 
1864 #else
1865  ABI_ERROR("FFT_DFTI support not activated")
1866  ABI_UNUSED((/nx,ny,nz,ldx,ldy,ldz/))
1867  ABI_UNUSED(ff)
1868  ABI_UNUSED(gg(1))
1869 #endif
1870 
1871 end subroutine dfti_r2c_op_dpc

m_dfti/dfti_seqfourdp [ Functions ]

[ Top ] [ m_dfti ] [ Functions ]

NAME

  dfti_seqfourdp

FUNCTION

 Driver routine for 3D FFT of lengths nx, ny, nz. Mainly used for densities or potentials.
 FFT Transform is out-of-place

INPUTS

 cplex=1 if fofr is real, 2 if fofr is complex
 nx,ny,nz=Number of point along the three directions.
 ldx,ldy,ldz=Leading dimensions of the array.
 ndat = Number of FFTS
 isign= +1 : fofg(G) => fofr(R);
        -1 : fofr(R) => fofg(G)
 fofg(2,ldx*ldy*ldz*ndat)=The array to be transformed.

OUTPUT

 fofr(cplex,ldx*ldy*ldz*ndat)=The FFT of fofg

SOURCE

204 subroutine dfti_seqfourdp(cplex,nx,ny,nz,ldx,ldy,ldz,ndat,isign,fofg,fofr)
205 
206 !Arguments ------------------------------------
207 !scalars
208  integer,intent(in) :: cplex,nx,ny,nz,ldx,ldy,ldz,ndat,isign
209 !arrays
210  real(dp),intent(inout) :: fofg(2*ldx*ldy*ldz*ndat)
211  real(dp),intent(inout) :: fofr(cplex*ldx*ldy*ldz*ndat)
212 
213 !Local variables-------------------------------
214 !scalars
215  integer,parameter :: iscale1 = 1
216  integer :: ii,jj
217  complex(spc), allocatable :: work_sp(:)
218 
219 ! *************************************************************************
220 
221  select case (cplex)
222  case (2)
223    ! Complex to Complex.
224    if (fftcore_mixprec == 1) then
225      ! Mixed precision: copyin + in-place + copyout
226      ABI_MALLOC(work_sp, (ldx*ldy*ldz*ndat))
227      if (isign == +1) then
228        work_sp(:) = cmplx(fofg(1::2), fofg(2::2), kind=spc)
229      else if (isign == -1) then
230        work_sp(:) = cmplx(fofr(1::2), fofr(2::2), kind=spc)
231      else
232        ABI_BUG("Wrong isign")
233      end if
234 
235      call dfti_c2c_ip_spc(nx, ny, nz, ldx, ldy, ldz, ndat, iscale1, isign, work_sp)
236 
237      if (isign == +1) then
238        jj = 1
239        do ii=1,ldx*ldy*ldz*ndat
240          fofr(jj) = real(work_sp(ii), kind=dp)
241          fofr(jj+1) = aimag(work_sp(ii))
242          jj = jj + 2
243        end do
244      else if (isign == -1) then
245        jj = 1
246        do ii=1,ldx*ldy*ldz*ndat
247          fofg(jj) = real(work_sp(ii), kind=dp)
248          fofg(jj+1) = aimag(work_sp(ii))
249          jj = jj + 2
250        end do
251      end if
252      ABI_FREE(work_sp)
253 
254    else
255      ! double precision version.
256      select case (isign)
257      case (+1)
258        call dfti_many_dft_op(nx,ny,nz,ldx,ldy,ldz,ndat,isign,fofg,fofr)
259      case (-1) ! -1
260        call dfti_many_dft_op(nx,ny,nz,ldx,ldy,ldz,ndat,isign,fofr,fofg)
261      case default
262        ABI_BUG("Wrong isign")
263      end select
264    end if
265 
266  case (1) ! Real case.
267 
268    select case (isign)
269    case (+1) ! G --> R
270      call dfti_c2r_op(nx,ny,nz,ldx,ldy,ldz,ndat,fofg,fofr)
271    case (-1) ! R --> G
272      call dfti_r2c_op(nx,ny,nz,ldx,ldy,ldz,ndat,fofr,fofg)
273    case default
274      ABI_BUG("Wrong isign")
275    end select
276 
277  case default
278    ABI_BUG("Wrong value for cplex")
279  end select
280 
281 end subroutine dfti_seqfourdp

m_dfti/dfti_seqfourwf [ Functions ]

[ Top ] [ m_dfti ] [ Functions ]

NAME

 dfti_seqfourwf

FUNCTION

 Carry out composite Fourier transforms between real and reciprocal (G) space.
 Wavefunctions, contained in a sphere in reciprocal space,
 can be FFT to real space. They can also be FFT from real space
 to a sphere. Also, the density maybe accumulated, and a local potential can be applied.

 The different options are :
 - option=0 --> reciprocal to real space and output the result.
 - option=1 --> reciprocal to real space and accumulate the density.
 - option=2 --> reciprocal to real space, apply the local potential to the wavefunction
                in real space and produce the result in reciprocal space.
 - option=3 --> real space to reciprocal space.
                NOTE that in this case, fftalg=1x1 MUST be used. This may be changed in the future.

INPUTS

 cplex= if 1 , denpot is real, if 2 , denpot is complex
    (cplex=2 only allowed for option=2, and istwf_k=1)
    not relevant if option=0 or option=3, so cplex=0 can be used to minimize memory
 fofgin(2,npwin)=holds input wavefunction in G vector basis sphere.
                 (intent(in) but the routine sphere can modify it for another iflag)
 gboundin(2*mgfft+8,2)=sphere boundary info for reciprocal to real space
 gboundout(2*mgfft+8,2)=sphere boundary info for real to reciprocal space
 istwf_k=option parameter that describes the storage of wfs
 kg_kin(3,npwin)=reduced planewave coordinates, input
 kg_kout(3,npwout)=reduced planewave coordinates, output
 mgfft=maximum size of 1D FFTs
 ndat=number of FFT to do in //
 ngfft(18)=contain all needed information about 3D FFT, see ~abinit/doc/variables/vargs.htm#ngfft
 npwin=number of elements in fofgin array (for option 0, 1 and 2)
 npwout=number of elements in fofgout array (for option 2 and 3)
 ldx,ldy,ldz=ngfft(4),ngfft(5),ngfft(6), dimensions of fofr.
 option= if 0: do direct FFT
         if 1: do direct FFT, then sum the density
         if 2: do direct FFT, multiply by the potential, then do reverse FFT
         if 3: do reverse FFT only
 weight_r=weight to be used for the accumulation of the density in real space
         (needed only when option=1)

OUTPUT

  (see side effects)

SIDE EFFECTS

 Input/Output
 for option==0, fofgin(2,npwin*ndat)=holds input wavefunction in G sphere;
                fofr(2,ldx,ldy,ldz) contains the output Fourier Transform of fofgin;
                no use of denpot, fofgout and npwout.
 for option==1, fofgin(2,npwin*ndat)=holds input wavefunction in G sphere;
                denpot(cplex*ldx,ldy,ldz) contains the input density at input,
                and the updated density at output (accumulated);
                no use of fofgout and npwout.
 for option==2, fofgin(2,npwin*ndat)=holds input wavefunction in G sphere;
                denpot(cplex*ldx,ldy,ldz) contains the input local potential;
                fofgout(2,npwout*ndat) contains the output function;
 for option==3, fofr(2,ldx,ldy,ldz*ndat) contains the input real space wavefunction;
                fofgout(2,npwout*ndat) contains its output Fourier transform;
                no use of fofgin and npwin.

SOURCE

349 subroutine dfti_seqfourwf(cplex,denpot,fofgin,fofgout,fofr,gboundin,gboundout,istwf_k, &
350                           kg_kin,kg_kout,mgfft,ndat,ngfft,npwin,npwout,ldx,ldy,ldz,option,weight_r,weight_i)
351 
352 !Arguments ------------------------------------
353 !scalars
354  integer,intent(in) :: cplex,istwf_k,ldx,ldy,ldz,ndat,npwin,npwout,option,mgfft
355  real(dp),intent(in) :: weight_i,weight_r
356 !arrays
357  integer,intent(in) :: gboundin(2*mgfft+8,2),gboundout(2*mgfft+8,2)
358  integer,intent(in) :: kg_kin(3,npwin),kg_kout(3,npwout),ngfft(18)
359  real(dp),intent(inout) :: denpot(cplex*ldx,ldy,ldz),fofgin(2,npwin*ndat)
360  real(dp),intent(inout) :: fofr(2,ldx*ldy*ldz*ndat)
361  real(dp),intent(out) :: fofgout(2,npwout*ndat)
362 
363 !Local variables-------------------------------
364 !scalars
365  integer,parameter :: ndat1=1
366  integer :: nx,ny,nz,fftalg,fftalga,fftalgc,fftcache,dat,ptg,ptr,ptgin,ptgout,nthreads
367  logical :: use_fftrisc
368  character(len=500) :: msg
369 
370 ! *************************************************************************
371 
372  if (all(option /= [0, 1, 2, 3])) then
373    write(msg,'(a,i0,a)')' Option:',option,' is not allowed. Only option=0, 1, 2 or 3 are allowed presently.'
374    ABI_ERROR(msg)
375  end if
376 
377  if (option == 1 .and. cplex /= 1) then
378    ABI_ERROR(sjoin("With option number 1, cplex must be 1 but it is cplex:", itoa(cplex)))
379  end if
380 
381  if (option==2 .and. (cplex/=1 .and. cplex/=2)) then
382    ABI_ERROR(sjoin("With the option number 2, cplex must be 1 or 2, but it is cplex:", itoa(cplex)))
383  end if
384 
385  nx=ngfft(1); ny=ngfft(2); nz=ngfft(3)
386  fftalg=ngfft(7); fftalga=fftalg/100; fftalgc=MOD(fftalg,10)
387  fftcache=ngfft(8)
388 
389  use_fftrisc = (fftalgc==2)
390  if (istwf_k==2.and.option==3) use_fftrisc = .FALSE.
391  if (istwf_k>2.and.ANY(option==(/0,3/))) use_fftrisc = .FALSE.
392 
393  nthreads = xomp_get_num_threads(open_parallel=.TRUE.)
394 
395  if (use_fftrisc) then
396    !call wrtout(std_out, " calls dfti_fftrisc")
397    if (ndat == 1) then
398      if (fftcore_mixprec == 0) then
399        call dfti_fftrisc_dp(cplex,denpot,fofgin,fofgout,fofr,gboundin,gboundout,istwf_k,kg_kin,kg_kout,&
400          mgfft,ngfft,npwin,npwout,ldx,ldy,ldz,option,weight_r,weight_i)
401      else
402        call dfti_fftrisc_mixprec(cplex,denpot,fofgin,fofgout,fofr,gboundin,gboundout,istwf_k,kg_kin,kg_kout,&
403          mgfft,ngfft,npwin,npwout,ldx,ldy,ldz,option,weight_r,weight_i)
404      end if
405    else
406      ! All this boilerplate code is needed because the caller might pass zero-sized arrays
407      ! for the arguments that are not referenced and we don't want to have problems at run-time.
408      ! Moreover option 1 requires a special treatment when threads are started at this level.
409 
410      SELECT CASE (option)
411      CASE (0)
412        !
413        ! fofgin -> fofr, no use of denpot, fofgout and npwout.
414        if (.not.dfti_spawn_threads_here(ndat,nthreads)) then
415          do dat=1,ndat
416            ptg = 1 + (dat-1)*npwin
417            ptr = 1 + (dat-1)*ldx*ldy*ldz
418            call dfti_fftrisc_dp(cplex,denpot,fofgin(1,ptg),fofgout,fofr(1,ptr),gboundin,gboundout,istwf_k,kg_kin,kg_kout,&
419 &            mgfft,ngfft,npwin,npwout,ldx,ldy,ldz,option,weight_r,weight_i)
420          end do
421        else
422 !$OMP PARALLEL DO PRIVATE(ptg,ptr)
423          do dat=1,ndat
424            ptg = 1 + (dat-1)*npwin
425            ptr = 1 + (dat-1)*ldx*ldy*ldz
426            call dfti_fftrisc_dp(cplex,denpot,fofgin(1,ptg),fofgout,fofr(1,ptr),gboundin,gboundout,istwf_k,kg_kin,kg_kout,&
427 &            mgfft,ngfft,npwin,npwout,ldx,ldy,ldz,option,weight_r,weight_i)
428          end do
429        end if
430 
431      CASE (1)
432        !fofgin -> local ur and accumulate density in denpot
433        ! TODO this is delicate part to do in parallel, as one should OMP reduce denpot.
434        do dat=1,ndat
435          ptg = 1 + (dat-1)*npwin
436          ptr = 1 + (dat-1)*ldx*ldy*ldz
437          call dfti_fftrisc_dp(cplex,denpot,fofgin(1,ptg),fofgout,fofr,gboundin,gboundout,istwf_k,kg_kin,kg_kout,&
438 &          mgfft,ngfft,npwin,npwout,ldx,ldy,ldz,option,weight_r,weight_i)
439        end do
440 
441      CASE (2)
442        ! <G|vloc(r)|fofgin(r)> in fofgout
443        if (.not.dfti_spawn_threads_here(ndat,nthreads)) then
444          do dat=1,ndat
445            ptgin  = 1 + (dat-1)*npwin
446            ptgout = 1 + (dat-1)*npwout
447            if (fftcore_mixprec == 0) then
448              call dfti_fftrisc_dp(cplex,denpot,fofgin(1,ptgin),fofgout(1,ptgout),fofr,gboundin,gboundout,istwf_k,&
449                kg_kin,kg_kout,mgfft,ngfft,npwin,npwout,ldx,ldy,ldz,option,weight_r,weight_i)
450            else
451              call dfti_fftrisc_mixprec(cplex,denpot,fofgin(1,ptgin),fofgout(1,ptgout),fofr,gboundin,gboundout,istwf_k,&
452                kg_kin,kg_kout,mgfft,ngfft,npwin,npwout,ldx,ldy,ldz,option,weight_r,weight_i)
453            end if
454          end do
455        else
456 !$OMP PARALLEL DO PRIVATE(ptgin,ptgout)
457          do dat=1,ndat
458            ptgin  = 1 + (dat-1)*npwin
459            ptgout = 1 + (dat-1)*npwout
460            call dfti_fftrisc_dp(cplex,denpot,fofgin(1,ptgin),fofgout(1,ptgout),fofr,gboundin,gboundout,istwf_k,kg_kin,kg_kout,&
461 &            mgfft,ngfft,npwin,npwout,ldx,ldy,ldz,option,weight_r,weight_i)
462          end do
463        end if
464 
465      CASE (3)
466        !fofr -> fofgout
467        if (.not.dfti_spawn_threads_here(ndat,nthreads)) then
468          do dat=1,ndat
469            ptr    = 1 + (dat-1)*ldx*ldy*ldz
470            ptgout = 1 + (dat-1)*npwout
471            call dfti_fftrisc_dp(cplex,denpot,fofgin,fofgout(1,ptgout),fofr(1,ptr),gboundin,gboundout,istwf_k,kg_kin,kg_kout,&
472 &            mgfft,ngfft,npwin,npwout,ldx,ldy,ldz,option,weight_r,weight_i)
473          end do
474        else
475 !$OMP PARALLEL DO PRIVATE(ptr,ptgout)
476          do dat=1,ndat
477            ptr    = 1 + (dat-1)*ldx*ldy*ldz
478            ptgout = 1 + (dat-1)*npwout
479            call dfti_fftrisc_dp(cplex,denpot,fofgin,fofgout(1,ptgout),fofr(1,ptr),gboundin,gboundout,istwf_k,kg_kin,kg_kout,&
480 &            mgfft,ngfft,npwin,npwout,ldx,ldy,ldz,option,weight_r,weight_i)
481          end do
482        end if
483 
484      CASE DEFAULT
485        write(msg,'(a,i0,a)')'Option',option,' is not allowed. Only option=0, 1, 2 or 3 are allowed presently.'
486        ABI_ERROR(msg)
487      END SELECT
488 
489    end if
490 
491  else
492    SELECT CASE (option)
493    CASE (0)
494      !
495      ! FFT u(g) --> u(r)
496      if (.not.dfti_spawn_threads_here(ndat,nthreads)) then
497        call dfti_fftug_dp(fftalg,fftcache,npwin,nx,ny,nz,ldx,ldy,ldz,ndat,istwf_k,mgfft,kg_kin,gboundin,fofgin,fofr)
498      else
499 !$OMP PARALLEL DO PRIVATE(ptg, ptr)
500        do dat=1,ndat
501          ptg = 1 + (dat-1)*npwin
502          ptr = 1 + (dat-1)*ldx*ldy*ldz
503          call dfti_fftug_dp(fftalg,fftcache,npwin,nx,ny,nz,ldx,ldy,ldz,ndat1,&
504 &          istwf_k,mgfft,kg_kin,gboundin,fofgin(1,ptg),fofr(1,ptr))
505        end do
506      end if
507 
508    CASE (1)
509      ! TODO this is delicate part to do in parallel, as one should OMP reduce denpot.
510      call dfti_fftug_dp(fftalg,fftcache,npwin,nx,ny,nz,ldx,ldy,ldz,ndat,istwf_k,mgfft,kg_kin,gboundin,fofgin,fofr)
511      call cg_addtorho(nx,ny,nz,ldx,ldy,ldz,ndat,weight_r,weight_i,fofr,denpot)
512 
513    CASE (2)
514 
515      if (.not.dfti_spawn_threads_here(ndat,nthreads)) then
516        call dfti_fftug_dp(fftalg,fftcache,npwin,nx,ny,nz,ldx,ldy,ldz,ndat,istwf_k,mgfft,kg_kin,gboundin,fofgin,fofr)
517        call cg_vlocpsi(nx,ny,nz,ldx,ldy,ldz,ndat,cplex,denpot,fofr)
518 
519        !  The data for option==2 is now in fofr.
520        call dfti_fftpad_dp(fofr,nx,ny,nz,ldx,ldy,ldz,ndat,mgfft,-1,gboundout)
521 
522        call cg_box2gsph(nx,ny,nz,ldx,ldy,ldz,ndat,npwout,kg_kout,fofr,fofgout)
523      else
524 
525 !$OMP PARALLEL DO PRIVATE(ptg, ptr)
526        do dat=1,ndat
527          ptg = 1 + (dat-1)*npwin
528          ptr = 1 + (dat-1)*ldx*ldy*ldz
529          call dfti_fftug_dp(fftalg,fftcache,npwin,nx,ny,nz,ldx,ldy,ldz,ndat1,&
530 &          istwf_k,mgfft,kg_kin,gboundin,fofgin(1,ptg),fofr(1,ptr))
531 
532          call cg_vlocpsi(nx,ny,nz,ldx,ldy,ldz,ndat1,cplex,denpot,fofr(1,ptr))
533 
534          !  The data for option==2 is now in fofr.
535          call dfti_fftpad_dp(fofr(1,ptr),nx,ny,nz,ldx,ldy,ldz,ndat1,mgfft,-1,gboundout)
536 
537          ptg = 1 + (dat-1)*npwout
538          call cg_box2gsph(nx,ny,nz,ldx,ldy,ldz,ndat1,npwout,kg_kout,fofr(1,ptr),fofgout(1,ptg))
539        end do
540      end if
541 
542    CASE (3)
543      !  The data for option==3 is already in fofr.
544      if (.not.dfti_spawn_threads_here(ndat,nthreads)) then
545        call dfti_fftpad_dp(fofr,nx,ny,nz,ldx,ldy,ldz,ndat,mgfft,-1,gboundout)
546        call cg_box2gsph(nx,ny,nz,ldx,ldy,ldz,ndat,npwout,kg_kout,fofr,fofgout)
547      else
548 !$OMP PARALLEL DO PRIVATE(ptg, ptr)
549        do dat=1,ndat
550          ptg = 1 + (dat-1)*npwout
551          ptr = 1 + (dat-1)*ldx*ldy*ldz
552          call dfti_fftpad_dp(fofr(1,ptr),nx,ny,nz,ldx,ldy,ldz,ndat1,mgfft,-1,gboundout)
553          call cg_box2gsph(nx,ny,nz,ldx,ldy,ldz,ndat1,npwout,kg_kout,fofr(1,ptr),fofgout(1,ptg))
554        end do
555      end if
556 
557    CASE DEFAULT
558      write(msg,'(a,i0,a)')'Option',option,' is not allowed. Only option=0, 1, 2 or 3 are allowed presently.'
559      ABI_ERROR(msg)
560    END SELECT
561  end if
562 
563 end subroutine dfti_seqfourwf

m_dfti/dfti_spawn_threads_here [ Functions ]

[ Top ] [ m_dfti ] [ Functions ]

NAME

 dfti_spawn_threads_here

FUNCTION

  Helper function that returns true if FFT calls should be OMP
  parallelized in the client code.

INPUTS

  ndat=Number of FFT transforms to do
  nthreads = Number of threads available

SOURCE

2154 function dfti_spawn_threads_here(ndat, nthreads) result(ans)
2155 
2156 !Arguments ------------------------------------
2157 !scalars
2158  integer,intent(in) :: ndat,nthreads
2159  logical :: ans
2160 
2161 ! *************************************************************************
2162 
2163  ans = .FALSE.
2164 #ifdef HAVE_OPENMP
2165  ans = (nthreads > 1 .and. MOD(ndat,nthreads) == 0 .and. .not. USE_LIB_THREADS)
2166 #else
2167  ABI_UNUSED((/ndat,nthreads/))
2168 #endif
2169 
2170 end function dfti_spawn_threads_here

m_dfti/dfti_use_lib_threads [ Functions ]

[ Top ] [ m_dfti ] [ Functions ]

NAME

 dfti_use_lib_threads

FUNCTION

INPUTS

SOURCE

2185 subroutine dfti_use_lib_threads(logvar)
2186 
2187 !Arguments ------------------------------------
2188 !scalars
2189  logical,intent(in) :: logvar
2190 
2191 ! *************************************************************************
2192 
2193  USE_LIB_THREADS = logvar
2194 
2195 end subroutine dfti_use_lib_threads