TABLE OF CONTENTS


ABINIT/m_fftw3 [ Modules ]

[ Top ] [ Modules ]

NAME

 m_fftw3

FUNCTION

  This module provides wrappers for the FFTW3 routines: in-place and out-of-place version.

COPYRIGHT

 Copyright (C) 2009-2018 ABINIT group (MG, FD)
 This file is distributed under the terms of the
 GNU General Public License, see ~abinit/COPYING
 or http://www.gnu.org/copyleft/gpl.txt .

NOTES

  1) MPI parallelism is in testing stage
  2) For better performance the FFT divisions should contain small factors  [2, 3, 5, 7, 11]

SOURCE

 21 #if defined HAVE_CONFIG_H
 22 #include "config.h"
 23 #endif
 24 
 25 #include "abi_common.h"
 26 
 27 ! It seems that MKL wrappers do not like the advanced interfaces for
 28 ! r2c and c2r transforms although they work fine if the true FFTW3 library is used.
 29 !#define DEV_RC_BUG
 30 #undef DEV_RC_BUG
 31 
 32 #define FFTLIB "FFTW3"
 33 #define FFT_PREF(name) CONCAT(fftw3_,name)
 34 #define SPAWN_THREADS_HERE(ndat, nthreads) fftw3_spawn_threads_here(ndat, nthreads)
 35 
 36 MODULE m_fftw3
 37 
 38  use defs_basis
 39  use m_abicore
 40  use m_errors
 41  use m_xomp
 42  use m_xmpi
 43  use m_hide_blas
 44  use m_cgtools
 45  use m_cplxtools
 46  use m_distribfft
 47  use m_fftcore
 48  use iso_c_binding
 49 
 50  use m_time,           only : timab
 51  use m_numeric_tools,  only : imax_loc
 52  use defs_abitypes,    only : MPI_type
 53  use m_mpinfo,         only : ptabs_fourwf
 54  use m_fstrings,       only : strcat
 55  use m_fft_mesh,       only : zpad_t, zpad_init, zpad_free
 56  use m_fftcore
 57 
 58  implicit none
 59 
 60 #ifdef HAVE_FFT_FFTW3_MPI
 61  include 'fftw3-mpi.f03'
 62 #endif
 63 
 64 !This should be done but MKL fftw hasn't always this include file
 65 !#ifdef HAVE_FFT_FFTW3
 66 ! include 'fftw3.f03'
 67 !#endif
 68 
 69  private
 70 
 71 ! Entry points for client code
 72  public :: fftw3_seqfourdp      ! 3D FFT of lengths nx, ny, nz. Mainly used for densities or potentials.
 73  public :: fftw3_seqfourwf      ! FFT transform of wavefunctions (high-level interface).
 74  public :: fftw3_fftrisc
 75  public :: fftw3_fftug          ! G-->R. 3D zero-padded FFT of lengths nx, ny, nz. Mainly used for wavefunctions
 76  public :: fftw3_fftur          ! R-->G, 3D zero-padded FFT of lengths nx, ny, nz. Mainly used for wavefunctions
 77  public :: fftw3_use_lib_threads
 78 
 79  public :: fftw3_mpifourdp
 80 
 81 ! Low-level routines.
 82  public :: fftw3_cleanup        ! Reset FFTW to the pristine state it was in when you started your program,
 83  public :: fftw3_init_threads   ! one-time initialization required to use FFTW3 threads.
 84  public :: fftw3_set_nthreads   ! Set the number of threads you want FFTW3 to use when HAVE_FFT_FFTW3_THREADS is defined.
 85  public :: fftw3_r2c_op         ! Real to complex transform (out-of-place version).
 86  public :: fftw3_c2r_op         ! Complex to real transform (out-of-place version).
 87  public :: fftw3_c2c_op         ! complex to complex transform (out-of-place version).
 88  public :: fftw3_c2c_ip         ! complex to complex transform (in-place version).
 89  public :: fftw3_many_dft_op    ! Driver routine for many out-of-place 3D complex-to-complex FFTs.
 90  public :: fftw3_many_dft_ip    ! Driver routine for many in-place 3D complex-to-complex FFTs.
 91  public :: fftw3_fftpad         ! Driver routines for zero-padded FFT of wavefunctions.
 92  public :: fftw3_fftpad_dp      ! Driver routines for zero-padded FFT of wavefunctions.
 93  public :: fftw3_fftug_dp       ! Driver routines for zero-padded FFT of wavefunctions.
 94  public :: fftw3_poisson        ! Solve the poisson equation in G-space starting from n(r).
 95 
 96  ! MPI version
 97  public :: fftw3_mpiback_wf
 98  public :: fftw3_mpiback_manywf
 99  public :: fftw3_mpiforw_wf
100  public :: fftw3_mpiforw_manywf
101  public :: fftw3_mpiback
102  public :: fftw3_mpiforw
103  public :: fftw3_applypot
104  public :: fftw3_applypot_many
105  public :: fftw3_accrho
106 
107 #ifdef HAVE_FFT_FFTW3_MPI
108 ! flags copied from fftw3.f
109  integer,public,parameter :: ABI_FFTW_FORWARD=FFTW_FORWARD
110  integer,public,parameter :: ABI_FFTW_BACKWARD=FFTW_BACKWARD
111  integer,public,parameter :: ABI_FFTW_ESTIMATE=FFTW_ESTIMATE
112  integer,public,parameter :: ABI_FFTW_MEASURE=FFTW_MEASURE
113  ! end flags copied from fftw3.f
114  integer,public,parameter :: ABI_FFTW_MPI_TRANSPOSED_IN=FFTW_MPI_TRANSPOSED_IN
115  integer,public,parameter :: ABI_FFTW_MPI_TRANSPOSED_OUT=FFTW_MPI_TRANSPOSED_OUT
116  ! end flags copies from fftw3-mpi.f03
117 #else
118  integer,public,parameter :: ABI_FFTW_FORWARD=-1
119  integer,public,parameter :: ABI_FFTW_BACKWARD=+1
120  integer,public,parameter :: ABI_FFTW_ESTIMATE=64
121  integer,public,parameter :: ABI_FFTW_MEASURE=0
122 ! end flags copied from fftw3.f
123  integer,public,parameter :: ABI_FFTW_MPI_TRANSPOSED_IN=536870912
124  integer,public,parameter :: ABI_FFTW_MPI_TRANSPOSED_OUT=1073741824
125 ! end flags copies from fftw3-mpi.f03
126 #endif
127 
128 ! ==========================================================================================
129 ! ==== Variables introduced for the FFTW3 interface in abinit. Not belonging to fftw3.f ====
130 ! ==========================================================================================
131 
132  integer,public,parameter :: NULL_PLAN = 0
133  ! MKL wrappers might return NULL_PLAN if a particular FFTW3 feature is not available
134 
135  integer,public,parameter :: KIND_FFTW_PLAN = 8
136  ! It should be at least integer*@SIZEOF_INT_P@
137  ! MKL wrappers requires it to be integer*8, so do _not_ use C_INTPTR_T.
138 
139 #ifdef HAVE_FFT_FFTW3_THREADS
140  integer,private,save :: THREADS_INITED = 0
141  ! 1 if treads have been initialized. 0 otherwise.
142 #endif
143 
144  logical,private,save :: USE_LIB_THREADS = .FALSE.

m_fftw3/cplan_many_dft [ Functions ]

[ Top ] [ m_fftw3 ] [ Functions ]

NAME

FUNCTION

INPUTS

SIDE EFFECTS

PARENTS

SOURCE

2929 !! FIXME  technically it should be intent(inout) since FFTW3 can destroy the input for particular flags.
2930 
2931 function cplan_many_dft(rank,n,howmany,fin,inembed,istride,idist,fout,onembed,ostride,odist,sign,flags,nthreads) result(plan)
2932 
2933 
2934 !This section has been created automatically by the script Abilint (TD).
2935 !Do not modify the following lines by hand.
2936 #undef ABI_FUNC
2937 #define ABI_FUNC 'cplan_many_dft'
2938 !End of the abilint section
2939 
2940  implicit none
2941 
2942 !Arguments ------------------------------------
2943 !scalars
2944  integer,intent(in) :: rank,howmany,istride,ostride, sign,flags,idist,odist,nthreads
2945  integer,intent(in) :: n(rank),inembed(rank),onembed(rank)
2946  integer(KIND_FFTW_PLAN) :: plan
2947 !arrays
2948  complex(spc) :: fin(*),fout(*)
2949 
2950 !Local variables-------------------------------
2951  character(len=500) :: msg,frmt
2952 
2953 ! *************************************************************************
2954 
2955 !$OMP CRITICAL (OMPC_cplan_many_dft)
2956  call fftw3_set_nthreads(nthreads)
2957 
2958  call sfftw_plan_many_dft(plan, rank, n, howmany, &
2959 &  fin, inembed, istride, idist, fout, onembed, ostride, odist, sign, flags)
2960 !$OMP END CRITICAL (OMPC_cplan_many_dft)
2961 
2962  if (plan==NULL_PLAN) then ! handle the error
2963    call wrtout(std_out,"sfftw_plan_many_dft returned NULL_PLAN (complex version)","COLL")
2964    write(frmt,*)"(a,",rank,"(1x,i0),3(a,i0),a,2(a,",rank,"(1x,i0),2(a,i0),a))"
2965    write(msg,frmt)&
2966 &    " n = ",n," howmany = ",howmany," sign = ",sign," flags = ",flags,ch10,&
2967 &    " inembed = ",inembed," istride = ",istride," idist =",idist,ch10,     &
2968 &    " onembed = ",onembed," ostride = ",ostride," odist =",idist,ch10
2969    call wrtout(std_out,msg,"COLL")
2970    MSG_ERROR("Check FFTW library and/or abinit code")
2971  end if
2972 
2973 end function cplan_many_dft

m_fftw3/dplan_many_dft_1D [ Functions ]

[ Top ] [ m_fftw3 ] [ Functions ]

NAME

FUNCTION

INPUTS

SIDE EFFECTS

PARENTS

SOURCE

2810 function dplan_many_dft_1D(rank,n,howmany,fin,inembed,istride,idist,fout,onembed,ostride,odist,sign,flags,nthreads) result(plan)
2811 
2812 
2813 !This section has been created automatically by the script Abilint (TD).
2814 !Do not modify the following lines by hand.
2815 #undef ABI_FUNC
2816 #define ABI_FUNC 'dplan_many_dft_1D'
2817 !End of the abilint section
2818 
2819  implicit none
2820 
2821 !Arguments ------------------------------------
2822 !scalars
2823  integer,intent(in) :: rank,howmany,istride,ostride, sign,flags,idist,odist,nthreads
2824  integer,intent(in) :: n(rank),inembed(rank),onembed(rank)
2825  integer(KIND_FFTW_PLAN) :: plan
2826 !arrays
2827  real(dp) :: fin(*),fout(*)
2828 
2829 !Local variables-------------------------------
2830  character(len=500) :: msg,frmt
2831 
2832 ! *************************************************************************
2833 
2834 !$OMP CRITICAL (OMPC_dfftw_plan_many_dft_1D)
2835  call fftw3_set_nthreads(nthreads)
2836 
2837  call dfftw_plan_many_dft(plan, rank, n, howmany, &
2838 &  fin, inembed, istride, idist, fout, onembed, ostride, odist, sign, flags)
2839 !$OMP END CRITICAL (OMPC_dfftw_plan_many_dft_1D)
2840 
2841  if (plan==NULL_PLAN) then
2842    call wrtout(std_out,"dfftw_plan_many_dft returned NULL_PLAN!","COLL")
2843    write(frmt,*)"(a,",rank,"(1x,i0),3(a,i0),a,2(a,",rank,"(1x,i0),2(a,i0),a))"
2844    write(msg,frmt)&
2845 &    " n= ",n," howmany= ",howmany," sign= ",sign," flags= ",flags,ch10,&
2846 &    " inembed= ",inembed," istride= ",istride," idist=",idist,ch10,    &
2847 &    " onembed= ",onembed," ostride= ",ostride," odist=",idist,ch10
2848    call wrtout(std_out,msg,"COLL")
2849    MSG_ERROR("Check FFTW library and/or abinit code")
2850  end if
2851 
2852 end function dplan_many_dft_1D

m_fftw3/dplan_many_dft_2D [ Functions ]

[ Top ] [ m_fftw3 ] [ Functions ]

NAME

FUNCTION

INPUTS

SIDE EFFECTS

PARENTS

SOURCE

2870 function dplan_many_dft_2D(rank,n,howmany,fin,inembed,istride,idist,fout,onembed,ostride,odist,sign,flags,nthreads) result(plan)
2871 
2872 
2873 !This section has been created automatically by the script Abilint (TD).
2874 !Do not modify the following lines by hand.
2875 #undef ABI_FUNC
2876 #define ABI_FUNC 'dplan_many_dft_2D'
2877 !End of the abilint section
2878 
2879  implicit none
2880 
2881 !Arguments ------------------------------------
2882 !scalars
2883  integer,intent(in) :: rank,howmany,istride,ostride, sign,flags,idist,odist,nthreads
2884  integer,intent(in) :: n(rank),inembed(rank),onembed(rank)
2885  integer(KIND_FFTW_PLAN) :: plan
2886 !arrays
2887  real(dp) :: fin(2,*),fout(2,*)
2888 
2889 !Local variables-------------------------------
2890  character(len=500) :: msg,frmt
2891 
2892 ! *************************************************************************
2893 
2894 !$OMP CRITICAL (OMPC_dfftw_plan_many_dft_2D)
2895  call fftw3_set_nthreads(nthreads)
2896 
2897  call dfftw_plan_many_dft(plan, rank, n, howmany, &
2898 &  fin, inembed, istride, idist, fout, onembed, ostride, odist, sign, flags)
2899 !$OMP END CRITICAL (OMPC_dfftw_plan_many_dft_2D)
2900 
2901  if (plan==NULL_PLAN) then
2902    call wrtout(std_out,"dfftw_plan_many_dft returned NULL_PLAN!","COLL")
2903    write(frmt,*)"(a,",rank,"(1x,i0),3(a,i0),a,2(a,",rank,"(1x,i0),2(a,i0),a))"
2904    write(msg,frmt)&
2905 &    " n= ",n," howmany= ",howmany," sign= ",sign," flags= ",flags,ch10,&
2906 &    " inembed= ",inembed," istride= ",istride," idist=",idist,ch10,    &
2907 &    " onembed= ",onembed," ostride= ",ostride," odist=",idist,ch10
2908    call wrtout(std_out,msg,"COLL")
2909    MSG_ERROR("Check FFTW library and/or abinit code")
2910  end if
2911 
2912 end function dplan_many_dft_2D

m_fftw3/dplan_many_dft_c2r [ Functions ]

[ Top ] [ m_fftw3 ] [ Functions ]

NAME

FUNCTION

INPUTS

SIDE EFFECTS

PARENTS

SOURCE

3114 function dplan_many_dft_c2r(rank,n,howmany,fin,inembed,istride,idist,fout,onembed,ostride,odist,flags, nthreads) result(plan)
3115 
3116 
3117 !This section has been created automatically by the script Abilint (TD).
3118 !Do not modify the following lines by hand.
3119 #undef ABI_FUNC
3120 #define ABI_FUNC 'dplan_many_dft_c2r'
3121 !End of the abilint section
3122 
3123  implicit none
3124 
3125 !Arguments ------------------------------------
3126 !scalars
3127  integer,intent(in) :: rank,howmany,istride,ostride,flags,idist,odist,nthreads
3128  integer,intent(in) :: n(rank),inembed(rank),onembed(rank)
3129  integer(KIND_FFTW_PLAN) :: plan
3130 !arrays
3131  real(dp) :: fin(*),fout(*)
3132 
3133 !Local variables-------------------------------
3134  character(len=500) :: msg,frmt
3135 
3136 ! *************************************************************************
3137 
3138 !$OMP CRITICAL (OMPC_dplan_many_dft_c2r)
3139  call fftw3_set_nthreads(nthreads)
3140 
3141  call dfftw_plan_many_dft_c2r(plan, rank, n, howmany, &
3142 &  fin, inembed, istride, idist, fout, onembed, ostride, odist, flags)
3143 !$OMP END CRITICAL (OMPC_dplan_many_dft_c2r)
3144 
3145  if (plan==NULL_PLAN) then ! handle the error.
3146    call wrtout(std_out,"dfftw_plan_many_dft_c2r returned NULL_PLAN","COLL")
3147    write(frmt,*)"(a,",rank,"(1x,i0),2(a,i0),a,2(a,",rank,"(1x,i0),2(a,i0),a))"
3148    write(msg,frmt)&
3149 &    " n = ",n," howmany = ",howmany," flags = ",flags,ch10,&
3150 &    " inembed = ",inembed," istride = ",istride," idist = ",idist,ch10,&
3151 &    " onembed = ",onembed," ostride = ",ostride," odist = ",idist,ch10
3152    call wrtout(std_out,msg,"COLL")
3153    MSG_ERROR("Check FFTW library and/or abinit code")
3154  end if
3155 
3156 end function dplan_many_dft_c2r

m_fftw3/dplan_many_dft_r2c [ Functions ]

[ Top ] [ m_fftw3 ] [ Functions ]

NAME

FUNCTION

INPUTS

SIDE EFFECTS

PARENTS

SOURCE

3051 !! FIXME  technically it should be intent(inout) since FFTW3 can destroy the input
3052 !! for particular flags.
3053 
3054 function dplan_many_dft_r2c(rank,n,howmany,fin,inembed,istride,idist,fout,onembed,ostride,odist,flags,nthreads) result(plan)
3055 
3056 
3057 !This section has been created automatically by the script Abilint (TD).
3058 !Do not modify the following lines by hand.
3059 #undef ABI_FUNC
3060 #define ABI_FUNC 'dplan_many_dft_r2c'
3061 !End of the abilint section
3062 
3063  implicit none
3064 
3065 !Arguments ------------------------------------
3066 !scalars
3067  integer,intent(in) :: rank,howmany,istride,ostride,flags,idist,odist,nthreads
3068  integer,intent(in) :: n(rank),inembed(rank),onembed(rank)
3069  integer(KIND_FFTW_PLAN) :: plan
3070 !arrays
3071  real(dp) :: fin(*),fout(*)
3072 
3073 !Local variables-------------------------------
3074  character(len=500) :: msg,frmt
3075 
3076 ! *************************************************************************
3077 
3078 !$OMP CRITICAL (OMPC_dplan_many_dft_r2c)
3079  call fftw3_set_nthreads(nthreads)
3080 
3081  call dfftw_plan_many_dft_r2c(plan, rank, n, howmany, &
3082 &  fin, inembed, istride, idist, fout, onembed, ostride, odist, flags)
3083 !$OMP END CRITICAL (OMPC_dplan_many_dft_r2c)
3084 
3085  if (plan==NULL_PLAN) then ! handle the error.
3086    call wrtout(std_out,"dfftw_plan_many_dft_r2c returned NULL_PLAN","COLL")
3087    write(frmt,*)"(a,",rank,"(1x,i0),2(a,i0),a,2(a,",rank,"(1x,i0),2(a,i0),a))"
3088    write(msg,frmt)&
3089 &    " n = ",n," howmany = ",howmany," flags = ",flags,ch10,&
3090 &    " inembed = ",inembed," istride = ",istride," idist = ",idist,ch10,&
3091 &    " onembed = ",onembed," ostride = ",ostride," odist = ",idist,ch10
3092    call wrtout(std_out,msg,"COLL")
3093    MSG_ERROR("Check FFTW library and/or abinit code")
3094  end if
3095 
3096 end function dplan_many_dft_r2c

m_fftw3/fftw3_accrho [ Functions ]

[ Top ] [ m_fftw3 ] [ Functions ]

NAME

 fftw3_accrho

FUNCTION

 Accumulates the real space density rho from the ndat wavefunctions zf
 by transforming zf into real space and adding all the amplitudes squared

 INPUTS:
   ZF: input array (note the switch of i2 and i3)
         real(F(i1,i3,i2,idat))=ZF(1,i1,i3,i2,idat)
         imag(F(i1,i3,i2,idat))=ZF(2,i1,i3,i2,idat)
   max1 is positive or zero ; m1 >=max1+1
   i1= 1... max1+1 corresponds to positive and zero wavevectors 0 ... max1
   then, if m1 > max1+1, one has min1=max1-m1+1 and
   i1= max1+2 ... m1 corresponds to negative wavevectors min1 ... -1
   i2 and i3 have a similar definition of range
   idat=1,ndat
   md1,md2,md3: Dimension of ZF
   md2proc=((md2-1)/nproc_fft)+1 ! maximal number of small box 2nd dim slices for one proc
   weight(ndat)= weight for the density accumulation

 OUTPUTS:
    RHOoutput(i1,i2,i3) = RHOinput(i1,i2,i3) + sum on idat of (FFT(ZF))**2 *weight
        i1=1,n1 , i2=1,n2 , i3=1,n3
   comm_fft: MPI communicator
   nproc_fft: number of processors used as returned by MPI_COMM_SIZE
   me_fft: [0:nproc_fft-1] number of processor as returned by MPI_COMM_RANK
    n1,n2,n3: logical dimension of the transform. As transform lengths
              most products of the prime factors 2,3,5 are allowed.
             The detailed table with allowed transform lengths can
             be found in subroutine CTRIG
    nd1,nd2,nd3: Dimension of RHO
   nd3proc=((nd3-1)/nproc_fft)+1 ! maximal number of big box 3rd dim slices for one proc

 NOTES:
   PERFORMANCE CONSIDERATIONS:
   The maximum number of processors that can reasonably be used is max(n2/2,n3/2)

   It is very important to find the optimal
   value of NCACHE. NCACHE determines the size of the work array ZW, that
   has to fit into cache. It has therefore to be chosen to equal roughly
    half the size of the physical cache in units of real*8 numbers.
   The optimal value of ncache can easily be determined by numerical
   experimentation. A too large value of ncache leads to a dramatic
   and sudden decrease of performance, a too small value to a to a
   slow and less dramatic decrease of performance. If NCACHE is set
   to a value so small, that not even a single one dimensional transform
   can be done in the workarray zw, the program stops with an error message.

PARENTS

      m_fft

CHILDREN

      fftw3_destroy_plan,fftw3_execute_dft

SOURCE

6488 subroutine fftw3_accrho(cplexwf,ndat,n1,n2,n3,nd1,nd2,nd3,nd3proc,&
6489 &  max1,max2,max3,m1,m2,m3,md1,md2proc,md3,comm_fft,nproc_fft,me_fft,zf,rho,weight_r,weight_i)
6490 
6491 
6492 !This section has been created automatically by the script Abilint (TD).
6493 !Do not modify the following lines by hand.
6494 #undef ABI_FUNC
6495 #define ABI_FUNC 'fftw3_accrho'
6496 !End of the abilint section
6497 
6498  implicit none
6499 
6500 !Arguments ------------------------------------
6501  integer,intent(in) :: cplexwf,ndat,n1,n2,n3,nd1,nd2,nd3,nd3proc
6502  integer,intent(in) :: max1,max2,max3,m1,m2,m3,md1,md2proc,md3,comm_fft,nproc_fft,me_fft
6503  real(dp),intent(in) :: zf(2,md1,md3,md2proc,ndat)
6504  real(dp),intent(in) :: weight_r(ndat) , weight_i(ndat)
6505  real(dp),intent(inout) :: rho(nd1,nd2,nd3)
6506 
6507 !Local variables-------------------------------
6508 !scalars
6509 #ifdef HAVE_FFT_FFTW3
6510  integer,parameter :: unused0=0
6511  integer :: j,i1,idat,ierr,j3glob
6512  integer :: ioption,j2,j3,j2st,jp2st,lzt,m1zt,ma,mb,n1dfft,nnd3
6513  integer :: m2eff,ncache,n1eff,jeff,includelast,lot1,lot2,lot3,nthreads
6514  integer(KIND_FFTW_PLAN) :: bw_plan1_lot,bw_plan1_rest
6515  integer(KIND_FFTW_PLAN) :: bw_plan2_lot,bw_plan2_rest
6516  integer(KIND_FFTW_PLAN) :: bw_plan3_lot,bw_plan3_rest
6517  character(len=500) :: msg
6518 !arrays
6519  real(dp), allocatable :: zmpi1(:,:,:,:),zmpi2(:,:,:,:) ! work arrays for MPI
6520  real(dp),allocatable :: zw(:,:),zt(:,:,:) ! cache work array and array for transpositions
6521  real(dp) :: tsec(2)
6522 
6523 ! *************************************************************************
6524 
6525  !ioption=0 ! This was in the old version.
6526  ioption=1 ! This one is needed to be compatible with paral_kgb
6527 
6528  !nproc_fft = xmpi_comm_size(comm_fft); me_fft = xmpi_comm_rank(comm_fft)
6529 
6530 ! find cache size that gives optimal performance on machine
6531  ncache=2*max(n1,n2,n3,1024)
6532  if (ncache/(2*max(n1,n2,n3)) < 1) then
6533     write(msg,"(5a)") &
6534 &     'ncache has to be enlarged to be able to hold at',ch10,&
6535 &     'least one 1-d FFT of each size even though this will',ch10,&
6536 &     'reduce the performance for shorter transform lengths'
6537     MSG_ERROR(msg)
6538  end if
6539 
6540 !Effective m1 and m2 (complex-to-complex or real-to-complex)
6541  n1eff=n1; m2eff=m2 ; m1zt=n1
6542  if (cplexwf==1) then
6543    n1eff=(n1+1)/2; m2eff=m2/2+1; m1zt=2*(n1/2+1)
6544  end if
6545 
6546  lzt=m2eff
6547  if (mod(m2eff,2) == 0) lzt=lzt+1
6548  if (mod(m2eff,4) == 0) lzt=lzt+1
6549 
6550  ! maximal number of big box 3rd dim slices for all procs
6551  nnd3=nd3proc*nproc_fft
6552 
6553  ABI_ALLOCATE(zw,(2,ncache/2))
6554  ABI_ALLOCATE(zt,(2,lzt,m1zt))
6555  ABI_ALLOCATE(zmpi2,(2,md1,md2proc,nnd3))
6556  if (nproc_fft > 1)  then
6557    ABI_ALLOCATE(zmpi1,(2,md1,md2proc,nnd3))
6558  end if
6559 
6560  ! Create plans.
6561  ! The prototype for sfftw_plan_many_dft is:
6562  ! sfftw_plan_many_dft(rank, n, howmany,
6563  !   fin,  iembed, istride, idist,
6564  !   fout, oembed, ostride, odist, isign, my_flags)
6565 
6566  lot3=ncache/(2*n3)
6567  lot1=ncache/(2*n1)
6568  lot2=ncache/(2*n2)
6569 
6570  nthreads = xomp_get_num_threads(open_parallel=.TRUE.)
6571  !nthreads = 1
6572 
6573  bw_plan3_lot = dplan_many_dft_2D(1, [n3], lot3, &
6574 &    zw, [ncache/2], lot3, 1,                          &
6575 &    zw, [ncache/2], lot3, 1, ABI_FFTW_BACKWARD, ABI_FFTW_ESTIMATE, nthreads)
6576 
6577  if (mod(m1, lot3) /= 0) then
6578    bw_plan3_rest = dplan_many_dft_2D(1, [n3], mod(m1, lot3), &
6579 &      zw, [ncache/2], lot3, 1,                                    &
6580 &      zw, [ncache/2], lot3, 1, ABI_FFTW_BACKWARD, ABI_FFTW_ESTIMATE, nthreads)
6581  end if
6582 
6583  bw_plan1_lot = dplan_many_dft_2D(1, [n1], lot1, &
6584 &    zw, [ncache/2],  lot1, 1,                         &
6585 &    zt, [lzt, m1zt], lzt, 1, ABI_FFTW_BACKWARD, ABI_FFTW_ESTIMATE, nthreads)
6586 
6587  if (mod(m2eff, lot1) /= 0) then
6588    bw_plan1_rest = dplan_many_dft_2D(1, [n1], mod(m2eff, lot1), &
6589 &      zw, [ncache/2],  lot1, 1,                                      &
6590 &      zt, [lzt, m1zt], lzt, 1, ABI_FFTW_BACKWARD, ABI_FFTW_ESTIMATE, nthreads)
6591  end if
6592 
6593  ! FIXME THis won't work if ixplexwf == 1
6594  bw_plan2_lot = dplan_many_dft_2D(1, [n2], lot2, &
6595 &    zw, [ncache/2], lot2, 1,                          &
6596 &    zw, [ncache/2], lot2, 1, ABI_FFTW_BACKWARD, ABI_FFTW_ESTIMATE, nthreads)
6597 
6598  if (mod(n1eff, lot2) /= 0) then
6599    bw_plan2_rest = dplan_many_dft_2D(1, [n2], mod(n1eff,lot2), &
6600 &      zw, [ncache/2], lot2, 1,                                      &
6601 &      zw, [ncache/2], lot2, 1, ABI_FFTW_BACKWARD, ABI_FFTW_ESTIMATE, nthreads)
6602  end if
6603 
6604  do idat=1,ndat
6605    ! transform along z axis
6606    ! input: I1,I3,J2,(Jp2)
6607    !lot=ncache/(4*n3)
6608 
6609    ! Loop over the y planes treated by this node and trasform n1ddft G_z lines.
6610    do j2=1,md2proc
6611      if (me_fft*md2proc+j2 <= m2eff) then ! MG REMOVED TO BE COSISTENT WITH BACK_WF
6612        do i1=1,m1,lot3
6613          ma=i1
6614          mb=min(i1+(lot3-1),m1)
6615          n1dfft=mb-ma+1
6616 
6617          ! zero-pad n1dfft G_z lines
6618          !  input: G1,G3,G2,(Gp2)
6619          ! output: G1,R3,G2,(Gp2)
6620          call fill_cent(md1,md3,lot3,n1dfft,max3,m3,n3,zf(1,i1,1,j2,idat),zw)
6621 
6622          ! Transform along z.
6623          if (n1dfft == lot3) then
6624            call dfftw_execute_dft(bw_plan3_lot, zw, zw)
6625          else
6626            call dfftw_execute_dft(bw_plan3_rest, zw, zw)
6627          end if
6628 
6629          ! Local rotation.
6630          ! input:  G1,R3,G2,(Gp2)
6631          ! output: G1,G2,R3,(Gp2)
6632          call scramble(i1,j2,lot3,n1dfft,md1,n3,md2proc,nnd3,zw,zmpi2)
6633        end do
6634      end if
6635    end do
6636 
6637    ! Interprocessor data transposition
6638    ! input:  G1,G2,R3,Rp3,(Gp2)
6639    ! output: G1,G2,R3,Gp2,(Rp3)
6640    if (nproc_fft > 1) then
6641      call timab(543,1,tsec)
6642      call xmpi_alltoall(zmpi2,2*md1*md2proc*nd3proc, &
6643 &                       zmpi1,2*md1*md2proc*nd3proc,comm_fft,ierr)
6644      call timab(543,2,tsec)
6645    end if
6646 
6647    ! Loop over the z treated by this node.
6648    do j3=1,nd3proc
6649      j3glob = j3 + me_fft*nd3proc
6650 
6651      if (me_fft*nd3proc+j3 <= n3) then
6652        Jp2st=1; J2st=1
6653 
6654        ! Loop over G_y in the small box.
6655        do j=1,m2eff,lot1
6656          ma=j
6657          mb=min(j+(lot1-1),m2eff)
6658          n1dfft=mb-ma+1
6659 
6660          ! Zero-pad input.
6661          ! input:  G1,G2,R3,JG2,(Rp3)
6662          ! output: G2,G1,R3,JG2,(Rp3)
6663          if (nproc_fft == 1) then
6664           call mpiswitch_cent(j3,n1dfft,Jp2st,J2st,lot1,max1,md1,m1,n1,&
6665 &           md2proc,nd3proc,nproc_fft,ioption,zmpi2,zw,unused0, unused0,unused0)
6666          else
6667           call mpiswitch_cent(j3,n1dfft,Jp2st,J2st,lot1,max1,md1,m1,n1,&
6668 &           md2proc,nd3proc,nproc_fft,ioption,zmpi1,zw, unused0,unused0,unused0)
6669          end if
6670 
6671          ! Transform along x
6672          ! input:  G2,G1,R3,(Rp3)
6673          ! output: G2,R1,R3,(Rp3)
6674          if (n1dfft == lot1) then
6675            call dfftw_execute_dft(bw_plan1_lot, zw, zt(1,j,1))
6676          else
6677            call dfftw_execute_dft(bw_plan1_rest, zw, zt(1,j,1))
6678          end if
6679 
6680        end do
6681 
6682        ! Transform along y axis (take into account c2c or c2r case).
6683        ! Must loop over the full box.
6684        !lot=ncache/(4*n2)
6685        ! FIXME THis won't work
6686        if (cplexwf==1) then
6687          if (mod(lot2,2) /=0) lot2=lot2-1 ! needed to introduce jeff
6688        end if
6689 
6690        do j=1,n1eff,lot2
6691          ma=j
6692          mb=min(j+(lot2-1),n1eff)
6693          n1dfft=mb-ma+1
6694          jeff=j
6695          includelast=1
6696 
6697          if (cplexwf==1) then
6698            jeff=2*j-1
6699            includelast=1
6700            if (mb==n1eff .and. n1eff*2/=n1) includelast=0
6701          end if
6702 
6703          ! Zero-pad the input.
6704          ! input:  G2,R1,R3,(Rp3)
6705          ! output: R1,G2,R3,(Rp3)
6706          if (cplexwf==2) then
6707            call switch_cent(n1dfft,max2,m2,n2,lot2,n1,lzt,zt(1,1,j),zw)
6708          else
6709            call switchreal_cent(includelast,n1dfft,max2,n2,lot2,m1zt,lzt,zt(1,1,jeff),zw)
6710          end if
6711 
6712          if (n1dfft == lot2) then
6713            call dfftw_execute_dft(bw_plan2_lot, zw, zw)
6714          else
6715            call dfftw_execute_dft(bw_plan2_rest, zw, zw)
6716          end if
6717 
6718          ! Accumulate
6719          call addrho(cplexwf,includelast,nd1,nd2,n2,lot2,n1dfft,&
6720 &          zw,rho(jeff,1,j3glob),weight_r(idat),weight_i(idat))
6721        end do
6722        ! output: i1,i2,j3,(jp3)
6723 
6724       end if
6725     end do ! j3
6726  end do ! idat
6727 
6728  call dfftw_destroy_plan(bw_plan3_lot)
6729  if (mod(m1, lot3) /= 0) then
6730    call dfftw_destroy_plan(bw_plan3_rest)
6731  end if
6732 
6733  call dfftw_destroy_plan(bw_plan1_lot)
6734  if (mod(m2eff, lot1) /= 0) then
6735    call dfftw_destroy_plan(bw_plan1_rest)
6736  end if
6737 
6738  call dfftw_destroy_plan(bw_plan2_lot)
6739  if (mod(n1eff, lot2) /= 0) then
6740    call dfftw_destroy_plan(bw_plan2_rest)
6741  end if
6742 
6743  ABI_DEALLOCATE(zmpi2)
6744  ABI_DEALLOCATE(zw)
6745  ABI_DEALLOCATE(zt)
6746  if (nproc_fft > 1)  then
6747    ABI_DEALLOCATE(zmpi1)
6748  end if
6749 
6750 #else
6751  MSG_ERROR("FFTW3 support not activated")
6752  ABI_UNUSED((/cplexwf,ndat,n1,n2,n3,nd1,nd2,nd3proc/))
6753  ABI_UNUSED((/ max1,max2,max3,m1,m2,m3,md1,md2proc,md3,comm_fft,nproc_fft,me_fft/))
6754  ABI_UNUSED((/zf(1,1,1,1,1),rho(1,1,1),weight_r(1),weight_i(1)/))
6755 #endif
6756 
6757 end subroutine fftw3_accrho

m_fftw3/fftw3_alloc_complex1d_dpc [ Functions ]

[ Top ] [ m_fftw3 ] [ Functions ]

NAME

 fftw3_alloc_complex1d_dpc

PARENTS

CHILDREN

      fftw3_destroy_plan,fftw3_execute_dft

SOURCE

3448 #ifdef HAVE_FFT_FFTW3
3449 
3450 subroutine fftw3_alloc_complex1d_dpc(size,cptr,fptr)
3451 
3452 
3453 !This section has been created automatically by the script Abilint (TD).
3454 !Do not modify the following lines by hand.
3455 #undef ABI_FUNC
3456 #define ABI_FUNC 'fftw3_alloc_complex1d_dpc'
3457 !End of the abilint section
3458 
3459  implicit none
3460 
3461 !Arguments ------------------------------------
3462 !scalars
3463  integer,intent(in) :: size
3464  complex(dpc),ABI_CONTIGUOUS pointer :: fptr(:)
3465  type(C_PTR),intent(out) :: cptr
3466 
3467 ! *************************************************************************
3468 
3469  cptr = fftw_malloc( INT(2*size*C_DOUBLE, KIND=C_SIZE_T))
3470  if (.not. C_ASSOCIATED(cptr)) then
3471    MSG_ERROR("fftw_malloc returned NULL!")
3472  end if
3473 
3474  call c_f_pointer(cptr, fptr, [size])
3475 
3476 end subroutine fftw3_alloc_complex1d_dpc

m_fftw3/fftw3_alloc_complex1d_spc [ Functions ]

[ Top ] [ m_fftw3 ] [ Functions ]

NAME

 fftw3_alloc_complex1d_spc

PARENTS

CHILDREN

      fftw3_destroy_plan,fftw3_execute_dft

SOURCE

3402 #ifdef HAVE_FFT_FFTW3
3403 
3404 subroutine fftw3_alloc_complex1d_spc(size,cptr,fptr)
3405 
3406 
3407 !This section has been created automatically by the script Abilint (TD).
3408 !Do not modify the following lines by hand.
3409 #undef ABI_FUNC
3410 #define ABI_FUNC 'fftw3_alloc_complex1d_spc'
3411 !End of the abilint section
3412 
3413  implicit none
3414 
3415 !Arguments ------------------------------------
3416 !scalars
3417  integer,intent(in) :: size
3418  complex(spc),ABI_CONTIGUOUS pointer :: fptr(:)
3419  type(C_PTR),intent(out) :: cptr
3420 
3421 ! *************************************************************************
3422 
3423  cptr = fftw_malloc( INT(2*size*C_FLOAT, KIND=C_SIZE_T))
3424  if (.not. C_ASSOCIATED(cptr)) then
3425    MSG_ERROR("fftw_malloc returned NULL!")
3426  end if
3427 
3428  call c_f_pointer(cptr, fptr, [size])
3429 
3430 end subroutine fftw3_alloc_complex1d_spc

m_fftw3/fftw3_alloc_real1d_dp [ Functions ]

[ Top ] [ m_fftw3 ] [ Functions ]

NAME

 fftw3_alloc_real1d_dp

FUNCTION

PARENTS

CHILDREN

      fftw3_destroy_plan,fftw3_execute_dft

SOURCE

3310 #ifdef HAVE_FFT_FFTW3
3311 
3312 subroutine fftw3_alloc_real1d_dp(size,cptr,fptr)
3313 
3314 
3315 !This section has been created automatically by the script Abilint (TD).
3316 !Do not modify the following lines by hand.
3317 #undef ABI_FUNC
3318 #define ABI_FUNC 'fftw3_alloc_real1d_dp'
3319 !End of the abilint section
3320 
3321  implicit none
3322 
3323 !Arguments ------------------------------------
3324 !scalars
3325  integer,intent(in) :: size
3326  real(dp),ABI_CONTIGUOUS pointer :: fptr(:)
3327  type(C_PTR),intent(out) :: cptr
3328 
3329 ! *************************************************************************
3330 
3331  cptr = fftw_malloc( INT(size*C_DOUBLE, KIND=C_SIZE_T))
3332  if (.not. C_ASSOCIATED(cptr)) then
3333    MSG_ERROR("fftw_malloc returned NULL!")
3334  end if
3335 
3336  call c_f_pointer(cptr, fptr, [size])
3337 
3338 end subroutine fftw3_alloc_real1d_dp

m_fftw3/fftw3_alloc_real2d_dp [ Functions ]

[ Top ] [ m_fftw3 ] [ Functions ]

NAME

 fftw3_alloc_real2d_dp

PARENTS

CHILDREN

      fftw3_destroy_plan,fftw3_execute_dft

SOURCE

3356 #ifdef HAVE_FFT_FFTW3
3357 
3358 subroutine fftw3_alloc_real2d_dp(shape,cptr,fptr)
3359 
3360 
3361 !This section has been created automatically by the script Abilint (TD).
3362 !Do not modify the following lines by hand.
3363 #undef ABI_FUNC
3364 #define ABI_FUNC 'fftw3_alloc_real2d_dp'
3365 !End of the abilint section
3366 
3367  implicit none
3368 
3369 !Arguments ------------------------------------
3370 !scalars
3371  integer,intent(in) :: shape(2)
3372  real(dp),ABI_CONTIGUOUS pointer :: fptr(:,:)
3373  type(C_PTR),intent(out) :: cptr
3374 
3375 ! *************************************************************************
3376 
3377  cptr = fftw_malloc( INT(product(shape)*C_DOUBLE, KIND=C_SIZE_T))
3378  if (.not. C_ASSOCIATED(cptr)) then
3379    MSG_ERROR("fftw_malloc returned NULL!")
3380  end if
3381 
3382  call c_f_pointer(cptr, fptr, shape)
3383 
3384 end subroutine fftw3_alloc_real2d_dp

m_fftw3/fftw3_applypot [ Functions ]

[ Top ] [ m_fftw3 ] [ Functions ]

NAME

  fftw3_applypot

FUNCTION

 Applies the local real space potential to multiple wavefunctions in Fourier space

INPUTS

   ZF: Wavefunction (input/output) (note the switch of i2 and i3)
        real(F(i1,i3,i2,idat))=ZF(1,i1,i3,i2,idat)
        imag(F(i1,i3,i2,idat))=ZF(2,i1,i3,i2,idat)
   max1 is positive or zero ; m1 >=max1+1
   i1= 1... max1+1 corresponds to positive and zero wavevectors 0 ... max1
   then, if m1 > max1+1, one has min1=max1-m1+1 and
   i1= max1+2 ... m1 corresponds to negative wavevectors min1 ... -1
   i2 and i3 have a similar definition of range
   idat=1,ndat
   md1,md2,md3: Dimension of ZF (input as well as output), distributed on different procs
   md2proc=((md2-1)/nproc_fft)+1  maximal number of small box 2nd dim slices for one proc

   POT: Potential
        POT(cplex*i1,i2,i3)
        cplex=1 or 2 ,  i1=1,n1 , i2=1,n2 , i3=1,n3
   nd1,nd2,nd3: dimension of pot
   comm_fft: MPI communicator
   nproc_fft: number of processors used as returned by MPI_COMM_SIZE
   me_fft: [0:nproc_fft-1] number of processor as returned by MPI_COMM_RANK
    n1,n2,n3: logical dimension of the transform. As transform lengths
              most products of the prime factors 2,3,5 are allowed.
             The detailed table with allowed transform lengths can
             be found in subroutine CTRIG

 NOTES:
   PERFORMANCE CONSIDERATIONS:
   The maximum number of processors that can reasonably be used is max(n2/2,n3/2)

   It is very important to find the optimal
   value of NCACHE. NCACHE determines the size of the work array ZW, that
   has to fit into cache. It has therefore to be chosen to equal roughly
    half the size of the physical cache in units of real*8 numbers.
   The optimal value of ncache can easily be determined by numerical
   experimentation. A too large value of ncache leads to a dramatic
   and sudden decrease of performance, a too small value to a to a
   slow and less dramatic decrease of performance. If NCACHE is set
   to a value so small, that not even a single one dimensional transform
   can be done in the workarray zw, the program stops with an error message.

PARENTS

      m_fft

CHILDREN

      fftw3_destroy_plan,fftw3_execute_dft

SOURCE

6008 subroutine fftw3_applypot(cplexwf,cplex,ndat,n1,n2,n3,nd1,nd2,nd3,nd3proc,&
6009 &  max1i,max2i,max3i,m1i,m2i,m3i,md1,md2proc,md3,&
6010 &  max1o,max2o,max3o,m1o,m2o,m3o,comm_fft,nproc_fft,me_fft,pot,zf)
6011 
6012 
6013 !This section has been created automatically by the script Abilint (TD).
6014 !Do not modify the following lines by hand.
6015 #undef ABI_FUNC
6016 #define ABI_FUNC 'fftw3_applypot'
6017 !End of the abilint section
6018 
6019  implicit none
6020 
6021 !Arguments ------------------------------------
6022  integer,intent(in) :: cplexwf,cplex,ndat,n1,n2,n3,nd1,nd2,nd3,nd3proc
6023  integer,intent(in) :: max1i,max2i,max3i,m1i,m2i,m3i,md1,md2proc,md3
6024  integer,intent(in) :: max1o,max2o,max3o,m1o,m2o,m3o,comm_fft,nproc_fft,me_fft
6025  real(dp),intent(in) :: pot(cplex*nd1,nd2,nd3)
6026  real(dp),intent(inout) :: zf(2,md1,md3,md2proc,ndat)
6027 
6028 !Local variables-------------------------------
6029 !scalars
6030 #ifdef HAVE_FFT_FFTW3
6031  integer,parameter :: unused0=0
6032  integer :: j,i1,i2,i3,idat,ierr,j3glob,nthreads
6033  integer :: ioption,j2,j3,lzt,m1zt,ma,mb,n1dfft,nnd3,lot1,lot2,lot3
6034  integer :: m2eff,ncache,n1eff,i1inv,i2inv,i3inv,jeff,includelast,j2stb
6035  integer :: jx,j2stf,Jp2stb,Jp2stf,m2ieff,m2oeff
6036  integer(KIND_FFTW_PLAN) :: bw_plan1_lot,bw_plan1_rest
6037  integer(KIND_FFTW_PLAN) :: bw_plan2_lot,bw_plan2_rest
6038  integer(KIND_FFTW_PLAN) :: bw_plan3_lot,bw_plan3_rest
6039  integer(KIND_FFTW_PLAN) :: fw_plan1_lot,fw_plan1_rest
6040  integer(KIND_FFTW_PLAN) :: fw_plan2_lot,fw_plan2_rest
6041  integer(KIND_FFTW_PLAN) :: fw_plan3_lot,fw_plan3_rest
6042  character(len=500) :: msg
6043 !arrays
6044  real(dp) :: tsec(2)
6045  real(dp), allocatable :: zmpi1(:,:,:,:),zmpi2(:,:,:,:) ! work arrays for MPI
6046  real(dp),allocatable :: zw(:,:),zt(:,:,:) ! cache work array and array for transpositions
6047 ! FFT work arrays
6048 
6049 ! *************************************************************************
6050 
6051  !ioption=0 ! This was in the old version.
6052  ioption=1 ! This one is needed to be compatible with paral_kgb
6053 
6054  ncache=2*max(n1,n2,n3,1024)
6055  if (ncache/(2*max(n1,n2,n3)) < 1) then
6056    write(msg,"(5a)") &
6057 &    'ncache has to be enlarged to be able to hold at',ch10,&
6058 &    'least one 1-d FFT of each size even though this will',ch10,&
6059 &    'reduce the performance for shorter transform lengths'
6060    MSG_ERROR(msg)
6061  end if
6062 
6063  !call wrtout(std_out,"applypot standard ALLTOALL + FFTW3","COLL")
6064 
6065  ! Effective m1 and m2 (complex-to-complex or real-to-complex)
6066  n1eff=n1; m2ieff=m2i; m2oeff=m2o; m1zt=n1
6067  if (cplexwf==1) then
6068    n1eff=(n1+1)/2; m2ieff=m2i/2+1; m2oeff=m2o/2+1; m1zt=2*(n1/2+1)
6069  end if
6070 
6071  m2eff=max(m2ieff,m2oeff)
6072  lzt=m2eff
6073  if (mod(m2eff,2) == 0) lzt=lzt+1
6074  if (mod(m2eff,4) == 0) lzt=lzt+1
6075 
6076  ! maximal number of big box 3rd dim slices for all procs
6077  nnd3=nd3proc*nproc_fft
6078 
6079  ABI_ALLOCATE(zw,(2,ncache/2))
6080  ABI_ALLOCATE(zt,(2,lzt,m1zt))
6081  ABI_ALLOCATE(zmpi2,(2,md1,md2proc,nnd3))
6082  if (nproc_fft > 1)  then
6083    ABI_ALLOCATE(zmpi1,(2,md1,md2proc,nnd3))
6084  end if
6085 
6086  lot3=ncache/(2*n3)
6087  lot1=ncache/(2*n1)
6088  lot2=ncache/(2*n2)
6089 
6090  ! The prototype for sfftw_plan_many_dft is:
6091  ! sfftw_plan_many_dft(rank, n, howmany,
6092  !   fin,  iembed, istride, idist,
6093  !   fout, oembed, ostride, odist, isign, my_flags)
6094 
6095  nthreads = xomp_get_num_threads(open_parallel=.TRUE.)
6096  !nthreads = 1
6097 
6098  ! Create plans for G --> R (see back_wf)
6099  bw_plan3_lot = dplan_many_dft_2D(1, [n3], lot3, &
6100 &    zw, [ncache/2], lot3, 1,                          &
6101 &    zw, [ncache/2], lot3, 1, ABI_FFTW_BACKWARD, ABI_FFTW_ESTIMATE, nthreads)
6102 
6103  if (mod(m1i, lot3) /= 0) then
6104    bw_plan3_rest = dplan_many_dft_2D(1, [n3], mod(m1i, lot3),&
6105 &      zw, [ncache/2], lot3, 1,                                    &
6106 &      zw, [ncache/2], lot3, 1, ABI_FFTW_BACKWARD, ABI_FFTW_ESTIMATE, nthreads)
6107  end if
6108 
6109  bw_plan1_lot = dplan_many_dft_2D(1, [n1], lot1, &
6110 &    zw, [ncache/2],  lot1, 1,                         &
6111 &    zt, [lzt, m1zt], lzt, 1, ABI_FFTW_BACKWARD, ABI_FFTW_ESTIMATE, nthreads)
6112 
6113  if (mod(m2ieff, lot1) /= 0) then
6114    bw_plan1_rest = dplan_many_dft_2D(1, [n1], mod(m2ieff, lot1), &
6115 &      zw, [ncache/2],  lot1, 1,                                       &
6116 &      zt, [lzt, m1zt], lzt, 1, ABI_FFTW_BACKWARD, ABI_FFTW_ESTIMATE, nthreads)
6117  end if
6118 
6119  !TODO this won't work if iclexwf==1
6120  ! Recheck this
6121  bw_plan2_lot = dplan_many_dft_2D(1, [n2], lot2, &
6122 &    zw, [ncache/2], lot2, 1,                          &
6123 &    zw, [ncache/2], lot2, 1, ABI_FFTW_BACKWARD, ABI_FFTW_ESTIMATE, nthreads)
6124 
6125  if (mod(n1eff, lot2) /= 0) then
6126    bw_plan2_rest = dplan_many_dft_2D(1, [n2], mod(n1eff,lot2), &
6127 &      zw, [ncache/2], lot2, 1,                                      &
6128 &      zw, [ncache/2], lot2, 1, ABI_FFTW_BACKWARD, ABI_FFTW_ESTIMATE, nthreads)
6129  end if
6130 
6131  ! Create plans for G --> R (see forw_wf)
6132  fw_plan3_lot = dplan_many_dft_2D(1, [n3], lot3, &
6133 &    zw, [ncache/2], lot3, 1,                          &
6134 &    zw, [ncache/2], lot3, 1, ABI_FFTW_FORWARD, ABI_FFTW_ESTIMATE, nthreads)
6135 
6136  if (mod(m1o, lot3) /= 0) then
6137    fw_plan3_rest = dplan_many_dft_2D(1, [n3], mod(m1o, lot3),&
6138 &    zw, [ncache/2], lot3, 1,                                      &
6139 &    zw, [ncache/2], lot3, 1, ABI_FFTW_FORWARD, ABI_FFTW_ESTIMATE, nthreads)
6140  end if
6141 
6142  fw_plan1_lot = dplan_many_dft_2D(1, [n1], lot1,&
6143 &    zt, [lzt, m1zt], lzt,  1,                        &
6144 &    zw, [ncache/2],  lot1, 1, ABI_FFTW_FORWARD, ABI_FFTW_ESTIMATE, nthreads)
6145 
6146  if (mod(m2oeff, lot1) /= 0) then
6147    fw_plan1_rest = dplan_many_dft_2D(1, [n1], mod(m2oeff, lot1),&
6148 &    zt, [lzt, m1zt], lzt,  1,                                        &
6149 &    zw, [ncache/2],  lot1, 1, ABI_FFTW_FORWARD, ABI_FFTW_ESTIMATE, nthreads)
6150  end if
6151 
6152  fw_plan2_lot = dplan_many_dft_2D(1, [n2], lot2,&
6153 &    zw, [ncache/2], lot2, 1,                         &
6154 &    zw, [ncache/2], lot2, 1, ABI_FFTW_FORWARD, ABI_FFTW_ESTIMATE, nthreads)
6155 
6156  if (mod(n1eff, lot2) /= 0) then
6157    fw_plan2_rest = dplan_many_dft_2D(1, [n2], mod(n1eff,lot2),&
6158 &    zw, [ncache/2], lot2, 1,                                       &
6159 &    zw, [ncache/2], lot2, 1, ABI_FFTW_FORWARD, ABI_FFTW_ESTIMATE, nthreads)
6160  end if
6161 
6162  do idat=1,ndat
6163    !
6164    ! transform along z axis
6165    ! input: G1,G3,G2,(Gp2)
6166    do j2=1,md2proc
6167      if (me_fft*md2proc+j2 <= m2ieff) then
6168        do i1=1,m1i,lot3
6169          ma=i1
6170          mb=min(i1+(lot3-1),m1i)
6171          n1dfft=mb-ma+1
6172 
6173          ! zero-pad n1dfft G_z lines
6174          ! input: G1,G3,G2,(Gp2)
6175          call fill_cent(md1,md3,lot3,n1dfft,max3i,m3i,n3,zf(1,i1,1,j2,idat),zw)
6176 
6177          if (n1dfft == lot3) then
6178            call dfftw_execute_dft(bw_plan3_lot, zw, zw)
6179          else
6180            call dfftw_execute_dft(bw_plan3_rest, zw, zw)
6181          end if
6182 
6183          ! Local rotation.
6184          ! input:  G1,R3,G2,(Gp2)
6185          ! output: G1,G2,R3,(Gp2)
6186          call scramble(i1,j2,lot3,n1dfft,md1,n3,md2proc,nnd3,zw,zmpi2)
6187        end do
6188      end if
6189    end do
6190 
6191    ! Interprocessor data transposition
6192    ! input:  G1,G2,R3,Rp3,(Gp2)
6193    ! output: G1,G2,R3,Gp2,(Rp3)
6194    if (nproc_fft > 1) then
6195      call timab(543,1,tsec)
6196      call xmpi_alltoall(zmpi2,2*md1*md2proc*nd3proc,&
6197 &                       zmpi1,2*md1*md2proc*nd3proc,comm_fft,ierr)
6198      call timab(543,2,tsec)
6199    end if
6200 
6201    do j3=1,nd3proc
6202      j3glob = j3 + me_fft*nd3proc
6203      if (me_fft*nd3proc+j3 <= n3) then
6204        Jp2stb=1; J2stb=1
6205        Jp2stf=1; J2stf=1
6206 
6207        ! transform along x axis
6208        do j=1,m2ieff,lot1
6209          ma=j
6210          mb=min(j+(lot1-1),m2ieff)
6211          n1dfft=mb-ma+1
6212 
6213          ! Zero-pad input.
6214          ! input:  G1,G2,R3,G2,(Rp3)
6215          ! output: G2,G1,R3,G2,(Rp3)
6216          if (nproc_fft == 1) then
6217            call mpiswitch_cent(j3,n1dfft,Jp2stb,J2stb,lot1,max1i,md1,m1i,n1,&
6218 &           md2proc,nd3proc,nproc_fft,ioption,zmpi2,zw, unused0, unused0, unused0)
6219          else
6220            call mpiswitch_cent(j3,n1dfft,Jp2stb,J2stb,lot1,max1i,md1,m1i,n1,&
6221 &           md2proc,nd3proc,nproc_fft,ioption,zmpi1,zw, unused0, unused0, unused0)
6222          end if
6223 
6224          ! Transform along x
6225          ! input:  G2,G1,R3,(Rp3)
6226          ! output: G2,R1,R3,(Rp3)
6227          if (n1dfft == lot1) then
6228            call dfftw_execute_dft(bw_plan1_lot, zw, zt(1,j,1))
6229          else
6230            call dfftw_execute_dft(bw_plan1_rest, zw, zt(1,j,1))
6231          end if
6232        end do
6233 
6234        ! Transform along y axis (take into account c2c or c2r case).
6235        ! Must loop over the full box.
6236        !TODO this won't work
6237        if (cplexwf==1) then
6238          if(mod(lot2,2).ne.0) lot2=lot2-1 ! needed to introduce jeff
6239        end if
6240 
6241        do j=1,n1eff,lot2
6242          ma=j
6243          mb=min(j+(lot2-1),n1eff)
6244          n1dfft=mb-ma+1
6245          jeff=j
6246          includelast=1
6247 
6248          if (cplexwf==1) then
6249            jeff=2*j-1
6250            includelast=1
6251            if (mb==n1eff .and. n1eff*2/=n1) includelast=0
6252          end if
6253 
6254          ! Zero-pad the input.
6255          !  input: G2,R1,R3,(Rp3)
6256          ! output: R1,G2,R3,(Rp3)
6257          if (cplexwf==2) then
6258            call switch_cent(n1dfft,max2i,m2i,n2,lot2,n1,lzt,zt(1,1,jeff),zw)
6259          else
6260            call switchreal_cent(includelast,n1dfft,max2i,n2,lot2,m1zt,lzt,zt(1,1,jeff),zw)
6261          end if
6262 
6263          ! input:  R1,G2,R3,(Rp3)
6264          ! output: R1,R2,R3,(Rp3)
6265          ! Be careful here
6266          if (n1dfft == lot2) then
6267            call dfftw_execute_dft(bw_plan2_lot, zw, zw)
6268          else
6269            call dfftw_execute_dft(bw_plan2_rest, zw, zw)
6270          end if
6271 
6272          ! Multiply with potential in real space
6273          jx=cplex*(jeff-1)+1
6274          call multpot(cplexwf,cplex,includelast,nd1,nd2,n2,lot2,n1dfft,pot(jx,1,j3glob),zw)
6275 
6276          ! TRANSFORM BACK IN FOURIER SPACE
6277          ! transform along y axis
6278          ! input: R1,R2,R3,(Rp3)
6279          if (n1dfft == lot2) then
6280            call dfftw_execute_dft(fw_plan2_lot,  zw, zw)
6281          else
6282            call dfftw_execute_dft(fw_plan2_rest, zw, zw)
6283          end if
6284 
6285          ! input: R1,G2,R3,(Rp3)
6286          ! output: G2,R1,R3,(Rp3)
6287          if (cplexwf==2) then
6288            call unswitch_cent(n1dfft,max2o,m2o,n2,lot2,n1,lzt,zw,zt(1,1,jeff))
6289          else
6290            call unswitchreal_cent(n1dfft,max2o,n2,lot2,n1,lzt,zw,zt(1,1,jeff))
6291          end if
6292        end do ! j
6293 
6294        ! transform along x axis
6295        ! input:  R2,R1,R3,(Rp3)
6296        ! output: R2,G1,R3,(Rp3)
6297        do j=1,m2oeff,lot1
6298          ma=j
6299          mb=min(j+(lot1-1),m2oeff)
6300          n1dfft=mb-ma+1
6301 
6302          if (n1dfft == lot1) then
6303            call dfftw_execute_dft(fw_plan1_lot,  zt(1,j,1), zw)
6304          else
6305            call dfftw_execute_dft(fw_plan1_rest, zt(1,j,1), zw)
6306          end if
6307 
6308          ! input:  G2,G1,R3,Gp2,(Rp3)
6309          ! output: G1,G2,R3,Gp2,(Rp3)
6310          if (nproc_fft == 1) then
6311            call unmpiswitch_cent(j3,n1dfft,Jp2stf,J2stf,lot1,max1o,md1,m1o,n1,&
6312 &           md2proc,nd3proc,nproc_fft,ioption,zw,zmpi2)
6313          else
6314            call unmpiswitch_cent(j3,n1dfft,Jp2stf,J2stf,lot1,max1o,md1,m1o,n1,&
6315 &           md2proc,nd3proc,nproc_fft,ioption,zw,zmpi1)
6316          end if
6317        end do ! j
6318      end if
6319    end do
6320 
6321    ! Interprocessor data transposition
6322    ! input:  G1,G2,R3,Gp2,(Rp3)
6323    ! output: G1,G2,R3,Rp3,(Gp2)
6324    if (nproc_fft > 1) then
6325      call timab(544,1,tsec)
6326      call xmpi_alltoall(zmpi1,2*md1*md2proc*nd3proc, &
6327 &                       zmpi2,2*md1*md2proc*nd3proc,comm_fft,ierr)
6328      call timab(544,2,tsec)
6329    end if
6330 
6331    ! transform along z axis
6332    ! input: G1,G2,R3,(Gp2)
6333    !lot=ncache/(4*n3)
6334    do j2=1,md2proc
6335      if (me_fft*md2proc+j2 <= m2oeff) then
6336        do i1=1,m1o,lot3
6337          ma=i1
6338          mb=min(i1+(lot3-1),m1o)
6339          n1dfft=mb-ma+1
6340 
6341          ! input:  G1,G2,R3,(Gp2)
6342          ! output: G1,R3,G2,(Gp2)
6343          call unscramble(i1,j2,lot3,n1dfft,md1,n3,md2proc,nnd3,zmpi2,zw)
6344 
6345           if (n1dfft == lot3) then
6346             call dfftw_execute_dft(fw_plan3_lot, zw, zw)
6347           else
6348             call dfftw_execute_dft(fw_plan3_rest, zw, zw)
6349           end if
6350 
6351          call unfill_cent(md1,md3,lot3,n1dfft,max3o,m3o,n3,zw,zf(1,i1,1,j2,idat))
6352          ! output: G1,G3,G2,(Gp2)
6353        end do
6354      end if
6355    end do
6356 
6357    ! Complete missing values with complex conjugate
6358    ! Inverse of ix is located at nx+2-ix , except for ix=1, for which it is 1.
6359    if (cplexwf==1) then
6360      do i3=1,m3o
6361        i3inv=m3o+2-i3
6362        if (i3==1) i3inv=1
6363        if (m2oeff>1)then
6364          do i2=2,m2oeff
6365            i2inv=m2o+2-i2
6366            zf(1,1,i3inv,i2inv,idat)= zf(1,1,i3,i2,idat)
6367            zf(2,1,i3inv,i2inv,idat)=-zf(2,1,i3,i2,idat)
6368            do i1=2,m1o
6369              i1inv=m1o+2-i1
6370              zf(1,i1inv,i3inv,i2inv,idat)= zf(1,i1,i3,i2,idat)
6371              zf(2,i1inv,i3inv,i2inv,idat)=-zf(2,i1,i3,i2,idat)
6372            end do
6373          end do
6374        end if
6375      end do
6376    end if
6377 
6378  end do ! idat
6379 
6380  call dfftw_destroy_plan(bw_plan3_lot)
6381  if (mod(m1i, lot3) /= 0) then
6382    call dfftw_destroy_plan(bw_plan3_rest)
6383  end if
6384 
6385  call dfftw_destroy_plan(bw_plan1_lot)
6386  if (mod(m2ieff, lot1) /= 0) then
6387    call dfftw_destroy_plan(bw_plan1_rest)
6388  end if
6389 
6390  call dfftw_destroy_plan(bw_plan2_lot)
6391  if (mod(n1eff, lot2) /= 0) then
6392    call dfftw_destroy_plan(bw_plan2_rest)
6393  end if
6394 
6395  call dfftw_destroy_plan(fw_plan3_lot)
6396  if (mod(m1o, lot3) /= 0) then
6397    call dfftw_destroy_plan(fw_plan3_rest)
6398  end if
6399 
6400  call dfftw_destroy_plan(fw_plan1_lot)
6401  if (mod(m2oeff, lot1) /= 0) then
6402    call dfftw_destroy_plan(fw_plan1_rest)
6403  end if
6404 
6405  call dfftw_destroy_plan(fw_plan2_lot)
6406  if (mod(n1eff, lot2) /= 0) then
6407    call dfftw_destroy_plan(fw_plan2_rest)
6408  end if
6409 
6410  ABI_DEALLOCATE(zmpi2)
6411  ABI_DEALLOCATE(zw)
6412  ABI_DEALLOCATE(zt)
6413  if (nproc_fft > 1)  then
6414    ABI_DEALLOCATE(zmpi1)
6415  end if
6416 
6417 #else
6418  MSG_ERROR("FFTW3 support not activated")
6419  ABI_UNUSED((/cplexwf,cplex,ndat,n1,n2,n3,nd1,nd2,nd3,nd3proc/))
6420  ABI_UNUSED((/max1i,max2i,max3i,m1i,m2i,m3i,md1,md2proc,md3/))
6421  ABI_UNUSED((/max1o,max2o,max3o,m1o,m2o,m3o,comm_fft,nproc_fft,me_fft/))
6422  ABI_UNUSED((/pot(1,1,1),zf(1,1,1,1,1)/))
6423 #endif
6424 
6425 end subroutine fftw3_applypot

m_fftw3/fftw3_applypot_many [ Functions ]

[ Top ] [ m_fftw3 ] [ Functions ]

NAME

  fftw3_applypot_many

FUNCTION

 Applies the local real space potential to multiple wavefunctions in Fourier space

INPUTS

   ZF: Wavefunction (input/output) (note the switch of i2 and i3)
        real(F(i1,i3,i2,idat))=ZF(1,i1,i3,i2,idat)
        imag(F(i1,i3,i2,idat))=ZF(2,i1,i3,i2,idat)
   max1 is positive or zero ; m1 >=max1+1
   i1= 1... max1+1 corresponds to positive and zero wavevectors 0 ... max1
   then, if m1 > max1+1, one has min1=max1-m1+1 and
   i1= max1+2 ... m1 corresponds to negative wavevectors min1 ... -1
   i2 and i3 have a similar definition of range
   idat=1,ndat
   md1,md2,md3: Dimension of ZF (input as well as output), distributed on different procs
   md2proc=((md2-1)/nproc_fft)+1  maximal number of small box 2nd dim slices for one proc

   POT: Potential
        POT(cplex*i1,i2,i3)
        cplex=1 or 2 ,  i1=1,n1 , i2=1,n2 , i3=1,n3
   nd1,nd2,nd3: dimension of pot
   comm_fft: MPI communicator
   nproc_fft: number of processors used as returned by MPI_COMM_SIZE
   me_fft: [0:nproc_fft-1] number of processor as returned by MPI_COMM_RANK
    n1,n2,n3: logical dimension of the transform. As transform lengths
              most products of the prime factors 2,3,5 are allowed.
             The detailed table with allowed transform lengths can
             be found in subroutine CTRIG

 NOTES:
   PERFORMANCE CONSIDERATIONS:
   The maximum number of processors that can reasonably be used is max(n2/2,n3/2)

   It is very important to find the optimal
   value of NCACHE. NCACHE determines the size of the work array ZW, that
   has to fit into cache. It has therefore to be chosen to equal roughly
    half the size of the physical cache in units of real*8 numbers.
   The optimal value of ncache can easily be determined by numerical
   experimentation. A too large value of ncache leads to a dramatic
   and sudden decrease of performance, a too small value to a to a
   slow and less dramatic decrease of performance. If NCACHE is set
   to a value so small, that not even a single one dimensional transform
   can be done in the workarray zw, the program stops with an error message.

PARENTS

      m_fft

CHILDREN

      fftw3_destroy_plan,fftw3_execute_dft

SOURCE

7538 subroutine fftw3_applypot_many(cplexwf,cplex,ndat,n1,n2,n3,nd1,nd2,nd3,nd3proc,&
7539 &  max1i,max2i,max3i,m1i,m2i,m3i,md1,md2proc,md3,&
7540 &  max1o,max2o,max3o,m1o,m2o,m3o,comm_fft,nproc_fft,me_fft,pot,zf)
7541 
7542 
7543 !This section has been created automatically by the script Abilint (TD).
7544 !Do not modify the following lines by hand.
7545 #undef ABI_FUNC
7546 #define ABI_FUNC 'fftw3_applypot_many'
7547 !End of the abilint section
7548 
7549  implicit none
7550 
7551 !Arguments ------------------------------------
7552  integer,intent(in) :: cplexwf,cplex,ndat,n1,n2,n3,nd1,nd2,nd3,nd3proc
7553  integer,intent(in) :: max1i,max2i,max3i,m1i,m2i,m3i,md1,md2proc,md3
7554  integer,intent(in) :: max1o,max2o,max3o,m1o,m2o,m3o,comm_fft,nproc_fft,me_fft
7555  real(dp),intent(in) :: pot(cplex*nd1,nd2,nd3)
7556  real(dp),intent(inout) :: zf(2,md1,md3,md2proc,ndat)
7557 
7558 !Local variables-------------------------------
7559 !scalars
7560 #ifdef HAVE_FFT_FFTW3
7561  integer,parameter :: unused0=0
7562  integer :: j,i1,i2,i3,idat,ierr,j3glob,nthreads
7563  integer :: ioption,j2,j3,lzt,m1zt,ma,mb,n1dfft,nnd3,lot1,lot2,lot3
7564  integer :: m2eff,ncache,n1eff,i1inv,i2inv,i3inv,jeff,includelast,j2stb
7565  integer :: jx,j2stf,Jp2stb,Jp2stf,m2ieff,m2oeff
7566  integer(KIND_FFTW_PLAN) :: bw_plan1_lot,bw_plan1_rest
7567  integer(KIND_FFTW_PLAN) :: bw_plan2_lot,bw_plan2_rest
7568  integer(KIND_FFTW_PLAN) :: bw_plan3_lot,bw_plan3_rest
7569  integer(KIND_FFTW_PLAN) :: fw_plan1_lot,fw_plan1_rest
7570  integer(KIND_FFTW_PLAN) :: fw_plan2_lot,fw_plan2_rest
7571  integer(KIND_FFTW_PLAN) :: fw_plan3_lot,fw_plan3_rest
7572  character(len=500) :: msg
7573 !arrays
7574  integer :: requests(ndat)
7575  real(dp) :: tsec(2)
7576  real(dp) ABI_ASYNC, allocatable :: zmpi1(:,:,:,:,:),zmpi2(:,:,:,:,:) ! work arrays for MPI
7577  real(dp),allocatable :: zw(:,:),zt(:,:,:) ! cache work array and array for transpositions
7578 ! FFT work arrays
7579 
7580 ! *************************************************************************
7581 
7582  !ioption=0 ! This was in the old version.
7583  ioption=1 ! This one is needed to be compatible with paral_kgb
7584 
7585  ncache=2*max(n1,n2,n3,1024)
7586  if (ncache/(2*max(n1,n2,n3)) < 1) then
7587    write(msg,"(5a)") &
7588 &    'ncache has to be enlarged to be able to hold at',ch10,&
7589 &    'least one 1-d FFT of each size even though this will',ch10,&
7590 &    'reduce the performance for shorter transform lengths'
7591    MSG_ERROR(msg)
7592  end if
7593 
7594  !call wrtout(std_out,"applypot with non-blocking IALLTOALL + FFTW3","COLL")
7595  !write(std_out,"(a,i0)")"in applypot_many with ndat: ",ndat
7596 
7597  ! Effective m1 and m2 (complex-to-complex or real-to-complex)
7598  n1eff=n1; m2ieff=m2i; m2oeff=m2o; m1zt=n1
7599  if (cplexwf==1) then
7600    n1eff=(n1+1)/2; m2ieff=m2i/2+1; m2oeff=m2o/2+1; m1zt=2*(n1/2+1)
7601  end if
7602 
7603  m2eff=max(m2ieff,m2oeff)
7604  lzt=m2eff
7605  if (mod(m2eff,2) == 0) lzt=lzt+1
7606  if (mod(m2eff,4) == 0) lzt=lzt+1
7607 
7608  ! maximal number of big box 3rd dim slices for all procs
7609  nnd3=nd3proc*nproc_fft
7610 
7611  ABI_ALLOCATE(zw,(2,ncache/2))
7612  ABI_ALLOCATE(zt,(2,lzt,m1zt))
7613  ABI_ALLOCATE(zmpi2,(2,md1,md2proc,nnd3,ndat))
7614  if (nproc_fft > 1)  then
7615    ABI_ALLOCATE(zmpi1,(2,md1,md2proc,nnd3,ndat))
7616  end if
7617 
7618  lot3=ncache/(2*n3)
7619  lot1=ncache/(2*n1)
7620  lot2=ncache/(2*n2)
7621 
7622  nthreads = xomp_get_num_threads(open_parallel=.TRUE.)
7623  !nthreads = 1
7624 
7625  ! The prototype for sfftw_plan_many_dft is:
7626  ! sfftw_plan_many_dft(rank, n, howmany,
7627  !   fin,  iembed, istride, idist,
7628  !   fout, oembed, ostride, odist, isign, my_flags)
7629 
7630  ! Create plans for G --> R (see back_wf)
7631  bw_plan3_lot = dplan_many_dft_2D(1, [n3], lot3, &
7632 &    zw, [ncache/2], lot3, 1,                          &
7633 &    zw, [ncache/2], lot3, 1, ABI_FFTW_BACKWARD, ABI_FFTW_ESTIMATE, nthreads)
7634 
7635  if (mod(m1i, lot3) /= 0) then
7636    bw_plan3_rest = dplan_many_dft_2D(1, [n3], mod(m1i, lot3),&
7637 &      zw, [ncache/2], lot3, 1,                                    &
7638 &      zw, [ncache/2], lot3, 1, ABI_FFTW_BACKWARD, ABI_FFTW_ESTIMATE, nthreads)
7639  end if
7640 
7641  bw_plan1_lot = dplan_many_dft_2D(1, [n1], lot1, &
7642 &    zw, [ncache/2],  lot1, 1,                         &
7643 &    zt, [lzt, m1zt], lzt, 1, ABI_FFTW_BACKWARD, ABI_FFTW_ESTIMATE, nthreads)
7644 
7645  if (mod(m2ieff, lot1) /= 0) then
7646    bw_plan1_rest = dplan_many_dft_2D(1, [n1], mod(m2ieff, lot1), &
7647 &      zw, [ncache/2],  lot1, 1,                                       &
7648 &      zt, [lzt, m1zt], lzt, 1, ABI_FFTW_BACKWARD, ABI_FFTW_ESTIMATE, nthreads)
7649  end if
7650 
7651  !TODO this won't work if iclexwf==1
7652  ! Recheck this
7653  bw_plan2_lot = dplan_many_dft_2D(1, [n2], lot2, &
7654 &    zw, [ncache/2], lot2, 1,                          &
7655 &    zw, [ncache/2], lot2, 1, ABI_FFTW_BACKWARD, ABI_FFTW_ESTIMATE, nthreads)
7656 
7657  if (mod(n1eff, lot2) /= 0) then
7658    bw_plan2_rest = dplan_many_dft_2D(1, [n2], mod(n1eff,lot2), &
7659 &      zw, [ncache/2], lot2, 1,                                      &
7660 &      zw, [ncache/2], lot2, 1, ABI_FFTW_BACKWARD, ABI_FFTW_ESTIMATE, nthreads)
7661  end if
7662 
7663  ! Create plans for G --> R (see forw_wf)
7664  fw_plan3_lot = dplan_many_dft_2D(1, [n3], lot3, &
7665 &    zw, [ncache/2], lot3, 1,                          &
7666 &    zw, [ncache/2], lot3, 1, ABI_FFTW_FORWARD, ABI_FFTW_ESTIMATE, nthreads)
7667 
7668  if (mod(m1o, lot3) /= 0) then
7669    fw_plan3_rest = dplan_many_dft_2D(1, [n3], mod(m1o, lot3),&
7670 &    zw, [ncache/2], lot3, 1,                                      &
7671 &    zw, [ncache/2], lot3, 1, ABI_FFTW_FORWARD, ABI_FFTW_ESTIMATE, nthreads)
7672  end if
7673 
7674  fw_plan1_lot = dplan_many_dft_2D(1, [n1], lot1,&
7675 &    zt, [lzt, m1zt], lzt,  1,                        &
7676 &    zw, [ncache/2],  lot1, 1, ABI_FFTW_FORWARD, ABI_FFTW_ESTIMATE, nthreads)
7677 
7678  if (mod(m2oeff, lot1) /= 0) then
7679    fw_plan1_rest = dplan_many_dft_2D(1, [n1], mod(m2oeff, lot1),&
7680 &    zt, [lzt, m1zt], lzt,  1,                                        &
7681 &    zw, [ncache/2],  lot1, 1, ABI_FFTW_FORWARD, ABI_FFTW_ESTIMATE, nthreads)
7682  end if
7683 
7684  fw_plan2_lot = dplan_many_dft_2D(1, [n2], lot2,&
7685 &    zw, [ncache/2], lot2, 1,                         &
7686 &    zw, [ncache/2], lot2, 1, ABI_FFTW_FORWARD, ABI_FFTW_ESTIMATE, nthreads)
7687 
7688  if (mod(n1eff, lot2) /= 0) then
7689    fw_plan2_rest = dplan_many_dft_2D(1, [n2], mod(n1eff,lot2),&
7690 &    zw, [ncache/2], lot2, 1,                                       &
7691 &    zw, [ncache/2], lot2, 1, ABI_FFTW_FORWARD, ABI_FFTW_ESTIMATE, nthreads)
7692  end if
7693 
7694  ! Here we take advantage of non-blocking IALLTOALL:
7695  ! Perform the first step of MPI-FFT for ndat wavefunctions.
7696  do idat=1,ndat
7697    !
7698    ! transform along z axis
7699    ! input: G1,G3,G2,(Gp2)
7700    do j2=1,md2proc
7701      if (me_fft*md2proc+j2 <= m2ieff) then
7702        do i1=1,m1i,lot3
7703          ma=i1
7704          mb=min(i1+(lot3-1),m1i)
7705          n1dfft=mb-ma+1
7706 
7707          ! zero-pad n1dfft G_z lines
7708          ! input: G1,G3,G2,(Gp2)
7709          call fill_cent(md1,md3,lot3,n1dfft,max3i,m3i,n3,zf(1,i1,1,j2,idat),zw)
7710 
7711          if (n1dfft == lot3) then
7712            call dfftw_execute_dft(bw_plan3_lot, zw, zw)
7713          else
7714            call dfftw_execute_dft(bw_plan3_rest, zw, zw)
7715          end if
7716 
7717          ! Local rotation.
7718          ! input:  G1,R3,G2,(Gp2)
7719          ! output: G1,G2,R3,(Gp2)
7720          call scramble(i1,j2,lot3,n1dfft,md1,n3,md2proc,nnd3,zw,zmpi2(:,:,:,:,idat))
7721        end do
7722      end if
7723    end do
7724 
7725    ! Interprocessor data transposition
7726    ! input:  G1,G2,R3,Rp3,(Gp2)
7727    ! output: G1,G2,R3,Gp2,(Rp3)
7728    if (nproc_fft > 1) then
7729      call timab(543,1,tsec)
7730      call xmpi_ialltoall(zmpi2(:,:,:,:,idat),2*md1*md2proc*nd3proc,&
7731 &                        zmpi1(:,:,:,:,idat),2*md1*md2proc*nd3proc,comm_fft,requests(idat))
7732      call timab(543,2,tsec)
7733    end if
7734  end do ! idat
7735 
7736  ! The second step of MPI-FFT
7737  do idat=1,ndat
7738     ! Make sure communication is completed.
7739     if (nproc_fft>1) call xmpi_wait(requests(idat),ierr)
7740 
7741    do j3=1,nd3proc
7742      j3glob = j3 + me_fft*nd3proc
7743      if (me_fft*nd3proc+j3 <= n3) then
7744        Jp2stb=1; J2stb=1
7745        Jp2stf=1; J2stf=1
7746 
7747        ! transform along x axis
7748        do j=1,m2ieff,lot1
7749          ma=j
7750          mb=min(j+(lot1-1),m2ieff)
7751          n1dfft=mb-ma+1
7752 
7753          ! Zero-pad input.
7754          ! input:  G1,G2,R3,G2,(Rp3)
7755          ! output: G2,G1,R3,G2,(Rp3)
7756          if (nproc_fft == 1) then
7757            call mpiswitch_cent(j3,n1dfft,Jp2stb,J2stb,lot1,max1i,md1,m1i,n1,&
7758 &           md2proc,nd3proc,nproc_fft,ioption,zmpi2(:,:,:,:,idat),zw, unused0, unused0, unused0)
7759          else
7760            call mpiswitch_cent(j3,n1dfft,Jp2stb,J2stb,lot1,max1i,md1,m1i,n1,&
7761 &           md2proc,nd3proc,nproc_fft,ioption,zmpi1(:,:,:,:,idat),zw, unused0, unused0, unused0)
7762          end if
7763 
7764          ! Transform along x
7765          ! input:  G2,G1,R3,(Rp3)
7766          ! output: G2,R1,R3,(Rp3)
7767          if (n1dfft == lot1) then
7768            call dfftw_execute_dft(bw_plan1_lot, zw, zt(1,j,1))
7769          else
7770            call dfftw_execute_dft(bw_plan1_rest, zw, zt(1,j,1))
7771          end if
7772        end do
7773 
7774        ! Transform along y axis (take into account c2c or c2r case).
7775        ! Must loop over the full box.
7776        !TODO this won't work
7777        if (cplexwf==1) then
7778          if(mod(lot2,2).ne.0) lot2=lot2-1 ! needed to introduce jeff
7779        end if
7780 
7781        do j=1,n1eff,lot2
7782          ma=j
7783          mb=min(j+(lot2-1),n1eff)
7784          n1dfft=mb-ma+1
7785          jeff=j
7786          includelast=1
7787 
7788          if (cplexwf==1) then
7789            jeff=2*j-1
7790            includelast=1
7791            if (mb==n1eff .and. n1eff*2/=n1) includelast=0
7792          end if
7793 
7794          ! Zero-pad the input.
7795          !  input: G2,R1,R3,(Rp3)
7796          ! output: R1,G2,R3,(Rp3)
7797          if (cplexwf==2) then
7798            call switch_cent(n1dfft,max2i,m2i,n2,lot2,n1,lzt,zt(1,1,jeff),zw)
7799          else
7800            call switchreal_cent(includelast,n1dfft,max2i,n2,lot2,m1zt,lzt,zt(1,1,jeff),zw)
7801          end if
7802 
7803          ! input:  R1,G2,R3,(Rp3)
7804          ! output: R1,R2,R3,(Rp3)
7805          ! Be careful here
7806          if (n1dfft == lot2) then
7807            call dfftw_execute_dft(bw_plan2_lot, zw, zw)
7808          else
7809            call dfftw_execute_dft(bw_plan2_rest, zw, zw)
7810          end if
7811 
7812          ! Multiply with potential in real space
7813          jx=cplex*(jeff-1)+1
7814          call multpot(cplexwf,cplex,includelast,nd1,nd2,n2,lot2,n1dfft,pot(jx,1,j3glob),zw)
7815 
7816          ! TRANSFORM BACK IN FOURIER SPACE
7817          ! transform along y axis
7818          ! input: R1,R2,R3,(Rp3)
7819          if (n1dfft == lot2) then
7820            call dfftw_execute_dft(fw_plan2_lot,  zw, zw)
7821          else
7822            call dfftw_execute_dft(fw_plan2_rest, zw, zw)
7823          end if
7824 
7825          !  input: R1,G2,R3,(Rp3)
7826          ! output: G2,R1,R3,(Rp3)
7827          if (cplexwf==2) then
7828            call unswitch_cent(n1dfft,max2o,m2o,n2,lot2,n1,lzt,zw,zt(1,1,jeff))
7829          else
7830            call unswitchreal_cent(n1dfft,max2o,n2,lot2,n1,lzt,zw,zt(1,1,jeff))
7831          end if
7832        end do ! j
7833 
7834        ! transform along x axis
7835        ! input:  R2,R1,R3,(Rp3)
7836        ! output: R2,G1,R3,(Rp3)
7837        do j=1,m2oeff,lot1
7838          ma=j
7839          mb=min(j+(lot1-1),m2oeff)
7840          n1dfft=mb-ma+1
7841 
7842          if (n1dfft == lot1) then
7843            call dfftw_execute_dft(fw_plan1_lot,  zt(1,j,1), zw)
7844          else
7845            call dfftw_execute_dft(fw_plan1_rest, zt(1,j,1), zw)
7846          end if
7847 
7848          ! input:  G2,G1,R3,Gp2,(Rp3)
7849          ! output: G1,G2,R3,Gp2,(Rp3)
7850          if (nproc_fft == 1) then
7851            call unmpiswitch_cent(j3,n1dfft,Jp2stf,J2stf,lot1,max1o,md1,m1o,n1,&
7852 &           md2proc,nd3proc,nproc_fft,ioption,zw,zmpi2(:,:,:,:,idat))
7853          else
7854            call unmpiswitch_cent(j3,n1dfft,Jp2stf,J2stf,lot1,max1o,md1,m1o,n1,&
7855 &           md2proc,nd3proc,nproc_fft,ioption,zw,zmpi1(:,:,:,:,idat))
7856          end if
7857        end do ! j
7858      end if
7859    end do
7860 
7861    ! Interprocessor data transposition
7862    ! input:  G1,G2,R3,Gp2,(Rp3)
7863    ! output: G1,G2,R3,Rp3,(Gp2)
7864    if (nproc_fft > 1) then
7865      call timab(544,1,tsec)
7866      call xmpi_ialltoall(zmpi1(:,:,:,:,idat),2*md1*md2proc*nd3proc, &
7867 &                        zmpi2(:,:,:,:,idat),2*md1*md2proc*nd3proc,comm_fft,requests(idat))
7868      call timab(544,2,tsec)
7869    end if
7870  end do
7871 
7872  do idat=1,ndat
7873    if (nproc_fft>1) call xmpi_wait(requests(idat),ierr)
7874    ! transform along z axis
7875    ! input: G1,G2,R3,(Gp2)
7876    !lot=ncache/(4*n3)
7877    do j2=1,md2proc
7878      if (me_fft*md2proc+j2 <= m2oeff) then
7879        do i1=1,m1o,lot3
7880          ma=i1
7881          mb=min(i1+(lot3-1),m1o)
7882          n1dfft=mb-ma+1
7883 
7884          ! input:  G1,G2,R3,(Gp2)
7885          ! output: G1,R3,G2,(Gp2)
7886          call unscramble(i1,j2,lot3,n1dfft,md1,n3,md2proc,nnd3,zmpi2(:,:,:,:,idat),zw)
7887 
7888           if (n1dfft == lot3) then
7889             call dfftw_execute_dft(fw_plan3_lot, zw, zw)
7890           else
7891             call dfftw_execute_dft(fw_plan3_rest, zw, zw)
7892           end if
7893 
7894          call unfill_cent(md1,md3,lot3,n1dfft,max3o,m3o,n3,zw,zf(1,i1,1,j2,idat))
7895          ! output: G1,G3,G2,(Gp2)
7896        end do
7897      end if
7898    end do
7899 
7900    ! Complete missing values with complex conjugate
7901    ! Inverse of ix is located at nx+2-ix , except for ix=1, for which it is 1.
7902    if (cplexwf==1) then
7903      do i3=1,m3o
7904        i3inv=m3o+2-i3
7905        if (i3==1) i3inv=1
7906        if (m2oeff>1)then
7907          do i2=2,m2oeff
7908            i2inv=m2o+2-i2
7909            zf(1,1,i3inv,i2inv,idat)= zf(1,1,i3,i2,idat)
7910            zf(2,1,i3inv,i2inv,idat)=-zf(2,1,i3,i2,idat)
7911            do i1=2,m1o
7912              i1inv=m1o+2-i1
7913              zf(1,i1inv,i3inv,i2inv,idat)= zf(1,i1,i3,i2,idat)
7914              zf(2,i1inv,i3inv,i2inv,idat)=-zf(2,i1,i3,i2,idat)
7915            end do
7916          end do
7917        end if
7918      end do
7919    end if
7920 
7921  end do ! idat
7922 
7923  call dfftw_destroy_plan(bw_plan3_lot)
7924  if (mod(m1i, lot3) /= 0) then
7925    call dfftw_destroy_plan(bw_plan3_rest)
7926  end if
7927 
7928  call dfftw_destroy_plan(bw_plan1_lot)
7929  if (mod(m2ieff, lot1) /= 0) then
7930    call dfftw_destroy_plan(bw_plan1_rest)
7931  end if
7932 
7933  call dfftw_destroy_plan(bw_plan2_lot)
7934  if (mod(n1eff, lot2) /= 0) then
7935    call dfftw_destroy_plan(bw_plan2_rest)
7936  end if
7937 
7938  call dfftw_destroy_plan(fw_plan3_lot)
7939  if (mod(m1o, lot3) /= 0) then
7940    call dfftw_destroy_plan(fw_plan3_rest)
7941  end if
7942 
7943  call dfftw_destroy_plan(fw_plan1_lot)
7944  if (mod(m2oeff, lot1) /= 0) then
7945    call dfftw_destroy_plan(fw_plan1_rest)
7946  end if
7947 
7948  call dfftw_destroy_plan(fw_plan2_lot)
7949  if (mod(n1eff, lot2) /= 0) then
7950    call dfftw_destroy_plan(fw_plan2_rest)
7951  end if
7952 
7953  ABI_DEALLOCATE(zmpi2)
7954  ABI_DEALLOCATE(zw)
7955  ABI_DEALLOCATE(zt)
7956  if (nproc_fft > 1)  then
7957    ABI_DEALLOCATE(zmpi1)
7958  end if
7959 
7960 #else
7961  MSG_ERROR("FFTW3 support not activated")
7962  ABI_UNUSED((/cplexwf,cplex,ndat,n1,n2,n3,nd1,nd2,nd3,nd3proc/))
7963  ABI_UNUSED((/max1i,max2i,max3i,m1i,m2i,m3i,md1,md2proc,md3/))
7964  ABI_UNUSED((/max1o,max2o,max3o,m1o,m2o,m3o,comm_fft,nproc_fft,me_fft/))
7965  ABI_UNUSED((/pot(1,1,1),zf(1,1,1,1,1)/))
7966 #endif
7967 
7968 end subroutine fftw3_applypot_many

m_fftw3/fftw3_c2c_ip_dpc [ Functions ]

[ Top ] [ m_fftw3 ] [ Functions ]

NAME

  fftw3_c2c_ip_dpc

FUNCTION

 Driver routine for in-place 3D complex-complex FFT.

INPUTS

 nx,ny,nz=Number of points along the three directions.
 ldx,ldy,ldz=Physical dimensions of the array.
 ndat=Number of FFTs to be done.
 isign= +1 : ff(G) => ff(R); -1 : ff(R) => ff(G)
 [fftw_flags]=Flags used to create the plan. They can be combined with the "+" operator.
   Defaults to ABI_FFTW_ESTIMATE.

SIDE EFFECTS

  ff(ldx*ldy*ldz*ndat)=
    In input: the complex array to be transformed.
    In output: the Fourier transformed in the space specified by isign.

PARENTS

CHILDREN

      fftw3_destroy_plan,fftw3_execute_dft

SOURCE

1676 subroutine fftw3_c2c_ip_dpc(nx,ny,nz,ldx,ldy,ldz,ndat,isign,ff,fftw_flags)
1677 
1678 
1679 !This section has been created automatically by the script Abilint (TD).
1680 !Do not modify the following lines by hand.
1681 #undef ABI_FUNC
1682 #define ABI_FUNC 'fftw3_c2c_ip_dpc'
1683 !End of the abilint section
1684 
1685  implicit none
1686 
1687 !Arguments ------------------------------------
1688 !scalars
1689  integer,intent(in) :: nx,ny,nz,ldx,ldy,ldz,ndat,isign
1690  integer,optional,intent(in) :: fftw_flags
1691 !arrays
1692  complex(dpc),intent(inout) :: ff(ldx*ldy*ldz*ndat)
1693 
1694 #ifdef HAVE_FFT_FFTW3
1695 !Local variables-------------------------------
1696 !scalars
1697  integer,parameter :: rank3=3,nt_all=-1
1698  integer :: my_flags,dist,stride
1699  integer(KIND_FFTW_PLAN) :: my_plan
1700 !arrays
1701  integer :: embed(rank3),n(rank3)
1702 
1703 ! *************************************************************************
1704 
1705  my_flags=ABI_FFTW_ESTIMATE; if (PRESENT(fftw_flags)) my_flags=fftw_flags
1706 
1707  stride = 1
1708  dist   = ldx*ldy*ldz
1709  embed  = (/ldx,ldy,ldz/)
1710  n      = (/nx ,ny ,nz /)
1711 
1712  my_plan = fftw3_plan_many_dft(rank3, n, ndat, ff, embed, stride, dist, ff, embed, stride, dist, isign, my_flags, nt_all)
1713 
1714  ! Now perform the 3D FFT via FFTW.
1715  call dfftw_execute_dft(my_plan, ff, ff)
1716 
1717  call fftw3_destroy_plan(my_plan)
1718 
1719  if (isign==ABI_FFTW_FORWARD) then ! -1, FFTW returns not normalized FTs
1720   call ZDSCAL(ldx*ldy*ldz*ndat, one/(nx*ny*nz), ff, 1)
1721  end if
1722 
1723 #else
1724  MSG_ERROR("FFTW3 support not activated")
1725  ABI_UNUSED((/nx,ny,nz,ldx,ldy,ldz,isign/))
1726  ABI_UNUSED(ff)
1727  if (PRESENT(fftw_flags)) then
1728    ABI_UNUSED(fftw_flags)
1729  end if
1730 #endif
1731 
1732 end subroutine fftw3_c2c_ip_dpc

m_fftw3/fftw3_c2c_ip_spc [ Functions ]

[ Top ] [ m_fftw3 ] [ Functions ]

NAME

  fftw3_c2c_ip_spc

FUNCTION

 Driver routine for in-place 3D complex-complex FFT.
 TARGET: Simple precision complex arrays.

INPUTS

 nx,ny,nz=Number of points along the three directions.
 ldx,ldy,ldz=Physical dimensions of the array.
 ndat=Number of FFTs to be done.
 isign= +1 : ff(G) => ff(R); -1 : ff(R) => ff(G)
 [fftw_flags]=Flags used to create the plan. They can be combined with the "+" operator.
   Defaults to ABI_FFTW_ESTIMATE.

SIDE EFFECTS

  ff(ldx*ldy*ldz*ndat)=
    In input: the complex array to be transformed.
    In output: the Fourier transformed in the space specified by isign.

PARENTS

CHILDREN

      fftw3_destroy_plan,fftw3_execute_dft

SOURCE

1518 subroutine fftw3_c2c_ip_spc(nx,ny,nz,ldx,ldy,ldz,ndat,isign,ff,fftw_flags)
1519 
1520 
1521 !This section has been created automatically by the script Abilint (TD).
1522 !Do not modify the following lines by hand.
1523 #undef ABI_FUNC
1524 #define ABI_FUNC 'fftw3_c2c_ip_spc'
1525 !End of the abilint section
1526 
1527  implicit none
1528 
1529 !Arguments ------------------------------------
1530 !scalars
1531  integer,intent(in) :: nx,ny,nz,ldx,ldy,ldz,ndat,isign
1532  integer,optional,intent(in) :: fftw_flags
1533 !arrays
1534  complex(spc),intent(inout) :: ff(ldx*ldy*ldz*ndat)
1535 
1536 #ifdef HAVE_FFT_FFTW3
1537 !Local variables-------------------------------
1538 !scalars
1539  integer,parameter :: rank3=3,nt_all=-1
1540  integer :: my_flags,dist,stride
1541  integer(KIND_FFTW_PLAN) :: my_plan
1542 !arrays
1543  integer :: embed(rank3),n(rank3)
1544 
1545 ! *************************************************************************
1546 
1547  my_flags=ABI_FFTW_ESTIMATE; if (PRESENT(fftw_flags)) my_flags=fftw_flags
1548 
1549  stride = 1
1550  dist   = ldx*ldy*ldz
1551  embed  = (/ldx,ldy,ldz/)
1552  n      = (/nx ,ny ,nz /)
1553 
1554  my_plan = fftw3_plan_many_dft(rank3, n, ndat, ff, embed, stride, dist, ff, embed, stride, dist, isign, my_flags, nt_all)
1555 
1556  ! Now perform the 3D FFT via FFTW.
1557  call sfftw_execute_dft(my_plan, ff, ff)
1558 
1559  call fftw3_destroy_plan(my_plan)
1560 
1561  if (isign==ABI_FFTW_FORWARD) then ! -1, FFTW returns not normalized FTs
1562    call xscal(ldx*ldy*ldz*ndat, REAL(one/(nx*ny*nz),KIND=sp), ff, 1)
1563  end if
1564 
1565 #else
1566  MSG_ERROR("FFTW3 support not activated")
1567  ABI_UNUSED((/nx,ny,nz,ldx,ldy,ldz,isign/))
1568  ABI_UNUSED(ff)
1569  if (PRESENT(fftw_flags)) then
1570    ABI_UNUSED(fftw_flags)
1571  end if
1572 #endif
1573 
1574 end subroutine fftw3_c2c_ip_spc

m_fftw3/fftw3_c2c_op_dpc [ Functions ]

[ Top ] [ m_fftw3 ] [ Functions ]

NAME

  fftw3_c2c_op_dpc

FUNCTION

 Driver routine for out-of-place 3D complex-complex FFT of lengths nx, ny, nz.
 TARGET: single precision complex arrays

INPUTS

 nx,ny,nz=Number of points along the three directions.
 ldx,ldy,ldz=Physical dimensions of the array.
 ndat=Number of FFTs to be done.
 isign= +1 : ff(G) => gg(R); -1 : ff(R) => gg(G)
 ff(ldx*ldy*ldz*ndat)=The array to be transformed.
 [fftw_flags]=Flags used to create the plan. They can be combined with the "+" operator.
   Defaults to ABI_FFTW_ESTIMATE.

OUTPUT

 gg(ldx*ldy*ldz*ndat)=The FFT of ff.

PARENTS

CHILDREN

      fftw3_destroy_plan,fftw3_execute_dft

SOURCE

1854 subroutine fftw3_c2c_op_dpc(nx,ny,nz,ldx,ldy,ldz,ndat,isign,ff,gg,fftw_flags)
1855 
1856 
1857 !This section has been created automatically by the script Abilint (TD).
1858 !Do not modify the following lines by hand.
1859 #undef ABI_FUNC
1860 #define ABI_FUNC 'fftw3_c2c_op_dpc'
1861 !End of the abilint section
1862 
1863  implicit none
1864 
1865 !Arguments ------------------------------------
1866 !scalars
1867  integer,intent(in) :: nx,ny,nz,ldx,ldy,ldz,isign,ndat
1868  integer,optional,intent(in) :: fftw_flags
1869 !arrays
1870  complex(dpc),intent(in) :: ff(ldx*ldy*ldz*ndat)
1871  complex(dpc),intent(out) :: gg(ldx*ldy*ldz*ndat)
1872 
1873 #ifdef HAVE_FFT_FFTW3
1874 !Local variables-------------------------------
1875 !scalars
1876  integer,parameter :: rank3=3,nt_all=-1
1877  integer :: my_flags,dist,stride
1878  integer(KIND_FFTW_PLAN) :: my_plan
1879 !arrays
1880  integer :: embed(rank3),n(rank3)
1881 
1882 ! *************************************************************************
1883 
1884  my_flags=ABI_FFTW_ESTIMATE; if (PRESENT(fftw_flags)) my_flags= fftw_flags
1885 
1886  stride = 1
1887  dist   = ldx*ldy*ldz
1888  embed  = (/ldx,ldy,ldz/)
1889  n      = (/nx ,ny ,nz/)
1890 
1891  my_plan = fftw3_plan_many_dft(rank3, n, ndat, ff, embed, stride, dist, gg, embed, stride, dist, isign, my_flags, nt_all)
1892 
1893  ! Now perform the 3D FFT via FFTW.
1894  call dfftw_execute_dft(my_plan, ff, gg)
1895 
1896  call fftw3_destroy_plan(my_plan)
1897 
1898  if (isign==ABI_FFTW_FORWARD) then ! -1, FFTW returns not normalized FTs
1899    call xscal(ldx*ldy*ldz*ndat, one/(nx*ny*nz), gg, 1)
1900  end if
1901 
1902 #else
1903  MSG_ERROR("FFTW3 support not activated")
1904  ABI_UNUSED((/nx,ny,nz,ldx,ldy,ldz,isign/))
1905  ABI_UNUSED(ff)
1906  ABI_UNUSED(gg)
1907  if (PRESENT(fftw_flags)) then
1908    ABI_UNUSED(fftw_flags)
1909  end if
1910 #endif
1911 
1912 end subroutine fftw3_c2c_op_dpc

m_fftw3/fftw3_c2c_op_spc [ Functions ]

[ Top ] [ m_fftw3 ] [ Functions ]

NAME

  fftw3_c2c_op_spc

FUNCTION

 Driver routine for out-of-place 3D complex-complex FFT of lengths nx, ny, nz.
 TARGET: single precision complex arrays

INPUTS

 nx,ny,nz=Number of points along the three directions.
 ldx,ldy,ldz=Physical dimensions of the array.
 ndat=Number of FFTs to be done.
 isign= +1 : ff(G) => gg(R); -1 : ff(R) => gg(G)
 ff(ldx*ldy*ldz*ndat)=The array to be transformed.
 [fftw_flags]=Flags used to create the plan. They can be combined with the "+" operator.
   Defaults to ABI_FFTW_ESTIMATE.

OUTPUT

 gg(ldx*ldy*ldz*ndat)=The FFT of ff.

PARENTS

CHILDREN

      fftw3_destroy_plan,fftw3_execute_dft

SOURCE

1764 subroutine fftw3_c2c_op_spc(nx,ny,nz,ldx,ldy,ldz,ndat,isign,ff,gg,fftw_flags)
1765 
1766 
1767 !This section has been created automatically by the script Abilint (TD).
1768 !Do not modify the following lines by hand.
1769 #undef ABI_FUNC
1770 #define ABI_FUNC 'fftw3_c2c_op_spc'
1771 !End of the abilint section
1772 
1773  implicit none
1774 
1775 !Arguments ------------------------------------
1776 !scalars
1777  integer,intent(in) :: nx,ny,nz,ldx,ldy,ldz,isign,ndat
1778  integer,optional,intent(in) :: fftw_flags
1779 !arrays
1780  complex(spc),intent(in) :: ff(ldx*ldy*ldz*ndat)
1781  complex(spc),intent(out) :: gg(ldx*ldy*ldz*ndat)
1782 
1783 #ifdef HAVE_FFT_FFTW3
1784 !Local variables-------------------------------
1785 !scalars
1786  integer,parameter :: rank3=3,nt_all=-1
1787  integer :: my_flags,dist,stride
1788  integer(KIND_FFTW_PLAN) :: my_plan
1789 !arrays
1790  integer :: embed(rank3),n(rank3)
1791 
1792 ! *************************************************************************
1793 
1794  my_flags=ABI_FFTW_ESTIMATE; if (PRESENT(fftw_flags)) my_flags= fftw_flags
1795 
1796  stride = 1
1797  dist   = ldx*ldy*ldz
1798  embed  = (/ldx,ldy,ldz/)
1799  n      = (/nx ,ny ,nz/)
1800 
1801  my_plan = fftw3_plan_many_dft(rank3, n, ndat, ff, embed, stride, dist, gg, embed, stride, dist, isign, my_flags, nt_all)
1802 
1803  ! Now perform the 3D FFT via FFTW.
1804  call sfftw_execute_dft(my_plan, ff, gg)
1805 
1806  call fftw3_destroy_plan(my_plan)
1807 
1808  if (isign==ABI_FFTW_FORWARD) then ! -1, FFTW returns not normalized FTs
1809    call xscal(ldx*ldy*ldz*ndat, REAL(one/(nx*ny*nz), KIND=sp), gg, 1)
1810  end if
1811 
1812 #else
1813  MSG_ERROR("FFTW3 support not activated")
1814  ABI_UNUSED((/nx,ny,nz,ldx,ldy,ldz,isign/))
1815  ABI_UNUSED(ff)
1816  ABI_UNUSED(gg)
1817  if (PRESENT(fftw_flags)) then
1818    ABI_UNUSED(fftw_flags)
1819  end if
1820 #endif
1821 
1822 end subroutine fftw3_c2c_op_spc

m_fftw3/fftw3_c2r_op [ Functions ]

[ Top ] [ m_fftw3 ] [ Functions ]

NAME

  fftw3_c2r_op

FUNCTION

 Driver routine for out-of-place 3D complex-to-real FFT of lengths nx, ny, nz.

INPUTS

 nx,ny,nz=Number of point along the three directions.
 ldx,ldy,ldz=Physical dimension of the f array (to avoid cache conflicts).
 ndat=Number of FFTs to be done.
 ff(2*ldx*ldy*ldz*ndat)=The complex array to be transformed.
 [fftw_flags]=Flags used to create the plan. They can be combined with the "+" operator.
   Defaults to ABI_FFTW_ESTIMATE.

OUTPUT

 gg(ldx*ldy*ldz*ndat)=The backwards real FFT of ff.

NOTES

  FIXME For the time-being. No augmentation of the mesh to reduce memory conflicts, as MKL crashes
  if the advanced interface is used.

PARENTS

      m_fftw3

CHILDREN

      fftw3_destroy_plan,fftw3_execute_dft

SOURCE

2113 subroutine fftw3_c2r_op(nx,ny,nz,ldx,ldy,ldz,ndat,ff,gg,fftw_flags)
2114 
2115 
2116 !This section has been created automatically by the script Abilint (TD).
2117 !Do not modify the following lines by hand.
2118 #undef ABI_FUNC
2119 #define ABI_FUNC 'fftw3_c2r_op'
2120 !End of the abilint section
2121 
2122  implicit none
2123 
2124 !Arguments ------------------------------------
2125 !scalars
2126  integer,intent(in) :: nx,ny,nz,ldx,ldy,ldz,ndat
2127  integer,optional,intent(in) :: fftw_flags
2128 !arrays
2129  real(dp),intent(in) :: ff(2,ldx*ldy*ldz*ndat)
2130  real(dp),intent(out) :: gg(ldx*ldy*ldz*ndat)
2131 
2132 #ifdef HAVE_FFT_FFTW3
2133 !Local variables-------------------------------
2134 !scalars
2135  integer,parameter :: rank3=3,nt_all=-1
2136  integer :: nhp,my_flags,padx,i2,i3,igp,igf,idat,padatf,padatp,idist,odist,stride
2137  integer(KIND_FFTW_PLAN) :: my_plan
2138 !arrays
2139  integer :: inembed(rank3),onembed(rank3),n(rank3)
2140  real(dp),allocatable :: ff_hp(:,:)
2141 
2142 ! *************************************************************************
2143 
2144 #ifdef DEV_RC_BUG
2145  if (ANY( (/nx,ny,nz/) /= (/ldx,ldy,ldz/) )) then
2146    MSG_ERROR("Augmentation not supported")
2147  end if
2148 #endif
2149 
2150  my_flags=ABI_FFTW_ESTIMATE; if (PRESENT(fftw_flags)) my_flags= fftw_flags
2151 
2152  stride  = 1
2153  nhp     = (nx/2+1)*ny*nz
2154  idist   = nhp
2155  odist   = ldx*ldy*ldz
2156  n       = (/nx,ny,nz/)
2157  inembed = (/(nx/2+1),ny,nz/)
2158  onembed = (/ldx,ldy,ldz/)
2159 
2160  ! Fill the Hermitian part: Hermitian redundancy: out[i] is the conjugate of out[n-i]
2161  ABI_MALLOC(ff_hp,(2,nhp*ndat))
2162 
2163  padx = (nx/2+1)
2164  do idat=1,ndat
2165    padatf=(idat-1)*ldx*ldy*ldz
2166    padatp=(idat-1)*padx*ny*nz
2167 !$OMP PARALLEL DO PRIVATE(igf,igp)
2168    do i3=1,nz
2169      do i2=1,ny
2170        igf = (i3-1)*ldx*ldy + (i2-1)*ldx   + padatf
2171        igp = (i3-1)*padx*ny + (i2-1)*padx  + padatp
2172        ff_hp(:,igp+1:igp+padx) = ff(:,igf+1:igf+padx)
2173      end do
2174    end do
2175  end do
2176 
2177  ! NOTE: The c2r transform destroys its input array even for out-of-place transforms.
2178 #ifdef DEV_RC_BUG
2179  if (ndat/=1) MSG_ERROR("ndat/=1 + MKL not coded")
2180  call dfftw_plan_dft_c2r_3d(my_plan, nx, ny, nz, ff_hp, gg, my_flags)
2181  if (my_plan==NULL_PLAN) then
2182    MSG_ERROR("dfftw_plan_dft_c2r_3d returned NULL_PLAN")
2183  end if
2184 #else
2185  my_plan = dplan_many_dft_c2r(rank3, n, ndat, ff_hp, inembed, stride, idist, gg, onembed, stride, odist, my_flags, nt_all)
2186 #endif
2187 
2188  ! Now perform the 3D FFT via FFTW. c2r are always ABI_FFTW_BACKWARD
2189  call dfftw_execute_dft_c2r(my_plan, ff_hp, gg)
2190 
2191  call fftw3_destroy_plan(my_plan)
2192 
2193  ABI_FREE(ff_hp)
2194 
2195 #else
2196  MSG_ERROR("FFTW3 support not activated")
2197  ABI_UNUSED((/nx,ny,nz,ldx,ldy,ldz/))
2198  ABI_UNUSED(ff(1,1))
2199  ABI_UNUSED(gg(1))
2200  if (PRESENT(fftw_flags)) then
2201    ABI_UNUSED(fftw_flags)
2202  end if
2203 #endif
2204 
2205 end subroutine fftw3_c2r_op

m_fftw3/fftw3_cleanup [ Functions ]

[ Top ] [ m_fftw3 ] [ Functions ]

NAME

  fftw3_cleanup

FUNCTION

  Reset FFTW to the pristine state it was in when you started your program,
  All existing plans become undefined.

NOTES

  FFTW planner saves some other persistent data, such as the accumulated wisdom and a list of
  algorithms available in the current configuration. If you want to deallocate all of that and reset
  FFTW to the pristine state it was in when you started your program, you can call fftw3_cleanup();
  After calling fftw3_cleanup, all existing plans become undefined, and you should not attempt to
  execute them nor to destroy them. You can however create and execute/destroy new plans, in which case
  FFTW starts accumulating wisdom information again.
  fftw3_cleanup does not deallocate your plans, however. To prevent memory leaks, you must still call
  fftw_destroy_plan before executing fftw3_cleanup

PARENTS

      driver

CHILDREN

      fftw3_destroy_plan,fftw3_execute_dft

SOURCE

2421 subroutine fftw3_cleanup()
2422 
2423 
2424 !This section has been created automatically by the script Abilint (TD).
2425 !Do not modify the following lines by hand.
2426 #undef ABI_FUNC
2427 #define ABI_FUNC 'fftw3_cleanup'
2428 !End of the abilint section
2429 
2430  implicit none
2431 
2432 ! *************************************************************************
2433 
2434 #ifdef HAVE_FFT_FFTW3_MPI
2435  call fftw_mpi_cleanup()
2436 #endif
2437 #ifdef HAVE_FFT_FFTW3_THREADS
2438  if (THREADS_INITED==1) then
2439    call dfftw_cleanup_threads()
2440    THREADS_INITED = 0
2441  end if
2442 #elif defined HAVE_FFT_FFTW3
2443  call dfftw_cleanup()
2444 #else
2445  MSG_ERROR("FFTW3 support not activated")
2446 #endif
2447 
2448 end subroutine fftw3_cleanup

m_fftw3/fftw3_destroy_plan [ Functions ]

[ Top ] [ m_fftw3 ] [ Functions ]

NAME

  fftw3_destroy_plan

FUNCTION

  Release the memory allocate for the plan.

INPUTS

PARENTS

      m_fftw3

CHILDREN

      fftw3_destroy_plan,fftw3_execute_dft

SOURCE

2470 subroutine fftw3_destroy_plan(plan)
2471 
2472 
2473 !This section has been created automatically by the script Abilint (TD).
2474 !Do not modify the following lines by hand.
2475 #undef ABI_FUNC
2476 #define ABI_FUNC 'fftw3_destroy_plan'
2477 !End of the abilint section
2478 
2479  implicit none
2480 
2481 !Arguments ------------------------------------
2482 !scalars
2483  integer(KIND_FFTW_PLAN),intent(in) :: plan
2484 
2485 ! *************************************************************************
2486 
2487 #ifdef HAVE_FFT_FFTW3
2488 !$OMP CRITICAL (OMPC_fftw3_destroy_plan)
2489  call dfftw_destroy_plan(plan)
2490 !$OMP END CRITICAL (OMPC_fftw3_destroy_plan)
2491 
2492 #else
2493  if (.FALSE.) write(std_out,*)plan
2494 #endif
2495 
2496 end subroutine fftw3_destroy_plan

m_fftw3/fftw3_execute_dft_dp [ Functions ]

[ Top ] [ m_fftw3 ] [ Functions ]

NAME

 fftw3_execute_dft_dp

FUNCTION

INPUTS

OUTPUT

NOTES

  This interface is used to perform complex to complex FFT with real arrays
  containing the real and imaginary part. I have to admit that this interface
  is a bit ambiguous since FFTW3 provides routines for real-to-real transforms.

PARENTS

CHILDREN

      fftw3_destroy_plan,fftw3_execute_dft

SOURCE

3185 #ifdef HAVE_FFT_FFTW3
3186 
3187 subroutine fftw3_execute_dft_dp(plan, in, out)
3188 
3189 
3190 !This section has been created automatically by the script Abilint (TD).
3191 !Do not modify the following lines by hand.
3192 #undef ABI_FUNC
3193 #define ABI_FUNC 'fftw3_execute_dft_dp'
3194 !End of the abilint section
3195 
3196  implicit none
3197 
3198 !Arguments ------------------------------------
3199 !scalars
3200  integer(KIND_FFTW_PLAN),intent(in) :: plan
3201  real(C_DOUBLE),intent(inout) :: in(*)
3202  real(C_DOUBLE),intent(out) :: out(*)
3203 
3204 ! *************************************************************************
3205 
3206  call dfftw_execute_dft(plan, in, out)
3207 
3208 end subroutine fftw3_execute_dft_dp

m_fftw3/fftw3_execute_dft_dpc [ Functions ]

[ Top ] [ m_fftw3 ] [ Functions ]

NAME

 fftw3_execute_dft_dpc

PARENTS

CHILDREN

      fftw3_destroy_plan,fftw3_execute_dft

SOURCE

3267 #ifdef HAVE_FFT_FFTW3
3268 
3269 subroutine fftw3_execute_dft_dpc(plan, in, out)
3270 
3271 
3272 !This section has been created automatically by the script Abilint (TD).
3273 !Do not modify the following lines by hand.
3274 #undef ABI_FUNC
3275 #define ABI_FUNC 'fftw3_execute_dft_dpc'
3276 !End of the abilint section
3277 
3278  implicit none
3279 
3280 !Arguments ------------------------------------
3281 !scalars
3282  integer(KIND_FFTW_PLAN),intent(in) :: plan
3283  complex(C_DOUBLE_COMPLEX),intent(inout) :: in(*)
3284  complex(C_DOUBLE_COMPLEX),intent(out) :: out(*)
3285 
3286 ! *************************************************************************
3287 
3288  call dfftw_execute_dft(plan, in, out)
3289 
3290 end subroutine fftw3_execute_dft_dpc

m_fftw3/fftw3_execute_dft_spc [ Functions ]

[ Top ] [ m_fftw3 ] [ Functions ]

NAME

 fftw3_execute_dft_spc

PARENTS

CHILDREN

      fftw3_destroy_plan,fftw3_execute_dft

SOURCE

3226 #ifdef HAVE_FFT_FFTW3
3227 
3228 subroutine fftw3_execute_dft_spc(plan, in, out)
3229 
3230 
3231 !This section has been created automatically by the script Abilint (TD).
3232 !Do not modify the following lines by hand.
3233 #undef ABI_FUNC
3234 #define ABI_FUNC 'fftw3_execute_dft_spc'
3235 !End of the abilint section
3236 
3237  implicit none
3238 
3239 !Arguments ------------------------------------
3240 !scalars
3241  integer(KIND_FFTW_PLAN),intent(in) :: plan
3242  complex(C_FLOAT_COMPLEX),intent(inout) :: in(*)
3243  complex(C_FLOAT_COMPLEX),intent(out) :: out(*)
3244 
3245 ! *************************************************************************
3246 
3247  call sfftw_execute_dft(plan, in, out)
3248 
3249 end subroutine fftw3_execute_dft_spc

m_fftw3/fftw3_fftpad_dp [ Functions ]

[ Top ] [ m_fftw3 ] [ Functions ]

NAME

  fftw3_fftpad_dp

FUNCTION

  This routine transforms wavefunctions using 3D zero-padded FFTs with FFTW3.
  The 3D ffts are computed only on lines and planes which have non zero elements.
  These lines and planes are defined by the two vectors do_fft_x(ldy*nz) and do_fft_y(nz)
  FFT transform is in-place.

INPUTS

   nx,ny,nz=Logical dimensions of the FFT mesh.
   ldx,ldy,ldz=Physical dimension of the f array (to avoid cache conflicts).
   ndat=Number of FFT transforms.
   mgfft=MAX(nx,ny,nz), only used to dimension gbound
   isign=The sign of the transform.
   gbound(2*mgfft+8,2)= The boundaries of the basis sphere of G vectors at a given k-point.
     See sphereboundary for more info.

SIDE EFFECTS

   ff(2*ldx*ldy*ldz*ndat)=
     input: The array with the data to be transformed.
     output: The results of the FFT.

PARENTS

      m_fftw3

CHILDREN

      fftw3_destroy_plan,fftw3_execute_dft

SOURCE

2684 subroutine fftw3_fftpad_dp(ff,nx,ny,nz,ldx,ldy,ldz,ndat,mgfft,isign,gbound)
2685 
2686 
2687 !This section has been created automatically by the script Abilint (TD).
2688 !Do not modify the following lines by hand.
2689 #undef ABI_FUNC
2690 #define ABI_FUNC 'fftw3_fftpad_dp'
2691 !End of the abilint section
2692 
2693  implicit none
2694 
2695 !Arguments ------------------------------------
2696 !scalars
2697  integer,intent(in) :: nx,ny,nz,ldx,ldy,ldz,ndat,mgfft,isign
2698 !arrays
2699  integer,intent(in) :: gbound(2*mgfft+8,2)
2700  real(dp),intent(inout) :: ff(2*ldx*ldy*ldz*ndat)
2701 
2702 !Local variables-------------------------------
2703 !scalars
2704 #ifdef HAVE_FFT_FFTW3
2705  integer,parameter :: dst=2
2706  real(dp) :: fact
2707 
2708 ! *************************************************************************
2709 
2710 #include "fftw3_fftpad.finc"
2711 
2712 #else
2713  MSG_ERROR("FFTW3 support not activated")
2714  ABI_UNUSED((/nx,ny,nz,ldx,ldy,ldz,mgfft,isign/))
2715  ABI_UNUSED(gbound(1,1))
2716  ABI_UNUSED(ff(1))
2717 #endif
2718 
2719 end subroutine fftw3_fftpad_dp

m_fftw3/fftw3_fftpad_dpc [ Functions ]

[ Top ] [ m_fftw3 ] [ Functions ]

NAME

  fftw3_fftpad_dpc

FUNCTION

  This routine transforms wavefunctions using 3D zero-padded FFTs with FFTW3.
  The 3D ffts are computed only on lines and planes which have non zero elements.
  These lines and planes are defined by the two vectors do_fft_x(ldy*nz) and do_fft_y(nz)
  FFT transform is in-place. Target: complex arrays.

INPUTS

   nx,ny,nz=Logical dimensions of the FFT mesh.
   ldx,ldy,ldz=Physical dimension of the f array (to avoid cache conflicts).
   ndat=Number of FFT transforms.
   mgfft=MAX(nx,ny,nz), only used to dimension gbound.
   isign=The sign of the transform.
   gbound(2*mgfft+8,2)= The boundaries of the basis sphere of G vectors at a given k-point.
     See sphereboundary for more info.

SIDE EFFECTS

  ff(ldx*ldy*ldz*ndat)=
    input: The array with the data to be transformed.
    output: The results of the FFT.

PARENTS

CHILDREN

      fftw3_destroy_plan,fftw3_execute_dft

SOURCE

2755 subroutine fftw3_fftpad_dpc(ff,nx,ny,nz,ldx,ldy,ldz,ndat,mgfft,isign,gbound)
2756 
2757 
2758 !This section has been created automatically by the script Abilint (TD).
2759 !Do not modify the following lines by hand.
2760 #undef ABI_FUNC
2761 #define ABI_FUNC 'fftw3_fftpad_dpc'
2762 !End of the abilint section
2763 
2764  implicit none
2765 
2766 !Arguments ------------------------------------
2767 !scalars
2768  integer,intent(in) :: nx,ny,nz,ldx,ldy,ldz,ndat,mgfft,isign
2769 !arrays
2770  integer,intent(in) :: gbound(2*mgfft+8,2)
2771  complex(dpc),intent(inout) :: ff(ldx*ldy*ldz*ndat)
2772 
2773 #ifdef HAVE_FFT_FFTW3
2774 !Local variables-------------------------------
2775 !scalars
2776  integer,parameter :: dst=1
2777  real(dp) :: fact
2778 
2779 ! *************************************************************************
2780 
2781 #include "fftw3_fftpad.finc"
2782 
2783 #else
2784  MSG_ERROR("FFTW3 support not activated")
2785  ABI_UNUSED((/nx,ny,nz,ldx,ldy,ldz,ndat,mgfft,isign/))
2786  ABI_UNUSED(gbound(1,1))
2787  ABI_UNUSED(ff(1))
2788 #endif
2789 
2790 end subroutine fftw3_fftpad_dpc

m_fftw3/fftw3_fftpad_spc [ Functions ]

[ Top ] [ m_fftw3 ] [ Functions ]

NAME

  fftw3_fftpad_spc

FUNCTION

  This routine transforms wavefunctions using 3D zero-padded FFTs with FFTW3.
  The 3D ffts are computed only on lines and planes which have non zero elements.
  These lines and planes are defined by the two vectors do_fft_x(ldy*nz) and do_fft_y(nz)
  FFT transform is in-place. Target: complex arrays.

INPUTS

   nx,ny,nz=Logical dimensions of the FFT mesh.
   ldx,ldy,ldz=Physical dimension of the f array (to avoid cache conflicts).
   ndat=Number of FFT transforms.
   mgfft=MAX(nx,ny,nz), only used to dimension gbound.
   isign=The sign of the transform.
   gbound(2*mgfft+8,2)= The boundaries of the basis sphere of G vectors at a given k-point.
     See sphereboundary for more info.

SIDE EFFECTS

  ff(ldx*ldy*ldz*ndat)=
    input: The array with the data to be transformed.
    output: The results of the FFT.

PARENTS

CHILDREN

      fftw3_destroy_plan,fftw3_execute_dft

SOURCE

1610 subroutine fftw3_fftpad_spc(ff,nx,ny,nz,ldx,ldy,ldz,ndat,mgfft,isign,gbound)
1611 
1612 
1613 !This section has been created automatically by the script Abilint (TD).
1614 !Do not modify the following lines by hand.
1615 #undef ABI_FUNC
1616 #define ABI_FUNC 'fftw3_fftpad_spc'
1617 !End of the abilint section
1618 
1619  implicit none
1620 
1621 !Arguments ------------------------------------
1622 !scalars
1623  integer,intent(in) :: nx,ny,nz,ldx,ldy,ldz,ndat,mgfft,isign
1624 !arrays
1625  integer,intent(in) :: gbound(2*mgfft+8,2)
1626  complex(spc),intent(inout) :: ff(ldx*ldy*ldz*ndat)
1627 
1628 #ifdef HAVE_FFT_FFTW3
1629 !Local variables-------------------------------
1630  integer,parameter :: dst=1
1631  real(sp) :: fact
1632 
1633 ! *************************************************************************
1634 
1635 #include "fftw3_fftpad.finc"
1636 
1637 #else
1638  MSG_ERROR("FFTW3 support not activated")
1639  ABI_UNUSED((/nx,ny,nz,ldx,ldy,ldz,ndat,mgfft,isign/))
1640  ABI_UNUSED(gbound(1,1))
1641  ABI_UNUSED(ff(1))
1642 #endif
1643 
1644 end subroutine fftw3_fftpad_spc

m_fftw3/fftw3_fftrisc_dp [ Functions ]

[ Top ] [ m_fftw3 ] [ Functions ]

NAME

 fftw3_fftrisc_dp

FUNCTION

 Carry out Fourier transforms between real and reciprocal (G) space,
 for wavefunctions, contained in a sphere in reciprocal space,
 in both directions. Also accomplish some post-processing.

NOTES

 Specifically uses rather sophisticated algorithms, based on S Goedecker
 routines, specialized for superscalar RISC architecture.
 Zero padding : saves 7/12 execution time
 Bi-dimensional data locality in most of the routine : cache reuse
 For k-point (0 0 0) : takes advantage of symmetry of data.
 Note however that no blocking is used, in both 1D z-transform
 or subsequent 2D transform. This should be improved.

INPUTS

  cplex= if 1 , denpot is real, if 2 , denpot is complex
     (cplex=2 only allowed for option=2 when istwf_k=1)
     one can also use cplex=0 if option=0 or option=3
  fofgin(2,npwin)=holds input wavefunction in G vector basis sphere.
  gboundin(2*mgfft+8,2)=sphere boundary info for reciprocal to real space
  gboundout(2*mgfft+8,2)=sphere boundary info for real to reciprocal space
  istwf_k=option parameter that describes the storage of wfs
  kg_kin(3,npwin)=reduced planewave coordinates, input
  kg_kout(3,npwout)=reduced planewave coordinates, output
  mgfft=maximum size of 1D FFTs
  ngfft(18)=contain all needed information about 3D FFT, see ~abinit/doc/variables/vargs.htm#ngfft
  npwin=number of elements in fofgin array (for option 0, 1 and 2)
  npwout=number of elements in fofgout array (for option 2 and 3)
  ldx,ldy,ldz=ngfft(4),ngfft(5),ngfft(6), dimensions of fofr.
  option= if 0: do direct FFT
          if 1: do direct FFT, then sum the density
          if 2: do direct FFT, multiply by the potential, then do reverse FFT
          if 3: do reverse FFT only
  weight=weight to be used for the accumulation of the density in real space
          (needed only when option=1)

OUTPUT

  (see side effects)

OPTIONS

  The different options are:
  - reciprocal to real space and output the result (when option=0),
  - reciprocal to real space and accumulate the density (when option=1) or
  - reciprocal to real space, apply the local potential to the wavefunction
    in real space and produce the result in reciprocal space (when option=2)
  - real space to reciprocal space (when option=3).
  option=0 IS NOT ALLOWED when istwf_k>2
  option=3 IS NOT ALLOWED when istwf_k>=2

SIDE EFFECTS

  for option==0, fofgin(2,npwin)=holds input wavefunction in G sphere;
                 fofr(2,ldx,ldy,ldz) contains the Fourier Transform of fofgin;
                 no use of denpot, fofgout and npwout.
  for option==1, fofgin(2,npwin)=holds input wavefunction in G sphere;
                 denpot(cplex*ldx,ldy,ldz) contains the input density at input,
                 and the updated density at output;
                 no use of fofgout and npwout.
  for option==2, fofgin(2,npwin)=holds input wavefunction in G sphere;
                 denpot(cplex*ldx,ldy,ldz) contains the input local potential;
                 fofgout(2,npwout) contains the output function;
  for option==3, fofr(2,ldx,ldy,ldz) contains the real space wavefunction;
                 fofgout(2,npwout) contains its Fourier transform;
                 no use of fofgin and npwin.

PARENTS

      m_fftw3

CHILDREN

      fftw3_destroy_plan,fftw3_execute_dft

SOURCE

 954 subroutine fftw3_fftrisc_dp(cplex,denpot,fofgin,fofgout,fofr,gboundin,gboundout,istwf_k,kg_kin,kg_kout,&
 955 & mgfft,ngfft,npwin,npwout,ldx,ldy,ldz,option,weight_r,weight_i)
 956 
 957 
 958 !This section has been created automatically by the script Abilint (TD).
 959 !Do not modify the following lines by hand.
 960 #undef ABI_FUNC
 961 #define ABI_FUNC 'fftw3_fftrisc_dp'
 962 !End of the abilint section
 963 
 964  implicit none
 965 
 966 !Arguments ------------------------------------
 967 !scalars
 968  integer,intent(in) :: cplex,istwf_k,mgfft,ldx,ldy,ldz,npwin,npwout,option
 969  real(dp),intent(in) :: weight_r,weight_i
 970 !arrays
 971  integer,intent(in) :: gboundin(2*mgfft+8,2),gboundout(2*mgfft+8,2)
 972  integer,intent(in) :: kg_kin(3,npwin),kg_kout(3,npwout),ngfft(18)
 973  real(dp),intent(in) :: fofgin(2,npwin)
 974  real(dp),intent(inout) :: denpot(cplex*ldx,ldy,ldz),fofr(2,ldx*ldy*ldz)
 975  real(dp),intent(inout) :: fofgout(2,npwout)
 976 
 977 ! *************************************************************************
 978 
 979 #ifdef HAVE_FFT_FFTW3
 980 
 981 #undef  FFT_PRECISION
 982 #undef  MYKIND
 983 #undef  MYCZERO
 984 #undef  MYCMPLX
 985 #undef  MYCONJG
 986 
 987 #define FFT_PRECISION FFTW3_DOUBLE
 988 #define MYKIND DPC
 989 #define MYCZERO (0._dp,0._dp)
 990 #define MYCMPLX  DCMPLX
 991 #define MYCONJG  DCONJG
 992 
 993 #include "fftw3_fftrisc.finc"
 994 
 995 #else
 996  MSG_ERROR("FFTW3 support not activated")
 997  ABI_UNUSED((/cplex,gboundin(1,1),gboundout(1,1),istwf_k,kg_kin(1,1),kg_kout(1,1)/))
 998  ABI_UNUSED((/mgfft,ngfft(1),npwin,npwout,ldx,ldy,ldz,option/))
 999  ABI_UNUSED((/denpot(1,1,1),fofgin(1,1),fofgout(1,1),fofr(1,1),weight_r,weight_i/))
1000 #endif
1001 
1002 end subroutine fftw3_fftrisc_dp

m_fftw3/fftw3_fftrisc_sp [ Functions ]

[ Top ] [ m_fftw3 ] [ Functions ]

NAME

 fftw3_fftrisc_sp

FUNCTION

 Carry out Fourier transforms between real and reciprocal (G) space,
 for wavefunctions, contained in a sphere in reciprocal space,
 in both directions. Also accomplish some post-processing.

NOTES

 Specifically uses rather sophisticated algorithms, based on S Goedecker
 routines, specialized for superscalar RISC architecture.
 Zero padding : saves 7/12 execution time
 Bi-dimensional data locality in most of the routine : cache reuse
 For k-point (0 0 0) : takes advantage of symmetry of data.
 Note however that no blocking is used, in both 1D z-transform
 or subsequent 2D transform. This should be improved.

INPUTS

  cplex= if 1 , denpot is real, if 2 , denpot is complex
     (cplex=2 only allowed for option=2 when istwf_k=1)
     one can also use cplex=0 if option=0 or option=3
  fofgin(2,npwin)=holds input wavefunction in G vector basis sphere.
  gboundin(2*mgfft+8,2)=sphere boundary info for reciprocal to real space
  gboundout(2*mgfft+8,2)=sphere boundary info for real to reciprocal space
  istwf_k=option parameter that describes the storage of wfs
  kg_kin(3,npwin)=reduced planewave coordinates, input
  kg_kout(3,npwout)=reduced planewave coordinates, output
  mgfft=maximum size of 1D FFTs
  ngfft(18)=contain all needed information about 3D FFT, see ~abinit/doc/variables/vargs.htm#ngfft
  npwin=number of elements in fofgin array (for option 0, 1 and 2)
  npwout=number of elements in fofgout array (for option 2 and 3)
  ldx,ldy,ldz=ngfft(4),ngfft(5),ngfft(6), dimensions of fofr.
  option= if 0: do direct FFT
          if 1: do direct FFT, then sum the density
          if 2: do direct FFT, multiply by the potential, then do reverse FFT
          if 3: do reverse FFT only
  weight=weight to be used for the accumulation of the density in real space
          (needed only when option=1)

OUTPUT

  (see side effects)

OPTIONS

  The different options are:
  - reciprocal to real space and output the result (when option=0),
  - reciprocal to real space and accumulate the density (when option=1) or
  - reciprocal to real space, apply the local potential to the wavefunction
    in real space and produce the result in reciprocal space (when option=2)
  - real space to reciprocal space (when option=3).
  option=0 IS NOT ALLOWED when istwf_k>2
  option=3 IS NOT ALLOWED when istwf_k>=2

SIDE EFFECTS

  for option==0, fofgin(2,npwin)=holds input wavefunction in G sphere;
                 fofr(2,ldx,ldy,ldz) contains the Fourier Transform of fofgin;
                 no use of denpot, fofgout and npwout.
  for option==1, fofgin(2,npwin)=holds input wavefunction in G sphere;
                 denpot(cplex*ldx,ldy,ldz) contains the input density at input,
                 and the updated density at output;
                 no use of fofgout and npwout.
  for option==2, fofgin(2,npwin)=holds input wavefunction in G sphere;
                 denpot(cplex*ldx,ldy,ldz) contains the input local potential;
                 fofgout(2,npwout) contains the output function;
  for option==3, fofr(2,ldx,ldy,ldz) contains the real space wavefunction;
                 fofgout(2,npwout) contains its Fourier transform;
                 no use of fofgin and npwin.

PARENTS

CHILDREN

      fftw3_destroy_plan,fftw3_execute_dft

SOURCE

823 subroutine fftw3_fftrisc_sp(cplex,denpot,fofgin,fofgout,fofr,gboundin,gboundout,istwf_k,kg_kin,kg_kout,&
824 & mgfft,ngfft,npwin,npwout,ldx,ldy,ldz,option,weight_r,weight_i)
825 
826 
827 !This section has been created automatically by the script Abilint (TD).
828 !Do not modify the following lines by hand.
829 #undef ABI_FUNC
830 #define ABI_FUNC 'fftw3_fftrisc_sp'
831 !End of the abilint section
832 
833  implicit none
834 
835 !Arguments ------------------------------------
836 !scalars
837  integer,intent(in) :: cplex,istwf_k,mgfft,ldx,ldy,ldz,npwin,npwout,option
838  real(dp),intent(in) :: weight_i,weight_r
839 !arrays
840  integer,intent(in) :: gboundin(2*mgfft+8,2),gboundout(2*mgfft+8,2)
841  integer,intent(in) :: kg_kin(3,npwin),kg_kout(3,npwout),ngfft(18)
842  real(sp),intent(in) :: fofgin(2,npwin)
843  real(dp),intent(inout) :: denpot(cplex*ldx,ldy,ldz)
844  real(sp),intent(inout) :: fofr(2,ldx*ldy*ldz)
845  real(sp),intent(inout) :: fofgout(2,npwout)
846 
847 ! *************************************************************************
848 
849 #ifdef HAVE_FFT_FFTW3
850 
851 #undef  FFT_PRECISION
852 #undef  MYKIND
853 #undef  MYCZERO
854 #undef  MYCMPLX
855 #undef  MYCONJG
856 
857 #define FFT_PRECISION FFTW3_SINGLE
858 #define MYKIND SPC
859 #define MYCZERO (0._sp,0._sp)
860 #define MYCMPLX  CMPLX
861 #define MYCONJG  CONJG
862 
863 #include "fftw3_fftrisc.finc"
864 
865 #else
866  MSG_ERROR("FFTW3 support not activated")
867  ABI_UNUSED((/cplex,gboundin(1,1),gboundout(1,1),istwf_k,kg_kin(1,1),kg_kout(1,1)/))
868  ABI_UNUSED((/mgfft,ngfft(1),npwin,npwout,ldx,ldy,ldz,option/))
869  ABI_UNUSED((/denpot(1,1,1),weight_r,weight_i/))
870  ABI_UNUSED((/fofgin(1,1),fofgout(1,1),fofr(1,1)/))
871 #endif
872 
873 end subroutine fftw3_fftrisc_sp

m_fftw3/fftw3_fftug_dp [ Functions ]

[ Top ] [ m_fftw3 ] [ Functions ]

NAME

 fftw3_fftug_dp

FUNCTION

 Compute ndat zero-padded FFTs from G to R space.
 Mainly used for the transform of wavefunctions.
 TARGET: dp arrays with real and imaginary part

INPUTS

 fftalg=FFT algorith (see input variable)
 fftcache=size of the cache (kB)
 npw_k=number of plane waves for this k-point.
 nx,ny,nz=Number of point along the three directions.
 ldx,ldy,ldz=Leading dimensions of the array.
 ndat=Number of transforms
 istwf_k=Option describing the storage of the wavefunction.
 mgfft=Max number of FFT divisions (used to dimension gbound)
 kg_k(3,npw_k)=G-vectors in reduced coordinates
 gbound(2*mgfft+8,2)=Table for padded-FFT. See sphereboundary.
  ug(npw_k*ndat)=wavefunctions in reciprocal space.

OUTPUT

  ur(ldx*ldy*ldz*ndat)=wavefunctions in real space.

PARENTS

      m_fftw3

CHILDREN

      fftw3_destroy_plan,fftw3_execute_dft

SOURCE

1040 subroutine fftw3_fftug_dp(fftalg,fftcache,npw_k,nx,ny,nz,ldx,ldy,ldz,ndat,istwf_k,mgfft,kg_k,gbound,ug,ur)
1041 
1042 
1043 !This section has been created automatically by the script Abilint (TD).
1044 !Do not modify the following lines by hand.
1045 #undef ABI_FUNC
1046 #define ABI_FUNC 'fftw3_fftug_dp'
1047 !End of the abilint section
1048 
1049  implicit none
1050 
1051 !Arguments ------------------------------------
1052 !scalars
1053  integer,intent(in) :: fftalg,fftcache
1054  integer,intent(in) :: npw_k,nx,ny,nz,ldx,ldy,ldz,ndat,istwf_k,mgfft
1055 !arrays
1056  integer,intent(in) :: gbound(2*mgfft+8,2),kg_k(3,npw_k)
1057  real(dp),target,intent(in) :: ug(2*npw_k*ndat)
1058  real(dp),target,intent(inout) :: ur(2*ldx*ldy*ldz*ndat)
1059 
1060 #ifdef HAVE_FFT_FFTW3
1061 !Local variables-------------------------------
1062 !scalars
1063  integer,parameter :: dist=2
1064  real(dp) :: fofgout(2,0)
1065  real(dp),ABI_CONTIGUOUS pointer :: real_ug(:,:),real_ur(:,:)
1066 
1067 ! *************************************************************************
1068 
1069 #undef TK_PREF
1070 #define TK_PREF(name) CONCAT(cg_,name)
1071 
1072 #include "fftug.finc"
1073 
1074 #else
1075  ! Silence compiler warning
1076  MSG_ERROR("FFT_FFTW3 support not activated")
1077  ABI_UNUSED((/fftalg,fftcache,npw_k,nx,ny,nz,ldx,ldy,ldz,ndat,istwf_k,mgfft,kg_k(1,1),gbound(1,1)/))
1078  ABI_UNUSED((/ug(1),ur(1)/))
1079 #endif
1080 
1081 end subroutine fftw3_fftug_dp

m_fftw3/fftw3_fftug_dpc [ Functions ]

[ Top ] [ m_fftw3 ] [ Functions ]

NAME

 fftw3_fftug_dpc

FUNCTION

 Compute ndat zero-padded FFTs.
 Mainly used for the transform of wavefunctions.
 TARGET: DPC arrays

INPUTS

 fftalg=FFT algorith (see input variable)
 fftcache=size of the cache (kB)
 npw_k=number of plane waves for this k-point.
 nx,ny,nz=Number of point along the three directions.
 ldx,ldy,ldz=Leading dimensions of the array.
 ndat=Number of transforms
 istwf_k=Option describing the storage of the wavefunction.
 mgfft=Max number of FFT divisions (used to dimension gbound)
 kg_k(3,npw_k)=G-vectors in reduced coordinates
 gbound(2*mgfft+8,2)=Table for padded-FFT. See sphereboundary.
  ug(npw_k*ndat)=wavefunctions in reciprocal space

OUTPUT

  ur(ldx*ldy*ldz*ndat)=wavefunctions in real space.

PARENTS

CHILDREN

      fftw3_destroy_plan,fftw3_execute_dft

SOURCE

1197 subroutine fftw3_fftug_dpc(fftalg,fftcache,npw_k,nx,ny,nz,ldx,ldy,ldz,ndat,istwf_k,mgfft,kg_k,gbound,ug,ur)
1198 
1199 
1200 !This section has been created automatically by the script Abilint (TD).
1201 !Do not modify the following lines by hand.
1202 #undef ABI_FUNC
1203 #define ABI_FUNC 'fftw3_fftug_dpc'
1204 !End of the abilint section
1205 
1206  implicit none
1207 
1208 !Arguments ------------------------------------
1209 !scalars
1210  integer,intent(in) :: fftalg,fftcache
1211  integer,intent(in) :: npw_k,nx,ny,nz,ldx,ldy,ldz,ndat,istwf_k,mgfft
1212 !arrays
1213  integer,intent(in) :: gbound(2*mgfft+8,2),kg_k(3,npw_k)
1214  complex(dpc),target,intent(in) :: ug(npw_k*ndat)
1215  complex(dpc),target,intent(inout) :: ur(ldx*ldy*ldz*ndat)
1216 
1217 #ifdef HAVE_FFT_FFTW3
1218 !Local variables-------------------------------
1219 !scalars
1220  integer,parameter :: dist=1
1221 !arrays
1222  real(dp) :: fofgout(2,0)
1223  real(dp),ABI_CONTIGUOUS pointer :: real_ug(:,:),real_ur(:,:)
1224 
1225 ! *************************************************************************
1226 
1227 #undef TK_PREF
1228 #define TK_PREF(name) CONCAT(cplx_,name)
1229 
1230 #include "fftug.finc"
1231 
1232 #else
1233  ! Silence compiler warning
1234  MSG_ERROR("FFTW3 support not activated")
1235  ABI_UNUSED((/fftalg,fftcache,npw_k,nx,ny,nz,ldx,ldy,ldz,ndat,istwf_k,mgfft,kg_k(1,1),gbound(1,1)/))
1236  ABI_UNUSED((/ug(1),ur(1)/))
1237 #endif
1238 
1239 end subroutine fftw3_fftug_dpc

m_fftw3/fftw3_fftug_spc [ Functions ]

[ Top ] [ m_fftw3 ] [ Functions ]

NAME

 fftw3_fftug_spc

FUNCTION

 Compute ndat zero-padded FFTs from G-->R.
 Mainly used for the transform of wavefunctions.
 TARGET: spc arrays

INPUTS

 fftalg=FFT algorith (see input variable)
 fftcache=size of the cache (kB)
 npw_k=number of plane waves for this k-point.
 nx,ny,nz=Number of point along the three directions.
 ldx,ldy,ldz=Leading dimensions of the array.
 ndat=Number of transforms
 istwf_k=Option describing the storage of the wavefunction.
 mgfft=Max number of FFT divisions (used to dimension gbound)
 kg_k(3,npw_k)=G-vectors in reduced coordinates
 gbound(2*mgfft+8,2)=Table for padded-FFT. See sphereboundary.
  ug(npw_k*ndat)=wavefunctions in reciprocal space.

OUTPUT

  ur(ldx*ldy*ldz*ndat)=wavefunctions in real space

PARENTS

CHILDREN

      fftw3_destroy_plan,fftw3_execute_dft

SOURCE

1118 subroutine fftw3_fftug_spc(fftalg,fftcache,npw_k,nx,ny,nz,ldx,ldy,ldz,ndat,istwf_k,mgfft,kg_k,gbound,ug,ur)
1119 
1120 
1121 !This section has been created automatically by the script Abilint (TD).
1122 !Do not modify the following lines by hand.
1123 #undef ABI_FUNC
1124 #define ABI_FUNC 'fftw3_fftug_spc'
1125 !End of the abilint section
1126 
1127  implicit none
1128 
1129 !Arguments ------------------------------------
1130 !scalars
1131  integer,intent(in) :: fftalg,fftcache
1132  integer,intent(in) :: npw_k,nx,ny,nz,ldx,ldy,ldz,ndat,istwf_k,mgfft
1133 !arrays
1134  integer,intent(in) :: gbound(2*mgfft+8,2),kg_k(3,npw_k)
1135  complex(spc),target,intent(in) :: ug(npw_k*ndat)
1136  complex(spc),target,intent(inout) :: ur(ldx*ldy*ldz*ndat)
1137 
1138 #ifdef HAVE_FFT_FFTW3
1139 !Local variables-------------------------------
1140 !scalars
1141  integer,parameter :: dist=1
1142 !arrays
1143  real(sp) :: fofgout(2,0)
1144  real(sp),ABI_CONTIGUOUS pointer :: real_ug(:,:),real_ur(:,:)
1145 
1146 ! *************************************************************************
1147 
1148 #undef TK_PREF
1149 #define TK_PREF(name) CONCAT(cplx_,name)
1150 
1151 #include "fftug.finc"
1152 
1153 #else
1154  ! Silence compiler warning
1155  MSG_ERROR("FFTW3 support not activated")
1156  ABI_UNUSED((/fftalg,fftcache,npw_k,nx,ny,nz,ldx,ldy,ldz,ndat,istwf_k,mgfft,kg_k(1,1),gbound(1,1)/))
1157  ABI_UNUSED((/ug(1),ur(1)/))
1158 #endif
1159 
1160 end subroutine fftw3_fftug_spc

m_fftw3/fftw3_fftur_dp [ Functions ]

[ Top ] [ m_fftw3 ] [ Functions ]

NAME

 fftw3_fftur_dp

FUNCTION

 Compute ndat zero-padded FFTs from R- to G-space .
 Mainly used for the transform of wavefunctions.
 TARGET: dp arrays

INPUTS

 fftalg=FFT algorith (see input variable)
 fftcache=size of the cache (kB)
 npw_k=number of plane waves for this k-point.
 nx,ny,nz=Number of point along the three directions.
 ldx,ldy,ldz=Leading dimensions of the array.
 ndat=Number of transforms
 istwf_k=Option describing the storage of the wavefunction.
 mgfft=Max number of FFT divisions (used to dimension gbound)
 kg_k(3,npw_k)=G-vectors in reduced coordinates
 gbound(2*mgfft+8,2)=Table for padded-FFT. See sphereboundary.

 SIDE EFFECT
 ur(ldx*ldy*ldz*ndat)= In input: wavefunctions in real space.
                       Destroyed in output. Do not use ur anymore!

OUTPUT

 ug(npw_k*ndat)=wavefunctions in reciprocal space.

PARENTS

CHILDREN

      fftw3_destroy_plan,fftw3_execute_dft

SOURCE

1278 subroutine fftw3_fftur_dp(fftalg,fftcache,npw_k,nx,ny,nz,ldx,ldy,ldz,ndat,istwf_k,mgfft,kg_k,gbound,ur,ug)
1279 
1280 
1281 !This section has been created automatically by the script Abilint (TD).
1282 !Do not modify the following lines by hand.
1283 #undef ABI_FUNC
1284 #define ABI_FUNC 'fftw3_fftur_dp'
1285 !End of the abilint section
1286 
1287  implicit none
1288 
1289 !Arguments ------------------------------------
1290 !scalars
1291  integer,intent(in) :: fftalg,fftcache
1292  integer,intent(in) :: npw_k,nx,ny,nz,ldx,ldy,ldz,ndat,istwf_k,mgfft
1293 !arrays
1294  integer,intent(in) :: gbound(2*mgfft+8,2),kg_k(3,npw_k)
1295  real(dp),target,intent(inout) :: ur(2*ldx*ldy*ldz*ndat)
1296  real(dp),target,intent(inout) :: ug(2*npw_k*ndat)
1297 
1298 #ifdef HAVE_FFT_FFTW3
1299 !Local variables-------------------------------
1300 !scalars
1301  integer,parameter :: dist=2
1302 !arrays
1303  real(dp) :: dum_ugin(2,0)
1304  real(dp),ABI_CONTIGUOUS pointer :: real_ug(:,:),real_ur(:,:)
1305 
1306 ! *************************************************************************
1307 
1308 #undef TK_PREF
1309 #define TK_PREF(name) CONCAT(cg_,name)
1310 
1311 #include "fftur.finc"
1312 
1313 #else
1314  ! Silence compiler warning
1315  MSG_ERROR("FFTW3 support not activated")
1316  ABI_UNUSED((/fftalg,fftcache/))
1317  ABI_UNUSED((/npw_k,nx,ny,nz,ldx,ldy,ldz,ndat,istwf_k,mgfft,kg_k(1,1),gbound(1,1)/))
1318  ABI_UNUSED((/ug(1),ur(1)/))
1319 #endif
1320 
1321 end subroutine fftw3_fftur_dp

m_fftw3/fftw3_fftur_dpc [ Functions ]

[ Top ] [ m_fftw3 ] [ Functions ]

NAME

 fftw3_fftur_dpc

FUNCTION

 Compute ndat zero-padded FFTs from R ro G.
 Mainly used for the transform of wavefunctions.
 TARGET: DPC arrays

INPUTS

 fftalg=FFT algorith (see input variable)
 fftcache=size of the cache (kB)
 npw_k=number of plane waves for this k-point.
 nx,ny,nz=Number of point along the three directions.
 ldx,ldy,ldz=Leading dimensions of the array.
 ndat=Number of transforms
 istwf_k=Option describing the storage of the wavefunction.
 mgfft=Max number of FFT divisions (used to dimension gbound)
 kg_k(3,npw_k)=G-vectors in reduced coordinates
 gbound(2*mgfft+8,2)=Table for padded-FFT. See sphereboundary.

 SIDE EFFECT
 ur(ldx*ldy*ldz*ndat)= In input: wavefunctions in real space.
                       Destroyed in output. Do not use ur anymore!

OUTPUT

 ug(npw_k*ndat)=wavefunctions in reciprocal space

PARENTS

CHILDREN

      fftw3_destroy_plan,fftw3_execute_dft

SOURCE

1442 subroutine fftw3_fftur_dpc(fftalg,fftcache,npw_k,nx,ny,nz,ldx,ldy,ldz,ndat,istwf_k,mgfft,kg_k,gbound,ur,ug)
1443 
1444 
1445 !This section has been created automatically by the script Abilint (TD).
1446 !Do not modify the following lines by hand.
1447 #undef ABI_FUNC
1448 #define ABI_FUNC 'fftw3_fftur_dpc'
1449 !End of the abilint section
1450 
1451  implicit none
1452 
1453 !Arguments ------------------------------------
1454 !scalars
1455  integer,intent(in) :: fftalg,fftcache
1456  integer,intent(in) :: npw_k,nx,ny,nz,ldx,ldy,ldz,ndat,istwf_k,mgfft
1457 !arrays
1458  integer,intent(in) :: gbound(2*mgfft+8,2),kg_k(3,npw_k)
1459  complex(dpc),target,intent(inout) :: ur(ldx*ldy*ldz*ndat)
1460  complex(dpc),target,intent(inout) :: ug(npw_k*ndat)
1461 
1462 #ifdef HAVE_FFT_FFTW3
1463 !Local variables-------------------------------
1464 !scalars
1465  integer,parameter :: dist=1
1466 !arrays
1467  real(dp) :: dum_ugin(2,0)
1468  real(dp),ABI_CONTIGUOUS pointer :: real_ug(:,:),real_ur(:,:)
1469 
1470 ! *************************************************************************
1471 
1472 #undef TK_PREF
1473 #define TK_PREF(name) CONCAT(cplx_,name)
1474 
1475 #include "fftur.finc"
1476 
1477 #else
1478  ! Silence compiler warning
1479  MSG_ERROR("FFTW3 support not activated")
1480  ABI_UNUSED((/fftalg,fftcache/))
1481  ABI_UNUSED((/npw_k,nx,ny,nz,ldx,ldy,ldz,ndat,istwf_k,mgfft,kg_k(1,1),gbound(1,1)/))
1482  ABI_UNUSED((/ug(1),ur(1)/))
1483 #endif
1484 
1485 end subroutine fftw3_fftur_dpc

m_fftw3/fftw3_fftur_spc [ Functions ]

[ Top ] [ m_fftw3 ] [ Functions ]

NAME

 fftw3_fftur_spc

FUNCTION

 Compute ndat zero-padded FFTs from R- to G-space .
 Mainly used for the transform of wavefunctions.
 TARGET: spc arrays

INPUTS

 fftalg=FFT algorith (see input variable)
 fftcache=size of the cache (kB)
 npw_k=number of plane waves for this k-point.
 nx,ny,nz=Number of point along the three directions.
 ldx,ldy,ldz=Leading dimensions of the array.
 ndat=Number of transforms
 istwf_k=Option describing the storage of the wavefunction.
 mgfft=Max number of FFT divisions (used to dimension gbound)
 kg_k(3,npw_k)=G-vectors in reduced coordinates
 gbound(2*mgfft+8,2)=Table for padded-FFT. See sphereboundary.

 SIDE EFFECT
 ur(ldx*ldy*ldz*ndat)= In input: wavefunctions in real space.
                       Destroyed in output. Do not use ur anymore!

OUTPUT

 ug(npw_k*ndat)=wavefunctions in reciprocal space.

PARENTS

CHILDREN

      fftw3_destroy_plan,fftw3_execute_dft

SOURCE

1360 subroutine fftw3_fftur_spc(fftalg,fftcache,npw_k,nx,ny,nz,ldx,ldy,ldz,ndat,istwf_k,mgfft,kg_k,gbound,ur,ug)
1361 
1362 
1363 !This section has been created automatically by the script Abilint (TD).
1364 !Do not modify the following lines by hand.
1365 #undef ABI_FUNC
1366 #define ABI_FUNC 'fftw3_fftur_spc'
1367 !End of the abilint section
1368 
1369  implicit none
1370 
1371 !Arguments ------------------------------------
1372 !scalars
1373  integer,intent(in) :: fftalg,fftcache
1374  integer,intent(in) :: npw_k,nx,ny,nz,ldx,ldy,ldz,ndat,istwf_k,mgfft
1375 !arrays
1376  integer,intent(in) :: gbound(2*mgfft+8,2),kg_k(3,npw_k)
1377  complex(spc),target,intent(inout) :: ur(ldx*ldy*ldz*ndat)
1378  complex(spc),target,intent(inout) :: ug(npw_k*ndat)
1379 
1380 #ifdef HAVE_FFT_FFTW3
1381 !Local variables-------------------------------
1382 !scalars
1383  integer,parameter :: dist=1
1384 !arrays
1385  real(sp) :: dum_ugin(2,0)
1386  real(sp),ABI_CONTIGUOUS pointer :: real_ug(:,:),real_ur(:,:)
1387 
1388 ! *************************************************************************
1389 
1390 #undef TK_PREF
1391 #define TK_PREF(name) CONCAT(cplx_,name)
1392 
1393 #include "fftur.finc"
1394 
1395 #else
1396  ! Silence compiler warning
1397  MSG_ERROR("FFTW3 support not activated")
1398  ABI_UNUSED((/fftalg,fftcache/))
1399  ABI_UNUSED((/npw_k,nx,ny,nz,ldx,ldy,ldz,ndat,istwf_k,mgfft,kg_k(1,1),gbound(1,1)/))
1400  ABI_UNUSED((/ug(1),ur(1)/))
1401 #endif
1402 
1403 end subroutine fftw3_fftur_spc

m_fftw3/fftw3_init_threads [ Functions ]

[ Top ] [ m_fftw3 ] [ Functions ]

NAME

  fftw3_init_threads

FUNCTION

  This function performs the one-time initialization required to use FFTW3 threads.
  It does nothing if HAVE_FFT_FFTW3_THREADS is not defined.

INPUTS

SIDE EFFECTS

  The one-time initialization required to use FFTW3 threads is performed when the routine
  is called for the first time.

PARENTS

      driver,fftprof

CHILDREN

      fftw3_destroy_plan,fftw3_execute_dft

SOURCE

2523 subroutine fftw3_init_threads()
2524 
2525 
2526 !This section has been created automatically by the script Abilint (TD).
2527 !Do not modify the following lines by hand.
2528 #undef ABI_FUNC
2529 #define ABI_FUNC 'fftw3_init_threads'
2530 !End of the abilint section
2531 
2532  implicit none
2533 
2534 !Local variables ------------------------------
2535 !scalars
2536 #ifdef HAVE_FFT_FFTW3_THREADS
2537  integer :: iret
2538 #endif
2539 
2540 ! *************************************************************************
2541 
2542 #ifdef HAVE_FFT_FFTW3_THREADS
2543  if (THREADS_INITED==0) then
2544    !call wrtout(std_out,"Calling dfftw_init_threads()","COLL")
2545    call dfftw_init_threads(iret)
2546 
2547    if (iret==0) then
2548      MSG_WARNING(" dfftw_init_threads returned 0; threaded FFTW3 is not being used!")
2549    else
2550      THREADS_INITED=1
2551    end if
2552    call fftw3_set_nthreads()
2553  end if
2554 
2555 #ifndef HAVE_OPENMP
2556   MSG_WARNING("Using FFTW3 with threads but HAVE_OPENMP is not defined!")
2557 #endif
2558 #endif
2559 
2560 #ifdef HAVE_FFT_FFTW3_MPI
2561   !call wrtout(std_out,"Calling fftw_mpi_init()","COLL")
2562   call fftw_mpi_init()
2563 #endif
2564 
2565 end subroutine fftw3_init_threads

m_fftw3/fftw3_many_dft_ip [ Functions ]

[ Top ] [ m_fftw3 ] [ Functions ]

NAME

  fftw3_many_dft_ip

FUNCTION

 Driver routine for many in-place 3D complex-to-complex FFTs of lengths nx, ny, nz.

INPUTS

 nx,ny,nz=Number of points along the three directions.
 ldx,ldy,ldz=Physical dimension of the finout array (to avoid cache conflicts).
 ndat=Number of FFTs to be done.
 isign=sign of Fourier transform exponent: current convention uses
   +1 for transforming from G to r,
   -1 for transforming from r to G.
 [fftw_flags]=Flags used to create the plan. They can be combined with the "+" operator.
   Defaults to ABI_FFTW_ESTIMATE.

OUTPUT

 finout(2,ldx*ldy*ldz*ndat)=
   In input: The complex array to be transformed.
   In output: The FFT results.

PARENTS

      m_fftw3

CHILDREN

      fftw3_destroy_plan,fftw3_execute_dft

SOURCE

2333 subroutine fftw3_many_dft_ip(nx,ny,nz,ldx,ldy,ldz,ndat,isign,finout,fftw_flags)
2334 
2335 
2336 !This section has been created automatically by the script Abilint (TD).
2337 !Do not modify the following lines by hand.
2338 #undef ABI_FUNC
2339 #define ABI_FUNC 'fftw3_many_dft_ip'
2340 !End of the abilint section
2341 
2342  implicit none
2343 
2344 !Arguments ------------------------------------
2345 !scalars
2346  integer,intent(in) :: nx,ny,nz,ldx,ldy,ldz,ndat,isign
2347  integer,optional,intent(in) :: fftw_flags
2348 !arrays
2349  real(dp),intent(inout) :: finout(2*ldx*ldy*ldz*ndat)
2350 
2351 #ifdef HAVE_FFT_FFTW3
2352 !Local variables-------------------------------
2353 !scalars
2354  integer,parameter :: rank3=3,nt_all=-1
2355  integer :: my_flags,dist,stride
2356  integer(KIND_FFTW_PLAN) :: my_plan
2357 !arrays
2358  integer :: embed(rank3),n(rank3)
2359 
2360 ! *************************************************************************
2361 
2362  my_flags=ABI_FFTW_ESTIMATE; if (PRESENT(fftw_flags)) my_flags= fftw_flags
2363 
2364  stride = 1
2365  dist   = ldx*ldy*ldz
2366  embed  = (/ldx,ldy,ldz/)
2367  n      = (/nx ,ny ,nz /)
2368 
2369  my_plan = fftw3_plan_many_dft(rank3, n, ndat, finout, embed, stride, dist, finout,embed, stride, dist, isign, my_flags, nt_all)
2370 
2371  ! Now perform the 3D FFT via FFTW.
2372  call dfftw_execute_dft(my_plan, finout, finout)
2373 
2374  call fftw3_destroy_plan(my_plan)
2375 
2376  if (isign==ABI_FFTW_FORWARD) then ! -1, FFTW returns not normalized FTs
2377   call ZDSCAL(ldx*ldy*ldz*ndat, one/(nx*ny*nz), finout, 1)
2378   !call cg_zscal(ldx*ldy*ldz*ndat, (/one/(nx*ny*nz),zero/), finout)
2379  end if
2380 
2381 #else
2382  MSG_ERROR("FFTW3 support not activated")
2383  ABI_UNUSED((/nx,ny,nz,ldx,ldy,ldz,ndat,isign/))
2384  if (PRESENT(fftw_flags)) then
2385    ABI_UNUSED(fftw_flags)
2386  end if
2387  ABI_UNUSED(finout(1))
2388 #endif
2389 
2390 end subroutine fftw3_many_dft_ip

m_fftw3/fftw3_many_dft_op [ Functions ]

[ Top ] [ m_fftw3 ] [ Functions ]

NAME

  fftw3_many_dft_op

FUNCTION

 Driver routine for many out-of-place 3D complex-to-complex FFTs of lengths nx, ny, nz.

INPUTS

 nx,ny,nz=Number of points along the three directions.
 ldx,ldy,ldz=Physical dimension of the fin and fout arrays (to avoid cache conflicts).
 ndat=Number of FFTs to be done.
 fin(2*ldx*ldy*ldz*ndat)=The complex array to be transformed.
 isign=sign of Fourier transform exponent: current convention uses
   +1 for transforming from G to r,
   -1 for transforming from r to G.
 [fftw_flags]=Flags used to create the plan. They can be combined with the "+" operator.
   Defaults to ABI_FFTW_ESTIMATE.

OUTPUT

 fout(2,ldx*ldy*ldz*ndat)=The Fourier transform of fin.

PARENTS

      m_fftw3

CHILDREN

      fftw3_destroy_plan,fftw3_execute_dft

SOURCE

2239 subroutine fftw3_many_dft_op(nx,ny,nz,ldx,ldy,ldz,ndat,isign,fin,fout,fftw_flags)
2240 
2241 
2242 !This section has been created automatically by the script Abilint (TD).
2243 !Do not modify the following lines by hand.
2244 #undef ABI_FUNC
2245 #define ABI_FUNC 'fftw3_many_dft_op'
2246 !End of the abilint section
2247 
2248  implicit none
2249 
2250 !Arguments ------------------------------------
2251 !scalars
2252  integer,intent(in) :: nx,ny,nz,ldx,ldy,ldz,ndat,isign
2253  integer,optional,intent(in) :: fftw_flags
2254 !arrays
2255  real(dp),intent(in) :: fin(2*ldx*ldy*ldz*ndat)
2256  real(dp),intent(out) :: fout(2*ldx*ldy*ldz*ndat)
2257 
2258 #ifdef HAVE_FFT_FFTW3
2259 !Local variables-------------------------------
2260 !scalars
2261  integer,parameter :: rank3=3,nt_all=-1
2262  integer :: my_flags,dist,stride
2263  integer(KIND_FFTW_PLAN) :: my_plan
2264 !arrays
2265  integer :: embed(rank3),n(rank3)
2266 
2267 ! *************************************************************************
2268 
2269  my_flags=ABI_FFTW_ESTIMATE; if (PRESENT(fftw_flags)) my_flags= fftw_flags
2270 
2271  stride = 1
2272  dist   = ldx*ldy*ldz
2273  embed  = (/ldx,ldy,ldz/)
2274  n      = (/nx ,ny ,nz /)
2275 
2276  my_plan = fftw3_plan_many_dft(rank3, n, ndat, fin, embed, stride, dist, fout, embed, stride, dist, isign, my_flags, nt_all)
2277 
2278  ! Now perform the 3D FFT via FFTW.
2279  call dfftw_execute_dft(my_plan, fin, fout)
2280 
2281  call fftw3_destroy_plan(my_plan)
2282 
2283  if (isign==ABI_FFTW_FORWARD) then ! -1, FFTW returns not normalized FTs
2284   call ZDSCAL(ldx*ldy*ldz*ndat, one/(nx*ny*nz), fout, 1)
2285   !call cg_zscal(ldx*ldy*ldz*ndat, (/one/(nx*ny*nz), zero/), fout)
2286  end if
2287 
2288 #else
2289  MSG_ERROR("FFTW3 support not activated")
2290  ABI_UNUSED((/nx,ny,nz,ldx,ldy,ldz,ndat,isign/))
2291  if (PRESENT(fftw_flags)) then
2292    ABI_UNUSED(fftw_flags)
2293  end if
2294  ABI_UNUSED(fin(1))
2295  ABI_UNUSED(fout(1))
2296 #endif
2297 
2298 end subroutine fftw3_many_dft_op

m_fftw3/fftw3_mpiback [ Functions ]

[ Top ] [ m_fftw3 ] [ Functions ]

NAME

  fftw3_mpiback

FUNCTION

   CALCULATES THE DISCRETE FOURIER TRANSFORM  in parallel using MPI/OpenMP

   ZR(I1,I2,I3)= \sum_(j1,j2,j3) EXP(isign*i*2*pi*(j1*i1/n1+j2*i2/n2+j3*i3/n3)) ZF(j1,j3,j2)

 Adopt standard convention that isign=1 for backward transform

 INPUTS:
    option= 1 if call from fourwf, 2 if call from other routine
    cplex=1 for real --> complex, 2 for complex --> complex
    ZF: input array in G-space (note the switch of i2 and i3)

         real(F(i1,i3,i2,idat))=ZF(1,i1,i3,i2,idat)
         imag(F(i1,i3,i2,idat))=ZF(2,i1,i3,i2,idat)

         i1=1,n1 , i2=1,n2 , i3=1,n3 , idat=1,ndat
 OUTPUTS:
    ZR: output array in R space.

         ZR(1,i1,i2,i3,idat)=real(R(i1,i2,i3,idat))
         ZR(2,i1,i2,i3,idat)=imag(R(i1,i2,i3,idat))

         i1=1,n1 , i2=1,n2 , i3=1,n3 , idat=1,ndat

    nproc_fft: number of processors used as returned by MPI_COMM_SIZE
    me_fft: [0:nproc_fft-1] number of processor as returned by MPI_COMM_RANK
    n1,n2,n3: logical dimension of the transform. As transform lengths
              most products of the prime factors 2,3,5 are allowed.
              The detailed table with allowed transform lengths can
              be found in subroutine CTRIG
    nd1,nd2,nd3: Dimension of ZF and ZR
    nd2proc=((nd2-1)/nproc_fft)+1 maximal number of 2nd dim slices
    nd3proc=((nd3-1)/nproc_fft)+1 maximal number of 3rd dim slices

 NOTES:
   The maximum number of processors that can reasonably be used is max(n2,n3)
   It is very important to find the optimal
   value of NCACHE. NCACHE determines the size of the work array ZW, that
   has to fit into cache. It has therefore to be chosen to equal roughly
    half the size of the physical cache in units of real*8 numbers.
   The optimal value of ncache can easily be determined by numerical
   experimentation. A too large value of ncache leads to a dramatic
   and sudden decrease of performance, a too small value to a to a
   slow and less dramatic decrease of performance. If NCACHE is set
   to a value so small, that not even a single one dimensional transform
   can be done in the workarray zw, the program stops with an error message.

PARENTS

      m_fftw3

CHILDREN

      fftw3_destroy_plan,fftw3_execute_dft

SOURCE

5299 subroutine fftw3_mpiback(cplex,ndat,n1,n2,n3,nd1,nd2,nd3,nd1eff,nd2proc,nd3proc,option,zf,zr,comm_fft)
5300 
5301 
5302 !This section has been created automatically by the script Abilint (TD).
5303 !Do not modify the following lines by hand.
5304 #undef ABI_FUNC
5305 #define ABI_FUNC 'fftw3_mpiback'
5306 !End of the abilint section
5307 
5308  implicit none
5309 
5310 !Arguments ------------------------------------
5311 ! real space input
5312  integer,intent(in) :: cplex,ndat,n1,n2,n3,nd1,nd2,nd3,nd1eff,nd2proc,nd3proc,option,comm_fft
5313  real(dp),intent(in) :: zf(2,nd1,nd3,nd2proc,ndat)
5314  real(dp),intent(out) :: zr(2,nd1eff,nd2,nd3proc,ndat)
5315 
5316 !Local variables-------------------------------
5317 !scalaras
5318 #ifdef HAVE_FFT_FFTW3
5319  integer :: j,i1,idat,ierr,includelast,j2,j2st,j3,jeff,jp2st,lzt,nthreads
5320  integer :: ma,mb,n1dfft,n1eff,n2eff,n1zt,ncache,nnd3,nproc_fft,me_fft,lot1,lot2,lot3
5321  integer(KIND_FFTW_PLAN) :: bw_plan1_lot,bw_plan1_rest
5322  integer(KIND_FFTW_PLAN) :: bw_plan2_lot,bw_plan2_rest
5323  integer(KIND_FFTW_PLAN) :: bw_plan3_lot,bw_plan3_rest
5324  character(len=500) :: msg
5325 !arrays
5326  real(dp), allocatable :: zmpi1(:,:,:,:),zmpi2(:,:,:,:) ! work arrays for MPI
5327  real(dp),allocatable :: zw(:,:),zt(:,:,:) ! cache work array and array for transpositions
5328 
5329 ! *************************************************************************
5330 
5331  nproc_fft = xmpi_comm_size(comm_fft); me_fft = xmpi_comm_rank(comm_fft)
5332 
5333  ! find cache size that gives optimal performance on machine
5334  ncache=2*max(n1,n2,n3,1024)
5335 
5336  if (ncache/(2*max(n1,n2,n3))<1) then
5337    write(msg,'(5a)') &
5338 &    'ncache has to be enlarged to be able to hold at',ch10, &
5339 &    'least one 1-d FFT of each size even though this will',ch10,&
5340 &    'reduce the performance for shorter transform lengths'
5341    MSG_ERROR(msg)
5342  end if
5343 
5344 ! check input
5345  if (nd1<n1 .or. nd2<n2 .or. nd3<n3) then
5346    MSG_ERROR("nd1<n1 .or. nd2<n2 .or. nd3<n3")
5347  end if
5348 
5349  ! Effective n1 and n2 (complex-to-complex or real-to-complex)
5350  n1eff=n1; n2eff=n2; n1zt=n1
5351  if (cplex==1) then
5352    n1eff=(n1+1)/2; n2eff=n2/2+1 ; n1zt=2*(n1/2+1)
5353  end if
5354 
5355  lzt=n2eff
5356  if (mod(n2eff,2) == 0) lzt=lzt+1
5357  if (mod(n2eff,4) == 0) lzt=lzt+1
5358 
5359 ! maximal number of big box 3rd dim slices for all procs
5360  nnd3=nd3proc*nproc_fft
5361 
5362  ABI_MALLOC(zw,(2,ncache/2))
5363  ABI_MALLOC(zt,(2,lzt,n1zt))
5364  ABI_MALLOC(zmpi2,(2,n1,nd2proc,nnd3))
5365  if (nproc_fft>1)  then
5366    ABI_MALLOC(zmpi1,(2,n1,nd2proc,nnd3))
5367  end if
5368 
5369 !DEBUG
5370 ! write(std_out,'(a,3i4)' )'back,zf n1,n2,n3',n1,n2,n3
5371 ! write(std_out,'(a,3i4)' )'nd1,nd2,nd3proc',nd1,nd2,nd3proc
5372 ! write(std_out,'(a,3i4)' )'m1,m2,m3',m1,m2,m3
5373 ! write(std_out,'(a,3i4)' )'max1,max2,max3',max1,max2,max3
5374 ! write(std_out,'(a,3i4)' )'md1,md2proc,md3',md1,md2proc,md3
5375 ! write(std_out,'(a,3i4)' )'n1eff,m2eff,m1zt',n1eff,m2eff,m1zt
5376 !ENDDEBUG
5377 
5378  ! Create plans.
5379  ! The prototype for sfftw_plan_many_dft is:
5380  ! sfftw_plan_many_dft(rank, n, howmany,
5381  !   fin,  iembed, istride, idist,
5382  !   fout, oembed, ostride, odist, isign, my_flags)
5383 
5384  lot3=ncache/(2*n3)
5385  lot1=ncache/(2*n1)
5386  lot2=ncache/(2*n2)
5387 
5388  nthreads = xomp_get_num_threads(open_parallel=.TRUE.)
5389  !nthreads = 1
5390 
5391  bw_plan3_lot = dplan_many_dft_2D(1, [n3], lot3, &
5392 &    zw, [ncache/2], lot3, 1,                          &
5393 &    zw, [ncache/2], lot3, 1, ABI_FFTW_BACKWARD, ABI_FFTW_ESTIMATE, nthreads)
5394 
5395  if (mod(n1, lot3) /= 0) then
5396    bw_plan3_rest = dplan_many_dft_2D(1, [n3], mod(n1, lot3), &
5397 &      zw, [ncache/2], lot3, 1,                                    &
5398 &      zw, [ncache/2], lot3, 1, ABI_FFTW_BACKWARD, ABI_FFTW_ESTIMATE, nthreads)
5399  end if
5400 
5401  bw_plan1_lot = dplan_many_dft_2D(1, [n1], lot1, &
5402 &    zw, [ncache/2],  lot1, 1,                         &
5403 &    zt, [lzt, n1zt], lzt, 1, ABI_FFTW_BACKWARD, ABI_FFTW_ESTIMATE, nthreads)
5404 
5405  if (mod(n2eff, lot1) /= 0) then
5406    bw_plan1_rest = dplan_many_dft_2D(1, [n1], mod(n2eff, lot1), &
5407 &      zw, [ncache/2], lot1, 1,                                       &
5408 &      zt, [lzt, n1zt],   lzt, 1, ABI_FFTW_BACKWARD, ABI_FFTW_ESTIMATE, nthreads)
5409  end if
5410 
5411  bw_plan2_lot = dplan_many_dft_2D(1, [n2], lot2, &
5412 &    zw, [ncache/2], lot2, 1,                          &
5413 &    zr, [nd1eff,nd2,nd3proc,ndat], nd1eff, 1, ABI_FFTW_BACKWARD, ABI_FFTW_ESTIMATE, nthreads)
5414 
5415  if (mod(n1eff, lot2) /= 0) then
5416    bw_plan2_rest = dplan_many_dft_2D(1, [n2], mod(n1eff,lot2), &
5417 &      zw, [ncache/2], lot2, 1,                                      &
5418 &      zr, [nd1eff,nd2,nd3proc,ndat], nd1eff, 1, ABI_FFTW_BACKWARD, ABI_FFTW_ESTIMATE, nthreads)
5419  end if
5420 
5421  do idat=1,ndat
5422    ! transform along z axis
5423    ! input: I1,I3,J2,(Jp2)
5424 
5425    do j2=1,nd2proc
5426      if (me_fft*nd2proc+j2 <= n2eff) then
5427 
5428        do i1=1,n1,lot3
5429          ma=i1
5430          mb=min(i1+(lot3-1),n1)
5431          n1dfft=mb-ma+1
5432 
5433          ! input:  G1,G3,G2,(Gp2)
5434          ! output: G1,R3,G2,(Gp2)
5435          call fill(nd1,nd3,lot3,n1dfft,n3,zf(1,i1,1,j2,idat),zw)
5436 
5437          if (n1dfft == lot3) then
5438            call dfftw_execute_dft(bw_plan3_lot, zw, zw)
5439          else
5440            call dfftw_execute_dft(bw_plan3_rest, zw, zw)
5441          end if
5442 
5443          ! input:  G1,R3,G2,(Gp2)
5444          ! output: G1,G2,R3,(Gp2)
5445          call scramble(i1,j2,lot3,n1dfft,n1,n3,nd2proc,nd3,zw,zmpi2)
5446        end do
5447      end if
5448    end do
5449 
5450    ! Interprocessor data transposition
5451    ! input:  G1,G2,R3,Rp3,(Gp2)
5452    ! output: G1,G2,G3,Gp2,(Rp3)
5453    if (nproc_fft>1) then
5454      call xmpi_alltoall(zmpi2,2*n1*nd2proc*nd3proc, &
5455 &                       zmpi1,2*n1*nd2proc*nd3proc,comm_fft,ierr)
5456    end if
5457 
5458    do j3=1,nd3proc
5459      if (me_fft*nd3proc+j3 <= n3) then
5460        Jp2st=1; J2st=1
5461 
5462        ! transform along x axis
5463        do j=1,n2eff,lot1
5464          ma=j
5465          mb=min(j+(lot1-1),n2eff)
5466          n1dfft=mb-ma+1
5467 
5468          ! input:  G1,G2,R3,Gp2,(Rp3)
5469          ! output: G2,G1,R3,Jp2,(Rp3)
5470          if (nproc_fft == 1) then
5471            call mpiswitch(j3,n1dfft,Jp2st,J2st,lot1,n1,nd2proc,nd3proc,nproc_fft,option,zmpi2,zw)
5472          else
5473            call mpiswitch(j3,n1dfft,Jp2st,J2st,lot1,n1,nd2proc,nd3proc,nproc_fft,option,zmpi1,zw)
5474          end if
5475 
5476          ! input:  G2,G1,R3,(Rp3)
5477          ! output: G2,R1,R3,(Rp3)
5478          if (n1dfft == lot1) then
5479            call dfftw_execute_dft(bw_plan1_lot, zw, zt(1,j,1))
5480          else
5481            call dfftw_execute_dft(bw_plan1_rest, zw, zt(1,j,1))
5482          end if
5483 
5484        end do
5485 
5486        ! transform along y axis
5487        do j=1,n1eff,lot2
5488          ma=j
5489          mb=min(j+(lot2-1),n1eff)
5490          n1dfft=mb-ma+1
5491          includelast=1
5492          if (cplex==1) then
5493           jeff=2*j-1
5494           includelast=1
5495           if (mb==n1eff .and. n1eff*2/=n1) includelast=0
5496          end if
5497 
5498          ! input:  G2,R1,R3,(Rp3)
5499          ! output: R1,G2,R3,(Rp3)
5500          if (cplex==2) then
5501            call switch(n1dfft,n2,lot2,n1,lzt,zt(1,1,j),zw)
5502          else
5503            call switchreal(includelast,n1dfft,n2,n2eff,lot2,n1zt,lzt,zt(1,1,jeff),zw)
5504          end if
5505 
5506          if (n1dfft == lot2) then
5507            call dfftw_execute_dft(bw_plan2_lot, zw, zr(1,j,1,j3,idat))
5508          else
5509            call dfftw_execute_dft(bw_plan2_rest, zw, zr(1,j,1,j3,idat))
5510          end if
5511        end do
5512        ! output: R1,R2,R3,(Rp3)
5513 
5514      end if
5515    end do
5516  end do ! idat
5517 
5518  call dfftw_destroy_plan(bw_plan3_lot)
5519  if (mod(n1, lot3) /= 0) then
5520    call dfftw_destroy_plan(bw_plan3_rest)
5521  end if
5522 
5523  call dfftw_destroy_plan(bw_plan1_lot)
5524  if (mod(n2eff, lot1) /= 0) then
5525    call dfftw_destroy_plan(bw_plan1_rest)
5526  end if
5527 
5528  call dfftw_destroy_plan(bw_plan2_lot)
5529  if (mod(n1eff, lot2) /= 0) then
5530    call dfftw_destroy_plan(bw_plan2_rest)
5531  end if
5532 
5533  ABI_FREE(zmpi2)
5534  ABI_FREE(zw)
5535  ABI_FREE(zt)
5536  if (nproc_fft>1)  then
5537    ABI_FREE(zmpi1)
5538  end if
5539 
5540 #else
5541  MSG_ERROR("FFTW3 support not activated")
5542  ABI_UNUSED((/cplex,ndat,n1,n2,n3,nd1,nd2,nd1eff,nd2proc,nd3proc,option,comm_fft/))
5543  ABI_UNUSED((/zf(1,1,1,1,1),zr(1,1,1,1,1)/))
5544 #endif
5545 
5546 end subroutine fftw3_mpiback

m_fftw3/fftw3_mpiback_manywf [ Functions ]

[ Top ] [ m_fftw3 ] [ Functions ]

NAME

  fftw3_mpiback_manywf

FUNCTION

   Does multiple 3-dim backward FFTs from Fourier into real space
   Adopt standard convention that isign=1 for backward transform

   CALCULATES THE DISCRETE FOURIER TRANSFORM ZF(I1,I2,I3)=

   S_(j1,j2,j3) EXP(isign*i*2*pi*(j1*i1/n1+j2*i2/n2+j3*i3/n3)) ZF(j1,j3,j2)

   in parallel using MPI/OpenMP.

 INPUTS:
    cplexwf=1 if wavefunction is real, 2 if complex
    ndat=Number of wavefunctions to transform.
    n1,n2,n3: logical dimension of the transform. As transform lengths
              most products of the prime factors 2,3,5 are allowed.
              The detailed table with allowed transform lengths can be found in subroutine CTRIG
    nd1,nd2,nd3: Leading Dimension of ZR
    nd3proc=((nd3-1)/nproc_fft)+1 maximal number of big box 3rd dim slices for one proc
    max1 is positive or zero; m1 >=max1+1
      i1= 1... max1+1 corresponds to positive and zero wavevectors 0 ... max1
      then, if m1 > max1+1, one has min1=max1-m1+1 and
      i1= max1+2 ... m1 corresponds to negative wavevectors min1 ... -1
    max2 and max3 have a similar definition of range
    m1,m2,m3=Size of the box enclosing the G-sphere.
    md1,md2,md3: Dimension of ZF given on the **small** FFT box.
    md2proc=((md2-1)/nproc_fft)+1 maximal number of small box 2nd dim slices for one proc
    nproc_fft: number of processors used as returned by MPI_COMM_SIZE
    comm_fft=MPI communicator for the FFT.
    ZF: input array (note the switch of i2 and i3)
          real(F(i1,i3,i2,idat))=ZF(1,i1,i3,i2,idat)
          imag(F(i1,i3,i2,idat))=ZF(2,i1,i3,i2,idat)

 OUTPUTS
    ZR: output array
          ZR(1,i1,i2,i3,idat)=real(R(i1,i2,i3,idat))
          ZR(2,i1,i2,i3,idat)=imag(R(i1,i2,i3,idat))
        i1=1,n1 , i2=1,n2 , i3=1,n3 , idat=1,ndat

NOTES

   The maximum number of processors that can reasonably be used is max(n2/2,n3/2)

   It is very important to find the optimal
   value of NCACHE. NCACHE determines the size of the work array ZW, that
   has to fit into cache. It has therefore to be chosen to equal roughly
   half the size of the physical cache in units of real*8 numbers.
   The optimal value of ncache can easily be determined by numerical
   experimentation. A too large value of ncache leads to a dramatic
   and sudden decrease of performance, a too small value to a to a
   slow and less dramatic decrease of performance. If NCACHE is set
   to a value so small, that not even a single one dimensional transform
   can be done in the workarray zw, the program stops with an error message.

PARENTS

      m_fft

CHILDREN

      fftw3_destroy_plan,fftw3_execute_dft

SOURCE

6826 subroutine fftw3_mpiback_manywf(cplexwf,ndat,n1,n2,n3,nd1,nd2,nd3proc,&
6827 &  max1,max2,max3,m1,m2,m3,md1,md2proc,md3,zf,zr,comm_fft)
6828 
6829 
6830 !This section has been created automatically by the script Abilint (TD).
6831 !Do not modify the following lines by hand.
6832 #undef ABI_FUNC
6833 #define ABI_FUNC 'fftw3_mpiback_manywf'
6834 !End of the abilint section
6835 
6836  implicit none
6837 
6838 !Arguments ------------------------------------
6839  integer,intent(in) :: cplexwf,ndat,n1,n2,n3,nd1,nd2,nd3proc
6840  integer,intent(in) :: max1,max2,max3,m1,m2,m3,md1,md2proc,md3,comm_fft
6841  real(dp),intent(in) :: zf(2,md1,md3,md2proc,ndat)
6842  real(dp),intent(out) :: zr(2,nd1,nd2,nd3proc,ndat)
6843 
6844 #ifdef HAVE_FFT_FFTW3
6845 !Local variables-------------------------------
6846  integer,parameter :: nt1=1
6847  integer :: j,i1,i2,idat,ierr,includelast,nthreads
6848  integer :: ioption,j2,j3,j2st,jp2st,jeff,lzt,m1zt,ma,mb,n1dfft,nnd3
6849  integer :: lot1,lot2,lot3
6850  integer :: m2eff,ncache,n1eff,n1half,nproc_fft,me_fft
6851  integer(KIND_FFTW_PLAN) :: bw_plan1_lot,bw_plan1_rest
6852  integer(KIND_FFTW_PLAN) :: bw_plan2_lot,bw_plan2_rest
6853  integer(KIND_FFTW_PLAN) :: bw_plan3_lot,bw_plan3_rest
6854  !type(C_PTR) :: zw_cptr,zt_cptr
6855  character(len=500) :: msg
6856 !arrays
6857  integer :: requests(ndat)
6858  real(dp) ABI_ASYNC, allocatable :: zmpi1(:,:,:,:,:),zmpi2(:,:,:,:,:)  ! work arrays for MPI
6859  real(dp),allocatable :: zw(:,:),zt(:,:,:) ! cache work array and array for transpositions
6860  !real(dp),ABI_CONTIGUOUS pointer :: zw(:,:),zt(:,:,:)
6861 ! FFT work arrays
6862  real(dp) :: tsec(2)
6863 
6864 ! *************************************************************************
6865 
6866  !call wrtout(std_out,"mpiback with non-blocking IALLTOALL + FFTW3","COLL")
6867 
6868 
6869  ! FIXME must provide a default value but which one?
6870  ! ioption = 0
6871  ioption = 1
6872  !if (paral_kgb==1) ioption=1
6873 
6874  nproc_fft = xmpi_comm_size(comm_fft); me_fft = xmpi_comm_rank(comm_fft)
6875 
6876  ! Find cache size that gives optimal performance on machine
6877  ncache=2*max(n1,n2,n3,1024)
6878  if (ncache/(2*max(n1,n2,n3))<1) then
6879    write(msg,"(5a)") &
6880 &    'ncache has to be enlarged to be able to hold at',ch10, &
6881 &    'least one 1-d FFT of each size even though this will',ch10,&
6882 &    'reduce the performance for shorter transform lengths'
6883     MSG_ERROR(msg)
6884  end if
6885 
6886  ! Effective m1 and m2 (complex-to-complex or real-to-complex)
6887  n1eff=n1; m2eff=m2; m1zt=n1
6888  if (cplexwf==1) then
6889    n1eff=(n1+1)/2; m2eff=m2/2+1; m1zt=2*(n1/2+1)
6890  end if
6891 
6892  lzt=m2eff
6893  if (mod(m2eff,2)==0) lzt=lzt+1
6894  if (mod(m2eff,4)==0) lzt=lzt+1
6895 
6896  ! maximal number of big box 3rd dim slices for all procs
6897  nnd3=nd3proc*nproc_fft
6898 
6899  ! Allocate cache work array and work arrays for MPI transpositions.
6900  ABI_MALLOC(zw,(2,ncache/2))
6901  ABI_MALLOC(zt,(2,lzt,m1zt))
6902 
6903  !call fftw3_alloc_real([2,ncache/2],zw_cptr,zw)
6904  !call fftw3_alloc_real([2,lzt,m1zt],zt_cptr,zt)
6905 
6906  ABI_MALLOC(zmpi2,(2,md1,md2proc,nnd3,ndat))
6907  if (nproc_fft>1)  then
6908    ABI_MALLOC(zmpi1,(2,md1,md2proc,nnd3,ndat))
6909  end if
6910 
6911  ! Create plans.
6912  ! The prototype for sfftw_plan_many_dft is:
6913  ! sfftw_plan_many_dft(rank, n, howmany,
6914  !   fin,  iembed, istride, idist,
6915  !   fout, oembed, ostride, odist, isign, my_flags)
6916 
6917  lot3=ncache/(2*n3)
6918  lot1=ncache/(2*n1)
6919  lot2=ncache/(2*n2)
6920 
6921  nthreads = xomp_get_num_threads(open_parallel=.TRUE.)
6922  !nthreads = 1
6923 
6924  bw_plan3_lot = dplan_many_dft_2D(1, [n3], lot3, &
6925 &    zw, [ncache/2], lot3, 1,                          &
6926 &    zw, [ncache/2], lot3, 1, ABI_FFTW_BACKWARD, ABI_FFTW_ESTIMATE, nthreads)
6927 
6928  if (mod(m1, lot3) /= 0) then
6929    bw_plan3_rest = dplan_many_dft_2D(1, [n3], mod(m1, lot3), &
6930 &      zw, [ncache/2], lot3, 1,                                    &
6931 &      zw, [ncache/2], lot3, 1, ABI_FFTW_BACKWARD, ABI_FFTW_ESTIMATE, nthreads)
6932  end if
6933 
6934  bw_plan1_lot = dplan_many_dft_2D(1, [n1], lot1, &
6935 &    zw, [ncache/2],  lot1, 1,                         &
6936 &    zt, [lzt, m1zt], lzt, 1, ABI_FFTW_BACKWARD, ABI_FFTW_ESTIMATE, nthreads)
6937 
6938  if (mod(m2eff, lot1) /= 0) then
6939    bw_plan1_rest = dplan_many_dft_2D(1, [n1], mod(m2eff, lot1), &
6940 &      zw, [ncache/2],  lot1, 1,                                      &
6941 &      zt, [lzt, m1zt], lzt, 1, ABI_FFTW_BACKWARD, ABI_FFTW_ESTIMATE, nthreads)
6942  end if
6943 
6944  bw_plan2_lot = dplan_many_dft_2D(1, [n2], lot2, &
6945 &    zw, [ncache/2], lot2, 1,                          &
6946 &    zr, [nd1,nd2,nd3proc,ndat], nd1, 1, ABI_FFTW_BACKWARD, ABI_FFTW_ESTIMATE, nthreads)
6947 
6948  if (mod(n1eff, lot2) /= 0) then
6949    bw_plan2_rest = dplan_many_dft_2D(1, [n2], mod(n1eff,lot2), &
6950 &      zw, [ncache/2], lot2, 1,                                      &
6951 &      zr, [nd1,nd2,nd3proc,ndat], nd1, 1, ABI_FFTW_BACKWARD, ABI_FFTW_ESTIMATE, nthreads)
6952  end if
6953 
6954  do idat=1,ndat
6955     ! transform along z axis
6956     ! input: G1,G3,G2,(Gp2)
6957 
6958     ! Loop over the y planes treated by this node and trasform n1ddft G_z lines.
6959     do j2=1,md2proc
6960       ! if (me_fft*md2proc+j2<=m2eff) then !a faire plus tard
6961       do i1=1,m1,lot3
6962         ma=i1
6963         mb=min(i1+(lot3-1),m1)
6964         n1dfft=mb-ma+1
6965 
6966         ! zero-pad n1dfft G_z lines
6967         ! input:  G1,G3,G2,(Gp2)
6968         ! output: G1,R3,G2,(Gp2)
6969         call fill_cent(md1,md3,lot3,n1dfft,max3,m3,n3,zf(1,i1,1,j2,idat),zw)
6970 
6971         ! Transform along z.
6972         if (n1dfft == lot3) then
6973           call dfftw_execute_dft(bw_plan3_lot, zw, zw)
6974         else
6975           call dfftw_execute_dft(bw_plan3_rest, zw, zw)
6976         end if
6977 
6978         ! Local rotation.
6979         ! input:  G1,R3,G2,(Gp2)
6980         ! output: G1,G2,R3,(Gp2)
6981         call scramble(i1,j2,lot3,n1dfft,md1,n3,md2proc,nnd3,zw,zmpi2(:,:,:,:,idat))
6982       end do
6983     end do ! j2
6984 
6985     ! Interprocessor data transposition
6986     ! input:  G1,G2,R3,Rp3,(Gp2)
6987     ! output: G1,G2,R3,Gp2,(Rp3)
6988     if (nproc_fft>1) then
6989       call timab(543,1,tsec)
6990       call xmpi_ialltoall(zmpi2(:,:,:,:,idat),2*md1*md2proc*nd3proc, &
6991 &                         zmpi1(:,:,:,:,idat),2*md1*md2proc*nd3proc,comm_fft,requests(idat))
6992       call timab(543,2,tsec)
6993     end if
6994  end do
6995 
6996  do idat=1,ndat
6997     if (nproc_fft>1) call xmpi_wait(requests(idat),ierr)
6998     ! Loop over the z treated by this node.
6999     do j3=1,nd3proc
7000       if (me_fft*nd3proc+j3 <= n3) then
7001         Jp2st=1; J2st=1
7002 
7003         ! Loop over G_y in the small box.
7004         do j=1,m2eff,lot1
7005           ma=j
7006           mb=min(j+(lot1-1),m2eff)
7007           n1dfft=mb-ma+1
7008 
7009           ! Zero-pad input.
7010           ! input:  G1,G2,R3,JG2,(Rp3)
7011           ! output: G2,G1,R3,JG2,(Rp3)
7012           if (nproc_fft==1) then
7013             call mpiswitch_cent(j3,n1dfft,Jp2st,J2st,lot1,max1,md1,m1,n1,&
7014 &             md2proc,nd3proc,nproc_fft,ioption,zmpi2(:,:,:,:,idat),zw,max2,m2,n2)
7015           else
7016             call mpiswitch_cent(j3,n1dfft,Jp2st,J2st,lot1,max1,md1,m1,n1,&
7017 &             md2proc,nd3proc,nproc_fft,ioption,zmpi1(:,:,:,:,idat),zw,max2,m2,n2)
7018           end if
7019 
7020           ! Transform along x
7021           ! input:  G2,G1,R3,(Rp3)
7022           ! output: G2,R1,R3,(Rp3)
7023           if (n1dfft == lot1) then
7024             call dfftw_execute_dft(bw_plan1_lot, zw, zt(1,j,1))
7025           else
7026             call dfftw_execute_dft(bw_plan1_rest, zw, zt(1,j,1))
7027           end if
7028 
7029         end do ! j
7030 
7031         ! Transform along y axis (take into account c2c or c2r case).
7032         ! Must loop over the full box.
7033         do j=1,n1eff,lot2
7034           ma=j
7035           mb=min(j+(lot2-1),n1eff)
7036           n1dfft=mb-ma+1
7037           includelast=1
7038 
7039           if (cplexwf==1) then
7040             jeff=2*j-1
7041             if (mb==n1eff .and. n1eff*2/=n1) includelast=0
7042           end if
7043 
7044           ! Zero-pad the input.
7045           ! input:  G2,R1,R3,(Rp3)
7046           ! output: R1,G2,R3,(Rp3)
7047           if (cplexwf==2) then
7048             call switch_cent(n1dfft,max2,m2,n2,lot2,n1,lzt,zt(1,1,j),zw)
7049           else
7050             call switchreal_cent(includelast,n1dfft,max2,n2,lot2,m1zt,lzt,zt(1,1,jeff),zw)
7051           end if
7052 
7053           ! input:  R1,G2,R3,(Rp3)
7054           ! output: R1,R2,R3,(Rp3)
7055           if (n1dfft == lot2) then
7056             call dfftw_execute_dft(bw_plan2_lot, zw, zr(1,j,1,j3,idat))
7057           else
7058             call dfftw_execute_dft(bw_plan2_rest, zw, zr(1,j,1,j3,idat))
7059           end if
7060 
7061         end do
7062 
7063         ! Treat real wavefunctions.
7064         if (cplexwf==1) then
7065           n1half=n1/2
7066           ! If odd
7067           if (n1half*2/=n1) then
7068             do i2=1,n2
7069               zr(1,n1,i2,j3,idat)=zr(1,n1eff,i2,j3,idat)
7070               zr(2,n1,i2,j3,idat)=zero
7071             end do
7072           end if
7073           do i2=1,n2
7074             do i1=n1half,1,-1
7075               zr(1,2*i1-1,i2,j3,idat)=zr(1,i1,i2,j3,idat)
7076               zr(1,2*i1  ,i2,j3,idat)=zr(2,i1,i2,j3,idat)
7077               zr(2,2*i1-1,i2,j3,idat)=zero
7078               zr(2,2*i1  ,i2,j3,idat)=zero
7079             end do
7080           end do
7081         end if
7082 
7083       end if
7084 
7085    end do ! j3
7086  end do ! idat
7087 
7088  call dfftw_destroy_plan(bw_plan3_lot)
7089  if (mod(m1, lot3) /= 0) then
7090    call dfftw_destroy_plan(bw_plan3_rest)
7091  end if
7092 
7093  call dfftw_destroy_plan(bw_plan1_lot)
7094  if (mod(m2eff, lot1) /= 0) then
7095    call dfftw_destroy_plan(bw_plan1_rest)
7096  end if
7097 
7098  call dfftw_destroy_plan(bw_plan2_lot)
7099  if (mod(n1eff, lot2) /= 0) then
7100    call dfftw_destroy_plan(bw_plan2_rest)
7101  end if
7102 
7103  ABI_FREE(zmpi2)
7104  ABI_FREE(zw)
7105  ABI_FREE(zt)
7106  if (nproc_fft>1)  then
7107    ABI_FREE(zmpi1)
7108  end if
7109 
7110 #else
7111  MSG_ERROR("FFTW3 support not activated")
7112  ABI_UNUSED((/cplexwf,ndat,n1,n2,n3,nd1,nd2,nd3proc/))
7113  ABI_UNUSED((/ max1,max2,max3,m1,m2,m3,md1,md2proc,md3,comm_fft/))
7114  ABI_UNUSED((/zf(1,1,1,1,1),zr(1,1,1,1,1)/))
7115 #endif
7116 
7117 end subroutine fftw3_mpiback_manywf

m_fftw3/fftw3_mpiback_wf [ Functions ]

[ Top ] [ m_fftw3 ] [ Functions ]

NAME

  fftw3_mpiback_wf

FUNCTION

   Does multiple 3-dim backward FFTs from Fourier into real space
   Adopt standard convention that isign=1 for backward transform

   CALCULATES THE DISCRETE FOURIER TRANSFORM ZF(I1,I2,I3)=

   S_(j1,j2,j3) EXP(isign*i*2*pi*(j1*i1/n1+j2*i2/n2+j3*i3/n3)) ZF(j1,j3,j2)

   in parallel using MPI/OpenMP.

 INPUTS:
    cplexwf=1 if wavefunction is real, 2 if complex
    ndat=Number of wavefunctions to transform.
    n1,n2,n3: logical dimension of the transform. As transform lengths
              most products of the prime factors 2,3,5 are allowed.
              The detailed table with allowed transform lengths can be found in subroutine CTRIG
    nd1,nd2,nd3: Leading Dimension of ZR
    nd3proc=((nd3-1)/nproc_fft)+1 maximal number of big box 3rd dim slices for one proc
    max1 is positive or zero; m1 >=max1+1
      i1= 1... max1+1 corresponds to positive and zero wavevectors 0 ... max1
      then, if m1 > max1+1, one has min1=max1-m1+1 and
      i1= max1+2 ... m1 corresponds to negative wavevectors min1 ... -1
    max2 and max3 have a similar definition of range
    m1,m2,m3=Size of the box enclosing the G-sphere.
    md1,md2,md3: Dimension of ZF given on the **small** FFT box.
    md2proc=((md2-1)/nproc_fft)+1 maximal number of small box 2nd dim slices for one proc
    nproc_fft: number of processors used as returned by MPI_COMM_SIZE
    comm_fft=MPI communicator for the FFT.
    ZF: input array (note the switch of i2 and i3)
          real(F(i1,i3,i2,idat))=ZF(1,i1,i3,i2,idat)
          imag(F(i1,i3,i2,idat))=ZF(2,i1,i3,i2,idat)

 OUTPUTS
    ZR: output array
          ZR(1,i1,i2,i3,idat)=real(R(i1,i2,i3,idat))
          ZR(2,i1,i2,i3,idat)=imag(R(i1,i2,i3,idat))
        i1=1,n1 , i2=1,n2 , i3=1,n3 , idat=1,ndat

NOTES

   The maximum number of processors that can reasonably be used is max(n2/2,n3/2)

   It is very important to find the optimal
   value of NCACHE. NCACHE determines the size of the work array ZW, that
   has to fit into cache. It has therefore to be chosen to equal roughly
   half the size of the physical cache in units of real*8 numbers.
   The optimal value of ncache can easily be determined by numerical
   experimentation. A too large value of ncache leads to a dramatic
   and sudden decrease of performance, a too small value to a to a
   slow and less dramatic decrease of performance. If NCACHE is set
   to a value so small, that not even a single one dimensional transform
   can be done in the workarray zw, the program stops with an error message.

PARENTS

      m_fft

CHILDREN

      fftw3_destroy_plan,fftw3_execute_dft

SOURCE

4575 subroutine fftw3_mpiback_wf(cplexwf,ndat,n1,n2,n3,nd1,nd2,nd3proc,&
4576 &  max1,max2,max3,m1,m2,m3,md1,md2proc,md3,zf,zr,comm_fft)
4577 
4578 
4579 !This section has been created automatically by the script Abilint (TD).
4580 !Do not modify the following lines by hand.
4581 #undef ABI_FUNC
4582 #define ABI_FUNC 'fftw3_mpiback_wf'
4583 !End of the abilint section
4584 
4585  implicit none
4586 
4587 !Arguments ------------------------------------
4588  integer,intent(in) :: cplexwf,ndat,n1,n2,n3,nd1,nd2,nd3proc
4589  integer,intent(in) :: max1,max2,max3,m1,m2,m3,md1,md2proc,md3,comm_fft
4590  real(dp),intent(in) :: zf(2,md1,md3,md2proc,ndat)
4591  real(dp),intent(out) :: zr(2,nd1,nd2,nd3proc,ndat)
4592 
4593 #ifdef HAVE_FFT_FFTW3
4594 !Local variables-------------------------------
4595  integer,parameter :: nt1=1
4596  integer :: j,i1,i2,idat,ierr,includelast
4597  integer :: ioption,j2,j3,j2st,jp2st,jeff,lzt,m1zt,ma,mb,n1dfft,nnd3
4598  integer :: lot1,lot2,lot3
4599  integer :: m2eff,ncache,n1eff,n1half,nproc_fft,me_fft,nthreads
4600  integer(KIND_FFTW_PLAN) :: bw_plan1_lot,bw_plan1_rest
4601  integer(KIND_FFTW_PLAN) :: bw_plan2_lot,bw_plan2_rest
4602  integer(KIND_FFTW_PLAN) :: bw_plan3_lot,bw_plan3_rest
4603  !type(C_PTR) :: zw_cptr,zt_cptr
4604  character(len=500) :: msg
4605 !arrays
4606  real(dp),allocatable :: zmpi1(:,:,:,:),zmpi2(:,:,:,:)  ! work arrays for MPI
4607  real(dp),allocatable :: zw(:,:),zt(:,:,:) ! cache work array and array for transpositions
4608  !real(dp),ABI_CONTIGUOUS pointer :: zw(:,:),zt(:,:,:)
4609 ! FFT work arrays
4610  real(dp) :: tsec(2)
4611 
4612 ! *************************************************************************
4613 
4614  !call wrtout(std_out,"mpiback standard ALLTOALL + FFTW3","COLL")
4615 
4616  ! FIXME must provide a default value but which one?
4617  ! ioption = 0
4618  ioption = 1
4619  !if (paral_kgb==1) ioption=1
4620 
4621  nproc_fft = xmpi_comm_size(comm_fft); me_fft = xmpi_comm_rank(comm_fft)
4622 
4623  ! Find cache size that gives optimal performance on machine
4624  ncache=2*max(n1,n2,n3,1024)
4625  if (ncache/(2*max(n1,n2,n3))<1) then
4626    write(msg,"(5a)") &
4627 &    'ncache has to be enlarged to be able to hold at',ch10, &
4628 &    'least one 1-d FFT of each size even though this will',ch10,&
4629 &    'reduce the performance for shorter transform lengths'
4630     MSG_ERROR(msg)
4631  end if
4632 
4633  ! Effective m1 and m2 (complex-to-complex or real-to-complex)
4634  n1eff=n1; m2eff=m2; m1zt=n1
4635  if (cplexwf==1) then
4636    n1eff=(n1+1)/2; m2eff=m2/2+1; m1zt=2*(n1/2+1)
4637  end if
4638 
4639  lzt=m2eff
4640  if (mod(m2eff,2)==0) lzt=lzt+1
4641  if (mod(m2eff,4)==0) lzt=lzt+1
4642 
4643  ! maximal number of big box 3rd dim slices for all procs
4644  nnd3=nd3proc*nproc_fft
4645 
4646  ! Allocate cache work array and work arrays for MPI transpositions.
4647  ABI_MALLOC(zw,(2,ncache/2))
4648  ABI_MALLOC(zt,(2,lzt,m1zt))
4649 
4650  !call fftw3_alloc_real([2,ncache/2],zw_cptr,zw)
4651  !call fftw3_alloc_real([2,lzt,m1zt],zt_cptr,zt)
4652 
4653  ABI_MALLOC(zmpi2,(2,md1,md2proc,nnd3))
4654  if (nproc_fft>1)  then
4655    ABI_MALLOC(zmpi1,(2,md1,md2proc,nnd3))
4656  end if
4657 
4658 !DEBUG
4659 ! write(std_out,'(2a,3i4)' )itoa(me_fft),': fftw3_mpiback_wf,zf n1,n2,n3',n1,n2,n3
4660 ! write(std_out,'(2a,3i4)' )itoa(me_fft),': nd1,nd2,nd3proc',nd1,nd2,nd3proc
4661 ! write(std_out,'(2a,3i4)' )itoa(me_fft),': m1,m2,m3',m1,m2,m3
4662 ! write(std_out,'(2a,3i4)' )itoa(me_fft),': max1,max2,max3',max1,max2,max3
4663 ! write(std_out,'(2a,3i4)' )itoa(me_fft),': md1,md2proc,md3',md1,md2proc,md3
4664 ! write(std_out,'(2a,3i4)' )itoa(me_fft),'n1eff,m2eff,m1zt',n1eff,m2eff,m1zt
4665 !ENDDEBUG
4666 
4667  ! Create plans.
4668  ! The prototype for sfftw_plan_many_dft is:
4669  ! sfftw_plan_many_dft(rank, n, howmany,
4670  !   fin,  iembed, istride, idist,
4671  !   fout, oembed, ostride, odist, isign, my_flags)
4672 
4673  lot3=ncache/(2*n3)
4674  lot1=ncache/(2*n1)
4675  lot2=ncache/(2*n2)
4676 
4677  nthreads = xomp_get_num_threads(open_parallel=.TRUE.)
4678  !nthreads = 1
4679 
4680  bw_plan3_lot = dplan_many_dft_2D(1, [n3], lot3, &
4681 &    zw, [ncache/2], lot3, 1,                          &
4682 &    zw, [ncache/2], lot3, 1, ABI_FFTW_BACKWARD, ABI_FFTW_ESTIMATE, nthreads)
4683 
4684  if (mod(m1, lot3) /= 0) then
4685    bw_plan3_rest = dplan_many_dft_2D(1, [n3], mod(m1, lot3), &
4686 &      zw, [ncache/2], lot3, 1,                                    &
4687 &      zw, [ncache/2], lot3, 1, ABI_FFTW_BACKWARD, ABI_FFTW_ESTIMATE, nthreads)
4688  end if
4689 
4690  bw_plan1_lot = dplan_many_dft_2D(1, [n1], lot1, &
4691 &    zw, [ncache/2],  lot1, 1,                         &
4692 &    zt, [lzt, m1zt], lzt, 1, ABI_FFTW_BACKWARD, ABI_FFTW_ESTIMATE, nthreads)
4693 
4694  if (mod(m2eff, lot1) /= 0) then
4695    bw_plan1_rest = dplan_many_dft_2D(1, [n1], mod(m2eff, lot1), &
4696 &      zw, [ncache/2],  lot1, 1,                                      &
4697 &      zt, [lzt, m1zt], lzt, 1, ABI_FFTW_BACKWARD, ABI_FFTW_ESTIMATE, nthreads)
4698  end if
4699 
4700  bw_plan2_lot = dplan_many_dft_2D(1, [n2], lot2, &
4701 &    zw, [ncache/2], lot2, 1,                          &
4702 &    zr, [nd1,nd2,nd3proc,ndat], nd1, 1, ABI_FFTW_BACKWARD, ABI_FFTW_ESTIMATE, nthreads)
4703 
4704  if (mod(n1eff, lot2) /= 0) then
4705    bw_plan2_rest = dplan_many_dft_2D(1, [n2], mod(n1eff,lot2), &
4706 &      zw, [ncache/2], lot2, 1,                                      &
4707 &      zr, [nd1,nd2,nd3proc,ndat], nd1, 1, ABI_FFTW_BACKWARD, ABI_FFTW_ESTIMATE, nthreads)
4708  end if
4709 
4710  do idat=1,ndat
4711     ! transform along z axis
4712     ! input: G1,G3,G2,(Gp2)
4713 
4714     ! Loop over the y planes treated by this node and trasform n1ddft G_z lines.
4715     do j2=1,md2proc
4716       ! if (me_fft*md2proc+j2<=m2eff) then !a faire plus tard
4717       do i1=1,m1,lot3
4718         ma=i1
4719         mb=min(i1+(lot3-1),m1)
4720         n1dfft=mb-ma+1
4721 
4722         ! zero-pad n1dfft G_z lines
4723         ! input:  G1,G3,G2,(Gp2)
4724         ! output: G1,R3,G2,(Gp2)
4725         call fill_cent(md1,md3,lot3,n1dfft,max3,m3,n3,zf(1,i1,1,j2,idat),zw)
4726 
4727         ! Transform along z.
4728         if (n1dfft == lot3) then
4729           call dfftw_execute_dft(bw_plan3_lot, zw, zw)
4730         else
4731           call dfftw_execute_dft(bw_plan3_rest, zw, zw)
4732         end if
4733 
4734         ! Local rotation.
4735         ! input:  G1,R3,G2,(Gp2)
4736         ! output: G1,G2,R3,(Gp2)
4737         call scramble(i1,j2,lot3,n1dfft,md1,n3,md2proc,nnd3,zw,zmpi2)
4738       end do
4739     end do ! j2
4740 
4741     ! Interprocessor data transposition
4742     ! input:  G1,G2,R3,Rp3,(Gp2)
4743     ! output: G1,G2,R3,Gp2,(Rp3)
4744     if (nproc_fft>1) then
4745       call timab(543,1,tsec)
4746       call xmpi_alltoall(zmpi2,2*md1*md2proc*nd3proc, &
4747 &                        zmpi1,2*md1*md2proc*nd3proc,comm_fft,ierr)
4748       call timab(543,2,tsec)
4749     end if
4750 
4751     ! Loop over the z treated by this node.
4752     do j3=1,nd3proc
4753       if (me_fft*nd3proc+j3 <= n3) then
4754         Jp2st=1; J2st=1
4755 
4756         ! Loop over G_y in the small box.
4757         do j=1,m2eff,lot1
4758           ma=j
4759           mb=min(j+(lot1-1),m2eff)
4760           n1dfft=mb-ma+1
4761 
4762           ! Zero-pad input.
4763           ! input:  G1,G2,R3,JG2,(Rp3)
4764           ! output: G2,G1,R3,JG2,(Rp3)
4765           if (nproc_fft==1) then
4766             call mpiswitch_cent(j3,n1dfft,Jp2st,J2st,lot1,max1,md1,m1,n1,&
4767 &             md2proc,nd3proc,nproc_fft,ioption,zmpi2,zw,max2,m2,n2)
4768           else
4769             call mpiswitch_cent(j3,n1dfft,Jp2st,J2st,lot1,max1,md1,m1,n1,&
4770 &             md2proc,nd3proc,nproc_fft,ioption,zmpi1,zw,max2,m2,n2)
4771           end if
4772 
4773           ! Transform along x
4774           ! input:  G2,G1,R3,(Rp3)
4775           ! output: G2,R1,R3,(Rp3)
4776           if (n1dfft == lot1) then
4777             call dfftw_execute_dft(bw_plan1_lot, zw, zt(1,j,1))
4778           else
4779             call dfftw_execute_dft(bw_plan1_rest, zw, zt(1,j,1))
4780           end if
4781 
4782         end do ! j
4783 
4784         ! Transform along y axis (take into account c2c or c2r case).
4785         ! Must loop over the full box.
4786         do j=1,n1eff,lot2
4787           ma=j
4788           mb=min(j+(lot2-1),n1eff)
4789           n1dfft=mb-ma+1
4790           includelast=1
4791 
4792           if (cplexwf==1) then
4793             jeff=2*j-1
4794             if (mb==n1eff .and. n1eff*2/=n1) includelast=0
4795           end if
4796 
4797           ! Zero-pad the input.
4798           ! input:  G2,R1,R3,(Rp3)
4799           ! output: R1,G2,R3,(Rp3)
4800           if (cplexwf==2) then
4801             call switch_cent(n1dfft,max2,m2,n2,lot2,n1,lzt,zt(1,1,j),zw)
4802           else
4803             call switchreal_cent(includelast,n1dfft,max2,n2,lot2,m1zt,lzt,zt(1,1,jeff),zw)
4804           end if
4805 
4806           ! input:  R1,G2,R3,(Rp3)
4807           ! output: R1,R2,R3,(Rp3)
4808           if (n1dfft == lot2) then
4809             call dfftw_execute_dft(bw_plan2_lot, zw, zr(1,j,1,j3,idat))
4810           else
4811             call dfftw_execute_dft(bw_plan2_rest, zw, zr(1,j,1,j3,idat))
4812           end if
4813 
4814         end do
4815 
4816         ! Treat real wavefunctions.
4817         if (cplexwf==1) then
4818           n1half=n1/2
4819           ! If odd
4820           if (n1half*2/=n1) then
4821             do i2=1,n2
4822               zr(1,n1,i2,j3,idat)=zr(1,n1eff,i2,j3,idat)
4823               zr(2,n1,i2,j3,idat)=zero
4824             end do
4825           end if
4826           do i2=1,n2
4827             do i1=n1half,1,-1
4828               zr(1,2*i1-1,i2,j3,idat)=zr(1,i1,i2,j3,idat)
4829               zr(1,2*i1  ,i2,j3,idat)=zr(2,i1,i2,j3,idat)
4830               zr(2,2*i1-1,i2,j3,idat)=zero
4831               zr(2,2*i1  ,i2,j3,idat)=zero
4832             end do
4833           end do
4834         end if
4835 
4836       end if
4837    end do ! j3
4838  end do ! idat
4839 
4840  call dfftw_destroy_plan(bw_plan3_lot)
4841  if (mod(m1, lot3) /= 0) then
4842    call dfftw_destroy_plan(bw_plan3_rest)
4843  end if
4844 
4845  call dfftw_destroy_plan(bw_plan1_lot)
4846  if (mod(m2eff, lot1) /= 0) then
4847    call dfftw_destroy_plan(bw_plan1_rest)
4848  end if
4849 
4850  call dfftw_destroy_plan(bw_plan2_lot)
4851  if (mod(n1eff, lot2) /= 0) then
4852    call dfftw_destroy_plan(bw_plan2_rest)
4853  end if
4854 
4855  ABI_FREE(zmpi2)
4856  ABI_FREE(zw)
4857  ABI_FREE(zt)
4858  if (nproc_fft>1)  then
4859    ABI_FREE(zmpi1)
4860  end if
4861 
4862 #else
4863  MSG_ERROR("FFTW3 support not activated")
4864  ABI_UNUSED((/cplexwf,ndat,n1,n2,n3,nd1,nd2,nd3proc/))
4865  ABI_UNUSED((/ max1,max2,max3,m1,m2,m3,md1,md2proc,md3,comm_fft/))
4866  ABI_UNUSED((/zf(1,1,1,1,1),zr(1,1,1,1,1)/))
4867 #endif
4868 
4869 end subroutine fftw3_mpiback_wf

m_fftw3/fftw3_mpiforw [ Functions ]

[ Top ] [ m_fftw3 ] [ Functions ]

NAME

  fftw3_mpiforw

FUNCTION

   Adopt standard convention that isign=-1 for forward transform
   CALCULATES THE DISCRETE FOURIERTRANSFORM ZF(I1,I3,I2)=
   S_(j1,j2,j3) EXP(isign*i*2*pi*(j1*i1/n1+j2*i2/n2+j3*i3/n3)) ZR(j1,j2,j3)
   in parallel using MPI/OpenMP and BLAS library calls.

INPUTS

    ZR: input array
         ZR(1,i1,i2,i3,idat)=real(R(i1,i2,i3,idat))
         ZR(2,i1,i2,i3,idat)=imag(R(i1,i2,i3,idat))
         i1=1,n1 , i2=1,n2 , i3=1,n3 , idat=1,ndat
 OUTPUTS
    ZF: output array (note the switch of i2 and i3)
         real(F(i1,i3,i2,idat))=ZF(1,i1,i3,i2,idat)
         imag(F(i1,i3,i2,idat))=ZF(2,i1,i3,i2,idat)
         i1=1,n1 , i2=1,n2 , i3=1,n3 , idat=1,ndat
    nproc_fft: number of processors used as returned by MPI_COMM_SIZE
    me_fft: [0:nproc_fft-1] number of processor as returned by MPI_COMM_RANK
     n1,n2,n3: logical dimension of the transform. As transform lengths
               most products of the prime factors 2,3,5 are allowed.
              The detailed table with allowed transform lengths can
              be found in subroutine CTRIG
     nd1,nd2,nd3: Dimension of ZR and ZF
    nd2proc=((nd2-1)/nproc_fft)+1 maximal number of 2nd dim slices
    nd3proc=((nd3-1)/nproc_fft)+1 maximal number of 3rd dim slices

NOTES

  SHOULD describe nd1eff
  SHOULD put cplex and nd1eff in OMP declarations
  SHOULD describe the change of value of nd2prod

  The maximum number of processors that can reasonably be used is max(n2,n3)

  It is very important to find the optimal
  value of NCACHE. NCACHE determines the size of the work array ZW, that
  has to fit into cache. It has therefore to be chosen to equal roughly
   half the size of the physical cache in units of real*8 numbers.
  The optimal value of ncache can easily be determined by numerical
  experimentation. A too large value of ncache leads to a dramatic
  and sudden decrease of performance, a too small value to a to a
  slow and less dramatic decrease of performance. If NCACHE is set
  to a value so small, that not even a single one dimensional transform
  can be done in the workarray zw, the program stops with an error message.

PARENTS

      m_fftw3

CHILDREN

      fftw3_destroy_plan,fftw3_execute_dft

SOURCE

5607 subroutine fftw3_mpiforw(cplex,ndat,n1,n2,n3,nd1,nd2,nd3,nd1eff,nd2proc,nd3proc,option,zr,zf,comm_fft)
5608 
5609 
5610 !This section has been created automatically by the script Abilint (TD).
5611 !Do not modify the following lines by hand.
5612 #undef ABI_FUNC
5613 #define ABI_FUNC 'fftw3_mpiforw'
5614 !End of the abilint section
5615 
5616  implicit none
5617 
5618 !Arguments ------------------------------------
5619 !scalars
5620  integer,intent(in) :: cplex,comm_fft
5621  integer,intent(in) :: ndat,n1,n2,n3,nd1,nd2,nd3,nd1eff,nd2proc,nd3proc,option
5622 !arrays
5623  real(dp),intent(in) :: zr(2,nd1eff,nd2,nd3proc,ndat)
5624  real(dp),intent(out) :: zf(2,nd1,nd3,nd2proc,ndat)
5625 
5626 !Local variables-------------------------------
5627 !scalars
5628 #ifdef HAVE_FFT_FFTW3
5629  integer :: j,i1,idat,ierr,j2,j2st,j3,jp2st,lzt,nthreads
5630  integer :: ma,mb,n1dfft,n1eff,n2eff,n1zt,ncache,nnd3,nproc_fft,me_fft,lot1,lot2,lot3
5631  integer(KIND_FFTW_PLAN) :: fw_plan1_lot,fw_plan1_rest
5632  integer(KIND_FFTW_PLAN) :: fw_plan2_lot,fw_plan2_rest
5633  integer(KIND_FFTW_PLAN) :: fw_plan3_lot,fw_plan3_rest
5634  character(len=500) :: msg
5635 !arrays
5636  real(dp), allocatable :: zmpi1(:,:,:,:),zmpi2(:,:,:,:) ! work arrays for MPI
5637  real(dp),allocatable :: zw(:,:),zt(:,:,:) ! cache work array and array for transpositions
5638 
5639 ! *************************************************************************
5640 
5641  nproc_fft = xmpi_comm_size(comm_fft); me_fft = xmpi_comm_rank(comm_fft)
5642 
5643  ! find cache size that gives optimal performance on machine
5644  ncache=2*max(n1,n2,n3,1024)
5645  if (ncache/(2*max(n1,n2,n3))<1) then
5646    write(msg,'(5a)')&
5647 &     'ncache has to be enlarged to be able to hold at',ch10, &
5648 &     'least one 1-d FFT of each size even though this will',ch10,&
5649 &     'reduce the performance for shorter transform lengths'
5650    MSG_ERROR(msg)
5651  end if
5652 
5653  ! check input
5654  if (nd1<n1 .or. nd2<n2 .or. nd3<n3) then
5655    MSG_ERROR("forw: assertion error nd1<n1 .or. nd2<n2 .or. nd3<n3")
5656  end if
5657 
5658 !Effective n1 and n2 (complex-to-complex or real-to-complex)
5659  n1eff=n1; n2eff=n2; n1zt=n1
5660  if (cplex==1) then
5661    n1eff=(n1+1)/2; n2eff=n2/2+1; n1zt=2*(n1/2+1)
5662  end if
5663 
5664  lzt=n2eff
5665  if (mod(n2eff,2) == 0) lzt=lzt+1
5666  if (mod(n2eff,4) == 0) lzt=lzt+1
5667 
5668  ! maximal number of big box 3rd dim slices for all procs
5669  nnd3=nd3proc*nproc_fft
5670 
5671  ABI_MALLOC(zw,(2,ncache/2))
5672  ABI_MALLOC(zt,(2,lzt,n1zt))
5673  ABI_MALLOC(zmpi2,(2,n1,nd2proc,nnd3))
5674  if (nproc_fft>1)  then
5675    ABI_MALLOC(zmpi1,(2,n1,nd2proc,nnd3))
5676  end if
5677 
5678  ! Create plans.
5679  ! The prototype for sfftw_plan_many_dft is:
5680  ! sfftw_plan_many_dft(rank, n, howmany,
5681  !   fin,  iembed, istride, idist,
5682  !   fout, oembed, ostride, odist, isign, my_flags)
5683 
5684  lot1=ncache/(2*n1)
5685  lot2=ncache/(2*n2)
5686  lot3=ncache/(2*n3)
5687 
5688  nthreads = xomp_get_num_threads(open_parallel=.TRUE.)
5689  !nthreads = 1
5690 
5691  fw_plan3_lot = dplan_many_dft_2D(1, [n3], lot3, &
5692 &    zw, [ncache/2], lot3, 1,                          &
5693 &    zw, [ncache/2], lot3, 1, ABI_FFTW_FORWARD, ABI_FFTW_ESTIMATE, nthreads)
5694 
5695  if (mod(n1, lot3) /= 0) then
5696    fw_plan3_rest = dplan_many_dft_2D(1, [n3], mod(n1, lot3), &
5697 &    zw, [ncache/2], lot3, 1,                                      &
5698 &    zw, [ncache/2], lot3, 1, ABI_FFTW_FORWARD, ABI_FFTW_ESTIMATE, nthreads)
5699  end if
5700 
5701  fw_plan1_lot = dplan_many_dft_2D(1, [n1], lot1, &
5702 &    zt, [lzt, n1zt],   lzt,  1,                       &
5703 &    zw, [ncache/2], lot1, 1, ABI_FFTW_FORWARD, ABI_FFTW_ESTIMATE, nthreads)
5704 
5705  if (mod(n2eff, lot1) /= 0) then
5706    fw_plan1_rest = dplan_many_dft_2D(1, [n1], mod(n2eff, lot1), &
5707 &    zt, [lzt, n1zt],   lzt, 1,                                       &
5708 &    zw, [ncache/2], lot1, 1, ABI_FFTW_FORWARD, ABI_FFTW_ESTIMATE, nthreads)
5709  end if
5710 
5711  fw_plan2_lot = dplan_many_dft_2D(1, [n2], lot2, &
5712 &    zr, [nd1eff,nd2,nd3proc,ndat], nd1eff, 1,         &
5713 &    zw, [ncache/2], lot2, 1, ABI_FFTW_FORWARD, ABI_FFTW_ESTIMATE, nthreads)
5714 
5715  if (mod(n1eff, lot2) /= 0) then
5716    fw_plan2_rest = dplan_many_dft_2D(1, [n2], mod(n1eff,lot2), &
5717 &    zr, [nd1eff,nd2,nd3proc,ndat], nd1eff, 1,                       &
5718 &    zw, [ncache/2], lot2, 1, ABI_FFTW_FORWARD, ABI_FFTW_ESTIMATE, nthreads)
5719  end if
5720 
5721  do idat=1,ndat
5722 
5723    do j3=1,nd3proc
5724      if (me_fft*(nd3proc)+j3 <= n3) then
5725        Jp2st=1; J2st=1
5726 
5727        ! transform along y axis
5728        ! input: R1,R2,R3,(Rp3)
5729        do j=1,n1eff,lot2
5730          ma=j
5731          mb=min(j+(lot2-1),n1eff)
5732          n1dfft=mb-ma+1
5733 
5734          if (n1dfft == lot2) then
5735            call dfftw_execute_dft(fw_plan2_lot,  zr(1,j,1,j3,idat), zw)
5736          else
5737            call dfftw_execute_dft(fw_plan2_rest, zr(1,j,1,j3,idat), zw)
5738          end if
5739 
5740          !  input: R1,G2,R3,(Rp3)
5741          ! output: G2,R1,R3,(Rp3)
5742          if (cplex==2) then
5743            call unswitch(n1dfft,n2,lot2,n1zt,lzt,zw,zt(1,1,j))
5744          else
5745            call unswitchreal(n1dfft,n2,n2eff,lot2,n1zt,lzt,zw,zt(1,1,2*j-1))
5746          end if
5747        end do
5748 
5749        ! transform along x axis
5750        ! input: G2,R1,R3,(Rp3)
5751        do j=1,n2eff,lot1
5752          ma=j
5753          mb=min(j+(lot1-1),n2eff)
5754          n1dfft=mb-ma+1
5755 
5756          if (n1dfft == lot1) then
5757            call dfftw_execute_dft(fw_plan1_lot,  zt(1,j,1), zw)
5758          else
5759            call dfftw_execute_dft(fw_plan1_rest, zt(1,j,1), zw)
5760          end if
5761 
5762          ! input:  G2,G1,R3,Gp2,(Rp3)
5763          ! output: G1,G2,R3,Gp2,(Rp3)
5764          ! write(std_out,*) 'J2st,Jp2st',J2st,Jp2st
5765          if (nproc_fft == 1) then
5766            call unmpiswitch(j3,n1dfft,Jp2st,J2st,lot1,n1,nd2proc,nd3proc,nproc_fft,option,zw,zmpi2)
5767          else
5768            call unmpiswitch(j3,n1dfft,Jp2st,J2st,lot1,n1,nd2proc,nd3proc,nproc_fft,option,zw,zmpi1)
5769          end if
5770        end do
5771 
5772      end if
5773    end do ! j3
5774 
5775    ! Interprocessor data transposition
5776    ! input:  G1,G2,R3,Gp2,(Rp3)
5777    ! output: G1,G2,R3,Rp3,(Gp2)
5778    if (nproc_fft>1) then
5779      call xmpi_alltoall(zmpi1,2*n1*nd2proc*nd3proc, &
5780 &                       zmpi2,2*n1*nd2proc*nd3proc,comm_fft,ierr)
5781    end if
5782 
5783    ! transform along z axis
5784    ! input: G1,G2,R3,(Gp2)
5785 
5786    do j2=1,nd2proc
5787      if (me_fft*(nd2proc)+j2 <= n2eff) then
5788        do i1=1,n1,lot3
5789          ma=i1
5790          mb=min(i1+(lot3-1),n1)
5791          n1dfft=mb-ma+1
5792 
5793          ! input:  G1,G2,R3,(Gp2)
5794          ! output: G1,R3,G2,(Gp2)
5795          call unscramble(i1,j2,lot3,n1dfft,n1,n3,nd2proc,nd3,zmpi2,zw)
5796 
5797          if (n1dfft == lot3) then
5798            call dfftw_execute_dft(fw_plan3_lot, zw, zw)
5799          else
5800            call dfftw_execute_dft(fw_plan3_rest, zw, zw)
5801          end if
5802 
5803          call unfill(nd1,nd3,lot3,n1dfft,n3,zw,zf(1,i1,1,j2,idat))
5804          ! output: G1,G3,G2,(Gp2)
5805        end do
5806      end if
5807    end do
5808 
5809  end do ! idat
5810 
5811  call dfftw_destroy_plan(fw_plan3_lot)
5812  if (mod(n1, lot3) /= 0) then
5813    call dfftw_destroy_plan(fw_plan3_rest)
5814  end if
5815 
5816  call dfftw_destroy_plan(fw_plan1_lot)
5817  if (mod(n2eff, lot1) /= 0) then
5818    call dfftw_destroy_plan(fw_plan1_rest)
5819  end if
5820 
5821  call dfftw_destroy_plan(fw_plan2_lot)
5822  if (mod(n1eff, lot2) /= 0) then
5823    call dfftw_destroy_plan(fw_plan2_rest)
5824  end if
5825 
5826  ABI_FREE(zmpi2)
5827  ABI_FREE(zw)
5828  ABI_FREE(zt)
5829  if (nproc_fft>1)  then
5830    ABI_FREE(zmpi1)
5831  end if
5832 
5833 #else
5834  MSG_ERROR("FFTW3 support not activated")
5835  ABI_UNUSED((/cplex,ndat,n1,n2,n3,nd1,nd2,nd1eff,nd2proc,nd3proc,option,comm_fft/))
5836  ABI_UNUSED((/zf(1,1,1,1,1),zr(1,1,1,1,1)/))
5837 #endif
5838 
5839 end subroutine fftw3_mpiforw

m_fftw3/fftw3_mpiforw_manywf [ Functions ]

[ Top ] [ m_fftw3 ] [ Functions ]

NAME

  fftw3_mpiforw_manywf

FUNCTION

   Does multiple 3-dim backward FFTs from real into Fourier space
   Adopt standard convention that isign=-1 for forward transform
   CALCULATES THE DISCRETE FOURIERTRANSFORM

   ZF(I1,I3,I2)=S_(j1,j2,j3) EXP(isign*i*2*pi*(j1*i1/n1+j2*i2/n2+j3*i3/n3)) ZR(j1,j2,j3)

   in parallel using MPI/OpenMP.

 INPUT:
   ZR: input array
        ZR(1,i1,i2,i3,idat)=real(R(i1,i2,i3,idat))
        ZR(2,i1,i2,i3,idat)=imag(R(i1,i2,i3,idat))
        i1=1,n1 , i2=1,n2 , i3=1,n3 , idat=1,ndat
   NOTE that ZR is changed by the routine

   n1,n2,n3: logical dimension of the transform. As transform lengths
             most products of the prime factors 2,3,5 are allowed.
             The detailed table with allowed transform lengths can
             be found in subroutine CTRIG
   nd1,nd2,nd3: Dimension of ZR
   nd3proc=((nd3-1)/nproc_fft)+1  maximal number of big box 3rd dim slices for one proc

 OUTPUT:
   ZF: output array (note the switch of i2 and i3)
        real(F(i1,i3,i2,idat))=ZF(1,i1,i3,i2,idat)
        imag(F(i1,i3,i2,idat))=ZF(2,i1,i3,i2,idat)
   max1 is positive or zero ; m1 >=max1+1
     i1= 1... max1+1 corresponds to positive and zero wavevectors 0 ... max1
     then, if m1 > max1+1, one has min1=max1-m1+1 and
     i1= max1+2 ... m1 corresponds to negative wavevectors min1 ... -1
     i2 and i3 have a similar definition of range
   idat=1,ndat
   md1,md2,md3: Dimension of ZF
   md2proc=((md2-1)/nproc_fft)+1  maximal number of small box 2nd dim slices for one proc
   nproc_fft: number of processors used as returned by MPI_COMM_SIZE
   me_fft: [0:nproc-1] rank of the processor in the FFT communicator.
   comm_fft=MPI communicator for parallel FFT.

NOTES

  The maximum number of processors that can reasonably be used is max(n2/2,n3/2)

  It is very important to find the optimal
  value of NCACHE. NCACHE determines the size of the work array ZW, that
  has to fit into cache. It has therefore to be chosen to equal roughly
   half the size of the physical cache in units of real*8 numbers.
  The optimal value of ncache can easily be determined by numerical
  experimentation. A too large value of ncache leads to a dramatic
  and sudden decrease of performance, a too small value to a to a
  slow and less dramatic decrease of performance. If NCACHE is set
  to a value so small, that not even a single one dimensional transform
  can be done in the workarray zw, the program stops with an error message.

PARENTS

      m_fft

CHILDREN

      fftw3_destroy_plan,fftw3_execute_dft

SOURCE

7187 subroutine fftw3_mpiforw_manywf(cplexwf,ndat,n1,n2,n3,nd1,nd2,nd3proc,&
7188 &        max1,max2,max3,m1,m2,m3,md1,md2proc,md3,zr,zf,comm_fft)
7189 
7190 
7191 !This section has been created automatically by the script Abilint (TD).
7192 !Do not modify the following lines by hand.
7193 #undef ABI_FUNC
7194 #define ABI_FUNC 'fftw3_mpiforw_manywf'
7195 !End of the abilint section
7196 
7197  implicit none
7198 
7199 !Arguments ------------------------------------
7200 !scalars
7201  integer,intent(in) :: cplexwf,ndat,n1,n2,n3,nd1,nd2,nd3proc
7202  integer,intent(in) :: max1,max2,max3,m1,m2,m3,md1,md2proc,md3,comm_fft
7203 !arrays
7204  real(dp),intent(inout) :: zr(2,nd1,nd2,nd3proc,ndat)
7205  real(dp),intent(out) :: zf(2,md1,md3,md2proc,ndat)
7206 
7207 !Local variables-------------------------------
7208 !scalars
7209 #ifdef HAVE_FFT_FFTW3
7210  integer :: j,i1,i2,i3,idat,ierr,nproc_fft,me_fft
7211  integer :: ioption,j2,j3,j2st,jp2st,lot1,lot2,lot3,lzt,m1zt,ma,mb,n1dfft,nnd3
7212  integer :: m2eff,ncache,n1eff,n1half,i1inv,i2inv,i3inv,nthreads
7213  integer(KIND_FFTW_PLAN) :: fw_plan1_lot,fw_plan1_rest
7214  integer(KIND_FFTW_PLAN) :: fw_plan2_lot,fw_plan2_rest
7215  integer(KIND_FFTW_PLAN) :: fw_plan3_lot,fw_plan3_rest
7216  character(len=500) :: msg
7217 !arrays
7218  integer :: requests(ndat)
7219  real(dp) ABI_ASYNC, allocatable :: zmpi1(:,:,:,:,:),zmpi2(:,:,:,:,:) ! work arrays for MPI
7220  real(dp),allocatable :: zw(:,:),zt(:,:,:) ! cache work array and array for transpositions
7221 ! FFT work arrays
7222  real(dp) :: tsec(2)
7223 
7224 ! *************************************************************************
7225 
7226  ! FIXME must provide a default value but which one?
7227  !ioption = 0
7228  ioption = 1
7229  !if (paral_kgb==1) ioption=1
7230 
7231  nproc_fft = xmpi_comm_size(comm_fft); me_fft = xmpi_comm_rank(comm_fft)
7232 
7233  ! find cache size that gives optimal performance on machine
7234  ncache=2*max(n1,n2,n3,1024)
7235  !ncache=2*max(n1,n2,n3,16*1024)
7236 
7237  if (ncache/(2*max(n1,n2,n3))<1) then
7238    write(msg,'(5a)') &
7239 &    'ncache has to be enlarged to be able to hold at',ch10, &
7240 &    'least one 1-d FFT of each size even though this will',ch10,&
7241 &    'reduce the performance for shorter transform lengths'
7242    MSG_ERROR(msg)
7243  end if
7244 
7245  ! Effective m1 and m2 (complex-to-complex or real-to-complex)
7246  n1eff=n1; m2eff=m2; m1zt=n1
7247  if (cplexwf==1) then
7248    n1eff=(n1+1)/2; m2eff=m2/2+1; m1zt=2*(n1/2+1)
7249  end if
7250 
7251  lzt=m2eff
7252  if (mod(m2eff,2)==0) lzt=lzt+1
7253  if (mod(m2eff,4)==0) lzt=lzt+1
7254 
7255  ! maximal number of big box 3rd dim slices for all procs
7256  nnd3=nd3proc*nproc_fft
7257 
7258  ABI_MALLOC(zw,(2,ncache/2))
7259  ABI_MALLOC(zt,(2,lzt,m1zt))
7260  ABI_MALLOC(zmpi2,(2,md1,md2proc,nnd3,ndat))
7261  if (nproc_fft>1)  then
7262    ABI_MALLOC(zmpi1,(2,md1,md2proc,nnd3,ndat))
7263  end if
7264 
7265  ! Create plans.
7266  ! The prototype for sfftw_plan_many_dft is:
7267  ! sfftw_plan_many_dft(rank, n, howmany,
7268  !   fin,  iembed, istride, idist,
7269  !   fout, oembed, ostride, odist, isign, my_flags)
7270 
7271  lot2=ncache/(2*n2)
7272  lot1=ncache/(2*n1)
7273  lot3=ncache/(2*n3)
7274 
7275  nthreads = xomp_get_num_threads(open_parallel=.TRUE.)
7276  !nthreads = 1
7277 
7278  fw_plan3_lot = dplan_many_dft_2D(1, [n3], lot3, &
7279 &    zw, [ncache/2], lot3, 1,                          &
7280 &    zw, [ncache/2], lot3, 1, ABI_FFTW_FORWARD, ABI_FFTW_ESTIMATE, nthreads)
7281 
7282  if (mod(m1, lot3) /= 0) then
7283    fw_plan3_rest = dplan_many_dft_2D(1, [n3], mod(m1, lot3), &
7284 &    zw, [ncache/2], lot3, 1,                                      &
7285 &    zw, [ncache/2], lot3, 1, ABI_FFTW_FORWARD, ABI_FFTW_ESTIMATE, nthreads)
7286  end if
7287 
7288  fw_plan1_lot = dplan_many_dft_2D(1, [n1], lot1, &
7289 &    zt, [lzt, m1zt],   lzt,  1,                       &
7290 &    zw, [ncache/2], lot1, 1, ABI_FFTW_FORWARD, ABI_FFTW_ESTIMATE, nthreads)
7291 
7292  if (mod(m2eff, lot1) /= 0) then
7293    fw_plan1_rest = dplan_many_dft_2D(1, [n1], mod(m2eff, lot1), &
7294 &    zt, [lzt, m1zt],   lzt, 1,                                       &
7295 &    zw, [ncache/2], lot1, 1, ABI_FFTW_FORWARD, ABI_FFTW_ESTIMATE, nthreads)
7296  end if
7297 
7298  fw_plan2_lot = dplan_many_dft_2D(1, [n2], lot2, &
7299 &    zr, [nd1,nd2,nd3proc,ndat], nd1, 1,               &
7300 &    zw, [ncache/2], lot2, 1, ABI_FFTW_FORWARD, ABI_FFTW_ESTIMATE, nthreads)
7301 
7302  if (mod(n1eff, lot2) /= 0) then
7303    fw_plan2_rest = dplan_many_dft_2D(1, [n2], mod(n1eff,lot2), &
7304 &    zr, [nd1,nd2,nd3proc,ndat], nd1, 1,                             &
7305 &    zw, [ncache/2], lot2, 1, ABI_FFTW_FORWARD, ABI_FFTW_ESTIMATE, nthreads)
7306  end if
7307 
7308  do idat=1,ndat
7309    ! Loop over the z-planes treated by this node
7310    do j3=1,nd3proc
7311 
7312      if (me_fft*nd3proc+j3 <= n3) then
7313        Jp2st=1
7314        J2st=1
7315 
7316        ! Treat real wavefunctions.
7317        if (cplexwf==1) then
7318          n1half=n1/2
7319          do i2=1,n2
7320            do i1=1,n1half
7321              zr(1,i1,i2,j3,idat)=zr(1,2*i1-1,i2,j3,idat)
7322              zr(2,i1,i2,j3,idat)=zr(1,2*i1  ,i2,j3,idat)
7323            end do
7324          end do
7325          ! If odd
7326          if(n1half*2/=n1)then
7327            do i2=1,n2
7328              zr(1,n1eff,i2,j3,idat)=zr(1,n1,i2,j3,idat)
7329              zr(2,n1eff,i2,j3,idat)=zero
7330            end do
7331          end if
7332        end if
7333 
7334        ! transform along y axis
7335        ! input: R1,R2,R3,(Rp3)
7336        ! input: R1,G2,R3,(Rp3)
7337        do j=1,n1eff,lot2
7338          ma=j
7339          mb=min(j+(lot2-1),n1eff)
7340          n1dfft=mb-ma+1
7341 
7342          if (n1dfft == lot2) then
7343            call dfftw_execute_dft(fw_plan2_lot,  zr(1,j,1,j3,idat), zw)
7344          else
7345            call dfftw_execute_dft(fw_plan2_rest, zr(1,j,1,j3,idat), zw)
7346          end if
7347 
7348          ! input:  R1,G2,R3,(Rp3)
7349          ! output: G2,R1,R3,(Rp3)
7350          if (cplexwf==2) then
7351            call unswitch_cent(n1dfft,max2,m2,n2,lot2,n1,lzt,zw,zt(1,1,j))
7352          else
7353            call unswitchreal_cent(n1dfft,max2,n2,lot2,n1,lzt,zw,zt(1,1,2*j-1))
7354          end if
7355        end do
7356 
7357        ! transform along x axis
7358        ! input: G2,R1,R3,(Rp3)
7359        do j=1,m2eff,lot1
7360          ma=j
7361          mb=min(j+(lot1-1),m2eff)
7362          n1dfft=mb-ma+1
7363 
7364          if (n1dfft == lot1) then
7365            call dfftw_execute_dft(fw_plan1_lot,  zt(1,j,1), zw)
7366          else
7367            call dfftw_execute_dft(fw_plan1_rest, zt(1,j,1), zw)
7368          end if
7369          ! output: G2,G1,R3,(Rp3)
7370 
7371          ! input:  G2,G1,R3,Gp2,(Rp3)
7372          ! output: G1,G2,R3,Gp2,(Rp3)
7373          if (nproc_fft==1) then
7374            call unmpiswitch_cent(j3,n1dfft,Jp2st,J2st,lot1,max1,md1,m1,n1,&
7375 &            md2proc,nd3proc,nproc_fft,ioption,zw,zmpi2(:,:,:,:,idat))
7376          else
7377            call unmpiswitch_cent(j3,n1dfft,Jp2st,J2st,lot1,max1,md1,m1,n1,&
7378 &            md2proc,nd3proc,nproc_fft,ioption,zw,zmpi1(:,:,:,:,idat))
7379          end if
7380        end do
7381      end if
7382    end do ! j3
7383 
7384    ! Interprocessor data transposition
7385    ! input:  G1,G2,R3,Gp2,(Rp3)
7386    ! output: G1,G2,R3,Rp3,(Gp2)
7387    if (nproc_fft>1) then
7388      call timab(544,1,tsec)
7389      call xmpi_ialltoall(zmpi1(:,:,:,:,idat),2*md1*md2proc*nd3proc, &
7390 &                        zmpi2(:,:,:,:,idat),2*md1*md2proc*nd3proc,comm_fft,requests(idat))
7391      call timab(544,2,tsec)
7392    end if
7393  end do
7394 
7395  do idat=1,ndat
7396     if (nproc_fft>1) call xmpi_wait(requests(idat),ierr)
7397    ! transform along z axis
7398    ! input: G1,G2,R3,(Gp2)
7399 
7400    do j2=1,md2proc
7401      if (me_fft*md2proc+j2 <= m2eff) then
7402        ! write(std_out,*)' forwf_wf : before unscramble, j2,md2proc,me_fft,m2=',j2,md2proc,me_fft,m2
7403        do i1=1,m1,lot3
7404          ma=i1
7405          mb=min(i1+(lot3-1),m1)
7406          n1dfft=mb-ma+1
7407 
7408          ! input:  G1,G2,R3,(Gp2)
7409          ! output: G1,R3,G2,(Gp2)
7410          call unscramble(i1,j2,lot3,n1dfft,md1,n3,md2proc,nnd3,zmpi2(:,:,:,:,idat),zw)
7411 
7412          if (n1dfft == lot3) then
7413            call dfftw_execute_dft(fw_plan3_lot, zw, zw)
7414          else
7415            call dfftw_execute_dft(fw_plan3_rest, zw, zw)
7416          end if
7417 
7418          call unfill_cent(md1,md3,lot3,n1dfft,max3,m3,n3,zw,zf(1,i1,1,j2,idat))
7419          ! output: G1,G3,G2,(Gp2)
7420        end do
7421      end if
7422    end do
7423 
7424    if (cplexwf==1) then
7425      ! Complete missing values with complex conjugate
7426      ! Inverse of ix is located at nx+2-ix , except for ix=1, for which it is 1.
7427      do i3=1,m3
7428        i3inv=m3+2-i3
7429        if(i3==1)i3inv=1
7430 
7431        if (m2eff>1) then
7432          do i2=2,m2eff
7433            i2inv=m2+2-i2
7434            zf(1,1,i3inv,i2inv,idat)= zf(1,1,i3,i2,idat)
7435            zf(2,1,i3inv,i2inv,idat)=-zf(2,1,i3,i2,idat)
7436            do i1=2,m1
7437              i1inv=m1+2-i1
7438              zf(1,i1inv,i3inv,i2inv,idat)= zf(1,i1,i3,i2,idat)
7439              zf(2,i1inv,i3inv,i2inv,idat)=-zf(2,i1,i3,i2,idat)
7440            end do
7441          end do
7442        end if
7443      end do
7444    end if
7445 
7446  end do ! idat
7447 
7448  call dfftw_destroy_plan(fw_plan3_lot)
7449  if (mod(m1, lot3) /= 0) then
7450    call dfftw_destroy_plan(fw_plan3_rest)
7451  end if
7452 
7453  call dfftw_destroy_plan(fw_plan1_lot)
7454  if (mod(m2eff, lot1) /= 0) then
7455    call dfftw_destroy_plan(fw_plan1_rest)
7456  end if
7457 
7458  call dfftw_destroy_plan(fw_plan2_lot)
7459  if (mod(n1eff, lot2) /= 0) then
7460    call dfftw_destroy_plan(fw_plan2_rest)
7461  end if
7462 
7463  ABI_FREE(zmpi2)
7464  ABI_FREE(zw)
7465  ABI_FREE(zt)
7466  if (nproc_fft>1)  then
7467    ABI_FREE(zmpi1)
7468  end if
7469 
7470 #else
7471  MSG_ERROR("FFTW3 support not activated")
7472  ABI_UNUSED((/cplexwf,ndat,n1,n2,n3,nd1,nd2,nd3proc/))
7473  ABI_UNUSED((/max1,max2,max3,m1,m2,m3,md1,md2proc,md3,comm_fft/))
7474  ABI_UNUSED((/zf(1,1,1,1,1),zr(1,1,1,1,1)/))
7475 #endif
7476 
7477 end subroutine fftw3_mpiforw_manywf

m_fftw3/fftw3_mpiforw_wf [ Functions ]

[ Top ] [ m_fftw3 ] [ Functions ]

NAME

  fftw3_mpiforw_wf

FUNCTION

   Does multiple 3-dim backward FFTs from real into Fourier space
   Adopt standard convention that isign=-1 for forward transform
   CALCULATES THE DISCRETE FOURIERTRANSFORM

   ZF(I1,I3,I2)=S_(j1,j2,j3) EXP(isign*i*2*pi*(j1*i1/n1+j2*i2/n2+j3*i3/n3)) ZR(j1,j2,j3)

   in parallel using MPI/OpenMP.

 INPUT:
   ZR: input array
        ZR(1,i1,i2,i3,idat)=real(R(i1,i2,i3,idat))
        ZR(2,i1,i2,i3,idat)=imag(R(i1,i2,i3,idat))
        i1=1,n1 , i2=1,n2 , i3=1,n3 , idat=1,ndat
   NOTE that ZR is changed by the routine

   n1,n2,n3: logical dimension of the transform. As transform lengths
             most products of the prime factors 2,3,5 are allowed.
             The detailed table with allowed transform lengths can
             be found in subroutine CTRIG
   nd1,nd2,nd3: Dimension of ZR
   nd3proc=((nd3-1)/nproc_fft)+1  maximal number of big box 3rd dim slices for one proc

 OUTPUT:
   ZF: output array (note the switch of i2 and i3)
        real(F(i1,i3,i2,idat))=ZF(1,i1,i3,i2,idat)
        imag(F(i1,i3,i2,idat))=ZF(2,i1,i3,i2,idat)
   max1 is positive or zero ; m1 >=max1+1
     i1= 1... max1+1 corresponds to positive and zero wavevectors 0 ... max1
     then, if m1 > max1+1, one has min1=max1-m1+1 and
     i1= max1+2 ... m1 corresponds to negative wavevectors min1 ... -1
     i2 and i3 have a similar definition of range
   idat=1,ndat
   md1,md2,md3: Dimension of ZF
   md2proc=((md2-1)/nproc_fft)+1  maximal number of small box 2nd dim slices for one proc
   nproc_fft: number of processors used as returned by MPI_COMM_SIZE
   me_fft: [0:nproc-1] rank of the processor in the FFT communicator.
   comm_fft=MPI communicator for parallel FFT.

NOTES

  The maximum number of processors that can reasonably be used is max(n2/2,n3/2)

  It is very important to find the optimal
  value of NCACHE. NCACHE determines the size of the work array ZW, that
  has to fit into cache. It has therefore to be chosen to equal roughly
   half the size of the physical cache in units of real*8 numbers.
  The optimal value of ncache can easily be determined by numerical
  experimentation. A too large value of ncache leads to a dramatic
  and sudden decrease of performance, a too small value to a to a
  slow and less dramatic decrease of performance. If NCACHE is set
  to a value so small, that not even a single one dimensional transform
  can be done in the workarray zw, the program stops with an error message.

PARENTS

      m_fft

CHILDREN

      fftw3_destroy_plan,fftw3_execute_dft

SOURCE

4939 subroutine fftw3_mpiforw_wf(cplexwf,ndat,n1,n2,n3,nd1,nd2,nd3proc,&
4940 &        max1,max2,max3,m1,m2,m3,md1,md2proc,md3,zr,zf,comm_fft)
4941 
4942 
4943 !This section has been created automatically by the script Abilint (TD).
4944 !Do not modify the following lines by hand.
4945 #undef ABI_FUNC
4946 #define ABI_FUNC 'fftw3_mpiforw_wf'
4947 !End of the abilint section
4948 
4949  implicit none
4950 
4951 !Arguments ------------------------------------
4952 !scalars
4953  integer,intent(in) :: cplexwf,ndat,n1,n2,n3,nd1,nd2,nd3proc
4954  integer,intent(in) :: max1,max2,max3,m1,m2,m3,md1,md2proc,md3,comm_fft
4955 !arrays
4956  real(dp),intent(inout) :: zr(2,nd1,nd2,nd3proc,ndat)
4957  real(dp),intent(out) :: zf(2,md1,md3,md2proc,ndat)
4958 
4959 !Local variables-------------------------------
4960 !scalars
4961 #ifdef HAVE_FFT_FFTW3
4962  integer :: j,i1,i2,i3,idat,ierr,nproc_fft,me_fft,nthreads
4963  integer :: ioption,j2,j3,j2st,jp2st,lot1,lot2,lot3,lzt,m1zt,ma,mb,n1dfft,nnd3
4964  integer :: m2eff,ncache,n1eff,n1half,i1inv,i2inv,i3inv
4965  integer(KIND_FFTW_PLAN) :: fw_plan1_lot,fw_plan1_rest
4966  integer(KIND_FFTW_PLAN) :: fw_plan2_lot,fw_plan2_rest
4967  integer(KIND_FFTW_PLAN) :: fw_plan3_lot,fw_plan3_rest
4968  character(len=500) :: msg
4969 !arrays
4970  real(dp), allocatable :: zmpi1(:,:,:,:),zmpi2(:,:,:,:) ! work arrays for MPI
4971  real(dp),allocatable :: zw(:,:),zt(:,:,:) ! cache work array and array for transpositions
4972 ! FFT work arrays
4973  real(dp) :: tsec(2)
4974 
4975 ! *************************************************************************
4976 
4977  ! FIXME must provide a default value but which one?
4978  !ioption = 0
4979  ioption = 1
4980  !if (paral_kgb==1) ioption=1
4981 
4982  nproc_fft = xmpi_comm_size(comm_fft); me_fft = xmpi_comm_rank(comm_fft)
4983 
4984  ! find cache size that gives optimal performance on machine
4985  ncache=2*max(n1,n2,n3,1024)
4986  !ncache=2*max(n1,n2,n3,16*1024)
4987 
4988  if (ncache/(2*max(n1,n2,n3))<1) then
4989    write(msg,'(5a)') &
4990 &    'ncache has to be enlarged to be able to hold at',ch10, &
4991 &    'least one 1-d FFT of each size even though this will',ch10,&
4992 &    'reduce the performance for shorter transform lengths'
4993    MSG_ERROR(msg)
4994  end if
4995 
4996  ! Effective m1 and m2 (complex-to-complex or real-to-complex)
4997  n1eff=n1; m2eff=m2; m1zt=n1
4998  if (cplexwf==1) then
4999    n1eff=(n1+1)/2; m2eff=m2/2+1; m1zt=2*(n1/2+1)
5000  end if
5001 
5002  lzt=m2eff
5003  if (mod(m2eff,2)==0) lzt=lzt+1
5004  if (mod(m2eff,4)==0) lzt=lzt+1
5005 
5006  ! maximal number of big box 3rd dim slices for all procs
5007  nnd3=nd3proc*nproc_fft
5008 
5009  ABI_MALLOC(zw,(2,ncache/2))
5010  ABI_MALLOC(zt,(2,lzt,m1zt))
5011  ABI_MALLOC(zmpi2,(2,md1,md2proc,nnd3))
5012  if (nproc_fft>1)  then
5013    ABI_MALLOC(zmpi1,(2,md1,md2proc,nnd3))
5014  end if
5015 
5016 !DEBUG
5017 ! write(std_out,'(2a,3i4)' )itoa(me_fft),'fftw3_mpiforw_wf, enter, i1,i2,i3,zr,n1,n2,n3',n1,n2,n3
5018 ! write(std_out,'(2a,3i4)' )itoa(me_fft),'nd1,nd2,nd3proc',nd1,nd2,nd3proc
5019 ! write(std_out,'(2a,3i4)' )itoa(me_fft),'m1,m2,m3',m1,m2,m3
5020 ! write(std_out,'(2a,3i4)' )itoa(me_fft),'max1,max2,max3',max1,max2,max3
5021 ! write(std_out,'(2a,3i4)' )itoa(me_fft),'md1,md2proc,md3',md1,md2proc,md3
5022 ! write(std_out,'(2a,3i4)' )itoa(me_fft),'n1eff,m2eff,m1zt',n1eff,m2eff,m1zt
5023 !ENDDEBUG
5024 
5025  ! Create plans.
5026  ! The prototype for sfftw_plan_many_dft is:
5027  ! sfftw_plan_many_dft(rank, n, howmany,
5028  !   fin,  iembed, istride, idist,
5029  !   fout, oembed, ostride, odist, isign, my_flags)
5030 
5031  lot2=ncache/(2*n2)
5032  lot1=ncache/(2*n1)
5033  lot3=ncache/(2*n3)
5034 
5035  nthreads = xomp_get_num_threads(open_parallel=.TRUE.)
5036  !nthreads = 1
5037 
5038  fw_plan3_lot = dplan_many_dft_2D(1, [n3], lot3, &
5039 &    zw, [ncache/2], lot3, 1,                          &
5040 &    zw, [ncache/2], lot3, 1, ABI_FFTW_FORWARD, ABI_FFTW_ESTIMATE,nthreads)
5041 
5042  if (mod(m1, lot3) /= 0) then
5043    fw_plan3_rest = dplan_many_dft_2D(1, [n3], mod(m1, lot3), &
5044 &    zw, [ncache/2], lot3, 1,                                      &
5045 &    zw, [ncache/2], lot3, 1, ABI_FFTW_FORWARD, ABI_FFTW_ESTIMATE, nthreads)
5046  end if
5047 
5048  fw_plan1_lot = dplan_many_dft_2D(1, [n1], lot1, &
5049 &    zt, [lzt, m1zt],   lzt,  1,                       &
5050 &    zw, [ncache/2], lot1, 1, ABI_FFTW_FORWARD, ABI_FFTW_ESTIMATE, nthreads)
5051 
5052  if (mod(m2eff, lot1) /= 0) then
5053    fw_plan1_rest = dplan_many_dft_2D(1, [n1], mod(m2eff, lot1), &
5054 &    zt, [lzt, m1zt],   lzt, 1,                                       &
5055 &    zw, [ncache/2], lot1, 1, ABI_FFTW_FORWARD, ABI_FFTW_ESTIMATE, nthreads)
5056  end if
5057 
5058  fw_plan2_lot = dplan_many_dft_2D(1, [n2], lot2, &
5059 &    zr, [nd1,nd2,nd3proc,ndat], nd1, 1,               &
5060 &    zw, [ncache/2], lot2, 1, ABI_FFTW_FORWARD, ABI_FFTW_ESTIMATE, nthreads)
5061 
5062  if (mod(n1eff, lot2) /= 0) then
5063    fw_plan2_rest = dplan_many_dft_2D(1, [n2], mod(n1eff,lot2), &
5064 &    zr, [nd1,nd2,nd3proc,ndat], nd1, 1,                             &
5065 &    zw, [ncache/2], lot2, 1, ABI_FFTW_FORWARD, ABI_FFTW_ESTIMATE, nthreads)
5066  end if
5067 
5068  do idat=1,ndat
5069    ! Loop over the z-planes treated by this node
5070    do j3=1,nd3proc
5071 
5072      if (me_fft*nd3proc+j3 <= n3) then
5073        Jp2st=1
5074        J2st=1
5075 
5076        ! Treat real wavefunctions.
5077        if (cplexwf==1) then
5078          n1half=n1/2
5079          do i2=1,n2
5080            do i1=1,n1half
5081              zr(1,i1,i2,j3,idat)=zr(1,2*i1-1,i2,j3,idat)
5082              zr(2,i1,i2,j3,idat)=zr(1,2*i1  ,i2,j3,idat)
5083            end do
5084          end do
5085          ! If odd
5086          if(n1half*2/=n1)then
5087            do i2=1,n2
5088              zr(1,n1eff,i2,j3,idat)=zr(1,n1,i2,j3,idat)
5089              zr(2,n1eff,i2,j3,idat)=zero
5090            end do
5091          end if
5092        end if
5093 
5094        ! transform along y axis
5095        ! input: R1,R2,R3,(Rp3)
5096        ! input: R1,G2,R3,(Rp3)
5097        do j=1,n1eff,lot2
5098          ma=j
5099          mb=min(j+(lot2-1),n1eff)
5100          n1dfft=mb-ma+1
5101 
5102          if (n1dfft == lot2) then
5103            call dfftw_execute_dft(fw_plan2_lot,  zr(1,j,1,j3,idat), zw)
5104          else
5105            call dfftw_execute_dft(fw_plan2_rest, zr(1,j,1,j3,idat), zw)
5106          end if
5107 
5108          ! input:  R1,G2,R3,(Rp3)
5109          ! output: G2,R1,R3,(Rp3)
5110          if (cplexwf==2) then
5111            call unswitch_cent(n1dfft,max2,m2,n2,lot2,n1,lzt,zw,zt(1,1,j))
5112          else
5113            call unswitchreal_cent(n1dfft,max2,n2,lot2,n1,lzt,zw,zt(1,1,2*j-1))
5114          end if
5115        end do
5116 
5117        ! transform along x axis
5118        ! input: G2,R1,R3,(Rp3)
5119        do j=1,m2eff,lot1
5120          ma=j
5121          mb=min(j+(lot1-1),m2eff)
5122          n1dfft=mb-ma+1
5123 
5124          if (n1dfft == lot1) then
5125            call dfftw_execute_dft(fw_plan1_lot,  zt(1,j,1), zw)
5126          else
5127            call dfftw_execute_dft(fw_plan1_rest, zt(1,j,1), zw)
5128          end if
5129          ! output: G2,G1,R3,(Rp3)
5130 
5131          ! input:  G2,G1,R3,Gp2,(Rp3)
5132          ! output: G1,G2,R3,Gp2,(Rp3)
5133          if (nproc_fft==1) then
5134            call unmpiswitch_cent(j3,n1dfft,Jp2st,J2st,lot1,max1,md1,m1,n1,&
5135 &            md2proc,nd3proc,nproc_fft,ioption,zw,zmpi2)
5136          else
5137            call unmpiswitch_cent(j3,n1dfft,Jp2st,J2st,lot1,max1,md1,m1,n1,&
5138 &            md2proc,nd3proc,nproc_fft,ioption,zw,zmpi1)
5139          end if
5140        end do
5141 
5142       end if
5143     end do ! j3
5144 
5145     ! Interprocessor data transposition
5146     ! input:  G1,G2,R3,Gp2,(Rp3)
5147     ! output: G1,G2,R3,Rp3,(Gp2)
5148     if (nproc_fft>1) then
5149       call timab(544,1,tsec)
5150       call xmpi_alltoall(zmpi1,2*md1*md2proc*nd3proc, &
5151 &                        zmpi2,2*md1*md2proc*nd3proc,comm_fft,ierr)
5152       call timab(544,2,tsec)
5153     end if
5154 
5155     ! transform along z axis
5156     ! input: G1,G2,R3,(Gp2)
5157 
5158     do j2=1,md2proc
5159       if (me_fft*md2proc+j2 <= m2eff) then
5160         ! write(std_out,*)' forwf_wf : before unscramble, j2,md2proc,me_fft,m2=',j2,md2proc,me_fft,m2
5161         do i1=1,m1,lot3
5162           ma=i1
5163           mb=min(i1+(lot3-1),m1)
5164           n1dfft=mb-ma+1
5165 
5166           ! input:  G1,G2,R3,(Gp2)
5167           ! output: G1,R3,G2,(Gp2)
5168           call unscramble(i1,j2,lot3,n1dfft,md1,n3,md2proc,nnd3,zmpi2,zw)
5169 
5170           if (n1dfft == lot3) then
5171             call dfftw_execute_dft(fw_plan3_lot, zw, zw)
5172           else
5173             call dfftw_execute_dft(fw_plan3_rest, zw, zw)
5174           end if
5175 
5176           call unfill_cent(md1,md3,lot3,n1dfft,max3,m3,n3,zw,zf(1,i1,1,j2,idat))
5177           ! output: G1,G3,G2,(Gp2)
5178         end do
5179       end if
5180     end do
5181 
5182     if (cplexwf==1) then
5183       ! Complete missing values with complex conjugate
5184       ! Inverse of ix is located at nx+2-ix , except for ix=1, for which it is 1.
5185       do i3=1,m3
5186         i3inv=m3+2-i3
5187         if(i3==1)i3inv=1
5188 
5189         if (m2eff>1) then
5190           do i2=2,m2eff
5191             i2inv=m2+2-i2
5192             zf(1,1,i3inv,i2inv,idat)= zf(1,1,i3,i2,idat)
5193             zf(2,1,i3inv,i2inv,idat)=-zf(2,1,i3,i2,idat)
5194             do i1=2,m1
5195               i1inv=m1+2-i1
5196               zf(1,i1inv,i3inv,i2inv,idat)= zf(1,i1,i3,i2,idat)
5197               zf(2,i1inv,i3inv,i2inv,idat)=-zf(2,i1,i3,i2,idat)
5198             end do
5199           end do
5200         end if
5201       end do
5202     end if
5203 
5204  end do ! idat
5205 
5206  call dfftw_destroy_plan(fw_plan3_lot)
5207  if (mod(m1, lot3) /= 0) then
5208    call dfftw_destroy_plan(fw_plan3_rest)
5209  end if
5210 
5211  call dfftw_destroy_plan(fw_plan1_lot)
5212  if (mod(m2eff, lot1) /= 0) then
5213    call dfftw_destroy_plan(fw_plan1_rest)
5214  end if
5215 
5216  call dfftw_destroy_plan(fw_plan2_lot)
5217  if (mod(n1eff, lot2) /= 0) then
5218    call dfftw_destroy_plan(fw_plan2_rest)
5219  end if
5220 
5221  ABI_FREE(zmpi2)
5222  ABI_FREE(zw)
5223  ABI_FREE(zt)
5224  if (nproc_fft>1)  then
5225    ABI_FREE(zmpi1)
5226  end if
5227 
5228 #else
5229  MSG_ERROR("FFTW3 support not activated")
5230  ABI_UNUSED((/cplexwf,ndat,n1,n2,n3,nd1,nd2,nd3proc/))
5231  ABI_UNUSED((/max1,max2,max3,m1,m2,m3,md1,md2proc,md3,comm_fft/))
5232  ABI_UNUSED((/zf(1,1,1,1,1),zr(1,1,1,1,1)/))
5233 #endif
5234 
5235 end subroutine fftw3_mpiforw_wf

m_fftw3/fftw3_mpifourdp_c2c [ Functions ]

[ Top ] [ m_fftw3 ] [ Functions ]

NAME

 fftw3_mpifourdp_c2c

FUNCTION

 Driver routine for many out-of-place 3D complex-to-complex FFTs of lengths n1, n2, n3.

INPUTS

 cplex=1 if fofr is real, 2 if fofr is complex
 nfft=(effective) number of FFT grid points (for this processor)
 ndat=Number of FFTs to be done.
 isign=sign of Fourier transform exponent: current convention uses
   +1 for transforming from G to r,
   -1 for transforming from r to G.
 fftn2_distrib(n2)=  rank of the processor which own fft planes in 2nd dimension for fourdp
 ffti2_local(n2) = local i2 indices in fourdp
 fftn3_distrib(n3) = rank of the processor which own fft planes in 3rd dimension for fourdp
 ffti3_local(n3) = local i3 indices in fourdp
 comm_fft=MPI communicator for the FFT
 [fftw_flags]=Flags used to create the plan. They can be combined with the "+" operator.
 fin(2*ldx*ldy*ldz*ndat)=The complex array to be transformed.

TODO

   Add c2r and r2c version.

SIDE EFFECTS

 Input/Output
 fofg(2,nfft*ndat)=f(G), complex.
 fofr(cplex*nfft*ndat)=input function f(r) (real or complex)

PARENTS

      m_fftw3

CHILDREN

      fftw3_destroy_plan,fftw3_execute_dft

SOURCE

4385 subroutine fftw3_mpifourdp_c2c(cplex,nfft,ngfft,ndat,isign,&
4386 &  fftn2_distrib,ffti2_local,fftn3_distrib,ffti3_local,fofg,fofr,comm_fft,fftw_flags)
4387 
4388 
4389 !This section has been created automatically by the script Abilint (TD).
4390 !Do not modify the following lines by hand.
4391 #undef ABI_FUNC
4392 #define ABI_FUNC 'fftw3_mpifourdp_c2c'
4393 !End of the abilint section
4394 
4395  implicit none
4396 
4397 !Arguments ------------------------------------
4398 !scalars
4399  integer,intent(in) :: cplex,isign,nfft,ndat,comm_fft
4400  integer,optional,intent(in) :: fftw_flags
4401 !arrays
4402  integer,intent(in) :: ngfft(18)
4403  integer,intent(in) :: fftn2_distrib(ngfft(2)),ffti2_local(ngfft(2))
4404  integer,intent(in) :: fftn3_distrib(ngfft(3)),ffti3_local(ngfft(3))
4405  real(dp),intent(inout) :: fofg(2,nfft*ndat),fofr(cplex*nfft*ndat)
4406 
4407 #ifdef HAVE_FFT_FFTW3_MPI
4408 !Local variables-------------------------------
4409 !scalars
4410  integer,parameter :: rank3=3
4411  integer :: n1,n2,n3,n4,n5,n6,nd2proc,nd3proc,my_flags,me_fft,nproc_fft
4412  integer(C_INTPTR_T) :: alloc_local,local_n0,local_0_start,local_n1,local_1_start
4413  type(C_PTR) :: plan,cptr_cdata
4414 !arrays
4415  integer(C_INTPTR_T) :: fft_sizes(4)
4416  complex(C_DOUBLE_COMPLEX), ABI_CONTIGUOUS pointer :: f03_cdata(:)
4417 
4418 !*************************************************************************
4419 
4420  my_flags=ABI_FFTW_ESTIMATE; if (PRESENT(fftw_flags)) my_flags= fftw_flags
4421 
4422  n1=ngfft(1); n2=ngfft(2); n3=ngfft(3)
4423  ! No augmentation as FFTW3 does not support it
4424  n4=n1; n5=n2; n6=n3
4425  me_fft=ngfft(11); nproc_fft=ngfft(10)
4426 
4427  nd2proc=((n2-1)/nproc_fft) +1
4428  nd3proc=((n6-1)/nproc_fft) +1
4429 
4430  ! Get local data size and allocate (note dimension reversal, we call the C interface directly!)
4431  fft_sizes = [n3,n2,n1,ndat]
4432 
4433  ! Use TRANSPOSED_OUT
4434  my_flags = ior(ABI_FFTW_ESTIMATE, ABI_FFTW_MPI_TRANSPOSED_OUT)
4435 
4436  if (isign == ABI_FFTW_BACKWARD) then
4437    ! G --> R, Exchange n2 and n3
4438    fft_sizes = [n2,n3,n1,ndat]
4439    !my_flags = ior(ABI_FFTW_ESTIMATE, ABI_FFTW_MPI_TRANSPOSED_IN)
4440  end if
4441 
4442  alloc_local = fftw_mpi_local_size_many_transposed(&
4443 &      rank3,fft_sizes(1:3),fft_sizes(4), &
4444 &      FFTW_MPI_DEFAULT_BLOCK,FFTW_MPI_DEFAULT_BLOCK, comm_fft, &
4445 &      local_n0,local_0_start, &
4446 &      local_n1,local_1_start)
4447 
4448  ! C to F
4449  !local_0_start = local_0_start + 1
4450  !local_1_start = local_1_start + 1
4451  !write(std_out,*)"local_n0,local_0_start,alloc_local",local_n0,local_0_start,alloc_local
4452  !write(std_out,*)"local_n1,local_1_start,alloc_local",local_n1,local_1_start,alloc_local
4453 
4454  ! Allocate cptr_cdata, associate to F pointer and build the plane.
4455  cptr_cdata = fftw_alloc_complex(alloc_local)
4456 
4457  call c_f_pointer(cptr_cdata, f03_cdata, [alloc_local])
4458 
4459  plan = fftw_mpi_plan_many_dft(rank3,fft_sizes(1:3),fft_sizes(4), &
4460 &                              FFTW_MPI_DEFAULT_BLOCK,FFTW_MPI_DEFAULT_BLOCK, &
4461 &                              f03_cdata,f03_cdata,comm_fft,isign,my_flags)
4462 
4463  select case (isign)
4464  case (ABI_FFTW_BACKWARD)
4465      ! G --> R
4466      ABI_CHECK(local_n0 == nd2proc, "local_n0 != nd2proc")
4467 
4468      call mpifft_fg2dbox_dpc(nfft,ndat,fofg,n1,n2,n3,n4,nd2proc,n6,fftn2_distrib,ffti2_local,me_fft,f03_cdata)
4469 
4470      ! Compute transform.
4471      call fftw_mpi_execute_dft(plan, f03_cdata, f03_cdata)
4472 
4473      call mpifft_dbox2fr_dpc(n1,n2,n3,n4,n5,nd3proc,ndat,fftn3_distrib,ffti3_local,me_fft,f03_cdata,cplex,nfft,fofr)
4474 
4475  case (ABI_FFTW_FORWARD)
4476      ! R --> G
4477      ABI_CHECK(local_n0 == nd3proc, "local_n0 != nd3proc")
4478 
4479      call mpifft_fr2dbox_dpc(cplex,nfft,ndat,fofr,n1,n2,n3,n4,n5,nd3proc,fftn3_distrib,ffti3_local,me_fft,f03_cdata)
4480 
4481      ! Compute transform.
4482      call fftw_mpi_execute_dft(plan, f03_cdata, f03_cdata)
4483 
4484      ! Scale results.
4485      call mpifft_dbox2fg_dpc(n1,n2,n3,n4,nd2proc,n6,ndat,fftn2_distrib,ffti2_local,me_fft,f03_cdata,nfft,fofg)
4486 
4487  case default
4488    MSG_ERROR("Wrong sign")
4489  end select
4490 
4491  call fftw_destroy_plan(plan)
4492  call fftw_free(cptr_cdata)
4493 
4494 #else
4495  MSG_ERROR("FFTW3_MPI support not activated")
4496  ABI_UNUSED((/cplex,nfft,ngfft(1),ndat,isign,comm_fft/))
4497  ABI_UNUSED((/fftn2_distrib(1),ffti2_local(1)/))
4498  ABI_UNUSED((/fftn3_distrib(1),ffti3_local(1)/))
4499  if (PRESENT(fftw_flags)) then
4500     ABI_UNUSED(fftw_flags)
4501  end if
4502  ABI_UNUSED(fofg(1,1))
4503  ABI_UNUSED(fofr(1))
4504 #endif
4505 
4506 end subroutine fftw3_mpifourdp_c2c

m_fftw3/fftw3_mpifourdp_c2r [ Functions ]

[ Top ] [ m_fftw3 ] [ Functions ]

NAME

  fftw3_mpifourdp_c2r

FUNCTION

 Driver routine for transposed out-of-place 3D complex-to-real FFT of lengths nx, ny, nz.

INPUTS

 nx,ny,nz=Number of point along the three directions.
 ndat=Number of FFTs to be done.
 fofg(2,nx*ny*nz*ndat)=The complex array to be transformed.
 comm_fft=MPI communicator.
 [fftw_flags]=Flags used to create the plan. They can be combined with the "+" operator.
   Defaults to ABI_FFTW_ESTIMATE.

OUTPUT

 fofr(2,nx*ny*nz*ndat)=The backwards real FFT of ff.

NOTES

 LOCAL DATA IN FOURIER SPACE : TRANSPOSED ORDER
 real space     --> dim = [  nx  | ny | nz/np_fft]
 fourier  space --> dim = [ nx/2 | nz | ny/np_ff ]

PARENTS

      m_fftw3

CHILDREN

      fftw3_destroy_plan,fftw3_execute_dft

SOURCE

3944 subroutine fftw3_mpifourdp_c2r(nfft,ngfft,ndat,&
3945   fftn2_distrib,ffti2_local,fftn3_distrib,ffti3_local,fofg,fofr,comm_fft,fftw_flags)
3946 
3947 
3948 !This section has been created automatically by the script Abilint (TD).
3949 !Do not modify the following lines by hand.
3950 #undef ABI_FUNC
3951 #define ABI_FUNC 'fftw3_mpifourdp_c2r'
3952 !End of the abilint section
3953 
3954  implicit none
3955 
3956 !Arguments ------------------------------------
3957 !scalars
3958  integer,intent(in) :: nfft,ndat,comm_fft
3959  integer,optional,intent(in) :: fftw_flags
3960 !arrays
3961  integer,intent(in) :: ngfft(18)
3962  integer,intent(in) :: fftn2_distrib(ngfft(2)),ffti2_local(ngfft(2))
3963  integer,intent(in) :: fftn3_distrib(ngfft(3)),ffti3_local(ngfft(3))
3964  real(dp),intent(in) :: fofg(2,nfft*ndat)
3965  real(dp),intent(out) :: fofr(nfft*ndat)
3966 
3967 !Local variables-------------------------------
3968 #ifdef HAVE_FFT_FFTW3_MPI
3969 !scalars
3970  integer,parameter :: rank3=3
3971  integer :: nx,ny,nz,nproc_fft
3972  type(C_PTR) :: plan_bw, cdata_cplx,cdata_real
3973  integer(C_INTPTR_T) :: i,j,jdat,k,alloc_local,fft_sizes(4),demi_nx,base,idat,kdat
3974  integer(C_INTPTR_T) :: local_n0, local_0_start, local_n1, local_1_start
3975 !arrays
3976  complex(C_DOUBLE_COMPLEX), ABI_CONTIGUOUS pointer :: data_cplx(:,:,:)
3977  real(C_DOUBLE), ABI_CONTIGUOUS pointer :: data_real(:,:,:)
3978 
3979 ! *************************************************************************
3980 
3981  !ABI_CHECK(ndat==1, "ndat > 1 not implemented yet")
3982 
3983  nx=ngfft(1); ny=ngfft(2); nz=ngfft(3)
3984  nproc_fft = xmpi_comm_size(comm_fft)
3985 
3986  demi_nx = nx/2 + 1
3987  fft_sizes(1)=nz
3988  fft_sizes(2)=ny
3989  fft_sizes(3)=demi_nx
3990  fft_sizes(4)=ndat
3991 
3992  alloc_local = fftw_mpi_local_size_many_transposed(&
3993 &      rank3,fft_sizes(1:3),fft_sizes(4), &
3994 &      FFTW_MPI_DEFAULT_BLOCK,FFTW_MPI_DEFAULT_BLOCK, comm_fft, &
3995 &      local_n0,local_0_start, &
3996 &      local_n1,local_1_start)
3997 
3998  cdata_cplx = fftw_alloc_complex(alloc_local)
3999  cdata_real = fftw_alloc_real(alloc_local*2)
4000 
4001 ! OLD BY FDHAM
4002  ! dimensions are  (x/2,z,y) in Fourier's Space
4003  call c_f_pointer(cdata_cplx, data_cplx, [demi_nx  ,fft_sizes(1),local_n1])
4004  ! dimensions in real space : (nx,ny,nz/nproc)
4005  call c_f_pointer(cdata_real, data_real, [2*demi_nx,fft_sizes(2),local_n0])
4006 
4007  ! dimensions are  (x/2,z,y) in Fourier's Space
4008  !call c_f_pointer(cdata_cplx, data_cplx, [demi_nx  ,fft_sizes(1),local_n0])
4009 
4010  !! dimensions in real space : (nx,ny,nz/nproc)
4011  !call c_f_pointer(cdata_real, data_real, [2*demi_nx,fft_sizes(2),local_n1])
4012 
4013  fft_sizes(3)=nx
4014  plan_bw =  fftw_mpi_plan_many_dft_c2r(&
4015 &      rank3,fft_sizes(1:3),fft_sizes(4), &
4016 &      FFTW_MPI_DEFAULT_BLOCK,FFTW_MPI_DEFAULT_BLOCK, &
4017 &      data_cplx, data_real , &
4018 &      comm_fft,ior(ABI_FFTW_ESTIMATE,ABI_FFTW_MPI_TRANSPOSED_IN))
4019 
4020  do idat=1,ndat
4021    do k=1, nz
4022      do j=1, ny / nproc_fft
4023        jdat = j + (idat-1) * ny / nproc_fft
4024        base = nx*((j-1) + (ny/nproc_fft)*(k-1)) + (idat-1) * nfft
4025        do i=1, demi_nx
4026          data_cplx(i,k,jdat) = CMPLX(fofg(1, i + base), fofg(2, i + base), kind=C_DOUBLE_COMPLEX)
4027        end do
4028      end do
4029    end do
4030  end do
4031 
4032  ! compute transform (as many times as desired)
4033  call fftw_mpi_execute_dft_c2r(plan_bw, data_cplx, data_real)
4034 
4035  do idat=1,ndat
4036    do k=1,local_n0
4037      kdat = k + (idat - 1) * local_n0
4038      do j=1,ny
4039        base = nx*((j-1) + ny*(k-1)) + (idat - 1) * nfft
4040        do i=1,nx
4041          fofr(i+base) = data_real(i,j,kdat)
4042        end do
4043      end do
4044    end do
4045  end do
4046 
4047  call fftw_destroy_plan(plan_bw)
4048  call fftw_free(cdata_cplx)
4049  call fftw_free(cdata_real)
4050 
4051 #else
4052  MSG_ERROR("FFTW3_MPI support not activated")
4053  ABI_UNUSED((/nfft,ngfft(1),ndat,comm_fft/))
4054  ABI_UNUSED((/fftn2_distrib(1),ffti2_local(1)/))
4055  ABI_UNUSED((/fftn3_distrib(1),ffti3_local(1)/))
4056  if (PRESENT(fftw_flags)) then
4057     ABI_UNUSED(fftw_flags)
4058  end if
4059  ABI_UNUSED(fofg(1,1))
4060  ABI_UNUSED(fofr(1))
4061 #endif
4062 
4063 end subroutine fftw3_mpifourdp_c2r

m_fftw3/fftw3_mpifourdp_r2c [ Functions ]

[ Top ] [ m_fftw3 ] [ Functions ]

NAME

  fftw3_mpifourdp_r2c

FUNCTION

 Driver routine for out-of-place 3D real-to-complex FFT of lengths nx, ny, nz.

INPUTS

 fofr(nx*ny*nz*ndat)=The real array to be transformed.
 ndat=Number of FFTs to be done.
 comm_fft=MPI communicator for the FFT.
 [fftw_flags]=Flags used to create the plan. They can be combined with the "+" operator.
   Defaults to ABI_FFTW_ESTIMATE.

OUTPUT

 fofg(2,nx*ny*nz*ndat)=The forward FFT of ff.

NOTES

 LOCAL DATA FOR FOURIER TRANSFORMS : TRANSPOSED ORDER AND DISTRIBUTED
 real space     --> dim = [  nx  | ny | nz/np_fft ]
 fourier  space --> dim = [  nx | nz | ny/np_fft ]
 we can't take in account the symetric of the real case because after
 fft have been computed, the symetric data needed are dispatched over
 other process in parallel

PARENTS

      m_fftw3

CHILDREN

      fftw3_destroy_plan,fftw3_execute_dft

SOURCE

4102 subroutine fftw3_mpifourdp_r2c(nfft,ngfft,ndat,&
4103   fftn2_distrib,ffti2_local,fftn3_distrib,ffti3_local,fofg,fofr,comm_fft,fftw_flags)
4104 
4105 
4106 !This section has been created automatically by the script Abilint (TD).
4107 !Do not modify the following lines by hand.
4108 #undef ABI_FUNC
4109 #define ABI_FUNC 'fftw3_mpifourdp_r2c'
4110 !End of the abilint section
4111 
4112  implicit none
4113 
4114 !Arguments ------------------------------------
4115 !scalars
4116  integer,intent(in) :: nfft,ndat,comm_fft
4117  integer,optional,intent(in) :: fftw_flags
4118 !arrays
4119  integer,intent(in) :: ngfft(18)
4120  integer,intent(in) :: fftn2_distrib(ngfft(2)),ffti2_local(ngfft(2))
4121  integer,intent(in) :: fftn3_distrib(ngfft(3)),ffti3_local(ngfft(3))
4122  real(dp),intent(in) :: fofr(nfft*ndat)
4123  real(dp),intent(out) :: fofg(2,nfft*ndat)
4124 
4125 !Local variables-------------------------------
4126 #ifdef HAVE_FFT_FFTW3_MPI
4127  !scalars
4128  integer,parameter :: rank3=3
4129  integer :: my_flags,nproc_fft,nx,ny,nz
4130  integer(C_INTPTR_T) :: i,j,k,base,alloc_local,i1,i2,i3,igf,idat,kdat,i2dat,padatf
4131  integer(C_INTPTR_T) :: local_n0,local_0_start,local_n1,local_1_start
4132  real(dp) :: factor_fft
4133  type(C_PTR) :: plan_fw,cdata_cplx,cdata_real
4134 !arrays
4135  complex(C_DOUBLE_COMPLEX), ABI_CONTIGUOUS pointer :: data_cplx(:,:,:),data_real(:,:,:)
4136  integer(C_INTPTR_T) :: fft_sizes(4)
4137 
4138 ! *************************************************************************
4139 
4140  nproc_fft = xmpi_comm_size(comm_fft)
4141 
4142  nx=ngfft(1); ny=ngfft(2); nz=ngfft(3)
4143 
4144  my_flags=ABI_FFTW_ESTIMATE; if (PRESENT(fftw_flags)) my_flags= fftw_flags
4145 
4146  fft_sizes(1)=nz
4147  fft_sizes(2)=ny
4148  fft_sizes(3)=nx
4149  fft_sizes(4)=ndat
4150 
4151  ! Get parallel sizes
4152  alloc_local = fftw_mpi_local_size_many_transposed(&
4153 &      rank3,fft_sizes(1:3),fft_sizes(4), &
4154 &      FFTW_MPI_DEFAULT_BLOCK,FFTW_MPI_DEFAULT_BLOCK, comm_fft, &
4155 &      local_n0,local_0_start, &
4156 &      local_n1,local_1_start)
4157 
4158  ! Allocate data and reference it
4159 
4160  ! local data in real space     --> dim = [nx | ny | nz/nproc_fft]
4161  cdata_real = fftw_alloc_complex(alloc_local)
4162  call c_f_pointer(cdata_real, data_real, [fft_sizes(3),fft_sizes(2),local_n0])
4163 
4164  ! local data in Fourier space --> dim = [nx | nz | ny/nproc_fft]
4165  cdata_cplx = fftw_alloc_complex(alloc_local)
4166  call c_f_pointer(cdata_cplx, data_cplx, [fft_sizes(3),fft_sizes(1),local_n1])
4167 
4168  ! TODO: Use true real to complex API!
4169  ! Create Plan C2C (nx,ny,nz)
4170  plan_fw =  fftw_mpi_plan_many_dft(&
4171 &      rank3,fft_sizes(1:3),fft_sizes(4), &
4172 &      FFTW_MPI_DEFAULT_BLOCK,FFTW_MPI_DEFAULT_BLOCK, &
4173 &      data_real, data_cplx , &
4174 &      comm_fft,ABI_FFTW_FORWARD,ior(ABI_FFTW_ESTIMATE,ABI_FFTW_MPI_TRANSPOSED_OUT))
4175 
4176  ! Copy input data in correct format
4177  do idat=1,ndat
4178    do k=1,local_n0
4179      kdat = k + (idat-1) * local_n0
4180      do j=1, ny
4181        base = nx*((j-1) + ny*(k-1)) + (idat-1) * nfft
4182        do i=1, nx
4183          data_real(i,j,kdat) = CMPLX(fofr(i+base),zero, kind=C_DOUBLE_COMPLEX)
4184        end do
4185      end do
4186    end do
4187  end do
4188 
4189  ! Compute transform
4190  call fftw_mpi_execute_dft(plan_fw, data_real, data_cplx)
4191 
4192  factor_fft = one / (nx*ny*nz)
4193 
4194  do idat=1,ndat
4195     padatf=(idat-1)*nfft
4196     do i3=1,nz
4197        do i2=1,ny/nproc_fft ! equivalent a local_n1
4198           i2dat = i2 + (idat-1) * ny/nproc_fft
4199           do i1=1,nx
4200              igf = i1 + nx*( (i2-1) + (i3-1)*ny/nproc_fft  ) + padatf
4201              fofg(1,igf) = real(data_cplx(i1,i3,i2dat)) * factor_fft
4202              fofg(2,igf) =aimag(data_cplx(i1,i3,i2dat)) * factor_fft
4203           end do
4204        end do
4205     end do
4206  end do
4207 
4208  call fftw_destroy_plan(plan_fw)
4209  call fftw_free(cdata_cplx)
4210  call fftw_free(cdata_real)
4211 
4212 #else
4213  MSG_ERROR("FFTW3_MPI support not activated")
4214  ABI_UNUSED((/nfft,ngfft(1),ndat,comm_fft/))
4215  ABI_UNUSED((/fftn2_distrib(1),ffti2_local(1)/))
4216  ABI_UNUSED((/fftn3_distrib(1),ffti3_local(1)/))
4217  if (PRESENT(fftw_flags)) then
4218     ABI_UNUSED(fftw_flags)
4219  end if
4220  ABI_UNUSED(fofg(1,1))
4221  ABI_UNUSED(fofr(1))
4222 #endif
4223 
4224 end subroutine fftw3_mpifourdp_r2c

m_fftw3/fftw3_plan3_t [ Types ]

[ Top ] [ m_fftw3 ] [ Types ]

NAME

 fftw3_plan3_t

FUNCTION

  Structure storing the pointer to the FFTW plan as well as the options used to generate it.

SOURCE

158  type,private :: fftw3_plan3_t
159    integer :: isign=0                           ! Sign of the exponential in the FFT
160    integer :: ndat=-1                           ! Number of FFTs associated to the plan
161    integer :: flags=-HUGE(0)                    ! FFTW3 flags used to construct the plan.
162    integer(KIND_FFTW_PLAN) :: plan=NULL_PLAN    ! FFTW3 plan.
163    integer :: nthreads=1                        ! The number of threads associated to the plan.
164    integer :: idist=-1
165    integer :: odist=-1
166    integer :: istride=-1
167    integer :: ostride=-1
168    integer :: n(3)=-1                           ! The number of FFT divisions.
169    integer :: inembed(3)=-1
170    integer :: onembed(3)=-1
171    !integer(C_INT) :: alignment(2)              ! The alignment of the arrays used to construct the plan.
172  end type fftw3_plan3_t

m_fftw3/fftw3_poisson [ Functions ]

[ Top ] [ m_fftw3 ] [ Functions ]

NAME

 fftw3_poisson

FUNCTION

  Solve the Poisson equation in G-space given the density, n(r),
  in real space of the FFT box.

INPUTS

 cplex=1 if fofr is real, 2 if fofr is complex
 nx,ny,nz=Number of FFT points along the three directions.
 ldx,ldy,ldz=Leading dimension of the array nr and vg.
 ndat = Number of densities
 vg(nx*ny*nz)=Potential in reciprocal space.

SIDE EFFECTS

 nr(cplex*ldx*ldy*ldz*ndat)
    input: n(r) (real or complex)
    output: the hartree potential in real space

NOTES

   vg is given on the FFT mesh instead of the augmented mesh [ldx,ldy,ldz]
   in order to simplify the interface with the other routines operating of vg

PARENTS

      m_fft

CHILDREN

      fftw3_destroy_plan,fftw3_execute_dft

SOURCE

8005 subroutine fftw3_poisson(cplex,nx,ny,nz,ldx,ldy,ldz,ndat,vg,nr)
8006 
8007 
8008 !This section has been created automatically by the script Abilint (TD).
8009 !Do not modify the following lines by hand.
8010 #undef ABI_FUNC
8011 #define ABI_FUNC 'fftw3_poisson'
8012 !End of the abilint section
8013 
8014  implicit none
8015 
8016 !Arguments ------------------------------------
8017 !scalars
8018  integer,intent(in) :: cplex,nx,ny,nz,ldx,ldy,ldz,ndat
8019 !arrays
8020  real(dp),intent(inout) :: nr(cplex*ldx*ldy*ldz*ndat)
8021  real(dp),intent(in) :: vg(nx*ny*nz)
8022 
8023 #ifdef HAVE_FFT_FFTW3
8024 !Local variables-------------------------------
8025 !scalars
8026  integer,parameter :: rank1=1,rank2=2
8027  integer :: ii,jj,kk,sidx,ig,ir,vgbase,ypad
8028  integer, parameter :: nthreads=1
8029  integer(KIND_FFTW_PLAN) :: bw_plan_xy,bw_plan3
8030  integer(KIND_FFTW_PLAN) :: fw_plan_xy,fw_plan3
8031  real(dp) :: fft_fact,vg_fftfact
8032 
8033 ! *************************************************************************
8034 
8035  !write(std_out,*)"in poisson"
8036  ABI_CHECK(cplex==2,"cplex!=2 not coded")
8037  ABI_CHECK(ndat==1,"ndat!=1 not coded")
8038 
8039  fft_fact = one/(nx*ny*nz)
8040 
8041  ! The prototype for sfftw_plan_many_dft is:
8042  ! sfftw_plan_many_dft(n, howmany,
8043  !   fin,  iembed, istride, idist,
8044  !   fout, oembed, ostride, odist, isign, my_flags)
8045 
8046  ! 1) ldx*ldy transforms along Rz.
8047  fw_plan3 = fftw3_plan_many_dft(rank1, (/nz/), ldx*ldy, & ! We have to visit the entire augmented x-y plane!
8048 &   nr, (/ldx, ldy, ldz/), ldx*ldy, 1,                  &
8049 &   nr, (/ldx, ldy, ldz/), ldx*ldy, 1, ABI_FFTW_FORWARD, ABI_FFTW_ESTIMATE, nthreads)
8050 
8051  call fftw3_execute_dft(fw_plan3, nr, nr) ! Now we have nr(x,y,Gz)
8052  call fftw3_destroy_plan(fw_plan3)
8053 
8054  ! R --> G Transforms in x-y plane
8055  fw_plan_xy = fftw3_plan_many_dft(rank2, [nx,ny], 1, &
8056 &     nr, (/ldx, ldy, ldz/), 1, 1,                   &
8057 &     nr, (/ldx, ldy, ldz/), 1, 1, ABI_FFTW_FORWARD, ABI_FFTW_ESTIMATE, nthreads)
8058 
8059  ! G --> R Transforms in x-y plane
8060  bw_plan_xy = fftw3_plan_many_dft(rank2, [nx, ny], 1, &
8061 &     nr, (/ldx, ldy, ldz/), 1, 1,                    &
8062 &     nr, (/ldx, ldy, ldz/), 1, 1, ABI_FFTW_BACKWARD, ABI_FFTW_ESTIMATE, nthreads)
8063 
8064  ! Loop on z-planes.
8065  do kk=1,nz
8066    sidx = 1 + cplex*(kk-1)*ldx*ldy  !+ cplex*(dat-1) * ldx*ldy*ldz
8067 
8068    call fftw3_execute_dft(fw_plan_xy, nr(sidx:), nr(sidx:))
8069 
8070    ! At this point we have nr(Gx,Gy,Gz) on the current plane.
8071    ! Multiply by vc(Gx,Gy,Gz) and then back transform immediately to get vc(x,y,Gz)
8072    ! Note that nr is complex whereas vg is real.
8073    ! Besides, FFTW returns not normalized FTs if sign=-1 so we have to scale by fft_fact
8074    vgbase = (kk-1)*nx*ny !;vgbase = (kk-1)*ldx*ldy
8075 
8076    ig = 0
8077    do jj=1,ny
8078      ypad = cplex*(jj-1)*ldx + sidx
8079      do ii=1,nx
8080        ig = ig + 1
8081        vg_fftfact = vg(vgbase+ig) * fft_fact
8082 
8083        ir = cplex*(ii-1) + ypad
8084        nr(ir:ir+1) = nr(ir:ir+1) * vg_fftfact
8085      end do
8086    end do
8087 
8088    call fftw3_execute_dft(bw_plan_xy, nr(sidx:), nr(sidx:))
8089  end do
8090 
8091  ! Free plans
8092  call fftw3_destroy_plan(fw_plan_xy)
8093  call fftw3_destroy_plan(bw_plan_xy)
8094 
8095  ! Final transforms of vc(x,y,Gz) along Gz to get vc(x,y,z)
8096  bw_plan3 = fftw3_plan_many_dft(rank1, (/nz/), ldx*ldy, & ! We have to visit the entire augmented x-y plane!
8097 &   nr, (/ldx, ldy, ldz/), ldx*ldy, 1,                  &
8098 &   nr, (/ldx, ldy, ldz/), ldx*ldy, 1, ABI_FFTW_BACKWARD, ABI_FFTW_ESTIMATE, nthreads)
8099 
8100  call fftw3_execute_dft(bw_plan3, nr, nr)
8101  call fftw3_destroy_plan(bw_plan3)
8102 
8103 #else
8104  ABI_UNUSED((/cplex,nx,ny,nz,ldx,ldy,ldz,ndat/))
8105  ABI_UNUSED((/nr(1),vg(1)/))
8106 #endif
8107 
8108 end subroutine fftw3_poisson
8109 !!**
8110 
8111 !----------------------------------------------------------------------
8112 
8113 END MODULE m_fftw3

m_fftw3/fftw3_r2c_op [ Functions ]

[ Top ] [ m_fftw3 ] [ Functions ]

NAME

  fftw3_r2c_op

FUNCTION

 Driver routine for out-of-place 3D real-to-complex FFT of lengths nx, ny, nz.

INPUTS

 nx,ny,nz=Number of points along the three directions.
 ldx,ldy,ldz=Physical dimensions of the f array (to avoid cache conflicts).
 ff(ldx*ldy*ldz*ndat)=The real array to be transformed.
 ndat=Number of FFTs to be done.
 [fftw_flags]=Flags used to create the plan. They can be combined with the "+" operator.
   Defaults to ABI_FFTW_ESTIMATE.

OUTPUT

 gg(2,nx*ny*nz*ndat)=The forward FFT of ff.

NOTES

  FIXME For the time-being. No augmentation of the mesh to reduce memory conflicts, as MKL crashes
  if the advanced interface is used.

PARENTS

      m_fftw3

CHILDREN

      fftw3_destroy_plan,fftw3_execute_dft

SOURCE

1947 subroutine fftw3_r2c_op(nx,ny,nz,ldx,ldy,ldz,ndat,ff,gg,fftw_flags)
1948 
1949 
1950 !This section has been created automatically by the script Abilint (TD).
1951 !Do not modify the following lines by hand.
1952 #undef ABI_FUNC
1953 #define ABI_FUNC 'fftw3_r2c_op'
1954 !End of the abilint section
1955 
1956  implicit none
1957 
1958 !Arguments ------------------------------------
1959 !scalars
1960  integer,intent(in) :: nx,ny,nz,ldx,ldy,ldz,ndat
1961  integer,optional,intent(in) :: fftw_flags
1962 !arrays
1963  real(dp),intent(in) :: ff(ldx*ldy*ldz*ndat)
1964  real(dp),intent(out) :: gg(2,ldx*ldy*ldz*ndat)
1965 
1966 #ifdef HAVE_FFT_FFTW3
1967 !Local variables-------------------------------
1968 !scalars
1969  integer,parameter :: rank3=3,nt_all=-1
1970  integer :: nhp,my_flags,idist,odist,padx,i1,i2,i3,igp,igf,imgf,stride
1971  integer :: i1inv,i2inv,i3inv,idat,padatf
1972  integer(KIND_FFTW_PLAN) :: my_plan
1973 !arrays
1974  integer :: inembed(rank3),onembed(rank3),n(rank3)
1975  integer,allocatable :: i1inver(:),i2inver(:),i3inver(:)
1976  real(dp),allocatable :: gg_hp(:,:)
1977 
1978 ! *************************************************************************
1979 
1980  my_flags=ABI_FFTW_ESTIMATE; if (PRESENT(fftw_flags)) my_flags= fftw_flags
1981 
1982  idist = ldx*ldy*ldz
1983  nhp = (nx/2+1)*ny*nz
1984  odist = nhp
1985 
1986  stride = 1
1987  n      = (/nx,ny,nz/)
1988  inembed= (/ldx,ldy,ldz/)
1989  onembed= (/(nx/2+1),ny,nz/)
1990 
1991  ABI_MALLOC(gg_hp,(2,nhp*ndat))
1992 
1993 #ifdef DEV_RC_BUG
1994  if (ndat/=1) MSG_ERROR("ndat/=1 + MKL not coded")
1995 
1996  if (ANY( n /= inembed )) then
1997    MSG_ERROR("Augmentation not supported")
1998  end if
1999 
2000  call dfftw_plan_dft_r2c_3d(my_plan, nx, ny, nz, ff, gg_hp, my_flags)
2001  if (my_plan==NULL_PLAN) then
2002    MSG_ERROR("dfftw_plan_dft_r2c_3d returned NULL_PLAN")
2003  end if
2004 
2005  !fftw_plan fftw_plan_many_dft_r2c(int rank3, const int *n, int howmany,
2006  !  double *in, const int *inembed, int istride, int idist,
2007  !  fftw_complex *out, const int *onembed, int ostride, int odist, unsigned flags);
2008 #else
2009  my_plan = dplan_many_dft_r2c(rank3, n, ndat, ff, inembed, stride, idist, gg_hp, onembed, stride, odist, my_flags, nt_all)
2010 #endif
2011 
2012  ! Now perform the 3D FFT via FFTW. r2c are always ABI_FFTW_FORWARD
2013  call dfftw_execute_dft_r2c(my_plan, ff, gg_hp)
2014 
2015  call fftw3_destroy_plan(my_plan)
2016 
2017  call ZDSCAL(nhp*ndat, one/(nx*ny*nz), gg_hp, 1)  ! FFTW returns not normalized FTs
2018  ! Reconstruct full FFT: Hermitian redundancy: out[i] is the conjugate of out[n-i]
2019  padx = (nx/2+1)
2020 
2021  ABI_MALLOC(i1inver,(padx))
2022  ABI_MALLOC(i2inver,(ny))
2023  ABI_MALLOC(i3inver,(nz))
2024 
2025  i1inver(1)=1
2026  do i1=2,padx
2027    i1inver(i1)=nx+2-i1
2028  end do
2029 
2030  i2inver(1)=1
2031  do i2=2,ny
2032    i2inver(i2)=ny+2-i2
2033  end do
2034 
2035  i3inver(1)=1
2036  do i3=2,nz
2037    i3inver(i3)=nz+2-i3
2038  end do
2039 
2040  igp=0
2041  do idat=1,ndat
2042    padatf = (idat-1)*ldx*ldy*ldz
2043    do i3=1,nz
2044      i3inv = i3inver(i3)
2045      do i2=1,ny
2046        i2inv = i2inver(i2)
2047        do i1=1,padx
2048          igp = igp+1
2049          igf = i1 + (i3-1)*ldx*ldy + (i2-1)*ldx + padatf
2050          gg(:,igf) =  gg_hp(:,igp)
2051          i1inv = i1inver(i1)
2052          if (i1inv/=i1) then
2053            imgf = i1inv + (i3inv-1)*ldx*ldy + (i2inv-1)*ldx + padatf
2054            gg(1,imgf) =  gg_hp(1,igp)
2055            gg(2,imgf) = -gg_hp(2,igp)
2056          end if
2057        end do
2058      end do
2059    end do
2060  end do
2061 
2062  ABI_FREE(i1inver)
2063  ABI_FREE(i2inver)
2064  ABI_FREE(i3inver)
2065 
2066  ABI_FREE(gg_hp)
2067 
2068 #else
2069  MSG_ERROR("FFTW3 support not activated")
2070  ABI_UNUSED((/nx,ny,nz,ldx,ldy,ldz/))
2071  ABI_UNUSED(ff)
2072  ABI_UNUSED(gg(1,1))
2073  if (PRESENT(fftw_flags)) then
2074    ABI_UNUSED(fftw_flags)
2075  end if
2076 #endif
2077 
2078 end subroutine fftw3_r2c_op

m_fftw3/fftw3_seqfourdp [ Functions ]

[ Top ] [ m_fftw3 ] [ Functions ]

NAME

  fftw3_seqfourdp

FUNCTION

 Driver routine for 3D FFT of lengths nx, ny, nz. Mainly used for densities or potentials.
 FFT Transform is out-of-place

INPUTS

 cplex=1 if fofr is real, 2 if fofr is complex
 nx,ny,nz=Number of point along the three directions.
 ldx,ldy,ldz=Leading dimension of the array.
 ndat = Number of FFTS
 isign= +1 : fofg(G) => fofr(R);
        -1 : fofr(R) => fofg(G)
 fofg(2,ldx*ldy*ldz*ndat)=The array to be transformed.
 [fftw_flags]=Flags used to create the plan. They can be combined with the "+" operator. Defaults to FFTW_ESTIMATE.

OUTPUT

 fofr(cplex,ldx*ldy*ldz*ndat)=The FFT of fofg

PARENTS

      fourdp,m_fft

CHILDREN

      fftw3_destroy_plan,fftw3_execute_dft

SOURCE

297 subroutine fftw3_seqfourdp(cplex,nx,ny,nz,ldx,ldy,ldz,ndat,isign,fofg,fofr,fftw_flags)
298 
299 
300 !This section has been created automatically by the script Abilint (TD).
301 !Do not modify the following lines by hand.
302 #undef ABI_FUNC
303 #define ABI_FUNC 'fftw3_seqfourdp'
304 !End of the abilint section
305 
306  implicit none
307 
308 !Arguments ------------------------------------
309 !scalars
310  integer,intent(in) :: cplex,nx,ny,nz,ldx,ldy,ldz,ndat,isign
311  integer,optional,intent(in) :: fftw_flags
312 !arrays
313  real(dp),intent(inout) :: fofg(2*ldx*ldy*ldz*ndat)
314  real(dp),intent(inout) :: fofr(cplex*ldx*ldy*ldz*ndat)
315 
316 !Local variables-------------------------------
317 !scalars
318  integer :: my_flags
319 
320 ! *************************************************************************
321 
322  my_flags=ABI_FFTW_ESTIMATE; if (PRESENT(fftw_flags)) my_flags= fftw_flags
323 
324  select case (cplex)
325  case (2) ! Complex to Complex.
326 
327    select case (isign)
328    case (ABI_FFTW_BACKWARD) ! +1
329      call fftw3_many_dft_op(nx,ny,nz,ldx,ldy,ldz,ndat,isign,fofg,fofr,fftw_flags=my_flags)
330 
331    case (ABI_FFTW_FORWARD)  ! -1
332      call fftw3_many_dft_op(nx,ny,nz,ldx,ldy,ldz,ndat,isign,fofr,fofg,fftw_flags=my_flags)
333 
334    case default
335      MSG_BUG("Wrong isign")
336    end select
337 
338  case (1) ! Real case.
339 
340    select case (isign)
341    case (ABI_FFTW_FORWARD) ! -1; R --> G
342      call fftw3_r2c_op(nx,ny,nz,ldx,ldy,ldz,ndat,fofr,fofg,fftw_flags=my_flags)
343 
344    case (ABI_FFTW_BACKWARD) ! +1; G --> R
345      call fftw3_c2r_op(nx,ny,nz,ldx,ldy,ldz,ndat,fofg,fofr,fftw_flags=my_flags)
346 
347    case default
348      MSG_BUG("Wrong isign")
349    end select
350 
351  case default
352    MSG_BUG(" Wrong value for cplex")
353  end select
354 
355 end subroutine fftw3_seqfourdp

m_fftw3/fftw3_seqfourwf [ Functions ]

[ Top ] [ m_fftw3 ] [ Functions ]

NAME

 fftw3_seqfourwf

FUNCTION

 Carry out composite Fourier transforms between real and reciprocal (G) space.
 Wavefunctions, contained in a sphere in reciprocal space,
 can be FFT to real space. They can also be FFT from real space
 to a sphere. Also, the density maybe accumulated, and a local potential can be applied.

 The different options are :
 - option=0 --> reciprocal to real space and output the result.
 - option=1 --> reciprocal to real space and accumulate the density.
 - option=2 --> reciprocal to real space, apply the local potential to the wavefunction
                in real space and produce the result in reciprocal space.
 - option=3 --> real space to reciprocal space.
                NOTE that in this case, fftalg=1x1 MUST be used. This may be changed in the future.

INPUTS

 cplex= if 1 , denpot is real, if 2 , denpot is complex
    (cplex=2 only allowed for option=2, and istwf_k=1)
    not relevant if option=0 or option=3, so cplex=0 can be used to minimize memory
 fofgin(2,npwin)=holds input wavefunction in G vector basis sphere.
                 (intent(in) but the routine sphere can modify it for another iflag)
 gboundin(2*mgfft+8,2)=sphere boundary info for reciprocal to real space
 gboundout(2*mgfft+8,2)=sphere boundary info for real to reciprocal space
 istwf_k=option parameter that describes the storage of wfs
 kg_kin(3,npwin)=reduced planewave coordinates, input
 kg_kout(3,npwout)=reduced planewave coordinates, output
 mgfft=maximum size of 1D FFTs
 ndat=number of FFT to do in //
 ngfft(18)=contain all needed information about 3D FFT, see ~abinit/doc/variables/vargs.htm#ngfft
 npwin=number of elements in fofgin array (for option 0, 1 and 2)
 npwout=number of elements in fofgout array (for option 2 and 3)
 ldx,ldy,ldz=ngfft(4),ngfft(5),ngfft(6), dimensions of fofr.
 option= if 0: do direct FFT
         if 1: do direct FFT, then sum the density
         if 2: do direct FFT, multiply by the potential, then do reverse FFT
         if 3: do reverse FFT only
 weight_r=weight to be used for the accumulation of the density in real space
         (needed only when option=1)

OUTPUT

  (see side effects)

SIDE EFFECTS

 Input/Output
 for option==0, fofgin(2,npwin*ndat)=holds input wavefunction in G sphere;
                fofr(2,ldx*ldy*ldz) contains the output Fourier Transform of fofgin;
                no use of denpot, fofgout and npwout.
 for option==1, fofgin(2,npwin*ndat)=holds input wavefunction in G sphere;
                denpot(cplex*ldx,ldy,ldz) contains the input density at input,
                and the updated density at output (accumulated);
                no use of fofgout and npwout.
 for option==2, fofgin(2,npwin*ndat)=holds input wavefunction in G sphere;
                denpot(cplex*ldx,ldy,ldz) contains the input local potential;
                fofgout(2,npwout*ndat) contains the output function;
 for option==3, fofr(2,ldx*ldy*ldz*ndat) contains the input real space wavefunction;
                fofgout(2,npwout*ndat) contains its output Fourier transform;
                no use of fofgin and npwin.

PARENTS

      fourwf

CHILDREN

      fftw3_destroy_plan,fftw3_execute_dft

SOURCE

429 subroutine fftw3_seqfourwf(cplex,denpot,fofgin,fofgout,fofr,gboundin,gboundout,istwf_k,&
430 &  kg_kin,kg_kout,mgfft,ndat,ngfft,npwin,npwout,ldx,ldy,ldz,option,weight_r,weight_i)
431 
432 
433 !This section has been created automatically by the script Abilint (TD).
434 !Do not modify the following lines by hand.
435 #undef ABI_FUNC
436 #define ABI_FUNC 'fftw3_seqfourwf'
437 !End of the abilint section
438 
439  implicit none
440 
441 !Arguments ------------------------------------
442 !scalars
443  integer,intent(in) :: cplex,istwf_k,ldx,ldy,ldz,ndat,npwin,npwout,option,mgfft
444  real(dp),intent(in) :: weight_i,weight_r
445 !arrays
446  integer,intent(in) :: gboundin(2*mgfft+8,2),gboundout(2*mgfft+8,2)
447  integer,intent(in) :: kg_kin(3,npwin),kg_kout(3,npwout),ngfft(18)
448  real(dp),intent(inout) :: denpot(cplex*ldx,ldy,ldz),fofgin(2,npwin*ndat)
449  real(dp),intent(inout) :: fofr(2,ldx*ldy*ldz*ndat)
450  real(dp),intent(out) :: fofgout(2,npwout*ndat)
451 
452 !Local variables-------------------------------
453 !scalars
454  integer,parameter :: me_g0=1,ndat1=1
455  integer :: nx,ny,nz,fftalg,fftalga,fftalgc,fftcache,dat,ptg,ptr,ptgin,ptgout,nthreads
456  character(len=500) :: msg
457  logical :: use_fftrisc
458 !arrays
459  !real(dp),allocatable :: saveden(:,:,:)
460 #if 0
461  logical :: use_fftbox
462  integer,parameter :: shiftg(3)=(/0,0,0/)
463  integer :: symm(3,3)
464 #endif
465 
466 ! *************************************************************************
467 
468  nx=ngfft(1); ny=ngfft(2); nz=ngfft(3)
469  fftalg=ngfft(7); fftalga=fftalg/100; fftalgc=mod(fftalg,10)
470  fftcache=ngfft(8)
471 
472  if (ALL(option /= (/0,1,2,3/))) then
473    write(msg,'(a,i0,a)')' The option number',option,' is not allowed. Only option=0, 1, 2 or 3 are allowed presently.'
474    MSG_ERROR(msg)
475  end if
476 
477  if (option==1 .and. cplex/=1) then
478    write(msg,'(a,i0)')' With the option number 1, cplex must be 1 but it is cplex=',cplex
479    MSG_ERROR(msg)
480  end if
481 
482  if (option==2 .and. (cplex/=1 .and. cplex/=2)) then
483    write(msg,'(a,i0)')' With the option number 2, cplex must be 1 or 2, but it is cplex=',cplex
484    MSG_ERROR(msg)
485  end if
486 
487  use_fftrisc = (fftalgc==2)
488  if (istwf_k==2.and.option==3) use_fftrisc = .FALSE.
489  if (istwf_k>2.and.ANY(option==(/0,3/))) use_fftrisc = .FALSE.
490 
491  nthreads = xomp_get_num_threads(open_parallel=.TRUE.)
492 
493  if (use_fftrisc) then
494    !call wrtout(std_out,strcat(ABI_FUNC,": calls fftw3_fftrisc","COLL")
495 
496    if (ndat==1) then
497      call fftw3_fftrisc(cplex,denpot,fofgin,fofgout,fofr,gboundin,gboundout,istwf_k,kg_kin,kg_kout,&
498 &      mgfft,ngfft,npwin,npwout,ldx,ldy,ldz,option,weight_r,weight_i)
499 
500    else
501      ! All this boilerplate code is needed because the caller might pass zero-sized arrays
502      ! for the arguments that are not referenced and we don't want to have problems at run-time.
503      ! Moreover option 1 requires a special treatment when threads are started at this level.
504 
505      SELECT CASE (option)
506      CASE (0)
507        !
508        ! fofgin -> fofr, no use of denpot, fofgout and npwout.
509        if (.not.fftw3_spawn_threads_here(ndat,nthreads)) then
510          do dat=1,ndat
511            ptg = 1 + (dat-1)*npwin
512            ptr = 1 + (dat-1)*ldx*ldy*ldz
513            call fftw3_fftrisc_dp(cplex,denpot,fofgin(1,ptg),fofgout,fofr(1,ptr),gboundin,gboundout,istwf_k,kg_kin,kg_kout,&
514 &            mgfft,ngfft,npwin,npwout,ldx,ldy,ldz,option,weight_r,weight_i)
515          end do
516        else
517 !$OMP PARALLEL DO PRIVATE(ptg,ptr)
518          do dat=1,ndat
519            ptg = 1 + (dat-1)*npwin
520            ptr = 1 + (dat-1)*ldx*ldy*ldz
521            call fftw3_fftrisc_dp(cplex,denpot,fofgin(1,ptg),fofgout,fofr(1,ptr),gboundin,gboundout,istwf_k,kg_kin,kg_kout,&
522 &            mgfft,ngfft,npwin,npwout,ldx,ldy,ldz,option,weight_r,weight_i)
523          end do
524        end if
525 
526      CASE (1)
527        !fofgin -> local ur and accumulate density in denpot
528        ! TODO this is delicate part to do in parallel, as one should OMP reduce denpot.
529        ! but this causes problems with the stack.
530 
531        do dat=1,ndat
532          ptg = 1 + (dat-1)*npwin
533          ptr = 1 + (dat-1)*ldx*ldy*ldz
534          call fftw3_fftrisc_dp(cplex,denpot,fofgin(1,ptg),fofgout,fofr,gboundin,gboundout,istwf_k,kg_kin,kg_kout,&
535 &          mgfft,ngfft,npwin,npwout,ldx,ldy,ldz,option,weight_r,weight_i)
536        end do
537 
538        ! This version seems not to be efficient
539        !!!  !$OMP PARALLEL PRIVATE(ptg,ptr,saveden)
540        !!!         ABI_MALLOC(saveden, (ldx,ldy,ldz))
541        !!!         saveden = zero
542        !!!  !$OMP DO
543        !!!         do dat=1,ndat
544        !!!           ptg = 1 + (dat-1)*npwin
545        !!!           ptr = 1 + (dat-1)*ldx*ldy*ldz
546        !!!           call fftw3_fftrisc_dp(cplex,saveden,fofgin(1,ptg),fofgout,fofr,gboundin,gboundout,istwf_k,kg_kin,kg_kout,&
547        !!!  &          mgfft,ngfft,npwin,npwout,ldx,ldy,ldz,option,weight_r)
548        !!!         end do
549        !!!  !$OMP END DO NOWAIT
550        !!!  !$OMP CRITICAL (OMPC_addrho)
551        !!!         denpot = denpot + saveden
552        !!!  !$OMP END CRITICAL (OMPC_addrho)
553        !!!         ABI_FREE(saveden)
554        !!!  !$OMP END PARALLEL
555 
556      CASE (2)
557        ! <G|vloc(r)|fofgin(r)> in fofgout
558        if (.not.fftw3_spawn_threads_here(ndat,nthreads)) then
559          do dat=1,ndat
560            ptgin  = 1 + (dat-1)*npwin
561            ptgout = 1 + (dat-1)*npwout
562            call fftw3_fftrisc_dp(cplex,denpot,fofgin(1,ptgin),fofgout(1,ptgout),fofr,gboundin,gboundout,istwf_k,kg_kin,kg_kout,&
563 &            mgfft,ngfft,npwin,npwout,ldx,ldy,ldz,option,weight_r,weight_i)
564          end do
565        else
566 !$OMP PARALLEL DO PRIVATE(ptgin,ptgout)
567          do dat=1,ndat
568            ptgin  = 1 + (dat-1)*npwin
569            ptgout = 1 + (dat-1)*npwout
570            call fftw3_fftrisc_dp(cplex,denpot,fofgin(1,ptgin),fofgout(1,ptgout),fofr,gboundin,gboundout,istwf_k,kg_kin,kg_kout,&
571 &            mgfft,ngfft,npwin,npwout,ldx,ldy,ldz,option,weight_r,weight_i)
572          end do
573        end if
574 
575      CASE (3)
576        ! fofr -> fofgout
577        if (.not.fftw3_spawn_threads_here(ndat,nthreads)) then
578          do dat=1,ndat
579            ptr    = 1 + (dat-1)*ldx*ldy*ldz
580            ptgout = 1 + (dat-1)*npwout
581            call fftw3_fftrisc_dp(cplex,denpot,fofgin,fofgout(1,ptgout),fofr(1,ptr),gboundin,gboundout,istwf_k,kg_kin,kg_kout,&
582 &            mgfft,ngfft,npwin,npwout,ldx,ldy,ldz,option,weight_r,weight_i)
583          end do
584        else
585 !$OMP PARALLEL DO PRIVATE(ptr,ptgout)
586          do dat=1,ndat
587            ptr    = 1 + (dat-1)*ldx*ldy*ldz
588            ptgout = 1 + (dat-1)*npwout
589            call fftw3_fftrisc_dp(cplex,denpot,fofgin,fofgout(1,ptgout),fofr(1,ptr),gboundin,gboundout,istwf_k,kg_kin,kg_kout,&
590 &            mgfft,ngfft,npwin,npwout,ldx,ldy,ldz,option,weight_r,weight_i)
591          end do
592        end if
593 
594      CASE DEFAULT
595        write(msg,'(a,i0,a)')'Option',option,' is not allowed. Only option=0, 1, 2 or 3 are allowed presently.'
596        MSG_ERROR(msg)
597      END SELECT
598 
599    end if
600 
601  else
602 
603 #if 1
604    SELECT CASE (option)
605    CASE (0)
606      !
607      ! FFT u(g) --> u(r)
608      if (.not.fftw3_spawn_threads_here(ndat,nthreads)) then
609        call fftw3_fftug_dp(fftalg,fftcache,npwin,nx,ny,nz,ldx,ldy,ldz,ndat,istwf_k,mgfft,kg_kin,gboundin,fofgin,fofr)
610      else
611 !$OMP PARALLEL DO PRIVATE(ptg, ptr)
612        do dat=1,ndat
613          ptg = 1 + (dat-1)*npwin
614          ptr = 1 + (dat-1)*ldx*ldy*ldz
615          call fftw3_fftug_dp(fftalg,fftcache,npwin,nx,ny,nz,ldx,ldy,ldz,ndat1,&
616 &          istwf_k,mgfft,kg_kin,gboundin,fofgin(1,ptg),fofr(1,ptr))
617        end do
618      end if
619 
620    CASE (1)
621      ! TODO this is delicate part to do in parallel, as one should OMP reduce denpot.
622      call fftw3_fftug_dp(fftalg,fftcache,npwin,nx,ny,nz,ldx,ldy,ldz,ndat,istwf_k,mgfft,kg_kin,gboundin,fofgin,fofr)
623      call cg_addtorho(nx,ny,nz,ldx,ldy,ldz,ndat,weight_r,weight_i,fofr,denpot)
624 
625    CASE (2)
626 
627      if (.not.fftw3_spawn_threads_here(ndat,nthreads)) then
628        call fftw3_fftug_dp(fftalg,fftcache,npwin,nx,ny,nz,ldx,ldy,ldz,ndat,istwf_k,mgfft,kg_kin,gboundin,fofgin,fofr)
629        call cg_vlocpsi(nx,ny,nz,ldx,ldy,ldz,ndat,cplex,denpot,fofr)
630 
631        !  The data for option==2 is now in fofr.
632        call fftw3_fftpad_dp(fofr,nx,ny,nz,ldx,ldy,ldz,ndat,mgfft,-1,gboundout)
633 
634        call cg_box2gsph(nx,ny,nz,ldx,ldy,ldz,ndat,npwout,kg_kout,fofr,fofgout)
635      else
636 
637 !$OMP PARALLEL DO PRIVATE(ptg, ptr)
638        do dat=1,ndat
639          ptg = 1 + (dat-1)*npwin
640          ptr = 1 + (dat-1)*ldx*ldy*ldz
641          call fftw3_fftug_dp(fftalg,fftcache,npwin,nx,ny,nz,ldx,ldy,ldz,ndat1,&
642 &          istwf_k,mgfft,kg_kin,gboundin,fofgin(1,ptg),fofr(1,ptr))
643 
644          call cg_vlocpsi(nx,ny,nz,ldx,ldy,ldz,ndat1,cplex,denpot,fofr(1,ptr))
645 
646          !  The data for option==2 is now in fofr.
647          call fftw3_fftpad_dp(fofr(1,ptr),nx,ny,nz,ldx,ldy,ldz,ndat1,mgfft,-1,gboundout)
648 
649          ptg = 1 + (dat-1)*npwout
650          call cg_box2gsph(nx,ny,nz,ldx,ldy,ldz,ndat1,npwout,kg_kout,fofr(1,ptr),fofgout(1,ptg))
651        end do
652      end if
653 
654    CASE (3)
655      !  The data for option==3 is already in fofr.
656      if (.not.fftw3_spawn_threads_here(ndat,nthreads)) then
657        call fftw3_fftpad_dp(fofr,nx,ny,nz,ldx,ldy,ldz,ndat,mgfft,-1,gboundout)
658        call cg_box2gsph(nx,ny,nz,ldx,ldy,ldz,ndat,npwout,kg_kout,fofr,fofgout)
659      else
660 !$OMP PARALLEL DO PRIVATE(ptg, ptr)
661        do dat=1,ndat
662          ptg = 1 + (dat-1)*npwout
663          ptr = 1 + (dat-1)*ldx*ldy*ldz
664          call fftw3_fftpad_dp(fofr(1,ptr),nx,ny,nz,ldx,ldy,ldz,ndat1,mgfft,-1,gboundout)
665          call cg_box2gsph(nx,ny,nz,ldx,ldy,ldz,ndat1,npwout,kg_kout,fofr(1,ptr),fofgout(1,ptg))
666        end do
667      end if
668 
669    CASE DEFAULT
670      write(msg,'(a,i0,a)')'Option',option,' is not allowed. Only option=0, 1, 2 or 3 are allowed presently.'
671      MSG_ERROR(msg)
672    END SELECT
673 
674 
675 #else
676    symm=0; symm(1,1)=1; symm(2,2)=1; symm(3,3)=1
677    use_fftbox = .FALSE.
678 #ifdef HAVE_OPENMP
679    use_fftbox = (ndat>1)
680 #endif
681    !use_fftbox = .TRUE.
682 
683    SELECT CASE (option)
684    CASE (0)
685      !
686      ! FFT u(g) --> u(r)
687      call sphere(fofgin,ndat,npwin,fofr,nx,ny,nz,ldx,ldy,ldz,kg_kin,istwf_k,1,me_g0,shiftg,symm,one)
688 
689      if (use_fftbox) then
690        call fftw3_many_dft_ip(nx,ny,nz,ldx,ldy,ldz,ndat,ABI_FFTW_BACKWARD,fofr)
691      else
692        call fftw3_fftpad_dp(fofr,nx,ny,nz,ldx,ldy,ldz,ndat,mgfft,ABI_FFTW_BACKWARD,gboundin)
693      end if
694 
695    CASE (1)
696      ! TODO this is delicate part to do in parallel, as one should OMP reduce denpot.
697 
698      call sphere(fofgin,ndat,npwin,fofr,nx,ny,nz,ldx,ldy,ldz,kg_kin,istwf_k,1,me_g0,shiftg,symm,one)
699 
700      if (use_fftbox) then
701        call fftw3_many_dft_ip(nx,ny,nz,ldx,ldy,ldz,ndat,ABI_FFTW_BACKWARD,fofr)
702      else
703        call fftw3_fftpad_dp(fofr,nx,ny,nz,ldx,ldy,ldz,ndat,mgfft,ABI_FFTW_BACKWARD,gboundin)
704      end if
705 
706      call cg_addtorho(nx,ny,nz,ldx,ldy,ldz,ndat,weight_r,weight_i,fofr,denpot)
707 
708    CASE (2)
709 
710      call sphere(fofgin,ndat,npwin,fofr,nx,ny,nz,ldx,ldy,ldz,kg_kin,istwf_k,1,me_g0,shiftg,symm,one)
711 
712      if (use_fftbox) then
713        call fftw3_many_dft_ip(nx,ny,nz,ldx,ldy,ldz,ndat,ABI_FFTW_BACKWARD,fofr)
714      else
715        call fftw3_fftpad_dp(fofr,nx,ny,nz,ldx,ldy,ldz,ndat,mgfft,ABI_FFTW_BACKWARD,gboundin)
716      end if
717 
718      call cg_vlocpsi(nx,ny,nz,ldx,ldy,ldz,ndat,cplex,denpot,fofr)
719 
720      ! The data for option==2 is now in fofr.
721      if (use_fftbox) then
722        call fftw3_many_dft_ip(nx,ny,nz,ldx,ldy,ldz,ndat,ABI_FFTW_FORWARD,fofr)
723      else
724        call fftw3_fftpad_dp(fofr,nx,ny,nz,ldx,ldy,ldz,ndat,mgfft,ABI_FFTW_FORWARD,gboundout)
725      end if
726 
727      call cg_box2gsph(nx,ny,nz,ldx,ldy,ldz,ndat,npwout,kg_kout,fofr,fofgout)
728 
729    CASE (3)
730      !  The data for option==3 is already in fofr.
731      call fftw3_fftpad_dp(fofr,nx,ny,nz,ldx,ldy,ldz,ndat,mgfft,ABI_FFTW_FORWARD,gboundout)
732 
733      call cg_box2gsph(nx,ny,nz,ldx,ldy,ldz,ndat,npwout,kg_kout,fofr,fofgout)
734 
735    CASE DEFAULT
736      write(msg,'(a,i0,a)')'Option',option,' is not allowed. Only option=0, 1, 2 or 3 are allowed presently.'
737      MSG_ERROR(msg)
738    END SELECT
739 #endif
740 
741  end if
742 
743 end subroutine fftw3_seqfourwf

m_fftw3/fftw3_set_nthreads [ Functions ]

[ Top ] [ m_fftw3 ] [ Functions ]

NAME

  fftw3_set_nthreads

FUNCTION

  This function sets the number of threads you want FFTW3 to use (or actually, the maximum number).
  It also performs any one-time initialization required to use FFTW3 threads.
  All plans subsequently created with any planner routine will use nthreads threads.
  If you pass an nthreads argument of 1 (the default), threads are disabled for subsequent plans.
  It does nothing if HAVE_FFT_FFTW3_THREADS is not defined.

INPUTS

  [nthreads]=The number of threads you want FFTW3 to use.  Default xomp_get_max_threads()

PARENTS

      m_fft_prof,m_fftw3

CHILDREN

      fftw3_destroy_plan,fftw3_execute_dft

SOURCE

2592 subroutine fftw3_set_nthreads(nthreads)
2593 
2594 
2595 !This section has been created automatically by the script Abilint (TD).
2596 !Do not modify the following lines by hand.
2597 #undef ABI_FUNC
2598 #define ABI_FUNC 'fftw3_set_nthreads'
2599 !End of the abilint section
2600 
2601  implicit none
2602 
2603 !Arguments ------------------------------------
2604 !scalars
2605  integer,optional,intent(in) :: nthreads
2606 
2607 !Local variables ------------------------------
2608 !scalars
2609 #ifdef HAVE_FFT_FFTW3_THREADS
2610  integer :: istat,nt
2611  integer,parameter :: enough=1
2612  integer,save :: nwarns=0
2613 #endif
2614 
2615 ! *************************************************************************
2616 
2617 #ifdef HAVE_FFT_FFTW3_THREADS
2618  if (THREADS_INITED==0) then
2619    MSG_WARNING("Threads are not initialized")
2620  end if
2621 
2622  if (PRESENT(nthreads)) then
2623    if (nthreads<=0) then
2624      nt = xomp_get_max_threads()
2625    else
2626      nt = nthreads
2627    end if
2628  else
2629    nt = xomp_get_max_threads()
2630  end if
2631 
2632  call dfftw_plan_with_nthreads(nt)
2633 
2634 #ifndef HAVE_OPENMP
2635   if (nwarns <= enough) then
2636     nwarns = nwarns + 1
2637     MSG_WARNING("Using FFTW3 with threads but HAVE_OPENMP is not defined!")
2638   end if
2639 #endif
2640 
2641 #else
2642  if (PRESENT(nthreads)) then
2643    ABI_UNUSED(nthreads)
2644  end if
2645 #endif
2646 
2647 end subroutine fftw3_set_nthreads

m_fftw3/fftw3_spawn_threads_here [ Functions ]

[ Top ] [ m_fftw3 ] [ Functions ]

NAME

 fftw3_spawn_threads_here

FUNCTION

  Helper function that returns true if FFT calls should be OMP
  parallelized in the client code.

INPUTS

  ndat=Number of FFT transforms to do
  nthreads = Number of threads available

PARENTS

SOURCE

3499 function fftw3_spawn_threads_here(ndat,nthreads) result(ans)
3500 
3501 
3502 !This section has been created automatically by the script Abilint (TD).
3503 !Do not modify the following lines by hand.
3504 #undef ABI_FUNC
3505 #define ABI_FUNC 'fftw3_spawn_threads_here'
3506 !End of the abilint section
3507 
3508  implicit none
3509 
3510 !Arguments ------------------------------------
3511 !scalars
3512  integer,intent(in) :: ndat,nthreads
3513  logical :: ans
3514 
3515 ! *************************************************************************
3516 
3517  ans = .FALSE.
3518 #ifdef HAVE_OPENMP
3519  ans = (nthreads > 1 .and. MOD(ndat,nthreads) == 0 .and. .not. USE_LIB_THREADS)
3520 #else
3521  ABI_UNUSED((/ndat,nthreads/))
3522 #endif
3523 
3524 end function fftw3_spawn_threads_here

m_fftw3/fftw3_use_lib_threads [ Functions ]

[ Top ] [ m_fftw3 ] [ Functions ]

NAME

 fftw3_use_lib_threads

FUNCTION

INPUTS

PARENTS

      m_fft

CHILDREN

      fftw3_destroy_plan,fftw3_execute_dft

SOURCE

3545 subroutine fftw3_use_lib_threads(logvar)
3546 
3547 
3548 !This section has been created automatically by the script Abilint (TD).
3549 !Do not modify the following lines by hand.
3550 #undef ABI_FUNC
3551 #define ABI_FUNC 'fftw3_use_lib_threads'
3552 !End of the abilint section
3553 
3554  implicit none
3555 
3556 !Arguments ------------------------------------
3557 !scalars
3558  logical,intent(in) :: logvar
3559 
3560 ! *************************************************************************
3561 
3562  USE_LIB_THREADS = logvar
3563 
3564 end subroutine fftw3_use_lib_threads

m_fftw3/fftw3mpi_many_dft_ip [ Functions ]

[ Top ] [ m_fftw3 ] [ Functions ]

NAME

  fftw3mpi_many_dft_ip

FUNCTION

 Driver routine for many out-of-place 3D complex-to-complex FFTs of lengths nx, ny, nz.

INPUTS

 nx,ny,nz=Number of points along the three directions.
 ldx,ldy,ldz=Physical dimension of the fin and fout arrays (to avoid cache conflicts).
 ndat=Number of FFTs to be done.
 fin(2*ldx*ldy*ldz*ndat)=The complex array to be transformed.
 isign=sign of Fourier transform exponent: current convention uses
   +1 for transforming from G to r,
   -1 for transforming from r to G.
 comm_fft=MPI communicator for the FFT
 [fftw_flags]=Flags used to create the plan. They can be combined with the "+" operator.
   Defaults to ABI_FFTW_ESTIMATE.

TODO

  Remove me

OUTPUT

 fout(2,ldx*ldy*ldz*ndat)=The Fourier transform of fin.

PARENTS

CHILDREN

      fftw3_destroy_plan,fftw3_execute_dft

SOURCE

3728 subroutine fftw3mpi_many_dft_ip(nx,ny,nz,ldx,ldy,ldz,ndat,isign,fin,fout,comm_fft,fftw_flags)
3729 
3730 
3731 !This section has been created automatically by the script Abilint (TD).
3732 !Do not modify the following lines by hand.
3733 #undef ABI_FUNC
3734 #define ABI_FUNC 'fftw3mpi_many_dft_ip'
3735 !End of the abilint section
3736 
3737   implicit none
3738 
3739 !Arguments ------------------------------------
3740 !scalars
3741  integer,intent(in) :: nx,ny,nz,ldx,ldy,ldz,ndat,isign,comm_fft
3742  integer,optional,intent(in) :: fftw_flags
3743 !arrays
3744  real(dp),intent(in) :: fin(2,ldx,ldy,ldz*ndat)
3745  real(dp),intent(out) :: fout(2,ldx,ldy,ldz*ndat)
3746 
3747 #ifdef HAVE_FFT_FFTW3_MPI
3748 !Local variables-------------------------------
3749 !scalars
3750  integer,parameter :: rank3=3
3751  integer :: my_flags
3752  real(dp):: factor_fft
3753 !arrays
3754  type(C_PTR) :: plan, cdata
3755  complex(C_DOUBLE_COMPLEX), ABI_CONTIGUOUS pointer :: data(:,:,:)
3756  integer(C_INTPTR_T) :: i, j, k, alloc_local, local_n0, local_0_start,fft_sizes(4)
3757 
3758 !*************************************************************************
3759 
3760  my_flags=ABI_FFTW_ESTIMATE; if (PRESENT(fftw_flags)) my_flags= fftw_flags
3761 
3762  ! get local data size and allocate (note dimension reversal)
3763  fft_sizes = [nz,ny,nx,ndat]
3764 
3765  alloc_local = fftw_mpi_local_size_many( &
3766 &      rank3,fft_sizes(1:3),fft_sizes(4),&
3767 &      FFTW_MPI_DEFAULT_BLOCK, comm_fft, &
3768 &      local_n0,local_0_start)
3769 
3770  ! Allocate cdata, build the plane and copy data: fin --> data
3771  cdata = fftw_alloc_complex(alloc_local)
3772  call c_f_pointer(cdata, data, [fft_sizes(3),fft_sizes(2), local_n0])
3773 
3774  plan = fftw_mpi_plan_many_dft(rank3,fft_sizes(1:3),fft_sizes(4), &
3775 &                               FFTW_MPI_DEFAULT_BLOCK,FFTW_MPI_DEFAULT_BLOCK, &
3776 &                               data,data,comm_fft,isign,my_flags)
3777 
3778  do k=1, local_n0*ndat
3779     do j=1, ny
3780        do i=1, nx
3781           data(i,j,k) = CMPLX( fin(1,i,j,k),fin(2,i,j,k),C_DOUBLE_COMPLEX)
3782        end do
3783     end do
3784  end do
3785 
3786  ! Compute transform.
3787  call fftw_mpi_execute_dft(plan, data, data)
3788 
3789  if(isign==ABI_FFTW_FORWARD) then
3790     ! Scale results.
3791     factor_fft = one / (nx*ny*nz)
3792     do k=1, local_n0*ndat
3793        do j=1, ny
3794           do i=1, nx
3795              fout(1,i,j,k) =  real(data(i,j,k)) * factor_fft
3796              fout(2,i,j,k) = aimag(data(i,j,k)) * factor_fft
3797           end do
3798        end do
3799     end do
3800  end if
3801 
3802  call fftw_destroy_plan(plan)
3803  call fftw_free(cdata)
3804 
3805 #else
3806  MSG_ERROR("FFTW3_MPI support not activated")
3807  ABI_UNUSED((/nx,ny,nz,ldx,ldy,ldz,ndat,isign/))
3808  ABI_UNUSED(comm_fft)
3809  if (PRESENT(fftw_flags)) then
3810     ABI_UNUSED(fftw_flags)
3811  end if
3812  ABI_UNUSED(fin(1,1,1,1))
3813  ABI_UNUSED(fout(1,1,1,1))
3814 #endif
3815 
3816 end subroutine fftw3mpi_many_dft_ip

m_fftw3/fftw3mpi_many_dft_tr [ Functions ]

[ Top ] [ m_fftw3 ] [ Functions ]

NAME

  fftw3mpi_many_dft_tr

FUNCTION

 Driver routine for many out-of-place 3D complex-to-complex FFTs of lengths nx, ny, nz.

INPUTS

 nx,ny,nz=Number of points along the three directions.
 ldx,ldy,ldz=Physical dimension of the fin and fout arrays (to avoid cache conflicts).
 ndat=Number of FFTs to be done.
 fin(2*ldx*ldy*ldz*ndat)=The complex array to be transformed.
 isign=sign of Fourier transform exponent: current convention uses
   +1 for transforming from G to r,
   -1 for transforming from r to G.
 comm_fft=MPI communicator for the FFT.
 [fftw_flags]=Flags used to create the plan. They can be combined with the "+" operator.
   Defaults to ABI_FFTW_ESTIMATE.

TODO

  Remove me

OUTPUT

 fout(2,ldx*ldy*ldz*ndat)=The Fourier transform of fin.

PARENTS

CHILDREN

      fftw3_destroy_plan,fftw3_execute_dft

SOURCE

3853 subroutine fftw3mpi_many_dft_tr(nx,ny,nz,ndat,isign,fin,fout,comm_fft,fftw_flags)
3854 
3855 
3856 !This section has been created automatically by the script Abilint (TD).
3857 !Do not modify the following lines by hand.
3858 #undef ABI_FUNC
3859 #define ABI_FUNC 'fftw3mpi_many_dft_tr'
3860 !End of the abilint section
3861 
3862  implicit none
3863 
3864 !Arguments ------------------------------------
3865 !scalars
3866  integer,intent(in) :: nx,ny,nz,ndat,isign,comm_fft
3867  integer,optional,intent(in) :: fftw_flags
3868 !arrays
3869  complex(C_DOUBLE_COMPLEX),ABI_CONTIGUOUS pointer  :: fin(:,:,:)
3870  complex(C_DOUBLE_COMPLEX),ABI_CONTIGUOUS pointer :: fout(:,:,:)
3871 
3872 !Local variables-------------------------------
3873 #ifdef HAVE_FFT_FFTW3_MPI
3874 !scalars
3875  integer :: my_flags
3876  !FFTWMPI stuff
3877  type(C_PTR) :: plan
3878  integer(C_INTPTR_T) :: fft_sizes(4)
3879 
3880 !*************************************************************************
3881 
3882  my_flags=ABI_FFTW_ESTIMATE; if (PRESENT(fftw_flags)) my_flags= fftw_flags
3883  my_flags = ior(my_flags,FFTW_DESTROY_INPUT)
3884 
3885  fft_sizes(1)=nz
3886  fft_sizes(2)=ny
3887  fft_sizes(3)=nx
3888  fft_sizes(4)=ndat
3889 
3890  plan = fftw_mpi_plan_many_dft(3,fft_sizes(1:3),fft_sizes(4), &
3891 &                              FFTW_MPI_DEFAULT_BLOCK,FFTW_MPI_DEFAULT_BLOCK, &
3892 &                              fin,fout,comm_fft,isign,my_flags)
3893 
3894 !Compute transform (as many times as desired)
3895  call fftw_mpi_execute_dft(plan, fin, fout)
3896  call fftw_destroy_plan(plan)
3897 
3898 #else
3899  MSG_ERROR("FFTW3_MPI support not activated")
3900  ABI_UNUSED((/nx,ny,nz,ndat,isign,comm_fft/))
3901  if (PRESENT(fftw_flags)) then
3902     ABI_UNUSED(fftw_flags)
3903  end if
3904  ABI_UNUSED(fin(1,1,1))
3905  ABI_UNUSED(fout(1,1,1))
3906 #endif
3907 
3908 end subroutine fftw3mpi_many_dft_tr

m_fftw3/fftwmpi_free_work_array [ Functions ]

[ Top ] [ m_fftw3 ] [ Functions ]

NAME

  fftwmpi_free_work_array

FUNCTION

  routine for freeing fftw work arrray

INPUTS

OUTPUT

 cdata_f,cdata_r: C pointers to free for fourier andreal data

PARENTS

CHILDREN

      fftw3_destroy_plan,fftw3_execute_dft

SOURCE

3664 subroutine fftwmpi_free_work_array(cdata_f,cdata_r)
3665 
3666 
3667 !This section has been created automatically by the script Abilint (TD).
3668 !Do not modify the following lines by hand.
3669 #undef ABI_FUNC
3670 #define ABI_FUNC 'fftwmpi_free_work_array'
3671 !End of the abilint section
3672 
3673  implicit none
3674 
3675 !Arguments ------------------------------------
3676 !scalars
3677  type(C_PTR), intent(inout) :: cdata_f,cdata_r
3678 
3679 ! *************************************************************************
3680 
3681 #ifdef HAVE_FFT_FFTW3_MPI
3682  call fftw_free(cdata_r)
3683  call fftw_free(cdata_f)
3684 #else
3685  MSG_ERROR("FFTW3_MPI support not activated")
3686  if(.false.) then
3687    cdata_r = C_NULL_PTR; cdata_f = C_NULL_PTR
3688  end if
3689 #endif
3690 
3691 end subroutine fftwmpi_free_work_array

m_fftw3/fftwmpi_get_work_array [ Functions ]

[ Top ] [ m_fftw3 ] [ Functions ]

NAME

  fftwmpi_get_work_array

FUNCTION

 Driver routine for allocate fftw work arrray for 3D complex-to-complex FFTs of lengths nx, ny, nz.

INPUTS

 nx,ny,nz=Number of points along the three directions.
 ndat=Number of FFTs to be done.
 comm_fft=MPI communicator.

OUTPUT

 cdata_f,cdata_r: C pointers to use for fourier andreal data
 n0,n0_tr : local size on the shared dimension (nz or ny if transposed mode is used)
 offset,offset_tr : offset per process in continuous tabx

PARENTS

CHILDREN

      fftw3_destroy_plan,fftw3_execute_dft

SOURCE

3593 subroutine fftwmpi_get_work_array(cdata_f,cdata_r,rank,nx,ny,nz,ndat,comm_fft,n0,offset,n0_tr,offset_tr)
3594 
3595 
3596 !This section has been created automatically by the script Abilint (TD).
3597 !Do not modify the following lines by hand.
3598 #undef ABI_FUNC
3599 #define ABI_FUNC 'fftwmpi_get_work_array'
3600 !End of the abilint section
3601 
3602  implicit none
3603 
3604 !Arguments ------------------------------------
3605 !scalars
3606  integer,intent(in) :: nx,ny,nz,ndat,rank,comm_fft
3607  integer(C_INTPTR_T), intent(out) :: n0, offset, n0_tr, offset_tr
3608  type(C_PTR), intent(out) :: cdata_f,cdata_r
3609 
3610 !Local variables-------------------------------
3611 #ifdef HAVE_FFT_FFTW3_MPI
3612 !scalars
3613  integer(C_INTPTR_T) :: alloc_local
3614 !arrays
3615  integer(C_INTPTR_T) :: fft_sizes(4)
3616 
3617 ! *************************************************************************
3618 
3619  ! Dimensions are inverted here (C interface).
3620  fft_sizes(1)=nz
3621  fft_sizes(2)=ny
3622  fft_sizes(3)=nx
3623  fft_sizes(4)=ndat
3624 
3625  alloc_local = fftw_mpi_local_size_many_transposed(rank,fft_sizes(1:3),fft_sizes(4), &
3626 &      FFTW_MPI_DEFAULT_BLOCK,FFTW_MPI_DEFAULT_BLOCK, comm_fft, &
3627 &      n0,offset, &
3628 &      n0_tr,offset_tr)
3629 
3630  cdata_f = fftw_alloc_complex(alloc_local)
3631  cdata_r = fftw_alloc_complex(alloc_local)
3632 
3633 #else
3634   MSG_ERROR("FFTW3_MPI support not activated")
3635   ABI_UNUSED((/nx,ny,nz,ndat,rank,comm_fft/))
3636   cdata_f = C_NULL_PTR; cdata_r = C_NULL_PTR
3637   n0 = 0; offset = 0; n0_tr = 0; offset_tr = 0
3638 #endif
3639 
3640 end subroutine fftwmpi_get_work_array

m_fftw3/old_fftw3_mpifourdp [ Functions ]

[ Top ] [ m_fftw3 ] [ Functions ]

NAME

  old_fftw3_mpifourdp

FUNCTION

INPUTS

 cplex=1 if fofr is real, 2 if fofr is complex
 nfft=(effective) number of FFT grid points (for this processor)
 ndat=Number of FFTs to be done.
 isign= +1 : fofg(G) => fofr(R);
        -1 : fofr(R) => fofg(G)
 fftn2_distrib(n2)=  rank of the processor which own fft planes in 2nd dimension for fourdp
 ffti2_local(n2) = local i2 indices in fourdp
 fftn3_distrib(n3) = rank of the processor which own fft planes in 3rd dimension for fourdp
 ffti3_local(n3) = local i3 indices in fourdp
 comm_fft=MPI communicator for the FFT
 [fftw_flags]=Flags used to create the plan. They can be combined with the "+" operator.

SIDE EFFECTS

 Input/Output
 fofg(2,nfft*ndat)=f(G), complex.
 fofr(cplex*nfft*ndat)=input function f(r) (real or complex)

PARENTS

CHILDREN

      fftw3_destroy_plan,fftw3_execute_dft

SOURCE

4260 subroutine old_fftw3_mpifourdp(cplex,nfft,ngfft,ndat,isign,&
4261   fftn2_distrib,ffti2_local,fftn3_distrib,ffti3_local,fofg,fofr,comm_fft,fftw_flags)
4262 
4263 
4264 !This section has been created automatically by the script Abilint (TD).
4265 !Do not modify the following lines by hand.
4266 #undef ABI_FUNC
4267 #define ABI_FUNC 'old_fftw3_mpifourdp'
4268 !End of the abilint section
4269 
4270  implicit none
4271 
4272 !Arguments ------------------------------------
4273 !scalars
4274  integer,intent(in) :: cplex,nfft,ndat,isign,comm_fft
4275  integer,optional,intent(in) :: fftw_flags
4276 !arrays
4277  integer,intent(in) :: ngfft(18)
4278  integer,intent(in) :: fftn2_distrib(ngfft(2)),ffti2_local(ngfft(2))
4279  integer,intent(in) :: fftn3_distrib(ngfft(3)),ffti3_local(ngfft(3))
4280  real(dp),intent(inout) :: fofg(2,nfft*ndat),fofr(cplex*nfft*ndat)
4281 
4282 #ifdef HAVE_FFT_FFTW3_MPI
4283 !Local variables-------------------------------
4284 !scalars
4285  integer :: nx,ny,nz,my_flags
4286 
4287 ! *************************************************************************
4288 
4289  my_flags=ABI_FFTW_ESTIMATE; if (PRESENT(fftw_flags)) my_flags= fftw_flags
4290 
4291  nx=ngfft(1); ny=ngfft(2); nz=ngfft(3)
4292  !me_fft=ngfft(11); nproc_fft=ngfft(10)
4293 
4294  select case (cplex)
4295 
4296  case (1)
4297 
4298    ! Complex to Complex.
4299    ! This one is ok when ndat > 1
4300    !call fftw3_mpifourdp_c2c(cplex,nfft,ngfft,ndat,isign,&
4301    !& fftn2_distrib,ffti2_local,fftn3_distrib,ffti3_local,fofg,fofr,comm_fft,fftw_flags=my_flags)
4302    !return
4303 
4304    ! r2c or c2r case.
4305    ! FIXME this one is buggy when ndat > 1
4306    select case (isign)
4307    case (ABI_FFTW_FORWARD)
4308      ! +1; R --> G
4309     call fftw3_mpifourdp_r2c(nfft,ngfft,ndat,fftn2_distrib,ffti2_local,fftn3_distrib,ffti3_local,&
4310 &     fofg,fofr,comm_fft,fftw_flags=my_flags)
4311 
4312    case (ABI_FFTW_BACKWARD)
4313      ! -1; G --> R
4314     call fftw3_mpifourdp_c2r(nfft,ngfft,ndat,fftn2_distrib,ffti2_local,fftn3_distrib,ffti3_local,&
4315 &     fofg,fofr,comm_fft,fftw_flags=my_flags)
4316 
4317    case default
4318      MSG_BUG("Wrong isign")
4319    end select
4320 
4321  case (2)
4322    ! Complex to Complex.
4323    call fftw3_mpifourdp_c2c(cplex,nfft,ngfft,ndat,isign,&
4324 &    fftn2_distrib,ffti2_local,fftn3_distrib,ffti3_local,fofg,fofr,comm_fft,fftw_flags=my_flags)
4325 
4326  case default
4327    MSG_BUG(" Wrong value for cplex")
4328  end select
4329 
4330 #else
4331  MSG_ERROR("FFTW3_MPI support not activated")
4332  ABI_UNUSED((/cplex,nfft,ngfft(1),ndat,isign,comm_fft/))
4333  ABI_UNUSED((/fftn2_distrib(1),ffti2_local(1)/))
4334  ABI_UNUSED((/fftn3_distrib(1),ffti3_local(1)/))
4335  if (PRESENT(fftw_flags)) then
4336     ABI_UNUSED(fftw_flags)
4337  end if
4338  ABI_UNUSED(fofg(1,1))
4339  ABI_UNUSED(fofr(1))
4340 #endif
4341 
4342 end subroutine old_fftw3_mpifourdp

m_fftw3/zplan_many_dft [ Functions ]

[ Top ] [ m_fftw3 ] [ Functions ]

NAME

FUNCTION

INPUTS

SIDE EFFECTS

PARENTS

SOURCE

2990 !! FIXME  technically it should be intent(inout) since FFTW3 can destroy the input for particular flags.
2991 
2992 function zplan_many_dft(rank,n,howmany,fin,inembed,istride,idist,fout,onembed,ostride,odist,sign,flags,nthreads) result(plan)
2993 
2994 
2995 !This section has been created automatically by the script Abilint (TD).
2996 !Do not modify the following lines by hand.
2997 #undef ABI_FUNC
2998 #define ABI_FUNC 'zplan_many_dft'
2999 !End of the abilint section
3000 
3001  implicit none
3002 
3003 !Arguments ------------------------------------
3004 !scalars
3005  integer,intent(in) :: rank,howmany,istride,ostride, sign,flags,idist,odist,nthreads
3006  integer,intent(in) :: n(rank),inembed(rank),onembed(rank)
3007  integer(KIND_FFTW_PLAN) :: plan
3008 !arrays
3009  complex(dpc) :: fin(*),fout(*)
3010 
3011 !Local variables-------------------------------
3012  character(len=500) :: msg,frmt
3013 
3014 ! *************************************************************************
3015 
3016 !$OMP CRITICAL (OMPC_zplan_many_dft)
3017  call fftw3_set_nthreads(nthreads)
3018 
3019  call dfftw_plan_many_dft(plan, rank, n, howmany, &
3020 &  fin, inembed, istride, idist, fout, onembed, ostride, odist, sign, flags)
3021 !$OMP END CRITICAL (OMPC_zplan_many_dft)
3022 
3023  if (plan==NULL_PLAN) then ! handle the error
3024    call wrtout(std_out,"dfftw_plan_many_dft returned NULL_PLAN (complex version)","COLL")
3025    write(frmt,*)"(a,",rank,"(1x,i0),3(a,i0),a,2(a,",rank,"(1x,i0),2(a,i0),a))"
3026    write(msg,frmt)&
3027 &    " n = ",n," howmany = ",howmany," sign = ",sign," flags = ",flags,ch10,&
3028 &    " inembed = ",inembed," istride = ",istride," idist =",idist,ch10,     &
3029 &    " onembed = ",onembed," ostride = ",ostride," odist =",idist,ch10
3030    call wrtout(std_out,msg,"COLL")
3031    MSG_ERROR("Check FFTW library and/or abinit code")
3032  end if
3033 
3034 end function zplan_many_dft

m_m_fftw3/fftw3_mpifourdp [ Functions ]

[ Top ] [ Functions ]

NAME

 fftw3_mpifourdp

FUNCTION

 Conduct Fourier transform of REAL or COMPLEX function f(r)=fofr defined on
 fft grid in real space, to create complex f(G)=fofg defined on full fft grid
 in reciprocal space, in full storage mode, or the reverse operation.
 For the reverse operation, the final data is divided by nfftot.
 REAL case when cplex=1, COMPLEX case when cplex=2
 Usually used for density and potentials.

INPUTS

 cplex=1 if fofr is real, 2 if fofr is complex
 nfft=(effective) number of FFT grid points (for this processor)
 ngfft(18)=contain all needed information about 3D FFT, see ~abinit/doc/variables/vargs.htm#ngfft
 ndat=Numbre of FFT transforms
 isign=sign of Fourier transform exponent: current convention uses
    +1 for transforming from G to r
    -1 for transforming from r to G.
 fftn2_distrib(2),ffti2_local(2)
 fftn3_distrib(3),ffti3_local(3)
 comm_fft=MPI communicator

SIDE EFFECTS

 Input/Output
 fofg(2,nfft)=f(G), complex.
 fofr(cplex*nfft)=input function f(r) (real or complex)

PARENTS

      fourdp,m_fft

CHILDREN

      fftw3_destroy_plan,fftw3_execute_dft

SOURCE

5881 subroutine fftw3_mpifourdp(cplex,nfft,ngfft,ndat,isign,&
5882 &  fftn2_distrib,ffti2_local,fftn3_distrib,ffti3_local,fofg,fofr,comm_fft)
5883 
5884 
5885 !This section has been created automatically by the script Abilint (TD).
5886 !Do not modify the following lines by hand.
5887 #undef ABI_FUNC
5888 #define ABI_FUNC 'fftw3_mpifourdp'
5889 !End of the abilint section
5890 
5891  implicit none
5892 
5893 !Arguments ------------------------------------
5894 !scalars
5895  integer,intent(in) :: cplex,isign,nfft,ndat,comm_fft
5896 !arrays
5897  integer,intent(in) :: ngfft(18)
5898  integer,intent(in) :: fftn2_distrib(ngfft(2)),ffti2_local(ngfft(2))
5899  integer,intent(in) :: fftn3_distrib(ngfft(3)),ffti3_local(ngfft(3))
5900  real(dp),intent(inout) :: fofg(2,nfft*ndat),fofr(cplex*nfft*ndat)
5901 
5902 !Local variables-------------------------------
5903 !scalars
5904  integer :: n1,n2,n3,n4,n5,n6,nd2proc,nd3proc,nproc_fft,me_fft
5905 !arrays
5906  real(dp),allocatable :: workf(:,:,:,:,:),workr(:,:,:,:,:)
5907 
5908 ! *************************************************************************
5909 
5910  ! Note the only c2c is supported in parallel.
5911  n1=ngfft(1); n2=ngfft(2); n3=ngfft(3)
5912  n4=ngfft(4); n5=ngfft(5); n6=ngfft(6)
5913  me_fft=ngfft(11); nproc_fft=ngfft(10)
5914 
5915  nd2proc=((n2-1)/nproc_fft) +1
5916  nd3proc=((n6-1)/nproc_fft) +1
5917  ABI_ALLOCATE(workr,(2,n4,n5,nd3proc,ndat))
5918  ABI_ALLOCATE(workf,(2,n4,n6,nd2proc,ndat))
5919 
5920  ! Complex to Complex
5921  ! TODO: Complex to Real
5922  select case (isign)
5923  case (1)
5924    ! G --> R
5925    call mpifft_fg2dbox(nfft,ndat,fofg,n1,n2,n3,n4,nd2proc,n6,fftn2_distrib,ffti2_local,me_fft,workf)
5926 
5927    call fftw3_mpiback(2,ndat,n1,n2,n3,n4,n5,n6,n4,nd2proc,nd3proc,2,workf,workr,comm_fft)
5928 
5929    call mpifft_dbox2fr(n1,n2,n3,n4,n5,nd3proc,ndat,fftn3_distrib,ffti3_local,me_fft,workr,cplex,nfft,fofr)
5930 
5931  case (-1)
5932    ! R --> G
5933    call mpifft_fr2dbox(cplex,nfft,ndat,fofr,n1,n2,n3,n4,n5,nd3proc,fftn3_distrib,ffti3_local,me_fft,workr)
5934 
5935    call fftw3_mpiforw(2,ndat,n1,n2,n3,n4,n5,n6,n4,nd2proc,nd3proc,2,workr,workf,comm_fft)
5936 
5937    ! Transfer FFT output to the original fft box.
5938    call mpifft_dbox2fg(n1,n2,n3,n4,nd2proc,n6,ndat,fftn2_distrib,ffti2_local,me_fft,workf,nfft,fofg)
5939 
5940  case default
5941    MSG_BUG("Wrong isign")
5942  end select
5943 
5944  ABI_DEALLOCATE(workr)
5945  ABI_DEALLOCATE(workf)
5946 
5947 end subroutine fftw3_mpifourdp