TABLE OF CONTENTS


ABINIT/m_sgfft [ Modules ]

[ Top ] [ Modules ]

NAME

  m_sgfft

FUNCTION

  This module provides low-level interfaces to Goedecker's FFT library.

COPYRIGHT

 Copyright by Stefan Goedecker, Ithaca, NY USA, July 14, 1993
 Copyright (C) 1998-2018 ABINIT group (DCA, XG)
 This file is distributed under the terms of the
 GNU General Public License, see ~abinit/COPYING
 or http://www.gnu.org/copyleft/gpl.txt .

SOURCE

18 #if defined HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21 
22 #include "abi_common.h"
23 
24 MODULE m_sgfft
25 
26  use defs_basis
27  use m_abicore
28  use m_errors
29  use m_fftcore
30 
31  use defs_fftdata,  only : mg
32 
33  implicit none
34 
35  private
36 
37 ! Public API.
38  public :: sg_fft_cc      ! Complex-Complex version (full box)
39  public :: sg_fft_rc      ! Real-Complex version (full box)
40  public :: sg_fftpad      ! Zero-padding version of "fft".
41  public :: sg_fftrisc     ! Fourier transforms of wavefunctions
42  public :: sg_fftrisc_2
43  public :: sg_poisson     ! Solve the poisson equation in G-space starting from n(r).
44 
45 CONTAINS  !====================================================================

m_sgfft/fft_cc_one_nothreadsafe [ Functions ]

[ Top ] [ m_sgfft ] [ Functions ]

NAME

 fft_cc_one_nothreadsafe

FUNCTION

 Calculates the discrete Fourier transform:

   ftarr(i1,i2,i3)=exp(ris*i*2*pi*(j1*i1/n1+j2*i2/n2+j3*i3/n3)) arr(j1,j2,j3)

INPUTS

  fftcache=size of the cache (kB)
  nd1,nd2,nd3=memory dimension of arr and ftarr
  n1,n2,n3=physical dimension of the transform
  arr(2,nd1,nd2,nd3)=input complex array with alternating real and imaginary
  elements; data resides in 2*n1*n2*n3 of this array, spread out.
  (see SIDE FFECTS).
  ris=(real(dp)) sign of exponential in transform

OUTPUT

  ftarr(2,nd1,nd2,nd3)=working space for transform and contains output

SIDE EFFECTS

  arr(2,nd1,nd2,nd3) is modified by sg_fftx,sg_ffty,sg_fftz.

NOTES

  ndi must always be greater or equal to ni.  Recommended choice for nd1
  and nd2 is: ni for ni=odd or ni+1 for ni=even (hence 2*(ni/2)+1);
  nd3 should always be n3.  Note that choosing nd1 or nd2 larger than
  the recommended value can severely degrade efficiency of this routine.
  Avoiding even ndi for nd1 and nd2 avoids cache conflicts on cache machines.
  Each of n1,n2,n3 must be a
  product of the prime factors 2,3,5. If two ni s are equal
  it is recommended to place them behind each other.
  The largest any of these may be is set by parameter "mg" below.
  This fft is particularly efficient for cache architectures.
  Note that the meaning of fftcache has changed from the original
  ncache of SG (that was the maximum number of COMPLEX*16 in the cache)

PARENTS

      m_sgfft

CHILDREN

      sg_fft_cc

SOURCE

176 subroutine fft_cc_one_nothreadsafe(fftcache,nd1,nd2,nd3,n1,n2,n3,arr,ftarr,ris)
177 
178 
179 !This section has been created automatically by the script Abilint (TD).
180 !Do not modify the following lines by hand.
181 #undef ABI_FUNC
182 #define ABI_FUNC 'fft_cc_one_nothreadsafe'
183 !End of the abilint section
184 
185  implicit none
186 
187 !Arguments ------------------------------------
188 !scalars
189  integer,intent(in) :: fftcache,n1,n2,n3,nd1,nd2,nd3
190  real(dp),intent(in) :: ris
191 !arrays
192  real(dp),intent(inout) :: arr(2,nd1,nd2,nd3)
193  real(dp),intent(inout) :: ftarr(2,nd1,nd2,nd3) !vz_i
194 
195 !Local variables-------------------------------
196 !mfac sets maximum number of factors (5, 4, 3, or 2) which may be
197 !contained within any n1, n2, or n3
198 !mg sets the maximum 1 dimensional fft length (any one of n1, n2, or n3)
199 !scalars
200  integer,parameter :: mfac=11
201  integer :: i2,ic,n1i,n3i
202  character(len=500) :: message
203 !arrays
204  integer :: aft(mfac),bef(mfac),ind(mg),now(mfac)
205  real(dp) :: trig(2,mg)
206 
207 ! *************************************************************************
208 
209 !Check that dimension is not exceeded
210  if (n1>mg.or.n2>mg.or.n3>mg) then
211    write(message, '(a,3i10,a,i10,a)' )&
212 &   'one of the dimensions n1,n2,n3=',n1,n2,n3,&
213 &   'exceeds allowed dimension mg=',mg,ch10
214    MSG_BUG(message)
215  end if
216 
217 !transform along x direction
218  call sg_ctrig(n1,trig,aft,bef,now,ris,ic,ind,mfac,mg)
219  call sg_fftx(fftcache,mfac,mg,nd1,nd2,nd3,n2,n3,&
220 & arr,ftarr,trig,aft,now,bef,ris,ind,ic)
221 
222 !transform along y direction
223  if (n2/=n1)then
224    call sg_ctrig(n2,trig,aft,bef,now,ris,ic,ind,mfac,mg)
225  end if
226  n1i=1 ; n3i=1
227  call sg_ffty(fftcache,mfac,mg,nd1,nd2,nd3,n1i,n1,n3i,n3,&
228 & ftarr,arr,trig,aft,now,bef,ris,ind,ic)
229 
230 !transform along z direction
231  if (n3/=n2)then
232    call sg_ctrig(n3,trig,aft,bef,now,ris,ic,ind,mfac,mg)
233  end if
234 
235 !$OMP PARALLEL DO SHARED(aft,arr,bef,ftarr,ind,ic)&
236 !$OMP SHARED(nd1,nd2,nd3,now,n1,n2,ris,trig)&
237 !$OMP PRIVATE(i2)
238  do i2=1,n2
239    call sg_fftz(mfac,mg,nd1,nd2,nd3,n1,i2,i2,arr,ftarr,&
240 &   trig,aft,now,bef,ris,ind,ic)
241  end do
242 !$OMP END PARALLEL DO
243 
244 end subroutine fft_cc_one_nothreadsafe

m_sgfft/fftpad_one_nothreadsafe [ Functions ]

[ Top ] [ m_sgfft ] [ Functions ]

NAME

 fftpad_one_nothreadsafe

FUNCTION

 Fast Fourier transform. This is the zero-padding version of "fft" for a single array.
 This version is not thread-safe.

INPUTS

  fftcache=size of the cache (kB)
  mgfft=maximum size of 1D FFTs
  nd1,nd2,nd3=memory dimension of arr and ftarr
  n1,n2,n3=physical dimension of the transform
  arr(2,nd1,nd2,nd3)=input complex array with alternating real and imaginary
    elements; data resides in 2*n1*n2*n3 of this array, spread out.
  ris=(real(dp)) sign of exponential in transform
  gbound(2*mgfft+8,2)=sphere boundary info

OUTPUT

  ftarr(2,nd1,nd2,nd3)=working space for transform and contains output

SIDE EFFECTS

  arr(2,nd1,nd2,nd3) is modified by sg_fftpx,sg_ffty,sg_fftz.

NOTES

  mfac sets maximum number of factors (5, 4, 3, or 2) which may be
  contained within any n1, n2, or n3
  mg sets the maximum 1 dimensional fft length (any one of n1, n2, or n3)
  XG: the signification of mg is changed with respect to fft3dp !!!

PARENTS

      m_sgfft

CHILDREN

      sg_fft_cc

SOURCE

711 subroutine fftpad_one_nothreadsafe(fftcache,mgfft,nd1,nd2,nd3,n1,n2,n3,arr,ftarr,ris,gbound)
712 
713 
714 !This section has been created automatically by the script Abilint (TD).
715 !Do not modify the following lines by hand.
716 #undef ABI_FUNC
717 #define ABI_FUNC 'fftpad_one_nothreadsafe'
718 !End of the abilint section
719 
720  implicit none
721 
722 !Arguments ------------------------------------
723 !scalars
724  integer,intent(in) :: fftcache,mgfft,n1,n2,n3,nd1,nd2,nd3
725  real(dp),intent(in) :: ris
726 !arrays
727  integer,intent(in) :: gbound(2*mgfft+8,2)
728  real(dp),intent(inout) :: arr(2,nd1,nd2,nd3)
729  real(dp),intent(out) :: ftarr(2,nd1,nd2,nd3)
730 
731 !Local variables-------------------------------
732 !scalars
733  integer,parameter :: mfac=11
734  integer :: g3max,g3min,i2,ic,n1i,n3i,n3p
735 #ifdef DEBUG_MODE
736  character(len=500) :: message
737 #endif
738 !arrays
739  integer :: aft(mfac),bef(mfac),ind(mg),now(mfac)
740  real(dp) :: trig(2,mg)
741 
742 ! *************************************************************************
743 
744 #ifdef DEBUG_MODE
745 !Check that dimension is not exceeded
746  if (n1>mg.or.n2>mg.or.n3>mg) then
747    write(message, '(a,3i10,a,i10)')&
748 &   'one of the dimensions n1,n2,n3=',n1,n2,n3,' exceeds the allowed dimension mg=',mg
749    MSG_BUG(message)
750  end if
751 #endif
752 
753  g3min=gbound(3,2)
754  g3max=gbound(4,2)
755 
756 !--------------------------------------------------------------------------
757 
758  if (abs(ris-one)<tol12) then
759 
760 !  Handle G -> r  transform (G sphere to fft box)
761 
762 !  Transform along x direction
763    call sg_ctrig(n1,trig,aft,bef,now,ris,ic,ind,mfac,mg)
764 
765 !  Zero out the untransformed (0) data part of the work array
766 !  -- at every (y,z) there are 0 s to be added to the ends of
767 !  the x data so have to zero whole thing.
768    ftarr(:,:,:,:)=0.0d0
769 
770 !  Note the passing of the relevant part of gbound
771    call sg_fftpx(fftcache,mfac,mg,mgfft,nd1,nd2,nd3,n2,n3,&
772 &   arr,ftarr,trig,aft,now,bef,ris,ind,ic,gbound(3,2))
773 
774 !  Transform along y direction in two regions of z
775    if (n2/=n1)then
776      call sg_ctrig(n2,trig,aft,bef,now,ris,ic,ind,mfac,mg)
777    end if
778 
779 !  First y transform: z=1..g3max+1
780    n3p=g3max+1
781    n1i=1 ; n3i=1
782    call sg_ffty(fftcache,mfac,mg,nd1,nd2,nd3,n1i,n1,n3i,n3p,ftarr,arr,&
783 &   trig,aft,now,bef,ris,ind,ic)
784 
785 !  Zero out the untransformed (0) data part of the work array
786 !  -- only need to zero specified ranges of z
787    arr(:,:,:,n3p+1:g3min+n3)=0.0d0
788 
789 !  Second y transform: z=g3min+1..0 (wrapped around)
790    n3p=-g3min
791    if (n3p>0) then
792      n3i=1+g3min+n3 ; n1i=1
793      call sg_ffty(fftcache,mfac,mg,nd1,nd2,nd3,n1i,n1,n3i,n3,ftarr,arr,&
794 &     trig,aft,now,bef,ris,ind,ic)
795    end if
796 
797 !  Transform along z direction
798    if (n3/=n2) then
799      call sg_ctrig(n3,trig,aft,bef,now,ris,ic,ind,mfac,mg)
800    end if
801 
802 !$OMP PARALLEL DO
803    do i2=1,n2
804      call sg_fftz(mfac,mg,nd1,nd2,nd3,n1,i2,i2,arr,ftarr,&
805 &     trig,aft,now,bef,ris,ind,ic)
806    end do
807 
808  else
809 
810 !  *************************************************
811 !  Handle r -> G transform (from fft box to G sphere)
812 
813 !  Transform along z direction
814    call sg_ctrig(n3,trig,aft,bef,now,ris,ic,ind,mfac,mg)
815 
816 !$OMP PARALLEL DO
817    do i2=1,n2
818      call sg_fftz(mfac,mg,nd1,nd2,nd3,n1,i2,i2,arr,ftarr,&
819 &     trig,aft,now,bef,ris,ind,ic)
820    end do
821 
822 !  Transform along y direction in two regions of z
823    if (n2/=n3) then
824      call sg_ctrig(n2,trig,aft,bef,now,ris,ic,ind,mfac,mg)
825    end if
826 
827 !  First y transform: z=1..g3max+1
828    n3p=g3max+1
829    n1i=1 ; n3i=1
830    call sg_ffty(fftcache,mfac,mg,nd1,nd2,nd3,n1i,n1,n3i,n3p,ftarr,arr,&
831 &   trig,aft,now,bef,ris,ind,ic)
832 
833 !  Second y transform: z=g3min+1..0 (wrapped around)
834    n3p=-g3min
835    if (n3p>0) then
836      n1i=1 ; n3i=1+g3min+n3
837      call sg_ffty(fftcache,mfac,mg,nd1,nd2,nd3,n1i,n1,n3i,n3,ftarr,arr,&
838 &     trig,aft,now,bef,ris,ind,ic)
839    end if
840 
841 !  Transform along x direction
842    if (n1/=n2) then
843      call sg_ctrig(n1,trig,aft,bef,now,ris,ic,ind,mfac,mg)
844    end if
845 
846 !  Zero out the untransformed (0) data part of the work array
847 !  -- at every (y,z) there are 0 s to be added to the ends of
848 !  the x data so have to zero whole thing.
849    ftarr(:,:,:,:)=0.0d0
850 
851 !  Note the passing of the relevant part of gbound
852    call sg_fftpx(fftcache,mfac,mg,mgfft,nd1,nd2,nd3,n2,n3,&
853 &   arr,ftarr,trig,aft,now,bef,ris,ind,ic,gbound(3,2))
854 
855 !  Data is now ready to be extracted from fft box to sphere
856  end if
857 
858 end subroutine fftpad_one_nothreadsafe

m_sgfft/fftrisc_one_nothreadsafe [ Functions ]

[ Top ] [ m_sgfft ] [ Functions ]

NAME

 fftrisc_one_nothreadsafe

FUNCTION

 Carry out Fourier transforms between real and reciprocal (G) space,
 for wavefunctions, contained in a sphere in reciprocal space,
 in both directions. Also accomplish some post-processing.

NOTES

 Specifically uses rather sophisticated algorithms, based on S Goedecker
 routines, specialized for superscalar RISC architecture.
 Zero padding : saves 7/12 execution time
 Bi-dimensional data locality in most of the routine : cache reuse
 For k-point (0 0 0) : takes advantage of symmetry of data.
 Note however that no blocking is used, in both 1D z-transform
 or subsequent 2D transform. This should be improved.

 * This routine is not thread-safe due to the presence of variables with the save attribute!
   DO NOT CALL THIS ROUTINE INSIDE A OPENMP PARALLEL REGION

INPUTS

  cplex= if 1 , denpot is real, if 2 , denpot is complex
     (cplex=2 only allowed for option=2 when istwf_k=1)
     one can also use cplex=0 if option=0 or option=3
  fofgin(2,npwin)=holds input wavefunction in G vector basis sphere.
  gboundin(2*mgfft+8,2)=sphere boundary info for reciprocal to real space
  gboundout(2*mgfft+8,2)=sphere boundary info for real to reciprocal space
  istwf_k=option parameter that describes the storage of wfs
  kg_kin(3,npwin)=reduced planewave coordinates, input
  kg_kout(3,npwout)=reduced planewave coordinates, output
  mgfft=maximum size of 1D FFTs
  ngfft(18)=contain all needed information about 3D FFT, see ~abinit/doc/variables/vargs.htm#ngfft
  npwin=number of elements in fofgin array (for option 0, 1 and 2)
  npwout=number of elements in fofgout array (for option 2 and 3)
  n4,n5,n6=ngfft(4),ngfft(5),ngfft(6), dimensions of fofr.
  option= if 0: do direct FFT
          if 1: do direct FFT, then sum the density
          if 2: do direct FFT, multiply by the potential, then do reverse FFT
          if 3: do reverse FFT only
  weight=weight to be used for the accumulation of the density in real space
          (needed only when option=1)

OUTPUT

  (see side effects)

OPTIONS

  The different options are:
  - reciprocal to real space and output the result (when option=0),
  - reciprocal to real space and accumulate the density (when option=1) or
  - reciprocal to real space, apply the local potential to the wavefunction
    in real space and produce the result in reciprocal space (when option=2)
  - real space to reciprocal space (when option=3).
  option=0 IS NOT ALLOWED when istwf_k>2
  option=3 IS NOT ALLOWED when istwf_k>=2

SIDE EFFECTS

  for option==0, fofgin(2,npwin)=holds input wavefunction in G sphere;
                 fofr(2,n4,n5,n6) contains the Fourier Transform of fofgin;
                 no use of denpot, fofgout and npwout.
  for option==1, fofgin(2,npwin)=holds input wavefunction in G sphere;
                 denpot(cplex*n4,n5,n6) contains the input density at input,
                 and the updated density at output;
                 no use of fofgout and npwout.
  for option==2, fofgin(2,npwin)=holds input wavefunction in G sphere;
                 denpot(cplex*n4,n5,n6) contains the input local potential;
                 fofgout(2,npwout) contains the output function;
  for option==3, fofr(2,n4,n5,n6) contains the real space wavefunction;
                 fofgout(2,npwout) contains its Fourier transform;
                 no use of fofgin and npwin.

PARENTS

      m_sgfft

CHILDREN

      sg_fft_cc

SOURCE

4146 subroutine fftrisc_one_nothreadsafe(cplex,denpot,fofgin,fofgout,fofr,gboundin,gboundout,istwf_k,&
4147 & kg_kin,kg_kout,mgfft,ngfft,npwin,npwout,n4,n5,n6,option,weight_r,weight_i)
4148 
4149 
4150 !This section has been created automatically by the script Abilint (TD).
4151 !Do not modify the following lines by hand.
4152 #undef ABI_FUNC
4153 #define ABI_FUNC 'fftrisc_one_nothreadsafe'
4154 !End of the abilint section
4155 
4156  implicit none
4157 
4158 !Arguments ------------------------------------
4159 !scalars
4160  integer,intent(in) :: cplex,istwf_k,mgfft,n4,n5,n6,npwin,npwout,option
4161  real(dp),intent(in) :: weight_i,weight_r
4162 !arrays
4163  integer,intent(in) :: gboundin(2*mgfft+8,2),gboundout(2*mgfft+8,2)
4164  integer,intent(in) :: kg_kin(3,npwin),kg_kout(3,npwout),ngfft(18)
4165  real(dp),intent(in) :: fofgin(2,npwin)
4166  real(dp),intent(inout) :: denpot(cplex*n4,n5,n6),fofr(2,n4,n5,n6)
4167  real(dp),intent(out) :: fofgout(2,npwout)
4168 
4169 !Local variables-------------------------------
4170 !scalars
4171  integer,parameter :: mfac=11
4172  integer,save :: ic1,ic2,ic3,ic4,ic5,ic6,n1_save=0,n2_save=0,n3_save=0
4173  integer :: fftcache,g2max,g2min,i1,i1max,i2,i3,i3inv,ig,igb
4174  integer :: igb_inv,igbmax,ii2,lot,lotin,lotout,mgb,n1
4175  integer :: n1half1,n1halfm,n1i,n2,n2half1,n3,n4half1,n5half1,nfftot,ngbin
4176  integer :: ngbout,nlot,nproc_omp
4177  real(dp) :: ai,ar,fraction,norm,phai,phar,wkim,wkre
4178  character(len=500) :: message
4179 !arrays
4180  integer,save :: aft1(mfac),aft2(mfac),aft3(mfac),aft4(mfac),aft5(mfac)
4181  integer,save :: aft6(mfac),bef1(mfac),bef2(mfac),bef3(mfac),bef4(mfac)
4182  integer,save :: bef5(mfac),bef6(mfac),ind1(mg),ind2(mg),ind3(mg),ind4(mg)
4183  integer,save :: ind5(mg),ind6(mg),now1(mfac),now2(mfac),now3(mfac),now4(mfac)
4184  integer,save :: now5(mfac),now6(mfac)
4185  integer :: gbound_dum(4)
4186  integer,allocatable :: indpw_kin(:,:),indpw_kout(:,:)
4187  real(dp),save :: trig1(2,mg),trig2(2,mg),trig3(2,mg),trig4(2,mg),trig5(2,mg)
4188  real(dp),save :: trig6(2,mg)
4189  real(dp),allocatable :: pha1(:,:),pha2(:,:),pha3(:,:),wk1d_a(:,:,:,:)
4190  real(dp),allocatable :: wk1d_b(:,:,:,:),wk2d_a(:,:,:,:),wk2d_b(:,:,:,:)
4191  real(dp),allocatable :: wk2d_c(:,:,:,:),wk2d_d(:,:,:,:)
4192 #if defined HAVE_OPENMP
4193  integer,external :: OMP_GET_NUM_THREADS
4194 #endif
4195 
4196 ! *************************************************************************
4197 
4198  if(istwf_k>2 .and. option==0)then
4199    write(message,'(a,i0)')' option=0 is not allowed with istwf_k=',istwf_k
4200    MSG_BUG(message)
4201  end if
4202 
4203  if(istwf_k>=2 .and. option==3)then
4204    write(message,'(a,i0)')' option=3 is not allowed with istwf_k=',istwf_k
4205    MSG_BUG(message)
4206  end if
4207 
4208 !For all other tests of validity of inputs, assume that they
4209 !have been done in the calling routine
4210 
4211  n1=ngfft(1) ; n2=ngfft(2) ; n3=ngfft(3) ; nfftot=n1*n2*n3
4212  fftcache=ngfft(8)
4213 
4214  if(option/=3)then
4215    ABI_ALLOCATE(indpw_kin,(4,npwin))
4216    call indfftrisc(gboundin(3:3+2*mgfft+4,1),indpw_kin,kg_kin,mgfft,ngbin,ngfft,npwin)
4217  end if
4218  if(option==2 .or. option==3)then
4219    ABI_ALLOCATE(indpw_kout,(4,npwout))
4220    call indfftrisc(gboundout(3:3+2*mgfft+4,1),indpw_kout,kg_kout,mgfft,ngbout,ngfft,npwout)
4221  end if
4222 
4223 !Define the dimension of the first work arrays, for 1D transforms along z ,
4224 !taking into account the need to avoid the cache trashing
4225  if(option==2)then
4226    mgb=max(ngbin,ngbout)
4227  else if(option==0 .or. option==1)then
4228    mgb=ngbin ; ngbout=1
4229  else if(option==3)then
4230    mgb=ngbout ; ngbin=1
4231  end if
4232 
4233  if(mod(mgb,2)/=1)mgb=mgb+1
4234 
4235 !Initialise openmp, if needed
4236 !$OMP PARALLEL
4237 !$OMP SINGLE
4238  nproc_omp=1
4239 #if defined HAVE_OPENMP
4240  nproc_omp=OMP_GET_NUM_THREADS()
4241 #endif
4242 !$OMP END SINGLE
4243 !$OMP END PARALLEL
4244 
4245 !For the treatment of the z transform,
4246 !one tries to use only a fraction of the cache, since the
4247 !treatment of the array wk1d_a will not involve contiguous segments
4248  fraction=0.25
4249 !First estimation of lot and nlot
4250  lot=(fftcache*fraction*1000)/(n3*8*2)+1
4251 !Select the smallest integer multiple of nproc_omp, larger
4252 !or equal to nlot. In this way, the cache size is not exhausted,
4253 !and one takes care correctly of the number of processors.
4254 !Treat separately the in and out cases
4255  nlot=(ngbin-1)/lot+1
4256  nlot=nproc_omp*((nlot-1)/nproc_omp+1)
4257  lotin=(ngbin-1)/nlot+1
4258  nlot=(ngbout-1)/lot+1
4259  nlot=nproc_omp*((nlot-1)/nproc_omp+1)
4260  lotout=(ngbout-1)/nlot+1
4261 !The next line impose only one lot. Usually, comment it.
4262 !lotin=mgb ; lotout=mgb
4263 
4264 !Compute auxiliary arrays needed for FFTs
4265  if(n1/=n1_save)then
4266    call sg_ctrig(n1,trig1,aft1,bef1,now1,one,ic1,ind1,mfac,mg)
4267    call sg_ctrig(n1,trig4,aft4,bef4,now4,-one,ic4,ind4,mfac,mg)
4268    n1_save=n1
4269  end if
4270  if(n2/=n2_save)then
4271    call sg_ctrig(n2,trig2,aft2,bef2,now2,one,ic2,ind2,mfac,mg)
4272    call sg_ctrig(n2,trig5,aft5,bef5,now5,-one,ic5,ind5,mfac,mg)
4273    n2_save=n2
4274  end if
4275  if(n3/=n3_save)then
4276    call sg_ctrig(n3,trig3,aft3,bef3,now3,one,ic3,ind3,mfac,mg)
4277    call sg_ctrig(n3,trig6,aft6,bef6,now6,-one,ic6,ind6,mfac,mg)
4278    n3_save=n3
4279  end if
4280 
4281 !------------------------------------------------------------------
4282 !Here, call general k-point code
4283 
4284  if(istwf_k==1)then
4285 
4286 !  Note that the z transform will appear as a y transform
4287    ABI_ALLOCATE(wk1d_a,(2,mgb,n3,1))
4288    ABI_ALLOCATE(wk1d_b,(2,mgb,n3,1))
4289 
4290    if(option/=3)then
4291 
4292 !$OMP PARALLEL DO DEFAULT(PRIVATE) SHARED(n3,ngbin,wk1d_a)
4293      do i3=1,n3
4294        do igb=1,ngbin
4295          wk1d_a(1,igb,i3,1)=zero
4296          wk1d_a(2,igb,i3,1)=zero
4297        end do
4298      end do
4299 !$OMP END PARALLEL DO
4300 
4301 !    Insert fofgin into the work array
4302 !$OMP PARALLEL DO DEFAULT(PRIVATE) SHARED(fofgin,indpw_kin,npwin,wk1d_a)
4303      do ig=1,npwin
4304        igb=indpw_kin(4,ig) ; i3=indpw_kin(3,ig)
4305        wk1d_a(1,igb,i3,1)=fofgin(1,ig)
4306        wk1d_a(2,igb,i3,1)=fofgin(2,ig)
4307      end do
4308 !$OMP END PARALLEL DO
4309 
4310 !    Go from wk1d_a to wk1d_b, using 1D FFTs on the z direction
4311 !    However, due to special packing of data, use routine ffty
4312 !$OMP PARALLEL DO SHARED(aft3,bef3,fftcache,ind3,ic3,lotin,mgb)&
4313 !$OMP&SHARED(ngbin,now3,n3,trig3,wk1d_a,wk1d_b)&
4314 !$OMP&PRIVATE(igb,igbmax)
4315      do igb=1,ngbin,lotin
4316        igbmax=min(igb+lotin-1,ngbin)
4317 !      Go from wk1d_a to wk1d_b, using 1D FFTs on the z direction
4318 !      However, due to special packing of data, use routine ffty
4319        call sg_ffty(fftcache,mfac,mg,mgb,n3,1,igb,igbmax,1,1,wk1d_a,wk1d_b, &
4320 &       trig3,aft3,now3,bef3,one,ind3,ic3)
4321      end do
4322 !$OMP END PARALLEL DO
4323 
4324    end if !  if(option/=3)
4325 
4326 !  Do-loop on the planes stacked in the z direction
4327 !$OMP PARALLEL DEFAULT(PRIVATE) &
4328 !$OMP&SHARED(aft1,aft2,aft4,aft5,bef1,bef2,bef4,bef5,cplex,denpot) &
4329 !$OMP&SHARED(fftcache,fofr,gboundin,gboundout)&
4330 !$OMP&SHARED(ic1,ic2,ic4,ic5,ind1,ind2,ind4) &
4331 !$OMP&SHARED(ind5,indpw_kin,indpw_kout,mgb,n1,n2,n3,n4,n5,ngbin) &
4332 !$OMP&SHARED(ngbout,now1,now2,now4,now5,option,trig1,trig2,trig4,trig5) &
4333 !$OMP&SHARED(weight_r,weight_i,wk1d_a,wk1d_b)
4334 
4335 !  Allocate two 2-dimensional work arrays
4336    ABI_ALLOCATE(wk2d_a,(2,n4,n5,1))
4337    ABI_ALLOCATE(wk2d_b,(2,n4,n5,1))
4338 !$OMP DO
4339    do i3=1,n3
4340 
4341      if(option/=3)then
4342 !      Zero the values on the current plane
4343 !      wk2d_a(1:2,1:n1,1:n2,1)=zero
4344        do i2=1,n2
4345          do i1=1,n1
4346            wk2d_a(1,i1,i2,1)=zero
4347            wk2d_a(2,i1,i2,1)=zero
4348          end do
4349        end do
4350 !      Copy the data in the current plane
4351        do igb=1,ngbin
4352          i1=indpw_kin(1,igb) ; i2=indpw_kin(2,igb)
4353          wk2d_a(1,i1,i2,1)=wk1d_b(1,igb,i3,1)
4354          wk2d_a(2,i1,i2,1)=wk1d_b(2,igb,i3,1)
4355        end do
4356 !      Perform x transform, taking into account arrays of zeros
4357        g2min=gboundin(3,1) ; g2max=gboundin(4,1)
4358        if ( g2min+n2 >= g2max+2 ) then
4359          do i2=g2max+2,g2min+n2
4360            do i1=1,n1
4361              wk2d_b(1,i1,i2,1)=zero
4362              wk2d_b(2,i1,i2,1)=zero
4363            end do
4364          end do
4365        end if
4366        gbound_dum(1)=1 ; gbound_dum(2)=1
4367        gbound_dum(3)=g2min ; gbound_dum(4)=g2max
4368        call sg_fftpx(fftcache,mfac,mg,0,n4,n5,1,n2,1,wk2d_a,wk2d_b,&
4369 &       trig1,aft1,now1,bef1,one,ind1,ic1,gbound_dum)
4370 !      Perform y transform
4371        n1i=1
4372        call sg_ffty(fftcache,mfac,mg,n4,n5,1,n1i,n1,1,1,wk2d_b,wk2d_a, &
4373 &       trig2,aft2,now2,bef2,one,ind2,ic2)
4374 !      The wave function is now in real space, for the current plane
4375      end if
4376 
4377      if(option==0)then ! Copy the transformed function at the right place
4378        do i2=1,n2
4379          do i1=1,n1
4380            fofr(1,i1,i2,i3)=wk2d_a(1,i1,i2,1)
4381            fofr(2,i1,i2,i3)=wk2d_a(2,i1,i2,1)
4382          end do
4383        end do
4384      end if
4385 
4386      if(option==1)then ! Accumulate density
4387        do i2=1,n2
4388          do i1=1,n1
4389            denpot(i1,i2,i3)=denpot(i1,i2,i3)+weight_r*wk2d_a(1,i1,i2,1)**2+weight_i*wk2d_a(2,i1,i2,1)**2
4390          end do
4391        end do
4392      end if
4393 
4394      if(option==2)then ! Apply local potential
4395        if(cplex==1)then
4396          do i2=1,n2
4397            do i1=1,n1
4398              wk2d_a(1,i1,i2,1)=denpot(i1,i2,i3)*wk2d_a(1,i1,i2,1)
4399              wk2d_a(2,i1,i2,1)=denpot(i1,i2,i3)*wk2d_a(2,i1,i2,1)
4400            end do
4401          end do
4402        else
4403          do i2=1,n2
4404            do i1=1,n1
4405              wkre=wk2d_a(1,i1,i2,1)
4406              wkim=wk2d_a(2,i1,i2,1)
4407              wk2d_a(1,i1,i2,1)=denpot(2*i1-1,i2,i3)*wkre -denpot(2*i1  ,i2,i3)*wkim
4408              wk2d_a(2,i1,i2,1)=denpot(2*i1-1,i2,i3)*wkim +denpot(2*i1  ,i2,i3)*wkre
4409            end do
4410          end do
4411        end if
4412      end if
4413 
4414      if(option==3)then ! Copy the function to be tranformed at the right place
4415        do i2=1,n2
4416          do i1=1,n1
4417            wk2d_a(1,i1,i2,1)=fofr(1,i1,i2,i3)
4418            wk2d_a(2,i1,i2,1)=fofr(2,i1,i2,i3)
4419          end do
4420        end do
4421      end if
4422 
4423      if(option==2 .or. option==3)then  ! Perform y transform
4424        n1i=1
4425        call sg_ffty(fftcache,mfac,mg,n4,n5,1,n1i,n1,1,1,wk2d_a,wk2d_b, &
4426 &       trig5,aft5,now5,bef5,-one,ind5,ic5)
4427 !      Perform x transform, taking into account arrays of zeros
4428        gbound_dum(1)=1 ; gbound_dum(2)=1
4429        gbound_dum(3)=gboundout(3,1) ; gbound_dum(4)=gboundout(4,1)
4430        call sg_fftpx(fftcache,mfac,mg,0,n4,n5,1,n2,1,wk2d_b,wk2d_a,&
4431 &       trig4,aft4,now4,bef4,-one,ind4,ic4,gbound_dum)
4432 !      Copy the data from the current plane to wk1d_b
4433        do igb=1,ngbout
4434          i1=indpw_kout(1,igb) ; i2=indpw_kout(2,igb)
4435          wk1d_b(1,igb,i3,1)=wk2d_a(1,i1,i2,1)
4436          wk1d_b(2,igb,i3,1)=wk2d_a(2,i1,i2,1)
4437        end do
4438      end if
4439 
4440 !    End loop on planes
4441    end do
4442 !$OMP END DO
4443    ABI_DEALLOCATE(wk2d_a)
4444    ABI_DEALLOCATE(wk2d_b)
4445 !$OMP END PARALLEL
4446 
4447    if(option==2 .or. option==3)then
4448 
4449 !    Go from wk1d_b to wk1d_a, using 1D FFTs on the z direction
4450 !    However, due to special packing of data, use routine ffty
4451 !$OMP PARALLEL DO SHARED(aft6,bef6,fftcache,ind6,ic6,lotout,mgb)&
4452 !$OMP&SHARED(ngbout,now6,n3,trig6,wk1d_a,wk1d_b)&
4453 !$OMP&PRIVATE(igb,igbmax)
4454      do igb=1,ngbout,lotout
4455        igbmax=min(igb+lotout-1,ngbout)
4456 !      Go from wk1d_b to wk1d_a, using 1D FFTs on the z direction
4457 !      However, due to special packing of data, use routine ffty
4458        call sg_ffty(fftcache,mfac,mg,mgb,n3,1,igb,igbmax,1,1,wk1d_b,wk1d_a, &
4459 &       trig6,aft6,now6,bef6,-one,ind6,ic6)
4460 
4461      end do
4462 !$OMP END PARALLEL DO
4463 
4464 !    Transfer the data in the output array, after normalization
4465      norm=1.d0/dble(nfftot)
4466 !$OMP PARALLEL DO DEFAULT(PRIVATE) SHARED(fofgout,indpw_kout,norm,npwout,wk1d_a)
4467      do ig=1,npwout
4468        igb=indpw_kout(4,ig) ; i3=indpw_kout(3,ig)
4469        fofgout(1,ig)=wk1d_a(1,igb,i3,1)*norm
4470        fofgout(2,ig)=wk1d_a(2,igb,i3,1)*norm
4471      end do
4472 !$OMP END PARALLEL DO
4473    end if
4474 
4475    ABI_DEALLOCATE(wk1d_a)
4476    ABI_DEALLOCATE(wk1d_b)
4477 
4478 !  End general k-point part
4479  end if
4480 
4481 !------------------------------------------------------------------
4482 !Here, use of time-reversal symmetry
4483 
4484  if(istwf_k>=2)then
4485 
4486    n1half1=n1/2+1 ; n1halfm=(n1+1)/2
4487    n2half1=n2/2+1
4488 !  n4half1 or n5half1 are the odd integers >= n1half1 or n2half1
4489    n4half1=(n1half1/2)*2+1
4490    n5half1=(n2half1/2)*2+1
4491 !  Note that the z transform will appear as a y transform
4492    ABI_ALLOCATE(wk1d_a,(2,mgb,n3,1))
4493    ABI_ALLOCATE(wk1d_b,(2,mgb,n3,1))
4494 
4495    if(istwf_k/=2)then
4496      ABI_ALLOCATE(pha1,(2,n1))
4497      ABI_ALLOCATE(pha2,(2,n2))
4498      ABI_ALLOCATE(pha3,(3,n3))
4499      do i1=1,n1
4500        pha1(1,i1)=cos(dble(i1-1)*pi/dble(n1))
4501        pha1(2,i1)=sin(dble(i1-1)*pi/dble(n1))
4502      end do
4503      do i2=1,n2
4504        pha2(1,i2)=cos(dble(i2-1)*pi/dble(n2))
4505        pha2(2,i2)=sin(dble(i2-1)*pi/dble(n2))
4506      end do
4507      do i3=1,n3
4508        pha3(1,i3)=cos(dble(i3-1)*pi/dble(n3))
4509        pha3(2,i3)=sin(dble(i3-1)*pi/dble(n3))
4510      end do
4511    end if
4512 
4513    if(option/=3)then
4514 
4515 !    Zero the components of wk1d_a
4516 !$OMP PARALLEL DO DEFAULT(PRIVATE) SHARED(n3,ngbin,wk1d_a)
4517      do i3=1,n3
4518        do igb=1,ngbin
4519          wk1d_a(1,igb,i3,1)=zero
4520          wk1d_a(2,igb,i3,1)=zero
4521        end do
4522      end do
4523 !$OMP END PARALLEL DO
4524 
4525 !    Insert fofgin into the work array
4526 !$OMP PARALLEL DO DEFAULT(PRIVATE) SHARED(fofgin,indpw_kin,npwin,wk1d_a)
4527      do ig=1,npwin
4528        igb=indpw_kin(4,ig) ; i3=indpw_kin(3,ig)
4529        wk1d_a(1,igb,i3,1)=fofgin(1,ig)
4530        wk1d_a(2,igb,i3,1)=fofgin(2,ig)
4531      end do
4532 !$OMP END PARALLEL DO
4533 
4534 !    Must complete the i2=1 plane when $k_y \equiv 0$
4535 
4536 !    Take care of i1=1 when $k_x \equiv 0$
4537      if(istwf_k==2)then
4538 !      Take care of i1=1
4539        do i3=n3/2+1,n3
4540          i3inv=n3+2-i3
4541          wk1d_a(1,1,i3,1)= wk1d_a(1,1,i3inv,1)
4542          wk1d_a(2,1,i3,1)=-wk1d_a(2,1,i3inv,1)
4543        end do
4544      else if(istwf_k==4)then
4545 !      Take care of i1=1
4546        do i3=n3/2+1,n3
4547          i3inv=n3+1-i3
4548          wk1d_a(1,1,i3,1)= wk1d_a(1,1,i3inv,1)
4549          wk1d_a(2,1,i3,1)=-wk1d_a(2,1,i3inv,1)
4550        end do
4551      end if
4552 
4553 !    Now, take care of other i1 values, except i3==1 when $k_z \equiv 0$
4554      i1max=gboundin(6,1)+1
4555      if(istwf_k==2)then
4556 !$OMP PARALLEL DO DEFAULT(PRIVATE) SHARED(i1max,n3,wk1d_a)
4557        do igb=2,2*i1max-1
4558          igb_inv=2*i1max+1-igb
4559          do i3=n3/2+1,n3
4560            i3inv=n3+2-i3
4561            wk1d_a(1,igb,i3,1)= wk1d_a(1,igb_inv,i3inv,1)
4562            wk1d_a(2,igb,i3,1)=-wk1d_a(2,igb_inv,i3inv,1)
4563          end do
4564        end do
4565 !$OMP END PARALLEL DO
4566 
4567      else if(istwf_k==3)then
4568 !$OMP PARALLEL DO DEFAULT(PRIVATE) SHARED(i1max,n3,wk1d_a)
4569        do igb=1,2*i1max
4570          igb_inv=2*i1max+1-igb
4571          do i3=n3/2+1,n3
4572            i3inv=n3+2-i3
4573            wk1d_a(1,igb,i3,1)= wk1d_a(1,igb_inv,i3inv,1)
4574            wk1d_a(2,igb,i3,1)=-wk1d_a(2,igb_inv,i3inv,1)
4575          end do
4576        end do
4577 !$OMP END PARALLEL DO
4578 
4579      else if(istwf_k==4)then
4580 !$OMP PARALLEL DO DEFAULT(PRIVATE) SHARED(i1max,n3,wk1d_a)
4581        do igb=2,2*i1max-1
4582          igb_inv=2*i1max+1-igb
4583          do i3=n3/2+1,n3
4584            i3inv=n3+1-i3
4585            wk1d_a(1,igb,i3,1)= wk1d_a(1,igb_inv,i3inv,1)
4586            wk1d_a(2,igb,i3,1)=-wk1d_a(2,igb_inv,i3inv,1)
4587          end do
4588        end do
4589 !$OMP END PARALLEL DO
4590 
4591      else if(istwf_k==5)then
4592 !$OMP PARALLEL DO DEFAULT(PRIVATE) SHARED(i1max,n3,wk1d_a)
4593        do igb=1,2*i1max
4594          igb_inv=2*i1max+1-igb
4595          do i3=n3/2+1,n3
4596            i3inv=n3+1-i3
4597            wk1d_a(1,igb,i3,1)= wk1d_a(1,igb_inv,i3inv,1)
4598            wk1d_a(2,igb,i3,1)=-wk1d_a(2,igb_inv,i3inv,1)
4599          end do
4600        end do
4601 !$OMP END PARALLEL DO
4602 
4603      end if
4604 
4605 !    Now, i3==1
4606      if(istwf_k==2)then
4607        do igb=2,i1max
4608          igb_inv=2*i1max+1-igb
4609          wk1d_a(1,igb_inv,1,1)= wk1d_a(1,igb,1,1)
4610          wk1d_a(2,igb_inv,1,1)=-wk1d_a(2,igb,1,1)
4611        end do
4612      else if(istwf_k==3)then
4613        do igb=1,i1max
4614          igb_inv=2*i1max+1-igb
4615          wk1d_a(1,igb_inv,1,1)= wk1d_a(1,igb,1,1)
4616          wk1d_a(2,igb_inv,1,1)=-wk1d_a(2,igb,1,1)
4617        end do
4618      end if
4619 
4620 !    Go from wk1d_a to wk1d_b, using 1D FFTs on the z direction
4621 !    However, due to special packing of data, use routine ffty
4622 !$OMP PARALLEL DO SHARED(aft3,bef3,fftcache,ind3,ic3,lotin,mgb)&
4623 !$OMP&SHARED(ngbin,now3,n3,trig3,wk1d_a,wk1d_b)&
4624 !$OMP&PRIVATE(igb,igbmax)
4625      do igb=1,ngbin,lotin
4626        igbmax=min(igb+lotin-1,ngbin)
4627 !      Go from wk1d_a to wk1d_b, using 1D FFTs on the z direction
4628 !      However, due to special packing of data, use routine ffty
4629        call sg_ffty(fftcache,mfac,mg,mgb,n3,1,igb,igbmax,1,1,wk1d_a,wk1d_b, &
4630 &       trig3,aft3,now3,bef3,one,ind3,ic3)
4631      end do
4632 !$OMP END PARALLEL DO
4633 
4634 !    Change the phase if $k_z \neq 0$
4635      if(istwf_k==4 .or. istwf_k==5 .or. istwf_k==8 .or. istwf_k==9 )then
4636 !$OMP PARALLEL DO DEFAULT(PRIVATE) SHARED(ngbin,n3,pha3,wk1d_b)
4637        do i3=1,n3
4638          phar=pha3(1,i3)
4639          phai=pha3(2,i3)
4640          do igb=1,ngbin
4641            ar=wk1d_b(1,igb,i3,1)
4642            ai=wk1d_b(2,igb,i3,1)
4643            wk1d_b(1,igb,i3,1)=phar*ar-phai*ai
4644            wk1d_b(2,igb,i3,1)=phai*ar+phar*ai
4645          end do
4646        end do
4647 !$OMP END PARALLEL DO
4648      end if
4649 
4650    end if !  if(option/=3)
4651 
4652 !  Do-loop on the planes stacked in the z direction
4653 
4654 !$OMP PARALLEL DEFAULT(PRIVATE) &
4655 !$OMP&SHARED(aft1,aft2,aft4,aft5,bef1,bef2,bef4,bef5,denpot) &
4656 !$OMP&SHARED(fftcache,fofr,gboundin,ic1,ic2,ic4,ic5,ind1,ind2,ind4,ind5) &
4657 !$OMP&SHARED(indpw_kin,indpw_kout,istwf_k,mgb,n1,n1half1) &
4658 !$OMP&SHARED(n1halfm,n2,n2half1,n3,n4,n5,ngbin,ngbout) &
4659 !$OMP&SHARED(now1,now2,now4,now5,option,pha1,pha2,trig1) &
4660 !$OMP&SHARED(trig2,trig4,trig5,weight_r,weight_i,wk1d_a,wk1d_b)
4661 
4662 !  Allocate two 2-dimensional work arrays
4663    ABI_ALLOCATE(wk2d_a,(2,n4,n5,1))
4664    ABI_ALLOCATE(wk2d_b,(2,n4,n5,1))
4665    ABI_ALLOCATE(wk2d_c,(2,2*n1halfm,n5,1))
4666    ABI_ALLOCATE(wk2d_d,(2,2*n1halfm,n5,1))
4667 !$OMP DO
4668    do i3=1,n3
4669 
4670      g2max=gboundin(4,1)
4671 
4672      if(option/=3)then
4673 !      Zero the values on the current plane : need only from i2=1 to g2max+1
4674        do i2=1,g2max+1
4675          do i1=1,n1
4676            wk2d_a(1,i1,i2,1)=zero
4677            wk2d_a(2,i1,i2,1)=zero
4678          end do
4679        end do
4680 
4681 !      Copy the data in the current plane
4682        do igb=1,ngbin
4683          i1=indpw_kin(1,igb) ; i2=indpw_kin(2,igb)
4684          wk2d_a(1,i1,i2,1)=wk1d_b(1,igb,i3,1)
4685          wk2d_a(2,i1,i2,1)=wk1d_b(2,igb,i3,1)
4686        end do
4687 
4688 !      Perform x transform, taking into account arrays of zeros
4689        call sg_fftx(fftcache,mfac,mg,n4,n5,1,g2max+1,1,wk2d_a,wk2d_b,&
4690 &       trig1,aft1,now1,bef1,one,ind1,ic1)
4691 
4692 !      Change the phase if $k_x \neq 0$
4693        if(istwf_k==3 .or. istwf_k==5 .or. istwf_k==7 .or. istwf_k==9)then
4694          do i1=1,n1
4695            phar=pha1(1,i1)
4696            phai=pha1(2,i1)
4697            do i2=1,g2max+1
4698              ar=wk2d_b(1,i1,i2,1)
4699              ai=wk2d_b(2,i1,i2,1)
4700              wk2d_b(1,i1,i2,1)=phar*ar-phai*ai
4701              wk2d_b(2,i1,i2,1)=phai*ar+phar*ai
4702            end do
4703          end do
4704        end if
4705 
4706 !      Compute symmetric and antisymmetric combinations
4707        if(istwf_k>=2 .and. istwf_k<=5)then
4708          do i1=1,n1half1-1
4709            wk2d_a(1,i1,1,1)=wk2d_b(1,2*i1-1,1,1)
4710            wk2d_a(2,i1,1,1)=wk2d_b(1,2*i1  ,1,1)
4711          end do
4712 !        If n1 odd, must add last data
4713          if((2*n1half1-2)/=n1)then
4714            wk2d_a(1,n1half1,1,1)=wk2d_b(1,n1,1,1)
4715            wk2d_a(2,n1half1,1,1)=zero
4716          end if
4717          ii2=2
4718        else
4719          ii2=1
4720        end if
4721        if( g2max+1 >= ii2)then
4722          do i2=ii2,g2max+1
4723            do i1=1,n1half1-1
4724              wk2d_a(1,i1,i2,1)=        wk2d_b(1,2*i1-1,i2,1)-wk2d_b(2,2*i1,i2,1)
4725              wk2d_a(2,i1,i2,1)=        wk2d_b(2,2*i1-1,i2,1)+wk2d_b(1,2*i1,i2,1)
4726              wk2d_a(1,i1,n2+ii2-i2,1)= wk2d_b(1,2*i1-1,i2,1)+wk2d_b(2,2*i1,i2,1)
4727              wk2d_a(2,i1,n2+ii2-i2,1)=-wk2d_b(2,2*i1-1,i2,1)+wk2d_b(1,2*i1,i2,1)
4728            end do
4729            if((2*n1half1-2)/=n1)then
4730              wk2d_a(1,n1half1,i2,1)=        wk2d_b(1,n1,i2,1)
4731              wk2d_a(2,n1half1,i2,1)=        wk2d_b(2,n1,i2,1)
4732              wk2d_a(1,n1half1,n2+ii2-i2,1)= wk2d_b(1,n1,i2,1)
4733              wk2d_a(2,n1half1,n2+ii2-i2,1)=-wk2d_b(2,n1,i2,1)
4734            end if
4735          end do
4736        end if
4737        if ( n2half1 >= g2max+2 ) then
4738          do i2=g2max+2,n2half1
4739            do i1=1,n1half1-1
4740              wk2d_a(1,i1,i2,1)=zero
4741              wk2d_a(2,i1,i2,1)=zero
4742              wk2d_a(1,i1,n2+ii2-i2,1)=zero
4743              wk2d_a(2,i1,n2+ii2-i2,1)=zero
4744            end do
4745            if((2*n1half1-2)/=n1)then
4746              wk2d_a(1,n1half1,i2,1)=zero
4747              wk2d_a(2,n1half1,i2,1)=zero
4748              wk2d_a(1,n1half1,n2+ii2-i2,1)=zero
4749              wk2d_a(2,n1half1,n2+ii2-i2,1)=zero
4750            end if
4751          end do
4752        end if
4753 
4754        n1i=1
4755        call sg_ffty(fftcache,mfac,mg,n4,n5,1,n1i,n1halfm,1,1,wk2d_a,wk2d_b,&
4756 &       trig2,aft2,now2,bef2,one,ind2,ic2)
4757 
4758 !      Change the phase if $k_y \neq 0$
4759        if(istwf_k>=6 .and. istwf_k<=9)then
4760          do i2=1,n2
4761            phar=pha2(1,i2)
4762            phai=pha2(2,i2)
4763            do i1=1,n1halfm
4764              ar=wk2d_b(1,i1,i2,1)
4765              ai=wk2d_b(2,i1,i2,1)
4766              wk2d_b(1,i1,i2,1)= phar*ar-phai*ai
4767              wk2d_b(2,i1,i2,1)= phai*ar+phar*ai
4768            end do
4769          end do
4770        end if
4771 
4772      end if ! option/=3
4773 
4774 !    The wave function is now in real space, for the current plane,
4775 !    represented by REAL numbers, although packed in the complex array wk2d_b
4776 
4777      g2max=gboundin(4,1)
4778 
4779      if(option==0)then
4780 !      This option is only permitted for istwf_k==2 (Gamma point)
4781 !      Copy the transformed function at the right place
4782        do i2=1,n2
4783          do i1=1,n1half1-1
4784            fofr(1,2*i1-1,i2,i3)=wk2d_b(1,i1,i2,1)
4785            fofr(1,2*i1  ,i2,i3)=wk2d_b(2,i1,i2,1)
4786            fofr(2,2*i1-1,i2,i3)=zero
4787            fofr(2,2*i1  ,i2,i3)=zero
4788          end do
4789 !        If n1 odd, must add last data
4790          if((2*n1half1-2)/=n1)then
4791            fofr(1,n1,i2,i3)=wk2d_b(1,n1half1,i2,1)
4792            fofr(2,n1,i2,i3)=zero
4793          end if
4794        end do
4795      end if
4796 
4797      if(option==1)then ! Accumulate density
4798        do i2=1,n2
4799          do i1=1,n1half1-1
4800            denpot(2*i1-1,i2,i3)=denpot(2*i1-1,i2,i3)+weight_r*wk2d_b(1,i1,i2,1)**2
4801            denpot(2*i1  ,i2,i3)=denpot(2*i1  ,i2,i3)+weight_i*wk2d_b(2,i1,i2,1)**2
4802          end do
4803 !        If n1 odd, must add last data
4804          if((2*n1half1-2)/=n1)then
4805            denpot(n1,i2,i3)=denpot(n1,i2,i3)+weight_r*wk2d_b(1,n1half1,i2,1)**2
4806          end if
4807        end do
4808      end if
4809 
4810      if(option==2)then ! Apply local potential
4811        do i2=1,n2
4812          do i1=1,n1half1-1
4813            wk2d_a(1,i1,i2,1)=denpot(2*i1-1,i2,i3)*wk2d_b(1,i1,i2,1)
4814            wk2d_a(2,i1,i2,1)=denpot(2*i1  ,i2,i3)*wk2d_b(2,i1,i2,1)
4815          end do
4816 !        If n1 odd, must add last data
4817          if((2*n1half1-2)/=n1)then
4818            wk2d_a(1,n1half1,i2,1)=denpot(n1,i2,i3)*wk2d_b(1,n1half1,i2,1)
4819            wk2d_a(2,n1half1,i2,1)=zero
4820          end if
4821        end do
4822      end if
4823 
4824      if(option==3)then
4825 !      This option is only permitted for istwf_k==2 (Gamma point)
4826 !      Copy the transformed function at the right place
4827        do i2=1,n2
4828          do i1=1,n1half1-1
4829            wk2d_b(1,i1,i2,1)=fofr(1,2*i1-1,i2,i3)
4830            wk2d_b(2,i1,i2,1)=fofr(1,2*i1  ,i2,i3)
4831          end do
4832 !        If n1 odd, must add last data
4833          if((2*n1half1-2)/=n1)then
4834            wk2d_b(1,n1half1,i2,1)=fofr(1,n1,i2,i3)
4835          end if
4836        end do
4837      end if
4838 
4839      if(option==2 .or. option==3)then  ! Change the phase if $k_y \neq 0$
4840        if(istwf_k>=6 .and. istwf_k<=9)then
4841          do i2=1,n2
4842            phar=pha2(1,i2)
4843            phai=pha2(2,i2)
4844            do i1=1,n1halfm
4845              ar=wk2d_a(1,i1,i2,1)
4846              ai=wk2d_a(2,i1,i2,1)
4847              wk2d_a(1,i1,i2,1)= phar*ar+phai*ai
4848              wk2d_a(2,i1,i2,1)=-phai*ar+phar*ai
4849            end do
4850          end do
4851        end if
4852 
4853 !      Perform y transform
4854        n1i=1
4855        call sg_ffty(fftcache,mfac,mg,n4,n5,1,n1i,n1halfm,1,1,wk2d_a,wk2d_b, &
4856 &       trig5,aft5,now5,bef5,-one,ind5,ic5)
4857 
4858 !      Decompose symmetric and antisymmetric parts
4859        if(istwf_k>=2 .and. istwf_k<=5)then
4860          do i1=1,n1halfm
4861            wk2d_c(1,2*i1-1,1,1)=wk2d_b(1,i1,1,1)
4862            wk2d_c(2,2*i1-1,1,1)=zero
4863            wk2d_c(1,2*i1,1,1)=wk2d_b(2,i1,1,1)
4864            wk2d_c(2,2*i1,1,1)=zero
4865          end do
4866          ii2=2
4867        else
4868          ii2=1
4869        end if
4870        do i2=ii2,g2max+1
4871          do i1=1,n1halfm
4872            wk2d_c(1,2*i1-1,i2,1)=(wk2d_b(1,i1,i2,1)+wk2d_b(1,i1,n2+ii2-i2,1))*0.5d0
4873            wk2d_c(2,2*i1-1,i2,1)=(wk2d_b(2,i1,i2,1)-wk2d_b(2,i1,n2+ii2-i2,1))*0.5d0
4874            wk2d_c(1,2*i1,i2,1)= ( wk2d_b(2,i1,i2,1)+wk2d_b(2,i1,n2+ii2-i2,1))*0.5d0
4875            wk2d_c(2,2*i1,i2,1)= (-wk2d_b(1,i1,i2,1)+wk2d_b(1,i1,n2+ii2-i2,1))*0.5d0
4876          end do
4877        end do
4878 
4879 !      Change the phase if $k_x \neq 0$
4880        if(istwf_k==3 .or. istwf_k==5 .or. istwf_k==7 .or. istwf_k==9 )then
4881          do i1=1,n1
4882            phar=pha1(1,i1)
4883            phai=pha1(2,i1)
4884            do i2=1,g2max+1
4885              ar=wk2d_c(1,i1,i2,1)
4886              ai=wk2d_c(2,i1,i2,1)
4887              wk2d_c(1,i1,i2,1)= phar*ar+phai*ai
4888              wk2d_c(2,i1,i2,1)=-phai*ar+phar*ai
4889            end do
4890          end do
4891        end if
4892 
4893 !      Perform x transform : for y=1 to g2max+1, to benefit from zeros
4894        call sg_fftx(fftcache,mfac,mg,2*n1halfm,n5,1,g2max+1,1,wk2d_c,wk2d_d,&
4895 &       trig4,aft4,now4,bef4,-one,ind4,ic4)
4896 
4897 !      Copy the data from the current plane to wk1d_b
4898        do igb=1,ngbout
4899          i1=indpw_kout(1,igb) ; i2=indpw_kout(2,igb)
4900          wk1d_b(1,igb,i3,1)=wk2d_d(1,i1,i2,1)
4901          wk1d_b(2,igb,i3,1)=wk2d_d(2,i1,i2,1)
4902        end do
4903 
4904      end if ! option==2 or 3
4905 
4906 !    End loop on planes
4907    end do
4908 
4909 !$OMP END DO
4910    ABI_DEALLOCATE(wk2d_a)
4911    ABI_DEALLOCATE(wk2d_b)
4912    ABI_DEALLOCATE(wk2d_c)
4913    ABI_DEALLOCATE(wk2d_d)
4914 !$OMP END PARALLEL
4915 
4916    if(option==2 .or. option==3)then
4917 
4918 !    Change the phase if $k_z \neq 0$
4919      if(istwf_k==4 .or. istwf_k==5 .or. istwf_k==8 .or. istwf_k==9 )then
4920 !$OMP PARALLEL DO DEFAULT(PRIVATE) SHARED(ngbout,n3,pha3,wk1d_b)
4921        do i3=1,n3
4922          phar=pha3(1,i3)
4923          phai=pha3(2,i3)
4924          do igb=1,ngbout
4925            ar=wk1d_b(1,igb,i3,1)
4926            ai=wk1d_b(2,igb,i3,1)
4927            wk1d_b(1,igb,i3,1)= phar*ar+phai*ai
4928            wk1d_b(2,igb,i3,1)=-phai*ar+phar*ai
4929          end do
4930        end do
4931 !$OMP END PARALLEL DO
4932      end if
4933 
4934 !    Go from wk1d_b to wk1d_a, using 1D FFTs on the z direction
4935 !    However, due to special packing of data, use routine ffty
4936 !$OMP PARALLEL DO SHARED(aft6,bef6,fftcache,ind6,ic6,lotout,mgb)&
4937 !$OMP&SHARED(ngbout,now6,n3,trig6,wk1d_a,wk1d_b)&
4938 !$OMP&PRIVATE(igb,igbmax)
4939      do igb=1,ngbout,lotout
4940        igbmax=min(igb+lotout-1,ngbout)
4941 !      Go from wk1d_b to wk1d_a, using 1D FFTs on the z direction
4942 !      However, due to special packing of data, use routine ffty
4943        call sg_ffty(fftcache,mfac,mg,mgb,n3,1,igb,igbmax,1,1,wk1d_b,wk1d_a, &
4944 &       trig6,aft6,now6,bef6,-one,ind6,ic6)
4945 
4946      end do
4947 !$OMP END PARALLEL DO
4948 
4949 !    Transfer the data in the output array, after normalization
4950      norm=1.d0/dble(nfftot)
4951 !$OMP PARALLEL DO DEFAULT(PRIVATE) SHARED(fofgout,indpw_kout,norm,npwout,wk1d_a)
4952      do ig=1,npwout
4953        igb=indpw_kout(4,ig) ; i3=indpw_kout(3,ig)
4954        fofgout(1,ig)=wk1d_a(1,igb,i3,1)*norm
4955        fofgout(2,ig)=wk1d_a(2,igb,i3,1)*norm
4956      end do
4957 !$OMP END PARALLEL DO
4958 
4959    end if
4960 
4961    ABI_DEALLOCATE(wk1d_a)
4962    ABI_DEALLOCATE(wk1d_b)
4963 
4964    if(istwf_k/=2)then
4965      ABI_DEALLOCATE(pha1)
4966      ABI_DEALLOCATE(pha2)
4967      ABI_DEALLOCATE(pha3)
4968    end if
4969 
4970  end if !  End time-reversal symmetry
4971 
4972 !------------------------------------------------------------------
4973 
4974  if(option/=3) then
4975    ABI_DEALLOCATE(indpw_kin)
4976  end if
4977  if(option==2 .or. option==3) then
4978    ABI_DEALLOCATE(indpw_kout)
4979  end if
4980 
4981 end subroutine fftrisc_one_nothreadsafe

m_sgfft/sg_ctrig [ Functions ]

[ Top ] [ m_sgfft ] [ Functions ]

NAME

 sg_ctrig

FUNCTION

 Precalculates trigonometric expressions and bitreversal key IND (Stefan Goedecker lib).

INPUTS

 n=Number of FFT points for 1D FFT.
 ris  = sign of exponential in transform (should be 1 or -1; real)
 mfac = maximum number of factors in 1D FFTs
 mg   = maximum length of 1D FFTs

OUTPUT

 trig(2,mg) TO BE DESCRIBED SB 090902
 aft(mfac) TO BE DESCRIBED SB 090902
 bef(mfac) TO BE DESCRIBED SB 090902
 now(mfac) TO BE DESCRIBED SB 090902
 ic = number of (radix) factors of x transform length (from ctrig)
 ind(mg) TO BE DESCRIBED SB 090902

NOTES

 * This version of sg_ctrig produces cos and tan instead of sin and cos--
   this allows for much greater efficiency on the superscalar architecture
   of ibm rs6000 where floating point multiply and add (FMA) is used.

 * This routine is not thread-safe due to the presence of variables with the save attribute!
   DO NOT CALL THIS ROUTINE INSIDE A OPENMP PARALLEL REGION

TODO

 Should describe arguments
 Should suppress one-letter variables

PARENTS

      m_sgfft

CHILDREN

      sg_fft_cc

SOURCE

3829 subroutine sg_ctrig(n,trig,aft,bef,now,ris,ic,ind,mfac,mg)
3830 
3831 
3832 !This section has been created automatically by the script Abilint (TD).
3833 !Do not modify the following lines by hand.
3834 #undef ABI_FUNC
3835 #define ABI_FUNC 'sg_ctrig'
3836 !End of the abilint section
3837 
3838  implicit none
3839 
3840 !Arguments ------------------------------------
3841 !scalars
3842  integer,intent(in) :: mfac,mg,n
3843  integer,intent(out) :: ic
3844  real(dp),intent(in) :: ris
3845 !arrays
3846  integer,intent(out) :: aft(mfac),bef(mfac),ind(mg),now(mfac)
3847  real(dp),intent(out) :: trig(2,mg)
3848 
3849 !Local variables-------------------------------
3850 !scalars
3851  integer,save :: nextmx=4
3852  integer :: i,ii,inc,irep,j,k,l,next,nh
3853  integer,save :: prime(4)=(/5,4,3,2/)  !"prime" is the set of radices coded elsewhere for fft
3854  real(dp) :: angle,trigc,trigs,twopi
3855  character(len=500) :: message
3856 
3857 ! *************************************************************************
3858 
3859 !**Note**
3860 !2*Pi must not be defined too accurately here or else
3861 !cos(twopi/2) will be exactly 0 and sin/cos below will be
3862 !infinite; if a small error is left in Pi, then sin/cos will
3863 !be about 10**14 and later cos * (sin/cos) will be 1 to within
3864 !about 10**(-14) and the fft routines will work
3865 !The precision on sgi causes the algorithm to fail if
3866 !twopi is defined as 8.d0*atan(1.0d0).
3867 
3868  twopi=6.2831853071795867d0
3869 
3870  angle=ris*twopi/n
3871 !trig(1,0)=1.d0
3872 !trig(2,0)=0.d0
3873  if (mod(n,2)==0) then
3874    nh=n/2
3875    trig(1,nh)=-1.d0
3876    trig(2,nh)=0.d0
3877    do i=1,nh-1
3878      trigc=cos(i*angle)
3879      trigs=sin(i*angle)
3880      trig(1,i)=trigc
3881      trig(2,i)=trigs/trigc
3882      trig(1,n-i)=trigc
3883      trig(2,n-i)=-trigs/trigc
3884    end do
3885  else
3886    nh=(n-1)/2
3887    do i=1,nh
3888      trigc=cos(i*angle)
3889      trigs=sin(i*angle)
3890      trig(1,i)=trigc
3891      trig(2,i)=trigs/trigc
3892      trig(1,n-i)=trigc
3893      trig(2,n-i)=-trigs/trigc
3894    end do
3895  end if
3896 
3897  ic=1
3898  aft(ic)=1
3899  bef(ic)=n
3900  next=1
3901 
3902 !An infinite loop, with exit or cycle instructions
3903  do
3904    if( (bef(ic)/prime(next))*prime(next)<bef(ic) ) then
3905      next=next+1
3906      if (next<=nextmx) then
3907        cycle
3908      else
3909        now(ic)=bef(ic)
3910        bef(ic)=1
3911      end if
3912    else
3913      now(ic)=prime(next)
3914      bef(ic)=bef(ic)/prime(next)
3915    end if
3916    aft(ic+1)=aft(ic)
3917    now(ic+1)=now(ic)
3918    bef(ic+1)=bef(ic)
3919    ic=ic+1
3920    if (ic>mfac) then
3921      write(message, '(a,i0,2a,i0)' )&
3922 &     'number of factors ic=',ic,ch10,&
3923 &     'exceeds dimensioned mfac=',mfac
3924      MSG_BUG(message)
3925    end if
3926    if (bef(ic)/=1) then
3927      aft(ic)=aft(ic)*now(ic)
3928      cycle
3929    end if
3930 !  If not cycled, exit
3931    exit
3932  end do
3933 
3934  ic=ic-1
3935 
3936 !DEBUG
3937 !write(std_out,*) 'now',(now(i),i=1,ic)
3938 !write(std_out,*) 'aft',(aft(i),i=1,ic)
3939 !write(std_out,*) 'bef',(bef(i),i=1,ic)
3940 !ENDDEBUG
3941 
3942  do i=1,n
3943    ind(i)=1
3944  end do
3945 
3946  irep=1
3947  inc=n
3948  do l=ic,1,-1
3949    inc=inc/now(l)
3950    ii=0
3951    do k=1,1+(n-1)/(now(l)*irep)
3952      do j=0,now(l)-1
3953        do i=1,irep
3954          ii=ii+1
3955          ind(ii)=ind(ii)+j*inc
3956        end do
3957      end do
3958    end do
3959    irep=irep*now(l)
3960  end do
3961 
3962  if (irep/=n) then
3963    write(message,'(a,i0,a,i0)')'  irep should equal n ; irep=',irep,' n=',n
3964    MSG_BUG(message)
3965  end if
3966 
3967  if (inc/=1) then
3968    write(message, '(a,i0)' )' inc should equal 1 in sg_ctrig; inc=',inc
3969    MSG_BUG(message)
3970  end if
3971 
3972 end subroutine sg_ctrig

m_sgfft/sg_fft_cc [ Functions ]

[ Top ] [ m_sgfft ] [ Functions ]

NAME

 sg_fft_cc

FUNCTION

 Calculates the discrete Fourier transform:

   ftarr(i1,i2,i3)=exp(ris*i*2*pi*(j1*i1/n1+j2*i2/n2+j3*i3/n3)) arr(j1,j2,j3)

INPUTS

  fftcache=size of the cache (kB)
  n1,n2,n3=physical dimension of the transform
  nd1,nd2,nd3=memory dimension of arr and ftarr
  ndat=Number of FFT transforms
  isign=+1 for G-->R, -1 for R-->G
  arr(2,nd1*nd2*nd3*ndat)=input complex array with alternating real and imaginary
  elements; data resides in 2*n1*n2*n3 of this array, spread out.
  (see SIDE FFECTS).

OUTPUT

  ftarr(2,nd1*nd2*nd3*ndat)=working space for transform and contains output

SIDE EFFECTS

  arr(2,nd1*nd2*nd3*ndat) is modified by sg_fftx,sg_ffty,sg_fftz.

NOTES

  ndi must always be greater or equal to ni.  Recommended choice for nd1
  and nd2 is: ni for ni=odd or ni+1 for ni=even (hence 2*(ni/2)+1);
  nd3 should always be n3.  Note that choosing nd1 or nd2 larger than
  the recommended value can severely degrade efficiency of this routine.
  Avoiding even ndi for nd1 and nd2 avoids cache conflicts on cache machines.
  Each of n1,n2,n3 must be a
  product of the prime factors 2,3,5. If two ni s are equal
  it is recommended to place them behind each other.
  The largest any of these may be is set by parameter "mg" below.
  This fft is particularly efficient for cache architectures.
  Note that the meaning of fftcache has changed from the original
  ncache of SG (that was the maximum number of COMPLEX*16 in the cache)

PARENTS

      ccfft,m_sgfft

CHILDREN

      sg_fft_cc

SOURCE

 96 subroutine sg_fft_cc(fftcache,n1,n2,n3,nd1,nd2,nd3,ndat,isign,arr,ftarr)
 97 
 98 
 99 !This section has been created automatically by the script Abilint (TD).
100 !Do not modify the following lines by hand.
101 #undef ABI_FUNC
102 #define ABI_FUNC 'sg_fft_cc'
103 !End of the abilint section
104 
105  implicit none
106 
107 !Arguments ------------------------------------
108 !scalars
109  integer,intent(in) :: fftcache,n1,n2,n3,nd1,nd2,nd3,ndat,isign
110 !arrays
111  real(dp),intent(inout) :: arr(2,nd1*nd2*nd3*ndat)
112  real(dp),intent(inout) :: ftarr(2,nd1*nd2*nd3*ndat)
113 
114 !Local variables-------------------------------
115 !scalars
116  integer :: idat,start
117 
118 ! *************************************************************************
119 
120  do idat=1,ndat
121    start = 1 + (idat-1)*nd1*nd2*nd3
122    call fft_cc_one_nothreadsafe(fftcache,nd1,nd2,nd3,n1,n2,n3,arr(1,start),ftarr(1,start),real(isign,kind=dp))
123  end do
124 
125 end subroutine sg_fft_cc

m_sgfft/sg_fft_rc [ Functions ]

[ Top ] [ m_sgfft ] [ Functions ]

NAME

 sg_fft_rc

FUNCTION

 Conduct Fourier transform of REAL or COMPLEX function f(r)=fofr defined on
 fft grid in real space, to create complex f(G)=fofg defined on full fft grid
 in reciprocal space, in full storage mode, or the reverse operation.
 For the reverse operation, the final data is divided by nfftot.
 REAL case when cplex=1, COMPLEX case when cplex=2. Usually used for density and potentials.

 There are two different possibilities :
  fftalgb=0 means using the complex-to-complex FFT routine,
   irrespective of the value of cplex
  fftalgb=1 means using a real-to-complex FFT or a complex-to-complex FFT,
   depending on the value of cplex.
  The only real-to-complex FFT available is from SGoedecker library.

INPUTS

 cplex=1 if fofr is real, 2 if fofr is complex
 isign=sign of Fourier transform exponent: current convention uses
  +1 for transforming from G to r
  -1 for transforming from r to G.
 nfft=(effective) number of FFT grid points (for this processor)
 ngfft(18)=contain all needed information about 3D FFT, see ~abinit/doc/variables/vargs.htm#ngfft

OUTPUT

  (see side effects)

SIDE EFFECTS

 Input/Output
 fofg(2,nfft)=f(G), complex.
 fofr(cplex*nfft)=input function f(r) (real or complex)

PARENTS

      fourdp

CHILDREN

      sg_fft_cc

SOURCE

291 subroutine sg_fft_rc(cplex,fofg,fofr,isign,nfft,ngfft)
292 
293 
294 !This section has been created automatically by the script Abilint (TD).
295 !Do not modify the following lines by hand.
296 #undef ABI_FUNC
297 #define ABI_FUNC 'sg_fft_rc'
298 !End of the abilint section
299 
300  implicit none
301 
302 !Arguments ------------------------------------
303 !scalars
304  integer,intent(in) :: cplex,isign,nfft
305 !arrays
306  integer,intent(in) :: ngfft(18)
307  real(dp),intent(inout) :: fofg(2,nfft),fofr(cplex*nfft)
308 
309 !Local variables-------------------------------
310 !scalars
311  integer,parameter :: mfac=11
312  integer :: fftalg,fftalga,fftalgb,fftcache,i1,i2,i3,ic1,ic2,ic3,index
313  integer :: n1,n1half1,n1halfm,n2,n2half1,n3,n4,n4half1,n5,n5half1,n6
314  real(dp) :: ris,xnorm
315  character(len=500) :: msg
316 !arrays
317  integer :: aft1(mfac),aft2(mfac),aft3(mfac),bef1(mfac),bef2(mfac),bef3(mfac)
318  integer :: ind1(mg),ind2(mg),ind3(mg),now1(mfac),now2(mfac),now3(mfac)
319  real(dp) :: trig1(2,mg),trig2(2,mg),trig3(3,mg)
320  real(dp),allocatable :: wk2d_a(:,:,:,:),wk2d_b(:,:,:,:),wk2d_c(:,:,:,:)
321  real(dp),allocatable :: wk2d_d(:,:,:,:),work1(:,:,:,:),work2(:,:,:,:)
322 
323 ! *************************************************************************
324 
325  !DBG_ENTER("COLL")
326 
327  n1=ngfft(1); n2=ngfft(2); n3=ngfft(3)
328  n4=ngfft(4); n5=ngfft(5); n6=ngfft(6)
329 
330  fftcache=ngfft(8)
331  fftalg  =ngfft(7)
332  fftalga =fftalg/100
333  fftalgb =mod(fftalg,100)/10
334 
335  ris=dble(isign)
336  xnorm=1.0d0/dble(n1*n2*n3)
337 
338  if (fftalgb/=0 .and. fftalgb/=1) then
339    write(msg, '(a,i4,a,a,a,a,a)' )&
340 &   'The input algorithm number fftalg=',fftalg,' is not allowed.',ch10,&
341 &   'The second digit (fftalg(B)) must be 0 or 1.',ch10,&
342 &   'Action: change fftalg in your input file.'
343    MSG_BUG(msg)
344  end if
345 
346  if (fftalgb==1 .and. ALL(fftalga/=(/1,3,4/)) )then
347    write(msg,'(a,i4,5a)')&
348 &   'The input algorithm number fftalg=',fftalg,' is not allowed.',ch10,&
349 &   'When fftalg(B) is 1, the allowed values for fftalg(A) are 1 and 4.',ch10,&
350 &   'Action: change fftalg in your input file.'
351    MSG_BUG(msg)
352  end if
353 
354  if (n4<n1.or.n5<n2.or.n6<n3) then
355    write(msg,'(a,3i8,a,3i8)')'  Each of n4,n5,n6=',n4,n5,n6,'must be >= n1, n2, n3 =',n1,n2,n3
356    MSG_BUG(msg)
357  end if
358 
359 !---------------------------------------------------------
360 !Here sophisticated algorithm based on S. Goedecker routines, only for the REAL case.
361 !Take advantage of the fact that fofr is real, and that fofg has corresponding symmetry properties.
362 
363 #ifdef DEBUG_MODE
364  if (n1>mg .or. n2>mg .or. n3>mg) then
365    write(msg, '(a,3i10,a,a,a,i10,a)' )&
366 &   'One of the dimensions n1,n2,n3=',n1,n2,n3,',',ch10,&
367 &   'exceeds allowed dimension mg=',mg,'.'
368    MSG_BUG(msg)
369  end if
370 #endif
371 
372  n1half1=n1/2+1 ; n1halfm=(n1+1)/2
373  n2half1=n2/2+1
374 !n4half1 or n5half1 are the odd integers >= n1half1 or n2half1
375  n4half1=(n1half1/2)*2+1
376  n5half1=(n2half1/2)*2+1
377 
378 !This sophisticated algorithm allows to decrease the memory needs.
379  ABI_ALLOCATE(work1,(2,n4,n5half1,n6))
380  ABI_ALLOCATE(work2,(2,n4,n5half1,n6))
381 
382  if(isign==1)then
383 
384 !  Compute auxiliary arrays needed for FFTs, here forward FFT
385    call sg_ctrig(n1,trig1,aft1,bef1,now1,one,ic1,ind1,mfac,mg)
386    call sg_ctrig(n2,trig2,aft2,bef2,now2,one,ic2,ind2,mfac,mg)
387    call sg_ctrig(n3,trig3,aft3,bef3,now3,one,ic3,ind3,mfac,mg)
388 
389 !  Transfer fofg to the expanded fft box (only half of it)
390 
391 !$OMP PARALLEL DO PRIVATE(i1,i2,i3,index) SHARED(fofg,n1,n2,n3,work1)
392    do i3=1,n3
393      do i2=1,n2half1
394        index=n1*(i2-1+n2*(i3-1))
395        do i1=1,n1
396          work1(1,i1,i2,i3)=fofg(1,i1+index)
397          work1(2,i1,i2,i3)=fofg(2,i1+index)
398        end do
399      end do
400    end do
401 
402 !$OMP PARALLEL DO SHARED(aft3,bef3,ind3,ic3,now3,n1,n2half1,n4,n5half1,n6,ris,trig3,work1,work2) PRIVATE(i2)
403    do i2=1,n2half1
404      call sg_fftz(mfac,mg,n4,n5half1,n6,n1,i2,i2,work1,work2,&
405 &     trig3,aft3,now3,bef3,ris,ind3,ic3)
406    end do
407 
408 !  Loop over x-y planes
409 
410 !$OMP PARALLEL PRIVATE(i1,i2,i3,index,wk2d_a,wk2d_b,wk2d_c,wk2d_d) &
411 !$OMP&SHARED(aft1,aft2,bef1,bef2,fftcache,fofg,fofr,ic1,ic2,ind1,ind2) &
412 !$OMP&SHARED(n1,n1half1,n1halfm,n2,n2half1,n3) &
413 !$OMP&SHARED(n4,n5,now1,now2,ris,trig1,trig2,work2)
414 
415    ABI_ALLOCATE(wk2d_a,(2,n4,n5,1))
416    ABI_ALLOCATE(wk2d_b,(2,n4,n5,1))
417    ABI_ALLOCATE(wk2d_c,(2,2*n1halfm+1,n5,1))
418    ABI_ALLOCATE(wk2d_d,(2,2*n1halfm+1,n5,1))
419 
420 !$OMP DO
421    do i3=1,n3
422 
423      do i2=1,n2half1
424        do i1=1,n1
425          wk2d_c(1,i1,i2,1)=work2(1,i1,i2,i3)
426          wk2d_c(2,i1,i2,1)=work2(2,i1,i2,i3)
427        end do
428      end do
429 
430      call sg_fftx(fftcache,mfac,mg,2*n1halfm+1,n5,1,n2half1,1,wk2d_c,wk2d_d,&
431 &     trig1,aft1,now1,bef1,ris,ind1,ic1)
432 
433      do i1=1,n1half1-1 ! Compute symmetric and antisymmetric combinations
434        wk2d_a(1,i1,1,1)=wk2d_d(1,2*i1-1,1,1)
435        wk2d_a(2,i1,1,1)=wk2d_d(1,2*i1  ,1,1)
436      end do
437 
438      if((2*n1half1-2)/=n1)then  ! If n1 odd, must add last data
439        wk2d_a(1,n1half1,1,1)=wk2d_d(1,n1,1,1)
440        wk2d_a(2,n1half1,1,1)=0.0d0
441      end if
442 
443      do i2=2,n2half1
444        do i1=1,n1half1-1
445          wk2d_a(1,i1,i2,1)     = wk2d_d(1,2*i1-1,i2,1)-wk2d_d(2,2*i1,i2,1)
446          wk2d_a(2,i1,i2,1)     = wk2d_d(2,2*i1-1,i2,1)+wk2d_d(1,2*i1,i2,1)
447          wk2d_a(1,i1,n2+2-i2,1)= wk2d_d(1,2*i1-1,i2,1)+wk2d_d(2,2*i1,i2,1)
448          wk2d_a(2,i1,n2+2-i2,1)=-wk2d_d(2,2*i1-1,i2,1)+wk2d_d(1,2*i1,i2,1)
449        end do
450        if((2*n1half1-2)/=n1)then
451          wk2d_a(1,n1half1,i2,1)     = wk2d_d(1,n1,i2,1)
452          wk2d_a(2,n1half1,i2,1)     = wk2d_d(2,n1,i2,1)
453          wk2d_a(1,n1half1,n2+2-i2,1)= wk2d_d(1,n1,i2,1)
454          wk2d_a(2,n1half1,n2+2-i2,1)=-wk2d_d(2,n1,i2,1)
455        end if
456      end do
457 
458      call sg_ffty(fftcache,mfac,mg,n4,n5,1,1,n1halfm,1,1,wk2d_a,wk2d_b,&
459 &     trig2,aft2,now2,bef2,ris,ind2,ic2)
460 
461      do i2=1,n2  ! Take real part data from expanded box and put it in the original box.
462        index=n1*(i2-1+n2*(i3-1))
463        do i1=1,n1half1-1 ! copy data
464          fofr(2*i1-1+index)=wk2d_b(1,i1,i2,1)
465          fofr(2*i1  +index)=wk2d_b(2,i1,i2,1)
466        end do
467        if((2*n1half1-2)/=n1)then ! If n1 odd, must add last data
468          fofr(n1+index)=wk2d_b(1,n1half1,i2,1)
469        end if
470      end do
471 
472    end do ! loop over x-y planes
473 !$OMP END DO
474    ABI_DEALLOCATE(wk2d_a)
475    ABI_DEALLOCATE(wk2d_b)
476    ABI_DEALLOCATE(wk2d_c)
477    ABI_DEALLOCATE(wk2d_d)
478 !$OMP END PARALLEL
479 
480  else if(isign==-1)then
481 
482 !  Compute auxiliary arrays needed for FFTs, here backward FFT
483    call sg_ctrig(n1,trig1,aft1,bef1,now1,-one,ic1,ind1,mfac,mg)
484    call sg_ctrig(n2,trig2,aft2,bef2,now2,-one,ic2,ind2,mfac,mg)
485    call sg_ctrig(n3,trig3,aft3,bef3,now3,-one,ic3,ind3,mfac,mg)
486 
487 !  Treat first x-transform in x-y plane, and multiply
488 !  by overall normalization factor 1/nfftot
489 
490 !  Loop over x-y planes
491 
492 !$OMP PARALLEL PRIVATE(i1,i2,i3,index,wk2d_a,wk2d_b,wk2d_c,wk2d_d) &
493 !$OMP&SHARED(aft1,aft2,bef1,bef2,fftcache,fofr,ic1,ic2,ind1,ind2) &
494 !$OMP&SHARED(n1,n1half1,n1halfm,n2,n2half1,n3) &
495 !$OMP&SHARED(n4,n5,now1,now2,ris,trig1,trig2,work1,xnorm)
496 
497    ABI_ALLOCATE(wk2d_a,(2,n4,n5,1))
498    ABI_ALLOCATE(wk2d_b,(2,n4,n5,1))
499    ABI_ALLOCATE(wk2d_c,(2,2*n1halfm+1,n5,1))
500    ABI_ALLOCATE(wk2d_d,(2,2*n1halfm+1,n5,1))
501 
502 !$OMP DO
503    do i3=1,n3
504      do i2=1,n2
505        index=n1*(i2-1+n2*(i3-1))
506        do i1=1,n1half1-1 ! copy and normalize data
507          wk2d_a(1,i1,i2,1)=fofr(2*i1-1+index)*xnorm
508          wk2d_a(2,i1,i2,1)=fofr(2*i1  +index)*xnorm
509        end do
510 
511        if((2*n1half1-2)/=n1)then ! If n1 odd, must add last data
512          wk2d_a(1,n1half1,i2,1)=fofr(n1+index)*xnorm
513          wk2d_a(2,n1half1,i2,1)=zero
514        end if
515      end do
516 
517      call sg_ffty(fftcache,mfac,mg,n4,n5,1,1,n1halfm,1,1,wk2d_a,wk2d_b,&
518 &     trig2,aft2,now2,bef2,ris,ind2,ic2)
519 
520      do i1=1,n1halfm ! Decompose symmetric and antisymmetric parts
521        wk2d_c(1,2*i1-1,1,1)=wk2d_b(1,i1,1,1)
522        wk2d_c(2,2*i1-1,1,1)=0.0d0
523        wk2d_c(1,2*i1,1,1)=wk2d_b(2,i1,1,1)
524        wk2d_c(2,2*i1,1,1)=0.0d0
525      end do
526 
527      do i2=2,n2half1
528        do i1=1,n1halfm
529          wk2d_c(1,2*i1-1,i2,1)= (wk2d_b(1,i1,i2,1)+wk2d_b(1,i1,n2+2-i2,1))*0.5d0
530          wk2d_c(2,2*i1-1,i2,1)= (wk2d_b(2,i1,i2,1)-wk2d_b(2,i1,n2+2-i2,1))*0.5d0
531          wk2d_c(1,2*i1,i2,1)  = (wk2d_b(2,i1,i2,1)+wk2d_b(2,i1,n2+2-i2,1))*0.5d0
532          wk2d_c(2,2*i1,i2,1)  =-(wk2d_b(1,i1,i2,1)-wk2d_b(1,i1,n2+2-i2,1))*0.5d0
533        end do
534      end do
535 
536      call sg_fftx(fftcache,mfac,mg,2*n1halfm+1,n5,1,n2half1,1,wk2d_c,wk2d_d,&
537 &     trig1,aft1,now1,bef1,ris,ind1,ic1)
538 
539      do i2=1,n2half1
540        do i1=1,n1
541          work1(1,i1,i2,i3)=wk2d_d(1,i1,i2,1)
542          work1(2,i1,i2,i3)=wk2d_d(2,i1,i2,1)
543        end do
544      end do
545 
546    end do
547 !$OMP END DO
548    ABI_DEALLOCATE(wk2d_a)
549    ABI_DEALLOCATE(wk2d_b)
550    ABI_DEALLOCATE(wk2d_c)
551    ABI_DEALLOCATE(wk2d_d)
552 !$OMP END PARALLEL
553 
554 !$OMP PARALLEL DO SHARED(aft3,bef3,ind3,ic3,now3,n1,n2half1,n4,n5half1,n6,ris,trig3,work1,work2) PRIVATE(i2)
555    do i2=1,n2half1
556      call sg_fftz(mfac,mg,n4,n5half1,n6,n1,i2,i2,work1,work2,&
557 &     trig3,aft3,now3,bef3,ris,ind3,ic3)
558    end do
559 
560 !  Transfer fft output to the original fft box
561 
562 !$OMP PARALLEL DO PRIVATE(i1,i2,i3,index) SHARED(fofg,n1,n2,n2half1,n3,work2)
563    do i3=1,n3
564      do i2=1,n2half1
565        index=n1*(i2-1+n2*(i3-1))
566        do i1=1,n1
567          fofg(1,i1+index)=work2(1,i1,i2,i3)
568          fofg(2,i1+index)=work2(2,i1,i2,i3)
569        end do
570      end do
571 !    Complete missing values with complex conjugate
572 !    Inverse of ix is located at nx+2-ix , except for ix=1, for which it is 1.
573      if(n2half1>2)then
574        do i2=2,n2+1-n2half1
575          index=n1*((n2+2-i2)-1)
576          if(i3/=1)index=index+n1*n2*((n3+2-i3)-1)
577          fofg(1,1+index)= work2(1,1,i2,i3)
578          fofg(2,1+index)=-work2(2,1,i2,i3)
579          do i1=2,n1
580            fofg(1,n1+2-i1+index)= work2(1,i1,i2,i3)
581            fofg(2,n1+2-i1+index)=-work2(2,i1,i2,i3)
582          end do
583        end do
584      end if
585    end do
586 
587  end if ! choice of isign
588 
589  ABI_DEALLOCATE(work1)
590  ABI_DEALLOCATE(work2)
591 
592  !DBG_EXIT("COLL")
593 
594 end subroutine sg_fft_rc

m_sgfft/sg_fftpad [ Functions ]

[ Top ] [ m_sgfft ] [ Functions ]

NAME

 sg_fftpad

FUNCTION

 Fast Fourier transform. This is the zero-padding version of "fft".

INPUTS

  fftcache=size of the cache (kB)
  mgfft=maximum size of 1D FFTs
  n1,n2,n3=physical dimension of the transform
  nd1,nd2,nd3=memory dimension of arr and ftarr
  ndat=Number of FFT transforms.
  isign= sign of exponential in transform
  gbound(2*mgfft+8,2)=sphere boundary info

OUTPUT

  ftarr(2,nd1,nd2,nd3*ndat)=working space for transform and contains output

SIDE EFFECTS

  arr(2,nd1,nd2,nd3*ndat)=input complex array with alternating real and imaginary
    elements; data resides in 2*n1*n2*n3 of this array, spread out.
  arr(2,nd1,nd2,nd3*ndat) is modified by sg_fftpx,sg_ffty,sg_fftz.

NOTES

  mfac sets maximum number of factors (5, 4, 3, or 2) which may be
  contained within any n1, n2, or n3
  mg sets the maximum 1 dimensional fft length (any one of n1, n2, or n3)
  XG: the signification of mg is changed with respect to fft3dp !!!

PARENTS

      fourwf,m_fft

CHILDREN

      sg_fft_cc

SOURCE

637 subroutine sg_fftpad(fftcache,mgfft,n1,n2,n3,nd1,nd2,nd3,ndat,gbound,isign,arr,ftarr)
638 
639 
640 !This section has been created automatically by the script Abilint (TD).
641 !Do not modify the following lines by hand.
642 #undef ABI_FUNC
643 #define ABI_FUNC 'sg_fftpad'
644 !End of the abilint section
645 
646  implicit none
647 
648 !Arguments ------------------------------------
649 !scalars
650  integer,intent(in) :: fftcache,mgfft,n1,n2,n3,nd1,nd2,nd3,ndat,isign
651 !arrays
652  integer,intent(in) :: gbound(2*mgfft+8,2)
653  real(dp),intent(inout) :: arr(2,nd1,nd2,nd3*ndat)
654  real(dp),intent(out) :: ftarr(2,nd1,nd2,nd3*ndat)
655 
656 !Local variables-------------------------------
657 !scalars
658  integer :: idat,start
659 
660 ! *************************************************************************
661 
662  do idat=1,ndat
663    start = 1 + (idat-1)*nd3
664    call fftpad_one_nothreadsafe(fftcache,mgfft,nd1,nd2,nd3,n1,n2,n3,&
665 &    arr(1,1,1,start),ftarr(1,1,1,start),real(isign, kind=dp),gbound)
666  end do
667 
668 end subroutine sg_fftpad

m_sgfft/sg_fftpx [ Functions ]

[ Top ] [ m_sgfft ] [ Functions ]

NAME

 sg_fftpx

FUNCTION

 This subroutine is called by the 3-dimensional fft to conduct the
 "x" transforms for all y and z.
 Accomodate more optimal treatment of
 zero padding following the method of fft3dp.

INPUTS

  fftcache=size of the cache (kB)
  mfac = maximum number of factors in 1D FFTs
  mg = maximum length of 1D FFTs
  mgfft = effective maximum length of 1D FFTs, for dimensioning gbound
  nd1=first dimension of (complex) arrays z and zbr (treated as real within
   this subroutine)
  nd2=second dimension of (complex) arrays z and zbr (treated as real within
   this subroutine)
  nd3=third dimension of (complex) arrays z and zbr (treated as real within
   this subroutine)
  n2,n3=actual length of y and z transforms
  z(2,nd1,nd2,nd3)=INPUT array; destroyed by transformation
  trig, aft, now, bef, ind=provided by previous call to ctrig
   Note that in this routine (and in ctrig) the values in array trig are
   actually cos and tan, not cos and sin.  Use of tan allows advantageous
   use of FMA on the ibm rs6000.
  ris=sign of exponential in transform (should be 1 or -1; real)
  ic=number of (radix) factors of x transform length (from ctrig)
  gbound(2*mgfft+4)=sphere boundary info

OUTPUT

  zbr(2,nd1,nd2,nd3)=OUTPUT transformed array; no scaling applied

SIDE EFFECTS

NOTES

 This routine blocks the x transforms
 so that all transforms under consideration at one step fit within
 the cache memory, which is crucial for optimal performance.
 The blocking factor is set by parameter "fftcache" below, which should
 be adjusted to be somewhat smaller (say 3/4) than the actual cache size
 of the machine.

TODO

 Use latex for the equation above

PARENTS

      m_sgfft

CHILDREN

      sg_fft_cc

SOURCE

 918 subroutine sg_fftpx(fftcache,mfac,mg,mgfft,nd1,nd2,nd3,n2,n3,&
 919 &    z,zbr,trig,aft,now,bef,ris,ind,ic,gbound)
 920 
 921 
 922 !This section has been created automatically by the script Abilint (TD).
 923 !Do not modify the following lines by hand.
 924 #undef ABI_FUNC
 925 #define ABI_FUNC 'sg_fftpx'
 926 !End of the abilint section
 927 
 928  implicit none
 929 
 930 !Arguments ------------------------------------
 931 !Dimensions of aft, now, bef, ind, and trig should agree with
 932 !those in subroutine ctrig.
 933 !scalars
 934  integer,intent(in) :: fftcache,ic,mfac,mg,mgfft,n2,n3,nd1,nd2,nd3
 935  real(dp),intent(in) :: ris
 936 !arrays
 937  integer,intent(in) :: aft(mfac),bef(mfac),gbound(2*mgfft+4),ind(mg),now(mfac)
 938  real(dp),intent(in) :: trig(2,mg)
 939  real(dp),intent(inout) :: z(2,nd1,nd2,nd3)
 940  real(dp),intent(inout) :: zbr(2,nd1,nd2,nd3) !vz_i
 941 
 942 !Local variables-------------------------------
 943 !scalars
 944  integer :: g2,g2max,g2min,g3,g3max,g3min,gg3,i,ia,ib,igb,ihalfy,indx,j
 945  integer :: len3,lot,lowlim,ma,mb,ntb,upplim
 946 !no_abirules
 947  real(dp),parameter :: &
 948 & cos2=0.3090169943749474d0,&   !cos(2.d0*pi/5.d0)
 949 & cos4=-0.8090169943749474d0,&  !cos(4.d0*pi/5.d0)
 950 & sin42=0.6180339887498948d0    !sin(4.d0*pi/5.d0)/sin(2.d0*pi/5.d0)
 951  real(dp) :: bb,cr2,cr2s,cr3,cr3p,cr4,cr5,ct2,ct3,ct4,ct5,&
 952 & factor,r,r1,r2,r25,r3,r34,r4,r5,s,sin2,s1,s2,s25,s3,s34,s4,s5
 953 
 954 ! *************************************************************************
 955 
 956  g3min=gbound(1)
 957  g3max=gbound(2)
 958  igb=3
 959  len3=g3max-g3min+1
 960 
 961 
 962 !Do x transforms in blocks of size "lot" which is set by how
 963 !many x transform arrays (of size nd1 each) fit into the nominal
 964 !cache size "fftcache".
 965 !Loop over blocks in the loop below.
 966 
 967  factor=0.75d0
 968  lot=(fftcache*factor*1000d0)/(nd1*8*2)
 969  if(lot.lt.1) lot=1
 970 !Express loop over y, z in terms of separate z and y loops
 971 
 972 !$OMP PARALLEL DO DEFAULT(PRIVATE)&
 973 !$OMP SHARED(aft,bef,gbound,g3max,ic,ind,len3,lot)&
 974 !$OMP SHARED(n2,n3,nd2,now,ris,trig,z,zbr)
 975  do gg3=1,len3
 976 
 977    if (gg3<=g3max+1) then
 978      g3=gg3
 979    else
 980 !    wrap around for negative gg3
 981      g3=gg3-len3+n3
 982    end if
 983 
 984    igb=gg3*2+1
 985    g2min=gbound(igb)
 986    g2max=gbound(igb+1)
 987 
 988 !  Split the y loop into positive and wrapped-around negative parts
 989 
 990    do ihalfy=1,2
 991 
 992 !    Start at 1 for ihalfy=1; g2min+1+n2 for ihalfy=2
 993      lowlim=1+(ihalfy-1)*(g2min+n2)
 994 !    End at g2max+1 for ihalfy=1; n2 for ihalfy=2
 995      upplim=g2max+1+(ihalfy-1)*(n2-g2max-1)
 996 
 997      do g2=lowlim,upplim,lot
 998 
 999 !      Find array starting address ma and ending address mb
1000 !      modified xg 980107
1001 !      ma=g2+(g3-1)*nd2
1002        ma=g2
1003 !      Perform "lot" transforms at a time (until out of data)
1004 !      mb=min(g2+(lot-1),upplim)+(g3-1)*nd2
1005        mb=min(g2+(lot-1),upplim)
1006 
1007 !      -------------------------------------------------------------------------
1008 !
1009 !      Direct transformation
1010 
1011 !      Run over all factors except the last (to ic-1), performing
1012 !      x transform
1013 
1014 !      Note: fortran should skip this loop if ic=1; beware "onetrip"
1015 !      compiler option which forces each loop at least once
1016 
1017        do i=1,ic-1
1018          ntb=now(i)*bef(i)
1019 
1020 !        Treat radix 4
1021          if (now(i)==4) then
1022            ia=0
1023 
1024 !          First step of factor 4
1025            do ib=1,bef(i)
1026              do j=ma,mb
1027                r4=z(1,ia*ntb+3*bef(i)+ib,j,g3)
1028                s4=z(2,ia*ntb+3*bef(i)+ib,j,g3)
1029                r3=z(1,ia*ntb+2*bef(i)+ib,j,g3)
1030                s3=z(2,ia*ntb+2*bef(i)+ib,j,g3)
1031                r2=z(1,ia*ntb+bef(i)+ib,j,g3)
1032                s2=z(2,ia*ntb+bef(i)+ib,j,g3)
1033                r1=z(1,ia*ntb+ib,j,g3)
1034                s1=z(2,ia*ntb+ib,j,g3)
1035 
1036                r=r1 + r3
1037                s=r2 + r4
1038                z(1,ia*ntb+ib,j,g3) = r + s
1039                z(1,ia*ntb+2*bef(i)+ib,j,g3) = r - s
1040                r=r1 - r3
1041                s=s2 - s4
1042                z(1,ia*ntb+bef(i)+ib,j,g3) = r - s*ris
1043                z(1,ia*ntb+3*bef(i)+ib,j,g3) = r + s*ris
1044                r=s1 + s3
1045                s=s2 + s4
1046                z(2,ia*ntb+ib,j,g3) = r + s
1047                z(2,ia*ntb+2*bef(i)+ib,j,g3) = r - s
1048                r=s1 - s3
1049                s=r2 - r4
1050                z(2,ia*ntb+bef(i)+ib,j,g3) = r + s*ris
1051                z(2,ia*ntb+3*bef(i)+ib,j,g3) = r - s*ris
1052              end do
1053            end do
1054 
1055 !          Second step of factor 4
1056            do ia=1,aft(i)-1
1057              indx=ind(ia*4*bef(i)+1)-1
1058              indx=indx*bef(i)
1059              cr2=trig(1,indx)
1060              ct2=trig(2,indx)
1061              cr3=trig(1,2*indx)
1062              ct3=trig(2,2*indx)
1063              cr4=trig(1,3*indx)
1064              ct4=trig(2,3*indx)
1065              cr4=cr4/cr2
1066              cr2s=cr2*ris
1067              do ib=1,bef(i)
1068                do j=ma,mb
1069                  r4=z(1,ia*ntb+3*bef(i)+ib,j,g3) - &
1070 &                 z(2,ia*ntb+3*bef(i)+ib,j,g3)*ct4
1071                  s4=z(1,ia*ntb+3*bef(i)+ib,j,g3)*ct4 + &
1072 &                 z(2,ia*ntb+3*bef(i)+ib,j,g3)
1073                  r3=z(1,ia*ntb+2*bef(i)+ib,j,g3) - &
1074 &                 z(2,ia*ntb+2*bef(i)+ib,j,g3)*ct3
1075                  s3=z(1,ia*ntb+2*bef(i)+ib,j,g3)*ct3 + &
1076 &                 z(2,ia*ntb+2*bef(i)+ib,j,g3)
1077                  r2=z(1,ia*ntb+bef(i)+ib,j,g3) - &
1078 &                 z(2,ia*ntb+bef(i)+ib,j,g3)*ct2
1079                  s2=z(1,ia*ntb+bef(i)+ib,j,g3)*ct2 + &
1080 &                 z(2,ia*ntb+bef(i)+ib,j,g3)
1081                  r1=z(1,ia*ntb+ib,j,g3)
1082                  s1=z(2,ia*ntb+ib,j,g3)
1083 
1084                  r=r1 + r3*cr3
1085                  s=r2 + r4*cr4
1086                  z(1,ia*ntb+ib,j,g3) = r + s*cr2
1087                  z(1,ia*ntb+2*bef(i)+ib,j,g3) = r - s*cr2
1088                  r=r1 - r3*cr3
1089                  s=s2 - s4*cr4
1090                  z(1,ia*ntb+bef(i)+ib,j,g3) = r - s*cr2s
1091                  z(1,ia*ntb+3*bef(i)+ib,j,g3) = r + s*cr2s
1092                  r=s1 + s3*cr3
1093                  s=s2 + s4*cr4
1094                  z(2,ia*ntb+ib,j,g3) = r + s*cr2
1095                  z(2,ia*ntb+2*bef(i)+ib,j,g3) = r - s*cr2
1096                  r=s1 - s3*cr3
1097                  s=r2 - r4*cr4
1098                  z(2,ia*ntb+bef(i)+ib,j,g3) = r + s*cr2s
1099                  z(2,ia*ntb+3*bef(i)+ib,j,g3) = r - s*cr2s
1100                end do
1101              end do
1102            end do
1103 
1104 !          Treat radix 2
1105          else if (now(i)==2) then
1106            ia=0
1107 
1108 !          First step of factor 2
1109            do ib=1,bef(i)
1110              do j=ma,mb
1111                r1=z(1,ia*ntb+ib,j,g3)
1112                s1=z(2,ia*ntb+ib,j,g3)
1113                r2=z(1,ia*ntb+bef(i)+ib,j,g3)
1114                s2=z(2,ia*ntb+bef(i)+ib,j,g3)
1115                z(1,ia*ntb+ib,j,g3) =  r2 + r1
1116                z(2,ia*ntb+ib,j,g3) =  s2 + s1
1117                z(1,ia*ntb+bef(i)+ib,j,g3) = -r2 + r1
1118                z(2,ia*ntb+bef(i)+ib,j,g3) = -s2 + s1
1119              end do
1120            end do
1121 
1122 !          Second step of radix 2
1123            do ia=1,aft(i)-1
1124              indx=ind(ia*2*bef(i)+1)-1
1125              indx=indx*bef(i)
1126              cr2=trig(1,indx)
1127              ct2=trig(2,indx)
1128              do ib=1,bef(i)
1129                do j=ma,mb
1130                  r1=z(1,ia*ntb+ib,j,g3)
1131                  s1=z(2,ia*ntb+ib,j,g3)
1132                  r2=z(1,ia*ntb+bef(i)+ib,j,g3) - &
1133 &                 z(2,ia*ntb+bef(i)+ib,j,g3)*ct2
1134                  s2=z(1,ia*ntb+bef(i)+ib,j,g3)*ct2 + &
1135 &                 z(2,ia*ntb+bef(i)+ib,j,g3)
1136                  z(1,ia*ntb+ib,j,g3) =  r2*cr2 + r1
1137                  z(2,ia*ntb+ib,j,g3) =  s2*cr2 + s1
1138                  z(1,ia*ntb+bef(i)+ib,j,g3) = -r2*cr2 + r1
1139                  z(2,ia*ntb+bef(i)+ib,j,g3) = -s2*cr2 + s1
1140                end do
1141              end do
1142            end do
1143 
1144 !          Treat radix 3
1145          else if (now(i)==3) then
1146 !          .5d0*sqrt(3.d0)=0.8660254037844387d0
1147            ia=0
1148            bb=ris*0.8660254037844387d0
1149 
1150 !          First step of radix 3
1151            do ib=1,bef(i)
1152              do j=ma,mb
1153                r1=z(1,ia*ntb+ib,j,g3)
1154                s1=z(2,ia*ntb+ib,j,g3)
1155                r2=z(1,ia*ntb+bef(i)+ib,j,g3)
1156                s2=z(2,ia*ntb+bef(i)+ib,j,g3)
1157                r3=z(1,ia*ntb+2*bef(i)+ib,j,g3)
1158                s3=z(2,ia*ntb+2*bef(i)+ib,j,g3)
1159                r=r2 + r3
1160                s=s2 + s3
1161                z(1,ia*ntb+ib,j,g3) = r + r1
1162                z(2,ia*ntb+ib,j,g3) = s + s1
1163                r1=r1 - r*.5d0
1164                s1=s1 - s*.5d0
1165                r2=r2-r3
1166                s2=s2-s3
1167                z(1,ia*ntb+bef(i)+ib,j,g3) = r1 - s2*bb
1168                z(2,ia*ntb+bef(i)+ib,j,g3) = s1 + r2*bb
1169                z(1,ia*ntb+2*bef(i)+ib,j,g3) = r1 + s2*bb
1170                z(2,ia*ntb+2*bef(i)+ib,j,g3) = s1 - r2*bb
1171              end do
1172            end do
1173 
1174 !          Second step of radix 3
1175            do ia=1,aft(i)-1
1176              indx=ind(ia*3*bef(i)+1)-1
1177              indx=indx*bef(i)
1178              cr2=trig(1,indx)
1179              ct2=trig(2,indx)
1180              cr3=trig(1,2*indx)
1181              ct3=trig(2,2*indx)
1182              cr2=cr2/cr3
1183              cr3p=.5d0*cr3
1184              bb=ris*cr3*0.8660254037844387d0
1185              do ib=1,bef(i)
1186                do j=ma,mb
1187                  r1=z(1,ia*ntb+ib,j,g3)
1188                  s1=z(2,ia*ntb+ib,j,g3)
1189                  r2=z(1,ia*ntb+bef(i)+ib,j,g3) - &
1190 &                 z(2,ia*ntb+bef(i)+ib,j,g3)*ct2
1191                  s2=z(1,ia*ntb+bef(i)+ib,j,g3)*ct2 + &
1192 &                 z(2,ia*ntb+bef(i)+ib,j,g3)
1193                  r3=z(1,ia*ntb+2*bef(i)+ib,j,g3) - &
1194 &                 z(2,ia*ntb+2*bef(i)+ib,j,g3)*ct3
1195                  s3=z(1,ia*ntb+2*bef(i)+ib,j,g3)*ct3 + &
1196 &                 z(2,ia*ntb+2*bef(i)+ib,j,g3)
1197                  r=cr2*r2 + r3
1198                  s=cr2*s2 + s3
1199                  z(1,ia*ntb+ib,j,g3) = r*cr3 + r1
1200                  z(2,ia*ntb+ib,j,g3) = s*cr3 + s1
1201                  r1=r1 - r*cr3p
1202                  s1=s1 - s*cr3p
1203                  r2=cr2*r2-r3
1204                  s2=cr2*s2-s3
1205                  z(1,ia*ntb+bef(i)+ib,j,g3) = r1 - s2*bb
1206                  z(2,ia*ntb+bef(i)+ib,j,g3) = s1 + r2*bb
1207                  z(1,ia*ntb+2*bef(i)+ib,j,g3) = r1 + s2*bb
1208                  z(2,ia*ntb+2*bef(i)+ib,j,g3) = s1 - r2*bb
1209                end do
1210              end do
1211            end do
1212 
1213 !          Treat radix 5
1214          else if (now(i)==5) then
1215 !          sin(2.d0*pi/5.d0)
1216            sin2=ris*0.9510565162951536d0
1217            ia=0
1218 
1219 !          First step of radix 5
1220            do ib=1,bef(i)
1221              do j=ma,mb
1222                r1=z(1,ia*ntb+ib,j,g3)
1223                s1=z(2,ia*ntb+ib,j,g3)
1224                r2=z(1,ia*ntb+bef(i)+ib,j,g3)
1225                s2=z(2,ia*ntb+bef(i)+ib,j,g3)
1226                r3=z(1,ia*ntb+2*bef(i)+ib,j,g3)
1227                s3=z(2,ia*ntb+2*bef(i)+ib,j,g3)
1228                r4=z(1,ia*ntb+3*bef(i)+ib,j,g3)
1229                s4=z(2,ia*ntb+3*bef(i)+ib,j,g3)
1230                r5=z(1,ia*ntb+4*bef(i)+ib,j,g3)
1231                s5=z(2,ia*ntb+4*bef(i)+ib,j,g3)
1232                r25 = r2 + r5
1233                r34 = r3 + r4
1234                s25 = s2 - s5
1235                s34 = s3 - s4
1236                z(1,ia*ntb+ib,j,g3) = r1 + r25 + r34
1237                r = r1 + cos2*r25 + cos4*r34
1238                s = s25 + sin42*s34
1239                z(1,ia*ntb+bef(i)+ib,j,g3) = r - sin2*s
1240                z(1,ia*ntb+4*bef(i)+ib,j,g3) = r + sin2*s
1241                r = r1 + cos4*r25 + cos2*r34
1242                s = sin42*s25 - s34
1243                z(1,ia*ntb+2*bef(i)+ib,j,g3) = r - sin2*s
1244                z(1,ia*ntb+3*bef(i)+ib,j,g3) = r + sin2*s
1245                r25 = r2 - r5
1246                r34 = r3 - r4
1247                s25 = s2 + s5
1248                s34 = s3 + s4
1249                z(2,ia*ntb+ib,j,g3) = s1 + s25 + s34
1250                r = s1 + cos2*s25 + cos4*s34
1251                s = r25 + sin42*r34
1252                z(2,ia*ntb+bef(i)+ib,j,g3) = r + sin2*s
1253                z(2,ia*ntb+4*bef(i)+ib,j,g3) = r - sin2*s
1254                r = s1 + cos4*s25 + cos2*s34
1255                s = sin42*r25 - r34
1256                z(2,ia*ntb+2*bef(i)+ib,j,g3) = r + sin2*s
1257                z(2,ia*ntb+3*bef(i)+ib,j,g3) = r - sin2*s
1258              end do
1259            end do
1260 
1261 !          Second step of radix 5
1262            do ia=1,aft(i)-1
1263              indx=ind(ia*5*bef(i)+1)-1
1264              indx=indx*bef(i)
1265              cr2=trig(1,indx)
1266              ct2=trig(2,indx)
1267              cr3=trig(1,2*indx)
1268              ct3=trig(2,2*indx)
1269              cr4=trig(1,3*indx)
1270              ct4=trig(2,3*indx)
1271              cr5=trig(1,4*indx)
1272              ct5=trig(2,4*indx)
1273              do ib=1,bef(i)
1274                do j=ma,mb
1275                  r1=z(1,ia*ntb+ib,j,g3)
1276                  s1=z(2,ia*ntb+ib,j,g3)
1277                  r2=cr2*(z(1,ia*ntb+bef(i)+ib,j,g3) - &
1278 &                 z(2,ia*ntb+bef(i)+ib,j,g3)*ct2)
1279                  s2=cr2*(z(1,ia*ntb+bef(i)+ib,j,g3)*ct2 + &
1280 &                 z(2,ia*ntb+bef(i)+ib,j,g3))
1281                  r3=cr3*(z(1,ia*ntb+2*bef(i)+ib,j,g3) - &
1282 &                 z(2,ia*ntb+2*bef(i)+ib,j,g3)*ct3)
1283                  s3=cr3*(z(1,ia*ntb+2*bef(i)+ib,j,g3)*ct3 + &
1284 &                 z(2,ia*ntb+2*bef(i)+ib,j,g3))
1285                  r4=z(1,ia*ntb+3*bef(i)+ib,j,g3) - &
1286 &                 z(2,ia*ntb+3*bef(i)+ib,j,g3)*ct4
1287                  s4=z(1,ia*ntb+3*bef(i)+ib,j,g3)*ct4 + &
1288 &                 z(2,ia*ntb+3*bef(i)+ib,j,g3)
1289                  r5=z(1,ia*ntb+4*bef(i)+ib,j,g3) - &
1290 &                 z(2,ia*ntb+4*bef(i)+ib,j,g3)*ct5
1291                  s5=z(1,ia*ntb+4*bef(i)+ib,j,g3)*ct5 + &
1292 &                 z(2,ia*ntb+4*bef(i)+ib,j,g3)
1293                  r25 = r2 + r5*cr5
1294                  r34 = r3 + r4*cr4
1295                  s25 = s2 - s5*cr5
1296                  s34 = s3 - s4*cr4
1297                  z(1,ia*ntb+ib,j,g3) = r1 + r25 + r34
1298                  r = r1 + cos2*r25 + cos4*r34
1299                  s = s25 + sin42*s34
1300                  z(1,ia*ntb+bef(i)+ib,j,g3) = r - sin2*s
1301                  z(1,ia*ntb+4*bef(i)+ib,j,g3) = r + sin2*s
1302                  r = r1 + cos4*r25 + cos2*r34
1303                  s = sin42*s25 - s34
1304                  z(1,ia*ntb+2*bef(i)+ib,j,g3) = r - sin2*s
1305                  z(1,ia*ntb+3*bef(i)+ib,j,g3) = r + sin2*s
1306                  r25 = r2 - r5*cr5
1307                  r34 = r3 - r4*cr4
1308                  s25 = s2 + s5*cr5
1309                  s34 = s3 + s4*cr4
1310                  z(2,ia*ntb+ib,j,g3) = s1 + s25 + s34
1311                  r = s1 + cos2*s25 + cos4*s34
1312                  s = r25 + sin42*r34
1313                  z(2,ia*ntb+bef(i)+ib,j,g3) = r + sin2*s
1314                  z(2,ia*ntb+4*bef(i)+ib,j,g3) = r - sin2*s
1315                  r = s1 + cos4*s25 + cos2*s34
1316                  s = sin42*r25 - r34
1317                  z(2,ia*ntb+2*bef(i)+ib,j,g3) = r + sin2*s
1318                  z(2,ia*ntb+3*bef(i)+ib,j,g3) = r - sin2*s
1319                end do
1320              end do
1321            end do
1322 
1323          else
1324 !          All radices treated
1325            MSG_BUG('called with factors other than 2, 3, and 5')
1326          end if
1327 
1328        end do  ! End of direct transformation (loop over ic)
1329 
1330 !      -----------------------------------------------------------------
1331 
1332 !      Bitreversal
1333 !      Perform bit reversal on last factor of transformation
1334 
1335 !      Treat radix 4
1336        if (now(ic)==4) then
1337          ia=0
1338 
1339 !        First step of radix 4
1340          do j=ma,mb
1341            r4=z(1,ia*4+4,j,g3)
1342            s4=z(2,ia*4+4,j,g3)
1343            r3=z(1,ia*4+3,j,g3)
1344            s3=z(2,ia*4+3,j,g3)
1345            r2=z(1,ia*4+2,j,g3)
1346            s2=z(2,ia*4+2,j,g3)
1347            r1=z(1,ia*4+1,j,g3)
1348            s1=z(2,ia*4+1,j,g3)
1349 
1350            r=r1 + r3
1351            s=r2 + r4
1352            zbr(1,ind(ia*4+1),j,g3) = r + s
1353            zbr(1,ind(ia*4+3),j,g3) = r - s
1354            r=r1 - r3
1355            s=s2 - s4
1356            zbr(1,ind(ia*4+2),j,g3) = r - s*ris
1357            zbr(1,ind(ia*4+4),j,g3) = r + s*ris
1358            r=s1 + s3
1359            s=s2 + s4
1360            zbr(2,ind(ia*4+1),j,g3) = r + s
1361            zbr(2,ind(ia*4+3),j,g3) = r - s
1362            r=s1 - s3
1363            s=r2 - r4
1364            zbr(2,ind(ia*4+2),j,g3) = r + s*ris
1365            zbr(2,ind(ia*4+4),j,g3) = r - s*ris
1366          end do
1367 
1368 !        Second step of radix 4
1369          do ia=1,aft(ic)-1
1370            indx=ind(ia*4+1)-1
1371            cr2=trig(1,indx)
1372            ct2=trig(2,indx)
1373            cr3=trig(1,2*indx)
1374            ct3=trig(2,2*indx)
1375            cr4=trig(1,3*indx)
1376            ct4=trig(2,3*indx)
1377            cr4=cr4/cr2
1378            cr2s=cr2*ris
1379            do j=ma,mb
1380              r4=z(1,ia*4+4,j,g3) - z(2,ia*4+4,j,g3)*ct4
1381              s4=z(1,ia*4+4,j,g3)*ct4 + z(2,ia*4+4,j,g3)
1382              r3=z(1,ia*4+3,j,g3) - z(2,ia*4+3,j,g3)*ct3
1383              s3=z(1,ia*4+3,j,g3)*ct3 + z(2,ia*4+3,j,g3)
1384              r2=z(1,ia*4+2,j,g3) - z(2,ia*4+2,j,g3)*ct2
1385              s2=z(1,ia*4+2,j,g3)*ct2 + z(2,ia*4+2,j,g3)
1386              r1=z(1,ia*4+1,j,g3)
1387              s1=z(2,ia*4+1,j,g3)
1388 
1389              r=r1 + r3*cr3
1390              s=r2 + r4*cr4
1391              zbr(1,ind(ia*4+1),j,g3) = r + s*cr2
1392              zbr(1,ind(ia*4+3),j,g3) = r - s*cr2
1393              r=r1 - r3*cr3
1394              s=s2 - s4*cr4
1395              zbr(1,ind(ia*4+2),j,g3) = r - s*cr2s
1396              zbr(1,ind(ia*4+4),j,g3) = r + s*cr2s
1397              r=s1 + s3*cr3
1398              s=s2 + s4*cr4
1399              zbr(2,ind(ia*4+1),j,g3) = r + s*cr2
1400              zbr(2,ind(ia*4+3),j,g3) = r - s*cr2
1401              r=s1 - s3*cr3
1402              s=r2 - r4*cr4
1403              zbr(2,ind(ia*4+2),j,g3) = r + s*cr2s
1404              zbr(2,ind(ia*4+4),j,g3) = r - s*cr2s
1405            end do
1406          end do
1407 
1408 !        Treat radix 2
1409        else if (now(ic)==2) then
1410 
1411          ia=0
1412 
1413 !        First step of radix 2
1414          do j=ma,mb
1415            r1=z(1,ia*2+1,j,g3)
1416            s1=z(2,ia*2+1,j,g3)
1417            r2=z(1,ia*2+2,j,g3)
1418            s2=z(2,ia*2+2,j,g3)
1419            zbr(1,ind(ia*2+1),j,g3) =  r2 + r1
1420            zbr(2,ind(ia*2+1),j,g3) =  s2 + s1
1421            zbr(1,ind(ia*2+2),j,g3) = -r2 + r1
1422            zbr(2,ind(ia*2+2),j,g3) = -s2 + s1
1423          end do
1424 
1425 !        Second step of radix 2
1426          do ia=1,aft(ic)-1
1427            indx=ind(ia*2+1)-1
1428            cr2=trig(1,indx)
1429            ct2=trig(2,indx)
1430            do j=ma,mb
1431              r1=z(1,ia*2+1,j,g3)
1432              s1=z(2,ia*2+1,j,g3)
1433              r2=z(1,ia*2+2,j,g3) - z(2,ia*2+2,j,g3)*ct2
1434              s2=z(1,ia*2+2,j,g3)*ct2 + z(2,ia*2+2,j,g3)
1435              zbr(1,ind(ia*2+1),j,g3) =  r2*cr2 + r1
1436              zbr(2,ind(ia*2+1),j,g3) =  s2*cr2 + s1
1437              zbr(1,ind(ia*2+2),j,g3) = -r2*cr2 + r1
1438              zbr(2,ind(ia*2+2),j,g3) = -s2*cr2 + s1
1439            end do
1440          end do
1441 
1442 !        Treat radix 3
1443        else if (now(ic)==3) then
1444 !        radix 3
1445 !        .5d0*sqrt(3.d0)=0.8660254037844387d0
1446          ia=0
1447          bb=ris*0.8660254037844387d0
1448 
1449 !        First step of radix 3
1450          do j=ma,mb
1451            r1=z(1,ia*3+1,j,g3)
1452            s1=z(2,ia*3+1,j,g3)
1453            r2=z(1,ia*3+2,j,g3)
1454            s2=z(2,ia*3+2,j,g3)
1455            r3=z(1,ia*3+3,j,g3)
1456            s3=z(2,ia*3+3,j,g3)
1457            r=r2 + r3
1458            s=s2 + s3
1459            zbr(1,ind(ia*3+1),j,g3) = r + r1
1460            zbr(2,ind(ia*3+1),j,g3) = s + s1
1461            r1=r1 - r*.5d0
1462            s1=s1 - s*.5d0
1463            r2=r2-r3
1464            s2=s2-s3
1465            zbr(1,ind(ia*3+2),j,g3) = r1 - s2*bb
1466            zbr(2,ind(ia*3+2),j,g3) = s1 + r2*bb
1467            zbr(1,ind(ia*3+3),j,g3) = r1 + s2*bb
1468            zbr(2,ind(ia*3+3),j,g3) = s1 - r2*bb
1469          end do
1470 
1471          do ia=1,aft(ic)-1
1472            indx=ind(ia*3+1)-1
1473            cr2=trig(1,indx)
1474            ct2=trig(2,indx)
1475            cr3=trig(1,2*indx)
1476            ct3=trig(2,2*indx)
1477            cr2=cr2/cr3
1478            cr3p=.5d0*cr3
1479            bb=ris*cr3*0.8660254037844387d0
1480            do j=ma,mb
1481              r1=z(1,ia*3+1,j,g3)
1482              s1=z(2,ia*3+1,j,g3)
1483              r2=z(1,ia*3+2,j,g3) - z(2,ia*3+2,j,g3)*ct2
1484              s2=z(1,ia*3+2,j,g3)*ct2 + z(2,ia*3+2,j,g3)
1485              r3=z(1,ia*3+3,j,g3) - z(2,ia*3+3,j,g3)*ct3
1486              s3=z(1,ia*3+3,j,g3)*ct3 + z(2,ia*3+3,j,g3)
1487              r=cr2*r2 + r3
1488              s=cr2*s2 + s3
1489              zbr(1,ind(ia*3+1),j,g3) = r*cr3 + r1
1490              zbr(2,ind(ia*3+1),j,g3) = s*cr3 + s1
1491              r1=r1 - r*cr3p
1492              s1=s1 - s*cr3p
1493              r2=cr2*r2-r3
1494              s2=cr2*s2-s3
1495              zbr(1,ind(ia*3+2),j,g3) = r1 - s2*bb
1496              zbr(2,ind(ia*3+2),j,g3) = s1 + r2*bb
1497              zbr(1,ind(ia*3+3),j,g3) = r1 + s2*bb
1498              zbr(2,ind(ia*3+3),j,g3) = s1 - r2*bb
1499            end do
1500          end do
1501 
1502 !        Treat radix 5
1503        else if (now(ic)==5) then
1504 !        radix 5
1505 !        sin(2.d0*pi/5.d0)
1506          sin2=ris*0.9510565162951536d0
1507          ia=0
1508 
1509 !        First step of radix 5
1510          do j=ma,mb
1511            r1=z(1,ia*5+1,j,g3)
1512            s1=z(2,ia*5+1,j,g3)
1513            r2=z(1,ia*5+2,j,g3)
1514            s2=z(2,ia*5+2,j,g3)
1515            r3=z(1,ia*5+3,j,g3)
1516            s3=z(2,ia*5+3,j,g3)
1517            r4=z(1,ia*5+4,j,g3)
1518            s4=z(2,ia*5+4,j,g3)
1519            r5=z(1,ia*5+5,j,g3)
1520            s5=z(2,ia*5+5,j,g3)
1521            r25 = r2 + r5
1522            r34 = r3 + r4
1523            s25 = s2 - s5
1524            s34 = s3 - s4
1525            zbr(1,ind(ia*5+1),j,g3) = r1 + r25 + r34
1526            r = r1 + cos2*r25 + cos4*r34
1527            s = s25 + sin42*s34
1528            zbr(1,ind(ia*5+2),j,g3) = r - sin2*s
1529            zbr(1,ind(ia*5+5),j,g3) = r + sin2*s
1530            r = r1 + cos4*r25 + cos2*r34
1531            s = sin42*s25 - s34
1532            zbr(1,ind(ia*5+3),j,g3) = r - sin2*s
1533            zbr(1,ind(ia*5+4),j,g3) = r + sin2*s
1534            r25 = r2 - r5
1535            r34 = r3 - r4
1536            s25 = s2 + s5
1537            s34 = s3 + s4
1538            zbr(2,ind(ia*5+1),j,g3) = s1 + s25 + s34
1539            r = s1 + cos2*s25 + cos4*s34
1540            s = r25 + sin42*r34
1541            zbr(2,ind(ia*5+2),j,g3) = r + sin2*s
1542            zbr(2,ind(ia*5+5),j,g3) = r - sin2*s
1543            r = s1 + cos4*s25 + cos2*s34
1544            s = sin42*r25 - r34
1545            zbr(2,ind(ia*5+3),j,g3) = r + sin2*s
1546            zbr(2,ind(ia*5+4),j,g3) = r - sin2*s
1547          end do
1548 
1549 !        Second step of radix 5
1550          do ia=1,aft(ic)-1
1551            indx=ind(ia*5+1)-1
1552            cr2=trig(1,indx)
1553            ct2=trig(2,indx)
1554            cr3=trig(1,2*indx)
1555            ct3=trig(2,2*indx)
1556            cr4=trig(1,3*indx)
1557            ct4=trig(2,3*indx)
1558            cr5=trig(1,4*indx)
1559            ct5=trig(2,4*indx)
1560            do j=ma,mb
1561              r1=z(1,ia*5+1,j,g3)
1562              s1=z(2,ia*5+1,j,g3)
1563              r2=cr2*(z(1,ia*5+2,j,g3) - z(2,ia*5+2,j,g3)*ct2)
1564              s2=cr2*(z(1,ia*5+2,j,g3)*ct2 + z(2,ia*5+2,j,g3))
1565              r3=cr3*(z(1,ia*5+3,j,g3) - z(2,ia*5+3,j,g3)*ct3)
1566              s3=cr3*(z(1,ia*5+3,j,g3)*ct3 + z(2,ia*5+3,j,g3))
1567              r4=z(1,ia*5+4,j,g3) - z(2,ia*5+4,j,g3)*ct4
1568              s4=z(1,ia*5+4,j,g3)*ct4 + z(2,ia*5+4,j,g3)
1569              r5=z(1,ia*5+5,j,g3) - z(2,ia*5+5,j,g3)*ct5
1570              s5=z(1,ia*5+5,j,g3)*ct5 + z(2,ia*5+5,j,g3)
1571              r25 = r2 + r5*cr5
1572              r34 = r3 + r4*cr4
1573              s25 = s2 - s5*cr5
1574              s34 = s3 - s4*cr4
1575              zbr(1,ind(ia*5+1),j,g3) = r1 + r25 + r34
1576              r = r1 + cos2*r25 + cos4*r34
1577              s = s25 + sin42*s34
1578              zbr(1,ind(ia*5+2),j,g3) = r - sin2*s
1579              zbr(1,ind(ia*5+5),j,g3) = r + sin2*s
1580              r = r1 + cos4*r25 + cos2*r34
1581              s = sin42*s25 - s34
1582              zbr(1,ind(ia*5+3),j,g3) = r - sin2*s
1583              zbr(1,ind(ia*5+4),j,g3) = r + sin2*s
1584              r25 = r2 - r5*cr5
1585              r34 = r3 - r4*cr4
1586              s25 = s2 + s5*cr5
1587              s34 = s3 + s4*cr4
1588              zbr(2,ind(ia*5+1),j,g3) = s1 + s25 + s34
1589              r = s1 + cos2*s25 + cos4*s34
1590              s = r25 + sin42*r34
1591              zbr(2,ind(ia*5+2),j,g3) = r + sin2*s
1592              zbr(2,ind(ia*5+5),j,g3) = r - sin2*s
1593              r = s1 + cos4*s25 + cos2*s34
1594              s = sin42*r25 - r34
1595              zbr(2,ind(ia*5+3),j,g3) = r + sin2*s
1596              zbr(2,ind(ia*5+4),j,g3) = r - sin2*s
1597            end do
1598          end do
1599 
1600        else
1601 !        All radices are treated
1602          MSG_BUG('called with factors other than 2, 3, and 5')
1603        end if
1604 
1605 !      End of bit reversal
1606 
1607 !      -------------------------------------------------------------------
1608      end do
1609    end do
1610  end do
1611 !$OMP END PARALLEL DO
1612 
1613 end subroutine sg_fftpx

m_sgfft/sg_fftrisc [ Functions ]

[ Top ] [ m_sgfft ] [ Functions ]

NAME

 sg_fftrisc

FUNCTION

  Wrapper around fftrisc_one_nothreadsafe that supports ndat transforms.

 * This routine is not thread-safe due to the presence of variables with the save attribute!
   DO NOT CALL THIS ROUTINE INSIDE A OPENMP PARALLEL REGION

PARENTS

      fourwf

CHILDREN

      sg_fft_cc

SOURCE

3995 subroutine sg_fftrisc(cplex,denpot,fofgin,fofgout,fofr,gboundin,gboundout,istwf_k,&
3996 & kg_kin,kg_kout,mgfft,ndat,ngfft,npwin,npwout,n4,n5,n6,option,weight_r, weight_i)
3997 
3998 
3999 !This section has been created automatically by the script Abilint (TD).
4000 !Do not modify the following lines by hand.
4001 #undef ABI_FUNC
4002 #define ABI_FUNC 'sg_fftrisc'
4003 !End of the abilint section
4004 
4005  implicit none
4006 
4007 !Arguments ------------------------------------
4008 !scalars
4009  integer,intent(in) :: cplex,istwf_k,mgfft,n4,n5,n6,ndat,npwin,npwout,option
4010  real(dp),intent(in) :: weight_i,weight_r
4011 !arrays
4012  integer,intent(in) :: gboundin(2*mgfft+8,2),gboundout(2*mgfft+8,2)
4013  integer,intent(in) :: kg_kin(3,npwin),kg_kout(3,npwout),ngfft(18)
4014  real(dp),intent(in) :: fofgin(2,npwin*ndat)
4015  real(dp),intent(inout) :: denpot(cplex*n4*n5*n6),fofr(2,n4*n5*n6*ndat)
4016  real(dp),intent(out) :: fofgout(2,npwout*ndat)
4017 
4018 !Local variables-------------------------------
4019 !scalars
4020  integer :: idat,fofgin_p,fofr_p,fofgout_p
4021 !arrays
4022  real(dp) :: dum_fofgin(0,0),dum_fofr(0,0),dum_fofgout(0,0)
4023 
4024 ! *************************************************************************
4025 
4026  do idat=1,ndat
4027    fofgin_p = 1 + (idat-1) * npwin
4028    fofr_p = 1 + (idat - 1) * n4*n5*n6
4029    fofgout_p = 1 + (idat-1) * npwout
4030 
4031    select case (option)
4032    case (0)
4033      call fftrisc_one_nothreadsafe(&
4034 &      cplex,denpot,fofgin(1,fofgin_p),dum_fofgout,fofr(1,fofr_p),&
4035 &      gboundin,gboundout,istwf_k,&
4036 &      kg_kin,kg_kout,mgfft,ngfft,npwin,npwout,n4,n5,n6,option,weight_r,weight_i)
4037 
4038    case (1)
4039      ! Don't know why but fofr is not touched by this option.
4040      call fftrisc_one_nothreadsafe(&
4041 &      cplex,denpot,fofgin(1,fofgin_p),dum_fofgout,dum_fofr,&
4042 &      gboundin,gboundout,istwf_k,&
4043 &      kg_kin,kg_kout,mgfft,ngfft,npwin,npwout,n4,n5,n6,option,weight_r,weight_i)
4044 
4045    case (2)
4046      call fftrisc_one_nothreadsafe(&
4047 &      cplex,denpot,fofgin(1,fofgin_p),fofgout(1,fofgout_p),dum_fofr,&
4048 &      gboundin,gboundout,istwf_k,&
4049 &      kg_kin,kg_kout,mgfft,ngfft,npwin,npwout,n4,n5,n6,option,weight_r,weight_i)
4050 
4051    case (3)
4052      call fftrisc_one_nothreadsafe(&
4053 &      cplex,denpot,dum_fofgin,fofgout(1,fofgout_p),fofr(1,fofr_p),&
4054 &      gboundin,gboundout,istwf_k,&
4055 &      kg_kin,kg_kout,mgfft,ngfft,npwin,npwout,n4,n5,n6,option,weight_r,weight_i)
4056 
4057    case default
4058       MSG_ERROR("Wrong option")
4059    end select
4060  end do
4061 
4062 end subroutine sg_fftrisc

m_sgfft/sg_fftrisc_2 [ Functions ]

[ Top ] [ m_sgfft ] [ Functions ]

NAME

 sg_fftrisc_2

FUNCTION

 Carry out Fourier transforms between real and reciprocal (G) space,
 for wavefunctions, contained in a sphere in reciprocal space,
 in both directions. Also accomplish some post-processing.
 if luse_ndo is activated, do two FFT, and compute the density with
 non-diagonal occupations.

NOTES

 * Specifically uses rather sophisticated algorithms, based on S Goedecker
   routines, specialized for superscalar RISC architecture.
   Zero padding : saves 7/12 execution time
   Bi-dimensional data locality in most of the routine : cache reuse
   For k-point (0 0 0) : takes advantage of symmetry of data.
   Note however that no blocking is used, in both 1D z-transform
   or subsequent 2D transform. This should be improved.

 * This routine is not thread-safe due to the presence of variables with the save attribute!
   DO NOT CALL THIS ROUTINE INSIDE A OPENMP PARALLEL REGION

INPUTS

  cplex= if 1 , denpot is real, if 2 , denpot is complex
     (cplex=2 only allowed for option=2 when istwf_k=1)
     one can also use cplex=0 if option=0 or option=3
  fofgin(2,npwin)=holds input wavefunction in G vector basis sphere.
  fofgin_p(2,npwin) (optional) =holds second input wavefunction in G vector basis sphere.
  gboundin(2*mgfft+8,2)=sphere boundary info for reciprocal to real space
  gboundout(2*mgfft+8,2)=sphere boundary info for real to reciprocal space
  istwf_k=option parameter that describes the storage of wfs
  kg_kin(3,npwin)=reduced planewave coordinates, input
  kg_kout(3,npwout)=reduced planewave coordinates, output
  luse_ndo (optional) = use non diagonal occup (in this case, exists fofgin_p)
  npwin=number of elements in fofgin array (for option 0, 1 and 2)
  npwout=number of elements in fofgout array (for option 2 and 3)
  mgfft=maximum size of 1D FFTs
  ngfft(18)=contain all needed information about 3D FFT, see ~abinit/doc/variables/vargs.htm#ngfft
  n4,n5,n6=ngfft(4),ngfft(5),ngfft(6), dimensions of fofr.
  option= if 0: do direct FFT
          if 1: do direct FFT, then sum the density
          if 2: do direct FFT, multiply by the potential, then do reverse FFT
          if 3: do reverse FFT only
  weight=weight to be used for the accumulation of the density in real space
          (needed only when option=1)

OUTPUT

  (see side effects)

OPTIONS

  The different options are:
  - reciprocal to real space and output the result (when option=0),
  - reciprocal to real space and accumulate the density (when option=1) or
  - reciprocal to real space, apply the local potential to the wavefunction
    in real space and produce the result in reciprocal space (when option=2)
  - real space to reciprocal space (when option=3).
  option=0 IS NOT ALLOWED when istwf_k>2
  option=3 IS NOT ALLOWED when istwf_k>=2
  (this version can be used to compute fft of two wavefunction and
     compute the product in denpot)

SIDE EFFECTS

  for option==0, fofgin(2,npwin)=holds input wavefunction in G sphere;
                 fofr(2,n4,n5,n6) contains the Fourier Transform of fofgin;
                 no use of denpot, fofgout and npwout.
  for option==1, fofgin(2,npwin)=holds input wavefunction in G sphere;
                 denpot(cplex*n4,n5,n6) contains the input density at input,
                 and the updated density at output;
                 fofr(2,n4,n5,n6) contains the Fourier transform of fofgin,
                 except in the case of the hp library subroutine;
                 no use of fofgout and npwout.
  for option==2, fofgin(2,npwin)=holds input wavefunction in G sphere;
                 denpot(cplex*n4,n5,n6) contains the input local potential;
                 fofgout(2,npwout) contains the output function;
                 fofr(2,n4,n5,n6) contains the Fourier transform of fofgin,
                 except in the case of the hp library subroutine.
  for option==3, fofr(2,n4,n5,n6) contains the real space wavefunction;
                 fofgout(2,npwout) contains its Fourier transform;
                 no use of fofgin and npwin.

TODO

 Complete input and output list.

PARENTS

      fourwf

CHILDREN

      sg_fft_cc

SOURCE

5078 subroutine sg_fftrisc_2(cplex,denpot,fofgin,fofgout,fofr,gboundin,gboundout,&
5079 & istwf_k,kg_kin,kg_kout,&
5080 & mgfft,ngfft,npwin,npwout,n4,n5,n6,option,weight_r,weight_2,&
5081 & luse_ndo,fofgin_p) ! optional
5082 
5083 
5084 !This section has been created automatically by the script Abilint (TD).
5085 !Do not modify the following lines by hand.
5086 #undef ABI_FUNC
5087 #define ABI_FUNC 'sg_fftrisc_2'
5088 !End of the abilint section
5089 
5090  implicit none
5091 
5092 !Arguments ------------------------------------
5093 !scalars
5094  integer,intent(in) :: cplex,istwf_k,mgfft,n4,n5,n6,npwin,npwout,option
5095  real(dp),intent(in) :: weight_r
5096  real(dp),intent(in),optional :: weight_2
5097 !arrays
5098  integer,intent(in) :: gboundin(2*mgfft+8,2),gboundout(2*mgfft+8,2)
5099  integer,intent(in) :: kg_kin(3,npwin),kg_kout(3,npwout),ngfft(18)
5100  logical,intent(in),optional :: luse_ndo
5101  real(dp),intent(in) :: fofgin(2,npwin)
5102  real(dp),intent(in),optional :: fofgin_p(:,:)
5103  real(dp),intent(inout) :: denpot(cplex*n4,n5,n6),fofr(2,n4,n5,n6)
5104  real(dp),intent(out) :: fofgout(2,npwout)
5105 
5106 !Local variables-------------------------------
5107 !scalars
5108  integer,parameter :: mfac=11
5109  integer,save :: ic1,ic2,ic3,ic4,ic5,ic6,n1_save=0,n2_save=0,n3_save=0
5110  integer :: fftcache,g2max,g2min,i1,i1max,i2,i3,i3inv,ig,igb
5111  integer :: igb_inv,igbmax,ii2,lot,lotin,lotout,mgb,n1
5112  integer :: n1half1,n1halfm,n1i,n2,n2half1,n3,n4half1,n5half1,nfftot,ngbin
5113  integer :: ngbout,nlot,nproc_omp
5114  integer :: weight_i
5115  real(dp) :: ai,ar,fraction,norm,phai,phar,wkim,wkre
5116  character(len=500) :: message
5117 !arrays
5118  integer,save :: aft1(mfac),aft2(mfac),aft3(mfac),aft4(mfac),aft5(mfac)
5119  integer,save :: aft6(mfac),bef1(mfac),bef2(mfac),bef3(mfac),bef4(mfac)
5120  integer,save :: bef5(mfac),bef6(mfac),ind1(mg),ind2(mg),ind3(mg),ind4(mg)
5121  integer,save :: ind5(mg),ind6(mg),now1(mfac),now2(mfac),now3(mfac),now4(mfac)
5122  integer,save :: now5(mfac),now6(mfac)
5123  integer :: gbound_dum(4)
5124  integer,allocatable :: indpw_kin(:,:),indpw_kout(:,:)
5125  logical :: lluse_ndo
5126  real(dp),save :: trig1(2,mg),trig2(2,mg),trig3(2,mg),trig4(2,mg),trig5(2,mg)
5127  real(dp),save :: trig6(2,mg)
5128  real(dp),allocatable :: pha1(:,:),pha2(:,:),pha3(:,:),wk1d_a(:,:,:,:)
5129  real(dp),allocatable :: wk1d_b(:,:,:,:),wk2d_a(:,:,:,:),wk2d_b(:,:,:,:)
5130  real(dp),allocatable :: wk2d_c(:,:,:,:),wk2d_d(:,:,:,:)
5131  real(dp),allocatable :: wk1d_a_p(:,:,:,:),wk1d_b_p(:,:,:,:)
5132  real(dp),allocatable :: wk2d_a_p(:,:,:,:),wk2d_b_p(:,:,:,:)
5133 #if defined HAVE_OPENMP
5134  integer,external :: OMP_GET_NUM_THREADS
5135 #endif
5136 
5137 ! *************************************************************************
5138 
5139  !DBG_ENTER("COLL")
5140 
5141 !DEBUG
5142 !write(std_out,*)' sg_fftrisc_2 : enter, istwf_k= ',istwf_k
5143 !write(std_out,*)' sg_fftrisc_2 : option,mgfft=',option,mgfft
5144 !write(std_out,*)' sg_fftrisc_2 : gboundin(3:2*mgfft+6,1)='
5145 !do ii=1,mgfft+2
5146 !write(std_out,*)gboundin(2*ii+1,1),gboundin(2*ii+2,1)
5147 !end do
5148 !stop
5149 !ENDDEBUG
5150 !
5151  lluse_ndo=.true.
5152  if(istwf_k/=1)then
5153    write(message,'(a,i0)' )' It is not yet allowed to use dmft with istwf_k=',istwf_k
5154    MSG_BUG(message)
5155  end if
5156 
5157  if(istwf_k>2 .and. option==0)then
5158    write(message, '(a,i0)' )' It is not allowed to use option=0 with istwf_k=',istwf_k
5159    MSG_BUG(message)
5160  end if
5161 
5162  if(istwf_k>=2 .and. option==3)then
5163    write(message, '(a,i0)' )'  It is not allowed to use option=3 with istwf_k=',istwf_k
5164    MSG_BUG(message)
5165  end if
5166 
5167  lluse_ndo=.false.
5168  if(present(luse_ndo).and.present(fofgin_p)) then
5169    if(luse_ndo) lluse_ndo=.true.
5170  end if
5171  if(lluse_ndo) then
5172    if((size(fofgin_p,2)==0).and.(luse_ndo)) then
5173      write(message, '(a,a,a,i4,i5)' )&
5174 &     'fofgin_p has a dimension equal to zero and luse_ndo true',ch10,&
5175 &     'Action: check dimension of fofgin_p',size(fofgin_p,2),luse_ndo
5176      MSG_BUG(message)
5177    end if
5178  end if
5179 
5180  weight_i= weight_r
5181  if ( present (weight_2 )) then
5182      weight_i= weight_2
5183      if ( present(luse_ndo) .and. (luse_ndo) )weight_i=weight_r
5184  end if
5185 
5186 !For all other tests of validity of inputs, assume that they
5187 !have been done in the calling routine
5188 
5189  n1=ngfft(1) ; n2=ngfft(2) ; n3=ngfft(3) ; nfftot=n1*n2*n3
5190  fftcache=ngfft(8)
5191 
5192  if(option/=3)then
5193    ABI_ALLOCATE(indpw_kin,(4,npwin))
5194    call indfftrisc(gboundin(3:3+2*mgfft+4,1),indpw_kin,kg_kin,mgfft,ngbin,ngfft,npwin)
5195  end if
5196  if(option==2 .or. option==3)then
5197    ABI_ALLOCATE(indpw_kout,(4,npwout))
5198    call indfftrisc(gboundout(3:3+2*mgfft+4,1),indpw_kout,kg_kout,mgfft,ngbout,ngfft,npwout)
5199  end if
5200 
5201 !Define the dimension of the first work arrays, for 1D transforms along z ,
5202 !taking into account the need to avoid the cache trashing
5203  if(option==2)then
5204    mgb=max(ngbin,ngbout)
5205  else if(option==0 .or. option==1)then
5206    mgb=ngbin ; ngbout=1
5207  else if(option==3)then
5208    mgb=ngbout ; ngbin=1
5209  end if
5210 
5211  if(mod(mgb,2)/=1)mgb=mgb+1
5212 
5213 !Initialise openmp, if needed
5214 !$OMP PARALLEL
5215 !$OMP SINGLE
5216  nproc_omp=1
5217 #if defined HAVE_OPENMP
5218  nproc_omp=OMP_GET_NUM_THREADS()
5219 #endif
5220 !$OMP END SINGLE
5221 !$OMP END PARALLEL
5222 
5223 !For the treatment of the z transform,
5224 !one tries to use only a fraction of the cache, since the
5225 !treatment of the array wk1d_a will not involve contiguous segments
5226  fraction=0.25
5227 !First estimation of lot and nlot
5228  lot=(fftcache*fraction*1000)/(n3*8*2)+1
5229 !Select the smallest integer multiple of nproc_omp, larger
5230 !or equal to nlot. In this way, the cache size is not exhausted,
5231 !and one takes care correctly of the number of processors.
5232 !Treat separately the in and out cases
5233  nlot=(ngbin-1)/lot+1
5234  nlot=nproc_omp*((nlot-1)/nproc_omp+1)
5235  lotin=(ngbin-1)/nlot+1
5236  nlot=(ngbout-1)/lot+1
5237  nlot=nproc_omp*((nlot-1)/nproc_omp+1)
5238  lotout=(ngbout-1)/nlot+1
5239 !The next line impose only one lot. Usually, comment it.
5240 !lotin=mgb ; lotout=mgb
5241 
5242 !Compute auxiliary arrays needed for FFTs
5243  if(n1/=n1_save)then
5244    call sg_ctrig(n1,trig1,aft1,bef1,now1,one,ic1,ind1,mfac,mg)
5245    call sg_ctrig(n1,trig4,aft4,bef4,now4,-one,ic4,ind4,mfac,mg)
5246    n1_save=n1
5247  end if
5248  if(n2/=n2_save)then
5249    call sg_ctrig(n2,trig2,aft2,bef2,now2,one,ic2,ind2,mfac,mg)
5250    call sg_ctrig(n2,trig5,aft5,bef5,now5,-one,ic5,ind5,mfac,mg)
5251    n2_save=n2
5252  end if
5253  if(n3/=n3_save)then
5254    call sg_ctrig(n3,trig3,aft3,bef3,now3,one,ic3,ind3,mfac,mg)
5255    call sg_ctrig(n3,trig6,aft6,bef6,now6,-one,ic6,ind6,mfac,mg)
5256    n3_save=n3
5257  end if
5258 
5259 !------------------------------------------------------------------
5260 !Here, call general k-point code
5261 
5262  if(istwf_k==1)then
5263 
5264 !  Note that the z transform will appear as a y transform
5265    ABI_ALLOCATE(wk1d_a,(2,mgb,n3,1))
5266    ABI_ALLOCATE(wk1d_b,(2,mgb,n3,1))
5267    ABI_ALLOCATE(wk1d_a_p,(2,mgb,n3,1))
5268    ABI_ALLOCATE(wk1d_b_p,(2,mgb,n3,1))
5269 
5270    if(option/=3)then
5271 
5272      if(lluse_ndo)  then
5273 !$OMP PARALLEL DO DEFAULT(PRIVATE) &
5274 !$OMP&SHARED(n3,ngbin,wk1d_a_p)
5275        do i3=1,n3
5276          do igb=1,ngbin
5277            wk1d_a_p(1,igb,i3,1)=zero
5278            wk1d_a_p(2,igb,i3,1)=zero
5279          end do
5280        end do
5281 !$OMP END PARALLEL DO
5282 
5283 !      Insert fofgin_p into the work array
5284 !$OMP PARALLEL DO DEFAULT(PRIVATE) &
5285 !$OMP&SHARED(fofgin_p,indpw_kin,npwin,wk1d_a_p)
5286        do ig=1,npwin
5287          igb=indpw_kin(4,ig) ; i3=indpw_kin(3,ig)
5288          wk1d_a_p(1,igb,i3,1)=fofgin_p(1,ig)
5289          wk1d_a_p(2,igb,i3,1)=fofgin_p(2,ig)
5290        end do
5291 !$OMP END PARALLEL DO
5292 
5293 !      Go from wk1d_a_p to wk1d_b_p, using 1D FFTs on the z direction
5294 !      However, due to special packing of data, use routine ffty
5295 !$OMP PARALLEL DO SHARED(aft3,bef3,fftcache,ind3,ic3,lotin,mgb)&
5296 !$OMP&SHARED(ngbin,now3,n3,trig3,wk1d_a_p,wk1d_b_p)&
5297 !$OMP&PRIVATE(igb,igbmax)
5298        do igb=1,ngbin,lotin
5299          igbmax=min(igb+lotin-1,ngbin)
5300 !        Go from wk1d_a_p to wk1d_b_p, using 1D FFTs on the z direction
5301 !        However, due to special packing of data, use routine ffty
5302          call sg_ffty(fftcache,mfac,mg,mgb,n3,1,igb,igbmax,1,1,wk1d_a_p,wk1d_b_p, &
5303 &         trig3,aft3,now3,bef3,one,ind3,ic3)
5304        end do
5305 !$OMP END PARALLEL DO
5306 
5307      end if ! lluse_ndo
5308 
5309 !$OMP PARALLEL DO DEFAULT(PRIVATE) &
5310 !$OMP&SHARED(n3,ngbin,wk1d_a)
5311      do i3=1,n3
5312        do igb=1,ngbin
5313          wk1d_a(1,igb,i3,1)=zero
5314          wk1d_a(2,igb,i3,1)=zero
5315        end do
5316      end do
5317 !$OMP END PARALLEL DO
5318 
5319 !    Insert fofgin into the work array
5320 !$OMP PARALLEL DO DEFAULT(PRIVATE) &
5321 !$OMP&SHARED(fofgin,indpw_kin,npwin,wk1d_a)
5322      do ig=1,npwin
5323        igb=indpw_kin(4,ig) ; i3=indpw_kin(3,ig)
5324        wk1d_a(1,igb,i3,1)=fofgin(1,ig)
5325        wk1d_a(2,igb,i3,1)=fofgin(2,ig)
5326      end do
5327 !$OMP END PARALLEL DO
5328 
5329 !    Go from wk1d_a to wk1d_b, using 1D FFTs on the z direction
5330 !    However, due to special packing of data, use routine ffty
5331 !$OMP PARALLEL DO SHARED(aft3,bef3,fftcache,ind3,ic3,lotin,mgb)&
5332 !$OMP&SHARED(ngbin,now3,n3,trig3,wk1d_a,wk1d_b)&
5333 !$OMP&PRIVATE(igb,igbmax)
5334      do igb=1,ngbin,lotin
5335        igbmax=min(igb+lotin-1,ngbin)
5336 !      Go from wk1d_a to wk1d_b, using 1D FFTs on the z direction
5337 !      However, due to special packing of data, use routine ffty
5338        call sg_ffty(fftcache,mfac,mg,mgb,n3,1,igb,igbmax,1,1,wk1d_a,wk1d_b, &
5339 &       trig3,aft3,now3,bef3,one,ind3,ic3)
5340      end do
5341 !$OMP END PARALLEL DO
5342 
5343    end if !  if(option/=3)
5344 
5345 !  Do-loop on the planes stacked in the z direction
5346 !$OMP PARALLEL DEFAULT(PRIVATE) &
5347 !$OMP&SHARED(aft1,aft2,aft4,aft5,bef1,bef2,bef4,bef5,cplex,denpot) &
5348 !$OMP&SHARED(fftcache,fofr,gboundin,gboundout)&
5349 !$OMP&SHARED(ic1,ic2,ic4,ic5,ind1,ind2,ind4) &
5350 !$OMP&SHARED(ind5,indpw_kin,indpw_kout,lluse_ndo,mgb,n1,n2,n3,n4,n5,ngbin) &
5351 !$OMP&SHARED(ngbout,now1,now2,now4,now5,option,trig1,trig2,trig4,trig5) &
5352 !$OMP&SHARED(weight_r,weight_i,weight_2,wk1d_a,wk1d_b,wk1d_b_p)
5353 
5354 !  Allocate two 2-dimensional work arrays
5355    ABI_ALLOCATE(wk2d_a,(2,n4,n5,1))
5356    ABI_ALLOCATE(wk2d_b,(2,n4,n5,1))
5357    ABI_ALLOCATE(wk2d_a_p,(2,n4,n5,1))
5358    ABI_ALLOCATE(wk2d_b_p,(2,n4,n5,1))
5359 !$OMP DO
5360    do i3=1,n3
5361 
5362      if(option/=3)then
5363        if(lluse_ndo)  then
5364 !        Zero the values on the current plane
5365 !        wk2d_a_p(1:2,1:n1,1:n2,1)=zero
5366          do i2=1,n2
5367            do i1=1,n1
5368              wk2d_a_p(1,i1,i2,1)=zero
5369              wk2d_a_p(2,i1,i2,1)=zero
5370            end do
5371          end do
5372 !        Copy the data in the current plane
5373          do igb=1,ngbin
5374            i1=indpw_kin(1,igb) ; i2=indpw_kin(2,igb)
5375            wk2d_a_p(1,i1,i2,1)=wk1d_b_p(1,igb,i3,1)
5376            wk2d_a_p(2,i1,i2,1)=wk1d_b_p(2,igb,i3,1)
5377          end do
5378 !        Perform x transform, taking into account arrays of zeros
5379          g2min=gboundin(3,1) ; g2max=gboundin(4,1)
5380          if ( g2min+n2 >= g2max+2 ) then
5381            do i2=g2max+2,g2min+n2
5382              do i1=1,n1
5383                wk2d_b_p(1,i1,i2,1)=zero
5384                wk2d_b_p(2,i1,i2,1)=zero
5385              end do
5386            end do
5387          end if
5388          gbound_dum(1)=1 ; gbound_dum(2)=1
5389          gbound_dum(3)=g2min ; gbound_dum(4)=g2max
5390          call sg_fftpx(fftcache,mfac,mg,0,n4,n5,1,n2,1,wk2d_a_p,wk2d_b_p,&
5391 &         trig1,aft1,now1,bef1,one,ind1,ic1,gbound_dum)
5392 !        Perform y transform
5393          n1i=1
5394          call sg_ffty(fftcache,mfac,mg,n4,n5,1,n1i,n1,1,1,wk2d_b_p,wk2d_a_p, &
5395 &         trig2,aft2,now2,bef2,one,ind2,ic2)
5396 !        The wave function is now in real space, for the current plane
5397        end if  ! lluse_ndo
5398 
5399 !      Zero the values on the current plane
5400 !      wk2d_a(1:2,1:n1,1:n2,1)=zero
5401        do i2=1,n2
5402          do i1=1,n1
5403            wk2d_a(1,i1,i2,1)=zero
5404            wk2d_a(2,i1,i2,1)=zero
5405          end do
5406        end do
5407 !      Copy the data in the current plane
5408        do igb=1,ngbin
5409          i1=indpw_kin(1,igb) ; i2=indpw_kin(2,igb)
5410          wk2d_a(1,i1,i2,1)=wk1d_b(1,igb,i3,1)
5411          wk2d_a(2,i1,i2,1)=wk1d_b(2,igb,i3,1)
5412        end do
5413 !      Perform x transform, taking into account arrays of zeros
5414        g2min=gboundin(3,1) ; g2max=gboundin(4,1)
5415        if ( g2min+n2 >= g2max+2 ) then
5416          do i2=g2max+2,g2min+n2
5417            do i1=1,n1
5418              wk2d_b(1,i1,i2,1)=zero
5419              wk2d_b(2,i1,i2,1)=zero
5420            end do
5421          end do
5422        end if
5423        gbound_dum(1)=1 ; gbound_dum(2)=1
5424        gbound_dum(3)=g2min ; gbound_dum(4)=g2max
5425        call sg_fftpx(fftcache,mfac,mg,0,n4,n5,1,n2,1,wk2d_a,wk2d_b,&
5426 &       trig1,aft1,now1,bef1,one,ind1,ic1,gbound_dum)
5427 !      Perform y transform
5428        n1i=1
5429        call sg_ffty(fftcache,mfac,mg,n4,n5,1,n1i,n1,1,1,wk2d_b,wk2d_a, &
5430 &       trig2,aft2,now2,bef2,one,ind2,ic2)
5431 !      The wave function is now in real space, for the current plane
5432      end if
5433 
5434      if(option==0)then
5435 !      Copy the transformed function at the right place
5436        do i2=1,n2
5437          do i1=1,n1
5438            fofr(1,i1,i2,i3)=wk2d_a(1,i1,i2,1)
5439            fofr(2,i1,i2,i3)=wk2d_a(2,i1,i2,1)
5440          end do
5441        end do
5442      end if
5443 
5444      if(option==1)then
5445 !      Accumulate density
5446        do i2=1,n2
5447          do i1=1,n1
5448            if(lluse_ndo)  then
5449              denpot(i1,i2,i3)=denpot(i1,i2,i3)+&
5450 &             weight_r*(wk2d_a(1,i1,i2,1)*wk2d_a_p(1,i1,i2,1)&
5451 &             +wk2d_a(2,i1,i2,1)*wk2d_a_p(2,i1,i2,1))
5452              if(present(weight_2)) then
5453                denpot(i1,i2,i3)=denpot(i1,i2,i3)+&
5454 &               weight_2*(wk2d_a_p(2,i1,i2,1)*wk2d_a(1,i1,i2,1)&
5455 &               -wk2d_a_p(1,i1,i2,1)*wk2d_a(2,i1,i2,1))
5456              end if
5457            else
5458              denpot(i1,i2,i3)=denpot(i1,i2,i3)+&
5459 &             weight_r*wk2d_a(1,i1,i2,1)**2+ weight_i*wk2d_a(2,i1,i2,1)**2
5460            end if
5461          end do
5462        end do
5463      end if
5464 
5465      if(option==2)then
5466 !      Apply local potential
5467        if(cplex==1)then
5468          do i2=1,n2
5469            do i1=1,n1
5470              wk2d_a(1,i1,i2,1)=denpot(i1,i2,i3)*wk2d_a(1,i1,i2,1)
5471              wk2d_a(2,i1,i2,1)=denpot(i1,i2,i3)*wk2d_a(2,i1,i2,1)
5472            end do
5473          end do
5474        else
5475          do i2=1,n2
5476            do i1=1,n1
5477              wkre=wk2d_a(1,i1,i2,1)
5478              wkim=wk2d_a(2,i1,i2,1)
5479              wk2d_a(1,i1,i2,1)=denpot(2*i1-1,i2,i3)*wkre &
5480 &             -denpot(2*i1  ,i2,i3)*wkim
5481              wk2d_a(2,i1,i2,1)=denpot(2*i1-1,i2,i3)*wkim &
5482 &             +denpot(2*i1  ,i2,i3)*wkre
5483            end do
5484          end do
5485        end if
5486      end if
5487 
5488      if(option==3)then
5489 !      Copy the function to be tranformed at the right place
5490        do i2=1,n2
5491          do i1=1,n1
5492            wk2d_a(1,i1,i2,1)=fofr(1,i1,i2,i3)
5493            wk2d_a(2,i1,i2,1)=fofr(2,i1,i2,i3)
5494          end do
5495        end do
5496      end if
5497 
5498      if(option==2 .or. option==3)then
5499 !      Perform y transform
5500        n1i=1
5501        call sg_ffty(fftcache,mfac,mg,n4,n5,1,n1i,n1,1,1,wk2d_a,wk2d_b, &
5502 &       trig5,aft5,now5,bef5,-one,ind5,ic5)
5503 !      Perform x transform, taking into account arrays of zeros
5504        gbound_dum(1)=1 ; gbound_dum(2)=1
5505        gbound_dum(3)=gboundout(3,1) ; gbound_dum(4)=gboundout(4,1)
5506        call sg_fftpx(fftcache,mfac,mg,0,n4,n5,1,n2,1,wk2d_b,wk2d_a,&
5507 &       trig4,aft4,now4,bef4,-one,ind4,ic4,gbound_dum)
5508 !      Copy the data from the current plane to wk1d_b
5509        do igb=1,ngbout
5510          i1=indpw_kout(1,igb) ; i2=indpw_kout(2,igb)
5511          wk1d_b(1,igb,i3,1)=wk2d_a(1,i1,i2,1)
5512          wk1d_b(2,igb,i3,1)=wk2d_a(2,i1,i2,1)
5513        end do
5514      end if
5515 
5516 !    End loop on planes
5517    end do
5518 !$OMP END DO
5519    ABI_DEALLOCATE(wk2d_a)
5520    ABI_DEALLOCATE(wk2d_b)
5521    ABI_DEALLOCATE(wk2d_a_p)
5522    ABI_DEALLOCATE(wk2d_b_p)
5523 !$OMP END PARALLEL
5524 
5525    if(option==2 .or. option==3)then
5526 
5527 !    Go from wk1d_b to wk1d_a, using 1D FFTs on the z direction
5528 !    However, due to special packing of data, use routine ffty
5529 !$OMP PARALLEL DO SHARED(aft6,bef6,fftcache,ind6,ic6,lotout,mgb)&
5530 !$OMP&SHARED(ngbout,now6,n3,trig6,wk1d_a,wk1d_b)&
5531 !$OMP&PRIVATE(igb,igbmax)
5532      do igb=1,ngbout,lotout
5533        igbmax=min(igb+lotout-1,ngbout)
5534 !      Go from wk1d_b to wk1d_a, using 1D FFTs on the z direction
5535 !      However, due to special packing of data, use routine ffty
5536        call sg_ffty(fftcache,mfac,mg,mgb,n3,1,igb,igbmax,1,1,wk1d_b,wk1d_a, &
5537 &       trig6,aft6,now6,bef6,-one,ind6,ic6)
5538 
5539      end do
5540 !$OMP END PARALLEL DO
5541 
5542 !    Transfer the data in the output array, after normalization
5543      norm=1.d0/dble(nfftot)
5544 !$OMP PARALLEL DO DEFAULT(PRIVATE) &
5545 !$OMP&SHARED(fofgout,indpw_kout,norm,npwout,wk1d_a)
5546      do ig=1,npwout
5547        igb=indpw_kout(4,ig) ; i3=indpw_kout(3,ig)
5548        fofgout(1,ig)=wk1d_a(1,igb,i3,1)*norm
5549        fofgout(2,ig)=wk1d_a(2,igb,i3,1)*norm
5550      end do
5551 !$OMP END PARALLEL DO
5552    end if
5553 
5554    ABI_DEALLOCATE(wk1d_a)
5555    ABI_DEALLOCATE(wk1d_b)
5556    ABI_DEALLOCATE(wk1d_a_p)
5557    ABI_DEALLOCATE(wk1d_b_p)
5558 
5559 !  End general k-point part
5560  end if
5561 
5562 !------------------------------------------------------------------
5563 !Here, use of time-reversal symmetry
5564 
5565  if(istwf_k>=2)then
5566 
5567    n1half1=n1/2+1 ; n1halfm=(n1+1)/2
5568    n2half1=n2/2+1
5569 !  n4half1 or n5half1 are the odd integers >= n1half1 or n2half1
5570    n4half1=(n1half1/2)*2+1
5571    n5half1=(n2half1/2)*2+1
5572 !  Note that the z transform will appear as a y transform
5573    ABI_ALLOCATE(wk1d_a,(2,mgb,n3,1))
5574    ABI_ALLOCATE(wk1d_b,(2,mgb,n3,1))
5575 
5576    if(istwf_k/=2)then
5577      ABI_ALLOCATE(pha1,(2,n1))
5578      ABI_ALLOCATE(pha2,(2,n2))
5579      ABI_ALLOCATE(pha3,(3,n3))
5580      do i1=1,n1
5581        pha1(1,i1)=cos(dble(i1-1)*pi/dble(n1))
5582        pha1(2,i1)=sin(dble(i1-1)*pi/dble(n1))
5583      end do
5584      do i2=1,n2
5585        pha2(1,i2)=cos(dble(i2-1)*pi/dble(n2))
5586        pha2(2,i2)=sin(dble(i2-1)*pi/dble(n2))
5587      end do
5588      do i3=1,n3
5589        pha3(1,i3)=cos(dble(i3-1)*pi/dble(n3))
5590        pha3(2,i3)=sin(dble(i3-1)*pi/dble(n3))
5591      end do
5592    end if
5593 
5594    if(option/=3)then
5595 
5596 !    Zero the components of wk1d_a
5597 !$OMP PARALLEL DO DEFAULT(PRIVATE) &
5598 !$OMP&SHARED(n3,ngbin,wk1d_a)
5599      do i3=1,n3
5600        do igb=1,ngbin
5601          wk1d_a(1,igb,i3,1)=zero
5602          wk1d_a(2,igb,i3,1)=zero
5603        end do
5604      end do
5605 !$OMP END PARALLEL DO
5606 
5607 !    Insert fofgin into the work array
5608 !$OMP PARALLEL DO DEFAULT(PRIVATE) &
5609 !$OMP&SHARED(fofgin,indpw_kin,npwin,wk1d_a)
5610      do ig=1,npwin
5611        igb=indpw_kin(4,ig) ; i3=indpw_kin(3,ig)
5612        wk1d_a(1,igb,i3,1)=fofgin(1,ig)
5613        wk1d_a(2,igb,i3,1)=fofgin(2,ig)
5614      end do
5615 !$OMP END PARALLEL DO
5616 
5617 !    Must complete the i2=1 plane when $k_y \equiv 0$
5618 
5619 !    Take care of i1=1 when $k_x \equiv 0$
5620      if(istwf_k==2)then
5621 !      Take care of i1=1
5622        do i3=n3/2+1,n3
5623          i3inv=n3+2-i3
5624          wk1d_a(1,1,i3,1)= wk1d_a(1,1,i3inv,1)
5625          wk1d_a(2,1,i3,1)=-wk1d_a(2,1,i3inv,1)
5626        end do
5627      else if(istwf_k==4)then
5628 !      Take care of i1=1
5629        do i3=n3/2+1,n3
5630          i3inv=n3+1-i3
5631          wk1d_a(1,1,i3,1)= wk1d_a(1,1,i3inv,1)
5632          wk1d_a(2,1,i3,1)=-wk1d_a(2,1,i3inv,1)
5633        end do
5634      end if
5635 
5636 !    Now, take care of other i1 values, except i3==1 when $k_z \equiv 0$
5637      i1max=gboundin(6,1)+1
5638      if(istwf_k==2)then
5639 !$OMP PARALLEL DO DEFAULT(PRIVATE) &
5640 !$OMP&SHARED(i1max,n3,wk1d_a)
5641        do igb=2,2*i1max-1
5642          igb_inv=2*i1max+1-igb
5643          do i3=n3/2+1,n3
5644            i3inv=n3+2-i3
5645            wk1d_a(1,igb,i3,1)= wk1d_a(1,igb_inv,i3inv,1)
5646            wk1d_a(2,igb,i3,1)=-wk1d_a(2,igb_inv,i3inv,1)
5647          end do
5648        end do
5649 !$OMP END PARALLEL DO
5650 
5651      else if(istwf_k==3)then
5652 !$OMP PARALLEL DO DEFAULT(PRIVATE) &
5653 !$OMP&SHARED(i1max,n3,wk1d_a)
5654        do igb=1,2*i1max
5655          igb_inv=2*i1max+1-igb
5656          do i3=n3/2+1,n3
5657            i3inv=n3+2-i3
5658            wk1d_a(1,igb,i3,1)= wk1d_a(1,igb_inv,i3inv,1)
5659            wk1d_a(2,igb,i3,1)=-wk1d_a(2,igb_inv,i3inv,1)
5660          end do
5661        end do
5662 !$OMP END PARALLEL DO
5663 
5664      else if(istwf_k==4)then
5665 !$OMP PARALLEL DO DEFAULT(PRIVATE) &
5666 !$OMP&SHARED(i1max,n3,wk1d_a)
5667        do igb=2,2*i1max-1
5668          igb_inv=2*i1max+1-igb
5669          do i3=n3/2+1,n3
5670            i3inv=n3+1-i3
5671            wk1d_a(1,igb,i3,1)= wk1d_a(1,igb_inv,i3inv,1)
5672            wk1d_a(2,igb,i3,1)=-wk1d_a(2,igb_inv,i3inv,1)
5673          end do
5674        end do
5675 !$OMP END PARALLEL DO
5676 
5677      else if(istwf_k==5)then
5678 !$OMP PARALLEL DO DEFAULT(PRIVATE) &
5679 !$OMP&SHARED(i1max,n3,wk1d_a)
5680        do igb=1,2*i1max
5681          igb_inv=2*i1max+1-igb
5682          do i3=n3/2+1,n3
5683            i3inv=n3+1-i3
5684            wk1d_a(1,igb,i3,1)= wk1d_a(1,igb_inv,i3inv,1)
5685            wk1d_a(2,igb,i3,1)=-wk1d_a(2,igb_inv,i3inv,1)
5686          end do
5687        end do
5688 !$OMP END PARALLEL DO
5689 
5690      end if
5691 
5692 !    Now, i3==1
5693      if(istwf_k==2)then
5694        do igb=2,i1max
5695          igb_inv=2*i1max+1-igb
5696          wk1d_a(1,igb_inv,1,1)= wk1d_a(1,igb,1,1)
5697          wk1d_a(2,igb_inv,1,1)=-wk1d_a(2,igb,1,1)
5698        end do
5699      else if(istwf_k==3)then
5700        do igb=1,i1max
5701          igb_inv=2*i1max+1-igb
5702          wk1d_a(1,igb_inv,1,1)= wk1d_a(1,igb,1,1)
5703          wk1d_a(2,igb_inv,1,1)=-wk1d_a(2,igb,1,1)
5704        end do
5705      end if
5706 
5707 !    Go from wk1d_a to wk1d_b, using 1D FFTs on the z direction
5708 !    However, due to special packing of data, use routine ffty
5709 !$OMP PARALLEL DO SHARED(aft3,bef3,fftcache,ind3,ic3,lotin,mgb)&
5710 !$OMP&SHARED(ngbin,now3,n3,trig3,wk1d_a,wk1d_b)&
5711 !$OMP&PRIVATE(igb,igbmax)
5712      do igb=1,ngbin,lotin
5713        igbmax=min(igb+lotin-1,ngbin)
5714 !      Go from wk1d_a to wk1d_b, using 1D FFTs on the z direction
5715 !      However, due to special packing of data, use routine ffty
5716        call sg_ffty(fftcache,mfac,mg,mgb,n3,1,igb,igbmax,1,1,wk1d_a,wk1d_b, &
5717 &       trig3,aft3,now3,bef3,one,ind3,ic3)
5718      end do
5719 !$OMP END PARALLEL DO
5720 
5721 !    Change the phase if $k_z \neq 0$
5722      if(istwf_k==4 .or. istwf_k==5 .or. istwf_k==8 .or. istwf_k==9 )then
5723 !$OMP PARALLEL DO DEFAULT(PRIVATE) &
5724 !$OMP&SHARED(ngbin,n3,pha3,wk1d_b)
5725        do i3=1,n3
5726          phar=pha3(1,i3)
5727          phai=pha3(2,i3)
5728          do igb=1,ngbin
5729            ar=wk1d_b(1,igb,i3,1)
5730            ai=wk1d_b(2,igb,i3,1)
5731            wk1d_b(1,igb,i3,1)=phar*ar-phai*ai
5732            wk1d_b(2,igb,i3,1)=phai*ar+phar*ai
5733          end do
5734        end do
5735 !$OMP END PARALLEL DO
5736      end if
5737 
5738    end if !  if(option/=3)
5739 
5740 !  Do-loop on the planes stacked in the z direction
5741 
5742 !$OMP PARALLEL DEFAULT(PRIVATE) &
5743 !$OMP&SHARED(aft1,aft2,aft4,aft5,bef1,bef2,bef4,bef5,denpot) &
5744 !$OMP&SHARED(fftcache,fofr,gboundin,ic1,ic2,ic4,ic5,ind1,ind2,ind4,ind5) &
5745 !$OMP&SHARED(indpw_kin,indpw_kout,istwf_k,mgb,n1,n1half1) &
5746 !$OMP&SHARED(n1halfm,n2,n2half1,n3,n4,n5,ngbin,ngbout) &
5747 !$OMP&SHARED(now1,now2,now4,now5,option,pha1,pha2,trig1) &
5748 !$OMP&SHARED(trig2,trig4,trig5,weight_r,weight_i,wk1d_a,wk1d_b)
5749 
5750 !  Allocate two 2-dimensional work arrays
5751    ABI_ALLOCATE(wk2d_a,(2,n4,n5,1))
5752    ABI_ALLOCATE(wk2d_b,(2,n4,n5,1))
5753    ABI_ALLOCATE(wk2d_c,(2,2*n1halfm,n5,1))
5754    ABI_ALLOCATE(wk2d_d,(2,2*n1halfm,n5,1))
5755 !$OMP DO
5756    do i3=1,n3
5757 
5758      g2max=gboundin(4,1)
5759 
5760      if(option/=3)then
5761 !      Zero the values on the current plane : need only from i2=1 to g2max+1
5762        do i2=1,g2max+1
5763          do i1=1,n1
5764            wk2d_a(1,i1,i2,1)=zero
5765            wk2d_a(2,i1,i2,1)=zero
5766          end do
5767        end do
5768 
5769 !      Copy the data in the current plane
5770        do igb=1,ngbin
5771          i1=indpw_kin(1,igb) ; i2=indpw_kin(2,igb)
5772          wk2d_a(1,i1,i2,1)=wk1d_b(1,igb,i3,1)
5773          wk2d_a(2,i1,i2,1)=wk1d_b(2,igb,i3,1)
5774        end do
5775 
5776 !      Perform x transform, taking into account arrays of zeros
5777        call sg_fftx(fftcache,mfac,mg,n4,n5,1,g2max+1,1,wk2d_a,wk2d_b,&
5778 &       trig1,aft1,now1,bef1,one,ind1,ic1)
5779 
5780 !      Change the phase if $k_x \neq 0$
5781        if(istwf_k==3 .or. istwf_k==5 .or. istwf_k==7 .or. istwf_k==9)then
5782          do i1=1,n1
5783            phar=pha1(1,i1)
5784            phai=pha1(2,i1)
5785            do i2=1,g2max+1
5786              ar=wk2d_b(1,i1,i2,1)
5787              ai=wk2d_b(2,i1,i2,1)
5788              wk2d_b(1,i1,i2,1)=phar*ar-phai*ai
5789              wk2d_b(2,i1,i2,1)=phai*ar+phar*ai
5790            end do
5791          end do
5792        end if
5793 
5794 !      Compute symmetric and antisymmetric combinations
5795        if(istwf_k>=2 .and. istwf_k<=5)then
5796          do i1=1,n1half1-1
5797            wk2d_a(1,i1,1,1)=wk2d_b(1,2*i1-1,1,1)
5798            wk2d_a(2,i1,1,1)=wk2d_b(1,2*i1  ,1,1)
5799          end do
5800 !        If n1 odd, must add last data
5801          if((2*n1half1-2)/=n1)then
5802            wk2d_a(1,n1half1,1,1)=wk2d_b(1,n1,1,1)
5803            wk2d_a(2,n1half1,1,1)=zero
5804          end if
5805          ii2=2
5806        else
5807          ii2=1
5808        end if
5809        if( g2max+1 >= ii2)then
5810          do i2=ii2,g2max+1
5811            do i1=1,n1half1-1
5812              wk2d_a(1,i1,i2,1)=        wk2d_b(1,2*i1-1,i2,1)-wk2d_b(2,2*i1,i2,1)
5813              wk2d_a(2,i1,i2,1)=        wk2d_b(2,2*i1-1,i2,1)+wk2d_b(1,2*i1,i2,1)
5814              wk2d_a(1,i1,n2+ii2-i2,1)= wk2d_b(1,2*i1-1,i2,1)+wk2d_b(2,2*i1,i2,1)
5815              wk2d_a(2,i1,n2+ii2-i2,1)=-wk2d_b(2,2*i1-1,i2,1)+wk2d_b(1,2*i1,i2,1)
5816            end do
5817            if((2*n1half1-2)/=n1)then
5818              wk2d_a(1,n1half1,i2,1)=        wk2d_b(1,n1,i2,1)
5819              wk2d_a(2,n1half1,i2,1)=        wk2d_b(2,n1,i2,1)
5820              wk2d_a(1,n1half1,n2+ii2-i2,1)= wk2d_b(1,n1,i2,1)
5821              wk2d_a(2,n1half1,n2+ii2-i2,1)=-wk2d_b(2,n1,i2,1)
5822            end if
5823          end do
5824        end if
5825        if ( n2half1 >= g2max+2 ) then
5826          do i2=g2max+2,n2half1
5827            do i1=1,n1half1-1
5828              wk2d_a(1,i1,i2,1)=zero
5829              wk2d_a(2,i1,i2,1)=zero
5830              wk2d_a(1,i1,n2+ii2-i2,1)=zero
5831              wk2d_a(2,i1,n2+ii2-i2,1)=zero
5832            end do
5833            if((2*n1half1-2)/=n1)then
5834              wk2d_a(1,n1half1,i2,1)=zero
5835              wk2d_a(2,n1half1,i2,1)=zero
5836              wk2d_a(1,n1half1,n2+ii2-i2,1)=zero
5837              wk2d_a(2,n1half1,n2+ii2-i2,1)=zero
5838            end if
5839          end do
5840        end if
5841 
5842        n1i=1
5843        call sg_ffty(fftcache,mfac,mg,n4,n5,1,n1i,n1halfm,1,1,wk2d_a,wk2d_b,&
5844 &       trig2,aft2,now2,bef2,one,ind2,ic2)
5845 
5846 !      Change the phase if $k_y \neq 0$
5847        if(istwf_k>=6 .and. istwf_k<=9)then
5848          do i2=1,n2
5849            phar=pha2(1,i2)
5850            phai=pha2(2,i2)
5851            do i1=1,n1halfm
5852              ar=wk2d_b(1,i1,i2,1)
5853              ai=wk2d_b(2,i1,i2,1)
5854              wk2d_b(1,i1,i2,1)= phar*ar-phai*ai
5855              wk2d_b(2,i1,i2,1)= phai*ar+phar*ai
5856            end do
5857          end do
5858        end if
5859 
5860      end if ! option/=3
5861 
5862 !    The wave function is now in real space, for the current plane,
5863 !    represented by REAL numbers, although packed in the complex array wk2d_b
5864 
5865      g2max=gboundin(4,1)
5866 
5867      if(option==0)then
5868 !      This option is only permitted for istwf_k==2 (Gamma point)
5869 !      Copy the transformed function at the right place
5870        do i2=1,n2
5871          do i1=1,n1half1-1
5872            fofr(1,2*i1-1,i2,i3)=wk2d_b(1,i1,i2,1)
5873            fofr(1,2*i1  ,i2,i3)=wk2d_b(2,i1,i2,1)
5874            fofr(2,2*i1-1,i2,i3)=zero
5875            fofr(2,2*i1  ,i2,i3)=zero
5876          end do
5877 !        If n1 odd, must add last data
5878          if((2*n1half1-2)/=n1)then
5879            fofr(1,n1,i2,i3)=wk2d_b(1,n1half1,i2,1)
5880            fofr(2,n1,i2,i3)=zero
5881          end if
5882        end do
5883      end if
5884 
5885      if(option==1)then
5886 !      Accumulate density
5887        do i2=1,n2
5888          do i1=1,n1half1-1
5889            denpot(2*i1-1,i2,i3)=denpot(2*i1-1,i2,i3)+weight_r*wk2d_b(1,i1,i2,1)**2
5890            denpot(2*i1  ,i2,i3)=denpot(2*i1  ,i2,i3)+weight_i*wk2d_b(2,i1,i2,1)**2
5891          end do
5892 !        If n1 odd, must add last data
5893          if((2*n1half1-2)/=n1)then
5894            denpot(n1,i2,i3)=denpot(n1,i2,i3)+weight_r*wk2d_b(1,n1half1,i2,1)**2
5895 !          not use in DMFT because istwfk required to be one.
5896          end if
5897        end do
5898      end if
5899 
5900      if(option==2)then
5901 !      Apply local potential
5902        do i2=1,n2
5903          do i1=1,n1half1-1
5904            wk2d_a(1,i1,i2,1)=denpot(2*i1-1,i2,i3)*wk2d_b(1,i1,i2,1)
5905            wk2d_a(2,i1,i2,1)=denpot(2*i1  ,i2,i3)*wk2d_b(2,i1,i2,1)
5906          end do
5907 !        If n1 odd, must add last data
5908          if((2*n1half1-2)/=n1)then
5909            wk2d_a(1,n1half1,i2,1)=denpot(n1,i2,i3)*wk2d_b(1,n1half1,i2,1)
5910            wk2d_a(2,n1half1,i2,1)=zero
5911          end if
5912        end do
5913      end if
5914 
5915      if(option==3)then
5916 !      This option is only permitted for istwf_k==2 (Gamma point)
5917 !      Copy the transformed function at the right place
5918        do i2=1,n2
5919          do i1=1,n1half1-1
5920            wk2d_b(1,i1,i2,1)=fofr(1,2*i1-1,i2,i3)
5921            wk2d_b(2,i1,i2,1)=fofr(1,2*i1  ,i2,i3)
5922          end do
5923 !        If n1 odd, must add last data
5924          if((2*n1half1-2)/=n1)then
5925            wk2d_b(1,n1half1,i2,1)=fofr(1,n1,i2,i3)
5926          end if
5927        end do
5928      end if
5929 
5930      if(option==2 .or. option==3)then
5931 !      Change the phase if $k_y \neq 0$
5932        if(istwf_k>=6 .and. istwf_k<=9)then
5933          do i2=1,n2
5934            phar=pha2(1,i2)
5935            phai=pha2(2,i2)
5936            do i1=1,n1halfm
5937              ar=wk2d_a(1,i1,i2,1)
5938              ai=wk2d_a(2,i1,i2,1)
5939              wk2d_a(1,i1,i2,1)= phar*ar+phai*ai
5940              wk2d_a(2,i1,i2,1)=-phai*ar+phar*ai
5941            end do
5942          end do
5943        end if
5944 
5945 !      Perform y transform
5946        n1i=1
5947        call sg_ffty(fftcache,mfac,mg,n4,n5,1,n1i,n1halfm,1,1,wk2d_a,wk2d_b, &
5948 &       trig5,aft5,now5,bef5,-one,ind5,ic5)
5949 
5950 !      Decompose symmetric and antisymmetric parts
5951        if(istwf_k>=2 .and. istwf_k<=5)then
5952          do i1=1,n1halfm
5953            wk2d_c(1,2*i1-1,1,1)=wk2d_b(1,i1,1,1)
5954            wk2d_c(2,2*i1-1,1,1)=zero
5955            wk2d_c(1,2*i1,1,1)=wk2d_b(2,i1,1,1)
5956            wk2d_c(2,2*i1,1,1)=zero
5957          end do
5958          ii2=2
5959        else
5960          ii2=1
5961        end if
5962        do i2=ii2,g2max+1
5963          do i1=1,n1halfm
5964            wk2d_c(1,2*i1-1,i2,1)=(wk2d_b(1,i1,i2,1)+wk2d_b(1,i1,n2+ii2-i2,1))*0.5d0
5965            wk2d_c(2,2*i1-1,i2,1)=(wk2d_b(2,i1,i2,1)-wk2d_b(2,i1,n2+ii2-i2,1))*0.5d0
5966            wk2d_c(1,2*i1,i2,1)= ( wk2d_b(2,i1,i2,1)+wk2d_b(2,i1,n2+ii2-i2,1))*0.5d0
5967            wk2d_c(2,2*i1,i2,1)= (-wk2d_b(1,i1,i2,1)+wk2d_b(1,i1,n2+ii2-i2,1))*0.5d0
5968          end do
5969        end do
5970 
5971 !      Change the phase if $k_x \neq 0$
5972        if(istwf_k==3 .or. istwf_k==5 .or. istwf_k==7 .or. istwf_k==9 )then
5973          do i1=1,n1
5974            phar=pha1(1,i1)
5975            phai=pha1(2,i1)
5976            do i2=1,g2max+1
5977              ar=wk2d_c(1,i1,i2,1)
5978              ai=wk2d_c(2,i1,i2,1)
5979              wk2d_c(1,i1,i2,1)= phar*ar+phai*ai
5980              wk2d_c(2,i1,i2,1)=-phai*ar+phar*ai
5981            end do
5982          end do
5983        end if
5984 
5985 !      Perform x transform : for y=1 to g2max+1, to benefit from zeros
5986        call sg_fftx(fftcache,mfac,mg,2*n1halfm,n5,1,g2max+1,1,wk2d_c,wk2d_d,&
5987 &       trig4,aft4,now4,bef4,-one,ind4,ic4)
5988 
5989 !      Copy the data from the current plane to wk1d_b
5990        do igb=1,ngbout
5991          i1=indpw_kout(1,igb) ; i2=indpw_kout(2,igb)
5992          wk1d_b(1,igb,i3,1)=wk2d_d(1,i1,i2,1)
5993          wk1d_b(2,igb,i3,1)=wk2d_d(2,i1,i2,1)
5994        end do
5995 
5996      end if ! option==2 or 3
5997 
5998 !    End loop on planes
5999    end do
6000 
6001 !$OMP END DO
6002    ABI_DEALLOCATE(wk2d_a)
6003    ABI_DEALLOCATE(wk2d_b)
6004    ABI_DEALLOCATE(wk2d_c)
6005    ABI_DEALLOCATE(wk2d_d)
6006 !$OMP END PARALLEL
6007 
6008    if(option==2 .or. option==3)then
6009 
6010 !    Change the phase if $k_z \neq 0$
6011      if(istwf_k==4 .or. istwf_k==5 .or. istwf_k==8 .or. istwf_k==9 )then
6012 !$OMP PARALLEL DO DEFAULT(PRIVATE) &
6013 !$OMP&SHARED(ngbout,n3,pha3,wk1d_b)
6014        do i3=1,n3
6015          phar=pha3(1,i3)
6016          phai=pha3(2,i3)
6017          do igb=1,ngbout
6018            ar=wk1d_b(1,igb,i3,1)
6019            ai=wk1d_b(2,igb,i3,1)
6020            wk1d_b(1,igb,i3,1)= phar*ar+phai*ai
6021            wk1d_b(2,igb,i3,1)=-phai*ar+phar*ai
6022          end do
6023        end do
6024 !$OMP END PARALLEL DO
6025      end if
6026 
6027 !    Go from wk1d_b to wk1d_a, using 1D FFTs on the z direction
6028 !    However, due to special packing of data, use routine ffty
6029 !$OMP PARALLEL DO SHARED(aft6,bef6,fftcache,ind6,ic6,lotout,mgb)&
6030 !$OMP&SHARED(ngbout,now6,n3,trig6,wk1d_a,wk1d_b)&
6031 !$OMP&PRIVATE(igb,igbmax)
6032      do igb=1,ngbout,lotout
6033        igbmax=min(igb+lotout-1,ngbout)
6034 !      Go from wk1d_b to wk1d_a, using 1D FFTs on the z direction
6035 !      However, due to special packing of data, use routine ffty
6036        call sg_ffty(fftcache,mfac,mg,mgb,n3,1,igb,igbmax,1,1,wk1d_b,wk1d_a, &
6037 &       trig6,aft6,now6,bef6,-one,ind6,ic6)
6038 
6039      end do
6040 !$OMP END PARALLEL DO
6041 
6042 !    Transfer the data in the output array, after normalization
6043      norm=1.d0/dble(nfftot)
6044 !$OMP PARALLEL DO DEFAULT(PRIVATE) &
6045 !$OMP&SHARED(fofgout,indpw_kout,norm,npwout,wk1d_a)
6046      do ig=1,npwout
6047        igb=indpw_kout(4,ig) ; i3=indpw_kout(3,ig)
6048        fofgout(1,ig)=wk1d_a(1,igb,i3,1)*norm
6049        fofgout(2,ig)=wk1d_a(2,igb,i3,1)*norm
6050      end do
6051 !$OMP END PARALLEL DO
6052 
6053    end if
6054 
6055    ABI_DEALLOCATE(wk1d_a)
6056    ABI_DEALLOCATE(wk1d_b)
6057 
6058    if(istwf_k/=2)then
6059      ABI_DEALLOCATE(pha1)
6060      ABI_DEALLOCATE(pha2)
6061      ABI_DEALLOCATE(pha3)
6062    end if
6063 
6064 !  End time-reversal symmetry
6065  end if
6066 
6067  if(option/=3) then
6068    ABI_DEALLOCATE(indpw_kin)
6069  end if
6070  if(option==2 .or. option==3) then
6071    ABI_DEALLOCATE(indpw_kout)
6072  end if
6073 
6074  !DBG_EXIT("COLL")
6075 
6076 end subroutine sg_fftrisc_2

m_sgfft/sg_fftx [ Functions ]

[ Top ] [ m_sgfft ] [ Functions ]

NAME

 sg_fftx

FUNCTION

 This subroutine is called by the 3-dimensional fft to conduct the
 "x" transforms for all y and z.

INPUTS

  fftcache=size of the cache (kB)
  mfac = maximum number of factors in 1D FFTs
  mg = maximum length of 1D FFTs
  nd1=first dimension of (complex) arrays z and zbr (treated as real within
   this subroutine)
  nd2=second dimension of (complex) arrays z and zbr (treated as real within
   this subroutine)
  nd3=third dimension of (complex) arrays z and zbr (treated as real within
   this subroutine)
  n2,n3=actual length of y and z transforms
  z(2,nd1,nd2,nd3)=INPUT array; destroyed by transformation
  trig, aft, now, bef, ind=provided by previous call to ctrig
   Note that in this routine (and in ctrig) the values in array trig are
   actually cos and tan, not cos and sin.  Use of tan allows advantageous
   use of FMA on the ibm rs6000.
  ris=sign of exponential in transform (should be 1 or -1; real)
  ic=number of (radix) factors of x transform length (from ctrig)

OUTPUT

  zbr(2,nd1,nd2,nd3)=OUTPUT transformed array; no scaling applied

SIDE EFFECTS

NOTES

 This routine blocks the x transforms
 so that all transforms under consideration at one step fit within
 the cache memory, which is crucial for optimal performance.
 The blocking factor is set by parameter "fftcache" below, which should
 be adjusted to be somewhat smaller (say 3/4) than the actual cache size
 of the machine.

TODO

 Use latex for the equation above

PARENTS

      m_sgfft

CHILDREN

      sg_fft_cc

SOURCE

1669 subroutine sg_fftx(fftcache,mfac,mg,nd1,nd2,nd3,n2,n3,z,zbr,&
1670 & trig,aft,now,bef,ris,ind,ic)
1671 
1672 
1673 !This section has been created automatically by the script Abilint (TD).
1674 !Do not modify the following lines by hand.
1675 #undef ABI_FUNC
1676 #define ABI_FUNC 'sg_fftx'
1677 !End of the abilint section
1678 
1679  implicit none
1680 
1681 !Arguments ------------------------------------
1682 !Dimensions of aft, now, bef, ind, and trig should agree with
1683 !those in subroutine ctrig.
1684 !scalars
1685  integer,intent(in) :: fftcache,ic,mfac,mg,n2,n3,nd1,nd2,nd3
1686  real(dp),intent(in) :: ris
1687 !arrays
1688  integer,intent(in) :: aft(mfac),bef(mfac),ind(mg),now(mfac)
1689  real(dp),intent(in) :: trig(2,mg)
1690  real(dp),intent(inout) :: z(2,nd1,nd2,nd3),zbr(2,nd1,nd2,nd3)
1691 
1692 !Local variables-------------------------------
1693 !scalars
1694  integer :: i,i3,ia,ib,indx,j,jj,lot,ma,mb,ntb
1695  real(dp),parameter :: cos2=0.3090169943749474d0   !cos(2.d0*pi/5.d0)
1696  real(dp),parameter :: cos4=-0.8090169943749474d0  !cos(4.d0*pi/5.d0)
1697  real(dp),parameter :: sin42=0.6180339887498948d0  !sin(4.d0*pi/5.d0)/sin(2.d0*pi/5.d0)
1698  real(dp) :: bb,cr2,cr2s,cr3,cr3p,cr4,cr5,ct2,ct3,ct4,ct5
1699  real(dp) :: factor,r,r1,r2,r25,r3,r34,r4,r5,s,sin2,s1,s2,s25,s3,s34,s4,s5
1700 
1701 ! *************************************************************************
1702 
1703 !Do x transforms in blocks of size "lot" which is set by how
1704 !many x transform arrays (of size nd1 each) fit into the nominal
1705 !cache size "fftcache".
1706  factor=0.75d0
1707  lot=(fftcache*factor*1000d0)/(nd1*8*2)
1708 
1709 !XG : due to the dimension problems on the P6, I have slightly
1710 !modified this part of the code, with an external loop
1711 !on n3 ...
1712 !Modifications are indicated explicitely, or
1713 !are related to the increase of the number of dimensions of z and
1714 !zbr ...
1715 
1716  factor=0.75d0
1717  lot=(fftcache*factor*1000d0)/(nd1*8*2)
1718  if(lot.lt.1) lot=1 ! this may happen for very large cells
1719 !$OMP PARALLEL DO DEFAULT(PRIVATE) SHARED(aft,bef,ic,ind,lot,n2,n3,now,ris,trig,z,zbr)
1720  do i3=1,n3
1721    do jj=1,n2,lot
1722 !    end of modification
1723 
1724 !    For each jj, ma and mb give starting and ending addresses for fft
1725 !    ma starts where we left off after last block
1726      ma=jj
1727 !    mb runs to the end of the block or else to the end of the data
1728 !    modified XG 980107
1729 !    mb=min(jj+(lot-1),n23)
1730      mb=min(jj+(lot-1),n2)
1731 
1732 !    Run over all factors except the last (to ic-1), performing
1733 !    x transform
1734 
1735 !    Note: fortran should skip this loop if ic=1; beware "onetrip"
1736 !    compiler option which forces each loop at least once
1737 
1738 !    ------------------------------------------------------------------------
1739 
1740 !    Direct transformation (to be followed by bit reversal)
1741 
1742      do i=1,ic-1
1743        ntb=now(i)*bef(i)
1744 !      radix 4
1745 
1746 !      Treat radix 4
1747        if (now(i)==4) then
1748          ia=0
1749 
1750 !        First step of factor 4
1751          do ib=1,bef(i)
1752            do j=ma,mb
1753              r4=z(1,ia*ntb+3*bef(i)+ib,j,i3)
1754              s4=z(2,ia*ntb+3*bef(i)+ib,j,i3)
1755              r3=z(1,ia*ntb+2*bef(i)+ib,j,i3)
1756              s3=z(2,ia*ntb+2*bef(i)+ib,j,i3)
1757              r2=z(1,ia*ntb+bef(i)+ib,j,i3)
1758              s2=z(2,ia*ntb+bef(i)+ib,j,i3)
1759              r1=z(1,ia*ntb+ib,j,i3)
1760              s1=z(2,ia*ntb+ib,j,i3)
1761 
1762              r=r1 + r3
1763              s=r2 + r4
1764              z(1,ia*ntb+ib,j,i3) = r + s
1765              z(1,ia*ntb+2*bef(i)+ib,j,i3) = r - s
1766              r=r1 - r3
1767              s=s2 - s4
1768              z(1,ia*ntb+bef(i)+ib,j,i3) = r - s*ris
1769              z(1,ia*ntb+3*bef(i)+ib,j,i3) = r + s*ris
1770              r=s1 + s3
1771              s=s2 + s4
1772              z(2,ia*ntb+ib,j,i3) = r + s
1773              z(2,ia*ntb+2*bef(i)+ib,j,i3) = r - s
1774              r=s1 - s3
1775              s=r2 - r4
1776              z(2,ia*ntb+bef(i)+ib,j,i3) = r + s*ris
1777              z(2,ia*ntb+3*bef(i)+ib,j,i3) = r - s*ris
1778            end do
1779          end do
1780 
1781 !        Second step of factor 4
1782          do ia=1,aft(i)-1
1783            indx=ind(ia*4*bef(i)+1)-1
1784            indx=indx*bef(i)
1785            cr2=trig(1,indx)
1786            ct2=trig(2,indx)
1787            cr3=trig(1,2*indx)
1788            ct3=trig(2,2*indx)
1789            cr4=trig(1,3*indx)
1790            ct4=trig(2,3*indx)
1791            cr4=cr4/cr2
1792            cr2s=cr2*ris
1793            do ib=1,bef(i)
1794              do j=ma,mb
1795                r4=z(1,ia*ntb+3*bef(i)+ib,j,i3) - &
1796 &               z(2,ia*ntb+3*bef(i)+ib,j,i3)*ct4
1797                s4=z(1,ia*ntb+3*bef(i)+ib,j,i3)*ct4 + &
1798 &               z(2,ia*ntb+3*bef(i)+ib,j,i3)
1799                r3=z(1,ia*ntb+2*bef(i)+ib,j,i3) - &
1800 &               z(2,ia*ntb+2*bef(i)+ib,j,i3)*ct3
1801                s3=z(1,ia*ntb+2*bef(i)+ib,j,i3)*ct3 + &
1802 &               z(2,ia*ntb+2*bef(i)+ib,j,i3)
1803                r2=z(1,ia*ntb+bef(i)+ib,j,i3) - &
1804 &               z(2,ia*ntb+bef(i)+ib,j,i3)*ct2
1805                s2=z(1,ia*ntb+bef(i)+ib,j,i3)*ct2 + &
1806 &               z(2,ia*ntb+bef(i)+ib,j,i3)
1807                r1=z(1,ia*ntb+ib,j,i3)
1808                s1=z(2,ia*ntb+ib,j,i3)
1809 
1810                r=r1 + r3*cr3
1811                s=r2 + r4*cr4
1812                z(1,ia*ntb+ib,j,i3) = r + s*cr2
1813                z(1,ia*ntb+2*bef(i)+ib,j,i3) = r - s*cr2
1814                r=r1 - r3*cr3
1815                s=s2 - s4*cr4
1816                z(1,ia*ntb+bef(i)+ib,j,i3) = r - s*cr2s
1817                z(1,ia*ntb+3*bef(i)+ib,j,i3) = r + s*cr2s
1818                r=s1 + s3*cr3
1819                s=s2 + s4*cr4
1820                z(2,ia*ntb+ib,j,i3) = r + s*cr2
1821                z(2,ia*ntb+2*bef(i)+ib,j,i3) = r - s*cr2
1822                r=s1 - s3*cr3
1823                s=r2 - r4*cr4
1824                z(2,ia*ntb+bef(i)+ib,j,i3) = r + s*cr2s
1825                z(2,ia*ntb+3*bef(i)+ib,j,i3) = r - s*cr2s
1826              end do
1827            end do
1828          end do
1829 
1830 !        Treat radix 2
1831        else if (now(i)==2) then
1832          ia=0
1833 
1834 !        First step of factor 2
1835          do ib=1,bef(i)
1836            do j=ma,mb
1837              r1=z(1,ia*ntb+ib,j,i3)
1838              s1=z(2,ia*ntb+ib,j,i3)
1839              r2=z(1,ia*ntb+bef(i)+ib,j,i3)
1840              s2=z(2,ia*ntb+bef(i)+ib,j,i3)
1841              z(1,ia*ntb+ib,j,i3) =  r2 + r1
1842              z(2,ia*ntb+ib,j,i3) =  s2 + s1
1843              z(1,ia*ntb+bef(i)+ib,j,i3) = -r2 + r1
1844              z(2,ia*ntb+bef(i)+ib,j,i3) = -s2 + s1
1845            end do
1846          end do
1847 
1848 !        Second step of factor 2
1849          do ia=1,aft(i)-1
1850            indx=ind(ia*2*bef(i)+1)-1
1851            indx=indx*bef(i)
1852            cr2=trig(1,indx)
1853            ct2=trig(2,indx)
1854            do ib=1,bef(i)
1855              do j=ma,mb
1856                r1=z(1,ia*ntb+ib,j,i3)
1857                s1=z(2,ia*ntb+ib,j,i3)
1858                r2=z(1,ia*ntb+bef(i)+ib,j,i3) - &
1859 &               z(2,ia*ntb+bef(i)+ib,j,i3)*ct2
1860                s2=z(1,ia*ntb+bef(i)+ib,j,i3)*ct2 + &
1861 &               z(2,ia*ntb+bef(i)+ib,j,i3)
1862                z(1,ia*ntb+ib,j,i3) =  r2*cr2 + r1
1863                z(2,ia*ntb+ib,j,i3) =  s2*cr2 + s1
1864                z(1,ia*ntb+bef(i)+ib,j,i3) = -r2*cr2 + r1
1865                z(2,ia*ntb+bef(i)+ib,j,i3) = -s2*cr2 + s1
1866              end do
1867            end do
1868          end do
1869 
1870 !        Treat radix 3
1871        else if (now(i)==3) then
1872 !        .5d0*sqrt(3.d0)=0.8660254037844387d0
1873          ia=0
1874          bb=ris*0.8660254037844387d0
1875 
1876 !        First step of factor 3
1877          do ib=1,bef(i)
1878            do j=ma,mb
1879              r1=z(1,ia*ntb+ib,j,i3)
1880              s1=z(2,ia*ntb+ib,j,i3)
1881              r2=z(1,ia*ntb+bef(i)+ib,j,i3)
1882              s2=z(2,ia*ntb+bef(i)+ib,j,i3)
1883              r3=z(1,ia*ntb+2*bef(i)+ib,j,i3)
1884              s3=z(2,ia*ntb+2*bef(i)+ib,j,i3)
1885              r=r2 + r3
1886              s=s2 + s3
1887              z(1,ia*ntb+ib,j,i3) = r + r1
1888              z(2,ia*ntb+ib,j,i3) = s + s1
1889              r1=r1 - r*.5d0
1890              s1=s1 - s*.5d0
1891              r2=r2-r3
1892              s2=s2-s3
1893              z(1,ia*ntb+bef(i)+ib,j,i3) = r1 - s2*bb
1894              z(2,ia*ntb+bef(i)+ib,j,i3) = s1 + r2*bb
1895              z(1,ia*ntb+2*bef(i)+ib,j,i3) = r1 + s2*bb
1896              z(2,ia*ntb+2*bef(i)+ib,j,i3) = s1 - r2*bb
1897            end do
1898          end do
1899 
1900 !        Second step of factor 3
1901          do ia=1,aft(i)-1
1902            indx=ind(ia*3*bef(i)+1)-1
1903            indx=indx*bef(i)
1904            cr2=trig(1,indx)
1905            ct2=trig(2,indx)
1906            cr3=trig(1,2*indx)
1907            ct3=trig(2,2*indx)
1908            cr2=cr2/cr3
1909            cr3p=.5d0*cr3
1910            bb=ris*cr3*0.8660254037844387d0
1911            do ib=1,bef(i)
1912              do j=ma,mb
1913                r1=z(1,ia*ntb+ib,j,i3)
1914                s1=z(2,ia*ntb+ib,j,i3)
1915                r2=z(1,ia*ntb+bef(i)+ib,j,i3) - &
1916 &               z(2,ia*ntb+bef(i)+ib,j,i3)*ct2
1917                s2=z(1,ia*ntb+bef(i)+ib,j,i3)*ct2 + &
1918 &               z(2,ia*ntb+bef(i)+ib,j,i3)
1919                r3=z(1,ia*ntb+2*bef(i)+ib,j,i3) - &
1920 &               z(2,ia*ntb+2*bef(i)+ib,j,i3)*ct3
1921                s3=z(1,ia*ntb+2*bef(i)+ib,j,i3)*ct3 + &
1922 &               z(2,ia*ntb+2*bef(i)+ib,j,i3)
1923                r=cr2*r2 + r3
1924                s=cr2*s2 + s3
1925                z(1,ia*ntb+ib,j,i3) = r*cr3 + r1
1926                z(2,ia*ntb+ib,j,i3) = s*cr3 + s1
1927                r1=r1 - r*cr3p
1928                s1=s1 - s*cr3p
1929                r2=cr2*r2-r3
1930                s2=cr2*s2-s3
1931                z(1,ia*ntb+bef(i)+ib,j,i3) = r1 - s2*bb
1932                z(2,ia*ntb+bef(i)+ib,j,i3) = s1 + r2*bb
1933                z(1,ia*ntb+2*bef(i)+ib,j,i3) = r1 + s2*bb
1934                z(2,ia*ntb+2*bef(i)+ib,j,i3) = s1 - r2*bb
1935              end do
1936            end do
1937          end do
1938 
1939 !        Treat radix 5
1940        else if (now(i)==5) then
1941 !        sin(2.d0*pi/5.d0)
1942          sin2=ris*0.9510565162951536d0
1943          ia=0
1944 
1945 !        First step of factor 5
1946          do ib=1,bef(i)
1947            do j=ma,mb
1948              r1=z(1,ia*ntb+ib,j,i3)
1949              s1=z(2,ia*ntb+ib,j,i3)
1950              r2=z(1,ia*ntb+bef(i)+ib,j,i3)
1951              s2=z(2,ia*ntb+bef(i)+ib,j,i3)
1952              r3=z(1,ia*ntb+2*bef(i)+ib,j,i3)
1953              s3=z(2,ia*ntb+2*bef(i)+ib,j,i3)
1954              r4=z(1,ia*ntb+3*bef(i)+ib,j,i3)
1955              s4=z(2,ia*ntb+3*bef(i)+ib,j,i3)
1956              r5=z(1,ia*ntb+4*bef(i)+ib,j,i3)
1957              s5=z(2,ia*ntb+4*bef(i)+ib,j,i3)
1958              r25 = r2 + r5
1959              r34 = r3 + r4
1960              s25 = s2 - s5
1961              s34 = s3 - s4
1962              z(1,ia*ntb+ib,j,i3) = r1 + r25 + r34
1963              r = r1 + cos2*r25 + cos4*r34
1964              s = s25 + sin42*s34
1965              z(1,ia*ntb+bef(i)+ib,j,i3) = r - sin2*s
1966              z(1,ia*ntb+4*bef(i)+ib,j,i3) = r + sin2*s
1967              r = r1 + cos4*r25 + cos2*r34
1968              s = sin42*s25 - s34
1969              z(1,ia*ntb+2*bef(i)+ib,j,i3) = r - sin2*s
1970              z(1,ia*ntb+3*bef(i)+ib,j,i3) = r + sin2*s
1971              r25 = r2 - r5
1972              r34 = r3 - r4
1973              s25 = s2 + s5
1974              s34 = s3 + s4
1975              z(2,ia*ntb+ib,j,i3) = s1 + s25 + s34
1976              r = s1 + cos2*s25 + cos4*s34
1977              s = r25 + sin42*r34
1978              z(2,ia*ntb+bef(i)+ib,j,i3) = r + sin2*s
1979              z(2,ia*ntb+4*bef(i)+ib,j,i3) = r - sin2*s
1980              r = s1 + cos4*s25 + cos2*s34
1981              s = sin42*r25 - r34
1982              z(2,ia*ntb+2*bef(i)+ib,j,i3) = r + sin2*s
1983              z(2,ia*ntb+3*bef(i)+ib,j,i3) = r - sin2*s
1984            end do
1985          end do
1986 
1987 !        Second step of factor 5
1988          do ia=1,aft(i)-1
1989            indx=ind(ia*5*bef(i)+1)-1
1990            indx=indx*bef(i)
1991            cr2=trig(1,indx)
1992            ct2=trig(2,indx)
1993            cr3=trig(1,2*indx)
1994            ct3=trig(2,2*indx)
1995            cr4=trig(1,3*indx)
1996            ct4=trig(2,3*indx)
1997            cr5=trig(1,4*indx)
1998            ct5=trig(2,4*indx)
1999            do ib=1,bef(i)
2000              do j=ma,mb
2001                r1=z(1,ia*ntb+ib,j,i3)
2002                s1=z(2,ia*ntb+ib,j,i3)
2003                r2=cr2*(z(1,ia*ntb+bef(i)+ib,j,i3) - &
2004 &               z(2,ia*ntb+bef(i)+ib,j,i3)*ct2)
2005                s2=cr2*(z(1,ia*ntb+bef(i)+ib,j,i3)*ct2 + &
2006 &               z(2,ia*ntb+bef(i)+ib,j,i3))
2007                r3=cr3*(z(1,ia*ntb+2*bef(i)+ib,j,i3) - &
2008 &               z(2,ia*ntb+2*bef(i)+ib,j,i3)*ct3)
2009                s3=cr3*(z(1,ia*ntb+2*bef(i)+ib,j,i3)*ct3 + &
2010 &               z(2,ia*ntb+2*bef(i)+ib,j,i3))
2011                r4=z(1,ia*ntb+3*bef(i)+ib,j,i3) - &
2012 &               z(2,ia*ntb+3*bef(i)+ib,j,i3)*ct4
2013                s4=z(1,ia*ntb+3*bef(i)+ib,j,i3)*ct4 + &
2014 &               z(2,ia*ntb+3*bef(i)+ib,j,i3)
2015                r5=z(1,ia*ntb+4*bef(i)+ib,j,i3) - &
2016 &               z(2,ia*ntb+4*bef(i)+ib,j,i3)*ct5
2017                s5=z(1,ia*ntb+4*bef(i)+ib,j,i3)*ct5 + &
2018 &               z(2,ia*ntb+4*bef(i)+ib,j,i3)
2019                r25 = r2 + r5*cr5
2020                r34 = r3 + r4*cr4
2021                s25 = s2 - s5*cr5
2022                s34 = s3 - s4*cr4
2023                z(1,ia*ntb+ib,j,i3) = r1 + r25 + r34
2024                r = r1 + cos2*r25 + cos4*r34
2025                s = s25 + sin42*s34
2026                z(1,ia*ntb+bef(i)+ib,j,i3) = r - sin2*s
2027                z(1,ia*ntb+4*bef(i)+ib,j,i3) = r + sin2*s
2028                r = r1 + cos4*r25 + cos2*r34
2029                s = sin42*s25 - s34
2030                z(1,ia*ntb+2*bef(i)+ib,j,i3) = r - sin2*s
2031                z(1,ia*ntb+3*bef(i)+ib,j,i3) = r + sin2*s
2032                r25 = r2 - r5*cr5
2033                r34 = r3 - r4*cr4
2034                s25 = s2 + s5*cr5
2035                s34 = s3 + s4*cr4
2036                z(2,ia*ntb+ib,j,i3) = s1 + s25 + s34
2037                r = s1 + cos2*s25 + cos4*s34
2038                s = r25 + sin42*r34
2039                z(2,ia*ntb+bef(i)+ib,j,i3) = r + sin2*s
2040                z(2,ia*ntb+4*bef(i)+ib,j,i3) = r - sin2*s
2041                r = s1 + cos4*s25 + cos2*s34
2042                s = sin42*r25 - r34
2043                z(2,ia*ntb+2*bef(i)+ib,j,i3) = r + sin2*s
2044                z(2,ia*ntb+3*bef(i)+ib,j,i3) = r - sin2*s
2045              end do
2046            end do
2047          end do
2048 
2049        else
2050 !        All factors have been treated
2051          MSG_BUG('called with factors other than 2, 3, and 5')
2052        end if
2053 
2054      end do
2055 
2056 !    ---------------------------------------------------------------
2057 
2058 !    bitreversal
2059 
2060 !    Perform bit reversal on last factor of transformation
2061 
2062 !    Treat factor 4
2063      if (now(ic)==4) then
2064 !      radix 4
2065        ia=0
2066 
2067 !      First step of factor 4
2068        do j=ma,mb
2069          r4=z(1,ia*4+4,j,i3)
2070          s4=z(2,ia*4+4,j,i3)
2071          r3=z(1,ia*4+3,j,i3)
2072          s3=z(2,ia*4+3,j,i3)
2073          r2=z(1,ia*4+2,j,i3)
2074          s2=z(2,ia*4+2,j,i3)
2075          r1=z(1,ia*4+1,j,i3)
2076          s1=z(2,ia*4+1,j,i3)
2077 
2078          r=r1 + r3
2079          s=r2 + r4
2080          zbr(1,ind(ia*4+1),j,i3) = r + s
2081          zbr(1,ind(ia*4+3),j,i3) = r - s
2082          r=r1 - r3
2083          s=s2 - s4
2084          zbr(1,ind(ia*4+2),j,i3) = r - s*ris
2085          zbr(1,ind(ia*4+4),j,i3) = r + s*ris
2086          r=s1 + s3
2087          s=s2 + s4
2088          zbr(2,ind(ia*4+1),j,i3) = r + s
2089          zbr(2,ind(ia*4+3),j,i3) = r - s
2090          r=s1 - s3
2091          s=r2 - r4
2092          zbr(2,ind(ia*4+2),j,i3) = r + s*ris
2093          zbr(2,ind(ia*4+4),j,i3) = r - s*ris
2094        end do
2095 
2096 !      Second step of factor 4
2097        do ia=1,aft(ic)-1
2098          indx=ind(ia*4+1)-1
2099          cr2=trig(1,indx)
2100          ct2=trig(2,indx)
2101          cr3=trig(1,2*indx)
2102          ct3=trig(2,2*indx)
2103          cr4=trig(1,3*indx)
2104          ct4=trig(2,3*indx)
2105          cr4=cr4/cr2
2106          cr2s=cr2*ris
2107          do j=ma,mb
2108            r4=z(1,ia*4+4,j,i3) - z(2,ia*4+4,j,i3)*ct4
2109            s4=z(1,ia*4+4,j,i3)*ct4 + z(2,ia*4+4,j,i3)
2110            r3=z(1,ia*4+3,j,i3) - z(2,ia*4+3,j,i3)*ct3
2111            s3=z(1,ia*4+3,j,i3)*ct3 + z(2,ia*4+3,j,i3)
2112            r2=z(1,ia*4+2,j,i3) - z(2,ia*4+2,j,i3)*ct2
2113            s2=z(1,ia*4+2,j,i3)*ct2 + z(2,ia*4+2,j,i3)
2114            r1=z(1,ia*4+1,j,i3)
2115            s1=z(2,ia*4+1,j,i3)
2116 
2117            r=r1 + r3*cr3
2118            s=r2 + r4*cr4
2119            zbr(1,ind(ia*4+1),j,i3) = r + s*cr2
2120            zbr(1,ind(ia*4+3),j,i3) = r - s*cr2
2121            r=r1 - r3*cr3
2122            s=s2 - s4*cr4
2123            zbr(1,ind(ia*4+2),j,i3) = r - s*cr2s
2124            zbr(1,ind(ia*4+4),j,i3) = r + s*cr2s
2125            r=s1 + s3*cr3
2126            s=s2 + s4*cr4
2127            zbr(2,ind(ia*4+1),j,i3) = r + s*cr2
2128            zbr(2,ind(ia*4+3),j,i3) = r - s*cr2
2129            r=s1 - s3*cr3
2130            s=r2 - r4*cr4
2131            zbr(2,ind(ia*4+2),j,i3) = r + s*cr2s
2132            zbr(2,ind(ia*4+4),j,i3) = r - s*cr2s
2133          end do
2134        end do
2135 
2136 !      Treat factor 2
2137      else if (now(ic)==2) then
2138 !      radix 2
2139        ia=0
2140 
2141 !      First step of factor 2
2142        do j=ma,mb
2143          r1=z(1,ia*2+1,j,i3)
2144          s1=z(2,ia*2+1,j,i3)
2145          r2=z(1,ia*2+2,j,i3)
2146          s2=z(2,ia*2+2,j,i3)
2147          zbr(1,ind(ia*2+1),j,i3) =  r2 + r1
2148          zbr(2,ind(ia*2+1),j,i3) =  s2 + s1
2149          zbr(1,ind(ia*2+2),j,i3) = -r2 + r1
2150          zbr(2,ind(ia*2+2),j,i3) = -s2 + s1
2151        end do
2152 
2153 !      Second step of factor 2
2154        do ia=1,aft(ic)-1
2155          indx=ind(ia*2+1)-1
2156          cr2=trig(1,indx)
2157          ct2=trig(2,indx)
2158          do j=ma,mb
2159            r1=z(1,ia*2+1,j,i3)
2160            s1=z(2,ia*2+1,j,i3)
2161            r2=z(1,ia*2+2,j,i3) - z(2,ia*2+2,j,i3)*ct2
2162            s2=z(1,ia*2+2,j,i3)*ct2 + z(2,ia*2+2,j,i3)
2163            zbr(1,ind(ia*2+1),j,i3) =  r2*cr2 + r1
2164            zbr(2,ind(ia*2+1),j,i3) =  s2*cr2 + s1
2165            zbr(1,ind(ia*2+2),j,i3) = -r2*cr2 + r1
2166            zbr(2,ind(ia*2+2),j,i3) = -s2*cr2 + s1
2167          end do
2168        end do
2169 
2170 !      Treat factor 3
2171      else if (now(ic)==3) then
2172 !      radix 3
2173 !      .5d0*sqrt(3.d0)=0.8660254037844387d0
2174        ia=0
2175        bb=ris*0.8660254037844387d0
2176 
2177 !      First step of factor 3
2178        do j=ma,mb
2179          r1=z(1,ia*3+1,j,i3)
2180          s1=z(2,ia*3+1,j,i3)
2181          r2=z(1,ia*3+2,j,i3)
2182          s2=z(2,ia*3+2,j,i3)
2183          r3=z(1,ia*3+3,j,i3)
2184          s3=z(2,ia*3+3,j,i3)
2185          r=r2 + r3
2186          s=s2 + s3
2187          zbr(1,ind(ia*3+1),j,i3) = r + r1
2188          zbr(2,ind(ia*3+1),j,i3) = s + s1
2189          r1=r1 - r*.5d0
2190          s1=s1 - s*.5d0
2191          r2=r2-r3
2192          s2=s2-s3
2193          zbr(1,ind(ia*3+2),j,i3) = r1 - s2*bb
2194          zbr(2,ind(ia*3+2),j,i3) = s1 + r2*bb
2195          zbr(1,ind(ia*3+3),j,i3) = r1 + s2*bb
2196          zbr(2,ind(ia*3+3),j,i3) = s1 - r2*bb
2197        end do
2198 
2199 !      Second step of factor 3
2200        do ia=1,aft(ic)-1
2201          indx=ind(ia*3+1)-1
2202          cr2=trig(1,indx)
2203          ct2=trig(2,indx)
2204          cr3=trig(1,2*indx)
2205          ct3=trig(2,2*indx)
2206          cr2=cr2/cr3
2207          cr3p=.5d0*cr3
2208          bb=ris*cr3*0.8660254037844387d0
2209          do j=ma,mb
2210            r1=z(1,ia*3+1,j,i3)
2211            s1=z(2,ia*3+1,j,i3)
2212            r2=z(1,ia*3+2,j,i3) - z(2,ia*3+2,j,i3)*ct2
2213            s2=z(1,ia*3+2,j,i3)*ct2 + z(2,ia*3+2,j,i3)
2214            r3=z(1,ia*3+3,j,i3) - z(2,ia*3+3,j,i3)*ct3
2215            s3=z(1,ia*3+3,j,i3)*ct3 + z(2,ia*3+3,j,i3)
2216            r=cr2*r2 + r3
2217            s=cr2*s2 + s3
2218            zbr(1,ind(ia*3+1),j,i3) = r*cr3 + r1
2219            zbr(2,ind(ia*3+1),j,i3) = s*cr3 + s1
2220            r1=r1 - r*cr3p
2221            s1=s1 - s*cr3p
2222            r2=cr2*r2-r3
2223            s2=cr2*s2-s3
2224            zbr(1,ind(ia*3+2),j,i3) = r1 - s2*bb
2225            zbr(2,ind(ia*3+2),j,i3) = s1 + r2*bb
2226            zbr(1,ind(ia*3+3),j,i3) = r1 + s2*bb
2227            zbr(2,ind(ia*3+3),j,i3) = s1 - r2*bb
2228          end do
2229        end do
2230 
2231 !      Treat factor 5
2232      else if (now(ic)==5) then
2233 !      radix 5
2234 !      sin(2.d0*pi/5.d0)
2235        sin2=ris*0.9510565162951536d0
2236        ia=0
2237 
2238 !      First step of factor 5
2239        do j=ma,mb
2240          r1=z(1,ia*5+1,j,i3)
2241          s1=z(2,ia*5+1,j,i3)
2242          r2=z(1,ia*5+2,j,i3)
2243          s2=z(2,ia*5+2,j,i3)
2244          r3=z(1,ia*5+3,j,i3)
2245          s3=z(2,ia*5+3,j,i3)
2246          r4=z(1,ia*5+4,j,i3)
2247          s4=z(2,ia*5+4,j,i3)
2248          r5=z(1,ia*5+5,j,i3)
2249          s5=z(2,ia*5+5,j,i3)
2250          r25 = r2 + r5
2251          r34 = r3 + r4
2252          s25 = s2 - s5
2253          s34 = s3 - s4
2254          zbr(1,ind(ia*5+1),j,i3) = r1 + r25 + r34
2255          r = r1 + cos2*r25 + cos4*r34
2256          s = s25 + sin42*s34
2257          zbr(1,ind(ia*5+2),j,i3) = r - sin2*s
2258          zbr(1,ind(ia*5+5),j,i3) = r + sin2*s
2259          r = r1 + cos4*r25 + cos2*r34
2260          s = sin42*s25 - s34
2261          zbr(1,ind(ia*5+3),j,i3) = r - sin2*s
2262          zbr(1,ind(ia*5+4),j,i3) = r + sin2*s
2263          r25 = r2 - r5
2264          r34 = r3 - r4
2265          s25 = s2 + s5
2266          s34 = s3 + s4
2267          zbr(2,ind(ia*5+1),j,i3) = s1 + s25 + s34
2268          r = s1 + cos2*s25 + cos4*s34
2269          s = r25 + sin42*r34
2270          zbr(2,ind(ia*5+2),j,i3) = r + sin2*s
2271          zbr(2,ind(ia*5+5),j,i3) = r - sin2*s
2272          r = s1 + cos4*s25 + cos2*s34
2273          s = sin42*r25 - r34
2274          zbr(2,ind(ia*5+3),j,i3) = r + sin2*s
2275          zbr(2,ind(ia*5+4),j,i3) = r - sin2*s
2276        end do
2277 
2278 !      Second step of factor 5
2279        do ia=1,aft(ic)-1
2280          indx=ind(ia*5+1)-1
2281          cr2=trig(1,indx)
2282          ct2=trig(2,indx)
2283          cr3=trig(1,2*indx)
2284          ct3=trig(2,2*indx)
2285          cr4=trig(1,3*indx)
2286          ct4=trig(2,3*indx)
2287          cr5=trig(1,4*indx)
2288          ct5=trig(2,4*indx)
2289          do j=ma,mb
2290            r1=z(1,ia*5+1,j,i3)
2291            s1=z(2,ia*5+1,j,i3)
2292            r2=cr2*(z(1,ia*5+2,j,i3) - z(2,ia*5+2,j,i3)*ct2)
2293            s2=cr2*(z(1,ia*5+2,j,i3)*ct2 + z(2,ia*5+2,j,i3))
2294            r3=cr3*(z(1,ia*5+3,j,i3) - z(2,ia*5+3,j,i3)*ct3)
2295            s3=cr3*(z(1,ia*5+3,j,i3)*ct3 + z(2,ia*5+3,j,i3))
2296            r4=z(1,ia*5+4,j,i3) - z(2,ia*5+4,j,i3)*ct4
2297            s4=z(1,ia*5+4,j,i3)*ct4 + z(2,ia*5+4,j,i3)
2298            r5=z(1,ia*5+5,j,i3) - z(2,ia*5+5,j,i3)*ct5
2299            s5=z(1,ia*5+5,j,i3)*ct5 + z(2,ia*5+5,j,i3)
2300            r25 = r2 + r5*cr5
2301            r34 = r3 + r4*cr4
2302            s25 = s2 - s5*cr5
2303            s34 = s3 - s4*cr4
2304            zbr(1,ind(ia*5+1),j,i3) = r1 + r25 + r34
2305            r = r1 + cos2*r25 + cos4*r34
2306            s = s25 + sin42*s34
2307            zbr(1,ind(ia*5+2),j,i3) = r - sin2*s
2308            zbr(1,ind(ia*5+5),j,i3) = r + sin2*s
2309            r = r1 + cos4*r25 + cos2*r34
2310            s = sin42*s25 - s34
2311            zbr(1,ind(ia*5+3),j,i3) = r - sin2*s
2312            zbr(1,ind(ia*5+4),j,i3) = r + sin2*s
2313            r25 = r2 - r5*cr5
2314            r34 = r3 - r4*cr4
2315            s25 = s2 + s5*cr5
2316            s34 = s3 + s4*cr4
2317            zbr(2,ind(ia*5+1),j,i3) = s1 + s25 + s34
2318            r = s1 + cos2*s25 + cos4*s34
2319            s = r25 + sin42*r34
2320            zbr(2,ind(ia*5+2),j,i3) = r + sin2*s
2321            zbr(2,ind(ia*5+5),j,i3) = r - sin2*s
2322            r = s1 + cos4*s25 + cos2*s34
2323            s = sin42*r25 - r34
2324            zbr(2,ind(ia*5+3),j,i3) = r + sin2*s
2325            zbr(2,ind(ia*5+4),j,i3) = r - sin2*s
2326          end do
2327        end do
2328 
2329      else
2330 !      All factors treated
2331        MSG_BUG('called with factors other than 2, 3, and 5')
2332      end if
2333 
2334 !    ---------------------------------------------------------------
2335 
2336    end do ! do i3=1,n3
2337  end do  ! do jj=1,n2,lot
2338 !$OMP END PARALLEL DO
2339 
2340 end subroutine sg_fftx

m_sgfft/sg_ffty [ Functions ]

[ Top ] [ m_sgfft ] [ Functions ]

NAME

 sg_ffty

FUNCTION

 This subroutine is called by the 3-dimensional fft to conduct the
 "y" transforms for all x and z.

INPUTS

  fftcache=size of the cache (kB)
  mfac = maximum number of factors in 1D FFTs
  mg = maximum length of 1D FFTs
  nd1=first dimension of (complex) arrays z and zbr (treated as real within
   this subroutine)
  nd2=second dimension of (complex) arrays z and zbr (treated as real within
   this subroutine)
  nd3=third dimension of (complex) arrays z and zbr (treated as real within
   this subroutine)
  n1i=lower i1 index, used for blocking : the do-loop will be i1=n1i,n1
   put to 1 for usual ffty
  n1=upper i1 index, used for blocking, put usual n1 for usual ffty
  n3i=lower i3 index, used for blocking : the do-loop will be i3=n3i,n3
   put to 1 for usual ffty
  n3=upper i3 index, used for blocking, put usual n3 for usual ffty
  z(2,nd1,nd2,nd3)=INPUT array; destroyed by transformation
  trig, aft, now, bef, ind=provided by previous call to ctrig
   Note that in this routine (and in ctrig) the values in array trig are
   actually cos and tan, not cos and sin.  Use of tan allows advantageous
   use of FMA on the ibm rs6000.
  ris=sign of exponential in transform (should be 1 or -1; real)
  ic=number of (radix) factors of x transform length (from ctrig)

OUTPUT

  zbr(2,nd1,nd2,nd3)=OUTPUT transformed array; no scaling applied

TODO

 Use latex for the equation above

PARENTS

      m_sgfft

CHILDREN

      sg_fft_cc

SOURCE

2391 subroutine sg_ffty(fftcache,mfac,mg,nd1,nd2,nd3,n1i,n1,n3i,n3,&
2392 &          z,zbr,trig,aft,now,bef,ris,ind,ic)
2393 
2394 
2395 !This section has been created automatically by the script Abilint (TD).
2396 !Do not modify the following lines by hand.
2397 #undef ABI_FUNC
2398 #define ABI_FUNC 'sg_ffty'
2399 !End of the abilint section
2400 
2401  implicit none
2402 
2403 !Arguments ------------------------------------
2404 !Dimensions of aft, now, bef, ind, and trig should agree with
2405 !those in subroutine ctrig.
2406 !scalars
2407  integer,intent(in) :: fftcache,ic,mfac,mg,n1,n1i,n3,n3i,nd1,nd2,nd3
2408  real(dp),intent(in) :: ris
2409 !arrays
2410  integer,intent(in) :: aft(mfac),bef(mfac),ind(mg),now(mfac)
2411  real(dp),intent(in) :: trig(2,mg)
2412  real(dp),intent(inout) :: z(2,nd1,nd2,nd3),zbr(2,nd1,nd2,nd3)
2413 
2414 !Local variables-------------------------------
2415 !scalars
2416  integer :: i,ia,ib,indx,j1,j2,ntb
2417  real(dp),parameter :: cos2=0.3090169943749474d0   !cos(2.d0*pi/5.d0)
2418  real(dp),parameter :: cos4=-0.8090169943749474d0  !cos(4.d0*pi/5.d0)
2419  real(dp),parameter :: sin42=0.6180339887498948d0  !sin(4.d0*pi/5.d0)/sin(2.d0*pi/5.d0)
2420  real(dp) :: bb,cr2,cr2s,cr3,cr3p,cr4,cr5,ct2,ct3,ct4,ct5
2421  real(dp) :: r,r1,r2,r25,r3,r34,r4,r5,s,sin2,s1,s2,s25,s3,s34,s4,s5
2422 
2423 ! *************************************************************************
2424 
2425  if (fftcache<0) then
2426    MSG_ERROR('fftcache must be positive')
2427  end if
2428 
2429 !Outer loop over z planes (j2)--note range from n3i to n3
2430 
2431 !$OMP PARALLEL DO DEFAULT(PRIVATE) SHARED(aft,bef,ic,ind,n1,n1i,n3,n3i,now,ris,trig,z,zbr)
2432  do j2=n3i,n3
2433 
2434 !  Direct transformation
2435    do i=1,ic-1
2436      ntb=now(i)*bef(i)
2437 
2438 !    Treat radix 4
2439      if (now(i)==4) then
2440        ia=0
2441 
2442 !      First step of radix 4
2443        do ib=1,bef(i)
2444 !        Inner loop over all x values (j1) -- note range from n1i to n1
2445 !        y transform is performed for this range of x values repeatedly
2446 !        below
2447 
2448          do j1=n1i,n1
2449            r4=z(1,j1,ia*ntb+3*bef(i)+ib,j2)
2450            s4=z(2,j1,ia*ntb+3*bef(i)+ib,j2)
2451            r3=z(1,j1,ia*ntb+2*bef(i)+ib,j2)
2452            s3=z(2,j1,ia*ntb+2*bef(i)+ib,j2)
2453            r2=z(1,j1,ia*ntb+bef(i)+ib,j2)
2454            s2=z(2,j1,ia*ntb+bef(i)+ib,j2)
2455            r1=z(1,j1,ia*ntb+ib,j2)
2456            s1=z(2,j1,ia*ntb+ib,j2)
2457 
2458            r=r1 + r3
2459            s=r2 + r4
2460            z(1,j1,ia*ntb+ib,j2) = r + s
2461            z(1,j1,ia*ntb+2*bef(i)+ib,j2) = r - s
2462            r=r1 - r3
2463            s=s2 - s4
2464            z(1,j1,ia*ntb+bef(i)+ib,j2) = r - s*ris
2465            z(1,j1,ia*ntb+3*bef(i)+ib,j2) = r + s*ris
2466            r=s1 + s3
2467            s=s2 + s4
2468            z(2,j1,ia*ntb+ib,j2) = r + s
2469            z(2,j1,ia*ntb+2*bef(i)+ib,j2) = r - s
2470            r=s1 - s3
2471            s=r2 - r4
2472            z(2,j1,ia*ntb+bef(i)+ib,j2) = r + s*ris
2473            z(2,j1,ia*ntb+3*bef(i)+ib,j2) = r - s*ris
2474          end do ! j1
2475        end do ! ib
2476 
2477 !      Second step of radix 4
2478        do ia=1,aft(i)-1
2479          indx=ind(ia*4*bef(i)+1)-1
2480          indx=indx*bef(i)
2481          cr2=trig(1,indx)
2482          ct2=trig(2,indx)
2483          cr3=trig(1,2*indx)
2484          ct3=trig(2,2*indx)
2485          cr4=trig(1,3*indx)
2486          ct4=trig(2,3*indx)
2487          cr4=cr4/cr2
2488          cr2s=cr2*ris
2489          do ib=1,bef(i)
2490 !          Range of x array again (also appears many times below)
2491            do j1=n1i,n1
2492              r4=z(1,j1,ia*ntb+3*bef(i)+ib,j2) - &
2493 &             z(2,j1,ia*ntb+3*bef(i)+ib,j2)*ct4
2494              s4=z(1,j1,ia*ntb+3*bef(i)+ib,j2)*ct4 + &
2495 &             z(2,j1,ia*ntb+3*bef(i)+ib,j2)
2496              r3=z(1,j1,ia*ntb+2*bef(i)+ib,j2) - &
2497 &             z(2,j1,ia*ntb+2*bef(i)+ib,j2)*ct3
2498              s3=z(1,j1,ia*ntb+2*bef(i)+ib,j2)*ct3 + &
2499 &             z(2,j1,ia*ntb+2*bef(i)+ib,j2)
2500              r2=z(1,j1,ia*ntb+bef(i)+ib,j2) - &
2501 &             z(2,j1,ia*ntb+bef(i)+ib,j2)*ct2
2502              s2=z(1,j1,ia*ntb+bef(i)+ib,j2)*ct2 + &
2503 &             z(2,j1,ia*ntb+bef(i)+ib,j2)
2504              r1=z(1,j1,ia*ntb+ib,j2)
2505              s1=z(2,j1,ia*ntb+ib,j2)
2506 
2507              r=r1 + r3*cr3
2508              s=r2 + r4*cr4
2509              z(1,j1,ia*ntb+ib,j2) = r + s*cr2
2510              z(1,j1,ia*ntb+2*bef(i)+ib,j2) = r - s*cr2
2511              r=r1 - r3*cr3
2512              s=s2 - s4*cr4
2513              z(1,j1,ia*ntb+bef(i)+ib,j2) = r - s*cr2s
2514              z(1,j1,ia*ntb+3*bef(i)+ib,j2) = r + s*cr2s
2515              r=s1 + s3*cr3
2516              s=s2 + s4*cr4
2517              z(2,j1,ia*ntb+ib,j2) = r + s*cr2
2518              z(2,j1,ia*ntb+2*bef(i)+ib,j2) = r - s*cr2
2519              r=s1 - s3*cr3
2520              s=r2 - r4*cr4
2521              z(2,j1,ia*ntb+bef(i)+ib,j2) = r + s*cr2s
2522              z(2,j1,ia*ntb+3*bef(i)+ib,j2) = r - s*cr2s
2523            end do ! j1
2524          end do ! ib
2525        end do ! ia
2526 
2527 !      Treat radix 2
2528      else if (now(i)==2) then
2529        ia=0
2530 
2531 !      First step of radix 2
2532        do ib=1,bef(i)
2533          do j1=n1i,n1
2534            r1=z(1,j1,ia*ntb+ib,j2)
2535            s1=z(2,j1,ia*ntb+ib,j2)
2536            r2=z(1,j1,ia*ntb+bef(i)+ib,j2)
2537            s2=z(2,j1,ia*ntb+bef(i)+ib,j2)
2538            z(1,j1,ia*ntb+ib,j2) =  r2 + r1
2539            z(2,j1,ia*ntb+ib,j2) =  s2 + s1
2540            z(1,j1,ia*ntb+bef(i)+ib,j2) = -r2 + r1
2541            z(2,j1,ia*ntb+bef(i)+ib,j2) = -s2 + s1
2542          end do
2543        end do
2544 
2545 !      Second step of radix 2
2546        do ia=1,aft(i)-1
2547          indx=ind(ia*2*bef(i)+1)-1
2548          indx=indx*bef(i)
2549          cr2=trig(1,indx)
2550          ct2=trig(2,indx)
2551          do ib=1,bef(i)
2552            do j1=n1i,n1
2553              r1=z(1,j1,ia*ntb+ib,j2)
2554              s1=z(2,j1,ia*ntb+ib,j2)
2555              r2=z(1,j1,ia*ntb+bef(i)+ib,j2) - &
2556 &             z(2,j1,ia*ntb+bef(i)+ib,j2)*ct2
2557              s2=z(1,j1,ia*ntb+bef(i)+ib,j2)*ct2 + &
2558 &             z(2,j1,ia*ntb+bef(i)+ib,j2)
2559              z(1,j1,ia*ntb+ib,j2) =  r2*cr2 + r1
2560              z(2,j1,ia*ntb+ib,j2) =  s2*cr2 + s1
2561              z(1,j1,ia*ntb+bef(i)+ib,j2) = -r2*cr2 + r1
2562              z(2,j1,ia*ntb+bef(i)+ib,j2) = -s2*cr2 + s1
2563            end do
2564          end do
2565        end do
2566 
2567 !      Treat radix 3
2568      else if (now(i)==3) then
2569 !      .5d0*sqrt(3.d0)=0.8660254037844387d0
2570        ia=0
2571        bb=ris*0.8660254037844387d0
2572 
2573 !      First step of radix 3
2574        do ib=1,bef(i)
2575          do j1=n1i,n1
2576            r1=z(1,j1,ia*ntb+ib,j2)
2577            s1=z(2,j1,ia*ntb+ib,j2)
2578            r2=z(1,j1,ia*ntb+bef(i)+ib,j2)
2579            s2=z(2,j1,ia*ntb+bef(i)+ib,j2)
2580            r3=z(1,j1,ia*ntb+2*bef(i)+ib,j2)
2581            s3=z(2,j1,ia*ntb+2*bef(i)+ib,j2)
2582            r=r2 + r3
2583            s=s2 + s3
2584            z(1,j1,ia*ntb+ib,j2) = r + r1
2585            z(2,j1,ia*ntb+ib,j2) = s + s1
2586            r1=r1 - r*.5d0
2587            s1=s1 - s*.5d0
2588            r2=r2-r3
2589            s2=s2-s3
2590            z(1,j1,ia*ntb+bef(i)+ib,j2) = r1 - s2*bb
2591            z(2,j1,ia*ntb+bef(i)+ib,j2) = s1 + r2*bb
2592            z(1,j1,ia*ntb+2*bef(i)+ib,j2) = r1 + s2*bb
2593            z(2,j1,ia*ntb+2*bef(i)+ib,j2) = s1 - r2*bb
2594          end do
2595        end do
2596 
2597 !      Second step of radix 3
2598        do ia=1,aft(i)-1
2599          indx=ind(ia*3*bef(i)+1)-1
2600          indx=indx*bef(i)
2601          cr2=trig(1,indx)
2602          ct2=trig(2,indx)
2603          cr3=trig(1,2*indx)
2604          ct3=trig(2,2*indx)
2605          cr2=cr2/cr3
2606          cr3p=.5d0*cr3
2607          bb=ris*cr3*0.8660254037844387d0
2608          do ib=1,bef(i)
2609            do j1=n1i,n1
2610              r1=z(1,j1,ia*ntb+ib,j2)
2611              s1=z(2,j1,ia*ntb+ib,j2)
2612              r2=z(1,j1,ia*ntb+bef(i)+ib,j2) - &
2613 &             z(2,j1,ia*ntb+bef(i)+ib,j2)*ct2
2614              s2=z(1,j1,ia*ntb+bef(i)+ib,j2)*ct2 + &
2615 &             z(2,j1,ia*ntb+bef(i)+ib,j2)
2616              r3=z(1,j1,ia*ntb+2*bef(i)+ib,j2) - &
2617 &             z(2,j1,ia*ntb+2*bef(i)+ib,j2)*ct3
2618              s3=z(1,j1,ia*ntb+2*bef(i)+ib,j2)*ct3 + &
2619 &             z(2,j1,ia*ntb+2*bef(i)+ib,j2)
2620              r=cr2*r2 + r3
2621              s=cr2*s2 + s3
2622              z(1,j1,ia*ntb+ib,j2) = r*cr3 + r1
2623              z(2,j1,ia*ntb+ib,j2) = s*cr3 + s1
2624              r1=r1 - r*cr3p
2625              s1=s1 - s*cr3p
2626              r2=cr2*r2-r3
2627              s2=cr2*s2-s3
2628              z(1,j1,ia*ntb+bef(i)+ib,j2) = r1 - s2*bb
2629              z(2,j1,ia*ntb+bef(i)+ib,j2) = s1 + r2*bb
2630              z(1,j1,ia*ntb+2*bef(i)+ib,j2) = r1 + s2*bb
2631              z(2,j1,ia*ntb+2*bef(i)+ib,j2) = s1 - r2*bb
2632            end do
2633          end do
2634        end do
2635 
2636 !      Treat radix 5
2637      else if (now(i)==5) then
2638 !      sin(2.d0*pi/5.d0)
2639        sin2=ris*0.9510565162951536d0
2640        ia=0
2641 
2642 !      First step of radix 5
2643        do ib=1,bef(i)
2644          do j1=n1i,n1
2645            r1=z(1,j1,ia*ntb+ib,j2)
2646            s1=z(2,j1,ia*ntb+ib,j2)
2647            r2=z(1,j1,ia*ntb+bef(i)+ib,j2)
2648            s2=z(2,j1,ia*ntb+bef(i)+ib,j2)
2649            r3=z(1,j1,ia*ntb+2*bef(i)+ib,j2)
2650            s3=z(2,j1,ia*ntb+2*bef(i)+ib,j2)
2651            r4=z(1,j1,ia*ntb+3*bef(i)+ib,j2)
2652            s4=z(2,j1,ia*ntb+3*bef(i)+ib,j2)
2653            r5=z(1,j1,ia*ntb+4*bef(i)+ib,j2)
2654            s5=z(2,j1,ia*ntb+4*bef(i)+ib,j2)
2655            r25 = r2 + r5
2656            r34 = r3 + r4
2657            s25 = s2 - s5
2658            s34 = s3 - s4
2659            z(1,j1,ia*ntb+ib,j2) = r1 + r25 + r34
2660            r = r1 + cos2*r25 + cos4*r34
2661            s = s25 + sin42*s34
2662            z(1,j1,ia*ntb+bef(i)+ib,j2) = r - sin2*s
2663            z(1,j1,ia*ntb+4*bef(i)+ib,j2) = r + sin2*s
2664            r = r1 + cos4*r25 + cos2*r34
2665            s = sin42*s25 - s34
2666            z(1,j1,ia*ntb+2*bef(i)+ib,j2) = r - sin2*s
2667            z(1,j1,ia*ntb+3*bef(i)+ib,j2) = r + sin2*s
2668            r25 = r2 - r5
2669            r34 = r3 - r4
2670            s25 = s2 + s5
2671            s34 = s3 + s4
2672            z(2,j1,ia*ntb+ib,j2) = s1 + s25 + s34
2673            r = s1 + cos2*s25 + cos4*s34
2674            s = r25 + sin42*r34
2675            z(2,j1,ia*ntb+bef(i)+ib,j2) = r + sin2*s
2676            z(2,j1,ia*ntb+4*bef(i)+ib,j2) = r - sin2*s
2677            r = s1 + cos4*s25 + cos2*s34
2678            s = sin42*r25 - r34
2679            z(2,j1,ia*ntb+2*bef(i)+ib,j2) = r + sin2*s
2680            z(2,j1,ia*ntb+3*bef(i)+ib,j2) = r - sin2*s
2681          end do
2682        end do
2683 
2684 !      Second step of radix 5
2685        do ia=1,aft(i)-1
2686          indx=ind(ia*5*bef(i)+1)-1
2687          indx=indx*bef(i)
2688          cr2=trig(1,indx)
2689          ct2=trig(2,indx)
2690          cr3=trig(1,2*indx)
2691          ct3=trig(2,2*indx)
2692          cr4=trig(1,3*indx)
2693          ct4=trig(2,3*indx)
2694          cr5=trig(1,4*indx)
2695          ct5=trig(2,4*indx)
2696          do ib=1,bef(i)
2697            do j1=n1i,n1
2698              r1=z(1,j1,ia*ntb+ib,j2)
2699              s1=z(2,j1,ia*ntb+ib,j2)
2700              r2=cr2*(z(1,j1,ia*ntb+bef(i)+ib,j2) - &
2701 &             z(2,j1,ia*ntb+bef(i)+ib,j2)*ct2)
2702              s2=cr2*(z(1,j1,ia*ntb+bef(i)+ib,j2)*ct2 + &
2703 &             z(2,j1,ia*ntb+bef(i)+ib,j2))
2704              r3=cr3*(z(1,j1,ia*ntb+2*bef(i)+ib,j2) - &
2705 &             z(2,j1,ia*ntb+2*bef(i)+ib,j2)*ct3)
2706              s3=cr3*(z(1,j1,ia*ntb+2*bef(i)+ib,j2)*ct3 + &
2707 &             z(2,j1,ia*ntb+2*bef(i)+ib,j2))
2708              r4=z(1,j1,ia*ntb+3*bef(i)+ib,j2) - &
2709 &             z(2,j1,ia*ntb+3*bef(i)+ib,j2)*ct4
2710              s4=z(1,j1,ia*ntb+3*bef(i)+ib,j2)*ct4 + &
2711 &             z(2,j1,ia*ntb+3*bef(i)+ib,j2)
2712              r5=z(1,j1,ia*ntb+4*bef(i)+ib,j2) - &
2713 &             z(2,j1,ia*ntb+4*bef(i)+ib,j2)*ct5
2714              s5=z(1,j1,ia*ntb+4*bef(i)+ib,j2)*ct5 + &
2715 &             z(2,j1,ia*ntb+4*bef(i)+ib,j2)
2716              r25 = r2 + r5*cr5
2717              r34 = r3 + r4*cr4
2718              s25 = s2 - s5*cr5
2719              s34 = s3 - s4*cr4
2720              z(1,j1,ia*ntb+ib,j2) = r1 + r25 + r34
2721              r = r1 + cos2*r25 + cos4*r34
2722              s = s25 + sin42*s34
2723              z(1,j1,ia*ntb+bef(i)+ib,j2) = r - sin2*s
2724              z(1,j1,ia*ntb+4*bef(i)+ib,j2) = r + sin2*s
2725              r = r1 + cos4*r25 + cos2*r34
2726              s = sin42*s25 - s34
2727              z(1,j1,ia*ntb+2*bef(i)+ib,j2) = r - sin2*s
2728              z(1,j1,ia*ntb+3*bef(i)+ib,j2) = r + sin2*s
2729              r25 = r2 - r5*cr5
2730              r34 = r3 - r4*cr4
2731              s25 = s2 + s5*cr5
2732              s34 = s3 + s4*cr4
2733              z(2,j1,ia*ntb+ib,j2) = s1 + s25 + s34
2734              r = s1 + cos2*s25 + cos4*s34
2735              s = r25 + sin42*r34
2736              z(2,j1,ia*ntb+bef(i)+ib,j2) = r + sin2*s
2737              z(2,j1,ia*ntb+4*bef(i)+ib,j2) = r - sin2*s
2738              r = s1 + cos4*s25 + cos2*s34
2739              s = sin42*r25 - r34
2740              z(2,j1,ia*ntb+2*bef(i)+ib,j2) = r + sin2*s
2741              z(2,j1,ia*ntb+3*bef(i)+ib,j2) = r - sin2*s
2742            end do
2743          end do
2744        end do
2745 
2746      else
2747 !      All radices treated
2748        MSG_BUG('called with factors other than 2, 3, and 5')
2749      end if
2750 
2751    end do
2752 
2753 !  ---------------------------------------------------------------
2754 
2755 !  bitreversal
2756 
2757 !  Treat radix 4
2758    if (now(ic)==4) then
2759      ia=0
2760 
2761 !    First step of radix 4
2762      do j1=n1i,n1
2763        r4=z(1,j1,ia*4+4,j2)
2764        s4=z(2,j1,ia*4+4,j2)
2765        r3=z(1,j1,ia*4+3,j2)
2766        s3=z(2,j1,ia*4+3,j2)
2767        r2=z(1,j1,ia*4+2,j2)
2768        s2=z(2,j1,ia*4+2,j2)
2769        r1=z(1,j1,ia*4+1,j2)
2770        s1=z(2,j1,ia*4+1,j2)
2771 
2772        r=r1 + r3
2773        s=r2 + r4
2774        zbr(1,j1,ind(ia*4+1),j2) = r + s
2775        zbr(1,j1,ind(ia*4+3),j2) = r - s
2776        r=r1 - r3
2777        s=s2 - s4
2778        zbr(1,j1,ind(ia*4+2),j2) = r - s*ris
2779        zbr(1,j1,ind(ia*4+4),j2) = r + s*ris
2780        r=s1 + s3
2781        s=s2 + s4
2782        zbr(2,j1,ind(ia*4+1),j2) = r + s
2783        zbr(2,j1,ind(ia*4+3),j2) = r - s
2784        r=s1 - s3
2785        s=r2 - r4
2786        zbr(2,j1,ind(ia*4+2),j2) = r + s*ris
2787        zbr(2,j1,ind(ia*4+4),j2) = r - s*ris
2788      end do
2789 
2790 !    Second step of radix 4
2791      do ia=1,aft(ic)-1
2792        indx=ind(ia*4+1)-1
2793        cr2=trig(1,indx)
2794        ct2=trig(2,indx)
2795        cr3=trig(1,2*indx)
2796        ct3=trig(2,2*indx)
2797        cr4=trig(1,3*indx)
2798        ct4=trig(2,3*indx)
2799        cr4=cr4/cr2
2800        cr2s=cr2*ris
2801        do j1=n1i,n1
2802          r4=z(1,j1,ia*4+4,j2) - z(2,j1,ia*4+4,j2)*ct4
2803          s4=z(1,j1,ia*4+4,j2)*ct4 + z(2,j1,ia*4+4,j2)
2804          r3=z(1,j1,ia*4+3,j2) - z(2,j1,ia*4+3,j2)*ct3
2805          s3=z(1,j1,ia*4+3,j2)*ct3 + z(2,j1,ia*4+3,j2)
2806          r2=z(1,j1,ia*4+2,j2) - z(2,j1,ia*4+2,j2)*ct2
2807          s2=z(1,j1,ia*4+2,j2)*ct2 + z(2,j1,ia*4+2,j2)
2808          r1=z(1,j1,ia*4+1,j2)
2809          s1=z(2,j1,ia*4+1,j2)
2810 
2811          r=r1 + r3*cr3
2812          s=r2 + r4*cr4
2813          zbr(1,j1,ind(ia*4+1),j2) = r + s*cr2
2814          zbr(1,j1,ind(ia*4+3),j2) = r - s*cr2
2815          r=r1 - r3*cr3
2816          s=s2 - s4*cr4
2817          zbr(1,j1,ind(ia*4+2),j2) = r - s*cr2s
2818          zbr(1,j1,ind(ia*4+4),j2) = r + s*cr2s
2819          r=s1 + s3*cr3
2820          s=s2 + s4*cr4
2821          zbr(2,j1,ind(ia*4+1),j2) = r + s*cr2
2822          zbr(2,j1,ind(ia*4+3),j2) = r - s*cr2
2823          r=s1 - s3*cr3
2824          s=r2 - r4*cr4
2825          zbr(2,j1,ind(ia*4+2),j2) = r + s*cr2s
2826          zbr(2,j1,ind(ia*4+4),j2) = r - s*cr2s
2827        end do
2828      end do
2829 
2830 !    Treat radix 2
2831    else if (now(ic)==2) then
2832      ia=0
2833 
2834 !    First step of radix 2
2835      do j1=n1i,n1
2836        r1=z(1,j1,ia*2+1,j2)
2837        s1=z(2,j1,ia*2+1,j2)
2838        r2=z(1,j1,ia*2+2,j2)
2839        s2=z(2,j1,ia*2+2,j2)
2840        zbr(1,j1,ind(ia*2+1),j2) =  r2 + r1
2841        zbr(2,j1,ind(ia*2+1),j2) =  s2 + s1
2842        zbr(1,j1,ind(ia*2+2),j2) = -r2 + r1
2843        zbr(2,j1,ind(ia*2+2),j2) = -s2 + s1
2844      end do
2845 
2846 !    Second step of radix 2
2847      do ia=1,aft(ic)-1
2848        indx=ind(ia*2+1)-1
2849        cr2=trig(1,indx)
2850        ct2=trig(2,indx)
2851        do j1=n1i,n1
2852          r1=z(1,j1,ia*2+1,j2)
2853          s1=z(2,j1,ia*2+1,j2)
2854          r2=z(1,j1,ia*2+2,j2) - z(2,j1,ia*2+2,j2)*ct2
2855          s2=z(1,j1,ia*2+2,j2)*ct2 + z(2,j1,ia*2+2,j2)
2856          zbr(1,j1,ind(ia*2+1),j2) =  r2*cr2 + r1
2857          zbr(2,j1,ind(ia*2+1),j2) =  s2*cr2 + s1
2858          zbr(1,j1,ind(ia*2+2),j2) = -r2*cr2 + r1
2859          zbr(2,j1,ind(ia*2+2),j2) = -s2*cr2 + s1
2860        end do
2861      end do
2862 
2863 !    Treat radix 3
2864    else if (now(ic)==3) then
2865 !    .5d0*sqrt(3.d0)=0.8660254037844387d0
2866      ia=0
2867      bb=ris*0.8660254037844387d0
2868 
2869 !    First step of radix 3
2870      do j1=n1i,n1
2871        r1=z(1,j1,ia*3+1,j2)
2872        s1=z(2,j1,ia*3+1,j2)
2873        r2=z(1,j1,ia*3+2,j2)
2874        s2=z(2,j1,ia*3+2,j2)
2875        r3=z(1,j1,ia*3+3,j2)
2876        s3=z(2,j1,ia*3+3,j2)
2877        r=r2 + r3
2878        s=s2 + s3
2879        zbr(1,j1,ind(ia*3+1),j2) = r + r1
2880        zbr(2,j1,ind(ia*3+1),j2) = s + s1
2881        r1=r1 - r*.5d0
2882        s1=s1 - s*.5d0
2883        r2=r2-r3
2884        s2=s2-s3
2885        zbr(1,j1,ind(ia*3+2),j2) = r1 - s2*bb
2886        zbr(2,j1,ind(ia*3+2),j2) = s1 + r2*bb
2887        zbr(1,j1,ind(ia*3+3),j2) = r1 + s2*bb
2888        zbr(2,j1,ind(ia*3+3),j2) = s1 - r2*bb
2889      end do
2890 
2891 !    Second step of radix 3
2892      do ia=1,aft(ic)-1
2893        indx=ind(ia*3+1)-1
2894        cr2=trig(1,indx)
2895        ct2=trig(2,indx)
2896        cr3=trig(1,2*indx)
2897        ct3=trig(2,2*indx)
2898        cr2=cr2/cr3
2899        cr3p=.5d0*cr3
2900        bb=ris*cr3*0.8660254037844387d0
2901        do j1=n1i,n1
2902          r1=z(1,j1,ia*3+1,j2)
2903          s1=z(2,j1,ia*3+1,j2)
2904          r2=z(1,j1,ia*3+2,j2) - z(2,j1,ia*3+2,j2)*ct2
2905          s2=z(1,j1,ia*3+2,j2)*ct2 + z(2,j1,ia*3+2,j2)
2906          r3=z(1,j1,ia*3+3,j2) - z(2,j1,ia*3+3,j2)*ct3
2907          s3=z(1,j1,ia*3+3,j2)*ct3 + z(2,j1,ia*3+3,j2)
2908          r=cr2*r2 + r3
2909          s=cr2*s2 + s3
2910          zbr(1,j1,ind(ia*3+1),j2) = r*cr3 + r1
2911          zbr(2,j1,ind(ia*3+1),j2) = s*cr3 + s1
2912          r1=r1 - r*cr3p
2913          s1=s1 - s*cr3p
2914          r2=cr2*r2-r3
2915          s2=cr2*s2-s3
2916          zbr(1,j1,ind(ia*3+2),j2) = r1 - s2*bb
2917          zbr(2,j1,ind(ia*3+2),j2) = s1 + r2*bb
2918          zbr(1,j1,ind(ia*3+3),j2) = r1 + s2*bb
2919          zbr(2,j1,ind(ia*3+3),j2) = s1 - r2*bb
2920        end do
2921      end do
2922 
2923 !    Treat radix 5
2924    else if (now(ic)==5) then
2925 !    sin(2.d0*pi/5.d0)
2926      sin2=ris*0.9510565162951536d0
2927      ia=0
2928 
2929 !    First step of radix 5
2930      do j1=n1i,n1
2931        r1=z(1,j1,ia*5+1,j2)
2932        s1=z(2,j1,ia*5+1,j2)
2933        r2=z(1,j1,ia*5+2,j2)
2934        s2=z(2,j1,ia*5+2,j2)
2935        r3=z(1,j1,ia*5+3,j2)
2936        s3=z(2,j1,ia*5+3,j2)
2937        r4=z(1,j1,ia*5+4,j2)
2938        s4=z(2,j1,ia*5+4,j2)
2939        r5=z(1,j1,ia*5+5,j2)
2940        s5=z(2,j1,ia*5+5,j2)
2941        r25 = r2 + r5
2942        r34 = r3 + r4
2943        s25 = s2 - s5
2944        s34 = s3 - s4
2945        zbr(1,j1,ind(ia*5+1),j2) = r1 + r25 + r34
2946        r = r1 + cos2*r25 + cos4*r34
2947        s = s25 + sin42*s34
2948        zbr(1,j1,ind(ia*5+2),j2) = r - sin2*s
2949        zbr(1,j1,ind(ia*5+5),j2) = r + sin2*s
2950        r = r1 + cos4*r25 + cos2*r34
2951        s = sin42*s25 - s34
2952        zbr(1,j1,ind(ia*5+3),j2) = r - sin2*s
2953        zbr(1,j1,ind(ia*5+4),j2) = r + sin2*s
2954        r25 = r2 - r5
2955        r34 = r3 - r4
2956        s25 = s2 + s5
2957        s34 = s3 + s4
2958        zbr(2,j1,ind(ia*5+1),j2) = s1 + s25 + s34
2959        r = s1 + cos2*s25 + cos4*s34
2960        s = r25 + sin42*r34
2961        zbr(2,j1,ind(ia*5+2),j2) = r + sin2*s
2962        zbr(2,j1,ind(ia*5+5),j2) = r - sin2*s
2963        r = s1 + cos4*s25 + cos2*s34
2964        s = sin42*r25 - r34
2965        zbr(2,j1,ind(ia*5+3),j2) = r + sin2*s
2966        zbr(2,j1,ind(ia*5+4),j2) = r - sin2*s
2967      end do
2968 
2969 !    Second step of radix 5
2970      do ia=1,aft(ic)-1
2971        indx=ind(ia*5+1)-1
2972        cr2=trig(1,indx)
2973        ct2=trig(2,indx)
2974        cr3=trig(1,2*indx)
2975        ct3=trig(2,2*indx)
2976        cr4=trig(1,3*indx)
2977        ct4=trig(2,3*indx)
2978        cr5=trig(1,4*indx)
2979        ct5=trig(2,4*indx)
2980        do j1=n1i,n1
2981          r1=z(1,j1,ia*5+1,j2)
2982          s1=z(2,j1,ia*5+1,j2)
2983          r2=cr2*(z(1,j1,ia*5+2,j2) - z(2,j1,ia*5+2,j2)*ct2)
2984          s2=cr2*(z(1,j1,ia*5+2,j2)*ct2 + z(2,j1,ia*5+2,j2))
2985          r3=cr3*(z(1,j1,ia*5+3,j2) - z(2,j1,ia*5+3,j2)*ct3)
2986          s3=cr3*(z(1,j1,ia*5+3,j2)*ct3 + z(2,j1,ia*5+3,j2))
2987          r4=z(1,j1,ia*5+4,j2) - z(2,j1,ia*5+4,j2)*ct4
2988          s4=z(1,j1,ia*5+4,j2)*ct4 + z(2,j1,ia*5+4,j2)
2989          r5=z(1,j1,ia*5+5,j2) - z(2,j1,ia*5+5,j2)*ct5
2990          s5=z(1,j1,ia*5+5,j2)*ct5 + z(2,j1,ia*5+5,j2)
2991          r25 = r2 + r5*cr5
2992          r34 = r3 + r4*cr4
2993          s25 = s2 - s5*cr5
2994          s34 = s3 - s4*cr4
2995          zbr(1,j1,ind(ia*5+1),j2) = r1 + r25 + r34
2996          r = r1 + cos2*r25 + cos4*r34
2997          s = s25 + sin42*s34
2998          zbr(1,j1,ind(ia*5+2),j2) = r - sin2*s
2999          zbr(1,j1,ind(ia*5+5),j2) = r + sin2*s
3000          r = r1 + cos4*r25 + cos2*r34
3001          s = sin42*s25 - s34
3002          zbr(1,j1,ind(ia*5+3),j2) = r - sin2*s
3003          zbr(1,j1,ind(ia*5+4),j2) = r + sin2*s
3004          r25 = r2 - r5*cr5
3005          r34 = r3 - r4*cr4
3006          s25 = s2 + s5*cr5
3007          s34 = s3 + s4*cr4
3008          zbr(2,j1,ind(ia*5+1),j2) = s1 + s25 + s34
3009          r = s1 + cos2*s25 + cos4*s34
3010          s = r25 + sin42*r34
3011          zbr(2,j1,ind(ia*5+2),j2) = r + sin2*s
3012          zbr(2,j1,ind(ia*5+5),j2) = r - sin2*s
3013          r = s1 + cos4*s25 + cos2*s34
3014          s = sin42*r25 - r34
3015          zbr(2,j1,ind(ia*5+3),j2) = r + sin2*s
3016          zbr(2,j1,ind(ia*5+4),j2) = r - sin2*s
3017        end do
3018      end do
3019 
3020    else
3021 !    All radices done
3022      MSG_BUG('Called with factors other than 2, 3, and 5')
3023    end if
3024  end do
3025 !$OMP END PARALLEL DO
3026 
3027 end subroutine sg_ffty

m_sgfft/sg_fftz [ Functions ]

[ Top ] [ m_sgfft ] [ Functions ]

NAME

 sg_fftz

FUNCTION

 This subroutine is called by the 3-dimensional fft to conduct the
 "z" transforms for all x and y.

INPUTS

  mfac = maximum number of factors in 1D FFTs
  mg = maximum length of 1D FFTs
  nd1=first dimension of (complex) arrays z and zbr (treated as real within
   this subroutine)
  nd2=second dimension of (complex) arrays z and zbr (treated as real within
   this subroutine)
  nd3=third dimension of (complex) arrays z and zbr (treated as real within
   this subroutine)
  n1=actual length of x and y transforms
  n2i=lower i2 index, used for blocking : the do-loop will be i2=n2i,n2
   put to 1 for usual ffty
  n2=upper i2 index, used for blocking, put usual n2 for usual ffty
  z(2,nd1,nd2,nd3)=INPUT array; destroyed by transformation
  trig, aft, now, bef, ind=provided by previous call to ctrig
   Note that in this routine (and in ctrig) the values in array trig are
   actually cos and tan, not cos and sin.  Use of tan allows advantageous
   use of FMA on the ibm rs6000.
  ris=sign of exponential in transform (should be 1 or -1; real)
  ic=number of (radix) factors of x transform length (from ctrig)

OUTPUT

  zbr(2,nd1,nd2,nd3)=OUTPUT transformed array; no scaling applied

TODO

 Use latex for the equation above

PARENTS

      m_sgfft

CHILDREN

      sg_fft_cc

SOURCE

3075 subroutine sg_fftz(mfac,mg,nd1,nd2,nd3,n1,n2i,n2,z,zbr,trig,aft,now,bef,ris,ind,ic)
3076 
3077 
3078 !This section has been created automatically by the script Abilint (TD).
3079 !Do not modify the following lines by hand.
3080 #undef ABI_FUNC
3081 #define ABI_FUNC 'sg_fftz'
3082 !End of the abilint section
3083 
3084  implicit none
3085 
3086 !Arguments ------------------------------------
3087 !Dimensions of aft, now, bef, ind, and trig should agree with
3088 !those in subroutine ctrig.
3089 !scalars
3090  integer,intent(in) :: ic,mfac,mg,n1,n2,n2i,nd1,nd2,nd3
3091  real(dp),intent(in) :: ris
3092 !arrays
3093  integer,intent(in) :: aft(mfac),bef(mfac),ind(mg),now(mfac)
3094  real(dp),intent(in) :: trig(2,mg)
3095  real(dp),intent(inout) :: z(2,nd1,nd2,nd3),zbr(2,nd1,nd2,nd3)
3096 
3097 !Local variables-------------------------------
3098 !scalars
3099  integer :: b_i,i,i2,ia,ib,indx,j,ntb
3100  real(dp),parameter :: cos2=0.3090169943749474d0   !cos(2.d0*pi/5.d0)
3101  real(dp),parameter :: cos4=-0.8090169943749474d0  !cos(4.d0*pi/5.d0)
3102  real(dp),parameter :: sin42=0.6180339887498948d0  !sin(4.d0*pi/5.d0)/sin(2.d0*pi/5.d0)
3103  real(dp) :: bb,cr2,cr2s,cr3,cr3p,cr4,cr5,ct2,ct3,ct4,ct5
3104  real(dp) :: r,r1,r2,r25,r3,r34,r4,r5,s,sin2,s1,s2,s25,s3,s34,s4,s5
3105 
3106 ! *************************************************************************
3107 
3108 !n12 occurs as a loop index repeated below; do z transform while
3109 !looping over all n12 lines of data
3110 
3111 !Direct transformation (to ic-1), bitreversal will be in second part
3112 !of routine
3113 
3114  do i=1,ic-1
3115    ntb=now(i)*bef(i)
3116    b_i=bef(i)
3117 
3118 !  Treat radix 4
3119    if (now(i)==4) then
3120      ia=0
3121 
3122 !    First step of radix 4
3123      do ib=1,b_i
3124 !$OMP PARALLEL DO DEFAULT(PRIVATE)&
3125 !$OMP&SHARED(b_i,ia,ib,n1,n2i,n2,ntb,ris,z)
3126        do i2=n2i,n2
3127          do j=1,n1
3128            r4=z(1,j,i2,ia*ntb+3*b_i+ib)
3129            s4=z(2,j,i2,ia*ntb+3*b_i+ib)
3130            r3=z(1,j,i2,ia*ntb+2*b_i+ib)
3131            s3=z(2,j,i2,ia*ntb+2*b_i+ib)
3132            r2=z(1,j,i2,ia*ntb+b_i+ib)
3133            s2=z(2,j,i2,ia*ntb+b_i+ib)
3134            r1=z(1,j,i2,ia*ntb+ib)
3135            s1=z(2,j,i2,ia*ntb+ib)
3136 
3137            r=r1 + r3
3138            s=r2 + r4
3139            z(1,j,i2,ia*ntb+ib) = r + s
3140            z(1,j,i2,ia*ntb+2*b_i+ib) = r - s
3141            r=r1 - r3
3142            s=s2 - s4
3143            z(1,j,i2,ia*ntb+b_i+ib) = r - s*ris
3144            z(1,j,i2,ia*ntb+3*b_i+ib) = r + s*ris
3145            r=s1 + s3
3146            s=s2 + s4
3147            z(2,j,i2,ia*ntb+ib) = r + s
3148            z(2,j,i2,ia*ntb+2*b_i+ib) = r - s
3149            r=s1 - s3
3150            s=r2 - r4
3151            z(2,j,i2,ia*ntb+b_i+ib) = r + s*ris
3152            z(2,j,i2,ia*ntb+3*b_i+ib) = r - s*ris
3153          end do ! j
3154        end do ! i2
3155 !$OMP END PARALLEL DO
3156      end do ! ib
3157 
3158 !    Second step of radix 4
3159      do ia=1,aft(i)-1
3160        indx=ind(ia*4*b_i+1)-1
3161        indx=indx*b_i
3162        cr2=trig(1,indx)
3163        ct2=trig(2,indx)
3164        cr3=trig(1,2*indx)
3165        ct3=trig(2,2*indx)
3166        cr4=trig(1,3*indx)
3167        ct4=trig(2,3*indx)
3168        cr4=cr4/cr2
3169        cr2s=cr2*ris
3170        do ib=1,b_i
3171 !$OMP PARALLEL DO DEFAULT(PRIVATE)&
3172 !$OMP&SHARED(b_i,cr2,cr3,cr4,ct2,cr2s,ct3,ct4,i,ia,ib,n1,n2i,n2,ntb,ris,z)
3173          do i2=n2i,n2
3174            do j=1,n1
3175              r4=z(1,j,i2,ia*ntb+3*b_i+ib) - &
3176 &             z(2,j,i2,ia*ntb+3*b_i+ib)*ct4
3177              s4=z(1,j,i2,ia*ntb+3*b_i+ib)*ct4 + &
3178 &             z(2,j,i2,ia*ntb+3*b_i+ib)
3179              r3=z(1,j,i2,ia*ntb+2*b_i+ib) - &
3180 &             z(2,j,i2,ia*ntb+2*b_i+ib)*ct3
3181              s3=z(1,j,i2,ia*ntb+2*b_i+ib)*ct3 + &
3182 &             z(2,j,i2,ia*ntb+2*b_i+ib)
3183              r2=z(1,j,i2,ia*ntb+b_i+ib) - &
3184 &             z(2,j,i2,ia*ntb+b_i+ib)*ct2
3185              s2=z(1,j,i2,ia*ntb+b_i+ib)*ct2 + &
3186 &             z(2,j,i2,ia*ntb+b_i+ib)
3187              r1=z(1,j,i2,ia*ntb+ib)
3188              s1=z(2,j,i2,ia*ntb+ib)
3189 
3190              r=r1 + r3*cr3
3191              s=r2 + r4*cr4
3192              z(1,j,i2,ia*ntb+ib) = r + s*cr2
3193              z(1,j,i2,ia*ntb+2*b_i+ib) = r - s*cr2
3194              r=r1 - r3*cr3
3195              s=s2 - s4*cr4
3196              z(1,j,i2,ia*ntb+b_i+ib) = r - s*cr2s
3197              z(1,j,i2,ia*ntb+3*b_i+ib) = r + s*cr2s
3198              r=s1 + s3*cr3
3199              s=s2 + s4*cr4
3200              z(2,j,i2,ia*ntb+ib) = r + s*cr2
3201              z(2,j,i2,ia*ntb+2*b_i+ib) = r - s*cr2
3202              r=s1 - s3*cr3
3203              s=r2 - r4*cr4
3204              z(2,j,i2,ia*ntb+b_i+ib) = r + s*cr2s
3205              z(2,j,i2,ia*ntb+3*b_i+ib) = r - s*cr2s
3206            end do ! j
3207          end do ! i2
3208 !$OMP END PARALLEL DO
3209        end do ! ib
3210 
3211      end do ! ia
3212 
3213 !    Treat radix 2
3214    else if (now(i)==2) then
3215      ia=0
3216 
3217 !    First step of radix 2
3218      do ib=1,b_i
3219 !$OMP PARALLEL DO DEFAULT(PRIVATE)&
3220 !$OMP&SHARED(b_i,ia,ib,n1,n2,n2i,ntb,z)
3221        do i2=n2i,n2
3222          do j=1,n1
3223            r1=z(1,j,i2,ia*ntb+ib)
3224            s1=z(2,j,i2,ia*ntb+ib)
3225            r2=z(1,j,i2,ia*ntb+b_i+ib)
3226            s2=z(2,j,i2,ia*ntb+b_i+ib)
3227            z(1,j,i2,ia*ntb+ib) =  r2 + r1
3228            z(2,j,i2,ia*ntb+ib) =  s2 + s1
3229            z(1,j,i2,ia*ntb+b_i+ib) = -r2 + r1
3230            z(2,j,i2,ia*ntb+b_i+ib) = -s2 + s1
3231          end do ! j
3232        end do ! i2
3233 !$OMP END PARALLEL DO
3234      end do ! ib
3235 
3236 !    Second step of radix 2
3237      do ia=1,aft(i)-1
3238        indx=ind(ia*2*b_i+1)-1
3239        indx=indx*b_i
3240        cr2=trig(1,indx)
3241        ct2=trig(2,indx)
3242        do ib=1,b_i
3243 !$OMP PARALLEL DO DEFAULT(PRIVATE)&
3244 !$OMP&SHARED(b_i,cr2,ct2,ia,ib,n1,n2,n2i,ntb,z)
3245          do i2=n2i,n2
3246            do j=1,n1
3247              r1=z(1,j,i2,ia*ntb+ib)
3248              s1=z(2,j,i2,ia*ntb+ib)
3249              r2=z(1,j,i2,ia*ntb+b_i+ib) - &
3250 &             z(2,j,i2,ia*ntb+b_i+ib)*ct2
3251              s2=z(1,j,i2,ia*ntb+b_i+ib)*ct2 + &
3252 &             z(2,j,i2,ia*ntb+b_i+ib)
3253              z(1,j,i2,ia*ntb+ib) =  r2*cr2 + r1
3254              z(2,j,i2,ia*ntb+ib) =  s2*cr2 + s1
3255              z(1,j,i2,ia*ntb+b_i+ib) = -r2*cr2 + r1
3256              z(2,j,i2,ia*ntb+b_i+ib) = -s2*cr2 + s1
3257            end do ! j
3258          end do ! i2
3259 !$OMP END PARALLEL DO
3260        end do ! ib
3261 
3262      end do ! ia
3263 
3264 !    Treat radix 3
3265    else if (now(i)==3) then
3266 !    .5d0*sqrt(3.d0)=0.8660254037844387d0
3267      ia=0
3268      bb=ris*0.8660254037844387d0
3269 
3270 !    First step of radix 3
3271      do ib=1,b_i
3272 !$OMP PARALLEL DO DEFAULT(PRIVATE)&
3273 !$OMP&SHARED(bb,b_i,ia,ib,n1,n2,n2i,ntb,z)
3274        do i2=n2i,n2
3275          do j=1,n1
3276            r1=z(1,j,i2,ia*ntb+ib)
3277            s1=z(2,j,i2,ia*ntb+ib)
3278            r2=z(1,j,i2,ia*ntb+b_i+ib)
3279            s2=z(2,j,i2,ia*ntb+b_i+ib)
3280            r3=z(1,j,i2,ia*ntb+2*b_i+ib)
3281            s3=z(2,j,i2,ia*ntb+2*b_i+ib)
3282            r=r2 + r3
3283            s=s2 + s3
3284            z(1,j,i2,ia*ntb+ib) = r + r1
3285            z(2,j,i2,ia*ntb+ib) = s + s1
3286            r1=r1 - r*.5d0
3287            s1=s1 - s*.5d0
3288            r2=r2-r3
3289            s2=s2-s3
3290            z(1,j,i2,ia*ntb+b_i+ib) = r1 - s2*bb
3291            z(2,j,i2,ia*ntb+b_i+ib) = s1 + r2*bb
3292            z(1,j,i2,ia*ntb+2*b_i+ib) = r1 + s2*bb
3293            z(2,j,i2,ia*ntb+2*b_i+ib) = s1 - r2*bb
3294          end do ! j
3295        end do ! i2
3296 !$OMP END PARALLEL DO
3297      end do ! ib
3298 
3299 !    Second step of radix 3
3300      do ia=1,aft(i)-1
3301        indx=ind(ia*3*b_i+1)-1
3302        indx=indx*b_i
3303        cr2=trig(1,indx)
3304        ct2=trig(2,indx)
3305        cr3=trig(1,2*indx)
3306        ct3=trig(2,2*indx)
3307        cr2=cr2/cr3
3308        cr3p=.5d0*cr3
3309        bb=ris*cr3*0.8660254037844387d0
3310        do ib=1,b_i
3311 !$OMP PARALLEL DO DEFAULT(PRIVATE)&
3312 !$OMP&SHARED(bb,b_i,cr2,cr3,cr3p,ct2,ct3,ia,ib,n1,n2,n2i,ntb,z)
3313          do i2=n2i,n2
3314            do j=1,n1
3315              r1=z(1,j,i2,ia*ntb+ib)
3316              s1=z(2,j,i2,ia*ntb+ib)
3317              r2=z(1,j,i2,ia*ntb+b_i+ib) - &
3318 &             z(2,j,i2,ia*ntb+b_i+ib)*ct2
3319              s2=z(1,j,i2,ia*ntb+b_i+ib)*ct2 + &
3320 &             z(2,j,i2,ia*ntb+b_i+ib)
3321              r3=z(1,j,i2,ia*ntb+2*b_i+ib) - &
3322 &             z(2,j,i2,ia*ntb+2*b_i+ib)*ct3
3323              s3=z(1,j,i2,ia*ntb+2*b_i+ib)*ct3 + &
3324 &             z(2,j,i2,ia*ntb+2*b_i+ib)
3325              r=cr2*r2 + r3
3326              s=cr2*s2 + s3
3327              z(1,j,i2,ia*ntb+ib) = r*cr3 + r1
3328              z(2,j,i2,ia*ntb+ib) = s*cr3 + s1
3329              r1=r1 - r*cr3p
3330              s1=s1 - s*cr3p
3331              r2=cr2*r2-r3
3332              s2=cr2*s2-s3
3333              z(1,j,i2,ia*ntb+b_i+ib) = r1 - s2*bb
3334              z(2,j,i2,ia*ntb+b_i+ib) = s1 + r2*bb
3335              z(1,j,i2,ia*ntb+2*b_i+ib) = r1 + s2*bb
3336              z(2,j,i2,ia*ntb+2*b_i+ib) = s1 - r2*bb
3337            end do ! j
3338          end do ! i2
3339 !$OMP END PARALLEL DO
3340        end do ! ib
3341 
3342      end do ! ia
3343 
3344 !    Treat radix 5
3345    else if (now(i)==5) then
3346      sin2=ris*0.9510565162951536d0
3347      ia=0
3348 
3349 !    First step of radix 5
3350      do ib=1,b_i
3351 !$OMP PARALLEL DO DEFAULT(PRIVATE)&
3352 !$OMP&SHARED(b_i,ia,ib,n1,n2,n2i,ntb,sin2,z)
3353        do i2=n2i,n2
3354          do j=1,n1
3355            r1=z(1,j,i2,ia*ntb+ib)
3356            s1=z(2,j,i2,ia*ntb+ib)
3357            r2=z(1,j,i2,ia*ntb+b_i+ib)
3358            s2=z(2,j,i2,ia*ntb+b_i+ib)
3359            r3=z(1,j,i2,ia*ntb+2*b_i+ib)
3360            s3=z(2,j,i2,ia*ntb+2*b_i+ib)
3361            r4=z(1,j,i2,ia*ntb+3*b_i+ib)
3362            s4=z(2,j,i2,ia*ntb+3*b_i+ib)
3363            r5=z(1,j,i2,ia*ntb+4*b_i+ib)
3364            s5=z(2,j,i2,ia*ntb+4*b_i+ib)
3365            r25 = r2 + r5
3366            r34 = r3 + r4
3367            s25 = s2 - s5
3368            s34 = s3 - s4
3369            z(1,j,i2,ia*ntb+ib) = r1 + r25 + r34
3370            r = r1 + cos2*r25 + cos4*r34
3371            s = s25 + sin42*s34
3372            z(1,j,i2,ia*ntb+b_i+ib) = r - sin2*s
3373            z(1,j,i2,ia*ntb+4*b_i+ib) = r + sin2*s
3374            r = r1 + cos4*r25 + cos2*r34
3375            s = sin42*s25 - s34
3376            z(1,j,i2,ia*ntb+2*b_i+ib) = r - sin2*s
3377            z(1,j,i2,ia*ntb+3*b_i+ib) = r + sin2*s
3378            r25 = r2 - r5
3379            r34 = r3 - r4
3380            s25 = s2 + s5
3381            s34 = s3 + s4
3382            z(2,j,i2,ia*ntb+ib) = s1 + s25 + s34
3383            r = s1 + cos2*s25 + cos4*s34
3384            s = r25 + sin42*r34
3385            z(2,j,i2,ia*ntb+b_i+ib) = r + sin2*s
3386            z(2,j,i2,ia*ntb+4*b_i+ib) = r - sin2*s
3387            r = s1 + cos4*s25 + cos2*s34
3388            s = sin42*r25 - r34
3389            z(2,j,i2,ia*ntb+2*b_i+ib) = r + sin2*s
3390            z(2,j,i2,ia*ntb+3*b_i+ib) = r - sin2*s
3391          end do ! j
3392        end do ! i2
3393 !$OMP END PARALLEL DO
3394      end do ! ib
3395 
3396 !    Second step of radix 5
3397      do ia=1,aft(i)-1
3398        indx=ind(ia*5*b_i+1)-1
3399        indx=indx*b_i
3400        cr2=trig(1,indx)
3401        ct2=trig(2,indx)
3402        cr3=trig(1,2*indx)
3403        ct3=trig(2,2*indx)
3404        cr4=trig(1,3*indx)
3405        ct4=trig(2,3*indx)
3406        cr5=trig(1,4*indx)
3407        ct5=trig(2,4*indx)
3408        do ib=1,b_i
3409 !$OMP PARALLEL DO DEFAULT(PRIVATE)&
3410 !$OMP&SHARED(b_i,cr2,cr3,cr4,cr5,ct2,ct3,ct4,ct5,ia,ib,n1,n2,n2i,ntb,sin2,z)
3411          do i2=n2i,n2
3412            do j=1,n1
3413              r1=z(1,j,i2,ia*ntb+ib)
3414              s1=z(2,j,i2,ia*ntb+ib)
3415              r2=cr2*(z(1,j,i2,ia*ntb+b_i+ib) - &
3416 &             z(2,j,i2,ia*ntb+b_i+ib)*ct2)
3417              s2=cr2*(z(1,j,i2,ia*ntb+b_i+ib)*ct2 + &
3418 &             z(2,j,i2,ia*ntb+b_i+ib))
3419              r3=cr3*(z(1,j,i2,ia*ntb+2*b_i+ib) - &
3420 &             z(2,j,i2,ia*ntb+2*b_i+ib)*ct3)
3421              s3=cr3*(z(1,j,i2,ia*ntb+2*b_i+ib)*ct3 + &
3422 &             z(2,j,i2,ia*ntb+2*b_i+ib))
3423              r4=z(1,j,i2,ia*ntb+3*b_i+ib) - &
3424 &             z(2,j,i2,ia*ntb+3*b_i+ib)*ct4
3425              s4=z(1,j,i2,ia*ntb+3*b_i+ib)*ct4 + &
3426 &             z(2,j,i2,ia*ntb+3*b_i+ib)
3427              r5=z(1,j,i2,ia*ntb+4*b_i+ib) - &
3428 &             z(2,j,i2,ia*ntb+4*b_i+ib)*ct5
3429              s5=z(1,j,i2,ia*ntb+4*b_i+ib)*ct5 + &
3430 &             z(2,j,i2,ia*ntb+4*b_i+ib)
3431              r25 = r2 + r5*cr5
3432              r34 = r3 + r4*cr4
3433              s25 = s2 - s5*cr5
3434              s34 = s3 - s4*cr4
3435              z(1,j,i2,ia*ntb+ib) = r1 + r25 + r34
3436              r = r1 + cos2*r25 + cos4*r34
3437              s = s25 + sin42*s34
3438              z(1,j,i2,ia*ntb+b_i+ib) = r - sin2*s
3439              z(1,j,i2,ia*ntb+4*b_i+ib) = r + sin2*s
3440              r = r1 + cos4*r25 + cos2*r34
3441              s = sin42*s25 - s34
3442              z(1,j,i2,ia*ntb+2*b_i+ib) = r - sin2*s
3443              z(1,j,i2,ia*ntb+3*b_i+ib) = r + sin2*s
3444              r25 = r2 - r5*cr5
3445              r34 = r3 - r4*cr4
3446              s25 = s2 + s5*cr5
3447              s34 = s3 + s4*cr4
3448              z(2,j,i2,ia*ntb+ib) = s1 + s25 + s34
3449              r = s1 + cos2*s25 + cos4*s34
3450              s = r25 + sin42*r34
3451              z(2,j,i2,ia*ntb+b_i+ib) = r + sin2*s
3452              z(2,j,i2,ia*ntb+4*b_i+ib) = r - sin2*s
3453              r = s1 + cos4*s25 + cos2*s34
3454              s = sin42*r25 - r34
3455              z(2,j,i2,ia*ntb+2*b_i+ib) = r + sin2*s
3456              z(2,j,i2,ia*ntb+3*b_i+ib) = r - sin2*s
3457            end do ! j
3458          end do ! i2
3459 !$OMP END PARALLEL DO
3460        end do ! ib
3461 
3462      end do ! ia
3463 
3464 !    All radices treated
3465    else
3466      MSG_BUG('called with factors other than 2, 3, and 5')
3467    end if
3468 
3469 !  End of direct transformation
3470  end do
3471 
3472 !------------------------------------------------------------
3473 !bitreversal  (zbr is for z"bit-reversed")
3474 
3475 !Treat radix 4
3476  if (now(ic)==4) then
3477    ia=0
3478 
3479 !  First step of radix 4
3480 !$OMP PARALLEL DO DEFAULT(PRIVATE)&
3481 !$OMP&SHARED(ia,ind,n1,n2,n2i,ntb,ris,z,zbr)
3482    do i2=n2i,n2
3483      do j=1,n1
3484        r4=z(1,j,i2,ia*4+4)
3485        s4=z(2,j,i2,ia*4+4)
3486        r3=z(1,j,i2,ia*4+3)
3487        s3=z(2,j,i2,ia*4+3)
3488        r2=z(1,j,i2,ia*4+2)
3489        s2=z(2,j,i2,ia*4+2)
3490        r1=z(1,j,i2,ia*4+1)
3491        s1=z(2,j,i2,ia*4+1)
3492 
3493        r=r1 + r3
3494        s=r2 + r4
3495        zbr(1,j,i2,ind(ia*4+1)) = r + s
3496        zbr(1,j,i2,ind(ia*4+3)) = r - s
3497        r=r1 - r3
3498        s=s2 - s4
3499        zbr(1,j,i2,ind(ia*4+2)) = r - s*ris
3500        zbr(1,j,i2,ind(ia*4+4)) = r + s*ris
3501        r=s1 + s3
3502        s=s2 + s4
3503        zbr(2,j,i2,ind(ia*4+1)) = r + s
3504        zbr(2,j,i2,ind(ia*4+3)) = r - s
3505        r=s1 - s3
3506        s=r2 - r4
3507        zbr(2,j,i2,ind(ia*4+2)) = r + s*ris
3508        zbr(2,j,i2,ind(ia*4+4)) = r - s*ris
3509      end do ! j
3510    end do ! i2
3511 !$OMP END PARALLEL DO
3512 
3513 !  Second step of radix 4
3514    do ia=1,aft(ic)-1
3515      indx=ind(ia*4+1)-1
3516      cr2=trig(1,indx)
3517      ct2=trig(2,indx)
3518      cr3=trig(1,2*indx)
3519      ct3=trig(2,2*indx)
3520      cr4=trig(1,3*indx)
3521      ct4=trig(2,3*indx)
3522      cr4=cr4/cr2
3523      cr2s=cr2*ris
3524 !$OMP PARALLEL DO DEFAULT(PRIVATE)&
3525 !$OMP&SHARED(ia,cr2,cr2s,cr3,cr4,ct2,ct3,ct4,ind,n1,n2,n2i,z,zbr)
3526      do i2=n2i,n2
3527        do j=1,n1
3528          r4=z(1,j,i2,ia*4+4) - z(2,j,i2,ia*4+4)*ct4
3529          s4=z(1,j,i2,ia*4+4)*ct4 + z(2,j,i2,ia*4+4)
3530          r3=z(1,j,i2,ia*4+3) - z(2,j,i2,ia*4+3)*ct3
3531          s3=z(1,j,i2,ia*4+3)*ct3 + z(2,j,i2,ia*4+3)
3532          r2=z(1,j,i2,ia*4+2) - z(2,j,i2,ia*4+2)*ct2
3533          s2=z(1,j,i2,ia*4+2)*ct2 + z(2,j,i2,ia*4+2)
3534          r1=z(1,j,i2,ia*4+1)
3535          s1=z(2,j,i2,ia*4+1)
3536 
3537          r=r1 + r3*cr3
3538          s=r2 + r4*cr4
3539          zbr(1,j,i2,ind(ia*4+1)) = r + s*cr2
3540          zbr(1,j,i2,ind(ia*4+3)) = r - s*cr2
3541          r=r1 - r3*cr3
3542          s=s2 - s4*cr4
3543          zbr(1,j,i2,ind(ia*4+2)) = r - s*cr2s
3544          zbr(1,j,i2,ind(ia*4+4)) = r + s*cr2s
3545          r=s1 + s3*cr3
3546          s=s2 + s4*cr4
3547          zbr(2,j,i2,ind(ia*4+1)) = r + s*cr2
3548          zbr(2,j,i2,ind(ia*4+3)) = r - s*cr2
3549          r=s1 - s3*cr3
3550          s=r2 - r4*cr4
3551          zbr(2,j,i2,ind(ia*4+2)) = r + s*cr2s
3552          zbr(2,j,i2,ind(ia*4+4)) = r - s*cr2s
3553        end do ! j
3554      end do ! i2
3555 !$OMP END PARALLEL DO
3556 
3557    end do ! ia
3558 
3559 !  Treat radix 2
3560  else if (now(ic)==2) then
3561    ia=0
3562 
3563 !  First step of radix 2
3564 !$OMP PARALLEL DO DEFAULT(PRIVATE)&
3565 !$OMP&SHARED(ia,ind,n1,n2,n2i,z,zbr)
3566    do i2=n2i,n2
3567      do j=1,n1
3568        r1=z(1,j,i2,ia*2+1)
3569        s1=z(2,j,i2,ia*2+1)
3570        r2=z(1,j,i2,ia*2+2)
3571        s2=z(2,j,i2,ia*2+2)
3572        zbr(1,j,i2,ind(ia*2+1)) =  r2 + r1
3573        zbr(2,j,i2,ind(ia*2+1)) =  s2 + s1
3574        zbr(1,j,i2,ind(ia*2+2)) = -r2 + r1
3575        zbr(2,j,i2,ind(ia*2+2)) = -s2 + s1
3576      end do ! j
3577    end do ! i2
3578 !$OMP END PARALLEL DO
3579 
3580 !  Second step of radix 2
3581    do ia=1,aft(ic)-1
3582      indx=ind(ia*2+1)-1
3583      cr2=trig(1,indx)
3584      ct2=trig(2,indx)
3585 !$OMP PARALLEL DO DEFAULT(PRIVATE)&
3586 !$OMP&SHARED(cr2,ct2,ia,ind,n1,n2,n2i,z,zbr)
3587      do i2=n2i,n2
3588        do j=1,n1
3589          r1=z(1,j,i2,ia*2+1)
3590          s1=z(2,j,i2,ia*2+1)
3591          r2=z(1,j,i2,ia*2+2) - z(2,j,i2,ia*2+2)*ct2
3592          s2=z(1,j,i2,ia*2+2)*ct2 + z(2,j,i2,ia*2+2)
3593          zbr(1,j,i2,ind(ia*2+1)) =  r2*cr2 + r1
3594          zbr(2,j,i2,ind(ia*2+1)) =  s2*cr2 + s1
3595          zbr(1,j,i2,ind(ia*2+2)) = -r2*cr2 + r1
3596          zbr(2,j,i2,ind(ia*2+2)) = -s2*cr2 + s1
3597        end do ! j
3598      end do ! i2
3599 !$OMP END PARALLEL DO
3600    end do ! ia
3601 
3602 !  Treat radix 3
3603  else if (now(ic)==3) then
3604 !  .5d0*sqrt(3.d0)=0.8660254037844387d0
3605    ia=0
3606    bb=ris*0.8660254037844387d0
3607 
3608 !  First step of radix 3
3609 !$OMP PARALLEL DO DEFAULT(PRIVATE)&
3610 !$OMP&SHARED(bb,ia,ind,n1,n2,n2i,z,zbr)
3611    do i2=n2i,n2
3612      do j=1,n1
3613        r1=z(1,j,i2,ia*3+1)
3614        s1=z(2,j,i2,ia*3+1)
3615        r2=z(1,j,i2,ia*3+2)
3616        s2=z(2,j,i2,ia*3+2)
3617        r3=z(1,j,i2,ia*3+3)
3618        s3=z(2,j,i2,ia*3+3)
3619        r=r2 + r3
3620        s=s2 + s3
3621        zbr(1,j,i2,ind(ia*3+1)) = r + r1
3622        zbr(2,j,i2,ind(ia*3+1)) = s + s1
3623        r1=r1 - r*.5d0
3624        s1=s1 - s*.5d0
3625        r2=r2-r3
3626        s2=s2-s3
3627        zbr(1,j,i2,ind(ia*3+2)) = r1 - s2*bb
3628        zbr(2,j,i2,ind(ia*3+2)) = s1 + r2*bb
3629        zbr(1,j,i2,ind(ia*3+3)) = r1 + s2*bb
3630        zbr(2,j,i2,ind(ia*3+3)) = s1 - r2*bb
3631      end do ! j
3632    end do ! i2
3633 !$OMP END PARALLEL DO
3634 
3635 !  Second step of radix 3
3636    do ia=1,aft(ic)-1
3637      indx=ind(ia*3+1)-1
3638      cr2=trig(1,indx)
3639      ct2=trig(2,indx)
3640      cr3=trig(1,2*indx)
3641      ct3=trig(2,2*indx)
3642      cr2=cr2/cr3
3643      cr3p=.5d0*cr3
3644      bb=ris*cr3*0.8660254037844387d0
3645 !$OMP PARALLEL DO DEFAULT(PRIVATE)&
3646 !$OMP&SHARED(bb,cr2,cr3,cr3p,ct2,ct3,ia,ind,n1,n2,n2i,z,zbr)
3647      do i2=n2i,n2
3648        do j=1,n1
3649          r1=z(1,j,i2,ia*3+1)
3650          s1=z(2,j,i2,ia*3+1)
3651          r2=z(1,j,i2,ia*3+2) - z(2,j,i2,ia*3+2)*ct2
3652          s2=z(1,j,i2,ia*3+2)*ct2 + z(2,j,i2,ia*3+2)
3653          r3=z(1,j,i2,ia*3+3) - z(2,j,i2,ia*3+3)*ct3
3654          s3=z(1,j,i2,ia*3+3)*ct3 + z(2,j,i2,ia*3+3)
3655          r=cr2*r2 + r3
3656          s=cr2*s2 + s3
3657          zbr(1,j,i2,ind(ia*3+1)) = r*cr3 + r1
3658          zbr(2,j,i2,ind(ia*3+1)) = s*cr3 + s1
3659          r1=r1 - r*cr3p
3660          s1=s1 - s*cr3p
3661          r2=cr2*r2-r3
3662          s2=cr2*s2-s3
3663          zbr(1,j,i2,ind(ia*3+2)) = r1 - s2*bb
3664          zbr(2,j,i2,ind(ia*3+2)) = s1 + r2*bb
3665          zbr(1,j,i2,ind(ia*3+3)) = r1 + s2*bb
3666          zbr(2,j,i2,ind(ia*3+3)) = s1 - r2*bb
3667        end do ! j
3668      end do ! i2
3669 !$OMP END PARALLEL DO
3670    end do ! ia
3671 
3672 !  Treat radix 5
3673  else if (now(ic)==5) then
3674 !  sin(2.d0*pi/5.d0)
3675    sin2=ris*0.9510565162951536d0
3676    ia=0
3677 
3678 !  First step of radix 5
3679 !$OMP PARALLEL DO DEFAULT(PRIVATE)&
3680 !$OMP&SHARED(ia,ind,n1,n2,n2i,sin2,z,zbr)
3681    do i2=n2i,n2
3682      do j=1,n1
3683        r1=z(1,j,i2,ia*5+1)
3684        s1=z(2,j,i2,ia*5+1)
3685        r2=z(1,j,i2,ia*5+2)
3686        s2=z(2,j,i2,ia*5+2)
3687        r3=z(1,j,i2,ia*5+3)
3688        s3=z(2,j,i2,ia*5+3)
3689        r4=z(1,j,i2,ia*5+4)
3690        s4=z(2,j,i2,ia*5+4)
3691        r5=z(1,j,i2,ia*5+5)
3692        s5=z(2,j,i2,ia*5+5)
3693        r25 = r2 + r5
3694        r34 = r3 + r4
3695        s25 = s2 - s5
3696        s34 = s3 - s4
3697        zbr(1,j,i2,ind(ia*5+1)) = r1 + r25 + r34
3698        r = r1 + cos2*r25 + cos4*r34
3699        s = s25 + sin42*s34
3700        zbr(1,j,i2,ind(ia*5+2)) = r - sin2*s
3701        zbr(1,j,i2,ind(ia*5+5)) = r + sin2*s
3702        r = r1 + cos4*r25 + cos2*r34
3703        s = sin42*s25 - s34
3704        zbr(1,j,i2,ind(ia*5+3)) = r - sin2*s
3705        zbr(1,j,i2,ind(ia*5+4)) = r + sin2*s
3706        r25 = r2 - r5
3707        r34 = r3 - r4
3708        s25 = s2 + s5
3709        s34 = s3 + s4
3710        zbr(2,j,i2,ind(ia*5+1)) = s1 + s25 + s34
3711        r = s1 + cos2*s25 + cos4*s34
3712        s = r25 + sin42*r34
3713        zbr(2,j,i2,ind(ia*5+2)) = r + sin2*s
3714        zbr(2,j,i2,ind(ia*5+5)) = r - sin2*s
3715        r = s1 + cos4*s25 + cos2*s34
3716        s = sin42*r25 - r34
3717        zbr(2,j,i2,ind(ia*5+3)) = r + sin2*s
3718        zbr(2,j,i2,ind(ia*5+4)) = r - sin2*s
3719      end do ! j
3720    end do ! i2
3721 !$OMP END PARALLEL DO
3722 
3723 !  Second step of radix 5
3724    do ia=1,aft(ic)-1
3725      indx=ind(ia*5+1)-1
3726      cr2=trig(1,indx)
3727      ct2=trig(2,indx)
3728      cr3=trig(1,2*indx)
3729      ct3=trig(2,2*indx)
3730      cr4=trig(1,3*indx)
3731      ct4=trig(2,3*indx)
3732      cr5=trig(1,4*indx)
3733      ct5=trig(2,4*indx)
3734 !$OMP PARALLEL DO DEFAULT(PRIVATE)&
3735 !$OMP&SHARED(cr2,cr3,cr4,cr5,ct2,ct3,ct4,ct5,ia,ind,n1,n2,n2i,sin2,z,zbr)
3736      do i2=n2i,n2
3737        do j=1,n1
3738          r1=z(1,j,i2,ia*5+1)
3739          s1=z(2,j,i2,ia*5+1)
3740          r2=cr2*(z(1,j,i2,ia*5+2) - z(2,j,i2,ia*5+2)*ct2)
3741          s2=cr2*(z(1,j,i2,ia*5+2)*ct2 + z(2,j,i2,ia*5+2))
3742          r3=cr3*(z(1,j,i2,ia*5+3) - z(2,j,i2,ia*5+3)*ct3)
3743          s3=cr3*(z(1,j,i2,ia*5+3)*ct3 + z(2,j,i2,ia*5+3))
3744          r4=z(1,j,i2,ia*5+4) - z(2,j,i2,ia*5+4)*ct4
3745          s4=z(1,j,i2,ia*5+4)*ct4 + z(2,j,i2,ia*5+4)
3746          r5=z(1,j,i2,ia*5+5) - z(2,j,i2,ia*5+5)*ct5
3747          s5=z(1,j,i2,ia*5+5)*ct5 + z(2,j,i2,ia*5+5)
3748          r25 = r2 + r5*cr5
3749          r34 = r3 + r4*cr4
3750          s25 = s2 - s5*cr5
3751          s34 = s3 - s4*cr4
3752          zbr(1,j,i2,ind(ia*5+1)) = r1 + r25 + r34
3753          r = r1 + cos2*r25 + cos4*r34
3754          s = s25 + sin42*s34
3755          zbr(1,j,i2,ind(ia*5+2)) = r - sin2*s
3756          zbr(1,j,i2,ind(ia*5+5)) = r + sin2*s
3757          r = r1 + cos4*r25 + cos2*r34
3758          s = sin42*s25 - s34
3759          zbr(1,j,i2,ind(ia*5+3)) = r - sin2*s
3760          zbr(1,j,i2,ind(ia*5+4)) = r + sin2*s
3761          r25 = r2 - r5*cr5
3762          r34 = r3 - r4*cr4
3763          s25 = s2 + s5*cr5
3764          s34 = s3 + s4*cr4
3765          zbr(2,j,i2,ind(ia*5+1)) = s1 + s25 + s34
3766          r = s1 + cos2*s25 + cos4*s34
3767          s = r25 + sin42*r34
3768          zbr(2,j,i2,ind(ia*5+2)) = r + sin2*s
3769          zbr(2,j,i2,ind(ia*5+5)) = r - sin2*s
3770          r = s1 + cos4*s25 + cos2*s34
3771          s = sin42*r25 - r34
3772          zbr(2,j,i2,ind(ia*5+3)) = r + sin2*s
3773          zbr(2,j,i2,ind(ia*5+4)) = r - sin2*s
3774        end do ! j
3775      end do ! i2
3776 !$OMP END PARALLEL DO
3777    end do ! ia
3778 
3779  else !  All radices treated
3780    MSG_BUG('called with factors other than 2, 3, and 5')
3781  end if
3782 
3783 end subroutine sg_fftz

m_sgfft/sg_poisson [ Functions ]

[ Top ] [ m_sgfft ] [ Functions ]

NAME

 sg_poisson

FUNCTION

  Solve the Poisson equation in G-space given the density, n(r),
  in real space of the FFT box.

INPUTS

 fftcache=size of the cache (kB)
 cplex=1 if fofr is real, 2 if fofr is complex
 nx,ny,nz=Number of FFT points along the three directions.
 ldx,ldy,ldz=Leading dimension of the array nr and vg.
 ndat = Number of densities
 vg(nx*ny*nz)=Potential in reciprocal space.

SIDE EFFECTS

 nr(cplex*ldx*ldy*ldz*ndat)
    input: n(r) (real or complex)
    output: the hartree potential in real space

NOTES

   vg is given on the FFT mesh instead of the augmented mesh [ldx,ldy,ldz]
   in order to simplify the interface with the other routines operating of vg

PARENTS

      m_fft

CHILDREN

      sg_fft_cc

SOURCE

6114 subroutine sg_poisson(fftcache,cplex,nx,ny,nz,ldx,ldy,ldz,ndat,vg,nr)
6115 
6116 
6117 !This section has been created automatically by the script Abilint (TD).
6118 !Do not modify the following lines by hand.
6119 #undef ABI_FUNC
6120 #define ABI_FUNC 'sg_poisson'
6121 !End of the abilint section
6122 
6123  implicit none
6124 
6125 !Arguments ------------------------------------
6126 !scalars
6127  integer,intent(in) :: fftcache,cplex,nx,ny,nz,ldx,ldy,ldz,ndat
6128 !arrays
6129  real(dp),intent(inout) :: nr(cplex*ldx*ldy*ldz*ndat)
6130  real(dp),intent(in) :: vg(nx*ny*nz)
6131 
6132 !Local variables-------------------------------
6133  integer,parameter :: ndat1=1
6134  integer :: ii,jj,kk,ifft,dat,ptr,ig
6135  real(dp) :: fft_fact
6136 !arrays
6137  real(dp),allocatable :: work(:,:)
6138 
6139 ! *************************************************************************
6140 
6141  fft_fact = one/(nx*ny*nz)
6142 
6143  ABI_CHECK(cplex==2,"cplex!=2 not coded")
6144 
6145  ABI_MALLOC(work, (2,ldx*ldy*ldz))
6146 
6147  do dat=1,ndat
6148    ! n(r) --> n(G)
6149    ptr = 1 + (dat-1)*cplex*ldx*ldy*ldz
6150    call sg_fft_cc(fftcache,nx,ny,nz,ldx,ldy,ldz,ndat1,-1,nr(ptr),work)
6151 
6152    ! Multiply by v(G)
6153    ig = 0
6154    do kk=1,nz
6155      do jj=1,ny
6156        do ii=1,nx
6157          ig = ig + 1
6158          ifft = ii + (jj-1)*ldx + (kk-1)*ldx*ldy
6159          work(1:2,ifft) = work(1:2,ifft) * vg(ig) * fft_fact
6160       end do
6161      end do
6162    end do
6163 
6164    ! compute vh(r)
6165    call sg_fft_cc(fftcache,nx,ny,nz,ldx,ldy,ldz,ndat1,+1,work,nr(ptr))
6166  end do
6167 
6168  ABI_FREE(work)
6169 
6170 end subroutine sg_poisson