TABLE OF CONTENTS


ABINIT/m_sgfft [ Modules ]

[ Top ] [ Modules ]

NAME

  m_sgfft

FUNCTION

  This module provides low-level interfaces to Goedecker's FFT library.

COPYRIGHT

 Copyright by Stefan Goedecker, Ithaca, NY USA, July 14, 1993
 Copyright (C) 1998-2022 ABINIT group (DCA, XG)
 This file is distributed under the terms of the
 GNU General Public License, see ~abinit/COPYING
 or http://www.gnu.org/copyleft/gpl.txt .

SOURCE

17 #if defined HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20 
21 #include "abi_common.h"
22 
23 MODULE m_sgfft
24 
25  use defs_basis
26  use m_abicore
27  use m_errors
28  use m_fftcore
29 
30  use m_fstrings,    only : sjoin, itoa
31  use defs_fftdata,  only : mg
32 
33  implicit none
34 
35  private
36 
37 ! Public API.
38  public :: sg_fft_cc      ! Complex-Complex version (full box)
39  public :: sg_fft_rc      ! Real-Complex version (full box)
40  public :: sg_fftpad      ! Zero-padding version of "fft".
41  public :: sg_fftrisc     ! Fourier transforms of wavefunctions
42  public :: sg_fftrisc_2
43  public :: sg_poisson     ! Solve the poisson equation in G-space starting from n(r).
44 
45 CONTAINS  !====================================================================

m_sgfft/fft_cc_one_nothreadsafe [ Functions ]

[ Top ] [ m_sgfft ] [ Functions ]

NAME

 fft_cc_one_nothreadsafe

FUNCTION

 Calculates the discrete Fourier transform:

   ftarr(i1,i2,i3)=exp(ris*i*2*pi*(j1*i1/n1+j2*i2/n2+j3*i3/n3)) arr(j1,j2,j3)

INPUTS

  fftcache=size of the cache (kB)
  nd1,nd2,nd3=memory dimension of arr and ftarr
  n1,n2,n3=physical dimension of the transform
  arr(2,nd1,nd2,nd3)=input complex array with alternating real and imaginary
  elements; data resides in 2*n1*n2*n3 of this array, spread out.
  (see SIDE FFECTS).
  ris=(real(dp)) sign of exponential in transform

OUTPUT

  ftarr(2,nd1,nd2,nd3)=working space for transform and contains output

SIDE EFFECTS

  arr(2,nd1,nd2,nd3) is modified by sg_fftx,sg_ffty,sg_fftz.

NOTES

  ndi must always be greater or equal to ni.  Recommended choice for nd1
  and nd2 is: ni for ni=odd or ni+1 for ni=even (hence 2*(ni/2)+1);
  nd3 should always be n3.  Note that choosing nd1 or nd2 larger than
  the recommended value can severely degrade efficiency of this routine.
  Avoiding even ndi for nd1 and nd2 avoids cache conflicts on cache machines.
  Each of n1,n2,n3 must be a
  product of the prime factors 2,3,5. If two ni s are equal
  it is recommended to place them behind each other.
  The largest any of these may be is set by parameter "mg" below.
  This fft is particularly efficient for cache architectures.
  Note that the meaning of fftcache has changed from the original
  ncache of SG (that was the maximum number of COMPLEX*16 in the cache)

SOURCE

155 subroutine fft_cc_one_nothreadsafe(fftcache,nd1,nd2,nd3,n1,n2,n3,arr,ftarr,ris)
156 
157 !Arguments ------------------------------------
158 !scalars
159  integer,intent(in) :: fftcache,n1,n2,n3,nd1,nd2,nd3
160  real(dp),intent(in) :: ris
161 !arrays
162  real(dp),intent(inout) :: arr(2,nd1,nd2,nd3)
163  real(dp),intent(inout) :: ftarr(2,nd1,nd2,nd3) !vz_i
164 
165 !Local variables-------------------------------
166 !mfac sets maximum number of factors (5, 4, 3, or 2) which may be
167 !contained within any n1, n2, or n3
168 !mg sets the maximum 1 dimensional fft length (any one of n1, n2, or n3)
169 !scalars
170  integer,parameter :: mfac=11
171  integer :: i2,ic,n1i,n3i
172  character(len=500) :: message
173 !arrays
174  integer :: aft(mfac),bef(mfac),ind(mg),now(mfac)
175  real(dp) :: trig(2,mg)
176 
177 ! *************************************************************************
178 
179 !Check that dimension is not exceeded
180  if (n1>mg.or.n2>mg.or.n3>mg) then
181    write(message, '(a,3i10,a,i10,a)' )&
182 &   'one of the dimensions n1,n2,n3=',n1,n2,n3,&
183 &   'exceeds allowed dimension mg=',mg,ch10
184    ABI_BUG(message)
185  end if
186 
187 !transform along x direction
188  call sg_ctrig(n1,trig,aft,bef,now,ris,ic,ind,mfac,mg)
189  call sg_fftx(fftcache,mfac,mg,nd1,nd2,nd3,n2,n3,&
190 & arr,ftarr,trig,aft,now,bef,ris,ind,ic)
191 
192  ! This to handle 1d FFTs
193  if (n2 == 1 .and. n3 == 1) then
194    !print *, "Returning as n2, n3:", n2, n3
195    return
196  end if
197 
198 !transform along y direction
199  if (n2/=n1)then
200    call sg_ctrig(n2,trig,aft,bef,now,ris,ic,ind,mfac,mg)
201  end if
202  n1i=1 ; n3i=1
203  call sg_ffty(fftcache,mfac,mg,nd1,nd2,nd3,n1i,n1,n3i,n3,&
204 & ftarr,arr,trig,aft,now,bef,ris,ind,ic)
205 
206 !transform along z direction
207  if (n3/=n2)then
208    call sg_ctrig(n3,trig,aft,bef,now,ris,ic,ind,mfac,mg)
209  end if
210 
211 !$OMP PARALLEL DO SHARED(aft,arr,bef,ftarr,ind,ic)&
212 !$OMP SHARED(nd1,nd2,nd3,now,n1,n2,ris,trig)&
213 !$OMP PRIVATE(i2)
214  do i2=1,n2
215    call sg_fftz(mfac,mg,nd1,nd2,nd3,n1,i2,i2,arr,ftarr,&
216 &   trig,aft,now,bef,ris,ind,ic)
217  end do
218 !$OMP END PARALLEL DO
219 
220 end subroutine fft_cc_one_nothreadsafe

m_sgfft/fftpad_one_nothreadsafe [ Functions ]

[ Top ] [ m_sgfft ] [ Functions ]

NAME

 fftpad_one_nothreadsafe

FUNCTION

 Fast Fourier transform. This is the zero-padding version of "fft" for a single array.
 This version is not thread-safe.

INPUTS

  fftcache=size of the cache (kB)
  mgfft=maximum size of 1D FFTs
  nd1,nd2,nd3=memory dimension of arr and ftarr
  n1,n2,n3=physical dimension of the transform
  arr(2,nd1,nd2,nd3)=input complex array with alternating real and imaginary
    elements; data resides in 2*n1*n2*n3 of this array, spread out.
  ris=(real(dp)) sign of exponential in transform
  gbound(2*mgfft+8,2)=sphere boundary info

OUTPUT

  ftarr(2,nd1,nd2,nd3)=working space for transform and contains output

SIDE EFFECTS

  arr(2,nd1,nd2,nd3) is modified by sg_fftpx,sg_ffty,sg_fftz.

NOTES

  mfac sets maximum number of factors (5, 4, 3, or 2) which may be
  contained within any n1, n2, or n3
  mg sets the maximum 1 dimensional fft length (any one of n1, n2, or n3)
  XG: the signification of mg is changed with respect to fft3dp !!!

SOURCE

651 subroutine fftpad_one_nothreadsafe(fftcache,mgfft,nd1,nd2,nd3,n1,n2,n3,arr,ftarr,ris,gbound)
652 
653 !Arguments ------------------------------------
654 !scalars
655  integer,intent(in) :: fftcache,mgfft,n1,n2,n3,nd1,nd2,nd3
656  real(dp),intent(in) :: ris
657 !arrays
658  integer,intent(in) :: gbound(2*mgfft+8,2)
659  real(dp),intent(inout) :: arr(2,nd1,nd2,nd3)
660  real(dp),intent(out) :: ftarr(2,nd1,nd2,nd3)
661 
662 !Local variables-------------------------------
663 !scalars
664  integer,parameter :: mfac=11
665  integer :: g3max,g3min,i2,ic,n1i,n3i,n3p
666 #ifdef DEBUG_MODE
667  character(len=500) :: message
668 #endif
669 !arrays
670  integer :: aft(mfac),bef(mfac),ind(mg),now(mfac)
671  real(dp) :: trig(2,mg)
672 
673 ! *************************************************************************
674 
675 #ifdef DEBUG_MODE
676 !Check that dimension is not exceeded
677  if (n1>mg.or.n2>mg.or.n3>mg) then
678    write(message, '(a,3i10,a,i10)')&
679 &   'one of the dimensions n1,n2,n3=',n1,n2,n3,' exceeds the allowed dimension mg=',mg
680    ABI_BUG(message)
681  end if
682 #endif
683 
684  g3min=gbound(3,2)
685  g3max=gbound(4,2)
686 
687 !--------------------------------------------------------------------------
688 
689  if (abs(ris-one)<tol12) then
690 
691 !  Handle G -> r  transform (G sphere to fft box)
692 
693 !  Transform along x direction
694    call sg_ctrig(n1,trig,aft,bef,now,ris,ic,ind,mfac,mg)
695 
696 !  Zero out the untransformed (0) data part of the work array
697 !  -- at every (y,z) there are 0 s to be added to the ends of
698 !  the x data so have to zero whole thing.
699    ftarr(:,:,:,:)=0.0d0
700 
701 !  Note the passing of the relevant part of gbound
702    call sg_fftpx(fftcache,mfac,mg,mgfft,nd1,nd2,nd3,n2,n3,&
703 &   arr,ftarr,trig,aft,now,bef,ris,ind,ic,gbound(3,2))
704 
705 !  Transform along y direction in two regions of z
706    if (n2/=n1)then
707      call sg_ctrig(n2,trig,aft,bef,now,ris,ic,ind,mfac,mg)
708    end if
709 
710 !  First y transform: z=1..g3max+1
711    n3p=g3max+1
712    n1i=1 ; n3i=1
713    call sg_ffty(fftcache,mfac,mg,nd1,nd2,nd3,n1i,n1,n3i,n3p,ftarr,arr,&
714 &   trig,aft,now,bef,ris,ind,ic)
715 
716 !  Zero out the untransformed (0) data part of the work array
717 !  -- only need to zero specified ranges of z
718    arr(:,:,:,n3p+1:g3min+n3)=0.0d0
719 
720 !  Second y transform: z=g3min+1..0 (wrapped around)
721    n3p=-g3min
722    if (n3p>0) then
723      n3i=1+g3min+n3 ; n1i=1
724      call sg_ffty(fftcache,mfac,mg,nd1,nd2,nd3,n1i,n1,n3i,n3,ftarr,arr,&
725 &     trig,aft,now,bef,ris,ind,ic)
726    end if
727 
728 !  Transform along z direction
729    if (n3/=n2) then
730      call sg_ctrig(n3,trig,aft,bef,now,ris,ic,ind,mfac,mg)
731    end if
732 
733 !$OMP PARALLEL DO
734    do i2=1,n2
735      call sg_fftz(mfac,mg,nd1,nd2,nd3,n1,i2,i2,arr,ftarr,&
736 &     trig,aft,now,bef,ris,ind,ic)
737    end do
738 
739  else
740 
741 !  *************************************************
742 !  Handle r -> G transform (from fft box to G sphere)
743 
744 !  Transform along z direction
745    call sg_ctrig(n3,trig,aft,bef,now,ris,ic,ind,mfac,mg)
746 
747 !$OMP PARALLEL DO
748    do i2=1,n2
749      call sg_fftz(mfac,mg,nd1,nd2,nd3,n1,i2,i2,arr,ftarr,&
750 &     trig,aft,now,bef,ris,ind,ic)
751    end do
752 
753 !  Transform along y direction in two regions of z
754    if (n2/=n3) then
755      call sg_ctrig(n2,trig,aft,bef,now,ris,ic,ind,mfac,mg)
756    end if
757 
758 !  First y transform: z=1..g3max+1
759    n3p=g3max+1
760    n1i=1 ; n3i=1
761    call sg_ffty(fftcache,mfac,mg,nd1,nd2,nd3,n1i,n1,n3i,n3p,ftarr,arr,&
762 &   trig,aft,now,bef,ris,ind,ic)
763 
764 !  Second y transform: z=g3min+1..0 (wrapped around)
765    n3p=-g3min
766    if (n3p>0) then
767      n1i=1 ; n3i=1+g3min+n3
768      call sg_ffty(fftcache,mfac,mg,nd1,nd2,nd3,n1i,n1,n3i,n3,ftarr,arr,&
769 &     trig,aft,now,bef,ris,ind,ic)
770    end if
771 
772 !  Transform along x direction
773    if (n1/=n2) then
774      call sg_ctrig(n1,trig,aft,bef,now,ris,ic,ind,mfac,mg)
775    end if
776 
777 !  Zero out the untransformed (0) data part of the work array
778 !  -- at every (y,z) there are 0 s to be added to the ends of
779 !  the x data so have to zero whole thing.
780    ftarr(:,:,:,:)=0.0d0
781 
782 !  Note the passing of the relevant part of gbound
783    call sg_fftpx(fftcache,mfac,mg,mgfft,nd1,nd2,nd3,n2,n3,&
784 &   arr,ftarr,trig,aft,now,bef,ris,ind,ic,gbound(3,2))
785 
786 !  Data is now ready to be extracted from fft box to sphere
787  end if
788 
789 end subroutine fftpad_one_nothreadsafe

m_sgfft/fftrisc_one_nothreadsafe [ Functions ]

[ Top ] [ m_sgfft ] [ Functions ]

NAME

 fftrisc_one_nothreadsafe

FUNCTION

 Carry out Fourier transforms between real and reciprocal (G) space,
 for wavefunctions, contained in a sphere in reciprocal space,
 in both directions. Also accomplish some post-processing.

NOTES

 Specifically uses rather sophisticated algorithms, based on S Goedecker
 routines, specialized for superscalar RISC architecture.
 Zero padding : saves 7/12 execution time
 Bi-dimensional data locality in most of the routine : cache reuse
 For k-point (0 0 0) : takes advantage of symmetry of data.
 Note however that no blocking is used, in both 1D z-transform
 or subsequent 2D transform. This should be improved.

 * This routine is not thread-safe due to the presence of variables with the save attribute!
   DO NOT CALL THIS ROUTINE INSIDE A OPENMP PARALLEL REGION

INPUTS

  cplex= if 1 , denpot is real, if 2 , denpot is complex
     (cplex=2 only allowed for option=2 when istwf_k=1)
     one can also use cplex=0 if option=0 or option=3
  fofgin(2,npwin)=holds input wavefunction in G vector basis sphere.
  gboundin(2*mgfft+8,2)=sphere boundary info for reciprocal to real space
  gboundout(2*mgfft+8,2)=sphere boundary info for real to reciprocal space
  istwf_k=option parameter that describes the storage of wfs
  kg_kin(3,npwin)=reduced planewave coordinates, input
  kg_kout(3,npwout)=reduced planewave coordinates, output
  mgfft=maximum size of 1D FFTs
  ngfft(18)=contain all needed information about 3D FFT, see ~abinit/doc/variables/vargs.htm#ngfft
  npwin=number of elements in fofgin array (for option 0, 1 and 2)
  npwout=number of elements in fofgout array (for option 2 and 3)
  n4,n5,n6=ngfft(4),ngfft(5),ngfft(6), dimensions of fofr.
  option= if 0: do direct FFT
          if 1: do direct FFT, then sum the density
          if 2: do direct FFT, multiply by the potential, then do reverse FFT
          if 3: do reverse FFT only
  weight=weight to be used for the accumulation of the density in real space
          (needed only when option=1)

OUTPUT

  (see side effects)

OPTIONS

  The different options are:
  - reciprocal to real space and output the result (when option=0),
  - reciprocal to real space and accumulate the density (when option=1) or
  - reciprocal to real space, apply the local potential to the wavefunction
    in real space and produce the result in reciprocal space (when option=2)
  - real space to reciprocal space (when option=3).
  option=0 IS NOT ALLOWED when istwf_k>2
  option=3 IS NOT ALLOWED when istwf_k>=2

SIDE EFFECTS

  for option==0, fofgin(2,npwin)=holds input wavefunction in G sphere;
                 fofr(2,n4,n5,n6) contains the Fourier Transform of fofgin;
                 no use of denpot, fofgout and npwout.
  for option==1, fofgin(2,npwin)=holds input wavefunction in G sphere;
                 denpot(cplex*n4,n5,n6) contains the input density at input,
                 and the updated density at output;
                 no use of fofgout and npwout.
  for option==2, fofgin(2,npwin)=holds input wavefunction in G sphere;
                 denpot(cplex*n4,n5,n6) contains the input local potential;
                 fofgout(2,npwout) contains the output function;
  for option==3, fofr(2,n4,n5,n6) contains the real space wavefunction;
                 fofgout(2,npwout) contains its Fourier transform;
                 no use of fofgin and npwin.

SOURCE

3987 subroutine fftrisc_one_nothreadsafe(cplex,denpot,fofgin,fofgout,fofr,gboundin,gboundout,istwf_k,&
3988 & kg_kin,kg_kout,mgfft,ngfft,npwin,npwout,n4,n5,n6,option,weight_r,weight_i)
3989 
3990 !Arguments ------------------------------------
3991 !scalars
3992  integer,intent(in) :: cplex,istwf_k,mgfft,n4,n5,n6,npwin,npwout,option
3993  real(dp),intent(in) :: weight_i,weight_r
3994 !arrays
3995  integer,intent(in) :: gboundin(2*mgfft+8,2),gboundout(2*mgfft+8,2)
3996  integer,intent(in) :: kg_kin(3,npwin),kg_kout(3,npwout),ngfft(18)
3997  real(dp),intent(in) :: fofgin(2,npwin)
3998  real(dp),intent(inout) :: denpot(cplex*n4,n5,n6),fofr(2,n4,n5,n6)
3999  real(dp),intent(out) :: fofgout(2,npwout)
4000 
4001 !Local variables-------------------------------
4002 !scalars
4003  integer,parameter :: mfac=11
4004  integer,save :: ic1,ic2,ic3,ic4,ic5,ic6,n1_save=0,n2_save=0,n3_save=0
4005  integer :: fftcache,g2max,g2min,i1,i1max,i2,i3,i3inv,ig,igb
4006  integer :: igb_inv,igbmax,ii2,lot,lotin,lotout,mgb,n1
4007  integer :: n1half1,n1halfm,n1i,n2,n2half1,n3,n4half1,n5half1,nfftot,ngbin
4008  integer :: ngbout,nlot,nproc_omp
4009  real(dp) :: ai,ar,fraction,norm,phai,phar,wkim,wkre
4010  character(len=500) :: message
4011 !arrays
4012  integer,save :: aft1(mfac),aft2(mfac),aft3(mfac),aft4(mfac),aft5(mfac)
4013  integer,save :: aft6(mfac),bef1(mfac),bef2(mfac),bef3(mfac),bef4(mfac)
4014  integer,save :: bef5(mfac),bef6(mfac),ind1(mg),ind2(mg),ind3(mg),ind4(mg)
4015  integer,save :: ind5(mg),ind6(mg),now1(mfac),now2(mfac),now3(mfac),now4(mfac)
4016  integer,save :: now5(mfac),now6(mfac)
4017  integer :: gbound_dum(4)
4018  integer,allocatable :: indpw_kin(:,:),indpw_kout(:,:)
4019  real(dp),save :: trig1(2,mg),trig2(2,mg),trig3(2,mg),trig4(2,mg),trig5(2,mg)
4020  real(dp),save :: trig6(2,mg)
4021  real(dp),allocatable :: pha1(:,:),pha2(:,:),pha3(:,:),wk1d_a(:,:,:,:)
4022  real(dp),allocatable :: wk1d_b(:,:,:,:),wk2d_a(:,:,:,:),wk2d_b(:,:,:,:)
4023  real(dp),allocatable :: wk2d_c(:,:,:,:),wk2d_d(:,:,:,:)
4024 #if defined HAVE_OPENMP
4025  integer,external :: OMP_GET_NUM_THREADS
4026 #endif
4027 
4028 ! *************************************************************************
4029 
4030  if(istwf_k>2 .and. option==0)then
4031    write(message,'(a,i0)')' option=0 is not allowed with istwf_k=',istwf_k
4032    ABI_BUG(message)
4033  end if
4034 
4035  if(istwf_k>=2 .and. option==3)then
4036    write(message,'(a,i0)')' option=3 is not allowed with istwf_k=',istwf_k
4037    ABI_BUG(message)
4038  end if
4039 
4040 !For all other tests of validity of inputs, assume that they
4041 !have been done in the calling routine
4042 
4043  n1=ngfft(1) ; n2=ngfft(2) ; n3=ngfft(3) ; nfftot=n1*n2*n3
4044  fftcache=ngfft(8)
4045 
4046  if(option/=3)then
4047    ABI_MALLOC(indpw_kin,(4,npwin))
4048    call indfftrisc(gboundin(3:3+2*mgfft+4,1),indpw_kin,kg_kin,mgfft,ngbin,ngfft,npwin)
4049  end if
4050  if(option==2 .or. option==3)then
4051    ABI_MALLOC(indpw_kout,(4,npwout))
4052    call indfftrisc(gboundout(3:3+2*mgfft+4,1),indpw_kout,kg_kout,mgfft,ngbout,ngfft,npwout)
4053  end if
4054 
4055 !Define the dimension of the first work arrays, for 1D transforms along z ,
4056 !taking into account the need to avoid the cache trashing
4057  if(option==2)then
4058    mgb=max(ngbin,ngbout)
4059  else if(option==0 .or. option==1)then
4060    mgb=ngbin ; ngbout=1
4061  else if(option==3)then
4062    mgb=ngbout ; ngbin=1
4063  end if
4064 
4065  if(mod(mgb,2)/=1)mgb=mgb+1
4066 
4067 !Initialise openmp, if needed
4068 !$OMP PARALLEL
4069 !$OMP SINGLE
4070  nproc_omp=1
4071 #if defined HAVE_OPENMP
4072  nproc_omp=OMP_GET_NUM_THREADS()
4073 #endif
4074 !$OMP END SINGLE
4075 !$OMP END PARALLEL
4076 
4077 !For the treatment of the z transform,
4078 !one tries to use only a fraction of the cache, since the
4079 !treatment of the array wk1d_a will not involve contiguous segments
4080  fraction=0.25
4081 !First estimation of lot and nlot
4082  lot=(fftcache*fraction*1000)/(n3*8*2)+1
4083 !Select the smallest integer multiple of nproc_omp, larger
4084 !or equal to nlot. In this way, the cache size is not exhausted,
4085 !and one takes care correctly of the number of processors.
4086 !Treat separately the in and out cases
4087  nlot=(ngbin-1)/lot+1
4088  nlot=nproc_omp*((nlot-1)/nproc_omp+1)
4089  lotin=(ngbin-1)/nlot+1
4090  nlot=(ngbout-1)/lot+1
4091  nlot=nproc_omp*((nlot-1)/nproc_omp+1)
4092  lotout=(ngbout-1)/nlot+1
4093 !The next line impose only one lot. Usually, comment it.
4094 !lotin=mgb ; lotout=mgb
4095 
4096 !Compute auxiliary arrays needed for FFTs
4097  if(n1/=n1_save)then
4098    call sg_ctrig(n1,trig1,aft1,bef1,now1,one,ic1,ind1,mfac,mg)
4099    call sg_ctrig(n1,trig4,aft4,bef4,now4,-one,ic4,ind4,mfac,mg)
4100    n1_save=n1
4101  end if
4102  if(n2/=n2_save)then
4103    call sg_ctrig(n2,trig2,aft2,bef2,now2,one,ic2,ind2,mfac,mg)
4104    call sg_ctrig(n2,trig5,aft5,bef5,now5,-one,ic5,ind5,mfac,mg)
4105    n2_save=n2
4106  end if
4107  if(n3/=n3_save)then
4108    call sg_ctrig(n3,trig3,aft3,bef3,now3,one,ic3,ind3,mfac,mg)
4109    call sg_ctrig(n3,trig6,aft6,bef6,now6,-one,ic6,ind6,mfac,mg)
4110    n3_save=n3
4111  end if
4112 
4113 !------------------------------------------------------------------
4114 !Here, call general k-point code
4115 
4116  if(istwf_k==1)then
4117 
4118 !  Note that the z transform will appear as a y transform
4119    ABI_MALLOC(wk1d_a,(2,mgb,n3,1))
4120    ABI_MALLOC(wk1d_b,(2,mgb,n3,1))
4121 
4122    if(option/=3)then
4123 
4124 !$OMP PARALLEL DO DEFAULT(PRIVATE) SHARED(n3,ngbin,wk1d_a)
4125      do i3=1,n3
4126        do igb=1,ngbin
4127          wk1d_a(1,igb,i3,1)=zero
4128          wk1d_a(2,igb,i3,1)=zero
4129        end do
4130      end do
4131 !$OMP END PARALLEL DO
4132 
4133 !    Insert fofgin into the work array
4134 !$OMP PARALLEL DO DEFAULT(PRIVATE) SHARED(fofgin,indpw_kin,npwin,wk1d_a)
4135      do ig=1,npwin
4136        igb=indpw_kin(4,ig) ; i3=indpw_kin(3,ig)
4137        wk1d_a(1,igb,i3,1)=fofgin(1,ig)
4138        wk1d_a(2,igb,i3,1)=fofgin(2,ig)
4139      end do
4140 !$OMP END PARALLEL DO
4141 
4142 !    Go from wk1d_a to wk1d_b, using 1D FFTs on the z direction
4143 !    However, due to special packing of data, use routine ffty
4144 !$OMP PARALLEL DO SHARED(aft3,bef3,fftcache,ind3,ic3,lotin,mgb)&
4145 !$OMP&SHARED(ngbin,now3,n3,trig3,wk1d_a,wk1d_b)&
4146 !$OMP&PRIVATE(igb,igbmax)
4147      do igb=1,ngbin,lotin
4148        igbmax=min(igb+lotin-1,ngbin)
4149 !      Go from wk1d_a to wk1d_b, using 1D FFTs on the z direction
4150 !      However, due to special packing of data, use routine ffty
4151        call sg_ffty(fftcache,mfac,mg,mgb,n3,1,igb,igbmax,1,1,wk1d_a,wk1d_b, &
4152 &       trig3,aft3,now3,bef3,one,ind3,ic3)
4153      end do
4154 !$OMP END PARALLEL DO
4155 
4156    end if !  if(option/=3)
4157 
4158 !  Do-loop on the planes stacked in the z direction
4159 !$OMP PARALLEL DEFAULT(PRIVATE) &
4160 !$OMP&SHARED(aft1,aft2,aft4,aft5,bef1,bef2,bef4,bef5,cplex,denpot) &
4161 !$OMP&SHARED(fftcache,fofr,gboundin,gboundout)&
4162 !$OMP&SHARED(ic1,ic2,ic4,ic5,ind1,ind2,ind4) &
4163 !$OMP&SHARED(ind5,indpw_kin,indpw_kout,mgb,n1,n2,n3,n4,n5,ngbin) &
4164 !$OMP&SHARED(ngbout,now1,now2,now4,now5,option,trig1,trig2,trig4,trig5) &
4165 !$OMP&SHARED(weight_r,weight_i,wk1d_a,wk1d_b)
4166 
4167 !  Allocate two 2-dimensional work arrays
4168    ABI_MALLOC(wk2d_a,(2,n4,n5,1))
4169    ABI_MALLOC(wk2d_b,(2,n4,n5,1))
4170 !$OMP DO
4171    do i3=1,n3
4172 
4173      if(option/=3)then
4174 !      Zero the values on the current plane
4175 !      wk2d_a(1:2,1:n1,1:n2,1)=zero
4176        do i2=1,n2
4177          do i1=1,n1
4178            wk2d_a(1,i1,i2,1)=zero
4179            wk2d_a(2,i1,i2,1)=zero
4180          end do
4181        end do
4182 !      Copy the data in the current plane
4183        do igb=1,ngbin
4184          i1=indpw_kin(1,igb) ; i2=indpw_kin(2,igb)
4185          wk2d_a(1,i1,i2,1)=wk1d_b(1,igb,i3,1)
4186          wk2d_a(2,i1,i2,1)=wk1d_b(2,igb,i3,1)
4187        end do
4188 !      Perform x transform, taking into account arrays of zeros
4189        g2min=gboundin(3,1) ; g2max=gboundin(4,1)
4190        if ( g2min+n2 >= g2max+2 ) then
4191          do i2=g2max+2,g2min+n2
4192            do i1=1,n1
4193              wk2d_b(1,i1,i2,1)=zero
4194              wk2d_b(2,i1,i2,1)=zero
4195            end do
4196          end do
4197        end if
4198        gbound_dum(1)=1 ; gbound_dum(2)=1
4199        gbound_dum(3)=g2min ; gbound_dum(4)=g2max
4200        call sg_fftpx(fftcache,mfac,mg,0,n4,n5,1,n2,1,wk2d_a,wk2d_b,&
4201 &       trig1,aft1,now1,bef1,one,ind1,ic1,gbound_dum)
4202 !      Perform y transform
4203        n1i=1
4204        call sg_ffty(fftcache,mfac,mg,n4,n5,1,n1i,n1,1,1,wk2d_b,wk2d_a, &
4205 &       trig2,aft2,now2,bef2,one,ind2,ic2)
4206 !      The wave function is now in real space, for the current plane
4207      end if
4208 
4209      if(option==0)then ! Copy the transformed function at the right place
4210        do i2=1,n2
4211          do i1=1,n1
4212            fofr(1,i1,i2,i3)=wk2d_a(1,i1,i2,1)
4213            fofr(2,i1,i2,i3)=wk2d_a(2,i1,i2,1)
4214          end do
4215        end do
4216      end if
4217 
4218      if(option==1)then ! Accumulate density
4219        do i2=1,n2
4220          do i1=1,n1
4221            denpot(i1,i2,i3)=denpot(i1,i2,i3)+weight_r*wk2d_a(1,i1,i2,1)**2+weight_i*wk2d_a(2,i1,i2,1)**2
4222          end do
4223        end do
4224      end if
4225 
4226      if(option==2)then ! Apply local potential
4227        if(cplex==1)then
4228          do i2=1,n2
4229            do i1=1,n1
4230              wk2d_a(1,i1,i2,1)=denpot(i1,i2,i3)*wk2d_a(1,i1,i2,1)
4231              wk2d_a(2,i1,i2,1)=denpot(i1,i2,i3)*wk2d_a(2,i1,i2,1)
4232            end do
4233          end do
4234        else
4235          do i2=1,n2
4236            do i1=1,n1
4237              wkre=wk2d_a(1,i1,i2,1)
4238              wkim=wk2d_a(2,i1,i2,1)
4239              wk2d_a(1,i1,i2,1)=denpot(2*i1-1,i2,i3)*wkre -denpot(2*i1  ,i2,i3)*wkim
4240              wk2d_a(2,i1,i2,1)=denpot(2*i1-1,i2,i3)*wkim +denpot(2*i1  ,i2,i3)*wkre
4241            end do
4242          end do
4243        end if
4244      end if
4245 
4246      if(option==3)then ! Copy the function to be tranformed at the right place
4247        do i2=1,n2
4248          do i1=1,n1
4249            wk2d_a(1,i1,i2,1)=fofr(1,i1,i2,i3)
4250            wk2d_a(2,i1,i2,1)=fofr(2,i1,i2,i3)
4251          end do
4252        end do
4253      end if
4254 
4255      if(option==2 .or. option==3)then  ! Perform y transform
4256        n1i=1
4257        call sg_ffty(fftcache,mfac,mg,n4,n5,1,n1i,n1,1,1,wk2d_a,wk2d_b, &
4258 &       trig5,aft5,now5,bef5,-one,ind5,ic5)
4259 !      Perform x transform, taking into account arrays of zeros
4260        gbound_dum(1)=1 ; gbound_dum(2)=1
4261        gbound_dum(3)=gboundout(3,1) ; gbound_dum(4)=gboundout(4,1)
4262        call sg_fftpx(fftcache,mfac,mg,0,n4,n5,1,n2,1,wk2d_b,wk2d_a,&
4263 &       trig4,aft4,now4,bef4,-one,ind4,ic4,gbound_dum)
4264 !      Copy the data from the current plane to wk1d_b
4265        do igb=1,ngbout
4266          i1=indpw_kout(1,igb) ; i2=indpw_kout(2,igb)
4267          wk1d_b(1,igb,i3,1)=wk2d_a(1,i1,i2,1)
4268          wk1d_b(2,igb,i3,1)=wk2d_a(2,i1,i2,1)
4269        end do
4270      end if
4271 
4272 !    End loop on planes
4273    end do
4274 !$OMP END DO
4275    ABI_FREE(wk2d_a)
4276    ABI_FREE(wk2d_b)
4277 !$OMP END PARALLEL
4278 
4279    if(option==2 .or. option==3)then
4280 
4281 !    Go from wk1d_b to wk1d_a, using 1D FFTs on the z direction
4282 !    However, due to special packing of data, use routine ffty
4283 !$OMP PARALLEL DO SHARED(aft6,bef6,fftcache,ind6,ic6,lotout,mgb)&
4284 !$OMP&SHARED(ngbout,now6,n3,trig6,wk1d_a,wk1d_b)&
4285 !$OMP&PRIVATE(igb,igbmax)
4286      do igb=1,ngbout,lotout
4287        igbmax=min(igb+lotout-1,ngbout)
4288 !      Go from wk1d_b to wk1d_a, using 1D FFTs on the z direction
4289 !      However, due to special packing of data, use routine ffty
4290        call sg_ffty(fftcache,mfac,mg,mgb,n3,1,igb,igbmax,1,1,wk1d_b,wk1d_a, &
4291 &       trig6,aft6,now6,bef6,-one,ind6,ic6)
4292 
4293      end do
4294 !$OMP END PARALLEL DO
4295 
4296 !    Transfer the data in the output array, after normalization
4297      norm=1.d0/dble(nfftot)
4298 !$OMP PARALLEL DO DEFAULT(PRIVATE) SHARED(fofgout,indpw_kout,norm,npwout,wk1d_a)
4299      do ig=1,npwout
4300        igb=indpw_kout(4,ig) ; i3=indpw_kout(3,ig)
4301        fofgout(1,ig)=wk1d_a(1,igb,i3,1)*norm
4302        fofgout(2,ig)=wk1d_a(2,igb,i3,1)*norm
4303      end do
4304 !$OMP END PARALLEL DO
4305    end if
4306 
4307    ABI_FREE(wk1d_a)
4308    ABI_FREE(wk1d_b)
4309 
4310 !  End general k-point part
4311  end if
4312 
4313 !------------------------------------------------------------------
4314 !Here, use of time-reversal symmetry
4315 
4316  if(istwf_k>=2)then
4317 
4318    n1half1=n1/2+1 ; n1halfm=(n1+1)/2
4319    n2half1=n2/2+1
4320 !  n4half1 or n5half1 are the odd integers >= n1half1 or n2half1
4321    n4half1=(n1half1/2)*2+1
4322    n5half1=(n2half1/2)*2+1
4323 !  Note that the z transform will appear as a y transform
4324    ABI_MALLOC(wk1d_a,(2,mgb,n3,1))
4325    ABI_MALLOC(wk1d_b,(2,mgb,n3,1))
4326 
4327    if(istwf_k/=2)then
4328      ABI_MALLOC(pha1,(2,n1))
4329      ABI_MALLOC(pha2,(2,n2))
4330      ABI_MALLOC(pha3,(3,n3))
4331      do i1=1,n1
4332        pha1(1,i1)=cos(dble(i1-1)*pi/dble(n1))
4333        pha1(2,i1)=sin(dble(i1-1)*pi/dble(n1))
4334      end do
4335      do i2=1,n2
4336        pha2(1,i2)=cos(dble(i2-1)*pi/dble(n2))
4337        pha2(2,i2)=sin(dble(i2-1)*pi/dble(n2))
4338      end do
4339      do i3=1,n3
4340        pha3(1,i3)=cos(dble(i3-1)*pi/dble(n3))
4341        pha3(2,i3)=sin(dble(i3-1)*pi/dble(n3))
4342      end do
4343    end if
4344 
4345    if(option/=3)then
4346 
4347 !    Zero the components of wk1d_a
4348 !$OMP PARALLEL DO DEFAULT(PRIVATE) SHARED(n3,ngbin,wk1d_a)
4349      do i3=1,n3
4350        do igb=1,ngbin
4351          wk1d_a(1,igb,i3,1)=zero
4352          wk1d_a(2,igb,i3,1)=zero
4353        end do
4354      end do
4355 !$OMP END PARALLEL DO
4356 
4357 !    Insert fofgin into the work array
4358 !$OMP PARALLEL DO DEFAULT(PRIVATE) SHARED(fofgin,indpw_kin,npwin,wk1d_a)
4359      do ig=1,npwin
4360        igb=indpw_kin(4,ig) ; i3=indpw_kin(3,ig)
4361        wk1d_a(1,igb,i3,1)=fofgin(1,ig)
4362        wk1d_a(2,igb,i3,1)=fofgin(2,ig)
4363      end do
4364 !$OMP END PARALLEL DO
4365 
4366 !    Must complete the i2=1 plane when $k_y \equiv 0$
4367 
4368 !    Take care of i1=1 when $k_x \equiv 0$
4369      if(istwf_k==2)then
4370 !      Take care of i1=1
4371        do i3=n3/2+1,n3
4372          i3inv=n3+2-i3
4373          wk1d_a(1,1,i3,1)= wk1d_a(1,1,i3inv,1)
4374          wk1d_a(2,1,i3,1)=-wk1d_a(2,1,i3inv,1)
4375        end do
4376      else if(istwf_k==4)then
4377 !      Take care of i1=1
4378        do i3=n3/2+1,n3
4379          i3inv=n3+1-i3
4380          wk1d_a(1,1,i3,1)= wk1d_a(1,1,i3inv,1)
4381          wk1d_a(2,1,i3,1)=-wk1d_a(2,1,i3inv,1)
4382        end do
4383      end if
4384 
4385 !    Now, take care of other i1 values, except i3==1 when $k_z \equiv 0$
4386      i1max=gboundin(6,1)+1
4387      if(istwf_k==2)then
4388 !$OMP PARALLEL DO DEFAULT(PRIVATE) SHARED(i1max,n3,wk1d_a)
4389        do igb=2,2*i1max-1
4390          igb_inv=2*i1max+1-igb
4391          do i3=n3/2+1,n3
4392            i3inv=n3+2-i3
4393            wk1d_a(1,igb,i3,1)= wk1d_a(1,igb_inv,i3inv,1)
4394            wk1d_a(2,igb,i3,1)=-wk1d_a(2,igb_inv,i3inv,1)
4395          end do
4396        end do
4397 !$OMP END PARALLEL DO
4398 
4399      else if(istwf_k==3)then
4400 !$OMP PARALLEL DO DEFAULT(PRIVATE) SHARED(i1max,n3,wk1d_a)
4401        do igb=1,2*i1max
4402          igb_inv=2*i1max+1-igb
4403          do i3=n3/2+1,n3
4404            i3inv=n3+2-i3
4405            wk1d_a(1,igb,i3,1)= wk1d_a(1,igb_inv,i3inv,1)
4406            wk1d_a(2,igb,i3,1)=-wk1d_a(2,igb_inv,i3inv,1)
4407          end do
4408        end do
4409 !$OMP END PARALLEL DO
4410 
4411      else if(istwf_k==4)then
4412 !$OMP PARALLEL DO DEFAULT(PRIVATE) SHARED(i1max,n3,wk1d_a)
4413        do igb=2,2*i1max-1
4414          igb_inv=2*i1max+1-igb
4415          do i3=n3/2+1,n3
4416            i3inv=n3+1-i3
4417            wk1d_a(1,igb,i3,1)= wk1d_a(1,igb_inv,i3inv,1)
4418            wk1d_a(2,igb,i3,1)=-wk1d_a(2,igb_inv,i3inv,1)
4419          end do
4420        end do
4421 !$OMP END PARALLEL DO
4422 
4423      else if(istwf_k==5)then
4424 !$OMP PARALLEL DO DEFAULT(PRIVATE) SHARED(i1max,n3,wk1d_a)
4425        do igb=1,2*i1max
4426          igb_inv=2*i1max+1-igb
4427          do i3=n3/2+1,n3
4428            i3inv=n3+1-i3
4429            wk1d_a(1,igb,i3,1)= wk1d_a(1,igb_inv,i3inv,1)
4430            wk1d_a(2,igb,i3,1)=-wk1d_a(2,igb_inv,i3inv,1)
4431          end do
4432        end do
4433 !$OMP END PARALLEL DO
4434 
4435      end if
4436 
4437 !    Now, i3==1
4438      if(istwf_k==2)then
4439        do igb=2,i1max
4440          igb_inv=2*i1max+1-igb
4441          wk1d_a(1,igb_inv,1,1)= wk1d_a(1,igb,1,1)
4442          wk1d_a(2,igb_inv,1,1)=-wk1d_a(2,igb,1,1)
4443        end do
4444      else if(istwf_k==3)then
4445        do igb=1,i1max
4446          igb_inv=2*i1max+1-igb
4447          wk1d_a(1,igb_inv,1,1)= wk1d_a(1,igb,1,1)
4448          wk1d_a(2,igb_inv,1,1)=-wk1d_a(2,igb,1,1)
4449        end do
4450      end if
4451 
4452 !    Go from wk1d_a to wk1d_b, using 1D FFTs on the z direction
4453 !    However, due to special packing of data, use routine ffty
4454 !$OMP PARALLEL DO SHARED(aft3,bef3,fftcache,ind3,ic3,lotin,mgb)&
4455 !$OMP&SHARED(ngbin,now3,n3,trig3,wk1d_a,wk1d_b)&
4456 !$OMP&PRIVATE(igb,igbmax)
4457      do igb=1,ngbin,lotin
4458        igbmax=min(igb+lotin-1,ngbin)
4459 !      Go from wk1d_a to wk1d_b, using 1D FFTs on the z direction
4460 !      However, due to special packing of data, use routine ffty
4461        call sg_ffty(fftcache,mfac,mg,mgb,n3,1,igb,igbmax,1,1,wk1d_a,wk1d_b, &
4462 &       trig3,aft3,now3,bef3,one,ind3,ic3)
4463      end do
4464 !$OMP END PARALLEL DO
4465 
4466 !    Change the phase if $k_z \neq 0$
4467      if(istwf_k==4 .or. istwf_k==5 .or. istwf_k==8 .or. istwf_k==9 )then
4468 !$OMP PARALLEL DO DEFAULT(PRIVATE) SHARED(ngbin,n3,pha3,wk1d_b)
4469        do i3=1,n3
4470          phar=pha3(1,i3)
4471          phai=pha3(2,i3)
4472          do igb=1,ngbin
4473            ar=wk1d_b(1,igb,i3,1)
4474            ai=wk1d_b(2,igb,i3,1)
4475            wk1d_b(1,igb,i3,1)=phar*ar-phai*ai
4476            wk1d_b(2,igb,i3,1)=phai*ar+phar*ai
4477          end do
4478        end do
4479 !$OMP END PARALLEL DO
4480      end if
4481 
4482    end if !  if(option/=3)
4483 
4484 !  Do-loop on the planes stacked in the z direction
4485 
4486 !$OMP PARALLEL DEFAULT(PRIVATE) &
4487 !$OMP&SHARED(aft1,aft2,aft4,aft5,bef1,bef2,bef4,bef5,denpot) &
4488 !$OMP&SHARED(fftcache,fofr,gboundin,ic1,ic2,ic4,ic5,ind1,ind2,ind4,ind5) &
4489 !$OMP&SHARED(indpw_kin,indpw_kout,istwf_k,mgb,n1,n1half1) &
4490 !$OMP&SHARED(n1halfm,n2,n2half1,n3,n4,n5,ngbin,ngbout) &
4491 !$OMP&SHARED(now1,now2,now4,now5,option,pha1,pha2,trig1) &
4492 !$OMP&SHARED(trig2,trig4,trig5,weight_r,weight_i,wk1d_a,wk1d_b)
4493 
4494 !  Allocate two 2-dimensional work arrays
4495    ABI_MALLOC(wk2d_a,(2,n4,n5,1))
4496    ABI_MALLOC(wk2d_b,(2,n4,n5,1))
4497    ABI_MALLOC(wk2d_c,(2,2*n1halfm,n5,1))
4498    ABI_MALLOC(wk2d_d,(2,2*n1halfm,n5,1))
4499 !$OMP DO
4500    do i3=1,n3
4501 
4502      g2max=gboundin(4,1)
4503 
4504      if(option/=3)then
4505 !      Zero the values on the current plane : need only from i2=1 to g2max+1
4506        do i2=1,g2max+1
4507          do i1=1,n1
4508            wk2d_a(1,i1,i2,1)=zero
4509            wk2d_a(2,i1,i2,1)=zero
4510          end do
4511        end do
4512 
4513 !      Copy the data in the current plane
4514        do igb=1,ngbin
4515          i1=indpw_kin(1,igb) ; i2=indpw_kin(2,igb)
4516          wk2d_a(1,i1,i2,1)=wk1d_b(1,igb,i3,1)
4517          wk2d_a(2,i1,i2,1)=wk1d_b(2,igb,i3,1)
4518        end do
4519 
4520 !      Perform x transform, taking into account arrays of zeros
4521        call sg_fftx(fftcache,mfac,mg,n4,n5,1,g2max+1,1,wk2d_a,wk2d_b,&
4522 &       trig1,aft1,now1,bef1,one,ind1,ic1)
4523 
4524 !      Change the phase if $k_x \neq 0$
4525        if(istwf_k==3 .or. istwf_k==5 .or. istwf_k==7 .or. istwf_k==9)then
4526          do i1=1,n1
4527            phar=pha1(1,i1)
4528            phai=pha1(2,i1)
4529            do i2=1,g2max+1
4530              ar=wk2d_b(1,i1,i2,1)
4531              ai=wk2d_b(2,i1,i2,1)
4532              wk2d_b(1,i1,i2,1)=phar*ar-phai*ai
4533              wk2d_b(2,i1,i2,1)=phai*ar+phar*ai
4534            end do
4535          end do
4536        end if
4537 
4538 !      Compute symmetric and antisymmetric combinations
4539        if(istwf_k>=2 .and. istwf_k<=5)then
4540          do i1=1,n1half1-1
4541            wk2d_a(1,i1,1,1)=wk2d_b(1,2*i1-1,1,1)
4542            wk2d_a(2,i1,1,1)=wk2d_b(1,2*i1  ,1,1)
4543          end do
4544 !        If n1 odd, must add last data
4545          if((2*n1half1-2)/=n1)then
4546            wk2d_a(1,n1half1,1,1)=wk2d_b(1,n1,1,1)
4547            wk2d_a(2,n1half1,1,1)=zero
4548          end if
4549          ii2=2
4550        else
4551          ii2=1
4552        end if
4553        if( g2max+1 >= ii2)then
4554          do i2=ii2,g2max+1
4555            do i1=1,n1half1-1
4556              wk2d_a(1,i1,i2,1)=        wk2d_b(1,2*i1-1,i2,1)-wk2d_b(2,2*i1,i2,1)
4557              wk2d_a(2,i1,i2,1)=        wk2d_b(2,2*i1-1,i2,1)+wk2d_b(1,2*i1,i2,1)
4558              wk2d_a(1,i1,n2+ii2-i2,1)= wk2d_b(1,2*i1-1,i2,1)+wk2d_b(2,2*i1,i2,1)
4559              wk2d_a(2,i1,n2+ii2-i2,1)=-wk2d_b(2,2*i1-1,i2,1)+wk2d_b(1,2*i1,i2,1)
4560            end do
4561            if((2*n1half1-2)/=n1)then
4562              wk2d_a(1,n1half1,i2,1)=        wk2d_b(1,n1,i2,1)
4563              wk2d_a(2,n1half1,i2,1)=        wk2d_b(2,n1,i2,1)
4564              wk2d_a(1,n1half1,n2+ii2-i2,1)= wk2d_b(1,n1,i2,1)
4565              wk2d_a(2,n1half1,n2+ii2-i2,1)=-wk2d_b(2,n1,i2,1)
4566            end if
4567          end do
4568        end if
4569        if ( n2half1 >= g2max+2 ) then
4570          do i2=g2max+2,n2half1
4571            do i1=1,n1half1-1
4572              wk2d_a(1,i1,i2,1)=zero
4573              wk2d_a(2,i1,i2,1)=zero
4574              wk2d_a(1,i1,n2+ii2-i2,1)=zero
4575              wk2d_a(2,i1,n2+ii2-i2,1)=zero
4576            end do
4577            if((2*n1half1-2)/=n1)then
4578              wk2d_a(1,n1half1,i2,1)=zero
4579              wk2d_a(2,n1half1,i2,1)=zero
4580              wk2d_a(1,n1half1,n2+ii2-i2,1)=zero
4581              wk2d_a(2,n1half1,n2+ii2-i2,1)=zero
4582            end if
4583          end do
4584        end if
4585 
4586        n1i=1
4587        call sg_ffty(fftcache,mfac,mg,n4,n5,1,n1i,n1halfm,1,1,wk2d_a,wk2d_b,&
4588 &       trig2,aft2,now2,bef2,one,ind2,ic2)
4589 
4590 !      Change the phase if $k_y \neq 0$
4591        if(istwf_k>=6 .and. istwf_k<=9)then
4592          do i2=1,n2
4593            phar=pha2(1,i2)
4594            phai=pha2(2,i2)
4595            do i1=1,n1halfm
4596              ar=wk2d_b(1,i1,i2,1)
4597              ai=wk2d_b(2,i1,i2,1)
4598              wk2d_b(1,i1,i2,1)= phar*ar-phai*ai
4599              wk2d_b(2,i1,i2,1)= phai*ar+phar*ai
4600            end do
4601          end do
4602        end if
4603 
4604      end if ! option/=3
4605 
4606 !    The wave function is now in real space, for the current plane,
4607 !    represented by REAL numbers, although packed in the complex array wk2d_b
4608 
4609      g2max=gboundin(4,1)
4610 
4611      if(option==0)then
4612 !      This option is only permitted for istwf_k==2 (Gamma point)
4613 !      Copy the transformed function at the right place
4614        do i2=1,n2
4615          do i1=1,n1half1-1
4616            fofr(1,2*i1-1,i2,i3)=wk2d_b(1,i1,i2,1)
4617            fofr(1,2*i1  ,i2,i3)=wk2d_b(2,i1,i2,1)
4618            fofr(2,2*i1-1,i2,i3)=zero
4619            fofr(2,2*i1  ,i2,i3)=zero
4620          end do
4621 !        If n1 odd, must add last data
4622          if((2*n1half1-2)/=n1)then
4623            fofr(1,n1,i2,i3)=wk2d_b(1,n1half1,i2,1)
4624            fofr(2,n1,i2,i3)=zero
4625          end if
4626        end do
4627      end if
4628 
4629      if(option==1)then ! Accumulate density
4630        do i2=1,n2
4631          do i1=1,n1half1-1
4632            denpot(2*i1-1,i2,i3)=denpot(2*i1-1,i2,i3)+weight_r*wk2d_b(1,i1,i2,1)**2
4633            denpot(2*i1  ,i2,i3)=denpot(2*i1  ,i2,i3)+weight_i*wk2d_b(2,i1,i2,1)**2
4634          end do
4635 !        If n1 odd, must add last data
4636          if((2*n1half1-2)/=n1)then
4637            denpot(n1,i2,i3)=denpot(n1,i2,i3)+weight_r*wk2d_b(1,n1half1,i2,1)**2
4638          end if
4639        end do
4640      end if
4641 
4642      if(option==2)then ! Apply local potential
4643        do i2=1,n2
4644          do i1=1,n1half1-1
4645            wk2d_a(1,i1,i2,1)=denpot(2*i1-1,i2,i3)*wk2d_b(1,i1,i2,1)
4646            wk2d_a(2,i1,i2,1)=denpot(2*i1  ,i2,i3)*wk2d_b(2,i1,i2,1)
4647          end do
4648 !        If n1 odd, must add last data
4649          if((2*n1half1-2)/=n1)then
4650            wk2d_a(1,n1half1,i2,1)=denpot(n1,i2,i3)*wk2d_b(1,n1half1,i2,1)
4651            wk2d_a(2,n1half1,i2,1)=zero
4652          end if
4653        end do
4654      end if
4655 
4656      if(option==3)then
4657 !      This option is only permitted for istwf_k==2 (Gamma point)
4658 !      Copy the transformed function at the right place
4659        do i2=1,n2
4660          do i1=1,n1half1-1
4661            wk2d_b(1,i1,i2,1)=fofr(1,2*i1-1,i2,i3)
4662            wk2d_b(2,i1,i2,1)=fofr(1,2*i1  ,i2,i3)
4663          end do
4664 !        If n1 odd, must add last data
4665          if((2*n1half1-2)/=n1)then
4666            wk2d_b(1,n1half1,i2,1)=fofr(1,n1,i2,i3)
4667          end if
4668        end do
4669      end if
4670 
4671      if(option==2 .or. option==3)then  ! Change the phase if $k_y \neq 0$
4672        if(istwf_k>=6 .and. istwf_k<=9)then
4673          do i2=1,n2
4674            phar=pha2(1,i2)
4675            phai=pha2(2,i2)
4676            do i1=1,n1halfm
4677              ar=wk2d_a(1,i1,i2,1)
4678              ai=wk2d_a(2,i1,i2,1)
4679              wk2d_a(1,i1,i2,1)= phar*ar+phai*ai
4680              wk2d_a(2,i1,i2,1)=-phai*ar+phar*ai
4681            end do
4682          end do
4683        end if
4684 
4685 !      Perform y transform
4686        n1i=1
4687        call sg_ffty(fftcache,mfac,mg,n4,n5,1,n1i,n1halfm,1,1,wk2d_a,wk2d_b, &
4688 &       trig5,aft5,now5,bef5,-one,ind5,ic5)
4689 
4690 !      Decompose symmetric and antisymmetric parts
4691        if(istwf_k>=2 .and. istwf_k<=5)then
4692          do i1=1,n1halfm
4693            wk2d_c(1,2*i1-1,1,1)=wk2d_b(1,i1,1,1)
4694            wk2d_c(2,2*i1-1,1,1)=zero
4695            wk2d_c(1,2*i1,1,1)=wk2d_b(2,i1,1,1)
4696            wk2d_c(2,2*i1,1,1)=zero
4697          end do
4698          ii2=2
4699        else
4700          ii2=1
4701        end if
4702        do i2=ii2,g2max+1
4703          do i1=1,n1halfm
4704            wk2d_c(1,2*i1-1,i2,1)=(wk2d_b(1,i1,i2,1)+wk2d_b(1,i1,n2+ii2-i2,1))*0.5d0
4705            wk2d_c(2,2*i1-1,i2,1)=(wk2d_b(2,i1,i2,1)-wk2d_b(2,i1,n2+ii2-i2,1))*0.5d0
4706            wk2d_c(1,2*i1,i2,1)= ( wk2d_b(2,i1,i2,1)+wk2d_b(2,i1,n2+ii2-i2,1))*0.5d0
4707            wk2d_c(2,2*i1,i2,1)= (-wk2d_b(1,i1,i2,1)+wk2d_b(1,i1,n2+ii2-i2,1))*0.5d0
4708          end do
4709        end do
4710 
4711 !      Change the phase if $k_x \neq 0$
4712        if(istwf_k==3 .or. istwf_k==5 .or. istwf_k==7 .or. istwf_k==9 )then
4713          do i1=1,n1
4714            phar=pha1(1,i1)
4715            phai=pha1(2,i1)
4716            do i2=1,g2max+1
4717              ar=wk2d_c(1,i1,i2,1)
4718              ai=wk2d_c(2,i1,i2,1)
4719              wk2d_c(1,i1,i2,1)= phar*ar+phai*ai
4720              wk2d_c(2,i1,i2,1)=-phai*ar+phar*ai
4721            end do
4722          end do
4723        end if
4724 
4725 !      Perform x transform : for y=1 to g2max+1, to benefit from zeros
4726        call sg_fftx(fftcache,mfac,mg,2*n1halfm,n5,1,g2max+1,1,wk2d_c,wk2d_d,&
4727 &       trig4,aft4,now4,bef4,-one,ind4,ic4)
4728 
4729 !      Copy the data from the current plane to wk1d_b
4730        do igb=1,ngbout
4731          i1=indpw_kout(1,igb) ; i2=indpw_kout(2,igb)
4732          wk1d_b(1,igb,i3,1)=wk2d_d(1,i1,i2,1)
4733          wk1d_b(2,igb,i3,1)=wk2d_d(2,i1,i2,1)
4734        end do
4735 
4736      end if ! option==2 or 3
4737 
4738 !    End loop on planes
4739    end do
4740 
4741 !$OMP END DO
4742    ABI_FREE(wk2d_a)
4743    ABI_FREE(wk2d_b)
4744    ABI_FREE(wk2d_c)
4745    ABI_FREE(wk2d_d)
4746 !$OMP END PARALLEL
4747 
4748    if(option==2 .or. option==3)then
4749 
4750 !    Change the phase if $k_z \neq 0$
4751      if(istwf_k==4 .or. istwf_k==5 .or. istwf_k==8 .or. istwf_k==9 )then
4752 !$OMP PARALLEL DO DEFAULT(PRIVATE) SHARED(ngbout,n3,pha3,wk1d_b)
4753        do i3=1,n3
4754          phar=pha3(1,i3)
4755          phai=pha3(2,i3)
4756          do igb=1,ngbout
4757            ar=wk1d_b(1,igb,i3,1)
4758            ai=wk1d_b(2,igb,i3,1)
4759            wk1d_b(1,igb,i3,1)= phar*ar+phai*ai
4760            wk1d_b(2,igb,i3,1)=-phai*ar+phar*ai
4761          end do
4762        end do
4763 !$OMP END PARALLEL DO
4764      end if
4765 
4766 !    Go from wk1d_b to wk1d_a, using 1D FFTs on the z direction
4767 !    However, due to special packing of data, use routine ffty
4768 !$OMP PARALLEL DO SHARED(aft6,bef6,fftcache,ind6,ic6,lotout,mgb)&
4769 !$OMP&SHARED(ngbout,now6,n3,trig6,wk1d_a,wk1d_b)&
4770 !$OMP&PRIVATE(igb,igbmax)
4771      do igb=1,ngbout,lotout
4772        igbmax=min(igb+lotout-1,ngbout)
4773 !      Go from wk1d_b to wk1d_a, using 1D FFTs on the z direction
4774 !      However, due to special packing of data, use routine ffty
4775        call sg_ffty(fftcache,mfac,mg,mgb,n3,1,igb,igbmax,1,1,wk1d_b,wk1d_a, &
4776 &       trig6,aft6,now6,bef6,-one,ind6,ic6)
4777 
4778      end do
4779 !$OMP END PARALLEL DO
4780 
4781 !    Transfer the data in the output array, after normalization
4782      norm=1.d0/dble(nfftot)
4783 !$OMP PARALLEL DO DEFAULT(PRIVATE) SHARED(fofgout,indpw_kout,norm,npwout,wk1d_a)
4784      do ig=1,npwout
4785        igb=indpw_kout(4,ig) ; i3=indpw_kout(3,ig)
4786        fofgout(1,ig)=wk1d_a(1,igb,i3,1)*norm
4787        fofgout(2,ig)=wk1d_a(2,igb,i3,1)*norm
4788      end do
4789 !$OMP END PARALLEL DO
4790 
4791    end if
4792 
4793    ABI_FREE(wk1d_a)
4794    ABI_FREE(wk1d_b)
4795 
4796    if(istwf_k/=2)then
4797      ABI_FREE(pha1)
4798      ABI_FREE(pha2)
4799      ABI_FREE(pha3)
4800    end if
4801 
4802  end if !  End time-reversal symmetry
4803 
4804 !------------------------------------------------------------------
4805 
4806  if(option/=3) then
4807    ABI_FREE(indpw_kin)
4808  end if
4809  if(option==2 .or. option==3) then
4810    ABI_FREE(indpw_kout)
4811  end if
4812 
4813 end subroutine fftrisc_one_nothreadsafe

m_sgfft/sg_ctrig [ Functions ]

[ Top ] [ m_sgfft ] [ Functions ]

NAME

 sg_ctrig

FUNCTION

 Precalculates trigonometric expressions and bitreversal key IND (Stefan Goedecker lib).

INPUTS

 n=Number of FFT points for 1D FFT.
 ris  = sign of exponential in transform (should be 1 or -1; real)
 mfac = maximum number of factors in 1D FFTs
 mg   = maximum length of 1D FFTs

OUTPUT

 trig(2,mg) TO BE DESCRIBED SB 090902
 aft(mfac) TO BE DESCRIBED SB 090902
 bef(mfac) TO BE DESCRIBED SB 090902
 now(mfac) TO BE DESCRIBED SB 090902
 ic = number of (radix) factors of x transform length (from ctrig)
 ind(mg) TO BE DESCRIBED SB 090902

NOTES

 * This version of sg_ctrig produces cos and tan instead of sin and cos--
   this allows for much greater efficiency on the superscalar architecture
   of ibm rs6000 where floating point multiply and add (FMA) is used.

 * This routine is not thread-safe due to the presence of variables with the save attribute!
   DO NOT CALL THIS ROUTINE INSIDE A OPENMP PARALLEL REGION

TODO

 Should describe arguments
 Should suppress one-letter variables

SOURCE

3700 subroutine sg_ctrig(n,trig,aft,bef,now,ris,ic,ind,mfac,mg)
3701 
3702 !Arguments ------------------------------------
3703 !scalars
3704  integer,intent(in) :: mfac,mg,n
3705  integer,intent(out) :: ic
3706  real(dp),intent(in) :: ris
3707 !arrays
3708  integer,intent(out) :: aft(mfac),bef(mfac),ind(mg),now(mfac)
3709  real(dp),intent(out) :: trig(2,mg)
3710 
3711 !Local variables-------------------------------
3712 !scalars
3713  integer,save :: nextmx=4
3714  integer :: i,ii,inc,irep,j,k,l,next,nh
3715  integer,save :: prime(4)=(/5,4,3,2/)  !"prime" is the set of radices coded elsewhere for fft
3716  real(dp) :: angle,trigc,trigs,twopi
3717  character(len=500) :: message
3718 
3719 ! *************************************************************************
3720 
3721 !**Note**
3722 !2*Pi must not be defined too accurately here or else
3723 !cos(twopi/2) will be exactly 0 and sin/cos below will be
3724 !infinite; if a small error is left in Pi, then sin/cos will
3725 !be about 10**14 and later cos * (sin/cos) will be 1 to within
3726 !about 10**(-14) and the fft routines will work
3727 !The precision on sgi causes the algorithm to fail if
3728 !twopi is defined as 8.d0*atan(1.0d0).
3729 
3730  twopi=6.2831853071795867d0
3731 
3732  angle=ris*twopi/n
3733 !trig(1,0)=1.d0
3734 !trig(2,0)=0.d0
3735  if (mod(n,2)==0) then
3736    nh=n/2
3737    trig(1,nh)=-1.d0
3738    trig(2,nh)=0.d0
3739    do i=1,nh-1
3740      trigc=cos(i*angle)
3741      trigs=sin(i*angle)
3742      trig(1,i)=trigc
3743      trig(2,i)=trigs/trigc
3744      trig(1,n-i)=trigc
3745      trig(2,n-i)=-trigs/trigc
3746    end do
3747  else
3748    nh=(n-1)/2
3749    do i=1,nh
3750      trigc=cos(i*angle)
3751      trigs=sin(i*angle)
3752      trig(1,i)=trigc
3753      trig(2,i)=trigs/trigc
3754      trig(1,n-i)=trigc
3755      trig(2,n-i)=-trigs/trigc
3756    end do
3757  end if
3758 
3759  ic=1
3760  aft(ic)=1
3761  bef(ic)=n
3762  next=1
3763 
3764 !An infinite loop, with exit or cycle instructions
3765  do
3766    if( (bef(ic)/prime(next))*prime(next)<bef(ic) ) then
3767      next=next+1
3768      if (next<=nextmx) then
3769        cycle
3770      else
3771        now(ic)=bef(ic)
3772        bef(ic)=1
3773      end if
3774    else
3775      now(ic)=prime(next)
3776      bef(ic)=bef(ic)/prime(next)
3777    end if
3778    aft(ic+1)=aft(ic)
3779    now(ic+1)=now(ic)
3780    bef(ic+1)=bef(ic)
3781    ic=ic+1
3782    if (ic>mfac) then
3783      write(message, '(a,i0,2a,i0)' )&
3784 &     'number of factors ic=',ic,ch10,&
3785 &     'exceeds dimensioned mfac=',mfac
3786      ABI_BUG(message)
3787    end if
3788    if (bef(ic)/=1) then
3789      aft(ic)=aft(ic)*now(ic)
3790      cycle
3791    end if
3792 !  If not cycled, exit
3793    exit
3794  end do
3795 
3796  ic=ic-1
3797 
3798 !DEBUG
3799 !write(std_out,*) 'now',(now(i),i=1,ic)
3800 !write(std_out,*) 'aft',(aft(i),i=1,ic)
3801 !write(std_out,*) 'bef',(bef(i),i=1,ic)
3802 !ENDDEBUG
3803 
3804  do i=1,n
3805    ind(i)=1
3806  end do
3807 
3808  irep=1
3809  inc=n
3810  do l=ic,1,-1
3811    inc=inc/now(l)
3812    ii=0
3813    do k=1,1+(n-1)/(now(l)*irep)
3814      do j=0,now(l)-1
3815        do i=1,irep
3816          ii=ii+1
3817          ind(ii)=ind(ii)+j*inc
3818        end do
3819      end do
3820    end do
3821    irep=irep*now(l)
3822  end do
3823 
3824  if (irep/=n) then
3825    write(message,'(a,i0,a,i0)')'  irep should equal n ; irep=',irep,' n=',n
3826    ABI_BUG(message)
3827  end if
3828 
3829  if (inc/=1) then
3830    write(message, '(a,i0)' )' inc should equal 1 in sg_ctrig; inc=',inc
3831    ABI_BUG(message)
3832  end if
3833 
3834 end subroutine sg_ctrig

m_sgfft/sg_fft_cc [ Functions ]

[ Top ] [ m_sgfft ] [ Functions ]

NAME

 sg_fft_cc

FUNCTION

 Calculates the discrete Fourier transform:

   ftarr(i1,i2,i3)=exp(ris*i*2*pi*(j1*i1/n1+j2*i2/n2+j3*i3/n3)) arr(j1,j2,j3)

INPUTS

  fftcache=size of the cache (kB)
  n1,n2,n3=physical dimension of the transform
  nd1,nd2,nd3=memory dimension of arr and ftarr
  ndat=Number of FFT transforms
  isign=+1 for G-->R, -1 for R-->G
  arr(2,nd1*nd2*nd3*ndat)=input complex array with alternating real and imaginary
  elements; data resides in 2*n1*n2*n3 of this array, spread out.
  (see SIDE FFECTS).

OUTPUT

  ftarr(2,nd1*nd2*nd3*ndat)=working space for transform and contains output

SIDE EFFECTS

  arr(2,nd1*nd2*nd3*ndat) is modified by sg_fftx,sg_ffty,sg_fftz.

NOTES

  ndi must always be greater or equal to ni.  Recommended choice for nd1
  and nd2 is: ni for ni=odd or ni+1 for ni=even (hence 2*(ni/2)+1);
  nd3 should always be n3.  Note that choosing nd1 or nd2 larger than
  the recommended value can severely degrade efficiency of this routine.
  Avoiding even ndi for nd1 and nd2 avoids cache conflicts on cache machines.
  Each of n1,n2,n3 must be a
  product of the prime factors 2,3,5. If two ni s are equal
  it is recommended to place them behind each other.
  The largest any of these may be is set by parameter "mg" below.
  This fft is particularly efficient for cache architectures.
  Note that the meaning of fftcache has changed from the original
  ncache of SG (that was the maximum number of COMPLEX*16 in the cache)

SOURCE

 90 subroutine sg_fft_cc(fftcache,n1,n2,n3,nd1,nd2,nd3,ndat,isign,arr,ftarr)
 91 
 92 !Arguments ------------------------------------
 93 !scalars
 94  integer,intent(in) :: fftcache,n1,n2,n3,nd1,nd2,nd3,ndat,isign
 95 !arrays
 96  real(dp),intent(inout) :: arr(2,nd1*nd2*nd3*ndat)
 97  real(dp),intent(inout) :: ftarr(2,nd1*nd2*nd3*ndat)
 98 
 99 !Local variables-------------------------------
100 !scalars
101  integer :: idat,start
102 
103 ! *************************************************************************
104 
105  do idat=1,ndat
106    start = 1 + (idat-1)*nd1*nd2*nd3
107    call fft_cc_one_nothreadsafe(fftcache,nd1,nd2,nd3,n1,n2,n3,arr(1,start),ftarr(1,start),real(isign,kind=dp))
108  end do
109 
110 end subroutine sg_fft_cc

m_sgfft/sg_fft_rc [ Functions ]

[ Top ] [ m_sgfft ] [ Functions ]

NAME

 sg_fft_rc

FUNCTION

 Conduct Fourier transform of REAL or COMPLEX function f(r)=fofr defined on
 fft grid in real space, to create complex f(G)=fofg defined on full fft grid
 in reciprocal space, in full storage mode, or the reverse operation.
 For the reverse operation, the final data is divided by nfftot.
 REAL case when cplex=1, COMPLEX case when cplex=2. Usually used for density and potentials.

 There are two different possibilities :
  fftalgb=0 means using the complex-to-complex FFT routine,
   irrespective of the value of cplex
  fftalgb=1 means using a real-to-complex FFT or a complex-to-complex FFT,
   depending on the value of cplex.
  The only real-to-complex FFT available is from SGoedecker library.

INPUTS

 cplex=1 if fofr is real, 2 if fofr is complex
 isign=sign of Fourier transform exponent: current convention uses
  +1 for transforming from G to r
  -1 for transforming from r to G.
 nfft=(effective) number of FFT grid points (for this processor)
 ngfft(18)=contain all needed information about 3D FFT, see ~abinit/doc/variables/vargs.htm#ngfft

OUTPUT

  (see side effects)

SIDE EFFECTS

 Input/Output
 fofg(2,nfft)=f(G), complex.
 fofr(cplex*nfft)=input function f(r) (real or complex)

SOURCE

261 subroutine sg_fft_rc(cplex,fofg,fofr,isign,nfft,ngfft)
262 
263 !Arguments ------------------------------------
264 !scalars
265  integer,intent(in) :: cplex,isign,nfft
266 !arrays
267  integer,intent(in) :: ngfft(18)
268  real(dp),intent(inout) :: fofg(2,nfft),fofr(cplex*nfft)
269 
270 !Local variables-------------------------------
271 !scalars
272  integer,parameter :: mfac=11
273  integer :: fftalg,fftalga,fftalgb,fftcache,i1,i2,i3,ic1,ic2,ic3,index
274  integer :: n1,n1half1,n1halfm,n2,n2half1,n3,n4,n4half1,n5,n5half1,n6
275  real(dp) :: ris,xnorm
276  character(len=500) :: msg
277 !arrays
278  integer :: aft1(mfac),aft2(mfac),aft3(mfac),bef1(mfac),bef2(mfac),bef3(mfac)
279  integer :: ind1(mg),ind2(mg),ind3(mg),now1(mfac),now2(mfac),now3(mfac)
280  real(dp) :: trig1(2,mg),trig2(2,mg),trig3(3,mg)
281  real(dp),allocatable :: wk2d_a(:,:,:,:),wk2d_b(:,:,:,:),wk2d_c(:,:,:,:)
282  real(dp),allocatable :: wk2d_d(:,:,:,:),work1(:,:,:,:),work2(:,:,:,:)
283 
284 ! *************************************************************************
285 
286  !DBG_ENTER("COLL")
287 
288  n1=ngfft(1); n2=ngfft(2); n3=ngfft(3)
289  n4=ngfft(4); n5=ngfft(5); n6=ngfft(6)
290 
291  fftcache=ngfft(8)
292  fftalg  =ngfft(7)
293  fftalga =fftalg/100
294  fftalgb =mod(fftalg,100)/10
295 
296  ris=dble(isign)
297  xnorm=1.0d0/dble(n1*n2*n3)
298 
299  if (fftalgb/=0 .and. fftalgb/=1) then
300    write(msg, '(a,i4,a,a,a,a,a)' )&
301 &   'The input algorithm number fftalg=',fftalg,' is not allowed.',ch10,&
302 &   'The second digit (fftalg(B)) must be 0 or 1.',ch10,&
303 &   'Action: change fftalg in your input file.'
304    ABI_BUG(msg)
305  end if
306 
307  if (fftalgb==1 .and. ALL(fftalga/=(/1,3,4/)) )then
308    write(msg,'(a,i4,5a)')&
309 &   'The input algorithm number fftalg=',fftalg,' is not allowed.',ch10,&
310 &   'When fftalg(B) is 1, the allowed values for fftalg(A) are 1 and 4.',ch10,&
311 &   'Action: change fftalg in your input file.'
312    ABI_BUG(msg)
313  end if
314 
315  if (n4<n1.or.n5<n2.or.n6<n3) then
316    write(msg,'(a,3i8,a,3i8)')'  Each of n4,n5,n6=',n4,n5,n6,'must be >= n1, n2, n3 =',n1,n2,n3
317    ABI_BUG(msg)
318  end if
319 
320 !---------------------------------------------------------
321 !Here sophisticated algorithm based on S. Goedecker routines, only for the REAL case.
322 !Take advantage of the fact that fofr is real, and that fofg has corresponding symmetry properties.
323 
324 #ifdef DEBUG_MODE
325  if (n1>mg .or. n2>mg .or. n3>mg) then
326    write(msg, '(a,3i10,a,a,a,i10,a)' )&
327 &   'One of the dimensions n1,n2,n3=',n1,n2,n3,',',ch10,&
328 &   'exceeds allowed dimension mg=',mg,'.'
329    ABI_BUG(msg)
330  end if
331 #endif
332 
333  n1half1=n1/2+1 ; n1halfm=(n1+1)/2
334  n2half1=n2/2+1
335 !n4half1 or n5half1 are the odd integers >= n1half1 or n2half1
336  n4half1=(n1half1/2)*2+1
337  n5half1=(n2half1/2)*2+1
338 
339 !This sophisticated algorithm allows to decrease the memory needs.
340  ABI_MALLOC(work1,(2,n4,n5half1,n6))
341  ABI_MALLOC(work2,(2,n4,n5half1,n6))
342 
343  if(isign==1)then
344 
345 !  Compute auxiliary arrays needed for FFTs, here forward FFT
346    call sg_ctrig(n1,trig1,aft1,bef1,now1,one,ic1,ind1,mfac,mg)
347    call sg_ctrig(n2,trig2,aft2,bef2,now2,one,ic2,ind2,mfac,mg)
348    call sg_ctrig(n3,trig3,aft3,bef3,now3,one,ic3,ind3,mfac,mg)
349 
350 !  Transfer fofg to the expanded fft box (only half of it)
351 
352 !$OMP PARALLEL DO PRIVATE(i1,i2,i3,index) SHARED(fofg,n1,n2,n3,work1)
353    do i3=1,n3
354      do i2=1,n2half1
355        index=n1*(i2-1+n2*(i3-1))
356        do i1=1,n1
357          work1(1,i1,i2,i3)=fofg(1,i1+index)
358          work1(2,i1,i2,i3)=fofg(2,i1+index)
359        end do
360      end do
361    end do
362 
363 !$OMP PARALLEL DO SHARED(aft3,bef3,ind3,ic3,now3,n1,n2half1,n4,n5half1,n6,ris,trig3,work1,work2) PRIVATE(i2)
364    do i2=1,n2half1
365      call sg_fftz(mfac,mg,n4,n5half1,n6,n1,i2,i2,work1,work2,&
366 &     trig3,aft3,now3,bef3,ris,ind3,ic3)
367    end do
368 
369 !  Loop over x-y planes
370 
371 !$OMP PARALLEL PRIVATE(i1,i2,i3,index,wk2d_a,wk2d_b,wk2d_c,wk2d_d) &
372 !$OMP&SHARED(aft1,aft2,bef1,bef2,fftcache,fofg,fofr,ic1,ic2,ind1,ind2) &
373 !$OMP&SHARED(n1,n1half1,n1halfm,n2,n2half1,n3) &
374 !$OMP&SHARED(n4,n5,now1,now2,ris,trig1,trig2,work2)
375 
376    ABI_MALLOC(wk2d_a,(2,n4,n5,1))
377    ABI_MALLOC(wk2d_b,(2,n4,n5,1))
378    ABI_MALLOC(wk2d_c,(2,2*n1halfm+1,n5,1))
379    ABI_MALLOC(wk2d_d,(2,2*n1halfm+1,n5,1))
380 
381 !$OMP DO
382    do i3=1,n3
383 
384      do i2=1,n2half1
385        do i1=1,n1
386          wk2d_c(1,i1,i2,1)=work2(1,i1,i2,i3)
387          wk2d_c(2,i1,i2,1)=work2(2,i1,i2,i3)
388        end do
389      end do
390 
391      call sg_fftx(fftcache,mfac,mg,2*n1halfm+1,n5,1,n2half1,1,wk2d_c,wk2d_d,&
392 &     trig1,aft1,now1,bef1,ris,ind1,ic1)
393 
394      do i1=1,n1half1-1 ! Compute symmetric and antisymmetric combinations
395        wk2d_a(1,i1,1,1)=wk2d_d(1,2*i1-1,1,1)
396        wk2d_a(2,i1,1,1)=wk2d_d(1,2*i1  ,1,1)
397      end do
398 
399      if((2*n1half1-2)/=n1)then  ! If n1 odd, must add last data
400        wk2d_a(1,n1half1,1,1)=wk2d_d(1,n1,1,1)
401        wk2d_a(2,n1half1,1,1)=0.0d0
402      end if
403 
404      do i2=2,n2half1
405        do i1=1,n1half1-1
406          wk2d_a(1,i1,i2,1)     = wk2d_d(1,2*i1-1,i2,1)-wk2d_d(2,2*i1,i2,1)
407          wk2d_a(2,i1,i2,1)     = wk2d_d(2,2*i1-1,i2,1)+wk2d_d(1,2*i1,i2,1)
408          wk2d_a(1,i1,n2+2-i2,1)= wk2d_d(1,2*i1-1,i2,1)+wk2d_d(2,2*i1,i2,1)
409          wk2d_a(2,i1,n2+2-i2,1)=-wk2d_d(2,2*i1-1,i2,1)+wk2d_d(1,2*i1,i2,1)
410        end do
411        if((2*n1half1-2)/=n1)then
412          wk2d_a(1,n1half1,i2,1)     = wk2d_d(1,n1,i2,1)
413          wk2d_a(2,n1half1,i2,1)     = wk2d_d(2,n1,i2,1)
414          wk2d_a(1,n1half1,n2+2-i2,1)= wk2d_d(1,n1,i2,1)
415          wk2d_a(2,n1half1,n2+2-i2,1)=-wk2d_d(2,n1,i2,1)
416        end if
417      end do
418 
419      call sg_ffty(fftcache,mfac,mg,n4,n5,1,1,n1halfm,1,1,wk2d_a,wk2d_b,&
420 &     trig2,aft2,now2,bef2,ris,ind2,ic2)
421 
422      do i2=1,n2  ! Take real part data from expanded box and put it in the original box.
423        index=n1*(i2-1+n2*(i3-1))
424        do i1=1,n1half1-1 ! copy data
425          fofr(2*i1-1+index)=wk2d_b(1,i1,i2,1)
426          fofr(2*i1  +index)=wk2d_b(2,i1,i2,1)
427        end do
428        if((2*n1half1-2)/=n1)then ! If n1 odd, must add last data
429          fofr(n1+index)=wk2d_b(1,n1half1,i2,1)
430        end if
431      end do
432 
433    end do ! loop over x-y planes
434 !$OMP END DO
435    ABI_FREE(wk2d_a)
436    ABI_FREE(wk2d_b)
437    ABI_FREE(wk2d_c)
438    ABI_FREE(wk2d_d)
439 !$OMP END PARALLEL
440 
441  else if(isign==-1)then
442 
443 !  Compute auxiliary arrays needed for FFTs, here backward FFT
444    call sg_ctrig(n1,trig1,aft1,bef1,now1,-one,ic1,ind1,mfac,mg)
445    call sg_ctrig(n2,trig2,aft2,bef2,now2,-one,ic2,ind2,mfac,mg)
446    call sg_ctrig(n3,trig3,aft3,bef3,now3,-one,ic3,ind3,mfac,mg)
447 
448 !  Treat first x-transform in x-y plane, and multiply
449 !  by overall normalization factor 1/nfftot
450 
451 !  Loop over x-y planes
452 
453 !$OMP PARALLEL PRIVATE(i1,i2,i3,index,wk2d_a,wk2d_b,wk2d_c,wk2d_d) &
454 !$OMP&SHARED(aft1,aft2,bef1,bef2,fftcache,fofr,ic1,ic2,ind1,ind2) &
455 !$OMP&SHARED(n1,n1half1,n1halfm,n2,n2half1,n3) &
456 !$OMP&SHARED(n4,n5,now1,now2,ris,trig1,trig2,work1,xnorm)
457 
458    ABI_MALLOC(wk2d_a,(2,n4,n5,1))
459    ABI_MALLOC(wk2d_b,(2,n4,n5,1))
460    ABI_MALLOC(wk2d_c,(2,2*n1halfm+1,n5,1))
461    ABI_MALLOC(wk2d_d,(2,2*n1halfm+1,n5,1))
462 
463 !$OMP DO
464    do i3=1,n3
465      do i2=1,n2
466        index=n1*(i2-1+n2*(i3-1))
467        do i1=1,n1half1-1 ! copy and normalize data
468          wk2d_a(1,i1,i2,1)=fofr(2*i1-1+index)*xnorm
469          wk2d_a(2,i1,i2,1)=fofr(2*i1  +index)*xnorm
470        end do
471 
472        if((2*n1half1-2)/=n1)then ! If n1 odd, must add last data
473          wk2d_a(1,n1half1,i2,1)=fofr(n1+index)*xnorm
474          wk2d_a(2,n1half1,i2,1)=zero
475        end if
476      end do
477 
478      call sg_ffty(fftcache,mfac,mg,n4,n5,1,1,n1halfm,1,1,wk2d_a,wk2d_b,&
479 &     trig2,aft2,now2,bef2,ris,ind2,ic2)
480 
481      do i1=1,n1halfm ! Decompose symmetric and antisymmetric parts
482        wk2d_c(1,2*i1-1,1,1)=wk2d_b(1,i1,1,1)
483        wk2d_c(2,2*i1-1,1,1)=0.0d0
484        wk2d_c(1,2*i1,1,1)=wk2d_b(2,i1,1,1)
485        wk2d_c(2,2*i1,1,1)=0.0d0
486      end do
487 
488      do i2=2,n2half1
489        do i1=1,n1halfm
490          wk2d_c(1,2*i1-1,i2,1)= (wk2d_b(1,i1,i2,1)+wk2d_b(1,i1,n2+2-i2,1))*0.5d0
491          wk2d_c(2,2*i1-1,i2,1)= (wk2d_b(2,i1,i2,1)-wk2d_b(2,i1,n2+2-i2,1))*0.5d0
492          wk2d_c(1,2*i1,i2,1)  = (wk2d_b(2,i1,i2,1)+wk2d_b(2,i1,n2+2-i2,1))*0.5d0
493          wk2d_c(2,2*i1,i2,1)  =-(wk2d_b(1,i1,i2,1)-wk2d_b(1,i1,n2+2-i2,1))*0.5d0
494        end do
495      end do
496 
497      call sg_fftx(fftcache,mfac,mg,2*n1halfm+1,n5,1,n2half1,1,wk2d_c,wk2d_d,&
498 &     trig1,aft1,now1,bef1,ris,ind1,ic1)
499 
500      do i2=1,n2half1
501        do i1=1,n1
502          work1(1,i1,i2,i3)=wk2d_d(1,i1,i2,1)
503          work1(2,i1,i2,i3)=wk2d_d(2,i1,i2,1)
504        end do
505      end do
506 
507    end do
508 !$OMP END DO
509    ABI_FREE(wk2d_a)
510    ABI_FREE(wk2d_b)
511    ABI_FREE(wk2d_c)
512    ABI_FREE(wk2d_d)
513 !$OMP END PARALLEL
514 
515 !$OMP PARALLEL DO SHARED(aft3,bef3,ind3,ic3,now3,n1,n2half1,n4,n5half1,n6,ris,trig3,work1,work2) PRIVATE(i2)
516    do i2=1,n2half1
517      call sg_fftz(mfac,mg,n4,n5half1,n6,n1,i2,i2,work1,work2,&
518 &     trig3,aft3,now3,bef3,ris,ind3,ic3)
519    end do
520 
521 !  Transfer fft output to the original fft box
522 
523 !$OMP PARALLEL DO PRIVATE(i1,i2,i3,index) SHARED(fofg,n1,n2,n2half1,n3,work2)
524    do i3=1,n3
525      do i2=1,n2half1
526        index=n1*(i2-1+n2*(i3-1))
527        do i1=1,n1
528          fofg(1,i1+index)=work2(1,i1,i2,i3)
529          fofg(2,i1+index)=work2(2,i1,i2,i3)
530        end do
531      end do
532 !    Complete missing values with complex conjugate
533 !    Inverse of ix is located at nx+2-ix , except for ix=1, for which it is 1.
534      if(n2half1>2)then
535        do i2=2,n2+1-n2half1
536          index=n1*((n2+2-i2)-1)
537          if(i3/=1)index=index+n1*n2*((n3+2-i3)-1)
538          fofg(1,1+index)= work2(1,1,i2,i3)
539          fofg(2,1+index)=-work2(2,1,i2,i3)
540          do i1=2,n1
541            fofg(1,n1+2-i1+index)= work2(1,i1,i2,i3)
542            fofg(2,n1+2-i1+index)=-work2(2,i1,i2,i3)
543          end do
544        end do
545      end if
546    end do
547 
548  end if ! choice of isign
549 
550  ABI_FREE(work1)
551  ABI_FREE(work2)
552 
553  !DBG_EXIT("COLL")
554 
555 end subroutine sg_fft_rc

m_sgfft/sg_fftpad [ Functions ]

[ Top ] [ m_sgfft ] [ Functions ]

NAME

 sg_fftpad

FUNCTION

 Fast Fourier transform. This is the zero-padding version of "fft".

INPUTS

  fftcache=size of the cache (kB)
  mgfft=maximum size of 1D FFTs
  n1,n2,n3=physical dimension of the transform
  nd1,nd2,nd3=memory dimension of arr and ftarr
  ndat=Number of FFT transforms.
  isign= sign of exponential in transform
  gbound(2*mgfft+8,2)=sphere boundary info

OUTPUT

  ftarr(2,nd1,nd2,nd3*ndat)=working space for transform and contains output

SIDE EFFECTS

  arr(2,nd1,nd2,nd3*ndat)=input complex array with alternating real and imaginary
    elements; data resides in 2*n1*n2*n3 of this array, spread out.
  arr(2,nd1,nd2,nd3*ndat) is modified by sg_fftpx,sg_ffty,sg_fftz.

NOTES

  mfac sets maximum number of factors (5, 4, 3, or 2) which may be
  contained within any n1, n2, or n3
  mg sets the maximum 1 dimensional fft length (any one of n1, n2, or n3)
  XG: the signification of mg is changed with respect to fft3dp !!!

SOURCE

592 subroutine sg_fftpad(fftcache,mgfft,n1,n2,n3,nd1,nd2,nd3,ndat,gbound,isign,arr,ftarr)
593 
594 !Arguments ------------------------------------
595 !scalars
596  integer,intent(in) :: fftcache,mgfft,n1,n2,n3,nd1,nd2,nd3,ndat,isign
597 !arrays
598  integer,intent(in) :: gbound(2*mgfft+8,2)
599  real(dp),intent(inout) :: arr(2,nd1,nd2,nd3*ndat)
600  real(dp),intent(out) :: ftarr(2,nd1,nd2,nd3*ndat)
601 
602 !Local variables-------------------------------
603 !scalars
604  integer :: idat,start
605 
606 ! *************************************************************************
607 
608  do idat=1,ndat
609    start = 1 + (idat-1)*nd3
610    call fftpad_one_nothreadsafe(fftcache,mgfft,nd1,nd2,nd3,n1,n2,n3,&
611 &    arr(1,1,1,start),ftarr(1,1,1,start),real(isign, kind=dp),gbound)
612  end do
613 
614 end subroutine sg_fftpad

m_sgfft/sg_fftpx [ Functions ]

[ Top ] [ m_sgfft ] [ Functions ]

NAME

 sg_fftpx

FUNCTION

 This subroutine is called by the 3-dimensional fft to conduct the
 "x" transforms for all y and z.
 Accomodate more optimal treatment of
 zero padding following the method of fft3dp.

INPUTS

  fftcache=size of the cache (kB)
  mfac = maximum number of factors in 1D FFTs
  mg = maximum length of 1D FFTs
  mgfft = effective maximum length of 1D FFTs, for dimensioning gbound
  nd1=first dimension of (complex) arrays z and zbr (treated as real within
   this subroutine)
  nd2=second dimension of (complex) arrays z and zbr (treated as real within
   this subroutine)
  nd3=third dimension of (complex) arrays z and zbr (treated as real within
   this subroutine)
  n2,n3=actual length of y and z transforms
  z(2,nd1,nd2,nd3)=INPUT array; destroyed by transformation
  trig, aft, now, bef, ind=provided by previous call to ctrig
   Note that in this routine (and in ctrig) the values in array trig are
   actually cos and tan, not cos and sin.  Use of tan allows advantageous
   use of FMA on the ibm rs6000.
  ris=sign of exponential in transform (should be 1 or -1; real)
  ic=number of (radix) factors of x transform length (from ctrig)
  gbound(2*mgfft+4)=sphere boundary info

OUTPUT

  zbr(2,nd1,nd2,nd3)=OUTPUT transformed array; no scaling applied

SIDE EFFECTS

NOTES

 This routine blocks the x transforms
 so that all transforms under consideration at one step fit within
 the cache memory, which is crucial for optimal performance.
 The blocking factor is set by parameter "fftcache" below, which should
 be adjusted to be somewhat smaller (say 3/4) than the actual cache size
 of the machine.

TODO

 Use latex for the equation above

SOURCE

 843 subroutine sg_fftpx(fftcache,mfac,mg,mgfft,nd1,nd2,nd3,n2,n3,&
 844 &    z,zbr,trig,aft,now,bef,ris,ind,ic,gbound)
 845 
 846 !Arguments ------------------------------------
 847 !Dimensions of aft, now, bef, ind, and trig should agree with
 848 !those in subroutine ctrig.
 849 !scalars
 850  integer,intent(in) :: fftcache,ic,mfac,mg,mgfft,n2,n3,nd1,nd2,nd3
 851  real(dp),intent(in) :: ris
 852 !arrays
 853  integer,intent(in) :: aft(mfac),bef(mfac),gbound(2*mgfft+4),ind(mg),now(mfac)
 854  real(dp),intent(in) :: trig(2,mg)
 855  real(dp),intent(inout) :: z(2,nd1,nd2,nd3)
 856  real(dp),intent(inout) :: zbr(2,nd1,nd2,nd3) !vz_i
 857 
 858 !Local variables-------------------------------
 859 !scalars
 860  integer :: g2,g2max,g2min,g3,g3max,g3min,gg3,i,ia,ib,igb,ihalfy,indx,j
 861  integer :: len3,lot,lowlim,ma,mb,ntb,upplim
 862 !no_abirules
 863  real(dp),parameter :: &
 864 & cos2=0.3090169943749474d0,&   !cos(2.d0*pi/5.d0)
 865 & cos4=-0.8090169943749474d0,&  !cos(4.d0*pi/5.d0)
 866 & sin42=0.6180339887498948d0    !sin(4.d0*pi/5.d0)/sin(2.d0*pi/5.d0)
 867  real(dp) :: bb,cr2,cr2s,cr3,cr3p,cr4,cr5,ct2,ct3,ct4,ct5,&
 868 & factor,r,r1,r2,r25,r3,r34,r4,r5,s,sin2,s1,s2,s25,s3,s34,s4,s5
 869 
 870 ! *************************************************************************
 871 
 872  g3min=gbound(1)
 873  g3max=gbound(2)
 874  igb=3
 875  len3=g3max-g3min+1
 876 
 877 
 878 !Do x transforms in blocks of size "lot" which is set by how
 879 !many x transform arrays (of size nd1 each) fit into the nominal
 880 !cache size "fftcache".
 881 !Loop over blocks in the loop below.
 882 
 883  factor=0.75d0
 884  lot=(fftcache*factor*1000d0)/(nd1*8*2)
 885  if(lot.lt.1) lot=1
 886 !Express loop over y, z in terms of separate z and y loops
 887 
 888 !$OMP PARALLEL DO DEFAULT(PRIVATE)&
 889 !$OMP SHARED(aft,bef,gbound,g3max,ic,ind,len3,lot)&
 890 !$OMP SHARED(n2,n3,nd2,now,ris,trig,z,zbr)
 891  do gg3=1,len3
 892 
 893    if (gg3<=g3max+1) then
 894      g3=gg3
 895    else
 896 !    wrap around for negative gg3
 897      g3=gg3-len3+n3
 898    end if
 899 
 900    igb=gg3*2+1
 901    g2min=gbound(igb)
 902    g2max=gbound(igb+1)
 903 
 904 !  Split the y loop into positive and wrapped-around negative parts
 905 
 906    do ihalfy=1,2
 907 
 908 !    Start at 1 for ihalfy=1; g2min+1+n2 for ihalfy=2
 909      lowlim=1+(ihalfy-1)*(g2min+n2)
 910 !    End at g2max+1 for ihalfy=1; n2 for ihalfy=2
 911      upplim=g2max+1+(ihalfy-1)*(n2-g2max-1)
 912 
 913      do g2=lowlim,upplim,lot
 914 
 915 !      Find array starting address ma and ending address mb
 916 !      modified xg 980107
 917 !      ma=g2+(g3-1)*nd2
 918        ma=g2
 919 !      Perform "lot" transforms at a time (until out of data)
 920 !      mb=min(g2+(lot-1),upplim)+(g3-1)*nd2
 921        mb=min(g2+(lot-1),upplim)
 922 
 923 !      -------------------------------------------------------------------------
 924 !
 925 !      Direct transformation
 926 
 927 !      Run over all factors except the last (to ic-1), performing
 928 !      x transform
 929 
 930 !      Note: fortran should skip this loop if ic=1; beware "onetrip"
 931 !      compiler option which forces each loop at least once
 932 
 933        do i=1,ic-1
 934          ntb=now(i)*bef(i)
 935 
 936 !        Treat radix 4
 937          if (now(i)==4) then
 938            ia=0
 939 
 940 !          First step of factor 4
 941            do ib=1,bef(i)
 942              do j=ma,mb
 943                r4=z(1,ia*ntb+3*bef(i)+ib,j,g3)
 944                s4=z(2,ia*ntb+3*bef(i)+ib,j,g3)
 945                r3=z(1,ia*ntb+2*bef(i)+ib,j,g3)
 946                s3=z(2,ia*ntb+2*bef(i)+ib,j,g3)
 947                r2=z(1,ia*ntb+bef(i)+ib,j,g3)
 948                s2=z(2,ia*ntb+bef(i)+ib,j,g3)
 949                r1=z(1,ia*ntb+ib,j,g3)
 950                s1=z(2,ia*ntb+ib,j,g3)
 951 
 952                r=r1 + r3
 953                s=r2 + r4
 954                z(1,ia*ntb+ib,j,g3) = r + s
 955                z(1,ia*ntb+2*bef(i)+ib,j,g3) = r - s
 956                r=r1 - r3
 957                s=s2 - s4
 958                z(1,ia*ntb+bef(i)+ib,j,g3) = r - s*ris
 959                z(1,ia*ntb+3*bef(i)+ib,j,g3) = r + s*ris
 960                r=s1 + s3
 961                s=s2 + s4
 962                z(2,ia*ntb+ib,j,g3) = r + s
 963                z(2,ia*ntb+2*bef(i)+ib,j,g3) = r - s
 964                r=s1 - s3
 965                s=r2 - r4
 966                z(2,ia*ntb+bef(i)+ib,j,g3) = r + s*ris
 967                z(2,ia*ntb+3*bef(i)+ib,j,g3) = r - s*ris
 968              end do
 969            end do
 970 
 971 !          Second step of factor 4
 972            do ia=1,aft(i)-1
 973              indx=ind(ia*4*bef(i)+1)-1
 974              indx=indx*bef(i)
 975              cr2=trig(1,indx)
 976              ct2=trig(2,indx)
 977              cr3=trig(1,2*indx)
 978              ct3=trig(2,2*indx)
 979              cr4=trig(1,3*indx)
 980              ct4=trig(2,3*indx)
 981              cr4=cr4/cr2
 982              cr2s=cr2*ris
 983              do ib=1,bef(i)
 984                do j=ma,mb
 985                  r4=z(1,ia*ntb+3*bef(i)+ib,j,g3) - &
 986 &                 z(2,ia*ntb+3*bef(i)+ib,j,g3)*ct4
 987                  s4=z(1,ia*ntb+3*bef(i)+ib,j,g3)*ct4 + &
 988 &                 z(2,ia*ntb+3*bef(i)+ib,j,g3)
 989                  r3=z(1,ia*ntb+2*bef(i)+ib,j,g3) - &
 990 &                 z(2,ia*ntb+2*bef(i)+ib,j,g3)*ct3
 991                  s3=z(1,ia*ntb+2*bef(i)+ib,j,g3)*ct3 + &
 992 &                 z(2,ia*ntb+2*bef(i)+ib,j,g3)
 993                  r2=z(1,ia*ntb+bef(i)+ib,j,g3) - &
 994 &                 z(2,ia*ntb+bef(i)+ib,j,g3)*ct2
 995                  s2=z(1,ia*ntb+bef(i)+ib,j,g3)*ct2 + &
 996 &                 z(2,ia*ntb+bef(i)+ib,j,g3)
 997                  r1=z(1,ia*ntb+ib,j,g3)
 998                  s1=z(2,ia*ntb+ib,j,g3)
 999 
1000                  r=r1 + r3*cr3
1001                  s=r2 + r4*cr4
1002                  z(1,ia*ntb+ib,j,g3) = r + s*cr2
1003                  z(1,ia*ntb+2*bef(i)+ib,j,g3) = r - s*cr2
1004                  r=r1 - r3*cr3
1005                  s=s2 - s4*cr4
1006                  z(1,ia*ntb+bef(i)+ib,j,g3) = r - s*cr2s
1007                  z(1,ia*ntb+3*bef(i)+ib,j,g3) = r + s*cr2s
1008                  r=s1 + s3*cr3
1009                  s=s2 + s4*cr4
1010                  z(2,ia*ntb+ib,j,g3) = r + s*cr2
1011                  z(2,ia*ntb+2*bef(i)+ib,j,g3) = r - s*cr2
1012                  r=s1 - s3*cr3
1013                  s=r2 - r4*cr4
1014                  z(2,ia*ntb+bef(i)+ib,j,g3) = r + s*cr2s
1015                  z(2,ia*ntb+3*bef(i)+ib,j,g3) = r - s*cr2s
1016                end do
1017              end do
1018            end do
1019 
1020 !          Treat radix 2
1021          else if (now(i)==2) then
1022            ia=0
1023 
1024 !          First step of factor 2
1025            do ib=1,bef(i)
1026              do j=ma,mb
1027                r1=z(1,ia*ntb+ib,j,g3)
1028                s1=z(2,ia*ntb+ib,j,g3)
1029                r2=z(1,ia*ntb+bef(i)+ib,j,g3)
1030                s2=z(2,ia*ntb+bef(i)+ib,j,g3)
1031                z(1,ia*ntb+ib,j,g3) =  r2 + r1
1032                z(2,ia*ntb+ib,j,g3) =  s2 + s1
1033                z(1,ia*ntb+bef(i)+ib,j,g3) = -r2 + r1
1034                z(2,ia*ntb+bef(i)+ib,j,g3) = -s2 + s1
1035              end do
1036            end do
1037 
1038 !          Second step of radix 2
1039            do ia=1,aft(i)-1
1040              indx=ind(ia*2*bef(i)+1)-1
1041              indx=indx*bef(i)
1042              cr2=trig(1,indx)
1043              ct2=trig(2,indx)
1044              do ib=1,bef(i)
1045                do j=ma,mb
1046                  r1=z(1,ia*ntb+ib,j,g3)
1047                  s1=z(2,ia*ntb+ib,j,g3)
1048                  r2=z(1,ia*ntb+bef(i)+ib,j,g3) - &
1049 &                 z(2,ia*ntb+bef(i)+ib,j,g3)*ct2
1050                  s2=z(1,ia*ntb+bef(i)+ib,j,g3)*ct2 + &
1051 &                 z(2,ia*ntb+bef(i)+ib,j,g3)
1052                  z(1,ia*ntb+ib,j,g3) =  r2*cr2 + r1
1053                  z(2,ia*ntb+ib,j,g3) =  s2*cr2 + s1
1054                  z(1,ia*ntb+bef(i)+ib,j,g3) = -r2*cr2 + r1
1055                  z(2,ia*ntb+bef(i)+ib,j,g3) = -s2*cr2 + s1
1056                end do
1057              end do
1058            end do
1059 
1060 !          Treat radix 3
1061          else if (now(i)==3) then
1062 !          .5d0*sqrt(3.d0)=0.8660254037844387d0
1063            ia=0
1064            bb=ris*0.8660254037844387d0
1065 
1066 !          First step of radix 3
1067            do ib=1,bef(i)
1068              do j=ma,mb
1069                r1=z(1,ia*ntb+ib,j,g3)
1070                s1=z(2,ia*ntb+ib,j,g3)
1071                r2=z(1,ia*ntb+bef(i)+ib,j,g3)
1072                s2=z(2,ia*ntb+bef(i)+ib,j,g3)
1073                r3=z(1,ia*ntb+2*bef(i)+ib,j,g3)
1074                s3=z(2,ia*ntb+2*bef(i)+ib,j,g3)
1075                r=r2 + r3
1076                s=s2 + s3
1077                z(1,ia*ntb+ib,j,g3) = r + r1
1078                z(2,ia*ntb+ib,j,g3) = s + s1
1079                r1=r1 - r*.5d0
1080                s1=s1 - s*.5d0
1081                r2=r2-r3
1082                s2=s2-s3
1083                z(1,ia*ntb+bef(i)+ib,j,g3) = r1 - s2*bb
1084                z(2,ia*ntb+bef(i)+ib,j,g3) = s1 + r2*bb
1085                z(1,ia*ntb+2*bef(i)+ib,j,g3) = r1 + s2*bb
1086                z(2,ia*ntb+2*bef(i)+ib,j,g3) = s1 - r2*bb
1087              end do
1088            end do
1089 
1090 !          Second step of radix 3
1091            do ia=1,aft(i)-1
1092              indx=ind(ia*3*bef(i)+1)-1
1093              indx=indx*bef(i)
1094              cr2=trig(1,indx)
1095              ct2=trig(2,indx)
1096              cr3=trig(1,2*indx)
1097              ct3=trig(2,2*indx)
1098              cr2=cr2/cr3
1099              cr3p=.5d0*cr3
1100              bb=ris*cr3*0.8660254037844387d0
1101              do ib=1,bef(i)
1102                do j=ma,mb
1103                  r1=z(1,ia*ntb+ib,j,g3)
1104                  s1=z(2,ia*ntb+ib,j,g3)
1105                  r2=z(1,ia*ntb+bef(i)+ib,j,g3) - &
1106 &                 z(2,ia*ntb+bef(i)+ib,j,g3)*ct2
1107                  s2=z(1,ia*ntb+bef(i)+ib,j,g3)*ct2 + &
1108 &                 z(2,ia*ntb+bef(i)+ib,j,g3)
1109                  r3=z(1,ia*ntb+2*bef(i)+ib,j,g3) - &
1110 &                 z(2,ia*ntb+2*bef(i)+ib,j,g3)*ct3
1111                  s3=z(1,ia*ntb+2*bef(i)+ib,j,g3)*ct3 + &
1112 &                 z(2,ia*ntb+2*bef(i)+ib,j,g3)
1113                  r=cr2*r2 + r3
1114                  s=cr2*s2 + s3
1115                  z(1,ia*ntb+ib,j,g3) = r*cr3 + r1
1116                  z(2,ia*ntb+ib,j,g3) = s*cr3 + s1
1117                  r1=r1 - r*cr3p
1118                  s1=s1 - s*cr3p
1119                  r2=cr2*r2-r3
1120                  s2=cr2*s2-s3
1121                  z(1,ia*ntb+bef(i)+ib,j,g3) = r1 - s2*bb
1122                  z(2,ia*ntb+bef(i)+ib,j,g3) = s1 + r2*bb
1123                  z(1,ia*ntb+2*bef(i)+ib,j,g3) = r1 + s2*bb
1124                  z(2,ia*ntb+2*bef(i)+ib,j,g3) = s1 - r2*bb
1125                end do
1126              end do
1127            end do
1128 
1129 !          Treat radix 5
1130          else if (now(i)==5) then
1131 !          sin(2.d0*pi/5.d0)
1132            sin2=ris*0.9510565162951536d0
1133            ia=0
1134 
1135 !          First step of radix 5
1136            do ib=1,bef(i)
1137              do j=ma,mb
1138                r1=z(1,ia*ntb+ib,j,g3)
1139                s1=z(2,ia*ntb+ib,j,g3)
1140                r2=z(1,ia*ntb+bef(i)+ib,j,g3)
1141                s2=z(2,ia*ntb+bef(i)+ib,j,g3)
1142                r3=z(1,ia*ntb+2*bef(i)+ib,j,g3)
1143                s3=z(2,ia*ntb+2*bef(i)+ib,j,g3)
1144                r4=z(1,ia*ntb+3*bef(i)+ib,j,g3)
1145                s4=z(2,ia*ntb+3*bef(i)+ib,j,g3)
1146                r5=z(1,ia*ntb+4*bef(i)+ib,j,g3)
1147                s5=z(2,ia*ntb+4*bef(i)+ib,j,g3)
1148                r25 = r2 + r5
1149                r34 = r3 + r4
1150                s25 = s2 - s5
1151                s34 = s3 - s4
1152                z(1,ia*ntb+ib,j,g3) = r1 + r25 + r34
1153                r = r1 + cos2*r25 + cos4*r34
1154                s = s25 + sin42*s34
1155                z(1,ia*ntb+bef(i)+ib,j,g3) = r - sin2*s
1156                z(1,ia*ntb+4*bef(i)+ib,j,g3) = r + sin2*s
1157                r = r1 + cos4*r25 + cos2*r34
1158                s = sin42*s25 - s34
1159                z(1,ia*ntb+2*bef(i)+ib,j,g3) = r - sin2*s
1160                z(1,ia*ntb+3*bef(i)+ib,j,g3) = r + sin2*s
1161                r25 = r2 - r5
1162                r34 = r3 - r4
1163                s25 = s2 + s5
1164                s34 = s3 + s4
1165                z(2,ia*ntb+ib,j,g3) = s1 + s25 + s34
1166                r = s1 + cos2*s25 + cos4*s34
1167                s = r25 + sin42*r34
1168                z(2,ia*ntb+bef(i)+ib,j,g3) = r + sin2*s
1169                z(2,ia*ntb+4*bef(i)+ib,j,g3) = r - sin2*s
1170                r = s1 + cos4*s25 + cos2*s34
1171                s = sin42*r25 - r34
1172                z(2,ia*ntb+2*bef(i)+ib,j,g3) = r + sin2*s
1173                z(2,ia*ntb+3*bef(i)+ib,j,g3) = r - sin2*s
1174              end do
1175            end do
1176 
1177 !          Second step of radix 5
1178            do ia=1,aft(i)-1
1179              indx=ind(ia*5*bef(i)+1)-1
1180              indx=indx*bef(i)
1181              cr2=trig(1,indx)
1182              ct2=trig(2,indx)
1183              cr3=trig(1,2*indx)
1184              ct3=trig(2,2*indx)
1185              cr4=trig(1,3*indx)
1186              ct4=trig(2,3*indx)
1187              cr5=trig(1,4*indx)
1188              ct5=trig(2,4*indx)
1189              do ib=1,bef(i)
1190                do j=ma,mb
1191                  r1=z(1,ia*ntb+ib,j,g3)
1192                  s1=z(2,ia*ntb+ib,j,g3)
1193                  r2=cr2*(z(1,ia*ntb+bef(i)+ib,j,g3) - &
1194 &                 z(2,ia*ntb+bef(i)+ib,j,g3)*ct2)
1195                  s2=cr2*(z(1,ia*ntb+bef(i)+ib,j,g3)*ct2 + &
1196 &                 z(2,ia*ntb+bef(i)+ib,j,g3))
1197                  r3=cr3*(z(1,ia*ntb+2*bef(i)+ib,j,g3) - &
1198 &                 z(2,ia*ntb+2*bef(i)+ib,j,g3)*ct3)
1199                  s3=cr3*(z(1,ia*ntb+2*bef(i)+ib,j,g3)*ct3 + &
1200 &                 z(2,ia*ntb+2*bef(i)+ib,j,g3))
1201                  r4=z(1,ia*ntb+3*bef(i)+ib,j,g3) - &
1202 &                 z(2,ia*ntb+3*bef(i)+ib,j,g3)*ct4
1203                  s4=z(1,ia*ntb+3*bef(i)+ib,j,g3)*ct4 + &
1204 &                 z(2,ia*ntb+3*bef(i)+ib,j,g3)
1205                  r5=z(1,ia*ntb+4*bef(i)+ib,j,g3) - &
1206 &                 z(2,ia*ntb+4*bef(i)+ib,j,g3)*ct5
1207                  s5=z(1,ia*ntb+4*bef(i)+ib,j,g3)*ct5 + &
1208 &                 z(2,ia*ntb+4*bef(i)+ib,j,g3)
1209                  r25 = r2 + r5*cr5
1210                  r34 = r3 + r4*cr4
1211                  s25 = s2 - s5*cr5
1212                  s34 = s3 - s4*cr4
1213                  z(1,ia*ntb+ib,j,g3) = r1 + r25 + r34
1214                  r = r1 + cos2*r25 + cos4*r34
1215                  s = s25 + sin42*s34
1216                  z(1,ia*ntb+bef(i)+ib,j,g3) = r - sin2*s
1217                  z(1,ia*ntb+4*bef(i)+ib,j,g3) = r + sin2*s
1218                  r = r1 + cos4*r25 + cos2*r34
1219                  s = sin42*s25 - s34
1220                  z(1,ia*ntb+2*bef(i)+ib,j,g3) = r - sin2*s
1221                  z(1,ia*ntb+3*bef(i)+ib,j,g3) = r + sin2*s
1222                  r25 = r2 - r5*cr5
1223                  r34 = r3 - r4*cr4
1224                  s25 = s2 + s5*cr5
1225                  s34 = s3 + s4*cr4
1226                  z(2,ia*ntb+ib,j,g3) = s1 + s25 + s34
1227                  r = s1 + cos2*s25 + cos4*s34
1228                  s = r25 + sin42*r34
1229                  z(2,ia*ntb+bef(i)+ib,j,g3) = r + sin2*s
1230                  z(2,ia*ntb+4*bef(i)+ib,j,g3) = r - sin2*s
1231                  r = s1 + cos4*s25 + cos2*s34
1232                  s = sin42*r25 - r34
1233                  z(2,ia*ntb+2*bef(i)+ib,j,g3) = r + sin2*s
1234                  z(2,ia*ntb+3*bef(i)+ib,j,g3) = r - sin2*s
1235                end do
1236              end do
1237            end do
1238 
1239          else
1240 !          All radices treated
1241            ABI_BUG('called with factors other than 2, 3, and 5')
1242          end if
1243 
1244        end do  ! End of direct transformation (loop over ic)
1245 
1246 !      -----------------------------------------------------------------
1247 
1248 !      Bitreversal
1249 !      Perform bit reversal on last factor of transformation
1250 
1251 !      Treat radix 4
1252        if (now(ic)==4) then
1253          ia=0
1254 
1255 !        First step of radix 4
1256          do j=ma,mb
1257            r4=z(1,ia*4+4,j,g3)
1258            s4=z(2,ia*4+4,j,g3)
1259            r3=z(1,ia*4+3,j,g3)
1260            s3=z(2,ia*4+3,j,g3)
1261            r2=z(1,ia*4+2,j,g3)
1262            s2=z(2,ia*4+2,j,g3)
1263            r1=z(1,ia*4+1,j,g3)
1264            s1=z(2,ia*4+1,j,g3)
1265 
1266            r=r1 + r3
1267            s=r2 + r4
1268            zbr(1,ind(ia*4+1),j,g3) = r + s
1269            zbr(1,ind(ia*4+3),j,g3) = r - s
1270            r=r1 - r3
1271            s=s2 - s4
1272            zbr(1,ind(ia*4+2),j,g3) = r - s*ris
1273            zbr(1,ind(ia*4+4),j,g3) = r + s*ris
1274            r=s1 + s3
1275            s=s2 + s4
1276            zbr(2,ind(ia*4+1),j,g3) = r + s
1277            zbr(2,ind(ia*4+3),j,g3) = r - s
1278            r=s1 - s3
1279            s=r2 - r4
1280            zbr(2,ind(ia*4+2),j,g3) = r + s*ris
1281            zbr(2,ind(ia*4+4),j,g3) = r - s*ris
1282          end do
1283 
1284 !        Second step of radix 4
1285          do ia=1,aft(ic)-1
1286            indx=ind(ia*4+1)-1
1287            cr2=trig(1,indx)
1288            ct2=trig(2,indx)
1289            cr3=trig(1,2*indx)
1290            ct3=trig(2,2*indx)
1291            cr4=trig(1,3*indx)
1292            ct4=trig(2,3*indx)
1293            cr4=cr4/cr2
1294            cr2s=cr2*ris
1295            do j=ma,mb
1296              r4=z(1,ia*4+4,j,g3) - z(2,ia*4+4,j,g3)*ct4
1297              s4=z(1,ia*4+4,j,g3)*ct4 + z(2,ia*4+4,j,g3)
1298              r3=z(1,ia*4+3,j,g3) - z(2,ia*4+3,j,g3)*ct3
1299              s3=z(1,ia*4+3,j,g3)*ct3 + z(2,ia*4+3,j,g3)
1300              r2=z(1,ia*4+2,j,g3) - z(2,ia*4+2,j,g3)*ct2
1301              s2=z(1,ia*4+2,j,g3)*ct2 + z(2,ia*4+2,j,g3)
1302              r1=z(1,ia*4+1,j,g3)
1303              s1=z(2,ia*4+1,j,g3)
1304 
1305              r=r1 + r3*cr3
1306              s=r2 + r4*cr4
1307              zbr(1,ind(ia*4+1),j,g3) = r + s*cr2
1308              zbr(1,ind(ia*4+3),j,g3) = r - s*cr2
1309              r=r1 - r3*cr3
1310              s=s2 - s4*cr4
1311              zbr(1,ind(ia*4+2),j,g3) = r - s*cr2s
1312              zbr(1,ind(ia*4+4),j,g3) = r + s*cr2s
1313              r=s1 + s3*cr3
1314              s=s2 + s4*cr4
1315              zbr(2,ind(ia*4+1),j,g3) = r + s*cr2
1316              zbr(2,ind(ia*4+3),j,g3) = r - s*cr2
1317              r=s1 - s3*cr3
1318              s=r2 - r4*cr4
1319              zbr(2,ind(ia*4+2),j,g3) = r + s*cr2s
1320              zbr(2,ind(ia*4+4),j,g3) = r - s*cr2s
1321            end do
1322          end do
1323 
1324 !        Treat radix 2
1325        else if (now(ic)==2) then
1326 
1327          ia=0
1328 
1329 !        First step of radix 2
1330          do j=ma,mb
1331            r1=z(1,ia*2+1,j,g3)
1332            s1=z(2,ia*2+1,j,g3)
1333            r2=z(1,ia*2+2,j,g3)
1334            s2=z(2,ia*2+2,j,g3)
1335            zbr(1,ind(ia*2+1),j,g3) =  r2 + r1
1336            zbr(2,ind(ia*2+1),j,g3) =  s2 + s1
1337            zbr(1,ind(ia*2+2),j,g3) = -r2 + r1
1338            zbr(2,ind(ia*2+2),j,g3) = -s2 + s1
1339          end do
1340 
1341 !        Second step of radix 2
1342          do ia=1,aft(ic)-1
1343            indx=ind(ia*2+1)-1
1344            cr2=trig(1,indx)
1345            ct2=trig(2,indx)
1346            do j=ma,mb
1347              r1=z(1,ia*2+1,j,g3)
1348              s1=z(2,ia*2+1,j,g3)
1349              r2=z(1,ia*2+2,j,g3) - z(2,ia*2+2,j,g3)*ct2
1350              s2=z(1,ia*2+2,j,g3)*ct2 + z(2,ia*2+2,j,g3)
1351              zbr(1,ind(ia*2+1),j,g3) =  r2*cr2 + r1
1352              zbr(2,ind(ia*2+1),j,g3) =  s2*cr2 + s1
1353              zbr(1,ind(ia*2+2),j,g3) = -r2*cr2 + r1
1354              zbr(2,ind(ia*2+2),j,g3) = -s2*cr2 + s1
1355            end do
1356          end do
1357 
1358 !        Treat radix 3
1359        else if (now(ic)==3) then
1360 !        radix 3
1361 !        .5d0*sqrt(3.d0)=0.8660254037844387d0
1362          ia=0
1363          bb=ris*0.8660254037844387d0
1364 
1365 !        First step of radix 3
1366          do j=ma,mb
1367            r1=z(1,ia*3+1,j,g3)
1368            s1=z(2,ia*3+1,j,g3)
1369            r2=z(1,ia*3+2,j,g3)
1370            s2=z(2,ia*3+2,j,g3)
1371            r3=z(1,ia*3+3,j,g3)
1372            s3=z(2,ia*3+3,j,g3)
1373            r=r2 + r3
1374            s=s2 + s3
1375            zbr(1,ind(ia*3+1),j,g3) = r + r1
1376            zbr(2,ind(ia*3+1),j,g3) = s + s1
1377            r1=r1 - r*.5d0
1378            s1=s1 - s*.5d0
1379            r2=r2-r3
1380            s2=s2-s3
1381            zbr(1,ind(ia*3+2),j,g3) = r1 - s2*bb
1382            zbr(2,ind(ia*3+2),j,g3) = s1 + r2*bb
1383            zbr(1,ind(ia*3+3),j,g3) = r1 + s2*bb
1384            zbr(2,ind(ia*3+3),j,g3) = s1 - r2*bb
1385          end do
1386 
1387          do ia=1,aft(ic)-1
1388            indx=ind(ia*3+1)-1
1389            cr2=trig(1,indx)
1390            ct2=trig(2,indx)
1391            cr3=trig(1,2*indx)
1392            ct3=trig(2,2*indx)
1393            cr2=cr2/cr3
1394            cr3p=.5d0*cr3
1395            bb=ris*cr3*0.8660254037844387d0
1396            do j=ma,mb
1397              r1=z(1,ia*3+1,j,g3)
1398              s1=z(2,ia*3+1,j,g3)
1399              r2=z(1,ia*3+2,j,g3) - z(2,ia*3+2,j,g3)*ct2
1400              s2=z(1,ia*3+2,j,g3)*ct2 + z(2,ia*3+2,j,g3)
1401              r3=z(1,ia*3+3,j,g3) - z(2,ia*3+3,j,g3)*ct3
1402              s3=z(1,ia*3+3,j,g3)*ct3 + z(2,ia*3+3,j,g3)
1403              r=cr2*r2 + r3
1404              s=cr2*s2 + s3
1405              zbr(1,ind(ia*3+1),j,g3) = r*cr3 + r1
1406              zbr(2,ind(ia*3+1),j,g3) = s*cr3 + s1
1407              r1=r1 - r*cr3p
1408              s1=s1 - s*cr3p
1409              r2=cr2*r2-r3
1410              s2=cr2*s2-s3
1411              zbr(1,ind(ia*3+2),j,g3) = r1 - s2*bb
1412              zbr(2,ind(ia*3+2),j,g3) = s1 + r2*bb
1413              zbr(1,ind(ia*3+3),j,g3) = r1 + s2*bb
1414              zbr(2,ind(ia*3+3),j,g3) = s1 - r2*bb
1415            end do
1416          end do
1417 
1418 !        Treat radix 5
1419        else if (now(ic)==5) then
1420 !        radix 5
1421 !        sin(2.d0*pi/5.d0)
1422          sin2=ris*0.9510565162951536d0
1423          ia=0
1424 
1425 !        First step of radix 5
1426          do j=ma,mb
1427            r1=z(1,ia*5+1,j,g3)
1428            s1=z(2,ia*5+1,j,g3)
1429            r2=z(1,ia*5+2,j,g3)
1430            s2=z(2,ia*5+2,j,g3)
1431            r3=z(1,ia*5+3,j,g3)
1432            s3=z(2,ia*5+3,j,g3)
1433            r4=z(1,ia*5+4,j,g3)
1434            s4=z(2,ia*5+4,j,g3)
1435            r5=z(1,ia*5+5,j,g3)
1436            s5=z(2,ia*5+5,j,g3)
1437            r25 = r2 + r5
1438            r34 = r3 + r4
1439            s25 = s2 - s5
1440            s34 = s3 - s4
1441            zbr(1,ind(ia*5+1),j,g3) = r1 + r25 + r34
1442            r = r1 + cos2*r25 + cos4*r34
1443            s = s25 + sin42*s34
1444            zbr(1,ind(ia*5+2),j,g3) = r - sin2*s
1445            zbr(1,ind(ia*5+5),j,g3) = r + sin2*s
1446            r = r1 + cos4*r25 + cos2*r34
1447            s = sin42*s25 - s34
1448            zbr(1,ind(ia*5+3),j,g3) = r - sin2*s
1449            zbr(1,ind(ia*5+4),j,g3) = r + sin2*s
1450            r25 = r2 - r5
1451            r34 = r3 - r4
1452            s25 = s2 + s5
1453            s34 = s3 + s4
1454            zbr(2,ind(ia*5+1),j,g3) = s1 + s25 + s34
1455            r = s1 + cos2*s25 + cos4*s34
1456            s = r25 + sin42*r34
1457            zbr(2,ind(ia*5+2),j,g3) = r + sin2*s
1458            zbr(2,ind(ia*5+5),j,g3) = r - sin2*s
1459            r = s1 + cos4*s25 + cos2*s34
1460            s = sin42*r25 - r34
1461            zbr(2,ind(ia*5+3),j,g3) = r + sin2*s
1462            zbr(2,ind(ia*5+4),j,g3) = r - sin2*s
1463          end do
1464 
1465 !        Second step of radix 5
1466          do ia=1,aft(ic)-1
1467            indx=ind(ia*5+1)-1
1468            cr2=trig(1,indx)
1469            ct2=trig(2,indx)
1470            cr3=trig(1,2*indx)
1471            ct3=trig(2,2*indx)
1472            cr4=trig(1,3*indx)
1473            ct4=trig(2,3*indx)
1474            cr5=trig(1,4*indx)
1475            ct5=trig(2,4*indx)
1476            do j=ma,mb
1477              r1=z(1,ia*5+1,j,g3)
1478              s1=z(2,ia*5+1,j,g3)
1479              r2=cr2*(z(1,ia*5+2,j,g3) - z(2,ia*5+2,j,g3)*ct2)
1480              s2=cr2*(z(1,ia*5+2,j,g3)*ct2 + z(2,ia*5+2,j,g3))
1481              r3=cr3*(z(1,ia*5+3,j,g3) - z(2,ia*5+3,j,g3)*ct3)
1482              s3=cr3*(z(1,ia*5+3,j,g3)*ct3 + z(2,ia*5+3,j,g3))
1483              r4=z(1,ia*5+4,j,g3) - z(2,ia*5+4,j,g3)*ct4
1484              s4=z(1,ia*5+4,j,g3)*ct4 + z(2,ia*5+4,j,g3)
1485              r5=z(1,ia*5+5,j,g3) - z(2,ia*5+5,j,g3)*ct5
1486              s5=z(1,ia*5+5,j,g3)*ct5 + z(2,ia*5+5,j,g3)
1487              r25 = r2 + r5*cr5
1488              r34 = r3 + r4*cr4
1489              s25 = s2 - s5*cr5
1490              s34 = s3 - s4*cr4
1491              zbr(1,ind(ia*5+1),j,g3) = r1 + r25 + r34
1492              r = r1 + cos2*r25 + cos4*r34
1493              s = s25 + sin42*s34
1494              zbr(1,ind(ia*5+2),j,g3) = r - sin2*s
1495              zbr(1,ind(ia*5+5),j,g3) = r + sin2*s
1496              r = r1 + cos4*r25 + cos2*r34
1497              s = sin42*s25 - s34
1498              zbr(1,ind(ia*5+3),j,g3) = r - sin2*s
1499              zbr(1,ind(ia*5+4),j,g3) = r + sin2*s
1500              r25 = r2 - r5*cr5
1501              r34 = r3 - r4*cr4
1502              s25 = s2 + s5*cr5
1503              s34 = s3 + s4*cr4
1504              zbr(2,ind(ia*5+1),j,g3) = s1 + s25 + s34
1505              r = s1 + cos2*s25 + cos4*s34
1506              s = r25 + sin42*r34
1507              zbr(2,ind(ia*5+2),j,g3) = r + sin2*s
1508              zbr(2,ind(ia*5+5),j,g3) = r - sin2*s
1509              r = s1 + cos4*s25 + cos2*s34
1510              s = sin42*r25 - r34
1511              zbr(2,ind(ia*5+3),j,g3) = r + sin2*s
1512              zbr(2,ind(ia*5+4),j,g3) = r - sin2*s
1513            end do
1514          end do
1515 
1516        else
1517 !        All radices are treated
1518          ABI_BUG('called with factors other than 2, 3, and 5')
1519        end if
1520 
1521 !      End of bit reversal
1522 
1523 !      -------------------------------------------------------------------
1524      end do
1525    end do
1526  end do
1527 !$OMP END PARALLEL DO
1528 
1529 end subroutine sg_fftpx

m_sgfft/sg_fftrisc [ Functions ]

[ Top ] [ m_sgfft ] [ Functions ]

NAME

 sg_fftrisc

FUNCTION

  Wrapper around fftrisc_one_nothreadsafe that supports ndat transforms.

 * This routine is not thread-safe due to the presence of variables with the save attribute!
   DO NOT CALL THIS ROUTINE INSIDE A OPENMP PARALLEL REGION

SOURCE

3851 subroutine sg_fftrisc(cplex,denpot,fofgin,fofgout,fofr,gboundin,gboundout,istwf_k,&
3852 & kg_kin,kg_kout,mgfft,ndat,ngfft,npwin,npwout,n4,n5,n6,option,weight_r, weight_i)
3853 
3854 !Arguments ------------------------------------
3855 !scalars
3856  integer,intent(in) :: cplex,istwf_k,mgfft,n4,n5,n6,ndat,npwin,npwout,option
3857  real(dp),intent(in) :: weight_i,weight_r
3858 !arrays
3859  integer,intent(in) :: gboundin(2*mgfft+8,2),gboundout(2*mgfft+8,2)
3860  integer,intent(in) :: kg_kin(3,npwin),kg_kout(3,npwout),ngfft(18)
3861  real(dp),intent(in) :: fofgin(2,npwin*ndat)
3862  real(dp),intent(inout) :: denpot(cplex*n4*n5*n6),fofr(2,n4*n5*n6*ndat)
3863  real(dp),intent(out) :: fofgout(2,npwout*ndat)
3864 
3865 !Local variables-------------------------------
3866 !scalars
3867  integer :: idat,fofgin_p,fofr_p,fofgout_p
3868 !arrays
3869  real(dp) :: dum_fofgin(0,0),dum_fofr(0,0),dum_fofgout(0,0)
3870 
3871 ! *************************************************************************
3872 
3873  do idat=1,ndat
3874    fofgin_p = 1 + (idat-1) * npwin
3875    fofr_p = 1 + (idat - 1) * n4*n5*n6
3876    fofgout_p = 1 + (idat-1) * npwout
3877 
3878    select case (option)
3879    case (0)
3880      call fftrisc_one_nothreadsafe(&
3881 &      cplex,denpot,fofgin(1,fofgin_p),dum_fofgout,fofr(1,fofr_p),&
3882 &      gboundin,gboundout,istwf_k,&
3883 &      kg_kin,kg_kout,mgfft,ngfft,npwin,npwout,n4,n5,n6,option,weight_r,weight_i)
3884 
3885    case (1)
3886      ! Don't know why but fofr is not touched by this option.
3887      call fftrisc_one_nothreadsafe(&
3888 &      cplex,denpot,fofgin(1,fofgin_p),dum_fofgout,dum_fofr,&
3889 &      gboundin,gboundout,istwf_k,&
3890 &      kg_kin,kg_kout,mgfft,ngfft,npwin,npwout,n4,n5,n6,option,weight_r,weight_i)
3891 
3892    case (2)
3893      call fftrisc_one_nothreadsafe(&
3894 &      cplex,denpot,fofgin(1,fofgin_p),fofgout(1,fofgout_p),dum_fofr,&
3895 &      gboundin,gboundout,istwf_k,&
3896 &      kg_kin,kg_kout,mgfft,ngfft,npwin,npwout,n4,n5,n6,option,weight_r,weight_i)
3897 
3898    case (3)
3899      call fftrisc_one_nothreadsafe(&
3900 &      cplex,denpot,dum_fofgin,fofgout(1,fofgout_p),fofr(1,fofr_p),&
3901 &      gboundin,gboundout,istwf_k,&
3902 &      kg_kin,kg_kout,mgfft,ngfft,npwin,npwout,n4,n5,n6,option,weight_r,weight_i)
3903 
3904    case default
3905       ABI_ERROR("Wrong option")
3906    end select
3907  end do
3908 
3909 end subroutine sg_fftrisc

m_sgfft/sg_fftrisc_2 [ Functions ]

[ Top ] [ m_sgfft ] [ Functions ]

NAME

 sg_fftrisc_2

FUNCTION

 Carry out Fourier transforms between real and reciprocal (G) space,
 for wavefunctions, contained in a sphere in reciprocal space,
 in both directions. Also accomplish some post-processing.
 if luse_ndo is activated, do two FFT, and compute the density with
 non-diagonal occupations.

NOTES

 * Specifically uses rather sophisticated algorithms, based on S Goedecker
   routines, specialized for superscalar RISC architecture.
   Zero padding : saves 7/12 execution time
   Bi-dimensional data locality in most of the routine : cache reuse
   For k-point (0 0 0) : takes advantage of symmetry of data.
   Note however that no blocking is used, in both 1D z-transform
   or subsequent 2D transform. This should be improved.

 * This routine is not thread-safe due to the presence of variables with the save attribute!
   DO NOT CALL THIS ROUTINE INSIDE A OPENMP PARALLEL REGION

INPUTS

  cplex= if 1 , denpot is real, if 2 , denpot is complex
     (cplex=2 only allowed for option=2 when istwf_k=1)
     one can also use cplex=0 if option=0 or option=3
  fofgin(2,npwin)=holds input wavefunction in G vector basis sphere.
  fofgin_p(2,npwin) (optional) =holds second input wavefunction in G vector basis sphere.
  gboundin(2*mgfft+8,2)=sphere boundary info for reciprocal to real space
  gboundout(2*mgfft+8,2)=sphere boundary info for real to reciprocal space
  istwf_k=option parameter that describes the storage of wfs
  kg_kin(3,npwin)=reduced planewave coordinates, input
  kg_kout(3,npwout)=reduced planewave coordinates, output
  luse_ndo (optional) = use non diagonal occup (in this case, exists fofgin_p)
  npwin=number of elements in fofgin array (for option 0, 1 and 2)
  npwout=number of elements in fofgout array (for option 2 and 3)
  mgfft=maximum size of 1D FFTs
  ngfft(18)=contain all needed information about 3D FFT, see ~abinit/doc/variables/vargs.htm#ngfft
  n4,n5,n6=ngfft(4),ngfft(5),ngfft(6), dimensions of fofr.
  option= if 0: do direct FFT
          if 1: do direct FFT, then sum the density
          if 2: do direct FFT, multiply by the potential, then do reverse FFT
          if 3: do reverse FFT only
  weight=weight to be used for the accumulation of the density in real space
          (needed only when option=1)

OUTPUT

  (see side effects)

OPTIONS

  The different options are:
  - reciprocal to real space and output the result (when option=0),
  - reciprocal to real space and accumulate the density (when option=1) or
  - reciprocal to real space, apply the local potential to the wavefunction
    in real space and produce the result in reciprocal space (when option=2)
  - real space to reciprocal space (when option=3).
  option=0 IS NOT ALLOWED when istwf_k>2
  option=3 IS NOT ALLOWED when istwf_k>=2
  (this version can be used to compute fft of two wavefunction and
     compute the product in denpot)

SIDE EFFECTS

  for option==0, fofgin(2,npwin)=holds input wavefunction in G sphere;
                 fofr(2,n4,n5,n6) contains the Fourier Transform of fofgin;
                 no use of denpot, fofgout and npwout.
  for option==1, fofgin(2,npwin)=holds input wavefunction in G sphere;
                 denpot(cplex*n4,n5,n6) contains the input density at input,
                 and the updated density at output;
                 fofr(2,n4,n5,n6) contains the Fourier transform of fofgin,
                 except in the case of the hp library subroutine;
                 no use of fofgout and npwout.
  for option==2, fofgin(2,npwin)=holds input wavefunction in G sphere;
                 denpot(cplex*n4,n5,n6) contains the input local potential;
                 fofgout(2,npwout) contains the output function;
                 fofr(2,n4,n5,n6) contains the Fourier transform of fofgin,
                 except in the case of the hp library subroutine.
  for option==3, fofr(2,n4,n5,n6) contains the real space wavefunction;
                 fofgout(2,npwout) contains its Fourier transform;
                 no use of fofgin and npwin.

TODO

 Complete input and output list.

SOURCE

4904 subroutine sg_fftrisc_2(cplex,denpot,fofgin,fofgout,fofr,gboundin,gboundout,&
4905 & istwf_k,kg_kin,kg_kout,&
4906 & mgfft,ngfft,npwin,npwout,n4,n5,n6,option,weight_r,weight_2,&
4907 & luse_ndo,fofgin_p) ! optional
4908 
4909 !Arguments ------------------------------------
4910 !scalars
4911  integer,intent(in) :: cplex,istwf_k,mgfft,n4,n5,n6,npwin,npwout,option
4912  real(dp),intent(in) :: weight_r
4913  real(dp),intent(in),optional :: weight_2
4914 !arrays
4915  integer,intent(in) :: gboundin(2*mgfft+8,2),gboundout(2*mgfft+8,2)
4916  integer,intent(in) :: kg_kin(3,npwin),kg_kout(3,npwout),ngfft(18)
4917  logical,intent(in),optional :: luse_ndo
4918  real(dp),intent(in) :: fofgin(2,npwin)
4919  real(dp),intent(in),optional :: fofgin_p(:,:)
4920  real(dp),intent(inout) :: denpot(cplex*n4,n5,n6),fofr(2,n4,n5,n6)
4921  real(dp),intent(out) :: fofgout(2,npwout)
4922 
4923 !Local variables-------------------------------
4924 !scalars
4925  integer,parameter :: mfac=11
4926  integer,save :: ic1,ic2,ic3,ic4,ic5,ic6,n1_save=0,n2_save=0,n3_save=0
4927  integer :: fftcache,g2max,g2min,i1,i1max,i2,i3,i3inv,ig,igb
4928  integer :: igb_inv,igbmax,ii2,lot,lotin,lotout,mgb,n1
4929  integer :: n1half1,n1halfm,n1i,n2,n2half1,n3,n4half1,n5half1,nfftot,ngbin
4930  integer :: ngbout,nlot,nproc_omp
4931  integer :: weight_i
4932  real(dp) :: ai,ar,fraction,norm,phai,phar,wkim,wkre
4933  character(len=500) :: message
4934 !arrays
4935  integer,save :: aft1(mfac),aft2(mfac),aft3(mfac),aft4(mfac),aft5(mfac)
4936  integer,save :: aft6(mfac),bef1(mfac),bef2(mfac),bef3(mfac),bef4(mfac)
4937  integer,save :: bef5(mfac),bef6(mfac),ind1(mg),ind2(mg),ind3(mg),ind4(mg)
4938  integer,save :: ind5(mg),ind6(mg),now1(mfac),now2(mfac),now3(mfac),now4(mfac)
4939  integer,save :: now5(mfac),now6(mfac)
4940  integer :: gbound_dum(4)
4941  integer,allocatable :: indpw_kin(:,:),indpw_kout(:,:)
4942  logical :: lluse_ndo
4943  real(dp),save :: trig1(2,mg),trig2(2,mg),trig3(2,mg),trig4(2,mg),trig5(2,mg)
4944  real(dp),save :: trig6(2,mg)
4945  real(dp),allocatable :: pha1(:,:),pha2(:,:),pha3(:,:),wk1d_a(:,:,:,:)
4946  real(dp),allocatable :: wk1d_b(:,:,:,:),wk2d_a(:,:,:,:),wk2d_b(:,:,:,:)
4947  real(dp),allocatable :: wk2d_c(:,:,:,:),wk2d_d(:,:,:,:)
4948  real(dp),allocatable :: wk1d_a_p(:,:,:,:),wk1d_b_p(:,:,:,:)
4949  real(dp),allocatable :: wk2d_a_p(:,:,:,:),wk2d_b_p(:,:,:,:)
4950 #if defined HAVE_OPENMP
4951  integer,external :: OMP_GET_NUM_THREADS
4952 #endif
4953 
4954 ! *************************************************************************
4955 
4956  !DBG_ENTER("COLL")
4957 
4958 !DEBUG
4959 !write(std_out,*)' sg_fftrisc_2 : enter, istwf_k= ',istwf_k
4960 !write(std_out,*)' sg_fftrisc_2 : option,mgfft=',option,mgfft
4961 !write(std_out,*)' sg_fftrisc_2 : gboundin(3:2*mgfft+6,1)='
4962 !do ii=1,mgfft+2
4963 !write(std_out,*)gboundin(2*ii+1,1),gboundin(2*ii+2,1)
4964 !end do
4965 !stop
4966 !ENDDEBUG
4967 !
4968  lluse_ndo=.true.
4969  if(istwf_k/=1)then
4970    write(message,'(a,i0)' )' It is not yet allowed to use dmft with istwf_k=',istwf_k
4971    ABI_BUG(message)
4972  end if
4973 
4974  if(istwf_k>2 .and. option==0)then
4975    write(message, '(a,i0)' )' It is not allowed to use option=0 with istwf_k=',istwf_k
4976    ABI_BUG(message)
4977  end if
4978 
4979  if(istwf_k>=2 .and. option==3)then
4980    write(message, '(a,i0)' )'  It is not allowed to use option=3 with istwf_k=',istwf_k
4981    ABI_BUG(message)
4982  end if
4983 
4984  lluse_ndo=.false.
4985  if(present(luse_ndo).and.present(fofgin_p)) then
4986    if(luse_ndo) lluse_ndo=.true.
4987  end if
4988  if(lluse_ndo) then
4989    if((size(fofgin_p,2)==0).and.(luse_ndo)) then
4990      write(message, '(a,a,a,i4,i5)' )&
4991 &     'fofgin_p has a dimension equal to zero and luse_ndo true',ch10,&
4992 &     'Action: check dimension of fofgin_p',size(fofgin_p,2),luse_ndo
4993      ABI_BUG(message)
4994    end if
4995  end if
4996 
4997  weight_i= weight_r
4998  if ( present (weight_2 )) then
4999      weight_i= weight_2
5000      if ( present(luse_ndo) .and. (luse_ndo) )weight_i=weight_r
5001  end if
5002 
5003 !For all other tests of validity of inputs, assume that they
5004 !have been done in the calling routine
5005 
5006  n1=ngfft(1) ; n2=ngfft(2) ; n3=ngfft(3) ; nfftot=n1*n2*n3
5007  fftcache=ngfft(8)
5008 
5009  if(option/=3)then
5010    ABI_MALLOC(indpw_kin,(4,npwin))
5011    call indfftrisc(gboundin(3:3+2*mgfft+4,1),indpw_kin,kg_kin,mgfft,ngbin,ngfft,npwin)
5012  end if
5013  if(option==2 .or. option==3)then
5014    ABI_MALLOC(indpw_kout,(4,npwout))
5015    call indfftrisc(gboundout(3:3+2*mgfft+4,1),indpw_kout,kg_kout,mgfft,ngbout,ngfft,npwout)
5016  end if
5017 
5018 !Define the dimension of the first work arrays, for 1D transforms along z ,
5019 !taking into account the need to avoid the cache trashing
5020  if(option==2)then
5021    mgb=max(ngbin,ngbout)
5022  else if(option==0 .or. option==1)then
5023    mgb=ngbin ; ngbout=1
5024  else if(option==3)then
5025    mgb=ngbout ; ngbin=1
5026  end if
5027 
5028  if(mod(mgb,2)/=1)mgb=mgb+1
5029 
5030 !Initialise openmp, if needed
5031 !$OMP PARALLEL
5032 !$OMP SINGLE
5033  nproc_omp=1
5034 #if defined HAVE_OPENMP
5035  nproc_omp=OMP_GET_NUM_THREADS()
5036 #endif
5037 !$OMP END SINGLE
5038 !$OMP END PARALLEL
5039 
5040 !For the treatment of the z transform,
5041 !one tries to use only a fraction of the cache, since the
5042 !treatment of the array wk1d_a will not involve contiguous segments
5043  fraction=0.25
5044 !First estimation of lot and nlot
5045  lot=(fftcache*fraction*1000)/(n3*8*2)+1
5046 !Select the smallest integer multiple of nproc_omp, larger
5047 !or equal to nlot. In this way, the cache size is not exhausted,
5048 !and one takes care correctly of the number of processors.
5049 !Treat separately the in and out cases
5050  nlot=(ngbin-1)/lot+1
5051  nlot=nproc_omp*((nlot-1)/nproc_omp+1)
5052  lotin=(ngbin-1)/nlot+1
5053  nlot=(ngbout-1)/lot+1
5054  nlot=nproc_omp*((nlot-1)/nproc_omp+1)
5055  lotout=(ngbout-1)/nlot+1
5056 !The next line impose only one lot. Usually, comment it.
5057 !lotin=mgb ; lotout=mgb
5058 
5059 !Compute auxiliary arrays needed for FFTs
5060  if(n1/=n1_save)then
5061    call sg_ctrig(n1,trig1,aft1,bef1,now1,one,ic1,ind1,mfac,mg)
5062    call sg_ctrig(n1,trig4,aft4,bef4,now4,-one,ic4,ind4,mfac,mg)
5063    n1_save=n1
5064  end if
5065  if(n2/=n2_save)then
5066    call sg_ctrig(n2,trig2,aft2,bef2,now2,one,ic2,ind2,mfac,mg)
5067    call sg_ctrig(n2,trig5,aft5,bef5,now5,-one,ic5,ind5,mfac,mg)
5068    n2_save=n2
5069  end if
5070  if(n3/=n3_save)then
5071    call sg_ctrig(n3,trig3,aft3,bef3,now3,one,ic3,ind3,mfac,mg)
5072    call sg_ctrig(n3,trig6,aft6,bef6,now6,-one,ic6,ind6,mfac,mg)
5073    n3_save=n3
5074  end if
5075 
5076 !------------------------------------------------------------------
5077 !Here, call general k-point code
5078 
5079  if(istwf_k==1)then
5080 
5081 !  Note that the z transform will appear as a y transform
5082    ABI_MALLOC(wk1d_a,(2,mgb,n3,1))
5083    ABI_MALLOC(wk1d_b,(2,mgb,n3,1))
5084    ABI_MALLOC(wk1d_a_p,(2,mgb,n3,1))
5085    ABI_MALLOC(wk1d_b_p,(2,mgb,n3,1))
5086 
5087    if(option/=3)then
5088 
5089      if(lluse_ndo)  then
5090 !$OMP PARALLEL DO DEFAULT(PRIVATE) &
5091 !$OMP&SHARED(n3,ngbin,wk1d_a_p)
5092        do i3=1,n3
5093          do igb=1,ngbin
5094            wk1d_a_p(1,igb,i3,1)=zero
5095            wk1d_a_p(2,igb,i3,1)=zero
5096          end do
5097        end do
5098 !$OMP END PARALLEL DO
5099 
5100 !      Insert fofgin_p into the work array
5101 !$OMP PARALLEL DO DEFAULT(PRIVATE) &
5102 !$OMP&SHARED(fofgin_p,indpw_kin,npwin,wk1d_a_p)
5103        do ig=1,npwin
5104          igb=indpw_kin(4,ig) ; i3=indpw_kin(3,ig)
5105          wk1d_a_p(1,igb,i3,1)=fofgin_p(1,ig)
5106          wk1d_a_p(2,igb,i3,1)=fofgin_p(2,ig)
5107        end do
5108 !$OMP END PARALLEL DO
5109 
5110 !      Go from wk1d_a_p to wk1d_b_p, using 1D FFTs on the z direction
5111 !      However, due to special packing of data, use routine ffty
5112 !$OMP PARALLEL DO SHARED(aft3,bef3,fftcache,ind3,ic3,lotin,mgb)&
5113 !$OMP&SHARED(ngbin,now3,n3,trig3,wk1d_a_p,wk1d_b_p)&
5114 !$OMP&PRIVATE(igb,igbmax)
5115        do igb=1,ngbin,lotin
5116          igbmax=min(igb+lotin-1,ngbin)
5117 !        Go from wk1d_a_p to wk1d_b_p, using 1D FFTs on the z direction
5118 !        However, due to special packing of data, use routine ffty
5119          call sg_ffty(fftcache,mfac,mg,mgb,n3,1,igb,igbmax,1,1,wk1d_a_p,wk1d_b_p, &
5120 &         trig3,aft3,now3,bef3,one,ind3,ic3)
5121        end do
5122 !$OMP END PARALLEL DO
5123 
5124      end if ! lluse_ndo
5125 
5126 !$OMP PARALLEL DO DEFAULT(PRIVATE) &
5127 !$OMP&SHARED(n3,ngbin,wk1d_a)
5128      do i3=1,n3
5129        do igb=1,ngbin
5130          wk1d_a(1,igb,i3,1)=zero
5131          wk1d_a(2,igb,i3,1)=zero
5132        end do
5133      end do
5134 !$OMP END PARALLEL DO
5135 
5136 !    Insert fofgin into the work array
5137 !$OMP PARALLEL DO DEFAULT(PRIVATE) &
5138 !$OMP&SHARED(fofgin,indpw_kin,npwin,wk1d_a)
5139      do ig=1,npwin
5140        igb=indpw_kin(4,ig) ; i3=indpw_kin(3,ig)
5141        wk1d_a(1,igb,i3,1)=fofgin(1,ig)
5142        wk1d_a(2,igb,i3,1)=fofgin(2,ig)
5143      end do
5144 !$OMP END PARALLEL DO
5145 
5146 !    Go from wk1d_a to wk1d_b, using 1D FFTs on the z direction
5147 !    However, due to special packing of data, use routine ffty
5148 !$OMP PARALLEL DO SHARED(aft3,bef3,fftcache,ind3,ic3,lotin,mgb)&
5149 !$OMP&SHARED(ngbin,now3,n3,trig3,wk1d_a,wk1d_b)&
5150 !$OMP&PRIVATE(igb,igbmax)
5151      do igb=1,ngbin,lotin
5152        igbmax=min(igb+lotin-1,ngbin)
5153 !      Go from wk1d_a to wk1d_b, using 1D FFTs on the z direction
5154 !      However, due to special packing of data, use routine ffty
5155        call sg_ffty(fftcache,mfac,mg,mgb,n3,1,igb,igbmax,1,1,wk1d_a,wk1d_b, &
5156 &       trig3,aft3,now3,bef3,one,ind3,ic3)
5157      end do
5158 !$OMP END PARALLEL DO
5159 
5160    end if !  if(option/=3)
5161 
5162 !  Do-loop on the planes stacked in the z direction
5163 !$OMP PARALLEL DEFAULT(PRIVATE) &
5164 !$OMP&SHARED(aft1,aft2,aft4,aft5,bef1,bef2,bef4,bef5,cplex,denpot) &
5165 !$OMP&SHARED(fftcache,fofr,gboundin,gboundout)&
5166 !$OMP&SHARED(ic1,ic2,ic4,ic5,ind1,ind2,ind4) &
5167 !$OMP&SHARED(ind5,indpw_kin,indpw_kout,lluse_ndo,mgb,n1,n2,n3,n4,n5,ngbin) &
5168 !$OMP&SHARED(ngbout,now1,now2,now4,now5,option,trig1,trig2,trig4,trig5) &
5169 !$OMP&SHARED(weight_r,weight_i,weight_2,wk1d_a,wk1d_b,wk1d_b_p)
5170 
5171 !  Allocate two 2-dimensional work arrays
5172    ABI_MALLOC(wk2d_a,(2,n4,n5,1))
5173    ABI_MALLOC(wk2d_b,(2,n4,n5,1))
5174    ABI_MALLOC(wk2d_a_p,(2,n4,n5,1))
5175    ABI_MALLOC(wk2d_b_p,(2,n4,n5,1))
5176 !$OMP DO
5177    do i3=1,n3
5178 
5179      if(option/=3)then
5180        if(lluse_ndo)  then
5181 !        Zero the values on the current plane
5182 !        wk2d_a_p(1:2,1:n1,1:n2,1)=zero
5183          do i2=1,n2
5184            do i1=1,n1
5185              wk2d_a_p(1,i1,i2,1)=zero
5186              wk2d_a_p(2,i1,i2,1)=zero
5187            end do
5188          end do
5189 !        Copy the data in the current plane
5190          do igb=1,ngbin
5191            i1=indpw_kin(1,igb) ; i2=indpw_kin(2,igb)
5192            wk2d_a_p(1,i1,i2,1)=wk1d_b_p(1,igb,i3,1)
5193            wk2d_a_p(2,i1,i2,1)=wk1d_b_p(2,igb,i3,1)
5194          end do
5195 !        Perform x transform, taking into account arrays of zeros
5196          g2min=gboundin(3,1) ; g2max=gboundin(4,1)
5197          if ( g2min+n2 >= g2max+2 ) then
5198            do i2=g2max+2,g2min+n2
5199              do i1=1,n1
5200                wk2d_b_p(1,i1,i2,1)=zero
5201                wk2d_b_p(2,i1,i2,1)=zero
5202              end do
5203            end do
5204          end if
5205          gbound_dum(1)=1 ; gbound_dum(2)=1
5206          gbound_dum(3)=g2min ; gbound_dum(4)=g2max
5207          call sg_fftpx(fftcache,mfac,mg,0,n4,n5,1,n2,1,wk2d_a_p,wk2d_b_p,&
5208 &         trig1,aft1,now1,bef1,one,ind1,ic1,gbound_dum)
5209 !        Perform y transform
5210          n1i=1
5211          call sg_ffty(fftcache,mfac,mg,n4,n5,1,n1i,n1,1,1,wk2d_b_p,wk2d_a_p, &
5212 &         trig2,aft2,now2,bef2,one,ind2,ic2)
5213 !        The wave function is now in real space, for the current plane
5214        end if  ! lluse_ndo
5215 
5216 !      Zero the values on the current plane
5217 !      wk2d_a(1:2,1:n1,1:n2,1)=zero
5218        do i2=1,n2
5219          do i1=1,n1
5220            wk2d_a(1,i1,i2,1)=zero
5221            wk2d_a(2,i1,i2,1)=zero
5222          end do
5223        end do
5224 !      Copy the data in the current plane
5225        do igb=1,ngbin
5226          i1=indpw_kin(1,igb) ; i2=indpw_kin(2,igb)
5227          wk2d_a(1,i1,i2,1)=wk1d_b(1,igb,i3,1)
5228          wk2d_a(2,i1,i2,1)=wk1d_b(2,igb,i3,1)
5229        end do
5230 !      Perform x transform, taking into account arrays of zeros
5231        g2min=gboundin(3,1) ; g2max=gboundin(4,1)
5232        if ( g2min+n2 >= g2max+2 ) then
5233          do i2=g2max+2,g2min+n2
5234            do i1=1,n1
5235              wk2d_b(1,i1,i2,1)=zero
5236              wk2d_b(2,i1,i2,1)=zero
5237            end do
5238          end do
5239        end if
5240        gbound_dum(1)=1 ; gbound_dum(2)=1
5241        gbound_dum(3)=g2min ; gbound_dum(4)=g2max
5242        call sg_fftpx(fftcache,mfac,mg,0,n4,n5,1,n2,1,wk2d_a,wk2d_b,&
5243 &       trig1,aft1,now1,bef1,one,ind1,ic1,gbound_dum)
5244 !      Perform y transform
5245        n1i=1
5246        call sg_ffty(fftcache,mfac,mg,n4,n5,1,n1i,n1,1,1,wk2d_b,wk2d_a, &
5247 &       trig2,aft2,now2,bef2,one,ind2,ic2)
5248 !      The wave function is now in real space, for the current plane
5249      end if
5250 
5251      if(option==0)then
5252 !      Copy the transformed function at the right place
5253        do i2=1,n2
5254          do i1=1,n1
5255            fofr(1,i1,i2,i3)=wk2d_a(1,i1,i2,1)
5256            fofr(2,i1,i2,i3)=wk2d_a(2,i1,i2,1)
5257          end do
5258        end do
5259      end if
5260 
5261      if(option==1)then
5262 !      Accumulate density
5263        do i2=1,n2
5264          do i1=1,n1
5265            if(lluse_ndo)  then
5266              denpot(i1,i2,i3)=denpot(i1,i2,i3)+&
5267 &             weight_r*(wk2d_a(1,i1,i2,1)*wk2d_a_p(1,i1,i2,1)&
5268 &             +wk2d_a(2,i1,i2,1)*wk2d_a_p(2,i1,i2,1))
5269              if(present(weight_2)) then
5270                denpot(i1,i2,i3)=denpot(i1,i2,i3)+&
5271 &               weight_2*(wk2d_a_p(2,i1,i2,1)*wk2d_a(1,i1,i2,1)&
5272 &               -wk2d_a_p(1,i1,i2,1)*wk2d_a(2,i1,i2,1))
5273              end if
5274            else
5275              denpot(i1,i2,i3)=denpot(i1,i2,i3)+&
5276 &             weight_r*wk2d_a(1,i1,i2,1)**2+ weight_i*wk2d_a(2,i1,i2,1)**2
5277            end if
5278          end do
5279        end do
5280      end if
5281 
5282      if(option==2)then
5283 !      Apply local potential
5284        if(cplex==1)then
5285          do i2=1,n2
5286            do i1=1,n1
5287              wk2d_a(1,i1,i2,1)=denpot(i1,i2,i3)*wk2d_a(1,i1,i2,1)
5288              wk2d_a(2,i1,i2,1)=denpot(i1,i2,i3)*wk2d_a(2,i1,i2,1)
5289            end do
5290          end do
5291        else
5292          do i2=1,n2
5293            do i1=1,n1
5294              wkre=wk2d_a(1,i1,i2,1)
5295              wkim=wk2d_a(2,i1,i2,1)
5296              wk2d_a(1,i1,i2,1)=denpot(2*i1-1,i2,i3)*wkre &
5297 &             -denpot(2*i1  ,i2,i3)*wkim
5298              wk2d_a(2,i1,i2,1)=denpot(2*i1-1,i2,i3)*wkim &
5299 &             +denpot(2*i1  ,i2,i3)*wkre
5300            end do
5301          end do
5302        end if
5303      end if
5304 
5305      if(option==3)then
5306 !      Copy the function to be tranformed at the right place
5307        do i2=1,n2
5308          do i1=1,n1
5309            wk2d_a(1,i1,i2,1)=fofr(1,i1,i2,i3)
5310            wk2d_a(2,i1,i2,1)=fofr(2,i1,i2,i3)
5311          end do
5312        end do
5313      end if
5314 
5315      if(option==2 .or. option==3)then
5316 !      Perform y transform
5317        n1i=1
5318        call sg_ffty(fftcache,mfac,mg,n4,n5,1,n1i,n1,1,1,wk2d_a,wk2d_b, &
5319 &       trig5,aft5,now5,bef5,-one,ind5,ic5)
5320 !      Perform x transform, taking into account arrays of zeros
5321        gbound_dum(1)=1 ; gbound_dum(2)=1
5322        gbound_dum(3)=gboundout(3,1) ; gbound_dum(4)=gboundout(4,1)
5323        call sg_fftpx(fftcache,mfac,mg,0,n4,n5,1,n2,1,wk2d_b,wk2d_a,&
5324 &       trig4,aft4,now4,bef4,-one,ind4,ic4,gbound_dum)
5325 !      Copy the data from the current plane to wk1d_b
5326        do igb=1,ngbout
5327          i1=indpw_kout(1,igb) ; i2=indpw_kout(2,igb)
5328          wk1d_b(1,igb,i3,1)=wk2d_a(1,i1,i2,1)
5329          wk1d_b(2,igb,i3,1)=wk2d_a(2,i1,i2,1)
5330        end do
5331      end if
5332 
5333 !    End loop on planes
5334    end do
5335 !$OMP END DO
5336    ABI_FREE(wk2d_a)
5337    ABI_FREE(wk2d_b)
5338    ABI_FREE(wk2d_a_p)
5339    ABI_FREE(wk2d_b_p)
5340 !$OMP END PARALLEL
5341 
5342    if(option==2 .or. option==3)then
5343 
5344 !    Go from wk1d_b to wk1d_a, using 1D FFTs on the z direction
5345 !    However, due to special packing of data, use routine ffty
5346 !$OMP PARALLEL DO SHARED(aft6,bef6,fftcache,ind6,ic6,lotout,mgb)&
5347 !$OMP&SHARED(ngbout,now6,n3,trig6,wk1d_a,wk1d_b)&
5348 !$OMP&PRIVATE(igb,igbmax)
5349      do igb=1,ngbout,lotout
5350        igbmax=min(igb+lotout-1,ngbout)
5351 !      Go from wk1d_b to wk1d_a, using 1D FFTs on the z direction
5352 !      However, due to special packing of data, use routine ffty
5353        call sg_ffty(fftcache,mfac,mg,mgb,n3,1,igb,igbmax,1,1,wk1d_b,wk1d_a, &
5354 &       trig6,aft6,now6,bef6,-one,ind6,ic6)
5355 
5356      end do
5357 !$OMP END PARALLEL DO
5358 
5359 !    Transfer the data in the output array, after normalization
5360      norm=1.d0/dble(nfftot)
5361 !$OMP PARALLEL DO DEFAULT(PRIVATE) &
5362 !$OMP&SHARED(fofgout,indpw_kout,norm,npwout,wk1d_a)
5363      do ig=1,npwout
5364        igb=indpw_kout(4,ig) ; i3=indpw_kout(3,ig)
5365        fofgout(1,ig)=wk1d_a(1,igb,i3,1)*norm
5366        fofgout(2,ig)=wk1d_a(2,igb,i3,1)*norm
5367      end do
5368 !$OMP END PARALLEL DO
5369    end if
5370 
5371    ABI_FREE(wk1d_a)
5372    ABI_FREE(wk1d_b)
5373    ABI_FREE(wk1d_a_p)
5374    ABI_FREE(wk1d_b_p)
5375 
5376 !  End general k-point part
5377  end if
5378 
5379 !------------------------------------------------------------------
5380 !Here, use of time-reversal symmetry
5381 
5382  if(istwf_k>=2)then
5383 
5384    n1half1=n1/2+1 ; n1halfm=(n1+1)/2
5385    n2half1=n2/2+1
5386 !  n4half1 or n5half1 are the odd integers >= n1half1 or n2half1
5387    n4half1=(n1half1/2)*2+1
5388    n5half1=(n2half1/2)*2+1
5389 !  Note that the z transform will appear as a y transform
5390    ABI_MALLOC(wk1d_a,(2,mgb,n3,1))
5391    ABI_MALLOC(wk1d_b,(2,mgb,n3,1))
5392 
5393    if(istwf_k/=2)then
5394      ABI_MALLOC(pha1,(2,n1))
5395      ABI_MALLOC(pha2,(2,n2))
5396      ABI_MALLOC(pha3,(3,n3))
5397      do i1=1,n1
5398        pha1(1,i1)=cos(dble(i1-1)*pi/dble(n1))
5399        pha1(2,i1)=sin(dble(i1-1)*pi/dble(n1))
5400      end do
5401      do i2=1,n2
5402        pha2(1,i2)=cos(dble(i2-1)*pi/dble(n2))
5403        pha2(2,i2)=sin(dble(i2-1)*pi/dble(n2))
5404      end do
5405      do i3=1,n3
5406        pha3(1,i3)=cos(dble(i3-1)*pi/dble(n3))
5407        pha3(2,i3)=sin(dble(i3-1)*pi/dble(n3))
5408      end do
5409    end if
5410 
5411    if(option/=3)then
5412 
5413 !    Zero the components of wk1d_a
5414 !$OMP PARALLEL DO DEFAULT(PRIVATE) &
5415 !$OMP&SHARED(n3,ngbin,wk1d_a)
5416      do i3=1,n3
5417        do igb=1,ngbin
5418          wk1d_a(1,igb,i3,1)=zero
5419          wk1d_a(2,igb,i3,1)=zero
5420        end do
5421      end do
5422 !$OMP END PARALLEL DO
5423 
5424 !    Insert fofgin into the work array
5425 !$OMP PARALLEL DO DEFAULT(PRIVATE) &
5426 !$OMP&SHARED(fofgin,indpw_kin,npwin,wk1d_a)
5427      do ig=1,npwin
5428        igb=indpw_kin(4,ig) ; i3=indpw_kin(3,ig)
5429        wk1d_a(1,igb,i3,1)=fofgin(1,ig)
5430        wk1d_a(2,igb,i3,1)=fofgin(2,ig)
5431      end do
5432 !$OMP END PARALLEL DO
5433 
5434 !    Must complete the i2=1 plane when $k_y \equiv 0$
5435 
5436 !    Take care of i1=1 when $k_x \equiv 0$
5437      if(istwf_k==2)then
5438 !      Take care of i1=1
5439        do i3=n3/2+1,n3
5440          i3inv=n3+2-i3
5441          wk1d_a(1,1,i3,1)= wk1d_a(1,1,i3inv,1)
5442          wk1d_a(2,1,i3,1)=-wk1d_a(2,1,i3inv,1)
5443        end do
5444      else if(istwf_k==4)then
5445 !      Take care of i1=1
5446        do i3=n3/2+1,n3
5447          i3inv=n3+1-i3
5448          wk1d_a(1,1,i3,1)= wk1d_a(1,1,i3inv,1)
5449          wk1d_a(2,1,i3,1)=-wk1d_a(2,1,i3inv,1)
5450        end do
5451      end if
5452 
5453 !    Now, take care of other i1 values, except i3==1 when $k_z \equiv 0$
5454      i1max=gboundin(6,1)+1
5455      if(istwf_k==2)then
5456 !$OMP PARALLEL DO DEFAULT(PRIVATE) &
5457 !$OMP&SHARED(i1max,n3,wk1d_a)
5458        do igb=2,2*i1max-1
5459          igb_inv=2*i1max+1-igb
5460          do i3=n3/2+1,n3
5461            i3inv=n3+2-i3
5462            wk1d_a(1,igb,i3,1)= wk1d_a(1,igb_inv,i3inv,1)
5463            wk1d_a(2,igb,i3,1)=-wk1d_a(2,igb_inv,i3inv,1)
5464          end do
5465        end do
5466 !$OMP END PARALLEL DO
5467 
5468      else if(istwf_k==3)then
5469 !$OMP PARALLEL DO DEFAULT(PRIVATE) &
5470 !$OMP&SHARED(i1max,n3,wk1d_a)
5471        do igb=1,2*i1max
5472          igb_inv=2*i1max+1-igb
5473          do i3=n3/2+1,n3
5474            i3inv=n3+2-i3
5475            wk1d_a(1,igb,i3,1)= wk1d_a(1,igb_inv,i3inv,1)
5476            wk1d_a(2,igb,i3,1)=-wk1d_a(2,igb_inv,i3inv,1)
5477          end do
5478        end do
5479 !$OMP END PARALLEL DO
5480 
5481      else if(istwf_k==4)then
5482 !$OMP PARALLEL DO DEFAULT(PRIVATE) &
5483 !$OMP&SHARED(i1max,n3,wk1d_a)
5484        do igb=2,2*i1max-1
5485          igb_inv=2*i1max+1-igb
5486          do i3=n3/2+1,n3
5487            i3inv=n3+1-i3
5488            wk1d_a(1,igb,i3,1)= wk1d_a(1,igb_inv,i3inv,1)
5489            wk1d_a(2,igb,i3,1)=-wk1d_a(2,igb_inv,i3inv,1)
5490          end do
5491        end do
5492 !$OMP END PARALLEL DO
5493 
5494      else if(istwf_k==5)then
5495 !$OMP PARALLEL DO DEFAULT(PRIVATE) &
5496 !$OMP&SHARED(i1max,n3,wk1d_a)
5497        do igb=1,2*i1max
5498          igb_inv=2*i1max+1-igb
5499          do i3=n3/2+1,n3
5500            i3inv=n3+1-i3
5501            wk1d_a(1,igb,i3,1)= wk1d_a(1,igb_inv,i3inv,1)
5502            wk1d_a(2,igb,i3,1)=-wk1d_a(2,igb_inv,i3inv,1)
5503          end do
5504        end do
5505 !$OMP END PARALLEL DO
5506 
5507      end if
5508 
5509 !    Now, i3==1
5510      if(istwf_k==2)then
5511        do igb=2,i1max
5512          igb_inv=2*i1max+1-igb
5513          wk1d_a(1,igb_inv,1,1)= wk1d_a(1,igb,1,1)
5514          wk1d_a(2,igb_inv,1,1)=-wk1d_a(2,igb,1,1)
5515        end do
5516      else if(istwf_k==3)then
5517        do igb=1,i1max
5518          igb_inv=2*i1max+1-igb
5519          wk1d_a(1,igb_inv,1,1)= wk1d_a(1,igb,1,1)
5520          wk1d_a(2,igb_inv,1,1)=-wk1d_a(2,igb,1,1)
5521        end do
5522      end if
5523 
5524 !    Go from wk1d_a to wk1d_b, using 1D FFTs on the z direction
5525 !    However, due to special packing of data, use routine ffty
5526 !$OMP PARALLEL DO SHARED(aft3,bef3,fftcache,ind3,ic3,lotin,mgb)&
5527 !$OMP&SHARED(ngbin,now3,n3,trig3,wk1d_a,wk1d_b)&
5528 !$OMP&PRIVATE(igb,igbmax)
5529      do igb=1,ngbin,lotin
5530        igbmax=min(igb+lotin-1,ngbin)
5531 !      Go from wk1d_a to wk1d_b, using 1D FFTs on the z direction
5532 !      However, due to special packing of data, use routine ffty
5533        call sg_ffty(fftcache,mfac,mg,mgb,n3,1,igb,igbmax,1,1,wk1d_a,wk1d_b, &
5534 &       trig3,aft3,now3,bef3,one,ind3,ic3)
5535      end do
5536 !$OMP END PARALLEL DO
5537 
5538 !    Change the phase if $k_z \neq 0$
5539      if(istwf_k==4 .or. istwf_k==5 .or. istwf_k==8 .or. istwf_k==9 )then
5540 !$OMP PARALLEL DO DEFAULT(PRIVATE) &
5541 !$OMP&SHARED(ngbin,n3,pha3,wk1d_b)
5542        do i3=1,n3
5543          phar=pha3(1,i3)
5544          phai=pha3(2,i3)
5545          do igb=1,ngbin
5546            ar=wk1d_b(1,igb,i3,1)
5547            ai=wk1d_b(2,igb,i3,1)
5548            wk1d_b(1,igb,i3,1)=phar*ar-phai*ai
5549            wk1d_b(2,igb,i3,1)=phai*ar+phar*ai
5550          end do
5551        end do
5552 !$OMP END PARALLEL DO
5553      end if
5554 
5555    end if !  if(option/=3)
5556 
5557 !  Do-loop on the planes stacked in the z direction
5558 
5559 !$OMP PARALLEL DEFAULT(PRIVATE) &
5560 !$OMP&SHARED(aft1,aft2,aft4,aft5,bef1,bef2,bef4,bef5,denpot) &
5561 !$OMP&SHARED(fftcache,fofr,gboundin,ic1,ic2,ic4,ic5,ind1,ind2,ind4,ind5) &
5562 !$OMP&SHARED(indpw_kin,indpw_kout,istwf_k,mgb,n1,n1half1) &
5563 !$OMP&SHARED(n1halfm,n2,n2half1,n3,n4,n5,ngbin,ngbout) &
5564 !$OMP&SHARED(now1,now2,now4,now5,option,pha1,pha2,trig1) &
5565 !$OMP&SHARED(trig2,trig4,trig5,weight_r,weight_i,wk1d_a,wk1d_b)
5566 
5567 !  Allocate two 2-dimensional work arrays
5568    ABI_MALLOC(wk2d_a,(2,n4,n5,1))
5569    ABI_MALLOC(wk2d_b,(2,n4,n5,1))
5570    ABI_MALLOC(wk2d_c,(2,2*n1halfm,n5,1))
5571    ABI_MALLOC(wk2d_d,(2,2*n1halfm,n5,1))
5572 !$OMP DO
5573    do i3=1,n3
5574 
5575      g2max=gboundin(4,1)
5576 
5577      if(option/=3)then
5578 !      Zero the values on the current plane : need only from i2=1 to g2max+1
5579        do i2=1,g2max+1
5580          do i1=1,n1
5581            wk2d_a(1,i1,i2,1)=zero
5582            wk2d_a(2,i1,i2,1)=zero
5583          end do
5584        end do
5585 
5586 !      Copy the data in the current plane
5587        do igb=1,ngbin
5588          i1=indpw_kin(1,igb) ; i2=indpw_kin(2,igb)
5589          wk2d_a(1,i1,i2,1)=wk1d_b(1,igb,i3,1)
5590          wk2d_a(2,i1,i2,1)=wk1d_b(2,igb,i3,1)
5591        end do
5592 
5593 !      Perform x transform, taking into account arrays of zeros
5594        call sg_fftx(fftcache,mfac,mg,n4,n5,1,g2max+1,1,wk2d_a,wk2d_b,&
5595 &       trig1,aft1,now1,bef1,one,ind1,ic1)
5596 
5597 !      Change the phase if $k_x \neq 0$
5598        if(istwf_k==3 .or. istwf_k==5 .or. istwf_k==7 .or. istwf_k==9)then
5599          do i1=1,n1
5600            phar=pha1(1,i1)
5601            phai=pha1(2,i1)
5602            do i2=1,g2max+1
5603              ar=wk2d_b(1,i1,i2,1)
5604              ai=wk2d_b(2,i1,i2,1)
5605              wk2d_b(1,i1,i2,1)=phar*ar-phai*ai
5606              wk2d_b(2,i1,i2,1)=phai*ar+phar*ai
5607            end do
5608          end do
5609        end if
5610 
5611 !      Compute symmetric and antisymmetric combinations
5612        if(istwf_k>=2 .and. istwf_k<=5)then
5613          do i1=1,n1half1-1
5614            wk2d_a(1,i1,1,1)=wk2d_b(1,2*i1-1,1,1)
5615            wk2d_a(2,i1,1,1)=wk2d_b(1,2*i1  ,1,1)
5616          end do
5617 !        If n1 odd, must add last data
5618          if((2*n1half1-2)/=n1)then
5619            wk2d_a(1,n1half1,1,1)=wk2d_b(1,n1,1,1)
5620            wk2d_a(2,n1half1,1,1)=zero
5621          end if
5622          ii2=2
5623        else
5624          ii2=1
5625        end if
5626        if( g2max+1 >= ii2)then
5627          do i2=ii2,g2max+1
5628            do i1=1,n1half1-1
5629              wk2d_a(1,i1,i2,1)=        wk2d_b(1,2*i1-1,i2,1)-wk2d_b(2,2*i1,i2,1)
5630              wk2d_a(2,i1,i2,1)=        wk2d_b(2,2*i1-1,i2,1)+wk2d_b(1,2*i1,i2,1)
5631              wk2d_a(1,i1,n2+ii2-i2,1)= wk2d_b(1,2*i1-1,i2,1)+wk2d_b(2,2*i1,i2,1)
5632              wk2d_a(2,i1,n2+ii2-i2,1)=-wk2d_b(2,2*i1-1,i2,1)+wk2d_b(1,2*i1,i2,1)
5633            end do
5634            if((2*n1half1-2)/=n1)then
5635              wk2d_a(1,n1half1,i2,1)=        wk2d_b(1,n1,i2,1)
5636              wk2d_a(2,n1half1,i2,1)=        wk2d_b(2,n1,i2,1)
5637              wk2d_a(1,n1half1,n2+ii2-i2,1)= wk2d_b(1,n1,i2,1)
5638              wk2d_a(2,n1half1,n2+ii2-i2,1)=-wk2d_b(2,n1,i2,1)
5639            end if
5640          end do
5641        end if
5642        if ( n2half1 >= g2max+2 ) then
5643          do i2=g2max+2,n2half1
5644            do i1=1,n1half1-1
5645              wk2d_a(1,i1,i2,1)=zero
5646              wk2d_a(2,i1,i2,1)=zero
5647              wk2d_a(1,i1,n2+ii2-i2,1)=zero
5648              wk2d_a(2,i1,n2+ii2-i2,1)=zero
5649            end do
5650            if((2*n1half1-2)/=n1)then
5651              wk2d_a(1,n1half1,i2,1)=zero
5652              wk2d_a(2,n1half1,i2,1)=zero
5653              wk2d_a(1,n1half1,n2+ii2-i2,1)=zero
5654              wk2d_a(2,n1half1,n2+ii2-i2,1)=zero
5655            end if
5656          end do
5657        end if
5658 
5659        n1i=1
5660        call sg_ffty(fftcache,mfac,mg,n4,n5,1,n1i,n1halfm,1,1,wk2d_a,wk2d_b,&
5661 &       trig2,aft2,now2,bef2,one,ind2,ic2)
5662 
5663 !      Change the phase if $k_y \neq 0$
5664        if(istwf_k>=6 .and. istwf_k<=9)then
5665          do i2=1,n2
5666            phar=pha2(1,i2)
5667            phai=pha2(2,i2)
5668            do i1=1,n1halfm
5669              ar=wk2d_b(1,i1,i2,1)
5670              ai=wk2d_b(2,i1,i2,1)
5671              wk2d_b(1,i1,i2,1)= phar*ar-phai*ai
5672              wk2d_b(2,i1,i2,1)= phai*ar+phar*ai
5673            end do
5674          end do
5675        end if
5676 
5677      end if ! option/=3
5678 
5679 !    The wave function is now in real space, for the current plane,
5680 !    represented by REAL numbers, although packed in the complex array wk2d_b
5681 
5682      g2max=gboundin(4,1)
5683 
5684      if(option==0)then
5685 !      This option is only permitted for istwf_k==2 (Gamma point)
5686 !      Copy the transformed function at the right place
5687        do i2=1,n2
5688          do i1=1,n1half1-1
5689            fofr(1,2*i1-1,i2,i3)=wk2d_b(1,i1,i2,1)
5690            fofr(1,2*i1  ,i2,i3)=wk2d_b(2,i1,i2,1)
5691            fofr(2,2*i1-1,i2,i3)=zero
5692            fofr(2,2*i1  ,i2,i3)=zero
5693          end do
5694 !        If n1 odd, must add last data
5695          if((2*n1half1-2)/=n1)then
5696            fofr(1,n1,i2,i3)=wk2d_b(1,n1half1,i2,1)
5697            fofr(2,n1,i2,i3)=zero
5698          end if
5699        end do
5700      end if
5701 
5702      if(option==1)then
5703 !      Accumulate density
5704        do i2=1,n2
5705          do i1=1,n1half1-1
5706            denpot(2*i1-1,i2,i3)=denpot(2*i1-1,i2,i3)+weight_r*wk2d_b(1,i1,i2,1)**2
5707            denpot(2*i1  ,i2,i3)=denpot(2*i1  ,i2,i3)+weight_i*wk2d_b(2,i1,i2,1)**2
5708          end do
5709 !        If n1 odd, must add last data
5710          if((2*n1half1-2)/=n1)then
5711            denpot(n1,i2,i3)=denpot(n1,i2,i3)+weight_r*wk2d_b(1,n1half1,i2,1)**2
5712 !          not use in DMFT because istwfk required to be one.
5713          end if
5714        end do
5715      end if
5716 
5717      if(option==2)then
5718 !      Apply local potential
5719        do i2=1,n2
5720          do i1=1,n1half1-1
5721            wk2d_a(1,i1,i2,1)=denpot(2*i1-1,i2,i3)*wk2d_b(1,i1,i2,1)
5722            wk2d_a(2,i1,i2,1)=denpot(2*i1  ,i2,i3)*wk2d_b(2,i1,i2,1)
5723          end do
5724 !        If n1 odd, must add last data
5725          if((2*n1half1-2)/=n1)then
5726            wk2d_a(1,n1half1,i2,1)=denpot(n1,i2,i3)*wk2d_b(1,n1half1,i2,1)
5727            wk2d_a(2,n1half1,i2,1)=zero
5728          end if
5729        end do
5730      end if
5731 
5732      if(option==3)then
5733 !      This option is only permitted for istwf_k==2 (Gamma point)
5734 !      Copy the transformed function at the right place
5735        do i2=1,n2
5736          do i1=1,n1half1-1
5737            wk2d_b(1,i1,i2,1)=fofr(1,2*i1-1,i2,i3)
5738            wk2d_b(2,i1,i2,1)=fofr(1,2*i1  ,i2,i3)
5739          end do
5740 !        If n1 odd, must add last data
5741          if((2*n1half1-2)/=n1)then
5742            wk2d_b(1,n1half1,i2,1)=fofr(1,n1,i2,i3)
5743          end if
5744        end do
5745      end if
5746 
5747      if(option==2 .or. option==3)then
5748 !      Change the phase if $k_y \neq 0$
5749        if(istwf_k>=6 .and. istwf_k<=9)then
5750          do i2=1,n2
5751            phar=pha2(1,i2)
5752            phai=pha2(2,i2)
5753            do i1=1,n1halfm
5754              ar=wk2d_a(1,i1,i2,1)
5755              ai=wk2d_a(2,i1,i2,1)
5756              wk2d_a(1,i1,i2,1)= phar*ar+phai*ai
5757              wk2d_a(2,i1,i2,1)=-phai*ar+phar*ai
5758            end do
5759          end do
5760        end if
5761 
5762 !      Perform y transform
5763        n1i=1
5764        call sg_ffty(fftcache,mfac,mg,n4,n5,1,n1i,n1halfm,1,1,wk2d_a,wk2d_b, &
5765 &       trig5,aft5,now5,bef5,-one,ind5,ic5)
5766 
5767 !      Decompose symmetric and antisymmetric parts
5768        if(istwf_k>=2 .and. istwf_k<=5)then
5769          do i1=1,n1halfm
5770            wk2d_c(1,2*i1-1,1,1)=wk2d_b(1,i1,1,1)
5771            wk2d_c(2,2*i1-1,1,1)=zero
5772            wk2d_c(1,2*i1,1,1)=wk2d_b(2,i1,1,1)
5773            wk2d_c(2,2*i1,1,1)=zero
5774          end do
5775          ii2=2
5776        else
5777          ii2=1
5778        end if
5779        do i2=ii2,g2max+1
5780          do i1=1,n1halfm
5781            wk2d_c(1,2*i1-1,i2,1)=(wk2d_b(1,i1,i2,1)+wk2d_b(1,i1,n2+ii2-i2,1))*0.5d0
5782            wk2d_c(2,2*i1-1,i2,1)=(wk2d_b(2,i1,i2,1)-wk2d_b(2,i1,n2+ii2-i2,1))*0.5d0
5783            wk2d_c(1,2*i1,i2,1)= ( wk2d_b(2,i1,i2,1)+wk2d_b(2,i1,n2+ii2-i2,1))*0.5d0
5784            wk2d_c(2,2*i1,i2,1)= (-wk2d_b(1,i1,i2,1)+wk2d_b(1,i1,n2+ii2-i2,1))*0.5d0
5785          end do
5786        end do
5787 
5788 !      Change the phase if $k_x \neq 0$
5789        if(istwf_k==3 .or. istwf_k==5 .or. istwf_k==7 .or. istwf_k==9 )then
5790          do i1=1,n1
5791            phar=pha1(1,i1)
5792            phai=pha1(2,i1)
5793            do i2=1,g2max+1
5794              ar=wk2d_c(1,i1,i2,1)
5795              ai=wk2d_c(2,i1,i2,1)
5796              wk2d_c(1,i1,i2,1)= phar*ar+phai*ai
5797              wk2d_c(2,i1,i2,1)=-phai*ar+phar*ai
5798            end do
5799          end do
5800        end if
5801 
5802 !      Perform x transform : for y=1 to g2max+1, to benefit from zeros
5803        call sg_fftx(fftcache,mfac,mg,2*n1halfm,n5,1,g2max+1,1,wk2d_c,wk2d_d,&
5804 &       trig4,aft4,now4,bef4,-one,ind4,ic4)
5805 
5806 !      Copy the data from the current plane to wk1d_b
5807        do igb=1,ngbout
5808          i1=indpw_kout(1,igb) ; i2=indpw_kout(2,igb)
5809          wk1d_b(1,igb,i3,1)=wk2d_d(1,i1,i2,1)
5810          wk1d_b(2,igb,i3,1)=wk2d_d(2,i1,i2,1)
5811        end do
5812 
5813      end if ! option==2 or 3
5814 
5815 !    End loop on planes
5816    end do
5817 
5818 !$OMP END DO
5819    ABI_FREE(wk2d_a)
5820    ABI_FREE(wk2d_b)
5821    ABI_FREE(wk2d_c)
5822    ABI_FREE(wk2d_d)
5823 !$OMP END PARALLEL
5824 
5825    if(option==2 .or. option==3)then
5826 
5827 !    Change the phase if $k_z \neq 0$
5828      if(istwf_k==4 .or. istwf_k==5 .or. istwf_k==8 .or. istwf_k==9 )then
5829 !$OMP PARALLEL DO DEFAULT(PRIVATE) &
5830 !$OMP&SHARED(ngbout,n3,pha3,wk1d_b)
5831        do i3=1,n3
5832          phar=pha3(1,i3)
5833          phai=pha3(2,i3)
5834          do igb=1,ngbout
5835            ar=wk1d_b(1,igb,i3,1)
5836            ai=wk1d_b(2,igb,i3,1)
5837            wk1d_b(1,igb,i3,1)= phar*ar+phai*ai
5838            wk1d_b(2,igb,i3,1)=-phai*ar+phar*ai
5839          end do
5840        end do
5841 !$OMP END PARALLEL DO
5842      end if
5843 
5844 !    Go from wk1d_b to wk1d_a, using 1D FFTs on the z direction
5845 !    However, due to special packing of data, use routine ffty
5846 !$OMP PARALLEL DO SHARED(aft6,bef6,fftcache,ind6,ic6,lotout,mgb)&
5847 !$OMP&SHARED(ngbout,now6,n3,trig6,wk1d_a,wk1d_b)&
5848 !$OMP&PRIVATE(igb,igbmax)
5849      do igb=1,ngbout,lotout
5850        igbmax=min(igb+lotout-1,ngbout)
5851 !      Go from wk1d_b to wk1d_a, using 1D FFTs on the z direction
5852 !      However, due to special packing of data, use routine ffty
5853        call sg_ffty(fftcache,mfac,mg,mgb,n3,1,igb,igbmax,1,1,wk1d_b,wk1d_a, &
5854 &       trig6,aft6,now6,bef6,-one,ind6,ic6)
5855 
5856      end do
5857 !$OMP END PARALLEL DO
5858 
5859 !    Transfer the data in the output array, after normalization
5860      norm=1.d0/dble(nfftot)
5861 !$OMP PARALLEL DO DEFAULT(PRIVATE) &
5862 !$OMP&SHARED(fofgout,indpw_kout,norm,npwout,wk1d_a)
5863      do ig=1,npwout
5864        igb=indpw_kout(4,ig) ; i3=indpw_kout(3,ig)
5865        fofgout(1,ig)=wk1d_a(1,igb,i3,1)*norm
5866        fofgout(2,ig)=wk1d_a(2,igb,i3,1)*norm
5867      end do
5868 !$OMP END PARALLEL DO
5869 
5870    end if
5871 
5872    ABI_FREE(wk1d_a)
5873    ABI_FREE(wk1d_b)
5874 
5875    if(istwf_k/=2)then
5876      ABI_FREE(pha1)
5877      ABI_FREE(pha2)
5878      ABI_FREE(pha3)
5879    end if
5880 
5881 !  End time-reversal symmetry
5882  end if
5883 
5884  if(option/=3) then
5885    ABI_FREE(indpw_kin)
5886  end if
5887  if(option==2 .or. option==3) then
5888    ABI_FREE(indpw_kout)
5889  end if
5890 
5891  !DBG_EXIT("COLL")
5892 
5893 end subroutine sg_fftrisc_2

m_sgfft/sg_fftx [ Functions ]

[ Top ] [ m_sgfft ] [ Functions ]

NAME

 sg_fftx

FUNCTION

 This subroutine is called by the 3-dimensional fft to conduct the
 "x" transforms for all y and z.

INPUTS

  fftcache=size of the cache (kB)
  mfac = maximum number of factors in 1D FFTs
  mg = maximum length of 1D FFTs
  nd1=first dimension of (complex) arrays z and zbr (treated as real within
   this subroutine)
  nd2=second dimension of (complex) arrays z and zbr (treated as real within
   this subroutine)
  nd3=third dimension of (complex) arrays z and zbr (treated as real within
   this subroutine)
  n2,n3=actual length of y and z transforms
  z(2,nd1,nd2,nd3)=INPUT array; destroyed by transformation
  trig, aft, now, bef, ind=provided by previous call to ctrig
   Note that in this routine (and in ctrig) the values in array trig are
   actually cos and tan, not cos and sin.  Use of tan allows advantageous
   use of FMA on the ibm rs6000.
  ris=sign of exponential in transform (should be 1 or -1; real)
  ic=number of (radix) factors of x transform length (from ctrig)

OUTPUT

  zbr(2,nd1,nd2,nd3)=OUTPUT transformed array; no scaling applied

SIDE EFFECTS

NOTES

 This routine blocks the x transforms
 so that all transforms under consideration at one step fit within
 the cache memory, which is crucial for optimal performance.
 The blocking factor is set by parameter "fftcache" below, which should
 be adjusted to be somewhat smaller (say 3/4) than the actual cache size
 of the machine.

TODO

 Use latex for the equation above

SOURCE

1579 subroutine sg_fftx(fftcache,mfac,mg,nd1,nd2,nd3,n2,n3,z,zbr,&
1580 & trig,aft,now,bef,ris,ind,ic)
1581 
1582 !Arguments ------------------------------------
1583 !Dimensions of aft, now, bef, ind, and trig should agree with
1584 !those in subroutine ctrig.
1585 !scalars
1586  integer,intent(in) :: fftcache,ic,mfac,mg,n2,n3,nd1,nd2,nd3
1587  real(dp),intent(in) :: ris
1588 !arrays
1589  integer,intent(in) :: aft(mfac),bef(mfac),ind(mg),now(mfac)
1590  real(dp),intent(in) :: trig(2,mg)
1591  real(dp),intent(inout) :: z(2,nd1,nd2,nd3),zbr(2,nd1,nd2,nd3)
1592 
1593 !Local variables-------------------------------
1594 !scalars
1595  integer :: i,i3,ia,ib,indx,j,jj,lot,ma,mb,ntb
1596  real(dp),parameter :: cos2=0.3090169943749474d0   !cos(2.d0*pi/5.d0)
1597  real(dp),parameter :: cos4=-0.8090169943749474d0  !cos(4.d0*pi/5.d0)
1598  real(dp),parameter :: sin42=0.6180339887498948d0  !sin(4.d0*pi/5.d0)/sin(2.d0*pi/5.d0)
1599  real(dp) :: bb,cr2,cr2s,cr3,cr3p,cr4,cr5,ct2,ct3,ct4,ct5
1600  real(dp) :: factor,r,r1,r2,r25,r3,r34,r4,r5,s,sin2,s1,s2,s25,s3,s34,s4,s5
1601 
1602 ! *************************************************************************
1603 
1604  !print *, "now", now(1:ic)
1605 
1606 !Do x transforms in blocks of size "lot" which is set by how
1607 !many x transform arrays (of size nd1 each) fit into the nominal
1608 !cache size "fftcache".
1609  factor=0.75d0
1610  lot=(fftcache*factor*1000d0)/(nd1*8*2)
1611 
1612 !XG : due to the dimension problems on the P6, I have slightly
1613 !modified this part of the code, with an external loop
1614 !on n3 ...
1615 !Modifications are indicated explicitely, or
1616 !are related to the increase of the number of dimensions of z and
1617 !zbr ...
1618 
1619  factor=0.75d0
1620  lot=(fftcache*factor*1000d0)/(nd1*8*2)
1621  if(lot.lt.1) lot=1 ! this may happen for very large cells
1622 !$OMP PARALLEL DO DEFAULT(PRIVATE) SHARED(aft,bef,ic,ind,lot,n2,n3,now,ris,trig,z,zbr)
1623  do i3=1,n3
1624    do jj=1,n2,lot
1625 !    end of modification
1626 
1627 !    For each jj, ma and mb give starting and ending addresses for fft
1628 !    ma starts where we left off after last block
1629      ma=jj
1630 !    mb runs to the end of the block or else to the end of the data
1631 !    modified XG 980107
1632 !    mb=min(jj+(lot-1),n23)
1633      mb=min(jj+(lot-1),n2)
1634 
1635 !    Run over all factors except the last (to ic-1), performing
1636 !    x transform
1637 
1638 !    Note: fortran should skip this loop if ic=1; beware "onetrip"
1639 !    compiler option which forces each loop at least once
1640 
1641 !    ------------------------------------------------------------------------
1642 
1643 !    Direct transformation (to be followed by bit reversal)
1644 
1645      do i=1,ic-1
1646        ntb=now(i)*bef(i)
1647 !      radix 4
1648 
1649 !      Treat radix 4
1650        if (now(i)==4) then
1651          ia=0
1652 
1653 !        First step of factor 4
1654          do ib=1,bef(i)
1655            do j=ma,mb
1656              r4=z(1,ia*ntb+3*bef(i)+ib,j,i3)
1657              s4=z(2,ia*ntb+3*bef(i)+ib,j,i3)
1658              r3=z(1,ia*ntb+2*bef(i)+ib,j,i3)
1659              s3=z(2,ia*ntb+2*bef(i)+ib,j,i3)
1660              r2=z(1,ia*ntb+bef(i)+ib,j,i3)
1661              s2=z(2,ia*ntb+bef(i)+ib,j,i3)
1662              r1=z(1,ia*ntb+ib,j,i3)
1663              s1=z(2,ia*ntb+ib,j,i3)
1664 
1665              r=r1 + r3
1666              s=r2 + r4
1667              z(1,ia*ntb+ib,j,i3) = r + s
1668              z(1,ia*ntb+2*bef(i)+ib,j,i3) = r - s
1669              r=r1 - r3
1670              s=s2 - s4
1671              z(1,ia*ntb+bef(i)+ib,j,i3) = r - s*ris
1672              z(1,ia*ntb+3*bef(i)+ib,j,i3) = r + s*ris
1673              r=s1 + s3
1674              s=s2 + s4
1675              z(2,ia*ntb+ib,j,i3) = r + s
1676              z(2,ia*ntb+2*bef(i)+ib,j,i3) = r - s
1677              r=s1 - s3
1678              s=r2 - r4
1679              z(2,ia*ntb+bef(i)+ib,j,i3) = r + s*ris
1680              z(2,ia*ntb+3*bef(i)+ib,j,i3) = r - s*ris
1681            end do
1682          end do
1683 
1684 !        Second step of factor 4
1685          do ia=1,aft(i)-1
1686            indx=ind(ia*4*bef(i)+1)-1
1687            indx=indx*bef(i)
1688            cr2=trig(1,indx)
1689            ct2=trig(2,indx)
1690            cr3=trig(1,2*indx)
1691            ct3=trig(2,2*indx)
1692            cr4=trig(1,3*indx)
1693            ct4=trig(2,3*indx)
1694            cr4=cr4/cr2
1695            cr2s=cr2*ris
1696            do ib=1,bef(i)
1697              do j=ma,mb
1698                r4=z(1,ia*ntb+3*bef(i)+ib,j,i3) - &
1699 &               z(2,ia*ntb+3*bef(i)+ib,j,i3)*ct4
1700                s4=z(1,ia*ntb+3*bef(i)+ib,j,i3)*ct4 + &
1701 &               z(2,ia*ntb+3*bef(i)+ib,j,i3)
1702                r3=z(1,ia*ntb+2*bef(i)+ib,j,i3) - &
1703 &               z(2,ia*ntb+2*bef(i)+ib,j,i3)*ct3
1704                s3=z(1,ia*ntb+2*bef(i)+ib,j,i3)*ct3 + &
1705 &               z(2,ia*ntb+2*bef(i)+ib,j,i3)
1706                r2=z(1,ia*ntb+bef(i)+ib,j,i3) - &
1707 &               z(2,ia*ntb+bef(i)+ib,j,i3)*ct2
1708                s2=z(1,ia*ntb+bef(i)+ib,j,i3)*ct2 + &
1709 &               z(2,ia*ntb+bef(i)+ib,j,i3)
1710                r1=z(1,ia*ntb+ib,j,i3)
1711                s1=z(2,ia*ntb+ib,j,i3)
1712 
1713                r=r1 + r3*cr3
1714                s=r2 + r4*cr4
1715                z(1,ia*ntb+ib,j,i3) = r + s*cr2
1716                z(1,ia*ntb+2*bef(i)+ib,j,i3) = r - s*cr2
1717                r=r1 - r3*cr3
1718                s=s2 - s4*cr4
1719                z(1,ia*ntb+bef(i)+ib,j,i3) = r - s*cr2s
1720                z(1,ia*ntb+3*bef(i)+ib,j,i3) = r + s*cr2s
1721                r=s1 + s3*cr3
1722                s=s2 + s4*cr4
1723                z(2,ia*ntb+ib,j,i3) = r + s*cr2
1724                z(2,ia*ntb+2*bef(i)+ib,j,i3) = r - s*cr2
1725                r=s1 - s3*cr3
1726                s=r2 - r4*cr4
1727                z(2,ia*ntb+bef(i)+ib,j,i3) = r + s*cr2s
1728                z(2,ia*ntb+3*bef(i)+ib,j,i3) = r - s*cr2s
1729              end do
1730            end do
1731          end do
1732 
1733 !        Treat radix 2
1734        else if (now(i)==2) then
1735          ia=0
1736 
1737 !        First step of factor 2
1738          do ib=1,bef(i)
1739            do j=ma,mb
1740              r1=z(1,ia*ntb+ib,j,i3)
1741              s1=z(2,ia*ntb+ib,j,i3)
1742              r2=z(1,ia*ntb+bef(i)+ib,j,i3)
1743              s2=z(2,ia*ntb+bef(i)+ib,j,i3)
1744              z(1,ia*ntb+ib,j,i3) =  r2 + r1
1745              z(2,ia*ntb+ib,j,i3) =  s2 + s1
1746              z(1,ia*ntb+bef(i)+ib,j,i3) = -r2 + r1
1747              z(2,ia*ntb+bef(i)+ib,j,i3) = -s2 + s1
1748            end do
1749          end do
1750 
1751 !        Second step of factor 2
1752          do ia=1,aft(i)-1
1753            indx=ind(ia*2*bef(i)+1)-1
1754            indx=indx*bef(i)
1755            cr2=trig(1,indx)
1756            ct2=trig(2,indx)
1757            do ib=1,bef(i)
1758              do j=ma,mb
1759                r1=z(1,ia*ntb+ib,j,i3)
1760                s1=z(2,ia*ntb+ib,j,i3)
1761                r2=z(1,ia*ntb+bef(i)+ib,j,i3) - &
1762 &               z(2,ia*ntb+bef(i)+ib,j,i3)*ct2
1763                s2=z(1,ia*ntb+bef(i)+ib,j,i3)*ct2 + &
1764 &               z(2,ia*ntb+bef(i)+ib,j,i3)
1765                z(1,ia*ntb+ib,j,i3) =  r2*cr2 + r1
1766                z(2,ia*ntb+ib,j,i3) =  s2*cr2 + s1
1767                z(1,ia*ntb+bef(i)+ib,j,i3) = -r2*cr2 + r1
1768                z(2,ia*ntb+bef(i)+ib,j,i3) = -s2*cr2 + s1
1769              end do
1770            end do
1771          end do
1772 
1773 !        Treat radix 3
1774        else if (now(i)==3) then
1775 !        .5d0*sqrt(3.d0)=0.8660254037844387d0
1776          ia=0
1777          bb=ris*0.8660254037844387d0
1778 
1779 !        First step of factor 3
1780          do ib=1,bef(i)
1781            do j=ma,mb
1782              r1=z(1,ia*ntb+ib,j,i3)
1783              s1=z(2,ia*ntb+ib,j,i3)
1784              r2=z(1,ia*ntb+bef(i)+ib,j,i3)
1785              s2=z(2,ia*ntb+bef(i)+ib,j,i3)
1786              r3=z(1,ia*ntb+2*bef(i)+ib,j,i3)
1787              s3=z(2,ia*ntb+2*bef(i)+ib,j,i3)
1788              r=r2 + r3
1789              s=s2 + s3
1790              z(1,ia*ntb+ib,j,i3) = r + r1
1791              z(2,ia*ntb+ib,j,i3) = s + s1
1792              r1=r1 - r*.5d0
1793              s1=s1 - s*.5d0
1794              r2=r2-r3
1795              s2=s2-s3
1796              z(1,ia*ntb+bef(i)+ib,j,i3) = r1 - s2*bb
1797              z(2,ia*ntb+bef(i)+ib,j,i3) = s1 + r2*bb
1798              z(1,ia*ntb+2*bef(i)+ib,j,i3) = r1 + s2*bb
1799              z(2,ia*ntb+2*bef(i)+ib,j,i3) = s1 - r2*bb
1800            end do
1801          end do
1802 
1803 !        Second step of factor 3
1804          do ia=1,aft(i)-1
1805            indx=ind(ia*3*bef(i)+1)-1
1806            indx=indx*bef(i)
1807            cr2=trig(1,indx)
1808            ct2=trig(2,indx)
1809            cr3=trig(1,2*indx)
1810            ct3=trig(2,2*indx)
1811            cr2=cr2/cr3
1812            cr3p=.5d0*cr3
1813            bb=ris*cr3*0.8660254037844387d0
1814            do ib=1,bef(i)
1815              do j=ma,mb
1816                r1=z(1,ia*ntb+ib,j,i3)
1817                s1=z(2,ia*ntb+ib,j,i3)
1818                r2=z(1,ia*ntb+bef(i)+ib,j,i3) - &
1819 &               z(2,ia*ntb+bef(i)+ib,j,i3)*ct2
1820                s2=z(1,ia*ntb+bef(i)+ib,j,i3)*ct2 + &
1821 &               z(2,ia*ntb+bef(i)+ib,j,i3)
1822                r3=z(1,ia*ntb+2*bef(i)+ib,j,i3) - &
1823 &               z(2,ia*ntb+2*bef(i)+ib,j,i3)*ct3
1824                s3=z(1,ia*ntb+2*bef(i)+ib,j,i3)*ct3 + &
1825 &               z(2,ia*ntb+2*bef(i)+ib,j,i3)
1826                r=cr2*r2 + r3
1827                s=cr2*s2 + s3
1828                z(1,ia*ntb+ib,j,i3) = r*cr3 + r1
1829                z(2,ia*ntb+ib,j,i3) = s*cr3 + s1
1830                r1=r1 - r*cr3p
1831                s1=s1 - s*cr3p
1832                r2=cr2*r2-r3
1833                s2=cr2*s2-s3
1834                z(1,ia*ntb+bef(i)+ib,j,i3) = r1 - s2*bb
1835                z(2,ia*ntb+bef(i)+ib,j,i3) = s1 + r2*bb
1836                z(1,ia*ntb+2*bef(i)+ib,j,i3) = r1 + s2*bb
1837                z(2,ia*ntb+2*bef(i)+ib,j,i3) = s1 - r2*bb
1838              end do
1839            end do
1840          end do
1841 
1842 !        Treat radix 5
1843        else if (now(i)==5) then
1844 !        sin(2.d0*pi/5.d0)
1845          sin2=ris*0.9510565162951536d0
1846          ia=0
1847 
1848 !        First step of factor 5
1849          do ib=1,bef(i)
1850            do j=ma,mb
1851              r1=z(1,ia*ntb+ib,j,i3)
1852              s1=z(2,ia*ntb+ib,j,i3)
1853              r2=z(1,ia*ntb+bef(i)+ib,j,i3)
1854              s2=z(2,ia*ntb+bef(i)+ib,j,i3)
1855              r3=z(1,ia*ntb+2*bef(i)+ib,j,i3)
1856              s3=z(2,ia*ntb+2*bef(i)+ib,j,i3)
1857              r4=z(1,ia*ntb+3*bef(i)+ib,j,i3)
1858              s4=z(2,ia*ntb+3*bef(i)+ib,j,i3)
1859              r5=z(1,ia*ntb+4*bef(i)+ib,j,i3)
1860              s5=z(2,ia*ntb+4*bef(i)+ib,j,i3)
1861              r25 = r2 + r5
1862              r34 = r3 + r4
1863              s25 = s2 - s5
1864              s34 = s3 - s4
1865              z(1,ia*ntb+ib,j,i3) = r1 + r25 + r34
1866              r = r1 + cos2*r25 + cos4*r34
1867              s = s25 + sin42*s34
1868              z(1,ia*ntb+bef(i)+ib,j,i3) = r - sin2*s
1869              z(1,ia*ntb+4*bef(i)+ib,j,i3) = r + sin2*s
1870              r = r1 + cos4*r25 + cos2*r34
1871              s = sin42*s25 - s34
1872              z(1,ia*ntb+2*bef(i)+ib,j,i3) = r - sin2*s
1873              z(1,ia*ntb+3*bef(i)+ib,j,i3) = r + sin2*s
1874              r25 = r2 - r5
1875              r34 = r3 - r4
1876              s25 = s2 + s5
1877              s34 = s3 + s4
1878              z(2,ia*ntb+ib,j,i3) = s1 + s25 + s34
1879              r = s1 + cos2*s25 + cos4*s34
1880              s = r25 + sin42*r34
1881              z(2,ia*ntb+bef(i)+ib,j,i3) = r + sin2*s
1882              z(2,ia*ntb+4*bef(i)+ib,j,i3) = r - sin2*s
1883              r = s1 + cos4*s25 + cos2*s34
1884              s = sin42*r25 - r34
1885              z(2,ia*ntb+2*bef(i)+ib,j,i3) = r + sin2*s
1886              z(2,ia*ntb+3*bef(i)+ib,j,i3) = r - sin2*s
1887            end do
1888          end do
1889 
1890 !        Second step of factor 5
1891          do ia=1,aft(i)-1
1892            indx=ind(ia*5*bef(i)+1)-1
1893            indx=indx*bef(i)
1894            cr2=trig(1,indx)
1895            ct2=trig(2,indx)
1896            cr3=trig(1,2*indx)
1897            ct3=trig(2,2*indx)
1898            cr4=trig(1,3*indx)
1899            ct4=trig(2,3*indx)
1900            cr5=trig(1,4*indx)
1901            ct5=trig(2,4*indx)
1902            do ib=1,bef(i)
1903              do j=ma,mb
1904                r1=z(1,ia*ntb+ib,j,i3)
1905                s1=z(2,ia*ntb+ib,j,i3)
1906                r2=cr2*(z(1,ia*ntb+bef(i)+ib,j,i3) - &
1907 &               z(2,ia*ntb+bef(i)+ib,j,i3)*ct2)
1908                s2=cr2*(z(1,ia*ntb+bef(i)+ib,j,i3)*ct2 + &
1909 &               z(2,ia*ntb+bef(i)+ib,j,i3))
1910                r3=cr3*(z(1,ia*ntb+2*bef(i)+ib,j,i3) - &
1911 &               z(2,ia*ntb+2*bef(i)+ib,j,i3)*ct3)
1912                s3=cr3*(z(1,ia*ntb+2*bef(i)+ib,j,i3)*ct3 + &
1913 &               z(2,ia*ntb+2*bef(i)+ib,j,i3))
1914                r4=z(1,ia*ntb+3*bef(i)+ib,j,i3) - &
1915 &               z(2,ia*ntb+3*bef(i)+ib,j,i3)*ct4
1916                s4=z(1,ia*ntb+3*bef(i)+ib,j,i3)*ct4 + &
1917 &               z(2,ia*ntb+3*bef(i)+ib,j,i3)
1918                r5=z(1,ia*ntb+4*bef(i)+ib,j,i3) - &
1919 &               z(2,ia*ntb+4*bef(i)+ib,j,i3)*ct5
1920                s5=z(1,ia*ntb+4*bef(i)+ib,j,i3)*ct5 + &
1921 &               z(2,ia*ntb+4*bef(i)+ib,j,i3)
1922                r25 = r2 + r5*cr5
1923                r34 = r3 + r4*cr4
1924                s25 = s2 - s5*cr5
1925                s34 = s3 - s4*cr4
1926                z(1,ia*ntb+ib,j,i3) = r1 + r25 + r34
1927                r = r1 + cos2*r25 + cos4*r34
1928                s = s25 + sin42*s34
1929                z(1,ia*ntb+bef(i)+ib,j,i3) = r - sin2*s
1930                z(1,ia*ntb+4*bef(i)+ib,j,i3) = r + sin2*s
1931                r = r1 + cos4*r25 + cos2*r34
1932                s = sin42*s25 - s34
1933                z(1,ia*ntb+2*bef(i)+ib,j,i3) = r - sin2*s
1934                z(1,ia*ntb+3*bef(i)+ib,j,i3) = r + sin2*s
1935                r25 = r2 - r5*cr5
1936                r34 = r3 - r4*cr4
1937                s25 = s2 + s5*cr5
1938                s34 = s3 + s4*cr4
1939                z(2,ia*ntb+ib,j,i3) = s1 + s25 + s34
1940                r = s1 + cos2*s25 + cos4*s34
1941                s = r25 + sin42*r34
1942                z(2,ia*ntb+bef(i)+ib,j,i3) = r + sin2*s
1943                z(2,ia*ntb+4*bef(i)+ib,j,i3) = r - sin2*s
1944                r = s1 + cos4*s25 + cos2*s34
1945                s = sin42*r25 - r34
1946                z(2,ia*ntb+2*bef(i)+ib,j,i3) = r + sin2*s
1947                z(2,ia*ntb+3*bef(i)+ib,j,i3) = r - sin2*s
1948              end do
1949            end do
1950          end do
1951 
1952        else
1953 !        All factors have been treated
1954          ABI_BUG('called with factors other than 2, 3, and 5')
1955        end if
1956 
1957      end do
1958 
1959 !    ---------------------------------------------------------------
1960 
1961 !    bitreversal
1962 
1963 !    Perform bit reversal on last factor of transformation
1964 
1965 !    Treat factor 4
1966      if (now(ic)==4) then
1967 !      radix 4
1968        ia=0
1969 
1970 !      First step of factor 4
1971        do j=ma,mb
1972          r4=z(1,ia*4+4,j,i3)
1973          s4=z(2,ia*4+4,j,i3)
1974          r3=z(1,ia*4+3,j,i3)
1975          s3=z(2,ia*4+3,j,i3)
1976          r2=z(1,ia*4+2,j,i3)
1977          s2=z(2,ia*4+2,j,i3)
1978          r1=z(1,ia*4+1,j,i3)
1979          s1=z(2,ia*4+1,j,i3)
1980 
1981          r=r1 + r3
1982          s=r2 + r4
1983          zbr(1,ind(ia*4+1),j,i3) = r + s
1984          zbr(1,ind(ia*4+3),j,i3) = r - s
1985          r=r1 - r3
1986          s=s2 - s4
1987          zbr(1,ind(ia*4+2),j,i3) = r - s*ris
1988          zbr(1,ind(ia*4+4),j,i3) = r + s*ris
1989          r=s1 + s3
1990          s=s2 + s4
1991          zbr(2,ind(ia*4+1),j,i3) = r + s
1992          zbr(2,ind(ia*4+3),j,i3) = r - s
1993          r=s1 - s3
1994          s=r2 - r4
1995          zbr(2,ind(ia*4+2),j,i3) = r + s*ris
1996          zbr(2,ind(ia*4+4),j,i3) = r - s*ris
1997        end do
1998 
1999 !      Second step of factor 4
2000        do ia=1,aft(ic)-1
2001          indx=ind(ia*4+1)-1
2002          cr2=trig(1,indx)
2003          ct2=trig(2,indx)
2004          cr3=trig(1,2*indx)
2005          ct3=trig(2,2*indx)
2006          cr4=trig(1,3*indx)
2007          ct4=trig(2,3*indx)
2008          cr4=cr4/cr2
2009          cr2s=cr2*ris
2010          do j=ma,mb
2011            r4=z(1,ia*4+4,j,i3) - z(2,ia*4+4,j,i3)*ct4
2012            s4=z(1,ia*4+4,j,i3)*ct4 + z(2,ia*4+4,j,i3)
2013            r3=z(1,ia*4+3,j,i3) - z(2,ia*4+3,j,i3)*ct3
2014            s3=z(1,ia*4+3,j,i3)*ct3 + z(2,ia*4+3,j,i3)
2015            r2=z(1,ia*4+2,j,i3) - z(2,ia*4+2,j,i3)*ct2
2016            s2=z(1,ia*4+2,j,i3)*ct2 + z(2,ia*4+2,j,i3)
2017            r1=z(1,ia*4+1,j,i3)
2018            s1=z(2,ia*4+1,j,i3)
2019 
2020            r=r1 + r3*cr3
2021            s=r2 + r4*cr4
2022            zbr(1,ind(ia*4+1),j,i3) = r + s*cr2
2023            zbr(1,ind(ia*4+3),j,i3) = r - s*cr2
2024            r=r1 - r3*cr3
2025            s=s2 - s4*cr4
2026            zbr(1,ind(ia*4+2),j,i3) = r - s*cr2s
2027            zbr(1,ind(ia*4+4),j,i3) = r + s*cr2s
2028            r=s1 + s3*cr3
2029            s=s2 + s4*cr4
2030            zbr(2,ind(ia*4+1),j,i3) = r + s*cr2
2031            zbr(2,ind(ia*4+3),j,i3) = r - s*cr2
2032            r=s1 - s3*cr3
2033            s=r2 - r4*cr4
2034            zbr(2,ind(ia*4+2),j,i3) = r + s*cr2s
2035            zbr(2,ind(ia*4+4),j,i3) = r - s*cr2s
2036          end do
2037        end do
2038 
2039 !      Treat factor 2
2040      else if (now(ic)==2) then
2041 !      radix 2
2042        ia=0
2043 
2044 !      First step of factor 2
2045        do j=ma,mb
2046          r1=z(1,ia*2+1,j,i3)
2047          s1=z(2,ia*2+1,j,i3)
2048          r2=z(1,ia*2+2,j,i3)
2049          s2=z(2,ia*2+2,j,i3)
2050          zbr(1,ind(ia*2+1),j,i3) =  r2 + r1
2051          zbr(2,ind(ia*2+1),j,i3) =  s2 + s1
2052          zbr(1,ind(ia*2+2),j,i3) = -r2 + r1
2053          zbr(2,ind(ia*2+2),j,i3) = -s2 + s1
2054        end do
2055 
2056 !      Second step of factor 2
2057        do ia=1,aft(ic)-1
2058          indx=ind(ia*2+1)-1
2059          cr2=trig(1,indx)
2060          ct2=trig(2,indx)
2061          do j=ma,mb
2062            r1=z(1,ia*2+1,j,i3)
2063            s1=z(2,ia*2+1,j,i3)
2064            r2=z(1,ia*2+2,j,i3) - z(2,ia*2+2,j,i3)*ct2
2065            s2=z(1,ia*2+2,j,i3)*ct2 + z(2,ia*2+2,j,i3)
2066            zbr(1,ind(ia*2+1),j,i3) =  r2*cr2 + r1
2067            zbr(2,ind(ia*2+1),j,i3) =  s2*cr2 + s1
2068            zbr(1,ind(ia*2+2),j,i3) = -r2*cr2 + r1
2069            zbr(2,ind(ia*2+2),j,i3) = -s2*cr2 + s1
2070          end do
2071        end do
2072 
2073 !      Treat factor 3
2074      else if (now(ic)==3) then
2075 !      radix 3
2076 !      .5d0*sqrt(3.d0)=0.8660254037844387d0
2077        ia=0
2078        bb=ris*0.8660254037844387d0
2079 
2080 !      First step of factor 3
2081        do j=ma,mb
2082          r1=z(1,ia*3+1,j,i3)
2083          s1=z(2,ia*3+1,j,i3)
2084          r2=z(1,ia*3+2,j,i3)
2085          s2=z(2,ia*3+2,j,i3)
2086          r3=z(1,ia*3+3,j,i3)
2087          s3=z(2,ia*3+3,j,i3)
2088          r=r2 + r3
2089          s=s2 + s3
2090          zbr(1,ind(ia*3+1),j,i3) = r + r1
2091          zbr(2,ind(ia*3+1),j,i3) = s + s1
2092          r1=r1 - r*.5d0
2093          s1=s1 - s*.5d0
2094          r2=r2-r3
2095          s2=s2-s3
2096          zbr(1,ind(ia*3+2),j,i3) = r1 - s2*bb
2097          zbr(2,ind(ia*3+2),j,i3) = s1 + r2*bb
2098          zbr(1,ind(ia*3+3),j,i3) = r1 + s2*bb
2099          zbr(2,ind(ia*3+3),j,i3) = s1 - r2*bb
2100        end do
2101 
2102 !      Second step of factor 3
2103        do ia=1,aft(ic)-1
2104          indx=ind(ia*3+1)-1
2105          cr2=trig(1,indx)
2106          ct2=trig(2,indx)
2107          cr3=trig(1,2*indx)
2108          ct3=trig(2,2*indx)
2109          cr2=cr2/cr3
2110          cr3p=.5d0*cr3
2111          bb=ris*cr3*0.8660254037844387d0
2112          do j=ma,mb
2113            r1=z(1,ia*3+1,j,i3)
2114            s1=z(2,ia*3+1,j,i3)
2115            r2=z(1,ia*3+2,j,i3) - z(2,ia*3+2,j,i3)*ct2
2116            s2=z(1,ia*3+2,j,i3)*ct2 + z(2,ia*3+2,j,i3)
2117            r3=z(1,ia*3+3,j,i3) - z(2,ia*3+3,j,i3)*ct3
2118            s3=z(1,ia*3+3,j,i3)*ct3 + z(2,ia*3+3,j,i3)
2119            r=cr2*r2 + r3
2120            s=cr2*s2 + s3
2121            zbr(1,ind(ia*3+1),j,i3) = r*cr3 + r1
2122            zbr(2,ind(ia*3+1),j,i3) = s*cr3 + s1
2123            r1=r1 - r*cr3p
2124            s1=s1 - s*cr3p
2125            r2=cr2*r2-r3
2126            s2=cr2*s2-s3
2127            zbr(1,ind(ia*3+2),j,i3) = r1 - s2*bb
2128            zbr(2,ind(ia*3+2),j,i3) = s1 + r2*bb
2129            zbr(1,ind(ia*3+3),j,i3) = r1 + s2*bb
2130            zbr(2,ind(ia*3+3),j,i3) = s1 - r2*bb
2131          end do
2132        end do
2133 
2134 !      Treat factor 5
2135      else if (now(ic)==5) then
2136 !      radix 5
2137 !      sin(2.d0*pi/5.d0)
2138        sin2=ris*0.9510565162951536d0
2139        ia=0
2140 
2141 !      First step of factor 5
2142        do j=ma,mb
2143          r1=z(1,ia*5+1,j,i3)
2144          s1=z(2,ia*5+1,j,i3)
2145          r2=z(1,ia*5+2,j,i3)
2146          s2=z(2,ia*5+2,j,i3)
2147          r3=z(1,ia*5+3,j,i3)
2148          s3=z(2,ia*5+3,j,i3)
2149          r4=z(1,ia*5+4,j,i3)
2150          s4=z(2,ia*5+4,j,i3)
2151          r5=z(1,ia*5+5,j,i3)
2152          s5=z(2,ia*5+5,j,i3)
2153          r25 = r2 + r5
2154          r34 = r3 + r4
2155          s25 = s2 - s5
2156          s34 = s3 - s4
2157          zbr(1,ind(ia*5+1),j,i3) = r1 + r25 + r34
2158          r = r1 + cos2*r25 + cos4*r34
2159          s = s25 + sin42*s34
2160          zbr(1,ind(ia*5+2),j,i3) = r - sin2*s
2161          zbr(1,ind(ia*5+5),j,i3) = r + sin2*s
2162          r = r1 + cos4*r25 + cos2*r34
2163          s = sin42*s25 - s34
2164          zbr(1,ind(ia*5+3),j,i3) = r - sin2*s
2165          zbr(1,ind(ia*5+4),j,i3) = r + sin2*s
2166          r25 = r2 - r5
2167          r34 = r3 - r4
2168          s25 = s2 + s5
2169          s34 = s3 + s4
2170          zbr(2,ind(ia*5+1),j,i3) = s1 + s25 + s34
2171          r = s1 + cos2*s25 + cos4*s34
2172          s = r25 + sin42*r34
2173          zbr(2,ind(ia*5+2),j,i3) = r + sin2*s
2174          zbr(2,ind(ia*5+5),j,i3) = r - sin2*s
2175          r = s1 + cos4*s25 + cos2*s34
2176          s = sin42*r25 - r34
2177          zbr(2,ind(ia*5+3),j,i3) = r + sin2*s
2178          zbr(2,ind(ia*5+4),j,i3) = r - sin2*s
2179        end do
2180 
2181 !      Second step of factor 5
2182        do ia=1,aft(ic)-1
2183          indx=ind(ia*5+1)-1
2184          cr2=trig(1,indx)
2185          ct2=trig(2,indx)
2186          cr3=trig(1,2*indx)
2187          ct3=trig(2,2*indx)
2188          cr4=trig(1,3*indx)
2189          ct4=trig(2,3*indx)
2190          cr5=trig(1,4*indx)
2191          ct5=trig(2,4*indx)
2192          do j=ma,mb
2193            r1=z(1,ia*5+1,j,i3)
2194            s1=z(2,ia*5+1,j,i3)
2195            r2=cr2*(z(1,ia*5+2,j,i3) - z(2,ia*5+2,j,i3)*ct2)
2196            s2=cr2*(z(1,ia*5+2,j,i3)*ct2 + z(2,ia*5+2,j,i3))
2197            r3=cr3*(z(1,ia*5+3,j,i3) - z(2,ia*5+3,j,i3)*ct3)
2198            s3=cr3*(z(1,ia*5+3,j,i3)*ct3 + z(2,ia*5+3,j,i3))
2199            r4=z(1,ia*5+4,j,i3) - z(2,ia*5+4,j,i3)*ct4
2200            s4=z(1,ia*5+4,j,i3)*ct4 + z(2,ia*5+4,j,i3)
2201            r5=z(1,ia*5+5,j,i3) - z(2,ia*5+5,j,i3)*ct5
2202            s5=z(1,ia*5+5,j,i3)*ct5 + z(2,ia*5+5,j,i3)
2203            r25 = r2 + r5*cr5
2204            r34 = r3 + r4*cr4
2205            s25 = s2 - s5*cr5
2206            s34 = s3 - s4*cr4
2207            zbr(1,ind(ia*5+1),j,i3) = r1 + r25 + r34
2208            r = r1 + cos2*r25 + cos4*r34
2209            s = s25 + sin42*s34
2210            zbr(1,ind(ia*5+2),j,i3) = r - sin2*s
2211            zbr(1,ind(ia*5+5),j,i3) = r + sin2*s
2212            r = r1 + cos4*r25 + cos2*r34
2213            s = sin42*s25 - s34
2214            zbr(1,ind(ia*5+3),j,i3) = r - sin2*s
2215            zbr(1,ind(ia*5+4),j,i3) = r + sin2*s
2216            r25 = r2 - r5*cr5
2217            r34 = r3 - r4*cr4
2218            s25 = s2 + s5*cr5
2219            s34 = s3 + s4*cr4
2220            zbr(2,ind(ia*5+1),j,i3) = s1 + s25 + s34
2221            r = s1 + cos2*s25 + cos4*s34
2222            s = r25 + sin42*r34
2223            zbr(2,ind(ia*5+2),j,i3) = r + sin2*s
2224            zbr(2,ind(ia*5+5),j,i3) = r - sin2*s
2225            r = s1 + cos4*s25 + cos2*s34
2226            s = sin42*r25 - r34
2227            zbr(2,ind(ia*5+3),j,i3) = r + sin2*s
2228            zbr(2,ind(ia*5+4),j,i3) = r - sin2*s
2229          end do
2230        end do
2231 
2232      else
2233 !      All factors treated
2234        ABI_BUG('called with factors other than 2, 3, and 5')
2235      end if
2236 
2237 !    ---------------------------------------------------------------
2238 
2239    end do ! do i3=1,n3
2240  end do  ! do jj=1,n2,lot
2241 !$OMP END PARALLEL DO
2242 
2243 end subroutine sg_fftx

m_sgfft/sg_ffty [ Functions ]

[ Top ] [ m_sgfft ] [ Functions ]

NAME

 sg_ffty

FUNCTION

 This subroutine is called by the 3-dimensional fft to conduct the
 "y" transforms for all x and z.

INPUTS

  fftcache=size of the cache (kB)
  mfac = maximum number of factors in 1D FFTs
  mg = maximum length of 1D FFTs
  nd1=first dimension of (complex) arrays z and zbr (treated as real within
   this subroutine)
  nd2=second dimension of (complex) arrays z and zbr (treated as real within
   this subroutine)
  nd3=third dimension of (complex) arrays z and zbr (treated as real within
   this subroutine)
  n1i=lower i1 index, used for blocking : the do-loop will be i1=n1i,n1
   put to 1 for usual ffty
  n1=upper i1 index, used for blocking, put usual n1 for usual ffty
  n3i=lower i3 index, used for blocking : the do-loop will be i3=n3i,n3
   put to 1 for usual ffty
  n3=upper i3 index, used for blocking, put usual n3 for usual ffty
  z(2,nd1,nd2,nd3)=INPUT array; destroyed by transformation
  trig, aft, now, bef, ind=provided by previous call to ctrig
   Note that in this routine (and in ctrig) the values in array trig are
   actually cos and tan, not cos and sin.  Use of tan allows advantageous
   use of FMA on the ibm rs6000.
  ris=sign of exponential in transform (should be 1 or -1; real)
  ic=number of (radix) factors of x transform length (from ctrig)

OUTPUT

  zbr(2,nd1,nd2,nd3)=OUTPUT transformed array; no scaling applied

TODO

 Use latex for the equation above

SOURCE

2288 subroutine sg_ffty(fftcache,mfac,mg,nd1,nd2,nd3,n1i,n1,n3i,n3,&
2289 &          z,zbr,trig,aft,now,bef,ris,ind,ic)
2290 
2291 !Arguments ------------------------------------
2292 !Dimensions of aft, now, bef, ind, and trig should agree with
2293 !those in subroutine ctrig.
2294 !scalars
2295  integer,intent(in) :: fftcache,ic,mfac,mg,n1,n1i,n3,n3i,nd1,nd2,nd3
2296  real(dp),intent(in) :: ris
2297 !arrays
2298  integer,intent(in) :: aft(mfac),bef(mfac),ind(mg),now(mfac)
2299  real(dp),intent(in) :: trig(2,mg)
2300  real(dp),intent(inout) :: z(2,nd1,nd2,nd3),zbr(2,nd1,nd2,nd3)
2301 
2302 !Local variables-------------------------------
2303 !scalars
2304  integer :: i,ia,ib,indx,j1,j2,ntb
2305  real(dp),parameter :: cos2=0.3090169943749474d0   !cos(2.d0*pi/5.d0)
2306  real(dp),parameter :: cos4=-0.8090169943749474d0  !cos(4.d0*pi/5.d0)
2307  real(dp),parameter :: sin42=0.6180339887498948d0  !sin(4.d0*pi/5.d0)/sin(2.d0*pi/5.d0)
2308  real(dp) :: bb,cr2,cr2s,cr3,cr3p,cr4,cr5,ct2,ct3,ct4,ct5
2309  real(dp) :: r,r1,r2,r25,r3,r34,r4,r5,s,sin2,s1,s2,s25,s3,s34,s4,s5
2310 
2311 ! *************************************************************************
2312 
2313  if (fftcache<0) then
2314    ABI_ERROR('fftcache must be positive')
2315  end if
2316 
2317 !Outer loop over z planes (j2)--note range from n3i to n3
2318 
2319 !$OMP PARALLEL DO DEFAULT(PRIVATE) SHARED(aft,bef,ic,ind,n1,n1i,n3,n3i,now,ris,trig,z,zbr)
2320  do j2=n3i,n3
2321 
2322 !  Direct transformation
2323    do i=1,ic-1
2324      ntb=now(i)*bef(i)
2325 
2326 !    Treat radix 4
2327      if (now(i)==4) then
2328        ia=0
2329 
2330 !      First step of radix 4
2331        do ib=1,bef(i)
2332 !        Inner loop over all x values (j1) -- note range from n1i to n1
2333 !        y transform is performed for this range of x values repeatedly
2334 !        below
2335 
2336          do j1=n1i,n1
2337            r4=z(1,j1,ia*ntb+3*bef(i)+ib,j2)
2338            s4=z(2,j1,ia*ntb+3*bef(i)+ib,j2)
2339            r3=z(1,j1,ia*ntb+2*bef(i)+ib,j2)
2340            s3=z(2,j1,ia*ntb+2*bef(i)+ib,j2)
2341            r2=z(1,j1,ia*ntb+bef(i)+ib,j2)
2342            s2=z(2,j1,ia*ntb+bef(i)+ib,j2)
2343            r1=z(1,j1,ia*ntb+ib,j2)
2344            s1=z(2,j1,ia*ntb+ib,j2)
2345 
2346            r=r1 + r3
2347            s=r2 + r4
2348            z(1,j1,ia*ntb+ib,j2) = r + s
2349            z(1,j1,ia*ntb+2*bef(i)+ib,j2) = r - s
2350            r=r1 - r3
2351            s=s2 - s4
2352            z(1,j1,ia*ntb+bef(i)+ib,j2) = r - s*ris
2353            z(1,j1,ia*ntb+3*bef(i)+ib,j2) = r + s*ris
2354            r=s1 + s3
2355            s=s2 + s4
2356            z(2,j1,ia*ntb+ib,j2) = r + s
2357            z(2,j1,ia*ntb+2*bef(i)+ib,j2) = r - s
2358            r=s1 - s3
2359            s=r2 - r4
2360            z(2,j1,ia*ntb+bef(i)+ib,j2) = r + s*ris
2361            z(2,j1,ia*ntb+3*bef(i)+ib,j2) = r - s*ris
2362          end do ! j1
2363        end do ! ib
2364 
2365 !      Second step of radix 4
2366        do ia=1,aft(i)-1
2367          indx=ind(ia*4*bef(i)+1)-1
2368          indx=indx*bef(i)
2369          cr2=trig(1,indx)
2370          ct2=trig(2,indx)
2371          cr3=trig(1,2*indx)
2372          ct3=trig(2,2*indx)
2373          cr4=trig(1,3*indx)
2374          ct4=trig(2,3*indx)
2375          cr4=cr4/cr2
2376          cr2s=cr2*ris
2377          do ib=1,bef(i)
2378 !          Range of x array again (also appears many times below)
2379            do j1=n1i,n1
2380              r4=z(1,j1,ia*ntb+3*bef(i)+ib,j2) - &
2381 &             z(2,j1,ia*ntb+3*bef(i)+ib,j2)*ct4
2382              s4=z(1,j1,ia*ntb+3*bef(i)+ib,j2)*ct4 + &
2383 &             z(2,j1,ia*ntb+3*bef(i)+ib,j2)
2384              r3=z(1,j1,ia*ntb+2*bef(i)+ib,j2) - &
2385 &             z(2,j1,ia*ntb+2*bef(i)+ib,j2)*ct3
2386              s3=z(1,j1,ia*ntb+2*bef(i)+ib,j2)*ct3 + &
2387 &             z(2,j1,ia*ntb+2*bef(i)+ib,j2)
2388              r2=z(1,j1,ia*ntb+bef(i)+ib,j2) - &
2389 &             z(2,j1,ia*ntb+bef(i)+ib,j2)*ct2
2390              s2=z(1,j1,ia*ntb+bef(i)+ib,j2)*ct2 + &
2391 &             z(2,j1,ia*ntb+bef(i)+ib,j2)
2392              r1=z(1,j1,ia*ntb+ib,j2)
2393              s1=z(2,j1,ia*ntb+ib,j2)
2394 
2395              r=r1 + r3*cr3
2396              s=r2 + r4*cr4
2397              z(1,j1,ia*ntb+ib,j2) = r + s*cr2
2398              z(1,j1,ia*ntb+2*bef(i)+ib,j2) = r - s*cr2
2399              r=r1 - r3*cr3
2400              s=s2 - s4*cr4
2401              z(1,j1,ia*ntb+bef(i)+ib,j2) = r - s*cr2s
2402              z(1,j1,ia*ntb+3*bef(i)+ib,j2) = r + s*cr2s
2403              r=s1 + s3*cr3
2404              s=s2 + s4*cr4
2405              z(2,j1,ia*ntb+ib,j2) = r + s*cr2
2406              z(2,j1,ia*ntb+2*bef(i)+ib,j2) = r - s*cr2
2407              r=s1 - s3*cr3
2408              s=r2 - r4*cr4
2409              z(2,j1,ia*ntb+bef(i)+ib,j2) = r + s*cr2s
2410              z(2,j1,ia*ntb+3*bef(i)+ib,j2) = r - s*cr2s
2411            end do ! j1
2412          end do ! ib
2413        end do ! ia
2414 
2415 !      Treat radix 2
2416      else if (now(i)==2) then
2417        ia=0
2418 
2419 !      First step of radix 2
2420        do ib=1,bef(i)
2421          do j1=n1i,n1
2422            r1=z(1,j1,ia*ntb+ib,j2)
2423            s1=z(2,j1,ia*ntb+ib,j2)
2424            r2=z(1,j1,ia*ntb+bef(i)+ib,j2)
2425            s2=z(2,j1,ia*ntb+bef(i)+ib,j2)
2426            z(1,j1,ia*ntb+ib,j2) =  r2 + r1
2427            z(2,j1,ia*ntb+ib,j2) =  s2 + s1
2428            z(1,j1,ia*ntb+bef(i)+ib,j2) = -r2 + r1
2429            z(2,j1,ia*ntb+bef(i)+ib,j2) = -s2 + s1
2430          end do
2431        end do
2432 
2433 !      Second step of radix 2
2434        do ia=1,aft(i)-1
2435          indx=ind(ia*2*bef(i)+1)-1
2436          indx=indx*bef(i)
2437          cr2=trig(1,indx)
2438          ct2=trig(2,indx)
2439          do ib=1,bef(i)
2440            do j1=n1i,n1
2441              r1=z(1,j1,ia*ntb+ib,j2)
2442              s1=z(2,j1,ia*ntb+ib,j2)
2443              r2=z(1,j1,ia*ntb+bef(i)+ib,j2) - &
2444 &             z(2,j1,ia*ntb+bef(i)+ib,j2)*ct2
2445              s2=z(1,j1,ia*ntb+bef(i)+ib,j2)*ct2 + &
2446 &             z(2,j1,ia*ntb+bef(i)+ib,j2)
2447              z(1,j1,ia*ntb+ib,j2) =  r2*cr2 + r1
2448              z(2,j1,ia*ntb+ib,j2) =  s2*cr2 + s1
2449              z(1,j1,ia*ntb+bef(i)+ib,j2) = -r2*cr2 + r1
2450              z(2,j1,ia*ntb+bef(i)+ib,j2) = -s2*cr2 + s1
2451            end do
2452          end do
2453        end do
2454 
2455 !      Treat radix 3
2456      else if (now(i)==3) then
2457 !      .5d0*sqrt(3.d0)=0.8660254037844387d0
2458        ia=0
2459        bb=ris*0.8660254037844387d0
2460 
2461 !      First step of radix 3
2462        do ib=1,bef(i)
2463          do j1=n1i,n1
2464            r1=z(1,j1,ia*ntb+ib,j2)
2465            s1=z(2,j1,ia*ntb+ib,j2)
2466            r2=z(1,j1,ia*ntb+bef(i)+ib,j2)
2467            s2=z(2,j1,ia*ntb+bef(i)+ib,j2)
2468            r3=z(1,j1,ia*ntb+2*bef(i)+ib,j2)
2469            s3=z(2,j1,ia*ntb+2*bef(i)+ib,j2)
2470            r=r2 + r3
2471            s=s2 + s3
2472            z(1,j1,ia*ntb+ib,j2) = r + r1
2473            z(2,j1,ia*ntb+ib,j2) = s + s1
2474            r1=r1 - r*.5d0
2475            s1=s1 - s*.5d0
2476            r2=r2-r3
2477            s2=s2-s3
2478            z(1,j1,ia*ntb+bef(i)+ib,j2) = r1 - s2*bb
2479            z(2,j1,ia*ntb+bef(i)+ib,j2) = s1 + r2*bb
2480            z(1,j1,ia*ntb+2*bef(i)+ib,j2) = r1 + s2*bb
2481            z(2,j1,ia*ntb+2*bef(i)+ib,j2) = s1 - r2*bb
2482          end do
2483        end do
2484 
2485 !      Second step of radix 3
2486        do ia=1,aft(i)-1
2487          indx=ind(ia*3*bef(i)+1)-1
2488          indx=indx*bef(i)
2489          cr2=trig(1,indx)
2490          ct2=trig(2,indx)
2491          cr3=trig(1,2*indx)
2492          ct3=trig(2,2*indx)
2493          cr2=cr2/cr3
2494          cr3p=.5d0*cr3
2495          bb=ris*cr3*0.8660254037844387d0
2496          do ib=1,bef(i)
2497            do j1=n1i,n1
2498              r1=z(1,j1,ia*ntb+ib,j2)
2499              s1=z(2,j1,ia*ntb+ib,j2)
2500              r2=z(1,j1,ia*ntb+bef(i)+ib,j2) - &
2501 &             z(2,j1,ia*ntb+bef(i)+ib,j2)*ct2
2502              s2=z(1,j1,ia*ntb+bef(i)+ib,j2)*ct2 + &
2503 &             z(2,j1,ia*ntb+bef(i)+ib,j2)
2504              r3=z(1,j1,ia*ntb+2*bef(i)+ib,j2) - &
2505 &             z(2,j1,ia*ntb+2*bef(i)+ib,j2)*ct3
2506              s3=z(1,j1,ia*ntb+2*bef(i)+ib,j2)*ct3 + &
2507 &             z(2,j1,ia*ntb+2*bef(i)+ib,j2)
2508              r=cr2*r2 + r3
2509              s=cr2*s2 + s3
2510              z(1,j1,ia*ntb+ib,j2) = r*cr3 + r1
2511              z(2,j1,ia*ntb+ib,j2) = s*cr3 + s1
2512              r1=r1 - r*cr3p
2513              s1=s1 - s*cr3p
2514              r2=cr2*r2-r3
2515              s2=cr2*s2-s3
2516              z(1,j1,ia*ntb+bef(i)+ib,j2) = r1 - s2*bb
2517              z(2,j1,ia*ntb+bef(i)+ib,j2) = s1 + r2*bb
2518              z(1,j1,ia*ntb+2*bef(i)+ib,j2) = r1 + s2*bb
2519              z(2,j1,ia*ntb+2*bef(i)+ib,j2) = s1 - r2*bb
2520            end do
2521          end do
2522        end do
2523 
2524 !      Treat radix 5
2525      else if (now(i)==5) then
2526 !      sin(2.d0*pi/5.d0)
2527        sin2=ris*0.9510565162951536d0
2528        ia=0
2529 
2530 !      First step of radix 5
2531        do ib=1,bef(i)
2532          do j1=n1i,n1
2533            r1=z(1,j1,ia*ntb+ib,j2)
2534            s1=z(2,j1,ia*ntb+ib,j2)
2535            r2=z(1,j1,ia*ntb+bef(i)+ib,j2)
2536            s2=z(2,j1,ia*ntb+bef(i)+ib,j2)
2537            r3=z(1,j1,ia*ntb+2*bef(i)+ib,j2)
2538            s3=z(2,j1,ia*ntb+2*bef(i)+ib,j2)
2539            r4=z(1,j1,ia*ntb+3*bef(i)+ib,j2)
2540            s4=z(2,j1,ia*ntb+3*bef(i)+ib,j2)
2541            r5=z(1,j1,ia*ntb+4*bef(i)+ib,j2)
2542            s5=z(2,j1,ia*ntb+4*bef(i)+ib,j2)
2543            r25 = r2 + r5
2544            r34 = r3 + r4
2545            s25 = s2 - s5
2546            s34 = s3 - s4
2547            z(1,j1,ia*ntb+ib,j2) = r1 + r25 + r34
2548            r = r1 + cos2*r25 + cos4*r34
2549            s = s25 + sin42*s34
2550            z(1,j1,ia*ntb+bef(i)+ib,j2) = r - sin2*s
2551            z(1,j1,ia*ntb+4*bef(i)+ib,j2) = r + sin2*s
2552            r = r1 + cos4*r25 + cos2*r34
2553            s = sin42*s25 - s34
2554            z(1,j1,ia*ntb+2*bef(i)+ib,j2) = r - sin2*s
2555            z(1,j1,ia*ntb+3*bef(i)+ib,j2) = r + sin2*s
2556            r25 = r2 - r5
2557            r34 = r3 - r4
2558            s25 = s2 + s5
2559            s34 = s3 + s4
2560            z(2,j1,ia*ntb+ib,j2) = s1 + s25 + s34
2561            r = s1 + cos2*s25 + cos4*s34
2562            s = r25 + sin42*r34
2563            z(2,j1,ia*ntb+bef(i)+ib,j2) = r + sin2*s
2564            z(2,j1,ia*ntb+4*bef(i)+ib,j2) = r - sin2*s
2565            r = s1 + cos4*s25 + cos2*s34
2566            s = sin42*r25 - r34
2567            z(2,j1,ia*ntb+2*bef(i)+ib,j2) = r + sin2*s
2568            z(2,j1,ia*ntb+3*bef(i)+ib,j2) = r - sin2*s
2569          end do
2570        end do
2571 
2572 !      Second step of radix 5
2573        do ia=1,aft(i)-1
2574          indx=ind(ia*5*bef(i)+1)-1
2575          indx=indx*bef(i)
2576          cr2=trig(1,indx)
2577          ct2=trig(2,indx)
2578          cr3=trig(1,2*indx)
2579          ct3=trig(2,2*indx)
2580          cr4=trig(1,3*indx)
2581          ct4=trig(2,3*indx)
2582          cr5=trig(1,4*indx)
2583          ct5=trig(2,4*indx)
2584          do ib=1,bef(i)
2585            do j1=n1i,n1
2586              r1=z(1,j1,ia*ntb+ib,j2)
2587              s1=z(2,j1,ia*ntb+ib,j2)
2588              r2=cr2*(z(1,j1,ia*ntb+bef(i)+ib,j2) - &
2589 &             z(2,j1,ia*ntb+bef(i)+ib,j2)*ct2)
2590              s2=cr2*(z(1,j1,ia*ntb+bef(i)+ib,j2)*ct2 + &
2591 &             z(2,j1,ia*ntb+bef(i)+ib,j2))
2592              r3=cr3*(z(1,j1,ia*ntb+2*bef(i)+ib,j2) - &
2593 &             z(2,j1,ia*ntb+2*bef(i)+ib,j2)*ct3)
2594              s3=cr3*(z(1,j1,ia*ntb+2*bef(i)+ib,j2)*ct3 + &
2595 &             z(2,j1,ia*ntb+2*bef(i)+ib,j2))
2596              r4=z(1,j1,ia*ntb+3*bef(i)+ib,j2) - &
2597 &             z(2,j1,ia*ntb+3*bef(i)+ib,j2)*ct4
2598              s4=z(1,j1,ia*ntb+3*bef(i)+ib,j2)*ct4 + &
2599 &             z(2,j1,ia*ntb+3*bef(i)+ib,j2)
2600              r5=z(1,j1,ia*ntb+4*bef(i)+ib,j2) - &
2601 &             z(2,j1,ia*ntb+4*bef(i)+ib,j2)*ct5
2602              s5=z(1,j1,ia*ntb+4*bef(i)+ib,j2)*ct5 + &
2603 &             z(2,j1,ia*ntb+4*bef(i)+ib,j2)
2604              r25 = r2 + r5*cr5
2605              r34 = r3 + r4*cr4
2606              s25 = s2 - s5*cr5
2607              s34 = s3 - s4*cr4
2608              z(1,j1,ia*ntb+ib,j2) = r1 + r25 + r34
2609              r = r1 + cos2*r25 + cos4*r34
2610              s = s25 + sin42*s34
2611              z(1,j1,ia*ntb+bef(i)+ib,j2) = r - sin2*s
2612              z(1,j1,ia*ntb+4*bef(i)+ib,j2) = r + sin2*s
2613              r = r1 + cos4*r25 + cos2*r34
2614              s = sin42*s25 - s34
2615              z(1,j1,ia*ntb+2*bef(i)+ib,j2) = r - sin2*s
2616              z(1,j1,ia*ntb+3*bef(i)+ib,j2) = r + sin2*s
2617              r25 = r2 - r5*cr5
2618              r34 = r3 - r4*cr4
2619              s25 = s2 + s5*cr5
2620              s34 = s3 + s4*cr4
2621              z(2,j1,ia*ntb+ib,j2) = s1 + s25 + s34
2622              r = s1 + cos2*s25 + cos4*s34
2623              s = r25 + sin42*r34
2624              z(2,j1,ia*ntb+bef(i)+ib,j2) = r + sin2*s
2625              z(2,j1,ia*ntb+4*bef(i)+ib,j2) = r - sin2*s
2626              r = s1 + cos4*s25 + cos2*s34
2627              s = sin42*r25 - r34
2628              z(2,j1,ia*ntb+2*bef(i)+ib,j2) = r + sin2*s
2629              z(2,j1,ia*ntb+3*bef(i)+ib,j2) = r - sin2*s
2630            end do
2631          end do
2632        end do
2633 
2634      else
2635 !      All radices treated
2636        ABI_BUG('called with factors other than 2, 3, and 5')
2637      end if
2638 
2639    end do
2640 
2641 !  ---------------------------------------------------------------
2642 
2643 !  bitreversal
2644 
2645 !  Treat radix 4
2646    if (now(ic)==4) then
2647      ia=0
2648 
2649 !    First step of radix 4
2650      do j1=n1i,n1
2651        r4=z(1,j1,ia*4+4,j2)
2652        s4=z(2,j1,ia*4+4,j2)
2653        r3=z(1,j1,ia*4+3,j2)
2654        s3=z(2,j1,ia*4+3,j2)
2655        r2=z(1,j1,ia*4+2,j2)
2656        s2=z(2,j1,ia*4+2,j2)
2657        r1=z(1,j1,ia*4+1,j2)
2658        s1=z(2,j1,ia*4+1,j2)
2659 
2660        r=r1 + r3
2661        s=r2 + r4
2662        zbr(1,j1,ind(ia*4+1),j2) = r + s
2663        zbr(1,j1,ind(ia*4+3),j2) = r - s
2664        r=r1 - r3
2665        s=s2 - s4
2666        zbr(1,j1,ind(ia*4+2),j2) = r - s*ris
2667        zbr(1,j1,ind(ia*4+4),j2) = r + s*ris
2668        r=s1 + s3
2669        s=s2 + s4
2670        zbr(2,j1,ind(ia*4+1),j2) = r + s
2671        zbr(2,j1,ind(ia*4+3),j2) = r - s
2672        r=s1 - s3
2673        s=r2 - r4
2674        zbr(2,j1,ind(ia*4+2),j2) = r + s*ris
2675        zbr(2,j1,ind(ia*4+4),j2) = r - s*ris
2676      end do
2677 
2678 !    Second step of radix 4
2679      do ia=1,aft(ic)-1
2680        indx=ind(ia*4+1)-1
2681        cr2=trig(1,indx)
2682        ct2=trig(2,indx)
2683        cr3=trig(1,2*indx)
2684        ct3=trig(2,2*indx)
2685        cr4=trig(1,3*indx)
2686        ct4=trig(2,3*indx)
2687        cr4=cr4/cr2
2688        cr2s=cr2*ris
2689        do j1=n1i,n1
2690          r4=z(1,j1,ia*4+4,j2) - z(2,j1,ia*4+4,j2)*ct4
2691          s4=z(1,j1,ia*4+4,j2)*ct4 + z(2,j1,ia*4+4,j2)
2692          r3=z(1,j1,ia*4+3,j2) - z(2,j1,ia*4+3,j2)*ct3
2693          s3=z(1,j1,ia*4+3,j2)*ct3 + z(2,j1,ia*4+3,j2)
2694          r2=z(1,j1,ia*4+2,j2) - z(2,j1,ia*4+2,j2)*ct2
2695          s2=z(1,j1,ia*4+2,j2)*ct2 + z(2,j1,ia*4+2,j2)
2696          r1=z(1,j1,ia*4+1,j2)
2697          s1=z(2,j1,ia*4+1,j2)
2698 
2699          r=r1 + r3*cr3
2700          s=r2 + r4*cr4
2701          zbr(1,j1,ind(ia*4+1),j2) = r + s*cr2
2702          zbr(1,j1,ind(ia*4+3),j2) = r - s*cr2
2703          r=r1 - r3*cr3
2704          s=s2 - s4*cr4
2705          zbr(1,j1,ind(ia*4+2),j2) = r - s*cr2s
2706          zbr(1,j1,ind(ia*4+4),j2) = r + s*cr2s
2707          r=s1 + s3*cr3
2708          s=s2 + s4*cr4
2709          zbr(2,j1,ind(ia*4+1),j2) = r + s*cr2
2710          zbr(2,j1,ind(ia*4+3),j2) = r - s*cr2
2711          r=s1 - s3*cr3
2712          s=r2 - r4*cr4
2713          zbr(2,j1,ind(ia*4+2),j2) = r + s*cr2s
2714          zbr(2,j1,ind(ia*4+4),j2) = r - s*cr2s
2715        end do
2716      end do
2717 
2718 !    Treat radix 2
2719    else if (now(ic)==2) then
2720      ia=0
2721 
2722 !    First step of radix 2
2723      do j1=n1i,n1
2724        r1=z(1,j1,ia*2+1,j2)
2725        s1=z(2,j1,ia*2+1,j2)
2726        r2=z(1,j1,ia*2+2,j2)
2727        s2=z(2,j1,ia*2+2,j2)
2728        zbr(1,j1,ind(ia*2+1),j2) =  r2 + r1
2729        zbr(2,j1,ind(ia*2+1),j2) =  s2 + s1
2730        zbr(1,j1,ind(ia*2+2),j2) = -r2 + r1
2731        zbr(2,j1,ind(ia*2+2),j2) = -s2 + s1
2732      end do
2733 
2734 !    Second step of radix 2
2735      do ia=1,aft(ic)-1
2736        indx=ind(ia*2+1)-1
2737        cr2=trig(1,indx)
2738        ct2=trig(2,indx)
2739        do j1=n1i,n1
2740          r1=z(1,j1,ia*2+1,j2)
2741          s1=z(2,j1,ia*2+1,j2)
2742          r2=z(1,j1,ia*2+2,j2) - z(2,j1,ia*2+2,j2)*ct2
2743          s2=z(1,j1,ia*2+2,j2)*ct2 + z(2,j1,ia*2+2,j2)
2744          zbr(1,j1,ind(ia*2+1),j2) =  r2*cr2 + r1
2745          zbr(2,j1,ind(ia*2+1),j2) =  s2*cr2 + s1
2746          zbr(1,j1,ind(ia*2+2),j2) = -r2*cr2 + r1
2747          zbr(2,j1,ind(ia*2+2),j2) = -s2*cr2 + s1
2748        end do
2749      end do
2750 
2751 !    Treat radix 3
2752    else if (now(ic)==3) then
2753 !    .5d0*sqrt(3.d0)=0.8660254037844387d0
2754      ia=0
2755      bb=ris*0.8660254037844387d0
2756 
2757 !    First step of radix 3
2758      do j1=n1i,n1
2759        r1=z(1,j1,ia*3+1,j2)
2760        s1=z(2,j1,ia*3+1,j2)
2761        r2=z(1,j1,ia*3+2,j2)
2762        s2=z(2,j1,ia*3+2,j2)
2763        r3=z(1,j1,ia*3+3,j2)
2764        s3=z(2,j1,ia*3+3,j2)
2765        r=r2 + r3
2766        s=s2 + s3
2767        zbr(1,j1,ind(ia*3+1),j2) = r + r1
2768        zbr(2,j1,ind(ia*3+1),j2) = s + s1
2769        r1=r1 - r*.5d0
2770        s1=s1 - s*.5d0
2771        r2=r2-r3
2772        s2=s2-s3
2773        zbr(1,j1,ind(ia*3+2),j2) = r1 - s2*bb
2774        zbr(2,j1,ind(ia*3+2),j2) = s1 + r2*bb
2775        zbr(1,j1,ind(ia*3+3),j2) = r1 + s2*bb
2776        zbr(2,j1,ind(ia*3+3),j2) = s1 - r2*bb
2777      end do
2778 
2779 !    Second step of radix 3
2780      do ia=1,aft(ic)-1
2781        indx=ind(ia*3+1)-1
2782        cr2=trig(1,indx)
2783        ct2=trig(2,indx)
2784        cr3=trig(1,2*indx)
2785        ct3=trig(2,2*indx)
2786        cr2=cr2/cr3
2787        cr3p=.5d0*cr3
2788        bb=ris*cr3*0.8660254037844387d0
2789        do j1=n1i,n1
2790          r1=z(1,j1,ia*3+1,j2)
2791          s1=z(2,j1,ia*3+1,j2)
2792          r2=z(1,j1,ia*3+2,j2) - z(2,j1,ia*3+2,j2)*ct2
2793          s2=z(1,j1,ia*3+2,j2)*ct2 + z(2,j1,ia*3+2,j2)
2794          r3=z(1,j1,ia*3+3,j2) - z(2,j1,ia*3+3,j2)*ct3
2795          s3=z(1,j1,ia*3+3,j2)*ct3 + z(2,j1,ia*3+3,j2)
2796          r=cr2*r2 + r3
2797          s=cr2*s2 + s3
2798          zbr(1,j1,ind(ia*3+1),j2) = r*cr3 + r1
2799          zbr(2,j1,ind(ia*3+1),j2) = s*cr3 + s1
2800          r1=r1 - r*cr3p
2801          s1=s1 - s*cr3p
2802          r2=cr2*r2-r3
2803          s2=cr2*s2-s3
2804          zbr(1,j1,ind(ia*3+2),j2) = r1 - s2*bb
2805          zbr(2,j1,ind(ia*3+2),j2) = s1 + r2*bb
2806          zbr(1,j1,ind(ia*3+3),j2) = r1 + s2*bb
2807          zbr(2,j1,ind(ia*3+3),j2) = s1 - r2*bb
2808        end do
2809      end do
2810 
2811 !    Treat radix 5
2812    else if (now(ic)==5) then
2813 !    sin(2.d0*pi/5.d0)
2814      sin2=ris*0.9510565162951536d0
2815      ia=0
2816 
2817 !    First step of radix 5
2818      do j1=n1i,n1
2819        r1=z(1,j1,ia*5+1,j2)
2820        s1=z(2,j1,ia*5+1,j2)
2821        r2=z(1,j1,ia*5+2,j2)
2822        s2=z(2,j1,ia*5+2,j2)
2823        r3=z(1,j1,ia*5+3,j2)
2824        s3=z(2,j1,ia*5+3,j2)
2825        r4=z(1,j1,ia*5+4,j2)
2826        s4=z(2,j1,ia*5+4,j2)
2827        r5=z(1,j1,ia*5+5,j2)
2828        s5=z(2,j1,ia*5+5,j2)
2829        r25 = r2 + r5
2830        r34 = r3 + r4
2831        s25 = s2 - s5
2832        s34 = s3 - s4
2833        zbr(1,j1,ind(ia*5+1),j2) = r1 + r25 + r34
2834        r = r1 + cos2*r25 + cos4*r34
2835        s = s25 + sin42*s34
2836        zbr(1,j1,ind(ia*5+2),j2) = r - sin2*s
2837        zbr(1,j1,ind(ia*5+5),j2) = r + sin2*s
2838        r = r1 + cos4*r25 + cos2*r34
2839        s = sin42*s25 - s34
2840        zbr(1,j1,ind(ia*5+3),j2) = r - sin2*s
2841        zbr(1,j1,ind(ia*5+4),j2) = r + sin2*s
2842        r25 = r2 - r5
2843        r34 = r3 - r4
2844        s25 = s2 + s5
2845        s34 = s3 + s4
2846        zbr(2,j1,ind(ia*5+1),j2) = s1 + s25 + s34
2847        r = s1 + cos2*s25 + cos4*s34
2848        s = r25 + sin42*r34
2849        zbr(2,j1,ind(ia*5+2),j2) = r + sin2*s
2850        zbr(2,j1,ind(ia*5+5),j2) = r - sin2*s
2851        r = s1 + cos4*s25 + cos2*s34
2852        s = sin42*r25 - r34
2853        zbr(2,j1,ind(ia*5+3),j2) = r + sin2*s
2854        zbr(2,j1,ind(ia*5+4),j2) = r - sin2*s
2855      end do
2856 
2857 !    Second step of radix 5
2858      do ia=1,aft(ic)-1
2859        indx=ind(ia*5+1)-1
2860        cr2=trig(1,indx)
2861        ct2=trig(2,indx)
2862        cr3=trig(1,2*indx)
2863        ct3=trig(2,2*indx)
2864        cr4=trig(1,3*indx)
2865        ct4=trig(2,3*indx)
2866        cr5=trig(1,4*indx)
2867        ct5=trig(2,4*indx)
2868        do j1=n1i,n1
2869          r1=z(1,j1,ia*5+1,j2)
2870          s1=z(2,j1,ia*5+1,j2)
2871          r2=cr2*(z(1,j1,ia*5+2,j2) - z(2,j1,ia*5+2,j2)*ct2)
2872          s2=cr2*(z(1,j1,ia*5+2,j2)*ct2 + z(2,j1,ia*5+2,j2))
2873          r3=cr3*(z(1,j1,ia*5+3,j2) - z(2,j1,ia*5+3,j2)*ct3)
2874          s3=cr3*(z(1,j1,ia*5+3,j2)*ct3 + z(2,j1,ia*5+3,j2))
2875          r4=z(1,j1,ia*5+4,j2) - z(2,j1,ia*5+4,j2)*ct4
2876          s4=z(1,j1,ia*5+4,j2)*ct4 + z(2,j1,ia*5+4,j2)
2877          r5=z(1,j1,ia*5+5,j2) - z(2,j1,ia*5+5,j2)*ct5
2878          s5=z(1,j1,ia*5+5,j2)*ct5 + z(2,j1,ia*5+5,j2)
2879          r25 = r2 + r5*cr5
2880          r34 = r3 + r4*cr4
2881          s25 = s2 - s5*cr5
2882          s34 = s3 - s4*cr4
2883          zbr(1,j1,ind(ia*5+1),j2) = r1 + r25 + r34
2884          r = r1 + cos2*r25 + cos4*r34
2885          s = s25 + sin42*s34
2886          zbr(1,j1,ind(ia*5+2),j2) = r - sin2*s
2887          zbr(1,j1,ind(ia*5+5),j2) = r + sin2*s
2888          r = r1 + cos4*r25 + cos2*r34
2889          s = sin42*s25 - s34
2890          zbr(1,j1,ind(ia*5+3),j2) = r - sin2*s
2891          zbr(1,j1,ind(ia*5+4),j2) = r + sin2*s
2892          r25 = r2 - r5*cr5
2893          r34 = r3 - r4*cr4
2894          s25 = s2 + s5*cr5
2895          s34 = s3 + s4*cr4
2896          zbr(2,j1,ind(ia*5+1),j2) = s1 + s25 + s34
2897          r = s1 + cos2*s25 + cos4*s34
2898          s = r25 + sin42*r34
2899          zbr(2,j1,ind(ia*5+2),j2) = r + sin2*s
2900          zbr(2,j1,ind(ia*5+5),j2) = r - sin2*s
2901          r = s1 + cos4*s25 + cos2*s34
2902          s = sin42*r25 - r34
2903          zbr(2,j1,ind(ia*5+3),j2) = r + sin2*s
2904          zbr(2,j1,ind(ia*5+4),j2) = r - sin2*s
2905        end do
2906      end do
2907 
2908    else
2909      ! All radices done
2910      !if (now(ic) /= 1) then
2911      ABI_BUG(sjoin("Called with factors other than 2, 3, and 5. now(ic) = ", itoa(now(ic))))
2912      !end if
2913    end if
2914  end do
2915 !$OMP END PARALLEL DO
2916 
2917 end subroutine sg_ffty

m_sgfft/sg_fftz [ Functions ]

[ Top ] [ m_sgfft ] [ Functions ]

NAME

 sg_fftz

FUNCTION

 This subroutine is called by the 3-dimensional fft to conduct the
 "z" transforms for all x and y.

INPUTS

  mfac = maximum number of factors in 1D FFTs
  mg = maximum length of 1D FFTs
  nd1=first dimension of (complex) arrays z and zbr (treated as real within
   this subroutine)
  nd2=second dimension of (complex) arrays z and zbr (treated as real within
   this subroutine)
  nd3=third dimension of (complex) arrays z and zbr (treated as real within
   this subroutine)
  n1=actual length of x and y transforms
  n2i=lower i2 index, used for blocking : the do-loop will be i2=n2i,n2
   put to 1 for usual ffty
  n2=upper i2 index, used for blocking, put usual n2 for usual ffty
  z(2,nd1,nd2,nd3)=INPUT array; destroyed by transformation
  trig, aft, now, bef, ind=provided by previous call to ctrig
   Note that in this routine (and in ctrig) the values in array trig are
   actually cos and tan, not cos and sin.  Use of tan allows advantageous
   use of FMA on the ibm rs6000.
  ris=sign of exponential in transform (should be 1 or -1; real)
  ic=number of (radix) factors of x transform length (from ctrig)

OUTPUT

  zbr(2,nd1,nd2,nd3)=OUTPUT transformed array; no scaling applied

TODO

 Use latex for the equation above

SOURCE

2959 subroutine sg_fftz(mfac,mg,nd1,nd2,nd3,n1,n2i,n2,z,zbr,trig,aft,now,bef,ris,ind,ic)
2960 
2961 !Arguments ------------------------------------
2962 !Dimensions of aft, now, bef, ind, and trig should agree with
2963 !those in subroutine ctrig.
2964 !scalars
2965  integer,intent(in) :: ic,mfac,mg,n1,n2,n2i,nd1,nd2,nd3
2966  real(dp),intent(in) :: ris
2967 !arrays
2968  integer,intent(in) :: aft(mfac),bef(mfac),ind(mg),now(mfac)
2969  real(dp),intent(in) :: trig(2,mg)
2970  real(dp),intent(inout) :: z(2,nd1,nd2,nd3),zbr(2,nd1,nd2,nd3)
2971 
2972 !Local variables-------------------------------
2973 !scalars
2974  integer :: b_i,i,i2,ia,ib,indx,j,ntb
2975  real(dp),parameter :: cos2=0.3090169943749474d0   !cos(2.d0*pi/5.d0)
2976  real(dp),parameter :: cos4=-0.8090169943749474d0  !cos(4.d0*pi/5.d0)
2977  real(dp),parameter :: sin42=0.6180339887498948d0  !sin(4.d0*pi/5.d0)/sin(2.d0*pi/5.d0)
2978  real(dp) :: bb,cr2,cr2s,cr3,cr3p,cr4,cr5,ct2,ct3,ct4,ct5
2979  real(dp) :: r,r1,r2,r25,r3,r34,r4,r5,s,sin2,s1,s2,s25,s3,s34,s4,s5
2980 
2981 ! *************************************************************************
2982 
2983 !n12 occurs as a loop index repeated below; do z transform while
2984 !looping over all n12 lines of data
2985 
2986 !Direct transformation (to ic-1), bitreversal will be in second part
2987 !of routine
2988 
2989  do i=1,ic-1
2990    ntb=now(i)*bef(i)
2991    b_i=bef(i)
2992 
2993 !  Treat radix 4
2994    if (now(i)==4) then
2995      ia=0
2996 
2997 !    First step of radix 4
2998      do ib=1,b_i
2999 !$OMP PARALLEL DO DEFAULT(PRIVATE)&
3000 !$OMP&SHARED(b_i,ia,ib,n1,n2i,n2,ntb,ris,z)
3001        do i2=n2i,n2
3002          do j=1,n1
3003            r4=z(1,j,i2,ia*ntb+3*b_i+ib)
3004            s4=z(2,j,i2,ia*ntb+3*b_i+ib)
3005            r3=z(1,j,i2,ia*ntb+2*b_i+ib)
3006            s3=z(2,j,i2,ia*ntb+2*b_i+ib)
3007            r2=z(1,j,i2,ia*ntb+b_i+ib)
3008            s2=z(2,j,i2,ia*ntb+b_i+ib)
3009            r1=z(1,j,i2,ia*ntb+ib)
3010            s1=z(2,j,i2,ia*ntb+ib)
3011 
3012            r=r1 + r3
3013            s=r2 + r4
3014            z(1,j,i2,ia*ntb+ib) = r + s
3015            z(1,j,i2,ia*ntb+2*b_i+ib) = r - s
3016            r=r1 - r3
3017            s=s2 - s4
3018            z(1,j,i2,ia*ntb+b_i+ib) = r - s*ris
3019            z(1,j,i2,ia*ntb+3*b_i+ib) = r + s*ris
3020            r=s1 + s3
3021            s=s2 + s4
3022            z(2,j,i2,ia*ntb+ib) = r + s
3023            z(2,j,i2,ia*ntb+2*b_i+ib) = r - s
3024            r=s1 - s3
3025            s=r2 - r4
3026            z(2,j,i2,ia*ntb+b_i+ib) = r + s*ris
3027            z(2,j,i2,ia*ntb+3*b_i+ib) = r - s*ris
3028          end do ! j
3029        end do ! i2
3030 !$OMP END PARALLEL DO
3031      end do ! ib
3032 
3033 !    Second step of radix 4
3034      do ia=1,aft(i)-1
3035        indx=ind(ia*4*b_i+1)-1
3036        indx=indx*b_i
3037        cr2=trig(1,indx)
3038        ct2=trig(2,indx)
3039        cr3=trig(1,2*indx)
3040        ct3=trig(2,2*indx)
3041        cr4=trig(1,3*indx)
3042        ct4=trig(2,3*indx)
3043        cr4=cr4/cr2
3044        cr2s=cr2*ris
3045        do ib=1,b_i
3046 !$OMP PARALLEL DO DEFAULT(PRIVATE)&
3047 !$OMP&SHARED(b_i,cr2,cr3,cr4,ct2,cr2s,ct3,ct4,i,ia,ib,n1,n2i,n2,ntb,ris,z)
3048          do i2=n2i,n2
3049            do j=1,n1
3050              r4=z(1,j,i2,ia*ntb+3*b_i+ib) - &
3051 &             z(2,j,i2,ia*ntb+3*b_i+ib)*ct4
3052              s4=z(1,j,i2,ia*ntb+3*b_i+ib)*ct4 + &
3053 &             z(2,j,i2,ia*ntb+3*b_i+ib)
3054              r3=z(1,j,i2,ia*ntb+2*b_i+ib) - &
3055 &             z(2,j,i2,ia*ntb+2*b_i+ib)*ct3
3056              s3=z(1,j,i2,ia*ntb+2*b_i+ib)*ct3 + &
3057 &             z(2,j,i2,ia*ntb+2*b_i+ib)
3058              r2=z(1,j,i2,ia*ntb+b_i+ib) - &
3059 &             z(2,j,i2,ia*ntb+b_i+ib)*ct2
3060              s2=z(1,j,i2,ia*ntb+b_i+ib)*ct2 + &
3061 &             z(2,j,i2,ia*ntb+b_i+ib)
3062              r1=z(1,j,i2,ia*ntb+ib)
3063              s1=z(2,j,i2,ia*ntb+ib)
3064 
3065              r=r1 + r3*cr3
3066              s=r2 + r4*cr4
3067              z(1,j,i2,ia*ntb+ib) = r + s*cr2
3068              z(1,j,i2,ia*ntb+2*b_i+ib) = r - s*cr2
3069              r=r1 - r3*cr3
3070              s=s2 - s4*cr4
3071              z(1,j,i2,ia*ntb+b_i+ib) = r - s*cr2s
3072              z(1,j,i2,ia*ntb+3*b_i+ib) = r + s*cr2s
3073              r=s1 + s3*cr3
3074              s=s2 + s4*cr4
3075              z(2,j,i2,ia*ntb+ib) = r + s*cr2
3076              z(2,j,i2,ia*ntb+2*b_i+ib) = r - s*cr2
3077              r=s1 - s3*cr3
3078              s=r2 - r4*cr4
3079              z(2,j,i2,ia*ntb+b_i+ib) = r + s*cr2s
3080              z(2,j,i2,ia*ntb+3*b_i+ib) = r - s*cr2s
3081            end do ! j
3082          end do ! i2
3083 !$OMP END PARALLEL DO
3084        end do ! ib
3085 
3086      end do ! ia
3087 
3088 !    Treat radix 2
3089    else if (now(i)==2) then
3090      ia=0
3091 
3092 !    First step of radix 2
3093      do ib=1,b_i
3094 !$OMP PARALLEL DO DEFAULT(PRIVATE)&
3095 !$OMP&SHARED(b_i,ia,ib,n1,n2,n2i,ntb,z)
3096        do i2=n2i,n2
3097          do j=1,n1
3098            r1=z(1,j,i2,ia*ntb+ib)
3099            s1=z(2,j,i2,ia*ntb+ib)
3100            r2=z(1,j,i2,ia*ntb+b_i+ib)
3101            s2=z(2,j,i2,ia*ntb+b_i+ib)
3102            z(1,j,i2,ia*ntb+ib) =  r2 + r1
3103            z(2,j,i2,ia*ntb+ib) =  s2 + s1
3104            z(1,j,i2,ia*ntb+b_i+ib) = -r2 + r1
3105            z(2,j,i2,ia*ntb+b_i+ib) = -s2 + s1
3106          end do ! j
3107        end do ! i2
3108 !$OMP END PARALLEL DO
3109      end do ! ib
3110 
3111 !    Second step of radix 2
3112      do ia=1,aft(i)-1
3113        indx=ind(ia*2*b_i+1)-1
3114        indx=indx*b_i
3115        cr2=trig(1,indx)
3116        ct2=trig(2,indx)
3117        do ib=1,b_i
3118 !$OMP PARALLEL DO DEFAULT(PRIVATE)&
3119 !$OMP&SHARED(b_i,cr2,ct2,ia,ib,n1,n2,n2i,ntb,z)
3120          do i2=n2i,n2
3121            do j=1,n1
3122              r1=z(1,j,i2,ia*ntb+ib)
3123              s1=z(2,j,i2,ia*ntb+ib)
3124              r2=z(1,j,i2,ia*ntb+b_i+ib) - &
3125 &             z(2,j,i2,ia*ntb+b_i+ib)*ct2
3126              s2=z(1,j,i2,ia*ntb+b_i+ib)*ct2 + &
3127 &             z(2,j,i2,ia*ntb+b_i+ib)
3128              z(1,j,i2,ia*ntb+ib) =  r2*cr2 + r1
3129              z(2,j,i2,ia*ntb+ib) =  s2*cr2 + s1
3130              z(1,j,i2,ia*ntb+b_i+ib) = -r2*cr2 + r1
3131              z(2,j,i2,ia*ntb+b_i+ib) = -s2*cr2 + s1
3132            end do ! j
3133          end do ! i2
3134 !$OMP END PARALLEL DO
3135        end do ! ib
3136 
3137      end do ! ia
3138 
3139 !    Treat radix 3
3140    else if (now(i)==3) then
3141 !    .5d0*sqrt(3.d0)=0.8660254037844387d0
3142      ia=0
3143      bb=ris*0.8660254037844387d0
3144 
3145 !    First step of radix 3
3146      do ib=1,b_i
3147 !$OMP PARALLEL DO DEFAULT(PRIVATE)&
3148 !$OMP&SHARED(bb,b_i,ia,ib,n1,n2,n2i,ntb,z)
3149        do i2=n2i,n2
3150          do j=1,n1
3151            r1=z(1,j,i2,ia*ntb+ib)
3152            s1=z(2,j,i2,ia*ntb+ib)
3153            r2=z(1,j,i2,ia*ntb+b_i+ib)
3154            s2=z(2,j,i2,ia*ntb+b_i+ib)
3155            r3=z(1,j,i2,ia*ntb+2*b_i+ib)
3156            s3=z(2,j,i2,ia*ntb+2*b_i+ib)
3157            r=r2 + r3
3158            s=s2 + s3
3159            z(1,j,i2,ia*ntb+ib) = r + r1
3160            z(2,j,i2,ia*ntb+ib) = s + s1
3161            r1=r1 - r*.5d0
3162            s1=s1 - s*.5d0
3163            r2=r2-r3
3164            s2=s2-s3
3165            z(1,j,i2,ia*ntb+b_i+ib) = r1 - s2*bb
3166            z(2,j,i2,ia*ntb+b_i+ib) = s1 + r2*bb
3167            z(1,j,i2,ia*ntb+2*b_i+ib) = r1 + s2*bb
3168            z(2,j,i2,ia*ntb+2*b_i+ib) = s1 - r2*bb
3169          end do ! j
3170        end do ! i2
3171 !$OMP END PARALLEL DO
3172      end do ! ib
3173 
3174 !    Second step of radix 3
3175      do ia=1,aft(i)-1
3176        indx=ind(ia*3*b_i+1)-1
3177        indx=indx*b_i
3178        cr2=trig(1,indx)
3179        ct2=trig(2,indx)
3180        cr3=trig(1,2*indx)
3181        ct3=trig(2,2*indx)
3182        cr2=cr2/cr3
3183        cr3p=.5d0*cr3
3184        bb=ris*cr3*0.8660254037844387d0
3185        do ib=1,b_i
3186 !$OMP PARALLEL DO DEFAULT(PRIVATE)&
3187 !$OMP&SHARED(bb,b_i,cr2,cr3,cr3p,ct2,ct3,ia,ib,n1,n2,n2i,ntb,z)
3188          do i2=n2i,n2
3189            do j=1,n1
3190              r1=z(1,j,i2,ia*ntb+ib)
3191              s1=z(2,j,i2,ia*ntb+ib)
3192              r2=z(1,j,i2,ia*ntb+b_i+ib) - &
3193 &             z(2,j,i2,ia*ntb+b_i+ib)*ct2
3194              s2=z(1,j,i2,ia*ntb+b_i+ib)*ct2 + &
3195 &             z(2,j,i2,ia*ntb+b_i+ib)
3196              r3=z(1,j,i2,ia*ntb+2*b_i+ib) - &
3197 &             z(2,j,i2,ia*ntb+2*b_i+ib)*ct3
3198              s3=z(1,j,i2,ia*ntb+2*b_i+ib)*ct3 + &
3199 &             z(2,j,i2,ia*ntb+2*b_i+ib)
3200              r=cr2*r2 + r3
3201              s=cr2*s2 + s3
3202              z(1,j,i2,ia*ntb+ib) = r*cr3 + r1
3203              z(2,j,i2,ia*ntb+ib) = s*cr3 + s1
3204              r1=r1 - r*cr3p
3205              s1=s1 - s*cr3p
3206              r2=cr2*r2-r3
3207              s2=cr2*s2-s3
3208              z(1,j,i2,ia*ntb+b_i+ib) = r1 - s2*bb
3209              z(2,j,i2,ia*ntb+b_i+ib) = s1 + r2*bb
3210              z(1,j,i2,ia*ntb+2*b_i+ib) = r1 + s2*bb
3211              z(2,j,i2,ia*ntb+2*b_i+ib) = s1 - r2*bb
3212            end do ! j
3213          end do ! i2
3214 !$OMP END PARALLEL DO
3215        end do ! ib
3216 
3217      end do ! ia
3218 
3219 !    Treat radix 5
3220    else if (now(i)==5) then
3221      sin2=ris*0.9510565162951536d0
3222      ia=0
3223 
3224 !    First step of radix 5
3225      do ib=1,b_i
3226 !$OMP PARALLEL DO DEFAULT(PRIVATE)&
3227 !$OMP&SHARED(b_i,ia,ib,n1,n2,n2i,ntb,sin2,z)
3228        do i2=n2i,n2
3229          do j=1,n1
3230            r1=z(1,j,i2,ia*ntb+ib)
3231            s1=z(2,j,i2,ia*ntb+ib)
3232            r2=z(1,j,i2,ia*ntb+b_i+ib)
3233            s2=z(2,j,i2,ia*ntb+b_i+ib)
3234            r3=z(1,j,i2,ia*ntb+2*b_i+ib)
3235            s3=z(2,j,i2,ia*ntb+2*b_i+ib)
3236            r4=z(1,j,i2,ia*ntb+3*b_i+ib)
3237            s4=z(2,j,i2,ia*ntb+3*b_i+ib)
3238            r5=z(1,j,i2,ia*ntb+4*b_i+ib)
3239            s5=z(2,j,i2,ia*ntb+4*b_i+ib)
3240            r25 = r2 + r5
3241            r34 = r3 + r4
3242            s25 = s2 - s5
3243            s34 = s3 - s4
3244            z(1,j,i2,ia*ntb+ib) = r1 + r25 + r34
3245            r = r1 + cos2*r25 + cos4*r34
3246            s = s25 + sin42*s34
3247            z(1,j,i2,ia*ntb+b_i+ib) = r - sin2*s
3248            z(1,j,i2,ia*ntb+4*b_i+ib) = r + sin2*s
3249            r = r1 + cos4*r25 + cos2*r34
3250            s = sin42*s25 - s34
3251            z(1,j,i2,ia*ntb+2*b_i+ib) = r - sin2*s
3252            z(1,j,i2,ia*ntb+3*b_i+ib) = r + sin2*s
3253            r25 = r2 - r5
3254            r34 = r3 - r4
3255            s25 = s2 + s5
3256            s34 = s3 + s4
3257            z(2,j,i2,ia*ntb+ib) = s1 + s25 + s34
3258            r = s1 + cos2*s25 + cos4*s34
3259            s = r25 + sin42*r34
3260            z(2,j,i2,ia*ntb+b_i+ib) = r + sin2*s
3261            z(2,j,i2,ia*ntb+4*b_i+ib) = r - sin2*s
3262            r = s1 + cos4*s25 + cos2*s34
3263            s = sin42*r25 - r34
3264            z(2,j,i2,ia*ntb+2*b_i+ib) = r + sin2*s
3265            z(2,j,i2,ia*ntb+3*b_i+ib) = r - sin2*s
3266          end do ! j
3267        end do ! i2
3268 !$OMP END PARALLEL DO
3269      end do ! ib
3270 
3271 !    Second step of radix 5
3272      do ia=1,aft(i)-1
3273        indx=ind(ia*5*b_i+1)-1
3274        indx=indx*b_i
3275        cr2=trig(1,indx)
3276        ct2=trig(2,indx)
3277        cr3=trig(1,2*indx)
3278        ct3=trig(2,2*indx)
3279        cr4=trig(1,3*indx)
3280        ct4=trig(2,3*indx)
3281        cr5=trig(1,4*indx)
3282        ct5=trig(2,4*indx)
3283        do ib=1,b_i
3284 !$OMP PARALLEL DO DEFAULT(PRIVATE)&
3285 !$OMP&SHARED(b_i,cr2,cr3,cr4,cr5,ct2,ct3,ct4,ct5,ia,ib,n1,n2,n2i,ntb,sin2,z)
3286          do i2=n2i,n2
3287            do j=1,n1
3288              r1=z(1,j,i2,ia*ntb+ib)
3289              s1=z(2,j,i2,ia*ntb+ib)
3290              r2=cr2*(z(1,j,i2,ia*ntb+b_i+ib) - &
3291 &             z(2,j,i2,ia*ntb+b_i+ib)*ct2)
3292              s2=cr2*(z(1,j,i2,ia*ntb+b_i+ib)*ct2 + &
3293 &             z(2,j,i2,ia*ntb+b_i+ib))
3294              r3=cr3*(z(1,j,i2,ia*ntb+2*b_i+ib) - &
3295 &             z(2,j,i2,ia*ntb+2*b_i+ib)*ct3)
3296              s3=cr3*(z(1,j,i2,ia*ntb+2*b_i+ib)*ct3 + &
3297 &             z(2,j,i2,ia*ntb+2*b_i+ib))
3298              r4=z(1,j,i2,ia*ntb+3*b_i+ib) - &
3299 &             z(2,j,i2,ia*ntb+3*b_i+ib)*ct4
3300              s4=z(1,j,i2,ia*ntb+3*b_i+ib)*ct4 + &
3301 &             z(2,j,i2,ia*ntb+3*b_i+ib)
3302              r5=z(1,j,i2,ia*ntb+4*b_i+ib) - &
3303 &             z(2,j,i2,ia*ntb+4*b_i+ib)*ct5
3304              s5=z(1,j,i2,ia*ntb+4*b_i+ib)*ct5 + &
3305 &             z(2,j,i2,ia*ntb+4*b_i+ib)
3306              r25 = r2 + r5*cr5
3307              r34 = r3 + r4*cr4
3308              s25 = s2 - s5*cr5
3309              s34 = s3 - s4*cr4
3310              z(1,j,i2,ia*ntb+ib) = r1 + r25 + r34
3311              r = r1 + cos2*r25 + cos4*r34
3312              s = s25 + sin42*s34
3313              z(1,j,i2,ia*ntb+b_i+ib) = r - sin2*s
3314              z(1,j,i2,ia*ntb+4*b_i+ib) = r + sin2*s
3315              r = r1 + cos4*r25 + cos2*r34
3316              s = sin42*s25 - s34
3317              z(1,j,i2,ia*ntb+2*b_i+ib) = r - sin2*s
3318              z(1,j,i2,ia*ntb+3*b_i+ib) = r + sin2*s
3319              r25 = r2 - r5*cr5
3320              r34 = r3 - r4*cr4
3321              s25 = s2 + s5*cr5
3322              s34 = s3 + s4*cr4
3323              z(2,j,i2,ia*ntb+ib) = s1 + s25 + s34
3324              r = s1 + cos2*s25 + cos4*s34
3325              s = r25 + sin42*r34
3326              z(2,j,i2,ia*ntb+b_i+ib) = r + sin2*s
3327              z(2,j,i2,ia*ntb+4*b_i+ib) = r - sin2*s
3328              r = s1 + cos4*s25 + cos2*s34
3329              s = sin42*r25 - r34
3330              z(2,j,i2,ia*ntb+2*b_i+ib) = r + sin2*s
3331              z(2,j,i2,ia*ntb+3*b_i+ib) = r - sin2*s
3332            end do ! j
3333          end do ! i2
3334 !$OMP END PARALLEL DO
3335        end do ! ib
3336 
3337      end do ! ia
3338 
3339 !    All radices treated
3340    else
3341      ABI_BUG('called with factors other than 2, 3, and 5')
3342    end if
3343 
3344 !  End of direct transformation
3345  end do
3346 
3347 !------------------------------------------------------------
3348 !bitreversal  (zbr is for z"bit-reversed")
3349 
3350 !Treat radix 4
3351  if (now(ic)==4) then
3352    ia=0
3353 
3354 !  First step of radix 4
3355 !$OMP PARALLEL DO DEFAULT(PRIVATE)&
3356 !$OMP&SHARED(ia,ind,n1,n2,n2i,ntb,ris,z,zbr)
3357    do i2=n2i,n2
3358      do j=1,n1
3359        r4=z(1,j,i2,ia*4+4)
3360        s4=z(2,j,i2,ia*4+4)
3361        r3=z(1,j,i2,ia*4+3)
3362        s3=z(2,j,i2,ia*4+3)
3363        r2=z(1,j,i2,ia*4+2)
3364        s2=z(2,j,i2,ia*4+2)
3365        r1=z(1,j,i2,ia*4+1)
3366        s1=z(2,j,i2,ia*4+1)
3367 
3368        r=r1 + r3
3369        s=r2 + r4
3370        zbr(1,j,i2,ind(ia*4+1)) = r + s
3371        zbr(1,j,i2,ind(ia*4+3)) = r - s
3372        r=r1 - r3
3373        s=s2 - s4
3374        zbr(1,j,i2,ind(ia*4+2)) = r - s*ris
3375        zbr(1,j,i2,ind(ia*4+4)) = r + s*ris
3376        r=s1 + s3
3377        s=s2 + s4
3378        zbr(2,j,i2,ind(ia*4+1)) = r + s
3379        zbr(2,j,i2,ind(ia*4+3)) = r - s
3380        r=s1 - s3
3381        s=r2 - r4
3382        zbr(2,j,i2,ind(ia*4+2)) = r + s*ris
3383        zbr(2,j,i2,ind(ia*4+4)) = r - s*ris
3384      end do ! j
3385    end do ! i2
3386 !$OMP END PARALLEL DO
3387 
3388 !  Second step of radix 4
3389    do ia=1,aft(ic)-1
3390      indx=ind(ia*4+1)-1
3391      cr2=trig(1,indx)
3392      ct2=trig(2,indx)
3393      cr3=trig(1,2*indx)
3394      ct3=trig(2,2*indx)
3395      cr4=trig(1,3*indx)
3396      ct4=trig(2,3*indx)
3397      cr4=cr4/cr2
3398      cr2s=cr2*ris
3399 !$OMP PARALLEL DO DEFAULT(PRIVATE)&
3400 !$OMP&SHARED(ia,cr2,cr2s,cr3,cr4,ct2,ct3,ct4,ind,n1,n2,n2i,z,zbr)
3401      do i2=n2i,n2
3402        do j=1,n1
3403          r4=z(1,j,i2,ia*4+4) - z(2,j,i2,ia*4+4)*ct4
3404          s4=z(1,j,i2,ia*4+4)*ct4 + z(2,j,i2,ia*4+4)
3405          r3=z(1,j,i2,ia*4+3) - z(2,j,i2,ia*4+3)*ct3
3406          s3=z(1,j,i2,ia*4+3)*ct3 + z(2,j,i2,ia*4+3)
3407          r2=z(1,j,i2,ia*4+2) - z(2,j,i2,ia*4+2)*ct2
3408          s2=z(1,j,i2,ia*4+2)*ct2 + z(2,j,i2,ia*4+2)
3409          r1=z(1,j,i2,ia*4+1)
3410          s1=z(2,j,i2,ia*4+1)
3411 
3412          r=r1 + r3*cr3
3413          s=r2 + r4*cr4
3414          zbr(1,j,i2,ind(ia*4+1)) = r + s*cr2
3415          zbr(1,j,i2,ind(ia*4+3)) = r - s*cr2
3416          r=r1 - r3*cr3
3417          s=s2 - s4*cr4
3418          zbr(1,j,i2,ind(ia*4+2)) = r - s*cr2s
3419          zbr(1,j,i2,ind(ia*4+4)) = r + s*cr2s
3420          r=s1 + s3*cr3
3421          s=s2 + s4*cr4
3422          zbr(2,j,i2,ind(ia*4+1)) = r + s*cr2
3423          zbr(2,j,i2,ind(ia*4+3)) = r - s*cr2
3424          r=s1 - s3*cr3
3425          s=r2 - r4*cr4
3426          zbr(2,j,i2,ind(ia*4+2)) = r + s*cr2s
3427          zbr(2,j,i2,ind(ia*4+4)) = r - s*cr2s
3428        end do ! j
3429      end do ! i2
3430 !$OMP END PARALLEL DO
3431 
3432    end do ! ia
3433 
3434 !  Treat radix 2
3435  else if (now(ic)==2) then
3436    ia=0
3437 
3438 !  First step of radix 2
3439 !$OMP PARALLEL DO DEFAULT(PRIVATE)&
3440 !$OMP&SHARED(ia,ind,n1,n2,n2i,z,zbr)
3441    do i2=n2i,n2
3442      do j=1,n1
3443        r1=z(1,j,i2,ia*2+1)
3444        s1=z(2,j,i2,ia*2+1)
3445        r2=z(1,j,i2,ia*2+2)
3446        s2=z(2,j,i2,ia*2+2)
3447        zbr(1,j,i2,ind(ia*2+1)) =  r2 + r1
3448        zbr(2,j,i2,ind(ia*2+1)) =  s2 + s1
3449        zbr(1,j,i2,ind(ia*2+2)) = -r2 + r1
3450        zbr(2,j,i2,ind(ia*2+2)) = -s2 + s1
3451      end do ! j
3452    end do ! i2
3453 !$OMP END PARALLEL DO
3454 
3455 !  Second step of radix 2
3456    do ia=1,aft(ic)-1
3457      indx=ind(ia*2+1)-1
3458      cr2=trig(1,indx)
3459      ct2=trig(2,indx)
3460 !$OMP PARALLEL DO DEFAULT(PRIVATE)&
3461 !$OMP&SHARED(cr2,ct2,ia,ind,n1,n2,n2i,z,zbr)
3462      do i2=n2i,n2
3463        do j=1,n1
3464          r1=z(1,j,i2,ia*2+1)
3465          s1=z(2,j,i2,ia*2+1)
3466          r2=z(1,j,i2,ia*2+2) - z(2,j,i2,ia*2+2)*ct2
3467          s2=z(1,j,i2,ia*2+2)*ct2 + z(2,j,i2,ia*2+2)
3468          zbr(1,j,i2,ind(ia*2+1)) =  r2*cr2 + r1
3469          zbr(2,j,i2,ind(ia*2+1)) =  s2*cr2 + s1
3470          zbr(1,j,i2,ind(ia*2+2)) = -r2*cr2 + r1
3471          zbr(2,j,i2,ind(ia*2+2)) = -s2*cr2 + s1
3472        end do ! j
3473      end do ! i2
3474 !$OMP END PARALLEL DO
3475    end do ! ia
3476 
3477 !  Treat radix 3
3478  else if (now(ic)==3) then
3479 !  .5d0*sqrt(3.d0)=0.8660254037844387d0
3480    ia=0
3481    bb=ris*0.8660254037844387d0
3482 
3483 !  First step of radix 3
3484 !$OMP PARALLEL DO DEFAULT(PRIVATE)&
3485 !$OMP&SHARED(bb,ia,ind,n1,n2,n2i,z,zbr)
3486    do i2=n2i,n2
3487      do j=1,n1
3488        r1=z(1,j,i2,ia*3+1)
3489        s1=z(2,j,i2,ia*3+1)
3490        r2=z(1,j,i2,ia*3+2)
3491        s2=z(2,j,i2,ia*3+2)
3492        r3=z(1,j,i2,ia*3+3)
3493        s3=z(2,j,i2,ia*3+3)
3494        r=r2 + r3
3495        s=s2 + s3
3496        zbr(1,j,i2,ind(ia*3+1)) = r + r1
3497        zbr(2,j,i2,ind(ia*3+1)) = s + s1
3498        r1=r1 - r*.5d0
3499        s1=s1 - s*.5d0
3500        r2=r2-r3
3501        s2=s2-s3
3502        zbr(1,j,i2,ind(ia*3+2)) = r1 - s2*bb
3503        zbr(2,j,i2,ind(ia*3+2)) = s1 + r2*bb
3504        zbr(1,j,i2,ind(ia*3+3)) = r1 + s2*bb
3505        zbr(2,j,i2,ind(ia*3+3)) = s1 - r2*bb
3506      end do ! j
3507    end do ! i2
3508 !$OMP END PARALLEL DO
3509 
3510 !  Second step of radix 3
3511    do ia=1,aft(ic)-1
3512      indx=ind(ia*3+1)-1
3513      cr2=trig(1,indx)
3514      ct2=trig(2,indx)
3515      cr3=trig(1,2*indx)
3516      ct3=trig(2,2*indx)
3517      cr2=cr2/cr3
3518      cr3p=.5d0*cr3
3519      bb=ris*cr3*0.8660254037844387d0
3520 !$OMP PARALLEL DO DEFAULT(PRIVATE)&
3521 !$OMP&SHARED(bb,cr2,cr3,cr3p,ct2,ct3,ia,ind,n1,n2,n2i,z,zbr)
3522      do i2=n2i,n2
3523        do j=1,n1
3524          r1=z(1,j,i2,ia*3+1)
3525          s1=z(2,j,i2,ia*3+1)
3526          r2=z(1,j,i2,ia*3+2) - z(2,j,i2,ia*3+2)*ct2
3527          s2=z(1,j,i2,ia*3+2)*ct2 + z(2,j,i2,ia*3+2)
3528          r3=z(1,j,i2,ia*3+3) - z(2,j,i2,ia*3+3)*ct3
3529          s3=z(1,j,i2,ia*3+3)*ct3 + z(2,j,i2,ia*3+3)
3530          r=cr2*r2 + r3
3531          s=cr2*s2 + s3
3532          zbr(1,j,i2,ind(ia*3+1)) = r*cr3 + r1
3533          zbr(2,j,i2,ind(ia*3+1)) = s*cr3 + s1
3534          r1=r1 - r*cr3p
3535          s1=s1 - s*cr3p
3536          r2=cr2*r2-r3
3537          s2=cr2*s2-s3
3538          zbr(1,j,i2,ind(ia*3+2)) = r1 - s2*bb
3539          zbr(2,j,i2,ind(ia*3+2)) = s1 + r2*bb
3540          zbr(1,j,i2,ind(ia*3+3)) = r1 + s2*bb
3541          zbr(2,j,i2,ind(ia*3+3)) = s1 - r2*bb
3542        end do ! j
3543      end do ! i2
3544 !$OMP END PARALLEL DO
3545    end do ! ia
3546 
3547 !  Treat radix 5
3548  else if (now(ic)==5) then
3549 !  sin(2.d0*pi/5.d0)
3550    sin2=ris*0.9510565162951536d0
3551    ia=0
3552 
3553 !  First step of radix 5
3554 !$OMP PARALLEL DO DEFAULT(PRIVATE)&
3555 !$OMP&SHARED(ia,ind,n1,n2,n2i,sin2,z,zbr)
3556    do i2=n2i,n2
3557      do j=1,n1
3558        r1=z(1,j,i2,ia*5+1)
3559        s1=z(2,j,i2,ia*5+1)
3560        r2=z(1,j,i2,ia*5+2)
3561        s2=z(2,j,i2,ia*5+2)
3562        r3=z(1,j,i2,ia*5+3)
3563        s3=z(2,j,i2,ia*5+3)
3564        r4=z(1,j,i2,ia*5+4)
3565        s4=z(2,j,i2,ia*5+4)
3566        r5=z(1,j,i2,ia*5+5)
3567        s5=z(2,j,i2,ia*5+5)
3568        r25 = r2 + r5
3569        r34 = r3 + r4
3570        s25 = s2 - s5
3571        s34 = s3 - s4
3572        zbr(1,j,i2,ind(ia*5+1)) = r1 + r25 + r34
3573        r = r1 + cos2*r25 + cos4*r34
3574        s = s25 + sin42*s34
3575        zbr(1,j,i2,ind(ia*5+2)) = r - sin2*s
3576        zbr(1,j,i2,ind(ia*5+5)) = r + sin2*s
3577        r = r1 + cos4*r25 + cos2*r34
3578        s = sin42*s25 - s34
3579        zbr(1,j,i2,ind(ia*5+3)) = r - sin2*s
3580        zbr(1,j,i2,ind(ia*5+4)) = r + sin2*s
3581        r25 = r2 - r5
3582        r34 = r3 - r4
3583        s25 = s2 + s5
3584        s34 = s3 + s4
3585        zbr(2,j,i2,ind(ia*5+1)) = s1 + s25 + s34
3586        r = s1 + cos2*s25 + cos4*s34
3587        s = r25 + sin42*r34
3588        zbr(2,j,i2,ind(ia*5+2)) = r + sin2*s
3589        zbr(2,j,i2,ind(ia*5+5)) = r - sin2*s
3590        r = s1 + cos4*s25 + cos2*s34
3591        s = sin42*r25 - r34
3592        zbr(2,j,i2,ind(ia*5+3)) = r + sin2*s
3593        zbr(2,j,i2,ind(ia*5+4)) = r - sin2*s
3594      end do ! j
3595    end do ! i2
3596 !$OMP END PARALLEL DO
3597 
3598 !  Second step of radix 5
3599    do ia=1,aft(ic)-1
3600      indx=ind(ia*5+1)-1
3601      cr2=trig(1,indx)
3602      ct2=trig(2,indx)
3603      cr3=trig(1,2*indx)
3604      ct3=trig(2,2*indx)
3605      cr4=trig(1,3*indx)
3606      ct4=trig(2,3*indx)
3607      cr5=trig(1,4*indx)
3608      ct5=trig(2,4*indx)
3609 !$OMP PARALLEL DO DEFAULT(PRIVATE)&
3610 !$OMP&SHARED(cr2,cr3,cr4,cr5,ct2,ct3,ct4,ct5,ia,ind,n1,n2,n2i,sin2,z,zbr)
3611      do i2=n2i,n2
3612        do j=1,n1
3613          r1=z(1,j,i2,ia*5+1)
3614          s1=z(2,j,i2,ia*5+1)
3615          r2=cr2*(z(1,j,i2,ia*5+2) - z(2,j,i2,ia*5+2)*ct2)
3616          s2=cr2*(z(1,j,i2,ia*5+2)*ct2 + z(2,j,i2,ia*5+2))
3617          r3=cr3*(z(1,j,i2,ia*5+3) - z(2,j,i2,ia*5+3)*ct3)
3618          s3=cr3*(z(1,j,i2,ia*5+3)*ct3 + z(2,j,i2,ia*5+3))
3619          r4=z(1,j,i2,ia*5+4) - z(2,j,i2,ia*5+4)*ct4
3620          s4=z(1,j,i2,ia*5+4)*ct4 + z(2,j,i2,ia*5+4)
3621          r5=z(1,j,i2,ia*5+5) - z(2,j,i2,ia*5+5)*ct5
3622          s5=z(1,j,i2,ia*5+5)*ct5 + z(2,j,i2,ia*5+5)
3623          r25 = r2 + r5*cr5
3624          r34 = r3 + r4*cr4
3625          s25 = s2 - s5*cr5
3626          s34 = s3 - s4*cr4
3627          zbr(1,j,i2,ind(ia*5+1)) = r1 + r25 + r34
3628          r = r1 + cos2*r25 + cos4*r34
3629          s = s25 + sin42*s34
3630          zbr(1,j,i2,ind(ia*5+2)) = r - sin2*s
3631          zbr(1,j,i2,ind(ia*5+5)) = r + sin2*s
3632          r = r1 + cos4*r25 + cos2*r34
3633          s = sin42*s25 - s34
3634          zbr(1,j,i2,ind(ia*5+3)) = r - sin2*s
3635          zbr(1,j,i2,ind(ia*5+4)) = r + sin2*s
3636          r25 = r2 - r5*cr5
3637          r34 = r3 - r4*cr4
3638          s25 = s2 + s5*cr5
3639          s34 = s3 + s4*cr4
3640          zbr(2,j,i2,ind(ia*5+1)) = s1 + s25 + s34
3641          r = s1 + cos2*s25 + cos4*s34
3642          s = r25 + sin42*r34
3643          zbr(2,j,i2,ind(ia*5+2)) = r + sin2*s
3644          zbr(2,j,i2,ind(ia*5+5)) = r - sin2*s
3645          r = s1 + cos4*s25 + cos2*s34
3646          s = sin42*r25 - r34
3647          zbr(2,j,i2,ind(ia*5+3)) = r + sin2*s
3648          zbr(2,j,i2,ind(ia*5+4)) = r - sin2*s
3649        end do ! j
3650      end do ! i2
3651 !$OMP END PARALLEL DO
3652    end do ! ia
3653 
3654  else !  All radices treated
3655    !if (now(ic) /= 1) then
3656    ABI_BUG(sjoin("Called with factors other than 2, 3, and 5. now(ic) = ", itoa(now(ic))))
3657    !end if
3658  end if
3659 
3660 end subroutine sg_fftz

m_sgfft/sg_poisson [ Functions ]

[ Top ] [ m_sgfft ] [ Functions ]

NAME

 sg_poisson

FUNCTION

  Solve the Poisson equation in G-space given the density, n(r),
  in real space of the FFT box.

INPUTS

 fftcache=size of the cache (kB)
 cplex=1 if fofr is real, 2 if fofr is complex
 nx,ny,nz=Number of FFT points along the three directions.
 ldx,ldy,ldz=Leading dimension of the array nr and vg.
 ndat = Number of densities
 vg(nx*ny*nz)=Potential in reciprocal space.

SIDE EFFECTS

 nr(cplex*ldx*ldy*ldz*ndat)
    input: n(r) (real or complex)
    output: the hartree potential in real space

NOTES

   vg is given on the FFT mesh instead of the augmented mesh [ldx,ldy,ldz]
   in order to simplify the interface with the other routines operating of vg

SOURCE

5925 subroutine sg_poisson(fftcache,cplex,nx,ny,nz,ldx,ldy,ldz,ndat,vg,nr)
5926 
5927 !Arguments ------------------------------------
5928 !scalars
5929  integer,intent(in) :: fftcache,cplex,nx,ny,nz,ldx,ldy,ldz,ndat
5930 !arrays
5931  real(dp),intent(inout) :: nr(cplex*ldx*ldy*ldz*ndat)
5932  real(dp),intent(in) :: vg(nx*ny*nz)
5933 
5934 !Local variables-------------------------------
5935  integer,parameter :: ndat1=1
5936  integer :: ii,jj,kk,ifft,dat,ptr,ig
5937  real(dp) :: fft_fact
5938 !arrays
5939  real(dp),allocatable :: work(:,:)
5940 
5941 ! *************************************************************************
5942 
5943  fft_fact = one/(nx*ny*nz)
5944 
5945  ABI_CHECK(cplex==2,"cplex!=2 not coded")
5946 
5947  ABI_MALLOC(work, (2,ldx*ldy*ldz))
5948 
5949  do dat=1,ndat
5950    ! n(r) --> n(G)
5951    ptr = 1 + (dat-1)*cplex*ldx*ldy*ldz
5952    call sg_fft_cc(fftcache,nx,ny,nz,ldx,ldy,ldz,ndat1,-1,nr(ptr),work)
5953 
5954    ! Multiply by v(G)
5955    ig = 0
5956    do kk=1,nz
5957      do jj=1,ny
5958        do ii=1,nx
5959          ig = ig + 1
5960          ifft = ii + (jj-1)*ldx + (kk-1)*ldx*ldy
5961          work(1:2,ifft) = work(1:2,ifft) * vg(ig) * fft_fact
5962       end do
5963      end do
5964    end do
5965 
5966    ! compute vh(r)
5967    call sg_fft_cc(fftcache,nx,ny,nz,ldx,ldy,ldz,ndat1,+1,work,nr(ptr))
5968  end do
5969 
5970  ABI_FREE(work)
5971 
5972 end subroutine sg_poisson