./52_fft_mpi_noabirule/m

ABINIT/m_fftcore
m_fftcore/addrho
m_fftcore/bound
m_fftcore/change_istwfk
m_fftcore/fftalg_for_npfft
m_fftcore/fftalg_has_mpi
m_fftcore/fftalg_info
m_fftcore/fftalg_isavailable
m_fftcore/fftcore_set_mixprec
m_fftcore/fill
m_fftcore/fill_cent
m_fftcore/get_cache_kb
m_fftcore/get_kg
m_fftcore/getng
m_fftcore/indfftrisc
m_fftcore/kgindex
m_fftcore/kpgcount
m_fftcore/kpgsph
m_fftcore/mpifft_collect_datar
m_fftcore/mpifft_dbox2fg
m_fftcore/mpifft_dbox2fg_dpc
m_fftcore/mpifft_dbox2fr
m_fftcore/mpifft_dbox2fr_dpc
m_fftcore/mpifft_fg2dbox
m_fftcore/mpifft_fg2dbox_dpc
m_fftcore/mpifft_fr2dbox
m_fftcore/mpifft_fr2dbox_dpc
m_fftcore/mpiswitch
m_fftcore/mpiswitch_cent
m_fftcore/multpot
m_fftcore/ngfft_seq
m_fftcore/print_ngfft
m_fftcore/scramble
m_fftcore/sphere
m_fftcore/sphere_fft
m_fftcore/sphere_fft1
m_fftcore/sphereboundary
m_fftcore/switch
m_fftcore/switch_cent
m_fftcore/switchreal
m_fftcore/switchreal_cent
m_fftcore/unfill
m_fftcore/unfill_cent
m_fftcore/unmpiswitch
m_fftcore/unmpiswitch_cent
m_fftcore/unscramble
m_fftcore/unswitch
m_fftcore/unswitch_cent
m_fftcore/unswitchreal
m_fftcore/unswitchreal_cent

ABINIT/m_fftcore [ Modules ]

NAME

  m_fftcore

FUNCTION

  Low-level tools for FFT (sequential and MPI parallel version)
  It also provides helper functions to set up the list of G vectors
  inside a sphere or to count them.

  Copyright (C) 2014-2024 ABINIT group (SG, XG, AR, MG, MT)
  This file is distributed under the terms of the
  GNU General Public License, see ~abinit/COPYING
  or http://www.gnu.org/copyleft/gpl.txt .

TODO

  1) Pass distribfft instead of MPI_enreg to simplify the API and facilitate code-reuse.

  2) Merge this module with m_distribfft

  3) Get rid of paral_kgb and MPI_type! This is a low-level module that may be called by other
     code in which paral_kgb is meaningless! FFT tables and a MPI communicator are sufficient.

SOURCE

 26 #if defined HAVE_CONFIG_H
 27 #include "config.h"
 28 #endif
 29 
 30 #include "abi_common.h"
 31 
 32 module m_fftcore
 33 
 34  use defs_basis
 35  use m_abicore
 36  use m_errors
 37  use m_xmpi
 38  use m_sort
 39 
 40  use m_time,         only : timab
 41  use m_fstrings,     only : itoa, sjoin
 42  use m_geometry,     only : normv
 43  use defs_abitypes,  only : MPI_type
 44  use m_mpinfo,       only : destroy_mpi_enreg, initmpi_seq
 45 
 46  implicit none
 47 
 48  private
 49 
 50  public :: fftalg_isavailable  ! True if the FFT library specified by fftalg is available.
 51  public :: fftalg_has_mpi      ! True if fftalg provides MPI-FFTs.
 52  public :: fftalg_for_npfft    ! Returns the default value for fftalg given the number of processors for the FFT.
 53  public :: fftalg_info         ! Returns strings with info on the FFT library specified by fftalg.
 54  public :: get_cache_kb        ! Returns the cache size in Kbs (based on CPP variables).
 55  public :: ngfft_seq           ! initialize ngfft(18) from the FFT divisions (assume sequential FFT)
 56  public :: print_ngfft         ! Print the content of ngfft(18) in explicative format.
 57  public :: bound               ! Find distance**2 to boundary point of fft box nearest to kpt
 58  public :: getng               ! From ecut and metric tensor in reciprocal space, computes recommended ngfft(1:3)
 59  public :: sphereboundary      ! Finds the boundary of the basis sphere of G vectors
 60  public :: sphere
 61  public :: sphere_fft      ! Insert cg inside box.
 62  public :: sphere_fft1     ! TODO: This one should be replaced by sphere_fft.
 63  public :: change_istwfk   ! Change the istwfk mode of a set of wavefunctions (sequential version, same k-point)
 64  public :: kpgsph          ! Set up the G vector list
 65  public :: kpgcount        ! Give the number of G vector in each direction
 66  public :: get_kg          ! Helper function to calculate the set of G-vectors at a given kpoint (no MPI FFT)
 67  public :: kgindex         ! Compute the index of each plane wave on a FFT grid.
 68 
 69  ! Low-level tools for MPI FFT
 70  public :: switch
 71  public :: switch_cent
 72  public :: switchreal
 73  public :: switchreal_cent
 74  public :: scramble
 75  public :: fill
 76  public :: fill_cent
 77  public :: unfill
 78  public :: unfill_cent
 79  public :: unmpiswitch
 80  public :: unswitch
 81  public :: unswitchreal_cent
 82  public :: unmpiswitch_cent
 83  public :: unscramble
 84  public :: unswitch_cent
 85  public :: unswitchreal
 86  public :: mpiswitch
 87  public :: mpiswitch_cent
 88 
 89  public :: mpifft_fg2dbox
 90  public :: mpifft_fg2dbox_dpc
 91  public :: mpifft_dbox2fg
 92  public :: mpifft_dbox2fg_dpc
 93  public :: mpifft_dbox2fr
 94  public :: mpifft_dbox2fr_dpc
 95  public :: mpifft_fr2dbox
 96  public :: mpifft_fr2dbox_dpc
 97  public :: mpifft_collect_datar           ! Collect a real-space MPI-FFT distributed array on each proc.
 98 
 99  public :: indfftrisc
100  public :: addrho
101  public :: multpot
102 
103  ! 0 for double precision version (default), 1 for mixed precision FFTs
104  integer, public, protected :: fftcore_mixprec = 0
105  public :: fftcore_set_mixprec
106 
107 ! *************************************************************************
108 
109 !----------------------------------------------------------------------
110 ! Private variables
111 
112 #define FFTALGA_SIZE 5
113  character(len=*),private,parameter :: fftalga2name(1:FFTALGA_SIZE)= &
114  (/"Goedecker     ", &
115    "Vendor FFT    ", &
116    "FFTW3         ", &
117    "Goedecker2002 ", &
118    "DFTI          " /)
119 
120 #define FFTALGB_SIZE 1
121  character(len=*),private,parameter :: fftalgb2name(0:FFTALGB_SIZE)= &
122  (/"C2C",&
123    "R2C"/)
124 
125 #define FFTALGC_SIZE 2
126  character(len=*),private,parameter :: fftalgc2name(0:FFTALGC_SIZE)= &
127  (/"No pad         ",&
128    "zero-pad       ",&
129    "zero-pad+cache "/)
130 
131 contains

m_fftcore/addrho [ Functions ]

[ Top ] [ m_fftcore ] [ Functions ]

NAME

  addrho

FUNCTION

   Add the contribution to the density generated by n1dfft x-y planes

INPUTS

  icplexwf=1 if u(r) is real, 2 otherwise.
  includelast
  nd1,nd2=Leading dimensions of rhopart.
  n2=FFT dimension along y.
  lot=2nd Leading dimension of zw (cache blocking factor).
  n1dfft=Number of 1D FFTs along y performed.
  zw(2,lot,n2)=Array with the x-y planes (wavefunction in real space).
  weigth=Weight factor for the density.

SIDE EFFECTS

   rhopart(nd1,nd2)=density in the x-y plane, accumulated in output.

SOURCE

4596 pure subroutine addrho(icplexwf,includelast,nd1,nd2,n2,lot,n1dfft,zw,rhopart,weight_r,weight_i)
4597 
4598 
4599 !Arguments ------------------------------------
4600  integer,intent(in) :: icplexwf,includelast,nd1,nd2,n2,lot,n1dfft
4601  real(dp),intent(in) :: zw(2,lot,n2)
4602  real(dp),intent(inout) :: rhopart(nd1,nd2)
4603  real(dp),intent(in) :: weight_i,weight_r
4604 
4605 !Local variables-------------------------------
4606  integer :: i2,j
4607 
4608 ! *************************************************************************
4609 
4610  if (icplexwf==2) then
4611    ! Complex wavefunction in real space.
4612    do i2=1,n2-1,2
4613      do j=1,n1dfft
4614        rhopart(j,i2) =   rhopart(j,i2) +   weight_r*zw(1,j,i2)**2+weight_i*zw(2,j,i2)**2
4615        rhopart(j,i2+1) = rhopart(j,i2+1) + weight_r*zw(1,j,i2+1)**2+weight_i*zw(2,j,i2+1)**2
4616      end do
4617    end do
4618 
4619    if (2*(n2/2)/=n2) then
4620      do j=1,n1dfft
4621        rhopart(j,n2  )=rhopart(j,n2  )+weight_r*zw(1,j,n2  )**2+weight_i*zw(2,j,n2  )**2
4622      end do
4623    end if
4624  else
4625    ! The wavefunction is real, in real space
4626    if (includelast==1) then
4627      do i2=1,n2
4628        do j=1,n1dfft
4629          rhopart(2*j-1,i2)=rhopart(2*j-1,i2)+zw(1,j,i2)**2*weight_r
4630          rhopart(2*j  ,i2)=rhopart(2*j  ,i2)+zw(2,j,i2)**2*weight_i
4631        end do
4632      end do
4633    else
4634      do i2=1,n2
4635        do j=1,n1dfft-1
4636          rhopart(2*j-1,i2)=rhopart(2*j-1,i2)+zw(1,j,i2)**2*weight_r
4637          rhopart(2*j  ,i2)=rhopart(2*j  ,i2)+zw(2,j,i2)**2*weight_i
4638        end do
4639        rhopart(2*n1dfft-1,i2)=rhopart(2*n1dfft-1,i2)+zw(1,n1dfft,i2)**2*weight_r
4640      end do
4641    end if
4642 
4643  end if
4644 
4645 end subroutine addrho

m_fftcore/bound [ Functions ]

[ Top ] [ m_fftcore ] [ Functions ]

NAME

 bound

FUNCTION

 For given kpt, ngfft, and gmet,
  Find distance**2 to boundary point of fft box nearest to kpt
  Find distance**2 to boundary point of fft box farthest to kpt

INPUTS

  kpt(3)=real input k vector (reduced coordinates)
  ngfft(18)=contain all needed information about 3D FFT, see ~abinit/doc/variables/vargs.htm#ngfft
  gmet(3,3)=reciprocal space metric (currently in Bohr**-2)

OUTPUT

  dsqmax=maximum distance**2 from k to boundary in Bohr**-2.
  dsqmin=minimum distance**2 from k to boundary in Bohr**-2.
  gbound(3)=coords of G on boundary (correspnding to gsqmin)
  plane=which plane min occurs in (1,2, or 3 for G1,etc).

NOTES

 Potential trouble: this routine was written assuming kpt lies inside
 first Brillouin zone.  No measure is taken to fold input kpt back
 into first zone.  Given arbitrary kpt, this will cause trouble.

SOURCE

520 subroutine bound(dsqmax,dsqmin,gbound,gmet,kpt,ngfft,plane)
521 
522 !Arguments ------------------------------------
523 !scalars
524  integer,intent(out) :: plane
525  real(dp),intent(out) :: dsqmax,dsqmin
526 !arrays
527  integer,intent(in) :: ngfft(18)
528  integer,intent(out) :: gbound(3)
529  real(dp),intent(in) :: gmet(3,3),kpt(3)
530 
531 !Local variables-------------------------------
532 !scalars
533  integer :: i1,i1min,i2,i2min,i3,i3min
534  real(dp) :: dsm,dsp
535  character(len=500) :: msg
536 
537 ! *************************************************************************
538 
539 !Set plane to impossible value
540  plane=0
541 
542 !look at +/- g1 planes:
543  dsqmax=zero
544  dsqmin=dsq(ngfft(1)/2,-ngfft(2)/2,-ngfft(3)/2,gmet,kpt)+0.01_dp
545  do i2=-ngfft(2)/2,ngfft(2)/2
546    do i3=-ngfft(3)/2,ngfft(3)/2
547      dsp = dsq(ngfft(1)/2, i2, i3,gmet,kpt)
548      dsm = dsq( - ngfft(1)/2, i2, i3,gmet,kpt)
549      if (dsp>dsqmax) dsqmax = dsp
550      if (dsm>dsqmax) dsqmax = dsm
551      if (dsp<dsqmin) then
552        dsqmin = dsp
553        i1min = ngfft(1)/2
554        i2min = i2
555        i3min = i3
556        plane=1
557      end if
558      if (dsm<dsqmin) then
559        dsqmin = dsm
560        i1min =  - ngfft(1)/2
561        i2min = i2
562        i3min = i3
563        plane=1
564      end if
565    end do
566  end do
567 !
568 !+/- g2 planes:
569  do i1=-ngfft(1)/2,ngfft(1)/2
570    do i3=-ngfft(3)/2,ngfft(3)/2
571      dsp = dsq(i1,ngfft(2)/2,i3,gmet,kpt)
572      dsm = dsq(i1,-ngfft(2)/2,i3,gmet,kpt)
573      if (dsp>dsqmax) dsqmax = dsp
574      if (dsm>dsqmax) dsqmax = dsm
575      if (dsp<dsqmin) then
576        dsqmin = dsp
577        i1min = i1
578        i2min = ngfft(2)/2
579        i3min = i3
580        plane=2
581      end if
582      if (dsm<dsqmin) then
583        dsqmin = dsm
584        i1min = i1
585        i2min =  - ngfft(2)/2
586        i3min = i3
587        plane=2
588      end if
589    end do
590  end do
591 !
592 !+/- g3 planes:
593  do i1=-ngfft(1)/2,ngfft(1)/2
594    do i2=-ngfft(2)/2,ngfft(2)/2
595      dsp = dsq(i1,i2,ngfft(3)/2,gmet,kpt)
596      dsm = dsq(i1,i2,-ngfft(3)/2,gmet,kpt)
597      if (dsp>dsqmax) dsqmax = dsp
598      if (dsm>dsqmax) dsqmax = dsm
599      if (dsp<dsqmin) then
600        dsqmin = dsp
601        i1min = i1
602        i2min = i2
603        i3min = ngfft(3)/2
604        plane=3
605      end if
606      if (dsm<dsqmin) then
607        dsqmin = dsm
608        i1min = i1
609        i2min = i2
610        i3min =  - ngfft(3)/2
611        plane=3
612      end if
613    end do
614  end do
615 
616  if (plane==0) then
617 !  Trouble: missed boundary somehow
618    write(msg, '(a,a,a,3f9.4,a,3(i0,1x),a,a,a,a,a)' )&
619    'Trouble finding boundary of G sphere for',ch10,&
620    'kpt=',kpt(:),' and ng=',ngfft(1:3),ch10,&
621    'Action : check that kpt lies',&
622    'reasonably within first Brillouin zone; ',ch10,&
623    'else code bug, contact ABINIT group.'
624    ABI_BUG(msg)
625  end if
626 
627  gbound(1)=i1min
628  gbound(2)=i2min
629  gbound(3)=i3min
630 
631  contains
632 
633    function dsq(i1,i2,i3,gmet,kpt)
634 
635      integer :: i1,i2,i3
636      real(dp) :: dsq
637      real(dp) :: kpt(3),gmet(3,3)
638 
639      dsq=gmet(1,1)*(kpt(1)+dble(i1))**2&
640 &      +gmet(2,2)*(kpt(2)+dble(i2))**2&
641 &      +gmet(3,3)*(kpt(3)+dble(i3))**2&
642 &      +2._dp*(gmet(1,2)*(kpt(1)+dble(i1))*(kpt(2)+dble(i2))&
643 &      +gmet(2,3)*(kpt(2)+dble(i2))*(kpt(3)+dble(i3))&
644 &      +gmet(3,1)*(kpt(3)+dble(i3))*(kpt(1)+dble(i1)))
645    end function dsq
646 
647 end subroutine bound

m_fftcore/change_istwfk [ Functions ]

[ Top ] [ m_fftcore ] [ Functions ]

NAME

 change_istwfk

FUNCTION

 This function allows one to change the time-reversal storage mode (istwfk)
 of a *full* set of u(G). It does not support MPI-FFT!

INPUTS

 from_npw=number of G vectors in input from_cg
 from_kg_k(3,npw)=integer coordinates of the G vectors of from_cg
 from_istwfk=option parameter that describes the storage in from_cg
 to_npw=number of G vectors in output to_cg
 to_kg_k(3,npw)=integer coordinates of the G vectors in to_cg
 to_istwfk=option parameter that describes the storage in to_cg
 n1,n2,n3=physical dimension of the box (must be large enough to contain the sphere, no check is done)
 ndat=number of wavefunctions
 from_cg(2,from_npw*ndat)= Input u(g) values

 OUTPUTS
 to_cg(2,to_npw*ndat)= Output u(g) defined on the list of vectors to_kg_k with time-reversal mode to_istwfk

SOURCE

2094 subroutine change_istwfk(from_npw,from_kg,from_istwfk,to_npw,to_kg,to_istwfk,n1,n2,n3,ndat,from_cg,to_cg)
2095 
2096 !Arguments ------------------------------------
2097 !scalars
2098  integer,intent(in) :: from_npw,from_istwfk,to_npw,to_istwfk,n1,n2,n3,ndat
2099 !arrays
2100  integer,intent(in) :: from_kg(3,from_npw),to_kg(3,to_npw)
2101  real(dp),intent(inout) :: from_cg(2,from_npw*ndat) ! out due to sphere!
2102  real(dp),intent(inout) :: to_cg(2,to_npw*ndat)
2103 
2104 !Local variables-------------------------------
2105 !scalars
2106  integer :: n4,n5,n6
2107  real(dp),parameter :: xnorm1=one
2108 !arrays
2109  integer,parameter :: shiftg0(3)=0,me_g0=1
2110  integer,parameter :: symmE(3,3)=reshape([1,0,0,0,1,0,0,0,1],[3,3])
2111  real(dp),allocatable :: cfft(:,:,:,:)
2112 
2113 ! *************************************************************************
2114 
2115  n4=2*(n1/2)+1
2116  n5=2*(n2/2)+1
2117  n6=2*(n3/2)+1
2118 
2119  ABI_MALLOC(cfft, (2,n4,n5,n6*ndat))
2120 
2121  ! iflag=1 ==> insert from_cg into cfft.
2122  call sphere(from_cg,ndat,from_npw,cfft,n1,n2,n3,n4,n5,n6,from_kg,from_istwfk,+1,me_g0,shiftg0,symmE,xnorm1)
2123 
2124  ! iflag=-1 ==> extract to_cg from cfft.
2125  call sphere(to_cg,ndat,to_npw,cfft,n1,n2,n3,n4,n5,n6,to_kg,to_istwfk,-1,me_g0,shiftg0,symmE,xnorm1)
2126 
2127  ABI_FREE(cfft)
2128 
2129 end subroutine change_istwfk

m_fftcore/fftalg_for_npfft [ Functions ]

[ Top ] [ m_fftcore ] [ Functions ]

NAME

 fftalg_for_npfft

FUNCTION

  Returns the default value of fftalg given the number of MPI nodes
  to be used in the FFTs.

INPUTS

  nproc_fft=Number of processors used for MPI FFT

OUTPUT

  fftalg=Integer used to select the FFT library.

SOURCE

268 pure function fftalg_for_npfft(nproc_fft) result(fftalg)
269 
270 !Arguments ------------------------------------
271 !scalars
272  integer,intent(in) :: nproc_fft
273  integer :: fftalg
274 
275 ! *************************************************************************
276 
277  ! Default  for the sequential case.
278  fftalg = 112
279 
280  ! Use Goedecker2002 if fftalg does not support MPI (e.g 112)
281  if (nproc_fft > 1) fftalg = 401
282  !if (nproc_fft > 1) fftalg = 402
283 
284 #ifdef HAVE_FFTW3
285  fftalg = 312
286 #elif defined HAVE_DFTI
287  fftalg = 512
288  if (nproc_fft > 1) fftalg = 401  ! MPI-FFT with DFTI is not implemented yet
289 #endif
290 
291  !if (nproc_fft > 1) fftalg = 401 ! This is to revert to the old behavior.
292 
293 end function fftalg_for_npfft

m_fftcore/fftalg_has_mpi [ Functions ]

[ Top ] [ m_fftcore ] [ Functions ]

NAME

 fftalg_has_mpi

FUNCTION

  True if the FFT library specified by fftalg is available.

INPUTS

  fftalg=Input variable.

SOURCE

227 pure function fftalg_has_mpi(fftalg) result(ans)
228 
229 !Arguments ------------------------------------
230 !scalars
231  integer,intent(in) :: fftalg
232  logical :: ans
233 
234 !Local variables-------------------------------
235 !scalars
236  integer :: fftalga,fftalgb,fftalgc
237 
238 ! *************************************************************************
239 
240  ans = .False.
241  fftalga = fftalg/100; fftalgb = mod(fftalg,100)/10; fftalgc = mod(fftalg,10)
242 
243  if (fftalga == FFT_FFTW3) ans = .True.
244  !if (fftalga == FFT_DFTI)  ans = .True.
245  if (fftalga == FFT_SG2002) ans = .True.
246 
247 end function fftalg_has_mpi

m_fftcore/fftalg_info [ Functions ]

[ Top ] [ m_fftcore ] [ Functions ]

NAME

 fftalg_info

FUNCTION

  Returns info on the FFT library specified by fftalg (ngfft(7))

INPUTS

  fftalg=Input variable.

OUTPUT

  library=String with the name of FFT library
  cplex_mode= String defining whether the FFT library supports real<-->complex transforms.
  padding_mode=Padding mode.

SOURCE

315 subroutine fftalg_info(fftalg,library,cplex_mode,padding_mode)
316 
317 !Arguments ------------------------------------
318 !scalars
319  integer,intent(in) :: fftalg
320  character(len=*),intent(out) :: library,cplex_mode,padding_mode
321 
322 !Local variables-------------------------------
323 !scalars
324  integer :: fftalga,fftalgb,fftalgc
325 
326 ! *************************************************************************
327 
328  library = "Unknown"; cplex_mode = "Unknown"; padding_mode  = "Unknown"
329 
330  fftalga=fftalg/100
331  if (fftalga>0 .and. fftalga<=FFTALGA_SIZE) library = fftalga2name(fftalga)
332 
333  fftalgb=mod(fftalg,100)/10
334  if (fftalgb>=0 .and. fftalgb<=FFTALGB_SIZE) cplex_mode = fftalgb2name(fftalgb)
335 
336  fftalgc=mod(fftalg,10)
337  if (fftalgc>=0 .and. fftalgc<=FFTALGC_SIZE) padding_mode = fftalgc2name(fftalgc)
338 
339 end subroutine fftalg_info

m_fftcore/fftalg_isavailable [ Functions ]

[ Top ] [ m_fftcore ] [ Functions ]

NAME

 fftalg_isavailable

FUNCTION

  Returns TRUE if the FFT library specified by fftalg (ngfft(7)) is available

INPUTS

  fftalg=Input variable.

SOURCE

186 logical pure function fftalg_isavailable(fftalg) result(ans)
187 
188 !Arguments ------------------------------------
189  integer,intent(in) :: fftalg
190 
191 !Local variables-------------------------------
192  integer :: fftalga,fftalgb,fftalgc
193 
194 ! *************************************************************************
195 
196  ans = .TRUE.
197  fftalga = fftalg/100
198  fftalgb = mod(fftalg,100)/10
199  fftalgc = mod(fftalg,10)
200 
201  ! Optional FFT libraries.
202 #ifndef HAVE_FFTW3
203  if (fftalga == FFT_FFTW3) ans = .FALSE.
204 #endif
205 
206 #ifndef HAVE_DFTI
207  if (fftalga == FFT_DFTI) ans = .FALSE.
208 #endif
209 
210 end function fftalg_isavailable

m_fftcore/fftcore_set_mixprec [ Functions ]

[ Top ] [ m_fftcore ] [ Functions ]

NAME

 fftalg_set_precision

FUNCTION

  Set the precision to be used in the FFT routines: 0 for standard double precision,
  1 for mixed precision (dp input, sp for intermediate arrays passed to FFT libs)
  Return old value.

INPUTS

SOURCE

149 integer function fftcore_set_mixprec(wp) result(old_wp)
150 
151 !Arguments ------------------------------------
152 !scalars
153  integer,intent(in) :: wp
154 
155 ! *************************************************************************
156 
157  old_wp = fftcore_mixprec
158  fftcore_mixprec = abs(wp)
159 
160  select case (fftcore_mixprec)
161  case (0)
162    if (old_wp /= fftcore_mixprec) call wrtout(std_out, " fftcore_mixprec 0 --> Using double-precision FFT", newlines=1)
163  case (1)
164    if (old_wp /= fftcore_mixprec) call wrtout(std_out, " fftcore_mixprec 1 --> Using mixed precision FFT", newlines=1)
165  case default
166    ABI_ERROR(sjoin("Wrong value for input wp:", itoa(fftcore_mixprec)))
167  end select
168 
169 end function fftcore_set_mixprec

m_fftcore/fill [ Functions ]

[ Top ] [ m_fftcore ] [ Functions ]

NAME

  fill

FUNCTION

   Receives a set of z-lines in reciprocal space,
   insert the values in the cache work array zw (no padding)

INPUTS

  nd1,nd3=Dimensions of the input array zf.
  lot=Cache blocking factor.
  n1dfft=Number of 1D FFTs to perform
  n3=Dimension of the transform along z
  zf(2,nd1,nd3)=Input array

OUTPUT

  zw(2,lot,n3)=Cache work array with the z-lines.

SOURCE

2493 pure subroutine fill(nd1,nd3,lot,n1dfft,n3,zf,zw)
2494 
2495 !Arguments ------------------------------------
2496  integer,intent(in) :: nd1,nd3,lot,n1dfft,n3
2497  real(dp),intent(in) :: zf(2,nd1,nd3)
2498  real(dp),intent(inout) :: zw(2,lot,n3)
2499 
2500 ! local variables
2501  integer :: i1,i3
2502 
2503 ! *************************************************************************
2504 
2505  do i3=1,n3
2506    do i1=1,n1dfft
2507      zw(1,i1,i3)=zf(1,i1,i3)
2508      zw(2,i1,i3)=zf(2,i1,i3)
2509    end do
2510  end do
2511 
2512 end subroutine fill

m_fftcore/fill_cent [ Functions ]

[ Top ] [ m_fftcore ] [ Functions ]

NAME

 fill_cent

FUNCTION

   Receives a set of z-lines in reciprocal space,
   insert the values in cache work array defined on the FFT box
   and pads the central of the frequency region with zeros.

INPUTS

  md1,md3=Leading dimension of zf along x and z
  lot=second dimension of zw (cache blocking factor)
  n1dfft=Number of 1d transforms to be performed along z.
  max3=Max G_z in the small box enclosing the G-sphere.
  m3=Number of points in the *small* box enclosing the G-sphere
  n3=Dimension of the FFT transform along z
  zf(2,md1,md3)=x-z planes in reciprocal space

OUTPUT

   zw(2,lot,n3)= Filled cache work array.
     zw(:,1:n1dfft,n3) contains the lines to be transformed along.

SOURCE

2541 pure subroutine fill_cent(md1,md3,lot,n1dfft,max3,m3,n3,zf,zw)
2542 
2543 !Arguments ------------------------------------
2544  integer,intent(in) :: md1,md3,lot,n1dfft,max3,m3,n3
2545  real(dp),intent(in) :: zf(2,md1,md3)
2546  real(dp),intent(inout) :: zw(2,lot,n3)
2547 
2548 !Local variables-------------------------------
2549 !scalars
2550  integer :: i1,i3
2551 
2552 ! *************************************************************************
2553 
2554  ! Here, zero and positive frequencies
2555  do i3=1,max3+1
2556    do i1=1,n1dfft
2557      zw(1,i1,i3)=zf(1,i1,i3)
2558      zw(2,i1,i3)=zf(2,i1,i3)
2559    end do
2560  end do
2561 
2562  ! Fill the center region with zeros
2563  do i3=max3+2,n3-m3+max3+1
2564    do i1=1,n1dfft
2565      zw(1,i1,i3)=zero
2566      zw(2,i1,i3)=zero
2567    end do
2568  end do
2569 
2570  ! Here, negative frequencies
2571  do i3=max3+2,m3
2572    do i1=1,n1dfft
2573      zw(1,i1,i3+n3-m3)=zf(1,i1,i3)
2574      zw(2,i1,i3+n3-m3)=zf(2,i1,i3)
2575    end do
2576  end do
2577 
2578 end subroutine fill_cent

m_fftcore/get_cache_kb [ Functions ]

[ Top ] [ m_fftcore ] [ Functions ]

NAME

 get_cache_kb

FUNCTION

  Returns the cache size in KB to be used for cache blocking algorithms in the FFT routines.
  The value is derived from the values of the CPP options defined in config.h

TODO

   Use C to get the real cache size.
   See http://stackoverflow.com/questions/12594208/c-program-to-determine-levels-size-of-cache

SOURCE

358 pure function get_cache_kb()
359 
360 !Local variables-------------------------------
361 !scalars
362  integer :: get_cache_kb
363 
364 ! *************************************************************************
365 
366  ! Default value
367  get_cache_kb = 16
368  !get_cache_kb = 32
369  !get_cache_kb = 256
370 
371 end function get_cache_kb

m_fftcore/get_kg [ Functions ]

[ Top ] [ m_fftcore ] [ Functions ]

NAME

  get_kg

FUNCTION

  Helper function to calculate the set of G-vectors at a given kpoint.
  without taking advantage of FFT parallelism and G-vector distributions.

INPUTS

  kpoint(3)=The k-point in reduced coordinates.
  ecut=Cutoff energy for planewave basis set.
  gmet(3,3)=reciprocal space metric ($\textrm{bohr}^{-2}$).
  istwfk=Options defining if time-reversal is used to decrease the number of G"s.
  [kin_sorted]=True if output g-vectors should be sorted by |k+g|^2/2. Default: False.

OUTPUT

  npw_k=Total number of G-vectors in the full G-sphere.
  kg_k(3,npw_k) list of G-vectors allocated by the routine.

SOURCE

4425 subroutine get_kg(kpoint, istwf_k, ecut, gmet, npw_k, kg_k, kin_sorted)
4426 
4427 !Arguments ------------------------------------
4428 !scalars
4429  integer,intent(in) :: istwf_k
4430  integer,intent(out) :: npw_k
4431  real(dp),intent(in) :: ecut
4432 !arrays
4433  integer,allocatable,intent(out) :: kg_k(:,:)
4434  real(dp),intent(in) :: gmet(3,3),kpoint(3)
4435  logical,optional,intent(in) :: kin_sorted
4436 
4437 !Local variables-------------------------------
4438 !scalars
4439  integer,parameter :: mkmem_ = 1, exchn2n3d0 = 0, ikg0 = 0
4440  integer :: npw_k_test, ig, ig_sort
4441  type(MPI_type) :: MPI_enreg_seq
4442 !arrays
4443  integer :: kg_dum(3, 0)
4444  integer,allocatable :: iperm(:), iwork(:,:)
4445  real(dp),allocatable :: kin_kg(:)
4446 
4447 ! *********************************************************************
4448 
4449  call initmpi_seq(MPI_enreg_seq)
4450 
4451  ! Calculate the number of G-vectors for this k-point.
4452  call kpgsph(ecut,exchn2n3d0, gmet, ikg0, 0, istwf_k, kg_dum, kpoint, 0, MPI_enreg_seq, 0, npw_k)
4453 
4454  ! Allocate and calculate the set of G-vectors.
4455  ABI_MALLOC(kg_k,(3,npw_k))
4456  call kpgsph(ecut, exchn2n3d0, gmet, ikg0, 0, istwf_k, kg_k, kpoint, mkmem_, MPI_enreg_seq, npw_k, npw_k_test)
4457 
4458  call destroy_mpi_enreg(MPI_enreg_seq)
4459 
4460  if (present(kin_sorted)) then
4461    if (kin_sorted) then
4462      ABI_MALLOC(kin_kg, (npw_k))
4463      ABI_MALLOC(iperm, (npw_k))
4464      iperm = [(ig, ig=1,npw_k)]
4465      do ig=1,npw_k
4466        kin_kg(ig) = half * normv(kpoint + kg_k(:, ig), gmet, "G") ** 2
4467      end do
4468 
4469      call sort_dp(npw_k, kin_kg, iperm, tol14)
4470      ABI_FREE(kin_kg)
4471 
4472      ABI_MALLOC(iwork, (3, npw_k))
4473      iwork = kg_k
4474      do ig=1,npw_k
4475        ig_sort = iperm(ig)
4476        kg_k(:,ig) = iwork(:,ig_sort)
4477      end do
4478 
4479      ABI_FREE(iwork)
4480      ABI_FREE(iperm)
4481    end if
4482  end if
4483 
4484 end subroutine get_kg

m_fftcore/getng [ Functions ]

[ Top ] [ m_fftcore ] [ Functions ]

NAME

 getng

FUNCTION

 From ecut and metric tensor in reciprocal space, computes recommended ngfft(1:3)
 Also computes the recommended value of nfft and mgfft
 Pay attention that the FFT grid must be compatible with the symmetry operations (see irrzg.f).

INPUTS

 boxcutmin=minimum value of boxcut admitted (boxcut is the ratio
   between the radius of the sphere contained in the FFT box, and the
   radius of the planewave sphere): usually 2.0.
 chksymtnons= if==3, will impose the FFT grid to be invariant under the spatial symmetries.
 ecut=energy cutoff in Hartrees
 gmet(3,3)=reciprocal space metric (bohr**-2).
 kpt(3)=input k vector in terms of reciprocal lattice primitive translations
 me_fft=index of the processor in the FFT set (use 0 if sequential)
 nproc_fft=number of processors in the FFT set (use 1 if sequential)
 nsym=number of symmetry elements in group
 paral_fft=0 if no FFT parallelisation; 1 if FFT parallelisation
 symrel(3,3,nsym)=symmetry matrices in real space (integers)
 tnons(3,nsym)=nonsymmorphic translations associated to symrel

OUTPUT

 mgfft= max(ngfft(1),ngfft(2),ngfft(3))
 nfft=number of points in the FFT box=ngfft(1)*ngfft(2)*ngfft(3)/nproc_fft

SIDE EFFECTS

 Input/Output
 ngfft(1:18)=integer array with FFT box dimensions and other information on FFTs.
   On input ngfft(1:3) contains optional trial values. If ngfft(1:3)/minbox is greater than value
   calculated to avoid wrap-around error and ngfft obeys constraint placed by the FFT routine that is used
   then ngfft(1:3) is left unchanged. Otherwise set to value computed in now.

 Note that there is the possibility of an undetected error if we
 are dealing with a cubic unit cell and ngfft(1), ngfft(2) and ngfft(3)
 are different. In the future we should handle this case.

 ngfft(4),ngfft(5),ngfft(6)= modified values to avoid cache trashing,
   presently: ngfft(4)=ngfft(1)+1 if ngfft(1) is even;
              ngfft(5)=ngfft(2)+1 if ngfft(2) is even.
   in the other cases, ngfft(4:6)=ngfft(1:3).
   Other choices may better, but this is left for the future.
 ngfft(7)=choice for FFT algorithm, see the input variable fftalg
 ngfft(8)=size of the cache, in bytes (not used here presently).!!
   other ngfft slots are used for parallelism see ~abinit/doc/variables/vargs.htm#ngfft
 [ngfftc(1:18)]= -optional- value of ngfft for the "coarse" grid
 [unit] = -optional-  output unit number (DEFAULT std_out)
 [gpu_option] = GPU implementation to use, i.e. cuda, openMP, ... (0=not using GPU)

SOURCE

 703 subroutine getng(boxcutmin, chksymtnons, ecut, gmet, kpt, me_fft, mgfft, nfft, ngfft, &
 704                 nproc_fft, nsym,paral_fft, symrel, tnons, &
 705                 ngfftc, unit, gpu_option) ! optional
 706 
 707  use defs_fftdata,  only : mg
 708 
 709 !Arguments ------------------------------------
 710 !scalars
 711  integer,intent(in) :: chksymtnons,me_fft,nproc_fft,nsym,paral_fft
 712  integer,intent(out) :: mgfft,nfft
 713  integer,optional,intent(in) :: unit,gpu_option
 714  real(dp),intent(in) :: boxcutmin,ecut
 715 !arrays
 716  integer,intent(in) :: symrel(3,3,nsym)
 717  integer,intent(in),optional :: ngfftc(3)
 718  integer,intent(inout) :: ngfft(18)
 719  real(dp),intent(in) :: gmet(3,3),kpt(3)
 720  real(dp),intent(in) :: tnons(3,nsym)
 721 
 722 !Local variables-------------------------------
 723 !scalars
 724  integer,save :: first=1,msrch(3),previous_paral_mode=0
 725  integer :: element,ifactor,ii,index,ipower,isrch,isrch1,isrch2,isrch3,isym,jj,mu,paral_fft_
 726  integer :: plane,testok,tobechecked,ount,fftalga,nn,ngdiv,valpow
 727  real(dp),parameter :: minbox=0.75_dp
 728  real(dp) :: dsqmax,dsqmin,ecutmx,prodcurrent,prodtrial,tnscaled,xx,yy
 729  logical :: testdiv
 730  character(len=500) :: msg
 731  integer,parameter :: largest_ngfft=mg ! Goedecker FFT: any powers of 2, 3, and 5 - must be coherent with defs_fftdata.F90
 732  integer,parameter :: maxpow2 =16      ! int(log(largest_ngfft+half)/log(two))
 733  integer,parameter :: maxpow3 =6       ! int(log(largest_ngfft+half)/log(three))
 734  integer,parameter :: maxpow5 =6       ! int(log(largest_ngfft+half)/log(five))
 735  integer,parameter :: maxpow7 =0
 736  integer,parameter :: maxpow11=0
 737  integer,parameter :: mmsrch=(maxpow2+1)*(maxpow3+1)*(maxpow5+1)*(maxpow7+1)*(maxpow11+1)
 738  integer,parameter :: nfactor=10, mpower=5
 739 !Arrays
 740  integer,save :: iperm(mmsrch),srch(mmsrch,3)
 741  integer(i8b) :: li_srch(mmsrch)
 742  integer :: divisor(3,3),gbound(3),imax(3),imin(3),ngcurrent(3)
 743  integer :: ngmax(3),ngsav(3),ngtrial(3)
 744  integer :: npower(3,mpower)
 745  integer,parameter :: factor(10) = (/1,2,3,4,5,6,8,9,10,12/)
 746  integer,parameter :: power(5) = (/2,3,5,7,11/)
 747 
 748 ! *************************************************************************
 749 
 750  ount = std_out; if (present(unit)) ount = unit
 751 
 752 !DEBUG
 753 !write(std_out,*)' m_fftcore/getng : enter'
 754 !ENDDEBUG
 755 
 756  fftalga = ngfft(7)/100
 757 
 758 !If not yet done, compute recommended (boxcut>=2) fft grid dimensions
 759 !In case we switch for paral to sequential mode, recompute srch.
 760 !This is the case e.g. when computing ngfftdiel in sequential mode
 761 !after an initial computation of ngfft in parallel
 762 
 763  paral_fft_=paral_fft;if (nproc_fft==0) paral_fft_=0
 764 
 765  if(first==1.or.paral_fft_ /= previous_paral_mode) then
 766    first=0; previous_paral_mode=paral_fft_
 767    srch(:,:)=0
 768 
 769    ! Factors of 2
 770    srch(1,1)=1
 771    do ii=1,maxpow2
 772      srch(ii+1,1)=srch(ii,1)*2
 773    end do
 774 
 775    ! Factors of 3
 776    index=maxpow2+1
 777    if(maxpow3>0)then
 778      do ii=1,max(1,maxpow3)
 779        srch(1+ii*index:(ii+1)*index,1)=3*srch(1+(ii-1)*index:ii*index,1)
 780      end do
 781    end if
 782 
 783    ! Factors of 5
 784    index=(maxpow3+1)*index
 785    if(maxpow5>0)then
 786      do ii=1,max(1,maxpow5)
 787        li_srch = 0
 788        li_srch(1+ii*index:(ii+1)*index)=5_i8b*srch(1+(ii-1)*index:ii*index,1)
 789        where (li_srch > huge(maxpow3)) li_srch = huge(maxpow3)
 790        srch(1+ii*index:(ii+1)*index,1)=li_srch(1+ii*index:(ii+1)*index)
 791      end do
 792    end if
 793 
 794    ! Factors of 7
 795    index=(maxpow5+1)*index
 796    if(maxpow7>0)then
 797      do ii=1,max(1,maxpow7)
 798        srch(1+ii*index:(ii+1)*index,1)=7*srch(1+(ii-1)*index:ii*index,1)
 799      end do
 800    end if
 801 
 802    ! Factors of 11
 803    index=(maxpow7+1)*index
 804    if(maxpow11>0)then
 805      do ii=1,max(1,maxpow11)
 806        srch(1+ii*index:(ii+1)*index,1)=11*srch(1+(ii-1)*index:ii*index,1)
 807      end do
 808    end if
 809 
 810    call sort_int(mmsrch,srch(:,1),iperm)
 811 
 812    do ii=1,mmsrch
 813      if(srch(ii,1)>largest_ngfft)exit
 814    end do
 815    msrch(1)=ii-1
 816 
 817    ! In case of FFT parallelism, one need ngfft(2) and ngfft(3) to be multiple of nproc_fft
 818    if(paral_fft_==1)then
 819      msrch(2)=0
 820      do ii=1,msrch(1)
 821        if(modulo(srch(ii,1),nproc_fft)==0) then
 822          msrch(2)=msrch(2)+1
 823          srch(msrch(2),2)=srch(ii,1)
 824        end if
 825      end do
 826      !write(msg,'(a,i0,a,i0,2a,i0)')&
 827      ! 'The second and third dimension of the FFT grid: ',ngfft(2),", ",ngfft(3),ch10,&
 828      ! 'were imposed to be multiple of the number of processors for the FFT: ', nproc_fft
 829      !if (ount /= dev_null) ABI_COMMENT(msg)
 830    else
 831      msrch(2)=msrch(1)
 832      srch(:,2)=srch(:,1)
 833    end if
 834 
 835    ! The second and third search list have the same constraint
 836    msrch(3)=msrch(2)
 837    srch(:,3)=srch(:,2)
 838 
 839 !  The set of allowed ngfft values has been found
 840  end if ! first==1
 841 
 842 !=============================================================================================
 843 !
 844 ! Determination of sufficient values of ngfft with ngfft(2) and ngfft(3) taken inside
 845 ! sets of values that take into account the constraint on nproc_fft
 846 
 847 !Save input values of ngfft
 848  ngsav(1:3) = ngfft(1:3)
 849 
 850 !As an initial guess for ngfft, use the provided coarse mesh grid
 851  if (PRESENT(ngfftc)) then
 852    ngfft(1:3)=ngfftc(1:3)
 853    call wrtout(ount,' Using supplied coarse mesh as initial guess.')
 854  else
 855    ngfft(1:3)=2
 856  end if
 857 
 858 !Enlarge the initial guess until the set of ngfft entirely comprises the sphere
 859  do
 860 
 861    call bound(dsqmax,dsqmin,gbound,gmet,kpt,ngfft,plane)
 862 
 863    ! Exit the infinite do-loop if the sphere is inside the FFT box
 864    if (dsqmin>=(half*boxcutmin**2*ecut/pi**2)) exit
 865 
 866    ! Fix nearest boundary
 867    do ii=1,msrch(plane)-1
 868      if (srch(ii,plane)>=ngfft(plane)) then
 869 !      redefine ngfft(plane) to next higher choice
 870        ngfft(plane)=srch(ii+1,plane)
 871        exit ! Exit the loop over ii
 872      end if
 873 
 874      if (ii==msrch(plane)-1)then
 875        ! Here, we are in trouble
 876        write(msg, '(a,i12,5a)' ) &
 877         'ngfft is bigger than allowed value =',ngfft(plane),'.',ch10,&
 878         'This indicates that desired ngfft is larger than getng',ch10,&
 879         'can handle. The code has to be changed and compiled.'
 880        ABI_BUG(msg)
 881      end if
 882    end do
 883 
 884  end do ! End of the infinite do-loop : will either "exit", or abort
 885 
 886 !ecutmx=maximum ecut consistent with chosen ngfft
 887  ecutmx=0.5_dp*pi**2*dsqmin
 888 
 889 !Print results
 890  write(msg, '(a,1p,e14.6,a,3i8,a,a,e14.6)' ) &
 891   ' For input ecut=',ecut,' best grid ngfft=',ngfft(1:3),ch10,&
 892   '       max ecut=',ecutmx
 893  call wrtout(ount,msg)
 894 
 895 ! The FFT grid is compatible with the symmetries if for each
 896 ! symmetry isym, each ii and each jj, the quantity
 897 ! (ngfft(jj)*symrel(jj,ii,isym))/ngfft(ii) is an integer.
 898 ! This relation is immediately verified for diagonal elements, since
 899 ! symrel is an integer. It is also verified if symrel(ii,jj,isym) is zero.
 900 ! Moreover, to cope with the non-symmorphic translation vectors, at least the
 901 ! origin must be sent to a point of the FFT grid. Hence, the non-symmorphic
 902 ! translations tnons(i) multiplied by ngfft(i) must be an integer.
 903 ! The latter condition is however imposed only when chksymtnons=3.
 904 ! Indeed, ABINIT will be able to reimpose the symmetry at the level of the density and potential.
 905 ! This might be a problem for GW calculations, though ...
 906 
 907 !Compute the biggest (positive) common divisor of each off-diagonal element of the symmetry matrices
 908  divisor(:,:)=0; tobechecked=0
 909 
 910  do ii=1,3
 911    do jj=1,3
 912      if(jj==ii)cycle
 913      do isym=1,nsym
 914        if(symrel(jj,ii,isym)==0 .or. divisor(jj,ii)==1 )cycle
 915        tobechecked=1
 916        element=abs(symrel(jj,ii,isym))
 917        testdiv= ( divisor(jj,ii)==0 .or. divisor(jj,ii)==element .or. element==1)
 918        if(testdiv)then
 919          divisor(jj,ii)=element
 920        else
 921 !        Must evaluate common divisor between non-trivial numbers
 922          do
 923            if(divisor(jj,ii)<element)element=element-divisor(jj,ii)
 924            if(divisor(jj,ii)>element)divisor(jj,ii)=divisor(jj,ii)-element
 925            if(divisor(jj,ii)==element)exit
 926          end do
 927        end if
 928      end do
 929    end do
 930  end do
 931 
 932 !Check whether there is a problem: the grid must be invariant
 933 !with respect to point symmetry operations, and spatial symmetry operations if chksymtnons==3
 934  testok=1
 935  if(tobechecked==1)then
 936    do ii=1,3
 937      do jj=1,3
 938        xx=divisor(jj,ii)*ngfft(jj)
 939        yy=xx/ngfft(ii)
 940        if(abs(yy-nint(yy))>tol8)testok=0
 941      end do
 942      if(chksymtnons==3)then
 943        do isym=1,nsym
 944          tnscaled=tnons(ii,isym)*ngfft(ii)
 945          if(abs(tnscaled-nint(tnscaled))>tol8)testok=0
 946        enddo
 947      endif
 948    end do
 949  end if
 950 
 951 !DEBUG
 952 !write(std_out,*)' m_fftcore/getng : chksymtnons,testok,nproc_fft=',chksymtnons,testok,nproc_fft
 953 !ENDDEBUG
 954 
 955 !If there is a problem
 956  if(testok==0)then
 957 !  Find the powers of 2, 3, 5 (possibly 7 and 11) that are needed or will be enough in ngfft,
 958 !  taking both the constraint on nproc_nfft and the constraint on tnons.
 959 !  First decompose nproc_fft, providing divisors of ngfft(2:3)
 960    nn=nproc_fft
 961    npower(:,:)=0
 962    do ipower=1,mpower
 963      valpow=power(ipower)
 964      do while (mod(nn,valpow)==0)
 965        nn=nn/valpow
 966        npower(3,ipower)=npower(3,ipower)+1
 967      end do
 968      if(nn/=1)then
 969        ABI_ERROR(sjoin("nproc_fft: ", itoa(nn), "is not a multiple of 2, 3, 5, 7 or 11"))
 970      endif
 971    enddo
 972    npower(2,:)=npower(3,:)
 973 
 974 !  Then examine tnons
 975    if(chksymtnons==3)then
 976      do ii=1,3
 977        do ifactor=1,nfactor
 978          testok=1
 979          do isym=1,nsym
 980            tnscaled=factor(ifactor)*tnons(ii,isym)
 981            if(abs(tnscaled-nint(tnscaled))>tol8)testok=0
 982          enddo
 983          if(testok==1)exit
 984        enddo
 985        if(testok==1)then
 986          if(ifactor/=1)then
 987            if(ifactor==2)npower(ii,1)=max(npower(ii,1),1)  ! At least one power of 2
 988            if(ifactor==3)npower(ii,2)=max(npower(ii,2),1)  ! At least one power of 3
 989            if(ifactor==4)npower(ii,1)=max(npower(ii,1),2)  ! At least two powers of 2
 990            if(ifactor==5)npower(ii,3)=max(npower(ii,3),1)  ! At least one power of 5
 991            if(ifactor==6)then
 992              npower(ii,1)=max(npower(ii,1),1)  ! At least one power of 2
 993              npower(ii,2)=max(npower(ii,2),1)  ! At least one power of 3
 994            endif
 995            if(ifactor==7)npower(ii,1)=max(npower(ii,1),3)  ! At least three powers of 2
 996            if(ifactor==8)npower(ii,2)=max(npower(ii,2),2)  ! At least two powers of 3
 997            if(ifactor==9)then
 998              npower(ii,1)=max(npower(ii,1),1)  ! At least one power of 2
 999              npower(ii,3)=max(npower(ii,3),1)  ! At least one power of 5
1000            end if
1001            if(ifactor==10)then
1002              npower(ii,1)=max(npower(ii,1),2)  ! At least two powers of 2
1003              npower(ii,2)=max(npower(ii,2),1)  ! At least one power of 3
1004            end if
1005          endif
1006        else
1007          write(msg, '(5a,i12,2a,9i12,2a,3f10.7,2a)' ) &
1008           'Chksymtnons=1 . Found potentially symmetry-breaking value of tnons, ', ch10,&
1009 &         '   which is neither a rational fraction in 1/8th nor in 1/12th (1/9th and 1/10th are tolerated also) :', ch10,&
1010 &         '   for the symmetry number ',isym,ch10,&
1011 &         '   symrel is ',symrel(1:3,1:3,isym),ch10,&
1012 &         '   tnons is ',tnons(1:3,isym),ch10,&
1013           'This problem should have been caught earlier.'
1014          ABI_BUG(msg)
1015        endif
1016      enddo ! ii
1017    endif ! chksymtnons
1018 
1019 !  Get minimal search indices, simply those of the current ngfft
1020    do ii=1,3
1021      do isrch=1,msrch(ii)
1022        index=srch(isrch,ii)
1023        if(index==ngfft(ii))imin(ii)=isrch
1024      end do
1025    end do
1026 
1027 !  Get maximal search indices : the ngtrial values must be identical (to fulfill the constraint induced by the
1028 !  off diagonal elements of symrel), but also must contain sufficient powers of basic primes (2, 3, 5, 7, 11),
1029 !  and be bigger than all current ngfft components. This should guarantee that such a triplet fulfills all constraints.
1030 !  Determine the divisor of allowed ngmax
1031    ngdiv=1
1032    do ipower=1,mpower
1033      ngdiv=ngdiv*power(ipower)**(maxval(npower(:,ipower)))
1034    enddo
1035    ngmax(1)=ngdiv*(maxval(ngfft(1:3)-1)/ngdiv+1)
1036    ngmax(1:3)=ngmax(1)
1037    do ii=1,3
1038      do isrch=1,msrch(ii)
1039        index=srch(isrch,ii)
1040        if(mod(index,ngdiv)==0 .and. index>=ngmax(ii))then
1041          imax(ii)=isrch
1042          ngmax(ii)=index
1043          exit
1044        endif
1045      end do
1046    end do
1047 !  This gives a tentative symetric triplet
1048    ngcurrent(1:3)=ngmax(1:3)
1049    prodcurrent=ngmax(1)*ngmax(2)*ngmax(3)+1.0d-3
1050 !  However, it is perhaps possible to do better, by assymetric triplets, still giving lower prodcurrent !
1051    ngmax(1)=min(int(prodcurrent/(ngfft(2)*ngfft(3))),srch(msrch(1),1))
1052    ngmax(2)=min(int(prodcurrent/(ngfft(1)*ngfft(3))),srch(msrch(2),2))
1053    ngmax(3)=min(int(prodcurrent/(ngfft(1)*ngfft(2))),srch(msrch(3),3))
1054    do ii=1,3
1055      do isrch=1,msrch(ii)
1056        index=srch(isrch,ii)
1057 !      One cannot suppose that ngmax belongs to the allowed list,
1058 !      so must use <= instead of == , to determine largest index
1059        if(index<=ngmax(ii))imax(ii)=isrch
1060      end do
1061    end do
1062 
1063 !DEBUG
1064 !write(std_out,*)' ngmin(1:3)=',srch(imin(1),1),srch(imin(2),2),srch(imin(3),3)
1065 !write(std_out,*)' ngmax(1:3)=',ngmax(1:3)
1066 !ENDDEBUG
1067 
1068    ngcurrent(1:3)=ngmax(1:3)
1069    prodcurrent=ngmax(1)*ngmax(2)*ngmax(3)+1.0d-3
1070 
1071 !  Now, start brute force search
1072    do isrch1=imin(1),imax(1)
1073      ngtrial(1)=srch(isrch1,1)
1074      do isrch2=imin(2),imax(2)
1075        ngtrial(2)=srch(isrch2,2)
1076        do isrch3=imin(3),imax(3)
1077          ngtrial(3)=srch(isrch3,3)
1078          prodtrial=real(ngtrial(1))*real(ngtrial(2))*real(ngtrial(3))+1.0d-3
1079          if(prodtrial>prodcurrent-1.0d-4)exit
1080 !        The trial product is lower or equal to the current product,
1081 !        so now, checks whether the symmetry constraints are OK
1082          testok=1
1083          do ii=1,3
1084            do jj=1,3
1085              xx=divisor(jj,ii)*ngtrial(jj)
1086              yy=xx/ngtrial(ii)
1087              if(abs(yy-nint(yy))>tol8)testok=0
1088            end do
1089            if(chksymtnons==3)then
1090              do isym=1,nsym
1091                tnscaled=tnons(ii,isym)*ngtrial(ii)
1092                if(abs(tnscaled-nint(tnscaled))>tol8)testok=0
1093              enddo
1094            endif
1095          end do
1096 !        DEBUG
1097 !        write(ount,'(a,3i6,a,i3,a,es16.6)' )' getng : current trial triplet',ngtrial(1:3),&
1098 !        &     ' testok=',testok,' prodtrial=',prodtrial
1099 !        ENDDEBUG
1100          if(testok==0)cycle
1101 !        When one arrives here, the symmetry constraints are fulfilled, so update current values
1102 !        Then continues the search, in hope of a better value.
1103          ngcurrent(1:3)=ngtrial(1:3)
1104          prodcurrent=prodtrial
1105        end do
1106      end do
1107    end do
1108 
1109    ngfft(1:3)=ngcurrent(1:3)
1110    call bound(dsqmax,dsqmin,gbound,gmet,kpt,ngfft,plane)
1111 !  ecutmx=maximum ecut consistent with chosen ngfft
1112    ecutmx=0.5_dp*pi**2*dsqmin
1113 !  Give results
1114    write(msg, '(a,3i8,a,a,e14.6)' ) &
1115     ' However, must be changed due to symmetry =>',ngfft(1:3),ch10,&
1116     '       with max ecut=',ecutmx
1117    call wrtout(ount,msg)
1118 
1119    if (prodcurrent>huge(ii)) then
1120      write(msg, '(5a)' )&
1121       'The best FFT grid will lead to indices larger',ch10,&
1122       'than the largest representable integer on this machine.',ch10,&
1123       'Action: try to deal with smaller problems. Also contact ABINIT group.'
1124      ABI_ERROR(msg)
1125    end if
1126 
1127  end if ! testok==0
1128 
1129 !Possibly use the input values of ngfft
1130  if (int( dble(ngsav(1)) / minbox ) >= ngfft(1) .and.&
1131      int( dble(ngsav(2)) / minbox ) >= ngfft(2) .and.&
1132      int( dble(ngsav(3)) / minbox ) >= ngfft(3) ) then
1133 
1134 !  Must check whether the values are in the allowed list
1135    testok=0
1136    do mu=1,3
1137      do ii=1,msrch(mu)
1138        if(srch(ii,mu)==ngsav(mu))then
1139          testok=testok+1
1140          exit
1141        end if
1142      end do
1143    end do
1144    if(testok==3)then
1145      write(msg,'(a,3(a,i1,a,i3),a)') ' input values of',&
1146       (' ngfft(',mu,') =',ngsav(mu),mu=1,3),' are alright and will be used'
1147      call wrtout(ount,msg)
1148      do mu = 1,3
1149        ngfft(mu) = ngsav(mu)
1150      end do
1151    end if
1152 
1153  end if
1154 
1155 !mgfft needs to be set to the maximum of ngfft(1),ngfft(2),ngfft(3)
1156  mgfft = maxval(ngfft(1:3))
1157 
1158  if (paral_fft_==1) then
1159    ! For the time being, one need ngfft(2) and ngfft(3) to be multiple of nproc_fft
1160    if(modulo(ngfft(2),nproc_fft)/=0)then
1161      write(msg,'(4a,i5,a,i5)')&
1162       'The second dimension of the FFT grid, ngfft(2), should be ',&
1163       'a multiple of the number of processors for the FFT, nproc_fft.',ch10,&
1164       'However, ngfft(2)=',ngfft(2),' and nproc_fft=',nproc_fft
1165      ABI_BUG(msg)
1166    end if
1167    if(modulo(ngfft(3),nproc_fft)/=0)then
1168      write(msg,'(4a,i5,a,i5)')&
1169       'The third dimension of the FFT grid, ngfft(3), should be ',&
1170       'a multiple of the number of processors for the FFT, nproc_fft.',ch10,&
1171       'However, ngfft(3)=',ngfft(3),' and nproc_fft=',nproc_fft
1172      ABI_BUG(msg)
1173    end if
1174 
1175  else if (paral_fft_/=0) then
1176    write(msg,'(a,i0)')'paral_fft_ should be 0 or 1, but its value is ',paral_fft_
1177    ABI_BUG(msg)
1178  end if
1179 
1180 ! Compute effective number of FFT points (for this MPI node if parall FFT)
1181  nfft=product(ngfft(1:3))/max(1,nproc_fft)
1182 
1183 !Set up fft array dimensions ngfft(4,5,6) to avoid cache conflicts
1184  ngfft(4)=2*(ngfft(1)/2)+1
1185  ngfft(5)=2*(ngfft(2)/2)+1
1186  ngfft(6)=ngfft(3)
1187  if (any(fftalga == [FFT_FFTW3, FFT_DFTI])) then
1188    ! FFTW3 supports leading dimensions but at the price of a larger number of FFTs
1189    ! to be executed along z when the zero-padded version is used.
1190    ! One should check whether the augmentation is beneficial for FFTW3.
1191    ngfft(4)=2*(ngfft(1)/2)+1
1192    ngfft(5)=2*(ngfft(2)/2)+1
1193    !ngfft(4)=ngfft(1)
1194    !ngfft(5)=ngfft(2)
1195    ngfft(6)=ngfft(3)
1196  end if
1197 
1198  if (present(gpu_option)) then
1199    if (gpu_option/=ABI_GPU_DISABLED) then
1200      ngfft(4)=ngfft(1)
1201      ngfft(5)=ngfft(2)
1202      ngfft(6)=ngfft(3)
1203    end if
1204  end if
1205 
1206  ngfft(14:18)=0 ! ngfft(14) to be filled outside of getng
1207 
1208  if (paral_fft_==0) then
1209    ngfft(9)=0     ! paral_fft_
1210    ngfft(10)=1    ! nproc_fft
1211    ngfft(11)=0    ! me_fft
1212    ngfft(12)=0    ! n2proc
1213    ngfft(13)=0    ! n3proc
1214  else
1215    ngfft(9)=1     ! paral_fft_
1216    ngfft(10)=nproc_fft
1217    ngfft(11)=me_fft
1218    ngfft(12)=ngfft(2)/nproc_fft
1219    ngfft(13)=ngfft(3)/nproc_fft
1220  end if
1221 
1222  call print_ngfft(ngfft,header="FFT mesh",unit=ount,mode_paral="COLL")
1223 
1224 end subroutine getng

m_fftcore/indfftrisc [ Functions ]

[ Top ] [ m_fftcore ] [ Functions ]

NAME

 indfftrisc

FUNCTION

 Take the data for sphere boundary and list of planewave in sphere (kg_k), manipulate them
 for convenient use in fourwf, and output them in indpw_k

INPUTS

 gbound(2*mgfft+4)=sphere boundary data
 kg_k(3,npw_k)=reduced planewave coordinates
 mgfft=maximum size of 1D FFTs
 ngfft(18)=contain all needed information about 3D FFT, see ~abinit/doc/variables/vargs.htm#ngfft
 npw_k=number of G vectors in basis at this k point

OUTPUT

 indpw_k(4,npw_k)=array which gives fft box index for given basis sphere
   in a representation that is directly usable by sg_fftrisc.f
 ngb=number of FFTs along z

SOURCE

3820 subroutine indfftrisc(gbound,indpw_k,kg_k,mgfft,ngb,ngfft,npw_k)
3821 
3822 !Arguments ------------------------------------
3823 !scalars
3824  integer,intent(in) :: mgfft,npw_k
3825  integer,intent(out) :: ngb
3826 !arrays
3827  integer,intent(in) :: gbound(2*mgfft+4),kg_k(3,npw_k),ngfft(18)
3828  integer,intent(out) :: indpw_k(4,npw_k)
3829 
3830 !Local variables-------------------------------
3831 !scalars
3832  integer :: g1,g2,i1,i2,i3,igb,index,ipw,n1,n2,n3
3833 !arrays
3834  integer,allocatable :: index2d(:,:)
3835 
3836 ! *************************************************************************
3837 
3838  n1=ngfft(1) ; n2=ngfft(2) ; n3=ngfft(3)
3839 
3840 !First, generate a 2d index for each column of data
3841  ABI_MALLOC(index2d,(n1,n2))
3842  index2d(:,:)=0
3843  index=1
3844  igb=3
3845  do g2=0,gbound(2) ! g2max
3846    do g1=0,gbound(igb+1)  ! g1max
3847      index2d(g1+1,g2+1)=index
3848      index=index+1
3849    end do
3850    if(gbound(igb)<=-1)then ! g1min
3851      do g1=gbound(igb)+n1,n1-1
3852        index2d(g1+1,g2+1)=index
3853        index=index+1
3854      end do
3855    end if
3856    igb=igb+2
3857  end do
3858 
3859  if(gbound(1)<=-1)then ! g2min
3860    do g2=gbound(1)+n2,n2-1
3861      do g1=0,gbound(igb+1)
3862        index2d(g1+1,g2+1)=index
3863        index=index+1
3864      end do
3865      if(gbound(igb)<=-1)then
3866        do g1=gbound(igb)+n1,n1-1
3867          index2d(g1+1,g2+1)=index
3868          index=index+1
3869        end do
3870      end if
3871      igb=igb+2
3872    end do
3873  end if
3874 
3875  ngb=index-1
3876 
3877 
3878 !The 2d index has been generated
3879 !Now, contract indpw_k(1,ipw) and indpw_k(2,ipw) into indpw_k(4,ipw)
3880 !indpw_k(1,ipw) and indpw_k(2,ipw) are used to hold inverse of index2d,
3881 !and for them, the second index does not fill 1:npw . It is only
3882 !the number of z-transform FFTs.
3883 
3884 !$OMP PARALLEL DO PRIVATE(i1,i2,i3)
3885  do ipw=1,npw_k
3886    i1=kg_k(1,ipw); if(i1<0)i1=i1+n1 ; i1=i1+1
3887    i2=kg_k(2,ipw); if(i2<0)i2=i2+n2 ; i2=i2+1
3888    i3=kg_k(3,ipw); if(i3<0)i3=i3+n3 ; i3=i3+1
3889    indpw_k(4,ipw)=index2d(i1,i2)
3890    indpw_k(3,ipw)=i3
3891  end do
3892 
3893  do i1=1,n1
3894    do i2=1,n2
3895      index=index2d(i1,i2)
3896      if(index/=0)then
3897        indpw_k(1,index)=i1
3898        indpw_k(2,index)=i2
3899      end if
3900    end do
3901  end do
3902 
3903  ABI_FREE(index2d)
3904 
3905 end subroutine indfftrisc

m_fftcore/kgindex [ Functions ]

[ Top ] [ m_fftcore ] [ Functions ]

NAME

 kgindex

FUNCTION

 Compute the index of each plane wave on a FFT grid.

INPUTS

  kg_k(3,npw_k)=dimensionless coords of G vecs (integer)
  mpi_enreg=information about MPI parallelization
  ngfft(18)=contain all needed information about 3D FFT, see ~abinit/doc/variables/vargs.htm#ngfft
  npw_k=number of planewaves

OUTPUT

  indpw_k(npw_k)=linear list number (in fft box) of given G vector for the current processor (local adress)
                =0 if kg_k(ipw) is not treated by this procesor
  mask(npw_k)=True if  kg_k(ipw) belongs to this procesor, false otherwise.

NOTES

   mpi_enreg is not necessary in this case (the info is also in ngfft), but much more easy to read...

SOURCE

4510 subroutine kgindex(indpw_k, kg_k, mask, mpi_enreg, ngfft, npw_k)
4511 
4512 !Arguments ------------------------------------
4513 !scalars
4514  integer,intent(in) :: npw_k
4515  type(MPI_type),intent(in) :: mpi_enreg
4516 !arrays
4517  integer,intent(in) :: kg_k(3,npw_k),ngfft(18)
4518  integer,intent(out) :: indpw_k(npw_k)
4519  logical,intent(out) :: mask(npw_k)
4520 !Local variables-------------------------------
4521 !scalars
4522  integer :: ig,ig1,ig2,ig3,me_fft,n1,n2,n3,nd2
4523  character(len=500) :: msg
4524  !arrays
4525  integer, ABI_CONTIGUOUS pointer :: fftn2_distrib(:),ffti2_local(:)
4526  !integer, ABI_CONTIGUOUS pointer :: fftn3_distrib(:),ffti3_local(:)
4527 
4528 ! *************************************************************************
4529 
4530  n1=ngfft(1); n2=ngfft(2); n3=ngfft(3)
4531 
4532  ! Use the following indexing (N means ngfft of the adequate direction)
4533  ! 0 1 2 3 ... N/2    -(N-1)/2 ... -1    <= kg
4534  ! 1 2 3 4 ....N/2+1  N/2+2    ...  N    <= index
4535 
4536  me_fft=mpi_enreg%me_fft
4537  nd2=(n2-1)/mpi_enreg%nproc_fft+1
4538 
4539  if (n2== mpi_enreg%distribfft%n2_coarse) then
4540    fftn2_distrib => mpi_enreg%distribfft%tab_fftdp2_distrib
4541    ffti2_local => mpi_enreg%distribfft%tab_fftdp2_local
4542  else if (n2 == mpi_enreg%distribfft%n2_fine) then
4543    fftn2_distrib => mpi_enreg%distribfft%tab_fftdp2dg_distrib
4544    ffti2_local => mpi_enreg%distribfft%tab_fftdp2dg_local
4545  else
4546    ABI_BUG("Unable to find an allocated distrib for this fft grid")
4547  end if
4548 
4549  !call ptabs_fourwf(mpi_enreg,n2,n3,fftn2_distrib,ffti2_local,fftn3_distrib,ffti3_local)
4550 
4551  do ig=1,npw_k
4552    ig1=modulo(kg_k(1,ig),n1)
4553    ig2=modulo(kg_k(2,ig),n2)
4554    ig3=modulo(kg_k(3,ig),n3)
4555    if(me_fft==fftn2_distrib(ig2+1)) then
4556      ig2=ffti2_local(ig2+1) - 1
4557      indpw_k(ig)=ig1+1+n1*(ig2+nd2*ig3)
4558      mask(ig)=.true.
4559    else
4560      indpw_k(ig)=0
4561      mask(ig)=.false.
4562    end if
4563    if (any(kg_k(:,ig) > ngfft(1:3)/2) .or. any(kg_k(:,ig) < -(ngfft(1:3)-1)/2) ) then
4564      write(msg,'(a,3(i0,1x),a)')" The G-vector: ",kg_k(:, ig)," falls outside the FFT box. Increase boxcutmin (?)"
4565      ABI_ERROR(msg)
4566    end if
4567  end do
4568 
4569 end subroutine kgindex

m_fftcore/kpgcount [ Functions ]

[ Top ] [ m_fftcore ] [ Functions ]

NAME

 kpgcount

FUNCTION

  Give the minimum and maximum number of G vectors in each direction:
  for each k-point, compute the number of G vectors in each direction,
  then store the min and max value over the set of k-points.

INPUTS

  ecut=planewave kinetic energy cutoff (hartrees)
  exchn2n3d=if 1, n2 and n3 are exchanged
  gmet(3,3)=reciprocal space metric (bohr^-2)
  istwfk(nkpt)=option parameter that describes the storage of wfs
  kpt(3,nkpt)=reduced coords of k point (in terms of recip latt vecs)
  nkpt=number of k points

OUTPUT

  ngmax(3)=maximum number of G vectors in each direction (x,y,z)
  ngmin(3)=minimum number of G vectors in each direction (x,y,z)

NOTES

  This routine has been extracted from kpgsph...

SOURCE

4329 subroutine kpgcount(ecut,exchn2n3d,gmet,istwfk,kpt,ngmax,ngmin,nkpt)
4330 
4331 
4332 !Arguments ------------------------------------
4333 !scalars
4334  integer,intent(in) :: exchn2n3d,nkpt
4335  integer,intent(out) :: ngmax(3),ngmin(3)
4336  real(dp),intent(in) :: ecut
4337 !arrays
4338  integer,intent(in) :: istwfk(nkpt)
4339  real(dp),intent(in) :: gmet(3,3),kpt(3,nkpt)
4340 
4341 !Local variables-------------------------------
4342 !scalars
4343  integer :: ii,ikpt,istwf_k,kmax,ng1,ng2,ng3,nmin
4344  real(dp) :: gmet_trace,gscut,xx
4345 !arrays
4346  integer :: ngrid(3),nmax(3)
4347  real(dp) :: minor(3),numer(3)
4348 
4349 ! *************************************************************************
4350 
4351  DBG_ENTER("COLL")
4352 
4353  gscut=0.5_dp*ecut*piinv**2
4354  gmet_trace=gmet(1,1)+gmet(2,2)+gmet(3,3)
4355  minor(1)=gmet(2,2)*gmet(3,3)-gmet(2,3)**2
4356  minor(2)=gmet(1,1)*gmet(3,3)-gmet(1,3)**2
4357  minor(3)=gmet(2,2)*gmet(1,1)-gmet(1,2)**2
4358  numer(1)=gmet(1,2)**2*gmet(3,3)-2.0_dp*gmet(1,2)*gmet(1,3)*gmet(2,3)+gmet(1,3)**2*gmet(2,2)
4359  numer(2)=gmet(2,3)**2*gmet(1,1)-2.0_dp*gmet(1,2)*gmet(1,3)*gmet(2,3)+gmet(2,1)**2*gmet(3,3)
4360  numer(3)=gmet(3,2)**2*gmet(1,1)-2.0_dp*gmet(1,2)*gmet(1,3)*gmet(2,3)+gmet(1,3)**2*gmet(2,2)
4361 
4362  ngmin(:)=1000000;ngmax(:)=0
4363  do ikpt=1,nkpt
4364    istwf_k=istwfk(ikpt)
4365 
4366    do ii=1,3
4367      xx=gmet(ii,ii)*minor(ii)-numer(ii)
4368      if(xx<tol10*gmet_trace**3.or.minor(ii)<tol10*gmet_trace**2)then
4369        ABI_BUG('The metric tensor seem incorrect')
4370      end if
4371      kmax=sqrt(gscut*minor(ii)/xx)
4372      nmax(ii)=floor(kmax-kpt(ii,ikpt)+tol10)
4373      nmin=ceiling(-kmax-kpt(ii,ikpt)-tol10)
4374      ngrid(ii)=nmax(ii)-nmin+1
4375    end do
4376 
4377    ng1=ngrid(1);if(istwf_k==2.or.istwf_k==3) ng1=nmax(1)+1
4378    if(exchn2n3d==0)then
4379      ng3=ngrid(3)
4380      ng2=ngrid(2);if(istwf_k>=2) ng2=nmax(2)+1
4381      if(istwf_k>=2.and.istwf_k<=5) ng2=ng2-1
4382    else
4383      ng2=ngrid(2)
4384      ng3=ngrid(3);if(istwf_k>=2) ng3=nmax(3)+1
4385      if(istwf_k==2.or.istwf_k==3.or.istwf_k==6.or.istwf_k==7) ng3=ng3-1
4386    end if
4387 
4388    if(ng1<ngmin(1)) ngmin(1)=ng1
4389    if(ng2<ngmin(2)) ngmin(2)=ng2
4390    if(ng3<ngmin(3)) ngmin(3)=ng3
4391    if(ng1>ngmax(1)) ngmax(1)=ng1
4392    if(ng2>ngmax(2)) ngmax(2)=ng2
4393    if(ng3>ngmax(3)) ngmax(3)=ng3
4394 
4395  end do
4396 
4397  DBG_EXIT("COLL")
4398 
4399 end subroutine kpgcount

m_fftcore/kpgsph [ Functions ]

[ Top ] [ m_fftcore ] [ Functions ]

NAME

 kpgsph

FUNCTION

 Use reciprocal space metric gmet(3,3) to set up the list
 of G vectors inside a sphere out to $ (1/2)*(2*\pi*(k+G))^2=ecut $.
 If mkmem=0 and mpw=0, then only count the number of planewaves

INPUTS

  ecut=planewave kinetic energy cutoff (hartrees)
  exchn2n3d=if 1, n2 and n3 are exchanged
  gmet(3,3)=reciprocal space metric (bohr^-2)
  ikg=shift to be given to the location of the output data in the array kg
  ikpt=number of the k-point
  istwf_k=option parameter that describes the storage of wfs
  kpt(3)=reduced coords of k point (in terms of recip latt vecs)
  mkmem =maximum number of k points which can fit in core memory
  mpi_enreg=information about MPI parallelization
  mpw=maximum number of planewaves as dimensioned in calling routine

OUTPUT

  kg(3,mpw*mkmem)=dimensionless coords of resulting G vecs (integer)
  npw=resulting number of planewaves inside ecut centered at kpt

SIDE EFFECTS

  mpi_enreg
    %me_g0=if 1, the plane wave G(0 0 0) is in the set of plane waves (and is the first)
    TODO: other SIDE EFFECTS on mpi_enreg should be described !!!

NOTES

  Must take into account the time-reversal symmetry when istwf_k is not 1.

SOURCE

3946 subroutine kpgsph(ecut,exchn2n3d,gmet,ikg,ikpt,istwf_k,kg,kpt,mkmem,mpi_enreg,mpw,npw)
3947 
3948 !Arguments ------------------------------------
3949 !scalars
3950  integer,intent(in) :: exchn2n3d,ikg,ikpt,istwf_k,mkmem,mpw
3951  integer,intent(out) :: npw
3952  real(dp),intent(in) :: ecut
3953  type(MPI_type),intent(inout) :: mpi_enreg
3954 !arrays
3955  integer,intent(inout) :: kg(3,mpw*mkmem)
3956  real(dp),intent(in) :: gmet(3,3),kpt(3)
3957 
3958 !Local variables-------------------------------
3959 !scalars
3960  integer :: i1,ig,ig1p,ig1pmax,ig2,ig2p,ig2pmax,ig2pmin,ig3,ig3p,ig3pmax
3961  integer :: ig3pmin,igtot,ii,ikpt_this_proc,in,ind,np_band,np_fft,npw_before,npw_remain,npw_split
3962  integer, save :: alloc_size=0
3963  real(dp) :: gap_pw,gmet11,gmet_trace,gmin,gs_fact,gs_part,gscut,v1,v2,v3,xx
3964  logical :: ipw_ok
3965  character(len=500) :: msg
3966 !arrays
3967  integer :: ngrid(3),nmax(3),nmin(3),n2,ierr
3968  integer,allocatable :: array_ipw(:),ig1arr(:),ig2arr(:)
3969  integer,allocatable :: ig3arr(:),kg_ind(:),kg_small(:,:)
3970  integer, allocatable :: npw_gather(:),npw_disp(:) ,kg_ind_gather(:),kg_small_gather(:,:)
3971  real(dp) :: kmax(3),minor(3),numer(3),tsec(2)
3972  real(dp),allocatable :: kg1arr(:),kg2arr(:),kg3arr(:)
3973 
3974 ! *************************************************************************
3975 
3976  DBG_ENTER("COLL")
3977 
3978  call timab(23,1,tsec)
3979  if(istwf_k<1 .or. istwf_k>9)then
3980    write(msg,'(3a,i0,a)' )&
3981     'The variable istwf_k must be between 1 and 9, while',ch10,&
3982     'the argument of the routine istwf_k =',istwf_k,'.'
3983    ABI_BUG(msg)
3984  end if
3985 
3986  if(ikg+mpw>mkmem*mpw)then
3987    write(msg,'(5a,i0,a,i0,a,i0,4a)')&
3988     'The variables ikg, mkmem, and mpw  must satisfy ikg<=(mkmem-1)*mpw,',ch10,&
3989     'while the arguments of the routine are',ch10,&
3990     'ikg =',ikg,', mkmem =',mkmem,', mpw =',mpw,ch10,&
3991     'Probable cause: Known error in invars1 for parallel spin-polarized case.',ch10,&
3992     'Temporary solution: Change the number of parallel processes.'
3993    ABI_BUG(msg)
3994  end if
3995 
3996  np_band=0
3997  if (mpw>0) then
3998    np_band=1; if(mpi_enreg%paral_kgb==1) np_band=max(1,mpi_enreg%nproc_band)
3999    alloc_size=max(alloc_size,(mpw+1)*np_band)
4000    ABI_MALLOC(kg_small,(3, alloc_size))
4001    ABI_MALLOC(kg_ind,(alloc_size))
4002    kg_ind(:)=0
4003  end if
4004 
4005 !A larger array, that will be split on the correct processor
4006 !G**2 cutoff, gscut=Ecut/2 /Pi^2
4007 
4008  gscut=0.5_dp*ecut*piinv**2
4009 
4010 !In reduced coordinates, determine maximal value of k+G and G for each direction
4011 
4012  minor(1)=gmet(2,2)*gmet(3,3)-gmet(2,3)**2
4013  numer(1)=gmet(1,2)**2*gmet(3,3)-2.0_dp*gmet(1,2)*gmet(1,3)*gmet(2,3) +gmet(1,3)**2*gmet(2,2)
4014  minor(2)=gmet(1,1)*gmet(3,3)-gmet(1,3)**2
4015  numer(2)=gmet(2,3)**2*gmet(1,1)-2.0_dp*gmet(1,2)*gmet(1,3)*gmet(2,3) +gmet(2,1)**2*gmet(3,3)
4016  minor(3)=gmet(2,2)*gmet(1,1)-gmet(1,2)**2
4017  numer(3)=gmet(3,2)**2*gmet(1,1)-2.0_dp*gmet(1,2)*gmet(1,3)*gmet(2,3) +gmet(1,3)**2*gmet(2,2)
4018 
4019 !Take the trace of the gmet tensor as dimensional reference
4020  gmet_trace=gmet(1,1)+gmet(2,2)+gmet(3,3)
4021 
4022  do ii=1,3
4023    xx=gmet(ii,ii)*minor(ii)-numer(ii)
4024    if(xx<tol10*gmet_trace**3 .or. minor(ii)<tol10*gmet_trace**2)then
4025      ABI_BUG('The metric tensor seem incorrect')
4026    end if
4027    kmax(ii)=sqrt(gscut*minor(ii)/xx)
4028    nmax(ii)=floor(kmax(ii)-kpt(ii)+tol10)
4029    nmin(ii)=ceiling(-kmax(ii)-kpt(ii)-tol10)
4030    ngrid(ii)=nmax(ii)-nmin(ii)+1
4031  end do
4032 !perform looping over fft box grid of size ngfft(1)*ngfft(2)*ngfft(3):
4033  ig=0;ind=0
4034  in=0
4035  gmet11=gmet(1,1)
4036 
4037 !Set up standard search sequence for grid points, in standard storage mode :
4038 !0 1 2 3 ... nmax nmin ... -1
4039 !If the mode is not standard, then some part of the FFT grid must be selected
4040 !
4041  ABI_MALLOC(ig1arr,(ngrid(1)))
4042  ABI_MALLOC(ig2arr,(ngrid(2)))
4043  ABI_MALLOC(ig3arr,(ngrid(3)))
4044  ABI_MALLOC(kg1arr,(ngrid(1)))
4045  ABI_MALLOC(kg2arr,(ngrid(2)))
4046  ABI_MALLOC(kg3arr,(ngrid(3)))
4047 
4048  do ig1p=1,ngrid(1)
4049    ig1arr(ig1p)=ig1p-1
4050    if (ig1p-1>nmax(1)) ig1arr(ig1p)=ig1p-ngrid(1)-1
4051    kg1arr(ig1p)=kpt(1)+dble(ig1arr(ig1p))
4052  end do
4053 
4054 !For the second direction, the number of points might depend on istwf_k
4055 !---------------------------------------------------------------------
4056  ig2pmax=ngrid(2)
4057  if(istwf_k>=2 .and. exchn2n3d==0) ig2pmax=nmax(2)+1
4058  ABI_MALLOC(array_ipw,(-ig2pmax:ig2pmax))
4059  array_ipw(:)=0
4060  do ig2p=1,ig2pmax
4061    ig2arr(ig2p)=ig2p-1
4062    if (ig2p-1>nmax(2)) ig2arr(ig2p)=ig2p-ngrid(2)-1
4063    kg2arr(ig2p)=kpt(2)+dble(ig2arr(ig2p))
4064  end do
4065 
4066 !For the third direction, the number of points might depend on istwf_k
4067 !---------------------------------------------------------------------
4068  ig3pmax=ngrid(3)
4069  if (istwf_k>=2 .and. exchn2n3d==1) ig3pmax=nmax(3)+1
4070 
4071  do ig3p=1,ig3pmax
4072    ig3arr(ig3p)=ig3p-1
4073    if(ig3p-1>nmax(3)) ig3arr(ig3p)=ig3p-ngrid(3)-1
4074    kg3arr(ig3p)=kpt(3)+dble(ig3arr(ig3p))
4075  end do
4076 
4077 !Performs loop on all grid points.
4078 !---------------------------------------------------------------------
4079  igtot = 0
4080  if(exchn2n3d==0)then
4081    mpi_enreg%me_g0=0
4082    do ig3p=1,ngrid(3)
4083      ig3=ig3arr(ig3p)
4084      v3=kg3arr(ig3p)
4085      ig2pmin=1
4086      if( istwf_k>=2 .and. istwf_k<=5 .and. ig3<0)then
4087        ig2pmin=2
4088      end if
4089 !    ig2pmax was initialized previously
4090      do ig2p=ig2pmin,ig2pmax
4091        ig2=ig2arr(ig2p)
4092 !      PAY ATTENTION : the proc 0 must have me_g0=1
4093        ipw_ok = .true.
4094        if(mpi_enreg%paral_kgb==1 ) then
4095           n2 =mpi_enreg%distribfft%n2_coarse
4096           ipw_ok = ipw_ok.and.(mpi_enreg%me_fft == mpi_enreg%distribfft%tab_fftwf2_distrib( modulo(ig2,n2) + 1))
4097        end if
4098        if (ig2==0 .and. ipw_ok) mpi_enreg%me_g0=1
4099        v2=kg2arr(ig2p)
4100        gs_part=gmet(2,2)*v2*v2+gmet(3,3)*v3*v3+2.0_dp*gmet(2,3)*v2*v3
4101        gs_fact=2.0_dp*(gmet(1,2)*v2+gmet(3,1)*v3)
4102        ig1pmax=ngrid(1)
4103        if( (istwf_k==2.or.istwf_k==3) .and. ig3p==1 .and. ig2p==1)ig1pmax=nmax(1)+1
4104        do ig1p=1,ig1pmax
4105          v1=kg1arr(ig1p)
4106          gmin=gs_part+v1*(gs_fact+v1*gmet11)
4107 !        If inside sphere:
4108          if (gmin<=gscut) then
4109            if (ipw_ok) then
4110              ig=ig+1  ! inside sphere
4111              igtot=igtot+1
4112              if (mpw>0.and.ig<=alloc_size) then
4113 !              Keep coords of pw:
4114                kg_small(1,ig)=ig1arr(ig1p)
4115                kg_small(2,ig)=ig2
4116                kg_small(3,ig)=ig3
4117                kg_ind(ig)=igtot
4118              end if
4119              array_ipw(ig2)=array_ipw(ig2)+1
4120            else
4121              igtot=igtot+1
4122            end if
4123          end if
4124        end do !ig1p
4125      end do !ig2p
4126    end do !ig3p
4127 
4128  else ! if (exchn2n3d/=0)
4129 
4130 !  ig2pmax was initialized previously
4131    mpi_enreg%me_g0=0
4132    do ig2p=1,ngrid(2)
4133      ig2=ig2arr(ig2p)
4134 !    PAY ATTENTION : the proc 0 must have me_g0=1
4135      ipw_ok = .true.
4136      if(mpi_enreg%paral_kgb==1 ) then
4137         n2 =mpi_enreg%distribfft%n2_coarse
4138         ipw_ok = ipw_ok.and.(mpi_enreg%me_fft == mpi_enreg%distribfft%tab_fftwf2_distrib( modulo(ig2,n2) + 1))
4139      end if
4140      if(ig2==0 .and. istwf_k>=2 .and. ipw_ok) mpi_enreg%me_g0=1
4141      v2     =kg2arr(ig2p)
4142      ig3pmin=1
4143      if( (istwf_k==2 .or. istwf_k==3 .or. istwf_k==6 .or. istwf_k==7) .and. ig2<0)then
4144        ig3pmin=2
4145      end if
4146      do ig3p=ig3pmin,ig3pmax
4147        ig3=ig3arr(ig3p)
4148        v3=kg3arr(ig3p)
4149        gs_part=gmet(2,2)*v2*v2+gmet(3,3)*v3*v3+2.0_dp*gmet(2,3)*v2*v3
4150        gs_fact=2.0_dp*(gmet(1,2)*v2+gmet(3,1)*v3)
4151        ig1pmax=ngrid(1)
4152        if( (istwf_k==2.or.istwf_k==3) .and. ig3p==1 .and. ig2p==1)ig1pmax=nmax(1)+1
4153        do ig1p=1,ig1pmax
4154          v1=kg1arr(ig1p)
4155          gmin=gs_part+v1*(gs_fact+v1*gmet11)
4156 !        If inside sphere:
4157          if (gmin<=gscut) then
4158            if (ipw_ok) then
4159              ig=ig+1  ! inside sphere
4160              igtot=igtot+1
4161 !            Make sure not to overrun array, or simply do not store if mpw=0
4162              if (mpw>0.and.ig<=alloc_size) then
4163 !              Keep coords of pw:
4164                kg_small(1,ig)=ig1arr(ig1p)
4165                kg_small(2,ig)=ig2
4166                kg_small(3,ig)=ig3
4167                kg_ind(ig)=igtot
4168              end if
4169            else
4170              igtot=igtot+1
4171            end if
4172          end if
4173 
4174        end do ! ig1p
4175      end do ! ig3p
4176 !    end if ! if the ig2 plane is to be treated by this processor
4177    end do ! ig2p
4178 
4179  end if ! exchn2n3d==0 or ==1
4180 
4181 !Total number of G vectors at this k point is assigned: npw
4182 !when getcell = 1 it can be that ig exceeds mpw, the bound on kp_small
4183 !here is a workaround:
4184  if (mpw*np_band > 0 .and. ig > alloc_size) then
4185    npw = mpw*np_band
4186  else
4187    npw=ig
4188  end if
4189  alloc_size = max(alloc_size,npw)
4190 
4191  ABI_FREE(ig1arr)
4192  ABI_FREE(ig2arr)
4193  ABI_FREE(ig3arr)
4194  ABI_FREE(kg1arr)
4195  ABI_FREE(kg2arr)
4196  ABI_FREE(kg3arr)
4197 
4198 !BandFFT: plane-wave load balancing
4199  if (mpi_enreg%paral_kgb==1.and.mpi_enreg%nproc_fft>1.and. mpi_enreg%pw_unbal_thresh>zero.and. istwf_k==1) then
4200 !  Check for reequilibration
4201    np_fft=max(1,mpi_enreg%nproc_fft)
4202    ABI_MALLOC(npw_gather,(np_fft)) ! Count pw before balancing
4203    call xmpi_allgather(npw,npw_gather,mpi_enreg%comm_fft,ierr)
4204    gap_pw = 100._dp*(maxval(npw_gather(:))-minval(npw_gather))/(1.*sum(npw_gather(:))/np_fft)
4205    write(msg,'(a,f5.2)' ) ' Relative gap for number of plane waves between process (%): ',gap_pw
4206    call wrtout(std_out,msg)
4207    if(gap_pw > mpi_enreg%pw_unbal_thresh) then ! Effective reequilibration
4208      write(msg,'(a,f5.2,a,i4,a,f5.2,a)') &
4209         'Plane-wave unbalancing (',gap_pw,'%) for kpt ',ikpt,' is higher than threshold (',&
4210         mpi_enreg%pw_unbal_thresh,'%); a plane-wave balancing procedure is activated!'
4211      call wrtout(std_out,msg)
4212      !Get optimal number
4213      npw_split=sum(npw_gather(:))
4214      npw=npw_split/np_fft
4215      npw_remain=modulo(npw_split,np_fft)
4216      if(mpi_enreg%me_fft < npw_remain) npw=npw+1
4217      ig=npw
4218      !write(msg,*) 'New npw_fft = ', npw
4219      !call wrtout(std_out,msg)
4220      alloc_size = max(alloc_size,npw)
4221      if(mpw>0 ) then  !Step for requilibration between fft process
4222        ABI_MALLOC(npw_disp,(np_fft))
4223        npw_disp=0
4224        do i1=2,np_fft
4225          npw_disp(i1) = npw_disp(i1-1) + npw_gather(i1-1)
4226        end do
4227        ABI_MALLOC(kg_ind_gather,(npw_split))
4228        ABI_MALLOC(kg_small_gather,(3,npw_split))
4229        call xmpi_allgatherv(kg_ind, npw_gather(mpi_enreg%me_fft+1) , &
4230 &            kg_ind_gather,npw_gather,npw_disp,mpi_enreg%comm_fft,ierr)
4231        call xmpi_allgatherv(kg_small,3*npw_gather(mpi_enreg%me_fft+1), &
4232 &            kg_small_gather,3*npw_gather, 3*npw_disp,mpi_enreg%comm_fft,ierr)
4233        npw_before=mpi_enreg%me_fft*(npw_split/np_fft)+min(npw_remain,mpi_enreg%me_fft)
4234        kg_small(:,1:npw)=kg_small_gather(:,npw_before+1:npw_before+npw)
4235        kg_ind(  1:npw)=kg_ind_gather(npw_before+1:npw_before+npw)
4236 #ifdef DEBUG_MODE
4237        call wrtout(std_out,"keeping values done")
4238 #endif
4239        ABI_FREE(npw_disp)
4240        ABI_FREE(kg_ind_gather)
4241        ABI_FREE(kg_small_gather)
4242      end if
4243    end if!End of reequilibration step for paral KGB
4244    ABI_FREE(npw_gather)
4245  end if
4246 
4247 !BandFFT: band load balancing
4248  if(mpi_enreg%paral_kgb==1.and.mpi_enreg%nproc_band>0) then
4249    np_band=max(1,mpi_enreg%nproc_band)
4250    npw_split=ig;npw=npw_split/np_band
4251    npw_remain=modulo(npw_split,np_band)
4252    if(mpi_enreg%me_band < npw_remain) npw=npw+1
4253    if(mpw > 0) then ! This is for the case when we only compute npw and put mpw=0
4254      npw_before=mpi_enreg%me_band*(npw_split/np_band)+min(npw_remain,mpi_enreg%me_band)
4255      kg_small(:,1:npw)=kg_small(:,npw_before+1:npw_before+npw)
4256      kg_ind  (  1:npw)=kg_ind  (  npw_before+1:npw_before+npw)
4257    end if
4258  end if
4259  if(mpw > 0) then
4260    do i1=1,npw
4261      kg(:,i1+ikg)=kg_small(:,i1)
4262      if (allocated(mpi_enreg%my_kgtab)) then
4263        ikpt_this_proc=mpi_enreg%my_kpttab(ikpt)
4264        mpi_enreg%my_kgtab(i1,ikpt_this_proc) = kg_ind(i1)
4265      end if
4266    end do
4267    ABI_FREE(kg_small)
4268    ABI_FREE(kg_ind)
4269  end if
4270 
4271  ABI_FREE(array_ipw)
4272 
4273 !Take care of the me_g0 flag
4274  mpi_enreg%me_g0_fft=mpi_enreg%me_g0
4275  if(mpi_enreg%paral_kgb==1.and.mpi_enreg%nproc_band>0) then
4276    if(mpi_enreg%me_band==0.and.mpi_enreg%me_g0==1) then
4277 !    In this case, the processors had the 0 G vector before the new distribution, and still keeps it
4278      mpi_enreg%me_g0=1
4279    else
4280 !    All other cases
4281      mpi_enreg%me_g0=0
4282    end if
4283  end if
4284 
4285 !Check that npw is not zero
4286  if(mpi_enreg%paral_kgb==1.and.npw==0) then
4287    write(msg,'(5a)' )&
4288     'Please decrease the number of npband*npfft MPI processes!',ch10,&
4289     'One of the MPI process has no plane-wave to handle.',ch10,&
4290     'Action: decrease npband and/or npfft.'
4291    ABI_ERROR(msg)
4292  endif
4293 
4294  call timab(23,2,tsec)
4295 
4296  DBG_EXIT("COLL")
4297 
4298 end subroutine kpgsph

m_fftcore/mpifft_collect_datar [ Functions ]

[ Top ] [ m_fftcore ] [ Functions ]

NAME

  mpifft_collect_datar

FUNCTION

 Collect a real-space MPI-FFT distributed array on each proc.

INPUTS

  ngfft(18)=contain all needed information about 3D FFT (see NOTES at beginning of scfcv)
  cplex=1 if real array, 2 for complex
  nfft=Number of FFT points treated by this MPI proc
  nspden=Second dimension of rhor
  rhor(cplex*nfft,nspden)=Array in real space (MPI-FFT distributed)
  fftn3_distrib(n3)=rank of the processors which own fft planes in 3rd dimension.
  fftn3_local(n3)=local i3 indices
  comm_fft=MPI-FFT communicator
  [master]=MPI rank, Optional. If present, the global array is available only on master node.

OUTPUT

   rhor_glob(cplex*nfft_tot,nspden)=Global array

SOURCE

4781 subroutine mpifft_collect_datar(ngfft,cplex,nfft,nspden,rhor,comm_fft,fftn3_distrib,ffti3_local,rhor_glob,master)
4782 
4783 !Arguments ------------------------------------
4784 !scalars
4785  integer,intent(in) :: cplex,nfft,nspden,comm_fft
4786  integer,optional,intent(in) :: master
4787 !arrays
4788  integer,intent(in) :: ngfft(18),fftn3_distrib(ngfft(3)),ffti3_local(ngfft(3))
4789  real(dp),intent(in) :: rhor(cplex*nfft,nspden)
4790  real(dp),intent(out) :: rhor_glob(cplex*product(ngfft(1:3)),nspden)
4791 
4792 !Local variables-------------------------------
4793  integer :: ispden,i1,i2,i3,me_fft,i3_local,my_fftbase,glob_fftbase
4794  integer :: n1,n2,n3,ierr,nfft_tot
4795 
4796 ! *************************************************************************
4797 
4798  nfft_tot = product(ngfft(1:3)); me_fft = xmpi_comm_rank(comm_fft)
4799  n1 = ngfft(1); n2 = ngfft(2); n3 = ngfft(3)
4800 
4801  if (nfft_tot == nfft) then
4802    ! full rhor on each node, just do a copy
4803    rhor_glob = rhor
4804  else
4805    ! if MPI-FFT we have to gather the full rhor on each node.
4806    rhor_glob = zero
4807    do ispden=1,nspden
4808      do i3=1,n3
4809        if (me_fft == fftn3_distrib(i3)) then
4810          i3_local = ffti3_local(i3)
4811          do i2=1,n2
4812            my_fftbase =   cplex * ( (i2-1)*n1 + (i3_local-1)*n1*n2 )
4813            glob_fftbase = cplex * ( (i2-1)*n1 + (i3-1)*n1*n2 )
4814            do i1=1,cplex * n1
4815              rhor_glob(i1+glob_fftbase,ispden) = rhor(i1+my_fftbase,ispden)
4816            end do
4817          end do
4818        end if
4819      end do
4820    end do
4821    if (present(master)) then
4822      call xmpi_sum_master(rhor_glob,master,comm_fft,ierr)
4823    else
4824      call xmpi_sum(rhor_glob,comm_fft,ierr)
4825    end if
4826  end if
4827 
4828 end subroutine mpifft_collect_datar

m_fftcore/mpifft_dbox2fg [ Functions ]

[ Top ] [ m_fftcore ] [ Functions ]

NAME

  mpifft_dbox2fg

FUNCTION

INPUTS

OUTPUT

SOURCE

3409 pure subroutine mpifft_dbox2fg(n1,n2,n3,n4,nd2proc,n6,ndat,fftn2_distrib,ffti2_local,me_fft,workf,nfft,fofg)
3410 
3411 !Arguments ------------------------------------
3412 !scalars
3413  integer,intent(in) :: n1,n2,n3,n4,nd2proc,n6,ndat,me_fft,nfft
3414 !arrays
3415  integer,intent(in) :: fftn2_distrib(n2),ffti2_local(n2)
3416  real(dp),intent(in) :: workf(2,n4,n6,nd2proc*ndat)
3417  real(dp),intent(out) :: fofg(2,nfft*ndat)
3418 
3419 !Local variables-------------------------------
3420  integer :: idat,i1,i2,i3,i2_local,i2_ldat,fgbase
3421  real(dp) :: xnorm
3422 
3423 ! *************************************************************************
3424 
3425  xnorm=one/dble(n1*n2*n3)
3426 
3427  ! Transfer fft output to the original fft box
3428  do idat=1,ndat
3429    do i2=1,n2
3430      if( fftn2_distrib(i2) == me_fft) then
3431        i2_local = ffti2_local(i2)
3432        i2_ldat = i2_local + (idat-1) * nd2proc
3433        do i3=1,n3
3434          fgbase = n1*(i2_local - 1 + nd2proc*(i3-1)) + (idat - 1) * nfft
3435          do i1=1,n1
3436            fofg(1,i1+fgbase)=workf(1,i1,i3,i2_ldat)*xnorm
3437            fofg(2,i1+fgbase)=workf(2,i1,i3,i2_ldat)*xnorm
3438          end do
3439        end do
3440      end if
3441    end do
3442  end do
3443 
3444 end subroutine mpifft_dbox2fg

m_fftcore/mpifft_dbox2fg_dpc [ Functions ]

[ Top ] [ m_fftcore ] [ Functions ]

NAME

  mpifft_dbox2fg_dpc

FUNCTION

INPUTS

OUTPUT

SOURCE

3461 pure subroutine mpifft_dbox2fg_dpc(n1,n2,n3,n4,nd2proc,n6,ndat,fftn2_distrib,ffti2_local,me_fft,workf,nfft,fofg)
3462 
3463 !Arguments ------------------------------------
3464 !scalars
3465  integer,intent(in) :: n1,n2,n3,n4,nd2proc,n6,ndat,me_fft,nfft
3466 !arrays
3467  integer,intent(in) :: fftn2_distrib(n2),ffti2_local(n2)
3468  complex(dpc),intent(in) :: workf(n4,n6,nd2proc*ndat)
3469  real(dp),intent(out) :: fofg(2,nfft*ndat)
3470 
3471 !Local variables-------------------------------
3472  integer :: idat,i1,i2,i3,i2_local,i2_ldat,fgbase
3473  real(dp) :: xnorm
3474 
3475 ! *************************************************************************
3476 
3477  xnorm=one/dble(n1*n2*n3)
3478 
3479  ! Transfer fft output to the original fft box
3480  do idat=1,ndat
3481    do i2=1,n2
3482      if( fftn2_distrib(i2) == me_fft) then
3483        i2_local = ffti2_local(i2)
3484        i2_ldat = i2_local + (idat-1) * nd2proc
3485        do i3=1,n3
3486          fgbase = n1*(i2_local - 1 + nd2proc*(i3-1)) + (idat - 1) * nfft
3487          do i1=1,n1
3488            fofg(1,i1+fgbase)=REAL (workf(i1,i3,i2_ldat))*xnorm
3489            fofg(2,i1+fgbase)=AIMAG(workf(i1,i3,i2_ldat))*xnorm
3490          end do
3491        end do
3492      end if
3493    end do
3494  end do
3495 
3496 end subroutine mpifft_dbox2fg_dpc

m_fftcore/mpifft_dbox2fr [ Functions ]

[ Top ] [ m_fftcore ] [ Functions ]

NAME

  mpifft_dbox2fr

FUNCTION

INPUTS

OUTPUT

SOURCE

3513 pure subroutine mpifft_dbox2fr(n1,n2,n3,n4,n5,nd3proc,ndat,fftn3_distrib,ffti3_local,me_fft,workr,cplex,nfft,fofr)
3514 
3515 !Arguments ------------------------------------
3516 !scalars
3517  integer,intent(in) :: n1,n2,n3,n4,n5,nd3proc,ndat,me_fft,nfft,cplex
3518 !!arrays
3519  integer,intent(in) :: fftn3_distrib(n3),ffti3_local(n3)
3520  real(dp),intent(in) :: workr(2,n4,n5,nd3proc*ndat)
3521  real(dp),intent(out) :: fofr(cplex*nfft*ndat)
3522 
3523 !Local variables-------------------------------
3524  integer :: idat,i1,i2,i3,i3_local,i3_ldat,frbase
3525 
3526 ! *************************************************************************
3527 
3528  select case (cplex)
3529  case (1)
3530 
3531    do idat=1,ndat
3532      do i3=1,n3
3533        if( fftn3_distrib(i3) == me_fft) then
3534          i3_local = ffti3_local(i3)
3535          i3_ldat = i3_local + (idat - 1) * nd3proc
3536          do i2=1,n2
3537            frbase=n1*(i2-1+n2*(i3_local-1)) + (idat - 1) * nfft
3538            do i1=1,n1
3539              fofr(i1+frbase)=workr(1,i1,i2,i3_ldat)
3540            end do
3541          end do
3542        end if
3543      end do
3544    end do
3545 
3546  case (2)
3547 
3548    do idat=1,ndat
3549      do i3=1,n3
3550        if (fftn3_distrib(i3) == me_fft) then
3551          i3_local = ffti3_local(i3)
3552          i3_ldat = i3_local + (idat - 1) * nd3proc
3553          do i2=1,n2
3554            frbase=2*n1*(i2-1+n2*(i3_local-1)) + (idat - 1) * cplex * nfft
3555            !if (frbase > cplex*nfft*ndat - 2*n1) then
3556            !   write(std_out,*)i2,i3_local,frbase,cplex*nfft*ndat
3557            !   ABI_ERROR("frbase")
3558            !end if
3559            do i1=1,n1
3560              fofr(2*i1-1+frbase)=workr(1,i1,i2,i3_ldat)
3561              fofr(2*i1  +frbase)=workr(2,i1,i2,i3_ldat)
3562            end do
3563          end do
3564        end if
3565      end do
3566    end do
3567 
3568  case default
3569    !ABI_BUG("Wrong cplex")
3570    fofr = huge(one)
3571  end select
3572 
3573 end subroutine mpifft_dbox2fr

m_fftcore/mpifft_dbox2fr_dpc [ Functions ]

[ Top ] [ m_fftcore ] [ Functions ]

NAME

  mpifft_dbox2fr_dpc

FUNCTION

INPUTS

OUTPUT

SOURCE

3590 pure subroutine mpifft_dbox2fr_dpc(n1,n2,n3,n4,n5,nd3proc,ndat,fftn3_distrib,ffti3_local,me_fft,workr,cplex,nfft,fofr)
3591 
3592 !Arguments ------------------------------------
3593 !scalars
3594  integer,intent(in) :: n1,n2,n3,n4,n5,nd3proc,ndat,me_fft,nfft,cplex
3595 !!arrays
3596  integer,intent(in) :: fftn3_distrib(n3),ffti3_local(n3)
3597  complex(dpc),intent(in) :: workr(n4,n5,nd3proc*ndat)
3598  real(dp),intent(out) :: fofr(cplex*nfft*ndat)
3599 
3600 !Local variables-------------------------------
3601  integer :: idat,i1,i2,i3,i3_local,i3_ldat,frbase
3602 
3603 ! *************************************************************************
3604 
3605  select case (cplex)
3606  case (1)
3607 
3608    do idat=1,ndat
3609      do i3=1,n3
3610        if( fftn3_distrib(i3) == me_fft) then
3611          i3_local = ffti3_local(i3)
3612          i3_ldat = i3_local + (idat - 1) * nd3proc
3613          do i2=1,n2
3614            frbase=n1*(i2-1+n2*(i3_local-1)) + (idat - 1) * nfft
3615            do i1=1,n1
3616              fofr(i1+frbase)=REAL(workr(i1,i2,i3_ldat))
3617            end do
3618          end do
3619        end if
3620      end do
3621    end do
3622 
3623  case (2)
3624 
3625    do idat=1,ndat
3626      do i3=1,n3
3627        if (fftn3_distrib(i3) == me_fft) then
3628          i3_local = ffti3_local(i3)
3629          i3_ldat = i3_local + (idat - 1) * nd3proc
3630          do i2=1,n2
3631            frbase=2*n1*(i2-1+n2*(i3_local-1)) + (idat - 1) * cplex * nfft
3632            do i1=1,n1
3633              fofr(2*i1-1+frbase)=REAL (workr(i1,i2,i3_ldat))
3634              fofr(2*i1  +frbase)=AIMAG(workr(i1,i2,i3_ldat))
3635            end do
3636          end do
3637        end if
3638      end do
3639    end do
3640 
3641  case default
3642    !ABI_BUG("Wrong cplex")
3643    fofr = huge(one)
3644  end select
3645 
3646 end subroutine mpifft_dbox2fr_dpc

m_fftcore/mpifft_fg2dbox [ Functions ]

[ Top ] [ m_fftcore ] [ Functions ]

NAME

  mpifft_fg2dbox

FUNCTION

INPUTS

OUTPUT

SOURCE

3314 pure subroutine mpifft_fg2dbox(nfft,ndat,fofg,n1,n2,n3,n4,nd2proc,n6,fftn2_distrib,ffti2_local,me_fft,workf)
3315 
3316 !Arguments ------------------------------------
3317 !scalars
3318  integer,intent(in) :: nfft,ndat,n1,n2,n3,n4,nd2proc,n6,me_fft
3319 !arrays
3320  integer,intent(in) :: fftn2_distrib(n2),ffti2_local(n2)
3321  real(dp),intent(in) :: fofg(2,nfft*ndat)
3322  real(dp),intent(inout) :: workf(2,n4,n6,nd2proc*ndat)
3323 
3324 !Local variables-------------------------------
3325  integer :: idat,i1,i2,i3,i2_local,i2_ldat,fgbase
3326 
3327 ! *************************************************************************
3328 
3329  do idat=1,ndat
3330    do i3=1,n3
3331      do i2=1,n2
3332        if (fftn2_distrib(i2) == me_fft) then
3333          i2_local = ffti2_local(i2)
3334          i2_ldat = i2_local + (idat-1) * nd2proc
3335          fgbase= n1*(i2_local-1 + nd2proc*(i3-1)) + (idat-1) * nfft
3336          do i1=1,n1
3337            workf(1,i1,i3,i2_ldat)=fofg(1,i1+fgbase)
3338            workf(2,i1,i3,i2_ldat)=fofg(2,i1+fgbase)
3339          end do
3340        end if
3341      end do
3342    end do
3343  end do
3344 
3345 end subroutine mpifft_fg2dbox

m_fftcore/mpifft_fg2dbox_dpc [ Functions ]

[ Top ] [ m_fftcore ] [ Functions ]

NAME

  mpifft_fg2dbox_dpc

FUNCTION

INPUTS

OUTPUT

SOURCE

3362 pure subroutine mpifft_fg2dbox_dpc(nfft,ndat,fofg,n1,n2,n3,n4,nd2proc,n6,fftn2_distrib,ffti2_local,me_fft,workf)
3363 
3364 !Arguments ------------------------------------
3365 !scalars
3366  integer,intent(in) :: nfft,ndat,n1,n2,n3,n4,nd2proc,n6,me_fft
3367 !arrays
3368  integer,intent(in) :: fftn2_distrib(n2),ffti2_local(n2)
3369  real(dp),intent(in) :: fofg(2,nfft*ndat)
3370  complex(dpc),intent(inout) :: workf(n4,n6,nd2proc*ndat)
3371 
3372 !Local variables-------------------------------
3373  integer :: idat,i1,i2,i3,i2_local,i2_ldat,fgbase
3374 
3375 ! *************************************************************************
3376 
3377  do idat=1,ndat
3378    do i3=1,n3
3379      do i2=1,n2
3380        if (fftn2_distrib(i2) == me_fft) then
3381          i2_local = ffti2_local(i2)
3382          i2_ldat = i2_local + (idat-1) * nd2proc
3383          fgbase= n1*(i2_local-1 + nd2proc*(i3-1)) + (idat-1) * nfft
3384          do i1=1,n1
3385            workf(i1,i3,i2_ldat)=CMPLX(fofg(1,i1+fgbase), fofg(2,i1+fgbase), kind=dpc)
3386          end do
3387        end if
3388      end do
3389    end do
3390  end do
3391 
3392 end subroutine mpifft_fg2dbox_dpc

m_fftcore/mpifft_fr2dbox [ Functions ]

[ Top ] [ m_fftcore ] [ Functions ]

NAME

  mpifft_fr2dbox

FUNCTION

INPUTS

OUTPUT

SOURCE

3663 pure subroutine mpifft_fr2dbox(cplex,nfft,ndat,fofr,n1,n2,n3,n4,n5,nd3proc,fftn3_distrib,ffti3_local,me_fft,workr)
3664 
3665 !Arguments ------------------------------------
3666 !scalars
3667  integer,intent(in) :: cplex,nfft,ndat,n1,n2,n3,n4,n5,nd3proc,me_fft
3668 !!arrays
3669  integer,intent(in) :: fftn3_distrib(n3),ffti3_local(n3)
3670  real(dp),intent(in) :: fofr(cplex*nfft*ndat)
3671  real(dp),intent(inout) :: workr(2,n4,n5,nd3proc*ndat)
3672 
3673 !Local variables-------------------------------
3674  integer :: idat,i1,i2,i3,i3_local,i3_ldat,frbase
3675 
3676 ! *************************************************************************
3677 
3678  select case (cplex)
3679  case (1)
3680 
3681    do idat=1,ndat
3682      do i3=1,n3
3683        if( me_fft == fftn3_distrib(i3) ) then
3684          i3_local = ffti3_local(i3)
3685          i3_ldat = i3_local + (idat-1) * nd3proc
3686          do i2=1,n2
3687            frbase=n1*(i2-1+n2*(i3_local-1)) + (idat-1) * nfft
3688            do i1=1,n1
3689              workr(1,i1,i2,i3_ldat)=fofr(i1+frbase)
3690              workr(2,i1,i2,i3_ldat)=zero
3691            end do
3692          end do
3693        end if
3694      end do
3695    end do
3696 
3697  case (2)
3698 
3699    do idat=1,ndat
3700      do i3=1,n3
3701        if( me_fft == fftn3_distrib(i3) ) then
3702          i3_local = ffti3_local(i3)
3703          i3_ldat = i3_local + (idat-1) * nd3proc
3704          do i2=1,n2
3705            frbase=2*n1*(i2-1+n2*(i3_local-1)) + (idat-1) * cplex * nfft
3706            do i1=1,n1
3707              workr(1,i1,i2,i3_ldat)=fofr(2*i1-1+frbase)
3708              workr(2,i1,i2,i3_ldat)=fofr(2*i1  +frbase)
3709            end do
3710          end do
3711        end if
3712      end do
3713    end do
3714 
3715  case default
3716    !ABI_BUG("Wrong cplex")
3717    workr = huge(one)
3718  end select
3719 
3720 end subroutine mpifft_fr2dbox

m_fftcore/mpifft_fr2dbox_dpc [ Functions ]

[ Top ] [ m_fftcore ] [ Functions ]

NAME

  mpifft_fr2dbox_dpc

FUNCTION

INPUTS

OUTPUT

SOURCE

3737 pure subroutine mpifft_fr2dbox_dpc(cplex,nfft,ndat,fofr,n1,n2,n3,n4,n5,nd3proc,fftn3_distrib,ffti3_local,me_fft,workr)
3738 
3739 !Arguments ------------------------------------
3740 !scalars
3741  integer,intent(in) :: cplex,nfft,ndat,n1,n2,n3,n4,n5,nd3proc,me_fft
3742 !!arrays
3743  integer,intent(in) :: fftn3_distrib(n3),ffti3_local(n3)
3744  real(dp),intent(in) :: fofr(cplex*nfft*ndat)
3745  complex(dpc),intent(inout) :: workr(n4,n5,nd3proc*ndat)
3746 
3747 !Local variables-------------------------------
3748  integer :: idat,i1,i2,i3,i3_local,i3_ldat,frbase
3749 
3750 ! *************************************************************************
3751 
3752  select case (cplex)
3753  case (1)
3754 
3755    do idat=1,ndat
3756      do i3=1,n3
3757        if( me_fft == fftn3_distrib(i3) ) then
3758          i3_local = ffti3_local(i3)
3759          i3_ldat = i3_local + (idat-1) * nd3proc
3760          do i2=1,n2
3761            frbase=n1*(i2-1+n2*(i3_local-1)) + (idat-1) * nfft
3762            do i1=1,n1
3763              workr(i1,i2,i3_ldat)=CMPLX(fofr(i1+frbase), zero, kind=dpc)
3764            end do
3765          end do
3766        end if
3767      end do
3768    end do
3769 
3770  case (2)
3771 
3772    do idat=1,ndat
3773      do i3=1,n3
3774        if( me_fft == fftn3_distrib(i3) ) then
3775          i3_local = ffti3_local(i3)
3776          i3_ldat = i3_local + (idat-1) * nd3proc
3777          do i2=1,n2
3778            frbase=2*n1*(i2-1+n2*(i3_local-1)) + (idat-1) * cplex * nfft
3779            do i1=1,n1
3780              workr(i1,i2,i3_ldat)=CMPLX(fofr(2*i1-1+frbase), fofr(2*i1  +frbase), kind=dpc)
3781            end do
3782          end do
3783        end if
3784      end do
3785    end do
3786 
3787  case default
3788    !ABI_BUG("Wrong cplex")
3789    workr = huge(one)
3790  end select
3791 
3792 end subroutine mpifft_fr2dbox_dpc

m_fftcore/mpiswitch [ Functions ]

[ Top ] [ m_fftcore ] [ Functions ]

NAME

 mpiswitch

FUNCTION

INPUTS

OUTPUT

SOURCE

3115 pure subroutine mpiswitch(j3,n1dfft,Jp2st,J2st,lot,n1,nd2proc,nd3proc,nproc,ioption,zmpi1,zw)
3116 
3117 !Arguments ------------------------------------
3118  integer,intent(in) :: j3,n1dfft,lot,n1,nd2proc,nd3proc,nproc,ioption
3119  integer,intent(inout) :: Jp2st,J2st
3120  real(dp),intent(in) :: zmpi1(2,n1,nd2proc,nd3proc,nproc)
3121  real(dp),intent(inout) :: zw(2,lot,n1)
3122 
3123 !Local variables-------------------------------
3124  integer :: Jp2,J2,I1,ind,jj2,mfft,jjp2
3125 
3126 ! *************************************************************************
3127  mfft=0
3128 
3129  if (ioption /= 1) then
3130    do Jp2=Jp2st,nproc
3131      do J2=J2st,nd2proc
3132        mfft=mfft+1
3133        if (mfft.gt.n1dfft) then
3134          Jp2st=Jp2
3135          J2st=J2
3136          return
3137        end if
3138        do I1=1,n1
3139          zw(1,mfft,I1)=zmpi1(1,I1,J2,j3,Jp2)
3140          zw(2,mfft,I1)=zmpi1(2,I1,J2,j3,Jp2)
3141        end do
3142      end do
3143      J2st=1
3144    end do
3145 
3146  else
3147    do Jp2=Jp2st,nproc
3148      do J2=J2st,nd2proc
3149        mfft=mfft+1
3150        if (mfft.gt.n1dfft) then
3151          Jp2st=Jp2
3152          J2st=J2
3153          return
3154        end if
3155        ind=(Jp2-1) * nd2proc + J2
3156        jj2=(ind-1)/nproc +1
3157 
3158        !jjp2=modulo(ind,nproc) +1
3159        jjp2=modulo(ind-1,nproc)+1
3160 
3161        !in other words: mfft=(jj2-1)*nproc+jjp2 (modulo case)
3162        !istead of mfft=(Jjp2-1) * nd2proc + Jj2 (slice case)
3163        !with 1<=jjp2<=nproc, jj2=1,nd2proc
3164        do I1=1,n1
3165          ! zw(1,mfft,I1)=zmpi1(1,I1,J2,j3,Jp2)
3166          ! zw(2,mfft,I1)=zmpi1(2,I1,J2,j3,Jp2)
3167          zw(1,mfft,I1)=zmpi1(1,I1,jj2,j3,jjp2)
3168          zw(2,mfft,I1)=zmpi1(2,I1,jj2,j3,jjp2)
3169        end do
3170      end do
3171      J2st=1
3172    end do
3173  end if
3174 
3175 end subroutine mpiswitch

m_fftcore/mpiswitch_cent [ Functions ]

[ Top ] [ m_fftcore ] [ Functions ]

NAME

 mpiswitch_cent

FUNCTION

   Perform the local rotation

     input: I1,J2,j3,Jp2,(jp3)
     output: J2,Jp2,I1,j3,(jp3)

   and fill the central region of the frequency spectrum with zeros

INPUTS

OUTPUT

SOURCE

3199 pure subroutine mpiswitch_cent(j3,n1dfft,Jp2stb,J2stb,lot,max1,md1,m1,n1,md2proc,&
3200                                nd3proc,nproc,ioption,zmpi1,zw,max2,m2,n2)
3201 
3202 !Arguments ------------------------------------
3203  integer,intent(in) :: j3,n1dfft,lot,max1,md1,m1,n1,md2proc,nd3proc,nproc,ioption
3204  integer,intent(in) :: m2,max2,n2
3205  integer,intent(inout) :: Jp2stb,J2stb
3206  real(dp),intent(in) :: zmpi1(2,md1,md2proc,nd3proc,nproc)
3207  real(dp),intent(inout) :: zw(2,lot,n1)
3208 
3209 !Local variables-------------------------------
3210  integer :: mfft,jp2,j2,jjp2,jj2,i1,ind
3211 
3212 ! *************************************************************************
3213 
3214  ABI_UNUSED((/m2,max2,n2/))
3215 
3216  mfft=0
3217 
3218  if (ioption /= 1) then
3219    do Jp2=Jp2stb,nproc
3220      do J2=J2stb,md2proc
3221 
3222        mfft=mfft+1
3223        if (mfft.gt.n1dfft) then
3224          Jp2stb=Jp2
3225          J2stb=J2
3226          !ABI_WARNING("Returning from mpiswithc_cent")
3227          return
3228        end if
3229 
3230        ! Here, zero and positive frequencies
3231        ! In zmpi1, they are stored from 1 to max1+1
3232        do I1=1,max1+1
3233          zw(1,mfft,I1)=zmpi1(1,I1,J2,j3,Jp2)
3234          zw(2,mfft,I1)=zmpi1(2,I1,J2,j3,Jp2)
3235        end do
3236 
3237        ! Fill the center region with zeros
3238        do I1=max1+2,n1-m1+max1+1
3239          zw(1,mfft,I1)=zero
3240          zw(2,mfft,I1)=zero
3241        end do
3242 
3243        ! Here, negative frequencies
3244        ! In zmpi1, they are stored from 1 to m1half
3245        do I1=max1+2,m1
3246          zw(1,mfft,I1+n1-m1)=zmpi1(1,I1,J2,j3,Jp2)
3247          zw(2,mfft,I1+n1-m1)=zmpi1(2,I1,J2,j3,Jp2)
3248        end do
3249      end do
3250      J2stb=1
3251    end do
3252 
3253  else
3254    do Jp2=Jp2stb,nproc
3255      do J2=J2stb,md2proc
3256 
3257        mfft=mfft+1
3258        if (mfft.gt.n1dfft) then
3259          Jp2stb=Jp2
3260          J2stb=J2
3261          !ABI_WARNING("Returning from mpiswithc_cent")
3262          return
3263        end if
3264 
3265        ind=(Jp2-1) * md2proc + J2
3266        jj2=(ind-1)/nproc +1
3267 
3268        !jjp2=modulo(ind,nproc) +1
3269        jjp2=modulo(ind-1,nproc)+1
3270 
3271        ! I gather consecutive I2 indexes in mfft in the modulo case
3272        ! Here, zero and positive frequencies
3273        ! In zmpi1, they are stored from 1 to max1+1
3274        do I1=1,max1+1
3275          zw(1,mfft,I1)=zmpi1(1,I1,Jj2,j3,Jjp2)
3276          zw(2,mfft,I1)=zmpi1(2,I1,Jj2,j3,Jjp2)
3277        end do
3278 
3279        ! Fill the center region with zeros
3280        do I1=max1+2,n1-m1+max1+1
3281          zw(1,mfft,I1)=zero
3282          zw(2,mfft,I1)=zero
3283        end do
3284 
3285        ! Here, negative frequencies
3286        ! In zmpi1, they are stored from 1 to m1half
3287        do I1=max1+2,m1
3288          zw(1,mfft,I1+n1-m1)=zmpi1(1,I1,Jj2,j3,Jjp2)
3289          zw(2,mfft,I1+n1-m1)=zmpi1(2,I1,Jj2,j3,Jjp2)
3290        end do
3291 
3292      end do
3293      J2stb=1
3294    end do
3295  end if
3296 
3297 end subroutine mpiswitch_cent

m_fftcore/multpot [ Functions ]

[ Top ] [ m_fftcore ] [ Functions ]

NAME

  multpot

FUNCTION

INPUTS

  icplexwf=1 if u(r) is real, 2 otherwise.
  icplex=1 if v(r) is real, 2 otherwise.
  includelast
  nd1,nd2=Leading dimensions of pot(icplex*nd1,nd2)
  n2
  lot
  n1dfft

OUTPUT

SOURCE

4669 subroutine multpot(icplexwf,icplex,includelast,nd1,nd2,n2,lot,n1dfft,pot,zw)
4670 
4671  !Arguments ------------------------------------
4672  integer,intent(in) :: icplexwf,icplex,includelast,nd1,nd2,n2,lot,n1dfft
4673  real(dp),intent(in) :: pot(icplex*nd1,nd2)
4674  real(dp),intent(inout) :: zw(2,lot,n2)
4675 
4676 !Local variables-------------------------------
4677  integer :: i2,j
4678  real(dp) :: rew,imw
4679 
4680 ! *************************************************************************
4681 
4682  if (icplexwf==1) then
4683    ! Real u(r)
4684 
4685    if (icplex==2) then
4686      ABI_BUG('multpot: icplexwf=1 and icplex=2')
4687    else
4688      ! TO BE SPEEDED UP : should use the same trick as Stefan
4689      if(includelast==1)then
4690        do i2=1,n2
4691          do j=1,n1dfft
4692            zw(1,j,i2)=zw(1,j,i2)*pot(2*j-1,i2)
4693            zw(2,j,i2)=zw(2,j,i2)*pot(2*j  ,i2)
4694          end do
4695        end do
4696      else
4697        do i2=1,n2
4698          do j=1,n1dfft-1
4699            zw(1,j,i2)=zw(1,j,i2)*pot(2*j-1,i2)
4700            zw(2,j,i2)=zw(2,j,i2)*pot(2*j  ,i2)
4701          end do
4702          zw(1,n1dfft,i2)=zw(1,n1dfft,i2)*pot(2*n1dfft-1,i2)
4703        end do
4704      end if
4705    end if
4706 
4707  else if (icplexwf==2) then
4708    ! Complex u(r)
4709 
4710    if (icplex==1) then
4711 
4712      do i2=1,n2-1,2
4713        do j=1,n1dfft
4714          zw(1,j,i2)=zw(1,j,i2)*pot(j,i2)
4715          zw(2,j,i2)=zw(2,j,i2)*pot(j,i2)
4716          zw(1,j,i2+1)=zw(1,j,i2+1)*pot(j,i2+1)
4717          zw(2,j,i2+1)=zw(2,j,i2+1)*pot(j,i2+1)
4718        end do
4719      end do
4720 
4721      if (2*(n2/2)/=n2) then
4722        do j=1,n1dfft
4723          zw(1,j,n2)=zw(1,j,n2)*pot(j,n2)
4724          zw(2,j,n2)=zw(2,j,n2)*pot(j,n2)
4725        end do
4726      end if
4727 
4728    else
4729 
4730      do i2=1,n2-1,2
4731        do j=1,n1dfft
4732          rew = zw(1,j,i2); imw = zw(2,j,i2)
4733          zw(1,j,i2) = rew*pot(2*j-1,i2) - imw*pot(2*j,i2)
4734          zw(2,j,i2) = imw*pot(2*j-1,i2) + rew*pot(2*j,i2)
4735 
4736          rew = zw(1,j,i2+1); imw = zw(2,j,i2+1)
4737          zw(1,j,i2+1) = rew*pot(2*j-1,i2+1) - imw*pot(2*j,i2+1)
4738          zw(2,j,i2+1) = imw*pot(2*j-1,i2+1) + rew*pot(2*j,i2+1)
4739        end do
4740      end do
4741 
4742      if (2*(n2/2)/=n2) then
4743        do j=1,n1dfft
4744          rew = zw(1,j,n2); imw = zw(2,j,n2)
4745          zw(1,j,n2) = rew*pot(2*j-1,n2) - imw*pot(2*j,n2)
4746          zw(2,j,n2) = imw*pot(2*j-1,n2) + rew*pot(2*j,n2)
4747        end do
4748      end if
4749 
4750    end if
4751  end if
4752 
4753 end subroutine multpot

m_fftcore/ngfft_seq [ Functions ]

[ Top ] [ m_fftcore ] [ Functions ]

NAME

 ngfft_seq

FUNCTION

 Helper function used to initialize ngfft(18) from the FFT divisions
 in the case of sequential execution.

INPUTS

  n123(3)=FFT divisions.

OUTPUT

  ngfft(18)=contain all needed information about 3D FFT, see ~abinit/doc/variables/vargs.htm#ngfft.

SOURCE

392 pure subroutine ngfft_seq(ngfft, n123)
393 
394 !Arguments ------------------------------------
395  integer,intent(in) :: n123(3)
396  integer,intent(out) :: ngfft(18)
397 
398 !Local variables-------------------------------
399  integer :: fftalg
400 
401 ! *************************************************************************
402 
403  ! Default for sequential case.
404  fftalg = 112
405 #ifdef HAVE_FFTW3
406  fftalg = 312
407 #elif defined HAVE_DFTI
408  fftalg = 512
409 #endif
410 
411  ngfft(1:3) = n123
412  ngfft(4) = 2*(ngfft(1)/2)+1
413  ngfft(5) = 2*(ngfft(2)/2)+1
414  ngfft(6) = ngfft(3)
415  ngfft(7)= fftalg           ! fftalg
416  ngfft(8)= get_cache_kb()   ! cache_kb
417  ngfft(9)= 0                ! paral_fft_
418  ngfft(10)=1                ! nproc_fft
419  ngfft(11)=0                ! me_fft
420  ngfft(12)=0                ! n2proc
421  ngfft(13)=0                ! n3proc
422  ngfft(14:18)=0             ! not used
423 
424 end subroutine ngfft_seq

m_fftcore/print_ngfft [ Functions ]

[ Top ] [ m_fftcore ] [ Functions ]

NAME

 print_ngfft

FUNCTION

  Print the content of ngfft(18) in explicative format.

INPUTS

  ngfft(18)=contain all needed information about 3D FFT, see ~abinit/doc/variables/vargs.htm#ngfft.
  [unit]=unit number for output (defaults to std_out).
  [prtvol]=verbosity level (defaults to 0).
  [mode_paral]=either "COLL" or "PERS" ("COLL" is default).

OUTPUT

  Only writing

SOURCE

447 subroutine print_ngfft(ngfft, header, unit, mode_paral, prtvol)
448 
449 !Arguments ------------------------------------
450 !scalars
451  integer,intent(in),optional :: prtvol,unit
452  character(len=*),intent(in),optional :: header
453  character(len=4),intent(in),optional :: mode_paral
454 !arrays
455  integer,intent(in) :: ngfft(18)
456 
457 !Local variables-------------------------------
458  integer :: my_unt,my_prtvol
459  character(len=4) :: my_mode
460  character(len=500) :: msg
461 
462 ! *************************************************************************
463 
464  my_prtvol=0;       if (PRESENT(prtvol    )) my_prtvol=prtvol
465  my_unt   =std_out; if (PRESENT(unit      )) my_unt   =unit
466  my_mode  ='COLL';  if (PRESENT(mode_paral)) my_mode  =mode_paral
467 
468  msg=ch10//' ==== FFT mesh description (ngfft) ==== '
469  if (PRESENT(header)) msg=ch10//' ==== '//TRIM(ADJUSTL(header))//' ==== '
470  call wrtout(my_unt,msg,my_mode)
471  write(msg,'(2(a,3i5,a),a,i5,2a,i5)')&
472   '  FFT mesh divisions ........................ ',ngfft(1),ngfft(2),ngfft(3),ch10,&
473   '  Augmented FFT divisions ................... ',ngfft(4),ngfft(5),ngfft(6),ch10,&
474   '  FFT algorithm ............................. ',ngfft(7),ch10,&
475   '  FFT cache size ............................ ',ngfft(8)
476  call wrtout(my_unt,msg,my_mode)
477 
478  if (my_prtvol > 0) then
479    write(msg,'(6(a,i5,a),a,4i5)')&
480     '  FFT parallelization level ................. ',ngfft(9),ch10,&
481     '  Number of processors in my FFT group ...... ',ngfft(10),ch10,&
482     '  Index of me in my FFT group ............... ',ngfft(11),ch10,&
483     '  No of xy planes in R space treated by me .. ',ngfft(12),ch10,&
484     '  No of xy planes in G space treated by me .. ',ngfft(13),ch10,&
485     '  MPI communicator for FFT .................. ',ngfft(14),ch10,&
486     '  Value of ngfft(15:18) ..................... ',ngfft(15:18)
487    call wrtout(my_unt,msg,my_mode)
488  end if
489 
490 end subroutine print_ngfft

m_fftcore/scramble [ Functions ]

[ Top ] [ m_fftcore ] [ Functions ]

NAME

  scramble

FUNCTION

  This routine performs the local rotation

     input:  G1,R3,G2,(Gp2)
     output: G1,G2,R3,(Gp2)

INPUTS

  i1=Index of x in the small box enclosing the G-sphere.
  j2
  lot=Cache blocking factor
  n1dfft=Number of 1D FFTs performed.
  md1,md2proc,nnd3=Used to dimension zmpi2
  n3=Dimension of the transform along z.
  zw(2,lot,n3): zw(:,1:n1dfft,n3) contains the lines transformed along z

 OUTPTU
 zmpi2(2,md1,md2proc,nnd3)

SOURCE

2449 pure subroutine scramble(i1,j2,lot,n1dfft,md1,n3,md2proc,nnd3,zw,zmpi2)
2450 
2451 !Arguments ------------------------------------
2452  integer,intent(in) :: i1,j2,lot,n1dfft,md1,n3,md2proc,nnd3
2453  real(dp),intent(in) :: zw(2,lot,n3)
2454  real(dp),intent(inout) :: zmpi2(2,md1,md2proc,nnd3)
2455 
2456 !Local variables-------------------------------
2457  integer :: i3,i
2458 
2459 ! *************************************************************************
2460 
2461  do i3=1,n3
2462    do i=0,n1dfft-1
2463      zmpi2(1,i1+i,j2,i3)=zw(1,i+1,i3)
2464      zmpi2(2,i1+i,j2,i3)=zw(2,i+1,i3)
2465    end do
2466  end do
2467 
2468 end subroutine scramble

m_fftcore/sphere [ Functions ]

[ Top ] [ m_fftcore ] [ Functions ]

NAME

 sphere

FUNCTION

 Array cg is defined in sphere with npw g-vectors.
 Insert cg inside FFT box of n1*n2*n3 points to define array cfft for
 rest of cfft is filled with 0 s.

 iflag=1 ==> insert cg into cfft.
 iflag=2 ==> insert cg into cfft, where the second and third dimension
      have been switched (needed for new 2002 SGoedecker FFT)
 iflag=-1==> extract cg from cfft.
 iflag=-2==> extract cg from cfft, where the second and third dimension
 have been switched (needed for new 2002 SGoedecker FFT)
      WARNING: iflag=-2 cannot use symmetry operations.

 There is also the possibility to apply a symmetry operation,
 as well as to make a shift in reciprocal space, or to multiply
 by a constant factor, in the case iflag=-1.
 Multiplication by a constant factor is also possible in the case iflag=-2.

INPUTS

 cg(2,npw*ndat)= contains values for npw G vectors in basis sphere
 ndat=number of wavefunctions
 npw=number of G vectors in basis at this k point
 cfft(2,n4,n5,n6*ndat) = array in FFT box
 n1,n2,n3=physical dimension of the box (cfft)
 n4,n5,n6=memory dimension of cfft
 kg_k(3,npw)=integer coordinates of G vectors in basis sphere
 istwf_k=option parameter that describes the storage of wfs
 iflag=option parameter. Possible values: -1, -2, 1, 2
 me_g0=1 if this node has G=0.
 shiftg(3)=The shift in reciprocal space.
 symrec(3,3)=symmetry operation in reciprocal space to be applied (symrec)
 xnorm=Normalization factor.

SIDE EFFECTS

 Input/Output
 iflag=1 and 2, insert cg(input) into cfft(output)
 iflag=-1 and -2, extract cg(output) from cfft(input)

NOTES

 cg and cfft are assumed to be of type COMPLEX, although this routine treats
 them as real of twice the length to avoid nonstandard complex*16.
 If istwf_k differs from 1, then special storage modes must be taken
 into account, for symmetric wavefunctions coming from k=(0 0 0) or other special k points.

TODO

 1) Order arguments
 2) Split the two cases to avoid breaking intent: from and to sphere (merge with cg_box2gpsh and cg_gsph2box?)
 3) If symmetries are used with or without shiftg, it might happen that the FFT mesh
    is not large enough to accomodate the rotated G, in this case one should return ierr /= 0

SOURCE

1543 subroutine sphere(cg, ndat, npw, cfft, n1, n2, n3, n4, n5, n6, kg_k, istwf_k, iflag, me_g0, shiftg, symrec, xnorm)
1544 
1545 !Arguments ------------------------------------
1546 !scalars
1547  integer,intent(in) :: iflag,istwf_k,n1,n2,n3,n4,n5,n6,ndat,npw,me_g0
1548  real(dp),intent(in) :: xnorm
1549 !arrays
1550  integer,intent(in) :: kg_k(3,npw),shiftg(3),symrec(3,3)
1551  real(dp),intent(inout) :: cfft(2,n4,n5,n6*ndat),cg(2,npw*ndat)
1552 
1553 !Local variables-------------------------------
1554 !scalars
1555  integer :: i1,i1inv,i2,i2inv,i3,i3inv,id1,id2,id3,idat,ipw
1556  integer :: j1,j2,j3,l1,l2,l3,npwmin,use_symmetry,i3dat,i3invdat,i2invdat,ipwdat,i2dat
1557  character(len=500) :: msg
1558 !arrays
1559  integer :: identity(3,3)
1560  integer :: i1inver(n1),i2inver(n2),i3inver(n3)
1561 
1562 ! *************************************************************************
1563 
1564  DBG_ENTER("COLL")
1565 
1566  ! In the case of special k-points, invariant under time-reversal,
1567  ! but not Gamma, initialize the inverse coordinates.
1568  ! Remember that:
1569  !
1570  !  u_k(G) = u_{k+G0}(G-G0); u_{-k}(G) = u_k(G)^* and therefore:
1571  !  u_{G0/2}(G) = u_{G0/2}(-G-G0)^*.
1572 
1573  if (istwf_k>=2) then
1574    if(istwf_k==2 .or. istwf_k==4 .or. istwf_k==6 .or. istwf_k==8)then
1575      i1inver(1)=1
1576      do i1=2,n1
1577        i1inver(i1)=n1+2-i1
1578      end do
1579    else
1580      do i1=1,n1
1581        i1inver(i1)=n1+1-i1
1582      end do
1583    end if
1584    if(istwf_k>=2 .and. istwf_k<=5)then
1585      i2inver(1)=1
1586      do i2=2,n2
1587        i2inver(i2)=n2+2-i2
1588      end do
1589    else
1590      do i2=1,n2
1591        i2inver(i2)=n2+1-i2
1592      end do
1593    end if
1594    if(istwf_k==2 .or. istwf_k==3 .or. istwf_k==6 .or. istwf_k==7)then
1595      i3inver(1)=1
1596      do i3=2,n3
1597        i3inver(i3)=n3+2-i3
1598      end do
1599    else
1600      do i3=1,n3
1601        i3inver(i3)=n3+1-i3
1602      end do
1603    end if
1604  end if
1605 
1606  if (iflag==1 .or. iflag==2) then
1607    ! Insert cg into cfft with extra 0 s around outside:
1608    cfft = zero
1609 
1610    ! Take care of each plane wave, and complete cfft if needed
1611    if (istwf_k==1) then
1612 
1613      if (iflag==1) then
1614 !$OMP PARALLEL DO PRIVATE(i1,i2,i3) IF (ndat>1)
1615        do idat=1,ndat
1616          do ipw=1,npw
1617            i1=kg_k(1,ipw); if(i1<0) i1=i1+n1; i1=i1+1
1618            i2=kg_k(2,ipw); if(i2<0) i2=i2+n2; i2=i2+1
1619            i3=kg_k(3,ipw); if(i3<0) i3=i3+n3; i3=i3+1
1620 
1621            cfft(1,i1,i2,i3+n6*(idat-1))=cg(1,ipw+npw*(idat-1))
1622            cfft(2,i1,i2,i3+n6*(idat-1))=cg(2,ipw+npw*(idat-1))
1623          end do
1624        end do
1625      end if
1626 
1627      if (iflag==2) then
1628 !$OMP PARALLEL DO PRIVATE(i1,i2,i3) IF (ndat>1)
1629        do idat=1,ndat
1630          do ipw=1,npw
1631            i1=kg_k(1,ipw); if(i1<0) i1=i1+n1; i1=i1+1
1632            i2=kg_k(2,ipw); if(i2<0) i2=i2+n2; i2=i2+1
1633            i3=kg_k(3,ipw); if(i3<0) i3=i3+n3; i3=i3+1
1634 
1635            cfft(1,i1,i3,i2+n6*(idat-1))=cg(1,ipw+npw*(idat-1))
1636            cfft(2,i1,i3,i2+n6*(idat-1))=cg(2,ipw+npw*(idat-1))
1637          end do
1638        end do
1639      end if
1640 
1641    else if (istwf_k>=2) then
1642 
1643      npwmin=1
1644      if (istwf_k==2 .and. me_g0==1) then
1645        ! If gamma point, then cfft must be completed
1646        do idat=1,ndat
1647          cfft(1,1,1,1+n6*(idat-1))=cg(1,1+npw*(idat-1))
1648          cfft(2,1,1,1+n6*(idat-1))=zero
1649        end do
1650        npwmin=2
1651      end if
1652 
1653      if (iflag==1) then
1654 !$OMP PARALLEL DO PRIVATE(i1,i1inv,i2,i2inv,i3,i3inv) IF (ndat>1)
1655        do idat=1,ndat
1656          do ipw=npwmin,npw
1657            i1=kg_k(1,ipw); if(i1<0) i1=i1+n1; i1=i1+1
1658            i2=kg_k(2,ipw); if(i2<0) i2=i2+n2; i2=i2+1
1659            i3=kg_k(3,ipw); if(i3<0) i3=i3+n3; i3=i3+1
1660            ! Construct the coordinates of -k-G
1661            i1inv=i1inver(i1) ; i2inv=i2inver(i2) ; i3inv=i3inver(i3)
1662 
1663            cfft(1,i1,i2,i3+n6*(idat-1))=cg(1,ipw+npw*(idat-1))
1664            cfft(2,i1,i2,i3+n6*(idat-1))=cg(2,ipw+npw*(idat-1))
1665            cfft(1,i1inv,i2inv,i3inv+n6*(idat-1))= cg(1,ipw+npw*(idat-1))
1666            cfft(2,i1inv,i2inv,i3inv+n6*(idat-1))=-cg(2,ipw+npw*(idat-1))
1667          end do
1668        end do
1669      end if
1670 
1671      if (iflag==2) then
1672 !$OMP PARALLEL DO PRIVATE(i1,i1inv,i2,i2inv,i3,i3inv) IF (ndat>1)
1673        do idat=1,ndat
1674          do ipw=npwmin,npw
1675            i1=kg_k(1,ipw); if(i1<0) i1=i1+n1; i1=i1+1
1676            i2=kg_k(2,ipw); if(i2<0) i2=i2+n2; i2=i2+1
1677            i3=kg_k(3,ipw); if(i3<0) i3=i3+n3; i3=i3+1
1678 
1679            ! Construct the coordinates of -k-G
1680            i1inv=i1inver(i1) ; i2inv=i2inver(i2) ; i3inv=i3inver(i3)
1681 
1682            cfft(1,i1,i3,i2+n6*(idat-1))=cg(1,ipw+npw*(idat-1))
1683            cfft(2,i1,i3,i2+n6*(idat-1))=cg(2,ipw+npw*(idat-1))
1684            cfft(1,i1inv,i3inv,i2inv+n6*(idat-1))= cg(1,ipw+npw*(idat-1))
1685            cfft(2,i1inv,i3inv,i2inv+n6*(idat-1))=-cg(2,ipw+npw*(idat-1))
1686          end do
1687        end do
1688      end if
1689 
1690    end if
1691 
1692  else if (iflag==-1 .or. iflag==-2) then
1693    ! extract cg(output) from cfft(input)
1694 
1695    use_symmetry=0
1696    identity(:,:)=0; identity(1,1)=1 ; identity(2,2)=1 ; identity(3,3)=1
1697    if(sum((symrec(:,:)-identity(:,:))**2)/=0)use_symmetry=1
1698    if(sum(shiftg(:)**2)/=0)use_symmetry=1
1699 
1700    ! Extract cg from cfft, ignoring components outside range of cg:
1701    if (istwf_k==1) then
1702 
1703      if (use_symmetry==0) then
1704        if (iflag==-1) then
1705 !$OMP PARALLEL DO PRIVATE(i1,i2,i3,ipwdat,i3dat) IF (ndat>1)
1706          do idat=1,ndat
1707            do ipw=1,npw
1708              i1=kg_k(1,ipw); if(i1<0) i1=i1+n1; i1=i1+1
1709              i2=kg_k(2,ipw); if(i2<0) i2=i2+n2; i2=i2+1
1710              i3=kg_k(3,ipw); if(i3<0) i3=i3+n3; i3=i3+1
1711              ipwdat = ipw + (idat-1) * npw
1712              i3dat = i3 + (idat-1) * n6
1713 
1714              cg(1,ipwdat)=cfft(1,i1,i2,i3dat)*xnorm
1715              cg(2,ipwdat)=cfft(2,i1,i2,i3dat)*xnorm
1716            end do
1717          end do
1718        else
1719          ! iflag==-2
1720 !$OMP PARALLEL DO PRIVATE(i1,i2,i3,ipwdat,i2dat) IF (ndat>1)
1721          do idat=1,ndat
1722            do ipw=1,npw
1723              i1=kg_k(1,ipw); if(i1<0) i1=i1+n1; i1=i1+1
1724              i2=kg_k(2,ipw); if(i2<0) i2=i2+n2; i2=i2+1
1725              i3=kg_k(3,ipw); if(i3<0) i3=i3+n3; i3=i3+1
1726 
1727              ipwdat = ipw + (idat-1) * npw
1728              i2dat = i2 + (idat-1) * n6
1729 
1730              cg(1,ipwdat)=cfft(1,i1,i3,i2dat)*xnorm
1731              cg(2,ipwdat)=cfft(2,i1,i3,i2dat)*xnorm
1732            end do
1733          end do
1734        end if
1735      else
1736        ! use_symmetry == 1
1737 !$OMP PARALLEL DO PRIVATE(i1,i2,i3,j1,j2,j3,l1,l2,l3,ipwdat,i3dat) IF (ndat>1)
1738        do idat=1,ndat
1739          do ipw=1,npw
1740            l1=kg_k(1,ipw)+shiftg(1)
1741            l2=kg_k(2,ipw)+shiftg(2)
1742            l3=kg_k(3,ipw)+shiftg(3)
1743            j1=symrec(1,1)*l1+symrec(1,2)*l2+symrec(1,3)*l3
1744            j2=symrec(2,1)*l1+symrec(2,2)*l2+symrec(2,3)*l3
1745            j3=symrec(3,1)*l1+symrec(3,2)*l2+symrec(3,3)*l3
1746            if(j1<0) j1=j1+n1; i1=j1+1
1747            if(j2<0) j2=j2+n2; i2=j2+1
1748            if(j3<0) j3=j3+n3; i3=j3+1
1749            ! [i1, i2, i3] are the indices of S(g + g0) in the FFT box.
1750            ! while ipw is the index of g in kg_k
1751 
1752            ipwdat = ipw + (idat-1) * npw
1753            i3dat = i3 + (idat-1)*n6
1754 
1755            ! c(g) = cfft(S(g + shiftg))
1756            cg(1,ipwdat)=cfft(1,i1,i2,i3dat)*xnorm
1757            cg(2,ipwdat)=cfft(2,i1,i2,i3dat)*xnorm
1758          end do
1759        end do
1760      end if
1761 
1762    else if (istwf_k>=2) then
1763 
1764      npwmin=1
1765      if (istwf_k==2 .and. me_g0==1) then
1766        ! Extract cg from cfft, in a way that projects on a
1767        ! wavefunction with time-reversal symmetry
1768        do idat=1,ndat
1769          ipwdat = 1 + (idat-1) * npw
1770          i3dat = 1 + (idat-1)*n6
1771          cg(1,ipwdat)=cfft(1,1,1,i3dat)*xnorm
1772          cg(2,ipwdat)=zero
1773        end do
1774        npwmin=2
1775      end if
1776 
1777      if (use_symmetry==0) then
1778 
1779        if (iflag==-1) then
1780 !$OMP PARALLEL DO PRIVATE(i1,i1inv,i2,i2inv,i3,i3inv,ipwdat,i3dat,i3invdat) IF (ndat>1)
1781          do idat=1,ndat
1782            do ipw=npwmin,npw
1783              i1=kg_k(1,ipw); if(i1<0) i1=i1+n1; i1=i1+1
1784              i2=kg_k(2,ipw); if(i2<0) i2=i2+n2; i2=i2+1
1785              i3=kg_k(3,ipw); if(i3<0) i3=i3+n3; i3=i3+1
1786 
1787              ! Construct the coordinates of -k-G
1788              i1inv=i1inver(i1); i2inv=i2inver(i2); i3inv=i3inver(i3)
1789 
1790              ipwdat = ipw + (idat-1) * npw
1791              i3dat = i3 + (idat-1) * n6
1792              i3invdat = i3inv + (idat-1) * n6
1793 
1794              ! Here the time-reversal symmetry is used to project from cfft
1795              cg(1,ipwdat)=(cfft(1,i1,i2,i3dat) + cfft(1,i1inv,i2inv,i3invdat))*0.5d0*xnorm
1796              cg(2,ipwdat)=(cfft(2,i1,i2,i3dat) - cfft(2,i1inv,i2inv,i3invdat))*0.5d0*xnorm
1797            end do
1798          end do
1799 
1800        else
1801 !$OMP PARALLEL DO PRIVATE(i1,i1inv,i2,i2inv,i3,i3inv,ipwdat,i2dat,i2invdat) IF (ndat>1)
1802          do idat=1,ndat
1803            do ipw=npwmin,npw
1804              i1=kg_k(1,ipw); if(i1<0) i1=i1+n1; i1=i1+1
1805              i2=kg_k(2,ipw); if(i2<0) i2=i2+n2; i2=i2+1
1806              i3=kg_k(3,ipw); if(i3<0) i3=i3+n3; i3=i3+1
1807 
1808              ! Construct the coordinates of -k-G
1809              i1inv=i1inver(i1) ; i2inv=i2inver(i2) ; i3inv=i3inver(i3)
1810 
1811              ipwdat = ipw + (idat-1) * npw
1812              i2dat = i2 + (idat-1) * n6
1813              i2invdat = i2inv + (idat-1) * n6
1814 
1815              ! Here the time-reversal symmetry is used to project from cfft
1816              cg(1,ipwdat)=(cfft(1,i1,i3,i2dat) + cfft(1,i1inv,i3inv,i2invdat))*0.5d0*xnorm
1817              cg(2,ipwdat)=(cfft(2,i1,i3,i2dat) - cfft(2,i1inv,i3inv,i2invdat))*0.5d0*xnorm
1818            end do
1819          end do
1820        end if
1821 
1822      else
1823        ! Use symmetry
1824        id1=n1/2+2
1825        id2=n2/2+2
1826        id3=n3/2+2
1827 
1828 !$OMP PARALLEL DO PRIVATE(i1,i1inv,i2,i2inv,i3,i3inv,j1,j2,j3,l1,l2,l3,ipwdat,i3dat,i3invdat) IF (ndat>1)
1829        do idat=1,ndat
1830          do ipw=npwmin,npw
1831 
1832            i1=kg_k(1,ipw); if(i1<0) i1=i1+n1; i1=i1+1
1833            i2=kg_k(2,ipw); if(i2<0) i2=i2+n2; i2=i2+1
1834            i3=kg_k(3,ipw); if(i3<0) i3=i3+n3; i3=i3+1
1835 
1836            i1inv=i1inver(i1) ; i2inv=i2inver(i2) ; i3inv=i3inver(i3)
1837 
1838            l1=kg_k(1,ipw)+shiftg(1)
1839            l2=kg_k(2,ipw)+shiftg(2)
1840            l3=kg_k(3,ipw)+shiftg(3)
1841            j1=symrec(1,1)*l1+symrec(1,2)*l2+symrec(1,3)*l3
1842            j2=symrec(2,1)*l1+symrec(2,2)*l2+symrec(2,3)*l3
1843            j3=symrec(3,1)*l1+symrec(3,2)*l2+symrec(3,3)*l3
1844            if(j1<0)j1=j1+n1 ; i1=j1+1
1845            if(j2<0)j2=j2+n2 ; i2=j2+1
1846            if(j3<0)j3=j3+n3 ; i3=j3+1
1847 
1848            ! Construct the coordinates of -k-G
1849            l1=i1inv-(i1inv/id1)*n1-1+shiftg(1)
1850            l2=i2inv-(i2inv/id2)*n2-1+shiftg(2)
1851            l3=i3inv-(i3inv/id3)*n3-1+shiftg(3)
1852            j1=symrec(1,1)*l1+symrec(1,2)*l2+symrec(1,3)*l3
1853            j2=symrec(2,1)*l1+symrec(2,2)*l2+symrec(2,3)*l3
1854            j3=symrec(3,1)*l1+symrec(3,2)*l2+symrec(3,3)*l3
1855            if(j1<0)j1=j1+n1 ; i1inv=j1+1
1856            if(j2<0)j2=j2+n2 ; i2inv=j2+1
1857            if(j3<0)j3=j3+n3 ; i3inv=j3+1
1858 
1859            ipwdat = ipw + (idat-1) * npw
1860            i3dat = i3 + (idat-1) * n6
1861            i3invdat = i3inv + (idat-1) * n6
1862 
1863            ! Here the time-reversal symmetry is used to project from cfft
1864            cg(1,ipwdat)=(cfft(1,i1,i2,i3dat) + cfft(1,i1inv,i2inv,i3invdat))*0.5d0*xnorm
1865            cg(2,ipwdat)=(cfft(2,i1,i2,i3dat) - cfft(2,i1inv,i2inv,i3invdat))*0.5d0*xnorm
1866          end do
1867        end do
1868      end if
1869 
1870    end if
1871 
1872  else
1873    write(msg,'(a,i0,a)')'  iflag=',iflag,' not acceptable.'
1874    ABI_BUG(msg)
1875  end if
1876 
1877  DBG_EXIT("COLL")
1878 
1879 end subroutine sphere

m_fftcore/sphere_fft [ Functions ]

[ Top ] [ m_fftcore ] [ Functions ]

NAME

 sphere_fft

FUNCTION

 Array cg is defined in sphere with npw points. Insert cg inside box
 of n1*n2*n3 points to define array cfft for fft box.
 corresponds to given element in cg.  rest of cfft is filled with 0 s.

 iflag=1==>insert cg into cfft.
 iflag=2==>insert cg into cfft, where the second and third dimension
 have been switched (needed for new 2002 SGoedecker FFT)
 iflag=-1==> extract cg from cfft.
 iflag=-2==> extract cg from cfft, where the second and third dimension
 have been switched (needed for new 2002 SGoedecker FFT)
  (WARNING : iflag=-2 cannot use symmetry operations)

 There is also the possibility to apply a symmetry operation,
 as well as to make a shift in reciprocal space, or to multiply
 by a constant factor, in the case iflag=-1.
 Multiplication by a constant factor is also possible in the case iflag=-2.

INPUTS

 cg(2,npw)= contains values for npw G vectors in basis sphere
 ndat=number of FFT to do in //
 npw=number of G vectors in basis at this k point
 cfft(2,n4,n5,n6) = fft box
 n1,n2,n3=physical dimension of the box (cfft)
 n4,n5,n6=memory dimension of cfft
 kg_k(3,npw)=integer coordinates of G vectors in basis sphere
 mpi_enreg=information about MPI parallelization
 tab_fftwf2_local(n2)=local i2 indices in fourwf
 nd2proc TO BE DESCRIBED SB 090831
 iflag=option parameter. Possible values: -1, -2, 1, 2 ; this is used only in debug option

OUTPUT

  (see side effects)

SIDE EFFECTS

 Input/Output
 iflag=1 and 2, insert cg(input) into cfft(output)
 iflag=-1 and -2, extract cg(output) from cfft(input)

NOTES

 cg and cfft are assumed to be of type COMPLEX, although this routine treats
 them as real of twice the length to avoid nonstandard complex*16.

 WARNING
 NO CHECK is DONE over iflag.

TODO

 Order arguments

SOURCE

1939 subroutine sphere_fft(cg,ndat,npw,cfft,n1,n2,n3,n4,n5,kg_k,tab_fftwf2_local,nd2proc)
1940 
1941 !Arguments ------------------------------------
1942 !scalars
1943  integer,intent(in) :: n1,n2,n3,n4,n5,nd2proc,ndat,npw
1944  integer,intent(in) :: tab_fftwf2_local(n2)
1945 !arrays
1946  integer,intent(in) :: kg_k(3,npw)
1947  real(dp),intent(in) :: cg(2,npw*ndat)
1948  real(dp),intent(out) :: cfft(2,n4,n5,nd2proc*ndat)
1949 
1950 !Local variables-------------------------------
1951 !scalars
1952  integer :: i1,i2,i2_local,i3,idat,ipw
1953 
1954 ! *************************************************************************
1955 
1956 !Insert cg into cfft with extra 0 s around outside:
1957  cfft = zero
1958 
1959 !$OMP PARALLEL DO PRIVATE(i1,i2,i2_local,i3)
1960  do ipw=1,npw
1961    i1=kg_k(1,ipw); if(i1<0)i1=i1+n1; i1=i1+1
1962    i2=kg_k(2,ipw); if(i2<0)i2=i2+n2; i2=i2+1
1963    i3=kg_k(3,ipw); if(i3<0)i3=i3+n3; i3=i3+1
1964    i2_local = tab_fftwf2_local(i2)
1965    do idat=1,ndat
1966      cfft(1,i1,i3,i2_local + nd2proc*(idat-1))=cg(1,ipw+npw*(idat-1))
1967      cfft(2,i1,i3,i2_local + nd2proc*(idat-1))=cg(2,ipw+npw*(idat-1))
1968    end do
1969  end do
1970 
1971 end subroutine sphere_fft

m_fftcore/sphere_fft1 [ Functions ]

[ Top ] [ m_fftcore ] [ Functions ]

NAME

 sphere_fft1

FUNCTION

 Array cg is defined in sphere with npw points. Insert cg inside box
 of n1*n2*n3 points to define array cfft for fft box.
 corresponds to given element in cg.  rest of cfft is filled with 0 s.

 iflag=1==>insert cg into cfft.
 iflag=2==>insert cg into cfft, where the second and third dimension
 have been switched (needed for new 2002 SGoedecker FFT)
 iflag=-1==> extract cg from cfft.
 iflag=-2==> extract cg from cfft, where the second and third dimension
 have been switched (needed for new 2002 SGoedecker FFT)
  (WARNING : iflag=-2 cannot use symmetry operations)

 There is also the possibility to apply a symmetry operation,
 as well as to make a shift in reciprocal space, or to multiply
 by a constant factor, in the case iflag=-1.
 Multiplication by a constant factor is also possible in the case iflag=-2.

INPUTS

 cg(2,npw)= contains values for npw G vectors in basis sphere
 ndat=number of FFT to do in //
 npw=number of G vectors in basis at this k point
 cfft(2,n4,n5,n6) = fft box
 n1,n2,n3=physical dimension of the box (cfft)
 n4,n5,n6=memory dimension of cfft
 kg_k(3,npw)=integer coordinates of G vectors in basis sphere
 nd2proc TO BE DESCRIBED SB 090831
 iflag=option parameter. Possible values: -1, -2, 1, 2
 tab_fftwf2_local(n2)=local i2 indices in fourwf

OUTPUT

  (see side effects)

SIDE EFFECTS

 Input/Output
 iflag=1 and 2, insert cg(input) into cfft(output)
 iflag=-1 and -2, extract cg(output) from cfft(input)

NOTES

 cg and cfft are assumed to be of type COMPLEX, although this routine treats
 them as real of twice the length to avoid nonstandard complex*16.

 WARNING
 NO CHECK is DONE over iflag.

TODO

   Order arguments
   sphere_fft1 is similar to sphere_fft, the only difference being that ndat > 1 is not supported.
   Why? Should merge the two APIs.

SOURCE

2032 subroutine sphere_fft1(cg,ndat,npw,cfft,n1,n2,n3,n4,n5,n6,kg_k,tab_fftwf2_local)
2033 
2034 
2035 !Arguments ------------------------------------
2036 !scalars
2037  integer,intent(in) :: n1,n2,n3,n4,n5,n6,ndat,npw
2038 !arrays
2039  integer,intent(in) :: kg_k(3,npw)
2040  integer,intent(in) :: tab_fftwf2_local(n2)
2041  real(dp),intent(in) :: cg(2,npw*ndat)
2042  real(dp),intent(inout) :: cfft(2,n4,n5,n6*ndat)
2043 
2044 !Local variables-------------------------------
2045 !scalars
2046  integer :: i1,i2,i2_local,i3,idat,ipw
2047 
2048 ! *************************************************************************
2049 
2050 !Insert cg into cfft with extra 0 s around outside:
2051 
2052  cfft = zero
2053 !$OMP PARALLEL DO PRIVATE(i1,i2,i2_local,i3)
2054  do idat=1,ndat
2055    do ipw=1,npw
2056      i1=kg_k(1,ipw); if(i1<0)i1=i1+n1; i1=i1+1
2057      i2=kg_k(2,ipw); if(i2<0)i2=i2+n2; i2=i2+1
2058      i3=kg_k(3,ipw); if(i3<0)i3=i3+n3; i3=i3+1
2059      i2_local = tab_fftwf2_local(i2) + n6*(idat-1)
2060      cfft(1,i1,i3,i2_local)=cg(1,ipw+npw*(idat-1))
2061      cfft(2,i1,i3,i2_local)=cg(2,ipw+npw*(idat-1))
2062    end do
2063  end do
2064 
2065 end subroutine sphere_fft1

m_fftcore/sphereboundary [ Functions ]

[ Top ] [ m_fftcore ] [ Functions ]

NAME

 sphereboundary

FUNCTION

 Finds the boundary of the basis sphere of G vectors (at a given
 k point) for use in improved zero padding of ffts in 3 dimensions.
 Provides data to be used by subroutine fourwf, in the form of
 an array gbound(2*mgfft+8,2).

 The first component (for use when mod(fftalg,10)==2))
 provides integer values g1min,g1max,g2min,g2max
 and then for g2 in the
 sequence g2=0,1,2,...,g2max,g2min,g2min+1,...,-1, provides g1min, g1max.

 The second component (for use when mod(fftalg,10)==1))
 provides integer values g1min,g1max,g3min,g3max,
 where g3min and g3max have been corrected in case of time-reversal
 and then for g3 in the sequence
 g3=0,1,2,...,g3max,g3min,g3min+1,...,-1, provides g2min, g2max.
 (also corrected in case of time-reversal)

 These values are stored in the above order in array gbound.
 Debug mode, if fftalg is between 000 and 099

INPUTS

  istwf_k=option parameter that describes the storage of wfs
  kg_k(3,npw)=integer coordinates of G vectors in basis sphere
  mgfft=maximum size of 1D FFTs (only for dimensioning purposes)
  npw=number of G vectors in basis at this k point

OUTPUT

  gbound(2*mgfft+8,2)=defined above

SOURCE

1263 subroutine sphereboundary(gbound, istwf_k, kg_k, mgfft, npw)
1264 
1265 !Arguments ------------------------------------
1266 !scalars
1267  integer,intent(in) :: istwf_k,mgfft,npw
1268 !arrays
1269  integer,intent(in) :: kg_k(3,npw)
1270  integer,intent(out) :: gbound(2*mgfft+8,2)
1271 
1272 !Local variables-------------------------------
1273 !scalars
1274  integer :: dim_a,dim_b,fftalgc,g_a,gmax_a,gmax_b,gmax_b1,gmax_b2,gmin_a,gmin_b
1275  integer :: gmin_b1,gmin_b2,igb,ii,iloop,ipw,testm,testp,kgk
1276  character(len=500) :: msg
1277 !arrays
1278  integer :: gmax(2),gmin(2)
1279 
1280 ! *************************************************************************
1281 !
1282 !DEBUG
1283 !write(std_out,*)' sphereboundary : enter'
1284 !write(std_out, '(a)' )' sphereboundary : list of plane waves coordinates for k point '
1285 !write(std_out, '(a)' )'       ipw       kg_k(1:3,ipw) '
1286 !do ipw=1,npw
1287 !write(std_out, '(i10,a,3i6)' )ipw,'  ',kg_k(1:3,ipw)
1288 !end do
1289 !gbound=-999
1290 !ENDDEBUG
1291 
1292 !Determine cube boundaries
1293  gbound(1,1)=minval(kg_k(1,:))
1294  gbound(2,1)=maxval(kg_k(1,:))
1295  gbound(1:2,2)=gbound(1:2,1)
1296 
1297 !Treat differently the fftalgc cases
1298  do ii=1,2
1299 
1300    fftalgc=3-ii
1301 
1302    if(fftalgc/=2)then
1303      dim_a=3
1304      dim_b=2
1305    else
1306      dim_a=2
1307      dim_b=1
1308    end if
1309 
1310 !  Relevant boundaries
1311    gbound(3,ii)=minval(kg_k(dim_a,:))
1312    gbound(4,ii)=maxval(kg_k(dim_a,:))
1313    gmin_a=gbound(3,ii)
1314    gmax_a=gbound(4,ii)
1315 
1316 !  Must complete the sphere for fftalgc==1 and special storage modes.
1317 !  Explanation : sg_fftpad is not able to take into account
1318 !  the time-reversal symmetry, so that the boundaries will not be delimited
1319 !  by the kg_k set, but by their symmetric also.
1320    if(istwf_k>=2 .and. fftalgc==1)then
1321      if( istwf_k==2 .or. istwf_k==3 .or. istwf_k==6 .or. istwf_k==7 )then
1322        gbound(4,2)=max(gmax_a,-gmin_a)
1323        gbound(3,2)=-gbound(4,2)
1324      else if( istwf_k==4 .or. istwf_k==5 .or. istwf_k==8 .or. istwf_k==9 )then
1325        gbound(4,2)=max(gmax_a,-gmin_a-1)
1326        gbound(3,2)=-gbound(4,2)-1
1327      end if
1328      gmax_a=gbound(4,2) ; gmin_a=gbound(3,2)
1329    end if
1330 
1331    igb=5
1332 
1333 !  Consider first every g_a in range 0 ... gmax_a, then gmin_a ... -1
1334    gmin(1)=0         ; gmax(1)=gmax_a
1335    gmin(2)=gmin_a    ; gmax(2)=-1
1336 
1337    do iloop=1,2
1338 
1339      if( gmin(iloop) <= gmax(iloop) )then
1340 
1341        do g_a=gmin(iloop),gmax(iloop)
1342 
1343          if(istwf_k==1 .or. fftalgc/=1)then
1344            ! Select the minimal and maximal values, in the selected plane
1345            gmin_b=mgfft+1 ! Initialized with a value larger than all possible ones
1346            gmax_b=-mgfft-1 ! Initialized with a value smaller than all possible ones
1347            do ipw=1,npw
1348              if(kg_k(dim_a,ipw)==g_a)then
1349                kgk=kg_k(dim_b,ipw)
1350                if(kgk<=gmin_b)gmin_b=kgk
1351                if(kgk>=gmax_b)gmax_b=kgk
1352              end if
1353            end do
1354 
1355          else if(istwf_k>=2 .and. fftalgc==1)then
1356 
1357            ! Here, must take into account time-reversal symmetry explicitely
1358 
1359            ! Determine the boundaries for the plane g_a
1360            testp=0
1361            if(g_a<=gmax_a)then
1362              ! Select the minimal and maximal values, in the selected plane
1363              gmin_b1=mgfft+1 ! Initialized with a value larger than all possible ones
1364              gmax_b1=-mgfft-1 ! Initialized with a value smaller than all possible ones
1365              do ipw=1,npw
1366                if(kg_k(dim_a,ipw)==g_a)then
1367                  kgk=kg_k(dim_b,ipw)
1368                  if(kgk<=gmin_b1)gmin_b1=kgk
1369                  if(kgk>=gmax_b1)gmax_b1=kgk
1370                end if
1371              end do
1372 
1373 
1374              testp=1
1375            end if
1376 
1377            ! Determine the boundaries for the plane -g_a or -g_a-1
1378            testm=0
1379            if( istwf_k==2 .or. istwf_k==3 .or. istwf_k==6 .or. istwf_k==7 )then
1380 
1381              if(-g_a>=gmin_a)then
1382                ! Select the minimal and maximal values, in the selected plane
1383                ! Warning : there is an inversion of search (might be confusing)
1384                gmax_b2=mgfft+1 ! Initialized with a value larger than all possible ones
1385                gmin_b2=-mgfft-1 ! Initialized with a value smaller than all possible ones
1386                do ipw=1,npw
1387                  if(kg_k(dim_a,ipw)==-g_a)then
1388                    kgk=kg_k(dim_b,ipw)
1389                    if(kgk<=gmax_b2)gmax_b2=kgk
1390                    if(kgk>=gmin_b2)gmin_b2=kgk
1391                  end if
1392                end do
1393                testm=1
1394              end if
1395 
1396            else if( istwf_k==4 .or. istwf_k==5 .or. istwf_k==8 .or. istwf_k==9 )then
1397 
1398              if(-g_a-1>=gmin_a)then
1399                ! Select the minimal and maximal values, in the selected plane
1400                ! Warning : there is an inversion of search (might be confusing)
1401                gmax_b2=mgfft+1 ! Initialized with a value larger than all possible ones
1402                gmin_b2=-mgfft-1 ! Initialized with a value smaller than all possible ones
1403                do ipw=1,npw
1404                  if(kg_k(dim_a,ipw)==-g_a-1)then
1405                    kgk=kg_k(dim_b,ipw)
1406                    if(kgk<=gmax_b2)gmax_b2=kgk
1407                    if(kgk>=gmin_b2)gmin_b2=kgk
1408                  end if
1409                end do
1410                testm=1
1411              end if
1412 
1413            end if
1414 
1415            !  Must invert the boundaries, to use them for plane g_a
1416            if(testm==1)then
1417              ! This is needed to avoid border effect
1418              ! if the search did not lead to any element
1419              gmin_b2=max(gmin_b2,-mgfft) ; gmax_b2=min(gmax_b2,mgfft)
1420              if(istwf_k<=5)then
1421                gmax_b2=-gmax_b2 ; gmin_b2=-gmin_b2
1422              else
1423                gmax_b2=-gmax_b2-1 ; gmin_b2=-gmin_b2-1
1424              end if
1425            end if
1426 
1427            if( testp==1 .and. testm==1)then
1428              gmin_b=min(gmin_b1,gmin_b2) ; gmax_b=max(gmax_b1,gmax_b2)
1429            else if( testp==1 )then
1430              gmin_b=gmin_b1 ; gmax_b=gmax_b1
1431            else if( testm==1 )then
1432              gmin_b=gmin_b2 ; gmax_b=gmax_b2
1433            end if
1434 
1435          end if ! Endif take into account time-reversal symmetry
1436 
1437          if (igb+1>2*mgfft+4) then
1438            write(msg, '(2a, 4(a,3(i0,1x),a))' )&
1439              "About to overwrite gbound array (FFT mesh too small) ",ch10, &
1440              "   iloop, igb, mgb = ",iloop,igb,2*mgfft+4, ch10, &
1441              "   istwfk, mgfft, npw = ",istwf_k, mgfft, npw, ch10, &
1442              "   minval(kg_k) = ",minval(kg_k, dim=2), ch10, &
1443              "   maxval(kg_k) = ",maxval(kg_k, dim=2), ch10
1444            ABI_BUG(msg)
1445          end if
1446 
1447          gbound(igb,ii)=gmin_b
1448          gbound(igb+1,ii)=gmax_b
1449 
1450          if( iloop==1 .and. istwf_k>=2 .and. istwf_k<=5 .and. fftalgc==2 .and. g_a==0)then
1451 !          If k_y=0 , for fftalgc==2, the g_a==0 plane must be completed
1452            if(istwf_k==2 .or. istwf_k==4)then
1453              gbound(igb+1,ii)=max(gmax_b,-gmin_b)
1454              gbound(igb,ii)=-gbound(igb+1,ii)
1455            else if(istwf_k==3 .or. istwf_k==5)then
1456              gbound(igb+1,ii)=max(gmax_b,-gmin_b-1)
1457              gbound(igb,ii)=-gbound(igb+1,ii)-1
1458            end if
1459 
1460          end if
1461 
1462          igb=igb+2
1463 
1464        end do ! g_a
1465      end if
1466    end do  ! iloop
1467  end do ! ii (fftalgc)
1468 
1469 !DEBUG
1470 !write(std_out,'(a)')' sphereoundary : list of plane waves coordinates for 1st k point '
1471 !write(std_out,'(a)')'       ipw       kg_k(1:3,ipw) '
1472 !do ipw=1,npw
1473 !write(std_out, '(i10,a,3i6)' )ipw,'  ',kg_k(1:3,ipw)
1474 !end do
1475 !write(std_out, '(a)' )' sphereboundary : list of boundaries '
1476 !do igb=1,2*mgfft+8
1477 !write(std_out, '(i10,a,2i6)' )igb,'  ',gbound(igb,1),gbound(igb,2)
1478 !end do
1479 !write(std_out,*)' sphereboundary : exit '
1480 !ENDDEBUG
1481 
1482 end subroutine sphereboundary

m_fftcore/switch [ Functions ]

[ Top ] [ m_fftcore ] [ Functions ]

NAME

  switch

FUNCTION

INPUTS

OUTPUT

SOURCE

2146 pure subroutine switch(n1dfft,n2,lot,n1,lzt,zt,zw)
2147 
2148 !Arguments ------------------------------------
2149  integer,intent(in) :: n1dfft,n2,lot,n1,lzt
2150  real(dp),intent(in) :: zt(2,lzt,n1)
2151  real(dp),intent(inout) :: zw(2,lot,n2)
2152 
2153 !Local variables-------------------------------
2154  integer :: i,j
2155 ! *************************************************************************
2156 
2157  do j=1,n1dfft
2158    do i=1,n2
2159      zw(1,j,i)=zt(1,i,j)
2160      zw(2,j,i)=zt(2,i,j)
2161    end do
2162  end do
2163 
2164 end subroutine switch

m_fftcore/switch_cent [ Functions ]

[ Top ] [ m_fftcore ] [ Functions ]

NAME

 switch_cent

FUNCTION

   Perform the rotation:

     input: I2,i1,j3,(jp3)
     output: i1,I2,j3,(jp3)

   and padd the signal with zeros.

INPUTS

  n1dfft=Number of 1D FFTs to perform
  max2=Max G_y in the small box enclosing the G-sphere.
  m2=Size of the small box enclosing the G-sphere along y
  n2=Dimension of the transform along y
  lot=Cache blocking factor.
  n1=Dimension of the transform along x
  lzt=Second dimension of z
  zt(2,lzt,n1)

OUTPUT

  zw(2,lot,n2)=Cache working array

SOURCE

2196 pure subroutine switch_cent(n1dfft,max2,m2,n2,lot,n1,lzt,zt,zw)
2197 
2198 !Arguments ------------------------------------
2199  integer,intent(in) :: n1dfft,max2,m2,n2,lot,n1,lzt
2200  real(dp),intent(in) :: zt(2,lzt,n1)
2201  real(dp),intent(inout) :: zw(2,lot,n2)
2202 
2203 !Local variables-------------------------------
2204  integer :: i,j
2205 
2206 ! *************************************************************************
2207 
2208  ! Here, zero and positive frequencies
2209  do j=1,n1dfft
2210    do i=1,max2+1
2211      zw(1,j,i)=zt(1,i,j)
2212      zw(2,j,i)=zt(2,i,j)
2213    end do
2214  end do
2215 
2216  ! Fill the center region with zeros
2217  do i=max2+2,n2-m2+max2+1
2218    do j=1,n1dfft
2219      zw(1,j,i)=zero
2220      zw(2,j,i)=zero
2221    end do
2222  end do
2223 
2224  ! Here, negative frequencies
2225  if (m2>=max2+2) then
2226    do j=1,n1dfft
2227      do i=max2+2,m2
2228        zw(1,j,i+n2-m2)=zt(1,i,j)
2229        zw(2,j,i+n2-m2)=zt(2,i,j)
2230      end do
2231    end do
2232  end if
2233 
2234 end subroutine switch_cent

m_fftcore/switchreal [ Functions ]

[ Top ] [ m_fftcore ] [ Functions ]

NAME

  switchreal

FUNCTION

   Perform the rotation:

     input: I2,i1,j3,(jp3)
     output: i1,I2,j3,(jp3)

   and padd the signal with zeros.
   Used for real wavefunctions.

INPUTS

  includelast
  n1dfft=Number of 1D FFTs to perform
  n2=Dimension of the transform along y
  n2eff
  lot=Cache blocking factor.
  n1zt
  lzt
  zt(2,lzt,n1zt)

OUTPUT

  zw(2,lot,n2)

SOURCE

2267 pure subroutine switchreal(includelast,n1dfft,n2,n2eff,lot,n1zt,lzt,zt,zw)
2268 
2269 !Arguments ------------------------------------
2270  integer,intent(in) :: includelast,n1dfft,n2,n2eff,lot,n1zt,lzt
2271  real(dp),intent(in) :: zt(2,lzt,n1zt)
2272  real(dp),intent(inout) :: zw(2,lot,n2)
2273 
2274 !Local variables-------------------------------
2275  integer :: i,j
2276 ! *************************************************************************
2277 
2278  if (includelast==1) then
2279 
2280    ! Compute symmetric and antisymmetric combinations
2281    do j=1,n1dfft
2282      zw(1,j,1)=zt(1,1,2*j-1)
2283      zw(2,j,1)=zt(1,1,2*j  )
2284    end do
2285    do i=2,n2eff
2286      do j=1,n1dfft
2287        zw(1,j,i)=      zt(1,i,2*j-1)-zt(2,i,2*j)
2288        zw(2,j,i)=      zt(2,i,2*j-1)+zt(1,i,2*j)
2289        zw(1,j,n2+2-i)= zt(1,i,2*j-1)+zt(2,i,2*j)
2290        zw(2,j,n2+2-i)=-zt(2,i,2*j-1)+zt(1,i,2*j)
2291      end do
2292    end do
2293 
2294  else
2295 
2296    ! An odd number of FFTs
2297    ! Compute symmetric and antisymmetric combinations
2298    do j=1,n1dfft-1
2299      zw(1,j,1)=zt(1,1,2*j-1)
2300      zw(2,j,1)=zt(1,1,2*j  )
2301    end do
2302    zw(1,n1dfft,1)=zt(1,1,2*n1dfft-1)
2303    zw(2,n1dfft,1)=zero
2304 
2305    do i=2,n2eff
2306      do j=1,n1dfft-1
2307        zw(1,j,i)=      zt(1,i,2*j-1)-zt(2,i,2*j)
2308        zw(2,j,i)=      zt(2,i,2*j-1)+zt(1,i,2*j)
2309        zw(1,j,n2+2-i)= zt(1,i,2*j-1)+zt(2,i,2*j)
2310        zw(2,j,n2+2-i)=-zt(2,i,2*j-1)+zt(1,i,2*j)
2311      end do
2312      zw(1,n1dfft,i)=      zt(1,i,2*n1dfft-1)
2313      zw(2,n1dfft,i)=      zt(2,i,2*n1dfft-1)
2314      zw(1,n1dfft,n2+2-i)= zt(1,i,2*n1dfft-1)
2315      zw(2,n1dfft,n2+2-i)=-zt(2,i,2*n1dfft-1)
2316    end do
2317  end if
2318 
2319 end subroutine switchreal

m_fftcore/switchreal_cent [ Functions ]

[ Top ] [ m_fftcore ] [ Functions ]

NAME

  switchreal_cent

FUNCTION

   Perform the rotation:

     input: I2,i1,j3,(jp3)
     output: i1,I2,j3,(jp3)

   and padd the signal with zeros.
   Used for the Fourier transform of real wavefunctions.

INPUTS

  includelast
  n1dfft=Number of 1D FFTs to perform
  max2
  n2=Dimension of the transform along y
  lot=Cache blocking factor.
  n1zt, lzt=Dimensions of zt.
  zt(2,lzt,n1zt)

OUTPUT

  zw(2,lot,n2)

SOURCE

2351 pure subroutine switchreal_cent(includelast,n1dfft,max2,n2,lot,n1zt,lzt,zt,zw)
2352 
2353 !Arguments ------------------------------------
2354  integer,intent(in) :: includelast,n1dfft,max2,n2,lot,n1zt,lzt
2355  real(dp),intent(in) :: zt(2,lzt,n1zt)
2356  real(dp),intent(inout) :: zw(2,lot,n2)
2357 
2358 !Local variables-------------------------------
2359  integer :: i,j
2360 ! *************************************************************************
2361 
2362  if (includelast==1) then
2363 
2364    ! Compute symmetric and antisymmetric combinations
2365    do j=1,n1dfft
2366     zw(1,j,1)=zt(1,1,2*j-1)
2367     zw(2,j,1)=zt(1,1,2*j  )
2368    end do
2369 
2370    do i=2,max2+1
2371     do j=1,n1dfft
2372      zw(1,j,i)=      zt(1,i,2*j-1)-zt(2,i,2*j)
2373      zw(2,j,i)=      zt(2,i,2*j-1)+zt(1,i,2*j)
2374      zw(1,j,n2+2-i)= zt(1,i,2*j-1)+zt(2,i,2*j)
2375      zw(2,j,n2+2-i)=-zt(2,i,2*j-1)+zt(1,i,2*j)
2376     end do
2377    end do
2378 
2379    if(max2+1<n2-max2)then
2380     do i=max2+2,n2-max2
2381      do j=1,n1dfft
2382       zw(1,j,i)=zero
2383       zw(2,j,i)=zero
2384      end do
2385     end do
2386    end if
2387 
2388  else
2389    ! Compute symmetric and antisymmetric combinations
2390    do j=1,n1dfft-1
2391      zw(1,j,1)=zt(1,1,2*j-1)
2392      zw(2,j,1)=zt(1,1,2*j  )
2393    end do
2394 
2395    zw(1,n1dfft,1)=zt(1,1,2*n1dfft-1)
2396    zw(2,n1dfft,1)=zero
2397    do i=2,max2+1
2398      do j=1,n1dfft-1
2399        zw(1,j,i)=      zt(1,i,2*j-1)-zt(2,i,2*j)
2400        zw(2,j,i)=      zt(2,i,2*j-1)+zt(1,i,2*j)
2401        zw(1,j,n2+2-i)= zt(1,i,2*j-1)+zt(2,i,2*j)
2402        zw(2,j,n2+2-i)=-zt(2,i,2*j-1)+zt(1,i,2*j)
2403      end do
2404      zw(1,n1dfft,i)=      zt(1,i,2*n1dfft-1)
2405      zw(2,n1dfft,i)=      zt(2,i,2*n1dfft-1)
2406      zw(1,n1dfft,n2+2-i)= zt(1,i,2*n1dfft-1)
2407      zw(2,n1dfft,n2+2-i)=-zt(2,i,2*n1dfft-1)
2408    end do
2409 
2410    if(max2+1<n2-max2)then
2411      do i=max2+2,n2-max2
2412        do j=1,n1dfft
2413         zw(1,j,i)=zero
2414         zw(2,j,i)=zero
2415        end do
2416      end do
2417    end if
2418  end if
2419 
2420 end subroutine switchreal_cent

m_fftcore/unfill [ Functions ]

[ Top ] [ m_fftcore ] [ Functions ]

NAME

  unfill

FUNCTION

  Move data from the cache work array to zf

INPUTS

  nd1,nd3=Dimensions of the input array zf.
  lot=Cache blocking factor.
  n1dfft=Number of 1D FFTs to perform
  n3=Dimension of the transform along z
  zw(2,lot,n3)=Cache work array with the z-lines.

OUTPUT

  zf(2,nd1,nd3)= zf(:,1:n1dfft,:1:n3) is filled with the results stored in zw

SOURCE

2602 pure subroutine unfill(nd1,nd3,lot,n1dfft,n3,zw,zf)
2603 
2604 !Arguments ------------------------------------
2605  integer,intent(in) :: nd1,nd3,lot,n1dfft,n3
2606  real(dp),intent(in) :: zw(2,lot,n3)
2607  real(dp),intent(inout) :: zf(2,nd1,nd3)
2608 
2609 !Local variables-------------------------------
2610  integer :: i1,i3
2611 ! *************************************************************************
2612 
2613  do i3=1,n3
2614    do i1=1,n1dfft
2615      zf(1,i1,i3)=zw(1,i1,i3)
2616      zf(2,i1,i3)=zw(2,i1,i3)
2617    end do
2618  end do
2619 
2620 end subroutine unfill

m_fftcore/unfill_cent [ Functions ]

[ Top ] [ m_fftcore ] [ Functions ]

NAME

  unfill_cent

FUNCTION

  Transfer data from the cache work array to zf.
  Takes into account zero padding (only the non-zero entries are moved)

INPUTS

  md1,md3=Leading dimension of zf along x and z
  lot=Cache blocking factor.
  n1dfft=Number of 1d transforms performed along z.
  max3=Max index of G_z the small box enclosing the G-sphere.
  m3=Number of points in the *small* box enclosing the G-sphere
  n3=Dimension of the FFT transform along z
  zw(2,lot,n3)=Cache work array

OUTPUT

  zf(2,md1,md3)= zf(:,1:n1dfft,1:m3) is filled with the non-zero components.

SOURCE

2647 pure subroutine unfill_cent(md1,md3,lot,n1dfft,max3,m3,n3,zw,zf)
2648 
2649 !Arguments ------------------------------------
2650  integer,intent(in) :: md1,md3,lot,n1dfft,max3,m3,n3
2651  real(dp),intent(in) :: zw(2,lot,n3)
2652  real(dp),intent(inout) :: zf(2,md1,md3)
2653 
2654 !Local variables-------------------------------
2655  integer :: i1,i3
2656 ! *************************************************************************
2657 
2658  ! Here, zero and positive frequencies
2659  do i3=1,max3+1
2660    do i1=1,n1dfft
2661      zf(1,i1,i3)=zw(1,i1,i3)
2662      zf(2,i1,i3)=zw(2,i1,i3)
2663    end do
2664  end do
2665 
2666  ! Here, negative frequencies
2667  do i3=max3+2,m3
2668    do i1=1,n1dfft
2669      zf(1,i1,i3)=zw(1,i1,i3+n3-m3)
2670      zf(2,i1,i3)=zw(2,i1,i3+n3-m3)
2671    end do
2672  end do
2673 
2674 end subroutine unfill_cent

m_fftcore/unmpiswitch [ Functions ]

[ Top ] [ m_fftcore ] [ Functions ]

NAME

  unmpiswitch

FUNCTION

INPUTS

OUTPUT

SOURCE

2691 pure subroutine unmpiswitch(j3,n1dfft,Jp2st,J2st,lot,n1,nd2proc,nd3proc,nproc,ioption,zw,zmpi1)
2692 
2693 !Arguments ------------------------------------
2694  integer,intent(in) :: j3,n1dfft,lot,n1,nd2proc,nd3proc,nproc,ioption
2695  integer,intent(inout) :: Jp2st,J2st
2696  real(dp),intent(in) :: zw(2,lot,n1)
2697  real(dp),intent(inout) :: zmpi1(2,n1,nd2proc,nd3proc,nproc)
2698 
2699 !Local variables-------------------------------
2700  integer :: i1,jp2,j2,ind,jjp2,mfft,jj2
2701 
2702 ! *************************************************************************
2703 
2704  mfft=0
2705  if (ioption == 2) then
2706    do Jp2=Jp2st,nproc
2707      do J2=J2st,nd2proc
2708        mfft=mfft+1
2709        if (mfft.gt.n1dfft) then
2710          Jp2st=Jp2
2711          J2st=J2
2712          return
2713        end if
2714        do I1=1,n1
2715          zmpi1(1,I1,J2,j3,Jp2)=zw(1,mfft,I1)
2716          zmpi1(2,I1,J2,j3,Jp2)=zw(2,mfft,I1)
2717        end do
2718      end do
2719      J2st=1
2720    end do
2721 
2722  else
2723    do Jp2=Jp2st,nproc
2724      do J2=J2st,nd2proc
2725        mfft=mfft+1
2726        if (mfft.gt.n1dfft) then
2727          Jp2st=Jp2
2728          J2st=J2
2729          return
2730        end if
2731        ind=(Jp2-1) * nd2proc + J2
2732        jj2=(ind-1)/nproc +1
2733 
2734        !jjp2=modulo(ind,nproc) +1
2735        jjp2=modulo(ind-1,nproc)+1
2736 
2737        do I1=1,n1
2738          zmpi1(1,I1,jj2,j3,jjp2)=zw(1,mfft,I1)
2739          zmpi1(2,I1,jj2,j3,jjp2)=zw(2,mfft,I1)
2740        end do
2741      end do
2742      J2st=1
2743    end do
2744  end if
2745 
2746 end subroutine unmpiswitch

m_fftcore/unmpiswitch_cent [ Functions ]

[ Top ] [ m_fftcore ] [ Functions ]

NAME

  unmpiswitch_cent

FUNCTION

INPUTS

OUTPUT

SOURCE

2763 pure subroutine unmpiswitch_cent(j3,n1dfft,Jp2stf,J2stf,lot,max1,md1,m1,n1,md2proc,nd3proc,nproc,ioption,zw,zmpi1)
2764 
2765 !Arguments ------------------------------------
2766  integer,intent(in) :: j3,n1dfft,lot,max1,md1,m1,n1,md2proc,nd3proc,nproc,ioption
2767  integer,intent(inout) :: Jp2stf,J2stf
2768  real(dp),intent(inout) :: zmpi1(2,md1,md2proc,nd3proc,nproc)
2769  real(dp),intent(in) :: zw(2,lot,n1)
2770 
2771 !Local variables-------------------------------
2772  integer :: mfft,Jp2,J2,I1,ind,jj2,jjp2
2773 
2774 ! *************************************************************************
2775 
2776  mfft=0
2777 
2778  if (ioption == 2) then
2779    do Jp2=Jp2stf,nproc
2780      do J2=J2stf,md2proc
2781        mfft=mfft+1
2782 
2783        if (mfft.gt.n1dfft) then
2784          Jp2stf=Jp2
2785          J2stf=J2
2786          return
2787        end if
2788 
2789        ! Here, zero and positive frequencies
2790        do I1=1,max1+1
2791          zmpi1(1,I1,J2,j3,Jp2)=zw(1,mfft,I1)
2792          zmpi1(2,I1,J2,j3,Jp2)=zw(2,mfft,I1)
2793        end do
2794 
2795        ! Here, negative frequencies
2796        do I1=max1+2,m1
2797          zmpi1(1,I1,J2,j3,Jp2)=zw(1,mfft,I1+n1-m1)
2798          zmpi1(2,I1,J2,j3,Jp2)=zw(2,mfft,I1+n1-m1)
2799        end do
2800 
2801      end do
2802      J2stf=1
2803    end do
2804 
2805  else
2806    do Jp2=Jp2stf,nproc
2807      do J2=J2stf,md2proc
2808        mfft=mfft+1
2809        if (mfft.gt.n1dfft) then
2810          Jp2stf=Jp2
2811          J2stf=J2
2812          return
2813        end if
2814        ind=(Jp2-1) * md2proc + J2
2815        jj2=(ind-1)/nproc +1
2816 
2817        !jjp2=modulo(ind,nproc) +1
2818        jjp2=modulo(ind-1,nproc)+1
2819 
2820        ! Here, zero and positive frequencies
2821        do I1=1,max1+1
2822          zmpi1(1,I1,Jj2,j3,Jjp2)=zw(1,mfft,I1)
2823          zmpi1(2,I1,Jj2,j3,Jjp2)=zw(2,mfft,I1)
2824        end do
2825 
2826        ! Here, negative frequencies
2827        do I1=max1+2,m1
2828          zmpi1(1,I1,Jj2,j3,Jjp2)=zw(1,mfft,I1+n1-m1)
2829          zmpi1(2,I1,Jj2,j3,Jjp2)=zw(2,mfft,I1+n1-m1)
2830        end do
2831      end do
2832      J2stf=1
2833    end do
2834  end if
2835 
2836 end subroutine unmpiswitch_cent

m_fftcore/unscramble [ Functions ]

[ Top ] [ m_fftcore ] [ Functions ]

NAME

  unscramble

FUNCTION

INPUTS

  i1
  j2
  lot=Cache blocking factor.
  n1dfft=Number of 1D FFTs to perform
  md1,n3,md2proc,nnd3
  zmpi2(2,md1,md2proc,nnd3)

OUTPUT

  zw(2,lot,n3)= cache work array

SOURCE

2860 pure subroutine unscramble(i1,j2,lot,n1dfft,md1,n3,md2proc,nnd3,zmpi2,zw)
2861 
2862 !Arguments ------------------------------------
2863  integer,intent(in) :: i1,j2,lot,n1dfft,md1,n3,md2proc,nnd3
2864  real(dp),intent(in) :: zmpi2(2,md1,md2proc,nnd3)
2865  real(dp),intent(inout) :: zw(2,lot,n3)
2866 
2867 !Local variables-------------------------------
2868 !scalars
2869  integer :: i,i3
2870 
2871 ! *************************************************************************
2872 
2873  do i3=1,n3
2874    do i=0,n1dfft-1
2875      zw(1,i+1,i3)=zmpi2(1,i1+i,j2,i3)
2876      zw(2,i+1,i3)=zmpi2(2,i1+i,j2,i3)
2877    end do
2878  end do
2879 
2880 end subroutine unscramble

m_fftcore/unswitch [ Functions ]

[ Top ] [ m_fftcore ] [ Functions ]

NAME

  unswitch

FUNCTION

INPUTS

  n1dfft=Number of 1D FFTs
  n2=Dimension of the transform along y
  lot=Cache blocking factor.
  n1=Dimension of the transform along x.
  lzt
  zw(2,lot,n2)=Cache work array

OUTPUT

  zt(2,lzt,n1)

SOURCE

2904 pure subroutine unswitch(n1dfft,n2,lot,n1,lzt,zw,zt)
2905 
2906 !Arguments ------------------------------------
2907  integer,intent(in) :: n1dfft,n2,lot,n1,lzt
2908  real(dp),intent(in) :: zw(2,lot,n2)
2909  real(dp),intent(inout) :: zt(2,lzt,n1)
2910 
2911 !Local variables-------------------------------
2912  integer :: i,j
2913 ! *************************************************************************
2914 
2915  do j=1,n1dfft
2916    do i=1,n2
2917      zt(1,i,j)=zw(1,j,i)
2918      zt(2,i,j)=zw(2,j,i)
2919    end do
2920  end do
2921 
2922 end subroutine unswitch

m_fftcore/unswitch_cent [ Functions ]

[ Top ] [ m_fftcore ] [ Functions ]

NAME

  unswitch_cent

FUNCTION

INPUTS

  n1dfft=Number of 1D FFTs to perform
  max2=Max G_y in the small box enclosing the G-sphere.
  m2=Size of the small box enclosing the G-sphere along y
  n2=Dimension of the transform along y
  lot=Cache blocking factor.
  n1=Dimension of the transform along x
  lzt
  zw(2,lot,n2)=Cache working array

OUTPUT

  zt(2,lzt,n1)

SOURCE

2948 pure subroutine unswitch_cent(n1dfft,max2,m2,n2,lot,n1,lzt,zw,zt)
2949 
2950 !Arguments ------------------------------------
2951  integer,intent(in) :: n1dfft,max2,m2,n2,lot,n1,lzt
2952  real(dp),intent(in) :: zw(2,lot,n2)
2953  real(dp),intent(inout) :: zt(2,lzt,n1)
2954 
2955 !Local variables-------------------------------
2956  integer :: i,j
2957 ! *************************************************************************
2958 
2959 ! Here, zero and positive frequencies
2960  do j=1,n1dfft
2961    do i=1,max2+1
2962      zt(1,i,j)=zw(1,j,i)
2963      zt(2,i,j)=zw(2,j,i)
2964    end do
2965  end do
2966 
2967 ! Here, negative frequencies
2968  if(m2>=max2+2)then
2969    do j=1,n1dfft
2970      do i=max2+2,m2
2971        zt(1,i,j)=zw(1,j,i+n2-m2)
2972        zt(2,i,j)=zw(2,j,i+n2-m2)
2973      end do
2974    end do
2975  end if
2976 
2977 end subroutine unswitch_cent

m_fftcore/unswitchreal [ Functions ]

[ Top ] [ m_fftcore ] [ Functions ]

NAME

  unswitchreal

FUNCTION

INPUTS

  n1dfft=Number of 1D FFTs to perform
  n2=Dimension of the transform along y
  n2eff=
  lot=Cache blocking factor.
  n1zt
  lzt
  zw(2,lot,n2)=Cache working array

OUTPUT

  zt(2,lzt,n1)

SOURCE

3002 pure subroutine unswitchreal(n1dfft,n2,n2eff,lot,n1zt,lzt,zw,zt)
3003 
3004 !Arguments ------------------------------------
3005  integer,intent(in) :: n1dfft,n2,n2eff,lot,n1zt,lzt
3006  real(dp),intent(in) :: zw(2,lot,n2)
3007  real(dp),intent(inout) :: zt(2,lzt,n1zt)
3008 
3009 !Local variables-------------------------------
3010  integer :: i,j
3011 ! *************************************************************************
3012 
3013 ! Decompose symmetric and antisymmetric parts
3014  do j=1,n1dfft
3015    zt(1,1,2*j-1)=zw(1,j,1)
3016    zt(2,1,2*j-1)=zero
3017    zt(1,1,2*j)  =zw(2,j,1)
3018    zt(2,1,2*j)  =zero
3019  end do
3020 
3021  do i=2,n2eff
3022    do j=1,n1dfft
3023      zt(1,i,2*j-1)= (zw(1,j,i)+zw(1,j,n2+2-i))*half
3024      zt(2,i,2*j-1)= (zw(2,j,i)-zw(2,j,n2+2-i))*half
3025      zt(1,i,2*j)  = (zw(2,j,i)+zw(2,j,n2+2-i))*half
3026      zt(2,i,2*j)  =-(zw(1,j,i)-zw(1,j,n2+2-i))*half
3027    end do
3028  end do
3029 
3030 end subroutine unswitchreal

m_fftcore/unswitchreal_cent [ Functions ]

[ Top ] [ m_fftcore ] [ Functions ]

NAME

 unswitchreal_cent

FUNCTION

INPUTS

  n1dfft=Number of 1D FFTs to perform
  max2=Max G_y in the small box enclosing the G-sphere.
  n2=Dimension of the transform along y
  lot=Cache blocking factor.
  n1zt
  lzt
  zw(2,lot,n2)=Cache working array

OUTPUT

  zt(2,lzt,n1)

SOURCE

3055 pure subroutine unswitchreal_cent(n1dfft,max2,n2,lot,n1zt,lzt,zw,zt)
3056 
3057 !Arguments ------------------------------------
3058  integer,intent(in) :: n1dfft,max2,n2,lot,n1zt,lzt
3059  real(dp),intent(in) :: zw(2,lot,n2)
3060  real(dp),intent(inout) :: zt(2,lzt,n1zt)
3061 
3062 !Local variables-------------------------------
3063  integer :: i,j
3064 ! *************************************************************************
3065 
3066  do j=1,n1dfft
3067    zt(1,1,2*j-1)=zw(1,j,1)
3068    zt(2,1,2*j-1)=zero
3069    zt(1,1,2*j)  =zw(2,j,1)
3070    zt(2,1,2*j)  =zero
3071  end do
3072 
3073  do i=2,max2+1
3074    do j=1,n1dfft
3075      zt(1,i,2*j-1)= (zw(1,j,i)+zw(1,j,n2+2-i))*half
3076      zt(2,i,2*j-1)= (zw(2,j,i)-zw(2,j,n2+2-i))*half
3077      zt(1,i,2*j)  = (zw(2,j,i)+zw(2,j,n2+2-i))*half
3078      zt(2,i,2*j)  =-(zw(1,j,i)-zw(1,j,n2+2-i))*half
3079    end do
3080  end do
3081 
3082 !       Here, zero and positive frequencies
3083 !        do 90,j=1,n1dfft
3084 !        do 90,i=1,max2+1
3085 !        zt(1,i,j)=zw(1,j,i)
3086 !        zt(2,i,j)=zw(2,j,i)
3087 !90      continue
3088 
3089 !       Here, negative frequencies
3090 !        if(m2>=max2+2)then
3091 !         do 110,j=1,n1dfft
3092 !         do 110,i=max2+2,m2
3093 !         zt(1,i,j)=zw(1,j,i+n2-m2)
3094 !         zt(2,i,j)=zw(2,j,i+n2-m2)
3095 !110      continue
3096 !        end if
3097 
3098 end subroutine unswitchreal_cent

TABLE OF CONTENTS

ABINIT/m_fftcore [ Modules ]

m_fftcore/addrho [ Functions ]

m_fftcore/bound [ Functions ]

m_fftcore/change_istwfk [ Functions ]

m_fftcore/fftalg_for_npfft [ Functions ]

m_fftcore/fftalg_has_mpi [ Functions ]

m_fftcore/fftalg_info [ Functions ]

m_fftcore/fftalg_isavailable [ Functions ]

m_fftcore/fftcore_set_mixprec [ Functions ]

m_fftcore/fill [ Functions ]

m_fftcore/fill_cent [ Functions ]

m_fftcore/get_cache_kb [ Functions ]

m_fftcore/get_kg [ Functions ]

m_fftcore/getng [ Functions ]

m_fftcore/indfftrisc [ Functions ]

m_fftcore/kgindex [ Functions ]

m_fftcore/kpgcount [ Functions ]

m_fftcore/kpgsph [ Functions ]

m_fftcore/mpifft_collect_datar [ Functions ]

m_fftcore/mpifft_dbox2fg [ Functions ]

m_fftcore/mpifft_dbox2fg_dpc [ Functions ]

m_fftcore/mpifft_dbox2fr [ Functions ]

m_fftcore/mpifft_dbox2fr_dpc [ Functions ]

m_fftcore/mpifft_fg2dbox [ Functions ]

m_fftcore/mpifft_fg2dbox_dpc [ Functions ]

m_fftcore/mpifft_fr2dbox [ Functions ]

m_fftcore/mpifft_fr2dbox_dpc [ Functions ]

m_fftcore/mpiswitch [ Functions ]

m_fftcore/mpiswitch_cent [ Functions ]

m_fftcore/multpot [ Functions ]

m_fftcore/ngfft_seq [ Functions ]

m_fftcore/print_ngfft [ Functions ]

m_fftcore/scramble [ Functions ]

m_fftcore/sphere [ Functions ]

m_fftcore/sphere_fft [ Functions ]

m_fftcore/sphere_fft1 [ Functions ]

m_fftcore/sphereboundary [ Functions ]

m_fftcore/switch [ Functions ]

m_fftcore/switch_cent [ Functions ]

m_fftcore/switchreal [ Functions ]

m_fftcore/switchreal_cent [ Functions ]

m_fftcore/unfill [ Functions ]

m_fftcore/unfill_cent [ Functions ]

m_fftcore/unmpiswitch [ Functions ]

m_fftcore/unmpiswitch_cent [ Functions ]

m_fftcore/unscramble [ Functions ]

m_fftcore/unswitch [ Functions ]

m_fftcore/unswitch_cent [ Functions ]

m_fftcore/unswitchreal [ Functions ]

m_fftcore/unswitchreal_cent [ Functions ]