TABLE OF CONTENTS
- ABINIT/m_fftcore
- m_fftcore/addrho
- m_fftcore/bound
- m_fftcore/change_istwfk
- m_fftcore/fftalg_for_npfft
- m_fftcore/fftalg_has_mpi
- m_fftcore/fftalg_info
- m_fftcore/fftalg_isavailable
- m_fftcore/fftcore_set_mixprec
- m_fftcore/fill
- m_fftcore/fill_cent
- m_fftcore/get_cache_kb
- m_fftcore/get_kg
- m_fftcore/getng
- m_fftcore/indfftrisc
- m_fftcore/kgindex
- m_fftcore/kpgcount
- m_fftcore/kpgsph
- m_fftcore/mpifft_collect_datar
- m_fftcore/mpifft_dbox2fg
- m_fftcore/mpifft_dbox2fg_dpc
- m_fftcore/mpifft_dbox2fr
- m_fftcore/mpifft_dbox2fr_dpc
- m_fftcore/mpifft_fg2dbox
- m_fftcore/mpifft_fg2dbox_dpc
- m_fftcore/mpifft_fr2dbox
- m_fftcore/mpifft_fr2dbox_dpc
- m_fftcore/mpiswitch
- m_fftcore/mpiswitch_cent
- m_fftcore/multpot
- m_fftcore/ngfft_seq
- m_fftcore/print_ngfft
- m_fftcore/scramble
- m_fftcore/sphere
- m_fftcore/sphere_fft
- m_fftcore/sphere_fft1
- m_fftcore/sphereboundary
- m_fftcore/switch
- m_fftcore/switch_cent
- m_fftcore/switchreal
- m_fftcore/switchreal_cent
- m_fftcore/unfill
- m_fftcore/unfill_cent
- m_fftcore/unmpiswitch
- m_fftcore/unmpiswitch_cent
- m_fftcore/unscramble
- m_fftcore/unswitch
- m_fftcore/unswitch_cent
- m_fftcore/unswitchreal
- m_fftcore/unswitchreal_cent
ABINIT/m_fftcore [ Modules ]
NAME
m_fftcore
FUNCTION
Low-level tools for FFT (sequential and MPI parallel version) It also provides helper functions to set up the list of G vectors inside a sphere or to count them.
COPYRIGHT
Copyright (C) 2014-2024 ABINIT group (SG, XG, AR, MG, MT) This file is distributed under the terms of the GNU General Public License, see ~abinit/COPYING or http://www.gnu.org/copyleft/gpl.txt .
TODO
1) Pass distribfft instead of MPI_enreg to simplify the API and facilitate code-reuse. 2) Merge this module with m_distribfft 3) Get rid of paral_kgb and MPI_type! This is a low-level module that may be called by other code in which paral_kgb is meaningless! FFT tables and a MPI communicator are sufficient.
SOURCE
26 #if defined HAVE_CONFIG_H 27 #include "config.h" 28 #endif 29 30 #include "abi_common.h" 31 32 module m_fftcore 33 34 use defs_basis 35 use m_abicore 36 use m_errors 37 use m_xmpi 38 use m_sort 39 40 use m_time, only : timab 41 use m_fstrings, only : itoa, sjoin 42 use m_geometry, only : normv 43 use defs_abitypes, only : MPI_type 44 use m_mpinfo, only : destroy_mpi_enreg, initmpi_seq 45 46 implicit none 47 48 private 49 50 public :: fftalg_isavailable ! True if the FFT library specified by fftalg is available. 51 public :: fftalg_has_mpi ! True if fftalg provides MPI-FFTs. 52 public :: fftalg_for_npfft ! Returns the default value for fftalg given the number of processors for the FFT. 53 public :: fftalg_info ! Returns strings with info on the FFT library specified by fftalg. 54 public :: get_cache_kb ! Returns the cache size in Kbs (based on CPP variables). 55 public :: ngfft_seq ! initialize ngfft(18) from the FFT divisions (assume sequential FFT) 56 public :: print_ngfft ! Print the content of ngfft(18) in explicative format. 57 public :: bound ! Find distance**2 to boundary point of fft box nearest to kpt 58 public :: getng ! From ecut and metric tensor in reciprocal space, computes recommended ngfft(1:3) 59 public :: sphereboundary ! Finds the boundary of the basis sphere of G vectors 60 public :: sphere 61 public :: sphere_fft ! Insert cg inside box. 62 public :: sphere_fft1 ! TODO: This one should be replaced by sphere_fft. 63 public :: change_istwfk ! Change the istwfk mode of a set of wavefunctions (sequential version, same k-point) 64 public :: kpgsph ! Set up the G vector list 65 public :: kpgcount ! Give the number of G vector in each direction 66 public :: get_kg ! Helper function to calculate the set of G-vectors at a given kpoint (no MPI FFT) 67 public :: kgindex ! Compute the index of each plane wave on a FFT grid. 68 69 ! Low-level tools for MPI FFT 70 public :: switch 71 public :: switch_cent 72 public :: switchreal 73 public :: switchreal_cent 74 public :: scramble 75 public :: fill 76 public :: fill_cent 77 public :: unfill 78 public :: unfill_cent 79 public :: unmpiswitch 80 public :: unswitch 81 public :: unswitchreal_cent 82 public :: unmpiswitch_cent 83 public :: unscramble 84 public :: unswitch_cent 85 public :: unswitchreal 86 public :: mpiswitch 87 public :: mpiswitch_cent 88 89 public :: mpifft_fg2dbox 90 public :: mpifft_fg2dbox_dpc 91 public :: mpifft_dbox2fg 92 public :: mpifft_dbox2fg_dpc 93 public :: mpifft_dbox2fr 94 public :: mpifft_dbox2fr_dpc 95 public :: mpifft_fr2dbox 96 public :: mpifft_fr2dbox_dpc 97 public :: mpifft_collect_datar ! Collect a real-space MPI-FFT distributed array on each proc. 98 99 public :: indfftrisc 100 public :: addrho 101 public :: multpot 102 103 ! 0 for double precision version (default), 1 for mixed precision FFTs 104 integer, public, protected :: fftcore_mixprec = 0 105 public :: fftcore_set_mixprec 106 107 ! ************************************************************************* 108 109 !---------------------------------------------------------------------- 110 ! Private variables 111 112 #define FFTALGA_SIZE 5 113 character(len=*),private,parameter :: fftalga2name(1:FFTALGA_SIZE)= & 114 (/"Goedecker ", & 115 "Vendor FFT ", & 116 "FFTW3 ", & 117 "Goedecker2002 ", & 118 "DFTI " /) 119 120 #define FFTALGB_SIZE 1 121 character(len=*),private,parameter :: fftalgb2name(0:FFTALGB_SIZE)= & 122 (/"C2C",& 123 "R2C"/) 124 125 #define FFTALGC_SIZE 2 126 character(len=*),private,parameter :: fftalgc2name(0:FFTALGC_SIZE)= & 127 (/"No pad ",& 128 "zero-pad ",& 129 "zero-pad+cache "/) 130 131 contains
m_fftcore/addrho [ Functions ]
[ Top ] [ m_fftcore ] [ Functions ]
NAME
addrho
FUNCTION
Add the contribution to the density generated by n1dfft x-y planes
INPUTS
icplexwf=1 if u(r) is real, 2 otherwise. includelast nd1,nd2=Leading dimensions of rhopart. n2=FFT dimension along y. lot=2nd Leading dimension of zw (cache blocking factor). n1dfft=Number of 1D FFTs along y performed. zw(2,lot,n2)=Array with the x-y planes (wavefunction in real space). weigth=Weight factor for the density.
SIDE EFFECTS
rhopart(nd1,nd2)=density in the x-y plane, accumulated in output.
SOURCE
4596 pure subroutine addrho(icplexwf,includelast,nd1,nd2,n2,lot,n1dfft,zw,rhopart,weight_r,weight_i) 4597 4598 4599 !Arguments ------------------------------------ 4600 integer,intent(in) :: icplexwf,includelast,nd1,nd2,n2,lot,n1dfft 4601 real(dp),intent(in) :: zw(2,lot,n2) 4602 real(dp),intent(inout) :: rhopart(nd1,nd2) 4603 real(dp),intent(in) :: weight_i,weight_r 4604 4605 !Local variables------------------------------- 4606 integer :: i2,j 4607 4608 ! ************************************************************************* 4609 4610 if (icplexwf==2) then 4611 ! Complex wavefunction in real space. 4612 do i2=1,n2-1,2 4613 do j=1,n1dfft 4614 rhopart(j,i2) = rhopart(j,i2) + weight_r*zw(1,j,i2)**2+weight_i*zw(2,j,i2)**2 4615 rhopart(j,i2+1) = rhopart(j,i2+1) + weight_r*zw(1,j,i2+1)**2+weight_i*zw(2,j,i2+1)**2 4616 end do 4617 end do 4618 4619 if (2*(n2/2)/=n2) then 4620 do j=1,n1dfft 4621 rhopart(j,n2 )=rhopart(j,n2 )+weight_r*zw(1,j,n2 )**2+weight_i*zw(2,j,n2 )**2 4622 end do 4623 end if 4624 else 4625 ! The wavefunction is real, in real space 4626 if (includelast==1) then 4627 do i2=1,n2 4628 do j=1,n1dfft 4629 rhopart(2*j-1,i2)=rhopart(2*j-1,i2)+zw(1,j,i2)**2*weight_r 4630 rhopart(2*j ,i2)=rhopart(2*j ,i2)+zw(2,j,i2)**2*weight_i 4631 end do 4632 end do 4633 else 4634 do i2=1,n2 4635 do j=1,n1dfft-1 4636 rhopart(2*j-1,i2)=rhopart(2*j-1,i2)+zw(1,j,i2)**2*weight_r 4637 rhopart(2*j ,i2)=rhopart(2*j ,i2)+zw(2,j,i2)**2*weight_i 4638 end do 4639 rhopart(2*n1dfft-1,i2)=rhopart(2*n1dfft-1,i2)+zw(1,n1dfft,i2)**2*weight_r 4640 end do 4641 end if 4642 4643 end if 4644 4645 end subroutine addrho
m_fftcore/bound [ Functions ]
[ Top ] [ m_fftcore ] [ Functions ]
NAME
bound
FUNCTION
For given kpt, ngfft, and gmet, Find distance**2 to boundary point of fft box nearest to kpt Find distance**2 to boundary point of fft box farthest to kpt
INPUTS
kpt(3)=real input k vector (reduced coordinates) ngfft(18)=contain all needed information about 3D FFT, see ~abinit/doc/variables/vargs.htm#ngfft gmet(3,3)=reciprocal space metric (currently in Bohr**-2)
OUTPUT
dsqmax=maximum distance**2 from k to boundary in Bohr**-2. dsqmin=minimum distance**2 from k to boundary in Bohr**-2. gbound(3)=coords of G on boundary (correspnding to gsqmin) plane=which plane min occurs in (1,2, or 3 for G1,etc).
NOTES
Potential trouble: this routine was written assuming kpt lies inside first Brillouin zone. No measure is taken to fold input kpt back into first zone. Given arbitrary kpt, this will cause trouble.
SOURCE
520 subroutine bound(dsqmax,dsqmin,gbound,gmet,kpt,ngfft,plane) 521 522 !Arguments ------------------------------------ 523 !scalars 524 integer,intent(out) :: plane 525 real(dp),intent(out) :: dsqmax,dsqmin 526 !arrays 527 integer,intent(in) :: ngfft(18) 528 integer,intent(out) :: gbound(3) 529 real(dp),intent(in) :: gmet(3,3),kpt(3) 530 531 !Local variables------------------------------- 532 !scalars 533 integer :: i1,i1min,i2,i2min,i3,i3min 534 real(dp) :: dsm,dsp 535 character(len=500) :: msg 536 537 ! ************************************************************************* 538 539 !Set plane to impossible value 540 plane=0 541 542 !look at +/- g1 planes: 543 dsqmax=zero 544 dsqmin=dsq(ngfft(1)/2,-ngfft(2)/2,-ngfft(3)/2,gmet,kpt)+0.01_dp 545 do i2=-ngfft(2)/2,ngfft(2)/2 546 do i3=-ngfft(3)/2,ngfft(3)/2 547 dsp = dsq(ngfft(1)/2, i2, i3,gmet,kpt) 548 dsm = dsq( - ngfft(1)/2, i2, i3,gmet,kpt) 549 if (dsp>dsqmax) dsqmax = dsp 550 if (dsm>dsqmax) dsqmax = dsm 551 if (dsp<dsqmin) then 552 dsqmin = dsp 553 i1min = ngfft(1)/2 554 i2min = i2 555 i3min = i3 556 plane=1 557 end if 558 if (dsm<dsqmin) then 559 dsqmin = dsm 560 i1min = - ngfft(1)/2 561 i2min = i2 562 i3min = i3 563 plane=1 564 end if 565 end do 566 end do 567 ! 568 !+/- g2 planes: 569 do i1=-ngfft(1)/2,ngfft(1)/2 570 do i3=-ngfft(3)/2,ngfft(3)/2 571 dsp = dsq(i1,ngfft(2)/2,i3,gmet,kpt) 572 dsm = dsq(i1,-ngfft(2)/2,i3,gmet,kpt) 573 if (dsp>dsqmax) dsqmax = dsp 574 if (dsm>dsqmax) dsqmax = dsm 575 if (dsp<dsqmin) then 576 dsqmin = dsp 577 i1min = i1 578 i2min = ngfft(2)/2 579 i3min = i3 580 plane=2 581 end if 582 if (dsm<dsqmin) then 583 dsqmin = dsm 584 i1min = i1 585 i2min = - ngfft(2)/2 586 i3min = i3 587 plane=2 588 end if 589 end do 590 end do 591 ! 592 !+/- g3 planes: 593 do i1=-ngfft(1)/2,ngfft(1)/2 594 do i2=-ngfft(2)/2,ngfft(2)/2 595 dsp = dsq(i1,i2,ngfft(3)/2,gmet,kpt) 596 dsm = dsq(i1,i2,-ngfft(3)/2,gmet,kpt) 597 if (dsp>dsqmax) dsqmax = dsp 598 if (dsm>dsqmax) dsqmax = dsm 599 if (dsp<dsqmin) then 600 dsqmin = dsp 601 i1min = i1 602 i2min = i2 603 i3min = ngfft(3)/2 604 plane=3 605 end if 606 if (dsm<dsqmin) then 607 dsqmin = dsm 608 i1min = i1 609 i2min = i2 610 i3min = - ngfft(3)/2 611 plane=3 612 end if 613 end do 614 end do 615 616 if (plane==0) then 617 ! Trouble: missed boundary somehow 618 write(msg, '(a,a,a,3f9.4,a,3(i0,1x),a,a,a,a,a)' )& 619 'Trouble finding boundary of G sphere for',ch10,& 620 'kpt=',kpt(:),' and ng=',ngfft(1:3),ch10,& 621 'Action : check that kpt lies',& 622 'reasonably within first Brillouin zone; ',ch10,& 623 'else code bug, contact ABINIT group.' 624 ABI_BUG(msg) 625 end if 626 627 gbound(1)=i1min 628 gbound(2)=i2min 629 gbound(3)=i3min 630 631 contains 632 633 function dsq(i1,i2,i3,gmet,kpt) 634 635 integer :: i1,i2,i3 636 real(dp) :: dsq 637 real(dp) :: kpt(3),gmet(3,3) 638 639 dsq=gmet(1,1)*(kpt(1)+dble(i1))**2& 640 & +gmet(2,2)*(kpt(2)+dble(i2))**2& 641 & +gmet(3,3)*(kpt(3)+dble(i3))**2& 642 & +2._dp*(gmet(1,2)*(kpt(1)+dble(i1))*(kpt(2)+dble(i2))& 643 & +gmet(2,3)*(kpt(2)+dble(i2))*(kpt(3)+dble(i3))& 644 & +gmet(3,1)*(kpt(3)+dble(i3))*(kpt(1)+dble(i1))) 645 end function dsq 646 647 end subroutine bound
m_fftcore/change_istwfk [ Functions ]
[ Top ] [ m_fftcore ] [ Functions ]
NAME
change_istwfk
FUNCTION
This function allows one to change the time-reversal storage mode (istwfk) of a *full* set of u(G). It does not support MPI-FFT!
INPUTS
from_npw=number of G vectors in input from_cg from_kg_k(3,npw)=integer coordinates of the G vectors of from_cg from_istwfk=option parameter that describes the storage in from_cg to_npw=number of G vectors in output to_cg to_kg_k(3,npw)=integer coordinates of the G vectors in to_cg to_istwfk=option parameter that describes the storage in to_cg n1,n2,n3=physical dimension of the box (must be large enough to contain the sphere, no check is done) ndat=number of wavefunctions from_cg(2,from_npw*ndat)= Input u(g) values OUTPUTS to_cg(2,to_npw*ndat)= Output u(g) defined on the list of vectors to_kg_k with time-reversal mode to_istwfk
SOURCE
2094 subroutine change_istwfk(from_npw,from_kg,from_istwfk,to_npw,to_kg,to_istwfk,n1,n2,n3,ndat,from_cg,to_cg) 2095 2096 !Arguments ------------------------------------ 2097 !scalars 2098 integer,intent(in) :: from_npw,from_istwfk,to_npw,to_istwfk,n1,n2,n3,ndat 2099 !arrays 2100 integer,intent(in) :: from_kg(3,from_npw),to_kg(3,to_npw) 2101 real(dp),intent(inout) :: from_cg(2,from_npw*ndat) ! out due to sphere! 2102 real(dp),intent(inout) :: to_cg(2,to_npw*ndat) 2103 2104 !Local variables------------------------------- 2105 !scalars 2106 integer :: n4,n5,n6 2107 real(dp),parameter :: xnorm1=one 2108 !arrays 2109 integer,parameter :: shiftg0(3)=0,me_g0=1 2110 integer,parameter :: symmE(3,3)=reshape([1,0,0,0,1,0,0,0,1],[3,3]) 2111 real(dp),allocatable :: cfft(:,:,:,:) 2112 2113 ! ************************************************************************* 2114 2115 n4=2*(n1/2)+1 2116 n5=2*(n2/2)+1 2117 n6=2*(n3/2)+1 2118 2119 ABI_MALLOC(cfft, (2,n4,n5,n6*ndat)) 2120 2121 ! iflag=1 ==> insert from_cg into cfft. 2122 call sphere(from_cg,ndat,from_npw,cfft,n1,n2,n3,n4,n5,n6,from_kg,from_istwfk,+1,me_g0,shiftg0,symmE,xnorm1) 2123 2124 ! iflag=-1 ==> extract to_cg from cfft. 2125 call sphere(to_cg,ndat,to_npw,cfft,n1,n2,n3,n4,n5,n6,to_kg,to_istwfk,-1,me_g0,shiftg0,symmE,xnorm1) 2126 2127 ABI_FREE(cfft) 2128 2129 end subroutine change_istwfk
m_fftcore/fftalg_for_npfft [ Functions ]
[ Top ] [ m_fftcore ] [ Functions ]
NAME
fftalg_for_npfft
FUNCTION
Returns the default value of fftalg given the number of MPI nodes to be used in the FFTs.
INPUTS
nproc_fft=Number of processors used for MPI FFT
OUTPUT
fftalg=Integer used to select the FFT library.
SOURCE
268 pure function fftalg_for_npfft(nproc_fft) result(fftalg) 269 270 !Arguments ------------------------------------ 271 !scalars 272 integer,intent(in) :: nproc_fft 273 integer :: fftalg 274 275 ! ************************************************************************* 276 277 ! Default for the sequential case. 278 fftalg = 112 279 280 ! Use Goedecker2002 if fftalg does not support MPI (e.g 112) 281 if (nproc_fft > 1) fftalg = 401 282 !if (nproc_fft > 1) fftalg = 402 283 284 #ifdef HAVE_FFTW3 285 fftalg = 312 286 #elif defined HAVE_DFTI 287 fftalg = 512 288 if (nproc_fft > 1) fftalg = 401 ! MPI-FFT with DFTI is not implemented yet 289 #endif 290 291 !if (nproc_fft > 1) fftalg = 401 ! This is to revert to the old behavior. 292 293 end function fftalg_for_npfft
m_fftcore/fftalg_has_mpi [ Functions ]
[ Top ] [ m_fftcore ] [ Functions ]
NAME
fftalg_has_mpi
FUNCTION
True if the FFT library specified by fftalg is available.
INPUTS
fftalg=Input variable.
SOURCE
227 pure function fftalg_has_mpi(fftalg) result(ans) 228 229 !Arguments ------------------------------------ 230 !scalars 231 integer,intent(in) :: fftalg 232 logical :: ans 233 234 !Local variables------------------------------- 235 !scalars 236 integer :: fftalga,fftalgb,fftalgc 237 238 ! ************************************************************************* 239 240 ans = .False. 241 fftalga = fftalg/100; fftalgb = mod(fftalg,100)/10; fftalgc = mod(fftalg,10) 242 243 if (fftalga == FFT_FFTW3) ans = .True. 244 !if (fftalga == FFT_DFTI) ans = .True. 245 if (fftalga == FFT_SG2002) ans = .True. 246 247 end function fftalg_has_mpi
m_fftcore/fftalg_info [ Functions ]
[ Top ] [ m_fftcore ] [ Functions ]
NAME
fftalg_info
FUNCTION
Returns info on the FFT library specified by fftalg (ngfft(7))
INPUTS
fftalg=Input variable.
OUTPUT
library=String with the name of FFT library cplex_mode= String defining whether the FFT library supports real<-->complex transforms. padding_mode=Padding mode.
SOURCE
315 subroutine fftalg_info(fftalg,library,cplex_mode,padding_mode) 316 317 !Arguments ------------------------------------ 318 !scalars 319 integer,intent(in) :: fftalg 320 character(len=*),intent(out) :: library,cplex_mode,padding_mode 321 322 !Local variables------------------------------- 323 !scalars 324 integer :: fftalga,fftalgb,fftalgc 325 326 ! ************************************************************************* 327 328 library = "Unknown"; cplex_mode = "Unknown"; padding_mode = "Unknown" 329 330 fftalga=fftalg/100 331 if (fftalga>0 .and. fftalga<=FFTALGA_SIZE) library = fftalga2name(fftalga) 332 333 fftalgb=mod(fftalg,100)/10 334 if (fftalgb>=0 .and. fftalgb<=FFTALGB_SIZE) cplex_mode = fftalgb2name(fftalgb) 335 336 fftalgc=mod(fftalg,10) 337 if (fftalgc>=0 .and. fftalgc<=FFTALGC_SIZE) padding_mode = fftalgc2name(fftalgc) 338 339 end subroutine fftalg_info
m_fftcore/fftalg_isavailable [ Functions ]
[ Top ] [ m_fftcore ] [ Functions ]
NAME
fftalg_isavailable
FUNCTION
Returns TRUE if the FFT library specified by fftalg (ngfft(7)) is available
INPUTS
fftalg=Input variable.
SOURCE
186 logical pure function fftalg_isavailable(fftalg) result(ans) 187 188 !Arguments ------------------------------------ 189 integer,intent(in) :: fftalg 190 191 !Local variables------------------------------- 192 integer :: fftalga,fftalgb,fftalgc 193 194 ! ************************************************************************* 195 196 ans = .TRUE. 197 fftalga = fftalg/100 198 fftalgb = mod(fftalg,100)/10 199 fftalgc = mod(fftalg,10) 200 201 ! Optional FFT libraries. 202 #ifndef HAVE_FFTW3 203 if (fftalga == FFT_FFTW3) ans = .FALSE. 204 #endif 205 206 #ifndef HAVE_DFTI 207 if (fftalga == FFT_DFTI) ans = .FALSE. 208 #endif 209 210 end function fftalg_isavailable
m_fftcore/fftcore_set_mixprec [ Functions ]
[ Top ] [ m_fftcore ] [ Functions ]
NAME
fftalg_set_precision
FUNCTION
Set the precision to be used in the FFT routines: 0 for standard double precision, 1 for mixed precision (dp input, sp for intermediate arrays passed to FFT libs) Return old value.
INPUTS
SOURCE
149 integer function fftcore_set_mixprec(wp) result(old_wp) 150 151 !Arguments ------------------------------------ 152 !scalars 153 integer,intent(in) :: wp 154 155 ! ************************************************************************* 156 157 old_wp = fftcore_mixprec 158 fftcore_mixprec = abs(wp) 159 160 select case (fftcore_mixprec) 161 case (0) 162 if (old_wp /= fftcore_mixprec) call wrtout(std_out, " fftcore_mixprec 0 --> Using double-precision FFT", newlines=1) 163 case (1) 164 if (old_wp /= fftcore_mixprec) call wrtout(std_out, " fftcore_mixprec 1 --> Using mixed precision FFT", newlines=1) 165 case default 166 ABI_ERROR(sjoin("Wrong value for input wp:", itoa(fftcore_mixprec))) 167 end select 168 169 end function fftcore_set_mixprec
m_fftcore/fill [ Functions ]
[ Top ] [ m_fftcore ] [ Functions ]
NAME
fill
FUNCTION
Receives a set of z-lines in reciprocal space, insert the values in the cache work array zw (no padding)
INPUTS
nd1,nd3=Dimensions of the input array zf. lot=Cache blocking factor. n1dfft=Number of 1D FFTs to perform n3=Dimension of the transform along z zf(2,nd1,nd3)=Input array
OUTPUT
zw(2,lot,n3)=Cache work array with the z-lines.
SOURCE
2493 pure subroutine fill(nd1,nd3,lot,n1dfft,n3,zf,zw) 2494 2495 !Arguments ------------------------------------ 2496 integer,intent(in) :: nd1,nd3,lot,n1dfft,n3 2497 real(dp),intent(in) :: zf(2,nd1,nd3) 2498 real(dp),intent(inout) :: zw(2,lot,n3) 2499 2500 ! local variables 2501 integer :: i1,i3 2502 2503 ! ************************************************************************* 2504 2505 do i3=1,n3 2506 do i1=1,n1dfft 2507 zw(1,i1,i3)=zf(1,i1,i3) 2508 zw(2,i1,i3)=zf(2,i1,i3) 2509 end do 2510 end do 2511 2512 end subroutine fill
m_fftcore/fill_cent [ Functions ]
[ Top ] [ m_fftcore ] [ Functions ]
NAME
fill_cent
FUNCTION
Receives a set of z-lines in reciprocal space, insert the values in cache work array defined on the FFT box and pads the central of the frequency region with zeros.
INPUTS
md1,md3=Leading dimension of zf along x and z lot=second dimension of zw (cache blocking factor) n1dfft=Number of 1d transforms to be performed along z. max3=Max G_z in the small box enclosing the G-sphere. m3=Number of points in the *small* box enclosing the G-sphere n3=Dimension of the FFT transform along z zf(2,md1,md3)=x-z planes in reciprocal space
OUTPUT
zw(2,lot,n3)= Filled cache work array. zw(:,1:n1dfft,n3) contains the lines to be transformed along.
SOURCE
2541 pure subroutine fill_cent(md1,md3,lot,n1dfft,max3,m3,n3,zf,zw) 2542 2543 !Arguments ------------------------------------ 2544 integer,intent(in) :: md1,md3,lot,n1dfft,max3,m3,n3 2545 real(dp),intent(in) :: zf(2,md1,md3) 2546 real(dp),intent(inout) :: zw(2,lot,n3) 2547 2548 !Local variables------------------------------- 2549 !scalars 2550 integer :: i1,i3 2551 2552 ! ************************************************************************* 2553 2554 ! Here, zero and positive frequencies 2555 do i3=1,max3+1 2556 do i1=1,n1dfft 2557 zw(1,i1,i3)=zf(1,i1,i3) 2558 zw(2,i1,i3)=zf(2,i1,i3) 2559 end do 2560 end do 2561 2562 ! Fill the center region with zeros 2563 do i3=max3+2,n3-m3+max3+1 2564 do i1=1,n1dfft 2565 zw(1,i1,i3)=zero 2566 zw(2,i1,i3)=zero 2567 end do 2568 end do 2569 2570 ! Here, negative frequencies 2571 do i3=max3+2,m3 2572 do i1=1,n1dfft 2573 zw(1,i1,i3+n3-m3)=zf(1,i1,i3) 2574 zw(2,i1,i3+n3-m3)=zf(2,i1,i3) 2575 end do 2576 end do 2577 2578 end subroutine fill_cent
m_fftcore/get_cache_kb [ Functions ]
[ Top ] [ m_fftcore ] [ Functions ]
NAME
get_cache_kb
FUNCTION
Returns the cache size in KB to be used for cache blocking algorithms in the FFT routines. The value is derived from the values of the CPP options defined in config.h
TODO
Use C to get the real cache size. See http://stackoverflow.com/questions/12594208/c-program-to-determine-levels-size-of-cache
SOURCE
358 pure function get_cache_kb() 359 360 !Local variables------------------------------- 361 !scalars 362 integer :: get_cache_kb 363 364 ! ************************************************************************* 365 366 ! Default value 367 get_cache_kb = 16 368 !get_cache_kb = 32 369 !get_cache_kb = 256 370 371 end function get_cache_kb
m_fftcore/get_kg [ Functions ]
[ Top ] [ m_fftcore ] [ Functions ]
NAME
get_kg
FUNCTION
Helper function to calculate the set of G-vectors at a given kpoint. without taking advantage of FFT parallelism and G-vector distributions.
INPUTS
kpoint(3)=The k-point in reduced coordinates. ecut=Cutoff energy for planewave basis set. gmet(3,3)=reciprocal space metric ($\textrm{bohr}^{-2}$). istwfk=Options defining if time-reversal is used to decrease the number of G"s. [kin_sorted]=True if output g-vectors should be sorted by |k+g|^2/2. Default: False.
OUTPUT
npw_k=Total number of G-vectors in the full G-sphere. kg_k(3,npw_k) list of G-vectors allocated by the routine.
SOURCE
4425 subroutine get_kg(kpoint, istwf_k, ecut, gmet, npw_k, kg_k, kin_sorted) 4426 4427 !Arguments ------------------------------------ 4428 !scalars 4429 integer,intent(in) :: istwf_k 4430 integer,intent(out) :: npw_k 4431 real(dp),intent(in) :: ecut 4432 !arrays 4433 integer,allocatable,intent(out) :: kg_k(:,:) 4434 real(dp),intent(in) :: gmet(3,3),kpoint(3) 4435 logical,optional,intent(in) :: kin_sorted 4436 4437 !Local variables------------------------------- 4438 !scalars 4439 integer,parameter :: mkmem_ = 1, exchn2n3d0 = 0, ikg0 = 0 4440 integer :: npw_k_test, ig, ig_sort 4441 type(MPI_type) :: MPI_enreg_seq 4442 !arrays 4443 integer :: kg_dum(3, 0) 4444 integer,allocatable :: iperm(:), iwork(:,:) 4445 real(dp),allocatable :: kin_kg(:) 4446 4447 ! ********************************************************************* 4448 4449 call initmpi_seq(MPI_enreg_seq) 4450 4451 ! Calculate the number of G-vectors for this k-point. 4452 call kpgsph(ecut,exchn2n3d0, gmet, ikg0, 0, istwf_k, kg_dum, kpoint, 0, MPI_enreg_seq, 0, npw_k) 4453 4454 ! Allocate and calculate the set of G-vectors. 4455 ABI_MALLOC(kg_k,(3,npw_k)) 4456 call kpgsph(ecut, exchn2n3d0, gmet, ikg0, 0, istwf_k, kg_k, kpoint, mkmem_, MPI_enreg_seq, npw_k, npw_k_test) 4457 4458 call destroy_mpi_enreg(MPI_enreg_seq) 4459 4460 if (present(kin_sorted)) then 4461 if (kin_sorted) then 4462 ABI_MALLOC(kin_kg, (npw_k)) 4463 ABI_MALLOC(iperm, (npw_k)) 4464 iperm = [(ig, ig=1,npw_k)] 4465 do ig=1,npw_k 4466 kin_kg(ig) = half * normv(kpoint + kg_k(:, ig), gmet, "G") ** 2 4467 end do 4468 4469 call sort_dp(npw_k, kin_kg, iperm, tol14) 4470 ABI_FREE(kin_kg) 4471 4472 ABI_MALLOC(iwork, (3, npw_k)) 4473 iwork = kg_k 4474 do ig=1,npw_k 4475 ig_sort = iperm(ig) 4476 kg_k(:,ig) = iwork(:,ig_sort) 4477 end do 4478 4479 ABI_FREE(iwork) 4480 ABI_FREE(iperm) 4481 end if 4482 end if 4483 4484 end subroutine get_kg
m_fftcore/getng [ Functions ]
[ Top ] [ m_fftcore ] [ Functions ]
NAME
getng
FUNCTION
From ecut and metric tensor in reciprocal space, computes recommended ngfft(1:3) Also computes the recommended value of nfft and mgfft Pay attention that the FFT grid must be compatible with the symmetry operations (see irrzg.f).
INPUTS
boxcutmin=minimum value of boxcut admitted (boxcut is the ratio between the radius of the sphere contained in the FFT box, and the radius of the planewave sphere): usually 2.0. chksymtnons= if==3, will impose the FFT grid to be invariant under the spatial symmetries. ecut=energy cutoff in Hartrees gmet(3,3)=reciprocal space metric (bohr**-2). kpt(3)=input k vector in terms of reciprocal lattice primitive translations me_fft=index of the processor in the FFT set (use 0 if sequential) nproc_fft=number of processors in the FFT set (use 1 if sequential) nsym=number of symmetry elements in group paral_fft=0 if no FFT parallelisation; 1 if FFT parallelisation symrel(3,3,nsym)=symmetry matrices in real space (integers) tnons(3,nsym)=nonsymmorphic translations associated to symrel
OUTPUT
mgfft= max(ngfft(1),ngfft(2),ngfft(3)) nfft=number of points in the FFT box=ngfft(1)*ngfft(2)*ngfft(3)/nproc_fft
SIDE EFFECTS
Input/Output ngfft(1:18)=integer array with FFT box dimensions and other information on FFTs. On input ngfft(1:3) contains optional trial values. If ngfft(1:3)/minbox is greater than value calculated to avoid wrap-around error and ngfft obeys constraint placed by the FFT routine that is used then ngfft(1:3) is left unchanged. Otherwise set to value computed in now. Note that there is the possibility of an undetected error if we are dealing with a cubic unit cell and ngfft(1), ngfft(2) and ngfft(3) are different. In the future we should handle this case. ngfft(4),ngfft(5),ngfft(6)= modified values to avoid cache trashing, presently: ngfft(4)=ngfft(1)+1 if ngfft(1) is even; ngfft(5)=ngfft(2)+1 if ngfft(2) is even. in the other cases, ngfft(4:6)=ngfft(1:3). Other choices may better, but this is left for the future. ngfft(7)=choice for FFT algorithm, see the input variable fftalg ngfft(8)=size of the cache, in bytes (not used here presently).!! other ngfft slots are used for parallelism see ~abinit/doc/variables/vargs.htm#ngfft [ngfftc(1:18)]= -optional- value of ngfft for the "coarse" grid [unit] = -optional- output unit number (DEFAULT std_out) [gpu_option] = GPU implementation to use, i.e. cuda, openMP, ... (0=not using GPU)
SOURCE
703 subroutine getng(boxcutmin, chksymtnons, ecut, gmet, kpt, me_fft, mgfft, nfft, ngfft, & 704 nproc_fft, nsym,paral_fft, symrel, tnons, & 705 ngfftc, unit, gpu_option) ! optional 706 707 use defs_fftdata, only : mg 708 709 !Arguments ------------------------------------ 710 !scalars 711 integer,intent(in) :: chksymtnons,me_fft,nproc_fft,nsym,paral_fft 712 integer,intent(out) :: mgfft,nfft 713 integer,optional,intent(in) :: unit,gpu_option 714 real(dp),intent(in) :: boxcutmin,ecut 715 !arrays 716 integer,intent(in) :: symrel(3,3,nsym) 717 integer,intent(in),optional :: ngfftc(3) 718 integer,intent(inout) :: ngfft(18) 719 real(dp),intent(in) :: gmet(3,3),kpt(3) 720 real(dp),intent(in) :: tnons(3,nsym) 721 722 !Local variables------------------------------- 723 !scalars 724 integer,save :: first=1,msrch(3),previous_paral_mode=0 725 integer :: element,ifactor,ii,index,ipower,isrch,isrch1,isrch2,isrch3,isym,jj,mu,paral_fft_ 726 integer :: plane,testok,tobechecked,ount,fftalga,nn,ngdiv,valpow 727 real(dp),parameter :: minbox=0.75_dp 728 real(dp) :: dsqmax,dsqmin,ecutmx,prodcurrent,prodtrial,tnscaled,xx,yy 729 logical :: testdiv 730 character(len=500) :: msg 731 integer,parameter :: largest_ngfft=mg ! Goedecker FFT: any powers of 2, 3, and 5 - must be coherent with defs_fftdata.F90 732 integer,parameter :: maxpow2 =16 ! int(log(largest_ngfft+half)/log(two)) 733 integer,parameter :: maxpow3 =6 ! int(log(largest_ngfft+half)/log(three)) 734 integer,parameter :: maxpow5 =6 ! int(log(largest_ngfft+half)/log(five)) 735 integer,parameter :: maxpow7 =0 736 integer,parameter :: maxpow11=0 737 integer,parameter :: mmsrch=(maxpow2+1)*(maxpow3+1)*(maxpow5+1)*(maxpow7+1)*(maxpow11+1) 738 integer,parameter :: nfactor=10, mpower=5 739 !Arrays 740 integer,save :: iperm(mmsrch),srch(mmsrch,3) 741 integer(i8b) :: li_srch(mmsrch) 742 integer :: divisor(3,3),gbound(3),imax(3),imin(3),ngcurrent(3) 743 integer :: ngmax(3),ngsav(3),ngtrial(3) 744 integer :: npower(3,mpower) 745 integer,parameter :: factor(10) = (/1,2,3,4,5,6,8,9,10,12/) 746 integer,parameter :: power(5) = (/2,3,5,7,11/) 747 748 ! ************************************************************************* 749 750 ount = std_out; if (present(unit)) ount = unit 751 752 !DEBUG 753 !write(std_out,*)' m_fftcore/getng : enter' 754 !ENDDEBUG 755 756 fftalga = ngfft(7)/100 757 758 !If not yet done, compute recommended (boxcut>=2) fft grid dimensions 759 !In case we switch for paral to sequential mode, recompute srch. 760 !This is the case e.g. when computing ngfftdiel in sequential mode 761 !after an initial computation of ngfft in parallel 762 763 paral_fft_=paral_fft;if (nproc_fft==0) paral_fft_=0 764 765 if(first==1.or.paral_fft_ /= previous_paral_mode) then 766 first=0; previous_paral_mode=paral_fft_ 767 srch(:,:)=0 768 769 ! Factors of 2 770 srch(1,1)=1 771 do ii=1,maxpow2 772 srch(ii+1,1)=srch(ii,1)*2 773 end do 774 775 ! Factors of 3 776 index=maxpow2+1 777 if(maxpow3>0)then 778 do ii=1,max(1,maxpow3) 779 srch(1+ii*index:(ii+1)*index,1)=3*srch(1+(ii-1)*index:ii*index,1) 780 end do 781 end if 782 783 ! Factors of 5 784 index=(maxpow3+1)*index 785 if(maxpow5>0)then 786 do ii=1,max(1,maxpow5) 787 li_srch = 0 788 li_srch(1+ii*index:(ii+1)*index)=5_i8b*srch(1+(ii-1)*index:ii*index,1) 789 where (li_srch > huge(maxpow3)) li_srch = huge(maxpow3) 790 srch(1+ii*index:(ii+1)*index,1)=li_srch(1+ii*index:(ii+1)*index) 791 end do 792 end if 793 794 ! Factors of 7 795 index=(maxpow5+1)*index 796 if(maxpow7>0)then 797 do ii=1,max(1,maxpow7) 798 srch(1+ii*index:(ii+1)*index,1)=7*srch(1+(ii-1)*index:ii*index,1) 799 end do 800 end if 801 802 ! Factors of 11 803 index=(maxpow7+1)*index 804 if(maxpow11>0)then 805 do ii=1,max(1,maxpow11) 806 srch(1+ii*index:(ii+1)*index,1)=11*srch(1+(ii-1)*index:ii*index,1) 807 end do 808 end if 809 810 call sort_int(mmsrch,srch(:,1),iperm) 811 812 do ii=1,mmsrch 813 if(srch(ii,1)>largest_ngfft)exit 814 end do 815 msrch(1)=ii-1 816 817 ! In case of FFT parallelism, one need ngfft(2) and ngfft(3) to be multiple of nproc_fft 818 if(paral_fft_==1)then 819 msrch(2)=0 820 do ii=1,msrch(1) 821 if(modulo(srch(ii,1),nproc_fft)==0) then 822 msrch(2)=msrch(2)+1 823 srch(msrch(2),2)=srch(ii,1) 824 end if 825 end do 826 !write(msg,'(a,i0,a,i0,2a,i0)')& 827 ! 'The second and third dimension of the FFT grid: ',ngfft(2),", ",ngfft(3),ch10,& 828 ! 'were imposed to be multiple of the number of processors for the FFT: ', nproc_fft 829 !if (ount /= dev_null) ABI_COMMENT(msg) 830 else 831 msrch(2)=msrch(1) 832 srch(:,2)=srch(:,1) 833 end if 834 835 ! The second and third search list have the same constraint 836 msrch(3)=msrch(2) 837 srch(:,3)=srch(:,2) 838 839 ! The set of allowed ngfft values has been found 840 end if ! first==1 841 842 !============================================================================================= 843 ! 844 ! Determination of sufficient values of ngfft with ngfft(2) and ngfft(3) taken inside 845 ! sets of values that take into account the constraint on nproc_fft 846 847 !Save input values of ngfft 848 ngsav(1:3) = ngfft(1:3) 849 850 !As an initial guess for ngfft, use the provided coarse mesh grid 851 if (PRESENT(ngfftc)) then 852 ngfft(1:3)=ngfftc(1:3) 853 call wrtout(ount,' Using supplied coarse mesh as initial guess.') 854 else 855 ngfft(1:3)=2 856 end if 857 858 !Enlarge the initial guess until the set of ngfft entirely comprises the sphere 859 do 860 861 call bound(dsqmax,dsqmin,gbound,gmet,kpt,ngfft,plane) 862 863 ! Exit the infinite do-loop if the sphere is inside the FFT box 864 if (dsqmin>=(half*boxcutmin**2*ecut/pi**2)) exit 865 866 ! Fix nearest boundary 867 do ii=1,msrch(plane)-1 868 if (srch(ii,plane)>=ngfft(plane)) then 869 ! redefine ngfft(plane) to next higher choice 870 ngfft(plane)=srch(ii+1,plane) 871 exit ! Exit the loop over ii 872 end if 873 874 if (ii==msrch(plane)-1)then 875 ! Here, we are in trouble 876 write(msg, '(a,i12,5a)' ) & 877 'ngfft is bigger than allowed value =',ngfft(plane),'.',ch10,& 878 'This indicates that desired ngfft is larger than getng',ch10,& 879 'can handle. The code has to be changed and compiled.' 880 ABI_BUG(msg) 881 end if 882 end do 883 884 end do ! End of the infinite do-loop : will either "exit", or abort 885 886 !ecutmx=maximum ecut consistent with chosen ngfft 887 ecutmx=0.5_dp*pi**2*dsqmin 888 889 !Print results 890 write(msg, '(a,1p,e14.6,a,3i8,a,a,e14.6)' ) & 891 ' For input ecut=',ecut,' best grid ngfft=',ngfft(1:3),ch10,& 892 ' max ecut=',ecutmx 893 call wrtout(ount,msg) 894 895 ! The FFT grid is compatible with the symmetries if for each 896 ! symmetry isym, each ii and each jj, the quantity 897 ! (ngfft(jj)*symrel(jj,ii,isym))/ngfft(ii) is an integer. 898 ! This relation is immediately verified for diagonal elements, since 899 ! symrel is an integer. It is also verified if symrel(ii,jj,isym) is zero. 900 ! Moreover, to cope with the non-symmorphic translation vectors, at least the 901 ! origin must be sent to a point of the FFT grid. Hence, the non-symmorphic 902 ! translations tnons(i) multiplied by ngfft(i) must be an integer. 903 ! The latter condition is however imposed only when chksymtnons=3. 904 ! Indeed, ABINIT will be able to reimpose the symmetry at the level of the density and potential. 905 ! This might be a problem for GW calculations, though ... 906 907 !Compute the biggest (positive) common divisor of each off-diagonal element of the symmetry matrices 908 divisor(:,:)=0; tobechecked=0 909 910 do ii=1,3 911 do jj=1,3 912 if(jj==ii)cycle 913 do isym=1,nsym 914 if(symrel(jj,ii,isym)==0 .or. divisor(jj,ii)==1 )cycle 915 tobechecked=1 916 element=abs(symrel(jj,ii,isym)) 917 testdiv= ( divisor(jj,ii)==0 .or. divisor(jj,ii)==element .or. element==1) 918 if(testdiv)then 919 divisor(jj,ii)=element 920 else 921 ! Must evaluate common divisor between non-trivial numbers 922 do 923 if(divisor(jj,ii)<element)element=element-divisor(jj,ii) 924 if(divisor(jj,ii)>element)divisor(jj,ii)=divisor(jj,ii)-element 925 if(divisor(jj,ii)==element)exit 926 end do 927 end if 928 end do 929 end do 930 end do 931 932 !Check whether there is a problem: the grid must be invariant 933 !with respect to point symmetry operations, and spatial symmetry operations if chksymtnons==3 934 testok=1 935 if(tobechecked==1)then 936 do ii=1,3 937 do jj=1,3 938 xx=divisor(jj,ii)*ngfft(jj) 939 yy=xx/ngfft(ii) 940 if(abs(yy-nint(yy))>tol8)testok=0 941 end do 942 if(chksymtnons==3)then 943 do isym=1,nsym 944 tnscaled=tnons(ii,isym)*ngfft(ii) 945 if(abs(tnscaled-nint(tnscaled))>tol8)testok=0 946 enddo 947 endif 948 end do 949 end if 950 951 !DEBUG 952 !write(std_out,*)' m_fftcore/getng : chksymtnons,testok,nproc_fft=',chksymtnons,testok,nproc_fft 953 !ENDDEBUG 954 955 !If there is a problem 956 if(testok==0)then 957 ! Find the powers of 2, 3, 5 (possibly 7 and 11) that are needed or will be enough in ngfft, 958 ! taking both the constraint on nproc_nfft and the constraint on tnons. 959 ! First decompose nproc_fft, providing divisors of ngfft(2:3) 960 nn=nproc_fft 961 npower(:,:)=0 962 do ipower=1,mpower 963 valpow=power(ipower) 964 do while (mod(nn,valpow)==0) 965 nn=nn/valpow 966 npower(3,ipower)=npower(3,ipower)+1 967 end do 968 if(nn/=1)then 969 ABI_ERROR(sjoin("nproc_fft: ", itoa(nn), "is not a multiple of 2, 3, 5, 7 or 11")) 970 endif 971 enddo 972 npower(2,:)=npower(3,:) 973 974 ! Then examine tnons 975 if(chksymtnons==3)then 976 do ii=1,3 977 do ifactor=1,nfactor 978 testok=1 979 do isym=1,nsym 980 tnscaled=factor(ifactor)*tnons(ii,isym) 981 if(abs(tnscaled-nint(tnscaled))>tol8)testok=0 982 enddo 983 if(testok==1)exit 984 enddo 985 if(testok==1)then 986 if(ifactor/=1)then 987 if(ifactor==2)npower(ii,1)=max(npower(ii,1),1) ! At least one power of 2 988 if(ifactor==3)npower(ii,2)=max(npower(ii,2),1) ! At least one power of 3 989 if(ifactor==4)npower(ii,1)=max(npower(ii,1),2) ! At least two powers of 2 990 if(ifactor==5)npower(ii,3)=max(npower(ii,3),1) ! At least one power of 5 991 if(ifactor==6)then 992 npower(ii,1)=max(npower(ii,1),1) ! At least one power of 2 993 npower(ii,2)=max(npower(ii,2),1) ! At least one power of 3 994 endif 995 if(ifactor==7)npower(ii,1)=max(npower(ii,1),3) ! At least three powers of 2 996 if(ifactor==8)npower(ii,2)=max(npower(ii,2),2) ! At least two powers of 3 997 if(ifactor==9)then 998 npower(ii,1)=max(npower(ii,1),1) ! At least one power of 2 999 npower(ii,3)=max(npower(ii,3),1) ! At least one power of 5 1000 end if 1001 if(ifactor==10)then 1002 npower(ii,1)=max(npower(ii,1),2) ! At least two powers of 2 1003 npower(ii,2)=max(npower(ii,2),1) ! At least one power of 3 1004 end if 1005 endif 1006 else 1007 write(msg, '(5a,i12,2a,9i12,2a,3f10.7,2a)' ) & 1008 'Chksymtnons=1 . Found potentially symmetry-breaking value of tnons, ', ch10,& 1009 & ' which is neither a rational fraction in 1/8th nor in 1/12th (1/9th and 1/10th are tolerated also) :', ch10,& 1010 & ' for the symmetry number ',isym,ch10,& 1011 & ' symrel is ',symrel(1:3,1:3,isym),ch10,& 1012 & ' tnons is ',tnons(1:3,isym),ch10,& 1013 'This problem should have been caught earlier.' 1014 ABI_BUG(msg) 1015 endif 1016 enddo ! ii 1017 endif ! chksymtnons 1018 1019 ! Get minimal search indices, simply those of the current ngfft 1020 do ii=1,3 1021 do isrch=1,msrch(ii) 1022 index=srch(isrch,ii) 1023 if(index==ngfft(ii))imin(ii)=isrch 1024 end do 1025 end do 1026 1027 ! Get maximal search indices : the ngtrial values must be identical (to fulfill the constraint induced by the 1028 ! off diagonal elements of symrel), but also must contain sufficient powers of basic primes (2, 3, 5, 7, 11), 1029 ! and be bigger than all current ngfft components. This should guarantee that such a triplet fulfills all constraints. 1030 ! Determine the divisor of allowed ngmax 1031 ngdiv=1 1032 do ipower=1,mpower 1033 ngdiv=ngdiv*power(ipower)**(maxval(npower(:,ipower))) 1034 enddo 1035 ngmax(1)=ngdiv*(maxval(ngfft(1:3)-1)/ngdiv+1) 1036 ngmax(1:3)=ngmax(1) 1037 do ii=1,3 1038 do isrch=1,msrch(ii) 1039 index=srch(isrch,ii) 1040 if(mod(index,ngdiv)==0 .and. index>=ngmax(ii))then 1041 imax(ii)=isrch 1042 ngmax(ii)=index 1043 exit 1044 endif 1045 end do 1046 end do 1047 ! This gives a tentative symetric triplet 1048 ngcurrent(1:3)=ngmax(1:3) 1049 prodcurrent=ngmax(1)*ngmax(2)*ngmax(3)+1.0d-3 1050 ! However, it is perhaps possible to do better, by assymetric triplets, still giving lower prodcurrent ! 1051 ngmax(1)=min(int(prodcurrent/(ngfft(2)*ngfft(3))),srch(msrch(1),1)) 1052 ngmax(2)=min(int(prodcurrent/(ngfft(1)*ngfft(3))),srch(msrch(2),2)) 1053 ngmax(3)=min(int(prodcurrent/(ngfft(1)*ngfft(2))),srch(msrch(3),3)) 1054 do ii=1,3 1055 do isrch=1,msrch(ii) 1056 index=srch(isrch,ii) 1057 ! One cannot suppose that ngmax belongs to the allowed list, 1058 ! so must use <= instead of == , to determine largest index 1059 if(index<=ngmax(ii))imax(ii)=isrch 1060 end do 1061 end do 1062 1063 !DEBUG 1064 !write(std_out,*)' ngmin(1:3)=',srch(imin(1),1),srch(imin(2),2),srch(imin(3),3) 1065 !write(std_out,*)' ngmax(1:3)=',ngmax(1:3) 1066 !ENDDEBUG 1067 1068 ngcurrent(1:3)=ngmax(1:3) 1069 prodcurrent=ngmax(1)*ngmax(2)*ngmax(3)+1.0d-3 1070 1071 ! Now, start brute force search 1072 do isrch1=imin(1),imax(1) 1073 ngtrial(1)=srch(isrch1,1) 1074 do isrch2=imin(2),imax(2) 1075 ngtrial(2)=srch(isrch2,2) 1076 do isrch3=imin(3),imax(3) 1077 ngtrial(3)=srch(isrch3,3) 1078 prodtrial=real(ngtrial(1))*real(ngtrial(2))*real(ngtrial(3))+1.0d-3 1079 if(prodtrial>prodcurrent-1.0d-4)exit 1080 ! The trial product is lower or equal to the current product, 1081 ! so now, checks whether the symmetry constraints are OK 1082 testok=1 1083 do ii=1,3 1084 do jj=1,3 1085 xx=divisor(jj,ii)*ngtrial(jj) 1086 yy=xx/ngtrial(ii) 1087 if(abs(yy-nint(yy))>tol8)testok=0 1088 end do 1089 if(chksymtnons==3)then 1090 do isym=1,nsym 1091 tnscaled=tnons(ii,isym)*ngtrial(ii) 1092 if(abs(tnscaled-nint(tnscaled))>tol8)testok=0 1093 enddo 1094 endif 1095 end do 1096 ! DEBUG 1097 ! write(ount,'(a,3i6,a,i3,a,es16.6)' )' getng : current trial triplet',ngtrial(1:3),& 1098 ! & ' testok=',testok,' prodtrial=',prodtrial 1099 ! ENDDEBUG 1100 if(testok==0)cycle 1101 ! When one arrives here, the symmetry constraints are fulfilled, so update current values 1102 ! Then continues the search, in hope of a better value. 1103 ngcurrent(1:3)=ngtrial(1:3) 1104 prodcurrent=prodtrial 1105 end do 1106 end do 1107 end do 1108 1109 ngfft(1:3)=ngcurrent(1:3) 1110 call bound(dsqmax,dsqmin,gbound,gmet,kpt,ngfft,plane) 1111 ! ecutmx=maximum ecut consistent with chosen ngfft 1112 ecutmx=0.5_dp*pi**2*dsqmin 1113 ! Give results 1114 write(msg, '(a,3i8,a,a,e14.6)' ) & 1115 ' However, must be changed due to symmetry =>',ngfft(1:3),ch10,& 1116 ' with max ecut=',ecutmx 1117 call wrtout(ount,msg) 1118 1119 if (prodcurrent>huge(ii)) then 1120 write(msg, '(5a)' )& 1121 'The best FFT grid will lead to indices larger',ch10,& 1122 'than the largest representable integer on this machine.',ch10,& 1123 'Action: try to deal with smaller problems. Also contact ABINIT group.' 1124 ABI_ERROR(msg) 1125 end if 1126 1127 end if ! testok==0 1128 1129 !Possibly use the input values of ngfft 1130 if (int( dble(ngsav(1)) / minbox ) >= ngfft(1) .and.& 1131 int( dble(ngsav(2)) / minbox ) >= ngfft(2) .and.& 1132 int( dble(ngsav(3)) / minbox ) >= ngfft(3) ) then 1133 1134 ! Must check whether the values are in the allowed list 1135 testok=0 1136 do mu=1,3 1137 do ii=1,msrch(mu) 1138 if(srch(ii,mu)==ngsav(mu))then 1139 testok=testok+1 1140 exit 1141 end if 1142 end do 1143 end do 1144 if(testok==3)then 1145 write(msg,'(a,3(a,i1,a,i3),a)') ' input values of',& 1146 (' ngfft(',mu,') =',ngsav(mu),mu=1,3),' are alright and will be used' 1147 call wrtout(ount,msg) 1148 do mu = 1,3 1149 ngfft(mu) = ngsav(mu) 1150 end do 1151 end if 1152 1153 end if 1154 1155 !mgfft needs to be set to the maximum of ngfft(1),ngfft(2),ngfft(3) 1156 mgfft = maxval(ngfft(1:3)) 1157 1158 if (paral_fft_==1) then 1159 ! For the time being, one need ngfft(2) and ngfft(3) to be multiple of nproc_fft 1160 if(modulo(ngfft(2),nproc_fft)/=0)then 1161 write(msg,'(4a,i5,a,i5)')& 1162 'The second dimension of the FFT grid, ngfft(2), should be ',& 1163 'a multiple of the number of processors for the FFT, nproc_fft.',ch10,& 1164 'However, ngfft(2)=',ngfft(2),' and nproc_fft=',nproc_fft 1165 ABI_BUG(msg) 1166 end if 1167 if(modulo(ngfft(3),nproc_fft)/=0)then 1168 write(msg,'(4a,i5,a,i5)')& 1169 'The third dimension of the FFT grid, ngfft(3), should be ',& 1170 'a multiple of the number of processors for the FFT, nproc_fft.',ch10,& 1171 'However, ngfft(3)=',ngfft(3),' and nproc_fft=',nproc_fft 1172 ABI_BUG(msg) 1173 end if 1174 1175 else if (paral_fft_/=0) then 1176 write(msg,'(a,i0)')'paral_fft_ should be 0 or 1, but its value is ',paral_fft_ 1177 ABI_BUG(msg) 1178 end if 1179 1180 ! Compute effective number of FFT points (for this MPI node if parall FFT) 1181 nfft=product(ngfft(1:3))/max(1,nproc_fft) 1182 1183 !Set up fft array dimensions ngfft(4,5,6) to avoid cache conflicts 1184 ngfft(4)=2*(ngfft(1)/2)+1 1185 ngfft(5)=2*(ngfft(2)/2)+1 1186 ngfft(6)=ngfft(3) 1187 if (any(fftalga == [FFT_FFTW3, FFT_DFTI])) then 1188 ! FFTW3 supports leading dimensions but at the price of a larger number of FFTs 1189 ! to be executed along z when the zero-padded version is used. 1190 ! One should check whether the augmentation is beneficial for FFTW3. 1191 ngfft(4)=2*(ngfft(1)/2)+1 1192 ngfft(5)=2*(ngfft(2)/2)+1 1193 !ngfft(4)=ngfft(1) 1194 !ngfft(5)=ngfft(2) 1195 ngfft(6)=ngfft(3) 1196 end if 1197 1198 if (present(gpu_option)) then 1199 if (gpu_option/=ABI_GPU_DISABLED) then 1200 ngfft(4)=ngfft(1) 1201 ngfft(5)=ngfft(2) 1202 ngfft(6)=ngfft(3) 1203 end if 1204 end if 1205 1206 ngfft(14:18)=0 ! ngfft(14) to be filled outside of getng 1207 1208 if (paral_fft_==0) then 1209 ngfft(9)=0 ! paral_fft_ 1210 ngfft(10)=1 ! nproc_fft 1211 ngfft(11)=0 ! me_fft 1212 ngfft(12)=0 ! n2proc 1213 ngfft(13)=0 ! n3proc 1214 else 1215 ngfft(9)=1 ! paral_fft_ 1216 ngfft(10)=nproc_fft 1217 ngfft(11)=me_fft 1218 ngfft(12)=ngfft(2)/nproc_fft 1219 ngfft(13)=ngfft(3)/nproc_fft 1220 end if 1221 1222 call print_ngfft(ngfft,header="FFT mesh",unit=ount,mode_paral="COLL") 1223 1224 end subroutine getng
m_fftcore/indfftrisc [ Functions ]
[ Top ] [ m_fftcore ] [ Functions ]
NAME
indfftrisc
FUNCTION
Take the data for sphere boundary and list of planewave in sphere (kg_k), manipulate them for convenient use in fourwf, and output them in indpw_k
INPUTS
gbound(2*mgfft+4)=sphere boundary data kg_k(3,npw_k)=reduced planewave coordinates mgfft=maximum size of 1D FFTs ngfft(18)=contain all needed information about 3D FFT, see ~abinit/doc/variables/vargs.htm#ngfft npw_k=number of G vectors in basis at this k point
OUTPUT
indpw_k(4,npw_k)=array which gives fft box index for given basis sphere in a representation that is directly usable by sg_fftrisc.f ngb=number of FFTs along z
SOURCE
3820 subroutine indfftrisc(gbound,indpw_k,kg_k,mgfft,ngb,ngfft,npw_k) 3821 3822 !Arguments ------------------------------------ 3823 !scalars 3824 integer,intent(in) :: mgfft,npw_k 3825 integer,intent(out) :: ngb 3826 !arrays 3827 integer,intent(in) :: gbound(2*mgfft+4),kg_k(3,npw_k),ngfft(18) 3828 integer,intent(out) :: indpw_k(4,npw_k) 3829 3830 !Local variables------------------------------- 3831 !scalars 3832 integer :: g1,g2,i1,i2,i3,igb,index,ipw,n1,n2,n3 3833 !arrays 3834 integer,allocatable :: index2d(:,:) 3835 3836 ! ************************************************************************* 3837 3838 n1=ngfft(1) ; n2=ngfft(2) ; n3=ngfft(3) 3839 3840 !First, generate a 2d index for each column of data 3841 ABI_MALLOC(index2d,(n1,n2)) 3842 index2d(:,:)=0 3843 index=1 3844 igb=3 3845 do g2=0,gbound(2) ! g2max 3846 do g1=0,gbound(igb+1) ! g1max 3847 index2d(g1+1,g2+1)=index 3848 index=index+1 3849 end do 3850 if(gbound(igb)<=-1)then ! g1min 3851 do g1=gbound(igb)+n1,n1-1 3852 index2d(g1+1,g2+1)=index 3853 index=index+1 3854 end do 3855 end if 3856 igb=igb+2 3857 end do 3858 3859 if(gbound(1)<=-1)then ! g2min 3860 do g2=gbound(1)+n2,n2-1 3861 do g1=0,gbound(igb+1) 3862 index2d(g1+1,g2+1)=index 3863 index=index+1 3864 end do 3865 if(gbound(igb)<=-1)then 3866 do g1=gbound(igb)+n1,n1-1 3867 index2d(g1+1,g2+1)=index 3868 index=index+1 3869 end do 3870 end if 3871 igb=igb+2 3872 end do 3873 end if 3874 3875 ngb=index-1 3876 3877 3878 !The 2d index has been generated 3879 !Now, contract indpw_k(1,ipw) and indpw_k(2,ipw) into indpw_k(4,ipw) 3880 !indpw_k(1,ipw) and indpw_k(2,ipw) are used to hold inverse of index2d, 3881 !and for them, the second index does not fill 1:npw . It is only 3882 !the number of z-transform FFTs. 3883 3884 !$OMP PARALLEL DO PRIVATE(i1,i2,i3) 3885 do ipw=1,npw_k 3886 i1=kg_k(1,ipw); if(i1<0)i1=i1+n1 ; i1=i1+1 3887 i2=kg_k(2,ipw); if(i2<0)i2=i2+n2 ; i2=i2+1 3888 i3=kg_k(3,ipw); if(i3<0)i3=i3+n3 ; i3=i3+1 3889 indpw_k(4,ipw)=index2d(i1,i2) 3890 indpw_k(3,ipw)=i3 3891 end do 3892 3893 do i1=1,n1 3894 do i2=1,n2 3895 index=index2d(i1,i2) 3896 if(index/=0)then 3897 indpw_k(1,index)=i1 3898 indpw_k(2,index)=i2 3899 end if 3900 end do 3901 end do 3902 3903 ABI_FREE(index2d) 3904 3905 end subroutine indfftrisc
m_fftcore/kgindex [ Functions ]
[ Top ] [ m_fftcore ] [ Functions ]
NAME
kgindex
FUNCTION
Compute the index of each plane wave on a FFT grid.
INPUTS
kg_k(3,npw_k)=dimensionless coords of G vecs (integer) mpi_enreg=information about MPI parallelization ngfft(18)=contain all needed information about 3D FFT, see ~abinit/doc/variables/vargs.htm#ngfft npw_k=number of planewaves
OUTPUT
indpw_k(npw_k)=linear list number (in fft box) of given G vector for the current processor (local adress) =0 if kg_k(ipw) is not treated by this procesor mask(npw_k)=True if kg_k(ipw) belongs to this procesor, false otherwise.
NOTES
mpi_enreg is not necessary in this case (the info is also in ngfft), but much more easy to read...
SOURCE
4510 subroutine kgindex(indpw_k, kg_k, mask, mpi_enreg, ngfft, npw_k) 4511 4512 !Arguments ------------------------------------ 4513 !scalars 4514 integer,intent(in) :: npw_k 4515 type(MPI_type),intent(in) :: mpi_enreg 4516 !arrays 4517 integer,intent(in) :: kg_k(3,npw_k),ngfft(18) 4518 integer,intent(out) :: indpw_k(npw_k) 4519 logical,intent(out) :: mask(npw_k) 4520 !Local variables------------------------------- 4521 !scalars 4522 integer :: ig,ig1,ig2,ig3,me_fft,n1,n2,n3,nd2 4523 character(len=500) :: msg 4524 !arrays 4525 integer, ABI_CONTIGUOUS pointer :: fftn2_distrib(:),ffti2_local(:) 4526 !integer, ABI_CONTIGUOUS pointer :: fftn3_distrib(:),ffti3_local(:) 4527 4528 ! ************************************************************************* 4529 4530 n1=ngfft(1); n2=ngfft(2); n3=ngfft(3) 4531 4532 ! Use the following indexing (N means ngfft of the adequate direction) 4533 ! 0 1 2 3 ... N/2 -(N-1)/2 ... -1 <= kg 4534 ! 1 2 3 4 ....N/2+1 N/2+2 ... N <= index 4535 4536 me_fft=mpi_enreg%me_fft 4537 nd2=(n2-1)/mpi_enreg%nproc_fft+1 4538 4539 if (n2== mpi_enreg%distribfft%n2_coarse) then 4540 fftn2_distrib => mpi_enreg%distribfft%tab_fftdp2_distrib 4541 ffti2_local => mpi_enreg%distribfft%tab_fftdp2_local 4542 else if (n2 == mpi_enreg%distribfft%n2_fine) then 4543 fftn2_distrib => mpi_enreg%distribfft%tab_fftdp2dg_distrib 4544 ffti2_local => mpi_enreg%distribfft%tab_fftdp2dg_local 4545 else 4546 ABI_BUG("Unable to find an allocated distrib for this fft grid") 4547 end if 4548 4549 !call ptabs_fourwf(mpi_enreg,n2,n3,fftn2_distrib,ffti2_local,fftn3_distrib,ffti3_local) 4550 4551 do ig=1,npw_k 4552 ig1=modulo(kg_k(1,ig),n1) 4553 ig2=modulo(kg_k(2,ig),n2) 4554 ig3=modulo(kg_k(3,ig),n3) 4555 if(me_fft==fftn2_distrib(ig2+1)) then 4556 ig2=ffti2_local(ig2+1) - 1 4557 indpw_k(ig)=ig1+1+n1*(ig2+nd2*ig3) 4558 mask(ig)=.true. 4559 else 4560 indpw_k(ig)=0 4561 mask(ig)=.false. 4562 end if 4563 if (any(kg_k(:,ig) > ngfft(1:3)/2) .or. any(kg_k(:,ig) < -(ngfft(1:3)-1)/2) ) then 4564 write(msg,'(a,3(i0,1x),a)')" The G-vector: ",kg_k(:, ig)," falls outside the FFT box. Increase boxcutmin (?)" 4565 ABI_ERROR(msg) 4566 end if 4567 end do 4568 4569 end subroutine kgindex
m_fftcore/kpgcount [ Functions ]
[ Top ] [ m_fftcore ] [ Functions ]
NAME
kpgcount
FUNCTION
Give the minimum and maximum number of G vectors in each direction: for each k-point, compute the number of G vectors in each direction, then store the min and max value over the set of k-points.
INPUTS
ecut=planewave kinetic energy cutoff (hartrees) exchn2n3d=if 1, n2 and n3 are exchanged gmet(3,3)=reciprocal space metric (bohr^-2) istwfk(nkpt)=option parameter that describes the storage of wfs kpt(3,nkpt)=reduced coords of k point (in terms of recip latt vecs) nkpt=number of k points
OUTPUT
ngmax(3)=maximum number of G vectors in each direction (x,y,z) ngmin(3)=minimum number of G vectors in each direction (x,y,z)
NOTES
This routine has been extracted from kpgsph...
SOURCE
4329 subroutine kpgcount(ecut,exchn2n3d,gmet,istwfk,kpt,ngmax,ngmin,nkpt) 4330 4331 4332 !Arguments ------------------------------------ 4333 !scalars 4334 integer,intent(in) :: exchn2n3d,nkpt 4335 integer,intent(out) :: ngmax(3),ngmin(3) 4336 real(dp),intent(in) :: ecut 4337 !arrays 4338 integer,intent(in) :: istwfk(nkpt) 4339 real(dp),intent(in) :: gmet(3,3),kpt(3,nkpt) 4340 4341 !Local variables------------------------------- 4342 !scalars 4343 integer :: ii,ikpt,istwf_k,kmax,ng1,ng2,ng3,nmin 4344 real(dp) :: gmet_trace,gscut,xx 4345 !arrays 4346 integer :: ngrid(3),nmax(3) 4347 real(dp) :: minor(3),numer(3) 4348 4349 ! ************************************************************************* 4350 4351 DBG_ENTER("COLL") 4352 4353 gscut=0.5_dp*ecut*piinv**2 4354 gmet_trace=gmet(1,1)+gmet(2,2)+gmet(3,3) 4355 minor(1)=gmet(2,2)*gmet(3,3)-gmet(2,3)**2 4356 minor(2)=gmet(1,1)*gmet(3,3)-gmet(1,3)**2 4357 minor(3)=gmet(2,2)*gmet(1,1)-gmet(1,2)**2 4358 numer(1)=gmet(1,2)**2*gmet(3,3)-2.0_dp*gmet(1,2)*gmet(1,3)*gmet(2,3)+gmet(1,3)**2*gmet(2,2) 4359 numer(2)=gmet(2,3)**2*gmet(1,1)-2.0_dp*gmet(1,2)*gmet(1,3)*gmet(2,3)+gmet(2,1)**2*gmet(3,3) 4360 numer(3)=gmet(3,2)**2*gmet(1,1)-2.0_dp*gmet(1,2)*gmet(1,3)*gmet(2,3)+gmet(1,3)**2*gmet(2,2) 4361 4362 ngmin(:)=1000000;ngmax(:)=0 4363 do ikpt=1,nkpt 4364 istwf_k=istwfk(ikpt) 4365 4366 do ii=1,3 4367 xx=gmet(ii,ii)*minor(ii)-numer(ii) 4368 if(xx<tol10*gmet_trace**3.or.minor(ii)<tol10*gmet_trace**2)then 4369 ABI_BUG('The metric tensor seem incorrect') 4370 end if 4371 kmax=sqrt(gscut*minor(ii)/xx) 4372 nmax(ii)=floor(kmax-kpt(ii,ikpt)+tol10) 4373 nmin=ceiling(-kmax-kpt(ii,ikpt)-tol10) 4374 ngrid(ii)=nmax(ii)-nmin+1 4375 end do 4376 4377 ng1=ngrid(1);if(istwf_k==2.or.istwf_k==3) ng1=nmax(1)+1 4378 if(exchn2n3d==0)then 4379 ng3=ngrid(3) 4380 ng2=ngrid(2);if(istwf_k>=2) ng2=nmax(2)+1 4381 if(istwf_k>=2.and.istwf_k<=5) ng2=ng2-1 4382 else 4383 ng2=ngrid(2) 4384 ng3=ngrid(3);if(istwf_k>=2) ng3=nmax(3)+1 4385 if(istwf_k==2.or.istwf_k==3.or.istwf_k==6.or.istwf_k==7) ng3=ng3-1 4386 end if 4387 4388 if(ng1<ngmin(1)) ngmin(1)=ng1 4389 if(ng2<ngmin(2)) ngmin(2)=ng2 4390 if(ng3<ngmin(3)) ngmin(3)=ng3 4391 if(ng1>ngmax(1)) ngmax(1)=ng1 4392 if(ng2>ngmax(2)) ngmax(2)=ng2 4393 if(ng3>ngmax(3)) ngmax(3)=ng3 4394 4395 end do 4396 4397 DBG_EXIT("COLL") 4398 4399 end subroutine kpgcount
m_fftcore/kpgsph [ Functions ]
[ Top ] [ m_fftcore ] [ Functions ]
NAME
kpgsph
FUNCTION
Use reciprocal space metric gmet(3,3) to set up the list of G vectors inside a sphere out to $ (1/2)*(2*\pi*(k+G))^2=ecut $. If mkmem=0 and mpw=0, then only count the number of planewaves
INPUTS
ecut=planewave kinetic energy cutoff (hartrees) exchn2n3d=if 1, n2 and n3 are exchanged gmet(3,3)=reciprocal space metric (bohr^-2) ikg=shift to be given to the location of the output data in the array kg ikpt=number of the k-point istwf_k=option parameter that describes the storage of wfs kpt(3)=reduced coords of k point (in terms of recip latt vecs) mkmem =maximum number of k points which can fit in core memory mpi_enreg=information about MPI parallelization mpw=maximum number of planewaves as dimensioned in calling routine
OUTPUT
kg(3,mpw*mkmem)=dimensionless coords of resulting G vecs (integer) npw=resulting number of planewaves inside ecut centered at kpt
SIDE EFFECTS
mpi_enreg %me_g0=if 1, the plane wave G(0 0 0) is in the set of plane waves (and is the first) TODO: other SIDE EFFECTS on mpi_enreg should be described !!!
NOTES
Must take into account the time-reversal symmetry when istwf_k is not 1.
SOURCE
3946 subroutine kpgsph(ecut,exchn2n3d,gmet,ikg,ikpt,istwf_k,kg,kpt,mkmem,mpi_enreg,mpw,npw) 3947 3948 !Arguments ------------------------------------ 3949 !scalars 3950 integer,intent(in) :: exchn2n3d,ikg,ikpt,istwf_k,mkmem,mpw 3951 integer,intent(out) :: npw 3952 real(dp),intent(in) :: ecut 3953 type(MPI_type),intent(inout) :: mpi_enreg 3954 !arrays 3955 integer,intent(inout) :: kg(3,mpw*mkmem) 3956 real(dp),intent(in) :: gmet(3,3),kpt(3) 3957 3958 !Local variables------------------------------- 3959 !scalars 3960 integer :: i1,ig,ig1p,ig1pmax,ig2,ig2p,ig2pmax,ig2pmin,ig3,ig3p,ig3pmax 3961 integer :: ig3pmin,igtot,ii,ikpt_this_proc,in,ind,np_band,np_fft,npw_before,npw_remain,npw_split 3962 integer, save :: alloc_size=0 3963 real(dp) :: gap_pw,gmet11,gmet_trace,gmin,gs_fact,gs_part,gscut,v1,v2,v3,xx 3964 logical :: ipw_ok 3965 character(len=500) :: msg 3966 !arrays 3967 integer :: ngrid(3),nmax(3),nmin(3),n2,ierr 3968 integer,allocatable :: array_ipw(:),ig1arr(:),ig2arr(:) 3969 integer,allocatable :: ig3arr(:),kg_ind(:),kg_small(:,:) 3970 integer, allocatable :: npw_gather(:),npw_disp(:) ,kg_ind_gather(:),kg_small_gather(:,:) 3971 real(dp) :: kmax(3),minor(3),numer(3),tsec(2) 3972 real(dp),allocatable :: kg1arr(:),kg2arr(:),kg3arr(:) 3973 3974 ! ************************************************************************* 3975 3976 DBG_ENTER("COLL") 3977 3978 call timab(23,1,tsec) 3979 if(istwf_k<1 .or. istwf_k>9)then 3980 write(msg,'(3a,i0,a)' )& 3981 'The variable istwf_k must be between 1 and 9, while',ch10,& 3982 'the argument of the routine istwf_k =',istwf_k,'.' 3983 ABI_BUG(msg) 3984 end if 3985 3986 if(ikg+mpw>mkmem*mpw)then 3987 write(msg,'(5a,i0,a,i0,a,i0,4a)')& 3988 'The variables ikg, mkmem, and mpw must satisfy ikg<=(mkmem-1)*mpw,',ch10,& 3989 'while the arguments of the routine are',ch10,& 3990 'ikg =',ikg,', mkmem =',mkmem,', mpw =',mpw,ch10,& 3991 'Probable cause: Known error in invars1 for parallel spin-polarized case.',ch10,& 3992 'Temporary solution: Change the number of parallel processes.' 3993 ABI_BUG(msg) 3994 end if 3995 3996 np_band=0 3997 if (mpw>0) then 3998 np_band=1; if(mpi_enreg%paral_kgb==1) np_band=max(1,mpi_enreg%nproc_band) 3999 alloc_size=max(alloc_size,(mpw+1)*np_band) 4000 ABI_MALLOC(kg_small,(3, alloc_size)) 4001 ABI_MALLOC(kg_ind,(alloc_size)) 4002 kg_ind(:)=0 4003 end if 4004 4005 !A larger array, that will be split on the correct processor 4006 !G**2 cutoff, gscut=Ecut/2 /Pi^2 4007 4008 gscut=0.5_dp*ecut*piinv**2 4009 4010 !In reduced coordinates, determine maximal value of k+G and G for each direction 4011 4012 minor(1)=gmet(2,2)*gmet(3,3)-gmet(2,3)**2 4013 numer(1)=gmet(1,2)**2*gmet(3,3)-2.0_dp*gmet(1,2)*gmet(1,3)*gmet(2,3) +gmet(1,3)**2*gmet(2,2) 4014 minor(2)=gmet(1,1)*gmet(3,3)-gmet(1,3)**2 4015 numer(2)=gmet(2,3)**2*gmet(1,1)-2.0_dp*gmet(1,2)*gmet(1,3)*gmet(2,3) +gmet(2,1)**2*gmet(3,3) 4016 minor(3)=gmet(2,2)*gmet(1,1)-gmet(1,2)**2 4017 numer(3)=gmet(3,2)**2*gmet(1,1)-2.0_dp*gmet(1,2)*gmet(1,3)*gmet(2,3) +gmet(1,3)**2*gmet(2,2) 4018 4019 !Take the trace of the gmet tensor as dimensional reference 4020 gmet_trace=gmet(1,1)+gmet(2,2)+gmet(3,3) 4021 4022 do ii=1,3 4023 xx=gmet(ii,ii)*minor(ii)-numer(ii) 4024 if(xx<tol10*gmet_trace**3 .or. minor(ii)<tol10*gmet_trace**2)then 4025 ABI_BUG('The metric tensor seem incorrect') 4026 end if 4027 kmax(ii)=sqrt(gscut*minor(ii)/xx) 4028 nmax(ii)=floor(kmax(ii)-kpt(ii)+tol10) 4029 nmin(ii)=ceiling(-kmax(ii)-kpt(ii)-tol10) 4030 ngrid(ii)=nmax(ii)-nmin(ii)+1 4031 end do 4032 !perform looping over fft box grid of size ngfft(1)*ngfft(2)*ngfft(3): 4033 ig=0;ind=0 4034 in=0 4035 gmet11=gmet(1,1) 4036 4037 !Set up standard search sequence for grid points, in standard storage mode : 4038 !0 1 2 3 ... nmax nmin ... -1 4039 !If the mode is not standard, then some part of the FFT grid must be selected 4040 ! 4041 ABI_MALLOC(ig1arr,(ngrid(1))) 4042 ABI_MALLOC(ig2arr,(ngrid(2))) 4043 ABI_MALLOC(ig3arr,(ngrid(3))) 4044 ABI_MALLOC(kg1arr,(ngrid(1))) 4045 ABI_MALLOC(kg2arr,(ngrid(2))) 4046 ABI_MALLOC(kg3arr,(ngrid(3))) 4047 4048 do ig1p=1,ngrid(1) 4049 ig1arr(ig1p)=ig1p-1 4050 if (ig1p-1>nmax(1)) ig1arr(ig1p)=ig1p-ngrid(1)-1 4051 kg1arr(ig1p)=kpt(1)+dble(ig1arr(ig1p)) 4052 end do 4053 4054 !For the second direction, the number of points might depend on istwf_k 4055 !--------------------------------------------------------------------- 4056 ig2pmax=ngrid(2) 4057 if(istwf_k>=2 .and. exchn2n3d==0) ig2pmax=nmax(2)+1 4058 ABI_MALLOC(array_ipw,(-ig2pmax:ig2pmax)) 4059 array_ipw(:)=0 4060 do ig2p=1,ig2pmax 4061 ig2arr(ig2p)=ig2p-1 4062 if (ig2p-1>nmax(2)) ig2arr(ig2p)=ig2p-ngrid(2)-1 4063 kg2arr(ig2p)=kpt(2)+dble(ig2arr(ig2p)) 4064 end do 4065 4066 !For the third direction, the number of points might depend on istwf_k 4067 !--------------------------------------------------------------------- 4068 ig3pmax=ngrid(3) 4069 if (istwf_k>=2 .and. exchn2n3d==1) ig3pmax=nmax(3)+1 4070 4071 do ig3p=1,ig3pmax 4072 ig3arr(ig3p)=ig3p-1 4073 if(ig3p-1>nmax(3)) ig3arr(ig3p)=ig3p-ngrid(3)-1 4074 kg3arr(ig3p)=kpt(3)+dble(ig3arr(ig3p)) 4075 end do 4076 4077 !Performs loop on all grid points. 4078 !--------------------------------------------------------------------- 4079 igtot = 0 4080 if(exchn2n3d==0)then 4081 mpi_enreg%me_g0=0 4082 do ig3p=1,ngrid(3) 4083 ig3=ig3arr(ig3p) 4084 v3=kg3arr(ig3p) 4085 ig2pmin=1 4086 if( istwf_k>=2 .and. istwf_k<=5 .and. ig3<0)then 4087 ig2pmin=2 4088 end if 4089 ! ig2pmax was initialized previously 4090 do ig2p=ig2pmin,ig2pmax 4091 ig2=ig2arr(ig2p) 4092 ! PAY ATTENTION : the proc 0 must have me_g0=1 4093 ipw_ok = .true. 4094 if(mpi_enreg%paral_kgb==1 ) then 4095 n2 =mpi_enreg%distribfft%n2_coarse 4096 ipw_ok = ipw_ok.and.(mpi_enreg%me_fft == mpi_enreg%distribfft%tab_fftwf2_distrib( modulo(ig2,n2) + 1)) 4097 end if 4098 if (ig2==0 .and. ipw_ok) mpi_enreg%me_g0=1 4099 v2=kg2arr(ig2p) 4100 gs_part=gmet(2,2)*v2*v2+gmet(3,3)*v3*v3+2.0_dp*gmet(2,3)*v2*v3 4101 gs_fact=2.0_dp*(gmet(1,2)*v2+gmet(3,1)*v3) 4102 ig1pmax=ngrid(1) 4103 if( (istwf_k==2.or.istwf_k==3) .and. ig3p==1 .and. ig2p==1)ig1pmax=nmax(1)+1 4104 do ig1p=1,ig1pmax 4105 v1=kg1arr(ig1p) 4106 gmin=gs_part+v1*(gs_fact+v1*gmet11) 4107 ! If inside sphere: 4108 if (gmin<=gscut) then 4109 if (ipw_ok) then 4110 ig=ig+1 ! inside sphere 4111 igtot=igtot+1 4112 if (mpw>0.and.ig<=alloc_size) then 4113 ! Keep coords of pw: 4114 kg_small(1,ig)=ig1arr(ig1p) 4115 kg_small(2,ig)=ig2 4116 kg_small(3,ig)=ig3 4117 kg_ind(ig)=igtot 4118 end if 4119 array_ipw(ig2)=array_ipw(ig2)+1 4120 else 4121 igtot=igtot+1 4122 end if 4123 end if 4124 end do !ig1p 4125 end do !ig2p 4126 end do !ig3p 4127 4128 else ! if (exchn2n3d/=0) 4129 4130 ! ig2pmax was initialized previously 4131 mpi_enreg%me_g0=0 4132 do ig2p=1,ngrid(2) 4133 ig2=ig2arr(ig2p) 4134 ! PAY ATTENTION : the proc 0 must have me_g0=1 4135 ipw_ok = .true. 4136 if(mpi_enreg%paral_kgb==1 ) then 4137 n2 =mpi_enreg%distribfft%n2_coarse 4138 ipw_ok = ipw_ok.and.(mpi_enreg%me_fft == mpi_enreg%distribfft%tab_fftwf2_distrib( modulo(ig2,n2) + 1)) 4139 end if 4140 if(ig2==0 .and. istwf_k>=2 .and. ipw_ok) mpi_enreg%me_g0=1 4141 v2 =kg2arr(ig2p) 4142 ig3pmin=1 4143 if( (istwf_k==2 .or. istwf_k==3 .or. istwf_k==6 .or. istwf_k==7) .and. ig2<0)then 4144 ig3pmin=2 4145 end if 4146 do ig3p=ig3pmin,ig3pmax 4147 ig3=ig3arr(ig3p) 4148 v3=kg3arr(ig3p) 4149 gs_part=gmet(2,2)*v2*v2+gmet(3,3)*v3*v3+2.0_dp*gmet(2,3)*v2*v3 4150 gs_fact=2.0_dp*(gmet(1,2)*v2+gmet(3,1)*v3) 4151 ig1pmax=ngrid(1) 4152 if( (istwf_k==2.or.istwf_k==3) .and. ig3p==1 .and. ig2p==1)ig1pmax=nmax(1)+1 4153 do ig1p=1,ig1pmax 4154 v1=kg1arr(ig1p) 4155 gmin=gs_part+v1*(gs_fact+v1*gmet11) 4156 ! If inside sphere: 4157 if (gmin<=gscut) then 4158 if (ipw_ok) then 4159 ig=ig+1 ! inside sphere 4160 igtot=igtot+1 4161 ! Make sure not to overrun array, or simply do not store if mpw=0 4162 if (mpw>0.and.ig<=alloc_size) then 4163 ! Keep coords of pw: 4164 kg_small(1,ig)=ig1arr(ig1p) 4165 kg_small(2,ig)=ig2 4166 kg_small(3,ig)=ig3 4167 kg_ind(ig)=igtot 4168 end if 4169 else 4170 igtot=igtot+1 4171 end if 4172 end if 4173 4174 end do ! ig1p 4175 end do ! ig3p 4176 ! end if ! if the ig2 plane is to be treated by this processor 4177 end do ! ig2p 4178 4179 end if ! exchn2n3d==0 or ==1 4180 4181 !Total number of G vectors at this k point is assigned: npw 4182 !when getcell = 1 it can be that ig exceeds mpw, the bound on kp_small 4183 !here is a workaround: 4184 if (mpw*np_band > 0 .and. ig > alloc_size) then 4185 npw = mpw*np_band 4186 else 4187 npw=ig 4188 end if 4189 alloc_size = max(alloc_size,npw) 4190 4191 ABI_FREE(ig1arr) 4192 ABI_FREE(ig2arr) 4193 ABI_FREE(ig3arr) 4194 ABI_FREE(kg1arr) 4195 ABI_FREE(kg2arr) 4196 ABI_FREE(kg3arr) 4197 4198 !BandFFT: plane-wave load balancing 4199 if (mpi_enreg%paral_kgb==1.and.mpi_enreg%nproc_fft>1.and. mpi_enreg%pw_unbal_thresh>zero.and. istwf_k==1) then 4200 ! Check for reequilibration 4201 np_fft=max(1,mpi_enreg%nproc_fft) 4202 ABI_MALLOC(npw_gather,(np_fft)) ! Count pw before balancing 4203 call xmpi_allgather(npw,npw_gather,mpi_enreg%comm_fft,ierr) 4204 gap_pw = 100._dp*(maxval(npw_gather(:))-minval(npw_gather))/(1.*sum(npw_gather(:))/np_fft) 4205 write(msg,'(a,f5.2)' ) ' Relative gap for number of plane waves between process (%): ',gap_pw 4206 call wrtout(std_out,msg) 4207 if(gap_pw > mpi_enreg%pw_unbal_thresh) then ! Effective reequilibration 4208 write(msg,'(a,f5.2,a,i4,a,f5.2,a)') & 4209 'Plane-wave unbalancing (',gap_pw,'%) for kpt ',ikpt,' is higher than threshold (',& 4210 mpi_enreg%pw_unbal_thresh,'%); a plane-wave balancing procedure is activated!' 4211 call wrtout(std_out,msg) 4212 !Get optimal number 4213 npw_split=sum(npw_gather(:)) 4214 npw=npw_split/np_fft 4215 npw_remain=modulo(npw_split,np_fft) 4216 if(mpi_enreg%me_fft < npw_remain) npw=npw+1 4217 ig=npw 4218 !write(msg,*) 'New npw_fft = ', npw 4219 !call wrtout(std_out,msg) 4220 alloc_size = max(alloc_size,npw) 4221 if(mpw>0 ) then !Step for requilibration between fft process 4222 ABI_MALLOC(npw_disp,(np_fft)) 4223 npw_disp=0 4224 do i1=2,np_fft 4225 npw_disp(i1) = npw_disp(i1-1) + npw_gather(i1-1) 4226 end do 4227 ABI_MALLOC(kg_ind_gather,(npw_split)) 4228 ABI_MALLOC(kg_small_gather,(3,npw_split)) 4229 call xmpi_allgatherv(kg_ind, npw_gather(mpi_enreg%me_fft+1) , & 4230 & kg_ind_gather,npw_gather,npw_disp,mpi_enreg%comm_fft,ierr) 4231 call xmpi_allgatherv(kg_small,3*npw_gather(mpi_enreg%me_fft+1), & 4232 & kg_small_gather,3*npw_gather, 3*npw_disp,mpi_enreg%comm_fft,ierr) 4233 npw_before=mpi_enreg%me_fft*(npw_split/np_fft)+min(npw_remain,mpi_enreg%me_fft) 4234 kg_small(:,1:npw)=kg_small_gather(:,npw_before+1:npw_before+npw) 4235 kg_ind( 1:npw)=kg_ind_gather(npw_before+1:npw_before+npw) 4236 #ifdef DEBUG_MODE 4237 call wrtout(std_out,"keeping values done") 4238 #endif 4239 ABI_FREE(npw_disp) 4240 ABI_FREE(kg_ind_gather) 4241 ABI_FREE(kg_small_gather) 4242 end if 4243 end if!End of reequilibration step for paral KGB 4244 ABI_FREE(npw_gather) 4245 end if 4246 4247 !BandFFT: band load balancing 4248 if(mpi_enreg%paral_kgb==1.and.mpi_enreg%nproc_band>0) then 4249 np_band=max(1,mpi_enreg%nproc_band) 4250 npw_split=ig;npw=npw_split/np_band 4251 npw_remain=modulo(npw_split,np_band) 4252 if(mpi_enreg%me_band < npw_remain) npw=npw+1 4253 if(mpw > 0) then ! This is for the case when we only compute npw and put mpw=0 4254 npw_before=mpi_enreg%me_band*(npw_split/np_band)+min(npw_remain,mpi_enreg%me_band) 4255 kg_small(:,1:npw)=kg_small(:,npw_before+1:npw_before+npw) 4256 kg_ind ( 1:npw)=kg_ind ( npw_before+1:npw_before+npw) 4257 end if 4258 end if 4259 if(mpw > 0) then 4260 do i1=1,npw 4261 kg(:,i1+ikg)=kg_small(:,i1) 4262 if (allocated(mpi_enreg%my_kgtab)) then 4263 ikpt_this_proc=mpi_enreg%my_kpttab(ikpt) 4264 mpi_enreg%my_kgtab(i1,ikpt_this_proc) = kg_ind(i1) 4265 end if 4266 end do 4267 ABI_FREE(kg_small) 4268 ABI_FREE(kg_ind) 4269 end if 4270 4271 ABI_FREE(array_ipw) 4272 4273 !Take care of the me_g0 flag 4274 mpi_enreg%me_g0_fft=mpi_enreg%me_g0 4275 if(mpi_enreg%paral_kgb==1.and.mpi_enreg%nproc_band>0) then 4276 if(mpi_enreg%me_band==0.and.mpi_enreg%me_g0==1) then 4277 ! In this case, the processors had the 0 G vector before the new distribution, and still keeps it 4278 mpi_enreg%me_g0=1 4279 else 4280 ! All other cases 4281 mpi_enreg%me_g0=0 4282 end if 4283 end if 4284 4285 !Check that npw is not zero 4286 if(mpi_enreg%paral_kgb==1.and.npw==0) then 4287 write(msg,'(5a)' )& 4288 'Please decrease the number of npband*npfft MPI processes!',ch10,& 4289 'One of the MPI process has no plane-wave to handle.',ch10,& 4290 'Action: decrease npband and/or npfft.' 4291 ABI_ERROR(msg) 4292 endif 4293 4294 call timab(23,2,tsec) 4295 4296 DBG_EXIT("COLL") 4297 4298 end subroutine kpgsph
m_fftcore/mpifft_collect_datar [ Functions ]
[ Top ] [ m_fftcore ] [ Functions ]
NAME
mpifft_collect_datar
FUNCTION
Collect a real-space MPI-FFT distributed array on each proc.
INPUTS
ngfft(18)=contain all needed information about 3D FFT (see NOTES at beginning of scfcv) cplex=1 if real array, 2 for complex nfft=Number of FFT points treated by this MPI proc nspden=Second dimension of rhor rhor(cplex*nfft,nspden)=Array in real space (MPI-FFT distributed) fftn3_distrib(n3)=rank of the processors which own fft planes in 3rd dimension. fftn3_local(n3)=local i3 indices comm_fft=MPI-FFT communicator [master]=MPI rank, Optional. If present, the global array is available only on master node.
OUTPUT
rhor_glob(cplex*nfft_tot,nspden)=Global array
SOURCE
4781 subroutine mpifft_collect_datar(ngfft,cplex,nfft,nspden,rhor,comm_fft,fftn3_distrib,ffti3_local,rhor_glob,master) 4782 4783 !Arguments ------------------------------------ 4784 !scalars 4785 integer,intent(in) :: cplex,nfft,nspden,comm_fft 4786 integer,optional,intent(in) :: master 4787 !arrays 4788 integer,intent(in) :: ngfft(18),fftn3_distrib(ngfft(3)),ffti3_local(ngfft(3)) 4789 real(dp),intent(in) :: rhor(cplex*nfft,nspden) 4790 real(dp),intent(out) :: rhor_glob(cplex*product(ngfft(1:3)),nspden) 4791 4792 !Local variables------------------------------- 4793 integer :: ispden,i1,i2,i3,me_fft,i3_local,my_fftbase,glob_fftbase 4794 integer :: n1,n2,n3,ierr,nfft_tot 4795 4796 ! ************************************************************************* 4797 4798 nfft_tot = product(ngfft(1:3)); me_fft = xmpi_comm_rank(comm_fft) 4799 n1 = ngfft(1); n2 = ngfft(2); n3 = ngfft(3) 4800 4801 if (nfft_tot == nfft) then 4802 ! full rhor on each node, just do a copy 4803 rhor_glob = rhor 4804 else 4805 ! if MPI-FFT we have to gather the full rhor on each node. 4806 rhor_glob = zero 4807 do ispden=1,nspden 4808 do i3=1,n3 4809 if (me_fft == fftn3_distrib(i3)) then 4810 i3_local = ffti3_local(i3) 4811 do i2=1,n2 4812 my_fftbase = cplex * ( (i2-1)*n1 + (i3_local-1)*n1*n2 ) 4813 glob_fftbase = cplex * ( (i2-1)*n1 + (i3-1)*n1*n2 ) 4814 do i1=1,cplex * n1 4815 rhor_glob(i1+glob_fftbase,ispden) = rhor(i1+my_fftbase,ispden) 4816 end do 4817 end do 4818 end if 4819 end do 4820 end do 4821 if (present(master)) then 4822 call xmpi_sum_master(rhor_glob,master,comm_fft,ierr) 4823 else 4824 call xmpi_sum(rhor_glob,comm_fft,ierr) 4825 end if 4826 end if 4827 4828 end subroutine mpifft_collect_datar
m_fftcore/mpifft_dbox2fg [ Functions ]
[ Top ] [ m_fftcore ] [ Functions ]
NAME
mpifft_dbox2fg
FUNCTION
INPUTS
OUTPUT
SOURCE
3409 pure subroutine mpifft_dbox2fg(n1,n2,n3,n4,nd2proc,n6,ndat,fftn2_distrib,ffti2_local,me_fft,workf,nfft,fofg) 3410 3411 !Arguments ------------------------------------ 3412 !scalars 3413 integer,intent(in) :: n1,n2,n3,n4,nd2proc,n6,ndat,me_fft,nfft 3414 !arrays 3415 integer,intent(in) :: fftn2_distrib(n2),ffti2_local(n2) 3416 real(dp),intent(in) :: workf(2,n4,n6,nd2proc*ndat) 3417 real(dp),intent(out) :: fofg(2,nfft*ndat) 3418 3419 !Local variables------------------------------- 3420 integer :: idat,i1,i2,i3,i2_local,i2_ldat,fgbase 3421 real(dp) :: xnorm 3422 3423 ! ************************************************************************* 3424 3425 xnorm=one/dble(n1*n2*n3) 3426 3427 ! Transfer fft output to the original fft box 3428 do idat=1,ndat 3429 do i2=1,n2 3430 if( fftn2_distrib(i2) == me_fft) then 3431 i2_local = ffti2_local(i2) 3432 i2_ldat = i2_local + (idat-1) * nd2proc 3433 do i3=1,n3 3434 fgbase = n1*(i2_local - 1 + nd2proc*(i3-1)) + (idat - 1) * nfft 3435 do i1=1,n1 3436 fofg(1,i1+fgbase)=workf(1,i1,i3,i2_ldat)*xnorm 3437 fofg(2,i1+fgbase)=workf(2,i1,i3,i2_ldat)*xnorm 3438 end do 3439 end do 3440 end if 3441 end do 3442 end do 3443 3444 end subroutine mpifft_dbox2fg
m_fftcore/mpifft_dbox2fg_dpc [ Functions ]
[ Top ] [ m_fftcore ] [ Functions ]
NAME
mpifft_dbox2fg_dpc
FUNCTION
INPUTS
OUTPUT
SOURCE
3461 pure subroutine mpifft_dbox2fg_dpc(n1,n2,n3,n4,nd2proc,n6,ndat,fftn2_distrib,ffti2_local,me_fft,workf,nfft,fofg) 3462 3463 !Arguments ------------------------------------ 3464 !scalars 3465 integer,intent(in) :: n1,n2,n3,n4,nd2proc,n6,ndat,me_fft,nfft 3466 !arrays 3467 integer,intent(in) :: fftn2_distrib(n2),ffti2_local(n2) 3468 complex(dpc),intent(in) :: workf(n4,n6,nd2proc*ndat) 3469 real(dp),intent(out) :: fofg(2,nfft*ndat) 3470 3471 !Local variables------------------------------- 3472 integer :: idat,i1,i2,i3,i2_local,i2_ldat,fgbase 3473 real(dp) :: xnorm 3474 3475 ! ************************************************************************* 3476 3477 xnorm=one/dble(n1*n2*n3) 3478 3479 ! Transfer fft output to the original fft box 3480 do idat=1,ndat 3481 do i2=1,n2 3482 if( fftn2_distrib(i2) == me_fft) then 3483 i2_local = ffti2_local(i2) 3484 i2_ldat = i2_local + (idat-1) * nd2proc 3485 do i3=1,n3 3486 fgbase = n1*(i2_local - 1 + nd2proc*(i3-1)) + (idat - 1) * nfft 3487 do i1=1,n1 3488 fofg(1,i1+fgbase)=REAL (workf(i1,i3,i2_ldat))*xnorm 3489 fofg(2,i1+fgbase)=AIMAG(workf(i1,i3,i2_ldat))*xnorm 3490 end do 3491 end do 3492 end if 3493 end do 3494 end do 3495 3496 end subroutine mpifft_dbox2fg_dpc
m_fftcore/mpifft_dbox2fr [ Functions ]
[ Top ] [ m_fftcore ] [ Functions ]
NAME
mpifft_dbox2fr
FUNCTION
INPUTS
OUTPUT
SOURCE
3513 pure subroutine mpifft_dbox2fr(n1,n2,n3,n4,n5,nd3proc,ndat,fftn3_distrib,ffti3_local,me_fft,workr,cplex,nfft,fofr) 3514 3515 !Arguments ------------------------------------ 3516 !scalars 3517 integer,intent(in) :: n1,n2,n3,n4,n5,nd3proc,ndat,me_fft,nfft,cplex 3518 !!arrays 3519 integer,intent(in) :: fftn3_distrib(n3),ffti3_local(n3) 3520 real(dp),intent(in) :: workr(2,n4,n5,nd3proc*ndat) 3521 real(dp),intent(out) :: fofr(cplex*nfft*ndat) 3522 3523 !Local variables------------------------------- 3524 integer :: idat,i1,i2,i3,i3_local,i3_ldat,frbase 3525 3526 ! ************************************************************************* 3527 3528 select case (cplex) 3529 case (1) 3530 3531 do idat=1,ndat 3532 do i3=1,n3 3533 if( fftn3_distrib(i3) == me_fft) then 3534 i3_local = ffti3_local(i3) 3535 i3_ldat = i3_local + (idat - 1) * nd3proc 3536 do i2=1,n2 3537 frbase=n1*(i2-1+n2*(i3_local-1)) + (idat - 1) * nfft 3538 do i1=1,n1 3539 fofr(i1+frbase)=workr(1,i1,i2,i3_ldat) 3540 end do 3541 end do 3542 end if 3543 end do 3544 end do 3545 3546 case (2) 3547 3548 do idat=1,ndat 3549 do i3=1,n3 3550 if (fftn3_distrib(i3) == me_fft) then 3551 i3_local = ffti3_local(i3) 3552 i3_ldat = i3_local + (idat - 1) * nd3proc 3553 do i2=1,n2 3554 frbase=2*n1*(i2-1+n2*(i3_local-1)) + (idat - 1) * cplex * nfft 3555 !if (frbase > cplex*nfft*ndat - 2*n1) then 3556 ! write(std_out,*)i2,i3_local,frbase,cplex*nfft*ndat 3557 ! ABI_ERROR("frbase") 3558 !end if 3559 do i1=1,n1 3560 fofr(2*i1-1+frbase)=workr(1,i1,i2,i3_ldat) 3561 fofr(2*i1 +frbase)=workr(2,i1,i2,i3_ldat) 3562 end do 3563 end do 3564 end if 3565 end do 3566 end do 3567 3568 case default 3569 !ABI_BUG("Wrong cplex") 3570 fofr = huge(one) 3571 end select 3572 3573 end subroutine mpifft_dbox2fr
m_fftcore/mpifft_dbox2fr_dpc [ Functions ]
[ Top ] [ m_fftcore ] [ Functions ]
NAME
mpifft_dbox2fr_dpc
FUNCTION
INPUTS
OUTPUT
SOURCE
3590 pure subroutine mpifft_dbox2fr_dpc(n1,n2,n3,n4,n5,nd3proc,ndat,fftn3_distrib,ffti3_local,me_fft,workr,cplex,nfft,fofr) 3591 3592 !Arguments ------------------------------------ 3593 !scalars 3594 integer,intent(in) :: n1,n2,n3,n4,n5,nd3proc,ndat,me_fft,nfft,cplex 3595 !!arrays 3596 integer,intent(in) :: fftn3_distrib(n3),ffti3_local(n3) 3597 complex(dpc),intent(in) :: workr(n4,n5,nd3proc*ndat) 3598 real(dp),intent(out) :: fofr(cplex*nfft*ndat) 3599 3600 !Local variables------------------------------- 3601 integer :: idat,i1,i2,i3,i3_local,i3_ldat,frbase 3602 3603 ! ************************************************************************* 3604 3605 select case (cplex) 3606 case (1) 3607 3608 do idat=1,ndat 3609 do i3=1,n3 3610 if( fftn3_distrib(i3) == me_fft) then 3611 i3_local = ffti3_local(i3) 3612 i3_ldat = i3_local + (idat - 1) * nd3proc 3613 do i2=1,n2 3614 frbase=n1*(i2-1+n2*(i3_local-1)) + (idat - 1) * nfft 3615 do i1=1,n1 3616 fofr(i1+frbase)=REAL(workr(i1,i2,i3_ldat)) 3617 end do 3618 end do 3619 end if 3620 end do 3621 end do 3622 3623 case (2) 3624 3625 do idat=1,ndat 3626 do i3=1,n3 3627 if (fftn3_distrib(i3) == me_fft) then 3628 i3_local = ffti3_local(i3) 3629 i3_ldat = i3_local + (idat - 1) * nd3proc 3630 do i2=1,n2 3631 frbase=2*n1*(i2-1+n2*(i3_local-1)) + (idat - 1) * cplex * nfft 3632 do i1=1,n1 3633 fofr(2*i1-1+frbase)=REAL (workr(i1,i2,i3_ldat)) 3634 fofr(2*i1 +frbase)=AIMAG(workr(i1,i2,i3_ldat)) 3635 end do 3636 end do 3637 end if 3638 end do 3639 end do 3640 3641 case default 3642 !ABI_BUG("Wrong cplex") 3643 fofr = huge(one) 3644 end select 3645 3646 end subroutine mpifft_dbox2fr_dpc
m_fftcore/mpifft_fg2dbox [ Functions ]
[ Top ] [ m_fftcore ] [ Functions ]
NAME
mpifft_fg2dbox
FUNCTION
INPUTS
OUTPUT
SOURCE
3314 pure subroutine mpifft_fg2dbox(nfft,ndat,fofg,n1,n2,n3,n4,nd2proc,n6,fftn2_distrib,ffti2_local,me_fft,workf) 3315 3316 !Arguments ------------------------------------ 3317 !scalars 3318 integer,intent(in) :: nfft,ndat,n1,n2,n3,n4,nd2proc,n6,me_fft 3319 !arrays 3320 integer,intent(in) :: fftn2_distrib(n2),ffti2_local(n2) 3321 real(dp),intent(in) :: fofg(2,nfft*ndat) 3322 real(dp),intent(inout) :: workf(2,n4,n6,nd2proc*ndat) 3323 3324 !Local variables------------------------------- 3325 integer :: idat,i1,i2,i3,i2_local,i2_ldat,fgbase 3326 3327 ! ************************************************************************* 3328 3329 do idat=1,ndat 3330 do i3=1,n3 3331 do i2=1,n2 3332 if (fftn2_distrib(i2) == me_fft) then 3333 i2_local = ffti2_local(i2) 3334 i2_ldat = i2_local + (idat-1) * nd2proc 3335 fgbase= n1*(i2_local-1 + nd2proc*(i3-1)) + (idat-1) * nfft 3336 do i1=1,n1 3337 workf(1,i1,i3,i2_ldat)=fofg(1,i1+fgbase) 3338 workf(2,i1,i3,i2_ldat)=fofg(2,i1+fgbase) 3339 end do 3340 end if 3341 end do 3342 end do 3343 end do 3344 3345 end subroutine mpifft_fg2dbox
m_fftcore/mpifft_fg2dbox_dpc [ Functions ]
[ Top ] [ m_fftcore ] [ Functions ]
NAME
mpifft_fg2dbox_dpc
FUNCTION
INPUTS
OUTPUT
SOURCE
3362 pure subroutine mpifft_fg2dbox_dpc(nfft,ndat,fofg,n1,n2,n3,n4,nd2proc,n6,fftn2_distrib,ffti2_local,me_fft,workf) 3363 3364 !Arguments ------------------------------------ 3365 !scalars 3366 integer,intent(in) :: nfft,ndat,n1,n2,n3,n4,nd2proc,n6,me_fft 3367 !arrays 3368 integer,intent(in) :: fftn2_distrib(n2),ffti2_local(n2) 3369 real(dp),intent(in) :: fofg(2,nfft*ndat) 3370 complex(dpc),intent(inout) :: workf(n4,n6,nd2proc*ndat) 3371 3372 !Local variables------------------------------- 3373 integer :: idat,i1,i2,i3,i2_local,i2_ldat,fgbase 3374 3375 ! ************************************************************************* 3376 3377 do idat=1,ndat 3378 do i3=1,n3 3379 do i2=1,n2 3380 if (fftn2_distrib(i2) == me_fft) then 3381 i2_local = ffti2_local(i2) 3382 i2_ldat = i2_local + (idat-1) * nd2proc 3383 fgbase= n1*(i2_local-1 + nd2proc*(i3-1)) + (idat-1) * nfft 3384 do i1=1,n1 3385 workf(i1,i3,i2_ldat)=CMPLX(fofg(1,i1+fgbase), fofg(2,i1+fgbase), kind=dpc) 3386 end do 3387 end if 3388 end do 3389 end do 3390 end do 3391 3392 end subroutine mpifft_fg2dbox_dpc
m_fftcore/mpifft_fr2dbox [ Functions ]
[ Top ] [ m_fftcore ] [ Functions ]
NAME
mpifft_fr2dbox
FUNCTION
INPUTS
OUTPUT
SOURCE
3663 pure subroutine mpifft_fr2dbox(cplex,nfft,ndat,fofr,n1,n2,n3,n4,n5,nd3proc,fftn3_distrib,ffti3_local,me_fft,workr) 3664 3665 !Arguments ------------------------------------ 3666 !scalars 3667 integer,intent(in) :: cplex,nfft,ndat,n1,n2,n3,n4,n5,nd3proc,me_fft 3668 !!arrays 3669 integer,intent(in) :: fftn3_distrib(n3),ffti3_local(n3) 3670 real(dp),intent(in) :: fofr(cplex*nfft*ndat) 3671 real(dp),intent(inout) :: workr(2,n4,n5,nd3proc*ndat) 3672 3673 !Local variables------------------------------- 3674 integer :: idat,i1,i2,i3,i3_local,i3_ldat,frbase 3675 3676 ! ************************************************************************* 3677 3678 select case (cplex) 3679 case (1) 3680 3681 do idat=1,ndat 3682 do i3=1,n3 3683 if( me_fft == fftn3_distrib(i3) ) then 3684 i3_local = ffti3_local(i3) 3685 i3_ldat = i3_local + (idat-1) * nd3proc 3686 do i2=1,n2 3687 frbase=n1*(i2-1+n2*(i3_local-1)) + (idat-1) * nfft 3688 do i1=1,n1 3689 workr(1,i1,i2,i3_ldat)=fofr(i1+frbase) 3690 workr(2,i1,i2,i3_ldat)=zero 3691 end do 3692 end do 3693 end if 3694 end do 3695 end do 3696 3697 case (2) 3698 3699 do idat=1,ndat 3700 do i3=1,n3 3701 if( me_fft == fftn3_distrib(i3) ) then 3702 i3_local = ffti3_local(i3) 3703 i3_ldat = i3_local + (idat-1) * nd3proc 3704 do i2=1,n2 3705 frbase=2*n1*(i2-1+n2*(i3_local-1)) + (idat-1) * cplex * nfft 3706 do i1=1,n1 3707 workr(1,i1,i2,i3_ldat)=fofr(2*i1-1+frbase) 3708 workr(2,i1,i2,i3_ldat)=fofr(2*i1 +frbase) 3709 end do 3710 end do 3711 end if 3712 end do 3713 end do 3714 3715 case default 3716 !ABI_BUG("Wrong cplex") 3717 workr = huge(one) 3718 end select 3719 3720 end subroutine mpifft_fr2dbox
m_fftcore/mpifft_fr2dbox_dpc [ Functions ]
[ Top ] [ m_fftcore ] [ Functions ]
NAME
mpifft_fr2dbox_dpc
FUNCTION
INPUTS
OUTPUT
SOURCE
3737 pure subroutine mpifft_fr2dbox_dpc(cplex,nfft,ndat,fofr,n1,n2,n3,n4,n5,nd3proc,fftn3_distrib,ffti3_local,me_fft,workr) 3738 3739 !Arguments ------------------------------------ 3740 !scalars 3741 integer,intent(in) :: cplex,nfft,ndat,n1,n2,n3,n4,n5,nd3proc,me_fft 3742 !!arrays 3743 integer,intent(in) :: fftn3_distrib(n3),ffti3_local(n3) 3744 real(dp),intent(in) :: fofr(cplex*nfft*ndat) 3745 complex(dpc),intent(inout) :: workr(n4,n5,nd3proc*ndat) 3746 3747 !Local variables------------------------------- 3748 integer :: idat,i1,i2,i3,i3_local,i3_ldat,frbase 3749 3750 ! ************************************************************************* 3751 3752 select case (cplex) 3753 case (1) 3754 3755 do idat=1,ndat 3756 do i3=1,n3 3757 if( me_fft == fftn3_distrib(i3) ) then 3758 i3_local = ffti3_local(i3) 3759 i3_ldat = i3_local + (idat-1) * nd3proc 3760 do i2=1,n2 3761 frbase=n1*(i2-1+n2*(i3_local-1)) + (idat-1) * nfft 3762 do i1=1,n1 3763 workr(i1,i2,i3_ldat)=CMPLX(fofr(i1+frbase), zero, kind=dpc) 3764 end do 3765 end do 3766 end if 3767 end do 3768 end do 3769 3770 case (2) 3771 3772 do idat=1,ndat 3773 do i3=1,n3 3774 if( me_fft == fftn3_distrib(i3) ) then 3775 i3_local = ffti3_local(i3) 3776 i3_ldat = i3_local + (idat-1) * nd3proc 3777 do i2=1,n2 3778 frbase=2*n1*(i2-1+n2*(i3_local-1)) + (idat-1) * cplex * nfft 3779 do i1=1,n1 3780 workr(i1,i2,i3_ldat)=CMPLX(fofr(2*i1-1+frbase), fofr(2*i1 +frbase), kind=dpc) 3781 end do 3782 end do 3783 end if 3784 end do 3785 end do 3786 3787 case default 3788 !ABI_BUG("Wrong cplex") 3789 workr = huge(one) 3790 end select 3791 3792 end subroutine mpifft_fr2dbox_dpc
m_fftcore/mpiswitch [ Functions ]
[ Top ] [ m_fftcore ] [ Functions ]
NAME
mpiswitch
FUNCTION
INPUTS
OUTPUT
SOURCE
3115 pure subroutine mpiswitch(j3,n1dfft,Jp2st,J2st,lot,n1,nd2proc,nd3proc,nproc,ioption,zmpi1,zw) 3116 3117 !Arguments ------------------------------------ 3118 integer,intent(in) :: j3,n1dfft,lot,n1,nd2proc,nd3proc,nproc,ioption 3119 integer,intent(inout) :: Jp2st,J2st 3120 real(dp),intent(in) :: zmpi1(2,n1,nd2proc,nd3proc,nproc) 3121 real(dp),intent(inout) :: zw(2,lot,n1) 3122 3123 !Local variables------------------------------- 3124 integer :: Jp2,J2,I1,ind,jj2,mfft,jjp2 3125 3126 ! ************************************************************************* 3127 mfft=0 3128 3129 if (ioption /= 1) then 3130 do Jp2=Jp2st,nproc 3131 do J2=J2st,nd2proc 3132 mfft=mfft+1 3133 if (mfft.gt.n1dfft) then 3134 Jp2st=Jp2 3135 J2st=J2 3136 return 3137 end if 3138 do I1=1,n1 3139 zw(1,mfft,I1)=zmpi1(1,I1,J2,j3,Jp2) 3140 zw(2,mfft,I1)=zmpi1(2,I1,J2,j3,Jp2) 3141 end do 3142 end do 3143 J2st=1 3144 end do 3145 3146 else 3147 do Jp2=Jp2st,nproc 3148 do J2=J2st,nd2proc 3149 mfft=mfft+1 3150 if (mfft.gt.n1dfft) then 3151 Jp2st=Jp2 3152 J2st=J2 3153 return 3154 end if 3155 ind=(Jp2-1) * nd2proc + J2 3156 jj2=(ind-1)/nproc +1 3157 3158 !jjp2=modulo(ind,nproc) +1 3159 jjp2=modulo(ind-1,nproc)+1 3160 3161 !in other words: mfft=(jj2-1)*nproc+jjp2 (modulo case) 3162 !istead of mfft=(Jjp2-1) * nd2proc + Jj2 (slice case) 3163 !with 1<=jjp2<=nproc, jj2=1,nd2proc 3164 do I1=1,n1 3165 ! zw(1,mfft,I1)=zmpi1(1,I1,J2,j3,Jp2) 3166 ! zw(2,mfft,I1)=zmpi1(2,I1,J2,j3,Jp2) 3167 zw(1,mfft,I1)=zmpi1(1,I1,jj2,j3,jjp2) 3168 zw(2,mfft,I1)=zmpi1(2,I1,jj2,j3,jjp2) 3169 end do 3170 end do 3171 J2st=1 3172 end do 3173 end if 3174 3175 end subroutine mpiswitch
m_fftcore/mpiswitch_cent [ Functions ]
[ Top ] [ m_fftcore ] [ Functions ]
NAME
mpiswitch_cent
FUNCTION
Perform the local rotation input: I1,J2,j3,Jp2,(jp3) output: J2,Jp2,I1,j3,(jp3) and fill the central region of the frequency spectrum with zeros
INPUTS
OUTPUT
SOURCE
3199 pure subroutine mpiswitch_cent(j3,n1dfft,Jp2stb,J2stb,lot,max1,md1,m1,n1,md2proc,& 3200 nd3proc,nproc,ioption,zmpi1,zw,max2,m2,n2) 3201 3202 !Arguments ------------------------------------ 3203 integer,intent(in) :: j3,n1dfft,lot,max1,md1,m1,n1,md2proc,nd3proc,nproc,ioption 3204 integer,intent(in) :: m2,max2,n2 3205 integer,intent(inout) :: Jp2stb,J2stb 3206 real(dp),intent(in) :: zmpi1(2,md1,md2proc,nd3proc,nproc) 3207 real(dp),intent(inout) :: zw(2,lot,n1) 3208 3209 !Local variables------------------------------- 3210 integer :: mfft,jp2,j2,jjp2,jj2,i1,ind 3211 3212 ! ************************************************************************* 3213 3214 ABI_UNUSED((/m2,max2,n2/)) 3215 3216 mfft=0 3217 3218 if (ioption /= 1) then 3219 do Jp2=Jp2stb,nproc 3220 do J2=J2stb,md2proc 3221 3222 mfft=mfft+1 3223 if (mfft.gt.n1dfft) then 3224 Jp2stb=Jp2 3225 J2stb=J2 3226 !ABI_WARNING("Returning from mpiswithc_cent") 3227 return 3228 end if 3229 3230 ! Here, zero and positive frequencies 3231 ! In zmpi1, they are stored from 1 to max1+1 3232 do I1=1,max1+1 3233 zw(1,mfft,I1)=zmpi1(1,I1,J2,j3,Jp2) 3234 zw(2,mfft,I1)=zmpi1(2,I1,J2,j3,Jp2) 3235 end do 3236 3237 ! Fill the center region with zeros 3238 do I1=max1+2,n1-m1+max1+1 3239 zw(1,mfft,I1)=zero 3240 zw(2,mfft,I1)=zero 3241 end do 3242 3243 ! Here, negative frequencies 3244 ! In zmpi1, they are stored from 1 to m1half 3245 do I1=max1+2,m1 3246 zw(1,mfft,I1+n1-m1)=zmpi1(1,I1,J2,j3,Jp2) 3247 zw(2,mfft,I1+n1-m1)=zmpi1(2,I1,J2,j3,Jp2) 3248 end do 3249 end do 3250 J2stb=1 3251 end do 3252 3253 else 3254 do Jp2=Jp2stb,nproc 3255 do J2=J2stb,md2proc 3256 3257 mfft=mfft+1 3258 if (mfft.gt.n1dfft) then 3259 Jp2stb=Jp2 3260 J2stb=J2 3261 !ABI_WARNING("Returning from mpiswithc_cent") 3262 return 3263 end if 3264 3265 ind=(Jp2-1) * md2proc + J2 3266 jj2=(ind-1)/nproc +1 3267 3268 !jjp2=modulo(ind,nproc) +1 3269 jjp2=modulo(ind-1,nproc)+1 3270 3271 ! I gather consecutive I2 indexes in mfft in the modulo case 3272 ! Here, zero and positive frequencies 3273 ! In zmpi1, they are stored from 1 to max1+1 3274 do I1=1,max1+1 3275 zw(1,mfft,I1)=zmpi1(1,I1,Jj2,j3,Jjp2) 3276 zw(2,mfft,I1)=zmpi1(2,I1,Jj2,j3,Jjp2) 3277 end do 3278 3279 ! Fill the center region with zeros 3280 do I1=max1+2,n1-m1+max1+1 3281 zw(1,mfft,I1)=zero 3282 zw(2,mfft,I1)=zero 3283 end do 3284 3285 ! Here, negative frequencies 3286 ! In zmpi1, they are stored from 1 to m1half 3287 do I1=max1+2,m1 3288 zw(1,mfft,I1+n1-m1)=zmpi1(1,I1,Jj2,j3,Jjp2) 3289 zw(2,mfft,I1+n1-m1)=zmpi1(2,I1,Jj2,j3,Jjp2) 3290 end do 3291 3292 end do 3293 J2stb=1 3294 end do 3295 end if 3296 3297 end subroutine mpiswitch_cent
m_fftcore/multpot [ Functions ]
[ Top ] [ m_fftcore ] [ Functions ]
NAME
multpot
FUNCTION
INPUTS
icplexwf=1 if u(r) is real, 2 otherwise. icplex=1 if v(r) is real, 2 otherwise. includelast nd1,nd2=Leading dimensions of pot(icplex*nd1,nd2) n2 lot n1dfft
OUTPUT
SOURCE
4669 subroutine multpot(icplexwf,icplex,includelast,nd1,nd2,n2,lot,n1dfft,pot,zw) 4670 4671 !Arguments ------------------------------------ 4672 integer,intent(in) :: icplexwf,icplex,includelast,nd1,nd2,n2,lot,n1dfft 4673 real(dp),intent(in) :: pot(icplex*nd1,nd2) 4674 real(dp),intent(inout) :: zw(2,lot,n2) 4675 4676 !Local variables------------------------------- 4677 integer :: i2,j 4678 real(dp) :: rew,imw 4679 4680 ! ************************************************************************* 4681 4682 if (icplexwf==1) then 4683 ! Real u(r) 4684 4685 if (icplex==2) then 4686 ABI_BUG('multpot: icplexwf=1 and icplex=2') 4687 else 4688 ! TO BE SPEEDED UP : should use the same trick as Stefan 4689 if(includelast==1)then 4690 do i2=1,n2 4691 do j=1,n1dfft 4692 zw(1,j,i2)=zw(1,j,i2)*pot(2*j-1,i2) 4693 zw(2,j,i2)=zw(2,j,i2)*pot(2*j ,i2) 4694 end do 4695 end do 4696 else 4697 do i2=1,n2 4698 do j=1,n1dfft-1 4699 zw(1,j,i2)=zw(1,j,i2)*pot(2*j-1,i2) 4700 zw(2,j,i2)=zw(2,j,i2)*pot(2*j ,i2) 4701 end do 4702 zw(1,n1dfft,i2)=zw(1,n1dfft,i2)*pot(2*n1dfft-1,i2) 4703 end do 4704 end if 4705 end if 4706 4707 else if (icplexwf==2) then 4708 ! Complex u(r) 4709 4710 if (icplex==1) then 4711 4712 do i2=1,n2-1,2 4713 do j=1,n1dfft 4714 zw(1,j,i2)=zw(1,j,i2)*pot(j,i2) 4715 zw(2,j,i2)=zw(2,j,i2)*pot(j,i2) 4716 zw(1,j,i2+1)=zw(1,j,i2+1)*pot(j,i2+1) 4717 zw(2,j,i2+1)=zw(2,j,i2+1)*pot(j,i2+1) 4718 end do 4719 end do 4720 4721 if (2*(n2/2)/=n2) then 4722 do j=1,n1dfft 4723 zw(1,j,n2)=zw(1,j,n2)*pot(j,n2) 4724 zw(2,j,n2)=zw(2,j,n2)*pot(j,n2) 4725 end do 4726 end if 4727 4728 else 4729 4730 do i2=1,n2-1,2 4731 do j=1,n1dfft 4732 rew = zw(1,j,i2); imw = zw(2,j,i2) 4733 zw(1,j,i2) = rew*pot(2*j-1,i2) - imw*pot(2*j,i2) 4734 zw(2,j,i2) = imw*pot(2*j-1,i2) + rew*pot(2*j,i2) 4735 4736 rew = zw(1,j,i2+1); imw = zw(2,j,i2+1) 4737 zw(1,j,i2+1) = rew*pot(2*j-1,i2+1) - imw*pot(2*j,i2+1) 4738 zw(2,j,i2+1) = imw*pot(2*j-1,i2+1) + rew*pot(2*j,i2+1) 4739 end do 4740 end do 4741 4742 if (2*(n2/2)/=n2) then 4743 do j=1,n1dfft 4744 rew = zw(1,j,n2); imw = zw(2,j,n2) 4745 zw(1,j,n2) = rew*pot(2*j-1,n2) - imw*pot(2*j,n2) 4746 zw(2,j,n2) = imw*pot(2*j-1,n2) + rew*pot(2*j,n2) 4747 end do 4748 end if 4749 4750 end if 4751 end if 4752 4753 end subroutine multpot
m_fftcore/ngfft_seq [ Functions ]
[ Top ] [ m_fftcore ] [ Functions ]
NAME
ngfft_seq
FUNCTION
Helper function used to initialize ngfft(18) from the FFT divisions in the case of sequential execution.
INPUTS
n123(3)=FFT divisions.
OUTPUT
ngfft(18)=contain all needed information about 3D FFT, see ~abinit/doc/variables/vargs.htm#ngfft.
SOURCE
392 pure subroutine ngfft_seq(ngfft, n123) 393 394 !Arguments ------------------------------------ 395 integer,intent(in) :: n123(3) 396 integer,intent(out) :: ngfft(18) 397 398 !Local variables------------------------------- 399 integer :: fftalg 400 401 ! ************************************************************************* 402 403 ! Default for sequential case. 404 fftalg = 112 405 #ifdef HAVE_FFTW3 406 fftalg = 312 407 #elif defined HAVE_DFTI 408 fftalg = 512 409 #endif 410 411 ngfft(1:3) = n123 412 ngfft(4) = 2*(ngfft(1)/2)+1 413 ngfft(5) = 2*(ngfft(2)/2)+1 414 ngfft(6) = ngfft(3) 415 ngfft(7)= fftalg ! fftalg 416 ngfft(8)= get_cache_kb() ! cache_kb 417 ngfft(9)= 0 ! paral_fft_ 418 ngfft(10)=1 ! nproc_fft 419 ngfft(11)=0 ! me_fft 420 ngfft(12)=0 ! n2proc 421 ngfft(13)=0 ! n3proc 422 ngfft(14:18)=0 ! not used 423 424 end subroutine ngfft_seq
m_fftcore/print_ngfft [ Functions ]
[ Top ] [ m_fftcore ] [ Functions ]
NAME
print_ngfft
FUNCTION
Print the content of ngfft(18) in explicative format.
INPUTS
ngfft(18)=contain all needed information about 3D FFT, see ~abinit/doc/variables/vargs.htm#ngfft. [unit]=unit number for output (defaults to std_out). [prtvol]=verbosity level (defaults to 0). [mode_paral]=either "COLL" or "PERS" ("COLL" is default).
OUTPUT
Only writing
SOURCE
447 subroutine print_ngfft(ngfft, header, unit, mode_paral, prtvol) 448 449 !Arguments ------------------------------------ 450 !scalars 451 integer,intent(in),optional :: prtvol,unit 452 character(len=*),intent(in),optional :: header 453 character(len=4),intent(in),optional :: mode_paral 454 !arrays 455 integer,intent(in) :: ngfft(18) 456 457 !Local variables------------------------------- 458 integer :: my_unt,my_prtvol 459 character(len=4) :: my_mode 460 character(len=500) :: msg 461 462 ! ************************************************************************* 463 464 my_prtvol=0; if (PRESENT(prtvol )) my_prtvol=prtvol 465 my_unt =std_out; if (PRESENT(unit )) my_unt =unit 466 my_mode ='COLL'; if (PRESENT(mode_paral)) my_mode =mode_paral 467 468 msg=ch10//' ==== FFT mesh description (ngfft) ==== ' 469 if (PRESENT(header)) msg=ch10//' ==== '//TRIM(ADJUSTL(header))//' ==== ' 470 call wrtout(my_unt,msg,my_mode) 471 write(msg,'(2(a,3i5,a),a,i5,2a,i5)')& 472 ' FFT mesh divisions ........................ ',ngfft(1),ngfft(2),ngfft(3),ch10,& 473 ' Augmented FFT divisions ................... ',ngfft(4),ngfft(5),ngfft(6),ch10,& 474 ' FFT algorithm ............................. ',ngfft(7),ch10,& 475 ' FFT cache size ............................ ',ngfft(8) 476 call wrtout(my_unt,msg,my_mode) 477 478 if (my_prtvol > 0) then 479 write(msg,'(6(a,i5,a),a,4i5)')& 480 ' FFT parallelization level ................. ',ngfft(9),ch10,& 481 ' Number of processors in my FFT group ...... ',ngfft(10),ch10,& 482 ' Index of me in my FFT group ............... ',ngfft(11),ch10,& 483 ' No of xy planes in R space treated by me .. ',ngfft(12),ch10,& 484 ' No of xy planes in G space treated by me .. ',ngfft(13),ch10,& 485 ' MPI communicator for FFT .................. ',ngfft(14),ch10,& 486 ' Value of ngfft(15:18) ..................... ',ngfft(15:18) 487 call wrtout(my_unt,msg,my_mode) 488 end if 489 490 end subroutine print_ngfft
m_fftcore/scramble [ Functions ]
[ Top ] [ m_fftcore ] [ Functions ]
NAME
scramble
FUNCTION
This routine performs the local rotation input: G1,R3,G2,(Gp2) output: G1,G2,R3,(Gp2)
INPUTS
i1=Index of x in the small box enclosing the G-sphere. j2 lot=Cache blocking factor n1dfft=Number of 1D FFTs performed. md1,md2proc,nnd3=Used to dimension zmpi2 n3=Dimension of the transform along z. zw(2,lot,n3): zw(:,1:n1dfft,n3) contains the lines transformed along z OUTPTU zmpi2(2,md1,md2proc,nnd3)
SOURCE
2449 pure subroutine scramble(i1,j2,lot,n1dfft,md1,n3,md2proc,nnd3,zw,zmpi2) 2450 2451 !Arguments ------------------------------------ 2452 integer,intent(in) :: i1,j2,lot,n1dfft,md1,n3,md2proc,nnd3 2453 real(dp),intent(in) :: zw(2,lot,n3) 2454 real(dp),intent(inout) :: zmpi2(2,md1,md2proc,nnd3) 2455 2456 !Local variables------------------------------- 2457 integer :: i3,i 2458 2459 ! ************************************************************************* 2460 2461 do i3=1,n3 2462 do i=0,n1dfft-1 2463 zmpi2(1,i1+i,j2,i3)=zw(1,i+1,i3) 2464 zmpi2(2,i1+i,j2,i3)=zw(2,i+1,i3) 2465 end do 2466 end do 2467 2468 end subroutine scramble
m_fftcore/sphere [ Functions ]
[ Top ] [ m_fftcore ] [ Functions ]
NAME
sphere
FUNCTION
Array cg is defined in sphere with npw g-vectors. Insert cg inside FFT box of n1*n2*n3 points to define array cfft for rest of cfft is filled with 0 s. iflag=1 ==> insert cg into cfft. iflag=2 ==> insert cg into cfft, where the second and third dimension have been switched (needed for new 2002 SGoedecker FFT) iflag=-1==> extract cg from cfft. iflag=-2==> extract cg from cfft, where the second and third dimension have been switched (needed for new 2002 SGoedecker FFT) WARNING: iflag=-2 cannot use symmetry operations. There is also the possibility to apply a symmetry operation, as well as to make a shift in reciprocal space, or to multiply by a constant factor, in the case iflag=-1. Multiplication by a constant factor is also possible in the case iflag=-2.
INPUTS
cg(2,npw*ndat)= contains values for npw G vectors in basis sphere ndat=number of wavefunctions npw=number of G vectors in basis at this k point cfft(2,n4,n5,n6*ndat) = array in FFT box n1,n2,n3=physical dimension of the box (cfft) n4,n5,n6=memory dimension of cfft kg_k(3,npw)=integer coordinates of G vectors in basis sphere istwf_k=option parameter that describes the storage of wfs iflag=option parameter. Possible values: -1, -2, 1, 2 me_g0=1 if this node has G=0. shiftg(3)=The shift in reciprocal space. symrec(3,3)=symmetry operation in reciprocal space to be applied (symrec) xnorm=Normalization factor.
SIDE EFFECTS
Input/Output iflag=1 and 2, insert cg(input) into cfft(output) iflag=-1 and -2, extract cg(output) from cfft(input)
NOTES
cg and cfft are assumed to be of type COMPLEX, although this routine treats them as real of twice the length to avoid nonstandard complex*16. If istwf_k differs from 1, then special storage modes must be taken into account, for symmetric wavefunctions coming from k=(0 0 0) or other special k points.
TODO
1) Order arguments 2) Split the two cases to avoid breaking intent: from and to sphere (merge with cg_box2gpsh and cg_gsph2box?) 3) If symmetries are used with or without shiftg, it might happen that the FFT mesh is not large enough to accomodate the rotated G, in this case one should return ierr /= 0
SOURCE
1543 subroutine sphere(cg, ndat, npw, cfft, n1, n2, n3, n4, n5, n6, kg_k, istwf_k, iflag, me_g0, shiftg, symrec, xnorm) 1544 1545 !Arguments ------------------------------------ 1546 !scalars 1547 integer,intent(in) :: iflag,istwf_k,n1,n2,n3,n4,n5,n6,ndat,npw,me_g0 1548 real(dp),intent(in) :: xnorm 1549 !arrays 1550 integer,intent(in) :: kg_k(3,npw),shiftg(3),symrec(3,3) 1551 real(dp),intent(inout) :: cfft(2,n4,n5,n6*ndat),cg(2,npw*ndat) 1552 1553 !Local variables------------------------------- 1554 !scalars 1555 integer :: i1,i1inv,i2,i2inv,i3,i3inv,id1,id2,id3,idat,ipw 1556 integer :: j1,j2,j3,l1,l2,l3,npwmin,use_symmetry,i3dat,i3invdat,i2invdat,ipwdat,i2dat 1557 character(len=500) :: msg 1558 !arrays 1559 integer :: identity(3,3) 1560 integer :: i1inver(n1),i2inver(n2),i3inver(n3) 1561 1562 ! ************************************************************************* 1563 1564 DBG_ENTER("COLL") 1565 1566 ! In the case of special k-points, invariant under time-reversal, 1567 ! but not Gamma, initialize the inverse coordinates. 1568 ! Remember that: 1569 ! 1570 ! u_k(G) = u_{k+G0}(G-G0); u_{-k}(G) = u_k(G)^* and therefore: 1571 ! u_{G0/2}(G) = u_{G0/2}(-G-G0)^*. 1572 1573 if (istwf_k>=2) then 1574 if(istwf_k==2 .or. istwf_k==4 .or. istwf_k==6 .or. istwf_k==8)then 1575 i1inver(1)=1 1576 do i1=2,n1 1577 i1inver(i1)=n1+2-i1 1578 end do 1579 else 1580 do i1=1,n1 1581 i1inver(i1)=n1+1-i1 1582 end do 1583 end if 1584 if(istwf_k>=2 .and. istwf_k<=5)then 1585 i2inver(1)=1 1586 do i2=2,n2 1587 i2inver(i2)=n2+2-i2 1588 end do 1589 else 1590 do i2=1,n2 1591 i2inver(i2)=n2+1-i2 1592 end do 1593 end if 1594 if(istwf_k==2 .or. istwf_k==3 .or. istwf_k==6 .or. istwf_k==7)then 1595 i3inver(1)=1 1596 do i3=2,n3 1597 i3inver(i3)=n3+2-i3 1598 end do 1599 else 1600 do i3=1,n3 1601 i3inver(i3)=n3+1-i3 1602 end do 1603 end if 1604 end if 1605 1606 if (iflag==1 .or. iflag==2) then 1607 ! Insert cg into cfft with extra 0 s around outside: 1608 cfft = zero 1609 1610 ! Take care of each plane wave, and complete cfft if needed 1611 if (istwf_k==1) then 1612 1613 if (iflag==1) then 1614 !$OMP PARALLEL DO PRIVATE(i1,i2,i3) IF (ndat>1) 1615 do idat=1,ndat 1616 do ipw=1,npw 1617 i1=kg_k(1,ipw); if(i1<0) i1=i1+n1; i1=i1+1 1618 i2=kg_k(2,ipw); if(i2<0) i2=i2+n2; i2=i2+1 1619 i3=kg_k(3,ipw); if(i3<0) i3=i3+n3; i3=i3+1 1620 1621 cfft(1,i1,i2,i3+n6*(idat-1))=cg(1,ipw+npw*(idat-1)) 1622 cfft(2,i1,i2,i3+n6*(idat-1))=cg(2,ipw+npw*(idat-1)) 1623 end do 1624 end do 1625 end if 1626 1627 if (iflag==2) then 1628 !$OMP PARALLEL DO PRIVATE(i1,i2,i3) IF (ndat>1) 1629 do idat=1,ndat 1630 do ipw=1,npw 1631 i1=kg_k(1,ipw); if(i1<0) i1=i1+n1; i1=i1+1 1632 i2=kg_k(2,ipw); if(i2<0) i2=i2+n2; i2=i2+1 1633 i3=kg_k(3,ipw); if(i3<0) i3=i3+n3; i3=i3+1 1634 1635 cfft(1,i1,i3,i2+n6*(idat-1))=cg(1,ipw+npw*(idat-1)) 1636 cfft(2,i1,i3,i2+n6*(idat-1))=cg(2,ipw+npw*(idat-1)) 1637 end do 1638 end do 1639 end if 1640 1641 else if (istwf_k>=2) then 1642 1643 npwmin=1 1644 if (istwf_k==2 .and. me_g0==1) then 1645 ! If gamma point, then cfft must be completed 1646 do idat=1,ndat 1647 cfft(1,1,1,1+n6*(idat-1))=cg(1,1+npw*(idat-1)) 1648 cfft(2,1,1,1+n6*(idat-1))=zero 1649 end do 1650 npwmin=2 1651 end if 1652 1653 if (iflag==1) then 1654 !$OMP PARALLEL DO PRIVATE(i1,i1inv,i2,i2inv,i3,i3inv) IF (ndat>1) 1655 do idat=1,ndat 1656 do ipw=npwmin,npw 1657 i1=kg_k(1,ipw); if(i1<0) i1=i1+n1; i1=i1+1 1658 i2=kg_k(2,ipw); if(i2<0) i2=i2+n2; i2=i2+1 1659 i3=kg_k(3,ipw); if(i3<0) i3=i3+n3; i3=i3+1 1660 ! Construct the coordinates of -k-G 1661 i1inv=i1inver(i1) ; i2inv=i2inver(i2) ; i3inv=i3inver(i3) 1662 1663 cfft(1,i1,i2,i3+n6*(idat-1))=cg(1,ipw+npw*(idat-1)) 1664 cfft(2,i1,i2,i3+n6*(idat-1))=cg(2,ipw+npw*(idat-1)) 1665 cfft(1,i1inv,i2inv,i3inv+n6*(idat-1))= cg(1,ipw+npw*(idat-1)) 1666 cfft(2,i1inv,i2inv,i3inv+n6*(idat-1))=-cg(2,ipw+npw*(idat-1)) 1667 end do 1668 end do 1669 end if 1670 1671 if (iflag==2) then 1672 !$OMP PARALLEL DO PRIVATE(i1,i1inv,i2,i2inv,i3,i3inv) IF (ndat>1) 1673 do idat=1,ndat 1674 do ipw=npwmin,npw 1675 i1=kg_k(1,ipw); if(i1<0) i1=i1+n1; i1=i1+1 1676 i2=kg_k(2,ipw); if(i2<0) i2=i2+n2; i2=i2+1 1677 i3=kg_k(3,ipw); if(i3<0) i3=i3+n3; i3=i3+1 1678 1679 ! Construct the coordinates of -k-G 1680 i1inv=i1inver(i1) ; i2inv=i2inver(i2) ; i3inv=i3inver(i3) 1681 1682 cfft(1,i1,i3,i2+n6*(idat-1))=cg(1,ipw+npw*(idat-1)) 1683 cfft(2,i1,i3,i2+n6*(idat-1))=cg(2,ipw+npw*(idat-1)) 1684 cfft(1,i1inv,i3inv,i2inv+n6*(idat-1))= cg(1,ipw+npw*(idat-1)) 1685 cfft(2,i1inv,i3inv,i2inv+n6*(idat-1))=-cg(2,ipw+npw*(idat-1)) 1686 end do 1687 end do 1688 end if 1689 1690 end if 1691 1692 else if (iflag==-1 .or. iflag==-2) then 1693 ! extract cg(output) from cfft(input) 1694 1695 use_symmetry=0 1696 identity(:,:)=0; identity(1,1)=1 ; identity(2,2)=1 ; identity(3,3)=1 1697 if(sum((symrec(:,:)-identity(:,:))**2)/=0)use_symmetry=1 1698 if(sum(shiftg(:)**2)/=0)use_symmetry=1 1699 1700 ! Extract cg from cfft, ignoring components outside range of cg: 1701 if (istwf_k==1) then 1702 1703 if (use_symmetry==0) then 1704 if (iflag==-1) then 1705 !$OMP PARALLEL DO PRIVATE(i1,i2,i3,ipwdat,i3dat) IF (ndat>1) 1706 do idat=1,ndat 1707 do ipw=1,npw 1708 i1=kg_k(1,ipw); if(i1<0) i1=i1+n1; i1=i1+1 1709 i2=kg_k(2,ipw); if(i2<0) i2=i2+n2; i2=i2+1 1710 i3=kg_k(3,ipw); if(i3<0) i3=i3+n3; i3=i3+1 1711 ipwdat = ipw + (idat-1) * npw 1712 i3dat = i3 + (idat-1) * n6 1713 1714 cg(1,ipwdat)=cfft(1,i1,i2,i3dat)*xnorm 1715 cg(2,ipwdat)=cfft(2,i1,i2,i3dat)*xnorm 1716 end do 1717 end do 1718 else 1719 ! iflag==-2 1720 !$OMP PARALLEL DO PRIVATE(i1,i2,i3,ipwdat,i2dat) IF (ndat>1) 1721 do idat=1,ndat 1722 do ipw=1,npw 1723 i1=kg_k(1,ipw); if(i1<0) i1=i1+n1; i1=i1+1 1724 i2=kg_k(2,ipw); if(i2<0) i2=i2+n2; i2=i2+1 1725 i3=kg_k(3,ipw); if(i3<0) i3=i3+n3; i3=i3+1 1726 1727 ipwdat = ipw + (idat-1) * npw 1728 i2dat = i2 + (idat-1) * n6 1729 1730 cg(1,ipwdat)=cfft(1,i1,i3,i2dat)*xnorm 1731 cg(2,ipwdat)=cfft(2,i1,i3,i2dat)*xnorm 1732 end do 1733 end do 1734 end if 1735 else 1736 ! use_symmetry == 1 1737 !$OMP PARALLEL DO PRIVATE(i1,i2,i3,j1,j2,j3,l1,l2,l3,ipwdat,i3dat) IF (ndat>1) 1738 do idat=1,ndat 1739 do ipw=1,npw 1740 l1=kg_k(1,ipw)+shiftg(1) 1741 l2=kg_k(2,ipw)+shiftg(2) 1742 l3=kg_k(3,ipw)+shiftg(3) 1743 j1=symrec(1,1)*l1+symrec(1,2)*l2+symrec(1,3)*l3 1744 j2=symrec(2,1)*l1+symrec(2,2)*l2+symrec(2,3)*l3 1745 j3=symrec(3,1)*l1+symrec(3,2)*l2+symrec(3,3)*l3 1746 if(j1<0) j1=j1+n1; i1=j1+1 1747 if(j2<0) j2=j2+n2; i2=j2+1 1748 if(j3<0) j3=j3+n3; i3=j3+1 1749 ! [i1, i2, i3] are the indices of S(g + g0) in the FFT box. 1750 ! while ipw is the index of g in kg_k 1751 1752 ipwdat = ipw + (idat-1) * npw 1753 i3dat = i3 + (idat-1)*n6 1754 1755 ! c(g) = cfft(S(g + shiftg)) 1756 cg(1,ipwdat)=cfft(1,i1,i2,i3dat)*xnorm 1757 cg(2,ipwdat)=cfft(2,i1,i2,i3dat)*xnorm 1758 end do 1759 end do 1760 end if 1761 1762 else if (istwf_k>=2) then 1763 1764 npwmin=1 1765 if (istwf_k==2 .and. me_g0==1) then 1766 ! Extract cg from cfft, in a way that projects on a 1767 ! wavefunction with time-reversal symmetry 1768 do idat=1,ndat 1769 ipwdat = 1 + (idat-1) * npw 1770 i3dat = 1 + (idat-1)*n6 1771 cg(1,ipwdat)=cfft(1,1,1,i3dat)*xnorm 1772 cg(2,ipwdat)=zero 1773 end do 1774 npwmin=2 1775 end if 1776 1777 if (use_symmetry==0) then 1778 1779 if (iflag==-1) then 1780 !$OMP PARALLEL DO PRIVATE(i1,i1inv,i2,i2inv,i3,i3inv,ipwdat,i3dat,i3invdat) IF (ndat>1) 1781 do idat=1,ndat 1782 do ipw=npwmin,npw 1783 i1=kg_k(1,ipw); if(i1<0) i1=i1+n1; i1=i1+1 1784 i2=kg_k(2,ipw); if(i2<0) i2=i2+n2; i2=i2+1 1785 i3=kg_k(3,ipw); if(i3<0) i3=i3+n3; i3=i3+1 1786 1787 ! Construct the coordinates of -k-G 1788 i1inv=i1inver(i1); i2inv=i2inver(i2); i3inv=i3inver(i3) 1789 1790 ipwdat = ipw + (idat-1) * npw 1791 i3dat = i3 + (idat-1) * n6 1792 i3invdat = i3inv + (idat-1) * n6 1793 1794 ! Here the time-reversal symmetry is used to project from cfft 1795 cg(1,ipwdat)=(cfft(1,i1,i2,i3dat) + cfft(1,i1inv,i2inv,i3invdat))*0.5d0*xnorm 1796 cg(2,ipwdat)=(cfft(2,i1,i2,i3dat) - cfft(2,i1inv,i2inv,i3invdat))*0.5d0*xnorm 1797 end do 1798 end do 1799 1800 else 1801 !$OMP PARALLEL DO PRIVATE(i1,i1inv,i2,i2inv,i3,i3inv,ipwdat,i2dat,i2invdat) IF (ndat>1) 1802 do idat=1,ndat 1803 do ipw=npwmin,npw 1804 i1=kg_k(1,ipw); if(i1<0) i1=i1+n1; i1=i1+1 1805 i2=kg_k(2,ipw); if(i2<0) i2=i2+n2; i2=i2+1 1806 i3=kg_k(3,ipw); if(i3<0) i3=i3+n3; i3=i3+1 1807 1808 ! Construct the coordinates of -k-G 1809 i1inv=i1inver(i1) ; i2inv=i2inver(i2) ; i3inv=i3inver(i3) 1810 1811 ipwdat = ipw + (idat-1) * npw 1812 i2dat = i2 + (idat-1) * n6 1813 i2invdat = i2inv + (idat-1) * n6 1814 1815 ! Here the time-reversal symmetry is used to project from cfft 1816 cg(1,ipwdat)=(cfft(1,i1,i3,i2dat) + cfft(1,i1inv,i3inv,i2invdat))*0.5d0*xnorm 1817 cg(2,ipwdat)=(cfft(2,i1,i3,i2dat) - cfft(2,i1inv,i3inv,i2invdat))*0.5d0*xnorm 1818 end do 1819 end do 1820 end if 1821 1822 else 1823 ! Use symmetry 1824 id1=n1/2+2 1825 id2=n2/2+2 1826 id3=n3/2+2 1827 1828 !$OMP PARALLEL DO PRIVATE(i1,i1inv,i2,i2inv,i3,i3inv,j1,j2,j3,l1,l2,l3,ipwdat,i3dat,i3invdat) IF (ndat>1) 1829 do idat=1,ndat 1830 do ipw=npwmin,npw 1831 1832 i1=kg_k(1,ipw); if(i1<0) i1=i1+n1; i1=i1+1 1833 i2=kg_k(2,ipw); if(i2<0) i2=i2+n2; i2=i2+1 1834 i3=kg_k(3,ipw); if(i3<0) i3=i3+n3; i3=i3+1 1835 1836 i1inv=i1inver(i1) ; i2inv=i2inver(i2) ; i3inv=i3inver(i3) 1837 1838 l1=kg_k(1,ipw)+shiftg(1) 1839 l2=kg_k(2,ipw)+shiftg(2) 1840 l3=kg_k(3,ipw)+shiftg(3) 1841 j1=symrec(1,1)*l1+symrec(1,2)*l2+symrec(1,3)*l3 1842 j2=symrec(2,1)*l1+symrec(2,2)*l2+symrec(2,3)*l3 1843 j3=symrec(3,1)*l1+symrec(3,2)*l2+symrec(3,3)*l3 1844 if(j1<0)j1=j1+n1 ; i1=j1+1 1845 if(j2<0)j2=j2+n2 ; i2=j2+1 1846 if(j3<0)j3=j3+n3 ; i3=j3+1 1847 1848 ! Construct the coordinates of -k-G 1849 l1=i1inv-(i1inv/id1)*n1-1+shiftg(1) 1850 l2=i2inv-(i2inv/id2)*n2-1+shiftg(2) 1851 l3=i3inv-(i3inv/id3)*n3-1+shiftg(3) 1852 j1=symrec(1,1)*l1+symrec(1,2)*l2+symrec(1,3)*l3 1853 j2=symrec(2,1)*l1+symrec(2,2)*l2+symrec(2,3)*l3 1854 j3=symrec(3,1)*l1+symrec(3,2)*l2+symrec(3,3)*l3 1855 if(j1<0)j1=j1+n1 ; i1inv=j1+1 1856 if(j2<0)j2=j2+n2 ; i2inv=j2+1 1857 if(j3<0)j3=j3+n3 ; i3inv=j3+1 1858 1859 ipwdat = ipw + (idat-1) * npw 1860 i3dat = i3 + (idat-1) * n6 1861 i3invdat = i3inv + (idat-1) * n6 1862 1863 ! Here the time-reversal symmetry is used to project from cfft 1864 cg(1,ipwdat)=(cfft(1,i1,i2,i3dat) + cfft(1,i1inv,i2inv,i3invdat))*0.5d0*xnorm 1865 cg(2,ipwdat)=(cfft(2,i1,i2,i3dat) - cfft(2,i1inv,i2inv,i3invdat))*0.5d0*xnorm 1866 end do 1867 end do 1868 end if 1869 1870 end if 1871 1872 else 1873 write(msg,'(a,i0,a)')' iflag=',iflag,' not acceptable.' 1874 ABI_BUG(msg) 1875 end if 1876 1877 DBG_EXIT("COLL") 1878 1879 end subroutine sphere
m_fftcore/sphere_fft [ Functions ]
[ Top ] [ m_fftcore ] [ Functions ]
NAME
sphere_fft
FUNCTION
Array cg is defined in sphere with npw points. Insert cg inside box of n1*n2*n3 points to define array cfft for fft box. corresponds to given element in cg. rest of cfft is filled with 0 s. iflag=1==>insert cg into cfft. iflag=2==>insert cg into cfft, where the second and third dimension have been switched (needed for new 2002 SGoedecker FFT) iflag=-1==> extract cg from cfft. iflag=-2==> extract cg from cfft, where the second and third dimension have been switched (needed for new 2002 SGoedecker FFT) (WARNING : iflag=-2 cannot use symmetry operations) There is also the possibility to apply a symmetry operation, as well as to make a shift in reciprocal space, or to multiply by a constant factor, in the case iflag=-1. Multiplication by a constant factor is also possible in the case iflag=-2.
INPUTS
cg(2,npw)= contains values for npw G vectors in basis sphere ndat=number of FFT to do in // npw=number of G vectors in basis at this k point cfft(2,n4,n5,n6) = fft box n1,n2,n3=physical dimension of the box (cfft) n4,n5,n6=memory dimension of cfft kg_k(3,npw)=integer coordinates of G vectors in basis sphere mpi_enreg=information about MPI parallelization tab_fftwf2_local(n2)=local i2 indices in fourwf nd2proc TO BE DESCRIBED SB 090831 iflag=option parameter. Possible values: -1, -2, 1, 2 ; this is used only in debug option
OUTPUT
(see side effects)
SIDE EFFECTS
Input/Output iflag=1 and 2, insert cg(input) into cfft(output) iflag=-1 and -2, extract cg(output) from cfft(input)
NOTES
cg and cfft are assumed to be of type COMPLEX, although this routine treats them as real of twice the length to avoid nonstandard complex*16. WARNING NO CHECK is DONE over iflag.
TODO
Order arguments
SOURCE
1939 subroutine sphere_fft(cg,ndat,npw,cfft,n1,n2,n3,n4,n5,kg_k,tab_fftwf2_local,nd2proc) 1940 1941 !Arguments ------------------------------------ 1942 !scalars 1943 integer,intent(in) :: n1,n2,n3,n4,n5,nd2proc,ndat,npw 1944 integer,intent(in) :: tab_fftwf2_local(n2) 1945 !arrays 1946 integer,intent(in) :: kg_k(3,npw) 1947 real(dp),intent(in) :: cg(2,npw*ndat) 1948 real(dp),intent(out) :: cfft(2,n4,n5,nd2proc*ndat) 1949 1950 !Local variables------------------------------- 1951 !scalars 1952 integer :: i1,i2,i2_local,i3,idat,ipw 1953 1954 ! ************************************************************************* 1955 1956 !Insert cg into cfft with extra 0 s around outside: 1957 cfft = zero 1958 1959 !$OMP PARALLEL DO PRIVATE(i1,i2,i2_local,i3) 1960 do ipw=1,npw 1961 i1=kg_k(1,ipw); if(i1<0)i1=i1+n1; i1=i1+1 1962 i2=kg_k(2,ipw); if(i2<0)i2=i2+n2; i2=i2+1 1963 i3=kg_k(3,ipw); if(i3<0)i3=i3+n3; i3=i3+1 1964 i2_local = tab_fftwf2_local(i2) 1965 do idat=1,ndat 1966 cfft(1,i1,i3,i2_local + nd2proc*(idat-1))=cg(1,ipw+npw*(idat-1)) 1967 cfft(2,i1,i3,i2_local + nd2proc*(idat-1))=cg(2,ipw+npw*(idat-1)) 1968 end do 1969 end do 1970 1971 end subroutine sphere_fft
m_fftcore/sphere_fft1 [ Functions ]
[ Top ] [ m_fftcore ] [ Functions ]
NAME
sphere_fft1
FUNCTION
Array cg is defined in sphere with npw points. Insert cg inside box of n1*n2*n3 points to define array cfft for fft box. corresponds to given element in cg. rest of cfft is filled with 0 s. iflag=1==>insert cg into cfft. iflag=2==>insert cg into cfft, where the second and third dimension have been switched (needed for new 2002 SGoedecker FFT) iflag=-1==> extract cg from cfft. iflag=-2==> extract cg from cfft, where the second and third dimension have been switched (needed for new 2002 SGoedecker FFT) (WARNING : iflag=-2 cannot use symmetry operations) There is also the possibility to apply a symmetry operation, as well as to make a shift in reciprocal space, or to multiply by a constant factor, in the case iflag=-1. Multiplication by a constant factor is also possible in the case iflag=-2.
INPUTS
cg(2,npw)= contains values for npw G vectors in basis sphere ndat=number of FFT to do in // npw=number of G vectors in basis at this k point cfft(2,n4,n5,n6) = fft box n1,n2,n3=physical dimension of the box (cfft) n4,n5,n6=memory dimension of cfft kg_k(3,npw)=integer coordinates of G vectors in basis sphere nd2proc TO BE DESCRIBED SB 090831 iflag=option parameter. Possible values: -1, -2, 1, 2 tab_fftwf2_local(n2)=local i2 indices in fourwf
OUTPUT
(see side effects)
SIDE EFFECTS
Input/Output iflag=1 and 2, insert cg(input) into cfft(output) iflag=-1 and -2, extract cg(output) from cfft(input)
NOTES
cg and cfft are assumed to be of type COMPLEX, although this routine treats them as real of twice the length to avoid nonstandard complex*16. WARNING NO CHECK is DONE over iflag.
TODO
Order arguments sphere_fft1 is similar to sphere_fft, the only difference being that ndat > 1 is not supported. Why? Should merge the two APIs.
SOURCE
2032 subroutine sphere_fft1(cg,ndat,npw,cfft,n1,n2,n3,n4,n5,n6,kg_k,tab_fftwf2_local) 2033 2034 2035 !Arguments ------------------------------------ 2036 !scalars 2037 integer,intent(in) :: n1,n2,n3,n4,n5,n6,ndat,npw 2038 !arrays 2039 integer,intent(in) :: kg_k(3,npw) 2040 integer,intent(in) :: tab_fftwf2_local(n2) 2041 real(dp),intent(in) :: cg(2,npw*ndat) 2042 real(dp),intent(inout) :: cfft(2,n4,n5,n6*ndat) 2043 2044 !Local variables------------------------------- 2045 !scalars 2046 integer :: i1,i2,i2_local,i3,idat,ipw 2047 2048 ! ************************************************************************* 2049 2050 !Insert cg into cfft with extra 0 s around outside: 2051 2052 cfft = zero 2053 !$OMP PARALLEL DO PRIVATE(i1,i2,i2_local,i3) 2054 do idat=1,ndat 2055 do ipw=1,npw 2056 i1=kg_k(1,ipw); if(i1<0)i1=i1+n1; i1=i1+1 2057 i2=kg_k(2,ipw); if(i2<0)i2=i2+n2; i2=i2+1 2058 i3=kg_k(3,ipw); if(i3<0)i3=i3+n3; i3=i3+1 2059 i2_local = tab_fftwf2_local(i2) + n6*(idat-1) 2060 cfft(1,i1,i3,i2_local)=cg(1,ipw+npw*(idat-1)) 2061 cfft(2,i1,i3,i2_local)=cg(2,ipw+npw*(idat-1)) 2062 end do 2063 end do 2064 2065 end subroutine sphere_fft1
m_fftcore/sphereboundary [ Functions ]
[ Top ] [ m_fftcore ] [ Functions ]
NAME
sphereboundary
FUNCTION
Finds the boundary of the basis sphere of G vectors (at a given k point) for use in improved zero padding of ffts in 3 dimensions. Provides data to be used by subroutine fourwf, in the form of an array gbound(2*mgfft+8,2). The first component (for use when mod(fftalg,10)==2)) provides integer values g1min,g1max,g2min,g2max and then for g2 in the sequence g2=0,1,2,...,g2max,g2min,g2min+1,...,-1, provides g1min, g1max. The second component (for use when mod(fftalg,10)==1)) provides integer values g1min,g1max,g3min,g3max, where g3min and g3max have been corrected in case of time-reversal and then for g3 in the sequence g3=0,1,2,...,g3max,g3min,g3min+1,...,-1, provides g2min, g2max. (also corrected in case of time-reversal) These values are stored in the above order in array gbound. Debug mode, if fftalg is between 000 and 099
INPUTS
istwf_k=option parameter that describes the storage of wfs kg_k(3,npw)=integer coordinates of G vectors in basis sphere mgfft=maximum size of 1D FFTs (only for dimensioning purposes) npw=number of G vectors in basis at this k point
OUTPUT
gbound(2*mgfft+8,2)=defined above
SOURCE
1263 subroutine sphereboundary(gbound, istwf_k, kg_k, mgfft, npw) 1264 1265 !Arguments ------------------------------------ 1266 !scalars 1267 integer,intent(in) :: istwf_k,mgfft,npw 1268 !arrays 1269 integer,intent(in) :: kg_k(3,npw) 1270 integer,intent(out) :: gbound(2*mgfft+8,2) 1271 1272 !Local variables------------------------------- 1273 !scalars 1274 integer :: dim_a,dim_b,fftalgc,g_a,gmax_a,gmax_b,gmax_b1,gmax_b2,gmin_a,gmin_b 1275 integer :: gmin_b1,gmin_b2,igb,ii,iloop,ipw,testm,testp,kgk 1276 character(len=500) :: msg 1277 !arrays 1278 integer :: gmax(2),gmin(2) 1279 1280 ! ************************************************************************* 1281 ! 1282 !DEBUG 1283 !write(std_out,*)' sphereboundary : enter' 1284 !write(std_out, '(a)' )' sphereboundary : list of plane waves coordinates for k point ' 1285 !write(std_out, '(a)' )' ipw kg_k(1:3,ipw) ' 1286 !do ipw=1,npw 1287 !write(std_out, '(i10,a,3i6)' )ipw,' ',kg_k(1:3,ipw) 1288 !end do 1289 !gbound=-999 1290 !ENDDEBUG 1291 1292 !Determine cube boundaries 1293 gbound(1,1)=minval(kg_k(1,:)) 1294 gbound(2,1)=maxval(kg_k(1,:)) 1295 gbound(1:2,2)=gbound(1:2,1) 1296 1297 !Treat differently the fftalgc cases 1298 do ii=1,2 1299 1300 fftalgc=3-ii 1301 1302 if(fftalgc/=2)then 1303 dim_a=3 1304 dim_b=2 1305 else 1306 dim_a=2 1307 dim_b=1 1308 end if 1309 1310 ! Relevant boundaries 1311 gbound(3,ii)=minval(kg_k(dim_a,:)) 1312 gbound(4,ii)=maxval(kg_k(dim_a,:)) 1313 gmin_a=gbound(3,ii) 1314 gmax_a=gbound(4,ii) 1315 1316 ! Must complete the sphere for fftalgc==1 and special storage modes. 1317 ! Explanation : sg_fftpad is not able to take into account 1318 ! the time-reversal symmetry, so that the boundaries will not be delimited 1319 ! by the kg_k set, but by their symmetric also. 1320 if(istwf_k>=2 .and. fftalgc==1)then 1321 if( istwf_k==2 .or. istwf_k==3 .or. istwf_k==6 .or. istwf_k==7 )then 1322 gbound(4,2)=max(gmax_a,-gmin_a) 1323 gbound(3,2)=-gbound(4,2) 1324 else if( istwf_k==4 .or. istwf_k==5 .or. istwf_k==8 .or. istwf_k==9 )then 1325 gbound(4,2)=max(gmax_a,-gmin_a-1) 1326 gbound(3,2)=-gbound(4,2)-1 1327 end if 1328 gmax_a=gbound(4,2) ; gmin_a=gbound(3,2) 1329 end if 1330 1331 igb=5 1332 1333 ! Consider first every g_a in range 0 ... gmax_a, then gmin_a ... -1 1334 gmin(1)=0 ; gmax(1)=gmax_a 1335 gmin(2)=gmin_a ; gmax(2)=-1 1336 1337 do iloop=1,2 1338 1339 if( gmin(iloop) <= gmax(iloop) )then 1340 1341 do g_a=gmin(iloop),gmax(iloop) 1342 1343 if(istwf_k==1 .or. fftalgc/=1)then 1344 ! Select the minimal and maximal values, in the selected plane 1345 gmin_b=mgfft+1 ! Initialized with a value larger than all possible ones 1346 gmax_b=-mgfft-1 ! Initialized with a value smaller than all possible ones 1347 do ipw=1,npw 1348 if(kg_k(dim_a,ipw)==g_a)then 1349 kgk=kg_k(dim_b,ipw) 1350 if(kgk<=gmin_b)gmin_b=kgk 1351 if(kgk>=gmax_b)gmax_b=kgk 1352 end if 1353 end do 1354 1355 else if(istwf_k>=2 .and. fftalgc==1)then 1356 1357 ! Here, must take into account time-reversal symmetry explicitely 1358 1359 ! Determine the boundaries for the plane g_a 1360 testp=0 1361 if(g_a<=gmax_a)then 1362 ! Select the minimal and maximal values, in the selected plane 1363 gmin_b1=mgfft+1 ! Initialized with a value larger than all possible ones 1364 gmax_b1=-mgfft-1 ! Initialized with a value smaller than all possible ones 1365 do ipw=1,npw 1366 if(kg_k(dim_a,ipw)==g_a)then 1367 kgk=kg_k(dim_b,ipw) 1368 if(kgk<=gmin_b1)gmin_b1=kgk 1369 if(kgk>=gmax_b1)gmax_b1=kgk 1370 end if 1371 end do 1372 1373 1374 testp=1 1375 end if 1376 1377 ! Determine the boundaries for the plane -g_a or -g_a-1 1378 testm=0 1379 if( istwf_k==2 .or. istwf_k==3 .or. istwf_k==6 .or. istwf_k==7 )then 1380 1381 if(-g_a>=gmin_a)then 1382 ! Select the minimal and maximal values, in the selected plane 1383 ! Warning : there is an inversion of search (might be confusing) 1384 gmax_b2=mgfft+1 ! Initialized with a value larger than all possible ones 1385 gmin_b2=-mgfft-1 ! Initialized with a value smaller than all possible ones 1386 do ipw=1,npw 1387 if(kg_k(dim_a,ipw)==-g_a)then 1388 kgk=kg_k(dim_b,ipw) 1389 if(kgk<=gmax_b2)gmax_b2=kgk 1390 if(kgk>=gmin_b2)gmin_b2=kgk 1391 end if 1392 end do 1393 testm=1 1394 end if 1395 1396 else if( istwf_k==4 .or. istwf_k==5 .or. istwf_k==8 .or. istwf_k==9 )then 1397 1398 if(-g_a-1>=gmin_a)then 1399 ! Select the minimal and maximal values, in the selected plane 1400 ! Warning : there is an inversion of search (might be confusing) 1401 gmax_b2=mgfft+1 ! Initialized with a value larger than all possible ones 1402 gmin_b2=-mgfft-1 ! Initialized with a value smaller than all possible ones 1403 do ipw=1,npw 1404 if(kg_k(dim_a,ipw)==-g_a-1)then 1405 kgk=kg_k(dim_b,ipw) 1406 if(kgk<=gmax_b2)gmax_b2=kgk 1407 if(kgk>=gmin_b2)gmin_b2=kgk 1408 end if 1409 end do 1410 testm=1 1411 end if 1412 1413 end if 1414 1415 ! Must invert the boundaries, to use them for plane g_a 1416 if(testm==1)then 1417 ! This is needed to avoid border effect 1418 ! if the search did not lead to any element 1419 gmin_b2=max(gmin_b2,-mgfft) ; gmax_b2=min(gmax_b2,mgfft) 1420 if(istwf_k<=5)then 1421 gmax_b2=-gmax_b2 ; gmin_b2=-gmin_b2 1422 else 1423 gmax_b2=-gmax_b2-1 ; gmin_b2=-gmin_b2-1 1424 end if 1425 end if 1426 1427 if( testp==1 .and. testm==1)then 1428 gmin_b=min(gmin_b1,gmin_b2) ; gmax_b=max(gmax_b1,gmax_b2) 1429 else if( testp==1 )then 1430 gmin_b=gmin_b1 ; gmax_b=gmax_b1 1431 else if( testm==1 )then 1432 gmin_b=gmin_b2 ; gmax_b=gmax_b2 1433 end if 1434 1435 end if ! Endif take into account time-reversal symmetry 1436 1437 if (igb+1>2*mgfft+4) then 1438 write(msg, '(2a, 4(a,3(i0,1x),a))' )& 1439 "About to overwrite gbound array (FFT mesh too small) ",ch10, & 1440 " iloop, igb, mgb = ",iloop,igb,2*mgfft+4, ch10, & 1441 " istwfk, mgfft, npw = ",istwf_k, mgfft, npw, ch10, & 1442 " minval(kg_k) = ",minval(kg_k, dim=2), ch10, & 1443 " maxval(kg_k) = ",maxval(kg_k, dim=2), ch10 1444 ABI_BUG(msg) 1445 end if 1446 1447 gbound(igb,ii)=gmin_b 1448 gbound(igb+1,ii)=gmax_b 1449 1450 if( iloop==1 .and. istwf_k>=2 .and. istwf_k<=5 .and. fftalgc==2 .and. g_a==0)then 1451 ! If k_y=0 , for fftalgc==2, the g_a==0 plane must be completed 1452 if(istwf_k==2 .or. istwf_k==4)then 1453 gbound(igb+1,ii)=max(gmax_b,-gmin_b) 1454 gbound(igb,ii)=-gbound(igb+1,ii) 1455 else if(istwf_k==3 .or. istwf_k==5)then 1456 gbound(igb+1,ii)=max(gmax_b,-gmin_b-1) 1457 gbound(igb,ii)=-gbound(igb+1,ii)-1 1458 end if 1459 1460 end if 1461 1462 igb=igb+2 1463 1464 end do ! g_a 1465 end if 1466 end do ! iloop 1467 end do ! ii (fftalgc) 1468 1469 !DEBUG 1470 !write(std_out,'(a)')' sphereoundary : list of plane waves coordinates for 1st k point ' 1471 !write(std_out,'(a)')' ipw kg_k(1:3,ipw) ' 1472 !do ipw=1,npw 1473 !write(std_out, '(i10,a,3i6)' )ipw,' ',kg_k(1:3,ipw) 1474 !end do 1475 !write(std_out, '(a)' )' sphereboundary : list of boundaries ' 1476 !do igb=1,2*mgfft+8 1477 !write(std_out, '(i10,a,2i6)' )igb,' ',gbound(igb,1),gbound(igb,2) 1478 !end do 1479 !write(std_out,*)' sphereboundary : exit ' 1480 !ENDDEBUG 1481 1482 end subroutine sphereboundary
m_fftcore/switch [ Functions ]
[ Top ] [ m_fftcore ] [ Functions ]
NAME
switch
FUNCTION
INPUTS
OUTPUT
SOURCE
2146 pure subroutine switch(n1dfft,n2,lot,n1,lzt,zt,zw) 2147 2148 !Arguments ------------------------------------ 2149 integer,intent(in) :: n1dfft,n2,lot,n1,lzt 2150 real(dp),intent(in) :: zt(2,lzt,n1) 2151 real(dp),intent(inout) :: zw(2,lot,n2) 2152 2153 !Local variables------------------------------- 2154 integer :: i,j 2155 ! ************************************************************************* 2156 2157 do j=1,n1dfft 2158 do i=1,n2 2159 zw(1,j,i)=zt(1,i,j) 2160 zw(2,j,i)=zt(2,i,j) 2161 end do 2162 end do 2163 2164 end subroutine switch
m_fftcore/switch_cent [ Functions ]
[ Top ] [ m_fftcore ] [ Functions ]
NAME
switch_cent
FUNCTION
Perform the rotation: input: I2,i1,j3,(jp3) output: i1,I2,j3,(jp3) and padd the signal with zeros.
INPUTS
n1dfft=Number of 1D FFTs to perform max2=Max G_y in the small box enclosing the G-sphere. m2=Size of the small box enclosing the G-sphere along y n2=Dimension of the transform along y lot=Cache blocking factor. n1=Dimension of the transform along x lzt=Second dimension of z zt(2,lzt,n1)
OUTPUT
zw(2,lot,n2)=Cache working array
SOURCE
2196 pure subroutine switch_cent(n1dfft,max2,m2,n2,lot,n1,lzt,zt,zw) 2197 2198 !Arguments ------------------------------------ 2199 integer,intent(in) :: n1dfft,max2,m2,n2,lot,n1,lzt 2200 real(dp),intent(in) :: zt(2,lzt,n1) 2201 real(dp),intent(inout) :: zw(2,lot,n2) 2202 2203 !Local variables------------------------------- 2204 integer :: i,j 2205 2206 ! ************************************************************************* 2207 2208 ! Here, zero and positive frequencies 2209 do j=1,n1dfft 2210 do i=1,max2+1 2211 zw(1,j,i)=zt(1,i,j) 2212 zw(2,j,i)=zt(2,i,j) 2213 end do 2214 end do 2215 2216 ! Fill the center region with zeros 2217 do i=max2+2,n2-m2+max2+1 2218 do j=1,n1dfft 2219 zw(1,j,i)=zero 2220 zw(2,j,i)=zero 2221 end do 2222 end do 2223 2224 ! Here, negative frequencies 2225 if (m2>=max2+2) then 2226 do j=1,n1dfft 2227 do i=max2+2,m2 2228 zw(1,j,i+n2-m2)=zt(1,i,j) 2229 zw(2,j,i+n2-m2)=zt(2,i,j) 2230 end do 2231 end do 2232 end if 2233 2234 end subroutine switch_cent
m_fftcore/switchreal [ Functions ]
[ Top ] [ m_fftcore ] [ Functions ]
NAME
switchreal
FUNCTION
Perform the rotation: input: I2,i1,j3,(jp3) output: i1,I2,j3,(jp3) and padd the signal with zeros. Used for real wavefunctions.
INPUTS
includelast n1dfft=Number of 1D FFTs to perform n2=Dimension of the transform along y n2eff lot=Cache blocking factor. n1zt lzt zt(2,lzt,n1zt)
OUTPUT
zw(2,lot,n2)
SOURCE
2267 pure subroutine switchreal(includelast,n1dfft,n2,n2eff,lot,n1zt,lzt,zt,zw) 2268 2269 !Arguments ------------------------------------ 2270 integer,intent(in) :: includelast,n1dfft,n2,n2eff,lot,n1zt,lzt 2271 real(dp),intent(in) :: zt(2,lzt,n1zt) 2272 real(dp),intent(inout) :: zw(2,lot,n2) 2273 2274 !Local variables------------------------------- 2275 integer :: i,j 2276 ! ************************************************************************* 2277 2278 if (includelast==1) then 2279 2280 ! Compute symmetric and antisymmetric combinations 2281 do j=1,n1dfft 2282 zw(1,j,1)=zt(1,1,2*j-1) 2283 zw(2,j,1)=zt(1,1,2*j ) 2284 end do 2285 do i=2,n2eff 2286 do j=1,n1dfft 2287 zw(1,j,i)= zt(1,i,2*j-1)-zt(2,i,2*j) 2288 zw(2,j,i)= zt(2,i,2*j-1)+zt(1,i,2*j) 2289 zw(1,j,n2+2-i)= zt(1,i,2*j-1)+zt(2,i,2*j) 2290 zw(2,j,n2+2-i)=-zt(2,i,2*j-1)+zt(1,i,2*j) 2291 end do 2292 end do 2293 2294 else 2295 2296 ! An odd number of FFTs 2297 ! Compute symmetric and antisymmetric combinations 2298 do j=1,n1dfft-1 2299 zw(1,j,1)=zt(1,1,2*j-1) 2300 zw(2,j,1)=zt(1,1,2*j ) 2301 end do 2302 zw(1,n1dfft,1)=zt(1,1,2*n1dfft-1) 2303 zw(2,n1dfft,1)=zero 2304 2305 do i=2,n2eff 2306 do j=1,n1dfft-1 2307 zw(1,j,i)= zt(1,i,2*j-1)-zt(2,i,2*j) 2308 zw(2,j,i)= zt(2,i,2*j-1)+zt(1,i,2*j) 2309 zw(1,j,n2+2-i)= zt(1,i,2*j-1)+zt(2,i,2*j) 2310 zw(2,j,n2+2-i)=-zt(2,i,2*j-1)+zt(1,i,2*j) 2311 end do 2312 zw(1,n1dfft,i)= zt(1,i,2*n1dfft-1) 2313 zw(2,n1dfft,i)= zt(2,i,2*n1dfft-1) 2314 zw(1,n1dfft,n2+2-i)= zt(1,i,2*n1dfft-1) 2315 zw(2,n1dfft,n2+2-i)=-zt(2,i,2*n1dfft-1) 2316 end do 2317 end if 2318 2319 end subroutine switchreal
m_fftcore/switchreal_cent [ Functions ]
[ Top ] [ m_fftcore ] [ Functions ]
NAME
switchreal_cent
FUNCTION
Perform the rotation: input: I2,i1,j3,(jp3) output: i1,I2,j3,(jp3) and padd the signal with zeros. Used for the Fourier transform of real wavefunctions.
INPUTS
includelast n1dfft=Number of 1D FFTs to perform max2 n2=Dimension of the transform along y lot=Cache blocking factor. n1zt, lzt=Dimensions of zt. zt(2,lzt,n1zt)
OUTPUT
zw(2,lot,n2)
SOURCE
2351 pure subroutine switchreal_cent(includelast,n1dfft,max2,n2,lot,n1zt,lzt,zt,zw) 2352 2353 !Arguments ------------------------------------ 2354 integer,intent(in) :: includelast,n1dfft,max2,n2,lot,n1zt,lzt 2355 real(dp),intent(in) :: zt(2,lzt,n1zt) 2356 real(dp),intent(inout) :: zw(2,lot,n2) 2357 2358 !Local variables------------------------------- 2359 integer :: i,j 2360 ! ************************************************************************* 2361 2362 if (includelast==1) then 2363 2364 ! Compute symmetric and antisymmetric combinations 2365 do j=1,n1dfft 2366 zw(1,j,1)=zt(1,1,2*j-1) 2367 zw(2,j,1)=zt(1,1,2*j ) 2368 end do 2369 2370 do i=2,max2+1 2371 do j=1,n1dfft 2372 zw(1,j,i)= zt(1,i,2*j-1)-zt(2,i,2*j) 2373 zw(2,j,i)= zt(2,i,2*j-1)+zt(1,i,2*j) 2374 zw(1,j,n2+2-i)= zt(1,i,2*j-1)+zt(2,i,2*j) 2375 zw(2,j,n2+2-i)=-zt(2,i,2*j-1)+zt(1,i,2*j) 2376 end do 2377 end do 2378 2379 if(max2+1<n2-max2)then 2380 do i=max2+2,n2-max2 2381 do j=1,n1dfft 2382 zw(1,j,i)=zero 2383 zw(2,j,i)=zero 2384 end do 2385 end do 2386 end if 2387 2388 else 2389 ! Compute symmetric and antisymmetric combinations 2390 do j=1,n1dfft-1 2391 zw(1,j,1)=zt(1,1,2*j-1) 2392 zw(2,j,1)=zt(1,1,2*j ) 2393 end do 2394 2395 zw(1,n1dfft,1)=zt(1,1,2*n1dfft-1) 2396 zw(2,n1dfft,1)=zero 2397 do i=2,max2+1 2398 do j=1,n1dfft-1 2399 zw(1,j,i)= zt(1,i,2*j-1)-zt(2,i,2*j) 2400 zw(2,j,i)= zt(2,i,2*j-1)+zt(1,i,2*j) 2401 zw(1,j,n2+2-i)= zt(1,i,2*j-1)+zt(2,i,2*j) 2402 zw(2,j,n2+2-i)=-zt(2,i,2*j-1)+zt(1,i,2*j) 2403 end do 2404 zw(1,n1dfft,i)= zt(1,i,2*n1dfft-1) 2405 zw(2,n1dfft,i)= zt(2,i,2*n1dfft-1) 2406 zw(1,n1dfft,n2+2-i)= zt(1,i,2*n1dfft-1) 2407 zw(2,n1dfft,n2+2-i)=-zt(2,i,2*n1dfft-1) 2408 end do 2409 2410 if(max2+1<n2-max2)then 2411 do i=max2+2,n2-max2 2412 do j=1,n1dfft 2413 zw(1,j,i)=zero 2414 zw(2,j,i)=zero 2415 end do 2416 end do 2417 end if 2418 end if 2419 2420 end subroutine switchreal_cent
m_fftcore/unfill [ Functions ]
[ Top ] [ m_fftcore ] [ Functions ]
NAME
unfill
FUNCTION
Move data from the cache work array to zf
INPUTS
nd1,nd3=Dimensions of the input array zf. lot=Cache blocking factor. n1dfft=Number of 1D FFTs to perform n3=Dimension of the transform along z zw(2,lot,n3)=Cache work array with the z-lines.
OUTPUT
zf(2,nd1,nd3)= zf(:,1:n1dfft,:1:n3) is filled with the results stored in zw
SOURCE
2602 pure subroutine unfill(nd1,nd3,lot,n1dfft,n3,zw,zf) 2603 2604 !Arguments ------------------------------------ 2605 integer,intent(in) :: nd1,nd3,lot,n1dfft,n3 2606 real(dp),intent(in) :: zw(2,lot,n3) 2607 real(dp),intent(inout) :: zf(2,nd1,nd3) 2608 2609 !Local variables------------------------------- 2610 integer :: i1,i3 2611 ! ************************************************************************* 2612 2613 do i3=1,n3 2614 do i1=1,n1dfft 2615 zf(1,i1,i3)=zw(1,i1,i3) 2616 zf(2,i1,i3)=zw(2,i1,i3) 2617 end do 2618 end do 2619 2620 end subroutine unfill
m_fftcore/unfill_cent [ Functions ]
[ Top ] [ m_fftcore ] [ Functions ]
NAME
unfill_cent
FUNCTION
Transfer data from the cache work array to zf. Takes into account zero padding (only the non-zero entries are moved)
INPUTS
md1,md3=Leading dimension of zf along x and z lot=Cache blocking factor. n1dfft=Number of 1d transforms performed along z. max3=Max index of G_z the small box enclosing the G-sphere. m3=Number of points in the *small* box enclosing the G-sphere n3=Dimension of the FFT transform along z zw(2,lot,n3)=Cache work array
OUTPUT
zf(2,md1,md3)= zf(:,1:n1dfft,1:m3) is filled with the non-zero components.
SOURCE
2647 pure subroutine unfill_cent(md1,md3,lot,n1dfft,max3,m3,n3,zw,zf) 2648 2649 !Arguments ------------------------------------ 2650 integer,intent(in) :: md1,md3,lot,n1dfft,max3,m3,n3 2651 real(dp),intent(in) :: zw(2,lot,n3) 2652 real(dp),intent(inout) :: zf(2,md1,md3) 2653 2654 !Local variables------------------------------- 2655 integer :: i1,i3 2656 ! ************************************************************************* 2657 2658 ! Here, zero and positive frequencies 2659 do i3=1,max3+1 2660 do i1=1,n1dfft 2661 zf(1,i1,i3)=zw(1,i1,i3) 2662 zf(2,i1,i3)=zw(2,i1,i3) 2663 end do 2664 end do 2665 2666 ! Here, negative frequencies 2667 do i3=max3+2,m3 2668 do i1=1,n1dfft 2669 zf(1,i1,i3)=zw(1,i1,i3+n3-m3) 2670 zf(2,i1,i3)=zw(2,i1,i3+n3-m3) 2671 end do 2672 end do 2673 2674 end subroutine unfill_cent
m_fftcore/unmpiswitch [ Functions ]
[ Top ] [ m_fftcore ] [ Functions ]
NAME
unmpiswitch
FUNCTION
INPUTS
OUTPUT
SOURCE
2691 pure subroutine unmpiswitch(j3,n1dfft,Jp2st,J2st,lot,n1,nd2proc,nd3proc,nproc,ioption,zw,zmpi1) 2692 2693 !Arguments ------------------------------------ 2694 integer,intent(in) :: j3,n1dfft,lot,n1,nd2proc,nd3proc,nproc,ioption 2695 integer,intent(inout) :: Jp2st,J2st 2696 real(dp),intent(in) :: zw(2,lot,n1) 2697 real(dp),intent(inout) :: zmpi1(2,n1,nd2proc,nd3proc,nproc) 2698 2699 !Local variables------------------------------- 2700 integer :: i1,jp2,j2,ind,jjp2,mfft,jj2 2701 2702 ! ************************************************************************* 2703 2704 mfft=0 2705 if (ioption == 2) then 2706 do Jp2=Jp2st,nproc 2707 do J2=J2st,nd2proc 2708 mfft=mfft+1 2709 if (mfft.gt.n1dfft) then 2710 Jp2st=Jp2 2711 J2st=J2 2712 return 2713 end if 2714 do I1=1,n1 2715 zmpi1(1,I1,J2,j3,Jp2)=zw(1,mfft,I1) 2716 zmpi1(2,I1,J2,j3,Jp2)=zw(2,mfft,I1) 2717 end do 2718 end do 2719 J2st=1 2720 end do 2721 2722 else 2723 do Jp2=Jp2st,nproc 2724 do J2=J2st,nd2proc 2725 mfft=mfft+1 2726 if (mfft.gt.n1dfft) then 2727 Jp2st=Jp2 2728 J2st=J2 2729 return 2730 end if 2731 ind=(Jp2-1) * nd2proc + J2 2732 jj2=(ind-1)/nproc +1 2733 2734 !jjp2=modulo(ind,nproc) +1 2735 jjp2=modulo(ind-1,nproc)+1 2736 2737 do I1=1,n1 2738 zmpi1(1,I1,jj2,j3,jjp2)=zw(1,mfft,I1) 2739 zmpi1(2,I1,jj2,j3,jjp2)=zw(2,mfft,I1) 2740 end do 2741 end do 2742 J2st=1 2743 end do 2744 end if 2745 2746 end subroutine unmpiswitch
m_fftcore/unmpiswitch_cent [ Functions ]
[ Top ] [ m_fftcore ] [ Functions ]
NAME
unmpiswitch_cent
FUNCTION
INPUTS
OUTPUT
SOURCE
2763 pure subroutine unmpiswitch_cent(j3,n1dfft,Jp2stf,J2stf,lot,max1,md1,m1,n1,md2proc,nd3proc,nproc,ioption,zw,zmpi1) 2764 2765 !Arguments ------------------------------------ 2766 integer,intent(in) :: j3,n1dfft,lot,max1,md1,m1,n1,md2proc,nd3proc,nproc,ioption 2767 integer,intent(inout) :: Jp2stf,J2stf 2768 real(dp),intent(inout) :: zmpi1(2,md1,md2proc,nd3proc,nproc) 2769 real(dp),intent(in) :: zw(2,lot,n1) 2770 2771 !Local variables------------------------------- 2772 integer :: mfft,Jp2,J2,I1,ind,jj2,jjp2 2773 2774 ! ************************************************************************* 2775 2776 mfft=0 2777 2778 if (ioption == 2) then 2779 do Jp2=Jp2stf,nproc 2780 do J2=J2stf,md2proc 2781 mfft=mfft+1 2782 2783 if (mfft.gt.n1dfft) then 2784 Jp2stf=Jp2 2785 J2stf=J2 2786 return 2787 end if 2788 2789 ! Here, zero and positive frequencies 2790 do I1=1,max1+1 2791 zmpi1(1,I1,J2,j3,Jp2)=zw(1,mfft,I1) 2792 zmpi1(2,I1,J2,j3,Jp2)=zw(2,mfft,I1) 2793 end do 2794 2795 ! Here, negative frequencies 2796 do I1=max1+2,m1 2797 zmpi1(1,I1,J2,j3,Jp2)=zw(1,mfft,I1+n1-m1) 2798 zmpi1(2,I1,J2,j3,Jp2)=zw(2,mfft,I1+n1-m1) 2799 end do 2800 2801 end do 2802 J2stf=1 2803 end do 2804 2805 else 2806 do Jp2=Jp2stf,nproc 2807 do J2=J2stf,md2proc 2808 mfft=mfft+1 2809 if (mfft.gt.n1dfft) then 2810 Jp2stf=Jp2 2811 J2stf=J2 2812 return 2813 end if 2814 ind=(Jp2-1) * md2proc + J2 2815 jj2=(ind-1)/nproc +1 2816 2817 !jjp2=modulo(ind,nproc) +1 2818 jjp2=modulo(ind-1,nproc)+1 2819 2820 ! Here, zero and positive frequencies 2821 do I1=1,max1+1 2822 zmpi1(1,I1,Jj2,j3,Jjp2)=zw(1,mfft,I1) 2823 zmpi1(2,I1,Jj2,j3,Jjp2)=zw(2,mfft,I1) 2824 end do 2825 2826 ! Here, negative frequencies 2827 do I1=max1+2,m1 2828 zmpi1(1,I1,Jj2,j3,Jjp2)=zw(1,mfft,I1+n1-m1) 2829 zmpi1(2,I1,Jj2,j3,Jjp2)=zw(2,mfft,I1+n1-m1) 2830 end do 2831 end do 2832 J2stf=1 2833 end do 2834 end if 2835 2836 end subroutine unmpiswitch_cent
m_fftcore/unscramble [ Functions ]
[ Top ] [ m_fftcore ] [ Functions ]
NAME
unscramble
FUNCTION
INPUTS
i1 j2 lot=Cache blocking factor. n1dfft=Number of 1D FFTs to perform md1,n3,md2proc,nnd3 zmpi2(2,md1,md2proc,nnd3)
OUTPUT
zw(2,lot,n3)= cache work array
SOURCE
2860 pure subroutine unscramble(i1,j2,lot,n1dfft,md1,n3,md2proc,nnd3,zmpi2,zw) 2861 2862 !Arguments ------------------------------------ 2863 integer,intent(in) :: i1,j2,lot,n1dfft,md1,n3,md2proc,nnd3 2864 real(dp),intent(in) :: zmpi2(2,md1,md2proc,nnd3) 2865 real(dp),intent(inout) :: zw(2,lot,n3) 2866 2867 !Local variables------------------------------- 2868 !scalars 2869 integer :: i,i3 2870 2871 ! ************************************************************************* 2872 2873 do i3=1,n3 2874 do i=0,n1dfft-1 2875 zw(1,i+1,i3)=zmpi2(1,i1+i,j2,i3) 2876 zw(2,i+1,i3)=zmpi2(2,i1+i,j2,i3) 2877 end do 2878 end do 2879 2880 end subroutine unscramble
m_fftcore/unswitch [ Functions ]
[ Top ] [ m_fftcore ] [ Functions ]
NAME
unswitch
FUNCTION
INPUTS
n1dfft=Number of 1D FFTs n2=Dimension of the transform along y lot=Cache blocking factor. n1=Dimension of the transform along x. lzt zw(2,lot,n2)=Cache work array
OUTPUT
zt(2,lzt,n1)
SOURCE
2904 pure subroutine unswitch(n1dfft,n2,lot,n1,lzt,zw,zt) 2905 2906 !Arguments ------------------------------------ 2907 integer,intent(in) :: n1dfft,n2,lot,n1,lzt 2908 real(dp),intent(in) :: zw(2,lot,n2) 2909 real(dp),intent(inout) :: zt(2,lzt,n1) 2910 2911 !Local variables------------------------------- 2912 integer :: i,j 2913 ! ************************************************************************* 2914 2915 do j=1,n1dfft 2916 do i=1,n2 2917 zt(1,i,j)=zw(1,j,i) 2918 zt(2,i,j)=zw(2,j,i) 2919 end do 2920 end do 2921 2922 end subroutine unswitch
m_fftcore/unswitch_cent [ Functions ]
[ Top ] [ m_fftcore ] [ Functions ]
NAME
unswitch_cent
FUNCTION
INPUTS
n1dfft=Number of 1D FFTs to perform max2=Max G_y in the small box enclosing the G-sphere. m2=Size of the small box enclosing the G-sphere along y n2=Dimension of the transform along y lot=Cache blocking factor. n1=Dimension of the transform along x lzt zw(2,lot,n2)=Cache working array
OUTPUT
zt(2,lzt,n1)
SOURCE
2948 pure subroutine unswitch_cent(n1dfft,max2,m2,n2,lot,n1,lzt,zw,zt) 2949 2950 !Arguments ------------------------------------ 2951 integer,intent(in) :: n1dfft,max2,m2,n2,lot,n1,lzt 2952 real(dp),intent(in) :: zw(2,lot,n2) 2953 real(dp),intent(inout) :: zt(2,lzt,n1) 2954 2955 !Local variables------------------------------- 2956 integer :: i,j 2957 ! ************************************************************************* 2958 2959 ! Here, zero and positive frequencies 2960 do j=1,n1dfft 2961 do i=1,max2+1 2962 zt(1,i,j)=zw(1,j,i) 2963 zt(2,i,j)=zw(2,j,i) 2964 end do 2965 end do 2966 2967 ! Here, negative frequencies 2968 if(m2>=max2+2)then 2969 do j=1,n1dfft 2970 do i=max2+2,m2 2971 zt(1,i,j)=zw(1,j,i+n2-m2) 2972 zt(2,i,j)=zw(2,j,i+n2-m2) 2973 end do 2974 end do 2975 end if 2976 2977 end subroutine unswitch_cent
m_fftcore/unswitchreal [ Functions ]
[ Top ] [ m_fftcore ] [ Functions ]
NAME
unswitchreal
FUNCTION
INPUTS
n1dfft=Number of 1D FFTs to perform n2=Dimension of the transform along y n2eff= lot=Cache blocking factor. n1zt lzt zw(2,lot,n2)=Cache working array
OUTPUT
zt(2,lzt,n1)
SOURCE
3002 pure subroutine unswitchreal(n1dfft,n2,n2eff,lot,n1zt,lzt,zw,zt) 3003 3004 !Arguments ------------------------------------ 3005 integer,intent(in) :: n1dfft,n2,n2eff,lot,n1zt,lzt 3006 real(dp),intent(in) :: zw(2,lot,n2) 3007 real(dp),intent(inout) :: zt(2,lzt,n1zt) 3008 3009 !Local variables------------------------------- 3010 integer :: i,j 3011 ! ************************************************************************* 3012 3013 ! Decompose symmetric and antisymmetric parts 3014 do j=1,n1dfft 3015 zt(1,1,2*j-1)=zw(1,j,1) 3016 zt(2,1,2*j-1)=zero 3017 zt(1,1,2*j) =zw(2,j,1) 3018 zt(2,1,2*j) =zero 3019 end do 3020 3021 do i=2,n2eff 3022 do j=1,n1dfft 3023 zt(1,i,2*j-1)= (zw(1,j,i)+zw(1,j,n2+2-i))*half 3024 zt(2,i,2*j-1)= (zw(2,j,i)-zw(2,j,n2+2-i))*half 3025 zt(1,i,2*j) = (zw(2,j,i)+zw(2,j,n2+2-i))*half 3026 zt(2,i,2*j) =-(zw(1,j,i)-zw(1,j,n2+2-i))*half 3027 end do 3028 end do 3029 3030 end subroutine unswitchreal
m_fftcore/unswitchreal_cent [ Functions ]
[ Top ] [ m_fftcore ] [ Functions ]
NAME
unswitchreal_cent
FUNCTION
INPUTS
n1dfft=Number of 1D FFTs to perform max2=Max G_y in the small box enclosing the G-sphere. n2=Dimension of the transform along y lot=Cache blocking factor. n1zt lzt zw(2,lot,n2)=Cache working array
OUTPUT
zt(2,lzt,n1)
SOURCE
3055 pure subroutine unswitchreal_cent(n1dfft,max2,n2,lot,n1zt,lzt,zw,zt) 3056 3057 !Arguments ------------------------------------ 3058 integer,intent(in) :: n1dfft,max2,n2,lot,n1zt,lzt 3059 real(dp),intent(in) :: zw(2,lot,n2) 3060 real(dp),intent(inout) :: zt(2,lzt,n1zt) 3061 3062 !Local variables------------------------------- 3063 integer :: i,j 3064 ! ************************************************************************* 3065 3066 do j=1,n1dfft 3067 zt(1,1,2*j-1)=zw(1,j,1) 3068 zt(2,1,2*j-1)=zero 3069 zt(1,1,2*j) =zw(2,j,1) 3070 zt(2,1,2*j) =zero 3071 end do 3072 3073 do i=2,max2+1 3074 do j=1,n1dfft 3075 zt(1,i,2*j-1)= (zw(1,j,i)+zw(1,j,n2+2-i))*half 3076 zt(2,i,2*j-1)= (zw(2,j,i)-zw(2,j,n2+2-i))*half 3077 zt(1,i,2*j) = (zw(2,j,i)+zw(2,j,n2+2-i))*half 3078 zt(2,i,2*j) =-(zw(1,j,i)-zw(1,j,n2+2-i))*half 3079 end do 3080 end do 3081 3082 ! Here, zero and positive frequencies 3083 ! do 90,j=1,n1dfft 3084 ! do 90,i=1,max2+1 3085 ! zt(1,i,j)=zw(1,j,i) 3086 ! zt(2,i,j)=zw(2,j,i) 3087 !90 continue 3088 3089 ! Here, negative frequencies 3090 ! if(m2>=max2+2)then 3091 ! do 110,j=1,n1dfft 3092 ! do 110,i=max2+2,m2 3093 ! zt(1,i,j)=zw(1,j,i+n2-m2) 3094 ! zt(2,i,j)=zw(2,j,i+n2-m2) 3095 !110 continue 3096 ! end if 3097 3098 end subroutine unswitchreal_cent