TABLE OF CONTENTS


ABINIT/getng [ Functions ]

[ Top ] [ Functions ]

NAME

 getng

FUNCTION

 From ecut and metric tensor in reciprocal space, computes recommended ngfft(1:3)
 Also computes the recommended value of nfft and mgfft
 Pay attention that the FFT grid must be compatible with the symmetry operations (see irrzg.f).

COPYRIGHT

 Copyright (C) 1998-2018 ABINIT group (DCA, XG, GMR, MM)
 This file is distributed under the terms of the
 GNU General Public License, see ~abinit/COPYING
 or http://www.gnu.org/copyleft/gpl.txt .
 For the initials of contributors, see ~abinit/doc/developers/contributors.txt .

INPUTS

 boxcutmin=minimum value of boxcut admitted (boxcut is the ratio
  between the radius of the sphere contained in the FFT box, and the
  radius of the planewave sphere) : usually 2.0 .
 ecut=energy cutoff in Hartrees
 gmet(3,3)=reciprocal space metric (bohr**-2).
 kpt(3)=input k vector in terms of reciprocal lattice primitive translations
 me_fft=index of the processor in the FFT set (use 0 if sequential)
 nproc_fft=number of processors in the FFT set (use 1 if sequential)
 nsym=number of symmetry elements in group
 paral_fft=0 if no FFT parallelisation ; 1 if FFT parallelisation
 symrel(3,3,nsym)=symmetry matrices in real space (integers)

OUTPUT

 mgfft= max(ngfft(1),ngfft(2),ngfft(3))
 nfft=number of points in the FFT box=ngfft(1)*ngfft(2)*ngfft(3)/nproc_fft

SIDE EFFECTS

 Input/Output
 ngfft(1:18)=integer array with FFT box dimensions and other
   information on FFTs. On input ngfft(1:3) contains
   optional trial values. If ngfft(1:3)/minbox is greater than value
   calculated to avoid wrap-around error and ngfft obeys constraint
   placed by the FFT routine that is used
   then ngfft(1:3) is left unchanged. Otherwise set to value computed in now.

 Note that there is the possibility of an undetected error if we
 are dealing with a cubic unit cell and ngfft(1), ngfft(2) and ngfft(3)
 are different. In the future we should handle this case.

 ngfft(4),ngfft(5),ngfft(6)= modified values to avoid cache trashing,
        presently : ngfft(4)=ngfft(1)+1 if ngfft(1) is even ;
                    ngfft(5)=ngfft(2)+1 if ngfft(2) is even.
           in the other cases, ngfft(4:6)=ngfft(1:3).
   Other choices may better, but this is left for the future.
 ngfft(7)=choice for FFT algorithm, see the input variable fftalg
 ngfft(8)=size of the cache, in bytes (not used here presently).!!
   other ngfft slots are used for parallelism see ~abinit/doc/variables/vargs.htm#ngfft
 [unit] = Output Unit number (DEFAULT std_out)

PARENTS

      fftprof,m_fft,m_fft_prof,memory_eval,mpi_setup,mrgscr,scfcv

CHILDREN

      bound,print_ngfft,sort_int,wrtout

SOURCE

 66 #if defined HAVE_CONFIG_H
 67 #include "config.h"
 68 #endif
 69 
 70 #include "abi_common.h"
 71 
 72 subroutine getng(boxcutmin,ecut,gmet,kpt,me_fft,mgfft,nfft,ngfft,nproc_fft,nsym,paral_fft,symrel,&
 73 &                ngfftc,use_gpu_cuda,unit) ! optional
 74 
 75  use defs_basis
 76  use m_errors
 77  use m_profiling_abi
 78  use m_sort
 79 
 80  use defs_fftdata,  only : mg
 81  use m_fftcore,     only : print_ngfft
 82 
 83 !This section has been created automatically by the script Abilint (TD).
 84 !Do not modify the following lines by hand.
 85 #undef ABI_FUNC
 86 #define ABI_FUNC 'getng'
 87  use interfaces_14_hidewrite
 88  use interfaces_52_fft_mpi_noabirule, except_this_one => getng
 89 !End of the abilint section
 90 
 91  implicit none
 92 
 93 !Arguments ------------------------------------
 94 !scalars
 95  integer,intent(in) :: me_fft,nproc_fft,nsym,paral_fft
 96  integer,intent(out) :: mgfft,nfft
 97  integer,optional,intent(in) :: unit,use_gpu_cuda
 98  real(dp),intent(in) :: boxcutmin,ecut
 99 !arrays
100  integer,intent(in) :: symrel(3,3,nsym)
101  integer,intent(in),optional :: ngfftc(3)
102  integer,intent(inout) :: ngfft(18)
103  real(dp),intent(in) :: gmet(3,3),kpt(3)
104 
105 !Local variables-------------------------------
106 !scalars
107  integer,save :: first=1,msrch(3),previous_paral_mode=0
108  integer :: element,ii,index,isrch,isrch1,isrch2,isrch3,isym,jj,mu,paral_fft_
109  integer :: plane,testok,tobechecked,ount,fftalga
110  real(dp),parameter :: minbox=0.75_dp
111  real(dp) :: dsqmax,dsqmin,ecutmx,prodcurrent,prodtrial,xx,yy
112  logical :: testdiv
113  character(len=500) :: message
114  integer,parameter :: largest_ngfft=mg ! Goedecker FFT: any powers of 2, 3, and 5 - must be coherent with defs_fftdata.F90
115  integer,parameter :: maxpow2 =16      ! int(log(largest_ngfft+half)/log(two))
116  integer,parameter :: maxpow3 =6       ! int(log(largest_ngfft+half)/log(three))
117  integer,parameter :: maxpow5 =6       ! int(log(largest_ngfft+half)/log(five))
118  integer,parameter :: maxpow7 =0
119  integer,parameter :: maxpow11=0
120  integer,parameter :: mmsrch=(maxpow2+1)*(maxpow3+1)*(maxpow5+1)*(maxpow7+1)*(maxpow11+1)
121  integer,save :: iperm(mmsrch),srch(mmsrch,3)
122  integer(i8b) :: li_srch(mmsrch)
123  integer :: divisor(3,3),gbound(3),imax(3),imin(3),ngcurrent(3)
124  integer :: ngmax(3),ngsav(3),ngtrial(3)
125 
126 ! *************************************************************************
127 
128  ount = std_out; if (PRESENT(unit)) ount = unit
129 
130  fftalga = ngfft(7)/100
131 
132 !If not yet done, compute recommended (boxcut>=2) fft grid dimensions
133 !In case we switch for paral to sequential mode, recompute srch.
134 !This is the case e.g. when computing ngfftdiel in sequential mode
135 !after an initial computation of ngfft in parallel
136 
137  paral_fft_=paral_fft;if (nproc_fft==0) paral_fft_=0
138 
139  if(first==1.or.paral_fft_ /= previous_paral_mode) then
140    first=0; previous_paral_mode=paral_fft_
141    srch(:,:)=0
142 
143    ! Factors of 2
144    srch(1,1)=1
145    do ii=1,maxpow2
146      srch(ii+1,1)=srch(ii,1)*2
147    end do
148 
149    ! Factors of 3
150    index=maxpow2+1
151    if(maxpow3>0)then
152      do ii=1,max(1,maxpow3)
153        srch(1+ii*index:(ii+1)*index,1)=3*srch(1+(ii-1)*index:ii*index,1)
154      end do
155    end if
156 
157    ! Factors of 5
158    index=(maxpow3+1)*index
159    if(maxpow5>0)then
160      do ii=1,max(1,maxpow5)
161        li_srch = 0
162        li_srch(1+ii*index:(ii+1)*index)=5_i8b*srch(1+(ii-1)*index:ii*index,1)
163        where (li_srch > huge(maxpow3)) li_srch = huge(maxpow3)
164        srch(1+ii*index:(ii+1)*index,1)=li_srch(1+ii*index:(ii+1)*index)
165      end do
166    end if
167 
168    ! Factors of 7
169    index=(maxpow5+1)*index
170    if(maxpow7>0)then
171      do ii=1,max(1,maxpow7)
172        srch(1+ii*index:(ii+1)*index,1)=7*srch(1+(ii-1)*index:ii*index,1)
173      end do
174    end if
175 
176    ! Factors of 11
177    index=(maxpow7+1)*index
178    if(maxpow11>0)then
179      do ii=1,max(1,maxpow11)
180        srch(1+ii*index:(ii+1)*index,1)=11*srch(1+(ii-1)*index:ii*index,1)
181      end do
182    end if
183 
184    call sort_int(mmsrch,srch(:,1),iperm)
185 
186    do ii=1,mmsrch
187      if(srch(ii,1)>largest_ngfft)exit
188    end do
189    msrch(1)=ii-1
190 
191    ! In case of FFT parallelism, one need ngfft(2) and ngfft(3) to be multiple of nproc_fft
192    if(paral_fft_==1)then
193      msrch(2)=0
194      do ii=1,msrch(1)
195        if(modulo(srch(ii,1),nproc_fft)==0) then
196          msrch(2)=msrch(2)+1
197          srch(msrch(2),2)=srch(ii,1)
198        end if
199      end do
200      !write(message,'(a,i0,a,i0,2a,i0)')&
201      ! 'The second and third dimension of the FFT grid: ',ngfft(2),", ",ngfft(3),ch10,&
202      ! 'were imposed to be multiple of the number of processors for the FFT: ', nproc_fft
203      !if (ount /= dev_null) MSG_COMMENT(message)
204    else
205      msrch(2)=msrch(1)
206      srch(:,2)=srch(:,1)
207    end if
208 
209    ! The second and third search list have the same constraint
210    msrch(3)=msrch(2)
211    srch(:,3)=srch(:,2)
212 
213 !  The set of allowed ngfft values has been found
214  end if ! first==1
215 
216 !Save input values of ngfft
217  ngsav(1:3) = ngfft(1:3)
218 
219 !As an initial guess for ngfft, use the provided coarse mesh grid
220  if (PRESENT(ngfftc)) then
221    ngfft(1:3)=ngfftc(1:3)
222    call wrtout(ount,' Using supplied coarse mesh as initial guess.','COLL')
223  else
224    ngfft(1:3)=2
225  end if
226 
227 !Enlarge the initial guess until the set of ngfft entirely comprises the sphere
228  do
229 
230    call bound(dsqmax,dsqmin,gbound,gmet,kpt,ngfft,plane)
231 
232    ! Exit the infinite do-loop if the sphere is inside the FFT box
233    if (dsqmin>=(half*boxcutmin**2*ecut/pi**2)) exit
234 
235    ! Fix nearest boundary
236    do ii=1,msrch(plane)-1
237      if (srch(ii,plane)>=ngfft(plane)) then
238 !      redefine ngfft(plane) to next higher choice
239        ngfft(plane)=srch(ii+1,plane)
240        exit ! Exit the loop over ii
241      end if
242 
243      if (ii==msrch(plane)-1)then
244        ! Here, we are in trouble
245        write(message, '(a,i12,5a)' ) &
246 &       'ngfft is bigger than allowed value =',ngfft(plane),'.',ch10,&
247 &       'This indicates that desired ngfft is larger than getng',ch10,&
248 &       'can handle. The code has to be changed and compiled.'
249        MSG_BUG(message)
250      end if
251    end do
252 
253  end do ! End of the infinite do-loop : will either "exit", or abort
254 
255 !ecutmx=maximum ecut consistent with chosen ngfft
256  ecutmx=0.5_dp*pi**2*dsqmin
257 
258 !Print results
259  write(message, '(a,1p,e14.6,a,3i8,a,a,e14.6)' ) &
260 & ' For input ecut=',ecut,' best grid ngfft=',ngfft(1:3),ch10,&
261 & '       max ecut=',ecutmx
262  call wrtout(ount,message,'COLL')
263 
264 ! The FFT grid is compatible with the symmetries if for each
265 ! symmetry isym, each ii and each jj, the quantity
266 ! (ngfft(jj)*symrel(jj,ii,isym))/ngfft(ii) is an integer.
267 ! This relation is immediately verified for diagonal elements, since
268 ! symrel is an integer. It is also verified if symrel(ii,jj,isym) is zero.
269 
270 !Compute the biggest (positive) common divisor of each off-diagonal element of the symmetry matrices
271  divisor(:,:)=0; tobechecked=0
272 
273  do ii=1,3
274    do jj=1,3
275      if(jj==ii)cycle
276      do isym=1,nsym
277        if(symrel(jj,ii,isym)==0 .or. divisor(jj,ii)==1 )cycle
278        tobechecked=1
279        element=abs(symrel(jj,ii,isym))
280        testdiv= ( divisor(jj,ii)==0 .or. divisor(jj,ii)==element .or. element==1)
281        if(testdiv)then
282          divisor(jj,ii)=element
283        else
284 !        Must evaluate common divisor between non-trivial numbers
285          do
286            if(divisor(jj,ii)<element)element=element-divisor(jj,ii)
287            if(divisor(jj,ii)>element)divisor(jj,ii)=divisor(jj,ii)-element
288            if(divisor(jj,ii)==element)exit
289          end do
290        end if
291      end do
292    end do
293  end do
294 
295 !Check whether there is a problem
296  testok=1
297  if(tobechecked==1)then
298    do ii=1,3
299      do jj=1,3
300        xx=divisor(jj,ii)*ngfft(jj)
301        yy=xx/ngfft(ii)
302        if(abs(yy-nint(yy))>tol8)testok=0
303      end do
304    end do
305  end if
306 
307 !There is definitely a problem
308  if(testok==0)then
309 !  Use a brute force algorithm
310 !  1) Because one knows that three identical numbers will satisfy
311 !  the constraint, use the maximal ngfft value to define current triplet
312 !  and associate total number of grid points
313    ngcurrent(1:3)=maxval(ngfft(1:3))
314 !  Takes into account the fact that ngfft(2) and ngfft(3) must
315 !  be multiple of nproc_fft
316    if(mod(ngcurrent(1),nproc_fft)/=0)ngcurrent(1:3)=ngcurrent(1:3)*max(1,nproc_fft)
317    prodcurrent=ngcurrent(1)**3+1.0d-3
318 !  2) Define maximal values for each component, limited
319 !  by the maximal value of the list
320    ngmax(1)=min(int(prodcurrent/(ngfft(2)*ngfft(3))),srch(msrch(1),1))
321    ngmax(2)=min(int(prodcurrent/(ngfft(1)*ngfft(3))),srch(msrch(2),2))
322    ngmax(3)=min(int(prodcurrent/(ngfft(1)*ngfft(2))),srch(msrch(3),3))
323 !  3) Get minimal and maximal search indices
324    do ii=1,3
325      do isrch=1,msrch(ii)
326        index=srch(isrch,ii)
327        if(index==ngfft(ii))imin(ii)=isrch
328 !      One cannot suppose that imax belongs to the allowed list,
329 !      so must use <= instead of == , to determine largest index
330        if(index<=ngmax(ii))imax(ii)=isrch
331      end do
332    end do
333 !  4) Compute product of trial ngffts
334 !  DEBUG
335 !  write(ount,*)' getng : enter triple loop '
336 !  write(ount,*)'imin',imin(1:3)
337 !  write(ount,*)'imax',imax(1:3)
338 !  write(ount,*)'ngcurrent',ngcurrent(1:3)
339 !  ENDDEBUG
340    do isrch1=imin(1),imax(1)
341      ngtrial(1)=srch(isrch1,1)
342      do isrch2=imin(2),imax(2)
343        ngtrial(2)=srch(isrch2,2)
344        do isrch3=imin(3),imax(3)
345          ngtrial(3)=srch(isrch3,3)
346          prodtrial=real(ngtrial(1))*real(ngtrial(2))*real(ngtrial(3))+1.0d-3
347          if(prodtrial>prodcurrent-1.0d-4)exit
348 !        The trial product is lower or equal to the current product,
349 !        so now, checks whether the symmetry constraints are OK
350          testok=1
351          do ii=1,3
352            do jj=1,3
353              xx=divisor(jj,ii)*ngtrial(jj)
354              yy=xx/ngtrial(ii)
355              if(abs(yy-nint(yy))>tol8)testok=0
356            end do
357          end do
358 !        DEBUG
359 !        write(ount,'(a,3i6,a,i3,a,es16.6)' )' getng : current trial triplet',ngtrial(1:3),&
360 !        &     ' testok=',testok,' prodtrial=',prodtrial
361 !        ENDDEBUG
362          if(testok==0)cycle
363 !        The symmetry constraints are fulfilled, so update current values
364          ngcurrent(1:3)=ngtrial(1:3)
365          prodcurrent=prodtrial
366        end do
367      end do
368    end do
369 
370    ngfft(1:3)=ngcurrent(1:3)
371    call bound(dsqmax,dsqmin,gbound,gmet,kpt,ngfft,plane)
372 !  ecutmx=maximum ecut consistent with chosen ngfft
373    ecutmx=0.5_dp*pi**2*dsqmin
374 !  Give results
375    write(message, '(a,3i8,a,a,e14.6)' ) &
376 &   ' However, must be changed due to symmetry =>',ngfft(1:3),ch10,&
377 &   '       with max ecut=',ecutmx
378    call wrtout(ount,message,'COLL')
379 
380    if (prodcurrent>huge(ii)) then
381      write(message, '(5a)' )&
382 &     'The best FFT grid will lead to indices larger',ch10,&
383 &     'than the largest representable integer on this machine.',ch10,&
384 &     'Action: try to deal with smaller problems. Also contact ABINIT group.'
385      MSG_ERROR(message)
386    end if
387 
388  end if ! testok==0
389 
390 !Possibly use the input values of ngfft
391  if ( int( dble(ngsav(1)) / minbox ) >= ngfft(1) .and.&
392 &     int( dble(ngsav(2)) / minbox ) >= ngfft(2) .and.&
393 &     int( dble(ngsav(3)) / minbox ) >= ngfft(3) ) then
394 
395 !  Must check whether the values are in the allowed list
396    testok=0
397    do mu=1,3
398      do ii=1,msrch(mu)
399        if(srch(ii,mu)==ngsav(mu))then
400          testok=testok+1
401          exit
402        end if
403      end do
404    end do
405    if(testok==3)then
406      write(message,'(a,3(a,i1,a,i3),a)') ' input values of',&
407 &     (' ngfft(',mu,') =',ngsav(mu),mu=1,3),' are alright and will be used'
408      call wrtout(ount,message,'COLL')
409      do mu = 1,3
410        ngfft(mu) = ngsav(mu)
411      end do
412    end if
413 
414  end if
415 
416 !mgfft needs to be set to the maximum of ngfft(1),ngfft(2),ngfft(3)
417  mgfft = maxval(ngfft(1:3))
418 
419  if (paral_fft_==1) then
420    ! For the time being, one need ngfft(2) and ngfft(3) to be multiple of nproc_fft
421    if(modulo(ngfft(2),nproc_fft)/=0)then
422      write(message,'(3a,i5,a,i5)')&
423 &     'The second dimension of the FFT grid, ngfft(2), should be',&
424 &     'a multiple of the number of processors for the FFT, nproc_fft.',&
425 &     'However, ngfft(2)=',ngfft(2),' and nproc_fft=',nproc_fft
426      MSG_BUG(message)
427    end if
428    if(modulo(ngfft(3),nproc_fft)/=0)then
429      write(message,'(3a,i5,a,i5)')&
430 &     'The third dimension of the FFT grid, ngfft(3), should be',&
431 &     'a multiple of the number of processors for the FFT, nproc_fft.',&
432 &     'However, ngfft(3)=',ngfft(3),' and nproc_fft=',nproc_fft
433      MSG_BUG(message)
434    end if
435 
436  else if (paral_fft_/=0) then
437    write(message,'(a,i0)')'paral_fft_ should be 0 or 1, but its value is ',paral_fft_
438    MSG_BUG(message)
439  end if
440 
441 ! Compute effective number of FFT points (for this MPI node if parall FFT)
442  nfft=product(ngfft(1:3))/max(1,nproc_fft)
443 
444 !Set up fft array dimensions ngfft(4,5,6) to avoid cache conflicts
445  ngfft(4)=2*(ngfft(1)/2)+1
446  ngfft(5)=2*(ngfft(2)/2)+1
447  ngfft(6)=ngfft(3)
448  if (any(fftalga == [FFT_FFTW3, FFT_DFTI])) then
449    ! FFTW3 supports leading dimensions but at the price of a larger number of FFTs
450    ! to be executed along z when the zero-padded version is used.
451    ! One should check whether the augmentation is beneficial for FFTW3.
452    ngfft(4)=2*(ngfft(1)/2)+1
453    ngfft(5)=2*(ngfft(2)/2)+1
454    !ngfft(4)=ngfft(1)
455    !ngfft(5)=ngfft(2)
456    ngfft(6)=ngfft(3)
457  end if
458 
459  if (present(use_gpu_cuda)) then
460    if (use_gpu_cuda==1) then
461      ngfft(4)=ngfft(1)
462      ngfft(5)=ngfft(2)
463      ngfft(6)=ngfft(3)
464    end if
465  end if
466 
467  ngfft(14:18)=0 ! ngfft(14) to be filled outside of getng
468 
469  if (paral_fft_==0) then
470    ngfft(9)=0     ! paral_fft_
471    ngfft(10)=1    ! nproc_fft
472    ngfft(11)=0    ! me_fft
473    ngfft(12)=0    ! n2proc
474    ngfft(13)=0    ! n3proc
475  else
476    ngfft(9)=1     ! paral_fft_
477    ngfft(10)=nproc_fft
478    ngfft(11)=me_fft
479    ngfft(12)=ngfft(2)/nproc_fft
480    ngfft(13)=ngfft(3)/nproc_fft
481  end if
482 
483 
484  call print_ngfft(ngfft,header="FFT mesh",unit=ount,mode_paral="COLL")
485 
486 end subroutine getng