TABLE OF CONTENTS


ABINIT/m_FFT_prof [ Modules ]

[ Top ] [ Modules ]

NAME

 m_FFT_prof

COPYRIGHT

  Copyright (C) 2008-2022 ABINIT group (MG)
  This file is distributed under the terms of the
  GNU General Public License, see ~abinit/COPYING
  or http://www.gnu.org/copyleft/gpl.txt .

SOURCE

13 #if defined HAVE_CONFIG_H
14 #include "config.h"
15 #endif
16 
17 #include "abi_common.h"
18 
19 MODULE m_FFT_prof
20 
21  use defs_basis
22  use m_xomp
23  use m_errors
24  use m_abicore
25  use m_fftw3
26  use m_fft
27  use m_distribfft
28 
29  use defs_abitypes,    only : MPI_type
30  use m_fstrings,       only : sjoin, itoa
31  use m_numeric_tools,  only : arth
32  use m_time,           only : cwtime
33  use m_io_tools,       only : open_file
34  use m_geometry,       only : metric
35  use m_hide_blas,      only : xcopy
36  use m_cgtools,        only : set_istwfk
37  use m_fftcore,        only : get_kg, print_ngfft, fftalg_info, kgindex, getng, sphereboundary
38  use m_fft_mesh,       only : calc_eigr, calc_ceigr
39  use m_mpinfo,         only : nullify_mpi_enreg, destroy_mpi_enreg, copy_mpi_enreg, initmpi_seq
40  use m_oscillators,    only : rho_tw_g
41 
42  implicit none
43 
44  private
45 
46  integer,private,parameter :: TNAME_LEN=100

m_fft_mesh/FFT_prof_t [ Types ]

[ Top ] [ m_fft_mesh ] [ Types ]

NAME

 FFT_prof_t

FUNCTION

  The results of the tests

SOURCE

128  type,public :: FFT_prof_t
129    integer :: ncalls
130    integer :: ndat
131    integer :: use_gpu
132    integer :: nthreads
133    real(dp) :: cpu_time
134    real(dp) :: wall_time
135    real(dp) :: gflops
136    character(len=TNAME_LEN) :: test_name
137    complex(dpc),allocatable :: results(:)
138 
139  contains
140    procedure :: init => fftprof_init
141    procedure :: free => fftprof_free_0D
142  end type FFT_prof_t
143 
144  public :: fftprofs_free
145  public :: fftprofs_print
146 
147  interface fftprofs_free
148    module procedure fftprof_free_0D
149    module procedure fftprof_free_1D
150  end interface fftprofs_free

m_fft_mesh/FFT_test_t [ Types ]

[ Top ] [ m_fft_mesh ] [ Types ]

NAME

 FFT_test_t

FUNCTION

 Parameters passed to the FFT routines used in abinit (fourdp|fourwf).

SOURCE

 69  type, public :: FFT_test_t
 70 
 71    integer :: available = 0
 72    integer :: istwf_k = -1
 73    integer :: mgfft = -1
 74    integer :: ndat = -1
 75    integer :: nfft = -1
 76    integer :: nthreads = 1
 77    integer :: npw_k = -1
 78    integer :: npw_kout = -1
 79    integer :: paral_kgb = -1
 80    integer :: use_gpu = 0
 81 
 82    real(dp) :: ecut=zero
 83    integer :: ngfft(18)=-1
 84 
 85    real(dp) :: kpoint(3) = [zero,zero,zero]
 86    real(dp) :: rprimd(3,3),rmet(3,3)
 87    real(dp) :: gprimd(3,3),gmet(3,3)
 88 
 89    integer,allocatable :: kg_k(:,:)
 90    integer,allocatable :: kg_kout(:,:)
 91    integer,allocatable :: indpw_k(:)
 92 
 93    type(MPI_type) :: MPI_enreg
 94 
 95  contains
 96    procedure :: init => fft_test_init
 97    procedure :: free => fft_test_free_0D
 98    procedure :: print => fft_test_print
 99    procedure :: get_name
100 
101    ! Timing routines.
102    procedure :: time_fourdp
103    procedure :: time_fftbox
104    procedure :: time_fourwf
105    procedure :: time_rhotwg
106    procedure :: time_fftu
107  end type FFT_test_t
108 
109  public :: fft_tests_free
110 
111  interface fft_tests_free
112    module procedure fft_test_free_0D
113    module procedure fft_test_free_1D
114  end interface fft_tests_free

m_FFT_prof/empty_cache [ Functions ]

[ Top ] [ m_FFT_prof ] [ Functions ]

NAME

  empty_cache

FUNCTION

  Empty the memory cache

INPUTS

OUTPUT

SOURCE

1731 integer function empty_cache(kbsize) result(fake)
1732 
1733 !Arguments -----------------------------------
1734  integer,intent(in) :: kbsize
1735 
1736 !Local variables-------------------------------
1737  integer :: sz
1738  real(dp),allocatable :: chunk(:)
1739 
1740 ! *********************************************************************
1741 
1742  fake = 0
1743  if (kbsize <= 0) RETURN
1744 
1745  sz = int((100._dp * kbsize) / dp)
1746 
1747  ABI_MALLOC(chunk,(sz))
1748  call random_number(chunk)
1749  fake = int(SUM(chunk)) ! Need a result, otherwise smart compilers may skip the call.
1750  ABI_FREE(chunk)
1751 
1752 end function empty_cache

m_FFT_prof/fft_test_free_0D [ Functions ]

[ Top ] [ m_FFT_prof ] [ Functions ]

NAME

  fft_test_free_0D

FUNCTION

  Destruction method for the FFT_test_t structured datatype.

INPUTS

OUTPUT

SOURCE

269 subroutine fft_test_free_0D(Ftest)
270 
271 !Arguments -----------------------------------
272  class(FFT_test_t),intent(inout) :: Ftest
273 
274 ! *********************************************************************
275 
276  ABI_SFREE(Ftest%indpw_k)
277  ABI_SFREE(Ftest%kg_k)
278  ABI_SFREE(Ftest%kg_kout)
279 
280  call destroy_mpi_enreg(Ftest%MPI_enreg)
281 
282 end subroutine fft_test_free_0D

m_FFT_prof/fft_test_free_1D [ Functions ]

[ Top ] [ m_FFT_prof ] [ Functions ]

NAME

  fft_test_free_1D

FUNCTION

  Destruction method for the FFT_test_t structured datatype.

INPUTS

OUTPUT

SOURCE

300 subroutine fft_test_free_1D(Ftest)
301 
302 !Arguments -----------------------------------
303  class(FFT_test_t),intent(inout) :: Ftest(:)
304 
305 !Local variables-------------------------------
306  integer :: ii
307 
308 ! *********************************************************************
309 
310  do ii=LBOUND(Ftest,DIM=1),UBOUND(Ftest,DIM=1)
311    call Ftest(ii)%free()
312  end do
313 
314 end subroutine fft_test_free_1D

m_FFT_prof/fft_test_init [ Functions ]

[ Top ] [ m_FFT_prof ] [ Functions ]

NAME

  fft_test_init

FUNCTION

  Creation method for the FFT_test_t structured datatype.

INPUTS

OUTPUT

SOURCE

178 subroutine fft_test_init(Ftest, fft_setup, kpoint, ecut, boxcutmin, rprimd, nsym, symrel, MPI_enreg_in)
179 
180 !Arguments -----------------------------------
181 !scalars
182  class(FFT_test_t),intent(inout) :: Ftest
183  integer,intent(in) :: nsym
184  real(dp),intent(in) :: ecut,boxcutmin
185  class(MPI_type),intent(in) :: MPI_enreg_in
186 !arrays
187  integer,intent(in) :: fft_setup(6),symrel(3,3,nsym)
188  real(dp),intent(in) :: kpoint(3),rprimd(3,3)
189 
190 !Local variables-------------------------------
191 !scalars
192  integer :: fftalg,fftcache,ndat
193  real(dp) :: ucvol
194 !arrays
195  logical,allocatable :: mask(:)
196  real(dp),parameter :: k0(3)=zero
197  real(dp) :: gmet(3,3),gprimd(3,3),rmet(3,3)
198  real(dp),allocatable :: tnons(:,:)
199 
200 ! *************************************************************************
201 
202  call nullify_mpi_enreg(Ftest%MPI_enreg)
203 
204  call metric(gmet,gprimd,-1,rmet,rprimd,ucvol)
205 
206  Ftest%rprimd = rprimd
207  Ftest%rmet   = rmet
208  Ftest%gprimd = gprimd
209  Ftest%gmet   = gmet
210  Ftest%ecut   = ecut
211 
212  fftalg          = fft_setup(1)
213  fftcache        = fft_setup(2)
214  ndat            = fft_setup(3)
215  Ftest%nthreads  = fft_setup(4)
216  Ftest%available = fft_setup(5)
217  Ftest%use_gpu   = fft_setup(6)
218 
219  Ftest%paral_kgb = 0
220  Ftest%kpoint    = kpoint
221  Ftest%ndat      = ndat
222 
223  Ftest%istwf_k = set_istwfk(kpoint)
224 
225  call get_kg(Ftest%kpoint,Ftest%istwf_k,ecut,gmet,Ftest%npw_k,Ftest%kg_k)
226  call get_kg(Ftest%kpoint,Ftest%istwf_k,ecut,gmet,Ftest%npw_kout,Ftest%kg_kout)
227 
228  call copy_mpi_enreg(MPI_enreg_in,Ftest%MPI_enreg)
229 
230  Ftest%ngfft(7) = fftalg
231  Ftest%ngfft(8) = fftcache
232 
233  ! Fill part of ngfft
234  ABI_CALLOC(tnons,(3,nsym))
235 
236  call getng(boxcutmin,0,ecut,gmet,k0,Ftest%MPI_enreg%me_fft,Ftest%mgfft,Ftest%nfft,Ftest%ngfft,Ftest%MPI_enreg%nproc_fft,nsym,&
237 &  Ftest%MPI_enreg%paral_kgb,symrel,tnons, unit=dev_null, use_gpu_cuda=ftest%use_gpu)
238 
239  ABI_FREE(tnons)
240 
241  call init_distribfft(Ftest%MPI_enreg%distribfft,'c',Ftest%MPI_enreg%nproc_fft,Ftest%ngfft(2),Ftest%ngfft(3))
242 
243  ! Compute the index of each plane wave in the FFT grid.
244  ABI_MALLOC(Ftest%indpw_k,(Ftest%npw_k))
245 
246  ABI_MALLOC(mask,(Ftest%npw_k))
247  call kgindex(Ftest%indpw_k,Ftest%kg_k,mask,Ftest%MPI_enreg,Ftest%ngfft,Ftest%npw_k)
248  ABI_CHECK(ALL(mask),"FFT parallelism not supported in fftprof")
249  ABI_FREE(mask)
250 
251 end subroutine fft_test_init

m_FFT_prof/fft_test_print [ Functions ]

[ Top ] [ m_FFT_prof ] [ Functions ]

NAME

  fft_test_print

FUNCTION

  Printout of the FFT_test_t structured datatype.

INPUTS

OUTPUT

SOURCE

332 subroutine fft_test_print(Ftest, header, unit, mode_paral, prtvol)
333 
334 !Arguments -----------------------------------
335  class(FFT_test_t),intent(in) :: Ftest
336  integer,optional,intent(in) :: unit,prtvol
337  character(len=4),optional,intent(in) :: mode_paral
338  character(len=*),optional,intent(in) :: header
339 
340 !Local variables-------------------------------
341 !scalars
342  integer :: my_unt,my_prtvol
343  character(len=4) :: my_mode
344  character(len=500) :: msg
345 ! *********************************************************************
346 
347  my_unt   =std_out; if (PRESENT(unit      )) my_unt   =unit
348  my_prtvol=0      ; if (PRESENT(prtvol    )) my_prtvol=prtvol
349  my_mode  ='COLL' ; if (PRESENT(mode_paral)) my_mode  =mode_paral
350 
351  !msg=' ==== Info on the FFT test object ==== '
352  if (PRESENT(header)) then
353    msg=' ==== '//TRIM(ADJUSTL(header))//' ==== '
354    call wrtout(my_unt,msg,my_mode)
355  end if
356 
357  write(msg,'(a,i3)')"FFT setup for fftalg ",Ftest%ngfft(7)
358  call print_ngfft(Ftest%ngfft, header=msg, unit=my_unt)
359 
360 end subroutine fft_test_print

m_FFT_prof/fftprof_free_0D [ Functions ]

[ Top ] [ m_FFT_prof ] [ Functions ]

NAME

  fftprof_free_0D

FUNCTION

  Destruction method for the FFT_prof_t structured datatype.

SOURCE

456 subroutine fftprof_free_0D(Ftprof)
457 
458 !Arguments -----------------------------------
459  class(FFT_prof_t),intent(inout) :: Ftprof
460 
461 ! *********************************************************************
462 
463  ABI_SFREE(Ftprof%results)
464 
465 end subroutine fftprof_free_0D

m_FFT_prof/fftprof_free_1D [ Functions ]

[ Top ] [ m_FFT_prof ] [ Functions ]

NAME

  fftprof_free_1D

FUNCTION

  Destruction method for the FFT_prof_t structured datatype.

INPUTS

OUTPUT

SOURCE

483 subroutine fftprof_free_1D(Ftprof)
484 
485 !Arguments -----------------------------------
486  class(FFT_prof_t),intent(inout) :: Ftprof(:)
487 
488 !Local variables-------------------------------
489  integer :: ii
490 ! *********************************************************************
491 
492  do ii=LBOUND(Ftprof,DIM=1),UBOUND(Ftprof,DIM=1)
493    call fftprof_free_0D(Ftprof(ii))
494  end do
495 
496 end subroutine fftprof_free_1D

m_FFT_prof/fftprof_init [ Functions ]

[ Top ] [ m_FFT_prof ] [ Functions ]

NAME

  fftprof_init

FUNCTION

  Creation method for the FFT_prof_t structured datatype.

INPUTS

  gflops = Gigaflops

OUTPUT

SOURCE

416 subroutine fftprof_init(Ftprof, test_name, nthreads, ncalls, ndat, use_gpu, cpu_time, wall_time, gflops, results)
417 
418 !Arguments -----------------------------------
419  class(FFT_prof_t),intent(out) :: Ftprof
420  integer,intent(in) :: ncalls,nthreads,ndat, use_gpu
421  real(dp),intent(in) :: cpu_time,wall_time,gflops
422  character(len=*),intent(in) :: test_name
423 !arrays
424  complex(dpc),optional,intent(in) :: results(:)
425 
426 ! *************************************************************************
427 
428  Ftprof%ncalls    = ncalls
429  Ftprof%nthreads  = nthreads
430  Ftprof%ndat      = ndat
431  Ftprof%use_gpu   = use_gpu
432  Ftprof%cpu_time  = cpu_time
433  Ftprof%wall_time = wall_time
434  Ftprof%gflops    = gflops
435  Ftprof%test_name = test_name
436 
437  if (present(results)) then
438    ABI_REMALLOC(Ftprof%results, (size(results)))
439    Ftprof%results = results
440  end if
441 
442 end subroutine fftprof_init

m_FFT_prof/fftprof_ncalls_per_test [ Functions ]

[ Top ] [ m_FFT_prof ] [ Functions ]

NAME

  fftprof_ncalls_per_test

FUNCTION

  Helper function used to set the number of calls to  be used in each time_* routine.

INPUTS

   ncalls=Number of calls to be used.

SIDE EFFECTS

  NCALLS_FOR_TEST = ncalls

SOURCE

1127 subroutine fftprof_ncalls_per_test(ncalls)
1128 
1129 !Arguments -----------------------------------
1130  integer,intent(in) :: ncalls
1131 ! *********************************************************************
1132 
1133  NCALLS_FOR_TEST = ncalls
1134 
1135 end subroutine fftprof_ncalls_per_test

m_FFT_prof/fftprofs_print [ Functions ]

[ Top ] [ m_FFT_prof ] [ Functions ]

NAME

  fftprofs_print

FUNCTION

  Printout of the FFT_prof_t structured datatype.

INPUTS

OUTPUT

SOURCE

514 subroutine fftprofs_print(Fprof, header, unit, mode_paral, prtvol)
515 
516 !Arguments -----------------------------------
517  class(FFT_prof_t),intent(in) :: Fprof(:)
518  integer,optional,intent(in) :: unit,prtvol
519  character(len=4),optional,intent(in) :: mode_paral
520  character(len=*),optional,intent(in) :: header
521 
522 !Local variables-------------------------------
523 !scalars
524  integer :: my_unt,my_prtvol,ncalls, field1_w,ii,ref_lib !ifft
525  real(dp) :: mabs_err,mean_err,check_mabs_err,check_mean_err, ref_wtime,para_eff
526  character(len=4) :: my_mode
527  character(len=500) :: ofmt,hfmt,nafmt,msg
528 
529 ! *********************************************************************
530 
531  my_unt   =std_out; if (PRESENT(unit      )) my_unt   =unit
532  my_prtvol=0      ; if (PRESENT(prtvol    )) my_prtvol=prtvol
533  my_mode  ='COLL' ; if (PRESENT(mode_paral)) my_mode  =mode_paral
534 
535  msg='==== Info on the FFT_prof_t object ===='
536  if (PRESENT(header)) msg='==== '//TRIM(ADJUSTL(header))//' ===='
537 
538  call wrtout(my_unt,ch10//REPEAT("=",LEN_TRIM(msg)))
539  call wrtout(my_unt,msg,my_mode)
540  call wrtout(my_unt,REPEAT("=",LEN_TRIM(msg)))
541 
542  field1_w=0 ! Width of the field used to print key names.
543  do ii=1,SIZE(Fprof)
544    field1_w = MAX(field1_w, LEN_TRIM(Fprof(ii)%test_name))
545  end do
546 
547  if (field1_w==0) RETURN
548  field1_w = field1_w + 2 ! To account for ". "
549  write(ofmt,*)"(a",field1_w,",2x,2(f7.4,4x),1x,i2,1x,a,i3,a,1x,i0,4x,2(es9.2,3x))"
550 
551  write(hfmt,*)"(a",field1_w,",2x,a)"
552  write(std_out,hfmt)" Library      ","CPU-time   WALL-time   nthreads  ncalls  Max_|Err|   <|Err|>"
553  !
554  ! Find reference library.
555  ref_lib=0
556  do ii=1,SIZE(Fprof)
557    if (Fprof(ii)%ncalls>0) then
558      ref_lib = ii
559      EXIT
560    end if
561  end do
562  !ref_lib=3
563  !
564  ! Write timing analysis and error wrt reference library if available.
565  check_mabs_err=zero; check_mean_err=zero
566  do ii=1,SIZE(Fprof)
567    ncalls = Fprof(ii)%ncalls
568    if (ncalls>0) then
569      mabs_err = zero; mean_err=zero
570      if (ref_lib>0) then
571        mabs_err = MAXVAL( ABS(Fprof(ii)%results - Fprof(ref_lib)%results) )
572        mean_err = SUM( ABS(Fprof(ii)%results - Fprof(ref_lib)%results) ) / SIZE(Fprof(ref_lib)%results)
573        ! Relative error is not a good estimator because some components are close to zero within machine accuracy.
574        !mean_err = 100 * MAXVAL( ABS(Fprof(ii)%results - Fprof(1)%results)/ ABS(Fprof(1)%results ))
575        !ifft = imax_loc( ABS(Fprof(ii)%results - Fprof(1)%results)/ ABS(Fprof(1)%results) )
576        !write(std_out,*) Fprof(ii)%results(ifft),Fprof(1)%results(ifft)
577      end if
578      if (Fprof(ii)%nthreads==1) ref_wtime = Fprof(ii)%wall_time
579      para_eff = 100 * ref_wtime / ( Fprof(ii)%wall_time * Fprof(ii)%nthreads)
580      write(std_out,ofmt)&
581        "- "//Fprof(ii)%test_name,Fprof(ii)%cpu_time/ncalls,Fprof(ii)%wall_time/ncalls,&
582         Fprof(ii)%nthreads,"(",NINT(para_eff),"%)",ncalls,mabs_err,mean_err
583      check_mabs_err = MAX(check_mabs_err, mabs_err)
584      check_mean_err = MAX(check_mean_err, mean_err)
585    else
586      write(nafmt,*)"(a",field1_w,",2x,a)"
587      write(std_out,nafmt)"- "//Fprof(ii)%test_name,"   N/A        N/A        N/A     N/A       N/A        N/A"
588    end if
589  end do
590 
591  if (ref_lib>0) then
592    write(std_out,'(/,2(a,es9.2),2a)')&
593     " Consistency check: MAX(Max_|Err|) = ",check_mabs_err,&
594     ", Max(<|Err|>) = ",check_mean_err,", reference_lib: ",TRIM(Fprof(ref_lib)%test_name)
595  end if
596  write(std_out,*)
597 
598 end subroutine fftprofs_print

m_FFT_prof/get_name [ Functions ]

[ Top ] [ m_FFT_prof ] [ Functions ]

NAME

  get_name

FUNCTION

  Returns a string with info on the test.

INPUTS

OUTPUT

SOURCE

378 character(len=TNAME_LEN) function get_name(Ftest)
379 
380 !Arguments -----------------------------------
381  class(FFT_test_t),intent(in) :: Ftest
382 
383 !Local variables-------------------------------
384  character(len=TNAME_LEN) :: library_name,cplex_mode,padding_mode
385 
386 ! *********************************************************************
387 
388  if (ftest%use_gpu == 0) then
389    call fftalg_info(Ftest%ngfft(7), library_name, cplex_mode, padding_mode)
390    !get_name = TRIM(library_name)//"; "//TRIM(cplex_mode)//"; "//TRIM(padding_mode)
391    write(get_name,'(i3)')Ftest%ngfft(7)
392    get_name = TRIM(library_name)//" ("//TRIM(get_name)//")"
393  else
394    get_name = "GPU"
395  end if
396 
397 end function get_name

m_FFT_prof/prof_fourdp [ Functions ]

[ Top ] [ m_FFT_prof ] [ Functions ]

NAME

  prof_fourdp

FUNCTION

  Profile fourdp

INPUTS

OUTPUT

SOURCE

1437 subroutine prof_fourdp(fft_setups, isign, cplex, necut, ecut_arth, boxcutmin, rprimd, nsym, symrel, MPI_enreg_in)
1438 
1439 !Arguments -----------------------------------
1440 !scalars
1441  integer,intent(in) :: nsym,isign,cplex,necut
1442  real(dp),intent(in) :: boxcutmin
1443  type(MPI_type),intent(in) :: MPI_enreg_in
1444 !arrays
1445  integer,intent(in) :: fft_setups(:,:),symrel(3,3,nsym)
1446  real(dp),intent(in) :: ecut_arth(2)
1447  real(dp),intent(in) :: rprimd(3,3)
1448 
1449 !Local variables-------------------------------
1450 !scalars
1451  integer :: iec,nsetups,set,funt
1452  type(FFT_test_t) :: Ftest
1453  type(FFT_prof_t) :: Ftprof
1454  character(len=500) :: msg,frm,header
1455  character(len=fnlen) :: fname
1456 !arrays
1457  integer :: ngfft_ecut(18,necut)
1458  real(dp),parameter :: k_gamma(3)=zero
1459  real(dp) :: ecut_list(necut)
1460  real(dp),allocatable :: prof_res(:,:,:)
1461 
1462 ! *********************************************************************
1463 
1464  nsetups = size(fft_setups, dim=2)
1465  ecut_list = arth(ecut_arth(1), ecut_arth(2), necut)
1466 
1467  ! Open file and write header with info.
1468  write(fname,'(2(a,i1))')"PROF_fourdp_cplex",cplex,"_isign",isign
1469  if (open_file(fname, msg, newunit=funt) /= 0) then
1470    ABI_ERROR(msg)
1471  end if
1472 
1473  write(msg,'(2(a,i0))')"Benchmark: routine = fourdp, cplex =",cplex,", isign=",isign
1474  write(std_out,'(a)')" Running "//TRIM(msg)
1475 
1476  write(funt,'(a)')"# "//TRIM(msg)
1477  do set=1,nsetups
1478    write(funt,'(a, 6(a,i0))') "#",&
1479     "  fftalg = "   ,fft_setups(1,set), &
1480     ", fftcache = " ,fft_setups(2,set), &
1481     ", ndat = "     ,fft_setups(3,set), &
1482     ", nthreads = " ,fft_setups(4,set), &
1483     ", available = ",fft_setups(5,set), &
1484     ", use_gpu = ",fft_setups(6,set)
1485  end do
1486 
1487  ABI_MALLOC(prof_res,(2,necut,nsetups))
1488 
1489  do set=1,nsetups
1490    do iec=1,necut
1491      call Ftest%init(fft_setups(:,set),k_gamma,ecut_list(iec),boxcutmin,rprimd,nsym,symrel,MPI_enreg_in)
1492      call Ftest%time_fourdp(isign, cplex, header, Ftprof)
1493 
1494      prof_res(1,iec,set) = Ftprof%cpu_time /Ftprof%ncalls
1495      prof_res(2,iec,set) = Ftprof%wall_time/Ftprof%ncalls
1496 
1497      ! Save FFT divisions.
1498      if (set == 1) ngfft_ecut(:,iec) = Ftest%ngfft
1499 
1500      call Ftprof%free()
1501      call Ftest%free()
1502    end do
1503  end do
1504 
1505  ! Write the wall-time as a function of ecut.
1506  write(frm,*)"(f7.1,3i4,",nsetups,"(f7.4))"
1507  do iec=1,necut
1508    write(funt,frm) ecut_list(iec),ngfft_ecut(4:6,iec),prof_res(2,iec,:)
1509  end do
1510 
1511  close(funt)
1512  ABI_FREE(prof_res)
1513 
1514 end subroutine prof_fourdp

m_FFT_prof/prof_fourwf [ Functions ]

[ Top ] [ m_FFT_prof ] [ Functions ]

NAME

  prof_fourwf

FUNCTION

  profile fourwf

INPUTS

OUTPUT

SOURCE

1532 subroutine prof_fourwf(fft_setups, cplex, option, kpoint, necut, ecut_arth, &
1533                        boxcutmin, rprimd, nsym, symrel, MPI_enreg_in)
1534 
1535 !Arguments -----------------------------------
1536 !scalars
1537  integer,intent(in) :: nsym,cplex,necut,option
1538  real(dp),intent(in) :: boxcutmin
1539  type(MPI_type),intent(in) :: MPI_enreg_in
1540 !arrays
1541  integer,intent(in) :: fft_setups(:,:),symrel(3,3,nsym)
1542  real(dp),intent(in) :: ecut_arth(2)
1543  real(dp),intent(in) :: kpoint(3),rprimd(3,3)
1544 
1545 !Local variables-------------------------------
1546 !scalars
1547  integer :: iec,nsetups,set,funt,istwf_k
1548  type(FFT_test_t) :: Ftest
1549  type(FFT_prof_t) :: Ftprof
1550  character(len=500) :: msg,frm,header
1551  character(len=fnlen) :: fname
1552 !arrays
1553  integer :: ngfft_ecut(18,necut)
1554  real(dp) :: ecut_list(necut)
1555  real(dp),allocatable :: prof_res(:,:,:)
1556 
1557 ! *********************************************************************
1558 
1559  nsetups = size(fft_setups, dim=2)
1560  ecut_list = arth(ecut_arth(1), ecut_arth(2), necut)
1561  istwf_k = set_istwfk(kpoint)
1562 
1563  ! Open file and write header with info.
1564  write(fname,'(3(a,i1))')"PROF_fourwf_cplex",cplex,"_option",option,"_istwfk",istwf_k
1565  if (open_file(fname, msg, newunit=funt) /= 0) then
1566    ABI_ERROR(msg)
1567  end if
1568 
1569  write(msg,'(3(a,i1))')"Benchmark: routine = fourwf, cplex = ",cplex,", option= ",option,", istwfk= ",istwf_k
1570  write(std_out,'(a)')" Running "//TRIM(msg)
1571 
1572  write(funt,'(a)')"# "//TRIM(msg)
1573  do set=1,nsetups
1574    write(funt,'(a,6(a,i0))') "#",&
1575     "  fftalg = "   ,fft_setups(1,set), &
1576     ", fftcache = " ,fft_setups(2,set), &
1577     ", ndat = "     ,fft_setups(3,set), &
1578     ", nthreads = " ,fft_setups(4,set), &
1579     ", available = ",fft_setups(5,set), &
1580     ", use_gpu = ",fft_setups(6,set)
1581  end do
1582 
1583  ABI_MALLOC(prof_res,(2,necut,nsetups))
1584 
1585  do set=1,nsetups
1586    do iec=1,necut
1587      call Ftest%init(fft_setups(:,set),kpoint,ecut_list(iec),boxcutmin,rprimd,nsym,symrel,MPI_enreg_in)
1588      call Ftest%time_fourwf(cplex, option, header, Ftprof)
1589 
1590      prof_res(1,iec,set) = Ftprof%cpu_time /Ftprof%ncalls
1591      prof_res(2,iec,set) = Ftprof%wall_time/Ftprof%ncalls
1592 
1593      ! Save FFT divisions.
1594      if (set == 1) ngfft_ecut(:,iec) = Ftest%ngfft
1595      call Ftprof%free()
1596      call Ftest%free()
1597    end do
1598  end do
1599 
1600  ! Write the wall-time as a function of ecut.
1601  write(frm,*)"(f7.1,3i4,",nsetups,"(f7.4))"
1602  do iec=1,necut
1603    write(funt,frm) ecut_list(iec),ngfft_ecut(4:6,iec),prof_res(2,iec,:)
1604  end do
1605 
1606  close(funt)
1607  ABI_FREE(prof_res)
1608 
1609 end subroutine prof_fourwf

m_FFT_prof/prof_rhotwg [ Functions ]

[ Top ] [ m_FFT_prof ] [ Functions ]

NAME

  prof_rhotwg

FUNCTION

  profile rhotwg

INPUTS

OUTPUT

SOURCE

1627 subroutine prof_rhotwg(fft_setups,map2sphere,use_padfft,necut,ecut_arth,osc_ecut,boxcutmin,&
1628                        rprimd,nsym,symrel,gmet,MPI_enreg_in)
1629 
1630 !Arguments -----------------------------------
1631 !scalars
1632  integer,intent(in) :: nsym,necut,map2sphere,use_padfft
1633  real(dp),intent(in) :: boxcutmin,osc_ecut
1634  type(MPI_type),intent(in) :: MPI_enreg_in
1635 !arrays
1636  integer,intent(in) :: fft_setups(:,:),symrel(3,3,nsym)
1637  real(dp),intent(in) :: ecut_arth(2)
1638  real(dp),intent(in) :: rprimd(3,3),gmet(3,3)
1639 
1640 !Local variables-------------------------------
1641 !scalars
1642  integer :: iec,nsetups,set,funt,osc_npw
1643  type(FFT_test_t) :: Ftest
1644  type(FFT_prof_t) :: Ftprof
1645  character(len=500) :: msg,frm,header
1646  character(len=fnlen) :: fname
1647 !arrays
1648  integer,allocatable :: osc_gvec(:,:)
1649  integer :: ngfft_ecut(18,necut)
1650  real(dp),parameter :: k_gamma(3)=zero
1651  real(dp) :: ecut_list(necut)
1652  real(dp),allocatable :: prof_res(:,:,:)
1653 
1654 ! *********************************************************************
1655 
1656  nsetups = size(fft_setups, dim=2)
1657  ecut_list = arth(ecut_arth(1), ecut_arth(2), necut)
1658 
1659  ! Open file and write header with info.
1660  write(fname,'(2(a,i1))')"PROF_rhotwg_map2sphere",map2sphere,"_use_padfft",use_padfft
1661  if (open_file(fname, msg, newunit=funt) /= 0) then
1662    ABI_ERROR(msg)
1663  end if
1664 
1665  write(msg,'(2(a,i0),a,f5.1)') &
1666   "Benchmark: routine = rho_tw_g, map2sphere = ",map2sphere,", use_padfft = ",use_padfft,", osc_ecut = ",osc_ecut
1667  write(std_out,'(a)')" Running "//TRIM(msg)
1668 
1669  write(funt,'(a)')"# "//TRIM(msg)
1670  do set=1,nsetups
1671    write(funt,'(a,6(a,i0))') "#",&
1672     "  fftalg = "   ,fft_setups(1,set),&
1673     ", fftcache = " ,fft_setups(2,set),&
1674     ", ndat = "     ,fft_setups(3,set),&
1675     ", nthreads = " ,fft_setups(4,set),&
1676     ", available = ",fft_setups(5,set),&
1677     ", use_gpu = ",fft_setups(6,set)
1678  end do
1679 
1680  call get_kg([zero,zero,zero],1,osc_ecut,gmet,osc_npw,osc_gvec)
1681 !  TODO should reorder by shells to be consistent with the GW part!
1682 !  Moreover I guess this ordering is more efficient when we have
1683 !  to map the box to the G-sphere!
1684 
1685  ABI_MALLOC(prof_res,(2,necut,nsetups))
1686 
1687  do set=1,nsetups
1688    do iec=1,necut
1689      call Ftest%init(fft_setups(:,set),k_gamma,ecut_list(iec),boxcutmin,rprimd,nsym,symrel,MPI_enreg_in)
1690 
1691      call Ftest%time_rhotwg(map2sphere,use_padfft,osc_npw,osc_gvec,header,Ftprof)
1692 
1693      prof_res(1,iec,set) = Ftprof%cpu_time /Ftprof%ncalls
1694      prof_res(2,iec,set) = Ftprof%wall_time/Ftprof%ncalls
1695      ! Save FFT divisions.
1696      if (set==1) ngfft_ecut(:,iec) = Ftest%ngfft
1697 
1698      call Ftprof%free()
1699      call Ftest%free()
1700    end do
1701  end do
1702 
1703  ! Write the wall-time as a function of ecut.
1704  write(frm,*)"(f7.1,3i4,",nsetups,"(f7.4))"
1705  do iec=1,necut
1706    write(funt,frm) ecut_list(iec),ngfft_ecut(4:6,iec),prof_res(2,iec,:)
1707  end do
1708 
1709  close(funt)
1710  ABI_FREE(prof_res)
1711  ABI_FREE(osc_gvec)
1712 
1713 end subroutine prof_rhotwg

m_FFT_prof/time_fftbox [ Functions ]

[ Top ] [ m_FFT_prof ] [ Functions ]

NAME

  time_fftbox

FUNCTION

  Profiling of the fftbox_[io]p routines.

INPUTS

OUTPUT

SOURCE

736 subroutine time_fftbox(Ftest, isign, inplace, header, Ftprof)
737 
738 !Arguments -----------------------------------
739  class(FFT_test_t),intent(inout) :: Ftest
740  integer,intent(in) :: isign,inplace
741  character(len=500),intent(inout) :: header
742  class(FFT_prof_t),intent(out) :: Ftprof
743 
744 !Local variables-------------------------------
745 !scalars
746  integer,parameter :: nspinor1 = 1, fftcache0 = 0
747  integer :: icall,i1,i2,i3,n1,n2,n3,ifft,ndat,nfft,dat,padat
748  real(dp) :: cpu_time,wall_time,gflops,gsq
749  !character(len=500) :: msg
750  character(len=TNAME_LEN) :: test_name
751  type(fftbox_plan3_t) :: plan
752 !arrays
753  integer,parameter :: g0(3) = [1,-2,1]
754  integer :: gg(3)
755  complex(dpc),allocatable :: ffc(:),ggc(:),results(:)
756 ! *********************************************************************
757 
758  test_name = Ftest%get_name()
759 
760  if (Ftest%available == 0) then
761    call Ftprof%init(test_name, 0, 0, 0, 0, zero, zero, zero)
762    return
763  end if
764 
765  call xomp_set_num_threads(Ftest%nthreads)
766 
767  if (Ftest%ngfft(7)/100 == FFT_FFTW3) call fftw3_set_nthreads(Ftest%nthreads)
768 
769  n1 = Ftest%ngfft(1); n2 = Ftest%ngfft(2); n3 = Ftest%ngfft(3)
770  nfft = Ftest%nfft; ndat = Ftest%ndat
771  write(header,'(3(a,i2))')" fftbox with isign ",isign,", in-place ",inplace,", ndat ",ndat
772 
773  ABI_CALLOC(ffc,(nfft*ndat))
774  ABI_CALLOC(ggc,(nfft*ndat))
775  ABI_CALLOC(results, (nfft*ndat))
776 
777  if (isign==-1) then
778    call calc_ceigr(g0,nfft,nspinor1,Ftest%ngfft,ffc)
779  else if (isign==1) then
780    ifft=0
781    do i3=1,n3
782      gg(3)=i3-1; if (i3>1+n3/2) gg(3)=i3-n3-1 ! TODO recheck this
783      do i2=1,n2
784        gg(2)=i2-1; if (i2>1+n2/2) gg(2)=i2-n2-1
785        do i1=1,n1
786          gg(1)=i1-1; if (i1>1+n1/2) gg(1)=i1-n1-1
787          gsq = two_pi**2 * DOT_PRODUCT(gg,MATMUL(Ftest%gmet,gg))
788          ifft=ifft+1
789          ffc(ifft) = EXP(-gsq)
790        end do
791      end do
792    end do
793  else
794    ABI_ERROR("Wrong isign")
795  end if
796 
797  ! Replicate and scale input data
798  do dat=2,ndat
799    padat = (dat-1) * nfft
800    do ifft=1,nfft
801      ffc(ifft+padat) = DBLE(dat) * ffc(ifft)
802    end do
803  end do
804 
805  call cwtime(cpu_time, wall_time, gflops, "start")
806 
807  ! No augmentation here.
808  call plan%init(ndat, Ftest%ngfft(1:3), Ftest%ngfft(1:3), Ftest%ngfft(7), fftcache0, ftest%use_gpu)
809 
810  select case (inplace)
811  case (0)
812    do icall=1,NCALLS_FOR_TEST
813      ifft = empty_cache(CACHE_KBSIZE)
814      call plan%execute(ffc, ggc, isign)
815      ! Store results at the first call.
816      if (icall == 1) results = ggc
817    end do
818  case (1)
819    do icall=1,NCALLS_FOR_TEST
820      ifft = empty_cache(CACHE_KBSIZE)
821      call plan%execute(ffc, isign)
822      ! Store results at the first call.
823      if (icall == 1) results = ffc
824    end do
825  case default
826    ABI_ERROR(sjoin("Wrong value for inplace:", itoa(inplace)))
827  end select
828 
829  call cwtime(cpu_time, wall_time, gflops, "stop")
830  call Ftprof%init(test_name,Ftest%nthreads,NCALLS_FOR_TEST,Ftest%ndat,ftest%use_gpu, &
831                   cpu_time,wall_time,gflops,results=results)
832 
833  call plan%free()
834  ABI_FREE(ffc)
835  ABI_FREE(ggc)
836  ABI_FREE(results)
837 
838 end subroutine time_fftbox

m_FFT_prof/time_fftu [ Functions ]

[ Top ] [ m_FFT_prof ] [ Functions ]

NAME

  time_fftu

FUNCTION

  Profiling of the fftu routines.

INPUTS

OUTPUT

SOURCE

1286 subroutine time_fftu(Ftest, isign, header, Ftprof)
1287 
1288 !Arguments -----------------------------------
1289  class(FFT_test_t),intent(inout) :: Ftest
1290  integer,intent(in) :: isign
1291  character(len=500),intent(out) :: header
1292  class(FFT_prof_t),intent(out) :: Ftprof
1293 
1294 !Local variables-------------------------------
1295 !scalars
1296  integer,parameter :: nspinor1=1
1297  integer :: icall,i1,i2,i3,n1,n2,n3,ifft,npw_k,ndat,dat,nfft,cnt,ipw,padat,istwf_k
1298  !integer :: n4,n5,n6,n456,idx
1299  real(dp) :: cpu_time,wall_time,gflops,gsq,g0dotr
1300  logical :: not_implemented
1301  !character(len=500) :: msg
1302  character(len=TNAME_LEN) :: test_name
1303 !arrays
1304  integer,parameter :: g0(3) = [1,-2,1]
1305  integer :: gg(3)
1306  integer,allocatable :: kg_k(:,:),gbound(:,:)
1307  complex(dpc),allocatable :: ug(:),results(:),ur(:)
1308 ! *********************************************************************
1309 
1310  test_name = Ftest%get_name()
1311 
1312  n1=Ftest%ngfft(1); n2=Ftest%ngfft(2); n3=Ftest%ngfft(3)
1313  !n4=Ftest%ngfft(4); n5=Ftest%ngfft(5); n6=Ftest%ngfft(6)
1314 
1315  nfft = n1*n2*n3
1316  !n456 = n4*n5*n6
1317  ndat = Ftest%ndat
1318 
1319  write(header,'(2(a,i2))')" fftu with isign ",isign,", ndat ",ndat
1320 
1321  ! TODO: zero-pad not available with SG2001 routines.
1322  not_implemented = (Ftest%ngfft(7) == 412)
1323 
1324  if (Ftest%available==0.or.not_implemented) then
1325    call Ftprof%init(test_name,0,0,0,0,zero,zero,zero)
1326    return
1327  end if
1328 
1329  call xomp_set_num_threads(Ftest%nthreads)
1330 
1331  if (Ftest%ngfft(7)/100 == FFT_FFTW3) call fftw3_set_nthreads(Ftest%nthreads)
1332  !
1333  istwf_k = Ftest%istwf_k
1334  !istwf_k = 1 ! Do not use istwf_k trick here
1335  call get_kg(Ftest%kpoint,istwf_k,Ftest%ecut,Ftest%gmet,npw_k,kg_k)
1336 
1337  ABI_MALLOC(gbound,(2*Ftest%mgfft+8,2))
1338  call sphereboundary(gbound,istwf_k,kg_k,Ftest%mgfft,npw_k)
1339 
1340  ABI_MALLOC(ug,(npw_k*ndat))
1341  ABI_MALLOC(ur, (nfft*ndat))
1342  ABI_MALLOC(results,(nfft*ndat))
1343  results=czero ! needed for R --> G since npw_k < nfft
1344 
1345  if (isign==1) then
1346    do cnt=0,(npw_k * ndat) - 1
1347      ipw = 1 + MOD(cnt, npw_k)
1348      gg = kg_k(:,ipw)
1349      gsq = two_pi**2 * DOT_PRODUCT(gg,MATMUL(Ftest%gmet,gg))
1350      ug(cnt+1) = DCMPLX(EXP(-gsq), zero)
1351    end do
1352    !
1353    ! Replicate input data
1354    do dat=2,ndat
1355      padat = (dat-1) * npw_k
1356      do ipw=1,npw_k
1357        ug(ipw+padat) = DBLE(dat) * ug(ipw)
1358      end do
1359    end do
1360 
1361  else if (isign==-1) then
1362 
1363    do i3=0,n3-1
1364      do i2=0,n2-1
1365        do i1=0,n1-1
1366          g0dotr= two_pi*( g0(1)*(i1/DBLE(n1)) &
1367 &                        +g0(2)*(i2/DBLE(n2)) &
1368 &                        +g0(3)*(i3/DBLE(n3)) )
1369          ifft = 1 + i1 + i2*n1 + i3*n2*n3
1370          ur(ifft)=DCMPLX(DCOS(g0dotr),DSIN(g0dotr))
1371        end do
1372      end do
1373    end do
1374    !
1375    ! Replicate input data
1376    do dat=2,ndat
1377      padat = (dat-1)*nfft
1378      do ifft=1,nfft
1379        ur(ifft+padat) = DBLE(dat) * ur(ifft)
1380      end do
1381    end do
1382 
1383  else
1384    ABI_ERROR(sjoin("Wrong isign:", itoa(isign)))
1385  end if
1386 
1387  call cwtime(cpu_time, wall_time, gflops, "start")
1388 
1389  do icall=1,NCALLS_FOR_TEST
1390    ifft = empty_cache(CACHE_KBSIZE)
1391 
1392    !call fftpad(ug,Ftest%ngfft,n1,n2,n3,n4,n5,n6,ndat,Ftest%mgfft,isign,gbound)
1393    if (isign == +1) then
1394      call fft_ug(npw_k,nfft,nspinor1,ndat,Ftest%mgfft,Ftest%ngfft,istwf_k,kg_k,gbound,ug,ur)
1395      ! Store results at the first call.
1396      if (icall == 1) call xcopy(nfft*ndat,ur,1,results,1)
1397 
1398    else
1399      call fft_ur(npw_k,nfft,nspinor1,ndat,Ftest%mgfft,Ftest%ngfft,istwf_k,kg_k,gbound,ur,ug)
1400      ! Store results at the first call.
1401      if (icall==1) call xcopy(npw_k*ndat,ug,1,results,1)
1402    end if
1403    !if (isign==-1) then
1404    !  write(Ftest%ngfft(7),*)"ngfft, isign ",Ftest%ngfft(7),isign
1405    !  do idx =1,n4*n5*n6*ndat; write(Ftest%ngfft(7),*)idx,results(idx); end do
1406    !end if
1407  end do
1408 
1409  call cwtime(cpu_time, wall_time, gflops, "stop")
1410  call Ftprof%init(test_name,Ftest%nthreads,NCALLS_FOR_TEST,ndat,ftest%use_gpu, &
1411                   cpu_time,wall_time,gflops,results=results)
1412 
1413  ABI_FREE(kg_k)
1414  ABI_FREE(gbound)
1415  ABI_FREE(ug)
1416  ABI_FREE(ur)
1417  ABI_FREE(results)
1418 
1419 end subroutine time_fftu

m_FFT_prof/time_fourdp [ Functions ]

[ Top ] [ m_FFT_prof ] [ Functions ]

NAME

  time_fourdp

FUNCTION

  Profiling of the fourdp routine.

INPUTS

OUTPUT

SOURCE

616 subroutine time_fourdp(Ftest, isign, cplex, header, Ftprof)
617 
618 !Arguments -----------------------------------
619  class(FFT_test_t),intent(inout) :: Ftest
620  integer,intent(in) :: cplex,isign
621  character(len=500),intent(out) :: header
622  class(FFT_prof_t),intent(out) :: Ftprof
623 
624 !Local variables-------------------------------
625 !scalars
626  integer,parameter :: nspinor1=1
627  integer :: icall,i1,i2,i3,n1,n2,n3,ifft
628  real(dp) :: cpu_time,wall_time,gflops,gsq
629  !character(len=500) :: msg
630  character(len=TNAME_LEN) :: test_name
631 !arrays
632  integer,parameter :: g0(3)=(/1,2,-1/)
633  integer :: gg(3)
634  real(dp),allocatable :: fofg(:,:),fofr(:)
635  complex(dpc),allocatable :: results(:),ctmp(:)
636 
637 ! *********************************************************************
638 
639  test_name = Ftest%get_name()
640  n1=Ftest%ngfft(1); n2=Ftest%ngfft(2); n3=Ftest%ngfft(3)
641 
642  write(header,'(2(a,i2),a)')" fourdp with cplex ",cplex,", isign ",isign,", ndat 1"
643 
644  if (Ftest%available==0) then
645    call Ftprof%init(test_name,0,0,0,0,zero,zero,zero)
646    RETURN
647  end if
648 
649  call xomp_set_num_threads(Ftest%nthreads)
650 
651  if (Ftest%ngfft(7)/100 == FFT_FFTW3) call fftw3_set_nthreads(Ftest%nthreads)
652 
653  ABI_MALLOC(fofg,(2,Ftest%nfft))
654  ABI_MALLOC(fofr,(cplex*Ftest%nfft))
655 
656  ! Initialize input data.
657  if (isign==1) then
658    ! initialize fofg
659    fofg = zero
660    ifft=0
661    do i3=1,n3
662      gg(3)=i3-1; if (i3>1+n3/2) gg(3)=i3-n3-1 ! TODO recheck this
663      do i2=1,n2
664        gg(2)=i2-1; if (i2>1+n2/2) gg(2)=i2-n2-1
665        do i1=1,n1
666          gg(1)=i1-1; if (i1>1+n1/2) gg(1)=i1-n1-1
667          gsq = two_pi**2 * DOT_PRODUCT(gg,MATMUL(Ftest%gmet,gg))
668          ifft=ifft+1
669          fofg(1,ifft) = EXP(-gsq)
670          fofg(2,ifft) = zero
671        end do
672      end do
673    end do
674 
675  else
676    ! init fofr
677    if (cplex==2) then
678      call calc_eigr(g0,Ftest%nfft,Ftest%ngfft,fofr)
679    else if (cplex==1) then
680      ABI_MALLOC(ctmp,(Ftest%nfft))
681      call calc_ceigr(g0,Ftest%nfft,nspinor1,Ftest%ngfft,ctmp)
682      fofr = REAL(ctmp)
683      ABI_FREE(ctmp)
684    else
685      ABI_ERROR(sjoin("Wrong cplex:", itoa(cplex)))
686    end if
687  end if
688 
689  ABI_CALLOC(results, (Ftest%nfft))
690  call cwtime(cpu_time, wall_time, gflops, "start")
691 
692  do icall=1,NCALLS_FOR_TEST
693    ifft = empty_cache(CACHE_KBSIZE)
694    call fourdp(cplex,fofg,fofr,isign,Ftest%MPI_enreg,Ftest%nfft,1,Ftest%ngfft,0)
695 
696    ! Store results at the first call.
697    if (icall==1) then
698      if (isign==1) then
699        if (cplex==1) then
700          results = DCMPLX(fofr, zero)
701        else if (cplex==2) then
702          results = DCMPLX( fofr(1:2*Ftest%nfft:2), fofr(2:2*Ftest%nfft:2) )
703        end if
704      else if (isign==-1) then
705        results = DCMPLX(fofg(1,:),fofg(2,:))
706      end if
707    end if
708  end do
709 
710  call cwtime(cpu_time, wall_time, gflops, "stop")
711  call Ftprof%init(test_name,Ftest%nthreads,NCALLS_FOR_TEST,Ftest%ndat,ftest%use_gpu, &
712                   cpu_time,wall_time,gflops,results=results)
713 
714  ABI_FREE(fofg)
715  ABI_FREE(fofr)
716  ABI_FREE(results)
717 
718 end subroutine time_fourdp

m_FFT_prof/time_fourwf [ Functions ]

[ Top ] [ m_FFT_prof ] [ Functions ]

NAME

  time_fourwf

FUNCTION

  Profiling of the fourwf routine.

INPUTS

OUTPUT

SOURCE

 856 subroutine time_fourwf(Ftest, cplex, option_fourwf, header, Ftprof)
 857 
 858 !Arguments -----------------------------------
 859  class(FFT_test_t),intent(inout) :: Ftest
 860  integer,intent(in) :: cplex,option_fourwf
 861  character(len=500),intent(out) :: header
 862  class(FFT_prof_t),intent(out) :: Ftprof
 863 
 864 !Local variables-------------------------------
 865 !scalars
 866  integer,parameter :: tim0=0
 867  integer :: n1,n2,n3,n4,n5,n6,npw_out,icall,i1,i2,i3,idx,ipw,ndat,cnt,dat,padat
 868  integer :: fftalg,fftalga,fftalgc
 869  real(dp),parameter :: weight_i=one,weight_r=one
 870  real(dp) :: cpu_time,wall_time,gflops,gsq,g0dotr
 871  logical :: isbuggy,not_supported
 872  !character(len=500) :: msg
 873  character(len=TNAME_LEN) :: test_name
 874 !arrays
 875  integer,parameter :: g0(3)=[1,-1,2] !g0(3)=[1,0,0]
 876  integer :: gg(3)
 877  integer,allocatable :: gbound_in(:,:),gbound_out(:,:)
 878  real(dp),allocatable :: denpot(:,:,:),fofg_in(:,:), fofr_4(:,:,:,:),fofg_out(:,:)
 879  complex(dpc),allocatable :: results(:)
 880 
 881 ! *********************************************************************
 882 
 883  test_name = Ftest%get_name()
 884 
 885  fftalg  = Ftest%ngfft(7); fftalga = fftalg/100; fftalgc = MOD(fftalg,10)
 886 
 887  isbuggy = &
 888     (option_fourwf==3 .and. fftalga==FFT_SG2002 .and. fftalgc /= 0 .and. &
 889      any(Ftest%istwf_k == [3,4,5,6,7,8,9]))  ! see sg_fourwf
 890  !isbuggy = .False.
 891 
 892  !FIXME problems with the unitary tests reference files!
 893  not_supported = ( &
 894   (fftalgc == 2 .and. option_fourwf==0 .and. Ftest%istwf_k >  2))
 895  !not_supported = .FALSE.
 896 
 897  npw_out= Ftest%npw_kout
 898  ndat   = Ftest%ndat
 899  n1=Ftest%ngfft(1); n2=Ftest%ngfft(2); n3=Ftest%ngfft(3)
 900  n4=Ftest%ngfft(4); n5=Ftest%ngfft(5); n6=Ftest%ngfft(6)
 901 
 902  write(header,'(4(a,i2))')" fourwf with option ",option_fourwf,", cplex ",cplex,", ndat ",ndat,", istwf_k ",Ftest%istwf_k
 903 
 904  if (isbuggy .or. not_supported .or. Ftest%available==0) then
 905    call Ftprof%init(test_name,0,0,0,0,zero,zero,zero)
 906    RETURN
 907  end if
 908 
 909  call xomp_set_num_threads(Ftest%nthreads)
 910 
 911  if (Ftest%ngfft(7)/100 == FFT_FFTW3) call fftw3_set_nthreads(Ftest%nthreads)
 912 
 913  ! FFTW3 does not need gbound_in but oh well
 914  ABI_MALLOC(gbound_in, (2*Ftest%mgfft+8,2))
 915  call sphereboundary(gbound_in,Ftest%istwf_k,Ftest%kg_k,Ftest%mgfft,Ftest%npw_k)
 916 
 917  ABI_MALLOC(gbound_out, (2*Ftest%mgfft+8,2))
 918  call sphereboundary(gbound_out,Ftest%istwf_k,Ftest%kg_kout,Ftest%mgfft,Ftest%npw_kout)
 919 
 920  ABI_CALLOC(denpot,(cplex*n4,n5,n6))
 921  ABI_CALLOC(fofg_in,(2,Ftest%npw_k*ndat))
 922  ABI_CALLOC(fofg_out,(2,npw_out*ndat))
 923  ABI_CALLOC(fofr_4,(2,n4,n5,n6*ndat))
 924  ABI_CALLOC(results, (Ftest%nfft*ndat))
 925 
 926  select case (option_fourwf)
 927  case (0,1,2)
 928    !! for option==0, fofgin(2,npwin*ndat)=holds input wavefunction in G sphere;
 929    !!                fofr(2,n4,n5,n6) contains the output Fourier Transform of fofgin;
 930    !!                no use of denpot, fofgout and npwout.
 931    !! for option==1, fofgin(2,npwin*ndat)=holds input wavefunction in G sphere;
 932    !!                denpot(cplex*n4,n5,n6) contains the input density at input,
 933    !!                and the updated density at output (accumulated);
 934    !!                no use of fofgout and npwout.
 935    !! for option==2, fofgin(2,npwin*ndat)=holds input wavefunction in G sphere;
 936    !!                denpot(cplex*n4,n5,n6) contains the input local potential;
 937    !!                fofgout(2,npwout*ndat) contains the output function;
 938    !!
 939    do cnt=0,(Ftest%npw_k * ndat) - 1
 940      ipw = 1 + MOD(cnt, Ftest%npw_k)
 941      gg = Ftest%kg_k(:,ipw)
 942      gsq = two_pi**2 * DOT_PRODUCT(gg,MATMUL(Ftest%gmet,gg))
 943      fofg_in(1,cnt+1) = EXP(-gsq)
 944      fofg_in(2,cnt+1) = zero
 945    end do
 946 
 947    if (option_fourwf==1) then ! Init denpot
 948      denpot = one
 949    end if
 950    !
 951    if (option_fourwf==2) then ! Init denpot
 952      !
 953      if (cplex==1) then
 954        do i3=0,n3-1
 955          do i2=0,n2-1
 956            do i1=0,n1-1
 957              g0dotr= two_pi*( g0(1)*(i1/DBLE(n1)) &
 958 &                            +g0(2)*(i2/DBLE(n2)) &
 959 &                            +g0(3)*(i3/DBLE(n3)) )
 960              denpot(i1+1,i2+1,i3+1)=COS(g0dotr)
 961            end do
 962          end do
 963        end do
 964      else if (cplex==2) then
 965        do i3=0,n3-1
 966          do i2=0,n2-1
 967            idx=1
 968            do i1=0,n1-1
 969              g0dotr= two_pi*( g0(1)*(i1/DBLE(n1)) &
 970 &                            +g0(2)*(i2/DBLE(n2)) &
 971 &                            +g0(3)*(i3/DBLE(n3)) )
 972 
 973              denpot(idx,  i2+1,i3+1)= COS(g0dotr)
 974              denpot(idx+1,i2+1,i3+1)= SIN(g0dotr)
 975              idx=idx+2
 976            end do
 977          end do
 978        end do
 979      end if
 980    end if
 981 
 982  case (3)
 983    !! for option==3, fofr(2,n4,n5,n6*ndat) contains the input real space wavefunction;
 984    !!                fofgout(2,npwout*ndat) contains its output Fourier transform;
 985    !!                no use of fofgin and npwin.
 986    idx=0
 987    do dat=1,ndat
 988      padat = (dat-1)*n6
 989      do i3=0,n3-1
 990        do i2=0,n2-1
 991          do i1=0,n1-1
 992            g0dotr= two_pi*( g0(1)*(i1/DBLE(n1)) &
 993                            +g0(2)*(i2/DBLE(n2)) &
 994                            +g0(3)*(i3/DBLE(n3)) )
 995            !fofr_4(1,i1+1,i2+1,i3+1+padat)=COS(g0dotr)
 996            !fofr_4(2,i1+1,i2+1,i3+1+padat)=SIN(g0dotr)
 997            fofr_4(1,i1+1,i2+1,i3+1+padat)=10 * EXP(-g0dotr**2)
 998            fofr_4(2,i1+1,i2+1,i3+1+padat)=zero
 999            idx=idx+1
1000            results(idx) = DCMPLX(fofr_4(1,i1+1,i2+1,i3+1+padat), fofr_4(2,i1+1,i2+1,i3+1+padat))
1001          end do
1002        end do
1003      end do
1004    end do
1005 
1006  case default
1007    ABI_ERROR(sjoin("Wrong value for option_fourwf:", itoa(option_fourwf)))
1008  end select
1009 
1010  call cwtime(cpu_time, wall_time, gflops, "start")
1011 
1012  do icall=1,NCALLS_FOR_TEST
1013 
1014    i1 = empty_cache(CACHE_KBSIZE)
1015 
1016    call fourwf(cplex,denpot,fofg_in,fofg_out,fofr_4,gbound_in,gbound_out,Ftest%istwf_k,&
1017     Ftest%kg_k,Ftest%kg_kout,Ftest%mgfft,Ftest%MPI_enreg,ndat,Ftest%ngfft,Ftest%npw_k,npw_out,n4,n5,n6,option_fourwf,&
1018     tim0,weight_r,weight_i, use_gpu_cuda=ftest%use_gpu)
1019 
1020    ! Store results at the first call.
1021    if (icall == 1) then
1022      select case (option_fourwf)
1023      case (0)
1024        !! for option==0, fofgin(2,npwin*ndat)=holds input wavefunction in G sphere;
1025        !!                fofr(2,n4,n5,n6) contains the output Fourier Transform of fofgin;
1026        !!                no use of denpot, fofgout and npwout.
1027        idx=0
1028        do dat=1,ndat
1029          padat = (dat-1)*n6
1030          do i3=1,n3
1031            do i2=1,n2
1032              do i1=1,n1
1033                idx=idx+1
1034                results(idx) = DCMPLX(fofr_4(1,i1,i2,i3+padat),fofr_4(2,i1,i2,i3+padat))
1035              end do
1036            end do
1037          end do
1038        end do
1039      case (1)
1040        !! for option==1, fofgin(2,npwin*ndat)=holds input wavefunction in G sphere;
1041        !!                denpot(cplex*n4,n5,n6) contains the input density at input,
1042        !!                and the updated density at output (accumulated);
1043        !!                no use of fofgout and npwout.
1044        if (cplex==1) then
1045         idx=0
1046         do i3=1,n3
1047           do i2=1,n2
1048             do i1=1,n1
1049               idx=idx+1
1050               results(idx) = DCMPLX(denpot(i1,i2,i3),zero)
1051             end do
1052           end do
1053         end do
1054 
1055        else if (cplex==2) then
1056          idx=0
1057          do i3=1,n3
1058            do i2=1,n2
1059              do i1=1,2*n1,2
1060                idx=idx+1
1061                results(idx) = DCMPLX(denpot(i1,i2,i3),denpot(i1+1,i2,i3))
1062              end do
1063            end do
1064          end do
1065        else
1066          ABI_ERROR("Wrong cplex")
1067        end if
1068 
1069      case (2)
1070        !! for option==2, fofgin(2,npwin*ndat)=holds input wavefunction in G sphere;
1071        !!                denpot(cplex*n4,n5,n6) contains the input local potential;
1072        !!                fofgout(2,npwout*ndat) contains the output function;
1073        do ipw=1,npw_out*ndat
1074          results(ipw) = DCMPLX(fofg_out(1,ipw),fofg_out(2,ipw))
1075        end do
1076 
1077      case (3)
1078        !! for option==3, fofr(2,n4,n5,n6*ndat) contains the input real space wavefunction;
1079        !!                fofgout(2,npwout*ndat) contains its output Fourier transform;
1080        !!                no use of fofgin and npwin.
1081        do ipw=1,npw_out*ndat
1082          results(ipw) = DCMPLX(fofg_out(1,ipw),fofg_out(2,ipw))
1083        end do
1084        !write(Ftest%ngfft(7),*)"results opt 3 Ftest%ngfft(7)",Ftest%ngfft(7)
1085        !do dat=1,ndat
1086        !  do ipw=1,npw_out
1087        !    idx = ipw + (dat-1)*npw_out
1088        !    write(Ftest%ngfft(7),*)ipw,dat,results(idx)
1089        !  end do
1090        !end do
1091      end select
1092    end if
1093  end do
1094 
1095  call cwtime(cpu_time, wall_time, gflops, "stop")
1096  call Ftprof%init(test_name,Ftest%nthreads,NCALLS_FOR_TEST,Ftest%ndat,ftest%use_gpu,&
1097                   cpu_time,wall_time,gflops,results=results)
1098 
1099  ABI_FREE(denpot)
1100  ABI_FREE(fofg_in)
1101  ABI_FREE(fofg_out)
1102  ABI_FREE(fofr_4)
1103  ABI_FREE(gbound_in)
1104  ABI_FREE(gbound_out)
1105  ABI_FREE(results)
1106 
1107 end subroutine time_fourwf

m_FFT_prof/time_rhotwg [ Functions ]

[ Top ] [ m_FFT_prof ] [ Functions ]

NAME

  time_rhotwg

FUNCTION

  Profiling of the rho_tw_g routine.

INPUTS

OUTPUT

SOURCE

1153 subroutine time_rhotwg(Ftest, map2sphere, use_padfft, osc_npw, osc_gvec, header, Ftprof)
1154 
1155 !Arguments -----------------------------------
1156 !scalars
1157  class(FFT_test_t),intent(inout) :: Ftest
1158  integer,intent(in) :: map2sphere,use_padfft,osc_npw
1159  character(len=500),intent(out) :: header
1160  class(FFT_prof_t),intent(out) :: Ftprof
1161 !arrays
1162  integer,intent(in) :: osc_gvec(3,osc_npw)
1163 
1164 !Local variables-------------------------------
1165 !scalars
1166  integer,parameter :: nspinor1=1,dim_rtwg1=1,istwfk1=1
1167  integer :: icall,ifft,itim1,itim2,nfft,dat,sprc,ptr,ndat, n1,n2,n3,n4,n5,n6
1168  real(dp) :: cpu_time,wall_time,gflops
1169  complex(dpc) :: ktabp1 = cone, ktabp2 = cone
1170  character(len=TNAME_LEN) :: test_name
1171  logical :: not_implemented
1172  type(MPI_type) :: MPI_enreg_seq
1173 !arrays
1174  !integer,parameter :: g1(3)=[1,1,2],g2(3)=[-1,-2,-1]
1175  integer,parameter :: g1(3)=[-1,0,0], g2(3)=[1,0,0]
1176  integer,allocatable :: gbound(:,:), ktabr1(:), ktabr2(:), igfftg0(:)
1177  real(dp),parameter :: spinrot1(4)=(/one,zero,zero,one/),spinrot2(4)=(/one,zero,zero,one/)
1178  logical,allocatable :: mask(:)
1179  complex(dpc),allocatable :: results(:)
1180  complex(gwpc),allocatable :: rhotwg(:), wfn1(:), wfn2(:)
1181 
1182 ! *********************************************************************
1183 
1184  test_name = Ftest%get_name()
1185 
1186  nfft = Ftest%nfft; ndat = Ftest%ndat
1187  n1=Ftest%ngfft(1); n2=Ftest%ngfft(2); n3=Ftest%ngfft(3)
1188  n4=Ftest%ngfft(4); n5=Ftest%ngfft(5); n6=Ftest%ngfft(6)
1189 
1190  write(header,'(3(a,i2))')"rho_tw_g with use_padfft ",use_padfft,", map2sphere ",map2sphere,", ndat ",ndat
1191 
1192  ! TODO: zero-pad not available with SG2001 routines.
1193  not_implemented = (use_padfft==1.and.Ftest%ngfft(7) == 412)
1194 
1195  if (Ftest%available==0.or.not_implemented) then
1196    call Ftprof%init(test_name,0,0,0,0,zero,zero,zero)
1197    return
1198  end if
1199 
1200  call xomp_set_num_threads(Ftest%nthreads)
1201 
1202  if (Ftest%ngfft(7)/100 == FFT_FFTW3) call fftw3_set_nthreads(Ftest%nthreads)
1203 
1204  call initmpi_seq(MPI_enreg_seq)
1205  call init_distribfft_seq(MPI_enreg_seq%distribfft,'c',n2,n3,'all')
1206 
1207  itim1=1; itim2=1
1208  ABI_MALLOC(ktabr1,(nfft))
1209  ABI_MALLOC(ktabr2,(nfft))
1210 
1211  do ifft=1,nfft
1212    ktabr1(ifft)= ifft
1213    ktabr2(ifft)= ifft
1214  end do
1215 
1216  ABI_MALLOC(igfftg0,(osc_npw*map2sphere))
1217 
1218  if (map2sphere > 0) then
1219    ABI_MALLOC(mask,(osc_npw))
1220    call kgindex(igfftg0,osc_gvec,mask,MPI_enreg_seq,Ftest%ngfft,osc_npw)
1221    ABI_CHECK(ALL(mask)," FFT parallelism not supported")
1222    ABI_FREE(mask)
1223  end if
1224 
1225  ABI_MALLOC(gbound,(2*Ftest%mgfft+8,2*use_padfft))
1226  if (use_padfft == 1) call sphereboundary(gbound,istwfk1,osc_gvec,Ftest%mgfft,osc_npw)
1227 
1228  ABI_MALLOC(wfn1, (nfft*nspinor1*ndat))
1229  ABI_MALLOC(wfn2, (nfft*nspinor1*ndat))
1230 
1231  do dat=1,ndat
1232    do sprc=1,nspinor1
1233      ptr = 1 + (sprc-1)*nfft + (dat-1)*nfft*nspinor1
1234      call calc_ceigr(g1,nfft,nspinor1,Ftest%ngfft,wfn1(ptr:))
1235      call calc_ceigr(g2,nfft,nspinor1,Ftest%ngfft,wfn2(ptr:))
1236    end do
1237  end do
1238 
1239  ABI_MALLOC(rhotwg,(osc_npw*dim_rtwg1*ndat))
1240  ABI_MALLOC(results,(osc_npw*dim_rtwg1*ndat))
1241 
1242  call cwtime(cpu_time, wall_time, gflops, "start")
1243 
1244  do icall=1,NCALLS_FOR_TEST
1245      ifft = empty_cache(CACHE_KBSIZE)
1246      call rho_tw_g(nspinor1,osc_npw,nfft,ndat,Ftest%ngfft,map2sphere,use_padfft,igfftg0,gbound,&
1247         wfn1,itim1,ktabr1,ktabp1,spinrot1,&
1248         wfn2,itim2,ktabr2,ktabp2,spinrot2,&
1249         dim_rtwg1,rhotwg)
1250      ! Store results at the first call.
1251      if (icall==1) results = rhotwg
1252  end do
1253 
1254  call cwtime(cpu_time, wall_time, gflops, "stop")
1255  call Ftprof%init(test_name,Ftest%nthreads,NCALLS_FOR_TEST,Ftest%ndat,ftest%use_gpu, &
1256                   cpu_time,wall_time,gflops,results=results)
1257 
1258  ABI_FREE(ktabr1)
1259  ABI_FREE(ktabr2)
1260  ABI_FREE(igfftg0)
1261  ABI_FREE(gbound)
1262  ABI_FREE(rhotwg)
1263  ABI_FREE(wfn1)
1264  ABI_FREE(wfn2)
1265  ABI_FREE(results)
1266  call destroy_mpi_enreg(MPI_enreg_seq)
1267 
1268 end subroutine time_rhotwg