TABLE OF CONTENTS
- ABINIT/m_FFT_prof
- m_fft_mesh/FFT_prof_t
- m_fft_mesh/FFT_test_t
- m_FFT_prof/empty_cache
- m_FFT_prof/fft_test_free_0D
- m_FFT_prof/fft_test_free_1D
- m_FFT_prof/fft_test_init
- m_FFT_prof/fft_test_nullify
- m_FFT_prof/fft_test_print
- m_FFT_prof/fftprof_free_0D
- m_FFT_prof/fftprof_free_1D
- m_FFT_prof/fftprof_init
- m_FFT_prof/fftprof_ncalls_per_test
- m_FFT_prof/fftprof_print
- m_FFT_prof/name_of
- m_FFT_prof/prof_fourdp
- m_FFT_prof/prof_fourwf
- m_FFT_prof/prof_rhotwg
- m_FFT_prof/time_fftbox
- m_FFT_prof/time_fftu
- m_FFT_prof/time_fourdp
- m_FFT_prof/time_fourwf
- m_FFT_prof/time_rhotwg
ABINIT/m_FFT_prof [ Modules ]
NAME
m_FFT_prof
COPYRIGHT
Copyright (C) 2008-2018 ABINIT group (MG) This file is distributed under the terms of the GNU General Public License, see ~abinit/COPYING or http://www.gnu.org/copyleft/gpl.txt .
SOURCE
14 #if defined HAVE_CONFIG_H 15 #include "config.h" 16 #endif 17 18 #include "abi_common.h" 19 20 MODULE m_FFT_prof 21 22 use defs_basis 23 use defs_abitypes 24 use m_xomp 25 use m_errors 26 use m_abicore 27 use m_fftw3 28 use m_fft 29 30 use m_numeric_tools, only : arth 31 use m_time, only : cwtime 32 use m_io_tools, only : open_file 33 use m_geometry, only : metric 34 use m_hide_blas, only : xcopy 35 use m_cgtools, only : set_istwfk 36 use m_fftcore, only : get_kg, print_ngfft, fftalg_info, kgindex, getng, sphereboundary 37 use m_fft_mesh, only : calc_eigr, calc_ceigr 38 use m_mpinfo, only : nullify_mpi_enreg, destroy_mpi_enreg, copy_mpi_enreg, initmpi_seq 39 use m_oscillators, only : rho_tw_g 40 41 42 implicit none 43 44 private 45 46 integer,private,parameter :: TNAME_LEN=100
m_fft_mesh/FFT_prof_t [ Types ]
[ Top ] [ m_fft_mesh ] [ Types ]
NAME
FFT_prof_t
FUNCTION
The results of the tests
SOURCE
108 type,public :: FFT_prof_t 109 integer :: ncalls 110 integer :: ndat 111 integer :: nthreads 112 real(dp) :: cpu_time 113 real(dp) :: wall_time 114 real(dp) :: gflops 115 character(len=TNAME_LEN) :: test_name 116 complex(dpc),allocatable :: results(:) 117 end type FFT_prof_t 118 119 public :: fftprof_init 120 public :: fftprof_free 121 public :: fftprof_print 122 123 interface fftprof_free 124 module procedure fftprof_free_0D 125 module procedure fftprof_free_1D 126 end interface fftprof_free
m_fft_mesh/FFT_test_t [ Types ]
[ Top ] [ m_fft_mesh ] [ Types ]
NAME
FFT_test_t
FUNCTION
Structure storing the set of paramenters passed to the FFT routines used in abinit (fourdp|fourwf).
SOURCE
60 type,public :: FFT_test_t 61 integer :: available=0 62 integer :: istwf_k=-1 63 integer :: mgfft=-1 64 integer :: ndat=-1 65 integer :: nfft=-1 66 integer :: nthreads=1 67 integer :: npw_k=-1 68 integer :: npw_kout=-1 69 integer :: paral_kgb=-1 70 71 real(dp) :: ecut=zero 72 73 integer :: ngfft(18)=-1 74 75 real(dp) :: kpoint(3) = [zero,zero,zero] 76 real(dp) :: rprimd(3,3),rmet(3,3) 77 real(dp) :: gprimd(3,3),gmet(3,3) 78 79 integer,allocatable :: kg_k(:,:) 80 integer,allocatable :: kg_kout(:,:) 81 integer,allocatable :: indpw_k(:) 82 83 type(MPI_type) :: MPI_enreg 84 end type FFT_test_t 85 86 public :: fft_test_init 87 public :: fft_test_nullify 88 public :: fft_test_free 89 public :: fft_test_print
m_FFT_prof/empty_cache [ Functions ]
[ Top ] [ m_FFT_prof ] [ Functions ]
NAME
empty_cache
FUNCTION
Empty the memory cache
INPUTS
OUTPUT
PARENTS
SOURCE
2165 function empty_cache(kbsize) result(fake) 2166 2167 2168 !This section has been created automatically by the script Abilint (TD). 2169 !Do not modify the following lines by hand. 2170 #undef ABI_FUNC 2171 #define ABI_FUNC 'empty_cache' 2172 !End of the abilint section 2173 2174 implicit none 2175 2176 !Arguments ----------------------------------- 2177 !scalars 2178 integer,intent(in) :: kbsize 2179 integer :: fake 2180 2181 !Local variables------------------------------- 2182 !scalars 2183 integer :: sz 2184 !arrays 2185 real(dp),allocatable :: chunk(:) 2186 2187 ! ********************************************************************* 2188 2189 if (kbsize <= 0) RETURN 2190 2191 sz = (100. * kbsize) / dp 2192 2193 ABI_MALLOC(chunk,(sz)) 2194 call random_number(chunk) 2195 fake = SUM(chunk) ! Need a result, otherwise some smart compiler could skip the call. 2196 ABI_FREE(chunk) 2197 2198 !---------------------------------------------------------------------- 2199 2200 end function empty_cache
m_FFT_prof/fft_test_free_0D [ Functions ]
[ Top ] [ m_FFT_prof ] [ Functions ]
NAME
fft_test_free_0D
FUNCTION
Destruction method for the FFT_test_t structured datatype.
INPUTS
OUTPUT
PARENTS
m_fft_prof
CHILDREN
fft_test_free,fft_test_init,fft_test_nullify,fftprof_free,get_kg random_number,time_rhotwg
SOURCE
321 subroutine fft_test_free_0D(Ftest) 322 323 324 !This section has been created automatically by the script Abilint (TD). 325 !Do not modify the following lines by hand. 326 #undef ABI_FUNC 327 #define ABI_FUNC 'fft_test_free_0D' 328 !End of the abilint section 329 330 implicit none 331 332 !Arguments ----------------------------------- 333 !scalars 334 type(FFT_test_t),intent(inout) :: Ftest 335 336 ! ********************************************************************* 337 338 !@FFT_test_t 339 Ftest%available=0 340 Ftest%istwf_k=-1 341 Ftest%mgfft=-1 342 Ftest%ndat=-1 343 Ftest%nfft=-1 344 Ftest%nthreads=1 345 Ftest%npw_k=-1 346 Ftest%npw_kout=-1 347 Ftest%paral_kgb=-1 348 349 Ftest%ecut=zero 350 351 Ftest%ngfft=-1 352 353 Ftest%kpoint =zero 354 Ftest%rprimd =zero 355 Ftest%rmet =zero 356 Ftest%gprimd =zero 357 Ftest%gmet =zero 358 359 if (allocated(Ftest%indpw_k)) then 360 ABI_FREE(Ftest%indpw_k) 361 end if 362 if (allocated(Ftest%kg_k)) then 363 ABI_FREE(Ftest%kg_k) 364 end if 365 if (allocated(Ftest%kg_kout)) then 366 ABI_FREE(Ftest%kg_kout) 367 end if 368 369 call destroy_mpi_enreg(Ftest%MPI_enreg) 370 371 end subroutine fft_test_free_0D
m_FFT_prof/fft_test_free_1D [ Functions ]
[ Top ] [ m_FFT_prof ] [ Functions ]
NAME
fft_test_free_1D
FUNCTION
Destruction method for the FFT_test_t structured datatype.
INPUTS
OUTPUT
PARENTS
CHILDREN
fft_test_free,fft_test_init,fft_test_nullify,fftprof_free,get_kg random_number,time_rhotwg
SOURCE
395 subroutine fft_test_free_1D(Ftest) 396 397 398 !This section has been created automatically by the script Abilint (TD). 399 !Do not modify the following lines by hand. 400 #undef ABI_FUNC 401 #define ABI_FUNC 'fft_test_free_1D' 402 !End of the abilint section 403 404 implicit none 405 406 !Arguments ----------------------------------- 407 !scalars 408 type(FFT_test_t),intent(inout) :: Ftest(:) 409 410 !Local variables------------------------------- 411 !scalars 412 integer :: ii 413 414 ! ********************************************************************* 415 416 do ii=LBOUND(Ftest,DIM=1),UBOUND(Ftest,DIM=1) 417 call fft_test_free_0D(Ftest(ii)) 418 end do 419 420 end subroutine fft_test_free_1D
m_FFT_prof/fft_test_init [ Functions ]
[ Top ] [ m_FFT_prof ] [ Functions ]
NAME
fft_test_init
FUNCTION
Creation method for the FFT_test_t structured datatype.
INPUTS
OUTPUT
PARENTS
fftprof,m_fft_prof
CHILDREN
fft_test_free,fft_test_init,fft_test_nullify,fftprof_free,get_kg random_number,time_rhotwg
SOURCE
177 subroutine fft_test_init(Ftest,fft_setup,kpoint,ecut,boxcutmin,rprimd,nsym,symrel,MPI_enreg_in) 178 179 180 !This section has been created automatically by the script Abilint (TD). 181 !Do not modify the following lines by hand. 182 #undef ABI_FUNC 183 #define ABI_FUNC 'fft_test_init' 184 !End of the abilint section 185 186 implicit none 187 188 !Arguments ----------------------------------- 189 !scalars 190 integer,intent(in) :: nsym 191 real(dp),intent(in) :: ecut,boxcutmin 192 type(FFT_test_t),intent(inout) :: Ftest 193 type(MPI_type),intent(in) :: MPI_enreg_in 194 !arrays 195 integer,intent(in) :: fft_setup(5),symrel(3,3,nsym) 196 real(dp),intent(in) :: kpoint(3),rprimd(3,3) 197 198 !Local variables------------------------------- 199 !scalars 200 integer :: fftalg,fftcache,ndat 201 real(dp) :: ucvol 202 !arrays 203 logical,allocatable :: mask(:) 204 real(dp),parameter :: k0(3)=(/zero,zero,zero/) 205 real(dp) :: gmet(3,3),gprimd(3,3),rmet(3,3) 206 207 ! ************************************************************************* 208 209 call metric(gmet,gprimd,-1,rmet,rprimd,ucvol) 210 211 !@FFT_test_t 212 Ftest%rprimd = rprimd 213 Ftest%rmet = rmet 214 Ftest%gprimd = gprimd 215 Ftest%gmet = gmet 216 Ftest%ecut = ecut 217 218 fftalg = fft_setup(1) 219 fftcache = fft_setup(2) 220 ndat = fft_setup(3) 221 222 Ftest%nthreads = fft_setup(4) 223 Ftest%available = fft_setup(5) 224 225 Ftest%paral_kgb = 0 226 Ftest%kpoint = kpoint 227 Ftest%ndat = ndat 228 229 Ftest%istwf_k = set_istwfk(kpoint) 230 231 call get_kg(Ftest%kpoint,Ftest%istwf_k,ecut,gmet,Ftest%npw_k,Ftest%kg_k) 232 call get_kg(Ftest%kpoint,Ftest%istwf_k,ecut,gmet,Ftest%npw_kout,Ftest%kg_kout) 233 234 call copy_mpi_enreg(MPI_enreg_in,Ftest%MPI_enreg) 235 236 Ftest%ngfft(7) = fftalg 237 Ftest%ngfft(8) = fftcache 238 239 ! Fill part of ngfft 240 call getng(boxcutmin,ecut,gmet,k0,Ftest%MPI_enreg%me_fft,Ftest%mgfft,Ftest%nfft,Ftest%ngfft,Ftest%MPI_enreg%nproc_fft,nsym,& 241 & Ftest%MPI_enreg%paral_kgb,symrel, unit=dev_null) 242 243 call init_distribfft(Ftest%MPI_enreg%distribfft,'c',Ftest%MPI_enreg%nproc_fft,Ftest%ngfft(2),Ftest%ngfft(3)) 244 245 ! Compute the index of each plane wave in the FFT grid. 246 ABI_MALLOC(Ftest%indpw_k,(Ftest%npw_k)) 247 248 ABI_MALLOC(mask,(Ftest%npw_k)) 249 call kgindex(Ftest%indpw_k,Ftest%kg_k,mask,Ftest%MPI_enreg,Ftest%ngfft,Ftest%npw_k) 250 ABI_CHECK(ALL(mask),"FFT parallelism not supported in fftprof") 251 ABI_FREE(mask) 252 253 end subroutine fft_test_init
m_FFT_prof/fft_test_nullify [ Functions ]
[ Top ] [ m_FFT_prof ] [ Functions ]
NAME
fft_test_nullify
FUNCTION
Nullify all pointers.
INPUTS
PARENTS
fftprof,m_fft_prof
CHILDREN
fft_test_free,fft_test_init,fft_test_nullify,fftprof_free,get_kg random_number,time_rhotwg
SOURCE
276 subroutine fft_test_nullify(Ftest) 277 278 279 !This section has been created automatically by the script Abilint (TD). 280 !Do not modify the following lines by hand. 281 #undef ABI_FUNC 282 #define ABI_FUNC 'fft_test_nullify' 283 !End of the abilint section 284 285 implicit none 286 287 !Arguments ----------------------------------- 288 !scalars 289 type(FFT_test_t),intent(inout) :: Ftest 290 291 ! ************************************************************************* 292 293 ! @FFT_test_t 294 call nullify_mpi_enreg(Ftest%MPI_enreg) 295 296 end subroutine fft_test_nullify
m_FFT_prof/fft_test_print [ Functions ]
[ Top ] [ m_FFT_prof ] [ Functions ]
NAME
fft_test_print
FUNCTION
Printout of the FFT_test_t structured datatype.
INPUTS
OUTPUT
PARENTS
fftprof
CHILDREN
fft_test_free,fft_test_init,fft_test_nullify,fftprof_free,get_kg random_number,time_rhotwg
SOURCE
445 subroutine fft_test_print(Ftest,header,unit,mode_paral,prtvol) 446 447 448 !This section has been created automatically by the script Abilint (TD). 449 !Do not modify the following lines by hand. 450 #undef ABI_FUNC 451 #define ABI_FUNC 'fft_test_print' 452 !End of the abilint section 453 454 implicit none 455 456 !Arguments ----------------------------------- 457 !scalars 458 integer,optional,intent(in) :: unit,prtvol 459 character(len=4),optional,intent(in) :: mode_paral 460 character(len=*),optional,intent(in) :: header 461 type(FFT_test_t),intent(in) :: Ftest 462 463 !Local variables------------------------------- 464 !scalars 465 integer :: my_unt,my_prtvol 466 character(len=4) :: my_mode 467 character(len=500) :: msg 468 ! ********************************************************************* 469 470 my_unt =std_out; if (PRESENT(unit )) my_unt =unit 471 my_prtvol=0 ; if (PRESENT(prtvol )) my_prtvol=prtvol 472 my_mode ='COLL' ; if (PRESENT(mode_paral)) my_mode =mode_paral 473 474 !msg=' ==== Info on the FFT test object ==== ' 475 if (PRESENT(header)) then 476 msg=' ==== '//TRIM(ADJUSTL(header))//' ==== ' 477 call wrtout(my_unt,msg,my_mode) 478 end if 479 480 !TODO add additional info 481 write(msg,'(a,i3)')"FFT setup for fftalg ",Ftest%ngfft(7) 482 call print_ngfft(Ftest%ngfft,header=msg,unit=my_unt,mode_paral="COLL") 483 484 end subroutine fft_test_print
m_FFT_prof/fftprof_free_0D [ Functions ]
[ Top ] [ m_FFT_prof ] [ Functions ]
NAME
fftprof_free_0D
FUNCTION
Destruction method for the FFT_prof_t structured datatype.
INPUTS
OUTPUT
PARENTS
m_fft_prof
CHILDREN
fft_test_free,fft_test_init,fft_test_nullify,fftprof_free,get_kg random_number,time_rhotwg
SOURCE
622 subroutine fftprof_free_0D(Ftprof) 623 624 625 !This section has been created automatically by the script Abilint (TD). 626 !Do not modify the following lines by hand. 627 #undef ABI_FUNC 628 #define ABI_FUNC 'fftprof_free_0D' 629 !End of the abilint section 630 631 implicit none 632 633 !Arguments ----------------------------------- 634 !scalars 635 type(FFT_prof_t),intent(inout) :: Ftprof 636 637 ! ********************************************************************* 638 639 !@FFT_prof_t 640 Ftprof%ncalls=0 641 Ftprof%nthreads=0 642 Ftprof%cpu_time=zero 643 Ftprof%wall_time=zero 644 Ftprof%test_name = "None" 645 646 if (allocated(Ftprof%results)) then 647 ABI_FREE(Ftprof%results) 648 end if 649 650 end subroutine fftprof_free_0D
m_FFT_prof/fftprof_free_1D [ Functions ]
[ Top ] [ m_FFT_prof ] [ Functions ]
NAME
fftprof_free_1D
FUNCTION
Destruction method for the FFT_prof_t structured datatype.
INPUTS
OUTPUT
PARENTS
CHILDREN
fft_test_free,fft_test_init,fft_test_nullify,fftprof_free,get_kg random_number,time_rhotwg
SOURCE
674 subroutine fftprof_free_1D(Ftprof) 675 676 677 !This section has been created automatically by the script Abilint (TD). 678 !Do not modify the following lines by hand. 679 #undef ABI_FUNC 680 #define ABI_FUNC 'fftprof_free_1D' 681 !End of the abilint section 682 683 implicit none 684 685 !Arguments ----------------------------------- 686 !scalars 687 type(FFT_prof_t),intent(inout) :: Ftprof(:) 688 689 !Local variables------------------------------- 690 !scalars 691 integer :: ii 692 ! ********************************************************************* 693 694 !@FFT_prof_t 695 do ii=LBOUND(Ftprof,DIM=1),UBOUND(Ftprof,DIM=1) 696 call fftprof_free_0D(Ftprof(ii)) 697 end do 698 699 end subroutine fftprof_free_1D
m_FFT_prof/fftprof_init [ Functions ]
[ Top ] [ m_FFT_prof ] [ Functions ]
NAME
fftprof_init
FUNCTION
Creation method for the FFT_prof_t structured datatype.
INPUTS
gflops = Gigaflops
OUTPUT
PARENTS
m_fft_prof
CHILDREN
fft_test_free,fft_test_init,fft_test_nullify,fftprof_free,get_kg random_number,time_rhotwg
SOURCE
558 subroutine fftprof_init(Ftprof,test_name,nthreads,ncalls,ndat,cpu_time,wall_time,gflops,results) 559 560 561 !This section has been created automatically by the script Abilint (TD). 562 !Do not modify the following lines by hand. 563 #undef ABI_FUNC 564 #define ABI_FUNC 'fftprof_init' 565 !End of the abilint section 566 567 implicit none 568 569 !Arguments ----------------------------------- 570 !scalars 571 integer,intent(in) :: ncalls,nthreads,ndat 572 real(dp),intent(in) :: cpu_time,wall_time,gflops 573 character(len=*),intent(in) :: test_name 574 type(FFT_prof_t),intent(out) :: Ftprof 575 !arrays 576 complex(dpc),optional,intent(in) :: results(:) 577 578 ! ************************************************************************* 579 580 !@FFT_prof_t 581 Ftprof%ncalls = ncalls 582 Ftprof%nthreads = nthreads 583 Ftprof%ndat = ndat 584 Ftprof%cpu_time = cpu_time 585 Ftprof%wall_time = wall_time 586 Ftprof%gflops = gflops 587 Ftprof%test_name = test_name 588 589 if (PRESENT(results)) then 590 if (allocated(Ftprof%results)) then 591 ABI_FREE(Ftprof%results) 592 end if 593 ABI_MALLOC(Ftprof%results,(SIZE(results))) 594 Ftprof%results = results 595 end if 596 597 end subroutine fftprof_init
m_FFT_prof/fftprof_ncalls_per_test [ Functions ]
[ Top ] [ m_FFT_prof ] [ Functions ]
NAME
fftprof_ncalls_per_test
FUNCTION
Helper function used to set the number of calls to be used in each time_* routine.
INPUTS
ncalls=Number of calls to be used.
SIDE EFFECTS
NCALLS_FOR_TEST = ncalls
PARENTS
fftprof
CHILDREN
fft_test_free,fft_test_init,fft_test_nullify,fftprof_free,get_kg random_number,time_rhotwg
SOURCE
1436 subroutine fftprof_ncalls_per_test(ncalls) 1437 1438 1439 !This section has been created automatically by the script Abilint (TD). 1440 !Do not modify the following lines by hand. 1441 #undef ABI_FUNC 1442 #define ABI_FUNC 'fftprof_ncalls_per_test' 1443 !End of the abilint section 1444 1445 implicit none 1446 1447 !Arguments ----------------------------------- 1448 !scalars 1449 integer,intent(in) :: ncalls 1450 1451 ! ********************************************************************* 1452 1453 NCALLS_FOR_TEST = ncalls 1454 1455 end subroutine fftprof_ncalls_per_test
m_FFT_prof/fftprof_print [ Functions ]
[ Top ] [ m_FFT_prof ] [ Functions ]
NAME
fftprof_print
FUNCTION
Printout of the FFT_prof_t structured datatype.
INPUTS
OUTPUT
PARENTS
fftprof
CHILDREN
fft_test_free,fft_test_init,fft_test_nullify,fftprof_free,get_kg random_number,time_rhotwg
SOURCE
724 subroutine fftprof_print(Fprof,header,unit,mode_paral,prtvol) 725 726 727 !This section has been created automatically by the script Abilint (TD). 728 !Do not modify the following lines by hand. 729 #undef ABI_FUNC 730 #define ABI_FUNC 'fftprof_print' 731 !End of the abilint section 732 733 implicit none 734 735 !Arguments ----------------------------------- 736 !scalars 737 integer,optional,intent(in) :: unit,prtvol 738 character(len=4),optional,intent(in) :: mode_paral 739 character(len=*),optional,intent(in) :: header 740 type(FFT_prof_t),intent(in) :: Fprof(:) 741 742 !Local variables------------------------------- 743 !scalars 744 integer :: my_unt,my_prtvol,ncalls 745 integer :: field1_w,ii,ref_lib !ifft 746 real(dp) :: mabs_err,mean_err,check_mabs_err,check_mean_err 747 real(dp) :: ref_wtime,para_eff 748 character(len=4) :: my_mode 749 character(len=500) :: ofmt,hfmt,nafmt,msg 750 751 ! ********************************************************************* 752 753 my_unt =std_out; if (PRESENT(unit )) my_unt =unit 754 my_prtvol=0 ; if (PRESENT(prtvol )) my_prtvol=prtvol 755 my_mode ='COLL' ; if (PRESENT(mode_paral)) my_mode =mode_paral 756 757 msg='==== Info on the FFT_prof_t object ====' 758 if (PRESENT(header)) msg='==== '//TRIM(ADJUSTL(header))//' ====' 759 760 call wrtout(my_unt,ch10//REPEAT("=",LEN_TRIM(msg))) 761 call wrtout(my_unt,msg,my_mode) 762 call wrtout(my_unt,REPEAT("=",LEN_TRIM(msg))) 763 764 field1_w=0 ! Width of the field used to print key names. 765 do ii=1,SIZE(Fprof) 766 field1_w = MAX(field1_w, LEN_TRIM(Fprof(ii)%test_name)) 767 end do 768 769 ! TODO Add gflops 770 if (field1_w==0) RETURN 771 field1_w = field1_w + 2 ! To account for ". " 772 write(ofmt,*)"(a",field1_w,",2x,2(f7.4,4x),1x,i2,1x,a,i3,a,1x,i0,4x,2(es9.2,3x))" 773 774 write(hfmt,*)"(a",field1_w,",2x,a)" 775 write(std_out,hfmt)" Library ","CPU-time WALL-time nthreads ncalls Max_|Err| <|Err|>" 776 ! 777 ! Find reference library. 778 ref_lib=0 779 do ii=1,SIZE(Fprof) 780 if (Fprof(ii)%ncalls>0) then 781 ref_lib = ii 782 EXIT 783 end if 784 end do 785 !ref_lib=3 786 ! 787 ! Write timing analysis and error wrt reference library if available. 788 check_mabs_err=zero; check_mean_err=zero 789 do ii=1,SIZE(Fprof) 790 ncalls = Fprof(ii)%ncalls 791 if (ncalls>0) then 792 mabs_err = zero; mean_err=zero 793 if (ref_lib>0) then 794 mabs_err = MAXVAL( ABS(Fprof(ii)%results - Fprof(ref_lib)%results) ) 795 mean_err = SUM( ABS(Fprof(ii)%results - Fprof(ref_lib)%results) ) / SIZE(Fprof(ref_lib)%results) 796 ! Relative error is not a good estimator because some components are close to zero within machine accuracy. 797 !mean_err = 100 * MAXVAL( ABS(Fprof(ii)%results - Fprof(1)%results)/ ABS(Fprof(1)%results )) 798 !ifft = imax_loc( ABS(Fprof(ii)%results - Fprof(1)%results)/ ABS(Fprof(1)%results) ) 799 !write(std_out,*) Fprof(ii)%results(ifft),Fprof(1)%results(ifft) 800 end if 801 if (Fprof(ii)%nthreads==1) ref_wtime = Fprof(ii)%wall_time 802 para_eff = 100 * ref_wtime / ( Fprof(ii)%wall_time * Fprof(ii)%nthreads) 803 write(std_out,ofmt)& 804 & "- "//Fprof(ii)%test_name,Fprof(ii)%cpu_time/ncalls,Fprof(ii)%wall_time/ncalls,& 805 & Fprof(ii)%nthreads,"(",NINT(para_eff),"%)",ncalls,mabs_err,mean_err 806 check_mabs_err = MAX(check_mabs_err, mabs_err) 807 check_mean_err = MAX(check_mean_err, mean_err) 808 else 809 write(nafmt,*)"(a",field1_w,",2x,a)" 810 write(std_out,nafmt)"- "//Fprof(ii)%test_name," N/A N/A N/A N/A N/A N/A" 811 end if 812 end do 813 814 if (ref_lib>0) then 815 write(std_out,'(/,2(a,es9.2),2a)')& 816 & " Consistency check: MAX(Max_|Err|) = ",check_mabs_err,& 817 & ", Max(<|Err|>) = ",check_mean_err,", reference_lib: ",TRIM(Fprof(ref_lib)%test_name) 818 end if 819 write(std_out,*) 820 821 end subroutine fftprof_print
m_FFT_prof/name_of [ Functions ]
[ Top ] [ m_FFT_prof ] [ Functions ]
NAME
name_of
FUNCTION
Returns a string with info on the test.
INPUTS
OUTPUT
PARENTS
SOURCE
504 function name_of(Ftest) 505 506 507 !This section has been created automatically by the script Abilint (TD). 508 !Do not modify the following lines by hand. 509 #undef ABI_FUNC 510 #define ABI_FUNC 'name_of' 511 !End of the abilint section 512 513 implicit none 514 515 !Arguments ----------------------------------- 516 !scalars 517 character(len=TNAME_LEN) :: name_of 518 type(FFT_test_t),intent(in) :: Ftest 519 520 !Local variables------------------------------- 521 !scalars 522 character(len=TNAME_LEN) :: library_name,cplex_mode,padding_mode 523 524 ! ********************************************************************* 525 526 call fftalg_info(Ftest%ngfft(7),library_name,cplex_mode,padding_mode) 527 !name_of = TRIM(library_name)//"; "//TRIM(cplex_mode)//"; "//TRIM(padding_mode) 528 529 write(name_of,'(i3)')Ftest%ngfft(7) 530 name_of = TRIM(library_name)//" ("//TRIM(name_of)//")" 531 532 end function name_of
m_FFT_prof/prof_fourdp [ Functions ]
[ Top ] [ m_FFT_prof ] [ Functions ]
NAME
prof_fourdp
FUNCTION
Profile fourdp
INPUTS
OUTPUT
PARENTS
fftprof
CHILDREN
fft_test_free,fft_test_init,fft_test_nullify,fftprof_free,get_kg random_number,time_rhotwg
SOURCE
1818 subroutine prof_fourdp(fft_setups,isign,cplex,necut,ecut_arth,boxcutmin,rprimd,nsym,symrel,MPI_enreg_in) 1819 1820 1821 !This section has been created automatically by the script Abilint (TD). 1822 !Do not modify the following lines by hand. 1823 #undef ABI_FUNC 1824 #define ABI_FUNC 'prof_fourdp' 1825 !End of the abilint section 1826 1827 implicit none 1828 1829 !Arguments ----------------------------------- 1830 !scalars 1831 integer,intent(in) :: nsym,isign,cplex,necut 1832 real(dp),intent(in) :: boxcutmin 1833 type(MPI_type),intent(in) :: MPI_enreg_in 1834 !arrays 1835 integer,intent(in) :: fft_setups(:,:),symrel(3,3,nsym) 1836 real(dp),intent(in) :: ecut_arth(2) 1837 real(dp),intent(in) :: rprimd(3,3) 1838 1839 !Local variables------------------------------- 1840 !scalars 1841 integer :: iec,nsetups,set,funt 1842 type(FFT_test_t) :: Ftest 1843 type(FFT_prof_t) :: Ftprof 1844 character(len=500) :: msg,frm,header 1845 character(len=fnlen) :: fname 1846 !arrays 1847 integer :: ngfft_ecut(18,necut) 1848 real(dp),parameter :: k_gamma(3)=zero 1849 real(dp) :: ecut_list(necut) 1850 real(dp),allocatable :: prof_res(:,:,:) 1851 1852 ! ********************************************************************* 1853 1854 nsetups = SIZE(fft_setups,DIM=2) 1855 ecut_list = arth(ecut_arth(1),ecut_arth(2),necut) 1856 ! 1857 ! Open file and write header with info. 1858 write(fname,'(2(a,i1))')"PROF_fourdp_cplex",cplex,"_isign",isign 1859 if (open_file(fname,msg,newunit=funt) /= 0) then 1860 MSG_ERROR(msg) 1861 end if 1862 1863 write(msg,'(2(a,i0))')"Benchmark: routine = fourdp, cplex =",cplex,", isign=",isign 1864 write(std_out,'(a)')" Running "//TRIM(msg) 1865 1866 write(funt,'(a)')"# "//TRIM(msg) 1867 do set=1,nsetups 1868 write(funt,'(a,5(a,i0))') "#",& 1869 & " fftalg = " ,fft_setups(1,set),& 1870 & ", fftcache = " ,fft_setups(2,set),& 1871 & ", ndat = " ,fft_setups(3,set),& 1872 & ", nthreads = " ,fft_setups(4,set),& 1873 & ", available = ",fft_setups(5,set) 1874 end do 1875 1876 ABI_MALLOC(prof_res,(2,necut,nsetups)) 1877 1878 do set=1,nsetups 1879 ! 1880 do iec=1,necut 1881 call fft_test_nullify(Ftest) 1882 call fft_test_init(Ftest,fft_setups(:,set),k_gamma,ecut_list(iec),boxcutmin,rprimd,nsym,symrel,MPI_enreg_in) 1883 1884 call time_fourdp(Ftest,isign,cplex,header,Ftprof) 1885 1886 prof_res(1,iec,set) = Ftprof%cpu_time /Ftprof%ncalls 1887 prof_res(2,iec,set) = Ftprof%wall_time/Ftprof%ncalls 1888 ! 1889 ! Save FFT divisions. 1890 if (set==1) ngfft_ecut(:,iec) = Ftest%ngfft 1891 1892 call fftprof_free(Ftprof) 1893 call fft_test_free(Ftest) 1894 end do 1895 ! 1896 end do 1897 ! 1898 ! Write the wall-time as a function of ecut. 1899 write(frm,*)"(f7.1,3i4,",nsetups,"(f7.4))" 1900 do iec=1,necut 1901 write(funt,frm) ecut_list(iec),ngfft_ecut(4:6,iec),prof_res(2,iec,:) 1902 end do 1903 1904 close(funt) 1905 ABI_FREE(prof_res) 1906 1907 end subroutine prof_fourdp
m_FFT_prof/prof_fourwf [ Functions ]
[ Top ] [ m_FFT_prof ] [ Functions ]
NAME
prof_fourwf
FUNCTION
profile fourwf
INPUTS
OUTPUT
PARENTS
fftprof
CHILDREN
fft_test_free,fft_test_init,fft_test_nullify,fftprof_free,get_kg random_number,time_rhotwg
SOURCE
1932 subroutine prof_fourwf(fft_setups,cplex,option,kpoint,necut,ecut_arth,boxcutmin,rprimd,nsym,symrel,MPI_enreg_in) 1933 1934 1935 !This section has been created automatically by the script Abilint (TD). 1936 !Do not modify the following lines by hand. 1937 #undef ABI_FUNC 1938 #define ABI_FUNC 'prof_fourwf' 1939 !End of the abilint section 1940 1941 implicit none 1942 1943 !Arguments ----------------------------------- 1944 !scalars 1945 integer,intent(in) :: nsym,cplex,necut,option 1946 real(dp),intent(in) :: boxcutmin 1947 type(MPI_type),intent(in) :: MPI_enreg_in 1948 !arrays 1949 integer,intent(in) :: fft_setups(:,:),symrel(3,3,nsym) 1950 real(dp),intent(in) :: ecut_arth(2) 1951 real(dp),intent(in) :: kpoint(3),rprimd(3,3) 1952 1953 !Local variables------------------------------- 1954 !scalars 1955 integer :: iec,nsetups,set,funt,istwf_k 1956 type(FFT_test_t) :: Ftest 1957 type(FFT_prof_t) :: Ftprof 1958 character(len=500) :: msg,frm,header 1959 character(len=fnlen) :: fname 1960 !arrays 1961 integer :: ngfft_ecut(18,necut) 1962 real(dp) :: ecut_list(necut) 1963 real(dp),allocatable :: prof_res(:,:,:) 1964 1965 ! ********************************************************************* 1966 1967 nsetups = SIZE(fft_setups,DIM=2) 1968 ecut_list = arth(ecut_arth(1),ecut_arth(2),necut) 1969 istwf_k = set_istwfk(kpoint) 1970 ! 1971 ! Open file and write header with info. 1972 write(fname,'(3(a,i1))')"PROF_fourwf_cplex",cplex,"_option",option,"_istwfk",istwf_k 1973 if (open_file(fname,msg,newunit=funt) /= 0) then 1974 MSG_ERROR(msg) 1975 end if 1976 1977 write(msg,'(3(a,i1))')"Benchmark: routine = fourwf, cplex = ",cplex,", option= ",option,", istwfk= ",istwf_k 1978 write(std_out,'(a)')" Running "//TRIM(msg) 1979 1980 write(funt,'(a)')"# "//TRIM(msg) 1981 do set=1,nsetups 1982 write(funt,'(a,5(a,i0))') "#",& 1983 & " fftalg = " ,fft_setups(1,set),& 1984 & ", fftcache = " ,fft_setups(2,set),& 1985 & ", ndat = " ,fft_setups(3,set),& 1986 & ", nthreads = " ,fft_setups(4,set),& 1987 & ", available = ",fft_setups(5,set) 1988 end do 1989 1990 ABI_MALLOC(prof_res,(2,necut,nsetups)) 1991 1992 do set=1,nsetups 1993 ! 1994 do iec=1,necut 1995 call fft_test_nullify(Ftest) 1996 call fft_test_init(Ftest,fft_setups(:,set),kpoint,ecut_list(iec),boxcutmin,rprimd,nsym,symrel,MPI_enreg_in) 1997 1998 call time_fourwf(Ftest,cplex,option,header,Ftprof) 1999 2000 prof_res(1,iec,set) = Ftprof%cpu_time /Ftprof%ncalls 2001 prof_res(2,iec,set) = Ftprof%wall_time/Ftprof%ncalls 2002 ! 2003 ! Save FFT divisions. 2004 if (set==1) ngfft_ecut(:,iec) = Ftest%ngfft 2005 2006 call fftprof_free(Ftprof) 2007 call fft_test_free(Ftest) 2008 end do 2009 ! 2010 end do 2011 ! 2012 ! Write the wall-time as a function of ecut. 2013 write(frm,*)"(f7.1,3i4,",nsetups,"(f7.4))" 2014 do iec=1,necut 2015 write(funt,frm) ecut_list(iec),ngfft_ecut(4:6,iec),prof_res(2,iec,:) 2016 end do 2017 2018 close(funt) 2019 ABI_FREE(prof_res) 2020 2021 end subroutine prof_fourwf
m_FFT_prof/prof_rhotwg [ Functions ]
[ Top ] [ m_FFT_prof ] [ Functions ]
NAME
prof_rhotwg
FUNCTION
profile rhotwg
INPUTS
OUTPUT
PARENTS
fftprof
CHILDREN
fft_test_free,fft_test_init,fft_test_nullify,fftprof_free,get_kg random_number,time_rhotwg
SOURCE
2046 subroutine prof_rhotwg(fft_setups,map2sphere,use_padfft,necut,ecut_arth,osc_ecut,boxcutmin,& 2047 & rprimd,nsym,symrel,gmet,MPI_enreg_in) 2048 2049 2050 !This section has been created automatically by the script Abilint (TD). 2051 !Do not modify the following lines by hand. 2052 #undef ABI_FUNC 2053 #define ABI_FUNC 'prof_rhotwg' 2054 !End of the abilint section 2055 2056 implicit none 2057 2058 !Arguments ----------------------------------- 2059 !scalars 2060 integer,intent(in) :: nsym,necut,map2sphere,use_padfft 2061 real(dp),intent(in) :: boxcutmin,osc_ecut 2062 type(MPI_type),intent(in) :: MPI_enreg_in 2063 !arrays 2064 integer,intent(in) :: fft_setups(:,:),symrel(3,3,nsym) 2065 real(dp),intent(in) :: ecut_arth(2) 2066 real(dp),intent(in) :: rprimd(3,3),gmet(3,3) 2067 2068 !Local variables------------------------------- 2069 !scalars 2070 integer :: iec,nsetups,set,funt,osc_npw 2071 type(FFT_test_t) :: Ftest 2072 type(FFT_prof_t) :: Ftprof 2073 character(len=500) :: msg,frm,header 2074 character(len=fnlen) :: fname 2075 !arrays 2076 integer,allocatable :: osc_gvec(:,:) 2077 integer :: ngfft_ecut(18,necut) 2078 real(dp),parameter :: k_gamma(3)=zero 2079 real(dp) :: ecut_list(necut) 2080 real(dp),allocatable :: prof_res(:,:,:) 2081 2082 ! ********************************************************************* 2083 2084 nsetups = SIZE(fft_setups,DIM=2) 2085 ecut_list = arth(ecut_arth(1),ecut_arth(2),necut) 2086 ! 2087 ! Open file and write header with info. 2088 write(fname,'(2(a,i1))')"PROF_rhotwg_map2sphere",map2sphere,"_use_padfft",use_padfft 2089 2090 if (open_file(fname,msg,newunit=funt) /= 0) then 2091 MSG_ERROR(msg) 2092 end if 2093 2094 write(msg,'(2(a,i0),a,f5.1)')& 2095 & "Benchmark: routine = rho_tw_g, map2sphere = ",map2sphere,", use_padfft = ",use_padfft,", osc_ecut = ",osc_ecut 2096 write(std_out,'(a)')" Running "//TRIM(msg) 2097 2098 write(funt,'(a)')"# "//TRIM(msg) 2099 do set=1,nsetups 2100 write(funt,'(a,5(a,i0))') "#",& 2101 & " fftalg = " ,fft_setups(1,set),& 2102 & ", fftcache = " ,fft_setups(2,set),& 2103 & ", ndat = " ,fft_setups(3,set),& 2104 & ", nthreads = " ,fft_setups(4,set),& 2105 & ", available = ",fft_setups(5,set) 2106 end do 2107 2108 call get_kg((/zero,zero,zero/),1,osc_ecut,gmet,osc_npw,osc_gvec) 2109 ! TODO should reorder by shells to be consistent with the GW part! 2110 ! Moreover I guess this ordering is more efficient when we have 2111 ! to map the box to the G-sphere! 2112 2113 ABI_MALLOC(prof_res,(2,necut,nsetups)) 2114 2115 do set=1,nsetups 2116 ! 2117 do iec=1,necut 2118 call fft_test_nullify(Ftest) 2119 call fft_test_init(Ftest,fft_setups(:,set),k_gamma,ecut_list(iec),boxcutmin,rprimd,nsym,symrel,MPI_enreg_in) 2120 2121 call time_rhotwg(Ftest,map2sphere,use_padfft,osc_npw,osc_gvec,header,Ftprof) 2122 2123 prof_res(1,iec,set) = Ftprof%cpu_time /Ftprof%ncalls 2124 prof_res(2,iec,set) = Ftprof%wall_time/Ftprof%ncalls 2125 ! 2126 ! Save FFT divisions. 2127 if (set==1) ngfft_ecut(:,iec) = Ftest%ngfft 2128 2129 call fftprof_free(Ftprof) 2130 call fft_test_free(Ftest) 2131 end do 2132 ! 2133 end do 2134 ! 2135 ! Write the wall-time as a function of ecut. 2136 write(frm,*)"(f7.1,3i4,",nsetups,"(f7.4))" 2137 do iec=1,necut 2138 write(funt,frm) ecut_list(iec),ngfft_ecut(4:6,iec),prof_res(2,iec,:) 2139 end do 2140 2141 close(funt) 2142 ABI_FREE(prof_res) 2143 ABI_FREE(osc_gvec) 2144 2145 end subroutine prof_rhotwg
m_FFT_prof/time_fftbox [ Functions ]
[ Top ] [ m_FFT_prof ] [ Functions ]
NAME
time_fftbox
FUNCTION
Profiling of the fftbox_[io]p routines.
INPUTS
OUTPUT
PARENTS
fftprof
CHILDREN
fft_test_free,fft_test_init,fft_test_nullify,fftprof_free,get_kg random_number,time_rhotwg
SOURCE
992 subroutine time_fftbox(Ftest,isign,inplace,header,Ftprof) 993 994 995 !This section has been created automatically by the script Abilint (TD). 996 !Do not modify the following lines by hand. 997 #undef ABI_FUNC 998 #define ABI_FUNC 'time_fftbox' 999 !End of the abilint section 1000 1001 implicit none 1002 1003 !Arguments ----------------------------------- 1004 !scalars 1005 integer,intent(in) :: isign,inplace 1006 character(len=500),intent(inout) :: header !vz_i 1007 type(FFT_test_t),intent(inout) :: Ftest 1008 type(FFT_prof_t),intent(out) :: Ftprof 1009 1010 !Local variables------------------------------- 1011 !scalars 1012 integer,parameter :: nspinor1=1 1013 integer :: icall,i1,i2,i3,n1,n2,n3,ifft,ndat,nfft,dat,padat 1014 real(dp) :: cpu_time,wall_time,gflops,gsq 1015 character(len=500) :: msg 1016 character(len=TNAME_LEN) :: test_name 1017 type(fftbox_plan3_t) :: plan 1018 !arrays 1019 integer,parameter :: g0(3) = (/1,-2,1/) 1020 integer :: gg(3) 1021 complex(dpc),allocatable :: ffc(:),ggc(:),results(:) 1022 ! ********************************************************************* 1023 1024 test_name = name_of(Ftest) 1025 1026 if (Ftest%available==0) then 1027 call fftprof_init(Ftprof,test_name,0,0,0,zero,zero,zero) 1028 RETURN 1029 end if 1030 1031 call xomp_set_num_threads(Ftest%nthreads) 1032 1033 if (Ftest%ngfft(7)/100 == FFT_FFTW3) then 1034 call fftw3_set_nthreads(Ftest%nthreads) 1035 end if 1036 1037 n1=Ftest%ngfft(1); n2=Ftest%ngfft(2); n3=Ftest%ngfft(3) 1038 nfft = Ftest%nfft 1039 ndat = Ftest%ndat 1040 1041 write(header,'(3(a,i2))')" fftbox with isign ",isign,", in-place ",inplace,", ndat ",ndat 1042 1043 ABI_MALLOC(ffc,(nfft*ndat)) 1044 ABI_MALLOC(ggc,(nfft*ndat)) 1045 ABI_MALLOC(results,(nfft*ndat)) 1046 ffc=czero; ggc=czero; results=czero 1047 1048 if (isign==-1) then 1049 call calc_ceigr(g0,nfft,nspinor1,Ftest%ngfft,ffc) 1050 else if (isign==1) then 1051 ifft=0 1052 do i3=1,n3 1053 gg(3)=i3-1; if (i3>1+n3/2) gg(3)=i3-n3-1 ! TODO recheck this 1054 do i2=1,n2 1055 gg(2)=i2-1; if (i2>1+n2/2) gg(2)=i2-n2-1 1056 do i1=1,n1 1057 gg(1)=i1-1; if (i1>1+n1/2) gg(1)=i1-n1-1 1058 gsq = two_pi**2 * DOT_PRODUCT(gg,MATMUL(Ftest%gmet,gg)) 1059 ifft=ifft+1 1060 ffc(ifft) = EXP(-gsq) 1061 end do 1062 end do 1063 end do 1064 else 1065 MSG_ERROR("Wrong isign") 1066 end if 1067 1068 ! Replicate and scale input data 1069 do dat=2,ndat 1070 padat = (dat-1) * nfft 1071 do ifft=1,nfft 1072 ffc(ifft+padat) = DBLE(dat) * ffc(ifft) 1073 end do 1074 end do 1075 1076 call cwtime(cpu_time,wall_time,gflops,"start") 1077 1078 ! No augmentation here. 1079 call fftbox_plan3_many(plan,ndat,Ftest%ngfft(1:3),Ftest%ngfft(1:3),Ftest%ngfft(7),isign) 1080 1081 select case (inplace) 1082 case (0) 1083 do icall=1,NCALLS_FOR_TEST 1084 ifft = empty_cache(CACHE_KBSIZE) 1085 call fftbox_execute(plan,ffc,ggc) 1086 ! Store results at the first call. 1087 if (icall==1) results = ggc 1088 end do 1089 case (1) 1090 do icall=1,NCALLS_FOR_TEST 1091 ifft = empty_cache(CACHE_KBSIZE) 1092 call fftbox_execute(plan,ffc) 1093 ! Store results at the first call. 1094 if (icall==1) results = ffc 1095 end do 1096 case default 1097 write(msg,'(a,i0)')" Wrong value for inplace= ",inplace 1098 MSG_ERROR(msg) 1099 end select 1100 1101 call cwtime(cpu_time,wall_time,gflops,"stop") 1102 1103 ABI_FREE(ffc) 1104 ABI_FREE(ggc) 1105 1106 call fftprof_init(Ftprof,test_name,Ftest%nthreads,NCALLS_FOR_TEST,Ftest%ndat,& 1107 & cpu_time,wall_time,gflops,results=results) 1108 1109 ABI_FREE(results) 1110 1111 end subroutine time_fftbox
m_FFT_prof/time_fftu [ Functions ]
[ Top ] [ m_FFT_prof ] [ Functions ]
NAME
time_fftu
FUNCTION
Profiling of the fftu routines.
INPUTS
OUTPUT
PARENTS
fftprof
CHILDREN
fft_test_free,fft_test_init,fft_test_nullify,fftprof_free,get_kg random_number,time_rhotwg
SOURCE
1641 subroutine time_fftu(Ftest,isign,header,Ftprof) 1642 1643 1644 !This section has been created automatically by the script Abilint (TD). 1645 !Do not modify the following lines by hand. 1646 #undef ABI_FUNC 1647 #define ABI_FUNC 'time_fftu' 1648 !End of the abilint section 1649 1650 implicit none 1651 1652 !Arguments ----------------------------------- 1653 !scalars 1654 integer,intent(in) :: isign 1655 character(len=500),intent(out) :: header 1656 type(FFT_test_t),intent(inout) :: Ftest 1657 type(FFT_prof_t),intent(out) :: Ftprof 1658 1659 !Local variables------------------------------- 1660 !scalars 1661 integer,parameter :: nspinor1=1 1662 integer :: icall,i1,i2,i3,n1,n2,n3,ifft,npw_k,ndat,dat,nfft,cnt,ipw,padat,istwf_k 1663 !integer :: n4,n5,n6,n456,idx 1664 real(dp) :: cpu_time,wall_time,gflops,gsq,g0dotr 1665 logical :: not_implemented 1666 character(len=500) :: msg 1667 character(len=TNAME_LEN) :: test_name 1668 !arrays 1669 integer,parameter :: g0(3) = (/1,-2,1/) 1670 integer :: gg(3) 1671 integer,allocatable :: kg_k(:,:),gbound(:,:) 1672 complex(dpc),allocatable :: ug(:),results(:),ur(:) 1673 ! ********************************************************************* 1674 1675 test_name = name_of(Ftest) 1676 1677 n1=Ftest%ngfft(1); n2=Ftest%ngfft(2); n3=Ftest%ngfft(3) 1678 !n4=Ftest%ngfft(4); n5=Ftest%ngfft(5); n6=Ftest%ngfft(6) 1679 1680 nfft = n1*n2*n3 1681 !n456 = n4*n5*n6 1682 ndat = Ftest%ndat 1683 1684 write(header,'(2(a,i2))')" fftu with isign ",isign,", ndat ",ndat 1685 1686 ! TODO: zero-pad not available with SG2001 routines. 1687 not_implemented = (Ftest%ngfft(7) == 412) 1688 1689 if (Ftest%available==0.or.not_implemented) then 1690 call fftprof_init(Ftprof,test_name,0,0,0,zero,zero,zero) 1691 RETURN 1692 end if 1693 1694 call xomp_set_num_threads(Ftest%nthreads) 1695 1696 if (Ftest%ngfft(7)/100 == FFT_FFTW3) then 1697 call fftw3_set_nthreads(Ftest%nthreads) 1698 end if 1699 ! 1700 istwf_k = Ftest%istwf_k 1701 !istwf_k = 1 ! Do not use istwf_k trick here 1702 call get_kg(Ftest%kpoint,istwf_k,Ftest%ecut,Ftest%gmet,npw_k,kg_k) 1703 1704 ABI_MALLOC(gbound,(2*Ftest%mgfft+8,2)) 1705 call sphereboundary(gbound,istwf_k,kg_k,Ftest%mgfft,npw_k) 1706 1707 ABI_MALLOC(ug,(npw_k*ndat)) 1708 ABI_MALLOC(ur, (nfft*ndat)) 1709 ABI_MALLOC(results,(nfft*ndat)) 1710 results=czero ! needed for R --> G since npw_k < nfft 1711 1712 if (isign==1) then 1713 do cnt=0,(npw_k * ndat) - 1 1714 ipw = 1 + MOD(cnt, npw_k) 1715 gg = kg_k(:,ipw) 1716 gsq = two_pi**2 * DOT_PRODUCT(gg,MATMUL(Ftest%gmet,gg)) 1717 ug(cnt+1) = DCMPLX(EXP(-gsq), zero) 1718 end do 1719 ! 1720 ! Replicate input data 1721 do dat=2,ndat 1722 padat = (dat-1) * npw_k 1723 do ipw=1,npw_k 1724 ug(ipw+padat) = DBLE(dat) * ug(ipw) 1725 end do 1726 end do 1727 1728 else if (isign==-1) then 1729 1730 do i3=0,n3-1 1731 do i2=0,n2-1 1732 do i1=0,n1-1 1733 g0dotr= two_pi*( g0(1)*(i1/DBLE(n1)) & 1734 & +g0(2)*(i2/DBLE(n2)) & 1735 & +g0(3)*(i3/DBLE(n3)) ) 1736 ifft = 1 + i1 + i2*n1 + i3*n2*n3 1737 ur(ifft)=DCMPLX(DCOS(g0dotr),DSIN(g0dotr)) 1738 end do 1739 end do 1740 end do 1741 ! 1742 ! Replicate input data 1743 do dat=2,ndat 1744 padat = (dat-1)*nfft 1745 do ifft=1,nfft 1746 ur(ifft+padat) = DBLE(dat) * ur(ifft) 1747 end do 1748 end do 1749 1750 else 1751 write(msg,'(a,i0)')" Wrong isign= ",isign 1752 MSG_ERROR(msg) 1753 end if 1754 1755 call cwtime(cpu_time,wall_time,gflops,"start") 1756 1757 do icall=1,NCALLS_FOR_TEST 1758 ifft = empty_cache(CACHE_KBSIZE) 1759 1760 !call fftpad(ug,Ftest%ngfft,n1,n2,n3,n4,n5,n6,ndat,Ftest%mgfft,isign,gbound) 1761 if (isign==+1) then 1762 call fft_ug(npw_k,nfft,nspinor1,ndat,Ftest%mgfft,Ftest%ngfft,istwf_k,kg_k,gbound,ug,ur) 1763 ! Store results at the first call. 1764 if (icall==1) then 1765 call xcopy(nfft*ndat,ur,1,results,1) 1766 end if 1767 1768 else 1769 call fft_ur(npw_k,nfft,nspinor1,ndat,Ftest%mgfft,Ftest%ngfft,istwf_k,kg_k,gbound,ur,ug) 1770 ! Store results at the first call. 1771 if (icall==1) then 1772 call xcopy(npw_k*ndat,ug,1,results,1) 1773 end if 1774 end if 1775 !if (isign==-1) then 1776 ! write(Ftest%ngfft(7),*)"ngfft, isign ",Ftest%ngfft(7),isign 1777 ! do idx =1,n4*n5*n6*ndat; write(Ftest%ngfft(7),*)idx,results(idx); end do 1778 !end if 1779 end do 1780 1781 call cwtime(cpu_time,wall_time,gflops,"stop") 1782 1783 ABI_FREE(kg_k) 1784 ABI_FREE(gbound) 1785 ABI_FREE(ug) 1786 ABI_FREE(ur) 1787 1788 call fftprof_init(Ftprof,test_name,Ftest%nthreads,NCALLS_FOR_TEST,ndat,& 1789 & cpu_time,wall_time,gflops,results=results) 1790 1791 ABI_FREE(results) 1792 1793 end subroutine time_fftu
m_FFT_prof/time_fourdp [ Functions ]
[ Top ] [ m_FFT_prof ] [ Functions ]
NAME
time_fourdp
FUNCTION
Profiling of the fourdp routine.
INPUTS
OUTPUT
PARENTS
fftprof,m_fft_prof
CHILDREN
fft_test_free,fft_test_init,fft_test_nullify,fftprof_free,get_kg random_number,time_rhotwg
SOURCE
846 subroutine time_fourdp(Ftest,isign,cplex,header,Ftprof) 847 848 849 !This section has been created automatically by the script Abilint (TD). 850 !Do not modify the following lines by hand. 851 #undef ABI_FUNC 852 #define ABI_FUNC 'time_fourdp' 853 !End of the abilint section 854 855 implicit none 856 857 !Arguments ----------------------------------- 858 !scalars 859 integer,intent(in) :: cplex,isign 860 character(len=500),intent(out) :: header 861 type(FFT_test_t),intent(inout) :: Ftest 862 type(FFT_prof_t),intent(out) :: Ftprof 863 864 !Local variables------------------------------- 865 !scalars 866 integer,parameter :: nspinor1=1 867 integer :: icall,i1,i2,i3,n1,n2,n3,ifft 868 real(dp) :: cpu_time,wall_time,gflops 869 real(dp) :: gsq 870 character(len=500) :: msg 871 character(len=TNAME_LEN) :: test_name 872 !arrays 873 integer :: gg(3) 874 integer,parameter :: g0(3)=(/1,2,-1/) 875 real(dp),allocatable :: fofg(:,:),fofr(:) 876 complex(dpc),allocatable :: results(:),ctmp(:) 877 878 ! ********************************************************************* 879 880 test_name = name_of(Ftest) 881 n1=Ftest%ngfft(1) 882 n2=Ftest%ngfft(2) 883 n3=Ftest%ngfft(3) 884 885 write(header,'(2(a,i2),a)')" fourdp with cplex ",cplex,", isign ",isign,", ndat 1" 886 887 if (Ftest%available==0) then 888 call fftprof_init(Ftprof,test_name,0,0,0,zero,zero,zero) 889 RETURN 890 end if 891 892 call xomp_set_num_threads(Ftest%nthreads) 893 894 if (Ftest%ngfft(7)/100 == FFT_FFTW3) then 895 call fftw3_set_nthreads(Ftest%nthreads) 896 end if 897 898 ABI_MALLOC(fofg,(2,Ftest%nfft)) 899 ABI_MALLOC(fofr,(cplex*Ftest%nfft)) 900 ! 901 ! Initialize input data. 902 if (isign==1) then ! initialize fofg 903 fofg = zero 904 ifft=0 905 do i3=1,n3 906 gg(3)=i3-1; if (i3>1+n3/2) gg(3)=i3-n3-1 ! TODO recheck this 907 do i2=1,n2 908 gg(2)=i2-1; if (i2>1+n2/2) gg(2)=i2-n2-1 909 do i1=1,n1 910 gg(1)=i1-1; if (i1>1+n1/2) gg(1)=i1-n1-1 911 gsq = two_pi**2 * DOT_PRODUCT(gg,MATMUL(Ftest%gmet,gg)) 912 ifft=ifft+1 913 fofg(1,ifft) = EXP(-gsq) 914 fofg(2,ifft) = zero 915 end do 916 end do 917 end do 918 919 else ! init fofr 920 if (cplex==2) then 921 call calc_eigr(g0,Ftest%nfft,Ftest%ngfft,fofr) 922 else if (cplex==1) then 923 ABI_MALLOC(ctmp,(Ftest%nfft)) 924 call calc_ceigr(g0,Ftest%nfft,nspinor1,Ftest%ngfft,ctmp) 925 fofr = REAL(ctmp) 926 ABI_FREE(ctmp) 927 else 928 write(msg,'(a,i0)')" Wrong cplex: ",cplex 929 MSG_ERROR(msg) 930 end if 931 end if 932 933 ABI_MALLOC(results,(Ftest%nfft)) 934 results=czero 935 936 call cwtime(cpu_time,wall_time,gflops,"start") 937 938 do icall=1,NCALLS_FOR_TEST 939 ! 940 ifft = empty_cache(CACHE_KBSIZE) 941 call fourdp(cplex,fofg,fofr,isign,Ftest%MPI_enreg,Ftest%nfft,Ftest%ngfft,Ftest%paral_kgb,0) 942 ! 943 ! Store results at the first call. 944 if (icall==1) then 945 if (isign==1) then 946 if (cplex==1) then 947 results = DCMPLX(fofr, zero) 948 else if (cplex==2) then 949 results = DCMPLX( fofr(1:2*Ftest%nfft:2), fofr(2:2*Ftest%nfft:2) ) 950 end if 951 else if (isign==-1) then 952 results = DCMPLX(fofg(1,:),fofg(2,:)) 953 end if 954 end if 955 end do 956 957 call cwtime(cpu_time,wall_time,gflops,"stop") 958 959 ABI_FREE(fofg) 960 ABI_FREE(fofr) 961 962 call fftprof_init(Ftprof,test_name,Ftest%nthreads,NCALLS_FOR_TEST,& 963 & Ftest%ndat,cpu_time,wall_time,gflops,results=results) 964 965 ABI_FREE(results) 966 967 end subroutine time_fourdp
m_FFT_prof/time_fourwf [ Functions ]
[ Top ] [ m_FFT_prof ] [ Functions ]
NAME
time_fourwf
FUNCTION
Profiling of the fourwf routine.
INPUTS
OUTPUT
PARENTS
fftprof,m_fft_prof
CHILDREN
fft_test_free,fft_test_init,fft_test_nullify,fftprof_free,get_kg random_number,time_rhotwg
SOURCE
1136 subroutine time_fourwf(Ftest,cplex,option_fourwf,header,Ftprof) 1137 1138 1139 !This section has been created automatically by the script Abilint (TD). 1140 !Do not modify the following lines by hand. 1141 #undef ABI_FUNC 1142 #define ABI_FUNC 'time_fourwf' 1143 !End of the abilint section 1144 1145 implicit none 1146 1147 !Arguments ----------------------------------- 1148 !scalars 1149 integer,intent(in) :: cplex,option_fourwf 1150 character(len=500),intent(out) :: header 1151 type(FFT_test_t),intent(inout) :: Ftest 1152 type(FFT_prof_t),intent(out) :: Ftprof 1153 1154 !Local variables------------------------------- 1155 !scalars 1156 integer,parameter :: tim0=0 1157 integer :: n1,n2,n3,n4,n5,n6,npw_out,icall,i1,i2,i3,idx,ipw,ndat,cnt,dat,padat 1158 integer :: fftalg,fftalga,fftalgc 1159 real(dp),parameter :: weight_i=one,weight_r=one 1160 real(dp) :: cpu_time,wall_time,gflops,gsq,g0dotr 1161 logical :: isbuggy,not_supported 1162 character(len=500) :: msg 1163 character(len=TNAME_LEN) :: test_name 1164 !arrays 1165 integer,parameter :: g0(3)=(/1,-1,2/) !g0(3)=(/1,0,0/) 1166 integer :: gg(3) 1167 integer,allocatable :: gbound_in(:,:),gbound_out(:,:) 1168 real(dp),allocatable :: denpot(:,:,:),fofg_in(:,:) 1169 real(dp),allocatable :: fofr_4(:,:,:,:),fofg_out(:,:) 1170 complex(dpc),allocatable :: results(:) 1171 1172 ! ********************************************************************* 1173 1174 test_name = name_of(Ftest) 1175 1176 fftalg = Ftest%ngfft(7); fftalga = fftalg/100; fftalgc = MOD(fftalg,10) 1177 1178 isbuggy = & 1179 & (option_fourwf==3 .and. fftalga==FFT_SG2002 .and. fftalgc /= 0 .and. any(Ftest%istwf_k == [3,4,5,6,7,8,9])) ! see sg_fourwf 1180 !isbuggy = .False. 1181 1182 !FIXME problems with the unitary tests reference files! 1183 not_supported = ( & 1184 & (fftalgc == 2 .and. option_fourwf==0 .and. Ftest%istwf_k > 2) & 1185 & ) 1186 !not_supported = .FALSE. 1187 1188 npw_out= Ftest%npw_kout 1189 ndat = Ftest%ndat 1190 n1=Ftest%ngfft(1); n2=Ftest%ngfft(2); n3=Ftest%ngfft(3) 1191 n4=Ftest%ngfft(4); n5=Ftest%ngfft(5); n6=Ftest%ngfft(6) 1192 1193 write(header,'(4(a,i2))')" fourwf with option ",option_fourwf,", cplex ",cplex,", ndat ",ndat,", istwf_k ",Ftest%istwf_k 1194 1195 if (isbuggy .or. not_supported .or. Ftest%available==0) then 1196 call fftprof_init(Ftprof,test_name,0,0,0,zero,zero,zero) 1197 RETURN 1198 end if 1199 1200 call xomp_set_num_threads(Ftest%nthreads) 1201 1202 if (Ftest%ngfft(7)/100 == FFT_FFTW3) then 1203 call fftw3_set_nthreads(Ftest%nthreads) 1204 end if 1205 1206 ! FFTW3 does not need gbound_in but oh well 1207 ABI_MALLOC(gbound_in,(2*Ftest%mgfft+8,2)) 1208 call sphereboundary(gbound_in,Ftest%istwf_k,Ftest%kg_k,Ftest%mgfft,Ftest%npw_k) 1209 1210 ABI_MALLOC(gbound_out,(2*Ftest%mgfft+8,2)) 1211 call sphereboundary(gbound_out,Ftest%istwf_k,Ftest%kg_kout,Ftest%mgfft,Ftest%npw_kout) 1212 1213 ABI_MALLOC(denpot,(cplex*n4,n5,n6)) 1214 ABI_MALLOC(fofg_in,(2,Ftest%npw_k*ndat)) 1215 ABI_MALLOC(fofg_out,(2,npw_out*ndat)) 1216 ABI_MALLOC(fofr_4,(2,n4,n5,n6*ndat)) 1217 1218 denpot = zero 1219 fofg_in = zero 1220 fofg_out = zero 1221 fofr_4 = zero 1222 1223 ABI_MALLOC(results,(Ftest%nfft*ndat)) 1224 results=czero 1225 1226 select case (option_fourwf) 1227 case (0,1,2) 1228 !! for option==0, fofgin(2,npwin*ndat)=holds input wavefunction in G sphere; 1229 !! fofr(2,n4,n5,n6) contains the output Fourier Transform of fofgin; 1230 !! no use of denpot, fofgout and npwout. 1231 !! for option==1, fofgin(2,npwin*ndat)=holds input wavefunction in G sphere; 1232 !! denpot(cplex*n4,n5,n6) contains the input density at input, 1233 !! and the updated density at output (accumulated); 1234 !! no use of fofgout and npwout. 1235 !! for option==2, fofgin(2,npwin*ndat)=holds input wavefunction in G sphere; 1236 !! denpot(cplex*n4,n5,n6) contains the input local potential; 1237 !! fofgout(2,npwout*ndat) contains the output function; 1238 !! 1239 do cnt=0,(Ftest%npw_k * ndat) - 1 1240 ipw = 1 + MOD(cnt, Ftest%npw_k) 1241 gg = Ftest%kg_k(:,ipw) 1242 gsq = two_pi**2 * DOT_PRODUCT(gg,MATMUL(Ftest%gmet,gg)) 1243 fofg_in(1,cnt+1) = EXP(-gsq) 1244 fofg_in(2,cnt+1) = zero 1245 end do 1246 1247 if (option_fourwf==1) then ! Init denpot 1248 denpot = one 1249 end if 1250 ! 1251 if (option_fourwf==2) then ! Init denpot 1252 ! 1253 if (cplex==1) then 1254 do i3=0,n3-1 1255 do i2=0,n2-1 1256 do i1=0,n1-1 1257 g0dotr= two_pi*( g0(1)*(i1/DBLE(n1)) & 1258 & +g0(2)*(i2/DBLE(n2)) & 1259 & +g0(3)*(i3/DBLE(n3)) ) 1260 denpot(i1+1,i2+1,i3+1)=COS(g0dotr) 1261 end do 1262 end do 1263 end do 1264 else if (cplex==2) then 1265 do i3=0,n3-1 1266 do i2=0,n2-1 1267 idx=1 1268 do i1=0,n1-1 1269 g0dotr= two_pi*( g0(1)*(i1/DBLE(n1)) & 1270 & +g0(2)*(i2/DBLE(n2)) & 1271 & +g0(3)*(i3/DBLE(n3)) ) 1272 1273 denpot(idx, i2+1,i3+1)= COS(g0dotr) 1274 denpot(idx+1,i2+1,i3+1)= SIN(g0dotr) 1275 idx=idx+2 1276 end do 1277 end do 1278 end do 1279 end if 1280 end if 1281 1282 case (3) 1283 !! for option==3, fofr(2,n4,n5,n6*ndat) contains the input real space wavefunction; 1284 !! fofgout(2,npwout*ndat) contains its output Fourier transform; 1285 !! no use of fofgin and npwin. 1286 idx=0 1287 do dat=1,ndat 1288 padat = (dat-1)*n6 1289 do i3=0,n3-1 1290 do i2=0,n2-1 1291 do i1=0,n1-1 1292 g0dotr= two_pi*( g0(1)*(i1/DBLE(n1)) & 1293 +g0(2)*(i2/DBLE(n2)) & 1294 +g0(3)*(i3/DBLE(n3)) ) 1295 !fofr_4(1,i1+1,i2+1,i3+1+padat)=COS(g0dotr) 1296 !fofr_4(2,i1+1,i2+1,i3+1+padat)=SIN(g0dotr) 1297 fofr_4(1,i1+1,i2+1,i3+1+padat)=10 * EXP(-g0dotr**2) 1298 fofr_4(2,i1+1,i2+1,i3+1+padat)=zero 1299 idx=idx+1 1300 results(idx) = DCMPLX(fofr_4(1,i1+1,i2+1,i3+1+padat), fofr_4(2,i1+1,i2+1,i3+1+padat)) 1301 end do 1302 end do 1303 end do 1304 end do 1305 1306 case default 1307 write(msg,'(a,i0)')" Wrong value for option_fourwf: ",option_fourwf 1308 MSG_ERROR(msg) 1309 end select 1310 1311 call cwtime(cpu_time,wall_time,gflops,"start") 1312 1313 do icall=1,NCALLS_FOR_TEST 1314 1315 i1 = empty_cache(CACHE_KBSIZE) 1316 1317 call fourwf(cplex,denpot,fofg_in,fofg_out,fofr_4,gbound_in,gbound_out,Ftest%istwf_k,& 1318 & Ftest%kg_k,Ftest%kg_kout,Ftest%mgfft,Ftest%MPI_enreg,ndat,Ftest%ngfft,Ftest%npw_k,npw_out,n4,n5,n6,option_fourwf,& 1319 & Ftest%paral_kgb,tim0,weight_r,weight_i) 1320 ! 1321 ! Store results at the first call. 1322 if (icall==1) then 1323 select case (option_fourwf) 1324 case (0) 1325 !! for option==0, fofgin(2,npwin*ndat)=holds input wavefunction in G sphere; 1326 !! fofr(2,n4,n5,n6) contains the output Fourier Transform of fofgin; 1327 !! no use of denpot, fofgout and npwout. 1328 idx=0 1329 do dat=1,ndat 1330 padat = (dat-1)*n6 1331 do i3=1,n3 1332 do i2=1,n2 1333 do i1=1,n1 1334 idx=idx+1 1335 results(idx) = DCMPLX(fofr_4(1,i1,i2,i3+padat),fofr_4(2,i1,i2,i3+padat)) 1336 end do 1337 end do 1338 end do 1339 end do 1340 case (1) 1341 !! for option==1, fofgin(2,npwin*ndat)=holds input wavefunction in G sphere; 1342 !! denpot(cplex*n4,n5,n6) contains the input density at input, 1343 !! and the updated density at output (accumulated); 1344 !! no use of fofgout and npwout. 1345 if (cplex==1) then 1346 idx=0 1347 do i3=1,n3 1348 do i2=1,n2 1349 do i1=1,n1 1350 idx=idx+1 1351 results(idx) = DCMPLX(denpot(i1,i2,i3),zero) 1352 end do 1353 end do 1354 end do 1355 1356 else if (cplex==2) then 1357 idx=0 1358 do i3=1,n3 1359 do i2=1,n2 1360 do i1=1,2*n1,2 1361 idx=idx+1 1362 results(idx) = DCMPLX(denpot(i1,i2,i3),denpot(i1+1,i2,i3)) 1363 end do 1364 end do 1365 end do 1366 else 1367 MSG_ERROR("Wrong cplex") 1368 end if 1369 1370 case (2) 1371 !! for option==2, fofgin(2,npwin*ndat)=holds input wavefunction in G sphere; 1372 !! denpot(cplex*n4,n5,n6) contains the input local potential; 1373 !! fofgout(2,npwout*ndat) contains the output function; 1374 do ipw=1,npw_out*ndat 1375 results(ipw) = DCMPLX(fofg_out(1,ipw),fofg_out(2,ipw)) 1376 end do 1377 case (3) 1378 !! for option==3, fofr(2,n4,n5,n6*ndat) contains the input real space wavefunction; 1379 !! fofgout(2,npwout*ndat) contains its output Fourier transform; 1380 !! no use of fofgin and npwin. 1381 do ipw=1,npw_out*ndat 1382 results(ipw) = DCMPLX(fofg_out(1,ipw),fofg_out(2,ipw)) 1383 end do 1384 !write(Ftest%ngfft(7),*)"results opt 3 Ftest%ngfft(7)",Ftest%ngfft(7) 1385 !do dat=1,ndat 1386 ! do ipw=1,npw_out 1387 ! idx = ipw + (dat-1)*npw_out 1388 ! write(Ftest%ngfft(7),*)ipw,dat,results(idx) 1389 ! end do 1390 !end do 1391 end select 1392 end if 1393 end do 1394 1395 call cwtime(cpu_time,wall_time,gflops,"stop") 1396 1397 ABI_FREE(denpot) 1398 ABI_FREE(fofg_in) 1399 ABI_FREE(fofg_out) 1400 ABI_FREE(fofr_4) 1401 ABI_FREE(gbound_in) 1402 ABI_FREE(gbound_out) 1403 1404 call fftprof_init(Ftprof,test_name,Ftest%nthreads,NCALLS_FOR_TEST,& 1405 & Ftest%ndat,cpu_time,wall_time,gflops,results=results) 1406 1407 ABI_FREE(results) 1408 1409 end subroutine time_fourwf
m_FFT_prof/time_rhotwg [ Functions ]
[ Top ] [ m_FFT_prof ] [ Functions ]
NAME
time_rhotwg
FUNCTION
Profiling of the rho_tw_g routine.
INPUTS
OUTPUT
PARENTS
fftprof,m_fft_prof
CHILDREN
fft_test_free,fft_test_init,fft_test_nullify,fftprof_free,get_kg random_number,time_rhotwg
SOURCE
1480 subroutine time_rhotwg(Ftest,map2sphere,use_padfft,osc_npw,osc_gvec,header,Ftprof) 1481 1482 1483 !This section has been created automatically by the script Abilint (TD). 1484 !Do not modify the following lines by hand. 1485 #undef ABI_FUNC 1486 #define ABI_FUNC 'time_rhotwg' 1487 !End of the abilint section 1488 1489 implicit none 1490 1491 !Arguments ----------------------------------- 1492 !scalars 1493 integer,intent(in) :: map2sphere,use_padfft,osc_npw 1494 character(len=500),intent(out) :: header 1495 type(FFT_test_t),intent(inout) :: Ftest 1496 type(FFT_prof_t),intent(out) :: Ftprof 1497 !arrays 1498 integer,intent(in) :: osc_gvec(3,osc_npw) 1499 1500 !Local variables------------------------------- 1501 !scalars 1502 integer,parameter :: nspinor1=1,dim_rtwg=1,istwfk1=1 1503 integer :: icall,ifft,itim1,itim2,nfft,dat,sprc,ptr,ndat 1504 integer :: n1,n2,n3,n4,n5,n6 1505 real(dp) :: cpu_time,wall_time,gflops 1506 complex(dpc) :: ktabp1=cone,ktabp2=cone 1507 character(len=TNAME_LEN) :: test_name 1508 logical :: not_implemented 1509 type(MPI_type) :: MPI_enreg_seq 1510 !arrays 1511 !integer,parameter :: g1(3)=(/1,1,2/),g2(3)=(/-1,-2,-1/) 1512 integer,parameter :: g1(3)=(/-1,0,0/),g2(3)=(/1,0,0/) 1513 integer,allocatable :: gbound(:,:) 1514 integer,allocatable :: ktabr1(:),ktabr2(:) 1515 integer,allocatable :: igfftg0(:) 1516 real(dp),parameter :: spinrot1(4)=(/one,zero,zero,one/),spinrot2(4)=(/one,zero,zero,one/) 1517 logical,allocatable :: mask(:) 1518 complex(dpc),allocatable :: results(:) 1519 complex(gwpc),allocatable :: rhotwg(:) 1520 complex(gwpc),allocatable :: wfn1(:),wfn2(:) 1521 1522 ! ********************************************************************* 1523 1524 test_name = name_of(Ftest) 1525 1526 nfft = Ftest%nfft 1527 ndat = Ftest%ndat 1528 n1=Ftest%ngfft(1); n2=Ftest%ngfft(2); n3=Ftest%ngfft(3) 1529 n4=Ftest%ngfft(4); n5=Ftest%ngfft(5); n6=Ftest%ngfft(6) 1530 1531 write(header,'(3(a,i2))')"rho_tw_g with use_padfft ",use_padfft,", map2sphere ",map2sphere,", ndat ",ndat 1532 1533 ! TODO: zero-pad not available with SG2001 routines. 1534 not_implemented = (use_padfft==1.and.Ftest%ngfft(7) == 412) 1535 1536 if (Ftest%available==0.or.not_implemented) then 1537 call fftprof_init(Ftprof,test_name,0,0,0,zero,zero,zero) 1538 RETURN 1539 end if 1540 1541 call xomp_set_num_threads(Ftest%nthreads) 1542 1543 if (Ftest%ngfft(7)/100 == FFT_FFTW3) then 1544 call fftw3_set_nthreads(Ftest%nthreads) 1545 end if 1546 1547 call initmpi_seq(MPI_enreg_seq) 1548 call init_distribfft_seq(MPI_enreg_seq%distribfft,'c',n2,n3,'all') 1549 1550 itim1=1; itim2=1 1551 ABI_MALLOC(ktabr1,(nfft)) 1552 ABI_MALLOC(ktabr2,(nfft)) 1553 1554 do ifft=1,nfft 1555 ktabr1(ifft)= ifft 1556 ktabr2(ifft)= ifft 1557 end do 1558 1559 ABI_MALLOC(igfftg0,(osc_npw*map2sphere)) 1560 1561 if (map2sphere>0) then 1562 ABI_MALLOC(mask,(osc_npw)) 1563 call kgindex(igfftg0,osc_gvec,mask,MPI_enreg_seq,Ftest%ngfft,osc_npw) 1564 ABI_CHECK(ALL(mask)," FFT parallelism not supported") 1565 ABI_FREE(mask) 1566 end if 1567 1568 ABI_MALLOC(gbound,(2*Ftest%mgfft+8,2*use_padfft)) 1569 if (use_padfft==1) then 1570 call sphereboundary(gbound,istwfk1,osc_gvec,Ftest%mgfft,osc_npw) 1571 end if 1572 1573 ABI_MALLOC(wfn1,(nfft*nspinor1*ndat)) 1574 ABI_MALLOC(wfn2,(nfft*nspinor1*ndat)) 1575 1576 do dat=1,ndat 1577 do sprc=1,nspinor1 1578 ptr = 1 + (sprc-1)*nfft + (dat-1)*nfft*nspinor1 1579 call calc_ceigr(g1,nfft,nspinor1,Ftest%ngfft,wfn1(ptr:)) 1580 call calc_ceigr(g2,nfft,nspinor1,Ftest%ngfft,wfn2(ptr:)) 1581 end do 1582 end do 1583 1584 ABI_MALLOC(rhotwg,(osc_npw*dim_rtwg*ndat)) 1585 ABI_MALLOC(results,(osc_npw*dim_rtwg*ndat)) 1586 1587 call cwtime(cpu_time,wall_time,gflops,"start") 1588 1589 do icall=1,NCALLS_FOR_TEST 1590 1591 ifft = empty_cache(CACHE_KBSIZE) 1592 call rho_tw_g(nspinor1,osc_npw,nfft,ndat,Ftest%ngfft,map2sphere,use_padfft,igfftg0,gbound,& 1593 & wfn1,itim1,ktabr1,ktabp1,spinrot1,& 1594 & wfn2,itim2,ktabr2,ktabp2,spinrot2,& 1595 & dim_rtwg,rhotwg) 1596 ! Store results at the first call. 1597 if (icall==1) results = rhotwg 1598 end do 1599 1600 call cwtime(cpu_time,wall_time,gflops,"stop") 1601 1602 ABI_FREE(ktabr1) 1603 ABI_FREE(ktabr2) 1604 ABI_FREE(igfftg0) 1605 ABI_FREE(gbound) 1606 ABI_FREE(rhotwg) 1607 ABI_FREE(wfn1) 1608 ABI_FREE(wfn2) 1609 1610 call fftprof_init(Ftprof,test_name,Ftest%nthreads,NCALLS_FOR_TEST,& 1611 & Ftest%ndat,cpu_time,wall_time,gflops,results) 1612 1613 call destroy_mpi_enreg(MPI_enreg_seq) 1614 ABI_FREE(results) 1615 1616 end subroutine time_rhotwg