TABLE OF CONTENTS


ABINIT/lapackprof [ Programs ]

[ Top ] [ Programs ]

NAME

 lapackprof

FUNCTION

  Utility for profiling the (Sca)Lapack libraries supported by abinit.

COPYRIGHT

 Copyright (C) 2004-2018 ABINIT group (MG)
 This file is distributed under the terms of the
 GNU General Public License, see ~abinit/COPYING
 or http://www.gnu.org/copyleft/gpl.txt .

INPUTS

  (main program)

OUTPUT

  Timing analysis of the different libraries and algorithms.

NOTES

PARENTS

CHILDREN

      abi_io_redirect,abimem_init,abinit_doctor,cg_zaxpy,cg_zcopy,cg_zgemm
      cg_zgemv,cwtime,destroy_mpi_enreg,herald,init_mpi_enreg,lower,projbd
      pw_orthon,random_number,sqmat_itranspose,test_xginv,wrtout,xgerc,xheevx
      xhpev,xmpi_bcast,xmpi_end,xmpi_init,xomp_set_num_threads,xomp_show_info

SOURCE

 33 #if defined HAVE_CONFIG_H
 34 #include "config.h"
 35 #endif
 36 
 37 #include "abi_common.h"
 38 
 39 program lapackprof
 40 
 41  use defs_basis
 42  use defs_abitypes
 43  use m_build_info
 44  use m_abicore
 45  use m_xmpi
 46  use m_xomp
 47  use m_errors
 48  use m_hide_blas
 49  use m_cgtools
 50  use m_hide_lapack
 51 
 52  use m_fstrings,      only : lower, itoa, strcat
 53  use m_specialmsg,    only : specialmsg_getcount, herald
 54  use m_time,          only : cwtime
 55  use m_io_tools,      only : prompt
 56  use m_numeric_tools, only : arth
 57  use m_mpinfo,        only : init_mpi_enreg, destroy_mpi_enreg
 58 
 59 !This section has been created automatically by the script Abilint (TD).
 60 !Do not modify the following lines by hand.
 61 #undef ABI_FUNC
 62 #define ABI_FUNC 'lapackprof'
 63 !End of the abilint section
 64 
 65  implicit none
 66 
 67 !Local variables-------------------------------
 68 !scalars
 69  integer,parameter :: master=0,nspinor1=1
 70  integer :: comm,npw,ierr,my_rank,ii,isz,jj,it,step,icall,nfound
 71  integer :: istwfk=1,useoverlap,mcg,mgsc,band,g0,idx,iall,ortalgo
 72  real(dp) ::  ctime,wtime,gflops
 73  logical :: do_check,test_ortho,test_copy,test_dotc,test_axpy,test_gemv !,test_gemm
 74  character(len=24) :: skinds
 75  character(len=500) :: header,routname
 76  type(MPI_type) :: MPI_enreg
 77 !arrays
 78  integer,allocatable :: sizes(:)
 79  real(dp) :: alpha(2),beta(2),dot(2)
 80  real(dp),allocatable :: cg(:,:),gsc(:,:),ortho_check(:,:,:)
 81  real(dp),allocatable :: cg1(:,:),cg2(:,:),cg3(:,:),ene(:),direc(:,:),scprod(:,:)
 82  complex(dpc),allocatable :: zvec(:),zmat(:,:),wmat(:,:),zpmat(:),evec(:,:)
 83 ! complex(spc),allocatable :: vec(:),mat(:,:)
 84  type(latime_t) :: Tres
 85 ! ==========  INPUT FILE ==============
 86  integer :: ncalls=50,nband=200,nsizes=20,nthreads=1
 87  logical :: debug_mode=.FALSE.
 88  character(len=500) :: tasks="all"
 89  integer :: size_arth(2) = (/1000,2000/)
 90  namelist /CONTROL/ tasks, debug_mode,nthreads
 91  namelist /SYSTEM/ ncalls,nband,nsizes,size_arth
 92 
 93 ! *************************************************************************
 94 
 95  call xmpi_init()
 96 
 97 !Change communicator for I/O (mandatory!)
 98  call abi_io_redirect(new_io_comm=xmpi_world)
 99 
100 !Initialize memory profiling if it is activated
101 !if a full abimem.mocc report is desired, set the argument of abimem_init to "2" instead of "0"
102 !note that abimem.mocc files can easily be multiple GB in size so don't use this option normally
103 #ifdef HAVE_MEM_PROFILING
104  call abimem_init(0)
105 #endif
106 
107  call init_mpi_enreg(mpi_enreg)
108 
109  comm    = xmpi_world
110  my_rank = xmpi_comm_rank(comm)
111 
112  call herald("LAPACKPROF",abinit_version,std_out)
113 
114  if (my_rank == master) then
115    write(std_out,'(a)')" Tool for profiling and testing the (Sca)Lapack libraries used in ABINIT."
116  end if
117 
118 !Read input file
119  read(std_in, NML=CONTROL)
120  read(std_in, NML=SYSTEM)
121 
122  call xomp_set_num_threads(nthreads)
123  call xomp_show_info(std_out)
124 
125  call lower(tasks)
126 
127 !replace "+" with white spaces
128  do ii=1,LEN_TRIM(tasks)
129    if (tasks(ii:ii) == "+") tasks(ii:ii) = " "
130  end do
131 !%call str_replace_chars(tasks,"+"," ")
132  tasks = " "//TRIM(tasks)//""
133 
134  call xmpi_bcast(tasks,master,comm,ierr)
135  call xmpi_bcast(size_arth,master,comm,ierr)
136  call xmpi_bcast(nsizes,master,comm,ierr)
137 
138  iall=INDEX (tasks,"all")
139 
140  test_ortho = (iall>0 .or. INDEX(tasks," ortho")>0 )
141  test_copy  = (iall>0 .or. INDEX(tasks," copy")>0 )
142  test_dotc  = (iall>0 .or. INDEX(tasks," dotc")>0 )
143  test_axpy  = (iall>0 .or. INDEX(tasks," axpy")>0 )
144  test_gemv  = (iall>0 .or. INDEX(tasks," gemv")>0 )
145 !test_gemm = (iall>0 .or. INDEX(tasks," gemm")>0 )
146 
147  ABI_MALLOC(sizes,(nsizes))
148  sizes = arth(size_arth(1),size_arth(2),nsizes)
149 
150  if (INDEX(tasks," projbd")>0) then
151    istwfk =1
152    useoverlap = 0
153 
154    routname = "projbd"
155    header = strcat("BEGIN_BENCHMARK: ",routname)
156    write(std_out,'(a)')TRIM(header)
157 
158    do isz=1,nsizes
159      npw = sizes(isz)
160      mcg  = npw * nband
161      mgsc = mcg * useoverlap
162      ABI_MALLOC(cg,(2,mcg))
163      call random_number(cg)
164 
165      ABI_MALLOC(gsc,(2,mgsc))
166 
167      ABI_MALLOC(direc, (2,npw*nspinor1))
168      direc = zero
169 
170      ABI_MALLOC(scprod, (2,nband))
171 
172      call cwtime(ctime,wtime,gflops,"start")
173 
174      call projbd(cg,direc,0,0,0,istwfk,mcg,mgsc,nband,npw,nspinor1,gsc,scprod,0,0,useoverlap,&
175 &     mpi_enreg%me_g0,mpi_enreg%comm_fft)
176 
177      call cwtime(ctime,wtime,gflops,"stop")
178      write(std_out,'(2(a,i0),2(a,f9.6))')" npw = ",npw,", nband = ",nband,", cpu_time = ",ctime,", wall_time = ",wtime
179 
180      ABI_FREE(scprod)
181      ABI_FREE(direc)
182 
183      ABI_FREE(cg)
184      ABI_FREE(gsc)
185    end do
186 
187    header = strcat("END_BENCHMARK: ",routname)
188    write(std_out,'(a)')TRIM(header)
189  end if
190 
191  if (test_ortho) then
192    istwfk =1
193    useoverlap = 0
194 
195    do ortalgo=1,4,1
196      routname = strcat(" pw_orthon, ortalgo = ",itoa(ortalgo))
197      header = strcat("BEGIN_BENCHMARK: ",routname)
198      write(std_out,'(a)')TRIM(header)
199 !
200      do isz=1,nsizes
201        npw = sizes(isz)
202        mcg  = npw * nband
203        mgsc = mcg * useoverlap
204        ABI_MALLOC(cg,(2,mcg))
205        call random_number(cg)
206 
207        ABI_MALLOC(gsc,(2,mgsc))
208 
209        if (istwfk/=1) then
210          do band=1,nband
211            g0 = 1 + (band-1)*npw
212            cg(2,g0) = zero
213          end do
214        end if
215 
216        call cwtime(ctime,wtime,gflops,"start")
217 
218        call pw_orthon(0,0,istwfk,mcg,mgsc,npw,nband,ortalgo,gsc,useoverlap,cg,&
219 &       mpi_enreg%me_g0,mpi_enreg%comm_bandspinorfft)
220 
221        call cwtime(ctime,wtime,gflops,"stop")
222 
223        write(std_out,'(2(a,i0),2(a,f9.6))')" npw = ",npw,", nband = ",nband,", cpu_time = ",ctime,", wall_time = ",wtime
224 
225        if (debug_mode) then
226          ABI_MALLOC(ortho_check,(2,nband,nband))
227          if (istwfk/=1) then
228            do band=1,nband
229              g0 = 1 + (band-1)*npw
230              cg(:,g0) = half * cg(:,g0)
231            end do
232          end if
233 
234          call cg_zgemm("C","N",npw,nband,nband,cg,cg,ortho_check)
235 
236          if (istwfk/=1) ortho_check = two * ortho_check
237 
238          do band=1,nband
239            ortho_check(1,band,band) = ortho_check(1,band,band) - one
240          end do
241 
242          write(std_out,*)"MAX ABS error",MAXVAL( ABS(RESHAPE(ortho_check,(/2*nband*nband/)) ))
243          ABI_FREE(ortho_check)
244        end if
245 
246        ABI_FREE(cg)
247        ABI_FREE(gsc)
248      end do
249 
250      header = strcat("END_BENCHMARK: ",routname)
251      write(std_out,'(a)')TRIM(header)
252    end do ! ortalgo
253  end if
254 
255 !copy
256  if (test_copy) then
257 !
258    do step=1,2
259 !
260      if (step==1) routname = " f90_zcopy"
261      if (step==2) routname = " cg_zcopy"
262      header = strcat("BEGIN_BENCHMARK: ",routname)
263      write(std_out,'(a)')TRIM(header)
264 !
265      do isz=1,nsizes
266        npw  = sizes(isz)
267        ABI_MALLOC(cg1,(2,npw))
268        ABI_MALLOC(cg2,(2,npw))
269        cg1 = zero
270 
271        call cwtime(ctime,wtime,gflops,"start")
272        if (step==1) then
273          do ii=1,ncalls
274 !$OMP PARALLEL DO
275            do jj=1,npw
276              cg2(1,jj) = cg1(1,jj)
277              cg2(2,jj) = cg1(2,jj)
278            end do
279          end do
280        else
281          do ii=1,ncalls
282            call cg_zcopy(npw,cg1,cg2)
283          end do
284        end if
285        call cwtime(ctime,wtime,gflops,"stop")
286        write(std_out,'(a,i0,2(a,f9.6))')" npw = ",npw,", cpu_time = ",ctime,", wall_time = ",wtime
287 
288        ABI_FREE(cg1)
289        ABI_FREE(cg2)
290      end do
291 
292      header = strcat("END_BENCHMARK: ",routname)
293      write(std_out,'(a)')TRIM(header)
294    end do
295  end if
296 
297 !zdotc
298  if (test_dotc) then
299 !
300    do step=1,2
301 !
302      if (step==1) routname = " f90_zdotc"
303      if (step==2) routname = " cg_zdotc"
304      header = strcat("BEGIN_BENCHMARK: ",routname)
305      write(std_out,'(a)')TRIM(header)
306 
307      do isz=1,nsizes
308        npw  = sizes(isz)
309        ABI_MALLOC(cg1,(2,npw))
310        ABI_MALLOC(cg2,(2,npw))
311        call random_number(cg1)
312        call random_number(cg2)
313 
314        call cwtime(ctime,wtime,gflops,"start")
315        if (step==1) then
316          do jj=1,ncalls
317            dot = zero
318 !$OMP PARALLEL DO REDUCTION(+:dot)
319            do ii=1,npw
320              dot(1) = dot(1) + cg1(1,ii)*cg2(1,ii) + cg1(2,ii)*cg2(2,ii)
321              dot(2) = dot(2) + cg1(1,ii)*cg2(2,ii) - cg1(2,ii)*cg2(1,ii)
322            end do
323          end do
324        else
325          do ii=1,ncalls
326            dot = cg_zdotc(npw,cg1,cg2)
327          end do
328        end if
329        call cwtime(ctime,wtime,gflops,"stop")
330        write(std_out,'(a,i0,2(a,f9.6))')" npw = ",npw,", cpu_time = ",ctime,", wall_time = ",wtime
331        ABI_FREE(cg1)
332        ABI_FREE(cg2)
333      end do
334 
335      header = strcat("END_BENCHMARK: ",routname)
336      write(std_out,'(a)')TRIM(header)
337    end do
338  end if
339 
340 !axpy
341  if (test_axpy) then
342 !  alpha = (/one,zero/)
343    alpha = (/one,two/)
344    do step=1,2
345 !
346      if (step==1) routname = " f90_axpy"
347      if (step==2) routname = " cg_axpy"
348      header = strcat("BEGIN_BENCHMARK: ",routname)
349      write(std_out,'(a)')TRIM(header)
350 
351      do isz=1,nsizes
352        npw = sizes(isz)
353        ABI_MALLOC(cg1,(2,npw))
354        ABI_MALLOC(cg2,(2,npw))
355        cg2 = zero
356 
357        do jj=1,npw
358          cg1(:,jj) = jj
359        end do
360 
361        call cwtime(ctime,wtime,gflops,"start")
362        if (step==1) then
363          jj = 0
364          do icall=1,ncalls
365 !          call random_number(cg1)
366            call random_number(cg2(:,1:1))
367 !          cg2 = zero
368            do ii=1,npw
369              jj = jj+1
370              cg2(1,ii) = alpha(1)*cg1(1,ii) - alpha(2)*cg1(2,ii) + cg2(1,ii)
371              cg2(2,ii) = alpha(1)*cg1(2,ii) + alpha(2)*cg1(1,ii) + cg2(2,ii)
372            end do
373          end do
374        else
375          do icall=1,ncalls
376 !          call random_number(cg1)
377 !          call random_number(cg2)
378 !          cg2 = zero
379            call random_number(cg2(:,1:1))
380            call cg_zaxpy(npw,alpha,cg1,cg2)
381          end do
382        end if
383        call cwtime(ctime,wtime,gflops,"stop")
384        write(std_out,'(a,i0,2(a,f9.6))')" npw = ",npw,", cpu_time = ",ctime,", wall_time = ",wtime
385        ABI_FREE(cg1)
386        ABI_FREE(cg2)
387      end do
388 
389      header = strcat("END_BENCHMARK: ",routname)
390      write(std_out,'(a)')TRIM(header)
391    end do
392  end if
393 
394 !cg_zgemv
395  if (test_gemv) then
396    alpha = (/one,two/)
397    beta = (/zero,zero/)
398    do step=1,2
399      if (step==1) routname = " f90_zgemv"
400      if (step==2) routname = " cg_zgemv"
401      header = strcat("BEGIN_BENCHMARK: ",routname)
402      write(std_out,'(a)')TRIM(header)
403 
404      do isz=1,nsizes
405        npw = sizes(isz)
406        ABI_MALLOC(cg1,(2,npw*npw))
407        ABI_MALLOC(cg2,(2,npw))
408        ABI_MALLOC(cg3,(2,npw))
409 
410        do jj=1,npw*npw
411          cg1(:,jj) = jj
412        end do
413        cg2 = one
414        cg3 = zero
415 
416        call cwtime(ctime,wtime,gflops,"start")
417        if (step==1) then
418 !        do icall=1,ncalls
419 !        do jj=1,npw
420 !        ar=scprod(1,iband);ai=scprod(2,iband)
421 !        do ipw=1,npw_sp
422 !        cg_re=cg(1,index1+ipw)
423 !        cg_im=cg(2,index1+ipw)
424 !        direc(1,ipw)=direc(1,ipw)-ar*cg_re+ai*cg_im
425 !        direc(2,ipw)=direc(2,ipw)-ar*cg_im-ai*cg_re
426 !        end do
427 !        end do
428 !        end do
429 !        !cg3(1,ii) = alpha(1)*cg1(1,ii) - alpha(2)*cg1(2,ii) + cg2(1,ii)
430 !        !cg3(2,ii) = alpha(1)*cg1(2,ii) + alpha(2)*cg1(1,ii) + cg2(2,ii)
431 !        end do
432 !        end do
433        else
434          do icall=1,ncalls
435            call cg_zgemv("N",npw,npw,cg1,cg2,cg3)
436          end do
437        end if
438        call cwtime(ctime,wtime,gflops,"stop")
439        write(std_out,'(a,i0,2(a,f9.6))')" npw = ",npw,", cpu_time = ",ctime,", wall_time = ",wtime
440 
441        ABI_FREE(cg1)
442        ABI_FREE(cg2)
443        ABI_FREE(cg3)
444      end do
445 
446      header = strcat("END_BENCHMARK: ",routname)
447      write(std_out,'(a)')TRIM(header)
448    end do
449  end if
450 
451 !itranspose
452  if (.FALSE.) then
453 !  if (.TRUE.) then
454    do step=1,2
455      if (step==1) routname = " f90_transpose"
456      if (step==2) routname = " mkl_transponse"
457      header = strcat("BEGIN_BENCHMARK: ",routname)
458      write(std_out,'(a)')TRIM(header)
459 
460      do isz=1,nsizes
461        npw = sizes(isz)
462        ABI_MALLOC(zmat,(npw,npw))
463        ABI_MALLOC(wmat,(npw,npw))
464        do jj=1,npw
465          do ii=1,npw
466            zmat(ii,jj) = DCMPLX(ii,jj)
467          end do
468        end do
469 
470        call cwtime(ctime,wtime,gflops,"start")
471        if (step==1) then
472          do icall=1,ncalls
473 !          wmat = TRANSPOSE(zmat)
474            zmat = TRANSPOSE(zmat)
475          end do
476        else
477          do icall=1,ncalls
478 !          call sqmat_otranspose(npw,zmat,wmat)
479            call sqmat_itranspose(npw,zmat)
480          end do
481        end if
482        call cwtime(ctime,wtime,gflops,"stop")
483        write(std_out,'(a,i0,2(a,f9.6))')"npw = ",npw,", cpu_time ",ctime,", wall_time ",wtime
484 
485        ABI_FREE(zmat)
486        ABI_FREE(wmat)
487      end do
488 
489      header = strcat("END_BENCHMARK: ",routname)
490      write(std_out,'(a)')TRIM(header)
491    end do
492  end if
493 
494 !zgerc
495  if (.FALSE.) then
496    do step=1,2
497      if (step==1) routname = " f90_zgerc"
498      if (step==2) routname = " zgerc"
499      header = strcat("BEGIN_BENCHMARK: ",routname)
500      write(std_out,'(a)')TRIM(header)
501 
502      do isz=1,nsizes
503        npw  = sizes(isz)
504        ABI_MALLOC(zvec,(npw))
505        ABI_MALLOC(zmat,(npw,npw))
506        zvec = cone; zmat = czero
507 
508        call cwtime(ctime,wtime,gflops,"start")
509        if (step==1) then ! Home made zgerc
510          do it=1,ncalls
511            zmat = czero
512 !$OMP PARALLEL DO
513            do jj=1,npw
514              do ii=1,npw
515                zmat(ii,jj) = zmat(ii,jj) + CONJG(zvec(ii)) * zvec(jj)
516              end do
517            end do
518          end do
519        else
520          do jj=1,ncalls
521            zmat = czero
522            call XGERC(npw,npw,(1._dp,0._dp),zvec,1,zvec,1,zmat,npw)
523          end do
524        end if
525        call cwtime(ctime,wtime,gflops,"stop")
526        write(std_out,'(a,i0,2(a,f9.6))')" npw = ",npw,", cpu_time ",ctime,", wall_time ",wtime
527 
528        ABI_FREE(zvec)
529        ABI_FREE(zmat)
530      end do
531 
532      header = strcat("END_BENCHMARK: ",routname)
533      write(std_out,'(a)')TRIM(header)
534    end do
535 
536 !  do isz=1,nsizes
537 !  npw  = sizes(isz)
538 !  ABI_MALLOC(vec,(npw))
539 !  ABI_MALLOC(mat,(npw,npw))
540 !  vec = cone; mat = czero
541 
542 !  call cwtime(ctime,wtime,gflops,"start")
543 
544 !  do jj=1,ncalls
545 !  call XGERC(npw,npw,(1._sp,0._sp),vec,1,vec,1,mat,npw)
546 !  end do
547 
548 !  call cwtime(ctime,wtime,gflops,"stop")
549 
550 !  write(std_out,'(a,i0,2f9.3)')" CGERG size, cpu_time, wall_time, max_abserr ",npw,ctime,wtime
551 
552 !  ABI_FREE(vec)
553 !  ABI_FREE(mat)
554 !  end do
555  end if
556 
557 !xginv
558  if (.FALSE.) then
559    do_check = .FALSE.
560 !  do_check = .TRUE.
561    do ii=1,nsizes
562      npw  = sizes(ii)
563      call test_xginv(npw,skinds,do_check,Tres,comm)
564 
565      if (my_rank==master) then
566        write(std_out,'(a,i0,3f9.3)')&
567 &       " routine = xginv, size, cpu_time, wall_time, max_abserr ",Tres%msize,Tres%ctime,Tres%wtime,Tres%max_abserr
568      end if
569    end do
570  end if
571 
572 !xhpev vs xheev
573  if (.FALSE.) then
574    do step=1,2
575 !
576      do isz=1,nsizes
577        npw = sizes(isz)
578        ABI_MALLOC(ene,(npw))
579        ABI_MALLOC(evec,(npw,npw))
580 
581        ABI_MALLOC(zpmat,(npw*(npw+1)/2))
582        zpmat = czero
583        idx = 0
584        do jj=1,npw
585          do ii=1,jj
586            idx = idx + 1
587            zpmat(idx) = cone
588          end do
589        end do
590 
591        if (step==1) then
592          call cwtime(ctime,wtime,gflops,"start")
593          call xhpev("V","U",npw,zpmat,ene,evec,npw)
594        else
595          ABI_MALLOC(zmat,(npw,npw))
596          do jj=1,npw
597            do ii=1,jj
598              idx = ii + jj*(jj-1)/2
599              zmat(ii,jj) = zpmat(idx)
600            end do
601          end do
602          call cwtime(ctime,wtime,gflops,"start")
603 !        call xheev("V","U",npw,zmat,ene)
604          call xheevx("V","A","U",npw,zmat,zero,zero,1,1,zero,nfound,ene,evec,npw)
605          ABI_FREE(zmat)
606        end if
607        call cwtime(ctime,wtime,gflops,"stop")
608        write(std_out,'(a,i0,2(a,f9.6))')"EIG PROBLEM: size = ",npw,", cpu_time ",ctime,", wall_time ",wtime
609      end do
610 
611      ABI_FREE(ene)
612      ABI_FREE(evec)
613      ABI_FREE(zpmat)
614    end do
615  end if
616 !
617 !===============================
618 !=== End of run, free memory ===
619 !===============================
620  call wrtout(std_out,ch10//" Analysis completed.","COLL")
621 
622  ABI_FREE(sizes)
623 
624  call destroy_mpi_enreg(MPI_enreg)
625 
626 !Writes information on file about the memory before ending mpi module, if memory profiling is enabled
627  !ABI_ALLOCATE(nband, (2))
628  call abinit_doctor("__ioprof")
629 
630  call xmpi_end()
631 
632  end program lapackprof