TABLE OF CONTENTS


ABINIT/lapackprof [ Programs ]

[ Top ] [ Programs ]

NAME

 lapackprof

FUNCTION

  Utility for profiling Linear Algebra libraries used by Abinit.

COPYRIGHT

 Copyright (C) 2004-2024 ABINIT group (MG)
 This file is distributed under the terms of the
 GNU General Public License, see ~abinit/COPYING
 or http://www.gnu.org/copyleft/gpl.txt .

INPUTS

  (main program)

SOURCE

 19 #if defined HAVE_CONFIG_H
 20 #include "config.h"
 21 #endif
 22 
 23 #include "abi_common.h"
 24 
 25 program lapackprof
 26 
 27  use defs_basis
 28  use m_abicore
 29  use m_xmpi
 30  use m_xomp
 31  use m_errors
 32  use m_hide_blas
 33  use m_cgtools
 34  use m_hide_lapack
 35  use m_yaml
 36 
 37  use defs_abitypes,   only : MPI_type
 38  use m_build_info,    only : abinit_version
 39  use m_fstrings,      only : lower, itoa, sjoin !, strcat
 40  use m_specialmsg,    only : specialmsg_getcount, herald
 41  use m_argparse,      only : get_arg, get_arg_list, get_start_step_num
 42  use m_time,          only : cwtime
 43  use m_io_tools,      only : prompt
 44  use m_numeric_tools, only : arth
 45  use m_mpinfo,        only : init_mpi_enreg, destroy_mpi_enreg
 46 
 47  implicit none
 48 
 49 !Local variables-------------------------------
 50 !scalars
 51  integer,parameter :: master = 0
 52  integer :: comm, npw, my_rank, ii, isz, jj, it, step, icall, nfound, nspinor !ierr,
 53  integer :: istwfk, mcg, mgsc, band, g0, idx, ortalgo, abimem_level, prtvol, usepaw, debug, me_g0
 54  real(dp) ::  ctime, wtime, gflops, abimem_limit_mb, max_absimag
 55  !logical :: do_check
 56  character(len=500) :: method, command, arg, msg !header,
 57  type(MPI_type) :: MPI_enreg
 58  type(yamldoc_t) :: ydoc
 59 !arrays
 60  integer,allocatable :: sizes(:)
 61  real(dp) :: alpha(2), beta(2) ,dot(2)
 62  real(dp),allocatable :: cg(:,:), gsc(:,:), ortho_check(:,:,:)
 63  real(dp),allocatable :: cg1(:,:), cg2(:,:), cg3(:,:), ene(:), direc(:,:), scprod(:,:)
 64  complex(dpc),allocatable :: zvec(:), zmat(:,:), wmat(:,:), zpmat(:), evec(:,:)
 65 ! complex(spc),allocatable :: vec(:), mat(:,:)
 66  !type(latime_t) :: Tres
 67  integer :: ncalls, nband, nsizes, nthreads
 68  integer :: npw_start_step_num(3)
 69 
 70 ! *************************************************************************
 71 
 72  ! Change communicator for I/O (mandatory!)
 73  call abi_io_redirect(new_io_comm=xmpi_world)
 74 
 75  call xmpi_init()
 76  comm  = xmpi_world; my_rank = xmpi_comm_rank(comm)
 77 
 78  ! Initialize memory profiling if it is activated
 79  ! if a full abimem.mocc report is desired, set the argument of abimem_init to "2" instead of "0"
 80  ! note that abimem.mocc files can easily be multiple GB in size so don't use this option normally
 81  ABI_CHECK(get_arg("abimem-level", abimem_level, msg, default=0) == 0, msg)
 82  ABI_CHECK(get_arg("abimem-limit-mb", abimem_limit_mb, msg, default=20.0_dp) == 0, msg)
 83 #ifdef HAVE_MEM_PROFILING
 84  call abimem_init(abimem_level, limit_mb=abimem_limit_mb)
 85 #endif
 86 
 87  call herald("LAPACKPROF", abinit_version, std_out)
 88 
 89  ! Command line options.
 90  do ii=1,command_argument_count()
 91    call get_command_argument(ii, arg)
 92    if (arg == "-v" .or. arg == "--version") then
 93      write(std_out,"(a)") trim(abinit_version); goto 100
 94 
 95    else if (arg == "-h" .or. arg == "--help") then
 96      ! TODO: Document the different options.
 97      write(std_out,*)"-v, --version              Show version number and exit."
 98      write(std_out,*)"-h, --help                 Show this help and exit."
 99      write(std_out,*)" "
100      write(std_out,*)"=== Options for developers ==="
101      write(std_out,*)" "
102      write(std_out,*)"test_v1complete FILE [--symv1scf 1] [--potfile foo.nc]"
103      goto 100
104    end if
105  end do
106 
107  call get_command_argument(1, command)
108  ABI_CHECK(get_arg("prtvol", prtvol, msg, default=0) == 0, msg)
109  ABI_CHECK(get_arg("istwfk", istwfk, msg, default=1) == 0, msg)
110  ABI_CHECK(get_arg("usepaw", usepaw, msg, default=0) == 0, msg)
111  ABI_CHECK(get_arg("ncalls", ncalls, msg, default=5) == 0, msg)
112  ABI_CHECK(get_arg("nband", nband, msg, default=50) == 0, msg)
113  ABI_CHECK(get_arg("nspinor", nspinor, msg, default=1) == 0, msg)
114  ABI_CHECK(get_arg("nthreads", nthreads, msg, default=1) == 0, msg)
115  ABI_CHECK(get_arg("debug", nthreads, msg, default=0) == 0, msg)
116  ABI_CHECK(get_start_step_num("npw", npw_start_step_num, msg, default=[1000, 2000, 20]) == 0, msg)
117 
118  if (my_rank == master) write(std_out,'(a)')" Tool for profiling and testing Linear Algebra routines used in ABINIT."
119 
120  call xomp_set_num_threads(nthreads)
121  call xomp_show_info(std_out)
122  call init_mpi_enreg(mpi_enreg)
123  me_g0 = mpi_enreg%me_g0
124 
125  ! Output metadata i.e parameters that do not change during the benchmark.
126  ydoc = yamldoc_open('LapackProfMetadata') !, info=info, width=width)
127  call ydoc%add_ints("istwfk, usepaw, ncalls, nband, nspinor, nthreads", &
128                      [istwfk, usepaw, ncalls, nband, nspinor, nthreads] &
129                     ) !, int_fmt, width, dict_key, multiline_trig, ignore)
130  call ydoc%write_and_free(std_out)
131 
132  nsizes = npw_start_step_num(3)
133  ABI_MALLOC(sizes, (nsizes))
134  sizes = arth(npw_start_step_num(1), npw_start_step_num(2), nsizes)
135 
136  select case (command)
137  case ("projbd")
138    write(std_out,'(a)')trim(sjoin("BEGIN_BENCHMARK: ", command))
139    write(std_out, "(a1,a8,1x,a6,2(1x,a12))")"#", "npw", "nband", "cpu_time", "wall_time"
140 
141    do isz=1,nsizes
142      npw = sizes(isz)
143      mcg = npw * nband; mgsc = mcg * usepaw
144      ABI_MALLOC_RAND(cg, (2, mcg))
145      ABI_MALLOC(gsc, (2, mgsc))
146      gsc = cg
147      ABI_CALLOC(direc, (2, npw*nspinor))
148      ABI_MALLOC(scprod, (2, nband))
149 
150      call cwtime(ctime,wtime,gflops,"start")
151      call projbd(cg, direc, 0, 0, 0, istwfk, mcg, mgsc, nband, npw, nspinor, gsc, scprod, 0, 0, usepaw, &
152                  me_g0, mpi_enreg%comm_fft)
153      call cwtime(ctime,wtime,gflops,"stop")
154      write(std_out,'(1x,i8,1x,i6,2(1x,f12.6))') npw, nband, ctime, wtime
155 
156      ABI_FREE(scprod)
157      ABI_FREE(direc)
158      ABI_FREE(cg)
159      ABI_FREE(gsc)
160    end do
161 
162    write(std_out,'(a)')trim(sjoin("END_BENCHMARK: ", command))
163 
164  case ("pw_orthon")
165    write(std_out,'(a)')trim(sjoin("BEGIN_BENCHMARK: ",command))
166    write(std_out, "(a1,a8,1x,a6,1x,a7,2(1x,a12))")"#", "npw", "nband", "ortalgo", "cpu_time", "wall_time"
167 
168    do ortalgo=0,4,1
169      do isz=1,nsizes
170        npw = sizes(isz)
171        mcg  = npw * nband
172        mgsc = mcg * usepaw
173        ABI_MALLOC_RAND(cg, (2, mcg))
174        call cg_set_imag0_to_zero(istwfk, me_g0, npw, nband, cg, max_absimag)
175        ABI_MALLOC(gsc, (2, mgsc))
176        gsc = cg
177 
178        call cwtime(ctime, wtime, gflops, "start")
179        call pw_orthon(0,0, istwfk, mcg, mgsc, npw, nband, ortalgo, gsc, usepaw, cg,&
180                       me_g0, mpi_enreg%comm_bandspinorfft)
181        call cwtime(ctime,wtime,gflops,"stop")
182        write(std_out,'(1x,i8,1x,i6,1x,i7,2(1x,f12.6))') npw, nband, ortalgo, ctime, wtime
183 
184        if (debug == 1) then
185          ABI_MALLOC(ortho_check,(2, nband, nband))
186          if (istwfk/=1) then
187            do band=1,nband
188              g0 = 1 + (band-1)*npw
189              cg(:,g0) = half * cg(:,g0)
190            end do
191          end if
192          call cg_zgemm("C", "N", npw, nband, nband, cg, cg, ortho_check)
193          if (istwfk/=1) ortho_check = two * ortho_check
194          do band=1,nband
195            ortho_check(1,band,band) = ortho_check(1,band,band) - one
196          end do
197          write(std_out,*)"DEBUG: Max Abs error:",MAXVAL( ABS(RESHAPE(ortho_check, [2*nband*nband])))
198          ABI_FREE(ortho_check)
199        end if
200 
201        ABI_FREE(cg)
202        ABI_FREE(gsc)
203      end do ! isz
204      write(std_out,'(a)')trim(sjoin("# end ortalgo: ", itoa(ortalgo)))
205    end do ! ortalgo
206    write(std_out,'(a)')trim(sjoin("END_BENCHMARK: ", command))
207 
208  case ("copy")
209    write(std_out,'(a)')trim(sjoin("BEGIN_BENCHMARK: ", command))
210    write(std_out, "(a1,a8,1x,a6,2(1x,a12))")"#", "npw", "type", "cpu_time", "wall_time"
211    do step=1,2
212      if (step == 1) method = "F90"
213      if (step == 2) method = "BLAS"
214      do isz=1,nsizes
215        npw  = sizes(isz)
216        ABI_CALLOC(cg1, (2, npw))
217        ABI_MALLOC(cg2, (2, npw))
218 
219        call cwtime(ctime, wtime, gflops, "start")
220        if (step==1) then
221          do ii=1,ncalls
222            do jj=1,npw
223              cg2(:,jj) = cg1(:,jj)
224            end do
225          end do
226        else
227          do ii=1,ncalls
228            call cg_zcopy(npw, cg1, cg2)
229          end do
230        end if
231        call cwtime(ctime, wtime, gflops, "stop")
232        write(std_out,'(1x,i8,1x,a6,2(1x,f12.6))')npw, trim(method), ctime, wtime
233 
234        ABI_FREE(cg1)
235        ABI_FREE(cg2)
236      end do
237 
238      write(std_out,'(a)')trim(sjoin("# end method: ", method))
239    end do
240    write(std_out,'(a)')trim(sjoin("END_BENCHMARK: ", command))
241 
242  case ("zdotc")
243    write(std_out,'(a)')trim(sjoin("BEGIN_BENCHMARK: ", command))
244    write(std_out, "(a1,a8,1x,a6,2(1x,a12))")"#", "npw", "type", "cpu_time", "wall_time"
245    do step=1,2
246      if (step == 1) method = " F90"
247      if (step == 2) method = " BLAS"
248      do isz=1,nsizes
249        npw  = sizes(isz)
250        ABI_MALLOC_RAND(cg1,(2, npw))
251        ABI_MALLOC_RAND(cg2,(2, npw))
252 
253        call cwtime(ctime, wtime, gflops, "start")
254        if (step == 1) then
255          do jj=1,ncalls
256            dot = zero
257 !$OMP PARALLEL DO REDUCTION(+:dot)
258            do ii=1,npw
259              dot(1) = dot(1) + cg1(1,ii)*cg2(1,ii) + cg1(2,ii)*cg2(2,ii)
260              dot(2) = dot(2) + cg1(1,ii)*cg2(2,ii) - cg1(2,ii)*cg2(1,ii)
261            end do
262          end do
263        else
264          do ii=1,ncalls
265            dot = cg_zdotc(npw,cg1,cg2)
266          end do
267        end if
268        call cwtime(ctime, wtime, gflops, "stop")
269        write(std_out,'(1x,i8,1x,a6,2(1x,f12.6))')npw, trim(method), ctime, wtime
270        ABI_FREE(cg1)
271        ABI_FREE(cg2)
272      end do
273      write(std_out,'(a)')trim(sjoin("# end method: ", method))
274    end do
275    write(std_out,'(a)')trim(sjoin("END_BENCHMARK: ",method))
276 
277  case ("axpy")
278    write(std_out,'(a)')trim(sjoin("BEGIN_BENCHMARK: ", command))
279    write(std_out, "(a1,a8,1x,a6,2(1x,a12))")"#", "npw", "type", "cpu_time", "wall_time"
280    alpha = [one, two]
281    do step=1,2
282      if (step == 1) method = "F90"
283      if (step == 2) method = "BLAS"
284      do isz=1,nsizes
285        npw = sizes(isz)
286        ABI_MALLOC(cg1, (2, npw))
287        ABI_MALLOC(cg2, (2, npw))
288        cg2 = zero
289 
290        do jj=1,npw
291          cg1(:,jj) = jj
292        end do
293 
294        call cwtime(ctime, wtime, gflops, "start")
295        if (step == 1) then
296          jj = 0
297          do icall=1,ncalls
298            !call random_number(cg1)
299            call random_number(cg2(:,1:1))
300            !cg2 = zero
301            do ii=1,npw
302              jj = jj+1
303              cg2(1,ii) = alpha(1)*cg1(1,ii) - alpha(2)*cg1(2,ii) + cg2(1,ii)
304              cg2(2,ii) = alpha(1)*cg1(2,ii) + alpha(2)*cg1(1,ii) + cg2(2,ii)
305            end do
306          end do
307        else
308          do icall=1,ncalls
309            !call random_number(cg1)
310            !call random_number(cg2)
311            !cg2 = zero
312            call random_number(cg2(:,1:1))
313            call cg_zaxpy(npw, alpha, cg1, cg2)
314          end do
315        end if
316        call cwtime(ctime, wtime, gflops, "stop")
317        write(std_out,'(1x,i8,1x,a6,2(1x,f12.6))')npw, trim(method), ctime, wtime
318        ABI_FREE(cg1)
319        ABI_FREE(cg2)
320      end do
321      write(std_out,'(a)')trim(sjoin("# end method: ", method))
322    end do
323    write(std_out,'(a)')trim(sjoin("END_BENCHMARK: ",command))
324 
325  case ("zgemv")
326    write(std_out,'(a)')trim(sjoin("BEGIN_BENCHMARK: ", command))
327    write(std_out, "(a1,a8,1x,a6,2(1x,a12))")"#", "npw", "type", "cpu_time", "wall_time"
328    alpha = [one, two]
329    beta = [zero, zero]
330    do step=1,2
331      if (step == 1) method = "F90"
332      if (step == 2) method = "BLAS"
333      !write(std_out,'(a)')trim(sjoin("BEGIN_BENCHMARK: ",method))
334 
335      do isz=1,nsizes
336        npw = sizes(isz)
337        ABI_MALLOC(cg1, (2, npw*npw))
338        ABI_MALLOC(cg2, (2, npw))
339        ABI_MALLOC(cg3, (2, npw))
340 
341        do jj=1,npw*npw
342          cg1(:,jj) = jj
343        end do
344        cg2 = one
345        cg3 = zero
346 
347        call cwtime(ctime, wtime, gflops, "start")
348        if (step == 1) then
349          !do icall=1,ncalls
350          !do jj=1,npw
351          !ar=scprod(1,iband);ai=scprod(2,iband)
352          !do ipw=1,npw_sp
353          !cg_re=cg(1,index1+ipw)
354          !cg_im=cg(2,index1+ipw)
355          !direc(1,ipw)=direc(1,ipw)-ar*cg_re+ai*cg_im
356          !direc(2,ipw)=direc(2,ipw)-ar*cg_im-ai*cg_re
357          !end do
358          !end do
359          !end do
360          !!cg3(1,ii) = alpha(1)*cg1(1,ii) - alpha(2)*cg1(2,ii) + cg2(1,ii)
361          !!cg3(2,ii) = alpha(1)*cg1(2,ii) + alpha(2)*cg1(1,ii) + cg2(2,ii)
362          !end do
363          !end do
364        else
365          do icall=1,ncalls
366            call cg_zgemv("N", npw, npw, cg1, cg2, cg3)
367          end do
368        end if
369        call cwtime(ctime, wtime, gflops, "stop")
370        write(std_out,'(1x,i8,1x,a6,2(1x,f12.6))')npw, trim(method), ctime, wtime
371 
372        ABI_FREE(cg1)
373        ABI_FREE(cg2)
374        ABI_FREE(cg3)
375      end do
376      write(std_out,'(a)')trim(sjoin("# end method: ", method))
377    end do
378    write(std_out,'(a)')trim(sjoin("END_BENCHMARK: ",command))
379 
380  case ("itranspose", "otranspose")
381    write(std_out,'(a)')trim(sjoin("BEGIN_BENCHMARK: ",command))
382    write(std_out, "(a1,a8,1x,a6,2(1x,a12))")"#", "npw", "type", "cpu_time", "wall_time"
383 
384    do step=1,2
385      if (step == 1) method = "F90"
386      if (step == 2) method = "MKL"
387      do isz=1,nsizes
388        npw = sizes(isz)
389        ABI_CALLOC(zmat, (npw, npw))
390        ABI_CALLOC(wmat, (npw, npw))
391 
392        call cwtime(ctime, wtime, gflops, "start")
393        if (step == 1) then
394          do icall=1,ncalls
395            zmat = transpose(zmat)
396          end do
397        else
398          do icall=1,ncalls
399            select case (command)
400            case ("otranspose")
401              call sqmat_otranspose(npw, zmat, wmat)
402            case ("itranspose")
403              call sqmat_itranspose(npw, zmat)
404            end select
405          end do
406        end if
407        call cwtime(ctime, wtime, gflops, "stop")
408        write(std_out,'(1x,i8,1x,a6,2(1x,f12.6))')npw, trim(method), ctime, wtime
409 
410        ABI_FREE(zmat)
411        ABI_FREE(wmat)
412      end do
413      write(std_out,'(a)')trim(sjoin("# end method: ", method))
414    end do
415    write(std_out,'(a)')trim(sjoin("END_BENCHMARK: ",command))
416 
417   case ("zgemm3m")
418     write(std_out,'(a)')trim(sjoin("BEGIN_BENCHMARK: ", command))
419     write(std_out, "(a1,2(a8,1x),a6,2(1x,a12))")"#", "npw", "nband", "type", "cpu_time", "wall_time"
420     do step=1,2
421       if (step == 1) method = "ZGEMM"
422       if (step == 2) method = "ZGEMM3m"
423       do isz=1,nsizes
424         npw  = sizes(isz)
425         ABI_MALLOC_RAND(cg1, (2, npw*nband))
426         ABI_MALLOC_RAND(cg2, (2, npw*nband))
427         ABI_MALLOC_RAND(cg3, (2, nband*nband))
428 
429         call cwtime(ctime, wtime, gflops, "start")
430         if (step == 1) then
431           call ZGEMM("C", "N", nband, nband, npw, cone, cg1, npw, cg2, npw, czero, cg3, nband)
432         else
433 #ifdef HAVE_LINALG_GEMM3M
434           call ZGEMM3M("C", "N", nband, nband, npw, cone, cg1, npw, cg2, npw, czero, cg3, nband)
435 #else
436           ABI_ERROR("ZGEMM3M is not available")
437 #endif
438         end if
439         call cwtime(ctime, wtime, gflops, "stop")
440         write(std_out,'(1x,2(i8,1x),a6,2(1x,f12.6))')npw, nband, trim(method), ctime, wtime
441 
442        ABI_FREE(cg1)
443        ABI_FREE(cg2)
444        ABI_FREE(cg3)
445       end do
446     end do
447 
448   case ("zgemmt")
449     write(std_out,'(a)')trim(sjoin("BEGIN_BENCHMARK: ", command))
450     write(std_out, "(a1,2(a8,1x),a6,2(1x,a12))")"#", "npw", "nband", "type", "cpu_time", "wall_time"
451     do step=1,2
452       if (step == 1) method = "ZGEMM"
453       if (step == 2) method = "ZGEMMT"
454       do isz=1,nsizes
455         npw  = sizes(isz)
456         ABI_MALLOC_RAND(cg1, (2, npw*nband))
457         ABI_MALLOC_RAND(cg2, (2, npw*nband))
458         ABI_MALLOC_RAND(cg3, (2, nband*nband))
459 
460         call cwtime(ctime, wtime, gflops, "start")
461         if (step == 1) then
462           call ZGEMM("C", "N", nband, nband, npw, cone, cg1, npw, cg2, npw, czero, cg3, nband)
463         else
464 #ifdef HAVE_LINALG_GEMMT
465           call ZGEMMT("U", "C", "N", nband, npw, cone, cg1, npw, cg2, npw, czero, cg3, nband)
466 #else
467           ABI_ERROR("ZGEMMT is not available")
468 #endif
469         end if
470         call cwtime(ctime, wtime, gflops, "stop")
471         write(std_out,'(1x,2(i8,1x),a6,2(1x,f12.6))')npw, nband, trim(method), ctime, wtime
472 
473        ABI_FREE(cg1)
474        ABI_FREE(cg2)
475        ABI_FREE(cg3)
476       end do
477     end do
478 
479   case ("zherk_vs_zgemm")
480     write(std_out,'(a)')trim(sjoin("BEGIN_BENCHMARK: ", command))
481     write(std_out, "(a1,a8,1x,a6,2(1x,a12))")"#", "npw", "type", "cpu_time", "wall_time"
482     do step=1,2
483       if (step == 1) method = "ZGEMM"
484       if (step == 2) method = "ZHERK"
485       do isz=1,nsizes
486         npw  = sizes(isz)
487         ABI_MALLOC_RAND(cg1, (2, npw*nband))
488         ABI_MALLOC_RAND(cg3, (2, nband*nband))
489 
490         call cwtime(ctime, wtime, gflops, "start")
491         if (step == 1) then
492           call ZGEMM("C", "N", nband, nband, npw, cone, cg1, npw, cg1, npw, czero, cg3, nband)
493         else
494           call ZHERK("U", "C", nband, npw, cone, cg1, npw, czero, cg3, nband)
495         end if
496         call cwtime(ctime, wtime, gflops, "stop")
497         write(std_out,'(1x,2(i8,1x),a6,2(1x,f12.6))')npw, nband, trim(method), ctime, wtime
498 
499        ABI_FREE(cg1)
500        ABI_FREE(cg3)
501       end do
502     end do
503 
504   case ("zgerc")
505     write(std_out,'(a)')trim(sjoin("BEGIN_BENCHMARK: ", command))
506     write(std_out, "(a1,a8,1x,a6,2(1x,a12))")"#", "npw", "type", "cpu_time", "wall_time"
507     do step=1,2
508       if (step == 1) method = "F90"
509       if (step == 2) method = "BLAS"
510       do isz=1,nsizes
511         npw  = sizes(isz)
512         ABI_MALLOC(zvec, (npw))
513         ABI_MALLOC(zmat, (npw, npw))
514         zvec = cone; zmat = czero
515 
516         call cwtime(ctime, wtime, gflops, "start")
517         if (step == 1) then ! Home made zgerc
518           do it=1,ncalls
519             zmat = czero
520 !$OMP PARALLEL DO
521              do jj=1,npw
522                do ii=1,npw
523                  zmat(ii,jj) = zmat(ii,jj) + CONJG(zvec(ii)) * zvec(jj)
524                end do
525              end do
526            end do
527         else
528           do jj=1,ncalls
529             zmat = czero
530             call XGERC(npw,npw,(1._dp,0._dp),zvec,1,zvec,1,zmat,npw)
531           end do
532         end if
533         call cwtime(ctime, wtime, gflops, "stop")
534         write(std_out,'(1x,i8,1x,a6,2(1x,f12.6))')npw, trim(method), ctime, wtime
535 
536         ABI_FREE(zvec)
537         ABI_FREE(zmat)
538       end do
539 
540       write(std_out,'(a)')trim(sjoin("# end method: ", method))
541     end do
542     write(std_out,'(a)')trim(sjoin("END_BENCHMARK: ", command))
543 
544     !do isz=1,nsizes
545     !npw  = sizes(isz)
546     !ABI_MALLOC(vec,(npw))
547     !ABI_MALLOC(mat,(npw,npw))
548     !vec = cone; mat = czero
549 
550     !call cwtime(ctime,wtime,gflops,"start")
551 
552     !do jj=1,ncalls
553     !call XGERC(npw,npw,(1._sp,0._sp),vec,1,vec,1,mat,npw)
554     !end do
555 
556     !call cwtime(ctime,wtime,gflops,"stop")
557 
558     !write(std_out,'(a,i0,2f9.3)')" CGERG size, cpu_time, wall_time, max_abserr ",npw,ctime,wtime
559 
560     !ABI_FREE(vec)
561     !ABI_FREE(mat)
562     !end do
563 
564  !case ("xginv")
565  !  do_check = .FALSE.
566  !  !do_check = .TRUE.
567  !  do ii=1,nsizes
568  !    npw  = sizes(ii)
569  !    call test_xginv(npw, skinds, do_check, Tres, comm)
570 
571  !    if (my_rank==master) then
572  !      write(std_out,'(a,i0,3f9.3)')&
573  !       " routine = xginv, size, cpu_time, wall_time, max_abserr ",Tres%msize,Tres%ctime,Tres%wtime,Tres%max_abserr
574  !    end if
575  !  end do
576 
577  case ("xhpev_vs_xheev")
578    write(std_out,'(a)')trim(sjoin("BEGIN_BENCHMARK: ", command))
579    write(std_out, "(a1,a8,1x,a9,2(1x,a12))")"#", "npw", "type", "cpu_time", "wall_time"
580 
581    do step=1,2
582      if (step == 1) method = "PACK"
583      if (step == 2) method = "NOPACK"
584      do isz=1,nsizes
585        npw = sizes(isz)
586        ABI_REMALLOC(ene, (npw))
587        ABI_REMALLOC(evec, (npw, npw))
588        ABI_REMALLOC(zpmat, (npw*(npw+1)/2))
589        zpmat = czero
590        idx = 0
591        do jj=1,npw
592          do ii=1,jj
593            idx = idx + 1
594            zpmat(idx) = cone
595          end do
596        end do
597 
598        if (step == 1) then
599          call cwtime(ctime,wtime,gflops,"start")
600          call xhpev("V", "U", npw, zpmat, ene, evec, npw)
601        else
602          ABI_MALLOC(zmat, (npw, npw))
603          do jj=1,npw
604            do ii=1,jj
605              idx = ii + jj*(jj-1)/2
606              zmat(ii,jj) = zpmat(idx)
607            end do
608          end do
609          call cwtime(ctime, wtime, gflops, "start")
610          !call xheev("V","U",npw,zmat,ene)
611          call xheevx("V","A","U",npw,zmat,zero,zero,1,1,zero,nfound,ene,evec,npw)
612          ABI_FREE(zmat)
613        end if
614        call cwtime(ctime, wtime, gflops, "stop")
615        write(std_out,'(1x,i8,1x,a8,2(1x,f12.6))')npw, trim(method), ctime, wtime
616      end do
617 
618      ABI_FREE(ene)
619      ABI_FREE(evec)
620      ABI_FREE(zpmat)
621      write(std_out,'(a)')trim(sjoin("# end method: ", method))
622    end do
623    write(std_out,'(a)')trim(sjoin("END_BENCHMARK: ",command))
624 
625  case default
626    ABI_ERROR(sjoin("Wrong command:", command))
627  end select
628 
629  ABI_FREE(sizes)
630  call destroy_mpi_enreg(MPI_enreg)
631 
632  call wrtout(std_out, ch10//" Analysis completed.")
633  call abinit_doctor("__lapackprof")
634 
635 100 call xmpi_end()
636 
637  end program lapackprof