TABLE OF CONTENTS
ABINIT/lapackprof [ Programs ]
NAME
lapackprof
FUNCTION
Utility for profiling Linear Algebra libraries used by Abinit.
COPYRIGHT
Copyright (C) 2004-2024 ABINIT group (MG) This file is distributed under the terms of the GNU General Public License, see ~abinit/COPYING or http://www.gnu.org/copyleft/gpl.txt .
INPUTS
(main program)
SOURCE
19 #if defined HAVE_CONFIG_H 20 #include "config.h" 21 #endif 22 23 #include "abi_common.h" 24 25 program lapackprof 26 27 use defs_basis 28 use m_abicore 29 use m_xmpi 30 use m_xomp 31 use m_errors 32 use m_hide_blas 33 use m_cgtools 34 use m_hide_lapack 35 use m_yaml 36 37 use defs_abitypes, only : MPI_type 38 use m_build_info, only : abinit_version 39 use m_fstrings, only : lower, itoa, sjoin !, strcat 40 use m_specialmsg, only : specialmsg_getcount, herald 41 use m_argparse, only : get_arg, get_arg_list, get_start_step_num 42 use m_time, only : cwtime 43 use m_io_tools, only : prompt 44 use m_numeric_tools, only : arth 45 use m_mpinfo, only : init_mpi_enreg, destroy_mpi_enreg 46 47 implicit none 48 49 !Local variables------------------------------- 50 !scalars 51 integer,parameter :: master = 0 52 integer :: comm, npw, my_rank, ii, isz, jj, it, step, icall, nfound, nspinor !ierr, 53 integer :: istwfk, mcg, mgsc, band, g0, idx, ortalgo, abimem_level, prtvol, usepaw, debug, me_g0 54 real(dp) :: ctime, wtime, gflops, abimem_limit_mb, max_absimag 55 !logical :: do_check 56 character(len=500) :: method, command, arg, msg !header, 57 type(MPI_type) :: MPI_enreg 58 type(yamldoc_t) :: ydoc 59 !arrays 60 integer,allocatable :: sizes(:) 61 real(dp) :: alpha(2), beta(2) ,dot(2) 62 real(dp),allocatable :: cg(:,:), gsc(:,:), ortho_check(:,:,:) 63 real(dp),allocatable :: cg1(:,:), cg2(:,:), cg3(:,:), ene(:), direc(:,:), scprod(:,:) 64 complex(dpc),allocatable :: zvec(:), zmat(:,:), wmat(:,:), zpmat(:), evec(:,:) 65 ! complex(spc),allocatable :: vec(:), mat(:,:) 66 !type(latime_t) :: Tres 67 integer :: ncalls, nband, nsizes, nthreads 68 integer :: npw_start_step_num(3) 69 70 ! ************************************************************************* 71 72 ! Change communicator for I/O (mandatory!) 73 call abi_io_redirect(new_io_comm=xmpi_world) 74 75 call xmpi_init() 76 comm = xmpi_world; my_rank = xmpi_comm_rank(comm) 77 78 ! Initialize memory profiling if it is activated 79 ! if a full abimem.mocc report is desired, set the argument of abimem_init to "2" instead of "0" 80 ! note that abimem.mocc files can easily be multiple GB in size so don't use this option normally 81 ABI_CHECK(get_arg("abimem-level", abimem_level, msg, default=0) == 0, msg) 82 ABI_CHECK(get_arg("abimem-limit-mb", abimem_limit_mb, msg, default=20.0_dp) == 0, msg) 83 #ifdef HAVE_MEM_PROFILING 84 call abimem_init(abimem_level, limit_mb=abimem_limit_mb) 85 #endif 86 87 call herald("LAPACKPROF", abinit_version, std_out) 88 89 ! Command line options. 90 do ii=1,command_argument_count() 91 call get_command_argument(ii, arg) 92 if (arg == "-v" .or. arg == "--version") then 93 write(std_out,"(a)") trim(abinit_version); goto 100 94 95 else if (arg == "-h" .or. arg == "--help") then 96 ! TODO: Document the different options. 97 write(std_out,*)"-v, --version Show version number and exit." 98 write(std_out,*)"-h, --help Show this help and exit." 99 write(std_out,*)" " 100 write(std_out,*)"=== Options for developers ===" 101 write(std_out,*)" " 102 write(std_out,*)"test_v1complete FILE [--symv1scf 1] [--potfile foo.nc]" 103 goto 100 104 end if 105 end do 106 107 call get_command_argument(1, command) 108 ABI_CHECK(get_arg("prtvol", prtvol, msg, default=0) == 0, msg) 109 ABI_CHECK(get_arg("istwfk", istwfk, msg, default=1) == 0, msg) 110 ABI_CHECK(get_arg("usepaw", usepaw, msg, default=0) == 0, msg) 111 ABI_CHECK(get_arg("ncalls", ncalls, msg, default=5) == 0, msg) 112 ABI_CHECK(get_arg("nband", nband, msg, default=50) == 0, msg) 113 ABI_CHECK(get_arg("nspinor", nspinor, msg, default=1) == 0, msg) 114 ABI_CHECK(get_arg("nthreads", nthreads, msg, default=1) == 0, msg) 115 ABI_CHECK(get_arg("debug", nthreads, msg, default=0) == 0, msg) 116 ABI_CHECK(get_start_step_num("npw", npw_start_step_num, msg, default=[1000, 2000, 20]) == 0, msg) 117 118 if (my_rank == master) write(std_out,'(a)')" Tool for profiling and testing Linear Algebra routines used in ABINIT." 119 120 call xomp_set_num_threads(nthreads) 121 call xomp_show_info(std_out) 122 call init_mpi_enreg(mpi_enreg) 123 me_g0 = mpi_enreg%me_g0 124 125 ! Output metadata i.e parameters that do not change during the benchmark. 126 ydoc = yamldoc_open('LapackProfMetadata') !, info=info, width=width) 127 call ydoc%add_ints("istwfk, usepaw, ncalls, nband, nspinor, nthreads", & 128 [istwfk, usepaw, ncalls, nband, nspinor, nthreads] & 129 ) !, int_fmt, width, dict_key, multiline_trig, ignore) 130 call ydoc%write_and_free(std_out) 131 132 nsizes = npw_start_step_num(3) 133 ABI_MALLOC(sizes, (nsizes)) 134 sizes = arth(npw_start_step_num(1), npw_start_step_num(2), nsizes) 135 136 select case (command) 137 case ("projbd") 138 write(std_out,'(a)')trim(sjoin("BEGIN_BENCHMARK: ", command)) 139 write(std_out, "(a1,a8,1x,a6,2(1x,a12))")"#", "npw", "nband", "cpu_time", "wall_time" 140 141 do isz=1,nsizes 142 npw = sizes(isz) 143 mcg = npw * nband; mgsc = mcg * usepaw 144 ABI_MALLOC_RAND(cg, (2, mcg)) 145 ABI_MALLOC(gsc, (2, mgsc)) 146 gsc = cg 147 ABI_CALLOC(direc, (2, npw*nspinor)) 148 ABI_MALLOC(scprod, (2, nband)) 149 150 call cwtime(ctime,wtime,gflops,"start") 151 call projbd(cg, direc, 0, 0, 0, istwfk, mcg, mgsc, nband, npw, nspinor, gsc, scprod, 0, 0, usepaw, & 152 me_g0, mpi_enreg%comm_fft) 153 call cwtime(ctime,wtime,gflops,"stop") 154 write(std_out,'(1x,i8,1x,i6,2(1x,f12.6))') npw, nband, ctime, wtime 155 156 ABI_FREE(scprod) 157 ABI_FREE(direc) 158 ABI_FREE(cg) 159 ABI_FREE(gsc) 160 end do 161 162 write(std_out,'(a)')trim(sjoin("END_BENCHMARK: ", command)) 163 164 case ("pw_orthon") 165 write(std_out,'(a)')trim(sjoin("BEGIN_BENCHMARK: ",command)) 166 write(std_out, "(a1,a8,1x,a6,1x,a7,2(1x,a12))")"#", "npw", "nband", "ortalgo", "cpu_time", "wall_time" 167 168 do ortalgo=0,4,1 169 do isz=1,nsizes 170 npw = sizes(isz) 171 mcg = npw * nband 172 mgsc = mcg * usepaw 173 ABI_MALLOC_RAND(cg, (2, mcg)) 174 call cg_set_imag0_to_zero(istwfk, me_g0, npw, nband, cg, max_absimag) 175 ABI_MALLOC(gsc, (2, mgsc)) 176 gsc = cg 177 178 call cwtime(ctime, wtime, gflops, "start") 179 call pw_orthon(0,0, istwfk, mcg, mgsc, npw, nband, ortalgo, gsc, usepaw, cg,& 180 me_g0, mpi_enreg%comm_bandspinorfft) 181 call cwtime(ctime,wtime,gflops,"stop") 182 write(std_out,'(1x,i8,1x,i6,1x,i7,2(1x,f12.6))') npw, nband, ortalgo, ctime, wtime 183 184 if (debug == 1) then 185 ABI_MALLOC(ortho_check,(2, nband, nband)) 186 if (istwfk/=1) then 187 do band=1,nband 188 g0 = 1 + (band-1)*npw 189 cg(:,g0) = half * cg(:,g0) 190 end do 191 end if 192 call cg_zgemm("C", "N", npw, nband, nband, cg, cg, ortho_check) 193 if (istwfk/=1) ortho_check = two * ortho_check 194 do band=1,nband 195 ortho_check(1,band,band) = ortho_check(1,band,band) - one 196 end do 197 write(std_out,*)"DEBUG: Max Abs error:",MAXVAL( ABS(RESHAPE(ortho_check, [2*nband*nband]))) 198 ABI_FREE(ortho_check) 199 end if 200 201 ABI_FREE(cg) 202 ABI_FREE(gsc) 203 end do ! isz 204 write(std_out,'(a)')trim(sjoin("# end ortalgo: ", itoa(ortalgo))) 205 end do ! ortalgo 206 write(std_out,'(a)')trim(sjoin("END_BENCHMARK: ", command)) 207 208 case ("copy") 209 write(std_out,'(a)')trim(sjoin("BEGIN_BENCHMARK: ", command)) 210 write(std_out, "(a1,a8,1x,a6,2(1x,a12))")"#", "npw", "type", "cpu_time", "wall_time" 211 do step=1,2 212 if (step == 1) method = "F90" 213 if (step == 2) method = "BLAS" 214 do isz=1,nsizes 215 npw = sizes(isz) 216 ABI_CALLOC(cg1, (2, npw)) 217 ABI_MALLOC(cg2, (2, npw)) 218 219 call cwtime(ctime, wtime, gflops, "start") 220 if (step==1) then 221 do ii=1,ncalls 222 do jj=1,npw 223 cg2(:,jj) = cg1(:,jj) 224 end do 225 end do 226 else 227 do ii=1,ncalls 228 call cg_zcopy(npw, cg1, cg2) 229 end do 230 end if 231 call cwtime(ctime, wtime, gflops, "stop") 232 write(std_out,'(1x,i8,1x,a6,2(1x,f12.6))')npw, trim(method), ctime, wtime 233 234 ABI_FREE(cg1) 235 ABI_FREE(cg2) 236 end do 237 238 write(std_out,'(a)')trim(sjoin("# end method: ", method)) 239 end do 240 write(std_out,'(a)')trim(sjoin("END_BENCHMARK: ", command)) 241 242 case ("zdotc") 243 write(std_out,'(a)')trim(sjoin("BEGIN_BENCHMARK: ", command)) 244 write(std_out, "(a1,a8,1x,a6,2(1x,a12))")"#", "npw", "type", "cpu_time", "wall_time" 245 do step=1,2 246 if (step == 1) method = " F90" 247 if (step == 2) method = " BLAS" 248 do isz=1,nsizes 249 npw = sizes(isz) 250 ABI_MALLOC_RAND(cg1,(2, npw)) 251 ABI_MALLOC_RAND(cg2,(2, npw)) 252 253 call cwtime(ctime, wtime, gflops, "start") 254 if (step == 1) then 255 do jj=1,ncalls 256 dot = zero 257 !$OMP PARALLEL DO REDUCTION(+:dot) 258 do ii=1,npw 259 dot(1) = dot(1) + cg1(1,ii)*cg2(1,ii) + cg1(2,ii)*cg2(2,ii) 260 dot(2) = dot(2) + cg1(1,ii)*cg2(2,ii) - cg1(2,ii)*cg2(1,ii) 261 end do 262 end do 263 else 264 do ii=1,ncalls 265 dot = cg_zdotc(npw,cg1,cg2) 266 end do 267 end if 268 call cwtime(ctime, wtime, gflops, "stop") 269 write(std_out,'(1x,i8,1x,a6,2(1x,f12.6))')npw, trim(method), ctime, wtime 270 ABI_FREE(cg1) 271 ABI_FREE(cg2) 272 end do 273 write(std_out,'(a)')trim(sjoin("# end method: ", method)) 274 end do 275 write(std_out,'(a)')trim(sjoin("END_BENCHMARK: ",method)) 276 277 case ("axpy") 278 write(std_out,'(a)')trim(sjoin("BEGIN_BENCHMARK: ", command)) 279 write(std_out, "(a1,a8,1x,a6,2(1x,a12))")"#", "npw", "type", "cpu_time", "wall_time" 280 alpha = [one, two] 281 do step=1,2 282 if (step == 1) method = "F90" 283 if (step == 2) method = "BLAS" 284 do isz=1,nsizes 285 npw = sizes(isz) 286 ABI_MALLOC(cg1, (2, npw)) 287 ABI_MALLOC(cg2, (2, npw)) 288 cg2 = zero 289 290 do jj=1,npw 291 cg1(:,jj) = jj 292 end do 293 294 call cwtime(ctime, wtime, gflops, "start") 295 if (step == 1) then 296 jj = 0 297 do icall=1,ncalls 298 !call random_number(cg1) 299 call random_number(cg2(:,1:1)) 300 !cg2 = zero 301 do ii=1,npw 302 jj = jj+1 303 cg2(1,ii) = alpha(1)*cg1(1,ii) - alpha(2)*cg1(2,ii) + cg2(1,ii) 304 cg2(2,ii) = alpha(1)*cg1(2,ii) + alpha(2)*cg1(1,ii) + cg2(2,ii) 305 end do 306 end do 307 else 308 do icall=1,ncalls 309 !call random_number(cg1) 310 !call random_number(cg2) 311 !cg2 = zero 312 call random_number(cg2(:,1:1)) 313 call cg_zaxpy(npw, alpha, cg1, cg2) 314 end do 315 end if 316 call cwtime(ctime, wtime, gflops, "stop") 317 write(std_out,'(1x,i8,1x,a6,2(1x,f12.6))')npw, trim(method), ctime, wtime 318 ABI_FREE(cg1) 319 ABI_FREE(cg2) 320 end do 321 write(std_out,'(a)')trim(sjoin("# end method: ", method)) 322 end do 323 write(std_out,'(a)')trim(sjoin("END_BENCHMARK: ",command)) 324 325 case ("zgemv") 326 write(std_out,'(a)')trim(sjoin("BEGIN_BENCHMARK: ", command)) 327 write(std_out, "(a1,a8,1x,a6,2(1x,a12))")"#", "npw", "type", "cpu_time", "wall_time" 328 alpha = [one, two] 329 beta = [zero, zero] 330 do step=1,2 331 if (step == 1) method = "F90" 332 if (step == 2) method = "BLAS" 333 !write(std_out,'(a)')trim(sjoin("BEGIN_BENCHMARK: ",method)) 334 335 do isz=1,nsizes 336 npw = sizes(isz) 337 ABI_MALLOC(cg1, (2, npw*npw)) 338 ABI_MALLOC(cg2, (2, npw)) 339 ABI_MALLOC(cg3, (2, npw)) 340 341 do jj=1,npw*npw 342 cg1(:,jj) = jj 343 end do 344 cg2 = one 345 cg3 = zero 346 347 call cwtime(ctime, wtime, gflops, "start") 348 if (step == 1) then 349 !do icall=1,ncalls 350 !do jj=1,npw 351 !ar=scprod(1,iband);ai=scprod(2,iband) 352 !do ipw=1,npw_sp 353 !cg_re=cg(1,index1+ipw) 354 !cg_im=cg(2,index1+ipw) 355 !direc(1,ipw)=direc(1,ipw)-ar*cg_re+ai*cg_im 356 !direc(2,ipw)=direc(2,ipw)-ar*cg_im-ai*cg_re 357 !end do 358 !end do 359 !end do 360 !!cg3(1,ii) = alpha(1)*cg1(1,ii) - alpha(2)*cg1(2,ii) + cg2(1,ii) 361 !!cg3(2,ii) = alpha(1)*cg1(2,ii) + alpha(2)*cg1(1,ii) + cg2(2,ii) 362 !end do 363 !end do 364 else 365 do icall=1,ncalls 366 call cg_zgemv("N", npw, npw, cg1, cg2, cg3) 367 end do 368 end if 369 call cwtime(ctime, wtime, gflops, "stop") 370 write(std_out,'(1x,i8,1x,a6,2(1x,f12.6))')npw, trim(method), ctime, wtime 371 372 ABI_FREE(cg1) 373 ABI_FREE(cg2) 374 ABI_FREE(cg3) 375 end do 376 write(std_out,'(a)')trim(sjoin("# end method: ", method)) 377 end do 378 write(std_out,'(a)')trim(sjoin("END_BENCHMARK: ",command)) 379 380 case ("itranspose", "otranspose") 381 write(std_out,'(a)')trim(sjoin("BEGIN_BENCHMARK: ",command)) 382 write(std_out, "(a1,a8,1x,a6,2(1x,a12))")"#", "npw", "type", "cpu_time", "wall_time" 383 384 do step=1,2 385 if (step == 1) method = "F90" 386 if (step == 2) method = "MKL" 387 do isz=1,nsizes 388 npw = sizes(isz) 389 ABI_CALLOC(zmat, (npw, npw)) 390 ABI_CALLOC(wmat, (npw, npw)) 391 392 call cwtime(ctime, wtime, gflops, "start") 393 if (step == 1) then 394 do icall=1,ncalls 395 zmat = transpose(zmat) 396 end do 397 else 398 do icall=1,ncalls 399 select case (command) 400 case ("otranspose") 401 call sqmat_otranspose(npw, zmat, wmat) 402 case ("itranspose") 403 call sqmat_itranspose(npw, zmat) 404 end select 405 end do 406 end if 407 call cwtime(ctime, wtime, gflops, "stop") 408 write(std_out,'(1x,i8,1x,a6,2(1x,f12.6))')npw, trim(method), ctime, wtime 409 410 ABI_FREE(zmat) 411 ABI_FREE(wmat) 412 end do 413 write(std_out,'(a)')trim(sjoin("# end method: ", method)) 414 end do 415 write(std_out,'(a)')trim(sjoin("END_BENCHMARK: ",command)) 416 417 case ("zgemm3m") 418 write(std_out,'(a)')trim(sjoin("BEGIN_BENCHMARK: ", command)) 419 write(std_out, "(a1,2(a8,1x),a6,2(1x,a12))")"#", "npw", "nband", "type", "cpu_time", "wall_time" 420 do step=1,2 421 if (step == 1) method = "ZGEMM" 422 if (step == 2) method = "ZGEMM3m" 423 do isz=1,nsizes 424 npw = sizes(isz) 425 ABI_MALLOC_RAND(cg1, (2, npw*nband)) 426 ABI_MALLOC_RAND(cg2, (2, npw*nband)) 427 ABI_MALLOC_RAND(cg3, (2, nband*nband)) 428 429 call cwtime(ctime, wtime, gflops, "start") 430 if (step == 1) then 431 call ZGEMM("C", "N", nband, nband, npw, cone, cg1, npw, cg2, npw, czero, cg3, nband) 432 else 433 #ifdef HAVE_LINALG_GEMM3M 434 call ZGEMM3M("C", "N", nband, nband, npw, cone, cg1, npw, cg2, npw, czero, cg3, nband) 435 #else 436 ABI_ERROR("ZGEMM3M is not available") 437 #endif 438 end if 439 call cwtime(ctime, wtime, gflops, "stop") 440 write(std_out,'(1x,2(i8,1x),a6,2(1x,f12.6))')npw, nband, trim(method), ctime, wtime 441 442 ABI_FREE(cg1) 443 ABI_FREE(cg2) 444 ABI_FREE(cg3) 445 end do 446 end do 447 448 case ("zgemmt") 449 write(std_out,'(a)')trim(sjoin("BEGIN_BENCHMARK: ", command)) 450 write(std_out, "(a1,2(a8,1x),a6,2(1x,a12))")"#", "npw", "nband", "type", "cpu_time", "wall_time" 451 do step=1,2 452 if (step == 1) method = "ZGEMM" 453 if (step == 2) method = "ZGEMMT" 454 do isz=1,nsizes 455 npw = sizes(isz) 456 ABI_MALLOC_RAND(cg1, (2, npw*nband)) 457 ABI_MALLOC_RAND(cg2, (2, npw*nband)) 458 ABI_MALLOC_RAND(cg3, (2, nband*nband)) 459 460 call cwtime(ctime, wtime, gflops, "start") 461 if (step == 1) then 462 call ZGEMM("C", "N", nband, nband, npw, cone, cg1, npw, cg2, npw, czero, cg3, nband) 463 else 464 #ifdef HAVE_LINALG_GEMMT 465 call ZGEMMT("U", "C", "N", nband, npw, cone, cg1, npw, cg2, npw, czero, cg3, nband) 466 #else 467 ABI_ERROR("ZGEMMT is not available") 468 #endif 469 end if 470 call cwtime(ctime, wtime, gflops, "stop") 471 write(std_out,'(1x,2(i8,1x),a6,2(1x,f12.6))')npw, nband, trim(method), ctime, wtime 472 473 ABI_FREE(cg1) 474 ABI_FREE(cg2) 475 ABI_FREE(cg3) 476 end do 477 end do 478 479 case ("zherk_vs_zgemm") 480 write(std_out,'(a)')trim(sjoin("BEGIN_BENCHMARK: ", command)) 481 write(std_out, "(a1,a8,1x,a6,2(1x,a12))")"#", "npw", "type", "cpu_time", "wall_time" 482 do step=1,2 483 if (step == 1) method = "ZGEMM" 484 if (step == 2) method = "ZHERK" 485 do isz=1,nsizes 486 npw = sizes(isz) 487 ABI_MALLOC_RAND(cg1, (2, npw*nband)) 488 ABI_MALLOC_RAND(cg3, (2, nband*nband)) 489 490 call cwtime(ctime, wtime, gflops, "start") 491 if (step == 1) then 492 call ZGEMM("C", "N", nband, nband, npw, cone, cg1, npw, cg1, npw, czero, cg3, nband) 493 else 494 call ZHERK("U", "C", nband, npw, cone, cg1, npw, czero, cg3, nband) 495 end if 496 call cwtime(ctime, wtime, gflops, "stop") 497 write(std_out,'(1x,2(i8,1x),a6,2(1x,f12.6))')npw, nband, trim(method), ctime, wtime 498 499 ABI_FREE(cg1) 500 ABI_FREE(cg3) 501 end do 502 end do 503 504 case ("zgerc") 505 write(std_out,'(a)')trim(sjoin("BEGIN_BENCHMARK: ", command)) 506 write(std_out, "(a1,a8,1x,a6,2(1x,a12))")"#", "npw", "type", "cpu_time", "wall_time" 507 do step=1,2 508 if (step == 1) method = "F90" 509 if (step == 2) method = "BLAS" 510 do isz=1,nsizes 511 npw = sizes(isz) 512 ABI_MALLOC(zvec, (npw)) 513 ABI_MALLOC(zmat, (npw, npw)) 514 zvec = cone; zmat = czero 515 516 call cwtime(ctime, wtime, gflops, "start") 517 if (step == 1) then ! Home made zgerc 518 do it=1,ncalls 519 zmat = czero 520 !$OMP PARALLEL DO 521 do jj=1,npw 522 do ii=1,npw 523 zmat(ii,jj) = zmat(ii,jj) + CONJG(zvec(ii)) * zvec(jj) 524 end do 525 end do 526 end do 527 else 528 do jj=1,ncalls 529 zmat = czero 530 call XGERC(npw,npw,(1._dp,0._dp),zvec,1,zvec,1,zmat,npw) 531 end do 532 end if 533 call cwtime(ctime, wtime, gflops, "stop") 534 write(std_out,'(1x,i8,1x,a6,2(1x,f12.6))')npw, trim(method), ctime, wtime 535 536 ABI_FREE(zvec) 537 ABI_FREE(zmat) 538 end do 539 540 write(std_out,'(a)')trim(sjoin("# end method: ", method)) 541 end do 542 write(std_out,'(a)')trim(sjoin("END_BENCHMARK: ", command)) 543 544 !do isz=1,nsizes 545 !npw = sizes(isz) 546 !ABI_MALLOC(vec,(npw)) 547 !ABI_MALLOC(mat,(npw,npw)) 548 !vec = cone; mat = czero 549 550 !call cwtime(ctime,wtime,gflops,"start") 551 552 !do jj=1,ncalls 553 !call XGERC(npw,npw,(1._sp,0._sp),vec,1,vec,1,mat,npw) 554 !end do 555 556 !call cwtime(ctime,wtime,gflops,"stop") 557 558 !write(std_out,'(a,i0,2f9.3)')" CGERG size, cpu_time, wall_time, max_abserr ",npw,ctime,wtime 559 560 !ABI_FREE(vec) 561 !ABI_FREE(mat) 562 !end do 563 564 !case ("xginv") 565 ! do_check = .FALSE. 566 ! !do_check = .TRUE. 567 ! do ii=1,nsizes 568 ! npw = sizes(ii) 569 ! call test_xginv(npw, skinds, do_check, Tres, comm) 570 571 ! if (my_rank==master) then 572 ! write(std_out,'(a,i0,3f9.3)')& 573 ! " routine = xginv, size, cpu_time, wall_time, max_abserr ",Tres%msize,Tres%ctime,Tres%wtime,Tres%max_abserr 574 ! end if 575 ! end do 576 577 case ("xhpev_vs_xheev") 578 write(std_out,'(a)')trim(sjoin("BEGIN_BENCHMARK: ", command)) 579 write(std_out, "(a1,a8,1x,a9,2(1x,a12))")"#", "npw", "type", "cpu_time", "wall_time" 580 581 do step=1,2 582 if (step == 1) method = "PACK" 583 if (step == 2) method = "NOPACK" 584 do isz=1,nsizes 585 npw = sizes(isz) 586 ABI_REMALLOC(ene, (npw)) 587 ABI_REMALLOC(evec, (npw, npw)) 588 ABI_REMALLOC(zpmat, (npw*(npw+1)/2)) 589 zpmat = czero 590 idx = 0 591 do jj=1,npw 592 do ii=1,jj 593 idx = idx + 1 594 zpmat(idx) = cone 595 end do 596 end do 597 598 if (step == 1) then 599 call cwtime(ctime,wtime,gflops,"start") 600 call xhpev("V", "U", npw, zpmat, ene, evec, npw) 601 else 602 ABI_MALLOC(zmat, (npw, npw)) 603 do jj=1,npw 604 do ii=1,jj 605 idx = ii + jj*(jj-1)/2 606 zmat(ii,jj) = zpmat(idx) 607 end do 608 end do 609 call cwtime(ctime, wtime, gflops, "start") 610 !call xheev("V","U",npw,zmat,ene) 611 call xheevx("V","A","U",npw,zmat,zero,zero,1,1,zero,nfound,ene,evec,npw) 612 ABI_FREE(zmat) 613 end if 614 call cwtime(ctime, wtime, gflops, "stop") 615 write(std_out,'(1x,i8,1x,a8,2(1x,f12.6))')npw, trim(method), ctime, wtime 616 end do 617 618 ABI_FREE(ene) 619 ABI_FREE(evec) 620 ABI_FREE(zpmat) 621 write(std_out,'(a)')trim(sjoin("# end method: ", method)) 622 end do 623 write(std_out,'(a)')trim(sjoin("END_BENCHMARK: ",command)) 624 625 case default 626 ABI_ERROR(sjoin("Wrong command:", command)) 627 end select 628 629 ABI_FREE(sizes) 630 call destroy_mpi_enreg(MPI_enreg) 631 632 call wrtout(std_out, ch10//" Analysis completed.") 633 call abinit_doctor("__lapackprof") 634 635 100 call xmpi_end() 636 637 end program lapackprof