TABLE OF CONTENTS


ABINIT/m_timana [ Modules ]

[ Top ] [ Modules ]

NAME

  m_timana

FUNCTION

 Analyse the timing, and print in unit ab_out. Some discussion of the
 number of calls to different routines is also provided, as comments,
 at the end of the routine, as well as, in the single dataset mode (ndtset<2),
 a detailed analysis of the time-consuming routines.

COPYRIGHT

  Copyright (C) 1998-2024 ABINIT group (XG, GMR)
  This file is distributed under the terms of the
  GNU General Public License, see ~abinit/COPYING
  or http://www.gnu.org/copyleft/gpl.txt .

SOURCE

19 #if defined HAVE_CONFIG_H
20 #include "config.h"
21 #endif
22 
23 #include "abi_common.h"
24 
25 module m_timana
26 
27  use defs_basis
28  use m_abicore
29  use m_xmpi
30  use m_xomp
31 
32  use m_time,        only : time_accu, timab, TIMER_SIZE
33  use defs_abitypes, only : MPI_type
34 
35  implicit none
36 
37  private

ABINIT/timana [ Functions ]

[ Top ] [ Functions ]

NAME

 timana

FUNCTION

 Analyse the timing, and print in unit ab_out. Some discussion of the
 number of calls to different routines is also provided, as comments,
 at the end of the routine, as well as, in the single dataset mode (ndtset<2),
 a detailed analysis of the time-consuming routines.

INPUTS

  mpi_enreg=information about MPI parallelization
  natom=number of atoms in cell.
  nband(nkpt*nsppol)=number of bands at each k point, for each polarization
  ndtset=number of datasets
  nfft=(effective) number of FFT grid points (for this processor)
  nkpt=number of k points
  npwtot(nkpt)=number of planewaves in basis at this k point
  nsppol=1 for unpolarized, 2 for spin-polarized
  timopt= if >0, write short analysis, if <0, write full analysis
          if timopt>=2, or timopt==-2 do not time the timer

OUTPUT

  (only writing)

NOTES

 *) One can suppress the cpu timer call in timein.f, if line 315 of the present routine is uncommented.

 *) The number of fourwf and nonlop calls can be computed as follows, in the
    groud-state case, with no reading of wavefunctions (irdwfk==0 and the like),
    and iscf>0 :

    1) For fourwf.f

    In each cgwf call, there will be
    1 call (isign=+1 and -1) for the first gradient calculation,
    and iline calls for the line minimizations,
    minus the number of ffts skipped because some wfs are sufficiently converged
    (there is a counter for that, see the log file)

    There are nband*nkpt*(nstep+2) calls to cgwf presently, where the
    (nstep+2) comes from the number of the presence of 2 nonscf loops
    in the first 2 steps.
    Thus, the number of fourwf calls in cgwf is
    nband*nkpt*(nstep+2)*(1+iline) - nskip_fourwf_in_cgwf

    To compute the density (either in vtowfk or in vtorho - by a mkrho call - )
    at each step, there will be nband*nkpt one-way calls,
    minus the number of bands skipped because the occupation number
    is too small (smaller than 1.0d-14). There is another counter for that.
    Thus, the number of fourwf calls for the density is
    nband*nkpt*nstep - nskip_fourwf_for_density

    For example, for Si with nline=3, nkpt=2, nband=4, nstep=10, and supposing
    no fourwf calls are skipped, there will be
    at most 4*2*12=96 calls to cgwf, with 4 two-way fft,
    that is 384 two-way ffts,
    and 4*2*10=80 one-way ffts to make the density.
    Altogether 464-nskip one-way ffts at most.

    2) For nonlop.f

    Presently, there are three different types of call to nonlop :
    for energy and gradient wrt wavefunctions (choice=1), for forces (choice=2),
    and for stresses (choice=3).

    In each cgwf call, there will be one nonlop call for two fourwf calls
    (independently of the number of skipped fourwf calls, since
    nonlop is also skipped then). These are the only calls with choice=1.
    Thus the number will be
    nband*nkpt*(nstep+2)*(1+iline) - nskip_fourwf_in_cgwf

    The number of choice=2 nonlop calls is equal to the number of fourwf calls
    to make the density, that is
    nband*nkpt*nstep - nskip_fourwf_for_density

    The number of choice=8 calls is equal to the number of occupied bands
    at the end of the calculation :
    nband(occupied)*nkpt
    The number of bands skipped then is not counted.

    NOTE : the number of fourwf calls is equal to
    the # of nonlop (choice=1) calls + the # of nonlop (choice=2) calls

SOURCE

 132 subroutine timana(mpi_enreg,natom,nband,ndtset,nfft,nkpt,npwtot,nsppol,timopt)
 133 
 134 !Arguments ------------------------------------
 135 !scalars
 136  integer,intent(in) :: natom,ndtset,nfft,nkpt,nsppol,timopt
 137  type(MPI_type),intent(in) :: mpi_enreg
 138 !arrays
 139  integer,intent(in) :: nband(nkpt*nsppol),npwtot(nkpt)
 140 
 141 !Local variables-------------------------------
 142 !scalars
 143  integer :: aslot,bslot,cslot,flag_count,flag_write,ierr,ii,ikpt,ipart
 144  integer :: ilist,isort,islot,isppol,itim,itimab,ltimab,maxii,me
 145  integer :: npart,nlist,nothers,nproc,nthreads,return_ncount
 146  integer(i8b) :: nbdmean,npwmean,npwnbdmean
 147  integer :: spaceworld,temp_list,totcount,tslot,utimab,ount
 148  real(dp) :: cpunm,lflops,other_cpu,other_wal,percent_limit,subcpu,subwal,timab_cpu,timab_wall,wallnm
 149  character(len=500) :: msg
 150 !arrays
 151  integer(i8b) :: basic(TIMER_SIZE),ndata(TIMER_SIZE),tslots(TIMER_SIZE)
 152  integer :: ncount(TIMER_SIZE)
 153  integer,allocatable :: list(:)
 154  real(dp) :: ftimes(2,TIMER_SIZE),ftsec(2),mflops(TIMER_SIZE),nflops(TIMER_SIZE),times(2,TIMER_SIZE),tsec(2),my_tsec(2)
 155  character(len=32) :: names(-1:TIMER_SIZE),entry_name
 156  character(len=*),parameter :: format01040 ="('- ',a32,f15.3,f6.1,f14.3,f6.1,i15,16x,f7.2,1x,f10.2)"
 157  character(len=*),parameter :: format01041 ="('- ',a32,f15.3,f6.1,f14.3,f6.1,i15,3x,g12.3,1x,f7.2,1x,f10.2)"
 158  character(len=*),parameter :: format_head1="(a,t46,a,t54,a,t65,a,t72,a,t80,a,t96,a,3x,a7,1x,a10)"
 159  character(len=*),parameter :: format_head2="(a,t46,a,t54,a,t65,a,t72,a,t80,a,t92,a)"
 160 
 161 ! *************************************************************************
 162 
 163  01200 format(  '- subtotal                     ',f18.3,f6.1,f14.3,f6.1,31x,f7.2,1x,f10.2)
 164  01201 format(/,'- subtotal                     ',f18.3,f6.1,f14.3,f6.1,31x,f7.2,1x,f10.2)
 165 
 166  ount = ab_out
 167 
 168  call timab(49,1,tsec)
 169 
 170 !The means are computed as integers, for later compatibility
 171  nbdmean=0; npwmean=0; npwnbdmean=0
 172  do isppol=1,nsppol
 173    do ikpt=1,nkpt
 174      npwmean=npwmean+npwtot(ikpt)
 175      npwnbdmean=npwnbdmean+npwtot(ikpt)*nband(ikpt+(isppol-1)*nkpt)
 176      nbdmean=nbdmean+nband(ikpt+(isppol-1)*nkpt)
 177    end do
 178  end do
 179 
 180  ! initialize ftime, valgrind complains on line 832 = sum up of all Gflops
 181  ftimes=zero
 182 
 183  npwmean=int(dble(npwmean)/dble(nkpt*nsppol))
 184  npwnbdmean=int(dble(npwnbdmean)/dble(nkpt*nsppol))
 185  nbdmean=int(dble(nbdmean)/dble(nkpt*nsppol))
 186 
 187 !List of timed subroutines, eventual initialisation of the number of data, and declaration of a slot as being "basic"
 188 !Channels 1 to 299 are for optdriver=0 (GS), 1 (RF) and 2 (Suscep), at random
 189 !Channels 300 to 399 are for optdriver=3 (Screening)
 190 !Channels 400 to 499 are for optdriver=4 (Sigma)
 191 !Channels 500 to 529 are for optdriver=5 (Nonlinear)
 192 !Channels 530 to 549 are for various counters
 193 !Channels 550 to 599 are for PAW
 194 !Channels 600 to 619 are for Recursion Method
 195 !Channels 620 to 639 are for DMFT
 196 !Channels 650 to 699 are for bethe_salpeter code.
 197 !Channels 700 to 799 are for optdriver=0 (again ...)
 198 !Channels 800 to 899 are for the detailed analysis of fourwf
 199 !Channels 900 to 1499 are for optdriver=0 (again ...)
 200 !Channels 1500 to 1599 are for Hartree-Fock.
 201 !Channels 1700 to 1747 are for GWLS.
 202 
 203  names(1:TIMER_SIZE)='***                             '
 204 !Basic slots are not overlapping. Their sum should cover most of the code.
 205 !WARNING : the slots from 1 to 99 should be avoided in the future ... They are hard to track.
 206  basic(1:TIMER_SIZE)=0
 207  names(1)='abinit                          '
 208  names(5)='ewald                           ' ; basic(5)=1
 209  names(6)='setsym                          ' ; basic(6)=1
 210  names(9)='fourdp                          ' ; basic(9)=1 ;    ndata(9)=nfft
 211  names(10)='hartre                          '
 212  names(11)='xc:pot/=fourdp                  '; basic(11)=1;    ndata(11)=nfft*nsppol
 213  names(12)='mkcore                          '; basic(12)=1
 214  names(13)='mkresi                          '
 215  names(14)='rwwf                            '; basic(13)=1
 216  names(15)='pspini                          '; basic(15)=1
 217  names(16)='mkffnl                          '; basic(16)=1
 218  names(17)='symrhg(no FFT)                  '; basic(17)=1
 219  names(19)='inwffil                         '
 220 
 221  names(22)='cgwf                            '
 222  names(23)='kpgsph                          '; basic(23)=1   ! Actually, should not be basic ... too complicated, too much overlap ...
 223  names(28)='vtowfk                          '
 224  names(30)='vtowfk  (afterloop)             '
 225  names(31)='vtowfk  (1)                     '; basic(31)=1
 226  names(37)='stress                          '; basic(37)=1   ! Actually, should not be basic !
 227  names(38)='ewald2 (+vdw_dftd)              '; basic(38)=1
 228  names(39)='vtowfk (loop)                   '
 229  names(40)='cgwf-O(npw)                     '
 230  names(47)='ingeo/symgroup                  '
 231  names(48)='communic.MPI                    '
 232  names(49)='timana(1)                       '
 233  names(50)='timing timab                    '; basic(50)=1
 234  names(51)='total timab                     '
 235  names(53)='forces-mkcore                   '
 236  names(54)='scfcv_core(1)                   '
 237  names(55)='stress-mkcore                   '
 238  names(57)='rhotov                          '
 239  names(59)='energy                          '
 240  names(61)='dfpt_scfcv : synchro            '
 241  names(62)='kpgio :synchro                  '
 242  names(63)='mkrho :synchro                  '
 243  names(64)='strkin:synchro                  '
 244  names(65)='forstrnps:synchr                '
 245  names(66)='vtorho:synchro                  '; basic(66)=1
 246  names(67)='wfsinp:synchro                  '
 247  names(69)='forces                          '; basic(69)=1 ! Actually, should not be basic !
 248  names(70)='vtorho(symrhg)                  '
 249  names(71)='mkrho :MPIrhor                  '
 250  names(72)='mklocl(2)                       '
 251  names(73)='status                          '; basic(73)=1
 252  names(74)='newocc                          '
 253  names(75)='nonlop(apply)                   '; basic(75)=1; ndata(75)=npwmean*natom
 254  names(76)='nonlop(forces)                  '; basic(76)=1; ndata(76)=npwmean*natom
 255  names(77)='nonlop(forstr)                  '; basic(77)=1; ndata(77)=npwmean*natom
 256  names(78)='nonlop(dyfrnl)                  '
 257  names(79)='nonlop(ddk)                     '
 258  names(80)='etotfor/=forces                 '
 259  names(81)='xc:pot                          ' ! rhotoxc_coll, except the call to hartre.f
 260  names(82)='xc:fourdp                       '
 261  names(83)='newvtr/rho(3):io                '; basic(83)=1
 262  names(84)='suscep                          '
 263  names(85)='suscep:MPI                      '; basic(85)=1
 264  names(86)='suscep:synchro                  '; basic(86)=1
 265  names(87)='suskXX:loop(1)                  '
 266  names(88)='suskXX:loop(2)                  '
 267  names(89)='suscep:other                    '
 268  names(90)='dielmt                          '; basic(90)=1
 269  names(91)='setvtr                          '
 270  names(92)='setvtr:mkcore                   '
 271  names(93)='newvtr                          '
 272  names(94)='newrho                          '
 273  names(95)='tddft                           '
 274  names(96)='dieltcel                        '; basic(96)=1
 275  names(97)='nonlop(total)                   '
 276 
 277  names(101)='abinit(init,iofn1,herald)      '; basic(101)=1
 278  names(102)='get_dtsets_pspheads            '; basic(102)=1
 279  names(103)='abinit(outvars)                '; basic(103)=1
 280  names(104)='abinit(chkinp,chkvars)         '; basic(104)=1
 281  names(105)='abinit(after driver)           '; basic(105)=1
 282 
 283  names(111)='dfpt_nstdy                      '
 284  names(112)='dfpt_nstwf                      '
 285  names(113)='dfpt_vtowfk(contrib)            '; basic(113)=1
 286  names(118)='dfpt_vtorho (1)                 '; basic(118)=1
 287  names(120)='dfpt_scfcv                      '
 288  names(121)='dfpt_vtorho                     '
 289  names(122)='dfpt_cgwf                       '
 290  names(124)='dfpt_vtorho (1)(2)              '
 291  names(125)='dfpt_vtorho (2)                 '
 292  names(126)='dfpt_vtorho-kpt loop            '; basic(126)=1
 293  names(127)='dfpt_vtorho (4)                 '
 294  names(128)='dfpt_vtowfk                     '
 295  names(129)='dfpt_vtorho:MPI                 '; basic(129)=1
 296  names(130)='dfpt_vtowfk (3)                 '; basic(130)=1
 297  names(131)='dfpt_vtowfk (1)                 '; basic(131)=1
 298  names(132)='respfn                          '
 299  names(133)='respfn(kpgio)                   '
 300  names(134)='respfn(pspini)                  '
 301  names(135)='respfn(inwffil)                 '
 302  names(136)='respfn(frozen)                  '
 303  names(137)='respfn(dfpt_dyxc1+bef.dfpt_lop) '
 304  names(138)='respfn(after dfpt_loper)        '
 305  names(139)='dfpt_vtowfk (loop)              '
 306  names(140)='dfpt_cgwf-O(npw)                '; basic(140)=1
 307  names(141)='dfpt_loper                      '
 308  names(142)='dfpt_loper(kpgio)               '
 309  names(143)='dfpt_loper(getmpw)              '
 310  names(144)='dfpt_loper(inwffil)             '
 311  names(146)='dfpt_loper(outwf)               '
 312  names(147)='dfpt_loper(eig2tot)             '
 313  names(148)='eig2tot                         '; basic(148)=1
 314  names(150)='dfpt_nselt/nstdy/nstpaw         '
 315  names(152)='dfpt_scfcv-scprqt               '
 316  names(154)='dfpt_scfcv  (1)                 '; basic(154)=1
 317  names(157)='dfpt_rhotov                     '
 318  names(158)='dfpt_newvtr                     '
 319  names(159)='d2frnl                          '
 320  names(160)='dfpt_scfcv (6)                  '
 321  names(161)='dfpt_nstdy:synchro              '; basic(161)=1
 322  names(166)='dfpt_vtorho:synchro             '; basic(166)=1
 323  names(181)='dfpt_mkvxc                      '
 324  names(182)='dfpt_dyxc1                      '; basic(182)=1
 325 !names(184)='dfpt_dyxc1(analysis)            '
 326 
 327  names(191)='invars2                         '
 328  names(192)='inkpts                          '
 329  names(193)='fresid                          '
 330 
 331  names(195)='getgh1c_setup'; basic(195) = 1
 332  names(196)='getgh1c'; basic(196) = 1
 333  names(197)='getgh1c%dfpt_cgwf               '
 334  names(198)='getgh1c%dfpt_nstwf              '
 335  names(199)='getgh1c%dfpt_nstpaw             '
 336 
 337  names(210)='projbd                          '; basic(210)=1;    ndata(210)=npwnbdmean
 338  names(211)='projbd%cgwf                     '
 339  names(212)='projbd%dfpt_cgwf                '
 340  names(213)='projbd%dfpt_nstpaw              '
 341  names(214)='corrmetalwf1%dfpt_vtowfk        '
 342 
 343  names(220)='nonlop%(other)                  '
 344  names(221)='nonlop%getghc                   '
 345  names(222)='nonlop%vtowfk                   '
 346  names(223)='nonlop%energy                   '
 347  names(224)='nonlop%forstrnps                '
 348  names(225)='nonlop%dfpt_nstwf               '
 349  names(226)='nonlop%d2frnl                   '
 350  names(227)='nonlop%dfpt_cgwf !2             '
 351  names(228)='nonlop%dfpt_cgwf !5             '
 352  names(229)='nonlop%outkss                   '
 353  names(230)='nonlop%vtowfk(rhoij)            '
 354  names(231)='nonlop%prep_nonl%vtowfk         '
 355  names(232)='nonlop%prep_nonl%forstrn        '
 356  names(233)='nonlop%appinvovl                '
 357  names(234)='nonlop%prep_nonl%energy         '
 358  names(235)='nonlop%getchc                   '; basic(235)=1
 359  names(236)='nonlop%getcsc                   '; basic(236)=1
 360  names(237)='nonlop%fock_getghc              '; basic(237)=1
 361 
 362  names(250)='afterscfloop                    '
 363  names(251)='afterscfloop(wvl)               '
 364  names(252)='afterscfloop(pol/magn)          '
 365  names(253)='afterscfloop(grad/lapl)         '
 366  names(254)='afterscfloop(kin.en.den)        '
 367  names(255)='afterscfloop(elf)               '
 368  names(256)='afterscfloop(forstr)            '
 369  names(257)='afterscfloop(final)             '
 370 
 371  names(270)='rwwf%(other)                    '
 372  names(271)='rwwf%vtorho                     '
 373  names(272)='rwwf%initwf(GS)                 '
 374  names(273)='rwwf%energy                     '
 375  names(274)='rwwf%wfsinp(GS)                 '
 376  names(275)='rwwf%mkrho                      '
 377  names(276)='rwwf%outwf                      '
 378  names(277)='rwwf%strnps                     '
 379  names(278)='rwwf%tddft                      '
 380  names(279)='rwwf%suscep                     '
 381  names(281)='rwwf%wfsinp(RF)                 '
 382  names(282)='rwwf%mkrho2                     '
 383  names(283)='rwwf%outwf2                     '
 384  names(284)='rwwf%dfpt_dyfnl                 '
 385  names(285)='rwwf%dfpt_mkrho                 '
 386  names(286)='rwwf%dfpt_nstwf                 '
 387  names(287)='rwwf%dfpt_vtorho                '
 388  names(288)='rwwf%dfpt_vtowfk                '
 389  names(289)='rwwf%dfpt_nstdy                 '
 390  names(290)='rwwf%initwf(RF)                 '
 391  names(291)='rwwf%newkpt(GS)                 '
 392  names(292)='rwwf%newkpt(RF)                 '
 393 
 394  ! wfd
 395  names(300)='wfd_read_wfk                    '; basic(300) = 1
 396 
 397  names(301)='screening                       '
 398  names(302)='screening(init1)                '
 399  names(304)='screening(KS=>QP[wfrg])         '
 400  names(305)='screening(density)              '
 401  names(306)='screening(q-loop,init )         '
 402  names(307)='screening(cchi0q0)              '
 403  names(308)='screening(cchi0)                '
 404  names(309)='screening(q-loop,end)           '
 405  names(310)='screening(wrt scr files)        '
 406  names(315)='screening(pawin)                '
 407  names(316)='screening(wfs)                  '
 408  names(319)='screening(1)                    '
 409  names(320)='screening(paw)                  '; basic(320)=1
 410  names(321)='screening(2)                    '
 411 
 412  names(331)='cchi0                           '
 413  names(332)='cchi0(rho_tw_g)                 '
 414  names(333)='cchi0(assembly)                 '
 415 
 416  names(350)='getghc                          '
 417  names(351)='getghc%cgwf                     '
 418  names(352)='getghc%dfpt_cgwf                '
 419  names(353)='getghc%mkresi                   '
 420  names(354)='getghc%kss_ddiago               '
 421  names(355)='getghc%lobpcgwf                 '
 422  names(356)='getghc%prep_getghc              '
 423  names(357)='getghc%other lobpcg             '
 424  names(358)='getghc%update_mmat              '
 425  names(359)='getghc(/=fourXX,nonlop,fock_XX) '; basic(359)=1
 426  names(360)='getghc(fock_XX)                 '
 427 
 428 
 429  names(401)='sigma                           '
 430  names(402)='sigma(Init1)                    '
 431  names(403)='setup_sigma                     '
 432  names(404)='sigma(rdkss)                    '
 433  names(405)='sigma(Init2)                    '
 434  names(406)='sigma(make_vhxc)                '
 435  names(407)='sigma(vHxc_me)                  '
 436  names(408)='sigma(hqp_init)                 '
 437  names(409)='sigma(getW)                     '
 438  names(410)='sigma/=fourdp                   '; basic(410)=1
 439 
 440  names(421)='sigma(calc_sigx_me)             '
 441  names(423)='sigma(cohsex_me)                '
 442  names(424)='sigma(calc_sigc_me)             '
 443  names(425)='sigma(solve_dyson)              '
 444  names(426)='sigma(finalize)                 '
 445 
 446  names(430)='calc_sigx_me                    '
 447 
 448  names(431)='calc_sigc_me                    '
 449  names(432)='calc_sigc_me(Init)              '
 450  names(433)='calc_sigc_me(Init spin)         '
 451  names(434)='calc_sigc_me(Init q)            '
 452  names(435)='calc_sigc_me(eet_sigma)         '
 453  names(436)='calc_sigc_me(1)                 '
 454  names(437)='calc_sigc_me(rho_tw_g)          '
 455  names(438)='calc_sigc_me(2)                 '
 456  names(439)='calc_sigc_me(sigma_me)          '
 457  names(440)='calc_sigc_me(wfd_barrier        '
 458  names(441)='calc_sigc_me(xmpi_sum)          '
 459  names(442)='calc_sigc_me(final ops)         '
 460  names(443)='calc_sigc_me(ac_lrk_appl)       '
 461  names(444)='calc_sigc_me(ac_lrk_diag)       '
 462 
 463  names(445)='calc_sigc_me(loop)              '
 464 
 465  names(490)='solve_dyson                     '
 466  names(491)='cohsex_me                       '
 467 
 468  names(501)='nonlinear                       '
 469  names(502)='pead_nl_loop                    '
 470  names(503)='dfptnl_loop                     '
 471  names(511)='dfptnl_mv                       '; basic(511)=1
 472  names(512)='pead_nl_resp                    '; basic(512)=1
 473  names(513)='dfptnl_pert                     '
 474  names(514)='rf2_init                        '
 475 
 476  names(520)='lobpcgwf(init)                  '; if(abs(timopt)==4)basic(520)=1
 477  names(521)='lobpcgwf(bef.getghc 1           '; if(abs(timopt)==4)basic(521)=1
 478  names(522)='lobpcgwf(aft.getghc 1           '; if(abs(timopt)==4)basic(522)=1
 479  names(523)='lobpcgwf(bef.getghc 2           '; if(abs(timopt)==4)basic(523)=1
 480  names(524)='lobpcgwf(aft.getghc 2           '; if(abs(timopt)==4)basic(524)=1
 481  names(525)='lobpcgwf(aft.loop)              '; if(abs(timopt)==4)basic(525)=1
 482  names(526)='lobpcgwf(prep-getghc)           '
 483 
 484  names(530)='lobpcgwf                        '
 485  names(532)='xgemm%lobpcg                    '
 486  names(533)='xmpi_sum%lobpcg                 '
 487  names(535)='xorthon-xtrsm                   '
 488  names(536)='xprecon%lobpcg                  '
 489  names(537)='prep_fourwf%vtow                '
 490  names(538)='prep_fourwf%mkrh                '
 491  names(539)='prep_fourwf                     '
 492 
 493  names(540)='sg_fourwf%fourwf                '
 494  names(541)='back_wf%sg_fourw                '
 495  names(542)='forw_wf%sg_fourw                '
 496  names(543)='alltoall%back_wf                '
 497  names(544)='alltoall%forw_wf                '
 498  names(545)='prep_getghc(alltoall)           '
 499  names(547)='alltoall%prep_fo                '
 500  names(548)='allgather%prep_f                '
 501  names(549)='symrhg%mkrho                    '
 502 
 503  names(550)='forces:pawatm2ff                '
 504  names(551)='stress:pawatm2ff                '
 505  names(552)='setvtr:pawatm2ff                '
 506  names(553)='pawinit                         '; basic(553)=1
 507  names(554)='vtowfk:rhoij                    '
 508  names(555)='vtorho:pawmkrhoij               '; basic(555)=1
 509  names(556)='pawmkrho                        '; basic(556)=1
 510  names(557)='pawmkrho:symrhoij               '; basic(557)=1
 511  names(558)='scfcv_core:mknhat               '
 512  names(559)='nhatgrid                        '; basic(559)=1
 513  names(560)='pawdenpot                       '; basic(560)=1
 514  names(561)='pawdij/symdij                   '; basic(561)=1
 515  names(562)='respfn:pawatm2ff                '; basic(562)=1
 516  names(563)='dfpt_dyfro:pawatm2ff            '; basic(563)=1
 517  names(564)='dfpt_scfcv:dfpt_mknhat          '; basic(564)=1
 518  names(565)='getgsc                          '
 519  names(566)='dfpt_nstpaw                     '; basic(566)=1
 520  names(567)='pawnstd2e                       '
 521  names(568)='stress%strhar                   '
 522 
 523  names(570)='prep_nonlop                     '
 524  names(572)='prep_nonlop%vtowfk              '
 525  names(573)='prep_nonlop%forstrnps           '
 526 
 527  names(575)='prep_bandfft_tabs               '; basic(575)=1
 528 
 529  names(578)='vtowfk(cprj_rotate)             '
 530  names(581)='prep_nonlop(alltoall)           '
 531  names(583)='vtowfk(pw_orthon)               '
 532  names(584)='xcopy%lobpcg                    '
 533  names(585)='vtowfk(subdiago)                '
 534  names(586)='vtowfk(nonlocalpart)            '
 535  names(587)='zheegv-dsyegv                   '
 536 
 537  names(588)='vtowfk(ssdiag)                  '; basic(588)=1
 538  names(589)='vtowfk(contrib)                 '; basic(589)=1
 539  names(590)='vtowfk(2)                       '
 540  names(591)='vtowfk(3)                       '
 541 
 542  names(593)='set_paw_pert                    '
 543  names(594)='get_exchange_atom               '
 544  names(595)='pawrhoij_redistribute           '
 545  names(596)='paw_ij_redistribute             '
 546  names(597)='paw_an_redistribute             '
 547  names(598)='pawfgrtab_redistribute          '
 548 
 549  names(600)='vtorhorec                       '
 550  names(601)='Definitions                     '
 551  names(602)='getngrec                        '
 552  names(603)='green_kernel                    '
 553  names(604)='transgrid (c->f)                '
 554  names(605)='recursion (other)               '
 555  names(606)='recursion (den)                 '
 556  names(607)='recursion (cuda)                '
 557  names(608)='recursion_nl                    '
 558  names(609)='fermisolverec                   '
 559  names(610)='entropyrec                      '
 560  names(611)='gran_potrec                     '
 561  names(612)='nonlocal-energy                 '
 562  names(613)='sync. cpu (wait)                '
 563  names(614)='sync. gpu (wait)                '
 564  names(615)='vn_nl_rec                       '
 565  names(616)='null recursion                  '
 566  names(617)='recursion (other_cuda)          '
 567 
 568  names(620)='datafordmft                     '
 569  names(621)='initialize dmft loop            '
 570  names(622)='impurity_solve                  '
 571  names(623)='Dyson                           '
 572  names(624)='compute_green                   '
 573  names(625)='integrate_green                 '
 574  names(626)='dmft-other                      '
 575  names(627)='Print/Read self                 '
 576 
 577  names(630)='prep_getghc                     '
 578  names(631)='prep_getghc(before if)          '
 579  names(632)='prep_getghc(bef. getghc)        '
 580  names(633)='prep_getghc(betw getghc)        '
 581  names(634)='prep_getghc(aft. getghc)        '
 582  names(635)='prep_getghc(getghc - 1 )        '
 583  names(636)='prep_getghc(getghc - 2 )        '
 584  names(637)='prep_getghc(getghc - 3 )        '
 585  names(638)='prep_getghc(getghc - 4 )        '
 586 
 587  names(640)='driver                          '
 588  names(641)='driver(bef. loop dtset)         '
 589  names(642)='driver(bef. select case)        '
 590  names(643)='driver(aft. select case)        '
 591  names(644)='driver(aft. loop dtset)         '
 592 
 593  names(650)='bse                             '
 594  names(651)='bse(Init1)                      '; basic(651)=1
 595  names(652)='setup_bse                       '; basic(652)=1
 596  names(653)='bse(rdkss)                      '; basic(653)=1
 597  names(654)='bse(rdmkeps^-1)                 '; basic(654)=1
 598  names(655)='bse(mkrho)                      '; basic(655)=1
 599  names(656)='bse(mkexcham)                   '; basic(656)=1
 600  names(657)='bse(mkexceps)                   '; basic(657)=1
 601  names(658)='bse(wfd_wave_free)              '; basic(658)=1
 602  names(659)='bse(mk_pawhur_t)                '; basic(659)=1
 603  names(660)='bse(exc_diago_driver)           '; basic(660)=1
 604  names(661)='bse(exc_haydock_driver)         '; basic(661)=1
 605 
 606 
 607  names(670)='exc_build_ham                   '
 608  names(671)='exc_build_ham(q=0)              '
 609  names(672)='exc_build_ham(block-res)        '
 610  names(673)='exc_build_ham(block-coupling)   '
 611 
 612  names(680)='exc_build_block                 '
 613  names(681)='exc_build_block(init,read)      '
 614  names(682)='exc_build_block(Coulomb)        '
 615  names(683)='exc_build_block(exchange)       '
 616  names(684)='exc_build_block(synchro)        '
 617  names(685)='exc_build_block(write_ha        '
 618  names(686)='exc_build_block(exch.spi        '
 619 
 620  names(690)='exc_haydock_driver              '
 621  names(691)='exc_haydock_driver(read)        '
 622  names(692)='exc_haydock_driver(prep)        '
 623  names(693)='exc_haydock_driver(wo lf        '
 624  names(694)='exc_haydock_driver(apply)       '
 625  names(695)='exc_haydock_driver(end)         '
 626  names(696)='exc_haydock_driver(inter        '
 627  names(697)='exc_haydock_driver(matmul)      '
 628 !Slots up to 699 are reserved for bethe_salpeter code.
 629 
 630  names(710)='inwffil                         '
 631  names(711)='inwffil(read header)            '
 632  names(712)='inwffil(init params)            '
 633  names(713)='inwffil(prepa wfsinp)           '
 634  names(714)='inwffil(call wfsinp)            '
 635  names(715)='inwffil(after wfsinp)           '
 636  names(716)='inwffil(spin convert)           '
 637  names(717)='inwffil(call newkpt)            '
 638  names(718)='inwffil(excl. calls)            '; basic(718)=1
 639 
 640  names(720)='wfsinp                          '
 641  names(721)='wfsinp(before loop)             '
 642  names(722)='wfsinp(find kpt)                '
 643  names(723)='wfsinp(prepa initwf)            '
 644  names(724)='wfsinp(call  initwf)            '
 645  names(725)='wfsinp(transfer of wfs)         '
 646  names(726)='wfsinp(call rwwf)               '
 647  names(727)='wfsinp(wfconv section)          '
 648  names(728)='wfsinp(excl. calls)             '; basic(728)=1
 649 
 650  names(740)='suscep_stat                     '
 651  names(741)='suscep_stat(init)               '
 652  names(742)='suscep_stat(bef. susk-mm        '
 653  names(743)='suscep_stat(susk-mm)            '
 654  names(744)='suscep_stat(extrapol)           '
 655  names(745)='suscep_stat:synchro             '
 656  names(746)='suscep_stat:MPI                 '
 657  names(747)='suscep_stat(symmetries)         '
 658 
 659  names(750)='susk                            '
 660  names(751)='susk (init)                     '; basic(751)=1
 661  names(752)='susk (loop)                     '
 662  names(753)='susk:MPI (1)                    '; basic(753)=1
 663  names(754)='susk (accumul.)                 '
 664  names(755)='susk:MPI (2)                    '; basic(755)=1
 665  names(756)='susk (loop except FFT)          '; basic(756)=1
 666  names(757)='susk (accumul.except FFT        '; basic(757)=1
 667 
 668  names(760)='suskmm                          '
 669  names(761)='suskmm (init)                   '; basic(761)=1
 670  names(762)='suskmm (loop : part1)           '
 671  names(763)='suskmm (loop : part2)           '
 672  names(764)='suskmm(loop1 except FFT)        '; basic(764)=1
 673  names(765)='suskmm(loop2 except FFT)        '; basic(765)=1
 674 
 675  names(770)='initwf                          '
 676  names(771)='initwf(before rwwf)             '; basic(771)=1
 677  names(772)='initwf(after rwwf)              '; basic(772)=1
 678 
 679  names(780)='newkpt                          '
 680  names(781)='newkpt(before loop)             '
 681  names(782)='newkpt(before rwwf)             '
 682  names(783)='newkpt(after rwwf)              '
 683  names(784)='newkpt(call wfconv)             '
 684  names(785)='newkpt(finalize loop)           '
 685  names(786)='newkpt(after loop   )           '
 686  names(787)='newkpt:synchro                  '
 687  names(788)='newkpt(excl. rwwf   )           '; basic(788)=1
 688 
 689  names(790)='mkrho                           '
 690  names(791)='mkrho%gstate                    '
 691  names(792)='mkrho%vtorho                    '
 692  names(793)='mkrho%energy                    '
 693  names(794)='mkrho%respfn                    '
 694  names(795)='mkrho%afterscfloop              '
 695  names(796)='mkrho%scfcv_core                '
 696  names(798)='mkrho/=                         '; basic(798)=1
 697  names(799)='mkrho/=+fourwf                  '
 698 
 699  names(801)='fourwf                          '
 700  names(802)='fourwf%(pot)                    '; basic(802)=1;    ndata(802)=2*nfft
 701  names(803)='fourwf%(den)                    '; basic(803)=1;    ndata(803)=nfft
 702  names(804)='fourwf%(G->r)                   '; basic(804)=1
 703  names(805)='fourwf%(r->G)                   '; basic(805)=1
 704 
 705 
 706  names(840)='fourwf%(other)                  '
 707  names(841)='fourwf%getghc                   '
 708  names(842)='fourwf%vtowfk                   '
 709  names(843)='fourwf%mkrho                    '
 710  names(844)='fourwf%dfpt_cgwf                '
 711  names(845)='fourwf%dfpt_accrho%dfpt_vtowfk  '
 712  names(846)='fourwf%mkrho2                   '
 713  names(847)='fourwf%dfpt_mkrho               '
 714  names(850)='fourwf%fock_getghc              '
 715  names(854)='fourwf%tddft                    '
 716  names(855)='fourwf%outkss                   '
 717  names(856)='fourwf%prep_four                '
 718  names(858)='fourwf%dfpt_accrho%idfpt_nstpaw '
 719  names(861)='fourwf%suskmm !0 part 1         '
 720  names(862)='fourwf%suskmm !0 part 2         '
 721  names(871)='fourwf%suskmm !3 part 1         '
 722  names(872)='fourwf%suskmm !3 part 2         '
 723  names(880)='fourwf%cgwf_cprj                '
 724 
 725  names(901)='newvtr(before selection)        '
 726  names(902)='newvtr(bef. prcref_PMA)         '
 727  names(903)='newvtr(call prcref_PMA)         '
 728  names(904)='newvtr(aft. prcref_PMA)         '
 729  names(905)='newvtr(mean potential)          '
 730 
 731  names(910)='forstr                          '
 732  names(911)='forstr(forstrnps)               '
 733  names(912)='forstr(pawgrnl)                 '
 734  names(913)='forstr(forces)                  '
 735  names(914)='forstr(stress)                  '
 736 
 737  names(920)='forstrnps                       '
 738  names(921)='forstrnps(bef.loop k spin)      '
 739  names(922)='forstrnps(bef.loop band)        '
 740  names(923)='forstrnps(copy)                 '
 741  names(924)='forstrnps(nonlop+prep_ba        '
 742  names(925)='forstrnps(kinetic contr)        '
 743  names(926)='forstrnps(fock_getghc)          '
 744  names(927)='forstrnps(aft.loop band block)  '
 745  names(928)='forstrnps(aft.loop k spin)      '
 746 
 747  names(933)='outkss                          '
 748  names(934)='outkss(Gsort+hd)                '
 749  names(935)='outkss(k-loop)                  '
 750  names(936)='outkss(diago)                   '; basic(936)=1
 751  names(937)='outkss(MPI_exch)                '; basic(937)=1
 752  names(938)='outkss(write)                   '
 753 
 754  names(940)='rhotov                          '
 755  names(941)='rhotov(rhotoxc)                 '
 756  names(942)='rhotov(dotprod_vn)              '
 757  names(943)='rhotov(PSolver_rhohxc)          '
 758  names(944)='rhotov(rhohxcpositron)          '
 759  names(945)='rhotov(other)                   '
 760 
 761  names(980)='vtorho                          '
 762  names(981)='vtorho(bef. spin loop)          '
 763  names(982)='vtorho(bef. kpt  loop)          '
 764  names(983)='vtorho(Berry)                   '
 765  names(984)='vtorho(bef. vtowfk)             '
 766  names(985)='vtorho(aft. vtowfk)             '
 767  names(986)='vtorho(aft. kpt loop)           '
 768  names(987)='vtorho(leave_test)              '; basic(987)=1
 769  names(988)='vtorho(aft. spin loop)          '
 770  names(989)='vtorho(MPI)                     '; basic(989)=1
 771  names(990)='vtorho(newocc)                  '
 772  names(991)='vtorho(DMFT)                    '
 773  names(992)='vtorho(mkrho 1)                 '
 774  names(993)='vtorho(highest occ. eig)        '
 775  names(994)='vtorho(mkrho 2)                 '
 776  names(995)='vtorho(tddft)                   '
 777  names(996)='vtorho(suscep_stat)             '
 778  names(997)='vtorho(init kpt loop)           '
 779 
 780  names(1001)='initberry                       '; basic(1001)=1
 781  names(1002)='initberry(before listkk)        '
 782  names(1003)='initberry(call listkk)          '
 783  names(1004)='initberry(after listkk)         '
 784  names(1005)='initberry(find neighb.)         '
 785  names(1006)='initberry(build strings)        '
 786  names(1007)='initberry(PAW on-site)          '
 787  names(1008)='initberry(pwind)                '
 788  names(1009)='initberry(MPI stuff)            '
 789 
 790 
 791  names(1021)='get_dtsets_pspheads(pspheads)   ';
 792  names(1022)='get_dtsets_pspheads(indefo)     ';
 793  names(1023)='get_dtsets_pspheads(invars2m)   ';
 794 
 795  names(1091)='listkk                          '; basic(1091) = 1
 796 
 797  names(1150)='outscfcv                        '
 798  names(1151)='outscfcv(preparation)           '
 799  names(1152)='outscfcv(mlwfovlp)              '
 800  names(1153)='outscfcv([PAW]prtden)           '
 801  names(1154)='outscfcv(output GSR)            '
 802  names(1155)='outscfcv(output VCLMB)          '
 803  names(1156)='outscfcv(prtelf)                '
 804  names(1157)='outscfcv(prt grden)             '
 805  names(1158)='outscfcv(prt kden)              '
 806  names(1159)='outscfcv(prt lden)              '
 807  names(1160)='outscfcv(prtpot)                '
 808  names(1161)='outscfcv(prtgeo,cif)            '
 809  names(1162)='outscfcv(prtstm)                '
 810  names(1163)='outscfcv(prt 1dm)               '
 811  names(1164)='outscfcv(prtvha,vpsp,... vxc)   '
 812  names(1165)='outscfcv(prtdos)                '
 813  names(1166)='outscfcv(calcdenmagsph)         '
 814  names(1167)='outscfcv(mag_penalty_e)         '
 815  names(1168)='outscfcv(pawprt)                '
 816  names(1169)='outscfcv(optics)                '
 817  names(1170)='outscfcv(pawmkaewf)             '
 818  names(1171)='outscfcv(plowf)                 '
 819  names(1172)='outscfcv(gw)                    '
 820  names(1173)='outscfcv(poslifetime)           '
 821  names(1174)='outscfcv(posdoppler)            '
 822  names(1175)='outscfcv(outwant)               '
 823  names(1176)='outscfcv(calc_efg)              '
 824  names(1177)='outscfcv(calc_fc)               '
 825  names(1178)='outscfcv(prt_ebands)            '
 826  names(1179)='outscfcv(prt_surf)              '
 827  names(1180)='outscfcv(prtnest)               '
 828  names(1181)='outscfcv(prtdipole)             '
 829  names(1182)='outscfcv(prtblztrp)             '
 830  names(1183)='outscfcv(ebands_interpol_kpath) '
 831 
 832 !names(1190)='outscfcv(gsr1) '
 833 !names(1191)='outscfcv(gsr2) '
 834 !names(1192)='outscfcv(gsr3) '
 835 !names(1193)='outscfcv(gsr4) '
 836 !names(1194)='outscfcv(gsr5) '
 837 !names(1195)='outscfcv(gsr6) '
 838 
 839  names(1200)='gstateimg                       '
 840  names(1203)='gstateimg(init)                 '
 841  names(1204)='gstateimg(bef. loop img)        '
 842  names(1205)='gstateimg(bef. gstate)          '
 843  names(1206)='gstateimg(aft. gstate)          '
 844  names(1208)='gstateimg(leave_test)           '
 845  names(1209)='gstateimg(aft. loop img)        '
 846  names(1210)='gstateimg(finalize)             '
 847 
 848  names(1211)='gstate(1)                       '
 849  names(1212)='gstate(pspini)                  '
 850  names(1213)='gstate(2)                       '
 851  names(1214)='gstate(init rhor rhog)          '
 852  names(1215)='gstate(init history)            '
 853  names(1225)='gstate(...scfcv)                '
 854  names(1226)='gstate(prt gap)                 '
 855  names(1227)='gstate(prtwf)                   '
 856  names(1228)='gstate(clnup1)                  '
 857  names(1229)='gstate(prtelfield)              '
 858  names(1230)='gstate(DDB)                     '
 859  names(1231)='gstate(clnup2)                  '
 860 
 861  names(1232)='gstate                          '
 862 
 863  names(1260)='fourdp%(other)                  '
 864  names(1261)='fourdp%rhotwg%ch                '
 865  names(1262)='fourdp%rhotwg%si                '
 866  names(1263)='fourdp%ckxcldag                 '
 867  names(1264)='fourdp%fftwfn%ch                '
 868  names(1265)='fourdp%fftwfn%si                '
 869  names(1266)='fourdp%rec%rho                  '
 870  names(1267)='fourdp%rec%ek                   '
 871  names(1268)='fourdp%newvtr                   '
 872  names(1269)='fourdp%newrho                   '
 873  names(1270)='fourdp%fock_getghc              '
 874 
 875  names(1280)='read_rho                        '
 876  names(1281)='interpolate_denpot              '
 877 
 878  names(1290)='getcprj(all)                    '
 879  names(1291)='getcprj%opernla                 '; basic(1291)=1
 880  names(1292)='getcprj%opernla_mv              '; basic(1292)=1
 881  names(1293)='getcprj(cgwf_cprj)              '
 882  names(1294)='getcprj(ctocprj)                '
 883  names(1295)='getcprj(vtowfk)                 '
 884  names(1299)='getcprj(other)                  '
 885 
 886  names(1300)='cgwf_cprj                       '
 887  names(1301)='cgwf_cprj%other                 '
 888  names(1302)='pawcprj(zaxpby)                 '
 889  names(1303)='pawcprj(projbd)                 '; basic(1303)=1
 890  names(1304)='subham(dotprod_g)               '; basic(1304)=1
 891  names(1305)='cgwf_cprj%npw_work              '; basic(1305)=1
 892 
 893  names(1360)='getcsc(all)                     '
 894  names(1361)='getcsc%dotprod_g                '; basic(1361)=1
 895  names(1362)='getcsc%other                    '
 896  names(1363)='getcsc(cgwf_cprj)               '
 897  names(1364)='getcsc(subovl)                  '
 898 
 899  names(1370)='getchc                          '
 900  names(1371)='getchc%local                    '; basic(1371)=1
 901  names(1372)='getchc%kin                      '; basic(1372)=1
 902  names(1375)='getchc%other                    '
 903 
 904  names(1440)='scfcv_core                      '
 905  names(1441)='scfcv_core(before nstep loop)   '
 906  names(1442)='scfcv_core(ini moved atm inside)'
 907  names(1443)='scfcv_core(ini fock)            '
 908  names(1444)='scfcv_core(fock wfmixing)       '
 909  names(1445)='scfcv_core(fock_updatecwaveocc) '
 910  names(1446)='scfcv_core(fock2ACE)            '
 911  names(1447)='scfcv_core(setup_positron)      '
 912  names(1448)='scfcv_core(setvtr)              '
 913  names(1449)='scfcv_core(loop, PAW)           '
 914  names(1450)='scfcv_core-read                 '
 915  names(1451)='scfcv_core(vtorho(f))           '
 916  names(1452)='scfcv_core(etotfor)             '
 917  names(1453)='scfcv-scprqt                    '; basic(1453)=1
 918  names(1454)='scfcv_core(qui loop)            '
 919  names(1455)='scfcv_core(mix den - newrho)    '
 920  names(1456)='scfcv_core(Berry)               '
 921  names(1457)='scfcv_core(rhotov)              '
 922  names(1458)='scfcv_core(mix pot)             '
 923  names(1459)='scfcv_core(just after scf)      '
 924  names(1460)='scfcv_core(afterscfloop)        '
 925  names(1461)='scfcv_core(outscfcv)            '
 926  names(1462)='scfcv_core(free)                '
 927 
 928  names(1501)='fock_init                       '; basic(1501)=1
 929  names(1502)='fock_updatecwaveocc             '; basic(1502)=1
 930  names(1503)='fock_updatecwaveocc(MPI)        '; ! 100 % nested inside 1502
 931 
 932  names(1504)='fock_getghc                     '; !1504 = 1505 + 1506 + 1507
 933  names(1505)='fock_getghc(init)               '; ! 100 % nested inside 1504
 934  names(1506)='fock_getghc-kmu_loop            '; ! 100 % nested inside 1504, 1506 = 1521+ ... 1528
 935  names(1507)='fock_getghc(post-k)             '; ! 100 % nested inside 1504
 936  names(1512)='fock_getghc(fourwf)             '
 937  names(1513)='fock_getghc(fourdp)             '
 938  names(1514)='fock_getghc(nonlop)             '
 939  names(1515)='fock_getghc(/=fourXX,nonlop)    ';  basic(1515)=1  ! ulterior slot for test
 940 
 941 !Partitioning of the loop on k points inside fock_getghc (1506)
 942  names(1521)='fock_getghc(init k loop)        '
 943  names(1522)='fock_getghc(j loop fourwf)      '
 944  names(1523)='fock_getghc(calc_rhor_munu)     '
 945  names(1524)='fock_getghc(calc_rhog_munu)     '
 946  names(1525)='fock_getghc(calc_vloc)          '
 947  names(1526)='fock_getghc(calc_dij_fock_hat)  '
 948  names(1527)='fock_getghc(calc_vlocpsi)       '
 949  names(1528)='fock_getghc(clean k loop)       '
 950 
 951 !Partitioning in small blocs without fourXX and nonlop. One has to add 1521, 1523, 1527, 1528
 952  names(1541)='fock_getghc(init wo fourwf)     '; !related to 1505
 953  names(1542)='fock_getghc(j loop wo fourwf)   '; !related to 1522
 954  names(1544)='fock_getghc(calc_rhog_munu wo fo'; !related to 1524
 955  names(1545)='fock_getghc(calc_vloc wo fourXX)'; !related to 1525
 956  names(1546)='fock_getghc(calc_dij_fock_hat wo'; !related to 1526
 957  names(1547)='fock_getghc(post-k wo fourXX+MPI'; !related to 1507
 958  names(1548)='fock_getghc(post-k xmpi_sum)    '; !related to 1507
 959 
 960 
 961  names(1560)='fock2ACE                        '
 962  names(1561)='fock2ACE(init)                  '; basic(1561)=1
 963  names(1562)='fock2ACE(main/=fock_getghc)     '; basic(1562)=1
 964  names(1563)='fock2ACE(fock_getghc)           '
 965  names(1565)='fock2ACE(finalize)              '; basic(1565)=1
 966 
 967  names(1580)='fock_ACE_getghc                 '; basic(1580)=1
 968 
 969  ! Chebfi
 970  names(1600) = 'chebfi                        '
 971  names(1601) = 'chebfi(alltoall)              '; basic(1601) = 1
 972  names(1602) = 'chebfi(appinvovl)             '
 973  names(1603) = 'chebfi(rotation)              '
 974  names(1604) = 'chebfi(subdiago)              '
 975  names(1605) = 'chebfi(subham)                '
 976  names(1606) = 'chebfi(ortho)                 '
 977  names(1607) = 'chebfi(getghc)                '
 978  names(1608) = 'chebfi(residuals)             '
 979  names(1609) = 'chebfi(update_eigens)         '
 980  names(1610) = 'chebfi(sync)'
 981 
 982  names(1630) = 'chebfi(opernla)               '
 983  names(1631) = 'chebfi(opernlb)               '
 984  names(1632) = 'chebfi(inv_s)                 '
 985 
 986  names(1620) = 'mkinvovl                      '
 987  names(1621) = 'mkinvovl(build_d)             '
 988  names(1622) = 'mkinvovl(build_ptp)           '
 989 
 990  names(1633) = "rmm_diis:build_hij            "; basic(1633) = 1
 991  names(1634) = "rmm_diis:band_opt             "; basic(1634) = 1
 992 
 993  ! lobpcg2
 994  names(1640) = 'lobpcgwf2                      ';
 995  names(1641) = 'lobpcg_Bortho(X)               '
 996  names(1642) = 'lobpcg_Bortho(XW)              '
 997  names(1643) = 'lobpcg_Bortho(XWP)             '
 998  names(1644) = 'lobpcg_Bortho(Xall)            '
 999  names(1645) = 'lobpcg_RR(X)                   '
1000  names(1646) = 'lobpcg_RR(XW)                  '
1001  names(1647) = 'lobpcg_RR(XWP)                 '
1002  names(1648) = 'lobpcg_RR(Xall)                '
1003 
1004  names(1651) = 'lobpcg_init                    '
1005  names(1652) = 'lobpcg_free                    '
1006 ! names(1653) = 'lobpcg_run                     '
1007  names(1654) = 'lobpcg_getAX_BX                '
1008  names(1655) = 'lobpcg_orthoWrtPrev            '
1009 ! names(1656) = 'lobpcg_Bortho                  '
1010 ! names(1657) = 'lobpcg_RayleighRitz            '
1011  names(1658) = 'lobpcg_maxResidu               '
1012  names(1659) = 'lobpcg_run@getAX_BX            '
1013  names(1660) = 'lobpcg_pcond                   '
1014 
1015  ! xg_t
1016  names(1662) = 'xgTransposer_transpose@ColsRows'
1017  names(1663) = 'xgTransposer_transpose@Linalg  '
1018  names(1664) = 'xgTransposer_*@all2all         '
1019  names(1665) = 'xgTransposer_*@gatherv         '
1020  names(1666) = 'xgTransposer_@reorganize       '
1021  names(1667) = 'xgTransposer_init              '
1022  names(1668) = 'xgTransposer_free              '
1023  names(1669) = 'xgTransposer_transpose         '
1024  names(1670) = 'xgBlock_gemm(blas)             '
1025  names(1671) = 'xgBlock_trsm                   '
1026  names(1672) = 'xgBlock_potrf                  '
1027  names(1673) = 'xgBlock_set                    '
1028  names(1674) = 'xgBlock_get                    '
1029  names(1675) = 'xgBlock_heev                   '
1030  names(1676) = 'xgBlock_heevd                  '
1031  names(1677) = 'xgBlock_hpev                   '
1032  names(1678) = 'xgBlock_hpevd                  '
1033  names(1679) = 'xgBlock_hegv                   '
1034  names(1680) = 'xgBlock_hegvx                  '
1035  names(1681) = 'xgBlock_hegvd                  '
1036  names(1682) = 'xgBlock_hpgv                   '
1037  names(1683) = 'xgBlock_hpgvx                  '
1038  names(1684) = 'xgBlock_hpgvd                  '
1039  names(1685) = 'xgBlock_copy                   '
1040  names(1686) = 'xgBlock_cshift                 '
1041  names(1687) = 'xgBlock_pack                   '
1042  names(1688) = 'xgBlock_gemm(mpi)              '
1043  names(1690) = 'xgScalapack_init               '
1044  names(1691) = 'xgScalapack_free               '
1045  names(1692) = 'xgScalapack_heev               '
1046  names(1693) = 'xgScalapack_hegv               '
1047  names(1694) = 'xgScalapack_scatter            '
1048 
1049  ! GWLS GW code
1050  names(1701)='gwls_sternheimer                ';basic(1701)=1
1051  names(1702)='exchange and correlation        '
1052  names(1703)='correl. shift lanczos           '
1053  names(1704)='Dielectric matrix               '
1054  names(1705)='Model Dielectric matrix         '
1055  names(1706)='setup proj. sternheimer         '
1056  names(1707)='compute proj.sternheimer        '
1057  names(1708)='eps^{-1} - eps_m^{-1}           '
1058  names(1709)='eps_m^{-1} - 1                  '
1059  names(1710)='Modify Lbasis Coulomb           '
1060  names(1711)='Diag eps^{-1}-eps_m^{-1}        '
1061  names(1712)='exact  AT shift lanczos         '
1062  names(1713)='model  AT shift lanczos         '
1063  names(1714)='exact  BT shift lanczos         '
1064  names(1715)='model  BT shift lanczos         '
1065  names(1716)='compute poles                   '
1066  names(1717)='Sigma_A Lanczos                 '
1067  names(1718)='Sigma_B num. integrands         '
1068 
1069 
1070  names(1719)='gwls: extract_QR                ';basic(1719)=1
1071  names(1720)='gwls: extract_SVD               ';basic(1720)=1
1072 
1073  ! these entry are not in a logical order.
1074  names(1721)='gwls: gstateimg                 '
1075  names(1722)='prepareValenceWfk               '
1076 
1077  names(1723)='gwls: sqmr                      ';basic(1723)=1
1078 
1079 
1080  names(1724)='gwls: Pk                        ';basic(1724)=1
1081  names(1725)='Pk- allocating                  '
1082  names(1726)='Pk- wfk to denpot               '
1083  names(1727)='Pk- wfk product with val        '
1084  names(1728)='Pk- pc_k                        '
1085  names(1729)='Pk- sqmr case 1                 '
1086  names(1730)='Pk- sqmr case 2                 '
1087  names(1731)='Pk- sqmr case 3                 '
1088  names(1732)='Pk-  qmr case 4                 '
1089  names(1733)='Pk- apply H (case 2)            '
1090 
1091 
1092  names(1734)='gwls: Pk_model                  ';basic(1734)=1
1093  names(1735)='Pk_model- allocating            '
1094  names(1736)='Pk_model- wfk to denpot         '
1095  names(1737)='Pk_model- wfk x val             '
1096  names(1738)='Pk_model- pc_k                  '
1097  names(1739)='Pk_model- act with Y            '
1098  names(1740)='Pk_model- add contrib.          '
1099 
1100 
1101  names(1741)='gwls: calc eps_m^-1(w)-1        ';basic(1741)=1
1102  names(1742)='Allocating                      '
1103  names(1743)='modifying Lanczos basis         '
1104  names(1744)='calc <mod_L_1|Y|mod_L_2>        '
1105  names(1745)='    make array hermitian        '
1106  names(1746)='               xsum_mpi         '
1107  names(1747)='inv eps_m and subtract 1        '
1108 
1109  ! IFC object
1110  names(1748)='ifc_fourq'; basic(1748) = 1
1111  !names(1749)='ewald9'; basic(1749) = 1
1112  !names(1750)='gtdyn9'; basic(1750) = 1
1113  !names(1751)='dfpt_phfrq'; basic(1751) = 1
1114 
1115  ! chebfi2
1116  names(1750) = 'chebfiwf2                     '; basic(1750) = 1
1117  names(1751) = 'chebfi2_init                  '
1118  names(1752) = 'chebfi2_free                  '
1119 ! names(1753) = 'chebfi2_run                   '
1120  names(1754) = 'chebfi2_getAX_BX              '
1121  names(1755) = 'chebfi2_invovl                '
1122  names(1756) = 'chebfi2_residu                '
1123  names(1757) = 'chebfi2_RayleighRitz          '
1124 ! names(1758) = 'chebfi2_pcond                 '
1125  names(1759) = 'chebfi2_RR_q                  '
1126  names(1760) = 'chebfi2_next_p                '
1127  names(1761) = 'chebfi2_swap                  '
1128  names(1762) = 'chebfi2_amp_f                 '
1129  names(1763) = 'chebfi2_alltoall              '
1130  names(1769) = 'chebfi2_X_NP@init             '
1131  names(1770) = 'chebfi2_AX_BX@init            '
1132 
1133  names(1780)='ctgk_rotate'; basic(1780) = 1
1134 
1135  names(1795) = 'RayleighRitz@diago            '; ndata(1795) = nbdmean*nbdmean
1136  names(1796) = 'RayleighRitz@gemm_1           '
1137  names(1797) = 'RayleighRitz@gemm_2           '
1138 
1139  ! DVDB object
1140  names(1800)='dvdb_new                        '; basic(1800) = 1
1141  names(1801)='dvdb_qcache_read                '; basic(1801) = 1
1142  names(1802)='dvdb_readsym_qbz                '; basic(1802) = 1
1143  names(1803)='dvdb_rotate_fqg                 '; basic(1803) = 1
1144  names(1804)='v1phq_rotate                    '; basic(1804) = 1
1145  names(1805)='dvdb_readsym_allv1              '; basic(1805) = 1
1146  names(1806)='dvdb_collect_v1_3natom          '; basic(1806) = 1
1147  names(1807)='dvdb_qcache_update              '; basic(1807) = 1
1148  names(1808)='dvdb_ftqcache_build             '; basic(1808) = 1
1149  names(1809)='dvdb_get_ftqbz                  '; basic(1809) = 1
1150 
1151  ! SIGEPH
1152  !names(1900)='sigph_pre_qloop                '; basic(1900) = 1
1153  !names(1901)='sigph_qloop_preamble           '; basic(1901) = 1
1154  !names(1902)='sigph_qloop_cg_and_h1          '; basic(1902) = 1
1155  names(1903)='sigph_bsum                      '; basic(1903) = 1
1156  names(1904)='sigph_bsum_1                    '; basic(1904) = 1
1157  names(1905)='sigph_bsum_2                    '; basic(1905) = 1
1158  names(1906)='sigph_bsum_3                    '; basic(1906) = 1
1159  names(1907)='sigph_bsum_4                    '; basic(1907) = 1
1160  names(1908)='sigph_prep_stern                '; basic(1908) = 1
1161  names(1909)='sigph_stern                     '; basic(1909) = 1
1162  names(1910)='sigph_post_stern                '; basic(1910) = 1
1163 
1164  ! GWR code
1165  names(1919)='ugb_from_diago                  '; basic(1919) = 1
1166  names(1920)='gwr_init                        '; basic(1920) = 1
1167  names(1921)='gwr_read_ugb_from_wfk           '; basic(1921) = 1
1168  names(1922)='gwr_build_green                 '; basic(1922) = 1
1169  names(1923)='gwr_build_tchi                  '; basic(1923) = 1
1170  names(1924)='gwr_build_wc                    '; basic(1924) = 1
1171  names(1925)='gwr_build_sigmac                '; basic(1925) = 1
1172  names(1926)='gwr_build_sigxme                '; basic(1926) = 1
1173  names(1927)='gwr_build_head_wings            '; basic(1927) = 1
1174  names(1928)='gwr_rpa_energy                  '; basic(1928) = 1
1175  !names(1929)='gwr_gk_to_scbox                '; basic(1929) = 1
1176  !names(1930)='gwr_wcq_to_scbox               '; basic(1930) = 1
1177  !names(1931)='gsph2box                       '; basic(1931) = 1
1178 
1179  ! TIMER_SIZE is 1999. See m_time
1180  names(TIMER_SIZE)='(other)                         ' ! This is a generic slot, to compute a complement
1181 
1182 !==================================================================================
1183 
1184  spaceworld= mpi_enreg%comm_world
1185  nproc     = mpi_enreg%nproc
1186  me        = mpi_enreg%me
1187  nthreads  = 1
1188  nthreads  = xomp_get_num_threads(open_parallel=.true.)
1189  if(nthreads<1) nthreads=1
1190 
1191  call timab(49,2,tsec)
1192 
1193  if(abs(timopt)==1 .or. timopt==-3 .or. timopt==-4)then ! Time the timing routine (precision should be better than 3%)
1194    ltimab=1
1195    utimab=1000
1196    maxii=20
1197 !  maxii=1    ! Uncomment this line if no timer is provided in timein.f
1198    do ii=1,20
1199 
1200      call timab(50,1,tsec)
1201      do itimab=ltimab,utimab
1202 !      The channel 51 is here used as a dummy channel
1203        call timab(51,1,tsec)
1204        call timab(51,2,tsec)
1205      end do
1206      call timab(50,2,tsec)
1207      call time_accu(50,return_ncount,tsec, lflops, ftsec)
1208 !    Exit the timing loop if the CPU time is bigger than 0.10 second
1209 !    of if the number of calls is too large.
1210 !    Since the accuracy of the timing is expected to be better than 0.01 sec,
1211 !    gives about 10% accuracy
1212      if(tsec(1)>0.10_dp)then
1213        exit
1214      else
1215        ltimab=utimab+1
1216 !      Increase the number of timab calls in a block.
1217 !      This small factor of increase allows to have less than
1218 !      0.15 second for this testing
1219        utimab=(3*utimab)/2
1220      end if
1221    end do
1222 !  Get the time per combined call timab(*,1,tsec) + timab(*,2,tsec)
1223    timab_cpu=tsec(1)/utimab
1224    timab_wall=tsec(2)/utimab
1225    if(timopt<0 .and. me==0 .and. timopt/=-2)then
1226      write(ount,*)
1227      write(ount,*)'Test the timer : '
1228      write(ount,*)' a combined call timab(*,1,tsec) + timab(*,2,tsec) is '
1229      write(ount, '(a,es14.4,a,es14.4,a)' )'- CPU time =',timab_cpu,' sec,    Wall time =',timab_wall,' sec'
1230    end if
1231  else
1232    timab_cpu=zero; timab_wall=zero
1233  end if
1234 
1235 !Eventually reenable the timab routine
1236  call timab(1,5,tsec)
1237 
1238 !Get overall elapsed cpu and wall clock time
1239  call timab(1,2,tsec)
1240  call time_accu(1,return_ncount,tsec,lflops,ftsec)
1241  ncount(1)=return_ncount
1242 
1243 !Sum over all procs
1244  my_tsec(:)=tsec(:)
1245  call xmpi_sum(my_tsec,tsec,2,spaceworld,ierr)
1246 
1247 !Only the world master writes
1248  if (me==0) then
1249    write(ount,'(/,a,f13.1,f12.2,f11.3)')'- Total cpu        time (s,m,h):',tsec(1),tsec(1)/60._dp,tsec(1)/3600._dp
1250    write(ount,'(a,f13.1,f12.2,f11.3)')  '- Total wall clock time (s,m,h):',tsec(2),tsec(2)/60._dp,tsec(2)/3600._dp
1251  end if
1252 
1253 !Get separate time reports from all timed sections
1254  totcount=0
1255  do itim=1,TIMER_SIZE
1256    call time_accu(itim,return_ncount,times(:,itim),nflops(itim),ftimes(:,itim))
1257    ncount(itim)=return_ncount
1258    totcount=totcount+return_ncount
1259  end do
1260 
1261 !Estimate additional timings.
1262 
1263 !Estimate the values associated with timab, put it in channel 51
1264  ncount(51)=totcount
1265  times(1,51)=timab_cpu*totcount
1266  times(2,51)=timab_wall*totcount
1267 
1268 !Gather the different parts of selected time slots
1269 !Or, alternatively, deduce the value of the complement of some time slots.
1270 !This loop is finished when the default case is hit (see below)
1271  do ii=1,TIMER_SIZE
1272 
1273    tslots(:)=0
1274 
1275 !  List first the time slot in which the result will be accumulated.
1276 !  If this number is negative, the positive value will be used for the time slot, but the ncount will be set to -1 .
1277 !  Then, list the time slots whose value will be either accumulate or subtracted. The latter is obtained by
1278 !  entering a minus sign in front of the time slot number ...
1279 !  If a negative number is present in the list, while the accumulated time slot is positive,
1280 !  then the number of counts will be set to the value of the first routine to be accumulated.
1281    select case(ii)
1282 !    Gather the different parts of nonlop  (SHOULD BE REEXAMINED !)
1283    case(1)
1284      tslots(:6)=(/75, 221,223,229,233,237/)
1285    case(2)
1286      tslots(:4)=(/76, 222,225,227/)
1287    case(3)
1288      tslots(:2)=(/77, 224/)
1289    case(4)
1290      tslots(:2)=(/78, 226/)
1291    case(5)
1292      tslots(:2)=(/79, 228/)
1293    case(6)
1294 !      Gather the different parts of selected time channels
1295      tslots(:10)=(/97, 75,76,77,78,79,220,230,231,232/)
1296    case(7)
1297 !      Gather the different parts of fourwf (NOTE : should attribute the channel 840 to one of the 4 modes !!!)
1298      tslots(:3)=(/802, 841,844/)
1299    case(8)
1300      tslots(:4)=(/803, 842,843,846/)
1301    case(9)
1302      tslots(:11)=(/804, 845,847,848,850,854,858,859,861,862,880/)
1303    case(10)
1304      tslots(:6)=(/805, 849,851,857,871,872/)
1305    case(11)
1306 !      In the following, the part coming from the prep_fourwf interface is added to the total.
1307      tslots(:7)=(/801, 802,803,804,805,840,856/)
1308    case(13)
1309 !      Gather the different parts of prep_fourwf
1310      tslots(:3)=(/539, 537,538/)
1311    case(14)
1312 !      Gather the different parts of fourdp
1313      tslots(:12)=(/9, 1260,1261,1262,1263,1264,1265,1266,1267,1268,1269,1270/)
1314    case(15)
1315 !      Gather the different parts of getghc
1316      tslots(:9)=(/350,351,352,353,354,355,356,357,358/)
1317    case(16)
1318 !      Gather the different parts of projbd
1319      tslots(:3)=(/210, 211,212/)
1320    case(17)
1321 !      Gather the different parts of rwwf (wavefunctions read/write)
1322      tslots(:24)=&
1323 &     (/14, 270,271,272,273,274,275,276,277,278,279,280,281,282,283,284,285,286,287,288,289,290,291,292/)
1324    case(18)
1325 !      Estimate the complement of getghc (non fourwf, non fourdp, non nonlop, non fock_XX)
1326      tslots(:8)=(/-359, 350,-221,-235,-236,-841,-360,-1580/)
1327    case(19)
1328 !      Estimate the complement of cgwf (non getghc,projbd)
1329      tslots(:6)=(/-40, 22,530,1300,-351,-211/)
1330    case(20)
1331 !      Estimate the complement of dfpt_cgwf (non getghc,projbd,nonlop,fourwf)
1332 !    tslots(:8)=(/-140, 122,-202,-197,-212,-227,-228,-844/)  ! 197 includes some nonlop and fourwf, so there is double counting ...
1333      tslots(:7)=(/-140, 122,-352,-212,-227,-228,-844/)
1334    case(21)
1335 !      Estimate different complements in vtowfk
1336 !      vtowfk(ssdiag) (= vtowfk(loop)  -cgwf-lobpcgwf_old-cgwf_cprj-lobpcgwf2-chebfi - getcprj(vtowfk) - getcsc(subovl))
1337      tslots(:9)=(/-588, 39,-22,-530,-1300,-1600,-1640,-1295,-1364/)
1338    case(22)
1339 !      vtowfk(contrib) (= vtowfk (afterloop) - nonlop%vtowfk - fourwf%vtowfk )
1340      tslots(:4)=(/589, 30,-222,-842/)
1341    case(23)
1342 !      vtowfk (1) = vtowfk - vtowfk(loop) - vtowfk(afterloop)
1343      tslots(:4)=(/31, 28,-39,-30/)
1344    case(24)
1345 !      Estimate different complements in dfpt_vtowfk
1346 !      dfpt_vtowfk(contrib) (= vtowfk3(loop) - cgwf - fourwf%vtowfk3 - rwwf%vtowfk3 - corrmetalwf1)
1347      tslots(:6)=(/-113, 139,-122,-845,-288,-214/)
1348    case(25)
1349 !      vtowfk (1) = dfpt_vtowfk - vtowfk3(loop) - vtowfk3 (3)
1350      tslots(:4)=(/ 131, 128,-139,-130/)
1351    case(28)
1352 !      dfpt_vtorho-kpt loop (= dfpt_vtowfk (2) - vtowfk3 - rwwf)
1353      tslots(:4)=(/126,125,-128,-287/)
1354    case(29)
1355 !      Estimate complement in mkrho
1356      tslots(:3)=(/798,799,-843/)
1357    case(30)
1358 !      Estimate complement in dfpt_looppert
1359 !      dfpt_looppert(other) (= loper3 - loper3(kpgio) - loper3(getmpw) - loper3(inwffil)
1360 !      dfpt_scfcv - dfpt_looppert(outwf) -loper3(eigt2tot)
1361      tslots(:8)=(/145,141,-142,-143,-144,-120,-146,-147/)
1362    case(31)
1363 !      Estimate complement in sigma
1364 !      sigma/=fourdp = sigma - fourdp%rhotwg%si - fourdp%fftwfn%si
1365      tslots(:4)=(/410,401,-262,-265/)
1366    case(32)
1367 !      Estimate complement in bethe_salpeter
1368      tslots(:2)=(/699,650/)
1369    case(33)
1370 !      Estimate complement in susk
1371 !      NOTE : fourwf%susk _PAW should actually be split between susk (loop except FFT)
1372 !      and susk (accumul.except FFT . But a renumbering of the fourwf splitting should be done ...
1373 !      susk (loop except FFT) = susk (loop) - fourwf%susk !0 - fourwf%susk !3
1374      tslots(:4)=(/756,752,-848,-849/)
1375    case(34)
1376 !      susk (accumul.except FFT = susk (accumul) - fourwf%susk !3bis - fourwf%susk _PAW
1377      tslots(:4)=(/757,754,-859,-857/)
1378    case(35)
1379 !      Estimate complement in suskmm
1380 !      NOTE : fourwf%susk _PAW should actually be split between susk (loop except FFT)
1381 !      and suskmm (accum.except FFT . But a renumbering of the fourwf splitting should be done ...
1382 !      suskmm (loop except FFT) = suskmm (loop) - fourwf%suskmm !0 part 1 - fourwf%suskmm !3 part 1
1383      tslots(:4)=(/764,762,-861,-871/)
1384    case(36)
1385 !      suskmm (accum.except FFT = suskmm (accumul) - fourwf%suskmm !0 part 2 - fourwf%suskmm !3 part 2 - fourwf%susk _PAW
1386      tslots(:5)=(/765,763,-862,-872,-857/)
1387    case(37)
1388 !      inwffil(excl. calls) = inwffil - inwffil(call wfsinp) - inwffil(call newkpt);
1389      tslots(:4)=(/718,710,-714,-717/)
1390    case(38)
1391 !      wfsinp(excl. calls) = wfsinp - wfsinp(call  initwf) - wfsinp(call rwwf)
1392      tslots(:4)=(/728,720,-724,-727/)
1393    case(39)
1394 !      newkpt(excl. rwwf   )=newkpt(before loop) + newkpt(before rwwf) + newkpt(after rwwf)
1395 !      newkpt(call wfconv) + newkpt(finalize loop) + newkpt(after loop   )
1396      tslots(:7)=(/-788,781,782,783,784,785,786/)
1397    case(40)
1398 !      More complements in vtowfk
1399 !      vtowfk (2) = vtowfk (loop) - cgwf - lobpcg - subdiago - pw_orthon - cprj_rotate - getcprj(vtowfk)
1400      tslots(:10)=(/-590,39,-22,-1300,-1600,-530,-585,-583,-578,-1295/)
1401    case(41)
1402 !      vtowfk (3) = vtowfk (afterloop) - nonlop%vtowfk - prep_nonlop%vtowfk - fourwf%vtowfk - prep_fourwf%vtowfk - vtowfk(nonlocalpart)
1403      tslots(:7)=(/-591,30,-222,-572,-842,-537,-586/)
1404    case(43)
1405 !      mkrho = mkrho%gstate + mkrho%vtorho + mkrho%energy + mkrho%respfn + mkrho%afterscfloop + mkrho%scfcv_core
1406      tslots(:7)=(/790,791,792,793,794,795,796/)
1407    case(44)
1408 !      Estimate the complement of dmft (in vtorho, only)
1409      tslots(:9)=(/-626, 991,-620,-621,-622,-623,-624,-625,-627/)
1410 !   case(45)
1411 !!      Estimate the complement of nonlop_ylm
1412 !     tslots(:10)=(/1119,1100,-1101,-1102,-1103,-1104,-1105,-1106,-1107,-1108/)
1413    case(46)
1414 !      Sum the calls of getcprj
1415      tslots(:4)=(/1290,1293,1294,1295/)
1416    case(47)
1417 !      Estimate the complement of getcprj
1418      tslots(:5)=(/1299,1290,-1293,-1294,-1295/)
1419    case(48)
1420 !      Estimate the complement of cgwf_cprj
1421      tslots(:12)=(/1301,1300,-1302,-1303,-1304,-1305,-1293,-1363,-1370,-351,-211,-880/)
1422    case(49)
1423 !      Sum calls of getcsc
1424      tslots(:3)=(/1360,1363,1364/)
1425    case(50)
1426 !      Estimate the complement of getcsc
1427      tslots(:4)=(/1362,1360,-1363,-1364/)
1428    case(51)
1429 !      Estimate the complement of getchc
1430      tslots(:5)=(/1375,1370,-235,-1371,-1372/)
1431 
1432    case default
1433      cycle
1434    end select
1435 
1436    tslot=tslots(1)
1437    aslot=abs(tslot)
1438    ncount(    aslot)=0 ; if (tslot<0)ncount(aslot)=-1
1439    times(1:2, aslot)=zero
1440    nflops(    aslot)=zero
1441    ftimes(1:2,aslot)=zero
1442    flag_count=1
1443    do islot=2,TIMER_SIZE
1444      bslot=tslots(islot)
1445      cslot=abs(bslot)
1446      if(bslot>0)then
1447        if(tslot>0)ncount(aslot)=ncount(aslot)+ncount(cslot)
1448        times(1:2, aslot)=times(1:2, aslot)+times(1:2,cslot)
1449        nflops(    aslot)=nflops(    aslot)+nflops(   cslot)
1450        ftimes(1:2,aslot)=ftimes(1:2,aslot)+ftimes(1:2,cslot)
1451      else if(bslot<0)then
1452        if(tslot>0)flag_count=-1
1453        times(1:2, aslot)=times(1:2, aslot)-times(1:2,cslot)
1454        nflops(    aslot)=nflops(    aslot)-nflops(   cslot)
1455        ftimes(1:2,aslot)=ftimes(1:2,aslot)-ftimes(1:2,cslot)
1456      else if(bslot==0)then
1457        exit
1458      end if
1459    end do
1460    if(flag_count==-1)ncount(aslot)=ncount(abs(tslots(2)))
1461  end do
1462 
1463 !For the following sections, the number of counts is non standard, and thus these sections have not been placed
1464 !in the previous doloop.
1465 
1466 !Compute xc part of rhotoxc and dfpt_mkvxc, minus the calls to fourdp inside that part
1467  ncount(11)=ncount(81)+ncount(181)
1468  times(1:2,11)=times(1:2,81)+times(1:2,181)-times(1:2,82)
1469  ftimes(1:2,11)=ftimes(1:2,81)+ftimes(1:2,181)-ftimes(1:2,82)
1470  nflops(11)=nflops(81)+nflops(181)-nflops(82)
1471 
1472 !Estimate different complements in dfpt_vtorho
1473 !dfpt_vtorho (1) (= vtorho3 (1,2) - vtorho3(2) - vtorho3:synchro )
1474  ncount(118)=ncount(121)
1475  times(1:2,118)=times(1:2,124)-times(1:2,125)-times(1:2,166)
1476  ftimes(1:2,118)=ftimes(1:2,124)-ftimes(1:2,125)-ftimes(1:2,166)
1477  nflops(118)=nflops(124)-nflops(125)-nflops(166)
1478 
1479 !Calculating Gigaflops for all cases
1480  do itim=1,TIMER_SIZE
1481    mflops(itim)=-2
1482    if(abs(ftimes(1,itim)) > tol10) then ! VALGRIND complains that here there is a jump on uninitialized values
1483      mflops(itim)=nflops(itim)*1.e-9/ftimes(1,itim)
1484    else
1485      mflops(itim)=-1
1486    end if
1487  end do
1488 
1489 !Warning if the time is negative
1490  do itim=1,TIMER_SIZE
1491    if(times(1,itim)<-tol6 .or. times(2,itim)<-tol6 .or. ncount(itim)<-1 )then
1492      write(msg, '(6a,i4,4a,es16.6,a,es16.6,a,i6,a,es16.6)' ) ch10,&
1493       ' timana: WARNING -',ch10,&
1494       '  One among cpu, wall and ncount is negative.',ch10,&
1495       '  Timing section #',itim,', name :  ',names(itim),ch10,&
1496       '  CPU =',times(1,itim),', Wall=',times(2,itim),' ncount=',ncount(itim),' flops=',nflops(itim)
1497      call wrtout(std_out,msg,'PERS')
1498    end if
1499  end do
1500 
1501 !List of major independent code sections
1502  ABI_MALLOC(list, (TIMER_SIZE))
1503  list(:)=0
1504  nlist=0
1505  do itim=1,TIMER_SIZE
1506    if(basic(itim)/=0)then
1507      nlist=nlist+1
1508      list(nlist)=itim
1509    end if
1510  end do
1511 
1512  percent_limit=0.5_dp
1513  if (timopt<0) percent_limit=0.0001_dp
1514  !if (timopt<0) percent_limit=tol12
1515 
1516 !In case there is parallelism, report times for node 0
1517 !if (me==0 .and. nproc>1) then
1518  if (me==0) then
1519 
1520 !  Find normalization to report timing as % total time
1521    cpunm=100._dp/tsec(1)
1522    wallnm=100._dp/tsec(2)
1523 
1524 !  (0) Take care of major independent code sections for this account of node 0 timing
1525 
1526    write(ount,  '(a,a,a,a,/,a,a,a)' ) '-',ch10,&
1527     '- For major independent code sections,',' cpu and wall times (sec),',&
1528     '-  as well as % of the time and number of calls for node 0',ch10,&
1529     '-'
1530 
1531    write(ount,"(3(a,i0),a)")&
1532     "-<BEGIN_TIMER mpi_nprocs = ",nproc,", omp_nthreads = ",nthreads,", mpi_rank = ",me,">"
1533 
1534 !  write(ount,"(2(a,f13.1))")"- tot_cpu_time = ",tsec(1),   ", tot_wall_time = ",tsec(2)
1535    write(ount,"(2(a,f13.1))")"- cpu_time =  ",my_tsec(1),", wall_time =  ",my_tsec(2)
1536    write(ount,"(a)")"-"
1537 
1538    write(ount,format_head1)&
1539      '- routine','cpu','%','wall','%',' number of calls ',' Gflops ', 'Speedup', 'Efficacity'
1540    write(ount,format_head2)&
1541      '-                ','   ',' ','    ',' ','  (-1=no count)'
1542 
1543 !  Sort the list by decreasing CPU time
1544    do ii=1,nlist
1545      do ilist=1,nlist-1
1546        if (times(1,list(ilist))<times(1,list(ilist+1))) then
1547          temp_list=list(ilist)
1548          list(ilist)=list(ilist+1)
1549          list(ilist+1)=temp_list
1550        end if
1551      end do
1552    end do
1553 
1554    subcpu=zero; subwal=zero; other_cpu=zero; other_wal=zero; nothers=0
1555 
1556    do ilist=1,nlist
1557      isort = list(ilist)
1558 
1559      if ( ((times(1,isort)*cpunm  > percent_limit .and.       &
1560             times(2,isort)*wallnm > percent_limit      ).or.  &
1561 !          Also print the name of routines with anomalous negative timing. This is to help debugging.
1562             (times(1,isort)*cpunm  < -tol3 .or.                &
1563              times(2,isort)*wallnm < -tol3              ))     &
1564            .and. ncount(isort) /= 0) then ! Timing analysis
1565 
1566        times(2,isort)=times(2,isort)+tol14
1567        write(ount,format01041)names(isort),&
1568          times(1,isort),times(1,isort)*cpunm,times(2,isort),times(2,isort)*wallnm,ncount(isort),mflops(isort), &
1569          times(1,isort)/times(2,isort),times(1,isort)/times(2,isort)/nthreads
1570      else
1571        nothers=nothers+1
1572        other_cpu=other_cpu+times(1,isort)
1573        other_wal=other_wal+times(2,isort)
1574      end if
1575 
1576      subcpu=subcpu+times(1,isort)
1577      subwal=subwal+times(2,isort)
1578    end do
1579 
1580    write(entry_name,"(a,i0,a)")"others (",nothers,")"
1581    other_wal = other_wal + tol14
1582    write(ount,format01041)entry_name,other_cpu,other_cpu*cpunm,other_wal,other_wal*wallnm,-1,-1.0, &
1583      other_cpu/other_wal,other_cpu/other_wal/nthreads
1584    write(ount,"(a)")"-<END_TIMER>"
1585 
1586    write(ount,'(a)' ) '-'
1587    subwal = subwal + tol14
1588    write(ount,01200) subcpu,subcpu*cpunm,subwal,subwal*wallnm,subcpu/subwal,subcpu/subwal/nthreads
1589  end if
1590 
1591 !Now, gather all information
1592  call xmpi_sum(times,spaceworld,ierr)
1593  call xmpi_sum(ncount,spaceworld,ierr)
1594  call xmpi_sum(ftimes,spaceworld,ierr)
1595  call xmpi_sum(nflops,spaceworld,ierr)
1596 
1597  if (me==0) then ! Only the world master writes
1598 
1599 !  Find normalization to report timing as % total time
1600    cpunm=100._dp/tsec(1)
1601    wallnm=100._dp/tsec(2)
1602 
1603 !  Calculating Gigaflops for all process
1604    do itim=1,TIMER_SIZE
1605      mflops(itim)=-2
1606      if(abs(ftimes(1,itim)) > tol10) then ! VALGRIND complains that here there is a jump on uninitialized values
1607        mflops(itim)=nflops(itim)*1.e-9/ftimes(1,itim)
1608      else
1609        mflops(itim)=-1
1610      end if
1611    end do
1612 
1613 !  _______________________________________
1614 
1615 !  Write timing output for cpu times
1616 
1617 !  (1) Take care of major independent code sections
1618    write(ount,'(/,a,/,a,/)' )&
1619      '- For major independent code sections, cpu and wall times (sec),',&
1620      '- as well as % of the total time and number of calls '
1621 
1622    write(ount,"(2(a,i0),a)")&
1623      "-<BEGIN_TIMER mpi_nprocs = ",nproc,", omp_nthreads = ",nthreads,", mpi_rank = world>"
1624 
1625    write(ount,"(2(a,f13.1))")"- cpu_time = ",tsec(1),   ", wall_time = ",tsec(2)
1626    write(ount,"(a)")"-"
1627 
1628    write(ount,format_head1)&
1629     '- routine        ','cpu','%','wall','%', ' number of calls ',' Gflops ', &
1630     'Speedup', 'Efficacity'
1631    write(ount,format_head2)&
1632     '-                ','   ',' ','    ',' ','  (-1=no count)'
1633 
1634 !  Sort the list by decreasing CPU time
1635    do ii=1,nlist
1636      do ilist=1,nlist-1
1637        if(times(1,list(ilist))<times(1,list(ilist+1)))then
1638          temp_list=list(ilist)
1639          list(ilist)=list(ilist+1)
1640          list(ilist+1)=temp_list
1641        end if
1642      end do
1643    end do
1644 
1645    subcpu=zero; subwal=zero; other_cpu=zero; other_wal=zero; nothers=0
1646 
1647    do ilist=1,nlist
1648      isort = list(ilist)
1649      if( (times(1,isort)*cpunm > percent_limit .and. times(2,isort)*wallnm> percent_limit) .and. ncount(isort)/=0 )then
1650 
1651        times(2,isort)=times(2,isort)+tol14
1652        write(ount,format01041)names(isort),&
1653          times(1,isort),times(1,isort)*cpunm,times(2,isort),times(2,isort)*wallnm,ncount(isort),mflops(isort), &
1654          times(1,isort)/times(2,isort),times(1,isort)/times(2,isort)/nthreads
1655      else
1656        nothers=nothers+1
1657        other_cpu=other_cpu+times(1,isort)
1658        other_wal=other_wal+times(2,isort)
1659      end if
1660      subcpu=subcpu+times(1,isort)
1661      subwal=subwal+times(2,isort)
1662    end do
1663 
1664    other_wal = other_wal + tol14
1665    write(entry_name,"(a,i0,a)")"others (",nothers,")"
1666    write(ount,format01041)entry_name,other_cpu,other_cpu*cpunm,other_wal,other_wal*wallnm,-1,-1.0, &
1667      other_cpu/other_wal,other_cpu/other_wal/nthreads
1668 
1669    write(ount,"(a)")"-<END_TIMER>"
1670 
1671    subwal = subwal + tol14
1672    write(ount,01201) subcpu,subcpu*cpunm,subwal,subwal*wallnm,subcpu/subwal,subcpu/subwal/nthreads
1673 
1674 !  (2) Partitionings
1675    if (timopt<0) then
1676 
1677      npart=1000
1678      do ipart=1,npart
1679        list(:)=0
1680        select case(ipart)
1681 
1682        case(1)
1683          list(:10)=(/1,101,102,103,104,640,105,49,50,TIMER_SIZE/)      ; msg='abinit '
1684        case(2)
1685          list(:13)=(/640,641,642,700,132,84,301,401,501,650,643,644,TIMER_SIZE/)        ; msg='driver '
1686        case(3)
1687          list(:32)=(/ (ii,ii=1200,1231,1) /)                         ; msg='gstateimg+gstate '
1688        case(4)
1689          list(:24)=(/ (ii,ii=1440,1462,1),TIMER_SIZE/)               ; msg='scfcv_core '
1690        case(5)
1691          list(:7)=(/940,941,942,943,944,945,TIMER_SIZE/)             ; msg= 'rhotov '
1692        case(6)
1693          list(:22)=(/980,981,982,983,984,28,985,271,986,987,988,989,990,991,992,993,994,995,996,997,1620,TIMER_SIZE/)
1694          msg= 'vtorho '
1695        case(7)
1696          list(:18)=(/28,31,22,530,585,583,590,222,572,842,537,586,591,578,1295,1300,1600,TIMER_SIZE/) ; msg='vtowfk '
1697        case(8)
1698          if(abs(timopt)==3)then
1699            list(:11)=(/530,354,355,571,532,533,630,535,536,584,587/)  ; msg='lobpcgwf (abs(timopt)==3)'
1700          else if(abs(timopt)==4)then
1701            list(:8)=(/530,520,521,522,523,524,525,526/)               ; msg='lobpcgwf (abs(timopt)==4)'
1702 !            else
1703 !            list(:3)=(/530,354,355/)
1704 !            msg='lobpcgwf (light analysis: for a deeper one, use abs(timopt)=3 or 4)'
1705          end if
1706        case(9)
1707          list(:4)=(/22,351,40,211/)                                  ; msg='cgwf '
1708        case(10)
1709          list(:8)=(/132,133,134,135,136,137,138,141/)                ; msg='respfn '
1710        case(11)
1711          list(:8)=(/141,142,143,144,120,146,147,TIMER_SIZE/)         ; msg='dfpt_looppert '
1712        case(12)
1713          list(:9)=(/120,154,121,157,152,158,160,150,564/)            ; msg='dfpt_scfcv '
1714        case(13)
1715          list(:9)=(/121,118,128,126,287,166,129,127,556/)            ; msg='dfpt_vtorho '
1716        case(14)
1717          list(:9)=(/128,131,122,845,288,214,113,130,565/)            ; msg='dfpt_vtowfk '
1718        case(15)
1719          list(:8)=(/122,140,352,197,212,227,228,844/)                ; msg='dfpt_cgwf '
1720        case(16)
1721          list(:8)=(/350,841,221,359,235,236,360,1580/)              ; msg='getghc '
1722        case(17)
1723          list(:21)=(/801,840,841,842,843,844,845,846,847,848,849,850,851,852,853,854,855,856,857,858,880/)
1724          msg='fourwf (upwards partitioning)'
1725        case(18)
1726          list(:5)=(/933,934,936,937,938/)                            ; msg='outkss '
1727        case(19)
1728          list(:14)=(/301,302,315,316,319,304,305,320,321,306,307,308,309,310/)
1729          msg='screening '
1730        case(20)
1731          list(:13)=(/401,402,403,404,405,406,407,408,409,421,423,424,425/); msg='sigma  '
1732        case(21)
1733          list(:9)=(/431,432,433,434,435,445,440,441,442/)            ; msg='calc_sigc_me '
1734        case(23)
1735          list(:11)=(/630,631,632,633,634,545,635,636,637,638,TIMER_SIZE/)         ; msg='prep_getghc '
1736        case(24)
1737          list(:4)=(/539,856,547,548/)                                ; msg='prep_fourwf '
1738        case(25)
1739          list(:5)=(/570,231,232,581,TIMER_SIZE/)                     ; msg='prep_nonlop '
1740        case(26)
1741          list(:6)=(/(ii,ii=790,795,1)/)                  ; msg='mkrho (upwards partitioning)'
1742 !          Disabled (temporarily ?) because the partitioning was not correct
1743 !          case(27);list(:17)=(/600,601,602,603,604,605,617,606,607,608,609,610,611,612,613,614,615/)
1744 !          msg='vtorhorec '
1745        case(28)
1746          list(:10)=(/650,651,653,654,655,656,658,659,660,661/)       ; msg='bethe_salpeter '
1747        case(29)
1748          list(:8)=(/ (ii,ii=740,747,1) /)                            ; msg='suscep_stat '
1749        case(30)
1750          list(:9)=(/750,751,848,849,753,756,859,757,755/)            ; msg='susk '
1751        case(31)
1752          list(:8)=(/760,761,764,861,871,765,862,872/)                ; msg='suskmm '
1753        case(32)
1754          list(:8)=(/ (ii,ii=710,717,1) /)                            ; msg='inwffil '
1755        case(33)
1756          list(:10)=(/720,721,722,723,724,725,726,727,67,TIMER_SIZE/) ; msg='wfsinp '
1757        case(34)
1758          list(:5)=(/770,771,772,272,290/)                            ; msg='initwf '
1759        case(35)
1760          list(:9)=(/780,781,782,783,784,785,786,291,292/)            ; msg='newkpt '
1761        case(36)
1762          list(:8)=(/93,901,902,903,904,905,268,TIMER_SIZE/)          ; msg='newvtr '
1763        case(37)
1764          list(:2)=(/94,269/)                                         ; msg='newrho '
1765        case(38)
1766          list(:12)=(/9,1260,1261,1262,1263,1264,1265,1266,1267,1268,1269,1270/) ; msg='fourdp (upwards partitioning)'
1767        case(39)
1768          list(:8)=(/ (ii,ii=250,257,1) /)                            ; msg='afterscfloop '
1769        case(40)
1770          list(:5)=(/ (ii,ii=910,914,1) /)                            ; msg='forstr '
1771        case(41)
1772          list(:9)=(/920,921,922,923,924,925,926,927,928/)            ; msg='forstrnps '
1773        case(42)
1774          list(:4)=(/670,671,672,673/)                                ; msg='exc_build_ham '
1775        case(43)
1776          list(:7)=(/ (ii,ii=680,686,1) /)                            ; msg='exc_build_block'
1777        case(44)
1778          list(:8)=(/ (ii,ii=690,697,1) /)                            ; msg='exc_haydock_driver '
1779        case(45)
1780          list(:50)=(/ (ii,ii=1150,1199,1) /)                         ; msg='outscfcv '
1781        case(46)
1782          list(:8)=(/ (ii,ii=620,627,1) /)                            ; msg='dmft '
1783        case(47)
1784          list(:9)=(/ (ii,ii=1001,1009,1) /)                          ; msg='initberry '
1785        case(50)
1786          list(:5)=(/1560,1561,1562,1563,1565/)                       ; msg='fock2ACE '
1787        case(51)
1788          list(:5)=(/1504,1515,850,1270,237/)                         ; msg='fock_getghc -original'
1789        case(52)
1790          list(:5)=(/1504,1515,1512,1513,1514/)                       ; msg='fock_getghc -tight'
1791        case(53)
1792          list(:4)=(/1504,1505,1506,1507/)                            ; msg='fock_getghc big blocs'
1793        case(54)
1794          list(:11)=(/1504,1505,(ii,ii=1521,1528,1),1507/)            ; msg='fock_getghc small blocs'
1795        case(55)
1796          list(:15)=(/1504,1512,1513,1514,1541,1521,1542,1523,1544,1545,1546,1527,1528,1547,1548/)
1797              msg='fock_getghc small blocs + fourXX,  nonlop, xmpi_sum '
1798        case(60)
1799          list(:13)=(/1600,1607,1630,1631,1632,1601,1603,1604,1605,1606,1608,1609,1610/) ; msg = 'chebfi'
1800        case(61)
1801          list(:3)=(/1620,1621,1622/)                                 ; msg = 'mkinvovl'
1802        case(70)
1803          list(:5)=(/1701,1702,1703,1721,1722/)                       ; msg='gwls GW code'
1804        case(71)
1805          list(:16)=(/ (ii,ii=1703,1718,1) /)                         ; msg='gwls: compute_correlations_shift_lanczos'
1806        case(72)
1807          list(:10)=(/ (ii,ii=1724,1733,1) /)                         ; msg='gwls: Applying the susceptibility Pk'
1808        case(73)
1809          list(:7)=(/ (ii,ii=1734,1740,1) /)                          ; msg='gwls: Applying the model susceptibility Pk_model'
1810        case(74)
1811          list(:7)=(/ (ii,ii=1741,1747,1) /)              ; msg='gwls: computing the matrix elements of eps_model^{-1}(w) -1 '
1812        case(75)
1813          list(:19)=(/ (ii,ii=1640,1648,1), (ii,ii=1651,1660,1)/)     ; msg='lobpcgwf2 core engine '
1814        case(76)
1815          list(:12)=(/1750,1751,1752,1754,1755,1756,1757,1759,1760,1761,1762,1763/) ; msg='chebfiwf2 core engine '
1816        case(77)
1817          list(:5)=(/1690,1691,1692,1693,1694/) ; msg='low-level xgScalapack type '
1818        case(78)
1819          list(:8)=(/1662,1663,1664,1665,1666,1667,1668,1669/) ; msg='low-level xgTransposer type '
1820        case(79)
1821          list(:12)=(/1300,1293,1302,1303,1304,1305,1363,1370,351,211,880,1301/) ; msg='cgwf_cprj'
1822 !       case(80)
1823 !         list(:10)=(/1100,1101,1102,1103,1104,1105,1106,1107,1108,1119/) ; msg='nonlop_ylm'
1824        case(81)
1825          list(:5)=(/1290,1293,1294,1295,1299/) ; msg='getcprj'
1826        case(82)
1827          list(:4)=(/1360,1363,1364,1362/)      ; msg='getcsc'
1828        case(83)
1829          list(:5)=(/1370,235,1371,1372,1375/)  ; msg='getchc'
1830        case(84)
1831          list(:19)=(/ (ii,ii=1670,1688,1) /)                         ; msg='low-level xgBlock type '
1832        case default
1833          cycle ! This allows one to disable temporarily some partitionings
1834 
1835        end select
1836 
1837        nlist=0
1838        do itim=1,TIMER_SIZE
1839          if(list(itim)/=0)then
1840            nlist=nlist+1
1841          else
1842            exit
1843          end if
1844        end do
1845 
1846        if(nlist==0)then
1847          cycle
1848        end if
1849 
1850        if(ncount(list(1))/=0)then
1851          write(ount,'(/,a,a)')' Partitioning of ',trim(msg)
1852          subcpu=zero
1853          subwal=zero
1854          do ilist=1,nlist
1855            isort = list(ilist)
1856 !          When the LAST item is TIMER_SIZE, a complement is evaluated (count number set to -1)
1857            if(ilist==nlist .and. list(nlist)==TIMER_SIZE)then
1858              times(1,TIMER_SIZE)=times(1,list(1))-subcpu
1859              times(2,TIMER_SIZE)=times(2,list(1))-subwal
1860              ncount(TIMER_SIZE)=-1
1861              ftimes(1,TIMER_SIZE)=zero
1862              mflops(TIMER_SIZE)=0
1863 #if defined HAVE_TEST_TIME_PARTITIONING
1864              if(times(2,TIMER_SIZE)>1.2d0 .and. wallnm*times(2,TIMER_SIZE)>3.d0)then
1865                write(ount, '(3a,es16.6,4a,es16.6,2a)')&
1866                 ' Note : the partitioning does not work well for this routine.',ch10,&
1867                 '   The (other) Wall time            ',times(2,TIMER_SIZE),ch10,&
1868                 '   is bigger than 1.2 secs. ',ch10,&
1869                 '   The (other) Wall time percentage ',wallnm*times(2,TIMER_SIZE),ch10,&
1870                 '   is bigger than 3% '
1871              else if (times(2,TIMER_SIZE)<0.2d0 .and. wallnm*times(2,TIMER_SIZE)<-0.2d0)then
1872                write(ount, '(3a,es16.6,2a)')&
1873                 ' Note : the partitioning does not work well for this routine.',ch10,&
1874                 '   The (other) Wall time percentage ',wallnm*times(2,TIMER_SIZE),ch10,&
1875                 '   is negative '
1876              end if
1877 #endif
1878            end if
1879            if(ncount(isort)/=0)then
1880              ! Do not write a slot if the wall time ratio is below a threshold
1881              ! However, also identifies when the wall time of a slot (here, a complement) is negative
1882              if(times(2,isort)*wallnm>0.02d0 .or. ilist==1 .or. times(2,isort)*wallnm<-tol3)then
1883                if((times(2,isort)*wallnm>0.02d0.or.ilist==1).and.times(2,isort) < 0.0001)times(2,isort)=-1.d0
1884                times(2,isort)=times(2,isort)+tol14
1885                write(ount,format01040)names(isort),&
1886                  times(1,isort),times(1,isort)*cpunm,&
1887                  times(2,isort),times(2,isort)*wallnm,ncount(isort), &
1888                  times(1,isort)/times(2,isort),times(1,isort)/times(2,isort)/nthreads
1889              end if
1890              if(ilist/=1)then
1891                subcpu=subcpu+times(1,isort)
1892                subwal=subwal+times(2,isort)
1893              else
1894                write(ount, '(a)' ) ' '
1895              end if
1896            end if
1897          end do
1898 
1899          subwal = subwal + tol14
1900          write(ount, 01201 ) subcpu,subcpu*cpunm,subwal,subwal*wallnm, subcpu/subwal,subcpu/subwal/nthreads
1901 #ifdef HAVE_TEST_TIME_PARTITIONING
1902          if( wallnm*abs(subwal-times(2,list(1)))>1.d0 .and. abs(subwal-times(2,list(1)))>0.2d0 )then
1903            write(ount, '(3a,es16.6,2a,es16.6,4a,es16.6,2a,es16.6,6a,i4)')&
1904             ' Note : the partitioning does not work well for this routine.',ch10,&
1905             '   The subtotal Wall time            ',subwal,ch10,&
1906             '   differs from the total Wall time  ',times(2,list(1)),ch10,&
1907             '   by more than 0.2 secs.',ch10,&
1908             '   The subtotal Wall time percentage ',wallnm*subwal,ch10,&
1909             '   differs from the total Wall time %',wallnm*times(2,list(1)),ch10,&
1910             '   by more than 1%. ',ch10,&
1911             '   The partitioning might not have been coded properly.',ch10,&
1912             '   nlist=',nlist
1913            do ilist=1,nlist
1914              write(ount, '(a,i4,i4,es16.6,i8)' )&
1915               ' ilist,list(ilist),wallnm*times(2,list(ilist)),ncount(list(ilist))=',&
1916               ilist,isort,wallnm*times(2,isort),ncount(isort)
1917            end do
1918          end if
1919 #endif
1920        end if
1921 
1922      end do ! End of loop on partitionings
1923 
1924 !    For parallel case
1925      if(xmpi_paral==1)then
1926        write(ount, '(a,/,a)' )'-','-Synchronisation (=leave_test) and MPI calls '
1927        nlist=14
1928        list(:nlist)=(/48,61,62,63,64,65,66,67,71,85,86,543,544,787/)
1929        subcpu=zero; subwal=zero
1930        if(ncount(list(1))/=0)then
1931          do ilist=1,nlist
1932            isort = list(ilist)
1933 !
1934            if (ncount(isort)/=0) then
1935              times(2,isort)=times(2,isort)+tol14
1936              write(ount,format01040)names(isort),&
1937               times(1,isort),times(1,isort)*cpunm,&
1938               times(2,isort),times(2,isort)*wallnm,ncount(isort), &
1939               times(1,isort)/times(2,isort),times(1,isort)/times(2,isort)/nthreads
1940 
1941              if(ilist/=1)then
1942                subcpu=subcpu+times(1,isort)
1943                subwal=subwal+times(2,isort)
1944              else
1945                write(ount, '(a)' ) '-'
1946              end if
1947            end if !ncount
1948          end do !ilist
1949 
1950          subwal = subwal + tol14
1951          write(ount, 01200 ) subcpu,subcpu*cpunm,subwal,subwal*wallnm, subcpu/subwal,subcpu/subwal/nthreads
1952        end if !ncount
1953      end if !xmpi_paral
1954 
1955      nlist=27
1956      list(:nlist)=(/47,49,51,801,72,73,74,77,78,79,97,82,87,88,436,437,438,439,443,444,804,805,331,332,333,1280,1281/)
1957      flag_write=1
1958      do ilist=1,nlist
1959        isort = list(ilist)
1960        if(ncount(isort)/=0)then
1961          if(flag_write==1)then
1962            write(ount, '(/,a)' ) ' Additional information'
1963            flag_write=0
1964          end if
1965          times(2,isort)=times(2,isort)+tol14
1966          write(ount,format01040)names(isort),&
1967            times(1,isort),times(1,isort)*cpunm,times(2,isort),times(2,isort)*wallnm,ncount(isort), &
1968            times(1,isort)/times(2,isort),times(1,isort)/times(2,isort)/nthreads
1969        end if
1970      end do
1971 
1972      nlist=23
1973      list(:nlist)=(/550,551,552,553,554,555,556,558,559,560,561,562,563,564,565,566,567,593,594,595,596,597,598/)
1974      flag_write=1
1975      do ilist=1,nlist
1976        isort = list(ilist)
1977        if(ncount(isort)/=0)then
1978          if(flag_write==1)then
1979            write(ount, '(/,a)' ) ' Additional information about PAW segments'
1980            flag_write=0
1981          end if
1982          times(2,isort)=times(2,isort)+tol14
1983          write(ount,format01040)names(isort),&
1984            times(1,isort),times(1,isort)*cpunm,times(2,isort),times(2,isort)*wallnm,ncount(isort), &
1985            times(1,isort)/times(2,isort),times(1,isort)/times(2,isort)/nthreads
1986        end if
1987      end do
1988 
1989      nlist=3
1990      list(:nlist)=(/1795,1796,1797/)
1991      flag_write=1
1992      do ilist=1,nlist
1993        isort = list(ilist)
1994        if(ncount(isort)/=0)then
1995          if(flag_write==1)then
1996            write(ount, '(/,a)' ) ' Additional information about diagonalization algorithm segments'
1997            flag_write=0
1998          end if
1999          times(2,isort)=times(2,isort)+tol14
2000          write(ount,format01040)names(isort),&
2001            times(1,isort),times(1,isort)*cpunm,times(2,isort),times(2,isort)*wallnm,ncount(isort), &
2002            times(1,isort)/times(2,isort),times(1,isort)/times(2,isort)/nthreads
2003        end if
2004      end do
2005 
2006 !    The detailed analysis cannot be done in the multidataset mode
2007      if(ndtset<2)then
2008        write(ount, '(/,/,a,/,a,/,a)' ) &
2009         ' Detailed analysis of some time consuming routines ',&
2010         '                                  tcpu    ncalls  tcpu/ncalls    ndata tcpu/ncalls/ndata',&
2011         '                                 (sec)                (msec)              (microsec)'
2012        nlist=9
2013        list(:nlist)=(/802,803,9,75,76,77,210,11,1795/)
2014        do ilist=1,nlist
2015          isort = list(ilist)
2016          if(ncount(isort)/=0)then
2017            write(ount, '(a,a24,f12.3,i10,f12.3,i10,f12.3)' )'- ',names(isort),&
2018              times(1,isort),ncount(isort),&
2019              1000.0_dp*times(1,isort)/dble(ncount(isort)),ndata(isort),&
2020              1000000.0_dp*times(1,isort)/dble(ncount(isort)*dble(ndata(isort)))
2021          else
2022            write(ount, '(a,a24,f12.3,i10)' )'- ',names(isort),times(1,isort),ncount(isort)
2023          end if
2024        end do !ilist
2025      else
2026        write(ount,'(/,a)') ' timana : in multi dataset mode, the more detailed analysis is not done.'
2027      end if !ndtset
2028 
2029    end if ! End the condition of timopt<0
2030 
2031  end if ! me==0
2032 
2033  ABI_FREE(list)
2034 
2035 end subroutine timana