TABLE OF CONTENTS


ABINIT/timana [ Functions ]

[ Top ] [ Functions ]

NAME

 timana

FUNCTION

 Analyse the timing, and print in unit ab_out. Some discussion of the
 number of calls to different routines is also provided, as comments,
 at the end of the routine, as well as, in the single dataset mode (ndtset<2),
 a detailed analysis of the time-consuming routines.

COPYRIGHT

 Copyright (C) 1998-2018 ABINIT group (XG, GMR)
 This file is distributed under the terms of the
 GNU General Public License, see ~abinit/COPYING
 or http://www.gnu.org/copyleft/gpl.txt .
 For the initials of contributors, see ~abinit/doc/developers/contributors.txt .

INPUTS

  mpi_enreg=information about MPI parallelization
  natom=number of atoms in cell.
  nband(nkpt*nsppol)=number of bands at each k point, for each polarization
  ndtset=number of datasets
  nfft=(effective) number of FFT grid points (for this processor)
  nkpt=number of k points
  npwtot(nkpt)=number of planewaves in basis at this k point
  nsppol=1 for unpolarized, 2 for spin-polarized
  timopt= if >0, write short analysis, if <0, write full analysis
          if timopt>=2, or timopt==-2 do not time the timer

OUTPUT

  (only writing)

NOTES

 *) One can suppress the cpu timer call in timein.f, if line 315 of the present routine is uncommented.

 *) The number of fourwf and nonlop calls can be computed as follows, in the
    ground-state case, with no reading of wavefunctions (irdwfk==0 and the like),
    and iscf>0 :
    
    1) For fourwf.f
    
    In each cgwf call, there will be
    1 call (isign=+1 and -1) for the first gradient calculation,
    and iline calls for the line minimizations (iline being the number of
    line minimizations, at most nline),
    minus the number of ffts skipped because some wfs are sufficiently converged
    (there is a counter for that, see the log file)
    
    There are nband*nkpt*(nstep+2) calls to cgwf presently, where the
    (nstep+2) comes from the presence of 2 nonscf loops
    in the first 2 steps.
    Thus, the number of fourwf calls in cgwf is
    nband*nkpt*(nstep+2)*(1+iline) - nskip_fourwf_in_cgwf
    
    To compute the density (either in vtowfk or in vtorho - by a mkrho call - )
    at each step, there will be nband*nkpt one-way calls,
    minus the number of bands skipped because the occupation number
    is too small (smaller than 1.0d-14). There is another counter for that.
    Thus, the number of fourwf calls for the density is
    nband*nkpt*nstep - nskip_fourwf_for_density
    
    For example, for Si with nline=3, nkpt=2, nband=4, nstep=10, and supposing
    no fourwf calls are skipped, there will be
    at most 4*2*12=96 calls to cgwf, each with 4 two-way ffts,
    that is 384 two-way ffts,
    and 4*2*10=80 one-way ffts to make the density.
    Altogether 464-nskip fourwf calls at most.
    
    2) For nonlop.f
    
    Presently, there are three different types of call to nonlop :
    for energy and gradient wrt wavefunctions (choice=1), for forces (choice=2),
    and for stresses (choice=3).
    
    In each cgwf call, there will be one nonlop call for two fourwf calls
    (independently of the number of skipped fourwf calls, since
    nonlop is also skipped then). These are the only calls with choice=1.
    Thus the number will be
    nband*nkpt*(nstep+2)*(1+iline) - nskip_fourwf_in_cgwf
    
    The number of choice=2 nonlop calls is equal to the number of fourwf calls
    to make the density, that is
    nband*nkpt*nstep - nskip_fourwf_for_density
    
    The number of choice=3 calls is equal to the number of occupied bands
    at the end of the calculation :
    nband(occupied)*nkpt
    The number of bands skipped then is not counted.
    
    NOTE : the number of fourwf calls is equal to
    the # of nonlop (choice=1) calls + the # of nonlop (choice=2) calls

PARENTS

      abinit

CHILDREN

      timab,time_accu,wrtout,xmpi_sum

SOURCE

 101 #if defined HAVE_CONFIG_H
 102 #include "config.h"
 103 #endif
 104 
 105 #include "abi_common.h"
 106 
 107 subroutine timana(mpi_enreg,natom,nband,ndtset,nfft,nkpt,npwtot,nsppol,timopt)
 108 
 109  use defs_basis
 110  use defs_abitypes
 111  use m_xmpi
 112  use m_profiling_abi
 113  use m_xomp
 114 
 115  use defs_time,    only : mtim
 116 
 117 !This section has been created automatically by the script Abilint (TD).
 118 !Do not modify the following lines by hand.
 119 #undef ABI_FUNC
 120 #define ABI_FUNC 'timana'
 121  use interfaces_14_hidewrite
 122  use interfaces_18_timing
 123 !End of the abilint section
 124 
 125  implicit none
 126 
 127 !Arguments ------------------------------------
 128 !scalars
 129  integer,intent(in) :: natom,ndtset,nfft,nkpt,nsppol,timopt
 130  type(MPI_type),intent(in) :: mpi_enreg
 131 !arrays
 132  integer,intent(in) :: nband(nkpt*nsppol),npwtot(nkpt)
 133 
 134 !Local variables-------------------------------
 135 !scalars
 136  integer :: aslot,bslot,cslot,flag_count,flag_write,ierr,ii,ikpt,ipart 
 137  integer :: ilist,isort,islot,isppol,itim,itimab,ltimab,maxii,me
 138  integer :: npart,nlist,nothers,nproc,nthreads,return_ncount
 139  integer(i8b) :: npwmean,npwnbdmean
 140  integer :: spaceworld,temp_list,totcount,tslot,utimab,ount
 141  real(dp) :: cpunm,lflops,other_cpu,other_wal,percent_limit,subcpu,subwal,timab_cpu,timab_wall,wallnm
 142  character(len=500) :: message
 143 !arrays
 144  integer(i8b) :: basic(mtim),ndata(mtim),tslots(mtim)
 145  integer :: ncount(mtim)
 146  integer,allocatable :: list(:)
 147  real(dp) :: ftimes(2,mtim),ftsec(2),mflops(mtim),nflops(mtim),times(2,mtim),tsec(2),my_tsec(2)
 148  character(len=32) :: names(-1:mtim),entry_name
 149  character(len=*),parameter :: format01040 ="('- ',a24,f12.3,f6.1,f11.3,f6.1,i15,16x,f7.2,1x,f10.2)"
 150  character(len=*),parameter :: format01041 ="('- ',a24,f12.3,f6.1,f11.3,f6.1,i15,3x,g12.3,1x,f7.2,1x,f10.2)"
 151  character(len=*),parameter :: format01042 ="('- ',a24,f12.3,f6.1,f11.3,g12.3,i15)"
 152  character(len=*),parameter :: format01045 ="('-',i3,a19,f15.3,f6.1,f11.3,f6.1)"
 153  !character(len=*),parameter ::  format01200 ="('- subtotal     ',f15.3,f6.1,f11.3,f6.1)"
 154 
 155 ! *************************************************************************
 156 
 157  01200 format('- subtotal             ',f15.3,f6.1,f11.3,f6.1,31x,f7.2,1x,f10.2)
 158  01201 format(/,'- subtotal             ',f15.3,f6.1,f11.3,f6.1,31x,f7.2,1x,f10.2)
 159 
 160  ount = ab_out
 161 
 162  call timab(49,1,tsec)
 163 
 164 !The means are computed as integers, for later compatibility
 165  npwmean=0; npwnbdmean=0
 166  do isppol=1,nsppol
 167    do ikpt=1,nkpt
 168      npwmean=npwmean+npwtot(ikpt)
 169      npwnbdmean=npwnbdmean+npwtot(ikpt)*nband(ikpt+(isppol-1)*nkpt)
 170    end do
 171  end do
 172 
 173 !initialize ftime, valgrind complains on line 832 = sum up of all Gflops
 174  ftimes=zero
 175 
 176  npwmean=dble(npwmean)/dble(nkpt*nsppol)
 177  npwnbdmean=dble(npwnbdmean)/dble(nkpt*nsppol)
 178 
 179 !List of timed subroutines, eventual initialisation of the number of data, and declaration of a slot as being "basic"
 180 !Channels 1 to 299 are for optdriver=0 (GS), 1 (RF) and 2 (Suscep), at random
 181 !Channels 300 to 399 are for optdriver=3 (Screening)
 182 !Channels 400 to 499 are for optdriver=4 (Sigma)
 183 !Channels 500 to 529 are for optdriver=5 (Nonlinear)
 184 !Channels 530 to 549 are for various counters
 185 !Channels 550 to 599 are for PAW
 186 !Channels 600 to 619 are for Recursion Method
 187 !Channels 620 to 639 are for DMFT
 188 !Channels 650 to 699 are for bethe_salpeter code.
 189 !Channels 700 to 799 are for optdriver=0 (again ...)
 190 !Channels 800 to 899 are for the detailed analysis of fourwf
 191 !Channels 900 to 1499 are for optdriver=0 (again ...)
 192 !Channels 1500 to 1519 are for Hartree-Fock.
 193 !Channels 1700 to 1747 are for GWLS.
 194 !Channels 1520 and beyond are not yet attributed.
 195 
 196  names(1:mtim)='***                             '
 197 !Basic slots are not overlapping. Their sum should cover most of the code.
 198 !WARNING : the slots from 1 to 99 should be avoided in the future ... They are hard to track.
 199  basic(1:mtim)=0
 200  names(1)='abinit                          '
 201  names(5)='ewald                           ' ; basic(5)=1
 202  names(6)='setsym                          ' ; basic(6)=1
 203  names(9)='fourdp                          ' ; basic(9)=1 ;    ndata(9)=nfft
 204  names(10)='hartre                          '
 205  names(11)='xc:pot/=fourdp                  '; basic(11)=1;    ndata(11)=nfft*nsppol
 206  names(12)='mkcore                          '; basic(12)=1
 207  names(13)='mkresi                          '
 208  names(14)='rwwf                            '; basic(13)=1
 209  names(15)='pspini                          '; basic(15)=1
 210  names(16)='mkffnl                          '
 211  names(17)='symrhg(no FFT)                  '; basic(17)=1
 212  names(19)='inwffil                         '
 213 
 214  names(22)='cgwf                            '
 215  names(23)='kpgsph                          '; basic(23)=1   ! Actually, should not be basic ... too complicated, too much overlap ...
 216  names(28)='vtowfk                          '
 217  names(30)='vtowfk  (afterloop)             '
 218  names(31)='vtowfk  (1)                     '; basic(31)=1
 219  names(32)='gstate                          '
 220  names(33)='gstate->kpgsph                  '
 221  names(34)='gstate  (2)                     '
 222  names(35)='gstate(...scfcv)                '
 223  names(36)='gstate  (3)                     '
 224  names(37)='stress                          '; basic(37)=1   ! Actually, should not be basic !
 225  names(38)='ewald2 (+vdw_dftd)              '; basic(38)=1
 226  names(39)='vtowfk (loop)                   '
 227  names(40)='cgwf-O(npw)                     '
 228  names(41)='abinit(1)                       '
 229  names(42)='abinit(2)                       '; basic(42)=1
 230  names(43)='indefo+macroin+invars2m         '
 231  names(44)='abinit(4)                       '
 232  names(45)='abinit(5)                       '
 233  names(46)='abinit(6)                       '
 234  names(47)='ingeo/symgroup                  '
 235  names(48)='communic.MPI                    '
 236  names(49)='timana(1)                       '
 237  names(50)='timing timab                    '; basic(50)=1
 238  names(51)='total timab                     '
 239  names(52)='scfcv-scprqt                    '; basic(52)=1
 240  names(53)='forces-mkcore                   '
 241  names(54)='scfcv   (1)                     '
 242  names(55)='stress-mkcore                   '
 243  names(56)='scfcv-read                      '
 244  names(57)='rhotov                          '
 245  names(59)='energy                          '
 246  names(60)='scfcv(etotfor)                  '
 247  names(61)='scfcv :synchro                  '
 248  names(62)='kpgio :synchro                  '
 249  names(63)='mkrho :synchro                  '
 250  names(64)='strkin:synchro                  '
 251  names(65)='forstrnps:synchr                '
 252  names(66)='vtorho:synchro                  '; basic(66)=1
 253  names(67)='wfsinp:synchro                  '
 254  names(68)='scfcv(mix den - newrho)         '
 255  names(69)='forces                          '; basic(69)=1 ! Actually, should not be basic !
 256  names(70)='vtorho(symrhg)                  '
 257  names(71)='mkrho :MPIrhor                  '
 258  names(72)='mklocl(2)                       '
 259  names(73)='status                          '; basic(73)=1
 260  names(74)='newocc                          '
 261  names(75)='nonlop(apply)                   '; basic(75)=1; ndata(75)=npwmean*natom
 262  names(76)='nonlop(forces)                  '; basic(76)=1; ndata(76)=npwmean*natom
 263  names(77)='nonlop(forstr)                  '; basic(77)=1; ndata(77)=npwmean*natom
 264  names(78)='nonlop(dyfrnl)                  '
 265  names(79)='nonlop(ddk)                     '
 266  names(80)='etotfor/=forces                 '
 267  names(81)='xc:pot                          ' ! rhotoxc_coll, except the call to hartre.f
 268  names(82)='xc:fourdp                       '
 269  names(83)='newvtr/rho(3):io                '; basic(83)=1
 270  names(84)='suscep                          '
 271  names(85)='suscep:MPI                      '; basic(85)=1
 272  names(86)='suscep:synchro                  '; basic(86)=1
 273  names(87)='suskXX:loop(1)                  '
 274  names(88)='suskXX:loop(2)                  '
 275  names(89)='suscep:other                    '
 276  names(90)='dielmt                          '; basic(90)=1
 277  names(91)='setvtr                          '
 278  names(92)='setvtr:mkcore                   '
 279  names(93)='newvtr                          '
 280  names(94)='newrho                          '
 281  names(95)='tddft                           '
 282  names(96)='dieltcel                        '; basic(96)=1
 283  names(97)='nonlop(total)                   '
 284  names(98)='getghc-other                    '; basic(98)=1
 285 
 286  names(101)='dfpt_nstdy                      '
 287  names(102)='dfpt_nstwf                      '
 288  names(108)='dfpt_vtowfk(contrib)            '; basic(108)=1
 289  names(118)='dfpt_vtorho (1)                 '; basic(118)=1
 290  names(120)='dfpt_scfcv                      '
 291  names(121)='dfpt_vtorho                     '
 292  names(122)='dfpt_cgwf                       '
 293  names(124)='dfpt_vtorho (1)(2)              '
 294  names(125)='dfpt_vtorho (2)                 '
 295  names(126)='dfpt_vtorho-kpt loop            '; basic(126)=1
 296  names(127)='dfpt_vtorho (4)                 '
 297  names(128)='dfpt_vtowfk                     '
 298  names(129)='dfpt_vtorho:MPI                 '; basic(129)=1
 299  names(130)='dfpt_vtowfk (3)                 '; basic(130)=1
 300  names(131)='dfpt_vtowfk (1)                 '; basic(131)=1
 301  names(132)='respfn                          '
 302  names(133)='respfn(kpgio)                   '
 303  names(134)='respfn(pspini)                  '
 304  names(135)='respfn(inwffil)                 '
 305  names(136)='respfn(frozen)                  '
 306  names(137)='respfn(dfpt_dyxc1+bef.dfpt_lop) '
 307  names(138)='respfn(after dfpt_loper)        '
 308  names(139)='dfpt_vtowfk (loop)              '
 309  names(140)='dfpt_cgwf-O(npw)                '; basic(140)=1
 310  names(141)='dfpt_loper                      '
 311  names(142)='dfpt_loper(kpgio)               '
 312  names(143)='dfpt_loper(getmpw)              '
 313  names(144)='dfpt_loper(inwffil)             '
 314  names(146)='dfpt_loper(outwf)               '
 315  names(147)='dfpt_loper(eig2tot)             '
 316  names(148)='eig2tot                         '; basic(148)=1
 317  names(150)='dfpt_nselt/nstdy/nstpaw         '
 318  names(152)='dfpt_scfcv-scprqt               '
 319  names(154)='dfpt_scfcv  (1)                 '; basic(154)=1
 320  names(157)='dfpt_rhotov                     '
 321  names(158)='dfpt_newvtr                     '
 322  names(159)='d2frnl                          '
 323  names(160)='dfpt_scfcv (6)                  '
 324  names(161)='dfpt_nstdy:synchro              '; basic(161)=1
 325  names(166)='dfpt_vtorho:synchro             '; basic(166)=1
 326  names(181)='dfpt_mkvxc                      '
 327  names(182)='dfpt_dyxc1                      '; basic(182)=1
 328 !names(184)='dfpt_dyxc1(analysis)            '
 329 
 330  names(191)='invars2                         '; basic(191)=1
 331  names(192)='inkpts                          '
 332  names(193)='fresid                          '
 333 
 334  names(197)='getgh1c%dfpt_cgwf               '
 335  names(198)='getgh1c%dfpt_nstwf              '
 336  names(199)='getgh1c%dfpt_nstpaw             '
 337 
 338  names(200)='getghc                          '
 339  names(201)='getghc%cgwf                     '
 340  names(202)='getghc%dfpt_cgwf                '
 341  names(203)='getghc%mkresi                   '
 342  names(204)='getghc%kss_ddiago               '
 343  names(205)='getghc%lobpcgwf                 '
 344  names(206)='getghc%prep_getghc              '
 345  names(207)='getghc%other lobpcg             '
 346  names(208)='getghc%update_mmat              '
 347 
 348  names(210)='projbd                          '; basic(210)=1;    ndata(210)=npwnbdmean
 349  names(211)='projbd%cgwf                     '
 350  names(212)='projbd%dfpt_cgwf                '
 351  names(213)='projbd%dfpt_nstpaw              '
 352  names(214)='corrmetalwf1%dfpt_vtowfk        '
 353 
 354  names(220)='nonlop%(other)                  '
 355  names(221)='nonlop%getghc                   '
 356  names(222)='nonlop%vtowfk                   '
 357  names(223)='nonlop%energy                   '
 358  names(224)='nonlop%forstrnps                '
 359  names(225)='nonlop%dfpt_nstwf               '
 360  names(226)='nonlop%d2frnl                   '
 361  names(227)='nonlop%dfpt_cgwf !2             '
 362  names(228)='nonlop%dfpt_cgwf !5             '
 363  names(229)='nonlop%outkss                   '
 364  names(230)='nonlop%vtowfk(rhoij)            '
 365  names(231)='nonlop%prep_nonl%vtowfk         '
 366  names(232)='nonlop%prep_nonl%forstrn        '
 367  names(233)='nonlop%appinvovl                '
 368  names(234)='nonlop%prep_nonl%energy         '
 369 
 370  names(238)='scfcv                           '
 371  names(239)='scfcv(Berry)                    '
 372  names(240)='scfcv(iniloop, setvtr  )        '
 373  names(241)='scfcv(loop, PAW)                '
 374  names(242)='scfcv(vtorho(f))                '
 375  names(243)='scfcv(rhotov)                   '
 376  names(244)='scfcv(qui loop)                 '
 377  names(245)='scfcv(mix pot)                  '
 378  names(246)='scfcv(just after scf)           '
 379  names(247)='scfcv(afterscfloop)             '
 380  names(248)='scfcv(outscfcv)                 '
 381  names(249)='scfcv(free)                     '
 382 
 383  names(250)='afterscfloop                    '
 384  names(251)='afterscfloop(wvl)               '
 385  names(252)='afterscfloop(pol/magn)          '
 386  names(253)='afterscfloop(grad/lapl)         '
 387  names(254)='afterscfloop(kin.en.den)        '
 388  names(255)='afterscfloop(elf)               '
 389  names(256)='afterscfloop(forstr)            '
 390  names(257)='afterscfloop(final)             '
 391 
 392  names(260)='fourdp%(other)                  '
 393  names(261)='fourdp%rhotwg%ch                '
 394  names(262)='fourdp%rhotwg%si                '
 395  names(263)='fourdp%ckxcldag                 '
 396  names(264)='fourdp%fftwfn%ch                '
 397  names(265)='fourdp%fftwfn%si                '
 398  names(266)='fourdp%rec%rho                  '
 399  names(267)='fourdp%rec%ek                   '
 400  names(268)='fourdp%newvtr                   '
 401  names(269)='fourdp%newrho                   '
 402 
 403  names(270)='rwwf%(other)                    '
 404  names(271)='rwwf%vtorho                     '
 405  names(272)='rwwf%initwf(GS)                 '
 406  names(273)='rwwf%energy                     '
 407  names(274)='rwwf%wfsinp(GS)                 '
 408  names(275)='rwwf%mkrho                      '
 409  names(276)='rwwf%outwf                      '
 410  names(277)='rwwf%strnps                     '
 411  names(278)='rwwf%tddft                      '
 412  names(279)='rwwf%suscep                     '
 413  names(281)='rwwf%wfsinp(RF)                 '
 414  names(282)='rwwf%mkrho2                     '
 415  names(283)='rwwf%outwf2                     '
 416  names(284)='rwwf%dfpt_dyfnl                 '
 417  names(285)='rwwf%dfpt_mkrho                 '
 418  names(286)='rwwf%dfpt_nstwf                 '
 419  names(287)='rwwf%dfpt_vtorho                '
 420  names(288)='rwwf%dfpt_vtowfk                '
 421  names(289)='rwwf%dfpt_nstdy                 '
 422  names(290)='rwwf%initwf(RF)                 '
 423  names(291)='rwwf%newkpt(GS)                 '
 424  names(292)='rwwf%newkpt(RF)                 '
 425 
 426  names(301)='screening                       '
 427  names(302)='screening(init1)                '
 428  names(304)='screening(KS=>QP[wfrg])         '
 429  names(305)='screening(density)              '
 430  names(306)='screening(q-loop,init )         '
 431  names(307)='screening(cchi0q0)              '
 432  names(308)='screening(cchi0)                '
 433  names(309)='screening(q-loop,end)           '
 434  names(310)='screening(wrt scr files)        '
 435  names(315)='screening(pawin)                '
 436  names(316)='screening(wfs)                  '
 437  names(319)='screening(1)                    '
 438  names(320)='screening(paw)                  '; basic(320)=1
 439  names(321)='screening(2)                    '
 440 
 441  names(331)='cchi0                           '
 442  names(332)='cchi0(rho_tw_g)                 '
 443  names(333)='cchi0(assembly)                 '
 444 
 445  names(401)='sigma                           '
 446  names(402)='sigma(Init1)                    '
 447  names(403)='setup_sigma                     '
 448  names(404)='sigma(rdkss)                    '
 449  names(405)='sigma(Init2)                    '
 450  names(406)='sigma(make_vhxc)                '
 451  names(407)='sigma(vHxc_me)                  '
 452  names(408)='sigma(hqp_init)                 '
 453  names(409)='sigma(getW)                     '
 454  names(410)='sigma/=fourdp                   '; basic(410)=1
 455 
 456  names(421)='sigma(calc_sigx_me)             '
 457  names(423)='sigma(cohsex_me)                '
 458  names(424)='sigma(calc_sigc_me)             '
 459  names(425)='sigma(solve_dyson)              '
 460  names(426)='sigma(finalize)                 '
 461 
 462  names(430)='calc_sigx_me                    '
 463 
 464  names(431)='calc_sigc_me                    '
 465  names(432)='calc_sigc_me(Init)              '
 466  names(433)='calc_sigc_me(Init spin)         '
 467  names(434)='calc_sigc_me(Init q)            '
 468  names(435)='calc_sigc_me(eet_sigma)         '
 469  names(436)='calc_sigc_me(1)                 '
 470  names(437)='calc_sigc_me(rho_tw_g)          '
 471  names(438)='calc_sigc_me(2)                 '
 472  names(439)='calc_sigc_me(sigma_me)          '
 473  names(440)='calc_sigc_me(wfd_barrier        '
 474  names(441)='calc_sigc_me(xmpi_sum)          '
 475  names(442)='calc_sigc_me(final ops)         '
 476 
 477  names(445)='calc_sigc_me(loop)              '
 478 
 479  names(490)='solve_dyson                     '
 480  names(491)='cohsex_me                       '
 481 
 482  names(501)='nonlinear                       '
 483  names(502)='dfptnl_loop                     '
 484  names(511)='dfptnl_mv                       '; basic(511)=1
 485  names(512)='dfptnl_resp                     '; basic(512)=1
 486 
 487  names(520)='lobpcgwf(init)                  '; if(abs(timopt)==4)basic(520)=1
 488  names(521)='lobpcgwf(bef.getghc 1           '; if(abs(timopt)==4)basic(521)=1
 489  names(522)='lobpcgwf(aft.getghc 1           '; if(abs(timopt)==4)basic(522)=1
 490  names(523)='lobpcgwf(bef.getghc 2           '; if(abs(timopt)==4)basic(523)=1
 491  names(524)='lobpcgwf(aft.getghc 2           '; if(abs(timopt)==4)basic(524)=1
 492  names(525)='lobpcgwf(aft.loop)              '; if(abs(timopt)==4)basic(525)=1
 493  names(526)='lobpcgwf(prep-getghc)           '
 494 
 495  names(530)='lobpcgwf                        '
 496  names(532)='xgemm%lobpcg                    '
 497  names(533)='xmpi_sum%lobpcg                 '
 498  names(535)='xorthon-xtrsm                   '
 499  names(536)='xprecon%lobpcg                  '
 500  names(537)='prep_fourwf%vtow                '
 501  names(538)='prep_fourwf%mkrh                '
 502  names(539)='prep_fourwf                     '
 503 
 504  names(540)='sg_fourwf%fourwf                '
 505  names(541)='back_wf%sg_fourw                '
 506  names(542)='forw_wf%sg_fourw                '
 507  names(543)='alltoall%back_wf                '
 508  names(544)='alltoall%forw_wf                '
 509  names(545)='prep_getghc(alltoall)           '
 510  names(547)='alltoall%prep_fo                '
 511  names(548)='allgather%prep_f                '
 512  names(549)='symrhg%mkrho                    '
 513  
 514  names(550)='forces:pawatm2ff                '
 515  names(551)='stress:pawatm2ff                '
 516  names(552)='setvtr:pawatm2ff                '
 517  names(553)='pawinit                         '; basic(553)=1
 518  names(554)='vtowfk:rhoij                    '
 519  names(555)='vtorho:pawmkrhoij               '; basic(555)=1
 520  names(556)='pawmkrho                        '; basic(556)=1
 521  names(557)='pawmkrho:symrhoij               '; basic(557)=1
 522  names(558)='scfcv:mknhat                    '
 523  names(559)='nhatgrid                        '; basic(559)=1
 524  names(560)='pawdenpot                       '; basic(560)=1
 525  names(561)='pawdij/symdij                   '; basic(561)=1
 526  names(562)='respfn:pawatm2ff                '; basic(562)=1
 527  names(563)='dfpt_dyfro:pawatm2ff            '; basic(563)=1
 528  names(564)='dfpt_scfcv:dfpt_mknhat          '; basic(564)=1
 529  names(565)='getgsc                          '
 530  names(566)='dfpt_nstpaw                     '; basic(566)=1
 531  names(567)='pawnstd2e                       '
 532  names(568)='stress%strhar                   '
 533 
 534  names(570)='prep_nonlop                     '
 535  names(572)='prep_nonlop%vtowfk              '
 536  names(573)='prep_nonlop%forstrnps           '
 537 
 538  names(575)='prep_bandfft_tabs               '; basic(575)=1
 539 
 540  names(581)='prep_nonlop(alltoall)           '
 541  names(583)='vtowfk(pw_orthon)               '
 542  names(584)='xcopy%lobpcg                    '
 543  names(585)='vtowfk(subdiago)                '
 544  names(586)='vtowfk(nonlocalpart)            '
 545  names(587)='zheegv-dsyegv                   '
 546 
 547  names(588)='vtowfk(ssdiag)                  '; basic(588)=1
 548  names(589)='vtowfk(contrib)                 '; basic(589)=1
 549  names(590)='vtowfk(2)                       ' 
 550  names(591)='vtowfk(3)                       ' 
 551 
 552  names(593)='set_paw_pert                    '
 553  names(594)='get_exchange_atom               '
 554  names(595)='pawrhoij_redistribute           '
 555  names(596)='paw_ij_redistribute             '
 556  names(597)='paw_an_redistribute             '
 557  names(598)='pawfgrtab_redistribute          '
 558 
 559  names(600)='vtorhorec                       '
 560  names(601)='Definitions                     '
 561  names(602)='getngrec                        '
 562  names(603)='green_kernel                    '
 563  names(604)='transgrid (c->f)                '
 564  names(605)='recursion (other)               '
 565  names(606)='recursion (den)                 '
 566  names(607)='recursion (cuda)                '
 567  names(608)='recursion_nl                    '
 568  names(609)='fermisolverec                   '
 569  names(610)='entropyrec                      '
 570  names(611)='gran_potrec                     '
 571  names(612)='nonlocal-energy                 '
 572  names(613)='sync. cpu (wait)                '
 573  names(614)='sync. gpu (wait)                '
 574  names(615)='vn_nl_rec                       '
 575  names(616)='null recursion                  '
 576  names(617)='recursion (other_cuda)          '
 577 
 578  names(620)='datafordmft                     '
 579  names(621)='initialize dmft loop            '
 580  names(622)='impurity_solve                  '
 581  names(623)='Dyson                           '
 582  names(624)='compute_green                   '
 583  names(625)='integrate_green                 '
 584  names(626)='dmft-other                      '
 585  names(627)='Print/Read self                 '
 586 
 587  names(630)='prep_getghc                     '
 588  names(631)='prep_getghc(before if)          '
 589  names(632)='prep_getghc(bef. getghc)        '
 590  names(633)='prep_getghc(betw getghc)        '
 591  names(634)='prep_getghc(aft. getghc)        '
 592  names(635)='prep_getghc(getghc - 1 )        '
 593  names(636)='prep_getghc(getghc - 2 )        '
 594  names(637)='prep_getghc(getghc - 3 )        '
 595  names(638)='prep_getghc(getghc - 4 )        '
 596 
 597  names(640)='driver                          '
 598  names(641)='driver(bef. loop dtset)         '
 599  names(642)='driver(bef. select case)        '
 600  names(643)='driver(aft. select case)        '
 601  names(644)='driver(aft. loop dtset)         '
 602 
 603  names(650)='bse                             '
 604  names(651)='bse(Init1)                      '; basic(651)=1
 605  names(652)='setup_bse                       '; basic(652)=1
 606  names(653)='bse(rdkss)                      '; basic(653)=1
 607  names(654)='bse(rdmkeps^-1)                 '; basic(654)=1
 608  names(655)='bse(mkrho)                      '; basic(655)=1
 609  names(656)='bse(mkexcham)                   '; basic(656)=1
 610  names(657)='bse(mkexceps)                   '; basic(657)=1
 611  names(658)='bse(wfd_wave_free)              '; basic(658)=1
 612  names(659)='bse(mk_pawhur_t)                '; basic(659)=1
 613  names(660)='bse(exc_diago_driver)           '; basic(660)=1
 614  names(661)='bse(exc_haydock_driver)         '; basic(661)=1
 615 
 616 
 617  names(670)='exc_build_ham                   '
 618  names(671)='exc_build_ham(q=0)              '
 619  names(672)='exc_build_ham(block-res)        '
 620  names(673)='exc_build_ham(block-coupling)   '
 621 
 622  names(680)='exc_build_block                 '
 623  names(681)='exc_build_block(init,read)      '
 624  names(682)='exc_build_block(Coulomb)        '
 625  names(683)='exc_build_block(exchange)       '
 626  names(684)='exc_build_block(synchro)        '
 627  names(685)='exc_build_block(write_ha        '
 628  names(686)='exc_build_block(exch.spi        '
 629 
 630  names(690)='exc_haydock_driver              '   
 631  names(691)='exc_haydock_driver(read)        '
 632  names(692)='exc_haydock_driver(prep)        '
 633  names(693)='exc_haydock_driver(wo lf        '
 634  names(694)='exc_haydock_driver(apply)       '
 635  names(695)='exc_haydock_driver(end)         '
 636  names(696)='exc_haydock_driver(inter        '
 637  names(697)='exc_haydock_driver(matmul)      '
 638 !Slots up to 699 are reserved for bethe_salpeter code.
 639 
 640  names(700)='gstateimg                       '
 641  names(701)='gstate(pspini)                  '
 642  names(702)='gstateimg(leave_test)           '
 643  names(703)='gstateimg(init)                 '
 644  names(704)='gstateimg(bef. loop img)        '
 645  names(705)='gstateimg(bef. gstate)          '
 646  names(706)='gstateimg(aft. gstate)          '
 647  names(707)='gstateimg(aft. loop img)        '
 648  names(708)='gstateimg(finalize)             '
 649  
 650 
 651  names(710)='inwffil                         '
 652  names(711)='inwffil(read header)            '
 653  names(712)='inwffil(init params)            '
 654  names(713)='inwffil(prepa wfsinp)           '
 655  names(714)='inwffil(call wfsinp)            '
 656  names(715)='inwffil(after wfsinp)           '
 657  names(716)='inwffil(spin convert)           '
 658  names(717)='inwffil(call newkpt)            '
 659  names(718)='inwffil(excl. calls)            '; basic(718)=1
 660 
 661  names(720)='wfsinp                          '
 662  names(721)='wfsinp(before loop)             '
 663  names(722)='wfsinp(find kpt)                '
 664  names(723)='wfsinp(prepa initwf)            '
 665  names(724)='wfsinp(call  initwf)            '
 666  names(725)='wfsinp(transfer of wfs)         '
 667  names(726)='wfsinp(call rwwf)               '
 668  names(727)='wfsinp(wfconv section)          '
 669  names(728)='wfsinp(excl. calls)             '; basic(728)=1
 670 
 671  names(740)='suscep_stat                     '
 672  names(741)='suscep_stat(init)               '
 673  names(742)='suscep_stat(bef. susk-mm        '
 674  names(743)='suscep_stat(susk-mm)            '
 675  names(744)='suscep_stat(extrapol)           '
 676  names(745)='suscep_stat:synchro             '
 677  names(746)='suscep_stat:MPI                 '
 678  names(747)='suscep_stat(symmetries)         '
 679 
 680  names(750)='susk                            '
 681  names(751)='susk (init)                     '; basic(751)=1
 682  names(752)='susk (loop)                     '
 683  names(753)='susk:MPI (1)                    '; basic(753)=1
 684  names(754)='susk (accumul.)                 '
 685  names(755)='susk:MPI (2)                    '; basic(755)=1
 686  names(756)='susk (loop except FFT)          '; basic(756)=1
 687  names(757)='susk (accumul.except FFT        '; basic(757)=1
 688 
 689  names(760)='suskmm                          '
 690  names(761)='suskmm (init)                   '; basic(761)=1
 691  names(762)='suskmm (loop : part1)           '
 692  names(763)='suskmm (loop : part2)           '
 693  names(764)='suskmm(loop1 except FFT)        '; basic(764)=1
 694  names(765)='suskmm(loop2 except FFT)        '; basic(765)=1
 695 
 696  names(770)='initwf                          '
 697  names(771)='initwf(before rwwf)             '; basic(771)=1
 698  names(772)='initwf(after rwwf)              '; basic(772)=1
 699 
 700  names(780)='newkpt                          '
 701  names(781)='newkpt(before loop)             '
 702  names(782)='newkpt(before rwwf)             '
 703  names(783)='newkpt(after rwwf)              '
 704  names(784)='newkpt(call wfconv)             '
 705  names(785)='newkpt(finalize loop)           '
 706  names(786)='newkpt(after loop   )           '
 707  names(787)='newkpt:synchro                  '
 708  names(788)='newkpt(excl. rwwf   )           '; basic(788)=1
 709 
 710  names(790)='mkrho                           '
 711  names(791)='mkrho%gstate                    '
 712  names(792)='mkrho%vtorho                    '
 713  names(793)='mkrho%energy                    '
 714  names(794)='mkrho%respfn                    '
 715  names(795)='mkrho%afterscfloop              '
 716  names(796)='mkrho%scfcv                     '
 717  names(798)='mkrho/=                         '; basic(798)=1
 718  names(799)='mkrho/=+fourwf                  '
 719 
 720  names(801)='fourwf                          '
 721  names(802)='fourwf%(pot)                    '; basic(802)=1;    ndata(802)=2*nfft
 722  names(803)='fourwf%(den)                    '; basic(803)=1;    ndata(803)=nfft
 723  names(804)='fourwf%(G->r)                   '; basic(804)=1
 724  names(805)='fourwf%(r->G)                   '; basic(805)=1
 725 
 726 
 727  names(840)='fourwf%(other)                  '
 728  names(841)='fourwf%getghc                   '
 729  names(842)='fourwf%vtowfk                   '
 730  names(843)='fourwf%mkrho                    '
 731  names(844)='fourwf%dfpt_cgwf                '
 732  names(845)='fourwf%dfpt_accrho%dfpt_vtowfk  '
 733  names(846)='fourwf%mkrho2                   '
 734  names(847)='fourwf%dfpt_mkrho               '
 735  names(854)='fourwf%tddft                    '
 736  names(855)='fourwf%outkss                   '
 737  names(856)='fourwf%prep_four                '
 738  names(858)='fourwf%dfpt_accrho%idfpt_nstpaw '
 739  names(861)='fourwf%suskmm !0 part 1         '
 740  names(862)='fourwf%suskmm !0 part 2         '
 741  names(871)='fourwf%suskmm !3 part 1         '
 742  names(872)='fourwf%suskmm !3 part 2         '
 743 
 744  names(901)='newvtr(before selection)        '
 745  names(902)='newvtr(bef. prcref_PMA)         '
 746  names(903)='newvtr(call prcref_PMA)         '
 747  names(904)='newvtr(aft. prcref_PMA)         '
 748  names(905)='newvtr(mean potential)          '
 749 
 750  names(910)='forstr                          '
 751  names(911)='forstr(forstrnps)               '
 752  names(912)='forstr(pawgrnl)                 '
 753  names(913)='forstr(forces)                  '
 754  names(914)='forstr(stress)                  '
 755 
 756  names(920)='forstrnps                       '
 757  names(921)='forstrnps(bef.loop spin)        '
 758  names(922)='forstrnps(bef.loop band)        '
 759  names(923)='forstrnps(copy)                 '
 760  names(924)='forstrnps(kinetic contr)        '
 761  names(925)='forstrnps(aft.loop kptsp        '
 762  names(926)='forstrnps(nonlop+prep_ba        '
 763  names(927)='forstrnps(bef.loop kpt)         '
 764 
 765  names(933)='outkss                          '
 766  names(934)='outkss(Gsort+hd)                '
 767  names(935)='outkss(k-loop)                  '
 768  names(936)='outkss(diago)                   '; basic(936)=1
 769  names(937)='outkss(MPI_exch)                '; basic(937)=1
 770  names(938)='outkss(write)                   '
 771 
 772  names(940)='rhotov                          '
 773  names(941)='rhotov(rhotoxc)                 '
 774  names(942)='rhotov(dotprod_vn)              '
 775  names(943)='rhotov(PSolver_rhohxc)          '
 776  names(944)='rhotov(rhohxcpositron)          '
 777  names(945)='rhotov(other)                   '
 778 
 779  names(950)='outscfcv                        '
 780  names(951)='outscfcv(mlwfovlp)              '
 781  names(952)='outscfcv([PAW]prtden)           '
 782  names(953)='outscfcv(prtelf)                '
 783  names(954)='outscfcv(prt[g,k,l]den)         '
 784  names(955)='outscfcv(prtwf)                 '
 785  names(956)='outscfcv(prtpot)                '
 786  names(957)='outscfcv(prt geo misc.)         '
 787  names(958)='outscfcv(prt stm,vha,..)        '
 788  names(959)='outscfcv(prtdos)                '
 789  names(960)='outscfcv(calcdensph)            '
 790  names(961)='outscfcv(pawprt)                '
 791  names(962)='outscfcv(optics)                '
 792  names(963)='outscfcv(pawmkaewf)             '
 793  names(964)='outscfcv(outkss)                '
 794  names(965)='outscfcv(poslifetime)           '
 795  names(966)='outscfcv(outwant)               '
 796  names(967)='outscfcv(cal[cs,efg,fc])        '
 797  names(968)='outscfcv(prt[surf,nest])        '
 798  names(969)='outscfcv(misc.)                 '
 799 
 800  names(980)='vtorho                          '
 801  names(981)='vtorho(bef. spin loop)          '
 802  names(982)='vtorho(bef. kpt  loop)          '
 803  names(983)='vtorho(Berry)                   '
 804  names(984)='vtorho(bef. vtowfk)             '
 805  names(985)='vtorho(aft. vtowfk)             '
 806  names(986)='vtorho(aft. kpt loop)           '
 807  names(987)='vtorho(leave_test)              '; basic(987)=1
 808  names(988)='vtorho(aft. spin loop)          '
 809  names(989)='vtorho(MPI)                     '; basic(989)=1
 810  names(990)='vtorho(newocc)                  '
 811  names(991)='vtorho(DMFT)                    '
 812  names(992)='vtorho(mkrho 1)                 '
 813  names(993)='vtorho(highest occ. eig)        '
 814  names(994)='vtorho(mkrho 2)                 '
 815  names(995)='vtorho(tddft)                   '
 816  names(996)='vtorho(suscep_stat)             '
 817  names(997)='vtorho(init kpt loop)           '
 818 
 819  names(1001)='initberry                       '; basic(1001)=1
 820  names(1002)='initberry(before listkk)        '
 821  names(1003)='initberry(call listkk)          '
 822  names(1004)='initberry(after listkk)         '
 823  names(1005)='initberry(find neighb.)         '
 824  names(1006)='initberry(build strings)        '
 825  names(1007)='initberry(PAW on-site)          '
 826  names(1008)='initberry(pwind)                '
 827  names(1009)='initberry(MPI stuff)            '
 828  names(1021)='listkk                          '
 829 
 830 ! CMartins : TEST for HF
 831  names(1501)='HF_init                         '; basic(1501)=1
 832  names(1502)='HF_updatecgocc                  '; basic(1502)=1
 833  names(1503)='HF_updatecgocc-MPI              '; basic(1503)=1
 834  names(1504)='HF_getghc                       '; basic(1504)=1
 835  names(1505)='HF_getghc-init                  '; basic(1505)=1
 836  names(1506)='HF_getghc-kmu_loop              '; basic(1506)=1
 837  names(1507)='HF_getghc-calc_vlocpsi          '; basic(1507)=1
 838  names(1508)='HF_getghc-mult-cwf*cwocc        '; basic(1508)=1
 839  names(1509)='HF_getghc-calc_rhog_munu        '; basic(1509)=1
 840  names(1510)='HF_getghc-calc_vloc             '; basic(1510)=1
 841  names(1511)='HF_getghc-calc_ghc              '; basic(1511)=1
 842 
 843  ! Chebfi : the total (slot 1600) is flagged as a major independent section, like lobpcgwf2 (slot 1650)
 844  names(1600) = 'chebfi                        '; basic(1600) = 1
 845  names(1601) = 'chebfi(alltoall)              '
 846  names(1602) = 'chebfi(appinvovl)             '
 847  names(1603) = 'chebfi(rotation)              '
 848  names(1604) = 'chebfi(subdiago)              '
 849  names(1605) = 'chebfi(subham)                '
 850  names(1606) = 'chebfi(ortho)                 '
 851  names(1607) = 'chebfi(getghc)                '
 852  names(1608) = 'chebfi(residuals)             '
 853  names(1609) = 'chebfi(update_eigens)         '
 854  names(1610) = 'chebfi(sync)'
 855 
 856  names(1630) = 'chebfi(opernla)               '
 857  names(1631) = 'chebfi(opernlb)               '
 858  names(1632) = 'chebfi(inv_s)                 '
 859 
 860  names(1620) = 'mkinvovl                      '
 861  names(1621) = 'mkinvovl(build_d)             '
 862  names(1622) = 'mkinvovl(build_ptp)           '
 863  
 864  ! lobpcg2
 865  names(1650) = 'lobpcgwf2                     '; basic(1650) = 1
 866  names(1651) = 'lobpcg_init                    '
 867  names(1652) = 'lobpcg_free                    '
 868  names(1653) = 'lobpcg_run                     '
 869  names(1654) = 'lobpcg_getAX_BX                '
 870  names(1655) = 'lobpcg_orthoWrtPrev            '
 871  names(1656) = 'lobpcg_Bortho                  '
 872  names(1657) = 'lobpcg_RayleighRitz            '
 873  names(1658) = 'lobpcg_maxResidu               '
 874  names(1659) = 'lobpcg_run@getAX_BX            '
 875  names(1660) = 'lobpcg_pcond                   '
 876  names(1661) = 'lobpcg_RayleighRitz@hegv       '
 877 
 878  ! xg_t
 879  names(1670) = 'xgBlock_potrf                  '
 880  names(1671) = 'xgBlock_trsm                   '
 881  names(1672) = 'xgBlock_gemm                   '
 882  names(1673) = 'xgBlock_set                    '
 883  names(1674) = 'xgBlock_get                    '
 884  names(1675) = 'xgBlock_heev                   '
 885  names(1676) = 'xgBlock_heevd                  '
 886  names(1677) = 'xgBlock_hpev                   '
 887  names(1678) = 'xgBlock_hpevd                  '
 888  names(1679) = 'xgBlock_hegv                   '
 889  names(1680) = 'xgBlock_hegvx                  '
 890  names(1681) = 'xgBlock_hegvd                  '
 891  names(1682) = 'xgBlock_hpgv                   '
 892  names(1683) = 'xgBlock_hpgvx                  '
 893  names(1684) = 'xgBlock_hpgvd                  '
 894  names(1685) = 'xgBlock_copy                   '
 895  names(1686) = 'xgBlock_cshift                 '
 896  names(1687) = 'xgBlock_pack                   '
 897  names(1690) = 'xgScalapack_init               '
 898  names(1691) = 'xgScalapack_free               '
 899  names(1692) = 'xgScalapack_heev               '
 900  names(1693) = 'xgScalapack_hegv               '
 901  names(1694) = 'xgScalapack_scatter            '
 902 
 903  ! GWLS GW code
 904  names(1701)='gwls_sternheimer                ';basic(1701)=1
 905  names(1702)='exchange and correlation        '
 906  names(1703)='correl. shift lanczos           '
 907  names(1704)='Dielectric matrix               '
 908  names(1705)='Model Dielectric matrix         '
 909  names(1706)='setup proj. sternheimer         '
 910  names(1707)='compute proj.sternheimer        '
 911  names(1708)='eps^{-1} - eps_m^{-1}           '
 912  names(1709)='eps_m^{-1} - 1                  '           
 913  names(1710)='Modify Lbasis Coulomb           '   
 914  names(1711)='Diag eps^{-1}-eps_m^{-1}        '   
 915  names(1712)='exact  AT shift lanczos         '   
 916  names(1713)='model  AT shift lanczos         '   
 917  names(1714)='exact  BT shift lanczos         '   
 918  names(1715)='model  BT shift lanczos         '
 919  names(1716)='compute poles                   ' 
 920  names(1717)='Sigma_A Lanczos                 ' 
 921  names(1718)='Sigma_B num. integrands         ' 
 922 
 923 
 924  names(1719)='gwls: extract_QR                ';basic(1719)=1
 925  names(1720)='gwls: extract_SVD               ';basic(1720)=1
 926 
 927  ! these entries are not in a logical order.
 928  names(1721)='gwls: gstateimg                 '
 929  names(1722)='prepareValenceWfk               '
 930 
 931  names(1723)='gwls: sqmr                      ';basic(1723)=1
 932 
 933 
 934  names(1724)='gwls: Pk                        ';basic(1724)=1
 935  names(1725)='Pk- allocating                  '
 936  names(1726)='Pk- wfk to denpot               '
 937  names(1727)='Pk- wfk product with val        '
 938  names(1728)='Pk- pc_k                        '
 939  names(1729)='Pk- sqmr case 1                 '
 940  names(1730)='Pk- sqmr case 2                 '
 941  names(1731)='Pk- sqmr case 3                 '
 942  names(1732)='Pk-  qmr case 4                 '
 943  names(1733)='Pk- apply H (case 2)            '
 944 
 945 
 946  names(1734)='gwls: Pk_model                  ';basic(1734)=1
 947  names(1735)='Pk_model- allocating            '
 948  names(1736)='Pk_model- wfk to denpot         '
 949  names(1737)='Pk_model- wfk x val             '
 950  names(1738)='Pk_model- pc_k                  '
 951  names(1739)='Pk_model- act with Y            '
 952  names(1740)='Pk_model- add contrib.          '
 953 
 954 
 955  names(1741)='gwls: calc eps_m^-1(w)-1        ';basic(1741)=1
 956  names(1742)='Allocating                      '
 957  names(1743)='modifying Lanczos basis         '
 958  names(1744)='calc <mod_L_1|Y|mod_L_2>        '
 959  names(1745)='    make array hermitian        '
 960  names(1746)='               xsum_mpi         '
 961  names(1747)='inv eps_m and subtract 1        '
 962 
 963 
 964  names(mtim)='(other)                         ' ! This is a generic slot, to compute a complement
 965 
 966 !==================================================================================
 967 
 968  spaceworld= mpi_enreg%comm_world
 969  nproc     = mpi_enreg%nproc
 970  me        = mpi_enreg%me
 971  nthreads  = xomp_get_num_threads(open_parallel=.true.)
 972 
 973  call timab(49,2,tsec)
 974 
 975  if(abs(timopt)==1 .or. timopt==-3 .or. timopt==-4)then ! Time the timing routine (precision should be better than 3%)
 976    ltimab=1
 977    utimab=1000
 978    maxii=20   ! Maximal number of blocks of timab calls executed by the calibration loop below
 979 !  maxii=1    ! Uncomment this line if no timer is provided in timein.f
 980    do ii=1,maxii
 981 
 982      call timab(50,1,tsec)
 983      do itimab=ltimab,utimab
 984 !      The channel 51 is here used as a dummy channel
 985        call timab(51,1,tsec)
 986        call timab(51,2,tsec)
 987      end do
 988      call timab(50,2,tsec)
 989      call time_accu(50,return_ncount,tsec, lflops, ftsec)
 990 !    Exit the timing loop if the CPU time is bigger than 0.10 second
 991 !    or if the number of blocks reaches maxii.
 992 !    Since the accuracy of the timing is expected to be better than 0.01 sec,
 993 !    gives about 10% accuracy
 994      if(tsec(1)>0.10_dp)then
 995        exit
 996      else
 997        ltimab=utimab+1
 998 !      Increase the number of timab calls in a block.
 999 !      This small factor of increase allows to have less than
1000 !      0.15 second for this testing
1001        utimab=(3*utimab)/2
1002      end if
1003    end do
1004 !  Get the time per combined call timab(*,1,tsec) + timab(*,2,tsec)
1005    timab_cpu=tsec(1)/utimab
1006    timab_wall=tsec(2)/utimab
1007    if(timopt<0 .and. me==0 .and. timopt/=-2)then
1008      write(ount,*)
1009      write(ount,*)'Test the timer : '
1010      write(ount,*)' a combined call timab(*,1,tsec) + timab(*,2,tsec) is '
1011      write(ount, '(a,es14.4,a,es14.4,a)' )&
1012 &     '- CPU time =',timab_cpu,' sec,    Wall time =',timab_wall,' sec'
1013    end if
1014  else
1015    timab_cpu=zero; timab_wall=zero
1016  end if
1017 
1018 !Eventually reenable the timab routine
1019  call timab(1,5,tsec)
1020 
1021 !Get overall elapsed cpu and wall clock time
1022  call timab(1,2,tsec)
1023  call time_accu(1,return_ncount,tsec,lflops,ftsec)
1024  ncount(1)=return_ncount
1025 
1026 !Sum over all procs
1027  my_tsec(:)=tsec(:)
1028  call xmpi_sum(my_tsec,tsec,2,spaceworld,ierr)
1029 
1030 !Only the world master writes
1031  if (me==0) then
1032    write(ount,'(/,a,f13.1,f12.2,f11.3)')'- Total cpu        time (s,m,h):',tsec(1),tsec(1)/60._dp,tsec(1)/3600._dp
1033    write(ount,'(a,f13.1,f12.2,f11.3)')  '- Total wall clock time (s,m,h):',tsec(2),tsec(2)/60._dp,tsec(2)/3600._dp
1034  end if
1035 
1036 !Get separate time reports from all timed sections
1037  totcount=0
1038  do itim=1,mtim
1039    call time_accu(itim,return_ncount,times(:,itim),nflops(itim),ftimes(:,itim))
1040    ncount(itim)=return_ncount
1041    totcount=totcount+return_ncount
1042  end do
1043 
1044 !Estimate additional timings.
1045 
1046 !Estimate the values associated with timab, put it in channel 51
1047  ncount(51)=totcount
1048  times(1,51)=timab_cpu*totcount
1049  times(2,51)=timab_wall*totcount
1050 
1051 !Gather the different parts of selected time slots
1052 !Or, alternatively, deduce the value of the complement of some time slots.
1053 !Values of ii without a dedicated case fall in the default case and are simply skipped (see below)
1054  do ii=1,mtim
1055 
1056    tslots(:)=0
1057 
1058 !  List first the time slot in which the result will be accumulated.
1059 !  If this number is negative, the positive value will be used for the time slot, but the ncount will be set to -1 .
1060 !  Then, list the time slots whose value will be either accumulated or subtracted. The latter is obtained by
1061 !  entering a minus sign in front of the time slot number ...
1062 !  If a negative number is present in the list, while the accumulated time slot is positive,
1063 !  then the number of counts will be set to the value of the first routine to be accumulated.
1064    select case(ii)
1065 !    Gather the different parts of nonlop  (SHOULD BE REEXAMINED !)
1066    case(1) 
1067      tslots(:5)=(/75, 221,223,229,233/)
1068    case(2) 
1069      tslots(:4)=(/76, 222,225,227/)
1070    case(3) 
1071      tslots(:2)=(/77, 224/)
1072    case(4) 
1073      tslots(:2)=(/78, 226/)
1074    case(5) 
1075      tslots(:2)=(/79, 228/)
1076    case(6) 
1077 !      Gather the different parts of selected time channels
1078      tslots(:10)=(/97, 75,76,77,78,79,220,230,231,232/)
1079    case(7) 
1080 !      Gather the different parts of fourwf (NOTE : should attribute the channel 840 to one of the 4 modes !!!)
1081      tslots(:3)=(/802, 841,844/)
1082    case(8) 
1083      tslots(:4)=(/803, 842,843,846/)
1084    case(9) 
1085      tslots(:10)=(/804, 845,847,848,850,854,858,859,861,862/)
1086    case(10) 
1087      tslots(:6)=(/805, 849,851,857,871,872/)
1088    case(11) 
1089 !      In the following, the part coming from the prep_fourwf interface is added to the total.
1090      tslots(:7)=(/801, 802,803,804,805,840,856/)
1091    case(13) 
1092 !      Gather the different parts of prep_fourwf
1093      tslots(:3)=(/539, 537,538/)
1094    case(14) 
1095 !      Gather the different parts of fourdp
1096      tslots(:11)=(/9, 260,261,262,263,264,265,266,267,268,269/)
1097    case(15) 
1098 !      Gather the different parts of getghc
1099      tslots(:9)=(/200, 201,202,203,204,205,206,207,208/)
1100    case(16) 
1101 !      Gather the different parts of projbd
1102      tslots(:3)=(/210, 211,212/)
1103    case(17)
1104 !      Gather the different parts of rwwf (wavefunctions read/write)
1105      tslots(:24)=&
1106 &     (/14, 270,271,272,273,274,275,276,277,278,279,280,281,282,283,284,285,286,287,288,289,290,291,292/)
1107    case(18) 
1108 !      Estimate the complement of getghc (non fourwf, non nonlop)
1109      tslots(:4)=(/-98, 200,-841,-221/)
1110    case(19) 
1111 !      Estimate the complement of cgwf (non getghc,projbd)
1112      tslots(:5)=(/-40, 22,530,-201,-211/)
1113    case(20) 
1114 !      Estimate the complement of dfpt_cgwf (non getghc,projbd,nonlop,fourwf)
1115      tslots(:8)=(/-140, 122,-202,-197,-212,-227,-228,-844/)
1116    case(21) 
1117 !      Estimate different complements in vtowfk
1118 !      vtowfk(ssdiag) (= vtowfk(loop)    - cgwf )
1119      tslots(:5)=(/-588, 39,-22,-530, -1600/)
1120    case(22) 
1121 !      vtowfk(contrib) (= vtowfk (afterloop) - nonlop%vtowfk - fourwf%vtowfk )
1122      tslots(:4)=(/589, 30,-222,-842/)
1123    case(23) 
1124 !      vtowfk (1) = vtowfk - vtowfk(loop) - vtowfk(afterloop)
1125      tslots(:4)=(/31, 28,-39,-30/)
1126    case(24) 
1127 !      Estimate different complements in dfpt_vtowfk
1128 !      dfpt_vtowfk(contrib) (= vtowfk3(loop) - cgwf - fourwf%vtowfk3 - rwwf%vtowfk3 - corrmetalwf1)
1129      tslots(:6)=(/-108, 139,-122,-845,-288,-214/)
1130    case(25) 
1131 !      vtowfk (1) = dfpt_vtowfk - vtowfk3(loop) - vtowfk3 (3)
1132      tslots(:4)=(/ 131, 128,-139,-130/)
1133    case(28) 
1134 !      dfpt_vtorho-kpt loop (= dfpt_vtowfk (2) - vtowfk3 - rwwf)
1135      tslots(:4)=(/126,125,-128,-287/)
1136    case(29) 
1137 !      Estimate complement in mkrho
1138      tslots(:3)=(/798,799,-843/)
1139    case(30) 
1140 !      Estimate complement in dfpt_looppert
1141 !      dfpt_looppert(other) (= loper3 - loper3(kpgio) - loper3(getmpw) - loper3(inwffil)
1142 !      dfpt_scfcv - dfpt_looppert(outwf) -loper3(eigt2tot)
1143      tslots(:8)=(/145,141,-142,-143,-144,-120,-146,-147/)
1144    case(31) 
1145 !      Estimate complement in sigma
1146 !      sigma/=fourdp = sigma - fourdp%rhotwg%si - fourdp%fftwfn%si
1147      tslots(:4)=(/410,401,-262,-265/)
1148    case(32) 
1149 !      Estimate complement in bethe_salpeter
1150      tslots(:2)=(/699,650/)
1151    case(33) 
1152 !      Estimate complement in susk
1153 !      NOTE : fourwf%susk _PAW should actually be split between susk (loop except FFT)
1154 !      and susk (accumul.except FFT . But a renumbering of the fourwf splitting should be done ...
1155 !      susk (loop except FFT) = susk (loop) - fourwf%susk !0 - fourwf%susk !3
1156      tslots(:4)=(/756,752,-848,-849/)
1157    case(34) 
1158 !      susk (accumul.except FFT = susk (accumul) - fourwf%susk !3bis - fourwf%susk _PAW
1159      tslots(:4)=(/757,754,-859,-857/)
1160    case(35) 
1161 !      Estimate complement in suskmm
1162 !      NOTE : fourwf%susk _PAW should actually be split between susk (loop except FFT)
1163 !      and suskmm (accum.except FFT . But a renumbering of the fourwf splitting should be done ...
1164 !      suskmm (loop except FFT) = suskmm (loop) - fourwf%suskmm !0 part 1 - fourwf%suskmm !3 part 1
1165      tslots(:4)=(/764,762,-861,-871/)
1166    case(36) 
1167 !      suskmm (accum.except FFT = suskmm (accumul) - fourwf%suskmm !0 part 2 - fourwf%suskmm !3 part 2 - fourwf%susk _PAW
1168      tslots(:5)=(/765,763,-862,-872,-857/)
1169    case(37) 
1170 !      inwffil(excl. calls) = inwffil - inwffil(call wfsinp) - inwffil(call newkpt);
1171      tslots(:4)=(/718,710,-714,-717/)
1172    case(38) 
1173 !      wfsinp(excl. calls) = wfsinp - wfsinp(call  initwf) - wfsinp(call rwwf)
1174      tslots(:4)=(/728,720,-724,-727/)
1175    case(39) 
1176 !      newkpt(excl. rwwf   )=newkpt(before loop) + newkpt(before rwwf) + newkpt(after rwwf)
1177 !      newkpt(call wfconv) + newkpt(finalize loop) + newkpt(after loop   )
1178      tslots(:7)=(/-788,781,782,783,784,785,786/)
1179    case(40) 
1180 !      More complements in vtowfk
1181 !      vtowfk (2) = vtowfk (loop) - cgwf - lobpcg - subdiago - pw_orthon
1182      tslots(:7)=(/-590,39,-22,-530,-585,-583, -1600/)
1183    case(41) 
1184 !      vtowfk (3) = vtowfk (afterloop) - nonlop%vtowfk - prep_nonlop%vtowfk - fourwf%vtowfk - prep_fourwf%vtowfk - vtowfk(nonlocalpart)
1185      tslots(:7)=(/-591,30,-222,-572,-842,-537,-586/)
1186    case(43) 
1187 !      mkrho = mkrho%gstate + mkrho%vtorho + mkrho%energy + mkrho%respfn + mkrho%afterscfloop + mkrho%scfcv
1188      tslots(:7)=(/790,791,792,793,794,795,796/)
1189    case(44) 
1190 !      Estimate the complement of dmft (in vtorho, only)
1191      tslots(:9)=(/-626, 991,-620,-621,-622,-623,-624,-625,-627/)
1192 
1193    case default
1194      cycle
1195    end select
1196 
1197    tslot=tslots(1)       ! Signed index of the slot that receives the result
1198    aslot=abs(tslot)      ! Actual index of the receiving slot
1199    ncount(    aslot)=0 ; if (tslot<0)ncount(aslot)=-1   ! A negative target means "no count" (-1)
1200    times(1:2, aslot)=zero
1201    nflops(    aslot)=zero
1202    ftimes(1:2,aslot)=zero
1203    flag_count=1          ! Becomes -1 when a subtraction is met while the target is positive
1204    do islot=2,mtim       ! Scan the list; NOTE(review): assumes tslots has at least mtim entries - confirm with its declaration
1205      bslot=tslots(islot)
1206      cslot=abs(bslot)
1207      if(bslot>0)then     ! Positive entry : add the contribution of slot cslot
1208        if(tslot>0)ncount(aslot)=ncount(aslot)+ncount(cslot)
1209        times(1:2, aslot)=times(1:2, aslot)+times(1:2,cslot)
1210        nflops(    aslot)=nflops(    aslot)+nflops(   cslot)
1211        ftimes(1:2,aslot)=ftimes(1:2,aslot)+ftimes(1:2,cslot)
1212      else if(bslot<0)then ! Negative entry : subtract the contribution of slot cslot
1213        if(tslot>0)flag_count=-1
1214        times(1:2, aslot)=times(1:2, aslot)-times(1:2,cslot)
1215        nflops(    aslot)=nflops(    aslot)-nflops(   cslot)
1216        ftimes(1:2,aslot)=ftimes(1:2,aslot)-ftimes(1:2,cslot)
1217      else if(bslot==0)then ! A zero entry marks the end of the list (tslots was zeroed beforehand)
1218        exit
1219      end if
1220    end do
1221    if(flag_count==-1)ncount(aslot)=ncount(abs(tslots(2)))   ! Complement of a positive target : take the count of the first listed slot
1222  end do
1223 
1224 !For the following sections, the number of counts is non standard, and thus these sections have not been placed
1225 !in the previous doloop.
1226 
1227 !Compute xc part of rhotoxc and dfpt_mkvxc, minus the calls to fourdp inside that part
1228  ncount(11)=ncount(81)+ncount(181)
1229  times(1:2,11)=times(1:2,81)+times(1:2,181)-times(1:2,82)
1230  ftimes(1:2,11)=ftimes(1:2,81)+ftimes(1:2,181)-ftimes(1:2,82)
1231  nflops(11)=nflops(81)+nflops(181)-nflops(82)
1232 
1233 !Estimate different complements in dfpt_vtorho
1234 !dfpt_vtorho (1) (= vtorho3 (1,2) - vtorho3(2) - vtorho3:synchro )
1235  ncount(118)=ncount(121)
1236  times(1:2,118)=times(1:2,124)-times(1:2,125)-times(1:2,166)
1237  ftimes(1:2,118)=ftimes(1:2,124)-ftimes(1:2,125)-ftimes(1:2,166)
1238  nflops(118)=nflops(124)-nflops(125)-nflops(166)
1239 
1240 
1241 
1242 !Gigaflops of every time slot, for this process
1243 !(a slot whose flop-timing is not significant keeps the conventional value -1)
1244  mflops(1:mtim)=-1
1245  do itim=1,mtim
1246 !  VALGRIND complains that here there is a jump on uninitialized values
1247    if(abs(ftimes(1,itim)) > tol10) then
1248      mflops(itim)=nflops(itim)*1.e-9/ftimes(1,itim)
1249    end if
1250  end do
1251 
1252 !Warning if the time is negative
1253  do itim=1,mtim
1254    if(times(1,itim)<-tol6 .or. times(2,itim)<-tol6 .or. ncount(itim)<-1 )then
1255      write(message, '(6a,i4,4a,es16.6,a,es16.6,a,i6,a,es16.6)' ) ch10,&
1256 &     ' timana : WARNING -',ch10,&
1257 &     '  One among cpu, wall and ncount is negative.',ch10,&
1258 &     '  Timing section #',itim,', name :  ',names(itim),ch10,&
1259 &     '  CPU =',times(1,itim),', Wall=',times(2,itim),' ncount=',ncount(itim),' flops=',nflops(itim)
1260      call wrtout(std_out,message,'PERS')
1261    end if
1262  end do
1263 
1264 !Collect, in increasing slot order, the major independent code sections (those flagged in basic)
1265  ABI_ALLOCATE(list,(mtim))
1266  nlist=0
1267  list(:)=0
1268  do itim=1,mtim
1269 !  Sections that are not flagged are ignored
1270    if(basic(itim)==0)cycle
1271    nlist=nlist+1
1272    list(nlist)=itim
1273  end do
1274 
1275  percent_limit=0.5_dp; if (timopt<0) percent_limit=0.0001_dp
1276 
1277 !In case there is parallelism, report times for node 0
1278 !if (me==0 .and. nproc>1) then
1279  if (me==0) then
1280 
1281 !  Find normalization to report timing as % of the node 0 time (times(:,:) has not yet been summed over processes)
1282    cpunm=100._dp/my_tsec(1)
1283    wallnm=100._dp/my_tsec(2)
1284 
1285 !  (0) Take care of major independent code sections for this account of node 0 timing
1286 
1287    write(ount,  '(a,a,a,a,/,a,a,a)' ) '-',ch10,&
1288 &   '- For major independent code sections,',' cpu and wall times (sec),',&
1289 &   '-  as well as % of the time and number of calls for node 0',&
1290 &   '-'
1291 
1292    write(ount,"(3(a,i0),a)")&
1293 &   "-<BEGIN_TIMER mpi_nprocs = ",nproc,", omp_nthreads = ",nthreads,", mpi_rank = ",me,">"
1294 
1295 !  write(ount,"(2(a,f13.1))")"- tot_cpu_time = ",tsec(1),   ", tot_wall_time = ",tsec(2)
1296    write(ount,"(2(a,f13.1))")"- cpu_time =  ",my_tsec(1),", wall_time =  ",my_tsec(2)
1297    write(ount,"(a)")"-"
1298 
1299    write(ount, '(a,t34,a,t42,a,t50,a,t59,a,t65,a,t82,a,3x,a7,1x,a10)' )&
1300 &   '- routine','cpu','%','wall','%',' number of calls ',' Gflops ', 'Speedup', 'Efficacity'
1301    write(ount,'(a,t35,a,t43,a,t51,a,t60,a,t66,a,t78,a)')&
1302 &   '-                ','   ',' ','    ',' ','  (-1=no count)'
1303 
1304 !  Sort the list by decreasing CPU time
1305    do ii=1,nlist
1306      do ilist=1,nlist-1
1307        if (times(1,list(ilist))<times(1,list(ilist+1))) then
1308          temp_list=list(ilist)
1309          list(ilist)=list(ilist+1)
1310          list(ilist+1)=temp_list
1311        end if
1312      end do
1313    end do
1314 
1315    subcpu=zero; subwal=zero; other_cpu=zero; other_wal=zero; nothers=0
1316 
1317    do ilist=1,nlist
1318      isort = list(ilist)
1319 
1320      if ( (times(1,isort)*cpunm  > percent_limit .and. &
1321 &     times(2,isort)*wallnm > percent_limit) .and. ncount(isort)/=0 ) then ! Timing analysis
1322 
1323        write(ount,format01041)names(isort),&
1324 &       times(1,isort),times(1,isort)*cpunm,times(2,isort),times(2,isort)*wallnm,ncount(isort),mflops(isort), &
1325 &       times(1,isort)/times(2,isort),times(1,isort)/times(2,isort)/nthreads
1326 
1327      else
1328        nothers=nothers+1
1329        other_cpu=other_cpu+times(1,isort)
1330        other_wal=other_wal+times(2,isort)
1331      end if
1332 
1333      subcpu=subcpu+times(1,isort)
1334      subwal=subwal+times(2,isort)
1335    end do
1336 
1337    other_wal = other_wal + tol14   ! Avoid a division by zero in the ratios printed below
1338    write(entry_name,"(a,i0,a)")"others (",nothers,")"
1339    write(ount,format01041)entry_name,other_cpu,other_cpu*cpunm,other_wal,other_wal*wallnm,-1,-1.0, &
1340 &   other_cpu/other_wal,other_cpu/other_wal/nthreads
1341    write(ount,"(a)")"-<END_TIMER>"
1342 
1343    write(ount,'(a)' ) '-'
1344    subwal = subwal + tol14         ! Same protection for the subtotal line
1345    write(ount,01200) subcpu,subcpu*cpunm,subwal,subwal*wallnm,subcpu/subwal,subcpu/subwal/nthreads
1346  end if
1347 
1348 !Now, gather all information
1349  call xmpi_sum(times,spaceworld,ierr)
1350  call xmpi_sum(ncount,spaceworld,ierr)
1351  call xmpi_sum(ftimes,spaceworld,ierr)
1352  call xmpi_sum(nflops,spaceworld,ierr)
1353 
1354  if (me==0) then ! Only the world master writes
1355 
1356 !  Find normalization to report timing as % total time
1357    cpunm=100._dp/tsec(1)
1358    wallnm=100._dp/tsec(2)
1359 
1360 !  Calculating Gigaflops for all process
1361    do itim=1,mtim
1362      mflops(itim)=-2
1363      if(abs(ftimes(1,itim)) > tol10) then ! VALGRIND complains that here there is a jump on uninitialized values
1364        mflops(itim)=nflops(itim)*1.e-9/ftimes(1,itim)
1365      else
1366        mflops(itim)=-1
1367      end if
1368    end do
1369 
1370 !  _______________________________________
1371 
1372 !  Write timing output for cpu times
1373 
1374 !  (1) Take care of major independent code sections
1375    write(ount,'(/,a,/,a,/)' )&
1376 &   '- For major independent code sections, cpu and wall times (sec),',&
1377 &   '- as well as % of the total time and number of calls '
1378 
1379    write(ount,"(2(a,i0),a)")&
1380 &   "-<BEGIN_TIMER mpi_nprocs = ",nproc,", omp_nthreads = ",nthreads,", mpi_rank = world>"
1381 
1382    write(ount,"(2(a,f13.1))")"- cpu_time = ",tsec(1),   ", wall_time = ",tsec(2)
1383 !  write(ount,"(2(a,f13.1))")"- my_cpu_time =  ",my_tsec(1),", my_wall_time =  ",my_tsec(2)
1384    write(ount,"(a)")"-"
1385 
1386    write(ount,'(a,t35,a,t43,a,t51,a,t60,a,t66,a,t82,a,3x,a7,1x,a10)')&
1387 &   '- routine        ','cpu','%','wall','%', ' number of calls ',' Gflops ', &
1388    'Speedup', 'Efficacity'
1389    write(ount,'(a,t35,a,t43,a,t51,a,t60,a,t66,a,t78,a)')&
1390 &   '-                ','   ',' ','    ',' ','  (-1=no count)'
1391 
1392 !  Sort the list by decreasing CPU time
1393    do ii=1,nlist
1394      do ilist=1,nlist-1
1395        if(times(1,list(ilist))<times(1,list(ilist+1)))then
1396          temp_list=list(ilist)
1397          list(ilist)=list(ilist+1)
1398          list(ilist+1)=temp_list
1399        end if
1400      end do
1401    end do
1402 
1403    subcpu=zero; subwal=zero; other_cpu=zero; other_wal=zero; nothers=0
1404 
1405    do ilist=1,nlist
1406      isort = list(ilist)
1407      if( (times(1,isort)*cpunm > percent_limit .and.  &
1408 &     times(2,isort)*wallnm> percent_limit) .and. ncount(isort)/=0 )then
1409 
1410        write(ount,format01041)names(isort),&
1411 &       times(1,isort),times(1,isort)*cpunm,times(2,isort),times(2,isort)*wallnm,ncount(isort),mflops(isort), &
1412 &       times(1,isort)/times(2,isort),times(1,isort)/times(2,isort)/nthreads
1413      else
1414        nothers=nothers+1
1415        other_cpu=other_cpu+times(1,isort)
1416        other_wal=other_wal+times(2,isort)
1417      end if
1418      subcpu=subcpu+times(1,isort)
1419      subwal=subwal+times(2,isort)
1420    end do
1421 
1422    other_wal = other_wal + tol14
1423    write(entry_name,"(a,i0,a)")"others (",nothers,")"
1424    write(ount,format01041)entry_name,other_cpu,other_cpu*cpunm,other_wal,other_wal*wallnm,-1,-1.0, &
1425 &   other_cpu/other_wal,other_cpu/other_wal/nthreads
1426 
1427    write(ount,"(a)")"-<END_TIMER>"
1428 
1429    subwal = subwal + tol14
1430    write(ount,01201) subcpu,subcpu*cpunm,subwal,subwal*wallnm,subcpu/subwal,subcpu/subwal/nthreads
1431 
1432 !  (2) Partitionings
1433    if (timopt<0) then
1434 
1435      npart=1000
1436      do ipart=1,npart 
1437        list(:)=0 
1438        select case(ipart)
1439 
1440        case(1)
1441          list(:11)=(/1,41,42,43,44,45,640,46,49,50,mtim/)      ; message='abinit '
1442        case(2)
1443          list(:13)=(/640,641,642,700,132,84,301,401,501,650,643,644,mtim/)  ; message='driver '
1444        case(3)
1445          list(:13)=(/700,703,704,705,33,701,34,35,36,706,702,707,708/)       ; message='gstateimg+gstate '
1446        case(4)
1447          list(:19)=(/238,54,240,241,56,242,60,52,68,239,243,244,245,246,247,248,61,249,mtim/); message='scfcv '
1448        case(5)
1449          list(:7)=(/940,941,942,943,944,945,mtim/)             ; message= 'rhotov '
1450        case(6)
1451          list(:22)=(/980,981,982,983,984,28,985,271,986,987,988,989,990,991,992,993,994,995,996,997,1620,mtim/)
1452          message= 'vtorho '
1453        case(7)
1454          list(:15)=(/28,31,22,530,585,583,590,222,572,842,537,586,591,1600,mtim/) ; message='vtowfk '
1455        case(8)
1456          if(abs(timopt)==3)then
1457            list(:11)=(/530,204,205,571,532,533,630,535,536,584,587/)  ; message='lobpcgwf (abs(timopt)==3)'
1458          else if(abs(timopt)==4)then
1459            list(:8)=(/530,520,521,522,523,524,525,526/)               ; message='lobpcgwf (abs(timopt)==4)'
1460 !            else
1461 !            list(:3)=(/530,204,205/) 
1462 !            message='lobpcgwf (light analysis: for a deeper one, use abs(timopt)=3 or 4)'
1463          end if
1464        case(9)
1465          list(:4)=(/22,201,40,211/)                            ; message='cgwf '
1466        case(10)
1467          list(:8)=(/132,133,134,135,136,137,138,141/)          ; message='respfn '
1468        case(11)
1469          list(:8)=(/141,142,143,144,120,146,147,mtim/)         ; message='dfpt_looppert '
1470        case(12)
1471          list(:9)=(/120,154,121,157,152,158,160,150,564/) ; message='dfpt_scfcv '
1472        case(13)
1473          list(:9)=(/121,118,128,126,287,166,129,127,556/)      ; message='dfpt_vtorho '
1474        case(14)
1475          list(:9)=(/128,131,122,845,288,214,108,130,565/)      ; message='dfpt_vtowfk '
1476        case(15)
1477          list(:8)=(/122,140,202,197,212,227,228,844/)          ; message='dfpt_cgwf '
1478        case(16)
1479          list(:4)=(/200,841,221,98/)                           ; message='getghc '
1480        case(17)
1481          list(:20)=(/801,840,841,842,843,844,845,846,847,848,849,850,851,852,853,854,855,856,857,858/)
1482          message='fourwf (upwards partitioning)'
1483        case(18)
1484          list(:5)=(/933,934,936,937,938/)                      ; message='outkss '
1485        case(19)
1486          list(:14)=(/301,302,315,316,319,304,305,320,321,306,307,308,309,310/) 
1487          message='screening '
1488        case(20)
1489          list(:13)=(/401,402,403,404,405,406,407,408,409,421,423,424,425/); message='sigma  '
1490        case(21)
1491          list(:9)=(/431,432,433,434,435,445,440,441,442/)     ; message='calc_sigc_me '
1492        case(23)
1493          list(:11)=(/630,631,632,633,634,545,635,636,637,638,mtim/)         ; message='prep_getghc '
1494        case(24)
1495          list(:4)=(/539,856,547,548/)                          ; message='prep_fourwf '
1496        case(25)
1497          list(:5)=(/570,231,232,581,mtim/)                     ; message='prep_nonlop '
1498        case(26)
1499          list(:6)=(/790,791,792,793,794,795/)                  ; message='mkrho (upwards partitioning)'
1500 !          Disabled (temporarily ?) because the partitioning was not correct
1501 !          case(27);list(:17)=(/600,601,602,603,604,605,617,606,607,608,609,610,611,612,613,614,615/)
1502 !          message='vtorhorec '
1503        case(28)
1504          list(:10)=(/650,651,653,654,655,656,658,659,660,661/) 
1505          message='bethe_salpeter '
1506        case(29)
1507          list(:8)=(/740,741,742,743,744,745,746,747/)          ; message='suscep_stat '
1508        case(30)
1509          list(:9)=(/750,751,848,849,753,756,859,757,755/)      ; message='susk '
1510        case(31)
1511          list(:8)=(/760,761,764,861,871,765,862,872/)          ; message='suskmm '
1512        case(32)
1513          list(:8)=(/710,711,712,713,714,715,716,717/)          ; message='inwffil '
1514        case(33)
1515          list(:10)=(/720,721,722,723,724,725,726,727,67,mtim/)  ; message='wfsinp '
1516        case(34)
1517          list(:5)=(/770,771,772,272,290/)                      ; message='initwf '
1518        case(35)
1519          list(:9)=(/780,781,782,783,784,785,786,291,292/)      ; message='newkpt '
1520        case(36)
1521          list(:8)=(/93,901,902,903,904,905,268,mtim/)          ; message='newvtr '
1522        case(37)
1523          list(:2)=(/94,269/)                                   ; message='newrho '
1524        case(38)
1525          list(:11)=(/9,260,261,262,263,264,265,266,267,268,269/) ; message=' fourdp (upwards partitioning)'
1526        case(39)
1527          list(:8)=(/250,251,252,253,254,255,256,257/)          ; message='afterscfloop '
1528        case(40)
1529          list(:5)=(/910,911,912,913,914/)                      ; message='forstr '
1530        case(41)
1531          list(:10)=(/920,921,927,922,923,926,924,65,925,mtim/) ; message='forstrnps '
1532        case(42)
1533          list(:4)=(/670,671,672,673/)                          ; message='exc_build_ham '
1534        case(43)
1535          list(:7)=(/680,681,682,683,684,685,686/)              ; message='exc_build_block'
1536        case(44)
1537          list(:8)=(/690,691,692,693,694,695,696,697/)                  ; message='exc_haydock_driver '
1538        case(45)
1539          list(:20)=(/950,951,952,953,954,955,956,957,958,959,960,961,962,963,964,965,966,967,968,969/)
1540          message='outscfcv '
1541        case(46)
1542          list(:8)=(/620,621,622,623,624,625,626,627/)          ; message='dmft '
1543        case(47)
1544          list(:9)=(/1001,1002,1003,1004,1005,1006,1007,1008,1009/) 
1545          message='initberry '
1546        case(50)
1547          list(:12)=(/1500,1501,1502,1503,1504,1505,1506,1507,1508,1509,1510,1511/)          ; message='hartreefock '
1548        case(60)
1549          list(:13) = (/1600,1607,1630,1631,1632,1601,1603,1604,1605,1606,1608,1609,1610/)
1550          message = 'chebfi'
1551        case(61)
1552          list(:3) = (/1620,1621,1622/)
1553          message = 'mkinvovl'
1554        case(70)
1555          list(:5)=(/1701,1702,1703,1721,1722/)
1556          message='gwls GW code'
1557        case(71)
1558          list(:16)=(/1703,1704,1705,1706,1707,1708,1709,1710,1711,1712,1713,1714,1715,1716,1717,1718/) 
1559          message='gwls: compute_correlations_shift_lanczos'
1560        case(72)
1561          list(:10)=(/1724,1725,1726,1727,1728,1729,1730,1731,1732,1733/) 
1562          message='gwls: Applying the susceptibility Pk'
1563        case(73)
1564          list(:7)=(/1734,1735,1736,1737,1738,1739,1740/) 
1565          message='gwls: Applying the model susceptibility Pk_model'
1566        case(74)
1567          list(:7)=(/1741,1742,1743,1744,1745,1746,1747/) 
1568          message='gwls: computing the matrix elements of eps_model^{-1}(w) -1 '
1569        case(75)
1570          list(:12)=(/1650,1651,1652,1653,1654,1655,1656,1657,1658,1659,1660,1661/)
1571          message='lobpcgwf2 core engine '
1572        case(76)
1573          list(:18)=(/1670,1671,1672,1673,1674,1675,1676,1677,1678,1679,1680,1681,1682,1683,1684,1685,1686,1687/)
1574          message='low-level xgBlock type '
1575        case(77)
1576          list(:5)=(/1690,1691,1692,1693,1694/)
1577          message='low-level xgScalapack type '
1578        case default   
1579          cycle ! This allows to disable temporarily some partitionings
1580          
1581        end select
1582 
1583        nlist=0
1584        do itim=1,mtim
1585          if(list(itim)/=0)then
1586            nlist=nlist+1
1587          else
1588            exit
1589          end if
1590        end do
1591 
1592        if(nlist==0)then
1593          cycle
1594        end if
1595 
1596        if(ncount(list(1))/=0)then
1597          write(ount,'(/,a,a)')' Partitioning of ',trim(message)
1598          subcpu=zero
1599          subwal=zero
1600          do ilist=1,nlist
1601            isort = list(ilist)
1602 !          When the LAST item is mtim, a complement is evaluated (count number set to -1)
1603            if(ilist==nlist .and. list(nlist)==mtim)then
1604              times(1,mtim)=times(1,list(1))-subcpu
1605              times(2,mtim)=times(2,list(1))-subwal
1606              ncount(mtim)=-1
1607              ftimes(1,mtim)=zero
1608              mflops(mtim)=0
1609 #if defined HAVE_TEST_TIME_PARTITIONING
1610              if(times(2,mtim)>1.2d0 .and. wallnm*times(2,mtim)>3.d0)then
1611                write(ount, '(3a,es16.6,4a,es16.6,2a)')&
1612 &               ' Note : the partitioning does not work well for this routine.',ch10,&
1613 &               '   The (other) Wall time            ',times(2,mtim),ch10,&
1614 &               '   is bigger than 1.2 secs. ',ch10,&
1615 &               '   The (other) Wall time percentage ',wallnm*times(2,mtim),ch10,&
1616 &               '   is bigger than 3% '
1617              else if (times(2,mtim)<0.2d0 .and. wallnm*times(2,mtim)<-0.2d0)then
1618                write(ount, '(3a,es16.6,2a)')&
1619 &               ' Note : the partitioning does not work well for this routine.',ch10,&
1620 &               '   The (other) Wall time percentage ',wallnm*times(2,mtim),ch10,&
1621 &               '   is negative '
1622              end if
1623 #endif
1624            end if
1625            if(ncount(isort)/=0)then
1626              if(times(2,isort)*wallnm>0.02d0 .or. ilist==1)then   ! Does not write a slot if the wall time ratio is below a threshold
1627                if ( times(2,isort) < 0.0001 ) times(2,isort) = -1.d0
1628                write(ount,format01040)names(isort),&
1629 &               times(1,isort),times(1,isort)*cpunm,&
1630 &               times(2,isort),times(2,isort)*wallnm,ncount(isort), &
1631 &               times(1,isort)/times(2,isort),times(1,isort)/times(2,isort)/nthreads
1632              end if
1633              if(ilist/=1)then
1634                subcpu=subcpu+times(1,isort)
1635                subwal=subwal+times(2,isort)
1636              else
1637                write(ount, '(a)' ) ' '
1638              end if
1639            end if
1640          end do
1641 
1642          subwal = subwal + tol14
1643          write(ount, 01201 ) subcpu,subcpu*cpunm,subwal,subwal*wallnm, subcpu/subwal,subcpu/subwal/nthreads
1644 #ifdef HAVE_TEST_TIME_PARTITIONING
1645          if( wallnm*abs(subwal-times(2,list(1)))>1.d0 .and. abs(subwal-times(2,list(1)))>0.2d0 )then
1646            write(ount, '(3a,es16.6,2a,es16.6,4a,es16.6,2a,es16.6,6a,i4)')&
1647 &           ' Note : the partitioning does not work well for this routine.',ch10,&
1648 &           '   The subtotal Wall time            ',subwal,ch10,&
1649 &           '   differs from the total Wall time  ',times(2,list(1)),ch10,&
1650 &           '   by more than 0.2 secs.',ch10,&
1651 &           '   The subtotal Wall time percentage ',wallnm*subwal,ch10,&
1652 &           '   differs from the total Wall time %',wallnm*times(2,list(1)),ch10,&
1653 &           '   by more than 1%. ',ch10,&
1654 &           '   The partitioning might not have been coded properly.',ch10,&
1655 &           '   nlist=',nlist
1656            do ilist=1,nlist
1657              write(ount, '(a,i4,i4,es16.6,i8)' )&
1658 &             ' ilist,list(ilist),wallnm*times(2,list(ilist)),ncount(list(ilist))=',&
1659 &             ilist,isort,wallnm*times(2,isort),ncount(isort)
1660            end do
1661          end if
1662 #endif
1663        end if
1664 
1665      end do ! End of loop on partitionings
1666 
1667 !    For parallel case
1668      if(xmpi_paral==1)then
1669        write(ount, '(a,/,a)' )'-','-Synchronisation (=leave_test) and MPI calls '
1670        nlist=14
1671        list(:14)=(/48,61,62,63,64,65,66,67,71,85,86,543,544,787/)
1672        subcpu=zero; subwal=zero
1673        if(ncount(list(1))/=0)then
1674          do ilist=1,nlist
1675            isort = list(ilist)
1676 !          
1677            if (ncount(isort)/=0) then
1678              write(ount,format01040)names(isort),&
1679 &             times(1,isort),times(1,isort)*cpunm,&
1680 &             times(2,isort),times(2,isort)*wallnm,ncount(isort), &
1681 &             times(1,isort)/(tol14+times(2,isort)),times(1,isort)/(times(2,isort)+tol14)/nthreads
1682 
1683              if(ilist/=1)then
1684                subcpu=subcpu+times(1,isort)
1685                subwal=subwal+times(2,isort)
1686              else
1687                write(ount, '(a)' ) '-'
1688              end if
1689            end if !ncount
1690 !          
1691          end do !ilist
1692 
1693          subwal = subwal + tol14
1694          write(ount, 01200 ) subcpu,subcpu*cpunm,subwal,subwal*wallnm, subcpu/subwal,subcpu/subwal/nthreads
1695        end if !ncount
1696      end if !xmpi_paral
1697 
1698      nlist=23
1699      list(:23)=(/47,49,51,801,72,73,74,77,78,79,97,82,87,88,436,437,438,439,804,805,331,332,333/)
1700      flag_write=1
1701      do ilist=1,nlist
1702        isort = list(ilist)
1703        if(ncount(isort)/=0)then
1704          if(flag_write==1)then
1705            write(ount, '(/,a)' ) ' Additional information'
1706            flag_write=0
1707          end if
1708          write(ount,format01040)names(isort),&
1709 &         times(1,isort),times(1,isort)*cpunm,times(2,isort),times(2,isort)*wallnm,ncount(isort), &
1710 &         times(1,isort)/(tol14+times(2,isort)),times(1,isort)/(tol14+times(2,isort))/nthreads
1711        end if
1712      end do
1713 
1714      nlist=23
1715      list(:23)=(/550,551,552,553,554,555,556,558,559,560,561,562,563,564,565,566,567,593,594,595,596,597,598/)
1716      flag_write=1
1717      do ilist=1,nlist
1718        isort = list(ilist)
1719        if(ncount(isort)/=0)then
1720          if(flag_write==1)then
1721            write(ount, '(/,a)' ) ' Additional information about PAW segments'
1722            flag_write=0
1723          end if
1724          write(ount,format01040)names(isort),&
1725 &         times(1,isort),times(1,isort)*cpunm,times(2,isort),times(2,isort)*wallnm,ncount(isort), &
1726 &         times(1,isort)/(tol14+times(2,isort)),times(1,isort)/(tol14+times(2,isort))/nthreads
1727        end if
1728      end do
1729 
1730 !    The detailed analysis cannot be done in the multidataset mode
1731      if(ndtset<2)then
1732        write(ount, '(/,/,a,/,a,/,a)' ) &
1733 &       ' Detailed analysis of some time consuming routines ',&
1734 &       '                                  tcpu    ncalls  tcpu/ncalls    ndata tcpu/ncalls/ndata',&
1735 &       '                                 (sec)                (msec)              (microsec)'
1736        nlist=8
1737        list(:8)=(/802,803,9,75,76,77,210,11/)
1738        do ilist=1,nlist
1739          isort = list(ilist)
1740          if(ncount(isort)/=0)then
1741            write(ount, '(a,a24,f12.3,i10,f12.3,i10,f12.3)' )'- ',names(isort),&
1742 &           times(1,isort),ncount(isort),&
1743 &           1000.0_dp*times(1,isort)/dble(ncount(isort)),ndata(isort),&
1744 &           1000000.0_dp*times(1,isort)/dble(ncount(isort)*dble(ndata(isort)))
1745          else
1746            write(ount, '(a,a24,f12.3,i10)' )'- ',names(isort),&
1747 &           times(1,isort),ncount(isort)
1748          end if
1749        end do !ilist
1750      else
1751        write(ount,'(/,a)') ' timana : in multi dataset mode, the more detailed analysis is not done.'
1752      end if !ndtset
1753 
1754    end if ! End the condition of timopt<0
1755 
1756  end if ! me==0
1757 
1758  ABI_DEALLOCATE(list)
1759 
1760 end subroutine timana