TABLE OF CONTENTS


ABINIT/tddft [ Functions ]

[ Top ] [ Functions ]

NAME

 tddft

FUNCTION

 Compute the excitation energies within TDLDA
 from input wavefunctions, eigenenergies, and band occupations.

COPYRIGHT

 Copyright (C) 1999-2017 ABINIT group (XG, JYR, MB, MBELAND, SHAMEL)
 This file is distributed under the terms of the
 GNU General Public License, see ~abinit/COPYING
 or http://www.gnu.org/copyleft/gpl.txt .
 For the initials of contributors, see ~abinit/doc/developers/contributors.txt .

INPUTS

  cg(2,mpw*nspinor*mband*mkmem*nsppol)=wf in G space
  dtfil <type(datafiles_type)>=variables related to files
  dtset <type(dataset_type)>=all input variables for this dataset
  eigen(mband*nkpt*nsppol)=array for holding eigenvalues (hartree)
  etotal=total energy of the ground-state (Ha)
  gmet(3,3)=metrix tensor in G space in Bohr**-2.
  gprimd(3,3)=dimensional reciprocal space primitive translations
  gsqcut=cutoff on (k+G)^2 (bohr^-2)
  kg(3,mpw*mkmem)=reduced planewave coordinates.
  kxc(nfft,nkxc)=exchange-correlation kernel
  mband=maximum number of bands
  mgfftdiel=maximum size of 1D FFTs, for the computation of the dielectric matrix
  mkmem=number of k-points treated by this node.
  mpi_enreg=information about MPI parallelization
  mpw=maximum allowed value for npw
  nfft=(effective) number of FFT grid points (for this processor)
       WARNING about parallelization: see below
  ngfftdiel(18)=contain all needed information about 3D FFT, for dielectric matrix,
                see ~abinit/doc/input_variables/vargs.htm#ngfft
  nkpt=number of k points
  nkxc=second dimension of the array kxc (see rhohxc for a description)
  npwarr(nkpt)=number of planewaves at each k point
  nspinor=number of spinorial components of the wavefunctions
  nsppol=1 for unpolarized, 2 for spin-polarized
  occ(mband*nkpt*nsppol)=
          occupation numbers for each band (usually 2.0) at each k point
  ucvol=unit cell volume (Bohr**3)
  wffnew=unit number for current wf disk file

OUTPUT

  (only writing)

 WARNING:
 This routine should not be parallelized on space for the time being,
    because the already existing parallelisation is not the usual one, found
    in the majority of ABINIT routines.

NOTES

 * Only accept nspinor=1, nsppol=1, nkpt=1 (Gamma point), and occopt<3
   (insulating occupation numbers).
   It is expected to make it work for nsppol=2 in the future.

 * For the oscillator strengths, see the paper
   ''Time-Dependent Density Functional Response Theory of Molecular
     systems: Theory, Computational Methods, and Functionals'', by M.E. Casida,
   in Recent Developments and Applications of Modern Density Functional
   Theory, edited by J.M. Seminario (Elsevier, Amsterdam, 1996).

PARENTS

      vtorho

CHILDREN

      fourdp,fourwf,hartre,matr3inv,mpi_bcast,mpi_gatherv,mpi_reduce
      mpi_scatterv,sort_dp,sphereboundary,timab,wrtout,xmpi_barrier
      xmpi_bcast,xmpi_exch,xmpi_sum,zhpev

SOURCE

  78 #if defined HAVE_CONFIG_H
  79 #include "config.h"
  80 #endif
  81 
  82 #include "abi_common.h"
  83 
  84  subroutine tddft(cg,dtfil,dtset,eigen,etotal,gmet,gprimd,gsqcut,&
  85 &  kg,kxc,mband,mgfftdiel,mkmem,mpi_enreg,mpw,nfft,ngfftdiel,nkpt,nkxc,&
  86 &  npwarr,nspinor,nsppol,occ,ucvol,wffnew)
  87 
  88  use defs_basis
  89  use defs_abitypes
  90  use m_profiling_abi
  91  use m_xmpi
  92  use m_errors
  93  use m_wffile
  94  use m_sort
  95 #if defined HAVE_MPI2
  96  use mpi
  97 #endif
  98 
  99  use m_io_tools, only : get_unit
 100 
 101 !This section has been created automatically by the script Abilint (TD).
 102 !Do not modify the following lines by hand.
 103 #undef ABI_FUNC
 104 #define ABI_FUNC 'tddft'
 105  use interfaces_14_hidewrite
 106  use interfaces_18_timing
 107  use interfaces_32_util
 108  use interfaces_52_fft_mpi_noabirule
 109  use interfaces_53_ffts
 110  use interfaces_56_xc
 111 !End of the abilint section
 112 
 113  implicit none
 114 
 115 #if defined HAVE_MPI1
 116  include 'mpif.h'
 117 #endif
 118 
 119 !Arguments ------------------------------------
 120  integer, intent(in) :: mband,mgfftdiel,mkmem,mpw,nfft,nkpt,nkxc,nsppol
 121  integer, intent(in) :: nspinor
 122  real(dp), intent(in) :: etotal,gsqcut,ucvol
 123  type(datafiles_type), intent(in) :: dtfil
 124  type(dataset_type), intent(in) :: dtset
 125  type(MPI_type), intent(in) :: mpi_enreg
 126  type(wffile_type), intent(inout) :: wffnew
 127  integer, intent(in) :: kg(3,mpw*mkmem),ngfftdiel(18),npwarr(nkpt)
 128  real(dp), intent(in) :: cg(2,mpw*nspinor*mband*mkmem*nsppol),eigen(mband*nkpt*nsppol)
 129  real(dp), intent(in) :: gmet(3,3),gprimd(3,3),kxc(nfft,nkxc),occ(mband*nkpt*nsppol)
 130 
 131 !Local variables-------------------------------
 132  integer,parameter :: nexcitout=20
 133  integer :: cplex,i1,i2,i3,iband,idir,ier,ierr,iexcit,iexcit1,iexcit2,ifft
 134  integer :: old_iexcit,ii,jj,isppol,jsppol,isppol1,isppol2,isppol_l,isppol_n
 135  integer :: isppol_n1,isppol_n2,iocc_n1,iocc_n2,iunocc_n1,iunocc_n2,temp_unit2
 136  integer :: ikpt,index,iocc,iocc1,iocc2,iocc_l,iocc_n
 137  integer :: istwf_k,iunocc,iunocc1,iunocc2,iunocc_l,iunocc_n,istate,jexcit
 138  integer :: jexcit_cbase,master,mcg_disk,me_loc
 139  integer :: nband_k(nsppol), nband_occ(nsppol), nband_unocc(nsppol)
 140  integer :: nstate_k, nstate_occ, nstate_unocc, nexcit_pol(nsppol)
 141  integer :: nstate_win,ndiel,ndiel1,ndiel2,ndiel3,ndiel4
 142  integer :: ndiel5,ndiel6,nexcit,nexcit_max,nexcit_win,nfftdiel,nlargest,nnext
 143  integer :: nnext1,nnext2
 144  integer :: nproc_loc,npw_k,pole_approx,sing_trip,spaceComm,tim_fourwf
 145  integer :: tim_rwwf,save_iomode
 146  integer :: rec,recl,idummy,jdummy
 147  real(dp) :: buffer,buffer_inv,diffeig,eigunocc,emax_win
 148  real(dp) :: factor,ff,flargest,fnext,fr_invsquare,fr_power
 149  real(dp) :: fnext1,fnext2
 150  real(dp) :: normint,myproduct,saa,sab,sbb
 151  real(dp) :: sumx
 152  real(dp) :: sum_kernel(2/nsppol)
 153  real(dp) :: weight,xx
 154  logical :: am_master,file_exist
 155  logical, allocatable :: done_excit(:,:),done_sexc(:),done_sexc2(:)
 156  character(len=18) :: chain1,chain2
 157  character(len=500) :: message
 158  integer,allocatable :: flag_state_win(:),gbound(:,:),indarr(:),index_state(:)
 159  integer,allocatable :: kg_k(:,:)
 160  integer,allocatable :: excit_coords(:,:)
 161  integer :: count_to_do, count, displ, countmax, displmax
 162  integer :: ijexcit, ijexcit2, sendcount
 163  real(dp) :: f_sing_trip(2/nsppol),sendbuf(5-nsppol)
 164  real(dp) :: cauchy(7),poscart(3),qphon(3),rprimd(3,3),tsec(2),dummy(2,1)
 165  integer :: iomode,action,me,nmaster,sender,source,sread,sskip
 166  integer :: formeig,icg,ikg,nband_k_
 167  logical :: mydata, tmaster, swrite
 168  integer,allocatable ::  kg_disk(:,:)
 169  real(dp),allocatable :: cg_disk(:,:),cg_tmp(:,:)
 170  real(dp),allocatable :: cwavef(:,:),eexcit(:)
 171  real(dp),allocatable :: eexcit2(:)
 172  real(dp),allocatable :: matr(:)
 173  real(dp),allocatable :: kxc_for_tddft(:,:,:,:,:,:),omega_tddft_casida(:,:,:,:,:,:,:)
 174  real(dp),allocatable :: osc_str(:,:),pos(:,:),rhoaug(:,:,:),rhog(:,:)
 175  real(dp),allocatable :: sexc(:,:),sqrtks(:),vec(:,:,:),vhartr(:),wfprod(:,:,:)
 176  real(dp) :: omega_tddft_casida_dummy(2/nsppol)
 177  real(dp),allocatable :: wfraug(:,:,:,:),wfrspa(:,:,:,:),work(:),zhpev1(:,:)
 178  real(dp),allocatable :: zhpev2(:)
 179 #if defined HAVE_MPI
 180  integer :: iproc
 181  integer :: ipwnbd
 182  integer,allocatable :: counts(:),displs(:),recvcounts(:),tmpbuf(:)
 183  real(dp), allocatable :: recvbuf(:,:)
 184 #endif
 185 
 186 ! *************************************************************************
 187 
 188 !Init mpi_comm
 189  spaceComm=mpi_enreg%comm_cell
 190 
 191  am_master=.true.
 192  master = 0
 193  nproc_loc = xmpi_comm_size(spaceComm) !Init ntot proc max
 194  me_loc    = xmpi_comm_rank(spaceComm) !Define who i am
 195 
 196 #if defined HAVE_MPI
 197 
 198  if (me_loc/=0) then
 199    am_master=.FALSE.
 200  end if
 201  write(message, '(a,i3,a)' ) ' TDDFT ',nproc_loc,' CPU synchronized'
 202  call wrtout(std_out,message,'COLL')
 203  write(message, '(a,3D12.5,a,3D12.5,a,3D12.5)' ) ' gmet ',&
 204 & gmet(1,1),gmet(1,2),gmet(1,3),ch10,&
 205 & gmet(2,1),gmet(2,2),gmet(2,3),ch10,&
 206 & gmet(3,1),gmet(3,2),gmet(3,3)
 207  call wrtout(std_out,message,'COLL')
 208 #endif
 209 
 210 
 211 !COMMENT these values should become arguments
 212 !the two first define the energy window
 213 
 214  emax_win=greatest_real*tol6
 215  if(dtset%td_maxene>tol6)then
 216    emax_win = dtset%td_maxene
 217  end if
 218 
 219  call timab(95,1,tsec)
 220 
 221  istwf_k=dtset%istwfk(1)
 222 
 223  if(nkpt/=1 .or. &
 224 & abs(dtset%kptns(1,1))+abs(dtset%kptns(2,1))+abs(dtset%kptns(3,1))>1.0d-6 )then
 225    write(message, '(a,a,a,a,a,i4,a,3es14.6,a,a,a,a,a)' )&
 226 &   'The computation of excited states using TDDFT is only allowed',ch10,&
 227 &   'with nkpt=1, kpt=(0 0 0), but the following values are input:',ch10,&
 228 &   'nkpt=',nkpt,', kpt=',dtset%kptns(1:3,1),'.',ch10,&
 229 &   'Action : in the input file, set nkpt to 1 and kpt to 0 0 0 ,',ch10,&
 230 &   'or change iscf.'
 231    MSG_ERROR(message)
 232  end if
 233 
 234  if(nspinor/=1)then
 235    write(message, '(a,a,a,a,a,a,a)' )&
 236 &   'The computation of excited states using TDDFT is restricted',ch10,&
 237 &   'for the time being to nspinor=1, while input nspinor=2.',ch10,&
 238 &   'Action : if you want to compute excited states within TDDFT,',ch10,&
 239 &   'set nsppol to 1 in the input file. Otherwise, do not use iscf=-1.'
 240    MSG_ERROR(message)
 241  end if
 242 
 243 
 244  if(nsppol==2 .and. (dtset%ixc==22 .or. dtset%ixc==20))then
 245    write(message, '(a,a,a,a,a,a,a,a,a,a,a)' )&
 246 &   'The computation of excited states using TDDFT in the spin',ch10,&
 247 &   'polarized case for the time being cannot be used with ixc=20',ch10,&
 248 &   'or ixc=22',ch10,&
 249 &   'Action : if you want to compute excited states within TDDFT,',ch10,&
 250 &   'set ixc different from 20 or 22. Otherwise, do not use iscf=-1',ch10,&
 251 &   'with nsppol=2.'
 252    MSG_ERROR(message)
 253  end if
 254 
 255 
 256  if(dtset%occopt>2)then
 257    write(message, '(a,a,a,i2,a,a,a,a,a)' )&
 258 &   'The computation of excited states using TDDFT is only allowed',ch10,&
 259 &   'with occopt=0, 1, or 2, while input occopt=',dtset%occopt,'.',ch10,&
 260 &   'Action : if you want to compute excited states within TDDFT,',ch10,&
 261 &   'set occopt=0, 1, or 2 in the input file. Otherwise, do not use iscf=-1.'
 262    MSG_ERROR(message)
 263  end if
 264 
 265 !Examine the occupation numbers, and determine the number of
 266 !occupied and unoccupied states and band.
 267 !States are numerated as usual in Abinit, before all spin up band
 268 !and after all spin down bands.
 269 !Note that if nsppol==1 nstate=nband_k
 270  do isppol=1,nsppol
 271    nband_k(isppol)=dtset%nband(isppol)
 272    nband_occ(isppol)=0
 273    do iband=1,nband_k(isppol)
 274      if(abs(occ(iband+(isppol-1)*nband_k(1))-two/nsppol)<tol6)  &
 275 &     nband_occ(isppol)=nband_occ(isppol)+1
 276    end do
 277    nband_unocc(isppol)=nband_k(isppol)-nband_occ(isppol)
 278 !  next line make no sense if spin flip is taken into account
 279    nexcit_pol(isppol)=nband_occ(isppol)*nband_unocc(isppol)
 280  end do
 281  nstate_k=nband_k(1)+(nsppol-1)*nband_k(nsppol)
 282  nstate_occ=nband_occ(1)+(nsppol-1)*nband_occ(nsppol)
 283  nstate_unocc=nstate_k-nstate_occ
 284 !next line to be changed if spin fli is taken into account
 285  nexcit=nexcit_pol(1)+(nsppol-1)*nexcit_pol(nsppol)
 286 
 287 !number of plane wave (does it work even for nsppol=2 ??)
 288  npw_k=npwarr(1)
 289 
 290 !mux number of excitations that is taken into account
 291  if(dtset%td_mexcit==0)then
 292    nexcit_max=nexcit
 293  else
 294    nexcit_max =dtset%td_mexcit
 295  end if
 296 
 297 !DEBUG
 298 !write(std_out,*) nband_occ(1),nband_unocc(1)
 299 !write(std_out,*) nband_occ(nsppol),nband_unocc(nsppol)
 300 !END DEBUG
 301 
 302 
 303  if(nsppol==1)then
 304    write(message, '(a,a,a,a,i4,a,i4,a,a,i4,a,a,a,i6,a)' )ch10,&
 305 &   ' *** TDDFT : computation of excited states *** ',ch10,&
 306 &   ' Splitting of',dtset%nband(1),' states in',nband_occ(1),' occupied states,',&
 307 &   ' and',nband_unocc(1),' unoccupied states,',ch10,&
 308 &   ' giving',nexcit,' excitations.'
 309    call wrtout(std_out,message,'COLL')
 310    call wrtout(ab_out,message,'COLL')
 311  else
 312    write(message, '(a,a,a,a,i4,a,i4,a,a,i4,a,a,a,i6,a,a,a)' )ch10,&
 313 &   ' *** TDDFT : computation of excited states *** ',ch10,&
 314 &   ' Splitting of',nstate_k,' states in',nstate_occ,' occupied states,',&
 315 &   ' and',nstate_unocc,' unoccupied states,',ch10,&
 316 &   ' giving',nexcit,' excitations. Note that spin flip is not possible actually.',ch10,&
 317 &   ' So the number of excitation is the half of the product of the number of state'
 318    call wrtout(std_out,message,'COLL')
 319    call wrtout(ab_out,message,'COLL')
 320  end if
 321 
 322 !Allocate the matrices to be diagonalized.
 323 !Use a simple storage mode, to be improved in the future.
 324  ii=max(nband_occ(1),nband_occ(nsppol))
 325  jj=max(nband_unocc(1),nband_unocc(nsppol))
 326  ABI_ALLOCATE(omega_tddft_casida,(ii,jj,nsppol,ii,jj,nsppol,2/nsppol))
 327  ABI_ALLOCATE(eexcit,(nexcit))
 328  ABI_ALLOCATE(sqrtks,(nexcit))
 329  ABI_ALLOCATE(flag_state_win,(nstate_k))
 330  omega_tddft_casida(:,:,:,:,:,:,:)=zero
 331 
 332 
 333 !Fill the diagonal elements with square of differences of KS eigenvalues
 334 !(also not very efficient, but OK for the present first coding)
 335 !Also compute the square root of Kohn-Sham eigenvalue differences
 336  do isppol=1,nsppol
 337    do iunocc=1,nband_unocc(isppol)
 338      eigunocc=eigen(iunocc+nband_occ(isppol)+(isppol-1)*nband_k(1))
 339      do iocc=1,nband_occ(isppol)
 340        iexcit=iocc+(isppol-1)*nexcit_pol(1)+nband_occ(isppol)*(iunocc-1)
 341        diffeig=eigunocc-eigen(iocc+(isppol-1)*nband_k(1))
 342        do sing_trip=1,2/nsppol
 343          omega_tddft_casida(iocc,iunocc,isppol,iocc,iunocc,isppol,sing_trip)=diffeig**2
 344        end do
 345        eexcit(iexcit)=diffeig
 346        sqrtks(iexcit)=sqrt(diffeig)
 347      end do
 348    end do
 349  end do
 350 
 351 
 352 
 353 !Sort the excitation energies : note that the array eexcit is reordered
 354  ABI_ALLOCATE(indarr,(nexcit))
 355  indarr(:)=(/ (ii,ii=1,nexcit) /)
 356  call sort_dp(nexcit,eexcit,indarr,tol14)
 357 
 358 !Determine an energy window for the excitations
 359 !to take into account. This is necessary for large systems
 360 
 361  nexcit_win = 0
 362  do iexcit = 1, nexcit
 363    if ((eexcit(iexcit) < emax_win ).and.(nexcit_win < nexcit_max)) then
 364      nexcit_win = nexcit_win + 1
 365 
 366 !    DEBUG
 367 !    write(message,'(a,F12.5,a,a,i2,a,a,i2)') 'excitation energy:', eexcit(indarr(iexcit)),ch10, &
 368 !    &                                        'excitation number:', indarr(iexcit),ch10,         &
 369 !    &                                        'nexcit_win:       ', nexcit_win
 370 !    call wrtout(std_out,message,'COLL')
 371 !    ENDDEBUG
 372 
 373    end if
 374  end do
 375 
 376 !identification of the bands contributing to the
 377 !nexcit_win  excitations within the window
 378 
 379 
 380  nstate_win = 0
 381  flag_state_win(:) = 0
 382  do iexcit = 1, nexcit_win
 383    iexcit1 = indarr(iexcit)
 384    isppol1 = min((iexcit1-1)/nexcit_pol(1) +1,2)
 385    iunocc1 = (iexcit1-(isppol1-1)*nexcit_pol(1)-1)/nband_occ(isppol1)+1
 386    iocc1   = iexcit1-(isppol1-1)*nexcit_pol(1)-(iunocc1-1)*nband_occ(isppol1)
 387    if (flag_state_win(nband_occ(isppol1)+(isppol1-1)*nband_k(1)+iunocc1)==0) &
 388 &   flag_state_win(nband_occ(isppol1)+(isppol1-1)*nband_k(1)+iunocc1) =1
 389    if (flag_state_win(iocc1+(isppol1-1)*nband_k(1))==0) &
 390 &   flag_state_win(iocc1+(isppol1-1)*nband_k(1)) =1
 391 !  DEBUG
 392 !  write(message,'(a,i2,a,a,i2,i2,a,a,i2,a,a,i2)') 'isppol:', isppol1,ch10, &
 393 !  &                                       'iocc,iunocc:', iocc1,iunocc1,ch10,         &
 394 !  &                                       'flag_state_win:', flag_state_win(iocc1+(isppol1-1)*nband_k(1)),  &
 395 !  &                               ch10,   'flag_state_win:', flag_state_win(nband_occ(isppol1)+(isppol1-1)*nband_k(1)+iunocc1)
 396 !  call wrtout(std_out,message,'COLL')
 397 !  END DEBUG
 398 
 399  end do
 400 
 401  do isppol=1,nsppol
 402    do iband=1,nband_k(isppol)
 403      nstate_win=nstate_win+flag_state_win(iband+(isppol-1)*nband_k(1))
 404    end do
 405  end do
 406 
 407 
 408  write(message,'(a,a,i5)') ch10,'Nr of states to Fourier transform : ',nstate_win
 409  call wrtout(std_out,message,'COLL')
 410 
 411  ndiel1=ngfftdiel(1) ; ndiel2=ngfftdiel(2) ; ndiel3=ngfftdiel(3)
 412 !ndiel4,ndiel5,ndiel6 are FFT dimensions, modified to avoid cache trashing
 413  ndiel4=ngfftdiel(4) ; ndiel5=ngfftdiel(5) ; ndiel6=ngfftdiel(6)
 414 
 415 !The evaluation of integrals, later, needs the following factor
 416  normint=one/(ucvol*dble(ndiel1*ndiel2*ndiel3))
 417 
 418 !Setup the positions in real space for later integration
 419  call matr3inv(gprimd,rprimd)
 420 
 421  ABI_ALLOCATE(pos,(max(ndiel1,ndiel2,ndiel3),3))
 422 
 423 !Select the reduced position of the point with respect to the box center,
 424 !in the interval ]-0.5,0.5].
 425  buffer=0.05_dp ; buffer_inv=one/buffer
 426  do idir=1,3
 427    if(idir==1)ndiel=ndiel1
 428    if(idir==2)ndiel=ndiel2
 429    if(idir==3)ndiel=ndiel3
 430    do ii=1,ndiel
 431 !    dtset%boxcenter(3)=reduced coordinates of the center of the box,
 432 !    in view of the computation of the oscillator strength
 433      pos(ii,idir)=(ii-1)/(one*ndiel)-dtset%boxcenter(idir)
 434      pos(ii,idir)=pos(ii,idir)-nint(pos(ii,idir)-tol12)
 435 !    The linear behaviour is cut-off when one becomes
 436 !    close to the boundaries : the buffer allows to match smoothly
 437 !    one side of the cell to the other. This is important
 438 !    to get rid of small breakings of symmetry, that are
 439 !    confusing in accurate tests
 440      if(abs(pos(ii,idir))>half-buffer)then
 441 !      xx is always positive, and goes linearly from 1 to 0
 442 !      in the buffer region
 443        xx=(half-abs(pos(ii,idir)))*buffer_inv
 444 !      The cut-off is applied to pos(:,:)
 445        pos(ii,idir)=pos(ii,idir)*xx*(two-xx)
 446 !      DEBUG
 447 !      if (idir==1)then
 448 !      write(std_out,'(i2)') ndiel
 449 !      write(std_out,'(a,i2,a,F12.5,F12.5)')'idiel : ',ii,'   x : ',pos(ii,idir),&
 450 !      &    dtset%boxcenter(idir)
 451 !      endif
 452 !      ENDDEBUG
 453      end if
 454    end do ! ii
 455  end do ! idir
 456 
 457 !need to run in MPI I/O case
 458  if (wffnew%iomode == IO_MODE_MPI ) then
 459    save_iomode=wffnew%iomode
 460    wffnew%iomode = IO_MODE_FORTRAN
 461  else
 462 !  Do not store value but set to have save_iomode /= 1
 463    save_iomode = IO_MODE_FORTRAN
 464  end if
 465 
 466 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 467 !2009 Chunping Hu
 468 !need to collect wavefunctions from processors to master
 469  me=me_loc
 470 
 471  tim_rwwf =0
 472  source = master
 473  sread = master
 474  tmaster=(master==me)
 475  swrite=tmaster
 476  sender=-1
 477 
 478  iomode=wffnew%iomode
 479 
 480  if(am_master)then
 481 #if defined HAVE_MPI
 482    ABI_ALLOCATE(cg_tmp,(2,mpw*nspinor*mband*nsppol))
 483 #endif
 484  end if
 485 
 486  ABI_ALLOCATE(kg_disk,(3,mpw))
 487  mcg_disk=mpw*nspinor*mband
 488  formeig=0
 489 
 490 #if defined HAVE_MPI
 491  call xmpi_barrier(spaceComm)
 492  ABI_ALLOCATE(cg_disk,(2,mcg_disk))
 493 #endif
 494 
 495  icg=0
 496  if(mpi_enreg%paralbd==0) tim_rwwf=6
 497  if(mpi_enreg%paralbd==1)tim_rwwf=12
 498 
 499  do isppol=1,nsppol
 500    ikg=0
 501    do ikpt=1,nkpt
 502      nband_k_=dtset%nband(ikpt+(isppol-1)*nkpt)
 503      npw_k=npwarr(ikpt)
 504 #if defined HAVE_MPI
 505      if (dtset%usewvl == 0) then
 506        call xmpi_barrier(spaceComm)
 507 !      Must transfer the wavefunctions to the master processor
 508 !      Separate sections for paralbd=1 or other values ; might be merged
 509        if(mpi_enreg%paralbd==0) then
 510          nmaster=0
 511          source=minval(mpi_enreg%proc_distrb(ikpt,1:nband_k_,isppol))
 512          mydata=.false.
 513          if(source==me)mydata=.true.
 514          action=0
 515 !        I am the master node, and I have the data in cg or cg_disk
 516          if((tmaster).and.(mydata))action=1
 517 !        I am not the master, and I have the data => send to master
 518          if((.not.tmaster).and.(mydata))action=2
 519 !        I am the master, and I receive the data
 520          if((tmaster).and.(.not.mydata))action=3
 521 !        I have the data in cg or cg_disk ( MPI_IO case)
 522          if (iomode==IO_MODE_MPI) then
 523            action = 0
 524            sender=-1
 525            swrite=.false.
 526            if (mydata)then
 527              action=1
 528              swrite=.true.
 529              sender=me
 530            end if
 531          end if
 532 !        I am the master node, and I have the data in cg or cg_disk
 533 !        I have the data in cg or cg_disk ( MPI_IO case)
 534          if(action==1)then
 535 !          Copy from kg to kg_disk
 536            kg_disk(:,1:npw_k)=kg(:,1+ikg:npw_k+ikg)
 537 !          Copy from cg to cg_disk
 538            do ipwnbd=1,nband_k_*npw_k*nspinor
 539              cg_disk(1,ipwnbd)=cg(1,ipwnbd+icg)
 540              cg_disk(2,ipwnbd)=cg(2,ipwnbd+icg)
 541            end do
 542          end if
 543 !        I am not the master, and I have the data => send to master
 544 !        I am the master, and I receive the data
 545          if ( action==2.or.action==3) then
 546            call timab(48,1,tsec)
 547            if(action==2)then
 548              call xmpi_exch(kg(:,1+ikg:npw_k+ikg),3*npw_k,source,kg_disk,nmaster,spaceComm,ierr)
 549              call xmpi_exch(cg(:,icg+1:icg+nband_k_*npw_k*nspinor),2*nband_k_*npw_k*nspinor &
 550 &             ,source,cg_disk,nmaster,spaceComm,ierr)
 551            else
 552              call xmpi_exch(kg_disk,3*npw_k,source,kg_disk,nmaster,spaceComm,ierr)
 553              call xmpi_exch(cg_disk,2*nband_k_*npw_k*nspinor,source,cg_disk,nmaster,spaceComm,ierr)
 554            end if
 555            call timab(48,2,tsec)
 556          end if
 557        else if(mpi_enreg%paralbd==1)then
 558          nmaster=0
 559 #if defined HAVE_MPI_IO
 560          sender=-1
 561          if( iomode ==IO_MODE_MPI ) then
 562            nmaster=mpi_enreg%proc_distrb(ikpt,1,isppol)
 563            sender=nmaster
 564          end if
 565 #endif
 566 !        Note the loop over bands
 567          do iband=1,nband_k_
 568 !          The message passing related to kg is counted as one band
 569            action=0
 570 !          I am the master node, and I have the data in cg or cg_disk
 571            if( mpi_enreg%proc_distrb(ikpt,iband,isppol)==nmaster .and. me==nmaster) then
 572              action=1
 573 !            I am not the master, and I have the data => send to master
 574            elseif( mpi_enreg%proc_distrb(ikpt,iband,isppol)==me .and. me/=nmaster ) then
 575              action = 2
 576 !            I am the master, and I receive the data
 577            elseif( mpi_enreg%proc_distrb(ikpt,iband,isppol)/=me .and. me==nmaster ) then
 578              action=3
 579            end if
 580            if(action==1) then
 581 !            I am the master node, and I have the data in cg or cg_disk
 582 !            Copy from kg to kg_disk
 583              if(iband==1)kg_disk(:,1:npw_k)=kg(:,1+ikg:npw_k+ikg)
 584 !            Copy from cg to cg_disk
 585              do ipwnbd=1,npw_k*nspinor
 586                cg_disk(1,(iband-1)*npw_k*nspinor+ipwnbd)= cg(1,(iband-1)*npw_k*nspinor+ipwnbd+icg)
 587                cg_disk(2,(iband-1)*npw_k*nspinor+ipwnbd)= cg(2,(iband-1)*npw_k*nspinor+ipwnbd+icg)
 588              end do
 589            end if  ! action=1
 590            if ( action==2.or.action==3) then
 591 !            action=2 :  I am not the master, and I have the data => send to master
 592 !            action=3 :  I am the master, and I receive the data
 593              call timab(48,1,tsec)
 594              if ( iband == 1 ) then
 595                if (action==2) then
 596                  call xmpi_exch(kg(:,1+ikg:npw_k+ikg),3*npw_k,mpi_enreg%proc_distrb(ikpt,iband,isppol) &
 597 &                 ,kg_disk,nmaster,spaceComm,ierr)
 598                else
 599                  call xmpi_exch(kg_disk,3*npw_k,mpi_enreg%proc_distrb(ikpt,iband,isppol)  &
 600 &                 ,kg_disk,nmaster,spaceComm,ierr)
 601                end if
 602              end if       ! iband =1
 603              ipwnbd=(iband-1)*npw_k*nspinor
 604              if (action==2)then
 605                call xmpi_exch( cg(:,ipwnbd+icg+1:ipwnbd+icg+npw_k*nspinor),2*npw_k*nspinor &
 606 &               ,mpi_enreg%proc_distrb(ikpt,iband,isppol)                    &
 607 &               ,cg_disk(:,ipwnbd+1:ipwnbd+npw_k*nspinor),nmaster,spaceComm,ierr)
 608              else
 609                call xmpi_exch( cg_disk(:,ipwnbd+1:ipwnbd+npw_k*nspinor),2*npw_k*nspinor    &
 610 &               ,mpi_enreg%proc_distrb(ikpt,iband,isppol)                    &
 611 &               ,cg_disk(:,ipwnbd+1:ipwnbd+npw_k*nspinor),nmaster,spaceComm,ierr)
 612              end if
 613              call timab(48,2,tsec)
 614            end if        ! action=2 or action=3
 615            if(iomode ==IO_MODE_MPI) then
 616 !            I have the data in cg or cg_disk
 617              swrite=.false.
 618              if (nmaster == me) then
 619                swrite=.true.
 620              end if
 621            end if
 622 !          End of loop over bands
 623          end do
 624 !        End of paralbd=1
 625        end if
 626      end if
 627 #endif
 628 
 629 !    The wavefunctions for the present k point and spin are stored into cg_tmp
 630      if(am_master)then
 631 #if defined HAVE_MPI
 632        cg_tmp(:,icg+1:icg+nband_k_*npw_k*nspinor)=cg_disk(:,:)
 633 #endif
 634      end if
 635 
 636      sskip=1
 637 #if defined HAVE_MPI
 638      if (dtset%usewvl == 0) then
 639        sskip=0
 640        if(.not.(proc_distrb_cycle(mpi_enreg%proc_distrb,ikpt,1,nband_k_,isppol,me)))sskip=1
 641      end if
 642 #endif
 643      if(sskip==1)then
 644        icg=icg+npw_k*nspinor*nband_k_
 645        ikg=ikg+npw_k
 646      end if
 647 
 648    end do ! ikpt
 649  end do ! isppol
 650  ABI_DEALLOCATE(kg_disk)
 651 #if defined HAVE_MPI
 652  ABI_DEALLOCATE(cg_disk)
 653 #endif
 654 !!!!!!!end of collecting wavefunction to master!!!!!!
 655 
 656 
 657  if(am_master)then
 658 !  -----------------------------------------------------------
 659 !  The disk access is only done by master...
 660 
 661    ABI_ALLOCATE(gbound,(2*mgfftdiel+8,2))
 662    ABI_ALLOCATE(kg_k,(3,npw_k))
 663 
 664    ikpt=1
 665 !  Only one k point
 666 !  do isppol=1,nsppol
 667    kg_k(:,1:npw_k)=kg(:,1:npw_k)
 668    call sphereboundary(gbound,istwf_k,kg_k,mgfftdiel,npw_k)
 669 !  enddo
 670 
 671  end if ! am_master
 672 
 673 !need to run in MPI I/O case
 674  if ( save_iomode == 1 )  wffnew%iomode = IO_MODE_MPI
 675 !call wrtout(std_out,'After reading the wavefunction','COLL')
 676 
 677 !Use a simple implementation for the computation of the kernel elements
 678  if (am_master) then
 679    ABI_ALLOCATE(cwavef,(2,mpw))
 680    ABI_ALLOCATE(rhoaug,(ndiel4,ndiel5,ndiel6))
 681    ABI_ALLOCATE(wfraug,(2,ndiel4,ndiel5,ndiel6))
 682  end if
 683  ABI_ALLOCATE(index_state,(nstate_k))
 684 
 685 ! all real-space states are kept in memory
 686  ABI_ALLOCATE(wfrspa,(ndiel4,ndiel5,ndiel6,nstate_win))
 687 
 688 !DEBUG
 689 !write(message,'(a)') 'After allocating wfrspa'
 690 !call wrtout(std_out,message,'COLL')
 691 !ENDDEBUG
 692 
 693  weight=zero
 694 
 695 !Generate states in real space, only for states contributing to excitations in window
 696  istate=0
 697 
 698  do isppol=1,nsppol
 699    do iband=1,nband_k(isppol)
 700 
 701      if(flag_state_win(iband+(isppol-1)*nband_k(1)) == 1) then
 702        istate=istate+1
 703        index_state(iband+(isppol-1)*nband_k(1))=istate
 704 
 705        if (am_master) then
 706 #if defined HAVE_MPI
 707 !        Obtain Fourier transform in fft box
 708          cwavef(:,1:npw_k)=cg_tmp(:,1+(iband-1)*npw_k+(isppol-1)* &
 709 &         (npw_k*nband_k(1)) : iband*npw_k+(isppol-1)*(npw_k*nband_k(1)))
 710 #else
 711          cwavef(:,1:npw_k)=cg(:,1+(iband-1)*npw_k+(isppol-1)* (npw_k*nband_k(1)) : iband*npw_k+(isppol-1)*(npw_k*nband_k(1)))
 712 #endif
 713 
 714 !        DEBUG
 715 !        write(std_out,*)' iband : ',iband, ' isppol', isppol, '  -> index ', &
 716 !        &            istate,index_state(iband+(isppol-1)*nband_k(1))
 717 !        ENDDEBUG
 718 
 719          tim_fourwf=14
 720 !        This call should be made by master, and then the results be sent to the other procs
 721 
 722          call fourwf(1,rhoaug,cwavef,dummy,wfraug,gbound,gbound,&
 723 &         istwf_k,kg_k,kg_k,mgfftdiel,mpi_enreg,1,ngfftdiel,npw_k,1,ndiel4,ndiel5,ndiel6,&
 724 &         0,dtset%paral_kgb,tim_fourwf,weight,weight,use_gpu_cuda=dtset%use_gpu_cuda)
 725 
 726 !        DEBUG
 727 !        write(std_out,'(a,i5)')' After Fourier proc ',me_loc
 728 !        ENDDEBUG
 729 
 730 !        Fix the phase, and checks that the wavefunction is real
 731 !        (should be merged with routine fxphas)
 732          saa=zero ; sab=zero ; sbb=zero
 733          do i3=1,ndiel3
 734            do i2=1,ndiel2
 735              do i1=1,ndiel1
 736                saa=saa+wfraug(1,i1,i2,i3)**2
 737                sbb=sbb+wfraug(2,i1,i2,i3)**2
 738                sab=sab+wfraug(1,i1,i2,i3)*wfraug(2,i1,i2,i3)
 739              end do
 740            end do
 741          end do
 742 
 743          if(sbb>5.0d-9)then
 744 
 745            write(message, '(a,a,a,es20.10,a,i4,a,i2,a)' )&
 746 &           'The imaginary part of wavefunctions should be practically zero.',ch10,&
 747 &           'This is not the case, since sbb=',sbb,' for iband=',iband,'with sppol=',1,'.'
 748            MSG_WARNING(message)
 749            if(sbb>1.0d-7)then
 750              MSG_ERROR("sbb>1.0d-7")
 751            end if
 752          end if
 753 
 754 !        Possibility of writing to disk
 755 
 756          wfrspa(:,:,:,istate)=wfraug(1,:,:,:)
 757        end if !am_master
 758 
 759      end if
 760 
 761 !    End loop on iband
 762    end do
 763 !  End loop on nsppol
 764  end do
 765 
 766  if (am_master) then
 767    ABI_DEALLOCATE(gbound)
 768    ABI_DEALLOCATE(kg_k)
 769    ABI_DEALLOCATE(cwavef)
 770    ABI_DEALLOCATE(rhoaug)
 771    ABI_DEALLOCATE(wfraug)
 772 #if defined HAVE_MPI
 773    ABI_DEALLOCATE(cg_tmp)
 774 #else
 775 #endif
 776  end if
 777  ABI_DEALLOCATE(flag_state_win)
 778 
 779 ! send wfrspa from master to world
 780  call xmpi_bcast(wfrspa,master,spaceComm,ierr)
 781  !call MPI_BCAST(wfrspa,nbuf,MPI_DOUBLE_PRECISION,master,spaceComm,ierr)
 782 
 783 
 784 !DEBUG
 785 !#if defined HAVE_MPI
 786 !call xmpi_barrier(spaceComm)
 787 !write(message,'(a)')' after iband loop synchronization done...'
 788 !call  wrtout(std_out,message,'COLL')
 789 !#endif
 790 !ENDDEBUG
 791 
 792 !Compute the xc kernel, in the form needed for the singlet or triplet
 793 !excitation energy.
 794 !In the case ixc=20, kxc vanishes, but no change is made here, for simplicity.
 795 !(ixc=20 implemented only in the not spin polyrized case)
 796 
 797 !DEBUG
 798 !write(std_out,*)' tddft : xc kernel '
 799 !do ifft=1,nkxc,41
 800 !write(std_out,*)ifft,kxc(ifft,1),kxc(ifft,2),kxc(ifft,3)
 801 !enddo
 802 !stop
 803 !ENDDEBUG
 804 
 805  ABI_ALLOCATE(kxc_for_tddft,(ndiel1,ndiel2,ndiel3,nsppol,nsppol,2/nsppol))
 806  if(dtset%ixc/=22)then
 807    do isppol=1,nsppol
 808      do jsppol=1,nsppol
 809        index=1
 810        do i3=1,ndiel3
 811          do i2=1,ndiel2
 812            do i1=1,ndiel1
 813              do sing_trip=1,2/nsppol
 814                kxc_for_tddft(i1,i2,i3,isppol,jsppol,sing_trip)=two/nsppol* &
 815 &               (kxc(index,isppol+jsppol-1)-(sing_trip-1)*kxc(index,2))
 816              end do
 817              index=index+1
 818            end do
 819          end do
 820        end do
 821      end do
 822    end do
 823  else
 824 !  This is for the Burke-Petersilka-Gross hybrid, with ixc=22
 825 !  However, the implementation in case of spin-polarized system should not be expected to be the correct one !
 826    do isppol=1,nsppol
 827      do jsppol=1,nsppol
 828        index=1
 829        do i3=1,ndiel3
 830          do i2=1,ndiel2
 831            do i1=1,ndiel1
 832              do sing_trip=1,2/nsppol
 833                kxc_for_tddft(i1,i2,i3,isppol,jsppol,sing_trip)=((-1)**(sing_trip+1))*kxc(index,2)
 834              end do
 835              index=index+1
 836            end do
 837          end do
 838        end do
 839      end do
 840    end do
 841  end if
 842 
 843  pole_approx=0
 844 
 845  ABI_ALLOCATE(excit_coords,(nexcit_win**2,2))
 846 
 847 #if defined HAVE_MPI
 848  ABI_ALLOCATE(counts,(0:nproc_loc-1))
 849  ABI_ALLOCATE(displs,(0:nproc_loc-1))
 850  ABI_ALLOCATE(recvcounts,(0:nproc_loc-1))
 851  ABI_ALLOCATE(recvbuf,(5-nsppol,nproc_loc-1))
 852 #endif
 853 
 854 !DEBUG
 855 !write(std_out,*)'before first loop'
 856 !ENDDEBUG
 857 
 858 !0000000000000000000000000000000000000000000000000000000
 859 !check if matrix file fname_tdexcit exists on disk
 860 !if the file is present,calculation is a continuation
 861  if (am_master) then
 862    inquire(file=trim(dtfil%fnametmp_tdexcit),exist=file_exist)
 863 !  for direct access to excitation file
 864    if(nsppol==1)then
 865      inquire(iolength=recl) omega_tddft_casida(1,1,1,1,1,1,1),omega_tddft_casida(1,1,1,1,1,1,1), iexcit,jexcit
 866    else
 867      inquire(iolength=recl) omega_tddft_casida(1,1,1,1,1,1,1), iexcit,jexcit
 868    end if
 869 
 870    temp_unit2 = get_unit()
 871    open(temp_unit2, file=trim(dtfil%fnametmp_tdexcit),form='unformatted', recl=recl, access='DIRECT')
 872 
 873    ABI_ALLOCATE(done_excit,(nexcit_win,nexcit_win))
 874 
 875    if(file_exist)then
 876      write(std_out,*)'TDDFT continues from a previous run'
 877      rec=0
 878      do iexcit=1,nexcit_win
 879        iexcit2 = indarr(iexcit)
 880        isppol2 = min((iexcit2-1)/nexcit_pol(1) +1,2)
 881        iunocc2 = (iexcit2-(isppol2-1)*nexcit_pol(1)-1)/nband_occ(isppol2)+1
 882        iocc2   = iexcit2-(isppol2-1)*nexcit_pol(1)-(iunocc2-1)*nband_occ(isppol2)
 883        do jexcit=1,nexcit_win
 884          iexcit1 = indarr(jexcit)
 885          isppol1 = min((iexcit1-1)/nexcit_pol(1) +1,2)
 886          iunocc1 = (iexcit1-(isppol1-1)*nexcit_pol(1)-1)/nband_occ(isppol1)+1
 887          iocc1   = iexcit1-(isppol1-1)*nexcit_pol(1)-(iunocc1-1)*nband_occ(isppol1)
 888 
 889          rec=rec+1 ! record of the entry in the excitation file
 890          if(nsppol==1)then
 891            read(temp_unit2,rec=rec) omega_tddft_casida_dummy(1), omega_tddft_casida_dummy(2), idummy, jdummy
 892          else
 893            read(temp_unit2,rec=rec) omega_tddft_casida_dummy(1), idummy, jdummy
 894          end if
 895          done_excit(jexcit,iexcit)= ( idummy /= -1 .and. jdummy /= -1 ) ! if true, eigsqr_singlet and eigsqr_triplet are ok
 896 !        and a true is marked in the logical array done_excit
 897          if (done_excit(jexcit,iexcit)) then
 898            do sing_trip=1,2/nsppol
 899              omega_tddft_casida(iocc1,iunocc1,isppol1,iocc2,iunocc2,isppol2,sing_trip)= &
 900 &             omega_tddft_casida_dummy(sing_trip)
 901            end do
 902          end if
 903        end do
 904      end do
 905 
 906    else
 907      write(std_out,*)'no excitation matrix on disk'
 908      write(std_out,*)'TDDFT starts from scratch'
 909      done_excit = .false. ! initialize the logical array to false
 910      rec=0
 911      do iexcit=1,nexcit_win
 912        do jexcit=1,nexcit_win
 913          rec=rec+1
 914          if(nsppol==1)then
 915            write(temp_unit2,rec=rec) zero,zero,-1,-1
 916          else
 917            write(temp_unit2,rec=rec) zero,-1,-1
 918          end if
 919        end do
 920      end do
 921    end if
 922 
 923 !  Need to list the elements to compute, taking the symmetry into account: valid only for Gamma point but this is already the case
 924    count_to_do=0
 925    do iexcit=1,nexcit_win
 926      do jexcit=1,iexcit
 927        if (.not. done_excit(jexcit,iexcit)) then
 928          count_to_do=count_to_do+1
 929          excit_coords(count_to_do,1)=iexcit
 930          excit_coords(count_to_do,2)=jexcit
 931        end if
 932      end do
 933    end do
 934 
 935    ABI_DEALLOCATE(done_excit)
 936 
 937 
 938 
 939 #if defined HAVE_MPI
 940 !  Compute limits for load balancing
 941    do iproc=0,nproc_loc-1
 942      displs(iproc)=(iproc*count_to_do)/nproc_loc
 943      counts(iproc)=min(((iproc+1)*count_to_do)/nproc_loc,count_to_do)-displs(iproc)
 944    end do
 945 #endif
 946 
 947  end if ! am_master
 948 
 949 
 950 #if defined HAVE_MPI
 951  call MPI_BCAST(count_to_do,1,MPI_INTEGER,master,spaceComm,ierr)
 952 #endif
 953 
 954  displ=(me_loc*count_to_do)/nproc_loc
 955  count=min(((me_loc+1)*count_to_do)/nproc_loc,count_to_do)-displ
 956  displmax=((nproc_loc-1)*count_to_do)/nproc_loc
 957  countmax=count_to_do-displmax
 958 
 959  write(message,'(A,I6)') 'Maximum number of matrix elements per processor = ',countmax
 960  call wrtout(std_out,message,'COLL')
 961 
 962 
 963 #if defined HAVE_MPI
 964 !Need to dispatch the elements to compute to the different processes
 965  ABI_ALLOCATE(tmpbuf,(nexcit_win**2))
 966  tmpbuf=0
 967  call MPI_Scatterv(excit_coords(1,1),counts,displs,MPI_INTEGER,tmpbuf,count,MPI_INTEGER,0,spaceComm,ierr)
 968  excit_coords(:,1)=tmpbuf(:)
 969  tmpbuf=0
 970  call MPI_Scatterv(excit_coords(1,2),counts,displs,MPI_INTEGER,tmpbuf,count,MPI_INTEGER,0,spaceComm,ierr)
 971  excit_coords(:,2)=tmpbuf(:)
 972  ABI_DEALLOCATE(tmpbuf)
 973 #endif
 974 
 975  nfftdiel=ndiel1*ndiel2*ndiel3
 976  ABI_ALLOCATE(wfprod,(ndiel1,ndiel2,ndiel3))
 977  ABI_ALLOCATE(work,(nfftdiel))
 978  ABI_ALLOCATE(sexc,(3,nexcit_win))
 979  ABI_ALLOCATE(done_sexc,(nexcit_win))
 980  ABI_ALLOCATE(done_sexc2,(nexcit_win))
 981  ABI_ALLOCATE(rhog,(2,nfftdiel))
 982  ABI_ALLOCATE(vhartr,(nfftdiel))
 983 
 984  sexc(:,:)=zero
 985  done_sexc(:)=.false.
 986 
 987 !----------------------------------------------------------
 988 !Main double loop
 989 
 990  old_iexcit=0
 991  do ijexcit=1,countmax
 992 !  we really loop only through count, but we need to go through countmax
 993 !  to make sure that all processes execute MPI_Gatherv below
 994    if (ijexcit <= count) then
 995      iexcit=excit_coords(ijexcit,1)
 996      jexcit=excit_coords(ijexcit,2)
 997 
 998      iexcit2 = indarr(iexcit)
 999      isppol2 = min((iexcit2-1)/nexcit_pol(1) +1,2)
1000      iunocc2 = (iexcit2-(isppol2-1)*nexcit_pol(1)-1)/nband_occ(isppol2)+1
1001      iocc2   = iexcit2-(isppol2-1)*nexcit_pol(1)-(iunocc2-1)*nband_occ(isppol2)
1002 
1003      iexcit1 = indarr(jexcit)
1004      isppol1 = min((iexcit1-1)/nexcit_pol(1) +1,2)
1005      iunocc1 = (iexcit1-(isppol1-1)*nexcit_pol(1)-1)/nband_occ(isppol1)+1
1006      iocc1   = iexcit1-(isppol1-1)*nexcit_pol(1)-(iunocc1-1)*nband_occ(isppol1)
1007 
1008      if (old_iexcit /= iexcit) then
1009 !      We start a new column of the matrix
1010 !      DEBUG
1011 !      write(message,'(a,i5,a,i3)')'treating  iexcit =  ',iexcit,&
1012 !      &'  with proc ',me_loc
1013 !      call wrtout(std_out,message,'PERS')
1014 !      ENDDEBUG
1015 
1016 !      DEBUG
1017 !      write(message,'(a,i3)')'Multiplicating phi1 phi2, on proc ',me_loc
1018 !      call wrtout(std_out,message,'PERS')
1019 !      ENDDEBUG
1020 
1021        ifft=1
1022        do i3=1,ndiel3
1023          do i2=1,ndiel2
1024            do i1=1,ndiel1
1025              wfprod(i1,i2,i3)=wfrspa(i1,i2,i3,index_state(iocc2+(isppol2-1)*nband_k(1))) &
1026 &             *wfrspa(i1,i2,i3,index_state(iunocc2+nband_occ(isppol2)+(isppol2-1)*nband_k(1)))
1027              work(ifft)=wfprod(i1,i2,i3)
1028              ifft=ifft+1
1029            end do
1030          end do
1031        end do
1032        if (jexcit == 1) then
1033          do i3=1,ndiel3
1034            do i2=1,ndiel2
1035              do i1=1,ndiel1
1036                do idir=1,3
1037                  poscart(idir)=rprimd(idir,1)*pos(i1,1)+&
1038 &                 rprimd(idir,2)*pos(i2,2)+&
1039 &                 rprimd(idir,3)*pos(i3,3)
1040                  sexc(idir,iexcit)=sexc(idir,iexcit)+poscart(idir)*wfprod(i1,i2,i3)
1041                end do
1042              end do
1043            end do
1044            done_sexc(iexcit)=.true.
1045          end do
1046        end if
1047 
1048 !      For the singlet correction, must compute the hartre potential created
1049 !      by the product of wavefunctions
1050        cplex=1 ; qphon(:)=zero
1051 
1052 !      DEBUG
1053 !      write(message,'(a,i3)')'Before Fourdp, on proc ',me_loc
1054 !      call wrtout(std_out,message,'PERS')
1055 !      ENDDEBUG
1056 
1057        call fourdp(cplex,rhog,work,-1,mpi_enreg,nfftdiel,ngfftdiel,dtset%paral_kgb,0)
1058 
1059 !      DEBUG
1060 !      write(message,'(a,i3)')'Before Hartree, on proc ',me_loc
1061 !      call wrtout(std_out,message,'PERS')
1062 !      write(std_out,*)'CPU ',me_loc,ch10,&
1063 !      &            '   cplex : ',cplex,ch10,&
1064 !      &            '   gmet(3,3)  : ',gmet(3,3),ch10,&
1065 !      &            '   gsqcut : ',gsqcut,ch10,&
1066 !      &            '   qphon(1) : ',qphon(1),ch10,&
1067 !      &            '   rhog(1,1) :,',rhog(1,1),ch10,&
1068 !      &            '   vhartr(1) :,',vhartr(1)
1069 !      ENDDEBUG
1070 
1071        call hartre(cplex,gmet,gsqcut,0,mpi_enreg,nfftdiel,ngfftdiel,dtset%paral_kgb,qphon,rhog,vhartr)
1072 
1073 !      DEBUG
1074 !      write(message,'(a,i3)')'After Hartree, on proc ',me_loc
1075 !      call wrtout(std_out,message,'PERS')
1076 !      ENDDEBUG
1077      end if
1078      old_iexcit=iexcit
1079 
1080 !    DEBUG
1081 !    write(std_out,*)'  treating  iexcit =  ',jexcit
1082 !    write(std_out,*)'   indarr(iexcit) =',iexcit1,iocc1,iunocc1
1083 !    write(std_out,*)'   index ',index_state(iocc1+(isppol1-1)*nband_k(1)), &
1084 !    &                       index_state(iunocc1+nband_occ(isppol1)+(isppol1-1)*nband_k(1))
1085 !    ENDDEBUG
1086 
1087      if(pole_approx==0 .or. (iunocc1==iunocc2 .and. iocc1==iocc2 .and. isppol1==isppol2))then
1088        sum_kernel(:)=zero
1089        f_sing_trip(1)=two/dble(nsppol)
1090        if(nsppol==1) f_sing_trip(2)=zero
1091 !      For Fermi-Amaldi kxc, the xc contribution is -1/2 the Hartree contribution
1092 !      to the triplet state. The following factors combines both contributions.
1093        if(dtset%ixc==20 .or. dtset%ixc==22)then
1094          if(nsppol==1)then
1095            f_sing_trip(1)= one
1096            f_sing_trip(2)=-one
1097          end if
1098        end if
1099        ifft=1
1100        do i3=1,ndiel3
1101          do i2=1,ndiel2
1102            do i1=1,ndiel1
1103              myproduct=wfrspa(i1,i2,i3,index_state(iocc1+(isppol1-1)*nband_k(1))) &
1104 &             *wfrspa(i1,i2,i3,index_state(iunocc1+nband_occ(isppol1)+(isppol1-1)*nband_k(1)))
1105              do sing_trip=1,2/nsppol
1106                sum_kernel(sing_trip)=sum_kernel(sing_trip)+&
1107 &               myproduct*(f_sing_trip(sing_trip)*vhartr(ifft)+kxc_for_tddft(i1,i2,i3,isppol1,isppol2,sing_trip) &
1108                *wfprod(i1,i2,i3))
1109              end do
1110              ifft=ifft+1
1111            end do ! i1
1112          end do ! i2
1113        end do ! i3
1114 
1115 !      The factor two is coherent with the formulas of Vasiliev et al
1116        factor=two*sqrtks(iexcit1)*sqrtks(iexcit2)*normint
1117        do sing_trip=1,2/nsppol
1118          omega_tddft_casida(iocc1,iunocc1,isppol1,iocc2,iunocc2,isppol2,sing_trip)=   &
1119 &         omega_tddft_casida(iocc1,iunocc1,isppol1,iocc2,iunocc2,isppol2,sing_trip)+  &
1120 &         factor*sum_kernel(sing_trip)
1121        end do
1122 
1123 !      End condition of being diagonal element if pole approximation
1124      end if
1125 
1126 !    Continue writing excitation matrix
1127      if (am_master) then
1128 !      the master writes its results to disk
1129        if(nsppol==1)then
1130          write(temp_unit2, rec=(iexcit-1)*nexcit_win+jexcit ) &
1131 &         omega_tddft_casida(iocc1,iunocc1,isppol1,iocc2,iunocc2,isppol2,1),&
1132 &         omega_tddft_casida(iocc1,iunocc1,isppol1,iocc2,iunocc2,isppol2,2),&
1133 &         iexcit, jexcit
1134        else
1135          write(temp_unit2, rec=(iexcit-1)*nexcit_win+jexcit ) &
1136 &         omega_tddft_casida(iocc1,iunocc1,isppol1,iocc2,iunocc2,isppol2,1), &
1137 &         iexcit, jexcit
1138        end if
1139 
1140 !      DEBUG
1141 !      if(nsppol==1)then
1142 !      write(std_out,*)'singlet: ',omega_tddft_casida(iocc1,iunocc1,isppol1,iocc2,iunocc2,isppol2,1),&
1143 !      &             'iexcit: ',iexcit,'jexcit :',jexcit
1144 !      write(std_out,*)'triplet: ',omega_tddft_casida(iocc1,iunocc1,isppol1,iocc2,iunocc2,isppol2,2),&
1145 !      &             'iexcit: ',iexcit,'jexcit :',jexcit
1146 !      else
1147 !      write(std_out,*)'excitation: ',omega_tddft_casida(iocc1,iunocc1,isppol1,iocc2,iunocc2,isppol2,1),&
1148 !      &             'iexcit: ',iexcit,'jexcit :',jexcit
1149 !      endif
1150 !      ENDDEBUG
1151 
1152        sendcount=0
1153      else
1154        sendcount=5-nsppol
1155 
1156        if(nsppol==1)then
1157          sendbuf=(/ omega_tddft_casida(iocc1,iunocc1,isppol1,iocc2,iunocc2,isppol2,1), &
1158 &         omega_tddft_casida(iocc1,iunocc1,isppol1,iocc2,iunocc2,isppol2,2), &
1159 &         real(iexcit,dp), real(jexcit,dp) /)
1160        else
1161          sendbuf=(/ omega_tddft_casida(iocc1,iunocc1,isppol1,iocc2,iunocc2,isppol2,1), &
1162 &         real(iexcit,dp), real(jexcit,dp) /)
1163        end if
1164      end if ! am_master
1165    else
1166 !    ijexcit > count
1167 
1168 !    done with local work, so send message of zero length
1169      sendcount=0
1170 
1171    end if ! ijexcit <= count
1172 
1173 #if defined HAVE_MPI
1174    if (am_master) then
1175 
1176 !    Compute displacements and counts for the gathering of the results
1177      displs(0)=0
1178      recvcounts(0)=0
1179      do iproc=1,nproc_loc-1
1180        recvcounts(iproc)=min(((iproc+1)*count_to_do)/nproc_loc,count_to_do)-(iproc*count_to_do)/nproc_loc
1181        if (recvcounts(iproc) < countmax .and. ijexcit==countmax) then
1182          recvcounts(iproc)=0
1183        else
1184          recvcounts(iproc)=5-nsppol
1185        end if
1186        displs(iproc)=displs(iproc-1)+recvcounts(iproc-1)
1187      end do
1188    end if
1189 
1190 !  ***********************************************
1191 !  ***** I have to ask about that ****************
1192 !  ***********************************************
1193    call MPI_Gatherv(sendbuf,sendcount,MPI_DOUBLE_PRECISION,recvbuf,recvcounts,displs, &
1194 &   MPI_DOUBLE_PRECISION,0,spaceComm,ierr)
1195 
1196    if (am_master) then
1197 
1198 !    Extract eigsqr_singlet, eigsqr_triplet, iexcit, jexcit from receive buffer and
1199 !    write to file
1200      do ijexcit2=1,sum(recvcounts)/(5-nsppol)
1201        iexcit=int(recvbuf(4-nsppol,ijexcit2))
1202        jexcit=int(recvbuf(5-nsppol,ijexcit2))
1203 
1204        iexcit2 = indarr(iexcit)
1205        isppol2 = min((iexcit2-1)/nexcit_pol(1) +1,2)
1206        iunocc2 = (iexcit2-(isppol2-1)*nexcit_pol(1)-1)/nband_occ(isppol2)+1
1207        iocc2   = iexcit2-(isppol2-1)*nexcit_pol(1)-(iunocc2-1)*nband_occ(isppol2)
1208 
1209        iexcit1 = indarr(jexcit)
1210        isppol1 = min((iexcit1-1)/nexcit_pol(1) +1,2)
1211        iunocc1 = (iexcit1-(isppol1-1)*nexcit_pol(1)-1)/nband_occ(isppol1)+1
1212        iocc1   = iexcit1-(isppol1-1)*nexcit_pol(1)-(iunocc1-1)*nband_occ(isppol1)
1213 
1214        do sing_trip=1,2/nsppol
1215          omega_tddft_casida(iocc1,iunocc1,isppol1,iocc2,iunocc2,isppol2,sing_trip)= &
1216 &         recvbuf(sing_trip,ijexcit2)
1217        end do
1218 
1219        if(nsppol==1)then
1220          write(temp_unit2, rec=(iexcit-1)*nexcit_win+jexcit ) &
1221 &         omega_tddft_casida(iocc1,iunocc1,isppol1,iocc2,iunocc2,isppol2,1), &
1222 &         omega_tddft_casida(iocc1,iunocc1,isppol1,iocc2,iunocc2,isppol2,2), &
1223 &         iexcit, jexcit
1224        else
1225          write(temp_unit2, rec=(iexcit-1)*nexcit_win+jexcit ) &
1226          omega_tddft_casida(iocc1,iunocc1,isppol1,iocc2,iunocc2,isppol2,1), &
1227 &         iexcit, jexcit
1228        end if
1229 !      DEBUG
1230 !      if(nsppol==1)then
1231 !      write(std_out,*)'singlet: ',omega_tddft_casida(iocc1,iunocc1,isppol1,iocc2,iunocc2,isppol2,1), &
1232 !      &                       'iexcit: ',iexcit,'jexcit :',jexcit
1233 !      write(std_out,*)'triplet: ',omega_tddft_casida(iocc1,iunocc1,isppol1,iocc2,iunocc2,isppol2,2),
1234 !      &                       'iexcit: ',iexcit,'jexcit :',jexcit
1235 !      else
1236 !      write(std_out,*)'excitation: ',omega_tddft_casida(iocc1,iunocc1,isppol1,iocc2,iunocc2,isppol2,1), &
1237 !      &                        'iexcit: ',iexcit,'jexcit :',jexcit
1238 !      endif
1239 !      ENDDEBUG
1240 
1241      end do
1242 
1243    end if
1244 #endif
1245 
1246 !  End indices loops
1247  end do ! ijexcit
1248 
1249 !End of the main double loop
1250 !--------------------------------------------------------------------
1251 
1252 
1253 #if defined HAVE_MPI
1254 !sexc needs to be summed here since it used only by master
1255  call xmpi_barrier(spaceComm)
1256 !call xmpi_sum_master(sexc,master,spaceComm,ierr) ! Does not work on some machines
1257  call xmpi_sum(sexc,spaceComm,ierr)
1258  done_sexc2=done_sexc
1259  call MPI_Reduce(done_sexc2,done_sexc,nexcit_win,MPI_LOGICAL,MPI_LOR,master,spaceComm,ierr)
1260 #endif
1261 
1262 
1263  if (am_master) then
1264 !  We compute sexc again if it was not done. Will only be executed if
1265 !  there was a restart from values read from logical unit temp_unit2.
1266 
1267    do iexcit=1,nexcit_win
1268 
1269 !    DEBUG
1270 !    write(std_out,*)'do on excitation',iexcit
1271 !    END DEBUG
1272 
1273      if (.not.done_sexc(iexcit)) then
1274        iexcit2 = indarr(iexcit)
1275        isppol2 = min((iexcit2-1)/nexcit_pol(1) +1,2)
1276        iunocc2 = (iexcit1-(isppol2-1)*nexcit_pol(1)-1)/nband_occ(isppol2)+1
1277        iocc2   = iexcit2-(isppol2-1)*nexcit_pol(1)-(iunocc2-1)*nband_occ(isppol2)
1278        do i3=1,ndiel3
1279          do i2=1,ndiel2
1280            do i1=1,ndiel1
1281              wfprod(i1,i2,i3)=wfrspa(i1,i2,i3,index_state(iocc2+(isppol2-1)*nband_k(1))) &
1282 &             *wfrspa(i1,i2,i3,index_state(iunocc2+nband_occ(isppol2)+ (isppol2-1)*nband_k(1)))
1283              do idir=1,3
1284                poscart(idir)=rprimd(idir,1)*pos(i1,1)+&
1285 &               rprimd(idir,2)*pos(i2,2)+&
1286 &               rprimd(idir,3)*pos(i3,3)
1287                sexc(idir,iexcit)=sexc(idir,iexcit)+poscart(idir)*wfprod(i1,i2,i3)
1288              end do
1289            end do
1290          end do
1291        end do
1292      end if
1293    end do
1294  end if
1295 
1296  ABI_DEALLOCATE(work)
1297  ABI_DEALLOCATE(rhog)
1298  ABI_DEALLOCATE(pos)
1299  ABI_DEALLOCATE(vhartr)
1300  ABI_DEALLOCATE(kxc_for_tddft)
1301  ABI_DEALLOCATE(wfprod)
1302  ABI_DEALLOCATE(index_state)
1303  ABI_DEALLOCATE(excit_coords)
1304  ABI_DEALLOCATE(wfrspa)
1305 
1306 !Write the first excitation energies
1307  write(message, '(a,a,es18.8,a,a,a,a,a,a,a,a,a)' )ch10,&
1308 & '  Ground state total energy (Ha) :',etotal,ch10,ch10,&
1309 & '  Kohn-Sham energy differences,',ch10,&
1310 & '  corresponding total energies and oscillator strengths (X,Y,Z and average)-',ch10,&
1311 & '  (oscillator strengths smaller than 1.e-6 are set to zero)',ch10,&
1312 & '  Transition  (Ha)  and   (eV)   Tot. Ene. (Ha)  Aver     XX       YY       ZZ'
1313  call wrtout(ab_out,message,'COLL')
1314  call wrtout(std_out,message,'COLL')
1315 
1316 #if defined HAVE_MPI
1317  ABI_DEALLOCATE(counts)
1318  ABI_DEALLOCATE(displs)
1319  ABI_DEALLOCATE(recvbuf)
1320  ABI_DEALLOCATE(recvcounts)
1321 #endif
1322 
1323  if (am_master) then
1324 
1325    ABI_ALLOCATE(osc_str,(7,nexcit))
1326 
1327    do iexcit=1,nexcit_win
1328      iexcit2 = indarr(iexcit)
1329      isppol = min((iexcit2-1)/nexcit_pol(1) +1,2)
1330      iunocc = (iexcit2-(isppol-1)*nexcit_pol(1)-1)/nband_occ(isppol)+1
1331      iocc   = iexcit2-(isppol-1)*nexcit_pol(1)-(iunocc-1)*nband_occ(isppol)
1332 
1333      osc_str(1,iexcit)=zero
1334      do idir=1,3
1335 !      One of the factor of two comes from the spin degeneracy,
1336 !      the other comes from Eq.(40) of Casida
1337        osc_str(idir+1,iexcit)=&
1338 &       (sexc(idir,iexcit)*sqrtks(iexcit2)*normint*ucvol)**2*two*two/nsppol
1339        osc_str(1,iexcit)=osc_str(1,iexcit)&
1340 &       +osc_str(idir+1,iexcit)*third
1341      end do
1342      do ii=1,4
1343        if(abs(osc_str(ii,iexcit))<tol6)osc_str(ii,iexcit)=zero
1344      end do
1345 !    Changed, the whole spectrum is written
1346 !    The array eexcit has been reordered previously, the others also
1347      if(nsppol==1)then
1348        write(message, '(i4,a,i3,2es12.5,es13.5,es11.4,3es9.2)' ) &
1349 &       iocc,'->',iunocc+nband_occ(isppol),            &
1350 &       eexcit(iexcit), eexcit(iexcit)*Ha_eV,eexcit(iexcit)+etotal, &
1351 !      XG 020209 : Jean-Yves, I assume that the printout of sexc is for debugging ?!
1352 !      &   osc_str(1:4,iexcit),sexc(1:3,iexcit)
1353 &       osc_str(1:4,iexcit)
1354      else
1355        write(message, '(i4,a,i3,a,i1,2es12.5,es13.5,es11.4,3es9.2)' ) &
1356 &       iocc,'->',iunocc+nband_occ(isppol),' s:',isppol,            &
1357 &       eexcit(iexcit), eexcit(iexcit)*Ha_eV,eexcit(iexcit)+etotal, &
1358 &       osc_str(1:4,iexcit)
1359      end if
1360      call wrtout(ab_out,message,'COLL')
1361      call wrtout(std_out,message,'COLL')
1362 
1363    end do
1364 
1365 !  Check of the Sum rule for Casida eq.47,
1366 !  only exact if complete basis of excitations, as well as local potentials only.
1367    sumx=zero
1368    do iexcit=1,nexcit_win
1369      sumx=sumx+osc_str(1,iexcit)
1370    end do
1371    write(message, '(a,es16.6)' )'  Sum of osc. strength : ',sumx
1372    call wrtout(ab_out,message,'COLL')
1373    call wrtout(std_out,message,'COLL')
1374 
1375 !  -Diagonalize the excitation matrices----------------------------
1376 
1377    ABI_ALLOCATE(eexcit2,(nexcit_win))
1378    ABI_ALLOCATE(vec,(2,nexcit_win,nexcit_win))
1379 
1380    do sing_trip=1,2/nsppol
1381 
1382      if(pole_approx==0)then
1383 
1384        ABI_ALLOCATE(matr,(nexcit_win*(nexcit_win+1)))
1385        ABI_ALLOCATE(zhpev1,(2,2*nexcit_win-1))
1386        ABI_ALLOCATE(zhpev2,(3*nexcit_win-2))
1387        matr(:)=zero
1388        ier=0
1389 !      DEBUG
1390 !      write(std_out,*)' after allocation matrices     '
1391 !      ENDDEBUG
1392 
1393 
1394 !      Store the matrix in proper mode before calling zhpev
1395 
1396        index=1
1397        do iexcit=1,nexcit_win
1398          iexcit2 = indarr(iexcit)
1399          isppol2 = min((iexcit2-1)/nexcit_pol(1) +1,2)
1400          iunocc2 = (iexcit2-(isppol2-1)*nexcit_pol(1)-1)/nband_occ(isppol2)+1
1401          iocc2   = iexcit2-(isppol2-1)*nexcit_pol(1)-(iunocc2-1)*nband_occ(isppol2)
1402          do jexcit=1,iexcit
1403            iexcit1 = indarr(jexcit)
1404            isppol1 = min((iexcit1-1)/nexcit_pol(1) +1,2)
1405            iunocc1 = (iexcit1-(isppol1-1)*nexcit_pol(1)-1)/nband_occ(isppol1)+1
1406            iocc1   = iexcit1-(isppol1-1)*nexcit_pol(1)-(iunocc1-1)*nband_occ(isppol1)
1407 
1408            matr(index)=omega_tddft_casida(iocc1,iunocc1,isppol1,iocc2,iunocc2,isppol2,sing_trip)
1409            matr(index+1)=zero
1410            index=index+2
1411          end do
1412 
1413        end do
1414 
1415 !      DEBUG
1416 !      write(std_out,*)' after filling    matrices     '
1417 !      ENDDEBUG
1418 
1419        call ZHPEV ('V','U',nexcit_win,matr,eexcit2,vec,nexcit_win,zhpev1,&
1420 &       zhpev2,ier)
1421 
1422        ABI_DEALLOCATE(matr)
1423        ABI_DEALLOCATE(zhpev1)
1424        ABI_DEALLOCATE(zhpev2)
1425 !      DEBUG
1426 !      write(std_out,*)' after deallocating matrices     '
1427 !      ENDDEBUG
1428 
1429 
1430      else
1431 
1432        vec(:,:,:)=zero
1433        do isppol=1, nsppol
1434          do iunocc=1,nband_k(isppol)
1435            do iocc=1,nband_k(isppol)
1436              index=iocc+nband_k(isppol)*(iunocc-1)+(isppol-1)*nexcit_pol(1)
1437              eexcit2(index)=omega_tddft_casida(iocc,iunocc,isppol,iocc,iunocc,isppol,sing_trip)
1438              vec(1,index,index)=one
1439            end do
1440          end do
1441        end do
1442 
1443      end if
1444 
1445 !    Compute the excitation energies from the square root of eexcit2
1446 !    eexcit(:)=sqrt(eexcit2(:)
1447 
1448      ABI_DEALLOCATE(eexcit)
1449      ABI_ALLOCATE(eexcit,(nexcit_win))
1450 
1451      eexcit(:)=sqrt(dabs(eexcit2(:))+tol10**2)
1452 !    Write the first excitation energies
1453      if(sing_trip==1)then
1454        if(nsppol==1)then
1455          write(message, '(a,a,a,a,a,a)' )ch10,&
1456 &         '  TDDFT singlet excitation energies (at most 20 of them are printed),',ch10,&
1457 &         '  and corresponding total energies.                ',ch10,&
1458 &         '  Excit#   (Ha)    and    (eV)    total energy (Ha)    major contributions '
1459        else
1460          write(message, '(a,a,a,a,a,a)' )ch10,&
1461 &         '  TDDFT   mixed excitation energies (at most 40 of them are printed),',ch10,&
1462 &         '  and corresponding total energies.                ',ch10,&
1463 &         '  Excit#   (Ha)    and    (eV)    total energy (Ha)    major contributions '
1464        end if
1465      else
1466        write(message, '(a,a,a,a,a,a)' )ch10,&
1467 &       '  TDDFT triplet excitation energies (at most 20 of them are printed),',ch10,&
1468 &       '  and corresponding total energies.                ',ch10,&
1469 &       '  Excit#   (Ha)    and    (eV)    total energy (Ha)    major contributions '
1470      end if
1471      call wrtout(ab_out,message,'COLL')
1472      call wrtout(std_out,message,'COLL')
1473      call wrtout(std_out,' tddft : before iexcit loop',"COLL")
1474 
1475      do iexcit=1,min(nexcit_win,nexcitout)
1476        write(std_out,*)' tddft : iexcit=',iexcit
1477 !      Select largest and next contributions
1478        flargest=zero ; fnext=zero
1479        nlargest=0 ; nnext=0
1480        if(nsppol==2)then
1481          fnext1=zero  ;  fnext2=zero
1482          nnext1=0  ;  nnext2=0
1483        end if
1484        do jexcit=1,nexcit_win
1485          ff=vec(1,jexcit,iexcit)**2+vec(2,jexcit,iexcit)**2
1486          if(ff>flargest+tol12)then
1487            if(nsppol==2)then
1488              nnext2=nnext1  ; fnext2=fnext1
1489              nnext1=nnext   ; fnext1=fnext
1490            end if
1491            nnext=nlargest ; fnext=flargest
1492            nlargest=indarr(jexcit) ; flargest=ff
1493          else if(ff>fnext+tol12)then
1494            if(nsppol==2)then
1495              nnext2=nnext1 ; fnext2=fnext1
1496              nnext1=nnext  ; fnext1=fnext
1497            end if
1498            nnext=indarr(jexcit) ; fnext=ff
1499          else if(nsppol==2)then
1500            if(ff>fnext1+tol12)then
1501              nnext2=nnext1  ; fnext2=fnext1
1502              nnext1=indarr(jexcit) ; fnext1=ff
1503            else if(ff>fnext2+tol12)then
1504              nnext2=indarr(jexcit) ; fnext2=ff
1505            end if
1506          end if
1507 
1508        end do
1509 
1510        isppol_l = min((nlargest-1)/nexcit_pol(1) +1,nsppol)
1511        iunocc_l = (nlargest-(isppol_l-1)*nexcit_pol(1)-1)/nband_occ(isppol_l)+1
1512        iocc_l   = nlargest-(isppol_l-1)*nexcit_pol(1)-(iunocc_l-1)*nband_occ(isppol_l)
1513        isppol_n = min((nnext-1)/nexcit_pol(1) +1,nsppol)
1514        iunocc_n = (nnext-(isppol_n-1)*nexcit_pol(1)-1)/nband_occ(isppol_n)+1
1515        iocc_n   = nnext-(isppol_n-1)*nexcit_pol(1)-(iunocc_n-1)*nband_occ(isppol_n)
1516        if(nsppol==2)then
1517          isppol_n1 = min((nnext1-1)/nexcit_pol(1) +1,nsppol)
1518          iunocc_n1 = (nnext1-(isppol_n1-1)*nexcit_pol(1)-1)/nband_occ(isppol_n1)+1
1519          iocc_n1   = nnext1-(isppol_n1-1)*nexcit_pol(1)-(iunocc_n1-1)*nband_occ(isppol_n1)
1520          isppol_n2 = min((nnext2-1)/nexcit_pol(1) +1,nsppol)
1521          iunocc_n2 = (nnext2-(isppol_n2-1)*nexcit_pol(1)-1)/nband_occ(isppol_n2)+1
1522          iocc_n2   = nnext2-(isppol_n2-1)*nexcit_pol(1)-(iunocc_n2-1)*nband_occ(isppol_n2)
1523        end if
1524 
1525        if(nsppol==1)then
1526          write(message,'(i4,es15.5,es14.5,es16.6,f8.2,a,i3,a,i3,a,f6.2,a,i3,a,i3,a)') &
1527 &         iexcit,eexcit(iexcit),&
1528 &         eexcit(iexcit)*Ha_eV,eexcit(iexcit)+etotal,&
1529 &         flargest,'(',iocc_l,'->',iunocc_l+nband_occ(1),')',&
1530 &         fnext,   '(',iocc_n,'->',iunocc_n+nband_occ(1),')'
1531          call wrtout(ab_out,message,'COLL')
1532          call wrtout(std_out,message,'COLL')
1533        else
1534          write(chain1,'(f8.2,a,i3,a,i3,a)')flargest,'(',iocc_l,'->',iunocc_l+nband_occ(isppol_l),')'
1535          write(chain2,'(f8.2,a,i3,a,i3,a)')fnext,'(',iocc_n,'->',iunocc_n+nband_occ(isppol_n),')'
1536          if(trim(chain1)==trim(chain2))then
1537            write(message,'(i4,es15.5,es14.5,es16.6,a,a,a,a)') &
1538 &           iexcit,eexcit(iexcit),&
1539 &           eexcit(iexcit)*Ha_eV,eexcit(iexcit)+etotal,trim(chain1),'(1)',trim(chain2),'(2)'
1540          else
1541            write(message,'(i4,es15.5,es14.5,es16.6,a,a,i1,a,a,a,i1,a)') &
1542 &           iexcit,eexcit(iexcit),&
1543 &           eexcit(iexcit)*Ha_eV,eexcit(iexcit)+etotal,trim(chain1),'(',isppol_l,')',&
1544 &           trim(chain2),'(',isppol_n,')'
1545          end if
1546          call wrtout(ab_out,message,'COLL')
1547          call wrtout(std_out,message,'COLL')
1548          write(chain1,'(f8.2,a,i3,a,i3,a)')fnext1,'(',iocc_n1,'->',iunocc_n1+nband_occ(isppol_n1),')'
1549          write(chain2,'(f8.2,a,i3,a,i3,a)')fnext2,'(',iocc_n2,'->',iunocc_n2+nband_occ(isppol_n2),')'
1550          if(trim(chain1)==trim(chain2))then
1551            write(message,'(a,a,a,a,a)' ) &
1552 &           '                                                 ',&
1553 &           chain1,'(1)',chain2,'(2)'
1554          else
1555            write(message,'(a,a,a,i1,a,a,a,i1,a)' ) &
1556 &           '                                                 ',&
1557 &           chain1,'(',isppol_n1,')',&
1558 &           chain2,'(',isppol_n2,')'
1559          end if
1560          call wrtout(ab_out,message,'COLL')
1561          call wrtout(std_out,message,'COLL')
1562        end if
1563      end do
1564 
1565 !    For each iexcit excitation, compute the oscillator strength (Casida, eq 47)
1566      write(message, '(a,a,a,a)' )ch10,&
1567 &     '  Oscillator strengths :  (elements smaller than 1.e-6 are set to zero)',ch10,&
1568 &     '  Excit#   (Ha)   Average    XX        YY        ZZ         XY        XZ        YZ'
1569      call wrtout(ab_out,message,'COLL')
1570      call wrtout(std_out,message,'COLL')
1571 
1572      do iexcit=1,nexcit_win
1573 
1574 !      One of the factor of two comes from the spin degeneracy,
1575 !      the other comes from Eq.(40) of Casida
1576        factor=(normint*ucvol)**2*two*two/nsppol
1577 
1578        osc_str(:,iexcit)=zero
1579 
1580 !      factor=(normint*ucvol)**2*two*two/nsppol
1581 
1582        do jexcit=1,nexcit_win
1583          jexcit_cbase=indarr(jexcit)
1584          do idir=1,3
1585            osc_str(idir+1,iexcit)=osc_str(idir+1,iexcit)+ &
1586 &           sexc(idir,jexcit)*sqrtks(jexcit_cbase)*sqrt(factor)* &
1587 &           vec(1,jexcit,iexcit)
1588          end do ! idir
1589        end do ! jexcit
1590 
1591 !      The "standard" definition of the oscillator strength is the square
1592 !      of the matrix elements.
1593 !      So, instead of the coding
1594 !      do idir=1,3
1595 !      osc_str(1,iexcit)=osc_str(1,iexcit)+osc_str(idir+1,iexcit)**2*third
1596 !      enddo
1597 !      I think that the following is more "standard"
1598 !      Now, osc_str(2:4,iexcit) are the X, Y and Z matrix elements, not
1599 !      yet the oscillator strengths
1600        osc_str(5,iexcit)=osc_str(2,iexcit)*osc_str(3,iexcit)   ! off diag XY
1601        osc_str(6,iexcit)=osc_str(2,iexcit)*osc_str(4,iexcit)   ! off diag XZ
1602        osc_str(7,iexcit)=osc_str(3,iexcit)*osc_str(4,iexcit)   ! off diag ZZ
1603        do idir=1,3
1604 !        Here the X,Y, and Z matrix elements are combined to give diagonal osc. strengths
1605          osc_str(idir+1,iexcit)=osc_str(idir+1,iexcit)**2
1606          osc_str(1,iexcit)=osc_str(1,iexcit)+osc_str(idir+1,iexcit)*third ! compute the trace
1607        end do
1608 !      At this stage, osc_str(1,iexcit) is exactly the same as from your coding
1609 !      ***End of section to be checked
1610 
1611        do ii=1,7
1612          if(abs(osc_str(ii,iexcit))<tol6)osc_str(ii,iexcit)=zero
1613        end do
1614 !      XG 020209 : Jean-Yves, the off-diagonal oscillator strengths
1615 !      can become negative. It is important for automatic
1616 !      checking that the numbers are separated by a blank, even
1617 !      if they are negative. So replace the following format, to have at least one blank.
1618        write(message, '(i4,es12.5,es10.3,3es10.3,3es10.2)' )iexcit,eexcit(iexcit),osc_str(1:7,iexcit)
1619        call wrtout(ab_out,message,'COLL')
1620        call wrtout(std_out,message,'COLL')
1621      end do
1622 
1623 !    Check of the Sum rule for Casida eq.47,
1624 !    only exact if complete basis of excitations, as well as local potentials only.
1625      sumx=zero
1626      do iexcit=1,nexcit_win
1627        sumx=sumx+osc_str(1,iexcit)
1628      end do
1629      write(message, '(a,es16.6)' )'  Sum of osc. strength : ',sumx
1630      call wrtout(ab_out,message,'COLL')
1631      call wrtout(std_out,message,'COLL')
1632 
1633 !    If singlet, compute Cauchy coefficients
1634      if(sing_trip==1.AND.nsppol==1)then
1635        cauchy(:)=zero
1636        do iexcit=1,nexcit_win
1637          fr_invsquare=one/(eexcit(iexcit)**2)
1638          fr_power=one
1639          do ii=1,7
1640            fr_power=fr_power*fr_invsquare
1641            cauchy(ii)=cauchy(ii)+osc_str(1,iexcit)*fr_power
1642          end do
1643        end do
1644        write(message, '(a,es11.3,a,es11.3,a,es11.3,a,a,es11.3,a,es11.3,a,es11.3,a,es11.3)' ) &
1645 &       '  Cauchy coeffs (au) : ( -2)->',cauchy(1),&
1646 &       ', ( -4)->',cauchy(2),', ( -6)->',cauchy(3),ch10,&
1647 &       '    (-8)->',cauchy(4),', (-10)->',cauchy(5),', (-12)->',cauchy(6),', (-14)->',cauchy(7)
1648        call wrtout(ab_out,message,'COLL')
1649        call wrtout(std_out,message,'COLL')
1650      end if
1651 
1652 !    End the loop on singlet or triplet
1653    end do
1654 
1655    ABI_DEALLOCATE(eexcit2)
1656    ABI_DEALLOCATE(vec)
1657    ABI_DEALLOCATE(osc_str)
1658 
1659 !! The temporary files should be deleted at the end of this routine
1660    close(temp_unit2,status='delete')
1661    call timab(95,2,tsec)
1662  end if  ! end of am_master
1663 
1664  ABI_DEALLOCATE(omega_tddft_casida)
1665  ABI_DEALLOCATE(eexcit)
1666  ABI_DEALLOCATE(sqrtks)
1667  ABI_DEALLOCATE(sexc)
1668  ABI_DEALLOCATE(done_sexc)
1669  ABI_DEALLOCATE(indarr)
1670  ABI_DEALLOCATE(done_sexc2)
1671 
1672 end subroutine tddft