TABLE OF CONTENTS


ABINIT/m_tddft [ Modules ]

[ Top ] [ Modules ]

NAME

  m_tddft

FUNCTION

  Routines for computing excitation energies within TDDFT

COPYRIGHT

 Copyright (C) 1999-2024 ABINIT group (XG, JYR, MB, MBELAND, SHAMEL)
 This file is distributed under the terms of the
 GNU General Public License, see ~abinit/COPYING
 or http://www.gnu.org/copyleft/gpl.txt .
 For the initials of contributors, see ~abinit/doc/developers/contributors.txt .

NOTES

SOURCE

19 #if defined HAVE_CONFIG_H
20 #include "config.h"
21 #endif
22 
23 #include "abi_common.h"
24 
25 module m_tddft
26 
27  use defs_basis
28  use m_abicore
29  use m_xmpi
30  use m_errors
31  use m_wffile
32  use m_sort
33  use m_dtset
34  use m_dtfil
35  use m_xmpi, only : xmpi_bcast,xmpi_gatherv,xmpi_scatterv
36  use iso_c_binding, only : c_ptr,c_loc,c_f_pointer
37 
38  use defs_abitypes, only : MPI_type
39  use m_io_tools, only : get_unit
40  use m_symtk,    only : matr3inv
41  use m_time,     only : timab
42  use m_fftcore,  only : sphereboundary
43  use m_spacepar, only : hartre
44  use m_mpinfo,   only : proc_distrb_cycle
45  use m_fft,      only : fourwf, fourdp
46 
47  implicit none
48 
49  private

m_tddft/tddft [ Functions ]

[ Top ] [ m_tddft ] [ Functions ]

NAME

 tddft

FUNCTION

 Compute the excitation energies within TDLDA
 from input wavefunctions, eigenenergies, and band occupations.

INPUTS

  cg(2,mpw*nspinor*mband*mkmem*nsppol)=wf in G space
  dtfil <type(datafiles_type)>=variables related to files
  dtset <type(dataset_type)>=all input variables for this dataset
  eigen(mband*nkpt*nsppol)=array for holding eigenvalues (hartree)
  etotal=total energy of the ground-state (Ha)
  gmet(3,3)=metrix tensor in G space in Bohr**-2.
  gprimd(3,3)=dimensional reciprocal space primitive translations
  gsqcut=cutoff on (k+G)^2 (bohr^-2)
  kg(3,mpw*mkmem)=reduced planewave coordinates.
  kxc(nfft,nkxc)=exchange-correlation kernel
  mband=maximum number of bands
  mgfftdiel=maximum size of 1D FFTs, for the computation of the dielectric matrix
  mkmem=number of k-points treated by this node.
  mpi_enreg=information about MPI parallelization
  mpw=maximum allowed value for npw
  nfft=(effective) number of FFT grid points (for this processor)
       WARNING about parallelization: see below
  ngfftdiel(18)=contain all needed information about 3D FFT, for dielectric matrix,
                see ~abinit/doc/variables/vargs.htm#ngfft
  nkpt=number of k points
  nkxc=second dimension of the array kxc (see rhotoxc for a description)
  npwarr(nkpt)=number of planewaves at each k point
  nspinor=number of spinorial components of the wavefunctions
  nsppol=1 for unpolarized, 2 for spin-polarized
  occ(mband*nkpt*nsppol)=
          occupation numbers for each band (usually 2.0) at each k point
  ucvol=unit cell volume (Bohr**3)
  wffnew=unit number for current wf disk file

OUTPUT

  (only writing)

 WARNING:
 This routine should not be parallelized on space for the time being,
    because the already existing parallelisation is not the usual one, found
    in the majority of ABINIT routines.

NOTES

 * Only accept nspinor=1, nsppol=1, nkpt=1 (Gamma point), and occopt<3
   (insulating occupation numbers).
   It is expected to make it work for nsppol=2 in the future.

 * For the oscillator strengths, see the paper
   ''Time-Dependent Density Functional Response Theory of Molecular
     systems: Theory, Computational Methods, and Functionals'', by M.E. Casida,
   in Recent Developments and Applications of Modern Density Functional
   Theory, edited by J.M. Seminario (Elsevier, Amsterdam, 1996).

SOURCE

 120  subroutine tddft(cg,dtfil,dtset,eigen,etotal,gmet,gprimd,gsqcut,&
 121 &  kg,kxc,mband,mgfftdiel,mkmem,mpi_enreg,mpw,nfft,ngfftdiel,nkpt,nkxc,&
 122 &  npwarr,nspinor,nsppol,occ,ucvol,wffnew)
 123 
 124 !Arguments ------------------------------------
 125  integer, intent(in) :: mband,mgfftdiel,mkmem,mpw,nfft,nkpt,nkxc,nsppol
 126  integer, intent(in) :: nspinor
 127  real(dp), intent(in) :: etotal,gsqcut,ucvol
 128  type(datafiles_type), intent(in) :: dtfil
 129  type(dataset_type), intent(in) :: dtset
 130  type(MPI_type), intent(in) :: mpi_enreg
 131  type(wffile_type), intent(inout) :: wffnew
 132  integer, intent(in) :: kg(3,mpw*mkmem),ngfftdiel(18),npwarr(nkpt)
 133  real(dp), intent(in) :: cg(2,mpw*nspinor*mband*mkmem*nsppol),eigen(mband*nkpt*nsppol)
 134  real(dp), intent(in) :: gmet(3,3),gprimd(3,3),kxc(nfft,nkxc),occ(mband*nkpt*nsppol)
 135 
 136 !Local variables-------------------------------
 137  integer,parameter :: nexcitout=20
 138  integer :: cplex,i1,i2,i3,iband,idir,ier,ierr,iexcit,iexcit1,iexcit2,ifft
 139  integer :: old_iexcit,ii,jj,isppol,jsppol,isppol1,isppol2,isppol_l,isppol_n
 140  integer :: isppol_n1,isppol_n2,iocc_n1,iocc_n2,iunocc_n1,iunocc_n2,temp_unit2
 141  integer :: ikpt,index,iocc,iocc1,iocc2,iocc_l,iocc_n
 142  integer :: istwf_k,iunocc,iunocc1,iunocc2,iunocc_l,iunocc_n,istate,jexcit
 143  integer :: jexcit_cbase,master,mcg_disk,me_loc
 144  integer :: nband_k(nsppol), nband_occ(nsppol), nband_unocc(nsppol)
 145  integer :: nstate_k, nstate_occ, nstate_unocc, nexcit_pol(nsppol)
 146  integer :: nstate_win,ndiel,ndiel1,ndiel2,ndiel3,ndiel4
 147  integer :: ndiel5,ndiel6,nexcit,nexcit_max,nexcit_win,nfftdiel,nlargest,nnext
 148  integer :: nnext1,nnext2
 149  integer :: nproc_loc,npw_k,pole_approx,sing_trip,spaceComm,mtag,tim_fourwf
 150  integer :: tim_rwwf,save_iomode
 151  integer :: rec,recl,idummy,jdummy
 152  real(dp) :: buffer,buffer_inv,diffeig,eigunocc,emax_win
 153  real(dp) :: factor,ff,flargest,fnext,fr_invsquare,fr_power
 154  real(dp) :: fnext1,fnext2
 155  real(dp) :: normint,myproduct,saa,sab,sbb
 156  real(dp) :: sumx
 157  real(dp) :: sum_kernel(2/nsppol)
 158  real(dp) :: weight,xx
 159  logical :: am_master,file_exist
 160  logical, allocatable :: done_excit(:,:),done_sexc(:) !,done_sexc2(:)
 161  character(len=18) :: chain1,chain2
 162  character(len=500) :: message
 163  integer,allocatable :: flag_state_win(:),gbound(:,:),indarr(:),index_state(:)
 164  integer,allocatable :: kg_k(:,:)
 165  integer,allocatable :: excit_coords(:,:)
 166  integer :: count_to_do, count, displ, countmax, displmax
 167  integer :: ijexcit, ijexcit2, sendcount
 168  real(dp) :: f_sing_trip(2/nsppol),sendbuf(5-nsppol)
 169  real(dp) :: cauchy(7),poscart(3),rprimd(3,3),tsec(2),dummy(2,1)
 170  integer :: iomode,action,me,nmaster,sender,source,sread,sskip
 171  integer :: formeig,icg,ikg,nband_k_
 172  logical :: mydata, tmaster, swrite
 173  integer,allocatable ::  kg_disk(:,:)
 174  integer,allocatable :: counts(:),displs(:),recvcounts(:),tmpbuf(:)
 175  real(dp),allocatable :: cg_disk(:,:),cg_tmp(:,:)
 176  real(dp),allocatable :: cwavef(:,:),eexcit(:)
 177  real(dp),allocatable :: eexcit2(:)
 178  real(dp),allocatable :: matr(:)
 179  real(dp),allocatable :: kxc_for_tddft(:,:,:,:,:,:),omega_tddft_casida(:,:,:,:,:,:,:)
 180  real(dp),allocatable :: osc_str(:,:),pos(:,:),rhoaug(:,:,:),rhog(:,:)
 181  real(dp),allocatable :: sexc(:,:),sqrtks(:),vec(:,:,:),vhartr(:),wfprod(:,:,:)
 182  real(dp) :: omega_tddft_casida_dummy(2/nsppol)
 183  real(dp),allocatable :: wfraug(:,:,:,:),wfrspa(:,:,:,:),work(:),zhpev1(:,:)
 184  real(dp),allocatable :: zhpev2(:)
 185 
 186  integer :: iproc
 187  integer :: ipwnbd
 188  real(dp), allocatable,target :: recvbuf(:,:)
 189  real(dp),pointer :: recvbuf_ptr(:)
 190  type(c_ptr) :: cptr
 191 
 192 ! *************************************************************************
 193 
 194 !Init mpi_comm
 195  spaceComm=mpi_enreg%comm_cell
 196 
 197  am_master=.true.
 198  master = 0
 199  nproc_loc = xmpi_comm_size(spaceComm) !Init ntot proc max
 200  me_loc    = xmpi_comm_rank(spaceComm) !Define who i am
 201 
 202 #if defined HAVE_MPI
 203  if (me_loc/=0) then
 204    am_master=.FALSE.
 205  end if
 206  write(message, '(a,i3,a)' ) ' TDDFT ',nproc_loc,' CPU synchronized'
 207  call wrtout(std_out,message,'COLL')
 208  write(message, '(a,3D12.5,a,3D12.5,a,3D12.5)' ) ' gmet ',&
 209 & gmet(1,1),gmet(1,2),gmet(1,3),ch10,&
 210 & gmet(2,1),gmet(2,2),gmet(2,3),ch10,&
 211 & gmet(3,1),gmet(3,2),gmet(3,3)
 212  call wrtout(std_out,message,'COLL')
 213 #endif
 214 
 215 
 216 !COMMENT these values should become arguments
 217 !the two first define the energy window
 218 
 219  emax_win=greatest_real*tol6
 220  if(dtset%td_maxene>tol6)then
 221    emax_win = dtset%td_maxene
 222  end if
 223 
 224  call timab(95,1,tsec)
 225 
 226  istwf_k=dtset%istwfk(1)
 227 
 228  if(nkpt/=1 .or. &
 229 & abs(dtset%kptns(1,1))+abs(dtset%kptns(2,1))+abs(dtset%kptns(3,1))>1.0d-6 )then
 230    write(message, '(a,a,a,a,a,i4,a,3es14.6,a,a,a,a,a)' )&
 231 &   'The computation of excited states using TDDFT is only allowed',ch10,&
 232 &   'with nkpt=1, kpt=(0 0 0), but the following values are input:',ch10,&
 233 &   'nkpt=',nkpt,', kpt=',dtset%kptns(1:3,1),'.',ch10,&
 234 &   'Action: in the input file, set nkpt to 1 and kpt to 0 0 0 ,',ch10,&
 235 &   'or change iscf.'
 236    ABI_ERROR(message)
 237  end if
 238 
 239  if(nspinor/=1)then
 240    write(message, '(a,a,a,a,a,a,a)' )&
 241 &   'The computation of excited states using TDDFT is restricted',ch10,&
 242 &   'for the time being to nspinor=1, while input nspinor=2.',ch10,&
 243 &   'Action: if you want to compute excited states within TDDFT,',ch10,&
 244 &   'set nsppol to 1 in the input file. Otherwise, do not use iscf=-1.'
 245    ABI_ERROR(message)
 246  end if
 247 
 248 
 249  if(nsppol==2 .and. (dtset%ixc==22 .or. dtset%ixc==20))then
 250    write(message, '(a,a,a,a,a,a,a,a,a,a,a)' )&
 251 &   'The computation of excited states using TDDFT in the spin',ch10,&
 252 &   'polarized case for the time being cannot be used with ixc=20',ch10,&
 253 &   'or ixc=22',ch10,&
 254 &   'Action: if you want to compute excited states within TDDFT,',ch10,&
 255 &   'set ixc different from 20 or 22. Otherwise, do not use iscf=-1',ch10,&
 256 &   'with nsppol=2.'
 257    ABI_ERROR(message)
 258  end if
 259 
 260 
 261  if(dtset%occopt>2)then
 262    write(message, '(a,a,a,i2,a,a,a,a,a)' )&
 263 &   'The computation of excited states using TDDFT is only allowed',ch10,&
 264 &   'with occopt=0, 1, or 2, while input occopt=',dtset%occopt,'.',ch10,&
 265 &   'Action: if you want to compute excited states within TDDFT,',ch10,&
 266 &   'set occopt=0, 1, or 2 in the input file. Otherwise, do not use iscf=-1.'
 267    ABI_ERROR(message)
 268  end if
 269 
 270 !Examine the occupation numbers, and determine the number of
 271 !occupied and unoccupied states and band.
 272 !States are numerated as usual in Abinit, before all spin up band
 273 !and after all spin down bands.
 274 !Note that if nsppol==1 nstate=nband_k
 275  do isppol=1,nsppol
 276    nband_k(isppol)=dtset%nband(isppol)
 277    nband_occ(isppol)=0
 278    do iband=1,nband_k(isppol)
 279      if(abs(occ(iband+(isppol-1)*nband_k(1))-two/nsppol)<tol6)  &
 280 &     nband_occ(isppol)=nband_occ(isppol)+1
 281    end do
 282    nband_unocc(isppol)=nband_k(isppol)-nband_occ(isppol)
 283 !  next line make no sense if spin flip is taken into account
 284    nexcit_pol(isppol)=nband_occ(isppol)*nband_unocc(isppol)
 285  end do
 286  nstate_k=nband_k(1)+(nsppol-1)*nband_k(nsppol)
 287  nstate_occ=nband_occ(1)+(nsppol-1)*nband_occ(nsppol)
 288  nstate_unocc=nstate_k-nstate_occ
 289 !next line to be changed if spin fli is taken into account
 290  nexcit=nexcit_pol(1)+(nsppol-1)*nexcit_pol(nsppol)
 291 
 292 !number of plane wave (does it work even for nsppol=2 ??)
 293  npw_k=npwarr(1)
 294 
 295 !mux number of excitations that is taken into account
 296  if(dtset%td_mexcit==0)then
 297    nexcit_max=nexcit
 298  else
 299    nexcit_max =dtset%td_mexcit
 300  end if
 301 
 302 !DEBUG
 303 !write(std_out,*) nband_occ(1),nband_unocc(1)
 304 !write(std_out,*) nband_occ(nsppol),nband_unocc(nsppol)
 305 !END DEBUG
 306 
 307 
 308  if(nsppol==1)then
 309    write(message, '(a,a,a,a,i4,a,i4,a,a,i4,a,a,a,i6,a)' )ch10,&
 310 &   ' *** TDDFT : computation of excited states *** ',ch10,&
 311 &   ' Splitting of',dtset%nband(1),' states in',nband_occ(1),' occupied states,',&
 312 &   ' and',nband_unocc(1),' unoccupied states,',ch10,&
 313 &   ' giving',nexcit,' excitations.'
 314    call wrtout(std_out,message,'COLL')
 315    call wrtout(ab_out,message,'COLL')
 316  else
 317    write(message, '(a,a,a,a,i4,a,i4,a,a,i4,a,a,a,i6,a,a,a)' )ch10,&
 318 &   ' *** TDDFT : computation of excited states *** ',ch10,&
 319 &   ' Splitting of',nstate_k,' states in',nstate_occ,' occupied states,',&
 320 &   ' and',nstate_unocc,' unoccupied states,',ch10,&
 321 &   ' giving',nexcit,' excitations. Note that spin flip is not possible actually.',ch10,&
 322 &   ' So the number of excitation is the half of the product of the number of state'
 323    call wrtout(std_out,message,'COLL')
 324    call wrtout(ab_out,message,'COLL')
 325  end if
 326 
 327 !Allocate the matrices to be diagonalized.
 328 !Use a simple storage mode, to be improved in the future.
 329  ii=max(nband_occ(1),nband_occ(nsppol))
 330  jj=max(nband_unocc(1),nband_unocc(nsppol))
 331  ABI_MALLOC(omega_tddft_casida,(ii,jj,nsppol,ii,jj,nsppol,2/nsppol))
 332  ABI_MALLOC(eexcit,(nexcit))
 333  ABI_MALLOC(sqrtks,(nexcit))
 334  ABI_MALLOC(flag_state_win,(nstate_k))
 335  omega_tddft_casida(:,:,:,:,:,:,:)=zero
 336 
 337 
 338 !Fill the diagonal elements with square of differences of KS eigenvalues
 339 !(also not very efficient, but OK for the present first coding)
 340 !Also compute the square root of Kohn-Sham eigenvalue differences
 341  do isppol=1,nsppol
 342    do iunocc=1,nband_unocc(isppol)
 343      eigunocc=eigen(iunocc+nband_occ(isppol)+(isppol-1)*nband_k(1))
 344      do iocc=1,nband_occ(isppol)
 345        iexcit=iocc+(isppol-1)*nexcit_pol(1)+nband_occ(isppol)*(iunocc-1)
 346        diffeig=eigunocc-eigen(iocc+(isppol-1)*nband_k(1))
 347        do sing_trip=1,2/nsppol
 348          omega_tddft_casida(iocc,iunocc,isppol,iocc,iunocc,isppol,sing_trip)=diffeig**2
 349        end do
 350        eexcit(iexcit)=diffeig
 351        sqrtks(iexcit)=sqrt(diffeig)
 352      end do
 353    end do
 354  end do
 355 
 356 
 357 
 358 !Sort the excitation energies : note that the array eexcit is reordered
 359  ABI_MALLOC(indarr,(nexcit))
 360  indarr(:)=(/ (ii,ii=1,nexcit) /)
 361  call sort_dp(nexcit,eexcit,indarr,tol14)
 362 
 363 !Determine an energy window for the excitations
 364 !to take into account. This is necessary for large systems
 365 
 366  nexcit_win = 0
 367  do iexcit = 1, nexcit
 368    if ((eexcit(iexcit) < emax_win ).and.(nexcit_win < nexcit_max)) then
 369      nexcit_win = nexcit_win + 1
 370 
 371 !    DEBUG
 372 !    write(message,'(a,F12.5,a,a,i2,a,a,i2)') 'excitation energy:', eexcit(indarr(iexcit)),ch10, &
 373 !    &                                        'excitation number:', indarr(iexcit),ch10,         &
 374 !    &                                        'nexcit_win:       ', nexcit_win
 375 !    call wrtout(std_out,message,'COLL')
 376 !    ENDDEBUG
 377 
 378    end if
 379  end do
 380 
 381 !identification of the bands contributing to the
 382 !nexcit_win  excitations within the window
 383 
 384 
 385  nstate_win = 0
 386  flag_state_win(:) = 0
 387  do iexcit = 1, nexcit_win
 388    iexcit1 = indarr(iexcit)
 389    isppol1 = min((iexcit1-1)/nexcit_pol(1) +1,2)
 390    iunocc1 = (iexcit1-(isppol1-1)*nexcit_pol(1)-1)/nband_occ(isppol1)+1
 391    iocc1   = iexcit1-(isppol1-1)*nexcit_pol(1)-(iunocc1-1)*nband_occ(isppol1)
 392    if (flag_state_win(nband_occ(isppol1)+(isppol1-1)*nband_k(1)+iunocc1)==0) &
 393 &   flag_state_win(nband_occ(isppol1)+(isppol1-1)*nband_k(1)+iunocc1) =1
 394    if (flag_state_win(iocc1+(isppol1-1)*nband_k(1))==0) &
 395 &   flag_state_win(iocc1+(isppol1-1)*nband_k(1)) =1
 396 !  DEBUG
 397 !  write(message,'(a,i2,a,a,i2,i2,a,a,i2,a,a,i2)') 'isppol:', isppol1,ch10, &
 398 !  &                                       'iocc,iunocc:', iocc1,iunocc1,ch10,         &
 399 !  &                                       'flag_state_win:', flag_state_win(iocc1+(isppol1-1)*nband_k(1)),  &
 400 !  &                               ch10,   'flag_state_win:', flag_state_win(nband_occ(isppol1)+(isppol1-1)*nband_k(1)+iunocc1)
 401 !  call wrtout(std_out,message,'COLL')
 402 !  END DEBUG
 403 
 404  end do
 405 
 406  do isppol=1,nsppol
 407    do iband=1,nband_k(isppol)
 408      nstate_win=nstate_win+flag_state_win(iband+(isppol-1)*nband_k(1))
 409    end do
 410  end do
 411 
 412 
 413  write(message,'(a,a,i5)') ch10,'Nr of states to Fourier transform : ',nstate_win
 414  call wrtout(std_out,message,'COLL')
 415 
 416  ndiel1=ngfftdiel(1) ; ndiel2=ngfftdiel(2) ; ndiel3=ngfftdiel(3)
 417 !ndiel4,ndiel5,ndiel6 are FFT dimensions, modified to avoid cache trashing
 418  ndiel4=ngfftdiel(4) ; ndiel5=ngfftdiel(5) ; ndiel6=ngfftdiel(6)
 419 
 420 !The evaluation of integrals, later, needs the following factor
 421  normint=one/(ucvol*dble(ndiel1*ndiel2*ndiel3))
 422 
 423 !Setup the positions in real space for later integration
 424  call matr3inv(gprimd,rprimd)
 425 
 426  ABI_MALLOC(pos,(max(ndiel1,ndiel2,ndiel3),3))
 427 
 428 !Select the reduced position of the point with respect to the box center,
 429 !in the interval ]-0.5,0.5].
 430  buffer=0.05_dp ; buffer_inv=one/buffer
 431  do idir=1,3
 432    if(idir==1)ndiel=ndiel1
 433    if(idir==2)ndiel=ndiel2
 434    if(idir==3)ndiel=ndiel3
 435    do ii=1,ndiel
 436 !    dtset%boxcenter(3)=reduced coordinates of the center of the box,
 437 !    in view of the computation of the oscillator strength
 438      pos(ii,idir)=(ii-1)/(one*ndiel)-dtset%boxcenter(idir)
 439      pos(ii,idir)=pos(ii,idir)-nint(pos(ii,idir)-tol12)
 440 !    The linear behaviour is cut-off when one becomes
 441 !    close to the boundaries : the buffer allows to match smoothly
 442 !    one side of the cell to the other. This is important
 443 !    to get rid of small breakings of symmetry, that are
 444 !    confusing in accurate tests
 445      if(abs(pos(ii,idir))>half-buffer)then
 446 !      xx is always positive, and goes linearly from 1 to 0
 447 !      in the buffer region
 448        xx=(half-abs(pos(ii,idir)))*buffer_inv
 449 !      The cut-off is applied to pos(:,:)
 450        pos(ii,idir)=pos(ii,idir)*xx*(two-xx)
 451 !      DEBUG
 452 !      if (idir==1)then
 453 !      write(std_out,'(i2)') ndiel
 454 !      write(std_out,'(a,i2,a,F12.5,F12.5)')'idiel : ',ii,'   x : ',pos(ii,idir),&
 455 !      &    dtset%boxcenter(idir)
 456 !      endif
 457 !      ENDDEBUG
 458      end if
 459    end do ! ii
 460  end do ! idir
 461 
 462 !need to run in MPI I/O case
 463  if (wffnew%iomode == IO_MODE_MPI ) then
 464    save_iomode=wffnew%iomode
 465    wffnew%iomode = IO_MODE_FORTRAN
 466  else
 467 !  Do not store value but set to have save_iomode /= 1
 468    save_iomode = IO_MODE_FORTRAN
 469  end if
 470 
 471 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 472 !2009 Chunping Hu
 473 !need to collect wavefunctions from processors to master
 474  me=me_loc
 475 
 476  tim_rwwf =0
 477  source = master
 478  sread = master
 479  tmaster=(master==me)
 480  swrite=tmaster
 481  sender=-1
 482 
 483  iomode=wffnew%iomode
 484 
 485  if(am_master)then
 486 #if defined HAVE_MPI
 487    ABI_MALLOC(cg_tmp,(2,mpw*nspinor*mband*nsppol))
 488 #endif
 489  end if
 490 
 491  ABI_MALLOC(kg_disk,(3,mpw))
 492  mcg_disk=mpw*nspinor*mband
 493  formeig=0
 494 
 495 #if defined HAVE_MPI
 496  call xmpi_barrier(spaceComm)
 497  ABI_MALLOC(cg_disk,(2,mcg_disk))
 498 #endif
 499 
 500  icg=0
 501  if(mpi_enreg%paralbd==0) tim_rwwf=6
 502  if(mpi_enreg%paralbd==1)tim_rwwf=12
 503 
 504  do isppol=1,nsppol
 505    ikg=0
 506    do ikpt=1,nkpt
 507      nband_k_=dtset%nband(ikpt+(isppol-1)*nkpt)
 508      npw_k=npwarr(ikpt)
 509 #if defined HAVE_MPI
 510      if (dtset%usewvl == 0) then
 511        mtag=ikpt+(isppol-1)*nkpt
 512        call xmpi_barrier(spaceComm)
 513 !      Must transfer the wavefunctions to the master processor
 514 !      Separate sections for paralbd=1 or other values ; might be merged
 515        if(mpi_enreg%paralbd==0) then
 516          nmaster=0
 517          source=minval(mpi_enreg%proc_distrb(ikpt,1:nband_k_,isppol))
 518          mydata=.false.
 519          if(source==me)mydata=.true.
 520          action=0
 521 !        I am the master node, and I have the data in cg or cg_disk
 522          if((tmaster).and.(mydata))action=1
 523 !        I am not the master, and I have the data => send to master
 524          if((.not.tmaster).and.(mydata))action=2
 525 !        I am the master, and I receive the data
 526          if((tmaster).and.(.not.mydata))action=3
 527 !        I have the data in cg or cg_disk ( MPI_IO case)
 528          if (iomode==IO_MODE_MPI) then
 529            action = 0
 530            sender=-1
 531            swrite=.false.
 532            if (mydata)then
 533              action=1
 534              swrite=.true.
 535              sender=me
 536            end if
 537          end if
 538 !        I am the master node, and I have the data in cg or cg_disk
 539 !        I have the data in cg or cg_disk ( MPI_IO case)
 540          if(action==1)then
 541 !          Copy from kg to kg_disk
 542            kg_disk(:,1:npw_k)=kg(:,1+ikg:npw_k+ikg)
 543 !          Copy from cg to cg_disk
 544            do ipwnbd=1,nband_k_*npw_k*nspinor
 545              cg_disk(1,ipwnbd)=cg(1,ipwnbd+icg)
 546              cg_disk(2,ipwnbd)=cg(2,ipwnbd+icg)
 547            end do
 548          end if
 549 !        I am not the master, and I have the data => send to master
 550 !        I am the master, and I receive the data
 551          if ( action==2.or.action==3) then
 552            call timab(48,1,tsec)
 553            if(action==2)then
 554              call xmpi_exch(kg(:,1+ikg:npw_k+ikg),3*npw_k,source,kg_disk,nmaster,spaceComm,2*(mtag-1)+1,ierr)
 555              call xmpi_exch(cg(:,icg+1:icg+nband_k_*npw_k*nspinor),2*nband_k_*npw_k*nspinor &
 556 &             ,source,cg_disk,nmaster,spaceComm,2*(mtag-1)+2,ierr)
 557            else
 558              call xmpi_exch(kg_disk,3*npw_k,source,kg_disk,nmaster,spaceComm,2*(mtag-1)+1,ierr)
 559              call xmpi_exch(cg_disk,2*nband_k_*npw_k*nspinor,source,cg_disk,nmaster,spaceComm,2*(mtag-1)+2,ierr)
 560            end if
 561            call timab(48,2,tsec)
 562          end if
 563        else if(mpi_enreg%paralbd==1)then
 564          nmaster=0
 565 #if defined HAVE_MPI_IO
 566          sender=-1
 567          if( iomode ==IO_MODE_MPI ) then
 568            nmaster=mpi_enreg%proc_distrb(ikpt,1,isppol)
 569            sender=nmaster
 570          end if
 571 #endif
 572 !        Note the loop over bands
 573          do iband=1,nband_k_
 574 !          The message passing related to kg is counted as one band
 575            action=0
 576 !          I am the master node, and I have the data in cg or cg_disk
 577            if( mpi_enreg%proc_distrb(ikpt,iband,isppol)==nmaster .and. me==nmaster) then
 578              action=1
 579 !            I am not the master, and I have the data => send to master
 580            elseif( mpi_enreg%proc_distrb(ikpt,iband,isppol)==me .and. me/=nmaster ) then
 581              action = 2
 582 !            I am the master, and I receive the data
 583            elseif( mpi_enreg%proc_distrb(ikpt,iband,isppol)/=me .and. me==nmaster ) then
 584              action=3
 585            end if
 586            if(action==1) then
 587 !            I am the master node, and I have the data in cg or cg_disk
 588 !            Copy from kg to kg_disk
 589              if(iband==1)kg_disk(:,1:npw_k)=kg(:,1+ikg:npw_k+ikg)
 590 !            Copy from cg to cg_disk
 591              do ipwnbd=1,npw_k*nspinor
 592                cg_disk(1,(iband-1)*npw_k*nspinor+ipwnbd)= cg(1,(iband-1)*npw_k*nspinor+ipwnbd+icg)
 593                cg_disk(2,(iband-1)*npw_k*nspinor+ipwnbd)= cg(2,(iband-1)*npw_k*nspinor+ipwnbd+icg)
 594              end do
 595            end if  ! action=1
 596            if ( action==2.or.action==3) then
 597 !            action=2 :  I am not the master, and I have the data => send to master
 598 !            action=3 :  I am the master, and I receive the data
 599              call timab(48,1,tsec)
 600              if ( iband == 1 ) then
 601                if (action==2) then
 602                  call xmpi_exch(kg(:,1+ikg:npw_k+ikg),3*npw_k,mpi_enreg%proc_distrb(ikpt,iband,isppol) &
 603 &                 ,kg_disk,nmaster,spaceComm,iband*(mtag-1)+1,ierr)
 604                else
 605                  call xmpi_exch(kg_disk,3*npw_k,mpi_enreg%proc_distrb(ikpt,iband,isppol)  &
 606 &                 ,kg_disk,nmaster,spaceComm,iband*(mtag-1)+1,ierr)
 607                end if
 608              end if       ! iband =1
 609              ipwnbd=(iband-1)*npw_k*nspinor
 610              if (action==2)then
 611                call xmpi_exch( cg(:,ipwnbd+icg+1:ipwnbd+icg+npw_k*nspinor),2*npw_k*nspinor &
 612 &               ,mpi_enreg%proc_distrb(ikpt,iband,isppol)                    &
 613 &               ,cg_disk(:,ipwnbd+1:ipwnbd+npw_k*nspinor),nmaster,spaceComm,iband*(mtag-1)+2,ierr)
 614              else
 615                call xmpi_exch( cg_disk(:,ipwnbd+1:ipwnbd+npw_k*nspinor),2*npw_k*nspinor    &
 616 &               ,mpi_enreg%proc_distrb(ikpt,iband,isppol)                    &
 617 &               ,cg_disk(:,ipwnbd+1:ipwnbd+npw_k*nspinor),nmaster,spaceComm,iband*(mtag-1)+2,ierr)
 618              end if
 619              call timab(48,2,tsec)
 620            end if        ! action=2 or action=3
 621            if(iomode ==IO_MODE_MPI) then
 622 !            I have the data in cg or cg_disk
 623              swrite=.false.
 624              if (nmaster == me) then
 625                swrite=.true.
 626              end if
 627            end if
 628 !          End of loop over bands
 629          end do
 630 !        End of paralbd=1
 631        end if
 632      end if
 633 #endif
 634 
 635 !    The wavefunctions for the present k point and spin are stored into cg_tmp
 636      if(am_master)then
 637 #if defined HAVE_MPI
 638        cg_tmp(:,icg+1:icg+nband_k_*npw_k*nspinor)=cg_disk(:,:)
 639 #endif
 640      end if
 641 
 642      sskip=1
 643 #if defined HAVE_MPI
 644      if (dtset%usewvl == 0) then
 645        sskip=0
 646        if(.not.(proc_distrb_cycle(mpi_enreg%proc_distrb,ikpt,1,nband_k_,isppol,me)))sskip=1
 647      end if
 648 #endif
 649      if(sskip==1)then
 650        icg=icg+npw_k*nspinor*nband_k_
 651        ikg=ikg+npw_k
 652      end if
 653 
 654    end do ! ikpt
 655  end do ! isppol
 656  ABI_FREE(kg_disk)
 657 #if defined HAVE_MPI
 658  ABI_FREE(cg_disk)
 659 #endif
 660 !!!!!!!end of collecting wavefunction to master!!!!!!
 661 
 662 
 663  if(am_master)then
 664 !  -----------------------------------------------------------
 665 !  The disk access is only done by master...
 666 
 667    ABI_MALLOC(gbound,(2*mgfftdiel+8,2))
 668    ABI_MALLOC(kg_k,(3,npw_k))
 669 
 670    ikpt=1
 671 !  Only one k point
 672 !  do isppol=1,nsppol
 673    kg_k(:,1:npw_k)=kg(:,1:npw_k)
 674    call sphereboundary(gbound,istwf_k,kg_k,mgfftdiel,npw_k)
 675 !  enddo
 676 
 677  end if ! am_master
 678 
 679 !need to run in MPI I/O case
 680  if ( save_iomode == 1 )  wffnew%iomode = IO_MODE_MPI
 681 !call wrtout(std_out,'After reading the wavefunction','COLL')
 682 
 683 !Use a simple implementation for the computation of the kernel elements
 684  if (am_master) then
 685    ABI_MALLOC(cwavef,(2,mpw))
 686    ABI_MALLOC(rhoaug,(ndiel4,ndiel5,ndiel6))
 687    ABI_MALLOC(wfraug,(2,ndiel4,ndiel5,ndiel6))
 688  end if
 689  ABI_MALLOC(index_state,(nstate_k))
 690 
 691 ! all real-space states are kept in memory
 692  ABI_MALLOC(wfrspa,(ndiel4,ndiel5,ndiel6,nstate_win))
 693 
 694 !DEBUG
 695 !write(message,'(a)') 'After allocating wfrspa'
 696 !call wrtout(std_out,message,'COLL')
 697 !ENDDEBUG
 698 
 699  weight=zero
 700 
 701 !Generate states in real space, only for states contributing to excitations in window
 702  istate=0
 703 
 704  do isppol=1,nsppol
 705    do iband=1,nband_k(isppol)
 706 
 707      if(flag_state_win(iband+(isppol-1)*nband_k(1)) == 1) then
 708        istate=istate+1
 709        index_state(iband+(isppol-1)*nband_k(1))=istate
 710 
 711        if (am_master) then
 712 #if defined HAVE_MPI
 713 !        Obtain Fourier transform in fft box
 714          cwavef(:,1:npw_k)=cg_tmp(:,1+(iband-1)*npw_k+(isppol-1)* &
 715 &         (npw_k*nband_k(1)) : iband*npw_k+(isppol-1)*(npw_k*nband_k(1)))
 716 #else
 717          cwavef(:,1:npw_k)=cg(:,1+(iband-1)*npw_k+(isppol-1)* (npw_k*nband_k(1)) : iband*npw_k+(isppol-1)*(npw_k*nband_k(1)))
 718 #endif
 719 
 720 !        write(std_out,*)' iband : ',iband, ' isppol', isppol, '  -> index ', &
 721 !        &            istate,index_state(iband+(isppol-1)*nband_k(1))
 722 
 723          tim_fourwf=14
 724 !        This call should be made by master, and then the results be sent to the other procs
 725 
 726          call fourwf(1,rhoaug,cwavef,dummy,wfraug,gbound,gbound,&
 727 &         istwf_k,kg_k,kg_k,mgfftdiel,mpi_enreg,1,ngfftdiel,npw_k,1,ndiel4,ndiel5,ndiel6,&
 728 &         0,tim_fourwf,weight,weight,gpu_option=dtset%gpu_option)
 729 
 730 !        write(std_out,'(a,i5)')' After Fourier proc ',me_loc
 731 
 732 !        Fix the phase, and checks that the wavefunction is real
 733 !        (should be merged with routine fxphas)
 734          saa=zero ; sab=zero ; sbb=zero
 735          do i3=1,ndiel3
 736            do i2=1,ndiel2
 737              do i1=1,ndiel1
 738                saa=saa+wfraug(1,i1,i2,i3)**2
 739                sbb=sbb+wfraug(2,i1,i2,i3)**2
 740                sab=sab+wfraug(1,i1,i2,i3)*wfraug(2,i1,i2,i3)
 741              end do
 742            end do
 743          end do
 744 
 745          if(sbb>5.0d-9)then
 746 
 747            write(message, '(a,a,a,es20.10,a,i4,a,i2,a)' )&
 748 &           'The imaginary part of wavefunctions should be practically zero.',ch10,&
 749 &           'This is not the case, since sbb=',sbb,' for iband=',iband,'with sppol=',1,'.'
 750            ABI_WARNING(message)
 751            if(sbb>1.0d-7)then
 752              ABI_ERROR("sbb>1.0d-7")
 753            end if
 754          end if
 755 
 756 !        Possibility of writing to disk
 757 
 758          wfrspa(:,:,:,istate)=wfraug(1,:,:,:)
 759        end if !am_master
 760 
 761      end if
 762 
 763 !    End loop on iband
 764    end do
 765 !  End loop on nsppol
 766  end do
 767 
 768  if (am_master) then
 769    ABI_FREE(gbound)
 770    ABI_FREE(kg_k)
 771    ABI_FREE(cwavef)
 772    ABI_FREE(rhoaug)
 773    ABI_FREE(wfraug)
 774 #if defined HAVE_MPI
 775    ABI_FREE(cg_tmp)
 776 #else
 777 #endif
 778  end if
 779  ABI_FREE(flag_state_win)
 780 
 781 ! send wfrspa from master to world
 782  call xmpi_bcast(wfrspa,master,spaceComm,ierr)
 783  !call MPI_BCAST(wfrspa,nbuf,MPI_DOUBLE_PRECISION,master,spaceComm,ierr)
 784 
 785 
 786 !DEBUG
 787 !#if defined HAVE_MPI
 788 !call xmpi_barrier(spaceComm)
 789 !write(message,'(a)')' after iband loop synchronization done...'
 790 !call  wrtout(std_out,message,'COLL')
 791 !#endif
 792 !ENDDEBUG
 793 
 794 !Compute the xc kernel, in the form needed for the singlet or triplet
 795 !excitation energy.
 796 !In the case ixc=20, kxc vanishes, but no change is made here, for simplicity.
 797 !(ixc=20 implemented only in the not spin polarized case)
 798 
 799 !DEBUG
 800 !write(std_out,*)' tddft : xc kernel '
 801 !do ifft=1,nkxc,41
 802 !write(std_out,*)ifft,kxc(ifft,1),kxc(ifft,2),kxc(ifft,3)
 803 !enddo
 804 !stop
 805 !ENDDEBUG
 806 
 807  ABI_MALLOC(kxc_for_tddft,(ndiel1,ndiel2,ndiel3,nsppol,nsppol,2/nsppol))
 808  if(dtset%ixc/=22)then
 809    do isppol=1,nsppol
 810      do jsppol=1,nsppol
 811        index=1
 812        do i3=1,ndiel3
 813          do i2=1,ndiel2
 814            do i1=1,ndiel1
 815              do sing_trip=1,2/nsppol
 816                kxc_for_tddft(i1,i2,i3,isppol,jsppol,sing_trip)=two/nsppol* &
 817 &               (kxc(index,isppol+jsppol-1)-(sing_trip-1)*kxc(index,2))
 818              end do
 819              index=index+1
 820            end do
 821          end do
 822        end do
 823      end do
 824    end do
 825  else
 826 !  This is for the Burke-Petersilka-Gross hybrid, with ixc=22
 827 !  However, the implementation in case of spin-polarized system should not be expected to be the correct one !
 828    do isppol=1,nsppol
 829      do jsppol=1,nsppol
 830        index=1
 831        do i3=1,ndiel3
 832          do i2=1,ndiel2
 833            do i1=1,ndiel1
 834              do sing_trip=1,2/nsppol
 835                kxc_for_tddft(i1,i2,i3,isppol,jsppol,sing_trip)=((-1)**(sing_trip+1))*kxc(index,2)
 836              end do
 837              index=index+1
 838            end do
 839          end do
 840        end do
 841      end do
 842    end do
 843  end if
 844 
 845  pole_approx=0
 846 
 847  ABI_MALLOC(excit_coords,(nexcit_win**2,2))
 848 
 849 if (xmpi_paral==1) then
 850   ABI_MALLOC(counts,(0:nproc_loc-1))
 851   ABI_MALLOC(displs,(0:nproc_loc-1))
 852   ABI_MALLOC(recvcounts,(0:nproc_loc-1))
 853   ABI_MALLOC(recvbuf,(5-nsppol,nproc_loc-1))
 854 end if
 855 
 856 !DEBUG
 857 !write(std_out,*)'before first loop'
 858 !ENDDEBUG
 859 
 860 !0000000000000000000000000000000000000000000000000000000
 861 !check if matrix file fname_tdexcit exists on disk
 862 !if the file is present,calculation is a continuation
 863  if (am_master) then
 864    inquire(file=trim(dtfil%fnametmp_tdexcit),exist=file_exist)
 865 !  for direct access to excitation file
 866    if(nsppol==1)then
 867      inquire(iolength=recl) omega_tddft_casida(1,1,1,1,1,1,1),omega_tddft_casida(1,1,1,1,1,1,1), iexcit,jexcit
 868    else
 869      inquire(iolength=recl) omega_tddft_casida(1,1,1,1,1,1,1), iexcit,jexcit
 870    end if
 871 
 872    temp_unit2 = get_unit()
 873    open(temp_unit2, file=trim(dtfil%fnametmp_tdexcit),form='unformatted', recl=recl, access='DIRECT')
 874 
 875    ABI_MALLOC(done_excit,(nexcit_win,nexcit_win))
 876 
 877    if(file_exist)then
 878      write(std_out,*)'TDDFT continues from a previous run'
 879      rec=0
 880      do iexcit=1,nexcit_win
 881        iexcit2 = indarr(iexcit)
 882        isppol2 = min((iexcit2-1)/nexcit_pol(1) +1,2)
 883        iunocc2 = (iexcit2-(isppol2-1)*nexcit_pol(1)-1)/nband_occ(isppol2)+1
 884        iocc2   = iexcit2-(isppol2-1)*nexcit_pol(1)-(iunocc2-1)*nband_occ(isppol2)
 885        do jexcit=1,nexcit_win
 886          iexcit1 = indarr(jexcit)
 887          isppol1 = min((iexcit1-1)/nexcit_pol(1) +1,2)
 888          iunocc1 = (iexcit1-(isppol1-1)*nexcit_pol(1)-1)/nband_occ(isppol1)+1
 889          iocc1   = iexcit1-(isppol1-1)*nexcit_pol(1)-(iunocc1-1)*nband_occ(isppol1)
 890 
 891          rec=rec+1 ! record of the entry in the excitation file
 892          if(nsppol==1)then
 893            read(temp_unit2,rec=rec) omega_tddft_casida_dummy(1), omega_tddft_casida_dummy(2), idummy, jdummy
 894          else
 895            read(temp_unit2,rec=rec) omega_tddft_casida_dummy(1), idummy, jdummy
 896          end if
 897          done_excit(jexcit,iexcit)= ( idummy /= -1 .and. jdummy /= -1 ) ! if true, eigsqr_singlet and eigsqr_triplet are ok
 898 !        and a true is marked in the logical array done_excit
 899          if (done_excit(jexcit,iexcit)) then
 900            do sing_trip=1,2/nsppol
 901              omega_tddft_casida(iocc1,iunocc1,isppol1,iocc2,iunocc2,isppol2,sing_trip)= &
 902 &             omega_tddft_casida_dummy(sing_trip)
 903            end do
 904          end if
 905        end do
 906      end do
 907 
 908    else
 909      write(std_out,*)'no excitation matrix on disk'
 910      write(std_out,*)'TDDFT starts from scratch'
 911      done_excit = .false. ! initialize the logical array to false
 912      rec=0
 913      do iexcit=1,nexcit_win
 914        do jexcit=1,nexcit_win
 915          rec=rec+1
 916          if(nsppol==1)then
 917            write(temp_unit2,rec=rec) zero,zero,-1,-1
 918          else
 919            write(temp_unit2,rec=rec) zero,-1,-1
 920          end if
 921        end do
 922      end do
 923    end if
 924 
 925 !  Need to list the elements to compute, taking the symmetry into account: valid only for Gamma point but this is already the case
 926    count_to_do=0
 927    do iexcit=1,nexcit_win
 928      do jexcit=1,iexcit
 929        if (.not. done_excit(jexcit,iexcit)) then
 930          count_to_do=count_to_do+1
 931          excit_coords(count_to_do,1)=iexcit
 932          excit_coords(count_to_do,2)=jexcit
 933        end if
 934      end do
 935    end do
 936 
 937    ABI_FREE(done_excit)
 938 
 939 
 940 
 941 if (xmpi_paral==1) then
 942 ! Compute limits for load balancing
 943   do iproc=0,nproc_loc-1
 944     displs(iproc)=(iproc*count_to_do)/nproc_loc
 945     counts(iproc)=min(((iproc+1)*count_to_do)/nproc_loc,count_to_do)-displs(iproc)
 946   end do
 947 end if
 948 
 949  end if ! am_master
 950 
 951  call xmpi_bcast(count_to_do,master,spaceComm,ierr)
 952 
 953  displ=(me_loc*count_to_do)/nproc_loc
 954  count=min(((me_loc+1)*count_to_do)/nproc_loc,count_to_do)-displ
 955  displmax=((nproc_loc-1)*count_to_do)/nproc_loc
 956  countmax=count_to_do-displmax
 957 
 958  write(message,'(A,I6)') 'Maximum number of matrix elements per processor = ',countmax
 959  call wrtout(std_out,message,'COLL')
 960 
 961 if (xmpi_paral==1) then
 962 !  Need to dispatch the elements to compute to the different processes
 963    ABI_MALLOC(tmpbuf,(nexcit_win**2))
 964    tmpbuf=0
 965    call xmpi_scatterv(excit_coords(:,1),counts,displs,tmpbuf,count,0,spaceComm,ierr)
 966    excit_coords(:,1)=tmpbuf(:)
 967    tmpbuf=0
 968    call xmpi_scatterv(excit_coords(:,2),counts,displs,tmpbuf,count,0,spaceComm,ierr)
 969    excit_coords(:,2)=tmpbuf(:)
 970    ABI_FREE(tmpbuf)
 971  end if
 972 
 973  nfftdiel=ndiel1*ndiel2*ndiel3
 974  ABI_MALLOC(wfprod,(ndiel1,ndiel2,ndiel3))
 975  ABI_MALLOC(work,(nfftdiel))
 976  ABI_MALLOC(sexc,(3,nexcit_win))
 977  ABI_MALLOC(done_sexc,(nexcit_win))
 978 !ABI_MALLOC(done_sexc2,(nexcit_win))
 979  ABI_MALLOC(rhog,(2,nfftdiel))
 980  ABI_MALLOC(vhartr,(nfftdiel))
 981 
 982  sexc(:,:)=zero
 983  done_sexc(:)=.false.
 984 
 985 !----------------------------------------------------------
 986 !Main double loop
 987 
 988  old_iexcit=0
 989  do ijexcit=1,countmax
 990 !  we really loop only through count, but we need to go through countmax
 991 !  to make sure that all processes execute MPI_Gatherv below
 992    if (ijexcit <= count) then
 993      iexcit=excit_coords(ijexcit,1)
 994      jexcit=excit_coords(ijexcit,2)
 995 
 996      iexcit2 = indarr(iexcit)
 997      isppol2 = min((iexcit2-1)/nexcit_pol(1) +1,2)
 998      iunocc2 = (iexcit2-(isppol2-1)*nexcit_pol(1)-1)/nband_occ(isppol2)+1
 999      iocc2   = iexcit2-(isppol2-1)*nexcit_pol(1)-(iunocc2-1)*nband_occ(isppol2)
1000 
1001      iexcit1 = indarr(jexcit)
1002      isppol1 = min((iexcit1-1)/nexcit_pol(1) +1,2)
1003      iunocc1 = (iexcit1-(isppol1-1)*nexcit_pol(1)-1)/nband_occ(isppol1)+1
1004      iocc1   = iexcit1-(isppol1-1)*nexcit_pol(1)-(iunocc1-1)*nband_occ(isppol1)
1005 
1006      if (old_iexcit /= iexcit) then
1007 !      We start a new column of the matrix
1008 !      DEBUG
1009 !      write(message,'(a,i5,a,i3)')'treating  iexcit =  ',iexcit,&
1010 !      &'  with proc ',me_loc
1011 !      call wrtout(std_out,message,'PERS')
1012 !      ENDDEBUG
1013 
1014 !      DEBUG
1015 !      write(message,'(a,i3)')'Multiplicating phi1 phi2, on proc ',me_loc
1016 !      call wrtout(std_out,message,'PERS')
1017 !      ENDDEBUG
1018 
1019        ifft=1
1020        do i3=1,ndiel3
1021          do i2=1,ndiel2
1022            do i1=1,ndiel1
1023              wfprod(i1,i2,i3)=wfrspa(i1,i2,i3,index_state(iocc2+(isppol2-1)*nband_k(1))) &
1024 &             *wfrspa(i1,i2,i3,index_state(iunocc2+nband_occ(isppol2)+(isppol2-1)*nband_k(1)))
1025              work(ifft)=wfprod(i1,i2,i3)
1026              ifft=ifft+1
1027            end do
1028          end do
1029        end do
1030        if (jexcit == 1) then
1031          do i3=1,ndiel3
1032            do i2=1,ndiel2
1033              do i1=1,ndiel1
1034                do idir=1,3
1035                  poscart(idir)=rprimd(idir,1)*pos(i1,1)+&
1036 &                 rprimd(idir,2)*pos(i2,2)+&
1037 &                 rprimd(idir,3)*pos(i3,3)
1038                  sexc(idir,iexcit)=sexc(idir,iexcit)+poscart(idir)*wfprod(i1,i2,i3)
1039                end do
1040              end do
1041            end do
1042            done_sexc(iexcit)=.true.
1043          end do
1044        end if
1045 
1046 !      For the singlet correction, must compute the hartre potential created
1047 !      by the product of wavefunctions
1048        cplex=1
1049 
1050 !      DEBUG
1051 !      write(message,'(a,i3)')'Before Fourdp, on proc ',me_loc
1052 !      call wrtout(std_out,message,'PERS')
1053 !      ENDDEBUG
1054 
1055        call fourdp(cplex,rhog,work,-1,mpi_enreg,nfftdiel,1,ngfftdiel,0)
1056 
1057 !      DEBUG
1058 !      write(message,'(a,i3)')'Before Hartree, on proc ',me_loc
1059 !      call wrtout(std_out,message,'PERS')
1060 !      write(std_out,*)'CPU ',me_loc,ch10,&
1061 !      &            '   cplex : ',cplex,ch10,&
1062 !      &            '   gmet(3,3)  : ',gmet(3,3),ch10,&
1063 !      &            '   gsqcut : ',gsqcut,ch10,&
1064 !      &            '   rhog(1,1) :,',rhog(1,1),ch10,&
1065 !      &            '   vhartr(1) :,',vhartr(1)
1066 !      ENDDEBUG
1067 
1068        call hartre(cplex,gsqcut,dtset%icutcoul,0,mpi_enreg,nfftdiel,ngfftdiel,&
1069                    &dtset%nkpt,dtset%rcut,rhog,rprimd,dtset%vcutgeo,vhartr)
1070 
1071 !      DEBUG
1072 !      write(message,'(a,i3)')'After Hartree, on proc ',me_loc
1073 !      call wrtout(std_out,message,'PERS')
1074 !      ENDDEBUG
1075      end if
1076      old_iexcit=iexcit
1077 
1078 !    DEBUG
1079 !    write(std_out,*)'  treating  iexcit =  ',jexcit
1080 !    write(std_out,*)'   indarr(iexcit) =',iexcit1,iocc1,iunocc1
1081 !    write(std_out,*)'   index ',index_state(iocc1+(isppol1-1)*nband_k(1)), &
1082 !    &                       index_state(iunocc1+nband_occ(isppol1)+(isppol1-1)*nband_k(1))
1083 !    ENDDEBUG
1084 
1085      if(pole_approx==0 .or. (iunocc1==iunocc2 .and. iocc1==iocc2 .and. isppol1==isppol2))then
1086        sum_kernel(:)=zero
1087        f_sing_trip(1)=two/dble(nsppol)
1088        if(nsppol==1) f_sing_trip(2)=zero
1089 !      For Fermi-Amaldi kxc, the xc contribution is -1/2 the Hartree contribution
1090 !      to the triplet state. The following factors combines both contributions.
1091        if(dtset%ixc==20 .or. dtset%ixc==22)then
1092          if(nsppol==1)then
1093            f_sing_trip(1)= one
1094            f_sing_trip(2)=-one
1095          end if
1096        end if
1097        ifft=1
1098        do i3=1,ndiel3
1099          do i2=1,ndiel2
1100            do i1=1,ndiel1
1101              myproduct=wfrspa(i1,i2,i3,index_state(iocc1+(isppol1-1)*nband_k(1))) &
1102 &             *wfrspa(i1,i2,i3,index_state(iunocc1+nband_occ(isppol1)+(isppol1-1)*nband_k(1)))
1103              do sing_trip=1,2/nsppol
1104                sum_kernel(sing_trip)=sum_kernel(sing_trip)+&
1105 &               myproduct*(f_sing_trip(sing_trip)*vhartr(ifft)+kxc_for_tddft(i1,i2,i3,isppol1,isppol2,sing_trip) &
1106                *wfprod(i1,i2,i3))
1107              end do
1108              ifft=ifft+1
1109            end do ! i1
1110          end do ! i2
1111        end do ! i3
1112 
1113 !      The factor two is coherent with the formulas of Vasiliev et al
1114        factor=two*sqrtks(iexcit1)*sqrtks(iexcit2)*normint
1115        do sing_trip=1,2/nsppol
1116          omega_tddft_casida(iocc1,iunocc1,isppol1,iocc2,iunocc2,isppol2,sing_trip)=   &
1117 &         omega_tddft_casida(iocc1,iunocc1,isppol1,iocc2,iunocc2,isppol2,sing_trip)+  &
1118 &         factor*sum_kernel(sing_trip)
1119        end do
1120 
1121 !      End condition of being diagonal element if pole approximation
1122      end if
1123 
1124 !    Continue writing excitation matrix
1125      if (am_master) then
1126 !      the master writes its results to disk
1127        if(nsppol==1)then
1128          write(temp_unit2, rec=(iexcit-1)*nexcit_win+jexcit ) &
1129 &         omega_tddft_casida(iocc1,iunocc1,isppol1,iocc2,iunocc2,isppol2,1),&
1130 &         omega_tddft_casida(iocc1,iunocc1,isppol1,iocc2,iunocc2,isppol2,2),&
1131 &         iexcit, jexcit
1132        else
1133          write(temp_unit2, rec=(iexcit-1)*nexcit_win+jexcit ) &
1134 &         omega_tddft_casida(iocc1,iunocc1,isppol1,iocc2,iunocc2,isppol2,1), &
1135 &         iexcit, jexcit
1136        end if
1137 
1138 !      DEBUG
1139 !      if(nsppol==1)then
1140 !      write(std_out,*)'singlet: ',omega_tddft_casida(iocc1,iunocc1,isppol1,iocc2,iunocc2,isppol2,1),&
1141 !      &             'iexcit: ',iexcit,'jexcit :',jexcit
1142 !      write(std_out,*)'triplet: ',omega_tddft_casida(iocc1,iunocc1,isppol1,iocc2,iunocc2,isppol2,2),&
1143 !      &             'iexcit: ',iexcit,'jexcit :',jexcit
1144 !      else
1145 !      write(std_out,*)'excitation: ',omega_tddft_casida(iocc1,iunocc1,isppol1,iocc2,iunocc2,isppol2,1),&
1146 !      &             'iexcit: ',iexcit,'jexcit :',jexcit
1147 !      endif
1148 !      ENDDEBUG
1149 
1150        sendcount=0
1151      else
1152        sendcount=5-nsppol
1153 
1154        if(nsppol==1)then
1155          sendbuf=(/ omega_tddft_casida(iocc1,iunocc1,isppol1,iocc2,iunocc2,isppol2,1), &
1156 &         omega_tddft_casida(iocc1,iunocc1,isppol1,iocc2,iunocc2,isppol2,2), &
1157 &         real(iexcit,dp), real(jexcit,dp) /)
1158        else
1159          sendbuf=(/ omega_tddft_casida(iocc1,iunocc1,isppol1,iocc2,iunocc2,isppol2,1), &
1160 &         real(iexcit,dp), real(jexcit,dp) /)
1161        end if
1162      end if ! am_master
1163    else
1164 !    ijexcit > count
1165 
1166 !    done with local work, so send message of zero length
1167      sendcount=0
1168 
1169    end if ! ijexcit <= count
1170 
1171 if (xmpi_paral==1) then
1172    if (am_master) then
1173 
1174 !    Compute displacements and counts for the gathering of the results
1175      displs(0)=0
1176      recvcounts(0)=0
1177      do iproc=1,nproc_loc-1
1178        recvcounts(iproc)=min(((iproc+1)*count_to_do)/nproc_loc,count_to_do)-(iproc*count_to_do)/nproc_loc
1179        if (recvcounts(iproc) < countmax .and. ijexcit==countmax) then
1180          recvcounts(iproc)=0
1181        else
1182          recvcounts(iproc)=5-nsppol
1183        end if
1184        displs(iproc)=displs(iproc-1)+recvcounts(iproc-1)
1185      end do
1186    end if
1187 
1188    if (nproc_loc>1) then
1189      cptr=c_loc(recvbuf) ; call c_f_pointer(cptr,recvbuf_ptr,[size(recvbuf)])
1190      call xmpi_gatherv(sendbuf,sendcount,recvbuf_ptr,recvcounts,displs,0,spaceComm,ierr)
1191    end if
1192 
1193    if (am_master) then
1194 
1195 !    Extract eigsqr_singlet, eigsqr_triplet, iexcit, jexcit from receive buffer and
1196 !    write to file
1197      do ijexcit2=1,sum(recvcounts)/(5-nsppol)
1198        iexcit=int(recvbuf(4-nsppol,ijexcit2))
1199        jexcit=int(recvbuf(5-nsppol,ijexcit2))
1200 
1201        iexcit2 = indarr(iexcit)
1202        isppol2 = min((iexcit2-1)/nexcit_pol(1) +1,2)
1203        iunocc2 = (iexcit2-(isppol2-1)*nexcit_pol(1)-1)/nband_occ(isppol2)+1
1204        iocc2   = iexcit2-(isppol2-1)*nexcit_pol(1)-(iunocc2-1)*nband_occ(isppol2)
1205 
1206        iexcit1 = indarr(jexcit)
1207        isppol1 = min((iexcit1-1)/nexcit_pol(1) +1,2)
1208        iunocc1 = (iexcit1-(isppol1-1)*nexcit_pol(1)-1)/nband_occ(isppol1)+1
1209        iocc1   = iexcit1-(isppol1-1)*nexcit_pol(1)-(iunocc1-1)*nband_occ(isppol1)
1210 
1211        do sing_trip=1,2/nsppol
1212          omega_tddft_casida(iocc1,iunocc1,isppol1,iocc2,iunocc2,isppol2,sing_trip)= &
1213 &         recvbuf(sing_trip,ijexcit2)
1214        end do
1215 
1216        if(nsppol==1)then
1217          write(temp_unit2, rec=(iexcit-1)*nexcit_win+jexcit ) &
1218 &         omega_tddft_casida(iocc1,iunocc1,isppol1,iocc2,iunocc2,isppol2,1), &
1219 &         omega_tddft_casida(iocc1,iunocc1,isppol1,iocc2,iunocc2,isppol2,2), &
1220 &         iexcit, jexcit
1221        else
1222          write(temp_unit2, rec=(iexcit-1)*nexcit_win+jexcit ) &
1223          omega_tddft_casida(iocc1,iunocc1,isppol1,iocc2,iunocc2,isppol2,1), &
1224 &         iexcit, jexcit
1225        end if
1226 !      DEBUG
1227 !      if(nsppol==1)then
1228 !      write(std_out,*)'singlet: ',omega_tddft_casida(iocc1,iunocc1,isppol1,iocc2,iunocc2,isppol2,1), &
1229 !      &                       'iexcit: ',iexcit,'jexcit :',jexcit
1230 !      write(std_out,*)'triplet: ',omega_tddft_casida(iocc1,iunocc1,isppol1,iocc2,iunocc2,isppol2,2),
1231 !      &                       'iexcit: ',iexcit,'jexcit :',jexcit
1232 !      else
1233 !      write(std_out,*)'excitation: ',omega_tddft_casida(iocc1,iunocc1,isppol1,iocc2,iunocc2,isppol2,1), &
1234 !      &                        'iexcit: ',iexcit,'jexcit :',jexcit
1235 !      endif
1236 !      ENDDEBUG
1237 
1238      end do
1239 
1240    end if
1241 end if ! MPI parallel
1242 
1243 !  End indices loops
1244  end do ! ijexcit
1245 
1246 !End of the main double loop
1247 !--------------------------------------------------------------------
1248 
1249 
1250 if (xmpi_paral==1) then
1251 ! sexc needs to be summed here since it used only by master
1252   call xmpi_barrier(spaceComm)
1253 ! call xmpi_sum_master(sexc,master,spaceComm,ierr) ! Does not work on some machines
1254   call xmpi_sum(sexc,spaceComm,ierr)
1255   call xmpi_lor(done_sexc,spaceComm)
1256   !done_sexc2=done_sexc
1257   !call MPI_Reduce(done_sexc2,done_sexc,nexcit_win,MPI_LOGICAL,MPI_LOR,master,spaceComm,ierr)
1258 end if
1259 
1260 
1261  if (am_master) then
1262 !  We compute sexc again if it was not done. Will only be executed if
1263 !  there was a restart from values read from logical unit temp_unit2.
1264 
1265    do iexcit=1,nexcit_win
1266 
1267 !    DEBUG
1268 !    write(std_out,*)'do on excitation',iexcit
1269 !    END DEBUG
1270 
1271      if (.not.done_sexc(iexcit)) then
1272        iexcit2 = indarr(iexcit)
1273        isppol2 = min((iexcit2-1)/nexcit_pol(1) +1,2)
1274        iunocc2 = (iexcit1-(isppol2-1)*nexcit_pol(1)-1)/nband_occ(isppol2)+1
1275        iocc2   = iexcit2-(isppol2-1)*nexcit_pol(1)-(iunocc2-1)*nband_occ(isppol2)
1276        do i3=1,ndiel3
1277          do i2=1,ndiel2
1278            do i1=1,ndiel1
1279              wfprod(i1,i2,i3)=wfrspa(i1,i2,i3,index_state(iocc2+(isppol2-1)*nband_k(1))) &
1280 &             *wfrspa(i1,i2,i3,index_state(iunocc2+nband_occ(isppol2)+ (isppol2-1)*nband_k(1)))
1281              do idir=1,3
1282                poscart(idir)=rprimd(idir,1)*pos(i1,1)+&
1283 &               rprimd(idir,2)*pos(i2,2)+&
1284 &               rprimd(idir,3)*pos(i3,3)
1285                sexc(idir,iexcit)=sexc(idir,iexcit)+poscart(idir)*wfprod(i1,i2,i3)
1286              end do
1287            end do
1288          end do
1289        end do
1290      end if
1291    end do
1292  end if
1293 
1294  ABI_FREE(work)
1295  ABI_FREE(rhog)
1296  ABI_FREE(pos)
1297  ABI_FREE(vhartr)
1298  ABI_FREE(kxc_for_tddft)
1299  ABI_FREE(wfprod)
1300  ABI_FREE(index_state)
1301  ABI_FREE(excit_coords)
1302  ABI_FREE(wfrspa)
1303 
1304 !Write the first excitation energies
1305  write(message, '(a,a,es18.8,a,a,a,a,a,a,a,a,a)' )ch10,&
1306 & '  Ground state total energy (Ha) :',etotal,ch10,ch10,&
1307 & '  Kohn-Sham energy differences,',ch10,&
1308 & '  corresponding total energies and oscillator strengths (X,Y,Z and average)-',ch10,&
1309 & '  (oscillator strengths smaller than 1.e-6 are set to zero)',ch10,&
1310 & '  Transition  (Ha)  and   (eV)   Tot. Ene. (Ha)  Aver     XX       YY       ZZ'
1311  call wrtout(ab_out,message,'COLL')
1312  call wrtout(std_out,message,'COLL')
1313 
1314 if (xmpi_paral==1) then
1315  ABI_FREE(counts)
1316  ABI_FREE(displs)
1317  ABI_FREE(recvbuf)
1318  ABI_FREE(recvcounts)
1319 end if
1320 
1321  if (am_master) then
1322 
1323    ABI_MALLOC(osc_str,(7,nexcit))
1324 
1325    do iexcit=1,nexcit_win
1326      iexcit2 = indarr(iexcit)
1327      isppol = min((iexcit2-1)/nexcit_pol(1) +1,2)
1328      iunocc = (iexcit2-(isppol-1)*nexcit_pol(1)-1)/nband_occ(isppol)+1
1329      iocc   = iexcit2-(isppol-1)*nexcit_pol(1)-(iunocc-1)*nband_occ(isppol)
1330 
1331      osc_str(1,iexcit)=zero
1332      do idir=1,3
1333 !      One of the factor of two comes from the spin degeneracy,
1334 !      the other comes from Eq.(40) of Casida
1335        osc_str(idir+1,iexcit)=&
1336 &       (sexc(idir,iexcit)*sqrtks(iexcit2)*normint*ucvol)**2*two*two/nsppol
1337        osc_str(1,iexcit)=osc_str(1,iexcit)&
1338 &       +osc_str(idir+1,iexcit)*third
1339      end do
1340      do ii=1,4
1341        if(abs(osc_str(ii,iexcit))<tol6)osc_str(ii,iexcit)=zero
1342      end do
1343 !    Changed, the whole spectrum is written
1344 !    The array eexcit has been reordered previously, the others also
1345      if(nsppol==1)then
1346        write(message, '(i4,a,i3,2es12.5,es13.5,es11.4,3es9.2)' ) &
1347 &       iocc,'->',iunocc+nband_occ(isppol),            &
1348 &       eexcit(iexcit), eexcit(iexcit)*Ha_eV,eexcit(iexcit)+etotal, &
1349 !      XG 020209 : Jean-Yves, I assume that the printout of sexc is for debugging ?!
1350 !      &   osc_str(1:4,iexcit),sexc(1:3,iexcit)
1351 &       osc_str(1:4,iexcit)
1352      else
1353        write(message, '(i4,a,i3,a,i1,2es12.5,es13.5,es11.4,3es9.2)' ) &
1354 &       iocc,'->',iunocc+nband_occ(isppol),' s:',isppol,            &
1355 &       eexcit(iexcit), eexcit(iexcit)*Ha_eV,eexcit(iexcit)+etotal, &
1356 &       osc_str(1:4,iexcit)
1357      end if
1358      call wrtout(ab_out,message,'COLL')
1359      call wrtout(std_out,message,'COLL')
1360 
1361    end do
1362 
1363 !  Check of the Sum rule for Casida eq.47,
1364 !  only exact if complete basis of excitations, as well as local potentials only.
1365    sumx=zero
1366    do iexcit=1,nexcit_win
1367      sumx=sumx+osc_str(1,iexcit)
1368    end do
1369    write(message, '(a,es16.6)' )'  Sum of osc. strength : ',sumx
1370    call wrtout(ab_out,message,'COLL')
1371    call wrtout(std_out,message,'COLL')
1372 
1373 !  -Diagonalize the excitation matrices----------------------------
1374 
1375    ABI_MALLOC(eexcit2,(nexcit_win))
1376    ABI_MALLOC(vec,(2,nexcit_win,nexcit_win))
1377 
1378    do sing_trip=1,2/nsppol
1379 
1380      if(pole_approx==0)then
1381 
1382        ABI_MALLOC(matr,(nexcit_win*(nexcit_win+1)))
1383        ABI_MALLOC(zhpev1,(2,2*nexcit_win-1))
1384        ABI_MALLOC(zhpev2,(3*nexcit_win-2))
1385        matr(:)=zero
1386        ier=0
1387 !      DEBUG
1388 !      write(std_out,*)' after allocation matrices     '
1389 !      ENDDEBUG
1390 
1391 
1392 !      Store the matrix in proper mode before calling zhpev
1393 
1394        index=1
1395        do iexcit=1,nexcit_win
1396          iexcit2 = indarr(iexcit)
1397          isppol2 = min((iexcit2-1)/nexcit_pol(1) +1,2)
1398          iunocc2 = (iexcit2-(isppol2-1)*nexcit_pol(1)-1)/nband_occ(isppol2)+1
1399          iocc2   = iexcit2-(isppol2-1)*nexcit_pol(1)-(iunocc2-1)*nband_occ(isppol2)
1400          do jexcit=1,iexcit
1401            iexcit1 = indarr(jexcit)
1402            isppol1 = min((iexcit1-1)/nexcit_pol(1) +1,2)
1403            iunocc1 = (iexcit1-(isppol1-1)*nexcit_pol(1)-1)/nband_occ(isppol1)+1
1404            iocc1   = iexcit1-(isppol1-1)*nexcit_pol(1)-(iunocc1-1)*nband_occ(isppol1)
1405 
1406            matr(index)=omega_tddft_casida(iocc1,iunocc1,isppol1,iocc2,iunocc2,isppol2,sing_trip)
1407            matr(index+1)=zero
1408            index=index+2
1409          end do
1410 
1411        end do
1412 
1413 !      DEBUG
1414 !      write(std_out,*)' after filling    matrices     '
1415 !      ENDDEBUG
1416 
1417        call ZHPEV ('V','U',nexcit_win,matr,eexcit2,vec,nexcit_win,zhpev1,&
1418 &       zhpev2,ier)
1419 
1420        ABI_FREE(matr)
1421        ABI_FREE(zhpev1)
1422        ABI_FREE(zhpev2)
1423 !      DEBUG
1424 !      write(std_out,*)' after deallocating matrices     '
1425 !      ENDDEBUG
1426 
1427 
1428      else
1429 
1430        vec(:,:,:)=zero
1431        do isppol=1, nsppol
1432          do iunocc=1,nband_k(isppol)
1433            do iocc=1,nband_k(isppol)
1434              index=iocc+nband_k(isppol)*(iunocc-1)+(isppol-1)*nexcit_pol(1)
1435              eexcit2(index)=omega_tddft_casida(iocc,iunocc,isppol,iocc,iunocc,isppol,sing_trip)
1436              vec(1,index,index)=one
1437            end do
1438          end do
1439        end do
1440 
1441      end if
1442 
1443 !    Compute the excitation energies from the square root of eexcit2
1444 !    eexcit(:)=sqrt(eexcit2(:)
1445 
1446      ABI_FREE(eexcit)
1447      ABI_MALLOC(eexcit,(nexcit_win))
1448 
1449      eexcit(:)=sqrt(dabs(eexcit2(:))+tol10**2)
1450 !    Write the first excitation energies
1451      if(sing_trip==1)then
1452        if(nsppol==1)then
1453          write(message, '(a,a,a,a,a,a)' )ch10,&
1454 &         '  TDDFT singlet excitation energies (at most 20 of them are printed),',ch10,&
1455 &         '  and corresponding total energies.                ',ch10,&
1456 &         '  Excit#   (Ha)    and    (eV)    total energy (Ha)    major contributions '
1457        else
1458          write(message, '(a,a,a,a,a,a)' )ch10,&
1459 &         '  TDDFT   mixed excitation energies (at most 40 of them are printed),',ch10,&
1460 &         '  and corresponding total energies.                ',ch10,&
1461 &         '  Excit#   (Ha)    and    (eV)    total energy (Ha)    major contributions '
1462        end if
1463      else
1464        write(message, '(a,a,a,a,a,a)' )ch10,&
1465 &       '  TDDFT triplet excitation energies (at most 20 of them are printed),',ch10,&
1466 &       '  and corresponding total energies.                ',ch10,&
1467 &       '  Excit#   (Ha)    and    (eV)    total energy (Ha)    major contributions '
1468      end if
1469      call wrtout(ab_out,message,'COLL')
1470      call wrtout(std_out,message,'COLL')
1471      call wrtout(std_out,' tddft : before iexcit loop',"COLL")
1472 
1473      do iexcit=1,min(nexcit_win,nexcitout)
1474        write(std_out,*)' tddft : iexcit=',iexcit
1475 !      Select largest and next contributions
1476        flargest=zero ; fnext=zero
1477        nlargest=0 ; nnext=0
1478        if(nsppol==2)then
1479          fnext1=zero  ;  fnext2=zero
1480          nnext1=0  ;  nnext2=0
1481        end if
1482        do jexcit=1,nexcit_win
1483          ff=vec(1,jexcit,iexcit)**2+vec(2,jexcit,iexcit)**2
1484          if(ff>flargest+tol12)then
1485            if(nsppol==2)then
1486              nnext2=nnext1  ; fnext2=fnext1
1487              nnext1=nnext   ; fnext1=fnext
1488            end if
1489            nnext=nlargest ; fnext=flargest
1490            nlargest=indarr(jexcit) ; flargest=ff
1491          else if(ff>fnext+tol12)then
1492            if(nsppol==2)then
1493              nnext2=nnext1 ; fnext2=fnext1
1494              nnext1=nnext  ; fnext1=fnext
1495            end if
1496            nnext=indarr(jexcit) ; fnext=ff
1497          else if(nsppol==2)then
1498            if(ff>fnext1+tol12)then
1499              nnext2=nnext1  ; fnext2=fnext1
1500              nnext1=indarr(jexcit) ; fnext1=ff
1501            else if(ff>fnext2+tol12)then
1502              nnext2=indarr(jexcit) ; fnext2=ff
1503            end if
1504          end if
1505 
1506        end do
1507 
1508        isppol_l = min((nlargest-1)/nexcit_pol(1) +1,nsppol)
1509        iunocc_l = (nlargest-(isppol_l-1)*nexcit_pol(1)-1)/nband_occ(isppol_l)+1
1510        iocc_l   = nlargest-(isppol_l-1)*nexcit_pol(1)-(iunocc_l-1)*nband_occ(isppol_l)
1511        isppol_n = min((nnext-1)/nexcit_pol(1) +1,nsppol)
1512        iunocc_n = (nnext-(isppol_n-1)*nexcit_pol(1)-1)/nband_occ(isppol_n)+1
1513        iocc_n   = nnext-(isppol_n-1)*nexcit_pol(1)-(iunocc_n-1)*nband_occ(isppol_n)
1514        if(nsppol==2)then
1515          isppol_n1 = min((nnext1-1)/nexcit_pol(1) +1,nsppol)
1516          iunocc_n1 = (nnext1-(isppol_n1-1)*nexcit_pol(1)-1)/nband_occ(isppol_n1)+1
1517          iocc_n1   = nnext1-(isppol_n1-1)*nexcit_pol(1)-(iunocc_n1-1)*nband_occ(isppol_n1)
1518          isppol_n2 = min((nnext2-1)/nexcit_pol(1) +1,nsppol)
1519          iunocc_n2 = (nnext2-(isppol_n2-1)*nexcit_pol(1)-1)/nband_occ(isppol_n2)+1
1520          iocc_n2   = nnext2-(isppol_n2-1)*nexcit_pol(1)-(iunocc_n2-1)*nband_occ(isppol_n2)
1521        end if
1522 
1523        if(nsppol==1)then
1524          write(message,'(i4,es15.5,es14.5,es16.6,f8.2,a,i3,a,i3,a,f6.2,a,i3,a,i3,a)') &
1525 &         iexcit,eexcit(iexcit),&
1526 &         eexcit(iexcit)*Ha_eV,eexcit(iexcit)+etotal,&
1527 &         flargest,'(',iocc_l,'->',iunocc_l+nband_occ(1),')',&
1528 &         fnext,   '(',iocc_n,'->',iunocc_n+nband_occ(1),')'
1529          call wrtout(ab_out,message,'COLL')
1530          call wrtout(std_out,message,'COLL')
1531        else
1532          write(chain1,'(f8.2,a,i3,a,i3,a)')flargest,'(',iocc_l,'->',iunocc_l+nband_occ(isppol_l),')'
1533          write(chain2,'(f8.2,a,i3,a,i3,a)')fnext,'(',iocc_n,'->',iunocc_n+nband_occ(isppol_n),')'
1534          if(trim(chain1)==trim(chain2))then
1535            write(message,'(i4,es15.5,es14.5,es16.6,a,a,a,a)') &
1536 &           iexcit,eexcit(iexcit),&
1537 &           eexcit(iexcit)*Ha_eV,eexcit(iexcit)+etotal,trim(chain1),'(1)',trim(chain2),'(2)'
1538          else
1539            write(message,'(i4,es15.5,es14.5,es16.6,a,a,i1,a,a,a,i1,a)') &
1540 &           iexcit,eexcit(iexcit),&
1541 &           eexcit(iexcit)*Ha_eV,eexcit(iexcit)+etotal,trim(chain1),'(',isppol_l,')',&
1542 &           trim(chain2),'(',isppol_n,')'
1543          end if
1544          call wrtout(ab_out,message,'COLL')
1545          call wrtout(std_out,message,'COLL')
1546          write(chain1,'(f8.2,a,i3,a,i3,a)')fnext1,'(',iocc_n1,'->',iunocc_n1+nband_occ(isppol_n1),')'
1547          write(chain2,'(f8.2,a,i3,a,i3,a)')fnext2,'(',iocc_n2,'->',iunocc_n2+nband_occ(isppol_n2),')'
1548          if(trim(chain1)==trim(chain2))then
1549            write(message,'(a,a,a,a,a)' ) &
1550 &           '                                                 ',&
1551 &           chain1,'(1)',chain2,'(2)'
1552          else
1553            write(message,'(a,a,a,i1,a,a,a,i1,a)' ) &
1554 &           '                                                 ',&
1555 &           chain1,'(',isppol_n1,')',&
1556 &           chain2,'(',isppol_n2,')'
1557          end if
1558          call wrtout(ab_out,message,'COLL')
1559          call wrtout(std_out,message,'COLL')
1560        end if
1561      end do
1562 
1563 !    For each iexcit excitation, compute the oscillator strength (Casida, eq 47)
1564      write(message, '(a,a,a,a)' )ch10,&
1565 &     '  Oscillator strengths :  (elements smaller than 1.e-6 are set to zero)',ch10,&
1566 &     '  Excit#   (Ha)   Average    XX        YY        ZZ         XY        XZ        YZ'
1567      call wrtout(ab_out,message,'COLL')
1568      call wrtout(std_out,message,'COLL')
1569 
1570      do iexcit=1,nexcit_win
1571 
1572 !      One of the factor of two comes from the spin degeneracy,
1573 !      the other comes from Eq.(40) of Casida
1574        factor=(normint*ucvol)**2*two*two/nsppol
1575 
1576        osc_str(:,iexcit)=zero
1577 
1578 !      factor=(normint*ucvol)**2*two*two/nsppol
1579 
1580        do jexcit=1,nexcit_win
1581          jexcit_cbase=indarr(jexcit)
1582          do idir=1,3
1583            osc_str(idir+1,iexcit)=osc_str(idir+1,iexcit)+ &
1584 &           sexc(idir,jexcit)*sqrtks(jexcit_cbase)*sqrt(factor)* &
1585 &           vec(1,jexcit,iexcit)
1586          end do ! idir
1587        end do ! jexcit
1588 
1589 !      The "standard" definition of the oscillator strength is the square
1590 !      of the matrix elements.
1591 !      So, instead of the coding
1592 !      do idir=1,3
1593 !      osc_str(1,iexcit)=osc_str(1,iexcit)+osc_str(idir+1,iexcit)**2*third
1594 !      enddo
1595 !      I think that the following is more "standard"
1596 !      Now, osc_str(2:4,iexcit) are the X, Y and Z matrix elements, not
1597 !      yet the oscillator strengths
1598        osc_str(5,iexcit)=osc_str(2,iexcit)*osc_str(3,iexcit)   ! off diag XY
1599        osc_str(6,iexcit)=osc_str(2,iexcit)*osc_str(4,iexcit)   ! off diag XZ
1600        osc_str(7,iexcit)=osc_str(3,iexcit)*osc_str(4,iexcit)   ! off diag ZZ
1601        do idir=1,3
1602 !        Here the X,Y, and Z matrix elements are combined to give diagonal osc. strengths
1603          osc_str(idir+1,iexcit)=osc_str(idir+1,iexcit)**2
1604          osc_str(1,iexcit)=osc_str(1,iexcit)+osc_str(idir+1,iexcit)*third ! compute the trace
1605        end do
1606 !      At this stage, osc_str(1,iexcit) is exactly the same as from your coding
1607 !      ***End of section to be checked
1608 
1609        do ii=1,7
1610          if(abs(osc_str(ii,iexcit))<tol6)osc_str(ii,iexcit)=zero
1611        end do
1612 !      XG 020209 : Jean-Yves, the off-diagonal oscillator strengths
1613 !      can become negative. It is important for automatic
1614 !      checking that the numbers are separated by a blank, even
1615 !      if they are negative. So replace the following format, to have at least one blank.
1616        write(message, '(i4,es12.5,es10.3,3es10.3,3es10.2)' )iexcit,eexcit(iexcit),osc_str(1:7,iexcit)
1617        call wrtout(ab_out,message,'COLL')
1618        call wrtout(std_out,message,'COLL')
1619      end do
1620 
1621 !    Check of the Sum rule for Casida eq.47,
1622 !    only exact if complete basis of excitations, as well as local potentials only.
1623      sumx=zero
1624      do iexcit=1,nexcit_win
1625        sumx=sumx+osc_str(1,iexcit)
1626      end do
1627      write(message, '(a,es16.6)' )'  Sum of osc. strength : ',sumx
1628      call wrtout(ab_out,message,'COLL')
1629      call wrtout(std_out,message,'COLL')
1630 
1631 !    If singlet, compute Cauchy coefficients
1632      if(sing_trip==1.AND.nsppol==1)then
1633        cauchy(:)=zero
1634        do iexcit=1,nexcit_win
1635          fr_invsquare=one/(eexcit(iexcit)**2)
1636          fr_power=one
1637          do ii=1,7
1638            fr_power=fr_power*fr_invsquare
1639            cauchy(ii)=cauchy(ii)+osc_str(1,iexcit)*fr_power
1640          end do
1641        end do
1642        write(message, '(a,es11.3,a,es11.3,a,es11.3,a,a,es11.3,a,es11.3,a,es11.3,a,es11.3)' ) &
1643 &       '  Cauchy coeffs (au) : ( -2)->',cauchy(1),&
1644 &       ', ( -4)->',cauchy(2),', ( -6)->',cauchy(3),ch10,&
1645 &       '    (-8)->',cauchy(4),', (-10)->',cauchy(5),', (-12)->',cauchy(6),', (-14)->',cauchy(7)
1646        call wrtout(ab_out,message,'COLL')
1647        call wrtout(std_out,message,'COLL')
1648      end if
1649 
1650 !    End the loop on singlet or triplet
1651    end do
1652 
1653    ABI_FREE(eexcit2)
1654    ABI_FREE(vec)
1655    ABI_FREE(osc_str)
1656 
1657 !! The temporary files should be deleted at the end of this routine
1658    close(temp_unit2,status='delete')
1659    call timab(95,2,tsec)
1660  end if  ! end of am_master
1661 
1662  ABI_FREE(omega_tddft_casida)
1663  ABI_FREE(eexcit)
1664  ABI_FREE(sqrtks)
1665  ABI_FREE(sexc)
1666  ABI_FREE(done_sexc)
1667  ABI_FREE(indarr)
1668 !ABI_FREE(done_sexc2)
1669 
1670 end subroutine tddft