TABLE OF CONTENTS


ABINIT/m_tddft [ Modules ]

[ Top ] [ Modules ]

NAME

  m_tddft

FUNCTION

  Routines for computing excitation energies within TDDFT

COPYRIGHT

 Copyright (C) 1999-2018 ABINIT group (XG, JYR, MB, MBELAND, SHAMEL)
 This file is distributed under the terms of the
 GNU General Public License, see ~abinit/COPYING
 or http://www.gnu.org/copyleft/gpl.txt .
 For the initials of contributors, see ~abinit/doc/developers/contributors.txt .

PARENTS

CHILDREN

NOTES

SOURCE

24 #if defined HAVE_CONFIG_H
25 #include "config.h"
26 #endif
27 
28 #include "abi_common.h"
29 
30 module m_tddft
31 
32  use defs_basis
33  use defs_abitypes
34  use m_abicore
35  use m_xmpi
36  use m_errors
37  use m_wffile
38  use m_sort
39 #if defined HAVE_MPI2
40  use mpi
41 #endif
42 
43  use m_io_tools, only : get_unit
44  use m_symtk,    only : matr3inv
45  use m_time,     only : timab
46  use m_fftcore,  only : sphereboundary
47  use m_spacepar, only : hartre
48  use m_mpinfo,   only : proc_distrb_cycle
49  use m_fft,      only : fourwf, fourdp
50 
51  implicit none
52 
53  private

m_tddft/tddft [ Functions ]

[ Top ] [ m_tddft ] [ Functions ]

NAME

 tddft

FUNCTION

 Compute the excitation energies within TDLDA
 from input wavefunctions, eigenenergies, and band occupations.

INPUTS

  cg(2,mpw*nspinor*mband*mkmem*nsppol)=wf in G space
  dtfil <type(datafiles_type)>=variables related to files
  dtset <type(dataset_type)>=all input variables for this dataset
  eigen(mband*nkpt*nsppol)=array for holding eigenvalues (hartree)
  etotal=total energy of the ground-state (Ha)
  gmet(3,3)=metrix tensor in G space in Bohr**-2.
  gprimd(3,3)=dimensional reciprocal space primitive translations
  gsqcut=cutoff on (k+G)^2 (bohr^-2)
  kg(3,mpw*mkmem)=reduced planewave coordinates.
  kxc(nfft,nkxc)=exchange-correlation kernel
  mband=maximum number of bands
  mgfftdiel=maximum size of 1D FFTs, for the computation of the dielectric matrix
  mkmem=number of k-points treated by this node.
  mpi_enreg=information about MPI parallelization
  mpw=maximum allowed value for npw
  nfft=(effective) number of FFT grid points (for this processor)
       WARNING about parallelization: see below
  ngfftdiel(18)=contain all needed information about 3D FFT, for dielectric matrix,
                see ~abinit/doc/variables/vargs.htm#ngfft
  nkpt=number of k points
  nkxc=second dimension of the array kxc (see rhotoxc for a description)
  npwarr(nkpt)=number of planewaves at each k point
  nspinor=number of spinorial components of the wavefunctions
  nsppol=1 for unpolarized, 2 for spin-polarized
  occ(mband*nkpt*nsppol)=
          occupation numbers for each band (usually 2.0) at each k point
  ucvol=unit cell volume (Bohr**3)
  wffnew=unit number for current wf disk file

OUTPUT

  (only writing)

 WARNING:
 This routine should not be parallelized on space for the time being,
    because the already existing parallelisation is not the usual one, found
    in the majority of ABINIT routines.

NOTES

 * Only accept nspinor=1, nsppol=1, nkpt=1 (Gamma point), and occopt<3
   (insulating occupation numbers).
   It is expected to make it work for nsppol=2 in the future.

 * For the oscillator strengths, see the paper
   ''Time-Dependent Density Functional Response Theory of Molecular
     systems: Theory, Computational Methods, and Functionals'', by M.E. Casida,
   in Recent Developments and Applications of Modern Density Functional
   Theory, edited by J.M. Seminario (Elsevier, Amsterdam, 1996).

PARENTS

      vtorho

CHILDREN

      fourdp,fourwf,hartre,matr3inv,mpi_bcast,mpi_gatherv,mpi_reduce
      mpi_scatterv,sort_dp,sphereboundary,timab,wrtout,xmpi_barrier
      xmpi_bcast,xmpi_exch,xmpi_sum,zhpev

SOURCE

 132  subroutine tddft(cg,dtfil,dtset,eigen,etotal,gmet,gprimd,gsqcut,&
 133 &  kg,kxc,mband,mgfftdiel,mkmem,mpi_enreg,mpw,nfft,ngfftdiel,nkpt,nkxc,&
 134 &  npwarr,nspinor,nsppol,occ,ucvol,wffnew)
 135 
 136 
 137 !This section has been created automatically by the script Abilint (TD).
 138 !Do not modify the following lines by hand.
 139 #undef ABI_FUNC
 140 #define ABI_FUNC 'tddft'
 141 !End of the abilint section
 142 
 143  implicit none
 144 
 145 !Arguments ------------------------------------
 146  integer, intent(in) :: mband,mgfftdiel,mkmem,mpw,nfft,nkpt,nkxc,nsppol
 147  integer, intent(in) :: nspinor
 148  real(dp), intent(in) :: etotal,gsqcut,ucvol
 149  type(datafiles_type), intent(in) :: dtfil
 150  type(dataset_type), intent(in) :: dtset
 151  type(MPI_type), intent(in) :: mpi_enreg
 152  type(wffile_type), intent(inout) :: wffnew
 153  integer, intent(in) :: kg(3,mpw*mkmem),ngfftdiel(18),npwarr(nkpt)
 154  real(dp), intent(in) :: cg(2,mpw*nspinor*mband*mkmem*nsppol),eigen(mband*nkpt*nsppol)
 155  real(dp), intent(in) :: gmet(3,3),gprimd(3,3),kxc(nfft,nkxc),occ(mband*nkpt*nsppol)
 156 
 157 !Local variables-------------------------------
 158  integer,parameter :: nexcitout=20
 159  integer :: cplex,i1,i2,i3,iband,idir,ier,ierr,iexcit,iexcit1,iexcit2,ifft
 160  integer :: old_iexcit,ii,jj,isppol,jsppol,isppol1,isppol2,isppol_l,isppol_n
 161  integer :: isppol_n1,isppol_n2,iocc_n1,iocc_n2,iunocc_n1,iunocc_n2,temp_unit2
 162  integer :: ikpt,index,iocc,iocc1,iocc2,iocc_l,iocc_n
 163  integer :: istwf_k,iunocc,iunocc1,iunocc2,iunocc_l,iunocc_n,istate,jexcit
 164  integer :: jexcit_cbase,master,mcg_disk,me_loc
 165  integer :: nband_k(nsppol), nband_occ(nsppol), nband_unocc(nsppol)
 166  integer :: nstate_k, nstate_occ, nstate_unocc, nexcit_pol(nsppol)
 167  integer :: nstate_win,ndiel,ndiel1,ndiel2,ndiel3,ndiel4
 168  integer :: ndiel5,ndiel6,nexcit,nexcit_max,nexcit_win,nfftdiel,nlargest,nnext
 169  integer :: nnext1,nnext2
 170  integer :: nproc_loc,npw_k,pole_approx,sing_trip,spaceComm,tim_fourwf
 171  integer :: tim_rwwf,save_iomode
 172  integer :: rec,recl,idummy,jdummy
 173  real(dp) :: buffer,buffer_inv,diffeig,eigunocc,emax_win
 174  real(dp) :: factor,ff,flargest,fnext,fr_invsquare,fr_power
 175  real(dp) :: fnext1,fnext2
 176  real(dp) :: normint,myproduct,saa,sab,sbb
 177  real(dp) :: sumx
 178  real(dp) :: sum_kernel(2/nsppol)
 179  real(dp) :: weight,xx
 180  logical :: am_master,file_exist
 181  logical, allocatable :: done_excit(:,:),done_sexc(:),done_sexc2(:)
 182  character(len=18) :: chain1,chain2
 183  character(len=500) :: message
 184  integer,allocatable :: flag_state_win(:),gbound(:,:),indarr(:),index_state(:)
 185  integer,allocatable :: kg_k(:,:)
 186  integer,allocatable :: excit_coords(:,:)
 187  integer :: count_to_do, count, displ, countmax, displmax
 188  integer :: ijexcit, ijexcit2, sendcount
 189  real(dp) :: f_sing_trip(2/nsppol),sendbuf(5-nsppol)
 190  real(dp) :: cauchy(7),poscart(3),rprimd(3,3),tsec(2),dummy(2,1)
 191  integer :: iomode,action,me,nmaster,sender,source,sread,sskip
 192  integer :: formeig,icg,ikg,nband_k_
 193  logical :: mydata, tmaster, swrite
 194  integer,allocatable ::  kg_disk(:,:)
 195  real(dp),allocatable :: cg_disk(:,:),cg_tmp(:,:)
 196  real(dp),allocatable :: cwavef(:,:),eexcit(:)
 197  real(dp),allocatable :: eexcit2(:)
 198  real(dp),allocatable :: matr(:)
 199  real(dp),allocatable :: kxc_for_tddft(:,:,:,:,:,:),omega_tddft_casida(:,:,:,:,:,:,:)
 200  real(dp),allocatable :: osc_str(:,:),pos(:,:),rhoaug(:,:,:),rhog(:,:)
 201  real(dp),allocatable :: sexc(:,:),sqrtks(:),vec(:,:,:),vhartr(:),wfprod(:,:,:)
 202  real(dp) :: omega_tddft_casida_dummy(2/nsppol)
 203  real(dp),allocatable :: wfraug(:,:,:,:),wfrspa(:,:,:,:),work(:),zhpev1(:,:)
 204  real(dp),allocatable :: zhpev2(:)
 205 #if defined HAVE_MPI
 206  integer :: iproc
 207  integer :: ipwnbd
 208  integer,allocatable :: counts(:),displs(:),recvcounts(:),tmpbuf(:)
 209  real(dp), allocatable :: recvbuf(:,:)
 210 #endif
 211 
 212 ! *************************************************************************
 213 
 214 !Init mpi_comm
 215  spaceComm=mpi_enreg%comm_cell
 216 
 217  am_master=.true.
 218  master = 0
 219  nproc_loc = xmpi_comm_size(spaceComm) !Init ntot proc max
 220  me_loc    = xmpi_comm_rank(spaceComm) !Define who i am
 221 
 222 #if defined HAVE_MPI
 223  if (me_loc/=0) then
 224    am_master=.FALSE.
 225  end if
 226  write(message, '(a,i3,a)' ) ' TDDFT ',nproc_loc,' CPU synchronized'
 227  call wrtout(std_out,message,'COLL')
 228  write(message, '(a,3D12.5,a,3D12.5,a,3D12.5)' ) ' gmet ',&
 229 & gmet(1,1),gmet(1,2),gmet(1,3),ch10,&
 230 & gmet(2,1),gmet(2,2),gmet(2,3),ch10,&
 231 & gmet(3,1),gmet(3,2),gmet(3,3)
 232  call wrtout(std_out,message,'COLL')
 233 #endif
 234 
 235 
 236 !COMMENT these values should become arguments
 237 !the two first define the energy window
 238 
 239  emax_win=greatest_real*tol6
 240  if(dtset%td_maxene>tol6)then
 241    emax_win = dtset%td_maxene
 242  end if
 243 
 244  call timab(95,1,tsec)
 245 
 246  istwf_k=dtset%istwfk(1)
 247 
 248  if(nkpt/=1 .or. &
 249 & abs(dtset%kptns(1,1))+abs(dtset%kptns(2,1))+abs(dtset%kptns(3,1))>1.0d-6 )then
 250    write(message, '(a,a,a,a,a,i4,a,3es14.6,a,a,a,a,a)' )&
 251 &   'The computation of excited states using TDDFT is only allowed',ch10,&
 252 &   'with nkpt=1, kpt=(0 0 0), but the following values are input:',ch10,&
 253 &   'nkpt=',nkpt,', kpt=',dtset%kptns(1:3,1),'.',ch10,&
 254 &   'Action: in the input file, set nkpt to 1 and kpt to 0 0 0 ,',ch10,&
 255 &   'or change iscf.'
 256    MSG_ERROR(message)
 257  end if
 258 
 259  if(nspinor/=1)then
 260    write(message, '(a,a,a,a,a,a,a)' )&
 261 &   'The computation of excited states using TDDFT is restricted',ch10,&
 262 &   'for the time being to nspinor=1, while input nspinor=2.',ch10,&
 263 &   'Action: if you want to compute excited states within TDDFT,',ch10,&
 264 &   'set nsppol to 1 in the input file. Otherwise, do not use iscf=-1.'
 265    MSG_ERROR(message)
 266  end if
 267 
 268 
 269  if(nsppol==2 .and. (dtset%ixc==22 .or. dtset%ixc==20))then
 270    write(message, '(a,a,a,a,a,a,a,a,a,a,a)' )&
 271 &   'The computation of excited states using TDDFT in the spin',ch10,&
 272 &   'polarized case for the time being cannot be used with ixc=20',ch10,&
 273 &   'or ixc=22',ch10,&
 274 &   'Action: if you want to compute excited states within TDDFT,',ch10,&
 275 &   'set ixc different from 20 or 22. Otherwise, do not use iscf=-1',ch10,&
 276 &   'with nsppol=2.'
 277    MSG_ERROR(message)
 278  end if
 279 
 280 
 281  if(dtset%occopt>2)then
 282    write(message, '(a,a,a,i2,a,a,a,a,a)' )&
 283 &   'The computation of excited states using TDDFT is only allowed',ch10,&
 284 &   'with occopt=0, 1, or 2, while input occopt=',dtset%occopt,'.',ch10,&
 285 &   'Action: if you want to compute excited states within TDDFT,',ch10,&
 286 &   'set occopt=0, 1, or 2 in the input file. Otherwise, do not use iscf=-1.'
 287    MSG_ERROR(message)
 288  end if
 289 
 290 !Examine the occupation numbers, and determine the number of
 291 !occupied and unoccupied states and band.
 292 !States are numerated as usual in Abinit, before all spin up band
 293 !and after all spin down bands.
 294 !Note that if nsppol==1 nstate=nband_k
 295  do isppol=1,nsppol
 296    nband_k(isppol)=dtset%nband(isppol)
 297    nband_occ(isppol)=0
 298    do iband=1,nband_k(isppol)
 299      if(abs(occ(iband+(isppol-1)*nband_k(1))-two/nsppol)<tol6)  &
 300 &     nband_occ(isppol)=nband_occ(isppol)+1
 301    end do
 302    nband_unocc(isppol)=nband_k(isppol)-nband_occ(isppol)
 303 !  next line make no sense if spin flip is taken into account
 304    nexcit_pol(isppol)=nband_occ(isppol)*nband_unocc(isppol)
 305  end do
 306  nstate_k=nband_k(1)+(nsppol-1)*nband_k(nsppol)
 307  nstate_occ=nband_occ(1)+(nsppol-1)*nband_occ(nsppol)
 308  nstate_unocc=nstate_k-nstate_occ
 309 !next line to be changed if spin fli is taken into account
 310  nexcit=nexcit_pol(1)+(nsppol-1)*nexcit_pol(nsppol)
 311 
 312 !number of plane wave (does it work even for nsppol=2 ??)
 313  npw_k=npwarr(1)
 314 
 315 !mux number of excitations that is taken into account
 316  if(dtset%td_mexcit==0)then
 317    nexcit_max=nexcit
 318  else
 319    nexcit_max =dtset%td_mexcit
 320  end if
 321 
 322 !DEBUG
 323 !write(std_out,*) nband_occ(1),nband_unocc(1)
 324 !write(std_out,*) nband_occ(nsppol),nband_unocc(nsppol)
 325 !END DEBUG
 326 
 327 
 328  if(nsppol==1)then
 329    write(message, '(a,a,a,a,i4,a,i4,a,a,i4,a,a,a,i6,a)' )ch10,&
 330 &   ' *** TDDFT : computation of excited states *** ',ch10,&
 331 &   ' Splitting of',dtset%nband(1),' states in',nband_occ(1),' occupied states,',&
 332 &   ' and',nband_unocc(1),' unoccupied states,',ch10,&
 333 &   ' giving',nexcit,' excitations.'
 334    call wrtout(std_out,message,'COLL')
 335    call wrtout(ab_out,message,'COLL')
 336  else
 337    write(message, '(a,a,a,a,i4,a,i4,a,a,i4,a,a,a,i6,a,a,a)' )ch10,&
 338 &   ' *** TDDFT : computation of excited states *** ',ch10,&
 339 &   ' Splitting of',nstate_k,' states in',nstate_occ,' occupied states,',&
 340 &   ' and',nstate_unocc,' unoccupied states,',ch10,&
 341 &   ' giving',nexcit,' excitations. Note that spin flip is not possible actually.',ch10,&
 342 &   ' So the number of excitation is the half of the product of the number of state'
 343    call wrtout(std_out,message,'COLL')
 344    call wrtout(ab_out,message,'COLL')
 345  end if
 346 
 347 !Allocate the matrices to be diagonalized.
 348 !Use a simple storage mode, to be improved in the future.
 349  ii=max(nband_occ(1),nband_occ(nsppol))
 350  jj=max(nband_unocc(1),nband_unocc(nsppol))
 351  ABI_ALLOCATE(omega_tddft_casida,(ii,jj,nsppol,ii,jj,nsppol,2/nsppol))
 352  ABI_ALLOCATE(eexcit,(nexcit))
 353  ABI_ALLOCATE(sqrtks,(nexcit))
 354  ABI_ALLOCATE(flag_state_win,(nstate_k))
 355  omega_tddft_casida(:,:,:,:,:,:,:)=zero
 356 
 357 
 358 !Fill the diagonal elements with square of differences of KS eigenvalues
 359 !(also not very efficient, but OK for the present first coding)
 360 !Also compute the square root of Kohn-Sham eigenvalue differences
 361  do isppol=1,nsppol
 362    do iunocc=1,nband_unocc(isppol)
 363      eigunocc=eigen(iunocc+nband_occ(isppol)+(isppol-1)*nband_k(1))
 364      do iocc=1,nband_occ(isppol)
 365        iexcit=iocc+(isppol-1)*nexcit_pol(1)+nband_occ(isppol)*(iunocc-1)
 366        diffeig=eigunocc-eigen(iocc+(isppol-1)*nband_k(1))
 367        do sing_trip=1,2/nsppol
 368          omega_tddft_casida(iocc,iunocc,isppol,iocc,iunocc,isppol,sing_trip)=diffeig**2
 369        end do
 370        eexcit(iexcit)=diffeig
 371        sqrtks(iexcit)=sqrt(diffeig)
 372      end do
 373    end do
 374  end do
 375 
 376 
 377 
 378 !Sort the excitation energies : note that the array eexcit is reordered
 379  ABI_ALLOCATE(indarr,(nexcit))
 380  indarr(:)=(/ (ii,ii=1,nexcit) /)
 381  call sort_dp(nexcit,eexcit,indarr,tol14)
 382 
 383 !Determine an energy window for the excitations
 384 !to take into account. This is necessary for large systems
 385 
 386  nexcit_win = 0
 387  do iexcit = 1, nexcit
 388    if ((eexcit(iexcit) < emax_win ).and.(nexcit_win < nexcit_max)) then
 389      nexcit_win = nexcit_win + 1
 390 
 391 !    DEBUG
 392 !    write(message,'(a,F12.5,a,a,i2,a,a,i2)') 'excitation energy:', eexcit(indarr(iexcit)),ch10, &
 393 !    &                                        'excitation number:', indarr(iexcit),ch10,         &
 394 !    &                                        'nexcit_win:       ', nexcit_win
 395 !    call wrtout(std_out,message,'COLL')
 396 !    ENDDEBUG
 397 
 398    end if
 399  end do
 400 
 401 !identification of the bands contributing to the
 402 !nexcit_win  excitations within the window
 403 
 404 
 405  nstate_win = 0
 406  flag_state_win(:) = 0
 407  do iexcit = 1, nexcit_win
 408    iexcit1 = indarr(iexcit)
 409    isppol1 = min((iexcit1-1)/nexcit_pol(1) +1,2)
 410    iunocc1 = (iexcit1-(isppol1-1)*nexcit_pol(1)-1)/nband_occ(isppol1)+1
 411    iocc1   = iexcit1-(isppol1-1)*nexcit_pol(1)-(iunocc1-1)*nband_occ(isppol1)
 412    if (flag_state_win(nband_occ(isppol1)+(isppol1-1)*nband_k(1)+iunocc1)==0) &
 413 &   flag_state_win(nband_occ(isppol1)+(isppol1-1)*nband_k(1)+iunocc1) =1
 414    if (flag_state_win(iocc1+(isppol1-1)*nband_k(1))==0) &
 415 &   flag_state_win(iocc1+(isppol1-1)*nband_k(1)) =1
 416 !  DEBUG
 417 !  write(message,'(a,i2,a,a,i2,i2,a,a,i2,a,a,i2)') 'isppol:', isppol1,ch10, &
 418 !  &                                       'iocc,iunocc:', iocc1,iunocc1,ch10,         &
 419 !  &                                       'flag_state_win:', flag_state_win(iocc1+(isppol1-1)*nband_k(1)),  &
 420 !  &                               ch10,   'flag_state_win:', flag_state_win(nband_occ(isppol1)+(isppol1-1)*nband_k(1)+iunocc1)
 421 !  call wrtout(std_out,message,'COLL')
 422 !  END DEBUG
 423 
 424  end do
 425 
 426  do isppol=1,nsppol
 427    do iband=1,nband_k(isppol)
 428      nstate_win=nstate_win+flag_state_win(iband+(isppol-1)*nband_k(1))
 429    end do
 430  end do
 431 
 432 
 433  write(message,'(a,a,i5)') ch10,'Nr of states to Fourier transform : ',nstate_win
 434  call wrtout(std_out,message,'COLL')
 435 
 436  ndiel1=ngfftdiel(1) ; ndiel2=ngfftdiel(2) ; ndiel3=ngfftdiel(3)
 437 !ndiel4,ndiel5,ndiel6 are FFT dimensions, modified to avoid cache trashing
 438  ndiel4=ngfftdiel(4) ; ndiel5=ngfftdiel(5) ; ndiel6=ngfftdiel(6)
 439 
 440 !The evaluation of integrals, later, needs the following factor
 441  normint=one/(ucvol*dble(ndiel1*ndiel2*ndiel3))
 442 
 443 !Setup the positions in real space for later integration
 444  call matr3inv(gprimd,rprimd)
 445 
 446  ABI_ALLOCATE(pos,(max(ndiel1,ndiel2,ndiel3),3))
 447 
 448 !Select the reduced position of the point with respect to the box center,
 449 !in the interval ]-0.5,0.5].
 450  buffer=0.05_dp ; buffer_inv=one/buffer
 451  do idir=1,3
 452    if(idir==1)ndiel=ndiel1
 453    if(idir==2)ndiel=ndiel2
 454    if(idir==3)ndiel=ndiel3
 455    do ii=1,ndiel
 456 !    dtset%boxcenter(3)=reduced coordinates of the center of the box,
 457 !    in view of the computation of the oscillator strength
 458      pos(ii,idir)=(ii-1)/(one*ndiel)-dtset%boxcenter(idir)
 459      pos(ii,idir)=pos(ii,idir)-nint(pos(ii,idir)-tol12)
 460 !    The linear behaviour is cut-off when one becomes
 461 !    close to the boundaries : the buffer allows to match smoothly
 462 !    one side of the cell to the other. This is important
 463 !    to get rid of small breakings of symmetry, that are
 464 !    confusing in accurate tests
 465      if(abs(pos(ii,idir))>half-buffer)then
 466 !      xx is always positive, and goes linearly from 1 to 0
 467 !      in the buffer region
 468        xx=(half-abs(pos(ii,idir)))*buffer_inv
 469 !      The cut-off is applied to pos(:,:)
 470        pos(ii,idir)=pos(ii,idir)*xx*(two-xx)
 471 !      DEBUG
 472 !      if (idir==1)then
 473 !      write(std_out,'(i2)') ndiel
 474 !      write(std_out,'(a,i2,a,F12.5,F12.5)')'idiel : ',ii,'   x : ',pos(ii,idir),&
 475 !      &    dtset%boxcenter(idir)
 476 !      endif
 477 !      ENDDEBUG
 478      end if
 479    end do ! ii
 480  end do ! idir
 481 
 482 !need to run in MPI I/O case
 483  if (wffnew%iomode == IO_MODE_MPI ) then
 484    save_iomode=wffnew%iomode
 485    wffnew%iomode = IO_MODE_FORTRAN
 486  else
 487 !  Do not store value but set to have save_iomode /= 1
 488    save_iomode = IO_MODE_FORTRAN
 489  end if
 490 
 491 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 492 !2009 Chunping Hu
 493 !need to collect wavefunctions from processors to master
 494  me=me_loc
 495 
 496  tim_rwwf =0
 497  source = master
 498  sread = master
 499  tmaster=(master==me)
 500  swrite=tmaster
 501  sender=-1
 502 
 503  iomode=wffnew%iomode
 504 
 505  if(am_master)then
 506 #if defined HAVE_MPI
 507    ABI_ALLOCATE(cg_tmp,(2,mpw*nspinor*mband*nsppol))
 508 #endif
 509  end if
 510 
 511  ABI_ALLOCATE(kg_disk,(3,mpw))
 512  mcg_disk=mpw*nspinor*mband
 513  formeig=0
 514 
 515 #if defined HAVE_MPI
 516  call xmpi_barrier(spaceComm)
 517  ABI_ALLOCATE(cg_disk,(2,mcg_disk))
 518 #endif
 519 
 520  icg=0
 521  if(mpi_enreg%paralbd==0) tim_rwwf=6
 522  if(mpi_enreg%paralbd==1)tim_rwwf=12
 523 
 524  do isppol=1,nsppol
 525    ikg=0
 526    do ikpt=1,nkpt
 527      nband_k_=dtset%nband(ikpt+(isppol-1)*nkpt)
 528      npw_k=npwarr(ikpt)
 529 #if defined HAVE_MPI
 530      if (dtset%usewvl == 0) then
 531        call xmpi_barrier(spaceComm)
 532 !      Must transfer the wavefunctions to the master processor
 533 !      Separate sections for paralbd=1 or other values ; might be merged
 534        if(mpi_enreg%paralbd==0) then
 535          nmaster=0
 536          source=minval(mpi_enreg%proc_distrb(ikpt,1:nband_k_,isppol))
 537          mydata=.false.
 538          if(source==me)mydata=.true.
 539          action=0
 540 !        I am the master node, and I have the data in cg or cg_disk
 541          if((tmaster).and.(mydata))action=1
 542 !        I am not the master, and I have the data => send to master
 543          if((.not.tmaster).and.(mydata))action=2
 544 !        I am the master, and I receive the data
 545          if((tmaster).and.(.not.mydata))action=3
 546 !        I have the data in cg or cg_disk ( MPI_IO case)
 547          if (iomode==IO_MODE_MPI) then
 548            action = 0
 549            sender=-1
 550            swrite=.false.
 551            if (mydata)then
 552              action=1
 553              swrite=.true.
 554              sender=me
 555            end if
 556          end if
 557 !        I am the master node, and I have the data in cg or cg_disk
 558 !        I have the data in cg or cg_disk ( MPI_IO case)
 559          if(action==1)then
 560 !          Copy from kg to kg_disk
 561            kg_disk(:,1:npw_k)=kg(:,1+ikg:npw_k+ikg)
 562 !          Copy from cg to cg_disk
 563            do ipwnbd=1,nband_k_*npw_k*nspinor
 564              cg_disk(1,ipwnbd)=cg(1,ipwnbd+icg)
 565              cg_disk(2,ipwnbd)=cg(2,ipwnbd+icg)
 566            end do
 567          end if
 568 !        I am not the master, and I have the data => send to master
 569 !        I am the master, and I receive the data
 570          if ( action==2.or.action==3) then
 571            call timab(48,1,tsec)
 572            if(action==2)then
 573              call xmpi_exch(kg(:,1+ikg:npw_k+ikg),3*npw_k,source,kg_disk,nmaster,spaceComm,ierr)
 574              call xmpi_exch(cg(:,icg+1:icg+nband_k_*npw_k*nspinor),2*nband_k_*npw_k*nspinor &
 575 &             ,source,cg_disk,nmaster,spaceComm,ierr)
 576            else
 577              call xmpi_exch(kg_disk,3*npw_k,source,kg_disk,nmaster,spaceComm,ierr)
 578              call xmpi_exch(cg_disk,2*nband_k_*npw_k*nspinor,source,cg_disk,nmaster,spaceComm,ierr)
 579            end if
 580            call timab(48,2,tsec)
 581          end if
 582        else if(mpi_enreg%paralbd==1)then
 583          nmaster=0
 584 #if defined HAVE_MPI_IO
 585          sender=-1
 586          if( iomode ==IO_MODE_MPI ) then
 587            nmaster=mpi_enreg%proc_distrb(ikpt,1,isppol)
 588            sender=nmaster
 589          end if
 590 #endif
 591 !        Note the loop over bands
 592          do iband=1,nband_k_
 593 !          The message passing related to kg is counted as one band
 594            action=0
 595 !          I am the master node, and I have the data in cg or cg_disk
 596            if( mpi_enreg%proc_distrb(ikpt,iband,isppol)==nmaster .and. me==nmaster) then
 597              action=1
 598 !            I am not the master, and I have the data => send to master
 599            elseif( mpi_enreg%proc_distrb(ikpt,iband,isppol)==me .and. me/=nmaster ) then
 600              action = 2
 601 !            I am the master, and I receive the data
 602            elseif( mpi_enreg%proc_distrb(ikpt,iband,isppol)/=me .and. me==nmaster ) then
 603              action=3
 604            end if
 605            if(action==1) then
 606 !            I am the master node, and I have the data in cg or cg_disk
 607 !            Copy from kg to kg_disk
 608              if(iband==1)kg_disk(:,1:npw_k)=kg(:,1+ikg:npw_k+ikg)
 609 !            Copy from cg to cg_disk
 610              do ipwnbd=1,npw_k*nspinor
 611                cg_disk(1,(iband-1)*npw_k*nspinor+ipwnbd)= cg(1,(iband-1)*npw_k*nspinor+ipwnbd+icg)
 612                cg_disk(2,(iband-1)*npw_k*nspinor+ipwnbd)= cg(2,(iband-1)*npw_k*nspinor+ipwnbd+icg)
 613              end do
 614            end if  ! action=1
 615            if ( action==2.or.action==3) then
 616 !            action=2 :  I am not the master, and I have the data => send to master
 617 !            action=3 :  I am the master, and I receive the data
 618              call timab(48,1,tsec)
 619              if ( iband == 1 ) then
 620                if (action==2) then
 621                  call xmpi_exch(kg(:,1+ikg:npw_k+ikg),3*npw_k,mpi_enreg%proc_distrb(ikpt,iband,isppol) &
 622 &                 ,kg_disk,nmaster,spaceComm,ierr)
 623                else
 624                  call xmpi_exch(kg_disk,3*npw_k,mpi_enreg%proc_distrb(ikpt,iband,isppol)  &
 625 &                 ,kg_disk,nmaster,spaceComm,ierr)
 626                end if
 627              end if       ! iband =1
 628              ipwnbd=(iband-1)*npw_k*nspinor
 629              if (action==2)then
 630                call xmpi_exch( cg(:,ipwnbd+icg+1:ipwnbd+icg+npw_k*nspinor),2*npw_k*nspinor &
 631 &               ,mpi_enreg%proc_distrb(ikpt,iband,isppol)                    &
 632 &               ,cg_disk(:,ipwnbd+1:ipwnbd+npw_k*nspinor),nmaster,spaceComm,ierr)
 633              else
 634                call xmpi_exch( cg_disk(:,ipwnbd+1:ipwnbd+npw_k*nspinor),2*npw_k*nspinor    &
 635 &               ,mpi_enreg%proc_distrb(ikpt,iband,isppol)                    &
 636 &               ,cg_disk(:,ipwnbd+1:ipwnbd+npw_k*nspinor),nmaster,spaceComm,ierr)
 637              end if
 638              call timab(48,2,tsec)
 639            end if        ! action=2 or action=3
 640            if(iomode ==IO_MODE_MPI) then
 641 !            I have the data in cg or cg_disk
 642              swrite=.false.
 643              if (nmaster == me) then
 644                swrite=.true.
 645              end if
 646            end if
 647 !          End of loop over bands
 648          end do
 649 !        End of paralbd=1
 650        end if
 651      end if
 652 #endif
 653 
 654 !    The wavefunctions for the present k point and spin are stored into cg_tmp
 655      if(am_master)then
 656 #if defined HAVE_MPI
 657        cg_tmp(:,icg+1:icg+nband_k_*npw_k*nspinor)=cg_disk(:,:)
 658 #endif
 659      end if
 660 
 661      sskip=1
 662 #if defined HAVE_MPI
 663      if (dtset%usewvl == 0) then
 664        sskip=0
 665        if(.not.(proc_distrb_cycle(mpi_enreg%proc_distrb,ikpt,1,nband_k_,isppol,me)))sskip=1
 666      end if
 667 #endif
 668      if(sskip==1)then
 669        icg=icg+npw_k*nspinor*nband_k_
 670        ikg=ikg+npw_k
 671      end if
 672 
 673    end do ! ikpt
 674  end do ! isppol
 675  ABI_DEALLOCATE(kg_disk)
 676 #if defined HAVE_MPI
 677  ABI_DEALLOCATE(cg_disk)
 678 #endif
 679 !!!!!!!end of collecting wavefunction to master!!!!!!
 680 
 681 
 682  if(am_master)then
 683 !  -----------------------------------------------------------
 684 !  The disk access is only done by master...
 685 
 686    ABI_ALLOCATE(gbound,(2*mgfftdiel+8,2))
 687    ABI_ALLOCATE(kg_k,(3,npw_k))
 688 
 689    ikpt=1
 690 !  Only one k point
 691 !  do isppol=1,nsppol
 692    kg_k(:,1:npw_k)=kg(:,1:npw_k)
 693    call sphereboundary(gbound,istwf_k,kg_k,mgfftdiel,npw_k)
 694 !  enddo
 695 
 696  end if ! am_master
 697 
 698 !need to run in MPI I/O case
 699  if ( save_iomode == 1 )  wffnew%iomode = IO_MODE_MPI
 700 !call wrtout(std_out,'After reading the wavefunction','COLL')
 701 
 702 !Use a simple implementation for the computation of the kernel elements
 703  if (am_master) then
 704    ABI_ALLOCATE(cwavef,(2,mpw))
 705    ABI_ALLOCATE(rhoaug,(ndiel4,ndiel5,ndiel6))
 706    ABI_ALLOCATE(wfraug,(2,ndiel4,ndiel5,ndiel6))
 707  end if
 708  ABI_ALLOCATE(index_state,(nstate_k))
 709 
 710 ! all real-space states are kept in memory
 711  ABI_ALLOCATE(wfrspa,(ndiel4,ndiel5,ndiel6,nstate_win))
 712 
 713 !DEBUG
 714 !write(message,'(a)') 'After allocating wfrspa'
 715 !call wrtout(std_out,message,'COLL')
 716 !ENDDEBUG
 717 
 718  weight=zero
 719 
 720 !Generate states in real space, only for states contributing to excitations in window
 721  istate=0
 722 
 723  do isppol=1,nsppol
 724    do iband=1,nband_k(isppol)
 725 
 726      if(flag_state_win(iband+(isppol-1)*nband_k(1)) == 1) then
 727        istate=istate+1
 728        index_state(iband+(isppol-1)*nband_k(1))=istate
 729 
 730        if (am_master) then
 731 #if defined HAVE_MPI
 732 !        Obtain Fourier transform in fft box
 733          cwavef(:,1:npw_k)=cg_tmp(:,1+(iband-1)*npw_k+(isppol-1)* &
 734 &         (npw_k*nband_k(1)) : iband*npw_k+(isppol-1)*(npw_k*nband_k(1)))
 735 #else
 736          cwavef(:,1:npw_k)=cg(:,1+(iband-1)*npw_k+(isppol-1)* (npw_k*nband_k(1)) : iband*npw_k+(isppol-1)*(npw_k*nband_k(1)))
 737 #endif
 738 
 739 !        DEBUG
 740 !        write(std_out,*)' iband : ',iband, ' isppol', isppol, '  -> index ', &
 741 !        &            istate,index_state(iband+(isppol-1)*nband_k(1))
 742 !        ENDDEBUG
 743 
 744          tim_fourwf=14
 745 !        This call should be made by master, and then the results be sent to the other procs
 746 
 747          call fourwf(1,rhoaug,cwavef,dummy,wfraug,gbound,gbound,&
 748 &         istwf_k,kg_k,kg_k,mgfftdiel,mpi_enreg,1,ngfftdiel,npw_k,1,ndiel4,ndiel5,ndiel6,&
 749 &         0,dtset%paral_kgb,tim_fourwf,weight,weight,use_gpu_cuda=dtset%use_gpu_cuda)
 750 
 751 !        DEBUG
 752 !        write(std_out,'(a,i5)')' After Fourier proc ',me_loc
 753 !        ENDDEBUG
 754 
 755 !        Fix the phase, and checks that the wavefunction is real
 756 !        (should be merged with routine fxphas)
 757          saa=zero ; sab=zero ; sbb=zero
 758          do i3=1,ndiel3
 759            do i2=1,ndiel2
 760              do i1=1,ndiel1
 761                saa=saa+wfraug(1,i1,i2,i3)**2
 762                sbb=sbb+wfraug(2,i1,i2,i3)**2
 763                sab=sab+wfraug(1,i1,i2,i3)*wfraug(2,i1,i2,i3)
 764              end do
 765            end do
 766          end do
 767 
 768          if(sbb>5.0d-9)then
 769 
 770            write(message, '(a,a,a,es20.10,a,i4,a,i2,a)' )&
 771 &           'The imaginary part of wavefunctions should be practically zero.',ch10,&
 772 &           'This is not the case, since sbb=',sbb,' for iband=',iband,'with sppol=',1,'.'
 773            MSG_WARNING(message)
 774            if(sbb>1.0d-7)then
 775              MSG_ERROR("sbb>1.0d-7")
 776            end if
 777          end if
 778 
 779 !        Possibility of writing to disk
 780 
 781          wfrspa(:,:,:,istate)=wfraug(1,:,:,:)
 782        end if !am_master
 783 
 784      end if
 785 
 786 !    End loop on iband
 787    end do
 788 !  End loop on nsppol
 789  end do
 790 
 791  if (am_master) then
 792    ABI_DEALLOCATE(gbound)
 793    ABI_DEALLOCATE(kg_k)
 794    ABI_DEALLOCATE(cwavef)
 795    ABI_DEALLOCATE(rhoaug)
 796    ABI_DEALLOCATE(wfraug)
 797 #if defined HAVE_MPI
 798    ABI_DEALLOCATE(cg_tmp)
 799 #else
 800 #endif
 801  end if
 802  ABI_DEALLOCATE(flag_state_win)
 803 
 804 ! send wfrspa from master to world
 805  call xmpi_bcast(wfrspa,master,spaceComm,ierr)
 806  !call MPI_BCAST(wfrspa,nbuf,MPI_DOUBLE_PRECISION,master,spaceComm,ierr)
 807 
 808 
 809 !DEBUG
 810 !#if defined HAVE_MPI
 811 !call xmpi_barrier(spaceComm)
 812 !write(message,'(a)')' after iband loop synchronization done...'
 813 !call  wrtout(std_out,message,'COLL')
 814 !#endif
 815 !ENDDEBUG
 816 
 817 !Compute the xc kernel, in the form needed for the singlet or triplet
 818 !excitation energy.
 819 !In the case ixc=20, kxc vanishes, but no change is made here, for simplicity.
 820 !(ixc=20 implemented only in the not spin polyrized case)
 821 
 822 !DEBUG
 823 !write(std_out,*)' tddft : xc kernel '
 824 !do ifft=1,nkxc,41
 825 !write(std_out,*)ifft,kxc(ifft,1),kxc(ifft,2),kxc(ifft,3)
 826 !enddo
 827 !stop
 828 !ENDDEBUG
 829 
 830  ABI_ALLOCATE(kxc_for_tddft,(ndiel1,ndiel2,ndiel3,nsppol,nsppol,2/nsppol))
 831  if(dtset%ixc/=22)then
 832    do isppol=1,nsppol
 833      do jsppol=1,nsppol
 834        index=1
 835        do i3=1,ndiel3
 836          do i2=1,ndiel2
 837            do i1=1,ndiel1
 838              do sing_trip=1,2/nsppol
 839                kxc_for_tddft(i1,i2,i3,isppol,jsppol,sing_trip)=two/nsppol* &
 840 &               (kxc(index,isppol+jsppol-1)-(sing_trip-1)*kxc(index,2))
 841              end do
 842              index=index+1
 843            end do
 844          end do
 845        end do
 846      end do
 847    end do
 848  else
 849 !  This is for the Burke-Petersilka-Gross hybrid, with ixc=22
 850 !  However, the implementation in case of spin-polarized system should not be expected to be the correct one !
 851    do isppol=1,nsppol
 852      do jsppol=1,nsppol
 853        index=1
 854        do i3=1,ndiel3
 855          do i2=1,ndiel2
 856            do i1=1,ndiel1
 857              do sing_trip=1,2/nsppol
 858                kxc_for_tddft(i1,i2,i3,isppol,jsppol,sing_trip)=((-1)**(sing_trip+1))*kxc(index,2)
 859              end do
 860              index=index+1
 861            end do
 862          end do
 863        end do
 864      end do
 865    end do
 866  end if
 867 
 868  pole_approx=0
 869 
 870  ABI_ALLOCATE(excit_coords,(nexcit_win**2,2))
 871 
 872 #if defined HAVE_MPI
 873  ABI_ALLOCATE(counts,(0:nproc_loc-1))
 874  ABI_ALLOCATE(displs,(0:nproc_loc-1))
 875  ABI_ALLOCATE(recvcounts,(0:nproc_loc-1))
 876  ABI_ALLOCATE(recvbuf,(5-nsppol,nproc_loc-1))
 877 #endif
 878 
 879 !DEBUG
 880 !write(std_out,*)'before first loop'
 881 !ENDDEBUG
 882 
 883 !0000000000000000000000000000000000000000000000000000000
 884 !check if matrix file fname_tdexcit exists on disk
 885 !if the file is present,calculation is a continuation
 886  if (am_master) then
 887    inquire(file=trim(dtfil%fnametmp_tdexcit),exist=file_exist)
 888 !  for direct access to excitation file
 889    if(nsppol==1)then
 890      inquire(iolength=recl) omega_tddft_casida(1,1,1,1,1,1,1),omega_tddft_casida(1,1,1,1,1,1,1), iexcit,jexcit
 891    else
 892      inquire(iolength=recl) omega_tddft_casida(1,1,1,1,1,1,1), iexcit,jexcit
 893    end if
 894 
 895    temp_unit2 = get_unit()
 896    open(temp_unit2, file=trim(dtfil%fnametmp_tdexcit),form='unformatted', recl=recl, access='DIRECT')
 897 
 898    ABI_ALLOCATE(done_excit,(nexcit_win,nexcit_win))
 899 
 900    if(file_exist)then
 901      write(std_out,*)'TDDFT continues from a previous run'
 902      rec=0
 903      do iexcit=1,nexcit_win
 904        iexcit2 = indarr(iexcit)
 905        isppol2 = min((iexcit2-1)/nexcit_pol(1) +1,2)
 906        iunocc2 = (iexcit2-(isppol2-1)*nexcit_pol(1)-1)/nband_occ(isppol2)+1
 907        iocc2   = iexcit2-(isppol2-1)*nexcit_pol(1)-(iunocc2-1)*nband_occ(isppol2)
 908        do jexcit=1,nexcit_win
 909          iexcit1 = indarr(jexcit)
 910          isppol1 = min((iexcit1-1)/nexcit_pol(1) +1,2)
 911          iunocc1 = (iexcit1-(isppol1-1)*nexcit_pol(1)-1)/nband_occ(isppol1)+1
 912          iocc1   = iexcit1-(isppol1-1)*nexcit_pol(1)-(iunocc1-1)*nband_occ(isppol1)
 913 
 914          rec=rec+1 ! record of the entry in the excitation file
 915          if(nsppol==1)then
 916            read(temp_unit2,rec=rec) omega_tddft_casida_dummy(1), omega_tddft_casida_dummy(2), idummy, jdummy
 917          else
 918            read(temp_unit2,rec=rec) omega_tddft_casida_dummy(1), idummy, jdummy
 919          end if
 920          done_excit(jexcit,iexcit)= ( idummy /= -1 .and. jdummy /= -1 ) ! if true, eigsqr_singlet and eigsqr_triplet are ok
 921 !        and a true is marked in the logical array done_excit
 922          if (done_excit(jexcit,iexcit)) then
 923            do sing_trip=1,2/nsppol
 924              omega_tddft_casida(iocc1,iunocc1,isppol1,iocc2,iunocc2,isppol2,sing_trip)= &
 925 &             omega_tddft_casida_dummy(sing_trip)
 926            end do
 927          end if
 928        end do
 929      end do
 930 
 931    else
 932      write(std_out,*)'no excitation matrix on disk'
 933      write(std_out,*)'TDDFT starts from scratch'
 934      done_excit = .false. ! initialize the logical array to false
 935      rec=0
 936      do iexcit=1,nexcit_win
 937        do jexcit=1,nexcit_win
 938          rec=rec+1
 939          if(nsppol==1)then
 940            write(temp_unit2,rec=rec) zero,zero,-1,-1
 941          else
 942            write(temp_unit2,rec=rec) zero,-1,-1
 943          end if
 944        end do
 945      end do
 946    end if
 947 
 948 !  Need to list the elements to compute, taking the symmetry into account: valid only for Gamma point but this is already the case
 949    count_to_do=0
 950    do iexcit=1,nexcit_win
 951      do jexcit=1,iexcit
 952        if (.not. done_excit(jexcit,iexcit)) then
 953          count_to_do=count_to_do+1
 954          excit_coords(count_to_do,1)=iexcit
 955          excit_coords(count_to_do,2)=jexcit
 956        end if
 957      end do
 958    end do
 959 
 960    ABI_DEALLOCATE(done_excit)
 961 
 962 
 963 
 964 #if defined HAVE_MPI
 965 !  Compute limits for load balancing
 966    do iproc=0,nproc_loc-1
 967      displs(iproc)=(iproc*count_to_do)/nproc_loc
 968      counts(iproc)=min(((iproc+1)*count_to_do)/nproc_loc,count_to_do)-displs(iproc)
 969    end do
 970 #endif
 971 
 972  end if ! am_master
 973 
 974 
 975 #if defined HAVE_MPI
 976  call MPI_BCAST(count_to_do,1,MPI_INTEGER,master,spaceComm,ierr)
 977 #endif
 978 
 979  displ=(me_loc*count_to_do)/nproc_loc
 980  count=min(((me_loc+1)*count_to_do)/nproc_loc,count_to_do)-displ
 981  displmax=((nproc_loc-1)*count_to_do)/nproc_loc
 982  countmax=count_to_do-displmax
 983 
 984  write(message,'(A,I6)') 'Maximum number of matrix elements per processor = ',countmax
 985  call wrtout(std_out,message,'COLL')
 986 
 987 
 988 #if defined HAVE_MPI
 989 !Need to dispatch the elements to compute to the different processes
 990  ABI_ALLOCATE(tmpbuf,(nexcit_win**2))
 991  tmpbuf=0
 992  call MPI_Scatterv(excit_coords(1,1),counts,displs,MPI_INTEGER,tmpbuf,count,MPI_INTEGER,0,spaceComm,ierr)
 993  excit_coords(:,1)=tmpbuf(:)
 994  tmpbuf=0
 995  call MPI_Scatterv(excit_coords(1,2),counts,displs,MPI_INTEGER,tmpbuf,count,MPI_INTEGER,0,spaceComm,ierr)
 996  excit_coords(:,2)=tmpbuf(:)
 997  ABI_DEALLOCATE(tmpbuf)
 998 #endif
 999 
1000  nfftdiel=ndiel1*ndiel2*ndiel3
1001  ABI_ALLOCATE(wfprod,(ndiel1,ndiel2,ndiel3))
1002  ABI_ALLOCATE(work,(nfftdiel))
1003  ABI_ALLOCATE(sexc,(3,nexcit_win))
1004  ABI_ALLOCATE(done_sexc,(nexcit_win))
1005  ABI_ALLOCATE(done_sexc2,(nexcit_win))
1006  ABI_ALLOCATE(rhog,(2,nfftdiel))
1007  ABI_ALLOCATE(vhartr,(nfftdiel))
1008 
1009  sexc(:,:)=zero
1010  done_sexc(:)=.false.
1011 
1012 !----------------------------------------------------------
1013 !Main double loop
1014 
1015  old_iexcit=0
1016  do ijexcit=1,countmax
1017 !  we really loop only through count, but we need to go through countmax
1018 !  to make sure that all processes execute MPI_Gatherv below
1019    if (ijexcit <= count) then
1020      iexcit=excit_coords(ijexcit,1)
1021      jexcit=excit_coords(ijexcit,2)
1022 
1023      iexcit2 = indarr(iexcit)
1024      isppol2 = min((iexcit2-1)/nexcit_pol(1) +1,2)
1025      iunocc2 = (iexcit2-(isppol2-1)*nexcit_pol(1)-1)/nband_occ(isppol2)+1
1026      iocc2   = iexcit2-(isppol2-1)*nexcit_pol(1)-(iunocc2-1)*nband_occ(isppol2)
1027 
1028      iexcit1 = indarr(jexcit)
1029      isppol1 = min((iexcit1-1)/nexcit_pol(1) +1,2)
1030      iunocc1 = (iexcit1-(isppol1-1)*nexcit_pol(1)-1)/nband_occ(isppol1)+1
1031      iocc1   = iexcit1-(isppol1-1)*nexcit_pol(1)-(iunocc1-1)*nband_occ(isppol1)
1032 
1033      if (old_iexcit /= iexcit) then
1034 !      We start a new column of the matrix
1035 !      DEBUG
1036 !      write(message,'(a,i5,a,i3)')'treating  iexcit =  ',iexcit,&
1037 !      &'  with proc ',me_loc
1038 !      call wrtout(std_out,message,'PERS')
1039 !      ENDDEBUG
1040 
1041 !      DEBUG
1042 !      write(message,'(a,i3)')'Multiplicating phi1 phi2, on proc ',me_loc
1043 !      call wrtout(std_out,message,'PERS')
1044 !      ENDDEBUG
1045 
1046        ifft=1
1047        do i3=1,ndiel3
1048          do i2=1,ndiel2
1049            do i1=1,ndiel1
1050              wfprod(i1,i2,i3)=wfrspa(i1,i2,i3,index_state(iocc2+(isppol2-1)*nband_k(1))) &
1051 &             *wfrspa(i1,i2,i3,index_state(iunocc2+nband_occ(isppol2)+(isppol2-1)*nband_k(1)))
1052              work(ifft)=wfprod(i1,i2,i3)
1053              ifft=ifft+1
1054            end do
1055          end do
1056        end do
1057        if (jexcit == 1) then
1058          do i3=1,ndiel3
1059            do i2=1,ndiel2
1060              do i1=1,ndiel1
1061                do idir=1,3
1062                  poscart(idir)=rprimd(idir,1)*pos(i1,1)+&
1063 &                 rprimd(idir,2)*pos(i2,2)+&
1064 &                 rprimd(idir,3)*pos(i3,3)
1065                  sexc(idir,iexcit)=sexc(idir,iexcit)+poscart(idir)*wfprod(i1,i2,i3)
1066                end do
1067              end do
1068            end do
1069            done_sexc(iexcit)=.true.
1070          end do
1071        end if
1072 
1073 !      For the singlet correction, must compute the hartre potential created
1074 !      by the product of wavefunctions
1075        cplex=1
1076 
1077 !      DEBUG
1078 !      write(message,'(a,i3)')'Before Fourdp, on proc ',me_loc
1079 !      call wrtout(std_out,message,'PERS')
1080 !      ENDDEBUG
1081 
1082        call fourdp(cplex,rhog,work,-1,mpi_enreg,nfftdiel,ngfftdiel,dtset%paral_kgb,0)
1083 
1084 !      DEBUG
1085 !      write(message,'(a,i3)')'Before Hartree, on proc ',me_loc
1086 !      call wrtout(std_out,message,'PERS')
1087 !      write(std_out,*)'CPU ',me_loc,ch10,&
1088 !      &            '   cplex : ',cplex,ch10,&
1089 !      &            '   gmet(3,3)  : ',gmet(3,3),ch10,&
1090 !      &            '   gsqcut : ',gsqcut,ch10,&
1091 !      &            '   rhog(1,1) :,',rhog(1,1),ch10,&
1092 !      &            '   vhartr(1) :,',vhartr(1)
1093 !      ENDDEBUG
1094 
1095        call hartre(cplex,gsqcut,0,mpi_enreg,nfftdiel,ngfftdiel,dtset%paral_kgb,rhog,rprimd,vhartr)
1096 
1097 !      DEBUG
1098 !      write(message,'(a,i3)')'After Hartree, on proc ',me_loc
1099 !      call wrtout(std_out,message,'PERS')
1100 !      ENDDEBUG
1101      end if
1102      old_iexcit=iexcit
1103 
1104 !    DEBUG
1105 !    write(std_out,*)'  treating  iexcit =  ',jexcit
1106 !    write(std_out,*)'   indarr(iexcit) =',iexcit1,iocc1,iunocc1
1107 !    write(std_out,*)'   index ',index_state(iocc1+(isppol1-1)*nband_k(1)), &
1108 !    &                       index_state(iunocc1+nband_occ(isppol1)+(isppol1-1)*nband_k(1))
1109 !    ENDDEBUG
1110 
1111      if(pole_approx==0 .or. (iunocc1==iunocc2 .and. iocc1==iocc2 .and. isppol1==isppol2))then
1112        sum_kernel(:)=zero
1113        f_sing_trip(1)=two/dble(nsppol)
1114        if(nsppol==1) f_sing_trip(2)=zero
1115 !      For Fermi-Amaldi kxc, the xc contribution is -1/2 the Hartree contribution
1116 !      to the triplet state. The following factors combines both contributions.
1117        if(dtset%ixc==20 .or. dtset%ixc==22)then
1118          if(nsppol==1)then
1119            f_sing_trip(1)= one
1120            f_sing_trip(2)=-one
1121          end if
1122        end if
1123        ifft=1
1124        do i3=1,ndiel3
1125          do i2=1,ndiel2
1126            do i1=1,ndiel1
1127              myproduct=wfrspa(i1,i2,i3,index_state(iocc1+(isppol1-1)*nband_k(1))) &
1128 &             *wfrspa(i1,i2,i3,index_state(iunocc1+nband_occ(isppol1)+(isppol1-1)*nband_k(1)))
1129              do sing_trip=1,2/nsppol
1130                sum_kernel(sing_trip)=sum_kernel(sing_trip)+&
1131 &               myproduct*(f_sing_trip(sing_trip)*vhartr(ifft)+kxc_for_tddft(i1,i2,i3,isppol1,isppol2,sing_trip) &
1132                *wfprod(i1,i2,i3))
1133              end do
1134              ifft=ifft+1
1135            end do ! i1
1136          end do ! i2
1137        end do ! i3
1138 
1139 !      The factor two is coherent with the formulas of Vasiliev et al
1140        factor=two*sqrtks(iexcit1)*sqrtks(iexcit2)*normint
1141        do sing_trip=1,2/nsppol
1142          omega_tddft_casida(iocc1,iunocc1,isppol1,iocc2,iunocc2,isppol2,sing_trip)=   &
1143 &         omega_tddft_casida(iocc1,iunocc1,isppol1,iocc2,iunocc2,isppol2,sing_trip)+  &
1144 &         factor*sum_kernel(sing_trip)
1145        end do
1146 
1147 !      End condition of being diagonal element if pole approximation
1148      end if
1149 
1150 !    Continue writing excitation matrix
1151      if (am_master) then
1152 !      the master writes its results to disk
1153        if(nsppol==1)then
1154          write(temp_unit2, rec=(iexcit-1)*nexcit_win+jexcit ) &
1155 &         omega_tddft_casida(iocc1,iunocc1,isppol1,iocc2,iunocc2,isppol2,1),&
1156 &         omega_tddft_casida(iocc1,iunocc1,isppol1,iocc2,iunocc2,isppol2,2),&
1157 &         iexcit, jexcit
1158        else
1159          write(temp_unit2, rec=(iexcit-1)*nexcit_win+jexcit ) &
1160 &         omega_tddft_casida(iocc1,iunocc1,isppol1,iocc2,iunocc2,isppol2,1), &
1161 &         iexcit, jexcit
1162        end if
1163 
1164 !      DEBUG
1165 !      if(nsppol==1)then
1166 !      write(std_out,*)'singlet: ',omega_tddft_casida(iocc1,iunocc1,isppol1,iocc2,iunocc2,isppol2,1),&
1167 !      &             'iexcit: ',iexcit,'jexcit :',jexcit
1168 !      write(std_out,*)'triplet: ',omega_tddft_casida(iocc1,iunocc1,isppol1,iocc2,iunocc2,isppol2,2),&
1169 !      &             'iexcit: ',iexcit,'jexcit :',jexcit
1170 !      else
1171 !      write(std_out,*)'excitation: ',omega_tddft_casida(iocc1,iunocc1,isppol1,iocc2,iunocc2,isppol2,1),&
1172 !      &             'iexcit: ',iexcit,'jexcit :',jexcit
1173 !      endif
1174 !      ENDDEBUG
1175 
1176        sendcount=0
1177      else
1178        sendcount=5-nsppol
1179 
1180        if(nsppol==1)then
1181          sendbuf=(/ omega_tddft_casida(iocc1,iunocc1,isppol1,iocc2,iunocc2,isppol2,1), &
1182 &         omega_tddft_casida(iocc1,iunocc1,isppol1,iocc2,iunocc2,isppol2,2), &
1183 &         real(iexcit,dp), real(jexcit,dp) /)
1184        else
1185          sendbuf=(/ omega_tddft_casida(iocc1,iunocc1,isppol1,iocc2,iunocc2,isppol2,1), &
1186 &         real(iexcit,dp), real(jexcit,dp) /)
1187        end if
1188      end if ! am_master
1189    else
1190 !    ijexcit > count
1191 
1192 !    done with local work, so send message of zero length
1193      sendcount=0
1194 
1195    end if ! ijexcit <= count
1196 
1197 #if defined HAVE_MPI
1198    if (am_master) then
1199 
1200 !    Compute displacements and counts for the gathering of the results
1201      displs(0)=0
1202      recvcounts(0)=0
1203      do iproc=1,nproc_loc-1
1204        recvcounts(iproc)=min(((iproc+1)*count_to_do)/nproc_loc,count_to_do)-(iproc*count_to_do)/nproc_loc
1205        if (recvcounts(iproc) < countmax .and. ijexcit==countmax) then
1206          recvcounts(iproc)=0
1207        else
1208          recvcounts(iproc)=5-nsppol
1209        end if
1210        displs(iproc)=displs(iproc-1)+recvcounts(iproc-1)
1211      end do
1212    end if
1213 
1214 !  ***********************************************
1215 !  ***** I have to ask about that ****************
1216 !  ***********************************************
1217    call MPI_Gatherv(sendbuf,sendcount,MPI_DOUBLE_PRECISION,recvbuf,recvcounts,displs, &
1218 &   MPI_DOUBLE_PRECISION,0,spaceComm,ierr)
1219 
1220    if (am_master) then
1221 
1222 !    Extract eigsqr_singlet, eigsqr_triplet, iexcit, jexcit from receive buffer and
1223 !    write to file
1224      do ijexcit2=1,sum(recvcounts)/(5-nsppol)
1225        iexcit=int(recvbuf(4-nsppol,ijexcit2))
1226        jexcit=int(recvbuf(5-nsppol,ijexcit2))
1227 
1228        iexcit2 = indarr(iexcit)
1229        isppol2 = min((iexcit2-1)/nexcit_pol(1) +1,2)
1230        iunocc2 = (iexcit2-(isppol2-1)*nexcit_pol(1)-1)/nband_occ(isppol2)+1
1231        iocc2   = iexcit2-(isppol2-1)*nexcit_pol(1)-(iunocc2-1)*nband_occ(isppol2)
1232 
1233        iexcit1 = indarr(jexcit)
1234        isppol1 = min((iexcit1-1)/nexcit_pol(1) +1,2)
1235        iunocc1 = (iexcit1-(isppol1-1)*nexcit_pol(1)-1)/nband_occ(isppol1)+1
1236        iocc1   = iexcit1-(isppol1-1)*nexcit_pol(1)-(iunocc1-1)*nband_occ(isppol1)
1237 
1238        do sing_trip=1,2/nsppol
1239          omega_tddft_casida(iocc1,iunocc1,isppol1,iocc2,iunocc2,isppol2,sing_trip)= &
1240 &         recvbuf(sing_trip,ijexcit2)
1241        end do
1242 
1243        if(nsppol==1)then
1244          write(temp_unit2, rec=(iexcit-1)*nexcit_win+jexcit ) &
1245 &         omega_tddft_casida(iocc1,iunocc1,isppol1,iocc2,iunocc2,isppol2,1), &
1246 &         omega_tddft_casida(iocc1,iunocc1,isppol1,iocc2,iunocc2,isppol2,2), &
1247 &         iexcit, jexcit
1248        else
1249          write(temp_unit2, rec=(iexcit-1)*nexcit_win+jexcit ) &
1250          omega_tddft_casida(iocc1,iunocc1,isppol1,iocc2,iunocc2,isppol2,1), &
1251 &         iexcit, jexcit
1252        end if
1253 !      DEBUG
1254 !      if(nsppol==1)then
1255 !      write(std_out,*)'singlet: ',omega_tddft_casida(iocc1,iunocc1,isppol1,iocc2,iunocc2,isppol2,1), &
1256 !      &                       'iexcit: ',iexcit,'jexcit :',jexcit
1257 !      write(std_out,*)'triplet: ',omega_tddft_casida(iocc1,iunocc1,isppol1,iocc2,iunocc2,isppol2,2),
1258 !      &                       'iexcit: ',iexcit,'jexcit :',jexcit
1259 !      else
1260 !      write(std_out,*)'excitation: ',omega_tddft_casida(iocc1,iunocc1,isppol1,iocc2,iunocc2,isppol2,1), &
1261 !      &                        'iexcit: ',iexcit,'jexcit :',jexcit
1262 !      endif
1263 !      ENDDEBUG
1264 
1265      end do
1266 
1267    end if
1268 #endif
1269 
1270 !  End indices loops
1271  end do ! ijexcit
1272 
1273 !End of the main double loop
1274 !--------------------------------------------------------------------
1275 
1276 
1277 #if defined HAVE_MPI
1278 !sexc needs to be summed here since it used only by master
1279  call xmpi_barrier(spaceComm)
1280 !call xmpi_sum_master(sexc,master,spaceComm,ierr) ! Does not work on some machines
1281  call xmpi_sum(sexc,spaceComm,ierr)
1282  done_sexc2=done_sexc
1283  call MPI_Reduce(done_sexc2,done_sexc,nexcit_win,MPI_LOGICAL,MPI_LOR,master,spaceComm,ierr)
1284 #endif
1285 
1286 
1287  if (am_master) then
1288 !  We compute sexc again if it was not done. Will only be executed if
1289 !  there was a restart from values read from logical unit temp_unit2.
1290 
1291    do iexcit=1,nexcit_win
1292 
1293 !    DEBUG
1294 !    write(std_out,*)'do on excitation',iexcit
1295 !    END DEBUG
1296 
1297      if (.not.done_sexc(iexcit)) then
1298        iexcit2 = indarr(iexcit)
1299        isppol2 = min((iexcit2-1)/nexcit_pol(1) +1,2)
1300        iunocc2 = (iexcit1-(isppol2-1)*nexcit_pol(1)-1)/nband_occ(isppol2)+1
1301        iocc2   = iexcit2-(isppol2-1)*nexcit_pol(1)-(iunocc2-1)*nband_occ(isppol2)
1302        do i3=1,ndiel3
1303          do i2=1,ndiel2
1304            do i1=1,ndiel1
1305              wfprod(i1,i2,i3)=wfrspa(i1,i2,i3,index_state(iocc2+(isppol2-1)*nband_k(1))) &
1306 &             *wfrspa(i1,i2,i3,index_state(iunocc2+nband_occ(isppol2)+ (isppol2-1)*nband_k(1)))
1307              do idir=1,3
1308                poscart(idir)=rprimd(idir,1)*pos(i1,1)+&
1309 &               rprimd(idir,2)*pos(i2,2)+&
1310 &               rprimd(idir,3)*pos(i3,3)
1311                sexc(idir,iexcit)=sexc(idir,iexcit)+poscart(idir)*wfprod(i1,i2,i3)
1312              end do
1313            end do
1314          end do
1315        end do
1316      end if
1317    end do
1318  end if
1319 
1320  ABI_DEALLOCATE(work)
1321  ABI_DEALLOCATE(rhog)
1322  ABI_DEALLOCATE(pos)
1323  ABI_DEALLOCATE(vhartr)
1324  ABI_DEALLOCATE(kxc_for_tddft)
1325  ABI_DEALLOCATE(wfprod)
1326  ABI_DEALLOCATE(index_state)
1327  ABI_DEALLOCATE(excit_coords)
1328  ABI_DEALLOCATE(wfrspa)
1329 
1330 !Write the first excitation energies
1331  write(message, '(a,a,es18.8,a,a,a,a,a,a,a,a,a)' )ch10,&
1332 & '  Ground state total energy (Ha) :',etotal,ch10,ch10,&
1333 & '  Kohn-Sham energy differences,',ch10,&
1334 & '  corresponding total energies and oscillator strengths (X,Y,Z and average)-',ch10,&
1335 & '  (oscillator strengths smaller than 1.e-6 are set to zero)',ch10,&
1336 & '  Transition  (Ha)  and   (eV)   Tot. Ene. (Ha)  Aver     XX       YY       ZZ'
1337  call wrtout(ab_out,message,'COLL')
1338  call wrtout(std_out,message,'COLL')
1339 
1340 #if defined HAVE_MPI
1341  ABI_DEALLOCATE(counts)
1342  ABI_DEALLOCATE(displs)
1343  ABI_DEALLOCATE(recvbuf)
1344  ABI_DEALLOCATE(recvcounts)
1345 #endif
1346 
1347  if (am_master) then
1348 
1349    ABI_ALLOCATE(osc_str,(7,nexcit))
1350 
1351    do iexcit=1,nexcit_win
1352      iexcit2 = indarr(iexcit)
1353      isppol = min((iexcit2-1)/nexcit_pol(1) +1,2)
1354      iunocc = (iexcit2-(isppol-1)*nexcit_pol(1)-1)/nband_occ(isppol)+1
1355      iocc   = iexcit2-(isppol-1)*nexcit_pol(1)-(iunocc-1)*nband_occ(isppol)
1356 
1357      osc_str(1,iexcit)=zero
1358      do idir=1,3
1359 !      One of the factor of two comes from the spin degeneracy,
1360 !      the other comes from Eq.(40) of Casida
1361        osc_str(idir+1,iexcit)=&
1362 &       (sexc(idir,iexcit)*sqrtks(iexcit2)*normint*ucvol)**2*two*two/nsppol
1363        osc_str(1,iexcit)=osc_str(1,iexcit)&
1364 &       +osc_str(idir+1,iexcit)*third
1365      end do
1366      do ii=1,4
1367        if(abs(osc_str(ii,iexcit))<tol6)osc_str(ii,iexcit)=zero
1368      end do
1369 !    Changed, the whole spectrum is written
1370 !    The array eexcit has been reordered previously, the others also
1371      if(nsppol==1)then
1372        write(message, '(i4,a,i3,2es12.5,es13.5,es11.4,3es9.2)' ) &
1373 &       iocc,'->',iunocc+nband_occ(isppol),            &
1374 &       eexcit(iexcit), eexcit(iexcit)*Ha_eV,eexcit(iexcit)+etotal, &
1375 !      XG 020209 : Jean-Yves, I assume that the printout of sexc is for debugging ?!
1376 !      &   osc_str(1:4,iexcit),sexc(1:3,iexcit)
1377 &       osc_str(1:4,iexcit)
1378      else
1379        write(message, '(i4,a,i3,a,i1,2es12.5,es13.5,es11.4,3es9.2)' ) &
1380 &       iocc,'->',iunocc+nband_occ(isppol),' s:',isppol,            &
1381 &       eexcit(iexcit), eexcit(iexcit)*Ha_eV,eexcit(iexcit)+etotal, &
1382 &       osc_str(1:4,iexcit)
1383      end if
1384      call wrtout(ab_out,message,'COLL')
1385      call wrtout(std_out,message,'COLL')
1386 
1387    end do
1388 
1389 !  Check of the Sum rule for Casida eq.47,
1390 !  only exact if complete basis of excitations, as well as local potentials only.
1391    sumx=zero
1392    do iexcit=1,nexcit_win
1393      sumx=sumx+osc_str(1,iexcit)
1394    end do
1395    write(message, '(a,es16.6)' )'  Sum of osc. strength : ',sumx
1396    call wrtout(ab_out,message,'COLL')
1397    call wrtout(std_out,message,'COLL')
1398 
1399 !  -Diagonalize the excitation matrices----------------------------
1400 
1401    ABI_ALLOCATE(eexcit2,(nexcit_win))
1402    ABI_ALLOCATE(vec,(2,nexcit_win,nexcit_win))
1403 
1404    do sing_trip=1,2/nsppol
1405 
1406      if(pole_approx==0)then
1407 
1408        ABI_ALLOCATE(matr,(nexcit_win*(nexcit_win+1)))
1409        ABI_ALLOCATE(zhpev1,(2,2*nexcit_win-1))
1410        ABI_ALLOCATE(zhpev2,(3*nexcit_win-2))
1411        matr(:)=zero
1412        ier=0
1413 !      DEBUG
1414 !      write(std_out,*)' after allocation matrices     '
1415 !      ENDDEBUG
1416 
1417 
1418 !      Store the matrix in proper mode before calling zhpev
1419 
1420        index=1
1421        do iexcit=1,nexcit_win
1422          iexcit2 = indarr(iexcit)
1423          isppol2 = min((iexcit2-1)/nexcit_pol(1) +1,2)
1424          iunocc2 = (iexcit2-(isppol2-1)*nexcit_pol(1)-1)/nband_occ(isppol2)+1
1425          iocc2   = iexcit2-(isppol2-1)*nexcit_pol(1)-(iunocc2-1)*nband_occ(isppol2)
1426          do jexcit=1,iexcit
1427            iexcit1 = indarr(jexcit)
1428            isppol1 = min((iexcit1-1)/nexcit_pol(1) +1,2)
1429            iunocc1 = (iexcit1-(isppol1-1)*nexcit_pol(1)-1)/nband_occ(isppol1)+1
1430            iocc1   = iexcit1-(isppol1-1)*nexcit_pol(1)-(iunocc1-1)*nband_occ(isppol1)
1431 
1432            matr(index)=omega_tddft_casida(iocc1,iunocc1,isppol1,iocc2,iunocc2,isppol2,sing_trip)
1433            matr(index+1)=zero
1434            index=index+2
1435          end do
1436 
1437        end do
1438 
1439 !      DEBUG
1440 !      write(std_out,*)' after filling    matrices     '
1441 !      ENDDEBUG
1442 
1443        call ZHPEV ('V','U',nexcit_win,matr,eexcit2,vec,nexcit_win,zhpev1,&
1444 &       zhpev2,ier)
1445 
1446        ABI_DEALLOCATE(matr)
1447        ABI_DEALLOCATE(zhpev1)
1448        ABI_DEALLOCATE(zhpev2)
1449 !      DEBUG
1450 !      write(std_out,*)' after deallocating matrices     '
1451 !      ENDDEBUG
1452 
1453 
1454      else
1455 
1456        vec(:,:,:)=zero
1457        do isppol=1, nsppol
1458          do iunocc=1,nband_k(isppol)
1459            do iocc=1,nband_k(isppol)
1460              index=iocc+nband_k(isppol)*(iunocc-1)+(isppol-1)*nexcit_pol(1)
1461              eexcit2(index)=omega_tddft_casida(iocc,iunocc,isppol,iocc,iunocc,isppol,sing_trip)
1462              vec(1,index,index)=one
1463            end do
1464          end do
1465        end do
1466 
1467      end if
1468 
1469 !    Compute the excitation energies from the square root of eexcit2
1470 !    eexcit(:)=sqrt(eexcit2(:)
1471 
1472      ABI_DEALLOCATE(eexcit)
1473      ABI_ALLOCATE(eexcit,(nexcit_win))
1474 
1475      eexcit(:)=sqrt(dabs(eexcit2(:))+tol10**2)
1476 !    Write the first excitation energies
1477      if(sing_trip==1)then
1478        if(nsppol==1)then
1479          write(message, '(a,a,a,a,a,a)' )ch10,&
1480 &         '  TDDFT singlet excitation energies (at most 20 of them are printed),',ch10,&
1481 &         '  and corresponding total energies.                ',ch10,&
1482 &         '  Excit#   (Ha)    and    (eV)    total energy (Ha)    major contributions '
1483        else
1484          write(message, '(a,a,a,a,a,a)' )ch10,&
1485 &         '  TDDFT   mixed excitation energies (at most 40 of them are printed),',ch10,&
1486 &         '  and corresponding total energies.                ',ch10,&
1487 &         '  Excit#   (Ha)    and    (eV)    total energy (Ha)    major contributions '
1488        end if
1489      else
1490        write(message, '(a,a,a,a,a,a)' )ch10,&
1491 &       '  TDDFT triplet excitation energies (at most 20 of them are printed),',ch10,&
1492 &       '  and corresponding total energies.                ',ch10,&
1493 &       '  Excit#   (Ha)    and    (eV)    total energy (Ha)    major contributions '
1494      end if
1495      call wrtout(ab_out,message,'COLL')
1496      call wrtout(std_out,message,'COLL')
1497      call wrtout(std_out,' tddft : before iexcit loop',"COLL")
1498 
1499      do iexcit=1,min(nexcit_win,nexcitout)
1500        write(std_out,*)' tddft : iexcit=',iexcit
1501 !      Select largest and next contributions
1502        flargest=zero ; fnext=zero
1503        nlargest=0 ; nnext=0
1504        if(nsppol==2)then
1505          fnext1=zero  ;  fnext2=zero
1506          nnext1=0  ;  nnext2=0
1507        end if
1508        do jexcit=1,nexcit_win
1509          ff=vec(1,jexcit,iexcit)**2+vec(2,jexcit,iexcit)**2
1510          if(ff>flargest+tol12)then
1511            if(nsppol==2)then
1512              nnext2=nnext1  ; fnext2=fnext1
1513              nnext1=nnext   ; fnext1=fnext
1514            end if
1515            nnext=nlargest ; fnext=flargest
1516            nlargest=indarr(jexcit) ; flargest=ff
1517          else if(ff>fnext+tol12)then
1518            if(nsppol==2)then
1519              nnext2=nnext1 ; fnext2=fnext1
1520              nnext1=nnext  ; fnext1=fnext
1521            end if
1522            nnext=indarr(jexcit) ; fnext=ff
1523          else if(nsppol==2)then
1524            if(ff>fnext1+tol12)then
1525              nnext2=nnext1  ; fnext2=fnext1
1526              nnext1=indarr(jexcit) ; fnext1=ff
1527            else if(ff>fnext2+tol12)then
1528              nnext2=indarr(jexcit) ; fnext2=ff
1529            end if
1530          end if
1531 
1532        end do
1533 
1534        isppol_l = min((nlargest-1)/nexcit_pol(1) +1,nsppol)
1535        iunocc_l = (nlargest-(isppol_l-1)*nexcit_pol(1)-1)/nband_occ(isppol_l)+1
1536        iocc_l   = nlargest-(isppol_l-1)*nexcit_pol(1)-(iunocc_l-1)*nband_occ(isppol_l)
1537        isppol_n = min((nnext-1)/nexcit_pol(1) +1,nsppol)
1538        iunocc_n = (nnext-(isppol_n-1)*nexcit_pol(1)-1)/nband_occ(isppol_n)+1
1539        iocc_n   = nnext-(isppol_n-1)*nexcit_pol(1)-(iunocc_n-1)*nband_occ(isppol_n)
1540        if(nsppol==2)then
1541          isppol_n1 = min((nnext1-1)/nexcit_pol(1) +1,nsppol)
1542          iunocc_n1 = (nnext1-(isppol_n1-1)*nexcit_pol(1)-1)/nband_occ(isppol_n1)+1
1543          iocc_n1   = nnext1-(isppol_n1-1)*nexcit_pol(1)-(iunocc_n1-1)*nband_occ(isppol_n1)
1544          isppol_n2 = min((nnext2-1)/nexcit_pol(1) +1,nsppol)
1545          iunocc_n2 = (nnext2-(isppol_n2-1)*nexcit_pol(1)-1)/nband_occ(isppol_n2)+1
1546          iocc_n2   = nnext2-(isppol_n2-1)*nexcit_pol(1)-(iunocc_n2-1)*nband_occ(isppol_n2)
1547        end if
1548 
1549        if(nsppol==1)then
1550          write(message,'(i4,es15.5,es14.5,es16.6,f8.2,a,i3,a,i3,a,f6.2,a,i3,a,i3,a)') &
1551 &         iexcit,eexcit(iexcit),&
1552 &         eexcit(iexcit)*Ha_eV,eexcit(iexcit)+etotal,&
1553 &         flargest,'(',iocc_l,'->',iunocc_l+nband_occ(1),')',&
1554 &         fnext,   '(',iocc_n,'->',iunocc_n+nband_occ(1),')'
1555          call wrtout(ab_out,message,'COLL')
1556          call wrtout(std_out,message,'COLL')
1557        else
1558          write(chain1,'(f8.2,a,i3,a,i3,a)')flargest,'(',iocc_l,'->',iunocc_l+nband_occ(isppol_l),')'
1559          write(chain2,'(f8.2,a,i3,a,i3,a)')fnext,'(',iocc_n,'->',iunocc_n+nband_occ(isppol_n),')'
1560          if(trim(chain1)==trim(chain2))then
1561            write(message,'(i4,es15.5,es14.5,es16.6,a,a,a,a)') &
1562 &           iexcit,eexcit(iexcit),&
1563 &           eexcit(iexcit)*Ha_eV,eexcit(iexcit)+etotal,trim(chain1),'(1)',trim(chain2),'(2)'
1564          else
1565            write(message,'(i4,es15.5,es14.5,es16.6,a,a,i1,a,a,a,i1,a)') &
1566 &           iexcit,eexcit(iexcit),&
1567 &           eexcit(iexcit)*Ha_eV,eexcit(iexcit)+etotal,trim(chain1),'(',isppol_l,')',&
1568 &           trim(chain2),'(',isppol_n,')'
1569          end if
1570          call wrtout(ab_out,message,'COLL')
1571          call wrtout(std_out,message,'COLL')
1572          write(chain1,'(f8.2,a,i3,a,i3,a)')fnext1,'(',iocc_n1,'->',iunocc_n1+nband_occ(isppol_n1),')'
1573          write(chain2,'(f8.2,a,i3,a,i3,a)')fnext2,'(',iocc_n2,'->',iunocc_n2+nband_occ(isppol_n2),')'
1574          if(trim(chain1)==trim(chain2))then
1575            write(message,'(a,a,a,a,a)' ) &
1576 &           '                                                 ',&
1577 &           chain1,'(1)',chain2,'(2)'
1578          else
1579            write(message,'(a,a,a,i1,a,a,a,i1,a)' ) &
1580 &           '                                                 ',&
1581 &           chain1,'(',isppol_n1,')',&
1582 &           chain2,'(',isppol_n2,')'
1583          end if
1584          call wrtout(ab_out,message,'COLL')
1585          call wrtout(std_out,message,'COLL')
1586        end if
1587      end do
1588 
1589 !    For each iexcit excitation, compute the oscillator strength (Casida, eq 47)
1590      write(message, '(a,a,a,a)' )ch10,&
1591 &     '  Oscillator strengths :  (elements smaller than 1.e-6 are set to zero)',ch10,&
1592 &     '  Excit#   (Ha)   Average    XX        YY        ZZ         XY        XZ        YZ'
1593      call wrtout(ab_out,message,'COLL')
1594      call wrtout(std_out,message,'COLL')
1595 
1596      do iexcit=1,nexcit_win
1597 
1598 !      One of the factor of two comes from the spin degeneracy,
1599 !      the other comes from Eq.(40) of Casida
1600        factor=(normint*ucvol)**2*two*two/nsppol
1601 
1602        osc_str(:,iexcit)=zero
1603 
1604 !      factor=(normint*ucvol)**2*two*two/nsppol
1605 
1606        do jexcit=1,nexcit_win
1607          jexcit_cbase=indarr(jexcit)
1608          do idir=1,3
1609            osc_str(idir+1,iexcit)=osc_str(idir+1,iexcit)+ &
1610 &           sexc(idir,jexcit)*sqrtks(jexcit_cbase)*sqrt(factor)* &
1611 &           vec(1,jexcit,iexcit)
1612          end do ! idir
1613        end do ! jexcit
1614 
1615 !      The "standard" definition of the oscillator strength is the square
1616 !      of the matrix elements.
1617 !      So, instead of the coding
1618 !      do idir=1,3
1619 !      osc_str(1,iexcit)=osc_str(1,iexcit)+osc_str(idir+1,iexcit)**2*third
1620 !      enddo
1621 !      I think that the following is more "standard"
1622 !      Now, osc_str(2:4,iexcit) are the X, Y and Z matrix elements, not
1623 !      yet the oscillator strengths
1624        osc_str(5,iexcit)=osc_str(2,iexcit)*osc_str(3,iexcit)   ! off diag XY
1625        osc_str(6,iexcit)=osc_str(2,iexcit)*osc_str(4,iexcit)   ! off diag XZ
1626        osc_str(7,iexcit)=osc_str(3,iexcit)*osc_str(4,iexcit)   ! off diag ZZ
1627        do idir=1,3
1628 !        Here the X,Y, and Z matrix elements are combined to give diagonal osc. strengths
1629          osc_str(idir+1,iexcit)=osc_str(idir+1,iexcit)**2
1630          osc_str(1,iexcit)=osc_str(1,iexcit)+osc_str(idir+1,iexcit)*third ! compute the trace
1631        end do
1632 !      At this stage, osc_str(1,iexcit) is exactly the same as from your coding
1633 !      ***End of section to be checked
1634 
1635        do ii=1,7
1636          if(abs(osc_str(ii,iexcit))<tol6)osc_str(ii,iexcit)=zero
1637        end do
1638 !      XG 020209 : Jean-Yves, the off-diagonal oscillator strengths
1639 !      can become negative. It is important for automatic
1640 !      checking that the numbers are separated by a blank, even
1641 !      if they are negative. So replace the following format, to have at least one blank.
1642        write(message, '(i4,es12.5,es10.3,3es10.3,3es10.2)' )iexcit,eexcit(iexcit),osc_str(1:7,iexcit)
1643        call wrtout(ab_out,message,'COLL')
1644        call wrtout(std_out,message,'COLL')
1645      end do
1646 
1647 !    Check of the Sum rule for Casida eq.47,
1648 !    only exact if complete basis of excitations, as well as local potentials only.
1649      sumx=zero
1650      do iexcit=1,nexcit_win
1651        sumx=sumx+osc_str(1,iexcit)
1652      end do
1653      write(message, '(a,es16.6)' )'  Sum of osc. strength : ',sumx
1654      call wrtout(ab_out,message,'COLL')
1655      call wrtout(std_out,message,'COLL')
1656 
1657 !    If singlet, compute Cauchy coefficients
1658      if(sing_trip==1.AND.nsppol==1)then
1659        cauchy(:)=zero
1660        do iexcit=1,nexcit_win
1661          fr_invsquare=one/(eexcit(iexcit)**2)
1662          fr_power=one
1663          do ii=1,7
1664            fr_power=fr_power*fr_invsquare
1665            cauchy(ii)=cauchy(ii)+osc_str(1,iexcit)*fr_power
1666          end do
1667        end do
1668        write(message, '(a,es11.3,a,es11.3,a,es11.3,a,a,es11.3,a,es11.3,a,es11.3,a,es11.3)' ) &
1669 &       '  Cauchy coeffs (au) : ( -2)->',cauchy(1),&
1670 &       ', ( -4)->',cauchy(2),', ( -6)->',cauchy(3),ch10,&
1671 &       '    (-8)->',cauchy(4),', (-10)->',cauchy(5),', (-12)->',cauchy(6),', (-14)->',cauchy(7)
1672        call wrtout(ab_out,message,'COLL')
1673        call wrtout(std_out,message,'COLL')
1674      end if
1675 
1676 !    End the loop on singlet or triplet
1677    end do
1678 
1679    ABI_DEALLOCATE(eexcit2)
1680    ABI_DEALLOCATE(vec)
1681    ABI_DEALLOCATE(osc_str)
1682 
1683 !! The temporary files should be deleted at the end of this routine
1684    close(temp_unit2,status='delete')
1685    call timab(95,2,tsec)
1686  end if  ! end of am_master
1687 
1688  ABI_DEALLOCATE(omega_tddft_casida)
1689  ABI_DEALLOCATE(eexcit)
1690  ABI_DEALLOCATE(sqrtks)
1691  ABI_DEALLOCATE(sexc)
1692  ABI_DEALLOCATE(done_sexc)
1693  ABI_DEALLOCATE(indarr)
1694  ABI_DEALLOCATE(done_sexc2)
1695 
1696 end subroutine tddft