TABLE OF CONTENTS


ABINIT/exc_build_block [ Functions ]

[ Top ] [ Functions ]

NAME

  exc_build_block

FUNCTION

  Calculate and write the excitonic Hamiltonian on an external binary file (Fortran file open
  in random mode) for subsequent treatment in the Bethe-Salpeter code.

COPYRIGHT

 Copyright (C) 1992-2009 EXC group (L.Reining, V.Olevano, F.Sottile, S.Albrecht, G.Onida)
 Copyright (C) 2009-2018 ABINIT group (L.Reining, V.Olevano, F.Sottile, S.Albrecht, G.Onida, M.Giantomassi)
 This file is distributed under the terms of the
 GNU General Public License, see ~abinit/COPYING
 or http://www.gnu.org/copyleft/gpl.txt .
 For the initials of contributors, see ~abinit/doc/developers/contributors.txt .

INPUTS

  BSp<excparam>=The parameters for the Bethe-Salpeter calculation.
  Cryst<crystal_t>=Info on the crystalline structure.
  Kmesh<kmesh_t>=The list of k-points in the BZ, IBZ and symmetry tables.
  Qmesh<kmesh_t>=The list of q-points for epsilon^{-1} and related symmetry tables.
  ktabr(nfftot_osc,BSp%nkbz)=The FFT index of $(R^{-1}(r-\tau))$ where R is symmetry needed to obtains
    the k-points from the irreducible image.  Used to symmetrize u_Sk where S = \transpose R^{-1}
  Gsph_x<gsphere_t>=Info on the G-sphere used to describe wavefunctions and W (the largest one is actually stored).
  Gsph_c<gsphere_t>=Info on the G-sphere used to describe the correlation part.
  Vcp<vcoul_t>=The Coulomb interaction in reciprocal space. A cutoff can be used
  W<screen_t>=Data type gathering info and data for W.
  nfftot_osc=Total Number of FFT points used for the oscillator matrix elements.
  ngfft_osc(18)=Info on the FFT algorithm used to calculate the oscillator matrix elements.
  Psps<Pseudopotential_type>=Variables related to pseudopotentials
  Pawtab(Psps%ntypat)<pawtab_type>=PAW tabulated starting data.
  Pawang<pawang_type>=PAW angular mesh and related data.
  Paw_pwff(Cryst%ntypat*Wfd%usepaw)<pawpwff_t>=Form factor used to calculate the onsite matrix
    elements of a plane wave.
  Wfd<wfd_t>=Handler for the wavefunctions.
    prtvol=Verbosity level.
  rhxtwg_q0
  is_resonant
  fname
  comm=MPI communicator.

OUTPUT

  The excitonic Hamiltonian is saved on an external binary file (see below).

NOTES

  *) Version for K_V = K_C (q=0), thus KP_V = KP_C
  *) No exchange limit: use LDA energies in case.
  *) Symmetry of H(-k-k') = H*(k k') not used.
  *) Coulomb term can be approssimateed as diagonal in G.
  *) Valence bands treated from lomo on.
  *) Symmetries of the sub-blocks are used to reduce the number of elements to calculate.

            ____________
           |_(cv)__(vc)_|
   H_exc = |  R      C  |
           | -C*    -R* |

   where C is symmetric and R is Hermitian provided that the QP energies are real.

  For nsppol=1 ==> R = diag-W+2v; C = -W+2v
  since the Hamiltonian can be diagonalized in the spin-singlet basis set thanks to
  the fact that spin triplet does not contribute to the optical limit of epsilon.

  For nsppol=2 ==> R = diag-W+v; C = -W+v
  Now the matrix elements depend on the spin of the transitions but only those
  transitions in which the spin of the electron and of the hole are equal contribute
  to the macroscopic dielectric function. Moreover only the exchange term can connect transitions of different spin.
  When nsppol==2 the transitions are ordered using | (cv up) | (cv dwn) | (vc up) | (vc down) |

  The resonant block is given by:

      |  (v'c' up)       | (v'c' dwn)   |
      -----------------------------------           where v_{-+} = v_{+-}^H when the momentum of the photon is neglected.
      | [diag-W+v]++     |      v+-     | (vc up)   Note that v_{+-} is not Hermitian due to the presence of different spins.
  R = -----------------------------------           Actually it reduces to a Hermitian matrix when the system is not spin polarized.
      |     v-+          | [diag-W+v]-- | (vc dwn)  but in this case one should use nsppol=1.
      -----------------------------------           As a consequence the entire matrix is calculated and stored on file.

  The coupling block is given by:

      |  (c'v' up)   |    (c'v dwn)     |
      -----------------------------------           where v_{-+} = v_{+-}^t when the momentum of the photon is neglected.
      | [-W+v]++     |      v+-         | (vc up)   Also in this case the entire matrix v_{+-} has to be calculated
  C = -----------------------------------           and stored on file.
      |     v-+      |    [-W+v]--      | (vc dwn)
      -----------------------------------

PARENTS

      exc_build_ham

CHILDREN

      get_bz_item,timab,wrtout,xmpi_split_work2_i4b

SOURCE

  97 #if defined HAVE_CONFIG_H
  98 #include "config.h"
  99 #endif
 100 
 101 #include "abi_common.h"
 102 
 103 subroutine exc_build_block(BSp,Cryst,Kmesh,Qmesh,ktabr,Gsph_x,Gsph_c,Vcp,Wfd,W,Hdr_bse,&
 104 &  nfftot_osc,ngfft_osc,Psps,Pawtab,Pawang,Paw_pwff,rhxtwg_q0,is_resonant,fname)
 105 
 106  use defs_basis
 107  use defs_datatypes
 108  use defs_abitypes
 109  use m_profiling_abi
 110  use m_bs_defs
 111  use m_bse_io
 112  use m_xmpi
 113  use m_errors
 114 #if defined HAVE_MPI2
 115  use mpi
 116 #endif
 117  use m_hdr
 118 
 119  use m_gwdefs,       only : czero_gw, cone_gw, GW_TOLQ0
 120  use m_time,         only : cwtime
 121  use m_io_tools,     only : get_unit, open_file
 122  use m_blas,         only : xdotc, xgemv
 123  use m_geometry,     only : normv
 124  use m_crystal,      only : crystal_t
 125  use m_gsphere,      only : gsphere_t, gsph_fft_tabs
 126  use m_vcoul,        only : vcoul_t
 127  use m_bz_mesh,      only : kmesh_t, get_BZ_item, get_BZ_diff, has_BZ_item, isamek, findqg0
 128  use m_pawpwij,     only : pawpwff_t, pawpwij_t, pawpwij_init, pawpwij_free, paw_rho_tw_g
 129  use m_pawang,       only : pawang_type
 130  use m_pawtab,       only : pawtab_type
 131  use m_pawcprj,      only : pawcprj_type, pawcprj_alloc, pawcprj_free
 132  use m_wfd,          only : wfd_t, wfd_get_ur, wfd_get_cprj, wfd_change_ngfft, wfd_ihave_ur, wfd_ihave_cprj, &
 133 &                           wfd_sym_ur
 134  use m_oscillators,  only : rho_tw_g, sym_rhotwgq0
 135  use m_screen
 136 
 137 !This section has been created automatically by the script Abilint (TD).
 138 !Do not modify the following lines by hand.
 139 #undef ABI_FUNC
 140 #define ABI_FUNC 'exc_build_block'
 141  use interfaces_14_hidewrite
 142  use interfaces_18_timing
 143  use interfaces_65_paw
 144 !End of the abilint section
 145 
 146  implicit none
 147 
 148 #if defined HAVE_MPI1
 149  include 'mpif.h'
 150 #endif
 151 
 152 !Arguments ------------------------------------
 153 !scalars
 154  integer,intent(in) :: nfftot_osc
 155  character(len=*),intent(in) :: fname
 156  logical,intent(in) :: is_resonant
 157  type(excparam),intent(in) :: BSp
 158  type(screen_t),intent(inout) :: W
 159  type(kmesh_t),intent(in) :: Kmesh,Qmesh
 160  type(crystal_t),intent(in) :: Cryst
 161  type(vcoul_t),intent(in) :: Vcp
 162  type(gsphere_t),intent(in) :: Gsph_x,Gsph_c
 163  type(Pseudopotential_type),intent(in) :: Psps
 164  type(Hdr_type),intent(inout) :: Hdr_bse
 165  type(pawang_type),intent(in) :: Pawang
 166  type(wfd_t),target,intent(inout) :: Wfd
 167 !arrays
 168  integer,intent(in) :: ngfft_osc(18)
 169  integer,intent(in) :: ktabr(nfftot_osc,Kmesh%nbz)
 170  complex(gwpc),intent(in) :: rhxtwg_q0(BSp%npweps,BSp%lomo_min:BSp%humo_max,BSp%lomo_min:BSp%humo_max,Wfd%nkibz,Wfd%nsppol)
 171  type(Pawtab_type),intent(in) :: Pawtab(Psps%ntypat*Wfd%usepaw)
 172  type(pawpwff_t),intent(in) :: Paw_pwff(Psps%ntypat*Wfd%usepaw)
 173 
 174 !Local variables ------------------------------
 175 !scalars
 176  integer,parameter :: map2sphere=1,ndat1=1,master=0
 177  integer(i8b) :: bsize_my_block
 178  integer :: nspinor,nsppol,ISg,mpi_err,tmp_size,ngx
 179  integer :: ik_bz,ikp_bz,col_glob,itpk_min,itpk_max
 180  integer :: dim_rtwg,bsh_unt,ncol,dump_unt,npweps
 181 #ifdef HAVE_MPI_IO
 182  integer :: amode,mpi_fh,hmat_type,offset_err,old_type
 183  integer(XMPI_OFFSET_KIND) :: ehdr_offset,my_offset
 184  logical,parameter :: is_fortran_file=.TRUE.
 185 #endif
 186  integer :: neh1,neh2,ig,nblocks
 187  integer :: ik_ibz,itim_k,ikp_ibz,itim_kp,isym_k,isym_kp
 188  integer :: iq_bz,iq_ibz,isym_q,itim_q,iqbz0,rank
 189  integer :: iv,ivp,ic,icp,jj,nrows,sender,my_ncols
 190  integer :: use_padfft,prev_nrows,spin1,spin2,block
 191  integer :: ierr,nproc,my_rank,mgfft_osc,fftalga_osc,comm
 192  integer(i8b) :: tot_nels,prev_nels,prev_ncols,nels,ir,it,itp,ist,iend,my_hsize
 193  real(dp) :: faq,kx_fact,cputime,walltime,gflops
 194  complex(spc) :: http,ctemp
 195  complex(dpc) :: ph_mkpt,ph_mkt,ene_t,ene_tp
 196  logical,parameter :: with_umklp=.FALSE.
 197  logical :: use_mpiio,do_coulomb_term,do_exchange_term,w_is_diagonal,isirred
 198  logical :: is_qeq0
 199  character(len=500) :: msg
 200 !arrays
 201  integer :: bidx(2,4),g0(3),spin_ids(2,3)
 202  integer(i8b) :: nels_block(3)
 203  integer :: my_cols(2),my_rows(2),proc_end(2),proc_start(2)
 204  integer :: my_extrema(2,2),sender_extrema(2,2),my_starts(2),my_ends(2)
 205  integer,allocatable :: igfftg0(:),ktabr_k(:),ktabr_kp(:),id_tab(:)
 206  integer,allocatable :: ncols_of(:)
 207  integer(i8b),allocatable :: t_start(:),t_stop(:),hsize_of(:)
 208  integer,allocatable :: col_start(:),col_stop(:)
 209  integer,allocatable :: gbound(:,:)
 210  real(dp) :: kbz(3),kpbz(3),qbz(3),spinrot_k(4),spinrot_kp(4),kmkp(3),tsec(2)
 211  complex(dpc),allocatable :: my_bsham(:),buffer(:),buffer_2d(:,:),my_kxssp(:,:),prev_col(:)
 212 !DBYG
 213  complex(dpc),allocatable :: acoeffs(:),bcoeffs(:),ccoeffs(:) ! Coeff of W = a/q^2 + b/q + c
 214  integer :: a_unt, b_unt, c_unt
 215  complex(dpc) :: aatmp, bbtmp, cctmp
 216  complex(gwpc),allocatable :: aa_vpv(:),aa_cpc(:),aa_ctccp(:)
 217  complex(gwpc),allocatable :: bb_vpv1(:),bb_cpc1(:),bb_ctccp1(:)
 218  complex(gwpc),allocatable :: bb_vpv2(:),bb_cpc2(:),bb_ctccp2(:)
 219  complex(gwpc),allocatable :: cc_vpv(:),cc_cpc(:),cc_ctccp(:)
 220  complex(dpc),allocatable :: abuffer(:),aprev_col(:)
 221  complex(dpc),allocatable :: bbuffer(:),bprev_col(:)
 222  complex(dpc),allocatable :: cbuffer(:),cprev_col(:)
 223  character(len=fnlen) :: tmpfname
 224  integer :: ii
 225 !END DBYG
 226  complex(gwpc),allocatable :: vc_sqrt_qbz(:)
 227  complex(gwpc),allocatable :: rhotwg1(:),rhotwg2(:),rhxtwg_vpv(:),rhxtwg_cpc(:),ctccp(:)
 228  complex(gwpc),target,allocatable :: ur_ckp(:),ur_vkp(:),ur_vk(:),ur_ck(:)
 229  complex(gwpc),ABI_CONTIGUOUS pointer :: ptur_ckp(:),ptur_vkp(:),ptur_vk(:),ptur_ck(:)
 230  type(pawcprj_type),target,allocatable :: Cp_tmp1(:,:),Cp_tmp2(:,:)
 231  type(pawcprj_type),target,allocatable :: Cp_tmp3(:,:),Cp_tmp4(:,:)
 232  type(pawcprj_type),allocatable :: Cp_ckp(:,:),Cp_vkp(:,:)
 233  type(pawcprj_type),allocatable :: Cp_vk(:,:),Cp_ck(:,:)
 234  type(pawcprj_type),pointer :: ptcp_ckp(:,:),ptcp_vkp(:,:),ptcp_vk(:,:),ptcp_ck(:,:)
 235  type(pawpwij_t),allocatable :: Pwij_q(:)
 236 #ifdef HAVE_MPI_IO
 237  integer(XMPI_OFFSET_KIND) :: tmp_off,my_offpad
 238  integer(XMPI_OFFSET_KIND),allocatable :: bsize_frecord(:),offset_of_block(:)
 239 #endif
 240 #ifdef DEV_MG_DEBUG_MODE
 241  integer,allocatable :: ttp_check(:,:)
 242 #endif
 243 
 244 !************************************************************************
 245 
 246  call timab(680,1,tsec)
 247  call timab(681,1,tsec)
 248 
 249  DBG_ENTER("COLL")
 250 
 251  ABI_CHECK(Wfd%nspinor==1,"nspinor==2 not coded")
 252  ABI_CHECK(nfftot_osc==PRODUCT(ngfft_osc(1:3)),"mismatch in FFT size")
 253 
 254  if (Wfd%nsppol==2) then
 255    MSG_WARNING("nsppol==2 is still under testing")
 256  end if
 257  !
 258  ! MPI variables.
 259  comm    = Wfd%comm
 260  nproc   = Wfd%nproc
 261  my_rank = Wfd%my_rank
 262 
 263  !
 264  ! Basic constants.
 265  nspinor = Wfd%nspinor
 266  nsppol  = Wfd%nsppol
 267  dim_rtwg=1; faq = one/(Cryst%ucvol*Kmesh%nbz)
 268  npweps = Bsp%npweps
 269  !
 270  ! Prepare the FFT tables to have u(r) on the ngfft_osc mesh.
 271  mgfft_osc = MAXVAL(ngfft_osc(1:3))
 272  fftalga_osc = ngfft_osc(7)/100
 273  if ( ANY(ngfft_osc(1:3) /= Wfd%ngfft(1:3)) ) then
 274    call wfd_change_ngfft(Wfd,Cryst,Psps,ngfft_osc)
 275  end if
 276 
 277  ABI_MALLOC(igfftg0,(npweps))
 278  ABI_MALLOC(ktabr_k,(nfftot_osc))
 279  ABI_MALLOC(ktabr_kp,(nfftot_osc))
 280  ABI_MALLOC(id_tab,(nfftot_osc))
 281  id_tab = (/(ic, ic=1,nfftot_osc)/)
 282  !
 283  ! Workspace arrays for wavefunctions and oscillator matrix elements.
 284  ABI_MALLOC(rhxtwg_vpv,(npweps))
 285  ABI_MALLOC(rhxtwg_cpc,(npweps))
 286 
 287  if (BSp%prep_interp) then
 288    call wrtout(std_out,"Preparing BSE interpolation","COLL")
 289    ABI_MALLOC(aa_vpv,(npweps))
 290    ABI_MALLOC(bb_vpv1,(npweps))
 291    ABI_MALLOC(bb_vpv2,(npweps))
 292    ABI_MALLOC(cc_vpv,(npweps))
 293 
 294    ABI_MALLOC(aa_cpc,(npweps))
 295    ABI_MALLOC(bb_cpc1,(npweps))
 296    ABI_MALLOC(bb_cpc2,(npweps))
 297    ABI_MALLOC(cc_cpc,(npweps))
 298  end if
 299 
 300  ABI_MALLOC(ur_ckp,(nspinor*nfftot_osc))
 301  ABI_MALLOC(ur_vkp,(nspinor*nfftot_osc))
 302  ABI_MALLOC(ur_ck ,(nspinor*nfftot_osc))
 303  ABI_MALLOC(ur_vk ,(nspinor*nfftot_osc))
 304 
 305  if (Wfd%usepaw==1) then
 306    ABI_DT_MALLOC(Cp_vk,(Wfd%natom,nspinor))
 307    call pawcprj_alloc(Cp_vk,0,Wfd%nlmn_atm)
 308    ABI_DT_MALLOC(Cp_ck,(Wfd%natom,nspinor))
 309    call pawcprj_alloc(Cp_ck,0,Wfd%nlmn_atm)
 310    ABI_DT_MALLOC(Cp_ckp,(Wfd%natom,nspinor))
 311    call pawcprj_alloc(Cp_ckp,0,Wfd%nlmn_atm)
 312    ABI_DT_MALLOC(Cp_vkp,(Wfd%natom,nspinor))
 313    call pawcprj_alloc(Cp_vkp,0,Wfd%nlmn_atm)
 314 
 315    ABI_DT_MALLOC(Cp_tmp1,(Wfd%natom,nspinor))
 316    call pawcprj_alloc(Cp_tmp1,0,Wfd%nlmn_atm)
 317    ABI_DT_MALLOC(Cp_tmp2,(Wfd%natom,nspinor))
 318    call pawcprj_alloc(Cp_tmp2,0,Wfd%nlmn_atm)
 319    ABI_DT_MALLOC(Cp_tmp3,(Wfd%natom,nspinor))
 320    call pawcprj_alloc(Cp_tmp3,0,Wfd%nlmn_atm)
 321    ABI_DT_MALLOC(Cp_tmp4,(Wfd%natom,nspinor))
 322    call pawcprj_alloc(Cp_tmp4,0,Wfd%nlmn_atm)
 323  end if
 324  !
 325  ! Identify the index of q==0
 326  iqbz0=0
 327  do iq_bz=1,Qmesh%nbz
 328    if (ALL(ABS(Qmesh%bz(:,iq_bz))<tol3)) iqbz0 = iq_bz
 329  end do
 330  ABI_CHECK(iqbz0/=0,"q=0 not found")
 331  !
 332  ! Treat the spin polarization.
 333  spin_ids(:,1) = (/1,1/)
 334  spin_ids(:,2) = (/2,2/)
 335  spin_ids(:,3) = (/1,2/)
 336 
 337  nblocks=1
 338  kx_fact=two
 339  nels_block(:)=0
 340  nels_block(1)=BSp%nreh(1)*(BSp%nreh(1)+1_i8b)/2
 341  tot_nels=nels_block(1)
 342 
 343  if (nsppol==2) then
 344    nblocks=3
 345    kx_fact=one
 346    nels_block(1) = BSp%nreh(1)*(BSp%nreh(1)+1_i8b)/2   ! Only the upper triangle for block 1 and 2
 347    nels_block(2) = BSp%nreh(2)*(BSp%nreh(2)+1_i8b)/2
 348    nels_block(3) = BSp%nreh(1)*BSp%nreh(2)*1_i8b       ! Note: Block 3 does not have symmetries.
 349    tot_nels= SUM(nels_block)
 350  end if
 351  !
 352  ! Distribute the calculation of the matrix elements among the nodes.
 353  ! * tstart and t_stop give the initial and final transition index treated by each node.
 354  ! * my_hsize is the number of transitions treated by this processor
 355  ! * my_cols(1:2) gives the initial and final column treated by this node.
 356  !
 357  use_mpiio=.FALSE.
 358 #ifdef HAVE_MPI_IO
 359  use_mpiio = (nproc>1)
 360 #endif
 361  use_mpiio=.FALSE.
 362  !use_mpiio=.TRUE.
 363 
 364  if (is_resonant) then
 365    if (use_mpiio) then
 366      write(msg,'(2a,f6.2,a)')&
 367 &      ". Writing resonant excitonic Hamiltonian on file "//TRIM(fname)," via MPI-IO; file size= ",two*tot_nels*dpc*b2Gb," [Gb]."
 368    else
 369      write(msg,'(2a,f6.2,a)')&
 370 &      ". Writing resonant excitonic Hamiltonian on file "//TRIM(fname),"; file size= ",two*dpc*tot_nels*b2Gb," [Gb]."
 371    end if
 372  else
 373    if (use_mpiio) then
 374      write(msg,'(2a,f6.2,a)')&
 375 &      ". Writing coupling excitonic Hamiltonian on file "//TRIM(fname)," via MPI-IO; file size= ",tot_nels*2*dpc*b2Gb," [Gb]."
 376    else
 377      write(msg,'(2a,f6.2,a)')&
 378 &      ". Writing coupling excitonic Hamiltonian on file "//TRIM(fname),"; file size= ",two*dpc*tot_nels*b2Gb," [Gb]."
 379    end if
 380  end if
 381  call wrtout(std_out,msg,"COLL",do_flush=.True.)
 382  call wrtout(ab_out,msg,"COLL",do_flush=.True.)
 383  !
 384  ! Master writes the BSE header with Fortran IO.
 385  if (my_rank==master) then
 386 
 387    if (open_file(fname,msg,newunit=bsh_unt,form="unformatted",action="write") /= 0) then
 388       MSG_ERROR(msg)
 389    end if
 390    call exc_write_bshdr(bsh_unt,Bsp,Hdr_bse)
 391    ! To force the writing (needed for MPI-IO).
 392    close(bsh_unt)
 393 
 394    if (.not.use_mpiio) then ! Reopen the file and skip the header.
 395      if (open_file(fname,msg,newunit=bsh_unt,form="unformatted",action="readwrite") /= 0) then
 396         MSG_ERROR(msg)
 397      end if
 398      call exc_skip_bshdr(bsh_unt,ierr)
 399    end if
 400 
 401    if (BSp%prep_interp) then
 402      tmpfname = fname
 403      ii = LEN_TRIM(fname)
 404      tmpfname(ii-2:ii+1) = 'ABSR'
 405      if (open_file(tmpfname,msg,newunit=a_unt,form='unformatted',action="write") /= 0) then
 406        MSG_ERROR(msg)
 407      end if
 408      tmpfname(ii-2:ii+1) = 'BBSR'
 409      if (open_file(tmpfname,msg,newunit=b_unt,form='unformatted',action="write") /= 0) then
 410        MSG_ERROR(msg)
 411      end if
 412      tmpfname(ii-2:ii+1) = 'CBSR'
 413      if (open_file(tmpfname,msg,newunit=c_unt,form='unformatted',action="write") /= 0) then
 414        MSG_ERROR(msg)
 415      end if
 416      call exc_write_bshdr(a_unt,Bsp,Hdr_bse)
 417      call exc_write_bshdr(b_unt,Bsp,Hdr_bse)
 418      call exc_write_bshdr(c_unt,Bsp,Hdr_bse)
 419      close(a_unt)
 420      close(b_unt)
 421      close(c_unt)
 422      if (.not.use_mpiio) then ! Reopen the file and skip the header.
 423        tmpfname(ii-2:ii+1) = 'ABSR'
 424        if (open_file(tmpfname,msg,newunit=a_unt,form='unformatted',action="readwrite") /= 0) then
 425           MSG_ERROR(msg)
 426        end if
 427        call exc_skip_bshdr(a_unt,ierr)
 428        tmpfname(ii-2:ii+1) = 'BBSR'
 429        if (open_file(tmpfname,msg,newunit=b_unt,form='unformatted',action="readwrite") /= 0) then
 430           MSG_ERROR(msg)
 431        end if
 432        call exc_skip_bshdr(b_unt,ierr)
 433        tmpfname(ii-2:ii+1) = 'CBSR'
 434        if (open_file(tmpfname,msg,newunit=c_unt,form='unformatted',action="readwrite") /= 0) then
 435           MSG_ERROR(msg)
 436        end if
 437        call exc_skip_bshdr(c_unt,ierr)
 438      end if
 439    end if
 440  end if
 441 
 442  call xmpi_barrier(comm)
 443 
 444  if (use_mpiio) then
 445 #ifdef HAVE_MPI_IO
 446    ! Open the file with MPI-IO
 447    amode = MPI_MODE_RDWR
 448    !amode = MPI_MODE_CREATE + MPI_MODE_RDWR,
 449 
 450    call MPI_FILE_OPEN(comm, fname, amode, MPI_INFO_NULL, mpi_fh, mpi_err)
 451    ABI_CHECK_MPI(mpi_err,"opening: "//TRIM(fname))
 452 
 453    ! Skip the header.
 454    call exc_skip_bshdr_mpio(mpi_fh,xmpio_collective,ehdr_offset)
 455 
 456    ! Precompute the offset of the each block including the Fortran markers.
 457    ABI_MALLOC(offset_of_block,(nblocks))
 458    offset_of_block(1) = ehdr_offset
 459    do block=2,nblocks
 460      tmp_off = offset_of_block(block-1) + nels_block(block-1)*xmpi_bsize_dpc
 461      tmp_off = tmp_off + Bsp%nreh(block-1)*2*xmpio_bsize_frm  ! markers.
 462      offset_of_block(block) = tmp_off
 463    end do
 464 #endif
 465  end if
 466 
 467  call timab(681,2,tsec)
 468 
 469  do block=1,nsppol
 470    !
 471    ! Indices used to loop over bands.
 472    ! bidx contains the starting and final indices used to loop over bands.
 473    !
 474    !      (b3,b4)
 475    !         |... ...|
 476    ! (b1,b2) |... ...|
 477    !
 478    ! Resonant matrix is given by
 479    !      (v',c')
 480    !       |... ...|
 481    ! (v,c) |... ...|
 482    !
 483    ! Coupling matrix is given by
 484    !       (c',v')
 485    !       |... ...|
 486    ! (v,c) |... ...|
 487 
 488    if (is_resonant) then
 489      bidx(:,1) = [BSp%lomo_spin(block),BSp%homo_spin(block)] ! range for b1
 490      bidx(:,2) = [BSp%lumo_spin(block),BSp%humo_spin(block)] ! range for b2
 491      bidx(:,3) = [BSp%lomo_spin(block),BSp%homo_spin(block)] ! range for b3
 492      bidx(:,4) = [BSp%lumo_spin(block),BSp%humo_spin(block)] ! range for b4
 493    else
 494      bidx(:,1) = [BSp%lomo_spin(block),BSp%homo_spin(block)] ! range for b1
 495      bidx(:,2) = [BSp%lumo_spin(block),BSp%humo_spin(block)] ! range for b2
 496      bidx(:,3) = [BSp%lumo_spin(block),BSp%humo_spin(block)] ! range for b3
 497      bidx(:,4) = [BSp%lomo_spin(block),BSp%homo_spin(block)] ! range for b4
 498    end if
 499 
 500    spin1 = spin_ids(1,block)
 501    spin2 = spin_ids(2,block)
 502 
 503    do_coulomb_term  = (Bsp%use_coulomb_term .and. (spin1==spin2))
 504    do_exchange_term = (Bsp%exchange_term>0)
 505    w_is_diagonal    = BSp%use_diagonal_Wgg
 506    !
 507    ! Distribution of the matrix elements among the nodes.
 508    ! Note that rank0 will get the first transitions.
 509    nels=nels_block(block)
 510    ABI_MALLOC(t_start,(0:nproc-1))
 511    ABI_MALLOC(t_stop,(0:nproc-1))
 512    call xmpi_split_work2_i8b(nels,nproc,t_start,t_stop,msg,ierr)
 513    if (ierr/=0) then
 514      MSG_WARNING(msg)
 515    end if
 516 
 517    ABI_MALLOC(hsize_of,(0:nproc-1))
 518    hsize_of=0
 519    do rank=0,nproc-1
 520      if (t_stop(rank)>=t_start(rank)) hsize_of(rank) = t_stop(rank)-t_start(rank)+1
 521      !write(std_out,*)"nels",nels,hsize_of(rank)
 522    end do
 523 
 524    my_hsize = hsize_of(my_rank)
 525    if (my_hsize<=0) then
 526      write(msg,'(a,i0)')"Wrong number of transitions: my_hsize= ",my_hsize
 527      MSG_ERROR(msg)
 528    end if
 529    if (my_hsize /= INT(my_hsize,KIND=i4b)) then
 530      write(msg,'(a,i0)')"Size of local block too large for a default integer, Increase the number of CPUs: my_hsize= ",my_hsize
 531      MSG_ERROR(msg)
 532    end if
 533 
 534    my_cols=0
 535    do itp=1,Bsp%nreh(block)
 536      do it=1,itp
 537        ir = it + itp*(itp-1_i8b)/2
 538        if (ir==t_start(my_rank)) then
 539          my_rows(1) = it
 540          my_cols(1) = itp
 541        end if
 542        if (ir==t_stop(my_rank)) then
 543          my_rows(2) = it
 544          my_cols(2) = itp
 545        end if
 546      end do
 547    end do
 548 
 549    my_starts = [my_rows(1),my_cols(1)]
 550    my_ends   = [my_rows(2),my_cols(2)]
 551    !
 552    ! * Announce the treatment of submatrix treated by each node.
 553    bsize_my_block = 2*dpc*my_hsize
 554    write(msg,'(4(a,i0))')' Treating ',my_hsize,'/',nels,' matrix elements, from column ',my_cols(1),' up to column ',my_cols(2)
 555    call wrtout(std_out,msg,'PERS')
 556 
 557    if (is_resonant) then
 558      write(msg,'(a,f8.1,a)')&
 559 &     ' Calculating resonant blocks. Memory required: ',bsize_my_block*b2Mb,' Mb. '
 560    else
 561      write(msg,'(a,f8.1,a)')&
 562 &     ' Calculating coupling blocks. Memory required: ',bsize_my_block*b2Mb,' Mb. '
 563    end if
 564    call wrtout(std_out,msg,"COLL")
 565 
 566    ! Allocate big (scalable) buffer to store the BS matrix on this node.
 567    ABI_STAT_MALLOC(my_bsham,(t_start(my_rank):t_stop(my_rank)), ierr)
 568    ABI_CHECK(ierr==0, 'Not enough memory for exc Hamiltonian')
 569 
 570    if (BSp%prep_interp) then
 571      ! Allocate big (scalable) buffers to store a,b,c coeffients
 572      ABI_STAT_MALLOC(acoeffs,(t_start (my_rank):t_stop(my_rank)), ierr)
 573      ABI_CHECK(ierr==0, 'Not enough memory for acoeffs')
 574 
 575      ABI_STAT_MALLOC(bcoeffs,(t_start(my_rank):t_stop(my_rank)), ierr)
 576      ABI_CHECK(ierr==0, 'Not enough memory for bcoeffs')
 577 
 578      ABI_STAT_MALLOC(ccoeffs,(t_start(my_rank):t_stop(my_rank)), ierr)
 579      ABI_CHECK(ierr==0, 'Not enough memory for ccoeffs')
 580    end if
 581 
 582    if (do_coulomb_term) then ! Construct Coulomb term.
 583 
 584      call timab(682,1,tsec) ! exc_build_ham(Coulomb)
 585 
 586      write(msg,'(a,2i2,a)')" Calculating direct Coulomb term for (spin1,spin2) ",spin1,spin2," using full W_{GG'} ..."
 587      if (w_is_diagonal) then
 588         write(msg,'(a,2i2,a)')&
 589 &        " Calculating direct Coulomb term for (spin1, spin2) ",spin1,spin2," using diagonal approximation for W_{GG'} ..."
 590      end if
 591      call wrtout(std_out,msg,"COLL")
 592 
 593      ABI_MALLOC(ctccp,(npweps))
 594 
 595      if (BSp%prep_interp) then
 596        ABI_MALLOC(aa_ctccp,(npweps))
 597        ABI_MALLOC(bb_ctccp1,(npweps))
 598        ABI_MALLOC(bb_ctccp2,(npweps))
 599        ABI_MALLOC(cc_ctccp,(npweps))
 600      end if
 601 
 602      ABI_MALLOC(vc_sqrt_qbz,(npweps))
 603 
 604 #ifdef DEV_MG_DEBUG_MODE
 605      ABI_MALLOC(ttp_check,(BSp%nreh(block),BSp%nreh(block)))
 606      ttp_check=0
 607 #endif
 608 
 609      do ikp_bz=1,BSp%nkbz ! Loop over kp
 610        ! NOTE: this way of looping is good for bulk but it's not optimal in the
 611        !       case of systems sampled only at Gamma e.g. isolated systems in which
 612        !       one should take advantage of Hermiticity by looping over c-v !!!!
 613 
 614        ! Check whether (vp,cp,ikp_bz,spin2) belongs to the set of columns treated by me for some vp,cp
 615        ! Be careful since vcks2t contains zeros corresponding to transitions that should be skipped.
 616        itpk_min = MINVAL(Bsp%vcks2t(:,:,ikp_bz,spin2), MASK=(Bsp%vcks2t(:,:,ikp_bz,spin2)>0) )
 617        itpk_max = MAXVAL(Bsp%vcks2t(:,:,ikp_bz,spin2))
 618        if (my_cols(2)<itpk_min .or. my_cols(1)>itpk_max) CYCLE
 619 
 620        write(msg,'(3(a,i0))')" status: ",ikp_bz,"/",BSp%nkbz," done by node ",my_rank
 621        call wrtout(std_out,msg,"PERS",do_flush=.True.)
 622 
 623        ! * Get ikp_ibz, non-symmorphic phase, ph_mkpt, and symmetries from ikp_bz.
 624        call get_BZ_item(Kmesh,ikp_bz,kpbz,ikp_ibz,isym_kp,itim_kp,ph_mkpt,isirred=isirred)
 625        !ABI_CHECK(isirred,"not irred!")
 626        !ABI_CHECK(ph_mkpt == cone, "Wrong phase!")
 627 
 628        ktabr_kp(:) = ktabr(:,ikp_bz)
 629        spinrot_kp(:)=Cryst%spinrot(:,isym_kp)
 630        !ABI_CHECK(ALL(ktabr_kp == id_tab), "wrong tab")
 631 
 632        do ik_bz=1,ikp_bz ! Loop over k
 633          !
 634          ! * Get ik_ibz, non-symmorphic phase, ph_mkt, and symmetries from ik_bz
 635          call get_BZ_item(Kmesh,ik_bz,kbz,ik_ibz,isym_k,itim_k,ph_mkt,isirred=isirred)
 636          !ABI_CHECK(isirred,"not irred!")
 637          !ABI_CHECK(ph_mkt == cone, "Wrong phase!")
 638 
 639          ktabr_k(:) = ktabr(:,ik_bz)
 640          spinrot_k(:)=Cryst%spinrot(:,isym_k)
 641          !ABI_CHECK(ALL(ktabr_k == id_tab), "wrong tab")
 642          !if(itim_k==2) CYCLE ! time-reversal or not
 643          !
 644          ! * Find q = K-KP-G0 in the full BZ.
 645          kmkp = Kmesh%bz(:,ik_bz) - Kmesh%bz(:,ikp_bz)
 646          call findqg0(iq_bz,g0,kmkp,Qmesh%nbz,Qmesh%bz,BSp%mG0)
 647 
 648          ! Evaluate the tables needed for the padded FFT performed in rhotwg. Note that we have
 649          ! to pass G-G0 to sphereboundary instead of G as we need FFT results on the shifted G-sphere,
 650          ! If Gamma is not inside G-G0 one has to disable FFT padding as sphereboundary will give wrong tables.
 651          ! * Get the G-G0 shift for the FFT of the oscillators.
 652          !
 653          ABI_MALLOC(gbound,(2*mgfft_osc+8,2))
 654          call gsph_fft_tabs(Gsph_c,g0,mgfft_osc,ngfft_osc,use_padfft,gbound,igfftg0)
 655          if ( ANY(fftalga_osc == (/2,4/)) ) use_padfft=0 ! Pad-FFT is not coded in rho_tw_g
 656          if (use_padfft==0) then
 657            ABI_FREE(gbound)
 658            ABI_MALLOC(gbound,(2*mgfft_osc+8,2*use_padfft))
 659          end if
 660          !
 661          ! * Get iq_ibz, and symmetries from iq_bz
 662          call get_BZ_item(Qmesh,iq_bz,qbz,iq_ibz,isym_q,itim_q)
 663          is_qeq0 = (normv(qbz,Cryst%gmet,'G')<GW_TOLQ0)
 664 
 665          ! Symmetrize em1(omega=0)
 666          call screen_symmetrizer(W,iq_bz,Cryst,Gsph_c,Qmesh,Vcp)
 667          !
 668          ! * Set up table of |q_BZ+G|
 669          if (iq_ibz==1) then
 670            do ig=1,npweps
 671              isg = Gsph_c%rottb(ig,itim_q,isym_q)
 672              vc_sqrt_qbz(isg)=Vcp%vcqlwl_sqrt(ig,1)
 673            end do
 674          else
 675            do ig=1,npweps
 676              isg = Gsph_c%rottb(ig,itim_q,isym_q)
 677              vc_sqrt_qbz(isg) = Vcp%vc_sqrt(ig,iq_ibz)
 678            end do
 679          end if
 680 
 681          ! === Evaluate oscillator matrix elements ===
 682          ! * $ <phj/r|e^{-i(q+G)}|phi/r> - <tphj/r|e^{-i(q+G)}|tphi/r> $ in packed form.
 683          if (Wfd%usepaw==1.and.ik_bz/=ikp_bz) then
 684            ABI_DT_MALLOC(Pwij_q,(Cryst%ntypat))
 685            call pawpwij_init(Pwij_q,npweps,Qmesh%bz(:,iq_bz),Gsph_c%gvec,Cryst%rprimd,Psps,Pawtab,Paw_pwff)
 686          end if
 687 
 688          ! =======================================
 689          ! === Loop over the four band indeces ===
 690          ! =======================================
 691          !
 692          do ic=bidx(1,2),bidx(2,2) !do ic=BSp%lumo,BSp%nbnds
 693 
 694            if (wfd_ihave_ur(Wfd,ic,ik_ibz,spin1,how="Stored")) then
 695              ptur_ck =>  Wfd%Wave(ic,ik_ibz,spin1)%ur
 696            else
 697              call wfd_get_ur(Wfd,ic,ik_ibz,spin1,ur_ck)
 698              ptur_ck => ur_ck
 699            end if
 700            !
 701            ! Get cprj for this (c,kbz,s1) in the BZ.
 702            ! * phase due to the umklapp G0 in k-q is already included.
 703            if (Wfd%usepaw==1) then
 704              if (wfd_ihave_cprj(Wfd,ic,ik_ibz,spin1,how="Stored")) then
 705                ptcp_ck =>  Wfd%Wave(ic,ik_ibz,spin1)%cprj
 706              else
 707                call wfd_get_cprj(Wfd,ic,ik_ibz,spin1,Cryst,Cp_tmp1,sorted=.FALSE.)
 708                ptcp_ck =>  Cp_tmp1
 709              end if
 710              call paw_symcprj_op(ik_bz,nspinor,1,Cryst,Kmesh,Pawtab,Pawang,ptcp_ck,Cp_ck)
 711            end if
 712 
 713            do icp=bidx(1,4),bidx(2,4)  !do icp=BSp%lumo,BSp%nbnds
 714              ! * Calculate matrix-elements rhxtwg_cpc
 715              !
 716              if (ik_bz==ikp_bz) then ! Already in memory.
 717                rhxtwg_cpc(:) = sym_rhotwgq0(itim_k,isym_k,dim_rtwg,npweps,rhxtwg_q0(:,icp,ic,ik_ibz,spin1),Gsph_c)
 718 
 719              else ! Calculate matrix element from wfr.
 720                ! TODO: change the order of the loops.
 721 
 722                if (wfd_ihave_ur(Wfd,icp,ikp_ibz,spin2,how="Stored")) then
 723                  ptur_ckp => Wfd%Wave(icp,ikp_ibz,spin2)%ur
 724                else
 725                  call wfd_get_ur(Wfd,icp,ikp_ibz,spin2,ur_ckp)
 726                  ptur_ckp => ur_ckp
 727                end if
 728 
 729                ! Load cprj for this (c,k,s2) in the BZ.
 730                ! * Do not care about umklapp G0 in k-q as the phase is already included.
 731                if (Wfd%usepaw==1) then
 732                  if (wfd_ihave_cprj(Wfd,icp,ikp_ibz,spin2,how="Stored")) then
 733                    ptcp_ckp =>  Wfd%Wave(icp,ikp_ibz,spin2)%cprj
 734                  else
 735                    call wfd_get_cprj(Wfd,icp,ikp_ibz,spin2,Cryst,Cp_tmp2,sorted=.FALSE.)
 736                    ptcp_ckp =>  Cp_tmp2
 737                  end if
 738                  call paw_symcprj_op(ikp_bz,nspinor,1,Cryst,Kmesh,Pawtab,Pawang,ptcp_ckp,Cp_ckp)
 739                end if
 740 
 741                call rho_tw_g(nspinor,npweps,nfftot_osc,ndat1,ngfft_osc,map2sphere,use_padfft,igfftg0,gbound,&
 742 &                ptur_ckp,itim_kp,ktabr_kp,ph_mkpt,spinrot_kp,&
 743 &                ptur_ck ,itim_k ,ktabr_k ,ph_mkt ,spinrot_k ,&
 744 &                dim_rtwg,rhxtwg_cpc)
 745 
 746                if (Wfd%usepaw==1) then ! Add PAW onsite contribution.
 747                  call paw_rho_tw_g(npweps,dim_rtwg,nspinor,Cryst%natom,Cryst%ntypat,Cryst%typat,Cryst%xred,Gsph_c%gvec,&
 748 &                  Cp_ckp,Cp_ck,Pwij_q,rhxtwg_cpc)
 749                end if
 750              end if
 751 
 752              if (BSp%prep_interp) then
 753                aa_cpc = rhxtwg_cpc
 754                aa_cpc(2:) = czero
 755                bb_cpc1 = vc_sqrt_qbz*rhxtwg_cpc
 756                bb_cpc1(1) = czero
 757                bb_cpc2 = rhxtwg_cpc
 758                bb_cpc2(2:) = czero
 759 
 760                if(ik_bz == ikp_bz) then
 761                   ! Enforce orthogonality of the wavefunctions.
 762                   if(icp == ic) then
 763                     aa_cpc(1) = cone
 764                     bb_cpc2(1) = cone
 765                   else
 766                     aa_cpc(1) = czero
 767                     bb_cpc2(1) = czero
 768                   end if
 769                end if
 770 
 771                ! MG TODO: a does not require a call to w0gemv
 772                call screen_w0gemv(W,"C",npweps,nspinor,w_is_diagonal,cone_gw,czero_gw,aa_cpc,aa_ctccp)
 773                call screen_w0gemv(W,"C",npweps,nspinor,w_is_diagonal,cone_gw,czero_gw,bb_cpc1,bb_ctccp1)
 774                call screen_w0gemv(W,"C",npweps,nspinor,w_is_diagonal,cone_gw,czero_gw,bb_cpc2,bb_ctccp2)
 775 
 776                cc_cpc = vc_sqrt_qbz*rhxtwg_cpc
 777                cc_cpc(1) = czero
 778 
 779                call screen_w0gemv(W,"C",npweps,nspinor,w_is_diagonal,cone_gw,czero_gw,cc_cpc,cc_ctccp)
 780              end if
 781 
 782              ! Prepare sum_GG' rho_c'c*(G) W_qbz(G,G') rho_v'v(G')
 783              ! First sum on G: sum_G rho_c'c(G) W_qbz*(G,G') (W_qbz conjugated)
 784              rhxtwg_cpc = rhxtwg_cpc * vc_sqrt_qbz
 785              call screen_w0gemv(W,"C",npweps,nspinor,w_is_diagonal,cone_gw,czero_gw,rhxtwg_cpc,ctccp)
 786 
 787              do iv=bidx(1,1),bidx(2,1)    !do iv=BSp%lomo,BSp%homo
 788                it = BSp%vcks2t(iv,ic,ik_bz,spin1); if (it==0) CYCLE ! ir-uv-cutoff
 789                ene_t = BSp%Trans(it,spin1)%en
 790 
 791                ! TODO: use this but change the order of the loops.
 792                if (wfd_ihave_ur(Wfd,iv,ik_ibz,spin1,how="Stored")) then
 793                  ptur_vk => Wfd%Wave(iv,ik_ibz,spin1)%ur
 794                else
 795                  call wfd_get_ur(Wfd,iv,ik_ibz,spin1,ur_vk)
 796                  ptur_vk => ur_vk
 797                end if
 798                !
 799                ! Load cprj for this (v,k,s1) in the BZ.
 800                ! * Do not care about umklapp G0 in k-q as the phase is already included.
 801                if (Wfd%usepaw==1) then
 802                  if (wfd_ihave_cprj(Wfd,iv,ik_ibz,spin1,how="Stored")) then
 803                    ptcp_vk =>  Wfd%Wave(iv,ik_ibz,spin1)%cprj
 804                  else
 805                    call wfd_get_cprj(Wfd,iv,ik_ibz,spin1,Cryst,Cp_tmp3,sorted=.FALSE.)
 806                    ptcp_vk => Cp_tmp3
 807                  end if
 808                  call paw_symcprj_op(ik_bz,nspinor,1,Cryst,Kmesh,Pawtab,Pawang,ptcp_vk,Cp_vk)
 809                end if
 810 
 811                do ivp=bidx(1,3),bidx(2,3) !do ivp=BSp%lomo,BSp%homo
 812 
 813                  if (is_resonant) then
 814                    itp = BSp%vcks2t(ivp,icp,ikp_bz,spin2)
 815                  else ! have to exchange band indeces
 816                    itp = BSp%vcks2t(icp,ivp,ikp_bz,spin2)
 817                  end if
 818 
 819                  if (itp==0) CYCLE ! ir-uv-cutoff
 820 
 821                  ! FIXME Temporary work around, when ikp_bz == ik it might happen that itp<it
 822                  ! should rewrite the loops using contracted k-dependent indeces for bands
 823                  if (itp<it) CYCLE
 824 
 825                  ir = it + itp*(itp-1)/2
 826                  if (ir<t_start(my_rank).or.ir>t_stop(my_rank)) CYCLE
 827 
 828                  ene_tp = BSp%Trans(itp,spin2)%en
 829 
 830                  ! ============================================
 831                  ! === Calculate matrix elements rhxtwg_vpv ===
 832                  ! ============================================
 833                  if (ik_bz==ikp_bz) then
 834                    ! Already in memory.
 835                    rhxtwg_vpv(:) = sym_rhotwgq0(itim_k,isym_k,dim_rtwg,npweps,rhxtwg_q0(:,ivp,iv,ik_ibz,spin1),Gsph_c)
 836 
 837                  else
 838                    ! Calculate matrix element from wfr.
 839                    if (wfd_ihave_ur(Wfd,ivp,ikp_ibz,spin2,how="Stored")) then
 840                      ptur_vkp => Wfd%Wave(ivp,ikp_ibz,spin2)%ur
 841                    else
 842                      call wfd_get_ur(Wfd,ivp,ikp_ibz,spin2,ur_vkp)
 843                      ptur_vkp => ur_vkp
 844                    end if
 845                    !
 846                    ! Load cprj for this (vp,kp,s2) in the BZ.
 847                    ! * Do not care about umklapp G0 in k-q as the phase is already included.
 848                    if (Wfd%usepaw==1) then
 849                      if (wfd_ihave_cprj(Wfd,ivp,ikp_ibz,spin2,how="Stored")) then
 850                        ptcp_vkp =>  Wfd%Wave(ivp,ikp_ibz,spin2)%cprj
 851                      else
 852                        call wfd_get_cprj(Wfd,ivp,ikp_ibz,spin2,Cryst,Cp_tmp4,sorted=.FALSE.)
 853                        ptcp_vkp => Cp_tmp4
 854                      end if
 855                      call paw_symcprj_op(ikp_bz,nspinor,1,Cryst,Kmesh,Pawtab,Pawang,ptcp_vkp,Cp_vkp)
 856                    end if
 857 
 858                    call rho_tw_g(nspinor,npweps,nfftot_osc,ndat1,ngfft_osc,map2sphere,use_padfft,igfftg0,gbound,&
 859 &                     ptur_vkp,itim_kp,ktabr_kp,ph_mkpt,spinrot_kp,&
 860 &                     ptur_vk ,itim_k ,ktabr_k ,ph_mkt ,spinrot_k ,&
 861 &                     dim_rtwg,rhxtwg_vpv)
 862 
 863                    if (Wfd%usepaw==1) then ! Add PAW onsite contribution.
 864                      call paw_rho_tw_g(npweps,dim_rtwg,nspinor,Cryst%natom,Cryst%ntypat,Cryst%typat,Cryst%xred,&
 865 &                      Gsph_c%gvec,Cp_vkp,Cp_vk,Pwij_q,rhxtwg_vpv)
 866                    end if
 867                  end if
 868 
 869                  ! Index in the global Hamiltonian matrix.
 870                  ir = it + itp*(itp-1_i8b)/2
 871 
 872                  if (ir<t_start(my_rank).or.ir>t_stop(my_rank)) then
 873                    write(msg,'(a,3(1x,i0))')" Gonna SIGFAULT, ir, t_start, t_stop ",ir,t_start(my_rank),t_stop(my_rank)
 874                    MSG_ERROR(msg)
 875                  end if
 876                  !ABI_CHECK(itp >= it,"itp < it")
 877 
 878                  if (BSp%prep_interp) then
 879                    ! Save a,b, c coefficients.
 880                    aa_vpv = rhxtwg_vpv
 881                    aa_vpv(2:) = czero
 882                    bb_vpv1 = rhxtwg_vpv
 883                    bb_vpv1(2:) = czero
 884                    bb_vpv2 = vc_sqrt_qbz*rhxtwg_vpv
 885                    bb_vpv2(1) = czero
 886 
 887                    if (ik_bz == ikp_bz) then
 888                       ! Enforce orthogonality of the wavefunctions.
 889                       if (ivp == iv) then
 890                         aa_vpv(1) = cone
 891                         bb_vpv1(1) = cone
 892                       else
 893                         aa_vpv(1) = czero
 894                         bb_vpv1(1) = czero
 895                       end if
 896                    end if
 897 
 898                    cc_vpv = vc_sqrt_qbz*rhxtwg_vpv
 899                    cc_vpv(1) = czero
 900 
 901                    aatmp = -faq * xdotc(npweps,aa_ctccp,1,aa_vpv,1)
 902                    bbtmp = -faq * xdotc(npweps,bb_ctccp1,1,bb_vpv1,1)-faq*xdotc(npweps,bb_ctccp2,1,bb_vpv2,1)
 903                    cctmp = -faq * xdotc(npweps,cc_ctccp,1,cc_vpv,1)
 904 
 905                    acoeffs(ir) = aatmp
 906                    bcoeffs(ir) = bbtmp
 907                    ccoeffs(ir) = cctmp
 908                  end if
 909 
 910                  ! sum_G2 rho_c'c(G) W_qbz(G,G') rho_v'v(G')
 911                  rhxtwg_vpv = vc_sqrt_qbz * rhxtwg_vpv
 912                  http = - faq * xdotc(npweps,ctccp,1,rhxtwg_vpv,1)
 913 
 914                  ! Save result taking into account the symmetry of the matrix.
 915                  ! Note that the diagonal of the resonant block is not forced to be real
 916                  my_bsham(ir) = http
 917 
 918 #ifdef DEV_MG_DEBUG_MODE
 919                  ttp_check(it,itp) = ttp_check(it,itp)+1
 920 #endif
 921                end do !ivp
 922              end do !iv
 923            end do !icp
 924          end do !ic
 925 
 926          ABI_FREE(gbound)
 927 
 928          if (Wfd%usepaw==1.and.ik_bz/=ikp_bz) then ! Free the onsite contribution for this q.
 929            call pawpwij_free(Pwij_q)
 930            ABI_DT_FREE(Pwij_q)
 931          end if
 932 
 933        end do ! ik_bz
 934      end do ! Fat loop over ikp_bz
 935 
 936 #ifdef DEV_MG_DEBUG_MODE
 937      do itp=1,BSp%nreh(block)
 938        do it=1,BSp%nreh(block)
 939         ir = it + itp*(itp-1_i8b)/2
 940          if (itp>=it .and. ttp_check(it,itp) /= 1) then
 941            if (ir>=t_start(my_rank).and.ir<=t_stop(my_rank)) then
 942              write(std_out,*)"WARN: upper triangle is not 1 ",it,itp,ttp_check(it,itp)
 943              write(std_out,*)TRIM(repr_trans(Bsp%Trans(it ,spin1)))
 944              write(std_out,*)TRIM(repr_trans(Bsp%Trans(itp,spin2)))
 945            end if
 946          end if
 947          if (itp< it .and. ttp_check(it,itp) /= 0) then
 948            write(std_out,*)"WARN: then lower triangle is not 0 ",it,itp,ttp_check(it,itp)
 949            write(std_out,*)TRIM(repr_trans(Bsp%Trans(it ,spin1)))
 950            write(std_out,*)TRIM(repr_trans(Bsp%Trans(itp,spin2)))
 951          end if
 952        end do
 953      end do
 954      ierr = SUM(SUM(ttp_check,DIM=2),DIM=1)
 955      if (ierr/=my_hsize) then
 956        write(msg,'(a,2i0)')"ierr/=my_hsize",ierr,my_hsize
 957        MSG_ERROR(msg)
 958      end if
 959      ABI_FREE(ttp_check)
 960 #endif
 961 
 962      ABI_FREE(ctccp)
 963      if(Bsp%prep_interp) then
 964        ABI_FREE(aa_ctccp)
 965        ABI_FREE(bb_ctccp1)
 966        ABI_FREE(bb_ctccp2)
 967        ABI_FREE(cc_ctccp)
 968      end if
 969 
 970      ABI_FREE(vc_sqrt_qbz)
 971      call wrtout(std_out,' Coulomb term completed',"COLL")
 972 
 973      call timab(682,2,tsec) ! exc_build_ham(Coulomb)
 974    end if ! do_coulomb_term
 975    !
 976    ! =====================
 977    ! === Exchange term ===
 978    ! =====================
 979    ! TODO might add treatment of <psi|q+G|psi> for q+G -> 0
 980    ! TODO might used enlarged G-sphere for better convergence.
 981    if (do_exchange_term) then
 982 
 983      !call exc_build_v(spin1,spin2,nsppol,npweps,Bsp,Cryst,Kmesh,Qmesh,Gsph_x,Gsph_c,Vcp,&
 984      ! &  is_resonant,rhxtwg_q0,nproc,my_rank,t_start,t_stop,my_bsham,comm)
 985 
 986      call timab(683,1,tsec) ! exc_build_ham(exchange)
 987 
 988      write(msg,'(a,2i2,a)')" Calculating exchange term for (spin1,spin2) ",spin1,spin2," ..."
 989      call wrtout(std_out,msg,"COLL")
 990 
 991      ABI_MALLOC(rhotwg1,(npweps))
 992      ABI_MALLOC(rhotwg2,(npweps))
 993 
 994      ngx = Gsph_x%ng
 995      ABI_MALLOC(vc_sqrt_qbz,(ngx))
 996 
 997      ! * Get iq_ibz, and symmetries from iq_bz.
 998      iq_bz = iqbz0 ! q = 0 -> iqbz0
 999      call get_BZ_item(Qmesh,iq_bz,qbz,iq_ibz,isym_q,itim_q)
1000 
1001      ! * Set up table of |q(BZ)+G|
1002      if (iq_ibz==1) then
1003        do ig=1,ngx
1004          ISg = Gsph_x%rottb(ig,itim_q,isym_q)
1005          vc_sqrt_qbz(ISg)=Vcp%vcqlwl_sqrt(ig,1)
1006        end do
1007      else
1008         MSG_ERROR("iq_ibz should be 1")
1009      end if
1010 
1011      do itp=1,BSp%nreh(block) ! Loop over transition tp = (kp,vp,cp,spin2)
1012 
1013        if (itp<my_cols(1) .or. itp>my_cols(2)) CYCLE ! I dont have this column.
1014        ene_tp = Bsp%Trans(itp,spin2)%en
1015        ikp_bz = Bsp%Trans(itp,spin2)%k
1016        ivp    = Bsp%Trans(itp,spin2)%v
1017        icp    = Bsp%Trans(itp,spin2)%c
1018 
1019        ikp_ibz = Kmesh%tab (ikp_bz)
1020        isym_kp = Kmesh%tabo(ikp_bz)
1021        itim_kp = (3-Kmesh%tabi(ikp_bz))/2
1022 
1023        if (is_resonant) then
1024          rhotwg2(:) = sym_rhotwgq0(itim_kp,isym_kp,dim_rtwg,npweps,rhxtwg_q0(:,ivp,icp,ikp_ibz,spin2),Gsph_c)
1025        else ! Code for coupling block.
1026          rhotwg2(:) = sym_rhotwgq0(itim_kp,isym_kp,dim_rtwg,npweps,rhxtwg_q0(:,icp,ivp,ikp_ibz,spin2),Gsph_c)
1027        end if
1028        !
1029        ! Multiply by the Coulomb term.
1030         do ig=2,npweps
1031           rhotwg2(ig) = rhotwg2(ig) * vc_sqrt_qbz(ig) * vc_sqrt_qbz(ig)
1032         end do
1033 
1034        do it=1,itp ! Loop over transition t = (k,v,c,spin1)
1035          ir = it + itp*(itp-1_i8b)/2
1036          if (ir<t_start(my_rank) .or. ir>t_stop(my_rank)) CYCLE
1037 
1038          ene_t = Bsp%Trans(it,spin1)%en
1039          ik_bz = Bsp%Trans(it,spin1)%k
1040          iv    = Bsp%Trans(it,spin1)%v
1041          ic    = Bsp%Trans(it,spin1)%c
1042 
1043          ik_ibz = Kmesh%tab(ik_bz)
1044          isym_k = Kmesh%tabo(ik_bz)
1045          itim_k = (3-Kmesh%tabi(ik_bz))/2
1046          !if (itim_k==2) CYCLE ! time-reversal or not
1047 
1048          rhotwg1(:) = sym_rhotwgq0(itim_k,isym_k,dim_rtwg,npweps,rhxtwg_q0(:,iv,ic,ik_ibz,spin1),Gsph_c)
1049          !
1050          ! sum over G/=0
1051          ctemp = xdotc(npweps-1,rhotwg1(2:),1,rhotwg2(2:),1)
1052          ctemp = faq * kx_fact * ctemp
1053 
1054          ! exchange term is non divergent !
1055          if (BSp%prep_interp) then
1056            ccoeffs(ir) = ccoeffs(ir) + ctemp
1057          end if
1058 
1059          my_bsham(ir) = my_bsham(ir) + ctemp
1060        end do !it
1061      end do !itp
1062 
1063      ABI_FREE(rhotwg1)
1064      ABI_FREE(rhotwg2)
1065      ABI_FREE(vc_sqrt_qbz)
1066 
1067      call timab(683,2,tsec) ! exc_build_ham(exchange)
1068    end if ! do_exchange_term
1069    !
1070    ! =====================
1071    ! === Diagonal term ===
1072    ! =====================
1073    if (is_resonant .and. spin1==spin2) then
1074      write(msg,'(a,2i2,a)')" Adding diagonal term for (spin1,spin2) ",spin1,spin2," ..."
1075      call wrtout(std_out,msg,"COLL")
1076      do it=1,BSp%nreh(block)
1077        ir = it + it*(it-1_i8b)/2
1078        if (ir>=t_start(my_rank) .and. ir<=t_stop(my_rank)) my_bsham(ir) = my_bsham(ir) + Bsp%Trans(it,spin1)%en
1079      end do
1080    end if
1081 
1082    if (.FALSE.) then
1083      dump_unt = get_unit()
1084      msg=' Coupling Hamiltonian matrix elements: '
1085      if (is_resonant) msg=' Reasonant Hamiltonian matrix elements: '
1086      call wrtout(dump_unt,msg,"PERS")
1087      call wrtout(dump_unt,'    k  v  c  s      k" v" c" s"       H',"PERS")
1088      do itp=1,BSp%nreh(block)
1089        ikp_bz = Bsp%Trans(itp,spin2)%k
1090        ivp    = Bsp%Trans(itp,spin2)%v
1091        icp    = Bsp%Trans(itp,spin2)%c
1092        do it=1,itp
1093          ik_bz = Bsp%Trans(it,spin1)%k
1094          iv    = Bsp%Trans(it,spin1)%v
1095          ic    = Bsp%Trans(it,spin1)%c
1096          ir = it + itp*(itp-1_i8b)/2
1097          if (ir>=t_start(my_rank).and.ir<=t_stop(my_rank)) then
1098            http = my_bsham(ir)
1099            !if (ABS(http) > tol3) then
1100            write(msg,'(2(i0,1x),2(i5,3i3,3x),2f7.3)')it,itp, ik_bz,iv,ic,spin1, ikp_bz,ivp,icp,spin2, http
1101            call wrtout(dump_unt,msg,"PERS")
1102            !end if
1103          end if
1104        end do
1105      end do
1106    end if
1107 
1108 !DBYG
1109    if (.False.) then
1110      dump_unt = get_unit()
1111      dump_unt = 999
1112      msg=' Coupling Hamiltonian matrix elements: '
1113      if (is_resonant) msg=' Resonant Hamiltonian matrix elements: '
1114      call wrtout(dump_unt,msg,"PERS")
1115      call wrtout(dump_unt,'    k v  c  s      k" v" c" s"       H',"PERS")
1116      do itp=1,BSp%nreh(block)
1117        ikp_bz = Bsp%Trans(itp,spin2)%k
1118        ivp    = Bsp%Trans(itp,spin2)%v
1119        icp    = Bsp%Trans(itp,spin2)%c
1120        do it=1,BSp%nreh(block)
1121          ik_bz = Bsp%Trans(it,spin1)%k
1122          iv    = Bsp%Trans(it,spin1)%v
1123          ic    = Bsp%Trans(it,spin1)%c
1124          if(it > itp) then
1125            ir = itp+it*(it-1_i8b)/2
1126          else
1127            ir = it + itp*(itp-1_i8b)/2
1128          end if
1129          if (ir>=t_start(my_rank).and.ir<=t_stop(my_rank)) then
1130            if(it > itp) then
1131              http = CONJG(my_bsham(ir))
1132              if (BSp%prep_interp) then
1133                aatmp = CONJG(acoeffs(ir))
1134                bbtmp = CONJG(bcoeffs(ir))
1135                cctmp = CONJG(ccoeffs(ir))
1136              end if
1137            else
1138              http = my_bsham(ir)
1139              if (BSp%prep_interp) then
1140                aatmp = acoeffs(ir)
1141                bbtmp = bcoeffs(ir)
1142                cctmp = ccoeffs(ir)
1143              end if
1144            end if
1145            if (it == itp) http = http - Bsp%Trans(it,spin1)%en
1146            !if (ABS(http) > tol3) then
1147            if (BSp%prep_interp) then
1148              write(msg,'(2(i0,1x),2(i5,3i3,3x),2f24.20,2f24.20,2f24.20,2f24.20)')it,itp, ik_bz,iv,ic,spin1, ikp_bz,ivp,icp,&
1149 &   spin2, http, aatmp, bbtmp, cctmp
1150            else
1151              write(msg,'(2(i0,1x),2(i5,3i3,3x),2f24.20)')it,itp, ik_bz,iv,ic,spin1, ikp_bz,ivp,icp,spin2, http
1152            end if
1153            call wrtout(dump_unt,msg,"PERS")
1154            !end if
1155          end if
1156        end do
1157      end do
1158    end if
1159 
1160    call timab(684,1,tsec) ! exc_build_ham(synchro)
1161    call xmpi_barrier(comm)
1162    call timab(684,2,tsec) ! exc_build_ham(synchro)
1163    !
1164    ! =================================
1165    ! === Write Hamiltonian on disk ===
1166    ! =================================
1167    call timab(685,1,tsec) ! exc_build_ham(write_ham)
1168    if (use_mpiio) then
1169 #ifdef HAVE_MPI_IO
1170      ! Write the Hamiltonian with collective MPI-IO.
1171      if (BSp%prep_interp) then
1172        MSG_ERROR("Preparation of interpolation technique not yet coded with MPI-IO")
1173      end if
1174      ABI_CHECK(nsppol==1,"nsppol==2 not coded, offset is wrong")
1175      !
1176      old_type = MPI_DOUBLE_COMPLEX
1177      call xmpio_create_fherm_packed(my_starts,my_ends,is_fortran_file,my_offset,old_type,hmat_type,offset_err)
1178 
1179      if (offset_err/=0) then
1180        write(msg,"(3a)")&
1181 &        "Global position index cannot be stored in a standard Fortran integer. ",ch10,&
1182 &        "BSE matrix cannot be written with a single MPI-IO call. "
1183        MSG_ERROR(msg)
1184      end if
1185      !
1186      ! Each node uses a different offset to skip the header and the blocks written by the other CPUs.
1187      my_offset = offset_of_block(block) + my_offset
1188 
1189      call MPI_FILE_SET_VIEW(mpi_fh, my_offset, MPI_BYTE, hmat_type, 'native', MPI_INFO_NULL, mpi_err)
1190      ABI_CHECK_MPI(mpi_err,"SET_VIEW")
1191 
1192      call MPI_TYPE_FREE(hmat_type,mpi_err)
1193      ABI_CHECK_MPI(mpi_err,"MPI_TYPE_FREE")
1194 
1195      if (hsize_of(my_rank) /= INT(hsize_of(my_rank),kind=i4b) ) then
1196        MSG_ERROR("Wraparound error")
1197      end if
1198 
1199      tmp_size = INT(hsize_of(my_rank))
1200      call MPI_FILE_WRITE_ALL(mpi_fh, my_bsham, tmp_size, MPI_DOUBLE_COMPLEX, MPI_STATUS_IGNORE, mpi_err)
1201      ABI_CHECK_MPI(mpi_err,"FILE_WRITE")
1202 
1203      ! It seems that personal calls in make the code stuck
1204      !if (is_fortran_file .and. my_rank==master) then ! Master writes the Fortran record markers.
1205      ! Write the Fortran record markers.
1206      neh2=BSp%nreh(block)
1207      ABI_MALLOC(bsize_frecord,(neh2))
1208      bsize_frecord = (/(col_glob * xmpi_bsize_dpc, col_glob=1,neh2)/)
1209      ! ehdr_offset points to the end of the header.
1210      !call xmpio_write_frmarkers(mpi_fh,ehdr_offset,xmpio_collective,neh2,bsize_frecord,mpi_err)
1211      my_offset = offset_of_block(block)
1212      call xmpio_write_frmarkers(mpi_fh,my_offset,xmpio_collective,neh2,bsize_frecord,ierr)
1213      ABI_CHECK(ierr==0,"Error while writing Fortran markers")
1214      ABI_FREE(bsize_frecord)
1215 #else
1216      MSG_BUG("You should not be here!")
1217 #endif
1218    else
1219      ! Use FORTRAN IO with sequential access mode.
1220      ! * Each node sends its data to master node.
1221      ! * Blocks are distributed according to the rank of the node.
1222      ! * Matrix is written by columns hence make sure that the last column is completely written.
1223      call cwtime(cputime,walltime,gflops,"start")
1224 
1225      if (my_rank==master) then
1226        prev_nrows=0; if (my_cols(2) /= my_rows(2)) prev_nrows = my_rows(2)
1227        ncol = my_cols(2)-my_cols(1)+1
1228        ist=1
1229        do jj=1,ncol
1230          col_glob = my_starts(2) + jj - 1
1231          nrows = col_glob; if (jj==ncol) nrows=my_rows(2)
1232          iend = ist + nrows -1
1233          write(bsh_unt) my_bsham(ist:iend)
1234          if (BSp%prep_interp) then
1235            write(a_unt) acoeffs(ist:iend)
1236            write(b_unt) bcoeffs(ist:iend)
1237            write(c_unt) ccoeffs(ist:iend)
1238          end if
1239          ist=iend+1
1240        end do
1241        write(msg,'(2(a,i0))')" Wraparound error: iend=",iend," my_hsize=",hsize_of(my_rank)
1242        ABI_CHECK(iend==hsize_of(my_rank),msg)
1243        ABI_FREE(my_bsham)
1244        if (BSp%prep_interp) then
1245          ABI_FREE(acoeffs)
1246          ABI_FREE(bcoeffs)
1247          ABI_FREE(ccoeffs)
1248        end if
1249      end if
1250 
1251      call xmpi_barrier(comm)
1252      !
1253      ! Collect data from the other nodes.
1254      do sender=1,nproc-1
1255        ! If I'm not involved, jump to the end of the loop and wait there (sequential IO? Of course!)
1256        if (all(my_rank /= [sender, master])) goto 100
1257 
1258        if (my_rank==master)  then
1259          ABI_MALLOC(buffer,(hsize_of(sender)))
1260          if (BSp%prep_interp) then
1261            ABI_MALLOC(abuffer,(hsize_of(sender)))
1262            ABI_MALLOC(bbuffer,(hsize_of(sender)))
1263            ABI_MALLOC(cbuffer,(hsize_of(sender)))
1264          end if
1265        end if
1266        tmp_size = INT(hsize_of(sender),kind=i4b)
1267        call xmpi_exch(my_bsham,tmp_size,sender,buffer,master,comm,mpi_err)
1268        if (BSp%prep_interp) then
1269          call xmpi_exch(acoeffs,tmp_size,sender,abuffer,master,comm,mpi_err)
1270          call xmpi_exch(bcoeffs,tmp_size,sender,bbuffer,master,comm,mpi_err)
1271          call xmpi_exch(ccoeffs,tmp_size,sender,cbuffer,master,comm,mpi_err)
1272        end if
1273 
1274        ! TODO Be careful with the MPI TAG here, add optional Arguments in xmpi_exch so that the TAG can be specified!
1275        proc_start = (/my_rows(1),my_cols(1)/)
1276        proc_end   = (/my_rows(2),my_cols(2)/)
1277        my_extrema(:,1) = proc_start
1278        my_extrema(:,2) = proc_end
1279 
1280        sender_extrema = my_extrema ! just to avoid NAN on sender. xechh_mpi is not well designed
1281        call xmpi_exch(my_extrema,4,sender,sender_extrema,master,comm,mpi_err)
1282 
1283        if (my_rank==master) then
1284           proc_start = sender_extrema(:,1)
1285           proc_end   = sender_extrema(:,2)
1286           !write(std_out,*)"proc_start, proc_end",proc_start,proc_end
1287 
1288          if (prev_nrows>0) then ! backspace the file if the last record written was not complete.
1289            !write(std_out,*)" master node had to call backspace"
1290            backspace(bsh_unt)
1291            ABI_MALLOC(prev_col,(prev_nrows))
1292            read(bsh_unt) prev_col
1293            backspace(bsh_unt)
1294 
1295            if (BSp%prep_interp) then
1296              backspace(a_unt)
1297              ABI_MALLOC(aprev_col,(prev_nrows))
1298              read(a_unt) aprev_col
1299              backspace(a_unt)
1300 
1301              backspace(b_unt)
1302              ABI_MALLOC(bprev_col,(prev_nrows))
1303              read(b_unt) bprev_col
1304              backspace(b_unt)
1305 
1306              backspace(c_unt)
1307              ABI_MALLOC(cprev_col,(prev_nrows))
1308              read(c_unt) cprev_col
1309              backspace(c_unt)
1310            end if
1311          end if
1312          !
1313          ! Write the columns owned by sender.
1314          ncol = proc_end(2)-proc_start(2)+1
1315          ist=1
1316          do jj=1,ncol
1317            col_glob = proc_start(2) + jj-1
1318            nrows = col_glob
1319            if (jj==1   )  nrows=col_glob - proc_start(1) + 1
1320            if (jj==ncol) then
1321              nrows=proc_end(1)
1322              if (ncol==1)  nrows=proc_end(1) - proc_start(1) + 1
1323            end if
1324            iend = ist + nrows -1
1325            !write(std_out,*)"Using nrows, ist, iend=",nrows,ist,iend
1326            if (jj==1 .and. prev_nrows>0) then ! join prev_col and this subcolumn.
1327              write(bsh_unt) CMPLX(prev_col,kind=dpc),CMPLX(buffer(ist:iend),kind=dpc)
1328              if (BSp%prep_interp) then
1329                write(a_unt) CMPLX(aprev_col,kind=dpc),CMPLX(abuffer(ist:iend),kind=dpc)
1330                write(b_unt) CMPLX(bprev_col,kind=dpc),CMPLX(bbuffer(ist:iend),kind=dpc)
1331                write(c_unt) CMPLX(cprev_col,kind=dpc),CMPLX(cbuffer(ist:iend),kind=dpc)
1332              end if
1333              prev_nrows = prev_nrows + iend-ist+1
1334            else
1335              write(bsh_unt) CMPLX(buffer(ist:iend),kind=dpc)
1336              if (BSp%prep_interp) then
1337                write(a_unt) CMPLX(abuffer(ist:iend),kind=dpc)
1338                write(b_unt) CMPLX(bbuffer(ist:iend),kind=dpc)
1339                write(c_unt) CMPLX(cbuffer(ist:iend),kind=dpc)
1340              end if
1341              prev_nrows=0
1342            end if
1343            ist=iend+1
1344          end do
1345          if (ncol>1) then ! Reset prev_nrows if a new column has begun.
1346            prev_nrows = proc_end(1)
1347            if (proc_end(1) == proc_end(2)) prev_nrows = 0
1348          end if
1349          if (iend/=hsize_of(sender)) then
1350            write(msg,'(2(a,i0))')" Wraparound error: iend=",iend," my_hsize=",hsize_of(sender)
1351            MSG_ERROR(msg)
1352          end if
1353          if (allocated(prev_col)) then
1354            ABI_FREE(prev_col)
1355          end if
1356          if (BSp%prep_interp) then
1357            if (allocated(aprev_col)) then
1358              ABI_FREE(aprev_col)
1359            end if
1360            if (allocated(bprev_col)) then
1361              ABI_FREE(bprev_col)
1362            end if
1363            if (allocated(cprev_col)) then
1364              ABI_FREE(cprev_col)
1365            end if
1366          end if
1367          ABI_FREE(buffer)
1368          if (BSp%prep_interp) then
1369            ABI_FREE(abuffer)
1370            ABI_FREE(bbuffer)
1371            ABI_FREE(cbuffer)
1372          end if
1373        end if ! master
1374        !
1375 100    call xmpi_barrier(comm)
1376      end do ! sender
1377 
1378      call cwtime(cputime,walltime,gflops,"stop")
1379      write(msg,'(2(a,f9.1),a)')" Fortran-IO completed. cpu_time: ",cputime,"[s], walltime: ",walltime," [s]"
1380      call wrtout(std_out,msg,"COLL",do_flush=.True.)
1381    end if ! use_mpiio
1382    call timab(685,2,tsec) ! exc_build_ham(write_ham)
1383    !
1384    if (allocated(my_bsham)) then
1385      ABI_FREE(my_bsham)
1386    end if
1387    if (BSp%prep_interp) then
1388      if (allocated(acoeffs)) then
1389        ABI_FREE(acoeffs)
1390      end if
1391      if (allocated(bcoeffs)) then
1392        ABI_FREE(bcoeffs)
1393      end if
1394      if (allocated(ccoeffs)) then
1395        ABI_FREE(ccoeffs)
1396      end if
1397    end if
1398    ABI_FREE(t_start)
1399    ABI_FREE(t_stop)
1400    ABI_FREE(hsize_of)
1401  end do ! block
1402  !
1403  ! ===========================================
1404  ! === Exchange term for spin_up spin_down ===
1405  ! ===========================================
1406 
1407  if (nsppol==2) then
1408    call timab(686,2,tsec) ! exc_build_ham(exch.spin)
1409    block=3
1410    neh1=BSp%nreh(1)
1411    neh2=BSp%nreh(2)
1412    !
1413    ! The oscillators at q=0 are available on each node for both spin.
1414    ! Here the calculation of the block is parallelized over columns.
1415    ABI_MALLOC(col_start,(0:nproc-1))
1416    ABI_MALLOC(col_stop,(0:nproc-1))
1417    call xmpi_split_work2_i4b(neh2,nproc,col_start,col_stop,msg,ierr) !check this but it should be OK.
1418    if (ierr/=0) then
1419      MSG_WARNING(msg)
1420    end if
1421 
1422    my_cols(1) = col_start(my_rank)
1423    my_cols(2) = col_stop (my_rank)
1424    if (my_cols(2)-my_cols(1)<=0) then
1425      MSG_ERROR("One of the processors has zero columns!")
1426    end if
1427 
1428    ABI_MALLOC(ncols_of,(0:nproc-1))
1429    ncols_of=0
1430    do rank=0,nproc-1
1431      if (col_stop(rank)>=col_start(rank)) ncols_of(rank) = col_stop(rank)-col_start(rank)+1
1432    end do
1433 
1434    ABI_FREE(col_start)
1435    ABI_FREE(col_stop)
1436    !
1437    ! TODO might add treatment of <psi|q+G|psi> for q+G -> 0
1438    ! TODO might used enlarged G-sphere for better convergence.
1439    ! Note that my_kxssp is always written on file when nsppol=2, even when
1440    ! non-local field effects are neglected.
1441    ABI_MALLOC(my_kxssp,(neh1,my_cols(1):my_cols(2)))
1442    my_kxssp=czero
1443 
1444    if (do_exchange_term) then
1445      spin1=1; spin2=2
1446      write(msg,'(a,2i2,a)')" Calculating exchange term for (spin1,spin2) ",spin1,spin2," ..."
1447      call wrtout(std_out,msg,"COLL")
1448 
1449      ABI_MALLOC(rhotwg1,(npweps))
1450      ABI_MALLOC(rhotwg2,(npweps))
1451 
1452      ngx = Gsph_x%ng
1453      ABI_MALLOC(vc_sqrt_qbz,(ngx))
1454      !
1455      ! * Get iq_ibz, and symmetries from iq_bz.
1456      iq_bz = iqbz0 ! q = 0 -> iqbz0
1457      call get_BZ_item(Qmesh,iq_bz,qbz,iq_ibz,isym_q,itim_q)
1458      !
1459      ! * Set up table of |q(BZ)+G|
1460      if (iq_ibz==1) then
1461        do ig=1,ngx
1462          ISg = Gsph_x%rottb(ig,itim_q,isym_q)
1463          vc_sqrt_qbz(ISg)=Vcp%vcqlwl_sqrt(ig,1)
1464        end do
1465      else
1466         MSG_ERROR("iq_ibz should be 1")
1467      end if
1468 
1469      do itp=1,neh2 ! Loop over transition tp = (kp,vp,cp,spin2)
1470 
1471        if (itp<my_cols(1) .or. itp>my_cols(2)) CYCLE ! I dont have this column.
1472        ene_tp = Bsp%Trans(itp,spin2)%en
1473        ikp_bz = Bsp%Trans(itp,spin2)%k
1474        ivp    = Bsp%Trans(itp,spin2)%v
1475        icp    = Bsp%Trans(itp,spin2)%c
1476 
1477        ikp_ibz = Kmesh%tab (ikp_bz)
1478        isym_kp = Kmesh%tabo(ikp_bz)
1479        itim_kp = (3-Kmesh%tabi(ikp_bz))/2
1480 
1481        if (is_resonant) then
1482          rhotwg2(:) = sym_rhotwgq0(itim_kp,isym_kp,dim_rtwg,npweps,rhxtwg_q0(:,ivp,icp,ikp_ibz,spin2),Gsph_c)
1483        else ! Code for coupling block.
1484          rhotwg2(:) = sym_rhotwgq0(itim_kp,isym_kp,dim_rtwg,npweps,rhxtwg_q0(:,icp,ivp,ikp_ibz,spin2),Gsph_c)
1485        end if
1486        !
1487        ! Multiply by the Coulomb term.
1488         do ig=2,npweps
1489           rhotwg2(ig) = rhotwg2(ig) * vc_sqrt_qbz(ig) * vc_sqrt_qbz(ig)
1490         end do
1491 
1492        do it=1,neh1 ! Loop over transition t = (k,v,c,spin1) FULL matrix.
1493 
1494          ene_t = Bsp%Trans(it,spin1)%en
1495          ik_bz = Bsp%Trans(it,spin1)%k
1496          iv    = Bsp%Trans(it,spin1)%v
1497          ic    = Bsp%Trans(it,spin1)%c
1498 
1499          ik_ibz = Kmesh%tab(ik_bz)
1500          isym_k = Kmesh%tabo(ik_bz)
1501          itim_k = (3-Kmesh%tabi(ik_bz))/2
1502          !if (itim_k==2) CYCLE ! time-reversal or not
1503 
1504          rhotwg1(:) = sym_rhotwgq0(itim_k,isym_k,dim_rtwg,npweps,rhxtwg_q0(:,iv,ic,ik_ibz,spin1),Gsph_c)
1505          !
1506          ! sum over G/=0
1507          ctemp = XDOTC(npweps-1,rhotwg1(2:),1,rhotwg2(2:),1)
1508          ctemp = faq * kx_fact * ctemp
1509 
1510          my_kxssp(it,itp) = ctemp
1511        end do !it
1512      end do !itp
1513 
1514      ABI_FREE(rhotwg1)
1515      ABI_FREE(rhotwg2)
1516      ABI_FREE(vc_sqrt_qbz)
1517    end if ! do_exchange_term
1518    call timab(686,2,tsec) ! exc_build_ham(exch.spin)
1519    !
1520    ! =====================================
1521    ! === Write the Hamiltonian on disk ===
1522    ! =====================================
1523    call timab(685,1,tsec) ! exc_build_ham(write_ham)
1524 
1525    if (use_mpiio) then
1526 #ifdef HAVE_MPI_IO
1527      my_ncols=ncols_of(my_rank); old_type=MPI_DOUBLE_COMPLEX
1528      call xmpio_create_fsubarray_2D((/neh1,my_ncols/),(/neh1,my_ncols/),(/1,1/),old_type,hmat_type,my_offpad,mpi_err)
1529      ABI_CHECK_MPI(mpi_err,"fsubarray_2D")
1530      !
1531      ! Each node uses a different offset to skip the header and the blocks written by the other CPUs.
1532      prev_nels=0
1533      prev_ncols=0
1534      if (my_rank>0) then
1535        prev_ncols = SUM(ncols_of(0:my_rank-1))
1536        prev_nels = neh1*prev_ncols
1537      end if
1538      tmp_off = prev_nels*xmpi_bsize_dpc + prev_ncols*2*xmpio_bsize_frm
1539 
1540      my_offset = offset_of_block(block) + tmp_off + my_offpad
1541 
1542      call MPI_FILE_SET_VIEW(mpi_fh, my_offset, MPI_BYTE, hmat_type, 'native', MPI_INFO_NULL, mpi_err)
1543      ABI_CHECK_MPI(mpi_err,"SET_VIEW")
1544 
1545      call MPI_TYPE_FREE(hmat_type,mpi_err)
1546      ABI_CHECK_MPI(mpi_err,"MPI_TYPE_FREE")
1547 
1548      tmp_size = INT(neh1*my_ncols)
1549      call MPI_FILE_WRITE_ALL(mpi_fh, my_kxssp,tmp_size, MPI_DOUBLE_COMPLEX, MPI_STATUS_IGNORE, mpi_err)
1550      ABI_CHECK_MPI(mpi_err,"FILE_WRITE")
1551 
1552      ! It seems that personal calls in make the code stuck
1553      ! Master writes the Fortran record markers.
1554      ABI_MALLOC(bsize_frecord,(neh2))
1555      bsize_frecord = neh1 * xmpi_bsize_dpc
1556      ! ehdr_offset points to the end of the header.
1557      !call xmpio_write_frmarkers(mpi_fh,ehdr_offset,xmpio_collective,neh2,bsize_frecord,mpi_err)
1558      my_offset = offset_of_block(block)
1559      call xmpio_write_frmarkers(mpi_fh,my_offset,xmpio_collective,neh2,bsize_frecord,ierr)
1560      ABI_CHECK(ierr==0,"Error while writing Fortran markers")
1561      ABI_FREE(bsize_frecord)
1562 #else
1563      MSG_BUG("You should not be here")
1564 #endif
1565    else
1566      ! Use FORTRAN IO with sequential access mode.
1567      ! * Each node sends its data to master node.
1568      ! * Columns are distributed according to the rank of the node.
1569      if (my_rank==master) then
1570        do jj=my_cols(1),my_cols(2)
1571          write(bsh_unt) my_kxssp(:,jj)
1572        end do
1573        ABI_FREE(my_kxssp)
1574      end if
1575 
1576      call xmpi_barrier(comm)
1577      !
1578      ! Collect data from the other nodes.
1579      do sender=1,nproc-1
1580        ! If I'm not involved, jump to the end of the loop and wait there (sequential IO? Of course!)
1581        if (all(my_rank /= [sender, master])) goto 200
1582 
1583        if (my_rank==master)  then
1584          ABI_MALLOC(buffer_2d,(neh1,ncols_of(sender)))
1585        end if
1586        call xmpi_exch(my_kxssp,neh1*ncols_of(sender),sender,buffer_2d,master,comm,mpi_err)
1587        !
1588        if (my_rank==master) then ! Write the columns owned by sender.
1589          do jj=1,ncols_of(sender)
1590            write(bsh_unt) buffer_2d(:,jj)
1591          end do
1592          ABI_FREE(buffer_2d)
1593        end if ! master
1594        !
1595 200    call xmpi_barrier(comm)
1596      end do ! sender
1597    end if
1598    call timab(685,2,tsec) ! exc_build_ham(write_ham)
1599 
1600    ABI_FREE(ncols_of)
1601    if (allocated(my_kxssp))  then
1602      ABI_FREE(my_kxssp)
1603    end if
1604  end if
1605  !
1606  ! Close the file.
1607  if (use_mpiio) then
1608 #ifdef HAVE_MPI_IO
1609    call MPI_FILE_CLOSE(mpi_fh, mpi_err)
1610    ABI_CHECK_MPI(mpi_err,"FILE_CLOSE")
1611    ABI_FREE(offset_of_block)
1612 #endif
1613  end if
1614 
1615  ! master closes the Fortran files.
1616  if (my_rank==master) then
1617    close(bsh_unt)
1618    if (BSp%prep_interp) then
1619      close(a_unt)
1620      close(b_unt)
1621      close(c_unt)
1622    end if
1623  end if
1624  !
1625  ! Free memory.
1626  ABI_FREE(igfftg0)
1627  ABI_FREE(ktabr_k)
1628  ABI_FREE(id_tab)
1629  ABI_FREE(ktabr_kp)
1630  ABI_FREE(rhxtwg_vpv)
1631  ABI_FREE(rhxtwg_cpc)
1632  if (BSp%prep_interp) then
1633    ABI_FREE(aa_vpv)
1634    ABI_FREE(bb_vpv1)
1635    ABI_FREE(bb_vpv2)
1636    ABI_FREE(cc_vpv)
1637    ABI_FREE(aa_cpc)
1638    ABI_FREE(bb_cpc1)
1639    ABI_FREE(bb_cpc2)
1640    ABI_FREE(cc_cpc)
1641  end if
1642  ABI_FREE(ur_ckp)
1643  ABI_FREE(ur_vkp)
1644  ABI_FREE(ur_vk)
1645  ABI_FREE(ur_ck)
1646 
1647  ! Deallocation for PAW.
1648  if (Wfd%usepaw==1) then
1649    call pawcprj_free(Cp_vk)
1650    ABI_DT_FREE(Cp_vk)
1651    call pawcprj_free(Cp_ck)
1652    ABI_DT_FREE(Cp_ck)
1653    call pawcprj_free(Cp_ckp)
1654    ABI_DT_FREE(Cp_ckp)
1655    call pawcprj_free(Cp_vkp)
1656    ABI_DT_FREE(Cp_vkp)
1657    call pawcprj_free(Cp_tmp1)
1658    ABI_DT_FREE(Cp_tmp1)
1659    call pawcprj_free(Cp_tmp2)
1660    ABI_DT_FREE(Cp_tmp2)
1661    call pawcprj_free(Cp_tmp3)
1662    ABI_DT_FREE(Cp_tmp3)
1663    call pawcprj_free(Cp_tmp4)
1664    ABI_DT_FREE(Cp_tmp4)
1665  end if
1666 
1667  call xmpi_barrier(comm)
1668 
1669  DBG_EXIT("COLL")
1670 
1671  call timab(680,2,tsec)
1672 
1673 contains

ABINIT/exc_build_v [ Functions ]

[ Top ] [ Functions ]

NAME

  exc_build_v

FUNCTION

  Calculate and write the excitonic Hamiltonian on an external binary file (Fortran file open
  in random mode) for subsequent treatment in the Bethe-Salpeter code.

INPUTS

  BSp<excparam>=The parameters for the Bethe-Salpeter calculation.
  Cryst<crystal_t>=Info on the crystalline structure.
  Kmesh<kmesh_t>=The list of k-points in the BZ, IBZ and symmetry tables.
  Qmesh<kmesh_t>=The list of q-points for epsilon^{-1} and related symmetry tables.
  Gsph_x<gsphere_t>=Info on the G-sphere used to describe wavefunctions and W (the largest one is actually stored).
  Gsph_c<gsphere_t>=Info on the G-sphere used to describe the correlation part.
  Vcp<vcoul_t>=The Coulomb interaction in reciprocal space. A cutoff can be used
  rhxtwg_q0
  is_resonant
  comm=MPI communicator.

OUTPUT

NOTES

  *) Version for K_V = K_C (q=0), thus KP_V = KP_C
  *) No exchange limit: use LDA energies in case.
  *) Symmetry of H(-k-k') = H*(k k') not used.
  *) Coulomb term can be approssimateed as diagonal in G.
  *) Valence bands treated from lomo on.
  *) Symmetries of the sub-blocks are used to reduce the number of elements to calculate.

            ____________
           |_(cv)__(vc)_|
   H_exc = |  R      C  |
           | -C*    -R* |

   where C is symmetric and R is Hermitian provided that the QP energies are real.

  For nsppol=1 ==> R = diag-W+2v; C = -W+2v
  since the Hamiltonian can be diagonalized in the spin-singlet basis set thanks to
  the fact that spin triplet does not contribute to the optical limit of epsilon.

  For nsppol=2 ==> R = diag-W+v; C = -W+v
  Now the matrix elements depend on the spin of the transitions but only those
  transitions in which the spin of the electron and of the hole are equal contribute
  to the macroscopic dielectric function. Moreover only the exchange term can connect transitions of different spin.
  When nsppol==2 the transitions are ordered using | (cv up) | (cv dwn) | (vc up) | (vc down) |

  The resonant block is given by:

      |  (v'c' up)       | (v'c' dwn)   |
      -----------------------------------           where v_{-+} = v_{+-}^H when the momentum of the photon is neglected.
      | [diag-W+v]++     |      v+-     | (vc up)   Note that v_{+-} is not Hermitian due to the presence of different spins.
  R = -----------------------------------           Actually it reduces to a Hermitian matrix when the system is not spin polarized.
      |     v-+          | [diag-W+v]-- | (vc dwn)  but in this case one should use nsppol=1.
      -----------------------------------           As a consequence the entire matrix is calculated and stored on file.

  The coupling block is given by:

      |  (c'v' up)   |    (c'v dwn)     |
      -----------------------------------           where v_{-+} = v_{+-}^t when the momentum of the photon is neglected.
      | [-W+v]++     |      v+-         | (vc up)   Also in this case the entire matrix v_{+-} has to be calculated
  C = -----------------------------------           and stored on file.
      |     v-+      |    [-W+v]--      | (vc dwn)
      -----------------------------------

PARENTS

CHILDREN

      get_bz_item,timab,wrtout,xmpi_split_work2_i4b

SOURCE

1748 subroutine exc_build_v(spin1,spin2,nsppol,npweps,Bsp,Cryst,Kmesh,Qmesh,Gsph_x,Gsph_c,Vcp,&
1749 &  is_resonant,rhxtwg_q0,nproc,my_rank,t_start,t_stop,my_bsham)
1750 
1751  use defs_basis
1752  use defs_datatypes
1753  use defs_abitypes
1754  use m_profiling_abi
1755  use m_bs_defs
1756  use m_xmpi
1757  use m_errors
1758 
1759  use m_time,         only : cwtime
1760  use m_blas,         only : xdotc
1761  use m_crystal,      only : crystal_t
1762  use m_gsphere,      only : gsphere_t
1763  use m_vcoul,        only : vcoul_t
1764  use m_bz_mesh,      only : kmesh_t, get_BZ_item
1765  use m_oscillators,  only : sym_rhotwgq0
1766 
1767 !This section has been created automatically by the script Abilint (TD).
1768 !Do not modify the following lines by hand.
1769 #undef ABI_FUNC
1770 #define ABI_FUNC 'exc_build_v'
1771  use interfaces_14_hidewrite
1772  use interfaces_18_timing
1773 !End of the abilint section
1774 
1775  implicit none
1776 
1777 !Arguments ------------------------------------
1778 !scalars
1779  integer,intent(in) :: spin1,spin2,nsppol,npweps,nproc,my_rank
1780  logical,intent(in) :: is_resonant
1781  type(excparam),intent(in) :: BSp
1782  type(kmesh_t),intent(in) :: Kmesh,Qmesh
1783  type(crystal_t),intent(in) :: Cryst
1784  type(vcoul_t),intent(in) :: Vcp
1785  type(gsphere_t),intent(in) :: Gsph_x,Gsph_c
1786 !arrays
1787  integer(i8b),intent(in) :: t_start(0:nproc-1),t_stop(0:nproc-1)
1788  complex(gwpc),intent(in) :: rhxtwg_q0(npweps,BSp%lomo_min:BSp%humo_max,BSp%lomo_min:BSp%humo_max,Kmesh%nibz,nsppol)
1789  complex(dpc),intent(inout) :: my_bsham(t_start(my_rank):t_stop(my_rank))
1790 
1791 !Local variables ------------------------------
1792 !scalars
1793  integer :: ISg,ngx,ik_bz,ikp_bz,dim_rtwg
1794  integer :: neh1,neh2,ig,nblocks
1795  integer :: ik_ibz,itim_k,ikp_ibz,itim_kp,isym_k,isym_kp
1796  integer :: iq_bz,iq_ibz,isym_q,itim_q,iqbz0,rank
1797  integer :: iv,ivp,ic,icp
1798  integer :: block,ierr
1799  integer(i8b) :: tot_nels,ir,it,itp
1800  real(dp) :: faq,kx_fact
1801  complex(spc) :: ctemp
1802  character(len=500) :: msg
1803 !arrays
1804  integer :: bidx(2,4),spin_ids(2,3)
1805  integer(i8b) :: nels_block(3)
1806  integer :: my_cols(2),my_rows(2) !,proc_end(2),proc_start(2)
1807  integer,allocatable :: ncols_of(:)
1808  integer,allocatable :: col_start(:),col_stop(:)
1809  real(dp) :: qbz(3),tsec(2) !kbz(3),kpbz(3),
1810  complex(dpc),allocatable :: my_kxssp(:,:)
1811  complex(gwpc),allocatable :: vc_sqrt_qbz(:),rhotwg1(:),rhotwg2(:)
1812 
1813 !************************************************************************
1814 
1815  DBG_ENTER("COLL")
1816 
1817  write(msg,'(a,2i2,a)')" Calculating exchange term for (spin1,spin2) ",spin1,spin2," ..."
1818  call wrtout(std_out,msg,"COLL")
1819 
1820  ! Basic constants.
1821  dim_rtwg=1; faq = one/(Cryst%ucvol*Kmesh%nbz)
1822 
1823  ! Identify the index of q==0
1824  iqbz0=0
1825  do iq_bz=1,Qmesh%nbz
1826    if (ALL(ABS(Qmesh%bz(:,iq_bz))<tol3)) iqbz0 = iq_bz
1827  end do
1828  ABI_CHECK(iqbz0/=0,"q=0 not found")
1829  !
1830  ! Treat the spin polarization.
1831  spin_ids(:,1) = (/1,1/)
1832  spin_ids(:,2) = (/2,2/)
1833  spin_ids(:,3) = (/1,2/)
1834 
1835  nblocks=1
1836  kx_fact=two
1837  nels_block(:)=0
1838  nels_block(1)=BSp%nreh(1)*(BSp%nreh(1)+1_i8b)/2
1839  tot_nels=nels_block(1)
1840 
1841  if (nsppol==2) then
1842    nblocks=3
1843    kx_fact=one
1844    nels_block(1) = BSp%nreh(1)*(BSp%nreh(1)+1_i8b)/2   ! Only the upper triangle for block 1 and 2
1845    nels_block(2) = BSp%nreh(2)*(BSp%nreh(2)+1_i8b)/2
1846    nels_block(3) = BSp%nreh(1)*BSp%nreh(2)*1_i8b       ! Note: Block 3 does not have symmetries.
1847    tot_nels= SUM(nels_block)
1848  end if
1849  !
1850  ! Distribute the calculation of the matrix elements among the nodes.
1851  ! * tstart and t_stop give the initial and final transition index treated by each node.
1852  ! * my_hsize is the number of transitions treated by this processor
1853  ! * my_cols(1:2) gives the initial and final column treated by this node.
1854  !
1855  do block=1,nsppol
1856    !
1857    ! Indices used to loop over bands.
1858    ! bidx contains the starting and final indices used to loop over bands.
1859    !
1860    !      (b3,b4)
1861    !         |... ...|
1862    ! (b1,b2) |... ...|
1863    !
1864    ! Resonant matrix is given by
1865    !      (v',c')
1866    !       |... ...|
1867    ! (v,c) |... ...|
1868    !
1869    ! Coupling matrix is given by
1870    !       (c',v')
1871    !       |... ...|
1872    ! (v,c) |... ...|
1873 
1874    if (is_resonant) then
1875      bidx(:,1) = [BSp%lomo_spin(block),BSp%homo_spin(block)] ! range for b1
1876      bidx(:,2) = [BSp%lumo_spin(block),BSp%humo_spin(block)] ! range for b2
1877      bidx(:,3) = [BSp%lomo_spin(block),BSp%homo_spin(block)] ! range for b3
1878      bidx(:,4) = [BSp%lumo_spin(block),BSp%humo_spin(block)] ! range for b4
1879    else
1880      bidx(:,1) = [BSp%lomo_spin(block),BSp%homo_spin(block)] ! range for b1
1881      bidx(:,2) = [BSp%lumo_spin(block),BSp%humo_spin(block)] ! range for b2
1882      bidx(:,3) = [BSp%lumo_spin(block),BSp%humo_spin(block)] ! range for b3
1883      bidx(:,4) = [BSp%lomo_spin(block),BSp%homo_spin(block)] ! range for b4
1884    end if
1885 
1886    !spin1 = spin_ids(1,block)
1887    !spin2 = spin_ids(2,block)
1888 
1889    my_cols=0
1890    do itp=1,Bsp%nreh(block)
1891      do it=1,itp
1892        ir = it + itp*(itp-1_i8b)/2
1893        if (ir==t_start(my_rank)) then
1894          my_rows(1) = it
1895          my_cols(1) = itp
1896        end if
1897        if (ir==t_stop(my_rank)) then
1898          my_rows(2) = it
1899          my_cols(2) = itp
1900        end if
1901      end do
1902    end do
1903 
1904    ! Allocate big (scalable) buffer to store the BS matrix on this node.
1905    !ABI_MALLOC(my_bsham,(t_start(my_rank):t_stop(my_rank)))
1906    !
1907    ! =====================
1908    ! === Exchange term ===
1909    ! =====================
1910    ! TODO might add treatment of <psi|q+G|psi> for q+G -> 0
1911    ! TODO might used enlarged G-sphere for better convergence.
1912 !if (do_exchange_term) then
1913    call timab(683,1,tsec) ! exc_build_ham(exchange)
1914 
1915    ABI_MALLOC(rhotwg1,(npweps))
1916    ABI_MALLOC(rhotwg2,(npweps))
1917 
1918    ngx = Gsph_x%ng
1919    ABI_MALLOC(vc_sqrt_qbz,(ngx))
1920 
1921    ! * Get iq_ibz, and symmetries from iq_bz.
1922    iq_bz = iqbz0 ! q = 0 -> iqbz0
1923    call get_BZ_item(Qmesh,iq_bz,qbz,iq_ibz,isym_q,itim_q)
1924 
1925    ! * Set up table of |q(BZ)+G|
1926    if (iq_ibz==1) then
1927      do ig=1,ngx
1928        ISg = Gsph_x%rottb(ig,itim_q,isym_q)
1929        vc_sqrt_qbz(ISg)=Vcp%vcqlwl_sqrt(ig,1)
1930      end do
1931    else
1932       MSG_ERROR("iq_ibz should be 1")
1933    end if
1934 
1935    do itp=1,BSp%nreh(block) ! Loop over transition tp = (kp,vp,cp,spin2)
1936 
1937      if (itp<my_cols(1) .or. itp>my_cols(2)) CYCLE ! I dont have this column.
1938      ikp_bz = Bsp%Trans(itp,spin2)%k
1939      ivp    = Bsp%Trans(itp,spin2)%v
1940      icp    = Bsp%Trans(itp,spin2)%c
1941 
1942      ikp_ibz = Kmesh%tab (ikp_bz)
1943      isym_kp = Kmesh%tabo(ikp_bz)
1944      itim_kp = (3-Kmesh%tabi(ikp_bz))/2
1945 
1946      if (is_resonant) then
1947        rhotwg2(:) = sym_rhotwgq0(itim_kp,isym_kp,dim_rtwg,npweps,rhxtwg_q0(:,ivp,icp,ikp_ibz,spin2),Gsph_c)
1948      else ! Code for coupling block.
1949        rhotwg2(:) = sym_rhotwgq0(itim_kp,isym_kp,dim_rtwg,npweps,rhxtwg_q0(:,icp,ivp,ikp_ibz,spin2),Gsph_c)
1950      end if
1951      !
1952      ! Multiply by the Coulomb term.
1953       do ig=2,npweps
1954         rhotwg2(ig) = rhotwg2(ig) * vc_sqrt_qbz(ig) * vc_sqrt_qbz(ig)
1955       end do
1956 
1957      do it=1,itp ! Loop over transition t = (k,v,c,spin1)
1958        ir = it + itp*(itp-1_i8b)/2
1959        if (ir<t_start(my_rank) .or. ir>t_stop(my_rank)) CYCLE
1960 
1961        ik_bz   = Bsp%Trans(it,spin1)%k
1962        iv      = Bsp%Trans(it,spin1)%v
1963        ic      = Bsp%Trans(it,spin1)%c
1964 
1965        ik_ibz = Kmesh%tab(ik_bz)
1966        isym_k = Kmesh%tabo(ik_bz)
1967        itim_k = (3-Kmesh%tabi(ik_bz))/2
1968        !if (itim_k==2) CYCLE ! time-reversal or not
1969 
1970        rhotwg1(:) = sym_rhotwgq0(itim_k,isym_k,dim_rtwg,npweps,rhxtwg_q0(:,iv,ic,ik_ibz,spin1),Gsph_c)
1971        !
1972        ! sum over G/=0
1973        ctemp = xdotc(npweps-1,rhotwg1(2:),1,rhotwg2(2:),1)
1974        ctemp = faq * kx_fact * ctemp
1975 
1976        ! exchange term is non divergent !
1977        !if (BSp%prep_interp) then
1978        !  ccoeffs(ir) = ccoeffs(ir) + ctemp
1979        !end if
1980 
1981        my_bsham(ir) = my_bsham(ir) + ctemp
1982      end do !it
1983    end do !itp
1984 
1985    ABI_FREE(rhotwg1)
1986    ABI_FREE(rhotwg2)
1987    ABI_FREE(vc_sqrt_qbz)
1988 
1989    call timab(683,2,tsec) ! exc_build_ham(exchange)
1990 !end if ! do_exchange_term
1991  end do ! block
1992 
1993  !
1994  ! ===========================================
1995  ! === Exchange term for spin_up spin_down ===
1996  ! ===========================================
1997 
1998 if (nsppol==2) then
1999  call timab(686,2,tsec) ! exc_build_ham(exch.spin)
2000  block=3
2001  neh1=BSp%nreh(1)
2002  neh2=BSp%nreh(2)
2003  !
2004  ! The oscillators at q=0 are available on each node for both spin.
2005  ! Here the calculation of the block is parallelized over columns.
2006  ABI_MALLOC(col_start,(0:nproc-1))
2007  ABI_MALLOC(col_stop,(0:nproc-1))
2008  call xmpi_split_work2_i4b(neh2,nproc,col_start,col_stop,msg,ierr) !check this but it should be OK.
2009  if (ierr/=0) then
2010    MSG_WARNING(msg)
2011  end if
2012 
2013  my_cols(1) = col_start(my_rank)
2014  my_cols(2) = col_stop (my_rank)
2015  if (my_cols(2)-my_cols(1)<=0) then
2016    MSG_ERROR("One of the processors has zero columns!")
2017  end if
2018 
2019  ABI_MALLOC(ncols_of,(0:nproc-1))
2020  ncols_of=0
2021  do rank=0,nproc-1
2022    if (col_stop(rank)>=col_start(rank)) ncols_of(rank) = col_stop(rank)-col_start(rank)+1
2023  end do
2024 
2025  ABI_FREE(col_start)
2026  ABI_FREE(col_stop)
2027  !
2028  ! TODO might add treatment of <psi|q+G|psi> for q+G -> 0
2029  ! TODO might used enlarged G-sphere for better convergence.
2030  ! Note that my_kxssp is always written on file when nsppol=2, even when
2031  ! non-local field effects are neglected.
2032  ABI_MALLOC(my_kxssp,(neh1,my_cols(1):my_cols(2)))
2033  my_kxssp=czero
2034 
2035  !if (do_exchange_term) then
2036    !spin1=1; spin2=2
2037    ABI_MALLOC(rhotwg1,(npweps))
2038    ABI_MALLOC(rhotwg2,(npweps))
2039 
2040    ngx = Gsph_x%ng
2041    ABI_MALLOC(vc_sqrt_qbz,(ngx))
2042    !
2043    ! * Get iq_ibz, and symmetries from iq_bz.
2044    iq_bz = iqbz0 ! q = 0 -> iqbz0
2045    call get_BZ_item(Qmesh,iq_bz,qbz,iq_ibz,isym_q,itim_q)
2046    !
2047    ! * Set up table of |q(BZ)+G|
2048    if (iq_ibz==1) then
2049      do ig=1,ngx
2050        ISg = Gsph_x%rottb(ig,itim_q,isym_q)
2051        vc_sqrt_qbz(ISg)=Vcp%vcqlwl_sqrt(ig,1)
2052      end do
2053    else
2054       MSG_ERROR("iq_ibz should be 1")
2055    end if
2056 
2057    do itp=1,neh2 ! Loop over transition tp = (kp,vp,cp,spin2)
2058 
2059      if (itp<my_cols(1) .or. itp>my_cols(2)) CYCLE ! I dont have this column.
2060      ikp_bz = Bsp%Trans(itp,spin2)%k
2061      ivp    = Bsp%Trans(itp,spin2)%v
2062      icp    = Bsp%Trans(itp,spin2)%c
2063 
2064      ikp_ibz = Kmesh%tab (ikp_bz)
2065      isym_kp = Kmesh%tabo(ikp_bz)
2066      itim_kp = (3-Kmesh%tabi(ikp_bz))/2
2067 
2068      if (is_resonant) then
2069        rhotwg2(:) = sym_rhotwgq0(itim_kp,isym_kp,dim_rtwg,npweps,rhxtwg_q0(:,ivp,icp,ikp_ibz,spin2),Gsph_c)
2070      else ! Code for coupling block.
2071        rhotwg2(:) = sym_rhotwgq0(itim_kp,isym_kp,dim_rtwg,npweps,rhxtwg_q0(:,icp,ivp,ikp_ibz,spin2),Gsph_c)
2072      end if
2073      !
2074      ! Multiply by the Coulomb term.
2075       do ig=2,npweps
2076         rhotwg2(ig) = rhotwg2(ig) * vc_sqrt_qbz(ig) * vc_sqrt_qbz(ig)
2077       end do
2078 
2079      do it=1,neh1 ! Loop over transition t = (k,v,c,spin1) FULL matrix.
2080        ik_bz = Bsp%Trans(it,spin1)%k
2081        iv    = Bsp%Trans(it,spin1)%v
2082        ic    = Bsp%Trans(it,spin1)%c
2083 
2084        ik_ibz = Kmesh%tab(ik_bz)
2085        isym_k = Kmesh%tabo(ik_bz)
2086        itim_k = (3-Kmesh%tabi(ik_bz))/2
2087        !if (itim_k==2) CYCLE ! time-reversal or not
2088 
2089        rhotwg1(:) = sym_rhotwgq0(itim_k,isym_k,dim_rtwg,npweps,rhxtwg_q0(:,iv,ic,ik_ibz,spin1),Gsph_c)
2090        !
2091        ! sum over G/=0
2092        ctemp = XDOTC(npweps-1,rhotwg1(2:),1,rhotwg2(2:),1)
2093        ctemp = faq * kx_fact * ctemp
2094 
2095        my_kxssp(it,itp) = ctemp
2096      end do !it
2097    end do !itp
2098 
2099    ABI_FREE(rhotwg1)
2100    ABI_FREE(rhotwg2)
2101    ABI_FREE(vc_sqrt_qbz)
2102  !end if ! do_exchange_term
2103  call timab(686,2,tsec) ! exc_build_ham(exch.spin)
2104 
2105  ABI_FREE(ncols_of)
2106  if (allocated(my_kxssp))  then
2107    ABI_FREE(my_kxssp)
2108  end if
2109  end if
2110 
2111  DBG_EXIT("COLL")
2112 
2113 end subroutine exc_build_v
2114 
2115 end subroutine exc_build_block