TABLE OF CONTENTS


ABINIT/m_exc_build [ Modules ]

[ Top ] [ Modules ]

NAME

  m_exc_build

FUNCTION

  Build BSE Hamiltonian in e-h reprensentation.

COPYRIGHT

  Copyright (C) 1992-2009 EXC group (L.Reining, V.Olevano, F.Sottile, S.Albrecht, G.Onida)
  Copyright (C) 2009-2018 ABINIT group (L.Reining, V.Olevano, F.Sottile, S.Albrecht, G.Onida, M.Giantomassi)
  This file is distributed under the terms of the
  GNU General Public License, see ~abinit/COPYING
  or http://www.gnu.org/copyleft/gpl.txt .

PARENTS

CHILDREN

SOURCE

22 #if defined HAVE_CONFIG_H
23 #include "config.h"
24 #endif
25 
26 #include "abi_common.h"
27 
28 module m_exc_build
29 
30  use defs_basis
31  use m_abicore
32  use m_bs_defs
33  use m_bse_io
34  use m_xmpi
35  use m_errors
36  use m_screen
37 #if defined HAVE_MPI2
38  use mpi
39 #endif
40  use m_hdr
41 
42  use defs_datatypes, only : pseudopotential_type
43  use defs_abitypes,  only : hdr_type
44  use m_gwdefs,       only : czero_gw, cone_gw, GW_TOLQ0
45  use m_time,         only : cwtime, timab
46  use m_io_tools,     only : get_unit, open_file
47  use m_hide_blas,    only : xdotc, xgemv
48  use m_geometry,     only : normv
49  use m_crystal,      only : crystal_t
50  use m_gsphere,      only : gsphere_t, gsph_fft_tabs
51  use m_vcoul,        only : vcoul_t
52  use m_bz_mesh,      only : kmesh_t, get_BZ_item, get_BZ_diff, has_BZ_item, isamek, findqg0
53  use m_pawpwij,      only : pawpwff_t, pawpwij_t, pawpwij_init, pawpwij_free, paw_rho_tw_g
54  use m_pawang,       only : pawang_type
55  use m_pawtab,       only : pawtab_type
56  use m_pawcprj,      only : pawcprj_type, pawcprj_alloc, pawcprj_free
57  use m_paw_sym,      only : paw_symcprj_op
58  use m_wfd,          only : wfd_t, wfd_get_ur, wfd_get_cprj, wfd_change_ngfft, wfd_ihave_ur, wfd_ihave_cprj
59  use m_oscillators,  only : rho_tw_g, sym_rhotwgq0
60 
61  implicit none
62 
63 #if defined HAVE_MPI1
64  include 'mpif.h'
65 #endif
66 
67  private

m_exc_build/exc_build_block [ Functions ]

[ Top ] [ m_exc_build ] [ Functions ]

NAME

  exc_build_block

FUNCTION

  Calculate and write the excitonic Hamiltonian on an external binary file (Fortran file open
  in random mode) for subsequent treatment in the Bethe-Salpeter code.

INPUTS

  BSp<excparam>=The parameters for the Bethe-Salpeter calculation.
  Cryst<crystal_t>=Info on the crystalline structure.
  Kmesh<kmesh_t>=The list of k-points in the BZ, IBZ and symmetry tables.
  Qmesh<kmesh_t>=The list of q-points for epsilon^{-1} and related symmetry tables.
  ktabr(nfftot_osc,BSp%nkbz)=The FFT index of $(R^{-1}(r-\tau))$ where R is symmetry needed to obtains
    the k-points from the irreducible image.  Used to symmetrize u_Sk where S = \transpose R^{-1}
  Gsph_x<gsphere_t>=Info on the G-sphere used to describe wavefunctions and W (the largest one is actually stored).
  Gsph_c<gsphere_t>=Info on the G-sphere used to describe the correlation part.
  Vcp<vcoul_t>=The Coulomb interaction in reciprocal space. A cutoff can be used
  W<screen_t>=Data type gathering info and data for W.
  nfftot_osc=Total Number of FFT points used for the oscillator matrix elements.
  ngfft_osc(18)=Info on the FFT algorithm used to calculate the oscillator matrix elements.
  Psps<Pseudopotential_type>=Variables related to pseudopotentials
  Pawtab(Psps%ntypat)<pawtab_type>=PAW tabulated starting data.
  Pawang<pawang_type>=PAW angular mesh and related data.
  Paw_pwff(Cryst%ntypat*Wfd%usepaw)<pawpwff_t>=Form factor used to calculate the onsite matrix
    elements of a plane wave.
  Wfd<wfd_t>=Handler for the wavefunctions.
    prtvol=Verbosity level.
  rhxtwg_q0
  is_resonant
  fname
  comm=MPI communicator.

OUTPUT

  The excitonic Hamiltonian is saved on an external binary file (see below).

NOTES

  *) Version for K_V = K_C (q=0), thus KP_V = KP_C
  *) No exchange limit: use LDA energies in case.
  *) Symmetry of H(-k-k') = H*(k k') not used.
  *) Coulomb term can be approssimateed as diagonal in G.
  *) Valence bands treated from lomo on.
  *) Symmetries of the sub-blocks are used to reduce the number of elements to calculate.

            ____________
           |_(cv)__(vc)_|
   H_exc = |  R      C  |
           | -C*    -R* |

   where C is symmetric and R is Hermitian provided that the QP energies are real.

  For nsppol=1 ==> R = diag-W+2v; C = -W+2v
  since the Hamiltonian can be diagonalized in the spin-singlet basis set thanks to
  the fact that spin triplet does not contribute to the optical limit of epsilon.

  For nsppol=2 ==> R = diag-W+v; C = -W+v
  Now the matrix elements depend on the spin of the transitions but only those
  transitions in which the spin of the electron and of the hole are equal contribute
  to the macroscopic dielectric function. Moreover only the exchange term can connect transitions of different spin.
  When nsppol==2 the transitions are ordered using | (cv up) | (cv dwn) | (vc up) | (vc down) |

  The resonant block is given by:

      |  (v'c' up)       | (v'c' dwn)   |
      -----------------------------------           where v_{-+} = v_{+-}^H when the momentum of the photon is neglected.
      | [diag-W+v]++     |      v+-     | (vc up)   Note that v_{+-} is not Hermitian due to the presence of different spins.
  R = -----------------------------------           Actually it reduces to a Hermitian matrix when the system is not spin polarized.
      |     v-+          | [diag-W+v]-- | (vc dwn)  but in this case one should use nsppol=1.
      -----------------------------------           As a consequence the entire matrix is calculated and stored on file.

  The coupling block is given by:

      |  (c'v' up)   |    (c'v dwn)     |
      -----------------------------------           where v_{-+} = v_{+-}^t when the momentum of the photon is neglected.
      | [-W+v]++     |      v+-         | (vc up)   Also in this case the entire matrix v_{+-} has to be calculated
  C = -----------------------------------           and stored on file.
      |     v-+      |    [-W+v]--      | (vc dwn)
      -----------------------------------

PARENTS

      exc_build_ham

CHILDREN

      get_bz_item,timab,wrtout,xmpi_split_work2_i4b

SOURCE

 163 subroutine exc_build_block(BSp,Cryst,Kmesh,Qmesh,ktabr,Gsph_x,Gsph_c,Vcp,Wfd,W,Hdr_bse,&
 164 &  nfftot_osc,ngfft_osc,Psps,Pawtab,Pawang,Paw_pwff,rhxtwg_q0,is_resonant,fname)
 165 
 166 
 167 !This section has been created automatically by the script Abilint (TD).
 168 !Do not modify the following lines by hand.
 169 #undef ABI_FUNC
 170 #define ABI_FUNC 'exc_build_block'
 171 !End of the abilint section
 172 
 173  implicit none
 174 
 175 !Arguments ------------------------------------
 176 !scalars
 177  integer,intent(in) :: nfftot_osc
 178  character(len=*),intent(in) :: fname
 179  logical,intent(in) :: is_resonant
 180  type(excparam),intent(in) :: BSp
 181  type(screen_t),intent(inout) :: W
 182  type(kmesh_t),intent(in) :: Kmesh,Qmesh
 183  type(crystal_t),intent(in) :: Cryst
 184  type(vcoul_t),intent(in) :: Vcp
 185  type(gsphere_t),intent(in) :: Gsph_x,Gsph_c
 186  type(Pseudopotential_type),intent(in) :: Psps
 187  type(Hdr_type),intent(inout) :: Hdr_bse
 188  type(pawang_type),intent(in) :: Pawang
 189  type(wfd_t),target,intent(inout) :: Wfd
 190 !arrays
 191  integer,intent(in) :: ngfft_osc(18)
 192  integer,intent(in) :: ktabr(nfftot_osc,Kmesh%nbz)
 193  complex(gwpc),intent(in) :: rhxtwg_q0(BSp%npweps,BSp%lomo_min:BSp%humo_max,BSp%lomo_min:BSp%humo_max,Wfd%nkibz,Wfd%nsppol)
 194  type(Pawtab_type),intent(in) :: Pawtab(Psps%ntypat*Wfd%usepaw)
 195  type(pawpwff_t),intent(in) :: Paw_pwff(Psps%ntypat*Wfd%usepaw)
 196 
 197 !Local variables ------------------------------
 198 !scalars
 199  integer,parameter :: map2sphere=1,ndat1=1,master=0
 200  integer(i8b) :: bsize_my_block
 201  integer :: nspinor,nsppol,ISg,mpi_err,tmp_size,ngx
 202  integer :: ik_bz,ikp_bz,col_glob,itpk_min,itpk_max
 203  integer :: dim_rtwg,bsh_unt,ncol,dump_unt,npweps
 204 #ifdef HAVE_MPI_IO
 205  integer :: amode,mpi_fh,hmat_type,offset_err,old_type
 206  integer(XMPI_OFFSET_KIND) :: ehdr_offset,my_offset
 207  logical,parameter :: is_fortran_file=.TRUE.
 208 #endif
 209  integer :: neh1,neh2,ig,nblocks
 210  integer :: ik_ibz,itim_k,ikp_ibz,itim_kp,isym_k,isym_kp
 211  integer :: iq_bz,iq_ibz,isym_q,itim_q,iqbz0,rank
 212  integer :: iv,ivp,ic,icp,jj,nrows,sender,my_ncols
 213  integer :: use_padfft,prev_nrows,spin1,spin2,block
 214  integer :: ierr,nproc,my_rank,mgfft_osc,fftalga_osc,comm
 215  integer(i8b) :: tot_nels,prev_nels,prev_ncols,nels,ir,it,itp,ist,iend,my_hsize
 216  real(dp) :: faq,kx_fact,cputime,walltime,gflops
 217  complex(spc) :: http,ctemp
 218  complex(dpc) :: ph_mkpt,ph_mkt,ene_t,ene_tp
 219  logical,parameter :: with_umklp=.FALSE.
 220  logical :: use_mpiio,do_coulomb_term,do_exchange_term,w_is_diagonal,isirred
 221  logical :: is_qeq0
 222  character(len=500) :: msg
 223 !arrays
 224  integer :: bidx(2,4),g0(3),spin_ids(2,3)
 225  integer(i8b) :: nels_block(3)
 226  integer :: my_cols(2),my_rows(2),proc_end(2),proc_start(2)
 227  integer :: my_extrema(2,2),sender_extrema(2,2),my_starts(2),my_ends(2)
 228  integer,allocatable :: igfftg0(:),ktabr_k(:),ktabr_kp(:),id_tab(:)
 229  integer,allocatable :: ncols_of(:)
 230  integer(i8b),allocatable :: t_start(:),t_stop(:),hsize_of(:)
 231  integer,allocatable :: col_start(:),col_stop(:)
 232  integer,allocatable :: gbound(:,:)
 233  real(dp) :: kbz(3),kpbz(3),qbz(3),spinrot_k(4),spinrot_kp(4),kmkp(3),tsec(2)
 234  complex(dpc),allocatable :: my_bsham(:),buffer(:),buffer_2d(:,:),my_kxssp(:,:),prev_col(:)
 235 !DBYG
 236  complex(dpc),allocatable :: acoeffs(:),bcoeffs(:),ccoeffs(:) ! Coeff of W = a/q^2 + b/q + c
 237  integer :: a_unt, b_unt, c_unt
 238  complex(dpc) :: aatmp, bbtmp, cctmp
 239  complex(gwpc),allocatable :: aa_vpv(:),aa_cpc(:),aa_ctccp(:)
 240  complex(gwpc),allocatable :: bb_vpv1(:),bb_cpc1(:),bb_ctccp1(:)
 241  complex(gwpc),allocatable :: bb_vpv2(:),bb_cpc2(:),bb_ctccp2(:)
 242  complex(gwpc),allocatable :: cc_vpv(:),cc_cpc(:),cc_ctccp(:)
 243  complex(dpc),allocatable :: abuffer(:),aprev_col(:)
 244  complex(dpc),allocatable :: bbuffer(:),bprev_col(:)
 245  complex(dpc),allocatable :: cbuffer(:),cprev_col(:)
 246  character(len=fnlen) :: tmpfname
 247  integer :: ii
 248 !END DBYG
 249  complex(gwpc),allocatable :: vc_sqrt_qbz(:)
 250  complex(gwpc),allocatable :: rhotwg1(:),rhotwg2(:),rhxtwg_vpv(:),rhxtwg_cpc(:),ctccp(:)
 251  complex(gwpc),target,allocatable :: ur_ckp(:),ur_vkp(:),ur_vk(:),ur_ck(:)
 252  complex(gwpc),ABI_CONTIGUOUS pointer :: ptur_ckp(:),ptur_vkp(:),ptur_vk(:),ptur_ck(:)
 253  type(pawcprj_type),target,allocatable :: Cp_tmp1(:,:),Cp_tmp2(:,:)
 254  type(pawcprj_type),target,allocatable :: Cp_tmp3(:,:),Cp_tmp4(:,:)
 255  type(pawcprj_type),allocatable :: Cp_ckp(:,:),Cp_vkp(:,:)
 256  type(pawcprj_type),allocatable :: Cp_vk(:,:),Cp_ck(:,:)
 257  type(pawcprj_type),pointer :: ptcp_ckp(:,:),ptcp_vkp(:,:),ptcp_vk(:,:),ptcp_ck(:,:)
 258  type(pawpwij_t),allocatable :: Pwij_q(:)
 259 #ifdef HAVE_MPI_IO
 260  integer(XMPI_OFFSET_KIND) :: tmp_off,my_offpad
 261  integer(XMPI_OFFSET_KIND),allocatable :: bsize_frecord(:),offset_of_block(:)
 262 #endif
 263 #ifdef DEV_MG_DEBUG_MODE
 264  integer,allocatable :: ttp_check(:,:)
 265 #endif
 266 
 267 !************************************************************************
 268 
 269  call timab(680,1,tsec)
 270  call timab(681,1,tsec)
 271 
 272  DBG_ENTER("COLL")
 273 
 274  ABI_CHECK(Wfd%nspinor==1,"nspinor==2 not coded")
 275  ABI_CHECK(nfftot_osc==PRODUCT(ngfft_osc(1:3)),"mismatch in FFT size")
 276 
 277  if (Wfd%nsppol==2) then
 278    MSG_WARNING("nsppol==2 is still under testing")
 279  end if
 280  !
 281  ! MPI variables.
 282  comm    = Wfd%comm
 283  nproc   = Wfd%nproc
 284  my_rank = Wfd%my_rank
 285 
 286  !
 287  ! Basic constants.
 288  nspinor = Wfd%nspinor
 289  nsppol  = Wfd%nsppol
 290  dim_rtwg=1; faq = one/(Cryst%ucvol*Kmesh%nbz)
 291  npweps = Bsp%npweps
 292  !
 293  ! Prepare the FFT tables to have u(r) on the ngfft_osc mesh.
 294  mgfft_osc = MAXVAL(ngfft_osc(1:3))
 295  fftalga_osc = ngfft_osc(7)/100
 296  if ( ANY(ngfft_osc(1:3) /= Wfd%ngfft(1:3)) ) then
 297    call wfd_change_ngfft(Wfd,Cryst,Psps,ngfft_osc)
 298  end if
 299 
 300  ABI_MALLOC(igfftg0,(npweps))
 301  ABI_MALLOC(ktabr_k,(nfftot_osc))
 302  ABI_MALLOC(ktabr_kp,(nfftot_osc))
 303  ABI_MALLOC(id_tab,(nfftot_osc))
 304  id_tab = (/(ic, ic=1,nfftot_osc)/)
 305  !
 306  ! Workspace arrays for wavefunctions and oscillator matrix elements.
 307  ABI_MALLOC(rhxtwg_vpv,(npweps))
 308  ABI_MALLOC(rhxtwg_cpc,(npweps))
 309 
 310  if (BSp%prep_interp) then
 311    call wrtout(std_out,"Preparing BSE interpolation","COLL")
 312    ABI_MALLOC(aa_vpv,(npweps))
 313    ABI_MALLOC(bb_vpv1,(npweps))
 314    ABI_MALLOC(bb_vpv2,(npweps))
 315    ABI_MALLOC(cc_vpv,(npweps))
 316 
 317    ABI_MALLOC(aa_cpc,(npweps))
 318    ABI_MALLOC(bb_cpc1,(npweps))
 319    ABI_MALLOC(bb_cpc2,(npweps))
 320    ABI_MALLOC(cc_cpc,(npweps))
 321  end if
 322 
 323  ABI_MALLOC(ur_ckp,(nspinor*nfftot_osc))
 324  ABI_MALLOC(ur_vkp,(nspinor*nfftot_osc))
 325  ABI_MALLOC(ur_ck ,(nspinor*nfftot_osc))
 326  ABI_MALLOC(ur_vk ,(nspinor*nfftot_osc))
 327 
 328  if (Wfd%usepaw==1) then
 329    ABI_DT_MALLOC(Cp_vk,(Wfd%natom,nspinor))
 330    call pawcprj_alloc(Cp_vk,0,Wfd%nlmn_atm)
 331    ABI_DT_MALLOC(Cp_ck,(Wfd%natom,nspinor))
 332    call pawcprj_alloc(Cp_ck,0,Wfd%nlmn_atm)
 333    ABI_DT_MALLOC(Cp_ckp,(Wfd%natom,nspinor))
 334    call pawcprj_alloc(Cp_ckp,0,Wfd%nlmn_atm)
 335    ABI_DT_MALLOC(Cp_vkp,(Wfd%natom,nspinor))
 336    call pawcprj_alloc(Cp_vkp,0,Wfd%nlmn_atm)
 337 
 338    ABI_DT_MALLOC(Cp_tmp1,(Wfd%natom,nspinor))
 339    call pawcprj_alloc(Cp_tmp1,0,Wfd%nlmn_atm)
 340    ABI_DT_MALLOC(Cp_tmp2,(Wfd%natom,nspinor))
 341    call pawcprj_alloc(Cp_tmp2,0,Wfd%nlmn_atm)
 342    ABI_DT_MALLOC(Cp_tmp3,(Wfd%natom,nspinor))
 343    call pawcprj_alloc(Cp_tmp3,0,Wfd%nlmn_atm)
 344    ABI_DT_MALLOC(Cp_tmp4,(Wfd%natom,nspinor))
 345    call pawcprj_alloc(Cp_tmp4,0,Wfd%nlmn_atm)
 346  end if
 347  !
 348  ! Identify the index of q==0
 349  iqbz0=0
 350  do iq_bz=1,Qmesh%nbz
 351    if (ALL(ABS(Qmesh%bz(:,iq_bz))<tol3)) iqbz0 = iq_bz
 352  end do
 353  ABI_CHECK(iqbz0/=0,"q=0 not found")
 354  !
 355  ! Treat the spin polarization.
 356  spin_ids(:,1) = (/1,1/)
 357  spin_ids(:,2) = (/2,2/)
 358  spin_ids(:,3) = (/1,2/)
 359 
 360  nblocks=1
 361  kx_fact=two
 362  nels_block(:)=0
 363  nels_block(1)=BSp%nreh(1)*(BSp%nreh(1)+1_i8b)/2
 364  tot_nels=nels_block(1)
 365 
 366  if (nsppol==2) then
 367    nblocks=3
 368    kx_fact=one
 369    nels_block(1) = BSp%nreh(1)*(BSp%nreh(1)+1_i8b)/2   ! Only the upper triangle for block 1 and 2
 370    nels_block(2) = BSp%nreh(2)*(BSp%nreh(2)+1_i8b)/2
 371    nels_block(3) = BSp%nreh(1)*BSp%nreh(2)*1_i8b       ! Note: Block 3 does not have symmetries.
 372    tot_nels= SUM(nels_block)
 373  end if
 374  !
 375  ! Distribute the calculation of the matrix elements among the nodes.
 376  ! * tstart and t_stop give the initial and final transition index treated by each node.
 377  ! * my_hsize is the number of transitions treated by this processor
 378  ! * my_cols(1:2) gives the initial and final column treated by this node.
 379  !
 380  use_mpiio=.FALSE.
 381 #ifdef HAVE_MPI_IO
 382  use_mpiio = (nproc>1)
 383 #endif
 384  use_mpiio=.FALSE.
 385  !use_mpiio=.TRUE.
 386 
 387  if (is_resonant) then
 388    if (use_mpiio) then
 389      write(msg,'(2a,f6.2,a)')&
 390 &      ". Writing resonant excitonic Hamiltonian on file "//TRIM(fname)," via MPI-IO; file size= ",two*tot_nels*dpc*b2Gb," [Gb]."
 391    else
 392      write(msg,'(2a,f6.2,a)')&
 393 &      ". Writing resonant excitonic Hamiltonian on file "//TRIM(fname),"; file size= ",two*dpc*tot_nels*b2Gb," [Gb]."
 394    end if
 395  else
 396    if (use_mpiio) then
 397      write(msg,'(2a,f6.2,a)')&
 398 &      ". Writing coupling excitonic Hamiltonian on file "//TRIM(fname)," via MPI-IO; file size= ",tot_nels*2*dpc*b2Gb," [Gb]."
 399    else
 400      write(msg,'(2a,f6.2,a)')&
 401 &      ". Writing coupling excitonic Hamiltonian on file "//TRIM(fname),"; file size= ",two*dpc*tot_nels*b2Gb," [Gb]."
 402    end if
 403  end if
 404  call wrtout(std_out,msg,"COLL",do_flush=.True.)
 405  call wrtout(ab_out,msg,"COLL",do_flush=.True.)
 406  !
 407  ! Master writes the BSE header with Fortran IO.
 408  if (my_rank==master) then
 409 
 410    if (open_file(fname,msg,newunit=bsh_unt,form="unformatted",action="write") /= 0) then
 411       MSG_ERROR(msg)
 412    end if
 413    call exc_write_bshdr(bsh_unt,Bsp,Hdr_bse)
 414    ! To force the writing (needed for MPI-IO).
 415    close(bsh_unt)
 416 
 417    if (.not.use_mpiio) then ! Reopen the file and skip the header.
 418      if (open_file(fname,msg,newunit=bsh_unt,form="unformatted",action="readwrite") /= 0) then
 419         MSG_ERROR(msg)
 420      end if
 421      call exc_skip_bshdr(bsh_unt,ierr)
 422    end if
 423 
 424    if (BSp%prep_interp) then
 425      tmpfname = fname
 426      ii = LEN_TRIM(fname)
 427      tmpfname(ii-2:ii+1) = 'ABSR'
 428      if (open_file(tmpfname,msg,newunit=a_unt,form='unformatted',action="write") /= 0) then
 429        MSG_ERROR(msg)
 430      end if
 431      tmpfname(ii-2:ii+1) = 'BBSR'
 432      if (open_file(tmpfname,msg,newunit=b_unt,form='unformatted',action="write") /= 0) then
 433        MSG_ERROR(msg)
 434      end if
 435      tmpfname(ii-2:ii+1) = 'CBSR'
 436      if (open_file(tmpfname,msg,newunit=c_unt,form='unformatted',action="write") /= 0) then
 437        MSG_ERROR(msg)
 438      end if
 439      call exc_write_bshdr(a_unt,Bsp,Hdr_bse)
 440      call exc_write_bshdr(b_unt,Bsp,Hdr_bse)
 441      call exc_write_bshdr(c_unt,Bsp,Hdr_bse)
 442      close(a_unt)
 443      close(b_unt)
 444      close(c_unt)
 445      if (.not.use_mpiio) then ! Reopen the file and skip the header.
 446        tmpfname(ii-2:ii+1) = 'ABSR'
 447        if (open_file(tmpfname,msg,newunit=a_unt,form='unformatted',action="readwrite") /= 0) then
 448           MSG_ERROR(msg)
 449        end if
 450        call exc_skip_bshdr(a_unt,ierr)
 451        tmpfname(ii-2:ii+1) = 'BBSR'
 452        if (open_file(tmpfname,msg,newunit=b_unt,form='unformatted',action="readwrite") /= 0) then
 453           MSG_ERROR(msg)
 454        end if
 455        call exc_skip_bshdr(b_unt,ierr)
 456        tmpfname(ii-2:ii+1) = 'CBSR'
 457        if (open_file(tmpfname,msg,newunit=c_unt,form='unformatted',action="readwrite") /= 0) then
 458           MSG_ERROR(msg)
 459        end if
 460        call exc_skip_bshdr(c_unt,ierr)
 461      end if
 462    end if
 463  end if
 464 
 465  call xmpi_barrier(comm)
 466 
 467  if (use_mpiio) then
 468 #ifdef HAVE_MPI_IO
 469    ! Open the file with MPI-IO
 470    amode = MPI_MODE_RDWR
 471    !amode = MPI_MODE_CREATE + MPI_MODE_RDWR,
 472 
 473    call MPI_FILE_OPEN(comm, fname, amode, MPI_INFO_NULL, mpi_fh, mpi_err)
 474    ABI_CHECK_MPI(mpi_err,"opening: "//TRIM(fname))
 475 
 476    ! Skip the header.
 477    call exc_skip_bshdr_mpio(mpi_fh,xmpio_collective,ehdr_offset)
 478 
 479    ! Precompute the offset of the each block including the Fortran markers.
 480    ABI_MALLOC(offset_of_block,(nblocks))
 481    offset_of_block(1) = ehdr_offset
 482    do block=2,nblocks
 483      tmp_off = offset_of_block(block-1) + nels_block(block-1)*xmpi_bsize_dpc
 484      tmp_off = tmp_off + Bsp%nreh(block-1)*2*xmpio_bsize_frm  ! markers.
 485      offset_of_block(block) = tmp_off
 486    end do
 487 #endif
 488  end if
 489 
 490  call timab(681,2,tsec)
 491 
 492  do block=1,nsppol
 493    !
 494    ! Indices used to loop over bands.
 495    ! bidx contains the starting and final indices used to loop over bands.
 496    !
 497    !      (b3,b4)
 498    !         |... ...|
 499    ! (b1,b2) |... ...|
 500    !
 501    ! Resonant matrix is given by
 502    !      (v',c')
 503    !       |... ...|
 504    ! (v,c) |... ...|
 505    !
 506    ! Coupling matrix is given by
 507    !       (c',v')
 508    !       |... ...|
 509    ! (v,c) |... ...|
 510 
 511    if (is_resonant) then
 512      bidx(:,1) = [BSp%lomo_spin(block),BSp%homo_spin(block)] ! range for b1
 513      bidx(:,2) = [BSp%lumo_spin(block),BSp%humo_spin(block)] ! range for b2
 514      bidx(:,3) = [BSp%lomo_spin(block),BSp%homo_spin(block)] ! range for b3
 515      bidx(:,4) = [BSp%lumo_spin(block),BSp%humo_spin(block)] ! range for b4
 516    else
 517      bidx(:,1) = [BSp%lomo_spin(block),BSp%homo_spin(block)] ! range for b1
 518      bidx(:,2) = [BSp%lumo_spin(block),BSp%humo_spin(block)] ! range for b2
 519      bidx(:,3) = [BSp%lumo_spin(block),BSp%humo_spin(block)] ! range for b3
 520      bidx(:,4) = [BSp%lomo_spin(block),BSp%homo_spin(block)] ! range for b4
 521    end if
 522 
 523    spin1 = spin_ids(1,block)
 524    spin2 = spin_ids(2,block)
 525 
 526    do_coulomb_term  = (Bsp%use_coulomb_term .and. (spin1==spin2))
 527    do_exchange_term = (Bsp%exchange_term>0)
 528    w_is_diagonal    = BSp%use_diagonal_Wgg
 529    !
 530    ! Distribution of the matrix elements among the nodes.
 531    ! Note that rank0 will get the first transitions.
 532    nels=nels_block(block)
 533    ABI_MALLOC(t_start,(0:nproc-1))
 534    ABI_MALLOC(t_stop,(0:nproc-1))
 535    call xmpi_split_work2_i8b(nels,nproc,t_start,t_stop,msg,ierr)
 536    if (ierr/=0) then
 537      MSG_WARNING(msg)
 538    end if
 539 
 540    ABI_MALLOC(hsize_of,(0:nproc-1))
 541    hsize_of=0
 542    do rank=0,nproc-1
 543      if (t_stop(rank)>=t_start(rank)) hsize_of(rank) = t_stop(rank)-t_start(rank)+1
 544      !write(std_out,*)"nels",nels,hsize_of(rank)
 545    end do
 546 
 547    my_hsize = hsize_of(my_rank)
 548    if (my_hsize<=0) then
 549      write(msg,'(a,i0)')"Wrong number of transitions: my_hsize= ",my_hsize
 550      MSG_ERROR(msg)
 551    end if
 552    if (my_hsize /= INT(my_hsize,KIND=i4b)) then
 553      write(msg,'(a,i0)')"Size of local block too large for a default integer, Increase the number of CPUs: my_hsize= ",my_hsize
 554      MSG_ERROR(msg)
 555    end if
 556 
 557    my_cols=0
 558    do itp=1,Bsp%nreh(block)
 559      do it=1,itp
 560        ir = it + itp*(itp-1_i8b)/2
 561        if (ir==t_start(my_rank)) then
 562          my_rows(1) = it
 563          my_cols(1) = itp
 564        end if
 565        if (ir==t_stop(my_rank)) then
 566          my_rows(2) = it
 567          my_cols(2) = itp
 568        end if
 569      end do
 570    end do
 571 
 572    my_starts = [my_rows(1),my_cols(1)]
 573    my_ends   = [my_rows(2),my_cols(2)]
 574    !
 575    ! * Announce the treatment of submatrix treated by each node.
 576    bsize_my_block = 2*dpc*my_hsize
 577    write(msg,'(4(a,i0))')' Treating ',my_hsize,'/',nels,' matrix elements, from column ',my_cols(1),' up to column ',my_cols(2)
 578    call wrtout(std_out,msg,'PERS')
 579 
 580    if (is_resonant) then
 581      write(msg,'(a,f8.1,a)')&
 582 &     ' Calculating resonant blocks. Memory required: ',bsize_my_block*b2Mb,' Mb. '
 583    else
 584      write(msg,'(a,f8.1,a)')&
 585 &     ' Calculating coupling blocks. Memory required: ',bsize_my_block*b2Mb,' Mb. '
 586    end if
 587    call wrtout(std_out,msg,"COLL")
 588 
 589    ! Allocate big (scalable) buffer to store the BS matrix on this node.
 590    ABI_STAT_MALLOC(my_bsham,(t_start(my_rank):t_stop(my_rank)), ierr)
 591    ABI_CHECK(ierr==0, 'Not enough memory for exc Hamiltonian')
 592 
 593    if (BSp%prep_interp) then
 594      ! Allocate big (scalable) buffers to store a,b,c coeffients
 595      ABI_STAT_MALLOC(acoeffs,(t_start (my_rank):t_stop(my_rank)), ierr)
 596      ABI_CHECK(ierr==0, 'Not enough memory for acoeffs')
 597 
 598      ABI_STAT_MALLOC(bcoeffs,(t_start(my_rank):t_stop(my_rank)), ierr)
 599      ABI_CHECK(ierr==0, 'Not enough memory for bcoeffs')
 600 
 601      ABI_STAT_MALLOC(ccoeffs,(t_start(my_rank):t_stop(my_rank)), ierr)
 602      ABI_CHECK(ierr==0, 'Not enough memory for ccoeffs')
 603    end if
 604 
 605    if (do_coulomb_term) then ! Construct Coulomb term.
 606 
 607      call timab(682,1,tsec) ! exc_build_ham(Coulomb)
 608 
 609      write(msg,'(a,2i2,a)')" Calculating direct Coulomb term for (spin1,spin2) ",spin1,spin2," using full W_{GG'} ..."
 610      if (w_is_diagonal) then
 611         write(msg,'(a,2i2,a)')&
 612 &        " Calculating direct Coulomb term for (spin1, spin2) ",spin1,spin2," using diagonal approximation for W_{GG'} ..."
 613      end if
 614      call wrtout(std_out,msg,"COLL")
 615 
 616      ABI_MALLOC(ctccp,(npweps))
 617 
 618      if (BSp%prep_interp) then
 619        ABI_MALLOC(aa_ctccp,(npweps))
 620        ABI_MALLOC(bb_ctccp1,(npweps))
 621        ABI_MALLOC(bb_ctccp2,(npweps))
 622        ABI_MALLOC(cc_ctccp,(npweps))
 623      end if
 624 
 625      ABI_MALLOC(vc_sqrt_qbz,(npweps))
 626 
 627 #ifdef DEV_MG_DEBUG_MODE
 628      ABI_MALLOC(ttp_check,(BSp%nreh(block),BSp%nreh(block)))
 629      ttp_check=0
 630 #endif
 631 
 632      do ikp_bz=1,BSp%nkbz ! Loop over kp
 633        ! NOTE: this way of looping is good for bulk but it's not optimal in the
 634        !       case of systems sampled only at Gamma e.g. isolated systems in which
 635        !       one should take advantage of Hermiticity by looping over c-v !!!!
 636 
 637        ! Check whether (vp,cp,ikp_bz,spin2) belongs to the set of columns treated by me for some vp,cp
 638        ! Be careful since vcks2t contains zeros corresponding to transitions that should be skipped.
 639        itpk_min = MINVAL(Bsp%vcks2t(:,:,ikp_bz,spin2), MASK=(Bsp%vcks2t(:,:,ikp_bz,spin2)>0) )
 640        itpk_max = MAXVAL(Bsp%vcks2t(:,:,ikp_bz,spin2))
 641        if (my_cols(2)<itpk_min .or. my_cols(1)>itpk_max) CYCLE
 642 
 643        write(msg,'(3(a,i0))')" status: ",ikp_bz,"/",BSp%nkbz," done by node ",my_rank
 644        call wrtout(std_out,msg,"PERS",do_flush=.True.)
 645 
 646        ! * Get ikp_ibz, non-symmorphic phase, ph_mkpt, and symmetries from ikp_bz.
 647        call get_BZ_item(Kmesh,ikp_bz,kpbz,ikp_ibz,isym_kp,itim_kp,ph_mkpt,isirred=isirred)
 648        !ABI_CHECK(isirred,"not irred!")
 649        !ABI_CHECK(ph_mkpt == cone, "Wrong phase!")
 650 
 651        ktabr_kp(:) = ktabr(:,ikp_bz)
 652        spinrot_kp(:)=Cryst%spinrot(:,isym_kp)
 653        !ABI_CHECK(ALL(ktabr_kp == id_tab), "wrong tab")
 654 
 655        do ik_bz=1,ikp_bz ! Loop over k
 656          !
 657          ! * Get ik_ibz, non-symmorphic phase, ph_mkt, and symmetries from ik_bz
 658          call get_BZ_item(Kmesh,ik_bz,kbz,ik_ibz,isym_k,itim_k,ph_mkt,isirred=isirred)
 659          !ABI_CHECK(isirred,"not irred!")
 660          !ABI_CHECK(ph_mkt == cone, "Wrong phase!")
 661 
 662          ktabr_k(:) = ktabr(:,ik_bz)
 663          spinrot_k(:)=Cryst%spinrot(:,isym_k)
 664          !ABI_CHECK(ALL(ktabr_k == id_tab), "wrong tab")
 665          !if(itim_k==2) CYCLE ! time-reversal or not
 666          !
 667          ! * Find q = K-KP-G0 in the full BZ.
 668          kmkp = Kmesh%bz(:,ik_bz) - Kmesh%bz(:,ikp_bz)
 669          call findqg0(iq_bz,g0,kmkp,Qmesh%nbz,Qmesh%bz,BSp%mG0)
 670 
 671          ! Evaluate the tables needed for the padded FFT performed in rhotwg. Note that we have
 672          ! to pass G-G0 to sphereboundary instead of G as we need FFT results on the shifted G-sphere,
 673          ! If Gamma is not inside G-G0 one has to disable FFT padding as sphereboundary will give wrong tables.
 674          ! * Get the G-G0 shift for the FFT of the oscillators.
 675          !
 676          ABI_MALLOC(gbound,(2*mgfft_osc+8,2))
 677          call gsph_fft_tabs(Gsph_c,g0,mgfft_osc,ngfft_osc,use_padfft,gbound,igfftg0)
 678          if ( ANY(fftalga_osc == (/2,4/)) ) use_padfft=0 ! Pad-FFT is not coded in rho_tw_g
 679          if (use_padfft==0) then
 680            ABI_FREE(gbound)
 681            ABI_MALLOC(gbound,(2*mgfft_osc+8,2*use_padfft))
 682          end if
 683          !
 684          ! * Get iq_ibz, and symmetries from iq_bz
 685          call get_BZ_item(Qmesh,iq_bz,qbz,iq_ibz,isym_q,itim_q)
 686          is_qeq0 = (normv(qbz,Cryst%gmet,'G')<GW_TOLQ0)
 687 
 688          ! Symmetrize em1(omega=0)
 689          call screen_symmetrizer(W,iq_bz,Cryst,Gsph_c,Qmesh,Vcp)
 690          !
 691          ! * Set up table of |q_BZ+G|
 692          if (iq_ibz==1) then
 693            do ig=1,npweps
 694              isg = Gsph_c%rottb(ig,itim_q,isym_q)
 695              vc_sqrt_qbz(isg)=Vcp%vcqlwl_sqrt(ig,1)
 696            end do
 697          else
 698            do ig=1,npweps
 699              isg = Gsph_c%rottb(ig,itim_q,isym_q)
 700              vc_sqrt_qbz(isg) = Vcp%vc_sqrt(ig,iq_ibz)
 701            end do
 702          end if
 703 
 704          ! === Evaluate oscillator matrix elements ===
 705          ! * $ <phj/r|e^{-i(q+G)}|phi/r> - <tphj/r|e^{-i(q+G)}|tphi/r> $ in packed form.
 706          if (Wfd%usepaw==1.and.ik_bz/=ikp_bz) then
 707            ABI_DT_MALLOC(Pwij_q,(Cryst%ntypat))
 708            call pawpwij_init(Pwij_q,npweps,Qmesh%bz(:,iq_bz),Gsph_c%gvec,Cryst%rprimd,Psps,Pawtab,Paw_pwff)
 709          end if
 710 
 711          ! =======================================
 712          ! === Loop over the four band indeces ===
 713          ! =======================================
 714          !
 715          do ic=bidx(1,2),bidx(2,2) !do ic=BSp%lumo,BSp%nbnds
 716 
 717            if (wfd_ihave_ur(Wfd,ic,ik_ibz,spin1,how="Stored")) then
 718              ptur_ck =>  Wfd%Wave(ic,ik_ibz,spin1)%ur
 719            else
 720              call wfd_get_ur(Wfd,ic,ik_ibz,spin1,ur_ck)
 721              ptur_ck => ur_ck
 722            end if
 723            !
 724            ! Get cprj for this (c,kbz,s1) in the BZ.
 725            ! * phase due to the umklapp G0 in k-q is already included.
 726            if (Wfd%usepaw==1) then
 727              if (wfd_ihave_cprj(Wfd,ic,ik_ibz,spin1,how="Stored")) then
 728                ptcp_ck =>  Wfd%Wave(ic,ik_ibz,spin1)%cprj
 729              else
 730                call wfd_get_cprj(Wfd,ic,ik_ibz,spin1,Cryst,Cp_tmp1,sorted=.FALSE.)
 731                ptcp_ck =>  Cp_tmp1
 732              end if
 733              call paw_symcprj_op(ik_bz,nspinor,1,Cryst,Kmesh,Pawtab,Pawang,ptcp_ck,Cp_ck)
 734            end if
 735 
 736            do icp=bidx(1,4),bidx(2,4)  !do icp=BSp%lumo,BSp%nbnds
 737              ! * Calculate matrix-elements rhxtwg_cpc
 738              !
 739              if (ik_bz==ikp_bz) then ! Already in memory.
 740                rhxtwg_cpc(:) = sym_rhotwgq0(itim_k,isym_k,dim_rtwg,npweps,rhxtwg_q0(:,icp,ic,ik_ibz,spin1),Gsph_c)
 741 
 742              else ! Calculate matrix element from wfr.
 743                ! TODO: change the order of the loops.
 744 
 745                if (wfd_ihave_ur(Wfd,icp,ikp_ibz,spin2,how="Stored")) then
 746                  ptur_ckp => Wfd%Wave(icp,ikp_ibz,spin2)%ur
 747                else
 748                  call wfd_get_ur(Wfd,icp,ikp_ibz,spin2,ur_ckp)
 749                  ptur_ckp => ur_ckp
 750                end if
 751 
 752                ! Load cprj for this (c,k,s2) in the BZ.
 753                ! * Do not care about umklapp G0 in k-q as the phase is already included.
 754                if (Wfd%usepaw==1) then
 755                  if (wfd_ihave_cprj(Wfd,icp,ikp_ibz,spin2,how="Stored")) then
 756                    ptcp_ckp =>  Wfd%Wave(icp,ikp_ibz,spin2)%cprj
 757                  else
 758                    call wfd_get_cprj(Wfd,icp,ikp_ibz,spin2,Cryst,Cp_tmp2,sorted=.FALSE.)
 759                    ptcp_ckp =>  Cp_tmp2
 760                  end if
 761                  call paw_symcprj_op(ikp_bz,nspinor,1,Cryst,Kmesh,Pawtab,Pawang,ptcp_ckp,Cp_ckp)
 762                end if
 763 
 764                call rho_tw_g(nspinor,npweps,nfftot_osc,ndat1,ngfft_osc,map2sphere,use_padfft,igfftg0,gbound,&
 765 &                ptur_ckp,itim_kp,ktabr_kp,ph_mkpt,spinrot_kp,&
 766 &                ptur_ck ,itim_k ,ktabr_k ,ph_mkt ,spinrot_k ,&
 767 &                dim_rtwg,rhxtwg_cpc)
 768 
 769                if (Wfd%usepaw==1) then ! Add PAW onsite contribution.
 770                  call paw_rho_tw_g(npweps,dim_rtwg,nspinor,Cryst%natom,Cryst%ntypat,Cryst%typat,Cryst%xred,Gsph_c%gvec,&
 771 &                  Cp_ckp,Cp_ck,Pwij_q,rhxtwg_cpc)
 772                end if
 773              end if
 774 
 775              if (BSp%prep_interp) then
 776                aa_cpc = rhxtwg_cpc
 777                aa_cpc(2:) = czero
 778                bb_cpc1 = vc_sqrt_qbz*rhxtwg_cpc
 779                bb_cpc1(1) = czero
 780                bb_cpc2 = rhxtwg_cpc
 781                bb_cpc2(2:) = czero
 782 
 783                if(ik_bz == ikp_bz) then
 784                   ! Enforce orthogonality of the wavefunctions.
 785                   if(icp == ic) then
 786                     aa_cpc(1) = cone
 787                     bb_cpc2(1) = cone
 788                   else
 789                     aa_cpc(1) = czero
 790                     bb_cpc2(1) = czero
 791                   end if
 792                end if
 793 
 794                ! MG TODO: a does not require a call to w0gemv
 795                call screen_w0gemv(W,"C",npweps,nspinor,w_is_diagonal,cone_gw,czero_gw,aa_cpc,aa_ctccp)
 796                call screen_w0gemv(W,"C",npweps,nspinor,w_is_diagonal,cone_gw,czero_gw,bb_cpc1,bb_ctccp1)
 797                call screen_w0gemv(W,"C",npweps,nspinor,w_is_diagonal,cone_gw,czero_gw,bb_cpc2,bb_ctccp2)
 798 
 799                cc_cpc = vc_sqrt_qbz*rhxtwg_cpc
 800                cc_cpc(1) = czero
 801 
 802                call screen_w0gemv(W,"C",npweps,nspinor,w_is_diagonal,cone_gw,czero_gw,cc_cpc,cc_ctccp)
 803              end if
 804 
 805              ! Prepare sum_GG' rho_c'c*(G) W_qbz(G,G') rho_v'v(G')
 806              ! First sum on G: sum_G rho_c'c(G) W_qbz*(G,G') (W_qbz conjugated)
 807              rhxtwg_cpc = rhxtwg_cpc * vc_sqrt_qbz
 808              call screen_w0gemv(W,"C",npweps,nspinor,w_is_diagonal,cone_gw,czero_gw,rhxtwg_cpc,ctccp)
 809 
 810              do iv=bidx(1,1),bidx(2,1)    !do iv=BSp%lomo,BSp%homo
 811                it = BSp%vcks2t(iv,ic,ik_bz,spin1); if (it==0) CYCLE ! ir-uv-cutoff
 812                ene_t = BSp%Trans(it,spin1)%en
 813 
 814                ! TODO: use this but change the order of the loops.
 815                if (wfd_ihave_ur(Wfd,iv,ik_ibz,spin1,how="Stored")) then
 816                  ptur_vk => Wfd%Wave(iv,ik_ibz,spin1)%ur
 817                else
 818                  call wfd_get_ur(Wfd,iv,ik_ibz,spin1,ur_vk)
 819                  ptur_vk => ur_vk
 820                end if
 821                !
 822                ! Load cprj for this (v,k,s1) in the BZ.
 823                ! * Do not care about umklapp G0 in k-q as the phase is already included.
 824                if (Wfd%usepaw==1) then
 825                  if (wfd_ihave_cprj(Wfd,iv,ik_ibz,spin1,how="Stored")) then
 826                    ptcp_vk =>  Wfd%Wave(iv,ik_ibz,spin1)%cprj
 827                  else
 828                    call wfd_get_cprj(Wfd,iv,ik_ibz,spin1,Cryst,Cp_tmp3,sorted=.FALSE.)
 829                    ptcp_vk => Cp_tmp3
 830                  end if
 831                  call paw_symcprj_op(ik_bz,nspinor,1,Cryst,Kmesh,Pawtab,Pawang,ptcp_vk,Cp_vk)
 832                end if
 833 
 834                do ivp=bidx(1,3),bidx(2,3) !do ivp=BSp%lomo,BSp%homo
 835 
 836                  if (is_resonant) then
 837                    itp = BSp%vcks2t(ivp,icp,ikp_bz,spin2)
 838                  else ! have to exchange band indeces
 839                    itp = BSp%vcks2t(icp,ivp,ikp_bz,spin2)
 840                  end if
 841 
 842                  if (itp==0) CYCLE ! ir-uv-cutoff
 843 
 844                  ! FIXME Temporary work around, when ikp_bz == ik it might happen that itp<it
 845                  ! should rewrite the loops using contracted k-dependent indeces for bands
 846                  if (itp<it) CYCLE
 847 
 848                  ir = it + itp*(itp-1)/2
 849                  if (ir<t_start(my_rank).or.ir>t_stop(my_rank)) CYCLE
 850 
 851                  ene_tp = BSp%Trans(itp,spin2)%en
 852 
 853                  ! ============================================
 854                  ! === Calculate matrix elements rhxtwg_vpv ===
 855                  ! ============================================
 856                  if (ik_bz==ikp_bz) then
 857                    ! Already in memory.
 858                    rhxtwg_vpv(:) = sym_rhotwgq0(itim_k,isym_k,dim_rtwg,npweps,rhxtwg_q0(:,ivp,iv,ik_ibz,spin1),Gsph_c)
 859 
 860                  else
 861                    ! Calculate matrix element from wfr.
 862                    if (wfd_ihave_ur(Wfd,ivp,ikp_ibz,spin2,how="Stored")) then
 863                      ptur_vkp => Wfd%Wave(ivp,ikp_ibz,spin2)%ur
 864                    else
 865                      call wfd_get_ur(Wfd,ivp,ikp_ibz,spin2,ur_vkp)
 866                      ptur_vkp => ur_vkp
 867                    end if
 868                    !
 869                    ! Load cprj for this (vp,kp,s2) in the BZ.
 870                    ! * Do not care about umklapp G0 in k-q as the phase is already included.
 871                    if (Wfd%usepaw==1) then
 872                      if (wfd_ihave_cprj(Wfd,ivp,ikp_ibz,spin2,how="Stored")) then
 873                        ptcp_vkp =>  Wfd%Wave(ivp,ikp_ibz,spin2)%cprj
 874                      else
 875                        call wfd_get_cprj(Wfd,ivp,ikp_ibz,spin2,Cryst,Cp_tmp4,sorted=.FALSE.)
 876                        ptcp_vkp => Cp_tmp4
 877                      end if
 878                      call paw_symcprj_op(ikp_bz,nspinor,1,Cryst,Kmesh,Pawtab,Pawang,ptcp_vkp,Cp_vkp)
 879                    end if
 880 
 881                    call rho_tw_g(nspinor,npweps,nfftot_osc,ndat1,ngfft_osc,map2sphere,use_padfft,igfftg0,gbound,&
 882 &                     ptur_vkp,itim_kp,ktabr_kp,ph_mkpt,spinrot_kp,&
 883 &                     ptur_vk ,itim_k ,ktabr_k ,ph_mkt ,spinrot_k ,&
 884 &                     dim_rtwg,rhxtwg_vpv)
 885 
 886                    if (Wfd%usepaw==1) then ! Add PAW onsite contribution.
 887                      call paw_rho_tw_g(npweps,dim_rtwg,nspinor,Cryst%natom,Cryst%ntypat,Cryst%typat,Cryst%xred,&
 888 &                      Gsph_c%gvec,Cp_vkp,Cp_vk,Pwij_q,rhxtwg_vpv)
 889                    end if
 890                  end if
 891 
 892                  ! Index in the global Hamiltonian matrix.
 893                  ir = it + itp*(itp-1_i8b)/2
 894 
 895                  if (ir<t_start(my_rank).or.ir>t_stop(my_rank)) then
 896                    write(msg,'(a,3(1x,i0))')" Gonna SIGFAULT, ir, t_start, t_stop ",ir,t_start(my_rank),t_stop(my_rank)
 897                    MSG_ERROR(msg)
 898                  end if
 899                  !ABI_CHECK(itp >= it,"itp < it")
 900 
 901                  if (BSp%prep_interp) then
 902                    ! Save a,b, c coefficients.
 903                    aa_vpv = rhxtwg_vpv
 904                    aa_vpv(2:) = czero
 905                    bb_vpv1 = rhxtwg_vpv
 906                    bb_vpv1(2:) = czero
 907                    bb_vpv2 = vc_sqrt_qbz*rhxtwg_vpv
 908                    bb_vpv2(1) = czero
 909 
 910                    if (ik_bz == ikp_bz) then
 911                       ! Enforce orthogonality of the wavefunctions.
 912                       if (ivp == iv) then
 913                         aa_vpv(1) = cone
 914                         bb_vpv1(1) = cone
 915                       else
 916                         aa_vpv(1) = czero
 917                         bb_vpv1(1) = czero
 918                       end if
 919                    end if
 920 
 921                    cc_vpv = vc_sqrt_qbz*rhxtwg_vpv
 922                    cc_vpv(1) = czero
 923 
 924                    aatmp = -faq * xdotc(npweps,aa_ctccp,1,aa_vpv,1)
 925                    bbtmp = -faq * xdotc(npweps,bb_ctccp1,1,bb_vpv1,1)-faq*xdotc(npweps,bb_ctccp2,1,bb_vpv2,1)
 926                    cctmp = -faq * xdotc(npweps,cc_ctccp,1,cc_vpv,1)
 927 
 928                    acoeffs(ir) = aatmp
 929                    bcoeffs(ir) = bbtmp
 930                    ccoeffs(ir) = cctmp
 931                  end if
 932 
 933                  ! sum_G2 rho_c'c(G) W_qbz(G,G') rho_v'v(G')
 934                  rhxtwg_vpv = vc_sqrt_qbz * rhxtwg_vpv
 935                  http = - faq * xdotc(npweps,ctccp,1,rhxtwg_vpv,1)
 936 
 937                  ! Save result taking into account the symmetry of the matrix.
 938                  ! Note that the diagonal of the resonant block is not forced to be real
 939                  my_bsham(ir) = http
 940 
 941 #ifdef DEV_MG_DEBUG_MODE
 942                  ttp_check(it,itp) = ttp_check(it,itp)+1
 943 #endif
 944                end do !ivp
 945              end do !iv
 946            end do !icp
 947          end do !ic
 948 
 949          ABI_FREE(gbound)
 950 
 951          if (Wfd%usepaw==1.and.ik_bz/=ikp_bz) then ! Free the onsite contribution for this q.
 952            call pawpwij_free(Pwij_q)
 953            ABI_DT_FREE(Pwij_q)
 954          end if
 955 
 956        end do ! ik_bz
 957      end do ! Fat loop over ikp_bz
 958 
 959 #ifdef DEV_MG_DEBUG_MODE
 960      do itp=1,BSp%nreh(block)
 961        do it=1,BSp%nreh(block)
 962         ir = it + itp*(itp-1_i8b)/2
 963          if (itp>=it .and. ttp_check(it,itp) /= 1) then
 964            if (ir>=t_start(my_rank).and.ir<=t_stop(my_rank)) then
 965              write(std_out,*)"WARN: upper triangle is not 1 ",it,itp,ttp_check(it,itp)
 966              write(std_out,*)TRIM(repr_trans(Bsp%Trans(it ,spin1)))
 967              write(std_out,*)TRIM(repr_trans(Bsp%Trans(itp,spin2)))
 968            end if
 969          end if
 970          if (itp< it .and. ttp_check(it,itp) /= 0) then
 971            write(std_out,*)"WARN: then lower triangle is not 0 ",it,itp,ttp_check(it,itp)
 972            write(std_out,*)TRIM(repr_trans(Bsp%Trans(it ,spin1)))
 973            write(std_out,*)TRIM(repr_trans(Bsp%Trans(itp,spin2)))
 974          end if
 975        end do
 976      end do
 977      ierr = SUM(SUM(ttp_check,DIM=2),DIM=1)
 978      if (ierr/=my_hsize) then
 979        write(msg,'(a,2i0)')"ierr/=my_hsize",ierr,my_hsize
 980        MSG_ERROR(msg)
 981      end if
 982      ABI_FREE(ttp_check)
 983 #endif
 984 
 985      ABI_FREE(ctccp)
 986      if(Bsp%prep_interp) then
 987        ABI_FREE(aa_ctccp)
 988        ABI_FREE(bb_ctccp1)
 989        ABI_FREE(bb_ctccp2)
 990        ABI_FREE(cc_ctccp)
 991      end if
 992 
 993      ABI_FREE(vc_sqrt_qbz)
 994      call wrtout(std_out,' Coulomb term completed',"COLL")
 995 
 996      call timab(682,2,tsec) ! exc_build_ham(Coulomb)
 997    end if ! do_coulomb_term
 998    !
 999    ! =====================
1000    ! === Exchange term ===
1001    ! =====================
1002    ! TODO might add treatment of <psi|q+G|psi> for q+G -> 0
1003    ! TODO might used enlarged G-sphere for better convergence.
1004    if (do_exchange_term) then
1005 
1006      !call exc_build_v(spin1,spin2,nsppol,npweps,Bsp,Cryst,Kmesh,Qmesh,Gsph_x,Gsph_c,Vcp,&
1007      ! &  is_resonant,rhxtwg_q0,nproc,my_rank,t_start,t_stop,my_bsham,comm)
1008 
1009      call timab(683,1,tsec) ! exc_build_ham(exchange)
1010 
1011      write(msg,'(a,2i2,a)')" Calculating exchange term for (spin1,spin2) ",spin1,spin2," ..."
1012      call wrtout(std_out,msg,"COLL")
1013 
1014      ABI_MALLOC(rhotwg1,(npweps))
1015      ABI_MALLOC(rhotwg2,(npweps))
1016 
1017      ngx = Gsph_x%ng
1018      ABI_MALLOC(vc_sqrt_qbz,(ngx))
1019 
1020      ! * Get iq_ibz, and symmetries from iq_bz.
1021      iq_bz = iqbz0 ! q = 0 -> iqbz0
1022      call get_BZ_item(Qmesh,iq_bz,qbz,iq_ibz,isym_q,itim_q)
1023 
1024      ! * Set up table of |q(BZ)+G|
1025      if (iq_ibz==1) then
1026        do ig=1,ngx
1027          ISg = Gsph_x%rottb(ig,itim_q,isym_q)
1028          vc_sqrt_qbz(ISg)=Vcp%vcqlwl_sqrt(ig,1)
1029        end do
1030      else
1031         MSG_ERROR("iq_ibz should be 1")
1032      end if
1033 
1034      do itp=1,BSp%nreh(block) ! Loop over transition tp = (kp,vp,cp,spin2)
1035 
1036        if (itp<my_cols(1) .or. itp>my_cols(2)) CYCLE ! I dont have this column.
1037        ene_tp = Bsp%Trans(itp,spin2)%en
1038        ikp_bz = Bsp%Trans(itp,spin2)%k
1039        ivp    = Bsp%Trans(itp,spin2)%v
1040        icp    = Bsp%Trans(itp,spin2)%c
1041 
1042        ikp_ibz = Kmesh%tab (ikp_bz)
1043        isym_kp = Kmesh%tabo(ikp_bz)
1044        itim_kp = (3-Kmesh%tabi(ikp_bz))/2
1045 
1046        if (is_resonant) then
1047          rhotwg2(:) = sym_rhotwgq0(itim_kp,isym_kp,dim_rtwg,npweps,rhxtwg_q0(:,ivp,icp,ikp_ibz,spin2),Gsph_c)
1048        else ! Code for coupling block.
1049          rhotwg2(:) = sym_rhotwgq0(itim_kp,isym_kp,dim_rtwg,npweps,rhxtwg_q0(:,icp,ivp,ikp_ibz,spin2),Gsph_c)
1050        end if
1051        !
1052        ! Multiply by the Coulomb term.
1053         do ig=2,npweps
1054           rhotwg2(ig) = rhotwg2(ig) * vc_sqrt_qbz(ig) * vc_sqrt_qbz(ig)
1055         end do
1056 
1057        do it=1,itp ! Loop over transition t = (k,v,c,spin1)
1058          ir = it + itp*(itp-1_i8b)/2
1059          if (ir<t_start(my_rank) .or. ir>t_stop(my_rank)) CYCLE
1060 
1061          ene_t = Bsp%Trans(it,spin1)%en
1062          ik_bz = Bsp%Trans(it,spin1)%k
1063          iv    = Bsp%Trans(it,spin1)%v
1064          ic    = Bsp%Trans(it,spin1)%c
1065 
1066          ik_ibz = Kmesh%tab(ik_bz)
1067          isym_k = Kmesh%tabo(ik_bz)
1068          itim_k = (3-Kmesh%tabi(ik_bz))/2
1069          !if (itim_k==2) CYCLE ! time-reversal or not
1070 
1071          rhotwg1(:) = sym_rhotwgq0(itim_k,isym_k,dim_rtwg,npweps,rhxtwg_q0(:,iv,ic,ik_ibz,spin1),Gsph_c)
1072          !
1073          ! sum over G/=0
1074          ctemp = xdotc(npweps-1,rhotwg1(2:),1,rhotwg2(2:),1)
1075          ctemp = faq * kx_fact * ctemp
1076 
1077          ! exchange term is non divergent !
1078          if (BSp%prep_interp) then
1079            ccoeffs(ir) = ccoeffs(ir) + ctemp
1080          end if
1081 
1082          my_bsham(ir) = my_bsham(ir) + ctemp
1083        end do !it
1084      end do !itp
1085 
1086      ABI_FREE(rhotwg1)
1087      ABI_FREE(rhotwg2)
1088      ABI_FREE(vc_sqrt_qbz)
1089 
1090      call timab(683,2,tsec) ! exc_build_ham(exchange)
1091    end if ! do_exchange_term
1092    !
1093    ! =====================
1094    ! === Diagonal term ===
1095    ! =====================
1096    if (is_resonant .and. spin1==spin2) then
1097      write(msg,'(a,2i2,a)')" Adding diagonal term for (spin1,spin2) ",spin1,spin2," ..."
1098      call wrtout(std_out,msg,"COLL")
1099      do it=1,BSp%nreh(block)
1100        ir = it + it*(it-1_i8b)/2
1101        if (ir>=t_start(my_rank) .and. ir<=t_stop(my_rank)) my_bsham(ir) = my_bsham(ir) + Bsp%Trans(it,spin1)%en
1102      end do
1103    end if
1104 
1105    if (.FALSE.) then
1106      dump_unt = get_unit()
1107      msg=' Coupling Hamiltonian matrix elements: '
1108      if (is_resonant) msg=' Reasonant Hamiltonian matrix elements: '
1109      call wrtout(dump_unt,msg,"PERS")
1110      call wrtout(dump_unt,'    k  v  c  s      k" v" c" s"       H',"PERS")
1111      do itp=1,BSp%nreh(block)
1112        ikp_bz = Bsp%Trans(itp,spin2)%k
1113        ivp    = Bsp%Trans(itp,spin2)%v
1114        icp    = Bsp%Trans(itp,spin2)%c
1115        do it=1,itp
1116          ik_bz = Bsp%Trans(it,spin1)%k
1117          iv    = Bsp%Trans(it,spin1)%v
1118          ic    = Bsp%Trans(it,spin1)%c
1119          ir = it + itp*(itp-1_i8b)/2
1120          if (ir>=t_start(my_rank).and.ir<=t_stop(my_rank)) then
1121            http = my_bsham(ir)
1122            !if (ABS(http) > tol3) then
1123            write(msg,'(2(i0,1x),2(i5,3i3,3x),2f7.3)')it,itp, ik_bz,iv,ic,spin1, ikp_bz,ivp,icp,spin2, http
1124            call wrtout(dump_unt,msg,"PERS")
1125            !end if
1126          end if
1127        end do
1128      end do
1129    end if
1130 
1131 !DBYG
1132    if (.False.) then
1133      dump_unt = get_unit()
1134      dump_unt = 999
1135      msg=' Coupling Hamiltonian matrix elements: '
1136      if (is_resonant) msg=' Resonant Hamiltonian matrix elements: '
1137      call wrtout(dump_unt,msg,"PERS")
1138      call wrtout(dump_unt,'    k v  c  s      k" v" c" s"       H',"PERS")
1139      do itp=1,BSp%nreh(block)
1140        ikp_bz = Bsp%Trans(itp,spin2)%k
1141        ivp    = Bsp%Trans(itp,spin2)%v
1142        icp    = Bsp%Trans(itp,spin2)%c
1143        do it=1,BSp%nreh(block)
1144          ik_bz = Bsp%Trans(it,spin1)%k
1145          iv    = Bsp%Trans(it,spin1)%v
1146          ic    = Bsp%Trans(it,spin1)%c
1147          if(it > itp) then
1148            ir = itp+it*(it-1_i8b)/2
1149          else
1150            ir = it + itp*(itp-1_i8b)/2
1151          end if
1152          if (ir>=t_start(my_rank).and.ir<=t_stop(my_rank)) then
1153            if(it > itp) then
1154              http = CONJG(my_bsham(ir))
1155              if (BSp%prep_interp) then
1156                aatmp = CONJG(acoeffs(ir))
1157                bbtmp = CONJG(bcoeffs(ir))
1158                cctmp = CONJG(ccoeffs(ir))
1159              end if
1160            else
1161              http = my_bsham(ir)
1162              if (BSp%prep_interp) then
1163                aatmp = acoeffs(ir)
1164                bbtmp = bcoeffs(ir)
1165                cctmp = ccoeffs(ir)
1166              end if
1167            end if
1168            if (it == itp) http = http - Bsp%Trans(it,spin1)%en
1169            !if (ABS(http) > tol3) then
1170            if (BSp%prep_interp) then
1171              write(msg,'(2(i0,1x),2(i5,3i3,3x),2f24.20,2f24.20,2f24.20,2f24.20)')it,itp, ik_bz,iv,ic,spin1, ikp_bz,ivp,icp,&
1172 &   spin2, http, aatmp, bbtmp, cctmp
1173            else
1174              write(msg,'(2(i0,1x),2(i5,3i3,3x),2f24.20)')it,itp, ik_bz,iv,ic,spin1, ikp_bz,ivp,icp,spin2, http
1175            end if
1176            call wrtout(dump_unt,msg,"PERS")
1177            !end if
1178          end if
1179        end do
1180      end do
1181    end if
1182 
1183    call timab(684,1,tsec) ! exc_build_ham(synchro)
1184    call xmpi_barrier(comm)
1185    call timab(684,2,tsec) ! exc_build_ham(synchro)
1186    !
1187    ! =================================
1188    ! === Write Hamiltonian on disk ===
1189    ! =================================
1190    call timab(685,1,tsec) ! exc_build_ham(write_ham)
1191    if (use_mpiio) then
1192 #ifdef HAVE_MPI_IO
1193      ! Write the Hamiltonian with collective MPI-IO.
1194      if (BSp%prep_interp) then
1195        MSG_ERROR("Preparation of interpolation technique not yet coded with MPI-IO")
1196      end if
1197      ABI_CHECK(nsppol==1,"nsppol==2 not coded, offset is wrong")
1198      !
1199      old_type = MPI_DOUBLE_COMPLEX
1200      call xmpio_create_fherm_packed(my_starts,my_ends,is_fortran_file,my_offset,old_type,hmat_type,offset_err)
1201 
1202      if (offset_err/=0) then
1203        write(msg,"(3a)")&
1204 &        "Global position index cannot be stored in a standard Fortran integer. ",ch10,&
1205 &        "BSE matrix cannot be written with a single MPI-IO call. "
1206        MSG_ERROR(msg)
1207      end if
1208      !
1209      ! Each node uses a different offset to skip the header and the blocks written by the other CPUs.
1210      my_offset = offset_of_block(block) + my_offset
1211 
1212      call MPI_FILE_SET_VIEW(mpi_fh, my_offset, MPI_BYTE, hmat_type, 'native', MPI_INFO_NULL, mpi_err)
1213      ABI_CHECK_MPI(mpi_err,"SET_VIEW")
1214 
1215      call MPI_TYPE_FREE(hmat_type,mpi_err)
1216      ABI_CHECK_MPI(mpi_err,"MPI_TYPE_FREE")
1217 
1218      if (hsize_of(my_rank) /= INT(hsize_of(my_rank),kind=i4b) ) then
1219        MSG_ERROR("Wraparound error")
1220      end if
1221 
1222      tmp_size = INT(hsize_of(my_rank))
1223      call MPI_FILE_WRITE_ALL(mpi_fh, my_bsham, tmp_size, MPI_DOUBLE_COMPLEX, MPI_STATUS_IGNORE, mpi_err)
1224      ABI_CHECK_MPI(mpi_err,"FILE_WRITE")
1225 
1226      ! It seems that personal calls in make the code stuck
1227      !if (is_fortran_file .and. my_rank==master) then ! Master writes the Fortran record markers.
1228      ! Write the Fortran record markers.
1229      neh2=BSp%nreh(block)
1230      ABI_MALLOC(bsize_frecord,(neh2))
1231      bsize_frecord = (/(col_glob * xmpi_bsize_dpc, col_glob=1,neh2)/)
1232      ! ehdr_offset points to the end of the header.
1233      !call xmpio_write_frmarkers(mpi_fh,ehdr_offset,xmpio_collective,neh2,bsize_frecord,mpi_err)
1234      my_offset = offset_of_block(block)
1235      call xmpio_write_frmarkers(mpi_fh,my_offset,xmpio_collective,neh2,bsize_frecord,ierr)
1236      ABI_CHECK(ierr==0,"Error while writing Fortran markers")
1237      ABI_FREE(bsize_frecord)
1238 #else
1239      MSG_BUG("You should not be here!")
1240 #endif
1241    else
1242      ! Use FORTRAN IO with sequential access mode.
1243      ! * Each node sends its data to master node.
1244      ! * Blocks are distributed according to the rank of the node.
1245      ! * Matrix is written by columns hence make sure that the last column is completely written.
1246      call cwtime(cputime,walltime,gflops,"start")
1247 
1248      if (my_rank==master) then
1249        prev_nrows=0; if (my_cols(2) /= my_rows(2)) prev_nrows = my_rows(2)
1250        ncol = my_cols(2)-my_cols(1)+1
1251        ist=1
1252        do jj=1,ncol
1253          col_glob = my_starts(2) + jj - 1
1254          nrows = col_glob; if (jj==ncol) nrows=my_rows(2)
1255          iend = ist + nrows -1
1256          write(bsh_unt) my_bsham(ist:iend)
1257          if (BSp%prep_interp) then
1258            write(a_unt) acoeffs(ist:iend)
1259            write(b_unt) bcoeffs(ist:iend)
1260            write(c_unt) ccoeffs(ist:iend)
1261          end if
1262          ist=iend+1
1263        end do
1264        write(msg,'(2(a,i0))')" Wraparound error: iend=",iend," my_hsize=",hsize_of(my_rank)
1265        ABI_CHECK(iend==hsize_of(my_rank),msg)
1266        ABI_FREE(my_bsham)
1267        if (BSp%prep_interp) then
1268          ABI_FREE(acoeffs)
1269          ABI_FREE(bcoeffs)
1270          ABI_FREE(ccoeffs)
1271        end if
1272      end if
1273 
1274      call xmpi_barrier(comm)
1275      !
1276      ! Collect data from the other nodes.
1277      do sender=1,nproc-1
1278        ! If I'm not involved, jump to the end of the loop and wait there (sequential IO? Of course!)
1279        if (all(my_rank /= [sender, master])) goto 100
1280 
1281        if (my_rank==master)  then
1282          ABI_MALLOC(buffer,(hsize_of(sender)))
1283          if (BSp%prep_interp) then
1284            ABI_MALLOC(abuffer,(hsize_of(sender)))
1285            ABI_MALLOC(bbuffer,(hsize_of(sender)))
1286            ABI_MALLOC(cbuffer,(hsize_of(sender)))
1287          end if
1288        end if
1289        tmp_size = INT(hsize_of(sender),kind=i4b)
1290        call xmpi_exch(my_bsham,tmp_size,sender,buffer,master,comm,mpi_err)
1291        if (BSp%prep_interp) then
1292          call xmpi_exch(acoeffs,tmp_size,sender,abuffer,master,comm,mpi_err)
1293          call xmpi_exch(bcoeffs,tmp_size,sender,bbuffer,master,comm,mpi_err)
1294          call xmpi_exch(ccoeffs,tmp_size,sender,cbuffer,master,comm,mpi_err)
1295        end if
1296 
1297        ! TODO Be careful with the MPI TAG here, add optional Arguments in xmpi_exch so that the TAG can be specified!
1298        proc_start = (/my_rows(1),my_cols(1)/)
1299        proc_end   = (/my_rows(2),my_cols(2)/)
1300        my_extrema(:,1) = proc_start
1301        my_extrema(:,2) = proc_end
1302 
1303        sender_extrema = my_extrema ! just to avoid NAN on sender. xechh_mpi is not well designed
1304        call xmpi_exch(my_extrema,4,sender,sender_extrema,master,comm,mpi_err)
1305 
1306        if (my_rank==master) then
1307           proc_start = sender_extrema(:,1)
1308           proc_end   = sender_extrema(:,2)
1309           !write(std_out,*)"proc_start, proc_end",proc_start,proc_end
1310 
1311          if (prev_nrows>0) then ! backspace the file if the last record written was not complete.
1312            !write(std_out,*)" master node had to call backspace"
1313            backspace(bsh_unt)
1314            ABI_MALLOC(prev_col,(prev_nrows))
1315            read(bsh_unt) prev_col
1316            backspace(bsh_unt)
1317 
1318            if (BSp%prep_interp) then
1319              backspace(a_unt)
1320              ABI_MALLOC(aprev_col,(prev_nrows))
1321              read(a_unt) aprev_col
1322              backspace(a_unt)
1323 
1324              backspace(b_unt)
1325              ABI_MALLOC(bprev_col,(prev_nrows))
1326              read(b_unt) bprev_col
1327              backspace(b_unt)
1328 
1329              backspace(c_unt)
1330              ABI_MALLOC(cprev_col,(prev_nrows))
1331              read(c_unt) cprev_col
1332              backspace(c_unt)
1333            end if
1334          end if
1335          !
1336          ! Write the columns owned by sender.
1337          ncol = proc_end(2)-proc_start(2)+1
1338          ist=1
1339          do jj=1,ncol
1340            col_glob = proc_start(2) + jj-1
1341            nrows = col_glob
1342            if (jj==1   )  nrows=col_glob - proc_start(1) + 1
1343            if (jj==ncol) then
1344              nrows=proc_end(1)
1345              if (ncol==1)  nrows=proc_end(1) - proc_start(1) + 1
1346            end if
1347            iend = ist + nrows -1
1348            !write(std_out,*)"Using nrows, ist, iend=",nrows,ist,iend
1349            if (jj==1 .and. prev_nrows>0) then ! join prev_col and this subcolumn.
1350              write(bsh_unt) CMPLX(prev_col,kind=dpc),CMPLX(buffer(ist:iend),kind=dpc)
1351              if (BSp%prep_interp) then
1352                write(a_unt) CMPLX(aprev_col,kind=dpc),CMPLX(abuffer(ist:iend),kind=dpc)
1353                write(b_unt) CMPLX(bprev_col,kind=dpc),CMPLX(bbuffer(ist:iend),kind=dpc)
1354                write(c_unt) CMPLX(cprev_col,kind=dpc),CMPLX(cbuffer(ist:iend),kind=dpc)
1355              end if
1356              prev_nrows = prev_nrows + iend-ist+1
1357            else
1358              write(bsh_unt) CMPLX(buffer(ist:iend),kind=dpc)
1359              if (BSp%prep_interp) then
1360                write(a_unt) CMPLX(abuffer(ist:iend),kind=dpc)
1361                write(b_unt) CMPLX(bbuffer(ist:iend),kind=dpc)
1362                write(c_unt) CMPLX(cbuffer(ist:iend),kind=dpc)
1363              end if
1364              prev_nrows=0
1365            end if
1366            ist=iend+1
1367          end do
1368          if (ncol>1) then ! Reset prev_nrows if a new column has begun.
1369            prev_nrows = proc_end(1)
1370            if (proc_end(1) == proc_end(2)) prev_nrows = 0
1371          end if
1372          if (iend/=hsize_of(sender)) then
1373            write(msg,'(2(a,i0))')" Wraparound error: iend=",iend," my_hsize=",hsize_of(sender)
1374            MSG_ERROR(msg)
1375          end if
1376          if (allocated(prev_col)) then
1377            ABI_FREE(prev_col)
1378          end if
1379          if (BSp%prep_interp) then
1380            if (allocated(aprev_col)) then
1381              ABI_FREE(aprev_col)
1382            end if
1383            if (allocated(bprev_col)) then
1384              ABI_FREE(bprev_col)
1385            end if
1386            if (allocated(cprev_col)) then
1387              ABI_FREE(cprev_col)
1388            end if
1389          end if
1390          ABI_FREE(buffer)
1391          if (BSp%prep_interp) then
1392            ABI_FREE(abuffer)
1393            ABI_FREE(bbuffer)
1394            ABI_FREE(cbuffer)
1395          end if
1396        end if ! master
1397        !
1398 100    call xmpi_barrier(comm)
1399      end do ! sender
1400 
1401      call cwtime(cputime,walltime,gflops,"stop")
1402      write(msg,'(2(a,f9.1),a)')" Fortran-IO completed. cpu_time: ",cputime,"[s], walltime: ",walltime," [s]"
1403      call wrtout(std_out,msg,"COLL",do_flush=.True.)
1404    end if ! use_mpiio
1405    call timab(685,2,tsec) ! exc_build_ham(write_ham)
1406    !
1407    if (allocated(my_bsham)) then
1408      ABI_FREE(my_bsham)
1409    end if
1410    if (BSp%prep_interp) then
1411      if (allocated(acoeffs)) then
1412        ABI_FREE(acoeffs)
1413      end if
1414      if (allocated(bcoeffs)) then
1415        ABI_FREE(bcoeffs)
1416      end if
1417      if (allocated(ccoeffs)) then
1418        ABI_FREE(ccoeffs)
1419      end if
1420    end if
1421    ABI_FREE(t_start)
1422    ABI_FREE(t_stop)
1423    ABI_FREE(hsize_of)
1424  end do ! block
1425  !
1426  ! ===========================================
1427  ! === Exchange term for spin_up spin_down ===
1428  ! ===========================================
1429 
1430  if (nsppol==2) then
1431    call timab(686,2,tsec) ! exc_build_ham(exch.spin)
1432    block=3
1433    neh1=BSp%nreh(1)
1434    neh2=BSp%nreh(2)
1435    !
1436    ! The oscillators at q=0 are available on each node for both spin.
1437    ! Here the calculation of the block is parallelized over columns.
1438    ABI_MALLOC(col_start,(0:nproc-1))
1439    ABI_MALLOC(col_stop,(0:nproc-1))
1440    call xmpi_split_work2_i4b(neh2,nproc,col_start,col_stop,msg,ierr) !check this but it should be OK.
1441    if (ierr/=0) then
1442      MSG_WARNING(msg)
1443    end if
1444 
1445    my_cols(1) = col_start(my_rank)
1446    my_cols(2) = col_stop (my_rank)
1447    if (my_cols(2)-my_cols(1)<=0) then
1448      MSG_ERROR("One of the processors has zero columns!")
1449    end if
1450 
1451    ABI_MALLOC(ncols_of,(0:nproc-1))
1452    ncols_of=0
1453    do rank=0,nproc-1
1454      if (col_stop(rank)>=col_start(rank)) ncols_of(rank) = col_stop(rank)-col_start(rank)+1
1455    end do
1456 
1457    ABI_FREE(col_start)
1458    ABI_FREE(col_stop)
1459    !
1460    ! TODO might add treatment of <psi|q+G|psi> for q+G -> 0
1461    ! TODO might used enlarged G-sphere for better convergence.
1462    ! Note that my_kxssp is always written on file when nsppol=2, even when
1463    ! non-local field effects are neglected.
1464    ABI_MALLOC(my_kxssp,(neh1,my_cols(1):my_cols(2)))
1465    my_kxssp=czero
1466 
1467    if (do_exchange_term) then
1468      spin1=1; spin2=2
1469      write(msg,'(a,2i2,a)')" Calculating exchange term for (spin1,spin2) ",spin1,spin2," ..."
1470      call wrtout(std_out,msg,"COLL")
1471 
1472      ABI_MALLOC(rhotwg1,(npweps))
1473      ABI_MALLOC(rhotwg2,(npweps))
1474 
1475      ngx = Gsph_x%ng
1476      ABI_MALLOC(vc_sqrt_qbz,(ngx))
1477      !
1478      ! * Get iq_ibz, and symmetries from iq_bz.
1479      iq_bz = iqbz0 ! q = 0 -> iqbz0
1480      call get_BZ_item(Qmesh,iq_bz,qbz,iq_ibz,isym_q,itim_q)
1481      !
1482      ! * Set up table of |q(BZ)+G|
1483      if (iq_ibz==1) then
1484        do ig=1,ngx
1485          ISg = Gsph_x%rottb(ig,itim_q,isym_q)
1486          vc_sqrt_qbz(ISg)=Vcp%vcqlwl_sqrt(ig,1)
1487        end do
1488      else
1489         MSG_ERROR("iq_ibz should be 1")
1490      end if
1491 
1492      do itp=1,neh2 ! Loop over transition tp = (kp,vp,cp,spin2)
1493 
1494        if (itp<my_cols(1) .or. itp>my_cols(2)) CYCLE ! I dont have this column.
1495        ene_tp = Bsp%Trans(itp,spin2)%en
1496        ikp_bz = Bsp%Trans(itp,spin2)%k
1497        ivp    = Bsp%Trans(itp,spin2)%v
1498        icp    = Bsp%Trans(itp,spin2)%c
1499 
1500        ikp_ibz = Kmesh%tab (ikp_bz)
1501        isym_kp = Kmesh%tabo(ikp_bz)
1502        itim_kp = (3-Kmesh%tabi(ikp_bz))/2
1503 
1504        if (is_resonant) then
1505          rhotwg2(:) = sym_rhotwgq0(itim_kp,isym_kp,dim_rtwg,npweps,rhxtwg_q0(:,ivp,icp,ikp_ibz,spin2),Gsph_c)
1506        else ! Code for coupling block.
1507          rhotwg2(:) = sym_rhotwgq0(itim_kp,isym_kp,dim_rtwg,npweps,rhxtwg_q0(:,icp,ivp,ikp_ibz,spin2),Gsph_c)
1508        end if
1509        !
1510        ! Multiply by the Coulomb term.
1511         do ig=2,npweps
1512           rhotwg2(ig) = rhotwg2(ig) * vc_sqrt_qbz(ig) * vc_sqrt_qbz(ig)
1513         end do
1514 
1515        do it=1,neh1 ! Loop over transition t = (k,v,c,spin1) FULL matrix.
1516 
1517          ene_t = Bsp%Trans(it,spin1)%en
1518          ik_bz = Bsp%Trans(it,spin1)%k
1519          iv    = Bsp%Trans(it,spin1)%v
1520          ic    = Bsp%Trans(it,spin1)%c
1521 
1522          ik_ibz = Kmesh%tab(ik_bz)
1523          isym_k = Kmesh%tabo(ik_bz)
1524          itim_k = (3-Kmesh%tabi(ik_bz))/2
1525          !if (itim_k==2) CYCLE ! time-reversal or not
1526 
1527          rhotwg1(:) = sym_rhotwgq0(itim_k,isym_k,dim_rtwg,npweps,rhxtwg_q0(:,iv,ic,ik_ibz,spin1),Gsph_c)
1528          !
1529          ! sum over G/=0
1530          ctemp = XDOTC(npweps-1,rhotwg1(2:),1,rhotwg2(2:),1)
1531          ctemp = faq * kx_fact * ctemp
1532 
1533          my_kxssp(it,itp) = ctemp
1534        end do !it
1535      end do !itp
1536 
1537      ABI_FREE(rhotwg1)
1538      ABI_FREE(rhotwg2)
1539      ABI_FREE(vc_sqrt_qbz)
1540    end if ! do_exchange_term
1541    call timab(686,2,tsec) ! exc_build_ham(exch.spin)
1542    !
1543    ! =====================================
1544    ! === Write the Hamiltonian on disk ===
1545    ! =====================================
1546    call timab(685,1,tsec) ! exc_build_ham(write_ham)
1547 
1548    if (use_mpiio) then
1549 #ifdef HAVE_MPI_IO
1550      my_ncols=ncols_of(my_rank); old_type=MPI_DOUBLE_COMPLEX
1551      call xmpio_create_fsubarray_2D((/neh1,my_ncols/),(/neh1,my_ncols/),(/1,1/),old_type,hmat_type,my_offpad,mpi_err)
1552      ABI_CHECK_MPI(mpi_err,"fsubarray_2D")
1553      !
1554      ! Each node uses a different offset to skip the header and the blocks written by the other CPUs.
1555      prev_nels=0
1556      prev_ncols=0
1557      if (my_rank>0) then
1558        prev_ncols = SUM(ncols_of(0:my_rank-1))
1559        prev_nels = neh1*prev_ncols
1560      end if
1561      tmp_off = prev_nels*xmpi_bsize_dpc + prev_ncols*2*xmpio_bsize_frm
1562 
1563      my_offset = offset_of_block(block) + tmp_off + my_offpad
1564 
1565      call MPI_FILE_SET_VIEW(mpi_fh, my_offset, MPI_BYTE, hmat_type, 'native', MPI_INFO_NULL, mpi_err)
1566      ABI_CHECK_MPI(mpi_err,"SET_VIEW")
1567 
1568      call MPI_TYPE_FREE(hmat_type,mpi_err)
1569      ABI_CHECK_MPI(mpi_err,"MPI_TYPE_FREE")
1570 
1571      tmp_size = INT(neh1*my_ncols)
1572      call MPI_FILE_WRITE_ALL(mpi_fh, my_kxssp,tmp_size, MPI_DOUBLE_COMPLEX, MPI_STATUS_IGNORE, mpi_err)
1573      ABI_CHECK_MPI(mpi_err,"FILE_WRITE")
1574 
1575      ! It seems that personal calls in make the code stuck
1576      ! Master writes the Fortran record markers.
1577      ABI_MALLOC(bsize_frecord,(neh2))
1578      bsize_frecord = neh1 * xmpi_bsize_dpc
1579      ! ehdr_offset points to the end of the header.
1580      !call xmpio_write_frmarkers(mpi_fh,ehdr_offset,xmpio_collective,neh2,bsize_frecord,mpi_err)
1581      my_offset = offset_of_block(block)
1582      call xmpio_write_frmarkers(mpi_fh,my_offset,xmpio_collective,neh2,bsize_frecord,ierr)
1583      ABI_CHECK(ierr==0,"Error while writing Fortran markers")
1584      ABI_FREE(bsize_frecord)
1585 #else
1586      MSG_BUG("You should not be here")
1587 #endif
1588    else
1589      ! Use FORTRAN IO with sequential access mode.
1590      ! * Each node sends its data to master node.
1591      ! * Columns are distributed according to the rank of the node.
1592      if (my_rank==master) then
1593        do jj=my_cols(1),my_cols(2)
1594          write(bsh_unt) my_kxssp(:,jj)
1595        end do
1596        ABI_FREE(my_kxssp)
1597      end if
1598 
1599      call xmpi_barrier(comm)
1600      !
1601      ! Collect data from the other nodes.
1602      do sender=1,nproc-1
1603        ! If I'm not involved, jump to the end of the loop and wait there (sequential IO? Of course!)
1604        if (all(my_rank /= [sender, master])) goto 200
1605 
1606        if (my_rank==master)  then
1607          ABI_MALLOC(buffer_2d,(neh1,ncols_of(sender)))
1608        end if
1609        call xmpi_exch(my_kxssp,neh1*ncols_of(sender),sender,buffer_2d,master,comm,mpi_err)
1610        !
1611        if (my_rank==master) then ! Write the columns owned by sender.
1612          do jj=1,ncols_of(sender)
1613            write(bsh_unt) buffer_2d(:,jj)
1614          end do
1615          ABI_FREE(buffer_2d)
1616        end if ! master
1617        !
1618 200    call xmpi_barrier(comm)
1619      end do ! sender
1620    end if
1621    call timab(685,2,tsec) ! exc_build_ham(write_ham)
1622 
1623    ABI_FREE(ncols_of)
1624    if (allocated(my_kxssp))  then
1625      ABI_FREE(my_kxssp)
1626    end if
1627  end if
1628  !
1629  ! Close the file.
1630  if (use_mpiio) then
1631 #ifdef HAVE_MPI_IO
1632    call MPI_FILE_CLOSE(mpi_fh, mpi_err)
1633    ABI_CHECK_MPI(mpi_err,"FILE_CLOSE")
1634    ABI_FREE(offset_of_block)
1635 #endif
1636  end if
1637 
1638  ! master closes the Fortran files.
1639  if (my_rank==master) then
1640    close(bsh_unt)
1641    if (BSp%prep_interp) then
1642      close(a_unt)
1643      close(b_unt)
1644      close(c_unt)
1645    end if
1646  end if
1647  !
1648  ! Free memory.
1649  ABI_FREE(igfftg0)
1650  ABI_FREE(ktabr_k)
1651  ABI_FREE(id_tab)
1652  ABI_FREE(ktabr_kp)
1653  ABI_FREE(rhxtwg_vpv)
1654  ABI_FREE(rhxtwg_cpc)
1655  if (BSp%prep_interp) then
1656    ABI_FREE(aa_vpv)
1657    ABI_FREE(bb_vpv1)
1658    ABI_FREE(bb_vpv2)
1659    ABI_FREE(cc_vpv)
1660    ABI_FREE(aa_cpc)
1661    ABI_FREE(bb_cpc1)
1662    ABI_FREE(bb_cpc2)
1663    ABI_FREE(cc_cpc)
1664  end if
1665  ABI_FREE(ur_ckp)
1666  ABI_FREE(ur_vkp)
1667  ABI_FREE(ur_vk)
1668  ABI_FREE(ur_ck)
1669 
1670  ! Deallocation for PAW.
1671  if (Wfd%usepaw==1) then
1672    call pawcprj_free(Cp_vk)
1673    ABI_DT_FREE(Cp_vk)
1674    call pawcprj_free(Cp_ck)
1675    ABI_DT_FREE(Cp_ck)
1676    call pawcprj_free(Cp_ckp)
1677    ABI_DT_FREE(Cp_ckp)
1678    call pawcprj_free(Cp_vkp)
1679    ABI_DT_FREE(Cp_vkp)
1680    call pawcprj_free(Cp_tmp1)
1681    ABI_DT_FREE(Cp_tmp1)
1682    call pawcprj_free(Cp_tmp2)
1683    ABI_DT_FREE(Cp_tmp2)
1684    call pawcprj_free(Cp_tmp3)
1685    ABI_DT_FREE(Cp_tmp3)
1686    call pawcprj_free(Cp_tmp4)
1687    ABI_DT_FREE(Cp_tmp4)
1688  end if
1689 
1690  call xmpi_barrier(comm)
1691 
1692  DBG_EXIT("COLL")
1693 
1694  call timab(680,2,tsec)
1695 
1696 end subroutine exc_build_block

m_exc_build/exc_build_ham [ Functions ]

[ Top ] [ m_exc_build ] [ Functions ]

NAME

  exc_build_ham

FUNCTION

  Calculate and write the excitonic Hamiltonian on an external binary file (Fortran file open
  in random mode) for subsequent treatment in the Bethe-Salpeter code.

INPUTS

  BSp<excparam>=The parameters for the Bethe-Salpeter calculation.
  BS_files<excfiles>=File names internally used in the BS code.
  Cryst<crystal_t>=Info on the crystalline structure.
  Kmesh<kmesh_t>=The list of k-points in the BZ, IBZ and symmetry tables.
  Qmesh<kmesh_t>=The list of q-points for epsilon^{-1} and related symmetry tables.
  ktabr(nfftot_osc,BSp%nkbz)=The FFT index of $(R^{-1}(r-\tau))$ where R is symmetry needed to obtains
    the k-points from the irreducible image.  Used to symmetrize u_Sk where S = \transpose R^{-1}
  Gsph_x<gsphere_t>=Info on the G-sphere used to describe wavefunctions and W (the largest one is actually stored).
  Gsph_c<gsphere_t>=Info on the G-sphere used to describe the correlation part.
  Vcp<vcoul_t>=The Coulomb interaction in reciprocal space. A cutoff can be used
  W<screen_t>=Data type gathering info and data for W.
  nfftot_osc=Total Number of FFT points used for the oscillator matrix elements.
  ngfft_osc(18)=Info on the FFT algorithm used to calculate the oscillator matrix elements.
  Psps<Pseudopotential_type>=Variables related to pseudopotentials
  Pawtab(Psps%ntypat)<pawtab_type>=PAW tabulated starting data.
  Pawang<pawang_type>=PAW angular mesh and related data.
  Paw_pwff(Cryst%ntypat*Wfd%usepaw)<pawpwff_t>=Form factor used to calculate the onsite mat. elements of a plane wave.
  Wfd<wfd_t>=Handler for the wavefunctions.

OUTPUT

  The excitonic Hamiltonian is saved on an external binary file (see below).

PARENTS

      bethe_salpeter

CHILDREN

      cwtime,get_bz_item,gsph_fft_tabs,paw_rho_tw_g,pawcprj_alloc
      pawcprj_free,pawpwij_free,pawpwij_init,rho_tw_g,timab,wfd_change_ngfft
      wfd_get_cprj,wfd_get_ur,wrtout,xmpi_distab,xmpi_sum

SOURCE

2165 subroutine exc_build_ham(BSp,BS_files,Cryst,Kmesh,Qmesh,ktabr,Gsph_x,Gsph_c,Vcp,&
2166 & Wfd,W,Hdr_bse,nfftot_osc,ngfft_osc,Psps,Pawtab,Pawang,Paw_pwff)
2167 
2168 
2169 !This section has been created automatically by the script Abilint (TD).
2170 !Do not modify the following lines by hand.
2171 #undef ABI_FUNC
2172 #define ABI_FUNC 'exc_build_ham'
2173 !End of the abilint section
2174 
2175  implicit none
2176 
2177 !Arguments ------------------------------------
2178 !scalars
2179  integer,intent(in) :: nfftot_osc
2180  type(excparam),intent(in) :: BSp
2181  type(excfiles),intent(in) :: BS_files
2182  type(screen_t),intent(inout) :: W
2183  type(kmesh_t),intent(in) :: Kmesh,Qmesh
2184  type(crystal_t),intent(in) :: Cryst
2185  type(vcoul_t),intent(in) :: Vcp
2186  type(gsphere_t),intent(in) :: Gsph_x,Gsph_c
2187  type(Pseudopotential_type),intent(in) :: Psps
2188  type(Hdr_type),intent(inout) :: Hdr_bse
2189  type(pawang_type),intent(in) :: Pawang
2190  type(wfd_t),target,intent(inout) :: Wfd
2191 !arrays
2192  integer,intent(in) :: ngfft_osc(18)
2193  integer,intent(in) :: ktabr(nfftot_osc,Kmesh%nbz)
2194  type(Pawtab_type),intent(in) :: Pawtab(Psps%ntypat*Wfd%usepaw)
2195  type(pawpwff_t),intent(in) :: Paw_pwff(Psps%ntypat*Wfd%usepaw)
2196 
2197 !Local variables ------------------------------
2198 !scalars
2199  logical :: do_resonant,do_coupling
2200  !character(len=500) :: msg
2201 !arrays
2202  real(dp) :: tsec(2)
2203  complex(gwpc),allocatable :: all_mgq0(:,:,:,:,:)
2204 
2205 !************************************************************************
2206 
2207  call timab(670,1,tsec)
2208 
2209  ABI_CHECK(Wfd%nspinor==1,"nspinor==2 not coded")
2210  ABI_CHECK(nfftot_osc==PRODUCT(ngfft_osc(1:3)),"mismatch in FFT size")
2211 
2212  if (BSp%have_complex_ene) then
2213    MSG_ERROR("Complex energies are not supported yet")
2214  end if
2215 
2216  ! Do we have to compute some block?
2217  do_resonant = (BS_files%in_hreso == BSE_NOFILE)
2218  do_coupling = (BS_files%in_hcoup == BSE_NOFILE)
2219 
2220  if (BSp%use_coupling == 0) then
2221    if (.not.do_resonant) then
2222      call wrtout(std_out,"Will skip the calculation of resonant block (will use BSR file)","COLL")
2223      goto 100
2224    end if
2225  else
2226    if (.not. do_resonant .and. .not. do_coupling) then
2227      call wrtout(std_out,"Will skip the calculation of both resonant and coupling block (will use BSR and BSC files)","COLL")
2228      goto 100
2229    end if
2230  end if
2231 
2232  ! Compute M_{k,q=0}^{b,b}(G) for all k-points in the IBZ and each pair b, b'
2233  ! used for the exchange part and part of the Coulomb term.
2234  call wrtout(std_out," Calculating all matrix elements for q=0 to save CPU time","COLL")
2235 
2236  call wfd_all_mgq0(Wfd,Cryst,Qmesh,Gsph_x,Vcp,Psps,Pawtab,Paw_pwff,&
2237 &  Bsp%lomo_spin,Bsp%homo_spin,Bsp%humo_spin,nfftot_osc,ngfft_osc,Bsp%npweps,all_mgq0)
2238 
2239  ! ========================
2240  ! ==== Resonant Block ====
2241  ! ========================
2242  if (do_resonant) then
2243    call timab(672,1,tsec)
2244    call exc_build_block(BSp,Cryst,Kmesh,Qmesh,ktabr,Gsph_x,Gsph_c,Vcp,&
2245 &    Wfd,W,Hdr_bse,nfftot_osc,ngfft_osc,Psps,Pawtab,Pawang,Paw_pwff,all_mgq0,.TRUE.,BS_files%out_hreso)
2246    call timab(672,2,tsec)
2247  end if
2248 
2249  ! ========================
2250  ! ==== Coupling Block ====
2251  ! ========================
2252  if (do_coupling.and.BSp%use_coupling>0) then
2253    call timab(673,1,tsec)
2254    call exc_build_block(BSp,Cryst,Kmesh,Qmesh,ktabr,Gsph_x,Gsph_c,Vcp,&
2255 &    Wfd,W,Hdr_bse,nfftot_osc,ngfft_osc,Psps,Pawtab,Pawang,Paw_pwff,all_mgq0,.FALSE.,BS_files%out_hcoup)
2256    call timab(673,2,tsec)
2257  end if
2258  !
2259  ! * Free memory.
2260  ABI_FREE(all_mgq0)
2261 
2262 100 call timab(670,2,tsec)
2263 
2264 end subroutine exc_build_ham

m_exc_build/exc_build_v [ Functions ]

[ Top ] [ m_exc_build ] [ Functions ]

NAME

  exc_build_v

FUNCTION

  Calculate and write the excitonic Hamiltonian on an external binary file (Fortran file open
  in random mode) for subsequent treatment in the Bethe-Salpeter code.

INPUTS

  BSp<excparam>=The parameters for the Bethe-Salpeter calculation.
  Cryst<crystal_t>=Info on the crystalline structure.
  Kmesh<kmesh_t>=The list of k-points in the BZ, IBZ and symmetry tables.
  Qmesh<kmesh_t>=The list of q-points for epsilon^{-1} and related symmetry tables.
  Gsph_x<gsphere_t>=Info on the G-sphere used to describe wavefunctions and W (the largest one is actually stored).
  Gsph_c<gsphere_t>=Info on the G-sphere used to describe the correlation part.
  Vcp<vcoul_t>=The Coulomb interaction in reciprocal space. A cutoff can be used
  rhxtwg_q0
  is_resonant
  comm=MPI communicator.

OUTPUT

NOTES

  *) Version for K_V = K_C (q=0), thus KP_V = KP_C
  *) No exchange limit: use LDA energies in case.
  *) Symmetry of H(-k-k') = H*(k k') not used.
  *) Coulomb term can be approssimateed as diagonal in G.
  *) Valence bands treated from lomo on.
  *) Symmetries of the sub-blocks are used to reduce the number of elements to calculate.

            ____________
           |_(cv)__(vc)_|
   H_exc = |  R      C  |
           | -C*    -R* |

   where C is symmetric and R is Hermitian provided that the QP energies are real.

  For nsppol=1 ==> R = diag-W+2v; C = -W+2v
  since the Hamiltonian can be diagonalized in the spin-singlet basis set thanks to
  the fact that spin triplet does not contribute to the optical limit of epsilon.

  For nsppol=2 ==> R = diag-W+v; C = -W+v
  Now the matrix elements depend on the spin of the transitions but only those
  transitions in which the spin of the electron and of the hole are equal contribute
  to the macroscopic dielectric function. Moreover only the exchange term can connect transitions of different spin.
  When nsppol==2 the transitions are ordered using | (cv up) | (cv dwn) | (vc up) | (vc down) |

  The resonant block is given by:

      |  (v'c' up)       | (v'c' dwn)   |
      -----------------------------------           where v_{-+} = v_{+-}^H when the momentum of the photon is neglected.
      | [diag-W+v]++     |      v+-     | (vc up)   Note that v_{+-} is not Hermitian due to the presence of different spins.
  R = -----------------------------------           Actually it reduces to a Hermitian matrix when the system is not spin polarized.
      |     v-+          | [diag-W+v]-- | (vc dwn)  but in this case one should use nsppol=1.
      -----------------------------------           As a consequence the entire matrix is calculated and stored on file.

  The coupling block is given by:

      |  (c'v' up)   |    (c'v dwn)     |
      -----------------------------------           where v_{-+} = v_{+-}^t when the momentum of the photon is neglected.
      | [-W+v]++     |      v+-         | (vc up)   Also in this case the entire matrix v_{+-} has to be calculated
  C = -----------------------------------           and stored on file.
      |     v-+      |    [-W+v]--      | (vc dwn)
      -----------------------------------

PARENTS

CHILDREN

      get_bz_item,timab,wrtout,xmpi_split_work2_i4b

SOURCE

1771 subroutine exc_build_v(spin1,spin2,nsppol,npweps,Bsp,Cryst,Kmesh,Qmesh,Gsph_x,Gsph_c,Vcp,&
1772 &  is_resonant,rhxtwg_q0,nproc,my_rank,t_start,t_stop,my_bsham)
1773 
1774 
1775 !This section has been created automatically by the script Abilint (TD).
1776 !Do not modify the following lines by hand.
1777 #undef ABI_FUNC
1778 #define ABI_FUNC 'exc_build_v'
1779 !End of the abilint section
1780 
1781  implicit none
1782 
1783 !Arguments ------------------------------------
1784 !scalars
1785  integer,intent(in) :: spin1,spin2,nsppol,npweps,nproc,my_rank
1786  logical,intent(in) :: is_resonant
1787  type(excparam),intent(in) :: BSp
1788  type(kmesh_t),intent(in) :: Kmesh,Qmesh
1789  type(crystal_t),intent(in) :: Cryst
1790  type(vcoul_t),intent(in) :: Vcp
1791  type(gsphere_t),intent(in) :: Gsph_x,Gsph_c
1792 !arrays
1793  integer(i8b),intent(in) :: t_start(0:nproc-1),t_stop(0:nproc-1)
1794  complex(gwpc),intent(in) :: rhxtwg_q0(npweps,BSp%lomo_min:BSp%humo_max,BSp%lomo_min:BSp%humo_max,Kmesh%nibz,nsppol)
1795  complex(dpc),intent(inout) :: my_bsham(t_start(my_rank):t_stop(my_rank))
1796 
1797 !Local variables ------------------------------
1798 !scalars
1799  integer :: ISg,ngx,ik_bz,ikp_bz,dim_rtwg
1800  integer :: neh1,neh2,ig,nblocks
1801  integer :: ik_ibz,itim_k,ikp_ibz,itim_kp,isym_k,isym_kp
1802  integer :: iq_bz,iq_ibz,isym_q,itim_q,iqbz0,rank
1803  integer :: iv,ivp,ic,icp
1804  integer :: block,ierr
1805  integer(i8b) :: tot_nels,ir,it,itp
1806  real(dp) :: faq,kx_fact
1807  complex(spc) :: ctemp
1808  character(len=500) :: msg
1809 !arrays
1810  integer :: bidx(2,4),spin_ids(2,3)
1811  integer(i8b) :: nels_block(3)
1812  integer :: my_cols(2),my_rows(2) !,proc_end(2),proc_start(2)
1813  integer,allocatable :: ncols_of(:)
1814  integer,allocatable :: col_start(:),col_stop(:)
1815  real(dp) :: qbz(3),tsec(2) !kbz(3),kpbz(3),
1816  complex(dpc),allocatable :: my_kxssp(:,:)
1817  complex(gwpc),allocatable :: vc_sqrt_qbz(:),rhotwg1(:),rhotwg2(:)
1818 
1819 !************************************************************************
1820 
1821  DBG_ENTER("COLL")
1822 
1823  write(msg,'(a,2i2,a)')" Calculating exchange term for (spin1,spin2) ",spin1,spin2," ..."
1824  call wrtout(std_out,msg,"COLL")
1825 
1826  ! Basic constants.
1827  dim_rtwg=1; faq = one/(Cryst%ucvol*Kmesh%nbz)
1828 
1829  ! Identify the index of q==0
1830  iqbz0=0
1831  do iq_bz=1,Qmesh%nbz
1832    if (ALL(ABS(Qmesh%bz(:,iq_bz))<tol3)) iqbz0 = iq_bz
1833  end do
1834  ABI_CHECK(iqbz0/=0,"q=0 not found")
1835  !
1836  ! Treat the spin polarization.
1837  spin_ids(:,1) = (/1,1/)
1838  spin_ids(:,2) = (/2,2/)
1839  spin_ids(:,3) = (/1,2/)
1840 
1841  nblocks=1
1842  kx_fact=two
1843  nels_block(:)=0
1844  nels_block(1)=BSp%nreh(1)*(BSp%nreh(1)+1_i8b)/2
1845  tot_nels=nels_block(1)
1846 
1847  if (nsppol==2) then
1848    nblocks=3
1849    kx_fact=one
1850    nels_block(1) = BSp%nreh(1)*(BSp%nreh(1)+1_i8b)/2   ! Only the upper triangle for block 1 and 2
1851    nels_block(2) = BSp%nreh(2)*(BSp%nreh(2)+1_i8b)/2
1852    nels_block(3) = BSp%nreh(1)*BSp%nreh(2)*1_i8b       ! Note: Block 3 does not have symmetries.
1853    tot_nels= SUM(nels_block)
1854  end if
1855  !
1856  ! Distribute the calculation of the matrix elements among the nodes.
1857  ! * tstart and t_stop give the initial and final transition index treated by each node.
1858  ! * my_hsize is the number of transitions treated by this processor
1859  ! * my_cols(1:2) gives the initial and final column treated by this node.
1860  !
1861  do block=1,nsppol
1862    !
1863    ! Indices used to loop over bands.
1864    ! bidx contains the starting and final indices used to loop over bands.
1865    !
1866    !      (b3,b4)
1867    !         |... ...|
1868    ! (b1,b2) |... ...|
1869    !
1870    ! Resonant matrix is given by
1871    !      (v',c')
1872    !       |... ...|
1873    ! (v,c) |... ...|
1874    !
1875    ! Coupling matrix is given by
1876    !       (c',v')
1877    !       |... ...|
1878    ! (v,c) |... ...|
1879 
1880    if (is_resonant) then
1881      bidx(:,1) = [BSp%lomo_spin(block),BSp%homo_spin(block)] ! range for b1
1882      bidx(:,2) = [BSp%lumo_spin(block),BSp%humo_spin(block)] ! range for b2
1883      bidx(:,3) = [BSp%lomo_spin(block),BSp%homo_spin(block)] ! range for b3
1884      bidx(:,4) = [BSp%lumo_spin(block),BSp%humo_spin(block)] ! range for b4
1885    else
1886      bidx(:,1) = [BSp%lomo_spin(block),BSp%homo_spin(block)] ! range for b1
1887      bidx(:,2) = [BSp%lumo_spin(block),BSp%humo_spin(block)] ! range for b2
1888      bidx(:,3) = [BSp%lumo_spin(block),BSp%humo_spin(block)] ! range for b3
1889      bidx(:,4) = [BSp%lomo_spin(block),BSp%homo_spin(block)] ! range for b4
1890    end if
1891 
1892    !spin1 = spin_ids(1,block)
1893    !spin2 = spin_ids(2,block)
1894 
1895    my_cols=0
1896    do itp=1,Bsp%nreh(block)
1897      do it=1,itp
1898        ir = it + itp*(itp-1_i8b)/2
1899        if (ir==t_start(my_rank)) then
1900          my_rows(1) = it
1901          my_cols(1) = itp
1902        end if
1903        if (ir==t_stop(my_rank)) then
1904          my_rows(2) = it
1905          my_cols(2) = itp
1906        end if
1907      end do
1908    end do
1909 
1910    ! Allocate big (scalable) buffer to store the BS matrix on this node.
1911    !ABI_MALLOC(my_bsham,(t_start(my_rank):t_stop(my_rank)))
1912    !
1913    ! =====================
1914    ! === Exchange term ===
1915    ! =====================
1916    ! TODO might add treatment of <psi|q+G|psi> for q+G -> 0
1917    ! TODO might used enlarged G-sphere for better convergence.
1918 !if (do_exchange_term) then
1919    call timab(683,1,tsec) ! exc_build_ham(exchange)
1920 
1921    ABI_MALLOC(rhotwg1,(npweps))
1922    ABI_MALLOC(rhotwg2,(npweps))
1923 
1924    ngx = Gsph_x%ng
1925    ABI_MALLOC(vc_sqrt_qbz,(ngx))
1926 
1927    ! * Get iq_ibz, and symmetries from iq_bz.
1928    iq_bz = iqbz0 ! q = 0 -> iqbz0
1929    call get_BZ_item(Qmesh,iq_bz,qbz,iq_ibz,isym_q,itim_q)
1930 
1931    ! * Set up table of |q(BZ)+G|
1932    if (iq_ibz==1) then
1933      do ig=1,ngx
1934        ISg = Gsph_x%rottb(ig,itim_q,isym_q)
1935        vc_sqrt_qbz(ISg)=Vcp%vcqlwl_sqrt(ig,1)
1936      end do
1937    else
1938       MSG_ERROR("iq_ibz should be 1")
1939    end if
1940 
1941    do itp=1,BSp%nreh(block) ! Loop over transition tp = (kp,vp,cp,spin2)
1942 
1943      if (itp<my_cols(1) .or. itp>my_cols(2)) CYCLE ! I dont have this column.
1944      ikp_bz = Bsp%Trans(itp,spin2)%k
1945      ivp    = Bsp%Trans(itp,spin2)%v
1946      icp    = Bsp%Trans(itp,spin2)%c
1947 
1948      ikp_ibz = Kmesh%tab (ikp_bz)
1949      isym_kp = Kmesh%tabo(ikp_bz)
1950      itim_kp = (3-Kmesh%tabi(ikp_bz))/2
1951 
1952      if (is_resonant) then
1953        rhotwg2(:) = sym_rhotwgq0(itim_kp,isym_kp,dim_rtwg,npweps,rhxtwg_q0(:,ivp,icp,ikp_ibz,spin2),Gsph_c)
1954      else ! Code for coupling block.
1955        rhotwg2(:) = sym_rhotwgq0(itim_kp,isym_kp,dim_rtwg,npweps,rhxtwg_q0(:,icp,ivp,ikp_ibz,spin2),Gsph_c)
1956      end if
1957      !
1958      ! Multiply by the Coulomb term.
1959       do ig=2,npweps
1960         rhotwg2(ig) = rhotwg2(ig) * vc_sqrt_qbz(ig) * vc_sqrt_qbz(ig)
1961       end do
1962 
1963      do it=1,itp ! Loop over transition t = (k,v,c,spin1)
1964        ir = it + itp*(itp-1_i8b)/2
1965        if (ir<t_start(my_rank) .or. ir>t_stop(my_rank)) CYCLE
1966 
1967        ik_bz   = Bsp%Trans(it,spin1)%k
1968        iv      = Bsp%Trans(it,spin1)%v
1969        ic      = Bsp%Trans(it,spin1)%c
1970 
1971        ik_ibz = Kmesh%tab(ik_bz)
1972        isym_k = Kmesh%tabo(ik_bz)
1973        itim_k = (3-Kmesh%tabi(ik_bz))/2
1974        !if (itim_k==2) CYCLE ! time-reversal or not
1975 
1976        rhotwg1(:) = sym_rhotwgq0(itim_k,isym_k,dim_rtwg,npweps,rhxtwg_q0(:,iv,ic,ik_ibz,spin1),Gsph_c)
1977        !
1978        ! sum over G/=0
1979        ctemp = xdotc(npweps-1,rhotwg1(2:),1,rhotwg2(2:),1)
1980        ctemp = faq * kx_fact * ctemp
1981 
1982        ! exchange term is non divergent !
1983        !if (BSp%prep_interp) then
1984        !  ccoeffs(ir) = ccoeffs(ir) + ctemp
1985        !end if
1986 
1987        my_bsham(ir) = my_bsham(ir) + ctemp
1988      end do !it
1989    end do !itp
1990 
1991    ABI_FREE(rhotwg1)
1992    ABI_FREE(rhotwg2)
1993    ABI_FREE(vc_sqrt_qbz)
1994 
1995    call timab(683,2,tsec) ! exc_build_ham(exchange)
1996 !end if ! do_exchange_term
1997  end do ! block
1998 
1999  !
2000  ! ===========================================
2001  ! === Exchange term for spin_up spin_down ===
2002  ! ===========================================
2003 
2004 if (nsppol==2) then
2005  call timab(686,2,tsec) ! exc_build_ham(exch.spin)
2006  block=3
2007  neh1=BSp%nreh(1)
2008  neh2=BSp%nreh(2)
2009  !
2010  ! The oscillators at q=0 are available on each node for both spin.
2011  ! Here the calculation of the block is parallelized over columns.
2012  ABI_MALLOC(col_start,(0:nproc-1))
2013  ABI_MALLOC(col_stop,(0:nproc-1))
2014  call xmpi_split_work2_i4b(neh2,nproc,col_start,col_stop,msg,ierr) !check this but it should be OK.
2015  if (ierr/=0) then
2016    MSG_WARNING(msg)
2017  end if
2018 
2019  my_cols(1) = col_start(my_rank)
2020  my_cols(2) = col_stop (my_rank)
2021  if (my_cols(2)-my_cols(1)<=0) then
2022    MSG_ERROR("One of the processors has zero columns!")
2023  end if
2024 
2025  ABI_MALLOC(ncols_of,(0:nproc-1))
2026  ncols_of=0
2027  do rank=0,nproc-1
2028    if (col_stop(rank)>=col_start(rank)) ncols_of(rank) = col_stop(rank)-col_start(rank)+1
2029  end do
2030 
2031  ABI_FREE(col_start)
2032  ABI_FREE(col_stop)
2033  !
2034  ! TODO might add treatment of <psi|q+G|psi> for q+G -> 0
2035  ! TODO might used enlarged G-sphere for better convergence.
2036  ! Note that my_kxssp is always written on file when nsppol=2, even when
2037  ! non-local field effects are neglected.
2038  ABI_MALLOC(my_kxssp,(neh1,my_cols(1):my_cols(2)))
2039  my_kxssp=czero
2040 
2041  !if (do_exchange_term) then
2042    !spin1=1; spin2=2
2043    ABI_MALLOC(rhotwg1,(npweps))
2044    ABI_MALLOC(rhotwg2,(npweps))
2045 
2046    ngx = Gsph_x%ng
2047    ABI_MALLOC(vc_sqrt_qbz,(ngx))
2048    !
2049    ! * Get iq_ibz, and symmetries from iq_bz.
2050    iq_bz = iqbz0 ! q = 0 -> iqbz0
2051    call get_BZ_item(Qmesh,iq_bz,qbz,iq_ibz,isym_q,itim_q)
2052    !
2053    ! * Set up table of |q(BZ)+G|
2054    if (iq_ibz==1) then
2055      do ig=1,ngx
2056        ISg = Gsph_x%rottb(ig,itim_q,isym_q)
2057        vc_sqrt_qbz(ISg)=Vcp%vcqlwl_sqrt(ig,1)
2058      end do
2059    else
2060       MSG_ERROR("iq_ibz should be 1")
2061    end if
2062 
2063    do itp=1,neh2 ! Loop over transition tp = (kp,vp,cp,spin2)
2064 
2065      if (itp<my_cols(1) .or. itp>my_cols(2)) CYCLE ! I dont have this column.
2066      ikp_bz = Bsp%Trans(itp,spin2)%k
2067      ivp    = Bsp%Trans(itp,spin2)%v
2068      icp    = Bsp%Trans(itp,spin2)%c
2069 
2070      ikp_ibz = Kmesh%tab (ikp_bz)
2071      isym_kp = Kmesh%tabo(ikp_bz)
2072      itim_kp = (3-Kmesh%tabi(ikp_bz))/2
2073 
2074      if (is_resonant) then
2075        rhotwg2(:) = sym_rhotwgq0(itim_kp,isym_kp,dim_rtwg,npweps,rhxtwg_q0(:,ivp,icp,ikp_ibz,spin2),Gsph_c)
2076      else ! Code for coupling block.
2077        rhotwg2(:) = sym_rhotwgq0(itim_kp,isym_kp,dim_rtwg,npweps,rhxtwg_q0(:,icp,ivp,ikp_ibz,spin2),Gsph_c)
2078      end if
2079      !
2080      ! Multiply by the Coulomb term.
2081       do ig=2,npweps
2082         rhotwg2(ig) = rhotwg2(ig) * vc_sqrt_qbz(ig) * vc_sqrt_qbz(ig)
2083       end do
2084 
2085      do it=1,neh1 ! Loop over transition t = (k,v,c,spin1) FULL matrix.
2086        ik_bz = Bsp%Trans(it,spin1)%k
2087        iv    = Bsp%Trans(it,spin1)%v
2088        ic    = Bsp%Trans(it,spin1)%c
2089 
2090        ik_ibz = Kmesh%tab(ik_bz)
2091        isym_k = Kmesh%tabo(ik_bz)
2092        itim_k = (3-Kmesh%tabi(ik_bz))/2
2093        !if (itim_k==2) CYCLE ! time-reversal or not
2094 
2095        rhotwg1(:) = sym_rhotwgq0(itim_k,isym_k,dim_rtwg,npweps,rhxtwg_q0(:,iv,ic,ik_ibz,spin1),Gsph_c)
2096        !
2097        ! sum over G/=0
2098        ctemp = XDOTC(npweps-1,rhotwg1(2:),1,rhotwg2(2:),1)
2099        ctemp = faq * kx_fact * ctemp
2100 
2101        my_kxssp(it,itp) = ctemp
2102      end do !it
2103    end do !itp
2104 
2105    ABI_FREE(rhotwg1)
2106    ABI_FREE(rhotwg2)
2107    ABI_FREE(vc_sqrt_qbz)
2108  !end if ! do_exchange_term
2109  call timab(686,2,tsec) ! exc_build_ham(exch.spin)
2110 
2111  ABI_FREE(ncols_of)
2112  if (allocated(my_kxssp))  then
2113    ABI_FREE(my_kxssp)
2114  end if
2115  end if
2116 
2117  DBG_EXIT("COLL")
2118 
2119 end subroutine exc_build_v

m_exc_build/wfd_all_mgq0 [ Functions ]

[ Top ] [ m_exc_build ] [ Functions ]

NAME

  wfd_all_mgq0

FUNCTION

INPUTS

  Wfd<wfd_t>=Handler for the wavefunctions.
  Cryst<crystal_t>=Info on the crystalline structure.
  Qmesh<kmesh_t>=The list of q-points for epsilon^{-1} and related symmetry tables.
  Gsph_x<gsphere_t>=G-sphere with the G-vectors in mgq0.
  Vcp<vcoul_t>=The Coulomb interaction in reciprocal space. A cutoff can be used
  Psps<Pseudopotential_type>=Variables related to pseudopotentials
  Pawtab(Psps%ntypat)<pawtab_type>=PAW tabulated starting data.
  Paw_pwff(Cryst%ntypat*Wfd%usepaw)<pawpwff_t>=Form factor used to calculate the onsite mat. elements of a plane wave.
  lomo_spin(Wfd%nsppol)=Lowest occupied band for each spin
  homo_spin(Wfd%nsppol)=Highest occupied band for each spin
  humo_spin(Wfd%nsppol)=Highest unoccupied band for each spin
  nfftot_osc=Total Number of FFT points used for the oscillator matrix elements.
  ngfft_osc(18)=Info on the FFT algorithm used to calculate the oscillator matrix elements.
  npweps=Number of G-vectors in mgq0.

OUTPUT

   mgq0(npweps,lomo_min:humo_max,lomo_min:humo_max,Wfd%nkibz,Wfd%nsppol)
     Allocated here and filled with the matrix elements on each node.

PARENTS

      exc_build_ham

CHILDREN

      cwtime,get_bz_item,gsph_fft_tabs,paw_rho_tw_g,pawcprj_alloc
      pawcprj_free,pawpwij_free,pawpwij_init,rho_tw_g,timab,wfd_change_ngfft
      wfd_get_cprj,wfd_get_ur,wrtout,xmpi_distab,xmpi_sum

SOURCE

2303 subroutine wfd_all_mgq0(Wfd,Cryst,Qmesh,Gsph_x,Vcp,&
2304 & Psps,Pawtab,Paw_pwff,lomo_spin,homo_spin,humo_spin,nfftot_osc,ngfft_osc,npweps,mgq0)
2305 
2306 
2307 !This section has been created automatically by the script Abilint (TD).
2308 !Do not modify the following lines by hand.
2309 #undef ABI_FUNC
2310 #define ABI_FUNC 'wfd_all_mgq0'
2311 !End of the abilint section
2312 
2313  implicit none
2314 
2315 !Arguments ------------------------------------
2316 !scalars
2317  integer,intent(in) :: nfftot_osc,npweps
2318  type(kmesh_t),intent(in) :: Qmesh
2319  type(crystal_t),intent(in) :: Cryst
2320  type(vcoul_t),intent(in) :: Vcp
2321  type(gsphere_t),intent(in) :: Gsph_x
2322  type(Pseudopotential_type),intent(in) :: Psps
2323  type(wfd_t),target,intent(inout) :: Wfd
2324 !arrays
2325  integer,intent(in) :: lomo_spin(Wfd%nsppol),homo_spin(Wfd%nsppol),humo_spin(Wfd%nsppol)
2326  integer,intent(in) :: ngfft_osc(18)
2327  complex(gwpc),allocatable,intent(out) :: mgq0(:,:,:,:,:)
2328  type(Pawtab_type),intent(in) :: Pawtab(Psps%ntypat)
2329  type(pawpwff_t),intent(in) :: Paw_pwff(Psps%ntypat*Wfd%usepaw)
2330 
2331 !Local variables ------------------------------
2332 !scalars
2333  integer,parameter :: map2sphere1=1,dim_rtwg1=1,ndat1=1
2334  integer :: use_padfft,mgfft_osc,fftalga_osc,ii
2335  integer :: ik_ibz,itim_k,isym_k,iq_bz,iq_ibz,isym_q,itim_q,iqbz0
2336  integer :: ierr,iv,ic,spin,lomo_min,humo_max !,inv_ipw,ipw
2337  real(dp) :: cpu,wall,gflops !q0vol,fcc_const
2338  complex(dpc) :: ph_mkt
2339  character(len=500) :: msg
2340 !arrays
2341  integer,allocatable :: igfftg0(:),task_distrib(:,:,:,:)
2342  integer,allocatable :: gbound(:,:),id_tab(:)
2343  real(dp) :: qbz(3),spinrot_k(4),tsec(2)
2344  complex(gwpc),allocatable :: rhotwg1(:)
2345  complex(gwpc),target,allocatable :: ur1(:),ur2(:)
2346  complex(gwpc),ABI_CONTIGUOUS pointer :: ptr_ur1(:),ptr_ur2(:)
2347  type(pawcprj_type),allocatable :: Cp1(:,:),Cp2(:,:)
2348  type(pawpwij_t),allocatable :: Pwij_q0(:)
2349 
2350 !************************************************************************
2351 
2352  call timab(671,1,tsec)
2353 
2354  ABI_CHECK(Wfd%nspinor==1,"nspinor==2 not coded")
2355  ABI_CHECK(nfftot_osc==PRODUCT(ngfft_osc(1:3)),"mismatch in FFT size")
2356 
2357  lomo_min = MINVAL(lomo_spin); humo_max = MAXVAL(humo_spin)
2358 
2359  if ( ANY(ngfft_osc(1:3) /= Wfd%ngfft(1:3)) ) then
2360    call wfd_change_ngfft(Wfd,Cryst,Psps,ngfft_osc)
2361  end if
2362 
2363  mgfft_osc   = MAXVAL(ngfft_osc(1:3))
2364  fftalga_osc = ngfft_osc(7)/100 !; fftalgc_osc=MOD(ngfft_osc(7),10)
2365 
2366  ! (temporary) Table used for the wavefunction in the IBZ.
2367  ABI_MALLOC(id_tab, (Wfd%nfft))
2368  id_tab = (/(ii, ii=1,Wfd%nfft)/)
2369 
2370  ! Analytic integration of 4pi/q^2 over the volume element:
2371  ! $4pi/V \int_V d^3q 1/q^2 =4pi bz_geometric_factor V^(-2/3)$
2372  ! i_sz=4*pi*bz_geometry_factor*q0_vol**(-two_thirds) where q0_vol= V_BZ/N_k
2373  ! bz_geometry_factor: sphere=7.79, fcc=7.44, sc=6.188, bcc=6.946, wz=5.255
2374  ! (see gwa.pdf, appendix A.4)
2375 
2376  ! If q=0 and C=V then set up rho-twiddle(G=0) to reflect an
2377  ! analytic integration of q**-2 over the volume element:
2378  ! <q**-2> = 7.44 V**(-2/3)   (for fcc cell)
2379 
2380  ! q0vol = (8.0*pi**3) / (Cryst%ucvol*Kmesh%nbz)
2381  ! fcc_const = SQRT(7.44*q0vol**(-2.0/3.0))
2382  ! rtw = (6.0*pi**2/(Cryst%ucvol*Kmesh%nkbz))**(1./3.)
2383  ! Average of (q+q')**-2 integration for head of Coulomb matrix
2384  ! INTRTW(QL) = (2*pi*rtw + pi*(rtw**2/QL-QL)*LOG((QL+rtw)/(QL-rtw)))
2385  ! &              * (Cryst%ucvol*Kmesh%nbz)/(2*pi)**3. * QL*QL
2386 
2387  if (Wfd%usepaw==1) then
2388    ABI_DT_MALLOC(Cp1,(Wfd%natom,Wfd%nspinor))
2389    call pawcprj_alloc(Cp1,0,Wfd%nlmn_atm)
2390    ABI_DT_MALLOC(Cp2,(Wfd%natom,Wfd%nspinor))
2391    call pawcprj_alloc(Cp2,0,Wfd%nlmn_atm)
2392  end if
2393 
2394  ABI_MALLOC(ur1,(nfftot_osc*Wfd%nspinor))
2395  ABI_MALLOC(ur2,(nfftot_osc*Wfd%nspinor))
2396 
2397  ! Identify q==0
2398  iqbz0=0
2399  do iq_bz=1,Qmesh%nbz
2400    if (ALL(ABS(Qmesh%bz(:,iq_bz))<tol3)) iqbz0 = iq_bz
2401  end do
2402  ABI_CHECK(iqbz0/=0,"q=0 not found in q-point list!")
2403 
2404  ! * Get iq_ibz, and symmetries from iqbz0.
2405  call get_BZ_item(Qmesh,iqbz0,qbz,iq_ibz,isym_q,itim_q)
2406 
2407  if (Wfd%usepaw==1) then ! Prepare onsite contributions at q==0
2408    ABI_DT_MALLOC(Pwij_q0,(Cryst%ntypat))
2409    call pawpwij_init(Pwij_q0,npweps,Qmesh%bz(:,iqbz0),Gsph_x%gvec,Cryst%rprimd,Psps,Pawtab,Paw_pwff)
2410  end if
2411  !
2412  ! Tables for the FFT of the oscillators.
2413  !  a) FFT index of the G sphere (only vertical transitions, unlike cchi0, no need to shift the sphere).
2414  !  b) gbound table for the zero-padded FFT performed in rhotwg.
2415  ABI_MALLOC(igfftg0,(Gsph_x%ng))
2416  ABI_MALLOC(gbound,(2*mgfft_osc+8,2))
2417  call gsph_fft_tabs(Gsph_x,(/0,0,0/),mgfft_osc,ngfft_osc,use_padfft,gbound,igfftg0)
2418  if ( ANY(fftalga_osc == (/2,4/)) ) use_padfft=0 ! Pad-FFT is not coded in rho_tw_g
2419  if (use_padfft==0) then
2420    ABI_FREE(gbound)
2421    ABI_MALLOC(gbound,(2*mgfft_osc+8,2*use_padfft))
2422  end if
2423 
2424  ABI_MALLOC(rhotwg1,(npweps))
2425 
2426  ABI_STAT_MALLOC(mgq0, (npweps,lomo_min:humo_max,lomo_min:humo_max,Wfd%nkibz,Wfd%nsppol), ierr)
2427  ABI_CHECK(ierr==0, "out-of-memory in mgq0")
2428  mgq0 = czero
2429 
2430  call cwtime(cpu,wall,gflops,"start")
2431 
2432  do spin=1,Wfd%nsppol
2433    ! Distribute the calculation of the matrix elements.
2434    ! processors have the entire set of wavefunctions hence we divide the workload
2435    ! without checking if the pair of states is available. Last dimension is fake.
2436    ABI_MALLOC(task_distrib,(lomo_spin(spin):humo_spin(spin),lomo_spin(spin):humo_spin(spin),Wfd%nkibz,1))
2437    call xmpi_distab(Wfd%nproc,task_distrib)
2438 
2439    ! loop over the k-points in IBZ
2440    do ik_ibz=1,Wfd%nkibz
2441      if ( ALL(task_distrib(:,:,ik_ibz,1)/= Wfd%my_rank) ) CYCLE
2442 
2443      ! Don't need to symmetrize the wavefunctions.
2444      itim_k=1; isym_k=1; ph_mkt=cone; spinrot_k=Cryst%spinrot(:,isym_k)
2445 
2446      do iv=lomo_spin(spin),humo_spin(spin) ! Loop over band V
2447        if ( ALL(task_distrib(:,iv,ik_ibz,1)/=Wfd%my_rank) ) CYCLE
2448 
2449        if (wfd_ihave_ur(Wfd,iv,ik_ibz,spin,how="Stored")) then
2450          ptr_ur1 =>  Wfd%Wave(iv,ik_ibz,spin)%ur
2451        else
2452          call wfd_get_ur(Wfd,iv,ik_ibz,spin,ur1)
2453          ptr_ur1 =>  ur1
2454        end if
2455 
2456        if (Wfd%usepaw==1) then
2457          call wfd_get_cprj(Wfd,iv,ik_ibz,spin,Cryst,Cp1,sorted=.FALSE.)
2458        end if
2459 
2460        ! Loop over band C
2461        do ic=lomo_spin(spin),humo_spin(spin)
2462          if ( task_distrib(ic,iv,ik_ibz,1)/=Wfd%my_rank ) CYCLE
2463 
2464          if (wfd_ihave_ur(Wfd,ic,ik_ibz,spin,how="Stored")) then
2465            ptr_ur2 =>  Wfd%Wave(ic,ik_ibz,spin)%ur
2466          else
2467            call wfd_get_ur(Wfd,ic,ik_ibz,spin,ur2)
2468            ptr_ur2 =>  ur2
2469          end if
2470 
2471          if (Wfd%usepaw==1) then
2472            call wfd_get_cprj(Wfd,ic,ik_ibz,spin,Cryst,Cp2,sorted=.FALSE.)
2473          end if
2474 
2475          call rho_tw_g(Wfd%nspinor,npweps,nfftot_osc,ndat1,ngfft_osc,map2sphere1,use_padfft,igfftg0,gbound,&
2476 &          ptr_ur1,1,id_tab,ph_mkt,spinrot_k,&
2477 &          ptr_ur2,1,id_tab,ph_mkt,spinrot_k,&
2478 &          dim_rtwg1,rhotwg1)
2479 
2480          if (Wfd%usepaw==1) then ! Add PAW onsite contribution.
2481            call paw_rho_tw_g(npweps,dim_rtwg1,Wfd%nspinor,Cryst%natom,Cryst%ntypat,Cryst%typat,Cryst%xred,Gsph_x%gvec,&
2482 &            Cp1,Cp2,Pwij_q0,rhotwg1)
2483          end if
2484 
2485          ! If q=0 treat Exchange and Coulomb-term independently
2486          if (iv <= homo_spin(spin) .and. ic <= homo_spin(spin) .or. &
2487 &            iv >  homo_spin(spin) .and. ic >  homo_spin(spin)) then
2488 
2489            if (iv/=ic) then !COULOMB term: C/=V: ignore them
2490              rhotwg1(1) = czero_gw
2491            else
2492              ! If q=0 and C=V then set up rho-twiddle(G=0) to reflect an
2493              ! analytic integration of q**-2 over the volume element:
2494              ! <q**-2> = 7.44 V**(-2/3)   (for fcc cell)
2495              !rhotwg1(1) = fcc_const * qpg(1,iqbz0)
2496              rhotwg1(1) = SQRT(GWPC_CMPLX(Vcp%i_sz,zero)) / Vcp%vcqlwl_sqrt(1,1)
2497              !if (vcut) rhotwg1(1) = 1.0
2498            end if
2499 
2500          else
2501            ! At present this term is set to zero
2502            ! EXCHANGE term: limit value.
2503            ! Set up rho-twiddle(G=0) using small vector q instead of zero and k.p perturbation theory (see notes)
2504            rhotwg1(1) = czero_gw
2505          end if
2506 
2507          mgq0(:,iv,ic,ik_ibz,spin) = rhotwg1(:)
2508        end do !ic
2509      end do !iv
2510    end do !ik_ibz
2511 
2512    ABI_FREE(task_distrib)
2513  end do !spin
2514 
2515  ! TODO: One can speedup the calculation by computing the upper triangle of the
2516  ! matrix in (b,b') space and then take advantage of the symmetry property:
2517  !
2518  !   M_{k,0}{{bb'}(G)^* = M{k,0}{b'b'}(-G)
2519 
2520 #if 0
2521  !!!! $OMP PARALLEL DO COLLAPSE(3) PRIVATE(inv_ipw)
2522  do spin=1,Wfd%nsppol
2523    do ik_ibz=1,Wfd%nkibz
2524      do iv=lomo_spin(spin),humo_spin(spin)
2525        do ic=1,iv-1
2526          do ipw=1,npweps
2527            inv_ipw = gsph_x%g2mg(ipw)
2528            mgq0(inv_ipw,ic,iv,ik_ibz,spin) = mgq0(ipw,iv,ic,ik_ibz,spin)
2529          end do
2530        end do
2531      end do
2532    end do
2533  end do
2534 #endif
2535  !
2536  ! Gather matrix elements on each node.
2537  call xmpi_sum(mgq0,Wfd%comm,ierr)
2538 
2539  call cwtime(cpu,wall,gflops,"stop")
2540  write(msg,'(2(a,f9.6))')"cpu_time = ",cpu,", wall_time = ",wall
2541  call wrtout(std_out,msg,"PERS")
2542 
2543  ABI_FREE(rhotwg1)
2544  ABI_FREE(igfftg0)
2545  ABI_FREE(gbound)
2546  ABI_FREE(ur1)
2547  ABI_FREE(ur2)
2548  ABI_FREE(id_tab)
2549 
2550  if (Wfd%usepaw==1) then
2551    ! Deallocation for PAW.
2552    call pawpwij_free(Pwij_q0)
2553    ABI_DT_FREE(Pwij_q0)
2554    call pawcprj_free(Cp1)
2555    ABI_DT_FREE(Cp1)
2556    call pawcprj_free(Cp2)
2557    ABI_DT_FREE(Cp2)
2558  end if
2559 
2560  call timab(671,2,tsec)
2561 
2562 end subroutine wfd_all_mgq0