TABLE OF CONTENTS


ABINIT/m_exc_build [ Modules ]

[ Top ] [ Modules ]

NAME

  m_exc_build

FUNCTION

  Build BSE Hamiltonian in e-h reprensentation.

COPYRIGHT

  Copyright (C) 1992-2009 EXC group (L.Reining, V.Olevano, F.Sottile, S.Albrecht, G.Onida)
  Copyright (C) 2009-2022 ABINIT group (L.Reining, V.Olevano, F.Sottile, S.Albrecht, G.Onida, M.Giantomassi)
  This file is distributed under the terms of the
  GNU General Public License, see ~abinit/COPYING
  or http://www.gnu.org/copyleft/gpl.txt .

SOURCE

17 #if defined HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20 
21 #include "abi_common.h"
22 
23 module m_exc_build
24 
25  use defs_basis
26  use m_abicore
27  use m_bs_defs
28  use m_bse_io
29  use m_xmpi
30  use m_errors
31  use m_screen
32 #if defined HAVE_MPI2
33  use mpi
34 #endif
35  use m_hdr
36 
37  use m_wfd,          only : wfdgw_t, wave_t, WFD_STORED
38  use defs_datatypes, only : pseudopotential_type
39  use m_gwdefs,       only : czero_gw, cone_gw, GW_TOLQ0
40  use m_time,         only : cwtime, timab
41  use m_io_tools,     only : get_unit, open_file
42  use m_hide_blas,    only : xdotc, xgemv
43  use m_geometry,     only : normv
44  use m_crystal,      only : crystal_t
45  use m_gsphere,      only : gsphere_t
46  use m_vcoul,        only : vcoul_t
47  use m_bz_mesh,      only : kmesh_t, findqg0
48  use m_pawpwij,      only : pawpwff_t, pawpwij_t, pawpwij_init, pawpwij_free, paw_rho_tw_g
49  use m_pawang,       only : pawang_type
50  use m_pawtab,       only : pawtab_type
51  use m_pawcprj,      only : pawcprj_type, pawcprj_alloc, pawcprj_free
52  use m_paw_sym,      only : paw_symcprj_op
53  use m_oscillators,  only : rho_tw_g, sym_rhotwgq0
54 
55  implicit none
56 
57 #if defined HAVE_MPI1
58  include 'mpif.h'
59 #endif
60 
61  private

m_exc_build/exc_build_block [ Functions ]

[ Top ] [ m_exc_build ] [ Functions ]

NAME

  exc_build_block

FUNCTION

  Calculate and write the excitonic Hamiltonian on an external binary file (Fortran file open
  in random mode) for subsequent treatment in the Bethe-Salpeter code.

INPUTS

  BSp<excparam>=The parameters for the Bethe-Salpeter calculation.
  Cryst<crystal_t>=Info on the crystalline structure.
  Kmesh<kmesh_t>=The list of k-points in the BZ, IBZ and symmetry tables.
  Qmesh<kmesh_t>=The list of q-points for epsilon^{-1} and related symmetry tables.
  ktabr(nfftot_osc,BSp%nkbz)=The FFT index of $(R^{-1}(r-\tau))$ where R is symmetry needed to obtains
    the k-points from the irreducible image.  Used to symmetrize u_Sk where S = \transpose R^{-1}
  Gsph_x<gsphere_t>=Info on the G-sphere used to describe wavefunctions and W (the largest one is actually stored).
  Gsph_c<gsphere_t>=Info on the G-sphere used to describe the correlation part.
  Vcp<vcoul_t>=The Coulomb interaction in reciprocal space. A cutoff can be used
  W<screen_t>=Data type gathering info and data for W.
  nfftot_osc=Total Number of FFT points used for the oscillator matrix elements.
  ngfft_osc(18)=Info on the FFT algorithm used to calculate the oscillator matrix elements.
  Psps<Pseudopotential_type>=Variables related to pseudopotentials
  Pawtab(Psps%ntypat)<pawtab_type>=PAW tabulated starting data.
  Pawang<pawang_type>=PAW angular mesh and related data.
  Paw_pwff(Cryst%ntypat*Wfd%usepaw)<pawpwff_t>=Form factor used to calculate the onsite matrix
    elements of a plane wave.
  Wfd<wfdgw_t>=Handler for the wavefunctions.
    prtvol=Verbosity level.
  rhxtwg_q0
  is_resonant
  fname
  comm=MPI communicator.

OUTPUT

  The excitonic Hamiltonian is saved on an external binary file (see below).

NOTES

  *) Version for K_V = K_C (q=0), thus KP_V = KP_C
  *) No exchange limit: use DFT energies in case.
  *) Symmetry of H(-k-k') = H*(k k') not used.
  *) Coulomb term can be approximated as diagonal in G.
  *) Valence bands treated from lomo on.
  *) Symmetries of the sub-blocks are used to reduce the number of elements to calculate.

            ____________
           |_(cv)__(vc)_|
   H_exc = |  R      C  |
           | -C*    -R* |

   where C is symmetric and R is Hermitian provided that the QP energies are real.

  For nsppol=1 ==> R = diag-W+2v; C = -W+2v
  since the Hamiltonian can be diagonalized in the spin-singlet basis set thanks to
  the fact that spin triplet does not contribute to the optical limit of epsilon.

  For nsppol=2 ==> R = diag-W+v; C = -W+v
  Now the matrix elements depend on the spin of the transitions but only those
  transitions in which the spin of the electron and of the hole are equal contribute
  to the macroscopic dielectric function. Moreover only the exchange term can connect transitions of different spin.
  When nsppol==2 the transitions are ordered using | (cv up) | (cv dwn) | (vc up) | (vc down) |

  The resonant block is given by:

      |  (v'c' up)       | (v'c' dwn)   |
      -----------------------------------           where v_{-+} = v_{+-}^H when the momentum of the photon is neglected.
      | [diag-W+v]++     |      v+-     | (vc up)   Note that v_{+-} is not Hermitian due to the presence of different spins.
  R = -----------------------------------           Actually it reduces to a Hermitian matrix when the system is not spin polarized.
      |     v-+          | [diag-W+v]-- | (vc dwn)  but in this case one should use nsppol=1.
      -----------------------------------           As a consequence the entire matrix is calculated and stored on file.

  The coupling block is given by:

      |  (c'v' up)   |    (c'v dwn)     |
      -----------------------------------           where v_{-+} = v_{+-}^t when the momentum of the photon is neglected.
      | [-W+v]++     |      v+-         | (vc up)   Also in this case the entire matrix v_{+-} has to be calculated
  C = -----------------------------------           and stored on file.
      |     v-+      |    [-W+v]--      | (vc dwn)
      -----------------------------------

SOURCE

 151 subroutine exc_build_block(BSp,Cryst,Kmesh,Qmesh,ktabr,Gsph_x,Gsph_c,Vcp,Wfd,W,Hdr_bse,&
 152 &  nfftot_osc,ngfft_osc,Psps,Pawtab,Pawang,Paw_pwff,rhxtwg_q0,is_resonant,fname)
 153 
 154 !Arguments ------------------------------------
 155 !scalars
 156  integer,intent(in) :: nfftot_osc
 157  character(len=*),intent(in) :: fname
 158  logical,intent(in) :: is_resonant
 159  type(excparam),intent(in) :: BSp
 160  type(screen_t),intent(inout) :: W
 161  type(kmesh_t),intent(in) :: Kmesh,Qmesh
 162  type(crystal_t),intent(in) :: Cryst
 163  type(vcoul_t),intent(in) :: Vcp
 164  type(gsphere_t),intent(in) :: Gsph_x,Gsph_c
 165  type(Pseudopotential_type),intent(in) :: Psps
 166  type(Hdr_type),intent(inout) :: Hdr_bse
 167  type(pawang_type),intent(in) :: Pawang
 168  type(wfdgw_t),target,intent(inout) :: Wfd
 169 !arrays
 170  integer,intent(in) :: ngfft_osc(18)
 171  integer,intent(in) :: ktabr(nfftot_osc,Kmesh%nbz)
 172  complex(gwpc),intent(in) :: rhxtwg_q0(BSp%npweps,BSp%lomo_min:BSp%humo_max,BSp%lomo_min:BSp%humo_max,Wfd%nkibz,Wfd%nsppol)
 173  type(Pawtab_type),intent(in) :: Pawtab(Psps%ntypat*Wfd%usepaw)
 174  type(pawpwff_t),intent(in) :: Paw_pwff(Psps%ntypat*Wfd%usepaw)
 175 
 176 !Local variables ------------------------------
 177 !scalars
 178  integer,parameter :: map2sphere=1,ndat1=1,master=0
 179  integer(i8b) :: bsize_my_block
 180  integer :: nspinor,nsppol,ISg,mpi_err,tmp_size,ngx
 181  integer :: ik_bz,ikp_bz,col_glob,itpk_min,itpk_max
 182  integer :: dim_rtwg,bsh_unt,ncol,dump_unt,npweps
 183 #ifdef HAVE_MPI_IO
 184  integer :: amode,mpi_fh,hmat_type,offset_err,old_type
 185  integer(XMPI_OFFSET_KIND) :: ehdr_offset,my_offset
 186  logical,parameter :: is_fortran_file=.TRUE.
 187 #endif
 188  integer :: neh1,neh2,ig,nblocks
 189  integer :: ik_ibz,itim_k,ikp_ibz,itim_kp,isym_k,isym_kp
 190  integer :: iq_bz,iq_ibz,isym_q,itim_q,iqbz0,rank
 191  integer :: iv,ivp,ic,icp,jj,nrows,sender,my_ncols
 192  integer :: use_padfft,prev_nrows,spin1,spin2,block
 193  integer :: ierr,nproc,my_rank,mgfft_osc,fftalga_osc,comm
 194  integer(i8b) :: tot_nels,prev_nels,prev_ncols,nels,ir,it,itp,ist,iend,my_hsize
 195  real(dp) :: faq,kx_fact,cputime,walltime,gflops
 196  complex(spc) :: http,ctemp
 197  complex(dpc) :: ph_mkpt,ph_mkt,ene_t,ene_tp
 198  logical,parameter :: with_umklp=.FALSE.
 199  logical :: use_mpiio,do_coulomb_term,do_exchange_term,w_is_diagonal,isirred
 200  logical :: is_qeq0
 201  character(len=500) :: msg
 202  type(wave_t),pointer :: wave_ck,  wave_ckp, wave_vk, wave_vkp
 203 !arrays
 204  integer :: bidx(2,4),g0(3),spin_ids(2,3)
 205  integer(i8b) :: nels_block(3)
 206  integer :: my_cols(2),my_rows(2),proc_end(2),proc_start(2)
 207  integer :: my_extrema(2,2),sender_extrema(2,2),my_starts(2),my_ends(2)
 208  integer,allocatable :: igfftg0(:),ktabr_k(:),ktabr_kp(:),id_tab(:)
 209  integer,allocatable :: ncols_of(:)
 210  integer(i8b),allocatable :: t_start(:),t_stop(:),hsize_of(:)
 211  integer,allocatable :: col_start(:),col_stop(:)
 212  integer,allocatable :: gbound(:,:)
 213  real(dp) :: kbz(3),kpbz(3),qbz(3),spinrot_k(4),spinrot_kp(4),kmkp(3),tsec(2)
 214  complex(dpc),allocatable :: my_bsham(:),buffer(:),buffer_2d(:,:),my_kxssp(:,:),prev_col(:)
 215 !DBYG
 216  complex(dpc),allocatable :: acoeffs(:),bcoeffs(:),ccoeffs(:) ! Coeff of W = a/q^2 + b/q + c
 217  integer :: a_unt, b_unt, c_unt
 218  complex(dpc) :: aatmp, bbtmp, cctmp
 219  complex(gwpc),allocatable :: aa_vpv(:),aa_cpc(:),aa_ctccp(:)
 220  complex(gwpc),allocatable :: bb_vpv1(:),bb_cpc1(:),bb_ctccp1(:)
 221  complex(gwpc),allocatable :: bb_vpv2(:),bb_cpc2(:),bb_ctccp2(:)
 222  complex(gwpc),allocatable :: cc_vpv(:),cc_cpc(:),cc_ctccp(:)
 223  complex(dpc),allocatable :: abuffer(:),aprev_col(:)
 224  complex(dpc),allocatable :: bbuffer(:),bprev_col(:)
 225  complex(dpc),allocatable :: cbuffer(:),cprev_col(:)
 226  character(len=fnlen) :: tmpfname
 227  integer :: ii
 228 !END DBYG
 229  complex(gwpc),allocatable :: vc_sqrt_qbz(:)
 230  complex(gwpc),allocatable :: rhotwg1(:),rhotwg2(:),rhxtwg_vpv(:),rhxtwg_cpc(:),ctccp(:)
 231  complex(gwpc),target,allocatable :: ur_ckp(:),ur_vkp(:),ur_vk(:),ur_ck(:)
 232  complex(gwpc),ABI_CONTIGUOUS pointer :: ptur_ckp(:),ptur_vkp(:),ptur_vk(:),ptur_ck(:)
 233  type(pawcprj_type),target,allocatable :: Cp_tmp1(:,:),Cp_tmp2(:,:)
 234  type(pawcprj_type),target,allocatable :: Cp_tmp3(:,:),Cp_tmp4(:,:)
 235  type(pawcprj_type),allocatable :: Cp_ckp(:,:),Cp_vkp(:,:)
 236  type(pawcprj_type),allocatable :: Cp_vk(:,:),Cp_ck(:,:)
 237  type(pawcprj_type),pointer :: ptcp_ckp(:,:),ptcp_vkp(:,:),ptcp_vk(:,:),ptcp_ck(:,:)
 238  type(pawpwij_t),allocatable :: Pwij_q(:)
 239 #ifdef HAVE_MPI_IO
 240  integer(XMPI_OFFSET_KIND) :: tmp_off,my_offpad
 241  integer(XMPI_OFFSET_KIND),allocatable :: bsize_frecord(:),offset_of_block(:)
 242 #endif
 243 #ifdef DEV_MG_DEBUG_MODE
 244  integer,allocatable :: ttp_check(:,:)
 245 #endif
 246 
 247 !************************************************************************
 248 
 249  call timab(680,1,tsec)
 250  call timab(681,1,tsec)
 251 
 252  DBG_ENTER("COLL")
 253 
 254  ABI_CHECK(Wfd%nspinor==1,"nspinor==2 not coded")
 255  ABI_CHECK(nfftot_osc==PRODUCT(ngfft_osc(1:3)),"mismatch in FFT size")
 256 
 257  if (Wfd%nsppol==2) then
 258    ABI_WARNING("nsppol==2 is still under testing")
 259  end if
 260  !
 261  ! MPI variables.
 262  comm    = Wfd%comm
 263  nproc   = Wfd%nproc
 264  my_rank = Wfd%my_rank
 265 
 266  !
 267  ! Basic constants.
 268  nspinor = Wfd%nspinor
 269  nsppol  = Wfd%nsppol
 270  dim_rtwg=1; faq = one/(Cryst%ucvol*Kmesh%nbz)
 271  npweps = Bsp%npweps
 272  !
 273  ! Prepare the FFT tables to have u(r) on the ngfft_osc mesh.
 274  mgfft_osc = MAXVAL(ngfft_osc(1:3))
 275  fftalga_osc = ngfft_osc(7)/100
 276  if ( ANY(ngfft_osc(1:3) /= Wfd%ngfft(1:3)) ) then
 277    call wfd%change_ngfft(Cryst,Psps,ngfft_osc)
 278  end if
 279 
 280  ABI_MALLOC(igfftg0,(npweps))
 281  ABI_MALLOC(ktabr_k,(nfftot_osc))
 282  ABI_MALLOC(ktabr_kp,(nfftot_osc))
 283  ABI_MALLOC(id_tab,(nfftot_osc))
 284  id_tab = (/(ic, ic=1,nfftot_osc)/)
 285  !
 286  ! Workspace arrays for wavefunctions and oscillator matrix elements.
 287  ABI_MALLOC(rhxtwg_vpv,(npweps))
 288  ABI_MALLOC(rhxtwg_cpc,(npweps))
 289 
 290  if (BSp%prep_interp) then
 291    call wrtout(std_out,"Preparing BSE interpolation")
 292    ABI_MALLOC(aa_vpv,(npweps))
 293    ABI_MALLOC(bb_vpv1,(npweps))
 294    ABI_MALLOC(bb_vpv2,(npweps))
 295    ABI_MALLOC(cc_vpv,(npweps))
 296 
 297    ABI_MALLOC(aa_cpc,(npweps))
 298    ABI_MALLOC(bb_cpc1,(npweps))
 299    ABI_MALLOC(bb_cpc2,(npweps))
 300    ABI_MALLOC(cc_cpc,(npweps))
 301  end if
 302 
 303  ABI_MALLOC(ur_ckp,(nspinor*nfftot_osc))
 304  ABI_MALLOC(ur_vkp,(nspinor*nfftot_osc))
 305  ABI_MALLOC(ur_ck ,(nspinor*nfftot_osc))
 306  ABI_MALLOC(ur_vk ,(nspinor*nfftot_osc))
 307 
 308  if (Wfd%usepaw==1) then
 309    ABI_MALLOC(Cp_vk,(Wfd%natom,nspinor))
 310    call pawcprj_alloc(Cp_vk,0,Wfd%nlmn_atm)
 311    ABI_MALLOC(Cp_ck,(Wfd%natom,nspinor))
 312    call pawcprj_alloc(Cp_ck,0,Wfd%nlmn_atm)
 313    ABI_MALLOC(Cp_ckp,(Wfd%natom,nspinor))
 314    call pawcprj_alloc(Cp_ckp,0,Wfd%nlmn_atm)
 315    ABI_MALLOC(Cp_vkp,(Wfd%natom,nspinor))
 316    call pawcprj_alloc(Cp_vkp,0,Wfd%nlmn_atm)
 317 
 318    ABI_MALLOC(Cp_tmp1,(Wfd%natom,nspinor))
 319    call pawcprj_alloc(Cp_tmp1,0,Wfd%nlmn_atm)
 320    ABI_MALLOC(Cp_tmp2,(Wfd%natom,nspinor))
 321    call pawcprj_alloc(Cp_tmp2,0,Wfd%nlmn_atm)
 322    ABI_MALLOC(Cp_tmp3,(Wfd%natom,nspinor))
 323    call pawcprj_alloc(Cp_tmp3,0,Wfd%nlmn_atm)
 324    ABI_MALLOC(Cp_tmp4,(Wfd%natom,nspinor))
 325    call pawcprj_alloc(Cp_tmp4,0,Wfd%nlmn_atm)
 326  end if
 327  !
 328  ! Identify the index of q==0
 329  iqbz0=0
 330  do iq_bz=1,Qmesh%nbz
 331    if (ALL(ABS(Qmesh%bz(:,iq_bz))<tol3)) iqbz0 = iq_bz
 332  end do
 333  ABI_CHECK(iqbz0/=0,"q=0 not found")
 334  !
 335  ! Treat the spin polarization.
 336  spin_ids(:,1) = (/1,1/)
 337  spin_ids(:,2) = (/2,2/)
 338  spin_ids(:,3) = (/1,2/)
 339 
 340  nblocks=1
 341  kx_fact=two
 342  nels_block(:)=0
 343  nels_block(1)=BSp%nreh(1)*(BSp%nreh(1)+1_i8b)/2
 344  tot_nels=nels_block(1)
 345 
 346  if (nsppol==2) then
 347    nblocks=3
 348    kx_fact=one
 349    nels_block(1) = BSp%nreh(1)*(BSp%nreh(1)+1_i8b)/2   ! Only the upper triangle for block 1 and 2
 350    nels_block(2) = BSp%nreh(2)*(BSp%nreh(2)+1_i8b)/2
 351    nels_block(3) = BSp%nreh(1)*BSp%nreh(2)*1_i8b       ! Note: Block 3 does not have symmetries.
 352    tot_nels= SUM(nels_block)
 353  end if
 354  !
 355  ! Distribute the calculation of the matrix elements among the nodes.
 356  ! * tstart and t_stop give the initial and final transition index treated by each node.
 357  ! * my_hsize is the number of transitions treated by this processor
 358  ! * my_cols(1:2) gives the initial and final column treated by this node.
 359  !
 360  use_mpiio=.FALSE.
 361 #ifdef HAVE_MPI_IO
 362  use_mpiio = (nproc>1)
 363 #endif
 364  use_mpiio=.FALSE.
 365  !use_mpiio=.TRUE.
 366 
 367  if (is_resonant) then
 368    if (use_mpiio) then
 369      write(msg,'(2a,f6.2,a)')&
 370 &      ". Writing resonant excitonic Hamiltonian on file "//TRIM(fname)," via MPI-IO; file size= ",two*tot_nels*dpc*b2Gb," [Gb]."
 371    else
 372      write(msg,'(2a,f6.2,a)')&
 373 &      ". Writing resonant excitonic Hamiltonian on file "//TRIM(fname),"; file size= ",two*dpc*tot_nels*b2Gb," [Gb]."
 374    end if
 375  else
 376    if (use_mpiio) then
 377      write(msg,'(2a,f6.2,a)')&
 378 &      ". Writing coupling excitonic Hamiltonian on file "//TRIM(fname)," via MPI-IO; file size= ",tot_nels*2*dpc*b2Gb," [Gb]."
 379    else
 380      write(msg,'(2a,f6.2,a)')&
 381 &      ". Writing coupling excitonic Hamiltonian on file "//TRIM(fname),"; file size= ",two*dpc*tot_nels*b2Gb," [Gb]."
 382    end if
 383  end if
 384  call wrtout([std_out, ab_out], msg, do_flush=.True.)
 385  !
 386  ! Master writes the BSE header with Fortran IO.
 387  if (my_rank==master) then
 388 
 389    if (open_file(fname,msg,newunit=bsh_unt,form="unformatted",action="write") /= 0) then
 390       ABI_ERROR(msg)
 391    end if
 392    call exc_write_bshdr(bsh_unt,Bsp,Hdr_bse)
 393    ! To force the writing (needed for MPI-IO).
 394    close(bsh_unt)
 395 
 396    if (.not.use_mpiio) then ! Reopen the file and skip the header.
 397      if (open_file(fname,msg,newunit=bsh_unt,form="unformatted",action="readwrite") /= 0) then
 398         ABI_ERROR(msg)
 399      end if
 400      call exc_skip_bshdr(bsh_unt,ierr)
 401    end if
 402 
 403    if (BSp%prep_interp) then
 404      tmpfname = fname
 405      ii = LEN_TRIM(fname)
 406      tmpfname(ii-2:ii+1) = 'ABSR'
 407      if (open_file(tmpfname,msg,newunit=a_unt,form='unformatted',action="write") /= 0) then
 408        ABI_ERROR(msg)
 409      end if
 410      tmpfname(ii-2:ii+1) = 'BBSR'
 411      if (open_file(tmpfname,msg,newunit=b_unt,form='unformatted',action="write") /= 0) then
 412        ABI_ERROR(msg)
 413      end if
 414      tmpfname(ii-2:ii+1) = 'CBSR'
 415      if (open_file(tmpfname,msg,newunit=c_unt,form='unformatted',action="write") /= 0) then
 416        ABI_ERROR(msg)
 417      end if
 418      call exc_write_bshdr(a_unt,Bsp,Hdr_bse)
 419      call exc_write_bshdr(b_unt,Bsp,Hdr_bse)
 420      call exc_write_bshdr(c_unt,Bsp,Hdr_bse)
 421      close(a_unt)
 422      close(b_unt)
 423      close(c_unt)
 424      if (.not.use_mpiio) then ! Reopen the file and skip the header.
 425        tmpfname(ii-2:ii+1) = 'ABSR'
 426        if (open_file(tmpfname,msg,newunit=a_unt,form='unformatted',action="readwrite") /= 0) then
 427           ABI_ERROR(msg)
 428        end if
 429        call exc_skip_bshdr(a_unt,ierr)
 430        tmpfname(ii-2:ii+1) = 'BBSR'
 431        if (open_file(tmpfname,msg,newunit=b_unt,form='unformatted',action="readwrite") /= 0) then
 432           ABI_ERROR(msg)
 433        end if
 434        call exc_skip_bshdr(b_unt,ierr)
 435        tmpfname(ii-2:ii+1) = 'CBSR'
 436        if (open_file(tmpfname,msg,newunit=c_unt,form='unformatted',action="readwrite") /= 0) then
 437           ABI_ERROR(msg)
 438        end if
 439        call exc_skip_bshdr(c_unt,ierr)
 440      end if
 441    end if
 442  end if
 443 
 444  call xmpi_barrier(comm)
 445 
 446  if (use_mpiio) then
 447 #ifdef HAVE_MPI_IO
 448    ! Open the file with MPI-IO
 449    amode = MPI_MODE_RDWR
 450    !amode = MPI_MODE_CREATE + MPI_MODE_RDWR,
 451 
 452    call MPI_FILE_OPEN(comm, fname, amode, MPI_INFO_NULL, mpi_fh, mpi_err)
 453    ABI_CHECK_MPI(mpi_err,"opening: "//TRIM(fname))
 454 
 455    ! Skip the header.
 456    call exc_skip_bshdr_mpio(mpi_fh,xmpio_collective,ehdr_offset)
 457 
 458    ! Precompute the offset of the each block including the Fortran markers.
 459    ABI_MALLOC(offset_of_block,(nblocks))
 460    offset_of_block(1) = ehdr_offset
 461    do block=2,nblocks
 462      tmp_off = offset_of_block(block-1) + nels_block(block-1)*xmpi_bsize_dpc
 463      tmp_off = tmp_off + Bsp%nreh(block-1)*2*xmpio_bsize_frm  ! markers.
 464      offset_of_block(block) = tmp_off
 465    end do
 466 #endif
 467  end if
 468 
 469  call timab(681,2,tsec)
 470 
 471  do block=1,nsppol
 472    !
 473    ! Indices used to loop over bands.
 474    ! bidx contains the starting and final indices used to loop over bands.
 475    !
 476    !      (b3,b4)
 477    !         |... ...|
 478    ! (b1,b2) |... ...|
 479    !
 480    ! Resonant matrix is given by
 481    !      (v',c')
 482    !       |... ...|
 483    ! (v,c) |... ...|
 484    !
 485    ! Coupling matrix is given by
 486    !       (c',v')
 487    !       |... ...|
 488    ! (v,c) |... ...|
 489 
 490    if (is_resonant) then
 491      bidx(:,1) = [BSp%lomo_spin(block),BSp%homo_spin(block)] ! range for b1
 492      bidx(:,2) = [BSp%lumo_spin(block),BSp%humo_spin(block)] ! range for b2
 493      bidx(:,3) = [BSp%lomo_spin(block),BSp%homo_spin(block)] ! range for b3
 494      bidx(:,4) = [BSp%lumo_spin(block),BSp%humo_spin(block)] ! range for b4
 495    else
 496      bidx(:,1) = [BSp%lomo_spin(block),BSp%homo_spin(block)] ! range for b1
 497      bidx(:,2) = [BSp%lumo_spin(block),BSp%humo_spin(block)] ! range for b2
 498      bidx(:,3) = [BSp%lumo_spin(block),BSp%humo_spin(block)] ! range for b3
 499      bidx(:,4) = [BSp%lomo_spin(block),BSp%homo_spin(block)] ! range for b4
 500    end if
 501 
 502    spin1 = spin_ids(1,block)
 503    spin2 = spin_ids(2,block)
 504 
 505    do_coulomb_term  = (Bsp%use_coulomb_term .and. (spin1==spin2))
 506    do_exchange_term = (Bsp%exchange_term>0)
 507    w_is_diagonal    = BSp%use_diagonal_Wgg
 508    !
 509    ! Distribution of the matrix elements among the nodes.
 510    ! Note that rank0 will get the first transitions.
 511    nels=nels_block(block)
 512    ABI_MALLOC(t_start,(0:nproc-1))
 513    ABI_MALLOC(t_stop,(0:nproc-1))
 514    call xmpi_split_work2_i8b(nels,nproc,t_start,t_stop)
 515 
 516    ABI_MALLOC(hsize_of,(0:nproc-1))
 517    hsize_of=0
 518    do rank=0,nproc-1
 519      if (t_stop(rank)>=t_start(rank)) hsize_of(rank) = t_stop(rank)-t_start(rank)+1
 520      !write(std_out,*)"nels",nels,hsize_of(rank)
 521    end do
 522 
 523    my_hsize = hsize_of(my_rank)
 524    if (my_hsize<=0) then
 525      write(msg,'(a,i0)')"Wrong number of transitions: my_hsize= ",my_hsize
 526      ABI_ERROR(msg)
 527    end if
 528    if (my_hsize /= INT(my_hsize,KIND=i4b)) then
 529      write(msg,'(a,i0)')"Size of local block too large for a default integer, Increase the number of CPUs: my_hsize= ",my_hsize
 530      ABI_ERROR(msg)
 531    end if
 532 
 533    my_cols=0
 534    do itp=1,Bsp%nreh(block)
 535      do it=1,itp
 536        ir = it + itp*(itp-1_i8b)/2
 537        if (ir==t_start(my_rank)) then
 538          my_rows(1) = it
 539          my_cols(1) = itp
 540        end if
 541        if (ir==t_stop(my_rank)) then
 542          my_rows(2) = it
 543          my_cols(2) = itp
 544        end if
 545      end do
 546    end do
 547 
 548    my_starts = [my_rows(1),my_cols(1)]
 549    my_ends   = [my_rows(2),my_cols(2)]
 550    !
 551    ! Announce the treatment of submatrix treated by each node.
 552    bsize_my_block = 2*dpc*my_hsize
 553    write(msg,'(4(a,i0))')' Treating ',my_hsize,'/',nels,' matrix elements, from column ',my_cols(1),' up to column ',my_cols(2)
 554    call wrtout(std_out, msg)
 555 
 556    if (is_resonant) then
 557      write(msg,'(a,f8.1,a)')' Calculating resonant blocks. Memory required: ',bsize_my_block*b2Mb,' [Mb] <<< MEM'
 558    else
 559      write(msg,'(a,f8.1,a)')' Calculating coupling blocks. Memory required: ',bsize_my_block*b2Mb,' [Mb] <<< MEM'
 560    end if
 561    call wrtout(std_out, msg)
 562 
 563    ! Allocate big (scalable) buffer to store the BS matrix on this node.
 564    ABI_MALLOC_OR_DIE(my_bsham,(t_start(my_rank):t_stop(my_rank)), ierr)
 565 
 566    if (BSp%prep_interp) then
 567      ! Allocate big (scalable) buffers to store a,b,c coeffients
 568      ABI_MALLOC_OR_DIE(acoeffs,(t_start (my_rank):t_stop(my_rank)), ierr)
 569      ABI_MALLOC_OR_DIE(bcoeffs,(t_start(my_rank):t_stop(my_rank)), ierr)
 570      ABI_MALLOC_OR_DIE(ccoeffs,(t_start(my_rank):t_stop(my_rank)), ierr)
 571    end if
 572 
 573    if (do_coulomb_term) then ! Construct Coulomb term.
 574 
 575      call timab(682,1,tsec) ! exc_build_ham(Coulomb)
 576 
 577      write(msg,'(a,2i2,a)')" Calculating direct Coulomb term for (spin1,spin2) ",spin1,spin2," using full W_{GG'} ..."
 578      if (w_is_diagonal) then
 579         write(msg,'(a,2i2,a)')&
 580 &        " Calculating direct Coulomb term for (spin1, spin2) ",spin1,spin2," using diagonal approximation for W_{GG'} ..."
 581      end if
 582      call wrtout(std_out, msg)
 583 
 584      ABI_MALLOC(ctccp,(npweps))
 585 
 586      if (BSp%prep_interp) then
 587        ABI_MALLOC(aa_ctccp,(npweps))
 588        ABI_MALLOC(bb_ctccp1,(npweps))
 589        ABI_MALLOC(bb_ctccp2,(npweps))
 590        ABI_MALLOC(cc_ctccp,(npweps))
 591      end if
 592 
 593      ABI_MALLOC(vc_sqrt_qbz,(npweps))
 594 
 595 #ifdef DEV_MG_DEBUG_MODE
 596      ABI_MALLOC(ttp_check,(BSp%nreh(block),BSp%nreh(block)))
 597      ttp_check=0
 598 #endif
 599 
 600      do ikp_bz=1,BSp%nkbz ! Loop over kp
 601        ! NOTE: this way of looping is good for bulk but it's not optimal in the
 602        !       case of systems sampled only at Gamma e.g. isolated systems in which
 603        !       one should take advantage of Hermiticity by looping over c-v !!!!
 604 
 605        ! Check whether (vp,cp,ikp_bz,spin2) belongs to the set of columns treated by me for some vp,cp
 606        ! Be careful since vcks2t contains zeros corresponding to transitions that should be skipped.
 607        itpk_min = MINVAL(Bsp%vcks2t(:,:,ikp_bz,spin2), MASK=(Bsp%vcks2t(:,:,ikp_bz,spin2)>0) )
 608        itpk_max = MAXVAL(Bsp%vcks2t(:,:,ikp_bz,spin2))
 609        if (my_cols(2)<itpk_min .or. my_cols(1)>itpk_max) CYCLE
 610 
 611        write(msg,'(3(a,i0))')" status: ",ikp_bz,"/",BSp%nkbz," done by node ",my_rank
 612        call wrtout(std_out, msg, do_flush=.True.)
 613 
 614        ! * Get ikp_ibz, non-symmorphic phase, ph_mkpt, and symmetries from ikp_bz.
 615        call kmesh%get_BZ_item(ikp_bz,kpbz,ikp_ibz,isym_kp,itim_kp,ph_mkpt,isirred=isirred)
 616        !ABI_CHECK(isirred,"not irred!")
 617        !ABI_CHECK(ph_mkpt == cone, "Wrong phase!")
 618 
 619        ktabr_kp(:) = ktabr(:,ikp_bz)
 620        spinrot_kp(:)=Cryst%spinrot(:,isym_kp)
 621        !ABI_CHECK(ALL(ktabr_kp == id_tab), "wrong tab")
 622 
 623        do ik_bz=1,ikp_bz ! Loop over k
 624          !
 625          ! * Get ik_ibz, non-symmorphic phase, ph_mkt, and symmetries from ik_bz
 626          call kmesh%get_BZ_item(ik_bz,kbz,ik_ibz,isym_k,itim_k,ph_mkt,isirred=isirred)
 627          !ABI_CHECK(isirred,"not irred!")
 628          !ABI_CHECK(ph_mkt == cone, "Wrong phase!")
 629 
 630          ktabr_k(:) = ktabr(:,ik_bz)
 631          spinrot_k(:)=Cryst%spinrot(:,isym_k)
 632          !ABI_CHECK(ALL(ktabr_k == id_tab), "wrong tab")
 633          !if(itim_k==2) CYCLE ! time-reversal or not
 634          !
 635          ! * Find q = K-KP-G0 in the full BZ.
 636          kmkp = Kmesh%bz(:,ik_bz) - Kmesh%bz(:,ikp_bz)
 637          call findqg0(iq_bz,g0,kmkp,Qmesh%nbz,Qmesh%bz,BSp%mG0)
 638 
 639          ! Evaluate the tables needed for the padded FFT performed in rhotwg. Note that we have
 640          ! to pass G-G0 to sphereboundary instead of G as we need FFT results on the shifted G-sphere,
 641          ! If Gamma is not inside G-G0 one has to disable FFT padding as sphereboundary will give wrong tables.
 642          ! * Get the G-G0 shift for the FFT of the oscillators.
 643          !
 644          ABI_MALLOC(gbound,(2*mgfft_osc+8,2))
 645          call Gsph_c%fft_tabs(g0,mgfft_osc,ngfft_osc,use_padfft,gbound,igfftg0)
 646 #ifdef FC_IBM
 647  ! XLF does not deserve this optimization (problem with [v67mbpt][t03])
 648  use_padfft = 0
 649 #endif
 650          if ( ANY(fftalga_osc == (/2,4/)) ) use_padfft=0 ! Pad-FFT is not coded in rho_tw_g
 651          if (use_padfft==0) then
 652            ABI_FREE(gbound)
 653            ABI_MALLOC(gbound,(2*mgfft_osc+8,2*use_padfft))
 654          end if
 655          !
 656          ! * Get iq_ibz, and symmetries from iq_bz
 657          call qmesh%get_BZ_item(iq_bz,qbz,iq_ibz,isym_q,itim_q)
 658          is_qeq0 = (normv(qbz,Cryst%gmet,'G')<GW_TOLQ0)
 659 
 660          ! Symmetrize em1(omega=0)
 661          call screen_symmetrizer(W,iq_bz,Cryst,Gsph_c,Qmesh,Vcp)
 662          !
 663          ! * Set up table of |q_BZ+G|
 664          if (iq_ibz==1) then
 665            do ig=1,npweps
 666              isg = Gsph_c%rottb(ig,itim_q,isym_q)
 667              vc_sqrt_qbz(isg)=Vcp%vcqlwl_sqrt(ig,1)
 668            end do
 669          else
 670            do ig=1,npweps
 671              isg = Gsph_c%rottb(ig,itim_q,isym_q)
 672              vc_sqrt_qbz(isg) = Vcp%vc_sqrt(ig,iq_ibz)
 673            end do
 674          end if
 675 
 676          ! === Evaluate oscillator matrix elements ===
 677          ! * $ <phj/r|e^{-i(q+G)}|phi/r> - <tphj/r|e^{-i(q+G)}|tphi/r> $ in packed form.
 678          if (Wfd%usepaw==1.and.ik_bz/=ikp_bz) then
 679            ABI_MALLOC(Pwij_q,(Cryst%ntypat))
 680            call pawpwij_init(Pwij_q,npweps,Qmesh%bz(:,iq_bz),Gsph_c%gvec,Cryst%rprimd,Psps,Pawtab,Paw_pwff)
 681          end if
 682 
 683          ! =======================================
 684          ! === Loop over the four band indeces ===
 685          ! =======================================
 686          do ic=bidx(1,2),bidx(2,2) !do ic=BSp%lumo,BSp%nbnds
 687 
 688            ABI_CHECK(wfd%get_wave_ptr(ic, ik_ibz, spin1, wave_ck, msg) == 0, msg)
 689            if (wave_ck%has_ur == WFD_STORED) then
 690              ptur_ck => wave_ck%ur
 691            else
 692              call wfd%get_ur(ic,ik_ibz,spin1,ur_ck)
 693              ptur_ck => ur_ck
 694            end if
 695            !
 696            ! Get cprj for this (c,kbz,s1) in the BZ.
 697            ! phase due to the umklapp G0 in k-q is already included.
 698            if (Wfd%usepaw==1) then
 699              if (wave_ck%has_cprj == WFD_STORED) then
 700                ptcp_ck => wave_ck%cprj
 701              else
 702                call wfd%get_cprj(ic,ik_ibz,spin1,Cryst,Cp_tmp1,sorted=.FALSE.)
 703                ptcp_ck => Cp_tmp1
 704              end if
 705              call paw_symcprj_op(ik_bz,nspinor,1,Cryst,Kmesh,Pawtab,Pawang,ptcp_ck,Cp_ck)
 706            end if
 707 
 708            do icp=bidx(1,4),bidx(2,4)  !do icp=BSp%lumo,BSp%nbnds
 709              ! Calculate matrix-elements rhxtwg_cpc
 710              !
 711              if (ik_bz==ikp_bz) then ! Already in memory.
 712                rhxtwg_cpc(:) = sym_rhotwgq0(itim_k,isym_k,dim_rtwg,npweps,rhxtwg_q0(:,icp,ic,ik_ibz,spin1),Gsph_c)
 713 
 714              else
 715                ! Calculate matrix element from wfr.
 716                ! TODO: change the order of the loops.
 717 
 718                ABI_CHECK(wfd%get_wave_ptr(icp, ikp_ibz, spin2, wave_ckp, msg) == 0, msg)
 719                if (wave_ckp%has_ur == WFD_STORED) then
 720                  ptur_ckp => wave_ckp%ur
 721                else
 722                  call wfd%get_ur(icp,ikp_ibz,spin2,ur_ckp)
 723                  ptur_ckp => ur_ckp
 724                end if
 725 
 726                ! Load cprj for this (c,k,s2) in the BZ.
 727                ! Do not care about umklapp G0 in k-q as the phase is already included.
 728                if (Wfd%usepaw==1) then
 729                  if (wave_ckp%has_cprj == WFD_STORED) then
 730                    ptcp_ckp =>  wave_ckp%cprj
 731                  else
 732                    call wfd%get_cprj(icp,ikp_ibz,spin2,Cryst,Cp_tmp2,sorted=.FALSE.)
 733                    ptcp_ckp =>  Cp_tmp2
 734                  end if
 735                  call paw_symcprj_op(ikp_bz,nspinor,1,Cryst,Kmesh,Pawtab,Pawang,ptcp_ckp,Cp_ckp)
 736                end if
 737 
 738                call rho_tw_g(nspinor,npweps,nfftot_osc,ndat1,ngfft_osc,map2sphere,use_padfft,igfftg0,gbound,&
 739                  ptur_ckp,itim_kp,ktabr_kp,ph_mkpt,spinrot_kp,&
 740                  ptur_ck ,itim_k ,ktabr_k ,ph_mkt ,spinrot_k ,&
 741                  dim_rtwg,rhxtwg_cpc)
 742 
 743                if (Wfd%usepaw==1) then ! Add PAW onsite contribution.
 744                  call paw_rho_tw_g(npweps,dim_rtwg,nspinor,Cryst%natom,Cryst%ntypat,Cryst%typat,Cryst%xred,Gsph_c%gvec,&
 745                   Cp_ckp,Cp_ck,Pwij_q,rhxtwg_cpc)
 746                end if
 747              end if
 748 
 749              if (BSp%prep_interp) then
 750                aa_cpc = rhxtwg_cpc
 751                aa_cpc(2:) = czero
 752                bb_cpc1 = vc_sqrt_qbz*rhxtwg_cpc
 753                bb_cpc1(1) = czero
 754                bb_cpc2 = rhxtwg_cpc
 755                bb_cpc2(2:) = czero
 756 
 757                if(ik_bz == ikp_bz) then
 758                   ! Enforce orthogonality of the wavefunctions.
 759                   if(icp == ic) then
 760                     aa_cpc(1) = cone
 761                     bb_cpc2(1) = cone
 762                   else
 763                     aa_cpc(1) = czero
 764                     bb_cpc2(1) = czero
 765                   end if
 766                end if
 767 
 768                ! MG TODO: a does not require a call to w0gemv
 769                call screen_w0gemv(W,"C",npweps,nspinor,w_is_diagonal,cone_gw,czero_gw,aa_cpc,aa_ctccp)
 770                call screen_w0gemv(W,"C",npweps,nspinor,w_is_diagonal,cone_gw,czero_gw,bb_cpc1,bb_ctccp1)
 771                call screen_w0gemv(W,"C",npweps,nspinor,w_is_diagonal,cone_gw,czero_gw,bb_cpc2,bb_ctccp2)
 772 
 773                cc_cpc = vc_sqrt_qbz*rhxtwg_cpc
 774                cc_cpc(1) = czero
 775 
 776                call screen_w0gemv(W,"C",npweps,nspinor,w_is_diagonal,cone_gw,czero_gw,cc_cpc,cc_ctccp)
 777              end if
 778 
 779              ! Prepare sum_GG' rho_c'c*(G) W_qbz(G,G') rho_v'v(G')
 780              ! First sum on G: sum_G rho_c'c(G) W_qbz*(G,G') (W_qbz conjugated)
 781              rhxtwg_cpc = rhxtwg_cpc * vc_sqrt_qbz
 782              call screen_w0gemv(W,"C",npweps,nspinor,w_is_diagonal,cone_gw,czero_gw,rhxtwg_cpc,ctccp)
 783 
 784              do iv=bidx(1,1),bidx(2,1)    !do iv=BSp%lomo,BSp%homo
 785                it = BSp%vcks2t(iv,ic,ik_bz,spin1); if (it==0) CYCLE ! ir-uv-cutoff
 786                ene_t = BSp%Trans(it,spin1)%en
 787 
 788                ! TODO: use this but change the order of the loops.
 789                ABI_CHECK(wfd%get_wave_ptr(iv, ik_ibz, spin1, wave_vk, msg) == 0, msg)
 790 
 791                if (wave_vk%has_ur == WFD_STORED) then
 792                  ptur_vk => wave_vk%ur
 793                else
 794                  call wfd%get_ur(iv,ik_ibz,spin1,ur_vk)
 795                  ptur_vk => ur_vk
 796                end if
 797                !
 798                ! Load cprj for this (v,k,s1) in the BZ.
 799                ! Do not care about umklapp G0 in k-q as the phase is already included.
 800                if (Wfd%usepaw==1) then
 801                  if (wave_vk%has_cprj == WFD_STORED) then
 802                    ptcp_vk => wave_vk%cprj
 803                  else
 804                    call wfd%get_cprj(iv,ik_ibz,spin1,Cryst,Cp_tmp3,sorted=.FALSE.)
 805                    ptcp_vk => Cp_tmp3
 806                  end if
 807                  call paw_symcprj_op(ik_bz,nspinor,1,Cryst,Kmesh,Pawtab,Pawang,ptcp_vk,Cp_vk)
 808                end if
 809 
 810                do ivp=bidx(1,3),bidx(2,3) !do ivp=BSp%lomo,BSp%homo
 811 
 812                  if (is_resonant) then
 813                    itp = BSp%vcks2t(ivp,icp,ikp_bz,spin2)
 814                  else ! have to exchange band indeces
 815                    itp = BSp%vcks2t(icp,ivp,ikp_bz,spin2)
 816                  end if
 817 
 818                  if (itp==0) CYCLE ! ir-uv-cutoff
 819 
 820                  ! FIXME Temporary work around, when ikp_bz == ik it might happen that itp<it
 821                  ! should rewrite the loops using contracted k-dependent indeces for bands
 822                  if (itp<it) CYCLE
 823 
 824                  ir = it + itp*(itp-1)/2
 825                  if (ir<t_start(my_rank).or.ir>t_stop(my_rank)) CYCLE
 826 
 827                  ene_tp = BSp%Trans(itp,spin2)%en
 828 
 829                  ! ============================================
 830                  ! === Calculate matrix elements rhxtwg_vpv ===
 831                  ! ============================================
 832                  if (ik_bz==ikp_bz) then
 833                    ! Already in memory.
 834                    rhxtwg_vpv(:) = sym_rhotwgq0(itim_k,isym_k,dim_rtwg,npweps,rhxtwg_q0(:,ivp,iv,ik_ibz,spin1),Gsph_c)
 835 
 836                  else
 837 
 838                    ABI_CHECK(wfd%get_wave_ptr(ivp, ikp_ibz, spin2, wave_vkp, msg) == 0, msg)
 839 
 840                    ! Calculate matrix element from wfr.
 841                    if (wave_vkp%has_ur == WFD_STORED) then
 842                      ptur_vkp => wave_vkp%ur
 843                    else
 844                      call wfd%get_ur(ivp,ikp_ibz,spin2,ur_vkp)
 845                      ptur_vkp => ur_vkp
 846                    end if
 847                    !
 848                    ! Load cprj for this (vp,kp,s2) in the BZ.
 849                    ! Do not care about umklapp G0 in k-q as the phase is already included.
 850                    if (Wfd%usepaw==1) then
 851                      if (wave_vkp%has_cprj == WFD_STORED) then
 852                        ptcp_vkp =>  wave_vkp%cprj
 853                      else
 854                        call wfd%get_cprj(ivp,ikp_ibz,spin2,Cryst,Cp_tmp4,sorted=.FALSE.)
 855                        ptcp_vkp => Cp_tmp4
 856                      end if
 857                      call paw_symcprj_op(ikp_bz,nspinor,1,Cryst,Kmesh,Pawtab,Pawang,ptcp_vkp,Cp_vkp)
 858                    end if
 859 
 860                    call rho_tw_g(nspinor,npweps,nfftot_osc,ndat1,ngfft_osc,map2sphere,use_padfft,igfftg0,gbound,&
 861                      ptur_vkp,itim_kp,ktabr_kp,ph_mkpt,spinrot_kp,&
 862                      ptur_vk ,itim_k ,ktabr_k ,ph_mkt ,spinrot_k ,&
 863                      dim_rtwg,rhxtwg_vpv)
 864 
 865                    if (Wfd%usepaw==1) then ! Add PAW onsite contribution.
 866                      call paw_rho_tw_g(npweps,dim_rtwg,nspinor,Cryst%natom,Cryst%ntypat,Cryst%typat,Cryst%xred,&
 867                        Gsph_c%gvec,Cp_vkp,Cp_vk,Pwij_q,rhxtwg_vpv)
 868                    end if
 869                  end if
 870 
 871                  ! Index in the global Hamiltonian matrix.
 872                  ir = it + itp*(itp-1_i8b)/2
 873 
 874                  if (ir<t_start(my_rank).or.ir>t_stop(my_rank)) then
 875                    write(msg,'(a,3(1x,i0))')" Gonna SIGFAULT, ir, t_start, t_stop ",ir,t_start(my_rank),t_stop(my_rank)
 876                    ABI_ERROR(msg)
 877                  end if
 878                  !ABI_CHECK(itp >= it,"itp < it")
 879 
 880                  if (BSp%prep_interp) then
 881                    ! Save a,b, c coefficients.
 882                    aa_vpv = rhxtwg_vpv
 883                    aa_vpv(2:) = czero
 884                    bb_vpv1 = rhxtwg_vpv
 885                    bb_vpv1(2:) = czero
 886                    bb_vpv2 = vc_sqrt_qbz*rhxtwg_vpv
 887                    bb_vpv2(1) = czero
 888 
 889                    if (ik_bz == ikp_bz) then
 890                       ! Enforce orthogonality of the wavefunctions.
 891                       if (ivp == iv) then
 892                         aa_vpv(1) = cone
 893                         bb_vpv1(1) = cone
 894                       else
 895                         aa_vpv(1) = czero
 896                         bb_vpv1(1) = czero
 897                       end if
 898                    end if
 899 
 900                    cc_vpv = vc_sqrt_qbz*rhxtwg_vpv
 901                    cc_vpv(1) = czero
 902 
 903                    aatmp = -faq * xdotc(npweps,aa_ctccp,1,aa_vpv,1)
 904                    bbtmp = -faq * xdotc(npweps,bb_ctccp1,1,bb_vpv1,1)-faq*xdotc(npweps,bb_ctccp2,1,bb_vpv2,1)
 905                    cctmp = -faq * xdotc(npweps,cc_ctccp,1,cc_vpv,1)
 906 
 907                    acoeffs(ir) = aatmp
 908                    bcoeffs(ir) = bbtmp
 909                    ccoeffs(ir) = cctmp
 910                  end if
 911 
 912                  ! sum_G2 rho_c'c(G) W_qbz(G,G') rho_v'v(G')
 913                  rhxtwg_vpv = vc_sqrt_qbz * rhxtwg_vpv
 914                  http = - faq * xdotc(npweps,ctccp,1,rhxtwg_vpv,1)
 915 
 916                  ! Save result taking into account the symmetry of the matrix.
 917                  ! Note that the diagonal of the resonant block is not forced to be real
 918                  my_bsham(ir) = http
 919 
 920 #ifdef DEV_MG_DEBUG_MODE
 921                  ttp_check(it,itp) = ttp_check(it,itp)+1
 922 #endif
 923                end do !ivp
 924              end do !iv
 925            end do !icp
 926          end do !ic
 927 
 928          ABI_FREE(gbound)
 929 
 930          if (Wfd%usepaw==1.and.ik_bz/=ikp_bz) then ! Free the onsite contribution for this q.
 931            call pawpwij_free(Pwij_q)
 932            ABI_FREE(Pwij_q)
 933          end if
 934 
 935        end do ! ik_bz
 936      end do ! Fat loop over ikp_bz
 937 
 938 #ifdef DEV_MG_DEBUG_MODE
 939      do itp=1,BSp%nreh(block)
 940        do it=1,BSp%nreh(block)
 941         ir = it + itp*(itp-1_i8b)/2
 942          if (itp>=it .and. ttp_check(it,itp) /= 1) then
 943            if (ir>=t_start(my_rank).and.ir<=t_stop(my_rank)) then
 944              write(std_out,*)"WARN: upper triangle is not 1 ",it,itp,ttp_check(it,itp)
 945              write(std_out,*)TRIM(repr_trans(Bsp%Trans(it ,spin1)))
 946              write(std_out,*)TRIM(repr_trans(Bsp%Trans(itp,spin2)))
 947            end if
 948          end if
 949          if (itp< it .and. ttp_check(it,itp) /= 0) then
 950            write(std_out,*)"WARN: then lower triangle is not 0 ",it,itp,ttp_check(it,itp)
 951            write(std_out,*)TRIM(repr_trans(Bsp%Trans(it ,spin1)))
 952            write(std_out,*)TRIM(repr_trans(Bsp%Trans(itp,spin2)))
 953          end if
 954        end do
 955      end do
 956      ierr = SUM(SUM(ttp_check,DIM=2),DIM=1)
 957      if (ierr/=my_hsize) then
 958        write(msg,'(a,2i0)')"ierr/=my_hsize",ierr,my_hsize
 959        ABI_ERROR(msg)
 960      end if
 961      ABI_FREE(ttp_check)
 962 #endif
 963 
 964      ABI_FREE(ctccp)
 965      if(Bsp%prep_interp) then
 966        ABI_FREE(aa_ctccp)
 967        ABI_FREE(bb_ctccp1)
 968        ABI_FREE(bb_ctccp2)
 969        ABI_FREE(cc_ctccp)
 970      end if
 971 
 972      ABI_FREE(vc_sqrt_qbz)
 973      call wrtout(std_out,' Coulomb term completed')
 974 
 975      call timab(682,2,tsec) ! exc_build_ham(Coulomb)
 976    end if ! do_coulomb_term
 977    !
 978    ! =====================
 979    ! === Exchange term ===
 980    ! =====================
 981    ! TODO might add treatment of <psi|q+G|psi> for q+G -> 0
 982    ! TODO might used enlarged G-sphere for better convergence.
 983    if (do_exchange_term) then
 984 
 985      !call exc_build_v(spin1,spin2,nsppol,npweps,Bsp,Cryst,Kmesh,Qmesh,Gsph_x,Gsph_c,Vcp,&
 986      ! &  is_resonant,rhxtwg_q0,nproc,my_rank,t_start,t_stop,my_bsham,comm)
 987 
 988      call timab(683,1,tsec) ! exc_build_ham(exchange)
 989 
 990      write(msg,'(a,2i2,a)')" Calculating exchange term for (spin1,spin2) ",spin1,spin2," ..."
 991      call wrtout(std_out, msg)
 992 
 993      ABI_MALLOC(rhotwg1,(npweps))
 994      ABI_MALLOC(rhotwg2,(npweps))
 995 
 996      ngx = Gsph_x%ng
 997      ABI_MALLOC(vc_sqrt_qbz,(ngx))
 998 
 999      ! * Get iq_ibz, and symmetries from iq_bz.
1000      iq_bz = iqbz0 ! q = 0 -> iqbz0
1001      call qmesh%get_BZ_item(iq_bz,qbz,iq_ibz,isym_q,itim_q)
1002 
1003      ! * Set up table of |q(BZ)+G|
1004      if (iq_ibz==1) then
1005        do ig=1,ngx
1006          ISg = Gsph_x%rottb(ig,itim_q,isym_q)
1007          vc_sqrt_qbz(ISg)=Vcp%vcqlwl_sqrt(ig,1)
1008        end do
1009      else
1010         ABI_ERROR("iq_ibz should be 1")
1011      end if
1012 
1013      do itp=1,BSp%nreh(block) ! Loop over transition tp = (kp,vp,cp,spin2)
1014 
1015        if (itp<my_cols(1) .or. itp>my_cols(2)) CYCLE ! I dont have this column.
1016        ene_tp = Bsp%Trans(itp,spin2)%en
1017        ikp_bz = Bsp%Trans(itp,spin2)%k
1018        ivp    = Bsp%Trans(itp,spin2)%v
1019        icp    = Bsp%Trans(itp,spin2)%c
1020 
1021        ikp_ibz = Kmesh%tab (ikp_bz)
1022        isym_kp = Kmesh%tabo(ikp_bz)
1023        itim_kp = (3-Kmesh%tabi(ikp_bz))/2
1024 
1025        if (is_resonant) then
1026          rhotwg2(:) = sym_rhotwgq0(itim_kp,isym_kp,dim_rtwg,npweps,rhxtwg_q0(:,ivp,icp,ikp_ibz,spin2),Gsph_c)
1027        else ! Code for coupling block.
1028          rhotwg2(:) = sym_rhotwgq0(itim_kp,isym_kp,dim_rtwg,npweps,rhxtwg_q0(:,icp,ivp,ikp_ibz,spin2),Gsph_c)
1029        end if
1030        !
1031        ! Multiply by the Coulomb term.
1032         do ig=2,npweps
1033           rhotwg2(ig) = rhotwg2(ig) * vc_sqrt_qbz(ig) * vc_sqrt_qbz(ig)
1034         end do
1035 
1036        do it=1,itp ! Loop over transition t = (k,v,c,spin1)
1037          ir = it + itp*(itp-1_i8b)/2
1038          if (ir<t_start(my_rank) .or. ir>t_stop(my_rank)) CYCLE
1039 
1040          ene_t = Bsp%Trans(it,spin1)%en
1041          ik_bz = Bsp%Trans(it,spin1)%k
1042          iv    = Bsp%Trans(it,spin1)%v
1043          ic    = Bsp%Trans(it,spin1)%c
1044 
1045          ik_ibz = Kmesh%tab(ik_bz)
1046          isym_k = Kmesh%tabo(ik_bz)
1047          itim_k = (3-Kmesh%tabi(ik_bz))/2
1048          !if (itim_k==2) CYCLE ! time-reversal or not
1049 
1050          rhotwg1(:) = sym_rhotwgq0(itim_k,isym_k,dim_rtwg,npweps,rhxtwg_q0(:,iv,ic,ik_ibz,spin1),Gsph_c)
1051          !
1052          ! sum over G/=0
1053          ctemp = xdotc(npweps-1,rhotwg1(2:),1,rhotwg2(2:),1)
1054          ctemp = faq * kx_fact * ctemp
1055 
1056          ! exchange term is non divergent !
1057          if (BSp%prep_interp) then
1058            ccoeffs(ir) = ccoeffs(ir) + ctemp
1059          end if
1060 
1061          my_bsham(ir) = my_bsham(ir) + ctemp
1062        end do !it
1063      end do !itp
1064 
1065      ABI_FREE(rhotwg1)
1066      ABI_FREE(rhotwg2)
1067      ABI_FREE(vc_sqrt_qbz)
1068 
1069      call timab(683,2,tsec) ! exc_build_ham(exchange)
1070    end if ! do_exchange_term
1071    !
1072    ! =====================
1073    ! === Diagonal term ===
1074    ! =====================
1075    if (is_resonant .and. spin1==spin2) then
1076      write(msg,'(a,2i2,a)')" Adding diagonal term for (spin1,spin2) ",spin1,spin2," ..."
1077      call wrtout(std_out, msg)
1078      do it=1,BSp%nreh(block)
1079        ir = it + it*(it-1_i8b)/2
1080        if (ir>=t_start(my_rank) .and. ir<=t_stop(my_rank)) my_bsham(ir) = my_bsham(ir) + Bsp%Trans(it,spin1)%en
1081      end do
1082    end if
1083 
1084    if (.FALSE.) then
1085      dump_unt = get_unit()
1086      msg=' Coupling Hamiltonian matrix elements: '
1087      if (is_resonant) msg=' Reasonant Hamiltonian matrix elements: '
1088      call wrtout(dump_unt, msg)
1089      call wrtout(dump_unt,'    k  v  c  s      k" v" c" s"       H')
1090      do itp=1,BSp%nreh(block)
1091        ikp_bz = Bsp%Trans(itp,spin2)%k
1092        ivp    = Bsp%Trans(itp,spin2)%v
1093        icp    = Bsp%Trans(itp,spin2)%c
1094        do it=1,itp
1095          ik_bz = Bsp%Trans(it,spin1)%k
1096          iv    = Bsp%Trans(it,spin1)%v
1097          ic    = Bsp%Trans(it,spin1)%c
1098          ir = it + itp*(itp-1_i8b)/2
1099          if (ir>=t_start(my_rank).and.ir<=t_stop(my_rank)) then
1100            http = my_bsham(ir)
1101            !if (ABS(http) > tol3) then
1102            write(msg,'(2(i0,1x),2(i5,3i3,3x),2f7.3)')it,itp, ik_bz,iv,ic,spin1, ikp_bz,ivp,icp,spin2, http
1103            call wrtout(dump_unt, msg)
1104            !end if
1105          end if
1106        end do
1107      end do
1108    end if
1109 
1110 !DBYG
1111    if (.False.) then
1112      dump_unt = get_unit()
1113      dump_unt = 999
1114      msg=' Coupling Hamiltonian matrix elements: '
1115      if (is_resonant) msg=' Resonant Hamiltonian matrix elements: '
1116      call wrtout(dump_unt, msg)
1117      call wrtout(dump_unt,'    k v  c  s      k" v" c" s"       H')
1118      do itp=1,BSp%nreh(block)
1119        ikp_bz = Bsp%Trans(itp,spin2)%k
1120        ivp    = Bsp%Trans(itp,spin2)%v
1121        icp    = Bsp%Trans(itp,spin2)%c
1122        do it=1,BSp%nreh(block)
1123          ik_bz = Bsp%Trans(it,spin1)%k
1124          iv    = Bsp%Trans(it,spin1)%v
1125          ic    = Bsp%Trans(it,spin1)%c
1126          if(it > itp) then
1127            ir = itp+it*(it-1_i8b)/2
1128          else
1129            ir = it + itp*(itp-1_i8b)/2
1130          end if
1131          if (ir>=t_start(my_rank).and.ir<=t_stop(my_rank)) then
1132            if(it > itp) then
1133              http = CONJG(my_bsham(ir))
1134              if (BSp%prep_interp) then
1135                aatmp = CONJG(acoeffs(ir))
1136                bbtmp = CONJG(bcoeffs(ir))
1137                cctmp = CONJG(ccoeffs(ir))
1138              end if
1139            else
1140              http = my_bsham(ir)
1141              if (BSp%prep_interp) then
1142                aatmp = acoeffs(ir)
1143                bbtmp = bcoeffs(ir)
1144                cctmp = ccoeffs(ir)
1145              end if
1146            end if
1147            if (it == itp) http = http - Bsp%Trans(it,spin1)%en
1148            !if (ABS(http) > tol3) then
1149            if (BSp%prep_interp) then
1150              write(msg,'(2(i0,1x),2(i5,3i3,3x),2f24.20,2f24.20,2f24.20,2f24.20)')it,itp, ik_bz,iv,ic,spin1, ikp_bz,ivp,icp,&
1151 &   spin2, http, aatmp, bbtmp, cctmp
1152            else
1153              write(msg,'(2(i0,1x),2(i5,3i3,3x),2f24.20)')it,itp, ik_bz,iv,ic,spin1, ikp_bz,ivp,icp,spin2, http
1154            end if
1155            call wrtout(dump_unt, msg)
1156            !end if
1157          end if
1158        end do
1159      end do
1160    end if
1161 
1162    call timab(684,1,tsec) ! exc_build_ham(synchro)
1163    call xmpi_barrier(comm)
1164    call timab(684,2,tsec) ! exc_build_ham(synchro)
1165    !
1166    ! =================================
1167    ! === Write Hamiltonian on disk ===
1168    ! =================================
1169    call timab(685,1,tsec) ! exc_build_ham(write_ham)
1170    if (use_mpiio) then
1171 #ifdef HAVE_MPI_IO
1172      ! Write the Hamiltonian with collective MPI-IO.
1173      if (BSp%prep_interp) then
1174        ABI_ERROR("Preparation of interpolation technique not yet coded with MPI-IO")
1175      end if
1176      ABI_CHECK(nsppol==1,"nsppol==2 not coded, offset is wrong")
1177      !
1178      old_type = MPI_DOUBLE_COMPLEX
1179      call xmpio_create_fherm_packed(my_starts,my_ends,is_fortran_file,my_offset,old_type,hmat_type,offset_err)
1180 
1181      if (offset_err/=0) then
1182        write(msg,"(3a)")&
1183 &        "Global position index cannot be stored in a standard Fortran integer. ",ch10,&
1184 &        "BSE matrix cannot be written with a single MPI-IO call. "
1185        ABI_ERROR(msg)
1186      end if
1187      !
1188      ! Each node uses a different offset to skip the header and the blocks written by the other CPUs.
1189      my_offset = offset_of_block(block) + my_offset
1190 
1191      call MPI_FILE_SET_VIEW(mpi_fh, my_offset, MPI_BYTE, hmat_type, 'native', MPI_INFO_NULL, mpi_err)
1192      ABI_CHECK_MPI(mpi_err,"SET_VIEW")
1193 
1194      call MPI_TYPE_FREE(hmat_type,mpi_err)
1195      ABI_CHECK_MPI(mpi_err,"MPI_TYPE_FREE")
1196 
1197      if (hsize_of(my_rank) /= INT(hsize_of(my_rank),kind=i4b) ) then
1198        ABI_ERROR("Wraparound error")
1199      end if
1200 
1201      tmp_size = INT(hsize_of(my_rank))
1202      call MPI_FILE_WRITE_ALL(mpi_fh, my_bsham, tmp_size, MPI_DOUBLE_COMPLEX, MPI_STATUS_IGNORE, mpi_err)
1203      ABI_CHECK_MPI(mpi_err,"FILE_WRITE")
1204 
1205      ! It seems that personal calls in make the code stuck
1206      !if (is_fortran_file .and. my_rank==master) then ! Master writes the Fortran record markers.
1207      ! Write the Fortran record markers.
1208      neh2=BSp%nreh(block)
1209      ABI_MALLOC(bsize_frecord,(neh2))
1210      bsize_frecord = (/(col_glob * xmpi_bsize_dpc, col_glob=1,neh2)/)
1211      ! ehdr_offset points to the end of the header.
1212      !call xmpio_write_frmarkers(mpi_fh,ehdr_offset,xmpio_collective,neh2,bsize_frecord,mpi_err)
1213      my_offset = offset_of_block(block)
1214      call xmpio_write_frmarkers(mpi_fh,my_offset,xmpio_collective,neh2,bsize_frecord,ierr)
1215      ABI_CHECK(ierr==0,"Error while writing Fortran markers")
1216      ABI_FREE(bsize_frecord)
1217 #else
1218      ABI_BUG("You should not be here!")
1219 #endif
1220    else
1221      ! Use FORTRAN IO with sequential access mode.
1222      ! * Each node sends its data to master node.
1223      ! * Blocks are distributed according to the rank of the node.
1224      ! * Matrix is written by columns hence make sure that the last column is completely written.
1225      call cwtime(cputime,walltime,gflops,"start")
1226 
1227      if (my_rank==master) then
1228        prev_nrows=0; if (my_cols(2) /= my_rows(2)) prev_nrows = my_rows(2)
1229        ncol = my_cols(2)-my_cols(1)+1
1230        ist=1
1231        do jj=1,ncol
1232          col_glob = my_starts(2) + jj - 1
1233          nrows = col_glob; if (jj==ncol) nrows=my_rows(2)
1234          iend = ist + nrows -1
1235          write(bsh_unt) my_bsham(ist:iend)
1236          if (BSp%prep_interp) then
1237            write(a_unt) acoeffs(ist:iend)
1238            write(b_unt) bcoeffs(ist:iend)
1239            write(c_unt) ccoeffs(ist:iend)
1240          end if
1241          ist=iend+1
1242        end do
1243        write(msg,'(2(a,i0))')" Wraparound error: iend=",iend," my_hsize=",hsize_of(my_rank)
1244        ABI_CHECK(iend==hsize_of(my_rank),msg)
1245        ABI_FREE(my_bsham)
1246        if (BSp%prep_interp) then
1247          ABI_FREE(acoeffs)
1248          ABI_FREE(bcoeffs)
1249          ABI_FREE(ccoeffs)
1250        end if
1251      end if
1252 
1253      call xmpi_barrier(comm)
1254      !
1255      ! Collect data from the other nodes.
1256      do sender=1,nproc-1
1257        ! If I'm not involved, jump to the end of the loop and wait there (sequential IO? Of course!)
1258        if (all(my_rank /= [sender, master])) goto 100
1259 
1260        if (my_rank==master)  then
1261          ABI_MALLOC(buffer,(hsize_of(sender)))
1262          if (BSp%prep_interp) then
1263            ABI_MALLOC(abuffer,(hsize_of(sender)))
1264            ABI_MALLOC(bbuffer,(hsize_of(sender)))
1265            ABI_MALLOC(cbuffer,(hsize_of(sender)))
1266          end if
1267        end if
1268        tmp_size = INT(hsize_of(sender),kind=i4b)
1269        call xmpi_exch(my_bsham,tmp_size,sender,buffer,master,comm,10*block+1,mpi_err)
1270        if (BSp%prep_interp) then
1271          call xmpi_exch(acoeffs,tmp_size,sender,abuffer,master,comm,10*block+2,mpi_err)
1272          call xmpi_exch(bcoeffs,tmp_size,sender,bbuffer,master,comm,10*block+3,mpi_err)
1273          call xmpi_exch(ccoeffs,tmp_size,sender,cbuffer,master,comm,10*block+4,mpi_err)
1274        end if
1275 
1276        ! TODO Be careful with the MPI TAG here, add optional Arguments in xmpi_exch so that the TAG can be specified!
1277        proc_start = (/my_rows(1),my_cols(1)/)
1278        proc_end   = (/my_rows(2),my_cols(2)/)
1279        my_extrema(:,1) = proc_start
1280        my_extrema(:,2) = proc_end
1281 
1282        sender_extrema = my_extrema ! just to avoid NAN on sender. xechh_mpi is not well designed
1283        call xmpi_exch(my_extrema,4,sender,sender_extrema,master,comm,10*block+5,mpi_err)
1284 
1285        if (my_rank==master) then
1286           proc_start = sender_extrema(:,1)
1287           proc_end   = sender_extrema(:,2)
1288           !write(std_out,*)"proc_start, proc_end",proc_start,proc_end
1289 
1290          if (prev_nrows>0) then ! backspace the file if the last record written was not complete.
1291            !write(std_out,*)" master node had to call backspace"
1292            backspace(bsh_unt)
1293            ABI_MALLOC(prev_col,(prev_nrows))
1294            read(bsh_unt) prev_col
1295            backspace(bsh_unt)
1296 
1297            if (BSp%prep_interp) then
1298              backspace(a_unt)
1299              ABI_MALLOC(aprev_col,(prev_nrows))
1300              read(a_unt) aprev_col
1301              backspace(a_unt)
1302 
1303              backspace(b_unt)
1304              ABI_MALLOC(bprev_col,(prev_nrows))
1305              read(b_unt) bprev_col
1306              backspace(b_unt)
1307 
1308              backspace(c_unt)
1309              ABI_MALLOC(cprev_col,(prev_nrows))
1310              read(c_unt) cprev_col
1311              backspace(c_unt)
1312            end if
1313          end if
1314          !
1315          ! Write the columns owned by sender.
1316          ncol = proc_end(2)-proc_start(2)+1
1317          ist=1
1318          do jj=1,ncol
1319            col_glob = proc_start(2) + jj-1
1320            nrows = col_glob
1321            if (jj==1   )  nrows=col_glob - proc_start(1) + 1
1322            if (jj==ncol) then
1323              nrows=proc_end(1)
1324              if (ncol==1)  nrows=proc_end(1) - proc_start(1) + 1
1325            end if
1326            iend = ist + nrows -1
1327            !write(std_out,*)"Using nrows, ist, iend=",nrows,ist,iend
1328            if (jj==1 .and. prev_nrows>0) then ! join prev_col and this subcolumn.
1329              write(bsh_unt) CMPLX(prev_col,kind=dpc),CMPLX(buffer(ist:iend),kind=dpc)
1330              if (BSp%prep_interp) then
1331                write(a_unt) CMPLX(aprev_col,kind=dpc),CMPLX(abuffer(ist:iend),kind=dpc)
1332                write(b_unt) CMPLX(bprev_col,kind=dpc),CMPLX(bbuffer(ist:iend),kind=dpc)
1333                write(c_unt) CMPLX(cprev_col,kind=dpc),CMPLX(cbuffer(ist:iend),kind=dpc)
1334              end if
1335              prev_nrows = prev_nrows + iend-ist+1
1336            else
1337              write(bsh_unt) CMPLX(buffer(ist:iend),kind=dpc)
1338              if (BSp%prep_interp) then
1339                write(a_unt) CMPLX(abuffer(ist:iend),kind=dpc)
1340                write(b_unt) CMPLX(bbuffer(ist:iend),kind=dpc)
1341                write(c_unt) CMPLX(cbuffer(ist:iend),kind=dpc)
1342              end if
1343              prev_nrows=0
1344            end if
1345            ist=iend+1
1346          end do
1347          if (ncol>1) then ! Reset prev_nrows if a new column has begun.
1348            prev_nrows = proc_end(1)
1349            if (proc_end(1) == proc_end(2)) prev_nrows = 0
1350          end if
1351          if (iend/=hsize_of(sender)) then
1352            write(msg,'(2(a,i0))')" Wraparound error: iend=",iend," my_hsize=",hsize_of(sender)
1353            ABI_ERROR(msg)
1354          end if
1355          ABI_SFREE(prev_col)
1356          if (BSp%prep_interp) then
1357            ABI_SFREE(aprev_col)
1358            ABI_SFREE(bprev_col)
1359            ABI_SFREE(cprev_col)
1360          end if
1361          ABI_FREE(buffer)
1362          if (BSp%prep_interp) then
1363            ABI_FREE(abuffer)
1364            ABI_FREE(bbuffer)
1365            ABI_FREE(cbuffer)
1366          end if
1367        end if ! master
1368        !
1369 100    call xmpi_barrier(comm)
1370      end do ! sender
1371 
1372      call cwtime(cputime,walltime,gflops,"stop")
1373      write(msg,'(2(a,f9.1),a)')" Fortran-IO completed. cpu_time: ",cputime,"[s], walltime: ",walltime," [s]"
1374      call wrtout(std_out, msg, do_flush=.True.)
1375    end if ! use_mpiio
1376    call timab(685,2,tsec) ! exc_build_ham(write_ham)
1377    !
1378    ABI_SFREE(my_bsham)
1379    if (BSp%prep_interp) then
1380      ABI_SFREE(acoeffs)
1381      ABI_SFREE(bcoeffs)
1382      ABI_SFREE(ccoeffs)
1383    end if
1384    ABI_FREE(t_start)
1385    ABI_FREE(t_stop)
1386    ABI_FREE(hsize_of)
1387  end do ! block
1388  !
1389  ! ===========================================
1390  ! === Exchange term for spin_up spin_down ===
1391  ! ===========================================
1392 
1393  if (nsppol==2) then
1394    call timab(686,2,tsec) ! exc_build_ham(exch.spin)
1395    block=3
1396    neh1=BSp%nreh(1)
1397    neh2=BSp%nreh(2)
1398    !
1399    ! The oscillators at q=0 are available on each node for both spin.
1400    ! Here the calculation of the block is parallelized over columns.
1401    ABI_MALLOC(col_start,(0:nproc-1))
1402    ABI_MALLOC(col_stop,(0:nproc-1))
1403    call xmpi_split_work2_i4b(neh2,nproc,col_start,col_stop)
1404 
1405    my_cols(1) = col_start(my_rank)
1406    my_cols(2) = col_stop (my_rank)
1407    if (my_cols(2)-my_cols(1)<=0) then
1408      ABI_ERROR("One of the processors has zero columns!")
1409    end if
1410 
1411    ABI_MALLOC(ncols_of,(0:nproc-1))
1412    ncols_of=0
1413    do rank=0,nproc-1
1414      if (col_stop(rank)>=col_start(rank)) ncols_of(rank) = col_stop(rank)-col_start(rank)+1
1415    end do
1416 
1417    ABI_FREE(col_start)
1418    ABI_FREE(col_stop)
1419    !
1420    ! TODO might add treatment of <psi|q+G|psi> for q+G -> 0
1421    ! TODO might used enlarged G-sphere for better convergence.
1422    ! Note that my_kxssp is always written on file when nsppol=2, even when
1423    ! non-local field effects are neglected.
1424    ABI_MALLOC(my_kxssp,(neh1,my_cols(1):my_cols(2)))
1425    my_kxssp=czero
1426 
1427    if (do_exchange_term) then
1428      spin1=1; spin2=2
1429      write(msg,'(a,2i2,a)')" Calculating exchange term for (spin1,spin2) ",spin1,spin2," ..."
1430      call wrtout(std_out, msg)
1431 
1432      ABI_MALLOC(rhotwg1,(npweps))
1433      ABI_MALLOC(rhotwg2,(npweps))
1434 
1435      ngx = Gsph_x%ng
1436      ABI_MALLOC(vc_sqrt_qbz,(ngx))
1437      !
1438      ! * Get iq_ibz, and symmetries from iq_bz.
1439      iq_bz = iqbz0 ! q = 0 -> iqbz0
1440      call qmesh%get_BZ_item(iq_bz,qbz,iq_ibz,isym_q,itim_q)
1441      !
1442      ! * Set up table of |q(BZ)+G|
1443      if (iq_ibz==1) then
1444        do ig=1,ngx
1445          ISg = Gsph_x%rottb(ig,itim_q,isym_q)
1446          vc_sqrt_qbz(ISg)=Vcp%vcqlwl_sqrt(ig,1)
1447        end do
1448      else
1449         ABI_ERROR("iq_ibz should be 1")
1450      end if
1451 
1452      do itp=1,neh2 ! Loop over transition tp = (kp,vp,cp,spin2)
1453 
1454        if (itp<my_cols(1) .or. itp>my_cols(2)) CYCLE ! I dont have this column.
1455        ene_tp = Bsp%Trans(itp,spin2)%en
1456        ikp_bz = Bsp%Trans(itp,spin2)%k
1457        ivp    = Bsp%Trans(itp,spin2)%v
1458        icp    = Bsp%Trans(itp,spin2)%c
1459 
1460        ikp_ibz = Kmesh%tab (ikp_bz)
1461        isym_kp = Kmesh%tabo(ikp_bz)
1462        itim_kp = (3-Kmesh%tabi(ikp_bz))/2
1463 
1464        if (is_resonant) then
1465          rhotwg2(:) = sym_rhotwgq0(itim_kp,isym_kp,dim_rtwg,npweps,rhxtwg_q0(:,ivp,icp,ikp_ibz,spin2),Gsph_c)
1466        else ! Code for coupling block.
1467          rhotwg2(:) = sym_rhotwgq0(itim_kp,isym_kp,dim_rtwg,npweps,rhxtwg_q0(:,icp,ivp,ikp_ibz,spin2),Gsph_c)
1468        end if
1469        !
1470        ! Multiply by the Coulomb term.
1471         do ig=2,npweps
1472           rhotwg2(ig) = rhotwg2(ig) * vc_sqrt_qbz(ig) * vc_sqrt_qbz(ig)
1473         end do
1474 
1475        do it=1,neh1 ! Loop over transition t = (k,v,c,spin1) FULL matrix.
1476 
1477          ene_t = Bsp%Trans(it,spin1)%en
1478          ik_bz = Bsp%Trans(it,spin1)%k
1479          iv    = Bsp%Trans(it,spin1)%v
1480          ic    = Bsp%Trans(it,spin1)%c
1481 
1482          ik_ibz = Kmesh%tab(ik_bz)
1483          isym_k = Kmesh%tabo(ik_bz)
1484          itim_k = (3-Kmesh%tabi(ik_bz))/2
1485          !if (itim_k==2) CYCLE ! time-reversal or not
1486 
1487          rhotwg1(:) = sym_rhotwgq0(itim_k,isym_k,dim_rtwg,npweps,rhxtwg_q0(:,iv,ic,ik_ibz,spin1),Gsph_c)
1488          !
1489          ! sum over G/=0
1490          ctemp = XDOTC(npweps-1,rhotwg1(2:),1,rhotwg2(2:),1)
1491          ctemp = faq * kx_fact * ctemp
1492 
1493          my_kxssp(it,itp) = ctemp
1494        end do !it
1495      end do !itp
1496 
1497      ABI_FREE(rhotwg1)
1498      ABI_FREE(rhotwg2)
1499      ABI_FREE(vc_sqrt_qbz)
1500    end if ! do_exchange_term
1501    call timab(686,2,tsec) ! exc_build_ham(exch.spin)
1502    !
1503    ! =====================================
1504    ! === Write the Hamiltonian on disk ===
1505    ! =====================================
1506    call timab(685,1,tsec) ! exc_build_ham(write_ham)
1507 
1508    if (use_mpiio) then
1509 #ifdef HAVE_MPI_IO
1510      my_ncols=ncols_of(my_rank); old_type=MPI_DOUBLE_COMPLEX
1511      call xmpio_create_fsubarray_2D((/neh1,my_ncols/),(/neh1,my_ncols/),(/1,1/),old_type,hmat_type,my_offpad,mpi_err)
1512      ABI_CHECK_MPI(mpi_err,"fsubarray_2D")
1513      !
1514      ! Each node uses a different offset to skip the header and the blocks written by the other CPUs.
1515      prev_nels=0
1516      prev_ncols=0
1517      if (my_rank>0) then
1518        prev_ncols = SUM(ncols_of(0:my_rank-1))
1519        prev_nels = neh1*prev_ncols
1520      end if
1521      tmp_off = prev_nels*xmpi_bsize_dpc + prev_ncols*2*xmpio_bsize_frm
1522 
1523      my_offset = offset_of_block(block) + tmp_off + my_offpad
1524 
1525      call MPI_FILE_SET_VIEW(mpi_fh, my_offset, MPI_BYTE, hmat_type, 'native', MPI_INFO_NULL, mpi_err)
1526      ABI_CHECK_MPI(mpi_err,"SET_VIEW")
1527 
1528      call MPI_TYPE_FREE(hmat_type,mpi_err)
1529      ABI_CHECK_MPI(mpi_err,"MPI_TYPE_FREE")
1530 
1531      tmp_size = INT(neh1*my_ncols)
1532      call MPI_FILE_WRITE_ALL(mpi_fh, my_kxssp,tmp_size, MPI_DOUBLE_COMPLEX, MPI_STATUS_IGNORE, mpi_err)
1533      ABI_CHECK_MPI(mpi_err,"FILE_WRITE")
1534 
1535      ! It seems that personal calls in make the code stuck
1536      ! Master writes the Fortran record markers.
1537      ABI_MALLOC(bsize_frecord,(neh2))
1538      bsize_frecord = neh1 * xmpi_bsize_dpc
1539      ! ehdr_offset points to the end of the header.
1540      !call xmpio_write_frmarkers(mpi_fh,ehdr_offset,xmpio_collective,neh2,bsize_frecord,mpi_err)
1541      my_offset = offset_of_block(block)
1542      call xmpio_write_frmarkers(mpi_fh,my_offset,xmpio_collective,neh2,bsize_frecord,ierr)
1543      ABI_CHECK(ierr==0,"Error while writing Fortran markers")
1544      ABI_FREE(bsize_frecord)
1545 #else
1546      ABI_BUG("You should not be here")
1547 #endif
1548    else
1549      ! Use FORTRAN IO with sequential access mode.
1550      ! * Each node sends its data to master node.
1551      ! * Columns are distributed according to the rank of the node.
1552      if (my_rank==master) then
1553        do jj=my_cols(1),my_cols(2)
1554          write(bsh_unt) my_kxssp(:,jj)
1555        end do
1556        ABI_FREE(my_kxssp)
1557      end if
1558 
1559      call xmpi_barrier(comm)
1560      !
1561      ! Collect data from the other nodes.
1562      do sender=1,nproc-1
1563        ! If I'm not involved, jump to the end of the loop and wait there (sequential IO? Of course!)
1564        if (all(my_rank /= [sender, master])) goto 200
1565 
1566        if (my_rank==master)  then
1567          ABI_MALLOC(buffer_2d,(neh1,ncols_of(sender)))
1568        end if
1569        call xmpi_exch(my_kxssp,neh1*ncols_of(sender),sender,buffer_2d,master,comm,5,mpi_err)
1570        !
1571        if (my_rank==master) then ! Write the columns owned by sender.
1572          do jj=1,ncols_of(sender)
1573            write(bsh_unt) buffer_2d(:,jj)
1574          end do
1575          ABI_FREE(buffer_2d)
1576        end if ! master
1577        !
1578 200    call xmpi_barrier(comm)
1579      end do ! sender
1580    end if
1581    call timab(685,2,tsec) ! exc_build_ham(write_ham)
1582 
1583    ABI_FREE(ncols_of)
1584    ABI_SFREE(my_kxssp)
1585  end if
1586 
1587  ! Close the file.
1588  if (use_mpiio) then
1589 #ifdef HAVE_MPI_IO
1590    call MPI_FILE_CLOSE(mpi_fh, mpi_err)
1591    ABI_CHECK_MPI(mpi_err,"FILE_CLOSE")
1592    ABI_FREE(offset_of_block)
1593 #endif
1594  end if
1595 
1596  ! master closes the Fortran files.
1597  if (my_rank==master) then
1598    close(bsh_unt)
1599    if (BSp%prep_interp) then
1600      close(a_unt)
1601      close(b_unt)
1602      close(c_unt)
1603    end if
1604  end if
1605 
1606  ! Free memory.
1607  ABI_FREE(igfftg0)
1608  ABI_FREE(ktabr_k)
1609  ABI_FREE(id_tab)
1610  ABI_FREE(ktabr_kp)
1611  ABI_FREE(rhxtwg_vpv)
1612  ABI_FREE(rhxtwg_cpc)
1613  if (BSp%prep_interp) then
1614    ABI_FREE(aa_vpv)
1615    ABI_FREE(bb_vpv1)
1616    ABI_FREE(bb_vpv2)
1617    ABI_FREE(cc_vpv)
1618    ABI_FREE(aa_cpc)
1619    ABI_FREE(bb_cpc1)
1620    ABI_FREE(bb_cpc2)
1621    ABI_FREE(cc_cpc)
1622  end if
1623  ABI_FREE(ur_ckp)
1624  ABI_FREE(ur_vkp)
1625  ABI_FREE(ur_vk)
1626  ABI_FREE(ur_ck)
1627 
1628  ! Deallocation for PAW.
1629  if (Wfd%usepaw==1) then
1630    call pawcprj_free(Cp_vk)
1631    ABI_FREE(Cp_vk)
1632    call pawcprj_free(Cp_ck)
1633    ABI_FREE(Cp_ck)
1634    call pawcprj_free(Cp_ckp)
1635    ABI_FREE(Cp_ckp)
1636    call pawcprj_free(Cp_vkp)
1637    ABI_FREE(Cp_vkp)
1638    call pawcprj_free(Cp_tmp1)
1639    ABI_FREE(Cp_tmp1)
1640    call pawcprj_free(Cp_tmp2)
1641    ABI_FREE(Cp_tmp2)
1642    call pawcprj_free(Cp_tmp3)
1643    ABI_FREE(Cp_tmp3)
1644    call pawcprj_free(Cp_tmp4)
1645    ABI_FREE(Cp_tmp4)
1646  end if
1647 
1648  call xmpi_barrier(comm)
1649 
1650  DBG_EXIT("COLL")
1651 
1652  call timab(680,2,tsec)
1653 
1654 end subroutine exc_build_block

m_exc_build/exc_build_ham [ Functions ]

[ Top ] [ m_exc_build ] [ Functions ]

NAME

  exc_build_ham

FUNCTION

  Calculate and write the excitonic Hamiltonian on an external binary file (Fortran file open
  in random mode) for subsequent treatment in the Bethe-Salpeter code.

INPUTS

  BSp<excparam>=The parameters for the Bethe-Salpeter calculation.
  BS_files<excfiles>=File names internally used in the BS code.
  Cryst<crystal_t>=Info on the crystalline structure.
  Kmesh<kmesh_t>=The list of k-points in the BZ, IBZ and symmetry tables.
  Qmesh<kmesh_t>=The list of q-points for epsilon^{-1} and related symmetry tables.
  ktabr(nfftot_osc,BSp%nkbz)=The FFT index of $(R^{-1}(r-\tau))$ where R is symmetry needed to obtains
    the k-points from the irreducible image.  Used to symmetrize u_Sk where S = \transpose R^{-1}
  Gsph_x<gsphere_t>=Info on the G-sphere used to describe wavefunctions and W (the largest one is actually stored).
  Gsph_c<gsphere_t>=Info on the G-sphere used to describe the correlation part.
  Vcp<vcoul_t>=The Coulomb interaction in reciprocal space. A cutoff can be used
  W<screen_t>=Data type gathering info and data for W.
  nfftot_osc=Total Number of FFT points used for the oscillator matrix elements.
  ngfft_osc(18)=Info on the FFT algorithm used to calculate the oscillator matrix elements.
  Psps<Pseudopotential_type>=Variables related to pseudopotentials
  Pawtab(Psps%ntypat)<pawtab_type>=PAW tabulated starting data.
  Pawang<pawang_type>=PAW angular mesh and related data.
  Paw_pwff(Cryst%ntypat*Wfd%usepaw)<pawpwff_t>=Form factor used to calculate the onsite mat. elements of a plane wave.
  Wfd<wfdgw_t>=Handler for the wavefunctions.

OUTPUT

  The excitonic Hamiltonian is saved on an external binary file (see below).

SOURCE

2094 subroutine exc_build_ham(BSp,BS_files,Cryst,Kmesh,Qmesh,ktabr,Gsph_x,Gsph_c,Vcp,&
2095 & Wfd,W,Hdr_bse,nfftot_osc,ngfft_osc,Psps,Pawtab,Pawang,Paw_pwff)
2096 
2097 !Arguments ------------------------------------
2098 !scalars
2099  integer,intent(in) :: nfftot_osc
2100  type(excparam),intent(in) :: BSp
2101  type(excfiles),intent(in) :: BS_files
2102  type(screen_t),intent(inout) :: W
2103  type(kmesh_t),intent(in) :: Kmesh,Qmesh
2104  type(crystal_t),intent(in) :: Cryst
2105  type(vcoul_t),intent(in) :: Vcp
2106  type(gsphere_t),intent(in) :: Gsph_x,Gsph_c
2107  type(Pseudopotential_type),intent(in) :: Psps
2108  type(Hdr_type),intent(inout) :: Hdr_bse
2109  type(pawang_type),intent(in) :: Pawang
2110  type(wfdgw_t),target,intent(inout) :: Wfd
2111 !arrays
2112  integer,intent(in) :: ngfft_osc(18)
2113  integer,intent(in) :: ktabr(nfftot_osc,Kmesh%nbz)
2114  type(Pawtab_type),intent(in) :: Pawtab(Psps%ntypat*Wfd%usepaw)
2115  type(pawpwff_t),intent(in) :: Paw_pwff(Psps%ntypat*Wfd%usepaw)
2116 
2117 !Local variables ------------------------------
2118 !scalars
2119  logical :: do_resonant,do_coupling
2120  !character(len=500) :: msg
2121 !arrays
2122  real(dp) :: tsec(2)
2123  complex(gwpc),allocatable :: all_mgq0(:,:,:,:,:)
2124 
2125 !************************************************************************
2126 
2127  call timab(670,1,tsec)
2128 
2129  ABI_CHECK(Wfd%nspinor==1,"nspinor==2 not coded")
2130  ABI_CHECK(nfftot_osc==PRODUCT(ngfft_osc(1:3)),"mismatch in FFT size")
2131 
2132  if (BSp%have_complex_ene) then
2133    ABI_ERROR("Complex energies are not supported yet")
2134  end if
2135 
2136  ! Do we have to compute some block?
2137  do_resonant = (BS_files%in_hreso == BSE_NOFILE)
2138  do_coupling = (BS_files%in_hcoup == BSE_NOFILE)
2139 
2140  if (BSp%use_coupling == 0) then
2141    if (.not.do_resonant) then
2142      call wrtout(std_out,"Will skip the calculation of resonant block (will use BSR file)")
2143      goto 100
2144    end if
2145  else
2146    if (.not. do_resonant .and. .not. do_coupling) then
2147      call wrtout(std_out,"Will skip the calculation of both resonant and coupling block (will use BSR and BSC files)")
2148      goto 100
2149    end if
2150  end if
2151 
2152  ! Compute M_{k,q=0}^{b,b}(G) for all k-points in the IBZ and each pair b, b'
2153  ! used for the exchange part and part of the Coulomb term.
2154  call wrtout(std_out," Calculating all matrix elements for q=0 to save CPU time")
2155 
2156  call wfd_all_mgq0(Wfd,Cryst,Qmesh,Gsph_x,Vcp,Psps,Pawtab,Paw_pwff,&
2157 &  Bsp%lomo_spin,Bsp%homo_spin,Bsp%humo_spin,nfftot_osc,ngfft_osc,Bsp%npweps,all_mgq0)
2158 
2159  ! ========================
2160  ! ==== Resonant Block ====
2161  ! ========================
2162  if (do_resonant) then
2163    call timab(672,1,tsec)
2164    call exc_build_block(BSp,Cryst,Kmesh,Qmesh,ktabr,Gsph_x,Gsph_c,Vcp,&
2165 &    Wfd,W,Hdr_bse,nfftot_osc,ngfft_osc,Psps,Pawtab,Pawang,Paw_pwff,all_mgq0,.TRUE.,BS_files%out_hreso)
2166    call timab(672,2,tsec)
2167  end if
2168 
2169  ! ========================
2170  ! ==== Coupling Block ====
2171  ! ========================
2172  if (do_coupling.and.BSp%use_coupling>0) then
2173    call timab(673,1,tsec)
2174    call exc_build_block(BSp,Cryst,Kmesh,Qmesh,ktabr,Gsph_x,Gsph_c,Vcp,&
2175 &    Wfd,W,Hdr_bse,nfftot_osc,ngfft_osc,Psps,Pawtab,Pawang,Paw_pwff,all_mgq0,.FALSE.,BS_files%out_hcoup)
2176    call timab(673,2,tsec)
2177  end if
2178 
2179  ! Free memory.
2180  ABI_FREE(all_mgq0)
2181 
2182 100 call timab(670,2,tsec)
2183 
2184 end subroutine exc_build_ham

m_exc_build/exc_build_v [ Functions ]

[ Top ] [ m_exc_build ] [ Functions ]

NAME

  exc_build_v

FUNCTION

  Calculate and write the excitonic Hamiltonian on an external binary file (Fortran file open
  in random mode) for subsequent treatment in the Bethe-Salpeter code.

INPUTS

  BSp<excparam>=The parameters for the Bethe-Salpeter calculation.
  Cryst<crystal_t>=Info on the crystalline structure.
  Kmesh<kmesh_t>=The list of k-points in the BZ, IBZ and symmetry tables.
  Qmesh<kmesh_t>=The list of q-points for epsilon^{-1} and related symmetry tables.
  Gsph_x<gsphere_t>=Info on the G-sphere used to describe wavefunctions and W (the largest one is actually stored).
  Gsph_c<gsphere_t>=Info on the G-sphere used to describe the correlation part.
  Vcp<vcoul_t>=The Coulomb interaction in reciprocal space. A cutoff can be used
  rhxtwg_q0
  is_resonant
  comm=MPI communicator.

OUTPUT

NOTES

  *) Version for K_V = K_C (q=0), thus KP_V = KP_C
  *) No exchange limit: use DFT energies in case.
  *) Symmetry of H(-k-k') = H*(k k') not used.
  *) Coulomb term can be approximated as diagonal in G.
  *) Valence bands treated from lomo on.
  *) Symmetries of the sub-blocks are used to reduce the number of elements to calculate.

            ____________
           |_(cv)__(vc)_|
   H_exc = |  R      C  |
           | -C*    -R* |

   where C is symmetric and R is Hermitian provided that the QP energies are real.

  For nsppol=1 ==> R = diag-W+2v; C = -W+2v
  since the Hamiltonian can be diagonalized in the spin-singlet basis set thanks to
  the fact that spin triplet does not contribute to the optical limit of epsilon.

  For nsppol=2 ==> R = diag-W+v; C = -W+v
  Now the matrix elements depend on the spin of the transitions but only those
  transitions in which the spin of the electron and of the hole are equal contribute
  to the macroscopic dielectric function. Moreover only the exchange term can connect transitions of different spin.
  When nsppol==2 the transitions are ordered using | (cv up) | (cv dwn) | (vc up) | (vc down) |

  The resonant block is given by:

      |  (v'c' up)       | (v'c' dwn)   |
      -----------------------------------           where v_{-+} = v_{+-}^H when the momentum of the photon is neglected.
      | [diag-W+v]++     |      v+-     | (vc up)   Note that v_{+-} is not Hermitian due to the presence of different spins.
  R = -----------------------------------           Actually it reduces to a Hermitian matrix when the system is not spin polarized.
      |     v-+          | [diag-W+v]-- | (vc dwn)  but in this case one should use nsppol=1.
      -----------------------------------           As a consequence the entire matrix is calculated and stored on file.

  The coupling block is given by:

      |  (c'v' up)   |    (c'v dwn)     |
      -----------------------------------           where v_{-+} = v_{+-}^t when the momentum of the photon is neglected.
      | [-W+v]++     |      v+-         | (vc up)   Also in this case the entire matrix v_{+-} has to be calculated
  C = -----------------------------------           and stored on file.
      |     v-+      |    [-W+v]--      | (vc dwn)
      -----------------------------------

SOURCE

1724 subroutine exc_build_v(spin1,spin2,nsppol,npweps,Bsp,Cryst,Kmesh,Qmesh,Gsph_x,Gsph_c,Vcp,&
1725 &  is_resonant,rhxtwg_q0,nproc,my_rank,t_start,t_stop,my_bsham)
1726 
1727 !Arguments ------------------------------------
1728 !scalars
1729  integer,intent(in) :: spin1,spin2,nsppol,npweps,nproc,my_rank
1730  logical,intent(in) :: is_resonant
1731  type(excparam),intent(in) :: BSp
1732  type(kmesh_t),intent(in) :: Kmesh,Qmesh
1733  type(crystal_t),intent(in) :: Cryst
1734  type(vcoul_t),intent(in) :: Vcp
1735  type(gsphere_t),intent(in) :: Gsph_x,Gsph_c
1736 !arrays
1737  integer(i8b),intent(in) :: t_start(0:nproc-1),t_stop(0:nproc-1)
1738  complex(gwpc),intent(in) :: rhxtwg_q0(npweps,BSp%lomo_min:BSp%humo_max,BSp%lomo_min:BSp%humo_max,Kmesh%nibz,nsppol)
1739  complex(dpc),intent(inout) :: my_bsham(t_start(my_rank):t_stop(my_rank))
1740 
1741 !Local variables ------------------------------
1742 !scalars
1743  integer :: ISg,ngx,ik_bz,ikp_bz,dim_rtwg
1744  integer :: neh1,neh2,ig,nblocks
1745  integer :: ik_ibz,itim_k,ikp_ibz,itim_kp,isym_k,isym_kp
1746  integer :: iq_bz,iq_ibz,isym_q,itim_q,iqbz0,rank
1747  integer :: iv,ivp,ic,icp
1748  integer :: block
1749  integer(i8b) :: tot_nels,ir,it,itp
1750  real(dp) :: faq,kx_fact
1751  complex(spc) :: ctemp
1752  character(len=500) :: msg
1753 !arrays
1754  integer :: bidx(2,4),spin_ids(2,3)
1755  integer(i8b) :: nels_block(3)
1756  integer :: my_cols(2),my_rows(2) !,proc_end(2),proc_start(2)
1757  integer,allocatable :: ncols_of(:)
1758  integer,allocatable :: col_start(:),col_stop(:)
1759  real(dp) :: qbz(3),tsec(2) !kbz(3),kpbz(3),
1760  complex(dpc),allocatable :: my_kxssp(:,:)
1761  complex(gwpc),allocatable :: vc_sqrt_qbz(:),rhotwg1(:),rhotwg2(:)
1762 
1763 !************************************************************************
1764 
1765  DBG_ENTER("COLL")
1766 
1767  write(msg,'(a,2i2,a)')" Calculating exchange term for (spin1,spin2) ",spin1,spin2," ..."
1768  call wrtout(std_out, msg)
1769 
1770  ! Basic constants.
1771  dim_rtwg=1; faq = one/(Cryst%ucvol*Kmesh%nbz)
1772 
1773  ! Identify the index of q==0
1774  iqbz0=0
1775  do iq_bz=1,Qmesh%nbz
1776    if (ALL(ABS(Qmesh%bz(:,iq_bz))<tol3)) iqbz0 = iq_bz
1777  end do
1778  ABI_CHECK(iqbz0/=0,"q=0 not found")
1779  !
1780  ! Treat the spin polarization.
1781  spin_ids(:,1) = (/1,1/)
1782  spin_ids(:,2) = (/2,2/)
1783  spin_ids(:,3) = (/1,2/)
1784 
1785  nblocks=1
1786  kx_fact=two
1787  nels_block(:)=0
1788  nels_block(1)=BSp%nreh(1)*(BSp%nreh(1)+1_i8b)/2
1789  tot_nels=nels_block(1)
1790 
1791  if (nsppol==2) then
1792    nblocks=3
1793    kx_fact=one
1794    nels_block(1) = BSp%nreh(1)*(BSp%nreh(1)+1_i8b)/2   ! Only the upper triangle for block 1 and 2
1795    nels_block(2) = BSp%nreh(2)*(BSp%nreh(2)+1_i8b)/2
1796    nels_block(3) = BSp%nreh(1)*BSp%nreh(2)*1_i8b       ! Note: Block 3 does not have symmetries.
1797    tot_nels= SUM(nels_block)
1798  end if
1799  !
1800  ! Distribute the calculation of the matrix elements among the nodes.
1801  ! * tstart and t_stop give the initial and final transition index treated by each node.
1802  ! * my_hsize is the number of transitions treated by this processor
1803  ! * my_cols(1:2) gives the initial and final column treated by this node.
1804  !
1805  do block=1,nsppol
1806    !
1807    ! Indices used to loop over bands.
1808    ! bidx contains the starting and final indices used to loop over bands.
1809    !
1810    !      (b3,b4)
1811    !         |... ...|
1812    ! (b1,b2) |... ...|
1813    !
1814    ! Resonant matrix is given by
1815    !      (v',c')
1816    !       |... ...|
1817    ! (v,c) |... ...|
1818    !
1819    ! Coupling matrix is given by
1820    !       (c',v')
1821    !       |... ...|
1822    ! (v,c) |... ...|
1823 
1824    if (is_resonant) then
1825      bidx(:,1) = [BSp%lomo_spin(block),BSp%homo_spin(block)] ! range for b1
1826      bidx(:,2) = [BSp%lumo_spin(block),BSp%humo_spin(block)] ! range for b2
1827      bidx(:,3) = [BSp%lomo_spin(block),BSp%homo_spin(block)] ! range for b3
1828      bidx(:,4) = [BSp%lumo_spin(block),BSp%humo_spin(block)] ! range for b4
1829    else
1830      bidx(:,1) = [BSp%lomo_spin(block),BSp%homo_spin(block)] ! range for b1
1831      bidx(:,2) = [BSp%lumo_spin(block),BSp%humo_spin(block)] ! range for b2
1832      bidx(:,3) = [BSp%lumo_spin(block),BSp%humo_spin(block)] ! range for b3
1833      bidx(:,4) = [BSp%lomo_spin(block),BSp%homo_spin(block)] ! range for b4
1834    end if
1835 
1836    !spin1 = spin_ids(1,block)
1837    !spin2 = spin_ids(2,block)
1838 
1839    my_cols=0
1840    do itp=1,Bsp%nreh(block)
1841      do it=1,itp
1842        ir = it + itp*(itp-1_i8b)/2
1843        if (ir==t_start(my_rank)) then
1844          my_rows(1) = it
1845          my_cols(1) = itp
1846        end if
1847        if (ir==t_stop(my_rank)) then
1848          my_rows(2) = it
1849          my_cols(2) = itp
1850        end if
1851      end do
1852    end do
1853 
1854    ! Allocate big (scalable) buffer to store the BS matrix on this node.
1855    !ABI_MALLOC(my_bsham,(t_start(my_rank):t_stop(my_rank)))
1856    !
1857    ! =====================
1858    ! === Exchange term ===
1859    ! =====================
1860    ! TODO might add treatment of <psi|q+G|psi> for q+G -> 0
1861    ! TODO might used enlarged G-sphere for better convergence.
1862 !if (do_exchange_term) then
1863    call timab(683,1,tsec) ! exc_build_ham(exchange)
1864 
1865    ABI_MALLOC(rhotwg1,(npweps))
1866    ABI_MALLOC(rhotwg2,(npweps))
1867 
1868    ngx = Gsph_x%ng
1869    ABI_MALLOC(vc_sqrt_qbz,(ngx))
1870 
1871    ! * Get iq_ibz, and symmetries from iq_bz.
1872    iq_bz = iqbz0 ! q = 0 -> iqbz0
1873    call qmesh%get_BZ_item(iq_bz,qbz,iq_ibz,isym_q,itim_q)
1874 
1875    ! * Set up table of |q(BZ)+G|
1876    if (iq_ibz==1) then
1877      do ig=1,ngx
1878        ISg = Gsph_x%rottb(ig,itim_q,isym_q)
1879        vc_sqrt_qbz(ISg)=Vcp%vcqlwl_sqrt(ig,1)
1880      end do
1881    else
1882       ABI_ERROR("iq_ibz should be 1")
1883    end if
1884 
1885    do itp=1,BSp%nreh(block) ! Loop over transition tp = (kp,vp,cp,spin2)
1886 
1887      if (itp<my_cols(1) .or. itp>my_cols(2)) CYCLE ! I dont have this column.
1888      ikp_bz = Bsp%Trans(itp,spin2)%k
1889      ivp    = Bsp%Trans(itp,spin2)%v
1890      icp    = Bsp%Trans(itp,spin2)%c
1891 
1892      ikp_ibz = Kmesh%tab (ikp_bz)
1893      isym_kp = Kmesh%tabo(ikp_bz)
1894      itim_kp = (3-Kmesh%tabi(ikp_bz))/2
1895 
1896      if (is_resonant) then
1897        rhotwg2(:) = sym_rhotwgq0(itim_kp,isym_kp,dim_rtwg,npweps,rhxtwg_q0(:,ivp,icp,ikp_ibz,spin2),Gsph_c)
1898      else ! Code for coupling block.
1899        rhotwg2(:) = sym_rhotwgq0(itim_kp,isym_kp,dim_rtwg,npweps,rhxtwg_q0(:,icp,ivp,ikp_ibz,spin2),Gsph_c)
1900      end if
1901      !
1902      ! Multiply by the Coulomb term.
1903       do ig=2,npweps
1904         rhotwg2(ig) = rhotwg2(ig) * vc_sqrt_qbz(ig) * vc_sqrt_qbz(ig)
1905       end do
1906 
1907      do it=1,itp ! Loop over transition t = (k,v,c,spin1)
1908        ir = it + itp*(itp-1_i8b)/2
1909        if (ir<t_start(my_rank) .or. ir>t_stop(my_rank)) CYCLE
1910 
1911        ik_bz   = Bsp%Trans(it,spin1)%k
1912        iv      = Bsp%Trans(it,spin1)%v
1913        ic      = Bsp%Trans(it,spin1)%c
1914 
1915        ik_ibz = Kmesh%tab(ik_bz)
1916        isym_k = Kmesh%tabo(ik_bz)
1917        itim_k = (3-Kmesh%tabi(ik_bz))/2
1918        !if (itim_k==2) CYCLE ! time-reversal or not
1919 
1920        rhotwg1(:) = sym_rhotwgq0(itim_k,isym_k,dim_rtwg,npweps,rhxtwg_q0(:,iv,ic,ik_ibz,spin1),Gsph_c)
1921        !
1922        ! sum over G/=0
1923        ctemp = xdotc(npweps-1,rhotwg1(2:),1,rhotwg2(2:),1)
1924        ctemp = faq * kx_fact * ctemp
1925 
1926        ! exchange term is non divergent !
1927        !if (BSp%prep_interp) then
1928        !  ccoeffs(ir) = ccoeffs(ir) + ctemp
1929        !end if
1930 
1931        my_bsham(ir) = my_bsham(ir) + ctemp
1932      end do !it
1933    end do !itp
1934 
1935    ABI_FREE(rhotwg1)
1936    ABI_FREE(rhotwg2)
1937    ABI_FREE(vc_sqrt_qbz)
1938 
1939    call timab(683,2,tsec) ! exc_build_ham(exchange)
1940 !end if ! do_exchange_term
1941  end do ! block
1942 
1943  !
1944  ! ===========================================
1945  ! === Exchange term for spin_up spin_down ===
1946  ! ===========================================
1947 
1948 if (nsppol==2) then
1949  call timab(686,2,tsec) ! exc_build_ham(exch.spin)
1950  block=3
1951  neh1=BSp%nreh(1)
1952  neh2=BSp%nreh(2)
1953  !
1954  ! The oscillators at q=0 are available on each node for both spin.
1955  ! Here the calculation of the block is parallelized over columns.
1956  ABI_MALLOC(col_start,(0:nproc-1))
1957  ABI_MALLOC(col_stop,(0:nproc-1))
1958  call xmpi_split_work2_i4b(neh2,nproc,col_start,col_stop) !check this but it should be OK.
1959 
1960  my_cols(1) = col_start(my_rank)
1961  my_cols(2) = col_stop (my_rank)
1962  if (my_cols(2)-my_cols(1)<=0) then
1963    ABI_ERROR("One of the processors has zero columns!")
1964  end if
1965 
1966  ABI_MALLOC(ncols_of,(0:nproc-1))
1967  ncols_of=0
1968  do rank=0,nproc-1
1969    if (col_stop(rank)>=col_start(rank)) ncols_of(rank) = col_stop(rank)-col_start(rank)+1
1970  end do
1971 
1972  ABI_FREE(col_start)
1973  ABI_FREE(col_stop)
1974  !
1975  ! TODO might add treatment of <psi|q+G|psi> for q+G -> 0
1976  ! TODO might used enlarged G-sphere for better convergence.
1977  ! Note that my_kxssp is always written on file when nsppol=2, even when
1978  ! non-local field effects are neglected.
1979  ABI_MALLOC(my_kxssp,(neh1,my_cols(1):my_cols(2)))
1980  my_kxssp=czero
1981 
1982  !if (do_exchange_term) then
1983    !spin1=1; spin2=2
1984    ABI_MALLOC(rhotwg1,(npweps))
1985    ABI_MALLOC(rhotwg2,(npweps))
1986 
1987    ngx = Gsph_x%ng
1988    ABI_MALLOC(vc_sqrt_qbz,(ngx))
1989    !
1990    ! * Get iq_ibz, and symmetries from iq_bz.
1991    iq_bz = iqbz0 ! q = 0 -> iqbz0
1992    call qmesh%get_BZ_item(iq_bz,qbz,iq_ibz,isym_q,itim_q)
1993    !
1994    ! * Set up table of |q(BZ)+G|
1995    if (iq_ibz==1) then
1996      do ig=1,ngx
1997        ISg = Gsph_x%rottb(ig,itim_q,isym_q)
1998        vc_sqrt_qbz(ISg)=Vcp%vcqlwl_sqrt(ig,1)
1999      end do
2000    else
2001       ABI_ERROR("iq_ibz should be 1")
2002    end if
2003 
2004    do itp=1,neh2 ! Loop over transition tp = (kp,vp,cp,spin2)
2005 
2006      if (itp<my_cols(1) .or. itp>my_cols(2)) CYCLE ! I dont have this column.
2007      ikp_bz = Bsp%Trans(itp,spin2)%k
2008      ivp    = Bsp%Trans(itp,spin2)%v
2009      icp    = Bsp%Trans(itp,spin2)%c
2010 
2011      ikp_ibz = Kmesh%tab (ikp_bz)
2012      isym_kp = Kmesh%tabo(ikp_bz)
2013      itim_kp = (3-Kmesh%tabi(ikp_bz))/2
2014 
2015      if (is_resonant) then
2016        rhotwg2(:) = sym_rhotwgq0(itim_kp,isym_kp,dim_rtwg,npweps,rhxtwg_q0(:,ivp,icp,ikp_ibz,spin2),Gsph_c)
2017      else ! Code for coupling block.
2018        rhotwg2(:) = sym_rhotwgq0(itim_kp,isym_kp,dim_rtwg,npweps,rhxtwg_q0(:,icp,ivp,ikp_ibz,spin2),Gsph_c)
2019      end if
2020      !
2021      ! Multiply by the Coulomb term.
2022       do ig=2,npweps
2023         rhotwg2(ig) = rhotwg2(ig) * vc_sqrt_qbz(ig) * vc_sqrt_qbz(ig)
2024       end do
2025 
2026      do it=1,neh1 ! Loop over transition t = (k,v,c,spin1) FULL matrix.
2027        ik_bz = Bsp%Trans(it,spin1)%k
2028        iv    = Bsp%Trans(it,spin1)%v
2029        ic    = Bsp%Trans(it,spin1)%c
2030 
2031        ik_ibz = Kmesh%tab(ik_bz)
2032        isym_k = Kmesh%tabo(ik_bz)
2033        itim_k = (3-Kmesh%tabi(ik_bz))/2
2034        !if (itim_k==2) CYCLE ! time-reversal or not
2035 
2036        rhotwg1(:) = sym_rhotwgq0(itim_k,isym_k,dim_rtwg,npweps,rhxtwg_q0(:,iv,ic,ik_ibz,spin1),Gsph_c)
2037        !
2038        ! sum over G/=0
2039        ctemp = XDOTC(npweps-1,rhotwg1(2:),1,rhotwg2(2:),1)
2040        ctemp = faq * kx_fact * ctemp
2041 
2042        my_kxssp(it,itp) = ctemp
2043      end do !it
2044    end do !itp
2045 
2046    ABI_FREE(rhotwg1)
2047    ABI_FREE(rhotwg2)
2048    ABI_FREE(vc_sqrt_qbz)
2049  !end if ! do_exchange_term
2050  call timab(686,2,tsec) ! exc_build_ham(exch.spin)
2051 
2052  ABI_FREE(ncols_of)
2053  ABI_SFREE(my_kxssp)
2054  end if
2055 
2056  DBG_EXIT("COLL")
2057 
2058 end subroutine exc_build_v

m_exc_build/wfd_all_mgq0 [ Functions ]

[ Top ] [ m_exc_build ] [ Functions ]

NAME

  wfd_all_mgq0

FUNCTION

INPUTS

  Wfd<wfdgw_t>=Handler for the wavefunctions.
  Cryst<crystal_t>=Info on the crystalline structure.
  Qmesh<kmesh_t>=The list of q-points for epsilon^{-1} and related symmetry tables.
  Gsph_x<gsphere_t>=G-sphere with the G-vectors in mgq0.
  Vcp<vcoul_t>=The Coulomb interaction in reciprocal space. A cutoff can be used
  Psps<Pseudopotential_type>=Variables related to pseudopotentials
  Pawtab(Psps%ntypat)<pawtab_type>=PAW tabulated starting data.
  Paw_pwff(Cryst%ntypat*Wfd%usepaw)<pawpwff_t>=Form factor used to calculate the onsite mat. elements of a plane wave.
  lomo_spin(Wfd%nsppol)=Lowest occupied band for each spin
  homo_spin(Wfd%nsppol)=Highest occupied band for each spin
  humo_spin(Wfd%nsppol)=Highest unoccupied band for each spin
  nfftot_osc=Total Number of FFT points used for the oscillator matrix elements.
  ngfft_osc(18)=Info on the FFT algorithm used to calculate the oscillator matrix elements.
  npweps=Number of G-vectors in mgq0.

OUTPUT

   mgq0(npweps,lomo_min:humo_max,lomo_min:humo_max,Wfd%nkibz,Wfd%nsppol)
     Allocated here and filled with the matrix elements on each node.

SOURCE

2215 subroutine wfd_all_mgq0(Wfd,Cryst,Qmesh,Gsph_x,Vcp,&
2216 & Psps,Pawtab,Paw_pwff,lomo_spin,homo_spin,humo_spin,nfftot_osc,ngfft_osc,npweps,mgq0)
2217 
2218 !Arguments ------------------------------------
2219 !scalars
2220  integer,intent(in) :: nfftot_osc,npweps
2221  type(kmesh_t),intent(in) :: Qmesh
2222  type(crystal_t),intent(in) :: Cryst
2223  type(vcoul_t),intent(in) :: Vcp
2224  type(gsphere_t),intent(in) :: Gsph_x
2225  type(Pseudopotential_type),intent(in) :: Psps
2226  type(wfdgw_t),target,intent(inout) :: Wfd
2227 !arrays
2228  integer,intent(in) :: lomo_spin(Wfd%nsppol),homo_spin(Wfd%nsppol),humo_spin(Wfd%nsppol)
2229  integer,intent(in) :: ngfft_osc(18)
2230  complex(gwpc),allocatable,intent(out) :: mgq0(:,:,:,:,:)
2231  type(Pawtab_type),intent(in) :: Pawtab(Psps%ntypat)
2232  type(pawpwff_t),intent(in) :: Paw_pwff(Psps%ntypat*Wfd%usepaw)
2233 
2234 !Local variables ------------------------------
2235 !scalars
2236  integer,parameter :: map2sphere1=1,dim_rtwg1=1,ndat1=1
2237  integer :: use_padfft,mgfft_osc,fftalga_osc,ii
2238  integer :: ik_ibz,itim_k,isym_k,iq_bz,iq_ibz,isym_q,itim_q,iqbz0
2239  integer :: ierr,iv,ic,spin,lomo_min,humo_max !,inv_ipw,ipw
2240  real(dp) :: cpu,wall,gflops !q0vol,fcc_const
2241  complex(dpc) :: ph_mkt
2242  character(len=500) :: msg
2243  type(wave_t),pointer :: wave_v, wave_c
2244 !arrays
2245  integer,allocatable :: igfftg0(:),task_distrib(:,:,:,:)
2246  integer,allocatable :: gbound(:,:),id_tab(:)
2247  real(dp) :: qbz(3),spinrot_k(4),tsec(2)
2248  complex(gwpc),allocatable :: rhotwg1(:)
2249  complex(gwpc),target,allocatable :: ur1(:),ur2(:)
2250  complex(gwpc),ABI_CONTIGUOUS pointer :: ptr_ur1(:),ptr_ur2(:)
2251  type(pawcprj_type),allocatable :: Cp1(:,:),Cp2(:,:)
2252  type(pawpwij_t),allocatable :: Pwij_q0(:)
2253 
2254 !************************************************************************
2255 
2256  call timab(671,1,tsec)
2257 
2258  ABI_CHECK(Wfd%nspinor==1,"nspinor==2 not coded")
2259  ABI_CHECK(nfftot_osc==PRODUCT(ngfft_osc(1:3)),"mismatch in FFT size")
2260 
2261  lomo_min = MINVAL(lomo_spin); humo_max = MAXVAL(humo_spin)
2262 
2263  if ( ANY(ngfft_osc(1:3) /= Wfd%ngfft(1:3)) ) call wfd%change_ngfft(Cryst,Psps,ngfft_osc)
2264 
2265  mgfft_osc   = MAXVAL(ngfft_osc(1:3))
2266  fftalga_osc = ngfft_osc(7)/100 !; fftalgc_osc=MOD(ngfft_osc(7),10)
2267 
2268  ! (temporary) Table used for the wavefunction in the IBZ.
2269  ABI_MALLOC(id_tab, (Wfd%nfft))
2270  id_tab = (/(ii, ii=1,Wfd%nfft)/)
2271 
2272  ! Analytic integration of 4pi/q^2 over the volume element:
2273  ! $4pi/V \int_V d^3q 1/q^2 =4pi bz_geometric_factor V^(-2/3)$
2274  ! i_sz=4*pi*bz_geometry_factor*q0_vol**(-two_thirds) where q0_vol= V_BZ/N_k
2275  ! bz_geometry_factor: sphere=7.79, fcc=7.44, sc=6.188, bcc=6.946, wz=5.255
2276  ! (see gwa.pdf, appendix A.4)
2277 
2278  ! If q=0 and C=V then set up rho-twiddle(G=0) to reflect an
2279  ! analytic integration of q**-2 over the volume element:
2280  ! <q**-2> = 7.44 V**(-2/3)   (for fcc cell)
2281 
2282  ! q0vol = (8.0*pi**3) / (Cryst%ucvol*Kmesh%nbz)
2283  ! fcc_const = SQRT(7.44*q0vol**(-2.0/3.0))
2284  ! rtw = (6.0*pi**2/(Cryst%ucvol*Kmesh%nkbz))**(1./3.)
2285  ! Average of (q+q')**-2 integration for head of Coulomb matrix
2286  ! INTRTW(QL) = (2*pi*rtw + pi*(rtw**2/QL-QL)*LOG((QL+rtw)/(QL-rtw)))
2287  ! &              * (Cryst%ucvol*Kmesh%nbz)/(2*pi)**3. * QL*QL
2288 
2289  if (Wfd%usepaw==1) then
2290    ABI_MALLOC(Cp1,(Wfd%natom,Wfd%nspinor))
2291    call pawcprj_alloc(Cp1,0,Wfd%nlmn_atm)
2292    ABI_MALLOC(Cp2,(Wfd%natom,Wfd%nspinor))
2293    call pawcprj_alloc(Cp2,0,Wfd%nlmn_atm)
2294  end if
2295 
2296  ABI_MALLOC(ur1,(nfftot_osc*Wfd%nspinor))
2297  ABI_MALLOC(ur2,(nfftot_osc*Wfd%nspinor))
2298 
2299  ! Identify q==0
2300  iqbz0=0
2301  do iq_bz=1,Qmesh%nbz
2302    if (ALL(ABS(Qmesh%bz(:,iq_bz))<tol3)) iqbz0 = iq_bz
2303  end do
2304  ABI_CHECK(iqbz0/=0,"q=0 not found in q-point list!")
2305 
2306  ! * Get iq_ibz, and symmetries from iqbz0.
2307  call qmesh%get_BZ_item(iqbz0,qbz,iq_ibz,isym_q,itim_q)
2308 
2309  if (Wfd%usepaw==1) then ! Prepare onsite contributions at q==0
2310    ABI_MALLOC(Pwij_q0,(Cryst%ntypat))
2311    call pawpwij_init(Pwij_q0,npweps,Qmesh%bz(:,iqbz0),Gsph_x%gvec,Cryst%rprimd,Psps,Pawtab,Paw_pwff)
2312  end if
2313  !
2314  ! Tables for the FFT of the oscillators.
2315  !  a) FFT index of the G sphere (only vertical transitions, unlike cchi0, no need to shift the sphere).
2316  !  b) gbound table for the zero-padded FFT performed in rhotwg.
2317  ABI_MALLOC(igfftg0,(Gsph_x%ng))
2318  ABI_MALLOC(gbound,(2*mgfft_osc+8,2))
2319  call Gsph_x%fft_tabs((/0,0,0/),mgfft_osc,ngfft_osc,use_padfft,gbound,igfftg0)
2320  if ( ANY(fftalga_osc == (/2,4/)) ) use_padfft=0 ! Pad-FFT is not coded in rho_tw_g
2321 #ifdef FC_IBM
2322  ! XLF does not deserve this optimization (problem with [v67mbpt][t03])
2323  use_padfft = 0
2324 #endif
2325  if (use_padfft==0) then
2326    ABI_FREE(gbound)
2327    ABI_MALLOC(gbound,(2*mgfft_osc+8,2*use_padfft))
2328  end if
2329 
2330  ABI_MALLOC(rhotwg1,(npweps))
2331 
2332  ABI_MALLOC_OR_DIE(mgq0, (npweps,lomo_min:humo_max,lomo_min:humo_max,Wfd%nkibz,Wfd%nsppol), ierr)
2333  mgq0 = czero
2334 
2335  call cwtime(cpu,wall,gflops,"start")
2336 
2337  do spin=1,Wfd%nsppol
2338    ! Distribute the calculation of the matrix elements.
2339    ! processors have the entire set of wavefunctions hence we divide the workload
2340    ! without checking if the pair of states is available. Last dimension is fake.
2341    ABI_MALLOC(task_distrib,(lomo_spin(spin):humo_spin(spin),lomo_spin(spin):humo_spin(spin),Wfd%nkibz,1))
2342    call xmpi_distab(Wfd%nproc,task_distrib)
2343 
2344    ! loop over the k-points in IBZ
2345    do ik_ibz=1,Wfd%nkibz
2346      if ( ALL(task_distrib(:,:,ik_ibz,1)/= Wfd%my_rank) ) CYCLE
2347 
2348      ! Don't need to symmetrize the wavefunctions.
2349      itim_k=1; isym_k=1; ph_mkt=cone; spinrot_k=Cryst%spinrot(:,isym_k)
2350 
2351      do iv=lomo_spin(spin),humo_spin(spin) ! Loop over band V
2352        if ( ALL(task_distrib(:,iv,ik_ibz,1)/=Wfd%my_rank) ) CYCLE
2353 
2354        ABI_CHECK(wfd%get_wave_ptr(iv, ik_ibz, spin, wave_v, msg) == 0, msg)
2355 
2356        if (wave_v%has_ur == WFD_STORED) then
2357          ptr_ur1 =>  wave_v%ur
2358        else
2359          call wfd%get_ur(iv,ik_ibz,spin,ur1)
2360          ptr_ur1 =>  ur1
2361        end if
2362 
2363        if (Wfd%usepaw==1) call wfd%get_cprj(iv,ik_ibz,spin,Cryst,Cp1,sorted=.FALSE.)
2364 
2365        ! Loop over band C
2366        do ic=lomo_spin(spin),humo_spin(spin)
2367          if ( task_distrib(ic,iv,ik_ibz,1)/=Wfd%my_rank ) CYCLE
2368 
2369          ABI_CHECK(wfd%get_wave_ptr(ic, ik_ibz, spin, wave_c, msg) == 0, msg)
2370 
2371          if (wave_c%has_ur == WFD_STORED) then
2372            ptr_ur2 =>  wave_c%ur
2373          else
2374            call wfd%get_ur(ic,ik_ibz,spin,ur2)
2375            ptr_ur2 =>  ur2
2376          end if
2377 
2378          if (Wfd%usepaw==1) call wfd%get_cprj(ic,ik_ibz,spin,Cryst,Cp2,sorted=.FALSE.)
2379 
2380          call rho_tw_g(Wfd%nspinor,npweps,nfftot_osc,ndat1,ngfft_osc,map2sphere1,use_padfft,igfftg0,gbound,&
2381            ptr_ur1,1,id_tab,ph_mkt,spinrot_k,&
2382            ptr_ur2,1,id_tab,ph_mkt,spinrot_k,&
2383            dim_rtwg1,rhotwg1)
2384 
2385          if (Wfd%usepaw==1) then
2386            ! Add PAW onsite contribution.
2387            call paw_rho_tw_g(npweps,dim_rtwg1,Wfd%nspinor,Cryst%natom,Cryst%ntypat,Cryst%typat,Cryst%xred,Gsph_x%gvec,&
2388              Cp1,Cp2,Pwij_q0,rhotwg1)
2389          end if
2390 
2391          ! If q=0 treat Exchange and Coulomb-term independently
2392          if (iv <= homo_spin(spin) .and. ic <= homo_spin(spin) .or. &
2393              iv >  homo_spin(spin) .and. ic >  homo_spin(spin)) then
2394 
2395            if (iv/=ic) then !COULOMB term: C/=V: ignore them
2396              rhotwg1(1) = czero_gw
2397            else
2398              ! If q=0 and C=V then set up rho-twiddle(G=0) to reflect an
2399              ! analytic integration of q**-2 over the volume element:
2400              ! <q**-2> = 7.44 V**(-2/3)   (for fcc cell)
2401              !rhotwg1(1) = fcc_const * qpg(1,iqbz0)
2402              rhotwg1(1) = SQRT(GWPC_CMPLX(Vcp%i_sz,zero)) / Vcp%vcqlwl_sqrt(1,1)
2403              !if (vcut) rhotwg1(1) = 1.0
2404            end if
2405 
2406          else
2407            ! At present this term is set to zero
2408            ! EXCHANGE term: limit value.
2409            ! Set up rho-twiddle(G=0) using small vector q instead of zero and k.p perturbation theory (see notes)
2410            rhotwg1(1) = czero_gw
2411          end if
2412 
2413          mgq0(:,iv,ic,ik_ibz,spin) = rhotwg1(:)
2414        end do !ic
2415      end do !iv
2416    end do !ik_ibz
2417 
2418    ABI_FREE(task_distrib)
2419  end do !spin
2420 
2421  ! TODO: One can speedup the calculation by computing the upper triangle of the
2422  ! matrix in (b,b') space and then take advantage of the symmetry property:
2423  !
2424  !   M_{k,0}{{bb'}(G)^* = M{k,0}{b'b'}(-G)
2425 
2426 #if 0
2427  !!!! $OMP PARALLEL DO COLLAPSE(3) PRIVATE(inv_ipw)
2428  do spin=1,Wfd%nsppol
2429    do ik_ibz=1,Wfd%nkibz
2430      do iv=lomo_spin(spin),humo_spin(spin)
2431        do ic=1,iv-1
2432          do ipw=1,npweps
2433            inv_ipw = gsph_x%g2mg(ipw)
2434            mgq0(inv_ipw,ic,iv,ik_ibz,spin) = mgq0(ipw,iv,ic,ik_ibz,spin)
2435          end do
2436        end do
2437      end do
2438    end do
2439  end do
2440 #endif
2441  !
2442  ! Gather matrix elements on each node.
2443  call xmpi_sum(mgq0,Wfd%comm,ierr)
2444 
2445  call cwtime(cpu,wall,gflops,"stop")
2446  write(msg,'(2(a,f9.6))')"cpu_time = ",cpu,", wall_time = ",wall
2447  call wrtout(std_out, msg)
2448 
2449  ABI_FREE(rhotwg1)
2450  ABI_FREE(igfftg0)
2451  ABI_FREE(gbound)
2452  ABI_FREE(ur1)
2453  ABI_FREE(ur2)
2454  ABI_FREE(id_tab)
2455 
2456  if (Wfd%usepaw==1) then
2457    ! Deallocation for PAW.
2458    call pawpwij_free(Pwij_q0)
2459    ABI_FREE(Pwij_q0)
2460    call pawcprj_free(Cp1)
2461    ABI_FREE(Cp1)
2462    call pawcprj_free(Cp2)
2463    ABI_FREE(Cp2)
2464  end if
2465 
2466  call timab(671,2,tsec)
2467 
2468 end subroutine wfd_all_mgq0