ABINIT/m_exc_itdiago [ Modules ]


NAME

 m_exc_itdiago

FUNCTION

  Iterative diagonalization of the BSE Hamiltonian with a band-by-band conjugate-gradient method

COPYRIGHT

  Copyright (C) 2008-2018 ABINIT group (MG)
  This file is distributed under the terms of the
  GNU General Public License, see ~abinit/COPYING
  or http://www.gnu.org/copyleft/gpl.txt .

PARENTS

CHILDREN

SOURCE

21 #if defined HAVE_CONFIG_H
22 #include "config.h"
23 #endif
24 
25 #include "abi_common.h"
26 
27 
28 MODULE m_exc_itdiago
29 
30  use defs_basis
31  use defs_abitypes
32  use m_bs_defs
33  use m_errors
34  use m_abicore
35  use m_linalg_interfaces
36  use m_xmpi
37 #if defined HAVE_MPI2
38  use mpi
39 #endif
40 
41  use m_io_tools,      only : open_file
42  use m_time,          only : cwtime
43  use m_numeric_tools, only : stats_t, stats_eval
44  use m_hide_lapack,   only : xhpev !xheev,
45  use m_bse_io,        only : exc_read_rcblock
46 
47  implicit none
48 
49  private
50 
51 #ifdef HAVE_MPI1
52  include 'mpif.h'
53 #endif
54 
55  public :: exc_iterative_diago    ! Calculates eigenvalues and eigenvectors of the Resonant BSE Hamiltonian

m_exc_itdiago/convergence_degree [ Functions ]


NAME

 convergence_degree

FUNCTION

  Return the degree of convergence from the input residual.

INPUTS

  resid=Residual.

OUTPUT

PARENTS

CHILDREN

SOURCE

1227 function convergence_degree(resid)
1228 
1229 
1230 !This section has been created automatically by the script Abilint (TD).
1231 !Do not modify the following lines by hand.
1232 #undef ABI_FUNC
1233 #define ABI_FUNC 'convergence_degree'
1234 !End of the abilint section
1235 
1236  implicit none
1237 
1238 !Arguments
1239  integer :: convergence_degree
1240  real(dp),intent(in) :: resid
1241 
1242 !************************************************************************
1243 
1244  if (resid<tolwfr_) then
1245    convergence_degree = STRICT
1246  else
1247    convergence_degree = WORST
1248    if (resid<tolwfr_*10**5) convergence_degree = MEDIUM
1249  end if
1250 
1251 end function convergence_degree

m_exc_itdiago/exc_check_phi_block [ Functions ]


NAME

 exc_check_phi_block

FUNCTION

  Debugging tool: checks that the trial eigenstates in phi_block are consistent and orthonormal across the MPI nodes.

INPUTS

OUTPUT

PARENTS

      m_exc_itdiago

CHILDREN

      xmpi_barrier,xmpi_exch,xmpi_sum

SOURCE

1275 subroutine exc_check_phi_block(string)
1276 
1277 
1278 !This section has been created automatically by the script Abilint (TD).
1279 !Do not modify the following lines by hand.
1280 #undef ABI_FUNC
1281 #define ABI_FUNC 'exc_check_phi_block'
1282 !End of the abilint section
1283 
1284  implicit none
1285 
1286 !Arguments ------------------------------------
1287 !scalars
1288  character(len=*),intent(in) :: string
1289 
1290 !Local variables ------------------------------
1291 !scalars
1292  integer :: ii,jj,ierr
1293  real(dp) :: err,rdum
1294 !arrays
1295  complex(dpc),allocatable :: lbuff(:,:)
1296 
1297 !************************************************************************
1298 
1299  if (.not. DEBUGME) return
1300 
1301 #if 0
1302  ABI_MALLOC(lbuff,(hexc_size,nstates))
1303  err = -one
1304  do irank=1,nproc-1
1305    call xmpi_exch(phi_block,hexc_size*nstates,irank,lbuff,master,comm,ierr)
1306    if (my_rank==master) then
1307      lbuff = lbuff-phi_block
1308      err = MAX(err,MAXVAL(MAXVAL(ABS(lbuff),DIM=1)))
1309    end if
1310    call xmpi_barrier(comm)
1311  end do
1312  ABI_FREE(lbuff)
1313 #else
1314 
1315  ABI_MALLOC(lbuff,(nstates,nstates))
1316  lbuff=czero
1317  do jj=1,nstates
1318    do ii=1,jj
1319      lbuff(ii,jj) = DOT_PRODUCT( phi_block(my_t1:my_t2,ii), phi_block(my_t1:my_t2,jj) )
1320    end do
1321  end do
1322  call xmpi_sum(lbuff,comm,ierr)
1323 
1324  err = -one
1325  do jj=1,nstates
1326    do ii=1,jj
1327      if (ii==jj) then
1328        rdum =  ABS(lbuff(ii,jj)-one)
1329      else
1330        rdum =  ABS(lbuff(ii,jj))
1331      end if
1332      err = MAX(err,rdum)
1333    end do
1334  end do
1335  ABI_FREE(lbuff)
1336 #endif
1337 
1338  if (my_rank==master) then
1339    write(std_out,*)"After ",TRIM(string),", MAX inconsistency error in phi_block= ",err
1340  end if
1341 
1342  !write(std_out,*)"master casts its own data"
1343  !call xmpi_bcast(phi_block,master,comm,ierr)
1344 
1345 end subroutine exc_check_phi_block

m_exc_itdiago/exc_cholesky_ortho [ Functions ]


NAME

 exc_cholesky_ortho

FUNCTION

  This routine performs the orthogonalization of the trial eigenstates using the
  Cholesky algorithm.
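
  In equations, with the trial states collected as the columns of Phi (a sketch of
  what the code below implements):

    $$ S = \Phi^H \Phi, \qquad S = U^H U \;\; (U \text{ upper triangular}), \qquad
       \Phi \leftarrow \Phi\, U^{-1} $$

  so that the updated states satisfy $\Phi^H \Phi = U^{-H} S U^{-1} = 1$. The overlap S
  is accumulated across nodes with xmpi_sum, factorized with ZPPTRF (packed) or ZPOTRF,
  and the last step is the triangular solve X U = Phi done by ZTRSM on the local slice
  of rows owned by each node.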

SIDE EFFECTS

   phi_block(my_t1:my_t2,nstates)=Contains the trial eigenstates.

PARENTS

      m_exc_itdiago

CHILDREN

      xmpi_barrier,xmpi_exch,xmpi_sum

SOURCE

1120 subroutine exc_cholesky_ortho()
1121 
1122 
1123 !This section has been created automatically by the script Abilint (TD).
1124 !Do not modify the following lines by hand.
1125 #undef ABI_FUNC
1126 #define ABI_FUNC 'exc_cholesky_ortho'
1127 !End of the abilint section
1128 
1129  implicit none
1130 
1131 !Local variables ------------------------------
1132  integer :: my_info,ii,jj,ipack,ierr
1133 !arrays
1134  complex(dpc),allocatable :: overlap(:,:),povlp(:)
1135 
1136 !************************************************************************
1137 
1138  ! 1) overlap_ij =  <phi_i|phi_j>
1139  ABI_MALLOC(overlap,(nstates,nstates))
1140 
1141 #if defined HAVE_BSE_UNPACKED
1142  overlap = czero
1143 
1144  call ZGEMM('C','N',nstates,nstates,my_nt,cone,phi_block,my_nt,phi_block,my_nt,czero,overlap,nstates)
1145  call xmpi_sum(overlap,comm,ierr)
1146 
1147  do ii=1,nstates
1148    overlap(ii,ii)=REAL(overlap(ii,ii),kind=dp)
1149  end do
1150 
1151  ! 2) Cholesky factorization: overlap = U^H U with U upper triangle matrix.
1152  call ZPOTRF('U',nstates,overlap,nstates,my_info)
1153  if (my_info/=0)  then
1154    write(msg,'(a,i3)')' ZPOTRF returned info= ',my_info
1155    MSG_ERROR(msg)
1156  end if
1157 
1158 #else
1159 
1160  ! 1) Calculate overlap_ij =  <phi_i|phi_j> in packed form.
1161  ABI_MALLOC(povlp,(nstates*(nstates+1)/2))
1162  povlp = czero; ipack=0
1163  do jj=1,nstates
1164    do ii=1,jj
1165      ipack=ipack+1
1166      povlp(ipack) = DOT_PRODUCT( phi_block(my_t1:my_t2,ii), phi_block(my_t1:my_t2,jj) )
1167      if (ii==jj) povlp(ipack) = REAL(povlp(ipack),kind=dp)
1168    end do
1169  end do
1170  call xmpi_sum(povlp,comm,ierr)
1171 
1172  ! 2) Cholesky factorization: overlap = U^H U with U upper triangle matrix.
1173  call ZPPTRF("U",nstates,povlp,my_info)
1174  if (my_info/=0)  then
1175    write(msg,'(a,i3)')' ZPPTRF returned info= ',my_info
1176    MSG_ERROR(msg)
1177  end if
1178  !call xmpi_sum(povlp,comm,ierr)
1179  !povlp=povlp/nproc
1180 
1181  !unpack povlp to prepare call to ZTRSM.
1182  ipack=0
1183  do jj=1,nstates
1184    do ii=1,jj
1185      ipack=ipack+1
1186      if (ii/=jj) then
1187        overlap(ii,jj)=      povlp(ipack)
1188        overlap(jj,ii)=CONJG(povlp(ipack))
1189      else
1190        overlap(ii,ii)=REAL(povlp(ipack),kind=dp)
1191      end if
1192    end do
1193  end do
1194  ABI_FREE(povlp)
1195 #endif
1196 
1197  ! Check if this can be done with Scalapack. Direct PZTRSM is not provided
1198 
1199  ! 3) Solve X U = phi_block, on exit the phi_block treated by this node is orthonormalized.
1200  !call ZTRSM('R','U','N','N',hexc_size,nstates,cone,overlap,nstates,phi_block,hexc_size)
1201  call ZTRSM('Right','Upper','Normal','Normal',my_nt,nstates,cone,overlap,nstates,phi_block,my_nt)
1202  ABI_FREE(overlap)
1203 
1204 end subroutine exc_cholesky_ortho

m_exc_itdiago/exc_init_phi_block [ Functions ]


NAME

 exc_init_phi_block

FUNCTION

  Initialize the eigenstates either from file, or fill them with random numbers
  if a restart file is not available.
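
  When no restart file is available, the trial vectors are filled with integer-based
  random numbers chosen for portability (the scheme is documented in the comments inside
  the routine). A minimal standalone sketch of that folding scheme; the program name and
  the loop range are illustrative only:

    program demo_fold_rng
      implicit none
      integer :: seed,fold1,fold2,foldre,foldim

      do seed=1,10
        ! Sums of five mod-prime terms: nearly Gaussian, then folded back to ~uniform.
        fold1 = mod(seed,3)+mod(seed,5)+mod(seed,7)+mod(seed,11)+mod(seed,13)    ! in [0,34]
        fold2 = mod(seed,17)+mod(seed,19)+mod(seed,23)+mod(seed,29)+mod(seed,31) ! in [0,114]
        foldre = mod(fold1+fold2,21)     ! in [0,20]
        foldim = mod(3*fold1+2*fold2,19) ! in [0,18]
        write(*,'(3(a,i4))') " seed=",seed,"  re=",foldre,"  im=",foldim
      end do
    end program demo_fold_rng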

INPUTS

  ihexc_fname=
    Name of the file from which the eigenvectors will be read.
    Empty string to initialize trial eigenvectors with random numbers.

SIDE EFFECTS

   phi_block(my_t1:my_t2,nstates)=Contains the trial eigenstates.

PARENTS

      m_exc_itdiago

CHILDREN

      xmpi_barrier,xmpi_exch,xmpi_sum

SOURCE

669 subroutine exc_init_phi_block(ihexc_fname,use_mpio,comm)
670 
671 
672 !This section has been created automatically by the script Abilint (TD).
673 !Do not modify the following lines by hand.
674 #undef ABI_FUNC
675 #define ABI_FUNC 'exc_init_phi_block'
676 !End of the abilint section
677 
678  implicit none
679 
680 !Arguments ------------------------------------
681 !scalars
682  integer,intent(in) :: comm
683  logical,intent(in) :: use_mpio
684  character(len=*),intent(in) :: ihexc_fname
685 
686 !Local variables ------------------------------
687  integer :: eig_unt,hexc_size_restart,ii,state,seed
688  integer ::  fold1,fold2,foldim,foldre
689  real(dp) :: cputime,walltime,gflops
690  character(len=500) :: errmsg
691 !arrays
692  complex(dpc),allocatable :: buffer_dpc(:)
693 #ifdef HAVE_MPI_IO
694  integer:: amode,mpi_fh,mpi_err,old_type,etype,eig_type,my_nel,ierr,my_nrows
695  integer(XMPI_OFFSET_KIND) :: ehdr_offset,my_offset,my_offpad,fmarker
696  integer(XMPI_OFFSET_KIND),allocatable :: bsize_frecord(:)
697  integer :: array_of_sizes(2),array_of_subsizes(2),array_of_starts(2)
698 #endif
699 
700 !************************************************************************
701 
702  if (LEN_TRIM(ihexc_fname) == 0) then
703    call wrtout(std_out," Initializing eigenvectors with random numbers","COLL")
704    !
705    ! Use random number generator. For portability, use only integer numbers
706    ! The series of couples (fold1,fold2) is periodic with a period of
707    ! 3x5x7x11x13x17x19x23x29x31, that is, larger than 2**32, the largest integer*4
708    ! fold1 is between 0 and 34, fold2 is between 0 and 114. As sums of five
709    ! uniform random variables, their distributions are close to Gaussian;
710    ! the Gaussian distributions are then folded in order to get back to uniform distributions:
711    ! foldre is between 0 and 20, foldim is between 0 and 18.
712    !
713    do state=1,nstates
714      do ii=my_t1,my_t2
715        seed=ii+(state-1)*hexc_size ! Different seed for different transitions and bands
716        fold1 =mod(seed,3)+mod(seed,5)+mod(seed,7)+mod(seed,11)+mod(seed,13)
717        fold2 =mod(seed,17)+mod(seed,19)+mod(seed,23)+mod(seed,29)+mod(seed,31)
718        foldre=mod(fold1+fold2,21)
719        foldim=mod(3*fold1+2*fold2,19)
720 
721        phi_block(ii,state) = DCMPLX(foldre,foldim)
722      end do
723    end do
724 
725  else
726 
727    call cwtime(cputime, walltime, gflops, "start")
728 
729    if (.not.use_mpio) then
730      call wrtout(std_out," Initializing eigenvectors from file: "//TRIM(ihexc_fname)//" using Fortran IO.","COLL")
731 
732      if (open_file(ihexc_fname,msg,newunit=eig_unt,form='unformatted',status="old") /=0 ) then
733        MSG_ERROR(msg)
734      end if
735 
736      read(eig_unt, err=10, iomsg=errmsg) hexc_size_restart
737      ABI_CHECK(hexc_size_restart==hexc_size,"hexc_size_restart /= hexc_size")
738      read(eig_unt, err=10, iomsg=errmsg) !skip DCMPLX(exevl(1:hexc_size))
739 
740      ABI_MALLOC(buffer_dpc,(hexc_size))
741      do ii=1,nstates
742        read(eig_unt, err=10, iomsg=errmsg) buffer_dpc
743        phi_block(my_t1:my_t2,ii) = buffer_dpc(my_t1:my_t2)
744      end do
745      ABI_FREE(buffer_dpc)
746 
747      close(eig_unt, err=10, iomsg=errmsg)
748    else
749      call wrtout(std_out," Initializing eigenvectors from file: "//TRIM(ihexc_fname)//" using MPI-IO.","COLL")
750 #ifdef HAVE_MPI_IO
751      !
752      ! Open the file with MPI-IO
753      amode=MPI_MODE_RDONLY
754 
755      call MPI_FILE_OPEN(comm, ihexc_fname, amode, MPI_INFO_NULL, mpi_fh, mpi_err)
756      msg = " MPI_IO error opening file: "//TRIM(ihexc_fname)
757      ABI_CHECK_MPI(mpi_err,msg)
758 
759      ! Move the file pointer to skip the first two records.
760      ehdr_offset=0
761      call xmpio_read_frm(mpi_fh,ehdr_offset,xmpio_collective,fmarker,mpi_err)
762      write(std_out,*)"fmarker first record ",fmarker
763      call xmpio_read_frm(mpi_fh,ehdr_offset,xmpio_collective,fmarker,mpi_err)
764      write(std_out,*)"fmarker second record ",fmarker
765      !%call hdr_mpio_skip(mpi_fh,fform,ehdr_offset)
766      !%ehdr_offset = 4*xmpio_bsize_frm + xmpio_bsize_int + nstates*xmpio_bsize_dpc
767 
768      etype=MPI_BYTE; old_type=MPI_DOUBLE_COMPLEX
769 
770      my_nrows=my_t2-my_t1+1; old_type=MPI_DOUBLE_COMPLEX
771      array_of_sizes    = (/hexc_size,nstates/)
772      array_of_subsizes = (/my_nrows,nstates/)
773      array_of_starts   = (/my_t1,1/)
774      call xmpio_create_fsubarray_2D(array_of_sizes,array_of_subsizes,array_of_starts,old_type,eig_type,my_offpad,mpi_err)
775      ABI_CHECK_MPI(mpi_err,"fsubarray_2D")
776      !
777      ! Each node uses a different offset to skip the header and the blocks written by the other CPUs.
778      my_offset = ehdr_offset + my_offpad
779 
780      call MPI_FILE_SET_VIEW(mpi_fh, my_offset, etype, eig_type, 'native', MPI_INFO_NULL, mpi_err)
781      ABI_CHECK_MPI(mpi_err,"SET_VIEW")
782 
783      call MPI_TYPE_FREE(eig_type,mpi_err)
784      ABI_CHECK_MPI(mpi_err,"MPI_TYPE_FREE")
785 
786      my_nel = my_nrows*nstates
787      call MPI_FILE_READ_ALL(mpi_fh, phi_block, my_nel, MPI_DOUBLE_COMPLEX, MPI_STATUS_IGNORE, mpi_err)
788      ABI_CHECK_MPI(mpi_err,"FILE_READ")
789 
790      ! It seems that personal calls make the code stuck
791      ! check the fortran markers.
792      ABI_MALLOC(bsize_frecord,(nstates))
793      bsize_frecord = hexc_size * xmpi_bsize_dpc
794      ! ehdr_offset points to the end of the header.
795      call xmpio_check_frmarkers(mpi_fh,ehdr_offset,xmpio_collective,nstates,bsize_frecord,ierr)
796      ABI_CHECK(ierr==0,"Error in Fortran markers")
797      ABI_FREE(bsize_frecord)
798      !
799      ! Close the file.
800      call MPI_FILE_CLOSE(mpi_fh, mpi_err)
801      ABI_CHECK_MPI(mpi_err,"FILE_CLOSE")
802 #else
803      MSG_ERROR("You should not be here")
804 #endif
805    end if
806 
807    call cwtime(cputime, walltime, gflops, "stop")
808    write(msg,'(2(a,f9.1),a)')" IO operation completed. cpu_time: ",cputime,"[s], walltime: ",walltime," [s]"
809    call wrtout(std_out, msg, "COLL", do_flush=.True.)
810  end if
811 
812  return
813 
814  ! Handle IO-error
815 10 continue
816  MSG_ERROR(errmsg)
817 
818 end subroutine exc_init_phi_block

m_exc_itdiago/exc_iterative_diago [ Functions ]


NAME

  exc_iterative_diago

FUNCTION

  Calculates eigenvalues and eigenvectors of the Hermitian excitonic Hamiltonian (coupling is neglected).

INPUTS

  Bsp
    %nreh=Rank of the resonant block of the Hamiltonian.
    %nstates=Number of eigenstates required.
    %nline=Max number of line minimizations.
    %tolwfr=Tolerance on the residuals.
    %nbdbuf
    %nstep
  BS_files<excfiles>=Datatype storing names and files used in the Bethe-Salpeter code.
    %exh=Name of the file storing the excitonic resonant part.
    %out_eig=Name of the file where the final results are stored.
    %in_eig=Name of the file used to initialize the calculation.
  comm=MPI communicator.

OUTPUT

  Eigenvalues and eigenvectors are written to the file %out_eig

NOTES

  Concerning the approach followed to parallelize this routine: the most important
  bottleneck is represented by the storage of the excitonic Hamiltonian, since a
  large number of k-points is needed to obtain converged exciton energies.
  The number of required eigenstates is usually much smaller than the rank of the full matrix;
  this is especially true if we are only interested in the binding energy of the
  exciton or in the excitonic states close to the single-particle gap.
  Therefore good scaling and good performance should be obtained by distributing
  the columns of the excitonic Hamiltonian among the nodes while the required
  eigenvectors are duplicated on each node. The memory needed to store the eigenvectors
  scales like nreh*nstates, where nreh is the rank of the Hamiltonian, and this might
  render the calculation unfeasible when nstates is large.
  On the other hand, having the complete set of trial eigenvectors on each node makes it
  possible to parallelize tasks such as the application of the Hamiltonian as well as the
  orthogonalization and the sub-space rotation; the latter two algorithms represent the
  most CPU-demanding part of standard KS calculations, as they scale with the third power
  of the number of atoms.
  The conjugate direction and the gradient, as well as Hphi, are not distributed, since the
  line minimization requires the evaluation of <cg_dir|H_exc|cg_dir>.
  Note that this routine has been written with a homogeneous network of machines in mind:
  a network made of different CPUs will lead to unpredictable results, since each node
  checks for the convergence of the calculation.
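
  A minimal sketch of this layout (hexc, my_t1, my_t2 follow the code below; xmpi_split_work
  and xmpi_sum are the ABINIT MPI helpers used in this module; fragment, not compilable
  standalone):

    ! Each node stores the columns [my_t1,my_t2] of the hexc_size x hexc_size Hamiltonian,
    ! while every trial vector is replicated, so H|phi> costs one local MATMUL plus one
    ! collective sum.
    call xmpi_split_work(hexc_size,comm,my_t1,my_t2,msg,ierr) ! column range of this node
    hphi = MATMUL(hexc, phi(my_t1:my_t2))                     ! partial H|phi> from local columns
    call xmpi_sum(hphi,comm,ierr)                             ! full H|phi> on every node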

PARENTS

      m_exc_diago

CHILDREN

      xmpi_barrier,xmpi_exch,xmpi_sum

SOURCE

116 subroutine exc_iterative_diago(BSp,BS_files,Hdr_bse,prtvol,comm)
117 
118 
119 !This section has been created automatically by the script Abilint (TD).
120 !Do not modify the following lines by hand.
121 #undef ABI_FUNC
122 #define ABI_FUNC 'exc_iterative_diago'
123 !End of the abilint section
124 
125  implicit none
126 
127 !Arguments ------------------------------------
128 !scalars
129  integer,intent(in) :: comm,prtvol
130  type(excparam),intent(in) :: BSp
131  type(excfiles),intent(in) ::  BS_files
132  type(Hdr_type),intent(in) :: Hdr_bse
133 
134 !Local variables ------------------------------
135 !scalars
136  integer,parameter :: STRICT=2,MEDIUM=1,WORST=0
137  integer,parameter :: master=0
138  integer(i8b) :: bsize_hmat,bsize_phi_block
139  integer :: hexc_size,nstates,nline,nbdbuf,nstep
140  integer :: cg_nsteps,my_t1,my_t2,my_nt
141  integer :: ii,jj,state,line,max_nline
142  integer :: cg_step,nbdbuf_,nsppol,ierr,nproc,my_rank
143  real(dp) :: exc_gap,exc_maxene,norm,etrial,etrial_old,deltae,tolwfr
144 ! real(dp) :: deold
145  real(dp) :: dhd,dhc,den,fac,poly,xx,root,swap,tan2th,diff,tolwfr_
146  !complex(dpc) :: cg_gamma,dotgg,old_dotgg
147  real(dp) :: cg_gamma,dotgg,old_dotgg
148  real(dp) :: max_resid,costh,sinth
149  complex(dpc) :: zz,kprc
150  logical,parameter :: DEBUGME=.False.
151  logical :: use_mpio,is_resonant,diago_is_real
152  character(len=500) :: msg
153  character(len=fnlen) :: hexc_fname,ihexc_fname,oeig_fname
154  type(stats_t) :: stats
155 !arrays
156  integer :: nline_for(Bsp%nstates),convergence_of(Bsp%nstates)
157  real(dp) :: resid(Bsp%nstates),exc_energy(Bsp%nstates),rbuf2(2)
158 ! real(dp),allocatable :: gsc(:,:),cg(:,:)
159  !complex,allocatable :: hexc(:,:)
160  complex(dpc),allocatable :: hexc(:,:),hji(:),vec_tmp(:)
161  complex(dpc),ABI_CONTIGUOUS pointer :: my_phi(:)
162  real(dp),allocatable :: hexc_diagonal(:)
163  complex(dpc),target,allocatable :: phi_block(:,:)
164  complex(dpc),allocatable :: hphi(:) !,buffer_dpc(:)
165  complex(dpc),allocatable :: cg_dir(:),grad_dir(:),prc_dir(:)
166  complex(dpc),allocatable :: old_cg_dir(:)
167 
168 !************************************************************************
169 
170  DBG_ENTER("COLL")
171 
172  if (Bsp%use_coupling>0) then
173    MSG_ERROR("CG Method does not support coupling")
174  end if
175 
176  nsppol = Hdr_bse%nsppol
177  if (Hdr_bse%nsppol == 2) then
178    MSG_WARNING("nsppol==2 with cg method is still under development")
179  end if
180 
181  nproc = xmpi_comm_size(comm); my_rank = xmpi_comm_rank(comm)
182 
183  use_mpio=.FALSE.
184 #ifdef HAVE_MPI_IO
185  use_mpio = (nproc > 1)
186 #endif
187  use_mpio=.FALSE.
188  !use_mpio = .TRUE.
189 
190  hexc_size = SUM(Bsp%nreh)
191  nstates= Bsp%nstates
192  nline  = Bsp%nline
193  nbdbuf = Bsp%nbdbuf
194  nstep  = Bsp%niter
195  tolwfr = Bsp%cg_tolwfr
196 
197  write(msg,'(a,i0)')' Iterative diagonalization of the resonant excitonic Hamiltonian, Matrix size= ',hexc_size
198  call wrtout(std_out,msg,"COLL")
199  call wrtout(ab_out,msg,"COLL")
200 
201  ABI_CHECK(hexc_size>=nproc,"hexc_size<nproc!")
202  ABI_CHECK(nstates <= hexc_size,"nstates cannot be greater than hexc_size!")
203 
204  ! Divide the columns of the Hamiltonian among the nodes.
205  call xmpi_split_work(hexc_size,comm,my_t1,my_t2,msg,ierr)
206  if (ierr/=0) then
207    MSG_WARNING(msg)
208  end if
209 
210  my_nt = my_t2-my_t1+1
211  write(msg,'(a,i0,a)')" Will handle ",my_nt," columns of the excitonic Hamiltonian. "
212  call wrtout(std_out,msg,"PERS")
213 
214  tolwfr_ = tolwfr
215  if (tolwfr < 1.0e-30_dp) then
216    tolwfr_ = tol12
217    write(msg,'(2(a,es12.4))')" Input tolwfr= ",tolwfr," Using tolwfr= ",tolwfr_
218    MSG_WARNING(msg)
219  end if
220 
221  cg_nsteps = nstep
222  if (cg_nsteps<=0) then
223    cg_nsteps = 30
224    write(msg,'(2(a,i0))')" Input nstep= ",nstep," Using cg_nsteps= ",cg_nsteps
225    MSG_WARNING(msg)
226  end if
227 
228  nbdbuf_ = nbdbuf
229  if (nbdbuf<=0) then
230    nbdbuf_ = 4
231    write(msg,'(2(a,i0))')" Input nbdbuf= ",nbdbuf," Using nbdbuf= ",nbdbuf_
232    MSG_WARNING(msg)
233  end if
234 
235  write(msg,"(4(a,i0,a),a,es12.4)")&
236 &  " cg_nsteps: ",cg_nsteps,ch10,&
237 &  " nstates:   ",nstates,ch10,&
238 &  " nline:     ",nline,ch10,&
239 &  " nbdbuf:    ",nbdbuf_,ch10,&
240 &  " tolwfr:    ",tolwfr_
241  call wrtout(std_out,msg,"COLL")
242  call wrtout(ab_out,msg,"COLL")
243 
244  bsize_hmat = 2*dpc*hexc_size*my_nt
245  write(msg,'(a,f8.1,a)')' Allocating excitonic Hamiltonian. Memory requested: ',bsize_hmat*b2Mb,' Mb.'
246  call wrtout(std_out,msg,"COLL")
247 
248  ABI_MALLOC(hexc_diagonal,(my_t1:my_t2))
249  ABI_STAT_MALLOC(hexc,(hexc_size,my_t1:my_t2), ierr)
250  ABI_CHECK(ierr==0, 'out of memory: excitonic hamiltonian')
251  !
252  ! Read and construct full excitonic Hamiltonian using Hermiticity.
253  if (BS_files%in_hreso /= BSE_NOFILE) then
254    hexc_fname = BS_files%in_hreso
255  else
256    hexc_fname = BS_files%out_hreso
257  end if
258  !
259  ! Read the resonant block from file.
260  is_resonant=.TRUE.
261  diago_is_real=(.not.BSp%have_complex_ene)
262  call exc_read_rcblock(hexc_fname,Bsp,is_resonant,diago_is_real,nsppol,Bsp%nreh,hexc_size,my_t1,my_t2,hexc,use_mpio,comm)
263  !
264  ! Save diagonal part for preconditioning.
265  do jj=my_t1,my_t2
266    hexc_diagonal(jj) = REAL(hexc(jj,jj),kind=dp)
267  end do
268  !
269  ! === Initialisation of the excitonic wavefunctions ===
270  ! Two cases are possible.
271  !   1) Fill trial eigenvectors with random numbers
272  !      One needs to initialize wfs in such a way to avoid symmetry traps,
273  !      and to avoid linear dependencies between wavefunctions
274  !   2) Read eigenstates generated by a previous calculation.
275 
276  bsize_phi_block = 2*dpc*my_nt*nstates
277  write(msg,'(a,f8.1,a)')' Allocating BSE eigenvectors. Memory requested: ',bsize_phi_block*b2Mb,' Mb.'
278  call wrtout(std_out,msg,"COLL",do_flush=.True.)
279 
280  ABI_STAT_MALLOC(phi_block,(my_t1:my_t2,nstates), ierr)
281  ABI_CHECK(ierr==0, "out-of-memory phi_block")
282 
283  ihexc_fname = ""
284  if (BS_files%in_eig /= BSE_NOFILE) ihexc_fname = BS_files%in_eig
285 
286  call exc_init_phi_block(ihexc_fname,use_mpio,comm)
287  !
288  ! =========================
289  ! === Orthogonalization ===
290  ! =========================
291  call exc_cholesky_ortho()
292  call exc_check_phi_block("First cholesky ortho")
293 
294  ! * Sub-space rotation.
295  call exc_subspace_rotation()
296  call exc_check_phi_block("First subspace_rotation")
297  !
298  ! ===========================
299  ! ==== Conjugate gradient ===
300  ! ===========================
301  ABI_MALLOC(hphi,(hexc_size))
302  ABI_MALLOC(cg_dir,(hexc_size))
303  ABI_MALLOC(old_cg_dir,(hexc_size))
304  ABI_MALLOC(grad_dir,(hexc_size))
305  ABI_MALLOC(prc_dir,(hexc_size))
306 
307  max_nline=nline
308  resid(:)=HUGE(one); nline_for(1:nstates)=max_nline; convergence_of(1:nstates)=WORST
309 
310  do cg_step=1,cg_nsteps
311    do state=1,nstates
312 
313      if (prtvol>=10) then ! Tell us what is going on:
314        write(msg,'(a,i6,2x,a,i3,a)')' --- exc_iterative_diago is called for state ',state,'for',nline_for(state),' lines'
315        call wrtout(std_out,msg,'PERS')
316      end if
317 
318      ! Extraction of the vector that is iteratively updated.
319      my_phi => phi_block(my_t1:my_t2,state)
320 
321      do line=1,nline_for(state)
322        ! Compute etrial=<phi|H|phi> and the residual [H-etrial]|phi>.
323        hphi = czero
324        hphi = MATMUL(hexc, my_phi)
325        call xmpi_sum(hphi,comm,ierr)
326 
327        etrial = DOT_PRODUCT(my_phi, hphi(my_t1:my_t2))
328        call xmpi_sum(etrial,comm,ierr)
329        exc_energy(state) =  etrial
330 
331        ! Compute residual (squared) norm.
332        grad_dir(my_t1:my_t2) = hphi(my_t1:my_t2) - etrial*my_phi
333        resid(state) =  DOT_PRODUCT(grad_dir(my_t1:my_t2), grad_dir(my_t1:my_t2))
334        call xmpi_sum(resid(state),comm,ierr)
335        convergence_of(state) = convergence_degree(resid(state))
336        !
337        ! Check that etrial is decreasing on succeeding lines:
338        if (line>1 .and. (etrial > etrial_old+tol12)) then
339          write(msg,'(a,i8,a,1p,e14.6,a1,3x,a,1p,e14.6,a1)')&
340 &          'New trial exc_energy at line ',line,' = ',etrial,ch10,&
341 &          'is higher than former:',etrial_old,ch10
342          MSG_WARNING(msg)
343        end if
344        etrial_old = etrial
345        !
346        ! If residual sufficiently small stop line minimization.
347        if (convergence_of(state)==STRICT) then
348          if (prtvol>=10) then
349            write(msg,'(a,i4,a,i2,a,es12.4)')&
350 &            ' exc_iterative_diago: state ',state,' converged after ',line,&
351 &            ' line minimizations : resid =',resid(state)
352            call wrtout(std_out,msg,'PERS')
353          end if
354          EXIT !line
355        end if
356 
357        ! === PROJECT THE STEEPEST DESCENT DIRECTION OVER THE SUBSPACE ORTHOGONAL TO OTHER BANDS ===
358        ! The following projection over the subspace orthogonal to occupied bands
359        ! is optional. It is a bit more accurate, but doubles the number of N^3 ops.
360        ! It is done only if ortalg>=0.
361 
362        ! Project the steepest descent direction: direc(2,npw)=<G|H|Cnk> - \sum_{(i<=n)} <G|H|Cik> , normalized.
363 
364        ! Grad_dir is already orthogonal to this band
365        ABI_MALLOC(hji,(nstates))
366        hji=czero
367 
368        ! MG TODO Don't know why here we sum over i<=n !!!!!!!!
369        do jj=1,nstates
370          if (jj/=state) hji(jj) = DOT_PRODUCT(phi_block(:,jj), hphi(my_t1:my_t2) )
371        end do
372        call xmpi_sum(hji,comm,ierr)
373 
374        do jj=1,nstates
375          if (jj/=state) grad_dir(my_t1:my_t2) = grad_dir(my_t1:my_t2) - hji(jj)*phi_block(:,jj)
376        end do
377        ABI_FREE(hji)
378        !
379        ! === PRECONDITION THE STEEPEST DESCENT DIRECTION ===
380        den = DOT_PRODUCT(grad_dir(my_t1:my_t2), hexc_diagonal(my_t1:my_t2)*grad_dir(my_t1:my_t2) )
381        call xmpi_sum(den,comm,ierr)
382 
383        do ii=my_t1,my_t2
384          ! Teter polynomial ratio, modified according to Kresse, Furthmuller, PRB 54, 11169 (1996) [[cite:Kresse1996]]
385          xx = hexc_diagonal(ii)/den 
386          poly=27._dp+xx*(18._dp+xx*(12._dp+xx*8._dp))
387          fac=poly/(poly+16._dp*xx**4)
388          kprc = fac*four/(three*den)
389          prc_dir(ii) = kprc * grad_dir(ii)
390        end do
391        !
392        ! * PROJECT THE PRECOND. STEEPEST DESCENT DIRECTION OVER THE SUBSPACE ORTHOGONAL TO OTHER BANDS.
393        ABI_MALLOC(hji,(nstates))
394        hji=czero
395        do jj=1,nstates
396          hji(jj) = DOT_PRODUCT(phi_block(:,jj), prc_dir(my_t1:my_t2) )
397        end do
398        call xmpi_sum(hji,comm,ierr)
399 
400        do jj=1,nstates
401          prc_dir(my_t1:my_t2) = prc_dir(my_t1:my_t2) - hji(jj)*phi_block(:,jj)
402        end do
403        ABI_FREE(hji)
404        !
405        ! === COMPUTE THE CONJUGATE-GRADIENT ===
406        dotgg = DOT_PRODUCT(prc_dir(my_t1:my_t2),grad_dir(my_t1:my_t2))
407        call xmpi_sum(dotgg,comm,ierr)
408 
409        if (line==1) then ! At first iteration, cg_gamma is set to zero
410          cg_gamma=zero
411          old_dotgg=dotgg
412          cg_dir = prc_dir
413          old_cg_dir = cg_dir
414        else
415          cg_gamma=dotgg/old_dotgg
416          old_dotgg=dotgg
417          !write(std_out,*)"cg_gamma= ",cg_gamma
418          !cg_dir = prc_dir + cg_gamma*cg_dir
419          cg_dir = prc_dir + cg_gamma*old_cg_dir !TODO check this, anyhow it is much faster.
420        old_cg_dir = cg_dir  ! old_cg_dir stores the previous CG direction; cg_dir will be orthonormalized to the band
421        end if
422        !
423        ! === PROJECTION OF THE CONJUGATED GRADIENT ===
424        zz = DOT_PRODUCT(my_phi, cg_dir(my_t1:my_t2))
425        call xmpi_sum(zz,comm,ierr)
426        cg_dir(my_t1:my_t2) = cg_dir(my_t1:my_t2) -zz*my_phi(:)
427 
428        norm = DOT_PRODUCT(cg_dir(my_t1:my_t2), cg_dir(my_t1:my_t2) )
429        call xmpi_sum(norm,comm,ierr)
430        norm = SQRT(norm)
431        cg_dir = cg_dir/norm ! Have to normalize it.
432 
433        ! Line minimization of the Rayleigh functional.
434        ABI_MALLOC(vec_tmp,(hexc_size))
435        vec_tmp=czero
436        vec_tmp = MATMUL(hexc, cg_dir(my_t1:my_t2))
437        call xmpi_sum(vec_tmp,comm,ierr)
438 
439        !if (my_rank==master) then
440        !  write(777,*)"cg_step, state, line",cg_step, state, line
441        !  write(777,*)vec_tmp
442        !end if
443 
444        dhd = DOT_PRODUCT( cg_dir(my_t1:my_t2), vec_tmp(my_t1:my_t2))  ! is this always real?
445        dhc = REAL( DOT_PRODUCT( my_phi, vec_tmp(my_t1:my_t2) ))
446        ABI_FREE(vec_tmp)
447 
448        rbuf2 = (/dhd,dhc/)
449        call xmpi_sum(rbuf2,comm,ierr)
450        dhd = rbuf2(1)
451        dhc = rbuf2(2)
452 
453        !write(201*(my_rank+1),*)"cg_step, state, line dotgg dhd dhc",cg_step,state,line,dotgg,dhd,dhc
454 
455 ! Taken from cgwf
456        ! Compute tan(2 theta),sin(theta) and cos(theta)
457        tan2th=2.0_dp*dhc/(etrial-dhd)
458 
459        if (abs(tan2th)<1.d-05) then
460          costh=1.0_dp-0.125_dp*tan2th**2
461          sinth=0.5_dp*tan2th*(1.0_dp-0.375_dp*tan2th**2)
462          ! Check that result is above machine precision
463          ! FIXME  This part is not safe on clusters made of different machines or different compilation options.
464          if (abs(sinth)<epsilon(0._dp)) then
465            write(msg, '(a,es16.4)' ) ' exc_iterative_diago: converged with tan2th= ',tan2th
466            call wrtout(std_out,msg,'PERS')
467            EXIT !Exit from the loop on line
468          end if
469 
470        else
471          root =sqrt(1.0_dp+tan2th**2)
472          costh=sqrt(0.5_dp+0.5_dp/root)
473          sinth=sign(sqrt(0.5_dp-0.5_dp/root),tan2th)
474        end if
475        !
476        ! Check for lower of two possible roots (same sign as curvature at theta where slope is zero)
477        diff=(etrial-dhd)
478        if (diff>zero) then !   Swap c and d if value of diff is positive
479          swap=costh
480          costh=-sinth
481          sinth=swap
482          if (prtvol<0 .or. prtvol>=10) then
483            write(msg,'(a,i4,es16.6)')' exc_iterative_diago: swap roots, line,diff= ',line,diff
484            call wrtout(std_out,msg,'PERS')
485          end if
486        end if
487        !
488        ! === GENERATE NEW |wf>, H|wf>  =============
489        my_phi = costh*my_phi + sinth*cg_dir(my_t1:my_t2)
490        !write(100*(my_rank+1),*)"cg_step state, line costh sinth etrial",cg_step,state,line,costh,sinth,etrial
491 !end  taken from cgwf
492 
493        !norm = SQRT( DOT_PRODUCT(my_phi,my_phi) )
494        !my_phi = my_phi /norm
495        !write(std_out,*)norm
496        !write(std_out,*)DOT_PRODUCT(hphi,my_phi),cos(theta_min)
497 
498        ! ======================================================================
499        ! =========== CHECK CONVERGENCE AGAINST TRIAL ENERGY ===================
500        ! ======================================================================
501        ! Compute delta(E)
502        !deltae=chc*(costh**2-1._dp)+dhd*sinth**2+2._dp*costh*sinth*dhc
503        deltae=etrial*(costh**2-1._dp)+dhd*sinth**2+2._dp*costh*sinth*dhc
504 
505        ! Check convergence and eventually exit
506        ! if (line==1) then
507        !   deold=deltae
508        ! else if (abs(deltae)<0.005_dp*abs(deold) .and. line/=nline_for(state))then
509        !  if (prtvol>=10)then
510        !   write(msg, '(a,i4,1x,a,1p,e12.4,a,e12.4,a)' ) &
511        !&   ' cgwf: line',line,&
512        !&   ' deltae=',deltae,' < 0.005*',deold,' =>skip lines'
513        !   call wrtout(std_out,msg,'PERS')
514        !  end if
515        !  exc_energy(state) = exc_energy(state) + deltae
516        !  EXIT
517        ! end if
518      end do ! LOOP FOR A GIVEN BAND. Note that there are three "exit" instructions inside
519      ! Modify nline_for(state) according to converge degree.
520      !if (convergence_of(state) == STRICT) nline_for(state) = MAX(max_nline-2,2)
521      !if (convergence_of(state) == MEDIUM) nline_for(state) = MAX(max_nline-1,2)
522      !if (convergence_of(state) == WORST ) nline_for(state) = max_nline
523    end do !state
524 
525    if (prtvol>2) then
526      do ii=0,(nstates-1)/8
527        write(msg,'(a,8es10.2)')' res:',(resid(state),state=1+ii*8,MIN(nstates,8+ii*8))
528        call wrtout(std_out,msg,'COLL')
529      end do
530      do ii=0,(nstates-1)/8
531        write(msg,'(a,8es10.2)')' ene:',(exc_energy(state),state=1+ii*8,MIN(nstates,8+ii*8))
532        call wrtout(std_out,msg,'COLL')
533      end do
534    end if
535 
536    write(msg,'(a,i0)')"After cg_step: ",cg_step
537    call exc_check_phi_block(msg)
538 
539    ! Find largest residual over bands and Print residuals
540    max_resid=MAXVAL( resid(:MAX(1,nstates-nbdbuf_)) )
541 
542    if (max_resid < tolwfr_) then
543      write(msg,'(a,i0,2(a,es10.2),a,i0,a)')&
544 &      " After ",cg_step," iterations, max_resid= ",max_resid," < tolwfr= ",tolwfr_," ( Excluding nbdbuf= ",nbdbuf_,")"
545      call wrtout(std_out,msg,'COLL')
546      EXIT ! cg_step
547    end if
548 
549    if (cg_step==1.or.MOD(cg_step,1)==0) then
550      call wrtout(std_out," Subspace rotation + exc_cholesky_ortho ","COLL")
551 
552      call exc_subspace_rotation()
553      call exc_cholesky_ortho()
554 
555      !mcg=hexc_size; mgsc=hexc_size; useoverlap=0
556      !allocate(cg(2,mcg),gsc(2,mgsc*useoverlap))
557      !do ii=1,nstates
558      ! cg(1,:) = REAL (phi_block(:,ii))
559      ! cg(2,:) = AIMAG(phi_block(:,ii))
560      ! call fxphas(cg,gsc,0,0,1,mcg,mgsc,MPI_enreg_seq,1,hexc_size,useoverlap)
561      ! phi_block(:,ii)=CMPLX(cg(1,:),cg(2,:))
562      !end do
563      !deallocate(cg,gsc)
564 
565    end if
566 
567 !XG20141126 : At present, keep this fake test. Seems that there is some bug on shiva
568 !(compiler ?) so that the result is erroneous without it. Even suppressing the
569 !"stop" instruction triggers the bug ...
570      if(cg_step==cg_nsteps+1)then
571        write(std_out,*)' One should not be here : cg_step==cg_nsteps+1 '
572        stop
573      endif
574 
575  end do !cg_step
576 
577  ! Release some memory before entering RMM-DIIS
578  ABI_FREE(hphi)
579  ABI_FREE(cg_dir)
580  ABI_FREE(old_cg_dir)
581  ABI_FREE(grad_dir)
582  ABI_FREE(prc_dir)
583 
584  do ii=0,(nstates-1)/8
585    write(msg,'(a,8es10.2)')' res:',(resid(state),state=1+ii*8,min(nstates,8+ii*8))
586    call wrtout(std_out,msg,'COLL')
587  end do
588  do ii=0,(nstates-1)/8
589    write(msg,'(a,8es10.2)')' ene:',(exc_energy(state),state=1+ii*8,min(nstates,8+ii*8))
590    call wrtout(std_out,msg,'COLL')
591  end do
592 
593  stats = stats_eval(resid)
594 
595  write(msg,"(4(a,es10.2))")&
596 &  ". Residuals: min value: ",stats%min,", Max value: ",stats%max,", mean: ",stats%mean,", stdev: ",stats%stdev
597  call wrtout(std_out,msg)
598  call wrtout(ab_out,msg)
599 
600  if (max_resid > tolwfr_) then
601    write(msg,'(2a,i5,2a,2(a,es10.2),a,i3,a)')ch10,&
602 &    " WARNING: conjugate-gradient not converged after ",cg_step," iterations.",ch10,&
603 &    " max_resid= ",max_resid," > tolwfr= ",tolwfr_," ( Excluding nbdbuf= ",nbdbuf_,")"
604    call wrtout(ab_out,msg,'COLL')
605    call wrtout(std_out,msg,'COLL')
606  end if
607 
608  exc_gap    = MINVAL(exc_energy)
609  exc_maxene = MAXVAL(exc_energy)
610 
611  write(msg,'(a,2(a,f7.2,2a))')ch10,&
612 &  " First excitonic eigenvalue= ",exc_gap*Ha_eV,   " [eV]. ",ch10,&
613 &  " Last  excitonic eigenvalue= ",exc_maxene*Ha_eV," [eV]. ",ch10
614  call wrtout(std_out,msg,"COLL")
615  call wrtout(ab_out,msg,"COLL")
616 
617  call exc_check_phi_block("END OF CONJUGATE-GRADIENT")
618 
619  ABI_FREE(hexc)
620  ABI_FREE(hexc_diagonal)
621  !
622  ! =====================================
623  ! ==== Write final results on file ====
624  ! =====================================
625  oeig_fname = BS_files%out_eig
626  if (oeig_fname== BSE_NOFILE) then
627    MSG_WARNING("oeig_fname was set to "//TRIM(BSE_NOFILE))
628    oeig_fname = TRIM(BS_files%out_basename)//"_BSEIG"
629    MSG_WARNING("using oeig_fname : "//TRIM(oeig_fname))
630  end if
631 
632  call exc_write_phi_block(oeig_fname,use_mpio)
633 
634  ABI_FREE(phi_block)
635 
636  call xmpi_barrier(comm)
637 
638  DBG_EXIT("COLL")
639 
640 CONTAINS  !===========================================================
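
  For reference, the line minimization inside the loop above rotates the state within the
  two-dimensional space spanned by the current vector and the normalized conjugate direction
  (a sketch of the algebra, in the variable names of the code: etrial = <phi|H|phi>,
  dhd = <d|H|d>, dhc = Re <phi|H|d>):

    $$ |\phi(\theta)\rangle = \cos\theta\,|\phi\rangle + \sin\theta\,|d\rangle $$
    $$ E(\theta) = \text{etrial}\,\cos^2\theta + \text{dhd}\,\sin^2\theta
                   + 2\,\text{dhc}\,\cos\theta\sin\theta $$
    $$ E'(\theta) = 0 \;\Longrightarrow\; \tan 2\theta = \frac{2\,\text{dhc}}{\text{etrial}-\text{dhd}} $$

  which is the tan2th evaluated in the code; the energy change monitored at the end of each
  line, deltae = etrial*(costh**2-1) + dhd*sinth**2 + 2*costh*sinth*dhc, is E(theta) - etrial.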

m_exc_itdiago/exc_subspace_rotation [ Functions ]


NAME

 exc_subspace_rotation

FUNCTION

  This routine performs the subspace rotation.
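
  In equations (a sketch; Phi collects the current trial states as columns):

    $$ H^{\text{sub}}_{ij} = \langle \phi_i | H_{\text{exc}} | \phi_j \rangle, \qquad
       H^{\text{sub}} C = C\,\text{diag}(\varepsilon_1,\ldots,\varepsilon_{\text{nstates}}), \qquad
       \Phi \leftarrow \Phi\, C $$

  i.e. the Hamiltonian is diagonalized exactly within the nstates-dimensional trial subspace
  (here through the packed Hermitian eigensolver xhpev) and the trial vectors are rotated onto
  the subspace eigenvectors.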

SIDE EFFECTS

   phi_block(my_t1:my_t2,nstates)=Contains the trial eigenstates.

PARENTS

      m_exc_itdiago

CHILDREN

      xmpi_barrier,xmpi_exch,xmpi_sum

SOURCE

1008 subroutine exc_subspace_rotation()
1009 
1010 
1011 !This section has been created automatically by the script Abilint (TD).
1012 !Do not modify the following lines by hand.
1013 #undef ABI_FUNC
1014 #define ABI_FUNC 'exc_subspace_rotation'
1015 !End of the abilint section
1016 
1017  implicit none
1018 
1019 !Local variables ------------------------------
1020  integer :: ii,jj,ipack,ierr
1021 !arrays
1022  real(dp),allocatable :: sub_ene(:)
1023 ! real(dp),allocatable :: evec(:,:)
1024  complex(dpc),allocatable :: sub_ham(:,:),sub_pham(:),hphi_tot(:)
1025 ! complex(dpc),allocatable :: phi_tmp(:,:)
1026 
1027 !************************************************************************
1028 
1029  ! * Sub-space rotation. Calculate <phi_i|H|phi_j> in packed form.
1030  ! TODO: this part can be rewritten using BLAS3 routines.
1031 
1032  ABI_MALLOC(hphi_tot,(hexc_size))
1033  ABI_MALLOC(sub_pham,(nstates*(nstates+1)/2))
1034  sub_pham=czero; ipack=0
1035 
1036  do jj=1,nstates
1037    hphi_tot = czero
1038    hphi_tot(:) = MATMUL(hexc, phi_block(:,jj))
1039    call xmpi_sum(hphi_tot,comm,ierr)
1040 
1041    do ii=1,jj
1042      ipack=ipack+1
1043      sub_pham(ipack) = DOT_PRODUCT(phi_block(my_t1:my_t2,ii), hphi_tot(my_t1:my_t2) )
1044      if (ii==jj) sub_pham(ipack) = REAL(sub_pham(ipack),kind=dp)
1045    end do
1046  end do
1047  call xmpi_sum(sub_pham,comm,ierr)
1048 
1049  ABI_MALLOC(sub_ham,(nstates,nstates))
1050  sub_ham=czero
1051  ABI_MALLOC(sub_ene,(nstates))
1052 
1053  call xhpev("Vectors","Upper",nstates,sub_pham,sub_ene,sub_ham,nstates) !,comm)
1054 
1055  ABI_FREE(hphi_tot)
1056  ABI_FREE(sub_pham)
1057  ABI_FREE(sub_ene)
1058 
1059  !do ii=1,nstates
1060  ! norm = DOT_PRODUCT(sub_ham(:,ii),sub_ham(:,ii))
1061  ! write(std_out,*)"norm subspac",norm
1062  ! sub_ham(:,ii) = sub_ham(:,ii)/norm
1063  !end do
1064 
1065  !allocate(evec(2*nstates,nstates))
1066 
1067  !do ii=1,nstates
1068  ! do jj=1,nstates
1069  ! evec(jj,  ii) = REAL (sub_ham(jj,ii))
1070  ! evec(jj+1,ii) = AIMAG(sub_ham(jj,ii))
1071  ! end do
1072  !end do
1073 
1074  !call normev(evec,nstates,nstates)
1075 
1076  !do ii=1,nstates
1077  ! do jj=1,nstates
1078  !  sub_ham(jj,ii) = CMPLX( evec(jj,ii),evec(jj+1,ii) )
1079  ! end do
1080  !end do
1081  !deallocate(evec)
1082 
1083 #if 0
1084  ABI_MALLOC(phi_tmp,(my_nt,nstates))
1085  phi_tmp = phi_block
1086 
1087  call ZGEMM('N','N',my_nt,nstates,nstates,cone,phi_tmp,my_nt,sub_ham,nstates,czero,phi_block,my_nt)
1088 
1089  ABI_FREE(phi_tmp)
1090 #else
1091  phi_block = MATMUL(phi_block,sub_ham)
1092 #endif
1093 
1094  ABI_FREE(sub_ham)
1095 
1096 end subroutine exc_subspace_rotation

m_exc_itdiago/exc_write_phi_block [ Functions ]


NAME

 exc_write_phi_block

FUNCTION

  Write phi_block to the file oeig_fname, using either Fortran-IO or MPI-IO.
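
  The Fortran-IO branch writes three header records (the do_ep_lifetime flag; hexc_size and
  nstates; the complex eigenvalues) followed by one record per eigenvector. A minimal sketch
  of a post-processing reader for that layout; the program name and file name are
  illustrative only:

    program read_bseig
      implicit none
      integer,parameter :: dpc = kind((1.0d0,1.0d0))
      logical :: do_ep_lifetime
      integer :: unt,hexc_size,nstates,state
      complex(dpc),allocatable :: ene(:),vec(:)

      open(newunit=unt,file="out_BSEIG",form="unformatted",status="old")
      read(unt) do_ep_lifetime     ! record 1: electron-phonon lifetime flag
      read(unt) hexc_size,nstates  ! record 2: dimensions
      allocate(ene(nstates),vec(hexc_size))
      read(unt) ene                ! record 3: complex eigenvalues
      do state=1,nstates
        read(unt) vec              ! one full eigenvector per record
      end do
      close(unt)
    end program read_bseig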

PARENTS

      m_exc_itdiago

CHILDREN

      xmpi_barrier,xmpi_exch,xmpi_sum

SOURCE

838 subroutine exc_write_phi_block(oeig_fname,use_mpio)
839 
840 
841 !This section has been created automatically by the script Abilint (TD).
842 !Do not modify the following lines by hand.
843 #undef ABI_FUNC
844 #define ABI_FUNC 'exc_write_phi_block'
845 !End of the abilint section
846 
847  implicit none
848 
849 !Arguments ------------------------------------
850 !scalars
851  character(len=*),intent(in) :: oeig_fname
852  logical,intent(in) :: use_mpio
853 
854 !Local variables ------------------------------
855  integer :: eig_unt,state,mpi_err !,fform
856  real(dp) :: cputime,walltime,gflops
857  character(len=500) :: msg,errmsg
858 ! type(Hdr_type) :: hexc_Hdr
859  logical :: do_ep_lifetime
860 !!arrays
861  complex(dpc),allocatable :: buffer_dpc(:)
862 #ifdef HAVE_MPI_IO
863  integer:: amode,mpi_fh,old_type,etype,eig_type,my_nel,ierr,my_nrows
864  integer(XMPI_OFFSET_KIND) :: ehdr_offset,my_offset,my_offpad,fmarker
865  integer(XMPI_OFFSET_KIND),allocatable :: bsize_frecord(:)
866  integer :: array_of_sizes(2),array_of_subsizes(2),array_of_starts(2)
867 #endif
868 
869 !************************************************************************
870 
871  do_ep_lifetime = .FALSE.
872 
873  call cwtime(cputime, walltime, gflops, "start")
874 
875  if (.not.use_mpio) then
876 
877    ! * Master writes the header.
878    if (my_rank==master) then
879      call wrtout(std_out," Writing eigenstates on file "//TRIM(oeig_fname)//" via Fortran-IO","COLL")
880      if (open_file(oeig_fname,msg, newunit=eig_unt, form='unformatted') /= 0) then
881        MSG_ERROR(msg)
882      end if
883      write(eig_unt, err=10, iomsg=errmsg) do_ep_lifetime
884      write(eig_unt, err=10, iomsg=errmsg) hexc_size, nstates
885      write(eig_unt, err=10, iomsg=errmsg) CMPLX(exc_energy(1:nstates),kind=dpc)
886    end if
887 
888    ! Wavefunctions are gathered on the master node band-by-band.
889    ! TODO bands should be treated in blocks to minimize the number of MPI calls.
890    ABI_STAT_MALLOC(buffer_dpc,(hexc_size), ierr)
891    ABI_CHECK(ierr==0, "out of memory buffer_dpc")
892 
893    do state=1,nstates
894      buffer_dpc=czero
895      buffer_dpc(my_t1:my_t2) = phi_block(:,state)
896      call xmpi_sum_master(buffer_dpc,master,comm,mpi_err)
897      if (my_rank==master) write(eig_unt, err=10, iomsg=errmsg) buffer_dpc(1:hexc_size)
898    end do
899    ABI_FREE(buffer_dpc)
900 
901    if (my_rank==master) close(eig_unt, err=10, iomsg=errmsg)
902 
903  else
904 #ifdef HAVE_MPI_IO
905    call wrtout(std_out," Writing eigenstates on file "//TRIM(oeig_fname)//" with MPI-IO","COLL")
906 
907    ! Write the header.
908    if (my_rank==master) then
909      ! Write header using Fortran primitives.
910      if (open_file(oeig_fname,msg,newunit=eig_unt,form='unformatted') /= 0) then
911        MSG_ERROR(msg)
912      end if
913      write(eig_unt, err=10, iomsg=errmsg) nstates
914      write(eig_unt, err=10, iomsg=errmsg) CMPLX(exc_energy(1:nstates),kind=dpc)
915      ! TODO: change setup_bse so that Hdr_bse reflects the parameters of the run.
916      close(eig_unt, err=10, iomsg=errmsg)
917    end if
918 
919    call xmpi_barrier(comm)
920 
921    ! Open the file with MPI-IO
922    amode=MPI_MODE_RDWR
923 
924    call MPI_FILE_OPEN(comm, oeig_fname, amode, MPI_INFO_NULL, mpi_fh, mpi_err)
925    msg = " MPI_IO error opening file: "//TRIM(oeig_fname)
926    ABI_CHECK_MPI(mpi_err,msg)
927 
928    ! Move the file pointer to skip the first two records.
929    ehdr_offset=0
930    call xmpio_read_frm(mpi_fh,ehdr_offset,xmpio_collective,fmarker,mpi_err)
931    !write(std_out,*)"fmarker first record ",fmarker
932    call xmpio_read_frm(mpi_fh,ehdr_offset,xmpio_collective,fmarker,mpi_err)
933    !write(std_out,*)"fmarker second record ",fmarker
934    !$call hdr_mpio_skip(mpi_fh,fform,ehdr_offset)
935    !$ehdr_offset = 4*xmpio_bsize_frm + xmpio_bsize_int + nstates*xmpio_bsize_dpc
936 
937    etype=MPI_BYTE; old_type=MPI_DOUBLE_COMPLEX
938 
939    my_nrows=my_t2-my_t1+1; old_type=MPI_DOUBLE_COMPLEX
940    array_of_sizes    = (/hexc_size,nstates/)
941    array_of_subsizes = (/my_nrows,nstates/)
942    array_of_starts   = (/my_t1,1/)
943    call xmpio_create_fsubarray_2D(array_of_sizes,array_of_subsizes,array_of_starts,old_type,eig_type,my_offpad,mpi_err)
944    ABI_CHECK_MPI(mpi_err,"fsubarray_2D")
945    !
946    ! Each node uses a different offset to skip the header and the blocks written by the other CPUs.
947    my_offset = ehdr_offset + my_offpad
948 
949    call MPI_FILE_SET_VIEW(mpi_fh, my_offset, etype, eig_type, 'native', MPI_INFO_NULL, mpi_err)
950    ABI_CHECK_MPI(mpi_err,"SET_VIEW")
951 
952    call MPI_TYPE_FREE(eig_type,mpi_err)
953    ABI_CHECK_MPI(mpi_err,"MPI_TYPE_FREE")
954 
955    my_nel = my_nrows*nstates
956    call MPI_FILE_WRITE_ALL(mpi_fh, phi_block, my_nel, MPI_DOUBLE_COMPLEX, MPI_STATUS_IGNORE, mpi_err)
957    ABI_CHECK_MPI(mpi_err,"FILE_WRITE")
958 
959    ! It seems that personal calls make the code stuck
960    ABI_MALLOC(bsize_frecord,(nstates))
961    bsize_frecord = hexc_size * xmpi_bsize_dpc
962    ! ehdr_offset points to the end of the header.
963    call xmpio_write_frmarkers(mpi_fh,ehdr_offset,xmpio_collective,nstates,bsize_frecord,ierr)
964    ABI_CHECK(ierr==0,"Error while writing Fortran markers")
965    ABI_FREE(bsize_frecord)
966    !
967    ! Close the file.
968    call MPI_FILE_CLOSE(mpi_fh, mpi_err)
969    ABI_CHECK_MPI(mpi_err,"FILE_CLOSE")
970 #else
971    MSG_ERROR("MPI-IO support not enabled")
972 #endif
973  end if
974 
975  call cwtime(cputime, walltime, gflops, "stop")
976  write(msg,'(2(a,f9.1),a)')" IO operation completed. cpu_time: ",cputime,"[s], walltime: ",walltime," [s]"
977  call wrtout(std_out, msg, "COLL", do_flush=.True.)
978 
979  return
980 
981  ! Handle IO-error
982 10 continue
983  MSG_ERROR(errmsg)
984 
985 end subroutine exc_write_phi_block