TABLE OF CONTENTS


ABINIT/cheb_oracle [ Functions ]

[ Top ] [ Functions ]

NAME

 cheb_oracle

FUNCTION

 Returns the number of necessary iterations to decrease residual by at least tol
 Here as in the rest of the code, the convention is that residuals are squared (||Ax-lx||^2)

INPUTS

 x= input variable
 a= left bound of the interval
 b= right bound of the interval
 tol= needed precision
 nmax= max number of iterations

OUTPUT

 n= number of iterations needed to decrease residual by tol

PARENTS

      chebfi

CHILDREN

NOTES

SOURCE

685 function cheb_oracle(x, a, b, tol, nmax) result(n)
686 
687 
688 !This section has been created automatically by the script Abilint (TD).
689 !Do not modify the following lines by hand.
690 #undef ABI_FUNC
691 #define ABI_FUNC 'cheb_oracle'
692 !End of the abilint section
693 
694  implicit none
695 
696  real(dp) :: tol
697 
698  integer :: nmax
699  integer :: n, i
700  real(dp), intent(in) :: x, a, b
701  real(dp) :: y, xred, temp
702  real(dp) :: yim1
703 
704 ! *************************************************************************
705 
706  xred = (x-(a+b)/2)/(b-a)*2
707  y = xred
708  yim1 = one
709 
710  n = nmax
711  if(1/(y**2) < tol) then
712    n = 1
713  else
714    do i=2, nmax-1
715      temp = y
716      y = 2*xred*y - yim1
717      yim1 = temp
718      if(1/(y**2) < tol) then
719        n = i
720        exit
721      end if
722    end do
723  end if
724 
725 end function cheb_oracle

ABINIT/cheb_poly [ Functions ]

[ Top ] [ Functions ]

NAME

 cheb_poly

FUNCTION

 Computes the value of the Chebyshev polynomial of degree n on the interval [a,b] at x

INPUTS

 x= input variable
 n= degree
 a= left bound of the interval
 b= right bound of the interval

OUTPUT

 y= Tn(x)

PARENTS

      chebfi

CHILDREN

NOTES

SOURCE

627 function cheb_poly(x, n, a, b) result(y)
628 
629 
630 !This section has been created automatically by the script Abilint (TD).
631 !Do not modify the following lines by hand.
632 #undef ABI_FUNC
633 #define ABI_FUNC 'cheb_poly'
634 !End of the abilint section
635 
636  implicit none
637 
638  integer, intent(in) :: n
639  integer :: i
640  real(dp), intent(in) :: x, a, b
641  real(dp) :: y, xred, temp
642  real(dp) :: yim1
643 
644 ! *************************************************************************
645 
646  xred = (x-(a+b)/2)/(b-a)*2
647  y = xred
648  yim1 = one
649  do i=2, n
650    temp = y
651    y = 2*xred*y - yim1
652    yim1 = temp
653  end do
654 
655 end function cheb_poly

ABINIT/chebfi [ Functions ]

[ Top ] [ Functions ]

NAME

 chebfi

FUNCTION

 this routine updates the wave functions at a given k-point,
 using the ChebFi method (see paper by A. Levitt and M. Torrent)

INPUTS

  dtset <type(dataset_type)>=all input variales for this dataset
  gs_hamk <type(gs_hamiltonian_type)>=all data for the hamiltonian at k
  kinpw(npw)=(modified) kinetic energy for each plane wave (hartree)
  mpi_enreg=information about MPI parallelization
  nband=number of bands at this k point for that spin polarization
  npw=number of plane waves at this k point
  nspinor=number of plane waves at this k point
  prtvol=control print volume and debugging output

OUTPUT

  eig(nband)=array for holding eigenvalues (hartree)
  resid(nband)=residuals for each states
  If gs_hamk%usepaw==1:
    gsc(2,*)=<g|s|c> matrix elements (s=overlap)
  If gs_hamk%usepaw==0
    enl(nband)=contribution from each band to nonlocal pseudopotential part of total energy, at this k-point

SIDE EFFECTS

  cg(2,*)=updated wavefunctions

PARENTS

      vtowfk

CHILDREN

      apply_invovl,dotprod_g,getghc,pawcprj_alloc,pawcprj_axpby,pawcprj_copy
      pawcprj_free,prep_getghc,prep_index_wavef_bandpp
      rayleigh_ritz_distributed,rayleigh_ritz_subdiago,timab,wrtout
      xmpi_alltoallv,xmpi_barrier,xmpi_max,xmpi_min,xmpi_sum

NOTES

  -- TODO --
  Normev?
  Ecutsm
  nspinor 2
  spinors parallelisation
  fock
  -- Performance --
  Improve load balancing
  Don't diagonalize converged eigenvectors, just orthogonalize
  Maybe don't diagonalize so often (once every two outer iterations?)
  Benchmark diagonalizations, choose np_slk
  How to chose npfft?
  Implement MINRES for invovl
  -- LOBPCG --
  Improve stability (see paper by Lehoucq Sorensen, maybe use bunch-kaufman factorizations?)

SOURCE

115 subroutine chebfi(cg,dtset,eig,enl,gs_hamk,gsc,kinpw,mpi_enreg,nband,npw,nspinor,prtvol,resid)
116 
117 
118 !This section has been created automatically by the script Abilint (TD).
119 !Do not modify the following lines by hand.
120 #undef ABI_FUNC
121 #define ABI_FUNC 'chebfi'
122 !End of the abilint section
123 
124  implicit none
125 
126 !Arguments ------------------------------------
127  type(gs_hamiltonian_type),intent(inout) :: gs_hamk
128  type(dataset_type),intent(in) :: dtset
129  type(mpi_type),intent(inout) :: mpi_enreg
130  integer,intent(in) :: nband,npw,prtvol,nspinor
131  real(dp),intent(inout), target :: cg(2,npw*nspinor*nband),gsc(2,npw*nspinor*nband)
132  real(dp),intent(in) :: kinpw(npw)
133  real(dp),intent(out) :: resid(nband)
134  real(dp),intent(out) :: enl(nband*(1-gs_hamk%usepaw))
135  real(dp),intent(out) :: eig(nband)
136 
137 !Local variables-------------------------------
138  real(dp) :: pcon(npw)
139  real(dp) :: filter_low
140  real(dp) :: filter_center, filter_radius
141  real(dp), dimension(2, npw*nspinor*nband), target :: ghc, gvnlc
142  real(dp), allocatable, dimension(:,:) :: cg_filter_next, cg_filter_prev, gsm1hc_filter, gsc_filter_prev, gsc_filter_next
143  real(dp), allocatable, dimension(:,:), target :: cg_alltoall1,gsc_alltoall1,ghc_alltoall1,gvnlc_alltoall1
144  real(dp), allocatable, dimension(:,:), target :: cg_alltoall2,gsc_alltoall2,ghc_alltoall2,gvnlc_alltoall2
145  real(dp), pointer, dimension(:,:) :: cg_filter, gsc_filter, ghc_filter, gvnlc_filter
146  real(dp) :: resid_vec(2, npw*nspinor)
147  logical :: paw
148  integer :: shift, shift_cg_loadbalanced
149  integer :: iband, iline, ispinor
150  integer :: sij_opt, cpopt
151  real(dp) :: eval, tsec(2)
152  integer :: tim_getghc = 5, ierr
153  integer :: i
154  integer, allocatable :: index_wavef_band(:)
155  real(dp) :: maxeig, mineig
156  real(dp), allocatable :: resids_filter(:), residvec_filter(:,:)
157  integer, allocatable :: nline_bands(:)
158  integer :: iactive, nactive
159  real(dp) :: ampfactor
160  integer :: nline_max, nline_decrease, nline_tolwfr
161  ! real(dp) :: load_imbalance
162  integer :: mcg
163  real(dp) :: dprod_r, dprod_i
164  character(len=500) :: message
165  integer :: rdisplsloc(mpi_enreg%nproc_band),recvcountsloc(mpi_enreg%nproc_band)
166  integer :: sdisplsloc(mpi_enreg%nproc_band), sendcountsloc(mpi_enreg%nproc_band)
167  integer :: ikpt_this_proc, npw_filter, nband_filter
168  type(pawcprj_type), allocatable :: cwaveprj(:,:), cwaveprj_next(:,:), cwaveprj_prev(:,:)
169  ! integer :: nline_total
170 
171  ! timers
172  integer, parameter :: timer_chebfi = 1600, timer_alltoall = 1601, timer_apply_inv_ovl = 1602, timer_rotation = 1603
173  integer, parameter :: timer_subdiago = 1604, timer_subham = 1605, timer_ortho = 1606, timer_getghc = 1607
174  integer, parameter :: timer_residuals = 1608, timer_update_eigen = 1609, timer_sync = 1610
175 
176 ! *************************************************************************
177 
178  !======================================================================================================
179  ! Initialize, transpose input cg if paral_kgb
180  !======================================================================================================
181  call timab(timer_chebfi,1,tsec)
182 
183  !Initializations
184  paw = gs_hamk%usepaw == 1
185  mcg = npw*nspinor*nband
186 
187  ! Init pcon
188  pcon = (27+kinpw*(18+kinpw*(12+8*kinpw))) / (27+kinpw*(18+kinpw*(12+8*kinpw)) + 16*kinpw**4)
189 
190  ghc=zero; gvnlc=zero
191 
192  ! Initialize the _filter pointers. Depending on paral_kgb, they might point to the actual arrays or to _alltoall variables
193  if (dtset%paral_kgb == 1) then
194    ikpt_this_proc = bandfft_kpt_get_ikpt()
195    npw_filter = bandfft_kpt(ikpt_this_proc)%ndatarecv
196    nband_filter = mpi_enreg%bandpp
197 
198    ABI_ALLOCATE(cg_alltoall1, (2, npw_filter*nspinor*nband_filter))
199    ABI_ALLOCATE(gsc_alltoall1, (2, npw_filter*nspinor*nband_filter))
200    ABI_ALLOCATE(ghc_alltoall1, (2, npw_filter*nspinor*nband_filter))
201    ABI_ALLOCATE(gvnlc_alltoall1, (2, npw_filter*nspinor*nband_filter))
202    ABI_ALLOCATE(cg_alltoall2, (2, npw_filter*nspinor*nband_filter))
203    ABI_ALLOCATE(gsc_alltoall2, (2, npw_filter*nspinor*nband_filter))
204    ABI_ALLOCATE(ghc_alltoall2, (2, npw_filter*nspinor*nband_filter))
205    ABI_ALLOCATE(gvnlc_alltoall2, (2, npw_filter*nspinor*nband_filter))
206 
207    ! Init tranpose variables
208    recvcountsloc=bandfft_kpt(ikpt_this_proc)%recvcounts*2*nspinor*mpi_enreg%bandpp
209    rdisplsloc=bandfft_kpt(ikpt_this_proc)%rdispls*2*nspinor*mpi_enreg%bandpp
210    sendcountsloc=bandfft_kpt(ikpt_this_proc)%sendcounts*2*nspinor
211    sdisplsloc=bandfft_kpt(ikpt_this_proc)%sdispls*2*nspinor
212 
213    ! Load balancing, so that each processor has approximately the same number of converged and non-converged bands
214    ! for two procs, rearrange 1 2 3 4 5 6 as 1 4 2 5 3 6
215    !
216    ! trick to save memory: ghc has the necessary size, and will be overwritten afterwards anyway
217 #define cg_loadbalanced ghc
218    shift = 0
219    do i=1, mpi_enreg%nproc_band
220      do iband=1, mpi_enreg%bandpp
221        shift_cg_loadbalanced = (i-1 + (iband-1)*mpi_enreg%nproc_band)*npw*nspinor
222        cg_loadbalanced(:, shift+1:shift+npw*nspinor) = cg(:, shift_cg_loadbalanced+1:shift_cg_loadbalanced+npw*nspinor)
223        shift = shift + npw*nspinor
224      end do
225    end do
226 
227    ! Transpose input cg into cg_alloall1. cg_alltoall1 is now (npw_filter, nband_filter)
228    call timab(timer_alltoall, 1, tsec)
229    call xmpi_alltoallv(cg_loadbalanced,sendcountsloc,sdisplsloc,cg_alltoall1,&
230 &   recvcountsloc,rdisplsloc,mpi_enreg%comm_band,ierr)
231    call timab(timer_alltoall, 2, tsec)
232 #undef cg_loadbalanced
233 
234    ! sort according to bandpp (from lobpcg, I don't fully understand what's going on but it works and it's fast)
235    call prep_index_wavef_bandpp(mpi_enreg%nproc_band,mpi_enreg%bandpp,&
236 &   nspinor,bandfft_kpt(ikpt_this_proc)%ndatarecv,&
237 &   bandfft_kpt(ikpt_this_proc)%recvcounts,bandfft_kpt(ikpt_this_proc)%rdispls,&
238 &   index_wavef_band)
239    cg_alltoall2(:,:) = cg_alltoall1(:,index_wavef_band)
240 
241    cg_filter => cg_alltoall2
242    gsc_filter => gsc_alltoall2
243    ghc_filter => ghc_alltoall2
244    gvnlc_filter => gvnlc_alltoall2
245  else
246    npw_filter = npw
247    nband_filter = nband
248 
249    cg_filter => cg
250    gsc_filter => gsc
251    ghc_filter => ghc
252    gvnlc_filter => gvnlc
253  end if
254  ! from here to the next alltoall, all computation is done on _filter variables, agnostic
255  ! to whether it's nband x npw (paral_kgb == 0) or ndatarecv*bandpp (paral_kgb = 1)
256 
257  ! Allocate filter variables for the application of the Chebyshev polynomial
258  ABI_ALLOCATE(cg_filter_next, (2, npw_filter*nspinor*nband_filter))
259  ABI_ALLOCATE(cg_filter_prev, (2, npw_filter*nspinor*nband_filter))
260  ABI_ALLOCATE(gsc_filter_prev, (2, npw_filter*nspinor*nband_filter))
261  ABI_ALLOCATE(gsc_filter_next, (2, npw_filter*nspinor*nband_filter))
262  ABI_ALLOCATE(gsm1hc_filter, (2, npw_filter*nspinor*nband_filter))
263 
264  ! PAW init
265  if(paw) then
266    ABI_DATATYPE_ALLOCATE(cwaveprj, (gs_hamk%natom,nspinor*nband_filter))
267    ABI_DATATYPE_ALLOCATE(cwaveprj_next, (gs_hamk%natom,nspinor*nband_filter))
268    ABI_DATATYPE_ALLOCATE(cwaveprj_prev, (gs_hamk%natom,nspinor*nband_filter))
269    call pawcprj_alloc(cwaveprj,0,gs_hamk%dimcprj)
270    call pawcprj_alloc(cwaveprj_next,0,gs_hamk%dimcprj)
271    call pawcprj_alloc(cwaveprj_prev,0,gs_hamk%dimcprj)
272 
273    sij_opt = 1 ! recompute S
274    cpopt = 0 ! save cprojs
275  else
276    sij_opt = 0
277    cpopt = -1
278  end if
279 
280 
281 
282  !======================================================================================================
283  ! Data in npfft x npband distribution. First getghc, update eigenvalues and residuals
284  !======================================================================================================
285  write(message, *) 'First getghc'
286  call wrtout(std_out,message,'COLL')
287 
288  ! get_ghc on cg
289  call timab(timer_getghc, 1, tsec)
290  if (dtset%paral_kgb == 0) then
291    call getghc(cpopt,cg_filter,cwaveprj,ghc_filter,gsc_filter,gs_hamk,gvnlc_filter,&
292 &   eval,mpi_enreg,nband,prtvol,sij_opt,tim_getghc,0)
293  else
294    call prep_getghc(cg_filter,gs_hamk,gvnlc_filter,ghc_filter,gsc_filter,eval,nband,mpi_enreg,&
295 &   prtvol,sij_opt,cpopt,cwaveprj,already_transposed=.true.)
296  end if
297  call timab(timer_getghc, 2, tsec)
298 
299  ! Debug barrier: should be invisible
300  call timab(timer_sync, 1, tsec)
301  call xmpi_barrier(mpi_enreg%comm_band)
302  call timab(timer_sync, 2, tsec)
303 
304  write(message, *) 'Computing residuals'
305  call wrtout(std_out,message,'COLL')
306  ! update eigenvalues and residuals
307  call timab(timer_update_eigen, 1, tsec)
308  ABI_ALLOCATE(resids_filter, (nband_filter))
309  ABI_ALLOCATE(residvec_filter, (2, npw_filter*nspinor))
310  ABI_ALLOCATE(nline_bands, (nband_filter))
311  do iband=1, nband_filter
312    shift = npw_filter*nspinor*(iband-1)
313    call dotprod_g(eig(iband),dprod_i,gs_hamk%istwf_k,npw_filter*nspinor,1,ghc_filter(:, shift+1:shift+npw_filter*nspinor),&
314 &   cg_filter(:, shift+1:shift+npw_filter*nspinor),mpi_enreg%me_g0,mpi_enreg%comm_spinorfft)
315    if(paw) then
316      call dotprod_g(dprod_r,dprod_i,gs_hamk%istwf_k,npw_filter*nspinor,1,gsc_filter(:, shift+1:shift+npw_filter*nspinor),&
317 &     cg_filter(:, shift+1:shift+npw_filter*nspinor),mpi_enreg%me_g0,mpi_enreg%comm_spinorfft)
318      eig(iband) = eig(iband)/dprod_r
319    end if
320 
321    if(paw) then
322      residvec_filter = ghc_filter(:, shift+1 : shift+npw_filter*nspinor) &
323 &     - eig(iband)*gsc_filter(:, shift+1 : shift+npw_filter*nspinor)
324    else
325      residvec_filter = ghc_filter(:, shift+1 : shift+npw_filter*nspinor) &
326 &     - eig(iband)*cg_filter(:, shift+1 : shift+npw_filter*nspinor)
327    end if
328    resids_filter(iband) = SUM(residvec_filter**2)
329  end do
330  call xmpi_sum(resids_filter,mpi_enreg%comm_fft,ierr)
331  call xmpi_max(MAXVAL(eig(1:nband_filter)),maxeig,mpi_enreg%comm_band,ierr)
332  call xmpi_min(MINVAL(eig(1:nband_filter)),mineig,mpi_enreg%comm_band,ierr)
333  filter_low = maxeig
334  call timab(timer_update_eigen, 2, tsec)
335 
336  ! Decide how many iterations per band are needed
337  ! don't go above this, or face bad conditioning of the Gram matrix.
338  nline_max = cheb_oracle(mineig, filter_low, dtset%ecut, 1e-16_dp, 40)
339  ! if(mpi_enreg%me == 0) write(0, *) nline_max
340  do iband=1, nband_filter
341    ! nline necessary to converge to tolwfr
342    nline_tolwfr = cheb_oracle(eig(iband), filter_low, dtset%ecut, dtset%tolwfr / resids_filter(iband), dtset%nline)
343    ! nline necessary to decrease residual by a constant factor
344    nline_decrease = cheb_oracle(eig(iband), filter_low, dtset%ecut, 0.1_dp, dtset%nline)
345 
346    nline_bands(iband) = MAX(MIN(nline_tolwfr, nline_decrease, nline_max, dtset%nline), 1)
347    nline_bands(iband) = dtset%nline ! fiddle with this to use locking
348  end do
349 
350 
351  !!!!! Uncomment for diagnostics
352  ! nline_total = SUM(nline_bands)
353  ! call xmpi_sum(nline_total, mpi_enreg%comm_band, ierr)
354  ! load_imbalance = (SUM(nline_bands) - REAL(nline_total)/REAL(mpi_enreg%nproc_band)) / &
355  ! &                (REAL(nline_total)/REAL(mpi_enreg%nproc_band))
356  ! call xmax_mpi(load_imbalance, mpi_enreg%comm_band, ierr)
357 
358  ! write(message, *) 'Mean nline', REAL(nline_total)/REAL(nband), 'max imbalance (%)', load_imbalance*100
359  ! call wrtout(std_out,message,'COLL')
360 
361  ABI_DEALLOCATE(resids_filter)
362  ABI_DEALLOCATE(residvec_filter)
363 
364  !======================================================================================================
365  ! Chebyshev polynomial application
366  !======================================================================================================
367  ! Filter by a chebyshev polynomial of degree nline
368  do iline=1,dtset%nline
369    ! Filter only on [iactive, iactive+nactive-1]
370    iactive = nband_filter
371    do iband = 1, nband_filter
372      ! does iband need an iteration?
373      if (nline_bands(iband) >= iline) then
374        iactive = iband
375        exit
376      end if
377    end do
378    nactive = nband_filter - iactive + 1
379    shift = npw_filter*nspinor*(iactive-1) + 1
380    ! trick the legacy prep_getghc
381    mpi_enreg%bandpp = nactive
382 
383    ! Define the filter position
384    filter_center = (dtset%ecut+filter_low)/2
385    filter_radius = (dtset%ecut-filter_low)/2
386 
387    ! write(message, *) 'Applying invovl, iteration', iline
388    ! call wrtout(std_out,message,'COLL')
389 
390    ! If paw, have to apply S^-1
391    if(paw) then
392      call timab(timer_apply_inv_ovl, 1, tsec)
393      call apply_invovl(gs_hamk, ghc_filter(:,shift:), gsm1hc_filter(:,shift:), cwaveprj_next(:,iactive:), &
394 &     npw_filter, nactive, mpi_enreg, nspinor)
395      call timab(timer_apply_inv_ovl, 2, tsec)
396    else
397      gsm1hc_filter(:,shift:) = ghc_filter(:,shift:)
398    end if
399 
400    ! Chebyshev iteration: UPDATE cg
401    if(iline == 1) then
402      cg_filter_next(:,shift:) = one/filter_radius * (gsm1hc_filter(:,shift:) - filter_center*cg_filter(:,shift:))
403    else
404      cg_filter_next(:,shift:) = two/filter_radius * (gsm1hc_filter(:,shift:) - filter_center*cg_filter(:,shift:)) &
405 &     - cg_filter_prev(:,shift:)
406    end if
407    ! Update gsc and cwaveprj
408    if(paw) then
409      if(iline == 1) then
410        gsc_filter_next(:,shift:) = one/filter_radius * (ghc_filter(:,shift:) - filter_center*gsc_filter(:,shift:))
411        !cwaveprj_next = one/filter_radius * (cwaveprj_next - filter_center*cwaveprj)
412        call pawcprj_axpby(-filter_center/filter_radius, one/filter_radius,cwaveprj(:,iactive:),cwaveprj_next(:,iactive:))
413      else
414        gsc_filter_next(:,shift:) = two/filter_radius * (ghc_filter(:,shift:) - filter_center*gsc_filter(:,shift:))&
415 &       - gsc_filter_prev(:,shift:)
416        !cwaveprj_next = two/filter_radius * (cwaveprj_next - filter_center*cwaveprj) - cwaveprj_prev
417        call pawcprj_axpby(-two*filter_center/filter_radius, two/filter_radius,cwaveprj(:,iactive:),cwaveprj_next(:,iactive:))
418        call pawcprj_axpby(-one, one,cwaveprj_prev(:,iactive:),cwaveprj_next(:,iactive:))
419      end if
420    end if
421 
422    ! Bookkeeping of the _prev variables
423    cg_filter_prev(:,shift:) = cg_filter(:,shift:)
424    cg_filter(:,shift:) = cg_filter_next(:,shift:)
425    if(paw) then
426      gsc_filter_prev(:,shift:) = gsc_filter(:,shift:)
427      gsc_filter(:,shift:) = gsc_filter_next(:,shift:)
428 
429      call pawcprj_copy(cwaveprj(:,iactive:),cwaveprj_prev(:,iactive:))
430      call pawcprj_copy(cwaveprj_next(:,iactive:),cwaveprj(:,iactive:))
431    end if
432 
433    ! Update ghc
434    if(paw) then
435      !! DEBUG use this to remove the optimization and recompute gsc/cprojs
436      ! sij_opt = 1
437      ! cpopt = 0
438 
439      sij_opt = 0 ! gsc is already computed
440      cpopt = 2 ! reuse cprojs
441    else
442      sij_opt = 0
443      cpopt = -1
444    end if
445 
446    write(message, *) 'Getghc, iteration', iline
447    call wrtout(std_out,message,'COLL')
448 
449    call timab(timer_getghc, 1, tsec)
450    if (dtset%paral_kgb == 0) then
451      call getghc(cpopt,cg_filter(:,shift:),cwaveprj(:,iactive:),ghc_filter(:,shift:),&
452 &     gsc_filter(:,shift:),gs_hamk,gvnlc_filter(:,shift:),eval,mpi_enreg,&
453 &     nband,prtvol,sij_opt,tim_getghc,0)
454    else
455      call prep_getghc(cg_filter(:,shift:),gs_hamk,gvnlc_filter(:,shift:),ghc_filter(:,shift:),&
456 &     gsc_filter(:,shift:),eval,nband,mpi_enreg,prtvol,sij_opt,cpopt,&
457 &     cwaveprj(:,iactive:),already_transposed=.true.)
458    end if
459 
460    ! end of the trick
461    mpi_enreg%bandpp = nband_filter
462 
463    call timab(timer_getghc, 2, tsec)
464  end do ! end loop on nline
465 
466  ! normalize according to the previously computed rayleigh quotients (inaccurate, but cheap)
467  do iband = 1, nband_filter
468    ampfactor = cheb_poly(eig(iband), nline_bands(iband), filter_low, dtset%ecut)
469    if(abs(ampfactor) < 1e-3) ampfactor = 1e-3 ! just in case, avoid amplifying too much
470    shift = npw_filter*nspinor*(iband-1)
471    cg_filter(:, shift+1:shift+npw_filter*nspinor) = cg_filter(:, shift+1:shift+npw_filter*nspinor) / ampfactor
472    ghc_filter(:, shift+1:shift+npw_filter*nspinor) = ghc_filter(:, shift+1:shift+npw_filter*nspinor) / ampfactor
473    if(paw) then
474      gsc_filter(:, shift+1:shift+npw_filter*nspinor) = gsc_filter(:, shift+1:shift+npw_filter*nspinor) / ampfactor
475    else
476      gvnlc_filter(:, shift+1:shift+npw_filter*nspinor) = gvnlc_filter(:, shift+1:shift+npw_filter*nspinor) / ampfactor
477    end if
478  end do
479 
480  ! Cleanup
481  if(paw) then
482    call pawcprj_free(cwaveprj)
483    call pawcprj_free(cwaveprj_next)
484    call pawcprj_free(cwaveprj_prev)
485    ABI_DATATYPE_DEALLOCATE(cwaveprj)
486    ABI_DATATYPE_DEALLOCATE(cwaveprj_next)
487    ABI_DATATYPE_DEALLOCATE(cwaveprj_prev)
488  end if
489  ABI_DEALLOCATE(nline_bands)
490  ABI_DEALLOCATE(cg_filter_next)
491  ABI_DEALLOCATE(cg_filter_prev)
492  ABI_DEALLOCATE(gsc_filter_prev)
493  ABI_DEALLOCATE(gsc_filter_next)
494  ABI_DEALLOCATE(gsm1hc_filter)
495 
496  !======================================================================================================
497  ! Filtering done, tranpose back
498  !======================================================================================================
499 
500  write(message, *) 'Filtering done, transposing back'
501  call wrtout(std_out,message,'COLL')
502 
503  ! transpose back
504  if(dtset%paral_kgb == 1) then
505    cg_alltoall1(:,index_wavef_band) = cg_alltoall2(:,:)
506    ghc_alltoall1(:,index_wavef_band) = ghc_alltoall2(:,:)
507    if(paw) then
508      gsc_alltoall1(:,index_wavef_band) = gsc_alltoall2(:,:)
509    else
510      gvnlc_alltoall1(:,index_wavef_band) = gvnlc_alltoall2(:,:)
511    end if
512 
513    ABI_DEALLOCATE(index_wavef_band)
514 
515    call timab(timer_sync, 1, tsec)
516    call xmpi_barrier(mpi_enreg%comm_band)
517    call timab(timer_sync, 2, tsec)
518 
519    call timab(timer_alltoall, 1, tsec)
520 
521   ! Do we pack the arrays in the alltoall, saving latency, or do we do it separately, saving memory and copies?
522    call xmpi_alltoallv(cg_alltoall1,recvcountsloc,rdisplsloc,cg,&
523 &   sendcountsloc,sdisplsloc,mpi_enreg%comm_band,ierr)
524    call xmpi_alltoallv(ghc_alltoall1,recvcountsloc,rdisplsloc,ghc,&
525 &   sendcountsloc,sdisplsloc,mpi_enreg%comm_band,ierr)
526    if(paw) then
527      call xmpi_alltoallv(gsc_alltoall1,recvcountsloc,rdisplsloc,gsc,&
528 &     sendcountsloc,sdisplsloc,mpi_enreg%comm_band,ierr)
529    else
530      call xmpi_alltoallv(gvnlc_alltoall1,recvcountsloc,rdisplsloc,gvnlc,&
531 &     sendcountsloc,sdisplsloc,mpi_enreg%comm_band,ierr)
532    end if
533    call timab(timer_alltoall, 2, tsec)
534 
535    if(mpi_enreg%paral_kgb == 1) then
536      ABI_DEALLOCATE(cg_alltoall1)
537      ABI_DEALLOCATE(gsc_alltoall1)
538      ABI_DEALLOCATE(ghc_alltoall1)
539      ABI_DEALLOCATE(gvnlc_alltoall1)
540      ABI_DEALLOCATE(cg_alltoall2)
541      ABI_DEALLOCATE(gsc_alltoall2)
542      ABI_DEALLOCATE(ghc_alltoall2)
543      ABI_DEALLOCATE(gvnlc_alltoall2)
544    end if
545  else
546    ! nothing to do, the _filter variables already point to the right ones
547  end if
548 
549 
550 
551  !======================================================================================================
552  ! Data in (npfft*npband) x 1 distribution. Rayleigh-Ritz step
553  !======================================================================================================
554 
555  ! _subdiago might use less memory when using only one proc, should maybe call it, or just remove it
556  ! and always call _distributed
557 #if defined HAVE_LINALG_SCALAPACK
558  call rayleigh_ritz_distributed(cg,ghc,gsc,gvnlc,eig,gs_hamk%istwf_k,mpi_enreg,nband,npw,nspinor,gs_hamk%usepaw)
559 #else
560  call rayleigh_ritz_subdiago(cg,ghc,gsc,gvnlc,eig,gs_hamk%istwf_k,mpi_enreg,nband,npw,nspinor,gs_hamk%usepaw)
561 #endif
562 
563  ! Build residuals
564  call timab(timer_residuals, 1, tsec)
565  do iband=1,nband
566    shift = npw*nspinor*(iband-1)
567    if(paw) then
568      resid_vec = ghc(:, shift+1 : shift+npw*nspinor) - eig(iband)*gsc(:, shift+1 : shift+npw*nspinor)
569    else
570      resid_vec = ghc(:, shift+1 : shift+npw*nspinor) - eig(iband)*cg (:, shift+1 : shift+npw*nspinor)
571    end if
572 
573    ! precondition resid_vec
574    do ispinor = 1,nspinor
575      resid_vec(1, npw*(ispinor-1)+1:npw*ispinor) = resid_vec(1, npw*(ispinor-1)+1:npw*ispinor) * pcon
576      resid_vec(2, npw*(ispinor-1)+1:npw*ispinor) = resid_vec(2, npw*(ispinor-1)+1:npw*ispinor) * pcon
577    end do
578 
579    call dotprod_g(resid(iband),dprod_i,gs_hamk%istwf_k,npw*nspinor,1,resid_vec,&
580 &   resid_vec,mpi_enreg%me_g0,mpi_enreg%comm_bandspinorfft)
581 
582    if(.not. paw) then
583      call dotprod_g(enl(iband),dprod_i,gs_hamk%istwf_k,npw*nspinor,1,cg(:, shift+1:shift+npw*nspinor),&
584 &     gvnlc(:, shift+1:shift+npw_filter*nspinor),mpi_enreg%me_g0,mpi_enreg%comm_bandspinorfft)
585    end if
586  end do
587  call timab(timer_residuals, 2, tsec)
588 
589  ! write(message, '(a,4e10.2)') 'Resids (1, N, min, max) ', resid(1), resid(nband), MINVAL(resid), MAXVAL(resid)
590  ! call wrtout(std_out,message,'COLL')
591 
592  ! write(message,*)'Eigens(1,nocc,nband) ',eig(1), eig(ilastocc),eig(nband)
593  ! call wrtout(std_out,message,'COLL')
594  ! write(message,*)'Resids(1,nocc,nband) ',resid(1), resid(ilastocc),resid(nband)
595  ! call wrtout(std_out,message,'COLL')
596 
597  call timab(timer_chebfi,2,tsec)
598 
599 end subroutine chebfi

ABINIT/m_chebfi [ Modules ]

[ Top ] [ Modules ]

NAME

  m_chebfi

FUNCTION

COPYRIGHT

  Copyright (C) 2014-2018 ABINIT group (AL)
  This file is distributed under the terms of the
  GNU General Public License, see ~abinit/COPYING
  or http://www.gnu.org/copyleft/gpl.txt .

PARENTS

CHILDREN

SOURCE

21 #if defined HAVE_CONFIG_H
22 #include "config.h"
23 #endif
24 
25 #include "abi_common.h"
26 
27 module m_chebfi
28 
29  use defs_basis
30  use defs_abitypes
31  use m_errors
32  use m_xmpi
33  use m_abicore
34  use m_abi_linalg
35  use m_rayleigh_ritz
36  use m_invovl
37 
38  use m_time,          only : timab
39  use m_cgtools,       only : dotprod_g
40  use m_bandfft_kpt,   only : bandfft_kpt, bandfft_kpt_get_ikpt
41  use m_pawcprj,       only : pawcprj_type, pawcprj_alloc, pawcprj_free, pawcprj_axpby, pawcprj_copy
42  use m_hamiltonian,   only : gs_hamiltonian_type
43  use m_getghc,        only : getghc
44  use m_prep_kgb,      only : prep_getghc, prep_index_wavef_bandpp
45 
46  implicit none
47 
48  private