TABLE OF CONTENTS


ABINIT/m_invovl [ Modules ]

[ Top ] [ Modules ]

NAME

  m_invovl

FUNCTION

  Provides functions to invert the overlap matrix S. Used by Chebyshev in PAW
  See paper by A. Levitt and M. Torrent for details
  S = 1 + projs * s_projs * projs'
  S^-1 = 1 + projs * inv_s_projs * projs', with
  inv_s_projs = - (s_projs^-1 + projs'*projs)^-1

COPYRIGHT

 Copyright (C) 2013-2018 ABINIT group (AL)
 This file is distributed under the terms of the
 GNU General Public License, see ~abinit/COPYING
 or http://www.gnu.org/copyleft/gpl.txt .

PARENTS

CHILDREN

SOURCE

25 #if defined HAVE_CONFIG_H
26 #include "config.h"
27 #endif
28 
29 #include "abi_common.h"
30 
31 MODULE m_invovl
32 
33  use defs_basis
34  use defs_abitypes
35  use m_errors
36  use m_xmpi
37  use m_abicore
38 
39  use m_time,        only : timab
40  use m_hamiltonian, only : gs_hamiltonian_type
41  use m_bandfft_kpt, only : bandfft_kpt_get_ikpt
42  use m_pawcprj,     only : pawcprj_type, pawcprj_alloc, pawcprj_free, pawcprj_axpby
43  use m_nonlop,      only : nonlop
44  use m_prep_kgb,    only : prep_nonlop
45 
46  implicit none
47 
48  private
49 
50 !public procedures.
51  public :: init_invovl
52  public :: make_invovl
53  public :: apply_invovl
54  public :: destroy_invovl

m_invovl/apply_block [ Functions ]

[ Top ] [ m_invovl ] [ Functions ]

NAME

 apply_block

FUNCTION

 Helper function: applies a block-diagonal matrix mat(lmnmax, lmnmax, ntypat)

INPUTS

PARENTS

      m_invovl

CHILDREN

      dsymm,zhemm

SOURCE

739 subroutine apply_block(ham, cplx, mat, nprojs, ndat, x, y)
740 
741   use m_abi_linalg
742 
743 !This section has been created automatically by the script Abilint (TD).
744 !Do not modify the following lines by hand.
745 #undef ABI_FUNC
746 #define ABI_FUNC 'apply_block'
747 !End of the abilint section
748 
749   implicit none
750 
751   integer,intent(in) :: ndat, nprojs, cplx
752   real(dp), intent(inout) :: x(cplx, nprojs, ndat), y(cplx, nprojs, ndat)
753   type(gs_hamiltonian_type),intent(in) :: ham
754   real(dp), intent(in) :: mat(cplx, ham%lmnmax, ham%lmnmax, ham%ntypat)
755 
756   integer :: nlmn, shift, itypat, idat
757 
758 ! *************************************************************************
759 
760   do idat = 1, ndat
761     shift = 1
762     do itypat=1, ham%ntypat
763       nlmn = count(ham%indlmn(3,:,itypat)>0)
764       !! apply mat to all atoms at once
765       ! perform natom multiplications of size nlmn
766       if(cplx == 2) then
767         call ZHEMM('L','U', nlmn, ham%nattyp(itypat), cone, mat(:, :, :, itypat), ham%lmnmax, &
768 &                x(:, shift:shift+nlmn*ham%nattyp(itypat)-1, idat), nlmn, czero, &
769 &                y(:,shift:shift+nlmn*ham%nattyp(itypat)-1,idat), nlmn)
770       else
771         call DSYMM('L','U', nlmn, ham%nattyp(itypat), one, mat(:, :, :, itypat), ham%lmnmax, &
772 &                x(:, shift:shift+nlmn*ham%nattyp(itypat)-1, idat), nlmn, zero, &
773 &                y(:,shift:shift+nlmn*ham%nattyp(itypat)-1,idat), nlmn)
774       end if
775       shift = shift + ham%nattyp(itypat)*nlmn
776     end do
777   end do
778 
779 end subroutine apply_block

m_invovl/apply_invovl [ Functions ]

[ Top ] [ m_invovl ] [ Functions ]

NAME

 apply_invovl

FUNCTION

 Applies the inverse of the overlap matrix to cwavef

INPUTS

PARENTS

      chebfi

CHILDREN

      dsymm,zhemm

SOURCE

465  subroutine apply_invovl(ham, cwavef, sm1cwavef, cwaveprj, npw, ndat, mpi_enreg, nspinor)
466 
467 
468 !This section has been created automatically by the script Abilint (TD).
469 !Do not modify the following lines by hand.
470 #undef ABI_FUNC
471 #define ABI_FUNC 'apply_invovl'
472 !End of the abilint section
473 
474  implicit none
475 
476  ! args
477  type(gs_hamiltonian_type),intent(in) :: ham
478  integer,intent(in) :: npw, ndat
479  integer,intent(in) :: nspinor
480  real(dp), intent(inout) :: cwavef(2, npw*nspinor*ndat) ! TODO should be in, fix nonlop
481  type(mpi_type) :: mpi_enreg
482  real(dp), intent(inout) :: sm1cwavef(2, npw*nspinor*ndat)
483  type(pawcprj_type), intent(inout) :: cwaveprj(ham%natom,nspinor*ndat)
484 
485  real(dp),allocatable :: proj(:,:,:),sm1proj(:,:,:),PtPsm1proj(:,:,:)
486  integer :: idat, iatom, nlmn, shift
487  real(dp) :: tsec(2)
488 
489  integer :: choice, cpopt, paw_opt , cplx
490  character :: blas_transpose
491  type(pawcprj_type) :: cwaveprj_in(ham%natom,nspinor*ndat)
492 
493  integer :: ikpt_this_proc
494  integer, parameter :: tim_nonlop = 13
495  ! dummies
496  real(dp) :: enlout(ndat), lambda_block(1), gvnlc(1,1)
497  integer, parameter :: nnlout = 0, idir = 0, signs = 2
498 
499  type(invovl_kpt_type), pointer :: invovl
500  integer, parameter :: timer_apply_inv_ovl_opernla = 1630, timer_apply_inv_ovl_opernlb = 1631, &
501 &                      timer_apply_inv_ovl_inv_s = 1632
502 
503 ! *************************************************************************
504 
505  ikpt_this_proc=bandfft_kpt_get_ikpt()
506  invovl => invovl_kpt(ikpt_this_proc)
507 
508  if(ham%istwf_k == 1) then
509    cplx = 2
510    blas_transpose = 'c'
511  else
512    cplx = 1
513    blas_transpose = 't'
514  end if
515 
516  ABI_ALLOCATE(proj, (cplx,invovl%nprojs,nspinor*ndat))
517  ABI_ALLOCATE(sm1proj, (cplx,invovl%nprojs,nspinor*ndat))
518  ABI_ALLOCATE(PtPsm1proj, (cplx,invovl%nprojs,nspinor*ndat))
519  proj = zero
520  sm1proj = zero
521  PtPsm1proj = zero
522 
523  call timab(timer_apply_inv_ovl_opernla, 1, tsec)
524 
525  call pawcprj_alloc(cwaveprj_in,0,ham%dimcprj)
526 
527  ! get the cprj
528  choice = 0 ! only compute cprj, nothing else
529  cpopt = 0 ! compute and save cprj
530  paw_opt = 3 ! S nonlocal operator
531  if (mpi_enreg%paral_kgb==1) then
532    call prep_nonlop(choice,cpopt,cwaveprj_in,enlout,ham,idir,lambda_block,ndat,mpi_enreg,&
533 &                   nnlout,paw_opt,signs,sm1cwavef,tim_nonlop,cwavef,gvnlc,already_transposed=.true.)
534  else
535    call nonlop(choice,cpopt,cwaveprj_in,enlout,ham,idir,lambda_block,mpi_enreg,ndat,nnlout,&
536 &              paw_opt,signs,sm1cwavef,tim_nonlop,cwavef,gvnlc)
537  end if
538 
539  call timab(timer_apply_inv_ovl_opernla, 2, tsec)
540  call timab(timer_apply_inv_ovl_inv_s, 1, tsec)
541 
542  ! copy cwaveprj_in to proj(:,:)
543  do idat=1, ndat*nspinor
544    shift = 0
545    do iatom = 1, ham%natom
546      nlmn = cwaveprj_in(iatom, idat)%nlmn
547      proj(1:cplx, shift+1:shift+nlmn, idat) = cwaveprj_in(iatom, idat)%cp(1:cplx, 1:nlmn)
548      shift = shift + nlmn
549    end do
550  end do
551 
552  !multiply by S^1
553  call solve_inner(invovl, ham, cplx, mpi_enreg, proj, ndat*nspinor, sm1proj, PtPsm1proj)
554  sm1proj = - sm1proj
555  PtPsm1proj = - PtPsm1proj
556 
557  ! copy sm1proj to cwaveprj(:,:)
558  do idat=1, ndat*nspinor
559    shift = 0
560    do iatom = 1, ham%natom
561      nlmn = cwaveprj(iatom, idat)%nlmn
562      cwaveprj(iatom, idat)%cp(1:cplx, 1:nlmn) = sm1proj(1:cplx, shift+1:shift+nlmn, idat)
563      shift = shift + nlmn
564    end do
565  end do
566  call timab(timer_apply_inv_ovl_inv_s, 2, tsec)
567  call timab(timer_apply_inv_ovl_opernlb, 1, tsec)
568 
569  ! get the corresponding wf
570  cpopt = 2 ! reuse cprj
571  choice = 7 ! get wf from cprj, without the application of S
572  paw_opt = 3
573  if (mpi_enreg%paral_kgb==1) then
574    call prep_nonlop(choice,cpopt,cwaveprj,enlout,ham,idir,lambda_block,ndat,mpi_enreg,nnlout,&
575 &                   paw_opt,signs,sm1cwavef,tim_nonlop,cwavef,gvnlc,already_transposed=.true.)
576  else
577    call nonlop(choice,cpopt,cwaveprj,enlout,ham,idir,lambda_block,mpi_enreg,ndat,nnlout,paw_opt,&
578 &              signs,sm1cwavef,tim_nonlop,cwavef,gvnlc)
579  end if
580 
581  call timab(timer_apply_inv_ovl_opernlb, 2, tsec)
582 
583  ! copy PtPSm1proj to cwaveprj(:,:)
584  do idat=1, ndat*nspinor
585    shift = 0
586    do iatom = 1, ham%natom
587      nlmn = cwaveprj(iatom, idat)%nlmn
588      cwaveprj(iatom, idat)%cp(1:cplx, 1:nlmn) = PtPsm1proj(1:cplx, shift+1:shift+nlmn, idat)
589      shift = shift + nlmn
590    end do
591  end do
592 
593  sm1cwavef = cwavef + sm1cwavef
594  call pawcprj_axpby(one, one, cwaveprj_in, cwaveprj)
595  call pawcprj_free(cwaveprj_in)
596 
597  ABI_DEALLOCATE(proj)
598  ABI_DEALLOCATE(sm1proj)
599  ABI_DEALLOCATE(PtPsm1proj)
600 
601 end subroutine apply_invovl

m_invovl/destroy_invovl [ Functions ]

[ Top ] [ m_invovl ] [ Functions ]

NAME

 destroy_invovl

FUNCTION

 Destruction of the invovl_kpt array

INPUTS

 nkpt= number of k-points

PARENTS

      gstate

CHILDREN

      dsymm,zhemm

SOURCE

151  subroutine destroy_invovl(nkpt)
152 
153 
154 !This section has been created automatically by the script Abilint (TD).
155 !Do not modify the following lines by hand.
156 #undef ABI_FUNC
157 #define ABI_FUNC 'destroy_invovl'
158 !End of the abilint section
159 
160   integer, intent(in) :: nkpt
161   integer :: ikpt
162 
163 ! *************************************************************************
164 
165   ! TODO add cycling if kpt parallelism
166   do ikpt=1,nkpt
167     if(invovl_kpt(ikpt)%nprojs == -1) then
168       ! write(0, *) 'ERROR invovl_kpt is unallocated'
169       cycle
170     end if
171 
172     ABI_DEALLOCATE(invovl_kpt(ikpt)%gram_projs)
173     ABI_DEALLOCATE(invovl_kpt(ikpt)%inv_sij)
174     ABI_DEALLOCATE(invovl_kpt(ikpt)%inv_s_approx)
175     invovl_kpt(ikpt)%nprojs = -1
176   end do
177 
178   ABI_DATATYPE_DEALLOCATE(invovl_kpt)
179 
180  end subroutine destroy_invovl

m_invovl/init_invovl [ Functions ]

[ Top ] [ m_invovl ] [ Functions ]

NAME

 init_invovl

FUNCTION

 Initalization of the invovl_kpt array

INPUTS

 nkpt= number of k-points

PARENTS

      gstate

CHILDREN

      dsymm,zhemm

SOURCE

111  subroutine init_invovl(nkpt)
112 
113 
114 !This section has been created automatically by the script Abilint (TD).
115 !Do not modify the following lines by hand.
116 #undef ABI_FUNC
117 #define ABI_FUNC 'init_invovl'
118 !End of the abilint section
119 
120   integer, intent(in) :: nkpt
121   integer :: ikpt
122 
123 ! *************************************************************************
124 
125   ABI_DATATYPE_ALLOCATE(invovl_kpt, (nkpt))
126   ! TODO add cycling if kpt parallelism
127   do ikpt=1,nkpt
128     invovl_kpt(ikpt)%nprojs = -1
129   end do
130 
131  end subroutine init_invovl

m_invovl/invovl_kpt_type [ Types ]

[ Top ] [ m_invovl ] [ Types ]

NAME

 invovl_kpt_type

FUNCTION

 Contains information needed to invert the overlap matrix S

SOURCE

67  type, public :: invovl_kpt_type
68 
69    integer :: nprojs
70    !  total number of projectors
71    !    nlmn for a specific atom = count(indlmn(3,:,itypat)>0)
72    !  A value of -1 means that the following arrays are not allocated
73 
74    real(dp), allocatable :: gram_projs(:,:,:)
75    ! gram_projs(cplx, nprojs, nprojs)
76    ! projs' * projs
77 
78    real(dp), allocatable :: inv_sij(:, :,:,:)
79    ! inv_sij(cplx, lmnmax, lmnmax, ntypat)
80    ! inverse of ham%sij
81 
82    real(dp), allocatable :: inv_s_approx(:,:,:,:)
83    ! inv_s_approx(cplx, lmnmax, lmnmax, ntypat)
84    ! preconditionner
85 
86 end type invovl_kpt_type

m_invovl/make_invovl [ Functions ]

[ Top ] [ m_invovl ] [ Functions ]

NAME

 make_invovl

FUNCTION

 Builds of the invovl structure

INPUTS

PARENTS

      vtorho

CHILDREN

      dsymm,zhemm

SOURCE

200 subroutine make_invovl(ham, dimffnl, ffnl, ph3d, mpi_enreg)
201 
202  use m_abi_linalg
203 
204 !This section has been created automatically by the script Abilint (TD).
205 !Do not modify the following lines by hand.
206 #undef ABI_FUNC
207 #define ABI_FUNC 'make_invovl'
208 !End of the abilint section
209 
210  implicit none
211 
212  type(gs_hamiltonian_type),intent(in) :: ham
213  integer, intent(in) :: dimffnl
214  real(dp),intent(in) :: ffnl(ham%npw_k,dimffnl,ham%lmnmax,ham%ntypat)
215  real(dp),intent(in) :: ph3d(2,ham%npw_k,ham%matblk)
216  type(mpi_type) :: mpi_enreg
217 
218  real(dp) :: atom_projs(2, ham%npw_k, ham%lmnmax)
219  real(dp) :: temp(ham%npw_k)
220  complex(dpc), allocatable :: work(:)
221  real(dp), allocatable :: projs(:,:,:)
222  real(dp), allocatable :: gram_proj(:,:,:)
223  integer, allocatable :: ipiv(:)
224 
225  integer :: itypat, ilmn, nlmn, jlmn, ia, iaph3d, shift
226  integer :: il, ilm, jlm, ipw, info, ierr, cplx
227  integer :: ikpt_this_proc
228  logical :: parity
229  real(dp) :: tsec(2)
230  character(len=500) :: message
231  character :: blas_transpose
232 
233  type(invovl_kpt_type), pointer :: invovl
234  integer :: array_nprojs_pp(mpi_enreg%nproc_fft)
235  integer :: iproc, slice_size
236  real(dp), allocatable :: gramwork(:,:,:)
237 
238  integer, parameter :: timer_mkinvovl = 1620, timer_mkinvovl_build_d = 1621, timer_mkinvovl_build_ptp = 1622
239 
240 ! *************************************************************************
241 
242  !! S = 1 + PDP', so
243  !! S^-1 = 1 + P inv_s_projs P', with
244  !! inv_s_projs = - (D^-1 + P'*P)^-1
245 
246  if(ham%istwf_k == 1) then
247    cplx = 2
248    blas_transpose = 'c'
249  else
250    cplx = 1
251    blas_transpose = 't'
252  end if
253 
254  ikpt_this_proc=bandfft_kpt_get_ikpt()
255  invovl => invovl_kpt(ikpt_this_proc)
256 
257  if(invovl%nprojs /= -1) then
258    ! We have been here before, cleanup before remaking
259    ABI_DEALLOCATE(invovl%gram_projs)
260    ABI_DEALLOCATE(invovl%inv_sij)
261    ABI_DEALLOCATE(invovl%inv_s_approx)
262    invovl%nprojs = -1
263  end if
264 
265  iaph3d = 1
266 
267  call timab(timer_mkinvovl,1,tsec)
268  call timab(timer_mkinvovl_build_d,1,tsec)
269 
270  ! build nprojs
271  invovl%nprojs = 0
272  do itypat=1,ham%ntypat
273    invovl%nprojs = invovl%nprojs + count(ham%indlmn(3,:,itypat)>0)*ham%nattyp(itypat)
274  end do
275 
276  ABI_ALLOCATE(projs, (2, ham%npw_k, invovl%nprojs))
277  ABI_ALLOCATE(invovl%inv_sij, (cplx, ham%lmnmax, ham%lmnmax, ham%ntypat))
278  ABI_ALLOCATE(invovl%inv_s_approx, (cplx, ham%lmnmax, ham%lmnmax, ham%ntypat))
279  ! workspace for inversion
280  ABI_ALLOCATE(ipiv, (ham%lmnmax))
281  ABI_ALLOCATE(work, (64*ham%lmnmax))
282 
283  projs = zero
284  invovl%inv_sij = zero
285  invovl%inv_s_approx = zero
286 
287  shift = 0
288  do itypat = 1, ham%ntypat
289    nlmn = count(ham%indlmn(3,:,itypat)>0)
290    !! unpack ham%sij into inv_sij
291    do jlmn = 1, nlmn
292      invovl%inv_sij(1, jlmn, jlmn, itypat) = ham%sij(jlmn*(jlmn-1)/2 + jlmn, itypat)
293      jlm=ham%indlmn(4,jlmn, itypat)
294      do ilmn = 1, jlmn-1
295        ilm=ham%indlmn(4,ilmn, itypat)
296        if (ilm == jlm) then ! sparsity check
297          invovl%inv_sij(1, ilmn, jlmn, itypat) = ham%sij(jlmn*(jlmn-1)/2 + ilmn, itypat)
298          invovl%inv_sij(1, jlmn, ilmn, itypat) = ham%sij(jlmn*(jlmn-1)/2 + ilmn, itypat)
299        end if
300      end do
301    end do
302 
303    ! Invert sij
304    if(cplx == 2) then
305      call ZHETRF('U', nlmn, invovl%inv_sij(:,:,:,itypat), ham%lmnmax, ipiv, work, (64*nlmn), info)
306      call ZHETRI('U', nlmn, invovl%inv_sij(:,:,:,itypat), ham%lmnmax, ipiv, work, info)
307    else
308      call DSYTRF('U', nlmn, invovl%inv_sij(:,:,:,itypat), ham%lmnmax, ipiv, work, (64*nlmn), info)
309      call DSYTRI('U', nlmn, invovl%inv_sij(:,:,:,itypat), ham%lmnmax, ipiv, work, info)
310    end if
311    ! complete the matrix
312    do ilm=1, nlmn
313      do jlm=1, ilm-1
314        invovl%inv_sij(1,ilm,jlm,itypat) =  invovl%inv_sij(1,jlm,ilm,itypat)
315      end do
316    end do
317 
318    !! loop on atoms to build atom_projs and fill projs, s_projs
319    do ia = 1, ham%nattyp(itypat)
320 
321      !! build atom_projs, from opernlb
322      !! P = 4pi/sqrt(ucvol)* conj(diag(ph3d)) * ffnl * diag(parity), with parity = (-i)^l
323      atom_projs(:,:,:) = 0
324 
325      ! start from 4pi/sqrt(ucvol)*ffnl
326      ! atom_projs(1, :, 1:nlmn) = four_pi/sqrt(ham%ucvol) * ffnl(:, 1, 1:nlmn)
327      ! TODO vectorize (DCOPY with stride)
328      do ipw=1, ham%npw_k
329        atom_projs(1,ipw, 1:nlmn) = four_pi/sqrt(ham%ucvol) * ffnl(ipw, 1, 1:nlmn, itypat)
330      end do
331 
332      ! multiply by (-i)^l
333      do ilmn=1,nlmn
334        il=mod(ham%indlmn(1,ilmn, itypat),4);
335        parity=(mod(il,2)==0)
336        if (il>1) then
337          ! multiply by -1
338          atom_projs(:,:,ilmn) = -atom_projs(:,:,ilmn)
339        end if
340        if(.not. parity) then
341          ! multiply by -i
342          temp = atom_projs(2,:,ilmn)
343          atom_projs(2,:,ilmn) = -atom_projs(1,:,ilmn)
344          atom_projs(1,:,ilmn) =  temp
345        end if
346      end do
347 
348      ! multiply by conj(ph3d)
349      do ilmn=1,nlmn
350        temp = atom_projs(1, :, ilmn)
351        atom_projs(1, :, ilmn) = atom_projs(1, :, ilmn) * ph3d(1, :, iaph3d) + atom_projs(2, :, ilmn) * ph3d(2, :, iaph3d)
352        atom_projs(2, :, ilmn) = atom_projs(2, :, ilmn) * ph3d(1, :, iaph3d) - temp                   * ph3d(2, :, iaph3d)
353      end do
354 
355      ! me_g0 trick
356      if(ham%istwf_k == 2 .and. mpi_enreg%me_g0 == 1) then
357        atom_projs(1,1,:) = atom_projs(1,1,:) / sqrt2
358        atom_projs(2,1,:) = zero
359      end if
360      if(ham%istwf_k == 2) then
361        atom_projs(:,:,:) = atom_projs(:,:,:)*sqrt2
362      end if
363 
364 
365      !! atom_projs and typat_s_projs are built, copy them to projs and inv_s_projs
366      projs(:, :, shift+1:shift+nlmn) = atom_projs(:, :, 1:nlmn)
367      shift = shift + nlmn
368 
369      ! build inv_s_approx = (D^-1+PtP)^-1 restricted to a single atom block
370      ! can be optimized (real, build directly from ffnl)
371      if(ia == 1) then
372        ! D^-1
373        invovl%inv_s_approx(1, :, :, itypat) = invovl%inv_sij(1, :, :, itypat)
374        ! + PtP
375        ABI_ALLOCATE(gram_proj, (cplx, nlmn, nlmn))
376        call abi_xgemm(blas_transpose,'N', nlmn, nlmn, (3-cplx)*ham%npw_k, cone, atom_projs(:,:,1), (3-cplx)*ham%npw_k, &
377 &                     atom_projs(:,:,1), (3-cplx)*ham%npw_k, czero, gram_proj(:,:,1), nlmn,x_cplx=cplx)
378        call xmpi_sum(gram_proj,mpi_enreg%comm_bandspinorfft,ierr)
379        invovl%inv_s_approx(:,1:nlmn,1:nlmn,itypat) = invovl%inv_s_approx(:,1:nlmn,1:nlmn,itypat) + gram_proj(:,:,:)
380        ABI_DEALLOCATE(gram_proj)
381        ! ^-1
382        if(cplx == 2) then
383          call ZHETRF('U', nlmn, invovl%inv_s_approx(:,:,:,itypat), ham%lmnmax, ipiv, work, (64*nlmn), info)
384          call ZHETRI('U', nlmn, invovl%inv_s_approx(:,:,:,itypat), ham%lmnmax, ipiv, work, info)
385        else
386          call DSYTRF('U', nlmn, invovl%inv_s_approx(:,:,:,itypat), ham%lmnmax, ipiv, work, (64*nlmn), info)
387          call DSYTRI('U', nlmn, invovl%inv_s_approx(:,:,:,itypat), ham%lmnmax, ipiv, work, info)
388        end if
389        ! complete lower triangle of matrix
390        do ilm=1, nlmn
391          do jlm=1, ilm-1
392            invovl%inv_s_approx(1, ilm, jlm, itypat) =  invovl%inv_s_approx(1, jlm, ilm, itypat)
393            if(cplx == 2) then
394              invovl%inv_s_approx(2, ilm, jlm, itypat) =  -invovl%inv_s_approx(2, jlm, ilm, itypat)
395            end if
396          end do
397        end do
398      end if
399 
400      iaph3d = iaph3d + 1
401    end do
402  end do
403  ABI_DEALLOCATE(ipiv)
404  ABI_DEALLOCATE(work)
405 
406  call timab(timer_mkinvovl_build_d, 2, tsec)
407  call timab(timer_mkinvovl_build_ptp, 1, tsec)
408 
409  ! Compute P'P one column slice at a time (might be too big to fit in one proc)
410  if(mpi_enreg%paral_kgb == 1) then
411    ! Split the work evenly the fft processors
412    array_nprojs_pp(:) = invovl%nprojs / mpi_enreg%nproc_fft
413    ! not enough work, there's MOD(nprojs,mpi_enreg%nproc_fft) tasks left
414    ! assign them to the first ones
415    array_nprojs_pp(1:MOD(invovl%nprojs,mpi_enreg%nproc_fft)) = array_nprojs_pp(1:MOD(invovl%nprojs,mpi_enreg%nproc_fft)) + 1
416  else
417    array_nprojs_pp = invovl%nprojs
418  end if
419  ABI_ALLOCATE(invovl%gram_projs, (cplx,invovl%nprojs,array_nprojs_pp(mpi_enreg%me_fft+1)))
420  shift = 0
421  do iproc = 1, mpi_enreg%nproc_fft
422    ! compute local contribution to slice iproc of gram_projs
423    slice_size = array_nprojs_pp(iproc)
424    ABI_ALLOCATE(gramwork, (cplx,invovl%nprojs,slice_size))
425    call abi_xgemm(blas_transpose,'N', invovl%nprojs, slice_size, (3-cplx)*ham%npw_k, cone, projs(:,:,1), (3-cplx)*ham%npw_k, &
426 &                      projs(:, :, shift+1), (3-cplx)*ham%npw_k, czero, gramwork(:,:,1), invovl%nprojs,x_cplx=cplx)
427    shift = shift + slice_size
428    ! reduce on proc i
429    call xmpi_sum_master(gramwork, iproc-1, mpi_enreg%comm_fft, ierr)
430    if(iproc == mpi_enreg%me_fft+1) then
431      invovl%gram_projs = gramwork
432    end if
433    ABI_DEALLOCATE(gramwork)
434  end do
435  call xmpi_sum(invovl%gram_projs,mpi_enreg%comm_band,ierr)
436 
437  call timab(timer_mkinvovl_build_ptp, 2, tsec)
438  call timab(timer_mkinvovl,2,tsec)
439 
440  ABI_DEALLOCATE(projs)
441 
442  write(message,*) 'Invovl built'
443  call wrtout(std_out,message,'COLL')
444 
445 end subroutine make_invovl

m_invovl/solve_inner [ Functions ]

[ Top ] [ m_invovl ] [ Functions ]

NAME

 solve_inner

FUNCTION

 Helper function: iteratively solves the inner system

INPUTS

PARENTS

      m_invovl

CHILDREN

      dsymm,zhemm

SOURCE

620 subroutine solve_inner(invovl, ham, cplx, mpi_enreg, proj, ndat, sm1proj, PtPsm1proj)
621 
622  use m_abi_linalg
623 
624 !This section has been created automatically by the script Abilint (TD).
625 !Do not modify the following lines by hand.
626 #undef ABI_FUNC
627 #define ABI_FUNC 'solve_inner'
628 !End of the abilint section
629 
630  implicit none
631 
632  integer,intent(in) :: ndat,cplx
633  type(invovl_kpt_type), intent(in) :: invovl
634  real(dp), intent(inout) :: proj(cplx, invovl%nprojs,ndat), sm1proj(cplx, invovl%nprojs,ndat), PtPsm1proj(cplx, invovl%nprojs,ndat)
635  real(dp), allocatable :: temp_proj(:,:,:)
636  type(mpi_type), intent(in) :: mpi_enreg
637  type(gs_hamiltonian_type),intent(in) :: ham
638 
639  integer :: array_nlmntot_pp(mpi_enreg%nproc_fft)
640  integer :: nlmntot_this_proc, ibeg, iend, ierr, i, nprojs
641  real(dp) :: resid(cplx, invovl%nprojs,ndat), precondresid(cplx, invovl%nprojs,ndat)
642  real(dp) :: normprojs(ndat), errs(ndat), maxerr, previous_maxerr
643  character(len=500) :: message
644 
645  real(dp), parameter :: precision = 1e-16 ! maximum relative error. TODO: use tolwfr ?
646  real(dp) :: convergence_rate
647  integer :: additional_steps_to_take
648 
649 ! *************************************************************************
650 
651  nprojs = invovl%nprojs
652  normprojs = SUM(SUM(proj**2, 1),1)
653  call xmpi_sum(normprojs, mpi_enreg%comm_fft, ierr)
654 
655  ! Compute work distribution : split nprojs evenly between the fft processors
656  if(mpi_enreg%paral_kgb == 1) then
657    array_nlmntot_pp(:) = nprojs / mpi_enreg%nproc_fft
658    ! not enough work, there's MOD(nprojs,mpi_enreg%nproc_fft) tasks left
659    ! assign them to the first ones
660    array_nlmntot_pp(1:MOD(nprojs,mpi_enreg%nproc_fft)) = array_nlmntot_pp(1:MOD(nprojs,mpi_enreg%nproc_fft)) + 1
661    ibeg = SUM(array_nlmntot_pp(1:mpi_enreg%me_fft)) + 1
662    iend = ibeg + array_nlmntot_pp(1+mpi_enreg%me_fft) - 1
663    nlmntot_this_proc = iend - ibeg + 1
664  else
665    ibeg = 1
666    iend = nprojs
667    nlmntot_this_proc = nprojs
668  end if
669 
670  ABI_ALLOCATE(temp_proj, (cplx, nlmntot_this_proc,ndat))
671 
672  ! first guess for sm1proj
673  call apply_block(ham, cplx, invovl%inv_s_approx, nprojs, ndat, proj, sm1proj)
674 
675  ! Iterative refinement
676  ! TODO use a more efficient iterative algorithm than iterative refinement, use locking
677  additional_steps_to_take = -1
678  do i=1, 30
679   ! compute resid = proj - (D^-1 + PtP)sm1proj
680    call apply_block(ham, cplx, invovl%inv_sij, nprojs, ndat, sm1proj, resid)
681    temp_proj = sm1proj(:,ibeg:iend,:)
682    call abi_xgemm('N', 'N', nprojs, ndat, nlmntot_this_proc, cone, invovl%gram_projs(:,:,1), nprojs, &
683 &            temp_proj(:,:,1), nlmntot_this_proc, czero, PtPsm1proj(:,:,1), nprojs, x_cplx=cplx)
684    call xmpi_sum(PtPsm1proj, mpi_enreg%comm_fft, ierr)
685    resid = proj - resid - Ptpsm1proj
686    ! exit check
687    errs = SUM(SUM(resid**2, 1),1)
688    call xmpi_sum(errs, mpi_enreg%comm_fft, ierr)
689 
690    maxerr = sqrt(MAXVAL(errs/normprojs))
691    if(maxerr < precision .or. additional_steps_to_take == 1) then
692      exit
693      ! We might stall and never get to the specified precision because of machine errors.
694      ! If we got to 1e-10, extrapolate convergence rate and determine the number of additional
695      ! steps to take to reach precision
696    else if(maxerr < 1e-10 .and. additional_steps_to_take == -1) then
697      convergence_rate = -LOG(1e-10) / i
698      additional_steps_to_take = CEILING(-LOG(precision/1e-10)/convergence_rate) + 1
699    else if(additional_steps_to_take > 0) then
700      if(previous_maxerr<maxerr)exit
701      additional_steps_to_take = additional_steps_to_take - 1
702    end if
703    previous_maxerr=maxerr
704 
705    ! add preconditionned residual
706    call apply_block(ham, cplx, invovl%inv_s_approx, nprojs, ndat, resid, precondresid)
707    sm1proj = sm1proj + precondresid
708  end do
709 
710  if(maxerr >= precision .and. maxerr >= 1e-10) then
711    write(message, *) 'In invovl, max error was', maxerr, ' after 30 iterations'
712    MSG_WARNING(message)
713  else
714    ! write(message,'(a,i2,a,es13.5)') 'Iterative solver in invovl finished in ', i, ' iterations, error', maxerr
715    ! call wrtout(std_out,message,'COLL')
716  end if
717 
718  ABI_DEALLOCATE(temp_proj)
719 
720 end subroutine solve_inner